|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/alignmgr.c |
source navigation diff markup identifier search freetext search file search |
1 /* ===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information (NCBI)
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government do not place any restriction on its use or reproduction.
12 * We would, however, appreciate having the NCBI and the author cited in
13 * any work or product based on this material.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * ===========================================================================
24 *
25 * File Name: alignmgr.c
26 *
27 * Author: Sarah Wheelan
28 *
29 * Version Creation Date: 7/99
30 *
31 * $Revision: 6.180 $
32 *
33 * File Description: SeqAlign indexing and messaging functions
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * $Log: alignmgr.c,v $
38 * Revision 6.180 2008/10/22 17:18:40 bollin
39 * Improvement to function for freeing an alignment index - if a freefunc was
40 * provided, use it.
41 *
42 * Revision 6.179 2004/05/20 19:44:28 bollin
43 * removed unused variables
44 *
45 * Revision 6.178 2001/11/09 17:22:34 wheelan
46 * fixed bug in TruncateSeqAlign
47 *
48 * Revision 6.177 2001/08/07 14:39:34 wheelan
49 * added am_cleanupsalp
50 *
51 * Revision 6.176 2001/07/10 16:44:01 wheelan
52 * added AlnMgrMakeFakeMultipleEx for AlnMgrIndexIndexedSet
53 *
54 * Revision 6.175 2001/07/10 11:12:23 wheelan
55 * added AlnMgrIndexIndexedChain
56 *
57 * Revision 6.174 2001/05/30 12:13:58 wheelan
58 * AlnMsgNew and AlnMsgReNew initialize from_m and to_m
59 *
60 * Revision 6.173 2001/04/30 17:51:58 wheelan
61 * minor bug fix
62 *
63 * Revision 6.172 2001/04/19 17:59:58 wheelan
64 * added protection against NULL strands in AlnMgrIndexSingleChildSeqAlign
65 *
66 * Revision 6.171 2001/03/21 19:59:21 hurwitz
67 * remove AlnMgrMergeNeighbors call from AlnMgrMakeMultByIntersectOnMaster
68 *
69 * Revision 6.170 2001/03/08 21:04:39 hurwitz
70 * rolled back AlnMgrMakeMultByIntersectOnMaster to rev 6.156
71 *
72 * Revision 6.169 2001/03/08 17:07:10 wheelan
73 * added AlnMgrGetParent and structure to support it
74 *
75 * Revision 6.168 2001/03/01 19:15:15 wheelan
76 * fixed bug in MapBioseqToSeqAlign
77 *
78 * Revision 6.167 2001/02/16 13:29:38 wheelan
79 * Added AMFreeAllIndexes
80 *
81 * Revision 6.166 2001/02/07 12:04:19 wheelan
82 * bug fix in AlnMgrGetNthUnalignedForNthRow
83 *
84 * Revision 6.165 2001/02/05 13:21:17 wheelan
85 * bug fix in AlnMgrGetNthUnalignedForNthRow
86 *
87 * Revision 6.164 2001/02/01 00:39:18 lewisg
88 * fix uninitialized variable bugs
89 *
90 * Revision 6.163 2001/01/29 12:29:16 wheelan
91 * fixed bug which missed residues in AlnMgrGetNthUnalignedForNthRow
92 *
93 * Revision 6.162 2001/01/25 14:05:11 wheelan
94 * fixed bug in AlnMgrSetUnalignedLengths
95 *
96 * Revision 6.161 2001/01/23 13:35:20 wheelan
97 * bug fix in AlnMgrConstructOverlaps
98 *
99 * Revision 6.160 2001/01/19 03:08:15 bauer
100 * commented-out debug printfs
101 *
102 * Revision 6.159 2001/01/18 19:09:00 wheelan
103 * added functions to better handle jagged-edged segmented master-slave alignments
104 *
105 * Revision 6.158 2001/01/12 20:58:25 wheelan
106 * backed out prev changes
107 *
108 * Revision 6.157 2001/01/12 19:00:29 wheelan
109 * changes in AlnMgrGetNthUnalignedForNthRow to avoid problems when flanking seqalign regions are NULL for that row
110 *
111 * Revision 6.156 2001/01/09 23:18:55 lewisg
112 * fix memory leaks
113 *
114 * Revision 6.155 2001/01/05 20:02:32 wheelan
115 * fixed some memory leaks
116 *
117 * Revision 6.154 2000/10/06 10:34:20 wheelan
118 * changed behavior of AlnMgrGetSubAlign
119 *
120 * Revision 6.153 2000/10/02 13:52:31 wheelan
121 * fixed memory leak in SAIndexFree
122 *
123 * Revision 6.152 2000/09/26 16:10:58 kans
124 * removed const from AlnMgrCompareSortStruct heapsort callback - error caught by Mac compiler
125 *
126 * Revision 6.151 2000/09/26 14:23:49 lewisg
127 * use AlnMgrSortbyID instead of AlnMgrSortSeqAligns
128 *
129 * Revision 6.150 2000/09/25 15:25:36 wheelan
130 * bug fixes in AlnMgrMapBioseqToSeqAlign
131 *
132 * Revision 6.149 2000/09/20 12:20:16 wheelan
133 * bug fixes in AlnMgrMakeSegmentedMasterSlave to guide better preservation of input row structure
134 *
135 * Revision 6.148 2000/09/14 19:37:13 wheelan
136 * *** empty log message ***
137 *
138 * Revision 6.147 2000/09/14 19:32:21 wheelan
139 * bug fix in AlnMgrMapBioseqToSeqAlign
140 *
141 * Revision 6.146 2000/09/14 18:29:46 wheelan
142 * fixed binary search in MapBioseqToSeqAlign, took out merge behavior of AlnMgrGetSubAlignSpecial
143 *
144 * Revision 6.145 2000/09/08 20:34:31 lewisg
145 * hacks to speed up bioseq to align coord computation
146 *
147 * Revision 6.144 2000/09/07 04:53:42 sicotte
148 * fix alignment calls, bad matrix calls, and misc alignments problems for sequence update
149 *
150 * Revision 6.142 2000/09/05 22:28:06 lewisg
151 * PLEASE DO NOT DELETE THE STARTSIZE FIELD
152 *
153 * Revision 6.141 2000/08/30 10:33:55 wheelan
154 * fixed gcc compiler warnings
155 *
156 * Revision 6.140 2000/08/29 20:12:09 lewisg
157 * speed up color by alignment
158 *
159 * Revision 6.139 2000/08/28 16:18:20 sicotte
160 * moved AlnMgrSeqAlignMergeTwoPairwiseEx AlnMgrSeqAlignMergeTwoPairwise AlnMgrSeqAlignMergePairwiseSet to actutils.c
161 *
162 * Revision 6.138 2000/08/28 13:39:00 sicotte
163 * Get around Indexing bug in AlnMgrSeqAlignMergePairwiseSet
164 *
165 * Revision 6.137 2000/08/25 19:24:32 sicotte
166 * Add many functions to deal with merging alignment to go from pairwise sets to a single global (or local) alignment
167 *
168 * Revision 6.136 2000/08/23 20:01:07 hurwitz
169 * fixed bug in AlnMgrGetMaxUnalignedLength
170 *
171 * Revision 6.135 2000/08/18 14:20:50 lewisg
172 * add startsize field to AMAlignIndex so that lnMgrCopyIndexedParentIntoSap knows how big starts is
173 *
174 * Revision 6.134 2000/08/14 14:40:58 lewisg
175 * bug fixes for mixed alignment
176 *
177 * Revision 6.133 2000/08/11 12:53:57 wheelan
178 * bug fixes in AlnMgrMakeMultipleByScoreExEx
179 *
180 * Revision 6.132 2000/08/10 19:09:37 wheelan
181 * bug fixes in AlnMgrMakeMultipleByScoreExEx
182 *
183 * Revision 6.131 2000/07/27 19:38:40 hurwitz
184 * fixes split block bug
185 *
186 * Revision 6.130 2000/07/26 17:26:25 lewisg
187 * fix code for c++ inclusion
188 *
189 * Revision 6.129 2000/07/26 16:48:48 sicotte
190 * Fix bug and Memory leaks in AlnMgrGetSubAlign wrt SeqIds
191 *
192 * Revision 6.128 2000/07/26 14:58:13 sicotte
193 * bug fixes to AlnMgrGetNextAlnBit. bug fix (overlapping fuzz) in AlnMgrMakeMultipleByScore, Added AlnMgrMakeMultipleByScoreExEx and AlnMgrRemoveInconsistentEx and AlnMgrDeleteHiddenEx to allow optional deletion of sealigns when converting indexes to seqaligns
194 *
195 * Revision 6.127 2000/07/25 18:55:53 sicotte
196 * Added AlnMgrDeleteHiddenEx and AlnMgrRemoveInconsistentFromPairwiseSetEx to make optional deleting of SeqAligns. Needed for Sequence Update
197 *
198 * Revision 6.126 2000/07/24 19:07:56 sicotte
199 * Fix Master-Slave bugs in AlnMgrMakeFakeMultiple and alignment coordinate bugs in AlnMgrGetNextAlnBit
200 *
201 * Revision 6.125 2000/07/21 21:36:20 sicotte
202 * fix bug for sequence update in sequin when the alignment was two
203 * discontinuous seqaligns. Fixed AlnMgrMakeFakeMultiple.
204 *
205 * Revision 6.124 2000/07/21 21:07:43 hurwitz
206 * bug fix when deleting last block and block preceding it has just one aligned column
207 *
208 * Revision 6.123 2000/07/20 22:27:41 hurwitz
209 * working on bug fixes
210 *
211 * Revision 6.122 2000/06/29 23:15:13 hurwitz
212 * leave single space between aligned blocks with no unaligned sequence between them, no auto-merge of adjacent aligned blocks
213 *
214 * Revision 6.121 2000/06/15 14:15:45 wheelan
215 * alignmgr.c
216 *
217 * Revision 6.120 2000/06/02 18:37:45 wheelan
218 * bug fix in am_is_consistent (for editing)
219 *
220 * Revision 6.119 2000/06/01 17:37:46 wheelan
221 * various bug fixes
222 *
223 * Revision 6.118 2000/06/01 14:18:10 wheelan
224 * added AlnMgrCheckOrdered and AlnMgrMakeRowsForOrdered
225 *
226 * Revision 6.117 2000/05/24 15:46:53 wheelan
227 * added AlnMgrRemoveInconsistentFromPairwiseSet and AlnMgrSortAlnSetByNthRowPos
228 *
229 * Revision 6.116 2000/05/23 22:00:14 hurwitz
230 * working on launch of DDE from DDV
231 *
232 * Revision 6.115 2000/05/19 17:52:07 wheelan
233 * fixed incorrect strands in AlnMgrGetSubAlign
234 *
235 * Revision 6.114 2000/05/18 20:54:32 wheelan
236 * bug fix in AlnMgrIsEditable
237 *
238 * Revision 6.113 2000/05/18 11:29:19 wheelan
239 * finished AlnMgrIsIBMable and AlnMgrIsEditable
240 *
241 * Revision 6.112 2000/05/16 17:14:46 wheelan
242 * added AlnMgrIsIBMable, AlnMgrIsEditable; made am_guess_numrows extern
243 *
244 * Revision 6.111 2000/05/15 13:12:21 wheelan
245 * fixes to AlnMgrAddBlock to allow creation of a new block in the tail of an alignment with only one block
246 *
247 * Revision 6.110 2000/05/14 22:28:32 wheelan
248 * added am_is_new_row to fix row numbering problems in IntersectOnMaster function
249 *
250 * Revision 6.109 2000/05/10 16:46:48 wheelan
251 * bug fix in IntersectByMaster
252 *
253 * Revision 6.108 2000/05/10 15:40:24 wheelan
254 * bug fixes in IntersectOnMaster
255 *
256 * Revision 6.107 2000/05/10 13:09:36 wheelan
257 * bug fix in am_is_consistent; added am_is_ok_block to check newly edited blocks
258 *
259 * Revision 6.106 2000/05/09 18:42:49 wheelan
260 * fixes for editing
261 *
262 * Revision 6.105 2000/05/09 14:23:00 wheelan
263 * added AlnMgrMakeMultipleByScoreEx
264 *
265 * Revision 6.104 2000/05/08 13:17:05 wheelan
266 * added AlnMgrGetNumAlnBlocks and AlnMgrGetNthBlockRange; fixed memory leaks
267 *
268 * Revision 6.103 2000/05/05 12:48:12 wheelan
269 * fixed crash when deleting last block of an alignment
270 *
271 * Revision 6.102 2000/05/05 11:53:39 wheelan
272 * bug fix in AlnMgrMapBioseqToSeqAlign
273 *
274 * Revision 6.101 2000/05/04 14:07:45 wheelan
275 * several changes to correctly merge blocks when edits remove an unaligned region
276 *
277 * Revision 6.100 2000/05/03 19:30:37 wheelan
278 * fixed bugs in NULL alignment handling
279 *
280 * Revision 6.99 2000/05/02 19:50:38 hurwitz
281 * fixed some bugs with launching DDE from DDV, added new alnMgr fn for positioning DDE on proper column
282 *
283 * Revision 6.98 2000/05/02 12:00:03 wheelan
284 * added SASeqDatFree and fixed more memory leaks
285 *
286 * Revision 6.97 2000/05/01 19:54:51 wheelan
287 * fixed memory leaks
288 *
289 * Revision 6.96 2000/05/01 13:58:17 wheelan
290 * fixed am_is_consistent to recognize row rearrangements
291 *
292 * Revision 6.95 2000/05/01 12:12:17 wheelan
293 * fixes in AlnMgrMapBioseqToSeqAlign
294 *
295 * Revision 6.94 2000/04/26 21:53:21 hurwitz
296 * added save function to tell AlnMgr about edits made in DDE
297 *
298 * Revision 6.93 2000/04/22 15:54:09 wheelan
299 * added AlnMgrIndexLite; several assorted bug fixes
300 *
301 * Revision 6.92 2000/04/17 17:03:33 wheelan
302 * fixes in AlnMgrNeatlyIndex and AlnMgrIntersectByMaster
303 *
304 * Revision 6.91 2000/04/10 19:35:15 wheelan
305 * added AlnMgrIsSAPNULL, bug fixes in AlnMgrMakeFakeMultiple, added ability to deal with NULL alignments, completed AlnMgrReplaceBlock and AlnMgrAddBlock
306 *
307 * Revision 6.90 2000/04/07 13:21:17 wheelan
308 * bug fixes in MapBioseqToAlnCoords and free functions
309 *
310 * Revision 6.89 2000/04/05 17:41:29 wheelan
311 * added AlnMgrAddBlock, AlnMgrReplaceBlock, and fixed AlnMgrGetSubAlignSpecial
312 *
313 * Revision 6.88 2000/04/04 13:39:14 wheelan
314 * fixed bug in mapping bioseq coords for segmented alignments
315 *
316 * Revision 6.87 2000/04/03 17:20:22 wheelan
317 * finished AlnMgrGetSubAlignSpecial, bug fix in AlnMgrGetNextAlnBit
318 *
319 * Revision 6.86 2000/04/03 12:50:31 wheelan
320 * bug fixes for partial alignments
321 *
322 * Revision 6.85 2000/03/17 14:25:24 wheelan
323 * changes to AlnMgrGetSubAlign
324 *
325 * Revision 6.84 2000/03/16 19:53:32 wheelan
326 * fixed bug which allowed all-gap columns after AlnMgrForceMasterSlave
327 *
328 * Revision 6.83 2000/03/16 15:07:15 wheelan
329 * bug fix in AlnMgrTruncateByOverlap
330 *
331 * Revision 6.82 2000/03/15 20:40:19 lewisg
332 * bug fixes for AlnMgrCarefulIndex
333 *
334 * Revision 6.81 2000/03/10 18:47:01 lewisg
335 * add show/hide
336 *
337 * Revision 6.80 2000/03/09 20:24:20 wheelan
338 * bug fixes in AlnMgrSetUnalignedLengths and IBM
339 *
340 * Revision 6.79 2000/03/07 18:32:22 wheelan
341 * miscellaneous bug fixes
342 *
343 * Revision 6.78 2000/03/03 19:58:35 wheelan
344 * added AlnMgrDupTopNByScore
345 *
346 * Revision 6.77 2000/03/02 20:00:33 wheelan
347 * bug fixes to more gracefully handle a sequence aligned with itself
348 *
349 * Revision 6.76 2000/02/29 18:02:34 wheelan
350 * added AlnMgrMergeNeighbors to get rid of unaligned regions of length 0 after intersection; bug fixes in copy functions
351 *
352 * Revision 6.75 2000/02/28 17:18:14 wheelan
353 * Added AlnMgrTossNeatRows for Cn3D
354 *
355 * Revision 6.74 2000/02/28 14:49:44 wheelan
356 * added AlnMgrSetUnalignedLengths
357 *
358 * Revision 6.73 2000/02/24 18:07:33 wheelan
359 * bug fixes in alignment truncation function
360 *
361 * Revision 6.72 2000/02/23 20:06:05 thiessen
362 * added missing pointer assignment
363 *
364 * Revision 6.71 2000/02/23 18:45:17 wheelan
365 * added AlnMgrNeatlyIndex for structure alignments, added more careful row indexing, finished AlnMgrMapBioseqToSeqAlign function
366 *
367 * Revision 6.70 2000/02/16 15:47:57 wheelan
368 * changed behavior of AlnMgrMakeMultByIntersectOnMaster
369 *
370 * Revision 6.69 2000/02/11 17:30:50 kans
371 * AlnMgrForcePairwiseContinuous moved to tools/actutils (SW)
372 *
373 * Revision 6.68 2000/02/10 19:13:13 wheelan
374 * bug fixes for IntersectOnMaster
375 *
376 * Revision 6.67 2000/02/10 15:20:17 lewisg
377 * sarah's fixes for < 0 indices
378 *
379 * Revision 6.66 2000/02/09 20:23:03 wheelan
380 * finished AlnMgrDeleteNthRow
381 *
382 * Revision 6.65 2000/02/07 16:15:50 wheelan
383 * added AlnMgrTruncateSAP and several helper functions
384 *
385 * Revision 6.64 2000/02/04 22:57:01 kans
386 * changed BioseqUnlockById to BioseqUnlock to avoid scoping problem
387 *
388 * Revision 6.63 2000/02/02 17:55:50 wheelan
389 * bug fixes
390 *
391 * Revision 6.62 2000/02/02 14:37:30 wheelan
392 * added AlnMgrGetNthAlignedSegInNthRow and AlnMgrGetNthSegmentRange to make alignment editing easier
393 *
394 * Revision 6.60 2000/02/01 13:14:24 wheelan
395 * took out debugging AsnWrite, bug fixes in AlnMgrGetNextAlnBit and GetNthUnaligned
396 *
397 * Revision 6.59 2000/01/31 21:00:53 kans
398 * changes to AlnMgrForcePairwiseContinuous and AlnMgrGetNthSeqRangeInSA to support Update Sequence with very long sequences in Sequin (SW)
399 *
400 * Revision 6.58 2000/01/31 16:08:33 wheelan
401 * added unpacking functions, and AlnMgrMakeMultByIntersectOnMaster (does not work yet)
402 *
403 * Revision 6.57 2000/01/29 14:03:15 wheelan
404 * added AlnMgrDeleteHidden and AlnMgrForceContinuous (uses bandalign) plus a couple utilities for these, plus many bug fixes
405 *
406 * Revision 6.56 2000/01/19 15:45:09 wheelan
407 * many, many bug fixes in AlnMgrGetSubAlign and AlnMgrGetNextAlnBit
408 *
409 * Revision 6.55 2000/01/14 18:50:36 wheelan
410 * fixed bug in AlnMgrGetSubAlign
411 *
412 * Revision 6.54 2000/01/12 17:43:19 wheelan
413 * added AlnMgrGetNumSegments, AlnMgrDeleteRow
414 *
415 * Revision 6.53 1999/12/02 20:31:59 lewisg
416 * put seqentries into bioseqset and fix calling convention in alignmgr.c
417 *
418 * Revision 6.52 1999/11/30 14:36:39 wheelan
419 * added AlnMgrMakeMultipleByScore; bug fixes
420 *
421 * Revision 6.51 1999/11/26 15:42:19 vakatov
422 * Fixed for the C++ and/or MSVC DLL compilation
423 *
424 * Revision 6.50 1999/11/24 11:29:52 wheelan
425 * added missing return values
426 *
427 * Revision 6.49 1999/11/18 19:30:33 wheelan
428 * added AlnMgrDeleteChildByPointer, bug fixes
429 *
430 * Revision 6.48 1999/11/03 12:47:05 wheelan
431 * added code to correctly handle internal gaps in segmented master-slave alignments
432 *
433 * Revision 6.47 1999/11/02 12:38:38 wheelan
434 * bug fixes when only one child
435 *
436 * Revision 6.46 1999/10/25 18:17:23 wheelan
437 * Added AlnMgrGetUniqueSeqs, fixed merge function to handle single child seqalign correctly
438 *
439 * Revision 6.45 1999/10/19 19:27:03 wheelan
440 * added static defines; changed behavior of AlnMgrGetNextNthSeqRange; rewrote AlnMgrMakeSegmentedMasterSlave to handle more cases
441 *
442 * Revision 6.44 1999/10/15 21:51:02 durand
443 * add AlnMgrIsSAPDiscAli()
444 *
445 * Revision 6.43 1999/10/15 18:19:05 wheelan
446 * added rudimentary ability to default to master-slave type if possible
447 *
448 * Revision 6.42 1999/10/15 13:48:47 wheelan
449 * added AlnMgrGetNthRowTail, extended capability of AlnMgrGetNthStrand
450 *
451 * Revision 6.41 1999/10/14 16:10:30 kans
452 * new includes and prototypes added
453 *
454 * Revision 6.40 1999/10/13 19:29:03 wheelan
455 * added speedup for segmented master-slave creation
456 *
457 * Revision 6.39 1999/10/07 13:37:16 wheelan
458 * added AlnMgrIndexSingleSeqAlign, which only indexes the first seqalign in a list; also added automatic computation of max length of unaligned regions for time savings
459 *
460 * Revision 6.38 1999/10/06 19:35:09 wheelan
461 * added several viewer and editor management functions; fixed many bugs in AlnMgrGetNextAlnBit
462 *
463 * Revision 6.37 1999/10/05 15:15:31 wheelan
464 * added AlnMgrGetNthUnalignedForNthRow
465 *
466 * Revision 6.36 1999/10/05 14:02:31 wheelan
467 * bug fixes in AlnMgrGetNextAlnBit
468 *
469 * Revision 6.35 1999/10/04 14:58:08 wheelan
470 * bug fixes; added AlnMgrMapBioseqToSeqAlign
471 *
472 * Revision 6.34 1999/09/24 15:04:55 lewisg
473 * AlnMgrGetNextAlnBit: amp->to_m changed when calling child
474 *
475 * Revision 6.33 1999/09/24 14:29:58 wheelan
476 * changed behavior of AlnMgrGetNextLengthBit to mimic other GetNext functions, completed functionality of AlnMgrGetSubAlign, bug fixes
477 *
478 * Revision 6.32 1999/09/23 16:03:32 wheelan
479 * Added structures and functions to support segmented master-slave alignments
480 *
481 * Revision 6.31 1999/09/22 13:19:15 wheelan
482 * made AlnMsg row_num field 1-based, added AlnMgrGetNextNthSeqRange, started adding functions to handle a segmented master-slave alignment
483 *
484 * Revision 6.30 1999/09/21 19:15:28 wheelan
485 * changed AlnMgrGetNextAlnBit to return FALSE if called once more past the end; various bug fixes; implemented part of AlnMgrGetSubAlign
486 *
487 * Revision 6.29 1999/09/20 12:12:58 wheelan
488 * added safety checks in case input seqalign has no strand or score information
489 *
490 * Revision 6.28 1999/09/20 11:58:52 wheelan
491 * modified AlnMgrGetNthSeqRange to use new row information structures
492 *
493 * Revision 6.27 1999/09/17 16:55:33 wheelan
494 * bug fixes, added AlnMgrPropagateSeqIdsBySapList to correctly associate seqids with rows
495 *
496 * Revision 6.26 1999/09/14 15:48:50 kans
497 * AlnMgrMapRowCoords returns -1 on failure at end of function
498 *
499 * Revision 6.25 1999/09/13 19:57:10 sicotte
500 * Make AlnMgrMapBsqCoord work for continuous alignments
501 *
502 * Revision 6.24 1999/09/13 19:43:09 sicotte
503 * bug fixes
504 *
505 * Revision 6.23 1999/09/13 14:33:24 wheelan
506 * added support for row numbers in AlnMgrGetNextAlnBit
507 *
508 * Revision 6.22 1999/09/08 13:36:16 wheelan
509 * fixed bugs found by Patrick Durand
510 *
511 * Revision 6.21 1999/09/08 11:55:35 sicotte
512 * fix bug that was missing end segments
513 *
514 * Revision 6.20 1999/09/08 11:49:13 wheelan
515 * added capability to return length of unaligned regions
516 *
517 * Revision 6.19 1999/09/07 12:11:17 wheelan
518 * fixed bugs pointed out by Hugues
519 *
520 * Revision 6.18 1999/09/06 16:37:44 wheelan
521 * added AlnMgrGetNextLengthBit and associated function
522 *
523 * Revision 6.17 1999/09/06 15:55:55 wheelan
524 * IndexSeqAlign now makes the fake multiple if possible
525 *
526 * Revision 6.16 1999/09/06 15:52:25 wheelan
527 * added row management functions, made most functions minus-strand compliant, added smarter test for master-slave vs partial
528 *
529 * Revision 6.15 1999/09/01 20:11:56 wheelan
530 * added new merge function and the typedef for the structure it uses
531 *
532 * Revision 6.14 1999/09/01 14:40:06 wheelan
533 * added AlnMgrGetStrand, fixed bugs in GetNextAlnBit, added more cases to AlnMgrIndexSeqAlign
534 *
535 * Revision 6.13 1999/08/30 19:28:06 wheelan
536 * modified AlnMgrGetNextAlnBit to handle master-slave alignments
537 *
538 * Revision 6.12 1999/08/26 20:35:21 wheelan
539 * added parent indexing and pairwise-to-multiple functions
540 *
541 * Revision 6.11 1999/08/20 11:23:53 wheelan
542 * fixed AlnMgrGetNthSeqRange for minus strands
543 *
544 * Revision 6.10 1999/08/19 19:30:26 wheelan
545 * made case for SAT_PARTIAL in AlnMgrGetNextAlnBit
546 *
547 * Revision 6.9 1999/08/19 17:24:50 wheelan
548 * changed AMAlignIndex structure, added more api functions
549 *
550 * Revision 6.8 1999/08/12 20:56:56 vakatov
551 * [WIN32] Added missed LIBCALLBACK
552 *
553 * Revision 6.7 1999/08/12 12:41:53 wheelan
554 * added comments, and functions to index the parent
555 *
556 * Revision 6.6 1999/08/06 18:31:19 wheelan
557 * fixed compiler error
558 *
559 * Revision 6.5 1999/08/06 16:38:43 kans
560 * fixed Mac compiler complaints
561 *
562 * Revision 6.4 1999/08/06 13:44:14 wheelan
563 * added several functions; changed all function names to AlnMgr..
564 *
565 * Revision 6.3 1999/07/30 14:17:52 wheelan
566 * fixes to keep Mac compiler happy
567 *
568 * Revision 6.2 1999/07/30 14:08:37 wheelan
569 * added api functions to access indexes
570 *
571 * Revision 6.1 1999/07/29 12:56:25 wheelan
572 * initial checkin
573 *
574
575 * ==========================================================================
576 */
577
578
579
580 #include <alignmgr.h>
581 #include <needleman.h>
582 #include <stdlib.h>
583
584 /***************************************************************************
585 *
586 * static functions
587 *
588 ***************************************************************************/
589 static void heapsort_with_userdata (VoidPtr b, size_t nel, size_t width, int (LIBCALLBACK *compar)PROTO((VoidPtr, VoidPtr, VoidPtr)), VoidPtr userdata);
590 static void heapify_with_userdata(CharPtr base0, CharPtr base, CharPtr lim, CharPtr last, size_t width, int(LIBCALLBACK *compar)PROTO((VoidPtr, VoidPtr, VoidPtr)), VoidPtr userdata);
591 static void AlnMgrSetUnalignedLengths(SeqAlignPtr sap);
592 static Boolean am_get_nth_range_for_partial(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop, Int4Ptr where, BoolPtr is_aligned, Boolean unaligned);
593 static AMmsmsPtr am_sort_ammsms(AMmsmsPtr ams_head, Int4 n);
594 static AMmsmsPtr am_sort_masterams(AMmsmsPtr ams_head, Int4 n);
595 static Int4 am_get_first_rsp_for_sip(SeqIdPtr sip, AMsiplistPtr siplist);
596 static int LIBCALLBACK AMCompareAlignInfoProc(VoidPtr ptr1, VoidPtr ptr2);
597 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2);
598 static Int4 AlnMgrMapSegmentCoords(SeqAlignPtr sap, Uint4 pos, Int4 row, SeqIdPtr master, Int4Ptr len);
599 static void am_set_master(SeqAlignPtr sap, SeqIdPtr sip);
600 static SeqIdPtr am_find_master(SeqAlignPtr sap);
601 static AMmsmsPtr AlnMgrFindOverlapOnMaster(SeqAlignPtr sap);
602 static void AlnMgrMergeNeighbors(SeqAlignPtr salp);
603 static Boolean am_is_new_row(SeqIdPtr sip1, SeqIdPtr sip2);
604 static Int4Ptr am_save_rowinfo(SeqAlignPtr sap, Int4 numrows);
605 static void am_set_rows(AMmsmsPtr ams, Int4Ptr rowarray, Int4 numrows);
606 static AMmsmsPtr am_create_overlap(SeqAlignPtr sap);
607 static void am_densediag_reverse(DenseDiagPtr ddp);
608 static AMmsmsPtr AlnMgrTruncateByOverlap(SeqAlignPtr sap, AMmsmsPtr ams_head);
609 static void am_compare_alignids(AMmsmsPtr ams_prev, AMmsmsPtr ams);
610 static Boolean AlnMgrCarefulIndex(SeqAlignPtr sap, AMmsmsPtr ams_head, AMmsmsPtr *ams_mhead, Boolean allinblock, Int4 numrows);
611 static Boolean am_make_null_alignment(SeqAlignPtr sap);
612 static void am_trim_master(AMmsmsPtr PNTR ams_mhead, AMmsmsPtr ams_head, Int4 numrows);
613 static Boolean am_check_gaps(SeqAlignPtr sap);
614 static void am_fix_empty_columns(SeqAlignPtr sap);
615 static Int4 am_translate_row_num (AMAlignIndexPtr amaip, Int4 n, Int4 row);
616 static Boolean am_is_consistent(SeqAlignPtr sap, SeqAlignPtr sap_new, Int4Ptr block_num);
617 static Boolean am_is_ok_block(DenseSegPtr dsp);
618 static void am_do_merge (AMAlignIndexPtr amaip, Int4 left, Int4 right);
619 static Boolean am_merge_after_edit (SeqAlignPtr sap);
620 static Boolean am_same_ids(SeqIdPtr sip1, SeqIdPtr sip2);
621 static AMmsmsPtr AlnMgrConstructOverlaps(AMmsmsPtr ams_head);
622 static Boolean AlnMgrJaggedIndex(SeqAlignPtr sap, AMmsmsPtr ams_head, AMmsmsPtr *ams_mhead, Int4 numrows);
623 static Boolean AlnMgrMakeFakeMultipleEx(SeqAlignPtr sap, Boolean forcestraightms);
624
625
626
627
628
629 /*******************************************************************
630 *
631 * all the memory allocation/deallocation functions
632 *
633 *******************************************************************/
634
635 NLM_EXTERN SeqAlignIndexPtr SeqAlignIndexNew(void)
636 {
637 return (SeqAlignIndexPtr)(MemNew(sizeof(SeqAlignIndex)));
638 }
639
640 static Boolean LIBCALLBACK SAIndexFreeFunc(VoidPtr index)
641 {
642 return SAIndexFree(index);
643 }
644
645 NLM_EXTERN SAIndexPtr SAIndexNew(void)
646 {
647 SAIndexPtr saip;
648
649 saip = (SAIndexPtr)MemNew(sizeof(SAIndex));
650 saip->master = -1;
651 saip->freefunc = (SeqAlignIndexFreeFunc)(SAIndexFreeFunc);
652 return saip;
653 }
654
655 NLM_EXTERN Boolean SAIndexFree(VoidPtr index)
656 {
657 Int4 i;
658 Boolean retval;
659 SAIndexPtr saip;
660
661 retval = FALSE;
662 if (!index)
663 return retval;
664 saip = (SAIndexPtr)index;
665 if (saip->indextype != INDEX_SEGS)
666 return retval;
667 MemFree(saip->aligncoords);
668 for (i=0; i<saip->numseqs; i++)
669 {
670 SASeqDatFree(saip->ssdp[i]);
671 }
672 MemFree(saip->ssdp);
673 MemFree(saip);
674 retval = TRUE;
675 return retval;
676 }
677
678 NLM_EXTERN SASeqDatPtr SASeqDatNew(void)
679 {
680 return (SASeqDatPtr)(MemNew(sizeof(SASeqDat)));
681 }
682
683 NLM_EXTERN void SASeqDatFree(SASeqDatPtr ssdp)
684 {
685 if (ssdp == NULL)
686 return;
687 if (ssdp->sect != NULL)
688 MemFree(ssdp->sect);
689 if (ssdp->unsect != NULL)
690 MemFree(ssdp->unsect);
691 MemFree(ssdp);
692 }
693
694 NLM_EXTERN RowSourcePtr RowSourceNew(void)
695 {
696 return (RowSourcePtr)(MemNew(sizeof(RowSource)));
697 }
698
699 NLM_EXTERN RowSourcePtr RowSourceFree(RowSourcePtr rsp)
700 {
701 if (rsp == NULL)
702 return NULL;
703 rsp->id = SeqIdSetFree(rsp->id);
704 MemFree(rsp->which_saps);
705 MemFree(rsp->num_in_sap);
706 MemFree(rsp);
707 return NULL;
708 }
709
710 static Boolean LIBCALLBACK AMAlignIndexFreeFunc (VoidPtr data)
711 {
712 return AMAlignIndexFree(data);
713 }
714
715
716 NLM_EXTERN AMAlignIndexPtr AMAlignIndexNew(void)
717 {
718 AMAlignIndexPtr amaip;
719
720 amaip = (AMAlignIndexPtr)MemNew(sizeof(AMAlignIndex));
721 amaip->freefunc = (SeqAlignIndexFreeFunc)(AMAlignIndexFreeFunc);
722 amaip->master = -2;
723 amaip->indextype = INDEX_PARENT;
724 return amaip;
725 }
726
727 NLM_EXTERN Boolean AMAlignIndexFree(VoidPtr index)
728 {
729 AMAlignIndexPtr amaip;
730 Int4 i;
731 Boolean retval;
732
733 retval = FALSE;
734 amaip = (AMAlignIndexPtr)(index);
735 if (!amaip)
736 return retval;
737 if (amaip->indextype != INDEX_PARENT)
738 return retval;
739 if (amaip->mstype == AM_NEATINDEX)
740 {
741 MemFree(amaip->saps);
742 MemFree(amaip);
743 return TRUE;
744 }
745 amaip->ids = SeqIdSetFree(amaip->ids);
746 for (i=0; i<(amaip->numbsqs); i++)
747 {
748 amaip->amadp[i] = AMAlignDatFree(amaip->amadp[i]);
749 }
750 if (amaip->saps != NULL)
751 MemFree(amaip->saps);
752 if (amaip->amadp != NULL)
753 MemFree(amaip->amadp);
754 if (amaip->aligncoords != NULL)
755 MemFree(amaip->aligncoords);
756 if (amaip->lens != NULL)
757 MemFree(amaip->lens);
758 if (amaip->ulens != NULL)
759 MemFree(amaip->ulens);
760 if (amaip->starts != NULL)
761 MemFree(amaip->starts);
762 if (amaip->rowsource != NULL)
763 {
764 for (i=0; i<(amaip->numrows); i++)
765 {
766 amaip->rowsource[i] = RowSourceFree(amaip->rowsource[i]);
767 }
768 MemFree(amaip->rowsource);
769 }
770 MemFree(amaip);
771 retval = TRUE;
772 return retval;
773 }
774
775 NLM_EXTERN AMAlignDatPtr AMAlignDatNew(void)
776 {
777 return (AMAlignDatPtr)(MemNew(sizeof(AMAlignDat)));
778 }
779
780 NLM_EXTERN AMAlignDatPtr AMAlignDatFree(AMAlignDatPtr amadp)
781 {
782 if (amadp == NULL)
783 return NULL;
784 SeqIdFree(amadp->sip);
785 MemFree(amadp->saps);
786 MemFree(amadp->segments);
787 MemFree(amadp);
788 return NULL;
789 }
790
791 NLM_EXTERN void AMFreeAllIndexes(SeqAlignPtr sap)
792 {
793 SeqAlignPtr salp;
794
795 if (sap->saip->indextype == INDEX_PARENT)
796 {
797 salp = (SeqAlignPtr)(sap->segs);
798 while (salp != NULL)
799 {
800 SAIndexFree((Pointer)(salp->saip));
801 salp->saip = NULL;
802 salp = salp->next;
803 }
804 AMAlignIndexFree((Pointer)(sap->saip));
805 sap->saip = NULL;
806 } else
807 {
808 while (sap != NULL)
809 {
810 if (sap->saip != NULL)
811 {
812 if (sap->saip->freefunc != NULL)
813 {
814 (sap->saip->freefunc) (sap->saip);
815 }
816 else
817 {
818 SAIndexFree((Pointer)(sap->saip));
819 }
820 sap->saip = NULL;
821 }
822 sap = sap->next;
823 }
824 }
825 }
826
827 NLM_EXTERN AlnMsgPtr AlnMsgNew(void)
828 {
829 AlnMsgPtr amp;
830
831 amp = (AlnMsgPtr)MemNew(sizeof(AlnMsg));
832 amp->to_m = -1;
833 amp->send_space = FALSE;
834 amp->row_num = -1;
835 amp->prev = -2;
836 amp->prev_sap = -2;
837 amp->place = 0;
838 amp->flag = FALSE;
839 amp->which_bsq = NULL;
840 return amp;
841 }
842
843 NLM_EXTERN AlnMsgPtr AlnMsgFree(AlnMsgPtr amp)
844 {
845 return ((AlnMsgPtr)MemFree(amp));
846 }
847
848 NLM_EXTERN AlnMsgPtr AlnMsgReNew(AlnMsgPtr amp)
849 {
850 amp->from_m = 0;
851 amp->to_m = -1;
852 amp->send_space = FALSE;
853 amp->row_num = -1;
854 amp->prev = -2;
855 amp->prev_sap = -2;
856 amp->place = 0;
857 amp->flag = FALSE;
858 amp->which_bsq = NULL;
859 return amp;
860 }
861
862 /********************************************************************************
863 *
864 * AlnMgrIndexSingleSeqAlign indexes (in place) only the first seqalign or
865 * seqalign set in the chain that is passed in. It will extensively
866 * rearrange the first seqalign given.
867 *
868 ********************************************************************************/
869 NLM_EXTERN Boolean AlnMgrIndexSingleSeqAlign(SeqAlignPtr sap)
870 {
871 SeqAlignPtr sap_next;
872
873 if (sap == NULL)
874 return TRUE;
875 sap_next = NULL;
876 if (sap->next)
877 sap_next = sap->next;
878 sap->next = NULL;
879 AlnMgrIndexSeqAlign(sap);
880 sap->next = sap_next;
881 if (sap->saip)
882 return TRUE;
883 else
884 return FALSE;
885 }
886
887 NLM_EXTERN Boolean AlnMgrIndexSingleChildSeqAlign(SeqAlignPtr sap)
888 {
889 DenseSegPtr dsp;
890 Int4 i;
891 SeqAlignPtr sap_next;
892
893 if (sap == NULL)
894 return FALSE;
895 if (sap->segtype == SAS_DISC)
896 return FALSE;
897 sap_next = NULL;
898 if (sap->next)
899 sap_next = sap->next;
900 sap->next = NULL;
901 if (sap->saip != NULL)
902 {
903 if (sap->saip->indextype == INDEX_SEGS)
904 SAIndexFree(sap->saip);
905 }
906 if (sap->segtype == SAS_DENSEG)
907 AlnMgrIndexLinkedSegs(sap);
908 else if (sap->segtype == SAS_DENDIAG)
909 AlnMgrIndexSingleSeqAlign(sap);
910 dsp = (DenseSegPtr)(sap->segs);
911 if (dsp->strands == NULL)
912 {
913 dsp->strands = (Uint1Ptr)MemNew((dsp->dim*dsp->numseg)*sizeof(Uint1));
914 for (i=0; i<dsp->dim*dsp->numseg; i++)
915 {
916 dsp->strands[i] = Seq_strand_plus;
917 }
918 }
919 sap->next = sap_next;
920 if (sap->saip)
921 return TRUE;
922 else
923 return FALSE;
924 }
925
926 /********************************************************************************
927 *
928 * AlnMgrReIndexSeqAlign frees the parent indexes, indexes any child
929 * seqaligns that are not indexed (it assumes that any indexed child
930 * seqaligns are correctly indexed), and reindexes the set.
931 *
932 ********************************************************************************/
933 NLM_EXTERN Boolean AlnMgrReIndexSeqAlign(SeqAlignPtr sap)
934 {
935 SeqAlignPtr sap_tmp;
936 SeqAlignPtr tmp_next;
937
938 if (sap == NULL)
939 return FALSE;
940 if (sap->segtype != SAS_DISC) /* we don't know what we're dealing with */
941 return FALSE;
942 if (!AMAlignIndexFree((Pointer)sap->saip))
943 return FALSE;
944 sap->saip = NULL;
945 sap_tmp = (SeqAlignPtr)sap->segs;
946 while (sap_tmp)
947 {
948 if (sap_tmp->saip == NULL)
949 {
950 tmp_next = sap_tmp->next;
951 sap_tmp->next = NULL;
952 if (!AlnMgrIndexLinkedSegs(sap_tmp))
953 return FALSE;
954 sap_tmp->next = tmp_next;
955 }
956 sap_tmp = sap_tmp->next;
957 }
958 if (!AlnMgrIndexParentSA(sap))
959 return FALSE;
960 if (!AlnMgrMakeFakeMultiple(sap))
961 return FALSE;
962 return TRUE;
963 }
964
965 /********************************************************************************
966 *
967 * AlnMgrIndexSeqAlign indexes (in place) the ENTIRE chain of seqaligns
968 * and seqalign sets passed in, and extensively rearranges the seqalign.
969 *
970 ********************************************************************************/
971 NLM_EXTERN Boolean AlnMgrIndexSeqAlign(SeqAlignPtr sap)
972 {
973 SAIndexPtr saip;
974 SeqAlignPtr salp;
975
976 if (!sap)
977 return FALSE;
978 if (sap->saip != NULL)
979 {
980 return TRUE;
981 }
982 if (!AlnMgrUnpackSeqAlign(sap))
983 return FALSE;
984 if (!AlnMgrRearrangeUnpacked(sap))
985 return FALSE;
986 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)(sap->segs)))
987 return FALSE;
988 if (!AlnMgrIndexParentSA(sap))
989 return FALSE;
990 if (!AlnMgrMakeFakeMultiple(sap))
991 return FALSE;
992 salp = (SeqAlignPtr)(sap->segs);
993 while (salp != NULL)
994 {
995 saip = (SAIndexPtr)(salp->saip);
996 saip->parent = sap;
997 salp = salp->next;
998 }
999 return TRUE;
1000 }
1001
1002 /***************************************************************************
1003 *
1004 * AlnMgrIndexIndexedChain takes a linked list of indexed seqaligns and
1005 * indexes them as a set.
1006 *
1007 ***************************************************************************/
1008 NLM_EXTERN SeqAlignPtr AlnMgrIndexIndexedChain(SeqAlignPtr sap)
1009 {
1010 SAIndexPtr saip;
1011 SeqAlignPtr sap_new;
1012
1013 if (sap == NULL || sap->saip == NULL || sap->saip->indextype == INDEX_PARENT)
1014 return NULL;
1015 sap_new = SeqAlignNew();
1016 sap_new->segtype = SAS_DISC;
1017 sap_new->segs = (Pointer)(sap);
1018 if (!AlnMgrIndexParentSA(sap_new))
1019 return NULL;
1020 if (!AlnMgrMakeFakeMultipleEx(sap_new, TRUE))
1021 return NULL;
1022 sap = (SeqAlignPtr)(sap_new->segs);
1023 while (sap != NULL)
1024 {
1025 saip = (SAIndexPtr)(sap->saip);
1026 saip->parent = sap_new;
1027 sap = sap->next;
1028 }
1029 return sap_new;
1030 }
1031
1032
1033
1034 /**********************************************************************
1035 *
1036 * AlnMgrIndexLite disassembles the input alignment, indexes all child
1037 * alignments, and then puts them in the amaip->saps array. It does
1038 * not attempt to create alignment coordinates across the whole set.
1039 * This is useful to keep sets of child alignments together (managing
1040 * BLAST hits, for example) when creating an overall alignment is
1041 * unnecessary. This alignment can be freed normally, but many
1042 * alignmgr functions will not work on the parent alignment (they
1043 * will work on the child alignments).
1044 *
1045 **********************************************************************/
1046 NLM_EXTERN Boolean AlnMgrIndexLite(SeqAlignPtr sap)
1047 {
1048 AMAlignIndexPtr amaip;
1049 Int4 i;
1050 SAIndexPtr saip;
1051 SeqAlignPtr sap_tmp;
1052
1053 if (!sap)
1054 return FALSE;
1055 if (sap->saip != NULL)
1056 {
1057 return TRUE;
1058 }
1059 if (!AlnMgrUnpackSeqAlign(sap))
1060 return FALSE;
1061 if (!AlnMgrRearrangeUnpacked(sap))
1062 return FALSE;
1063 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)(sap->segs)))
1064 return FALSE;
1065 amaip = AMAlignIndexNew();
1066 amaip->mstype = AM_LITE;
1067 i = 0;
1068 sap_tmp = (SeqAlignPtr)(sap->segs);
1069 while (sap_tmp != NULL)
1070 {
1071 sap_tmp = sap_tmp->next;
1072 i++;
1073 }
1074 amaip->saps = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
1075 amaip->numsaps = i;
1076 amaip->parent = sap;
1077 sap_tmp = (SeqAlignPtr)(sap->segs);
1078 for (i=0; i<amaip->numsaps; i++)
1079 {
1080 amaip->saps[i] = sap_tmp;
1081 saip = (SAIndexPtr)(sap_tmp->saip);
1082 saip->parent = sap;
1083 sap_tmp = sap_tmp->next;
1084 }
1085 sap->saip = (Pointer)amaip;
1086 return TRUE;
1087 }
1088
1089 NLM_EXTERN SeqAlignPtr AlnMgrGetParent(SeqAlignPtr sap)
1090 {
1091 SAIndexPtr saip;
1092
1093 if (sap->saip->indextype == INDEX_PARENT)
1094 return sap;
1095 saip = (SAIndexPtr)sap->saip;
1096 return saip->parent;
1097 }
1098
1099
1100 /***************************************************************************
1101 *
1102 * AlnMgrUnpackSeqAlign rearranges any seqalign (except alignments with
1103 * more than two levels of nested discontinuous alignments) to a simple
1104 * discontinuous alignment or a linked list of alignments.
1105 *
1106 ***************************************************************************/
1107 NLM_EXTERN Boolean AlnMgrUnpackSeqAlign(SeqAlignPtr sap)
1108 {
1109 SeqAlignPtr sap_new;
1110 SeqAlignPtr sap_next;
1111 SeqAlignPtr sap_segs;
1112 SeqAlignPtr sap_segs_head;
1113 SeqAlignPtr sap_segs_prev;
1114
1115 if (sap == NULL)
1116 return FALSE;
1117 if (sap->segtype == SAS_DISC)
1118 {
1119 sap_segs_head = (SeqAlignPtr)(sap->segs);
1120 if (sap_segs_head->segtype == SAS_DISC)
1121 {
1122 sap_segs_prev = (SeqAlignPtr)(sap_segs_head->segs);
1123 sap_segs_head->segs = NULL;
1124 sap_next = sap_segs_head->next;
1125 sap_segs_head->next = NULL;
1126 SeqAlignFree(sap_segs_head);
1127 sap_segs_head = sap_segs_prev;
1128 sap->segs = (Pointer)(sap_segs_head);
1129 while (sap_segs_prev->next)
1130 {
1131 sap_segs_prev = sap_segs_prev->next;
1132 if (sap_segs_prev->segtype == SAS_DISC)
1133 return FALSE;
1134 }
1135 sap_segs_prev->next = sap_next;
1136 sap_segs = sap_next;
1137 } else
1138 sap_segs = sap_segs_head->next;
1139 while (sap_segs)
1140 {
1141 if (sap_segs->segtype == SAS_DISC)
1142 {
1143 sap_next = sap_segs->next;
1144 sap_segs->next = NULL;
1145 sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1146 sap_segs->segs = NULL;
1147 SeqAlignFree(sap_segs);
1148 while (sap_segs_prev->next)
1149 {
1150 sap_segs_prev = sap_segs_prev->next;
1151 if (sap_segs_prev->segtype == SAS_DISC)
1152 return FALSE;
1153 }
1154 sap_segs_prev->next = sap_next;
1155 sap_segs = sap_next;
1156 } else
1157 sap_segs = sap_segs->next;
1158 }
1159 } else
1160 {
1161 sap_new = SeqAlignNew();
1162 sap_new->type = SAT_GLOBAL;
1163 sap_new->segtype = sap->segtype;
1164 sap_new->dim = sap->dim;
1165 sap_new->segs = sap->segs;
1166 sap_new->master = sap->master;
1167 sap_new->bounds = sap->bounds;
1168 sap_new->next = sap->next;
1169 sap_new->score = sap->score;
1170 sap->next = NULL;
1171 sap->segtype = SAS_DISC;
1172 sap->type = 0;
1173 sap->dim = 0;
1174 sap->master = NULL;
1175 sap->bounds = NULL;
1176 sap->score = NULL;
1177 sap->segs = (Pointer)sap_new;
1178 sap_segs_prev = sap_new;
1179 sap_segs = sap_new->next;
1180 while (sap_segs)
1181 {
1182 if (sap_segs->segtype == SAS_DISC)
1183 {
1184 sap_next = sap_segs->next;
1185 sap_segs->next = NULL;
1186 sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1187 sap_segs->segs = NULL;
1188 SeqAlignFree(sap_segs);
1189 while (sap_segs_prev->next)
1190 {
1191 sap_segs_prev = sap_segs_prev->next;
1192 if (sap_segs_prev->segtype == SAS_DISC)
1193 return FALSE;
1194 }
1195 sap_segs_prev->next = sap_next;
1196 sap_segs = sap_next;
1197 } else
1198 sap_segs = sap_segs->next;
1199 }
1200 }
1201 return TRUE;
1202 }
1203
1204 /***************************************************************************
1205 *
1206 * AlnMgrRearrangeUnpacked transforms all child seqaligns into dense-seg
1207 * types, requiring some rearrangement for dense-diag sets. This function
1208 * presumes that AlnMgrUnpackSeqAlign has already been called on the
1209 * alignment.
1210 *
1211 ***************************************************************************/
1212 NLM_EXTERN Boolean AlnMgrRearrangeUnpacked(SeqAlignPtr sap)
1213 {
1214 DenseDiagPtr ddp;
1215 DenseDiagPtr ddp_prev;
1216 DenseSegPtr dsp;
1217 Int4 i;
1218 SeqAlignPtr salp;
1219 SeqAlignPtr salp_tmp;
1220 SeqAlignPtr sap_head;
1221 SeqAlignPtr sap_new;
1222 SeqAlignPtr sap_prev;
1223 StdSegPtr ssp;
1224 StdSegPtr ssp_next;
1225
1226 if (sap == NULL || sap->segtype != SAS_DISC)
1227 return FALSE;
1228 salp = (SeqAlignPtr)(sap->segs);
1229 sap_head = sap_prev = NULL;
1230 while (salp)
1231 {
1232 if (salp->segtype < 1)
1233 {
1234 return FALSE;
1235 } else if (salp->segtype == SAS_DENDIAG)
1236 {
1237 ddp = (DenseDiagPtr)salp->segs;
1238 while (ddp)
1239 {
1240 sap_new = SeqAlignNew();
1241 sap_new->type = SAT_GLOBAL;
1242 sap_new->segtype = SAS_DENSEG;
1243 sap_new->dim = ddp->dim;
1244 dsp = DenseSegNew();
1245 dsp->dim = sap_new->dim;
1246 dsp->numseg = 1;
1247 dsp->starts = ddp->starts;
1248 ddp->starts = NULL;
1249 dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
1250 dsp->lens[0] = ddp->len;
1251 ddp->len = 0;
1252 dsp->scores = ddp->scores;
1253 ddp->scores = NULL;
1254 dsp->strands = ddp->strands;
1255 ddp->strands = NULL;
1256 if (dsp->strands == NULL)
1257 {
1258 dsp->strands = (Uint1Ptr)MemNew(dsp->dim * sizeof(Uint1));
1259 for (i=0; i<dsp->dim; i++)
1260 {
1261 dsp->strands[i] = Seq_strand_plus;
1262 }
1263 }
1264 dsp->ids = SeqIdDupList(ddp->id);
1265 sap_new->segs = (Pointer)dsp;
1266 if (dsp->scores)
1267 sap_new->score = ScoreDup(dsp->scores);
1268 if (!sap_head)
1269 {
1270 sap_head = sap_prev = sap_new;
1271 } else
1272 {
1273 sap_prev->next = sap_new;
1274 sap_prev = sap_new;
1275 }
1276 ddp_prev = ddp;
1277 ddp = ddp->next;
1278 DenseDiagFree(ddp_prev);
1279 }
1280 salp_tmp = salp->next;
1281 sap_prev->next = salp_tmp;
1282 salp->next = NULL;
1283 salp->segs = NULL;
1284 SeqAlignFree(salp);
1285 salp = salp_tmp;
1286 } else if (salp->segtype == SAS_DENSEG)
1287 {
1288 if (!sap_head)
1289 sap_head = sap_prev = salp;
1290 else
1291 {
1292 sap_prev->next = salp;
1293 sap_prev = salp;
1294 }
1295 dsp = (DenseSegPtr)salp->segs;
1296 if (dsp->strands == NULL)
1297 {
1298 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)* sizeof(Uint1));
1299 for (i=0; i<(dsp->dim)*(dsp->numseg); i++)
1300 {
1301 dsp->strands[i] = Seq_strand_plus;
1302 }
1303 }
1304 salp = salp->next;
1305 } else if (salp->segtype == SAS_STD)
1306 {
1307 sap_prev = sap_head = NULL;
1308 ssp = (StdSegPtr)salp->segs;
1309 while (ssp)
1310 {
1311 sap_new = SeqAlignNew();
1312 if (sap_head)
1313 {
1314 sap_prev->next = sap_new;
1315 sap_prev = sap_new;
1316 } else
1317 {
1318 sap_head = sap_prev = sap_new;
1319 }
1320 sap_new->segtype = SAS_STD;
1321 sap_new->type = SAT_GLOBAL;
1322 sap_new->segs = (Pointer)ssp;
1323 ssp_next = ssp->next;
1324 ssp->next = NULL;
1325 ssp = ssp_next;
1326 }
1327 salp_tmp = salp->next;
1328 salp->next = NULL;
1329 salp->segs = NULL;
1330 SeqAlignFree(salp);
1331 salp = (Pointer)sap_head;
1332 sap_prev->next = salp_tmp;
1333 salp = salp_tmp;
1334 }
1335 }
1336 sap->segs = (Pointer)sap_head;
1337 return TRUE;
1338 }
1339
1340
1341 /***************************************************************************
1342 *
1343 * AlnMgrAnythingToSeg takes any SeqAlign and does an in-place transformation
1344 * to the parent-child structure. Each dense-seg, dense-diag and std-seg
1345 * is put into its own seqalign, and the child seqaligns are linked
1346 * together in no particular order and put in the sap->segs field of the
1347 * new parent (which takes over the pointer passed in). The parent
1348 * has segtype SAS_DISC, and each child has segtype SAS_DENSEG or SAS_STD.
1349 * Each child, then, is a continuous, nonoverlapping alignment and therefore
1350 * may be indexed.
1351 *
1352 ***************************************************************************/
1353 NLM_EXTERN Boolean AlnMgrAnythingToSeg (SeqAlignPtr sap)
1354 {
1355 DenseDiagPtr ddp;
1356 DenseDiagPtr ddp_prev;
1357 DenseSegPtr dsp;
1358 Int4 i;
1359 Boolean retval;
1360 SeqAlignPtr salp;
1361 SeqAlignPtr salp_tmp;
1362 SeqAlignPtr sap_head;
1363 SeqAlignPtr sap_new;
1364 SeqAlignPtr sap_prev;
1365 StdSegPtr ssp;
1366 StdSegPtr ssp_next;
1367
1368 retval = FALSE;
1369 if (!sap)
1370 return retval;
1371 sap_new = SeqAlignNew();
1372 sap_new->type = SAT_GLOBAL;
1373 sap_new->segtype = sap->segtype;
1374 sap_new->dim = sap->dim;
1375 sap_new->segs = sap->segs;
1376 sap_new->master = sap->master;
1377 sap_new->bounds = sap->bounds;
1378 sap_new->next = sap->next;
1379 sap_new->score = sap->score;
1380 sap->next = NULL;
1381 sap->segtype = SAS_DISC;
1382 sap->type = 0;
1383 sap->dim = 0;
1384 sap->master = NULL;
1385 sap->bounds = NULL;
1386 sap->score = NULL;
1387 salp = sap_new;
1388 sap_head = sap_prev = NULL;
1389 while (salp)
1390 {
1391 if (salp->segtype < 1)
1392 {
1393 return retval;
1394 } else if (salp->segtype == SAS_DENDIAG)
1395 {
1396 ddp = (DenseDiagPtr)salp->segs;
1397 while (ddp)
1398 {
1399 sap_new = SeqAlignNew();
1400 sap_new->type = SAT_GLOBAL;
1401 sap_new->segtype = SAS_DENSEG;
1402 sap_new->dim = ddp->dim;
1403 dsp = DenseSegNew();
1404 dsp->dim = sap_new->dim;
1405 dsp->numseg = 1;
1406 dsp->starts = ddp->starts;
1407 ddp->starts = NULL;
1408 dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
1409 dsp->lens[0] = ddp->len;
1410 ddp->len = 0;
1411 dsp->scores = ddp->scores;
1412 ddp->scores = NULL;
1413 dsp->strands = ddp->strands;
1414 ddp->strands = NULL;
1415 if (dsp->strands == NULL)
1416 {
1417 dsp->strands = (Uint1Ptr)MemNew(dsp->dim * sizeof(Uint1));
1418 for (i=0; i<dsp->dim; i++)
1419 {
1420 dsp->strands[i] = Seq_strand_plus;
1421 }
1422 }
1423 dsp->ids = SeqIdDupList(ddp->id);
1424 sap_new->segs = (Pointer)dsp;
1425 if (dsp->scores)
1426 sap_new->score = ScoreDup(dsp->scores);
1427 if (!sap_head)
1428 {
1429 sap_head = sap_prev = sap_new;
1430 } else
1431 {
1432 sap_prev->next = sap_new;
1433 sap_prev = sap_new;
1434 }
1435 ddp_prev = ddp;
1436 ddp = ddp->next;
1437 DenseDiagFree(ddp_prev);
1438 }
1439 salp_tmp = salp->next;
1440 sap_prev->next = salp_tmp;
1441 salp = salp_tmp;
1442 retval = TRUE;
1443 } else if (salp->segtype == SAS_DENSEG)
1444 {
1445 if (!sap_head)
1446 sap_head = sap_prev = salp;
1447 else
1448 {
1449 sap_prev->next = salp;
1450 sap_prev = salp;
1451 }
1452 dsp = (DenseSegPtr)salp->segs;
1453 if (dsp->strands == NULL)
1454 {
1455 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)* sizeof(Uint1));
1456 for (i=0; i<(dsp->dim)*(dsp->numseg); i++)
1457 {
1458 dsp->strands[i] = Seq_strand_plus;
1459 }
1460 }
1461 salp = salp->next;
1462 retval = TRUE;
1463 } else if (salp->segtype == SAS_STD)
1464 {
1465 sap_prev = sap_head = NULL;
1466 ssp = (StdSegPtr)salp->segs;
1467 while (ssp)
1468 {
1469 sap_new = SeqAlignNew();
1470 if (sap_head)
1471 {
1472 sap_prev->next = sap_new;
1473 sap_prev = sap_new;
1474 } else
1475 {
1476 sap_head = sap_prev = sap_new;
1477 }
1478 sap_new->segtype = SAS_STD;
1479 sap_new->type = SAT_GLOBAL;
1480 sap_new->segs = (Pointer)ssp;
1481 ssp_next = ssp->next;
1482 ssp->next = NULL;
1483 ssp = ssp_next;
1484 }
1485 salp_tmp = salp->next;
1486 salp = (Pointer)sap_head;
1487 sap_prev->next = salp_tmp;
1488 salp = salp_tmp;
1489 }
1490 }
1491 sap->segs = (Pointer)sap_head;
1492 return retval;
1493 }
1494
1495
1496 /***********************************************************************
1497 *
1498 * AlnMgrIndexLinkedSegs and AlnMgrIndexParentSA create and fill in the
1499 * SASeqIndex and AMAlignIndex structures on the children and the parent,
1500 * respectively. IndexLinkedSegs is called on the sap->segs field of
1501 * the parent, so that the pointer of the first child in the list
1502 * gets passed in. AlnMgrIndexParentSA is called on the parent, and
1503 * the children must already be indexed (the function does check) in order
1504 * for it to work. AlnMgrIndexParentSA calls AlnMgrPropagateUpSeqIdPtrs
1505 * to create a list of all SeqIdPtrs present in all the children (each
1506 * is only listed once, in the order that its AMAlignDat structure occurs
1507 * in).
1508 *
1509 ***********************************************************************/
1510 NLM_EXTERN Boolean AlnMgrIndexLinkedSegs (SeqAlignPtr sap)
1511 { /* all the Uint2's may have to be changed to Uint4's */
1512 Int4 currseq;
1513 DenseSegPtr dsp;
1514 Uint2 i;
1515 Uint4 qlen;
1516 Boolean retval;
1517 SAIndexPtr saip;
1518 SASeqDatPtr ssdp;
1519
1520 retval = FALSE;
1521 while (sap)
1522 {
1523 if (sap->segtype == SAS_DENSEG)
1524 {
1525 dsp = (DenseSegPtr)sap->segs;
1526 saip = SAIndexNew();
1527 saip->aligncoords = (Uint4Ptr)MemNew((dsp->numseg+1)*sizeof(Uint4));
1528 qlen = 0;
1529 saip->ssdp = (SASeqDatPtr PNTR)MemNew((dsp->dim+1)*sizeof(SASeqDatPtr));
1530 saip->numseqs = dsp->dim+1;
1531 for (i = 0; i<(dsp->dim); i++)
1532 {
1533 ssdp = SASeqDatNew();
1534 saip->ssdp[i] = ssdp;
1535 }
1536 for (i = 0; i<(dsp->numseg); i++)
1537 {
1538 saip->aligncoords[i] = qlen;
1539 qlen += dsp->lens[i];
1540 for (currseq = 0; currseq<(dsp->dim); currseq++)
1541 {
1542 if ((dsp->starts[dsp->dim*i+currseq]) != -1)
1543 {
1544 saip->ssdp[currseq]->numsect++;
1545 }
1546 }
1547 }
1548 for (currseq = 0; currseq<(dsp->dim); currseq++)
1549 {
1550 saip->ssdp[currseq]->sect = (Uint2Ptr)MemNew((saip->ssdp[currseq]->numsect)*sizeof(Uint2));
1551 saip->ssdp[currseq]->unsect = (Uint2Ptr)MemNew((dsp->numseg - saip->ssdp[currseq]->numsect)*sizeof(Uint2));
1552 saip->ssdp[currseq]->numsect = 0;
1553 }
1554 for (i=0; i<(dsp->numseg); i++)
1555 {
1556 for (currseq=0; currseq<(dsp->dim); currseq++)
1557 {
1558 if ((dsp->starts[dsp->dim*i+currseq]) != -1)
1559 {
1560 saip->ssdp[currseq]->sect[saip->ssdp[currseq]->numsect] = i;
1561 saip->ssdp[currseq]->numsect++;
1562 } else
1563 {
1564 saip->ssdp[currseq]->unsect[saip->ssdp[currseq]->numunsect]=i;
1565 saip->ssdp[currseq]->numunsect++;
1566 }
1567 }
1568 }
1569 saip->indextype = INDEX_SEGS;
1570 sap->saip = (SeqAlignIndexPtr)saip;
1571 }
1572 sap = sap->next;
1573 retval = TRUE;
1574 }
1575 return retval;
1576 }
1577
1578 NLM_EXTERN Boolean AlnMgrIndexParentSA(SeqAlignPtr sap)
1579 {
1580 AMAlignDatPtr amadp;
1581 AMAlignIndexPtr amaip;
1582 Int4 count;
1583 Boolean done;
1584 Int4 i;
1585 Int4 notfound;
1586 Int4 numsap;
1587 Boolean retval;
1588 SeqAlignPtr salp;
1589 SeqIdPtr sip;
1590
1591 retval = FALSE;
1592 if (!sap)
1593 return retval;
1594 if (sap->segtype != SAS_DISC)
1595 return retval;
1596 if (((SeqAlignPtr)(sap->segs))->saip == NULL)
1597 {
1598 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)(sap->segs)))
1599 return retval;
1600 }
1601 amaip = (AMAlignIndexPtr)sap->saip;
1602 if (amaip)
1603 sap->saip = (Pointer)AMAlignIndexFree(amaip);
1604 sap->saip = NULL;
1605 amaip = AMAlignIndexNew();
1606 count = 0;
1607 amaip->indextype = INDEX_PARENT;
1608 amaip->ids = AlnMgrPropagateUpSeqIdPtrs(sap, &count);
1609 sip = amaip->ids;
1610 amaip->numbsqs = count;
1611 amaip->amadp = (AMAlignDatPtr PNTR)MemNew((count+1)*sizeof(AMAlignDatPtr));
1612 for (count = 0; count < amaip->numbsqs; count++)
1613 {
1614 amadp = AMAlignDatNew();
1615 amaip->amadp[count] = amadp;
1616 numsap = 0;
1617 /*amadp->saps = AlnMgrSortSeqAligns((SeqAlignPtr)sap->segs, AlnMgrCompareIncreasingBySeqIdPtr, sip, &numsap);*/
1618 amadp->saps = AlnMgrSortbyID((SeqAlignPtr)sap->segs, sip, &numsap);
1619 done = FALSE;
1620 notfound = 0;
1621 for (i=0; i<numsap && !done; i++)
1622 {
1623 if (AlnMgrGetNForSip(amadp->saps[i], sip) < 0)
1624 {
1625 notfound++;
1626 } else
1627 {
1628 done = TRUE;
1629 }
1630 }
1631 amadp->numsaps = numsap - notfound;
1632 for (i=0; i<(numsap - notfound); i++)
1633 {
1634 amadp->saps[i] = amadp->saps[i+notfound];
1635 }
1636 for (i=(numsap - notfound); i<numsap; i++)
1637 {
1638 amadp->saps[i] = NULL;
1639 }
1640 amadp->sip = SeqIdDup(sip);
1641 sip = sip->next;
1642 }
1643 i = 0;
1644 salp = (SeqAlignPtr)sap->segs;
1645 while (salp)
1646 {
1647 i++;
1648 salp = salp->next;
1649 }
1650 amaip->numsaps = i;
1651 amaip->parent = sap;
1652 sap->saip = (Pointer)amaip;
1653 retval = TRUE;
1654 return retval;
1655 }
1656
1657 NLM_EXTERN SeqIdPtr AlnMgrPropagateUpSeqIdPtrs(SeqAlignPtr sap, Int4Ptr num)
1658 {
1659 Int4 count;
1660 DenseSegPtr dsp;
1661 Boolean found;
1662 SeqAlignPtr salp;
1663 SeqIdPtr sip_head;
1664 SeqIdPtr sip_list;
1665 SeqIdPtr sip_tmp;
1666 SeqIdPtr sip_tmp2;
1667
1668 if (!sap)
1669 return NULL;
1670 if (sap->segtype == SAS_DISC)
1671 salp = (SeqAlignPtr)(sap->segs);
1672 else
1673 salp = sap;
1674 count = 0;
1675 sip_list = sip_head = NULL;
1676 while (salp)
1677 {
1678 dsp = (DenseSegPtr)salp->segs;
1679 sip_tmp = dsp->ids;
1680 if (!sip_list)
1681 {
1682 sip_head = sip_list = SeqIdDup(sip_tmp);
1683 sip_tmp = sip_tmp->next;
1684 count++;
1685 }
1686 while (sip_tmp)
1687 {
1688 sip_tmp2 = sip_head;
1689 found = FALSE;
1690 while (sip_tmp2 && !found)
1691 {
1692 if (SeqIdComp(sip_tmp, sip_tmp2) == SIC_YES)
1693 found = TRUE;
1694 sip_tmp2 = sip_tmp2->next;
1695 }
1696 if (!found)
1697 {
1698 sip_list->next = SeqIdDup(sip_tmp);
1699 sip_list = sip_list->next;
1700 sip_list->next = NULL;
1701 count++;
1702 }
1703 sip_tmp = sip_tmp->next;
1704 }
1705 salp = salp->next;
1706 }
1707 if (num)
1708 *num = count;
1709 return sip_head;
1710 }
1711
1712 NLM_EXTERN SeqIdPtr AlnMgrPropagateSeqIdsBySapList(AMAlignIndexPtr amaip)
1713 {
1714 DenseSegPtr dsp;
1715 Int4 i;
1716 Int4 j;
1717 SAIndexPtr saip;
1718 SeqAlignPtr salp;
1719 SeqIdPtr sip;
1720 SeqIdPtr sip_head;
1721 SeqIdPtr sip_tmp;
1722 SeqIdPtr sip_tmp2;
1723
1724 if (amaip == NULL)
1725 return NULL;
1726 if (amaip->saps == NULL)
1727 return NULL;
1728 sip_head = NULL;
1729 for (i=0; i<(amaip->alnsaps); i++)
1730 {
1731 j=1;
1732 salp = amaip->saps[i];
1733 saip = (SAIndexPtr)salp->saip;
1734 dsp = (DenseSegPtr)(salp->segs);
1735 sip_tmp = dsp->ids;
1736 while (j<saip->master)
1737 {
1738 sip_tmp = sip_tmp->next;
1739 j++;
1740 }
1741 if (sip_head == NULL)
1742 sip_head = sip = SeqIdDup(sip_tmp);
1743 sip_tmp = dsp->ids;
1744 j=0;
1745 while (sip_tmp)
1746 {
1747 j++;
1748 if (j!=saip->master)
1749 {
1750 sip_tmp2 = SeqIdDup(sip_tmp);
1751 sip->next = sip_tmp2;
1752 sip = sip->next;
1753 }
1754 sip_tmp = sip_tmp->next;
1755 }
1756 }
1757 return sip_head;
1758 }
1759
1760 NLM_EXTERN SeqIdPtr AlnMgrPropagateSeqIdsByRow(AMAlignIndexPtr amaip)
1761 {
1762 Int4 i;
1763 SeqIdPtr sip;
1764 SeqIdPtr sip_head;
1765 SeqIdPtr sip_tmp;
1766
1767 if (amaip->rowsource == NULL)
1768 return NULL;
1769 sip_head = sip = SeqIdDup(amaip->rowsource[0]->id);
1770 for (i=1; i<amaip->numrows; i++)
1771 {
1772 sip_tmp = SeqIdDup(amaip->rowsource[i]->id);
1773 sip->next = sip_tmp;
1774 sip = sip->next;
1775 }
1776 return sip_head;
1777 }
1778
1779 /***************************************************************************
1780 *
1781 * AlnMgrRemoveInconsistentFromPairwiseSet is a greedy function to make
1782 * a consistent (nonoverlapping, linear) subset of alignments from a
1783 * set of pairwise alignments (often BLAST output, gapped or ungapped).
1784 * The input seqalign should either not be indexed or indexed using
1785 * AlnMgrIndexLite (just call it on the BLAST output). fuzz specifies
1786 * how much overlap, if any, is allowed between alignments that are kept (for
1787 * example, if fuzz = 5, any alignments that overlap by 5 or less are
1788 * considered consistent). If fuzz is less than 0, this will force spaces
1789 * between alignments (not sure why someone would want to do that, but
1790 * it is allowed).
1791 *
1792 * The "Ex" version also returns the Discarded SeqAligns separated into two
1793 * lists according to the reason for their rejection.
1794 *
1795 *
1796 ***************************************************************************/
1797 NLM_EXTERN void AlnMgrRemoveInconsistentFromPairwiseSet(SeqAlignPtr sap, Int4 fuzz) {
1798 AlnMgrRemoveInconsistentFromPairwiseSetEx(sap, fuzz,NULL,NULL,NULL);
1799 }
1800
1801
1802 NLM_EXTERN void AlnMgrRemoveInconsistentFromPairwiseSetEx(SeqAlignPtr sap, Int4 fuzz, SeqAlignPtr PNTR wrong_strand, SeqAlignPtr PNTR overlaps_m,SeqAlignPtr PNTR overlaps_s)
1803 {
1804 AMAlignInfoPtr aip_list;
1805 AMAlignIndexPtr amaip;
1806 FloatHi bit_score;
1807 Boolean conflict;
1808 FloatHi evalue;
1809 Int4 i;
1810 Int4 j;
1811 Int4 number;
1812 SAIndexPtr saip1;
1813 SAIndexPtr saip2;
1814 SeqAlignPtr salp;
1815 Int4 score;
1816 Int4 start1;
1817 Int4 start2;
1818 Int4 startm1;
1819 Int4 startm2;
1820 Int4 stop1;
1821 Int4 stop2;
1822 Int4 stopm1;
1823 Int4 stopm2;
1824 Uint1 strand;
1825 Uint1 strand_curr;
1826 Uint1 conflict_type;
1827 if(wrong_strand)
1828 *wrong_strand = NULL;
1829 if(overlaps_m)
1830 *overlaps_m=NULL;
1831 if(overlaps_s)
1832 *overlaps_s=NULL;
1833 if (sap == NULL || (sap->saip != NULL && sap->saip->indextype != INDEX_PARENT))
1834 return;
1835 if (sap->saip == NULL)
1836 {
1837 if (!AlnMgrIndexLite(sap))
1838 return;
1839 }
1840 amaip = (AMAlignIndexPtr)(sap->saip);
1841 if (amaip->numbsqs > 2)
1842 return;
1843
1844 salp = (SeqAlignPtr)(sap->segs);
1845 aip_list = (AMAlignInfoPtr)MemNew((amaip->numsaps)*sizeof(AMAlignInfo));
1846 for (i=0; i<amaip->numsaps && salp != NULL; i++)
1847 {
1848 aip_list[i].align = salp;
1849 GetScoreAndEvalue(salp, &score, &bit_score, &evalue, &number);
1850 aip_list[i].align_len = score;
1851 salp = salp->next;
1852 }
1853 HeapSort(aip_list, amaip->numsaps, sizeof(AMAlignInfo), AMCompareAlignInfoProc);
1854 saip1 = (SAIndexPtr)aip_list[0].align->saip;
1855 if (saip1 == NULL)
1856 return;
1857 strand = AlnMgrGetNthStrand(aip_list[0].align, 2);
1858 if (strand != Seq_strand_minus)
1859 strand = Seq_strand_plus;
1860 amaip->alnsaps = 0;
1861 for (i=0; i<amaip->numsaps; i++)
1862 {
1863 if ((saip1 = (SAIndexPtr)aip_list[i].align->saip) == NULL)
1864 return;
1865 AlnMgrGetNthSeqRangeInSA(aip_list[i].align, 1, &startm1, &stopm1);
1866 AlnMgrGetNthSeqRangeInSA(aip_list[i].align, 2, &start1, &stop1);
1867 strand_curr = AlnMgrGetNthStrand(aip_list[i].align, 2);
1868 if (strand_curr != Seq_strand_minus)
1869 strand_curr = Seq_strand_plus;
1870 if (strand_curr != strand) {
1871 conflict = TRUE;
1872 conflict_type = 1; /* wrong strand */
1873 } else
1874 conflict = FALSE;
1875 for (j=0; j<amaip->alnsaps && !conflict; j++)
1876 {
1877 if ((saip2 = (SAIndexPtr)(amaip->saps[j]->saip)) == NULL)
1878 return;
1879 AlnMgrGetNthSeqRangeInSA(amaip->saps[j], 1, &startm2, &stopm2);
1880 AlnMgrGetNthSeqRangeInSA(amaip->saps[j], 2, &start2, &stop2);
1881 if (startm1 < startm2)
1882 {
1883 if (stopm1 >= startm2 + fuzz) {
1884 conflict = TRUE;
1885 conflict_type = 2 ; /* overlap on master */
1886 } else if (strand == Seq_strand_minus)
1887 {
1888 if (start1 <= stop2 - fuzz) {
1889 conflict = TRUE;
1890 conflict_type = 3 ; /*overlap on query */
1891 }
1892 } else
1893 {
1894 if (stop1 >= start2 + fuzz) {
1895 conflict = TRUE;
1896 conflict_type = 3 ; /*overlap on query */
1897 }
1898 }
1899 } else if (startm1 > startm2)
1900 {
1901 if (startm1 <= stopm2 - fuzz) {
1902 conflict = TRUE;
1903 conflict_type = 2 ; /* overlap on master */
1904 } else if (strand == Seq_strand_minus)
1905 {
1906 if (stop1 >= start2 + fuzz) {
1907 conflict = TRUE;
1908 conflict_type = 3 ; /*overlap on query */
1909 }
1910 } else
1911 {
1912 if (stop2 >= start1 + fuzz) {
1913 conflict = TRUE;
1914 conflict_type = 3 ; /*overlap on query */
1915 }
1916 }
1917 } else if (startm1 == startm2) {
1918 conflict = TRUE;
1919 conflict_type = 2 ; /*overlap on master */
1920 }
1921 }
1922 if (!conflict)
1923 {
1924 amaip->saps[amaip->alnsaps] = aip_list[i].align;
1925 amaip->alnsaps++;
1926 } else {
1927 switch(conflict_type) {
1928 case 1:
1929 if(wrong_strand) {
1930 if(*wrong_strand) {
1931 aip_list[i].align->next = *wrong_strand;
1932 }
1933 *wrong_strand = aip_list[i].align;
1934 } else {
1935 SeqAlignFree(aip_list[i].align);
1936 }
1937 break;
1938 case 2:
1939 if(overlaps_m) {
1940 if(*overlaps_m) {
1941 aip_list[i].align->next = *overlaps_m;
1942 }
1943 *overlaps_m = aip_list[i].align;
1944 } else {
1945 SeqAlignFree(aip_list[i].align);
1946 }
1947 break;
1948 case 3:
1949 if(overlaps_s) {
1950 if(*overlaps_s) {
1951 aip_list[i].align->next = *overlaps_s;
1952 }
1953 *overlaps_s = aip_list[i].align;
1954 } else {
1955 SeqAlignFree(aip_list[i].align);
1956 }
1957 break;
1958 default:
1959 SeqAlignFree(aip_list[i].align);
1960 }
1961 }
1962 }
1963 MemFree(aip_list);
1964 AlnMgrDeleteHiddenEx(sap, FALSE,FALSE);
1965 {
1966 /* Revert the SeqAlign order, to reorder them by score */
1967 SeqAlignPtr salp_next,salp_last=NULL;
1968 if(wrong_strand && *wrong_strand) {
1969 salp_next = (*wrong_strand)->next;
1970 while(salp_next!=NULL) {
1971 (*wrong_strand)->next = salp_last;
1972 salp_last = *wrong_strand;
1973 *wrong_strand = salp_next;
1974 salp_next = (*wrong_strand)->next;
1975 }
1976 }
1977 if(overlaps_m && *overlaps_m) {
1978 salp_next = (*overlaps_m)->next;
1979 while(salp_next!=NULL) {
1980 (*overlaps_m)->next = salp_last;
1981 salp_last = *overlaps_m;
1982 *overlaps_m = salp_next;
1983 salp_next = (*overlaps_m)->next;
1984 }
1985 }
1986 if(overlaps_s && *overlaps_s) {
1987 salp_next = (*overlaps_s)->next;
1988 while(salp_next!=NULL) {
1989 (*overlaps_s)->next = salp_last;
1990 salp_last = *overlaps_s;
1991 *overlaps_s = salp_next;
1992 salp_next = (*overlaps_s)->next;
1993 }
1994 }
1995
1996 }
1997 }
1998
1999
2000 NLM_EXTERN Boolean AlnMgrMakeMultipleByScore(SeqAlignPtr sap)
2001 {
2002 return (AlnMgrMakeMultipleByScoreExEx(sap, 0,NULL,NULL,NULL));
2003 }
2004
2005 NLM_EXTERN Boolean AlnMgrMakeMultipleByScoreEx(SeqAlignPtr sap, Int4 fuzz) {
2006 return (AlnMgrMakeMultipleByScoreExEx(sap, fuzz,NULL,NULL,NULL));
2007 }
2008
2009
2010 NLM_EXTERN Boolean AlnMgrMakeMultipleByScoreExEx(SeqAlignPtr sap, Int4 fuzz,SeqAlignPtr PNTR wrong_strand, SeqAlignPtr PNTR overlaps_m,SeqAlignPtr PNTR overlaps_s)
2011 {
2012 AMAlignIndexPtr amaip;
2013 FloatHi bit_score;
2014 Boolean conflict;
2015 FloatHi evalue;
2016 Int4 i;
2017 Int4 j;
2018 Int4 n;
2019 Int4 number;
2020 SAIndexPtr saip1;
2021 SAIndexPtr saip2;
2022 SeqAlignPtr salp;
2023 AMAlignInfoPtr salp_list;
2024 SeqAlignPtr PNTR saparray;
2025 Int4 score;
2026 SeqIdPtr sip;
2027 Int4 start1;
2028 Int4 start2;
2029 Int4 startm1;
2030 Int4 startm2;
2031 Int4 stop1;
2032 Int4 stop2;
2033 Int4 stopm1;
2034 Int4 stopm2;
2035 Int4 fuzzstop;
2036 Uint1 strand;
2037 Uint1 strand_curr;
2038 AMTinyInfoPtr PNTR tiparray;
2039 Uint1 conflict_type;
2040
2041 if(wrong_strand)
2042 *wrong_strand = NULL;
2043 if(overlaps_m)
2044 *overlaps_m=NULL;
2045 if(overlaps_s)
2046 *overlaps_s=NULL;
2047
2048 if (sap == NULL)
2049 return FALSE;
2050 i = AlnMgrCheckAlignForParent(sap);
2051 if (i != AM_PARENT)
2052 return FALSE;
2053 amaip = (AMAlignIndexPtr)sap->saip;
2054 if (amaip == NULL)
2055 return FALSE;
2056 if (amaip->numbsqs > 2)
2057 return FALSE;
2058 if (sap->master == NULL)
2059 return FALSE;
2060 salp = (SeqAlignPtr)sap->segs;
2061 n = amaip->numsaps;
2062 if(n==1)
2063 return TRUE;
2064
2065 salp_list = Calloc(n, sizeof (AMAlignInfo));
2066 for (i=0; i<n; i++, salp=salp->next)
2067 {
2068 salp_list[i].align=salp;
2069 GetScoreAndEvalue(salp, &score, &bit_score, &evalue, &number);
2070 salp_list[i].align_len = score;
2071 }
2072 HeapSort (salp_list, n, sizeof (AMAlignInfo), AMCompareAlignInfoProc);
2073 saip1 = (SAIndexPtr)salp_list[0].align->saip;
2074 if (saip1 == NULL)
2075 return FALSE;
2076 strand = AlnMgrGetNthStrand(salp_list[0].align, 3-saip1->master);
2077 if (strand != Seq_strand_minus)
2078 strand = Seq_strand_plus;
2079 amaip->alnsaps = 0;
2080 for (i=0; i<n; i++)
2081 {
2082 if ((saip1 = (SAIndexPtr)salp_list[i].align->saip) == NULL)
2083 return FALSE;
2084 AlnMgrGetNthSeqRangeInSA(salp_list[i].align, saip1->master, &startm1, &stopm1);
2085 AlnMgrGetNthSeqRangeInSA(salp_list[i].align, 3-saip1->master, &start1, &stop1);
2086 strand_curr = AlnMgrGetNthStrand(salp_list[i].align, 3-saip1->master);
2087 if (strand_curr != Seq_strand_minus)
2088 strand_curr = Seq_strand_plus;
2089 if (strand_curr != strand) {
2090 conflict = TRUE;
2091 conflict_type = 1;
2092 } else
2093 conflict = FALSE;
2094 for (j=0; j<amaip->alnsaps && !conflict; j++)
2095 {
2096 if ((saip2 = (SAIndexPtr)(amaip->saps[j]->saip)) == NULL)
2097 return FALSE;
2098 AlnMgrGetNthSeqRangeInSA(amaip->saps[j], saip2->master, &startm2, &stopm2);
2099 AlnMgrGetNthSeqRangeInSA(amaip->saps[j], 3-saip2->master, &start2, &stop2);
2100 if (startm1 < startm2)
2101 {
2102 fuzzstop = stopm1-fuzz;
2103 if (fuzzstop<startm1)
2104 fuzzstop = startm1;
2105 if (fuzzstop >= startm2) {
2106 conflict = TRUE;
2107 conflict_type = 2; /* overlap on master */
2108 } else if (strand == Seq_strand_minus)
2109 {
2110 fuzzstop = stop2-fuzz;
2111 if (fuzzstop<start2)
2112 fuzzstop = start2;
2113 if (start1 <= fuzzstop) {
2114 conflict = TRUE;
2115 conflict_type = 3; /* overlap on subject */
2116 }
2117 } else
2118 {
2119 fuzzstop = stop1-fuzz;
2120 if (fuzzstop<start1)
2121 fuzzstop = start1;
2122 if (fuzzstop >= start2) {
2123 conflict = TRUE;
2124 conflict_type = 3; /* overlap on subject */
2125 }
2126 }
2127 } else if (startm1 > startm2)
2128 {
2129 fuzzstop = stopm2-fuzz;
2130 if (fuzzstop<startm2)
2131 fuzzstop = startm2;
2132 if (startm1 <= fuzzstop) {
2133 conflict = TRUE;
2134 conflict_type = 2; /* overlap on master */
2135 } else if (strand == Seq_strand_minus)
2136 {
2137 fuzzstop = stop1-fuzz;
2138 if (fuzzstop<start1)
2139 fuzzstop = start1;
2140 if (fuzzstop >= start2) {
2141 conflict = TRUE;
2142 conflict_type = 3; /* overlap on subject */
2143 }
2144 } else
2145 {
2146 fuzzstop = stop2-fuzz;
2147 if (fuzzstop<start2)
2148 fuzzstop = start2;
2149 if (fuzzstop >= start1) {
2150 conflict = TRUE;
2151 conflict_type = 3; /* overlap on subject */
2152 }
2153 }
2154 } else if (startm1 == startm2) {
2155 conflict = TRUE;
2156 conflict_type = 2; /* overlap on master */
2157 }
2158 }
2159 if (!conflict)
2160 {
2161 amaip->saps[amaip->alnsaps] = salp_list[i].align;
2162 amaip->alnsaps++;
2163 } else {
2164 switch(conflict_type) {
2165 case 1:
2166 if(wrong_strand) {
2167 if(*wrong_strand) {
2168 (salp_list[i].align)->next = *wrong_strand;
2169 }
2170 *wrong_strand = salp_list[i].align;
2171 } else {
2172 SeqAlignFree(salp_list[i].align);
2173 }
2174 break;
2175 case 2:
2176 if(overlaps_m) {
2177 if(*overlaps_m) {
2178 (salp_list[i].align)->next = *overlaps_m;
2179 }
2180 *overlaps_m = salp_list[i].align;
2181 } else {
2182 SeqAlignFree(salp_list[i].align);
2183 }
2184 break;
2185 case 3:
2186 if(overlaps_s) {
2187 if(*overlaps_s) {
2188 (salp_list[i].align)->next = *overlaps_s;
2189 }
2190 *overlaps_s = salp_list[i].align;
2191 } else {
2192 SeqAlignFree(salp_list[i].align);
2193 }
2194 break;
2195 default:
2196 SeqAlignFree(salp_list[i].align);
2197 }
2198 }
2199 }
2200 tiparray = (AMTinyInfoPtr PNTR)MemNew((amaip->alnsaps)*sizeof(AMTinyInfoPtr));
2201 for (i=0; i<amaip->alnsaps; i++)
2202 {
2203 saip1 = (SAIndexPtr)amaip->saps[i]->saip;
2204 AlnMgrGetNthSeqRangeInSA(amaip->saps[i], saip1->master, &start1, &stop1);
2205 tiparray[i] = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
2206 tiparray[i]->start = start1;
2207 tiparray[i]->stop = stop1;
2208 tiparray[i]->numgap = saip1->master;
2209 tiparray[i]->numsap = i;
2210 }
2211 HeapSort((Pointer)tiparray, (size_t)(amaip->alnsaps), sizeof(AMTinyInfoPtr), AlnMgrCompareTips);
2212 saparray = (SeqAlignPtr PNTR)(MemNew((amaip->alnsaps)*sizeof(SeqAlignPtr)));
2213 for (i=0; i<amaip->alnsaps; i++)
2214 {
2215 saparray[i] = amaip->saps[i];
2216 }
2217 for (i=0; i<amaip->alnsaps; i++)
2218 {
2219 amaip->saps[i] = saparray[tiparray[i]->numsap];
2220 tiparray[i]->numsap = i;
2221 }
2222 MemFree(saparray);
2223 amaip->numseg = amaip->alnsaps;
2224 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
2225 amaip->lens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
2226 amaip->rowsource = (RowSourcePtr PNTR)MemNew(2*sizeof(RowSourcePtr));
2227 amaip->rowsource[0] = (RowSourcePtr)MemNew(sizeof(RowSource));
2228 amaip->rowsource[0]->id = SeqIdDup(sap->master);
2229 amaip->rowsource[0]->which_saps = (Uint4Ptr)MemNew((amaip->alnsaps+1)*sizeof(Uint4));
2230 amaip->rowsource[0]->num_in_sap = (Uint4Ptr)MemNew((amaip->alnsaps+1)*sizeof(Uint4));
2231 amaip->rowsource[1] = (RowSourcePtr)MemNew(sizeof(RowSource));
2232 sip = AlnMgrGetNthSeqIdPtr(amaip->saps[0], 3-((SAIndexPtr)amaip->saps[0]->saip)->master);
2233 amaip->rowsource[1]->id = sip;
2234 amaip->rowsource[1]->which_saps = (Uint4Ptr)MemNew((amaip->alnsaps+1)*sizeof(Uint4));
2235 amaip->rowsource[1]->num_in_sap = (Uint4Ptr)MemNew((amaip->alnsaps+1)*sizeof(Uint4));
2236 for (i=0; i<amaip->alnsaps; i++)
2237 {
2238 amaip->rowsource[0]->which_saps[i] = amaip->rowsource[1]->which_saps[i] = tiparray[i]->numsap + 1;
2239 amaip->rowsource[0]->num_in_sap[i] = tiparray[i]->numgap;
2240 amaip->rowsource[1]->num_in_sap[i] = 3-tiparray[i]->numgap;
2241 amaip->lens[i] = AlnMgrGetAlnLength(amaip->saps[tiparray[i]->numsap], FALSE);
2242 if (i>0)
2243 amaip->aligncoords[i] = amaip->aligncoords[i-1] + amaip->lens[i-1];
2244 else
2245 amaip->aligncoords[i] = 0;
2246 }
2247 amaip->rowsource[0]->numsaps = amaip->rowsource[1]->numsaps = amaip->alnsaps;
2248 amaip->master = 1;
2249 amaip->numrows = 2;
2250 for (i=0; i<amaip->alnsaps; i++)
2251 {
2252 MemFree(tiparray[i]);
2253 }
2254 MemFree(tiparray);
2255 sap->segs = (Pointer)(amaip->saps[0]);
2256 for (i=1; i<amaip->alnsaps; i++)
2257 {
2258 amaip->saps[i-1]->next = amaip->saps[i];
2259 }
2260 amaip->saps[amaip->alnsaps-1]->next = NULL;
2261 /* amaip->numsaps = amaip->alnsaps; */
2262 MemFree(salp_list);
2263 sap->type = SAT_MASTERSLAVE;
2264 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
2265 AlnMgrDeleteHiddenEx(sap, FALSE,FALSE);
2266 return TRUE;
2267 }
2268
2269 NLM_EXTERN SeqAlignPtr AlnMgrDupTopNByScore(SeqAlignPtr sap, Int4 n)
2270 {
2271 AMAlignIndexPtr amaip;
2272 FloatHi bit_score;
2273 FloatHi evalue;
2274 Int4 i;
2275 Int4 num;
2276 Int4 number;
2277 SeqAlignPtr salp;
2278 SeqAlignPtr salp_head;
2279 AMAlignInfoPtr salp_list;
2280 SeqAlignPtr salp_prev;
2281 Int4 score;
2282
2283 if (sap == NULL)
2284 return FALSE;
2285 i = AlnMgrCheckAlignForParent(sap);
2286 if (i != AM_PARENT)
2287 return FALSE;
2288 amaip = (AMAlignIndexPtr)sap->saip;
2289 if (amaip == NULL)
2290 return FALSE;
2291 salp = (SeqAlignPtr)sap->segs;
2292 num = amaip->numsaps;
2293 if (n > num || n < 0)
2294 n = num;
2295 salp_list = Calloc(num, sizeof (AMAlignInfo));
2296 for (i=0; i<num; i++, salp=salp->next)
2297 {
2298 salp_list[i].align=salp;
2299 GetScoreAndEvalue(salp, &score, &bit_score, &evalue, &number);
2300 salp_list[i].align_len = score;
2301 }
2302 HeapSort (salp_list, num, sizeof (AMAlignInfo), AMCompareAlignInfoProc);
2303 salp_head = salp_prev = SeqAlignDup(salp_list[0].align);
2304 for (i=1; i<n; i++)
2305 {
2306 salp_prev->next = SeqAlignDup(salp_list[i].align);
2307 salp_prev = salp_prev->next;
2308 }
2309 return salp_head;
2310 }
2311
2312 static int LIBCALLBACK AMCompareAlignInfoProc(VoidPtr ptr1, VoidPtr ptr2)
2313 {
2314 AMAlignInfoPtr aip_1;
2315 AMAlignInfoPtr aip_2;
2316 if (ptr1 != NULL && ptr2 != NULL)
2317 {
2318 aip_1 = (AMAlignInfoPtr) ptr1;
2319 aip_2 = (AMAlignInfoPtr) ptr2;
2320 if(aip_1->align_len > aip_2->align_len)
2321 return -1;
2322 else if(aip_1->align_len < aip_2->align_len)
2323 return 1;
2324 else
2325 return 0;
2326 }
2327 return 0;
2328 }
2329
2330 /***************************************************************************
2331 *
2332 * AlnMgrSortAlnSetByNthRowPos sorts a set of alignments so that they
2333 * are in (increasing) order along the specified row (to make sense, this
2334 * set of alignments should all have the same rows).
2335 *
2336 ***************************************************************************/
2337 NLM_EXTERN void AlnMgrSortAlnSetByNthRowPos(SeqAlignPtr sap, Int4 row)
2338 {
2339 AMAlignIndexPtr amaip;
2340 AMBittyPtr bit;
2341 Int4 i;
2342 SeqAlignPtr PNTR saparray;
2343
2344 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
2345 return;
2346 amaip = (AMAlignIndexPtr)(sap->saip);
2347 bit = (AMBittyPtr)MemNew((amaip->numsaps)*sizeof(AMBitty));
2348 saparray = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
2349 for (i=0; i<amaip->numsaps; i++)
2350 {
2351 bit[i].num2 = i;
2352 AlnMgrGetNthSeqRangeInSA(amaip->saps[i], row, &bit[i].num1, NULL);
2353 saparray[i] = amaip->saps[i];
2354 }
2355 HeapSort(bit, amaip->numsaps, sizeof(AMBitty), AMCompareStarts);
2356 for (i=0; i<amaip->numsaps; i++)
2357 {
2358 amaip->saps[i] = saparray[bit[i].num2];
2359 }
2360 MemFree(saparray);
2361 MemFree(bit);
2362 }
2363
2364 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2)
2365 {
2366 AMBittyPtr bit1;
2367 AMBittyPtr bit2;
2368
2369 if (ptr1 != NULL && ptr2 != NULL)
2370 {
2371 bit1 = (AMBittyPtr)ptr1;
2372 bit2 = (AMBittyPtr)ptr2;
2373 if (bit1->num1 < bit2->num1)
2374 return -1;
2375 else if (bit1->num1 > bit2->num1)
2376 return 1;
2377 else
2378 return 0;
2379 }
2380 return 0;
2381 }
2382
2383 NLM_EXTERN void am_print_seqalign_indexes(SeqAlignPtr sap)
2384 {
2385 AMAlignIndexPtr amaip;
2386 DenseSegPtr dsp;
2387 Int4 i;
2388 Int4 j;
2389 SAIndexPtr saip;
2390
2391 if (!sap)
2392 return;
2393 if (!sap->saip)
2394 return;
2395 while (sap)
2396 {
2397 if (sap->segtype == SAS_DENSEG && sap->saip)
2398 {
2399 dsp = (DenseSegPtr)sap->segs;
2400 if (sap->saip->indextype == INDEX_SEGS)
2401 saip = (SAIndexPtr)(sap->saip);
2402 printf("\naligncoords: ");
2403 for (i=0; i<(dsp->numseg); i++)
2404 {
2405 printf("%d ", saip->aligncoords[i]);
2406 }
2407 fflush(stdout);
2408 for (i=0; i<(dsp->dim); i++)
2409 {
2410 printf("\n");
2411 printf("Sequence %d:", i);
2412 for (j=0; j<(saip->ssdp[i]->numsect); j++)
2413 {
2414 printf("%d ", saip->ssdp[i]->sect[j]);
2415 }
2416 fflush(stdout);
2417 }
2418 } else if (sap->segtype == SAS_DISC && sap->saip)
2419 {
2420 if (sap->saip->indextype == INDEX_PARENT)
2421 amaip = (AMAlignIndexPtr)(sap->saip);
2422 if (sap->type == SAT_PARTIAL)
2423 printf("SAT_PARTIAL\n");
2424 else if (sap->type == SAT_MASTERSLAVE)
2425 printf("SAT_MASTERSLAVE\n");
2426 printf("Parent info:\n");
2427 printf("numbsqs = %d\n", amaip->numbsqs);
2428 printf("numsaps = %d\n", amaip->numsaps);
2429 printf("alnsaps = %d\n", amaip->alnsaps);
2430 printf("numseg = %d\n", amaip->numseg);
2431 fflush(stdout);
2432 for (i=0; i<amaip->numbsqs; i++)
2433 {
2434 printf("Sequence %d:", i);
2435 printf(" %d saps\n", amaip->amadp[i]->numsaps);
2436 fflush(stdout);
2437 }
2438 printf("Starts: ");
2439 if (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_SEGMENTED_MASTERSLAVE)
2440 {
2441 printf("Segmented\n");
2442 } else
2443 {
2444 for (i=0; i<(amaip->numseg*amaip->numsaps); i++)
2445 {
2446 printf("%d ", amaip->starts[i]);
2447 if (!fmod(i+1, amaip->numsaps))
2448 printf("\n");
2449 fflush(stdout);
2450 }
2451 }
2452 fflush(stdout);
2453 printf("\nTotal Length: %d \n", AlnMgrGetAlnLength(sap, TRUE));
2454 printf("Alignment Length: %d\n", AlnMgrGetAlnLength(sap, FALSE));
2455 if (amaip->lens)
2456 {
2457 printf("lens: ");
2458 for (i=0; i<amaip->numseg; i++)
2459 {
2460 printf("%i ", amaip->lens[i]);
2461 }
2462 printf("\n");
2463 fflush(stdout);
2464 printf("aligncoords: ");
2465 for (i=0; i<amaip->numseg; i++)
2466 {
2467 printf("%i ", amaip->aligncoords[i]);
2468 }
2469 printf("\n");
2470 fflush(stdout);
2471 }
2472 if (amaip->saps)
2473 {
2474 for (i=0; i<amaip->numbsqs; i++)
2475 {
2476 printf("Segments: ");
2477 for (j=0; j<(amaip->amadp[i]->numseg); j++)
2478 {
2479 printf("%d ", amaip->amadp[i]->segments[j]);
2480 }
2481 printf("\n");
2482 fflush(stdout);
2483 }
2484 }
2485 if (amaip->rowsource)
2486 {
2487 printf("Rowsource arrays:\n");
2488 for (i=0; i<(amaip->numrows); i++)
2489 {
2490 printf("row %d ", (i+1));
2491 for (j=0; j<(amaip->rowsource[i]->numsaps); j++)
2492 {
2493 printf("%d: %d ", amaip->rowsource[i]->which_saps[j], amaip->rowsource[i]->num_in_sap[j]);
2494 }
2495 printf("\n");
2496 }
2497 }
2498 am_print_seqalign_indexes((SeqAlignPtr)sap->segs);
2499 }
2500 sap = sap->next;
2501 }
2502 return;
2503 }
2504
2505 /*CHECK*/
2506 NLM_EXTERN Int4 AlnMgrCheckAlignForParent(SeqAlignPtr sap)
2507 {
2508 AMAlignIndexPtr amaip;
2509
2510 if (sap->segtype == SAS_DISC)
2511 {
2512 if (!sap->saip)
2513 {
2514 if (!AlnMgrIndexSeqAlign(sap))
2515 return -1;
2516 else
2517 return AM_PARENT;
2518 } else if (sap->saip->indextype == INDEX_PARENT)
2519 {
2520 amaip = (AMAlignIndexPtr)(sap->saip);
2521 if (amaip->mstype != AM_LITE)
2522 return AM_PARENT;
2523 else
2524 return -1;
2525 } else
2526 {
2527 return -1;
2528 }
2529 } else if (sap->segtype == SAS_DENSEG)
2530 {
2531 if (!sap->saip)
2532 {
2533 if (sap->segs == NULL)
2534 return -1;
2535 AlnMgrAnythingToSeg(sap);
2536 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)sap->segs))
2537 return -1;
2538 return AM_PARENT;
2539 } else if (sap->saip->indextype == INDEX_SEGS)
2540 {
2541 return AM_CHILD;
2542 } else
2543 {
2544 return -1;
2545 }
2546 }
2547 return -1;
2548 }
2549
2550 /* check to see if a seqalign contains a sip, return row */
2551 static Int4 AlnMgrContainsID(SeqAlignPtr sap, SeqId *sip)
2552 {
2553 DenseSegPtr dsp;
2554 SeqId *sip_tmp;
2555 Int4 count;
2556
2557 if (!sap || !sip) return FALSE;
2558 dsp = (DenseSegPtr)sap->segs;
2559 if (!dsp) return 0;
2560
2561 for(sip_tmp = dsp->ids, count = 1; sip_tmp;
2562 sip_tmp = sip_tmp->next, count++)
2563 if (SeqIdComp(sip_tmp, sip) == SIC_YES) return count;
2564 return 0;
2565 }
2566
2567 /* used to pass in values for sorting using AlnMgrCompareSortStruct() */
2568 typedef struct _AlnMgrSortStruct
2569 {
2570 SeqId *sip;
2571 SeqAlign *sap;
2572 Int4 row, start, stop;
2573 Uint2 strand;
2574 } AlnMgrSortStruct;
2575
2576 /* compare two seqaligns by position on a particular sequence. Assumes both
2577 seqaligns contains the sequence */
2578 static int LIBCALLBACK AlnMgrCompareSortStruct(void *e1, void *e2)
2579 {
2580 AlnMgrSortStruct *amss1, *amss2;
2581
2582 if (!e1||!e2) return 0;
2583 amss1 = (AlnMgrSortStruct *)e1;
2584 amss2 = (AlnMgrSortStruct *)e2;
2585 if(!amss1->sap || !amss2->sap) return 0;
2586
2587 if (amss1->strand == 0)
2588 amss1->strand = Seq_strand_plus;
2589 if (amss2->strand == 0)
2590 amss2->strand = Seq_strand_plus;
2591
2592 if ((amss1->strand == amss2->strand) && amss1->strand != Seq_strand_minus)
2593 {
2594 if (amss1->start < amss2->start)
2595 return -1;
2596 else if (amss2->start < amss1->start)
2597 return 1;
2598 else if (amss1->start == amss2->start)
2599 {
2600 if (amss1->stop < amss2->stop)
2601 return -1;
2602 else if (amss2->stop < amss1->stop)
2603 return 1;
2604 else
2605 return 0;
2606 }
2607 } else if ((amss1->strand == amss2->strand) && amss1->strand == Seq_strand_minus)
2608 {
2609 if (amss1->start > amss2->start)
2610 return -1;
2611 else if (amss2->start > amss1->start)
2612 return 1;
2613 else if (amss1->start == amss2->start)
2614 {
2615 if (amss1->stop < amss2->stop)
2616 return -1;
2617 else if (amss2->stop < amss1->stop)
2618 return 1;
2619 else
2620 return 0;
2621 }
2622 }
2623 return 0;
2624 }
2625
2626 /* same functionality as AlnMgrSortSeqAligns */
2627 NLM_EXTERN SeqAlignPtr PNTR AlnMgrSortbyID (SeqAlignPtr sap, SeqId *sip, Int4Ptr numsap)
2628 {
2629 SeqAlignPtr PNTR head;
2630 Int4 i, j;
2631 SeqAlignPtr tmp;
2632 AlnMgrSortStruct *amss;
2633 Int4 row;
2634
2635 if(!sap || !sap || !numsap) return NULL;
2636
2637 /* count number of seqaligns and init return array */
2638 for(tmp = sap; tmp; tmp=tmp->next) (*numsap)++;
2639 head = MemNew((*numsap)*sizeof(SeqAlignPtr));
2640 amss = MemNew((*numsap)*sizeof(AlnMgrSortStruct));
2641
2642 /* separate out the seqaligns that don't contain the sip */
2643 for(tmp = sap, i = j = 0; tmp; tmp = tmp->next) {
2644 row = AlnMgrContainsID(tmp, sip);
2645 if(!row) {
2646 head[i] = tmp;
2647 i++;
2648 } else {
2649 amss[j].sip = sip;
2650 amss[j].sap = tmp;
2651 amss[j].row = row;
2652 AlnMgrGetNthSeqRangeInSA(tmp, row, &(amss[j].start), &(amss[j].stop));
2653 amss[j].strand = AlnMgrGetNthStrand(tmp, row);
2654 j++;
2655 }
2656 }
2657
2658 /* qsort(amss, j, sizeof(AlnMgrSortStruct), AlnMgrCompareSortStruct); */
2659 HeapSort((Pointer)amss, (size_t)j, sizeof(AlnMgrSortStruct), AlnMgrCompareSortStruct);
2660
2661 for(i = 0; i < j; i++) head[*numsap - j + i] = amss[i].sap;
2662
2663 MemFree(amss);
2664 return head;
2665 }
2666
2667 /***********************************************************************
2668 *
2669 * AlnMgrSortSeqAligns is a variant of the ValNodeSort function, and
2670 * calls very similar heapsort functions. It can take a comparison
2671 * function that needs userdata, so more specific sorts are possible
2672 * without defining special structures for every type of sort.
2673 *
2674 ***********************************************************************/
2675 NLM_EXTERN SeqAlignPtr PNTR AlnMgrSortSeqAligns (SeqAlignPtr sap, int (LIBCALLBACK *compar)(VoidPtr, VoidPtr, VoidPtr), VoidPtr userdata, Int4Ptr numsap)
2676 {
2677 SeqAlignPtr PNTR head;
2678 Int4 i;
2679 Int4 num;
2680 SeqAlignPtr tmp;
2681
2682 if (!sap)
2683 return NULL;
2684 tmp = sap;
2685 num = 0;
2686 while (tmp)
2687 {
2688 num++;
2689 tmp = tmp->next;
2690 }
2691 head = MemNew(((size_t) num + 1)*sizeof(SeqAlignPtr));
2692 tmp = sap;
2693
2694 for (i = 0; i<num; i++)
2695 {
2696 head[i]=tmp;
2697 tmp = tmp->next;
2698 if (!tmp)
2699 break;
2700 }
2701 heapsort_with_userdata(head, (size_t)num, sizeof(SeqAlignPtr), compar, userdata);
2702 if (numsap)
2703 *numsap = num;
2704 return head;
2705 }
2706
2707 static void heapsort_with_userdata (VoidPtr b, size_t nel, size_t width, int (LIBCALLBACK *compar)PROTO((VoidPtr, VoidPtr, VoidPtr)), VoidPtr userdata)
2708 {
2709 register CharPtr base = (CharPtr)b;
2710 register size_t i;
2711 register char ch;
2712 register CharPtr base0=(CharPtr)base, lim, basef;
2713
2714 if (nel<2)
2715 return;
2716 lim = &base[((nel-2)/2)*width];
2717 basef = &base[(nel-1)*width];
2718 i = nel/2;
2719 for (base = &base0[(i-1)*width]; i>0; base=base-width)
2720 {
2721 heapify_with_userdata(base0, base, lim, basef, width, compar, userdata);
2722 i--;
2723 }
2724 for (base=&base0[(nel-1)*width]; base>base0; base -= width)
2725 {
2726 for (i = 0; i<width; i++)
2727 {
2728 ch = base0[i];
2729 base0[i] = base[i];
2730 base[i] = ch;
2731 }
2732 lim = base0 + ((base-base0)/2 - width);
2733 if (base> (base0+width))
2734 heapify_with_userdata(base0, base0, lim, base-width, width, compar, userdata);
2735 }
2736 return;
2737 }
2738
2739 static void heapify_with_userdata(CharPtr base0, CharPtr base, CharPtr lim, CharPtr last, size_t width, int(LIBCALLBACK *compar)PROTO((VoidPtr, VoidPtr, VoidPtr)), VoidPtr userdata)
2740 {
2741 register size_t i;
2742 register char ch;
2743 register CharPtr left_son, large_son;
2744
2745 left_son = base0 + 2*(base-base0) + width;
2746 while (base<=lim)
2747 {
2748 if (left_son == last)
2749 {
2750 large_son = left_son;
2751 } else
2752 {
2753 if((*compar)(left_son, left_son+width, userdata) >= 0)
2754 large_son = left_son;
2755 else
2756 large_son = left_son + width;
2757 }
2758 if ((*compar)(base, large_son, userdata) < 0)
2759 {
2760 for (i = 0; i<width; i++)
2761 {
2762 ch = base[i];
2763 base[i] = large_son[i];
2764 large_son[i] = ch;
2765 }
2766 base = large_son;
2767 left_son = base0 + 2*(base-base0) + width;
2768 } else
2769 {
2770 break;
2771 }
2772 }
2773 return;
2774 }
2775
2776 /*************************************************************************
2777 *
2778 * sorting comparison functions
2779 *
2780 *************************************************************************/
2781 /**********************************************************************
2782 *
2783 * AlnMgrCompareIncreasingBySeqIdPtr takes a SeqIdPtr as userdata,
2784 * and sorts the alignments in increasing order according to the
2785 * region of the bioseq indicated that is contained in the alignment.
2786 * If the bioseq is not in the alignment, the alignment will be put
2787 * first, so all alignments in which the given bioseq does not
2788 * participate occur at the beginning of the list, making it easy to
2789 * check for them and remove them.
2790 *
2791 **********************************************************************/
2792 NLM_EXTERN int LIBCALLBACK AlnMgrCompareIncreasingBySeqIdPtr (VoidPtr base, VoidPtr large_son, VoidPtr userdata)
2793 {
2794 Boolean done;
2795 DenseSegPtr dsp1;
2796 DenseSegPtr dsp2;
2797 Int4 n1;
2798 Int4 n2;
2799 SeqAlignPtr sap1;
2800 SeqAlignPtr sap2;
2801 SeqIdPtr sip;
2802 SeqIdPtr sip_tmp;
2803 Int4 start1;
2804 Int4 start2;
2805 Int4 stop1;
2806 Int4 stop2;
2807 Uint2 strand1;
2808 Uint2 strand2;
2809
2810 sap1 = *((SeqAlignPtr PNTR) base);
2811 sip = (SeqIdPtr)userdata;
2812 if (!sap1||!sip) return 0;
2813 dsp1 = (DenseSegPtr)sap1->segs;
2814 if (!dsp1) return 0;
2815 n1 = 0;
2816 done = FALSE;
2817 sip_tmp = dsp1->ids;
2818 while (sip_tmp)
2819 {
2820 n1++;
2821 if (SeqIdComp(sip_tmp, sip) == SIC_YES) {
2822 done = TRUE;
2823 break;
2824 }
2825 sip_tmp = sip_tmp->next;
2826 }
2827 if (!done) return -1;
2828 sap2 = *((SeqAlignPtr PNTR) large_son);
2829 if (!sap2) return 0;
2830 dsp2 = (DenseSegPtr)sap2->segs;
2831 if (!dsp2) return 0;
2832 n2 = 0;
2833 done = FALSE;
2834 sip_tmp = dsp2->ids;
2835 while (sip_tmp)
2836 {
2837 n2++;
2838 if (SeqIdComp(sip_tmp, sip) == SIC_YES) {
2839 done = TRUE;
2840 break;
2841 }
2842 sip_tmp = sip_tmp->next;
2843 }
2844 if (!done)
2845 return 1;
2846 AlnMgrGetNthSeqRangeInSA(sap1, n1, &start1, &stop1);
2847 AlnMgrGetNthSeqRangeInSA(sap2, n2, &start2, &stop2);
2848 strand1 = AlnMgrGetNthStrand(sap1, n1);
2849 strand2 = AlnMgrGetNthStrand(sap2, n2);
2850 if (strand1 == 0)
2851 strand1 = Seq_strand_plus;
2852 if (strand2 == 0)
2853 strand2 = Seq_strand_plus;
2854 if ((strand1 == strand2) && strand1 != Seq_strand_minus)
2855 {
2856 if (start1 < start2)
2857 return -1;
2858 else if (start2 < start1)
2859 return 1;
2860 else if (start1 == start2)
2861 {
2862 if (stop1 < stop2)
2863 return -1;
2864 else if (stop2 < stop1)
2865 return 1;
2866 else
2867 return 0;
2868 }
2869 } else if ((strand1 == strand2) && strand1 == Seq_strand_minus)
2870 {
2871 if (start1 > start2)
2872 return -1;
2873 else if (start2 > start1)
2874 return 1;
2875 else if (start1 == start2)
2876 {
2877 if (stop1 < stop2)
2878 return -1;
2879 else if (stop2 < stop1)
2880 return 1;
2881 else
2882 return 0;
2883 }
2884 }
2885 else
2886 return 0;
2887 return 0;
2888 }
2889
2890 /*********************************************************************
2891 *
2892 * AlnMgrFindFirst is crucial to the AlnMgrMakeFakeMultiple function;
2893 * it uses the sorted order of the seqaligns in each AMAlignDat
2894 * structure to guide a heapsort of all the seqaligns.
2895 *
2896 *********************************************************************/
2897 NLM_EXTERN int LIBCALLBACK AlnMgrFindFirst(VoidPtr base, VoidPtr large_son, VoidPtr userdata)
2898 {
2899 AMAlignDatPtr amadp;
2900 AMAlignIndexPtr amaip;
2901 Int4 i;
2902 SeqAlignPtr sap1;
2903 SeqAlignPtr sap2;
2904 Int4 x;
2905 Int4 y;
2906 Int4 z;
2907
2908 amaip = (AMAlignIndexPtr)userdata;
2909 if (amaip == NULL || base == NULL || large_son == NULL)
2910 return 0;
2911 sap1 = *((SeqAlignPtr PNTR) base);
2912 sap2 = *((SeqAlignPtr PNTR) large_son);
2913 if (base == large_son)
2914 return 0;
2915 x = y = -1;
2916 z = amaip->numbsqs;
2917 while (z)
2918 {
2919 amadp = amaip->amadp[(amaip->numbsqs - z)];
2920 for (i=0; i<(amadp->numsaps); i++)
2921 {
2922 if (amadp->saps[i] == sap1)
2923 x=i;
2924 else if (amadp->saps[i] == sap2)
2925 y=i;
2926 }
2927 if (x!=-1 && y!=-1)
2928 {
2929 if (x < y)
2930 return -1;
2931 else if (y < x)
2932 return 1;
2933 }
2934 z--;
2935 }
2936 return 0;
2937 }
2938
2939 NLM_EXTERN int LIBCALLBACK AlnMgrCompareTips(VoidPtr base, VoidPtr large_son)
2940 {
2941 AMTinyInfoPtr tip1;
2942 AMTinyInfoPtr tip2;
2943
2944 tip1 = *((AMTinyInfoPtr PNTR) base);
2945 tip2 = *((AMTinyInfoPtr PNTR) large_son);
2946 if (tip1 == NULL || tip2 == NULL)
2947 return 0;
2948 if (tip1->start < tip2->start)
2949 return -1;
2950 else if (tip1->start > tip2->start)
2951 return 1;
2952 else
2953 {
2954 if (tip1->which < tip2->which)
2955 return -1;
2956 else if (tip1->which > tip2->which)
2957 return 1;
2958 else if(tip1->stop>tip2->stop)
2959 return -1; /* put longer segments first */
2960 else if (tip1->stop<tip2->stop)
2961 return 1;
2962 else
2963 return 0;
2964 }
2965 }
2966
2967
2968 /************************************************************************
2969 *
2970 * AlnMgrGetNextLengthBit should be called iteratively on an alignment
2971 * to return the lengths of all the aligned and unaligned pieces in
2972 * the alignment. Don't change the value in r, just pass in a pointer
2973 * to an allocated Int4 set to 0 initially. The lengths of the unaligned
2974 * pieces are precomputed using AlnMgrGetMaxUnalignedLength; if no
2975 * precomputed values are found, this function is used to compute the
2976 * lengths on the fly.
2977 *
2978 ************************************************************************/
2979 NLM_EXTERN Boolean AlnMgrGetNextLengthBit(SeqAlignPtr sap, Int4Ptr length, Int4Ptr r)
2980 {
2981 AMAlignIndexPtr amaip;
2982 Int4 i;
2983 RowSourcePtr rsp;
2984
2985 if (sap == NULL || length == NULL || r == NULL)
2986 return FALSE;
2987 i = AlnMgrCheckAlignForParent(sap);
2988 if (i == AM_CHILD)
2989 {
2990 if (*r == 1)
2991 return FALSE;
2992 *length = AlnMgrGetAlnLength(sap, FALSE);
2993 *r = 1;
2994 return TRUE;
2995 } else if (i == AM_PARENT)
2996 {
2997 amaip = (AMAlignIndexPtr)sap->saip;
2998 if (amaip->mstype == AM_LITE)
2999 return FALSE;
3000 if (sap->type == SAT_PARTIAL)
3001 {
3002 if (*r < 0)
3003 {
3004 if ((-*r) >= amaip->numsaps)
3005 return FALSE;
3006 if (amaip->ulens == NULL)
3007 AlnMgrSetUnalignedLengths(sap);
3008 *length = -(amaip->ulens[(-*r)-1]);
3009 *r = -(*r);
3010 return TRUE;
3011 } else
3012 {
3013 if (*r >= amaip->numsaps)
3014 return FALSE;
3015 *length = AlnMgrGetAlnLength(amaip->saps[*r], FALSE);
3016 *r = -((*r)+1);
3017 return TRUE;
3018 }
3019 } else if (sap->type == SAT_MASTERSLAVE)
3020 {
3021 if (amaip->mstype == AM_MASTERSLAVE)
3022 {
3023 if (*r == 1)
3024 return FALSE;
3025 *length = amaip->aligncoords[amaip->numseg-1] + amaip->lens[amaip->numseg-1];
3026 *r = 1;
3027 return TRUE;
3028 } else if (amaip->mstype == AM_SEGMENTED_MASTERSLAVE || amaip->mstype == AM_NULL)
3029 {
3030 if (*r < 0)
3031 {
3032 if ((-*r) >= amaip->numseg)
3033 return FALSE;
3034 rsp = amaip->rowsource[amaip->master-1];
3035 if (amaip->ulens == NULL)
3036 AlnMgrSetUnalignedLengths(sap);
3037 *length = -(amaip->ulens[(-*r)-1]);
3038 *r = -(*r);
3039 return TRUE;
3040 } else
3041 {
3042 if (*r >= amaip->numseg)
3043 {
3044 if (amaip->numseg > 0)
3045 return FALSE;
3046 else
3047 {
3048 *length = -(amaip->ulens[0]);
3049 *r = -((*r)+1);
3050 return TRUE;
3051 }
3052 }
3053 rsp = amaip->rowsource[amaip->master-1];
3054 *length = AlnMgrGetAlnLength(amaip->saps[rsp->which_saps[*r]-1], FALSE);
3055 *r = -((*r)+1);
3056 return TRUE;
3057 }
3058 }
3059 }
3060 }
3061 return FALSE;
3062 }
3063
3064 NLM_EXTERN Int4 AlnMgrGetMaxUnalignedLength(SeqAlignPtr sap1, SeqAlignPtr sap2)
3065 {
3066 Int4 max;
3067 Int4 n1, n1max;
3068 Int4 start1;
3069 Int4 start2;
3070 Int4 stop1;
3071 Int4 stop2;
3072 SeqId *sip1, *sip2;
3073
3074 if (sap1 == NULL || sap2 == NULL)
3075 return 0;
3076 max = 0;
3077 n1max = AlnMgrGetNumRows(sap1);
3078 if(n1max != AlnMgrGetNumRows(sap2)) return 0;
3079 for (n1 = 1; n1 <= n1max; n1++)
3080 {
3081 sip1 = AlnMgrGetNthSeqIdPtr(sap1, n1);
3082 sip2 = AlnMgrGetNthSeqIdPtr(sap2, n1);
3083 if(SeqIdComp(sip1, sip2) != SIC_YES) {
3084 SeqIdFree(sip1);
3085 SeqIdFree(sip2);
3086 return 0;
3087 }
3088 SeqIdFree(sip1);
3089 SeqIdFree(sip2);
3090 AlnMgrGetNthSeqRangeInSA(sap1, n1, &start1, &stop1);
3091 if (n1 >= 0)
3092 {
3093 AlnMgrGetNthSeqRangeInSA(sap2, n1, &start2, &stop2);
3094 if (start2 > stop1)
3095 {
3096 if (start2 - stop1 - 1 > max)
3097 max = start2 - stop1 - 1;
3098 } else
3099 {
3100 if (start1 - stop2 - 1 > max)
3101 max = start1 - stop2 - 1;
3102 }
3103 }
3104 }
3105 return max;
3106 }
3107
3108 static void AlnMgrSetUnalignedLengths(SeqAlignPtr sap)
3109 {
3110 AMAlignIndexPtr amaip;
3111 Int4 i;
3112 Int4 l;
3113 Int4 max;
3114 Int4 n;
3115 RowSourcePtr rsp;
3116 SeqAlignPtr sap1;
3117 SeqAlignPtr sap2;
3118 Int4 start1;
3119 Int4 start2;
3120 Int4 stop1;
3121 Int4 stop2;
3122
3123 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
3124 return;
3125 amaip = (AMAlignIndexPtr)(sap->saip);
3126 if (amaip->ulens != NULL)
3127 MemFree(amaip->ulens);
3128 amaip->ulens = (Int4Ptr)MemNew((amaip->numsaps)*sizeof(Int4));
3129 for (i=1; i<amaip->numseg; i++)
3130 {
3131 max = 0;
3132 for (n=0; n<amaip->numrows; n++)
3133 {
3134 rsp = amaip->rowsource[n];
3135 if (rsp != NULL)
3136 {
3137 if (rsp->which_saps[i-1] != 0 && rsp->which_saps[i] != 0 && rsp->which_saps[i-1] <= amaip->numsaps && rsp->which_saps[i] <= amaip->numsaps)
3138 {
3139 sap1 = amaip->saps[rsp->which_saps[i-1]-1];
3140 sap2 = amaip->saps[rsp->which_saps[i]-1];
3141 AlnMgrGetNthSeqRangeInSA(sap1, rsp->num_in_sap[i-1], &start1, &stop1);
3142 AlnMgrGetNthSeqRangeInSA(sap2, rsp->num_in_sap[i], &start2, &stop2);
3143 if (rsp->strand != Seq_strand_minus)
3144 l = start2 - stop1 - 1;
3145 else
3146 l = start1 - stop2 - 1;
3147 if (l > max)
3148 max = l;
3149 }
3150 }
3151 }
3152 amaip->ulens[i-1] = max;
3153 }
3154 return;
3155 }
3156
3157 /*************************************************************************
3158 *
3159 * AlnMgrGetNextAlnBit takes an AlnMsgPtr, with (at the minimum) the
3160 * which_bsq field filled in to indicate which bioseq should be returned.
3161 * The function returns the segments of the bioseq which span the region
3162 * of the alignment indicated, and can return them according to either
3163 * alignment coordinates (if which_master is NULL) or a master coordinate
3164 * system (need to fill in the SeqIdPtr of the master). The function
3165 * returns TRUE if there are more segments of the bioseq to retrieve,
3166 * and FALSE if not. It uses the two binary search functions to quickly
3167 * retrieve the required data from the indexes. (NEXT)
3168 *
3169 *************************************************************************/
3170 NLM_EXTERN Boolean AlnMgrGetNextAlnBit (SeqAlignPtr sap, AlnMsgPtr amp)
3171 {
3172 AMAlignIndexPtr amaip;
3173 DenseSegPtr dsp;
3174 Int4 endoffset;
3175 Boolean found;
3176 Int4 i;
3177 Int4 len;
3178 Int4 offset;
3179 Boolean retval;
3180 Int4 rf_tmp;
3181 SAIndexPtr saip;
3182 SASeqDatPtr ssdp;
3183 Int4 start_b;
3184 Uint4 start_m;
3185 Uint4 start_tmp;
3186 Uint4 stop_m;
3187 Uint4 stop_tmp;
3188
3189 retval = FALSE;
3190 if (!sap)
3191 return retval;
3192 if (!amp)
3193 return retval;
3194 i = AlnMgrCheckAlignForParent(sap);
3195 if (i == AM_CHILD)
3196 {
3197 saip = (SAIndexPtr)sap->saip;
3198 dsp = (DenseSegPtr)sap->segs;
3199 if (!dsp)
3200 return retval;
3201 if (!amp->which_master)
3202 {
3203 if (amp->place == 1)
3204 return retval;
3205 len = AlnMgrGetAlnLength(sap, FALSE);
3206 if (amp->to_m > len-1) /* len-1 was len : HS 7/24/00 */
3207 return retval;
3208 if (amp->to_m < 0)
3209 amp->to_m = len - 1;
3210 if (amp->row_num == -1)
3211 {
3212 if (!amp->which_bsq)
3213 return retval;
3214 amp->row_num = AlnMgrGetNForSip(sap, amp->which_bsq) - 1;
3215 if (amp->row_num == -1)
3216 return retval;
3217 }
3218 if (amp->prev != -2)
3219 {
3220 start_m = amp->prev;
3221 } else
3222 {
3223 start_m = binary_search_on_uint4_list(saip->aligncoords, amp->from_m, dsp->numseg);
3224 amp->real_from = amp->from_m;
3225 }
3226 stop_m = binary_search_on_uint4_list(saip->aligncoords, amp->to_m, dsp->numseg);
3227 ssdp = saip->ssdp[amp->row_num-1];
3228 offset = amp->real_from - saip->aligncoords[start_m];
3229 start_b = binary_search_on_uint2_list(ssdp->sect, start_m, ssdp->numsect);
3230
3231 /* bug fix -- Dave & Lewis 7/20/00 */
3232 if (start_b == -1) {
3233 amp->strand = Seq_strand_plus;
3234 }
3235 /* end of fix */
3236 else {
3237 if (dsp->strands != NULL && dsp->strands[start_b*(dsp->dim)+amp->row_num-1] == Seq_strand_minus)
3238 amp->strand = Seq_strand_minus;
3239 else
3240 amp->strand = Seq_strand_plus;
3241 }
3242 if ((stop_m - start_m) > 0)
3243 {
3244 retval = TRUE;
3245 if (start_b >= 0)
3246 {
3247 if (amp->strand != Seq_strand_minus)
3248 {
3249 amp->from_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + offset;
3250 amp->to_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + dsp->lens[start_b] - 1;
3251 } else
3252 {
3253 amp->from_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1];
3254 amp->to_b = amp->from_b + dsp->lens[start_b] - 1 - offset;
3255 }
3256 amp->gap = 0;
3257 } else
3258 {
3259 amp->from_b = amp->real_from;
3260 amp->to_b = saip->aligncoords[start_m + 1] - 1;
3261 amp->gap = 1;
3262 }
3263 amp->real_from = saip->aligncoords[start_m + 1];
3264 amp->prev = start_m + 1;
3265 } else
3266 {
3267 amp->place = 1;
3268 endoffset = amp->to_m - saip->aligncoords[start_m];
3269 if (start_b >= 0)
3270 {
3271 if (amp->strand != Seq_strand_minus)
3272 {
3273 amp->from_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + offset;
3274 amp->to_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + endoffset;
3275 } else
3276 {
3277 amp->from_b = dsp->starts[start_b*(dsp->dim)+amp->row_num-1] + dsp->lens[start_b] - endoffset - 1;
3278 amp->to_b = amp->from_b + amp->to_m - amp->real_from;
3279 }
3280 amp->gap = 0;
3281 } else
3282 {
3283 amp->from_b = amp->real_from;
3284 amp->to_b = amp->to_m;
3285 amp->gap = 1;
3286 }
3287 amp->real_from = 0;
3288 amp->prev = -2;
3289 retval = TRUE;
3290 }
3291 }
3292 } else if (i == AM_PARENT)
3293 {
3294 amaip = (AMAlignIndexPtr)(sap->saip);
3295 if (amaip && (!amaip->saps || amaip->mstype == AM_LITE || amaip->mstype == AM_NEATINDEX || amaip->mstype == AM_NULL))
3296 return retval;
3297 if (amp->place == 1)
3298 return retval;
3299 if (!amp->which_bsq && amp->row_num==-1)
3300 return retval;
3301 if (sap->type == SAT_PARTIAL && amp->which_master == NULL)
3302 {
3303 len = AlnMgrGetAlnLength(sap, FALSE);
3304 if (amp->to_m < 0)
3305 amp->to_m = len-1;
3306 if (amp->to_m > len-1)
3307 return FALSE;
3308 if (amp->prev_sap != -2)
3309 {
3310 start_m = amp->prev_sap;
3311 amp->len_left = amp->to_m - amp->real_from + 1;
3312 } else
3313 {
3314 start_m = binary_search_on_uint4_list(amaip->aligncoords, amp->from_m, amaip->alnsaps);
3315 amp->real_from = amp->from_m;
3316 amp->prev_sap = start_m;
3317 amp->len_left = amp->to_m - amp->from_m + 1;
3318 }
3319 stop_m = binary_search_on_uint4_list(amaip->aligncoords, amp->to_m, amaip->alnsaps);
3320 offset = amp->real_from - amaip->aligncoords[start_m];
3321 if (amp->len_left > (amaip->lens[start_m]-offset))
3322 {
3323 endoffset = amaip->lens[start_m] - offset;
3324 } else
3325 {
3326 endoffset = amp->len_left;
3327 }
3328 stop_tmp = amp->to_m;
3329 start_tmp = amp->from_m;
3330 if ((stop_m - start_m) == 0)
3331 {
3332 amp->from_m = offset + amaip->starts[start_m];
3333 amp->to_m = amp->from_m + endoffset - 1;
3334 /* amp->prev = -2; HS 7/24/00 removed */
3335 rf_tmp = amp->real_from;
3336 AlnMgrGetNextAlnBit((amaip->saps[start_m]), amp);
3337 /* HS 7/24/00 amp->len_left = amp->len_left - (amp->to_b - amp->from_b + 1); */
3338 amp->len_left = amp->len_left - (amp->to_m - amp->from_m + 1);
3339 amp->to_m = stop_tmp;
3340 amp->from_m = start_tmp;
3341 if (amp->len_left == 0)
3342 {
3343 amp->real_from = amp->to_m + 1;
3344 amp->prev_sap = -2;
3345 amp->place = 1;
3346 amp->send_space = 1;
3347 } else
3348 {
3349 amp->real_from = rf_tmp + (amp->to_b - amp->from_b + 1);
3350 amp->place = 0;
3351 }
3352 retval = TRUE;
3353 } else
3354 {
3355 retval = TRUE;
3356 amp->from_m = offset + amaip->starts[start_m];
3357 amp->to_m = amp->from_m + endoffset - 1;
3358 AlnMgrGetNextAlnBit((amaip->saps[start_m]), amp);
3359 amp->len_left = amp->len_left - (amp->to_m - amp->from_m + 1);
3360 amp->to_m = stop_tmp;
3361 amp->real_from = amp->to_m - amp->len_left + 1;
3362 amp->from_m = start_tmp;
3363 if (amp->place == 1)
3364 {
3365 amp->prev_sap += 1;
3366 amp->send_space = TRUE;
3367 if (amp->len_left > 0)
3368 amp->place = 0;
3369 }
3370 /* bug fix -- Dave 7/21/00 */
3371 /* if (amp->len_left == 0 || amp->real_from >= amp->to_m) */
3372 if (amp->len_left == 0 || amp->real_from > amp->to_m)
3373 {
3374 amp->place = 1;
3375 retval = FALSE;
3376 amp->prev_sap = -2;
3377 }
3378 }
3379 } else if (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_MASTERSLAVE && amp->which_master == NULL)
3380 {
3381 int j;
3382 if (amp->place == 1)
3383 return retval;
3384 len = AlnMgrGetAlnLength(sap, FALSE);
3385 if (amp->to_m > len-1)
3386 return retval;
3387 if (amp->to_m < 0)
3388 amp->to_m = len-1;
3389 if (amp->row_num == -1)
3390 {
3391 if(!amp->which_bsq)
3392 return retval;
3393 else
3394 {
3395 amp->row_num = AlnMgrGetNForSip(sap,amp->which_bsq);
3396 if(amp->row_num == -1)
3397 return retval;
3398 }
3399 }
3400 if (amp->row_num == amaip->master)
3401 {
3402 amp->strand = Seq_strand_plus;
3403 if (amp->prev != -2)
3404 {
3405 amp->prev += 1;
3406 start_m = amp->prev;
3407 } else
3408 {
3409 start_m = binary_search_on_uint4_list(amaip->aligncoords, amp->from_m, amaip->numseg);
3410 amp->real_from = amp->from_m;
3411 amp->prev = start_m;
3412 }
3413 offset = amp->real_from - amaip->aligncoords[start_m];
3414 endoffset = amaip->lens[start_m] - offset - (amp->to_m - amp->real_from + 1);
3415 if (endoffset < 0 && (start_m+1) < amaip->numseg)
3416 retval = TRUE;
3417 else
3418 {
3419 retval = TRUE;
3420 amp->place = 1;
3421 amp->row_num = -1;
3422 amp->prev = -2;
3423 }
3424 j=0;
3425 found = FALSE;
3426 while (!found && j < amaip->numsaps)
3427 {
3428 if (amaip->starts[j+(amaip->numsaps)*start_m] >= 0)
3429 found = TRUE;
3430 else if (amaip->starts[j+(amaip->numsaps)*start_m] == -3)
3431 found = TRUE;
3432 else
3433 j++;
3434 }
3435 if (amaip->starts[j+(amaip->numsaps)*start_m] == -3)
3436 {
3437 found = FALSE;
3438 j=0;
3439 while (!found && j < amaip->numsaps)
3440 {
3441 if (amaip->starts[j+(amaip->numsaps)*(start_m-1)] >= 0)
3442 found = TRUE;
3443 else
3444 j++;
3445 }
3446 amp->from_b = AlnMgrMapToBsqCoords(amaip->saps[j], amaip->starts[j+(amaip->numsaps)*(start_m-1)]+offset, NULL, NULL) + amaip->lens[start_m - 1];
3447 if (endoffset >=0)
3448 amp->to_b = amp->from_b + amaip->lens[start_m] - 1 - offset - endoffset;
3449 else
3450 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1;
3451 amp->gap = 0;
3452 amp->real_from += amp->to_b - amp->from_b + 1;
3453 } else if (j<amaip->numsaps)
3454 {
3455 amp->from_b = AlnMgrMapToBsqCoords(amaip->saps[j], amaip->starts[j+(amaip->numsaps)*start_m]+offset, NULL, NULL);
3456 if (amp->from_b >= 0)
3457 {
3458 if (endoffset >=0)
3459 amp->to_b = amp->from_b + amaip->lens[start_m] - 1 - offset - endoffset;
3460 else
3461 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1;
3462 amp->gap = 0;
3463 } else
3464 {
3465 amp->from_b = amp->real_from;
3466 amp->gap = 1;
3467 if (endoffset >= 0)
3468 amp->to_b = amp->from_b + amaip->lens[start_m] - 1 - offset - endoffset;
3469 else
3470 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1;
3471 }
3472 amp->real_from += amp->to_b - amp->from_b + 1;
3473 }
3474 } else
3475 {
3476 if (amp->prev != -2)
3477 {
3478 amp->prev += 1;
3479 start_m = amp->prev;
3480 } else
3481 {
3482 start_m = binary_search_on_uint4_list(amaip->aligncoords, amp->from_m, amaip->numseg);
3483 amp->real_from = amp->from_m;
3484 amp->prev = start_m;
3485 }
3486 if (amp->prev_sap == -2)
3487 amp->prev_sap=amaip->rowsource[amp->row_num-1]->which_saps[0];
3488 j = amp->prev_sap-1;
3489 amp->strand = AlnMgrGetNthStrand(amaip->saps[j], amaip->rowsource[amp->row_num-1]->num_in_sap[0]);
3490 offset = amp->real_from - amaip->aligncoords[start_m];
3491 endoffset = amaip->lens[start_m] - offset - (amp->to_m - amp->real_from + 1);
3492 if (endoffset <= 0 && (start_m + 1) < amaip->numseg)
3493 retval = TRUE;
3494 else
3495 {
3496 retval = TRUE;
3497 amp->place = 1;
3498 amp->prev = amp->prev_sap = -2;
3499 }
3500 if (amaip->starts[j+(amaip->numsaps)*start_m] < 0)
3501 amp->from_b = -1;
3502 else
3503 amp->from_b = AlnMgrMapRowCoords(amaip->saps[j], amaip->starts[j+(amaip->numsaps)*start_m]+offset, amaip->rowsource[amp->row_num-1]->num_in_sap[0], NULL);
3504 if (amp->from_b >= 0)
3505 {
3506 if (amp->strand != Seq_strand_minus)
3507 {
3508 if (endoffset >=0)
3509 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1 -
3510 endoffset;
3511 else
3512 amp->to_b = amp->from_b + amaip->lens[start_m] - offset -1;
3513 } else
3514 {
3515 amp->to_b = amp->from_b;
3516 if (endoffset >= 0)
3517 amp->from_b = amp->to_b - amaip->lens[start_m] + 1 + endoffset
3518 ;
3519 else
3520 amp->from_b = amp->to_b - amaip->lens[start_m] + 1;
3521 }
3522 amp->gap = 0;
3523 } else
3524 {
3525 amp->from_b = amp->real_from;
3526 amp->gap = 1;
3527 if (endoffset >= 0)
3528 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1 - endoffset;
3529 else
3530 amp->to_b = amp->from_b + amaip->lens[start_m] - offset - 1;
3531 }
3532 amp->real_from += amp->to_b - amp->from_b + 1;
3533 if (amp->real_from > amp->to_m)
3534 {
3535 retval = TRUE;
3536 amp->place = 1;
3537 amp->row_num = -1;
3538 amp->prev = -2;
3539 }
3540 }
3541 } else if (sap->type == SAT_MASTERSLAVE && amp->which_master)
3542 {
3543 } else if (sap->type == SAT_DIAGS && amp->which_master)
3544 {
3545 } else if (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_SEGMENTED_MASTERSLAVE && amp->which_master == NULL)
3546 {
3547 if (amp->place == 1)
3548 return retval;
3549 len = AlnMgrGetAlnLength(sap, FALSE);
3550 if (amp->to_m > len-1)
3551 return retval;
3552 if (amp->to_m < 0)
3553 amp->to_m = len-1;
3554 if (amp->row_num == -1)
3555 {
3556 if(!amp->which_bsq)
3557 return retval;
3558 else
3559 {
3560 amp->row_num = AlnMgrGetNForSip(sap,amp->which_bsq);
3561 if(amp->row_num == -1)
3562 return retval;
3563 }
3564 }
3565 if (amp->prev == -2)
3566 {
3567 start_m = binary_search_on_uint4_list(amaip->aligncoords, amp->from_m, amaip->numseg);
3568 amp->real_from = amp->from_m;
3569 amp->prev = start_m;
3570 } else
3571 start_m = amp->prev;
3572 offset = amp->real_from - amaip->aligncoords[start_m];
3573 if (offset < 0)
3574 offset = 0;
3575 if (amaip->rowsource[amp->row_num-1]->which_saps[start_m] == 0)
3576 {
3577 len = amaip->lens[start_m];
3578 amp->from_b = amaip->aligncoords[start_m]+offset;
3579 amp->gap = 2;
3580 amp->strand = Seq_strand_unknown;
3581 } else
3582 {
3583 len = 0;
3584 amp->strand = AlnMgrGetNthStrand(amaip->saps[amaip->rowsource[amp->row_num-1]->which_saps[start_m]-1], amaip->rowsource[amp->row_num-1]->num_in_sap[start_m]);
3585 amp->from_b = AlnMgrMapSegmentCoords(amaip->saps[amaip->rowsource[amp->row_num-1]->which_saps[start_m]-1], offset, amaip->rowsource[amp->row_num-1]->num_in_sap[start_m], NULL, &len);
3586 if (amp->from_b == -1)
3587 {
3588 amp->from_b = amaip->aligncoords[start_m]+offset;
3589 amp->gap = 1;
3590 } else
3591 amp->gap = 0;
3592 }
3593 endoffset = amp->to_m - (amaip->aligncoords[start_m] + len + offset -1);
3594 if (endoffset <= 0)
3595 {
3596 amp->send_space = 1;
3597 amp->place = 1;
3598 } else if (len >= amaip->lens[start_m] - offset)
3599 {
3600 amp->prev++;
3601 amp->send_space = 1;
3602 } else
3603 amp->send_space = 0;
3604 if (endoffset <= 0)
3605 amp->to_b = amp->from_b + len + endoffset -1;
3606 else
3607 {
3608 amp->to_b = amp->from_b + len - 1;
3609 amp->real_from = amp->real_from + amp->to_b - amp->from_b + 1;
3610 }
3611 if (amp->strand == Seq_strand_minus && amp->gap == 0)
3612 {
3613 offset = amp->to_b - amp->from_b;
3614 amp->to_b = amp->from_b;
3615 amp->from_b = amp->to_b - offset;
3616 }
3617 retval = TRUE;
3618 }
3619 }
3620 return retval;
3621 }
3622
3623 NLM_EXTERN Uint4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
3624 {
3625 Uint4 L;
3626 Uint4 mid;
3627 Uint4 R;
3628
3629 if (list == NULL || listlen == 0)
3630 return 0;
3631 L = 0;
3632 R = listlen - 1;
3633 while (L < R)
3634 {
3635 mid = (L+R)/2;
3636 if (list[mid + 1] <= pos)
3637 {
3638 L = mid + 1;
3639 } else
3640 {
3641 R = mid;
3642 }
3643 }
3644 return R;
3645 }
3646
3647 NLM_EXTERN Int4 binary_search_on_uint2_list(Uint2Ptr list, Uint2 ele, Uint2 listlen)
3648 {
3649 Uint2 L;
3650 Uint2 mid;
3651 Uint2 R;
3652
3653 if (list == NULL || listlen == 0)
3654 return -1;
3655 L = 0;
3656 R = listlen - 1;
3657 while (L < R)
3658 {
3659 mid = (L+R)/2;
3660 if (ele <= list[mid])
3661 {
3662 R = mid;
3663 } else
3664 {
3665 L = mid+1;
3666 }
3667 }
3668 if (ele == list[R])
3669 return list[R];
3670 else
3671 return -1;
3672 }
3673
3674 NLM_EXTERN Int4 binary_search_by_chunk(Int4Ptr list, Int4 ele, Int4 listlen, Int4 chunksize, Int4 offset)
3675 {
3676 Int4 L;
3677 Int4 mid;
3678 Int4 R;
3679
3680 if (list == NULL || listlen == 0)
3681 return -1;
3682 L = 0;
3683 R = (listlen/chunksize) - 1;
3684 while (L < R)
3685 {
3686 mid = (L+R)/2;
3687 if (ele <= list[(mid)*chunksize + offset] && list[(mid)*chunksize + offset] >= 0)
3688 {
3689 R = mid;
3690 } else
3691 {
3692 L = mid + 1;
3693 }
3694 }
3695 return R;
3696 }
3697
3698 NLM_EXTERN Int4 binary_search_segment_array(SASeqDatPtr ssdp, Int4 pos, Int4 numseq, Int4 offset, DenseSegPtr dsp)
3699 {
3700 Int4 L;
3701 Int4 mid;
3702 Int4 R;
3703
3704 if (ssdp == NULL || numseq == 0)
3705 return -1;
3706 L = 0;
3707 R = ssdp->numsect - 1;
3708 while (L < R)
3709 {
3710 mid = (L+R)/2;
3711 if (pos <= (dsp->starts[(ssdp->sect[mid])*numseq + offset]))
3712 {
3713 R = mid;
3714 } else
3715 {
3716 L = mid+1;
3717 }
3718 }
3719 return (ssdp->sect[R]);
3720 }
3721
3722 /************************************************************************
3723 *
3724 * These are several utility functions which get needed data from the
3725 * indexes. "N" refers to row number.
3726 *
3727 ************************************************************************/
3728 NLM_EXTERN Int4 AlnMgrGetAlnLength(SeqAlignPtr sap, Boolean fill_in)
3729 {
3730 AMAlignIndexPtr amaip;
3731 DenseSegPtr dsp;
3732 Int4 i;
3733 Int4 length;
3734 SAIndexPtr saip;
3735
3736 if (!sap)
3737 return 0;
3738 i = AlnMgrCheckAlignForParent(sap);
3739 if (i<0)
3740 {
3741 return 0;
3742 } else if (i == AM_CHILD)
3743 {
3744 dsp = (DenseSegPtr)sap->segs;
3745 if (!dsp)
3746 return 0;
3747 saip = (SAIndexPtr)sap->saip;
3748 return ((saip->aligncoords[dsp->numseg-1])+dsp->lens[dsp->numseg-1]);
3749 } else if (i == AM_PARENT)
3750 {
3751 amaip = (AMAlignIndexPtr)sap->saip;
3752 if (!amaip || amaip->mstype == AM_LITE)
3753 return 0;
3754 if (!amaip->saps)
3755 {
3756 if (!AlnMgrMakeFakeMultiple(sap))
3757 return 0;
3758 }
3759 if (fill_in)
3760 {
3761 if (sap->type == SAT_MASTERSLAVE && (amaip->mstype == AM_MASTERSLAVE || amaip->mstype == AM_NULL))
3762 return (amaip->lens[(amaip->numseg)-1] + amaip->aligncoords[amaip->numseg-1]);
3763 else if (sap->type == SAT_PARTIAL)
3764 {
3765 length = 0;
3766 for (i=0; i<(amaip->numsaps-1); i++)
3767 {
3768 length += AlnMgrGetMaxUnalignedLength(amaip->saps[i], amaip->saps[i+1]);
3769 }
3770 return (length + amaip->lens[(amaip->numseg)-1] + amaip->aligncoords[amaip->numseg-1]);
3771 }
3772 } else
3773 {
3774 if (amaip->mstype == AM_NULL)
3775 return 0;
3776 return (amaip->lens[(amaip->numseg)-1] + amaip->aligncoords[amaip->numseg-1]);
3777 }
3778 }
3779 return 0;
3780 }
3781
3782 NLM_EXTERN Int4 AlnMgrGetNumSeqs(SeqAlignPtr sap)
3783 {
3784 AMAlignIndexPtr amaip;
3785 DenseSegPtr dsp;
3786 Int4 i;
3787
3788 if (!sap)
3789 return 0;
3790 i = AlnMgrCheckAlignForParent(sap);
3791 if (i<0)
3792 return 0;
3793 if (i == AM_CHILD)
3794 {
3795 dsp = (DenseSegPtr)sap->segs;
3796 if (!dsp)
3797 return 0;
3798 return (dsp->dim);
3799 } else if (i == AM_PARENT)
3800 {
3801 amaip = (AMAlignIndexPtr)(sap->saip);
3802 if (!amaip || amaip->mstype == AM_LITE)
3803 return 0;
3804 return (amaip->numbsqs);
3805 }
3806 return 0;
3807 }
3808
3809 NLM_EXTERN SeqIdPtr AlnMgrGetUniqueSeqs(SeqAlignPtr sap, Int4Ptr n)
3810 {
3811 AMAlignIndexPtr amaip;
3812 Int4 c;
3813 DenseSegPtr dsp;
3814 Boolean found;
3815 Int4 i;
3816 Int4 m;
3817 SeqIdPtr sip;
3818 SeqIdPtr sip_head;
3819 SeqIdPtr sip_prev;
3820 SeqIdPtr sip_tmp;
3821
3822 if (sap == NULL)
3823 return 0;
3824 i = AlnMgrCheckAlignForParent(sap);
3825 if (i<0)
3826 return 0;
3827 sip_head = sip_prev = NULL;
3828 if (i == AM_CHILD)
3829 {
3830 dsp = (DenseSegPtr)sap->segs;
3831 if (dsp == NULL)
3832 return 0;
3833 sip = dsp->ids;
3834 m = 0;
3835 while (sip)
3836 {
3837 sip_tmp = sip_head;
3838 found = FALSE;
3839 while (!found && sip_tmp != NULL)
3840 {
3841 if (SAM_OrderSeqID(sip, sip_tmp) == 0)
3842 found = TRUE;
3843 sip_tmp = sip_tmp->next;
3844 }
3845 if (!found)
3846 {
3847 m++;
3848 if (sip_head == NULL)
3849 {
3850 sip_head = sip_prev = SeqIdDup(sip);
3851 } else
3852 {
3853 sip_prev->next = SeqIdDup(sip);
3854 sip_prev = sip_prev->next;
3855 }
3856 }
3857 sip = sip->next;
3858 }
3859 if (n)
3860 *n = m;
3861 return sip_head;
3862 } else if (i == AM_PARENT)
3863 {
3864 amaip = (AMAlignIndexPtr)(sap->saip);
3865 if (amaip == NULL || amaip->mstype == AM_LITE)
3866 return 0;
3867 m = 0;
3868 if (amaip->alnsaps == 1)
3869 {
3870 return (AlnMgrGetUniqueSeqs((SeqAlignPtr)sap->segs, n));
3871 }
3872 for (c=0; c<amaip->numrows; c++)
3873 {
3874 sip = amaip->rowsource[c]->id;
3875 sip_tmp = sip_head;
3876 found = FALSE;
3877 while (!found && sip_tmp != NULL)
3878 {
3879 if (SAM_OrderSeqID(sip, sip_tmp) == 0)
3880 found = TRUE;
3881 sip_tmp = sip_tmp->next;
3882 }
3883 if (!found)
3884 {
3885 m++;
3886 if (sip_head == NULL)
3887 {
3888 sip_head = sip_prev = SeqIdDup(sip);
3889 } else
3890 {
3891 sip_prev->next = SeqIdDup(sip);
3892 sip_prev = sip_prev->next;
3893 }
3894 }
3895 }
3896 if (n)
3897 *n = m;
3898 return sip_head;
3899 }
3900 return NULL;
3901 }
3902
3903 NLM_EXTERN SeqIdPtr AlnMgrGetNthSeqIdPtr(SeqAlignPtr sap, Int4 n)
3904 {
3905 AMAlignIndexPtr amaip;
3906 Int4 count;
3907 DenseSegPtr dsp;
3908 Int4 i;
3909 SeqIdPtr sip;
3910
3911 if (!sap)
3912 return NULL;
3913 i = AlnMgrCheckAlignForParent(sap);
3914 if (i<0)
3915 return NULL;
3916 else if (i == AM_CHILD)
3917 {
3918 dsp = (DenseSegPtr)sap->segs;
3919 if (!dsp)
3920 return NULL;
3921 sip = dsp->ids;
3922 count = 0;
3923 while (sip)
3924 {
3925 count++;
3926 if (count == n)
3927 return (SeqIdDup(sip));
3928 sip = sip->next;
3929 }
3930 } else if (i == AM_PARENT)
3931 {
3932 amaip = (AMAlignIndexPtr)(sap->saip);
3933 if (amaip->mstype == AM_LITE)
3934 return NULL;
3935 if (n <= amaip->numrows)
3936 {
3937 return (SeqIdDup(amaip->rowsource[n-1]->id));
3938 } else
3939 {
3940 if (AlnMgrIsSAPNULL(sap))
3941 {
3942 sip = amaip->ids;
3943 count = 1;
3944 while (sip != NULL && count < n)
3945 {
3946 sip = sip->next;
3947 count++;
3948 }
3949 return SeqIdDup(sip);
3950 } else
3951 return NULL;
3952 }
3953 }
3954 return NULL;
3955 }
3956
3957 /* (RANGE) */
3958 NLM_EXTERN void AlnMgrGetNthSeqRangeInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
3959 {
3960 AMAlignDatPtr amadp;
3961 AMAlignIndexPtr amaip;
3962 Uint2 beg;
3963 Int4 bsq;
3964 DenseSegPtr dsp;
3965 Uint2 end;
3966 Int4 i;
3967 Int4 j;
3968 RowSourcePtr rsp;
3969 SAIndexPtr saip;
3970 SeqIdPtr sip;
3971 Uint2 strand;
3972 Int4 tmp_beg;
3973 Int4 tmp_end;
3974 Int4 tmp_start;
3975 Int4 tmp_stop;
3976
3977 if (!sap)
3978 return;
3979 i = AlnMgrCheckAlignForParent(sap);
3980 if (i < 0)
3981 {
3982 return;
3983 } else if (i == AM_CHILD)
3984 {
3985 if (n<1)
3986 return;
3987 saip = (SAIndexPtr)(sap->saip);
3988 if(!saip)
3989 return;
3990 bsq = n-1;
3991 dsp = (DenseSegPtr)sap->segs;
3992 if (n > dsp->dim)
3993 return;
3994 if (!dsp)
3995 return;
3996 strand = dsp->strands[bsq];
3997 if (strand != Seq_strand_minus)
3998 {
3999 if (saip->ssdp[bsq]->numsect == 0)
4000 {
4001 if (start)
4002 *start = -1;
4003 if (stop)
4004 *stop = -1;
4005 return;
4006 }
4007 beg = saip->ssdp[bsq]->sect[0];
4008 if (start)
4009 *start = dsp->starts[beg*(dsp->dim)+bsq];
4010 end = saip->ssdp[bsq]->sect[(saip->ssdp[bsq]->numsect)-1];
4011 if (stop)
4012 *stop = (dsp->starts[end*(dsp->dim)+bsq] + dsp->lens[end] - 1);
4013 return;
4014 } else
4015 {
4016 if (saip->ssdp[bsq]->numsect == 0)
4017 {
4018 if (start)
4019 *start = -1;
4020 if (stop)
4021 *stop = -1;
4022 return;
4023 }
4024 beg = saip->ssdp[bsq]->sect[(saip->ssdp[bsq]->numsect)-1];
4025 if (start)
4026 *start = dsp->starts[beg*(dsp->dim)+bsq];
4027 end = saip->ssdp[bsq]->sect[0];
4028 if (stop)
4029 *stop = (dsp->starts[end*(dsp->dim)+bsq] + dsp->lens[end] - 1);
4030 return;
4031 }
4032 } else if (i == AM_PARENT)
4033 {
4034 if (n<1)
4035 return;
4036 bsq = n-1;
4037 amaip = (AMAlignIndexPtr)(sap->saip);
4038 if (amaip->mstype == AM_LITE)
4039 return;
4040 if (amaip->numseg == 0)
4041 {
4042 if (start)
4043 *start = -1;
4044 if (stop)
4045 *stop = -1;
4046 return;
4047 }
4048 if (amaip->rowsource == NULL)
4049 {
4050 amadp = amaip->amadp[bsq];
4051 sip = amaip->ids;
4052 for (j = 0; j<bsq; j++)
4053 {
4054 sip = sip->next;
4055 if (sip == NULL)
4056 return;
4057 }
4058 for (j = 0; j<(amadp->numsaps); j++)
4059 {
4060 tmp_start = tmp_stop = 0;
4061 AlnMgrGetNthSeqRangeInSA(amadp->saps[j], AlnMgrGetNForSip(amadp->saps[j], sip), &tmp_start, &tmp_stop);
4062 if (j == 0)
4063 {
4064 tmp_beg = tmp_start;
4065 tmp_end = tmp_stop;
4066 } else
4067 {
4068 if (tmp_start < tmp_beg)
4069 tmp_beg = tmp_start;
4070 if (tmp_stop > tmp_end)
4071 tmp_end = tmp_stop;
4072 }
4073 }
4074 if (start)
4075 *start = tmp_beg;
4076 if (stop)
4077 *stop = tmp_end;
4078 return;
4079 } else
4080 {
4081 sip = amaip->ids;
4082 if (n > amaip->numrows)
4083 return;
4084 rsp = (RowSourcePtr)amaip->rowsource[n-1];
4085 for (j=0; j<(rsp->numsaps); j++)
4086 {
4087 tmp_start = tmp_stop = 0;
4088 if(rsp->which_saps[j] == 0)
4089 {
4090 if (start)
4091 *start = 0;
4092 if (stop)
4093 *stop = 0;
4094 return;
4095 }
4096 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[j]-1], rsp->num_in_sap[j], &tmp_start, &tmp_stop);
4097
4098 if (j==0)
4099 {
4100 tmp_beg = tmp_start;
4101 tmp_end = tmp_stop;
4102 } else
4103 {
4104 if (tmp_start < tmp_beg)
4105 tmp_beg = tmp_start;
4106 if (tmp_stop > tmp_end)
4107 tmp_end = tmp_stop;
4108 }
4109 }
4110 if (start)
4111 *start = tmp_beg;
4112 if (stop)
4113 *stop = tmp_end;
4114 return;
4115 }
4116 }
4117 return;
4118 }
4119
4120 NLM_EXTERN Int4 AlnMgrGetNumSegments(SeqAlignPtr sap)
4121 {
4122 AMAlignIndexPtr amaip;
4123 DenseSegPtr dsp;
4124 Int4 i;
4125
4126 if (sap == NULL)
4127 return -1;
4128 i = AlnMgrCheckAlignForParent(sap);
4129 if (i == AM_CHILD)
4130 {
4131 dsp = (DenseSegPtr)(sap->segs);
4132 return (dsp->numseg);
4133 } else if (i == AM_PARENT)
4134 {
4135 amaip = (AMAlignIndexPtr)(sap->saip);
4136 return (amaip->numseg);
4137 } else
4138 return -1;
4139 }
4140
4141 /***************************************************************************
4142 *
4143 * AlnMgrGetNumAlnBlocks returns the number of separate aligned regions
4144 * in the seqalign. A return value of -1 indicates an error; a return
4145 * value of 0 indicates a NULL alignment (only one unaligned region and
4146 * no aligned regions.
4147 *
4148 ***************************************************************************/
4149 NLM_EXTERN Int4 AlnMgrGetNumAlnBlocks(SeqAlignPtr sap)
4150 {
4151 AMAlignIndexPtr amaip;
4152
4153 if (sap == NULL || sap->saip == NULL)
4154 return -1;
4155 if (sap->saip->indextype == INDEX_SEGS)
4156 return 1;
4157 else if (sap->saip->indextype == INDEX_PARENT)
4158 {
4159 amaip = (AMAlignIndexPtr)(sap->saip);
4160 if (amaip->mstype == AM_LITE || amaip->mstype == AM_NEATINDEX)
4161 return -1;
4162 else if (amaip->mstype == AM_NULL)
4163 return 0;
4164 else if (amaip->mstype == AM_MASTERSLAVE)
4165 return 1;
4166 else if (amaip->mstype == AM_SEGMENTED_MASTERSLAVE)
4167 {
4168 if (amaip->rowsource == NULL || amaip->master < 1 || amaip->master > amaip->numrows)
4169 return -1;
4170 return (amaip->rowsource[amaip->master-1]->numsaps);
4171 } else if (sap->type == SAT_PARTIAL)
4172 return amaip->alnsaps;
4173 } else
4174 return -1;
4175 return -1;
4176 }
4177
4178 NLM_EXTERN Boolean AlnMgrGetNthBlockRange(SeqAlignPtr sap, Int4 n, Int4Ptr from, Int4Ptr to)
4179 {
4180 AMAlignIndexPtr amaip;
4181 Int4 i;
4182 Int4 len;
4183
4184 if (sap == NULL || sap->saip == NULL)
4185 return FALSE;
4186 if (sap->saip->indextype == INDEX_SEGS)
4187 {
4188 len = AlnMgrGetAlnLength(sap, FALSE);
4189 if (from)
4190 *from = 0;
4191 if (to)
4192 *to = len - 1;
4193 return TRUE;
4194 } else if (sap->saip->indextype == INDEX_PARENT)
4195 {
4196 i = AlnMgrGetNumAlnBlocks(sap);
4197 if (n > i || i < 1)
4198 return FALSE;
4199 amaip = (AMAlignIndexPtr)(sap->saip);
4200 if (i == 1)
4201 {
4202 len = AlnMgrGetAlnLength(sap, FALSE);
4203 if (from)
4204 *from = 0;
4205 if (to)
4206 *to = len - 1;
4207 return TRUE;
4208 } else
4209 return (AlnMgrGetNthSegmentRange(sap, n, from, to));
4210 } else
4211 return FALSE;
4212 }
4213
4214
4215 /***************************************************************************
4216 *
4217 * AlnMgrGetNthAlignedSegInNthRow is similar to AlnMgrGetNextAlignBit,
4218 * but it takes an extra argument -- the number (1-based) of the segment
4219 * for which you want the alignment. Fill in the AlnMsg structure as for
4220 * AlnMgrGetNextAlignBit, but leave out the from_m and to_m; and as usual,
4221 * don't modify the AlnMsg structure in between calls, and call AlnMsgReNew
4222 * for a new segment. The from_b and to_b fields will work as in
4223 * AlnMgrGetNextAlnBit -- if there is a gap, these are alignment coordinates;
4224 * otherwise they're sequence coordinates.
4225 *
4226 ***************************************************************************/
4227 NLM_EXTERN Boolean AlnMgrGetNthAlignedSegInNthRow(SeqAlignPtr sap, AlnMsgPtr amp, Int4 segnum)
4228 {
4229 Int4 n;
4230 Boolean retval;
4231
4232 if (sap == NULL || amp == NULL)
4233 return FALSE;
4234 if (amp->flag == FALSE)
4235 {
4236 n = AlnMgrGetNumSegments(sap);
4237 if (segnum > n)
4238 return FALSE;
4239 if (!AlnMgrGetNthSegmentRange(sap, segnum, &->from_m, &->to_m))
4240 return FALSE;
4241 amp->flag = TRUE;
4242 }
4243 retval = AlnMgrGetNextAlnBit(sap, amp);
4244 if (retval == FALSE)
4245 amp->flag = FALSE;
4246 return retval;
4247 }
4248
4249
4250 NLM_EXTERN Boolean AlnMgrGetNthSegmentRange(SeqAlignPtr sap, Int4 n, Int4Ptr from, Int4Ptr to)
4251 {
4252 AMAlignIndexPtr amaip;
4253 Int4 i;
4254 Boolean last;
4255 Int4 num;
4256 SAIndexPtr saip;
4257
4258 if (sap == NULL)
4259 return FALSE;
4260 i = AlnMgrCheckAlignForParent(sap);
4261 num = AlnMgrGetNumSegments(sap);
4262 if (n > num)
4263 return FALSE;
4264 last = FALSE;
4265 if (n == num)
4266 last = TRUE;
4267 if (i == AM_CHILD)
4268 {
4269 saip = (SAIndexPtr)(sap->saip);
4270 if (!last)
4271 {
4272 if (from)
4273 *from = saip->aligncoords[n-1];
4274 if (to)
4275 *to = saip->aligncoords[n] - 1;
4276 return TRUE;
4277 } else
4278 {
4279 if (from)
4280 *from = saip->aligncoords[n-1];
4281 if (to)
4282 *to = AlnMgrGetAlnLength(sap, FALSE);
4283 return TRUE;
4284 }
4285 } else if (i == AM_PARENT)
4286 {
4287 amaip = (AMAlignIndexPtr)(sap->saip);
4288 if (!last)
4289 {
4290 if (from)
4291 *from = amaip->aligncoords[n-1];
4292 if (to)
4293 *to = amaip->aligncoords[n] - 1;
4294 return TRUE;
4295 } else
4296 {
4297 if (from)
4298 *from = amaip->aligncoords[n-1];
4299 if (to)
4300 *to = amaip->aligncoords[n-1] + amaip->lens[n-1] - 1;
4301 return TRUE;
4302 }
4303 } else
4304 return FALSE;
4305 }
4306
4307
4308 /********************************************************************************
4309 *
4310 * AlnMgrGetNextNthSeqRange is called recursively to return the lengths of
4311 * all aligned and all internal unaligned regions of any row in a seqalign.
4312 * If there is an error, or if the function is called past the last block,
4313 * the function returns FALSE. Set where to point to an allocated integer
4314 * equal to 0 on the first call and don't change it during the loop. Set
4315 * the boolean unaligned to FALSE to get only the aligned regions, and TRUE to
4316 * get the aligned regions plus all internal unaligned regions. For unaligned
4317 * regions, *is_aligned will be FALSE.
4318 *
4319 ********************************************************************************/
4320 NLM_EXTERN Boolean AlnMgrGetNextNthSeqRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop, Int4Ptr where, BoolPtr is_aligned, Boolean unaligned)
4321 {
4322 if (sap == NULL || n <= 0)
4323 return FALSE;
4324 if (sap->saip == NULL)
4325 return FALSE;
4326 if (sap->saip->indextype == INDEX_PARENT && sap->type == SAT_PARTIAL)
4327 {
4328 return (am_get_nth_range_for_partial(sap, n, start, stop, where, is_aligned, unaligned));
4329 } else
4330 {
4331 if (*where == 0)
4332 {
4333 AlnMgrGetNthSeqRangeInSA(sap, n, start, stop);
4334 *where = 1;
4335 return TRUE;
4336 } else
4337 return FALSE;
4338 }
4339 }
4340
4341 static Boolean am_get_nth_range_for_partial(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop, Int4Ptr where, BoolPtr is_aligned, Boolean unaligned)
4342 {
4343 AMAlignIndexPtr amaip;
4344 RowSourcePtr rsp;
4345 Uint2 strand;
4346 Int4 tmp_start;
4347 Int4 tmp_stop;
4348 Int4 tmp_where;
4349
4350 amaip = (AMAlignIndexPtr)sap->saip;
4351 if (amaip->mstype == AM_LITE)
4352 return FALSE;
4353 rsp = amaip->rowsource[n-1];
4354 tmp_where = *where;
4355 if (tmp_where >= 0)
4356 {
4357 if (tmp_where >= rsp->numsaps)
4358 return FALSE;
4359 if (is_aligned)
4360 *is_aligned = TRUE;
4361 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[tmp_where]-1], rsp->num_in_sap[tmp_where], start, stop);
4362 if (unaligned && (sap->type == SAT_PARTIAL || (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_SEGMENTED_MASTERSLAVE)))
4363 tmp_where = -(tmp_where+1);
4364 else
4365 tmp_where += 1;
4366 } else if (tmp_where < 0 && unaligned == TRUE)
4367 {
4368 if (-tmp_where >= rsp->numsaps)
4369 return FALSE;
4370 if (is_aligned)
4371 *is_aligned = FALSE;
4372 strand = AlnMgrGetNthStrand(amaip->saps[rsp->which_saps[(-tmp_where)]-1], n);
4373 tmp_start = tmp_stop = 0;
4374 if (start)
4375 {
4376 if (strand == Seq_strand_minus)
4377 {
4378 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[(-tmp_where)]-1], rsp->num_in_sap[(-tmp_where)], &tmp_start, NULL);
4379 *start = tmp_start + 1;
4380 } else
4381 {
4382 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[(-tmp_where)-1]-1], rsp->num_in_sap[(-tmp_where)-1], NULL, &tmp_start);
4383 *start = tmp_start + 1;
4384 }
4385 }
4386 if (stop)
4387 {
4388 if (strand == Seq_strand_minus)
4389 {
4390 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[(-tmp_where)-1]-1], rsp->num_in_sap[(-tmp_where)-1], NULL, &tmp_stop);
4391 *stop = tmp_stop - 1;
4392 } else
4393 {
4394 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[-tmp_where]-1], rsp->num_in_sap[-tmp_where], &tmp_stop, NULL);
4395 *stop = tmp_stop - 1;
4396 }
4397 }
4398 if (tmp_start + 1 > tmp_stop - 1)
4399 {
4400 if (start)
4401 *start = -1;
4402 if (stop)
4403 *stop = -1;
4404 }
4405 tmp_where = -tmp_where;
4406 }
4407 *where = tmp_where;
4408 return TRUE;
4409 }
4410
4411 /********************************************************************************
4412 *
4413 * AlnMgrGetNthRowTail retrieves the blocks of sequence on either end of the
4414 * alignment, by row. which_tail is LEFT_TAIL to retrieve the ends which come
4415 * before alignment coordinate 0, and RIGHT_TAIL to retrieve the other ends.
4416 * The function returns TRUE if successful, FALSE for an error.
4417 *
4418 ********************************************************************************/
4419 NLM_EXTERN Boolean AlnMgrGetNthRowTail(SeqAlignPtr sap, Int4 n, Uint1 which_tail, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
4420 {
4421 BioseqPtr bsp;
4422 SeqIdPtr sip = NULL;
4423 Int4 tmp_start;
4424 Int4 tmp_stop;
4425 Uint1 tmp_strand;
4426
4427 if (sap == NULL || n < 1 || sap->segs == NULL)
4428 return FALSE;
4429 tmp_start = tmp_stop = -1;
4430 AlnMgrGetNthSeqRangeInSA(sap, n, &tmp_start, &tmp_stop);
4431 if (tmp_start == -1 || tmp_stop == -1)
4432 return FALSE;
4433 tmp_strand = AlnMgrGetNthStrand(sap, n);
4434 if (which_tail == LEFT_TAIL)
4435 {
4436 if (tmp_strand == Seq_strand_minus)
4437 {
4438 sip = AlnMgrGetNthSeqIdPtr(sap, n);
4439 bsp = BioseqLockById(sip);
4440 if (tmp_stop == bsp->length-1 || stop == NULL)
4441 {
4442 if (start)
4443 *start = -1;
4444 if (stop)
4445 *stop = -1;
4446 } else
4447 {
4448 if (bsp == NULL)
4449 return FALSE;
4450 if (start)
4451 *start = tmp_stop-1;
4452 if (stop)
4453 *stop = bsp->length-1;
4454 }
4455 BioseqUnlock(bsp);
4456 if (strand)
4457 *strand = tmp_strand;
4458 } else
4459 {
4460 if (tmp_start >= 1)
4461 {
4462 if (start)
4463 *start = 0;
4464 if (stop)
4465 *stop = tmp_start - 1;
4466 } else
4467 {
4468 if (start)
4469 *start = -1;
4470 if (stop)
4471 *stop = -1;
4472 }
4473 if (strand)
4474 *strand = tmp_strand;
4475 }
4476 } else if (which_tail == RIGHT_TAIL)
4477 {
4478 if (tmp_strand == Seq_strand_minus)
4479 {
4480 if (tmp_start >= 1)
4481 {
4482 if (start)
4483 *start = 0;
4484 if (stop)
4485 *stop = tmp_start - 1;
4486 } else
4487 {
4488 if (start)
4489 *start = -1;
4490 if (stop)
4491 *stop = -1;
4492 }
4493 if (strand)
4494 *strand = tmp_strand;
4495 } else
4496 {
4497 sip = AlnMgrGetNthSeqIdPtr(sap, n);
4498 bsp = BioseqLockById(sip);
4499 if (bsp == NULL)
4500 return FALSE;
4501 if (bsp->length-1 == tmp_stop)
4502 {
4503 if (start)
4504 *start = -1;
4505 if (stop)
4506 *stop = -1;
4507 } else
4508 {
4509 if (start)
4510 *start = tmp_stop + 1;
4511 if (stop)
4512 *stop = bsp->length-1;
4513 }
4514 if (strand)
4515 *strand = tmp_strand;
4516 BioseqUnlock(bsp);
4517 }
4518 } else
4519 return FALSE;
4520 SeqIdFree(sip);
4521 return TRUE;
4522 }
4523
4524 NLM_EXTERN Int4 AlnMgrGetMaxTailLength (SeqAlignPtr sap, Uint1 which_tail)
4525 {
4526 Int4 i;
4527 Int4 max;
4528 Int4 n;
4529 Int4 start;
4530 Int4 stop;
4531
4532 if (sap == NULL)
4533 return -1;
4534 n = AlnMgrGetNumRows(sap);
4535 max = -1;
4536 for (i=0; i<n; i++)
4537 {
4538 if (!AlnMgrGetNthRowTail (sap, i+1, which_tail, &start, &stop, NULL))
4539 return -1;
4540 if (stop - start + 1 > max)
4541 max = stop - start + 1;
4542 }
4543 return max;
4544 }
4545
4546 NLM_EXTERN Boolean AlnMgrGetNthUnalignedForNthRow(SeqAlignPtr sap, Int4 unaligned, Int4 row, Int4Ptr start, Int4Ptr stop)
4547 {
4548 AMAlignIndexPtr amaip;
4549 Int4 beg;
4550 BioseqPtr bsp;
4551 Int4 end;
4552 Boolean go = TRUE;
4553 Int4 i;
4554 Int4 j;
4555 RowSourcePtr rsp;
4556 SeqIdPtr sip;
4557 Uint2 strand;
4558 Int4 tmp_start;
4559 Int4 tmp_stop;
4560
4561 i = AlnMgrCheckAlignForParent(sap);
4562 if (i == AM_PARENT)
4563 {
4564 amaip = (AMAlignIndexPtr)(sap->saip);
4565 if (amaip->numseg > 0 && row > amaip->numrows)
4566 return FALSE;
4567 if (sap->type == SAT_PARTIAL || (sap->type == SAT_MASTERSLAVE && (amaip->mstype == AM_SEGMENTED_MASTERSLAVE || amaip->mstype == AM_NULL)))
4568 {
4569 if (amaip->numseg == 0)
4570 {
4571 if (row > amaip->numbsqs)
4572 return FALSE;
4573 sip = amaip->ids;
4574 for (j=1; j<row; j++)
4575 {
4576 sip = sip->next;
4577 }
4578 bsp = BioseqLockById(sip);
4579 if (bsp == NULL)
4580 return FALSE;
4581 if (start != NULL)
4582 *start = 0;
4583 if (stop != NULL)
4584 *stop = bsp->length-1;
4585 BioseqUnlock(bsp);
4586 return TRUE;
4587 } else if (unaligned > amaip->numseg - 1)
4588 return FALSE;
4589 tmp_start = tmp_stop = 0;
4590 rsp = amaip->rowsource[row-1];
4591 if (rsp->which_saps[unaligned-1] == 0 || rsp->which_saps[unaligned] == 0)
4592 {
4593 beg = unaligned-1;
4594 while (rsp->which_saps[beg] == 0 && beg >= 0)
4595 {
4596 beg--;
4597 }
4598 if (beg<0)
4599 go = FALSE;
4600 end = unaligned;
4601 if (rsp->which_saps[end] == 0)
4602 go = FALSE;
4603 } else
4604 {
4605 beg = unaligned-1;
4606 end = unaligned;
4607 }
4608 if (go)
4609 strand = AlnMgrGetNthStrand(amaip->saps[rsp->which_saps[end]-1], row);
4610 if (start && go)
4611 {
4612 if (strand == Seq_strand_minus)
4613 {
4614 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[end]-1], rsp->num_in_sap[end], NULL, &tmp_start);
4615 *start = tmp_start + 1;
4616 } else
4617 {
4618 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[beg]-1], rsp->num_in_sap[beg], NULL, &tmp_start);
4619 *start = tmp_start + 1;
4620 }
4621 }
4622 if (stop && go)
4623 {
4624 if (strand == Seq_strand_minus)
4625 {
4626 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[beg]-1], rsp->num_in_sap[beg], &tmp_stop, NULL);
4627 *stop = tmp_stop - 1;
4628 } else
4629 {
4630 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[end]-1], rsp->num_in_sap[end], &tmp_stop, NULL);
4631 *stop = tmp_stop - 1;
4632 }
4633 }
4634 if (tmp_start + 1 > tmp_stop - 1)
4635 {
4636 if (start)
4637 *start = -1;
4638 if (stop)
4639 *stop = -1;
4640 }
4641 return TRUE;
4642 } else
4643 return FALSE;
4644 } else
4645 return FALSE;
4646 }
4647
4648
4649 NLM_EXTERN Uint1 AlnMgrGetStrand(SeqAlignPtr sap, SeqIdPtr sip)
4650 {
4651 Int4 i;
4652
4653 i = AlnMgrGetNForSip(sap, sip);
4654 return (AlnMgrGetNthStrand(sap, i));
4655 }
4656
4657 NLM_EXTERN Uint1 AlnMgrGetNthStrand(SeqAlignPtr sap, Int4 n)
4658 {
4659 AMAlignIndexPtr amaip;
4660 Int4 c;
4661 DenseSegPtr dsp;
4662 Int4 m;
4663 SeqAlignPtr salp;
4664
4665 if (!sap || n < 1 || sap->segs == NULL)
4666 return 0;
4667 if (sap->segtype != SAS_DENSEG)
4668 {
4669 if (sap->saip == NULL)
4670 return 0;
4671 amaip = (AMAlignIndexPtr)sap->saip;
4672 if (n > amaip->numrows)
4673 return 0;
4674 c = 0;
4675 while (amaip->rowsource[n-1]->which_saps[c] == 0)
4676 {
4677 c++;
4678 if (c >= amaip->alnsaps)
4679 return (Seq_strand_unknown);
4680 }
4681 salp = amaip->saps[amaip->rowsource[n-1]->which_saps[c]-1];
4682 dsp = (DenseSegPtr)salp->segs;
4683 m = amaip->rowsource[n-1]->num_in_sap[0];
4684 if (m > dsp->dim)
4685 return 0;
4686 return (dsp->strands[m-1]);
4687 } else
4688 {
4689 dsp = (DenseSegPtr)sap->segs;
4690 if (!dsp)
4691 return 0;
4692 if (n==0)
4693 return 0;
4694 if (dsp->strands)
4695 return (dsp->strands[n-1]);
4696 else
4697 return (Seq_strand_plus);
4698 }
4699 }
4700
4701 NLM_EXTERN Int4 AlnMgrGetNForSip(SeqAlignPtr sap, SeqIdPtr sip)
4702 {
4703 AMAlignIndexPtr amaip;
4704 DenseSegPtr dsp;
4705 Int4 i;
4706 Int4 n;
4707 SeqIdPtr sip_tmp;
4708
4709 i = AlnMgrCheckAlignForParent(sap);
4710 if (i<0)
4711 return -1;
4712 if (i == AM_PARENT)
4713 {
4714 amaip = (AMAlignIndexPtr)(sap->saip);
4715 sip_tmp = amaip->ids;
4716 n = 0;
4717 while (sip_tmp)
4718 {
4719 n++;
4720 if (SeqIdComp(sip_tmp, sip) == SIC_YES)
4721 return n;
4722 sip_tmp = sip_tmp->next;
4723 }
4724 } else if (i == AM_CHILD)
4725 {
4726 dsp = (DenseSegPtr)(sap->segs);
4727 sip_tmp = dsp->ids;
4728 n = 0;
4729 while (sip_tmp)
4730 {
4731 n++;
4732 if (SeqIdComp(sip_tmp, sip) == SIC_YES)
4733 return n;
4734 sip_tmp = sip_tmp->next;
4735 }
4736 }
4737 return -1;
4738 }
4739
4740 NLM_EXTERN Int4 AlnMgrGetNForSap(AMAlignIndexPtr amaip, SeqAlignPtr sap)
4741 {
4742 Int4 i;
4743
4744 if (sap == NULL || amaip == NULL)
4745 return -1;
4746 if (sap->saip->indextype != INDEX_SEGS)
4747 return -1;
4748 i = 0;
4749 while (i<amaip->alnsaps)
4750 {
4751 if (amaip->saps[i] == sap)
4752 return (i+1);
4753 i++;
4754 }
4755 return -1;
4756 }
4757
4758
4759 /********************************************************************************
4760 *
4761 * AlnMgrGetAllNForSip is called in a while loop to return all the rows that a
4762 * seqid appears in in a given seqalign. Use n = 0 to start, and then on
4763 * return, if the return is TRUE, n will be the row number of the next row
4764 * that the seqid appears in. If the return is FALSE, either there was an
4765 * error or there are no (more) rows containing that seqid.
4766 *
4767 ********************************************************************************/
4768 NLM_EXTERN Boolean AlnMgrGetAllNForSip(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr n)
4769 {
4770 AMAlignIndexPtr amaip;
4771 DenseSegPtr dsp;
4772 Int4 i;
4773 SeqIdPtr sip_tmp;
4774
4775 if (sap == NULL || sip == NULL || n == NULL)
4776 return FALSE;
4777 if (sap->saip == NULL)
4778 return FALSE;
4779 if (sap->saip->indextype == INDEX_SEGS)
4780 {
4781 i = 1;
4782 dsp = (DenseSegPtr)sap->segs;
4783 sip_tmp = dsp->ids;
4784 while (i <= *n)
4785 {
4786 sip_tmp = sip_tmp->next;
4787 i++;
4788 }
4789 while (sip_tmp)
4790 {
4791 if (SeqIdComp(sip_tmp, sip) == SIC_YES)
4792 {
4793 *n = i;
4794 return TRUE;
4795 }
4796 i++;
4797 sip_tmp = sip_tmp->next;
4798 }
4799 } else if (sap->saip->indextype == INDEX_PARENT)
4800 {
4801 amaip = (AMAlignIndexPtr)sap->saip;
4802 if (amaip->mstype == AM_LITE)
4803 return FALSE;
4804 i = *n + 1;
4805 while (i <= amaip->numrows)
4806 {
4807 if (SeqIdComp(amaip->rowsource[i-1]->id, sip) == SIC_YES)
4808 {
4809 *n = i;
4810 return TRUE;
4811 }
4812 i++;
4813 }
4814 }
4815 return FALSE;
4816 }
4817
4818 NLM_EXTERN Int4 AlnMgrGetSapForSip(AMAlignIndexPtr amaip, SeqIdPtr sip, Int4 which)
4819 {
4820 Int4 i;
4821 Int4 j;
4822 Int4 n;
4823
4824 i = 0;
4825 for (n=0; n<(amaip->numsaps); n++)
4826 {
4827 j = AlnMgrGetNForSip(amaip->saps[n], sip);
4828 if (j != -1)
4829 {
4830 if (i==which)
4831 return n;
4832 else
4833 i++;
4834 }
4835 }
4836 return -1;
4837 }
4838
4839 /********************************************************************************
4840 *
4841 * AlnMgrMapToBsqCoords returns the bioseq coordinate for an alignment
4842 * position. If master is NULL, the alignment position is taken to be from
4843 * a flattened alignment; otherwise, the function returns the corresponding
4844 * position in the given master.
4845 *
4846 ********************************************************************************/
4847
4848 NLM_EXTERN Int4 AlnMgrMapToBsqCoords(SeqAlignPtr sap, Uint4 pos, SeqIdPtr sip, SeqIdPtr master)
4849 {
4850 DenseSegPtr dsp;
4851 Int4 n;
4852 Int4 offset;
4853 SAIndexPtr saip;
4854 Int4 start;
4855
4856 if (!sap)
4857 return -1;
4858 if (sap->segtype == SAS_DENSEG)
4859 {
4860 saip = (SAIndexPtr)(sap->saip);
4861 dsp = (DenseSegPtr)(sap->segs);
4862 if (sip == NULL)
4863 n = saip->master;
4864 else
4865 n = AlnMgrGetNForSip(sap, sip);
4866 if (!master)
4867 {
4868 start = binary_search_on_uint4_list(saip->aligncoords, pos, dsp->numseg);
4869 offset = pos - saip->aligncoords[start];
4870 if (dsp->starts[(dsp->dim*start) + n - 1] == -1)
4871 return -1;
4872 else
4873 if (dsp->strands[(dsp->dim*start) + n - 1] != Seq_strand_minus)
4874 return (dsp->starts[(dsp->dim*start) + n - 1] + offset);
4875 else
4876 return (dsp->starts[(dsp->dim*start) + n - 1] + dsp->lens[start] - 1 - offset);
4877 } else
4878 {
4879 }
4880 } else if (sap->segtype == SAS_DISC)
4881 {
4882 SeqAlignPtr salp;
4883 salp = (SeqAlignPtr)sap->segs;
4884 if(salp->next==NULL)
4885 return AlnMgrMapToBsqCoords(salp, pos, sip, master);
4886 }
4887 return -1;
4888 }
4889
4890 static Int4 AlnMgrMapSegmentCoords(SeqAlignPtr sap, Uint4 pos, Int4 row, SeqIdPtr master, Int4Ptr len)
4891 {
4892 DenseSegPtr dsp;
4893 Int4 offset;
4894 SAIndexPtr saip;
4895 Int4 start;
4896
4897 if (sap == NULL || row < 0 || len == NULL)
4898 return -1;
4899 if (sap->saip == NULL)
4900 return -1;
4901 if (sap->saip->indextype == INDEX_SEGS)
4902 {
4903 saip = (SAIndexPtr)sap->saip;
4904 dsp = (DenseSegPtr)sap->segs;
4905 if (master == NULL)
4906 {
4907 start = binary_search_on_uint4_list(saip->aligncoords, pos, dsp->numseg);
4908 offset = pos - saip->aligncoords[start];
4909 *len = dsp->lens[start]-offset;
4910 if (dsp->starts[(dsp->dim*start) + row - 1] == -1)
4911 return -1;
4912 else
4913 if (dsp->strands[(dsp->dim*start) + row - 1] != Seq_strand_minus)
4914 return (dsp->starts[(dsp->dim*start) + row - 1] + offset);
4915 else
4916 return (dsp->starts[(dsp->dim*start) + row - 1] + dsp->lens[start] - 1 - offset);
4917 } else
4918 {
4919 }
4920 }
4921 return -1;
4922 }
4923
4924
4925 /********************************************************************************
4926 *
4927 * AlnMgrMapRowCoords maps a position in a given row to the bioseq coordinate
4928 * of that row. If master is NULL, the alignment is taken to be flattened;
4929 * otherwise it is an alignment according to that master (this will change the
4930 * correspondence between row coordinates and bioseq coordinates). The return
4931 * value will be either a positive bioseq coordinate, or -1 if the bioseq is
4932 * gapped at that row position.
4933 *
4934 ********************************************************************************/
4935 NLM_EXTERN Int4 AlnMgrMapRowCoords(SeqAlignPtr sap, Uint4 pos, Int4 row, SeqIdPtr master)
4936 {
4937 AMAlignIndexPtr amaip;
4938 AlnMsg amp;
4939 DenseSegPtr dsp;
4940 Boolean more;
4941 Int4 offset;
4942 SAIndexPtr saip;
4943 Int4 start;
4944
4945 if (sap == NULL || row < 0)
4946 return -1;
4947 if (sap->saip == NULL)
4948 return -1;
4949 if (sap->saip->indextype == INDEX_PARENT)
4950 {
4951 amaip = (AMAlignIndexPtr) sap->saip;
4952 if (row > amaip->numrows)
4953 return -1;
4954
4955 AlnMsgReNew(&);
4956 amp.row_num = row;
4957 amp.which_master = master;
4958 amp.from_m = pos;
4959 amp.to_m = pos;
4960 more = AlnMgrGetNextAlnBit(sap, &);
4961 if (more && amp.gap == 0)
4962 start = amp.from_b;
4963 else
4964 start = -1;
4965 return start;
4966 } else if (sap->saip->indextype == INDEX_SEGS)
4967 {
4968 saip = (SAIndexPtr)sap->saip;
4969 dsp = (DenseSegPtr)sap->segs;
4970 if (master == NULL)
4971 {
4972 start = binary_search_on_uint4_list(saip->aligncoords, pos, dsp->numseg);
4973 offset = pos - saip->aligncoords[start];
4974 if (dsp->starts[(dsp->dim*start) + row - 1] == -1)
4975 return -1;
4976 else
4977 if (dsp->strands[(dsp->dim*start) + row - 1] != Seq_strand_minus)
4978 return (dsp->starts[(dsp->dim*start) + row - 1] + offset);
4979 else
4980 return (dsp->starts[(dsp->dim*start) + row - 1] + dsp->lens[start] - 1 - offset);
4981 } else
4982 {
4983 }
4984 }
4985 return -1;
4986 }
4987
4988
4989 /********************************************************************************
4990 *
4991 * AlnMgrMapBioseqToSeqAlign takes a position in bioseq coordinates in a
4992 * row and maps it to seqalign coordinates, using the given master as
4993 * the alignment master (if master is NULL the alignment is flat). A
4994 * return value of -1 indicates an error; a return value of -2 indicates
4995 * that the given bioseq coordinates are not contained in the alignment
4996 * specified.
4997 *
4998 ********************************************************************************/
4999 NLM_EXTERN Int4 AlnMgrMapBioseqToSeqAlign(SeqAlignPtr sap, Int4 pos, Int4 row_num, SeqIdPtr master)
5000 {
5001 Int4 start, stop;
5002
5003 if (sap == NULL || row_num < 0)
5004 return -1;
5005 AlnMgrGetNthSeqRangeInSA(sap, row_num, &start, &stop);
5006 if (pos < start || pos > stop)
5007 return -2;
5008 return AlnMgrMapBioseqToSeqAlignEx(sap, pos, row_num, master, NULL);
5009 }
5010
5011 NLM_EXTERN Int4 AlnMgrMapBioseqToSeqAlignEx(SeqAlignPtr sap, Int4 pos, Int4 row_num, SeqIdPtr master,
5012 Int4 *oldj)
5013 {
5014 AMAlignIndexPtr amaip;
5015 Boolean done;
5016 DenseSegPtr dsp;
5017 Int4 i;
5018 Int4 j;
5019 Int4 k;
5020 Int2 L;
5021 Int4 m;
5022 Int4 mid;
5023 Uint1 n;
5024 Int4 offset;
5025 Int2 R;
5026 SAIndexPtr saip;
5027 Int4 seg;
5028 SASeqDatPtr ssdp;
5029
5030 if (sap == NULL || row_num < 0)
5031 return -1;
5032 i = AlnMgrCheckAlignForParent(sap);
5033 if (i == AM_CHILD)
5034 {
5035 if (master == NULL)
5036 {
5037 saip = (SAIndexPtr)sap->saip;
5038 ssdp = saip->ssdp[row_num-1];
5039 if (ssdp == NULL)
5040 return -1;
5041 dsp = (DenseSegPtr)sap->segs;
5042 L = 0;
5043 R = ssdp->numsect - 1;
5044 n = AlnMgrGetNthStrand(sap, row_num);
5045 if (n != Seq_strand_minus)
5046 {
5047 while (L < R)
5048 {
5049 mid = (L + R)/2;
5050 if (dsp->starts[dsp->dim*ssdp->sect[mid+1]+row_num-1] <= pos)
5051 L = mid+1;
5052 else
5053 R = mid;
5054 }
5055 } else
5056 {
5057 while (L < R)
5058 {
5059 mid = (L + R)/2;
5060 if (dsp->starts[dsp->dim*ssdp->sect[mid]+row_num-1] > pos)
5061 L = mid+1;
5062 else
5063 R = mid;
5064 }
5065 }
5066 seg = L;
5067 offset = pos - dsp->starts[dsp->dim*ssdp->sect[seg]+row_num-1];
5068 if (n!=Seq_strand_minus)
5069 return (saip->aligncoords[ssdp->sect[seg]] + offset);
5070 else
5071 return (saip->aligncoords[ssdp->sect[seg]] + dsp->lens[ssdp->sect[seg]] - offset -1);
5072 } else
5073 {
5074 }
5075 } else if (i == AM_PARENT)
5076 {
5077 amaip = (AMAlignIndexPtr)sap->saip;
5078 if (row_num > amaip->numrows)
5079 return -1;
5080 if (amaip->mstype != AM_SEGMENTED_MASTERSLAVE)
5081 {
5082 j = k = 0;
5083 m = -1;
5084 done = FALSE;
5085 while (!done && j < amaip->numseg)
5086 {
5087 k = AlnMgrMapRowCoords(sap, amaip->aligncoords[j], row_num, master);
5088 if (k == -1)
5089 j++;
5090 else if (k > pos)
5091 done = TRUE;
5092 else if (k <= pos)
5093 {
5094 m = j;
5095 offset = pos - k;
5096 j++;
5097 }
5098 }
5099 if (m == -1 || offset >= amaip->lens[m])
5100 return -2;
5101 n = AlnMgrGetNthStrand(sap, row_num);
5102 if (n != Seq_strand_minus)
5103 {
5104 return (amaip->aligncoords[m] + offset);
5105 } else
5106 {
5107 return (amaip->aligncoords[m] + amaip->lens[m] - 1 - offset);
5108 }
5109 } else
5110 {
5111 if(oldj) j = *oldj;
5112 else j = 0;
5113 k = 0;
5114 done = FALSE;
5115 while (!done && j < amaip->aligncoords[amaip->numseg-1]+amaip->lens[amaip->numseg-1])
5116 {
5117 k = AlnMgrMapRowCoords(sap, j, row_num, master);
5118 if (k == -1 || k < pos)
5119 j++;
5120 else if (k > pos)
5121 return -2;
5122 else if (k == pos)
5123 return j;
5124 }
5125 if (!done)
5126 return -2;
5127 }
5128 } else
5129 return -1;
5130 return -1;
5131 }
5132
5133
5134 /***********************************************************************
5135 *
5136 * AlnMgrMakeFakeMultiple calls AlnMgrCheckOverlapping to decide whether
5137 * an alignment is linear. Then, if possible, it calls AlnMgrMakeAlignCoords
5138 * to create alignment coordinates across all children contained in the
5139 * parent. (MULT)
5140 *
5141 ***********************************************************************/
5142 NLM_EXTERN Boolean AlnMgrMakeFakeMultiple(SeqAlignPtr sap)
5143 {
5144 return AlnMgrMakeFakeMultipleEx(sap, FALSE);
5145 }
5146
5147 static Boolean AlnMgrMakeFakeMultipleEx(SeqAlignPtr sap, Boolean forcestraightms)
5148 {
5149 AMAlignDatPtr amadp;
5150 AMAlignIndexPtr amaip;
5151 Int4 i;
5152 Int4 j;
5153 Boolean ms;
5154 Int4 n;
5155 Boolean nogap;
5156 Boolean retval;
5157 int unaligned=0;
5158
5159 retval = FALSE;
5160 if (!sap)
5161 return retval;
5162 i = AlnMgrCheckAlignForParent(sap);
5163 if (i<0)
5164 {
5165 return retval;
5166 }
5167 if (i==AM_PARENT)
5168 {
5169 n = AlnMgrCheckOrdered(sap);
5170 if (n == AM_ORDERED)
5171 {
5172 sap->type = SAT_PARTIAL;
5173 amaip = (AMAlignIndexPtr)sap->saip;
5174 amaip->alnsaps = amaip->numsaps;
5175 amaip->startsize = (amaip->alnsaps)*(amaip->alnsaps);
5176 amaip->starts = (Int4Ptr)MemNew((amaip->alnsaps)*(amaip->alnsaps)*sizeof(Int4));
5177 amaip->lens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
5178 amaip->ulens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
5179 amaip->numseg = amaip->alnsaps;
5180 for (j=0; j<(amaip->alnsaps); j++)
5181 {
5182 amaip->lens[j] = AlnMgrGetAlnLength(amaip->saps[j], FALSE);
5183 amaip->starts[j] = 0;
5184 }
5185 AlnMgrMakeAlignCoords(sap);
5186 if (!AlnMgrMakeRowsForOrdered(sap))
5187 return FALSE;
5188 for (j=0; j<(amaip->alnsaps-1); j++)
5189 {
5190 amaip->ulens[j] = AlnMgrGetMaxUnalignedLength(amaip->saps[j], amaip->saps[j+1]);
5191 unaligned += amaip->ulens[j];
5192 }
5193 { /* HS 07/24/00 */
5194 sap->master = AlnMgrFindMaster(sap);
5195 ms = FALSE;
5196 ms = AlnMgrCheckRealMaster(sap, sap->master);
5197 if (sap->master && ms == TRUE)
5198 {
5199 sap->type = SAT_MASTERSLAVE;
5200 /* if there's more than 1 denseseg, call it segmented, DIH, 7/27/00 */
5201 if (amaip->numseg > 1) {
5202 /* if (unaligned > 0) { */
5203 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
5204 } else {
5205 amaip->mstype = AM_MASTERSLAVE;
5206 }
5207 AlnMgrSetMaster(sap, sap->master);
5208 AlnMgrMakeMasterPlus(sap);
5209 n = AlnMgrGetNForSip(sap, sap->master);
5210 sap->type = SAT_MASTERSLAVE;
5211 amaip->master = n;
5212 } else {
5213 sap->master=NULL;
5214 }
5215 }
5216 return TRUE;
5217 }
5218 n = AlnMgrCheckOverlapping(sap);
5219 nogap = am_check_gaps(sap);
5220 if (n == NO_OVERLAP)
5221 {
5222 sap->type = SAT_PARTIAL;
5223 amaip = (AMAlignIndexPtr)sap->saip;
5224 if (amaip->saps)
5225 MemFree(amaip->saps);
5226 amaip->saps = AlnMgrSortSeqAligns((SeqAlignPtr)(sap->segs), AlnMgrFindFirst, amaip, &amaip->numsaps);
5227 amaip->alnsaps = amaip->numsaps;
5228 amaip->startsize = (amaip->alnsaps)*(amaip->alnsaps);
5229 amaip->starts = (Int4Ptr)MemNew((amaip->alnsaps)*(amaip->alnsaps)*sizeof(Int4));
5230 amaip->lens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
5231 amaip->ulens = (Int4Ptr)MemNew((amaip->alnsaps)*sizeof(Int4));
5232 amaip->numseg = amaip->alnsaps;
5233 for (j=0; j<(amaip->alnsaps); j++)
5234 {
5235 amaip->lens[j] = AlnMgrGetAlnLength(amaip->saps[j], FALSE);
5236 amaip->starts[j] = 0;
5237 }
5238 AlnMgrMakeAlignCoords(sap);
5239 if (!AlnMgrGetRowsForPartial(sap))
5240 return retval;
5241 for (j=0; j<(amaip->alnsaps-1); j++)
5242 {
5243 amaip->ulens[j] = AlnMgrGetMaxUnalignedLength(amaip->saps[j], amaip->saps[j+1]);
5244 unaligned += amaip->ulens[j];
5245 }
5246 { /* HS */
5247 sap->master = AlnMgrFindMaster(sap);
5248 ms = FALSE;
5249 ms = AlnMgrCheckRealMaster(sap, sap->master);
5250 if (sap->master && ms == TRUE)
5251 {
5252 sap->type = SAT_MASTERSLAVE;
5253 if(unaligned>0) {
5254 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
5255 } else {
5256 amaip->mstype = AM_MASTERSLAVE;
5257 }
5258 AlnMgrSetMaster(sap, sap->master);
5259 AlnMgrMakeMasterPlus(sap);
5260 n = AlnMgrGetNForSip(sap, sap->master);
5261 sap->type = SAT_MASTERSLAVE;
5262 amaip->master = n;
5263 } else {
5264 sap->master=NULL;
5265 }
5266 }
5267 retval = TRUE;
5268 } else /*should add function to check for pairwise multiple vs. diags*/
5269 {
5270 amaip = (AMAlignIndexPtr)sap->saip;
5271 if (amaip->saps)
5272 MemFree(amaip->saps);
5273 sap->master = AlnMgrFindMaster(sap);
5274 amaip->alnsaps = amaip->numsaps;
5275 ms = FALSE;
5276 ms = AlnMgrCheckRealMaster(sap, sap->master);
5277 if (sap->master && ms == TRUE)
5278 {
5279 retval = TRUE;
5280 AlnMgrSetMaster(sap, sap->master);
5281 AlnMgrMakeMasterPlus(sap);
5282 n = AlnMgrGetNForSip(sap, sap->master);
5283 sap->type = SAT_MASTERSLAVE;
5284 amaip->master = n; /* HS 7/24/00 was ==1 */
5285 amaip->numseg = AlnMgrGetMaxSegments((SeqAlignPtr)(sap->segs));
5286 amaip->alnsaps = amaip->numsaps;
5287 amaip->lens = (Int4Ptr)MemNew((amaip->numseg)*sizeof(Int4));
5288 amadp = amaip->amadp[n-1];
5289 amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
5290 for (j=0; j<amaip->numsaps; j++)
5291 {
5292 amaip->saps[j] = amadp->saps[j];
5293 }
5294 if (forcestraightms || amaip->numsaps < amaip->numbsqs)
5295 {
5296 amaip->ids = SeqIdSetFree(amaip->ids);
5297 amaip->ids = AlnMgrPropagateSeqIdsBySapList(amaip);
5298 if (!AlnMgrMergeIntoMSMultByMaster(amaip, amaip->lens, &amaip->numseg))
5299 retval = FALSE;
5300 amaip->startsize = (amaip->numseg)*(amaip->numsaps);
5301 amaip->starts = (Int4Ptr)MemNew((amaip->numseg)*(amaip->numsaps)*sizeof(Int4));
5302 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->numseg)*sizeof(Uint4));
5303 if (!AlnMgrFillInStarts(amadp->saps, amaip->starts, amaip->numseg, amaip->lens, amaip->numsaps, amaip->aligncoords))
5304 retval = FALSE;
5305 if (amaip->numseg > 1)
5306 amaip->numseg -= 1;
5307 if (!AlnMgrMakeMultSegments(amaip))
5308 retval = FALSE;
5309 if (!AlnMgrGetRowsForMasterSlave(sap))
5310 retval = FALSE;
5311 } else
5312 retval = FALSE;
5313 }
5314 if (retval == FALSE && sap->master != NULL)
5315 {
5316 if (AlnMgrMakeSegmentedMasterSlave(sap))
5317 {
5318 sap->type = SAT_MASTERSLAVE;
5319 amaip->ids = SeqIdSetFree(amaip->ids);
5320 amaip->ids = AlnMgrPropagateSeqIdsByRow(amaip);
5321 retval = TRUE;
5322 } else
5323 {
5324 if (AlnMgrForceMasterSlave(sap))
5325 {
5326 amaip->ids = SeqIdSetFree(amaip->ids);
5327 amaip->ids = AlnMgrPropagateSeqIdsByRow(amaip);
5328 amaip->mstype = AM_MASTERSLAVE;
5329 retval = TRUE;
5330 }
5331 }
5332 } else
5333 {
5334 amaip->mstype = AM_MASTERSLAVE;
5335 }
5336 }
5337 }
5338 return retval;
5339 }
5340
5341 /**********************************************************************
5342 *
5343 * AlnMgrNeatlyIndex is a very specialized function for structure
5344 * alignments and other alignments that have an implied row order.
5345 * It puts an index on the first seqalign in the set, and that index
5346 * only designates which seqaligns belong in which rows. The input
5347 * alignments are not changed at all, and only the first alignment is
5348 * indexed. This alignment cannot be displayed or accessed; it's only
5349 * a way to designate rows.
5350 *
5351 **********************************************************************/
5352 NLM_EXTERN Boolean AlnMgrNeatlyIndex(SeqAlignPtr sap)
5353 {
5354 AMAlignIndexPtr amaip;
5355 Int4 numrows;
5356 SeqAlignPtr salp;
5357
5358 if (sap == NULL)
5359 return FALSE;
5360 if (sap->saip != NULL)
5361 {
5362 if (sap->saip->indextype == INDEX_PARENT)
5363 AMAlignIndexFree((Pointer)(sap->saip));
5364 else if (sap->saip->indextype == INDEX_SEGS)
5365 SAIndexFree((Pointer)(sap->saip));
5366 }
5367 sap->saip = NULL;
5368 if (sap->next != NULL)
5369 salp = sap;
5370 else if (sap->segtype == SAS_DISC)
5371 salp = (SeqAlignPtr)(sap->segs);
5372 else
5373 salp = sap;
5374 numrows = 0;
5375 while (salp != NULL)
5376 {
5377 numrows++;
5378 salp = salp->next;
5379 }
5380 amaip = AMAlignIndexNew();
5381 sap->saip = (SeqAlignIndexPtr)(amaip);
5382 amaip->mstype = AM_NEATINDEX;
5383 amaip->master = 1;
5384 if (sap->next != NULL)
5385 salp = sap;
5386 else if (sap->segtype == SAS_DISC)
5387 salp = (SeqAlignPtr)(sap->segs);
5388 else
5389 salp = sap;
5390 amaip->saps = (SeqAlignPtr PNTR)MemNew(numrows*sizeof(SeqAlignPtr));
5391 numrows = 0;
5392 while (salp != NULL)
5393 {
5394 amaip->saps[numrows] = salp;
5395 numrows++;
5396 salp = salp->next;
5397 }
5398 amaip->numrows = numrows;
5399 amaip->master = 1;
5400 return TRUE;
5401 }
5402
5403 static void AlnMgrMergeNeighbors(SeqAlignPtr salp)
5404 {
5405 DenseDiagPtr ddp;
5406 DenseDiagPtr ddp_prev;
5407 Boolean gap;
5408 Int4 i;
5409
5410 if (salp == NULL)
5411 return;
5412 if (salp->segtype == SAS_DENDIAG)
5413 {
5414 ddp_prev = (DenseDiagPtr)(salp->segs);
5415 while (ddp_prev && ddp_prev->next != NULL)
5416 {
5417 ddp = ddp_prev->next;
5418 if (ddp_prev->dim != ddp->dim)
5419 return;
5420 gap = FALSE;
5421 for (i=0; i<ddp->dim && !gap; i++)
5422 {
5423 if (ddp->strands != NULL && ddp->strands[i] == Seq_strand_minus)
5424 {
5425 if (ddp->starts[i] + ddp->len < ddp_prev->starts[i])
5426 gap = TRUE;
5427 } else
5428 {
5429 if (ddp_prev->starts[i] + ddp_prev->len < ddp->starts[i])
5430 gap = TRUE;
5431 }
5432 }
5433 if (gap == FALSE)
5434 {
5435 if (ddp->strands != NULL && ddp->strands[i] == Seq_strand_minus)
5436 {
5437 ddp_prev->len += ddp->len;
5438 for (i=0; i<ddp->dim; i++)
5439 {
5440 ddp_prev->starts[i] = ddp->starts[i];
5441 }
5442 } else
5443 ddp_prev->len += ddp->len;
5444 ddp_prev->next = ddp->next;
5445 ddp->next = NULL;
5446 DenseDiagFree(ddp);
5447 } else
5448 ddp_prev = ddp_prev->next;
5449 }
5450 } else if (salp->segtype == SAS_DISC)
5451 {
5452 return;
5453 } else
5454 return;
5455 return;
5456 }
5457
5458 /**********************************************************************
5459 *
5460 * AlnMgrTossNeatRows is called to create a subset of the NeatlyIndexed
5461 * alignment, only containing certain rows. The array throwarray, of
5462 * length len, contains the (1-based) numbers of the rows to be left
5463 * out. The function returns a duplicated alignment, which is not
5464 * yet indexed. This function assumes a very strict row structure --
5465 * each row is represented by a single seqalign or seqalign set, and
5466 * the first row is the master.
5467 *
5468 **********************************************************************/
5469 NLM_EXTERN SeqAlignPtr AlnMgrTossNeatRows(SeqAlignPtr sap, Int4Ptr throwarray, Int4 len)
5470 {
5471 AMAlignIndexPtr amaip;
5472 Int4 i;
5473 Int4 n;
5474 SeqAlignPtr salp;
5475 SeqAlignPtr sap_head;
5476 SeqAlignPtr sap_new;
5477 SeqAlignPtr sap_prev;
5478 Boolean toss;
5479
5480 if (sap == NULL || throwarray == NULL)
5481 return NULL;
5482 if (sap->saip == NULL)
5483 return NULL;
5484 if (sap->next != NULL)
5485 salp = sap;
5486 else if (sap->segtype == SAS_DISC)
5487 salp = (SeqAlignPtr)(sap->segs);
5488 else
5489 salp = sap;
5490 amaip = (AMAlignIndexPtr)(sap->saip);
5491 if (amaip->mstype != AM_NEATINDEX)
5492 return NULL;
5493 i = 2;
5494 sap_head = NULL;
5495 while (salp != NULL)
5496 {
5497 toss = FALSE;
5498 for (n=0; n<len; n++)
5499 {
5500 if (throwarray[n] == i)
5501 toss = TRUE;
5502 }
5503 if (toss == FALSE)
5504 {
5505 sap_new = SeqAlignDup(salp);
5506 if (sap_head != NULL)
5507 {
5508 sap_prev->next = sap_new;
5509 sap_prev = sap_new;
5510 } else
5511 sap_prev = sap_head = sap_new;
5512 }
5513 salp = salp->next;
5514 i++;
5515 }
5516 return sap_head;
5517 }
5518
5519 /***************************************************************************
5520 *
5521 * AlnMgrMakeMultByIntersectOnMaster is a specialized function that
5522 * truncates the segments of segmented master-slave alignments to
5523 * force them to line up:
5524 *
5525 * Master XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
5526 * seq1 XXXXXXXX XXXXXXXXXXXXXX
5527 * seq2 XXXXXXXXXX XXXXXXX
5528 * seq3 XXXXXXXXXXXXXXXXXXXXXXX
5529 *
5530 * becomes
5531 *
5532 * Master XXXXXX XXXXXXX
5533 * seq1 XXXXXX XXXXXXX
5534 * seq2 XXXXXX XXXXXXX
5535 * seq3 XXXXXX XXXXXXX
5536 *
5537 * This indexing is different from the other type -- the input
5538 * alignments may actually be truncated, instead of just rearranged.
5539 * If allinblock is TRUE, then the function throws away any blocks that
5540 * have a missing sequence.
5541 *
5542 ***************************************************************************/
5543 NLM_EXTERN Boolean AlnMgrMakeMultByIntersectOnMaster(SeqAlignPtr sap, Boolean allinblock)
5544 {
5545 AMmsmsPtr ams;
5546 AMmsmsPtr ams_master;
5547 AMmsmsPtr ams_tmp;
5548 Int4 numrows;
5549 Int4Ptr rowarray;
5550 SeqAlignPtr salp;
5551 SeqIdPtr sip_tmp;
5552
5553 if (sap == NULL)
5554 return FALSE;
5555 if (sap->saip != NULL)
5556 {
5557 if (sap->saip->indextype == INDEX_PARENT)
5558 AMAlignIndexFree((Pointer)(sap->saip));
5559 else if (sap->saip->indextype == INDEX_SEGS)
5560 SAIndexFree((Pointer)(sap->saip));
5561 }
5562 sap->saip = NULL;
5563 if (sap->segtype == SAS_DISC)
5564 salp = (SeqAlignPtr)(sap->segs);
5565 else
5566 salp = sap;
5567 while (salp != NULL)
5568 {
5569 /* AlnMgrMergeNeighbors(salp); */ /* dih - 3/21/01 */
5570 salp = salp->next;
5571 }
5572 rowarray = NULL;
5573 if (allinblock)
5574 {
5575 numrows = am_guess_numrows(sap);
5576 rowarray = am_save_rowinfo(sap, numrows-1);
5577 } else
5578 numrows = 0;
5579 if (!AlnMgrUnpackSeqAlign(sap))
5580 return FALSE;
5581 if (!AlnMgrRearrangeUnpacked(sap))
5582 return FALSE;
5583 if (!AlnMgrIndexLinkedSegs((SeqAlignPtr)(sap->segs)))
5584 return FALSE;
5585 if (sap->master == NULL) /* if a master is already set, believe it */
5586 sip_tmp = am_find_master(sap); /* if not, find it */
5587 if (sip_tmp == NULL)
5588 return FALSE;
5589 am_set_master(sap, sip_tmp);
5590 SeqIdFree(sip_tmp);
5591 ams = AlnMgrFindOverlapOnMaster(sap);
5592 if (ams == NULL)
5593 return FALSE;
5594 if (allinblock)
5595 am_set_rows(ams, rowarray, numrows);
5596 ams_master = AlnMgrTruncateByOverlap(sap, ams);
5597 if (ams_master == NULL)
5598 return FALSE;
5599 if (!AlnMgrCarefulIndex(sap, ams, &ams_master, allinblock, numrows))
5600 return FALSE;
5601 while (ams)
5602 {
5603 ams_tmp = ams->next;
5604 ams->next = NULL;
5605 ams->sip = NULL;
5606 ams->sap = NULL;
5607 MemFree(ams);
5608 ams = ams_tmp;
5609 }
5610 while (ams_master)
5611 {
5612 ams_tmp = ams_master->next;
5613 ams_master->next = NULL;
5614 ams_master->sip = NULL;
5615 ams_master->sap = NULL;
5616 MemFree(ams_master);
5617 ams_master = ams_tmp;
5618 }
5619 if (rowarray != NULL)
5620 MemFree(rowarray);
5621 return TRUE;
5622 }
5623
5624 static Boolean am_is_new_row(SeqIdPtr sip1, SeqIdPtr sip2)
5625 {
5626 if (sip1 == NULL || sip2 == NULL)
5627 return FALSE;
5628 while (sip1 != NULL && sip2 != NULL)
5629 {
5630 if (SeqIdComp(sip1, sip2) != SIC_YES)
5631 return TRUE;
5632 sip1 = sip1->next;
5633 sip2 = sip2->next;
5634 }
5635 if (sip1 != NULL || sip2 != NULL)
5636 return TRUE;
5637 return FALSE;
5638 }
5639
5640 NLM_EXTERN Int4 am_guess_numrows(SeqAlignPtr sap)
5641 {
5642 DenseDiagPtr ddp;
5643 Boolean disc;
5644 DenseSegPtr dsp;
5645 SeqIdPtr id_prev;
5646 Int4 n;
5647 SeqAlignPtr salp;
5648
5649 if (sap == NULL)
5650 return 0;
5651 disc = FALSE;
5652 if (sap->segtype == SAS_DISC)
5653 {
5654 disc = TRUE;
5655 salp = (SeqAlignPtr)(sap->segs);
5656 sap = sap->next;
5657 } else
5658 salp = sap;
5659 n = 0;
5660 id_prev = NULL;
5661 while (salp)
5662 {
5663 if (salp->segtype == SAS_DENSEG)
5664 {
5665 dsp = (DenseSegPtr)(salp->segs);
5666 if (am_is_new_row(dsp->ids, id_prev) || salp->next == NULL)
5667 n+= dsp->dim - 1;
5668 id_prev = dsp->ids;
5669 } else if (salp->segtype == SAS_DENDIAG)
5670 {
5671 ddp = (DenseDiagPtr)(salp->segs);
5672 n+=ddp->dim;
5673 n = n-1;
5674 id_prev = ddp->id;
5675 } else if (salp->segtype == SAS_DISC)
5676 n++;
5677 salp = salp->next;
5678 if (salp == NULL)
5679 {
5680 if (disc && sap!=NULL)
5681 {
5682 if (sap->segtype == SAS_DISC)
5683 {
5684 salp = (SeqAlignPtr)(sap->segs);
5685 sap = sap->next;
5686 }
5687 }
5688 }
5689 }
5690 return (n+1);
5691 }
5692
5693 static Int4Ptr am_save_rowinfo(SeqAlignPtr sap, Int4 numrows)
5694 {
5695 DenseDiagPtr ddp;
5696 Int4 dim_prev;
5697 Boolean disc;
5698 DenseSegPtr dsp;
5699 Int4 i;
5700 SeqIdPtr id_prev;
5701 Int4 n;
5702 Int4 num;
5703 Int4Ptr rowarray;
5704 SeqAlignPtr salp;
5705 SeqAlignPtr sap2;
5706
5707 if (sap == NULL)
5708 return 0;
5709 disc = FALSE;
5710 if (sap->segtype == SAS_DISC)
5711 {
5712 disc = TRUE;
5713 salp = (SeqAlignPtr)(sap->segs);
5714 sap = sap->next;
5715 } else
5716 salp = sap;
5717 n = 0;
5718 id_prev = NULL;
5719 dim_prev = 0;
5720 num = 1;
5721 rowarray = (Int4Ptr)MemNew((numrows+1)*sizeof(Int4));
5722 while (salp)
5723 {
5724 if (salp->segtype == SAS_DENSEG)
5725 {
5726 dsp = (DenseSegPtr)(salp->segs);
5727 if (am_is_new_row(dsp->ids, id_prev) || salp->next == NULL)
5728 {
5729 for (i=n; i<(n+dim_prev-1); i++)
5730 {
5731 rowarray[i] = num;
5732 }
5733 n += dim_prev-1;
5734 num = 1;
5735 } else
5736 num++;
5737 id_prev = dsp->ids;
5738 dim_prev = dsp->dim;
5739 } else if (salp->segtype == SAS_DENDIAG)
5740 {
5741 ddp = (DenseDiagPtr)(salp->segs);
5742 num = 0;
5743 while (ddp)
5744 {
5745 num++;
5746 ddp = ddp->next;
5747 }
5748 ddp = (DenseDiagPtr)(salp->segs);
5749 for (i=n; i<(n+ddp->dim-1); i++)
5750 {
5751 rowarray[i] = num;
5752 }
5753 n+=ddp->dim;
5754 id_prev = ddp->id;
5755 n = n-1;
5756 } else if (salp->segtype == SAS_DISC)
5757 {
5758 sap2 = (SeqAlignPtr)(salp->segs);
5759 num = 0;
5760 while (sap2 != NULL)
5761 {
5762 num++;
5763 sap2 = sap2->next;
5764 }
5765 sap2 = (SeqAlignPtr)(salp->segs);
5766 for (i=n; i<(n+sap2->dim-1); i++)
5767 {
5768 rowarray[i] = num;
5769 }
5770 n+=sap2->dim;
5771 n = n - 1;
5772 }
5773 salp = salp->next;
5774 if (salp == NULL)
5775 {
5776 if (disc && sap!=NULL)
5777 {
5778 if (sap->segtype == SAS_DISC)
5779 {
5780 salp = (SeqAlignPtr)(sap->segs);
5781 sap = sap->next;
5782 }
5783 }
5784 }
5785 }
5786 return rowarray;
5787 }
5788
5789 static void am_set_rows(AMmsmsPtr ams, Int4Ptr rowarray, Int4 numrows)
5790 {
5791 Int4 i;
5792 Int4 n;
5793
5794 if (ams == NULL || rowarray == NULL)
5795 return;
5796 for (n=0; n<numrows; n++)
5797 {
5798 if (ams == NULL)
5799 return;
5800 ams->stop = 1;
5801 for (i=0; i<rowarray[n]; i++)
5802 {
5803 ams = ams->next;
5804 }
5805 }
5806 return;
5807 }
5808
5809 static void am_set_master(SeqAlignPtr sap, SeqIdPtr sip)
5810 {
5811 SeqAlignPtr salp;
5812
5813 if (sap == NULL || sip == NULL)
5814 return;
5815 if (sap->segtype == SAS_DISC)
5816 {
5817 if (sap->master != NULL)
5818 SeqIdSetFree(sap->master);
5819 sap->master = SeqIdDup(sip);
5820 salp = (SeqAlignPtr)(sap->segs);
5821 } else
5822 salp = sap;
5823 while (salp)
5824 {
5825 if (salp->master != NULL)
5826 SeqIdSetFree(salp->master);
5827 salp->master = SeqIdDup(sip);
5828 salp = salp->next;
5829 }
5830 return;
5831 }
5832
5833 static SeqIdPtr am_find_master(SeqAlignPtr sap)
5834 {
5835 DenseDiagPtr ddp;
5836 Boolean done;
5837 DenseSegPtr dsp;
5838 Boolean found;
5839 Boolean here;
5840 SeqAlignPtr salp;
5841 SeqAlignPtr sap_tmp;
5842 SeqIdPtr sip;
5843 SeqIdPtr sip_head;
5844 SeqIdPtr sip_tmp;
5845
5846 if (sap == NULL)
5847 return NULL;
5848 if (sap->segtype == SAS_DISC)
5849 salp = (SeqAlignPtr)(sap->segs);
5850 else
5851 salp = sap;
5852 if (salp->segtype == SAS_DENSEG)
5853 {
5854 dsp = (DenseSegPtr)(salp->segs);
5855 sip_head = dsp->ids;
5856 } else if (salp->segtype == SAS_DENDIAG)
5857 {
5858 ddp = (DenseDiagPtr)(salp->segs);
5859 sip_head = ddp->id;
5860 }
5861 sip = sip_head;
5862 done = FALSE;
5863 while (sip && !done)
5864 {
5865 sap_tmp = salp->next;
5866 found = TRUE;
5867 while (sap_tmp != NULL && found)
5868 {
5869 if (sap_tmp->segtype == SAS_DENSEG)
5870 {
5871 dsp = (DenseSegPtr)(sap_tmp->segs);
5872 sip_tmp = dsp->ids;
5873 } else if (sap_tmp->segtype == SAS_DENDIAG)
5874 {
5875 ddp = (DenseDiagPtr)(sap_tmp->segs);
5876 sip_tmp = ddp->id;
5877 }
5878 here = FALSE;
5879 while (sip_tmp != NULL && !here)
5880 {
5881 if (SAM_OrderSeqID(sip_tmp, sip) == 0)
5882 here = TRUE;
5883 sip_tmp = sip_tmp->next;
5884 }
5885 if (!here)
5886 found = FALSE;
5887 sap_tmp = sap_tmp->next;
5888 }
5889 if (found)
5890 done = TRUE;
5891 else
5892 sip = sip->next;
5893 }
5894 if (!done)
5895 return NULL;
5896 else
5897 return (SeqIdDup(sip));
5898 }
5899
5900 static AMmsmsPtr AlnMgrFindOverlapOnMaster(SeqAlignPtr sap)
5901 {
5902 AMmsmsPtr ams;
5903 AMmsmsPtr ams_head;
5904 AMmsmsPtr ams_prev;
5905 SeqAlignPtr salp;
5906 SeqAlignPtr salp_tmp;
5907
5908 if (sap == NULL)
5909 return NULL;
5910 if (sap->master == NULL)
5911 return NULL;
5912 ams_head = ams_prev = NULL;
5913 while (sap)
5914 {
5915 if (sap->segtype == SAS_DISC)
5916 {
5917 salp = (SeqAlignPtr)(sap->segs);
5918 while (salp)
5919 {
5920 if (salp->segtype == SAS_DISC)
5921 {
5922 salp_tmp = (SeqAlignPtr)(sap->segs);
5923 while (salp_tmp)
5924 {
5925 ams = am_create_overlap(salp_tmp);
5926 if (ams == NULL)
5927 return NULL;
5928 if (ams_head != NULL)
5929 {
5930 ams_prev->next = ams;
5931 while (ams->next)
5932 {
5933 ams = ams->next;
5934 }
5935 ams_prev = ams;
5936 } else
5937 {
5938 ams_head = ams;
5939 while (ams->next)
5940 {
5941 ams = ams->next;
5942 }
5943 ams_prev = ams;
5944 }
5945 }
5946 } else
5947 {
5948 ams = am_create_overlap(salp);
5949 if (ams == NULL)
5950 return NULL;
5951 if (ams_head != NULL)
5952 {
5953 ams_prev->next = ams;
5954 while (ams->next)
5955 {
5956 ams = ams->next;
5957 }
5958 ams_prev = ams;
5959 } else
5960 {
5961 ams_head = ams;
5962 while (ams->next)
5963 {
5964 ams = ams->next;
5965 }
5966 ams_prev = ams;
5967 }
5968 }
5969 salp = salp->next;
5970 }
5971 } else
5972 {
5973 ams = am_create_overlap(sap);
5974 if (ams == NULL)
5975 return NULL;
5976 if (ams_head != NULL)
5977 {
5978 ams_prev->next = ams;
5979 while (ams->next)
5980 {
5981 ams = ams->next;
5982 }
5983 ams_prev = ams;
5984 } else
5985 {
5986 ams_head = ams;
5987 while (ams->next)
5988 {
5989 ams = ams->next;
5990 }
5991 ams_prev = ams;
5992 }
5993 }
5994 sap = sap->next;
5995 }
5996 return ams_head;
5997 }
5998
5999 static AMmsmsPtr am_create_overlap(SeqAlignPtr sap)
6000 {
6001 AMmsmsPtr ams;
6002 AMmsmsPtr ams_head;
6003 AMmsmsPtr ams_prev;
6004 DenseDiagPtr ddp;
6005 DenseSegPtr dsp;
6006 Boolean found;
6007 SeqIdPtr id;
6008 Int4 n;
6009 SeqAlignPtr sap_tmp;
6010 Int4 start;
6011 Int4 stop;
6012
6013 if (sap == NULL || sap->master == NULL)
6014 return NULL;
6015 ams_head = ams_prev = NULL;
6016 if (sap->segtype == SAS_DENSEG)
6017 {
6018 dsp = (DenseSegPtr)(sap->segs);
6019 id = (dsp->ids);
6020 found = FALSE;
6021 n = 0;
6022 while (id!=NULL && !found)
6023 {
6024 n++;
6025 if (SAM_OrderSeqID(id, sap->master)== 0)
6026 found = TRUE;
6027 id = id->next;
6028 }
6029 if (!found || n>dsp->dim)
6030 return NULL;
6031 if (dsp->strands != NULL)
6032 {
6033 if (dsp->strands[n-1] == Seq_strand_minus)
6034 {
6035 sap_tmp = sap->next;
6036 sap->next = NULL;
6037 SeqAlignListReverseStrand(sap);
6038 sap->next = sap_tmp;
6039 }
6040 }
6041 if (!AlnMgrIndexSingleChildSeqAlign(sap))
6042 return NULL;
6043 AlnMgrGetNthSeqRangeInSA(sap, n, &start, &stop);
6044 ams = (AMmsmsPtr)MemNew(sizeof(AMmsms));
6045 ams->sap = sap;
6046 ams->n = n;
6047 ams->sstart = start;
6048 ams->sstop = stop;
6049 ams->count = AlnMgrGetNumRows(sap);
6050 ams->count = ams->count - 1;
6051 return ams;
6052 } else if (sap->segtype == SAS_DENDIAG)
6053 {
6054 ddp = (DenseDiagPtr)(sap->segs);
6055 while (ddp)
6056 {
6057 id = ddp->id;
6058 found = FALSE;
6059 n = 0;
6060 while (id!=NULL && !found)
6061 {
6062 n++;
6063 if (SeqIdComp(id, sap->master) == SIC_YES)
6064 found = TRUE;
6065 id = id->next;
6066 }
6067 if (!found || n > ddp->dim)
6068 {
6069 while (ams_head != NULL)
6070 {
6071 ams = ams_head->next;
6072 ams_head->next = NULL;
6073 MemFree(ams_head);
6074 ams_head = ams;
6075 }
6076 return NULL;
6077 }
6078 if (ddp->strands != NULL)
6079 {
6080 if (ddp->strands[n-1] == Seq_strand_minus)
6081 am_densediag_reverse(ddp);
6082 }
6083 start = ddp->starts[n-1];
6084 stop = start + ddp->len-1;
6085 ams = (AMmsmsPtr)MemNew(sizeof(AMmsms));
6086 ams->sip = id;
6087 ams->sap = sap;
6088 ams->n = n;
6089 ams->sstart = start;
6090 ams->sstop = stop;
6091 ams->count = AlnMgrGetNumRows(sap);
6092 ams->count = ams->count - 1;
6093 if (ams_head != NULL)
6094 {
6095 ams_prev->next = ams;
6096 ams_prev = ams;
6097 } else
6098 ams_head = ams_prev = ams;
6099 ddp = ddp->next;
6100 }
6101 return ams_head;
6102 } else
6103 return NULL;
6104 }
6105
6106 static void am_densediag_reverse(DenseDiagPtr ddp)
6107 {
6108 Int4 i;
6109
6110 if (ddp == NULL || ddp->strands == NULL)
6111 return;
6112 for (i=0; i<ddp->dim; i++)
6113 {
6114 if (ddp->strands[i] == Seq_strand_minus)
6115 ddp->strands[i] = Seq_strand_plus;
6116 else
6117 ddp->strands[i] = Seq_strand_minus;
6118 }
6119 }
6120
6121 static AMmsmsPtr AlnMgrConstructOverlaps(AMmsmsPtr ams_head)
6122 {
6123 AMmsmsPtr ams;
6124 AMmsmsPtr ams_master;
6125 AMmsmsPtr ams_mhead;
6126 AMmsmsPtr ams_mprev;
6127 AMmsmsPtr ams_new;
6128 Boolean found;
6129 Int4 i;
6130 Int4 n;
6131 Int4 open;
6132 Int4 start;
6133 AMTinyInfoPtr tip;
6134 AMTinyInfoPtr PNTR tiparray;
6135 AMTinyInfoPtr tip_head;
6136 AMTinyInfoPtr tip_prev;
6137
6138 if (ams_head == NULL)
6139 return NULL;
6140 n = 0;
6141 tip_head = NULL;
6142 ams = ams_head;
6143 while (ams != NULL)
6144 {
6145 tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
6146 tip->start = ams->sstart;
6147 tip->which = 1;
6148 if (tip_head != NULL)
6149 {
6150 tip_prev->next = tip;
6151 tip_prev = tip;
6152 } else
6153 tip_prev = tip_head = tip;
6154 tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
6155 tip->start = ams->sstop;
6156 tip->which = 2;
6157 tip_prev->next = tip;
6158 tip_prev = tip;
6159 n += 2;
6160 ams = ams->next;
6161 }
6162 tiparray = (AMTinyInfoPtr PNTR)MemNew(n*sizeof(AMTinyInfoPtr));
6163 tip = tip_head;
6164 i = 0;
6165 while (tip != NULL)
6166 {
6167 tiparray[i] = tip;
6168 tip = tip->next;
6169 i++;
6170 }
6171 HeapSort((Pointer)tiparray, (size_t)n, sizeof(AMTinyInfoPtr), AlnMgrCompareTips);
6172 tip_head = tip_prev = tiparray[0];
6173 tip_head->numsap = 1;
6174 for (i=1; i<n; i++)
6175 {
6176 if (tiparray[i]->start == tip_prev->start && tiparray[i]->which == tip_prev->which)
6177 {
6178 tip_prev->numsap++;
6179 MemFree(tiparray[i]);
6180 } else
6181 {
6182 tip_prev->next = tiparray[i];
6183 tiparray[i]->numsap = 1;
6184 tip_prev = tiparray[i];
6185 }
6186 }
6187 tip_prev->next = NULL;
6188 MemFree(tiparray);
6189 open = 0;
6190 ams_mhead = NULL;
6191 tip = tip_head;
6192 while (tip != NULL)
6193 {
6194 if (open > 0 && ((tip->which==1 && start <=tip->start-1)||(tip->which==2 && start <=tip->start)))
6195 {
6196 ams_master = (AMmsmsPtr)MemNew(sizeof(AMmsms));
6197 ams_master->sstart = start;
6198 if (tip->which == 1)
6199 ams_master->sstop = tip->start-1;
6200 else
6201 ams_master->sstop = tip->start;
6202 if (ams_mhead != NULL)
6203 {
6204 ams_mprev->next = ams_master;
6205 ams_mprev = ams_master;
6206 } else
6207 ams_mhead = ams_mprev = ams_master;
6208 }
6209 if (tip->which == 1)
6210 {
6211 start = tip->start;
6212 open += tip->numsap;
6213 } else
6214 {
6215 start = tip->start+1;
6216 open -= tip->numsap;
6217 }
6218 tip = tip->next;
6219 }
6220 while (tip_head != NULL)
6221 {
6222 tip = tip_head->next;
6223 MemFree(tip_head);
6224 tip_head = tip;
6225 }
6226 ams = ams_head;
6227 while (ams)
6228 {
6229 ams_master = ams_mhead;
6230 found = FALSE;
6231 while (ams_master != NULL && !found)
6232 {
6233 if (ams->sstart >= ams_master->sstart && ams->sstart <= ams_master->sstop)
6234 {
6235 found = TRUE;
6236 ams->sstop = ams_master->sstop;
6237 if (AlnMgrTruncateSAP(ams->sap, ams_master->sstart, ams_master->sstop, ams->n))
6238 {
6239 ams_new = (AMmsmsPtr)MemNew(sizeof(AMmsms));
6240 ams_new->sap = ams->sap->next;
6241 ams->sap->next = NULL;
6242 AlnMgrGetNthSeqRangeInSA(ams_new->sap, ams->n, &ams_new->sstart, &ams_new->sstop);
6243 ams_new->next = ams->next;
6244 ams->next = ams_new;
6245 ams_new->n = ams->n;
6246 ams_new->count = ams->count;
6247 }
6248 ams_master->sap = ams->sap;
6249 ams_master->n = ams->n;
6250 }
6251 ams_master = ams_master->next;
6252 }
6253 ams = ams->next;
6254 }
6255 return ams_mhead;
6256 }
6257
6258 static AMmsmsPtr AlnMgrTruncateByOverlap(SeqAlignPtr sap, AMmsmsPtr ams_head)
6259 {
6260 AMmsmsPtr ams;
6261 AMmsmsPtr ams_master;
6262 AMmsmsPtr ams_mhead;
6263 AMmsmsPtr ams_mprev;
6264 AMmsmsPtr ams_new;
6265 AMmsmsPtr ams_prev;
6266 Boolean found;
6267 Int4 i;
6268 Int4 n;
6269 SeqAlignPtr salp_prev;
6270 AMTinyInfoPtr tip;
6271 AMTinyInfoPtr PNTR tiparray;
6272 AMTinyInfoPtr tip_head;
6273 AMTinyInfoPtr tip_prev;
6274
6275 if (sap == NULL || ams_head == NULL)
6276 return NULL;
6277 ams_mhead = ams_mprev = NULL;
6278 n = 0;
6279 ams = ams_head;
6280 tip_head = NULL;
6281 while (ams != NULL) /* create linked list of starts and stops */
6282 {
6283 tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
6284 tip->start = ams->sstart;
6285 tip->which = 1;
6286 if (tip_head != NULL)
6287 {
6288 tip_prev->next = tip;
6289 tip_prev = tip;
6290 } else
6291 tip_head = tip_prev = tip;
6292 tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
6293 tip->start = ams->sstop;
6294 tip->which = 2;
6295 tip_prev->next = tip;
6296 tip_prev = tip;
6297 n+=2;
6298 ams = ams->next;
6299 }
6300 /* sort the list */
6301 tiparray = (AMTinyInfoPtr PNTR)MemNew(n*sizeof(AMTinyInfoPtr));
6302 tip = tip_head;
6303 for (i=0; i<n; i++)
6304 {
6305 if (tip == NULL)
6306 return FALSE;
6307 tiparray[i] = tip;
6308 tip = tip->next;
6309 }
6310 HeapSort((Pointer)tiparray, (size_t)(n), sizeof(AMTinyInfoPtr), AlnMgrCompareTips);
6311 tip_head = tiparray[0];
6312 for (i=0; i<n-1; i++)
6313 {
6314 tiparray[i]->next = tiparray[i+1];
6315 tiparray[i+1]->next = NULL;
6316 }
6317 /* now look for start-stop pairs -- these are the blocks */
6318 tip = tip_head;
6319 while (tip->next != NULL)
6320 {
6321 if (tip->which == 1 && tip->next->which == 2)
6322 {
6323 ams_master = (AMmsmsPtr)MemNew(sizeof(AMmsms));
6324 ams_master->sstart = tip->start;
6325 ams_master->sstop = tip->next->start;
6326 if (ams_mhead != NULL)
6327 {
6328 ams_mprev->next = ams_master;
6329 ams_mprev = ams_master;
6330 } else
6331 ams_mhead = ams_mprev = ams_master;
6332 }
6333 tip = tip->next;
6334 }
6335 for (i=0; i<n; i++)
6336 {
6337 MemFree(tiparray[i]);
6338 }
6339 MemFree(tiparray);
6340 /* now compare the ams with the master blocks and truncate as needed */
6341 ams = ams_head;
6342 ams_prev = NULL;
6343 while (ams)
6344 {
6345 ams_master = ams_mhead;
6346 found = FALSE;
6347 while (ams_master && !found)
6348 {
6349 if (ams->sstart <= ams_master->sstart && ams->sstop >= ams_master->sstart)
6350 {
6351 found = TRUE;
6352 ams->sstart = ams_master->sstart;
6353 ams->sstop = ams_master->sstop;
6354 if (AlnMgrTruncateSAP(ams->sap, ams_master->sstart, ams_master->sstop, ams->n))
6355 {
6356 ams_new = (AMmsmsPtr)MemNew(sizeof(AMmsms));
6357 ams_new->sap = ams->sap->next;
6358 AlnMgrGetNthSeqRangeInSA(ams_new->sap, ams->n, &ams_new->sstart, &ams_new->sstop);
6359 ams_new->next = ams->next;
6360 ams_new->n = ams->n;
6361 ams_new->count = ams->count;
6362 ams->next = ams_new;
6363 }
6364 ams_master->sap = ams->sap;
6365 ams_master->masternum = ams->n;
6366 ams_prev = ams;
6367 ams = ams->next;
6368 } else if (ams->sstart <= ams_master->sstart && ams->sstop < ams_master->sstart)
6369 { /*this one isn't going to get used -- free it */
6370 found = TRUE;
6371 if (ams_prev != NULL)
6372 {
6373 salp_prev = ams_prev->sap;
6374 salp_prev->next = ams->sap->next;
6375 ams_prev->next = ams->next;
6376 ams->next = NULL;
6377 ams->sap->next = NULL;
6378 SeqAlignFree(ams->sap);
6379 MemFree(ams);
6380 ams = ams_prev->next;
6381 } else
6382 {
6383 ams->sap->next = NULL;
6384 SeqAlignFree(ams->sap);
6385 ams_prev = ams;
6386 ams = ams->next;
6387 MemFree(ams_prev);
6388 ams_prev = NULL;
6389 }
6390 }
6391 ams_master = ams_master->next;
6392 }
6393 if (!found)
6394 {
6395 found = TRUE;
6396 if (ams_prev != NULL)
6397 {
6398 salp_prev = ams_prev->sap;
6399 salp_prev->next = ams->sap->next;
6400 ams_prev->next = ams->next;
6401 ams->next = NULL;
6402 ams->sap->next = NULL;
6403 SeqAlignFree(ams->sap);
6404 MemFree(ams);
6405 ams = ams_prev->next;
6406 } else
6407 {
6408 ams->sap->next = NULL;
6409 SeqAlignFree(ams->sap);
6410 ams_prev = ams;
6411 ams = ams->next;
6412 MemFree(ams_prev);
6413 ams_prev = NULL;
6414 }
6415 }
6416 }
6417 return ams_mhead;
6418 }
6419
6420 /***************************************************************************
6421 *
6422 * AlnMgrTruncateSAP truncates a given seqalign to contain only the
6423 * bioseq coordinates from start to stop on the indicated row. Anything
6424 * before those coordinates is discarded; anything remaining afterwards
6425 * is made into another seqalign and put in sap->next (the original next,
6426 * if any, is now at sap->next->next). Doesn't work on parent seqaligns.
6427 * The function returns TRUE if the orignal alignment extended past stop.
6428 *
6429 ***************************************************************************/
6430 NLM_EXTERN Boolean AlnMgrTruncateSAP(SeqAlignPtr sap, Int4 start, Int4 stop, Int4 row)
6431 {
6432 DenseDiagPtr ddp;
6433 DenseDiagPtr ddp2;
6434 DenseSegPtr dsp;
6435 Int4 from;
6436 Int4 i;
6437 Int4 mstart;
6438 Int4 mstop;
6439 SeqAlignPtr sap1;
6440 SeqAlignPtr sap2;
6441 Int4 tmp;
6442 Int4 to;
6443
6444 if (sap == NULL || stop<start || row < 1)
6445 return FALSE;
6446 if (sap->segtype == SAS_DENSEG)
6447 {
6448 if (sap->saip == NULL)
6449 AlnMgrIndexSingleChildSeqAlign(sap);
6450 AlnMgrGetNthSeqRangeInSA(sap, row, &mstart, &mstop);
6451 if (mstart > start || mstop < stop)
6452 return FALSE;
6453 if (mstart == start)
6454 {
6455 if (mstop == stop)
6456 return FALSE;
6457 else if (mstop > stop)
6458 {
6459 from = AlnMgrMapBioseqToSeqAlign(sap, start, row, NULL);
6460 to = AlnMgrMapBioseqToSeqAlign(sap, stop, row, NULL);
6461 if (to < from)
6462 {
6463 tmp = to;
6464 to = from;
6465 from = tmp;
6466 }
6467 sap1 = AlnMgrGetSubAlign(sap, NULL, from, to);
6468 AlnMgrIndexSingleChildSeqAlign(sap1);
6469 from = AlnMgrMapBioseqToSeqAlign(sap, stop+1, row, NULL);
6470 if (from < 0)
6471 return FALSE;
6472 to = AlnMgrMapBioseqToSeqAlign(sap, mstop, row, NULL);
6473 if (to < from)
6474 {
6475 tmp = to;
6476 to = from;
6477 from = tmp;
6478 }
6479 sap2 = AlnMgrGetSubAlign(sap, NULL, from, to);
6480 sap2->next = sap->next;
6481 sap->next = sap2;
6482 dsp = sap->segs;
6483 sap->segs = (Pointer)(sap1->segs);
6484 sap1->segs = NULL;
6485 DenseSegFree(dsp);
6486 SeqAlignFree(sap1);
6487 AlnMgrIndexSingleChildSeqAlign(sap);
6488 AlnMgrIndexSingleChildSeqAlign(sap2);
6489 return TRUE;
6490 }
6491 } else if (mstart < start) /* throw away the first part */
6492 {
6493 from = AlnMgrMapBioseqToSeqAlign(sap, start, row, NULL);
6494 to = AlnMgrMapBioseqToSeqAlign(sap, stop, row, NULL);
6495 if (to < from)
6496 {
6497 tmp = to;
6498 to = from;
6499 from = tmp;
6500 }
6501 sap1 = AlnMgrGetSubAlign(sap, NULL, from, to);
6502 if (mstop == stop) /* done */
6503 {
6504 dsp = sap->segs;
6505 sap->segs = (Pointer)(sap1->segs);
6506 sap1->segs = NULL;
6507 DenseSegFree(dsp);
6508 SeqAlignFree(sap1);
6509 AlnMgrIndexSingleChildSeqAlign(sap);
6510 return TRUE;
6511 } else if (mstop > stop)
6512 {
6513 from = AlnMgrMapBioseqToSeqAlign(sap, stop+1, row, NULL);
6514 if (from < 0)
6515 return FALSE;
6516 to = AlnMgrMapBioseqToSeqAlign(sap, mstop, row, NULL);
6517 if (to < from)
6518 {
6519 tmp = to;
6520 to = from;
6521 from = tmp;
6522 }
6523 sap2 = AlnMgrGetSubAlign(sap, NULL, from, to);
6524 sap2->next = sap->next;
6525 sap->next = sap2;
6526 AlnMgrIndexSingleChildSeqAlign(sap2);
6527 dsp = sap->segs;
6528 sap->segs = (Pointer)(sap1->segs);
6529 sap1->segs = NULL;
6530 DenseSegFree(dsp);
6531 SeqAlignFree(sap1);
6532 AlnMgrIndexSingleChildSeqAlign(sap);
6533 return TRUE;
6534 }
6535 }
6536 } else if (sap->segtype == SAS_DENDIAG)
6537 {
6538 ddp = (DenseDiagPtr)(sap->segs);
6539 if (ddp->dim < row)
6540 return FALSE;
6541 mstart = ddp->starts[row-1];
6542 mstop = mstart + ddp->len - 1;
6543 if (mstart > start || mstop < stop)
6544 return FALSE;
6545 if (mstart == start)
6546 {
6547 if (mstop == stop)
6548 return FALSE;
6549 else if (mstop > stop)
6550 {
6551 ddp2 = DenseDiagNew();
6552 ddp2->dim = ddp->dim;
6553 ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
6554 ddp2->id = SeqIdDupList(ddp->id);
6555 ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
6556 ddp2->scores = ScoreDup(ddp->scores);
6557 for (i=0; i<ddp->dim; i++)
6558 {
6559 ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
6560 ddp2->strands[i] = ddp->strands[i];
6561 }
6562 ddp2->len = mstop - stop;
6563 ddp->len = ddp->len - (mstop - stop);
6564 sap2 = SeqAlignNew();
6565 sap2->type = SAT_PARTIAL;
6566 sap2->segtype = SAS_DENSEG;
6567 sap2->segs = (Pointer)ddp2;
6568 sap2->next = sap->next;
6569 sap->next = sap2;
6570 AlnMgrIndexSingleChildSeqAlign(sap2);
6571 return TRUE;
6572 }
6573 } else if (mstart < start)
6574 {
6575 for (i=0; i<ddp->dim; i++)
6576 {
6577 ddp->starts[i] = ddp->starts[i] + start - mstart;
6578 }
6579 ddp->len = ddp->len - (start - mstart);
6580 AlnMgrIndexSingleChildSeqAlign(sap);
6581 if (mstop == stop)
6582 return FALSE;
6583 else if (mstop > stop)
6584 {
6585 ddp2 = DenseDiagNew();
6586 ddp2->dim = ddp->dim;
6587 ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
6588 ddp2->id = SeqIdDupList(ddp->id);
6589 ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
6590 ddp2->scores = ScoreDup(ddp->scores);
6591 for (i=0; i<ddp->dim; i++)
6592 {
6593 ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
6594 ddp2->strands[i] = ddp->strands[i];
6595 }
6596 ddp2->len = mstop - stop;
6597 ddp->len = ddp->len - (mstop - stop);
6598 sap2 = SeqAlignNew();
6599 sap2->type = SAT_PARTIAL;
6600 sap2->segtype = SAS_DENSEG;
6601 sap2->segs = (Pointer)ddp2;
6602 sap2->next = sap->next;
6603 sap->next = sap2;
6604 AlnMgrIndexSingleChildSeqAlign(sap2);
6605 return TRUE;
6606 }
6607 }
6608 } else
6609 return FALSE;
6610 return FALSE;
6611 }
6612
6613 static void am_compare_alignids(AMmsmsPtr ams_prev, AMmsmsPtr ams)
6614 {
6615 DenseSegPtr dsp1;
6616 DenseSegPtr dsp2;
6617 Boolean found;
6618 SeqIdPtr sip;
6619 SeqIdPtr sip1;
6620 SeqIdPtr sip2;
6621
6622 if (ams_prev == NULL || ams == NULL)
6623 return;
6624 dsp1 = (DenseSegPtr)(ams_prev->sap->segs);
6625 dsp2 = (DenseSegPtr)(ams->sap->segs);
6626 sip1 = dsp1->ids;
6627 sip2 = dsp2->ids;
6628 while (sip1 != NULL)
6629 {
6630 sip = sip2;
6631 found = FALSE;
6632 while (sip != NULL && !found)
6633 {
6634 if (SeqIdComp(sip1, sip) == SIC_YES)
6635 found = TRUE;
6636 sip = sip->next;
6637 }
6638 if (!found)
6639 {
6640 ams->stop = 1;
6641 return;
6642 }
6643 sip1 = sip1->next;
6644 }
6645 return;
6646 }
6647
6648 static Boolean AlnMgrJaggedIndex(SeqAlignPtr sap, AMmsmsPtr ams_head, AMmsmsPtr *ams_mhead, Int4 numrows)
6649 {
6650 AMAlignIndexPtr amaip;
6651 AMmsmsPtr ams;
6652 AMmsmsPtr ams_master;
6653 AMmsmsPtr ams_prev;
6654 Boolean found;
6655 Int4 i;
6656 Int4 j;
6657 Int4 n;
6658 Int4 numblocks;
6659 RowSourcePtr rsp;
6660 RowSourcePtr rsp_curr_head;
6661 RowSourcePtr rsp_head;
6662 RowSourcePtr rsp_prev;
6663 SeqAlignPtr PNTR saparray;
6664 Boolean usethis;
6665
6666 if (sap == NULL || ams_head == NULL || ams_mhead == NULL)
6667 return FALSE;
6668 if (!AlnMgrIndexParentSA(sap))
6669 return FALSE;
6670 AlnMgrSetMaster(sap, sap->master);
6671 ams_master = *ams_mhead;
6672 numblocks = 0;
6673 while (ams_master != NULL)
6674 {
6675 numblocks++;
6676 ams_master->count = numblocks;
6677 ams_master = ams_master->next;
6678 }
6679 ams = ams_head;
6680 j = 0;
6681 while (ams != NULL)
6682 {
6683 ams->count = j;
6684 ams = ams->next;
6685 j++;
6686 }
6687 saparray = (SeqAlignPtr PNTR)MemNew(j*sizeof(SeqAlignPtr));
6688 ams = ams_head;
6689 j = 0;
6690 while (ams != NULL)
6691 {
6692 saparray[j] = ams->sap;
6693 ams = ams->next;
6694 j++;
6695 }
6696 ams_master = *ams_mhead;
6697 rsp_head = RowSourceNew();
6698 rsp_head->which_saps = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6699 rsp_head->num_in_sap = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6700 rsp_head->id = SeqIdDup(sap->master);
6701 rsp_head->strand = Seq_strand_plus;
6702 rsp_head->numsaps = 0;
6703 while (ams_master != NULL)
6704 {
6705 found = FALSE;
6706 for (i=0; i<j && !found; i++)
6707 {
6708 if (saparray[i] == ams_master->sap)
6709 found = TRUE;
6710 }
6711 if (!found)
6712 return FALSE;
6713 rsp_head->which_saps[rsp_head->numsaps] = i;
6714 rsp_head->num_in_sap[rsp_head->numsaps] = ams_master->n;
6715 rsp_head->numsaps++;
6716 ams_master = ams_master->next;
6717 }
6718 rsp_prev = rsp_head;
6719 ams = ams_head;
6720 amaip = (AMAlignIndexPtr)(sap->saip);
6721 amaip->numsaps = amaip->alnsaps = j;
6722 amaip->saps = saparray;
6723 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
6724 amaip->numrows = 1;
6725 sap->type = SAT_MASTERSLAVE;
6726 ams_prev = ams_head = NULL;
6727 while (ams)
6728 {
6729 ams_master = *ams_mhead;
6730 n = AlnMgrGetNumRows(ams->sap);
6731 usethis = FALSE;
6732 while (!usethis && ams_master != NULL)
6733 {
6734 if (ams->sstart == ams_master->sstart && ams->sstop == ams_master->sstop)
6735 usethis = TRUE;
6736 else
6737 ams_master = ams_master->next;
6738 }
6739 if (usethis)
6740 {
6741 if (ams_prev != NULL && ams->sstart > ams_prev->sstart && ams->stop != 1)
6742 {
6743 rsp = rsp_curr_head;
6744 for (i=0; i<n; i++)
6745 {
6746 if (i+1 != ams->n)
6747 {
6748 if (rsp == NULL)
6749 return FALSE;
6750 rsp->which_saps[ams_master->count-1] = ams->count+1;
6751 rsp->num_in_sap[ams_master->count-1] = i+1;
6752 rsp = rsp->next;
6753 }
6754 }
6755 } else /* new row */
6756 {
6757 found = FALSE;
6758 for (i=0; i<n; i++)
6759 {
6760 if (i+1 != ams->n)
6761 {
6762 rsp = RowSourceNew();
6763 amaip->numrows++;
6764 rsp->which_saps = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6765 rsp->num_in_sap = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6766 rsp->id = AlnMgrGetNthSeqIdPtr(ams->sap, i+1);
6767 rsp->strand = AlnMgrGetNthStrand(ams->sap, i+1);
6768 rsp->which_saps[ams_master->count-1] = ams->count + 1;
6769 rsp->num_in_sap[ams_master->count-1] = i+1;
6770 rsp->numsaps = numblocks;
6771 rsp_prev->next = rsp;
6772 rsp_prev = rsp;
6773 if (!found)
6774 {
6775 rsp_curr_head = rsp;
6776 found = TRUE;
6777 }
6778 }
6779 }
6780 }
6781 ams_prev = ams;
6782 }
6783 ams = ams->next;
6784 }
6785 amaip->master = 1;
6786 amaip->rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows)*sizeof(RowSourcePtr));
6787 rsp = rsp_head;
6788 for (i=0; i<amaip->numrows; i++)
6789 {
6790 if (rsp == NULL)
6791 return FALSE;
6792 amaip->rowsource[i] = rsp;
6793 rsp = rsp->next;
6794 }
6795 rsp = rsp_head;
6796 amaip->numseg = rsp->numsaps;
6797 amaip->lens = (Int4Ptr)MemNew((amaip->numseg)*sizeof(Int4));
6798 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->numseg+1)*sizeof(Uint4));
6799 amaip->aligncoords[0] = 0;
6800 for (i=0; i<amaip->numseg; i++)
6801 {
6802 sap = amaip->saps[rsp->which_saps[i]-1];
6803 amaip->lens[i] = AlnMgrGetAlnLength(sap, FALSE);
6804 amaip->aligncoords[i+1] = amaip->aligncoords[i] + amaip->lens[i];
6805 }
6806 amaip->parent = sap;
6807 return TRUE;
6808 }
6809
6810 static Boolean AlnMgrCarefulIndex(SeqAlignPtr sap, AMmsmsPtr ams_head, AMmsmsPtr *ams_mhead, Boolean allinblock, Int4 numrows)
6811 {
6812 AMAlignIndexPtr amaip;
6813 AMmsmsPtr ams;
6814 AMmsmsPtr ams_master;
6815 AMmsmsPtr ams_prev;
6816 AMmsmsPtr currmaster;
6817 Boolean found;
6818 Int4 i;
6819 Int4 j;
6820 Int4 n;
6821 Int4 numblocks;
6822 RowSourcePtr rsp;
6823 RowSourcePtr rsp_curr_head;
6824 RowSourcePtr rsp_head;
6825 RowSourcePtr rsp_prev;
6826 SeqAlignPtr PNTR saparray;
6827 Boolean usethis;
6828
6829 if (sap == NULL || ams_head == NULL || ams_mhead == NULL)
6830 return FALSE;
6831 if (!AlnMgrIndexParentSA(sap))
6832 return FALSE;
6833 AlnMgrSetMaster(sap, sap->master);
6834 if (allinblock)
6835 am_trim_master(ams_mhead, ams_head, numrows);
6836 ams_master = *ams_mhead;
6837 if (ams_master == NULL) /* these alignments do not overlap at all */
6838 {
6839 return (am_make_null_alignment(sap));
6840 }
6841 numblocks = 0;
6842 while (ams_master != NULL)
6843 {
6844 numblocks++;
6845 ams_master->count = numblocks;
6846 ams_master = ams_master->next;
6847 }
6848 ams = ams_head;
6849 j = 0;
6850 while (ams)
6851 {
6852 j++;
6853 ams = ams->next;
6854 }
6855 saparray = (SeqAlignPtr PNTR)MemNew(j*sizeof(SeqAlignPtr));
6856 ams = ams_head;
6857 j = 0;
6858 currmaster = *ams_mhead;
6859 rsp_head = RowSourceNew();
6860 rsp_head->which_saps = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6861 rsp_head->num_in_sap = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6862 rsp_head->id = SeqIdDup(sap->master);
6863 rsp_head->strand = Seq_strand_plus;
6864 rsp_head->numsaps = 0;
6865 while (ams)
6866 {
6867 if (currmaster && currmaster->sstart < ams->sstart)
6868 {
6869 while (currmaster && currmaster->sstart < ams->sstart)
6870 {
6871 currmaster = currmaster->next;
6872 }
6873 if (currmaster == NULL)
6874 currmaster = *ams_mhead;
6875 } else if (currmaster && currmaster->sstart > ams->sstart)
6876 currmaster = *ams_mhead;
6877 if (currmaster && currmaster->sstart == ams->sstart && currmaster->sstop == ams->sstop && rsp_head->numsaps < numblocks && rsp_head->which_saps[currmaster->count-1] == 0)
6878 {
6879 rsp_head->which_saps[currmaster->count-1] = j+1;
6880 rsp_head->num_in_sap[currmaster->count-1] = ams->n;
6881 rsp_head->numsaps++;
6882 if (rsp_head->numsaps < numblocks)
6883 currmaster = *ams_mhead;
6884 }
6885 saparray[j] = ams->sap;
6886 ams->count = j;
6887 j++;
6888 ams_prev = ams;
6889 ams = ams->next;
6890 if (!allinblock)
6891 am_compare_alignids(ams_prev, ams);
6892 }
6893 rsp_prev = rsp_head;
6894 ams = ams_head;
6895 amaip = (AMAlignIndexPtr)(sap->saip);
6896 amaip->numsaps = amaip->alnsaps = j;
6897 amaip->saps = saparray;
6898 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
6899 amaip->numrows = 1;
6900 sap->type = SAT_MASTERSLAVE;
6901 ams_prev = ams_head = NULL;
6902 while (ams)
6903 {
6904 ams_master = *ams_mhead;
6905 n = AlnMgrGetNumRows(ams->sap);
6906 usethis = FALSE;
6907 while (!usethis && ams_master != NULL)
6908 {
6909 if (ams->sstart == ams_master->sstart && ams->sstop == ams_master->sstop)
6910 usethis = TRUE;
6911 else
6912 ams_master = ams_master->next;
6913 }
6914 if (usethis)
6915 {
6916 if (ams_prev != NULL && ams->sstart > ams_prev->sstart && ams->stop != 1)
6917 {
6918 rsp = rsp_curr_head;
6919 for (i=0; i<n; i++)
6920 {
6921 if (i+1 != ams->n)
6922 {
6923 if (rsp == NULL)
6924 return FALSE;
6925 rsp->which_saps[ams_master->count-1] = ams->count+1;
6926 rsp->num_in_sap[ams_master->count-1] = i+1;
6927 rsp = rsp->next;
6928 }
6929 }
6930 } else /* new row */
6931 {
6932 found = FALSE;
6933 for (i=0; i<n; i++)
6934 {
6935 if (i+1 != ams->n)
6936 {
6937 rsp = RowSourceNew();
6938 amaip->numrows++;
6939 rsp->which_saps = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6940 rsp->num_in_sap = (Uint4Ptr)MemNew(numblocks*sizeof(Uint4));
6941 rsp->id = AlnMgrGetNthSeqIdPtr(ams->sap, i+1);
6942 rsp->strand = AlnMgrGetNthStrand(ams->sap, i+1);
6943 rsp->which_saps[ams_master->count-1] = ams->count + 1;
6944 rsp->num_in_sap[ams_master->count-1] = i+1;
6945 rsp->numsaps = numblocks;
6946 rsp_prev->next = rsp;
6947 rsp_prev = rsp;
6948 if (!found)
6949 {
6950 rsp_curr_head = rsp;
6951 found = TRUE;
6952 }
6953 }
6954 }
6955 }
6956 ams_prev = ams;
6957 }
6958 ams = ams->next;
6959 }
6960 amaip->master = 1;
6961 amaip->rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows)*sizeof(RowSourcePtr));
6962 rsp = rsp_head;
6963 for (i=0; i<amaip->numrows; i++)
6964 {
6965 if (rsp == NULL)
6966 return FALSE;
6967 amaip->rowsource[i] = rsp;
6968 rsp = rsp->next;
6969 }
6970 rsp = rsp_head;
6971 amaip->numseg = rsp->numsaps;
6972 amaip->lens = (Int4Ptr)MemNew((amaip->numseg)*sizeof(Int4));
6973 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->numseg+1)*sizeof(Uint4));
6974 amaip->aligncoords[0] = 0;
6975 for (i=0; i<amaip->numseg; i++)
6976 {
6977 sap = amaip->saps[rsp->which_saps[i]-1];
6978 amaip->lens[i] = AlnMgrGetAlnLength(sap, FALSE);
6979 amaip->aligncoords[i+1] = amaip->aligncoords[i] + amaip->lens[i];
6980 }
6981 amaip->parent = sap;
6982 return TRUE;
6983 }
6984
6985 static Boolean am_make_null_alignment(SeqAlignPtr sap)
6986 {
6987 AMAlignIndexPtr amaip;
6988 BioseqPtr bsp;
6989 Int4 count;
6990 SeqAlignPtr salp;
6991 SeqAlignPtr salp_tmp;
6992 SeqIdPtr sip;
6993
6994 amaip = (AMAlignIndexPtr)(sap->saip);
6995 amaip->numsaps = amaip->alnsaps = 0;
6996 amaip->mstype = AM_NULL;
6997 amaip->ids = AlnMgrPropagateUpSeqIdPtrs(sap, &(count));
6998 if (count <= 0)
6999 return FALSE;
7000 else
7001 amaip->numbsqs = count;
7002 amaip->numrows = 0;
7003 amaip->numseg = 0;
7004 sap->type = SAT_MASTERSLAVE;
7005 sap->dim = amaip->numrows;
7006 salp = (SeqAlignPtr)(sap->segs);
7007 sap->segs = NULL;
7008 while (salp != NULL)
7009 {
7010 salp_tmp = salp->next;
7011 salp->next = NULL;
7012 SeqAlignFree(salp);
7013 salp = salp_tmp;
7014 }
7015 amaip->ulens = (Int4Ptr)MemNew(sizeof(Int4));
7016 amaip->ulens[0] = 0;
7017 sip = amaip->ids;
7018 while (sip != NULL)
7019 {
7020 bsp = BioseqLockById(sip);
7021 if (bsp->length > amaip->ulens[0])
7022 amaip->ulens[0] = bsp->length;
7023 BioseqUnlock(bsp);
7024 sip = sip->next;
7025 }
7026 amaip->parent = sap;
7027 return TRUE;
7028 }
7029
7030 static void am_trim_master(AMmsmsPtr PNTR ams_mhead, AMmsmsPtr ams_head, Int4 numrows)
7031 {
7032 AMmsmsPtr ams;
7033 AMmsmsPtr ams_master;
7034 AMmsmsPtr ams_prev;
7035 Int4 n;
7036
7037 if (ams_mhead == NULL || ams_head == NULL || numrows < 1)
7038 return;
7039 ams_master = *ams_mhead;
7040 ams_prev = NULL;
7041 while (ams_master)
7042 {
7043 ams = ams_head;
7044 n = 1;
7045 while (ams)
7046 {
7047 if (ams_master->sstart == ams->sstart && ams_master->sstop == ams->sstop)
7048 n+=ams->count;
7049 ams = ams->next;
7050 }
7051 if (n < numrows)
7052 {
7053 if (ams_prev != NULL)
7054 {
7055 ams_prev->next = ams_master->next;
7056 ams_master->next = NULL;
7057 ams_master->sap = NULL;
7058 ams_master->sip = NULL;
7059 MemFree(ams_master);
7060 ams_master = ams_prev->next;
7061 } else
7062 {
7063 *ams_mhead = ams_master->next;
7064 ams_master->next = NULL;
7065 ams_master->sap = NULL;
7066 ams_master->sip = NULL;
7067 MemFree(ams_master);
7068 ams_master = *ams_mhead;
7069 }
7070 } else
7071 {
7072 ams_prev = ams_master;
7073 ams_master = ams_master->next;
7074 }
7075 }
7076 return;
7077 }
7078
7079
7080 NLM_EXTERN void AlnMgrMakeAlignCoords(SeqAlignPtr sap)
7081 {
7082 AMAlignIndexPtr amaip;
7083 Int4 i;
7084 Int4 j;
7085
7086 i = AlnMgrCheckAlignForParent(sap);
7087 if (i < 0 || i==AM_CHILD)
7088 return;
7089 amaip = (AMAlignIndexPtr)(sap->saip);
7090 if (!amaip->saps)
7091 return;
7092 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7093 amaip->aligncoords[0] = 0;
7094 for (j=0; j<((amaip->alnsaps)-1); j++)
7095 {
7096 amaip->aligncoords[j+1] = AlnMgrGetAlnLength(amaip->saps[j], FALSE) + amaip->aligncoords[j];
7097 }
7098 return;
7099 }
7100
7101 NLM_EXTERN Boolean AlnMgrMergeIntoMSMultByMaster(AMAlignIndexPtr amaip, Int4Ptr lens, Uint4Ptr numseg)
7102 {
7103 Uint4 count;
7104 DenseSegPtr dsp;
7105 Int4 gap;
7106 Int4 i;
7107 Int4 j;
7108 Int4 n;
7109 Boolean retval;
7110 SAIndexPtr saip;
7111 AMTinyInfoPtr tip;
7112 AMTinyInfoPtr PNTR tiparray;
7113
7114 retval = FALSE;
7115 if (numseg == NULL)
7116 return retval;
7117 tiparray = (AMTinyInfoPtr PNTR)MemNew((*numseg+1)*sizeof(AMTinyInfoPtr));
7118 j = 0;
7119 count = 0;
7120 for (i=0; i<(amaip->alnsaps); i++)
7121 {
7122 dsp = (DenseSegPtr)(amaip->saps[i]->segs);
7123 saip = (SAIndexPtr)amaip->saps[i]->saip;
7124 gap = 0;
7125 for (n=0; n<(dsp->numseg); n++)
7126 {
7127 if (dsp->starts[n*(dsp->dim)+saip->master-1] != -1)
7128 {
7129 tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
7130 tip->start = dsp->starts[n*(dsp->dim)+saip->master-1];
7131 tip->which = i+1;
7132 tip->numgap = gap;
7133 tiparray[j] = tip;
7134 j++;
7135 count++;
7136 gap = 0;
7137 } else
7138 {
7139 gap++;
7140 }
7141 }
7142 tip = (AMTinyInfoPtr)MemNew(sizeof(AMTinyInfo));
7143 AlnMgrGetNthSeqRangeInSA(amaip->saps[i], saip->master, NULL, &tip->start);
7144 tip->start += 1;
7145 tip->which = i+1;
7146 tip->numgap = gap;
7147 tiparray[j] = tip;
7148 j++;
7149 count++;
7150 }
7151 *numseg = count;
7152 HeapSort((Pointer)tiparray, (size_t)(*numseg), sizeof(AMTinyInfoPtr), AlnMgrCompareTips);
7153 *numseg = count-1;
7154 count = 0;
7155 for (i=0; i<=(*numseg); i++)
7156 {
7157 if (count!=0 && (tiparray[i]->start == lens[count-1]))
7158 count--;
7159 for (j=1; j<=(tiparray[i]->numgap); j++)
7160 {
7161 lens[count] = -(tiparray[i]->which);
7162 count++;
7163 }
7164 lens[count] = tiparray[i]->start;
7165 count++;
7166 }
7167 for (i=0; i<=(*numseg); i++)
7168 {
7169 MemFree(tiparray[i]);
7170 }
7171 MemFree(tiparray);
7172 *numseg = count;
7173 return TRUE;
7174 }
7175
7176 NLM_EXTERN Boolean AlnMgrMergeSegments(Int4Ptr lens, SeqAlignPtr sap, SeqIdPtr master, Int4Ptr where, Int4 which)
7177 {
7178 DenseSegPtr dsp;
7179 Boolean found;
7180 Int4 i;
7181 Int4 j;
7182 Int4 n;
7183 Int4 num;
7184 Int4 r;
7185 Boolean retval;
7186 Int4 s;
7187 SAIndexPtr saip;
7188 Int4Ptr tmp;
7189 Int4 z;
7190
7191 retval = FALSE;
7192 if (!sap || !master || !lens)
7193 return retval;
7194 if (!where)
7195 return retval;
7196 n = AlnMgrGetNForSip(sap, master);
7197 if (n<0)
7198 return retval;
7199 if (sap->segtype == SAS_DENSEG)
7200 {
7201 dsp = (DenseSegPtr)(sap->segs);
7202 if (!dsp)
7203 return retval;
7204 saip = (SAIndexPtr)(sap->saip);
7205 if (!saip)
7206 return retval;
7207 } else
7208 {
7209 return retval;
7210 }
7211 if (*where == 0)
7212 {
7213 for(j=0; j<(dsp->numseg); j++)
7214 {
7215 if (dsp->starts[(j*(dsp->dim)) + n - 1] < 0)
7216 {
7217 s = -(which);
7218 } else
7219 {
7220 s = dsp->starts[(j*(dsp->dim)) + n - 1];
7221 }
7222 lens[*where] = s;
7223 *where = *where + 1;
7224 }
7225 AlnMgrGetNthSeqRangeInSA(sap, saip->master, NULL, &lens[dsp->numseg]);
7226 lens[dsp->numseg] += 1;
7227 *where = *where + 1;
7228 } else
7229 {
7230 tmp = (Int4Ptr)MemNew((dsp->numseg+1)*sizeof(Int4));
7231 for(j=0; j<(dsp->numseg); j++)
7232 {
7233 if (dsp->starts[(j*(dsp->dim)) + n - 1] < 0)
7234 {
7235 s = -(which);
7236 } else
7237 {
7238 s = dsp->starts[(j*(dsp->dim)) + n - 1];
7239 }
7240 tmp[j] = s;
7241 }
7242 AlnMgrGetNthSeqRangeInSA(sap, saip->master, NULL, &tmp[dsp->numseg]);
7243 tmp[dsp->numseg] += 1;
7244 s = 0;
7245 for (j=0; j<=(dsp->numseg); j++)
7246 {
7247 num = 0;
7248 while (tmp[j] < 0 && num<(dsp->numseg))
7249 {
7250 num++;
7251 j++;
7252 }
7253 num++;
7254 found = FALSE;
7255 for (i=s; !found && i<*where; i++)
7256 {
7257 r = 0;
7258 if (lens[i] < 0)
7259 {
7260 } else if (tmp[j] < lens[i])
7261 {
7262 if (i>0)
7263 {
7264 while (((i-r-1)>=0) && (lens[i-r-1] < 0))
7265 {
7266 r++;
7267 }
7268 }
7269 s = i;
7270 for (z = *where-1; z >= i-r; z--)
7271 {
7272 lens[z+num] = lens[z];
7273 }
7274 for (z = num; z > 0; z--)
7275 {
7276 lens[i-r] = tmp[j-z+1];
7277 i++;
7278 }
7279 found = TRUE;
7280 *where = *where + num;
7281 } else if (tmp[j] == lens[i])
7282 {
7283 s = i;
7284 for (z = *where-1; z >= i; z--)
7285 {
7286 lens[z+num-1] = lens[z];
7287 }
7288 for (z = num-1; z > 0; z--)
7289 {
7290 lens[i] = tmp[j-z];
7291 i++;
7292 }
7293 found = TRUE;
7294 *where = *where + num - 1;
7295 }
7296 }
7297 if (!found)
7298 {
7299 s = *where;
7300 for (z = *where+num-1; z >= *where; z--)
7301 {
7302 lens[z+num] = lens[z];
7303 }
7304 for (z = num-1; z >= 0; z--)
7305 {
7306 lens[i] = tmp[j-z];
7307 i++;
7308 }
7309 found = TRUE;
7310 *where = *where + num;
7311 }
7312 }
7313 MemFree(tmp);
7314 }
7315 retval = TRUE;
7316 return retval;
7317 }
7318
7319
7320 NLM_EXTERN Boolean AlnMgrFillInStarts(SeqAlignPtr PNTR saparray, Int4Ptr starts, Int4 numseg, Int4Ptr lens, Int4 numsaps, Uint4Ptr aligncoords)
7321 {
7322 Int4Ptr alnlen;
7323 Boolean done;
7324 Int4 gap_pos;
7325 Int4 i;
7326 Int4 j;
7327 Int4 length;
7328 Boolean retval;
7329
7330 retval = FALSE;
7331 for (i=0; i<numsaps; i++)
7332 {
7333 gap_pos = 0;
7334 for (j=0; j<numseg; j++)
7335 {
7336 if(lens[j] >= 0)
7337 {
7338 starts[(numsaps*j)+i] = AlnMgrGetStartFromMaster(saparray[i], lens[j]);
7339 } else
7340 {
7341 if (lens[j] == -(i+1))
7342 {
7343 starts[(numsaps*j)+i] = AlnMgrGetMasterGapStartForSeg(saparray[i], gap_pos, &aligncoords[j]);
7344 gap_pos += 1;
7345 } else
7346 {
7347 starts[(numsaps*j)+i] = -1;
7348 }
7349 }
7350 }
7351 }
7352 if (!AlnMgrReconcileGaps(lens, aligncoords, numseg))
7353 return retval;
7354 alnlen = (Int4Ptr)MemNew(numsaps*sizeof(Int4));
7355 for (i=0; i<numsaps; i++)
7356 {
7357 alnlen[i] = AlnMgrGetAlnLength(saparray[i], FALSE);
7358 }
7359 for (i=0; i<numsaps; i++)
7360 {
7361 length = 0;
7362 done = FALSE;
7363 for (j=0; j<numseg; j++)
7364 {
7365 if (starts[(numsaps*j)+i] == -2)
7366 {
7367 if (length > 0)
7368 {
7369 if (lens[j]+length-1 < alnlen[i])
7370 {
7371 starts[(numsaps*j)+i] = length;
7372 length += lens[j];
7373 } else
7374 {
7375 done = TRUE;
7376 }
7377 }
7378 } else if (starts[(numsaps*j)+i] == -1)
7379 {
7380 if (length == 0)
7381 starts[(numsaps*j)+i] = -2;
7382 else if (done)
7383 starts[(numsaps*j)+i] = -2;
7384 } else
7385 {
7386 length = starts[(numsaps*j)+i] + lens[j];
7387 }
7388 }
7389 }
7390 j = 0;
7391 numseg -= 1;
7392 done = FALSE;
7393 if (numseg != 0)
7394 done = FALSE;
7395 else
7396 done = TRUE;
7397 for (i=(numsaps*(numseg-1)); (!done && i<(numsaps*numseg)); i++)
7398 {
7399 if (starts[i] >= 0)
7400 {
7401 done = TRUE;
7402 lens[numseg-1] = alnlen[j]-starts[i];
7403 }
7404 else
7405 j++;
7406 }
7407 MemFree(alnlen);
7408 retval = TRUE;
7409 return retval;
7410 }
7411
7412 NLM_EXTERN Int4 AlnMgrGetStartFromMaster(SeqAlignPtr sap, Int4 pos)
7413 {
7414 DenseSegPtr dsp;
7415 SAIndexPtr saip;
7416 Int4 start;
7417
7418 saip = (SAIndexPtr)(sap->saip);
7419 dsp = (DenseSegPtr)(sap->segs);
7420 start = binary_search_segment_array(saip->ssdp[saip->master-1], pos, dsp->dim, saip->master - 1, (DenseSegPtr)sap->segs);
7421 if (dsp->starts[(start*dsp->dim)+saip->master-1] != pos)
7422 {
7423 return -2;
7424 } else
7425 {
7426 return (saip->aligncoords[start]);
7427 }
7428 }
7429
7430 NLM_EXTERN Uint4 AlnMgrGetMasterGapStartForSeg(SeqAlignPtr sap, Int4 which_gap, Uint4Ptr aligncoord)
7431 {
7432 DenseSegPtr dsp;
7433 SAIndexPtr saip;
7434
7435 saip = (SAIndexPtr)(sap->saip);
7436 dsp = (DenseSegPtr)(sap->segs);
7437 if (which_gap >= saip->ssdp[saip->master-1]->numunsect)
7438 {
7439 if (aligncoord)
7440 *aligncoord = dsp->lens[dsp->numseg-1];
7441 return saip->aligncoords[dsp->numseg-1];
7442 }
7443 if (aligncoord)
7444 *aligncoord = dsp->lens[saip->ssdp[saip->master-1]->unsect[which_gap]];
7445 return saip->aligncoords[saip->ssdp[saip->master-1]->unsect[which_gap]];
7446 }
7447
7448
7449 NLM_EXTERN Boolean AlnMgrReconcileGaps(Int4Ptr lens, Uint4Ptr aligncoords, Int4 num)
7450 {
7451 Int4 i;
7452 Int4 j;
7453 Int4 r;
7454
7455 for (i=0; i<num; i++)
7456 {
7457 if (lens[i] < 0)
7458 {
7459 r = 1;
7460 while (lens[i+r] < 0)
7461 {
7462 r++;
7463 }
7464 lens[i] = lens[i+r];
7465 for (j=i+1; j<num; j++)
7466 {
7467 if (lens[j] >= 0)
7468 lens[j] = lens[j] + aligncoords[i];
7469 }
7470 }
7471 }
7472 for (i=0; i<num; i++)
7473 {
7474 aligncoords[i] = lens[i] - lens[0];
7475 }
7476 for (i=0; i<num-1; i++)
7477 {
7478 lens[i] = lens[i+1] - lens[i];
7479 }
7480 return TRUE;
7481 }
7482
7483 NLM_EXTERN Boolean AlnMgrMakeMultSegments(AMAlignIndexPtr amaip)
7484 {
7485 Int4 i;
7486 Int4 j;
7487 Uint2 n;
7488 Boolean retval;
7489 Uint2Ptr segments;
7490 Uint2Ptr tmp;
7491
7492 retval = FALSE;
7493 tmp = (Uint2Ptr)MemNew((amaip->numseg)*sizeof(Uint2));
7494 for (i=0; i<amaip->numsaps; i++)
7495 {
7496 n = 0;
7497 for (j=0; j<amaip->numseg; j++)
7498 {
7499 if (amaip->starts[((amaip->numsaps)*j)+i] >= 0)
7500 {
7501 tmp[n] = j;
7502 n++;
7503 }
7504 }
7505 segments = (Uint2Ptr)MemNew(n*sizeof(Uint2));
7506 for (j=0; j<n; j++)
7507 {
7508 segments[j] = tmp[j];
7509 }
7510 if (!amaip->amadp[i])
7511 return retval;
7512 amaip->amadp[i]->segments = segments;
7513 amaip->amadp[i]->numseg = n;
7514 amaip->amadp[i]->numseg = n;
7515 }
7516 MemFree(tmp);
7517 retval = TRUE;
7518 return retval;
7519 }
7520
7521 NLM_EXTERN Int4 AlnMgrCheckOrdered(SeqAlignPtr sap)
7522 {
7523 AMAlignIndexPtr amaip;
7524 Int4 dim;
7525 DenseSegPtr dsp;
7526 Int4 i;
7527 Int4 n;
7528 SeqAlignPtr salp;
7529 SeqIdPtr sip;
7530 SeqIdPtr sip_prev;
7531 SeqIdPtr sip_tmp;
7532 Int4 start1;
7533 Int4 start2;
7534 Int4 stop1;
7535 Int4 stop2;
7536 Uint1Ptr strands;
7537
7538 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
7539 return CHECK_ERROR;
7540 amaip = (AMAlignIndexPtr)(sap->saip);
7541 salp = (SeqAlignPtr)(sap->segs);
7542 if (salp->next == NULL) /* only one child alignment */
7543 return AM_NOTORDERED;
7544 sip_prev = NULL;
7545 dim = -1;
7546 n = 0;
7547 strands = NULL;
7548 while (salp)
7549 {
7550 n++;
7551 dsp = (DenseSegPtr)(salp->segs);
7552 if (dim != -1)
7553 {
7554 if (dsp->dim != dim)
7555 return AM_NOTORDERED;
7556 } else
7557 dim = dsp->dim;
7558 sip = dsp->ids;
7559 if (sip_prev)
7560 {
7561 sip_tmp = sip_prev;
7562 while (sip && sip_tmp)
7563 {
7564 if (SeqIdComp(sip_tmp, sip) != SIC_YES)
7565 return AM_NOTORDERED;
7566 sip = sip->next;
7567 sip_tmp = sip_tmp->next;
7568 }
7569 if (sip || sip_tmp)
7570 return AM_NOTORDERED;
7571 } else
7572 sip_prev = sip;
7573 if (strands)
7574 {
7575 if (dsp->strands)
7576 {
7577 for (i=0; i<dsp->dim; i++)
7578 {
7579 if ((dsp->strands[i] == Seq_strand_minus && strands[i] != Seq_strand_minus) || (strands[i] == Seq_strand_minus && dsp->strands[i] != Seq_strand_minus))
7580 return AM_NOTORDERED;
7581 }
7582 }
7583 } else
7584 strands = dsp->strands;
7585 salp = salp->next;
7586 }
7587 if (amaip->saps)
7588 MemFree(amaip->saps);
7589 amaip->saps = (SeqAlignPtr PNTR)MemNew(n*sizeof(SeqAlignPtr));
7590 amaip->numrows = dim;
7591 salp = (SeqAlignPtr)(sap->segs);
7592 n = 0;
7593 while (salp)
7594 {
7595 amaip->saps[n] = salp;
7596 salp = salp->next;
7597 n++;
7598 }
7599 amaip->numsaps = n;
7600 AlnMgrSortAlnSetByNthRowPos(sap, 1);
7601 for (n=0; n<amaip->numsaps-1; n++)
7602 {
7603 for (i=0; i<dim; i++)
7604 {
7605 AlnMgrGetNthSeqRangeInSA(amaip->saps[n], i+1, &start1, &stop1);
7606 AlnMgrGetNthSeqRangeInSA(amaip->saps[n+1], i+1, &start2, &stop2);
7607 if (AlnMgrGetNthStrand(amaip->saps[n], i+1) == Seq_strand_minus)
7608 {
7609 if (start1 <= stop2)
7610 return AM_NOTORDERED;
7611 } else
7612 {
7613 if (stop1 >= start2)
7614 return AM_NOTORDERED;
7615 }
7616 }
7617 }
7618 return AM_ORDERED;
7619 }
7620
7621 NLM_EXTERN Int4 AlnMgrCheckOverlapping(SeqAlignPtr sap)
7622 {
7623 AMAlignDatPtr amadp;
7624 AMAlignIndexPtr amaip;
7625 Int4 end;
7626 Int4 c;
7627 Int4 i;
7628 Int4 j;
7629 Int4 n;
7630 Int4 prevstrand;
7631 SeqIdPtr sip;
7632 Int4 start;
7633 Int4 stop;
7634 Uint2 strand;
7635
7636 i = AlnMgrCheckAlignForParent(sap);
7637 if (i<0)
7638 return CHECK_ERROR;
7639 else if (i==AM_PARENT)
7640 {
7641 amaip = (AMAlignIndexPtr)sap->saip;
7642 if (amaip->numsaps == 1)
7643 return 1;
7644 sip = amaip->ids;
7645 for (j=0; j<(amaip->numbsqs); j++)
7646 {
7647 end = -1;
7648 amadp = amaip->amadp[j];
7649 prevstrand = -1;
7650 for (c=0; c<(amadp->numsaps); c++)
7651 {
7652 n = AlnMgrGetNForSip(amadp->saps[c], sip);
7653 strand = AlnMgrGetNthStrand(amadp->saps[c], n);
7654 if (strand == 0)
7655 {
7656 if (prevstrand != -1)
7657 strand = prevstrand;
7658 } else if (prevstrand != -1)
7659 {
7660 if (strand != prevstrand)
7661 return j;
7662 } else
7663 prevstrand = strand;
7664 AlnMgrGetNthSeqRangeInSA(amadp->saps[c], n, &start, &stop);
7665 if (strand != Seq_strand_minus)
7666 {
7667 if (start <= end && end != -1)
7668 return j;
7669 else
7670 end = stop;
7671 } else
7672 {
7673 if (end != -1 && stop >= end)
7674 return j;
7675 else
7676 end = start;
7677 }
7678 }
7679 sip = sip->next;
7680 }
7681 } else if (i==AM_CHILD)
7682 {
7683 return NO_OVERLAP;
7684 }
7685 return NO_OVERLAP;
7686 }
7687
7688 static Boolean am_check_gaps(SeqAlignPtr sap)
7689 {
7690 DenseSegPtr dsp;
7691 SeqAlignPtr salp;
7692
7693 salp = (SeqAlignPtr)(sap->segs);
7694 while (salp)
7695 {
7696 dsp = (DenseSegPtr)(salp->segs);
7697 if (dsp->dim > 1)
7698 return FALSE;
7699 salp = salp->next;
7700 }
7701 return TRUE;
7702 }
7703
7704 /*****************************************************************************
7705 *
7706 * AlnMgrGetMaxSegments simply adds up the number of segments for each
7707 * SeqAlign in a linked list, to get the maximum number of segments
7708 * for the merge of the list (for memory allocation in AlnMgrMakeFakeMultiple).
7709 *
7710 ******************************************************************************/
7711
7712 NLM_EXTERN Int4 AlnMgrGetMaxSegments(SeqAlignPtr sap)
7713 {
7714 DenseSegPtr dsp;
7715 Int4 ernie; /* the running total, also a happy hamster */
7716
7717 ernie = 0;
7718 while (sap)
7719 {
7720 if (sap->segtype == SAS_DENSEG)
7721 {
7722 dsp = (DenseSegPtr)(sap->segs);
7723 ernie += dsp->numseg;
7724 } else if (sap->segtype == SAS_STD)
7725 {
7726 ernie += 1;
7727 } else
7728 return 0;
7729 sap = sap->next;
7730 ernie += 1;
7731 }
7732 return ernie;
7733 }
7734
7735 /*******************************************************************************
7736 *
7737 * Row Management functions:
7738 *
7739 *******************************************************************************/
7740 NLM_EXTERN Int4 AlnMgrGetNumRows(SeqAlignPtr sap)
7741 {
7742 AMAlignIndexPtr amaip;
7743 DenseSegPtr dsp;
7744
7745 if (sap == NULL || sap->saip == NULL)
7746 return -1;
7747 if (sap->saip->indextype == INDEX_SEGS)
7748 {
7749 dsp = (DenseSegPtr)sap->segs;
7750 if (dsp == NULL)
7751 return -1;
7752 return (dsp->dim);
7753 } else if (sap->saip->indextype == INDEX_PARENT)
7754 {
7755 if ((amaip = (AMAlignIndexPtr)sap->saip) == NULL)
7756 return -1;
7757 if (amaip->numseg == 0)
7758 return (amaip->numbsqs);
7759 if (amaip->numrows)
7760 return (amaip->numrows);
7761 }
7762 return 0;
7763 }
7764
7765 NLM_EXTERN Int4 AlnMgrGetMaxRowsForParentPartial(SeqAlignPtr sap)
7766 {
7767 AMAlignIndexPtr amaip;
7768 Int4 i;
7769 Int4 j;
7770 Int4 max;
7771
7772 if (sap == NULL || sap->saip == NULL)
7773 return -1;
7774 max = -1;
7775 if (sap->saip->indextype == INDEX_PARENT)
7776 {
7777 amaip = (AMAlignIndexPtr)sap->saip;
7778 for (i=0; i<(amaip->alnsaps); i++)
7779 {
7780 j = AlnMgrGetNumRows(amaip->saps[i]);
7781 if (j==-1)
7782 return -1;
7783 if (j>max)
7784 max = j;
7785 }
7786 }
7787 return max;
7788 }
7789
7790 NLM_EXTERN Boolean AlnMgrMakeRowsForOrdered(SeqAlignPtr sap)
7791 {
7792 AMAlignIndexPtr amaip;
7793 DenseSegPtr dsp;
7794 Int4 i;
7795 SeqIdPtr id;
7796 Int4 n;
7797 RowSourcePtr PNTR rowsource;
7798 RowSourcePtr rsp;
7799
7800 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
7801 return FALSE;
7802 amaip = (AMAlignIndexPtr)(sap->saip);
7803 rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows)*sizeof(RowSourcePtr));
7804 dsp = (DenseSegPtr)(((SeqAlignPtr)(sap->segs))->segs);
7805 id = dsp->ids;
7806 for (i=0; i<amaip->numrows; i++)
7807 {
7808 rsp = RowSourceNew();
7809 rsp->id = SeqIdDup(id);
7810 id = id->next;
7811 rsp->which_saps = (Uint4Ptr)MemNew((amaip->numsaps)*sizeof(Uint4));
7812 rsp->num_in_sap = (Uint4Ptr)MemNew((amaip->numsaps)*sizeof(Uint4));
7813 rsp->numsaps = amaip->numsaps;
7814 if (dsp->strands)
7815 rsp->strand = dsp->strands[i];
7816 for (n=0; n<amaip->numsaps; n++)
7817 {
7818 rsp->which_saps[n] = n+1;
7819 rsp->num_in_sap[n] = i+1;
7820 }
7821 rowsource[i] = rsp;
7822 }
7823 amaip->rowsource = rowsource;
7824 amaip->master = -2;
7825 return TRUE;
7826 }
7827
7828 NLM_EXTERN Boolean AlnMgrGetRowsForPartial(SeqAlignPtr sap)
7829 {
7830 AMAlignIndexPtr amaip;
7831 Int4 curr;
7832 DenseSegPtr dsp;
7833 Boolean found;
7834 Int4 i;
7835 Int4 j;
7836 Int4 k;
7837 Boolean retval;
7838 RowSourcePtr PNTR rowsource;
7839 RowSourcePtr rsp;
7840 SeqAlignPtr salp;
7841 SeqIdPtr sip;
7842
7843 retval = FALSE;
7844 if (sap == NULL || sap->saip == NULL)
7845 return retval;
7846 if (sap->saip->indextype != INDEX_PARENT)
7847 return retval;
7848 if (sap->type != SAT_PARTIAL)
7849 return retval;
7850 amaip = (AMAlignIndexPtr)sap->saip;
7851 i = AlnMgrGetMaxRowsForParentPartial(sap);
7852 if (i < 0)
7853 return retval;
7854 else
7855 amaip->numrows = i;
7856 rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows)*sizeof(RowSourcePtr));
7857 curr = -1;
7858 for (i=0; i<(amaip->alnsaps); i++)
7859 {
7860 salp = amaip->saps[i];
7861 dsp = (DenseSegPtr)salp->segs;
7862 sip = dsp->ids;
7863 for (j=0; j<(dsp->dim); j++)
7864 {
7865 found = FALSE;
7866 k = 0;
7867 while (!found && k <= curr)
7868 {
7869 if (SeqIdComp(sip, rowsource[k]->id) == SIC_YES)
7870 found = TRUE;
7871 else
7872 k++;
7873 }
7874 if (!found)
7875 {
7876 curr++;
7877 rsp = RowSourceNew();
7878 rsp->which_saps = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7879 rsp->num_in_sap = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7880 rowsource[curr] = rsp;
7881 rsp->id = SeqIdDup(sip);
7882 rsp->which_saps[rsp->numsaps] = i+1;
7883 rsp->num_in_sap[rsp->numsaps] = AlnMgrGetNForSip(salp, sip);
7884 (rsp->numsaps)++;
7885 } else
7886 {
7887 rowsource[k]->which_saps[rowsource[k]->numsaps] = i+1;
7888 rowsource[k]->num_in_sap[rowsource[k]->numsaps] = AlnMgrGetNForSip(salp, sip);
7889 (rowsource[k]->numsaps)++;
7890 }
7891 sip = sip->next;
7892 }
7893 }
7894 amaip->numrows = curr+1;
7895 amaip->rowsource = rowsource;
7896 amaip->master = -2;
7897 return TRUE;
7898 }
7899
7900 NLM_EXTERN Boolean AlnMgrGetRowsForMasterSlave(SeqAlignPtr sap)
7901 {
7902 AMAlignIndexPtr amaip;
7903 DenseSegPtr dsp;
7904 Int4 i;
7905 Int4 j;
7906 Int4 k;
7907 Boolean retval;
7908 RowSourcePtr PNTR rowsource;
7909 RowSourcePtr rsp;
7910 SAIndexPtr saip;
7911 SeqAlignPtr salp;
7912 SeqIdPtr sip;
7913
7914 retval = FALSE;
7915 if (sap == NULL || sap->saip == NULL)
7916 return retval;
7917 if (sap->saip->indextype != INDEX_PARENT)
7918 return retval;
7919 if (sap->type != SAT_MASTERSLAVE)
7920 return retval;
7921 amaip = (AMAlignIndexPtr)sap->saip;
7922 i = 1;
7923 salp = (SeqAlignPtr)sap->segs;
7924 while (salp)
7925 {
7926 j = AlnMgrGetNumRows(salp);
7927 if (j < 0)
7928 return retval;
7929 else
7930 i += (j-1); /*don't count the master over and over*/
7931 salp = salp->next;
7932 }
7933 rowsource = (RowSourcePtr PNTR)MemNew((i+1)*sizeof(RowSourcePtr));
7934 rsp = RowSourceNew();
7935 rsp->id = SeqIdDup(sap->master);
7936 rsp->which_saps = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7937 rsp->num_in_sap = (Uint4Ptr)MemNew((amaip->alnsaps)*sizeof(Uint4));
7938 rsp->numsaps = amaip->alnsaps;
7939 rowsource[0] = rsp;
7940 amaip->numrows = 1;
7941 for (j=0; j<(amaip->alnsaps); j++)
7942 {
7943 salp = amaip->saps[j];
7944 saip = (SAIndexPtr)salp->saip;
7945 dsp = (DenseSegPtr)(salp->segs);
7946 sip = dsp->ids;
7947 k=1;
7948 while (sip)
7949 {
7950 if (k != saip->master)
7951 {
7952 rsp = RowSourceNew();
7953 rsp->id = SeqIdDup(sip);
7954 rsp->which_saps = (Uint4Ptr)MemNew(sizeof(Uint4));
7955 rsp->num_in_sap = (Uint4Ptr)MemNew(sizeof(Uint4));
7956 rsp->numsaps = 1;
7957 rsp->which_saps[0] = j+1;
7958 rsp->num_in_sap[0] = k;
7959 rowsource[amaip->numrows] = rsp;
7960 amaip->numrows++;
7961 } else
7962 {
7963 rowsource[0]->which_saps[j] = j+1;
7964 rowsource[0]->num_in_sap[j] = k;
7965 amaip->master = 1;
7966 }
7967 k++;
7968 sip = sip->next;
7969 }
7970 }
7971 amaip->rowsource = rowsource;
7972 return TRUE;
7973 }
7974
7975
7976 /*******************************************************************************
7977 *
7978 * AlnMgrFindMaster returns the (duplicated) SeqIdPtr of the first bioseq
7979 * that is present in every child alignment, unless the sap->master field
7980 * is set in the child alignments, in which case that SeqIdPtr is returned
7981 * (if it's the same in all children).
7982 *
7983 *******************************************************************************/
7984
7985 NLM_EXTERN SeqIdPtr AlnMgrFindMaster(SeqAlignPtr sap)
7986 {
7987 AMAlignDatPtr amadp;
7988 AMAlignIndexPtr amaip;
7989 Boolean done;
7990 Int4 i;
7991 SeqAlignPtr salp;
7992 SeqIdPtr sip;
7993
7994 i = AlnMgrCheckAlignForParent(sap);
7995 if (i<0)
7996 return NULL;
7997 else if (i==AM_CHILD)
7998 {
7999 return SeqIdDup(sap->master);
8000 } else if (i==AM_PARENT)
8001 {
8002 salp = (SeqAlignPtr)(sap->segs);
8003 sip = NULL;
8004 done = FALSE;
8005 while (salp && !done)
8006 {
8007 if (salp->master)
8008 return (SeqIdDup(salp->master));
8009 salp = salp->next;
8010 }
8011 amaip = (AMAlignIndexPtr)(sap->saip);
8012 sip = amaip->ids;
8013 for (i=0; i<(amaip->numbsqs); i++)
8014 {
8015 amadp = amaip->amadp[i];
8016 if (!amadp || !sip)
8017 return NULL;
8018 else
8019 {
8020 if (amadp->numsaps == amaip->numsaps)
8021 return (SeqIdDup(sip));
8022 }
8023 sip = sip->next;
8024 }
8025 return NULL;
8026 }
8027 return NULL;
8028 }
8029
8030
8031 /*******************************************************************************
8032 *
8033 * AlnMgrCheckRealMaster makes sure that the master seqid given appears
8034 * once and only once in each seqalign in the set if a parent is given,
8035 * or once and only one in the seqalign if a child is given.
8036 *
8037 *******************************************************************************/
8038 NLM_EXTERN Boolean AlnMgrCheckRealMaster(SeqAlignPtr sap, SeqIdPtr master)
8039 {
8040 DenseSegPtr dsp;
8041 Int4 i;
8042 Boolean retval;
8043 SeqAlignPtr salp;
8044 SeqIdPtr sip;
8045
8046 retval = FALSE;
8047 if (!sap || !master)
8048 return retval;
8049 if (sap->segtype == SAS_DISC)
8050 {
8051 salp = (SeqAlignPtr)sap->segs;
8052 while (salp)
8053 {
8054 dsp = (DenseSegPtr)salp->segs;
8055 sip = dsp->ids;
8056 i = 0;
8057 while (sip)
8058 {
8059 if (SeqIdComp(sip, master) == SIC_YES)
8060 {
8061 i++;
8062 if (i > 1)
8063 return retval;
8064 }
8065 sip = sip->next;
8066 }
8067 salp = salp->next;
8068 }
8069 } else if (sap->segtype == SAS_DENSEG)
8070 {
8071 dsp = (DenseSegPtr)sap->segs;
8072 sip = dsp->ids;
8073 i = 0;
8074 while (sip)
8075 {
8076 if (SeqIdComp(sip, master) == SIC_YES)
8077 {
8078 i++;
8079 if (i > 1)
8080 return retval;
8081 }
8082 sip = sip->next;
8083 }
8084 }
8085 return TRUE;
8086 }
8087
8088 NLM_EXTERN Boolean AlnMgrMakeSegmentedMasterSlave(SeqAlignPtr sap)
8089 {
8090 AMAlignIndexPtr amaip;
8091 AMmsmsPtr ams;
8092 AMmsmsPtr PNTR amsarray;
8093 AMmsmsPtr ams_head;
8094 AMmsmsPtr ams_master;
8095 AMmsmsPtr ams_mtmp;
8096 AMmsmsPtr ams_tmp;
8097 Int4 c;
8098 Boolean done;
8099 DenseSegPtr dsp;
8100 Boolean found;
8101 Int4 i;
8102 Int4 j;
8103 Int4 n;
8104 Int4 max;
8105 Boolean ok;
8106 RowSourcePtr rsp;
8107 Int4 rspnum;
8108 SAIndexPtr saip;
8109 SeqAlignPtr salp;
8110 SeqIdPtr sip;
8111 AMsiplistPtr siplist;
8112 AMsiplistPtr siplist_new;
8113 AMsiplistPtr siplist_tmp;
8114 Int4 sstart;
8115 Int4 sstop;
8116 Int4 start;
8117 Int4 stop;
8118 Int4Ptr tmparray;
8119
8120 if (sap == NULL)
8121 return FALSE;
8122 amaip = (AMAlignIndexPtr)sap->saip;
8123 if (amaip == NULL)
8124 return FALSE;
8125 if (amaip->master < 0)
8126 return FALSE;
8127 ams_head = NULL;
8128 n = 0;
8129 salp = (SeqAlignPtr)(sap->segs);
8130 for (i=0; i<(amaip->numsaps); i++)
8131 {
8132 amaip->saps[i] = salp;
8133 saip = (SAIndexPtr)(salp->saip);
8134 if (saip->master < 0)
8135 return FALSE;
8136 AlnMgrGetNthSeqRangeInSA(salp, saip->master, &start, &stop);
8137 dsp = (DenseSegPtr)salp->segs;
8138 sip = dsp->ids;
8139 j = 1;
8140 while (sip != NULL)
8141 {
8142 if (j != saip->master)
8143 {
8144 n++;
8145 ams = (AMmsmsPtr)MemNew(sizeof(AMmsms));
8146 ams->start = start;
8147 ams->stop = stop;
8148 ams->sap = salp;
8149 ams->nsap = i+1;
8150 ams->sip = sip;
8151 ams->n = j;
8152 ams->j = i;
8153 AlnMgrGetNthSeqRangeInSA(salp, j, &sstart, &sstop);
8154 ams->sstart = sstart;
8155 ams->sstop = sstop;
8156 ams->strand = AlnMgrGetNthStrand(salp, j);
8157 if (ams_head == NULL)
8158 {
8159 ams_head = ams_tmp = ams;
8160 } else
8161 {
8162 ams_tmp->next = ams;
8163 ams_tmp = ams;
8164 }
8165 }
8166 sip = sip->next;
8167 j++;
8168 }
8169 salp = salp->next;
8170 }
8171 ams_head = am_sort_ammsms(ams_head, n);
8172 ams_master = NULL;
8173 ams = ams_head;
8174 n = 0;
8175 while (ams)
8176 {
8177 if (ams_master)
8178 {
8179 ams_mtmp = ams_master;
8180 found = FALSE;
8181 while (!found && ams_mtmp)
8182 {
8183 if (ams->start == ams_mtmp->start && ams->stop == ams_mtmp->stop)
8184 {
8185 found = TRUE;
8186 ams->masternum = ams_mtmp->masternum;
8187 ams_mtmp->count++;
8188 }
8189 else
8190 ams_mtmp = ams_mtmp->next;
8191 }
8192 if (!found)
8193 {
8194 n++;
8195 ams_tmp = (AMmsmsPtr)MemNew(sizeof(AMmsms));
8196 ams_tmp->start = ams->start;
8197 ams_tmp->stop = ams->stop;
8198 ams_tmp->sap = ams->sap;
8199 ams_tmp->nsap = ams->nsap;
8200 ams_tmp->sip = sap->master;
8201 ams_tmp->count = 1;
8202 ams_tmp->masternum = ams->masternum = n;
8203 saip = (SAIndexPtr)(ams->sap->saip);
8204 ams_tmp->n = saip->master;
8205 ams_tmp->next = ams_master;
8206 ams_tmp->j = ams->n;
8207 ams_master = ams_tmp;
8208 }
8209 } else
8210 {
8211 n++;
8212 ams_tmp = (AMmsmsPtr)MemNew(sizeof(AMmsms));
8213 ams_tmp->start = ams->start;
8214 ams_tmp->stop = ams->stop;
8215 ams_tmp->sap = ams->sap;
8216 ams_tmp->nsap = ams->nsap;
8217 ams_tmp->sip = sap->master;
8218 ams_tmp->count = 1;
8219 ams_tmp->masternum = ams->masternum = n;
8220 saip = (SAIndexPtr)(ams->sap->saip);
8221 ams_tmp->n = saip->master;
8222 ams_tmp->j = ams->n;
8223 ams_master = ams_tmp;
8224 }
8225 ams = ams->next;
8226 }
8227 ams_master = am_sort_masterams(ams_master, n);
8228 max = c = 0;
8229 ams = ams_master;
8230 ams_tmp = NULL;
8231 amsarray = (AMmsmsPtr PNTR)MemNew((n+1)*sizeof(AMmsmsPtr));
8232 while (ams)
8233 {
8234 amsarray[c] = ams;
8235 if (ams_tmp)
8236 {
8237 if (ams->start <= ams_tmp->stop)
8238 {
8239 MemFree(amsarray);
8240 return FALSE; /* add code here to compress all lines??? */
8241 }
8242 }
8243 max += ams->count;
8244 c++;
8245 ams_tmp = ams;
8246 ams = ams->next;
8247 }
8248 amaip->mstype = AM_SEGMENTED_MASTERSLAVE;
8249 amaip->rowsource = (RowSourcePtr PNTR)MemNew((max+1)*sizeof(RowSourcePtr));
8250 if (amaip->aligncoords)
8251 MemFree(amaip->aligncoords);
8252 amaip->aligncoords = (Uint4Ptr)MemNew((c+1)*sizeof(Uint4));
8253 amaip->lens = (Int4Ptr)MemNew((c+1)*sizeof(Int4));
8254 amaip->numseg = c;
8255 tmparray = (Int4Ptr)MemNew((c+1)*sizeof(Int4));
8256 ams = ams_master;
8257 for (j=0; ams && j < c; j++)
8258 {
8259 amaip->lens[j] = AlnMgrGetAlnLength(ams->sap, FALSE);
8260 amaip->aligncoords[j+1] = amaip->aligncoords[j] + amaip->lens[j];
8261 tmparray[ams->masternum] = j;
8262 ams = ams->next;
8263 }
8264 rsp = RowSourceNew();
8265 rsp->id = SeqIdDup(ams_master->sip);
8266 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8267 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8268 rsp->numsaps = 0;
8269 ams = ams_master;
8270 while (ams)
8271 {
8272 rsp->which_saps[rsp->numsaps] = ams->nsap;
8273 rsp->num_in_sap[rsp->numsaps] = ams->n;
8274 rsp->numsaps++;
8275 ams = ams->next;
8276 }
8277 amaip->rowsource[0] = rsp;
8278 amaip->numrows = 1;
8279 siplist = (AMsiplistPtr)MemNew(sizeof(AMsiplist));
8280 siplist->sip = rsp->id;
8281 siplist->first_row = 0;
8282 siplist_tmp = siplist;
8283 ams = ams_head;
8284 rsp = RowSourceNew();
8285 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8286 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8287 amaip->rowsource[amaip->numrows] = rsp;
8288 amaip->numrows++;
8289 while (ams && amaip->numrows <= max)
8290 {
8291 if (rsp->id == NULL) /* new rsp */
8292 {
8293 rsp->id = SeqIdDup(ams->sip);
8294 rsp->strand = ams->strand;
8295 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8296 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8297 rsp->numsaps = c;
8298 rspnum = am_get_first_rsp_for_sip(ams->sip, siplist);
8299 if (rspnum == -1) /* need to add to seqid list */
8300 {
8301 siplist_new = (AMsiplistPtr)MemNew(sizeof(AMsiplist));
8302 siplist_new->sip = ams->sip;
8303 siplist_new->first_row = amaip->numrows-1;
8304 siplist_tmp->next = siplist_new;
8305 siplist_tmp = siplist_new;
8306 }
8307 } else /* some fields already filled -- check for conflicts or new row */
8308 {
8309 n = SeqIdComp(rsp->id, ams->sip);
8310 if (n == SIC_YES && ams->strand == rsp->strand) /* could be same row -- check for conflicts */
8311 {
8312 ok = FALSE;
8313 if (rsp->which_saps[tmparray[ams->masternum]] == 0) /* put in same row */
8314 {
8315 done = FALSE;
8316 i = 0;
8317 while (!done && i<c)
8318 {
8319 if (rsp->which_saps[i] != 0)
8320 done = TRUE;
8321 else
8322 i++;
8323 }
8324 if (done)
8325 {
8326 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[i] - 1], rsp->num_in_sap[i], &start, &stop);
8327 if (ams->strand == Seq_strand_minus)
8328 {
8329 if (tmparray[ams->masternum] < i)
8330 {
8331 if (stop >= ams->sstart)
8332 ok = FALSE;
8333 else
8334 ok = TRUE;
8335 } else
8336 {
8337 if (start <= ams->sstop)
8338 ok = FALSE;
8339 else
8340 ok = TRUE;
8341 }
8342 } else
8343 {
8344 if (tmparray[ams->masternum] < i)
8345 {
8346 if (start <= ams->sstop)
8347 ok = FALSE;
8348 else
8349 ok = TRUE;
8350 } else
8351 {
8352 if (stop >= ams->sstart)
8353 ok = FALSE;
8354 else
8355 ok = TRUE;
8356 }
8357 }
8358 }
8359 }
8360 if (ok)
8361 {
8362 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8363 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8364 rsp->numsaps=c;
8365 } else
8366 {
8367 rspnum = am_get_first_rsp_for_sip(ams->sip, siplist);
8368 if (rspnum == -1) /* make a new row */
8369 {
8370 rsp = RowSourceNew();
8371 rsp->strand = ams->strand;
8372 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8373 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8374 amaip->rowsource[amaip->numrows] = rsp;
8375 amaip->numrows++;
8376 rsp->id = SeqIdDup(ams->sip);
8377 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8378 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8379 rsp->numsaps = c;
8380 siplist_new = (AMsiplistPtr)MemNew(sizeof(AMsiplist));
8381 siplist_new->sip = ams->sip;
8382 siplist_new->first_row = amaip->numrows-1;
8383 siplist_tmp->next = siplist_new;
8384 siplist_tmp = siplist_new;
8385 } else
8386 {
8387 done = FALSE;
8388 while (rspnum < amaip->numrows && !done && SAM_OrderSeqID(ams->sip, amaip->rowsource[rspnum]->id) == 0)
8389 {
8390 rsp = amaip->rowsource[rspnum];
8391 if (rsp->which_saps[tmparray[ams->masternum]] == 0) /* fits here */
8392 {
8393 done = TRUE;
8394 found = FALSE;
8395 i = 0;
8396 while (!found && i<c)
8397 {
8398 if (rsp->which_saps[i] != 0)
8399 found = TRUE;
8400 else
8401 i++;
8402 }
8403 if (found)
8404 {
8405 AlnMgrGetNthSeqRangeInSA(amaip->saps[rsp->which_saps[i] - 1], rsp->num_in_sap[i], &start, &stop);
8406 if (ams->strand == Seq_strand_minus)
8407 {
8408 if (tmparray[ams->masternum] < i)
8409 {
8410 if (stop >= ams->sstart)
8411 ok = FALSE;
8412 else
8413 ok = TRUE;
8414 } else
8415 {
8416 if (start <= ams->sstop)
8417 ok = FALSE;
8418 else
8419 ok = TRUE;
8420 }
8421 } else
8422 {
8423 if (tmparray[ams->masternum] < i)
8424 {
8425 if (start <= ams->sstop)
8426 ok = FALSE;
8427 else
8428 ok = TRUE;
8429 } else
8430 {
8431 if (stop >= ams->sstart)
8432 ok = FALSE;
8433 else
8434 ok = TRUE;
8435 }
8436 }
8437 }
8438 if (ok && found)
8439 {
8440 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8441 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8442 rsp->numsaps = c;
8443 } else
8444 {
8445 rsp = RowSourceNew();
8446 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8447 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8448 rsp->strand = ams->strand;
8449 amaip->rowsource[amaip->numrows] = rsp;
8450 amaip->numrows++;
8451 rsp->id = SeqIdDup(ams->sip);
8452 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8453 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8454 rsp->numsaps = c;
8455 }
8456 }
8457 rspnum++;
8458 }
8459 if (!done) /* didn't fit */
8460 {
8461 rsp = RowSourceNew();
8462 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8463 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8464 amaip->rowsource[amaip->numrows] = rsp;
8465 amaip->numrows++;
8466 rsp->id = SeqIdDup(ams->sip);
8467 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8468 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8469 rsp->numsaps=c;
8470 rsp->strand = ams->strand;
8471 }
8472 }
8473 }
8474 } else /* make a new row */
8475 {
8476 rsp = RowSourceNew();
8477 rsp->which_saps = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8478 rsp->num_in_sap = (Uint4Ptr)MemNew(c*sizeof(Uint4));
8479 amaip->rowsource[amaip->numrows] = rsp;
8480 amaip->numrows++;
8481 rsp->id = SeqIdDup(ams->sip);
8482 rsp->strand = ams->strand;
8483 rsp->which_saps[tmparray[ams->masternum]] = ams->nsap;
8484 rsp->num_in_sap[tmparray[ams->masternum]] = ams->n;
8485 rsp->numsaps=c;
8486 siplist_new = (AMsiplistPtr)MemNew(sizeof(AMsiplist));
8487 siplist_new->sip = ams->sip;
8488 siplist_new->first_row = amaip->numrows-1;
8489 siplist_tmp->next = siplist_new;
8490 siplist_tmp = siplist_new;
8491 }
8492 }
8493 ams = ams->next;
8494 }
8495 siplist_tmp = siplist;
8496 while (siplist_tmp)
8497 {
8498 siplist_new = siplist_tmp->next;
8499 siplist_tmp->sip = NULL;
8500 siplist_tmp->next = NULL;
8501 MemFree(siplist_tmp);
8502 siplist_tmp = siplist_new;
8503 }
8504 ams = ams_master;
8505 while (ams)
8506 {
8507 ams_tmp = ams->next;
8508 ams->sap = NULL;
8509 ams->sip = NULL;
8510 ams->next = NULL;
8511 MemFree(ams);
8512 ams = ams_tmp;
8513 }
8514 ams = ams_head;
8515 while (ams)
8516 {
8517 ams_tmp = ams->next;
8518 ams->sap = NULL;
8519 ams->sip = NULL;
8520 ams->next = NULL;
8521 MemFree(ams);
8522 ams = ams_tmp;
8523 }
8524 MemFree(amsarray);
8525 MemFree(tmparray);
8526 amaip->startsize = amaip->numseg;
8527 amaip->starts = (Int4Ptr)MemNew(amaip->numseg*sizeof(Int4));
8528 return TRUE;
8529 }
8530
8531 static Int4 am_get_first_rsp_for_sip(SeqIdPtr sip, AMsiplistPtr siplist)
8532 {
8533 AMsiplistPtr siplist_tmp;
8534
8535 if (sip == NULL || siplist == NULL)
8536 return -1;
8537 siplist_tmp = siplist;
8538 while (siplist_tmp)
8539 {
8540 if (SeqIdComp(sip, siplist_tmp->sip) == SIC_YES)
8541 {
8542 return (siplist_tmp->first_row);
8543 }
8544 siplist_tmp = siplist_tmp->next;
8545 }
8546 return -1;
8547 }
8548
8549 static AMmsmsPtr am_sort_ammsms(AMmsmsPtr ams_head, Int4 n)
8550 {
8551 AMmsmsPtr ams;
8552 AMmsmsPtr ams_tmp;
8553 AMmsmsPtr PNTR ams_array;
8554 Int4 i;
8555
8556 if (ams_head == NULL || n == 0)
8557 return NULL;
8558 if (n == 1)
8559 return ams_head;
8560 ams_array = (AMmsmsPtr PNTR)MemNew((n+1)*sizeof(AMmsmsPtr));
8561 ams = ams_head;
8562 for (i=0; ams!=NULL && i<n; i++)
8563 {
8564 ams_array[i] = ams;
8565 ams = ams->next;
8566 }
8567 HeapSort((Pointer)ams_array, (size_t)(n), sizeof(AMmsmsPtr), AlnMgrCompareAMS);
8568 ams_tmp = NULL;
8569 for (i=0; i<n; i++)
8570 {
8571 if (ams_tmp != NULL)
8572 {
8573 ams->next = ams_array[i];
8574 ams = ams->next;
8575 ams->next = NULL;
8576 } else
8577 {
8578 ams_tmp = ams = ams_array[i];
8579 ams_tmp->next = NULL;
8580 }
8581 }
8582 MemFree(ams_array);
8583 return ams_tmp;
8584 }
8585
8586 NLM_EXTERN int LIBCALLBACK AlnMgrCompareAMS(VoidPtr base, VoidPtr large_son)
8587 {
8588 AMmsmsPtr ams1;
8589 AMmsmsPtr ams2;
8590 Int4 r;
8591
8592 ams1 = *((AMmsmsPtr PNTR) base);
8593 ams2 = *((AMmsmsPtr PNTR) large_son);
8594 if (ams1 == NULL || ams2 == NULL)
8595 return 0;
8596 r = SAM_OrderSeqID(ams1->sip, ams2->sip);
8597 if (r == 0)
8598 {
8599 if (ams1->sstart < ams2->sstart)
8600 return -1;
8601 else
8602 return 1;
8603 } else
8604 return r;
8605 }
8606
8607 static AMmsmsPtr am_sort_masterams(AMmsmsPtr ams_head, Int4 n)
8608 {
8609 AMmsmsPtr ams;
8610 AMmsmsPtr ams_tmp;
8611 AMmsmsPtr PNTR ams_array;
8612 Int4 i;
8613
8614 if (ams_head == NULL || n == 0)
8615 return NULL;
8616 if (n == 1)
8617 return ams_head;
8618 ams_array = (AMmsmsPtr PNTR)MemNew((n+1)*sizeof(AMmsmsPtr));
8619 ams = ams_head;
8620 for (i=0; ams!=NULL && i<n; i++)
8621 {
8622 ams_array[i] = ams;
8623 ams = ams->next;
8624 }
8625 HeapSort((Pointer)ams_array, (size_t)(n), sizeof(AMmsmsPtr), AlnMgrCompareMasterAMS);
8626 ams_tmp = NULL;
8627 for (i=0; i<n; i++)
8628 {
8629 if (ams_tmp != NULL)
8630 {
8631 ams->next = ams_array[i];
8632 ams = ams->next;
8633 ams->next = NULL;
8634 } else
8635 {
8636 ams_tmp = ams = ams_array[i];
8637 ams_tmp->next = NULL;
8638 }
8639 }
8640 return ams_tmp;
8641 }
8642
8643 NLM_EXTERN int LIBCALLBACK AlnMgrCompareMasterAMS(VoidPtr base, VoidPtr large_son)
8644 {
8645 AMmsmsPtr ams1;
8646 AMmsmsPtr ams2;
8647
8648 ams1 = *((AMmsmsPtr PNTR) base);
8649 ams2 = *((AMmsmsPtr PNTR) large_son);
8650 if (ams1 == NULL || ams2 == NULL)
8651 return 0;
8652 if (ams1->start < ams2->start)
8653 return -1;
8654 else if (ams1->start > ams2->start)
8655 return 1;
8656 else if (ams1->stop < ams2->stop)
8657 return -1;
8658 else if (ams1->j < ams2->j)
8659 return -1;
8660 else
8661 return 1;
8662 }
8663
8664
8665 NLM_EXTERN void AlnMgrSetMaster(SeqAlignPtr sap, SeqIdPtr master)
8666 {
8667 SAIndexPtr saip;
8668 SeqAlignPtr salp;
8669
8670 if (sap->segtype != SAS_DISC || !master)
8671 return;
8672 sap->master = SeqIdDup(master);
8673 salp = (SeqAlignPtr)(sap->segs);
8674 while (salp)
8675 {
8676 if (!salp->saip)
8677 return;
8678 if (salp->master != NULL)
8679 SeqIdFree(salp->master);
8680 salp->master = SeqIdDup(master);
8681 if (salp->saip != NULL)
8682 {
8683 saip = (SAIndexPtr)(salp->saip);
8684 saip->master = AlnMgrGetNForSip(salp, master);
8685 }
8686 salp = salp->next;
8687 }
8688 return;
8689 }
8690
8691 NLM_EXTERN void AlnMgrMakeMasterPlus(SeqAlignPtr sap)
8692 {
8693 DenseSegPtr dsp;
8694 Int4 i;
8695 Int4 master;
8696 SAIndexPtr saip;
8697 SeqAlignPtr sap_tmp;
8698
8699 i = AlnMgrCheckAlignForParent(sap);
8700 if (i==AM_CHILD)
8701 {
8702 saip = (SAIndexPtr)(sap->saip);
8703 if (saip->master < 0)
8704 return;
8705 else
8706 master = saip->master;
8707 dsp = (DenseSegPtr)(sap->segs);
8708 if (dsp->strands[saip->master-1] == Seq_strand_minus)
8709 {
8710 sap_tmp = sap;
8711 sap = sap->next;
8712 sap_tmp->next = NULL;
8713 sap_tmp = SeqAlignListReverseStrand(sap_tmp);
8714 if (!AlnMgrIndexSingleChildSeqAlign(sap_tmp))
8715 return;
8716 saip = (SAIndexPtr)(sap_tmp->saip);
8717 saip->master = master;
8718 sap_tmp->next = sap;
8719 sap = sap_tmp;
8720 }
8721 } else if (i==AM_PARENT)
8722 {
8723 sap_tmp = (SeqAlignPtr)(sap->segs);
8724 while (sap_tmp)
8725 {
8726 AlnMgrMakeMasterPlus(sap_tmp);
8727 sap_tmp = sap_tmp->next;
8728 }
8729 }
8730 return;
8731 }
8732
8733 NLM_EXTERN Boolean AlnMgrForceMasterSlave(SeqAlignPtr sap)
8734 {
8735 AMAlignIndexPtr amaip;
8736 AMAlignDatPtr amadp;
8737 Int4 j;
8738 Int4 n;
8739
8740 if (sap == NULL || sap->master == NULL || sap->saip == NULL)
8741 return FALSE;
8742 amaip = (AMAlignIndexPtr)sap->saip;
8743 n = AlnMgrGetNForSip(sap, sap->master);
8744 if (n < 1)
8745 return FALSE;
8746 amadp = amaip->amadp[n-1];
8747 if (amaip->saps == NULL)
8748 {
8749 amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
8750 for (j=0; j<amaip->numsaps; j++)
8751 {
8752 amaip->saps[j] = amadp->saps[j];
8753 }
8754 n = AlnMgrGetMaxSegments((SeqAlignPtr)(sap->segs));
8755 amaip->lens = (Int4Ptr)MemNew(n*sizeof(Int4));
8756 AlnMgrSetMaster(sap, sap->master);
8757 amaip->numseg = n;
8758 }
8759 if (!AlnMgrMergeIntoMSMultByMaster(amaip, amaip->lens, &amaip->numseg))
8760 return FALSE;
8761 amaip->startsize = (amaip->numseg)*(amaip->numsaps);
8762 amaip->starts = (Int4Ptr)MemNew((amaip->numseg)*(amaip->numsaps)*sizeof(Int4));
8763 amaip->aligncoords = (Uint4Ptr)MemNew((amaip->numseg)*sizeof(Uint4));
8764 if (!AlnMgrFillInStarts(amadp->saps, amaip->starts, amaip->numseg, amaip->lens, amaip->numsaps, amaip->aligncoords))
8765 return FALSE;
8766 if (amaip->numseg > 1)
8767 amaip->numseg -= 1;
8768 sap->type = SAT_MASTERSLAVE;
8769 if (!AlnMgrGetRowsForMasterSlave(sap))
8770 return FALSE;
8771 am_fix_empty_columns(sap);
8772 return TRUE;
8773 }
8774
8775 static void am_fix_empty_columns(SeqAlignPtr sap)
8776 {
8777 AMAlignIndexPtr amaip;
8778 Boolean found;
8779 Int4 i;
8780 Int4 j;
8781
8782 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
8783 return;
8784 amaip = (AMAlignIndexPtr)sap->saip;
8785 for (i=0; i<amaip->numseg; i++)
8786 {
8787 found = FALSE;
8788 for (j=0; j<amaip->numsaps && !found; j++)
8789 {
8790 if (amaip->starts[(i*amaip->numsaps)+j] >= 0)
8791 found = TRUE;
8792 }
8793 if (!found)
8794 {
8795 for (j=0; j<amaip->numsaps && !found; j++)
8796 {
8797 amaip->starts[(i*amaip->numsaps)+j] = -3;
8798 }
8799 }
8800 }
8801 }
8802
8803 /* a little kludge function to tide us over until the new */
8804 /* alignment manager arrives. */
8805
8806 static SeqAlignPtr am_cleanupsalp(SeqAlignPtr salp)
8807 {
8808 Int4 badseg;
8809 DenseSegPtr dsp;
8810 DenseSegPtr dsp_new;
8811 Boolean found;
8812 Int4 i;
8813 Int4 j;
8814 Int4 n;
8815
8816 dsp = (DenseSegPtr)(salp->segs);
8817 badseg = 0;
8818 for (i=0; i<dsp->numseg; i++)
8819 {
8820 found = FALSE;
8821 for (j=0; found==FALSE && j<dsp->dim; j++)
8822 {
8823 if (dsp->starts[dsp->dim*i + j] != -1)
8824 found = TRUE;
8825 }
8826 if (!found)
8827 badseg++;
8828 }
8829 if (badseg == 0)
8830 return salp;
8831 dsp_new = DenseSegNew();
8832 dsp_new->numseg = dsp->numseg-badseg;
8833 dsp_new->dim = dsp->dim;
8834 dsp_new->ids = dsp->ids;
8835 dsp->ids = NULL;
8836 dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
8837 dsp_new->starts = (Int4Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
8838 dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
8839 n = 0;
8840 for (i=0; i<dsp->numseg; i++)
8841 {
8842 found = FALSE;
8843 for (j=0; found==FALSE && j<dsp->dim; j++)
8844 {
8845 if (dsp->starts[dsp->dim*i + j] != -1)
8846 found = TRUE;
8847 }
8848 if (found)
8849 {
8850 for (j=0; j<dsp->dim; j++)
8851 {
8852 dsp_new->starts[dsp->dim*n+j] = dsp->starts[dsp->dim*i+j];
8853 dsp_new->strands[dsp->dim*n+j] = dsp->strands[dsp->dim*i+j];
8854 }
8855 dsp_new->lens[n] = dsp->lens[i];
8856 n++;
8857 }
8858 }
8859 DenseSegFree(dsp);
8860 salp->segs = (Pointer)dsp_new;
8861 return salp;
8862 }
8863
8864 /***************************************************************************
8865 *
8866 * AlnMgrGetSubAlign returns a flattened multiple or pairwise alignment
8867 * corresponding to the indexed input alignment. To get the entire
8868 * alignment, set from = 0 and to = -1. (SUBALIGN)
8869 *
* What is built depends on what AlnMgrCheckAlignForParent reports:
*
* - AM_CHILD: one new dense-seg SeqAlign of type SAT_PARTIAL. The ids are
* a copy of the input's id list; segments are the pieces that
* AlnMgrGetNextAlnBit returns for each row between from and to.
* - AM_PARENT, SAT_MASTERSLAVE with mstype AM_MASTERSLAVE: one new
* dense-seg SeqAlign of type SAT_MASTERSLAVE with amaip->numrows rows.
* - AM_PARENT, SAT_PARTIAL or segmented master-slave: a chain of
* SAT_PARTIAL dense-seg SeqAligns; the head of the chain is returned.
* - AM_PARENT, SAT_DIAGS: the result of SeqAlignDup on the input.
* - anything else (including a parent without an index): NULL.
*
* which_master, from and to are handed to AlnMgrGetNextAlnBit through the
* AlnMsg fields which_master, from_m and to_m. The returned alignment(s)
* are newly allocated here; presumably the caller frees them -- confirm.
*
8870 ***************************************************************************/
8871 NLM_EXTERN SeqAlignPtr AlnMgrGetSubAlign(SeqAlignPtr sap, SeqIdPtr which_master, Int4 from, Int4 to)
8872 {
8873 AMAlignIndexPtr amaip;
8874 AlnMsgPtr amp;
8875 Int4 c;
8876 DenseSegPtr dsp;
8877 DenseSegPtr dsp_orig;
8878 Int4 i;
8879 Int4 j;
8880 Boolean more;
8881 Uint4 n;
8882 Int4 numaln;
8883 SeqAlignPtr salp;
8884 SeqAlignPtr salp_head;
8885 SeqAlignPtr salp_prev;
8886 SeqIdPtr sip;
8887 SeqIdPtr sip_curr,sip_next;
8888 SeqIdPtr sip_prev;
8889 Int4Ptr trackarray;
8890
8891 i = AlnMgrCheckAlignForParent(sap);
8892 if (i == AM_CHILD)
8893 {
8894 salp = SeqAlignNew();
8895 amp = AlnMsgNew();
8896 amp->row_num = 1;
8897 amp->which_master = which_master;
8898 amp->from_m = from;
8899 amp->to_m = to;
8900 dsp_orig = (DenseSegPtr)(sap->segs);
8901 dsp = DenseSegNew();
8902 dsp->ids = SeqIdDupList(dsp_orig->ids);
8903 while ((Boolean) (more = AlnMgrGetNextAlnBit(sap, amp))) /* first pass over row 1: only counts the pieces */
8904 {
8905 dsp->numseg++; /* what to do if a row has all gaps?? */
8906 }
8907 dsp->dim = dsp_orig->dim;
8908 dsp->starts = (Int4Ptr)MemNew((dsp->numseg*dsp_orig->dim)*sizeof(Int4));
8909 dsp->strands = (Uint1Ptr)MemNew((dsp->numseg*dsp_orig->dim)*sizeof(Int4)); /* NOTE(review): sized with Int4 although the elements are Uint1 */
8910 dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8911 for (j=0; j<dsp_orig->dim; j++) /* second pass: one walk per row fills starts and lens */
8912 {
8913 amp = AlnMsgReNew(amp);
8914 amp->row_num = j+1;
8915 amp->which_master = which_master;
8916 amp->from_m = from;
8917 amp->to_m = to;
8918 c = 0;
8919 while ((Boolean) (more = AlnMgrGetNextAlnBit(sap, amp)))
8920 {
8921 dsp->lens[c] = amp->to_b - amp->from_b + 1; /* rewritten by every row; the last row's value stays */
8922 if (amp->gap == 0)
8923 dsp->starts[(c*dsp->dim)+j] = amp->from_b;
8924 else
8925 dsp->starts[(c*dsp->dim)+j] = -1; /* -1 marks a gap in this row */
8926 c++;
8927 }
8928 }
8929 for (c=0; c<(dsp->dim*dsp->numseg); c++)
8930 {
8931 dsp->strands[c] = dsp_orig->strands[c]; /* NOTE(review): copied index-for-index from the original; assumes dsp_orig->strands exists and has at least dim*numseg entries -- confirm */
8932 }
8933 salp->type = SAT_PARTIAL;
8934 salp->segtype = SAS_DENSEG;
8935 salp->segs = (Pointer)dsp;
8936 AlnMsgFree(amp);
8937 salp = am_cleanupsalp(salp); /* drops segments that are gaps in every row */
8938 return salp;
8939 } else if (i == AM_PARENT)
8940 {
8941 amaip = (AMAlignIndexPtr)(sap->saip);
8942 if (amaip == NULL)
8943 return NULL;
8944 if (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_MASTERSLAVE)
8945 {
8946 salp = SeqAlignNew();
8947 salp->type = SAT_MASTERSLAVE;
8948 salp->segtype = SAS_DENSEG;
8949 salp->dim = amaip->numrows;
8950 dsp = DenseSegNew();
8951 dsp->dim = amaip->numrows;
8952 dsp->numseg = amaip->numseg;
8953 dsp->starts = (Int4Ptr)MemNew((amaip->numseg+1)*(amaip->numrows)*sizeof(Int4)); /* one spare segment: the fill loop below also writes on the final (FALSE) call */
8954 dsp->lens = (Int4Ptr)MemNew((amaip->numseg+1)* sizeof(Int4));
8955 dsp->strands = (Uint1Ptr)MemNew((amaip->numseg+1)*(amaip->numrows)*sizeof(Uint1));
8956 sip_curr = NULL;
8957 for (j=0; j<amaip->numrows; j++) /* chain the per-row ids into one temporary list headed by sip_curr */
8958 {
8959 sip = AlnMgrGetNthSeqIdPtr(sap, j+1);
8960 if (sip_curr != NULL)
8961 {
8962 sip_prev->next = sip;
8963 sip_prev = sip;
8964 } else
8965 sip_curr = sip_prev = sip;
8966 }
8967 dsp->ids = SeqIdDupList(sip_curr);
8968 amp = AlnMsgNew();
8969 for (j=0; j<(amaip->numrows); j++)
8970 {
8971 if (j == amaip->master - 1)
8972 salp->master = SeqIdDup(sip_curr);
8973 sip_next = sip_curr->next; /* the temporary id list is consumed one node per row */
8974 SeqIdFree(sip_curr);
8975 sip_curr = sip_next;
8976 amp->which_master = which_master;
8977 amp->from_m = from;
8978 amp->to_m = to;
8979 amp->row_num = j + 1;
8980 more = TRUE;
8981 n = 0;
8982 while (more) /* note: the body also runs for the call that returns FALSE */
8983 {
8984 more = AlnMgrGetNextAlnBit(sap, amp);
8985 if (amp->gap == 0)
8986 {
8987 dsp->starts[n*(dsp->dim) + j] = amp->from_b;
8988 } else
8989 {
8990 dsp->starts[n*(dsp->dim) + j] = -1;
8991 }
8992 if (j == 0)
8993 dsp->lens[n] = amp->to_b - amp->from_b + 1; /* lengths are taken from the first row only */
8994 dsp->strands[n*(dsp->dim) + j] = amp->strand;
8995 n++;
8996 }
8997 amp = AlnMsgReNew(amp);
8998 }
8999 AlnMsgFree(amp);
9000 salp->segs = (Pointer)dsp;
9001 salp = am_cleanupsalp(salp);
9002 return salp;
9003 } else if (sap->type == SAT_PARTIAL || (sap->type == SAT_MASTERSLAVE && amaip->mstype == AM_SEGMENTED_MASTERSLAVE))
9004 {
9005 amp = AlnMsgNew();
9006 amp->which_master = which_master;
9007 amp->from_m = from;
9008 amp->to_m = to;
9009 amp->row_num = 1;
9010 trackarray = (Int4Ptr)MemNew((amaip->numseg+1)*sizeof(Int4)); /* trackarray[k]: pieces of row 1 counted for output alignment k */
9011 numaln = 0;
9012 while ((Boolean) (more = AlnMgrGetNextAlnBit(sap, amp)))
9013 {
9014 if (amp->send_space) /* send_space closes one output alignment and starts the next */
9015 {
9016 numaln++;
9017 amp->send_space = FALSE;
9018 } else
9019 trackarray[numaln]++;
9020 }
9021 salp_head = NULL;
9022 sip_curr = NULL;
9023 for (j=0; j<amaip->numrows; j++)
9024 {
9025 sip = AlnMgrGetNthSeqIdPtr(sap, j+1);
9026 if (sip_curr != NULL)
9027 {
9028 sip_prev->next = sip;
9029 sip_prev = sip;
9030 } else
9031 sip_curr = sip_prev = sip;
9032 }
9033 for (j=0; j<numaln; j++) /* pre-allocate one dense-seg alignment per counted group */
9034 {
9035 salp = SeqAlignNew();
9036 if (salp_head != NULL)
9037 {
9038 salp_prev->next = salp;
9039 salp_prev = salp;
9040 } else
9041 salp_prev = salp_head = salp;
9042 salp->type = SAT_PARTIAL;
9043 salp->segtype = SAS_DENSEG;
9044 salp->dim = amaip->numrows;
9045 dsp = DenseSegNew();
9046 dsp->dim = amaip->numrows;
9047 dsp->numseg = trackarray[j]+1;
9048 dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(trackarray[j]+1)*sizeof(Int4));
9049 dsp->lens = (Int4Ptr)MemNew((dsp->dim)*(trackarray[j]+1)*sizeof(Int4));
9050 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(trackarray[j]+1)*sizeof(Uint1));
9051 dsp->ids = SeqIdDupList(sip_curr);
9052 salp->segs = (Pointer)dsp;
9053 }
9054 amp = AlnMsgReNew(amp);
9055 for (j=0; j<(amaip->numrows); j++)
9056 {
9057 salp = salp_head;
9058 dsp = (Pointer)(salp->segs); /* NOTE(review): salp_head is still NULL here if numaln was 0 -- confirm that cannot happen */
9059 if (j == amaip->master - 1)
9060 salp->master = SeqIdDup(sip_curr);
9061 sip_next = sip_curr->next;
9062 SeqIdFree(sip_curr);
9063 sip_curr = sip_next;
9064 amp->which_master = which_master;
9065 amp->from_m = from;
9066 amp->to_m = to;
9067 amp->row_num = j + 1;
9068 more = TRUE;
9069 n = 0;
9070 while ((more = AlnMgrGetNextAlnBit(sap, amp)) == TRUE)
9071 {
9072 if (amp->gap == 0)
9073 {
9074 dsp->starts[n*(dsp->dim) + j] = amp->from_b;
9075 } else
9076 {
9077 dsp->starts[n*(dsp->dim) + j] = -1;
9078 }
9079 if (j == 0)
9080 dsp->lens[n] = amp->to_b - amp->from_b + 1;
9081 dsp->strands[n*(dsp->dim) + j] = amp->strand;
9082 n++;
9083 if (amp->send_space == TRUE && amp->place != 1) /* move on to the next output alignment and restart its segment counter */
9084 {
9085 salp = salp->next;
9086 dsp = (DenseSegPtr)(salp->segs);
9087 amp->send_space = FALSE;
9088 n=0;
9089 }
9090 }
9091 amp = AlnMsgReNew(amp);
9092 }
9093 MemFree(trackarray);
9094 AlnMsgFree(amp);
9095 salp = am_cleanupsalp(salp); /* NOTE(review): only the alignment salp ended on is cleaned, not the whole chain */
9096 return salp_head;
9097 } else if (sap->type == SAT_DIAGS)
9098 {
9099 salp = SeqAlignDup(sap);
9100 return salp;
9101 }
9102 }
9103 return NULL;
9104 }
9105
9106 NLM_EXTERN SeqAlignPtr AlnMgrGetSubAlignSpecial(SeqAlignPtr sap, Int4 master, Int4 from, Int4 to)
9107 {
9108 AMAlignIndexPtr amaip;
9109 AlnMsgPtr amp1;
9110 AlnMsgPtr amp2;
9111 DenseDiagPtr ddp_head;
9112 DenseDiagPtr ddp_new;
9113 DenseDiagPtr ddp_prev;
9114 Int4 i;
9115 Boolean more1;
9116 Boolean more2;
9117 SeqAlignPtr sap_head;
9118 SeqAlignPtr sap_new;
9119 SeqAlignPtr sap_prev;
9120
9121 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
9122 return NULL;
9123 amaip = (AMAlignIndexPtr)(sap->saip);
9124 if (master > amaip->numrows)
9125 return NULL;
9126 amp1 = AlnMsgNew();
9127 amp2 = AlnMsgNew();
9128 sap_head = sap_prev = NULL;
9129 for (i=0; i<amaip->numrows; i++)
9130 {
9131 if ((i+1)!=master)
9132 {
9133 sap_new = SeqAlignNew();
9134 sap_new->type = SAT_PARTIAL;
9135 sap_new->segtype = SAS_DENDIAG;
9136 sap_new->dim = 2;
9137 amp1 = AlnMsgReNew(amp1);
9138 amp2 = AlnMsgReNew(amp2);
9139 amp1->row_num = master;
9140 amp2->row_num = i+1;
9141 amp1->from_m = amp2->from_m = from;
9142 amp1->to_m = amp2->to_m = to;
9143 ddp_head = ddp_prev = NULL;
9144 while ((Boolean) (more1 = AlnMgrGetNextAlnBit(sap, amp1)) && (Boolean)(more2 = AlnMgrGetNextAlnBit(sap, amp2)))
9145 {
9146 if (amp1->gap == 0 && amp2->gap == 0)
9147 {
9148 ddp_new = DenseDiagNew();
9149 ddp_new->dim = 2;
9150 ddp_new->starts = (Int4Ptr)MemNew(2*sizeof(Int4));
9151 ddp_new->len = amp1->to_b - amp1->from_b + 1;
9152 ddp_new->starts[0] = amp1->from_b;
9153 ddp_new->starts[1] = amp2->from_b;
9154 ddp_new->strands = (Uint1Ptr)MemNew(2*sizeof(Uint1));
9155 ddp_new->strands[0] = amp1->strand;
9156 ddp_new->strands[1] = amp2->strand;
9157 ddp_new->id = AlnMgrGetNthSeqIdPtr(sap, master);
9158 ddp_new->id->next = AlnMgrGetNthSeqIdPtr(sap, i+1);
9159 if (ddp_head != NULL)
9160 {
9161 ddp_prev->next = ddp_new;
9162 ddp_prev = ddp_new;
9163 } else
9164 ddp_head = ddp_prev = ddp_new;
9165 }
9166 }
9167 sap_new->segs = (Pointer)ddp_head;
9168 /* AlnMgrMergeNeighbors(sap_new); */
9169 if (sap_head != NULL)
9170 {
9171 sap_prev->next = sap_new;
9172 sap_prev = sap_new;
9173 } else
9174 sap_head = sap_prev = sap_new;
9175 }
9176 }
9177 amp1 = AlnMsgFree(amp1);
9178 amp2 = AlnMsgFree(amp2);
9179 return sap_head;
9180 }
9181
9182
9183 /********************************************************************************
9184 *
9185 * viewer and editor management functions
9186 *
9187 ********************************************************************************/
9188
9189 NLM_EXTERN SeqAlignPtr AlnMgrCopyIndexedParentSeqAlign(SeqAlignPtr sap)
9190 {
9191 AMAlignIndexPtr amaip;
9192 AMAlignIndexPtr amaip_new;
9193 Boolean found;
9194 Int4 i;
9195 Int4Ptr orderarray;
9196 Int4 r;
9197 SeqAlignPtr sap_new;
9198 SeqAlignPtr sap_tmp;
9199 SeqAlignPtr seg_head;
9200 SeqAlignPtr seg_new;
9201 SeqAlignPtr seg_prev;
9202 SeqAlignPtr seg_tmp;
9203
9204 if (sap->saip == NULL)
9205 return NULL;
9206 if (sap->saip->indextype != INDEX_PARENT)
9207 return NULL;
9208 amaip = (AMAlignIndexPtr)sap->saip;
9209 amaip_new = AMAlignIndexNew();
9210 sap_new = SeqAlignDup(sap);
9211 sap_new->saip = (SeqAlignIndexPtr)amaip_new;
9212 amaip_new->indextype = amaip->indextype;
9213 amaip_new->freefunc = amaip->freefunc;
9214 amaip_new->mstype = amaip->mstype;
9215 amaip_new->aligncoords = (Uint4Ptr)MemNew((amaip->numseg+1)*sizeof(Uint4));
9216 amaip_new->numseg = amaip->numseg;
9217 amaip_new->lens = (Int4Ptr)MemNew((amaip->numseg+1)*sizeof(Int4));
9218 for (i=0; i<amaip->numseg; i++)
9219 {
9220 amaip_new->aligncoords[i] = amaip->aligncoords[i];
9221 amaip_new->lens[i] = amaip->lens[i];
9222 }
9223 amaip_new->starts = (Int4Ptr)MemNew(amaip->startsize*sizeof(Int4));
9224 amaip_new->startsize = amaip->startsize;
9225 MemCpy(amaip_new->starts, amaip->starts, amaip->startsize*sizeof(Int4));
9226 amaip_new->alnsaps = amaip->alnsaps;
9227 amaip_new->numsaps = amaip->numsaps;
9228 amaip_new->ids = SeqIdDupList(amaip->ids);
9229 amaip_new->numbsqs = amaip->numbsqs;
9230 amaip_new->rowsource = (RowSourcePtr PNTR)MemNew((amaip->numrows+1)*sizeof(RowSourcePtr));
9231 for (i=0; i<amaip->numrows; i++)
9232 {
9233 amaip_new->rowsource[i] = AlnMgrCopyRowSource(amaip->rowsource[i]);
9234 }
9235 amaip_new->numrows = amaip->numrows;
9236 amaip_new->master = amaip->master;
9237 seg_head = NULL;
9238 sap_tmp = (SeqAlignPtr)sap->segs;
9239 while (sap_tmp != NULL)
9240 {
9241 seg_new = SeqAlignDup(sap_tmp);
9242 if (seg_head != NULL)
9243 {
9244 seg_prev->next = seg_new;
9245 seg_prev = seg_new;
9246 } else
9247 seg_head = seg_prev = seg_new;
9248 sap_tmp = sap_tmp->next;
9249 }
9250 sap_new->segs = (Pointer)seg_head;
9251 i = 0;
9252 orderarray = (Int4Ptr)MemNew((amaip->numsaps)*sizeof(Int4));
9253 seg_new = seg_head;
9254 sap_tmp = (SeqAlignPtr)sap->segs;
9255 while (sap_tmp != NULL && seg_new != NULL)
9256 {
9257 seg_new->saip = AlnMgrCopyIndexesForChildSeqAlign(sap_tmp);
9258 found = FALSE;
9259 r = 0;
9260 while (!found && r < amaip->numsaps)
9261 {
9262 if (sap_tmp == amaip->saps[r])
9263 {
9264 orderarray[i] = r;
9265 found = TRUE;
9266 }
9267 r++;
9268 }
9269 i++;
9270 seg_new = seg_new->next;
9271 sap_tmp = sap_tmp->next;
9272 }
9273 amaip_new->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps+1)*sizeof(SeqAlignPtr));
9274 seg_tmp = (SeqAlignPtr)sap_new->segs;
9275 i = 0;
9276 while (seg_tmp)
9277 {
9278 amaip_new->saps[orderarray[i]] = seg_tmp;
9279 i++;
9280 seg_tmp = seg_tmp->next;
9281 }
9282 sap_tmp = (SeqAlignPtr)sap->segs;
9283 amaip_new->amadp = (AMAlignDatPtr PNTR)MemNew((amaip->numbsqs+1)*sizeof(AMAlignDatPtr));
9284 seg_head = (SeqAlignPtr)sap_new->segs;
9285 for (i=0; i<amaip->numbsqs; i++)
9286 {
9287 amaip_new->amadp[i] = AlnMgrCopyamadp(amaip->amadp[i], sap_tmp, seg_head);
9288 }
9289 MemFree(orderarray);
9290 return sap_new;
9291 }
9292
9293 NLM_EXTERN RowSourcePtr AlnMgrCopyRowSource(RowSourcePtr rsp)
9294 {
9295 Int4 i;
9296 RowSourcePtr rsp_new;
9297
9298 rsp_new = RowSourceNew();
9299 rsp_new->id = SeqIdDup(rsp->id);
9300 rsp_new->which_saps = (Uint4Ptr)MemNew((rsp->numsaps+1)*sizeof(Uint4));
9301 rsp_new->num_in_sap = (Uint4Ptr)MemNew((rsp->numsaps+1)*sizeof(Uint4));
9302 for (i=0; i<rsp->numsaps; i++)
9303 {
9304 rsp_new->which_saps[i] = rsp->which_saps[i];
9305 rsp_new->num_in_sap[i] = rsp->num_in_sap[i];
9306 }
9307 rsp_new->numsaps = rsp->numsaps;
9308 return rsp_new;
9309 }
9310
9311 NLM_EXTERN AMAlignDatPtr AlnMgrCopyamadp(AMAlignDatPtr amadp, SeqAlignPtr sap_tmp, SeqAlignPtr seg_head)
9312 {
9313 AMAlignDatPtr amadp_new;
9314 Boolean found;
9315 Int4 i;
9316 Int4 j;
9317 Int4Ptr orderarray;
9318 SeqAlignPtr sap_old;
9319 SeqAlignPtr sap_new;
9320
9321 if (sap_tmp == NULL || amadp == NULL || seg_head == NULL)
9322 return