|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/alignmgr2.c |
source navigation diff markup identifier search freetext search file search |
1 /* ===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information (NCBI)
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government do not place any restriction on its use or reproduction.
12 * We would, however, appreciate having the NCBI and the author cited in
13 * any work or product based on this material.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * ===========================================================================
24 *
25 * File Name: alignmgr2.c
26 *
27 * Author: Sarah Wheelan
28 *
29 * Version Creation Date: 10/01
30 *
31 * $Revision: 6.63 $
32 *
33 * File Description: SeqAlign indexing, access, and manipulation functions
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * $Log: alignmgr2.c,v $
38 * Revision 6.63 2008/12/01 19:35:39 bollin
39 * prevent crash when mapping positions and row of alignment is entirely in the gap.
40 *
41 * Revision 6.62 2007/03/09 20:37:06 bollin
42 * Fixed insidious double-increment bug in AlnMgr2MergeTwoAlignments - if the
43 * second alignment to be merged had more than one segment, the seg index was
44 * incremented past the number of segments (and some segments were not initialized).
45 *
46 * Revision 6.61 2007/01/09 14:13:52 bollin
47 * Fixed bug in AlnMgr2ExtendToCoords - prior version was not extending on 5' end.
48 *
49 * Revision 6.60 2006/09/06 15:48:33 bollin
50 * removed compiler warnings
51 *
52 * Revision 6.59 2006/09/06 15:14:54 bollin
53 * fixed bug that was generating segments of length zero at the end of an
54 * alignment
55 *
56 * Revision 6.58 2005/03/01 13:56:03 bollin
57 * if the alignment we want to index is a DenseSeg and not a list of alignments,
58 * just give it a simple index - don't decompose to pairwise and reconstruct it.
59 *
60 * Revision 6.57 2005/02/23 14:40:55 bollin
61 * when condensing columns in AlnMgr2CondenseColumns, make sure we do not
62 * disturb the ascending order of starts for each row
63 *
64 * Revision 6.56 2004/09/15 14:59:19 bollin
65 * make sure we do not read outside the alignment index arrays
66 *
67 * Revision 6.55 2004/05/20 19:46:25 bollin
68 * removed unused variables
69 *
70 * Revision 6.54 2004/05/11 13:19:49 bollin
71 * update the dimension of the shared alignment after adding a sequence.
72 *
73 * Revision 6.53 2004/04/13 14:43:07 kskatz
74 * Final resolution of revisions 6.51 and 6.52: reverted 6.52; then cleaned up readability of AlnMgr2SeqPortRead() and ensured that it will never call SeqPortRead for a length > AM_SEQPORTSIZE
75 *
76 * Revision 6.52 2004/04/12 19:52:15 kskatz
77 * Revision 6.51 was right neighborhood,wrong off-by-one: It was in AlnMgr2ComputeFreqMatrix() call to AlnMgr2SeqPortRead() when using l+AM_SEQPORTSIZE instead of l+AM_SEQPORTSIZE-1
78 *
79 * Revision 6.51 2004/04/12 17:00:44 kskatz
80 * Fixed off-by-one error in AlnMgr2SeqPortRead() length passed to SeqPortRead(); stop-start+1 changed to stop-start
81 *
82 * Revision 6.50 2004/03/11 14:15:41 bollin
83 * added extra check in AlnMgr2GetNthSeqIdPtr to avoid core dump if there are
84 * fewer than N SeqIDs in the alignment.
85 *
86 * Revision 6.49 2003/10/20 17:54:34 kans
87 * AlnMgr2ComputeFreqMatrix protect against dereferencing NULL bsp
88 *
89 * Revision 6.48 2003/10/09 13:46:52 rsmith
90 * Add AlnMgr2GetFirstNForSipList.
91 *
92 * Revision 6.47 2003/05/15 18:53:10 rsmith
93 * in AlnMgr2GetSeqRangeForSipInStdSeg always return start & stop in coordinate order. Do not assume what minus strand will do or not.
94 *
95 * Revision 6.46 2003/04/24 20:28:48 rsmith
96 * made AlnMgr2GetNthStdSeg use 1 based numbering like the other Nth functions.
97 *
98 * Revision 6.45 2003/04/23 20:36:13 rsmith
99 * Added four functions in Section 11 to get information about Std-Seg alignments.
100 *
101 * Revision 6.44 2003/03/31 20:17:11 todorov
102 * Added AlnMgr2IndexSeqAlignEx
103 *
104 * Revision 6.43 2003/02/03 12:36:22 kans
105 * AlnMgr2ComputeScoreForSeqAlign checks return value of AlnMgr2ComputeFreqMatrix, returns -1 if NULL to avoid dereference crash
106 *
107 * Revision 6.42 2002/10/23 16:32:19 todorov
108 * CondenseColumns fixed: needed to move the lens too.
109 *
110 * Revision 6.40 2002/10/16 15:54:28 todorov
111 * use the default dim value if not set
112 *
113 * Revision 6.39 2002/08/07 21:57:33 kans
114 * added AlignMgr2GetFirstNForStdSeg
115 *
116 * Revision 6.38 2002/07/11 14:35:51 kans
117 * fixed Mac complaints about prototypes
118 *
119 * Revision 6.37 2002/07/11 12:55:38 wheelan
120 * added support for std-seg alignments
121 *
122 * Revision 6.36 2002/06/04 17:43:07 todorov
123 * 1) Substituted AddInNewSA with a new and optimized AddInNewPairwiseSA function.
124 * 2) Fixed a few bugs in other functions.
125 *
126 * Revision 6.35 2002/05/17 15:04:42 wheelan
127 * bug fix in ExtendToCoords
128 *
129 * Revision 6.34 2002/05/17 11:02:36 wheelan
130 * bug fixes in Merge func
131 *
132 * Revision 6.32 2002/03/04 17:19:18 wheelan
133 * added AlnMgr2FuseSet, changed behavior of RemoveInconsistent, fixed GetNextAlnBitBugs
134 *
135 * Revision 6.31 2002/01/31 17:41:47 wheelan
136 * various bug fixes -- no more 0 len segments, better handling of rows that are one big insert, etc.
137 *
138 * Revision 6.30 2002/01/30 19:12:53 wheelan
139 * added RemoveInconsistentAlnsFromSet, ExtractPairwiseSeqAlign, changed behavior of GetSubAlign, changed structures and behavior of GetNextAlnBit, added GetInterruptInfo, added AlnMgr2IndexAsRows, bug fixes in indexing routines
140 *
141 * Revision 6.29 2002/01/02 15:05:07 wheelan
142 * changes to force more efficient ordering in CompareAsp callbacks, plus more stringent checks in AlnMgr2AddInNewSA
143 *
144 * Revision 6.28 2001/12/28 22:53:20 wheelan
145 * bug fixes; added AlnMgr2DupAlnAndIndexes, changed some New and Free funcs
146 *
147 * Revision 6.27 2001/12/27 16:07:22 wheelan
148 * bug fix in ExtendToEnd
149 *
150 * Revision 6.26 2001/12/20 19:43:20 wheelan
151 * bug fix in GetNextAlnBit -- no more incorrect inserts
152 *
153 * Revision 6.25 2001/12/18 16:36:57 wheelan
154 * scattered fixes to unaligned region code
155 *
156 * Revision 6.24 2001/12/17 19:36:39 wheelan
157 * various fixes in AlnMgr2AddInNewSA
158 *
159 * Revision 6.23 2001/12/14 12:38:50 wheelan
160 * added functions for ddv
161 *
162 * Revision 6.22 2001/12/05 12:25:49 wheelan
163 * bug fix in SortByNthRow
164 *
165 * Revision 6.21 2001/12/04 19:28:55 wheelan
166 * bug fixes in AddInNewSA and in IndexSingleDenseSegSA
167 *
168 * Revision 6.20 2001/12/04 14:31:27 wheelan
169 * fixes to avoid mistakenly processing AM2_LITE as real indexed alignments
170 *
171 * Revision 6.19 2001/11/30 16:55:21 wheelan
172 * added AlnMgr2PadConservatively
173 *
174 * Revision 6.18 2001/11/29 18:38:47 wheelan
175 * cleanup as recommended by Mac compiler
176 *
177 * Revision 6.17 2001/11/29 17:37:16 wheelan
178 * added ExtendToCoords and MergeTwoAlignments
179 *
180 * Revision 6.16 2001/11/27 15:47:40 wheelan
181 * bug fixes in AnchorSeqAlign, DoCondense, and AddInNewSA
182 *
183 * Revision 6.15 2001/11/15 18:23:06 wheelan
184 * small change in AlnMgr2GetNthRowSpan
185 *
186 * Revision 6.14 2001/11/15 18:09:38 wheelan
187 * another bug fix in AddInNewSA
188 *
189 * Revision 6.13 2001/11/15 15:30:54 wheelan
190 * many bugs fixed, leaks plugged, plus reworked AddInNewSA to use new AMSmall field
191 *
192 * Revision 6.12 2001/11/13 14:36:13 wheelan
193 * many bug fixes in AddInNewSA and MapBioseqToSeqAlign
194 *
195 * Revision 6.11 2001/11/08 19:56:07 wheelan
196 * added AlnMgr2GetNthRowSpanInSA, fixed various memory errors
197 *
198 * Revision 6.10 2001/11/08 01:39:15 wheelan
199 * many bug fixes in and around AddInNewSA
200 *
201 * Revision 6.9 2001/11/02 14:01:30 wheelan
202 * bug fixes in AlnMgr2AddInNewSA
203 *
204 * Revision 6.8 2001/10/31 12:00:46 wheelan
205 * commented out the mistakenly uncommented comment
206 *
207 * Revision 6.7 2001/10/30 20:14:38 wheelan
208 * bug fixes for minus strands in AddInNewSA, bug fix in GetSubAlign
209 *
210 * Revision 6.6 2001/10/23 12:14:27 wheelan
211 * changes in AlnMgr2GetNextAlnBit as well as tree-based multiple alignment algorithm
212 *
213 * Revision 6.5 2001/10/18 15:10:53 wheelan
214 * fixed AlnMgr2ComputeScoreForSeqAlign
215 *
216 * Revision 6.4 2001/10/16 12:00:17 wheelan
217 * added GetParent and FreeEitherIndex
218 *
219 * Revision 6.3 2001/10/08 18:43:29 wheelan
220 * added comments
221 *
222 * Revision 6.2 2001/10/03 18:13:01 wheelan
223 * changed some colliding defines
224 *
225 * Revision 6.1 2001/10/03 14:20:11 wheelan
226 * initial checkin
227 *
228 * ==========================================================================
229 *
230 */
231
232 #include <alignmgr2.h>
233
234 /***************************************************************************
235 *
236 * static functions
237 *
238 ***************************************************************************/
239 /* SECTION 1 */
240 static SARowDat2Ptr SARowDat2New(void);
241 static void SARowDat2Free(SARowDat2Ptr srdp);
242 static SARowDat2Ptr SARowDat2Copy(SARowDat2Ptr srdp);
243 static SAIndex2Ptr SAIndex2New(void);
244 static SAIndex2Ptr SAIndex2Copy(VoidPtr index);
245 static AMAlignIndex2Ptr AMAlignIndex2Copy(VoidPtr index);
246 static void AMIntervalSetFree(AMIntervalSetPtr amint);
247 /* SECTION 2 */
248 static void AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap);
249 static void AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap);
250 static Boolean AlnMgr2UnpackSeqAlign(SeqAlignPtr sap);
251 static void AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap);
252 static void AlnMgr2DecomposeToPairwise(SeqAlignPtr sap);
253 static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap);
254 static void AlnMgr2SortBySeqId(SeqAlignPtr sap);
255 static int LIBCALLBACK AlnMgr2CompareIds(VoidPtr ptr1, VoidPtr ptr2);
256 static void AlnMgr2TossWorse(SeqAlignPtr sap, Int4 i, Int4 j);
257 static AMIntervalSetPtr AlnMgr2MakeIntervals(SeqAlignPtr sap);
258 static void AlnMgr2SortIntervals(AMIntervalSetPtr amint);
259 static int LIBCALLBACK AlnMgr2CompareIntervals(VoidPtr ptr1, VoidPtr ptr2);
260 static AMVertexPtr PNTR AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap, AMIntervalSetPtr amint_head, AMVertexPtr PNTR vertexhead, AMEdgePtr PNTR edgehead, Int4Ptr numvertices);
261 static void AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray, Int4 numvertices);
262 static int LIBCALLBACK AlnMgr2CompareVertices(VoidPtr ptr1, VoidPtr ptr2);
263 static void AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head);
264 static int LIBCALLBACK AlnMgr2CompareEdges(VoidPtr ptr1, VoidPtr ptr2);
265 static Int4 AlnMgr2MatchToVertex(SeqIdPtr sip, Int4 start, Int4 stop, AMVertexPtr PNTR vertexarray, Int4 numvertices);
266 static void AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head);
267 static void AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head);
268 static AMVertexPtr AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray, AMEdgePtr edge);
269 static AMEdgePtr AlnMgr2GetEdgeList(Int4 vertexnum, AMEdgePtr edge_head, AMEdgePtr already_used);
270 static void AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head);
271 static Boolean AlnMgr2SameSeq(AMVertexPtr vertex1, AMVertexPtr vertex2);
272 static void AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head, SeqAlignPtr sap);
273 static AMVertexPtr AlnMgr2GetAdjacentVertices(AMVertexPtr vertex, AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head);
274 static void AlnMgr2AddInNewSA(SeqAlignPtr parent, SeqAlignPtr sap);
275 static void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap);
276 static Int4 AlnMgr2MapSegStartToSegStart(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2, Int4 len);
277 static Int4 AlnMgr2GetSegForStartPos(SeqAlignPtr sap, Int4 pos, Int4 row);
278 static void AlnMgr2CondenseColumns(DenseSegPtr dsp);
279 static void AlnMgr2CondenseRows(DenseSegPtr dsp, Int4 whichrow);
280 static Boolean AlnMgr2DoCondense(DenseSegPtr dsp, Int4 rownum1, Int4 rownum2);
281 static int LIBCALLBACK AlnMgr2CompareCdRows(VoidPtr ptr1, VoidPtr ptr2);
282 static int LIBCALLBACK AlnMgr2CompareAsps(VoidPtr ptr1, VoidPtr ptr2);
283 static int LIBCALLBACK AlnMgr2CompareAspsMinus(VoidPtr ptr1, VoidPtr ptr2);
284 static void AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1, SeqAlignPtr sap2, Int4Ptr n1, Int4Ptr n2);
285 static SeqIdPtr AlnMgr2SeqIdListsOverlap(SeqIdPtr sip1, SeqIdPtr sip2);
286 static Int4 AlnMgr2OrderSeqIds(SeqIdPtr sip1, SeqIdPtr sip2);
287 static void AlnMgr2SetUnaln(SeqAlignPtr sap);
288 static int LIBCALLBACK AlnMgr2CompareUnalnAMS(VoidPtr ptr1, VoidPtr ptr2);
289 /* SECTION 4 */
290 static Int4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen);
291 static Int4 binary_search_on_uint2_list(Uint2Ptr list, Int4 ele, Uint2 listlen);
292 static void AlnMgr2GetUnalignedInfo(SeqAlignPtr sap, Int4 segment, Int4 row, Int4Ptr from, Int4Ptr to);
293 static void AlnMgr2GetNthSeqRangeInSASet(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);
294 static Int4 AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap, Int4 seg);
295 /* SECTION 5 */
296 static void AlnMgr2AnchorChild(SeqAlignPtr sap, Int4 which_row);
297 /* SECTION 8 */
298 static Int4 AlnMgr2GetScoreForPair(Int4 res1, Int4 res2, Boolean is_prot);
299 /* SECTION 9 */
300 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2);
301
302
303 typedef struct am_seqpieceset AMSeqPieceSet, PNTR AMSeqPieceSetPtr;
304 typedef struct am_seqpiece AMSeqPiece, PNTR AMSeqPiecePtr;
305
306 struct am_seqpiece {
307 Int4 beg;
308 Int4 end;
309 Int4 left;
310 Int4 right;
311 Int4 orig_left;
312 Int4 orig_right;
313 Boolean aligned;
314 Int4 seg;
315 Int4 pos;
316 DenseSegPtr alt_dsp;
317 Int4 alt_seg;
318 Int4 alt_pos;
319 AMSeqPiecePtr next;
320 AMSeqPiecePtr prev;
321 AMSeqPieceSetPtr set;
322 };
323
324 struct am_seqpieceset {
325 AMSeqPiecePtr head;
326 AMSeqPiecePtr tail;
327 DenseSegPtr dsp;
328 DenseSegPtr alt_dsp;
329 Int4 row;
330 Int4 row2;
331 Int4 alt_row;
332 Int4 alt_row2;
333 Uint1 strand;
334 Boolean plus;
335 Int4 max_pos;
336 Boolean sign;
337 AMSeqPieceSetPtr next;
338 };
339
340
341
342 /***************************************************************************
343 *
344 * SECTION 1: Functions for allocating and freeing data structures used
345 * by the alignment manager; copying functions are also here.
346 *
347 ***************************************************************************/
348
349 /* SECTION 1 */
350 static SARowDat2Ptr SARowDat2New(void)
351 {
352 return (SARowDat2Ptr)MemNew(sizeof(SARowDat2));
353 }
354
355 /* SECTION 1 */
356 static void SARowDat2Free(SARowDat2Ptr srdp)
357 {
358 if (srdp == NULL)
359 return;
360 if (srdp->sect != NULL)
361 MemFree(srdp->sect);
362 if (srdp->unsect != NULL)
363 MemFree(srdp->unsect);
364 MemFree(srdp->insect);
365 MemFree(srdp->unaligned);
366 MemFree(srdp);
367 }
368
369 /* SECTION 1 */
370 static SARowDat2Ptr SARowDat2Copy(SARowDat2Ptr srdp)
371 {
372 Int4 i;
373 SARowDat2Ptr srdp2;
374
375 if (srdp == NULL)
376 return NULL;
377 srdp2 = SARowDat2New();
378 srdp2->numsect = srdp->numsect;
379 srdp2->sect = (Uint2Ptr)MemNew(srdp2->numsect*sizeof(Uint2));
380 for (i=0; i<srdp2->numsect; i++)
381 {
382 srdp2->sect[i] = srdp->sect[i];
383 }
384 srdp2->numunsect = srdp->numunsect;
385 srdp2->unsect = (Uint2Ptr)MemNew(srdp2->numunsect*sizeof(Uint2));
386 for (i=0; i<srdp2->numunsect; i++)
387 {
388 srdp2->unsect[i] = srdp->unsect[i];
389 }
390 srdp2->numinsect = srdp->numinsect;
391 srdp2->insect = (Uint2Ptr)MemNew(srdp2->numinsect*sizeof(Uint2));
392 for (i=0; i<srdp2->numinsect; i++)
393 {
394 srdp2->insect[i] = srdp->insect[i];
395 }
396 srdp2->numunaln = srdp->numunaln;
397 srdp2->unaligned = (Uint2Ptr)MemNew(srdp2->numunaln*sizeof(Uint2));
398 for (i=0; i<srdp2->numunaln; i++)
399 {
400 srdp2->unaligned[i] = srdp->unaligned[i];
401 }
402 return srdp2;
403 }
404
405 /* SECTION 1 */
406 static SAIndex2Ptr SAIndex2New(void)
407 {
408 SAIndex2Ptr saip;
409
410 saip = (SAIndex2Ptr)MemNew(sizeof(SAIndex2));
411 saip->indextype = INDEX_CHILD;
412 saip->freefunc = (SeqAlignIndexFreeFunc)(SAIndex2Free2);
413 saip->anchor = -1;
414 return saip;
415 }
416
417 /* SECTION 1 */
418 NLM_EXTERN Boolean LIBCALLBACK SAIndex2Free2(VoidPtr index)
419 {
420 Int4 i;
421 SAIndex2Ptr saip;
422
423 if (index == NULL)
424 return TRUE;
425 saip = (SAIndex2Ptr)(index);
426 MemFree(saip->aligncoords);
427 for (i=0; i<saip->numrows; i++)
428 {
429 SARowDat2Free(saip->srdp[i]);
430 }
431 MemFree(saip->srdp);
432 MemFree(saip);
433 return TRUE;
434 }
435
436 /* SECTION 1 */
437 NLM_EXTERN void AlnMgr2FreeInterruptInfo(AMInterrInfoPtr interr)
438 {
439 if (interr == NULL)
440 return;
441 MemFree(interr->starts);
442 MemFree(interr->lens);
443 MemFree(interr->types);
444 MemFree(interr);
445 }
446
447 /* SECTION 1*/
448 static SAIndex2Ptr SAIndex2Copy(VoidPtr index)
449 {
450 Int4 i;
451 SAIndex2Ptr saip;
452 SAIndex2Ptr saip2;
453
454 saip2 = SAIndex2New();
455 saip = (SAIndex2Ptr)(index);
456 saip2->numseg = saip->numseg;
457 saip2->aligncoords = (Uint4Ptr)MemNew(saip2->numseg*sizeof(Uint4));
458 for (i=0; i<saip2->numseg; i++)
459 {
460 saip2->aligncoords[i] = saip->aligncoords[i];
461 }
462 saip2->anchor = saip->anchor;
463 saip2->numrows = saip->numrows;
464 saip2->numseg = saip->numseg;
465 saip2->srdp = (SARowDat2Ptr PNTR)MemNew(saip2->numrows*sizeof(SARowDat2));
466 for (i=0; i<saip2->numrows; i++)
467 {
468 saip2->srdp[i] = SARowDat2Copy(saip->srdp[i]);
469 }
470 saip2->numunaln = saip->numunaln;
471 saip2->unaln = (Uint4Ptr)MemNew(saip2->numunaln*sizeof(Uint4));
472 for (i=0; i<saip2->numunaln; i++)
473 {
474 saip2->unaln[i] = saip->unaln[i];
475 }
476 saip2->numinchain = saip->numinchain;
477 saip2->numsplitaln = saip->numsplitaln;
478 saip2->score = saip->score;
479 saip2->aligned = saip->aligned;
480 return saip2;
481 }
482
483 /* SECTION 1 */
484 static AMAlignIndex2Ptr AMAlignIndex2New(void)
485 {
486 AMAlignIndex2Ptr amaip;
487
488 amaip = (AMAlignIndex2Ptr)MemNew(sizeof(AMAlignIndex2));
489 amaip->indextype = INDEX_PARENT;
490 amaip->freefunc = (SeqAlignIndexFreeFunc)(AMAlignIndex2Free2);
491 return amaip;
492 }
493
494 /* SECTION 1 */
495 NLM_EXTERN Boolean LIBCALLBACK AMAlignIndex2Free2(VoidPtr index)
496 {
497 AMAlignIndex2Ptr amaip;
498 Int4 i;
499
500 if (index == NULL)
501 return FALSE;
502 amaip = (AMAlignIndex2Ptr)(index);
503 for (i=0; i<amaip->numrows; i++)
504 {
505 SeqIdFree(amaip->ids[i]);
506 }
507 MemFree(amaip->ids);
508 MemFree(amaip->saps);
509 MemFree(amaip->aligned);
510 SeqAlignFree(amaip->sharedaln);
511 MemFree(amaip);
512 return TRUE;
513 }
514
515 /* SECTION 1 */
516 static AMAlignIndex2Ptr AMAlignIndex2Copy(VoidPtr index)
517 {
518 AMAlignIndex2Ptr amaip;
519 AMAlignIndex2Ptr amaip2;
520 Int4 i;
521
522 if (index == NULL)
523 return NULL;
524 amaip = (AMAlignIndex2Ptr)(index);
525 amaip2 = AMAlignIndex2New();
526 amaip2->alnstyle = amaip->alnstyle;
527 amaip2->anchor = amaip->anchor;
528 amaip2->numrows = amaip->numrows;
529 amaip2->ids = (SeqIdPtr PNTR)MemNew(amaip2->numrows*sizeof(SeqIdPtr));
530 for (i=0; i<amaip2->numrows; i++)
531 {
532 amaip2->ids[i] = SeqIdDup(amaip->ids[i]);
533 }
534 amaip2->numsaps = amaip->numsaps;
535 amaip2->saps = (SeqAlignPtr PNTR)MemNew(amaip2->numsaps*sizeof(SeqAlignPtr));
536 amaip2->aligned = (Boolean PNTR)MemNew(amaip2->numsaps*sizeof(Boolean));
537 for (i=0; i<amaip2->numsaps; i++)
538 {
539 amaip2->saps[i] = SeqAlignDup(amaip->saps[i]);
540 amaip2->aligned[i] = amaip->aligned[i];
541 if (i>0)
542 amaip2->saps[i-1]->next = amaip2->saps[i];
543 }
544 amaip2->sharedaln = AlnMgr2DupAlnAndIndexes(amaip->sharedaln);
545 return amaip2;
546 }
547
548 /* SECTION 1 */
549 NLM_EXTERN void AMAlignIndexFreeEitherIndex(SeqAlignPtr sap)
550 {
551 if (sap == NULL || sap->saip == NULL)
552 return;
553 if (sap->saip->indextype == INDEX_PARENT)
554 AMAlignIndex2Free2(sap->saip);
555 else
556 SAIndex2Free2(sap->saip);
557 sap->saip = NULL;
558 }
559
560 /* SECTION 1 */
561 NLM_EXTERN SeqAlignPtr AlnMgr2DupAlnAndIndexes(SeqAlignPtr sap)
562 {
563 AMAlignIndex2Ptr amaip;
564 SAIndex2Ptr saip;
565 SeqAlignPtr sap_new;
566
567 if (sap == NULL)
568 return NULL;
569 if (sap->saip == NULL)
570 return (SeqAlignDup(sap));
571 sap_new = NULL;
572 if (sap->saip->indextype == INDEX_CHILD)
573 {
574 sap_new = SeqAlignDup(sap);
575 sap_new->saip = (Pointer)SAIndex2Copy(sap->saip);
576 saip = (SAIndex2Ptr)(sap_new->saip);
577 saip->top = AlnMgr2GetParent(sap);
578 } else if (sap->saip->indextype == INDEX_PARENT)
579 {
580 sap_new = SeqAlignNew();
581 sap_new->type = sap->type;
582 sap_new->segtype = sap->segtype;
583 sap_new->saip = (Pointer)(AMAlignIndex2Copy(sap->saip));
584 amaip = (AMAlignIndex2Ptr)(sap_new->saip);
585 sap_new->segs = amaip->saps[0];
586 }
587 return sap_new;
588 }
589
590 /* SECTION 1 */
591 NLM_EXTERN AlnMsg2Ptr AlnMsgNew2(void)
592 {
593 AlnMsg2Ptr amp;
594
595 amp = (AlnMsg2Ptr)MemNew(sizeof(AlnMsg2));
596 amp->real_from = -2;
597 return amp;
598 }
599
600 /* SECTION 1 */
601 NLM_EXTERN AlnMsg2Ptr AlnMsgFree2(AlnMsg2Ptr amp)
602 {
603 if (amp->left_interrupt != NULL)
604 {
605 MemFree(amp->left_interrupt);
606 amp->left_interrupt = NULL;
607 }
608 if (amp->right_interrupt != NULL)
609 {
610 MemFree(amp->right_interrupt);
611 amp->right_interrupt = NULL;
612 }
613 MemFree(amp);
614 return NULL;
615 }
616
617 /* SECTION 1 */
618 NLM_EXTERN void AlnMsgReNew2(AlnMsg2Ptr amp)
619 {
620 if (amp == NULL)
621 return;
622 if (amp->left_interrupt != NULL)
623 {
624 MemFree(amp->left_interrupt);
625 amp->left_interrupt = NULL;
626 }
627 if (amp->right_interrupt != NULL)
628 {
629 MemFree(amp->right_interrupt);
630 amp->right_interrupt = NULL;
631 }
632 amp->real_from = -2;
633 amp->len = -2;
634 return;
635 }
636
637 /* SECTION 1 */
638 static void AMIntervalSetFree(AMIntervalSetPtr amint)
639 {
640 AMIntervalPtr intv;
641 AMIntervalPtr intv_next;
642
643 intv = amint->int_head;
644 while (intv != NULL)
645 {
646 intv_next = intv->next;
647 MemFree(intv);
648 intv = intv_next;
649 }
650 SeqIdFree(amint->sip);
651 MemFree(amint);
652 }
653
654 /* SECTION 1 */
655 NLM_EXTERN void AMFreqFree(AMFreqPtr afp)
656 {
657 Int4 i;
658
659 if (afp == NULL)
660 return;
661 for (i=0; i<afp->size; i++)
662 {
663 MemFree(afp->freq[i]);
664 }
665 MemFree(afp->freq);
666 MemFree(afp);
667 }
668
669 /* SECTION 1 */
670 static void AMSeqPieceSetFree(AMSeqPieceSetPtr s_set)
671 {
672 AMSeqPieceSetPtr s_set_next;
673 AMSeqPiecePtr s, s_next;
674
675 while (s_set) {
676 s = s_set->head;
677 while (s) {
678 s_next = s->next;
679 MemFree(s);
680 s = s_next;
681 }
682 s_set_next = s_set->next;
683 MemFree(s_set);
684 s_set = s_set_next;
685 }
686 }
687
688 /***************************************************************************
689 *
690 * SECTION 2: Functions used to create the indexes for parent and child
691 * seqaligns.
692 * SECTION 2a: Functions to create indexes for child seqaligns, and
693 * to convert seqaligns to dense-seg type
694 * SECTION 2b: Functions to unpack and rearrange complicated seqaligns
695 * into simple chains of dense-seg and dense-diag types
696 * SECTION 2c: Functions to create indexes for parent seqaligns
697 * SECTION 2d: Accessory functions for parent indexing
698 *
699 ***************************************************************************/
700
701 /***************************************************************************
702 *
703 * AlnMgr2ConvertDendiagToDensegChain takes a dense-diag style alignment
704 * and makes each diag into its own denseg seqalign, then links the new
705 * alignments together.
706 *
707 ***************************************************************************/
708 /* SECTION 2a */
709 static void AlnMgr2ConvertDendiagToDensegChain(SeqAlignPtr sap)
710 {
711 DenseDiagPtr ddp;
712 DenseDiagPtr ddp_next;
713 DenseSegPtr dsp;
714 Int4 i;
715 SeqAlignPtr sap_new;
716 SeqAlignPtr sap_next;
717 SeqAlignPtr sap_prev;
718
719 if (sap == NULL || sap->segtype != SAS_DENDIAG)
720 return;
721 sap_next = sap->next;
722 ddp = (DenseDiagPtr)(sap->segs);
723 /* convert the first diag to dense-seg and put it in the original alignment */
724 dsp = DenseSegNew();
725 dsp->ids = ddp->id;
726 ddp->id = NULL;
727 dsp->dim = ddp->dim;
728 dsp->numseg = 1;
729 dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Int4));
730 dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
731 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
732 for (i=0; i<dsp->dim; i++)
733 {
734 dsp->starts[i] = ddp->starts[i];
735 if (ddp->strands != NULL)
736 dsp->strands[i] = ddp->strands[i];
737 else
738 dsp->strands[i] = Seq_strand_plus;
739 }
740 dsp->lens[0] = ddp->len;
741 sap->segs = (Pointer)(dsp);
742 sap->segtype = SAS_DENSEG;
743 ddp_next = ddp->next;
744 ddp->next = NULL;
745 DenseDiagFree(ddp);
746 ddp = ddp_next;
747 if (ddp == NULL)
748 return;
749 sap_prev = sap;
750 while (ddp)
751 {
752 sap_new = SeqAlignNew();
753 sap_new->type = SAT_PARTIAL;
754 sap_new->segtype = SAS_DENSEG;
755 sap_new->dim = ddp->dim;
756 dsp = DenseSegNew();
757 dsp->ids = ddp->id;
758 ddp->id = NULL;
759 dsp->dim = ddp->dim;
760 dsp->numseg = 1;
761 dsp->starts = (Int4Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Int4));
762 dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
763 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
764 for (i=0; i<dsp->dim; i++)
765 {
766 dsp->starts[i] = ddp->starts[i];
767 if (ddp->strands != NULL)
768 dsp->strands[i] = ddp->strands[i];
769 else
770 dsp->strands[i] = Seq_strand_plus;
771 }
772 dsp->lens[0] = ddp->len;
773 sap_new->segs = (Pointer)(dsp);
774 ddp_next = ddp->next;
775 ddp->next = NULL;
776 DenseDiagFree(ddp);
777 ddp = ddp_next;
778 sap_prev->next = sap_new;
779 sap_prev = sap_new;
780 }
781 sap_new->next = sap_next;
782 }
783
784 /* SECTION 2a */
785 /***************************************************************************
786 *
787 * AlnMgr2IndexSingleDenseSegSA creates the SAIndex2 structure for a given
788 * dense-seg seqalign. This structure has binary-searchable indexes into
789 * the segs. If the strands are not allocated, this function allocates
790 * them and sets them to Seq_strand_plus.
791 *
792 ***************************************************************************/
793 static void AlnMgr2IndexSingleDenseSegSA(SeqAlignPtr sap)
794 {
795 DenseSegPtr dsp;
796 Int4 i;
797 Int4 j;
798 Int4 last;
799 Int4 next;
800 Int4 row;
801 SAIndex2Ptr saip;
802 Boolean unal;
803
804 if (sap->segtype != SAS_DENSEG)
805 return;
806 dsp = (DenseSegPtr)(sap->segs);
807 if (dsp->strands == NULL)
808 {
809 dsp->strands = (Uint1Ptr)MemNew(dsp->dim*dsp->numseg*sizeof(Uint1));
810 for (i=0; i<dsp->dim*dsp->numseg; i++)
811 {
812 dsp->strands[i] = Seq_strand_plus;
813 }
814 }
815 saip = SAIndex2New();
816 saip->aligncoords = (Uint4Ptr)MemNew((dsp->numseg)*sizeof(Uint4));
817 saip->srdp = (SARowDat2Ptr PNTR)MemNew((dsp->dim)*sizeof(SARowDat2Ptr));
818 saip->numrows = dsp->dim;
819 saip->numseg = dsp->numseg;
820 for (i=0; i<dsp->dim; i++)
821 {
822 saip->srdp[i] = SARowDat2New();
823 }
824 for (i=0; i<dsp->numseg; i++)
825 {
826 if (i != 0)
827 saip->aligncoords[i] = saip->aligncoords[i-1] + dsp->lens[i-1];
828 for (row=0; row<dsp->dim; row++)
829 {
830 if (dsp->starts[dsp->dim*i + row] != -1)
831 saip->srdp[row]->numsect++;
832 }
833 }
834 for (row=0; row<dsp->dim; row++)
835 {
836 saip->srdp[row]->sect = (Uint2Ptr)MemNew((saip->srdp[row]->numsect)*sizeof(Uint2));
837 saip->srdp[row]->unsect = (Uint2Ptr)MemNew((dsp->numseg - saip->srdp[row]->numsect)*sizeof(Uint2));
838 saip->srdp[row]->numsect = 0;
839 saip->srdp[row]->unaligned = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
840 }
841 for (i=0; i<dsp->numseg; i++)
842 {
843 for (row=0; row<dsp->dim; row++)
844 {
845 if (dsp->starts[dsp->dim*i + row] != -1)
846 {
847 saip->srdp[row]->sect[saip->srdp[row]->numsect] = i;
848 saip->srdp[row]->numsect++;
849 } else
850 {
851 saip->srdp[row]->unsect[saip->srdp[row]->numunsect] = i;
852 saip->srdp[row]->numunsect++;
853 }
854 }
855 }
856 for (row=0; row<dsp->dim; row++)
857 {
858 for (i=0; i<dsp->numseg; i++)
859 {
860 unal = FALSE;
861 last = -1;
862 j = i; /* only blocks with sequence can have flanking unal. regions */
863 if (j >= 0 && dsp->starts[dsp->dim*j+row] != -1)
864 {
865 if (dsp->strands[row] == Seq_strand_minus)
866 last = dsp->starts[dsp->dim*j+row];
867 else
868 last = dsp->starts[dsp->dim*j+row] + dsp->lens[j];
869 }
870 if (last > -1)
871 {
872 next = -1;
873 j++;
874 /* find next block of aligned sequence in this row */
875 for (j; j<dsp->numseg && next == -1; j++)
876 {
877 if (dsp->starts[dsp->dim*j+row] != -1)
878 {
879 if (dsp->strands[row] == Seq_strand_minus)
880 next = dsp->starts[dsp->dim*j+row] + dsp->lens[j];
881 else
882 next = dsp->starts[dsp->dim*j+row];
883 }
884 }
885 if (next > -1) /* look for unaligned seq on right side of this seg */
886 {
887 if (next != last)
888 unal = TRUE;
889 }
890 }
891 if (unal == TRUE)
892 {
893 saip->srdp[row]->unaligned[saip->srdp[row]->numunaln] = i;
894 saip->srdp[row]->numunaln++;
895 }
896 }
897 }
898 sap->saip = (SeqAlignIndexPtr)(saip);
899 }
900
901 /* SECTION 2a */
902 /***************************************************************************
903 *
904 * AlnMgr2IndexSingleChildSeqAlign takes a simple dense-seg or dense-diag
905 * seqalign, converts it to dense-seg, and then calls
906 * AlnMgr2IndexSingleDenseSegSA to create the indexes. If the alignment has
907 * already been indexed, this erases that index and reindexes the alignment.
908 * (SINGCHILD)
909 *
910 ***************************************************************************/
911 NLM_EXTERN Boolean AlnMgr2IndexSingleChildSeqAlign(SeqAlignPtr sap)
912 {
913 SeqAlignPtr salp;
914 SeqAlignPtr salp_prev;
915 SeqAlignPtr sap_next;
916
917 if (sap == NULL)
918 return FALSE;
919 if (sap->saip != NULL)
920 {
921 if (sap->saip->indextype != INDEX_CHILD)
922 return FALSE;
923 SAIndex2Free2(sap->saip);
924 sap->saip = NULL;
925 }
926 sap_next = sap->next;
927 sap->next = NULL;
928 if (sap->segtype == SAS_DISC)
929 return FALSE;
930 if (sap->segtype == SAS_DENDIAG)
931 AlnMgr2ConvertDendiagToDensegChain(sap);
932 salp = sap;
933 salp_prev = sap;
934 while (salp != NULL)
935 {
936 AlnMgr2IndexSingleDenseSegSA(salp);
937 salp_prev = salp;
938 salp = salp->next;
939 }
940 salp_prev->next = sap_next;
941 return TRUE;
942 }
943
944 /***************************************************************************
945 *
946 * AlnMgr2UnpackSeqAlign rearranges any seqalign (except alignments with
947 * more than two levels of nested discontinuous alignments) to a simple
948 * discontinuous alignment or a linked list of alignments.
949 *
950 ***************************************************************************/
951 /* SECTION 2b */
952 static Boolean AlnMgr2UnpackSeqAlign(SeqAlignPtr sap)
953 {
954 SeqAlignPtr sap_new;
955 SeqAlignPtr sap_next;
956 SeqAlignPtr sap_segs;
957 SeqAlignPtr sap_segs_head;
958 SeqAlignPtr sap_segs_prev;
959
960 if (sap == NULL)
961 return FALSE;
962 sap_segs = NULL;
963 if (sap->segtype == SAS_DISC)
964 {
965 sap_segs_head = (SeqAlignPtr)(sap->segs);
966 if (sap_segs_head->segtype == SAS_DISC)
967 {
968 sap_segs_prev = (SeqAlignPtr)(sap_segs_head->segs);
969 sap_segs_head->segs = NULL;
970 sap_next = sap_segs_head->next;
971 sap_segs_head->next = NULL;
972 SeqAlignFree(sap_segs_head);
973 sap_segs_head = sap_segs_prev;
974 sap->segs = (Pointer)(sap_segs_head);
975 while (sap_segs_prev->next)
976 {
977 sap_segs_prev = sap_segs_prev->next;
978 if (sap_segs_prev->segtype == SAS_DISC)
979 return FALSE;
980 }
981 sap_segs_prev->next = sap_next;
982 sap_segs = sap_next;
983 } else
984 sap_segs = sap_segs_head->next;
985 while (sap_segs)
986 {
987 if (sap_segs->segtype == SAS_DISC)
988 {
989 sap_next = sap_segs->next;
990 sap_segs->next = NULL;
991 sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
992 sap_segs->segs = NULL;
993 SeqAlignFree(sap_segs);
994 while (sap_segs_prev->next)
995 {
996 sap_segs_prev = sap_segs_prev->next;
997 if (sap_segs_prev->segtype == SAS_DISC)
998 return FALSE;
999 }
1000 sap_segs_prev->next = sap_next;
1001 sap_segs = sap_next;
1002 } else
1003 sap_segs = sap_segs->next;
1004 }
1005 } else
1006 {
1007 sap_new = SeqAlignNew();
1008 sap_new->type = SAT_GLOBAL;
1009 sap_new->segtype = sap->segtype;
1010 sap_new->dim = sap->dim;
1011 sap_new->segs = sap->segs;
1012 sap_new->master = sap->master;
1013 sap_new->bounds = sap->bounds;
1014 sap_new->next = sap->next;
1015 sap_new->score = sap->score;
1016 sap->next = NULL;
1017 sap->segtype = SAS_DISC;
1018 sap->type = 0;
1019 sap->dim = 0;
1020 sap->master = NULL;
1021 sap->bounds = NULL;
1022 sap->score = NULL;
1023 sap->segs = (Pointer)sap_new;
1024 sap_segs_prev = sap_new;
1025 sap_segs = sap_new->next;
1026 while (sap_segs)
1027 {
1028 if (sap_segs->segtype == SAS_DISC)
1029 {
1030 sap_next = sap_segs->next;
1031 sap_segs->next = NULL;
1032 sap_segs_prev->next = (SeqAlignPtr)(sap_segs->segs);
1033 sap_segs->segs = NULL;
1034 SeqAlignFree(sap_segs);
1035 while (sap_segs_prev->next)
1036 {
1037 sap_segs_prev = sap_segs_prev->next;
1038 if (sap_segs_prev->segtype == SAS_DISC)
1039 return FALSE;
1040 }
1041 sap_segs_prev->next = sap_next;
1042 sap_segs = sap_next;
1043 } else
1044 sap_segs = sap_segs->next;
1045 }
1046 }
1047 return TRUE;
1048 }
1049
1050 /* SECTION 2b */
1051 static void AlnMgr2UnpackSeqAlignChain(SeqAlignPtr sap)
1052 {
1053 Int4 i;
1054 SeqAlignPtr salp_head;
1055 SeqAlignPtr salp_prev;
1056 SeqAlignPtr sap_next;
1057 SeqAlignPtr sap_orig;
1058 SeqAlignPtr sap_prev;
1059
1060 salp_head = salp_prev = NULL;
1061 i = 0;
1062 while (sap != NULL)
1063 {
1064 sap_next = sap->next;
1065 sap->next = NULL;
1066 AlnMgr2UnpackSeqAlign(sap);
1067 while (sap != NULL)
1068 {
1069 if (salp_prev != NULL)
1070 {
1071 salp_prev->next = (SeqAlignPtr)(sap->segs);
1072 sap->segs = NULL;
1073 while (salp_prev->next != NULL)
1074 {
1075 salp_prev = salp_prev->next;
1076 }
1077 } else
1078 {
1079 salp_head = salp_prev = (SeqAlignPtr)(sap->segs);
1080 sap->segs = NULL;
1081 while (salp_prev->next != NULL)
1082 {
1083 salp_prev = salp_prev->next;
1084 }
1085 }
1086 sap_prev = sap;
1087 sap = sap->next;
1088 sap_prev->next = NULL;
1089 if (i>0)
1090 SeqAlignFree(sap_prev);
1091 else
1092 sap_orig = sap_prev; /* this is the pointer that was passed in */
1093 i++;
1094 }
1095 sap = sap_next;
1096 }
1097 sap_orig->segs = (Pointer)(salp_head);
1098 }
1099
1100 /* SECTION 2b */
1101 /***************************************************************************
1102 *
1103 * AlnMgr2ConvertAllToDenseSeg goes through a chain of simple child
1104 * seqaligns and makes sure that each is a dense-seg seqalign with the
1105 * strands explicitly allocated; dense-diag alignments are converted and
1106 * non-allocated strands are allocated and all set to Seq_strand_plus.
1107 *
1108 ***************************************************************************/
1109 static void AlnMgr2ConvertAllToDenseSeg(SeqAlignPtr sap)
1110 {
1111 DenseSegPtr dsp;
1112 Int4 i;
1113 SeqAlignPtr sap_next;
1114
1115 while (sap != NULL)
1116 {
1117 sap_next = sap->next;
1118 if (sap->segtype == SAS_DENDIAG)
1119 AlnMgr2ConvertDendiagToDensegChain(sap);
1120 else if (sap->segtype == SAS_DENSEG)
1121 {
1122 dsp = (DenseSegPtr)(sap->segs);
1123 if (dsp->strands == NULL)
1124 {
1125 dsp->strands = (Uint1Ptr)MemNew((dsp->dim)*(dsp->numseg)*sizeof(Uint1));
1126 for (i=0; i<(dsp->dim)*(dsp->numseg); i++)
1127 {
1128 dsp->strands[i] = Seq_strand_plus;
1129 }
1130 }
1131 }
1132 sap = sap_next;
1133 }
1134 }
1135
1136 /* SECTION 2c */
1137 /***************************************************************************
1138 *
1139 * AlnMgr2IndexLite takes a seqalign or a list of seqaligns, converts
1140 * each alignment to a dense-seg structure and indexes it, and then
1141 * allocates an AMAlignIndex2 structure and fills in the saps array.
1142 *
1143 ***************************************************************************/
1144 NLM_EXTERN Boolean AlnMgr2IndexLite(SeqAlignPtr sap)
1145 {
1146 AMAlignIndex2Ptr amaip;
1147 Int4 i;
1148 SAIndex2Ptr saip;
1149 SeqAlignPtr salp;
1150
1151 if (sap == NULL)
1152 return FALSE;
1153 if (!AlnMgr2UnpackSeqAlign(sap))
1154 return FALSE;
1155 AlnMgr2ConvertAllToDenseSeg((SeqAlignPtr)sap->segs);
1156 amaip = AMAlignIndex2New();
1157 amaip->alnstyle = AM2_LITE;
1158 salp = (SeqAlignPtr)(sap->segs);
1159 while (salp != NULL)
1160 {
1161 amaip->numsaps++;
1162 AlnMgr2IndexSingleChildSeqAlign(salp);
1163 salp = salp->next;
1164 }
1165 amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
1166 salp = (SeqAlignPtr)(sap->segs);
1167 i = 0;
1168 while (salp != NULL)
1169 {
1170 amaip->saps[i] = salp;
1171 i++;
1172 saip = (SAIndex2Ptr)(salp->saip);
1173 saip->numinchain = i;
1174 saip->top = sap;
1175 salp = salp->next;
1176 }
1177 sap->saip = (SeqAlignIndexPtr)amaip;
1178 amaip->aligned = (Boolean PNTR)MemNew((amaip->numsaps)*sizeof(Boolean));
1179 for (i=0; i<amaip->numsaps; i++)
1180 {
1181 amaip->aligned[i] = TRUE;
1182 }
1183 return TRUE;
1184 }
1185
1186 /* SECTION 2c */
1187 /***************************************************************************
1188 *
1189 * AlnMgr2IndexSeqAlign takes a seqalign of any type except std-seg and
1190 * creates indexes on it for easy retrieval of useful information by other
1191 * AlnMgr2 functions. If the seqalign is a single alignment, that alignment
1192 * gets a simple index and is left alone otherwise. If the seqalign is
1193 * a set of alignments or a dense-diag set, the subalignments get
1194 * individually indexed and then are combined into a (fake) multiple
1195 * alignment which also gets indexed. The subalignments can now be accessed
1196 * as a multiple alignment by AlnMgr2 functions.
1197 *
1198 ***************************************************************************/
1199
1200 NLM_EXTERN void AlnMgr2IndexSeqAlign(SeqAlignPtr sap)
1201 {
1202 AlnMgr2IndexSeqAlignEx(sap, TRUE);
1203 }
1204
1205 NLM_EXTERN void AlnMgr2IndexSeqAlignEx(SeqAlignPtr sap, Boolean replace_gi)
1206 {
1207 AMAlignIndex2Ptr amaip;
1208 AMIntervalSetPtr amint;
1209 AMIntervalSetPtr amint_head;
1210 AMEdgePtr edge;
1211 AMEdgePtr edge_head;
1212 Int4 i;
1213 Int4 numvertices;
1214 AMVertexPtr vertex_head;
1215 AMVertexPtr PNTR vertexarray;
1216
1217 if (sap == NULL || sap->saip != NULL)
1218 return;
1219 if (replace_gi) {
1220 SAM_ReplaceGI(sap);
1221 }
1222
1223 if (sap->next == NULL && sap->segtype == SAS_DENSEG)
1224 {
1225 AlnMgr2IndexSingleChildSeqAlign(sap);
1226 return;
1227 }
1228
1229 AlnMgr2IndexLite(sap);
1230 AlnMgr2DecomposeToPairwise(sap);
1231 amaip = (AMAlignIndex2Ptr)(sap->saip);
1232 amaip->alnstyle = AM2_FULLINDEX;
1233 AlnMgr2HidePairwiseConflicts(sap);
1234 amint_head = AlnMgr2MakeIntervals(sap);
1235 vertex_head = NULL;
1236 edge_head = NULL;
1237 vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1238 while (amint_head != NULL)
1239 {
1240 amint = amint_head->next;
1241 AMIntervalSetFree(amint_head);
1242 amint_head = amint;
1243 }
1244 AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1245 AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1246 for (i=0; i<numvertices; i++)
1247 {
1248 SeqIdFree(vertexarray[i]->sip);
1249 MemFree(vertexarray[i]);
1250 }
1251 MemFree(vertexarray);
1252 while (edge_head != NULL)
1253 {
1254 edge = edge_head->next;
1255 MemFree(edge_head);
1256 edge_head = edge;
1257 }
1258 amaip = (AMAlignIndex2Ptr)(sap->saip);
1259 amaip->alnstyle = AM2_FULLINDEX;
1260 }
1261
1262 /* SECTION 2c */
1263 /***************************************************************************
1264 *
1265 * AlnMgr2ReIndexSeqAlign takes an indexed alignment (that has, presumably,
1266 * been changed), makes sure all child seqaligns are indexed (if they are
1267 * already indexed they are not reindexed), and reindexes all the child
1268 * seqaligns as a set.
1269 *
1270 ***************************************************************************/
1271 NLM_EXTERN void AlnMgr2ReIndexSeqAlign(SeqAlignPtr sap)
1272 {
1273 AMAlignIndex2Ptr amaip;
1274 AMIntervalSetPtr amint;
1275 AMIntervalSetPtr amint_head;
1276 AMEdgePtr edge_head;
1277 Int4 i;
1278 Int4 numvertices;
1279 AMVertexPtr vertex_head;
1280 AMVertexPtr PNTR vertexarray;
1281
1282 if (sap == NULL)
1283 return;
1284 if (sap->saip == NULL)
1285 {
1286 AlnMgr2IndexSeqAlign(sap);
1287 return;
1288 }
1289 if (sap->saip->indextype == INDEX_CHILD)
1290 return;
1291 amaip = (AMAlignIndex2Ptr)(sap->saip);
1292 for (i=0; i<amaip->numsaps; i++)
1293 {
1294 if (amaip->saps[i]->saip == NULL)
1295 AlnMgr2IndexSingleChildSeqAlign(amaip->saps[i]);
1296 }
1297 if (amaip->alnstyle != AM2_LITE)
1298 return;
1299 AlnMgr2DecomposeToPairwise(sap);
1300 AlnMgr2HidePairwiseConflicts(sap);
1301 amint_head = AlnMgr2MakeIntervals(sap);
1302 vertex_head = NULL;
1303 edge_head = NULL;
1304 vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1305 while (amint_head != NULL)
1306 {
1307 amint = amint_head->next;
1308 AMIntervalSetFree(amint_head);
1309 amint_head = amint;
1310 }
1311 AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1312 AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1313 MemFree(vertexarray);
1314 }
1315
1316 static int LIBCALLBACK AlnMgr2CompareByAnchor(VoidPtr ptr1, VoidPtr ptr2)
1317 {
1318 DenseSegPtr dsp;
1319 int ret;
1320 SAIndex2Ptr saip1;
1321 SAIndex2Ptr saip2;
1322 SeqAlignPtr sap1;
1323 SeqAlignPtr sap2;
1324 SeqIdPtr sip1;
1325 SeqIdPtr sip2;
1326 Int4 start1;
1327 Int4 start2;
1328 Int4 stop1;
1329 Int4 stop2;
1330
1331 sap1 = *((SeqAlignPtr PNTR)ptr1);
1332 sap2 = *((SeqAlignPtr PNTR)ptr2);
1333 saip1 = (SAIndex2Ptr)(sap1->saip);
1334 saip2 = (SAIndex2Ptr)(sap2->saip);
1335 dsp = (DenseSegPtr)(sap1->segs);
1336 if (saip1->tmp == 1)
1337 sip1 = dsp->ids->next;
1338 else
1339 sip1 = dsp->ids;
1340 dsp = (DenseSegPtr)(sap2->segs);
1341 if (saip2->tmp == 1)
1342 sip2 = dsp->ids->next;
1343 else
1344 sip2 = dsp->ids;
1345 ret = AlnMgr2OrderSeqIds(sip1, sip2);
1346 if (ret != 0)
1347 return ret;
1348 /* these share both ids -- put best first */
1349 if (saip1->score == 0)
1350 saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
1351 if (saip2->score == 0)
1352 saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
1353 if (saip1->score > saip2->score)
1354 return -1;
1355 else if (saip1->score < saip2->score)
1356 return 1;
1357 AlnMgr2GetNthSeqRangeInSA(sap1, saip1->tmp, &start1, &stop1);
1358 AlnMgr2GetNthSeqRangeInSA(sap2, saip2->tmp, &start2, &stop2);
1359 if (start1 < start2)
1360 return -1;
1361 else if (start1 > start2)
1362 return 1;
1363 else if (stop1 > stop2)
1364 return -1;
1365 else if (stop1 < stop2)
1366 return 1;
1367 return 0;
1368 }
1369
1370 /* SECTION 2c */
1371 NLM_EXTERN Boolean AlnMgr2IndexAsRows(SeqAlignPtr sap, Uint1 strand, Boolean truncate)
1372 {
1373 AMAlignIndex2Ptr amaip;
1374 DenseSegPtr dsp;
1375 DenseSegPtr dsp_tmp;
1376 Boolean found;
1377 Int4 i;
1378 Boolean impossible;
1379 Int4 numsaps;
1380 SAIndex2Ptr saip;
1381 SeqAlignPtr salp;
1382 SeqAlignPtr sap_head;
1383 SeqAlignPtr sap_prev;
1384 SeqAlignPtr sap_tmp;
1385 SeqAlignPtr PNTR saparray;
1386 SeqAlignPtr set_head;
1387 SeqAlignPtr set_prev;
1388 SeqIdPtr sharedsip;
1389 SeqIdPtr sip;
1390 SeqIdPtr sip_next;
1391 SeqIdPtr sip_tmp;
1392 Int4 tmp;
1393
1394 if (sap == NULL)
1395 return FALSE;
1396 if (sap->saip != NULL)
1397 AMAlignIndexFreeEitherIndex(sap);
1398 AlnMgr2IndexLite(sap);
1399 AlnMgr2DecomposeToPairwise(sap);
1400 /* need to figure out which row is shared by all saps */
1401 sap_tmp = (SeqAlignPtr)(sap->segs);
1402 dsp = (DenseSegPtr)(sap_tmp->segs);
1403 sip = dsp->ids;
1404 found = FALSE;
1405 while (!found && sip != NULL)
1406 {
1407 sap_tmp = (SeqAlignPtr)(sap->segs);
1408 sip_next = sip->next;
1409 sip->next = NULL;
1410 impossible = FALSE;
1411 while (!impossible && sap_tmp != NULL)
1412 {
1413 dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1414 if (AlnMgr2SeqIdListsOverlap(sip, dsp_tmp->ids) == NULL)
1415 impossible = TRUE;
1416 sap_tmp = sap_tmp->next;
1417 }
1418 sip->next = sip_next;
1419 if (!impossible) /* found one that matched a row in every alignment */
1420 found = TRUE;
1421 else
1422 sip = sip_next;
1423 }
1424 if (!found) /* didn't find a seqid that was contained in all alignments */
1425 return FALSE;
1426 /* mark the shared row to make things easier */
1427 sharedsip = SeqIdDup(sip);
1428 sap_tmp = (SeqAlignPtr)(sap->segs);
1429 i = 0;
1430 while (sap_tmp != NULL)
1431 {
1432 saip = (SAIndex2Ptr)(sap_tmp->saip);
1433 dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1434 if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1435 saip->tmp = 1;
1436 else
1437 saip->tmp = 2;
1438 sap_tmp = sap_tmp->next;
1439 i++;
1440 }
1441 saparray = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
1442 sap_tmp = (SeqAlignPtr)(sap->segs);
1443 i = 0;
1444 while (sap_tmp != NULL)
1445 {
1446 saparray[i] = sap_tmp;
1447 i++;
1448 sap_tmp = sap_tmp->next;
1449 }
1450 numsaps = i;
1451 HeapSort(saparray, i, sizeof(SeqAlignPtr), AlnMgr2CompareByAnchor);
1452 /* now each clump of alignments is a row -- need to eliminate overlaps next */
1453 sip = NULL;
1454 i = 0;
1455 sap_head = sap_prev = NULL;
1456 while (i<numsaps)
1457 {
1458 saparray[i]->next = NULL;
1459 set_head = set_prev = saparray[i];
1460 saip = (SAIndex2Ptr)(saparray[i]->saip);
1461 sip = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp); /* get other seqid */
1462 i++;
1463 if (i<numsaps)
1464 sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1465 while (i<numsaps && SeqIdComp(sip, sip_tmp) == SIC_YES)
1466 {
1467 set_prev->next = saparray[i];
1468 set_prev = saparray[i];
1469 saparray[i]->next = NULL;
1470 i++;
1471 SeqIdFree(sip_tmp);
1472 if (i<numsaps)
1473 sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1474 }
1475 AlnMgr2IndexLite(set_head);
1476 if (!truncate)
1477 AlnMgr2RemoveInconsistentAlnsFromSet(set_head, 0);
1478 else
1479 AlnMgr2RemoveInconsistentAlnsFromSet(set_head, -1);
1480 sap_tmp = (SeqAlignPtr)(set_head->segs);
1481 while (sap_tmp != NULL)
1482 {
1483 saip = (SAIndex2Ptr)(sap_tmp->saip);
1484 dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1485 if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1486 saip->tmp = 1;
1487 else
1488 saip->tmp = 2;
1489 sap_tmp = sap_tmp->next;
1490 }
1491 if (sap_head != NULL)
1492 sap_prev->next = set_head;
1493 else
1494 sap_head = sap_prev = set_head;
1495 while (sap_prev->next != NULL)
1496 {
1497 sap_prev = sap_prev->next;
1498 }
1499 sap_prev->next = NULL;
1500 }
1501 /* now we have lots of freed pointers sitting in the array */
1502 MemFree(saparray);
1503 saparray = NULL;
1504 /* sap_head is the head of a chain of LITE-indexed alignments, each of which is one row */
1505 /* first make sure that the shared row is on the requested strand */
1506 sap_tmp = sap_head;
1507 if (strand == Seq_strand_both || strand == Seq_strand_unknown || strand == 0)
1508 strand = Seq_strand_plus;
1509 while (sap_tmp != NULL)
1510 {
1511 salp = (SeqAlignPtr)(sap_tmp->segs);
1512 saip = (SAIndex2Ptr)(salp->saip);
1513 /* strand is same for all children */
1514 if (AlnMgr2GetNthStrand(salp, saip->tmp) != strand)
1515 {
1516 SeqAlignListReverseStrand(salp);
1517 while (salp != NULL)
1518 {
1519 saip = (SAIndex2Ptr)salp->saip;
1520 tmp = saip->tmp;
1521 SAIndex2Free2(salp->saip);
1522 salp->saip = NULL;
1523 AlnMgr2IndexSingleChildSeqAlign(salp);
1524 saip = (SAIndex2Ptr)salp->saip;
1525 saip->tmp = tmp;
1526 salp = salp->next;
1527 }
1528 }
1529 sap_tmp = sap_tmp->next;
1530 }
1531 sap_tmp = sap_head;
1532 sap->segs = NULL;
1533 AMAlignIndex2Free2(sap->saip);
1534 sap->saip = (SeqAlignIndexPtr)AMAlignIndex2New();
1535 amaip = (AMAlignIndex2Ptr)(sap->saip);
1536 amaip->alnstyle = AM2_FULLINDEX;
1537 set_head = set_prev = NULL;
1538 while (sap_tmp != NULL)
1539 {
1540 salp = (SeqAlignPtr)(sap_tmp->segs);
1541 while (salp != NULL)
1542 {
1543 AlnMgr2AddInNewPairwiseSA(sap, salp);
1544 if (set_head != NULL)
1545 {
1546 set_prev->next = salp;
1547 set_prev = salp;
1548 } else
1549 set_head = set_prev = salp;
1550 salp = salp->next;
1551 }
1552 sap_tmp->segs = NULL;
1553 sap_tmp = sap_tmp->next;
1554 }
1555 AlnMgr2CondenseColumns((DenseSegPtr)(amaip->sharedaln->segs));
1556 AlnMgr2IndexSingleChildSeqAlign(amaip->sharedaln);
1557 set_prev->next = NULL;
1558 sap->segs = (Pointer)(set_head);
1559 SeqAlignListFree(sap_head);
1560 SeqIdFree(sharedsip);
1561 return TRUE;
1562 }
1563
1564 /* SECTION 2c */
1565 /***************************************************************************
1566 *
1567 * AlnMgr2IndexIndexedChain takes a linked list of indexed seqaligns
1568 * and does an in-place transformation to an indexed parent-child
1569 * seqalign set.
1570 *
1571 ***************************************************************************/
1572 NLM_EXTERN void AlnMgr2IndexIndexedChain(SeqAlignPtr sap)
1573 {
1574 AMAlignIndex2Ptr amaip;
1575 AMIntervalSetPtr amint;
1576 AMIntervalSetPtr amint_head;
1577 AMEdgePtr edge_head;
1578 Int4 numvertices;
1579 AMVertexPtr vertex_head;
1580 AMVertexPtr PNTR vertexarray;
1581
1582 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
1583 return;
1584 AlnMgr2IndexLite(sap);
1585 AlnMgr2DecomposeToPairwise(sap);
1586 amaip = (AMAlignIndex2Ptr)(sap->saip);
1587 amaip->alnstyle = AM2_FULLINDEX;
1588 AlnMgr2HidePairwiseConflicts(sap);
1589 amint_head = AlnMgr2MakeIntervals(sap);
1590 vertex_head = NULL;
1591 edge_head = NULL;
1592 vertexarray = AlnMgr2MakeVerticesFromIntervals(sap, amint_head, &vertex_head, &edge_head, &numvertices);
1593 while (amint_head != NULL)
1594 {
1595 amint = amint_head->next;
1596 AMIntervalSetFree(amint_head);
1597 amint_head = amint;
1598 }
1599 AlnMgr2UsePrimsAlgorithm(vertexarray, numvertices, edge_head);
1600 AlnMgr2BuildAlignmentFromTree(vertexarray, numvertices, edge_head, sap);
1601 MemFree(vertexarray);
1602 }
1603
1604 /* SECTION 2c */
1605 /***************************************************************************
1606 *
1607 * AlnMgr2DecomposeToPairwise takes a parent seqalign and goes through all
1608 * its children, checking their dimensions. If a child seqalign is found
1609 * with dimension greater than 2, that alignment is copied into a set of
1610 * two-row alignments, each new alignment containing the first row of the
1611 * original alignment and a different row. This function does NOT take out
1612 * segs with only gaps (is this a problem????). The resulting seqaligns
1613 * are all individually indexed and then the whole set is indexed lite.
1614 *
1615 ***************************************************************************/
1616 static void AlnMgr2DecomposeToPairwise(SeqAlignPtr sap)
1617 {
1618 DenseSegPtr dsp;
1619 DenseSegPtr dsp_orig;
1620 Int4 i;
1621 Int4 j;
1622 Int4 n;
1623 SAIndex2Ptr saip;
1624 SAIndex2Ptr saip_orig;
1625 SeqAlignPtr salp;
1626 SeqAlignPtr salp_new;
1627 SeqAlignPtr salp_next;
1628 SeqAlignPtr salp_prev;
1629
1630 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1631 return;
1632 salp = (SeqAlignPtr)(sap->segs);
1633 salp_prev = NULL;
1634 while (salp)
1635 {
1636 n = AlnMgr2GetNumRows(salp);
1637 if (n > 2)
1638 {
1639 salp_next = salp->next;
1640 saip_orig = (SAIndex2Ptr)(salp->saip);
1641 for (i=2; i<=n; i++)
1642 {
1643 salp_new = SeqAlignNew();
1644 dsp_orig = (DenseSegPtr)(salp->segs);
1645 dsp = DenseSegNew();
1646 dsp->dim = 2;
1647 dsp->numseg = dsp_orig->numseg;
1648 dsp->ids = AlnMgr2GetNthSeqIdPtr(salp, 1);
1649 dsp->ids->next = AlnMgr2GetNthSeqIdPtr(salp, i);
1650 dsp->starts = (Int4Ptr)MemNew(dsp->numseg*2*sizeof(Int4));
1651 dsp->lens = (Int4Ptr)MemNew(dsp->numseg*sizeof(Int4));
1652 dsp->strands = (Uint1Ptr)MemNew(dsp->numseg*2*sizeof(Uint1));
1653 for (j=0; j<dsp->numseg; j++)
1654 {
1655 dsp->lens[j] = dsp_orig->lens[j];
1656 dsp->starts[2*j] = dsp_orig->starts[dsp_orig->dim*j];
1657 dsp->starts[2*j+1] = dsp_orig->starts[dsp_orig->dim*j+i-1];
1658 dsp->strands[2*j] = dsp_orig->strands[dsp_orig->dim*j];
1659 dsp->strands[2*j+1] = dsp_orig->strands[dsp_orig->dim*j+i-1];
1660 }
1661 salp_new = SeqAlignNew();
1662 salp_new->dim = 2;
1663 salp_new->segs = (Pointer)dsp;
1664 salp_new->segtype = SAS_DENSEG;
1665 AlnMgr2IndexSingleChildSeqAlign(salp_new);
1666 saip = (SAIndex2Ptr)(salp_new->saip);
1667 saip->numinchain = saip_orig->numinchain;
1668 saip->numsplitaln = i-1;
1669 if (salp_prev == NULL)
1670 {
1671 salp_prev = salp_new;
1672 sap->segs = (Pointer)salp_new;
1673 } else
1674 {
1675 salp_prev->next = salp_new;
1676 salp_prev = salp_new;
1677 }
1678 }
1679 salp_prev->next = salp_next;
1680 salp->next = NULL;
1681 SeqAlignFree(salp);
1682 salp = salp_next;
1683 } else
1684 {
1685 salp_prev = salp;
1686 salp = salp->next;
1687 }
1688 }
1689 AMAlignIndex2Free2(sap->saip);
1690 sap->saip = NULL;
1691 AlnMgr2IndexLite(sap);
1692 }
1693
1694 /* SECTION 2c */
1695 /***************************************************************************
1696 *
1697 * AlnMgr2HidePairwiseConflicts looks through a set of indexed seqaligns
1698 * to find pairs of alignments that share the same seqids and that provide
1699 * conflicting information. These pairs are then sent to AlnMgr2TossWorse,
1700 * which hides the worse alignment by unaligning it. Note that the hidden
1701 * alignments are not destroyed and are not taken out of the set.
1702 *
1703 ***************************************************************************/
1704 static void AlnMgr2HidePairwiseConflicts(SeqAlignPtr sap)
1705 {
1706 AMAlignIndex2Ptr amaip;
1707 Int4 i;
1708 Boolean inset;
1709 Int4 j;
1710 Boolean match;
1711 SeqIdPtr sip11;
1712 SeqIdPtr sip12;
1713 SeqIdPtr sip21;
1714 SeqIdPtr sip22;
1715 Int4 start11;
1716 Int4 start12;
1717 Int4 start21;
1718 Int4 start22;
1719 Int4 stop11;
1720 Int4 stop12;
1721 Int4 stop21;
1722 Int4 stop22;
1723 Int4Ptr tossed;
1724
1725 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1726 return;
1727 amaip = (AMAlignIndex2Ptr)(sap->saip);
1728 AlnMgr2SortBySeqId(sap);
1729 tossed = (Int4Ptr)MemNew(amaip->numsaps*sizeof(Int4));
1730 for (i=0; i<amaip->numsaps-1; i++)
1731 {
1732 for (j=0; j<amaip->numsaps; j++)
1733 {
1734 tossed[j] = 0;
1735 }
1736 inset = TRUE;
1737 for (j=i+1; amaip->aligned[i] && j<amaip->numsaps && inset == TRUE; j++)
1738 {
1739 if (tossed[j] == 0 && amaip->aligned[i] && amaip->aligned[j])
1740 {
1741 sip11 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], 1);
1742 sip12 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], 2);
1743 sip21 = AlnMgr2GetNthSeqIdPtr(amaip->saps[j], 1);
1744 sip22 = AlnMgr2GetNthSeqIdPtr(amaip->saps[j], 2);
1745 match = FALSE;
1746 if (SeqIdComp(sip11, sip21) == SIC_YES && SeqIdComp(sip12, sip22) == SIC_YES)
1747 {
1748 match = TRUE;
1749 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, &start11, &stop11);
1750 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &start12, &stop12);
1751 AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 1, &start21, &stop21);
1752 AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 2, &start22, &stop22);
1753 } else if (SeqIdComp(sip11, sip22) == SIC_YES && SeqIdComp(sip12, sip21) == SIC_YES)
1754 {
1755 match = TRUE;
1756 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, &start11, &stop11);
1757 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 2, &start12, &stop12);
1758 AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 2, &start21, &stop21);
1759 AlnMgr2GetNthSeqRangeInSA(amaip->saps[j], 1, &start22, &stop22);
1760 } else if (SeqIdComp(sip11, sip21) != SIC_YES && SeqIdComp(sip11, sip22) != SIC_YES)
1761 inset = FALSE;
1762 if (match == TRUE)
1763 {
1764 if ((start11 < start21 && stop11 > stop21) || (start11 < stop21 && stop11 > stop21) || (start11 > start21 && stop11 < stop21))
1765 {
1766 AlnMgr2TossWorse(sap, i, j);
1767 if (amaip->aligned[j] == 0) /* j just got tossed -- put it in the list */
1768 tossed[j] = 1;
1769 }
1770 }
1771 SeqIdFree(sip11);
1772 SeqIdFree(sip12);
1773 SeqIdFree(sip21);
1774 SeqIdFree(sip22);
1775 }
1776 }
1777 if (amaip->aligned[i] == 0) /* the query alignment got tossed -- restore */
1778 { /* all the ones that it tossed out */
1779 for (j=0; j<amaip->numsaps; j++)
1780 {
1781 if (tossed[j] == 1)
1782 amaip->aligned[j] = 1;
1783 }
1784 }
1785 }
1786 MemFree(tossed);
1787 }
1788
1789 /* SECTION 2c */
1790 static void AlnMgr2SortBySeqId(SeqAlignPtr sap)
1791 {
1792 AMAlignIndex2Ptr amaip;
1793 Int4 i;
1794 SAIndex2Ptr saip;
1795
1796 amaip = (AMAlignIndex2Ptr)(sap->saip);
1797 for (i=0; i<amaip->numsaps; i++)
1798 {
1799 saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
1800 saip->aligned = amaip->aligned[i];
1801 }
1802 HeapSort(amaip->saps, amaip->numsaps, sizeof(amaip->saps), AlnMgr2CompareIds);
1803 for (i=0; i<amaip->numsaps; i++)
1804 {
1805 saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
1806 amaip->aligned[i] = saip->aligned;
1807 }
1808 }
1809
1810 /* SECTION 2c */
1811 static int LIBCALLBACK AlnMgr2CompareIds(VoidPtr ptr1, VoidPtr ptr2)
1812 {
1813 Int4 ret;
1814 SAIndex2Ptr saip1;
1815 SAIndex2Ptr saip2;
1816 SeqAlignPtr sap1;
1817 SeqAlignPtr sap2;
1818 SeqIdPtr sip1;
1819 SeqIdPtr sip2;
1820
1821 if (ptr1 == NULL || ptr2 == NULL)
1822 return 0;
1823 sap1 = *((SeqAlignPtr PNTR) ptr1);
1824 sap2 = *((SeqAlignPtr PNTR) ptr2);
1825 sip1 = AlnMgr2GetNthSeqIdPtr(sap1, 1);
1826 sip2 = AlnMgr2GetNthSeqIdPtr(sap2, 1);
1827 ret = (AlnMgr2OrderSeqIds(sip1, sip2));
1828 SeqIdFree(sip1);
1829 SeqIdFree(sip2);
1830 if (ret != 0)
1831 return ret;
1832 saip1 = (SAIndex2Ptr)(sap1->saip);
1833 saip2 = (SAIndex2Ptr)(sap2->saip);
1834 if (saip1->score == 0)
1835 saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
1836 if (saip2->score == 0)
1837 saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
1838 if (saip1->score > saip2->score)
1839 return -1;
1840 if (saip1->score < saip2->score)
1841 return 1;
1842 return 0;
1843 }
1844
1845 /* SECTION 2c */
1846 /***************************************************************************
1847 *
1848 * Given an indexed seqalign set, AlnMgr2TossWorse looks at the indicated
1849 * pair of seqaligns, gets their scores, and sets the unaligned bit of the
1850 * seqalign with the worse score.
1851 *
1852 ***************************************************************************/
1853 static void AlnMgr2TossWorse(SeqAlignPtr sap, Int4 i, Int4 j)
1854 {
1855 AMAlignIndex2Ptr amaip;
1856 SAIndex2Ptr saip1;
1857 SAIndex2Ptr saip2;
1858 Int4 score1;
1859 Int4 score2;
1860
1861 amaip = (AMAlignIndex2Ptr)(sap->saip);
1862 saip1 = (SAIndex2Ptr)(amaip->saps[i]->saip);
1863 saip2 = (SAIndex2Ptr)(amaip->saps[j]->saip);
1864 if (saip1->score == 0)
1865 saip1->score = score1 = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[i]);
1866 else
1867 score1 = saip1->score;
1868 if (saip1->score == 0)
1869 saip2->score = score2 = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[j]);
1870 else
1871 score2 = saip2->score;
1872 if (score1 >= score2)
1873 amaip->aligned[j] = FALSE;
1874 else if (score2 > score1)
1875 amaip->aligned[i] = FALSE;
1876 }
1877
1878 /* SECTION 2c */
1879 /***************************************************************************
1880 *
1881 * AlnMgr2MakeIntervals takes every row from every seqalign and bins it
1882 * with other sequences with the same seqid and the same strand.
1883 *
1884 ***************************************************************************/
1885 static AMIntervalSetPtr AlnMgr2MakeIntervals(SeqAlignPtr sap)
1886 {
1887 AMAlignIndex2Ptr amaip;
1888 AMIntervalSetPtr amint;
1889 AMIntervalSetPtr amint_head;
1890 AMIntervalSetPtr amint_prev;
1891 Boolean found;
1892 Int4 i;
1893 AMIntervalPtr intv;
1894 AMIntervalPtr int_prev;
1895 Int4 j;
1896 Int4 k;
1897 SeqIdPtr sip;
1898 Uint1 strand;
1899
1900 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
1901 return NULL;
1902 amaip = (AMAlignIndex2Ptr)(sap->saip);
1903 amint_head = amint_prev = NULL;
1904 for (i=0; i<amaip->numsaps; i++)
1905 {
1906 if (amaip->aligned[i])
1907 {
1908 j = AlnMgr2GetNumRows(amaip->saps[i]);
1909 for (k=0; k<j; k++)
1910 {
1911 intv = (AMIntervalPtr)MemNew(sizeof(AMInterval));
1912 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], k+1, &(intv->from), &(intv->to));
1913 sip = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], k+1);
1914 strand = AlnMgr2GetNthStrand(amaip->saps[i], k+1);
1915 if (strand != Seq_strand_minus)
1916 strand = Seq_strand_plus; /* to avoid dealing with Seq_strand_unknown */
1917 intv->strand = strand;
1918 if (amint_head != NULL) /* figure out which interval set this goes in */
1919 {
1920 amint = amint_head;
1921 found = FALSE;
1922 while (amint != NULL && !found)
1923 {
1924 if (SeqIdComp(sip, amint->sip) == SIC_YES && strand == amint->strand)
1925 found = TRUE;
1926 else
1927 amint = amint->next;
1928 }
1929 if (found) /* add this to the interval set matched */
1930 {
1931 int_prev = amint->int_head;
1932 while (int_prev->next != NULL)
1933 {
1934 int_prev = int_prev->next;
1935 }
1936 int_prev->next = intv;
1937 } else /* make a new interval set */
1938 {
1939 amint = (AMIntervalSetPtr)MemNew(sizeof(AMIntervalSet));
1940 amint->sip = SeqIdDup(sip);
1941 amint->strand = strand;
1942 amint->int_head = intv;
1943 amint_prev = amint_head;
1944 while (amint_prev->next != NULL)
1945 {
1946 amint_prev = amint_prev->next;
1947 }
1948 amint_prev->next = amint;
1949 }
1950 } else /* make a new interval set */
1951 {
1952 amint = (AMIntervalSetPtr)MemNew(sizeof(AMIntervalSet));
1953 amint->sip = SeqIdDup(sip);
1954 amint->strand = strand;
1955 amint->int_head = intv;
1956 amint_head = amint;
1957 }
1958 SeqIdFree(sip);
1959 }
1960 }
1961 }
1962 return amint_head;
1963 }
1964
1965 /* SECTION 2c */
1966 /***************************************************************************
1967 *
1968 * AlnMgr2MakeVerticesFromIntervals takes the set of intervals created from
1969 * the alignments, and makes nonoverlapping vertices. Each vertex is a
1970 * single seqid plus a start and stop (so one seqid may have more than one
1971 * vertex). Each vertex is also associated with edges, or alignments, which
1972 * link the vertices together. An edge is simply two vertices plus a weight,
1973 * which is the alignment quality score. This function creates the vertices,
1974 * then creates the edges, and sorts the edges and vertices by quality and
1975 * by number of edges per vertex.
1976 *
1977 ***************************************************************************/
1978 static AMVertexPtr PNTR AlnMgr2MakeVerticesFromIntervals(SeqAlignPtr sap, AMIntervalSetPtr amint_head, AMVertexPtr PNTR vertexhead, AMEdgePtr PNTR edgehead, Int4Ptr numvertices)
1979 {
1980 AMAlignIndex2Ptr amaip;
1981 AMIntervalSetPtr amint;
1982 AMEdgePtr edge;
1983 AMEdgePtr edge_head;
1984 AMEdgePtr edge_prev;
1985 Int4 i;
1986 AMIntervalPtr intv;
1987 Int4 j;
1988 Int4 k;
1989 Int4 n;
1990 SAIndex2Ptr saip;
1991 SeqIdPtr sip1;
1992 SeqIdPtr sip2;
1993 Int4 start;
1994 Int4 stop;
1995 Int4 v1;
1996 Int4 v2;
1997 AMVertexPtr vertex;
1998 AMVertexPtr vertex_head;
1999 AMVertexPtr vertex_prev;
2000 AMVertexPtr PNTR vertexarray;
2001
2002 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
2003 return NULL;
2004 amint = amint_head;
2005 vertex_head = vertex_prev = NULL;
2006 while (amint != NULL)
2007 {
2008 AlnMgr2SortIntervals(amint);
2009 vertex = (AMVertexPtr)MemNew(sizeof(AMVertex));
2010 intv = amint->int_head;
2011 vertex->sip = SeqIdDup(amint->sip);
2012 vertex->strand = amint->strand;
2013 vertex->from = intv->from;
2014 vertex->to = intv->to;
2015 intv = intv->next;
2016 while (intv != NULL)
2017 {
2018 if ((intv->from <= vertex->to && intv->from >= vertex->from) || (intv->to <= vertex->to && intv->to >= vertex->from))
2019 {
2020 if (intv->from < vertex->from)
2021 vertex->from = intv->from;
2022 if (intv->to > vertex->to)
2023 vertex->to = intv->to;
2024 } else
2025 {
2026 if (vertex_head != NULL)
2027 {
2028 vertex_prev->next = vertex;
2029 vertex_prev = vertex;
2030 } else
2031 vertex_head = vertex_prev = vertex;
2032 vertex = (AMVertexPtr)MemNew(sizeof(AMVertex));
2033 vertex->from = intv->from;
2034 vertex->to = intv->to;
2035 vertex->sip = SeqIdDup(amint->sip);
2036 vertex->strand = amint->strand;
2037 }
2038 intv = intv->next;
2039 }
2040 if (vertex_head != NULL)
2041 {
2042 vertex_prev->next = vertex;
2043 vertex_prev = vertex;
2044 } else
2045 vertex_head = vertex_prev = vertex;
2046 amint = amint->next;
2047 }
2048 vertex = vertex_head;
2049 i = 0;
2050 while (vertex != NULL)
2051 {
2052 i++;
2053 vertex = vertex->next;
2054 }
2055 vertexarray = (AMVertexPtr PNTR)MemNew(i*sizeof(AMVertexPtr));
2056 *numvertices = i;
2057 vertex = vertex_head;
2058 i = 0;
2059 while (vertex != NULL)
2060 {
2061 vertexarray[i] = vertex;
2062 vertex = vertex->next;
2063 i++;
2064 }
2065 amaip = (AMAlignIndex2Ptr)(sap->saip);
2066 /* now make the edges from the alignments */
2067 edge_head = NULL;
2068 for (i=0; i<amaip->numsaps; i++)
2069 {
2070 if (amaip->aligned[i])
2071 {
2072 j = AlnMgr2GetNumRows(amaip->saps[i]);
2073 for (k=0; k<j; k++)
2074 {
2075 sip1 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], k+1);
2076 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], k+1, &start, &stop);
2077 v1 = AlnMgr2MatchToVertex(sip1, start, stop, vertexarray, *numvertices);
2078 for (n=k+1; n<j; n++)
2079 {
2080 vertexarray[v1]->numedges++;
2081 sip2 = AlnMgr2GetNthSeqIdPtr(amaip->saps[i], n+1);
2082 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n+1, &start, &stop);
2083 v2 = AlnMgr2MatchToVertex(sip2, start, stop, vertexarray, *numvertices);
2084 vertexarray[v2]->numedges++;
2085 edge = (AMEdgePtr)MemNew(sizeof(AMEdge));
2086 edge->vertex1 = v1;
2087 edge->vertex2 = v2;
2088 saip = NULL;
2089 if (amaip->saps[i]->saip != NULL)
2090 saip = (SAIndex2Ptr)(amaip->saps[i]->saip);
2091 if (saip != NULL && saip->score != 0)
2092 edge->weight = saip->score;
2093 else
2094 edge->weight = AlnMgr2ComputeScoreForSeqAlign(amaip->saps[i]);
2095 edge->sap = amaip->saps[i];
2096 edge->used = 0;
2097 if (edge_head != NULL)
2098 {
2099 edge_prev->next = edge;
2100 edge_prev = edge;
2101 } else
2102 edge_head = edge_prev = edge;
2103 SeqIdFree(sip2);
2104 }
2105 SeqIdFree(sip1);
2106 }
2107 }
2108 }
2109 AlnMgr2SortEdgesByWeight(&edge_head);
2110 *vertexhead = vertexarray[0];
2111 *edgehead = edge_head;
2112 return vertexarray;
2113 }
2114
2115 /* SECTION 2C */
2116 /***************************************************************************
2117 *
2118 * AlnMgr2SortVerticesByNumEdges -- the name says it all -- each vertex is
2119 * associated with one or more edges and the most populated vertices get
2120 * put first.
2121 *
2122 ***************************************************************************/
2123 static void AlnMgr2SortVerticesByNumEdges(AMVertexPtr PNTR vertexarray, Int4 numvertices)
2124 {
2125 Int4 i;
2126
2127 HeapSort(vertexarray, numvertices, sizeof(vertexarray), AlnMgr2CompareVertices);
2128 for (i=0; i<numvertices-1; i++)
2129 {
2130 vertexarray[i]->next = vertexarray[i+1];
2131 }
2132 vertexarray[numvertices-1]->next = NULL;
2133 }
2134
2135 /* SECTION 2c */
2136 /***************************************************************************
2137 *
2138 * AlnMgr2CompareVertices is the HeapSort callback for
2139 * AlnMgr2SortVerticesByNumEdges.
2140 *
2141 ***************************************************************************/
2142 static int LIBCALLBACK AlnMgr2CompareVertices(VoidPtr ptr1, VoidPtr ptr2)
2143 {
2144 AMVertexPtr vertex1;
2145 AMVertexPtr vertex2;
2146
2147 if (ptr1 != NULL && ptr2 != NULL)
2148 {
2149 vertex1 = *((AMVertexPtr PNTR)ptr1);
2150 vertex2 = *((AMVertexPtr PNTR)ptr2);
2151 if (vertex1->numedges > vertex2->numedges)
2152 return -1;
2153 else if (vertex1->numedges < vertex2->numedges)
2154 return 1;
2155 else
2156 return 0;
2157 }
2158 return 0;
2159 }
2160
2161 /* SECTION 2C */
2162 /***************************************************************************
2163 *
2164 * AlnMgr2SortEdgesByWeight takes a set of edges (alignments) and sorts
2165 * them by their preset weights (alignment scores), using AlnMgr2CompareEdges
2166 * as its HeapSort callback.
2167 *
2168 ***************************************************************************/
2169 static void AlnMgr2SortEdgesByWeight(AMEdgePtr PNTR edge_head)
2170 {
2171 AMEdgePtr edge;
2172 AMEdgePtr PNTR edgearray;
2173 Int4 i;
2174 Int4 j;
2175
2176 if (edge_head == NULL || *edge_head == NULL)
2177 return;
2178 edge = *edge_head;
2179 i = 0;
2180 while (edge != NULL)
2181 {
2182 i++;
2183 edge = edge->next;
2184 }
2185 edgearray = (AMEdgePtr PNTR)MemNew(i*sizeof(AMEdgePtr));
2186 edge = *edge_head;
2187 i = 0;
2188 while (edge != NULL)
2189 {
2190 edgearray[i] = edge;
2191 edge = edge->next;
2192 i++;
2193 }
2194 HeapSort(edgearray, i, sizeof(edgearray), AlnMgr2CompareEdges);
2195 for (j=0; j<i-1; j++)
2196 {
2197 edgearray[j]->next = edgearray[j+1];
2198 }
2199 edgearray[i-1]->next = NULL;
2200 *edge_head = edgearray[0];
2201 MemFree(edgearray);
2202 }
2203
2204 /* SECTION 2c */
2205 /***************************************************************************
2206 *
2207 * AlnMgr2CompareEdges is the HeapSort callback for AlnMgr2SortEdgesByWeight.
2208 * It simply compares the preset edge weights.
2209 *
2210 ***************************************************************************/
2211 static int LIBCALLBACK AlnMgr2CompareEdges(VoidPtr ptr1, VoidPtr ptr2)
2212 {
2213 AMEdgePtr edge1;
2214 AMEdgePtr edge2;
2215
2216 if (ptr1 != NULL && ptr2 != NULL)
2217 {
2218 edge1 = *((AMEdgePtr PNTR)ptr1);
2219 edge2 = *((AMEdgePtr PNTR)ptr2);
2220 if (edge1->weight > edge2->weight)
2221 return -1;
2222 else if (edge1->weight < edge2->weight)
2223 return 1;
2224 else
2225 return 0;
2226 }
2227 return 0;
2228 }
2229
2230 /* SECTION 2c */
2231 /***************************************************************************
2232 *
2233 * AlnMgr2MatchToVertex is called by AlnMgr2MakeVerticesFromIntervals to
2234 * figure out which vertex in the array the seqid, start, and stop match to.
2235 *
2236 ***************************************************************************/
2237 static Int4 AlnMgr2MatchToVertex(SeqIdPtr sip, Int4 start, Int4 stop, AMVertexPtr PNTR vertexarray, Int4 numvertices)
2238 {
2239 Int4 i;
2240
2241 if (sip == NULL || vertexarray == NULL)
2242 return -1;
2243 i = 0;
2244 while (i<numvertices)
2245 {
2246 if (SeqIdComp(sip, vertexarray[i]->sip) == SIC_YES)
2247 {
2248 if (start >= vertexarray[i]->from && start <= vertexarray[i]->to && stop >= vertexarray[i]->from && stop <= vertexarray[i]->to)
2249 return i;
2250 }
2251 i++;
2252 }
2253 return -1;
2254 }
2255
2256 /* SECTION 2c */
2257 /***************************************************************************
2258 *
2259 * AlnMgr2SortIntervals sorts the AMIntervals by start position within the
2260 * set, calling AlnMgr2CompareIntervals in a HeapSort.
2261 *
2262 ***************************************************************************/
2263 static void AlnMgr2SortIntervals(AMIntervalSetPtr amint)
2264 {
2265 Int4 i;
2266 AMIntervalPtr PNTR intarray;
2267 AMIntervalPtr intv;
2268 AMIntervalPtr intv_head;
2269 Int4 j;
2270
2271 i = 0;
2272 intv = amint->int_head;
2273 while (intv != NULL)
2274 {
2275 i++;
2276 intv = intv->next;
2277 }
2278 intarray = (AMIntervalPtr PNTR)MemNew(i*sizeof(AMIntervalPtr));
2279 intv = amint->int_head;
2280 i = 0;
2281 while (intv != NULL)
2282 {
2283 intarray[i] = intv;
2284 intv = intv->next;
2285 i++;
2286 }
2287 HeapSort(intarray, i, sizeof(intarray), AlnMgr2CompareIntervals);
2288 intv_head = intv = intarray[0];
2289 for (j=1; j<i; j++)
2290 {
2291 intv->next = intarray[j];
2292 intarray[j]->next = NULL;
2293 intv = intv->next;
2294 }
2295 amint->int_head = intv_head;
2296 MemFree(intarray);
2297 }
2298
2299 /* SECTION 2c */
2300 /***************************************************************************
2301 *
2302 * AlnMgr2CompareIntervals is the HeapSort callback for
2303 * AlnMgr2SortIntervals, which sorts a set of AMIntervals by start position.
2304 *
2305 ***************************************************************************/
2306 static int LIBCALLBACK AlnMgr2CompareIntervals(VoidPtr ptr1, VoidPtr ptr2)
2307 {
2308 AMIntervalPtr intv1;
2309 AMIntervalPtr intv2;
2310
2311 if (ptr1 != NULL && ptr2 != NULL)
2312 {
2313 intv1 = *((AMIntervalPtr PNTR)ptr1);
2314 intv2 = *((AMIntervalPtr PNTR)ptr2);
2315 if (intv1->from > intv2->from)
2316 return 1;
2317 else if (intv1->from < intv2->from)
2318 return -1;
2319 else
2320 {
2321 if (intv1->to > intv2->to)
2322 return 1;
2323 else
2324 return -1;
2325 }
2326 }
2327 return 0;
2328 }
2329
2330 /* SECTION 2c */
2331 /***************************************************************************
2332 *
2333 * AlnMgr2UsePrimsAlgorithm takes the set of edges and vertices produced by
2334 * earlier functions and creates a subset of edges that can be made into
2335 * a multiple alignment.
2336 *
2337 ***************************************************************************/
2338 static void AlnMgr2UsePrimsAlgorithm(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head)
2339 {
2340 if (vertexarray == NULL || edge_head == NULL)
2341 return;
2342 edge_head->used = AM_USED;
2343 vertexarray[edge_head->vertex1]->used = TRUE;
2344 vertexarray[edge_head->vertex2]->used = TRUE;
2345 AlnMgr2RecursePrims(vertexarray, edge_head);
2346 AlnMgr2CleanUpLeftovers(vertexarray, numvertices, edge_head);
2347 return;
2348 }
2349
2350 /* SECTION 2C */
2351 static AMEdgePtr AlnMgr2GetEdgeList(Int4 vertexnum, AMEdgePtr edge_head, AMEdgePtr already_used)
2352 {
2353 AMEdgePtr edge;
2354 AMEdgePtr list;
2355 AMEdgePtr list_head;
2356 AMEdgePtr list_prev;
2357
2358 edge = edge_head;
2359 list_head = NULL;
2360 while (edge != NULL)
2361 {
2362 if ((edge->vertex1 == vertexnum || edge->vertex2 == vertexnum) && edge != already_used)
2363 {
2364 list = (AMEdgePtr)MemNew(sizeof(AMEdge));
2365 list->vertex1 = edge->vertex1;
2366 list->vertex2 = edge->vertex2;
2367 list->weight = edge->weight;
2368 list->used = edge->used;
2369 if (list_head != NULL)
2370 {
2371 list_prev->next = list;
2372 list_prev = list;
2373 } else
2374 list_head = list_prev = list;
2375 }
2376 edge = edge->next;
2377 }
2378 return list_head;
2379 }
2380
2381 /* SECTION 2C */
2382 /***************************************************************************
2383 *
2384 * AlnMgr2GetBetterVertex returns the vertex of the edge indicated that
2385 * is shared by the largest number of other edges.
2386 *
2387 ***************************************************************************/
2388 static AMVertexPtr AlnMgr2GetBetterVertex(AMVertexPtr PNTR vertexarray, AMEdgePtr edge)
2389 {
2390 if (vertexarray[edge->vertex1]->numedges >= vertexarray[edge->vertex2]->numedges)
2391 return vertexarray[edge->vertex1];
2392 else
2393 return vertexarray[edge->vertex2];
2394 }
2395
2396 /* SECTION 2C */
2397 /***************************************************************************
2398 *
2399 * AlnMgr2RecursePrims is a simple yet powerful algorithm that builds a
2400 * minimal spanning tree of the edges and vertexes by starting with a set
2401 * of edges and vertices, picking the best/shortest edge, then picking
2402 * other edges one by one that join a vertex in the set with a vertex not
2403 * in the set, until all edges are used (or deemed impossible).
2404 *
2405 ***************************************************************************/
2406 static void AlnMgr2RecursePrims(AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head)
2407 {
2408 AMEdgePtr edge;
2409 Boolean found;
2410
2411 edge = edge_head;
2412 found = FALSE;
2413 /* find an edge that isn't used, that joins a vertex in the set */
2414 /* with a vertex outside the set, and add it and the new vertex */
2415 while (edge != NULL && !found)
2416 {
2417 if (edge->used == AM_NOTUSED)
2418 {
2419 if (vertexarray[edge->vertex1]->used != vertexarray[edge->vertex2]->used)
2420 {
2421 found = TRUE;
2422 vertexarray[edge->vertex1]->used = TRUE;
2423 vertexarray[edge->vertex2]->used = TRUE;
2424 edge->used = AM_USED;
2425 AlnMgr2RecursePrims(vertexarray, edge_head);
2426 }
2427 }
2428 edge = edge->next;
2429 }
2430 }
2431
2432 /* SECTION 2C */
2433 /***************************************************************************
2434 *
2435 * AlnMgr2CleanUpLeftovers takes the edges that are unused after
2436 * AlnMgr2RecursePrims and looks for edges that duplicate another edge in
2437 * the set or edges that share a seqid (but not a vertex) with another edge
2438 * already in the set. It adds these edges to the set; they don't belong
2439 * there in tree-based terms but as alignments they are related.
2440 *
2441 ***************************************************************************/
2442 static void AlnMgr2CleanUpLeftovers(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head)
2443 {
2444 AMEdgePtr edge;
2445 AMEdgePtr edge_tmp;
2446 Boolean found;
2447 Int4 i;
2448 BoolPtr tmpverts;
2449
2450 tmpverts = (BoolPtr)MemNew(numvertices*sizeof(Boolean));
2451 for (i=0; i<numvertices; i++)
2452 {
2453 tmpverts[i] = vertexarray[i]->used;
2454 }
2455 edge = edge_head;
2456 while (edge != NULL)
2457 {
2458 if (edge->used == AM_NOTUSED)
2459 {
2460 if (tmpverts[edge->vertex1] == TRUE && tmpverts[edge->vertex2] == TRUE)
2461 {
2462 /* see if this edge duplicates another edge; if so, add it */
2463 edge_tmp = edge_head;
2464 found = FALSE;
2465 while (edge_tmp != NULL && !found)
2466 {
2467 if ((edge->vertex1 == edge_tmp->vertex1 && edge->vertex2 == edge_tmp->vertex2) || (edge->vertex1 == edge_tmp->vertex2 && edge->vertex2 == edge_tmp->vertex1))
2468 {
2469 found = TRUE;
2470 edge->used = AM_USED;
2471 }
2472 edge_tmp = edge_tmp->next;
2473 }
2474 if (!found)
2475 edge->used = AM_CONFLICT;
2476 } else if (tmpverts[edge->vertex1] == FALSE && tmpverts[edge->vertex2] == FALSE)
2477 {
2478 /* if one of the vertices shares a seqid with a vertex in the set, put both vertices */
2479 /* and the edge in the set. */
2480 found = FALSE;
2481 for (i=0; i<numvertices && !found; i++)
2482 {
2483 if (tmpverts[i] == TRUE && (SeqIdComp(vertexarray[i]->sip, vertexarray[edge->vertex1]->sip) == SIC_YES || SeqIdComp(vertexarray[i]->sip, vertexarray[edge->vertex2]->sip) == SIC_YES))
2484 {
2485 found = TRUE;
2486 vertexarray[edge->vertex1]->used = TRUE;
2487 vertexarray[edge->vertex2]->used = TRUE;
2488 edge->used = AM_USED;
2489 }
2490 }
2491 if (!found)
2492 edge->used = AM_CONFLICT;
2493 }
2494 }
2495 edge = edge->next;
2496 }
2497 MemFree(tmpverts);
2498 }
2499
2500 /* SECTION 2C */
2501 /***************************************************************************
2502 *
2503 * AlnMgr2SameSeq decides whether two vertices come from the same
2504 * sequence (simple seqid compare).
2505 *
2506 ***************************************************************************/
2507 static Boolean AlnMgr2SameSeq(AMVertexPtr vertex1, AMVertexPtr vertex2)
2508 {
2509 if (vertex1 == NULL || vertex2 == NULL)
2510 return FALSE;
2511 if (SeqIdComp(vertex1->sip, vertex2->sip) == SIC_YES)
2512 return TRUE;
2513 else
2514 return FALSE;
2515 }
2516
2517
2518 /* SECTION 2C */
2519 /***************************************************************************
2520 *
2521 * AlnMgr2BuildAlignmentFromTree performs a breadth-first traversal of
2522 * the tree, adding edges to the growing alignment as it goes.
2523 *
2524 ***************************************************************************/
2525 static void AlnMgr2BuildAlignmentFromTree(AMVertexPtr PNTR vertexarray, Int4 numvertices, AMEdgePtr edge_head, SeqAlignPtr sap)
2526 {
2527 AMAlignIndex2Ptr amaip;
2528 AMVertexPtr adj;
2529 AMVertexPtr adj_head;
2530 AMEdgePtr edge;
2531 Int4 i;
2532 Int4 j;
2533 AMQueuePtr q;
2534 AMQueuePtr q_head;
2535 AMQueuePtr q_prev;
2536
2537 amaip = (AMAlignIndex2Ptr)(sap->saip);
2538 AlnMgr2AddInNewPairwiseSA(sap, edge_head->sap);
2539 edge_head->aligned = TRUE;
2540 q_head = (AMQueuePtr)MemNew(sizeof(AMQueue));
2541 q_head->vertex = AlnMgr2GetBetterVertex(vertexarray, edge_head);
2542 q_head->vertex->visited = TRUE;
2543 /* unlink the vertices */
2544 for (i=0; i<numvertices; i++)
2545 {
2546 vertexarray[i]->next = NULL;
2547 }
2548 while (q_head != NULL)
2549 {
2550 q_prev = q_head;
2551 while (q_prev->next != NULL)
2552 {
2553 q_prev = q_prev->next;
2554 }
2555 adj_head = AlnMgr2GetAdjacentVertices(q_head->vertex, vertexarray, edge_head);
2556 adj = adj_head;
2557 while (adj != NULL)
2558 {
2559 if (adj->visited == FALSE)
2560 {
2561 edge = edge_head;
2562 while (edge != NULL)
2563 {
2564 /* if the edge is used in the tree but not yet aligned, and it's adjacent, align it */
2565 if (edge->aligned == FALSE && edge->used == AM_USED && ((AlnMgr2SameSeq(vertexarray[edge->vertex1], q_head->vertex) && AlnMgr2SameSeq(vertexarray[edge->vertex2], adj)) || (AlnMgr2SameSeq(vertexarray[edge->vertex1], adj) && AlnMgr2SameSeq(vertexarray[edge->vertex2], q_head->vertex))))
2566 {
2567 AlnMgr2AddInNewPairwiseSA(sap, edge->sap);
2568 edge->aligned = TRUE;
2569 }
2570 edge = edge->next;
2571 }
2572 q = (AMQueuePtr)MemNew(sizeof(AMQueue));
2573 q->vertex = adj;
2574 q_prev->next = q;
2575 q_prev = q;
2576 adj->visited = TRUE;
2577 }
2578 adj = adj->next;
2579 }
2580 q = q_head->next;
2581 MemFree(q_head);
2582 q_head = q;
2583 if (q_head == NULL) /* look for discontinuous sets -- those will be left over */
2584 {
2585 edge = edge_head;
2586 while (edge != NULL && q_head == NULL)
2587 {
2588 if (edge->aligned == FALSE && (vertexarray[edge->vertex1]->visited == FALSE || vertexarray[edge->vertex2]->visited == FALSE))
2589 {
2590 q_head = (AMQueuePtr)MemNew(sizeof(AMQueue));
2591 q_head->vertex = AlnMgr2GetBetterVertex(vertexarray, edge);
2592 vertexarray[edge->vertex1]->visited = vertexarray[edge->vertex2]->visited = TRUE;
2593 }
2594 edge = edge->next;
2595 }
2596 }
2597 }
2598 /* now the vertices are no longer in a linked list -> put them back together */
2599 for (j=0; j<i-1; j++)
2600 {
2601 vertexarray[j]->next = vertexarray[j+1];
2602 vertexarray[j+1]->next = NULL;
2603 }
2604 AlnMgr2CondenseColumns((DenseSegPtr)(amaip->sharedaln->segs));
2605 AlnMgr2IndexSingleChildSeqAlign(amaip->sharedaln);
2606 }
2607
2608 /* SECTION 2c */
2609 /***************************************************************************
2610 *
2611 * AlnMgr2GetAdjacentVertices returns a linked list of all vertices which
2612 * are adjacent to the given edge; that is, it returns a list of all
2613 * vertices which are linked by an edge to either vertex of the given edge.
2614 *
2615 ***************************************************************************/
2616 static AMVertexPtr AlnMgr2GetAdjacentVertices(AMVertexPtr vertex, AMVertexPtr PNTR vertexarray, AMEdgePtr edge_head)
2617 {
2618 AMVertexPtr adj_head;
2619 AMVertexPtr adj_prev;
2620 AMEdgePtr edge;
2621
2622 edge = edge_head;
2623 adj_head = adj_prev = NULL;
2624 while (edge != NULL)
2625 {
2626 if (AlnMgr2SameSeq(vertexarray[edge->vertex1], vertex))
2627 {
2628 if (adj_head == NULL)
2629 adj_head = adj_prev = vertexarray[edge->vertex2];
2630 else
2631 {
2632 adj_prev->next = vertexarray[edge->vertex2];
2633 adj_prev = adj_prev->next;
2634 }
2635 } else if (AlnMgr2SameSeq(vertexarray[edge->vertex2], vertex))
2636 {
2637 if (adj_head == NULL)
2638 adj_head = adj_prev = vertexarray[edge->vertex1];
2639 else
2640 {
2641 adj_prev->next = vertexarray[edge->vertex1];
2642 adj_prev = adj_prev->next;
2643 }
2644 }
2645 if (adj_prev != NULL)
2646 adj_prev->next = NULL;
2647 edge = edge->next;
2648 }
2649 return adj_head;
2650 }
2651
2652 /* SECTION 2c */
2653
2654 static Boolean AlnMgr2GetFirstRowForSeqId(
2655 DenseSegPtr dsp,
2656 SeqIdPtr sip,
2657 Uint1 strand,
2658 Int4Ptr row_curr,
2659 SeqIdPtr PNTR sip_curr)
2660 {
2661 Boolean found = FALSE;
2662
2663 while (*sip_curr) {
2664 (*row_curr)++;
2665 if (SeqIdComp(sip, *sip_curr) == SIC_YES &&
2666 strand == dsp->strands[*row_curr]) {
2667 found = TRUE;
2668 }
2669 *sip_curr = (*sip_curr)->next;
2670 if (found) return TRUE;
2671 }
2672 return FALSE;
2673 }
2674
2675
2676 static AMSeqPieceSetPtr AlnMgr2CreateSeqPieceSet(DenseSegPtr dsp, Int4 row)
2677 {
2678 AMSeqPieceSetPtr s_set = (AMSeqPieceSetPtr)MemNew(sizeof(AMSeqPieceSet));
2679 AMSeqPiecePtr s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2680 s->beg = -1;
2681 s->end = -1;
2682 s->seg = -1;
2683 s->pos = row - dsp->dim;
2684 s->set = s_set;
2685 s->prev = NULL;
2686 s->next = NULL;
2687 s->left = -1;
2688 s->right = -1;
2689 s->orig_left = -2;
2690 s->orig_right = -2;
2691 s->aligned = FALSE;
2692 s->alt_dsp = NULL;
2693 s->alt_seg = -1;
2694 s->alt_pos = -1;
2695
2696 s->next = NULL;
2697
2698 s_set->dsp = dsp;
2699 s_set->row = row;
2700 s_set->row2 = -1;
2701 s_set->alt_row = -1;
2702 s_set->alt_row2 = -1;
2703 s_set->head = s;
2704 s_set->tail = s;
2705 s_set->max_pos = dsp->dim * dsp->numseg;
2706 s_set->strand = dsp->strands[row];
2707 s_set->plus = s_set->strand != Seq_strand_minus;
2708 s_set->next = NULL;
2709
2710 return s_set;
2711 }
2712
2713 static AMSeqPiecePtr AlnMgr2GetNextSeqPiece(AMSeqPiecePtr s)
2714 {
2715 DenseSegPtr dsp;
2716 Int4 max_pos;
2717 AMSeqPiecePtr s_new;
2718
2719 dsp = s->set->dsp;
2720 max_pos = s->set->max_pos;
2721
2722 if (s->pos < max_pos) {
2723 s_new = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2724 s_new->pos = s->pos + dsp->dim;
2725 s_new->seg = s->seg + 1;
2726 s_new->set = s->set;
2727 s_new->prev = s;
2728 s = s->next = s_new;
2729 s->set->tail = s;
2730
2731 s->next = NULL;
2732
2733 /* initialize the following */
2734 s->left = -1;
2735 s->right = -1;
2736 s->aligned = FALSE;
2737 s->alt_dsp = NULL;
2738 s->alt_seg = -1;
2739 s->alt_pos = -1;
2740 s->orig_left = -2;
2741 s->orig_right = -2;
2742
2743 /* find the beg and end */
2744 while (s->pos < max_pos) {
2745 if (dsp->starts[s->pos] != -1) {
2746 s->beg = s->end = dsp->starts[s->pos];
2747 if (s->set->plus) {
2748 s->end += dsp->lens[s->seg] - 1;
2749 } else {
2750 s->beg += dsp->lens[s->seg] - 1;
2751 }
2752 return s;
2753 } else {
2754 s->seg++;
2755 s->pos += dsp->dim;
2756 }
2757 }
2758 s->beg = -1;
2759 s->end = -1;
2760 return s;
2761 }
2762 return NULL;
2763 }
2764
2765 static AMSeqPiecePtr AlnMgr2GetNextLimitedSeqPiece(
2766 AMSeqPiecePtr s,
2767 AMSeqPiecePtr right)
2768 {
2769 DenseSegPtr dsp;
2770 Int4 new_pos, new_seg, max_pos, max_seg;
2771 AMSeqPiecePtr s_new;
2772
2773 AMSeqPiecePtr left = right->prev;
2774
2775 dsp = s->set->dsp;
2776 max_pos = s->set->max_pos;
2777 max_seg = right->seg;
2778 new_pos = s->pos + dsp->dim;
2779 new_seg = s->seg + 1;
2780
2781 while (new_pos < max_pos && new_seg <= max_seg) {
2782 if (dsp->starts[new_pos] != -1) {
2783 s_new = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2784 s_new->pos = new_pos;
2785 s_new->seg = new_seg;
2786 s_new->set = s->set;
2787 s_new->next = NULL;
2788 s_new->prev = s;
2789 s = s->next = s_new;
2790 s->set->tail = s;
2791 s->beg = s->end = dsp->starts[s->pos];
2792 if (s->set->plus) {
2793 s->end += dsp->lens[s->seg] - 1;
2794 } else {
2795 s->beg += dsp->lens[s->seg] - 1;
2796 }
2797 /* aligned to a sequence in anchor or not */
2798 if (s->seg == right->seg) {
2799 s->aligned = TRUE;
2800 s->left = right->beg;
2801 s->right = right->end;
2802 } else {
2803 s->aligned = FALSE;
2804 s->left = left->end;
2805 s->right = right->beg;
2806 }
2807 /* these are not yet used */
2808 s->orig_left = -2;
2809 s->orig_right = -2;
2810 s->alt_dsp = NULL;
2811 s->alt_seg = -1;
2812 s->alt_pos = -1;
2813 return s;
2814 }
2815 new_pos += dsp->dim;
2816 new_seg++;
2817 }
2818 return NULL;
2819 }
2820
2821 static void AlnMgr2AddSeqPiece(
2822 AMSeqPieceSetPtr set,
2823 AMSeqPiecePtr what)
2824 {
2825 AMSeqPiecePtr s;
2826 DenseSegPtr dsp = set->dsp;
2827 DenseSegPtr alt_dsp = what->set->dsp;
2828
2829 s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2830 s->beg = what->beg;
2831 s->end = what->end;
2832
2833 if (alt_dsp == dsp) {
2834 s->seg = what->seg;
2835 s->pos = what->pos;
2836 s->alt_dsp = NULL;
2837 s->alt_seg = -1;
2838 s->alt_pos = -1;
2839 } else {
2840 s->seg = -1;
2841 s->pos = -1;
2842 s->alt_dsp = alt_dsp;
2843 s->alt_seg = what->seg;
2844 s->alt_pos = what->pos;
2845 }
2846 s->left = what->left;
2847 s->right = what->right;
2848 s->orig_left = what->orig_left;
2849 s->orig_right = what->orig_right;
2850 s->aligned = what->aligned;
2851 s->set = set;
2852 s->next = NULL;
2853 if ((s->prev = set->tail) != NULL) {
2854 s->prev->next = s;
2855 }
2856 set->tail = s;
2857 }
2858
2859 static void AlnMgr2InsertSeqPiece(
2860 AMSeqPiecePtr where,
2861 AMSeqPiecePtr what,
2862 Int4 end)
2863 {
2864 AMSeqPiecePtr s;
2865 DenseSegPtr dsp = where->set->dsp;
2866 DenseSegPtr alt_dsp = what->set->dsp;
2867
2868
2869 s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2870 s->beg = what->beg;
2871 s->end = end;
2872
2873 if (where->beg == what->beg) {
2874 s->seg = where->seg;
2875 s->pos = where->pos;
2876 where->beg = end + (where->set->plus? 1 : -1);
2877 if (alt_dsp == dsp) {
2878 s->alt_dsp = NULL;
2879 s->alt_seg = -1;
2880 s->alt_pos = -1;
2881 } else {
2882 s->alt_dsp = alt_dsp;
2883 s->alt_seg = what->seg;
2884 s->alt_pos = what->pos;
2885 }
2886 } else {
2887 if (alt_dsp == dsp) {
2888 s->seg = what->seg;
2889 s->pos = what->pos;
2890 s->alt_dsp = NULL;
2891 s->alt_seg = -1;
2892 s->alt_pos = -1;
2893 } else {
2894 s->seg = -1;
2895 s->pos = -1;
2896 s->alt_dsp = alt_dsp;
2897 s->alt_seg = what->seg;
2898 s->alt_pos = what->pos;
2899 }
2900 }
2901 s->left = what->left;
2902 s->right = what->right;
2903 s->orig_left = what->orig_left;
2904 s->orig_right = what->orig_right;
2905 s->aligned = what->aligned;
2906 s->set = where->set;
2907 s->next = where;
2908 if ((s->prev = where->prev) != NULL) {
2909 if (s->prev) {
2910 s->prev->next = s;
2911 } else {
2912 if (s->set->head == where) {
2913 s->set->head = s;
2914 }
2915 }
2916 where->prev = s;
2917 }
2918 }
2919
/***************************************************************************
*
*  AlnMgr2CopySeg fills in one segment of the output dense-seg DSP from
*  the sequence piece *s_ptr, and advances the caller's cursors.
*
*  DSP      output dense-seg; lens[SEG] and a run of starts[]/strands[]
*           entries beginning at POS are written
*  SEG_ptr  in/out: index of the output segment; incremented by one
*  POS_ptr  in/out: index into DSP->starts / DSP->strands; written back
*  Dsp      source dense-seg whose rows are carried over (Dsp->dim
*           entries per output segment); compared with s->set->dsp to
*           tell which kind of piece this is
*  Seg_ptr  in/out: segment cursor into Dsp; written back
*  Pos_ptr  in/out: starts[] cursor into Dsp; written back
*  s_ptr    in/out: the current piece; advanced to its ->next on return
*
*  Early exit: when the piece's set has row != row2 and the piece is the
*  last one in its list, *s_ptr is set to NULL and nothing else is
*  written (none of the cursors move).
*
*  The length of the output segment is always |s->end - s->beg| + 1.
*  The comments inside the body name the cases ("A", "B", "extra row",
*  "Dsp involved", "dsp involved") in the author's own terms.
*
*  NOTE(review): DSP is assumed to have been allocated large enough for
*  all entries written here -- nothing in this function checks it.
*
***************************************************************************/
2920 static void AlnMgr2CopySeg(
2921 DenseSegPtr DSP,
2922 Int4 PNTR SEG_ptr,
2923 Int4 PNTR POS_ptr,
2924 DenseSegPtr Dsp,
2925 Int4 PNTR Seg_ptr,
2926 Int4 PNTR Pos_ptr,
2927 AMSeqPiecePtr PNTR s_ptr)
2928 {
2929 Int4 i, rdelta, ldelta, POS, Pos, max_Pos, pos2, alt_pos2, SEG, Seg,
2930 beg, end;
2931 AMSeqPiecePtr s;
2932 Boolean plus;
2933
/* work on local copies of the cursors; they are written back at the end */
2934 POS = *POS_ptr; Pos = *Pos_ptr;
2935 SEG = *SEG_ptr; Seg = *Seg_ptr;
2936 s = *s_ptr;
2937
2938 if (s->set->row != s->set->row2) { /* if not a B */
2939 if (!(s->next)) {
2940 *s_ptr = NULL;
2941 return; /* skip the last A */
2942 }
2943 }
2944
/* one past the last output entry that corresponds to a row of Dsp */
2945 max_Pos = POS+Dsp->dim;
2946
2947 DSP->lens[SEG] = ABS(s->end - s->beg) + 1;
2948
2949 if (s->set->dsp != Dsp) { /* the extra row for the non-anchor seq */
/* every row of Dsp is a gap in this segment; only the strands are copied */
2950 for (i = 0; POS < max_Pos; POS++, i++) {
2951 DSP->starts[POS] = -1;
2952 DSP->strands[POS] = Dsp->strands[i];
2953 }
/* the piece itself goes into the entry right after the Dsp rows */
2954 DSP->starts[POS] = MIN(s->beg, s->end);
2955 DSP->strands[POS] = s->set->strand;
2956 POS++;
2957
2958 } else { /* not dealing with the extra row itself */
2959
2960 if (s->pos >= 0 && s->set->row != s->set->row2) { /* Dsp involved */
/* beg/end: strand-aware extent of the whole source segment of this piece */
2961 beg = end = s->set->dsp->starts[s->pos];
2962 if (s->set->plus) {
2963 end += s->set->dsp->lens[s->seg]-1;
2964 } else {
2965 beg += s->set->dsp->lens[s->seg]-1;
2966 }
/* the assignment inside the condition is intentional: ldelta is how far */
/* the piece begins from the segment's own beginning, and a nonzero value */
/* means the piece starts part-way into that segment */
2967 if (ldelta = ABS(s->beg - beg)) {
2968 /* need to "continue" from the orig seg */
2969 Pos = s->pos - s->set->row;
2970 Seg = s->seg;
2971 }
/* rdelta: how far the piece ends from the segment's own end */
2972 rdelta = ABS(end - s->end);
2973
/* copy every row of the source segment, shifting each non-gap start by */
/* ldelta on a plus-strand row and by rdelta on a minus-strand row */
2974 for (; POS < max_Pos; POS++, Pos++) {
2975 DSP->strands[POS] = Dsp->strands[Pos];
2976 plus = DSP->strands[POS] != Seq_strand_minus;
2977 if (Dsp->starts[Pos] != -1) {
2978 DSP->starts[POS] = Dsp->starts[Pos] + (plus ? ldelta : rdelta);
2979 } else {
2980 DSP->starts[POS] = -1;
2981 }
2982 }
2983 if (ldelta) {
2984 /* restore these */
2985 Pos = *Pos_ptr;
2986 Seg = *Seg_ptr;
2987 } else {
2988 Seg++;
2989 }
2990
2991 if (s->alt_dsp) { /* dsp involved too */
/* index, in the alternate dense-seg, of the second row in the same segment */
2992 alt_pos2 =
2993 s->alt_pos + s->set->alt_row2 - s->set->alt_row;
2994 beg = end = s->alt_dsp->starts[s->alt_pos];
2995 if (s->alt_dsp->strands[s->alt_pos] == Seq_strand_minus) {
2996 beg += s->alt_dsp->lens[s->alt_seg]-1;
2997 } else {
2998 end += s->alt_dsp->lens[s->alt_seg]-1;
2999 }
3000 ldelta = ABS(s->beg - beg);
3001 rdelta = ABS(end - s->end);
3002
/* pos2: where the second row's start goes -- into an existing row of the */
/* segment just written, or into one additional entry appended after it */
3003 if (s->set->row2 != -1) { /* 2nd row merged*/
3004 pos2 = POS - DSP->dim + s->set->row2;
3005 } else { /* extra row */
3006 pos2 = POS;
3007 POS++;
3008 }
3009 DSP->strands[pos2] = s->alt_dsp->strands[alt_pos2];
3010 plus = DSP->strands[pos2] != Seq_strand_minus;
3011 if (s->alt_dsp->starts[alt_pos2] != -1) {
3012 DSP->starts[pos2] = s->alt_dsp->starts[alt_pos2] +
3013 (plus ? ldelta : rdelta);
3014 } else {
3015 DSP->starts[pos2] = -1;
3016 }
3017 } else { /* dsp not involved */
3018 if (s->set->row2 == -1) { /* 2nd row not merged */
/* the additional entry is a gap in this segment */
3019 DSP->starts[POS] = -1;
3020 DSP->strands[POS] =
3021 s->set->alt_dsp->strands[s->set->alt_row2];
3022 POS++;
3023 }
3024 }
3025 } else { /* Dsp not involved */
/* start with all Dsp rows as gaps; selected entries are overwritten below */
3026 for (i = 0; POS < max_Pos; POS++, i++) {
3027 DSP->starts[POS] = -1;
3028 DSP->strands[POS] = Dsp->strands[i];
3029 }
3030 if (s->set->row == s->set->row2) { /* if a B */
3031 if (!(s->alt_dsp)) {
3032 Pos += s->set->dsp->dim; /* move to next seg */
3033 Seg++;
3034 }
3035 } else { /* not a B */
3036 alt_pos2 =
3037 s->alt_pos + s->set->alt_row2 - s->set->alt_row;
3038
3039 beg = end = s->alt_dsp->starts[s->alt_pos];
3040 if (s->alt_dsp->strands[s->alt_pos] == Seq_strand_minus) {
3041 beg += s->alt_dsp->lens[s->alt_seg]-1;
3042 } else {
3043 end += s->alt_dsp->lens[s->alt_seg]-1;
3044 }
3045 ldelta = ABS(s->beg - beg);
3046 rdelta = ABS(end - s->end);
3047
3048 if (s->set->row2 != -1) { /* merged row2 */
3049 pos2 = POS - DSP->dim + s->set->row2;
3050 } else {
3051 pos2 = POS;
3052 POS++;
3053 }
3054 DSP->strands[pos2] = s->alt_dsp->strands[alt_pos2];
3055 plus = DSP->strands[pos2] != Seq_strand_minus;
3056 if (s->alt_dsp->starts[alt_pos2] != -1) {
3057 DSP->starts[pos2] = s->alt_dsp->starts[alt_pos2] +
3058 (plus ? ldelta : rdelta);
3059 } else {
3060 DSP->starts[pos2] = -1;
3061 }
3062 }
/* the piece's own row gets the piece's lower coordinate as its start */
3063 DSP->starts[POS + s->set->row - DSP->dim] = MIN(s->beg, s->end);
3064 }
3065 }
/* publish the advanced cursors and step to the next piece */
3066 (*SEG_ptr)++;
3067 *Seg_ptr = Seg;
3068 *s_ptr = (*s_ptr)->next;
3069 *POS_ptr = POS;
3070 *Pos_ptr = Pos;
3071 }
3072
3073 NLM_EXTERN void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap)
3074 {
3075 AMAlignIndex2Ptr amaip;
3076 DenseSegPtr dsp, Dsp, DSP;
3077 Int4 Seg, SEG;
3078 Int4 Pos, POS, max_POS;
3079 Int4 A_end, B_beg;
3080 Int4 anchor, Anchor;
3081 Int4 row;
3082 SeqIdPtr sip, extra_sip;
3083 AMSeqPieceSetPtr a_set, A_set, b_set, B_set_head, B_set;
3084 AMSeqPiecePtr a, A, b, B;
3085 Boolean conflict;
3086 Boolean a_plus, b_plus;
3087 Int4 upper_limit;
3088 Int4 extra_segs;
3089
3090 dsp = (DenseSegPtr)(sap->segs);
3091 if (dsp->dim != 2) {
3092 if (dsp->dim == 0) {
3093 dsp->dim = 2; /* set to default */
3094 } else {
3095 ErrPostEx(SEV_ERROR, 0,0,
3096 "AlnMgr2AddInNewPairwiseSA: dsp->dim (=%d) should be 2.",
3097 dsp->dim);
3098 return;
3099 }
3100 }
3101 if (dsp->numseg < 1) {
3102 ErrPostEx(SEV_ERROR, 0,0,
3103 "AlnMgr2AddInNewPairwiseSA: dsp->numseg (=%d) should be > 0.",
3104 dsp->numseg);
3105 return;
3106 }
3107
3108 amaip = (AMAlignIndex2Ptr)(parent->saip);
3109 if (amaip->sharedaln == NULL) {/* first alignment to be added */
3110 SeqAlignPtr salp;
3111 Int4 i;
3112
3113 salp = SeqAlignDup(sap);
3114 AlnMgr2IndexSingleChildSeqAlign(salp);
3115 amaip->sharedaln = salp;
3116 amaip->numrows = dsp->dim;
3117 sip = dsp->ids;
3118 amaip->ids = (SeqIdPtr PNTR)MemNew((dsp->dim)*sizeof(SeqIdPtr));
3119 i = 0;
3120 while (sip != NULL) {
3121 amaip->ids[i] = SeqIdDup(sip);
3122 sip = sip->next;
3123 i++;
3124 }
3125 MemFree(amaip->saps);
3126 amaip->saps = (SeqAlignPtr PNTR)MemNew(sizeof(SeqAlignPtr));
3127 amaip->saps[0] = sap;
3128 amaip->numsaps = 1;
3129 MemFree(amaip->aligned);
3130 amaip->aligned = (Boolean PNTR) MemNew(sizeof(Boolean));
3131 amaip->aligned[0] = TRUE;
3132
3133 return;
3134 }
3135
3136 /* add the new sap */
3137 amaip->numsaps++;
3138 amaip->saps = (SeqAlignPtr PNTR) MemMore
3139 (amaip->saps, amaip->numsaps*sizeof(SeqAlignPtr));
3140 amaip->saps[amaip->numsaps-1] = sap;
3141 amaip->aligned = (Boolean PNTR) MemMore
3142 (amaip->aligned, (amaip->numsaps)*sizeof(Boolean));
3143 amaip->aligned[amaip->numsaps-1] = TRUE;
3144
3145 Dsp = (DenseSegPtr)(amaip->sharedaln->segs);
3146
3147 AlnMgr2GetFirstSharedRow(amaip->sharedaln, sap, &Anchor, &anchor);
3148
3149 {{ /* make sure the shared rows are on the same strand */
3150 Uint1 Strand, strand;
3151
3152 Strand = AlnMgr2GetNthStrand(amaip->sharedaln, Anchor);
3153 if (Strand == Seq_strand_unknown)
3154 Strand = Seq_strand_plus;
3155 strand = AlnMgr2GetNthStrand(sap, anchor);
3156 if (strand == Seq_strand_unknown)
3157 strand = Seq_strand_plus;
3158 if (Strand != strand) {
3159 SeqAlignListReverseStrand(sap);
3160 SAIndex2Free2(sap->saip);
3161 sap->saip = NULL;
3162 AlnMgr2IndexSingleChildSeqAlign(sap);
3163 dsp = (DenseSegPtr)(sap->segs);
3164 strand = AlnMgr2GetNthStrand(sap, anchor);
3165 if (strand == Seq_strand_unknown)
3166 strand = Seq_strand_plus;
3167 }
3168 a_plus = strand != Seq_strand_minus;
3169 }}
3170 anchor--; Anchor--; /* make them 0-based */
3171
3172 /* create new dsp */
3173 DSP = DenseSegNew();
3174 DSP->numseg = Dsp->numseg;
3175 DSP->dim = Dsp->dim;
3176 /* DSP->ids = SeqIdDupList(Dsp->ids); */
3177
3178 /* collect other shared seqids */
3179 b_set = B_set = B_set_head = NULL;
3180 row = -1; sip = Dsp->ids;
3181 extra_sip = dsp->ids;
3182 if (anchor == 0) {
3183 extra_sip = extra_sip->next;
3184 }
3185 while (AlnMgr2GetFirstRowForSeqId
3186 (Dsp, extra_sip, dsp->strands[1-anchor], &row, &sip)) {
3187 if (B_set) {
3188 B_set->next = AlnMgr2CreateSeqPieceSet(Dsp, row);
3189 B_set = B_set->next;
3190 } else {
3191 B_set = B_set_head = AlnMgr2CreateSeqPieceSet(Dsp, row);
3192 }
3193 }
3194 b_plus = dsp->strands[1-anchor] != Seq_strand_minus;
3195
3196 /* ids */
3197 DSP->ids = Dsp->ids;
3198 Dsp->ids = NULL;
3199
3200 /* collect a, b */
3201 a_set = AlnMgr2CreateSeqPieceSet(dsp, anchor);
3202 a = a_set->head;
3203 b_set = AlnMgr2CreateSeqPieceSet(dsp, 1-anchor);
3204 while (a = AlnMgr2GetNextSeqPiece(a)) {
3205 b = b_set->tail;
3206 while (b = AlnMgr2GetNextLimitedSeqPiece(b, a)) {
3207 if (!b->aligned) {
3208 DSP->numseg++;
3209 }
3210 }
3211 }
3212
3213 /* collect A, B */
3214 A_set = AlnMgr2CreateSeqPieceSet(Dsp, Anchor);
3215 A = A_set->head;
3216 while (A = AlnMgr2GetNextSeqPiece(A)) {
3217 B_set = B_set_head;
3218 while (B_set) {
3219 B = B_set->tail;
3220 while (B = AlnMgr2GetNextLimitedSeqPiece(B, A)) {};
3221 B_set=B_set->next;
3222 }
3223 }
3224
3225 /* resolve a, A */
3226 A_set->alt_row = a_set->row;
3227 a = a_set->head->next;
3228 A = A_set->head->next;
3229 while (a && A && a->next && A->next) {
3230 if (a_plus ? a->beg < A->beg : a->beg > A->beg) {
3231 AlnMgr2InsertSeqPiece
3232 (A, a, a_plus ? MIN(a->end, A->beg-1) : MAX(a->end, A->beg+1));
3233 DSP->numseg++;
3234 if (a_plus ? a->end < A->beg : a->end > A->beg) {
3235 a = a->next;
3236 } else {
3237 a->beg = A->beg;
3238 }
3239 } else if (a_plus ? A->beg < a->beg : A->beg > a->beg) {
3240 if (a_plus ? A->end < a->beg : A->end > a->beg) {
3241 A = A->next;
3242 } else {
3243 AlnMgr2InsertSeqPiece(A, A, a_plus ? a->beg - 1 : a->beg + 1);
3244 DSP->numseg++;
3245 }
3246 } else { /* a->beg == A->beg */
3247 if (a_plus ? a->end < A->end : a->end > A->end) {
3248 AlnMgr2InsertSeqPiece(A, a, a->end);
3249 DSP->numseg++;
3250 a = a->next;
3251 } else if (a_plus ? a->end > A->end : a->end < A->end) {
3252 a->beg = A->end + (a_plus ? 1 : -1);
3253 A->alt_dsp = a->set->dsp;
3254 A->alt_seg = a->seg;
3255 A->alt_pos = a->pos;
3256 A = A->next;
3257 } else { /* a->end == A->end */
3258 A->alt_dsp = a->set->dsp;
3259 A->alt_seg = a->seg;
3260 A->alt_pos = a->pos;
3261 a = a->next;
3262 A = A->next;
3263 }
3264 }
3265 }
3266 while (a && a->next) {
3267 AlnMgr2InsertSeqPiece(A, a, a->end);
3268 DSP->numseg++;
3269 a = a->next;
3270 }
3271
3272 /* set the upper limits */
3273 if (B_set_head) {
3274 if (a_plus) {
3275 upper_limit =
3276 A_set->tail->end = A_set->tail->beg = A_set->tail->prev->end + 1;
3277
3278 b = b_set->tail;
3279 while (b && b->right == -1) {
3280 b->right = upper_limit;
3281 b = b->prev;
3282 }
3283
3284 B_set = B_set_head;
3285 while (B_set) {
3286 B = B_set->tail;
3287 while (B && B->right == -1) {
3288 B->right = upper_limit;
3289 B = B->prev;
3290 }
3291 B_set = B_set->next;
3292 }
3293
3294 } else {
3295 upper_limit =
3296 A_set->head->beg = A_set->head->end = A_set->head->next->beg + 1;
3297
3298 b = b_set->head;
3299 while (b && b->left == -1) {
3300 b->left = upper_limit;
3301 b = b->next;
3302 }
3303
3304 B_set = B_set_head;
3305 while (B_set) {
3306 B = B_set->head;
3307 while (B && B->left == -1) {
3308 B->left = upper_limit;
3309 B = B->next;
3310 }
3311 B_set = B_set->next;
3312 }
3313
3314 }
3315 }
3316
3317 /* try to resolve b, B */
3318 if (B_set_head) {
3319 b = b_set->head->next;
3320 B_set = B_set_head;
3321 while (B_set) {
3322 B = B_set->head->next;
3323 conflict = FALSE;
3324 extra_segs = 0;
3325 while (b && B) {
3326 if (b_plus ? b->beg < B->beg : b->beg > B->beg) {
3327 if (b_plus ? b->end < B->beg : b->end > B->beg) {
3328 /* trim the limits */
3329 if (a_plus ? B->left <= b->left : B->left >= b->left) {
3330 if (a_plus ? B->right < b->left : B->right > b->left) {
3331 conflict = TRUE; break;
3332 } else {
3333 if (B->aligned) {
3334 conflict = TRUE; break; /* no trimming allowed */
3335 } else {
3336 B->left = b->left;
3337 }
3338 }
3339 if (a_plus ? b->right > B->right : b->right < B->right) {
3340 if (b->aligned) {
3341 conflict = TRUE; break; /* no trimming allowed */
3342 } else {
3343 b->orig_right = b->right; /* for recovering */
3344 b->right = B->right;
3345 }
3346 }
3347 }
3348 AlnMgr2InsertSeqPiece(B, b, b->end);
3349 if (!(b->aligned)) extra_segs++;
3350 b = b->next;
3351 } else {
3352 conflict = TRUE; break;
3353 }
3354
3355 } else if (b_plus ? B->beg < b->beg : B->beg > b->beg) {
3356 if (b_plus ? B->end < b->beg : B->end > b->beg) {
3357 /* trim the limits */
3358 if (a_plus ? b->left < B->left : b->left > B->left) {
3359 if (a_plus ? b->right < B->left : b->right > B->left) {
3360 conflict = TRUE; break;
3361 } else {
3362 if (b->aligned) {
3363 conflict = TRUE; break; /* no trimming allowed */
3364 } else {
3365 b->orig_left = b->left; /* for recovering */
3366 b->left = B->left;
3367 }
3368 }
3369 if (a_plus ? B->right > b->right : B->right < b->right) {
3370 if (B->aligned) {
3371 conflict = TRUE; break; /* no trimming allowed */
3372 } else {
3373 B->right = b->right;
3374 }
3375 }
3376 }
3377
3378 B = B->next;
3379
3380 } else {
3381 conflict = TRUE; break;
3382 }
3383 } else { /* B->beg == b->beg */
3384 conflict = TRUE; break;
3385 }
3386 }
3387 if (!conflict) {
3388 while (b) {
3389 AlnMgr2AddSeqPiece(B_set, b);
3390 if (!(b->aligned)) extra_segs++;
3391 b = b->next;
3392 }
3393 /* DSP->numseg += extra_segs; */
3394 break;
3395 }
3396 /* conflict, roll back b, recovering limits, try next B */
3397 if (!b) {
3398 b = b_set->tail;
3399 }
3400 while (b) {
3401 if (b->orig_left != -2) {
3402 b->left = b->orig_left;
3403 }
3404 if (b->orig_right != -2) {
3405 b->right = b->orig_right;
3406 }
3407 b = b->prev;
3408 }
3409 b = b_set->head->next;
3410 B_set = B_set->next;
3411 }
3412 }
3413 if (B_set) { /* B_set has no conflict with b_set */
3414 B = B_set->head->next;
3415 B_set->row2 = B_set->row; /* mark the set */
3416 A_set->row2 = B_set->row;
3417 A_set->alt_row2 = b_set->row;
3418 } else { /* this mean extra row */
3419 A_set->row2 = -1;
3420 A_set->alt_row2 = b_set->row;
3421 A_set->alt_dsp = b_set->dsp;
3422 DSP->dim++;
3423 sip = DSP->ids;
3424 while (sip->next) {
3425 sip = sip->next;
3426 }
3427 AddSeqId(&sip, extra_sip);
3428
3429 /* fix the index too */
3430 amaip->numrows = DSP->dim;
3431 amaip->ids = (SeqIdPtr PNTR)MemMore
3432 (amaip->ids,amaip->numrows*sizeof(SeqIdPtr));
3433 amaip->ids[amaip->numrows-1] = SeqIdDup(extra_sip);
3434
3435 b_set->row2 = b_set->row; /* mark the set */
3436 B = b_set->head->next;
3437 B_beg = -1; /* nothing to comp Bs to */
3438 }
3439
3440 /* allocate memory for the new sharedaln matrix */
3441 DSP->starts = (Int4Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Int4));
3442 DSP->strands = (Uint1Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Uint1));
3443 DSP->lens = (Int4Ptr)MemNew(DSP->numseg * sizeof(Int4));
3444
3445 /* loop through segments */
3446 POS = 0; Pos = 0; Seg = 0; SEG = 0;
3447 A = A_set->head->next;
3448 while (Seg < Dsp->numseg) {
3449
3450 A_end = Dsp->starts[Pos+A_set->row];
3451 if (a_plus && A_end >= 0) {
3452 A_end += Dsp->lens[Seg] - 1;
3453 }
3454 if (B_set) {
3455 B_beg = Dsp->starts[Pos+B_set->row];
3456 }
3457
3458 if (A_end >= 0) {
3459 while (A && (a_plus ? A->end <= A_end : A->end >= A_end)) {
3460 while (B && (a_plus ? B->left < A->beg : B->left > A->beg)) {
3461 if (B->aligned) {
3462 B = B->next;
3463 break; /* the aligned piece should be last */
3464 } else {
3465 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3466 }
3467 }
3468 if (B && B->aligned && B->left == A->beg) {
3469 B = B->next;
3470 }
3471 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3472 }
3473 } else if (B && B_beg >= 0) {
3474 while (B && (b_plus ? B->beg <= B_beg : B->beg >= B_beg)) {
3475 while (A && (a_plus ? A->beg <= B->left : A->beg >= B->left)) {
3476 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3477 }
3478 if (B->aligned) {
3479 B = B->next;
3480 } else {
3481 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3482 }
3483 }
3484 } else {
3485 /* just copy the Dsp segment */
3486 DSP->lens[SEG] = Dsp->lens[Seg];
3487 max_POS = POS + Dsp->dim;
3488 for (; POS < max_POS; POS++, Pos++) {
3489 DSP->starts[POS] = Dsp->starts[Pos];
3490 DSP->strands[POS] = Dsp->strands[Pos];
3491 }
3492 if (DSP->dim > Dsp->dim) {
3493 DSP->starts[POS] = -1;
3494 DSP->strands[POS] = dsp->strands[1-anchor];
3495 POS++;
3496 }
3497 SEG++;
3498 Seg++;
3499 }
3500 }
3501 while (A) {
3502 while (B && (a_plus ? B->right <= A->beg : B->right >= A->beg)) {
3503 if (B->aligned) {
3504 B = B->next;
3505 } else {
3506 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3507 }
3508 }
3509 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3510 }
3511 while (B) {
3512 if (B->aligned) {
3513 B = B->next;
3514 } else {
3515 AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3516 }
3517 }
3518
3519 /* Done */
3520 AMSeqPieceSetFree(A_set);
3521 AMSeqPieceSetFree(a_set);
3522 AMSeqPieceSetFree(B_set_head);
3523 AMSeqPieceSetFree(b_set);
3524
3525 amaip->sharedaln->segs = DSP;
3526 /* update the dim for the shared_aln to match the new DensegPtr */
3527 amaip->sharedaln->dim = DSP->dim;
3528
3529 DenseSegFree(Dsp);
3530 }
3531
3532 /***************************************************************************
3533 *
3534 * AlnMgr2AddInNewSA adds a seqalign to an existing seqalign. The new
3535 * seqalign must share at least one row with the existing seqalign. The
3536 * new, combined dense-seg structure is computed, and then it is condensed
3537 * using AlnMgr2CondenseRows to make sure that there are no superfluous rows.
3538 *
3539 ***************************************************************************/
3540 static void AlnMgr2AddInNewSA(SeqAlignPtr parent, SeqAlignPtr sap)
3541 {
3542 AMAlignIndex2Ptr amaip;
3543 AM_Small2Ptr asp;
3544 AM_Small2Ptr asp_head;
3545 AM_Small2Ptr asp_prev;
3546 AM_Small2Ptr asp_tmp;
3547 AM_Small2Ptr asp_tmp2;
3548 AM_Small2Ptr PNTR asparray;
3549 Int4 currstop;
3550 DenseSegPtr dsp;
3551 DenseSegPtr dsp_new;
3552 DenseSegPtr dsp_shared;
3553 Boolean found;
3554 Int4 i;
3555 Int4 j;
3556 Int4 k;
3557 Int4 n1;
3558 Int4 n2;
3559 Int4 numrows;
3560 Int4 offset;
3561 SeqAlignPtr salp;
3562 SeqAlignPtr sap_new;
3563 SeqAlignPtr PNTR saptmp;
3564 SeqIdPtr sip;
3565 SeqIdPtr sip_head;
3566 SeqIdPtr sip_tmp;
3567 Int4 state;
3568 Int4 stop1;
3569 Int4 stop2;
3570 Uint1 strand1;
3571 Uint1 strand2;
3572
3573 amaip = (AMAlignIndex2Ptr)(parent->saip);
3574 if (amaip->sharedaln == NULL) /* this is the first alignment to be added */
3575 {
3576 salp = SeqAlignDup(sap);
3577 AlnMgr2IndexSingleChildSeqAlign(salp);
3578 dsp = (DenseSegPtr)(salp->segs);
3579 amaip->sharedaln = salp;
3580 amaip->numrows = dsp->dim;
3581 sip = dsp->ids;
3582 amaip->ids = (SeqIdPtr PNTR)MemNew((dsp->dim)*sizeof(SeqIdPtr));
3583 i = 0;
3584 while (sip != NULL)
3585 {
3586 amaip->ids[i] = SeqIdDup(sip);
3587 sip = sip->next;
3588 i++;
3589 }
3590 MemFree(amaip->saps);
3591 amaip->saps = (SeqAlignPtr PNTR)MemNew(sizeof(SeqAlignPtr));
3592 amaip->saps[0] = sap;
3593 amaip->numsaps = 1;
3594 } else
3595 {
3596 /* free ids */
3597 for (i=0; i<amaip->numrows; i++)
3598 {
3599 SeqIdFree(amaip->ids[i]);
3600 }
3601 MemFree(amaip->ids);
3602
3603 /* add the new sap */
3604 saptmp = amaip->saps;
3605 amaip->saps = (SeqAlignPtr PNTR)MemNew((amaip->numsaps+1)*sizeof(SeqAlignPtr));
3606 for (i=0; i<amaip->numsaps; i++)
3607 {
3608 amaip->saps[i] = saptmp[i];
3609 }
3610 amaip->saps[amaip->numsaps] = sap;
3611 MemFree(saptmp);
3612 amaip->numsaps++;
3613
3614 /* dsp, dsp_shared, n1, n2 */
3615 dsp = (DenseSegPtr)(sap->segs);
3616 dsp_shared = (DenseSegPtr)(amaip->sharedaln->segs);
3617 AlnMgr2GetFirstSharedRow(amaip->sharedaln, sap, &n1, &n2);
3618 if (n1 == n2 && n1 == 0)
3619 return;
3620
3621 /* make sure the shared rows are on the same strand */
3622 strand1 = AlnMgr2GetNthStrand(amaip->sharedaln, n1);
3623 if (strand1 == Seq_strand_unknown)
3624 strand1 = Seq_strand_plus;
3625 strand2 = AlnMgr2GetNthStrand(sap, n2);
3626 if (strand2 == Seq_strand_unknown)
3627 strand2 = Seq_strand_plus;
3628 if (strand1 != strand2)
3629 {
3630 SeqAlignListReverseStrand(sap);
3631 SAIndex2Free2(sap->saip);
3632 sap->saip = NULL;
3633 AlnMgr2IndexSingleChildSeqAlign(sap);
3634 dsp = (DenseSegPtr)(sap->segs);
3635 strand2 = AlnMgr2GetNthStrand(sap, n2);
3636 if (strand2 == Seq_strand_unknown)
3637 strand2 = Seq_strand_plus;
3638 }
3639
3640 /* numrows */
3641 numrows = dsp->dim + dsp_shared->dim - 1; /* for now this works; compress at the end */
3642 asp_head = NULL;
3643
3644 /* currstop */
3645 if (strand1 == Seq_strand_minus)
3646 AlnMgr2GetNthSeqRangeInSA(amaip->sharedaln, n1, NULL, &currstop);
3647 else
3648 currstop = -1;
3649
3650 /* add asp for each dsp_shared seg */
3651 for (i=0; i<dsp_shared->numseg; i++)
3652 {
3653 asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3654 if (dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1] < 0)
3655 {
3656 asp->n1 = currstop;
3657 asp->n2 = i+1;
3658 asp->n3 = AM_GAP;
3659 asp->n4 = dsp_shared->lens[i];
3660 if (asp_head != NULL)
3661 {
3662 asp_prev->next = asp;
3663 /*if (asp_prev->n1 == asp->n1)
3664 asp->n5 = asp_prev->n5+1;*/
3665 asp_prev = asp;
3666 } else
3667 asp_head = asp_prev = asp;
3668 } else
3669 {
3670 asp->n1 = dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1];
3671 asp->n2 = 1;
3672 asp->n3 = AM_START;
3673 asp->n4 = dsp_shared->lens[i];
3674 if (asp_head != NULL)
3675 {
3676 asp_prev->next = asp;
3677 /*if (asp_prev->n1 == asp->n1)
3678 asp->n5 = asp_prev->n5+1;*/
3679 asp_prev = asp;
3680 } else
3681 asp_head = asp_prev = asp;
3682 asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3683 asp->n1 = dsp_shared->starts[(dsp_shared->dim)*i + n1 - 1] + dsp_shared->lens[i] - 1;
3684 asp->n2 = 1;
3685 j = i+1;
3686 while (j<dsp_shared->numseg && dsp_shared->starts[(dsp_shared->dim)*j + n1 - 1] == -1)
3687 {
3688 j++;
3689 }
3690 if (j<dsp_shared->numseg)
3691 {
3692 if (dsp_shared->starts[(dsp_shared->dim)*j + n1 - 1] > asp->n1 + 1)
3693 asp->n3 = AM_HARDSTOP;
3694 else
3695 asp->n3 = AM_STOP;
3696 } else
3697 asp->n3 = AM_HARDSTOP;
3698 if (asp->n3 == AM_HARDSTOP)
3699 {
3700 if (strand1 != Seq_strand_minus)
3701 asp->n4 = -(dsp_shared->starts[(dsp_shared->dim)*i+n1-1] + dsp_shared->lens[i]-1);
3702 else
3703 asp->n4 = -dsp_shared->starts[(dsp_shared->dim)*i+n1-1];
3704 } else
3705 asp->n4 = -dsp_shared->lens[i];
3706 if (strand1 != Seq_strand_minus)
3707 currstop = asp->n1;
3708 else
3709 currstop = asp_prev->n1-1;
3710 asp_prev->next = asp;
3711 /*if (asp_prev->n1 == asp->n1)
3712 asp->n5 = asp_prev->n5+1;*/
3713 asp_prev = asp;
3714 }
3715 } /* asp for each dsp_shared seg */
3716
3717 /* currstop = start of sap's n2-th seq */
3718 if (strand1 == Seq_strand_minus)
3719 AlnMgr2GetNthSeqRangeInSA(sap, n2, NULL, &currstop);
3720 else
3721 AlnMgr2GetNthSeqRangeInSA(sap, n2, &currstop, NULL);
3722
3723 /* add asp for each dsp seg */
3724 for (i=0; i<dsp->numseg; i++)
3725 {
3726 asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3727 if (dsp->starts[(dsp->dim)*i + n2 - 1] < 0)
3728 {
3729 asp->n1 = currstop;
3730 asp->n2 = dsp_shared->numseg+i+1;
3731 asp->n3 = AM_GAP;
3732 asp->n4 = dsp->lens[i];
3733 asp_prev->next = asp;
3734 /*if (asp_prev->n1 == asp->n1)
3735 asp->n5 = asp_prev->n5 + 1;*/
3736 asp_prev = asp;
3737 } else
3738 {
3739 asp->n1 = dsp->starts[(dsp->dim)*i + n2 - 1];
3740 asp->n2 = 1;
3741 asp->n3 = AM_START;
3742 asp->n4 = dsp->lens[i];
3743 asp_prev->next = asp;
3744 /*if (asp_prev->n1 == asp->n1)
3745 asp->n5 = asp_prev->n5+1;*/
3746 asp_prev = asp;
3747 asp = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
3748 asp->n1 = dsp->starts[(dsp->dim)*i + n2 - 1] + dsp->lens[i] - 1;
3749 asp->n2 = 1;
3750 j = i+1;
3751 while (j<dsp->numseg && dsp->starts[(dsp->dim)* j + n2 - 1] == -1)
3752 {
3753 j++;
3754 }
3755 if (j<dsp->numseg)
3756 {
3757 if (dsp->starts[(dsp->dim)*j + n2 - 1] > asp->n1 + 1)
3758 asp->n3 = AM_HARDSTOP;
3759 else
3760 asp->n3 = AM_STOP;
3761 } else
3762 asp->n3 = AM_HARDSTOP;
3763 if (asp->n3 == AM_HARDSTOP)
3764 {
3765 if (strand1 != Seq_strand_minus)
3766 asp->n4 = -(dsp->starts[(dsp->dim)*i+n1-1] + dsp->lens[i]-1);
3767 else
3768 asp->n4 = -dsp->starts[(dsp->dim)*i+n1-1];
3769 /* so if n4 is negative, this is the highest-numbered residue in the interval */
3770 } else
3771 asp->n4 = dsp->lens[i];
3772 if (strand1 != Seq_strand_minus)
3773 currstop = asp->n1;
3774 else
3775 currstop = asp_prev->n1-1;
3776 asp_prev->next = asp;
3777 /*if (asp_prev->n1 == asp->n1)
3778 asp->n5 = asp_prev->n5 + 1;*/
3779 asp_prev = asp;
3780 }
3781 }
3782
3783 /* create asparray and heapsort it */
3784 asp = asp_head;
3785 i = 0;
3786 while (asp != NULL)
3787 {
3788 i++;
3789 asp = asp->next;
3790 }
3791 asparray = (AM_Small2Ptr PNTR)MemNew(i*sizeof(AM_Small2Ptr));
3792 asp = asp_head;
3793 i = 0;
3794 while (asp != NULL)
3795 {
3796 asparray[i] = asp;
3797 i++;
3798 asp = asp->next;
3799 }
3800 if (strand1 != Seq_strand_minus)
3801 HeapSort(asparray, i, sizeof(asparray), AlnMgr2CompareAsps);
3802 else
3803 HeapSort(asparray, i, sizeof(asparray), AlnMgr2CompareAspsMinus);
3804 /* now need to remove redundant (identical) points */
3805 /* but still need to count those points toward the states */
3806 asp = asparray[0];
3807 asp->next = NULL;
3808 for (j=0; j<i-1; j++)
3809 {
3810 if (asparray[j+1]->n1 != asp->n1 || asparray[j+1]->n3 != asp->n3 || asp->n3 == AM_GAP)
3811 {
3812 asp->next = asparray[j+1];
3813 asp->next->next = NULL;
3814 asp = asp->next;
3815 } else
3816 {
3817 k = j;
3818 while (asparray[k] == NULL && k >= 0)
3819 {
3820 k--;
3821 }
3822 if (k>=0 && asparray[k]->n3 != AM_GAP)
3823 asparray[k]->n2++;
3824 MemFree(asparray[j+1]);
3825 asparray[j+1] = NULL;
3826 }
3827 }
3828 asp_head = asparray[0];
3829 MemFree(asparray);
3830 j=0;
3831 asp = asp_head;
3832 asp_prev = NULL;
3833 /* count up the segments; two consecutive stops make a segment */
3834 state = 0;
3835 if (strand1 != Seq_strand_minus)
3836 {
3837 while (asp != NULL)
3838 {
3839 if (asp->n3 == AM_START)
3840 {
3841 state += asp->n2;
3842 j++;
3843 } else if (asp->n3 == AM_STOP)
3844 {
3845 state -= asp->n2;
3846 asp_tmp = asp->next;
3847 while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
3848 {
3849 asp_tmp = asp_tmp->next;
3850 }
3851 if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3852 j++;
3853 else if (state != 0 && asp->next != NULL && asp_tmp != NULL && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3854 {
3855 asp_tmp2 = asp_tmp;
3856 while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
3857 {
3858 asp_tmp2 = asp_tmp2->next;
3859 }
3860 if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1) && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3861 j++;
3862 }
3863 } else if (asp->n3 == AM_GAP)
3864 j++;
3865 else if (asp->n3 == AM_HARDSTOP)
3866 {
3867 state -= asp->n2;
3868 asp_tmp = asp->next;
3869 while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
3870 {
3871 asp_tmp = asp_tmp->next;
3872 }
3873 if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1)
3874 j++;
3875 else if (state != 0 && asp->next != NULL && asp_tmp != NULL)
3876 {
3877 asp_tmp2 = asp_tmp;
3878 while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
3879 {
3880 asp_tmp2 = asp_tmp2->next;
3881 }
3882 if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1))
3883 j++;
3884 else if (asp_tmp2 == NULL)
3885 j++;
3886 }
3887 }
3888 asp = asp->next;
3889 }
3890 } else
3891 {
3892 currstop = -1;
3893 while (asp != NULL)
3894 {
3895 if (asp->n3 == AM_STOP || asp->n3 == AM_HARDSTOP)
3896 {
3897 if (currstop != asp->n1 && state > 0)
3898 j++;
3899 currstop = asp->n1;
3900 state += asp->n2;
3901 } else if (asp->n3 == AM_START)
3902 {
3903 state -= asp->n2;
3904 j++;
3905 currstop = asp->n1 - 1;
3906 } else if (asp->n3 == AM_GAP)
3907 j++;
3908 asp = asp->next;
3909 }
3910 }
3911
3912 /* dsp_new */
3913 dsp_new = DenseSegNew();
3914 dsp_new->dim = numrows;
3915 dsp_new->numseg = j;
3916 dsp_new->ids = SeqIdDupList(dsp_shared->ids);
3917 dsp_new->starts = (Int4Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Int4));
3918 dsp_new->strands = (Uint1Ptr)MemNew((dsp_new->numseg)*(dsp_new->dim)*sizeof(Uint1));
3919 dsp_new->lens = (Int4Ptr)MemNew((dsp_new->numseg)*sizeof(Int4));
3920
3921 /* get all the ids except for the duplicated one */
3922 sip_head = NULL;
3923 sip_tmp = NULL;
3924 sip = dsp->ids;
3925 i=0;
3926 /* get all the ids except for the duplicated one */
3927 while (sip != NULL)
3928 {
3929 if (i+1 != n2)
3930 {
3931 if (sip_tmp != NULL)
3932 {
3933 sip_tmp->next = SeqIdDup(sip);
3934 sip_tmp = sip;
3935 } else
3936 sip_head = sip_tmp = SeqIdDup(sip);
3937 }
3938 i++;
3939 sip = sip->next;
3940 }
3941 sip = dsp_new->ids;
3942 while (sip->next != NULL)
3943 {
3944 sip = sip->next;
3945 }
3946 sip->next = sip_head;
3947
3948 /* construct starts and lens from asps */
3949 asp = asp_head;
3950 i=0;
3951 state = 0;
3952 currstop = -1;
3953 if (strand1 != Seq_strand_minus)
3954 {
3955 while (asp != NULL)
3956 {
3957 if (asp->n3 == AM_START)
3958 {
3959 state += asp->n2;
3960 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1;
3961 dsp_new->lens[i] = asp->n4;
3962 i++;
3963 } else if (asp->n3 == AM_STOP)
3964 {
3965 state -= asp->n2;
3966 asp_tmp = asp->next;
3967 while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
3968 {
3969 asp_tmp = asp_tmp->next;
3970 }
3971 if (state != 0 && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3972 {
3973 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
3974 dsp_new->lens[i] = asp->n4;
3975 i++;
3976 } else if (state != 0 && asp->next != NULL && asp_tmp != NULL && i < dsp_new->numseg && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3977 {
3978 asp_tmp2 = asp_tmp;
3979 while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
3980 {
3981 asp_tmp2 = asp_tmp2->next;
3982 }
3983 if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1) && (asp_tmp->n3 != AM_HARDSTOP || asp_tmp->n1 != asp->n1))
3984 {
3985 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
3986 dsp_new->lens[i] = asp->n4;
3987 i++;
3988 }
3989 }
3990 } else if (asp->n3 == AM_GAP)
3991 {
3992 dsp_new->starts[dsp_new->dim*i+n1-1] = -asp->n2;
3993 if (asp->n2 > dsp_shared->numseg)
3994 dsp_new->lens[i] = dsp->lens[(asp->n2-1)-(dsp_shared->numseg)];
3995 else
3996 dsp_new->lens[i] = dsp_shared->lens[asp->n2-1];
3997 i++;
3998 } else if (asp->n3 == AM_HARDSTOP)
3999 {
4000 state -= asp->n2;
4001 asp_tmp = asp->next;
4002 while (asp_tmp != NULL && asp_tmp->n3 == AM_GAP)
4003 {
4004 asp_tmp = asp_tmp->next;
4005 }
4006 if (state != 0 && asp->next != NULL && asp_tmp != NULL && asp_tmp->n1 != asp->n1+1 && i < dsp_new->numseg)
4007 {
4008 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4009 if (asp->n1 > -asp->n4)
4010 dsp_new->lens[i] = asp->n4;
4011 i++;
4012 } else if (state != 0 && asp->next != NULL && asp_tmp != NULL && i < dsp_new->numseg)
4013 {
4014 asp_tmp2 = asp_tmp;
4015 while (asp_tmp2 != NULL && asp->n1+1 == asp_tmp2->n1 && asp_tmp2->n3 != AM_START)
4016 {
4017 asp_tmp2 = asp_tmp2->next;
4018 }
4019 if (asp_tmp2 != NULL && ((asp_tmp2->n1 == asp->n1+1 && asp_tmp2->n3 != AM_START) || asp_tmp2->n1 != asp->n1+1))
4020 {
4021 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4022 if (asp->n1 > -asp->n4)
4023 dsp_new->lens[i] = asp->n4;
4024 i++;
4025 } else if (asp_tmp2 == NULL)
4026 {
4027 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1 + 1;
4028 if (asp->n1 > -asp->n4)
4029 dsp_new->lens[i] = asp->n4;
4030 i++;
4031 }
4032 }
4033 }
4034 asp = asp->next;
4035 }
4036 for (i=0; i<dsp_new->numseg; i++)
4037 {
4038 found = FALSE;
4039 for (j=i+1; j<dsp_new->numseg && !found; j++)
4040 {
4041 if (dsp_new->starts[dsp_new->dim*j+n1-1] > -1)
4042 {
4043 if (dsp_new->lens[i] == 0)
4044 dsp_new->lens[i] = dsp_new->starts[dsp_new->dim*j+n1-1] - dsp_new->starts[dsp_new->dim*i+n1-1];
4045 else if (dsp_new->lens[i] > 0)
4046 dsp_new->lens[i] = MIN(dsp_new->lens[i], dsp_new->starts[dsp_new->dim*j+n1-1] - dsp_new->starts[dsp_new->dim*i+n1-1]);
4047 else if (dsp_new->lens[i] < 0)
4048 dsp_new->lens[i] = -dsp_new->lens[i]-dsp_new->starts[dsp_new->dim*i+n1-1]+1;
4049 found = TRUE;
4050 }
4051 }
4052 if (!found) /* last segment */
4053 {
4054 if (dsp_new->starts[dsp_new->dim*i+n1-1] >= 0)
4055 {
4056 AlnMgr2GetNthSeqRangeInSA(amaip->sharedaln, n1, NULL, &stop1);
4057 AlnMgr2GetNthSeqRangeInSA(sap, n2, NULL, &stop2);
4058 dsp_new->lens[i] = (MAX(stop1, stop2) + 1) - dsp_new->starts[dsp_new->dim*i+n1-1];
4059 }
4060 }
4061 }
4062 } else
4063 {
4064 while (asp != NULL)
4065 {
4066 if (asp->n3 == AM_STOP)
4067 {
4068 if (currstop != asp->n1 && state > 0)
4069 {
4070 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1+1;
4071 dsp_new->lens[i] = currstop - asp->n1;
4072 i++;
4073 }
4074 currstop = asp->n1;
4075 state += asp->n2;
4076 } else if (asp->n3 == AM_START)
4077 {
4078 state -= asp->n2;
4079 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1;
4080 dsp_new->lens[i] = currstop - asp->n1 + 1;
4081 i++;
4082 currstop = asp->n1 - 1;
4083 } else if (asp->n3 == AM_GAP)
4084 {
4085 dsp_new->starts[dsp_new->dim*i+n1-1] = -asp->n2;
4086 if (asp->n2 > dsp_shared->numseg)
4087 dsp_new->lens[i] = dsp->lens[(asp->n2-1)-(dsp_shared->numseg)];
4088 else
4089 dsp_new->lens[i] = dsp_shared->lens[asp->n2-1];
4090 i++;
4091 } else if (asp->n3 == AM_HARDSTOP)
4092 {
4093 if (currstop != asp->n1 && state > 0 && asp->next != NULL)
4094 {
4095 dsp_new->starts[dsp_new->dim*i+n1-1] = asp->n1+1;
4096 dsp_new->lens[i] = currstop - asp->n1;
4097 i++;
4098 }
4099 currstop = asp->n1;
4100 state += asp->n2;
4101 }
4102 asp = asp->next;
4103 }
4104 }
4105 /* now add in the other rows, starting with rows from the sharedaln */
4106 for (i=0; i<dsp_shared->dim; i++)
4107 {
4108 if (i+1 != n1)
4109 {
4110 for (j=0; j<dsp_new->numseg; j++)
4111 {
4112 if (dsp_new->starts[dsp_new->dim*j+n1-1] >= 0)
4113 dsp_new->starts[dsp_new->dim*j+i] = AlnMgr2MapSegStartToSegStart(amaip->sharedaln, dsp_new->starts[dsp_new->dim*j+n1-1], n2, i+1, dsp_new->lens[j]);
4114 else
4115 {
4116 if (-(dsp_new->starts[dsp_new->dim*j+n1-1]) > dsp_shared->numseg)
4117 /* this gap came from the new sap */
4118 dsp_new->starts[dsp_new->dim*j+i] = -1;
4119 else /* this gap came from the sharedaln */
4120 dsp_new->starts[dsp_new->dim*j+i] = dsp_shared->starts[dsp_shared->dim*(-dsp_new->starts[dsp_new->dim*j+n1-1]-1)+i];
4121 }
4122 dsp_new->strands[dsp_new->dim*j+i] = AlnMgr2GetNthStrand(amaip->sharedaln, i+1);
4123 }
4124 }
4125 }
4126 for (i=0; i<dsp->dim; i++)
4127 {
4128 if (i+1 != n2)
4129 {
4130 if (i+1 > n2)
4131 offset = 1;
4132 else
4133 offset = 0;
4134 for (j=0; j<dsp_new->numseg; j++)
4135 {
4136 if (dsp_new->starts[dsp_new->dim*j+n1-1] >= 0)
4137 dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = AlnMgr2MapSegStartToSegStart(sap, dsp_new->starts[dsp_new->dim*j+n1-1], n1, i+1, dsp_new->lens[j]);
4138 else
4139 {
4140 if (-(dsp_new->starts[dsp_new->dim*j+n1-1]) > dsp_shared->numseg)
4141 /* this gap is from the new sap */
4142 dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = dsp->starts[dsp->dim*((-dsp_new->starts[dsp_new->dim*j+n1-1])-dsp_shared->numseg-1)+i];
4143 else /* this gap is from the shared alignment */
4144 dsp_new->starts[dsp_new->dim*j+i+dsp_shared->dim-offset] = -1;
4145 }
4146 dsp_new->strands[dsp_new->dim*j+i+dsp_shared->dim-offset] = AlnMgr2GetNthStrand(sap, i+1);
4147 }
4148 }
4149 }
4150 /* fill in strand info for shared row, and get rid of segment keys (neg numbers) */
4151 for (j=0; j<dsp_new->numseg; j++)
4152 {
4153 dsp_new->strands[dsp_new->dim*j+n1-1] = AlnMgr2GetNthStrand(amaip->sharedaln, n1);
4154 if (dsp_new->starts[dsp_new->dim*j+n1-1] < 0)
4155 dsp_new->starts[dsp_new->dim*j+n1-1] = -1;
4156 }
4157 if (dsp_new->dim > 10)
4158 dsp_new->dim = dsp_new->dim;
4159 AlnMgr2CondenseRows(dsp_new, dsp_new->dim);
4160 sap_new = SeqAlignNew();
4161 sap_new->segtype = SAS_DENSEG;
4162 sap_new->segs = (Pointer)(dsp_new);
4163 AlnMgr2IndexSingleChildSeqAlign(sap_new);
4164 SeqAlignFree(amaip->sharedaln);
4165 amaip->sharedaln = sap_new;
4166 amaip->numrows = dsp_new->dim;
4167 amaip->ids = (SeqIdPtr PNTR)MemNew(amaip->numrows*sizeof(SeqIdPtr));
4168 sip = dsp_new->ids;
4169 for (i=0; i<amaip->numrows; i++)
4170 {
4171 amaip->ids[i] = SeqIdDup(sip);
4172 sip = sip->next;
4173 }
4174 while (asp_head != NULL)
4175 {
4176 asp = asp_head->next;
4177 MemFree(asp_head);
4178 asp_head = asp;
4179 }
4180 }
4181 }
4182
4183 /* SECTION 2c */
4184 static Int4 AlnMgr2MapSegStartToSegStart(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2, Int4 len)
4185 {
4186 Int4 diff;
4187 DenseSegPtr dsp;
4188 Int4 pos2;
4189 Int4 seg;
4190 Uint1 strand1;
4191 Uint1 strand2;
4192
4193 if (sap == NULL)
4194 return -1;
4195 seg = AlnMgr2GetSegForStartPos(sap, pos, row1);
4196 if (seg < 0)
4197 return -1;
4198 dsp = (DenseSegPtr)(sap->segs);
4199 if (dsp->starts[dsp->dim*seg+row2-1] == -1)
4200 return -1;
4201 strand1 = dsp->strands[dsp->dim*seg+row1-1];
4202 strand2 = dsp->strands[dsp->dim*seg+row2-1];
4203 if (strand1 != strand2)
4204 pos = pos + len - 1;
4205 if (strand1 == Seq_strand_minus)
4206 diff = dsp->lens[seg] - (pos - dsp->starts[dsp->dim*seg+row1-1]) - 1;
4207 else
4208 diff = pos - dsp->starts[dsp->dim*seg+row1-1];
4209 if (diff > dsp->lens[seg]) /* unaligned here */
4210 return -1;
4211 if (strand2 == Seq_strand_minus)
4212 pos2 = dsp->starts[dsp->dim*seg+row2-1] + dsp->lens[seg] - diff -1;
4213 else
4214 pos2 = dsp->starts[dsp->dim*seg+row2-1]+ diff;
4215 return pos2;
4216 }
4217
4218 /* SECTION 2c */
4219 static Int4 AlnMgr2GetSegForStartPos(SeqAlignPtr sap, Int4 pos, Int4 row)
4220 {
4221 Uint2Ptr array;
4222 DenseSegPtr dsp;
4223 Int4 L;
4224 Int4 mid;
4225 Int4 offset;
4226 Int4 R;
4227 SAIndex2Ptr saip;
4228 SARowDat2Ptr srdp;
4229 Int4 start;
4230 Int4 stop;
4231 Uint1 strand;
4232
4233 if (sap == NULL || sap->saip == NULL || row < 1)
4234 return -1;
4235 AlnMgr2GetNthSeqRangeInSA(sap, row, &start, &stop);
4236 if (pos < start || pos > stop)
4237 return -1;
4238 saip = (SAIndex2Ptr)(sap->saip);
4239 if (row > saip->numrows)
4240 return -1;
4241 srdp = saip->srdp[row-1];
4242 strand = AlnMgr2GetNthStrand(sap, row);
4243 dsp = (DenseSegPtr)(sap->segs);
4244 L = 0;
4245 R = srdp->numsect - 1;
4246 if (strand != Seq_strand_minus)
4247 {
4248 while (L < R)
4249 {
4250 mid = MIN((L + R)/2, srdp->numsect-2);
4251 if (dsp->starts[(srdp->sect[mid + 1])*(dsp->dim)+row-1] <= pos)
4252 L = mid+1;
4253 else
4254 R = mid;
4255 }
4256 } else
4257 {
4258 while (L < R)
4259 {
4260 mid = (L + R)/2;
4261 if (dsp->starts[(srdp->sect[mid])*(dsp->dim)+row-1] > pos)
4262 L = mid + 1;
4263 else
4264 R = mid;
4265 }
4266 }
4267 offset = pos - dsp->starts[(srdp->sect[L])*(dsp->dim)+row-1];
4268 if (offset >= dsp->lens[srdp->sect[L]])
4269 return -2; /* this is an insert */
4270 if (saip->anchor > 0)
4271 {
4272 array = saip->srdp[saip->anchor-1]->sect;
4273 R = binary_search_on_uint2_list(array, srdp->sect[L], saip->srdp[saip->anchor-1]->numsect);
4274 L = R;
4275 }
4276 return srdp->sect[L];
4277 }
4278
4279 static Int4 GetNextStart (DenseSegPtr dsp, Int4 row, Int4 col, Int4Ptr pnext_start_col)
4280 {
4281 Int4 next_start_col;
4282
4283 if (dsp == NULL || row < 0 || row >= dsp->dim || col < 0 || col >= dsp->numseg)
4284 {
4285 return -1;
4286 }
4287
4288 for (next_start_col = col + 1;
4289 next_start_col < dsp->numseg
4290 && dsp->starts[(next_start_col * dsp->dim) + row] == -1;
4291 next_start_col++)
4292 {
4293 }
4294 if (next_start_col < dsp->numseg)
4295 {
4296 if (pnext_start_col != NULL)
4297 {
4298 *pnext_start_col = next_start_col;
4299 }
4300 return dsp->starts[(next_start_col * dsp->dim) + row];
4301 }
4302 else
4303 {
4304 return -1;
4305 }
4306 }
4307
4308 static void AlnMgr2CondenseColumns(DenseSegPtr dsp)
4309 /***************************************************************************
4310 *
4311 * AlnMgr2CondenseColumns finds adjacent columns which appear to align but
4312 * were not put in one column by the mixing mechanism because the input was
4313 * a set of pairwise alignment with a gap on the common sequence in this
4314 * segment. Or graphically:
4315 *
4316 * ----- ----- ----- ----- -----
4317 * AACCG ----- ----- ----- becomes AACCG
4318 * ----- AACCG ----- ----- AACCG
4319 * ----- ----- AACCG ----- AACCG
4320 * ----- ----- ----- AACCG AACCG
4321 *
4322 ***************************************************************************/
4323 {
4324 int gap_start_seg = -1;
4325 int gap_end_seg = -1;
4326 int row, seg, base_col, col, next_start, next_start_col;
4327 Boolean can_fit;
4328
4329 for (seg = 0; seg < dsp->numseg; ++seg) {
4330 if (dsp->starts[dsp->dim * seg] == -1) {
4331 if (gap_start_seg == -1) {
4332 gap_start_seg = seg;
4333 }
4334 else {
4335 if (seg == dsp->numseg - 1) {
4336 gap_end_seg = seg + 1;
4337 }
4338 }
4339 }
4340 else {
4341 if (gap_start_seg != -1) {
4342 gap_end_seg = seg;
4343 }
4344 }
4345
4346 if (gap_end_seg != -1) {
4347 for (base_col = gap_start_seg; base_col<gap_end_seg; ++base_col) {
4348 int len = dsp->lens[base_col];
4349 for (col = base_col + 1; col<gap_end_seg; ++col) {
4350 if (dsp->lens[col] != len) {
4351 continue;
4352 }
4353
4354 can_fit = TRUE;
4355 for (row = 0; row < dsp->dim; ++row) {
4356 if (dsp->starts[dsp->dim * col + row] != -1 &&
4357 dsp->starts[dsp->dim * base_col + row] != -1) {
4358 can_fit = FALSE;
4359 break;
4360 }
4361 else if (dsp->starts[dsp->dim * col + row] != -1)
4362 {
4363 /* make sure we aren't going to disturb the order of
4364 * the starts */
4365 next_start = GetNextStart (dsp, row, base_col, &next_start_col);
4366 if (next_start > -1
4367 && next_start < dsp->starts[dsp->dim * col + row]
4368 && next_start_col < col)
4369 {
4370 can_fit = FALSE;
4371 }
4372 }
4373 }
4374
4375 if (can_fit) {
4376 for (row = 0; row<dsp->dim; ++row) {
4377 if (dsp->starts[dsp->dim * col + row] != -1) {
4378 dsp->starts[dsp->dim * base_col + row] =
4379 dsp->starts[dsp->dim * col + row];
4380 }
4381 }
4382
4383 /* remove column col */
4384 {{
4385 Int4Ptr starts, lens;
4386 Uint1Ptr strands;
4387 Uint4 pos, new_pos;
4388
4389 starts = (Int4Ptr)MemNew(dsp->dim*(dsp->numseg-1)*sizeof(Int4));
4390 strands = (Uint1Ptr)MemNew(dsp->dim*(dsp->numseg-1)*sizeof(Uint1));
4391 lens = (Int4Ptr)MemNew((dsp->numseg-1)*sizeof(Int4));
4392
4393 for (pos=0; pos<dsp->dim*col; pos++) {
4394 starts[pos] = dsp->starts[pos];
4395 strands[pos] = dsp->strands[pos];
4396 }
4397 for (new_pos=pos, pos+=dsp->dim; pos<dsp->dim*dsp->numseg;
4398 pos++, new_pos++) {
4399 starts[new_pos] = dsp->starts[pos];
4400 strands[new_pos] = dsp->strands[pos];
4401 }
4402
4403 for (pos=0; pos<col; pos++) {
4404 lens[pos] = dsp->lens[pos];
4405 }
4406 for (new_pos=pos, pos++; pos<dsp->numseg; pos++, new_pos++) {
4407 lens[new_pos] = dsp->lens[pos];
4408 }
4409
4410 MemFree(dsp->starts);
4411 MemFree(dsp->strands);
4412 dsp->starts = starts;
4413 dsp->strands = strands;
4414 dsp->lens = lens;
4415
4416 dsp->numseg--;
4417
4418 }}
4419
4420 --gap_end_seg;
4421 --seg;
4422 --col;
4423 }
4424 }
4425 }
4426
4427 gap_start_seg = -1;
4428 gap_end_seg = -1;
4429 }
4430 }
4431 }
4432
4433 /* SECTION 2c */
4434 /***************************************************************************
4435 *
4436 * AlnMgr2CondenseRows finds rows of a dense-seg structure that are related
4437 * and that could be condensed into a single row (or fewer rows). It then
4438 * calls AlnMgr2DoCondense to condense those rows into continuous or
4439 * discontinuous rows. whichrow designates which row to merge, if
4440 * less than 1, the function tries to merge the last row.
4441 *
4442 ***************************************************************************/
4443 static void AlnMgr2CondenseRows(DenseSegPtr dsp, Int4 whichrow)
4444 {
4445 Boolean done;
4446 Int4 i;
4447 Int4 j;
4448 Int4 k;
4449 Int4 numrows;
4450 AMCdRowPtr row;
4451 AMCdRowPtr PNTR rowarray;
4452 SeqIdPtr sip;
4453 SeqIdPtr targetsip;
4454
4455 sip = dsp->ids;
4456 rowarray = (AMCdRowPtr PNTR)MemNew((dsp->dim)*sizeof(AMCdRowPtr));
4457 if (whichrow < 1 || whichrow > dsp->dim)
4458 whichrow = dsp->dim;
4459 for (i=0; i<dsp->dim; i++)
4460 {
4461 row = (AMCdRowPtr)MemNew(sizeof(AMCdRow));
4462 row->sip = SeqIdDup(sip);
4463 sip = sip->next;
4464 row->strand = dsp->strands[i];
4465 row->rownum = i+1;
4466 rowarray[i] = row;
4467 if (i+1 == whichrow)
4468 targetsip = row->sip;
4469 }
4470 HeapSort(rowarray, i, sizeof(rowarray), AlnMgr2CompareCdRows);
4471 numrows = dsp->dim;
4472 j = -1; /* j marks the first occurrence of each sip */
4473 for (i=0; j==-1 && i<numrows; i++)
4474 {
4475 if (SeqIdComp(rowarray[i]->sip, targetsip) == SIC_YES)
4476 {
4477 j = i;
4478 if (rowarray[i]->rownum == whichrow) /* no other rows w/sip */
4479 {
4480 for (i=0; i<numrows; i++)
4481 {
4482 SeqIdFree(rowarray[i]->sip);
4483 MemFree(rowarray[i]);
4484 }
4485 MemFree(rowarray);
4486 return;
4487 }
4488 }
4489 }
4490 sip = SeqIdDup(rowarray[j]->sip);
4491 done = FALSE;
4492 for (i=j; !done && rowarray[i]->rownum < whichrow; i++)
4493 {
4494 if (SeqIdComp(rowarray[i]->sip, sip) == SIC_YES)
4495 {
4496 if (rowarray[i]->strand == rowarray[j]->strand)
4497 {
4498 if (AlnMgr2DoCondense(dsp, rowarray[i]->rownum, whichrow))
4499 {
4500 for (k=0; k<numrows; k++)
4501 {
4502 if (rowarray[k]->rownum > rowarray[i]->rownum)
4503 {
4504 rowarray[k]->rownum--;
4505 whichrow--;
4506 }
4507 }
4508 }
4509 }
4510 } else
4511 {
4512 done = TRUE;
4513 SeqIdFree(sip);
4514 sip = SeqIdDup(rowarray[i]->sip);
4515 j = i;
4516 }
4517 }
4518 SeqIdFree(sip);
4519 for (i=0; i<numrows; i++)
4520 {
4521 SeqIdFree(rowarray[i]->sip);
4522 MemFree(rowarray[i]);
4523 }
4524 MemFree(rowarray);
4525 }
4526
4527 /* SECTION 2c */
4528 /***************************************************************************
4529 *
4530 * AlnMgr2DoCondense arithmetically condenses two related rows of a dense-seg
4531 * structure into a single continuous row, a single discontinuous row, or
4532 * two rows with different information than before.
4533 *
4534 ***************************************************************************/
4535 static Boolean AlnMgr2DoCondense(DenseSegPtr dsp, Int4 rownum1, Int4 rownum2)
4536 {
4537 Int4 aln;
4538 SeqAlignPtr fake_sap;
4539 Boolean fits;
4540 Boolean found;
4541 Int4 i;
4542 SeqIdPtr id;
4543 SeqIdPtr id_head;
4544 SeqIdPtr id_prev;
4545 Int4 j;
4546 Int4 k;
4547 Int4 max1;
4548 Int4 max2;
4549 Boolean merged;
4550 Int4 min1;
4551 Int4 min2;
4552 SAIndex2Ptr saip;
4553 Boolean someseq1;
4554 Boolean someseq2;
4555 Int4Ptr starts;
4556 Uint1 strand1;
4557 Uint1 strand2;
4558 Uint1Ptr strands;
4559 AM_Small2Ptr window;
4560 AM_Small2Ptr window_head;
4561 AM_Small2Ptr window_prev;
4562
4563 /* always merge up to rownum1 (better rows are first) */
4564 if (rownum1 > rownum2)
4565 {
4566 i = rownum2;
4567 rownum2 = rownum1;
4568 rownum1 = i;
4569 }
4570 strand1 = dsp->strands[rownum1-1];
4571 strand2 = dsp->strands[rownum2-1];
4572 if (strand1 != strand2)
4573 return FALSE;
4574 i = 0;
4575 window_head = window_prev = NULL;
4576 while (i < dsp->numseg)
4577 {
4578 j = i;
4579 someseq1 = someseq2 = FALSE;
4580 if (dsp->starts[dsp->dim*j+rownum1-1] >= 0)
4581 {
4582 someseq1 = TRUE;
4583 while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum2-1] < 0)
4584 {
4585 j++;
4586 }
4587 } else if (dsp->starts[dsp->dim*j+rownum2-1] >= 0)
4588 {
4589 someseq2 = TRUE;
4590 while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum1-1] < 0)
4591 {
4592 j++;
4593 }
4594 }
4595 fits = FALSE;
4596 if (j > i)
4597 {
4598 if (strand1 == Seq_strand_minus)
4599 {
4600 if (someseq1 == FALSE)
4601 {
4602 min1 = -1;
4603 for (k=j; min1 == -1 && k<dsp->numseg; k++)
4604 {
4605 if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4606 min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4607 }
4608 max1 = -1;
4609 for (k=(i-1); max1 == -1 && k>=0; k--)
4610 {
4611 max1 = dsp->starts[dsp->dim*k+rownum1-1];
4612 }
4613 } else
4614 {
4615 min1 = -1;
4616 for (k=j-1; min1 == -1 && k>=i; k--)
4617 {
4618 min1 = dsp->starts[dsp->dim*(k)+rownum1-1];
4619 }
4620 max1 = -1;
4621 for (k=i; min1 == -1 && k<j; k++)
4622 {
4623 if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4624 max1 = dsp->starts[dsp->dim*k+rownum1-1] + dsp->lens[k] -1;
4625 }
4626 }
4627 } else
4628 {
4629 if (someseq1 == FALSE)
4630 {
4631 min1 = -1;
4632 for (k=i-1; min1 == -1 && k >= 0; k--)
4633 {
4634 if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4635 min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4636 }
4637 max1 = -1;
4638 for (k=j; max1 == -1 && k<dsp->numseg; k++)
4639 {
4640 max1 = dsp->starts[dsp->dim*k+rownum1-1];
4641 }
4642 } else
4643 {
4644 min1 = -1;
4645 for (k=i; min1 == -1 && k<j; k++)
4646 {
4647 min1 = dsp->starts[dsp->dim*k+rownum1-1];
4648 }
4649 max1 = -1;
4650 for (k=j-1; max1 == -1 && k>i; k--)
4651 {
4652 if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4653 max1 = dsp->starts[dsp->dim*(k)+rownum1-1] + dsp->lens[k] - 1;
4654 }
4655 }
4656 }
4657 if (strand2 == Seq_strand_minus)
4658 {
4659 if (someseq2 == FALSE)
4660 {
4661 min2 = -1;
4662 for (k=j; min2 == -1 && k<dsp->numseg; k++)
4663 {
4664 if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4665 min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4666 }
4667 max2 = -1;
4668 for (k=(i-1); max2 == -1 && k>=0; k--)
4669 {
4670 max2 = dsp->starts[dsp->dim*k+rownum2-1];
4671 }
4672 } else
4673 {
4674 min2 = -1;
4675 for (k=j-1; min2 == -1 && k>=i; k--)
4676 {
4677 min2 = dsp->starts[dsp->dim*(k)+rownum2-1];
4678 }
4679 max2 = -1;
4680 for (k=i; max2 == -1 && k<j; k++)
4681 {
4682 if (dsp->starts[dsp->dim*k+rownum2-1] >= 0)
4683 max2 = dsp->starts[dsp->dim*k+rownum2-1] + dsp->lens[k]-1;
4684 }
4685 }
4686 } else
4687 {
4688 if (someseq2 == FALSE)
4689 {
4690 min2 = -1;
4691 for (k=i-1; min2 == -1 && k >= 0; k--)
4692 {
4693 if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4694 min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4695 }
4696 max2 = -1;
4697 for (k=j; max2 == -1 && k<dsp->numseg; k++)
4698 {
4699 max2 = dsp->starts[dsp->dim*k+rownum2-1];
4700 }
4701 } else
4702 {
4703 min2 = -1;
4704 for (k=i; min2 == -1 && k<j; k++)
4705 {
4706 min2 = dsp->starts[dsp->dim*k+rownum2-1];
4707 }
4708 max2 = -1;
4709 for (k=j-1; max2 == -1 && k>=i; k--)
4710 {
4711 if (dsp->starts[dsp->dim*(k)+rownum2-1] >= 0)
4712 max2 = dsp->starts[dsp->dim*(k)+rownum2-1] + dsp->lens[k] - 1;
4713 }
4714 }
4715 }
4716 if (someseq1 == FALSE)
4717 {
4718 if ((min1 < min2 || min2 == -1) && (max1 > max2 || max1 == -1))
4719 fits = TRUE;
4720 } else
4721 {
4722 if ((min2 < min1 || min1 == -1) && (max2 > max1 || max2 == -1))
4723 fits = TRUE;
4724 }
4725 window = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
4726 window->n1 = i;
4727 window->n2 = j-1;
4728 if (!fits)
4729 window->n4 = -1;
4730 if (window_head != NULL)
4731 {
4732 window_prev->next = window;
4733 window_prev = window;
4734 } else
4735 window_head = window_prev = window;
4736 }
4737 if (i == j)
4738 i++;
4739 else
4740 i = j;
4741 }
4742 if (window_head == NULL)
4743 return FALSE;
4744 fake_sap = SeqAlignNew();
4745 fake_sap->segtype = SAS_DENSEG;
4746 fake_sap->segs = (Pointer)dsp;
4747 AlnMgr2IndexSingleChildSeqAlign(fake_sap);
4748 aln = AlnMgr2GetNumAlnBlocks(fake_sap);
4749 if (aln == 1) /* only merge if there is a single fitted window flanked by gaps */
4750 /*or if there are several contiguous fitted windows flanked by gaps */
4751 {
4752 if (window_head->next != NULL && window_head->n4 == 0)
4753 {
4754 window = window_head->next;
4755 while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4756 {
4757 window_head->n2++;
4758 }
4759 while (window != NULL && window->n4 == 0 && window->n1 == window_head->n2+1)
4760 {
4761 window_head->n2 = window->n2;
4762 window = window->next;
4763 while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4764 {
4765 window_head->n2++;
4766 }
4767 }
4768 if (window != NULL)
4769 {
4770 while (window_head != NULL)
4771 {
4772 window = window_head->next;
4773 MemFree(window_head);
4774 window_head = window;
4775 }
4776 fake_sap->segs = NULL;
4777 SeqAlignFree(fake_sap);
4778 return FALSE;
4779 }
4780 }
4781 if (window_head->n4 == -1)
4782 {
4783 while (window_head != NULL)
4784 {
4785 window = window_head->next;
4786 MemFree(window_head);
4787 window_head = window;
4788 }
4789 fake_sap->segs = NULL;
4790 SeqAlignFree(fake_sap);
4791 return FALSE;
4792 }
4793 found = FALSE;
4794 for (i=0; !found && i<window_head->n1; i++)
4795 {
4796 if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4797 found = TRUE;
4798 }
4799 for (i=window_head->n2+1; !found && i<dsp->numseg; i++)
4800 {
4801 if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4802 found = TRUE;
4803 }
4804 if (found)
4805 {
4806 while (window_head != NULL)
4807 {
4808 window = window_head->next;
4809 MemFree(window_head);
4810 window_head = window;
4811 }
4812 fake_sap->segs = NULL;
4813 SeqAlignFree(fake_sap);
4814 return FALSE;
4815 }
4816 /* merge whole row up to rownum1 */
4817 for (i=0; i<dsp->numseg; i++)
4818 {
4819 dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
4820 }
4821 starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
4822 strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
4823 k = 0;
4824 for (i=0; i<dsp->dim; i++)
4825 {
4826 if (i != rownum2-1)
4827 {
4828 for (j=0; j<dsp->numseg; j++)
4829 {
4830 starts[(dsp->dim-1)*j+k] = dsp->starts[dsp->dim*j+i];
4831 strands[(dsp->dim-1)*j+k] = dsp->strands[dsp->dim*j+i];
4832 }
4833 k++;
4834 }
4835 }
4836 MemFree(dsp->starts);
4837 MemFree(dsp->strands);
4838 dsp->starts = starts;
4839 dsp->strands = strands;
4840 dsp->dim--;
4841 id_head = id_prev = NULL;
4842 id = dsp->ids;
4843 j = 0;
4844 while (id != NULL)
4845 {
4846 if (j+1 != rownum2)
4847 {
4848 if (id_head != NULL)
4849 {
4850 id_prev->next = SeqIdDup(id);
4851 id_prev = id_prev->next;
4852 } else
4853 id_head = id_prev = SeqIdDup(id);
4854 }
4855 j++;
4856 id = id->next;
4857 }
4858 SeqIdSetFree(dsp->ids);
4859 dsp->ids = id_head;
4860 while (window_head != NULL)
4861 {
4862 window = window_head->next;
4863 MemFree(window_head);
4864 window_head = window;
4865 }
4866 fake_sap->segs = NULL;
4867 SeqAlignFree(fake_sap);
4868 return TRUE;
4869 }
4870 /* now go through and find the largest piece of every window that can be merged */
4871 /* (can't split up an aligned region with the merge, though) */
4872 window = window_head;
4873 saip = (SAIndex2Ptr)(fake_sap->saip);
4874 while (window != NULL)
4875 {
4876 j = k = -1;
4877 found = FALSE;
4878 for (i=0; !found && i<window->n1; i++)
4879 {
4880 if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4881 found = TRUE;
4882 }
4883 if (!found)
4884 j = window->n1;
4885 found = FALSE;
4886 for (i=window->n2+1; !found && i<dsp->numseg; i++)
4887 {
4888 if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4889 found = TRUE;
4890 }
4891 if (!found)
4892 k = window->n2;
4893 if (j == -1)
4894 {
4895 found = FALSE;
4896 for (i = window->n1-1; !found && i<window->n2; i++)
4897 {
4898 j = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4899 if (j == i)
4900 found = TRUE;
4901 else
4902 j = -1;
4903 }
4904 }
4905 if (k == -1)
4906 {
4907 found = FALSE;
4908 for (i = window->n2; !found && i>=window->n1; i++)
4909 {
4910 k = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4911 if (k == i)
4912 found = TRUE;
4913 else
4914 k = -1;
4915 }
4916 }
4917 if (j > -1 && k > -1 && k > j)
4918 {
4919 window->n1 = j+1;
4920 window->n2 = k;
4921 } else
4922 window->n1 = -1;
4923 window = window->next;
4924 }
4925 window = window_head;
4926 while (window != NULL)
4927 {
4928 if (window->n4 == -1 && i >= 0) /* see if it fits now */
4929 {
4930 i = window->n1;
4931 j = window->n2+1;
4932 if (strand1 == Seq_strand_minus)
4933 {
4934 if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4935 {
4936 min1 = -1;
4937 for (k=j; min1 == -1 && k<dsp->numseg; k++)
4938 {
4939 min1 = dsp->starts[dsp->dim*k+rownum1-1];
4940 }
4941 max1 = -1;
4942 for (k=(i-1); max1 == -1 && k>=0; k--)
4943 {
4944 max1 = dsp->starts[dsp->dim*k+rownum1-1];
4945 }
4946 } else
4947 {
4948 min1 = dsp->starts[dsp->dim*(j-1)+rownum1-1];
4949 max1 = dsp->starts[dsp->dim*i+rownum1-1] + dsp->lens[i];
4950 }
4951 } else
4952 {
4953 if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4954 {
4955 min1 = -1;
4956 for (k=i-1; min1 == -1 && k >= 0; k--)
4957 {
4958 min1 = dsp->starts[dsp->dim*k+rownum1-1];
4959 }
4960 max1 = -1;
4961 for (k=j; max1 == -1 && k<dsp->numseg; k++)
4962 {
4963 max1 = dsp->starts[dsp->dim*k+rownum1-1];
4964 }
4965 } else
4966 {
4967 min1 = dsp->starts[dsp->dim*i+rownum1-1];
4968 max1 = dsp->starts[dsp->dim*(j-1)+rownum1-1] + dsp->lens[j-1];
4969 }
4970 }
4971 if (strand2 == Seq_strand_minus)
4972 {
4973 if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
4974 {
4975 min2 = -1;
4976 for (k=j; min2 == -1 && k<dsp->numseg; k++)
4977 {
4978 min2 = dsp->starts[dsp->dim*k+rownum2-1];
4979 }
4980 max2 = -1;
4981 for (k=(i-1); max2 == -1 && k>=0; k--)
4982 {
4983 max2 = dsp->starts[dsp->dim*k+rownum2-1];
4984 }
4985 } else
4986 {
4987 min2 = dsp->starts[dsp->dim*(j-1)+rownum2-1];
4988 max2 = dsp->starts[dsp->dim*i+rownum2-1] + dsp->lens[i];
4989 }
4990 } else
4991 {
4992 if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
4993 {
4994 min2 = -1;
4995 for (k=i-1; min2 == -1 && k >= 0; k--)
4996 {
4997 min2 = dsp->starts[dsp->dim*k+rownum2-1];
4998 }
4999 max2 = -1;
5000 for (k=j; max2 == -1 && k<dsp->numseg; k++)
5001 {
5002 max2 = dsp->starts[dsp->dim*k+rownum2-1];
5003 }
5004 } else
5005 {
5006 min2 = dsp->starts[dsp->dim*i+rownum2-1];
5007 max2 = dsp->starts[dsp->dim*(j-1)+rownum2-1] + dsp->lens[j-1];
5008 }
5009 }
5010 if (dsp->starts[dsp->dim*j+rownum1-1] == -1)
5011 {
5012 if (min1 < min2 && (max1 > max2 || max1 == -1))
5013 window->n4 = 0;
5014 } else
5015 {
5016 if (min2 < min1 && (max2 > max1 || max2 == -1))
5017 window->n4 = 0;
5018 }
5019 }
5020 if (window->n1 >= 0 && window->n4 >= 0)
5021 {
5022 for (i=window->n1; i<=window->n2; i++)
5023 {
5024 dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim+i+rownum2-1]);
5025 }
5026 }
5027 window = window->next;
5028 }
5029 found = FALSE;
5030 /* check to see if rownum2 is all gaps now */
5031 for (i=0; !found && i<dsp->numseg; i++)
5032 {
5033 if (dsp->starts[dsp->dim*i+rownum2-1] != -1)
5034 found = TRUE;
5035 }
5036 merged = FALSE;
5037 if (!found) /* just gaps */
5038 {
5039 /* merge whole row up to rownum1 */
5040 for (i=0; i<dsp->numseg; i++)
5041 {
5042 dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
5043 }
5044 starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
5045 strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
5046 k = 0;
5047 for (i=0; i<dsp->dim; i++)
5048 {
5049 if (i != rownum2-1)
5050 {
5051 for (j=0; j<dsp->numseg; j++)
5052 {
5053 starts[dsp->dim*j+k] = dsp->starts[dsp->dim*j+i];
5054 strands[dsp->dim*j+k] = dsp->strands[dsp->dim*j+i];
5055 }
5056 k++;
5057 }
5058 }
5059 MemFree(dsp->starts);
5060 MemFree(dsp->strands);
5061 dsp->starts = starts;
5062 dsp->strands = strands;
5063 dsp->dim--;
5064 id_head = id_prev = NULL;
5065 id = dsp->ids;
5066 j = 0;
5067 while (id != NULL)
5068 {
5069 if (j+1 != rownum2)
5070 {
5071 if (id_head != NULL)
5072 {
5073 id_prev->next = SeqIdDup(id);
5074 id_prev = id_prev->next;
5075 } else
5076 id_head = id_prev = SeqIdDup(id);
5077 }
5078 j++;
5079 id = id->next;
5080 }
5081 SeqIdSetFree(dsp->ids);
5082 dsp->ids = id_head;
5083 merged = TRUE;
5084 }
5085 while (window_head != NULL)
5086 {
5087 window = window_head->next;
5088 MemFree(window_head);
5089 window_head = window;
5090 }
5091 fake_sap->segs = NULL;
5092 SeqAlignFree(fake_sap);
5093 return merged;
5094 }
5095
5096 /* SECTION 2c */
5097 /***************************************************************************
5098 *
5099 * AlnMgr2CompareCdRows is the HeapSort callback for AlnMgr2CondenseRows.
5100 * It puts the CDRows in order first by seqid and secondarily by row number.
5101 *
5102 ***************************************************************************/
5103 static int LIBCALLBACK AlnMgr2CompareCdRows(VoidPtr ptr1, VoidPtr ptr2)
5104 {
5105 Int4 i;
5106 AMCdRowPtr row1;
5107 AMCdRowPtr row2;
5108
5109 if (ptr1 == NULL || ptr2 == NULL)
5110 return 0;
5111 row1 = *((AMCdRowPtr PNTR)ptr1);
5112 row2 = *((AMCdRowPtr PNTR)ptr2);
5113 i = AlnMgr2OrderSeqIds(row1->sip, row2->sip);
5114 if (i == 0) /* sort from least rownum to greatest within each seqid */
5115 {
5116 if (row1->rownum < row2->rownum)
5117 return -1;
5118 else
5119 return 1;
5120 } else
5121 return i;
5122 }
5123
5124 /* SECTION 2c */
5125 /***************************************************************************
5126 *
5127 * AlnMgr2CompareAsps is a HeapSort callback for AlnMgr2AddInNewSA. It
5128 * compares the starts (n1) of the two AM_Small2Ptrs; if those are the same
5129 * it compares the types.
5130 *
5131 ***************************************************************************/
5132 static int LIBCALLBACK AlnMgr2CompareAsps(VoidPtr ptr1, VoidPtr ptr2)
5133 {
5134 AM_Small2Ptr asp1;
5135 AM_Small2Ptr asp2;
5136
5137 if (ptr1 != NULL && ptr2 != NULL)
5138 {
5139 asp1 = *((AM_Small2Ptr PNTR)ptr1);
5140 asp2 = *((AM_Small2Ptr PNTR)ptr2);
5141 if (asp1->n1 < asp2->n1)
5142 return -1;
5143 else if (asp1->n1 > asp2->n1)
5144 return 1;
5145 else if (asp1->n5 < asp2->n5)
5146 return -1;
5147 else if (asp1->n5 > asp2->n5)
5148 return 1;
5149 else
5150 {
5151 if (asp1->n3 == AM_GAP && asp2->n3 == AM_GAP)
5152 {
5153 if (asp1->n2 < asp2->n2)
5154 return -1;
5155 if (asp1->n2 > asp2->n2)
5156 return 1;
5157 }
5158 if (asp1->n3 == AM_START)
5159 {
5160 if (asp2->n3 == AM_STOP)
5161 return -1;
5162 else if (asp2->n3 == AM_GAP)
5163 return -1;
5164 else if (asp2->n3 == AM_HARDSTOP)
5165 return -1;
5166 else
5167 return 0;
5168 } else if (asp1->n3 == AM_STOP)
5169 {
5170 if (asp2->n3 == AM_START)
5171 return 1;
5172 else if (asp2->n3 == AM_GAP)
5173 return 1;
5174 else if (asp2->n3 == AM_HARDSTOP)
5175 return -1;
5176 else
5177 return 0;
5178 } else if (asp1->n3 == AM_GAP)
5179 {
5180 if (asp2->n3 == AM_START)
5181 return 1;
5182 else if (asp2->n3 == AM_STOP)
5183 return -1;
5184 else if (asp2->n3 == AM_HARDSTOP)
5185 return -1;
5186 else
5187 return 0;
5188 } else if (asp1->n3 == AM_HARDSTOP)
5189 {
5190 if (asp2->n3 == AM_START)
5191 return 1;
5192 else if (asp2->n3 == AM_STOP)
5193 return 1;
5194 else if (asp2->n3 == AM_GAP)
5195 return 1;
5196 else
5197 return 0;
5198 }
5199 }
5200 }
5201 return 0;
5202 }
5203
5204 /* SECTION 2c */
5205 /***************************************************************************
5206 *
5207 * AlnMgr2CompareAspsMinus is a HeapSort callback for AlnMgr2AddInNewSA. It
5208 * compares the starts (n1) of the two AM_Small2Ptrs; if those are the same
5209 * it compares the types. The only difference from AlnMgr2CompareAsps is
5210 * that it sorts the structures in the opposite order.
5211 *
5212 ***************************************************************************/
5213 static int LIBCALLBACK AlnMgr2CompareAspsMinus(VoidPtr ptr1, VoidPtr ptr2)
5214 {
5215 AM_Small2Ptr asp1;
5216 AM_Small2Ptr asp2;
5217
5218 if (ptr1 != NULL && ptr2 != NULL)
5219 {
5220 asp1 = *((AM_Small2Ptr PNTR)ptr1);
5221 asp2 = *((AM_Small2Ptr PNTR)ptr2);
5222 if (asp1->n1 > asp2->n1)
5223 return -1;
5224 else if (asp1->n1 < asp2->n1)
5225 return 1;
5226 else if (asp1->n5 < asp2->n5)
5227 return -1;
5228 else if (asp1->n5 > asp2->n5)
5229 return 1;
5230 else
5231 {
5232 if (asp1->n3 == AM_GAP && asp2->n3 == AM_GAP)
5233 {
5234 if (asp1->n2 < asp2->n2)
5235 return -1;
5236 if (asp1->n2 > asp2->n2)
5237 return 1;
5238 }
5239 if (asp1->n3 == AM_START)
5240 {
5241 if (asp2->n3 == AM_STOP)
5242 return 1;
5243 else if (asp2->n3 == AM_GAP)
5244 return -1;
5245 else if (asp2->n3 == AM_HARDSTOP)
5246 return 1;
5247 else
5248 return 0;
5249 } else if (asp1->n3 == AM_STOP)
5250 {
5251 if (asp2->n3 == AM_START)
5252 return -1;
5253 else if (asp2->n3 == AM_GAP)
5254 return -1;
5255 else if (asp2->n3 == AM_HARDSTOP)
5256 return 1;
5257 else
5258 return 0;
5259 } else if (asp1->n3 == AM_GAP)
5260 {
5261 if (asp2->n3 == AM_START)
5262 return 1;
5263 else if (asp2->n3 == AM_STOP)
5264 return 1;
5265 else if (asp2->n3 == AM_HARDSTOP)
5266 return 1;
5267 else
5268 return 0;
5269 } else if (asp1->n3 == AM_HARDSTOP)
5270 {
5271 if (asp2->n3 == AM_START)
5272 return -1;
5273 else if (asp2->n3 == AM_STOP)
5274 return -1;
5275 else if (asp2->n3 == AM_GAP)
5276 return -1;
5277 else
5278 return 0;
5279 }
5280 }
5281 }
5282 return 0;
5283 }
5284
5285
5286 /* SECTION 2c */
5287 /***************************************************************************
5288 *
5289 * AlnMgr2GetFirstSharedRow takes two indexed or unindexed dense-seg
5290 * seqaligns and returns the row numbers of the first sequence that is
5291 * shared between the two alignments. If the alignments do not share any
5292 * sequences, both n1 and n2 are set to 0.
5293 *
5294 ***************************************************************************/
5295 static void AlnMgr2GetFirstSharedRow(SeqAlignPtr sap1, SeqAlignPtr sap2, Int4Ptr n1, Int4Ptr n2)
5296 {
5297 DenseSegPtr dsp1;
5298 DenseSegPtr dsp2;
5299 Int4 i;
5300 Int4 j;
5301 SeqIdPtr sip1;
5302 SeqIdPtr sip2;
5303
5304 dsp1 = (DenseSegPtr)(sap1->segs);
5305 dsp2 = (DenseSegPtr)(sap2->segs);
5306 sip1 = dsp1->ids;
5307 i = 1;
5308 while (sip1 != NULL)
5309 {
5310 j = 1;
5311 sip2 = dsp2->ids;
5312 while (sip2 != NULL)
5313 {
5314 if (SeqIdComp(sip1, sip2) == SIC_YES)
5315 {
5316 *n1 = i;
5317 *n2 = j;
5318 return;
5319 }
5320 sip2 = sip2->next;
5321 j++;
5322 }
5323 sip1 = sip1->next;
5324 i++;
5325 }
5326 /* nothing found */
5327 *n1 = 0;
5328 *n2 = 0;
5329 }
5330
5331 /* SECTION 2d */
5332 static SeqIdPtr AlnMgr2SeqIdListsOverlap(SeqIdPtr sip1, SeqIdPtr sip2)
5333 {
5334 SeqIdPtr sip;
5335 SeqIdPtr sip_tmp;
5336
5337 if (sip1 == NULL || sip2 == NULL)
5338 return NULL;
5339 sip = sip1;
5340 while (sip != NULL)
5341 {
5342 sip_tmp = sip2;
5343 while (sip_tmp != NULL)
5344 {
5345 if (SeqIdComp(sip, sip_tmp) == SIC_YES)
5346 return sip;
5347 sip_tmp = sip_tmp->next;
5348 }
5349 sip = sip->next;
5350 }
5351 return NULL;
5352 }
5353
5354 /***************************************************************************
5355 *
5356 * AlnMgr2OrderSeqIds simply alphabetizes printed seqids in order to sort
5357 * them in order to group identical ones in a set.
5358 *
5359 ***************************************************************************/
5360 static Int4 AlnMgr2OrderSeqIds(SeqIdPtr sip1, SeqIdPtr sip2)
5361 {
5362 Char txt1[42];
5363 Char txt2[42];
5364
5365 if (sip1 == NULL && sip2 == NULL)
5366 return 0;
5367 if (sip1 == NULL && sip2 != NULL)
5368 return 1;
5369 if (sip1 != NULL && sip2 == NULL)
5370 return -1;
5371 SeqIdWrite(sip1, txt1, PRINTID_TEXTID_ACC_VER, 41);
5372 SeqIdWrite(sip2, txt2, PRINTID_TEXTID_ACC_VER, 41);
5373 txt1[41] = txt2[41] = '\0';
5374 return StringICmp(txt1, txt2);
5375 }
5376
5377 /* SECTION 2d */
5378 /***************************************************************************
5379 *
5380 * AlnMgr2SetUnaln takes an indexed alignment and sets the numunaln and
5381 * unaln array fields. The unaligned regions are numbered the same
5382 * regardless of whether the alignment is anchored, although they will
5383 * most likely be accessed and displayed differently.
5384 *
5385 ***************************************************************************/
5386 static void AlnMgr2SetUnaln(SeqAlignPtr sap)
5387 {
5388 AMAlignIndex2Ptr amaip;
5389 AM_Small2Ptr ams;
5390 AM_Small2Ptr ams_head;
5391 AM_Small2Ptr ams_prev;
5392 AM_Small2Ptr PNTR amsarray;
5393 DenseSegPtr dsp;
5394 Int4 i;
5395 Int4 j;
5396 SAIndex2Ptr saip;
5397
5398 if (sap == NULL || sap->saip == NULL)
5399 return;
5400 if (sap->saip->indextype == INDEX_CHILD)
5401 {
5402 saip = (SAIndex2Ptr)(sap->saip);
5403 dsp = (DenseSegPtr)(sap->segs);
5404 } else if (sap->saip->indextype == INDEX_PARENT)
5405 {
5406 amaip = (AMAlignIndex2Ptr)(sap->saip);
5407 if (amaip->alnstyle == AM2_LITE)
5408 return;
5409 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
5410 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
5411 } else
5412 return;
5413 MemFree(saip->unaln);
5414 saip->unaln = NULL;
5415 ams_head = ams_prev = NULL;
5416 for (i=0; i<saip->numrows; i++)
5417 {
5418 for (j=0; j<saip->srdp[i]->numunaln; j++)
5419 {
5420 ams = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
5421 ams->n1 = saip->srdp[i]->unaligned[j];
5422 if (ams_head != NULL)
5423 {
5424 ams_prev->next = ams;
5425 ams_prev = ams;
5426 } else
5427 ams_head = ams_prev = ams;
5428 }
5429 }
5430 if (ams_head == NULL)
5431 {
5432 saip->numunaln = -1;
5433 return;
5434 }
5435 j = 0;
5436 ams = ams_head;
5437 while (ams != NULL)
5438 {
5439 j++;
5440 ams = ams->next;
5441 }
5442 amsarray = (AM_Small2Ptr PNTR)MemNew(j*sizeof(AM_Small2Ptr));
5443 j = 0;
5444 ams = ams_head;
5445 while (ams != NULL)
5446 {
5447 amsarray[j] = ams;
5448 j++;
5449 ams = ams->next;
5450 }
5451 HeapSort(amsarray, j, sizeof(AM_Small2Ptr), AlnMgr2CompareUnalnAMS);
5452 saip->numunaln = 1;
5453 for (i=1; i<j; i++)
5454 {
5455 if (amsarray[i]->n1 != amsarray[i-1]->n1)
5456 saip->numunaln++;
5457 }
5458 saip->unaln = (Uint4Ptr)MemNew(saip->numunaln*sizeof(Uint4));
5459 saip->unaln[0] = amsarray[0]->n1;
5460 saip->numunaln = 1;
5461 for (i=1; i<j; i++)
5462 {
5463 if (amsarray[i]->n1 != amsarray[i-1]->n1)
5464 {
5465 saip->unaln[saip->numunaln] = amsarray[i]->n1;
5466 saip->numunaln++;
5467 }
5468 }
5469 for (i=0; i<j; i++)
5470 {
5471 MemFree(amsarray[i]);
5472 }
5473 MemFree(amsarray);
5474 }
5475
5476 /* SECTION 2d */
5477 /***************************************************************************
5478 *
5479 * AlnMgr2CompareUnalnAMS is the HeapSort callback for AlnMgr2SetUnaln;
5480 * it simply compares two AM_Small2 structures and orders them by their
5481 * n1 fields.
5482 *
5483 ***************************************************************************/
5484 static int LIBCALLBACK AlnMgr2CompareUnalnAMS(VoidPtr ptr1, VoidPtr ptr2)
5485 {
5486 AM_Small2Ptr ams1;
5487 AM_Small2Ptr ams2;
5488
5489 if (ptr1 == NULL || ptr2 == NULL)
5490 return 0;
5491 ams1 = *((AM_Small2Ptr PNTR)ptr1);
5492 ams2 = *((AM_Small2Ptr PNTR)ptr2);
5493 if (ams1->n1 < ams2->n1)
5494 return -1;
5495 else if (ams1->n1 > ams2->n1)
5496 return 1;
5497 else
5498 return 0;
5499 }
5500
5501 /***************************************************************************
5502 *
5503 * SECTION 3: Functions for debugging
5504 *
5505 ***************************************************************************/
5506
5507 /* SECTION 3 */
5508 NLM_EXTERN void am_print_sa_index(SeqAlignPtr sap, FILE *ofp)
5509 {
5510 Int4 i;
5511 Int4 j;
5512 SAIndex2Ptr saip;
5513
5514 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
5515 return;
5516 saip = (SAIndex2Ptr)(sap->saip);
5517 fprintf(ofp, "Rows: %d\n", saip->numrows);
5518 fprintf(ofp, "Segments: %d\n", saip->numseg);
5519 fprintf(ofp, "Anchor: %d\n", saip->anchor);
5520 fprintf(ofp, "Alignment coordinates: ");
5521 for (i=0; i<saip->numseg; i++)
5522 {
5523 fprintf(ofp, "%d ", saip->aligncoords[i]);
5524 }
5525 fprintf(ofp, "\n\n");
5526 for (i=0; i<saip->numrows; i++)
5527 {
5528 fprintf(ofp, "row %d\n", i+1);
5529 fprintf(ofp, "numsect: %d\n", saip->srdp[i]->numsect);
5530 for (j=0; j<saip->srdp[i]->numsect; j++)
5531 {
5532 fprintf(ofp, "%d ", saip->srdp[i]->sect[j]);
5533 }
5534 fprintf(ofp, "\n");
5535 fprintf(ofp, "numunsect: %d\n", saip->srdp[i]->numunsect);
5536 for (j=0; j<saip->srdp[i]->numunsect; j++)
5537 {
5538 fprintf(ofp, "%d ", saip->srdp[i]->unsect[j]);
5539 }
5540 fprintf(ofp, "\n");
5541 fprintf(ofp, "numinsect: %d\n", saip->srdp[i]->numinsect);
5542 for (j=0; j<saip->srdp[i]->numinsect; j++)
5543 {
5544 fprintf(ofp, "%d ", saip->srdp[i]->insect[j]);
5545 }
5546 fprintf(ofp, "\n");
5547 }
5548 }
5549
5550 /* SECTION 3 */
5551 /***************************************************************************
5552 *
5553 * AlnMgr2PrintSeqAlign prints an interleaved output of the entire
5554 * indexed alignment, with 'linesize' characters on each line (max 200).
5555 *
5556 ***************************************************************************/
5557 NLM_EXTERN void AlnMgr2PrintSeqAlign(SeqAlignPtr sap, Int4 linesize, Boolean isnuc, FILE *ofp)
5558 {
5559 AlnMsg2Ptr amp;
5560 BioseqPtr bsp;
5561 Char buf[201];
5562 Int4 ctr;
5563 Boolean done;
5564 Int4 i;
5565 Int4 j;
5566 Int4 len;
5567 Boolean more;
5568 Int4 numrows;
5569 Int4 row;
5570 Uint1 seqcode;
5571 SeqIdPtr sip;
5572 SeqPortPtr spp;
5573 Char text[42];
5574
5575 if (sap == NULL || sap->saip == NULL || linesize > 200)
5576 return;
5577 if (isnuc)
5578 seqcode = Seq_code_iupacna;
5579 else
5580 seqcode = Seq_code_iupacaa;
5581 amp = AlnMsgNew2();
5582 numrows = AlnMgr2GetNumRows(sap);
5583 len = AlnMgr2GetAlnLength(sap, FALSE);
5584 for (i=0; i<len; i+=linesize)
5585 {
5586 fprintf(ofp, "%d - %d\n", i, MIN(i+linesize-1, len-1));
5587 for (row=0; row<numrows; row++)
5588 {
5589 sip = AlnMgr2GetNthSeqIdPtr(sap, row+1);
5590 SeqIdWrite(sip, text, PRINTID_FASTA_SHORT, 41);
5591 done = FALSE;
5592 for (j=0; j<12; j++)
5593 {
5594 if (text[j] == '\0')
5595 done = TRUE;
5596 if (done == TRUE)
5597 fprintf(ofp, " ");
5598 else
5599 fprintf(ofp, "%c", text[j]);
5600 }
5601 bsp = BioseqLockById(sip);
5602 AlnMsgReNew2(amp);
5603 amp->row_num = row+1;
5604 amp->from_aln = i;
5605 amp->to_aln = MIN(i+linesize-1, len-1);
5606 while (more = AlnMgr2GetNextAlnBit(sap, amp))
5607 {
5608 if (amp->type == AM_GAP)
5609 {
5610 for (j=amp->from_row; j<=amp->to_row; j++)
5611 {
5612 fprintf(ofp, "-");
5613 }
5614 } else
5615 {
5616 spp = SeqPortNew(bsp, amp->from_row, amp->to_row, amp->strand, seqcode);
5617 ctr = SeqPortRead(spp, (Uint1Ptr)buf, amp->to_row-amp->from_row+1);
5618 buf[ctr] = '\0';
5619 fprintf(ofp, buf);
5620 SeqPortFree(spp);
5621 }
5622 }
5623 BioseqUnlock(bsp);
5624 fprintf(ofp, "\n");
5625 }
5626 fprintf(ofp, "\n\n");
5627 }
5628 AlnMsgFree2(amp);
5629 }
5630
5631 /* SECTION 3 */
5632 NLM_EXTERN void AlnMgr2DumpIndexedAlnToFile(SeqAlignPtr sap, CharPtr filename)
5633 {
5634 AsnIoPtr aip;
5635 AMAlignIndex2Ptr amaip;
5636 SeqAlignPtr sap_tmp;
5637
5638 if (sap == NULL || sap->saip == NULL)
5639 return;
5640 if (sap->saip->indextype == INDEX_CHILD)
5641 {
5642 if (sap->dim == 0)
5643 sap->dim = AlnMgr2GetNumRows(sap);
5644 aip = AsnIoOpen(filename, "w");
5645 SeqAlignAsnWrite(sap, aip, NULL);
5646 AsnIoClose(aip);
5647 return;
5648 }
5649 amaip = (AMAlignIndex2Ptr)(sap->saip);
5650 aip = AsnIoOpen(filename, "w");
5651 if (amaip->alnstyle != AM2_LITE)
5652 {
5653 amaip->sharedaln->dim = 0; /* mark it as the sharedaln */
5654 SeqAlignAsnWrite(amaip->sharedaln, aip, NULL);
5655 }
5656 sap_tmp = sap;
5657 if (sap->dim == 0)
5658 sap->dim = AlnMgr2GetNumRows(sap);
5659 while (sap_tmp != NULL)
5660 {
5661 SeqAlignAsnWrite(sap_tmp, aip, NULL);
5662 sap_tmp = sap_tmp->next;
5663 }
5664 AsnIoClose(aip);
5665 }
5666
5667 /***************************************************************************
5668 *
5669 * SECTION 4: API-level functions (and their helper functions) used to
5670 * access an indexed alignment.
5671 * SECTION 4a: AlnMgr2GetNextAlnBit and associated functions
5672 * SECTION 4b: "GetNth" functions
5673 * SECTION 4c: other functions for accessing the alignment
5674 *
5675 ***************************************************************************/
5676
5677 /* SECTION 4a */
5678 /***************************************************************************
5679 *
5680 * AlnMgr2GetNextAlnBit takes an indexed seqalign and returns it, piece
5681 * by piece, in the row and across the range specified in the AlnMsg
5682 * structure. amp->from_aln and amp->to_aln must be filled in; these are
5683 * in alignment coordinates. AlnMgr2GetNextAlnBit will return the AlnMsg
5684 * structure with amp->from_row and amp->to_row filled in. If amp->type is
5685 * AM_SEQ, these numbers are sequence coordinates; if amp->type is AM_GAP
5686 * the numbers are alignment coordinates and there is a gap in that row.
5687 * AlnMgr2GetNextAlnBit returns one continuous piece of sequence or gap
5688 * at each call, and keeps returning TRUE until it has returned all the
5689 * information for the piece of the alignment requested.
5690 *
5691 ***************************************************************************/
5692 NLM_EXTERN Boolean AlnMgr2GetNextAlnBit(SeqAlignPtr sap, AlnMsg2Ptr amp) /* NEXT */
5693 {
5694 AMAlignIndex2Ptr amaip;
5695 Uint2Ptr array;
5696 Int4 arraylen;
5697 Int4 ctr;
5698 Int4 disc;
5699 Int4 disc1;
5700 DenseSegPtr dsp;
5701 Int4 endoffset;
5702 Boolean found;
5703 Int4 i;
5704 Int4 index;
5705 Int4 intfrom;
5706 Int4 intto;
5707 Int4 j;
5708 Int4 len;
5709 Int4 offset;
5710 SAIndex2Ptr saip;
5711 SARowDat2Ptr srdp;
5712 Int4 start_sect;
5713 Int4 stop_sect;
5714 Uint2Ptr trans;
5715 Int4 translen;
5716
5717 if (sap == NULL || sap->saip == NULL || amp == NULL)
5718 return FALSE;
5719 if (amp->left_interrupt != NULL)
5720 {
5721 MemFree(amp->left_interrupt);
5722 amp->left_interrupt = NULL;
5723 }
5724 if (amp->right_interrupt != NULL)
5725 {
5726 MemFree(amp->right_interrupt);
5727 amp->right_interrupt = NULL;
5728 }
5729 if (sap->saip->indextype == INDEX_CHILD)
5730 {
5731 dsp = (DenseSegPtr)(sap->segs);
5732 saip = (SAIndex2Ptr)(sap->saip);
5733 } else if (sap->saip->indextype == INDEX_PARENT)
5734 {
5735 amaip = (AMAlignIndex2Ptr)(sap->saip);
5736 if (amaip->alnstyle == AM2_LITE)
5737 return FALSE;
5738 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
5739 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
5740 }
5741 /* reality checks */
5742 if (amp->row_num > saip->numrows)
5743 return FALSE;
5744 if (amp->len <= 0)
5745 amp->len = AlnMgr2GetAlnLength(sap, FALSE);
5746 if (amp->from_aln < 0 || amp->from_aln > amp->len-1 || amp->real_from > amp->to_aln)
5747 return FALSE;
5748 if (amp->to_aln == -1)
5749 amp->to_aln = amp->len - 1;
5750 if (amp->to_aln < amp->from_aln || amp->to_aln > amp->len-1)
5751 return FALSE;
5752 if (amp->real_from == -2)
5753 amp->real_from = amp->from_aln;
5754 amp->strand = AlnMgr2GetNthStrand(sap, amp->row_num);
5755 srdp = saip->srdp[amp->row_num-1];
5756 len = 0;
5757 start_sect = binary_search_on_uint4_list(saip->aligncoords, amp->real_from, saip->numseg);
5758 offset = amp->real_from - saip->aligncoords[start_sect];
5759 endoffset = 0;
5760 stop_sect = binary_search_on_uint4_list(saip->aligncoords, amp->to_aln, saip->numseg);
5761 /* now figure out whether it starts in sequence or a gap, and figure out how */
5762 /* long it continues in the same mode without interruption by inserts or unaligned */
5763 /* regions; the whole contiguous stretch will be reported */
5764 if (saip->anchor > 0)
5765 {
5766 trans = saip->srdp[saip->anchor-1]->sect;
5767 translen = saip->srdp[saip->anchor-1]->numsect;
5768 } else
5769 {
5770 trans = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
5771 for (i=0; i<dsp->numseg; i++)
5772 {
5773 trans[i] = i;
5774 }
5775 translen = dsp->numseg;
5776 }
5777 arraylen = -1;
5778 if ((index = binary_search_on_uint2_list(srdp->sect, trans[start_sect], srdp->numsect)) != -1)
5779 {
5780 amp->type = AM_SEQ;
5781 array = srdp->sect;
5782 arraylen = srdp->numsect;
5783 } else if ((index = binary_search_on_uint2_list(srdp->unsect, trans[start_sect], srdp->numunsect)) != -1)
5784 {
5785 amp->type = AM_GAP;
5786 array = srdp->unsect;
5787 arraylen = srdp->numunsect;
5788 }
5789 if (arraylen == -1) /* error */
5790 return FALSE;
5791 if (amp->row_num == saip->anchor)
5792 {
5793 amp->type = AM_SEQ;
5794 /* find limits of aligned region */
5795 i = start_sect;
5796 j = srdp->sect[start_sect];
5797 disc = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5798 while (j<srdp->sect[stop_sect] && disc == -1)
5799 {
5800 j++;
5801 disc = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5802 }
5803 i = binary_search_on_uint2_list(srdp->sect, j, srdp->numsect);
5804 if (i == -1)
5805 {
5806 i = binary_search_on_uint2_list(srdp->unsect, j, srdp->numunsect);
5807 }
5808 endoffset = dsp->lens[trans[i]] - (amp->to_aln - saip->aligncoords[i]) - 1;
5809 if (endoffset < 0)
5810 endoffset = 0;
5811 if (i<stop_sect && endoffset == 0) /* there's an unaligned region here, and we go to the end of the segment */
5812 {
5813 AlnMgr2GetUnalignedInfo(sap, trans[i], amp->row_num, &intfrom, &intto);
5814 amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5815 amp->right_interrupt->row = amp->row_num;
5816 amp->right_interrupt->unalnlen = intto - intfrom + 1;
5817 amp->right_interrupt->segnum = trans[i];
5818 amp->right_interrupt->which_side = AM2_RIGHT;
5819 }
5820 stop_sect = i;
5821 if (start_sect > 0 && offset == 0)
5822 {
5823 disc = binary_search_on_uint2_list(srdp->unaligned, trans[start_sect]-1, srdp->numunaln);
5824 if (disc != -1) /* there is a left unaligned region */
5825 {
5826 AlnMgr2GetUnalignedInfo(sap, trans[start_sect]-1, amp->row_num, &intfrom, &intto);
5827 amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5828 amp->left_interrupt->row = amp->row_num;
5829 amp->left_interrupt->unalnlen = intto - intfrom + 1;
5830 amp->left_interrupt->segnum = trans[start_sect];
5831 amp->left_interrupt->which_side = AM2_LEFT;
5832 }
5833 }
5834 len = 0;
5835 for (i=start_sect; i<= stop_sect; i++)
5836 {
5837 len += dsp->lens[trans[i]];
5838 }
5839 len = len - offset - endoffset;
5840 if (amp->strand == Seq_strand_minus)
5841 amp->from_row = dsp->starts[trans[stop_sect]*dsp->dim+amp->row_num-1] + endoffset;
5842 else
5843 amp->from_row = dsp->starts[trans[start_sect]*dsp->dim+amp->row_num-1] + offset;
5844 amp->to_row = amp->from_row + len - 1;
5845 amp->real_from += amp->to_row - amp->from_row + 1;
5846 if (saip->anchor <= 0)
5847 MemFree(trans);
5848 return TRUE;
5849 }
5850 /* look for limits of aligned/gapped region */
5851 i = index;
5852 j = start_sect+1;
5853 disc = -1;
5854 found = FALSE;
5855 while (i+1<arraylen && disc == -1 && array[i] <= trans[stop_sect] && array[i+1]-1 == array[i])
5856 {
5857 disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5858 if (disc == -1)
5859 i++;
5860 }
5861 disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5862 j = binary_search_on_uint2_list(trans, array[i], translen);
5863 if (amp->type == AM_SEQ && j <= stop_sect) /* there is an interrupting region, either seq/gap, insert, or unaligned, plus just check last piece */
5864 {
5865 i = binary_search_on_uint2_list(srdp->insect, trans[j]+1, srdp->numinsect);
5866 if (i != -1) /* there's an insert */
5867 {
5868 amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5869 amp->right_interrupt->row = amp->row_num;
5870 amp->right_interrupt->segnum = trans[j];
5871 amp->right_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5872 amp->right_interrupt->which_side = AM2_RIGHT;
5873 /* look for unaligned regions off insert */
5874 disc1 = -1;
5875 if (j > 0)
5876 disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1, srdp->numunaln);
5877 if (disc1 != -1)
5878 {
5879 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5880 amp->right_interrupt->unalnlen = intto - intfrom + 1;
5881 }
5882 i++;
5883 ctr = 1;
5884 while (i<srdp->numinsect && srdp->insect[i] == srdp->insect[i-1]+1)
5885 {
5886 amp->right_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5887 /* look for unaligned regions off insert */
5888 disc1 = -1;
5889 if (j > 0) {
5890 disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1+ctr, srdp->numunaln);
5891 }
5892 if (disc1 != -1)
5893 {
5894 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5895 amp->right_interrupt->unalnlen += intto - intfrom + 1;
5896 }
5897 i++;
5898 ctr++;
5899 }
5900 }
5901 if (disc != -1) /* there's an unaligned region */
5902 {
5903 if (amp->right_interrupt == NULL)
5904 amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5905 amp->right_interrupt->row = amp->row_num;
5906 amp->right_interrupt->segnum = trans[j];
5907 amp->right_interrupt->which_side = AM2_RIGHT;
5908 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc], amp->row_num, &intfrom, &intto);
5909 amp->right_interrupt->unalnlen += intto - intfrom + 1;
5910 }
5911 }
5912 stop_sect = j;
5913 /* now look for left-side unaligned or inserted regions if offset == 0 */
5914 if (amp->type == AM_SEQ && offset == 0)
5915 {
5916 disc = -1;
5917 j = 1;
5918 i = -1;
5919 if ((Int2)trans[start_sect]-j > 0)
5920 i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j, srdp->numsect);
5921 while (i == -1 && (Int2)(trans[start_sect])-j-1 >= 0)
5922 {
5923 i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j-1, srdp->numsect);
5924 j++;
5925 }
5926 disc = binary_search_on_uint2_list(srdp->unaligned, trans[start_sect]-j, srdp->numunaln);;
5927 if (disc > -1)
5928 {
5929 AlnMgr2GetUnalignedInfo(sap, trans[start_sect]-j, amp->row_num, &intfrom, &intto);
5930 amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5931 amp->left_interrupt->row = amp->row_num;
5932 amp->left_interrupt->segnum = trans[start_sect];
5933 amp->left_interrupt->which_side = AM2_LEFT;
5934 amp->left_interrupt->unalnlen = intto - intfrom + 1;
5935 }
5936 i = binary_search_on_uint2_list(srdp->insect, trans[start_sect]-j, srdp->numinsect);
5937 if (i != -1) /* there's an insert */
5938 {
5939 if (amp->left_interrupt == NULL)
5940 amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5941 amp->left_interrupt->row = amp->row_num;
5942 amp->left_interrupt->segnum = trans[start_sect];
5943 amp->left_interrupt->which_side = AM2_LEFT;
5944 amp->left_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5945 /* look for unaligned regions off insert */
5946 j = trans[start_sect]-j;
5947 disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5948 if (disc1 != -1)
5949 {
5950 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5951 amp->left_interrupt->unalnlen += intto - intfrom + 1;
5952 }
5953 i--;
5954 j--;
5955 while (i-1>=0 && srdp->insect[i] == srdp->insect[i+1]-1)
5956 {
5957 amp->left_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5958 disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5959 if (disc1 != -1)
5960 {
5961 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5962 amp->left_interrupt->unalnlen += intto - intfrom + 1;
5963 }
5964 i--;
5965 j--;
5966 }
5967 if (i>=0) /* look one more over for unaligned */
5968 {
5969 disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5970 if (disc1 != -1)
5971 {
5972 AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5973 amp->left_interrupt->unalnlen += intto - intfrom + 1;
5974 }
5975 }
5976 }
5977 }
5978 endoffset = dsp->lens[trans[stop_sect]] - (amp->to_aln - saip->aligncoords[stop_sect]) - 1;
5979 if (endoffset < 0)
5980 endoffset = 0;
5981 if (amp->right_interrupt != NULL && endoffset > 0)
5982 {
5983 MemFree(amp->right_interrupt);
5984 amp->right_interrupt = NULL;
5985 }
5986 len = 0;
5987 for (i=start_sect; i<=stop_sect; i++)
5988 {
5989 len += dsp->lens[trans[i]];
5990 }
5991 len = len - offset - endoffset;
5992 if (amp->type == AM_GAP)
5993 {
5994 amp->from_row = amp->real_from;
5995 amp->to_row = amp->from_row + len - 1;
5996 } else
5997 {
5998 if (amp->strand == Seq_strand_minus)
5999 {
6000 amp->from_row = dsp->starts[trans[stop_sect]*dsp->dim+amp->row_num-1] + endoffset;
6001 amp->to_row = amp->from_row + len - 1;
6002 } else
6003 {
6004 amp->from_row = dsp->starts[trans[start_sect]*dsp->dim+amp->row_num-1] + offset;
6005 amp->to_row = amp->from_row + len - 1;
6006 }
6007 }
6008 if (saip->anchor <= 0)
6009 MemFree(trans);
6010 amp->real_from += amp->to_row - amp->from_row + 1;
6011 return TRUE;
6012 }
6013
6014 /* SECTION 4a */
6015 static Int4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
6016 {
6017 Uint4 L;
6018 Uint4 mid;
6019 Uint4 R;
6020
6021 if (list == NULL || listlen == 0)
6022 return 0;
6023 L = 0;
6024 R = listlen - 1;
6025 while (L < R)
6026 {
6027 mid = (L+R)/2;
6028 if (list[mid + 1] <= pos)
6029 L = mid + 1;
6030 else
6031 R = mid;
6032 }
6033 return R;
6034 }
6035
6036 /* SECTION 4a */
6037 static Int4 binary_search_on_uint2_list(Uint2Ptr list, Int4 ele, Uint2 listlen)
6038 {
6039 Uint2 L;
6040 Uint2 mid;
6041 Uint2 R;
6042
6043 if (list == NULL || listlen == 0 || ele < 0)
6044 return -1;
6045 L = 0;
6046 R = listlen - 1;
6047 while (L < R)
6048 {
6049 mid = (L+R)/2;
6050 if (ele <= list[mid])
6051 R = mid;
6052 else
6053 L = mid+1;
6054 }
6055 if (ele == list[R])
6056 return R;
6057 else
6058 return -1;
6059 }
6060
6061 /* SECTION 4a */
6062 static void AlnMgr2GetUnalignedInfo(SeqAlignPtr sap, Int4 segment, Int4 row, Int4Ptr from, Int4Ptr to)
6063 {
6064 AMAlignIndex2Ptr amaip;
6065 DenseSegPtr dsp;
6066 Boolean found;
6067 Int4 i;
6068 SAIndex2Ptr saip;
6069 Uint1 strand;
6070 Int4 tmp;
6071
6072 if (sap == NULL)
6073 return;
6074 strand = AlnMgr2GetNthStrand(sap, row);
6075 if (sap->saip->indextype == INDEX_CHILD)
6076 {
6077 saip = (SAIndex2Ptr)(sap->saip);
6078 dsp = (DenseSegPtr)(sap->segs);
6079 } else if (sap->saip->indextype == INDEX_PARENT)
6080 {
6081 amaip = (AMAlignIndex2Ptr)(sap->saip);
6082 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6083 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6084 }
6085 found = FALSE;
6086 *from = *to = -1;
6087 for (i=segment; i>=0 && !found; i--)
6088 {
6089 if (dsp->starts[dsp->dim*i+row-1] != -1)
6090 {
6091 found = TRUE;
6092 if (strand == Seq_strand_minus)
6093 *to = dsp->starts[dsp->dim*i+row-1]-1;
6094 else
6095 *from = dsp->starts[dsp->dim*i+row-1]+dsp->lens[i];
6096 }
6097 }
6098 found = FALSE;
6099 for (i=segment+1; i<dsp->numseg && !found; i++)
6100 {
6101 if (dsp->starts[dsp->dim*i+row-1] != -1)
6102 {
6103 found = TRUE;
6104 if (strand == Seq_strand_minus)
6105 *from = dsp->starts[dsp->dim*i+row-1]+dsp->lens[i];
6106 else
6107 *to = dsp->starts[dsp->dim*i+row-1]-1;
6108 }
6109 }
6110 if (*from > *to)
6111 {
6112 tmp = *from;
6113 *from = *to;
6114 *to = tmp;
6115 }
6116 }
6117
6118 /* SECTION 4a */
6119 /***************************************************************************
6120 *
6121 * AlnMgr2GetInterruptInfo returns a structure describing the inserts and
6122 * unaligned regions in an interrupt. The structure is allocated by this
6123 * function and must be freed with AlnMgr2FreeInterruptInfo.
6124 *
6125 ***************************************************************************/
6126 NLM_EXTERN AMInterrInfoPtr AlnMgr2GetInterruptInfo(SeqAlignPtr sap, AMInterruptPtr interrupt)
6127 {
6128 AMAlignIndex2Ptr amaip;
6129 Int4 disc;
6130 Boolean done;
6131 DenseSegPtr dsp;
6132 Int4 i;
6133 AMInterrInfoPtr iip;
6134 Int4 inserts;
6135 Int4 intfrom;
6136 Int4 intto;
6137 Int4 j;
6138 Int4 k;
6139 Int4 n;
6140 SAIndex2Ptr saip;
6141 SARowDat2Ptr srdp;
6142 Uint1 strand;
6143 Uint2Ptr trans;
6144 Int4 translen;
6145 Int4 u;
6146
6147 if (interrupt == NULL || sap == NULL || sap->saip == NULL)
6148 return NULL;
6149 if (sap->saip->indextype == INDEX_CHILD)
6150 {
6151 dsp = (DenseSegPtr)(sap->segs);
6152 saip = (SAIndex2Ptr)(sap->saip);
6153 } else if (sap->saip->indextype == INDEX_PARENT)
6154 {
6155 amaip = (AMAlignIndex2Ptr)(sap->saip);
6156 if (amaip->alnstyle == AM2_LITE)
6157 return FALSE;
6158 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6159 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6160 }
6161 if (dsp->numseg < interrupt->segnum)
6162 return NULL;
6163 if (saip->anchor > 0)
6164 {
6165 trans = saip->srdp[saip->anchor-1]->sect;
6166 translen = saip->srdp[saip->anchor-1]->numsect;
6167 } else
6168 {
6169 trans = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
6170 for (i=0; i<dsp->numseg; i++)
6171 {
6172 trans[i] = i;
6173 }
6174 translen = dsp->numseg;
6175 }
6176 strand = AlnMgr2GetNthStrand(sap, interrupt->row-1);
6177 srdp = saip->srdp[interrupt->row-1];
6178 /* now look for inserts and unaligned regions on the side indicated */
6179 if (interrupt->which_side == AM2_RIGHT)
6180 {
6181 /* check if this is unaligned */
6182 disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6183 /* then look for inserts */
6184 done = FALSE;
6185 iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6186 if (disc != -1)
6187 iip->num = 1;
6188 inserts = 0;
6189 for (i=interrupt->segnum+1; !done; i++)
6190 {
6191 n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6192 if (n == -1)
6193 n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6194 if (n == -1)
6195 {
6196 done = TRUE;
6197 } else
6198 {
6199 inserts++; /* only increment if region gets interrupted */
6200 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6201 if (disc != -1) /* this insert has an unaligned region */
6202 {
6203 iip->num += inserts;
6204 iip->num++;
6205 inserts = 0;
6206 }
6207 }
6208 }
6209 if (inserts != 0)
6210 iip->num++;
6211 iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6212 iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6213 iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6214 k = 0;
6215 disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6216 if (disc != -1) /* starts with unaligned */
6217 {
6218 AlnMgr2GetUnalignedInfo(sap, interrupt->segnum, interrupt->row, &intfrom, &intto);
6219 iip->starts[k] = intfrom;
6220 iip->lens[k] = intto - intfrom + 1;
6221 iip->types[k] = AM_UNALIGNED;
6222 k++;
6223 }
6224 disc = 0;
6225 done = FALSE;
6226 for (i=interrupt->segnum+1; !done; i++)
6227 {
6228 n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6229 u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numinsect);
6230 if (n == -1 && u == -1)
6231 {
6232 done = TRUE;
6233 } else
6234 {
6235 if (u == -1)
6236 {
6237 if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6238 iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6239 iip->lens[k] += dsp->lens[i];
6240 iip->types[k] = AM_INSERT;
6241 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6242 if (disc != -1) /* this insert has an unaligned region */
6243 {
6244 k++;
6245 AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6246 iip->starts[k] = intfrom;
6247 iip->lens[k] = intto - intfrom + 1;
6248 iip->types[k] = AM_UNALIGNED;
6249 k++;
6250 }
6251 }
6252 }
6253 }
6254 } else if (interrupt->which_side == AM2_LEFT)
6255 {
6256 /* check if the next non-gap segment to the left has unaligned */
6257 j = 1;
6258 n = 0;
6259 while (n != -1 && interrupt->segnum-j >= 0)
6260 {
6261 n = binary_search_on_uint2_list(srdp->unsect, interrupt->segnum-j, srdp->numunsect);
6262 if (n == -1)
6263 n = binary_search_on_uint2_list(srdp->insect, interrupt->segnum-j, srdp->numinsect);
6264 if (n != -1)
6265 j++;
6266 }
6267 disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum-j, srdp->numunaln);
6268 /* then look for inserts */
6269 done = FALSE;
6270 iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6271 if (disc != -1)
6272 iip->num = 1;
6273 inserts = 0;
6274 for (i=interrupt->segnum-1; !done; i--)
6275 {
6276 n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6277 if (n == -1)
6278 n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6279 if (n == -1)
6280 {
6281 done = TRUE;
6282 } else
6283 {
6284 inserts++; /* only increment if region gets interrupted */
6285 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6286 if (disc != -1) /* this insert has an unaligned region */
6287 {
6288 iip->num += inserts;
6289 iip->num++;
6290 inserts = 0;
6291 }
6292 }
6293 }
6294 i++;
6295 iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6296 iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6297 iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6298 k = 0;
6299 disc = 0;
6300 /* check first non-inserted segment for unaligned */
6301 if (i >= 0)
6302 {
6303 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6304 if (disc != -1) /* there's an unaligned region */
6305 {
6306 AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6307 iip->starts[k] = intfrom;
6308 iip->lens[k] = intto - intfrom + 1;
6309 iip->types[k] = AM_UNALIGNED;
6310 k++;
6311 }
6312 }
6313 i++; /* start from leftmost end of inserts/unaligned */
6314 for (i; i<interrupt->segnum; i++)
6315 {
6316 u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6317 if (u == -1)
6318 {
6319 if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6320 iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6321 iip->lens[k] += dsp->lens[i];
6322 iip->types[k] = AM_INSERT;
6323 disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6324 if (disc != -1) /* this insert has an unaligned region */
6325 {
6326 k++;
6327 AlnMgr2GetUnalignedInfo(sap, binary_search_on_uint2_list(trans, i, translen), interrupt->row, &intfrom, &intto);
6328 iip->starts[k] = intfrom;
6329 iip->lens[k] = intto - intfrom + 1;
6330 iip->types[k] = AM_UNALIGNED;
6331 k++;
6332 }
6333 }
6334 }
6335 }
6336 iip->strand = strand;
6337 return iip;
6338 }
6339
6340 /* SECTION 4b */
6341 /***************************************************************************
6342 *
6343 * AlnMgr2GetNthStrand takes an indexed seqalign and a row number and
6344 * returns the strand of the row indicated. A return of 0 indicates
6345 * an error.
6346 *
6347 ***************************************************************************/
6348 NLM_EXTERN Uint1 AlnMgr2GetNthStrand(SeqAlignPtr sap, Int4 n)
6349 {
6350 AMAlignIndex2Ptr amaip;
6351 DenseSegPtr dsp;
6352
6353 if (sap == NULL || sap->saip == NULL || n < 1)
6354 return 0;
6355 if (sap->saip->indextype == INDEX_CHILD)
6356 {
6357 dsp = (DenseSegPtr)(sap->segs);
6358 if (n > dsp->dim)
6359 return 0;
6360 if (dsp->strands == NULL)
6361 return Seq_strand_plus;
6362 return (dsp->strands[n-1]);
6363 } else if (sap->saip->indextype == INDEX_PARENT)
6364 {
6365 amaip = (AMAlignIndex2Ptr)(sap->saip);
6366 if (amaip->alnstyle == AM2_LITE) /* can't get Nth strand for this */
6367 return 0;
6368 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6369 if (n > dsp->dim)
6370 return 0;
6371 if (dsp->strands == NULL)
6372 return Seq_strand_plus;
6373 return (dsp->strands[n-1]);
6374 }
6375 return 0;
6376 }
6377
6378 /* SECTION 4b */
6379 /***************************************************************************
6380 *
6381 * AlnMgr2GetNthSeqIdPtr returns the seqid (this is a duplicated,
6382 * allocated seqid that must be freed) of the nth row (1-based) of an
6383 * indexed parent or child seqalign.
6384 *
6385 ***************************************************************************/
6386 NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtr(SeqAlignPtr sap, Int4 n)
6387 {
6388 AMAlignIndex2Ptr amaip;
6389 DenseSegPtr dsp;
6390 Int4 i;
6391 SeqIdPtr sip;
6392
6393 if (sap == NULL || sap->saip == NULL)
6394 return NULL;
6395 if (sap->saip->indextype == INDEX_CHILD)
6396 {
6397 dsp = (DenseSegPtr)(sap->segs);
6398 if (n > dsp->dim)
6399 return NULL;
6400 sip = dsp->ids;
6401 for (i=1; i<n && sip != NULL; i++)
6402 {
6403 sip = sip->next;
6404 }
6405 if (sip == NULL) return NULL;
6406 return (SeqIdDup(sip));
6407 } else if (sap->saip->indextype == INDEX_PARENT)
6408 {
6409 amaip = (AMAlignIndex2Ptr)(sap->saip);
6410 if (n > amaip->numrows)
6411 return NULL;
6412 sip = SeqIdDup(amaip->ids[n-1]);
6413 return sip;
6414 } else
6415 return NULL;
6416 }
6417
6418 /* SECTION 4b */
6419 /***************************************************************************
6420 *
6421 * AlnMgr2GetNthSeqRangeInSA returns the smallest and largest sequence
6422 * coordinates contained in the nth row of an indexed seqalign. Either
6423 * start or stop can be NULL to only retrieve one of the coordinates.
6424 * If start and stop are -1, there is an error; if they are both -2, the
6425 * row is just one big insert. RANGE
6426 *
6427 ***************************************************************************/
6428 NLM_EXTERN void AlnMgr2GetNthSeqRangeInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6429 {
6430 AMAlignIndex2Ptr amaip;
6431 Int4 beg;
6432 DenseSegPtr dsp;
6433 Int4 end;
6434 SAIndex2Ptr saip;
6435 SARowDat2Ptr srdp;
6436 Uint1 strand;
6437
6438 if (start != NULL)
6439 *start = -1;
6440 if (stop != NULL)
6441 *stop = -1;
6442 if (sap == NULL || sap->saip == NULL)
6443 return;
6444 if (sap->saip->indextype == INDEX_CHILD)
6445 {
6446 saip = (SAIndex2Ptr)(sap->saip);
6447 dsp = (DenseSegPtr)(sap->segs);
6448 } else if (sap->saip->indextype == INDEX_PARENT)
6449 {
6450 amaip = (AMAlignIndex2Ptr)(sap->saip);
6451 if (amaip->alnstyle == AM2_LITE)
6452 {
6453 AlnMgr2GetNthSeqRangeInSASet(sap, n, start, stop);
6454 return;
6455 }
6456 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6457 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6458 }
6459 if (n > saip->numrows || n <= 0)
6460 return;
6461 srdp = saip->srdp[n-1];
6462 beg = -1;
6463 if (srdp->numsect == 0) /* just one big insert */
6464 beg = end = -2;
6465 strand = AlnMgr2GetNthStrand(sap, n);
6466 if (beg != -2 && strand != Seq_strand_minus)
6467 {
6468 beg = dsp->starts[srdp->sect[0]*(dsp->dim) + n-1];
6469 end = dsp->starts[srdp->sect[srdp->numsect-1]*(dsp->dim) + n-1] + dsp->lens[srdp->sect[srdp->numsect-1]] - 1;
6470 } else if (beg != -2)
6471 {
6472 beg = dsp->starts[srdp->sect[srdp->numsect-1]*(dsp->dim) + n-1];
6473 end = dsp->starts[srdp->sect[0]*(dsp->dim) + n-1] + dsp->lens[srdp->sect[0]] - 1;
6474 }
6475 if (start != NULL)
6476 *start = beg;
6477 if (stop != NULL)
6478 *stop = end;
6479 return;
6480 }
6481
6482 /* SECTION 4b */
6483 /***************************************************************************
6484 *
6485 * AlnMgr2GetNthRowSpanInSA returns the least and greatest alignment
6486 * coordinates (inclusive) spanned by the indicated row. Either stop or
6487 * start can be NULL to retrieve just one of the coordinates.
6488 *
6489 ***************************************************************************/
6490 NLM_EXTERN void AlnMgr2GetNthRowSpanInSA(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6491 {
6492 AMAlignIndex2Ptr amaip;
6493 DenseSegPtr dsp;
6494 Int4 i;
6495 SAIndex2Ptr saip;
6496 SARowDat2Ptr srdp;
6497
6498 if (start != NULL)
6499 *start = -1;
6500 if (stop != NULL)
6501 *stop = -1;
6502 if (sap == NULL || sap->saip == NULL)
6503 return;
6504 if (sap->saip->indextype == INDEX_CHILD)
6505 {
6506 saip = (SAIndex2Ptr)(sap->saip);
6507 dsp = (DenseSegPtr)(sap->segs);
6508 } else if (sap->saip->indextype == INDEX_PARENT)
6509 {
6510 amaip = (AMAlignIndex2Ptr)(sap->saip);
6511 if (amaip->alnstyle == AM2_LITE)
6512 return;
6513 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6514 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6515 }
6516 if (n > saip->numrows || n <= 0)
6517 return;
6518 srdp = saip->srdp[n-1];
6519 if (srdp->numsect == 0)
6520 {
6521 if (start != NULL)
6522 *start = -1;
6523 if (stop != NULL)
6524 *stop = -1;
6525 return;
6526 }
6527 if (start != NULL)
6528 {
6529 if (saip->anchor > 0)
6530 i = binary_search_on_uint2_list(saip->srdp[saip->anchor-1]->sect, srdp->sect[0], saip->srdp[saip->anchor-1]->numsect);
6531 else
6532 i = srdp->sect[0];
6533 *start = saip->aligncoords[i];
6534 }
6535 if (stop != NULL)
6536 {
6537 if (saip->anchor > 0)
6538 i = binary_search_on_uint2_list(saip->srdp[saip->anchor-1]->sect, srdp->sect[srdp->numsect-1], saip->srdp[saip->anchor-1]->numsect);
6539 else
6540 i = srdp->sect[srdp->numsect-1];
6541 *stop = saip->aligncoords[i] + dsp->lens[srdp->sect[srdp->numsect-1]] - 1;
6542 }
6543 return;
6544 }
6545
6546 /* SECTION 4b */
6547 static void AlnMgr2GetNthSeqRangeInSASet(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6548 {
6549 AMAlignIndex2Ptr amaip;
6550 Int4 from;
6551 Int4 i;
6552 Int4 max;
6553 Int4 min;
6554 Int4 to;
6555
6556 if (start != NULL)
6557 *start = -1;
6558 if (stop != NULL)
6559 *stop = -1;
6560 if (sap == NULL || sap->saip == NULL || n < 0)
6561 return;
6562 if (sap->saip->indextype == INDEX_CHILD)
6563 {
6564 AlnMgr2GetNthSeqRangeInSA(sap, n, start, stop);
6565 return;
6566 }
6567 amaip = (AMAlignIndex2Ptr)(sap->saip);
6568 min = max = -1;
6569 for (i=0; i<amaip->numsaps; i++)
6570 {
6571 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n, &from, &to);
6572 if (from != -1 && (from < min || min == -1))
6573 min = from;
6574 if (to > max)
6575 max = to;
6576 }
6577 if (start != NULL)
6578 *start = from;
6579 if (stop != NULL)
6580 *stop = to;
6581 }
6582
6583 /* SECTION 4b */
6584 NLM_EXTERN Int4 AlnMgr2GetMaxTailLength(SeqAlignPtr sap, Uint1 which_tail)
6585 {
6586 Int4 i;
6587 Int4 maxlen;
6588 Int4 n;
6589 Int4 start;
6590 Int4 stop;
6591 Uint1 strand;
6592
6593 if (sap == NULL || sap->saip == NULL)
6594 return 0;
6595 n = AlnMgr2GetNumRows(sap);
6596 maxlen = -1;
6597 for (i=0; i<n; i++)
6598 {
6599 AlnMgr2GetNthRowTail(sap, i+1, which_tail, &start, &stop, &strand);
6600 if (stop - start + 1 > maxlen)
6601 maxlen = stop - start + 1;
6602 }
6603 return maxlen;
6604 }
6605
6606 /* SECTION 4b */
6607 /***************************************************************************
6608 *
6609 * AlnMgr2GetNthRowTail returns the sequence extremities that are not
6610 * contained in the alignment (if the alignment starts at 10 in row 2, the
6611 * tail in that row is 0-9). It takes an indexed seqalign, a 1-based row
6612 * number, and AM2_LEFT_TAIL or AM2_RIGHT_TAIL, and returns the start, stop,
6613 * and strand of the tail indicated in the row desired. AlnMgr2GetNthRowTail
6614 * returns TRUE if the calculations were successfully completed.
6615 *
6616 ***************************************************************************/
6617 NLM_EXTERN Boolean AlnMgr2GetNthRowTail(SeqAlignPtr sap, Int4 n, Uint1 which_tail, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
6618 {
6619 BioseqPtr bsp;
6620 SeqIdPtr sip;
6621 Int4 tmp_start;
6622 Int4 tmp_stop;
6623 Uint1 tmp_strand;
6624
6625 if (sap == NULL || n < 1 || sap->saip == NULL)
6626 return FALSE;
6627 tmp_start = tmp_stop = -1;
6628 AlnMgr2GetNthSeqRangeInSA(sap, n, &tmp_start, &tmp_stop);
6629 if (tmp_start == -1 || tmp_stop == -1)
6630 return FALSE;
6631 tmp_strand = AlnMgr2GetNthStrand(sap, n);
6632 if (which_tail == AM2_LEFT_TAIL)
6633 {
6634 if (tmp_strand == Seq_strand_minus)
6635 {
6636 sip = AlnMgr2GetNthSeqIdPtr(sap, n);
6637 bsp = BioseqLockById(sip);
6638 SeqIdFree(sip);
6639 if (bsp == NULL)
6640 return FALSE;
6641 if (tmp_stop == bsp->length-1 || stop == NULL)
6642 {
6643 if (start)
6644 *start = -1;
6645 if (stop)
6646 *stop = -1;
6647 } else
6648 {
6649 if (start)
6650 *start = tmp_stop+1;
6651 if (stop)
6652 *stop = bsp->length-1;
6653 }
6654 BioseqUnlock(bsp);
6655 if (strand)
6656 *strand = tmp_strand;
6657 } else
6658 {
6659 if (tmp_start >= 1)
6660 {
6661 if (start)
6662 *start = 0;
6663 if (stop)
6664 *stop = tmp_start - 1;
6665 } else
6666 {
6667 if (start)
6668 *start = -1;
6669 if (stop)
6670 *stop = -1;
6671 }
6672 if (strand)
6673 *strand = tmp_strand;
6674 }
6675 } else if (which_tail == AM2_RIGHT_TAIL)
6676 {
6677 if (tmp_strand == Seq_strand_minus)
6678 {
6679 if (tmp_start >= 1)
6680 {
6681 if (start)
6682 *start = 0;
6683 if (stop)
6684 *stop = tmp_start - 1;
6685 } else
6686 {
6687 if (start)
6688 *start = -1;
6689 if (stop)
6690 *stop = -1;
6691 }
6692 if (strand)
6693 *strand = tmp_strand;
6694 } else
6695 {
6696 sip = AlnMgr2GetNthSeqIdPtr(sap, n);
6697 bsp = BioseqLockById(sip);
6698 SeqIdFree(sip);
6699 if (bsp == NULL)
6700 return FALSE;
6701 if (bsp->length-1 == tmp_stop)
6702 {
6703 if (start)
6704 *start = -1;
6705 if (stop)
6706 *stop = -1;
6707 } else
6708 {
6709 if (start)
6710 *start = tmp_stop + 1;
6711 if (stop)
6712 *stop = bsp->length-1;
6713 }
6714 if (strand)
6715 *strand = tmp_strand;
6716 BioseqUnlock(bsp);
6717 }
6718 }
6719 return TRUE;
6720 }
6721
6722 /* SECTION 4c */
6723 /***************************************************************************
6724 *
6725 * AlnMgr2GetAlnLength returns the total alignment length of an indexed
6726 * alignment. If fill_in is TRUE, the function computes the total length
6727 * of all the internal unaligned regions and adds that to the alignment
6728 * length; otherwise only the aligned portions are considered. (LENGTH)
6729 *
6730 ***************************************************************************/
6731 NLM_EXTERN Int4 AlnMgr2GetAlnLength(SeqAlignPtr sap, Boolean fill_in)
6732 {
6733 AMAlignIndex2Ptr amaip;
6734 DenseSegPtr dsp;
6735 Int4 i;
6736 Uint2 lastseg;
6737 Int4 len;
6738 SAIndex2Ptr saip;
6739 SeqAlignPtr salp;
6740
6741 if (sap == NULL || sap->saip == NULL)
6742 return -1;
6743 if (sap->saip->indextype == INDEX_CHILD)
6744 {
6745 dsp = (DenseSegPtr)(sap->segs);
6746 saip = (SAIndex2Ptr)(sap->saip);
6747 salp = sap;
6748 } else if (sap->saip->indextype == INDEX_PARENT)
6749 {
6750 amaip = (AMAlignIndex2Ptr)(sap->saip);
6751 if (amaip->alnstyle == AM2_LITE)
6752 return -1;
6753 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6754 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6755 salp = amaip->sharedaln;
6756 }
6757 if (saip->unaln == FALSE || fill_in == FALSE)
6758 {
6759 if (saip->anchor == -1)
6760 return (saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1]);
6761 else
6762 {
6763 lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
6764 return (saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg]);
6765 }
6766 } else
6767 {
6768 len = 0;
6769 for (i=0; i<dsp->numseg; i++)
6770 {
6771 len += dsp->lens[i];
6772 len += AlnMgr2GetMaxUnalignedLength(salp, i);
6773 }
6774 }
6775 return len;
6776 }
6777
6778 /* SECTION 4c */ /* FOR DDV */
6779 NLM_EXTERN Boolean AlnMgr2IsSAPDiscAli(SeqAlignPtr sap)
6780 {
6781 AMAlignIndex2Ptr amaip;
6782 SAIndex2Ptr saip;
6783
6784 if (sap == NULL || sap->saip == NULL)
6785 return FALSE;
6786 if (sap->saip->indextype == INDEX_CHILD)
6787 {
6788 saip = (SAIndex2Ptr)(sap->saip);
6789 } else if (sap->saip->indextype == INDEX_PARENT)
6790 {
6791 amaip = (AMAlignIndex2Ptr)(sap->saip);
6792 if (amaip->alnstyle == AM2_LITE)
6793 return FALSE;
6794 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6795 }
6796 if (saip->numunaln > 0)
6797 return TRUE;
6798 return FALSE;
6799 }
6800
6801 /* SECTION 4c */ /* FOR DDV */
6802 NLM_EXTERN Int4 AlnMgr2GetNumAlnBlocks(SeqAlignPtr sap)
6803 {
6804 AMAlignIndex2Ptr amaip;
6805 SAIndex2Ptr saip;
6806
6807 if (sap == NULL || sap->saip == NULL)
6808 return -1;
6809 if (sap->saip->indextype == INDEX_CHILD)
6810 {
6811 saip = (SAIndex2Ptr)(sap->saip);
6812 } else if (sap->saip->indextype == INDEX_PARENT)
6813 {
6814 amaip = (AMAlignIndex2Ptr)(sap->saip);
6815 if (amaip->alnstyle == AM2_LITE)
6816 return -1;
6817 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6818 }
6819 if (saip->numunaln >= 0)
6820 return (saip->numunaln + 1);
6821 else if (saip->numunaln == -1)
6822 return 1;
6823 else
6824 return -1;
6825 }
6826
6827 /* SECTION 4c */ /* FOR DDV */
6828 NLM_EXTERN Boolean AlnMgr2GetNthBlockRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
6829 {
6830 AMAlignIndex2Ptr amaip;
6831 SAIndex2Ptr saip;
6832
6833 if (sap == NULL || sap->saip == NULL)
6834 return FALSE;
6835 if (sap->saip->indextype == INDEX_CHILD)
6836 {
6837 saip = (SAIndex2Ptr)(sap->saip);
6838 } else if (sap->saip->indextype == INDEX_PARENT)
6839 {
6840 amaip = (AMAlignIndex2Ptr)(sap->saip);
6841 if (amaip->alnstyle == AM2_LITE)
6842 return FALSE;
6843 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6844 }
6845 if (!start || !stop)
6846 return FALSE;
6847 *start = -1;
6848 *stop = -1;
6849 if (n >= saip->numunaln)
6850 return FALSE;
6851 if (n < saip->numunaln)
6852 {
6853 *start = saip->aligncoords[saip->unaln[n-1]+1];
6854 *stop = saip->aligncoords[saip->unaln[n]] - 1;
6855 }
6856 return TRUE;
6857 }
6858
6859 /* SECTION 4c */ /* FOR DDV */
6860 /***************************************************************************
6861 *
6862 * AlnMgr2GetNthUnalignedForNthRow returns the bioseq coordinates for the
6863 * requested row, in the requested unaligned region. Any error will result
6864 * in -1 returns for both start and stop.
6865 *
6866 ***************************************************************************/
6867 NLM_EXTERN Boolean AlnMgr2GetNthUnalignedForNthRow(SeqAlignPtr sap, Int4 unaligned, Int4 row, Int4Ptr start, Int4Ptr stop)
6868 {
6869 AMAlignIndex2Ptr amaip;
6870 DenseSegPtr dsp;
6871 Int4 i;
6872 SAIndex2Ptr saip;
6873 Int4 seg;
6874 Uint1 strand;
6875
6876 if (sap == NULL || sap->saip == NULL)
6877 return FALSE;
6878 if (sap->saip->indextype == INDEX_CHILD)
6879 {
6880 saip = (SAIndex2Ptr)(sap->saip);
6881 dsp = (DenseSegPtr)(sap->segs);
6882 } else if (sap->saip->indextype == INDEX_PARENT)
6883 {
6884 amaip = (AMAlignIndex2Ptr)(sap->saip);
6885 if (amaip->alnstyle == AM2_LITE)
6886 return FALSE;
6887 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6888 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6889 }
6890 if (row > saip->numrows)
6891 return FALSE;
6892 if (saip->numunaln == 0) /* not set yet */
6893 AlnMgr2SetUnaln(sap);
6894 if (saip->numunaln == -1 || unaligned > saip->numunaln)
6895 {
6896 if (start)
6897 *start = -1;
6898 if (stop)
6899 *stop = -1;
6900 return FALSE;
6901 }
6902 seg = -1;
6903 if (unaligned <= saip->numunaln && unaligned > 0)
6904 seg = saip->unaln[unaligned-1];
6905 if (start)
6906 *start = -1;
6907 if (stop)
6908 *stop = -1;
6909 i = binary_search_on_uint2_list(saip->srdp[row-1]->unaligned, seg, saip->srdp[row-1]->numunaln);
6910 if (i == -1 || saip->srdp[row-1]->unaligned[i] >= dsp->numseg-1)
6911 return FALSE;
6912 strand = AlnMgr2GetNthStrand(sap, row);
6913 if (strand == Seq_strand_minus)
6914 {
6915 *start = dsp->starts[(saip->srdp[row-1]->unaligned[i]+1)*dsp->dim+row-1] + dsp->lens[(saip->srdp[row-1]->unaligned[i])];
6916 *stop = dsp->starts[(saip->srdp[row-1]->unaligned[i])*dsp->dim+row-1] - 1;
6917 } else
6918 {
6919 *start = dsp->starts[(saip->srdp[row-1]->unaligned[i])*dsp->dim+row-1] + dsp->lens[(saip->srdp[row-1]->unaligned[i])];
6920 *stop = dsp->starts[(saip->srdp[row-1]->unaligned[i]+1)*dsp->dim+row-1] - 1;
6921 }
6922 return TRUE;
6923 }
6924
6925 /* SECTION 4c */ /* FOR DDV */
6926 /***************************************************************************
6927 *
6928 * AlnMgr2GetNextLengthBit is called in a loop on an indexed alignment, with
6929 * seg starting at 0, to return the lengths of the aligned and unaligned
6930 * regions. If the length returned is negative, it's an unaligned region;
6931 * otherwise it's aligned.
6932 *
6933 ***************************************************************************/
6934 NLM_EXTERN Boolean AlnMgr2GetNextLengthBit(SeqAlignPtr sap, Int4Ptr len, Int4Ptr seg)
6935 {
6936 AMAlignIndex2Ptr amaip;
6937 DenseSegPtr dsp;
6938 Int4 i;
6939 Int4 lastseg;
6940 Int4 maxseg;
6941 SAIndex2Ptr saip;
6942
6943 if (sap == NULL || sap->saip == NULL || seg == NULL)
6944 return FALSE;
6945 if (sap->saip->indextype == INDEX_CHILD)
6946 {
6947 saip = (SAIndex2Ptr)(sap->saip);
6948 dsp = (DenseSegPtr)(sap->segs);
6949 } else if (sap->saip->indextype == INDEX_PARENT)
6950 {
6951 amaip = (AMAlignIndex2Ptr)(sap->saip);
6952 if (amaip->alnstyle == AM2_LITE)
6953 return FALSE;
6954 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6955 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6956 }
6957 if (saip->numunaln == -1) /* the whole thing is just one big aligned segment */
6958 {
6959 if (*seg != 0)
6960 return FALSE;
6961 if (saip->anchor == -1)
6962 {
6963 *len = saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1];
6964 *seg = 1;
6965 } else
6966 {
6967 lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
6968 *len = saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg];
6969 *seg = 1;
6970 }
6971 return TRUE;
6972 } else
6973 {
6974 if (saip->unaln == 0) /* not set */
6975 {
6976 AlnMgr2SetUnaln(sap);
6977 if (saip->numunaln == -1) /* no unaligned regions */
6978 {
6979 if (*seg != 0)
6980 return FALSE;
6981 if (saip->anchor == -1)
6982 *len = saip->aligncoords[saip->numseg-1]+dsp->lens[saip->numseg-1];
6983 else
6984 {
6985 lastseg = saip->srdp[saip->anchor-1]->sect[saip->srdp[saip->anchor-1]->numsect-1];
6986 *len = saip->aligncoords[saip->numseg-1]+dsp->lens[lastseg];
6987 }
6988 *seg = 1;
6989 return TRUE;
6990 }
6991 }
6992 if (*seg > saip->numunaln || -(*seg) > saip->numunaln)
6993 return FALSE;
6994 if (*seg >= 0)
6995 {
6996 *len = 0;
6997 if (*seg == 0)
6998 i = 0;
6999 else
7000 i = saip->unaln[*seg-1]+1;
7001 if (*seg < saip->numunaln)
7002 maxseg = saip->unaln[*seg];
7003 else
7004 maxseg = dsp->numseg-1;
7005 while (i<=maxseg)
7006 {
7007 (*len) += dsp->lens[i];
7008 i++;
7009 }
7010 *seg = -(*seg+1);
7011 return TRUE;
7012 } else
7013 {
7014 *len = -AlnMgr2GetMaxUnalignedLength(sap, saip->unaln[-(*seg)-1]);
7015 *seg = -(*seg);
7016 return TRUE;
7017 }
7018 }
7019 }
7020
7021 /* SECTION 4c */
7022 static Int4 AlnMgr2GetMaxUnalignedLength(SeqAlignPtr sap, Int4 seg)
7023 {
7024 AMAlignIndex2Ptr amaip;
7025 DenseSegPtr dsp;
7026 Boolean found;
7027 Int4 from;
7028 Int4 i;
7029 Int4 max;
7030 Int4 row;
7031 SAIndex2Ptr saip;
7032 Int4 to;
7033
7034 if (sap == NULL)
7035 return -1;
7036 if (sap->saip->indextype == INDEX_CHILD)
7037 {
7038 saip = (SAIndex2Ptr)(sap->saip);
7039 dsp = (DenseSegPtr)(sap->segs);
7040 } else if (sap->saip->indextype == INDEX_PARENT)
7041 {
7042 amaip = (AMAlignIndex2Ptr)(sap->saip);
7043 if (amaip->alnstyle == AM2_LITE)
7044 return -1;
7045 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7046 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7047 }
7048 found = FALSE;
7049 for (row=0; row<dsp->dim && !found; row++)
7050 {
7051 for (i=0; i<saip->srdp[row]->numunaln && !found; i++)
7052 {
7053 if (saip->srdp[row]->unaligned[i] == seg)
7054 found = TRUE;
7055 }
7056 }
7057 if (!found)
7058 return 0;
7059 max = 0;
7060 for (i=0; i<dsp->dim; i++)
7061 {
7062 AlnMgr2GetUnalignedInfo(sap, seg, i+1, &from, &to);
7063 if (to - from > max)
7064 max = to - from;
7065 }
7066 return max;
7067 }
7068
7069 /* SECTION 4c */
7070 /***************************************************************************
7071 *
7072 * AlnMgr2GetNumRows returns the number of rows in an indexed seqalign.
7073 *
7074 ***************************************************************************/
7075 NLM_EXTERN Int4 AlnMgr2GetNumRows(SeqAlignPtr sap)
7076 {
7077 AMAlignIndex2Ptr amaip;
7078 SAIndex2Ptr saip;
7079
7080 if (sap == NULL || sap->saip == NULL)
7081 return -1;
7082 if (sap->saip->indextype == INDEX_CHILD)
7083 {
7084 saip = (SAIndex2Ptr)(sap->saip);
7085 return (saip->numrows);
7086 } else if (sap->saip->indextype == INDEX_PARENT)
7087 {
7088 amaip = (AMAlignIndex2Ptr)(sap->saip);
7089 return (amaip->numrows);
7090 }
7091 return -1;
7092 }
7093
7094 /* SECTION 4c */
7095 /***************************************************************************
7096 *
7097 * AlnMgr2GetNumSegs returns the number of gap- or aligned- contiguous
7098 * segments in the alignment (continuous or not).
7099 *
7100 ***************************************************************************/
7101 NLM_EXTERN Int4 AlnMgr2GetNumSegs(SeqAlignPtr sap)
7102 {
7103 AMAlignIndex2Ptr amaip;
7104 DenseSegPtr dsp;
7105
7106 if (sap == NULL || sap->saip == NULL)
7107 return -1;
7108 if (sap->saip->indextype == INDEX_CHILD)
7109 {
7110 dsp = (DenseSegPtr)(sap->segs);
7111 return dsp->numseg;
7112 } else if (sap->saip->indextype == INDEX_PARENT)
7113 {
7114 amaip = (AMAlignIndex2Ptr)(sap->saip);
7115 if (amaip->alnstyle == AM2_LITE)
7116 return -1;
7117 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7118 return dsp->numseg;
7119 }
7120 return -1;
7121 }
7122
7123 /* SECTION 4c */
7124 /***************************************************************************
7125 *
7126 * AlnMgr2GetNumSegsInRange returns the number of alignment segments
7127 * spanned by the given range (partially or fully). The range is
7128 * given in alignment coordinates.
7129 *
7130 ***************************************************************************/
7131 NLM_EXTERN Int4 AlnMgr2GetNumSegsInRange(SeqAlignPtr sap, Int4 from, Int4 to, Int4Ptr start_seg)
7132 {
7133 Uint4Ptr aligncoords;
7134 AMAlignIndex2Ptr amaip;
7135 DenseSegPtr dsp;
7136 Int4 len;
7137 SAIndex2Ptr saip;
7138 Int4 start;
7139 Int4 stop;
7140
7141 if (start_seg != NULL)
7142 *start_seg = -1;
7143 if (sap == NULL || sap->saip == NULL)
7144 return -1;
7145 len = AlnMgr2GetAlnLength(sap, FALSE);
7146 if (from < 0 || to > len-1)
7147 return -1;
7148 if (sap->saip->indextype == INDEX_CHILD)
7149 {
7150 dsp = (DenseSegPtr)(sap->segs);
7151 saip = (SAIndex2Ptr)(sap->saip);
7152 aligncoords = saip->aligncoords;
7153 } else if (sap->saip->indextype == INDEX_PARENT)
7154 {
7155 amaip = (AMAlignIndex2Ptr)(sap->saip);
7156 if (amaip->alnstyle == AM2_LITE)
7157 return -1;
7158 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7159 saip = (SAIndex2Ptr)(sap->saip);
7160 aligncoords = saip->aligncoords;
7161 }
7162 if (from == 0 && to == len-1) /* whole alignment */
7163 {
7164 if (start_seg)
7165 *start_seg = 0;
7166 return dsp->numseg;
7167 }
7168 start = binary_search_on_uint4_list(aligncoords, from, dsp->numseg);
7169 stop = binary_search_on_uint4_list(aligncoords, to, dsp->numseg);
7170 if (start_seg != NULL)
7171 *start_seg = start;
7172 return (stop-start+1);
7173 }
7174
7175 /* SECTION 4c */
7176 /***************************************************************************
7177 *
7178 * AlnMgr2GetNthSegmentRange returns the alignment coordinate range of the
7179 * Nth segment (count starts at 1) of the seqalign. start and stop are
7180 * optional arguments (in case only one end is desired).
7181 *
7182 ***************************************************************************/
7183 NLM_EXTERN void AlnMgr2GetNthSegmentRange(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
7184 {
7185 AMAlignIndex2Ptr amaip;
7186 Int4 i;
7187 SAIndex2Ptr saip;
7188
7189 if (sap == NULL || sap->saip == NULL)
7190 return;
7191 i = AlnMgr2GetNumSegs(sap);
7192 if (n > i || n < 0)
7193 return;
7194 if (sap->saip->indextype == INDEX_CHILD)
7195 {
7196 saip = (SAIndex2Ptr)(sap->saip);
7197 if (start != NULL)
7198 *start = saip->aligncoords[n-1];
7199 if (stop != NULL)
7200 {
7201 if (i > n) /* not the last segment */
7202 *stop = saip->aligncoords[n] - 1;
7203 else
7204 *stop = AlnMgr2GetAlnLength(sap, FALSE) - 1;
7205 }
7206 return;
7207 } else if (sap->saip->indextype == INDEX_PARENT)
7208 {
7209 amaip = (AMAlignIndex2Ptr)(sap->saip);
7210 if (amaip->alnstyle == AM2_LITE)
7211 return;
7212 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7213 if (start != NULL)
7214 *start = saip->aligncoords[n-1];
7215 if (stop != NULL)
7216 {
7217 if (i > n) /* not the last segment */
7218 *stop = saip->aligncoords[n] - 1;
7219 else
7220 *stop = AlnMgr2GetAlnLength(sap, FALSE) - 1;
7221 }
7222 return;
7223 }
7224 }
7225
7226 /* SECTION 4c */
7227 /***************************************************************************
7228 *
7229 * AlnMgr2GetFirstNForSip returns the first row that a seqid occurs on,
7230 * or -1 if the seqid is not in the alignment or if there is another
7231 * error.
7232 *
7233 ***************************************************************************/
7234 NLM_EXTERN Int4 AlnMgr2GetFirstNForSip(SeqAlignPtr sap, SeqIdPtr sip)
7235 {
7236 AMAlignIndex2Ptr amaip;
7237 DenseSegPtr dsp;
7238 Int4 i;
7239 SeqIdPtr sip_tmp;
7240
7241 if (sap == NULL || sip == NULL || sap->saip == NULL)
7242 return -1;
7243 if (sap->saip->indextype == INDEX_CHILD)
7244 {
7245 dsp = (DenseSegPtr)(sap->segs);
7246 sip_tmp = dsp->ids;
7247 i = 1;
7248 while (sip_tmp != NULL)
7249 {
7250 if (SeqIdComp(sip, sip_tmp) == SIC_YES)
7251 return i;
7252 sip_tmp = sip_tmp->next;
7253 i++;
7254 }
7255 } else if (sap->saip->indextype == INDEX_PARENT)
7256 {
7257 amaip = (AMAlignIndex2Ptr)(sap->saip);
7258 if (amaip->alnstyle == AM2_LITE)
7259 return -1;
7260 for (i=0; i<amaip->numrows; i++)
7261 {
7262 if (SeqIdComp(sip, amaip->ids[i]) == SIC_YES)
7263 return (i+1);
7264 }
7265 }
7266 return -1;
7267 }
7268
7269 /***************************************************************************
7270 *
7271 * AlnMgr2GetFirstNForSipList returns the first row that one of a list of seqids occur on,
7272 * or -1 if none of the seqids are in the alignment or if there is another
7273 * error.
7274 * Handy if sip comes from a BioSeq, where it can point to a linked list
7275 * of SeqIds.
7276 *
7277 ***************************************************************************/
7278 NLM_EXTERN Int4 AlnMgr2GetFirstNForSipList(SeqAlignPtr sap, SeqIdPtr sip)
7279 {
7280 Int4 i;
7281 if (sap == NULL || sap->saip == NULL)
7282 return -1;
7283
7284 for (; sip; sip = sip->next) {
7285 i = AlnMgr2GetFirstNForSip(sap, sip);
7286 if (i != -1)
7287 return i;
7288 }
7289 return -1;
7290 }
7291
7292 /***************************************************************************
7293 *
7294 * AlnMgr2GetParent returns the top-level seqalign associated with a given
7295 * indexed alignment. It returns the actual pointer, not a copy.
7296 *
7297 ***************************************************************************/
7298 NLM_EXTERN SeqAlignPtr AlnMgr2GetParent(SeqAlignPtr sap)
7299 {
7300 SAIndex2Ptr saip;
7301
7302 if (sap == NULL || sap->saip == NULL)
7303 return NULL;
7304 if (sap->saip->indextype == INDEX_PARENT)
7305 return sap;
7306 saip = (SAIndex2Ptr)(sap->saip);
7307 return (saip->top);
7308 }
7309
7310 /***************************************************************************
7311 *
7312 * SECTION 5: Functions to change, assign or retrieve an anchor row.
7313 * SECTION 5a: functions for child seqaligns
7314 * SECTION 5b: functions for parent seqaligns
7315 * SECTION 5c: functions to retrieve anchor row information
7316 *
7317 ***************************************************************************/
7318
7319 /* SECTION 5a */
7320 static void AlnMgr2AnchorChild(SeqAlignPtr sap, Int4 which_row)
7321 {
7322 AMBitty2Ptr abp;
7323 AMBitty2Ptr abp_head;
7324 AMBitty2Ptr abp_head2;
7325 AMBitty2Ptr abp_prev;
7326 AMBitty2Ptr abp_prev2;
7327 AMBitty2Ptr abp_uhead;
7328 AMBitty2Ptr abp_uprev;
7329 Uint2Ptr anchor_unsect;
7330 Int4 curr;
7331 Int4 curr2;
7332 DenseSegPtr dsp;
7333 Int4 i;
7334 Int4 j;
7335 Uint2 numunsect;
7336 SAIndex2Ptr saip;
7337 SARowDat2Ptr srdp;
7338
7339 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
7340 return;
7341 saip = (SAIndex2Ptr)(sap->saip);
7342 if (which_row > saip->numrows)
7343 return;
7344 if (saip->anchor == which_row) /*already anchored to the right row */
7345 return;
7346 if (saip->anchor != -1 || which_row <= 0) /* already anchored -- must reindex as a flat alignment first */
7347 {
7348 SAIndex2Free2(sap->saip);
7349 sap->saip = NULL;
7350 AlnMgr2IndexSingleDenseSegSA(sap);
7351 if (which_row <= 0)
7352 return;
7353 saip = (SAIndex2Ptr)(sap->saip);
7354 }
7355 numunsect = saip->srdp[which_row-1]->numunsect;
7356 if (numunsect > 0)
7357 anchor_unsect = saip->srdp[which_row-1]->unsect;
7358 else
7359 anchor_unsect = NULL;
7360 for (i=0; i<saip->numrows; i++)
7361 {
7362 if (i+1 != which_row)
7363 {
7364 abp_head = NULL;
7365 abp_head2 = NULL;
7366 abp_uhead = NULL;
7367 curr = 0;
7368 curr2 = 0;
7369 srdp = saip->srdp[i];
7370 for (j=0; j<srdp->numsect; j++)
7371 {
7372 if (anchor_unsect != NULL && curr < numunsect && srdp->sect[j] > anchor_unsect[curr])
7373 {
7374 while (curr < numunsect && srdp->sect[j] > anchor_unsect[curr])
7375 {
7376 curr++;
7377 }
7378 }
7379 if (curr < numunsect && anchor_unsect != NULL && srdp->sect[j] == anchor_unsect[curr]) /* this one is an insert */
7380 {
7381 abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7382 abp->n = srdp->sect[j];
7383 if (abp_head == NULL)
7384 abp_head = abp_prev = abp;
7385 else
7386 {
7387 abp_prev->next = abp;
7388 abp_prev = abp;
7389 }
7390 curr++;
7391 } else /* put it in the keeper pile */
7392 {
7393 abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7394 abp->n = srdp->sect[j];
7395 if (abp_head2 == NULL)
7396 abp_head2 = abp_prev2 = abp;
7397 else
7398 {
7399 abp_prev2->next = abp;
7400 abp_prev2 = abp;
7401 }
7402 }
7403 }
7404 for (j=0; j<srdp->numunsect; j++)
7405 {
7406 if (anchor_unsect != NULL && curr2 < numunsect && srdp->unsect[j] > anchor_unsect[curr2])
7407 {
7408 while (curr2 < numunsect && srdp->unsect[j] > anchor_unsect[curr2])
7409 {
7410 curr2++;
7411 }
7412 }
7413 if (curr2 >= numunsect || (curr2 < numunsect && (anchor_unsect == NULL || srdp->unsect[j] != anchor_unsect[curr2]))) /* these get kept */
7414 {
7415 abp = (AMBitty2Ptr)MemNew(sizeof(AMBitty2));
7416 abp->n = srdp->unsect[j];
7417 if (abp_uhead == NULL)
7418 abp_uhead = abp_uprev = abp;
7419 else
7420 {
7421 abp_uprev->next = abp;
7422 abp_uprev = abp;
7423 }
7424 }
7425 }
7426 MemFree(srdp->sect);
7427 MemFree(srdp->unsect);
7428 srdp->numsect = srdp->numunsect = srdp->numinsect = 0;
7429 abp = abp_head; /* inserts */
7430 while (abp != NULL)
7431 {
7432 srdp->numinsect++;
7433 abp = abp->next;
7434 }
7435 srdp->insect = (Uint2Ptr)MemNew((srdp->numinsect)*sizeof(Uint2));
7436 abp = abp_head;
7437 j = 0;
7438 while (abp != NULL)
7439 {
7440 srdp->insect[j] = abp->n;
7441 j++;
7442 abp_prev = abp;
7443 abp = abp->next;
7444 MemFree(abp_prev);
7445 }
7446 abp = abp_head2; /* aligned sections */
7447 while (abp != NULL)
7448 {
7449 srdp->numsect++;
7450 abp = abp->next;
7451 }
7452 srdp->sect = (Uint2Ptr)MemNew((srdp->numsect)*sizeof(Uint2));
7453 abp = abp_head2;
7454 j = 0;
7455 while (abp != NULL)
7456 {
7457 srdp->sect[j] = abp->n;
7458 j++;
7459 abp_prev = abp;
7460 abp = abp->next;
7461 MemFree(abp_prev);
7462 }
7463 abp = abp_uhead; /* aligned gaps */
7464 while (abp != NULL)
7465 {
7466 srdp->numunsect++;
7467 abp = abp->next;
7468 }
7469 srdp->unsect = (Uint2Ptr)MemNew((srdp->numunsect)*sizeof(Uint2));
7470 abp = abp_uhead;
7471 j = 0;
7472 while (abp != NULL)
7473 {
7474 srdp->unsect[j] = abp->n;
7475 j++;
7476 abp_prev = abp;
7477 abp = abp->next;
7478 MemFree(abp_prev);
7479 }
7480 } else /* this is the anchor row -- fill in the alignment coords*/
7481 {
7482 srdp = saip->srdp[i];
7483 MemFree(saip->aligncoords);
7484 saip->numseg = srdp->numsect;
7485 saip->aligncoords = (Uint4Ptr)MemNew((saip->numseg)*sizeof(Uint4));
7486 dsp = (DenseSegPtr)(sap->segs);
7487 for (j=1; j<saip->numseg; j++)
7488 {
7489 saip->aligncoords[j] = saip->aligncoords[j-1] + dsp->lens[srdp->sect[j-1]];
7490 }
7491 saip->anchor = i+1;
7492 }
7493 }
7494 }
7495
7496 /* SECTION 5c */
7497 /***************************************************************************
7498 *
7499 * AlnMgr2AnchorSeqAlign takes an indexed seqalign and a row (1-based) and
7500 * reindexes the alignment so that there are no gaps in the row indicated.
7501 * Other rows may contain inserts after this operation. After an alignment
7502 * is anchored, its length often shrinks. If which_row is less than 1, the
7503 * function reindexes the alignment as a flat alignment.
7504 *
7505 ***************************************************************************/
7506 NLM_EXTERN void AlnMgr2AnchorSeqAlign(SeqAlignPtr sap, Int4 which_row)
7507 {
7508 AMAlignIndex2Ptr amaip;
7509
7510 if (sap == NULL || sap->saip == NULL)
7511 return;
7512 if (sap->saip->indextype == INDEX_CHILD)
7513 AlnMgr2AnchorChild(sap, which_row);
7514 else if (sap->saip->indextype == INDEX_PARENT)
7515 {
7516 amaip = (AMAlignIndex2Ptr)(sap->saip);
7517 if (amaip->alnstyle == AM2_LITE)
7518 return;
7519 AlnMgr2AnchorChild(amaip->sharedaln, which_row);
7520 amaip->anchor = which_row;
7521 }
7522 }
7523
7524 /* SECTION 5c */
7525 /***************************************************************************
7526 *
7527 * AlnMgr2FindAnchor returns the row number (1-based) of the anchor row
7528 * for an indexed seqalign, or -1 if the alignment is unanchored or if
7529 * there is another type of error.
7530 *
7531 ***************************************************************************/
7532 NLM_EXTERN Int4 AlnMgr2FindAnchor(SeqAlignPtr sap)
7533 {
7534 AMAlignIndex2Ptr amaip;
7535 SAIndex2Ptr saip;
7536
7537 if (sap == NULL || sap->saip == NULL)
7538 return -1;
7539 if (sap->saip->indextype == INDEX_CHILD)
7540 {
7541 saip = (SAIndex2Ptr)(sap->saip);
7542 return (saip->anchor);
7543 } else if (sap->saip->indextype == INDEX_PARENT)
7544 {
7545 amaip = (AMAlignIndex2Ptr)(sap->saip);
7546 if (amaip->alnstyle == AM2_LITE)
7547 return -1;
7548 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7549 return (saip->anchor);
7550 } else
7551 return -1;
7552 }
7553
7554 /***************************************************************************
7555 *
7556 * SECTION 6: Functions for coordinate conversion (bioseq to seqalign
7557 * coordinates and vice versa)
7558 *
7559 ***************************************************************************/
7560
7561 /* SECTION 6 */
7562 /***************************************************************************
7563 *
7564 * AlnMgr2MapBioseqToSeqAlign takes an indexed seqalign, a position in a
7565 * row of the alignment, and a 1-based row number, and maps the row position
7566 * to alignment coordinates.
7567 *
7568 ***************************************************************************/
7569 NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlign(SeqAlignPtr sap, Int4 pos, Int4 row)
7570 {
7571 AMAlignIndex2Ptr amaip;
7572 Uint2Ptr array;
7573 DenseSegPtr dsp;
7574 Int4 L;
7575 Int4 mid;
7576 Int4 offset;
7577 Int4 R;
7578 Int4 retval;
7579 SAIndex2Ptr saip;
7580 SARowDat2Ptr srdp;
7581 Int4 start;
7582 Int4 stop;
7583 Uint1 strand;
7584
7585 if (sap == NULL || sap->saip == NULL || row < 1)
7586 return -1;
7587 AlnMgr2GetNthSeqRangeInSA(sap, row, &start, &stop);
7588 if (pos < start || pos > stop)
7589 return -1;
7590 if (sap->saip->indextype == INDEX_CHILD)
7591 {
7592 saip = (SAIndex2Ptr)(sap->saip);
7593 dsp = (DenseSegPtr)(sap->segs);
7594 } else if (sap->saip->indextype == INDEX_PARENT)
7595 {
7596 amaip = (AMAlignIndex2Ptr)(sap->saip);
7597 if (amaip->alnstyle == AM2_LITE)
7598 return -1;
7599 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7600 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7601 }
7602 if (row > saip->numrows)
7603 return -1;
7604 srdp = saip->srdp[row-1];
7605 if (srdp->numsect < 1) {
7606 return -1;
7607 }
7608 strand = AlnMgr2GetNthStrand(sap, row);
7609 L = 0;
7610 R = srdp->numsect - 1;
7611 if (strand != Seq_strand_minus)
7612 {
7613 while (L < R)
7614 {
7615 mid = (L + R)/2;
7616 if (dsp->starts[(srdp->sect[mid+1])*(dsp->dim)+row-1] <= pos)
7617 L = mid + 1;
7618 else
7619 R = mid;
7620 }
7621 } else
7622 {
7623 while (L < R)
7624 {
7625 mid = ceil((L + R)/2);
7626 if (dsp->starts[(srdp->sect[mid])*(dsp->dim)+row-1] > pos)
7627 L = mid + 1;
7628 else
7629 R = mid;
7630 }
7631 }
7632 offset = pos - dsp->starts[(srdp->sect[L])*(dsp->dim)+row-1];
7633 if (offset > dsp->lens[srdp->sect[L]])
7634 return -2; /* this is an insert */
7635 if (saip->anchor > 0)
7636 {
7637 array = saip->srdp[saip->anchor-1]->sect;
7638 R = binary_search_on_uint2_list(array, srdp->sect[L], saip->srdp[saip->anchor-1]->numsect);
7639 L = R;
7640 srdp = saip->srdp[saip->anchor-1];
7641 if (strand != Seq_strand_minus)
7642 retval = (saip->aligncoords[L] + offset);
7643 else
7644 retval = (saip->aligncoords[L] + dsp->lens[srdp->sect[L]] - offset - 1);
7645 } else
7646 {
7647 if (strand != Seq_strand_minus)
7648 retval = saip->aligncoords[srdp->sect[L]] + offset;
7649 else
7650 retval = (saip->aligncoords[srdp->sect[L]] + dsp->lens[srdp->sect[L]] - offset - 1);
7651 }
7652 return retval;
7653 }
7654
7655 /* SECTION 6 */
7656 /***************************************************************************
7657 *
7658 * AlnMgr2MapSeqAlignToBioseq takes an indexed seqalign, an alignment
7659 * coordinate (pos), and the 1-based number of a row, and maps the alignment
7660 * coordinate to the corresponding bioseq coordinate of the row desired.
7661 * A return of -1 indicates an error; a return of -2 means that the bioseq
7662 * is gapped at this alignment position.
7663 *
7664 ***************************************************************************/
7665 NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseq(SeqAlignPtr sap, Int4 pos, Int4 row)
7666 {
7667 AMAlignIndex2Ptr amaip;
7668 DenseSegPtr dsp;
7669 Int4 len;
7670 Int4 offset;
7671 SAIndex2Ptr saip;
7672 Int4 sect;
7673 SARowDat2Ptr srdp;
7674 Int4 start;
7675 Uint1 strand;
7676 Uint2Ptr trans;
7677
7678 if (sap == NULL || sap->saip == NULL)
7679 return -1;
7680 len = AlnMgr2GetAlnLength(sap, FALSE);
7681 if (pos < 0 || pos > len - 1)
7682 return -1;
7683 if (sap->saip->indextype == INDEX_CHILD)
7684 {
7685 saip = (SAIndex2Ptr)(sap->saip);
7686 dsp = (DenseSegPtr)(sap->segs);
7687 } else if (sap->saip->indextype == INDEX_PARENT)
7688 {
7689 amaip = (AMAlignIndex2Ptr)(sap->saip);
7690 if (amaip->alnstyle == AM2_LITE)
7691 return -1;
7692 saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
7693 dsp = (DenseSegPtr)(amaip->sharedaln->segs);
7694 }
7695 if (row > saip->numrows)
7696 return -1;
7697
7698 sect = binary_search_on_uint4_list(saip->aligncoords, pos, saip->numseg);
7699 offset = pos - saip->aligncoords[sect];
7700 if (saip->anchor > 0)
7701 {
7702 trans = saip->srdp[saip->anchor-1]->sect;
7703 sect = trans[sect];
7704 }
7705 srdp = saip->srdp[row-1];
7706 start = binary_search_on_uint2_list(srdp->sect, sect, srdp->numsect);
7707 if (start == -1)
7708 return -2; /* this row has a gap or insert at this alignment position */
7709 strand = AlnMgr2GetNthStrand(sap, row);
7710 if (strand != Seq_strand_minus)
7711 return (dsp->starts[sect*(dsp->dim)+row-1] + offset);
7712 else
7713 return (dsp->starts[sect*(dsp->dim)+row-1] + dsp->lens[sect] - 1 - offset);
7714 }
7715
7716 /* SECTION 6 */
7717 /***************************************************************************
7718 *
7719 * AlnMgr2MapRowToRow takes an indexed seqalign, a position in row1, the
7720 * 1-based number of row1, and a target row (row2), and maps the bioseq
7721 * coordinate in row 1 to the corresponding (aligned) bioseq coordinate in
7722 * row2. A return of -1 indicates an error while a return of -2 means that
7723 * the bioseq in row2 is gapped at the desired position.
7724 *
7725 ***************************************************************************/
7726 NLM_EXTERN Int4 AlnMgr2MapRowToRow(SeqAlignPtr sap, Int4 pos, Int4 row1, Int4 row2)
7727 {
7728 Int4 alnpos;
7729
7730 if (sap == NULL)
7731 return -1;
7732 alnpos = AlnMgr2MapBioseqToSeqAlign(sap, pos, row1);
7733 return (AlnMgr2MapSeqAlignToBioseq(sap, alnpos, row2));
7734 }
7735
7736 /***************************************************************************
7737 *
7738 * SECTION 7: Functions to change an alignment and retrieve parts of an
7739 * alignment
7740 *
7741 ***************************************************************************/
7742
7743 /***************************************************************************
7744 *
7745 * AlnMgr2TruncateSeqAlign truncates a given seqalign to contain only the
7746 * bioseq coordinates from start to stop on the indicated row. Anything
7747 * before those coordinates is discarded; anything remaining afterwards
7748 * is made into another seqalign and put in sap->next (the original next,
7749 * if any, is now at sap->next->next). Doesn't work on parent seqaligns.
7750 * The function returns TRUE if the orignal alignment extended past stop.
7751 *
7752 ***************************************************************************/
7753 /* SECTION 7 */
7754 NLM_EXTERN Boolean AlnMgr2TruncateSeqAlign(SeqAlignPtr sap, Int4 start, Int4 stop, Int4 row)
7755 {
7756 DenseDiagPtr ddp;
7757 DenseDiagPtr ddp2;
7758 DenseSegPtr dsp;
7759 Int4 from;
7760 Int4 i;
7761 Int4 mstart;
7762 Int4 mstop;
7763 SeqAlignPtr sap1;
7764 SeqAlignPtr sap2;
7765 Int4 tmp;
7766 Int4 to;
7767
7768 if (sap == NULL || stop<start || row < 1)
7769 return FALSE;
7770 if (sap->segtype == SAS_DENSEG)
7771 {
7772 if (sap->saip == NULL)
7773 AlnMgr2IndexSingleChildSeqAlign(sap);
7774 AlnMgr2GetNthSeqRangeInSA(sap, row, &mstart, &mstop);
7775 if (mstart > start || mstop < stop)
7776 return FALSE;
7777 if (mstart == start)
7778 {
7779 if (mstop == stop)
7780 return FALSE;
7781 else if (mstop > stop)
7782 {
7783 from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
7784 to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
7785 if (to < from)
7786 {
7787 tmp = to;
7788 to = from;
7789 from = tmp;
7790 }
7791 sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7792 AlnMgr2IndexSingleChildSeqAlign(sap1);
7793 from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
7794 if (from < 0)
7795 return FALSE;
7796 to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
7797 if (to < from)
7798 {
7799 tmp = to;
7800 to = from;
7801 from = tmp;
7802 }
7803 sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7804 sap2->next = sap->next;
7805 sap->next = sap2;
7806 dsp = (DenseSegPtr)(sap->segs);
7807 sap->segs = (Pointer)(sap1->segs);
7808 sap1->segs = NULL;
7809 DenseSegFree(dsp);
7810 SeqAlignFree(sap1);
7811 AlnMgr2IndexSingleChildSeqAlign(sap);
7812 AlnMgr2IndexSingleChildSeqAlign(sap2);
7813 return TRUE;
7814 }
7815 } else if (mstart < start) /* throw away the first part */
7816 {
7817 from = AlnMgr2MapBioseqToSeqAlign(sap, start, row);
7818 to = AlnMgr2MapBioseqToSeqAlign(sap, stop, row);
7819 if (to < from)
7820 {
7821 tmp = to;
7822 to = from;
7823 from = tmp;
7824 }
7825 sap1 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7826 if (mstop == stop) /* done */
7827 {
7828 dsp = (DenseSegPtr)(sap->segs);
7829 sap->segs = (Pointer)(sap1->segs);
7830 sap1->segs = NULL;
7831 DenseSegFree(dsp);
7832 SeqAlignFree(sap1);
7833 AlnMgr2IndexSingleChildSeqAlign(sap);
7834 return FALSE;
7835 } else if (mstop > stop)
7836 {
7837 from = AlnMgr2MapBioseqToSeqAlign(sap, stop+1, row);
7838 if (from < 0)
7839 return FALSE;
7840 to = AlnMgr2MapBioseqToSeqAlign(sap, mstop, row);
7841 if (to < from)
7842 {
7843 tmp = to;
7844 to = from;
7845 from = tmp;
7846 }
7847 sap2 = AlnMgr2GetSubAlign(sap, from, to, 0, TRUE);
7848 sap2->next = sap->next;
7849 sap->next = sap2;
7850 AlnMgr2IndexSingleChildSeqAlign(sap2);
7851 dsp = (DenseSegPtr)(sap->segs);
7852 sap->segs = (Pointer)(sap1->segs);
7853 sap1->segs = NULL;
7854 DenseSegFree(dsp);
7855 SeqAlignFree(sap1);
7856 AlnMgr2IndexSingleChildSeqAlign(sap);
7857 return TRUE;
7858 }
7859 }
7860 } else if (sap->segtype == SAS_DENDIAG)
7861 {
7862 ddp = (DenseDiagPtr)(sap->segs);
7863 if (ddp->dim < row)
7864 return FALSE;
7865 mstart = ddp->starts[row-1];
7866 mstop = mstart + ddp->len - 1;
7867 if (mstart > start || mstop < stop)
7868 return FALSE;
7869 if (mstart == start)
7870 {
7871 if (mstop == stop)
7872 return FALSE;
7873 else if (mstop > stop)
7874 {
7875 ddp2 = DenseDiagNew();
7876 ddp2->dim = ddp->dim;
7877 ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
7878 ddp2->id = SeqIdDupList(ddp->id);
7879 ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
7880 ddp2->scores = ScoreDup(ddp->scores);
7881 for (i=0; i<ddp->dim; i++)
7882 {
7883 ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
7884 ddp2->strands[i] = ddp->strands[i];
7885 }
7886 ddp2->len = mstop - stop;
7887 ddp->len = ddp->len - (mstop - stop);
7888 sap2 = SeqAlignNew();
7889 sap2->type = SAT_PARTIAL;
7890 sap2->segtype = SAS_DENSEG;
7891 sap2->segs = (Pointer)ddp2;
7892 sap2->next = sap->next;
7893 sap->next = sap2;
7894 AlnMgr2IndexSingleChildSeqAlign(sap2);
7895 return TRUE;
7896 }
7897 } else if (mstart < start)
7898 {
7899 for (i=0; i<ddp->dim; i++)
7900 {
7901 ddp->starts[i] = ddp->starts[i] + start - mstart;
7902 }
7903 ddp->len = ddp->len - (start - mstart);
7904 AlnMgr2IndexSingleChildSeqAlign(sap);
7905 if (mstop == stop)
7906 return FALSE;
7907 else if (mstop > stop)
7908 {
7909 ddp2 = DenseDiagNew();
7910 ddp2->dim = ddp->dim;
7911 ddp2->starts = (Int4Ptr)MemNew((ddp->dim)*sizeof(Int4));
7912 ddp2->id = SeqIdDupList(ddp->id);
7913 ddp2->strands = (Uint1Ptr)MemNew((ddp->dim)*sizeof(Uint1));
7914 ddp2->scores = ScoreDup(ddp->scores);
7915 for (i=0; i<ddp->dim; i++)
7916 {
7917 ddp2->starts[i] = ddp->starts[i] + ddp->len - (mstop - stop);
7918 ddp2->strands[i] = ddp->strands[i];
7919 }
7920 ddp2->len = mstop - stop;
7921 ddp->len = ddp->len - (mstop - stop);
7922 sap2 = SeqAlignNew();
7923 sap2->type = SAT_PARTIAL;
7924 sap2->segtype = SAS_DENSEG;
7925 sap2->segs = (Pointer)ddp2;
7926 sap2->next = sap->next;
7927 sap->next = sap2;
7928 AlnMgr2IndexSingleChildSeqAlign(sap2);
7929 return TRUE;
7930 }
7931 }
7932 } else
7933 return FALSE;
7934 return FALSE;
7935 }
7936
7937 /* SECTION 7 */
7938 /***************************************************************************
7939 *
7940 * AlnMgr2GetSubAlign retrieves a portion of an indexed alignment, from
7941 * 'from' to 'to' in the row coordinates specified, or if which_row is 0,
7942 * 'from' and 'to' are assumed to be alignment coordinates. If 'to' is -1,
7943 * the subalignment will go to the end of the specified row (or to the end
7944 * of the whole alignment). If the alignment is discontinuous and fill_in
7945 * is FALSE, the alignment will be returned as an SAS_DISC set, each piece
7946 * represented by a single alignment. If the alignment is discontinuous and
7947 * fill_in is TRUE, the unaligned regions will be added in to the alignment,
7948 * with all gaps in all other rows. If the alignment is continuous, it
7949 * doesn't matter whether fill_in is TRUE or FALSE. (SUBALIGN)
7950 *
7951 ***************************************************************************/
7952 NLM_EXTERN SeqAlignPtr AlnMgr2GetSubAlign(SeqAlignPtr sap, Int4 from, Int4 to, Int4 which_row, Boolean fill_in)
7953 {
7954 Int4 a;
7955 AMAlignIndex2Ptr amaip;
7956 AlnMsg2Ptr amp;
7957 Boolean anchored;
7958 Int4 currlen;
7959 DenseSegPtr dsp;
7960 DenseSegPtr dsp_new;
7961 Int4 from_aln;
7962 Int4 from_seq;
7963 Int4 i;
7964 SeqIdPtr id;
7965 Int4 j;
7966 Int4 k;
7967 Int4 len;
7968 Int4 lengthbit;
7969 Int4 minlen;
7970 Boolean more;
7971 Int4 n;
7972 Int4 numseg;
7973 Int4 numunaln;
7974 AMRowInfoPtr row;
7975 AMRowInfoPtr row_head;
7976 AMRowInfoPtr row_prev;
7977 AMRowInfoPtr PNTR rowheads;
7978 AMRowInfoPtr PNTR rows;
7979 SeqAlignPtr salp;
7980 SeqAlignPtr salp_head;
7981 SeqAlignPtr salp_prev;
7982 SeqAlignPtr sap_real;
7983 Int4 seg;
7984 Int4 start_seg;
7985 Uint1 strand;
7986 SeqAlignPtr subsalp;
7987 Int4 tmp;
7988 Int4 to_aln;
7989 Int4 to_seq;
7990 Int4 ustart;
7991 Int4 ustop;
7992
7993 if (sap == NULL || sap->saip == NULL)
7994 return NULL;
7995 len = AlnMgr2GetAlnLength(sap, FALSE);
7996 if (which_row == 0 && (to > len-1 || from < 0))
7997 return NULL;
7998 n = AlnMgr2GetNumRows(sap);
7999 if (which_row < 0 || which_row > n)
8000 return NULL;
8001 if (to == -1)
8002 {
8003 if (which_row == 0)
8004 to = len-1;
8005 else
8006 AlnMgr2GetNthSeqRangeInSA(sap, which_row, NULL, &to);
8007 }
8008 if (sap->saip->indextype == INDEX_CHILD)
8009 sap_real = sap;
8010 else if (sap->saip->indextype == INDEX_PARENT)
8011 {
8012 amaip = (AMAlignIndex2Ptr)(sap->saip);
8013 if (amaip->alnstyle == AM2_LITE)
8014 return NULL;
8015 sap_real = amaip->sharedaln;
8016 if (from == 0 && to == len-1 && !AlnMgr2IsSAPDiscAli(sap_real)) /* need whole aln -- take a shortcut! */
8017 return SeqAlignDup(sap_real);
8018 }
8019 if ((a = AlnMgr2FindAnchor(sap_real)) > 0)
8020 {
8021 anchored = TRUE;
8022 salp = SeqAlignDup(sap_real);
8023 AlnMgr2IndexSingleChildSeqAlign(salp);
8024 if (which_row == 0) /* anchor coordinates */
8025 {
8026 AlnMgr2GetNthSeqRangeInSA(salp, a, &from_seq, &to_seq);
8027 from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from_seq, a);
8028 to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to_seq, a);
8029 if (from_aln > to_aln)
8030 {
8031 tmp = from_aln;
8032 from_aln = to_aln;
8033 to_aln = tmp;
8034 }
8035 } else
8036 {
8037 from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from, which_row);
8038 to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to, which_row);
8039 if (from_aln > to_aln)
8040 {
8041 tmp = from_aln;
8042 from_aln = to_aln;
8043 to_aln = tmp;
8044 }
8045 }
8046 } else
8047 {
8048 anchored = FALSE;
8049 salp = sap_real;
8050 if (which_row == 0) /* alignment coordinates */
8051 {
8052 from_aln = from;
8053 to_aln = to;
8054 } else
8055 {
8056 from_aln = AlnMgr2MapBioseqToSeqAlign(salp, from, which_row);
8057 to_aln = AlnMgr2MapBioseqToSeqAlign(salp, to, which_row);
8058 if (from_aln > to_aln)
8059 {
8060 tmp = from_aln;
8061 from_aln = to_aln;
8062 to_aln = tmp;
8063 }
8064 }
8065 }
8066 rows = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
8067 amp = AlnMsgNew2();
8068 seg = lengthbit = 0;
8069 currlen = 0;
8070 numunaln = 0;
8071 salp_head = salp_prev = NULL;
8072 while (AlnMgr2GetNextLengthBit(sap, &lengthbit, &seg))
8073 {
8074 if (currlen <= to_aln && seg >= 0 && currlen+lengthbit-1 >= from_aln)
8075 {
8076 numseg = AlnMgr2GetNumSegsInRange(sap, currlen, currlen+lengthbit-1, &start_seg);
8077 numunaln = 0;
8078 for (i=0; i<n; i++)
8079 {
8080 row_head = NULL;
8081 for (j=start_seg; j<numseg+start_seg; j++)
8082 {
8083 AlnMsgReNew2(amp);
8084 AlnMgr2GetNthSegmentRange(sap, j+1, &->from_aln, &->to_aln);
8085 amp->from_aln = MAX(amp->from_aln, from_aln);
8086 amp->to_aln = MIN(amp->to_aln, to_aln);
8087 amp->row_num = i+1;
8088 while ((more = AlnMgr2GetNextAlnBit(salp, amp)) == TRUE)
8089 {
8090 if (amp->right_interrupt != NULL && amp->right_interrupt->unalnlen > 0)
8091 numunaln++;
8092 row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8093 if (amp->type == AM_GAP)
8094 row->from = -1;
8095 else
8096 row->from = amp->from_row;
8097 row->len = amp->to_row - amp->from_row + 1;
8098 if (row_head != NULL)
8099 {
8100 row_prev->next = row;
8101 row_prev = row;
8102 } else
8103 row_head = row_prev = row;
8104 }
8105 }
8106 rows[i] = row_head;
8107 }
8108 }
8109 rowheads = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
8110 for (i=0; i<n; i++)
8111 {
8112 rowheads[i] = rows[i];
8113 }
8114 while (rows[0] != NULL)
8115 {
8116 minlen = -1;
8117 for (i=0; i<n; i++)
8118 {
8119 if (rows[i]->len < minlen || minlen == -1)
8120 minlen = rows[i]->len;
8121 }
8122 for (i=0; i<n; i++)
8123 {
8124 if (rows[i]->len > minlen)
8125 {
8126 row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8127 row->next = rows[i]->next;
8128 rows[i]->next = row;
8129 if (rows[i]->from == -1)
8130 row->from = -1;
8131 else if (AlnMgr2GetNthStrand(salp, i) == Seq_strand_minus)
8132 {
8133 row->from = rows[i]->from;
8134 rows[i]->from = rows[i]->from + rows[i]->len - 1 - minlen;
8135 } else
8136 row->from = rows[i]->from + minlen;
8137 row->len = rows[i]->len - minlen;
8138 rows[i]->len = minlen;
8139 }
8140 rows[i] = rows[i]->next;
8141 }
8142 }
8143 for (i=0; i<n; i++)
8144 {
8145 rows[i] = rowheads[i];
8146 }
8147 MemFree(rowheads);
8148 dsp = DenseSegNew();
8149 row = rows[0];
8150 while (row != NULL)
8151 {
8152 dsp->numseg++;
8153 row = row->next;
8154 }
8155 if (fill_in)
8156 dsp->numseg += numunaln;
8157 dsp->dim = n;
8158 dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8159 dsp->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8160 dsp->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8161 j = 0;
8162 row = rows[0];
8163 while (row != NULL)
8164 {
8165 dsp->lens[j] = row->len;
8166 j++;
8167 row = row->next;
8168 }
8169 id = AlnMgr2GetNthSeqIdPtr(salp, 0);
8170 dsp->ids = id;
8171 for (i=0; i<n; i++)
8172 {
8173 if (i > 0)
8174 {
8175 id->next = AlnMgr2GetNthSeqIdPtr(salp, i+1);
8176 id = id->next;
8177 }
8178 row = rows[i];
8179 j = 0;
8180 strand = AlnMgr2GetNthStrand(salp, i+1);
8181 while (row != NULL)
8182 {
8183 dsp->starts[n*j + i] = row->from;
8184 dsp->strands[n*j + i] = strand;
8185 j++;
8186 row = row->next;
8187 }
8188 }
8189 if (fill_in)
8190 {
8191 for (i=0; i<n; i++)
8192 {
8193 AlnMgr2GetNthUnalignedForNthRow(sap, seg+1, i+1, &ustart, &ustop);
8194 if (ustart >= 0 && ustop >= ustart)
8195 {
8196 for (k=0; k<n; k++)
8197 {
8198 dsp->starts[n*j + k] = -1;
8199 dsp->strands[n*j + k] = dsp->strands[i];
8200 }
8201 dsp->starts[n*j + i] = ustart;
8202 j++;
8203 }
8204 }
8205 }
8206 subsalp = SeqAlignNew();
8207 subsalp->type = SAT_PARTIAL;
8208 subsalp->segtype = SAS_DENSEG;
8209 subsalp->dim = n;
8210 subsalp->segs = (Pointer)(dsp);
8211 for (i=0; i<n; i++)
8212 {
8213 row = rows[i];
8214 while (row != NULL)
8215 {
8216 row_prev = row->next;
8217 MemFree(row);
8218 row = row_prev;
8219 }
8220 }
8221 if (seg < 0)
8222 seg = -seg;
8223 currlen += lengthbit;
8224 seg++;
8225 if (salp_head != NULL)
8226 {
8227 salp_prev->next = subsalp;
8228 salp_prev = subsalp;
8229 } else
8230 salp_head = salp_prev = subsalp;
8231 }
8232 MemFree(rows);
8233 AlnMsgFree2(amp);
8234 if (fill_in && salp_head->next != NULL) /* stick subsalps together into a big aln */
8235 {
8236 j = 0;
8237 subsalp = salp_head;
8238 while (subsalp != NULL)
8239 {
8240 dsp = (DenseSegPtr)(subsalp->segs);
8241 j += dsp->numseg;
8242 subsalp = subsalp->next;
8243 }
8244 dsp_new = DenseSegNew();
8245 dsp_new->dim = n;
8246 dsp_new->numseg = j;
8247 dsp_new->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8248 dsp_new->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8249 dsp_new->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8250 subsalp = salp_head;
8251 k = 0;
8252 while (subsalp != NULL)
8253 {
8254 dsp = (DenseSegPtr)(subsalp->segs);
8255 for (j=0; j<dsp->numseg; j++)
8256 {
8257 dsp_new->lens[k] = dsp->lens[j];
8258 for (i=0; i<n; i++)
8259 {
8260 dsp_new->starts[k*n+i] = dsp->starts[j*n+i];
8261 dsp_new->strands[k*n+i] = dsp->strands[j*n+i];
8262 }
8263 k++;
8264 }
8265 subsalp = subsalp->next;
8266 }
8267 subsalp = SeqAlignNew();
8268 subsalp->type = SAT_PARTIAL;
8269 subsalp->segtype = SAS_DENSEG;
8270 subsalp->dim = n;
8271 subsalp->segs = (Pointer)(dsp_new);
8272 SeqAlignSetFree(salp_head);
8273 } else if (!fill_in && salp_head->next != NULL)
8274 {
8275 subsalp = SeqAlignNew();
8276 subsalp->segtype = SAS_DISC;
8277 subsalp->type = SAT_PARTIAL;
8278 subsalp->segs = (SeqAlignPtr)(salp_head);
8279 salp_prev = salp_head;
8280 while (salp_prev != NULL)
8281 {
8282 AMAlignIndexFreeEitherIndex(salp_prev);
8283 salp_prev = salp_prev->next;
8284 }
8285 } else /* if !salp_head->next */
8286 {
8287 subsalp = salp_head;
8288 subsalp->dim = AlnMgr2GetNumRows(subsalp);
8289 subsalp->type = SAT_PARTIAL;
8290 AMAlignIndexFreeEitherIndex(subsalp);
8291 }
8292 if (anchored)
8293 SeqAlignFree(salp);
8294 return subsalp;
8295 }
8296
8297 /***************************************************************************
8298 *
8299 * SECTION 8: Miscellaneous functions to compute useful information
8300 * about an alignment
8301 *
8302 ***************************************************************************/
8303 /* SECTION 8 */
8304 /***************************************************************************
8305 *
8306 * AlnMgr2ComputeScoreForSeqAlign computes an ad hoc numerical score for
8307 * an indexed alignment by computing a similarity score for the whole
8308 * alignment (residue pair by residue pair score, from a matrix for proteins
8309 * and identity for nucleotides) and then subtracting gap open and gap
8310 * extension penalties.
8311 *
8312 ***************************************************************************/
8313 NLM_EXTERN Int4 AlnMgr2ComputeScoreForSeqAlign(SeqAlignPtr sap)
8314 {
8315 AMFreqPtr afp;
8316 DenseSegPtr dsp;
8317 Int4 gaplen;
8318 Int4 i;
8319 Boolean is_prot;
8320 Int4 j;
8321 Int4 len;
8322 Int4 mismatch;
8323 Int4 numgaps;
8324 Int4 numseqs;
8325 Boolean open;
8326 Int4 res1;
8327 Int4 res2;
8328 Int4 score;
8329 Int4 seqscore;
8330
8331 if (sap->segtype == SAS_DISC)
8332 return -1;
8333 if (sap->saip == NULL)
8334 AlnMgr2IndexSingleChildSeqAlign(sap);
8335 is_prot = AlnMgr2IsItProtein(sap);
8336 len = AlnMgr2GetAlnLength(sap, FALSE);
8337 dsp = (DenseSegPtr)(sap->segs);
8338 numseqs = dsp->dim;
8339 open = FALSE;
8340 gaplen = 0;
8341 numgaps = 0;
8342 for (i=0; i<dsp->dim; i++)
8343 {
8344 for (j=0; j<dsp->numseg; j++)
8345 {
8346 if (dsp->starts[(dsp->dim)*j+i] == -1)
8347 {
8348 if (!open)
8349 {
8350 gaplen += dsp->lens[j];
8351 numgaps++;
8352 open = TRUE;
8353 } else
8354 gaplen += dsp->lens[j];
8355 } else
8356 open = FALSE;
8357 }
8358 }
8359 mismatch = 0;
8360 seqscore = 0;
8361 afp = AlnMgr2ComputeFreqMatrix(sap, 0, -1, 0);
8362 if (afp == NULL)
8363 return -1;
8364 for (i=0; i<afp->len; i++)
8365 {
8366 res1 = -1;
8367 res2 = -1;
8368 for (j=0; j<afp->size; j++)
8369 {
8370 if (afp->freq[j][i] == 1)
8371 {
8372 if (res1 == -1)
8373 res1 = j;
8374 else
8375 res2 = j;
8376 } else if (afp->freq[j][i] == 2)
8377 res1 = res2 = j;
8378 }
8379 if (res1 > 0 && res2 > 0) /* don't penalize gaps */
8380 seqscore += AlnMgr2GetScoreForPair(res1, res2, is_prot);
8381 }
8382 AMFreqFree(afp);
8383 score = seqscore + numgaps*AM_GAPOPEN + gaplen*AM_GAPEXT;
8384 return score;
8385 }
8386
8387 static Int4 AlnMgr2SeqPortRead(SeqPortPtr PNTR spp, Uint1Ptr buf, Int4Ptr bufpos, Int4 start, Int4 stop, Uint1 strand, Uint1 code, BioseqPtr bsp)
8388 {
8389 if (*spp == NULL) /* first call */ {
8390 if (strand == Seq_strand_minus){
8391 *spp = SeqPortNew(bsp, MAX(0, stop-AM_SEQPORTSIZE), stop, strand, code);
8392 *bufpos = MAX(0, stop-AM_SEQPORTSIZE);
8393 }
8394 else {
8395 *spp = SeqPortNew(bsp, start, MIN(start+AM_SEQPORTSIZE, bsp->length-1), strand, code);
8396 *bufpos = start;
8397 }
8398 }
8399 /* see if what we need is in current seqport or a new one is needed */
8400 else if ((start < *bufpos) || (start > *bufpos+AM_SEQPORTSIZE)
8401 || (stop < *bufpos) || (stop > *bufpos+AM_SEQPORTSIZE)) {
8402 SeqPortFree(*spp);
8403 if (strand == Seq_strand_minus) {
8404 *spp = SeqPortNew(bsp, MAX(0, stop-AM_SEQPORTSIZE), stop, strand, code);
8405 *bufpos = MAX(0, stop-AM_SEQPORTSIZE);
8406 }
8407 else {
8408 *spp = SeqPortNew(bsp, start, MIN(start+AM_SEQPORTSIZE, bsp->length-1), strand, code);
8409 *bufpos = start;
8410 }
8411 }
8412 return (SeqPortRead(*spp, buf, (MIN(start+AM_SEQPORTSIZE-1, stop)) - start+1));
8413 }
8414
8415 /* SECTION 8 */
8416 /***************************************************************************
8417 *
8418 * AlnMgr2ComputeFreqMatrix takes an indexed seqalign and returns a matrix
8419 * indicating nucleotide or amino acid frequency at each position of the
8420 * alignment. The matrix can be made over only a part of the alignment, if
8421 * from and to are nonzero, and if row is nonzero, from and to are taken
8422 * to be bioseq coordinates from that row (if row == 0 from and to are
8423 * assumed to be alignment coordinates).
8424 *
8425 ***************************************************************************/
8426 NLM_EXTERN AMFreqPtr AlnMgr2ComputeFreqMatrix(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row)
8427 {
8428 AMFreqPtr afp;
8429 AlnMsg2Ptr amp;
8430 BioseqPtr bsp;
8431 Uint1 buf[AM_SEQPORTSIZE];
8432 Int4 bufpos;
8433 Uint1 code;
8434 Int4 counter;
8435 Int4 ctr;
8436 Int4 from_a;
8437 Int4 i;
8438 Boolean isna;
8439 Int4 j;
8440 Int4 l;
8441 Int4 len;
8442 Boolean more;
8443 Int4 n;
8444 Int4 numrows;
8445 Uint1 res;
8446 SeqIdPtr sip;
8447 SeqPortPtr spp;
8448 Int4 tmp;
8449 Int4 to_a;
8450
8451 if (sap == NULL || sap->saip == NULL || (from > to && to != -1))
8452 return NULL;
8453 numrows = AlnMgr2GetNumRows(sap);
8454 bufpos = -1;
8455 if (row > numrows || row < 0)
8456 return NULL;
8457 len = AlnMgr2GetAlnLength(sap, FALSE);
8458 if (to >= len)
8459 return NULL;
8460 if (to == -1)
8461 to = len-1;
8462 sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
8463 bsp = BioseqLockById(sip);
8464 if (bsp != NULL)
8465 isna = ISA_na(bsp->mol);
8466 else
8467 {
8468 SeqIdFree(sip);
8469 return NULL;
8470 }
8471 BioseqUnlock(bsp);
8472 SeqIdFree(sip);
8473 if (isna)
8474 code = Seq_code_ncbi4na;
8475 else
8476 code = Seq_code_ncbistdaa;
8477 afp = (AMFreqPtr)MemNew(sizeof(AMFreq));
8478 afp->len = len;
8479 if (isna)
8480 afp->size = AM_NUCSIZE;
8481 else
8482 afp->size = AM_PROTSIZE;
8483 afp->freq = (Int4Ptr PNTR)MemNew((afp->size)*sizeof(Int4Ptr));
8484 for (i=0; i<afp->size; i++)
8485 {
8486 afp->freq[i] = (Int4Ptr)MemNew((afp->len)*sizeof(Int4));
8487 }
8488 amp = AlnMsgNew2();
8489 if (row != 0)
8490 {
8491 from_a = AlnMgr2MapBioseqToSeqAlign(sap, from, row);
8492 to_a = AlnMgr2MapBioseqToSeqAlign(sap, to, row);
8493 if (from_a > to_a)
8494 {
8495 tmp = to_a;
8496 to_a = from_a;
8497 from_a = tmp;
8498 }
8499 } else
8500 {
8501 from_a = from;
8502 to_a = to;
8503 }
8504 for (i=0; i<numrows; i++)
8505 {
8506 spp = NULL;
8507 AlnMsgReNew2(amp);
8508 amp->from_aln = from_a;
8509 amp->to_aln = to_a;
8510 amp->row_num = i+1;
8511 j = 0;
8512 while ((more = AlnMgr2GetNextAlnBit(sap, amp)))
8513 {
8514 if (amp->type == AM_GAP)
8515 {
8516 for (n=0; n<(amp->to_row - amp->from_row+1); n++)
8517 {
8518 afp->freq[0][j] = afp->freq[0][j]+1;
8519 j++;
8520 }
8521 } else if (amp->type == AM_SEQ)
8522 {
8523 sip = AlnMgr2GetNthSeqIdPtr(sap, i+1);
8524 bsp = BioseqLockById(sip);
8525 if (bsp != NULL) {
8526 for (l=amp->from_row; l<=amp->to_row; l+=AM_SEQPORTSIZE)
8527 {
8528 counter = AlnMgr2SeqPortRead(&spp, buf, &bufpos, l, MIN(l+AM_SEQPORTSIZE, amp->to_row), amp->strand, code, bsp);
8529 ctr = 0;
8530 while (ctr < counter)
8531 {
8532 res = buf[ctr];
8533 if (isna)
8534 {
8535 if (res == 1 || res == 2)
8536 afp->freq[res][j]++;
8537 else if (res == 4)
8538 afp->freq[3][j]++;
8539 else if (res == 8)
8540 afp->freq[4][j]++;
8541 else
8542 afp->freq[5][j]++;
8543 } else
8544 afp->freq[res][j]++;
8545 j++;
8546 ctr++;
8547 }
8548 }
8549 BioseqUnlock(bsp);
8550 }
8551 SeqIdFree(sip);
8552 }
8553 }
8554 SeqPortFree(spp);
8555 }
8556 AlnMsgFree2(amp);
8557 return afp;
8558 }
8559
8560 /* SECTION 8 */
8561 /***************************************************************************
8562 *
8563 * AlnMgr2GetScoreForPair assigns scores to nucleotide and protein residue
8564 * pairs. Nucleotide pairs are scored according to a standard mismatch
8565 * penalty, and amino acid pairs are scored according to the BLOSUM62
8566 * matrix below. This matrix has been rearranged so that the rows and
8567 * columns appear in alphabetical order, so that it directly correlates
8568 * to the NCBIstdaa alphabet (with a minus-one difference).
8569 *
8570 ***************************************************************************/
8571 static Int4 AlnMgr2GetScoreForPair(Int4 res1, Int4 res2, Boolean is_prot)
8572 {
8573 Int4 matrix[24][24] = {
8574 {4, -2, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, 0, -2, -1, -4},
8575 {-2, 4, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -2, 0, -1, 0, -1, -3, -4, -1, -3, 1, -4},
8576 {0, -3, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2, -2, -3, -4},
8577 {-2, 4, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -1, -3, 1, -4},
8578 {-1, 1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -1, -2, 4, -4},
8579 {-2, -3, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, -1, 3, -3, -4},
8580 {0, -1, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -1, -3, -2, -4},
8581 {-2, 0, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, -1, 2, 0, -4},
8582 {-1, -3, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1, -1, -3, -4},
8583 {-1, 0, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -1, -2, 1, -4},
8584 {-1, -4, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1, -1, -3, -4},
8585 {-1, -3, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1, -1, -1, -4},
8586 {-2, 3, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -1, -2, 0, -4},
8587 {-1, -2, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -2, -3, -1, -4},
8588 {-1, 0, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1, -1, 3, -4},
8589 {-1, -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -1, -2, 0, -4},
8590 {1, 0, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, 0, -2, 0, -4},
8591 {0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, 0, -2, -1, -4},
8592 {0, -3, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1, -1, -2, -4},
8593 {-3, -4, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, -2, 2, -3, -4},
8594 {0, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, -1, -1, -4},
8595 {-2, -3, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, -1, 7, -2, -4},
8596 {-1, 1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -1, -2, 4, -4},
8597 {-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1}};
8598
8599 if (is_prot) /* protein->use BLOSUM62 matrix */
8600 return matrix[res1-1][res2-1];
8601 else /* nucleotide->use match score/mismatch penalty */
8602 {
8603 if (res1 == 0 || res2 == 0) /* don't count gaps */
8604 return 0;
8605 if (res1 == res2)
8606 return 1;
8607 else
8608 return -3;
8609 }
8610 }
8611
8612 /* SECTION 8 */
8613 /***************************************************************************
8614 *
8615 * AlnMgr2IsItProtein takes an indexed alignment and quickly decides if
8616 * it's a protein or nucleotide alignment, returning TRUE for protein.
8617 *
8618 ***************************************************************************/
8619 NLM_EXTERN Boolean AlnMgr2IsItProtein(SeqAlignPtr sap)
8620 {
8621 BioseqPtr bsp;
8622 Boolean is_na;
8623 SeqIdPtr sip;
8624
8625 if (sap == NULL || sap->saip == NULL)
8626 return FALSE;
8627 sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
8628 bsp = BioseqLockById(sip);
8629 if (bsp == NULL)
8630 return FALSE;
8631 is_na = ISA_na(bsp->mol);
8632 SeqIdFree(sip);
8633 BioseqUnlock(bsp);
8634 return (!is_na);
8635 }
8636
8637 /***************************************************************************
8638 *
8639 * SECTION 9: Sorting functions and other algorithms to help order
8640 * alignments for various purposes
8641 *
8642 ***************************************************************************/
8643
8644 /* SECTION 9 */
8645 static int LIBCALLBACK AMCompareStarts(VoidPtr ptr1, VoidPtr ptr2)
8646 {
8647 AMBitty2Ptr bit1;
8648 AMBitty2Ptr bit2;
8649
8650 if (ptr1 != NULL && ptr2 != NULL)
8651 {
8652 bit1 = (AMBitty2Ptr)ptr1;
8653 bit2 = (AMBitty2Ptr)ptr2;
8654 if (bit1->num2 < bit2->num2)
8655 return -1;
8656 else if (bit1->num2 > bit2->num2)
8657 return 1;
8658 else if (bit1->num3 > bit2->num3) /* compare aln lengths */
8659 return -1;
8660 else if (bit1->num3 < bit2->num3)
8661 return 1;
8662 else
8663 return 0;
8664 }
8665 return 0;
8666 }
8667
8668 /* SECTION 9 */
8669 /***************************************************************************
8670 *
8671 * AlnMgr2SortAlnSetByNthRowPos takes an indexed parent alignment and sorts
8672 * all the child alignments along the row indicated. If the indicated row
8673 * is aligned on the plus strand, the alignments are sorted from smaller
8674 * to larger coordinates along that row; otherwise they are sorted in
8675 * reverse order.
8676 *
8677 ***************************************************************************/
8678 NLM_EXTERN void AlnMgr2SortAlnSetByNthRowPos(SeqAlignPtr sap, Int4 row)
8679 {
8680 AMAlignIndex2Ptr amaip;
8681 AMBitty2Ptr bit;
8682 Int4 i;
8683 SeqAlignPtr PNTR saparray;
8684 Uint1 strand;
8685
8686 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_PARENT)
8687 return;
8688 amaip = (AMAlignIndex2Ptr)(sap->saip);
8689 bit = (AMBitty2Ptr)MemNew((amaip->numsaps)*sizeof(AMBitty2));
8690 saparray = (SeqAlignPtr PNTR)MemNew((amaip->numsaps)*sizeof(SeqAlignPtr));
8691 for (i=0; i<amaip->numsaps; i++)
8692 {
8693 bit[i].num1 = i;
8694 AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], row, &bit[i].num2, NULL);
8695 bit[i].num3 = AlnMgr2GetAlnLength(amaip->saps[i], FALSE);
8696 strand = AlnMgr2GetNthStrand(amaip->saps[i], row);
8697 if (strand == Seq_strand_minus)
8698 bit[i].num2 = -bit[i].num2;
8699 saparray[i] = amaip->saps[i];
8700 }
8701 HeapSort(bit, amaip->numsaps, sizeof(AMBitty2), AMCompareStarts);
8702 for (i=0; i<amaip->numsaps; i++)
8703 {
8704 amaip->saps[i] = saparray[bit[i].num1];
8705 }
8706 MemFree(saparray);
8707 MemFree(bit);
8708 if (amaip->alnstyle != AM2_LITE)
8709 AlnMgr2ReIndexSeqAlign(sap);
8710 }
8711
8712
8713 /***************************************************************************
8714 *
8715 * SECTION 10: Basic alignment operations
8716 *
8717 ***************************************************************************/
8718
8719 /***************************************************************************
8720 *
8721 * AlnMgr2MergeTwoAlignments takes two alignments, with identical rows in
8722 * the same order (otherwise it rejects the alignments), and merges them
8723 * into a single alignment. If there is unaligned space between the two
8724 * alignments and this space is the same length for every row, the function
8725 * aligns those sequences; it rejects alignments when the unaligned spaces
8726 * are different sizes. The function returns a newly allocated alignment.
8727 *
8728 ***************************************************************************/
8729 NLM_EXTERN SeqAlignPtr AlnMgr2MergeTwoAlignments(SeqAlignPtr sap1_orig, SeqAlignPtr sap2_orig)
8730 {
8731 Int4 c;
8732 DenseSegPtr dsp;
8733 DenseSegPtr dsp1;
8734 DenseSegPtr dsp2;
8735 DenseSegPtr dsp_new;
8736 Int4 i;
8737 Int4 j;
8738 Int4 n1;
8739 Int4 n2;
8740 SeqAlignPtr sap1;
8741 SeqAlignPtr sap2;
8742 SeqAlignPtr sap_new;
8743 SeqIdPtr sip1;
8744 SeqIdPtr sip2;
8745 Int4 start1;
8746 Int4 start2;
8747 Int4 stop1;
8748 Int4 stop2;
8749 Uint1 strand1;
8750 Uint1 strand2;
8751 SeqAlignPtr tmp;
8752
8753 if (sap1_orig == NULL || sap2_orig == NULL)
8754 return NULL;
8755 if (sap1_orig->next != NULL)
8756 {
8757 AlnMgr2IndexSeqAlign(sap1_orig);
8758 sap1 = AlnMgr2GetSubAlign(sap1_orig, 0, -1, 0, TRUE);
8759 } else
8760 sap1 = SeqAlignDup(sap1_orig);
8761 if (sap2_orig->next != NULL)
8762 {
8763 AlnMgr2IndexSeqAlign(sap2_orig);
8764 sap2 = AlnMgr2GetSubAlign(sap2_orig, 0, -1, 0, TRUE);
8765 } else
8766 sap2 = SeqAlignDup(sap2_orig);
8767 AlnMgr2IndexSingleChildSeqAlign(sap1);
8768 AlnMgr2IndexSingleChildSeqAlign(sap2);
8769 n1 = AlnMgr2GetNumRows(sap1);
8770 n2 = AlnMgr2GetNumRows(sap2);
8771 if (n1 != n2)
8772 {
8773 SeqAlignFree(sap1);
8774 SeqAlignFree(sap2);
8775 return NULL;
8776 }
8777 /* put the alignments in order by the first row */
8778 AlnMgr2GetNthSeqRangeInSA(sap1, 1, &start1, &stop1);
8779 AlnMgr2GetNthSeqRangeInSA(sap2, 1, &start2, &stop2);
8780 strand1 = AlnMgr2GetNthStrand(sap1, 1);
8781 if (strand1 == Seq_strand_minus)
8782 {
8783 if (stop2 > start1)
8784 {
8785 tmp = sap1;
8786 sap1 = sap2;
8787 sap2 = tmp;
8788 }
8789 } else
8790 {
8791 if (stop1 > start2)
8792 {
8793 tmp = sap1;
8794 sap1 = sap2;
8795 sap2 = tmp;
8796 }
8797 }
8798 dsp1 = (DenseSegPtr)(sap1->segs);
8799 dsp2 = (DenseSegPtr)(sap2->segs);
8800 sip1 = dsp1->ids;
8801 sip2 = dsp2->ids;
8802 while (sip1 != NULL && sip2 != NULL)
8803 {
8804 if (SeqIdComp(sip1, sip2) != SIC_YES)
8805 {
8806 SeqAlignFree(sap1);
8807 SeqAlignFree(sap2);
8808 return NULL;
8809 }
8810 sip1 = sip1->next;
8811 sip2 = sip2->next;
8812 }
8813 dsp = DenseSegNew();
8814 dsp->dim = n1;
8815 dsp->numseg = 1;
8816 dsp->starts = (Int4Ptr)MemNew(n1*sizeof(Int4));
8817 dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
8818 dsp->strands = (Uint1Ptr)MemNew(n1*sizeof(Int4));
8819 for (i=0; i<n1; i++)
8820 {
8821 strand1 = AlnMgr2GetNthStrand(sap1, i+1);
8822 strand2 = AlnMgr2GetNthStrand(sap2, i+1);
8823 if (strand1 != strand2)
8824 {
8825 DenseSegFree(dsp);
8826 SeqAlignFree(sap1);
8827 SeqAlignFree(sap2);
8828 return NULL;
8829 }
8830 AlnMgr2GetNthSeqRangeInSA(sap1, i+1, &start1, &stop1);
8831 AlnMgr2GetNthSeqRangeInSA(sap2, i+1, &start2, &stop2);
8832 if (strand1 == Seq_strand_minus)
8833 {
8834 dsp->starts[i] = stop2 + 1;
8835 if (i == 0)
8836 dsp->lens[0] = start2 - (stop2 + 1);
8837 else
8838 {
8839 if (start2 - (stop2 + 1) != dsp->lens[0])
8840 {
8841 DenseSegFree(dsp);
8842 SeqAlignFree(sap1);
8843 SeqAlignFree(sap2);
8844 return NULL;
8845 }
8846 }
8847 } else
8848 {
8849 dsp->starts[i] = stop1 + 1;
8850 if (i == 0)
8851 dsp->lens[0] = start2 - (stop1 + 1);
8852 else
8853 {
8854 if (start2 - (stop1 + 1) != dsp->lens[0])
8855 {
8856 DenseSegFree(dsp);
8857 SeqAlignFree(sap1);
8858 SeqAlignFree(sap2);
8859 return NULL;
8860 }
8861 }
8862 }
8863 dsp->strands[i] = strand1;
8864 }
8865 if (dsp->lens[0] == 0)
8866 {
8867 DenseSegFree(dsp);
8868 dsp = NULL;
8869 }
8870 dsp_new = DenseSegNew();
8871 dsp_new->numseg = dsp1->numseg + dsp2->numseg;
8872 if (dsp != NULL)
8873 dsp_new->numseg++;
8874 dsp_new->dim = n1;
8875 dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
8876 dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
8877 dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
8878 for (i=0; i<dsp1->numseg; i++)
8879 {
8880 for (j=0; j<n1; j++)
8881 {
8882 dsp_new->starts[i*n1 + j] = dsp1->starts[i*n1 + j];
8883 dsp_new->strands[i*n1 + j] = dsp1->strands[i*n1 + j];
8884 }
8885 dsp_new->lens[i] = dsp1->lens[i];
8886 }
8887 c = dsp1->numseg;
8888 if (dsp != NULL)
8889 {
8890 for (j=0; j<n1; j++)
8891 {
8892 dsp_new->starts[c*n1 + j] = dsp->starts[j];
8893 dsp_new->strands[c*n1 + j] = dsp->strands[j];
8894 }
8895 dsp_new->lens[c] = dsp->lens[0];
8896 c++;
8897 }
8898 for (i=0; i<dsp2->numseg; i++, c++)
8899 {
8900 for (j=0; j<n1; j++)
8901 {
8902 dsp_new->starts[c*n1 + j] = dsp2->starts[i*n1 + j];
8903 dsp_new->strands[c*n1 + j] = dsp2->strands[i*n1 + j];
8904 }
8905 dsp_new->lens[c] = dsp2->lens[i];
8906 }
8907 dsp_new->ids = SeqIdDupList(dsp1->ids);
8908 sap_new = SeqAlignNew();
8909 sap_new->segtype = SAS_DENSEG;
8910 sap_new->dim = n1;
8911 sap_new->segs = (Pointer)dsp_new;
8912 if (dsp != NULL)
8913 DenseSegFree(dsp);
8914 SeqAlignFree(sap1);
8915 SeqAlignFree(sap2);
8916 return sap_new;
8917 }
8918
8919 /* SECTION 10 */
8920 /***************************************************************************
8921 *
8922 * AlnMgr2ExtendToCoords takes an indexed child seqalign and blindly extends
8923 * it to the coordinates specified on the given row. If other rows are too
8924 * short to allow this extension, the alignment is extended as far as
8925 * possible. If to == -1 the extension goes to the end of the sequence
8926 * specified.
8927 *
8928 ***************************************************************************/
8929 NLM_EXTERN void AlnMgr2ExtendToCoords(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row)
8930 {
8931 BioseqPtr bsp;
8932 Int4 diff1;
8933 Int4 diff2;
8934 DenseSegPtr dsp;
8935 DenseSegPtr dsp_new;
8936 Int4 i;
8937 Int4 j;
8938 Int4 numrows;
8939 Int4 numseg;
8940 Int4 prediff1;
8941 Int4 prediff2;
8942 Int4 seg;
8943 SeqIdPtr sip;
8944 Int4 start;
8945 Int4 stop;
8946
8947 if (sap == NULL || sap->saip == NULL || sap->saip->indextype != INDEX_CHILD)
8948 return;
8949 numrows = AlnMgr2GetNumRows(sap);
8950 if (row < 1 || row > numrows)
8951 return;
8952