|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/tools/falign.c |
source navigation diff markup identifier search freetext search file search |
1 static char const rcsid[] = "$Id: falign.c,v 6.3 2003/05/30 17:25:36 coulouri Exp $";
2
3 /* falign.c
4 * ===========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================
27 *
28 * File Name: falign.c
29 *
30 * Author: Jinghui Zhang and Kun-Mao Chao
31 *
32 * Version Creation Date: 5/24/95
33 *
34 *
35 * File Description:
36 * Part 1 of sim2: generate the K best fragment alignments.
37 * This program reads two DNA sequences from files and finds their local
38 * similarities. It can detect alignments containing gaps, while striking a
39 * balance between speed and sensitivity. The algorithm is based on three ideas:
40 * (1) techniques developed by Galil and coworkers that considerably improve
41 * time efficiency,
42 * (2) modification of an approach of Hirschberg that dramatically improves
43 * space efficiency, and
44 * (3) techniques that allow K best alignments to be determined efficiently
45 * for K > 1.
46 *
47 * R or r gives the score for replacement.
48 * O or o gives the score for opening up a gap.
49 * E or e gives the score for extending one more symbol in a gap.
50 * W or w gives the word size (<=MAXW).
51 * (cost of a gap of length L is O + E*L
52 * defaults: R = 0.2, O = 6.0, E = 0.2, W = 5)
53 *
54 * ODDITY: any character other than A, C, G or T is changed to 'A'.
55 *
56 * Modifications:
57 * --------------------------------------------------------------------------
58 * Date Name Description of modification
59 *
60 * $Log: falign.c,v $
61 * Revision 6.3 2003/05/30 17:25:36 coulouri
62 * add rcsid
63 *
64 * Revision 6.2 2000/11/02 20:56:17 vakatov
65 * Renamed "join()" to "static s_Join()" to avoid name conflicts
66 *
67 * Revision 6.1 1998/06/16 18:30:55 kans
68 * fixed unix compiler warnings
69 *
70 * Revision 6.0 1997/08/25 18:53:08 madden
71 * Revision changed to 6.0
72 *
73 * Revision 5.1 1997/03/14 15:45:14 kans
74 * undefine DEFAULT_W if previously defined in simutil.h
75 *
76 * Revision 5.0 1996/05/28 13:43:15 ostell
77 * Set to revision 5.0
78 *
79 * Revision 4.0 1995/07/26 13:50:15 ostell
80 * force revision to 4.0
81 *
82 * Revision 1.2 1995/05/25 20:21:54 zjing
83 * add header
84 *
85 *
86 *
87 * ==========================================================================
88 */
89
90
91 #ifndef _SIMUTIL_
92 #include <simutil.h>
93 #endif
94
95 #ifndef _LIST_
96 #include <list.h>
97 #endif
98
99 #ifndef _DB_LIST_
100 #include <db_list.h>
101 #endif
102
103
104 #define ALPHABET_SIZE 4 /* number of letters in the alphabet */
105 #define DEFAULT_IR 2 /* default score for replacement (0.2) */
106 #define DEFAULT_IO 60 /* default score for opening up a gap (6.0) */
107 #define DEFAULT_IE 2 /* default score for gap extension (0.2) */
108 #ifdef DEFAULT_W
109 #undef DEFAULT_W /* in simutil.h */
110 #endif
111 #define DEFAULT_W 6 /* default word size */
112 #define MAXW 8 /* maximum word size */
113 #define GC_ROW 200 /* garbage collection frequency */
114 #define MININT -99999 /* minimum value */
115
116
117 #define INTER_ADD(c) \
118 { if (!cintersect[in_row]) \
119 cintersect[in_row] = idb_newList(); \
120 db_insert(cintersect[in_row],c,NULL); \
121 ccol_int[c] = in_row; \
122 }
123
124 typedef struct string5 {
125 Int4 pos; /* starting position */
126 struct string5 PNTR link; /* previous location with same 5-mer */
127 } string5;
128
129 typedef struct cut_frag { /* cross fragment */
130 fragment PNTR ff; /* pointer to the forward fragment */
131 fragment PNTR bf; /* pointer to the backward fragment */
132 } cut_frag;
133
134 typedef struct frag_pt { /* endpoint of the fragment */
135 fragment PNTR fp; /* pointer to the fragment */
136 Int4 col; /* end column of the fragment */
137 struct frag_pt PNTR link;
138 } frag_pt;
139
140 Int4Ptr bucket; /* hash table for rowwise computation */
141 Int4Ptr rcbucket; /* hash table for columnwise computation */
142 Int4Ptr bls;
143 Int4Ptr rcbls;
144 Int4Ptr blink;
145 Int4Ptr rcblink;
146
147
148 CharPtr seq1, seq2;
149
150
151 Int4 W; /* word size */
152 Int4 BUCKETS; /* hash table size (ALPHABET_SIZE ** W) */
153 Int4 len1, len2; /* lengths of sequences */
154 Int4 offset, offset1; /* offset to make index positive */
155 Int4 number_frags; /* number of fragments generated */
156 Int4 deleted_frags; /* number of fragments deleted */
157 Int4 output_frags; /* number of fragments output */
158 Int4 total_frags; /* total number of fragments between two sequences */
159 Int4 old_i_start, old_i_end, old_j_start, old_j_end; /* WEBB */
160
161 /* [sb1,sb2]X[se1,se2] is the area for aligning globally */
162 Int4 sb1, se1, sb2, se2, mid1, mid2, cur_end, cur_end1;
163
164 frag_pt PNTR PNTR end_row; /* lists of endpoints row by row */
165 frag_pt PNTR PNTR end_col; /* lists of endpoints column by column */
166 fragment PNTR PNTR diag_prev; /* store the previous fragment on the same diagonal */
167 fragment PNTR PNTR bdiag_prev; /* store the previous fragment on the same diagonal */
168 Int4Ptr col_int; /* store the row position of intersection point */
169 Int4Ptr rccol_int; /* store the row position of intersection point */
170 CharPtr diag_flag; /* 1 if some intersection point is on the diagonal */
171 CharPtr rcdiag_flag; /* 1 if some intersection point is on the diagonal */
172 idb_list PNTR intersect; /* list for intersection points */
173 idb_list PNTR rcintersect; /* list for intersection points */
174 ulist PNTR used_frag; /* mark used fragments */
175 cut_frag PNTR PNTR cut; /* store cross fragments */
176
177 Int4 K; /* output K best local alignments */
178 Int4 IR,IO,IE;
179 /* cost of a gap of length L is IO/DIGIT + (IE/DIGIT)*L */
180
181 fragment PNTR pfo, PNTR pbo; /* pseudo origins */
182 fragment PNTR pff, PNTR pbf; /* pseudo aligning fragments */
183 Int4 best_score = 0; /* best score in the recomputation */
184 list ri; /* right influence list */
185 list bri; /* backward right influence list */
186 db_list li_c, li_d; /* left influence lists */
187 db_list bli_c, bli_d; /* backward left influence lists */
188
189 typedef struct kbclass { /* k best equivalent classes */
190 fragment PNTR first; /* first fragment in the alignment */
191 fragment PNTR last; /* last fragment in the alignment */
192 Int4 score; /* best score for the class */
193 Int4 t; /* [t,b]X[l,r] contains all points whose */
194 Int4 b; /* score >= mw in the class */
195 Int4 l;
196 Int4 r;
197 } kbclass;
198 kbclass PNTR PNTR S; /* store k best classes */
199 Int4 lastS = 0; /* store number of classes */
200 Int4 curS = 0; /* locality usage */
201 Int4 minS = 0; /* store the class whose best score is mw */
202 Int4 mw = 0; /* minimum score of all classes */
203
204 /* Prototype - added on Feb. 22, 1994 */
205 void table_init(void);
206 void fbld_table(void);
207 void bbld_table(void);
208 Int4 rcbld_table(Int4Ptr, Int4Ptr, Int4Ptr, CharPtr, Int4, Int4);
209 Int4 ex_table(Int4Ptr, Int4Ptr, Int4Ptr, CharPtr, Int4, Int4);
210 void free_table(Int4Ptr);
211 void update_active(list, db_list, db_list, Int4);
212 void rcupdate_active(list, db_list, db_list, Int4);
213 void rotupdate_active(frag_pt PNTR, Int4, Int4);
214 void cotupdate_active(frag_pt PNTR, Int4, Int4);
215 void free_all(Int4, Int4, Int4, fragment PNTR PNTR, list, db_list, db_list);
216 void scan(void);
217 void fscan_init(void);
218 void bscan_init(void);
219 void fscan(void);
220 void bscan(void);
221 void found(Int4, Int4);
222 void rfscan(Int4, Int4, Int4, Int4);
223 void rffound(Int4, Int4);
224 void enter_endrow(fragment PNTR);
225 void enter_endcol(fragment PNTR);
226 void ffound(Int4, Int4);
227 void bfound(Int4, Int4);
228 void setup(void);
229 void global_setup(fragment PNTR, fragment PNTR);
230 void global(fragment PNTR, fragment PNTR, Int4, SeqLocPtr, SeqLocPtr, SeqAlignPtr);
231 static Int4 s_Join(fragment PNTR, fragment PNTR);
232 void recom_setup(kbclass PNTR);
233 void recompute(kbclass PNTR);
234 Int4 disjoint(void);
235 void syn_area(void);
236 void ex_row(void);
237 void ex_col(void);
238 void rcfound(Int4, Int4);
239 void trcscan(void);
240 void trcfound(Int4, Int4);
241 void rclocal(fragment PNTR, Int4, Int4, list, db_list, db_list);
242 void lactive_ext(fragment PNTR);
243 void ractive_ext(fragment PNTR);
244 void rcfree(void);
245 SeqAlignPtr printS(kbclass PNTR, SeqLocPtr, SeqLocPtr, SeqAlignPtr);
246 SeqAlignPtr class_print(SeqLocPtr, SeqLocPtr);
247 kbclass PNTR findmaxS(void);
248 kbclass PNTR findS(fragment PNTR);
249 void set_class(Int4, fragment PNTR);
250 void replaceS(fragment PNTR);
251 void insertS(fragment PNTR);
252 Int4 sizeS(void);
253 Int4 minscoreS(void);
254 Int4 enterS(fragment PNTR, Int4, Int4);
255 void local(fragment PNTR);
256 void align(fragment PNTR, fragment PNTR PNTR, list, db_list, db_list, fragment PNTR);
257 void calign(fragment PNTR, fragment PNTR PNTR, list, db_list, db_list, fragment PNTR);
258 void malign(fragment PNTR, fragment PNTR PNTR, list, db_list, db_list, fragment PNTR);
259 void malign1(fragment PNTR, list, db_list, db_list, fragment PNTR);
260 frag_pt PNTR tailor(frag_pt PNTR, Int4);
261 frag_pt PNTR otr_survive(frag_pt PNTR, Int4);
262 void rotr_merge(frag_pt PNTR);
263 Int4 rotr_win(fragment PNTR, fragment PNTR);
264 void cotr_merge(frag_pt PNTR);
265 Int4 cotr_win(fragment PNTR, fragment PNTR);
266 frag_pt PNTR otl_survive(frag_pt PNTR, Int4);
267 void otl_merge(frag_pt PNTR, Int4, Int4, db_list, db_list, idb_list PNTR, Int4 PNTR, Char PNTR, Int4 (PNTR)(fragment PNTR), Int4 (PNTR)(fragment PNTR, fragment PNTR));
268 Int4 rotl_win(fragment PNTR, fragment PNTR);
269 Int4 cotl_win(fragment PNTR, fragment PNTR);
270 void r_survive(frag_pt PNTR);
271 void r_merge(list, frag_pt PNTR, Int4);
272 void r_add_point(list, fragment PNTR, Int4);
273 void resolve(db_list, db_list, idb_list PNTR, Char PNTR, Int4 PNTR, Int4,Int4 (PNTR)(fragment PNTR, Int4, Int4));
274 void l_merge(db_list, db_list, idb_list PNTR, Char PNTR, Int4 PNTR, frag_pt PNTR, Int4, Int4 (PNTR)(fragment PNTR, Int4, Int4));
275 void rcr_merge(list, frag_pt PNTR, Int4);
276 void rcr_add_point(list, fragment PNTR, Int4);
277 Int4 topo_win(fragment PNTR, fragment PNTR);
278 Int4 pt_lscore(fragment PNTR, Int4, Int4);
279 Int4 rcpt_lscore(fragment PNTR, Int4, Int4);
280 Int4 weight(fragment PNTR, fragment PNTR);
281 Int4 gap_cost(Int4);
282 Int4 diag(fragment PNTR);
283 Int4 rcdiag(fragment PNTR);
284 Int4 pos_diag(fragment PNTR);
285 Int4 rcpos_diag(fragment PNTR);
286 void pf_set(fragment PNTR, Int4, Int4);
287 Int4 power(Int4, Int4);
288 void decre(fragment PNTR);
289 void incre(fragment PNTR);
290 DenseDiagPtr frag_print(fragment PNTR, SeqLocPtr, SeqLocPtr, DenseDiagPtr);
291 void Xfrag_print(fragment PNTR);
292
293 /* falign - building fragment alignments (Feb., 1994) */
294 /**********************************************************************
295 *
296 * falign(slp1, slp2, A, B, w, k, r, o, e)
297 * compute the diagnols between slp1 and slp2. It is the first
298 * part of sim2. returns the Dense-Diag as Seq-align
299 * A, B: string of Char for slp1 and slp2. Can be set to NULL as well
300 * w: word size
301 * k: top K alignment
302 * r: penalty for replacement
303 * o: penalty for open gap
304 * e: penalty for gap extension
305 *
306 **********************************************************************/
307 SeqAlignPtr falign(SeqLocPtr slp1, SeqLocPtr slp2, CharPtr A, CharPtr B, Int4 w, Int4 k, Int4 r, Int4 o, Int4 e)
308 {
309 Int4 i, j;
310 SeqAlignPtr align;
311
312 if(A == NULL)
313 seq1 = make_sim_seq(slp1, TRUE, NULL);
314 else
315 seq1 = A;
316 len1 = SeqLocLen(slp1);
317
318 if(B == NULL)
319 seq2 = make_sim_seq(slp2, TRUE, NULL);
320 else
321 seq2 = B;
322 len2 = SeqLocLen(slp2);
323
324 lastS = 0; /* store number of classes */
325 curS = 0; /* locality usage */
326 minS = 0; /* store the class whose best score is mw */
327 mw = 0;
328 W = w;
329 K = k;
330 IR = r;
331 IO = o;
332 IE = e;
333 if (IR > 2*IE)
334 fatal("repl_cost should not be greater than 2*gap_extend");
335 if (K<0 || IR<0 || IO<0 || IE<0)
336 fatal("error! all input should be nonnegative.");
337
338 S = (kbclass **)ckalloc((K+1)*sizeof(kbclass *));
339
340
341 BUCKETS = power(ALPHABET_SIZE,W);
342 j = BUCKETS*sizeof(Int4);
343 bucket = (Int4 *)ckalloc(j);
344 bls = (Int4 *)ckalloc(j);
345 blink = (Int4 *)ckalloc((len2+1)*sizeof(Int4));
346 rcbucket = (Int4 *)ckalloc(j);
347 rcbls = (Int4 *)ckalloc(j);
348 rcblink = (Int4 *)ckalloc((len1+1)*sizeof(Int4));
349
350 for (i=0; i<BUCKETS; ++i){
351 bucket[i] = -1;
352 rcbucket[i] = -1;
353 }
354
355 used_frag = (ulist *)ckalloc((len1+1)*sizeof(ulist));
356 cut = (cut_frag **)ckalloc((len2+1)*sizeof(cut_frag *));
357 end_row = (frag_pt **)ckalloc((len1+1)*sizeof(frag_pt *));
358 end_col = (frag_pt **)ckalloc((len2+1)*sizeof(frag_pt *));
359 col_int = (Int4 *)ckalloc((len2+1)*sizeof(Int4));
360 rccol_int = (Int4 *)ckalloc((len1+1)*sizeof(Int4));
361 intersect = (idb_list *)ckalloc((len1+1)*sizeof(idb_list));
362 rcintersect = (idb_list *)ckalloc((len2+1)*sizeof(idb_list));
363 diag_prev = (fragment **)ckalloc((len1+len2+1)*sizeof(fragment *));
364 bdiag_prev = (fragment **)ckalloc((len1+len2+1)*sizeof(fragment *));
365 diag_flag = (Char *)ckalloc((len1+len2+1)*sizeof(Char));
366 rcdiag_flag = (Char *)ckalloc((len1+len2+1)*sizeof(Char));
367 for (i=0; i<=len1; ++i) {
368 used_frag[i] = NULL;
369 end_row[i] = NULL;
370 intersect[i] = NULL;
371 }
372 for (i=0; i<=len2; ++i) {
373 cut[i] = NULL;
374 end_col[i] = NULL;
375 col_int[i] = 0;
376 rcintersect[i] = NULL;
377 }
378 for (i=len1+len2; i>=0; --i) {
379 diag_prev[i] = NULL;
380 bdiag_prev[i] = NULL;
381 diag_flag[i] = 0;
382 rcdiag_flag[i] = 0;
383 }
384 /* align sequence with itself */
385 if (slp1 == slp2) {
386 used_frag[0] = unewList();
387 uinsert(used_frag[0],0);
388 }
389
390 #ifdef STATS
391 /*
392 fprintf(out_fp, "#:lav\n\n");
393
394 fprintf(out_fp, "d {\n\"CHAO output with parameters:\n");
395 fprintf(out_fp, "W = %ld, ",W);
396 fprintf(out_fp, "K = %ld, M = 1, I = V = %.1f,\nO = %.1f, E = %.1f\"\n}\n",
397 K, ((FloatLo)IR)/DIGIT, ((FloatLo)IO)/DIGIT, ((FloatLo)IE)/DIGIT);
398 fprintf(out_fp, "s {\n \"%s\" 1 %ld\n \"%s\" 1 %ld\n}\n", sname1, len1,
399 sname2, len2);
400 */
401 #endif
402 /*
403 fprintf(out_fp, "%s %ld\n%s %ld\n", sname1, len1, sname2, len2);
404 fprintf(out_fp, "%ld\n",K);
405 */
406 setup();
407 scan();
408 #ifdef STATS
409 /*
410 fprintf(out_fp, "\nnumber_frags= %d\n",number_frags);
411 */
412 #endif
413 free_all(len1-1,0,len2-1,diag_prev,ri,li_c,li_d);
414 align = class_print(slp1, slp2);
415 #ifdef STATS
416 /*
417 fprintf(out_fp, "\nnumber_frags= %ld\n",number_frags);
418 fprintf(out_fp, "deleted_frags= %ld\n",deleted_frags);
419 */
420 #endif
421 /* free space before exit (Feb. 10, 1994) */
422 list_NIL_free();
423 dblist_NIL_free();
424 MemFree(bucket);
425 MemFree(bls);
426 MemFree(blink);
427 MemFree(rcbucket);
428 MemFree(rcbls);
429 MemFree(rcblink);
430 /*
431 MemFree(seq1);
432 MemFree(seq2);
433 */
434 for (i=0; i<=len1; ++i) {
435 if (used_frag[i]) ufreeList(used_frag[i]);
436 }
437 MemFree(used_frag);
438 MemFree(cut);
439 MemFree(end_row);
440 MemFree(end_col);
441 MemFree(col_int);
442 MemFree(rccol_int);
443 MemFree(intersect);
444 MemFree(rcintersect);
445 MemFree(diag_prev);
446 MemFree(bdiag_prev);
447 MemFree(diag_flag);
448 MemFree(rcdiag_flag);
449 MemFree(pfo);
450 MemFree(pbo);
451 MemFree(pff);
452 MemFree(pbf);
453
454 MemFree(S);
455 if(A == NULL)
456 MemFree(seq1);
457 if(B== NULL)
458 MemFree(seq2);
459
460 return align;
461 }
462
463
464 Int4 encoding[128];
465 Int4 mask;
466
467 /* hash table initialization */
468 void table_init(void)
469 {
470
471 /* perform initializations */
472 encoding['C'] = 1;
473 encoding['G'] = 2;
474 encoding['T'] = 3;
475 mask = (1 << (W+W-2)) - 1;
476
477 }
478
479 /* build hash table */
480 void fbld_table(void)
481 {
482 Int4 ecode;
483 Int4 i, pos;
484 CharPtr q, endp;
485 Int4 s5;
486
487 q = seq2 + sb2 - 1;
488 ecode = 0L;
489 for (i = 1; i < W; ++i)
490 ecode = (ecode << 2) + encoding[*++q];
491 pos = sb2;
492 endp = seq2 + se2;
493 while (++q <= endp) {
494 ecode = ((ecode & mask) << 2) + encoding[*q];
495 s5 = pos++;
496 if (bucket[ecode] != -1) {
497 blink[bls[ecode]] = s5;
498 } else {
499 bucket[ecode] = s5;
500 }
501 blink[s5] = -1;
502 bls[ecode] = s5;
503 }
504 }
505
506 /* build hash table */
507 void bbld_table(void)
508 {
509 Int4 ecode;
510 Int4 i, pos;
511 CharPtr q, endp;
512 Int4 s5;
513
514 q = seq2 + se2 + 1;
515 ecode = 0L;
516 for (i = 1; i < W; ++i)
517 ecode = (ecode << 2) + encoding[*--q];
518 pos = se2;
519 endp = seq2 + sb2;
520 while (--q >= endp) {
521 ecode = ((ecode & mask) << 2) + encoding[*q];
522 s5 = pos--;
523 if (bucket[ecode] != -1) {
524 blink[bls[ecode]] = s5;
525 } else {
526 bucket[ecode] = s5;
527 }
528 blink[s5] = -1;
529 bls[ecode] = s5;
530 }
531 }
532
533 /* build hash table */
534 Int4 rcbld_table(Int4Ptr buc, Int4Ptr cbls, Int4Ptr cblink, CharPtr seq, Int4 b, Int4 e)
535 {
536 Int4 ecode;
537 Int4 i, pos;
538 CharPtr q, endp;
539 Int4 s5;
540
541 q = seq + e + 1;
542 ecode = 0L;
543 for (i = 1; i < W; ++i)
544 ecode = (ecode << 2) + encoding[*--q];
545 pos = e;
546 endp = (b-W+1 >= 0)? seq + b-W+1 : seq;
547 while (--q >= endp) {
548 ecode = ((ecode & mask) << 2) + encoding[*q];
549 s5 = pos--;
550 if (buc[ecode] != -1) {
551 cblink[cbls[ecode]] = s5;
552 } else {
553 buc[ecode] = s5;
554 }
555 cblink[s5] = -1;
556 cbls[ecode] = s5;
557 }
558 return(ecode);
559 }
560
561 /* ex_table - extend the hash table */
562 Int4 ex_table(Int4Ptr buc, Int4Ptr cbls, Int4Ptr cblink, CharPtr seq, Int4 pos,Int4 ecode)
563 {
564 CharPtr q;
565
566 q = seq + (pos-W+1);
567 ecode = ((ecode & mask) << 2) + encoding[*q];
568 if (buc[ecode] != -1) {
569 cblink[cbls[ecode]] = pos;
570 } else {
571 buc[ecode] = pos;
572 }
573 cblink[pos] = -1;
574 cbls[ecode] = pos;
575 return(ecode);
576 }
577
578 /* free_table - free the memory occupied by the table */
579 void free_table(Int4Ptr buc)
580 {
581 Int4 i;
582
583 for (i = 0; i < BUCKETS; ++i) {
584 buc[i] = -1;
585 }
586 }
587
588 /* update_active - update active region */
589 void update_active(list cri, db_list cli_c, db_list cli_d, Int4 l)
590 {
591 /* update left influence active regions */
592 db_reset_pos(cli_c);
593 db_reset_pos(cli_d);
594 resolve(cli_c,cli_d,intersect,diag_flag,col_int,l,pt_lscore);
595 db_reset_pos(cli_c);
596 db_reset_pos(cli_d);
597 l_merge(cli_c,cli_d,intersect,diag_flag,col_int,end_row[l],l,pt_lscore);
598
599 /* update right influence active regions */
600 reset_pos(cri);
601 r_survive(end_row[l]);
602 r_merge(cri,end_row[l],l);
603
604 if (intersect[l]) {
605 idb_freeList(intersect[l]);
606 intersect[l] = NULL;
607 }
608 end_row[l] = NULL;
609 }
610
611 /* rcupdate_active - update active region */
612 void rcupdate_active(list cri, db_list cli_c, db_list cli_d, Int4 l)
613 {
614 /* update left influence active regions */
615 db_reset_pos(cli_c);
616 db_reset_pos(cli_d);
617 resolve(cli_c,cli_d,rcintersect,rcdiag_flag,rccol_int,l,rcpt_lscore);
618 db_reset_pos(cli_c);
619 db_reset_pos(cli_d);
620 l_merge(cli_c,cli_d,rcintersect,rcdiag_flag,rccol_int,end_col[l],l,rcpt_lscore);
621
622 /* update right influence active regions */
623 reset_pos(cri);
624 r_survive(end_col[l]);
625 rcr_merge(cri,end_col[l],l);
626
627 if (rcintersect[l]) {
628 idb_freeList(rcintersect[l]);
629 rcintersect[l] = NULL;
630 }
631 end_col[l] = NULL;
632 }
633
634 /* rotupdate_active - update active regions orthogonally */
635 void rotupdate_active(frag_pt PNTR rcopy, Int4 col, Int4 row)
636 {
637 frag_pt PNTR r;
638
639 reset_pos(ri);
640 r = otr_survive(rcopy,col);
641 rotr_merge(r);
642 r = otl_survive(rcopy,col);
643 otl_merge(r,col,row,li_c,li_d,rcintersect,rccol_int,
644 rcdiag_flag,rcpos_diag,rotl_win);
645 }
646
647 /* cotupdate_active - update active regions orthogonally */
648 void cotupdate_active(frag_pt PNTR rcopy, Int4 col, Int4 row)
649 {
650 frag_pt PNTR r;
651
652 reset_pos(bri);
653 r = otr_survive(rcopy,col);
654 cotr_merge(r);
655 r = otl_survive(rcopy,col);
656 otl_merge(r,col,row,bli_c,bli_d,intersect,col_int,
657 diag_flag,pos_diag,cotl_win);
658 }
659
660 /* free_all - free all useless space */
661 void free_all(Int4 i, Int4 j1, Int4 j2, fragment PNTR cdiag_prev[], list cri, db_list cli_c, db_list cli_d)
662 {
663 Int4 l, m;
664 fragment PNTR df;
665
666 m = len1 + (j2 - i);
667 for (l = len1+(j1-i); l <= m; ++l)
668 if (df = cdiag_prev[l]) {
669 decre(df);
670 cdiag_prev[l] = NULL;
671 }
672 freeList(cri);
673 db_freeList(cli_c);
674 db_freeList(cli_d);
675 }
676
677 void scan(void) /* scan sequence 1 */
678 {
679 Int4 ecode;
680 Int4 i, l, m, pos;
681 CharPtr q;
682 Int4 s5;
683 fragment PNTR df;
684
685 q = seq1 - 1;
686 ecode = 0L;
687 for (i = 1; i < W; ++i)
688 ecode = (ecode << 2) + encoding[*++q];
689 pos = 0;
690 while (*++q) {
691 reset_pos(ri);
692 db_reset_pos(li_c);
693 db_reset_pos(li_d);
694 ecode = ((ecode & mask) << 2) + encoding[*q];
695 for (s5 = bucket[ecode]; s5 != -1; s5 = blink[s5])
696 found(pos, s5);
697 /* free part of space occupied by diag_prev[] */
698 if (pos % GC_ROW == 0) {
699 m = len1 - pos + len2;
700 for (l = len1-pos; l <= m; ++l) {
701 if ((df = diag_prev[l]) &&
702 (df->score - IR * (pos - df->i - df->k) < 0)) {
703 decre(df);
704 diag_prev[l] = NULL;
705 }
706 }
707 }
708 /* update active regions */
709 update_active(ri,li_c,li_d,pos);
710 if (df = diag_prev[len1+len2-1-pos]) {
711 diag_prev[len1+len2-1-pos] = NULL;
712 decre(df);
713 }
714 ++pos;
715 }
716 for (l = pos; l < len1; ++l) update_active(ri,li_c,li_d,l);
717 m = len2; /* len1 + (len2-len1) */
718 for (l = len1+(len2-1-pos); l>m; --l)
719 if (df=diag_prev[l]) {
720 decre(df);
721 diag_prev[l]=NULL;
722 }
723 }
724
725 void fscan_init(void)
726 {
727 Int4 i,j;
728
729 ri = newList();
730 li_c = db_newList();
731 li_d = db_newList();
732 for (i=sb2; i<=se2; ++i) col_int[i] = 0;
733 j = len1 + (se2-sb1);
734 for (i = len1 + (sb2-mid1); i <= j; ++i) diag_flag[i]=0;
735 cur_end = mid1;
736 }
737
738 void bscan_init(void)
739 {
740 Int4 i,j;
741
742 bri = newList();
743 bli_c = db_newList();
744 bli_d = db_newList();
745 j=len2-sb2;
746 for (i=len2-se2; i<=j; ++i) col_int[i] = 0;
747 j=len2-(sb2-se1);
748 for (i = len2-(se2-mid2); i<=j; ++i) diag_flag[i]=0;
749 cur_end = len1 - mid2;
750 }
751
752 void fscan(void) /* scan sequence 1 */
753 {
754 Int4 ecode;
755 Int4 i, pos;
756 CharPtr q;
757 Int4 s5;
758 fragment PNTR df;
759
760 q = seq1 + sb1 - 1;
761 ecode = 0L;
762 for (i = 1; i < W; ++i)
763 ecode = (ecode << 2) + encoding[*++q];
764 pos = sb1;
765 while (*++q && pos <= mid1) {
766 reset_pos(ri);
767 db_reset_pos(li_c);
768 db_reset_pos(li_d);
769 ecode = ((ecode & mask) << 2) + encoding[*q];
770 for (s5 = bucket[ecode]; s5 != -1; s5 = blink[s5])
771 ffound(pos, s5);
772 /* update active regions */
773 update_active(ri,li_c,li_d,pos);
774 if ((pos != mid1) && (df = diag_prev[len1+se2-pos])) {
775 decre(df);
776 diag_prev[len1+se2-pos] = NULL;
777 }
778
779 ++pos;
780 }
781 }
782
783 void bscan(void) /* scan sequence 1 */
784 {
785 Int4 ecode;
786 Int4 i, pos, dd;
787 CharPtr q;
788 Int4 s5;
789 fragment PNTR df;
790
791 q = seq1 + se1 + 1;
792 ecode = 0L;
793 for (i = 1; i < W; ++i)
794 ecode = (ecode << 2) + encoding[*--q];
795 pos = se1;
796 while (*--q && pos >= mid2) {
797 reset_pos(bri);
798 db_reset_pos(bli_c);
799 db_reset_pos(bli_d);
800 ecode = ((ecode & mask) << 2) + encoding[*q];
801 for (s5 = bucket[ecode]; s5 != -1; s5 = blink[s5])
802 bfound(pos, s5);
803 /* update active regions */
804 update_active(bri,bli_c,bli_d,len1-pos);
805 dd = len2-sb2+pos;
806 if ((pos != mid2) && (df = bdiag_prev[dd])) {
807 bdiag_prev[dd] = NULL;
808 decre(df);
809 }
810
811 --pos;
812 }
813 while (pos >= mid2) {
814 /* update active regions */
815 update_active(bri,bli_c,bli_d,len1-pos);
816 dd = len2-sb2+pos;
817 if ((pos != mid2) && (df = bdiag_prev[dd])) {
818 bdiag_prev[dd] = NULL;
819 decre(df);
820 }
821 --pos;
822 }
823 }
824
825 /* found a matching 5-mer at (i,j) */
826 void found(Int4 i, Int4 j)
827 {
828 Int4 k;
829 fragment PNTR f;
830
831 if (i && j && seq1[i-1] == seq2[j-1])
832 return;
833 if (usearch(used_frag[i],j)) return;
834 for (k = W; seq1[i+k] && seq2[j+k] && seq1[i+k] == seq2[j+k]; ++k)
835 ;
836 f = (fragment *) ckalloc(sizeof(fragment));
837 f->i = i;
838 f->j = j;
839 f->k = k;
840 f->ref = 0;
841
842 local(f);
843
844 enter_endrow(f);
845 ++number_frags;
846 ++total_frags;
847 }
848
849 /* rfscan - recompute the given area: [t,b]X[l,r] */
850 void rfscan(Int4 t, Int4 b, Int4 l, Int4 r)
851 {
852 Int4 ecode;
853 Int4 i, pos;
854 CharPtr q;
855 Int4 s5;
856 fragment PNTR df;
857
858 if (b-t+1 < W || r-l+1 < W) return;
859 sb1 = t;
860 se1 = mid1 = b;
861 sb2 = l;
862 se2 = r;
863 free_table(bucket);
864 fbld_table();
865 fscan_init();
866
867 q = seq1 + sb1 - 1;
868 ecode = 0L;
869 for (i = 1; i < W; ++i)
870 ecode = (ecode << 2) + encoding[*++q];
871 pos = sb1;
872 while (*++q && pos <= mid1) {
873 reset_pos(ri);
874 db_reset_pos(li_c);
875 db_reset_pos(li_d);
876 ecode = ((ecode & mask) << 2) + encoding[*q];
877 for (s5 = bucket[ecode]; s5 != -1; s5 = blink[s5])
878 rffound(pos, s5);
879 /* update active regions */
880 update_active(ri,li_c,li_d,pos);
881 if (df = diag_prev[len1+se2-pos]) {
882 decre(df);
883 diag_prev[len1+se2-pos] = NULL;
884 }
885 ++pos;
886 }
887 while (pos<=mid1) {
888 update_active(ri,li_c,li_d,pos);
889 if (df = diag_prev[len1+se2-pos]) {
890 decre(df);
891 diag_prev[len1+se2-pos] = NULL;
892 }
893 ++pos;
894 }
895 free_all(mid1,sb2,se2,diag_prev,ri,li_c,li_d);
896 }
897
898 /* rffound a matching 5-mer at (i,j) */
899 void rffound(Int4 i, Int4 j)
900 {
901 Int4 k;
902 fragment PNTR f;
903
904 if (i && j && seq1[i-1] == seq2[j-1])
905 return;
906 if (usearch(used_frag[i],j)) return;
907 for (k = W; seq1[i+k] && seq2[j+k] && seq1[i+k] == seq2[j+k]; ++k);
908 if (i+k-1 > se1 || j+k-1 > se2) return; /* out of boundary */
909 f = (fragment *) ckalloc(sizeof(fragment));
910 f->i = i;
911 f->j = j;
912 f->k = k;
913 f->ref = 0;
914
915 local(f);
916
917 enter_endrow(f);
918 ++number_frags;
919 }
920
921 /* enter_endrow - enter end_point of the given fragment */
922 void enter_endrow(fragment PNTR f)
923 {
924 Int4 row,col;
925 frag_pt PNTR e, PNTR e1, PNTR e2;
926
927 row = f->i + f->k - 1;
928 col = f->j + f->k - 1;
929 /* put each fragment row by row according to its end point */
930 e = (frag_pt *) ckalloc(sizeof(frag_pt));
931 e->fp = f;
932 e->col = col;
933
934 if (end_row[row]) { /* end_row[row] isn't empty */
935 e1 = end_row[row];
936 e2 = e1->link;
937 if (e1->col > e->col) { /* e contains the smallest column # */
938 e->link = e1;
939 end_row[row] = e;
940 } else {
941 while (e2 && e2->col < e->col) {
942 e1 = e2;
943 e2 = e2->link;
944 }
945 e1->link = e;
946 e->link = e2;
947 }
948 } else { /* end_row[row] is empty */
949 e->link = end_row[row];
950 end_row[row] = e;
951 }
952 }
953
954 /* enter_endcol - enter end_point of the given fragment */
955 void enter_endcol(fragment PNTR f)
956 {
957 Int4 row,col;
958 frag_pt PNTR e, PNTR e1, PNTR e2;
959
960 row = f->i + f->k - 1;
961 col = f->j + f->k - 1;
962 /* put each fragment row by row according to its end point */
963 e = (frag_pt *) ckalloc(sizeof(frag_pt));
964 e->fp = f;
965 e->col = row;
966 if (end_col[col]) { /* end_col[col] isn't empty */
967 e1 = end_col[col];
968 e2 = e1->link;
969 if (e1->col > e->col) { /* e contains the smallest column # */
970 e->link = e1;
971 end_col[col] = e;
972 } else {
973 while (e2 && e2->col < e->col) {
974 e1 = e2;
975 e2 = e2->link;
976 }
977 e1->link = e;
978 e->link = e2;
979 }
980 } else { /* end_col[col] is empty */
981 e->link = end_col[col];
982 end_col[col] = e;
983 }
984 }
985
986 /* ffound a matching 5-mer at (i,j) */
987 void ffound(Int4 i, Int4 j)
988 {
989 Int4 k;
990 fragment PNTR f;
991
992 if (i && j && seq1[i-1] == seq2[j-1])
993 return;
994 if (usearch(used_frag[i],j)) return;
995 for (k = W; seq1[i+k] && seq2[j+k] && seq1[i+k] == seq2[j+k]; ++k);
996 if (i+k-1 > se1 || j+k-1 > se2) return; /* out of boundary */
997 f = (fragment *) ckalloc(sizeof(fragment));
998 f->i = i;
999 f->j = j;
1000 f->k = k;
1001 f->ref = 0;
1002
1003 ++number_frags;
1004 if (i+k-1>mid1) {
1005 cut[mid1+j-i] = (cut_frag *) ckalloc(sizeof(cut_frag));
1006 cut[mid1+j-i]->ff = f;
1007 calign(pfo,diag_prev,ri,li_c,li_d,f);
1008 return;
1009 } else align(pfo,diag_prev,ri,li_c,li_d,f);
1010
1011 enter_endrow(f);
1012 }
1013
1014 /* bfound a matching 5-mer at (i,j) */
1015 void bfound(Int4 i, Int4 j)
1016 {
1017 Int4 k;
1018 fragment PNTR f;
1019
1020 if (seq1[i+1] && seq2[j+1] && seq1[i+1] == seq2[j+1])
1021 return;
1022 /*
1023 for (k = W; seq1[i-k] && seq2[j-k] && seq1[i-k] == seq2[j-k]; ++k);
1024 */
1025 for (k = W; i-k>=0 && j-k>=0 && seq1[i-k] == seq2[j-k]; ++k);
1026 if (usearch(used_frag[i-k+1],j-k+1)) return;
1027 if (i-k+1 < sb1 || j-k+1 < sb2) return; /* out of boundary */
1028 f = (fragment *) ckalloc(sizeof(fragment));
1029 f->i = len1 - i;
1030 f->j = len2 - j;
1031 f->k = k;
1032 f->ref = 0;
1033
1034 ++number_frags;
1035 if (i-k+1 < mid2) {
1036 cut[mid1+(j-i)]->bf = f;
1037 calign(pbo,bdiag_prev,bri,bli_c,bli_d,f);
1038 return;
1039 } else align(pbo,bdiag_prev,bri,bli_c,bli_d,f);
1040
1041 enter_endrow(f);
1042 }
1043
1044 /* setup - initialization */
1045 void setup(void)
1046 {
1047 /* skip lists initialization */
1048 init();
1049 db_init();
1050
1051 /* hash table initialization */
1052 table_init();
1053
1054 /* pseudo fragments initialization */
1055 pfo = (fragment *) ckalloc(sizeof(fragment));
1056 pfo->k = 0;
1057 pfo->score = 0;
1058 pfo->bgf = pfo;
1059 pbo = (fragment *) ckalloc(sizeof(fragment));
1060 pbo->k = 0;
1061 pbo->score = 0;
1062 pbo->bgf = pbo;
1063 pff = (fragment *) ckalloc(sizeof(fragment));
1064 pff->k = 0;
1065 pbf = (fragment *) ckalloc(sizeof(fragment));
1066 pbf->k = 0;
1067
1068 /* initialization */
1069 ri = newList();
1070 li_c = db_newList();
1071 li_d = db_newList();
1072 sb2 = 0;
1073 se2 = len2 - 1;
1074 cur_end = len1 - 1;
1075 fbld_table();
1076 pfo->ref = 1;
1077 pbo->ref = 1;
1078 offset = offset1 = len1;
1079 number_frags = 0;
1080 deleted_frags = 0;
1081 output_frags = 0;
1082 total_frags = 0;
1083 }
1084
1085 void global_setup(fragment PNTR bf, fragment PNTR ef)
1086 {
1087 sb1 = bf->i + bf->k;
1088 se1 = ef->i - 1;
1089 sb2 = bf->j + bf->k;
1090 se2 = ef->j - 1;
1091 mid1 = (sb1 + se1) / 2;
1092 mid2 = mid1 + 1;
1093 pfo->i = sb1;
1094 pfo->j = sb2;
1095 pbo->i = len1-se1;
1096 pbo->j = len2-se2;
1097 }
1098
1099 /* global - find the best global alignment in a given area */
1100 void global(fragment PNTR bf, fragment PNTR ef, Int4 gs, SeqLocPtr slp1, SeqLocPtr slp2, SeqAlignPtr align)
1101 {
1102 Int4 i, ll;
1103 fragment PNTR fdf, PNTR bdf;
1104 fragment PNTR fsb, PNTR fse = NULL, PNTR bsb, PNTR bse = NULL, PNTR mf;
1105 Int4 js = 0;
1106
1107 global_setup(bf,ef);
1108 if ((se1-sb1+1 < W) || (se2-sb2+1 < W))
1109 return;
1110 free_table(bucket);
1111 fbld_table();
1112 fscan_init();
1113 fscan();
1114 free_table(bucket);
1115 bbld_table();
1116 bscan_init();
1117 bscan();
1118 pf_set(pff,mid2,se2+1);
1119 pf_set(pbf,len1-mid1,len2-se2);
1120 mf = NULL;
1121
1122 reset_pos(bri);
1123 db_reset_pos(bli_c);
1124 db_reset_pos(bli_d);
1125 ll = se2-sb2+1;
1126 for (i = 0; i <= ll; ++i) {
1127 if (cut[pff->j]) {
1128 fdf = cut[pff->j]->ff;
1129 bdf = cut[pff->j]->bf;
1130 js = s_Join(fdf,bdf)- (Int4) (DIGIT*(bdf->k));
1131
1132 if (js == gs) {
1133 mf = fdf;
1134 incre(mf);
1135 fse = fdf->bgf;
1136 incre(fse);
1137 bse = bdf->bgf;
1138 incre(bse);
1139 break;
1140 }
1141 }
1142 reset_pos(ri);
1143 db_reset_pos(li_c);
1144 db_reset_pos(li_d);
1145 malign(pfo,diag_prev,ri,li_c,li_d,pff);
1146 malign(pbo,bdiag_prev,bri,bli_c,bli_d,pbf);
1147 js = s_Join(pff,pbf);
1148 if ((pff->j-pff->i != (pff->bgf)->j-(pff->bgf)->i) &&
1149 (pbf->j-pbf->i != (pbf->bgf)->j-(pbf->bgf)->i))
1150 js = js + IO;
1151 if (js == gs) {
1152 fse = pff->bgf;
1153 bse = pbf->bgf;
1154 incre(fse);
1155 incre(bse);
1156 break;
1157 }
1158 --(pff->j);
1159 ++(pbf->j);
1160 }
1161 if (js != gs) { /* type 2 connection */
1162 reset_pos(bri);
1163 db_reset_pos(bli_c);
1164 db_reset_pos(bli_d);
1165 pf_set(pff,mid2,se2+1);
1166 pf_set(pbf,len1-mid1,len2-se2);
1167 ll = se2-sb2+1;
1168 for (i = 0; i <= ll; ++i) {
1169 reset_pos(ri);
1170 db_reset_pos(li_c);
1171 db_reset_pos(li_d);
1172 malign1(pfo,ri,li_c,li_d,pff);
1173 malign1(pbo,bri,bli_c,bli_d,pbf);
1174 js = s_Join(pff,pbf);
1175 if ((pff->j-pff->i != (pff->bgf)->j-(pff->bgf)->i) &&
1176 (pbf->j-pbf->i != (pbf->bgf)->j-(pbf->bgf)->i))
1177 js = js + IO;
1178 if (js == gs) {
1179 fse = pff->bgf;
1180 bse = pbf->bgf;
1181 incre(fse);
1182 incre(bse);
1183 break;
1184 }
1185 --(pff->j);
1186 ++(pbf->j);
1187 }
1188 if (js != gs) {
1189 #ifdef STATS
1190 /*
1191 fprintf(out_fp, "wrong!!! \n");
1192 fprintf(out_fp, "sb1=%ld se1=%ld mid1=%ld \n",sb1,se1,mid1);
1193 fprintf(out_fp, "gs=%ld, js=%ld\n",gs,js);
1194 */
1195 #endif
1196 free_all(mid1,sb2,se2,diag_prev,ri,li_c,li_d);
1197 free_all(len1-mid2,len2-se2,len2-sb2,bdiag_prev,bri,bli_c,bli_d);
1198 return;
1199 }
1200 }
1201 fsb = fse->bgf;
1202 if (fse == pfo) fse = NULL;
1203 if (fsb == pfo || fsb == fse) fsb = NULL;
1204 bsb = bse->bgf;
1205 if (bse == pbo) bse = NULL;
1206 else {
1207 bse->i = len1 - bse->i - bse->k + 1;
1208 bse->j = len2 - bse->j - bse->k + 1;
1209 }
1210 if (bsb == pbo || bsb == bse) bsb = NULL;
1211 else {
1212 bsb->i = len1 - bsb->i - bsb->k + 1;
1213 bsb->j = len2 - bsb->j - bsb->k + 1;
1214 }
1215 for (i=sb2; i<=se2; ++i)
1216 if (cut[i]) {
1217 decre(cut[i]->ff);
1218 decre(cut[i]->bf);
1219 MemFree(cut[i]);
1220 cut[i]=NULL;
1221 }
1222 free_all(mid1,sb2,se2,diag_prev,ri,li_c,li_d);
1223 free_all(len1-mid2,len2-se2,len2-sb2,bdiag_prev,bri,bli_c,bli_d);
1224 if (fsb) {
1225 align->segs = frag_print(fsb, slp1, slp2, align->segs);
1226 gs = fse->score-fsb->score-(Int4) (DIGIT*fse->k);
1227 global(fsb,fse,gs, slp1, slp2, align);
1228 }
1229 if (fse) {
1230 align->segs = frag_print(fse, slp1, slp2, align->segs);
1231 decre(fse);
1232 }
1233 if (mf) {
1234 align->segs = frag_print(mf, slp1, slp2, align->segs);
1235 decre(mf);
1236 }
1237 if (bse) {
1238 align->segs = frag_print(bse, slp1, slp2, align->segs);
1239 if (bsb) {
1240 gs = bse->score-bsb->score-(Int4) (DIGIT*bse->k);
1241 global(bse,bsb,gs, slp1, slp2, align);
1242 align->segs = frag_print(bsb, slp1, slp2, align->segs);
1243 }
1244 decre(bse);
1245 }
1246 }
1247
1248 /* join - return the cost after join */
1249 static Int4 s_Join(fragment PNTR ff, fragment PNTR bf)
1250 {
1251 return(ff->score + bf->score);
1252 }
1253
1254 Int4 tt,bb,ll,rr,tt1,ll1; /* current recomputation area */
1255 Int4 ecol,erow;
1256 Int4 row_code, col_code;
1257
1258 /* recom_setup - initialization for recomputation */
1259 void recom_setup(kbclass PNTR sp)
1260 {
1261 Int4 i,j;
1262
1263 erow = tt1 = tt = sp->t;
1264 bb = sp->b;
1265 ecol = ll1 = ll = sp->l;
1266 rr = sp->r;
1267 free_table(bucket);
1268 free_table(rcbucket);
1269 col_code = rcbld_table(bucket,bls,blink,seq2,ll1,rr);
1270 row_code = rcbld_table(rcbucket,rcbls,rcblink,seq1,tt1,bb);
1271
1272 /* Lists initialization for column-ward recomputation */
1273 ri = newList();
1274 li_c = db_newList();
1275 li_d = db_newList();
1276
1277 /* Lists initialization for row-ward recomputation */
1278 bri = newList();
1279 bli_c = db_newList();
1280 bli_d = db_newList();
1281
1282 for (i=len2-rr; i<=len2; ++i) col_int[i] = 0;
1283 for (i=len1-bb; i<=len1; ++i) rccol_int[i] = 0;
1284 j = len2+bb;
1285 for (i=len2-rr; i<=j; ++i) diag_flag[i] = 0;
1286 j = len1+rr;
1287 for (i=len1-bb; i<=j; ++i) rcdiag_flag[i] = 0;
1288
1289 }
1290
1291 /* recompute - recompute the scores of the area affected by removing sp */
1292 void recompute(kbclass PNTR sp)
1293 {
1294 recom_setup(sp);
1295 offset = cur_end = len2; /* len2 - 0 */
1296 offset1 = cur_end1 = len1; /* len1 - 0 */
1297 trcscan();
1298 for (;;) {
1299 while(tt>erow || ll>ecol) {
1300 offset = cur_end = len1;
1301 offset1 = cur_end1 = len2;
1302 while (tt>erow) ex_row();
1303 offset = cur_end = len2;
1304 offset1 = cur_end1 = len1;
1305 while (ll>ecol) ex_col();
1306 }
1307 if (disjoint() || (tt==0 && ll==0)) break;
1308 }
1309 rcfree();
1310 offset = offset1 = len1;
1311 #ifdef STATS
1312 /*
1313 fprintf(out_fp, "before tt=%ld, bb=%ld, ll=%ld, rr=%ld\n",tt,bb,ll,rr);
1314 fprintf(out_fp, "best_score= %ld\n",best_score);
1315 */
1316 #endif
1317 if (best_score > minscoreS()) {
1318 syn_area();
1319 #ifdef STATS
1320 /*
1321 fprintf(out_fp, "after tt=%ld, bb=%ld, ll=%ld, rr=%ld\n",tt,bb,ll,rr);
1322 */
1323 #endif
1324 rfscan(tt,bb,ll,rr);
1325 }
1326 best_score = 0;
1327 }
1328
1329 /* disjoint - return TRUE if [tt,bb]X[ll,rr]-[tt1,bb]X[ll1,rr] shares
1330 no vertices with the areas bounded by S */
1331 Int4 disjoint(void)
1332 {
1333 kbclass PNTR sp;
1334 Int4 i,j;
1335 Int4 endj;
1336
1337 for (i=0; i<lastS; ++i) {
1338 sp = S[i];
1339 if (sp->t <= bb && sp->b >= tt && sp->l <= rr && sp->r >= ll
1340 && (sp->t < tt1 || sp->l < ll1)) {
1341 if (sp->t < tt1) tt1 = sp->t;
1342 if (sp->l < ll1) ll1 = sp->l;
1343 if (erow > tt1) erow = tt1;
1344 if (ecol > ll1) ecol = ll1;
1345
1346 /* extend erow and ecol */
1347 endj = len2 + (bb-ll);
1348 for (j=len2+(tt-rr); j<=endj; ++j)
1349 ractive_ext(diag_prev[j]);
1350 sreset_pos(ri);
1351 sreset_pos(bri);
1352 db_sreset_pos(li_c);
1353 db_sreset_pos(li_d);
1354 db_sreset_pos(bli_c);
1355 db_sreset_pos(bli_d);
1356 while(next_key(ri)!=-1) ractive_ext(snext(ri));
1357 while(next_key(bri)!=-1) ractive_ext(snext(bri));
1358 while(db_next_key(li_c)!=-1) lactive_ext(db_snext(li_c));
1359 while(db_next_key(li_d)!=-1) lactive_ext(db_snext(li_d));
1360 while(db_next_key(bli_c)!=-1) lactive_ext(db_snext(bli_c));
1361 while(db_next_key(bli_d)!=-1) lactive_ext(db_snext(bli_d));
1362
1363 return FALSE;
1364 }
1365 }
1366 return TRUE;
1367 }
1368
1369 /* syn_area - synchronize [tt,bb]X[ll,rr] with all other areas */
1370 void syn_area(void)
1371 {
1372 kbclass PNTR sp;
1373 Int4 i,si,sj;
1374 Int4 flag;
1375
1376 flag = TRUE;
1377 while (flag) {
1378 flag = FALSE;
1379 for (i=0; i<lastS; ++i) {
1380 sp = S[i];
1381 si = (sp->first)->i;
1382 sj = (sp->first)->j;
1383 if (si <= bb && sp->b >= tt && sj <= rr && sp->r >= ll
1384 && (si < tt || sj < ll)) {
1385 if (si < tt) tt = si;
1386 if (sj < ll) ll = sj;
1387 flag = TRUE;
1388 }
1389 }
1390 }
1391 }
1392
1393 /* ex_row - extend one row (incrementally) */
1394 void ex_row(void)
1395 {
1396 Int4 s5;
1397 Int4 tpos;
1398
1399 --tt;
1400 if (tt-W+1 >= 0) {
1401 row_code = ex_table(rcbucket,rcbls,rcblink,seq1,tt,row_code);
1402 reset_pos(bri);
1403 db_reset_pos(bli_c);
1404 db_reset_pos(bli_d);
1405 for (s5 = bucket[row_code]; s5 != -1; s5 = blink[s5])
1406 rcfound(tt, s5);
1407 }
1408 /* update active regions */
1409 tpos = len1-tt;
1410 end_row[tpos] = tailor(end_row[tpos],len2-ll);
1411 rotupdate_active(end_row[tpos],len2-ll,tpos);
1412 update_active(bri,bli_c,bli_d,tpos);
1413 }
1414
1415 /* ex_col - extend one col (incrementally) */
1416 void ex_col(void)
1417 {
1418 Int4 s5;
1419 Int4 tpos;
1420
1421 --ll;
1422 if (ll-W+1 >= 0) {
1423 col_code = ex_table(bucket,bls,blink,seq2,ll,col_code);
1424 reset_pos(ri);
1425 db_reset_pos(li_c);
1426 db_reset_pos(li_d);
1427 for (s5 = rcbucket[col_code]; s5 != -1; s5 = rcblink[s5])
1428 trcfound(s5,ll);
1429 }
1430 /* update active regions */
1431 tpos = len2-ll;
1432 end_col[tpos] = tailor(end_col[tpos],len1-tt);
1433 cotupdate_active(end_col[tpos],len1-tt,tpos);
1434 rcupdate_active(ri,li_c,li_d,tpos);
1435 }
1436
1437
1438 /* rcfound a matching 5-mer at (i,j) */
1439 void rcfound(Int4 i, Int4 j)
1440 {
1441 Int4 k;
1442 fragment PNTR f;
1443
1444 if (seq1[i+1] && seq2[j+1] && seq1[i+1] == seq2[j+1])
1445 return;
1446 /* modified (Feb., 1994) */
1447 /*
1448 for (k = W; seq1[i-k] && seq2[j-k] && seq1[i-k] == seq2[j-k]; ++k);
1449 */
1450 for (k = W; i-k>=0 && j-k>=0 && seq1[i-k] == seq2[j-k]; ++k);
1451 if (usearch(used_frag[i-k+1],j-k+1)) return;
1452 f = (fragment *) ckalloc(sizeof(fragment));
1453 f->i = len1 - i;
1454 f->j = len2 - j;
1455 f->k = k;
1456 f->ref = 0;
1457
1458 rclocal(f,f->j,pos_diag(f),bri,bli_c,bli_d);
1459 if ((f->j + f->k - 1 <= len2-ll) || (f->i + f->k - 1 > len1-tt))
1460 enter_endrow(f);
1461 if (f->j + f->k - 1 > len2-ll) enter_endcol(f);
1462 ++number_frags;
1463 }
1464
1465 /* trcscan - scan seq1 and seq2 backward (columnwise) */
1466 void trcscan(void)
1467 {
1468 Int4 ecode;
1469 Int4 i, pos, tpos;
1470 CharPtr q;
1471 Int4 s5;
1472
1473 q = seq2 + rr + 1;
1474 ecode = 0L;
1475 for (i = 1; i < W; ++i)
1476 ecode = (ecode << 2) + encoding[*--q];
1477 pos = rr;
1478 while (*--q && pos >= ll) {
1479 reset_pos(ri);
1480 db_reset_pos(li_c);
1481 db_reset_pos(li_d);
1482 ecode = ((ecode & mask) << 2) + encoding[*q];
1483 for (s5 = rcbucket[ecode]; s5 != -1; s5 = rcblink[s5])
1484 trcfound(s5,pos);
1485 /* update active regions */
1486 tpos = len2-pos;
1487 end_col[tpos] = tailor(end_col[tpos],len1-tt);
1488 cotupdate_active(end_col[tpos],len1-tt,tpos);
1489 rcupdate_active(ri,li_c,li_d,tpos);
1490
1491 --pos;
1492 }
1493 }
1494
1495 /* trcfound a matching 5-mer at (i,j) */
1496 void trcfound(Int4 i, Int4 j)
1497 {
1498 Int4 k;
1499 fragment PNTR f;
1500
1501 if (seq1[i+1] && seq2[j+1] && seq1[i+1] == seq2[j+1])
1502 return;
1503 /*
1504 for (k = W; seq1[i-k] && seq2[j-k] && seq1[i-k] == seq2[j-k]; ++k);
1505 */
1506 for (k = W; i-k>=0 && j-k>=0 && seq1[i-k] == seq2[j-k]; ++k);
1507 if (usearch(used_frag[i-k+1],j-k+1)) return;
1508 f = (fragment *) ckalloc(sizeof(fragment));
1509 f->i = len1 - i;
1510 f->j = len2 - j;
1511 f->k = k;
1512 f->ref = 0;
1513
1514 rclocal(f,f->i,rcpos_diag(f),ri,li_c,li_d);
1515 if ((f->i + f->k - 1<= len1-tt) || (f->j + f->k - 1> len2-ll))
1516 enter_endcol(f);
1517 if (f->i + f->k - 1> len1-tt) enter_endrow(f);
1518 ++number_frags;
1519 }
1520
1521 void rclocal(fragment PNTR f, Int4 cur_col, Int4 cur_diag, list cri, db_list cli_c, db_list cli_d)
1522 {
1523 Int4 max_score, cs;
1524 fragment PNTR pf, PNTR cf, PNTR df;
1525 Int4 col, diag, ocur_diag;
1526
1527 max_score = 0;
1528 f->bgf = f;
1529
1530 /* compute the best left influence */
1531 cf = db_rsearch_next(cli_c, cur_col);
1532 col = db_cur_key(cli_c);
1533 df = db_lsearch_next(cli_d, cur_diag);
1534 diag = db_cur_key(cli_d);
1535 pf = ((cur_col - col > cur_diag-diag)&&(diag != -1)) ? df : cf;
1536 if (pf) {
1537 cs = pf->score - weight(pf, f);
1538 if (cs > max_score) {
1539 max_score = cs;
1540 f->bgf = pf->bgf;
1541 incre(f->bgf);
1542 }
1543 }
1544
1545 /* compute the best mismatch */
1546 ocur_diag = pos_diag(f);
1547 if (pf = diag_prev[ocur_diag]) {
1548 cs = pf->score - weight(pf, f);
1549 if (cs > max_score) {
1550 max_score = cs;
1551 if (f != f->bgf) decre(f->bgf);
1552 f->bgf = pf->bgf;
1553 incre(f->bgf);
1554 }
1555 decre(pf);
1556 }
1557 diag_prev[ocur_diag] = f;
1558 incre(f);
1559
1560 /* compute the best right influence */
1561 pf = rsearch_next(cri,cur_diag);
1562 if (pf) {
1563 cs = pf->score - weight(pf, f);
1564 if (cs > max_score) {
1565 max_score = cs;
1566 if (f != f->bgf) decre(f->bgf);
1567 f->bgf = pf->bgf;
1568 incre(f->bgf);
1569 }
1570 }
1571
1572 f->score = max_score + (Int4) (DIGIT * f->k);
1573 if (f->score > best_score) best_score = f->score;
1574 lactive_ext(f);
1575 }
1576
1577 /* lactive_ext - check if we need to extend the area reached by active
1578 fragments (for all cases) */
1579 void lactive_ext(fragment PNTR f)
1580 {
1581 Int4 endi,endj,ai,aj,ai1,aj1,cur_score;
1582
1583 if (!f) return;
1584 /* check if we need to extend the area reached by active fragments */
1585 if ((f->bgf)->i <= len1-tt1 && (f->bgf)->j <= len2-ll1) {
1586 endi = f->i + f->k - 1;
1587 endj = f->j + f->k - 1;
1588 cur_score = f->score;
1589 ai = endi + cur_score / IR + 2;
1590 /*
1591 ai1 = endi + (cur_score-IO)/IE + 1;
1592 */
1593 ai1 = endi + cur_score/IE + 2;
1594 if (ai1 > ai) ai = ai1;
1595 if (ai > len1) ai = len1;
1596 ai = len1-ai;
1597 if (ai < erow) erow = ai;
1598 aj = endj + cur_score / IR + 2;
1599 /*
1600 aj1 = endj + (cur_score-IO)/IE + 1;
1601 */
1602 aj1 = endj + cur_score/IE + 2;
1603 if (aj1 > aj) aj = aj1;
1604 if (aj > len2) aj = len2;
1605 aj = len2-aj;
1606 if (aj < ecol) ecol = aj;
1607 }
1608 }
1609
1610 /* ractive_ext - check if we need to extend the area reached by active
1611 fragments (for right influence list or mismatch) */
1612 void ractive_ext(fragment PNTR f)
1613 {
1614 Int4 endi,endj,ai,aj,cur_score;
1615
1616 if (!f) return;
1617 /* check if we need to extend the area reached by active fragments */
1618 if ((f->bgf)->i <= len1-tt1 && (f->bgf)->j <= len2-ll1) {
1619 endi = f->i + f->k - 1;
1620 endj = f->j + f->k - 1;
1621 cur_score = f->score;
1622 ai = endi + cur_score / IR + 2;
1623 if (ai > len1) ai = len1;
1624 ai = len1-ai;
1625 if (ai < erow) erow = ai;
1626 aj = endj + cur_score / IR + 2;
1627 if (aj > len2) aj = len2;
1628 aj = len2-aj;
1629 if (aj < ecol) ecol = aj;
1630 }
1631 }
1632
1633 void rcfree(void)
1634 {
1635 Int4 i,j;
1636 frag_pt PNTR p, PNTR q;
1637
1638 for (i=len1-tt+1; i<=len1; ++i) {
1639 if (intersect[i]) {
1640 idb_freeList(intersect[i]);
1641 intersect[i] = NULL;
1642 }
1643 if (end_row[i]) {
1644 for (p=end_row[i]; p; q=p, p=p->link, MemFree(q));
1645 end_row[i] = NULL;
1646 }
1647 }
1648 for (i=len2-ll+1; i<=len2; ++i) {
1649 if (rcintersect[i]) {
1650 idb_freeList(rcintersect[i]);
1651 rcintersect[i] = NULL;
1652 }
1653 if (end_col[i]) {
1654 for (p=end_col[i]; p; q=p, p=p->link, MemFree(q));
1655 end_col[i] = NULL;
1656 }
1657 }
1658 j = len2-ll+bb;
1659 for (i=len2-rr+tt; i<=j; ++i)
1660 if (diag_prev[i]) {
1661 decre(diag_prev[i]);
1662 diag_prev[i]=NULL;
1663 }
1664 freeList(ri);
1665 db_freeList(li_c);
1666 db_freeList(li_d);
1667 freeList(bri);
1668 db_freeList(bli_c);
1669 db_freeList(bli_d);
1670 }
1671
1672 /*static SeqAlignPtr link_align(SeqAlignPtr align, SeqAlignPtr head)
1673 {
1674 if(head == NULL)
1675 return align;
1676 while(head->next != NULL)
1677 head = head->next;
1678 head->next = align;
1679 return align;
1680
1681 }*/
1682
1683 /* printS - print the given class */
1684 SeqAlignPtr printS(kbclass PNTR sp, SeqLocPtr slp1, SeqLocPtr slp2, SeqAlignPtr head)
1685 {
1686 Int4 gs;
1687 SeqAlignPtr align = NULL;
1688 ScorePtr scp;
1689
1690 #ifdef STATS
1691 /*
1692 fprintf(out_fp, "first: ");
1693 */
1694
1695 align->segs = frag_print(sp->first, slp1, slp2, align->segs);
1696
1697 /*
1698 fprintf(out_fp, "last: ");
1699 */
1700
1701 align->segs = frag_print(sp->last, slp1, slp2, align->segs);
1702
1703 /*
1704 fprintf(out_fp, "score = %.1f\n", ((FloatLo) sp->score)/DIGIT);
1705 fprintf(out_fp, "t = %ld, b = %ld, l = %ld, r = %ld\n\n", sp->t,sp->b,sp->l,sp->r);
1706 */
1707 #endif
1708 #ifdef STATS
1709 old_i_end = 0;
1710 /*
1711 fprintf(out_fp, "a {\n s %.1f\n", ((FloatLo) sp->score)/DIGIT);
1712 fprintf(out_fp, " b %ld %ld\n e %ld %ld\n", sp->first->i, sp->first->j,
1713 sp->last->i + sp->last->k - 1, sp->last->j + sp->last->k - 1);
1714 */
1715 #endif
1716
1717 align = SeqAlignNew();
1718 align->type = 2; /*diag*/
1719 align->segtype = 1;
1720 align->segs = NULL;
1721
1722 scp = ScoreNew();
1723 scp->choice = 2; /**it is a real number**/
1724 scp->value.realvalue =((FloatHi) sp->score)/DIGIT;
1725 scp->next = NULL;
1726 align->score = scp;
1727
1728
1729 /*
1730 fprintf(out_fp, "%.1f\n", ((FloatHi) sp->score)/DIGIT);
1731 */
1732 /*
1733 align->segs = frag_print(sp->first, slp1, slp2, align->segs);
1734 align->segs = frag_print(sp->last, slp1, slp2, align->segs);
1735 */
1736 if (sp->last) {
1737 if (sp->last != (sp->last)->bgf) {
1738 align->segs = frag_print((sp->last)->bgf, slp1, slp2, align->segs);
1739 gs = sp->score - (Int4) (DIGIT * (((sp->last)->bgf)->k + (sp->last)->k));
1740 global((sp->last)->bgf,sp->last,gs, slp1, slp2, align);
1741 }
1742 align->segs = frag_print(sp->last, slp1, slp2, align->segs);
1743 decre(sp->last);
1744 }
1745 #ifdef STATS
1746 /*
1747 fprintf(out_fp, " l %ld %ld %ld %ld 0.0\n}\n",
1748 old_i_start, old_j_start, old_i_end, old_j_end);
1749 */
1750 #endif
1751 if(head == NULL)
1752 head = align;
1753 else
1754 link_align(align, head);
1755 return head;
1756 }
1757
1758 /* class_print - print K best local alignments */
1759 SeqAlignPtr class_print(SeqLocPtr slp1, SeqLocPtr slp2)
1760 {
1761 Int4 i,j;
1762 kbclass PNTR sp;
1763 SeqAlignPtr align;
1764
1765 j = lastS;
1766 align = NULL;
1767
1768 for (i=0; i<j; ++i) {
1769 #ifdef STATS
1770 /*
1771 fprintf(out_fp, "\n\n************ Rank %ld ************ \n",i+1);
1772 */
1773 #endif
1774 sp = findmaxS();
1775 align = printS(sp, slp1, slp2, align);
1776 /**fflush(stdout);**/
1777 curS = 0;
1778 --K;
1779 if (i != j-1) {
1780 if (IE==0 || IR==0) rfscan(0,sp->b,0,sp->r);
1781 else recompute(sp);
1782 }
1783 MemFree(sp);
1784 }
1785 return align;
1786 }
1787
1788 /* findmaxS - return the class with the maximum score */
1789 kbclass PNTR findmaxS(void)
1790 {
1791 Int4 i, ms, mi;
1792 kbclass PNTR sp;
1793
1794 mi = 0;
1795 ms = S[0]->score;
1796 for (i=1; i<lastS; ++i)
1797 if (S[i]->score > ms) {
1798 ms = S[i]->score;
1799 mi = i;
1800 }
1801 sp = S[mi];
1802 if (minS == lastS-1) minS = mi;
1803 if (lastS>1) {
1804 S[mi] = S[lastS-1];
1805 S[lastS-1] = NULL;
1806 }
1807 --lastS;
1808 return(sp);
1809 }
1810
1811 /* findS - find the equivalent class corresponding to f */
1812 kbclass PNTR findS(fragment PNTR f)
1813 {
1814 Int4 i, j;
1815 fragment PNTR sf;
1816
1817 for (i=curS, j=0; j<lastS; i= (i+1) % lastS, ++j) {
1818 sf = S[i]->first;
1819 if (sf->i == f->i && sf->j == f->j) {
1820 curS = i;
1821 return S[i];
1822 }
1823 }
1824 return(NULL);
1825 }
1826
1827 /* set_class - setup values for a new class */
1828 void set_class(Int4 si, fragment PNTR f)
1829 {
1830 kbclass PNTR sp;
1831 Int4 endi, endj, t1, t2;
1832 Int4 endi1, endj1;
1833
1834 sp = S[si];
1835 sp->first = f->bgf;
1836 sp->last = f;
1837 sp->score = f->score;
1838 endi = f->i + f->k - 1;
1839 endj = f->j + f->k - 1;
1840 t1 = (f->score - mw) / IR;
1841 t2 = (f->score - mw - IO) / IE;
1842 if (t2 > t1) t1 = t2;
1843 endi1 = endi + t1;
1844 if (endi1 >= len1) endi1 = len1-1;
1845 if (endi1 < endi) endi1 = endi;
1846 endj1 = endj + t1;
1847 if (endj1 >= len2) endj1 = len2-1;
1848 if (endj1 < endj) endj1 = endj;
1849 sp->t = endi;
1850 sp->b = endi1;
1851 sp->l = endj;
1852 sp->r = endj1;
1853 curS = si;
1854 }
1855
1856 /* replaceS - replace the min_score class */
1857 void replaceS(fragment PNTR f)
1858 {
1859 Int4 i;
1860
1861 incre(f);
1862 decre(S[minS]->last);
1863 set_class(minS,f);
1864 for (i=0; i<lastS; ++i)
1865 if (S[i]->score < S[minS]->score) minS = i;
1866 }
1867
1868 /* insertS - insert a new class */
1869 void insertS(fragment PNTR f)
1870 {
1871 incre(f);
1872 S[lastS] = (kbclass *) ckalloc(sizeof(kbclass));
1873 set_class(lastS,f);
1874 if (S[lastS]->score < S[minS]->score) minS = lastS;
1875 ++lastS;
1876 }
1877
1878 /* sizeS - return the number of current equivalent classes */
1879 Int4 sizeS(void)
1880 {
1881 return lastS;
1882 }
1883
1884 /* minscoreS - return the minimum score in all classes */
1885 Int4 minscoreS(void)
1886 {
1887 return S[minS]->score;
1888 }
1889
1890 Int4 enterS(fragment PNTR f, Int4 w, Int4 l)
1891 {
1892 kbclass PNTR s;
1893 Int4 i;
1894 Int4 endi, endj;
1895 Int4 endi1, endj1, t1, t2;
1896
1897 if (s = findS(f->bgf)) {
1898 if (s->score < f->score) {
1899 incre(f);
1900 decre(s->last);
1901 s->score = f->score;
1902 s->first = f->bgf;
1903 s->last = f;
1904 if (s == S[minS])
1905 for (i=0; i<lastS; ++i)
1906 if (S[i]->score < S[minS]->score) minS = i;
1907 }
1908 endi = f->i + f->k - 1;
1909 endj = f->j + f->k - 1;
1910 t1 = (f->score - mw) / IR;
1911 t2 = (f->score - mw - IO) / IE;
1912 if (t2 > t1) t1 = t2;
1913 endi1 = endi + t1;
1914 if (endi1 >= len1) endi1 = len1-1;
1915 if (endi1 < endi) endi1 = endi;
1916 endj1 = endj + t1;
1917 if (endj1 >= len2) endj1 = len2-1;
1918 if (endj1 < endj) endj1 = endj;
1919 if (s->t > endi) s->t = endi;
1920 if (s->b < endi1) s->b = endi1;
1921 if (s->l > endj) s->l = endj;
1922 if (s->r < endj1) s->r = endj1;
1923 } else {
1924 if (sizeS() == l) replaceS(f);
1925 else insertS(f);
1926 }
1927 if (sizeS() == l) return minscoreS();
1928 else return w;
1929 }
1930
1931 /* find the best local alignment */
1932 void local(fragment PNTR f)
1933 {
1934 Int4 max_score, cs;
1935 fragment PNTR pf, PNTR cf, PNTR df;
1936 Int4 cur_diag, col, diag;
1937
1938 max_score = 0;
1939 cur_diag = pos_diag(f);
1940 f->bgf = f;
1941
1942 /* compute the best left influence */
1943 cf = db_rsearch_next(li_c, f->j);
1944 col = db_cur_key(li_c);
1945 df = db_lsearch_next(li_d, cur_diag);
1946 diag = db_cur_key(li_d);
1947 pf = ((f->j - col > cur_diag-diag)&&(diag != -1)) ? df : cf;
1948 if (pf) {
1949 cs = pf->score - weight(pf, f);
1950 if (cs > max_score || (cs == max_score && topo_win(f,pf))) {
1951 max_score = cs;
1952 f->bgf = pf->bgf;
1953 incre(f->bgf);
1954 }
1955 }
1956
1957 /* compute the best mismatch */
1958 if (pf = diag_prev[cur_diag]) {
1959 cs = pf->score - weight(pf, f);
1960 if (cs > max_score || (cs == max_score && topo_win(f,pf))) {
1961 max_score = cs;
1962 if (f != f->bgf) decre(f->bgf);
1963 f->bgf = pf->bgf;
1964 incre(f->bgf);
1965 }
1966 decre(pf);
1967 }
1968 diag_prev[cur_diag] = f;
1969 incre(f);
1970
1971 /* compute the best right influence */
1972 pf = rsearch_next(ri,cur_diag);
1973 if (pf) {
1974 cs = pf->score - weight(pf, f);
1975 if (cs > max_score || (cs == max_score && topo_win(f,pf))) {
1976 max_score = cs;
1977 if (f != f->bgf) decre(f->bgf);
1978 f->bgf = pf->bgf;
1979 incre(f->bgf);
1980 }
1981 }
1982
1983 f->score = max_score + (Int4) (DIGIT * f->k);
1984
1985 /* update current equivalent classes */
1986 if (f->score > mw) mw = enterS(f,mw,K);
1987 }
1988
1989 /* find the best alignment */
1990 void align(fragment PNTR pof, fragment PNTR cdiag_prev[],list cri,db_list cli_c, db_list cli_d, fragment PNTR f)
1991 {
1992 Int4 max_score, cs;
1993 fragment PNTR pf, PNTR cf, PNTR df;
1994 Int4 cur_diag, col, diag;
1995
1996 cur_diag = pos_diag(f);
1997
1998 /* compute the best mismatch */
1999 if (pf = cdiag_prev[cur_diag]) {
2000 cs = pf->score - weight(pf, f);
2001 max_score = cs;
2002 f->bgf = pf->bgf;
2003 incre(f->bgf);
2004 decre(pf);
2005 } else {
2006 f->bgf = f;
2007 max_score = -1 * weight(pof,f);
2008 }
2009 cdiag_prev[cur_diag] = f;
2010 incre(f);
2011
2012 /* compute the best right influence */
2013 pf = rsearch_next(cri,cur_diag);
2014 if (pf) {
2015 cs = pf->score - weight(pf, f);
2016 if (cs > max_score) {
2017 max_score = cs;
2018 if (f != f->bgf) decre(f->bgf);
2019 f->bgf = pf->bgf;
2020 incre(f->bgf);
2021 }
2022 }
2023
2024 /* compute the best left influence */
2025 cf = db_rsearch_next(cli_c, f->j);
2026 col = db_cur_key(cli_c);
2027 df = db_lsearch_next(cli_d, cur_diag);
2028 diag = db_cur_key(cli_d);
2029 pf = ((f->j - col > cur_diag-diag)&&(diag != -1)) ? df : cf;
2030 if (pf) {
2031 cs = pf->score - weight(pf, f);
2032 if (cs > max_score) {
2033 max_score = cs;
2034 if (f != f->bgf) decre(f->bgf);
2035 f->bgf = pf->bgf;
2036 incre(f->bgf);
2037 }
2038 }
2039
2040 /* update the current score */
2041 f->score = max_score + (Int4) (DIGIT * f->k);
2042 }
2043
2044 /* find the best alignment for cut_frag */
2045 void calign(fragment PNTR pof, fragment PNTR cdiag_prev[], list cri, db_list cli_c, db_list cli_d, fragment PNTR f)
2046 {
2047 Int4 max_score, cs;
2048 fragment PNTR pf, PNTR cf, PNTR df;
2049 Int4 cur_diag, col, diag;
2050
2051 cur_diag = pos_diag(f);
2052
2053 /* compute the best mismatch */
2054 if (pf = cdiag_prev[cur_diag]) {
2055 cs = pf->score - weight(pf, f);
2056 max_score = cs;
2057 f->bgf = pf;
2058 incre(pf);
2059 } else {
2060 f->bgf = pof;
2061 incre(pof);
2062 max_score = -1 * weight(pof,f);
2063 }
2064 incre(f);
2065
2066 /* compute the best right influence */
2067 pf = rsearch_next(cri,cur_diag);
2068 if (pf) {
2069 cs = pf->score - weight(pf, f);
2070 if (cs > max_score) {
2071 max_score = cs;
2072 decre(f->bgf);
2073 f->bgf = pf;
2074 incre(pf);
2075 }
2076 }
2077
2078 /* compute the best left influence */
2079 cf = db_rsearch_next(cli_c, f->j);
2080 col = db_cur_key(cli_c);
2081 df = db_lsearch_next(cli_d, cur_diag);
2082 diag = db_cur_key(cli_d);
2083 pf = ((f->j - col > cur_diag-diag)&&(diag != -1)) ? df : cf;
2084 if (pf) {
2085 cs = pf->score - weight(pf, f);
2086 if (cs > max_score) {
2087 max_score = cs;
2088 decre(f->bgf);
2089 f->bgf = pf;
2090 incre(pf);
2091 }
2092 }
2093
2094 /* update the current score */
2095 f->score = max_score + (Int4) (DIGIT * f->k);
2096 }
2097
2098 /* malign - midpoint alignment */
2099 void malign(fragment PNTR pof, fragment PNTR cdiag_prev[], list cri, db_list cli_c, db_list cli_d, fragment PNTR f)
2100 {
2101 Int4 max_score, cs;
2102 fragment PNTR pf, PNTR cf, PNTR df;
2103 Int4 cur_diag, col, diag;
2104
2105 cur_diag = pos_diag(f);
2106 f->bgf = pof;
2107
2108 /* compute the best mismatch */
2109 if (pf = cdiag_prev[cur_diag]) {
2110 cs = pf->score - weight(pf, f);
2111 max_score = cs;
2112 f->bgf = pf;
2113 } else max_score = -1 * weight(pof,f);
2114
2115 /* compute the best right influence */
2116 pf = rsearch_next(cri,cur_diag);
2117 if (pf) {
2118 cs = pf->score - weight(pf, f);
2119 if (cs > max_score) {
2120 max_score = cs;
2121 f->bgf = pf;
2122 }
2123 }
2124
2125 /* compute the best left influence */
2126 cf = db_rsearch_next(cli_c, f->j);
2127 col = db_cur_key(cli_c);
2128 df = db_lsearch_next(cli_d, cur_diag);
2129 diag = db_cur_key(cli_d);
2130 pf = ((f->j - col > cur_diag-diag)&&(diag != -1)) ? df : cf;
2131 if (pf) {
2132 cs = pf->score - weight(pf, f);
2133 if (cs > max_score) {
2134 max_score = cs;
2135 f->bgf = pf;
2136 }
2137 }
2138
2139 /* update the current score */
2140 f->score = max_score + (Int4) (DIGIT * f->k);
2141 }
2142
2143 /* malign1 - midpoint alignment */
2144 void malign1(fragment PNTR pof, list cri, db_list cli_c, db_list cli_d, fragment PNTR f)
2145 {
2146 Int4 max_score, cs;
2147 fragment PNTR pf, PNTR cf, PNTR df;
2148 Int4 cur_diag, col, diag;
2149
2150 cur_diag = pos_diag(f);
2151 f->bgf = pof;
2152
2153 if (cur_diag == pos_diag(pof))
2154 max_score = MININT;
2155 else
2156 max_score = -1 * weight(pof,f);
2157
2158 /* compute the best right influence */
2159 pf = rsearch_next(cri,cur_diag);
2160 if (pf) {
2161 cs = pf->score - weight(pf, f);
2162 if (cs > max_score) {
2163 max_score = cs;
2164 f->bgf = pf;
2165 }
2166 }
2167
2168 /* compute the best left influence */
2169 cf = db_rsearch_next(cli_c, f->j);
2170 col = db_cur_key(cli_c);
2171 df = db_lsearch_next(cli_d, cur_diag);
2172 diag = db_cur_key(cli_d);
2173 pf = ((f->j - col > cur_diag-diag)&&(diag != -1)) ? df : cf;
2174 if (pf) {
2175 cs = pf->score - weight(pf, f);
2176 if (cs > max_score) {
2177 max_score = cs;
2178 f->bgf = pf;
2179 }
2180 }
2181
2182 /* update the current score */
2183 f->score = max_score + (Int4) (DIGIT * f->k);
2184 }
2185
2186 /* tailor - return proper rcopy */
2187 frag_pt PNTR tailor(frag_pt PNTR rcopy, Int4 col)
2188 {
2189 frag_pt PNTR p, PNTR t1, PNTR t2;
2190
2191 if (!rcopy) return NULL;
2192 p = rcopy;
2193 t1 = NULL;
2194 while (p && p->col <= col) {
2195 t1 = p;
2196 p = p->link;
2197 }
2198 while (p) {
2199 t2 = p->link;
2200 MemFree(p);
2201 p = t2;
2202 }
2203 if (t1) { /* rcopy->col <= col */
2204 t1->link = NULL;
2205 return rcopy;
2206 } else return NULL;
2207 }
2208
2209 /* otr_survive - reduce the rcopy orthogonally */
2210 frag_pt PNTR otr_survive(frag_pt PNTR rcopy, Int4 col)
2211 {
2212 frag_pt PNTR p, PNTR q, PNTR r, PNTR t1, PNTR t2;
2213 Int4 cur_score;
2214
2215 if (!rcopy) return NULL;
2216 r = NULL;
2217 q = NULL;
2218 p = rcopy;
2219 while (p) {
2220 if ((p->fp)->score-IR*(col-p->col) > 0) {
2221 r = (frag_pt *) ckalloc(sizeof(frag_pt));
2222 r->fp = p->fp;
2223 r->col = p->col;
2224 r->link = q;
2225 q = r;
2226 }
2227 p = p->link;
2228 }
2229
2230 /* sweep right */
2231 p = r;
2232 while (p) {
2233 cur_score = (p->fp)->score;
2234 for (q = p->link; q && ((cur_score - IE*(p->col-q->col))
2235 > (q->fp)->score - IR*(p->col-q->col)) ; q = q->link);
2236 t1 = p->link;
2237 while (t1 != q) {
2238 t2 = t1->link;
2239 MemFree(t1);
2240 t1 = t2;
2241 }
2242 p->link = q;
2243 p = q;
2244 }
2245 return r;
2246 }
2247
2248 /* rotr_merge - r_merge orthogonally */
2249 void rotr_merge(frag_pt PNTR rcopy)
2250 {
2251 frag_pt PNTR p, PNTR q;
2252 fragment PNTR pf, PNTR rf;
2253
2254 for (p = rcopy; p; q=p, p=p->link, MemFree(q)) {
2255 pf = p->fp;
2256 rf = rsearch_next(ri,rcpos_diag(pf));
2257 /* determine if we need to keep pf in cri */
2258 if (!rf || (rf && rotr_win(pf,rf)))
2259 rcr_add_point(ri,pf,p->col);
2260 }
2261 }
2262
2263 /* rotr_win - TRUE if pf is better in common right influence area */
2264 Int4 rotr_win(fragment PNTR pf, fragment PNTR rf)
2265 {
2266 Int4 r1, r2, diag_diff;
2267
2268 diag_diff = rcpos_diag(pf) - rcpos_diag(rf);
2269 r1 = pf->j + pf->k - 1;
2270 r2 = rf->j + rf->k - 1;
2271 if (rf->score-IR*(r1-r2)-IE*diag_diff < pf->score)
2272 return TRUE;
2273 return FALSE;
2274 }
2275
2276 /* cotr_merge - r_merge orthogonally */
2277 void cotr_merge(frag_pt PNTR rcopy)
2278 {
2279 frag_pt PNTR p, PNTR q;
2280 fragment PNTR pf, PNTR rf;
2281
2282 for (p = rcopy; p; q=p, p=p->link, MemFree(q)) {
2283 pf = p->fp;
2284 rf = rsearch_next(bri,pos_diag(pf));
2285 /* determine if we need to keep pf in cri */
2286 if (!rf || (rf && cotr_win(pf,rf)))
2287 r_add_point(bri,pf,p->col);
2288 }
2289 }
2290
2291 /* cotr_win - TRUE if pf is better in common right influence area */
2292 Int4 cotr_win(fragment PNTR pf, fragment PNTR rf)
2293 {
2294 Int4 r1, r2, diag_diff;
2295
2296 diag_diff = pos_diag(pf) - pos_diag(rf);
2297 r1 = pf->i + pf->k - 1;
2298 r2 = rf->i + rf->k - 1;
2299 if (rf->score-IR*(r1-r2)-IE*diag_diff < pf->score)
2300 return TRUE;
2301 return FALSE;
2302 }
2303
2304 /* otl_survive - reduce the rcopy orthogonally (left influence) */
2305 frag_pt PNTR otl_survive(frag_pt PNTR rcopy, Int4 col)
2306 {
2307 frag_pt PNTR p, PNTR q, PNTR r, PNTR t;
2308 Int4 cur_score,col_diff;
2309 fragment PNTR pf;
2310
2311 if (!rcopy) return NULL;
2312 r = NULL;
2313 t = NULL;
2314 p = rcopy;
2315 while (p) {
2316 pf = p->fp;
2317 cur_score = pf->score;
2318 col_diff = col-p->col;
2319 if ((cur_score-(IO+IE*col_diff) > 0) ||
2320 (cur_score-IR*col_diff> 0)) {
2321 r = (frag_pt *) ckalloc(sizeof(frag_pt));
2322 r->fp = p->fp;
2323 r->col = p->col;
2324 r->link = t;
2325 t = r;
2326 for (q = p->link; q && ((cur_score - IE*(q->col-p->col))
2327 >= (q->fp)->score) ; q = q->link);
2328 p = q;
2329 } else p=p->link;
2330 }
2331 return r;
2332 }
2333
2334 /* otl_merge - l_merge for orthogonally recomputation */
2335 void otl_merge(frag_pt PNTR r, Int4 col, Int4 row, db_list cli_c, db_list cli_d, idb_list cintersect[], Int4 ccol_int[], Char cdiag_flag[], Int4 (PNTR cpos_diag)(fragment PNTR), Int4 (PNTR otl_win)(fragment PNTR, fragment PNTR))
2336 {
2337 fragment PNTR df, PNTR cf, PNTR rf, PNTR pf, PNTR nf;
2338 Int4 cur_diag, lrow, diag, in_row, next_diag;
2339 frag_pt PNTR p;
2340 Int4 fs;
2341
2342 if (!r) return;
2343 db_reset_pos(cli_c);
2344 db_reset_pos(cli_d);
2345 cur_diag = offset1 + (row-col);
2346 cf = db_rsearch_next(cli_c, row);
2347 lrow = db_cur_key(cli_c);
2348 df = db_rsearch_next(cli_d,cur_diag);
2349 diag = db_cur_key(cli_d);
2350 rf = ((row-lrow > cur_diag-diag) && (diag != -1)) ? df : cf;
2351 if ((rf == NULL) && (diag == -1)) { /* cli_c && cli_d are empty */
2352 db_insert_next(cli_c,row,r->fp);
2353 incre(r->fp);
2354 for (p = r; p; MemFree(p),p=r) {
2355 r = p->link;
2356 if (r) df = r->fp;
2357 else df = NULL;
2358 fs = (*cpos_diag)(p->fp);
2359 db_insert_next(cli_d,fs,df);
2360 incre(df);
2361 }
2362 return;
2363 }
2364 if ((*otl_win)(r->fp,rf)) { /* one more column boundary */
2365 db_insert_next(cli_c,row,r->fp);
2366 incre(r->fp);
2367 if (rf == df) { /* cintersect list is affected */
2368 in_row = row - diag + offset1;
2369 if (in_row < cur_end1) {
2370 if (!cintersect[in_row])
2371 cintersect[in_row]=idb_newList();
2372 db_insert(cintersect[in_row],row,NULL);
2373 ccol_int[row] = in_row;
2374 cdiag_flag[diag] = 1;
2375 }
2376 }
2377 }
2378 incre(rf);
2379 while (r) {
2380 pf = r->fp;
2381 cur_diag = (*cpos_diag)(pf);
2382 p = r->link;
2383 if (p) df = p->fp;
2384 else df = NULL;
2385 next_diag = db_next_key(cli_d);
2386 nf = db_next_val(cli_d);
2387 if (next_diag == -1) {
2388 db_insert_next(cli_d,cur_diag,df);
2389 incre(df);
2390 MemFree(r);
2391 r = p;
2392 } else if (next_diag > cur_diag) {
2393 if ((*otl_win)(pf,rf)) {
2394 cf = ((*otl_win)(df,rf))? df:rf;
2395 db_insert_next(cli_d,cur_diag,cf);
2396 incre(cf);
2397 }
2398 MemFree(r);
2399 r = p;
2400 } else if (next_diag == cur_diag) {
2401 decre(rf);
2402 rf = db_next(cli_d);
2403 incre(rf);
2404 if ((*otl_win)(df,nf)) {
2405 db_update_node(cli_d,df);
2406 decre(nf);
2407 incre(df);
2408 }
2409 MemFree(r);
2410 r = p;
2411 } else if ((*otl_win)(pf,rf)) {
2412 /* next_diag < cur_diag and pf is better */
2413 decre(rf);
2414 rf = nf;
2415 db_delete_next(cli_d);
2416 } else { /* next_diag < cur_diag and rf is better */
2417 decre(rf);
2418 rf = db_next(cli_d);
2419 incre(rf);
2420 if ((*otl_win)(pf,nf)) {
2421 db_update_node(cli_d,pf);
2422 decre(nf);
2423 incre(pf);
2424 }
2425 }
2426
2427 }
2428 decre(rf);
2429 }
2430
2431 /* rotl_win - TRUE if pf is better in common left influence area */
2432 Int4 rotl_win(fragment PNTR pf, fragment PNTR rf)
2433 {
2434 Int4 c1, c2, diag_diff;
2435
2436 if (!rf) return TRUE;
2437 if (!pf) return FALSE;
2438 diag_diff = rcpos_diag(rf) - rcpos_diag(pf);
2439 c1 = pf->i + pf->k - 1;
2440 c2 = rf->i + rf->k - 1;
2441 if (rf->score-IR*(c1-c2)-IE*diag_diff < pf->score)
2442 return TRUE;
2443 return FALSE;
2444 }
2445
2446 /* cotl_win - TRUE if pf is better in common left influence area */
2447 Int4 cotl_win(fragment PNTR pf, fragment PNTR rf)
2448 {
2449 Int4 c1, c2, diag_diff;
2450
2451 if (!rf) return TRUE;
2452 if (!pf) return FALSE;
2453 diag_diff = pos_diag(rf) - pos_diag(pf);
2454 c1 = pf->j + pf->k - 1;
2455 c2 = rf->j + rf->k - 1;
2456 if (rf->score-IR*(c1-c2)-IE*diag_diff < pf->score)
2457 return TRUE;
2458 return FALSE;
2459 }
2460
2461 /* r_survive - reduce the rcopy */
2462 void r_survive(frag_pt PNTR rcopy)
2463 {
2464 frag_pt PNTR p, PNTR q, PNTR r, PNTR t;
2465 Int4 cur_score;
2466
2467 p = rcopy;
2468 while (p) {
2469 cur_score = (p->fp)->score;
2470 /* sweep right */
2471 for (q = p->link; q && ((cur_score - IE * (q->col - p->col))
2472 > (q->fp)->score || ((cur_score - IE * (q->col - p->col))
2473 == (q->fp)->score && topo_win(q->fp,p->fp))); q = q->link);
2474 r = p->link;
2475 while (r != q) {
2476 t = r->link;
2477 MemFree(r);
2478 r = t;
2479 }
2480 p->link = q;
2481 p = q;
2482 }
2483 }
2484
2485 /* r_merge - update the active right influence points */
2486 void r_merge(list cri, frag_pt PNTR rcopy, Int4 row)
2487 {
2488 frag_pt PNTR p, PNTR q;
2489 Int4 row_diff, diag_diff;
2490 fragment PNTR pf, PNTR rf;
2491
2492 for (p = rcopy; p; q = p, p = p->link, MemFree(q)) {
2493 pf = p->fp;
2494 rf = rsearch_next(cri,pos_diag(pf));
2495 /* determine if we need to keep pf in cri */
2496 if (rf) {
2497 row_diff = row - (rf->i + rf->k - 1);
2498 diag_diff = diag(pf) - diag(rf);
2499 if (pf->score > rf->score-IE*diag_diff-IR * row_diff
2500 || (pf->score == rf->score-IE*diag_diff-IR*row_diff
2501 && topo_win(rf,pf)))
2502 r_add_point(cri,pf,row);
2503 } else r_add_point(cri,pf,row);
2504 }
2505 }
2506
2507
2508 /* r_add_point - add point to right influence list cri */
2509 void r_add_point(list cri, fragment PNTR f, Int4 row)
2510 {
2511 fragment PNTR nf;
2512 Int4 row_diff,diag_diff;
2513
2514 nf = next_val(cri);
2515 if (pos_diag(f) == next_key(cri)) {
2516 next(cri);
2517 if ((f->score > (nf->score) -IR*(row-(nf->i +nf->k -1))) ||
2518 ((f->score == (nf->score) -IR*(row-(nf->i +nf->k -1))) &&
2519 topo_win(nf,f))) {
2520 update_node(cri,f);
2521 decre(nf);
2522 incre(f);
2523 }
2524 } else {
2525 insert_next(cri,pos_diag(f),f);
2526 incre(f);
2527 }
2528 nf = next_val(cri);
2529 while (nf) {
2530 row_diff = row - (nf->i + nf->k -1);
2531 diag_diff = diag(nf) - diag(f);
2532 if ((nf->score - IR * row_diff < f->score - IE * diag_diff) ||
2533 ((nf->score - IR * row_diff == f->score - IE * diag_diff) &&
2534 topo_win(nf,f))) {
2535 delete_next(cri);
2536 decre(nf);
2537 nf = next_val(cri);
2538 } else nf = NULL;
2539 }
2540 }
2541
2542 /* resolve - resolve the conflicts of the intersection lists */
2543 void resolve(db_list cli_c, db_list cli_d, idb_list cintersect[], Char cdiag_flag[], Int4 ccol_int[], Int4 row,Int4 (PNTR cpt_lscore)(fragment PNTR, Int4, Int4))
2544 {
2545 Int4 col, diag;
2546 fragment PNTR cp, PNTR dp, PNTR cf, PNTR df, PNTR nf;
2547 Int4 prev_col, prev_diag, next_col, in_row;
2548 Int4 d_score, c_score;
2549 idb_list inter;
2550
2551 if (!cintersect[row]) return;
2552 inter = cintersect[row];
2553 idb_reset_pos(inter);
2554 for (col = idb_key_next(inter); col != -1; col = idb_key_next(inter)){
2555 diag = col - row + offset;
2556 cp = db_rsearch_next(cli_c,col);
2557 cf = db_next_val(cli_c);
2558 c_score = (*cpt_lscore)(cf,col,diag);
2559 dp = db_rsearch_next(cli_d,diag);
2560 prev_col = db_cur_key(cli_c);
2561 prev_diag = db_cur_key(cli_d);
2562 df = ((col - prev_col > diag - prev_diag) &&
2563 (prev_diag != -1))? dp : cp;
2564 d_score = (*cpt_lscore)(df,col,diag);
2565 if (c_score > d_score || (c_score == d_score && topo_win(df,cf))) { /* extend the column boundary */
2566 nf = db_next_val(cli_d);
2567 db_delete_next(cli_d);
2568 decre(nf);
2569 cdiag_flag[diag] = 0;
2570 in_row = col-prev_diag+offset;
2571 ccol_int[col] = 0;
2572 if (df == dp && in_row<=cur_end)
2573 { /* intersection lists are affected */
2574 INTER_ADD(col);
2575 cdiag_flag[prev_diag] = 1;
2576 }
2577 } else { /* extend the diagonal boundary */
2578 db_delete_next(cli_c);
2579 next_col = db_next_key(cli_c);
2580 in_row = next_col-diag+offset;
2581 if ((next_col != -1) && in_row<=cur_end &&(ccol_int[next_col] == 0)) {
2582 INTER_ADD(next_col);
2583 cdiag_flag[diag] = 1;
2584 } else cdiag_flag[diag] = 0;
2585 nf = db_next_val(cli_d);
2586 db_next(cli_d);
2587 db_update_node(cli_d,cf);
2588 decre(nf);
2589 ccol_int[col] = 0;
2590
2591 }
2592 }
2593 }
2594
2595 /* l_merge - merge lcopy into cli_c & cli_d */
2596 void l_merge(db_list cli_c, db_list cli_d, idb_list cintersect[], Char cdiag_flag[], Int4 ccol_int[], frag_pt PNTR lcopy, Int4 row, Int4 (PNTR cpt_lscore)(fragment PNTR, Int4, Int4))
2597 {
2598 Int4 col, diag;
2599 Int4 c_col, d_diag, in_row;
2600 Int4 next_col, p_score;
2601 Int4 prev_diag;
2602 fragment PNTR f, PNTR cf, PNTR df, PNTR dp, PNTR nf, PNTR lp;
2603 frag_pt PNTR e;
2604
2605 for (e = lcopy; e; e = e->link) {
2606 col = e->col;
2607 f = e->fp;
2608 diag = col - row + offset;
2609 cf = db_rsearch_next(cli_c,col);
2610 c_col = db_cur_key(cli_c);
2611 df = db_lsearch_next(cli_d,diag);
2612 d_diag = db_cur_key(cli_d);
2613 next_col = db_next_key(cli_c);
2614 lp = ((col - c_col >= diag - d_diag) && (d_diag != -1)) ?
2615 df : cf;
2616 if (lp != df) { /* it's owned by the column bound */
2617 if (col != next_col) { /* next_col > col */
2618 p_score = (*cpt_lscore)(lp,col,diag);
2619 if (f->score > p_score || (f->score == p_score
2620 && topo_win(lp,f))) {
2621 in_row = next_col-diag+offset;
2622 if ((next_col != -1) && in_row<=cur_end && (ccol_int[next_col] == 0)){
2623 INTER_ADD(next_col);
2624 cdiag_flag[diag] = 1;
2625 }
2626 db_insert_next(cli_d,diag,lp);
2627 incre(lp);
2628 db_insert_next(cli_c,col,f);
2629 incre(f);
2630 }
2631 } else { /* next_col == col */
2632 nf = db_next(cli_c);
2633 p_score = (*cpt_lscore)(nf,col,diag);
2634 if (f->score > p_score || (f->score == p_score
2635 && topo_win(nf,f))) {
2636 db_update_node(cli_c,f);
2637 db_insert_next(cli_d,diag,nf);
2638 incre(f); /* nf->ref remains the same */
2639 next_col = db_next_key(cli_c);
2640 in_row = next_col-diag+offset;
2641 if ((next_col != -1) && in_row<=cur_end
2642 && (ccol_int[next_col] == 0)) {
2643 INTER_ADD(next_col);
2644 cdiag_flag[diag] = 1;
2645 }
2646 }
2647 }
2648 } else { /* it is owned by the diagonal bound */
2649 if ((col != next_col) && (diag != d_diag)) {
2650 p_score = (*cpt_lscore)(df,col,diag);
2651 if ((df == NULL) || (f->score > p_score) ||
2652 (f->score == p_score && topo_win(df,f))){
2653 if (d_diag == -1) {
2654 /* it isn't in any active region */
2655 in_row = next_col-diag+offset;
2656 if (next_col != -1 && in_row<=cur_end) {
2657 INTER_ADD(next_col);
2658 cdiag_flag[diag] = 1;
2659 }
2660 } else if (cdiag_flag[d_diag] && ccol_int[next_col]) {
2661 db_delete(cintersect[ccol_int[next_col]],next_col);
2662 in_row = col-d_diag+offset;
2663 INTER_ADD(col);
2664 in_row = next_col-diag+offset;
2665 INTER_ADD(next_col);
2666 cdiag_flag[diag] = 1;
2667 } else {
2668 in_row = col-d_diag+offset;
2669 if (in_row<=cur_end) {
2670 INTER_ADD(col);
2671 cdiag_flag[d_diag] = 1;
2672 }
2673 in_row=next_col-diag+offset;
2674 if (next_col != -1 && !ccol_int[next_col] && in_row<=cur_end) {
2675 INTER_ADD(next_col);
2676 cdiag_flag[diag]=1;
2677 }
2678 }
2679 db_insert_next(cli_c,col,f);
2680 incre(f);
2681 db_insert_next(cli_d,diag,df);
2682 incre(df);
2683 }
2684 } else if ((col != next_col) && (diag == d_diag)) {
2685 dp = db_prev_val(cli_d);
2686 prev_diag = db_prev_key(cli_d);
2687 df = ((col - c_col >= diag - prev_diag) &&
2688 (prev_diag != -1))?
2689 dp : cf;
2690 p_score = (*cpt_lscore)(df,col,diag);
2691 if (f->score > p_score || (f->score == p_score
2692 && topo_win(df,f))) {
2693 db_insert_next(cli_c,col,f);
2694 incre(f);
2695 in_row = col-prev_diag+offset;
2696 if (df == dp && in_row<=cur_end) {
2697 INTER_ADD(col);
2698 cdiag_flag[prev_diag] = 1;
2699 }
2700 }
2701 } else { /* col == next_col */
2702 nf = db_next(cli_c);
2703 p_score = (*cpt_lscore)(nf,col,diag);
2704 if (f->score > p_score || (f->score == p_score
2705 && topo_win(nf,f))) {
2706 db_update_node(cli_c,f);
2707 db_insert_next(cli_d,diag,nf);
2708 incre(f);
2709 next_col = db_next_key(cli_c);
2710 in_row = next_col-diag+offset;
2711 if ((next_col != -1) && in_row<=cur_end
2712 && (ccol_int[next_col] == 0)) {
2713 INTER_ADD(next_col);
2714 cdiag_flag[diag] = 1;
2715 }
2716 }
2717
2718 }
2719 }
2720
2721 }
2722 }
2723
2724 /* rcr_merge - update the active right influence points */
2725 void rcr_merge(list cri, frag_pt PNTR rcopy, Int4 row)
2726 {
2727 frag_pt PNTR p, PNTR q;
2728 Int4 row_diff, diag_diff;
2729 fragment PNTR pf, PNTR rf;
2730
2731 for (p = rcopy; p; q = p, p = p->link, MemFree(q)) {
2732 pf = p->fp;
2733 rf = rsearch_next(cri,rcpos_diag(pf));
2734 /* determine if we need to keep pf in cri */
2735 if (rf) {
2736 row_diff = row - (rf->j + rf->k - 1);
2737 diag_diff = rcdiag(pf) - rcdiag(rf);
2738 if (pf->score > rf->score-IE*diag_diff-IR * row_diff
2739 || (pf->score == rf->score-IE*diag_diff-IR*row_diff
2740 && topo_win(rf,pf)))
2741 rcr_add_point(cri,pf,row);
2742 } else rcr_add_point(cri,pf,row);
2743 }
2744 }
2745
2746
2747 /* rcr_add_point - add point to right influence list cri */
2748 void rcr_add_point(list cri, fragment PNTR f, Int4 row)
2749 {
2750 fragment PNTR nf;
2751 Int4 row_diff,diag_diff;
2752
2753 nf = next_val(cri);
2754 if (rcpos_diag(f) == next_key(cri)) {
2755 next(cri);
2756 if ((f->score > (nf->score) -IR*(row-(nf->j +nf->k -1))) ||
2757 ((f->score == (nf->score) -IR*(row-(nf->j +nf->k -1))) &&
2758 topo_win(nf,f))) {
2759 update_node(cri,f);
2760 decre(nf);
2761 incre(f);
2762 }
2763 } else {
2764 insert_next(cri,rcpos_diag(f),f);
2765 incre(f);
2766 }
2767 nf = next_val(cri);
2768 while (nf) {
2769 row_diff = row - (nf->j + nf->k -1);
2770 diag_diff = rcdiag(nf) - rcdiag(f);
2771 if ((nf->score - IR * row_diff < f->score - IE * diag_diff) ||
2772 ((nf->score - IR * row_diff == f->score - IE * diag_diff) &&
2773 topo_win(nf,f))) {
2774 delete_next(cri);
2775 decre(nf);
2776 nf = next_val(cri);
2777 } else nf = NULL;
2778 }
2779 }
2780
2781 /* topo_win - return TRUE if f->bgf is greater than pf->bgf in the topological
2782 order */
2783 Int4 topo_win(fragment PNTR pf, fragment PNTR f)
2784 {
2785 if (!pf) return(TRUE);
2786 if (!f) return(FALSE);
2787 if (diag(f->bgf) > diag(pf->bgf)) return(TRUE);
2788 if (diag(f->bgf) == diag(pf->bgf) && (f->bgf)->i >= (pf->bgf)->i)
2789 return(TRUE);
2790 return(FALSE);
2791 }
2792
2793 /* pt_lscore - compute the fading score of the given fragment (left inf)*/
2794 Int4 pt_lscore(fragment PNTR pf, Int4 col, Int4 diag)
2795 {
2796 Int4 col_diff, diag_diff;
2797
2798 if (!pf) return(MININT);
2799 else { col_diff = col - (pf->j + pf->k -1);
2800 diag_diff = pos_diag(pf) - diag;
2801 return(pf->score -IE*diag_diff-IR*col_diff);
2802 }
2803 }
2804
2805 /* rcpt_lscore - compute the fading score of the given fragment (left inf)*/
2806 Int4 rcpt_lscore(fragment PNTR pf, Int4 col, Int4 diag)
2807 {
2808 Int4 col_diff, diag_diff;
2809
2810 if (!pf) return(MININT);
2811 else { col_diff = col - (pf->i + pf->k -1);
2812 diag_diff = rcpos_diag(pf) - diag;
2813 return(pf->score -IE*diag_diff-IR*col_diff);
2814 }
2815 }
2816
2817 /* weight - compute the cost of the connection of fragments pf and f */
2818 Int4 weight(fragment PNTR pf, fragment PNTR f)
2819 {
2820 Int4 msi, msj;
2821
2822 msi = IR*(f->i - pf->i - pf->k);
2823 msj = IR*(f->j - pf->j - pf->k);
2824 if (diag(pf) == diag(f))
2825 return(msi);
2826 else if (diag(pf) < diag(f))
2827 return(gap_cost(diag(f)-diag(pf)) + msi);
2828 else
2829 return(gap_cost(diag(pf)-diag(f)) + msj);
2830 }
2831
2832 /* gap_cost - compute the cost of a gap, given length l */
2833 Int4 gap_cost(Int4 l)
2834 {
2835 return(IO+IE*l);
2836 }
2837
2838 /* diag - give the diagonal number for fragment */
2839 Int4 diag(fragment PNTR f)
2840 {
2841 return(f->j - f->i);
2842 }
2843
2844 /* rcdiag - give the diagonal number for reversed fragment */
2845 Int4 rcdiag(fragment PNTR f)
2846 {
2847 return(f->i - f->j);
2848 }
2849
2850 /* pos_diag - give the positive diagonal number for fragment */
2851 Int4 pos_diag(fragment PNTR f)
2852 {
2853 return(f->j - f->i + len1);
2854 }
2855
2856 /* rcpos_diag - give the positive diagonal number for reversed fragment */
2857 Int4 rcpos_diag(fragment PNTR f)
2858 {
2859 return(f->i - f->j + len2);
2860 }
2861
2862 /* pf_set - set the coordinate of the given pseudo fragment */
2863 void pf_set(fragment PNTR f, Int4 i, Int4 j)
2864 {
2865 f->i = i;
2866 f->j = j;
2867 }
2868
2869 /* power - raise base to n-th power; n >= 0 */
2870 Int4 power(Int4 base, Int4 n)
2871 {
2872 Int4 i, p;
2873
2874 p = 1;
2875 for (i=1; i<=n; ++i)
2876 p = p*base;
2877 return p;
2878 }
2879
2880 /* decre - decrease the reference count */
2881 void decre(fragment PNTR f)
2882 {
2883 if (f && (--(f->ref) == 0)) {
2884 if (f != f->bgf) decre(f->bgf);
2885 MemFree(f);
2886 ++deleted_frags;
2887 }
2888 }
2889
2890 /* incre - increase the reference count */
2891 void incre(fragment PNTR f)
2892 {
2893 if (f) ++(f->ref);
2894 }
2895
2896
2897 static DenseDiagPtr link_diag(DenseDiagPtr new, DenseDiagPtr head)
2898 {
2899 if(head == NULL)
2900 return new;
2901 while(head->next != NULL)
2902 head = head->next;
2903 head->next = new;
2904 return new;
2905 }
2906
2907 /* frag_print - print the given fragment */
2908 DenseDiagPtr frag_print(fragment PNTR f, SeqLocPtr slp1, SeqLocPtr slp2, DenseDiagPtr head)
2909 {
2910 DenseDiagPtr new;
2911 Int4Ptr starts;
2912 Uint1Ptr strands;
2913
2914 if (f) {
2915 new = DenseDiagNew();
2916 new->dim = 2;
2917 new->id = SeqIdDup(SeqLocId(slp1));
2918 new->id->next = SeqIdDup(SeqLocId(slp2));
2919
2920 starts = MemNew((size_t)(new->dim) * sizeof(Int4));
2921 strands = MemNew((size_t)(new->dim) * sizeof(Uint1));
2922 starts[0] = f->i + SeqLocStart(slp1);
2923 if(SeqLocStrand(slp2) == Seq_strand_minus)
2924 starts[1] = SeqLocStop(slp2) - (f->j+ f->k -1);
2925 else
2926 starts[1] = f->j + SeqLocStart(slp2);
2927 strands[0] = SeqLocStrand(slp1);
2928 strands[1] = SeqLocStrand(slp2);
2929 new->starts = starts;
2930 new->strands = strands;
2931 new->len = f->k;
2932
2933 if(head == NULL)
2934 head = new;
2935 else
2936 link_diag(new, head);
2937
2938 /*
2939 fprintf(out_fp, "%ld %ld %ld \n",f->i,f->j,f->k);
2940 */
2941 if (!used_frag[f->i]) used_frag[f->i]=unewList();
2942 uinsert(used_frag[f->i],f->j);
2943 }
2944 ++output_frags;
2945
2946 return head;
2947 }
2948
2949 void Xfrag_print(fragment PNTR f)
2950 {
2951 Int4 len, decr_i, decr_j, decr;
2952
2953 if (f) {
2954 len = f->k - 1;
2955 if (old_i_end) {
2956 if (old_j_end - old_i_end == f->j - f->i) {
2957 old_i_end = f->i + len;
2958 old_j_end = f->j + len;
2959 } else {
2960 decr_i = f->i - old_i_end;
2961 decr_j = f->j - old_j_end;
2962 decr = (decr_i < decr_j) ? decr_i : decr_j;
2963 --decr;
2964 /*
2965 fprintf(out_fp, " l %ld %ld %ld %ld 0.0\n", old_i_start,
2966 old_j_start, old_i_end, old_j_end);
2967 */
2968 old_i_start = f->i - decr;
2969 old_i_end = f->i + len;
2970 old_j_start = f->j - decr;
2971 old_j_end = f->j + len;
2972 }
2973 } else {
2974 old_i_start = f->i;
2975 old_i_end = f->i + len;
2976 old_j_start = f->j;
2977 old_j_end = f->j + len;
2978 }
2979
2980 if (!used_frag[f->i]) used_frag[f->i]=unewList();
2981 uinsert(used_frag[f->i],f->j);
2982 }
2983 ++output_frags;
2984 }
2985 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |