|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/tools/bandalg2.c |
source navigation diff markup identifier search freetext search file search |
1 static char const rcsid[] = "$Id: bandalg2.c,v 6.5 2003/05/30 17:25:35 coulouri Exp $";
2
3 /* ===========================================================================
4 *
5 * PUBLIC DOMAIN NOTICE
6 * National Center for Biotechnology Information
7 *
8 * This software/database is a "United States Government Work" under the
9 * terms of the United States Copyright Act. It was written as part of
10 * the author's official duties as a United States Government employee and
11 * thus cannot be copyrighted. This software/database is freely available
12 * to the public for use. The National Library of Medicine and the U.S.
13 * Government have not placed any restriction on its use or reproduction.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================*/
26 /*****************************************************************************
27
28 File name: bandalg2.c
29
30 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden
31
32 Contents: Functions to perform both local and global banded alignments.
33
34 *****************************************************************************/
35
36 #include <bandalgn.h>
37
38 static Int4 gb_linear_align(Uint1Ptr A, Uint1Ptr B,
39 Int4 M, Int4 N,
40 Int4 low, Int4 up,
41 Int1 tb, Int1 te,
42 Boolean legA, Boolean legB,
43 Boolean regA, Boolean regB,
44 data_t *data);
45
46 static Int4 gb_linear_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B,
47 Int4 M, Int4 N,
48 Int4 PNTR S,
49 Boolean lgA, Boolean lgB,
50 Boolean rgA, Boolean rgB,
51 data_t *data);
52
53 /*********************************************************
54 *
55 * Int4 gband_linear_gap(A, B, M, N, option, S, Slen)
56 * compute the global alignment with flexible end gap
57 * penalty for DNA-DNA and protein protein alignment.
58 * The alignment is computed with linear space
59 * This function was originally from the g_band2.c file
60 *
61 * align the two sequences A, B
62 * A, B starts with index 1
63 * M, N is the length of A, B
64 * option sets the option of the alignment,
65 * which includes penalties for end gaps
66 * S is the script that contains the alignment results
67 * Slen stores the length of the alignment (the size of S)
68 * return the score of the alignment
69 *
70 ***********************************************************/
71 Int4 LIBCALL gband_linear_gap(Uint1Ptr A, Uint1Ptr B,
72 Int4 M, Int4 N,
73 Int4Ptr PNTR matrix,
74 PSUGapOptionsPtr option,
75 Int4Ptr S, Int4Ptr Slen)
76 {
77 data_t data;
78 Int4 c, i, j;
79 Int4 low, up;
80 Int4 band;
81 Int4 score;
82
83 /* set up global parameter */
84 data.g = option->gopen;
85 data.zzh = option->gext;
86 data.w = matrix;
87 data.m = data.g + data.zzh;
88 data.sapp = data.sapp0 = S;
89 data.last = 0;
90 *Slen = 0;
91
92 low = option->start_diag; /* start diagonal of band */
93 band = option->width; /* the band width for the alignment */
94 /* band = up-low+1 */
95 up = band + low - 1;
96 low = MIN(MAX(-M, low), MIN(N - M, 0));
97 up = MAX(MIN(N, up), MAX(N - M, 0));
98
99 if (up < 0 || low > 0) {
100 #ifdef DO_WWW
101 content();
102 printf("The band does not include (0,0) grid point");
103 exit (1);
104 #else
105 ErrPostEx(SEV_WARNING, 0, 0, "The band does not include (0,0) grid point");
106 return 0;
107 #endif
108 }
109 if (up + M < N || low + M > N) {
110 #ifdef DO_WWW
111 content();
112 printf("The band does not include lower corner");
113 exit (1);
114 #else
115 ErrPostEx(SEV_WARNING, 0, 0, "The band does not include lower corner");
116 return 0;
117 #endif
118 }
119 if (N <= 0) {
120 if (M > 0)
121 DEL_(M);
122 return -gap_(M);
123 }
124 if (M <= 0) {
125 INS_(N);
126 return -gap_(N);
127 }
128 if ((band = up - low + 1) <= 1) {
129 c = 0;
130 for (i = 1; i <= M; i++) {
131 REP_;
132 c += data.w[A[i]][B[i]];
133 }
134 return c;
135 }
136 data.CD = MemGet((size_t) (band + 2) * sizeof(dp_node), MGET_ERRPOST);
137 j = M + 1;
138 data.MT[0] = MemGet((size_t) j, MGET_ERRPOST);
139 data.MT[1] = MemGet((size_t) j, MGET_ERRPOST);
140 data.MT[2] = MemGet((size_t) j, MGET_ERRPOST);
141 data.FT = MemGet((size_t) j, MGET_ERRPOST);
142 j *= sizeof(Int4);
143 data.MP[0] = MemGet((size_t) j, MGET_ERRPOST);
144 data.MP[1] = MemGet((size_t) j, MGET_ERRPOST);
145 data.MP[2] = MemGet((size_t) j, MGET_ERRPOST);
146 data.FP = MemGet((size_t) j, MGET_ERRPOST);
147
148 /*Initiate the end gap penalties */
149 data.leggA = option->lg1_open;
150 data.leghA = option->lg1_ext;
151 data.reggA = option->rg1_open;
152 data.reghA = option->rg1_ext;
153
154 data.leggB = option->lg2_open;
155 data.leghB = option->lg2_ext;
156 data.reggB = option->rg2_open;
157 data.reghB = option->rg2_ext;
158
159 c = gb_linear_align(A, B, M, N, low, up, 0, 0, 0, 0, 0, 0, &data);
160 score = gb_linear_CHECK_SCORE(A, B, M, N, S, 0, 0, 0, 0, &data);
161 if (score != c) {
162 #ifdef DO_WWW
163 content();
164 printf("Check_Score = %ld align_score = %ld ", score, c);
165 exit (1);
166 #else
167 ErrPostEx(SEV_WARNING, 0, 0, "Check_Score = %ld align_score = %ld ",
168 (long) score, (long) c);
169 return 0;
170 #endif
171 }
172 MemFree(data.CD);
173 for (i = 0; i < 3; i++) {
174 MemFree(data.MT[i]);
175 MemFree(data.MP[i]);
176 }
177
178 MemFree(data.FT);
179 MemFree(data.FP);
180
181 *Slen = data.sapp - S;
182 return c;
183 }
184
185 /* align(A, B, M, N, up, low, tb, te, data) returns the cost
186 of an optimum conversion between
187 A[1..M] and B[1..N] and appends such a conversion to the current script.
188 tb(te)= 1 no gap-open penalty if the conversion begins(ends) with a delete.
189 tb(te)= 2 no gap-open penalty if the conversion begins(ends) with an
190 insert.
191 */
192
193 /************************************************************/
194 static Int4 gb_linear_align(Uint1Ptr A, Uint1Ptr B,
195 Int4 M, Int4 N,
196 Int4 low, Int4 up,
197 Int1 tb, Int1 te,
198 Boolean legA, Boolean legB,
199 Boolean regA, Boolean regB,
200 data_t *data)
201 {
202 Int4 rmid, k =0, l=0, r, v, kt, X, Y;
203 Int4 t1, t2, t3;
204 Int4Ptr PNTR dmp = data->MP;
205 Uint1Ptr PNTR dmt = data->MT;
206 register Uint1Ptr BB;
207 register Uint1 AA;
208 register Int4 match, misma;
209 {
210 Int4 band, midd, j;
211 Int4 leftd, rightd; /* for CC, DD, CP and DP */
212 register Int4 curd; /* current index for CC, DD CP and DP */
213 register Int4 i;
214 register Int4 c, d, e, x;
215 register dp_ptr ap;
216 Int4 t, fr, tz = data->zzh, tm = data->m;
217 Int4Ptr wa;
218 Int4 ib, best = MININT;
219
220 /* Boundary cases: M <= 0 , N <= 0, or up-low <= 0 */
221 if (up > N) {
222 up = N;
223 }
224 if (N <= 0) {
225 if (M > 0) {
226 _DEL(M);
227 }
228 return 0;
229 }
230 if (M <= 0) {
231 _INS(N);
232 return 0;
233 }
234 if ((band = up - low + 1) <= 1) {
235 for (i = 1; i <= M; i++) {
236 _REP;
237 }
238 return 0;
239 }
240 /* Divide: Find all crossing points */
241
242 /* Initialization */
243 midd = band/2 + 1;
244 rmid = low + midd - 1;
245 leftd = 1-low;
246 rightd = up-low+1;
247 if (leftd < midd) {
248 fr = -1;
249 for (j = 0; j < midd; j++)
250 data->CD[j].CP = data->CD[j].DPDP = -1;
251 for (j = midd; j <= rightd; j++) {
252 data->CD[j].CP = data->CD[j].DPDP = 0;
253 }
254 data->CD[midd - 1].DPDP = 0;
255 dmp[0][0] = -1;
256 dmp[1][0] = -1;
257 dmp[2][0] = -1;
258 dmt[0][0] = dmt[1][0] = dmt[2][0] = 0;
259 } else if (leftd > midd) {
260 fr = leftd - midd;
261 for (j = 0; j <= midd; j++) {
262 data->CD[j].CP = data->CD[j].DPDP = fr;
263 }
264 for (j = midd + 1; j <= rightd; j++)
265 data->CD[j].CP = data->CD[j].DPDP = -1;
266 data->CD[midd].DPDP = -1;
267 dmp[0][fr] = -1;
268 dmp[1][fr] = -1;
269 dmp[2][fr] = -1;
270 dmt[0][fr] = dmt[1][fr] = dmt[2][fr] = 0;
271 } else {
272 for (j = 0; j < rightd; j++) {
273 data->CD[j].CP = data->CD[j].DPDP = 0;
274 }
275 dmp[0][0] = -1;
276 dmp[1][0] = -1;
277 dmp[2][0] = -1;
278 dmt[0][0] = dmt[1][0] = dmt[2][0] = 0;
279 }
280
281 data->CD[leftd].CC = 0;
282 if (!legB) {
283 t = -data->leggB;
284 for (j = leftd + 1; j <= rightd; j++) {
285 data->CD[j].CC = t = t - data->leghB;
286 data->CD[j - 1].DD = t - data->m;
287 }
288 } else {
289 if (tb == 2) {
290 t = 0;
291 } else {
292 t = -data->g;
293 }
294 for (j = leftd + 1; j <= rightd; j++) {
295 data->CD[j].CC = t = t - tz;
296 data->CD[j - 1].DD = t - tm;
297 }
298 }
299 data->CD[rightd + 1].CC = MININT;
300 data->CD[rightd].DD = MININT;
301 data->CD[rightd + 1].CP = data->CD[rightd].DPDP = 0;
302 if (tb == 1) {
303 data->CD[leftd - 1].DD = -tz;
304 } else {
305 data->CD[leftd - 1].DD = -tm;
306 }
307 if (!legA) {
308 data->CD[leftd - 1].DD = -data->leggA - data->leghA;
309 }
310 data->CD[leftd - 1].CC = MININT;
311 match = data->w[0][0];
312 misma = data->w[0][1];
313 for (i = 1; i <= M; i++) {
314 if (i > N - up) {
315 rightd--;
316 }
317 if (leftd > 1) {
318 leftd--;
319 }
320 wa = data->w[A[i]];
321 AA = A[i];
322 d = data->CD[leftd].DD;
323 if ((ib = leftd + low - 1 + i) > 0) {
324 c = data->CD[leftd].CC + wa[B[ib]];
325 }
326 if (d > c || ib <= 0) {
327 c = d;
328 data->CD[leftd].CP = data->CD[leftd].DPDP;
329 }
330 e = c - data->m;
331 data->IP = data->CD[leftd].CP;
332 if (leftd > 1 && !legA) {
333 d -= data->leghA;
334 } else {
335 d -= tz;
336 }
337 if (d >= e) {
338 data->CD[leftd - 1].DD = d;
339 data->CD[leftd - 1].DPDP = data->CD[leftd].DPDP;
340 } else {
341 data->CD[leftd - 1].DD = e;
342 data->CD[leftd - 1].DPDP = data->IP;
343 }
344 if (leftd == midd) {
345 data->CD[leftd].CP = data->CD[leftd - 1].DPDP = data->IP = i;
346 }
347 data->CD[leftd].CC = c;
348 if (midd <= rightd && midd >= leftd + 1) {
349 x = midd;
350 } else {
351 x = rightd + 1;
352 }
353 BB = B + low - 1 + i;
354 for (curd = leftd + 1, ap = &data->CD[curd]; curd < x; curd++) {
355 c = ap->CC + wa[BB[curd]];
356 d = ap->DD;
357 if (c < d || c < e) {
358 if (d < e) {
359 c = e;
360 ap->CP = data->IP;
361 } else {
362 c = d;
363 ap->CP = ap->DPDP;
364 }
365 }
366 ap->CC = c;
367 if ((c -= tm) > (e -= tz)) {
368 e = c;
369 data->IP = ap->CP;
370 }
371 if (c > (d -= tz)) {
372 (ap - 1)->DD = c;
373 (ap - 1)->DPDP = ap->CP;
374 ap++;
375 } else {
376 (ap - 1)->DD = d;
377 (ap - 1)->DPDP = ap->DPDP;
378 ap++;
379 }
380 }
381 if (x != rightd+1) {
382 dmp[1][i] = data->IP;
383 dmt[1][i] = 2;
384 dmp[2][i] = ap->DPDP;
385 dmt[2][i] = 1;
386 c = ap->CC + wa[B[curd+low-1+i]];
387 d = ap->DD;
388 if (c < d || c < e) {
389 if (e > d) {
390 c = e;
391 dmp[0][i] = dmp[1][i];
392 dmt[0][i] = 2;
393 } else {
394 c = d;
395 dmp[0][i] = dmp[2][i];
396 dmt[0][i] = 1;
397 }
398 } else {
399 dmp[0][i] = i-1;
400 dmt[0][i] = 0;
401 }
402 if (c - data->g > e) {
403 dmp[1][i] = dmp[0][i];
404 dmt[1][i] = dmt[0][i];
405 }
406 if (c - data->g > d) {
407 dmp[2][i] = dmp[0][i];
408 dmt[2][i] = dmt[0][i];
409 }
410 ap->CP = (ap - 1)->DPDP = data->IP = i;
411 ap->CC = c;
412 if ((c -= data->m) > (e -= tz)) {
413 e = c;
414 }
415 if (c > (d -= tz)) {
416 (ap++ - 1)->DD = c;
417 } else {
418 (ap++ - 1)->DD = d;
419 }
420 for (curd++; curd <= rightd; curd++) {
421 c = ap->CC + wa[BB[curd]];
422 d = ap->DD;
423 if (c < d || c < e) {
424 if (d < e) {
425 c = e;
426 ap->CP = data->IP;
427 } else {
428 c = d;
429 ap->CP = ap->DPDP;
430 }
431 } /* otherwise, CP is unchanged */
432 ap->CC = c;
433 if ((c -= tm) > (e -= tz)) {
434 e = c;
435 data->IP = ap->CP;
436 }
437 if (c > (d -= tz)) {
438 (ap - 1)->DD = c;
439 (ap-1)->DPDP = ap->CP;
440 ap++;
441 } else {
442 (ap-1)->DD = d;
443 (ap-1)->DPDP = ap->DPDP;
444 ap++;
445 }
446 }
447 }
448 if (!regA && i > N-up &&
449 best < (j = (ap - 1)->CC - data->reggA - (M - i) * data->reghA)) {
450 best = j;
451 X = i;
452 Y = N;
453 k = (ap-1)->CP;
454 }
455 }
456 if (!regB) {
457 for (ap = &data->CD[leftd]; ap <= &data->CD[rightd]; ap++) {
458 if ((j = ap->CC - data->reggB - data->reghB *
459 (x = (&data->CD[rightd] - ap))) > best) {
460 X = M;
461 best = j;
462 Y = N - x;
463 k = ap->CP;
464 }
465 }
466 }
467 if (best < data->CD[rightd].CC) {
468 X = M;
469 Y = N;
470 best = data->CD[rightd].CC;
471 k = data->CD[rightd].CP;
472 }
473
474 /* decide which path to be traced back */
475 if (regA && regB) {
476 X = M;
477 Y = N;
478 v = c = (ap-1)->CC;
479 d = (ap-2)->DD;
480 if (te == 1 && d + data->m > c) {
481 k = data->CD[rightd-1].DPDP;
482 l = 2;
483 } else if (te == 2 && e + data->m > c) {
484 k = data->IP;
485 l = 1;
486 } else {
487 k = data->CD[rightd].CP;
488 l = 0;
489 }
490 } else {
491 l = 0;
492 v = best;
493 }
494 if (rmid > Y - X) {
495 l = 2;
496 } else if (rmid < Y - X) {
497 l = 1;
498 }
499 }
500 /* Conquer: Solve subproblems recursively */
501 /* trace back */
502 r = -1;
503 for (; k > -1; r=k, k=dmp[l][r], l=dmt[l][r]) {
504 data->FP[k] = r;
505 data->FT[k] = (Uint1) l;
506 }
507 /* forward dividing */
508 if(r == -1) { /* optimal alignment did not cross the middle diagonal */
509 if(rmid < 0) {
510 gb_linear_align(A, B, X, Y, rmid + 1, up, tb, te,
511 legA, legB, regA, regB, data);
512 } else {
513 gb_linear_align(A, B, X, Y, low, rmid-1, tb, te,
514 legA, legB, regA, regB, data);
515 }
516 } else {
517 k = r;
518 l = data->FP[k];
519 kt = data->FT[k];
520
521 /* first block */
522 if (rmid < 0) {
523 gb_linear_align(A, B, r - 1, r + rmid, rmid + 1,
524 MIN(up, r + rmid), tb, 1, legA, legB, 1, 1, data);
525 _DEL(1);
526 legB = 1;
527 if (data->sapp > data->sapp0 + 1) {
528 legA = 1;
529 }
530 } else if (rmid > 0) {
531 gb_linear_align(A, B, r, r + rmid - 1, MAX(-r, low), rmid - 1,
532 tb, 2, legA, legB, 1, 1, data);
533 _INS(1);
534 legA = 1;
535 if(data->sapp > data->sapp0 + 1) {
536 legB = 1;
537 }
538 }
539 /* intermediate blocks */
540 t2 = up-rmid-1;
541 t3 = low-rmid+1;
542 for (; l > -1; k = l, l = data->FP[k], kt = data->FT[k]) {
543 if (kt == 0) {
544 _REP;
545 } else if (kt == 1) { /* right-hand side triangle */
546 _INS(1);
547 t1 = l - k - 1;
548 gb_linear_align(A + k, B + k + rmid + 1,
549 t1, t1, 0, MIN(t1, t2), 2, 1, legA, legB, 1, 1, data);
550 _DEL(1);
551 } else { /* kt == 2, left-hand side triangle */
552 _DEL(1);
553 t1 = l - k - 1;
554 gb_linear_align(A + k + 1, B + k + rmid, t1, t1,
555 MAX(-t1, t3), 0, 1, 2, legA, legB, 1, 1, data);
556 _INS(1);
557 }
558 legA = legB = 1;
559 }
560
561 /* last block */
562 if (Y-X > rmid) {
563 _INS(1);
564 t1 = k + rmid + 1;
565 gb_linear_align(A + k, B + t1, X - k, Y - t1, 0,
566 MIN(Y - t1, t2), 2, te, legA, legB, 1, 1, data);
567 } else if (Y-X < rmid) {
568 _DEL(1);
569 t1 = X - (k + 1);
570 gb_linear_align(A + k + 1, B + k + rmid, t1, Y - (k + rmid),
571 MAX(-t1, t3), 0, 1, te, legA, legB, 1, 1, data);
572 }
573 }
574 if(X != M) {
575 _DEL(M - X)
576 } else if(Y != N) {
577 _INS(N - Y);
578 }
579 return(v);
580 }
581
582 /************************************************************/
583 /* gb_linear_CHECK_SCORE - return the score of the alignment stored in S */
584 static Int4 gb_linear_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B,
585 Int4 M, Int4 N,
586 Int4 PNTR S,
587 Boolean lgA, Boolean lgB,
588 Boolean rgA, Boolean rgB,
589 data_t *data)
590 {
591 register Int4 i, j, op;
592 Int4 score;
593
594 score = i = j = op = 0;
595 if (!lgA && (*S) < 0) {
596 i = i-*S++;
597 } else if (!lgB && (*S) >0) {
598 j = j+*S++;
599 }
600 while (i < M || j < N) {
601 op = *S++;
602 if (op == 0) {
603 score = data->w[A[++i]][B[++j]] + score;
604 } else if (op > 0) {
605 score = score - (data->g + op * data->zzh);
606 j = j + op;
607 } else {
608 score = score - (data->g - op * data->zzh);
609 i = i - op;
610 }
611 if (i==M)
612 break;
613 }
614 if (!rgA && op < 0) {
615 score += (data->g - op * data->zzh);
616 }
617 if (!rgB && op > 0) {
618 score += (data->g + op * data->zzh);
619 }
620 return (score);
621 }
622 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |