NCBI C Toolkit Cross Reference

C/tools/bandalg2.c


  1 static char const rcsid[] = "$Id: bandalg2.c,v 6.5 2003/05/30 17:25:35 coulouri Exp $";
  2 
  3 /* ===========================================================================
  4 *
  5 *                            PUBLIC DOMAIN NOTICE
  6 *               National Center for Biotechnology Information
  7 *
  8 *  This software/database is a "United States Government Work" under the
  9 *  terms of the United States Copyright Act.  It was written as part of
 10 *  the author's official duties as a United States Government employee and
 11 *  thus cannot be copyrighted.  This software/database is freely available
 12 *  to the public for use. The National Library of Medicine and the U.S.
 13 *  Government have not placed any restriction on its use or reproduction.
 14 *
 15 *  Although all reasonable efforts have been taken to ensure the accuracy
 16 *  and reliability of the software and data, the NLM and the U.S.
 17 *  Government do not and cannot warrant the performance or results that
 18 *  may be obtained by using this software or data. The NLM and the U.S.
 19 *  Government disclaim all warranties, express or implied, including
 20 *  warranties of performance, merchantability or fitness for any particular
 21 *  purpose.
 22 *
 23 *  Please cite the author in any work or product based on this material.
 24 *
 25 * ===========================================================================*/
 26 /*****************************************************************************
 27 
 28 File name: bandalg2.c
 29 
 30 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden
 31 
 32 Contents: Functions to perform both local and global banded alignments.
 33 
 34 *****************************************************************************/
 35 
 36 #include <bandalgn.h>
 37 
 38 static Int4 gb_linear_align(Uint1Ptr A, Uint1Ptr B,
 39                             Int4 M, Int4 N,
 40                             Int4 low, Int4 up,
 41                             Int1 tb, Int1 te,
 42                             Boolean legA, Boolean legB, 
 43                             Boolean regA, Boolean regB,
 44                             data_t *data);
 45 
 46 static Int4 gb_linear_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B,
 47                                   Int4 M, Int4 N,
 48                                   Int4 PNTR S,
 49                                   Boolean lgA, Boolean lgB, 
 50                                   Boolean rgA, Boolean rgB,
 51                                   data_t *data);
 52 
 53 /*********************************************************
 54 *
 55 *       Int4 gband_linear_gap(A, B, M, N, option, S, Slen)
 56 *       compute the global alignment with flexible end gap 
 57 *       penalty for DNA-DNA and protein protein alignment. 
 58 *       The alignment is computed with linear space
 59 *       This function was originally from the g_band2.c file
 60 *
 61 *       align the two sequences A, B
 62 *       A, B starts with index 1
 63 *       M, N is the length of A, B
 64 *       option sets the option of the alignment, 
 65 *       which includes penalties for end gaps
 66 *       S is the script that contains the alignment results
 67 *       Slen stores the length of the alignment (the size of S)
 68 *       return the score of the alignment
 69 *
 70 ***********************************************************/
 71 Int4 LIBCALL gband_linear_gap(Uint1Ptr A, Uint1Ptr B,
 72                               Int4 M, Int4 N, 
 73                               Int4Ptr PNTR matrix,
 74                               PSUGapOptionsPtr option,
 75                               Int4Ptr S, Int4Ptr Slen)
 76 {
 77         data_t data;
 78         Int4 c, i, j;
 79         Int4 low, up;
 80         Int4 band;
 81         Int4 score;
 82         
 83   /* set up global parameter */
 84         data.g = option->gopen;
 85         data.zzh = option->gext;
 86         data.w = matrix;
 87         data.m = data.g + data.zzh;
 88         data.sapp = data.sapp0 = S;
 89         data.last = 0;
 90         *Slen = 0;
 91         
 92         low = option->start_diag;       /* start diagonal of band */
 93         band = option->width;           /* the band width for the alignment */
 94         /* band = up-low+1 */
 95         up = band + low - 1;
 96         low = MIN(MAX(-M, low), MIN(N - M, 0));
 97         up = MAX(MIN(N, up), MAX(N - M, 0));
 98         
 99         if (up < 0 || low > 0) {
100 #ifdef DO_WWW
101                         content();
102                         printf("The band does not include (0,0) grid point"); 
103                                 exit (1);
104 #else
105                 ErrPostEx(SEV_WARNING, 0, 0, "The band does not include (0,0) grid point");
106                 return 0;
107 #endif
108         }
109         if (up + M < N || low + M > N) {
110 #ifdef DO_WWW
111                         content();
112                         printf("The band does not include lower corner"); 
113                                 exit (1);
114 #else
115                 ErrPostEx(SEV_WARNING, 0, 0, "The band does not include lower corner");
116                 return 0;
117 #endif
118         }
119         if (N <= 0) {
120                 if (M > 0)
121                         DEL_(M);
122                 return -gap_(M);
123         }
124         if (M <= 0) {
125                 INS_(N);
126                 return -gap_(N);
127         }
128         if ((band = up - low + 1) <= 1) {
129                 c = 0;
130                 for (i = 1; i <= M; i++) {
131                         REP_;
132                         c += data.w[A[i]][B[i]];
133                 }
134                 return c;
135         }
136         data.CD = MemGet((size_t) (band + 2) * sizeof(dp_node), MGET_ERRPOST);
137         j = M + 1;
138         data.MT[0] = MemGet((size_t) j, MGET_ERRPOST);
139         data.MT[1] = MemGet((size_t) j, MGET_ERRPOST);
140         data.MT[2] = MemGet((size_t) j, MGET_ERRPOST);
141         data.FT = MemGet((size_t) j, MGET_ERRPOST);
142         j *= sizeof(Int4);
143         data.MP[0] = MemGet((size_t) j, MGET_ERRPOST);
144         data.MP[1] = MemGet((size_t) j, MGET_ERRPOST);
145         data.MP[2] = MemGet((size_t) j, MGET_ERRPOST);
146         data.FP = MemGet((size_t) j, MGET_ERRPOST);
147 
148   /*Initiate the end gap penalties */
149         data.leggA = option->lg1_open;
150         data.leghA = option->lg1_ext;
151         data.reggA = option->rg1_open;
152         data.reghA = option->rg1_ext;
153         
154         data.leggB = option->lg2_open;
155         data.leghB = option->lg2_ext;
156         data.reggB = option->rg2_open;
157         data.reghB = option->rg2_ext;
158         
159         c = gb_linear_align(A, B, M, N, low, up, 0, 0, 0, 0, 0, 0, &data);
160         score = gb_linear_CHECK_SCORE(A, B, M, N, S, 0, 0, 0, 0, &data);
161         if (score != c) {
162 #ifdef DO_WWW
163                         content();
164                         printf("Check_Score = %ld align_score = %ld ", score, c); 
165                                 exit (1);
166 #else
167                 ErrPostEx(SEV_WARNING, 0, 0, "Check_Score = %ld align_score = %ld ",
168                           (long) score, (long) c);
169                 return 0;
170 #endif
171         }
172         MemFree(data.CD);
173         for (i = 0; i < 3; i++) {
174                 MemFree(data.MT[i]);
175                 MemFree(data.MP[i]);
176         }
177         
178         MemFree(data.FT);
179         MemFree(data.FP);
180         
181         *Slen = data.sapp - S;
182         return c;
183 }
184 
185 /* align(A, B, M, N, up, low, tb, te, data) returns the cost
186    of an optimum conversion between
187    A[1..M] and B[1..N] and appends such a conversion to the current script.
188    tb(te)= 1  no gap-open penalty if the conversion begins(ends) with a delete.
189    tb(te)= 2  no gap-open penalty if the conversion begins(ends) with an
190    insert.
191 */
192 
193 /************************************************************/
194 static Int4 gb_linear_align(Uint1Ptr A, Uint1Ptr B,
195                             Int4 M, Int4 N,
196                             Int4 low, Int4 up,
197                             Int1 tb, Int1 te,
198                             Boolean legA, Boolean legB, 
199                             Boolean regA, Boolean regB,
200                             data_t *data)
201 {
202   Int4 rmid, k =0, l=0, r, v, kt, X, Y;
203   Int4 t1, t2, t3;
204   Int4Ptr PNTR dmp = data->MP;
205   Uint1Ptr PNTR dmt = data->MT;
206   register Uint1Ptr BB;
207   register Uint1 AA;
208   register Int4 match, misma;
209   {
210     Int4 band, midd, j;
211     Int4 leftd, rightd; /* for CC, DD, CP and DP */
212     register Int4 curd; /* current index for CC, DD CP and DP */
213     register Int4 i;
214     register Int4 c, d, e, x;
215     register dp_ptr ap;
216     Int4 t, fr, tz = data->zzh, tm = data->m;
217     Int4Ptr wa;
218     Int4 ib, best = MININT;
219 
220     /* Boundary cases: M <= 0 , N <= 0, or up-low <= 0 */
221         if (up > N) {
222                 up = N;
223         }
224         if (N <= 0) {
225                 if (M > 0) {
226                         _DEL(M);
227                 }
228                 return 0;
229         }
230         if (M <= 0) {
231                 _INS(N);
232                 return 0;
233         }
234         if ((band = up - low + 1) <= 1) {
235                 for (i = 1; i <= M; i++) {
236                         _REP;
237                 }
238                 return 0;
239         }
240     /* Divide: Find all crossing points */
241 
242     /* Initialization */
243         midd = band/2 + 1;
244         rmid = low + midd - 1;
245         leftd = 1-low;
246         rightd = up-low+1;
247         if (leftd < midd) {
248                 fr = -1;
249                 for (j = 0; j < midd; j++)
250                         data->CD[j].CP = data->CD[j].DPDP = -1;
251                 for (j = midd; j <= rightd; j++) {
252                         data->CD[j].CP = data->CD[j].DPDP = 0;
253                 }
254                 data->CD[midd - 1].DPDP = 0;
255                 dmp[0][0] = -1;
256                 dmp[1][0] = -1;
257                 dmp[2][0] = -1;
258                 dmt[0][0] = dmt[1][0] = dmt[2][0] = 0;
259         } else if (leftd > midd) {
260                 fr = leftd - midd;
261                 for (j = 0; j <= midd; j++) {
262                         data->CD[j].CP = data->CD[j].DPDP = fr;
263                 }
264                 for (j = midd + 1; j <= rightd; j++)
265                         data->CD[j].CP = data->CD[j].DPDP = -1;
266                 data->CD[midd].DPDP = -1;
267                 dmp[0][fr] = -1;
268                 dmp[1][fr] = -1;
269                 dmp[2][fr] = -1;
270                 dmt[0][fr] = dmt[1][fr] = dmt[2][fr] = 0;
271         } else {
272                 for (j = 0; j < rightd; j++) {
273                         data->CD[j].CP = data->CD[j].DPDP = 0;
274                 }
275                 dmp[0][0] = -1;
276                 dmp[1][0] = -1;
277                 dmp[2][0] = -1;
278                 dmt[0][0] = dmt[1][0] = dmt[2][0] = 0;
279         }
280 
281         data->CD[leftd].CC = 0;
282         if (!legB) {
283                 t = -data->leggB;
284                 for (j = leftd + 1; j <= rightd; j++) {
285                         data->CD[j].CC = t = t - data->leghB;
286                         data->CD[j - 1].DD = t - data->m;
287                 }
288         } else {
289                 if (tb == 2) {
290                         t = 0;
291                 } else {
292                         t = -data->g;
293                 }
294                 for (j = leftd + 1; j <= rightd; j++) {
295                         data->CD[j].CC = t = t - tz;
296                         data->CD[j - 1].DD = t - tm;
297                 }
298         }
299         data->CD[rightd + 1].CC = MININT;
300         data->CD[rightd].DD = MININT;
301         data->CD[rightd + 1].CP = data->CD[rightd].DPDP = 0;
302         if (tb == 1) {
303                 data->CD[leftd - 1].DD = -tz;
304         } else {
305                 data->CD[leftd - 1].DD = -tm;
306         }
307         if (!legA) {
308                 data->CD[leftd - 1].DD = -data->leggA - data->leghA;
309         }
310         data->CD[leftd - 1].CC = MININT;
311         match = data->w[0][0];
312         misma = data->w[0][1];
313     for (i = 1; i <= M; i++) {
314                 if (i > N - up) {
315                         rightd--;
316                 }
317                 if (leftd > 1) {
318                         leftd--;
319                 }
320                 wa = data->w[A[i]];
321                 AA = A[i];
322                 d = data->CD[leftd].DD;
323                 if ((ib = leftd + low - 1 + i) > 0) {
324                         c = data->CD[leftd].CC + wa[B[ib]];
325                 }
326                 if (d > c || ib <= 0) {
327                         c = d;
328                         data->CD[leftd].CP = data->CD[leftd].DPDP;
329         }
330         e = c - data->m;            
331         data->IP = data->CD[leftd].CP;    
332                 if (leftd > 1 && !legA) {
333                         d -= data->leghA;
334                 } else {
335                         d -= tz;
336                 }
337                 if (d >= e) {
338                         data->CD[leftd - 1].DD = d;
339                         data->CD[leftd - 1].DPDP = data->CD[leftd].DPDP;
340                 } else {
341                         data->CD[leftd - 1].DD = e;
342                         data->CD[leftd - 1].DPDP = data->IP;
343                 }
344                 if (leftd == midd) {
345                         data->CD[leftd].CP = data->CD[leftd - 1].DPDP = data->IP = i;
346                 }
347                 data->CD[leftd].CC = c;
348                 if (midd <= rightd && midd >= leftd + 1) {
349                         x = midd;
350                 } else {
351                         x = rightd + 1;
352                 }
353                 BB = B + low - 1 + i;
354                 for (curd = leftd + 1, ap = &data->CD[curd]; curd < x; curd++) {
355                         c = ap->CC + wa[BB[curd]];
356                         d = ap->DD;
357                         if (c < d || c < e) {
358                                 if (d < e) {
359                                         c = e;
360                                         ap->CP = data->IP;
361                                 } else {
362                                         c = d;
363                                         ap->CP = ap->DPDP;
364                                 }
365                         }
366                         ap->CC = c;
367                         if ((c -= tm) > (e -= tz)) {
368                                 e = c;
369                                 data->IP = ap->CP;
370                         } 
371                         if (c > (d -= tz)) {
372                                 (ap - 1)->DD = c;
373                                 (ap - 1)->DPDP = ap->CP;
374                                  ap++;
375                         } else {
376                                 (ap - 1)->DD = d;
377                                 (ap - 1)->DPDP = ap->DPDP;
378                                 ap++;
379                         }
380                 }
381                 if (x != rightd+1) {
382                         dmp[1][i] = data->IP;
383                         dmt[1][i] = 2;
384                         dmp[2][i] = ap->DPDP;
385                         dmt[2][i] = 1;     
386                         c = ap->CC + wa[B[curd+low-1+i]];
387                         d = ap->DD;
388                         if (c < d || c < e) {
389                                 if (e > d) {
390                                 c = e;
391                                 dmp[0][i] = dmp[1][i];
392                                 dmt[0][i] = 2;
393                                 } else {
394                                         c = d;
395                                         dmp[0][i] = dmp[2][i];
396                                 dmt[0][i] = 1;
397                                 }
398                         } else {
399                                 dmp[0][i] = i-1;
400                                 dmt[0][i] = 0;
401                         }
402                         if (c - data->g > e) {
403                                 dmp[1][i] = dmp[0][i];
404                                 dmt[1][i] = dmt[0][i];
405                         }
406                         if (c - data->g > d) {
407                                 dmp[2][i] = dmp[0][i];
408                                 dmt[2][i] = dmt[0][i];
409                         }
410                         ap->CP = (ap - 1)->DPDP = data->IP = i;
411                         ap->CC = c;
412                         if ((c -= data->m) > (e -= tz)) {
413                                 e = c;
414                         }
415                         if (c > (d -= tz)) {
416                                 (ap++ - 1)->DD = c;
417                         } else {
418                                 (ap++ - 1)->DD = d;
419                         }
420                         for (curd++; curd <= rightd; curd++) {
421                                 c = ap->CC + wa[BB[curd]];
422                                 d = ap->DD;
423                                 if (c < d || c < e) { 
424                                 if (d < e) {
425                                         c = e;
426                                         ap->CP  = data->IP;
427                                 } else {
428                                         c = d;
429                                         ap->CP = ap->DPDP;
430                                 }
431                                 }   /* otherwise, CP is unchanged */
432                                 ap->CC = c;
433                                 if ((c -= tm) > (e -= tz)) {
434                                 e = c;
435                                 data->IP = ap->CP;
436                                 } 
437                                 if (c > (d -= tz)) {
438                                 (ap - 1)->DD = c;
439                                 (ap-1)->DPDP = ap->CP;
440                                 ap++;
441                                 } else {
442                                 (ap-1)->DD = d;
443                                 (ap-1)->DPDP = ap->DPDP;
444                                 ap++;
445                                 }
446                         }
447                 }
448                 if (!regA && i > N-up && 
449                   best < (j = (ap - 1)->CC - data->reggA - (M - i) * data->reghA)) {
450                         best = j; 
451                         X = i; 
452                         Y = N; 
453                         k = (ap-1)->CP;
454         }
455         }
456         if (!regB) {
457                 for (ap = &data->CD[leftd]; ap <= &data->CD[rightd]; ap++) {
458                         if ((j = ap->CC - data->reggB - data->reghB *
459                              (x = (&data->CD[rightd] - ap))) > best) {
460                                 X = M;
461                                 best = j;
462                                 Y = N - x;
463                                 k = ap->CP;
464                         }
465                 }
466         }
467         if (best < data->CD[rightd].CC) {
468                 X = M;
469                 Y = N;
470                 best = data->CD[rightd].CC;
471                 k = data->CD[rightd].CP;
472         }
473 
474     /* decide which path to be traced back */
475         if (regA && regB) {
476                 X = M; 
477                 Y = N;
478                 v = c = (ap-1)->CC; 
479                 d = (ap-2)->DD;
480                 if (te == 1 && d + data->m > c) {
481                         k = data->CD[rightd-1].DPDP;
482                         l = 2;
483         } else if (te == 2 && e + data->m > c) {
484                         k = data->IP;
485                         l = 1;
486                 } else {
487                         k = data->CD[rightd].CP;
488                         l = 0;
489         }
490         } else {
491                 l = 0;
492                 v = best;
493     }
494         if (rmid > Y - X) {
495                 l = 2;
496         } else if (rmid < Y - X) {
497                 l = 1;
498         }
499   }
500 /* Conquer: Solve subproblems recursively */
501 /* trace back */
502         r = -1; 
503         for (; k > -1; r=k, k=dmp[l][r], l=dmt[l][r]) {
504                 data->FP[k] = r;
505         data->FT[k] = (Uint1) l;
506         }
507 /* forward dividing */
508         if(r == -1) { /* optimal alignment did not cross the middle diagonal */
509                 if(rmid < 0) {
510                         gb_linear_align(A, B, X, Y, rmid + 1, up, tb, te,
511                       legA, legB, regA, regB, data);
512                 } else {
513                         gb_linear_align(A, B, X, Y, low, rmid-1, tb, te,
514                       legA, legB, regA, regB, data);
515                 }                 
516         } else {
517                 k = r;
518                 l = data->FP[k];
519         kt = data->FT[k];
520 
521 /* first block */
522         if (rmid < 0) {
523                 gb_linear_align(A, B, r - 1, r + rmid, rmid + 1,
524                       MIN(up, r + rmid), tb, 1, legA, legB, 1, 1, data);
525                 _DEL(1);
526                 legB = 1;
527                 if (data->sapp > data->sapp0 + 1) {
528                         legA = 1;
529                 }
530         } else if (rmid > 0) {
531                 gb_linear_align(A, B, r, r + rmid - 1, MAX(-r, low), rmid - 1,
532                         tb, 2, legA, legB, 1, 1, data);
533                 _INS(1);
534                 legA = 1;
535                 if(data->sapp > data->sapp0 + 1) {
536                         legB = 1;
537                 }
538                 }
539 /* intermediate blocks */
540                 t2 = up-rmid-1;
541                 t3 = low-rmid+1;
542                 for (; l > -1; k = l, l = data->FP[k], kt = data->FT[k]) {
543                 if (kt == 0) {
544                         _REP;
545                 } else if (kt == 1) { /* right-hand side triangle */
546                                 _INS(1);
547                                 t1 = l - k - 1;
548                                 gb_linear_align(A + k, B + k + rmid + 1, 
549                                                 t1, t1, 0, MIN(t1, t2), 2, 1, legA, legB, 1, 1, data);
550                                 _DEL(1);
551                 } else { /* kt == 2, left-hand side triangle */
552                                 _DEL(1);
553                                 t1 = l - k - 1;
554                                 gb_linear_align(A + k + 1, B + k + rmid, t1, t1,
555                                                 MAX(-t1, t3), 0, 1, 2, legA, legB, 1, 1, data);
556                                 _INS(1);
557                 }
558                 legA = legB = 1;
559         }
560 
561 /* last block */
562                 if (Y-X > rmid) {
563                 _INS(1);
564                 t1 = k + rmid + 1;
565                 gb_linear_align(A + k, B + t1, X - k, Y - t1, 0, 
566                                 MIN(Y - t1, t2), 2, te, legA, legB, 1, 1, data);
567         } else if (Y-X < rmid) {
568                 _DEL(1);
569                  t1 = X - (k + 1);
570                 gb_linear_align(A + k + 1, B + k + rmid, t1, Y - (k + rmid),
571                       MAX(-t1, t3), 0, 1, te, legA, legB, 1, 1, data);
572         }
573         }
574         if(X != M) {
575                 _DEL(M - X)
576         } else if(Y != N) {
577                 _INS(N - Y);
578         }
579         return(v);
580 }
581 
582 /************************************************************/
583 /* gb_linear_CHECK_SCORE - return the score of the alignment stored in S */
584 static Int4 gb_linear_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B,
585                                   Int4 M, Int4 N,
586                                   Int4 PNTR S,
587                                   Boolean lgA, Boolean lgB, 
588                                   Boolean rgA, Boolean rgB,
589                                   data_t *data)
590 {
591         register Int4   i,  j, op;
592         Int4 score;
593 
594         score = i = j = op = 0;
595         if (!lgA && (*S) < 0) {
596                 i = i-*S++;
597         } else if (!lgB && (*S) >0) {
598         j = j+*S++;
599         }
600         while (i < M || j < N) {
601         op = *S++;
602         if (op == 0) {
603                 score = data->w[A[++i]][B[++j]] + score;
604         } else if (op > 0) {
605                 score = score - (data->g + op * data->zzh);
606                 j = j + op;
607         } else {
608                 score = score - (data->g - op * data->zzh);
609                 i = i - op;
610         }
611         if (i==M)
612                 break;
613         }
614         if (!rgA && op < 0) {
615                 score += (data->g - op * data->zzh);
616         }
617         if (!rgB && op > 0) {
618                 score += (data->g + op * data->zzh);
619         }
620         return (score);
621 }
622 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.