NCBI C Toolkit Cross Reference

C/tools/bandalg0.c


  1 static char const rcsid[] = "$Id: bandalg0.c,v 6.3 2003/05/30 17:25:35 coulouri Exp $";
  2 
  3 /* ===========================================================================
  4 *
  5 *                            PUBLIC DOMAIN NOTICE
  6 *               National Center for Biotechnology Information
  7 *
  8 *  This software/database is a "United States Government Work" under the
  9 *  terms of the United States Copyright Act.  It was written as part of
 10 *  the author's official duties as a United States Government employee and
 11 *  thus cannot be copyrighted.  This software/database is freely available
 12 *  to the public for use. The National Library of Medicine and the U.S.
 13 *  Government have not placed any restriction on its use or reproduction.
 14 *
 15 *  Although all reasonable efforts have been taken to ensure the accuracy
 16 *  and reliability of the software and data, the NLM and the U.S.
 17 *  Government do not and cannot warrant the performance or results that
 18 *  may be obtained by using this software or data. The NLM and the U.S.
 19 *  Government disclaim all warranties, express or implied, including
 20 *  warranties of performance, merchantability or fitness for any particular
 21 *  purpose.
 22 *
 23 *  Please cite the author in any work or product based on this material.
 24 *
 25 * ===========================================================================*/
 26 /*****************************************************************************
 27 
 28 File name: bandalgn0.c
 29 
 30 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden
 31 
 32 Contents: Functions to perform global linear alignments.
 33 
 34 ****************************************************************************/
 35 
 36 #include "bandalgn.h"
 37 
 38 static Int4 g_band0_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B,
 39                                 Int4 M, Int4 N,
 40                                 Int4 PNTR S,
 41                                 data_t *data);
 42 
 43 static Int4 g_band0_align(Uint1Ptr A, Uint1Ptr B,
 44                           Int4 M, Int4 N,
 45                           Int4 low, Int4 up,
 46                           Uint1 tb, Uint1 te,
 47                           data_t *data);
 48 
 49 Int4 LIBCALL gband_linear(Uint1Ptr Seq1, Uint1Ptr Seq2,
 50                           Int4 M, Int4 N,
 51                           Int4Ptr PNTR matrix,
 52                           PSUGapOptionsPtr option,
 53                           Int4Ptr S, Int4Ptr Slen)
 54 { 
 55   data_t data;
 56   Int4 c, i, j;
 57   Int4 low, up;
 58   Int4 band;
 59   Int4 score;
 60   
 61   /* Setup global parameters */
 62   data.g = option->gopen;
 63   data.zzh = option->gext;
 64   data.w = matrix;
 65   data.m = data.g + data.zzh;
 66   
 67   data.sapp = S;
 68   data.last = 0;
 69   *Slen = 0;
 70   
 71   low = option->start_diag;
 72   band = option->width;
 73   up = band + low - 1;
 74   
 75   low = MIN(MAX(-M, low), MIN(N - M, 0));
 76   up = MAX(MIN(N, up), MAX(N - M, 0));
 77   
 78   if (up < 0 || low > 0) {
 79     ErrPostEx(SEV_WARNING, 0, 0,
 80               "The band does not include (0,0) grid point");
 81     return 0;
 82   } 
 83   if (up + M < N || low + M > N) {
 84     ErrPostEx(SEV_WARNING, 0, 0,
 85               "The band does not include lower corner");
 86     return 0;
 87   }
 88   
 89   if (N <= 0) { 
 90     if (M > 0) DEL_(M)
 91     return -gap_(M);
 92   }
 93   if (M <= 0) {
 94     INS_(N)
 95     return -gap_(N);
 96   }
 97   if ((band = up - low + 1) <= 1) {
 98     c = 0;
 99     for (i = 1; i <= M; i++) {
100       REP_;
101       c += data.w[Seq1[i]][Seq2[i]];
102     }
103     return c;
104   }
105   
106   j = (band + 2) * sizeof(Int4);
107   
108   data.CD = (dp_ptr) MemGet(sizeof(dp_node) * (band + 2), MGET_ERRPOST);
109   j = M + 1;
110   data.MT[0] = MemGet(j, MGET_ERRPOST);
111   data.MT[1] = MemGet(j, MGET_ERRPOST);
112   data.MT[2] = MemGet(j, MGET_ERRPOST);
113   data.FT = MemGet(j, MGET_ERRPOST);
114   j *= sizeof(Int4);
115   data.MP[0] = (Int4Ptr)MemGet(j, MGET_ERRPOST);
116   data.MP[1] = (Int4Ptr)MemGet(j, MGET_ERRPOST);
117   data.MP[2] = (Int4Ptr)MemGet(j, MGET_ERRPOST);
118   data.FP = (Int4Ptr)MemGet(j, MGET_ERRPOST);
119   
120   c = g_band0_align(Seq1, Seq2, M, N, low, up, 0, 0, &data);
121   score = g_band0_CHECK_SCORE(Seq1, Seq2, M, N, S, &data);
122   
123   MemFree(data.CD);
124   MemFree(data.FT);
125   MemFree(data.FP);
126 
127   for(j = 0; j < 3; j++) {
128     MemFree(data.MT[j]);
129     MemFree(data.MP[j]);
130   }
131   
132   if(score != c) {
133     ErrPostEx(SEV_WARNING, 0, 0, "Check_Score = %ld align_score = %ld ", 
134               (long) score, (long) c);
135         return 0;
136   }
137   
138   *Slen = data.sapp - S;
139 
140   return c;
141 }
142 
143 /* g_band0_CHECK_SCORE - return the score of the alignment stored in S */
144 static Int4 g_band0_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B, 
145                                 Int4 M, Int4 N,
146                                 Int4 PNTR S,
147                                 data_t *data)
148 { 
149   register Int4 i, j, op;
150   Int4 score = i = j = op = 0;
151 
152   while (i < M || j < N) {
153     op = *S++;
154     if (op == 0) 
155       score = data->w[A[++i]][B[++j]] + score;
156     else if (op > 0) {
157       score = score - (data->g + op * data->zzh);
158       j = j + op;
159     }
160     else {
161       score = score - (data->g - op * data->zzh);
162       i = i - op;
163     }
164   }
165   return(score);
166 }
167 
168 /************************************************************/
169 /* g_band0_align(A,B,M,N,up,low,tb,te, data) returns the cost of an
170    optimum conversion between
171    A[1..M] and B[1..N] and appends such a conversion to the current script.
172    tb(te)= 1  no gap-open penalty if the conversion begins(ends) with a delete.
173    tb(te)= 2  no gap-open penalty if the conversion begins(ends) with an insert.
174 */
175 
176 static Int4 g_band0_align(Uint1Ptr A, Uint1Ptr B,
177                           Int4 M, Int4 N,
178                           Int4 low, Int4 up,
179                           Uint1 tb, Uint1 te,
180                           data_t *data)
181 {
182   Int4 rmid, k, l, r, v, kt;
183   Int4 t1, t2, t3;
184   {
185     Int4 band, midd, j;
186     Int4 leftd, rightd; /* for CC, DD, CP and DP */
187     register Int4 curd; /* current index for CC, DD CP and DP */
188     register Int4 i;
189     register Int4 c, d, e, x;
190     register dp_ptr ap;
191     Int4 t, fr;
192     Int4Ptr wa;
193     Int4 ib;
194 
195     /* Boundary cases: M <= 0 , N <= 0, or up-low <= 0 */
196     if (N <= 0) { 
197       if (M > 0) _DEL(M)
198       return 0;
199     }
200     if (M <= 0) {
201       _INS(N)
202       return 0;
203     }
204     if ((band = up - low + 1) <= 1) {
205       for (i = 1; i <= M; i++) _REP
206       return 0;
207     }
208 
209     /* Divide: Find all crossing points */
210 
211     /* Initialization */
212     midd = band/2 + 1;
213     rmid = low + midd - 1;
214     leftd = 1-low;
215     rightd = up-low+1;
216     if(leftd < midd) {
217       fr = -1;
218       for(j = 0; j < midd; j++) 
219         data->CD[j].CP = data->CD[j].DPDP = -1;
220       for(j = midd; j <= rightd; j++)
221         data->CD[j].CP = data->CD[j].DPDP = 0;
222       data->MP[0][0] = -1;
223       data->MP[1][0] = -1;
224       data->MP[2][0] = -1;
225     }
226     else if (leftd > midd) {
227       fr = leftd-midd;
228       for (j = 0; j <= midd; j++)
229         data->CD[j].CP = data->CD[j].DPDP = fr;
230       for (j = midd+1; j <= rightd; j++) 
231         data->CD[j].CP = data->CD[j].DPDP = -1;
232       data->MP[0][fr] = -1;
233       data->MP[1][fr] = -1;
234       data->MP[2][fr] = -1;
235     }
236     else {
237       fr = 0;
238       for (j = 0; j < midd; j++)
239         data->CD[j].CP = data->CD[j].DPDP = 0;
240       for (j = midd; j <= rightd; j++)
241         data->CD[j].CP = data->CD[j].DPDP = 0;
242       data->MP[0][0] = -1;
243       data->MP[1][0] = -1;
244       data->MP[2][0] = -1;
245     }
246 
247     data->CD[leftd].CC = 0;
248     if(tb == 2) t = 0;
249     else t = -data->g;
250     for(j = leftd+1; j <= rightd; j++) {
251       data->CD[j].CC = t = t - data->zzh;
252       data->CD[j-1].DD = t - data->m;
253     }
254     data->CD[rightd+1].CC = MININT;
255     data->CD[rightd].DD = MININT;
256     data->CD[rightd+1].CP = data->CD[rightd].DPDP = 0;
257     if(tb == 1) data->CD[leftd-1].DD = -data->zzh;
258     else data->CD[leftd-1].DD = -data->m;
259     data->CD[leftd-1].CC = MININT;
260     for(i = 1; i <= M; i++) {
261       if(i > N-up) rightd--;
262       if(leftd > 1) leftd--;
263       wa = data->w[A[i]];
264       d = data->CD[leftd].DD;
265       if((ib = leftd + low - 1 + i) > 0) c = data->CD[leftd].CC + wa[B[ib]];
266       if(d > c || ib <= 0) {
267         c = d;
268         data->CD[leftd].CP = data->CD[leftd].DPDP;
269       }
270       e = c - data->m;
271       data->IP = data->CD[leftd].CP;        
272       if(leftd == midd) data->CD[leftd].CP = data->CD[leftd].DPDP = data->IP = i;
273       if(leftd >= 1) 
274         if ((d -= data->zzh) >= e) {
275           data->CD[leftd-1].DD = d;
276           data->CD[leftd-1].DPDP = data->CD[leftd].DPDP;
277         } else {
278           data->CD[leftd-1].DD = e;
279           data->CD[leftd-1].DPDP = data->IP;
280         }
281       data->CD[leftd].CC = c;
282       if(midd <= rightd && midd >= leftd + 1) x = midd; else x = rightd + 1;
283       for(curd = leftd + 1, ap = &data->CD[curd]; curd < x; curd++) {
284         c = ap->CC + wa[B[curd + low - 1 + i]];
285         d = ap->DD;
286         if(c < d || c < e){ 
287           if(d < e) {
288             c = e;
289             ap->CP = data->IP;
290           }
291           else {
292             c = d;
293             ap->CP = ap->DPDP;
294           }
295         }   /* otherwise, CP is unchanged */
296         ap->CC = c;
297         if((c -= data->m) > (e -= data->zzh)) {
298           e = c;
299           data->IP = ap->CP;
300         }
301         if(c > (d -= data->zzh)) {
302           (ap - 1)->DD = c;
303           (ap - 1)->DPDP = ap->CP;
304           ap++;
305         }
306         else {
307           (ap - 1)->DD = d;
308           (ap - 1)->DPDP = ap->DPDP;
309           ap++;
310         }
311       }
312       if(x != rightd + 1) {
313         data->MP[1][i] = data->IP;
314         data->MT[1][i] = 2;
315         data->MP[2][i] = ap->DPDP;
316         data->MT[2][i] = 1;        
317         c = ap->CC + wa[B[curd + low - 1 + i]];
318         d = ap->DD;
319         if(c < d || c < e) {
320           if(e > d) {
321             c = e;
322             data->MP[0][i] = data->MP[1][i];
323             data->MT[0][i] = 2;
324           }
325           else {
326             c = d;
327             data->MP[0][i] = data->MP[2][i];
328             data->MT[0][i] = 1;
329           }
330         }
331         else {
332           data->MP[0][i] = i-1;
333           data->MT[0][i] = 0;
334         }
335         if(c - data->g > e) {
336           data->MP[1][i] = data->MP[0][i];
337           data->MT[1][i] = data->MT[0][i];
338         }
339         if(c - data->g > d) {
340           data->MP[2][i] = data->MP[0][i];
341           data->MT[2][i] = data->MT[0][i];
342         }
343         ap->CP = (ap - 1)->DPDP = data->IP = i;
344         ap->CC = c;
345         if((c -= data->m) > (e -= data->zzh)) e = c; 
346         if(c > (d -= data->zzh)) (ap++ - 1)->DD = c;
347         else (ap++ - 1)->DD = d;
348         for(curd++; curd <= rightd; curd++) {
349           c = ap->CC + wa[B[curd+low-1+i]];
350           d = ap->DD;
351           if (c < d || c < e) { 
352             if (d < e) {
353               c = e;
354               ap->CP = data->IP;
355             } else {
356               c = d;
357               ap->CP = ap->DPDP;
358             }
359           }   /* otherwise, CP is unchanged */
360           ap->CC = c;
361           if((c -= data->m) > (e -= data->zzh)) {
362             e = c; data->IP = ap->CP;
363           } 
364           if (c > (d -= data->zzh)) {
365             (ap-1)->DD = c; (ap-1)->DPDP = ap->CP; ap++;
366           } else {
367             (ap-1)->DD = d; (ap-1)->DPDP = ap->DPDP; ap++;
368           }
369         }
370       }
371     }
372 
373     /* decide which path to be traced back */
374     c = (ap-1)->CC; d = (ap-2)->DD;
375     if (te == 1 && d + data->m > c) {
376       k = data->CD[rightd-1].DPDP;
377       l = 2;
378     } else if (te == 2 && e + data->m > c) {
379       k = data->IP;
380       l = 1;
381     } else {
382       k = data->CD[rightd].CP;
383       l = 0;
384     }
385     if (rmid > N-M) l = 2;
386     else if (rmid < N-M) l = 1;
387     v = c;
388   }
389   /* Conquer: Solve subproblems recursively */
390 
391   /* trace back */
392   r = -1;       
393   for (; k > -1; r=k, k=data->MP[l][r], l=data->MT[l][r]){
394     data->FP[k] = r;
395     data->FT[k] = (Uint1) l;
396   }
397   /* forward dividing */
398   if (r == -1) { /* optimal alignment did not cross the middle diagonal */
399     if(rmid < 0) g_band0_align(A, B, M, N, rmid + 1, up, tb, te, data);
400     else g_band0_align(A, B, M, N, low, rmid - 1, tb, te, data);
401   } else {
402     k = r;
403     l = data->FP[k];
404     kt = data->FT[k];
405 
406     /* first block */
407     if (rmid < 0) {
408       g_band0_align(A, B, r - 1, r + rmid, rmid + 1, MIN(up, r + rmid), tb, 1, data);
409       _DEL(1)
410     } else if (rmid > 0) {
411       g_band0_align(A, B, r, r + rmid - 1, MAX(-r, low), rmid - 1, tb, 2, data);
412       _INS(1)
413     }
414 
415     /* intermediate blocks */
416     t2 = up - rmid - 1;
417     t3 = low - rmid + 1;
418     for (; l > -1; k = l, l = data->FP[k], kt = data->FT[k]) {
419       if (kt == 0) _REP
420       else if (kt == 1) { /* right-hand side triangle */
421         _INS(1)
422           t1 = l - k - 1;
423         g_band0_align(A + k, B + k + rmid + 1, t1, t1, 0, MIN(t1, t2), 2, 1, data);
424         _DEL(1)
425       } else { /* kt == 2, left-hand side triangle */
426         _DEL(1)
427           t1 = l - k - 1;
428         g_band0_align(A + k + 1, B + k + rmid, t1, t1, MAX(-t1, t3), 0, 1, 2, data);
429         _INS(1)
430       }
431     }
432 
433     /* last block */
434     if (N - M > rmid) {
435       _INS(1)
436         t1 = k + rmid + 1;
437       g_band0_align(A + k, B + t1, M - k, N - t1, 0, MIN(N - t1, t2), 2, te, data);
438     } else if (N - M < rmid) {
439       _DEL(1)
440         t1 = M -(k + 1);
441       g_band0_align(A + k + 1, B + k + rmid, t1, N - (k + rmid),
442             MAX(-t1, t3), 0, 1, te, data);
443     }
444   }
445   return(v);
446 }
447 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.