NCBI C Toolkit Cross Reference

C/tools/g_any.c


  1 static char const rcsid[] = "$Id: g_any.c,v 6.5 2003/05/30 17:25:36 coulouri Exp $";
  2 
  3 /*  g_any.c
  4 * ===========================================================================
  5 *
  6 *                            PUBLIC DOMAIN NOTICE
  7 *               National Center for Biotechnology Information
  8 *
  9 *  This software/database is a "United States Government Work" under the
 10 *  terms of the United States Copyright Act.  It was written as part of
 11 *  the author's official duties as a United States Government employee and
 12 *  thus cannot be copyrighted.  This software/database is freely available
 13 *  to the public for use. The National Library of Medicine and the U.S.
 14 *  Government have not placed any restriction on its use or reproduction.
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 *  Please cite the author in any work or product based on this material.
 25 *
 26 * ===========================================================================
 27 *
 28 * File Name:  g_any.c
 29 *
 30 * Author:  Jinghui Zhang and Kun-Mao Chao
 31 *
 32 * Version Creation Date: 5/24/95
 33 *
 34 *
 35 * File Description:   
 36 * A PACKAGE FOR GLOBALLY ALIGNING TWO SEQUENCES WITHIN AN ARBITRARY REGION:
 37 *
 38 *  To invoke, call z_ALIGN(,B,M,N,L,R,V,G,E,S).
 39 *   The parameters are explained as follows:
 40 *       A, B : two sequences to be aligned
 41 *       M : the length of sequence A
 42 *       N : the length of sequence B
 43 *       L : left boundary
 44 *       R : right boundary
 45 *       V : scoring table for matches and mismatches
 46 *       G : gap-opening penalty
 47 *       E : gap-extension penalty
 48 *       S : script for DISPLAY routine
 49 *
 50 *
 51 * Modifications:
 52 * --------------------------------------------------------------------------
 53 * Date     Name        Description of modification
 54 *
 55 * $Log: g_any.c,v $
 56 * Revision 6.5  2003/05/30 17:25:36  coulouri
 57 * add rcsid
 58 *
 59 * Revision 6.4  1998/08/24 21:19:38  kans
 60 * fixed -v -fd warnings
 61 *
 62 * Revision 6.3  1998/07/07 20:20:13  kans
 63 * corrected initial value
 64 *
 65 * Revision 6.2  1998/07/07 17:12:11  kans
 66 * fixed my bug, restoring pmr as previous value of mr, and trying a reasonable first value of mr to get around uninitialized variable warning
 67 *
 68 * Revision 6.1  1998/05/28 18:03:51  kans
 69 * switched lines to initialize mr before using
 70 *
 71 * Revision 6.0  1997/08/25 18:53:11  madden
 72 * Revision changed to 6.0
 73 *
 74 * Revision 5.0  1996/05/28 13:43:15  ostell
 75 * Set to revision 5.0
 76 *
 77  * Revision 4.0  1995/07/26  13:50:15  ostell
 78  * force revision to 4.0
 79  *
 80  * Revision 1.2  1995/05/25  20:23:06  zjing
 81  * add header
 82  *
 83 *
 84 *
 85 * ==========================================================================
 86 */
 87 
 88 
 89 #include <ncbi.h>
 90 
 91 #define MININT -999999
 92 #define Max(x,y)  ((x) >= (y) ? (x) : (y))
 93 #define Min(x,y)  ((x) <= (y) ? (x) : (y))
 94 #define gap(k)  ((k) <= 0 ? 0 : (g+e*(k)))      /* k-symbol indel cost */
 95 
 96                                                 /* Append "Delete k" op */
 97 #define DEL(k)                          \
 98 {                                       \
 99   if (last < 0)                         \
100     last = sapp[-1] -= (k);             \
101   else                                  \
102     last = *sapp++ = -(k);              \
103 }
104                                                 /* Append "Insert k" op */
105 #define INS(k)                          \
106 {                                       \
107   if (last > 0)                         \
108     last = sapp[-1] += (k);             \
109   else                                  \
110     last = *sapp++ = (k);               \
111 }
112 
113                                                 /* Append "Replace" op */
114 #define REP                             \
115 { last = *sapp++ = 0;                   \
116 }
117 
118 static Int4Ptr SS;
119 static Int4Ptr DD;
120 static Int4 I;
121 static Int4Ptr SP;
122 static Int4Ptr DP;
123 static Int4 IP;
124 static CharPtr ST, DT;
125 static Char IT;
126 static Int4Ptr PP[3];
127 static CharPtr PT[3];
128 static Int4Ptr RI;                      /* map back diagonal into its row index */
129 static Int4Ptr PNTR vv;                          /* vv = V */
130 static Int4 g, e, m;                            /* g = G, e = E, m = g+e */
131 static Int4Ptr sapp;                            /* Current script append ptr */
132 static Int4  last;                              /* Last script op appended */
133 CharPtr A, B;
134 Int4Ptr L, R;
135 
136 Int4 z_ALIGN(CharPtr, CharPtr, Int4, Int4, Int4, Int4, Int4Ptr, Int4Ptr, Int4Ptr PNTR, Int4, Int4, Int4Ptr);
137 void DISPLAY(CharPtr, CharPtr, Int4, Int4, Int4Ptr, Int4, Int4);
138 static Int4 CHECK_SCORE(CharPtr, CharPtr, Int4, Int4, Int4Ptr);
139 
140 static Int4 k_align(Int4 I1, Int4 J1, Char Type1, Int4 I2, Int4 J2, Char Type2)
141 {
142         Int4 i, j, l, r, b, b1, b2;
143         Int4 i1, j1, i2, j2;
144         Char t1, t2;
145         Int4 cmid, nmid, pmid, mr, pmr;
146         Int4 s, sp, c, cp, d, dp, ti;
147         Char st, ct, dt;
148         Int4Ptr va;
149         Char flag, rflag, dflag, sflag;
150         Int4 l_save, r_save;
151 
152 /*
153 TRACE("I1= %ld, J1= %ld, Type1 = %ld, I2= %ld, J2= %ld, Type2= %ld\n",I1,J1,Type1,I2,J2,Type2);
154 for (i=I1; i<= I2; ++i) TRACE("%ld : %ld %ld\n",i,L[i],R[i]);
155 */
156         /* Initializations */
157         if (Type2 == 0) SS[J2] = DD[J2] = I = 0;
158         else if (Type2 == 1) {
159                 DD[J2] = 0;
160                 I = SS[J2] = MININT;
161         } else {
162                 I = 0;
163                 DD[J2] = SS[J2] = MININT;
164         }
165         RI[I2+J2] = I2;
166         cmid = J2;
167         if (I1 < I2) nmid = (L[I2-1] + R[I2-1] + 1)/2;
168         else nmid = J1;
169         l = L[I2];
170         for (j = J2-1; j >= l; --j) {
171                 I = I - e;
172                 DD[j] = SS[j] = I - g;
173                 if (j+1 >= nmid) {  /* (I2,j+1) is a partition point */
174                         IP = SP[j] = I2+j+1;
175                         IT = ST[j] = 2;
176                 } else {
177                         IP = SP[j] = IP;
178                         IT = ST[j] = IT;
179                 }
180                 if (j >= nmid) {           /* (I2,j) is a partition point */
181                         b = I2+j;
182                         RI[b] = I2;
183                         PP[2][b] = PP[0][b] = IP;
184                         PT[2][b] = PT[0][b] = IT;
185                         PP[1][b] = DP[j] = b;
186                         PT[1][b] = DT[j] = 0;
187                 } else {
188                         DP[j] = SP[j];
189                         DT[j] = ST[j];
190                 }
191 
192         }
193         c = SS[l];
194         SS[J2+1] = MININT;
195         for (j = l-1; j >= J1; --j)
196                 SS[j] = DD[j] = MININT;
197         /*mr = 0;*/             /*set up the initiate value?? jz*/
198         mr = Min(L[I2], R[I2-1]); /* this will be the initial value */
199         for (i = I2-1; i >= I1; --i) {
200                 pmr = mr; /* previous value of mr */
201                 mr = Min(L[i+1], R[i]);
202                 pmid = cmid;
203                 cmid = nmid;
204                 if (i > I1) nmid = (L[i-1]+R[i-1] + 1)/2;
205                 else nmid = J1;
206                 l = L[i];
207                 r = R[i];
208                 I = MININT;
209                 s = SS[r+1];
210                 sp = SP[r+1];
211                 st = ST[r+1];
212                 va = vv[A[i+1]];
213                 flag = 0;
214                 if ((r+1>=cmid && r+1<=pmid) ||
215                     (i+1<I2 && r+1>pmid && r+1<=pmr)) dflag = 1;
216                 else dflag = 0;
217                 for (j = r; j >= l; --j) {
218                         rflag = flag;
219                         sflag = dflag;
220                         if ((j >= nmid && j <= cmid) ||
221                             (i < I2 && j > cmid && j <= mr)) {
222                                 flag = 1;
223                                 b = i + j;
224                                 RI[b] = i;
225                         } else flag = 0;
226                         if ((j >= cmid && j <= pmid) ||
227                             (i+1<I2 && j>pmid && j<=pmr)) dflag = 1;
228                         else dflag = 0;
229                         if (j < J2) c = s + va[B[j+1]];
230                         else c = MININT;
231                         d = DD[j] - m;
232                         ti = I - m;
233                         if (c >= Max(d,ti)) {
234                                 if (sflag) {
235                                         cp = i+j+2;
236                                         ct = 0;
237                                 } else {
238                                         cp = sp;
239                                         ct = st;
240                                 }
241                         } else if (d >= ti) {
242                                 c = d;
243                                 if (dflag) {
244                                         cp = i+j+1;
245                                         ct = 1;
246                                 } else {
247                                         cp = DP[j];
248                                         ct = DT[j];
249                                 }
250                         } else {
251                                 c = ti;
252                                 if (rflag) {
253                                         cp = i+j+1;
254                                         ct = 2;
255                                 } else {
256                                         cp = IP;
257                                         ct = IT;
258                                 }
259                         }
260                         if (c >= (d = DD[j]-e)) {
261                                 d = c;
262                                 if (flag) {
263                                         dp = i+j;
264                                         dt = 0;
265                                 } else {
266                                         dp = cp;
267                                         dt = ct;
268                                 }
269                         } else {
270                                 if (dflag) {
271                                         dp = i+j+1;
272                                         dt = 1;
273                                 } else {
274                                         dp = DP[j];
275                                         dt = DT[j];
276                                 }
277                         }
278                         if (c >= (I = I-e)) {
279                                 I = c;
280                                 if (flag) {
281                                         IP = i+j;
282                                         IT = 0;
283                                 } else {
284                                         IP = cp;
285                                         IT = ct;
286                                 }
287                         } else {
288                                 if (rflag) {
289                                         IP = i+j+1;
290                                         IT = 2;
291                                 }
292                         }
293                         s = SS[j];
294                         sp = SP[j];
295                         st = ST[j];
296                         SS[j] = c;
297                         SP[j] = cp;
298                         ST[j] = ct;
299                         DD[j] = d;
300                         DP[j] = dp;
301                         DT[j] = dt;
302                         if (flag) {  /* save information in partition nodes */
303                                 PP[0][b] = cp;
304                                 PT[0][b] = ct;
305                                 PP[1][b] = dp;
306                                 PT[1][b] = dt;
307                                 PP[2][b] = IP;
308                                 PT[2][b] = IT;
309                         }
310                 }
311                 SS[r+1] = MININT;
312         }
313         b = I2 + J2;
314         i1 = I1;
315         j1 = J1;
316         t1 = Type1;
317         b1 = I1 + J1;
318         while (b1 != b) {
319                 b2 = PP[t1][b1];
320                 i2 = RI[b2];
321                 j2 = b2 - i2;
322                 t2 = PT[t1][b1];
323                 if (i1 == i2) { if (j2 > j1) INS(1) }
324                 else if (j1 == j2) DEL(1)
325                 else if (i1+1 == i2 && j1+1 == j2) {
326                         if (t2 == 1) { INS(1) DEL(1)}
327                         else if (t2 == 2) { DEL(1) INS(1) }
328                         else REP
329                 } else {
330                         l_save = L[i2];
331                         r_save = R[i2];
332                         if (j1 < (L[i1]+R[i1] + 1)/2) {
333                                 for (i = i2; i > i1; --i) {
334                                         nmid = (L[i-1]+R[i-1] + 1)/2;
335                                         L[i] = Max(j1, L[i]);
336                                         R[i] = nmid-1;
337                                 }
338                                 L[i1] = j1;
339                                 R[i1] = j1;
340                                 R[i2] = j2;
341                         } else {
342                                 for (i = i1; i < i2; ++i) {
343                                         cmid = (L[i]+R[i] + 1)/2;
344                                         L[i] = Max(cmid,L[i+1])+1;
345                                         R[i] = Min(j2, R[i]);
346                                 }
347                                 L[i1] = j1;
348                                 L[i2] = j2;
349                                 R[i2] = j2;
350                         }
351                         k_align(i1,j1,t1,i2,j2,t2);
352                         L[i2] = l_save;
353                         R[i2] = r_save;
354                 }
355                 i1 = i2;
356                 j1 = j2;
357                 t1 = t2;
358                 b1 = b2;
359         }
360         return c;
361 }
362 
363 Int4 z_ALIGN(Char Seq1[], Char Seq2[], Int4 bi, Int4 bj, Int4 ei, Int4 ej, Int4 lb[], Int4 rb[], Int4Ptr PNTR V, Int4 G, Int4 E, Int4 S[])
364 { 
365         Int4 c;
366         Int4 j;
367         Int4 check_score;
368         CharPtr ckalloc(Int4);
369         Int4 M, N;
370 
371         A = Seq1;
372         B = Seq2;
373         L = lb;
374         R = rb;
375         vv = V;                  /* Setup global parameters */
376         g = G;
377         e = E;
378         m = g+e;
379         sapp = S;
380         last = 0;
381         M = ei - bi;
382         N = ej - bj;
383 
384         j = (N+2) * sizeof(Int4);
385         SS = ((Int4 *) ckalloc(j)) - bj;
386         DD = ((Int4 *) ckalloc(j)) - bj;
387         j = (N+2) * sizeof(Int4);
388         SP = ((Int4 *) ckalloc(j)) - bj;
389         DP = ((Int4 *) ckalloc(j)) - bj;
390         j = (N+2) * sizeof(Char);
391         ST = ((Char *) ckalloc(j)) - bj;
392         DT = ((Char *) ckalloc(j)) - bj;
393         j = (M+N+1) * sizeof(Int4);
394         PP[0] = (Int4 *) ckalloc(j) - bi - bj;
395         PP[1] = (Int4 *) ckalloc(j) - bi - bj;
396         PP[2] = (Int4 *) ckalloc(j) - bi - bj;
397         RI = (Int4 *) ckalloc(j) - bi - bj;
398         j = (M+N+1) * sizeof(Char);
399         PT[0] = (Char *) ckalloc(j) - bi - bj;
400         PT[1] = (Char *) ckalloc(j) - bi - bj;
401         PT[2] = (Char *) ckalloc(j) - bi - bj;
402 
403         c = k_align(bi,bj,0,ei,ej,0);
404         check_score = CHECK_SCORE(Seq1+bi,Seq2+bj,M,N,S);
405 /**#ifdef STATS
406         TRACE("\nAlign_score=%.1f\n",((FloatLo)c)/10.0);
407         TRACE("\nCheck_score=%.1f\n",((FloatLo)check_score)/10.0);
408         if (check_score != c) TRACE("\nCheck_score=%.1f\n",((FloatLo)check_score)/10.0);
409 #endif**/
410         MemFree(SS+bj);
411         MemFree(DD+bj);
412         MemFree(SP+bj);
413         MemFree(DP+bj);
414         MemFree(ST+bj);
415         MemFree(DT+bj);
416         MemFree(PP[0]+bi+bj);
417         MemFree(PP[1]+bi+bj);
418         MemFree(PP[2]+bi+bj);
419         MemFree(RI+bi+bj);
420         MemFree(PT[0]+bi+bj);
421         MemFree(PT[1]+bi+bj);
422         MemFree(PT[2]+bi+bj);
423         return c;
424 }
425 
426 /* Alignment display routine */
427 
428 static Char ALINE[51], BLINE[51], CLINE[51];
429 /**#if 0
430 void DISPLAY(Char A[], Char B[], Int4 M, Int4 N, Int4 S[], Int4 AP, Int4 BP)
431 { register CharPtr a, b, c;
432   register Int4   i,  j, op;
433            Int4   lines, ap, bp;
434 
435   i = j = op = lines = 0;
436   ap = AP;
437   bp = BP;
438   a = ALINE;
439   b = BLINE;
440   c = CLINE;
441   while (i < M || j < N)
442     { if (op == 0 && *S == 0)
443         { op = *S++;
444           *a = A[++i];
445           *b = B[++j];
446           *c++ = (*a++ == *b++) ? '|' : ' ';
447         }
448       else
449         { if (op == 0)
450             op = *S++;
451           if (op > 0)
452             { *a++ = ' ';
453               *b++ = B[++j];
454               op--;
455             }
456           else
457             { *a++ = A[++i];
458               *b++ = ' ';
459               op++;
460             }
461           *c++ = '-';
462         }
463       if (a >= ALINE+50 || i >= M && j >= N)
464         { *a = *b = *c = '\0';
465           TRACE("\n%6d ",50*lines++);
466           for (b = ALINE+10; b <= a; b += 10)
467             TRACE("    .    :");
468           if (b <= a+5)
469             TRACE("    .");
470           TRACE("\n%6d %s\n       %s\n%6d %s\n",ap,ALINE,CLINE,bp,BLINE);
471           ap = AP + i;
472           bp = BP + j;
473           a = ALINE;
474           b = BLINE;
475           c = CLINE;
476         }
477     }
478 }
479 #endif**/
480 
481 /* CHECK_SCORE - return the score of the alignment stored in S */
482 
483 static Int4 CHECK_SCORE(Char A[], Char B[], Int4 M, Int4 N, Int4 S[])
484 { 
485   register Int4   i,  j, op;
486   Int4 score;
487 
488   score = i = j = op = 0;
489   while (i < M || j < N) {
490         op = *S++;
491         if (op == 0) 
492                 score = vv[A[++i]][B[++j]] + score;
493         else if (op > 0) {
494                 score = score - (g+op*e);
495                 j = j+op;
496         } else {
497                 score = score - (g-op*e);
498                 i = i-op;
499         }
500   }
501   return(score);
502 }
503 
504 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.