|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/tools/g_any.c |
source navigation diff markup identifier search freetext search file search |
1 static char const rcsid[] = "$Id: g_any.c,v 6.5 2003/05/30 17:25:36 coulouri Exp $";
2
3 /* g_any.c
4 * ===========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================
27 *
28 * File Name: g_any.c
29 *
30 * Author: Jinghui Zhang and Kun-Mao Chao
31 *
32 * Version Creation Date: 5/24/95
33 *
34 *
35 * File Description:
36 * A PACKAGE FOR GLOBALLY ALIGNING TWO SEQUENCES WITHIN AN ARBITRARY REGION:
37 *
38 * To invoke, call z_ALIGN(,B,M,N,L,R,V,G,E,S).
39 * The parameters are explained as follows:
40 * A, B : two sequences to be aligned
41 * M : the length of sequence A
42 * N : the length of sequence B
43 * L : left boundary
44 * R : right boundary
45 * V : scoring table for matches and mismatches
46 * G : gap-opening penalty
47 * E : gap-extension penalty
48 * S : script for DISPLAY routine
49 *
50 *
51 * Modifications:
52 * --------------------------------------------------------------------------
53 * Date Name Description of modification
54 *
55 * $Log: g_any.c,v $
56 * Revision 6.5 2003/05/30 17:25:36 coulouri
57 * add rcsid
58 *
59 * Revision 6.4 1998/08/24 21:19:38 kans
60 * fixed -v -fd warnings
61 *
62 * Revision 6.3 1998/07/07 20:20:13 kans
63 * corrected initial value
64 *
65 * Revision 6.2 1998/07/07 17:12:11 kans
66 * fixed my bug, restoring pmr as previous value of mr, and trying a reasonable first value of mr to get around uninitialized variable warning
67 *
68 * Revision 6.1 1998/05/28 18:03:51 kans
69 * switched lines to initialize mr before using
70 *
71 * Revision 6.0 1997/08/25 18:53:11 madden
72 * Revision changed to 6.0
73 *
74 * Revision 5.0 1996/05/28 13:43:15 ostell
75 * Set to revision 5.0
76 *
77 * Revision 4.0 1995/07/26 13:50:15 ostell
78 * force revision to 4.0
79 *
80 * Revision 1.2 1995/05/25 20:23:06 zjing
81 * add header
82 *
83 *
84 *
85 * ==========================================================================
86 */
87
88
89 #include <ncbi.h>
90
91 #define MININT -999999
92 #define Max(x,y) ((x) >= (y) ? (x) : (y))
93 #define Min(x,y) ((x) <= (y) ? (x) : (y))
94 #define gap(k) ((k) <= 0 ? 0 : (g+e*(k))) /* k-symbol indel cost */
95
96 /* Append "Delete k" op */
97 #define DEL(k) \
98 { \
99 if (last < 0) \
100 last = sapp[-1] -= (k); \
101 else \
102 last = *sapp++ = -(k); \
103 }
104 /* Append "Insert k" op */
105 #define INS(k) \
106 { \
107 if (last > 0) \
108 last = sapp[-1] += (k); \
109 else \
110 last = *sapp++ = (k); \
111 }
112
113 /* Append "Replace" op */
114 #define REP \
115 { last = *sapp++ = 0; \
116 }
117
118 static Int4Ptr SS;
119 static Int4Ptr DD;
120 static Int4 I;
121 static Int4Ptr SP;
122 static Int4Ptr DP;
123 static Int4 IP;
124 static CharPtr ST, DT;
125 static Char IT;
126 static Int4Ptr PP[3];
127 static CharPtr PT[3];
128 static Int4Ptr RI; /* map back diagonal into its row index */
129 static Int4Ptr PNTR vv; /* vv = V */
130 static Int4 g, e, m; /* g = G, e = E, m = g+e */
131 static Int4Ptr sapp; /* Current script append ptr */
132 static Int4 last; /* Last script op appended */
133 CharPtr A, B;
134 Int4Ptr L, R;
135
136 Int4 z_ALIGN(CharPtr, CharPtr, Int4, Int4, Int4, Int4, Int4Ptr, Int4Ptr, Int4Ptr PNTR, Int4, Int4, Int4Ptr);
137 void DISPLAY(CharPtr, CharPtr, Int4, Int4, Int4Ptr, Int4, Int4);
138 static Int4 CHECK_SCORE(CharPtr, CharPtr, Int4, Int4, Int4Ptr);
139
140 static Int4 k_align(Int4 I1, Int4 J1, Char Type1, Int4 I2, Int4 J2, Char Type2)
141 {
142 Int4 i, j, l, r, b, b1, b2;
143 Int4 i1, j1, i2, j2;
144 Char t1, t2;
145 Int4 cmid, nmid, pmid, mr, pmr;
146 Int4 s, sp, c, cp, d, dp, ti;
147 Char st, ct, dt;
148 Int4Ptr va;
149 Char flag, rflag, dflag, sflag;
150 Int4 l_save, r_save;
151
152 /*
153 TRACE("I1= %ld, J1= %ld, Type1 = %ld, I2= %ld, J2= %ld, Type2= %ld\n",I1,J1,Type1,I2,J2,Type2);
154 for (i=I1; i<= I2; ++i) TRACE("%ld : %ld %ld\n",i,L[i],R[i]);
155 */
156 /* Initializations */
157 if (Type2 == 0) SS[J2] = DD[J2] = I = 0;
158 else if (Type2 == 1) {
159 DD[J2] = 0;
160 I = SS[J2] = MININT;
161 } else {
162 I = 0;
163 DD[J2] = SS[J2] = MININT;
164 }
165 RI[I2+J2] = I2;
166 cmid = J2;
167 if (I1 < I2) nmid = (L[I2-1] + R[I2-1] + 1)/2;
168 else nmid = J1;
169 l = L[I2];
170 for (j = J2-1; j >= l; --j) {
171 I = I - e;
172 DD[j] = SS[j] = I - g;
173 if (j+1 >= nmid) { /* (I2,j+1) is a partition point */
174 IP = SP[j] = I2+j+1;
175 IT = ST[j] = 2;
176 } else {
177 IP = SP[j] = IP;
178 IT = ST[j] = IT;
179 }
180 if (j >= nmid) { /* (I2,j) is a partition point */
181 b = I2+j;
182 RI[b] = I2;
183 PP[2][b] = PP[0][b] = IP;
184 PT[2][b] = PT[0][b] = IT;
185 PP[1][b] = DP[j] = b;
186 PT[1][b] = DT[j] = 0;
187 } else {
188 DP[j] = SP[j];
189 DT[j] = ST[j];
190 }
191
192 }
193 c = SS[l];
194 SS[J2+1] = MININT;
195 for (j = l-1; j >= J1; --j)
196 SS[j] = DD[j] = MININT;
197 /*mr = 0;*/ /*set up the initiate value?? jz*/
198 mr = Min(L[I2], R[I2-1]); /* this will be the initial value */
199 for (i = I2-1; i >= I1; --i) {
200 pmr = mr; /* previous value of mr */
201 mr = Min(L[i+1], R[i]);
202 pmid = cmid;
203 cmid = nmid;
204 if (i > I1) nmid = (L[i-1]+R[i-1] + 1)/2;
205 else nmid = J1;
206 l = L[i];
207 r = R[i];
208 I = MININT;
209 s = SS[r+1];
210 sp = SP[r+1];
211 st = ST[r+1];
212 va = vv[A[i+1]];
213 flag = 0;
214 if ((r+1>=cmid && r+1<=pmid) ||
215 (i+1<I2 && r+1>pmid && r+1<=pmr)) dflag = 1;
216 else dflag = 0;
217 for (j = r; j >= l; --j) {
218 rflag = flag;
219 sflag = dflag;
220 if ((j >= nmid && j <= cmid) ||
221 (i < I2 && j > cmid && j <= mr)) {
222 flag = 1;
223 b = i + j;
224 RI[b] = i;
225 } else flag = 0;
226 if ((j >= cmid && j <= pmid) ||
227 (i+1<I2 && j>pmid && j<=pmr)) dflag = 1;
228 else dflag = 0;
229 if (j < J2) c = s + va[B[j+1]];
230 else c = MININT;
231 d = DD[j] - m;
232 ti = I - m;
233 if (c >= Max(d,ti)) {
234 if (sflag) {
235 cp = i+j+2;
236 ct = 0;
237 } else {
238 cp = sp;
239 ct = st;
240 }
241 } else if (d >= ti) {
242 c = d;
243 if (dflag) {
244 cp = i+j+1;
245 ct = 1;
246 } else {
247 cp = DP[j];
248 ct = DT[j];
249 }
250 } else {
251 c = ti;
252 if (rflag) {
253 cp = i+j+1;
254 ct = 2;
255 } else {
256 cp = IP;
257 ct = IT;
258 }
259 }
260 if (c >= (d = DD[j]-e)) {
261 d = c;
262 if (flag) {
263 dp = i+j;
264 dt = 0;
265 } else {
266 dp = cp;
267 dt = ct;
268 }
269 } else {
270 if (dflag) {
271 dp = i+j+1;
272 dt = 1;
273 } else {
274 dp = DP[j];
275 dt = DT[j];
276 }
277 }
278 if (c >= (I = I-e)) {
279 I = c;
280 if (flag) {
281 IP = i+j;
282 IT = 0;
283 } else {
284 IP = cp;
285 IT = ct;
286 }
287 } else {
288 if (rflag) {
289 IP = i+j+1;
290 IT = 2;
291 }
292 }
293 s = SS[j];
294 sp = SP[j];
295 st = ST[j];
296 SS[j] = c;
297 SP[j] = cp;
298 ST[j] = ct;
299 DD[j] = d;
300 DP[j] = dp;
301 DT[j] = dt;
302 if (flag) { /* save information in partition nodes */
303 PP[0][b] = cp;
304 PT[0][b] = ct;
305 PP[1][b] = dp;
306 PT[1][b] = dt;
307 PP[2][b] = IP;
308 PT[2][b] = IT;
309 }
310 }
311 SS[r+1] = MININT;
312 }
313 b = I2 + J2;
314 i1 = I1;
315 j1 = J1;
316 t1 = Type1;
317 b1 = I1 + J1;
318 while (b1 != b) {
319 b2 = PP[t1][b1];
320 i2 = RI[b2];
321 j2 = b2 - i2;
322 t2 = PT[t1][b1];
323 if (i1 == i2) { if (j2 > j1) INS(1) }
324 else if (j1 == j2) DEL(1)
325 else if (i1+1 == i2 && j1+1 == j2) {
326 if (t2 == 1) { INS(1) DEL(1)}
327 else if (t2 == 2) { DEL(1) INS(1) }
328 else REP
329 } else {
330 l_save = L[i2];
331 r_save = R[i2];
332 if (j1 < (L[i1]+R[i1] + 1)/2) {
333 for (i = i2; i > i1; --i) {
334 nmid = (L[i-1]+R[i-1] + 1)/2;
335 L[i] = Max(j1, L[i]);
336 R[i] = nmid-1;
337 }
338 L[i1] = j1;
339 R[i1] = j1;
340 R[i2] = j2;
341 } else {
342 for (i = i1; i < i2; ++i) {
343 cmid = (L[i]+R[i] + 1)/2;
344 L[i] = Max(cmid,L[i+1])+1;
345 R[i] = Min(j2, R[i]);
346 }
347 L[i1] = j1;
348 L[i2] = j2;
349 R[i2] = j2;
350 }
351 k_align(i1,j1,t1,i2,j2,t2);
352 L[i2] = l_save;
353 R[i2] = r_save;
354 }
355 i1 = i2;
356 j1 = j2;
357 t1 = t2;
358 b1 = b2;
359 }
360 return c;
361 }
362
363 Int4 z_ALIGN(Char Seq1[], Char Seq2[], Int4 bi, Int4 bj, Int4 ei, Int4 ej, Int4 lb[], Int4 rb[], Int4Ptr PNTR V, Int4 G, Int4 E, Int4 S[])
364 {
365 Int4 c;
366 Int4 j;
367 Int4 check_score;
368 CharPtr ckalloc(Int4);
369 Int4 M, N;
370
371 A = Seq1;
372 B = Seq2;
373 L = lb;
374 R = rb;
375 vv = V; /* Setup global parameters */
376 g = G;
377 e = E;
378 m = g+e;
379 sapp = S;
380 last = 0;
381 M = ei - bi;
382 N = ej - bj;
383
384 j = (N+2) * sizeof(Int4);
385 SS = ((Int4 *) ckalloc(j)) - bj;
386 DD = ((Int4 *) ckalloc(j)) - bj;
387 j = (N+2) * sizeof(Int4);
388 SP = ((Int4 *) ckalloc(j)) - bj;
389 DP = ((Int4 *) ckalloc(j)) - bj;
390 j = (N+2) * sizeof(Char);
391 ST = ((Char *) ckalloc(j)) - bj;
392 DT = ((Char *) ckalloc(j)) - bj;
393 j = (M+N+1) * sizeof(Int4);
394 PP[0] = (Int4 *) ckalloc(j) - bi - bj;
395 PP[1] = (Int4 *) ckalloc(j) - bi - bj;
396 PP[2] = (Int4 *) ckalloc(j) - bi - bj;
397 RI = (Int4 *) ckalloc(j) - bi - bj;
398 j = (M+N+1) * sizeof(Char);
399 PT[0] = (Char *) ckalloc(j) - bi - bj;
400 PT[1] = (Char *) ckalloc(j) - bi - bj;
401 PT[2] = (Char *) ckalloc(j) - bi - bj;
402
403 c = k_align(bi,bj,0,ei,ej,0);
404 check_score = CHECK_SCORE(Seq1+bi,Seq2+bj,M,N,S);
405 /**#ifdef STATS
406 TRACE("\nAlign_score=%.1f\n",((FloatLo)c)/10.0);
407 TRACE("\nCheck_score=%.1f\n",((FloatLo)check_score)/10.0);
408 if (check_score != c) TRACE("\nCheck_score=%.1f\n",((FloatLo)check_score)/10.0);
409 #endif**/
410 MemFree(SS+bj);
411 MemFree(DD+bj);
412 MemFree(SP+bj);
413 MemFree(DP+bj);
414 MemFree(ST+bj);
415 MemFree(DT+bj);
416 MemFree(PP[0]+bi+bj);
417 MemFree(PP[1]+bi+bj);
418 MemFree(PP[2]+bi+bj);
419 MemFree(RI+bi+bj);
420 MemFree(PT[0]+bi+bj);
421 MemFree(PT[1]+bi+bj);
422 MemFree(PT[2]+bi+bj);
423 return c;
424 }
425
426 /* Alignment display routine */
427
428 static Char ALINE[51], BLINE[51], CLINE[51];
429 /**#if 0
430 void DISPLAY(Char A[], Char B[], Int4 M, Int4 N, Int4 S[], Int4 AP, Int4 BP)
431 { register CharPtr a, b, c;
432 register Int4 i, j, op;
433 Int4 lines, ap, bp;
434
435 i = j = op = lines = 0;
436 ap = AP;
437 bp = BP;
438 a = ALINE;
439 b = BLINE;
440 c = CLINE;
441 while (i < M || j < N)
442 { if (op == 0 && *S == 0)
443 { op = *S++;
444 *a = A[++i];
445 *b = B[++j];
446 *c++ = (*a++ == *b++) ? '|' : ' ';
447 }
448 else
449 { if (op == 0)
450 op = *S++;
451 if (op > 0)
452 { *a++ = ' ';
453 *b++ = B[++j];
454 op--;
455 }
456 else
457 { *a++ = A[++i];
458 *b++ = ' ';
459 op++;
460 }
461 *c++ = '-';
462 }
463 if (a >= ALINE+50 || i >= M && j >= N)
464 { *a = *b = *c = '\0';
465 TRACE("\n%6d ",50*lines++);
466 for (b = ALINE+10; b <= a; b += 10)
467 TRACE(" . :");
468 if (b <= a+5)
469 TRACE(" .");
470 TRACE("\n%6d %s\n %s\n%6d %s\n",ap,ALINE,CLINE,bp,BLINE);
471 ap = AP + i;
472 bp = BP + j;
473 a = ALINE;
474 b = BLINE;
475 c = CLINE;
476 }
477 }
478 }
479 #endif**/
480
481 /* CHECK_SCORE - return the score of the alignment stored in S */
482
483 static Int4 CHECK_SCORE(Char A[], Char B[], Int4 M, Int4 N, Int4 S[])
484 {
485 register Int4 i, j, op;
486 Int4 score;
487
488 score = i = j = op = 0;
489 while (i < M || j < N) {
490 op = *S++;
491 if (op == 0)
492 score = vv[A[++i]][B[++j]] + score;
493 else if (op > 0) {
494 score = score - (g+op*e);
495 j = j+op;
496 } else {
497 score = score - (g-op*e);
498 i = i-op;
499 }
500 }
501 return(score);
502 }
503
504 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |