|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/tools/dotseq.c |
source navigation diff markup identifier search freetext search file search |
1 static char const rcsid[] = "$Id: dotseq.c,v 6.14 2006/07/13 17:16:20 bollin Exp $";
2
3 /* dotSeq.c */
4 /* ===========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information (NCBI)
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government do not place any restriction on its use or reproduction.
15 * We would, however, appreciate having the NCBI and the author cited in
16 * any work or product based on this material.
17 *
18 * Although all reasonable efforts have been taken to ensure the accuracy
19 * and reliability of the software and data, the NLM and the U.S.
20 * Government do not and cannot warrant the performance or results that
21 * may be obtained by using this software or data. The NLM and the U.S.
22 * Government disclaim all warranties, express or implied, including
23 * warranties of performance, merchantability or fitness for any particular
24 * purpose.
25 *
26 * ===========================================================================
27 *
28 * File Name: dotseq.c
29 *
30 * Author: Fasika Aklilu
31 *
32 * Version Creation Date: 8/9/01
33 *
34 * $Revision: 6.14 $
35 *
36 * File Description: computes local alignments for dot matrix
37 *
38 * Modifications:
39 * --------------------------------------------------------------------------
40 * Date Name Description of modification
41 * ------- ---------- -----------------------------------------------------
42
43 $Revision: 6.14 $
44 $Log: dotseq.c,v $
45 Revision 6.14 2006/07/13 17:16:20 bollin
46 use Uint4 instead of Uint2 for itemID values
47 removed unused variables
48
49 Revision 6.13 2004/03/17 20:33:48 bollin
50 In DOT_InitMainInfobyLoc, exit function and return NULL if unable to determine
51 ID or Bioseq from SeqLoc provided. This change averts a core dump.
52
53 Revision 6.12 2003/09/17 20:55:12 kskatz
54 Resetting to the way it was in revision 6.10; accidently checked in a kludge meant to be used only locally in DOT_GetNResidues
55
56 Revision 6.11 2003/09/17 20:39:01 kskatz
57 Commented out a temporary fix in SPI_AlignInWindows() [line 3880]
58
59 Revision 6.10 2003/05/30 17:25:36 coulouri
60 add rcsid
61
62 Revision 6.9 2003/02/07 21:21:50 kans
63 DOT_BuildPLookup was erroneously calling MemFree on elements of array
64
65 Revision 6.8 2001/08/09 17:21:08 kans
66 include alignmgr.h to avoid Mac compile error
67
68 Revision 6.7 2001/08/09 16:31:49 aklilu
69 added revision
70
71 Revision 6.6 2001/01/19 20:11:31 kans
72 minor changes to work with MacOS X compiler (contributed by William Van Etten)
73
74 Revision 6.5 2000/09/19 21:36:09 kans
75 commented out printf
76
77 Revision 6.4 2000/08/07 13:29:23 sicotte
78 Fix printf (long) casts
79
80 Revision 6.3 2000/07/26 18:23:09 aklilu
81 added DOT_SPI_FindBestAlnByDotPlotEx, to return best alignments
82
83
84 */
85
86 #include <dotseq.h>
87 #include <alignmgr.h>
88
89 /****************************************************************************
90
91 GLOBAL VARIABLES
92 ***************************************************************************/
93
94
95
96 static Int4 DOTblosum62[24][24] = {
97 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4,
98 -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4,
99 -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4,
100 -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4,
101 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4,
102 -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4,
103 -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4,
104 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4,
105 -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4,
106 -1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4,
107 -1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4,
108 -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4,
109 -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4,
110 -2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4,
111 -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4,
112 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4,
113 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4,
114 -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4,
115 -2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4,
116 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4,
117 -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4,
118 -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4,
119 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4,
120 -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1
121 };
122
123 static Int4 DOTNCBIstdaaToBLOSUM [] = {23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15, 16, 19, 17, 22, 19, 21, 22 , 23};
124
125 static Int4 trim_size=0;
126 static Boolean is_na;
127
128 /****************************************************************************
129
130 STATIC FUNCTIONS
131 ***************************************************************************/
132
133
134 static Boolean DOT_GetSeqsbyLoc(DOTMainDataPtr mip);
135 static Boolean DOT_ComputeDotPlot (DOTMainDataPtr mip);
136
137
138
139
140 /*_____________________________________(DOT_DNAScoringMatrix)_______
141
142
143 Purpose : Create DNA scoring matrix.
144
145 ____________________________________________________________________*/
146
147 extern Int4Ptr PNTR DOT_DNAScoringMatrix(Int4 mismatch, Int4 reward,Int4 alsize){
148 Int4Ptr PNTR alignMatrix;
149 Int4 i,j,k;
150 Int4 nbase1,nbase2;
151 Int4 base1[4],base2[4];
152 Int4 nmatches;
153 /* alsize = alphabet size.. should be a 4 or 16. */
154 /* If "ncbi'ized" the codes.. could make matrix this way,
155 then use the ncbi translation tools
156 */
157 if(alsize !=4 && alsize!=16) {
158 ErrPostEx(SEV_WARNING,0,0,"DNAScoringMatrix: No ambiguity codes for alphabet size=%d\n",alsize);
159 }
160 alignMatrix= (Int4Ptr PNTR) Calloc(alsize*(alsize+1),
161 MAX(sizeof(Int4),sizeof(Int4Ptr)));
162 for(i=0;i<alsize;i++) {
163 *(alignMatrix+i)=(Int4Ptr) (alignMatrix+(i+1)*alsize);
164 }
165
166 for(i=0;i<alsize;i++) {
167 for(j=0;j<alsize;j++) {
168 alignMatrix[i][j]=mismatch;
169 }
170 }
171 for(i=0;i<alsize;i++)
172 alignMatrix[i][i]=reward;
173 if(alsize!=16)
174 return alignMatrix;
175
176 /* For ncbi4na. "presence: of bases is Bit-encoded A=1,C=2,G=4,T=8"*/
177 /* Score gaps as mismatches... should not be there */
178 /* for(i=0;i<alsize;i++) alignMatrix[0][i]=alignMatrix[i][0]=mismatch; */
179
180 for(i=0;i<alsize;i++) {
181 base1[0] = i&1;
182 base1[1] = (i>>1)&1;
183 base1[2] = (i>>2)&1;
184 base1[3] = (i>>3)&1;
185 nbase1 = base1[0]+base1[1]+base1[2]+base1[3];
186 for(j=0;j<alsize;j++) {
187 base2[0] = j&1;
188 base2[1] = (j>>1)&1;
189 base2[2] = (j>>2)&1;
190 base2[3] = (j>>3)&1;
191 nbase2 = base2[0]+base2[1]+base2[2]+base2[3];
192 nmatches=0;
193 for(k=0;k<4;k++) nmatches += (base1[k]&base2[k]); /* matches */
194 if(nbase1==0 || nbase2==0) {
195 alignMatrix[i][j]=mismatch; /*gaps */
196 } else {
197 alignMatrix[i][j] =(reward*nmatches+(nbase1*nbase2-nmatches)*mismatch)/(nbase1*nbase2);
198 }
199 }
200 }
201 return alignMatrix;
202 }
203
204
205 /*____________________________________________(DOT_CalculateScoreMeans)_____
206
207 Purpose : Calculate max and mean values for matrix vectors.
208
209 ____________________________________________________________________*/
210
211 static void DOT_CalculateMatrix_MinMax (DOTMainDataPtr mip)
212 {
213 Int4 temp, sum, q, s, max;
214 Int2 maxscore, minscore;
215 Int4Ptr PNTR matrix;
216
217
218 matrix = mip->matrix;
219 max=(is_na)?4:24;
220
221 for (maxscore = minscore = sum = s = 0; s < max; s++)
222 for (q=0; q < max; q++)
223 {
224 if (is_na)
225 temp = matrix[q][s];
226 else
227 temp = DOTblosum62[DOTNCBIstdaaToBLOSUM[q]][DOTNCBIstdaaToBLOSUM[s]];
228
229 if (temp < minscore)
230 minscore = temp;
231 if (temp > maxscore)
232 maxscore = temp;
233
234 sum += temp;
235 }
236
237 mip->maxscore=maxscore;
238 mip->minscore=minscore;
239
240 return;
241 }
242
243 /*_______________________________________________________(CompareFunc)_____
244
245
246 Purpose : Compare function for hits binary tree.
247
248 ____________________________________________________________________*/
249
250 static Int4 CompareFunc(Pointer n1, Pointer n2)
251 {
252 DOTDiagPtr nd1, nd2;
253
254 nd1 = (DOTDiagPtr)n1;
255 nd2 = (DOTDiagPtr)n2;
256
257 if (nd1->score > nd2->score)
258 return 1;
259 else if (nd1->score < nd2->score)
260 return -1;
261 /* else if (nd1->length > nd2->length) */
262 /* return 1; */
263 /* else if (nd1->length < nd2->length) */
264 /* return -1; */
265 else if (nd1->rdmKey > nd2->rdmKey)
266 return 1;
267 else if (nd1->rdmKey < nd2->rdmKey)
268 return -1;
269 else
270 return 0;
271 }
272
273 Pointer avl_List[MAX_TRIM]={""};
274 Int4 avl_counter1=0;
275
276 /*___________________________________________________________(DOT_AddToList)
277
278 Purpose : Add to a temporary array for deletion from binary tree.
279
280 ____________________________________________________________________*/
281 static Boolean DOT_AddToList (Pointer item)
282 {
283
284 avl_List[avl_counter1]=item;
285
286 avl_counter1++;
287
288 if (avl_counter1>=trim_size)
289 {
290 avl_counter1=0;
291 return FALSE;
292 }
293 else
294 return TRUE;
295 }
296 /*___________________________________________________________(DOT_TrimBinTree)
297 Purpose : Trim hits binary tree if grows > mip->tree_limit.
298
299 ____________________________________________________________________*/
300 static void DOT_TrimBinTree (Avl_TreePtr tree, Int4Ptr cutoff)
301 {
302 Int2 i;
303 DOTDiagPtr node;
304
305 Avl_Traverse(tree, DOT_AddToList);
306
307 for (i=0; i<trim_size; i++)
308 {
309 node=(DOTDiagPtr)avl_List[i];
310 *cutoff=MAX(*cutoff, node->score);
311 node=(DOTDiagPtr)Avl_Delete(tree, (Pointer)node);
312 if (node) MemFree(node);
313 }
314 }
315 /*_________________________________________(DOT_NewHitNode)_____
316
317
318 Purpose : Create new hit node.
319
320 ____________________________________________________________________*/
321 static DOTDiagPtr DOT_NewHitNode(DOTMainDataPtr mip, Int4 q_left, Int4 s_left, Int4 length, Int4 score)
322 {
323 DOTDiagPtr node;
324
325 if (!(node =(DOTDiagPtr)MemNew (sizeof(DOTDiag))))
326 {
327 ErrPostEx (SEV_ERROR, 0, 0, "%s", "HIT node allocation failed");
328 return NULL;
329 }
330 node->score=score;
331 node->q_start = mip->q_start + q_left;
332 node->s_start = mip->s_start + s_left;
333
334 node->length = length;
335 node->rdmKey=RandomNum();
336
337 return node;
338 }
339
340 /*___________________________________________________________(DOT_SaveHit)_____
341
342
343 Purpose : Save extended hit.
344
345 ____________________________________________________________________*/
346 static void DOT_SaveHit (DOTMainDataPtr mip, Int4 score, Int4 q_offset, Int4 s_offset, Int4 length)
347 {
348 DOTDiagPtr Node;
349 Int4 unique=0;
350
351
352
353 Node=DOT_NewHitNode(mip, q_offset, s_offset, length, score);
354
355 if (Node == NULL) return;
356
357 if (mip->first_pass == FALSE)
358 {
359 if ((int)mip->tree->totalNodes>=mip->tree_limit)
360 DOT_TrimBinTree(mip->tree, &(mip->cutoff_score));
361
362 if(!Avl_Insert(mip->tree, (Pointer)Node, &unique))
363 if (Node) MemFree(Node);
364 }
365 else /* first pass */
366 {
367
368 mip->tree = (Avl_TreePtr)MemNew(sizeof(Avl_Tree));
369 Avl_Initialize(mip->tree, CompareFunc, NULL);
370 mip->tree->root=(Avl_NodePtr)MemNew(sizeof(Avl_Node));
371 mip->tree->root->avl_p=(Pointer)Node;
372 mip->tree->totalNodes++;
373 mip->first_pass=FALSE;
374 if (mip->tree_limit>1000)
375 trim_size=200;
376 else
377 trim_size=(mip->tree_limit*20)/100;
378 }
379
380 }
381
382 /*_______________________________________________________(CompareDiags)_____
383
384 Purpose : Compare function for history binary tree.
385
386 ____________________________________________________________________*/
387
388 static Int4 CompareDiags(Pointer n1, Pointer n2)
389 {
390 Int4 diag2, diag1;
391 DOTHistPtr node1, node2;
392
393 node1 = (DOTHistPtr)n1;
394 node2 = (DOTHistPtr)n2;
395
396 diag1 = node1->diag_constant;
397 diag2 = node2->diag_constant;
398
399 if (diag1 == diag2) /* found diag match */
400 return 0;/* found match */
401 else if (diag1 > diag2)
402 return 1; /* greater */
403 else
404 return -1; /* smaller */
405 }
406
407 /*_________________________________________(DOT_NewHistNode)_____
408
409
410 Purpose : Create new history node.
411
412 ____________________________________________________________________*/
413 DOTHistPtr DOT_NewHistNode(Int4 diag, Int4 q_pos)
414 {
415 DOTHistPtr node;
416
417 if (!(node =(DOTHistPtr)MemNew (sizeof(DOTHist))))
418 {
419 ErrPostEx (SEV_WARNING, 0, 0, "%s", "HIST node allocation failed");
420 return NULL;
421 }
422 node->diag_constant = diag;
423 node->q_stop = q_pos;
424
425 return node;
426 }
427
428
429 /*____________________________________________________________(DOT_ExtendProt)_____
430
431
432 Purpose : Protein extend hit function.
433
434 ____________________________________________________________________*/
435 static Int2 DOT_ExtendProt (DOTMainDataPtr mip, Uint1Ptr queryseq, Uint1Ptr subjectseq, Int4 q_off, Int4 s_off, Int4 diag, Int4 wordsize, DOTHistPtr node)
436 {
437 Uint1Ptr q_end,s, q, end, start, q_beg;
438 Int4 score, sum, length;
439 Int4 s_left, q_left, s_right, q_right, s_diff;
440 Int4 q_start, s_start;
441 Int4 threshold = 0, x, X; /* test default */
442 Int4 report_len;
443
444
445 q_left=q_off;
446 s_left=s_off;
447 x=X=threshold;
448 report_len=1; /* -arbitrary*/
449
450 /* initialize positions for left extension */
451 q =queryseq+(q_left-1); /* -1 b/c queryseq & subjectseq are =1 */
452 s = subjectseq+(s_left-1);
453
454 if (q_off<s_off)
455 start=subjectseq+(s_off-q_off);
456 else
457 start=subjectseq;
458
459
460 score = 0;
461 sum = 0;
462
463 /* extend to the left*/
464
465 while (s >start)
466 {
467 if (*q >24 || *s >24 || *q<1 || *s<1)
468 {
469 q++;
470 s++;
471 break;/* ambiguity found */
472 }
473
474 if ((sum = DOTblosum62[DOTNCBIstdaaToBLOSUM[*s]][DOTNCBIstdaaToBLOSUM[*q]])>= threshold)
475 {
476 score += sum;
477 sum=0;
478 if ((x=-score)<X)
479 x=X;
480 }
481 else
482 if (sum<x)
483 {
484 q++;
485 s++;
486 break;
487 }
488 q--;
489 s--;
490 }
491
492 q_beg=q;
493 score-=mip->maxscore*wordsize;
494
495 /* reinitialize positions for right extention */
496 q=queryseq+(q_off-wordsize);
497 s=subjectseq+(s_off-wordsize);
498 q_right = mip->qlen- (q_off-wordsize)-1;
499 s_right = mip->slen- (s_off-wordsize)-1;
500
501 /* assign ends */
502
503 if (q_right<s_right)
504 end = s + q_right;
505 else
506 end = s + s_right;
507
508 /* extend to the right only */
509
510 while (s < end)
511 {
512 q++;
513 s++;
514 if (*q > 24 || *s > 24 || *s<1 || *q<1)
515 {
516 break;/* ambiguity found */
517 }
518
519 if ((sum = DOTblosum62[DOTNCBIstdaaToBLOSUM[*s]][DOTNCBIstdaaToBLOSUM[*q]])>= threshold)
520 {
521 score += sum;
522 sum=0;
523 if ((x=-score)<X)
524 x=X;
525 }
526 else
527 if (sum<x)
528 break;
529 }
530
531 q_end = q;
532
533 length = q_end - q_beg;
534 if (length>=report_len)
535 {
536 /* check against the lowest score in tree */
537 if (score<mip->cutoff_score)
538 return -1;
539
540 s_diff = s_off-q_off;
541 q_start = q_beg- queryseq;
542 length = q_end - q_beg;
543 s_start = q_start+s_diff;
544
545 /* update node */
546 if (node!=NULL)
547 {
548 node->diag_constant = diag;
549 node->q_stop= q_start+length;
550 }
551
552 DOT_SaveHit (mip, score, q_start, s_start, length);
553 return 0;
554 }
555 else
556 return -1; /* hit not stored */
557 }
558
559 /*____________________________________________________________(DOT_ExtendNuc)_____
560
561
562 Purpose : Nucleotide extend hit function.
563
564 ____________________________________________________________________*/
565 static Int2 DOT_ExtendNuc (DOTMainDataPtr mip, Uint1Ptr queryseq, Uint1Ptr subjectseq, Int4 q_off, Int4 s_off, Int4 diag, Int4 wordsize, DOTHistPtr node)
566 {
567 Uint1Ptr q_end, s, q, end, start, q_beg;
568 Int4 score, sum, length;
569 Int4 s_left, q_left, s_right, s_diff, q_right;
570 Int4Ptr PNTR matrix;
571 Int4 ex_threshold=0,x,X;
572
573 matrix = mip->matrix;
574
575 s_left = s_off/* -wordsize */;
576 q_left=q_off/* -wordsize */;
577 x=X=ex_threshold;
578 /* reportable=4*maxscore; *//* below lowest possible -store everything*/
579
580 /* initialize positions for left extension */
581 q=queryseq+ (q_left-1);
582 s=subjectseq+ (s_left-1);
583
584 /* find the beginning */
585 if (q_off < s_off)
586 start = subjectseq+(s_off-q_off);
587 else
588 start = subjectseq;
589
590 score=0;
591 sum = 0;
592
593 /* extend to the left*/
594
595 while (s >start)
596 {
597
598 if (*q > 3 || *s > 3) /* ambiguity found */
599 {
600 q++;
601 s++;
602 break;
603 }
604 if ((sum+= matrix[*s][*q])>0)
605 {
606 score+=sum;
607 sum=0;
608 if ((x=-score)<X)
609 x=X;
610 }
611 else
612 if (sum<x)
613 {
614 q++;
615 s++;
616 break;
617 }
618 q--;
619 s--;
620 }
621
622 q_beg=q;
623 score-=mip->maxscore*(wordsize-2);
624
625 /* initialize positions for right extention */
626
627 q=queryseq+(q_off-wordsize);
628 s=subjectseq+(s_off-wordsize);
629
630 q_right = mip->qlen- (q_off-wordsize)-1;
631 s_right = mip->slen- (s_off-wordsize)-1;
632
633 /* assign ends */
634
635 if (q_right<s_right)
636 end = s + q_right;
637 else
638 end = s + s_right;
639
640
641 /* extend to the right */
642 sum = 0;
643
644 while (s < end)
645 {
646 q++;
647 s++;
648 if (*q > 3 || *s > 3) /* ambiguity found */
649 {
650 break;
651 }
652
653 if ((sum+= matrix[*s][*q])>0)
654 {
655 score+=sum;
656 sum=0;
657 if ((x=-score)<X)
658 x=X;
659 }
660 else
661 if (sum<x)
662 break;
663 }
664
665 q_end =q;
666
667 length = q_end - q_beg;
668
669 /* if (score>= reportable) */ /* storing everything -reportable is the lowest possible anyway*/
670 /* { */
671 /* check against the lowest score in tree */
672 if (score<mip->cutoff_score)
673 return -1;
674
675 s_diff = s_off-q_off;
676
677 q_left = q_beg- queryseq;
678 length = q_end - q_beg;
679 s_left = q_left+s_diff;
680
681 /* update node */
682 if (node!=NULL)
683 {
684 node->diag_constant = diag;
685 node->q_stop= q_left + length;
686 }
687
688 DOT_SaveHit (mip, score, q_left, s_left, length);
689 return 0;
690 /* } */
691 /* else */
692 /* return -1; */ /* hit not stored */
693 }
694
695
696
697 /*____________________________________________________________(ExtendHit)_____
698
699
700 Purpose : Calls extend function for new hit.
701
702 ____________________________________________________________________*/
703 static Int2 ExtendHit (DOTMainDataPtr mip, Uint1Ptr queryseq, Uint1Ptr subjectseq, Int4 q_off, Int4 s_off, Int4 diag_constant, Int4 wordsize, DOTHistPtr node)
704 {
705 if (is_na)
706 {
707 return DOT_ExtendNuc (mip, queryseq, subjectseq, q_off, s_off, diag_constant, wordsize, node);
708 }
709 else
710 {
711 return DOT_ExtendProt (mip, queryseq, subjectseq, q_off, s_off, diag_constant, wordsize, node);
712 }
713
714 }
715
716 /*_________________________________________(DOT_HistBTree)_____
717
718
719 Purpose : Check new hit against a previous hit in history binary tree.
720
721 ____________________________________________________________________*/
722
723 static void DOT_HistBTree (DOTInfoPtr info, Int4 diag, Int4 q_pos)/* rtn 1=found match, rtn 0=new hit*/
724 {
725 Avl_TreePtr tree=NULL;
726 Pointer tnode=NULL;
727 DOTHistPtr node=NULL;
728
729 tree = info->tree;
730 node=DOT_NewHistNode(diag,q_pos);
731
732 if (node == NULL) return;
733
734 /* check diag in tree */
735 if ((tnode=Avl_Search (tree, (Pointer)node)) != NULL) /* diag found */
736 {
737 if ( ((DOTHistPtr)tnode)->q_stop < node->q_stop) /*old diag, new hit */
738 ExtendHit (info->mip, info->qseq, info->sseq, info->q_pos, info->s_pos, node->diag_constant, info->wordsize, (DOTHistPtr)tnode);
739
740 if (node) MemFree(node);
741
742 }
743 else /* no diag, new hit */
744 {
745 if (ExtendHit (info->mip, info->qseq, info->sseq, info->q_pos, info->s_pos, node->diag_constant, info->wordsize, node) == 0)
746 {
747 if (!Avl_Insert(tree, (Pointer)node, NULL))
748 if (node) MemFree(node); /* insert new node returned FALSE */
749 }
750 else
751 if (node) MemFree(node);
752 }
753
754
755 }
756
757 DOTDiagPtr PNTR avl_hitlist=NULL;
758 DOTHistPtr PNTR avl_hist=NULL;
759 Int4 avl_counter2;
760
761
762 /*___________________________________________________________(CopyFunc)_____*/
763 static Boolean DOT_CopyFunc1 (Pointer item)
764 {
765
766 avl_counter2--;
767 if (!(avl_counter2<0))
768 {
769 avl_hist[avl_counter2]=item;
770 }
771 return TRUE;
772
773 }
774
775 /*_________________________________________(FreeBTree)_____
776
777 Purpose : Free binary tree structure.
778
779 ____________________________________________________________________*/
780 static void DOT_FreeBTree(Avl_TreePtr tree)
781 {
782 if (tree)
783 {
784 Avl_Clear (tree, CompareDiags, NULL);
785 if (tree->root!=NULL)
786 {
787 ErrPostEx (SEV_WARNING, 0, 0, "%s", "HistTree->root free failed");
788 }
789 else if (Avl_TotalNodes(tree)!=0)
790 {
791 ErrPostEx (SEV_WARNING, 0, 0, "%s", "HistTree counter != 0");
792 }
793 }
794
795 }
796
797 /*_________________________________________(FreeHistBTree)_____
798
799
800 Purpose : Free memory function for elements in history binary tree.
801
802 ____________________________________________________________________*/
803 static void DOT_FreeHistBTree(Avl_TreePtr tree)
804 {
805 Int4 index, i;
806 DOTHistPtr node;
807
808 if (tree==NULL)
809 {
810 return;
811 }
812
813 index=tree->totalNodes;
814 avl_counter2=index;
815
816 avl_hist=(DOTHistPtr PNTR)MemNew(sizeof(DOTHistPtr)*index);
817 Avl_Traverse(tree, DOT_CopyFunc1);
818
819 for (i=0; i<index; i++)
820 {
821 node= avl_hist[i];
822 node=(DOTHistPtr)Avl_Delete(tree, (Pointer)node);
823 if (node) MemFree(node);
824 }
825
826 if (avl_hist) MemFree(avl_hist);
827 DOT_FreeBTree(tree);
828
829 return;
830 }
831
832 /*___________________________________________________________(CopyFunc2)_____*/
833 static Boolean CopyFunc2 (Pointer item)
834 {
835 avl_counter2--;
836 if (!(avl_counter2<0))
837 {
838 avl_hitlist[avl_counter2]=item;
839 }
840
841 return TRUE;
842 }
843
844 /*_________________________________________(CopyHits)_____
845
846
847 Purpose : Copy stored hits from binary tree to array.
848
849 ____________________________________________________________________*/
850 static void DOT_CopyHits(DOTMainDataPtr mip)
851 {
852 Int4 index;
853
854 if (mip->tree==NULL)
855 {
856 /* Message(MSG_ERROR, "No hits"); */
857 return; /* no hits */
858 }
859
860 index=mip->tree->totalNodes;
861 avl_counter2=index;
862 mip->index=index;
863
864 mip->hitlist=(DOTDiagPtr PNTR)MemNew(sizeof(DOTDiagPtr)*index);
865 avl_hitlist=mip->hitlist;
866 Avl_Traverse(mip->tree, CopyFunc2);
867
868
869 }
870
871
872 /*_________________________________________(DOT_BuildPLookup)_____
873
874
875 Purpose : Build protein lookup table.
876
877 ____________________________________________________________________*/
878 static LookupTablePtr DOT_BuildPLookup(Uint1Ptr queryseq, Int4 qlen, Int4 word_size, Int4 alphabet_size, Int4 compression_ratio)
879 {
880 BlastAllWordPtr all_words;
881 LookupTablePtr lookup;
882 Int4 num_cols;
883 Int4 index = 0;
884 Int4 stop, start, offset, index1, index2;
885 Int4 threshold=13; /* one-hit heuristics T must be at least 13*/
886 Uint1Ptr str, PNTR array, word;
887 Boolean EXACTMATCH, found_ambig, saved_index = FALSE;
888 Int4 counter;
889
890
891 lookup = lookup_new(alphabet_size, (Int2) word_size, (Int2) (word_size/compression_ratio));
892 all_words = BlastPopulateAllWordArrays(word_size+1, alphabet_size);
893 if (!all_words)
894 return NULL;
895
896 str = queryseq;
897 num_cols = all_words->num_of_cols;
898 array = all_words->array;
899 stop = qlen - (word_size-1);
900 start = 0;
901 counter=0;
902
903 for (offset = start; offset<stop; offset++, str++)
904 {
905 /* for every word - this part takes long -sol'n wordsize = 2!!!*/
906 for (index1 = 0; index1<num_cols; index1++)
907 {
908 found_ambig = FALSE;
909 EXACTMATCH=TRUE;
910 word = array[index1];
911 for (index2=0; index2<word_size; index2++)
912 {
913
914 if (*(str+index2)>25
915 || *(str+index2)<1 ||*(word+index2)>25
916 || *(word+index2)<1)
917 {
918 found_ambig = TRUE;
919 EXACTMATCH=FALSE;
920 break;
921 }
922
923 if (*(str+index2) != *(word+index2))
924 {
925 EXACTMATCH = FALSE;
926 break; /* go on to the next word*/
927 }
928 }
929 /* if str is exact match to the word */
930 if (EXACTMATCH)
931 {
932 counter++;
933 lookup_add(lookup, (CharPtr)word, offset+word_size, 0);
934 saved_index = TRUE;
935 }
936 }
937 }
938 if (!saved_index)
939 return NULL;
940
941 /* deallocate */
942 /*
943 for (index=0; index<num_cols; index++)
944 {
945 if (array[index]) MemFree(array[index]);
946 }
947 */
948 if (array) MemFree(array);
949 if (all_words) MemFree(all_words);
950
951
952 return lookup;
953 }
954
955 /*_________________________________________(DOT_BuildNLookup)_____
956
957
958 Purpose : Build nucleotide lookup table.
959
960 ____________________________________________________________________*/
961 static LookupTablePtr DOT_BuildNLookup(Uint1Ptr queryseq, Int4 qlen, Int2 wordsize, Int2 alphabet_size, Int2 compression_ratio)
962 {
963
964 Boolean found_ambig, saved_index=FALSE;
965 Int4 offset, end, reduced_wordsize, stop;
966 Int4 lookup_index, index, index_addition;
967 Uint1Ptr str;
968 LookupTablePtr lookup;
969
970
971 reduced_wordsize = wordsize/compression_ratio;
972 lookup = lookup_new(alphabet_size, (Int2) wordsize, (Int2) wordsize);
973
974 str = queryseq;
975 end = (qlen-wordsize-1); /* use the shorter sequence for hash table */
976 stop = reduced_wordsize;
977
978
979
980 /* create hash table */
981 for (offset = 0; offset<end; offset++, str++)
982 {
983 found_ambig = FALSE;
984 lookup_index = 0;
985 index_addition = 0;
986
987 for (index=0; index<stop; index++)
988 {
989
990 if (*(str+index_addition) > 3 || *(str+index_addition+1)> 3 || *(str+index_addition+2) > 3 || *(str+index_addition+3) > 3)
991 {
992 found_ambig = TRUE;
993 break;
994 }
995
996 lookup_index += (*(str+index_addition) << 6);
997 lookup_index += (*(str+1+index_addition) << 4);
998 lookup_index += (*(str+2+index_addition) << 2);
999 lookup_index += *(str+3+index_addition);
1000
1001 if (index != stop-1)
1002 { /* 8 bits/byte */
1003 lookup_index <<= 8;
1004 index_addition += 4;
1005 }
1006 }
1007
1008 if (found_ambig == FALSE)
1009 {
1010 lookup_add_index(lookup, (Int4) lookup_index, offset+ (reduced_wordsize*compression_ratio), 0/* context_index */);
1011 saved_index = TRUE;
1012 }
1013 }
1014
1015 if (saved_index == FALSE)
1016 return NULL;
1017 /* convert lookup table to (mod_lt) table.*/
1018
1019
1020 return lookup;
1021 }
1022
1023
1024
1025 /*_________________________________________(DOT_ComputeHits)_____
1026
1027
1028 Purpose : Create hash table, find hits, store hits in hitlist array.
1029
1030 ____________________________________________________________________*/
1031 static Int2 DOT_ComputeHits(DOTMainDataPtr mip)
1032 {
1033
1034 LookupTablePtr lookup;
1035 ModLAEntry *mod_lt;
1036 Int4 wordsize, alphabet_size;
1037 Int4 char_size, lookup_index, mask, compression_ratio;
1038 Int4 qlen, slen;
1039 Int4 threshold=13;
1040 Uint1Ptr queryseq, subjectseq;
1041 Boolean saved_index = FALSE;
1042 Int4 end;
1043 Uint1Ptr s, subject0, s_end, q_end;
1044 ModLookupPositionPtr lookup_pos;
1045 ModLookupPosition hit_info;
1046 Int4 num_hits, s_off, s_pos, q_pos;
1047 Int4 diag, code_limit;
1048 DOTInfoPtr info;
1049 DOTHistPtr node=NULL;
1050
1051
1052
1053 queryseq=mip->qseq;
1054 subjectseq=mip->sseq;
1055 qlen=mip->qlen;
1056 slen=mip->slen;
1057
1058 if (is_na)
1059 {
1060 alphabet_size = 4; /*ncbi2na*/
1061 wordsize = mip->word_size; /* test default */
1062 compression_ratio = 4;
1063 if (!(lookup=DOT_BuildNLookup(queryseq, qlen, wordsize, alphabet_size, compression_ratio)))
1064 {
1065 return -1;
1066 }
1067 }
1068 else
1069 {
1070 wordsize =2; /*test default*/
1071 alphabet_size = 25; /* ncbi stdaa */
1072 compression_ratio = 1; /* ??*/
1073 if (!(lookup=DOT_BuildPLookup(queryseq, qlen, wordsize, alphabet_size, compression_ratio)))
1074 {
1075 return -1;
1076 }
1077 }
1078
1079 code_limit=alphabet_size-1;
1080
1081 /* convert lookup table to (mod_lt) table.*/
1082 lookup_position_aux_destruct(lookup);
1083 mod_lt = lookup->mod_lt;
1084 mask = lookup->mask;
1085 char_size = lookup->char_size;
1086 s = subjectseq;
1087 subject0 = subjectseq;
1088
1089 end = slen-wordsize-1;
1090 /* The subject sequence is too short, exit this function now. */
1091 if (wordsize > end)
1092 return -1;
1093
1094 s_end = subjectseq+slen-1;
1095 q_end = queryseq+qlen-1;
1096
1097 /* history btree structure */
1098 info = (DOTInfoPtr) MemNew (sizeof(DOTInfo));
1099 info->mip = mip;
1100 info->wordsize = wordsize;
1101 info->qseq = queryseq;
1102 info->sseq = subjectseq;
1103 info->first_pass = TRUE;
1104
1105
1106 /* scan subject sequence against lookup table */
1107 s = lookup_find_init (lookup, &lookup_index, s);
1108
1109 for (;;) /* loops until end of subject sequence */
1110 {
1111 do
1112 {
1113 /* lookup a contiguous word. */
1114 s++;
1115 if (s > s_end) /* end of subject sequence */
1116 goto fin;
1117
1118 if (*s > code_limit) /* ambiguity found */
1119 {
1120 num_hits = 0;
1121 continue; /*skip loop */
1122 }
1123
1124 lookup_index = (((lookup_index) & mask) << char_size) + *s;
1125
1126 } while ((num_hits = mod_lt[lookup_index].num_used) == 0);
1127
1128 lookup_pos = mod_lt[lookup_index].entries;
1129 hit_info = *((Uint4 *) lookup_pos);
1130 lookup_pos++;
1131
1132 if (num_hits>3)
1133 {
1134 lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
1135 }
1136
1137 s_pos = s_off = s-subject0+1;
1138 info->s_pos = s_pos;
1139
1140
1141 /* extend each hit in the linked list */
1142 do
1143 {
1144 q_pos = hinfo_get_pos (hit_info);
1145 num_hits --;
1146 hit_info = *((Uint4 *) lookup_pos);
1147 lookup_pos++;
1148
1149 diag = s_pos - q_pos; /* diag */
1150 info->q_pos = q_pos;
1151
1152 if (info->first_pass)
1153 {
1154 info->tree = (Avl_TreePtr)MemNew(sizeof(Avl_Tree));
1155 Avl_Initialize(info->tree, CompareDiags, NULL);
1156 info->tree->root = (Avl_NodePtr) MemNew (sizeof(Avl_Node));
1157 node=DOT_NewHistNode(diag, q_pos);
1158 if(ExtendHit (mip, queryseq, subjectseq, q_pos, s_pos, diag, wordsize, node)==0)
1159 {
1160 info->tree->root->avl_p = (Pointer)node;
1161 info->tree->totalNodes++;
1162 info->first_pass = FALSE;
1163 }
1164 else
1165 {
1166 if (node) MemFree(node);
1167 if (info->tree->root) MemFree(info->tree->root);
1168 if (info->tree) MemFree(info->tree);
1169 }
1170
1171 }
1172 else
1173 {
1174 DOT_HistBTree (info, diag, q_pos);
1175 }
1176
1177 } while (num_hits>0);
1178
1179 }
1180 fin:
1181
1182 lookup_destruct(lookup); /* deallocate lookup table */
1183 DOT_FreeHistBTree(info->tree);/* Free history btree */
1184 if (info->tree) MemFree(info->tree);
1185 if (info) MemFree(info);
1186
1187 DOT_CopyHits(mip); /* copy bintree to array */
1188 if (mip->tree)
1189 {
1190 DOT_FreeBTree(mip->tree); /* free hits btree */
1191 if (mip->tree) MemFree(mip->tree);
1192 }
1193 return 0;
1194
1195 }
1196
1197
1198 /*_________________________________________(DOT_SortProc)_____________
1199
1200 Purpose : Sort proc -by score, by q_start, by s_start.
1201
1202 ____________________________________________________________________*/
1203 static int LIBCALLBACK DOT_SortProc (VoidPtr vp1, VoidPtr vp2)
1204 {
1205 DOTDiagPtr hit1, hit2;
1206 DOTDiagPtr PNTR hp1, PNTR hp2;
1207
1208 hp1 = (DOTDiagPtr PNTR) vp1;
1209 hp2 = (DOTDiagPtr PNTR) vp2;
1210 hit1 = *hp1;
1211 hit2 = *hp2;
1212
1213 /*by descending score */
1214 if (hit1->score < hit2->score)
1215 return 1;
1216 else if (hit1->score > hit2->score)
1217 return -1;
1218 /* by ascending q_start */
1219 else if (hit1->q_start > hit2->q_start)
1220 return 1;
1221 else if (hit1->q_start < hit2->q_start)
1222 return -1;
1223 /* by ascending s_start */
1224 else if (hit1->s_start > hit2->s_start)
1225 return 1;
1226 else if (hit1->s_start < hit2->s_start)
1227 return -1;
1228
1229
1230 return 0;
1231 }
1232
1233
1234
1235 /*_______________________________________________(DOT_SortHitList)________________
1236
1237 Purpose :Sort hits array.
1238
1239 ____________________________________________________________________*/
1240
1241 static Int2 DOT_SortHitlist (DOTDiagPtr PNTR hitlist, Int4 index)
1242 {
1243 /* sort by score */
1244 HeapSort ((VoidPtr)hitlist, index, sizeof(DOTDiagPtr), DOT_SortProc);
1245
1246 return 0;
1247 }
1248
1249 /*___________________________________(DOT_ScoreCount)____________
1250
1251 Purpose : Rank scores.
1252
1253 ____________________________________________________________________*/
1254
1255 static void DOT_ScoreCount (DOTMainDataPtr mip, DOTDiagPtr PNTR hitlist, Int4 index)
1256 {
1257 Int4 PNTR score_array, Score, score, unique, i, j;
1258
1259 /* count unique scores */
1260 score=hitlist[0]->score;
1261 i=1;
1262 unique=1;
1263
1264 while (i<index)
1265 {
1266 if (score>(Score=hitlist[i]->score))
1267 {
1268 score=Score;
1269 unique++;
1270 }
1271 i++;
1272 }
1273
1274 mip->unique=unique;
1275
1276 if (unique<=1)
1277 return;
1278
1279 score_array=(Int4Ptr)MemNew(sizeof(Int4)*unique);
1280
1281 j=1;
1282 i=1;
1283 score=hitlist[0]->score;
1284 score_array[0]=0;
1285
1286 while (i<index)
1287 {
1288 if (score>(Score=hitlist[i]->score))
1289 {
1290 score=Score;
1291 score_array[j]=i;
1292 j++;
1293 }
1294 i++;
1295 }
1296
1297 mip->score_array=score_array;
1298
1299 }
1300
1301 /*_________________________________________(DOT_BuildHitList)_____
1302
1303
1304 Purpose : Build and sort hitlist array, count number of unique scores for threshold ramp.
1305
1306 ____________________________________________________________________*/
1307 Int2 DOT_BuildHitList(DOTMainDataPtr mip, Boolean sort, Boolean countUniqueScores)
1308 {
1309 DOTDiagPtr PNTR hitlist;
1310 Int4 index;
1311
1312
1313 DOT_ComputeHits(mip);
1314
1315 if (mip->hitlist==NULL)
1316 return -1; /* no hits */
1317
1318
1319 /* sort results */
1320 if (sort)
1321 {
1322 hitlist = mip->hitlist;
1323 index = mip->index;
1324 DOT_SortHitlist (hitlist, index);
1325 }
1326 if (countUniqueScores)
1327 DOT_ScoreCount(mip, hitlist, index);
1328
1329 return 0;
1330 }
1331
1332
1333
1334
1335 /*_________________________________________(Get_ProtResidues)_____
1336
1337 Purpose : Fill protein sequence buffers.
1338
1339 ____________________________________________________________________*/
1340 static Boolean DOT_GetPResidues (DOTMainDataPtr mip, Boolean is_byLoc)
1341 {
1342
1343 Int4 i;
1344 SeqPortPtr qspp, sspp;
1345 Uint1Ptr qseq, sseq, temp_seq1, temp_seq2;
1346 Int4 buf_len;
1347 Int4 qlen, slen;
1348 Int2 ctr;
1349 Uint1Ptr sbuffer, qbuffer;
1350
1351
1352 /* initialize buffers */
1353 qbuffer=(Uint1Ptr) MemNew (sizeof(Uint1)*101);
1354 sbuffer=(Uint1Ptr) MemNew (sizeof(Uint1)*101);
1355
1356 MemSet((Pointer)qbuffer, '\0', sizeof(Char)*101);
1357 MemSet((Pointer)qbuffer, '\0', sizeof(Char)*101);
1358
1359 qlen=mip->qlen;
1360 slen=mip->slen;
1361
1362 temp_seq1=NULL;
1363 temp_seq2=NULL;
1364
1365
1366 if (!(qseq = (Uint1Ptr) MemNew (sizeof(Uint1)*(qlen)))) return FALSE;
1367
1368 if (!(sseq = (Uint1Ptr) MemNew (sizeof(Uint1)*(slen)))) return FALSE;
1369
1370 if (is_byLoc)
1371 {
1372 qspp = SeqPortNewByLoc(mip->qslp, Seq_code_ncbistdaa);
1373 sspp = SeqPortNewByLoc(mip->sslp, Seq_code_ncbistdaa);
1374 }
1375 else
1376 {
1377 qspp = SeqPortNew (mip->qbsp, MIN(mip->q_start, mip->q_stop), MAX(mip->q_start, mip->q_stop), 0, Seq_code_ncbistdaa);
1378 sspp = SeqPortNew (mip->sbsp, mip->s_start, mip->s_stop, 0, Seq_code_ncbistdaa);
1379 }
1380
1381 if (qspp == NULL || sspp == NULL)
1382 {
1383 ErrPostEx (SEV_ERROR, 0, 0, "%s", "DOT- Failed on SeqPortNew");
1384 return FALSE;
1385 }
1386
1387 temp_seq2 = sseq;
1388 temp_seq1 = qseq;
1389
1390 do
1391 {
1392 ctr = SeqPortRead(qspp, qbuffer, 100);
1393
1394 if (ctr > 0)
1395 {
1396 i = 0;
1397 buf_len = ctr;
1398 while (buf_len > 0)
1399 {
1400 *temp_seq1 = qbuffer[i];
1401
1402 temp_seq1++;
1403 i++;
1404 buf_len--;
1405 }
1406 }
1407 } while (ctr != SEQPORT_EOF * -1);
1408
1409
1410 do
1411 {
1412 ctr = SeqPortRead(sspp, sbuffer, 100);
1413
1414 if (ctr > 0)
1415 {
1416 i = 0;
1417 buf_len = ctr;
1418 while (buf_len > 0)
1419 {
1420 *temp_seq2 = sbuffer[i];
1421 temp_seq2++;
1422 i++;
1423 buf_len--;
1424 }
1425 }
1426 } while (ctr != SEQPORT_EOF * -1);
1427
1428 SeqPortFree(sspp);
1429 SeqPortFree(qspp);
1430
1431
1432 mip->sseq=sseq;
1433 mip->qseq=qseq;
1434 return TRUE;
1435 }
1436
1437
1438
1439 /*_________________________________________(GetNResidues)_____
1440
1441 Purpose : Fill nucleotide sequence buffers.
1442
1443 ____________________________________________________________________*/
1444
1445 static Boolean DOT_GetNResidues (DOTMainDataPtr mip, Boolean is_byLoc)
1446 {
1447 Int4 i;
1448 SeqPortPtr qspp, sspp;
1449 Uint1Ptr qseq, sseq, temp_seq1, temp_seq2;
1450 Int4 qlen, slen;
1451 Int4 buf_len;
1452 Int2 ctr;
1453 Uint1Ptr sbuffer, qbuffer;
1454
1455 /* initialize buffers */
1456 qbuffer=(Uint1Ptr) MemNew (sizeof(Uint1)*101);
1457 sbuffer=(Uint1Ptr) MemNew (sizeof(Uint1)*101);
1458
1459 MemSet((Pointer)qbuffer, '\0', sizeof(Char)*101);
1460 MemSet((Pointer)sbuffer, '\0', sizeof(Char)*101);
1461
1462
1463 mip->matrix = DOT_DNAScoringMatrix(-3, 1, 4);
1464 qlen=mip->qlen;
1465 slen=mip->slen;
1466
1467 temp_seq1 = NULL;
1468 temp_seq2 = NULL;
1469
1470 if (!(qseq = (Uint1Ptr) MemNew (sizeof(Uint1)*(qlen)))) return FALSE;
1471 if (!(sseq = (Uint1Ptr) MemNew (sizeof(Uint1)*(slen)))) return FALSE;
1472
1473 if (is_byLoc){
1474 qspp = SeqPortNewByLoc(mip->qslp, Seq_code_ncbi2na);
1475 sspp = SeqPortNewByLoc(mip->sslp, Seq_code_ncbi2na);
1476 }
1477 else
1478 {
1479 qspp = SeqPortNew (mip->qbsp, MIN(mip->q_start, mip->q_stop), MAX(mip->q_start, mip->q_stop), 0, Seq_code_ncbi2na);
1480 sspp = SeqPortNew (mip->sbsp, mip->s_start, mip->s_stop, 0, Seq_code_ncbi2na);
1481 }
1482
1483 if (qspp == NULL || sspp == NULL)
1484 {
1485 ErrPostEx (SEV_ERROR, 0, 0, "%s", "DOT- Failed on SeqPortNew");
1486 return FALSE;
1487 }
1488 temp_seq1 = qseq;
1489 do
1490 {
1491 ctr = SeqPortRead(qspp, qbuffer, 100);
1492
1493 if (ctr > 0)
1494 {
1495 i = 0;
1496 buf_len = ctr;
1497 while (buf_len > 0)
1498 {
1499 *temp_seq1 = (Uint1)qbuffer[i];
1500 temp_seq1++;
1501 i++;
1502 buf_len--;
1503 }
1504 }
1505 } while (ctr != SEQPORT_EOF * -1);
1506
1507 temp_seq2 = sseq;
1508 do
1509 {
1510 ctr = SeqPortRead(sspp, sbuffer, 100);
1511
1512 if (ctr > 0)
1513 {
1514 i = 0;
1515 buf_len = ctr;
1516 while (buf_len > 0)
1517 {
1518 *temp_seq2 = (Uint1)sbuffer[i];
1519 temp_seq2++;
1520 i++;
1521 buf_len--;
1522 }
1523 }
1524 } while (ctr != SEQPORT_EOF * -1);
1525
1526
1527 SeqPortFree(sspp);
1528 SeqPortFree(qspp);
1529 if (qbuffer) MemFree(qbuffer);
1530 if (sbuffer) MemFree(sbuffer);
1531 mip->qseq = qseq;
1532 mip->sseq = sseq;
1533 return TRUE;
1534 }
1535
1536
1537 /*_________________________________________(DOT_GetSeqsbyLoc)_____
1538
1539 Purpose : Calls functions to fill protein or nucleotide buffers.
1540
1541 ____________________________________________________________________*/
1542 static Boolean DOT_GetSeqsbyLoc (DOTMainDataPtr mip)
1543 {
1544 Char q_idbuf[42]={""}, s_idbuf[42]={""};
1545
1546 /* mip->sname = (CharPtr)MemNew(42*sizeof(Char)); */
1547 /* mip->qname = (CharPtr)MemNew(42*sizeof(Char)); */
1548 /*
1549 FastaId (mip->qbsp, idbuf, 32);
1550 sprintf (mip->qname, "%s",idbuf);
1551 FastaId (mip->sbsp, idbuf, 32);
1552 sprintf (mip->sname, "%s",idbuf);
1553 */
1554 /* qsip = SeqLocId(mip->qslp); */
1555 /* qId = SeqIdFindBestAccession(qsip); */
1556 /* ssip = SeqLocId(mip->sslp); */
1557 /* sId = SeqIdFindBestAccession(ssip); */
1558 SeqIdWrite(mip->qbsp->id, q_idbuf,PRINTID_FASTA_SHORT, 41);
1559 SeqIdWrite(mip->sbsp->id, s_idbuf,PRINTID_FASTA_SHORT, 41);
1560 mip->qname=StringSave(q_idbuf);
1561 mip->sname=StringSave(s_idbuf);
1562
1563 /* printf ("\nLengths: \n %s(query_seq):%d\n %s(subject_seq):%d\n", mip->qname, mip->qlen, mip->sname, mip->slen); */
1564
1565
1566
1567 if (is_na)
1568 {
1569 if (!DOT_GetNResidues(mip, TRUE))
1570 return FALSE;
1571 }
1572 else
1573 {
1574 if (!DOT_GetPResidues(mip, TRUE))
1575 return FALSE;
1576 }
1577 return TRUE;
1578 }
1579
1580 /*_________________________________________(DOT_GetSeqs)_____
1581
1582 Purpose : Calls functions to fill protein or nucleotide buffers.
1583
1584 ____________________________________________________________________*/
1585 Boolean DOT_GetSeqs (DOTMainDataPtr mip, Boolean is_zoom)
1586 {
1587 Char q_idbuf[42]={""}, s_idbuf[42]={""};
1588
1589
1590 if (is_zoom)
1591 {
1592 if (mip->qseq) mip->qseq=MemFree(mip->qseq);
1593 if (mip->sseq) mip->sseq=MemFree(mip->sseq);
1594 }
1595 else
1596 {
1597 /* mip->sname = (CharPtr)MemNew(42*sizeof(Char)); */
1598 /* mip->qname = (CharPtr)MemNew(42*sizeof(Char)); */
1599 SeqIdWrite(mip->qbsp->id, q_idbuf,PRINTID_FASTA_SHORT, 41);
1600 SeqIdWrite(mip->sbsp->id, s_idbuf,PRINTID_FASTA_SHORT, 41);
1601 mip->qname=StringSave(q_idbuf);
1602 mip->sname=StringSave(s_idbuf);
1603
1604 /* FastaId (mip->qbsp, idbuf, 32); */
1605 /* sprintf (mip->qname, "%s",idbuf); */
1606 /* FastaId (mip->sbsp, idbuf, 32); */
1607 /* sprintf (mip->sname, "%s",idbuf); */
1608 }
1609
1610 if (is_na)
1611 {
1612 if (!DOT_GetNResidues(mip, FALSE))
1613 return FALSE;
1614 }
1615 else
1616 {
1617 if (!DOT_GetPResidues(mip, FALSE))
1618 return FALSE;
1619 }
1620
1621 return TRUE;
1622
1623 }
1624
1625
1626 /*_______________________________________________(DOT_FreeHitPtrArray)___
1627
1628 Purpose : Free memory function for hits array.
1629
1630 ____________________________________________________________________*/
1631 Int2 DOT_FreeHitsArray (DOTDiagPtr PNTR hitlist, Int4 index)
1632 {
1633 Int4 i;
1634 DOTDiagPtr hit;
1635
1636 for(i = 0; i < index; i++)
1637 {
1638 hit=hitlist[i];
1639 if (hit) MemFree(hit);
1640 }
1641
1642 if (hitlist) hitlist=MemFree(hitlist);
1643
1644 if (hitlist!=NULL)
1645 {
1646 ErrPostEx (SEV_WARNING, 0, 0, "%s", "hitlist not empty");
1647 return -1;
1648 }
1649 return 0;
1650 }
1651
1652 /*_______________________________________________(DOT_FreeMainInfo)___
1653
1654 Purpose : Free memory function for main info ptr. Clears ONLY hitlist and sequence buffers.
1655
1656 ____________________________________________________________________*/
1657 Int2 DOT_FreeMainInfo(DOTMainDataPtr mip)
1658 {
1659 if (mip->hitlist) DOT_FreeHitsArray(mip->hitlist, mip->index);
1660 if (mip->score_array) mip->score_array=MemFree(mip->score_array);
1661 if (mip->qseq) mip->qseq=MemFree(mip->qseq);
1662 if (mip->sseq) mip->sseq=MemFree(mip->sseq);
1663
1664 return 0;
1665 }
1666
1667 /*_______________________________________________(DOT_FreeMainInfoPtrEx)___
1668
1669 Purpose : Free memory function for main info ptr. Clears all.
1670
1671 ____________________________________________________________________*/
1672 Int2 DOT_FreeMainInfoPtrEx (DOTMainDataPtr mip)
1673 {
1674
1675 DOT_FreeMainInfo(mip);
1676
1677 if (mip->is_na && mip->matrix)
1678 Free(mip->matrix);
1679 if (mip->qslp) SeqLocFree(mip->qslp);
1680 if (mip->sslp) SeqLocFree(mip->sslp);
1681 /* if (mip->qbsp) BioseqFree(mip->qbsp); */
1682 /* if (mip->sbsp) BioseqFree(mip->sbsp); */
1683
1684 if (mip) MemFree(mip);
1685
1686 return 0;
1687 }
1688
1689 /*____________________________________________(DOT_InitTheRest)____________
1690
1691
1692 Purpose : Initialize function for DOTMainDataPtr used by InitMainInfo and InitMainInfobyLoc.
1693
1694 ____________________________________________________________________*/
1695
1696
1697 static void DOT_InitTheRest(DOTMainDataPtr mip, Int4 word_size,Int4 tree_limit)
1698 {
1699
1700 if (ISA_aa (mip->qbsp->mol) && ISA_aa (mip->sbsp->mol))
1701 {
1702 mip->is_na = FALSE;
1703 is_na=FALSE;
1704 if (word_size < 4 || word_size > 11)
1705 word_size=2;
1706 }
1707 else if (ISA_na(mip->qbsp->mol) && ISA_na (mip->sbsp->mol))
1708 {
1709 mip->is_na = TRUE;
1710 is_na=TRUE;
1711 if (word_size < 1 || word_size > 2)
1712 word_size=8;
1713 }
1714
1715 mip->word_size=word_size;
1716
1717 if (tree_limit==0)
1718 mip->tree_limit=10200; /* default */
1719 else
1720 mip->tree_limit=tree_limit;
1721
1722 mip->cutoff_score=0;
1723 mip->first_pass=TRUE;
1724 mip->unique=0;
1725 mip->index=0;
1726 }
1727
1728
1729 /*____________________________________________(DOT_InitMainInfo)____________
1730
1731
1732 Purpose : Initialize DOTMainDataPtr for main window.
1733 **** use bsp with the shorter seq length as 'qbsp' for max speed ****
1734
1735 ____________________________________________________________________*/
1736
1737 extern DOTMainDataPtr DOT_InitMainInfo (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 word_size, Int4 tree_limit, Int4 q_left, Int4 q_right, Int4 s_left, Int4 s_right)
1738 {
1739
1740 mip->q_start=q_left;
1741 mip->q_stop=q_right;
1742 mip->s_start=s_left;
1743 mip->s_stop=s_right;
1744 mip->qbsp=qbsp;
1745 mip->sbsp=sbsp;
1746 mip->sstrand = Seq_strand_plus;
1747 mip->qstrand = Seq_strand_plus;
1748
1749 if (!((ISA_aa (mip->qbsp->mol) && ISA_aa (mip->sbsp->mol))||(ISA_na(mip->qbsp->mol) && ISA_na (mip->sbsp->mol))))
1750 {
1751 ErrPostEx(SEV_ERROR, 0, 0, "DOT -missmatched sequence types");
1752 return MemFree(mip);
1753 }
1754
1755 DOT_InitTheRest(mip, word_size, tree_limit);
1756
1757 return mip;
1758 }
1759
1760 static BioseqPtr DOT_GetBioseqGivenSeqLoc (SeqLocPtr slp, Uint2 entityID)
1761
1762 {
1763 BioseqPtr bsp=NULL;
1764 SeqEntryPtr sep=NULL;
1765 SeqIdPtr sip=NULL;
1766 SeqLocPtr tmp=NULL;
1767
1768 if (slp == NULL) return NULL;
1769 bsp = NULL;
1770 sip = SeqLocId (slp);
1771 if (sip != NULL) {
1772 bsp = BioseqFind (sip);
1773 } else {
1774 tmp = SeqLocFindNext (slp, NULL);
1775 if (tmp != NULL) {
1776 sip = SeqLocId (tmp);
1777 if (sip != NULL) {
1778 bsp = BioseqFind (sip);
1779 if (bsp != NULL) {
1780 sep = SeqMgrGetSeqEntryForData (bsp);
1781 entityID = ObjMgrGetEntityIDForChoice (sep);
1782 bsp = GetBioseqGivenSeqLoc (slp, entityID);
1783 }
1784 }
1785 }
1786 }
1787 return bsp;
1788 }
1789
1790
1791 static BioseqPtr DOT_GetBioseqReferencedByAnnot (SeqAnnotPtr sap, Uint2 entityID)
1792
1793 {
1794 SeqAlignPtr align;
1795 BioseqPtr bsp;
1796 DenseDiagPtr ddp;
1797 DenseSegPtr dsp;
1798 SeqFeatPtr feat;
1799 SeqGraphPtr graph;
1800 SeqIdPtr sip;
1801 SeqLocPtr slp;
1802 StdSegPtr ssp;
1803 SeqLocPtr tloc;
1804
1805 if (sap == NULL) return NULL;
1806 switch (sap->type) {
1807 case 1 :
1808 feat = (SeqFeatPtr) sap->data;
1809 while (feat != NULL) {
1810 slp = feat->location;
1811 if (slp != NULL) {
1812 bsp = DOT_GetBioseqGivenSeqLoc (slp, entityID);
1813 if (bsp != NULL) return bsp;
1814 }
1815 feat = feat->next;
1816 }
1817 break;
1818 case 2 :
1819 align = (SeqAlignPtr) sap->data;
1820 while (align != NULL) {
1821 if (align->segtype == 1) {
1822 ddp = (DenseDiagPtr) align->segs;
1823 if (ddp != NULL) {
1824 for (sip = ddp->id; sip != NULL; sip = sip->next) {
1825 bsp = BioseqFind (sip);
1826 if (bsp != NULL) return bsp;
1827 }
1828 }
1829 } else if (align->segtype == 2) {
1830 dsp = (DenseSegPtr) align->segs;
1831 if (dsp != NULL) {
1832 for (sip = dsp->ids; sip != NULL; sip = sip->next) {
1833 bsp = BioseqFind (sip);
1834 if (bsp != NULL) return bsp;
1835 }
1836 }
1837 } else if (align->segtype == 3) {
1838 ssp = (StdSegPtr) align->segs;
1839 if (ssp != NULL && ssp->loc != NULL) {
1840 for (tloc = ssp->loc; tloc != NULL; tloc = tloc->next) {
1841 bsp = BioseqFind (SeqLocId (tloc));
1842 if (bsp != NULL) return bsp;
1843 }
1844 }
1845 }
1846 align = align->next;
1847 }
1848 break;
1849 case 3 :
1850 graph = (SeqGraphPtr) sap->data;
1851 while (graph != NULL) {
1852 slp = graph->loc;
1853 if (slp != NULL) {
1854 bsp = DOT_GetBioseqGivenSeqLoc (slp, entityID);
1855 if (bsp != NULL) return bsp;
1856 }
1857 graph = graph->next;
1858 }
1859 break;
1860 default :
1861 break;
1862 }
1863 return NULL;
1864 }
1865
1866 /*____________________________(DOT_AttachSeqAnnotToSeqEntry)_____
1867
1868
1869 Purpose : Attach SeqAnnot structure to SeqEntry
1870 ________________________________________________________________*/
1871
1872 extern Uint2 DOT_AttachSeqAnnotToSeqEntry (Uint2 entityID, SeqAnnotPtr sap, BioseqPtr bsp)
1873
1874 {
1875 Int2 genCode;
1876 OMProcControl ompc;
1877 SeqEntryPtr sep;
1878 SeqFeatPtr sfp = NULL;
1879
1880 if (sap == NULL) return entityID;
1881 if (bsp==NULL)
1882 bsp = DOT_GetBioseqReferencedByAnnot (sap, entityID);
1883
1884 if (bsp == NULL) return entityID;
1885
1886
1887 sep = SeqMgrGetSeqEntryForData (bsp);
1888 entityID = ObjMgrGetEntityIDForChoice (sep);
1889 if (sap->type == 1) {
1890 sfp = (SeqFeatPtr) sap->data;
1891 sep = SeqMgrGetSeqEntryForData(bsp);
1892 /* sep = GetBestTopParentForData (entityID, bsp); */
1893 genCode = SeqEntryToGeneticCode (sep, NULL, NULL, 0);
1894 SetEmptyGeneticCodes (sap, genCode);
1895 }
1896 MemSet ((Pointer) &ompc, 0, sizeof (OMProcControl));
1897 ompc.input_entityID = entityID;
1898 ompc.input_itemID = GetItemIDGivenPointer (entityID, OBJ_BIOSEQ, (Pointer) bsp);
1899 ompc.input_itemtype = OBJ_BIOSEQ;
1900 ompc.output_itemtype = OBJ_SEQANNOT;
1901 ompc.output_data = (Pointer) sap;
1902 if (! AttachDataForProc (&ompc, FALSE)) {
1903 Message (MSG_ERROR, "DOT_AttachSeqAnnotToSeqEntry failed");
1904 } else if (sfp != NULL) {
1905 PromoteXrefs (sfp, bsp, entityID);
1906 }
1907
1908 return entityID;
1909 }
1910
1911
1912 /*____________________________________________(DOT_InitMainInfobyLoc)____________
1913
1914
1915 Purpose : Initialize DOTMainDataPtr for main window.
1916 **** use slp with the shorter seq length as 'slp1' for max speed ****
1917 ____________________________________________________________________*/
1918
1919 static DOTMainDataPtr DOT_InitMainInfobyLoc (DOTMainDataPtr mip, SeqLocPtr slp1, SeqLocPtr slp2, Int4 word_size, Int4 tree_limit)
1920 {
1921 Int4 slen;
1922 Int4 qlen;
1923 SeqLocPtr qslp, sslp;
1924 SeqIdPtr qsip, ssip;
1925
1926 if (mip == NULL || slp1 == NULL || slp2 == NULL) return MemFree (mip);
1927 qlen=SeqLocLen(slp1);
1928 slen=SeqLocLen(slp2);
1929
1930 qslp=slp1;
1931 sslp=slp2;
1932
1933 mip->qstrand = SeqLocStrand(qslp);
1934 mip->sstrand = SeqLocStrand(sslp);
1935
1936 qsip = SeqLocId (qslp);
1937 if (qsip == NULL) {
1938 return MemFree (mip);
1939 }
1940 ssip = SeqLocId (sslp);
1941 if (ssip == NULL) {
1942 return MemFree (mip);
1943 }
1944
1945 mip->qbsp = BioseqFind(SeqLocId(qslp));
1946 if (mip->qbsp == NULL) {
1947 return MemFree (mip);
1948 }
1949 mip->sbsp = BioseqFind(SeqLocId(sslp));
1950 if (mip->sbsp == NULL) {
1951 return MemFree (mip);
1952 }
1953
1954 if (!((ISA_aa (mip->qbsp->mol) && ISA_aa (mip->sbsp->mol))||(ISA_na(mip->qbsp->mol) && ISA_na (mip->sbsp->mol))))
1955 {
1956 ErrPostEx(SEV_ERROR, 0, 0, "DOT -missmatched sequence types");
1957 return MemFree(mip);
1958 }
1959
1960
1961 /* mip start and stop are in bioseq coordinates */
1962 mip->q_start = GetOffsetInBioseq (qslp, mip->qbsp, SEQLOC_LEFT_END);
1963 mip->s_start = GetOffsetInBioseq (sslp, mip->sbsp, SEQLOC_LEFT_END);
1964 mip->q_stop = GetOffsetInBioseq (qslp, mip->qbsp, SEQLOC_RIGHT_END);
1965 mip->s_stop = GetOffsetInBioseq (sslp, mip->sbsp, SEQLOC_RIGHT_END);
1966
1967
1968 mip->qslp=qslp;
1969 mip->sslp=sslp;
1970 mip->qlen=qlen;
1971 mip->slen=slen;
1972
1973
1974
1975 DOT_InitTheRest(mip, word_size, tree_limit);
1976 return mip;
1977 }
1978
1979 /*____________________________________________(DOT_ComputeDotPlot)__________
1980
1981 Purpose : Process input sequence information.
1982
1983 ____________________________________________________________________*/
1984 static Boolean DOT_ComputeDotPlot (DOTMainDataPtr mip)
1985 {
1986
1987
1988 DOT_CalculateMatrix_MinMax(mip);
1989
1990 if (DOT_BuildHitList(mip, TRUE, TRUE)<0)
1991 {
1992 ErrPostEx (SEV_WARNING, 0, 0, "%s", "DOT- no matches found");
1993 return FALSE; /* no hits */
1994 }
1995 return TRUE;
1996 }
1997
1998 /*____________________________________________(DOT_SPI_FindBestAlnByDotPlot)__________
1999
2000 Purpose : Sarah's function to find the best alignment.
2001
2002 ____________________________________________________________________*/
2003
2004 SeqAlignPtr DOT_SPI_FindBestAlnByDotPlot(SeqLocPtr slp1, SeqLocPtr slp2, Int4 wordsize, Int4 num_hits)
2005 {
2006 DOTDiagPtr ddp;
2007 DenseSegPtr dsp;
2008 Int4 i;
2009 DOTMainDataPtr mip;
2010 SeqAlignPtr sap;
2011 SeqAlignPtr sap_head;
2012 SeqAlignPtr sap_prev;
2013 ScorePtr scp;
2014 Int4 start1;
2015 Int4 start2;
2016 Uint1 strand;
2017
2018 mip = DOT_CreateAndStorebyLoc (slp1, slp2, wordsize, num_hits);
2019 sap = sap_head = sap_prev = NULL;
2020 if (mip == NULL || mip->hitlist == NULL)
2021 return NULL;
2022 i = 0;
2023 ddp = *mip->hitlist;
2024 start1 = SeqLocStart(slp1);
2025 start2 = SeqLocStart(slp2);
2026 strand = SeqLocStrand(slp2);
2027 while (ddp != NULL && i < mip->index)
2028 {
2029 ddp = mip->hitlist[i];
2030 i++;
2031 sap = SeqAlignNew();
2032 dsp = DenseSegNew();
2033 sap->type = SAT_PARTIAL;
2034 sap->segtype = SAS_DENSEG;
2035 sap->dim = 2;
2036 dsp->dim = 2;
2037 dsp->numseg = 1;
2038 dsp->ids = SeqIdDup(SeqLocId(slp1));
2039 dsp->ids->next = SeqIdDup(SeqLocId(slp2));
2040 dsp->strands = (Uint1Ptr)MemNew(2*sizeof(Uint1));
2041 dsp->strands[0] = SeqLocStrand(slp1);
2042 dsp->strands[1] = SeqLocStrand(slp2);
2043 dsp->starts = (Int4Ptr)MemNew(2*sizeof(Int4));
2044 dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
2045 dsp->starts[0] = ddp->q_start;
2046 if (dsp->strands[1] == Seq_strand_minus)
2047 dsp->starts[1] = ddp->s_start - ddp->length + 1;
2048 else
2049 dsp->starts[1] = ddp->s_start;
2050 if (ddp->length > SeqLocLen(slp2))
2051 dsp->lens[0] = SeqLocLen(slp2);
2052 else
2053 dsp->lens[0] = ddp->length - 1;
2054 scp = ScoreNew();
2055 scp->id = ObjectIdNew();
2056 scp->id->str = StringSave("score");
2057 scp->choice = 1;
2058 scp->value.intvalue = ddp->score;
2059 dsp->scores = scp;
2060 sap->segs = (Pointer)(dsp);
2061 if (sap_head != NULL)
2062 {
2063 sap_prev->next = sap;
2064 sap_prev = sap;
2065 } else
2066 sap_head = sap_prev = sap;
2067 }
2068 if (sap_head == NULL)
2069 return NULL;
2070 AlnMgrIndexSeqAlign(sap_head);
2071 AlnMgrMakeMultipleByScore(sap_head);
2072 AlnMgrDeleteHidden(sap_head, FALSE);
2073 sap = (SeqAlignPtr)(sap_head->segs);
2074 sap_head->segs = NULL;
2075 SeqAlignFree(sap_head);
2076 return sap;
2077 }
2078
2079 /*____________________________________________(DOT_CreateAndStorebyLoc)__________
2080
2081 Purpose : Runs Dotplot functions, does plus - minus strands.
2082
2083 ____________________________________________________________________*/
2084 DOTMainDataPtr DOT_CreateAndStorebyLoc (SeqLocPtr slp1, SeqLocPtr slp2, Int4 word_size, Int4 num_hits)
2085 {
2086 DOTMainDataPtr mip;
2087
2088 if (slp1 == NULL || slp2 == NULL || word_size == 0 || num_hits == 0 || SeqLocLen(slp1)<word_size || SeqLocLen(slp2)<word_size )
2089 return NULL;
2090
2091 mip=(DOTMainDataPtr)MemNew(sizeof(DOTMainData));
2092 if (!(mip=DOT_InitMainInfobyLoc(mip, slp1, slp2, word_size, num_hits)))
2093 return NULL;
2094
2095 if (!DOT_GetSeqsbyLoc(mip))
2096 return NULL;
2097 if (!DOT_ComputeDotPlot(mip))
2098 return NULL;
2099
2100 return mip;
2101
2102 }
2103
2104 /*____________________________________________(DOT_CreateAndStore)__________
2105
2106 Purpose : Runs Dotplot functions.
2107
2108 ____________________________________________________________________*/
2109 DOTMainDataPtr DOT_CreateAndStore (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 q_start, Int4 q_stop, Int4 s_start, Int4 s_stop, Int4 word_size, Int4 num_hits, Boolean initialize)
2110 {
2111 if (mip==NULL) return NULL;
2112
2113 if (mip==NULL || qbsp == NULL || sbsp == NULL || word_size == 0 || num_hits == 0 || qbsp->length<word_size || sbsp->length<word_size )
2114 return NULL;
2115
2116 if (initialize)
2117 if (!(mip=DOT_InitMainInfo(mip, qbsp, sbsp, word_size, num_hits, 0, qbsp->length-1, 0, sbsp->length-1)))
2118 return NULL;
2119 if (!DOT_GetSeqs(mip, FALSE))
2120 return NULL;
2121 if (!DOT_ComputeDotPlot(mip))
2122 return NULL;
2123
2124 return mip;
2125
2126 }
2127
2128 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |