NCBI C Toolkit Cross Reference

C/tools/dotseq.c


  1 static char const rcsid[] = "$Id: dotseq.c,v 6.14 2006/07/13 17:16:20 bollin Exp $";
  2 
  3 /* dotSeq.c */
  4 /* ===========================================================================
  5 *
  6 *                            PUBLIC DOMAIN NOTICE
  7 *            National Center for Biotechnology Information (NCBI)
  8 *
  9 *  This software/database is a "United States Government Work" under the
 10 *  terms of the United States Copyright Act.  It was written as part of
 11 *  the author's official duties as a United States Government employee and
 12 *  thus cannot be copyrighted.  This software/database is freely available
 13 *  to the public for use. The National Library of Medicine and the U.S.
 14 *  Government do not place any restriction on its use or reproduction.
 15 *  We would, however, appreciate having the NCBI and the author cited in
 16 *  any work or product based on this material.
 17 *
 18 *  Although all reasonable efforts have been taken to ensure the accuracy
 19 *  and reliability of the software and data, the NLM and the U.S.
 20 *  Government do not and cannot warrant the performance or results that
 21 *  may be obtained by using this software or data. The NLM and the U.S.
 22 *  Government disclaim all warranties, express or implied, including
 23 *  warranties of performance, merchantability or fitness for any particular
 24 *  purpose.
 25 *
 26 * ===========================================================================
 27 *
 28 * File Name:  dotseq.c
 29 *
 30 * Author:  Fasika Aklilu
 31 *
 32 * Version Creation Date:   8/9/01
 33 *
 34 * $Revision: 6.14 $
 35 *
 36 * File Description: computes local alignments for dot matrix
 37 *
 38 * Modifications:  
 39 * --------------------------------------------------------------------------
 40 * Date     Name        Description of modification
 41 * -------  ----------  -----------------------------------------------------
 42 
 43 $Revision: 6.14 $
 44 $Log: dotseq.c,v $
 45 Revision 6.14  2006/07/13 17:16:20  bollin
 46 use Uint4 instead of Uint2 for itemID values
 47 removed unused variables
 48 
 49 Revision 6.13  2004/03/17 20:33:48  bollin
 50 In DOT_InitMainInfobyLoc, exit function and return NULL if unable to determine
 51 ID or Bioseq from SeqLoc provided.  This change averts a core dump.
 52 
 53 Revision 6.12  2003/09/17 20:55:12  kskatz
 54 Resetting to the way it was in revision 6.10; accidently checked in a kludge meant to be used only locally in DOT_GetNResidues
 55 
 56 Revision 6.11  2003/09/17 20:39:01  kskatz
 57 Commented out a temporary fix in SPI_AlignInWindows() [line 3880]
 58 
 59 Revision 6.10  2003/05/30 17:25:36  coulouri
 60 add rcsid
 61 
 62 Revision 6.9  2003/02/07 21:21:50  kans
 63 DOT_BuildPLookup was erroneously calling MemFree on elements of array
 64 
 65 Revision 6.8  2001/08/09 17:21:08  kans
 66 include alignmgr.h to avoid Mac compile error
 67 
 68 Revision 6.7  2001/08/09 16:31:49  aklilu
 69 added revision
 70 
 71 Revision 6.6  2001/01/19 20:11:31  kans
 72 minor changes to work with MacOS X compiler (contributed by William Van Etten)
 73 
 74 Revision 6.5  2000/09/19 21:36:09  kans
 75 commented out printf
 76 
 77 Revision 6.4  2000/08/07 13:29:23  sicotte
 78 Fix printf (long) casts
 79 
 80 Revision 6.3  2000/07/26 18:23:09  aklilu
 81 added DOT_SPI_FindBestAlnByDotPlotEx, to return best alignments
 82 
 83 
 84 */
 85 
 86 #include <dotseq.h>
 87 #include <alignmgr.h>
 88 
 89 /****************************************************************************
 90 
 91       GLOBAL VARIABLES                                                               
 92  ***************************************************************************/
 93 
 94  
 95   
 96 static Int4  DOTblosum62[24][24] = {
 97   4, -1, -2, -2,  0, -1, -1,  0, -2, -1, -1, -1, -1, -2, -1,  1,  0, -3, -2,  0, -2, -1,  0, -4,
 98  -1,  5,  0, -2, -3,  1,  0, -2,  0, -3, -2,  2, -1, -3, -2, -1, -1, -3, -2, -3, -1,  0, -1, -4,
 99  -2,  0,  6,  1, -3,  0,  0,  0,  1, -3, -3,  0, -2, -3, -2,  1,  0, -4, -2, -3,  3,  0, -1, -4,
100  -2, -2,  1,  6, -3,  0,  2, -1, -1, -3, -4, -1, -3, -3, -1,  0, -1, -4, -3, -3,  4,  1, -1, -4,
101   0, -3, -3, -3,  9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4,
102  -1,  1,  0,  0, -3,  5,  2, -2,  0, -3, -2,  1,  0, -3, -1,  0, -1, -2, -1, -2,  0,  3, -1, -4,
103  -1,  0,  0,  2, -4,  2,  5, -2,  0, -3, -3,  1, -2, -3, -1,  0, -1, -3, -2, -2,  1,  4, -1, -4,
104   0, -2,  0, -1, -3, -2, -2,  6, -2, -4, -4, -2, -3, -3, -2,  0, -2, -2, -3, -3, -1, -2, -1, -4,
105  -2,  0,  1, -1, -3,  0,  0, -2,  8, -3, -3, -1, -2, -1, -2, -1, -2, -2,  2, -3,  0,  0, -1, -4,
106  -1, -3, -3, -3, -1, -3, -3, -4, -3,  4,  2, -3,  1,  0, -3, -2, -1, -3, -1,  3, -3, -3, -1, -4,
107  -1, -2, -3, -4, -1, -2, -3, -4, -3,  2,  4, -2,  2,  0, -3, -2, -1, -2, -1,  1, -4, -3, -1, -4,
108  -1,  2,  0, -1, -3,  1,  1, -2, -1, -3, -2,  5, -1, -3, -1,  0, -1, -3, -2, -2,  0,  1, -1, -4,
109  -1, -1, -2, -3, -1,  0, -2, -3, -2,  1,  2, -1,  5,  0, -2, -1, -1, -1, -1,  1, -3, -1, -1, -4,
110  -2, -3, -3, -3, -2, -3, -3, -3, -1,  0,  0, -3,  0,  6, -4, -2, -2,  1,  3, -1, -3, -3, -1, -4,
111  -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4,  7, -1, -1, -4, -3, -2, -2, -1, -2, -4,
112   1, -1,  1,  0, -1,  0,  0,  0, -1, -2, -2,  0, -1, -2, -1,  4,  1, -3, -2, -2,  0,  0,  0, -4,
113   0, -1,  0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1,  1,  5, -2, -2,  0, -1, -1,  0, -4,
114  -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1,  1, -4, -3, -2, 11,  2, -3, -4, -3, -2, -4,
115  -2, -2, -2, -3, -2, -1, -2, -3,  2, -1, -1, -2, -1,  3, -3, -2, -2,  2,  7, -1, -3, -2, -1, -4,
116   0, -3, -3, -3, -1, -2, -2, -3, -3,  3,  1, -2,  1, -1, -2, -2,  0, -3, -1,  4, -3, -2, -1, -4,
117  -2, -1,  3,  4, -3,  0,  1, -1,  0, -3, -4,  0, -3, -3, -2,  0, -1, -4, -3, -3,  4,  1, -1, -4,
118  -1,  0,  0,  1, -3,  3,  4, -2,  0, -3, -3,  1, -1, -3, -1,  0, -1, -3, -2, -2,  1,  4, -1, -4,
119   0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2,  0,  0, -2, -1, -1, -1, -1, -1, -4,
120  -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,  1
121  };
122  
123 static Int4  DOTNCBIstdaaToBLOSUM [] =  {23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15, 16, 19, 17, 22, 19, 21, 22 , 23};
124 
125 static Int4        trim_size=0;
126 static Boolean     is_na;
127 
128 /****************************************************************************
129 
130       STATIC FUNCTIONS                                                               
131  ***************************************************************************/
132 
133 
134 static Boolean DOT_GetSeqsbyLoc(DOTMainDataPtr mip);
135 static Boolean DOT_ComputeDotPlot (DOTMainDataPtr mip);
136 
137 
138 
139 
140 /*_____________________________________(DOT_DNAScoringMatrix)_______
141 
142 
143   Purpose : Create DNA scoring matrix.
144 
145 ____________________________________________________________________*/
146 
147 extern Int4Ptr PNTR   DOT_DNAScoringMatrix(Int4 mismatch, Int4 reward,Int4 alsize){
148   Int4Ptr PNTR alignMatrix;
149   Int4 i,j,k;
150   Int4 nbase1,nbase2;
151   Int4 base1[4],base2[4];
152   Int4 nmatches;
153   /* alsize = alphabet size.. should be a 4 or 16. */
154   /* If "ncbi'ized" the codes.. could make matrix this way,
155      then use the ncbi translation tools 
156      */
157   if(alsize !=4 && alsize!=16) {
158     ErrPostEx(SEV_WARNING,0,0,"DNAScoringMatrix: No ambiguity codes for alphabet size=%d\n",alsize);
159   }
160   alignMatrix= (Int4Ptr PNTR) Calloc(alsize*(alsize+1),
161                                      MAX(sizeof(Int4),sizeof(Int4Ptr)));
162   for(i=0;i<alsize;i++) {
163     *(alignMatrix+i)=(Int4Ptr) (alignMatrix+(i+1)*alsize);
164   }
165 
166   for(i=0;i<alsize;i++) {
167     for(j=0;j<alsize;j++) {
168       alignMatrix[i][j]=mismatch;
169     }
170   }
171   for(i=0;i<alsize;i++)
172       alignMatrix[i][i]=reward;
173   if(alsize!=16) 
174       return alignMatrix;
175 
176   /* For ncbi4na. "presence: of bases is Bit-encoded A=1,C=2,G=4,T=8"*/
177   /* Score gaps as mismatches... should not be there */
178   /*   for(i=0;i<alsize;i++) alignMatrix[0][i]=alignMatrix[i][0]=mismatch; */
179 
180   for(i=0;i<alsize;i++) {
181     base1[0] = i&1;
182     base1[1] = (i>>1)&1;
183     base1[2] = (i>>2)&1;
184     base1[3] = (i>>3)&1;
185     nbase1 = base1[0]+base1[1]+base1[2]+base1[3];
186     for(j=0;j<alsize;j++) {
187       base2[0] = j&1;
188       base2[1] = (j>>1)&1;
189       base2[2] = (j>>2)&1;
190       base2[3] = (j>>3)&1;
191       nbase2 = base2[0]+base2[1]+base2[2]+base2[3];
192       nmatches=0;
193       for(k=0;k<4;k++) nmatches += (base1[k]&base2[k]);  /* matches */
194       if(nbase1==0 || nbase2==0) {
195         alignMatrix[i][j]=mismatch; /*gaps */
196       } else {
197         alignMatrix[i][j] =(reward*nmatches+(nbase1*nbase2-nmatches)*mismatch)/(nbase1*nbase2);
198       }
199     }
200   }
201   return alignMatrix;
202 }
203 
204 
205 /*____________________________________________(DOT_CalculateScoreMeans)_____
206 
207   Purpose : Calculate max and mean values for matrix vectors.
208 
209 ____________________________________________________________________*/
210 
211 static void DOT_CalculateMatrix_MinMax (DOTMainDataPtr mip)
212 {
213   Int4             temp, sum, q, s, max;
214   Int2             maxscore, minscore;
215   Int4Ptr PNTR     matrix; 
216 
217 
218   matrix = mip->matrix;
219   max=(is_na)?4:24;
220       
221       for (maxscore = minscore = sum = s = 0; s < max; s++)
222           for (q=0; q < max; q++)
223             {
224               if (is_na)
225               temp = matrix[q][s];
226               else
227                 temp = DOTblosum62[DOTNCBIstdaaToBLOSUM[q]][DOTNCBIstdaaToBLOSUM[s]];
228               
229               if (temp < minscore)
230                 minscore = temp;
231             if (temp > maxscore) 
232               maxscore = temp;
233             
234             sum += temp;
235             }
236 
237       mip->maxscore=maxscore;
238       mip->minscore=minscore;
239 
240   return;
241 }
242 
243 /*_______________________________________________________(CompareFunc)_____
244 
245 
246   Purpose : Compare function for hits binary tree.
247 
248 ____________________________________________________________________*/
249 
250 static Int4 CompareFunc(Pointer n1, Pointer n2)
251 {
252   DOTDiagPtr   nd1, nd2;
253 
254   nd1 = (DOTDiagPtr)n1;
255   nd2 = (DOTDiagPtr)n2;
256 
257   if (nd1->score > nd2->score) 
258     return 1;
259   else if (nd1->score < nd2->score)
260     return -1;
261  /*  else if (nd1->length > nd2->length) */
262 /*     return 1; */
263 /*   else if (nd1->length < nd2->length) */
264 /*     return -1; */
265   else if (nd1->rdmKey > nd2->rdmKey)
266     return 1;
267   else if (nd1->rdmKey < nd2->rdmKey)
268     return -1;
269   else
270     return 0;
271 }
272 
273 Pointer   avl_List[MAX_TRIM]={""};
274 Int4      avl_counter1=0;
275 
276 /*___________________________________________________________(DOT_AddToList)
277 
278   Purpose : Add to a temporary array for deletion from binary tree.
279 
280 ____________________________________________________________________*/
281 static Boolean DOT_AddToList (Pointer item)
282 {
283 
284   avl_List[avl_counter1]=item;
285 
286   avl_counter1++;
287 
288   if (avl_counter1>=trim_size)
289     {
290       avl_counter1=0;
291       return FALSE; 
292     }
293   else 
294     return TRUE; 
295 }
296 /*___________________________________________________________(DOT_TrimBinTree)
297   Purpose : Trim hits binary tree if grows > mip->tree_limit.
298 
299 ____________________________________________________________________*/
300 static void DOT_TrimBinTree (Avl_TreePtr tree, Int4Ptr cutoff)
301 {
302   Int2  i;
303   DOTDiagPtr node;
304   
305   Avl_Traverse(tree, DOT_AddToList);
306 
307   for (i=0; i<trim_size; i++)
308     {
309       node=(DOTDiagPtr)avl_List[i];
310       *cutoff=MAX(*cutoff, node->score);
311       node=(DOTDiagPtr)Avl_Delete(tree, (Pointer)node); 
312       if (node) MemFree(node);
313     }
314 }
315 /*_________________________________________(DOT_NewHitNode)_____
316 
317 
318   Purpose : Create new hit node.
319 
320 ____________________________________________________________________*/
321 static DOTDiagPtr DOT_NewHitNode(DOTMainDataPtr mip, Int4 q_left, Int4 s_left, Int4 length, Int4 score)
322 {
323   DOTDiagPtr          node;
324 
325   if (!(node =(DOTDiagPtr)MemNew (sizeof(DOTDiag)))) 
326     {
327       ErrPostEx (SEV_ERROR, 0, 0, "%s", "HIT node allocation failed");
328       return NULL;
329     }
330   node->score=score;
331   node->q_start = mip->q_start + q_left;  
332   node->s_start = mip->s_start + s_left;
333 
334   node->length = length;
335   node->rdmKey=RandomNum();
336   
337   return node;
338 }
339 
340 /*___________________________________________________________(DOT_SaveHit)_____
341 
342 
343   Purpose : Save extended hit.
344 
345 ____________________________________________________________________*/
346 static void DOT_SaveHit (DOTMainDataPtr mip, Int4 score, Int4 q_offset, Int4 s_offset, Int4 length)
347 {
348   DOTDiagPtr          Node;
349   Int4                unique=0;
350   
351 
352 
353   Node=DOT_NewHitNode(mip, q_offset, s_offset, length, score);
354 
355   if (Node == NULL) return;
356 
357   if (mip->first_pass == FALSE) 
358     {
359       if ((int)mip->tree->totalNodes>=mip->tree_limit)
360         DOT_TrimBinTree(mip->tree, &(mip->cutoff_score));
361   
362      if(!Avl_Insert(mip->tree, (Pointer)Node, &unique))
363         if (Node) MemFree(Node);
364     }
365   else /* first pass */
366     {
367       
368       mip->tree =  (Avl_TreePtr)MemNew(sizeof(Avl_Tree));
369       Avl_Initialize(mip->tree, CompareFunc, NULL);
370       mip->tree->root=(Avl_NodePtr)MemNew(sizeof(Avl_Node));
371       mip->tree->root->avl_p=(Pointer)Node;
372       mip->tree->totalNodes++;
373       mip->first_pass=FALSE;
374       if (mip->tree_limit>1000)
375         trim_size=200;
376       else
377         trim_size=(mip->tree_limit*20)/100;
378     }
379   
380 }
381 
382 /*_______________________________________________________(CompareDiags)_____
383 
384   Purpose : Compare function for history binary tree.
385 
386 ____________________________________________________________________*/
387 
388 static Int4 CompareDiags(Pointer n1, Pointer n2)
389 {
390   Int4 diag2, diag1;
391   DOTHistPtr node1, node2;
392 
393   node1 = (DOTHistPtr)n1;
394   node2 = (DOTHistPtr)n2;
395 
396   diag1 = node1->diag_constant;
397   diag2 = node2->diag_constant;
398 
399   if (diag1 == diag2) /* found diag match */
400     return 0;/* found match */
401   else if (diag1 > diag2) 
402     return 1; /* greater */
403   else
404     return -1; /* smaller */  
405 }
406 
407 /*_________________________________________(DOT_NewHistNode)_____
408 
409 
410   Purpose : Create new history node.
411 
412 ____________________________________________________________________*/
413 DOTHistPtr DOT_NewHistNode(Int4 diag, Int4 q_pos)
414 {
415   DOTHistPtr node;
416 
417   if (!(node =(DOTHistPtr)MemNew (sizeof(DOTHist)))) 
418     {
419       ErrPostEx (SEV_WARNING, 0, 0, "%s", "HIST node allocation failed");
420       return NULL;
421     }
422   node->diag_constant = diag;
423   node->q_stop = q_pos;
424   
425   return node;  
426 }
427 
428 
429 /*____________________________________________________________(DOT_ExtendProt)_____
430 
431 
432   Purpose : Protein extend hit function.
433 
434 ____________________________________________________________________*/
435 static Int2 DOT_ExtendProt (DOTMainDataPtr mip, Uint1Ptr queryseq, Uint1Ptr subjectseq, Int4 q_off, Int4 s_off, Int4 diag, Int4 wordsize, DOTHistPtr node)
436 {
437   Uint1Ptr         q_end,s, q, end, start, q_beg;
438   Int4             score, sum, length;
439   Int4             s_left, q_left, s_right, q_right, s_diff;
440   Int4             q_start, s_start;
441   Int4             threshold = 0, x, X; /* test default */
442   Int4             report_len;
443   
444 
445   q_left=q_off;
446   s_left=s_off;
447   x=X=threshold;
448   report_len=1; /*  -arbitrary*/
449 
450   /* initialize positions for left extension */
451   q =queryseq+(q_left-1); /* -1 b/c queryseq & subjectseq are =1 */
452   s = subjectseq+(s_left-1);
453 
454   if (q_off<s_off)
455     start=subjectseq+(s_off-q_off);
456   else
457     start=subjectseq;
458   
459   
460   score = 0;
461   sum = 0;
462 
463   /* extend to the left*/
464   
465   while (s >start) 
466     {
467       if (*q >24 || *s >24 || *q<1 || *s<1) 
468         {
469           q++;
470           s++;
471           break;/* ambiguity found */
472         }
473            
474       if ((sum = DOTblosum62[DOTNCBIstdaaToBLOSUM[*s]][DOTNCBIstdaaToBLOSUM[*q]])>= threshold) 
475         {
476           score += sum;
477           sum=0;
478           if ((x=-score)<X)
479             x=X;
480         }
481       else 
482         if (sum<x)
483           {
484             q++;
485             s++;
486             break;
487           }
488       q--;
489       s--;
490     }
491   
492   q_beg=q;
493   score-=mip->maxscore*wordsize;
494   
495   /* reinitialize positions for right extention */
496   q=queryseq+(q_off-wordsize);
497   s=subjectseq+(s_off-wordsize);
498   q_right = mip->qlen- (q_off-wordsize)-1;
499   s_right = mip->slen- (s_off-wordsize)-1;
500 
501   /* assign ends */
502 
503   if (q_right<s_right)
504     end = s + q_right;
505   else
506     end = s + s_right;
507  
508   /* extend to the right only */
509   
510   while (s < end) 
511     {
512       q++;
513       s++;
514       if (*q > 24 || *s > 24 || *s<1 || *q<1) 
515         {
516           break;/* ambiguity found */
517         }
518       
519       if ((sum = DOTblosum62[DOTNCBIstdaaToBLOSUM[*s]][DOTNCBIstdaaToBLOSUM[*q]])>= threshold) 
520         {
521           score += sum;
522           sum=0;
523           if ((x=-score)<X)
524             x=X;
525         }
526       else 
527         if (sum<x)
528           break;
529     }
530   
531   q_end = q;    
532 
533   length = q_end - q_beg;
534   if (length>=report_len)
535     { 
536       /* check against the lowest score in tree */
537       if (score<mip->cutoff_score)
538         return -1; 
539       
540       s_diff = s_off-q_off;
541       q_start = q_beg- queryseq;
542       length = q_end - q_beg;
543       s_start = q_start+s_diff;
544       
545       /* update node */
546      if (node!=NULL)
547        {
548          node->diag_constant = diag;
549          node->q_stop= q_start+length; 
550        }
551      
552      DOT_SaveHit (mip, score, q_start, s_start, length);
553      return 0;
554     }
555   else 
556     return -1;  /* hit not stored */
557 }
558 
559 /*____________________________________________________________(DOT_ExtendNuc)_____
560 
561 
562   Purpose : Nucleotide extend hit function.
563 
564 ____________________________________________________________________*/
565 static Int2 DOT_ExtendNuc (DOTMainDataPtr mip, Uint1Ptr queryseq, Uint1Ptr subjectseq, Int4 q_off, Int4 s_off, Int4 diag, Int4 wordsize, DOTHistPtr node)
566 {
567   Uint1Ptr         q_end, s, q, end, start, q_beg;
568   Int4             score, sum, length;
569   Int4             s_left, q_left, s_right, s_diff, q_right;
570   Int4Ptr PNTR     matrix;
571   Int4             ex_threshold=0,x,X;
572  
573   matrix = mip->matrix;
574   
575   s_left = s_off/* -wordsize */;
576   q_left=q_off/* -wordsize */;
577   x=X=ex_threshold;
578   /* reportable=4*maxscore;  *//* below lowest possible -store everything*/
579 
580   /* initialize positions for left extension */
581   q=queryseq+ (q_left-1);
582   s=subjectseq+ (s_left-1);
583 
584   /* find the beginning */
585   if (q_off < s_off)
586     start = subjectseq+(s_off-q_off);
587   else
588     start = subjectseq;
589     
590   score=0;
591   sum = 0;
592 
593   /* extend to the left*/
594   
595   while (s >start)   
596     {
597 
598       if (*q > 3 || *s > 3) /* ambiguity found */
599         {
600           q++;
601           s++;
602           break;
603         }
604       if ((sum+= matrix[*s][*q])>0)
605         {
606           score+=sum;
607           sum=0;
608           if ((x=-score)<X)
609             x=X;
610         }
611       else
612         if (sum<x)
613           {
614             q++;
615             s++;
616             break;
617           }
618       q--;
619       s--;
620     }
621 
622   q_beg=q;
623   score-=mip->maxscore*(wordsize-2);
624 
625 /* initialize positions for right extention */
626 
627   q=queryseq+(q_off-wordsize);
628   s=subjectseq+(s_off-wordsize);
629 
630   q_right = mip->qlen- (q_off-wordsize)-1;
631   s_right = mip->slen- (s_off-wordsize)-1;
632 
633   /* assign ends */
634 
635   if (q_right<s_right)
636     end = s + q_right;
637   else
638     end = s + s_right;
639   
640   
641   /* extend to the right */
642   sum = 0;
643   
644   while (s < end) 
645     {
646       q++;
647       s++;
648       if (*q > 3 || *s > 3) /* ambiguity found */
649         {
650           break;
651         }
652       
653       if ((sum+= matrix[*s][*q])>0)
654         {
655           score+=sum;
656           sum=0;
657           if ((x=-score)<X)
658             x=X;
659         }
660       else
661         if (sum<x)
662           break;
663     }
664   
665   q_end =q; 
666 
667   length = q_end - q_beg;
668 
669 /*   if (score>= reportable) */ /* storing everything -reportable is the lowest possible anyway*/
670    /*  {  */
671       /* check against the lowest score in tree */
672       if (score<mip->cutoff_score)
673         return -1; 
674       
675       s_diff = s_off-q_off;
676 
677       q_left = q_beg- queryseq;
678       length = q_end - q_beg;
679       s_left = q_left+s_diff;
680       
681       /* update node */
682      if (node!=NULL)
683        {
684          node->diag_constant = diag;
685          node->q_stop= q_left + length; 
686        }
687      
688      DOT_SaveHit (mip, score, q_left, s_left, length);
689      return 0;
690    /*  } */
691 /*   else  */
692 /*     return -1; */  /* hit not stored */
693 }
694 
695 
696 
697 /*____________________________________________________________(ExtendHit)_____
698 
699 
700   Purpose : Calls extend function for new hit.
701 
702 ____________________________________________________________________*/
703 static Int2 ExtendHit (DOTMainDataPtr mip, Uint1Ptr queryseq, Uint1Ptr subjectseq, Int4 q_off, Int4 s_off, Int4 diag_constant, Int4 wordsize, DOTHistPtr node)
704 {
705   if (is_na)
706     {
707       return DOT_ExtendNuc (mip, queryseq, subjectseq, q_off, s_off, diag_constant,  wordsize, node);
708     }
709   else 
710     { 
711       return DOT_ExtendProt (mip, queryseq, subjectseq, q_off, s_off, diag_constant,  wordsize, node);
712     }  
713     
714 }
715  
716 /*_________________________________________(DOT_HistBTree)_____
717 
718 
719   Purpose : Check new hit against a previous hit in history binary tree.
720 
721 ____________________________________________________________________*/
722 
723 static void DOT_HistBTree (DOTInfoPtr info, Int4 diag, Int4 q_pos)/* rtn 1=found match, rtn 0=new hit*/
724 {
725   Avl_TreePtr tree=NULL;
726   Pointer     tnode=NULL;
727   DOTHistPtr     node=NULL;
728 
729   tree = info->tree;
730   node=DOT_NewHistNode(diag,q_pos);
731   
732   if (node == NULL) return;
733 
734   /*  check diag in tree */    
735   if ((tnode=Avl_Search (tree, (Pointer)node)) != NULL) /* diag found */
736     {
737       if ( ((DOTHistPtr)tnode)->q_stop < node->q_stop) /*old diag, new hit */
738         ExtendHit (info->mip, info->qseq, info->sseq, info->q_pos, info->s_pos, node->diag_constant, info->wordsize, (DOTHistPtr)tnode);
739 
740       if (node) MemFree(node);
741 
742     }  
743   else /* no diag, new hit */
744     {
745       if (ExtendHit (info->mip, info->qseq, info->sseq, info->q_pos, info->s_pos, node->diag_constant, info->wordsize, node) == 0)
746         {
747           if (!Avl_Insert(tree, (Pointer)node, NULL))
748             if (node) MemFree(node); /* insert new node returned FALSE */
749         }
750       else
751         if (node) MemFree(node);
752     }
753   
754 
755 }
756 
757 DOTDiagPtr PNTR avl_hitlist=NULL;
758 DOTHistPtr PNTR    avl_hist=NULL;
759 Int4            avl_counter2;
760 
761 
762 /*___________________________________________________________(CopyFunc)_____*/
763 static Boolean DOT_CopyFunc1 (Pointer item)
764 {
765 
766   avl_counter2--;
767   if (!(avl_counter2<0))
768     {
769       avl_hist[avl_counter2]=item;
770     }
771   return TRUE;
772 
773 }
774 
775 /*_________________________________________(FreeBTree)_____
776 
777   Purpose : Free binary tree structure.
778 
779 ____________________________________________________________________*/
780 static void DOT_FreeBTree(Avl_TreePtr tree)
781 {
782   if (tree)
783     {
784       Avl_Clear (tree, CompareDiags, NULL);
785       if (tree->root!=NULL)
786         {
787           ErrPostEx (SEV_WARNING, 0, 0, "%s", "HistTree->root free failed");
788         }
789       else if (Avl_TotalNodes(tree)!=0)
790         {
791           ErrPostEx (SEV_WARNING, 0, 0, "%s", "HistTree counter != 0");
792         }
793     }
794   
795 }
796 
797 /*_________________________________________(FreeHistBTree)_____
798 
799 
800   Purpose : Free memory function for elements in history binary tree.
801 
802 ____________________________________________________________________*/
803 static void DOT_FreeHistBTree(Avl_TreePtr tree)
804 {
805   Int4  index, i;
806   DOTHistPtr node;
807 
808   if (tree==NULL)
809     {
810       return;
811     }
812   
813   index=tree->totalNodes;
814   avl_counter2=index;
815 
816   avl_hist=(DOTHistPtr PNTR)MemNew(sizeof(DOTHistPtr)*index);
817   Avl_Traverse(tree, DOT_CopyFunc1);
818   
819    for (i=0; i<index; i++)
820     {
821       node= avl_hist[i];
822       node=(DOTHistPtr)Avl_Delete(tree, (Pointer)node); 
823       if (node) MemFree(node);
824     }
825 
826   if (avl_hist) MemFree(avl_hist);
827   DOT_FreeBTree(tree);
828 
829   return;
830 }
831 
832 /*___________________________________________________________(CopyFunc2)_____*/
833 static Boolean CopyFunc2 (Pointer item)
834 {
835   avl_counter2--;
836   if (!(avl_counter2<0))
837     {
838       avl_hitlist[avl_counter2]=item;
839     }
840 
841   return TRUE;
842 }
843 
844 /*_________________________________________(CopyHits)_____
845 
846 
847   Purpose : Copy stored hits from binary tree to array.
848 
849 ____________________________________________________________________*/
850 static void DOT_CopyHits(DOTMainDataPtr mip)
851 {
852   Int4      index;
853 
854   if (mip->tree==NULL)
855     {
856 /*       Message(MSG_ERROR, "No hits"); */
857       return; /* no hits */
858     }
859 
860   index=mip->tree->totalNodes;
861   avl_counter2=index;
862   mip->index=index;
863 
864   mip->hitlist=(DOTDiagPtr PNTR)MemNew(sizeof(DOTDiagPtr)*index);
865   avl_hitlist=mip->hitlist;
866   Avl_Traverse(mip->tree, CopyFunc2);
867 
868 
869 }
870 
871 
872  /*_________________________________________(DOT_BuildPLookup)_____
873 
874 
875   Purpose : Build protein lookup table.
876 
877 ____________________________________________________________________*/
878 static LookupTablePtr DOT_BuildPLookup(Uint1Ptr queryseq, Int4 qlen, Int4 word_size, Int4 alphabet_size, Int4 compression_ratio) 
879 {
880   BlastAllWordPtr all_words;
881   LookupTablePtr  lookup;
882   Int4            num_cols;
883   Int4            index = 0;
884   Int4            stop, start, offset, index1, index2;
885   Int4            threshold=13; /* one-hit heuristics T must be at least 13*/
886   Uint1Ptr        str, PNTR array, word;
887   Boolean         EXACTMATCH, found_ambig, saved_index = FALSE;
888   Int4                    counter;
889 
890 
891   lookup = lookup_new(alphabet_size, (Int2) word_size, (Int2) (word_size/compression_ratio));
892   all_words = BlastPopulateAllWordArrays(word_size+1, alphabet_size);
893   if (!all_words)
894     return NULL;
895   
896   str = queryseq;
897   num_cols = all_words->num_of_cols;
898   array = all_words->array;
899   stop = qlen - (word_size-1);
900   start = 0;
901   counter=0;
902   
903   for (offset = start; offset<stop; offset++, str++)
904     {
905       /* for every word - this part takes long -sol'n wordsize = 2!!!*/
906       for (index1 = 0; index1<num_cols; index1++)
907         {
908           found_ambig = FALSE;
909           EXACTMATCH=TRUE;
910           word = array[index1];
911           for (index2=0; index2<word_size; index2++)
912             {
913 
914               if (*(str+index2)>25 
915                   || *(str+index2)<1 ||*(word+index2)>25 
916                   || *(word+index2)<1)   
917                 { 
918                   found_ambig = TRUE; 
919                   EXACTMATCH=FALSE;
920                   break;
921                 }
922 
923               if (*(str+index2) != *(word+index2))
924                 {
925                   EXACTMATCH = FALSE;
926                   break; /* go on to the next word*/
927                 }
928             }
929           /* if str is exact match to the word */
930           if (EXACTMATCH)
931             {
932               counter++;
933               lookup_add(lookup, (CharPtr)word, offset+word_size, 0);
934               saved_index = TRUE;
935             }
936         }
937     }
938   if (!saved_index)
939         return NULL;
940 
941   /* deallocate */
942   /*
943   for (index=0; index<num_cols; index++)
944     {
945       if (array[index]) MemFree(array[index]);
946     }
947   */
948   if (array) MemFree(array);
949   if (all_words) MemFree(all_words);
950 
951 
952   return lookup;
953 }
954 
955  /*_________________________________________(DOT_BuildNLookup)_____
956 
957 
958   Purpose : Build nucleotide lookup table.
959 
960 ____________________________________________________________________*/
961 static LookupTablePtr DOT_BuildNLookup(Uint1Ptr queryseq, Int4 qlen, Int2 wordsize, Int2 alphabet_size, Int2 compression_ratio) 
962 {
963 
964   Boolean          found_ambig, saved_index=FALSE;
965   Int4             offset, end, reduced_wordsize, stop;
966   Int4             lookup_index, index, index_addition;
967   Uint1Ptr         str;
968   LookupTablePtr   lookup;
969   
970   
971   reduced_wordsize = wordsize/compression_ratio;
972   lookup = lookup_new(alphabet_size, (Int2) wordsize, (Int2) wordsize);
973 
974   str = queryseq;
975   end = (qlen-wordsize-1); /* use the shorter sequence for hash table */
976   stop = reduced_wordsize;
977 
978 
979 
980   /*  create hash table */
981   for (offset = 0; offset<end; offset++, str++)
982     {
983       found_ambig = FALSE;
984       lookup_index = 0;
985       index_addition = 0;
986       
987       for (index=0; index<stop; index++)
988         {
989           
990           if (*(str+index_addition) > 3 || *(str+index_addition+1)> 3 || *(str+index_addition+2) > 3 || *(str+index_addition+3) > 3)
991             {
992               found_ambig = TRUE;
993               break;
994             }
995           
996           lookup_index += (*(str+index_addition)   << 6);
997           lookup_index += (*(str+1+index_addition) << 4);
998           lookup_index += (*(str+2+index_addition) << 2);
999           lookup_index += *(str+3+index_addition);
1000           
1001           if (index != stop-1)
1002             {       /* 8 bits/byte */
1003               lookup_index <<= 8;
1004               index_addition += 4;
1005             }
1006         }
1007       
1008       if (found_ambig == FALSE)
1009         {
1010           lookup_add_index(lookup, (Int4) lookup_index, offset+ (reduced_wordsize*compression_ratio), 0/* context_index */);
1011           saved_index = TRUE;
1012         }
1013     }
1014   
1015   if (saved_index == FALSE)
1016     return NULL;
1017   /* convert lookup table to (mod_lt) table.*/
1018 
1019 
1020   return lookup;
1021 }
1022 
1023 
1024 
1025  /*_________________________________________(DOT_ComputeHits)_____
1026 
1027 
1028   Purpose : Create hash table, find hits, store hits in hitlist array.
1029 
1030 ____________________________________________________________________*/
1031 static Int2 DOT_ComputeHits(DOTMainDataPtr mip) 
1032 {
1033 
1034   LookupTablePtr  lookup;
1035   ModLAEntry      *mod_lt;
1036   Int4            wordsize, alphabet_size;
1037   Int4            char_size, lookup_index, mask, compression_ratio;
1038   Int4            qlen, slen;
1039   Int4            threshold=13;
1040   Uint1Ptr        queryseq, subjectseq;
1041   Boolean         saved_index = FALSE;
1042   Int4                    end;
1043   Uint1Ptr                s, subject0, s_end, q_end;
1044   ModLookupPositionPtr    lookup_pos;
1045   ModLookupPosition       hit_info;
1046   Int4                    num_hits, s_off, s_pos, q_pos;
1047   Int4                    diag, code_limit;
1048   DOTInfoPtr                 info;
1049   DOTHistPtr                 node=NULL;
1050   
1051   
1052 
1053   queryseq=mip->qseq;
1054   subjectseq=mip->sseq;
1055   qlen=mip->qlen;
1056   slen=mip->slen;
1057 
1058   if (is_na)
1059     {
1060       alphabet_size = 4; /*ncbi2na*/
1061       wordsize = mip->word_size; /* test default */
1062       compression_ratio = 4;
1063       if (!(lookup=DOT_BuildNLookup(queryseq, qlen, wordsize, alphabet_size, compression_ratio)))
1064         {
1065           return -1;    
1066         }
1067     }
1068   else
1069     {
1070       wordsize =2; /*test default*/
1071       alphabet_size = 25; /* ncbi stdaa */
1072       compression_ratio = 1; /* ??*/
1073       if (!(lookup=DOT_BuildPLookup(queryseq, qlen, wordsize, alphabet_size, compression_ratio)))
1074         {
1075           return -1;    
1076         }
1077     }
1078   
1079   code_limit=alphabet_size-1;
1080 
1081   /* convert lookup table to (mod_lt) table.*/
1082   lookup_position_aux_destruct(lookup);
1083   mod_lt = lookup->mod_lt;
1084   mask = lookup->mask;  
1085   char_size = lookup->char_size;
1086   s = subjectseq;
1087   subject0 = subjectseq;
1088 
1089   end = slen-wordsize-1;
1090   /* The subject sequence is too short, exit this function now. */
1091   if (wordsize > end)
1092     return -1;
1093 
1094   s_end = subjectseq+slen-1;
1095   q_end = queryseq+qlen-1;
1096 
1097   /* history btree structure */
1098   info = (DOTInfoPtr) MemNew (sizeof(DOTInfo));
1099   info->mip = mip;
1100   info->wordsize = wordsize;
1101   info->qseq = queryseq;
1102   info->sseq = subjectseq;
1103   info->first_pass = TRUE;
1104 
1105 
1106  /* scan subject sequence against lookup table */
1107   s = lookup_find_init (lookup, &lookup_index, s);
1108 
1109   for (;;) /* loops until end of subject sequence */
1110     {
1111       do 
1112         {
1113           /* lookup a contiguous word. */
1114           s++;
1115           if (s > s_end)  /*  end of subject sequence */
1116             goto  fin;
1117 
1118           if (*s > code_limit) /* ambiguity found */
1119             {
1120               num_hits = 0;
1121               continue; /*skip loop */
1122             }
1123 
1124           lookup_index = (((lookup_index) & mask) << char_size)  + *s;
1125 
1126         } while ((num_hits = mod_lt[lookup_index].num_used) == 0);
1127       
1128       lookup_pos = mod_lt[lookup_index].entries;
1129       hit_info = *((Uint4 *) lookup_pos);
1130       lookup_pos++;
1131       
1132       if (num_hits>3)
1133         {
1134           lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
1135         }
1136       
1137       s_pos = s_off = s-subject0+1;
1138       info->s_pos = s_pos;
1139               
1140 
1141       /* extend each hit in the linked list */
1142       do 
1143         {
1144           q_pos = hinfo_get_pos (hit_info);
1145           num_hits --;
1146           hit_info = *((Uint4 *) lookup_pos);
1147           lookup_pos++;
1148 
1149           diag = s_pos - q_pos; /* diag */
1150           info->q_pos = q_pos;
1151          
1152           if (info->first_pass)
1153             {
1154               info->tree = (Avl_TreePtr)MemNew(sizeof(Avl_Tree));
1155               Avl_Initialize(info->tree, CompareDiags, NULL);
1156               info->tree->root = (Avl_NodePtr) MemNew (sizeof(Avl_Node));
1157               node=DOT_NewHistNode(diag, q_pos);
1158               if(ExtendHit (mip, queryseq, subjectseq, q_pos, s_pos, diag, wordsize, node)==0)
1159                 {
1160                   info->tree->root->avl_p = (Pointer)node;
1161                   info->tree->totalNodes++;
1162                   info->first_pass = FALSE;
1163                 }
1164               else
1165                 {
1166                   if (node) MemFree(node);
1167                   if (info->tree->root) MemFree(info->tree->root);
1168                   if (info->tree) MemFree(info->tree);
1169                 }
1170               
1171             }
1172           else
1173             {
1174               DOT_HistBTree (info, diag, q_pos);
1175             }
1176                  
1177         } while (num_hits>0);
1178       
1179     }
1180  fin:
1181 
1182   lookup_destruct(lookup); /* deallocate lookup table */
1183   DOT_FreeHistBTree(info->tree);/* Free history btree */
1184   if (info->tree) MemFree(info->tree);
1185   if (info) MemFree(info);
1186   
1187   DOT_CopyHits(mip); /* copy bintree to array */
1188   if (mip->tree)
1189     {
1190       DOT_FreeBTree(mip->tree); /* free hits btree */
1191       if (mip->tree) MemFree(mip->tree);
1192     }
1193   return 0;
1194 
1195 }
1196 
1197 
1198 /*_________________________________________(DOT_SortProc)_____________
1199 
1200   Purpose : Sort proc -by score, by q_start, by s_start.
1201 
1202 ____________________________________________________________________*/
1203 static int LIBCALLBACK DOT_SortProc (VoidPtr vp1, VoidPtr vp2)
1204 {
1205   DOTDiagPtr hit1, hit2;
1206   DOTDiagPtr PNTR hp1, PNTR hp2;
1207   
1208   hp1 = (DOTDiagPtr PNTR) vp1;
1209   hp2 = (DOTDiagPtr PNTR) vp2;
1210   hit1 = *hp1;
1211   hit2 = *hp2;
1212 
1213   /*by descending score */
1214   if (hit1->score < hit2->score) 
1215     return 1;
1216   else if (hit1->score > hit2->score)
1217     return -1;
1218   /* by ascending q_start */
1219   else if (hit1->q_start > hit2->q_start)
1220     return 1;
1221   else if (hit1->q_start < hit2->q_start)
1222     return -1;
1223   /* by ascending s_start */
1224   else if (hit1->s_start > hit2->s_start)
1225     return 1;
1226   else if (hit1->s_start < hit2->s_start)
1227     return -1;
1228   
1229   
1230   return 0;
1231 }
1232 
1233 
1234 
1235 /*_______________________________________________(DOT_SortHitList)________________
1236 
1237   Purpose :Sort hits array.
1238 
1239 ____________________________________________________________________*/
1240 
1241 static Int2 DOT_SortHitlist (DOTDiagPtr PNTR hitlist, Int4 index)
1242 {
1243   /* sort by score */
1244     HeapSort ((VoidPtr)hitlist, index, sizeof(DOTDiagPtr), DOT_SortProc);
1245 
1246   return 0;
1247 }
1248 
1249 /*___________________________________(DOT_ScoreCount)____________
1250 
1251   Purpose : Rank scores.
1252 
1253 ____________________________________________________________________*/
1254 
1255 static void DOT_ScoreCount (DOTMainDataPtr mip, DOTDiagPtr PNTR hitlist, Int4 index)
1256 {
1257   Int4    PNTR score_array, Score, score, unique, i, j;
1258 
1259   /* count unique scores */
1260   score=hitlist[0]->score;
1261   i=1;
1262   unique=1;
1263 
1264   while (i<index)
1265     {
1266       if (score>(Score=hitlist[i]->score))
1267         {
1268           score=Score;
1269           unique++;
1270         }
1271       i++;
1272     }
1273 
1274   mip->unique=unique;
1275 
1276   if (unique<=1)
1277     return;
1278 
1279   score_array=(Int4Ptr)MemNew(sizeof(Int4)*unique);
1280   
1281   j=1;
1282   i=1;
1283   score=hitlist[0]->score;
1284   score_array[0]=0;
1285   
1286   while (i<index)
1287     {
1288       if (score>(Score=hitlist[i]->score))
1289         {
1290           score=Score;
1291           score_array[j]=i;
1292           j++;
1293         }
1294       i++;
1295     }
1296   
1297   mip->score_array=score_array;
1298   
1299 }
1300 
1301  /*_________________________________________(DOT_BuildHitList)_____
1302 
1303 
1304   Purpose : Build and sort hitlist array, count number of unique scores for threshold ramp.
1305 
1306 ____________________________________________________________________*/
1307 Int2 DOT_BuildHitList(DOTMainDataPtr mip, Boolean sort, Boolean countUniqueScores) 
1308 {
1309   DOTDiagPtr PNTR hitlist;
1310   Int4            index;
1311 
1312 
1313   DOT_ComputeHits(mip);
1314 
1315   if (mip->hitlist==NULL)
1316     return -1; /* no hits */
1317 
1318 
1319   /* sort results */
1320   if (sort)
1321     {
1322       hitlist = mip->hitlist;
1323       index = mip->index;
1324       DOT_SortHitlist (hitlist, index);
1325     }
1326   if (countUniqueScores)
1327     DOT_ScoreCount(mip, hitlist, index);
1328 
1329   return 0;
1330 }
1331 
1332 
1333 
1334 
1335 /*_________________________________________(Get_ProtResidues)_____
1336 
1337   Purpose : Fill protein sequence buffers.
1338 
1339 ____________________________________________________________________*/
1340 static Boolean DOT_GetPResidues (DOTMainDataPtr mip, Boolean is_byLoc)
1341 {
1342  
1343   Int4             i;
1344   SeqPortPtr       qspp, sspp;
1345   Uint1Ptr         qseq, sseq, temp_seq1, temp_seq2;
1346   Int4             buf_len;
1347   Int4             qlen, slen;
1348   Int2             ctr;
1349   Uint1Ptr         sbuffer, qbuffer;
1350 
1351 
1352  /* initialize buffers */
1353   qbuffer=(Uint1Ptr) MemNew (sizeof(Uint1)*101); 
1354   sbuffer=(Uint1Ptr) MemNew (sizeof(Uint1)*101); 
1355 
1356   MemSet((Pointer)qbuffer, '\0', sizeof(Char)*101);
1357   MemSet((Pointer)qbuffer, '\0', sizeof(Char)*101);
1358 
1359   qlen=mip->qlen;
1360   slen=mip->slen;
1361 
1362   temp_seq1=NULL;
1363   temp_seq2=NULL;
1364 
1365  
1366   if (!(qseq = (Uint1Ptr) MemNew (sizeof(Uint1)*(qlen)))) return FALSE;
1367 
1368   if (!(sseq = (Uint1Ptr) MemNew (sizeof(Uint1)*(slen)))) return FALSE;
1369 
1370  if (is_byLoc)
1371     {
1372       qspp = SeqPortNewByLoc(mip->qslp, Seq_code_ncbistdaa);
1373       sspp = SeqPortNewByLoc(mip->sslp, Seq_code_ncbistdaa);
1374     }
1375  else
1376    {
1377      qspp = SeqPortNew (mip->qbsp, MIN(mip->q_start, mip->q_stop), MAX(mip->q_start, mip->q_stop), 0,  Seq_code_ncbistdaa);
1378      sspp = SeqPortNew (mip->sbsp, mip->s_start, mip->s_stop, 0, Seq_code_ncbistdaa);
1379    }
1380  
1381  if (qspp == NULL || sspp == NULL) 
1382    {
1383      ErrPostEx (SEV_ERROR, 0, 0, "%s", "DOT- Failed on SeqPortNew");
1384      return FALSE;
1385    }
1386  
1387  temp_seq2 = sseq;
1388  temp_seq1 =  qseq;
1389 
1390  do 
1391    {
1392       ctr = SeqPortRead(qspp, qbuffer, 100);
1393       
1394       if (ctr > 0) 
1395         {
1396           i = 0;
1397           buf_len = ctr;
1398           while (buf_len > 0)
1399             {
1400               *temp_seq1 = qbuffer[i];
1401 
1402               temp_seq1++;
1403               i++;
1404               buf_len--;
1405             }
1406         }
1407     } while (ctr != SEQPORT_EOF * -1);
1408 
1409 
1410   do 
1411     {
1412       ctr = SeqPortRead(sspp, sbuffer, 100);
1413       
1414       if (ctr > 0) 
1415         {
1416           i = 0;
1417           buf_len = ctr;
1418           while (buf_len > 0)
1419             {
1420               *temp_seq2 = sbuffer[i];
1421               temp_seq2++;
1422               i++;
1423               buf_len--;
1424             }
1425         }
1426     } while (ctr != SEQPORT_EOF * -1);
1427   
1428   SeqPortFree(sspp);
1429   SeqPortFree(qspp);
1430 
1431   
1432   mip->sseq=sseq;
1433   mip->qseq=qseq;
1434   return TRUE;
1435 }
1436 
1437 
1438 
1439 /*_________________________________________(GetNResidues)_____
1440 
1441   Purpose : Fill nucleotide sequence buffers.
1442 
1443 ____________________________________________________________________*/
1444 
1445 static Boolean DOT_GetNResidues (DOTMainDataPtr mip, Boolean is_byLoc)
1446 {
1447   Int4             i;
1448   SeqPortPtr       qspp, sspp;
1449   Uint1Ptr         qseq, sseq, temp_seq1, temp_seq2;
1450   Int4             qlen, slen;
1451   Int4             buf_len;
1452   Int2             ctr;    
1453   Uint1Ptr         sbuffer, qbuffer;
1454 
1455  /* initialize buffers */
1456   qbuffer=(Uint1Ptr) MemNew (sizeof(Uint1)*101); 
1457   sbuffer=(Uint1Ptr) MemNew (sizeof(Uint1)*101); 
1458   
1459   MemSet((Pointer)qbuffer, '\0', sizeof(Char)*101); 
1460   MemSet((Pointer)sbuffer, '\0', sizeof(Char)*101);
1461 
1462   
1463   mip->matrix = DOT_DNAScoringMatrix(-3, 1, 4);
1464   qlen=mip->qlen;
1465   slen=mip->slen;
1466 
1467   temp_seq1 = NULL;
1468   temp_seq2 = NULL;
1469 
1470   if (!(qseq = (Uint1Ptr) MemNew (sizeof(Uint1)*(qlen)))) return FALSE;
1471   if (!(sseq = (Uint1Ptr) MemNew (sizeof(Uint1)*(slen)))) return FALSE;
1472 
1473   if (is_byLoc){
1474       qspp = SeqPortNewByLoc(mip->qslp, Seq_code_ncbi2na);
1475       sspp = SeqPortNewByLoc(mip->sslp, Seq_code_ncbi2na);
1476   }
1477   else
1478       {
1479       qspp = SeqPortNew (mip->qbsp, MIN(mip->q_start, mip->q_stop), MAX(mip->q_start, mip->q_stop), 0, Seq_code_ncbi2na); 
1480       sspp = SeqPortNew (mip->sbsp, mip->s_start, mip->s_stop, 0, Seq_code_ncbi2na);
1481     }
1482 
1483   if (qspp == NULL || sspp == NULL)
1484     {
1485       ErrPostEx (SEV_ERROR, 0, 0, "%s", "DOT- Failed on SeqPortNew");
1486       return FALSE;
1487     }
1488   temp_seq1 =  qseq;
1489   do 
1490     {
1491       ctr = SeqPortRead(qspp, qbuffer, 100);
1492       
1493       if (ctr > 0) 
1494         {  
1495           i = 0;
1496           buf_len = ctr;
1497           while (buf_len > 0)
1498             {
1499               *temp_seq1 = (Uint1)qbuffer[i];
1500               temp_seq1++;
1501               i++;  
1502               buf_len--;
1503             }
1504         }
1505     } while (ctr != SEQPORT_EOF * -1);
1506  
1507   temp_seq2 = sseq;
1508   do 
1509     {
1510       ctr = SeqPortRead(sspp, sbuffer, 100);
1511       
1512       if (ctr > 0) 
1513         {
1514           i = 0;
1515           buf_len = ctr;
1516           while (buf_len > 0)
1517             {
1518               *temp_seq2 = (Uint1)sbuffer[i];
1519               temp_seq2++;
1520               i++;
1521               buf_len--;
1522             }
1523         }
1524     } while (ctr != SEQPORT_EOF * -1);  
1525 
1526 
1527   SeqPortFree(sspp);
1528   SeqPortFree(qspp);
1529   if (qbuffer) MemFree(qbuffer);
1530   if (sbuffer) MemFree(sbuffer);
1531   mip->qseq = qseq;
1532   mip->sseq = sseq;
1533   return TRUE;
1534 }
1535 
1536 
1537 /*_________________________________________(DOT_GetSeqsbyLoc)_____
1538 
1539   Purpose : Calls functions to fill protein or nucleotide buffers.
1540 
1541 ____________________________________________________________________*/
1542 static Boolean DOT_GetSeqsbyLoc (DOTMainDataPtr mip)
1543 {
1544   Char         q_idbuf[42]={""}, s_idbuf[42]={""};
1545 
1546 /*   mip->sname = (CharPtr)MemNew(42*sizeof(Char)); */
1547 /*   mip->qname = (CharPtr)MemNew(42*sizeof(Char)); */
1548 /*
1549   FastaId (mip->qbsp, idbuf, 32);
1550   sprintf (mip->qname, "%s",idbuf);
1551   FastaId (mip->sbsp, idbuf, 32);
1552   sprintf (mip->sname, "%s",idbuf);
1553   */
1554 /*   qsip = SeqLocId(mip->qslp); */
1555 /*   qId = SeqIdFindBestAccession(qsip); */
1556 /*   ssip = SeqLocId(mip->sslp); */
1557 /*   sId = SeqIdFindBestAccession(ssip); */
1558   SeqIdWrite(mip->qbsp->id, q_idbuf,PRINTID_FASTA_SHORT, 41);
1559   SeqIdWrite(mip->sbsp->id, s_idbuf,PRINTID_FASTA_SHORT, 41);
1560   mip->qname=StringSave(q_idbuf);
1561   mip->sname=StringSave(s_idbuf);
1562 
1563  /*  printf ("\nLengths: \n %s(query_seq):%d\n %s(subject_seq):%d\n", mip->qname, mip->qlen, mip->sname, mip->slen); */
1564 
1565 
1566  
1567  if (is_na)
1568    {
1569      if (!DOT_GetNResidues(mip, TRUE))
1570        return FALSE;
1571    }
1572  else
1573    {
1574      if (!DOT_GetPResidues(mip, TRUE))
1575        return FALSE;
1576    }
1577  return TRUE;
1578 }
1579 
1580 /*_________________________________________(DOT_GetSeqs)_____
1581 
1582   Purpose : Calls functions to fill protein or nucleotide buffers.
1583 
1584 ____________________________________________________________________*/
1585 Boolean DOT_GetSeqs (DOTMainDataPtr mip, Boolean is_zoom)
1586 {
1587   Char             q_idbuf[42]={""}, s_idbuf[42]={""};
1588 
1589 
1590  if (is_zoom)
1591    {
1592      if (mip->qseq) mip->qseq=MemFree(mip->qseq);
1593      if (mip->sseq) mip->sseq=MemFree(mip->sseq);
1594    }
1595  else
1596    {
1597 /*      mip->sname = (CharPtr)MemNew(42*sizeof(Char)); */
1598 /*      mip->qname = (CharPtr)MemNew(42*sizeof(Char)); */
1599      SeqIdWrite(mip->qbsp->id, q_idbuf,PRINTID_FASTA_SHORT, 41); 
1600      SeqIdWrite(mip->sbsp->id, s_idbuf,PRINTID_FASTA_SHORT, 41); 
1601      mip->qname=StringSave(q_idbuf);
1602      mip->sname=StringSave(s_idbuf);
1603 
1604     /*  FastaId (mip->qbsp, idbuf, 32); */
1605 /*      sprintf (mip->qname, "%s",idbuf); */
1606 /*      FastaId (mip->sbsp, idbuf, 32); */
1607 /*      sprintf (mip->sname, "%s",idbuf); */
1608    }
1609  
1610  if (is_na)
1611    {
1612      if (!DOT_GetNResidues(mip, FALSE))
1613        return FALSE;
1614    }
1615  else
1616    {
1617      if (!DOT_GetPResidues(mip, FALSE))
1618        return FALSE;
1619    }
1620 
1621  return TRUE;
1622  
1623 }
1624 
1625 
1626 /*_______________________________________________(DOT_FreeHitPtrArray)___
1627 
1628   Purpose : Free memory function for hits array.
1629 
1630 ____________________________________________________________________*/
1631 Int2 DOT_FreeHitsArray (DOTDiagPtr PNTR hitlist, Int4 index)
1632 {
1633   Int4   i;
1634   DOTDiagPtr  hit;
1635 
1636   for(i = 0; i < index; i++) 
1637     {
1638       hit=hitlist[i];
1639       if (hit) MemFree(hit);
1640     }
1641 
1642   if (hitlist) hitlist=MemFree(hitlist);
1643  
1644   if (hitlist!=NULL)
1645     {
1646       ErrPostEx (SEV_WARNING, 0, 0, "%s", "hitlist not empty");
1647       return -1;
1648     } 
1649   return 0;
1650 }
1651 
1652 /*_______________________________________________(DOT_FreeMainInfo)___
1653 
1654   Purpose : Free memory function for main info ptr. Clears ONLY hitlist and sequence buffers.
1655 
1656 ____________________________________________________________________*/
1657 Int2 DOT_FreeMainInfo(DOTMainDataPtr mip)
1658 {
1659   if (mip->hitlist) DOT_FreeHitsArray(mip->hitlist, mip->index);
1660   if (mip->score_array) mip->score_array=MemFree(mip->score_array);
1661   if (mip->qseq) mip->qseq=MemFree(mip->qseq);
1662   if (mip->sseq) mip->sseq=MemFree(mip->sseq);
1663 
1664   return 0;
1665 }
1666 
1667 /*_______________________________________________(DOT_FreeMainInfoPtrEx)___
1668 
1669   Purpose : Free memory function for main info ptr. Clears all.
1670 
1671 ____________________________________________________________________*/
1672 Int2 DOT_FreeMainInfoPtrEx (DOTMainDataPtr mip)
1673 {
1674 
1675   DOT_FreeMainInfo(mip);
1676 
1677   if (mip->is_na && mip->matrix) 
1678     Free(mip->matrix);
1679   if (mip->qslp) SeqLocFree(mip->qslp);
1680   if (mip->sslp) SeqLocFree(mip->sslp);
1681 /*   if (mip->qbsp) BioseqFree(mip->qbsp); */
1682 /*   if (mip->sbsp) BioseqFree(mip->sbsp); */
1683 
1684   if (mip) MemFree(mip);
1685 
1686   return 0;
1687 }
1688 
1689 /*____________________________________________(DOT_InitTheRest)____________
1690 
1691 
1692   Purpose : Initialize function for DOTMainDataPtr used by InitMainInfo and InitMainInfobyLoc.
1693 
1694 ____________________________________________________________________*/
1695 
1696 
1697 static void DOT_InitTheRest(DOTMainDataPtr mip, Int4 word_size,Int4 tree_limit)
1698 {
1699 
1700  if (ISA_aa (mip->qbsp->mol) && ISA_aa (mip->sbsp->mol)) 
1701     {
1702       mip->is_na = FALSE;
1703       is_na=FALSE;
1704       if (word_size < 4 || word_size > 11)
1705         word_size=2;
1706     }
1707   else if (ISA_na(mip->qbsp->mol) && ISA_na (mip->sbsp->mol)) 
1708     {
1709       mip->is_na = TRUE;
1710       is_na=TRUE;
1711       if (word_size < 1 || word_size > 2)
1712         word_size=8;
1713     }
1714 
1715   mip->word_size=word_size;
1716  
1717   if (tree_limit==0)
1718     mip->tree_limit=10200; /* default */
1719   else
1720     mip->tree_limit=tree_limit;
1721 
1722   mip->cutoff_score=0;  
1723   mip->first_pass=TRUE;
1724   mip->unique=0;
1725   mip->index=0;
1726 }
1727 
1728 
1729 /*____________________________________________(DOT_InitMainInfo)____________
1730 
1731 
1732   Purpose : Initialize DOTMainDataPtr for main window.
1733   **** use bsp with the shorter seq length as 'qbsp' for max speed ****
1734 
1735 ____________________________________________________________________*/
1736 
1737 extern DOTMainDataPtr DOT_InitMainInfo (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 word_size, Int4 tree_limit, Int4 q_left, Int4 q_right, Int4 s_left, Int4 s_right)
1738 {
1739   
1740   mip->q_start=q_left;
1741   mip->q_stop=q_right;
1742   mip->s_start=s_left;
1743   mip->s_stop=s_right;
1744   mip->qbsp=qbsp;
1745   mip->sbsp=sbsp;
1746   mip->sstrand = Seq_strand_plus;
1747   mip->qstrand = Seq_strand_plus;
1748 
1749    if (!((ISA_aa (mip->qbsp->mol) && ISA_aa (mip->sbsp->mol))||(ISA_na(mip->qbsp->mol) && ISA_na (mip->sbsp->mol))))
1750     {
1751       ErrPostEx(SEV_ERROR, 0, 0, "DOT -missmatched sequence types");
1752       return MemFree(mip);
1753     }
1754 
1755   DOT_InitTheRest(mip, word_size, tree_limit);
1756   
1757   return mip;
1758 }
1759 
1760 static BioseqPtr DOT_GetBioseqGivenSeqLoc (SeqLocPtr slp, Uint2 entityID)
1761 
1762 {
1763   BioseqPtr    bsp=NULL;
1764   SeqEntryPtr  sep=NULL;
1765   SeqIdPtr     sip=NULL;
1766   SeqLocPtr    tmp=NULL;
1767 
1768   if (slp == NULL) return NULL;
1769   bsp = NULL;
1770   sip = SeqLocId (slp);
1771   if (sip != NULL) {
1772     bsp = BioseqFind (sip);
1773   } else {
1774     tmp = SeqLocFindNext (slp, NULL);
1775     if (tmp != NULL) {
1776       sip = SeqLocId (tmp);
1777       if (sip != NULL) {
1778         bsp = BioseqFind (sip);
1779         if (bsp != NULL) {
1780           sep = SeqMgrGetSeqEntryForData (bsp);
1781           entityID = ObjMgrGetEntityIDForChoice (sep);
1782           bsp = GetBioseqGivenSeqLoc (slp, entityID);
1783         }
1784       }
1785     }
1786   }
1787   return bsp;
1788 }
1789 
1790 
1791 static BioseqPtr DOT_GetBioseqReferencedByAnnot (SeqAnnotPtr sap, Uint2 entityID)
1792 
1793 {
1794   SeqAlignPtr   align;
1795   BioseqPtr     bsp;
1796   DenseDiagPtr  ddp;
1797   DenseSegPtr   dsp;
1798   SeqFeatPtr    feat;
1799   SeqGraphPtr   graph;
1800   SeqIdPtr      sip;
1801   SeqLocPtr     slp;
1802   StdSegPtr     ssp;
1803   SeqLocPtr     tloc;
1804 
1805   if (sap == NULL) return NULL;
1806   switch (sap->type) {
1807     case 1 :
1808       feat = (SeqFeatPtr) sap->data;
1809       while (feat != NULL) {
1810         slp = feat->location;
1811         if (slp != NULL) {
1812           bsp = DOT_GetBioseqGivenSeqLoc (slp, entityID);
1813           if (bsp != NULL) return bsp;
1814         }
1815         feat = feat->next;
1816       }
1817       break;
1818     case 2 :
1819       align = (SeqAlignPtr) sap->data;
1820       while (align != NULL) {
1821         if (align->segtype == 1) {
1822           ddp = (DenseDiagPtr) align->segs;
1823           if (ddp != NULL) {
1824             for (sip = ddp->id; sip != NULL; sip = sip->next) {
1825               bsp = BioseqFind (sip);
1826               if (bsp != NULL) return bsp;
1827             }
1828           }
1829         } else if (align->segtype == 2) {
1830           dsp = (DenseSegPtr) align->segs;
1831           if (dsp != NULL) {
1832             for (sip = dsp->ids; sip != NULL; sip = sip->next) {
1833               bsp = BioseqFind (sip);
1834               if (bsp != NULL) return bsp;
1835             }
1836           }
1837         } else if (align->segtype == 3) {
1838           ssp = (StdSegPtr) align->segs;
1839           if (ssp != NULL && ssp->loc != NULL) {
1840             for (tloc = ssp->loc; tloc != NULL; tloc = tloc->next) {
1841               bsp = BioseqFind (SeqLocId (tloc));
1842               if (bsp != NULL) return bsp;
1843             }
1844           }
1845         }
1846         align = align->next;
1847       }
1848       break;
1849     case 3 :
1850       graph = (SeqGraphPtr) sap->data;
1851       while (graph != NULL) {
1852         slp = graph->loc;
1853         if (slp != NULL) {
1854           bsp = DOT_GetBioseqGivenSeqLoc (slp, entityID);
1855           if (bsp != NULL) return bsp;
1856         }
1857         graph = graph->next;
1858       }
1859       break;
1860     default :
1861       break;
1862   }
1863   return NULL;
1864 }
1865 
1866 /*____________________________(DOT_AttachSeqAnnotToSeqEntry)_____
1867 
1868 
1869   Purpose : Attach SeqAnnot structure to SeqEntry 
1870 ________________________________________________________________*/
1871 
1872 extern Uint2 DOT_AttachSeqAnnotToSeqEntry (Uint2 entityID, SeqAnnotPtr sap, BioseqPtr bsp)
1873 
1874 {
1875   Int2           genCode;
1876   OMProcControl  ompc;
1877   SeqEntryPtr    sep;
1878   SeqFeatPtr     sfp = NULL;
1879 
1880   if (sap == NULL) return entityID;
1881   if (bsp==NULL)
1882     bsp = DOT_GetBioseqReferencedByAnnot (sap, entityID);
1883 
1884   if (bsp == NULL) return entityID;
1885   
1886   
1887   sep = SeqMgrGetSeqEntryForData (bsp);
1888   entityID = ObjMgrGetEntityIDForChoice (sep);
1889   if (sap->type == 1) {
1890     sfp = (SeqFeatPtr) sap->data;
1891     sep = SeqMgrGetSeqEntryForData(bsp);
1892 /*     sep = GetBestTopParentForData (entityID, bsp); */
1893     genCode = SeqEntryToGeneticCode (sep, NULL, NULL, 0);
1894     SetEmptyGeneticCodes (sap, genCode);
1895   } 
1896   MemSet ((Pointer) &ompc, 0, sizeof (OMProcControl));
1897   ompc.input_entityID = entityID;
1898   ompc.input_itemID = GetItemIDGivenPointer (entityID, OBJ_BIOSEQ, (Pointer) bsp);
1899   ompc.input_itemtype = OBJ_BIOSEQ;
1900   ompc.output_itemtype = OBJ_SEQANNOT;
1901   ompc.output_data = (Pointer) sap;
1902   if (! AttachDataForProc (&ompc, FALSE)) {
1903     Message (MSG_ERROR, "DOT_AttachSeqAnnotToSeqEntry failed");
1904   } else if (sfp != NULL) {
1905     PromoteXrefs (sfp, bsp, entityID);
1906   }
1907 
1908 return entityID;
1909 }
1910 
1911 
1912 /*____________________________________________(DOT_InitMainInfobyLoc)____________
1913 
1914 
1915   Purpose : Initialize DOTMainDataPtr for main window.
1916   **** use slp with the shorter seq length as 'slp1' for max speed ****
1917 ____________________________________________________________________*/
1918 
1919 static DOTMainDataPtr DOT_InitMainInfobyLoc (DOTMainDataPtr mip, SeqLocPtr slp1, SeqLocPtr slp2, Int4 word_size, Int4 tree_limit)
1920 {
1921   Int4   slen;
1922   Int4   qlen;
1923   SeqLocPtr qslp, sslp;
1924   SeqIdPtr qsip, ssip;
1925 
1926   if (mip == NULL || slp1 == NULL || slp2 == NULL) return MemFree (mip);
1927   qlen=SeqLocLen(slp1);
1928   slen=SeqLocLen(slp2);
1929 
1930   qslp=slp1;
1931   sslp=slp2;
1932 
1933   mip->qstrand = SeqLocStrand(qslp);
1934   mip->sstrand = SeqLocStrand(sslp);
1935 
1936   qsip = SeqLocId (qslp);
1937   if (qsip == NULL) {
1938     return MemFree (mip);
1939   }
1940   ssip = SeqLocId (sslp);
1941   if (ssip == NULL) {
1942     return MemFree (mip);
1943   }
1944 
1945   mip->qbsp = BioseqFind(SeqLocId(qslp));
1946   if (mip->qbsp == NULL) {
1947     return MemFree (mip);
1948   }    
1949   mip->sbsp = BioseqFind(SeqLocId(sslp));
1950   if (mip->sbsp == NULL) {
1951     return MemFree (mip);
1952   }    
1953 
1954    if (!((ISA_aa (mip->qbsp->mol) && ISA_aa (mip->sbsp->mol))||(ISA_na(mip->qbsp->mol) && ISA_na (mip->sbsp->mol))))
1955     {
1956       ErrPostEx(SEV_ERROR, 0, 0, "DOT -missmatched sequence types");
1957       return MemFree(mip);
1958     }
1959 
1960   
1961   /* mip start and stop are in bioseq coordinates */
1962   mip->q_start = GetOffsetInBioseq (qslp, mip->qbsp, SEQLOC_LEFT_END);
1963   mip->s_start = GetOffsetInBioseq (sslp, mip->sbsp, SEQLOC_LEFT_END);
1964   mip->q_stop = GetOffsetInBioseq (qslp, mip->qbsp, SEQLOC_RIGHT_END);
1965   mip->s_stop = GetOffsetInBioseq (sslp, mip->sbsp, SEQLOC_RIGHT_END);
1966 
1967 
1968   mip->qslp=qslp;
1969   mip->sslp=sslp;
1970   mip->qlen=qlen;
1971   mip->slen=slen;
1972 
1973 
1974 
1975   DOT_InitTheRest(mip, word_size, tree_limit);
1976   return mip;
1977 }
1978 
1979 /*____________________________________________(DOT_ComputeDotPlot)__________
1980 
1981   Purpose : Process input sequence information.
1982 
1983 ____________________________________________________________________*/
1984 static Boolean DOT_ComputeDotPlot (DOTMainDataPtr mip)
1985 {
1986 
1987 
1988   DOT_CalculateMatrix_MinMax(mip);
1989 
1990   if (DOT_BuildHitList(mip, TRUE, TRUE)<0) 
1991     {
1992       ErrPostEx (SEV_WARNING, 0, 0, "%s", "DOT- no matches found");
1993       return FALSE; /* no hits */
1994     }
1995   return TRUE;
1996 }
1997 
1998 /*____________________________________________(DOT_SPI_FindBestAlnByDotPlot)__________
1999 
2000   Purpose : Sarah's function to find the best alignment.
2001 
2002 ____________________________________________________________________*/
2003 
2004 SeqAlignPtr DOT_SPI_FindBestAlnByDotPlot(SeqLocPtr slp1, SeqLocPtr slp2, Int4 wordsize, Int4 num_hits)
2005 {
2006    DOTDiagPtr      ddp;
2007    DenseSegPtr     dsp;
2008    Int4            i;
2009    DOTMainDataPtr  mip;
2010    SeqAlignPtr     sap;
2011    SeqAlignPtr     sap_head;
2012    SeqAlignPtr     sap_prev;
2013    ScorePtr        scp;
2014    Int4            start1;
2015    Int4            start2;
2016    Uint1           strand;
2017 
2018    mip = DOT_CreateAndStorebyLoc (slp1, slp2, wordsize, num_hits);
2019    sap = sap_head = sap_prev = NULL;
2020    if (mip == NULL || mip->hitlist == NULL)
2021       return NULL;
2022    i = 0;
2023    ddp = *mip->hitlist;
2024    start1 = SeqLocStart(slp1);
2025    start2 = SeqLocStart(slp2);
2026    strand = SeqLocStrand(slp2);
2027    while (ddp != NULL && i < mip->index)
2028    {
2029       ddp = mip->hitlist[i];
2030       i++;
2031       sap = SeqAlignNew();
2032       dsp = DenseSegNew();
2033       sap->type = SAT_PARTIAL;
2034       sap->segtype = SAS_DENSEG;
2035       sap->dim = 2;
2036       dsp->dim = 2;
2037       dsp->numseg = 1;
2038       dsp->ids = SeqIdDup(SeqLocId(slp1));
2039       dsp->ids->next = SeqIdDup(SeqLocId(slp2));
2040       dsp->strands = (Uint1Ptr)MemNew(2*sizeof(Uint1));
2041       dsp->strands[0] = SeqLocStrand(slp1);
2042       dsp->strands[1] = SeqLocStrand(slp2);
2043       dsp->starts = (Int4Ptr)MemNew(2*sizeof(Int4));
2044       dsp->lens = (Int4Ptr)MemNew(sizeof(Int4));
2045       dsp->starts[0] = ddp->q_start;
2046       if (dsp->strands[1] == Seq_strand_minus)
2047          dsp->starts[1] = ddp->s_start - ddp->length + 1;
2048       else
2049          dsp->starts[1] = ddp->s_start;
2050       if (ddp->length > SeqLocLen(slp2))
2051          dsp->lens[0] = SeqLocLen(slp2);
2052       else
2053          dsp->lens[0] = ddp->length - 1;
2054       scp = ScoreNew();
2055       scp->id = ObjectIdNew();
2056       scp->id->str = StringSave("score");
2057       scp->choice = 1;
2058       scp->value.intvalue = ddp->score;
2059       dsp->scores = scp;
2060       sap->segs = (Pointer)(dsp);
2061       if (sap_head != NULL)
2062       {
2063          sap_prev->next = sap;
2064          sap_prev = sap;
2065       } else
2066          sap_head = sap_prev = sap;
2067    }
2068    if (sap_head == NULL)
2069       return NULL;
2070    AlnMgrIndexSeqAlign(sap_head);
2071    AlnMgrMakeMultipleByScore(sap_head);
2072    AlnMgrDeleteHidden(sap_head, FALSE);
2073    sap = (SeqAlignPtr)(sap_head->segs);
2074    sap_head->segs = NULL;
2075    SeqAlignFree(sap_head);
2076    return sap;
2077 }
2078 
2079 /*____________________________________________(DOT_CreateAndStorebyLoc)__________
2080 
2081   Purpose : Runs Dotplot functions, does plus - minus strands.
2082 
2083 ____________________________________________________________________*/
2084 DOTMainDataPtr DOT_CreateAndStorebyLoc (SeqLocPtr slp1, SeqLocPtr slp2, Int4 word_size, Int4 num_hits)
2085 {
2086   DOTMainDataPtr mip;
2087   
2088   if (slp1 == NULL || slp2 == NULL || word_size == 0 || num_hits == 0 || SeqLocLen(slp1)<word_size || SeqLocLen(slp2)<word_size )
2089     return NULL;
2090 
2091   mip=(DOTMainDataPtr)MemNew(sizeof(DOTMainData));
2092   if (!(mip=DOT_InitMainInfobyLoc(mip, slp1, slp2, word_size, num_hits)))
2093     return NULL;
2094 
2095   if (!DOT_GetSeqsbyLoc(mip))
2096     return NULL;
2097   if (!DOT_ComputeDotPlot(mip))
2098     return NULL;
2099   
2100   return mip;
2101 
2102 }
2103 
2104 /*____________________________________________(DOT_CreateAndStore)__________
2105 
2106   Purpose : Runs Dotplot functions.
2107 
2108 ____________________________________________________________________*/
2109 DOTMainDataPtr DOT_CreateAndStore (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 q_start, Int4 q_stop, Int4 s_start, Int4 s_stop, Int4 word_size, Int4 num_hits, Boolean initialize)
2110 {
2111   if (mip==NULL) return NULL;
2112 
2113   if (mip==NULL || qbsp == NULL || sbsp == NULL || word_size == 0 || num_hits == 0 || qbsp->length<word_size || sbsp->length<word_size )
2114     return NULL;
2115 
2116   if (initialize)
2117     if (!(mip=DOT_InitMainInfo(mip, qbsp, sbsp, word_size, num_hits, 0, qbsp->length-1, 0, sbsp->length-1)))
2118       return NULL;
2119   if (!DOT_GetSeqs(mip, FALSE))
2120     return NULL;
2121   if (!DOT_ComputeDotPlot(mip))
2122     return NULL;
2123   
2124   return mip;
2125 
2126 }
2127 
2128 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.