NCBI C Toolkit Cross Reference

C/tools/impatool.c


  1 static char const rcsid[] = "$Id: impatool.c,v 6.16 2006/06/30 18:44:40 camacho Exp $";
  2 
  3 /* $Id: impatool.c,v 6.16 2006/06/30 18:44:40 camacho Exp $
  4 * ===========================================================================
  5 *
  6 *                            PUBLIC DOMAIN NOTICE
  7 *               National Center for Biotechnology Information
  8 *
  9 *  This software/database is a "United States Government Work" under the
 10 *  terms of the United States Copyright Act.  It was written as part of
 11 *  the author's official duties as a United States Government employee and
 12 *  thus cannot be copyrighted.  This software/database is freely available
 13 *  to the public for use. The National Library of Medicine and the U.S.
 14 *  Government have not placed any restriction on its use or reproduction.
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 *  Please cite the author in any work or product based on this material.
 25 *
 26 * ===========================================================================
 27 */
 28 
 29 /*****************************************************************************
 30 
 31 File name: impatool.c
 32 
 33 Author: Alejandro Schaffer
 34 
 35 Contents: utility routines for IMPALA.
 36 
 37  $Revision: 6.16 $
 38 
 39  $Log: impatool.c,v $
 40  Revision 6.16  2006/06/30 18:44:40  camacho
 41  Add support for O and J to getRes, ResToInt
 42 
 43  Revision 6.15  2004/10/04 13:35:53  camacho
 44  Do not use hard coded constants in IMPALAfindUngappedLambda
 45 
 46  Revision 6.14  2004/06/30 14:14:50  madden
 47  Remove add_string_to_bufferEx, already defined in blfmtutl.c
 48 
 49  Revision 6.13  2004/06/30 13:57:27  kans
 50  add_string_to_bufferEx had to be LIBCALL because of blfmtutl.h prototype
 51 
 52  Revision 6.12  2004/06/30 13:42:27  kans
 53  include <blfmtutl.h> to clear up Mac compiler missing prototype errors
 54 
 55  Revision 6.11  2003/05/30 17:25:36  coulouri
 56  add rcsid
 57 
 58  Revision 6.10  2001/12/28 18:01:54  dondosha
 59  Modified sorting routines to break ties by score when E-value is (mis)-reported as 0.0 due to underflow in BlastKarlinStoE_simple
 60 
 61  Revision 6.9  2000/07/26 16:54:07  lewisg
 62  add LIBCALLs
 63 
 64  Revision 6.8  2000/07/25 18:12:05  shavirin
 65  WARNING: This is no-turning-back changed related to S&W Blast from
 66  Alejandro Schaffer
 67 
 68 
 69 *****************************************************************************/
 70 
 71 
 72 
 73 #include <ncbi.h>
 74 #include <objseq.h>
 75 #include <objsset.h>
 76 #include <sequtil.h>
 77 #include <seqport.h>
 78 #include <tofasta.h>
 79 #include <blast.h>
 80 #include <blastpri.h>
 81 #include <txalign.h>
 82 #include <simutil.h>
 83 #include <posit.h>
 84 #include <profiles.h>
 85 #include <blfmtutl.h>
 86 
 87 
 88 /*convert a residue character to its integer representation*/
 89 Char LIBCALL getRes(Char input)
 90 {
 91     switch(input) 
 92       {
 93       case 0: 
 94         return('-');
 95       case 1: 
 96         return('A');
 97       case 2: 
 98         return('B');
 99       case 3: 
100         return('C');
101       case 4: 
102         return('D');
103       case 5: 
104         return('E');
105       case 6: 
106         return('F');
107       case 7: 
108         return('G');
109       case 8: 
110         return('H');
111       case 9: 
112         return('I');
113       case 10: 
114         return('K');
115       case 11: 
116         return('L');
117       case 12: 
118         return('M');
119       case 13: 
120         return('N');
121       case 14: 
122         return('P');
123       case 15: 
124         return('Q');
125       case 16: 
126         return('R');
127       case 17: 
128         return('S');
129       case 18: 
130         return('T');
131       case 19: 
132         return('V');
133       case 20:  
134         return('W');
135       case 21: 
136         return('X');
137       case 22: 
138         return('Y');
139       case 23: 
140         return('Z');
141       case 24: 
142         return('U');
143       case 25: 
144         return('*');
145       case 26: 
146         return('O');
147       case 27: 
148         return('J');
149       default:
150         return('?');
151     }
152 } 
153 
154 
155 /*get the citation for IMPALA*/
156 
157 static CharPtr 
158 IMPALAGetReference(Boolean html)
159 
160 {
161         CharPtr ret_buffer;
162         Int2 ret_buffer_length;
163 
164         ret_buffer = NULL;
165         ret_buffer_length = 0;
166 
167         
168         if (html) {
169           ;
170         } else
171                 add_string_to_bufferEx("Reference: Alejandro A. Schaffer, Yuri I. Wolf, Chris P. Ponting, ", &ret_buffer, &ret_buffer_length, TRUE);
172         add_string_to_bufferEx("Eugene V. Koonin, L. Aravind, Stephen F. Altschul (1999), ", &ret_buffer, &ret_buffer_length, TRUE);
173         add_string_to_bufferEx("\"IMPALA: Matching a Protein Sequence Against a Collection of ", &ret_buffer, &ret_buffer_length, TRUE);
174         add_string_to_bufferEx("\"PSI-BLAST-Constructed Position-Specific Score Matrices\",", &ret_buffer, &ret_buffer_length, TRUE);
175         add_string_to_bufferEx("Bioinformatics 15:1000-1011.", &ret_buffer, &ret_buffer_length, TRUE);
176         return ret_buffer;
177 }
178 
179 /*print the citation for IMPALA*/
180 Boolean LIBCALL
181 IMPALAPrintReference(Boolean html, Int4 line_length, FILE *outfp)
182 
183 {
184         CharPtr ret_buffer;
185         
186         if (outfp == NULL)
187                 return FALSE;
188         
189         ret_buffer = IMPALAGetReference(html);
190         PrintTildeSepLines(ret_buffer, line_length, outfp);
191         ret_buffer = MemFree(ret_buffer);
192 
193         return TRUE;
194 }
195 
196 static CharPtr 
197 makematGetHelp(Boolean html)
198 {
199 
200         CharPtr ret_buffer;
201         Int2 ret_buffer_length;
202 
203         ret_buffer = NULL;
204         ret_buffer_length = 0;
205 
206         
207         if (html) {
208           ;
209         } else {
210           add_string_to_bufferEx("makemat is the first profile preprocessor in the IMPALA software package.", &ret_buffer, &ret_buffer_length, TRUE);
211           add_string_to_bufferEx("makemat  converts a collection of byte-encoded profiles, created by the -C option of PSI-BLAST, into portable ASCII form.", &ret_buffer, &ret_buffer_length, TRUE);
212           add_string_to_bufferEx("Prepare the following files: ", &ret_buffer, &ret_buffer_length, TRUE);
213           add_string_to_bufferEx("i. a collection of PSI-BLAST-generated profiles with arbitrary   names and suffix .chk  ", &ret_buffer, &ret_buffer_length, TRUE);
214           add_string_to_bufferEx("ii. a collection of master sequences, associated with the profiles, each in a separate file with arbitrary name and a 3 character suffix starting with c", &ret_buffer, &ret_buffer_length, TRUE);
215           add_string_to_bufferEx("iii. a list of profile file names, one per line, named <database_name>.pn", &ret_buffer, &ret_buffer_length, TRUE);
216           add_string_to_bufferEx("iv. a list of master sequence file names, one per line, in the same order as a list of profile names, named <database_name>.sn", &ret_buffer, &ret_buffer_length, TRUE);
217           add_string_to_bufferEx("The following files will be created:", &ret_buffer, &ret_buffer_length, TRUE);
218           add_string_to_bufferEx("i. a collection of ASCII files, corresponding to each of the original profiles, named  <profile_name>.mtx", &ret_buffer, &ret_buffer_length, TRUE);
219           add_string_to_bufferEx("ii. a list of ASCII matrix files, named <database_name>.mn", &ret_buffer, &ret_buffer_length, TRUE);
220           add_string_to_bufferEx("iii. an ASCII file with auxiliary information, named <database_name>.aux", &ret_buffer, &ret_buffer_length, TRUE);
221           add_string_to_bufferEx("For the list of arguments to makemat enter makemat -", &ret_buffer, &ret_buffer_length, TRUE);
222         }
223         return ret_buffer;
224 }
225 
226 static CharPtr 
227 copymatGetHelp(Boolean html)
228 {
229 
230 
231         CharPtr ret_buffer;
232         Int2 ret_buffer_length;
233 
234         ret_buffer = NULL;
235         ret_buffer_length = 0;
236 
237         
238         if (html) {
239           ;
240         } else {
241           add_string_to_bufferEx("copymat is the second profile preprocessor in the IMPALA software package.", &ret_buffer, &ret_buffer_length, TRUE);
242           add_string_to_bufferEx("copymat converts ASCII matrices, produced by the primary preprocessor, makemat into a database that can be read into memory quickly", &ret_buffer, &ret_buffer_length, TRUE);
243           add_string_to_bufferEx("Prepare the following files:", &ret_buffer, &ret_buffer_length, TRUE);
244           add_string_to_bufferEx("i. a collection of ASCII files, correspondingto each of the original profiles, named <profile_name>.mtx (created by makemat)", &ret_buffer, &ret_buffer_length, TRUE);
245           add_string_to_bufferEx("ii. a collection of profile master sequences, associated with the profiles, each in a separate file with arbitrary name and a 3-charecter suffix starting with c", &ret_buffer, &ret_buffer_length, TRUE);
246           add_string_to_bufferEx("iii. a list of ASCII_matrix files, named <database_name>.mn  (created by makemat)", &ret_buffer, &ret_buffer_length, TRUE);
247           add_string_to_bufferEx("iv. a list of master sequence file names, one per  line, in the same order as a list of matrix names, named <database_name>.sn;", &ret_buffer, &ret_buffer_length, TRUE);
248           add_string_to_bufferEx("v. ASCII file with auxiliary information, named  <database_name>.aux (created by makemat)", &ret_buffer, &ret_buffer_length, TRUE);
249           add_string_to_bufferEx("The files input to copymatices are in ASCII format and thus portable between machines with different encodings for machine-readable files", &ret_buffer, &ret_buffer_length, TRUE);
250           add_string_to_bufferEx("The following file will be created:", &ret_buffer, &ret_buffer_length, TRUE);
251           add_string_to_bufferEx("i. a huge binary file, containing all profile matrices, named <database_name>.mat", &ret_buffer, &ret_buffer_length, TRUE);
252         }
253         return ret_buffer;
254 }
255 
256 static CharPtr 
257 impalaGetHelp(Boolean html)
258 {
259         CharPtr ret_buffer;
260         Int2 ret_buffer_length;
261 
262         ret_buffer = NULL;
263         ret_buffer_length = 0;
264 
265         
266         if (html) {
267           ;
268         } else {
269           add_string_to_bufferEx("impala is the main program in the IMPALA software package.", &ret_buffer, &ret_buffer_length, TRUE);
270           add_string_to_bufferEx("impala searches for matches between a query sequence and a library of score matrices, prepared by makemat and copymat.", &ret_buffer, &ret_buffer_length, TRUE);
271           add_string_to_bufferEx("impala produces BLAST-formatted output", &ret_buffer, &ret_buffer_length, TRUE);
272           add_string_to_bufferEx("Before you start searching, check that in the directory with the library you have the following files.", &ret_buffer, &ret_buffer_length, TRUE);
273           add_string_to_bufferEx("If the library has K matrices, you should have:", &ret_buffer, &ret_buffer_length, TRUE);
274           add_string_to_bufferEx("K files with names ending in .mtx", &ret_buffer, &ret_buffer_length, TRUE);
275           add_string_to_bufferEx("K files with names ending in a 3-letter extension starting with c", &ret_buffer, &ret_buffer_length, TRUE);
276           add_string_to_bufferEx("1 file with name ending in .sn", &ret_buffer, &ret_buffer_length, TRUE);
277           add_string_to_bufferEx("1 file with name ending in .aux", &ret_buffer, &ret_buffer_length, TRUE);
278           add_string_to_bufferEx("1 file with name ending in .mn", &ret_buffer, &ret_buffer_length, TRUE);
279           add_string_to_bufferEx("1 file with name ending in .mat", &ret_buffer, &ret_buffer_length, TRUE);
280         }
281         return ret_buffer;
282 }
283 
284 Boolean LIBCALL
285 IMPALAPrintHelp(Boolean html, Int4 line_length, Char * programName, FILE *outfp)
286 
287 {
288         CharPtr ret_buffer;
289         
290         if (outfp == NULL)
291                 return FALSE;
292         
293         if (0 == strcmp(programName,"makemat"))
294           ret_buffer = makematGetHelp(html);
295         if (0 == strcmp(programName,"copymat"))
296           ret_buffer = copymatGetHelp(html);
297         if (0 == strcmp(programName,"impala"))
298           ret_buffer = impalaGetHelp(html);
299 
300         PrintTildeSepLines(ret_buffer, line_length, outfp);
301         ret_buffer = MemFree(ret_buffer);
302 
303         return TRUE;
304 }
305 
306 
307 
308 /*prefix and suffix are strings, returns a string with prefix
309   concatenated to suffix. Used to build multiple distinct
310   file names with a common prefix*/
311 Char * LIBCALL addSuffixToName(Char *prefix, Char *suffix)
312 {
313   Char *returnName; /*string to return*/
314   Int4 i,j; /*loop indices*/
315   Int4 prefixLength, suffixLength, totalLength; /*length of pieces and whole*/
316 
317 
318   prefixLength = strlen(prefix);
319   suffixLength = strlen(suffix);
320   totalLength = prefixLength + suffixLength;
321   returnName = (Char *) MemNew((totalLength + 1) * sizeof(Char));
322   for(i = 0; i < prefixLength; i++)
323     returnName[i] = prefix[i];
324   for(j = 0, i = prefixLength; j < suffixLength; i++, j++)
325     returnName[i] = suffix[j];
326   returnName[totalLength] = '\0';
327   return(returnName);
328 }
329 
330 /*Use the specified name of the database to make file names
331   for the auxiliary information, mmap'd score matrices, list of
332   sequence files, and list of matrices, and lists of checkpoints.
333   Also extract a directory prefix that is to be prepended
334   to all file names for sequences and matrices and checkpoints */
335 void  LIBCALL impalaMakeFileNames(Char * matrixDbName,
336                     Char * auxiliaryFileName, Char * mmapFileName,
337                     Char * seqFileName, Char *matrixFileName, 
338                     Char * ckptFileName,
339                     Char *directoryPrefix)
340 {
341   Int4 commonLength;  /*length of common name prefix*/
342   Int4 c; /*loop index*/
343 
344   commonLength = strlen(matrixDbName);
345   strcpy(directoryPrefix,matrixDbName);
346   /*remove file name for profile library to peel back to last '/' */
347   for(c = commonLength; c >= 0; c--)
348     if ('/' == directoryPrefix[c]) 
349       break;
350   directoryPrefix[c+1] = '\0';
351   if (NULL != auxiliaryFileName)
352      strcpy(auxiliaryFileName,matrixDbName);
353   if (NULL != mmapFileName)
354      strcpy(mmapFileName,matrixDbName);
355   if (NULL != seqFileName)
356      strcpy(seqFileName,matrixDbName);
357   if (NULL != matrixFileName)
358      strcpy(matrixFileName,matrixDbName);
359   if (NULL != ckptFileName)
360      strcpy(ckptFileName,matrixDbName);
361   if (NULL != auxiliaryFileName) {
362     auxiliaryFileName[commonLength] = '.';
363     auxiliaryFileName[commonLength +1] = 'a';
364     auxiliaryFileName[commonLength + 2] = 'u';
365     auxiliaryFileName[commonLength + 3] = 'x';
366     auxiliaryFileName[commonLength + 4] = '\0';
367   }
368   if (NULL != mmapFileName) {
369     mmapFileName[commonLength] = '.';
370     mmapFileName[commonLength + 1] = 'm';
371     mmapFileName[commonLength + 2] = 'a';
372     mmapFileName[commonLength + 3] = 't';
373     mmapFileName[commonLength + 4] = '\0';
374   }
375   if (NULL != seqFileName) {
376     seqFileName[commonLength] = '.';
377     seqFileName[commonLength + 1] = 's';
378     seqFileName[commonLength+2] = 'n';
379     seqFileName[commonLength+3] = '\0';
380   }
381   if (NULL != matrixFileName) {
382     matrixFileName[commonLength] = '.';
383     matrixFileName[commonLength + 1] = 'm';
384     matrixFileName[commonLength+2] = 'n';
385     matrixFileName[commonLength+3] = '\0';
386   }
387   if (NULL != ckptFileName) {
388     ckptFileName[commonLength] = '.';
389     ckptFileName[commonLength + 1] = 'p';
390     ckptFileName[commonLength+2] = 'n';
391     ckptFileName[commonLength+3] = '\0';
392   }
393 }
394 
395 Nlm_FloatHi LIBCALL
396 IMPALAfindUngappedLambda(Char *matrixName)
397 {
398     Nlm_FloatHi* lambda_array = NULL;
399     int num_lambdas = BlastKarlinGetMatrixValues(matrixName, NULL, NULL, 
400                                                  &lambda_array, NULL, NULL, 
401                                                  NULL);
402     if (num_lambdas > 0) {
403         Nlm_FloatHi retval = lambda_array[0];
404         lambda_array = MemFree(lambda_array);
405         return retval;
406     } else {
407         return 0.0;
408     }
409 }
410 
411 /*Given a sequence of 'length' amino acid residues, compute the
412   probability of each residue and put that in the array resProb*/
413 void LIBCALL
414 IMPALAfillResidueProbability(Uint1Ptr sequence, Int4 length, Nlm_FloatHi * resProb)
415 {
416   Int4 frequency[PRO_ALPHABET_SIZE]; /*frequency of each letter*/
417   Int4 i; /*index*/
418   Int4 denominator; /*length not including X's*/
419 
420   denominator = length;
421   for(i = 0; i < PRO_ALPHABET_SIZE; i++)
422     frequency[i] = 0;
423   for(i = 0; i < length; i++)
424     if (Xchar != sequence[i])
425       frequency[sequence[i]]++;
426     else
427       denominator--;
428   for(i = 0; i < PRO_ALPHABET_SIZE; i++) {
429     if (frequency[i] == 0)
430       resProb[i] = 0.0;
431     else
432       resProb[i] = ((Nlm_FloatHi) (frequency[i])) /((Nlm_FloatHi) denominator);
433   }
434 }
435 
436 /*matrix is a position-specific score matrix with matrixLength positions
437   queryProbArray is an array containing the probability of occurrence
438   of each residue in the query
439   scoreArray is an array of probabilities for each score that is
440     to be used as a field in return_sfp
441   return_sfp is a the structure to be filled in and returned
442   range is the size of scoreArray and is an upper bound on the
443    difference between maximum score and minimum score in the matrix
444   the routine fillSfp computes the probability of each score weighted
445    by the probability of each query residue and fills those probabilities
446    into scoreArray and puts scoreArray as a field in
447    that in the structure that is returned
448    for indexing convenience the field storing scoreArray points to the
449    entry for score 0, so that referring to the -k index corresponds to
450    score -k */
451 BLAST_ScoreFreqPtr LIBCALL
452 IMPALAfillSfp(BLAST_Score **matrix, Int4 matrixLength, Nlm_FloatHi *queryProbArray, Nlm_FloatHi *scoreArray,  BLAST_ScoreFreqPtr return_sfp, Int4 range)
453 {
454   Int4 minScore, maxScore; /*observed minimum and maximum scores*/
455   Int4 i,j; /* indices */
456   Nlm_FloatHi onePosFrac; /*1/matrix length as a double*/
457 
458   minScore = maxScore = 0;
459 
460   for(i = 0; i < matrixLength; i++) {
461     for(j = 0 ; j < PRO_ALPHABET_SIZE; j++) {
462       if (Xchar == j)
463         continue;
464       if ((matrix[i][j] != BLAST_SCORE_MIN) && (matrix[i][j] < minScore))
465         minScore = matrix[i][j];
466       if (matrix[i][j] > maxScore)
467         maxScore = matrix[i][j];
468     }
469   }
470   return_sfp->obs_min = minScore;
471   return_sfp->obs_max = maxScore;
472   for (i = 0; i < range; i++)
473     scoreArray[i] = 0.0;
474   return_sfp->sprob = &(scoreArray[-minScore]); /*center around 0*/
475   onePosFrac = 1.0/ ((Nlm_FloatHi) matrixLength);
476   for(i = 0; i < matrixLength; i++) {
477     for (j = 0; j < PRO_ALPHABET_SIZE; j++) {
478       if (Xchar == j)
479         continue;
480       if(matrix[i][j] >= minScore) {
481         return_sfp->sprob[matrix[i][j]] += (onePosFrac * queryProbArray[j]);
482       }
483     }
484   }
485   return_sfp->score_avg = 0;
486   for(i = minScore; i <= maxScore; i++)
487     return_sfp->score_avg += i * return_sfp->sprob[i];
488   return(return_sfp);
489 }
490 
491 /*Gets the scores of an alignment together into a ScorePtr;
492   adapted from similar code with a different formula in pseed3.c*/
493 ScorePtr LIBCALL addScoresToSeqAlign(Int4 rawScore, Nlm_FloatHi eValue, 
494            Nlm_FloatHi Lambda, Nlm_FloatHi logK)
495 {
496   Nlm_FloatHi bitScoreUnrounded; /*conversion for raw score to bit score*/
497   ScorePtr returnScore = NULL;
498 
499   MakeBlastScore(&returnScore,"score",0.0, rawScore);
500   MakeBlastScore(&returnScore,"e_value",eValue,0);
501   bitScoreUnrounded = ((rawScore * Lambda) - logK)/NCBIMATH_LN2;
502   MakeBlastScore(&returnScore,"bit_score",bitScoreUnrounded, 0);
503   return(returnScore);
504 }
505 
506 /*Bubble sort the entries in qs from index i through j*/
507 static void pro_bbsort(SWResults **qs, Int4 i, Int4 j)
508 {
509     Int4 x, y; /*loop bounds for the two ends of the array to be sorted*/
510     SWResults *sp; /*temporary pointer for swapping*/
511 
512     for (x = j; x > i; x--) {
513       for (y = i; y < x; y++) {
514         if ((qs[y]->eValue < qs[y+1]->eValue) ||
515             ((qs[y]->eValue == qs[y+1]->eValue) &&
516              (0.0 == qs[y]->eValue) &&
517              (qs[y]->score > qs[y+1]->score)) ||
518             ((qs[y]->eValue == qs[y+1]->eValue) && 
519              ((0.0 != qs[y]->eValue) || (qs[y]->score == qs[y+1]->score)) &&
520              (qs[y]->subject_index < qs[y+1]->subject_index)) ||
521             ((qs[y]->eValue == qs[y+1]->eValue) &&
522              (qs[y]->subject_index == qs[y+1]->subject_index) &&
523                (qs[y]->eValueThisAlign < qs[y+1]->eValueThisAlign)) ||
524             ((qs[y]->eValue == qs[y+1]->eValue) &&
525              (qs[y]->subject_index == qs[y+1]->subject_index) &&
526                (qs[y]->eValueThisAlign == qs[y+1]->eValueThisAlign) &&
527              (qs[y]->scoreThisAlign > qs[y+1]-> scoreThisAlign))) {
528           /*swap pointers for inverted adjacent elements*/
529           sp = qs[y];
530           qs[y] = qs[y+1]; 
531           qs[y+1] = sp;
532         }
533       }
534     }
535 }
536 
537 /*choose the median of  elemnts indexed i, i+j/2, j for
538 partitioning in quicksort, if median is not in
539 position i to start with, swap it to position i*/
540 static void  medianOfThree(SWResults **qs, Int4 i, Int4 j)
541 {
542   Int4 middle;
543   Int4 swapIndex;
544   SWResults *swapTemp;
545 
546   middle = (i+j)/2;
547   if (qs[i]->eValue < qs[middle]->eValue) {
548     if (qs[middle]->eValue < qs[j]->eValue)
549       swapIndex = middle;
550     else
551       if (qs[i]->eValue < qs[j]->eValue)
552         swapIndex = j;
553       else
554         swapIndex = i;
555   }
556   else {
557     if (qs[j]->eValue < qs[middle]->eValue)
558       swapIndex = middle;
559     else
560       if (qs[j]->eValue < qs[i]->eValue)
561         swapIndex = j;
562       else
563         swapIndex = i;
564   }
565   if (i != swapIndex) {
566     swapTemp = qs[i];
567     qs[i] = qs[swapIndex];
568     qs[swapIndex] = swapTemp;
569   }
570 }
571 
572 /*quicksort the entries in qs from qs[i] through qs[j] */
573 static void pro_quicksort(SWResults **qs, Int4 i, Int4 j)
574 {
575     Int4 lf, rt;  /*left and right fingers into the array*/
576     Nlm_FloatHi partitionEvalue; /*Evalue to partition around*/
577     Int4  secondaryPartitionValue; /*for breaking ties*/
578     Int4 scorePartitionValue; /*for breaking ties with 0.0 eValue*/
579     Nlm_FloatHi tertiaryPartitionValue; /*for breaking ties*/
580     SWResults * tp; /*temporary pointer for swapping*/
581     if (j-i <= SORT_THRESHOLD) {
582       pro_bbsort(qs, i,j);
583       return;
584     }
585 
586     lf = i+1; 
587     rt = j; 
588     /*use median of three since array may be nearly sorted*/
589     medianOfThree(qs, i, j);
590     /*implicitly choose qs[i] as the partition element*/    
591     partitionEvalue = qs[i]->eValue;
592     secondaryPartitionValue = qs[i]->subject_index;
593     tertiaryPartitionValue = qs[i]->eValueThisAlign;
594     scorePartitionValue = qs[i]->score;
595     /*partititon around partitionEvalue = qs[i]*/
596     while (lf <= rt) {
597       while ((qs[lf]->eValue >  partitionEvalue)  ||
598               ((qs[lf]->eValue == partitionEvalue) && (0.0 == qs[lf]->eValue)
599                 && (qs[lf]->score < scorePartitionValue)) ||
600               ((qs[lf]->eValue == partitionEvalue) &&
601                ((0.0 != qs[lf]->eValue) || (qs[lf]->score == scorePartitionValue)) &&
602                (qs[lf]->subject_index > secondaryPartitionValue)) ||
603               ((qs[lf]->eValue == partitionEvalue) &&
604                (qs[lf]->subject_index == secondaryPartitionValue) &&
605                (qs[lf]->eValueThisAlign > tertiaryPartitionValue)))
606         lf++;
607       while ((qs[rt]->eValue <  partitionEvalue)  ||
608               ((qs[rt]->eValue == partitionEvalue) && (0.0 == qs[rt]->eValue)
609                && (qs[rt]->score > scorePartitionValue)) ||
610               ((qs[rt]->eValue == partitionEvalue) &&
611                ((0.0 != qs[rt]->eValue) || (qs[rt]->score == scorePartitionValue)) &&
612                (qs[rt]->subject_index < secondaryPartitionValue)) ||
613               ((qs[rt]->eValue == partitionEvalue) &&
614                (qs[rt]->subject_index == secondaryPartitionValue) &&
615                (qs[rt]->eValueThisAlign < tertiaryPartitionValue)))
616         rt--;
617 
618 
619       if (lf < rt) {
620         /*swap elements on wrong side of partition*/
621         tp = qs[lf];
622         qs[lf] = qs[rt];
623         qs[rt] = tp;
624         rt--;
625         lf++;
626       } 
627       else 
628         break;
629     }
630     /*swap partition element into middle position*/
631     tp = qs[i];
632     qs[i] = qs[rt]; 
633     qs[rt] = tp;
634     /*call recursively on two parts*/
635     pro_quicksort(qs, i,rt-1); 
636     pro_quicksort(qs, rt+1, j);
637 }
638 
639 
640 
641 /*Sort the sequences that hit the query by increasing score;
642   This routine converts the list in proResultsList to
643   an array for sorting and then converts back to a singly-linked list
644   adapted from quicksort_hits of pseed3.c
645   no_of_seq is the number of entries in the results list*/
646 void LIBCALL pro_quicksort_hits(Int4 no_of_seq, SWResults **proResultsList)
647 {
648   Int4 i; /*loop index for the resutls list*/
649     SWResults *sp; /*pointer to one entry in the array*/
650     SWResults sentinel; /*sentinel to add at the end of the array*/
651     SWResults **qs; /*local array for sorting*/
652 
653     /*Copy the list starting from proResultsList
654       into the array qs*/
655     qs = (SWResults **) MemNew(sizeof(SWResults*)*(no_of_seq+1));
656     for (i = 0, sp = (*proResultsList); 
657          i < no_of_seq; i++, sp = sp->next) 
658       qs[i] = sp;
659     /*Put sentinel at the end of the array*/
660     qs[i] = &sentinel; 
661     sentinel.eValue = -0.1;
662     sentinel.eValueThisAlign = -0.1;
663     pro_quicksort(qs, 0, no_of_seq-1);
664     /*Copy back to the list starting at seedResults->listOfMatchingSequences*/
665     for (i = no_of_seq-1; i > 0; i--)
666       qs[i]->next = qs[i-1];
667     qs[0]->next = NULL;
668     (*proResultsList) = qs[no_of_seq-1];
669     MemFree(qs);
670 }
671 
672 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.