NCBI C Toolkit Cross Reference

C/tools/blast.c


  1 
  2 static char const rcsid[] = "$Id: blast.c,v 6.451 2008/01/25 21:15:22 bealer Exp $"; /* Version-control Id keyword string identifying this file's revision. */
  3 
  4 /* $Id: blast.c,v 6.451 2008/01/25 21:15:22 bealer Exp $
  5 * ===========================================================================
  6 *
  7 *                            PUBLIC DOMAIN NOTICE
  8 *               National Center for Biotechnology Information
  9 *
 10 *  This software/database is a "United States Government Work" under the
 11 *  terms of the United States Copyright Act.  It was written as part of
 12 *  the author's official duties as a United States Government employee and
 13 *  thus cannot be copyrighted.  This software/database is freely available
 14 *  to the public for use. The National Library of Medicine and the U.S.
 15 *  Government have not placed any restriction on its use or reproduction.
 16 *
 17 *  Although all reasonable efforts have been taken to ensure the accuracy
 18 *  and reliability of the software and data, the NLM and the U.S.
 19 *  Government do not and cannot warrant the performance or results that
 20 *  may be obtained by using this software or data. The NLM and the U.S.
 21 *  Government disclaim all warranties, express or implied, including
 22 *  warranties of performance, merchantability or fitness for any particular
 23 *  purpose.
 24 *
 25 *  Please cite the author in any work or product based on this material.
 26 *
 27 * ===========================================================================*/
 28 
 29 /*****************************************************************************
 30 
 31 File name: blast.c
 32 
 33 Author: Tom Madden
 34 
 35 Contents: BLAST functions
 36 
 37 Detailed Contents: 
 38 
 39         - Functions that allocate and deallocate structures used by BLAST.
 40 
 41         - Functions that find the initial word hits for BLAST (both contiguous
 42         and discontiguous).
 43 
 44         - Functions that extend these initial word hits and decide if the
 45         results HSP (High-Scoring Segment Pairs) are worth keeping.
 46 
 47         - Functions that link together HSP's to a "hitlist".
 48 
 49         - Functions that save the hitlist to a structure appropriate for 
 50         further manipulation.
 51 
 52 ******************************************************************************
 53  * $Revision: 6.451 $
 54  *
 55  * $Log: blast.c,v $
 56  * Revision 6.451  2008/01/25 21:15:22  bealer
 57  * - Fix synchronization issue with blastpgp -a4 -j4 when composition based
 58  *   statistics is used for databases with multiple volumes.
 59  *
 60  * Revision 6.450  2007/05/07 13:30:54  kans
 61  * added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
 62  *
 63  * Revision 6.449  2007/03/13 20:38:39  madden
 64  *   - In BLASTCalculateSearchSpace, use floating point multiplication to
 65  *     compute the floating point value searchsp.
 66  *
 67  *   - In BLASTSetUpSearchInternalByLoc, don't cast
 68  *     DROPOFF_NUMBER_OF_BITS to an integer when assigning the floating
 69  *     point options dropoff_1st_pass and dropoff_2nd_pass.
 70  *
 71  *   - In BLASTSetUpSearchInternalByLoc, use floating point division to
 72  *     compute the floating point value avglen.
 73  *
 74  *   - In blast_set_parameters, change the type of the function arguments
 75  *     dropoff_number_of_bits_1st_pass and
 76  *     dropoff_number_of_bits_2nd_pass to Nlm_FloatHi.
 77  *
 78  *   - In blast_set_parameters, cast a value in the computation of
 79  *     cutoff_s_first to type BLAST_Score only after dividing by Lambda,
 80  *     instead of before performing the division.
 81  *   [from Mike Gertz]
 82  *
 83  * Revision 6.448  2007/03/05 14:51:22  camacho
 84  * - In BLASTPerformFinalSearch, merge the hitlists for PSITBLASTN, as is
 85  *   done for TBLASTN.
 86  * - In xsum_compare_hsps, break ties by calling score_compare_hsps.
 87  *
 88  * Revision 6.447  2006/09/21 13:42:36  madden
 89  * BlastProcessGiLists returns a boolean to specify that an attempt was made to process a list of GIs.  If no matches were found this can be reported back to the user
 90  *
 91  * Revision 6.446  2006/06/01 15:48:38  papadopo
 92  * in blastMergeFilterLocs, add the capability to merge mixed-type seqlocs; these appear in e.g. megablast with both low-complexity and repeat filtering
 93  *
 94  * Revision 6.445  2005/10/06 12:52:23  madden
 95  * Changes to support correct gapped stats for blastn
 96  *
 97  * Revision 6.444  2005/09/29 17:40:08  coulouri
 98  * from mike gertz:
 99  *     In the do_gapped_blast_search routine, in the case where query
100  *     concatenation is used, call BlastLinkHsps only when
101  *     search->pbp->do_sum_stats is true.
102  *
103  * Revision 6.443  2005/09/26 15:02:58  morgulis
104  * Fixing some memory leaks when using query concatenation in blastn and tblastn.
105  *
106  * Revision 6.442  2005/08/31 20:32:31  coulouri
107  * From Mike Gertz:
108  *    - Added the function BlastSingleQueryResultSize to implement the
109  *      policy for adjusting the hitlist size for preliminary alignments
110  *      to a single query.
111  *    - In BLASTSetUpSearchWithReadDbInternalMult replaced existing code
112  *      for adjusting the hitlist size for a single query by a call to
113  *      BlastSingleQueryResultSize.
114  *    - In BLASTSetUpSearchEx, replaced existing code for adjusting the
115  *      hitlist size by a call to BlastSingleQueryResultSize.  This
116  *      changes the behavior of the routine slightly, in that the hitlist
117  *      size is (correctly) no longer increased for ungapped alignments.
118  *
119  * Revision 6.441  2005/07/28 14:57:09  coulouri
120  * remove dead code
121  *
122  * Revision 6.440  2005/07/27 15:51:54  coulouri
123  * remove unused queue_callback
124  *
125  * Revision 6.439  2005/05/19 11:11:59  coulouri
126  * Changes from morgulis to address rt ticket 15091715:
127  * null hsp_array in blastall tblastn query concatenation causes segfault
128  *
129  * Revision 6.438  2005/05/10 18:51:15  dondosha
130  * Removed unused functions and variables; moved sorting of HSPs by score after new_link_hsps inside this function
131  *
132  * Revision 6.437  2005/05/10 16:15:23  dondosha
133  * Back-porting changes in uneven gap HSP linking from algo/blast code: from Mike Gertz
134  *
135  * Revision 6.436  2005/05/06 11:49:22  coulouri
136  * remove unnecessary evalue check that results in instability of number_of_seqs_better_E; addresses rt ticket 15075332
137  *
138  * Revision 6.435  2005/05/02 16:03:14  coulouri
139  * refactor code to set db_chunk_size
140  *
141  * Revision 6.434  2005/04/25 14:16:36  coulouri
142  * set db_chunk_size adaptively
143  *
144  * Revision 6.433  2005/01/24 21:17:36  camacho
145  * 1. Changed implementation of RPSBlastResultHspScoreCmp to have the same
146  *    tie-breakers as score_compare_hsps
147  * 2. Renamed RPSBlastResultHspScoreCmp to BLASTResultHspScoreCmp
148  *
149  * Revision 6.432  2005/01/21 19:41:04  camacho
150  * Initialize variables
151  *
152  * Revision 6.431  2005/01/10 18:52:28  coulouri
153  * fixes from morgulis to allow concatenation of >255 queries in [t]blastn
154  *
155  * Revision 6.430  2004/12/29 13:26:28  madden
156  * One hit extension fixes so that:
157  * 1.) it is no longer iterative; now a left extension is performed and then a right extension.
158  * 2.) the left extension now stops when the score has dropped by xdrop, the right when the score goes to zero.
159  * 3.) fix one hit stopping criteria so that it is like two hit criteria.
160  * .
161  *
162  * Revision 6.429  2004/12/20 15:22:16  camacho
163  * Calculate kbp_ideal values rather than loading them from pre-computed values
164  *
165  * Revision 6.428  2004/12/14 14:07:54  madden
166  * Fix typo in if statement
167  *
168  * Revision 6.427  2004/11/30 16:33:16  dondosha
169  * Do not subtract starting offset in AdjustOffsetsInMaskLoc, because this is done in other functions after lower case mask is merged with filter mask
170  *
171  * Revision 6.426  2004/11/23 21:21:15  coulouri
172  * remove dead code, eliminate compiler warnings
173  *
174  * Revision 6.425  2004/11/22 15:43:24  dondosha
175  * Call AdjustOffsetsInMaskLoc for the options query_lcase_mask field, not parameters, to avoid leaving pointer to freed memory
176  *
177  * Revision 6.424  2004/11/19 13:22:05  madden
178  * Remove no_check_score completely (from Mike Gertz)
179  *
180  * Revision 6.423  2004/11/04 17:23:11  madden
181  * Fix for tblastn searches, do not mix HSPs from separate frames
182  *
183  * Revision 6.422  2004/11/04 15:51:55  bealer
184  * - bl2seq should use dblen as average length if database is not available.
185  *
186  * Revision 6.421  2004/11/01 14:07:06  madden
187  *    - In CalculateSecondCutoffScore use the number of starting points,
188  *      rather than the maximum size of the gap, when calculating the
189  *      cutoffs.
190  *
191  *      Recently, the meaning of search->pbp->gap_size was changed.
192  *      Previously, it represented the maximum number of permitted
193  *      starting points; now it represents the maximum permitted gap.
194  *      The CalculateSecondCutoffScore was not updated to reflect the new
195  *      meaning.  (The algo/blast/code was appropriately updated.)
196  *
197  *    - Remove the BlastReapHitlistByScore routine, and a call to the
198  *      routine in BLASTPerformFinalSearch.
199  *
200  * Revision 6.420  2004/10/25 18:30:21  papadopo
201  * From Michael Gertz:
202  * 1. Change BlastNtWordExtend to only terminate an ungapped alignment
203  *    if the running score fails the X-drop criterion, *not* if the score
204  *    becomes zero
205  * 2. Change BlastNtWordExtend to call BlastSaveCurrentHsp only for an
206  *    ungapped alignment, since it would choose an incorrect start point
207  *    for a gapped alignment
208  *
209  * Revision 6.419  2004/10/18 13:01:54  madden
210  * Changes from Mike Gertz:
211  *         - In xsum_compare_hsps change the comparison tests so that nil
212  *           HSPs are less than any non-nil HSP.  Previously, this
213  *           function would return 0 if either HSP was nil, which would
214  *           result in sort routines terminating before the non-nil HSPs
215  *           in the list were fully sorted.
216  *
217  *         - In rev_compare_hsps_cfj, reversed the order of the
218  *           comparison on query.frame to make the sort order consistent
219  *           with the sort used in algo/blast/core/link_hsps.c.
220  *
221  * Revision 6.418  2004/10/07 13:07:06  madden
222  * Cast int to FloatHi to prevent wrap-around
223  *
224  * Revision 6.417  2004/09/30 12:10:19  madden
225  * Add function BlastReapHitlistByScore, use on gapped tblastn and blastx HSPs that do not achieve a high enough score for continued processing
226  *
227  * Revision 6.416  2004/09/28 15:59:40  papadopo
228  * Items 1 and 2 of version 6.414 were mistakenly left out
229  *
230  * Revision 6.415  2004/09/28 15:52:16  papadopo
231  * From Michael Gertz:
232  * 1. Undo previous fix to ungapped PSSM wordfinder (not necessary)
233  * 2. Modify square-matrix ungapped wordfinder to avoid occasional
234  *      incorrect choice of start offset for right extensions
235  * 3. Call BlastLinkHsps if and only if search->pbp->do_sum_stats is
236  *      true; previously used the program number to decide
237  * 4. For ungapped blastx and tblastn, if longest_intron is not set
238  *         (i.e. = 0) or (longest_intron - 2)/3 is nonpositive, call
239  *         link_hsps. Otherwise call new_link_hsps.
240  * 5. For gapped blastx, tblastn or psitblastn, if longest_intron is
241  *         not set (i.e. = 0), set it to 122.  Then call new_link_hsps if
242  *         (longest_intron - 2)/3 is positive.  Otherwise turn off sum statistics.
243  * 6. In BlastLinkHsps, enabled the use of new_link_hsps for psitblastn.
244  * 7. Caused all routines for calculating the significance of multiple
245  *      distinct alignments (BlastSmallGapSumE, BlastLargeGapSumE and
246  *      BlastUnevenGapSumE) to use
247  *
248  *      sum_{i in linked_set} (\lambda_i s_i - \ln K_i)
249  *
250  *      as the weighted sum score. This change affects e-values in
251  *      blastx and tblastx.
252  * 8. When computing normalized sum scores, use the ungapped values of
253  *      (lambda, K) for ungapped alignments.
254  * 9. In SumHSPEvalue, for blastx, the subject_length must be divided by 3.
255  * 10. Pass the effective database size into BlastSmallGapSumE,
256  *      BlastLargeGapSumE and BlastUnevenGapSumE.  The routines use this
257  *      value in a simplified formula to compute the e-value of singleton sets.
258  * 11. Sort HSPs in new_link_hsps by normalized score, rather than score;
259  *      for blastx, this places HSPs in the correct order of significance.
260  * 12. In new_link_hsps, set xsum field of every HSP to the appropriate
261  *      value for a singleton set before doing any linking.
262  * 13. In both link_hsps and new_link_hsps use normalized sum score,
263  *      rather than raw sum score, everywhere when choosing linked sets
264  * 14. Delete code in new_link_hsps for finding splice junctions.
265  * 15. Delete some unused variables in link_hsps.
266  *
267  * Revision 6.412  2004/09/22 16:44:48  dondosha
268  * Assign frames in ungapped blastn before any attempt to link HSPs, not only before second linking
269  *
270  * Revision 6.411  2004/09/21 16:28:23  dondosha
271  * Make sure first change in previous revision is applied only to blastn
272  *
273  * Revision 6.410  2004/09/21 13:58:46  dondosha
274  * 1. Assign HSP contexts and subject frames before linking HSPs after
275  *    reevaluation with ambiguities for ungapped blastn - necessary to distinguish
276  *    HSPs from different strands;
277  * 2. Use ideal Karlin-Altschul parameters for RPS tblastn instead of those for a
278  *    fake protein.
279  *
280  * Revision 6.409  2004/09/15 18:33:23  papadopo
281  * From Michael Gertz: modify two-hit ungapped code to compute the correct end offset even if an extension to the right does not happen
282  *
283  * Revision 6.408  2004/08/27 16:11:18  dondosha
284  * Changes in new_link_hsps from Mike Gertz: adjust singleton sets e-values by gap decay divisor; use effective db length for sum e-value calculations
285  *
286  * Revision 6.407  2004/08/16 19:37:26  dondosha
287  * Enabled uneven gap HSP linking for blastx
288  *
289  * Revision 6.406  2004/05/21 13:53:37  dondosha
290  * Fix in BLASTMergeHitLists
291  *
292  * Revision 6.405  2004/04/28 14:37:06  madden
293  * Changes from Mike Gertz
294  *  - modified the link_hsps routine to apply the gap_prob parameter to
295  *     the result of BlastSmallGapSumE and BlastLargeGapSumE.
296  *   - further modified link_hsps to use BlastGapDecayDivisor to weight
297  *     tests based on multiple collections of HSPs.
298  *   - removed all reference to gap_prob from the new_link_hsps.
299  *   - further modified new_link_hsps to use BlastGapDecayDivisor to weight
300  *     tests based on multiple collections of HSPs.
301  *
302  * Revision 6.404  2004/04/20 14:55:47  morgulis
303  * 1. Fixed query offsets in results when -B option is used.
304  * 2. Fixes for lower case masking handling with -B option.
305  *
306  * Revision 6.403  2004/04/13 21:03:30  madden
307  * Use ignore_gilist Boolean to determine whether gilist lookup should occur
308  *
309  * Revision 6.402  2004/03/31 17:58:51  papadopo
310  * Mike Gertz' changes for length adjustment calculations
311  *
312  * Revision 6.401  2004/03/22 15:35:39  dondosha
313  * 1. Do not allow cutoff score for saving HSPs to be smaller than gap trigger;
314  * 2. When merging hitlists with a restriction on number of HSPs, keep best
315  *    scoring ones.
316  *
317  * Revision 6.400  2004/02/26 15:52:29  papadopo
318  * Mike Gertz' modifications to unify handling of gapped Karlin blocks between protein and nucleotide searches
319  *
320  * Revision 6.399  2004/02/24 14:07:00  camacho
321  * Use approximate sequence length calculation for entrez-limited
322  * nucleotide blast databases.
323  *
324  * Revision 6.398  2004/02/03 17:54:16  dondosha
325  * Correction to revision 6.391 in function BlastGetDbChunk
326  *
327  * Revision 6.397  2004/01/06 22:37:10  dondosha
328  * Use BLAST_HSPfree function
329  *
330  * Revision 6.396  2003/12/29 15:42:46  coulouri
331  * tblastn query concatenation fixes from morgulis
332  *
333  * Revision 6.395  2003/12/12 16:01:23  madden
334  * Change to signature of BlastCutoffs, remove BlastCutoffs_simple
335  *
336  * Revision 6.394  2003/12/10 17:05:27  dondosha
337  * Added function ReevaluateScoreWithAmbiguities to reevaluate score for one HSP; use it after greedy traceback
338  *
339  * Revision 6.393  2003/11/19 18:09:13  dondosha
340  * Use consistent rounding in length adjustment calculation
341  *
342  * Revision 6.392  2003/11/10 20:15:29  dondosha
343  * Bug fix in BLASTMergeHsps
344  *
345  * Revision 6.391  2003/10/23 17:46:17  dondosha
346  * Fix in BlastGetDbChunk for looking up ordinal ids within a range
347  *
348  * Revision 6.390  2003/08/08 16:36:21  dondosha
349  * 1. Treat final_db_seq as 1 beyond the final sequence; 0 is an exception, meaning end of database.
350  * 2. Added more meaningful error message when query length is less than wordsize.
351  *
352  * Revision 6.389  2003/05/30 17:20:10  coulouri
353  * add rcsid
354  *
355  * Revision 6.388  2003/05/14 20:35:58  camacho
356  * Allow searching empty databases
357  *
358  * Revision 6.387  2003/05/13 16:02:53  coulouri
359  * make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
360  *
361  * Revision 6.386  2003/05/12 12:23:43  camacho
362  * Sanity check for number of sequences & db length
363  *
364  * Revision 6.385  2003/04/23 15:15:36  camacho
365  * Moved reading of gi list to readdb
366  *
367  * Revision 6.384  2003/03/24 19:42:13  madden
368  * Changes to support query concatenation for blastn and tblastn
369  *
370  * Revision 6.383  2003/03/14 22:33:44  dondosha
371  * Do not increase preliminary hitlist size for ungapped search
372  *
373  * Revision 6.382  2003/03/06 19:10:42  madden
374  * Allow search->pbp->process_num to be > 1 if MT enabled
375  *
376  * Revision 6.381  2003/03/05 21:30:24  dondosha
377  * Fix in BlastMakeCopyQueryDNAP for single-strand OOF search
378  *
379  * Revision 6.380  2002/12/24 14:12:03  dondosha
380  * Removed accidental duplicate lines
381  *
382  * Revision 6.379  2002/12/10 23:13:22  bealer
383  * Fix do_the_blast_run and BlastGetDbChunk to calculate beginning and ending
384  * sequence numbers correctly.
385  * Fix BlastGetDbChunk to use precise start and end points, not nearest
386  * multiples of 32.
387  * Fix do_the_blast_run and BlastGetDbChunk to handle mixed oidlist / real db
388  * multiple database scenarios.
389  *
390  * Revision 6.378  2002/12/04 22:39:51  bealer
391  * Undo previous set of changes.
392  *
393  * Revision 6.377  2002/11/25 19:53:34  bealer
394  * Remove extraneous commented code.
395  *
396  * Revision 6.376  2002/11/25 19:50:26  bealer
397  * Prevent extra work by BlastGetDbChunk when OID lists are used.
398  *
399  * Revision 6.375  2002/11/13 18:03:10  dondosha
400  * Correction in BlastReevaluateWithAmbiguities
401  *
402  * Revision 6.374  2002/11/08 14:58:43  kans
403  * first argument to NlmReadMFILE must be cast as Uint1Ptr - Mac compiler picked up this inconsistency with the prototype
404  *
405  * Revision 6.373  2002/11/07 21:06:15  camacho
406  * Made GetGisFromFile work even without mmap
407  *
408  * Revision 6.372  2002/11/04 22:55:56  dondosha
409  * For blastn, calculate number of identities in BlastReevaluateWithAmbiguities
410  *
411  * Revision 6.371  2002/10/28 21:44:03  madden
412  * Added comments about gap-free extensions
413  *
414  * Revision 6.370  2002/09/18 20:23:19  camacho
415  * Added BLASTCalculateSearchSpace
416  *
417  * Revision 6.369  2002/09/11 20:46:25  camacho
418  * Removed deprecated BlastSeqIdListPtr code
419  *
420  * Revision 6.368  2002/08/30 18:56:02  dondosha
421  * Made BlastMakeTempProteinBioseq and HackSeqLocId public: needed for Cn3D
422  *
423  * Revision 6.367  2002/08/30 15:42:48  dondosha
424  * In blastn, use ewp structure only for the first context
425  *
426  * Revision 6.366  2002/08/22 13:39:45  camacho
427  * Close the header and sequence files only if allocated
428  *
429  * Revision 6.365  2002/08/07 21:37:47  camacho
430  * Do not remove the search block prematurely in do_gapped_blast_search
431  *
432  * Revision 6.364  2002/08/06 17:33:50  madden
433  * Fix return value problem
434  *
435  * Revision 6.363  2002/07/19 17:55:47  dondosha
436  * 1.Return 0 status from BLASTPerformFinalSearch when database sequence has 0 length;
437  * 2. Do not destroy search block too early.
438  *
439  * Revision 6.362  2002/07/15 18:53:27  camacho
440  * Small fix to previous commit
441  *
442  * Revision 6.361  2002/07/14 17:18:13  camacho
443  * Fixed small memory leak in do_blast_search/do_gapped_blast_search
444  *
445  * Revision 6.360  2002/07/12 18:02:55  dondosha
446  * Do not call AdjustOffsetsInMaskLoc if no lower case mask
447  *
448  * Revision 6.359  2002/07/12 16:06:26  dondosha
449  * Adjust offsets and remove unneeded lower case mask locations when query is a subsequence
450  *
451  * Revision 6.358  2002/06/27 13:01:26  kans
452  * BlastGetVirtualOIDList is LIBCALL
453  *
454  * Revision 6.357  2002/06/26 00:56:28  camacho
455  *
456  * 1. Fixed bug when searching a mixture of real and mask databases.
457  * 2. Clean up of code that calculates the number of sequences and database
458  *    length.
459  *
460  * Revision 6.356  2002/06/25 16:43:45  dondosha
461  * Get out from all search loops if bad status returned, meaning process ran out of memory
462  *
463  * Revision 6.355  2002/06/25 13:11:22  madden
464  * Fix UMR for status in do_gapped_blast_search
465  *
466  * Revision 6.354  2002/06/21 21:49:10  camacho
467  * Removed references to thr_info->blast_seqid_list in BlastGetDbChunk
468  *
469  * Revision 6.353  2002/06/12 15:43:09  dondosha
470  * Potential uninitialized variable bug fixed
471  *
472  * Revision 6.352  2002/06/12 15:33:25  dondosha
473  * Corrected integer types of the variable holding return status in 2 functions
474  *
475  * Revision 6.351  2002/06/11 20:40:04  dondosha
476  * Correction to previous change
477  *
478  * Revision 6.350  2002/06/11 14:44:45  dondosha
479  * Return status from some functions instead of search block pointer
480  *
481  * Revision 6.349  2002/06/05 15:30:34  coulouri
482  * Move signal handling to blastsrv.c
483  *
484  * Revision 6.348  2002/05/20 22:49:10  dondosha
485  * Fix for the Mega BLAST case when database sequence is split, and an HSP is accidentally extended across the boundary to a completely masked query
486  *
487  * Revision 6.347  2002/05/15 19:51:01  dondosha
488  * Do a sanity check for the final db sequence parameter
489  *
490  * Revision 6.346  2002/04/23 16:01:27  madden
491  * Fix for ungapped search of arbitrary matrix
492  *
493  * Revision 6.345  2002/04/23 15:40:10  madden
494  * Fix for effective length change and ungapped blast
495  *
496  * Revision 6.344  2002/04/19 21:22:30  madden
497  * Added protection for matrices that are only empty strings
498  *
499  * Revision 6.343  2002/04/18 12:07:05  madden
500  * Check for Selenocysteine in Bioseq, replace with X
501  *
502  * Revision 6.342  2002/04/17 17:30:15  madden
503  * Call getAlphaBeta only for gapped alignments
504  *
505  * Revision 6.341  2002/04/16 15:42:15  madden
506  * Save mask1 for lookup table hashing only (change for neighboring)
507  *
508  * Revision 6.340  2002/04/04 21:19:15  dondosha
509  * Corrections for megablast with non-greedy extensions
510  *
511  * Revision 6.339  2002/03/26 21:20:50  dondosha
512  * 1. Make hitlist size larger for preliminary gapped alignment
513  * 2. Pass readdb structure to megablast set up if it is already initialized
514  *
515  * Revision 6.338  2002/03/26 16:46:40  madden
516  * Move calculation of effective lengths to BlastCalculateEffectiveLengths
517  *
518  * Revision 6.337  2002/03/06 18:34:31  dondosha
519  * Pass the filtered locations back from the megablast engine to use in formatting
520  *
521  * Revision 6.336  2002/02/27 22:39:00  dondosha
522  * Fixed bug in splitting long database sequences for translated searches
523  *
524  * Revision 6.335  2002/02/27 17:43:20  dondosha
525  * Made effective database length option work properly
526  *
527  * Revision 6.334  2002/02/26 22:25:20  dondosha
528  * Return error as soon as it is found that matrix name is not supported
529  *
530  * Revision 6.333  2002/02/26 17:37:40  dondosha
531  * Fixed bug in BlastNtWordFinder for word sizes > 12
532  *
533  * Revision 6.332  2002/02/26 15:03:13  dondosha
534  * Accidental newline in sprintf removed
535  *
536  * Revision 6.331  2002/02/25 23:26:57  dondosha
537  * Changed error to warning if no letters to be indexed just on one context
538  *
539  * Revision 6.330  2002/01/04 22:01:33  coulouri
540  * Fixed BlastSetLimits() to work under linux
541  *
542  * Revision 6.329  2002/01/04 20:16:12  dondosha
543  * Correction for single strand blastx with OOF gapping
544  *
545  * Revision 6.328  2001/12/28 20:38:40  dondosha
546  * Moved Mega BLAST related parameters into a separate structure
547  *
548  * Revision 6.327  2001/12/17 17:31:35  madden
549  * Fix memory leaks
550  *
551  * Revision 6.326  2001/12/14 21:04:31  madden
552  * Reinit start to zero for every frame
553  *
554  * Revision 6.325  2001/12/10 23:04:19  dondosha
555  * Corrected how number of db sequences is set when gi list exists
556  *
557  * Revision 6.324  2001/11/23 21:11:16  dondosha
558  * Correction to previous change
559  *
560  * Revision 6.323  2001/11/23 19:57:55  dondosha
561  * Correction for bl2seq related to recent changes in megablast
562  *
563  * Revision 6.322  2001/11/14 23:39:31  dondosha
564  * Switched return value for BlastNtWordUngappedExtend
565  *
566  * Revision 6.321  2001/11/13 18:17:26  dondosha
567  * Added BlastNtWordUngappedExtend for use in Mega BLAST
568  *
569  * Revision 6.320  2001/09/21 14:42:08  dondosha
570  * Correction of previous fix in BlastReapPartialHitlistByEvalue for non-megablast programs
571  *
572  * Revision 6.319  2001/09/20 14:39:15  madden
573  * Fix for non-blastn programs in BlastReapPartialHitlistByEvalue
574  *
575  * Revision 6.318  2001/09/17 16:33:39  dondosha
576  * Bug fix in BlastReapPartialHitlistByEvalue
577  *
578  * Revision 6.317  2001/09/11 14:28:30  madden
579  * Added timed_out Boolean to SearchBlk
580  *
581  * Revision 6.316  2001/09/07 14:46:43  dondosha
582  * Roll back removal of threshold_first from functions and structures
583  *
584  * Revision 6.315  2001/09/06 20:24:33  dondosha
585  * Removed threshold_first
586  *
587  * Revision 6.314  2001/08/10 14:55:55  madden
588  * Add pv_array for multiple hits blastn
589  *
590  * Revision 6.313  2001/07/24 19:50:32  dondosha
591  * Do not create a star_proc thread if there is no tick_proc
592  *
593  * Revision 6.312  2001/07/20 18:52:25  dondosha
594  * Removed unused code
595  *
596  * Revision 6.311  2001/07/18 19:24:17  madden
597  * Set options->dbseq_num if use_real_db TRUE
598  *
599  * Revision 6.310  2001/07/09 14:17:23  madden
600  * Fix PC-lint complaints from R. Williams
601  *
602  * Revision 6.309  2001/07/09 13:12:02  madden
603  * Removed unused variables
604  *
605  * Revision 6.308  2001/07/06 15:22:42  madden
606  * Correction for BLASTN
607  *
608  * Revision 6.307  2001/06/29 18:07:20  madden
609  * Fix problem with scalingFactor
610  *
611  * Revision 6.306  2001/06/28 13:42:09  madden
612  * Fixes to prevent overflow on number of hits reporting
613  *
614  * Revision 6.305  2001/06/27 17:46:33  madden
615  * Add mutex to protect number_of_pos_hits, found by H. Gabb at KAI
616  *
617  * Revision 6.304  2001/06/26 20:37:04  madden
618  * Fixes for realdb_done problem found by H. Gabb at KAI
619  *
620  * Revision 6.303  2001/06/25 16:03:12  madden
621  * Correctly set gapped_start for blastn
622  *
623  * Revision 6.302  2001/06/21 21:29:07  dondosha
624  * Fixed memory leaks: destroy all error returns, free private_slp
625  *
626  * Revision 6.301  2001/06/15 16:38:45  dondosha
627  * Correction to previous changes
628  *
629  * Revision 6.300  2001/06/14 22:09:14  dondosha
630  * Rearranged code for gi lists and oid masks processing to get rid of duplication
631  *
632  * Revision 6.299  2001/06/13 21:45:08  dondosha
633  * Search of multiple databases with gi files implemented
634  *
635  * Revision 6.298  2001/06/12 19:48:55  madden
636  * Introduce total_hsp_limit, check before making SeqAlign
637  *
638  * Revision 6.297  2001/05/25 19:34:17  vakatov
639  * Nested comment typo fixed
640  *
641  * Revision 6.296  2001/05/04 15:59:46  dondosha
642  * Function BlastFillQueryOffsets now has an extra argument for megablast use
643  *
644  * Revision 6.295  2001/05/03 21:48:28  dondosha
645  * Handle some cases when memory allocation fails
646  *
647  * Revision 6.294  2001/04/23 17:09:18  madden
648  * Use StringSave for gifile variable
649  *
650  * Revision 6.293  2001/04/16 16:37:01  madden
651  * Restore old length correction behavior for blastn
652  *
653  * Revision 6.292  2001/04/13 20:46:42  madden
654  * Changed edge effect correction in BLASTSetUpSearchInternalByLoc to use new method with  alpha and beta parameters from Altschul, Bundschuh, Olsen, Hwa, Nucleic Acids Research 29(2001), 351-361.
655  *
656  * Revision 6.291  2001/04/11 20:56:06  madden
657  * Added scalingFactor for rpsblast
658  *
659  * Revision 6.290  2001/04/04 20:31:16  dondosha
660  * Bug fix for blastx with a subsequence query
661  *
662  * Revision 6.289  2001/04/02 15:55:27  dondosha
663  * Check HSP frames when merging hitlists from split subject sequence
664  *
665  * Revision 6.288  2001/03/30 23:53:45  dondosha
666  * Correction in splitting long database sequences for tblastn
667  *
668  * Revision 6.287  2001/03/19 18:53:45  madden
669  * Added call to BlastSeqLocFillDoubleIntEx, changed call to BlastSeqLocFillDoubleIntRev
670  *
671  * Revision 6.286  2001/03/14 14:54:35  madden
672  * fix problem with partial translating query
673  *
674  * Revision 6.285  2001/03/12 21:38:59  dondosha
675  * Bug fix in database sequence splitting change
676  *
677  * Revision 6.284  2001/03/08 22:05:47  dondosha
678  * Split very long database sequences in all BLAST programs
679  *
680  * Revision 6.283  2001/03/07 14:09:17  madden
681  * Set multiple_hits depending on option block
682  * 
683  * Revision 6.282  2001/03/06 22:02:32  dondosha
684  * Rolled back accidental change in BlastReevaluateWithAmbiguities
685  *
686  * Revision 6.281  2001/03/01 15:41:33  dondosha
687  * Added protection from infinite loop in new_link_hsps
688  *
689  * Revision 6.280  2001/01/24 21:55:53  dondosha
690  * Correction to previous change
691  *
692  * Revision 6.279  2001/01/24 20:51:49  dondosha
693  * Enabled splitting of the second sequence for 2 sequences with megablast
694  *
695  * Revision 6.278  2001/01/19 17:23:16  madden
696  * Optimization for 2-hit blastn
697  *
698  * Revision 6.277  2001/01/16 14:03:53  madden
699  * Enable gapped check for blastn immediately after finding hits
700  *
701  * Revision 6.276  2001/01/09 20:10:37  shavirin
702  * Added sorting of all hits in result_struct for every element in
703  * results. Added function RPSResultHspScoreCmp.
704  *
705  * Revision 6.275  2001/01/08 20:21:40  dondosha
706  * Adjust subject offset in the gap edit blocks if database sequence was split in megablast search
707  *
708  * Revision 6.274  2001/01/03 21:45:29  dondosha
709  * Fixed a memory leak - some edit blocks not freed in megablast
710  *
711  * Revision 6.273  2001/01/02 22:29:45  dondosha
712  * Assign virtual oidlist to the first non-whole database rdfp in the linked list
713  *
714  * Revision 6.272  2000/12/28 18:22:29  madden
715  * Fixes to BlastNtWordFinder_mh
716  *
717  * Revision 6.271  2000/12/27 16:51:17  dondosha
718  * When splitting database sequence for megablast, keep only significant HSPs from partial hitlists
719  *
720  * Revision 6.270  2000/12/26 17:50:46  dondosha
721  * Fixed bug in BLASTMergeHsps function for merging HSPs after splitting of a database sequence
722  *
723  * Revision 6.269  2000/12/21 17:37:24  dondosha
724  * Fixed bug with minus-strand blastn search
725  *
726  * Revision 6.268  2000/12/20 15:44:01  madden
727  * Better error message if query is shorter than wordsize
728  *
729  * Revision 6.267  2000/12/18 20:38:55  shavirin
730  * Removed include <time.h> before <ncbi.h>.
731  *
732  * Revision 6.266  2000/12/07 17:45:13  dondosha
733  * Use actual subject sequence length in GreedyAlignMemAlloc for 2 Sequences engine
734  *
735  * Revision 6.265  2000/12/04 18:51:23  madden
736  * Fix memory leaks
737  *
738  * Revision 6.264  2000/11/29 16:58:16  dondosha
739  * Small fix to previous revision
740  *
741  * Revision 6.263  2000/11/29 16:29:31  dondosha
742  * For megablast, allow splitting of long subject sequences and merging hitlists
743  *
744  * Revision 6.262  2000/11/17 17:51:59  dondosha
745  * Removed is_megablast argument from BLASTSetUpSearchWithReadDbInternalEx since it is part of options
746  *
747  * Revision 6.261  2000/11/13 20:38:48  madden
748  * Fix for zero length db sequence in ungapped blast
749  *
750  * Revision 6.260  2000/11/09 14:59:38  dondosha
751  * Longest intron length in options set in nucleotide coordinates
752  *
753  * Revision 6.259  2000/11/08 22:21:32  dondosha
754  * Enabled new tblastn by adding a longest_intron option
755  *
756  * Revision 6.258  2000/11/07 16:30:24  madden
757  * Introduce intermediate score (before linking of HSPs) for blastx and tblastn
758  *
759  * Revision 6.257  2000/11/03 20:13:55  dondosha
760  * Do not call readdb_get_sequence_ex from new_link_hsps for two sequences BLAST
761  *
762  * Revision 6.256  2000/11/01 16:25:58  madden
763  * Changes from Futamura for psitblastn
764  *
765  * Revision 6.255  2000/11/01 00:05:18  vakatov
766  * Added missing "LIBCALL"
767  *
768  * Revision 6.254  2000/10/31 16:30:56  shavirin
769  * Function BLASTSetUpSearchInternalByLoc became external.
770  *
771  * Revision 6.253  2000/10/30 16:51:04  shavirin
772  * Changed function with creation temporary bioseqs for SEG filtering.
773  *
774  * Revision 6.252  2000/10/26 18:45:58  dondosha
775  * Check if gi list file is provided from the db alias
776  *
777  * Revision 6.251  2000/10/24 19:05:45  dondosha
778  * Moved function UniqueLocalId to sequtil.c
779  *
780  * Revision 6.250  2000/10/06 21:36:02  dondosha
781  * Do not multiply window size by 3 for subject in new_link_hsps
782  *
783  * Revision 6.249  2000/10/06 16:36:57  shavirin
784  * Correctly closed file with gi list in the function GetGisFromFile().
785  *
786  * Revision 6.248  2000/10/05 19:54:50  dondosha
787  * For Mega BLAST, call MegaBlastSaveCurrentHitlist instead of BlastSaveCurrentHitlist
788  *
789  * Revision 6.247  2000/09/28 15:05:59  dondosha
790  * Added splice junction search; corrected sum evalue calculation
791  *
792  * Revision 6.246  2000/09/28 14:27:52  madden
793  * Correct use of search space for linked hsps
794  *
795  * Revision 6.245  2000/09/18 16:04:38  madden
796  * No call to BlastFindWords if rpsblast
797  *
798  * Revision 6.244  2000/09/14 14:58:20  dondosha
799  * Further improvements with new tblastn (still not in the executable)
800  *
801  * Revision 6.243  2000/09/12 16:11:31  dondosha
802  * Changed window size, plus some bug fixes for new_link_hsps
803  *
804  * Revision 6.242  2000/09/01 18:25:10  dondosha
805  * Pass start and length to BlastFindWords, not start and end
806  *
807  * Revision 6.241  2000/09/01 13:47:39  shavirin
808  * Fixed error and typecast warnings from Windows NT compilation.
809  *
810  * Revision 6.240  2000/08/31 18:37:22  shavirin
811  * Added check for NULL in BlastMakeCopyQueryDNAP().
812  *
813  * Revision 6.239  2000/08/31 17:06:20  shavirin
814  * Added few OOF related functions to copy and delete query_dnap.
815  *
816  * Revision 6.238  2000/08/31 15:59:12  dondosha
817  * No need to call ReadDBFreeSharedInfo from do_the_blast_run
818  *
819  * Revision 6.237  2000/08/29 19:36:37  madden
820  * Do not lookup gis if gilist_already_calculated is set
821  *
822  * Revision 6.236  2000/08/29 18:09:34  dondosha
823  * Adjust the reverse strand offsets for non-megablast blastn in BlastSaveCurrentHitlist
824  *
825  * Revision 6.235  2000/08/25 22:41:49  dondosha
826  * Do reevaluation of score with ambiguities for megablast
827  *
828  * Revision 6.234  2000/08/23 18:48:44  madden
829  * Use BlastKarlinBlkGappedCalcEx in place of BlastKarlinBlkGappedCalc
830  *
831  * Revision 6.233  2000/08/18 20:12:28  dondosha
832  * Do not use search->query_id in megablast, use only qid_array
833  *
834  * Revision 6.232  2000/08/07 16:59:49  dondosha
835  * Correct construction of path for gi list file
836  *
837  * Revision 6.231  2000/08/03 17:50:37  dondosha
838  * Check HSPs for going beyond ends of query in megablast
839  *
840  * Revision 6.230  2000/08/02 15:26:09  dondosha
841  * For megablast compute search space depending on query when getting evalue
842  *
843  * Revision 6.229  2000/07/25 16:52:49  shavirin
844  * Corrected function BlastCreateQueryDNAP().
845  *
846  * Revision 6.228  2000/07/24 16:12:05  hurwitz
847  * made definition of BLASTSetUpSearchWithReadDbInternalEx match the one in blastpri.h
848  *
849  * Revision 6.227  2000/07/21 21:26:43  dondosha
850  * Added BLASTSetUpSearchWithReadDbInternalEx with Boolean argument is_megablast
851  *
852  * Revision 6.226  2000/07/18 22:32:38  shavirin
853  * Adjusted space allocated for DNA-P query sequence
854  *
855  * Revision 6.225  2000/07/17 14:17:10  shavirin
856  * Added new function BlastCreateQueryDNAP() and OOF_TranslateToDNAP() and
857  * support for Out of frame gap algorithm.
858  *
859  * Revision 6.224  2000/07/12 13:36:29  shavirin
860  * Removed last NULL parameter from MegaBlastSetUpSearchInternalByLoc().
861  *
862  * Revision 6.223  2000/07/11 17:16:19  shavirin
863  * Added new parameter is_ooframe for Out-Of-Frame gapping algorithm.
864  *
865  * Revision 6.222  2000/06/22 22:28:07  dondosha
866  * Only look at HSPs up to hspcnt_max in BlastSaveCurrentHitlist - this allows not to use MemNew when initializing hsp_array
867  *
868  * Revision 6.221  2000/06/22 14:08:20  madden
869  * Fix bug in BlastWordExtend_prelim if word-hit is at end of sequence
870  *
871  * Revision 6.220  2000/06/08 20:34:10  madden
872  * add explode_seqids option to show all ids in a defline
873  *
874  * Revision 6.219  2000/05/25 21:03:56  dondosha
875  * In BlastSaveCurrentHitlist assign hspcnt for result hitlist correctly
876  *
877  * Revision 6.218  2000/05/24 19:48:06  dondosha
878  * Moved initialization of qid_array in megablast to search set-up
879  *
880  * Revision 6.217  2000/05/19 19:36:18  madden
881  * Fix for longer words in BlastNtWordFinder, do not call BlastNTPreliminaryGappedScore
882  *
883  * Revision 6.216  2000/05/17 17:13:36  dondosha
884  * Removed some unused variables
885  *
886  * Revision 6.215  2000/05/16 19:59:24  madden
887  * Do not set ignore_small_gaps to TRUE
888  *
889  * Revision 6.214  2000/05/12 19:42:29  dondosha
890  * Use array instead of linked list of query ids in megablast
891  *
892  * Revision 6.213  2000/05/12 18:53:25  shavirin
893  * Fixed memory leak with OIDList.
894  *
895  * Revision 6.212  2000/05/11 18:02:23  shavirin
896  * Minor change for using gi_list together with oid-databases.
897  *
898  * Revision 6.211  2000/05/09 19:42:49  shavirin
899  * Fixed in BlastGetDbChunk() no-mutex regular database case.
900  *
901  * Revision 6.210  2000/05/03 17:08:26  shavirin
902  * Fixed minor bug in the function BLASTSetUpSearchWithReadDbInternal().
903  *
904  * Revision 6.209  2000/05/01 21:24:54  dondosha
905  * Changed greedy_gapped_align to MegaBlastGreedyAlign
906  *
907  * Revision 6.208  2000/04/28 17:51:49  shavirin
908  * Replaced define RPS_BLAST with checking parameter is_rps_blast.
909  *
910  * Revision 6.207  2000/04/25 19:05:13  dondosha
911  * Before search assign db_chunk_last to first_db_seq
912  *
913  * Revision 6.206  2000/04/24 16:43:51  dondosha
914  * Call BlastReapHitlistByEvalue in MegaBlast if hitlists are saved
915  *
916  * Revision 6.205  2000/04/20 15:12:32  dondosha
917  * Bug fix for minus-strand only search - do not try to concatenate second strand to first
918  *
919  * Revision 6.204  2000/04/11 12:47:08  madden
920  * Proper casting to Int8
921  *
922  * Revision 6.203  2000/04/10 20:01:24  dondosha
923  * Fill both strands mask locations in a one location list for blastn
924  *
925  * Revision 6.202  2000/04/10 17:16:44  madden
926  * Make search_sp Int8 to prevent overflow
927  *
928  * Revision 6.201  2000/04/10 15:24:24  dondosha
929  * Enabled use of MegaBlast for BlastTwoSequences
930  *
931  * Revision 6.200  2000/04/07 20:19:45  dondosha
932  * Do not call BlastReapHitlistByEvalue for megablast
933  *
934  * Revision 6.199  2000/04/07 16:43:25  dondosha
935  * Assign dbseq_num to min of actual db size and gilist size
936  *
937  * Revision 6.198  2000/04/07 13:11:56  shavirin
938  * Checked for queue_callback != NULL.
939  *
940  * Revision 6.197  2000/04/06 13:13:33  shavirin
941  * Changed sequence to post semaphore info for internal queueing.
942  *
943  * Revision 6.196  2000/04/04 20:48:21  dondosha
944  * Fixed a memory leak in saving hitlists for MegaBlast
945  *
946  * Revision 6.195  2000/04/04 16:16:59  dondosha
947  * Fixed some memory leaks in MegaBlast traceback
948  *
949  * Revision 6.194  2000/04/03 21:21:44  dondosha
950  * Assign is_neighboring parameter from option
951  *
952  * Revision 6.193  2000/03/31 21:14:24  dondosha
953  * Changed some names related to MegaBlast
954  *
955  * Revision 6.192  2000/03/31 16:50:51  dondosha
956  * Sort hsps and remove redundant when saving hitlist in MegaBlast
957  *
958  * Revision 6.191  2000/03/30 21:45:04  madden
959  * Add call to BLASTResultHitlistFreeEx
960  *
961  * Revision 6.190  2000/03/29 22:19:43  dondosha
962  * BlastSaveCurrentHitlist adjusts query offsets for blastn; creates seqaligns for MegaBlast
963  *
964  * Revision 6.189  2000/03/27 16:46:22  madden
965  * Moved call to BlastFillQueryOffsets to BLASTSetUpSearchInternalByLoc
966  *
967  * Revision 6.188  2000/03/23 20:51:15  dondosha
968  * Set dbseq_num to gi_list_total if search space is not recalculated and gi_list exists
969  *
970  * Revision 6.187  2000/03/22 18:08:59  dondosha
971  * Free rdfp->shared_info in single threaded case the same way as in multithreaded after the search
972  *
973  * Revision 6.186  2000/03/14 21:01:16  dondosha
974  * Call BlastTickProc even when gi_list is set
975  *
976  * Revision 6.185  2000/03/13 21:11:35  dondosha
977  * Check options parameters use_real_db_size and sort_gi_list when dealing with gi_list
978  *
979  * Revision 6.184  2000/03/03 18:02:05  shavirin
980  * Added support for lower-case character filtering in
981  * and translated RPS Blast.
982  *
983  * Revision 6.183  2000/03/03 17:41:09  egorov
984  * fix memory leak with oidlist
985  *
986  * Revision 6.182  2000/03/02 21:24:16  shavirin
987  * Checked for SEQLOC_PACKED_INT in blastMergeFilterLocs()
988  *
989  * Revision 6.181  2000/03/02 18:30:46  dondosha
990  * Minor bug fix in BlastSaveCurrentHsp for blastn
991  *
992  * Revision 6.180  2000/03/02 17:11:01  dondosha
993  * Fixed bug with one strand search option for blastn
994  *
995  * Revision 6.179  2000/03/01 21:40:53  shavirin
996  * Added code to filter lower-case character regions (except blastx and tblastx)
997  *
998  * Revision 6.178  2000/02/29 18:17:24  shavirin
999  * Variable query_dna_mask changed to query_lcase_mask.
1000  *
1001  * Revision 6.177  2000/02/29 18:09:36  dondosha
1002  * Call BlastFillQueryOffsets in BLASTSetUpSearchEx
1003  *
1004  * Revision 6.176  2000/02/23 20:56:51  dondosha
1005  * Returning strand concatenation for blastn with bug fixes
1006  *
1007  * Revision 6.175  2000/02/18 15:30:36  shavirin
1008  * Added parameter query_dna_mask into options and parameters.
1009  *
1010  * Revision 6.174  2000/02/17 21:23:08  shavirin
1011  * Added parameter is_rps_blast.
1012  *
1013  * Revision 6.173  2000/02/17 19:02:08  shavirin
1014  * Removed all references to obsolete theCacheSize variable.
1015  *
1016  * Revision 6.172  2000/02/17 18:29:02  shavirin
1017  * Added function DefineToFrame().
1018  *
1019  * Revision 6.171  2000/02/16 21:47:45  shavirin
1020  * Fixed memory leaks in the function BlastReapHitlistByEvalue ().
1021  *
1022  * Revision 6.170  2000/02/15 21:02:00  shavirin
1023  * Added support to filter DNA sequence in translated RPS Blast.
1024  *
1025  * Revision 6.169  2000/02/15 19:17:29  shavirin
1026  * Added filter_string to Parameters block.
1027  *
1028  * Revision 6.168  2000/02/14 16:15:40  madden
1029  * Revert to 6.166
1030  *
1031  * Revision 6.167  2000/02/11 20:41:46  dondosha
1032  * Search on two query strands concatenated in blastn
1033  *
1034  * Revision 6.166  2000/02/09 19:40:00  madden
1035  * Fix purify problems in link_hsps
1036  *
1037  * Revision 6.165  2000/02/09 19:35:36  madden
1038  * Changed GetGisFromFile to also read binary gilists
1039  *
1040  * Revision 6.164  2000/02/03 21:34:07  dondosha
1041  * Fixed bug in setting extra_bytes_needed
1042  *
1043  * Revision 6.163  2000/02/02 20:01:57  madden
1044  * Added LIBCALLBACK to a callback
1045  *
1046  * Revision 6.162  2000/02/02 18:21:31  madden
1047  * Joerg optimizations for link_hsps
1048  *
1049  * Revision 6.161  2000/02/02 16:56:23  dondosha
1050  * Do not call BlastSaveCurrentHitlist if handle_results callback set
1051  *
1052  * Revision 6.160  2000/02/02 15:05:42  dondosha
1053  * Removed call to ReapHitlistByContext, erroneously included in previous version
1054  *
1055  * Revision 6.159  2000/02/01 22:37:05  dondosha
1056  * Call the new routine BlastReapHitlistByContext only when greedy alignment option is set
1057  *
1058  * Revision 6.158  2000/02/01 21:47:04  dondosha
1059  * Added greedy basic gapped alignment option
1060  *
1061  * Revision 6.157  2000/01/14 15:17:13  madden
1062  * Set no_check_score in pbp
1063  *
1064  * Revision 6.156  2000/01/13 18:10:41  madden
1065  * Fix problem with incorrect stat values for blastn and missing hits
1066  *
1067  * Revision 6.155  2000/01/13 14:27:04  madden
1068  * Fixed other problem in BlastWordFinder_contig()
1069  *
1070  * Revision 6.154  2000/01/12 18:52:23  shavirin
1071  * Fixed lookup_pos in BlastWordFinder_contig().
1072  *
1073  * Revision 6.153  2000/01/11 18:36:25  shavirin
1074  * Added functions, those handle dynamic lookup table.
1075  *
1076  * Revision 6.152  2000/01/11 15:32:46  dondosha
1077  * Fixed memory leaks in opening shared header and sequence file memory maps
1078  *
1079  * Revision 6.151  2000/01/04 22:52:25  madden
1080  * Restored code for using real db size
1081  *
1082  * Revision 6.150  1999/12/31 14:23:18  egorov
1083  * Add support for using mixture of real and mask database with gi-list files:
1084  * 1. Change logic of creating rdfp list.
1085  * 2. BlastGetDbChunk gets real databases first, then masks.
1086  * 3. Proper calculation of database sizes using alias files.
1087  * 4. Change to CommonIndex to support using of mask databases.
1088  * 5. Use correct gis in formatted output (BlastGetAllowedGis()).
1089  * 6. Other small changes
1090  *
1091  * Revision 6.149  1999/12/29 19:03:59  shavirin
1092  * Relative pointers in BlastWordFinder_mh_contig() updated to 8 byte pointers
1093  *
1094  * Revision 6.148  1999/12/29 18:57:03  shavirin
1095  * Added possibility to use relative pointers in BlastWordFinder_mh_contig().
1096  *
1097  * Revision 6.147  1999/12/22 21:55:38  dondosha
1098  * Close header and sequence files when search is done
1099  *
1100  * Revision 6.146  1999/12/21 20:05:48  egorov
1101  * Change logic of generating mask file when we have a gi-list file,
1102  * real database and mask database.  In fact, this is a big bug fix.
1103  *
1104  * Revision 6.145  1999/12/16 19:17:34  egorov
1105  * Code cleanup
1106  *
1107  * Revision 6.144  1999/12/02 14:39:35  egorov
1108  * When both mask and gi_list are specified, do not overwrite calculated
1109  * number of sequences and database length with values from alias file.
1110  *
1111  * Revision 6.143  1999/11/30 19:00:49  madden
1112  * Added Nlm_SwapUint4 calls for the ordinal ID list
1113  *
1114  * Revision 6.142  1999/11/26 22:26:13  madden
1115  * Added BlastNT functions for nucl. extensions
1116  *
1117  * Revision 6.141  1999/11/24 21:43:35  madden
1118  * Added Nlm_SwapUint4 call to make database masks work with both big and small endian systems
1119  *
1120  * Revision 6.140  1999/11/12 20:57:39  shavirin
1121  * Added parameter use_best_align into BLAST_ParameterBlkPtr
1122  *
1123  * Revision 6.139  1999/10/27 21:33:00  madden
1124  * Use housekeeping threads only for larger sequences
1125  *
1126  * Revision 6.138  1999/10/26 20:45:18  madden
1127  * Add use_real_db_size option
1128  *
1129  * Revision 6.137  1999/10/19 17:41:20  madden
1130  * Ensure that ThreadJoin is called on every thread created
1131  *
1132  * Revision 6.136  1999/10/14 17:57:44  madden
1133  * Fix for database size set by user, remove unused variables
1134  *
1135  * Revision 6.135  1999/10/12 19:34:08  madden
1136  * Call MutexDestroy on callback_mutex
1137  *
1138  * Revision 6.134  1999/10/08 17:39:57  egorov
1139  * Store input gi list to pick up correct definition for redundant sequences
1140  *
1141  * Revision 6.133  1999/10/05 18:16:06  shavirin
1142  * Functions tick_proc and get_db_chunk were renamed and become public.
1143  *
1144  * Revision 6.132  1999/10/05 17:42:53  shavirin
1145  * Removed global variables from blast.c
1146  *
1147  * Revision 6.131  1999/10/01 21:07:12  shavirin
1148  * Changed definition and adjusted function get_db_list().
1149  *
1150  * Revision 6.130  1999/09/28 20:14:32  madden
1151  * Joerg changes to minimize cache misses
1152  *
1153  * Revision 6.129  1999/09/22 21:54:08  egorov
1154  * remove debug info
1155  *
1156  * Revision 6.128  1999/09/22 21:03:55  egorov
1157  * Add mask DB stuff
1158  *
1159  * Revision 6.127  1999/09/16 16:54:23  madden
1160  * Changes to BlastNtWordFinder for long words
1161  *
1162  * Revision 6.126  1999/09/16 14:16:54  madden
1163  * Changed call to lookup_find_init
1164  *
1165  * Revision 6.125  1999/08/27 18:07:32  shavirin
1166  * Passed parameter decline_align from top to the engine.
1167  *
1168  * Revision 6.124  1999/08/26 14:55:15  madden
1169  * Fixed Int8 problem
1170  *
1171  * Revision 6.123  1999/08/25 13:11:16  madden
1172  * Roll back to rev 6.121
1173  *
1174  * Revision 6.121  1999/08/20 19:47:24  madden
1175  * Changed call to BlastSearchBlkNew(Extra), removed use of version array
1176  *
1177  * Revision 6.120  1999/08/06 18:46:13  madden
1178  * Fixed spelling of incompatible
1179  *
1180  * Revision 6.119  1999/06/07 18:28:20  beloslyu
1181  * NetBSD port
1182  *
1183  * Revision 6.118  1999/05/27 17:33:04  madden
1184  * Fixed Int2 (should have been Int4) problem
1185  *
1186  * Revision 6.117  1999/04/28 13:30:03  madden
1187  * Use BlastConstructErrorMessage for error messages
1188  *
1189  * Revision 6.116  1999/04/23 16:45:53  madden
1190  * call BQ_IncSemaphore as callback
1191  *
1192  * Revision 6.115  1999/04/22 16:45:29  shavirin
1193  * Added load-balancing function.
1194  *
1195  * Revision 6.114  1999/04/13 16:39:14  madden
1196  * Fixed problem if first context not plus strand
1197  *
1198  * Revision 6.113  1999/04/07 20:43:33  egorov
1199  * Fix a bug when ordinal_id == 0 was not allowed
1200  *
1201  * Revision 6.112  1999/04/01 21:42:45  madden
1202  * Fix memory leaks when gi list is used
1203  *
1204  * Revision 6.111  1999/03/23 21:38:19  madden
1205  * Add Join to BlastStopAwakeThread
1206  *
1207  * Revision 6.110  1999/03/19 17:03:29  egorov
1208  * Initialize global variable
1209  *
1210  * Revision 6.109  1999/03/16 15:52:25  vakatov
1211  * Got rid of extra comments-within-comments in the CVS Log section
1212  *
1213  * Revision 6.108  1999/03/16 02:49:31  beloslyu
1214  * typo fixed
1215  *
1216  * Revision 6.107  1999/03/15 22:06:01  madden
1217  * Changed cpu limit message
1218  *
1219  * Revision 6.106  1999/03/12 15:03:43  egorov
1220  * Add proper Int4-long type casting
1221  *
1222  * Revision 6.105  1999/03/04 14:18:08  egorov
1223  * Do correct filter masking when query is seqloc
1224  * The only BlastMaskTheResidues() function is changed:
1225  *
1226  * Revision 6.104  1999/02/26 22:23:06  madden
1227  * Fixed bug when only one HSP allowed per area
1228  *
1229  * Revision 6.103  1999/02/25 17:40:48  madden
1230  * Check that proper sequence type is used in setup function
1231  *
1232  * Revision 6.102  1999/02/17 13:23:00  madden
1233  * Added hsp_num_max
1234  *
1235  * Revision 6.101  1999/02/11 13:52:59  madden
1236  * fixed memory leak
1237  *
1238  * Revision 6.100  1999/01/28 17:19:50  madden
1239  * Call BlastSeqLocFilterEx on reverse strand if plus strand NULL
1240  *
1241  * Revision 6.99  1999/01/28 16:04:25  madden
1242  * HspArrayPurge change, HeapSort of HSPs, efficiency in blastn wordfinder
1243  *
1244  * Revision 6.98  1999/01/26 17:55:50  madden
1245  * start set to last_db_seq
1246  *
1247  * Revision 6.97  1999/01/19 13:32:33  madden
1248  * Fix for final db sequence to search
1249  *
1250  * Revision 6.96  1998/12/31 18:17:02  madden
1251  * Added strand option
1252  *
1253  * Revision 6.95  1998/12/31 15:36:05  victorov
1254  * filtering internals is now based on SeqLoc instead of Bioseq
1255  *
1256  * Revision 6.94  1998/12/29 17:44:43  madden
1257  * Add BlastGetNonSumStatsEvalue, optimizations for NtWordFinder
1258  *
1259  * Revision 6.93  1998/12/18 16:19:57  madden
1260  * Make BLASTSetUpSearchWithReadDbInternal public, add BlastSearchBlkNewExtra
1261  *
1262  * Revision 6.92  1998/12/17 22:29:47  victorov
1263  * the way gifile is found has changed: now we look first in the
1264  * current directory then $BLASTDB and then in ncbirc
1265  *
1266  * Revision 6.91  1998/12/15 14:11:27  madden
1267  * Change to permit an arbitrary number of HSPs
1268  *
1269  * Revision 6.90  1998/11/27 15:44:58  madden
1270  * Ensure that gap_x_dropoff_final is at least as large as gap_x_dropoff.
1271  *
1272  * Revision 6.89  1998/11/23 13:36:07  madden
1273  * Check for non-NULL tick_callback before acquiring mutex
1274  *
1275  * Revision 6.88  1998/11/19 14:03:24  madden
1276  * Added comments, minor efficiency
1277  *
1278  * Revision 6.87  1998/10/13 20:37:51  madden
1279  * Use IS_residue after call to SeqPortGetResidue
1280  *
1281  * Revision 6.86  1998/09/24 15:26:34  egorov
1282  * Fix lint complaints
1283  *
1284  * Revision 6.85  1998/09/22 16:28:03  madden
1285  * Added call to lookup_position_aux_destruct
1286  *
1287  * Revision 6.84  1998/09/14 15:11:12  egorov
1288  * Add support for Int8 length databases; remove unused variables
1289  *
1290  * Revision 6.83  1998/09/04 14:45:39  madden
1291  * Moved code from blast.c to blastool.c
1292  *
1293  * Revision 6.82  1998/08/29 20:06:46  madden
1294  * Do not find words for pattern search
1295  *
1296  * Revision 6.81  1998/08/26 19:20:26  madden
1297  * Added SignalIgnore
1298  *
1299  * Revision 6.80  1998/08/13 20:00:20  egorov
1300  * Add check if gilist file exists on server
1301  *
1302  * Revision 6.79  1998/08/11 13:27:22  madden
1303  * Fix to small function for culling
1304  *
1305  * Revision 6.78  1998/08/05 13:08:16  madden
1306  * Removed obsolete global_rdfp
1307  *
1308  * Revision 6.77  1998/07/30 19:00:24  madden
1309  * Change to allow search of subset of database
1310  *
1311  * Revision 6.76  1998/07/28 21:17:45  madden
1312  * added do_not_reevaluate and mask_at_hash
1313  *
1314  * Revision 6.75  1998/07/25 14:26:39  madden
1315  * Added comments
1316  *
1317  * Revision 6.74  1998/07/22 20:31:25  madden
1318  * Added comments
1319  *
1320  * Revision 6.73  1998/07/22 12:16:23  madden
1321  * Added handle_results
1322  *
1323  * Revision 6.72  1998/07/21 20:58:01  madden
1324  * Changes to allow masking at hash only
1325  *
1326  * Revision 6.71  1998/07/17 15:39:53  madden
1327  * Changes for Effective search space.
1328  *
1329  * Revision 6.70  1998/07/14 20:14:37  egorov
1330  * Allow to specify gilist and gifile from client side
1331  *
1332  * Revision 6.69  1998/07/09 14:39:04  madden
1333  * Fix memory leak
1334  *
1335  * Revision 6.68  1998/07/02 21:00:36  egorov
1336  * Remove memory leak in threaded version
1337  *
1338  * Revision 6.67  1998/06/25 13:14:48  madden
1339  * check for NULL pointer in BlastPossibleDeleteWholeHeap
1340  *
1341  * Revision 6.66  1998/06/12 16:07:40  madden
1342  * Fixed typo
1343  *
1344  * Revision 6.65  1998/06/12 15:52:52  madden
1345  * Fixed warnings
1346  *
1347  * Revision 6.64  1998/06/02 21:21:18  madden
1348  * Changes for DNA matrices
1349  *
1350  * Revision 6.63  1998/06/02 13:10:14  madden
1351  * Fixed increment problem in for loop
1352  *
1353  * Revision 6.62  1998/05/28 19:58:48  madden
1354  * Zhengs new culling code
1355  *
1356  * Revision 6.61  1998/05/22 20:19:51  madden
1357  * Changes to fix multi-db search bug
1358  *
1359  * Revision 6.60  1998/05/17 16:28:39  madden
1360  * Allow changes to filter options and cc filtering.
1361  *
1362  * Revision 6.59  1998/05/05 14:05:32  madden
1363  * Added functions BlastStartAwakeThread and BlastStopAwakeThread
1364  *
1365  * Revision 6.58  1998/04/24 21:51:12  madden
1366  * Check return value on BlastScoreBlkFill
1367  *
1368  * Revision 6.57  1998/04/24 19:26:47  madden
1369  * Allocate ideal Karlin-Blk
1370  *
1371  * Revision 6.56  1998/04/15 20:23:47  madden
1372  * offset arg removed from BlastMaskTheResidues
1373  *
1374  * Revision 6.55  1998/04/01 22:46:55  madden
1375  * Set query_invalid flag when there is no valid sequence
1376  *
1377  * Revision 6.54  1998/03/27 01:39:08  madden
1378  * Check for non-zero subject length in link_hsps
1379  *
1380  * Revision 6.53  1998/03/25 22:26:46  madden
1381  * Use NlmThreadCreateEx
1382  *
1383  * Revision 6.52  1998/03/24 15:38:20  madden
1384  * Use BlastDoubleInt4Ptr to keep track of gis and ordinal_ids
1385  *
1386  * Revision 6.51  1998/03/19 22:16:18  madden
1387  * Changes to allow blasting by gi list
1388  *
1389  * Revision 6.50  1998/03/18 14:14:05  madden
1390  * Support random access by gi list
1391  *
1392  * Revision 6.49  1998/03/14 18:29:16  madden
1393  * Added BlastSeqIdListPtr
1394  *
1395  * Revision 6.48  1998/03/09 22:14:39  madden
1396  * Set seqid_list to NULL for child threads
1397  *
1398  * Revision 6.47  1998/02/27 14:34:26  madden
1399  * Added missing return value
1400  *
1401  * Revision 6.46  1998/02/26 22:35:00  madden
1402  * Added return value to link_hsp
1403  *
1404  * Revision 6.45  1998/02/26 19:08:07  madden
1405  *  Removed BlastNtFindWords BlastPopulateAllWordArrays BlastFindWords and BlastNewFindWords
1406  *
1407  * Revision 6.44  1998/02/26 16:56:02  madden
1408  * Fix for flyblast type searches
1409  *
1410  * Revision 6.43  1998/02/24 22:46:00  madden
1411  * Added option to shutdown culling
1412  *
1413  * Revision 6.42  1998/02/19 22:57:20  madden
1414  * Correctly set multiple_hits flag in BlastSetUpSearchEx
1415  *
1416  * Revision 6.41  1998/02/02 21:42:17  madden
1417  * link_hsps returns first BLAST_HSPPtr in list
1418  *
1419  * Revision 6.40  1998/01/31 21:33:49  madden
1420  * Fix to ensure hits are ranked properly
1421  *
1422  * Revision 6.39  1998/01/27 20:33:19  madden
1423  * Adjustments for query and db lengths
1424  *
1425  * Revision 6.38  1998/01/23 22:01:49  madden
1426  * Effective query length fixes for short sequences
1427  *
1428  * Revision 6.37  1998/01/15 19:30:31  madden
1429  * Protection against crashes for short sequences
1430  *
1431  * Revision 6.36  1998/01/09 22:30:06  madden
1432  * Fix for range-dependent BLAST with short sequences
1433  *
1434  * Revision 6.35  1998/01/07 23:04:25  madden
1435  * Added mutex for callbacks
1436  *
1437  * Revision 6.34  1998/01/06 18:25:24  madden
1438  * Save query_slp
1439  *
1440  * Revision 6.33  1998/01/05 22:37:34  madden
1441  * Check that options->multiple_hits_only is set before using multiple_hits
1442  *
1443  * Revision 6.32  1998/01/05 21:14:51  madden
1444  * Added protection against NULL LookupTablePtr and BLAST_WordFinderPtr
1445  *
1446  * Revision 6.31  1998/01/05 16:46:46  madden
1447  * One or both strands can be searched, as opposed to only both, changes to number of contexts
1448  *
1449  * Revision 6.30  1997/12/31 19:46:40  madden
1450  * Optimization of database scanning loop
1451  *
1452  * Revision 6.29  1997/12/31 17:50:42  madden
1453  * Added function BlastNtWordFinder_mh
1454  *
1455  * Revision 6.28  1997/12/29 16:15:01  madden
1456  * Optimizations for BlastNtWordFinder
1457  *
1458  * Revision 6.27  1997/12/24 19:42:57  madden
1459  * Fix for cell dependent blast
1460  *
1461  * Revision 6.26  1997/12/23 19:13:36  madden
1462  * Removed flags parameter from NlmThreadCreate
1463  *
1464  * Revision 6.25  1997/12/23 18:11:51  madden
1465  * Changes for range-dependent blast
1466  *
1467  * Revision 6.24  1997/12/17 19:25:36  madden
1468  * replace THR_BOUND with THREAD_BOUND
1469  *
1470  * Revision 6.23  1997/12/11 22:19:49  madden
1471  * Removed unused variables and function
1472  *
1473  * Revision 6.22  1997/12/10 22:40:28  madden
1474  * Floats used in call to blast_set_parameters, use of defines rather than strings
1475  *
1476  * Revision 6.21  1997/12/08 21:56:25  madden
1477  * Check for queries without valid sequences
1478  *
1479  * Revision 6.20  1997/12/04 21:49:05  madden
1480  * Check for NULL returned by BioseqLockById
1481  *
1482  * Revision 6.19  1997/11/07 21:38:40  madden
1483  * Check for virtual Bioseqs
1484  *
1485  * Revision 6.18  1997/10/30 15:40:55  madden
1486  * Casts and fixes for DEC alpha
1487  *
1488  * Revision 6.17  1997/10/24 19:09:14  madden
1489  * Removed BlastSetReadDB and BlastGetReadDB_ID, changed to ReadDBGetDb and ReadDBGetDbId
1490  *
1491  * Revision 6.16  1997/10/21 19:49:53  madden
1492  * Fix for no valid query sequence and hitlist_max of 1
1493  *
1494  * Revision 6.15  1997/10/06 17:57:49  madden
1495  * DB chunk size now done properly
1496  *
1497  * Revision 6.14  1997/09/29 17:19:30  madden
1498  * Checks for two threads using the same resource
1499  *
1500  * Revision 6.13  1997/09/25 13:44:56  madden
1501  * tblastn fix for multiple db searches
1502  *
1503  * Revision 6.12  1997/09/24 22:36:29  madden
1504  * Fixes for MT multidb searches
1505  *
1506  * Revision 6.11  1997/09/22 18:24:25  madden
1507  * Added ifdef for OS_UNIX_LINUX
1508  *
1509  * Revision 6.10  1997/09/22 17:36:18  madden
1510  * MACROS for position-specific matrices from Andy Neuwald
1511  *
1512  * Revision 6.9  1997/09/16 18:47:44  madden
1513  * ifdef for OS_UNIX_SUN
1514  *
1515  * Revision 6.8  1997/09/16 16:31:22  madden
1516  * More changes for multiple db runs
1517  *
1518  * Revision 6.7  1997/09/15 22:07:19  madden
1519  * Replacing ifdef RLIMIT_CPU with ifdef OS_UNIX
1520  *
1521  * Revision 6.6  1997/09/12 19:56:53  madden
1522  * Fix for multi-threaded runs
1523  *
1524  * Revision 6.5  1997/09/11 18:49:20  madden
1525  * Changes to enable searches against multiple databases.
1526  *
1527  * Revision 6.4  1997/09/10 23:10:53  kans
1528  * added ifdef RLIMIT_CPU for signal and headers
1529  *
1530  * Revision 6.3  1997/09/10 21:27:52  madden
1531  * Changes to set CPU limits
1532  *
1533  * Revision 6.2  1997/09/03 19:06:02  madden
1534  * Bug fix for effective HSP longer than query
1535  *
1536  * Revision 6.1  1997/08/27 14:46:43  madden
1537  * Changes to enable multiple DB searches
1538  *
1539  * Revision 6.0  1997/08/25 18:52:19  madden
1540  * Revision changed to 6.0
1541  *
1542  * Revision 1.227  1997/08/19 18:19:16  madden
1543  * Cast arg of log to Nlm_FloatHi
1544  *
1545  * Revision 1.226  1997/08/12 20:50:28  madden
1546  * Fixed case where two HSPs start at same query offset
1547  *
1548  * Revision 1.225  1997/07/29 17:07:01  madden
1549  * Fix for possible collision of two star threads
1550  *
1551  * Revision 1.224  1997/07/25 15:39:27  madden
1552  * Set correct query ID for filtering
1553  *
1554  * Revision 1.223  1997/07/24 21:08:31  madden
1555  * Take frame into account in sorting of hits for linking
1556  *
1557  * Revision 1.222  1997/07/22 17:17:23  madden
1558  * Added index callback
1559  *
1560  * Revision 1.221  1997/07/17 20:27:51  madden
1561  * Set choice to indicate frame when masking seqLoc is saved
1562  *
1563  * Revision 1.220  1997/07/16 20:35:11  madden
1564  * Call to BlastConvertProteinSeqLoc
1565  *
1566  * Revision 1.219  1997/07/16 18:51:55  madden
1567  * call to BioseqSeg, added static function BlastMakeTempProteinBioseq
1568  *
1569  * Revision 1.218  1997/07/15 20:37:05  madden
1570  * Calls to SeqLocSeg and BioseqSeg
1571  *
1572  * Revision 1.217  1997/07/14 20:11:03  madden
1573  * Removed unused variables
1574  *
1575  * Revision 1.216  1997/07/14 15:30:46  madden
1576  * Changed call to BlastKarlinBlkGappedCalc
1577  *
1578  * Revision 1.215  1997/07/11 19:28:23  madden
1579  * Added function BLASTSetUpSearchByLocWithReadDb
1580  *
1581  * Revision 1.214  1997/07/01 17:50:52  madden
1582  * used gapped Karlin-Altschul parameters when needed in LinkHsp
1583  *
1584  * Revision 1.213  1997/06/27 22:18:31  madden
1585  * MT fix for more threads than db seqs.
1586  *
1587  * Revision 1.212  1997/06/24 13:51:20  madden
1588  * Fixed SeqLoc leak
1589  *
1590  * Revision 1.211  1997/05/27 20:19:17  madden
1591  * Use of SeqLocDust rather than BioseqDust
1592  *
1593  * Revision 1.210  1997/05/22 21:24:46  madden
1594  * Added support for final gapX dropoff value
1595  *
1596  * Revision 1.209  1997/05/20 17:49:55  madden
1597  * Added functions BLASTSetUpSearchByLoc and BLASTSetUpSearchInternalByLoc
1598  *
1599  * Revision 1.208  1997/05/07 20:59:13  madden
1600  * Call to SeqId2OrdinalId replaces call to readdb_gi2seq
1601  *
1602  * Revision 1.207  1997/05/07 13:45:08  madden
1603  * Set mutex lock for ambiguity reevaluation, added use_large_gaps flag
1604  *
1605  * Revision 1.206  1997/05/01  21:08:26  madden
1606  * use ordinal index to rank results when they are statist. equivalent
1607  *
1608  * Revision 1.205  1997/05/01  15:53:07  madden
1609  * Addition of extra KarlinBlk's for psi-blast
1610  *
1611  * Revision 1.204  1997/04/25  13:57:43  madden
1612  * Fixed floating point exception by checking for zero query length value.
1613  *
1614  * Revision 1.203  1997/04/23  21:56:07  madden
1615  * Changes in BlastGetGappedAlignmentTraceback for in-frame gapping tblastn.
1616  *
1617  * Revision 1.202  1997/04/22  14:00:14  madden
1618  * Removed unused variables.
1619  *
1620  * Revision 1.201  1997/04/22  13:04:19  madden
1621  * Changes for in-frame blastx gapping.
1622  *
1623  * Revision 1.200  1997/04/17  22:07:48  madden
1624  * Changes to allow in-frame gapped tblastn.
1625  *
1626  * Revision 1.199  1997/04/09  20:01:53  madden
1627  * Added global_seqid's to allow only certain sequences in a db to be searched.
1628  *
1629  * Revision 1.198  1997/04/07  18:17:09  madden
1630  * Changed length_adjustment calculation.
1631  *
1632  * Revision 1.197  1997/04/04  15:30:37  madden
1633  * Removed extra fprint statement.
1634  *
1635  * Revision 1.196  1997/04/03  19:48:13  madden
1636  * Changes to use effective database length instead of the length of each
1637  * sequence in statistical calculations.
1638  *
1639  * Revision 1.195  1997/03/27  22:30:51  madden
1640  * Used gapped Karlin-Altschul parameters to calculate trigger for gapping.
1641  *
1642  * Revision 1.194  1997/03/20  22:09:52  madden
1643  * Used SeqIdFindBest to find GI in query.
1644  *
1645  * Revision 1.193  1997/03/20  19:57:40  madden
1646  * Changes to support segmented Bioseq queries.
1647  *
1648  * Revision 1.192  1997/03/14  22:06:11  madden
1649  * fixed MT bug in BlastReevaluateWithAmbiguities.
1650  *
1651  * Revision 1.191  1997/03/08  16:52:16  madden
1652  * Check in Reevaluate function to see if sequence is worth checking,
1653  * Added discontinuous option to ParameterBlk.
1654  *
1655  * Revision 1.190  1997/03/07  21:58:36  madden
1656  * Added Boolean gapped argument to BLASTOptionNew.
1657  *
1658  * Revision 1.189  1997/03/07  21:11:22  madden
1659  * Added in check for blastn on gapped calculations.
1660  *
1661  * Revision 1.188  1997/03/05  14:29:46  madden
1662  * Moved BlastSaveCurrentHsp to blastutl.c.
1663  *
1664  * Revision 1.187  1997/03/04  21:34:59  madden
1665  * Added in HspArrayPurge.
1666  *
1667  * Revision 1.186  1997/03/04  20:08:19  madden
1668  * Moved gapped alignment code from blast.c to blastutl.c
1669  *
1670  * Revision 1.185  1997/03/03  22:39:45  madden
1671  * Moved code from blast.c to blastutl.c.
1672  *
1673  * Revision 1.184  1997/03/03  21:47:22  madden
1674  * Moved functions from blast.c to blastutl.c for 16-bit windows.
1675  *
1676  * Revision 1.183  1997/03/03  20:58:09  madden
1677  * Fixed call to BlastGetGappedAlignmentTraceback; purged hitlist
1678  * for very short database sequences.
1679  *
1680  * Revision 1.182  1997/03/01  18:25:33  madden
1681  * reverse flag added to BlastGetGappedAlignmentTraceback functions.
1682  *
1683  * Revision 1.181  1997/02/24  16:40:38  madden
1684  * Change to GapXEditBlockToSeqAlign to use first SeqIdPtr, duplicate.
1685  *
1686  * Revision 1.180  1997/02/24  15:09:38  madden
1687  * Fixed bug where NULL pointer was dereferenced.
1688  *
1689  * Revision 1.179  1997/02/24  13:10:27  madden
1690  * Added function BlastGappedScoreInternal.
1691  *
1692  * Revision 1.178  1997/02/23  16:44:47  madden
1693  * GapAlignBlk became GapAlignBlkPtr and GapAlignBlkNew called.
1694  *
1695  * Revision 1.177  1997/02/20  21:50:24  madden
1696  * Added frame and translation information to GapAlignBlk, assigned it.
1697  *
1698  * Revision 1.176  1997/02/20  18:38:34  madden
1699  * Allowed theoretical database length to be set.
1700  *
1701  * Revision 1.175  1997/02/19  22:29:32  madden
1702  * Changes to handle multiple contexts in BlastGetGappedScore.
1703  *
1704  * Revision 1.174  1997/02/19  14:17:03  madden
1705  * GappedScore routines now work on all contexts.
1706  *
1707  * Revision 1.173  1997/02/17  17:39:54  madden
1708  * Changes to RealBlastGetGappedAlignmentTraceback for gapped blastn.
1709  *
1710  * Revision 1.172  1997/02/13  21:04:15  madden
1711  * fixed UMR.
1712  *
1713  * Revision 1.171  1997/02/12  22:19:08  madden
1714  * Added functions BlastNewWordExtend, BlastNewWordExtend_prelim, and
1715  * BlastNewFindWords for use in position based blast.
1716  *
1717  * Revision 1.170  1997/02/11  19:29:34  madden
1718  * Addition of BlastGetGappedScoreWithReaddb, removed dependence of
1719  * BlastGetGappedScore on readdb.
1720  *
1721  * Revision 1.169  1997/02/10  20:27:01  madden
1722  * Changed some CharPtr's into Uint1Ptr's.
1723  *
1724  * Revision 1.168  1997/02/10  20:14:23  madden
1725  * replaced doubles by Nlm_FloatHi's.
1726  *
1727  * Revision 1.167  1997/02/10  20:02:58  madden
1728  * Changed BlastSearchBlkNew to allow a set of words to be passed in.
1729  *
1730  * Revision 1.166  1997/02/10  15:24:59  madden
1731  * Set posMatrix element in gap_align structure.
1732  *
1733  * Revision 1.165  1997/02/07  22:43:03  madden
1734  * Moved BLAST_WordFinderNew and Destruct from blast.c to blastutl.c, made
1735  * non-static.
1736  *
1737  * Revision 1.164  1997/02/07  22:32:40  madden
1738  * Moved BlastGetSubjectId to blastutl.c, changed calling convention of
1739  * BlastGetSubjectId.
1740  *
1741  * Revision 1.163  1997/02/06  15:36:14  madden
1742  * Reuse 1st threshold if necessary.
1743  *
1744  * Revision 1.162  1997/02/06  14:27:15  madden
1745  * Addition of BlastAllWord structure.
1746  *
1747  * Revision 1.161  1997/02/05  19:54:59  madden
1748  * Changes for blastn gapped alignments.
1749  *
1750  * Revision 1.160  1997/02/04  22:12:59  madden
1751  * Added function RealBlastGetGappedAlignmentTraceback.
1752  *
1753  * Revision 1.159  1997/02/04  20:11:42  madden
1754  * Moved functions to blastutl.c
1755  *
1756  * Revision 1.158  1997/02/04  16:22:32  madden
1757  * Changes to enable gapped alignments on the reverse strand.
1758  *
1759  * Revision 1.157  1997/02/03  19:24:01  madden
1760  * Added function CheckGappedAlignmentsForOverlap.
1761  *
1762  * Revision 1.156  1997/02/03  17:19:03  madden
1763  * Increased number of bits for second pass if context factor > 1.
1764  *
1765  * Revision 1.155  1997/02/03  13:02:12  madden
1766  * Corrected SeqAlign offsets for minus strands.
1767  *
1768  * Revision 1.154  1997/01/31  22:42:51  madden
1769  * changed default thresholds and added strands to construction of SeqAligns.
1770  *
1771  * Revision 1.153  1997/01/31  22:13:02  madden
1772  * Adjusted bit score by logK.
1773  *
1774  * Revision 1.152  1997/01/31  14:45:27  madden
1775  * Added check for threshold value to ValidateOptions.
1776  *
1777  * Revision 1.151  1997/01/30  19:12:19  madden
1778  * Fixed memory leak.
1779  *
1780  * Revision 1.150  1997/01/28  22:38:56  madden
1781  * Added function BLASTOptionValidate.
1782  *
1783  * Revision 1.149  1997/01/28  21:50:05  madden
1784  * Adjustments to CopyResultHspToHSP.
1785  *
1786  * Revision 1.148  1997/01/24  16:51:44  madden
1787  * Fixed memory leak.
1788  *
1789  * Revision 1.147  1997/01/24  15:13:02  madden
1790  * Changes to accommodate gapped blastn.
1791  *
1792  * Revision 1.146  1997/01/22  17:45:08  madden
1793  * Added search to GetStartForGappedAlignment.
1794  *
1795  * Revision 1.145  1997/01/17  17:41:44  madden
1796  * Added flags for position based BLAST.
1797  *
1798  * Revision 1.144  1997/01/14  17:22:30  madden
1799  * Changes for MT, especially for small databases.
1800  *
1801  * Revision 1.143  1997/01/13  22:13:41  madden
1802  * set further_process to FALSE as needed.
1803  *
1804  * Revision 1.142  1997/01/13  20:06:36  madden
1805  * Added index_addition to strings before checking for ambiguities.
1806  *
1807  * Revision 1.141  1997/01/13  15:37:05  madden
1808  * Changed prototypes for star_callback and tick_callback.
1809  *
1810  * Revision 1.140  1997/01/11  18:58:29  madden
1811  * Removed defunct PerformBlastSearch... functions.
1812  *
1813  * Revision 1.139  1997/01/11  18:39:48  madden
1814  * Simplified ranged blast model.
1815  *
1816  * Revision 1.138  1997/01/11  18:22:10  madden
1817  * Changes to allow S2 to be set.
1818  *
1819  * Revision 1.137  1997/01/11  16:41:42  madden
1820  * Fix to tick_proc for MT runs.
1821  *
1822  * Revision 1.136  1997/01/09  17:44:35  madden
1823  * Added "bit_score" to BLASTResultHsp.
1824  *
1825  * Revision 1.135  1997/01/09  13:33:43  madden
1826  * Fixed NlmThreadCompare typo.
1827  *
1828  * Revision 1.134  1997/01/08  23:05:37  madden
1829  * Added call to TNlmThreadCompare.
1830  *
1831  * Revision 1.133  1997/01/07  20:40:29  madden
1832  * Added reverse Boolean to GetSeqAlignForResultHitList.
1833  *
1834  * Revision 1.132  1997/01/06  22:40:55  madden
1835  * Added function BlastGetSubjectId.
1836  *
1837  * Revision 1.131  1997/01/06  19:31:49  madden
1838  * Removed subject and query ID from GapAlignBlk.
1839  *
1840  * Revision 1.130  1997/01/06  17:22:59  madden
1841  * Used GapXEditScriptToSeqAlign to find SeqAlign.
1842  *
1843  * Revision 1.129  1997/01/04  20:41:11  madden
1844  * Shorter sequence is always the query in BlastTwoSequences.
1845  *
1846  * Revision 1.128  1997/01/03  20:29:32  madden
1847  * Corrected count of significant sequences.
1848  *
1849  * Revision 1.127  1997/01/03  19:03:35  madden
1850  * Fixed incorrect KarlinBlkPtr use.
1851  *
1852  * Revision 1.126  1997/01/03  17:26:50  madden
1853  * Fixed stats recordation.
1854  *
1855  * Revision 1.125  1996/12/30  21:45:28  madden
1856  * Added "strict" Boolean to CheckForRequiredRegion.
1857  *
1858  * Revision 1.124  1996/12/30  17:14:06  madden
1859  * Fixes for changes for "require a portion of the query sequence".
1860  *
1861  * Revision 1.123  1996/12/30  15:44:25  madden
1862  * Added capability to require a portion of the query sequence.
1863  *
1864  * Revision 1.122  1996/12/27  20:44:10  madden
1865  * Changes to require that part of the query be included.
1866  *
1867  * Revision 1.121  1996/12/23  22:02:05  madden
1868  * Changes to allow two sequences to be compared.
1869  *
1870  * Revision 1.120  1996/12/23  15:57:21  madden
1871  * Removed extra call to BlastPreliminaryGappedScore.
1872  * y
1873  *
1874  * Revision 1.119  1996/12/23  14:04:44  madden
1875  * Added gap_trigger.
1876  *
1877  * Revision 1.118  1996/12/20  21:11:40  madden
1878  * Changes to allow multiple hits runs only.
1879  *
1880  * Revision 1.117  1996/12/20  15:31:05  madden
1881  * Removed defunct function.
1882  *
1883  * Revision 1.116  1996/12/20  14:22:48  madden
1884  * Added discontinuous Boolean to GetSeqAlignForResultHitList.
1885  *
1886  * Revision 1.115  1996/12/18  14:33:13  madden
1887  * Checked for high score when E-values are equivalent.
1888  *
1889  * Revision 1.114  1996/12/17  18:28:10  madden
1890  * Changed score used to gap HSP's.
1891  *
1892  * Revision 1.113  1996/12/17  17:28:27  madden
1893  * Removed sleep function for non-UNIX platforms.
1894  *
1895  * Revision 1.112  1996/12/17  17:27:03  madden
1896  * Count number of attempted gappings.
1897  *
1898  * Revision 1.111  1996/12/17  13:47:57  madden
1899  * Added star_proc.
1900  *
1901  * Revision 1.110  1996/12/16  19:24:38  madden
1902  * Correct to initial wordsize for blastn.
1903  *
1904  * Revision 1.109  1996/12/16  18:24:21  madden
1905  * Corrected shift in BlastNtFindWords.
1906  *
1907  * Revision 1.108  1996/12/16  15:29:12  madden
1908  * Changed gapalign.h to gapxdrop.h
1909  *
1910  * Revision 1.107  1996/12/16  14:35:48  madden
1911  * Replaced BLAST_GAPPED_OPTION ifdef with gapped_calculation Boolean.
1912  *
1913  * Revision 1.106  1996/12/13  22:00:23  madden
1914  * Corrected starting point for gapped extension with traceback.
1915  *
1916  * Revision 1.105  1996/12/13  18:13:56  madden
1917  * Added tick callback functions
1918  *
1919  * Revision 1.104  1996/12/13  15:09:31  madden
1920  * Changes to parameters used for gapped extensions.
1921  *
1922  * Revision 1.103  1996/12/12  16:44:35  madden
1923  * Removed unused variables.
1924  *
1925  * Revision 1.102  1996/12/12  16:34:58  madden
1926  * GapAlignBlk replaces arguments in PerformGappedAlignment etc.
1927  *
1928  * Revision 1.101  1996/12/12  14:04:03  madden
1929  * Fixes for check on whether HSP is already contained by gapped alignment.
1930  *
1931  * Revision 1.100  1996/12/10  19:20:15  madden
1932  * Changed minimal HSP score for gapped alignments.
1933  *
1934  * Revision 1.99  1996/12/10  17:30:59  madden
1935  * Changed statistics for gapped blastp
1936  *
1937  * Revision 1.98  1996/12/09  23:24:05  madden
1938  * Added parameters to control which sequences get a gapped alignment.
1939  *
1940  * Revision 1.97  1996/12/09  20:45:47  madden
1941  * Adjustments to calculation of gapped HSP's.
1942  *
1943  * Revision 1.96  1996/12/08  15:19:59  madden
1944  * Added functions to enable gapped alignments.
1945  *
1946  * Revision 1.95  1996/11/27  22:46:08  madden
1947  * Removed includes that are no longer used.
1948  *
1949  * Revision 1.94  1996/11/27  22:25:09  madden
1950  * Corrected collection of statistics for MT runs.
1951  *
1952  * Revision 1.93  1996/11/27  21:52:30  madden
1953  * Added function FilterWithSeg.
1954  *
1955  * Revision 1.92  1996/11/26  19:53:46  madden
1956  * Checked for return value on BlastScoreBlkMatFill.
1957  *
1958  * Revision 1.91  1996/11/25  20:13:47  madden
1959  * Changed how NlmMutexInit is called.
1960  *
1961  * Revision 1.90  1996/11/25  19:51:41  madden
1962  * Fix for tblastx stats.
1963  *
1964  * Revision 1.89  1996/11/25  18:58:24  madden
1965  * Adjustments for translated database.
1966  *
1967  * Revision 1.88  1996/11/22  19:04:58  madden
1968  * Removed ifdef for OLD_BIT_ORDER; changed default values.
1969  *
1970  * Revision 1.87  1996/11/22  15:28:03  madden
1971  * Fixed problem of last query residue examined on a diagonal.
1972  *
1973  * Revision 1.86  1996/11/21  18:08:38  madden
1974  * Changed order of if-else statements in get_db_chunk for
1975  * possible improvement of parallelization.
1976  *
1977  * Revision 1.85  1996/11/20  23:15:50  madden
1978  * Changes to acquisition of Mutex in BlastSaveCurrentHitlist to
1979  * improve parallelization.
1980  *
1981  * Revision 1.84  1996/11/19  22:23:52  madden
1982  * Changed link_hsps to link HSP's faster.
1983  *
1984  * Revision 1.83  1996/11/18  19:32:09  madden
1985  * Removed unused variables found by CodeWarrior.
1986  *
1987  * Revision 1.82  1996/11/18  18:07:57  madden
1988  * Duplicated translation_buffer (for tblast[nx]).
1989  *
1990  * Revision 1.81  1996/11/18  17:28:13  madden
1991  * Duplicated translation information in BlastSearchBlkDuplicate and
1992  * also number of contexts.
1993  *
1994  * Revision 1.80  1996/11/18  15:45:40  madden
1995  * FilterDNA function to perform dusting added (by Sergei Shavirin).
1996  *
1997  * Revision 1.79  1996/11/15  17:54:54  madden
1998  * Added support for alternate genetic codes for blastx, tblast[nx].
1999  *
2000  * Revision 1.78  1996/11/14  16:37:58  madden
2001  * Put average lengths in defines.
2002  *
2003  * Revision 1.77  1996/11/14  16:21:55  madden
2004  * changed CharPtr to Uint1Ptr in GetTranslation.
2005  *
2006  * Revision 1.76  1996/11/13  22:35:18  madden
2007  * Added tblast[nx] capability to BlastReevaluateWithAmbiguities.
2008  *
2009  * Revision 1.75  1996/11/12  19:56:35  madden
2010  * Small gaps not considered for blastn.
2011  *
2012  * Revision 1.74  1996/11/12  16:21:17  madden
2013  * Added in context_factor.
2014  *
2015  * Revision 1.73  1996/11/12  13:46:15  madden
2016  * Removed defunct SetUpBlastSearch type functions.
2017  *
2018  * Revision 1.72  1996/11/11  17:44:21  madden
2019  * Fixed check for overlap in search.
2020  *
2021  * Revision 1.71  1996/11/09  21:02:59  madden
2022  * Fixes for blastn extensions.
2023  *
2024  * Revision 1.70  1996/11/08  21:45:03  madden
2025  * Fix for blastn extensions.
2026  *
2027  * Revision 1.69  1996/11/07  22:31:15  madden
2028  * Added function BlastReevaluateWithAmbiguities for nucl. db's.
2029  *
2030  * Revision 1.68  1996/11/07  17:31:26  madden
2031  * Fixed over-incrementing of index in link_hsps.
2032  *
2033  * Revision 1.67  1996/11/06  22:10:01  madden
2034  * Further optimization of BlastTranslateUnambiguousSequence.
2035  *
2036  * Revision 1.66  1996/11/05  23:19:08  madden
2037  * Rewrote BlastTranslateUnambiguousSequence so it's faster.
2038  *
2039  * Revision 1.65  1996/11/04  19:27:13  madden
2040  * Deallocated search->translation_buffer if allocated.
2041  *
2042  * Revision 1.64  1996/11/04  16:59:43  madden
2043  * Added function GetPrivatTranslationTable to optimize translation
2044  * of database.
2045  *
2046  * Revision 1.63  1996/11/01  21:06:49  madden
2047  * Corrected the (nucl.) database for the translated length for tblast[nx].
2048  *
2049  * Revision 1.62  1996/10/31  16:27:20  shavirin
2050  * Multiple changes due to reversal of residues in BLAST database
2051  * for nucleotide sequences from (4321) to (1234)
2052  * New dumper now required to create BLAST databases.
2053  *
2054  * Revision 1.61  1996/10/28  22:15:24  madden
2055  * Added check in BlastNtWordFinder that subject sequence is longer
2056  * than min. word size.
2057  *
2058  * Revision 1.60  1996/10/04  20:12:26  madden
2059  * Fixed memory leaks found by purify.
2060  *
2061  * Revision 1.59  1996/10/03  20:49:29  madden
2062  * Calculate standard Karlin parameters for blastx and tblastx,
2063  * Use proper Karlin parameters in linking of HSP's.
2064  *
2065  * Revision 1.58  1996/10/02  19:59:44  madden
2066  * Fixed translation of query in blastx, calculated different karlin parameters
2067  * for each frame.
2068  *
2069  * Revision 1.57  1996/10/01  21:24:02  madden
2070  * e2 value now depends on program, correct cutoffs for blastn.
2071  *
2072  * Revision 1.56  1996/10/01  18:49:06  madden
2073  * Properly placed counters for number of hits, extensions.
2074  *
2075  * Revision 1.55  1996/09/30  21:56:12  madden
2076  * Replaced query alphabet of ncbi2na with blastna alphabet.
2077  *
2078  * Revision 1.54  1996/09/26  21:48:29  madden
2079  * Set small/large gaps in SeqAlign.
2080  *
2081  * Revision 1.53  1996/09/26  20:18:43  madden
2082  * Addition of ExperimentalLocalBlastSearch function, fixes to SeqIdPtr's.
2083  *
2084  * Revision 1.52  1996/09/25  19:05:24  madden
2085  * Fixes to nucl. extension functions.
2086  *
2087  * Revision 1.51  1996/09/25  14:31:06  madden
2088  * Removed functions and statements for discontiguous word hits.
2089  *
2090  * Revision 1.50  1996/09/24  22:13:06  madden
2091  * BlastNtWordExtend now extends properly to end of query or subject.
2092  *
2093  * Revision 1.49  1996/09/24  18:39:51  madden
2094  * Changes to extend into the remainder of nucl. sequences (for blastn) and
2095  * to perform minus strand extensions.
2096  *
2097  * Revision 1.48  1996/09/20  21:58:14  madden
2098  * Changed CharPtr's to Uint1Ptr, got remainder length out of top order bits.
2099  *
2100  * Revision 1.47  1996/09/19  13:46:29  madden
2101  * Removed unused variables.
2102  *
2103  * Revision 1.46  1996/09/19  13:16:20  madden
2104  * Adjusted subject offset by READDB_COMPRESSION_RATIO for calc. of diagonal.
2105  *
2106  * Revision 1.45  1996/09/18  21:25:30  madden
2107  * Fixed bug in WordFinder for nucleotides.
2108  *
2109  * Revision 1.44  1996/09/18  13:39:24  madden
2110  * fixed offsets for SeqAligns on minus strands.
2111  *
2112  * Revision 1.43  1996/09/17  12:27:04  madden
2113  * Changes to perform correct extensions in blastn.
2114  *
2115  * Revision 1.42  1996/09/16  19:41:14  sad
2116  * Changed BlastTimeFillStructure() to use new functions from ncbitime.
2117  * That removes platform-dependent code from this function.
2118  *
2119  * Revision 1.41  1996/09/13 20:01:52  madden
2120  * put in READDB_UNPACK macros.
2121  *
2122  * Revision 1.40  1996/09/12  21:11:55  madden
2123  * Added extension functions for blastn
2124  *
2125  * Revision 1.39  1996/09/11  22:21:06  madden
2126  * Changes for blastn.
2127  *
2128  * Revision 1.38  1996/09/11  20:36:41  shavirin
2129  * Removed few Windows NT compiler warnings
2130  *
2131  * Revision 1.35  1996/09/11  19:14:09  madden
2132  * Added BLAST_OptionsBlkPtr structure and use thereof.
2133  *
2134  * Revision 1.34  1996/09/10  19:40:35  madden
2135  * Added functions to perform blastn comparison.
2136  *
2137  * Revision 1.33  1996/09/05  19:39:52  madden
2138  * Added "word_width" to position already covered on diagonal.
2139  *
2140  * Revision 1.32  1996/09/05  19:26:16  madden
2141  * Combined masking and shifting, removed some checks if prelim.
2142  *
2143  * Revision 1.31  1996/09/05  14:12:19  madden
2144  * New (faster) type of extension.
2145  *
2146  * Revision 1.30  1996/09/03  16:27:21  madden
2147  * Added efficiency in scanning of database.
2148  *
2149  * Revision 1.29  1996/08/30  19:27:37  madden
2150  * Fix for one-pass blast, memory-mapped file was being freed.
2151  *
2152  * Revision 1.28  1996/08/30  18:23:50  madden
2153  * A few efficiencies and a correction for one-pass blast.
2154  *
2155  * Revision 1.27  1996/08/30  15:17:57  madden
2156  * Minor efficiency in BlastReapHitlistByEvalue.
2157  *
2158  * Revision 1.25  1996/08/28  20:07:36  madden
2159  * Fix for UMR when the (nucl) sequence is exactly div. by four.
2160  *
2161  * Revision 1.24  1996/08/28  17:11:07  madden
2162  * Fixes for the translation of (nucl.) database sequences.
2163  *
2164  * Revision 1.23  1996/08/27  21:51:44  madden
2165  * Changes for tblastx
2166  *
2167  * Revision 1.22  1996/08/27  17:47:37  madden
2168  * current_hitlist purged on second pass for tblastn.
2169  *
2170  * Revision 1.21  1996/08/26  17:20:20  shavirin
2171  * Added support for WIN32 in function BlastTimeFillStructure()
2172  *
2173  * Revision 1.20  1996/08/23  18:50:23  madden
2174  * Adjusted some of the NT warning fixes to give correct results.
2175  *
2176  * Revision 1.19  1996/08/23  16:52:07  madden
2177  * Changed Int1 to Int4 in SetUpBlastSearchInternal.
2178  *
2179  * Revision 1.18  1996/08/23  16:39:02  madden
2180  * Fixed problem with SaveCurrentHsp.
2181  *
2182  * Revision 1.17  1996/08/23  15:29:44  shavirin
2183  * Fixed a lot of NT compiler warnings about type mismatch
2184  *
2185  * Revision 1.16  1996/08/21  21:37:01  madden
2186  * Added casts to silence compiler warnings.
2187  *
2188  * Revision 1.15  1996/08/21  21:24:56  madden
2189  * Changes for tblastn.
2190  *
2191  * Revision 1.14  1996/08/21  12:55:54  madden
2192  * Changed "purge" frame.
2193  *
2194  * Revision 1.13  1996/08/15  17:07:57  madden
2195  * Added efficiencies in loop that scans database.
2196  *
2197  * Revision 1.12  1996/08/14  20:01:30  madden
2198  * Efficiencies suggested by Zheng Zhang.
2199  *
2200  * Revision 1.11  1996/08/14  18:15:31  madden
2201  * Query frame moved from context to BlastSeqBlk.
2202  *
2203  * Revision 1.10  1996/08/14  17:19:29  madden
2204  * Correctly set frame for subject.
2205  *
2206  * Revision 1.9  1996/08/14  15:20:37  madden
2207  * Added Blast prefix to TranslateUnambiguousSequence function name.
2208  *
2209  * Revision 1.8  1996/08/14  14:30:42  madden
2210  * Cleaned up problem with UMR in TranslateUnambiguousSequence.
2211  *
2212  * Revision 1.7  1996/08/13  22:04:36  madden
2213  * Fixed TranslateUnambiguousSequence to properly read a nucl. db.
2214  *
2215  * Revision 1.6  1996/08/13  15:26:29  madden
2216  * Changes for tblastn.
2217  *
2218  * Revision 1.5  1996/08/09  22:11:12  madden
2219  * Added original_sequence to BlastSequenceAddSequence.
2220  *
2221  * Revision 1.4  1996/08/08  21:39:00  madden
2222  * Added some functions for tblastn.
2223  *
2224  * Revision 1.3  1996/08/07  14:23:45  madden
2225  * Added functions to produce SeqAlign from BLAST results.
2226  *
2227  * Revision 1.2  1996/08/06  16:07:31  madden
2228  * Removed unused functions Bsp2BLAST0Request.
2229  *
2230  * Revision 1.1  1996/08/05  19:45:46  madden
2231  * Initial revision
2232  *
2233  * Revision 1.118  1996/08/05  13:56:44  madden
2234  * Check if threads are available with NlmThreadsAvailable.
2235  *
2236  * Revision 1.117  1996/08/02  14:20:06  madden
2237  * Changes in call to readdb.
2238  *
2239  * Revision 1.116  1996/07/31  13:46:23  madden
2240  * Each thread gets own copy of ewp_params in SearchBlk.
2241  *
2242  * Revision 1.115  1996/07/31  13:09:17  madden
2243  * Changes for threaded blast.
2244  *
2245  * Revision 1.114  1996/07/25  20:45:20  madden
2246  * Change to calling convention of PerformBlastSearchWithReadDb.
2247  *
2248  * Revision 1.113  1996/07/25  12:55:20  madden
2249  * readdb_get_sequence call changed to allow for systems w/o mmap.
2250  *
2251  * Revision 1.112  1996/07/24  13:16:28  madden
2252  * Removed commented out fprintf.
2253  *
2254  * Revision 1.111  1996/07/24  12:00:07  madden
2255  * Changes for blastx.
2256  *
2257  * Revision 1.110  1996/07/18  22:00:02  madden
2258  * Changes for multiple contexts.
2259  *
2260  * Revision 1.109  1996/07/18  13:35:51  madden
2261  * Addition of the BLASTContextStructPtr.
2262  *
2263  * Revision 1.108  1996/07/16  15:01:02  madden
2264  * Cleaned up link_hsp function.
2265  *
2266  * Revision 1.107  1996/07/16  14:37:42  madden
2267  * Changes to link_hsp's so another array is not needed for the HSP's.
2268  *
2269  * Revision 1.106  1996/07/11  16:03:58  madden
2270  * SaveCurrentHitlist keeps track of which set an HSP belongs to.
2271  *
2272  * Revision 1.105  1996/07/05  17:16:34  madden
2273  * Optimized loop in contiguous word finder.
2274  *
2275  * Revision 1.104  1996/07/03  14:26:05  madden
2276  * Added test extension function.
2277  *
2278  * Revision 1.103  1996/07/02  14:32:53  madden
2279  * Added hspcnt_max.
2280  *
2281  * Revision 1.102  1996/07/02  12:04:15  madden
2282  * HSP's saved on array, rather than linked list.
2283  *
2284  * Revision 1.101  1996/07/01  15:30:06  madden
2285  * Don't NULL out hit if extension to left does not succeed.
2286  *
2287  * Revision 1.100  1996/06/27  18:41:39  madden
2288  * Changes to cutoff score to start second pass.
2289  *
2290  * Revision 1.99  1996/06/26  19:38:12  madden
2291  * Don't continue extension on 1st pass if the first (left) extension
2292  * doesn't reach to the first hit.
2293  *
2294  * Revision 1.98  1996/06/26  15:53:54  madden
2295  * Second dropoff score parameter added.
2296  *
2297  * Revision 1.97  1996/06/26  14:30:25  madden
2298  * Removed unused variables.
2299  *
2300  * Revision 1.96  1996/06/26  14:09:16  madden
2301  * Added comments and indents to loops.
2302  *
2303  * Revision 1.95  1996/06/26  13:29:50  madden
2304  * Changes to reduce the amount of memory and time of BlastFindWords.
2305  *
2306  * Revision 1.94  1996/06/24  20:26:46  madden
2307  * Dropoff ("X") set to first or second dropoff parameter.
2308  *
2309  * Revision 1.93  1996/06/24  17:57:09  madden
2310  * Added wordFinders to test dropoff scores.
2311  *
2312  * Revision 1.92  1996/06/20  16:51:17  madden
2313  * Removed unused parameters.
2314  *
2315  * Revision 1.91  1996/06/20  16:15:57  madden
2316  * Replaced int's with Int4's.
2317  *
2318  * Revision 1.90  1996/06/19  14:18:33  madden
2319  * Addition of SetUpBlastSearchInternal function.
2320  *
2321  * Revision 1.89  1996/06/17  19:02:13  madden
2322  * Removed unused MP code.
2323  *
2324  * Revision 1.88  1996/06/17  18:23:31  madden
2325  * Removed unused functions.
2326  *
2327  * Revision 1.87  1996/06/14  17:58:13  madden
2328  * Changes to avoid nulling out arrays for every sequence.
2329  *
2330  * Revision 1.86  1996/06/13  21:16:33  madden
2331  * database length removed from BLAST_ExtendWordNew.
2332  *
2333  * Revision 1.85  1996/06/13  21:04:17  madden
2334  * Added efficiencies to word finders.
2335  *
2336  * Revision 1.84  1996/06/11  18:13:54  madden
2337  * Removed unused variables.
2338  *
2339  * Revision 1.83  1996/06/11  17:58:31  madden
2340  * Changes to allow shorter arrays for multiple hits type blast.
2341  *
2342  * Revision 1.82  1996/06/10  16:52:16  madden
2343  * Use bit-shifting and masking instead of dividing and remainder.
2344  *
2345  * Revision 1.81  1996/06/10  13:44:07  madden
2346  * Changes to reduce the size of the "already visited" array.
2347  *
2348  * Revision 1.80  1996/06/06  17:54:09  madden
2349  * number_of_bits added to SetUpBlastSearch and SetUpBlastSearchWithReadDb.
2350  *
2351  * Revision 1.79  1996/06/06  14:09:22  madden
2352  * Removed defunct function BlastNWSThreshold, blast_set_parameters became
2353  * static.
2354  *
2355  * Revision 1.78  1996/06/06  13:54:51  madden
2356  * Removed defunct function BLAST_ParameterBlkFill
2357  *
2358  * Revision 1.77  1996/06/06  13:23:17  madden
2359  * CalculateSecondCutoffs only called for second pass.
2360  *
2361  * Revision 1.76  1996/06/04  15:32:53  madden
2362  * Changed counting of first and second pass hits.
2363  *
2364  * Revision 1.75  1996/06/04  13:50:28  madden
2365  * Purge HitList, rather than deleting it.
2366  *
2367  * Revision 1.74  1996/05/29  17:21:07  madden
2368  * Removed defunct BlastFixEandPValues function, replaced one call
2369  * to BlastSequenceAddSequence.
2370  *
2371  * Revision 1.73  1996/05/29  12:43:25  madden
2372  * Function BlastTimeFillStructure added to keep track of time.
2373  *
2374  * Revision 1.72  1996/05/28  14:12:53  madden
2375  * Added code to collect statistics.
2376  *
2377  * Revision 1.71  1996/05/23  21:55:04  madden
2378  * Removed unused variable initlen
2379  *
2380  * Revision 1.70  1996/05/22  20:19:22  madden
2381  * Removed unused variables, fixed codecenter nits.
2382  *
2383  * Revision 1.68  1996/05/20  21:17:49  madden
2384  * Changed (incorrect) NULL's to zero's.
2385  *
2386  * Revision 1.67  1996/05/16  19:50:15  madden
2387  * Added documentation block.
2388  *
2389  * Revision 1.66  1996/05/16  13:28:24  madden
2390  * Both 1st and 2nd pass can separately be contiguous or discontiguous.
2391  *
2392  * Revision 1.64  1996/05/14  19:51:37  madden
2393  * Added some register variables.
2394  *
2395  * Revision 1.63  1996/05/14  18:56:53  madden
2396  * Unrolled some loops in extension function.
2397  *
2398  * Revision 1.62  1996/05/14  16:15:59  madden
2399  * Fixes to SaveCurrentHitlist
2400  *
2401  * Revision 1.61  1996/05/10  18:19:20  madden
2402  * Made lookup_pos a register variable.
2403  *
2404  * Revision 1.59  1996/05/09  13:14:56  madden
2405  * Consolidated CalculateEffectiveLengths and BlastReapHSPsByEvalue into other
2406  * functions.
2407  *
2408  * Revision 1.58  1996/05/03  19:54:24  madden
2409  * Removed defunct seqalign functions, optimized BlastWordFinder functions.
2410  *
2411  * Revision 1.57  1996/05/01  14:57:37  madden
2412  * Added BlastResults structures.
2413  *
2414  * Revision 1.56  1996/04/24  19:46:34  madden
2415  * Removed q_rightmost and q_leftmost from the extend function.
2416  *
2417  * Revision 1.55  1996/04/24  18:01:11  madden
2418  * Used call to readdb_get_max_length for first call to BLAST_ExtendWordNew.
2419  *
2420  * Revision 1.54  1996/04/24  16:16:58  madden
2421  * Changed LinkHsp's not to reallocate the hsp array every time.
2422  *
2423  * Revision 1.53  1996/04/24  12:51:15  madden
2424  * deleted function BlastSequenceAddSequenceIdToSequenceBlk.
2425  *
2426  * Revision 1.52  1996/04/22  21:39:31  madden
2427  * New calls to readdb_get_sequence.
2428  *
2429  * Revision 1.51  1996/04/18  13:39:33  madden
2430  * demodularized lookup of initial hits.
2431  *
2432  * Revision 1.50  1996/04/16  15:32:47  madden
2433  * economies added to new extension functions, non-scoring identical
2434  * words not added to lookup tables.
2435  *
2436  * Revision 1.48  1996/04/11  14:29:33  madden
2437  * function BlastWordExtend completely rewritten.
2438  *
2439  * Revision 1.47  1996/04/04  20:46:22  madden
2440  * Optimized extension function; made "lookup_find" a FnPtr.
2441  *
2442  * Revision 1.46  1996/04/03  19:13:04  madden
2443  * added functions PerformBlastSearchWithReadDb and Perform2PassBlastSearchWithReadDb.
2444  *
2445  * Revision 1.45  1996/03/29  21:26:01  madden
2446  * "hitlist" now kept on SeqAlign rather than HitList.
2447  *
2448  * Revision 1.44  1996/03/29  14:08:18  madden
2449  * SetUpBlastSearchWithReadDb added.
2450  *
2451  * Revision 1.43  1996/03/28  18:45:45  madden
2452  * sequence now added to hitlist after significance has been established.
2453  *
2454  * Revision 1.42  1996/03/27  23:51:11  madden
2455  * added function AddDescriptorsToHitlistWithReadDb.
2456  *
2457  * Revision 1.41  1996/03/27  23:19:24  madden
2458  * Added PerformBlastSearchWithReadDb and Perform2PassBlastSearchWithReadDb,
2459  * changed parameters for PerformBlastSearch and Perform2PassBlastSearch.
2460  *
2461  * Revision 1.40  1996/03/27  19:51:09  madden
2462  * current hits now saved on "current_hitlist", not saved to main
2463  * hitlist until significance decided upon.
2464  *
2465  * Revision 1.39  1996/03/26  19:36:15  madden
2466  * Changes to read databases formatted with formatdb.
2467  *
2468  * Revision 1.38  1996/03/25  16:34:19  madden
2469  * Changes to mimic old statistics.
2470  *
2471  * Revision 1.37  1996/03/20  14:28:57  madden
2472  * Changed cutoff values.
2473  *
2474  * Revision 1.36  1996/03/11  13:52:52  madden
2475  * Ignore gaps when the sequences are too short.
2476  *
2477  * Revision 1.35  1996/02/28  21:36:54  madden
2478  * changes for discontiguous words.
2479  *
2480  * Revision 1.34  1996/02/15  23:31:19  madden
2481  * Trimmed ends of HSP's in comparison with gap.
2482  *
2483  * Revision 1.33  1996/02/15  23:19:43  madden
2484  * Changed call to BlastScoreBlkFill
2485  *
2486  * Revision 1.32  1996/02/15  15:22:52  madden
2487  * Trimming of sequence ends for linking.
2488  *
2489  * Revision 1.31  1996/02/13  14:05:57  madden
2490  * changes to ensure that closer to optimal HSP's are found.
2491  *
2492  * Revision 1.30  1996/02/09  13:50:09  madden
2493  * Added BlastReapHSPsByEvalue; changes to allow both one and two pass runs.
2494  *
2495  * Revision 1.29  1996/02/06  22:50:56  madden
2496  * Changes for two-pass runs.
2497  *
2498  * Revision 1.28  1996/02/05  18:46:09  madden
2499  * Added support for two threshold values.
2500  *
2501  * Revision 1.27  1996/02/02  19:24:53  madden
2502  * Added wfp_first and wfp_second for first and second pass.
2503  *
2504  * Revision 1.26  1996/01/31  17:33:54  madden
2505  * Added function BlastReapHitlistByEvalue.
2506  *
2507  * Revision 1.25  1996/01/29  21:11:38  madden
2508  * Changes for MultipleHits BLAST.
2509  *
2510  * Revision 1.24  1996/01/23  16:30:52  madden
2511  * e_cutoff changed from BLAST_Score to double in SetUpBlastSearch.
2512  *
2513  * Revision 1.23  1996/01/22  22:31:01  madden
2514  * Fixed BlastFindWords to increment index1 correctly.
2515  *
2516  * Revision 1.22  1996/01/22  22:05:05  madden
2517  * Set initial e2 to 0.5.
2518  *
2519  * Revision 1.20  1996/01/17  16:59:56  madden
2520  * Added gap arguments to SetUpBlastSearch.
2521  *
2522  * Revision 1.19  1996/01/17  13:45:03  madden
2523  * Added function BlastFixEandPValues.
2524  *
2525  * Revision 1.18  1996/01/16  15:28:05  madden
2526  * Set i_am_multitasking flag.
2527  *
2528  * Revision 1.16  1996/01/10  17:50:21  madden
2529  * sort hitlist by pvalue.
2530  *
2531  * Revision 1.15  1996/01/08  23:23:22  madden
2532  * Fixed neighborhood bug, added some MP stuff
2533  *
2534  * Revision 1.14  1996/01/06  18:56:52  madden
2535  * Removed obsolete code, fixed purify nit.
2536  *
2537  * Revision 1.13  1996/01/06  17:50:20  madden
2538  * Fixed HeapSort functions for linking of HSP's.
2539  *
2540  * Revision 1.12  1996/01/06  17:18:42  madden
2541  * Fixed setting of "next" pointers when the HSp is part of a linked set.
2542  *
2543  * Revision 1.11  1996/01/06  16:29:38  madden
2544  * NULL'ed out some "link" pointers.
2545  *
2546  * Revision 1.10  1996/01/05  22:54:18  madden
2547  * Fixed HeapSort calls in linking routines.
2548  *
2549  * Revision 1.9  1996/01/05  15:51:14  madden
2550  * Added Stephen Altschul's link_hsps.
2551  *
2552  * Revision 1.8  1995/12/30  19:21:01  madden
2553  * Added PerformBlastSearch.
2554  *
2555  * Revision 1.7  1995/12/30  18:38:51  madden
2556  * Added function SetUpBlastSearch.
2557  *
2558  * Revision 1.6  1995/12/28  21:22:19  madden
2559  * Deallocated leaking memory.
2560  *
2561  * Revision 1.5  1995/12/26  23:03:22  madden
2562  * Added in functions to automatically set some parameters.
2563  *
2564  * Revision 1.4  1995/12/26  20:27:11  madden
2565  * simplified hit extension routine.
2566  *
2567  * Revision 1.3  1995/12/21  23:09:57  madden
2568  * BLAST_Score functions moved to blastkar.c
2569  *
2570  * */
2571 
2572 #include <ncbi.h>
2573 #include <blastpri.h>
2574 #include <lookup.h>
2575 #include <objcode.h>
2576 #include <objseq.h>
2577 #include <sequtil.h>
2578 #include <tofasta.h>
2579 #include <seqport.h>
2580 #include <readdb.h>
2581 #include <ncbithr.h>
2582 #include <gapxdrop.h>
2583 #include <dust.h>
2584 
2585 #include <mbalign.h>
2586 #include <mblast.h>
2587 
2588 /* 
2589 The last database sequence a tick (progress indicator) was issued for
2590 and the increments (i.e., number of db sequences completed) that a tick 
2591 should be emitted. 
2592 */
2593 /* Int4 last_db_seq=0, db_incr=0; */
2594 
2595 /*
2596         Set to TRUE if the process has timed out.
2597 */
2598 volatile Boolean time_out_boolean;
2599 
2600 /*
2601         SeqId lists if only a certain number of the database sequences will be
2602         used for the search.
2603 */
2604 /* SeqIdPtr global_seqid_list=NULL, global_seqid_ptr; */
2605 
2606 /*
2607         GI List to be used if database will be searched by GI.
2608         current is the current element in the array being worked on.
2609         global_gi_being_used specifies that it will be used.
2610 */
2611 
2612 /* Int4 global_gi_current=0;
2613    Boolean global_gi_being_used=FALSE; */
2614 
2615 /* Function to emit progress messages, set by user. */
2616 /* int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives)); */
2617 
2618 /* int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives));
2619    int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives)); */
2620 
2621 /* tells star_proc to check that a star should be emitted. */
2622 /* TNlmThread awake_thr=NULL;
2623    Boolean awake; */
2624 
2625 /* tells index_proc to check that a message should be emitted. */
2626 /* TNlmThread index_thr=NULL;
2627    Boolean awake_index; */
2628 
2629 /* period of sending out a star/message. */
2630 /* #define PERIOD 60 */
2631 
2632 /* Use by star_proc to determine whether to emit a star. */
2633 /* time_t last_tick=0; */
2634 
2635 /* How many positive hits were found (set by ReapHitlist, read by tick_proc
2636 and star_proc). */
2637 /* Int4 number_of_pos_hits=0; */
2638 
2639 /* Mutex for assignment of db seqs to search. */
2640 /* TNlmMutex db_mutex=NULL; */
2641 
2642 /* Mutex for insertion of results into list. */
2643 /* TNlmMutex results_mutex = NULL; */
2644 /* Mutex for the callbacks (star_proc, tick_proc, index_proc). */
2645 /* TNlmMutex callback_mutex=NULL; */
2646 
2647 /* The last db sequence to be assigned.  Used only in get_db_chunk after
2648 the acquisition of the "db_mutex" (above). */
2649 /* Int4 db_chunk_last=0; */
2650 
2651 /* the last sequence in the database to be compared against. */
2652 /* Int4 final_db_seq; */
2653 
2654 /* Default size of the chunks that are assigned in the function get_db_chunk. */
2655 /* Actually db_chunk_size is used, which is smaller if the db is smaller. */
2656 
2657 static Int4 BlastExtendWordSearch PROTO((BlastSearchBlkPtr search, Boolean multiple_hits));
2658 
2659 static Int2 BlastWordExtend PROTO((BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context));
2660 
2661 /*AAS*/
2662 static Int2 BlastNewWordExtend PROTO((BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context));
2663 
2664 static Int2 BlastWordExtend_prelim PROTO((BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context));
2665 
2666 /*AAS*/
2667 static Int2 BlastNewWordExtend_prelim PROTO((BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context));
2668 
2669 
2670 static Int4 BlastWordFinder PROTO((BlastSearchBlkPtr search));
2671 static Int4 BlastWordFinder_mh PROTO((BlastSearchBlkPtr search));
2672 static Int4 BlastWordFinder_contig PROTO((BlastSearchBlkPtr search, LookupTablePtr lookup));
2673 static Int4 BlastWordFinder_mh_contig PROTO((BlastSearchBlkPtr search, LookupTablePtr lookup));
2674 
2675 static BLAST_HSPPtr link_hsps PROTO((BlastSearchBlkPtr search, BLAST_HitListPtr hitlist, BLAST_HSPPtr PNTR hsp_array));
2676 
2677 static Int4 BlastNtWordFinder PROTO((BlastSearchBlkPtr search, LookupTablePtr lookup));
2678 static Int4 BlastNtWordFinder_mh PROTO((BlastSearchBlkPtr search, LookupTablePtr lookup));
2679 
2680 /* DEBUGGING stuff */
2681 #ifdef BLAST_TIMER
2682 clock_t last_clock = 0;
2683 #endif
2684 /* end DEBUGGING stuff */
2685 
2686 
2687 /*
2688         The function that decides whether or not a tick should be
2689         emitted.  This is performed through the callback function
2690         ("tick_callback") that is set in "do_the_blast_run".  This
2691         function is called from "do_blast_search" for single processing
2692         machines and "get_db_chunk" for MT machines, after the db_mutex
2693         has been obtained in "get_db_chunk".
2694 */
2695 
2696 void BlastTickProc(Int4 sequence_number, BlastThrInfoPtr thr_info)
2697 
2698 {
2699     if(thr_info->tick_callback && 
2700        (sequence_number > (thr_info->last_db_seq + thr_info->db_incr))) {
2701         NlmMutexLockEx(&thr_info->callback_mutex);
2702         thr_info->last_db_seq += thr_info->db_incr;
2703         thr_info->tick_callback(sequence_number, thr_info->number_of_pos_hits);
2704         thr_info->last_tick = Nlm_GetSecs();
2705         NlmMutexUnlock(thr_info->callback_mutex);
2706     }
2707     return;
2708 }
2709 
2710 /*
2711         Sends out a message every PERIOD (i.e., 60 secs.) for the index.
2712 
2713         THis function runs as a separate thread and only runs on a threaded
2714         platform.
2715 */
2716 VoidPtr
2717 index_proc(VoidPtr dummy)
2718 
2719 {
2720 
2721     /* Sleep only works on UNIX.  An ifdef is used until
2722        a portable solution can be found. */
2723 #ifdef OS_UNIX
2724     
2725     Int2 index;
2726     BlastThrInfoPtr thr_info = (BlastThrInfoPtr) dummy;
2727     
2728     while (thr_info->awake_index) {
2729         for (index=0; index < STAR_MSG_PERIOD; index++) {
2730             sleep(1);
2731             if (thr_info->awake_index == FALSE)
2732                 break;
2733         }
2734         
2735         if (thr_info->awake_index && thr_info->index_callback) {
2736             NlmMutexLockEx(&thr_info->callback_mutex);
2737             thr_info->last_tick = Nlm_GetSecs();
2738             thr_info->index_callback(0, 0);
2739             NlmMutexUnlock(thr_info->callback_mutex);
2740         }
2741     }
2742 #endif
2743     return dummy;
2744 }
2745 
2746 /*
2747         Sends out a message every PERIOD (i.e., 60 secs.) and sends out a
2748         "star" if a tick has not been sent out in the last PERIOD. 
2749 
2750         THis function runs as a separate thread and only runs on a threaded
2751         platform.
2752 */
2753 static VoidPtr
2754 star_proc(VoidPtr dummy)
2755 
2756 {
2757     /* Sleep only works on UNIX.  An ifdef is used until
2758        a portable solution can be found. */
2759 #ifdef OS_UNIX
2760     
2761     time_t now;
2762     Int2 index;
2763     BlastThrInfoPtr thr_info = (BlastThrInfoPtr) dummy;
2764     
2765     now = Nlm_GetSecs();
2766     while (thr_info->awake) {
2767         if (now - thr_info->last_tick < STAR_MSG_PERIOD / 2) {
2768             for (index = 0; index < STAR_MSG_PERIOD; index++) {
2769                 sleep(1);
2770                 if (thr_info->awake == FALSE)
2771                     break;
2772             }
2773         }
2774         if (thr_info->awake) {
2775             NlmMutexLockEx(&thr_info->callback_mutex);
2776             now = Nlm_GetSecs();
2777             if (now-thr_info->last_tick > STAR_MSG_PERIOD) {
2778                 if (thr_info->star_callback) {
2779                     thr_info->star_callback(thr_info->db_chunk_last, 
2780                                             thr_info->number_of_pos_hits);
2781                     thr_info->last_tick = now;
2782                 }
2783             }
2784             NlmMutexUnlock(thr_info->callback_mutex);
2785         }
2786     }
2787 #endif
2788     return dummy;
2789 }
2790 
2791 /*
2792         Make a temporary protein BioseqPtr to use with seg.
2793 */
2794 BioseqPtr
2795 BlastMakeTempProteinBioseq (Uint1Ptr sequence, Int4 length, Uint1 alphabet)
2796 
2797 {
2798     BioseqPtr bsp;
2799     Int4 byte_store_length;
2800     Nlm_ByteStorePtr byte_store;
2801     ObjectIdPtr oip;
2802 
2803     if (sequence == NULL || length == 0)
2804         return NULL;
2805     
2806     byte_store = Nlm_BSNew(length);
2807     
2808     byte_store_length = Nlm_BSWrite(byte_store, (VoidPtr) sequence, length);
2809     if (length != byte_store_length) {
2810         Nlm_BSDelete(byte_store, length);
2811         return NULL;
2812     }
2813     
2814     bsp = BioseqNew();
2815     bsp->seq_data = (SeqDataPtr) byte_store;
2816     bsp->length = length;
2817     bsp->seq_data_type = alphabet;
2818     bsp->mol = Seq_mol_aa;
2819     bsp->repr = Seq_repr_raw;
2820     
2821     oip = UniqueLocalId();
2822     ValNodeAddPointer(&(bsp->id), SEQID_LOCAL, oip);
2823     SeqMgrAddToBioseqIndex(bsp);
2824     
2825     return bsp;
2826 }
2827 
2828 
2829 #define LINK_HSP_OVERLAP 9
2830 #define MY_EPS 1.0e-9
2831 /*
2832         Calculates cutoff scores and returns them.
2833         Equations provided by Stephen Altschul.
2834 
2835         BlastSearchBlkPtr search: provides info to perform calculation.
2836         Int4 subject_length: length of the DB sequence.
2837         Boolean PNTR ignore_small_gaps: If TRUE, test only for large gaps.
2838         BLAST_Score PNTR cutoff_s_second: S2 score for second pass.
2839         BLAST_Score PNTR cutoff_big_gap: Cutoff score for big gaps.
2840 
2841 */
2842 static void
2843 CalculateSecondCutoffScore(BlastSearchBlkPtr search, Int4 subject_length, Boolean PNTR ignore_small_gaps, BLAST_Score PNTR cutoff_s_second, BLAST_Score PNTR cutoff_big_gap)
2844 
2845 {
2846     const Int4 overlap_size = LINK_HSP_OVERLAP;
2847         Nlm_FloatHi gap_prob, gap_decay_rate, x_variable, y_variable;
2848         BLAST_KarlinBlkPtr kbp;
2849         Int4 expected_length, window_size, query_length;
2850         Int8 search_sp;
2851 
2852         /* Do this for the first context, should this be changed?? */
2853         kbp = search->sbp->kbp[search->first_context];
2854         window_size = search->pbp->gap_size + overlap_size + 1;
2855         gap_prob = search->pbp->gap_prob;
2856         gap_decay_rate = search->pbp->gap_decay_rate;
2857         query_length = search->context[search->first_context].query->length;
2858 
2859         if (search->pbp->old_stats == FALSE)
2860         {
2861         /* Subtract off the expected score. */
2862            expected_length = Nint(log(kbp->K*((Nlm_FloatHi) query_length)*((Nlm_FloatHi) subject_length))/(kbp->H));
2863            query_length = query_length - expected_length;
2864            subject_length = subject_length - expected_length;
2865            query_length = MAX(query_length, 1);
2866            subject_length = MAX(subject_length, 1);
2867 
2868            if (search->dblen > subject_length)
2869                 y_variable = log((Nlm_FloatHi) (search->dblen)/(Nlm_FloatHi) subject_length)*(kbp->K)/(gap_decay_rate);
2870            else
2871                 y_variable = log((Nlm_FloatHi) (subject_length + expected_length)/(Nlm_FloatHi) subject_length)*(kbp->K)/(gap_decay_rate);
2872            search_sp = ((Int8) query_length)* ((Int8) subject_length);
2873            x_variable = 0.25*y_variable*((FloatHi) search_sp);
2874 
2875 /* To use "small" gaps the query and subject must be "large" compared to
2876 the gap size. If small gaps may be used, then the cutoff values must be
2877 adjusted for the "bayesian" possibility that both large and small gaps are
2878 being checked for. */
2879 
2880            if (search_sp > 8*window_size*window_size)
2881            {
2882                 x_variable /= (1.0 - gap_prob + MY_EPS);
2883                 *cutoff_big_gap = (BLAST_Score) floor((log(x_variable)/kbp->Lambda)) + 1;
2884                 x_variable = y_variable*(window_size*window_size);
2885                 x_variable /= (gap_prob + MY_EPS);
2886                 *cutoff_s_second= (BLAST_Score) floor((log(x_variable)/kbp->Lambda)) + 1;
2887                 /* Don't allow this cutoff to be too small */
2888                 *cutoff_s_second = MAX(*cutoff_s_second, search->pbp->gap_trigger);
2889                 *ignore_small_gaps = FALSE;
2890            }
2891            else
2892            {
2893                 *cutoff_big_gap = (BLAST_Score) floor((log(x_variable)/kbp->Lambda)) + 1;
2894                 *cutoff_s_second = *cutoff_big_gap;
2895                 *ignore_small_gaps = TRUE;
2896            }    
2897            *cutoff_big_gap *= search->pbp->scalingFactor;
2898            *cutoff_s_second *= search->pbp->scalingFactor;
2899         }
2900         else
2901         {
2902         /* USE the old statistics, for comparison to the OLD BLAST. */
2903                 *cutoff_big_gap = search->pbp->cutoff_s_second;
2904                 *cutoff_s_second = *cutoff_big_gap;
2905                 *ignore_small_gaps = TRUE;
2906         }
2907 }
2908 
2909 /*
2910 Rounds down score to next even value if appropriate.
2911 */
2912 
2913 static Int2
2914 s_RoundDownOddScores(BLAST_ScoreBlkPtr sbp, BLAST_HitListPtr hitlist)
2915 {
2916         BLAST_HSPPtr PNTR hsp_array;
2917         Int4 hsp_cnt;
2918         Int4 index;
2919 
2920         if (sbp->round_down == FALSE || hitlist->hspcnt == 0)
2921                 return 0;
2922 
2923         hsp_cnt = hitlist->hspcnt;
2924         hsp_array = hitlist->hsp_array;
2925         for (index=0; index<hsp_cnt; index++)
2926         {
2927                 hsp_array[index]->score -= (hsp_array[index]->score &1);
2928         }
2929         return 0;
2930 }
2931 
2932 /*
2933         This function reevaluates the HSP's from a blast run, checking that
2934         ambiguity characters, ignored until now, don't change the score or
2935         extent of the HSP's.
2936 
2937         Only works for blastn right now.
2938 */
2939 
2940 static Int2
2941 BlastReevaluateWithAmbiguities (BlastSearchBlkPtr search, Int4 sequence_number)
2942 
2943 {
2944         BioseqPtr bsp;
2945         register BLAST_Score    sum, score;
2946         register BLAST_ScorePtr PNTR    matrix;
2947         BLAST_HitListPtr current_hitlist;
2948         BLAST_HSPPtr PNTR hsp_array;
2949         Int4 context, hspcnt, hspcnt_max, index, index1, status;
2950         Int4 length, longest_hsp_length, start, stop;
2951         Nlm_FloatHi current_evalue=DBL_MAX;
2952         SeqPortPtr spp=NULL;
2953         Uint1Ptr nt_seq, nt_seq_start, subject, subject_start, query, old_query_s, old_query_f, new_query_s, new_query_f=NULL;
2954         Uint1Ptr query_start, query_end, subject_real_start=NULL;
2955         Int4 num_ident;
2956 
2957 /* Only nucl. db's. */
2958         if (search->prog_number == blast_type_blastp || search->prog_number == blast_type_blastx)
2959                 return 0;
2960 
2961 /* Gapped alignments will be reevaluated anyway.*/
2962         if (search->pbp->gapped_calculation == TRUE || search->pbp->do_not_reevaluate == TRUE)
2963                 return 0;
2964 
2965 /* No hits to reevaluate. */
2966         if (search->current_hitlist == NULL || search->current_hitlist->hspcnt == 0)
2967                 return 0;
2968 
2969 /* Check if there are ambiguites at all, return 0 if there are none. */
2970         if(search->prog_number != blast_type_blastn &&
2971            readdb_ambchar_present(search->rdfp, sequence_number) == FALSE) {
2972            
2973                 return 0;
2974         }
2975         current_hitlist = search->current_hitlist;
2976         hspcnt = current_hitlist->hspcnt;
2977         hspcnt_max = current_hitlist->hspcnt_max;
2978         hsp_array = current_hitlist->hsp_array;
2979         matrix = search->sbp->matrix;
2980 
2981         /* Look for longest HSP. */
2982         longest_hsp_length = 0;
2983         for (index=0; index<hspcnt_max; index++)
2984         {
2985                 if (hsp_array[index] == NULL)
2986                         continue;
2987 
2988                 if (hsp_array[index]->subject.length > longest_hsp_length)
2989                         longest_hsp_length = hsp_array[index]->subject.length;
2990 
2991                 if (current_evalue > hsp_array[index]->evalue)
2992                         current_evalue = hsp_array[index]->evalue;
2993         }
2994 
2995         if (StringCmp(search->prog_name, "blastn") != 0)
2996         {
2997                 longest_hsp_length *= CODON_LENGTH;
2998         }
2999 
3000         if (longest_hsp_length > 0)
3001         {
3002                 nt_seq_start = MemNew(longest_hsp_length*sizeof(Uint1));
3003                 if (nt_seq_start == NULL)
3004                         return 0;
3005         }
3006         else
3007         {
3008                 return longest_hsp_length;
3009         }
3010         
3011         if (search->thr_info->ambiguities_mutex)
3012             NlmMutexLock(search->thr_info->ambiguities_mutex);
3013         
3014         bsp = readdb_get_bioseq(search->rdfp, sequence_number);
3015 
3016         for (index=0; index<hspcnt_max; index++)
3017         {
3018                 if (hsp_array[index] == NULL)
3019                         continue;
3020 
3021                 context = hsp_array[index]->context;
3022 
3023                 if (StringCmp(search->prog_name, "blastn") == 0)
3024                 {
3025                         start = hsp_array[index]->subject.offset;
3026                         stop = hsp_array[index]->subject.end - 1;
3027                         length = hsp_array[index]->subject.length;
3028                 }
3029                 else
3030                 {       /* Convert for translated alphabet. */
3031                     if (hsp_array[index]->subject.frame > 0)
3032                     {
3033                         start = hsp_array[index]->subject.frame - 1 + CODON_LENGTH*(hsp_array[index]->subject.offset);
3034                         stop = start + CODON_LENGTH*(hsp_array[index]->subject.length) - 1;
3035                         length = CODON_LENGTH*(hsp_array[index]->subject.length);
3036                     }
3037                     else
3038                     {
3039                         start = bsp->length - CODON_LENGTH*(hsp_array[index]->subject.offset + hsp_array[index]->subject.length) + hsp_array[index]->subject.frame + 1;
3040                         stop = bsp->length - CODON_LENGTH*(hsp_array[index]->subject.offset) + hsp_array[index]->subject.frame;
3041                         length = CODON_LENGTH*(hsp_array[index]->subject.length);
3042                      }
3043                 }
3044 
3045                 if (hsp_array[index]->subject.frame > 0)
3046                 {
3047                         spp = SeqPortNew(bsp, start, stop, Seq_strand_plus, Seq_code_ncbi4na);
3048                         SeqPortSet_do_virtual(spp, TRUE);
3049 
3050                 }
3051                 else
3052                 {       /* Offsets correct here?? */
3053                         spp = SeqPortNew(bsp, start, stop, Seq_strand_minus, Seq_code_ncbi4na);
3054                         SeqPortSet_do_virtual(spp, TRUE);
3055                 }
3056 
3057                 if (StringCmp(search->prog_name, "blastn") == 0)
3058                 {
3059                         nt_seq = nt_seq_start;
3060                         while (length > 0)
3061                         {
3062                                 *nt_seq = ncbi4na_to_blastna[SeqPortGetResidue(spp)];
3063                                 nt_seq++;
3064                                 length--;
3065                         }
3066                         subject_start = nt_seq_start;
3067                 }
3068                 else
3069                 {
3070                         nt_seq = nt_seq_start;
3071                         while (length > 0)
3072                         {
3073                                 *nt_seq = SeqPortGetResidue(spp);
3074                                 nt_seq++;
3075                                 length--;
3076                         }
3077                         /* Set frame to one so we start at beginning of nt seq. */
3078                         subject_real_start = GetTranslation(nt_seq_start, CODON_LENGTH*(hsp_array[index]->subject.length), 1, &length, search->db_genetic_code);
3079                         /* The first Residue is a NULLB */
3080                         subject_start = subject_real_start+1;
3081                 }
3082                 spp = SeqPortFree(spp);
3083 
3084                 query_start = (Uint1Ptr) search->context[context].query->sequence;
3085                 query_end = query_start + search->context[context].query->length;
3086 
3087                 score = 0;
3088                 sum = 0;
3089                 num_ident = 0;
3090                 subject = subject_start;
3091                 old_query_s = query_start + hsp_array[index]->query.offset;
3092                 old_query_f = query_start + hsp_array[index]->query.end; 
3093                 /* Assume, for now, that the real HSP starts where it does now. */
3094                 new_query_s = old_query_s;
3095                 for (query=old_query_s; query<old_query_f; query++, subject++)
3096                 {
3097                    if (*query == *subject)
3098                       ++num_ident;
3099 
3100                         if ((sum += matrix[*query][*subject]) < 0)
3101                         {
3102                                 if (score > 0)
3103                                 {
3104                                         if (score >= search->pbp->cutoff_s2)
3105                                         {
3106                                                 break;
3107                                         }
3108                                 }
3109                                 score = sum = 0;
3110                                 num_ident = 0;
3111                                 new_query_s = new_query_f = query;
3112                         }
3113                         else if (sum > score)
3114                         {       /* Start of scoring regime. */
3115                            if (score == 0)
3116                               new_query_s = query;
3117                            score = sum;
3118                            new_query_f = query+1;
3119                         }
3120                 }
3121 
3122                 if (score >= search->pbp->cutoff_s2)
3123                 { /* Adjust the information here. */
3124                         hsp_array[index]->score = score;
3125                         hsp_array[index]->query.offset = new_query_s - query_start;
3126                         hsp_array[index]->query.end = new_query_f - query_start;
3127                         hsp_array[index]->query.length = hsp_array[index]->query.end - hsp_array[index]->query.offset;
3128                         hsp_array[index]->subject.offset = hsp_array[index]->subject.offset + new_query_s - old_query_s;
3129                         hsp_array[index]->subject.end = hsp_array[index]->subject.end + new_query_f - old_query_f;
3130                         hsp_array[index]->subject.length = hsp_array[index]->subject.end - hsp_array[index]->subject.offset;
3131                         hsp_array[index]->num_ident = num_ident;
3132                         hsp_array[index]->linked_set = FALSE;
3133                         hsp_array[index]->start_of_chain = FALSE;
3134                         Nlm_MemSet((VoidPtr) &(hsp_array[index]->hsp_link), 0, sizeof(BLAST_HSP_LINK));
3135                         /* Need to NULL out more in HSP? */
3136                 }
3137                 else
3138                 { /* Delete if this is now below the cutoff score. */
3139                         hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
3140                 }
3141 
3142                 if (StringCmp(search->prog_name, "blastn") != 0)
3143                 {
3144                         subject_real_start = MemFree(subject_real_start);
3145                 }
3146         }
3147 
3148         bsp = BioseqFree(bsp);
3149         if (search->thr_info->ambiguities_mutex)
3150                 NlmMutexUnlock(search->thr_info->ambiguities_mutex);
3151         nt_seq_start = MemFree(nt_seq_start);
3152 
3153 /* Save HSP's again, discarding those that have been NULLed out. */
3154         index1 = HspArrayPurge(hsp_array, hspcnt_max, TRUE);
3155         current_hitlist->hspcnt = index1;       
3156         current_hitlist->hspcnt_max = index1;   
3157 
3158         s_RoundDownOddScores(search->sbp, search->current_hitlist);
3159         /* Relink the HSP's, ReReap the Hits. */
3160         if (!search->pbp->mb_params && search->pbp->do_sum_stats == TRUE) {
3161            status = BlastLinkHsps(search);
3162         } else {
3163            status = BlastGetNonSumStatsEvalue(search);
3164         }
3165         status = BlastReapHitlistByEvalue(search);
3166         
3167         return status;
3168 }
3169 
3170 /* Auxiliary function to retrieve the virtual oidlist attached to the
3171  * rdfp_chain. Returns a pointer to the OIDList, called should *NOT* modify
3172  * this copy. Assumes that this function is called after BlastProcessGiLists
3173  * has been called (while setting up the search) */
3174 OIDListPtr LIBCALL BlastGetVirtualOIDList(ReadDBFILEPtr rdfp_chain)
3175 {
3176     OIDListPtr virtual_oidlist = NULL;
3177 
3178     while (rdfp_chain) {
3179         if (virtual_oidlist = rdfp_chain->oidlist) {
3180             break;
3181         }
3182         rdfp_chain = rdfp_chain->next;
3183     }
3184     return virtual_oidlist;
3185 }
3186 
3187 /*
3188         Function to assign chunks of the database to a thread.  
3189         The "start" and "stop" points are returned by the arguments.
3190         Note that this is a half-closed interval (stop is not searched).
3191 
3192         The Int4 "db_chunk_last" (a global variable) keeps track of the last 
3193         database number assigned and is only changed if the db_mutex has been acquired.
3194 
3195         The Boolean done specifies that the search has already been
3196         completed.
3197 */
3198 
3199 Boolean BlastGetDbChunk(ReadDBFILEPtr rdfp, Int4Ptr start, Int4Ptr stop, 
3200                      Int4Ptr id_list, Int4Ptr id_list_number, 
3201                      BlastThrInfoPtr thr_info)
3202      
3203 {
3204     Boolean done=FALSE;
3205     OIDListPtr virtual_oidlist = NULL;
3206     *id_list_number = 0;
3207     
3208     NlmMutexLockEx(&thr_info->db_mutex);
3209     if (thr_info->realdb_done) {
3210         if (virtual_oidlist = BlastGetVirtualOIDList(rdfp)) {
3211             /* Virtual database.   Create id_list using mask file */
3212             Int4 gi_end       = 0;
3213             
3214             thr_info->final_db_seq = MIN(thr_info->final_db_seq, virtual_oidlist->total);
3215             
3216             gi_end = thr_info->final_db_seq;
3217 
3218             if (thr_info->gi_current < gi_end) {
3219                 Int4 oidindex  = 0;
3220                 Int4 gi_start  = thr_info->gi_current;
3221                 Int4 bit_start = gi_start % MASK_WORD_SIZE;
3222                 Int4 gi;
3223                 
3224                 for(gi = gi_start; (gi < gi_end) && (oidindex < thr_info->db_chunk_size);) {
3225                     Int4 bit_end = ((gi_end - gi + bit_start) < MASK_WORD_SIZE) ? (gi_end - gi + bit_start) : MASK_WORD_SIZE;
3226                     Int4 bit;
3227                     
3228                     Uint4 mask_index = gi / MASK_WORD_SIZE;
3229                     Uint4 mask_word  = Nlm_SwapUint4(virtual_oidlist->list[mask_index]);
3230                     
3231                     if ( mask_word ) {
3232                         for(bit = bit_start; bit<bit_end; bit++) {
3233                             Uint4 bitshift = (MASK_WORD_SIZE-1)-bit;
3234                             
3235                             if ((mask_word >> bitshift) & 1) {
3236                                 id_list[ oidindex++ ] = (gi - bit_start) + bit;
3237                             }
3238                         }
3239                     }
3240                     
3241                     gi += bit_end - bit_start;
3242                     bit_start = 0;
3243                 }
3244                 
3245                 thr_info->gi_current = gi;
3246                 *id_list_number = oidindex;
3247                 BlastTickProc(thr_info->gi_current/32, thr_info);
3248             } else {
3249                 done = TRUE;
3250             }
3251             
3252         } else {
3253             done = TRUE;
3254         }
3255     } else {
3256         int real_readdb_entries;
3257         int total_readdb_entries;
3258         int final_real_seq;
3259 
3260         real_readdb_entries  = readdb_get_num_entries_total_real(rdfp);
3261         total_readdb_entries = readdb_get_num_entries_total(rdfp);
3262         final_real_seq       = MIN( real_readdb_entries, thr_info->final_db_seq );
3263         
3264         /* we have real database with start/stop specified */
3265         if (thr_info->db_mutex) {
3266             /* Emit a tick if needed. */
3267             BlastTickProc(thr_info->db_chunk_last, thr_info);
3268             *start = thr_info->db_chunk_last;
3269             if (thr_info->db_chunk_last < final_real_seq) {
3270                 *stop = MIN((thr_info->db_chunk_last + 
3271                     thr_info->db_chunk_size), final_real_seq);
3272             } else {/* Already finished. */
3273                 *stop = thr_info->db_chunk_last;
3274 
3275                 /* Change parameters for oidlist processing. */
3276                 thr_info->realdb_done  = TRUE;
3277             }
3278             thr_info->db_chunk_last = *stop;
3279         } else {
3280             if (*stop != final_real_seq) {
3281                 done = FALSE;
3282                 *start = thr_info->last_db_seq;
3283                 *stop  = final_real_seq;
3284             } else {
3285                 thr_info->realdb_done = TRUE;
3286                 
3287                 if (total_readdb_entries == real_readdb_entries) {
3288                     done = TRUE;
3289                 } else {
3290                     thr_info->gi_current = final_real_seq;
3291                 }
3292             }
3293         }
3294     }
3295     
3296     NlmMutexUnlock(thr_info->db_mutex);
3297     return done;
3298 }
3299 
3300 static VoidPtr
3301 do_gapped_blast_search(VoidPtr ptr)
3302 
3303 {
3304     BlastSearchBlkPtr search;
3305     Int2 status=0;
3306     Int4 index, index1, start=0, stop=0, id_list_length;
3307     Int4Ptr id_list=NULL;
3308     Uint4 i; /* AM: Support for query concatenation. */
3309 
3310     search = (BlastSearchBlkPtr) ptr;
3311     if (search->thr_info->blast_gi_list || BlastGetVirtualOIDList(search->rdfp))
3312     {                                     /* FIXME: magic constant? */
3313         id_list = MemNew((search->thr_info->db_chunk_size+33)*sizeof(Int4));
3314     }
3315     
3316     while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list, 
3317                            &id_list_length, search->thr_info) != TRUE)
3318     {
3319         if (id_list && id_list_length)
3320         {
3321             for (index=0; index<id_list_length; index++)
3322             {
3323                 index1 = id_list[index];
3324                 if ((status =
3325                      BLASTPerformSearchWithReadDb(search, index1)) != 0)
3326                     break;
3327 
3328                 if (search->pbp->do_sum_stats) {
3329                     status = BlastLinkHsps(search);
3330                 }
3331                 status = BlastReapHitlistByEvalue(search);
3332                 if (search->handle_results)
3333                     search->handle_results((VoidPtr) search);
3334                 else
3335                     BlastSaveCurrentHitlist(search);
3336                 /* Emit a tick if needed and we're not MT. */
3337                 if (search->thr_info->db_mutex == NULL)
3338                     BlastTickProc(index1, search->thr_info);
3339                 if (time_out_boolean == TRUE)
3340                     break;      
3341             }
3342         } else if (!search->thr_info->realdb_done) {
3343             for (index=start; index<stop; index++)
3344             {
3345                 if ((status = BLASTPerformSearchWithReadDb(search, index)) != 0)
3346                     break;
3347 
3348                 /* AM: Support for query concatenation. */
3349         if( !search->mult_queries )
3350         {
3351             if (search->pbp->do_sum_stats) {
3352                 status = BlastLinkHsps(search);
3353             }
3354             status = BlastReapHitlistByEvalue(search);
3355             if (search->handle_results)
3356                 search->handle_results((VoidPtr) search);
3357             else
3358                 BlastSaveCurrentHitlist(search);
3359         }
3360                 else /* AM: Support for query concatenation. */
3361                 {
3362                   InitHitLists( search );
3363                   search->mult_queries->use_mq = TRUE;
3364                   search->mult_queries->delete_current_hitlist = FALSE;
3365 
3366                   for (i = 0;  i < search->mult_queries->NumQueries;  ++i) {
3367                       search->mult_queries->current_query = i;
3368 
3369                       if (search->pbp->do_sum_stats) {
3370                           status = BlastLinkHsps(search);
3371                       }
3372                       status = BlastReapHitlistByEvalue(search);
3373 
3374                       if (search->handle_results)
3375                           search->handle_results( (VoidPtr)search );
3376                       else
3377                           BlastSaveCurrentHitlist(search);
3378                   }
3379 
3380                   if( search->mult_queries->delete_current_hitlist )
3381                   {
3382                     search->current_hitlist
3383                       = BlastHitListDestruct( search->current_hitlist );
3384                   }
3385 
3386                   search->mult_queries->use_mq = FALSE;
3387                   BlastHitListPurge( search->current_hitlist );
3388                 }
3389 
3390                 /* Emit a tick if needed and we're not MT. */
3391                 if (search->thr_info->db_mutex == NULL)
3392                     BlastTickProc(index, search->thr_info);
3393                 if (time_out_boolean == TRUE)
3394                     break;      
3395             }
3396         }
3397         /* Get out if "stop" was the last seq. */
3398         if (time_out_boolean || status)
3399             break;
3400     }
3401 
3402     if (id_list)
3403         id_list = MemFree(id_list);
3404 
3405     return (VoidPtr) search;
3406 } 
3407 
3408 static VoidPtr
3409 do_blast_search(VoidPtr ptr)
3410 
3411 {
3412     BlastSearchBlkPtr search;
3413     Int2 status = 0;
3414     Int4 index, index1, start=0, stop=0, id_list_length;
3415     Int4Ptr id_list=NULL;
3416     Uint4 i; /* AM: Query multiplexing. */
3417 
3418     search = (BlastSearchBlkPtr) ptr;
3419         if (search->thr_info->blast_gi_list || BlastGetVirtualOIDList(search->rdfp))
3420     {                                     /* FIXME: magic constant? */
3421         id_list = MemNew((search->thr_info->db_chunk_size+33)
3422                          *sizeof(Int4));
3423     }
3424 
3425     while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list,
3426                            &id_list_length, search->thr_info) != TRUE) {
3427         if (search->thr_info->realdb_done && id_list) {
3428             for (index=0; index<id_list_length; index++) {
3429                 index1 = id_list[index];
3430                 if ((status = BLASTPerformSearchWithReadDb(search, index1))
3431                     != 0)
3432                    break;
3433                 s_RoundDownOddScores(search->sbp, search->current_hitlist);
3434                 if (!search->pbp->mb_params) {
3435                    if (search->pbp->do_sum_stats == TRUE)
3436                       status = BlastLinkHsps(search);
3437                    else
3438                       status = BlastGetNonSumStatsEvalue(search);
3439                    status = BlastReapHitlistByEvalue(search);
3440                    if (!search->handle_results)
3441                       status = BlastReevaluateWithAmbiguities(search, index1);
3442                 } else {
3443                    MegaBlastReevaluateWithAmbiguities(search);
3444                 }
3445                 
3446                 if (search->handle_results)
3447                     search->handle_results((VoidPtr) search);
3448                 else if (!search->pbp->mb_params)
3449                    BlastSaveCurrentHitlist(search);
3450                 else
3451                    MegaBlastSaveCurrentHitlist(search);
3452                 if (search->pbp->mb_params)
3453                    /* Free the ncbi4na-encoded sequence */
3454                    search->subject->sequence_start = (Uint1Ptr)
3455                       MemFree(search->subject->sequence_start);
3456                 /* Emit a tick if needed and we're not MT. */
3457                 if (search->thr_info->db_mutex == NULL)
3458                     BlastTickProc(index1, search->thr_info);
3459                 if (time_out_boolean == TRUE)
3460                     break;      
3461             }
3462         } else if (!search->thr_info->realdb_done) {
3463             for (index=start; index<stop; index++) {
3464                 if ((status = BLASTPerformSearchWithReadDb(search, index))
3465                     != 0)
3466                    break;
3467                 s_RoundDownOddScores(search->sbp, search->current_hitlist);
3468                 if (!search->pbp->mb_params) {
3469                    if (search->pbp->do_sum_stats == TRUE)
3470                       status = BlastLinkHsps(search);
3471                    else
3472                       status = BlastGetNonSumStatsEvalue(search);
3473                    status = BlastReapHitlistByEvalue(search);
3474                    if (!search->handle_results)
3475                       status = BlastReevaluateWithAmbiguities(search, index);
3476                 } else {
3477                    MegaBlastReevaluateWithAmbiguities(search);
3478                 }
3479                 if (search->handle_results)
3480                    search->handle_results((VoidPtr) search);
3481                 else if (!search->pbp->mb_params)
3482                 { /* AM: Query multiplexing. */
3483                   if( !search->mult_queries )
3484                     BlastSaveCurrentHitlist(search);
3485                   else
3486                   {
3487                     InitHitLists( search );
3488                     search->mult_queries->use_mq = TRUE;
3489                     search->mult_queries->delete_current_hitlist = FALSE;
3490 
3491                     for( i = 0; i < search->mult_queries->NumQueries; ++i )
3492                     {
3493                       search->mult_queries->current_query = i;
3494                       BlastSaveCurrentHitlist(search);
3495                     }
3496                     
3497                     if( search->mult_queries->delete_current_hitlist )
3498                     {
3499                       search->current_hitlist
3500                         = BlastHitListDestruct( search->current_hitlist );
3501                     }
3502 
3503                     search->mult_queries->use_mq = FALSE;
3504                     BlastHitListPurge( search->current_hitlist );
3505                   }
3506                 }
3507                 else
3508                    MegaBlastSaveCurrentHitlist(search);
3509 
3510                 if (search->pbp->mb_params)
3511                    /* Free the ncbi4na-encoded sequence */
3512                    search->subject->sequence_start = (Uint1Ptr)
3513                       MemFree(search->subject->sequence_start);
3514                 /* Emit a tick if needed and we're not MT. */
3515                 if (search->thr_info->db_mutex == NULL)
3516                     BlastTickProc(index, search->thr_info);
3517                 if (time_out_boolean == TRUE)
3518                     break;      
3519             }
3520         }
3521 
3522         /* Get out if "stop" was the last seq. */
3523         if (time_out_boolean || status)
3524             break;
3525     }
3526     
3527     if (id_list)
3528         id_list = MemFree(id_list);
3529 
3530     return (VoidPtr) search;
3531 } 
3532 
3533 void LIBCALL
3534 do_the_blast_run(BlastSearchBlkPtr search)
3535 
3536 {
3537     BlastSearchBlkPtr PNTR array;
3538     Char buffer[256];
3539     Int2 index;
3540     TNlmThread PNTR thread_array;
3541     VoidPtr status=NULL;
3542     int num_entries_total;
3543     int num_entries_total_real;
3544     int start_seq;
3545     int end_seq;
3546     Int4 i; /* AM: query concatenation */
3547     
3548     if (search == NULL)
3549         return;
3550     
3551     num_entries_total      = readdb_get_num_entries_total     (search->rdfp);
3552     num_entries_total_real = readdb_get_num_entries_total_real(search->rdfp);
3553 
3554     /* Set 'done with read db' according to whether real databases are present */
3555     
3556     if (num_entries_total_real) {
3557         search->thr_info->realdb_done = FALSE;
3558     } else {
3559         search->thr_info->realdb_done = TRUE;
3560     }
3561     
3562     /* Make sure first, last sequence indices are in-range (0, NUM-1) */
3563     
3564     /* NOTE: search->pbp->final_seq is 1 beyond the last sequence ordinal id,
3565        except when it's <=0, which means search to the last sequence in the 
3566        database. */
3567     /* search->thr_info versions are not. */
3568     
3569     if (search->pbp->final_db_seq > 0) {
3570         end_seq = MIN(search->pbp->final_db_seq, num_entries_total);
3571     } else {
3572         end_seq = num_entries_total;
3573     }
3574     
3575     start_seq = MAX(0, MIN(search->pbp->first_db_seq, end_seq));
3576     
3577     /* Set BlastGetDbChunk()'s pointers and counters */
3578     
3579     search->thr_info->last_db_seq       =
3580         search->thr_info->gi_current    =
3581         search->thr_info->db_chunk_last = start_seq;
3582     
3583     search->thr_info->final_db_seq = end_seq;
3584     
3585     ConfigureDbChunkSize(search, search->dbseq_num);
3586 
3587     if (NlmThreadsAvailable() && search->pbp->process_num > 1) {
3588         NlmMutexInit(&search->thr_info->db_mutex);
3589         NlmMutexInit(&search->thr_info->results_mutex);
3590         NlmMutexInit(&search->thr_info->ambiguities_mutex);
3591         
3592         array = (BlastSearchBlkPtr PNTR) MemNew((search->pbp->process_num)*sizeof(BlastSearchBlkPtr));
3593         array[0] = search;
3594         for (index=1; index<search->pbp->process_num; index++) {
3595             array[index] = BlastSearchBlkDuplicate(search);
3596             if (array[index] == NULL) {
3597                search->pbp->process_num = index;
3598                ErrPostEx(SEV_WARNING, 0, 0, "Number of threads reduced to %d", index);
3599                break;
3600             }
3601         }
3602         
3603         thread_array = (TNlmThread PNTR) MemNew((search->pbp->process_num)*sizeof(TNlmThread));
3604         for (index=0; index<search->pbp->process_num; index++) {
3605             if (search->pbp->gapped_calculation && StringCmp(search->prog_name, "blastn") != 0)
3606                 thread_array[index] = NlmThreadCreateEx(do_gapped_blast_search, (VoidPtr) array[index], THREAD_RUN|THREAD_BOUND, eTP_Default, NULL, NULL);
3607             else
3608                 thread_array[index] = NlmThreadCreateEx(do_blast_search, (VoidPtr) array[index], THREAD_RUN|THREAD_BOUND, eTP_Default, NULL, NULL);
3609             
3610             if (NlmThreadCompare(thread_array[index], NULL_thread)) {
3611                 ErrPostEx(SEV_ERROR, 0, 0, "Unable to open thread.");
3612             }
3613         }
3614 
3615         for (index=0; index<search->pbp->process_num; index++) {
3616             NlmThreadJoin(thread_array[index], &status);
3617         }
3618 
3619         for (index=1; index<search->pbp->process_num; index++) {
3620 #ifdef BLAST_COLLECT_STATS
3621             search->first_pass_hits += array[index]->first_pass_hits;
3622             search->second_pass_hits += array[index]->second_pass_hits;
3623             search->second_pass_trys += array[index]->second_pass_trys;
3624             search->first_pass_extends += array[index]->first_pass_extends;
3625             search->second_pass_extends += array[index]->second_pass_extends;
3626             search->first_pass_good_extends += array[index]->first_pass_good_extends;
3627             search->second_pass_good_extends += array[index]->second_pass_good_extends;
3628             search->number_of_seqs_better_E += array[index]->number_of_seqs_better_E;
3629             search->prelim_gap_no_contest += array[index]->prelim_gap_no_contest;
3630             search->prelim_gap_passed += array[index]->prelim_gap_passed;
3631             search->prelim_gap_attempts += array[index]->prelim_gap_attempts;
3632             search->real_gap_number_of_hsps += array[index]->real_gap_number_of_hsps;
3633 #endif
3634 
3635             if( array[index]->mult_queries ) { /* AM: query concatenation: free resources */
3636                 if( array[index]->mult_queries->HitListArray )
3637                     for( i = 0; i < array[index]->mult_queries->NumQueries; ++i )
3638                         BlastHitListDestruct( array[index]->mult_queries->HitListArray[i] );
3639 
3640                 MemFree( array[index]->mult_queries->HitListArray );
3641                 MemFree( array[index]->mult_queries );
3642             }
3643             /* Not copied at thread start. */
3644             array[index] = BlastSearchBlkDestruct(array[index]);        
3645         }
3646         array = MemFree(array);
3647 
3648         thread_array = MemFree(thread_array);
3649         
3650         NlmMutexDestroy(search->thr_info->db_mutex);
3651         search->thr_info->db_mutex = NULL;
3652         NlmMutexDestroy(search->thr_info->results_mutex);
3653         search->thr_info->results_mutex = NULL;
3654         NlmMutexDestroy(search->thr_info->ambiguities_mutex);
3655         search->thr_info->ambiguities_mutex = NULL;
3656     } else {
3657         if (search->pbp->gapped_calculation && StringCmp(search->prog_name, "blastn") != 0)
3658             do_gapped_blast_search((VoidPtr) search);
3659         else
3660             do_blast_search((VoidPtr) search);
3661     }
3662     if (search->rdfp->parameters & READDB_CONTENTS_ALLOCATED)
3663         search->rdfp = ReadDBCloseMHdrAndSeqFiles(search->rdfp); 
3664     if (time_out_boolean) {
3665         sprintf(buffer, "CPU limit exceeded");
3666         BlastConstructErrorMessage("Blast", buffer, 2, &(search->error_return));
3667         search->timed_out = TRUE;
3668     } 
3669 
3670     return;
3671 }
3672 
3673 Uint1
3674 FrameToDefine(Int2 frame)
3675 
3676 {
3677         Uint1 retval;
3678 
3679         switch (frame) {
3680                 case -1:
3681                         retval = SEQLOC_MASKING_MINUS1;
3682                         break;
3683                 case -2:
3684                         retval = SEQLOC_MASKING_MINUS2;
3685                         break;
3686                 case -3:
3687                         retval = SEQLOC_MASKING_MINUS3;
3688                         break;
3689                 case 1:
3690                         retval = SEQLOC_MASKING_PLUS1;
3691                         break;
3692                 case 2:
3693                         retval = SEQLOC_MASKING_PLUS2;
3694                         break;
3695                 case 3:
3696                         retval = SEQLOC_MASKING_PLUS3;
3697                         break;
3698                 default:
3699                         retval = SEQLOC_MASKING_NOTSET;
3700                         break;
3701         }
3702 
3703         return retval;
3704 }
3705 Int2
3706 DefineToFrame(Uint1 define)
3707 
3708 {
3709     Int2 frame;
3710     
3711     switch (define) {
3712     case SEQLOC_MASKING_MINUS1:
3713         frame = -1;
3714         break;
3715     case SEQLOC_MASKING_MINUS2:
3716         frame = -2;
3717         break;
3718     case SEQLOC_MASKING_MINUS3:
3719         frame = -3;
3720         break;
3721     case SEQLOC_MASKING_PLUS1:
3722         frame = 1;
3723         break;
3724     case SEQLOC_MASKING_PLUS2:
3725         frame = 2;
3726         break;
3727     case SEQLOC_MASKING_PLUS3:
3728         frame = 3;
3729         break;
3730     case SEQLOC_MASKING_NOTSET:
3731     default:
3732         frame = 0;
3733         break;
3734     }
3735     
3736     return frame;
3737 }
3738 
3739 CharPtr
3740 BlastConstructFilterString(Int4 filter_value)
3741 
3742 {
3743         Char buffer[32];
3744         CharPtr ptr;
3745 
3746         ptr = buffer;
3747 
3748         if (filter_value == FILTER_NONE)
3749                 return NULL;
3750         
3751         if (filter_value & FILTER_DUST)
3752         {
3753                 *ptr = 'D'; ptr++;
3754                 *ptr = ';'; ptr++;
3755         }
3756         
3757         if (filter_value & FILTER_SEG)
3758         {
3759                 *ptr = 'S'; ptr++;
3760                 *ptr = ';'; ptr++;
3761         }
3762 
3763         *ptr = NULLB;
3764 
3765         return StringSave(buffer);
3766 }
3767 
3768 void
3769 HackSeqLocId(SeqLocPtr slp, SeqIdPtr id)
3770 {
3771         if (slp == NULL) {
3772                 return;
3773         }
3774         switch (slp->choice) {
3775         case SEQLOC_BOND:
3776         case SEQLOC_FEAT:
3777                 /* unsupported */
3778                 /* assert(0); */
3779                 break;
3780         case SEQLOC_NULL:
3781         case SEQLOC_EMPTY:
3782                 break;
3783         case SEQLOC_WHOLE:
3784                 SeqIdSetFree((SeqIdPtr)slp->data.ptrvalue);
3785                 slp->data.ptrvalue = SeqIdDup(id);
3786                 break;
3787         case SEQLOC_EQUIV:
3788         case SEQLOC_MIX:
3789         case SEQLOC_PACKED_INT:
3790                 slp = (SeqLocPtr)slp->data.ptrvalue;
3791                 for (; slp != NULL; slp = slp->next) {
3792                         HackSeqLocId(slp, id);
3793                 }
3794                 break;
3795         case SEQLOC_INT:
3796                 SeqIdSetFree(((SeqIntPtr)slp->data.ptrvalue)->id);
3797                 ((SeqIntPtr)slp->data.ptrvalue)->id = SeqIdDup(id);
3798                 break;
3799         case SEQLOC_PNT:
3800                 SeqIdSetFree(((SeqPntPtr)slp->data.ptrvalue)->id);
3801                 ((SeqPntPtr)slp->data.ptrvalue)->id = SeqIdDup(id);
3802                 break;
3803         case SEQLOC_PACKED_PNT:
3804                 SeqIdSetFree(((PackSeqPntPtr)slp->data.ptrvalue)->id);
3805                 ((PackSeqPntPtr)slp->data.ptrvalue)->id = SeqIdDup(id);
3806                 break;
3807         /* default:
3808                 assert(0); */
3809         }
3810 }
3811 /* This function duplicates a SEQLOC_PACKED_INT or a SEQLOC_INT type of SeqLoc */
3812 static SeqLocPtr blastDuplicateSeqLocInt(SeqLocPtr slp_head)
3813 {
3814     SeqLocPtr dup_slp, slp, dup_head = NULL;
3815     SeqIntPtr sqip;
3816 
3817     if(slp_head == NULL)
3818         return NULL;
3819 
3820     /* First seqLoc in lower level */
3821 
3822     if (slp_head->choice == SEQLOC_PACKED_INT) {
3823        slp = slp_head->data.ptrvalue;
3824        dup_head = ValNodeNew(NULL);
3825        dup_head->choice = slp_head->choice;
3826     } else if (slp_head->choice == SEQLOC_INT) {
3827        slp = slp_head;
3828     } else { 
3829        return NULL;
3830     }
3831     sqip = slp->data.ptrvalue;
3832     
3833     /* Top level SeqLoc */    
3834 
3835     dup_slp = (VoidPtr) SeqLocIntNew(sqip->from, sqip->to, sqip->strand, sqip->id);
3836     if (dup_head)
3837        dup_head->data.ptrvalue = dup_slp;
3838     else
3839        dup_head = dup_slp;
3840 
3841     /* Loop over all SeqIntPtr s in this SeqLoc */
3842     for(slp = slp->next; slp != NULL; slp = slp->next) {
3843         sqip = slp->data.ptrvalue;
3844         dup_slp->next = (VoidPtr) SeqLocIntNew(sqip->from, sqip->to, sqip->strand, sqip->id);
3845         dup_slp = dup_slp->next;
3846     }
3847     
3848     return dup_head;
3849 }
3850 /* This function use PACKED INT as mask */
3851 void BLASTUpdateSeqIdInSeqInt(SeqLocPtr mask, SeqIdPtr sip)
3852 {
3853     SeqLocPtr slp;
3854     SeqIntPtr sintp;
3855 
3856     if(mask == NULL)
3857         return;
3858     
3859     for(slp = mask->data.ptrvalue; slp != NULL; slp = slp->next) {
3860         if(slp->choice != SEQLOC_INT)
3861             continue;
3862         sintp = (SeqIntPtr)slp->data.ptrvalue;
3863         SeqIdSetFree(sintp->id);
3864         sintp->id = SeqIdDup(sip);
3865     }
3866     return;
3867 }
3868 
3869 /* Adjust offsets in the mask locations list; discard locations outside of
3870    the range */
3871 static SeqLocPtr 
3872 AdjustOffsetsInMaskLoc(SeqLocPtr mask_loc, Int4 start, Int4 end)
3873 {
3874    SeqLocPtr slp, last_slp = NULL, next_slp, head = NULL;
3875    SeqIntPtr loc;
3876 
3877    if (!mask_loc)
3878       return NULL;
3879 
3880    if (mask_loc->choice == SEQLOC_PACKED_INT)
3881       slp = (SeqLocPtr) mask_loc->data.ptrvalue;
3882    else if (mask_loc->choice == SEQLOC_INT)
3883       slp = mask_loc;
3884    else /* Should be impossible */ 
3885       return NULL;
3886 
3887    while (slp) {
3888       if (slp->choice == SEQLOC_INT) {
3889          loc = (SeqIntPtr) slp->data.ptrvalue;
3890          loc->from = MAX(loc->from, start);
3891          loc->to = MIN(loc->to, end);
3892          if (loc->from >= loc->to) {
3893             /* This mask location does not intersect the interval.
3894                Remove it. */
3895             next_slp = slp->next;
3896             SeqLocFree(slp);
3897             slp = next_slp;
3898          } else {
3899             if (last_slp) {
3900                last_slp->next = slp;
3901             } else {
3902                head = slp;
3903             }
3904             last_slp = slp;
3905             slp = slp->next;
3906          }
3907       } else {
3908          next_slp = slp->next;
3909          SeqLocFree(slp);
3910          slp = next_slp;
3911       }
3912    }
3913    if (last_slp)
3914       last_slp->next = NULL;
3915    
3916    if (mask_loc->choice == SEQLOC_PACKED_INT) {
3917       mask_loc->data.ptrvalue = head;
3918       /* If there are no locations left, free the packed-int and 
3919          return NULL. */
3920       if (!head)
3921          mask_loc = ValNodeFree(mask_loc);
3922       return mask_loc;
3923    } else {
3924       return head;
3925    }
3926 } 
3927 
3928 /* This function use PACKED INT as slp2 */
3929 SeqLocPtr blastMergeFilterLocs(SeqLocPtr slp1, SeqLocPtr slp2, Boolean translate,
3930                                Int2 frame, Int4 length)
3931 {
3932 
3933     SeqLocPtr slp, dup_slp, dup_head;
3934 
3935     if(slp1 == NULL && slp2 == NULL)
3936         return NULL;
3937 
3938     if(slp2 == NULL)
3939         return slp1;
3940 
3941     if (slp2->choice == SEQLOC_PACKED_INT || slp2->choice == SEQLOC_INT) {
3942         dup_slp = blastDuplicateSeqLocInt(slp2);
3943     }
3944     else if (slp2->choice == SEQLOC_MIX) {
3945         /* for mixed seqlocs, recursively flatten all the internal
3946            seqloc components into a single seqloc_int */
3947         SeqLocPtr list_slp = slp2;
3948         dup_slp = NULL;
3949         while (list_slp != NULL) {
3950             if (list_slp->choice == SEQLOC_MIX) {
3951                 dup_slp = blastMergeFilterLocs(dup_slp, list_slp->data.ptrvalue,
3952                                            FALSE, frame, length);
3953             }
3954             else {
3955                 dup_slp = blastMergeFilterLocs(dup_slp, list_slp,
3956                                            FALSE, frame, length);
3957             }
3958             list_slp = list_slp->next;
3959         }
3960     }
3961     else {
3962         ErrPostEx(SEV_FATAL, 1, 0, "Duplication of SeqLoc failed\n");
3963     }
3964 
3965     /* Request to translate means, that slp2 is DNA SeqLoc, that should be
3966        translated into protein SeqLoc corresponding to the specific frame */
3967 
3968     if(translate) {
3969         BlastConvertDNASeqLoc(dup_slp, frame, length);
3970     }
3971     
3972     if(slp1 == NULL) {
3973         return dup_slp;
3974     }
3975 
3976     /* OK We have 2 not NULL filters - merging... */
3977     
3978     if(slp1->choice == SEQLOC_PACKED_INT)
3979         slp = (SeqLocPtr) slp1->data.ptrvalue;
3980     else
3981         slp = slp1;
3982 
3983     if (dup_slp->choice == SEQLOC_PACKED_INT) {
3984        dup_head = dup_slp;
3985        dup_slp = (SeqLocPtr) dup_slp->data.ptrvalue;
3986        MemFree(dup_head);
3987     }
3988     
3989     if(slp == NULL) {
3990         ErrPostEx(SEV_WARNING, 0, 0, "Invalid filter detected");
3991         slp1->data.ptrvalue = dup_slp;
3992     } 
3993     else
3994     {
3995         while(slp->next != NULL)
3996            slp = slp->next;
3997     
3998         slp->next = dup_slp;
3999      }
4000     
4001     return slp1;
4002 }
4003 
4004 /* This function is used to filter one frame of the translated DNA
4005    sequence */
4006 static void rpsBlastFilterSequence(BlastSearchBlkPtr search, Int4 frame,
4007                                    Uint1Ptr sequence, Int4 prot_length,
4008                                    Int4 dna_length)
4009 {
4010     BioseqPtr bsp_temp;
4011     Boolean mask_at_hash = FALSE;
4012     SeqLocPtr filter_slp = NULL;
4013     
4014     if(search->pbp->query_lcase_mask == NULL) {
4015         if(search->pbp->filter_string == NULL || !StringICmp(search->pbp->filter_string, "F"))
4016             return;                 /* No filtering */
4017     }
4018     
4019     bsp_temp = BlastMakeTempProteinBioseq(sequence+1, prot_length, 
4020                                           Seq_code_ncbistdaa);
4021         
4022     filter_slp = BlastBioseqFilterEx(bsp_temp, search->pbp->filter_string, 
4023                                      &mask_at_hash);
4024     HackSeqLocId(filter_slp, search->subject_info->sip);
4025     
4026     if(search->pbp->query_lcase_mask != NULL) {
4027         filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, TRUE, frame, dna_length);
4028     }
4029     
4030     /* SeqMgrDeleteFromBioseqIndex(bsp_temp); */
4031     
4032     /* bsp_temp->id = SeqIdSetFree(bsp_temp->id); */
4033     bsp_temp = BioseqFree(bsp_temp);
4034 
4035     BlastMaskTheResidues(sequence+1, prot_length, 21, filter_slp, FALSE, 0);
4036 
4037     /* Conversion to ProteinSeqLoc will be done after original SeqLoc will
4038        be used once again on the Gapped extention stage */
4039     
4040     /*    BlastConvertProteinSeqLoc(filter_slp, frame, dna_length); */
4041 
4042     if (filter_slp)
4043         ValNodeAddPointer(&(search->mask), FrameToDefine(frame), filter_slp);
4044     
4045     return;
4046 }
4047 BlastSequenceBlkPtr PNTR LIBCALL
4048 BlastMakeCopyQueryDNAP(BlastSequenceBlkPtr PNTR bsbpp_in)
4049 {
4050     BlastSequenceBlkPtr PNTR bsbpp;
4051     Int4 buff_size, m;
4052 
4053     if(bsbpp_in == NULL)
4054         return NULL;
4055 
4056     bsbpp = MemNew(sizeof(BlastSequenceBlkPtr)*2);
4057     for(m = 0; m < 2; m++) {
4058        if (bsbpp_in[m]) {
4059           bsbpp[m] = (BlastSequenceBlkPtr) MemNew(sizeof(BlastSequenceBlk));
4060         
4061           buff_size = bsbpp_in[m]->length+3*CODON_LENGTH;
4062           bsbpp[m]->sequence_start = MemNew(buff_size);
4063 
4064           MemCpy(bsbpp[m]->sequence_start, 
4065                  bsbpp_in[m]->sequence_start, buff_size);
4066 
4067           bsbpp[m]->sequence = bsbpp_in[m]->sequence;
4068           
4069           bsbpp[m]->length = bsbpp_in[m]->length;
4070           bsbpp[m]->original_length = bsbpp_in[m]->original_length;
4071           bsbpp[m]->effective_length = bsbpp_in[m]->effective_length;
4072        }
4073     }
4074 
4075     return bsbpp;
4076 }
4077 
4078 void LIBCALL BlastFreeQueryDNAP(BlastSequenceBlkPtr PNTR bsbpp)
4079 {
4080     Int4 m;
4081     
4082     for(m = 0; m < 2; m++) {
4083         BlastSequenceBlkDestruct(bsbpp[m]);
4084     }
4085     
4086     MemFree(bsbpp);
4087     
4088     return;
4089 }
4090 
4091 BlastSequenceBlkPtr PNTR LIBCALL
4092 BlastCreateQueryDNAP(BlastSearchBlkPtr search, Int4 length)
4093 {
4094 
4095     BlastSequenceBlkPtr PNTR bsbpp;
4096     Uint1Ptr dnap;
4097     Int4 i, j, k, m;
4098     Int4 shift;
4099     BLASTContextStructPtr context = search->context;
4100     Uint1 strand_option;
4101 
4102     if(context == NULL)
4103         return NULL;
4104     
4105     strand_option = search->last_context / CODON_LENGTH;
4106 
4107     bsbpp = MemNew(sizeof(BlastSequenceBlkPtr)*2);
4108 
4109     for(m = search->first_context/CODON_LENGTH; 
4110         m <= search->last_context/CODON_LENGTH; m++) {
4111 
4112         bsbpp[m] = (BlastSequenceBlkPtr) MemNew(sizeof(BlastSequenceBlk));
4113         
4114         dnap = MemNew(length+3*CODON_LENGTH);
4115         /* dnap = MemNew(length + 1); */
4116         
4117         dnap[0]=dnap[1]=dnap[2] = NULLB;
4118         
4119         shift = m*CODON_LENGTH;
4120         for (i = 0, j = 0; i < length+1;) {
4121             for(k = shift; k < shift + CODON_LENGTH; k++) {
4122                 dnap[i] = context[k].query->sequence_start[j];
4123                 i++;
4124             }
4125             j++;
4126         }
4127         BlastSequenceAddSequence(bsbpp[m], dnap+3, dnap, length, length, 0);
4128     }
4129 
4130     return bsbpp;
4131 }
4132 
4133 
4134 FloatHi LIBCALL BLASTCalculateSearchSpace(BLAST_OptionsBlkPtr options, 
4135         Int4 nseq, Int8 dblen, Int4 qlen)
4136 {
4137     Int4 length_adjustment, qlen_eff;
4138     Int8 dblen_eff;
4139     BLAST_KarlinBlkPtr kbp;
4140     FloatHi searchsp;
4141 
4142     if (options == NULL)
4143         return 0;
4144 
4145     kbp = BlastKarlinBlkCreate();
4146     BlastKarlinBlkGappedCalcEx(kbp, options->gap_open, options->gap_extend,
4147             options->decline_align, options->matrix, NULL);
4148 
4149     if (options->gapped_calculation ) {
4150         Nlm_FloatHi alpha, beta; /*alpha and beta for new scoring system */
4151         if (StringCmp(options->program_name, "blastn") != 0)
4152             getAlphaBeta(options->matrix,&alpha,&beta,options->gapped_calculation,
4153                      options->gap_open, options->gap_extend);
4154         else
4155             BlastKarlinGetNuclAlphaBeta(options->reward, options->penalty, options->gap_open, 
4156                      options->gap_extend, kbp, options->gapped_calculation, &alpha, &beta);
4157 
4158         BlastComputeLengthAdjustment(kbp->K,
4159                                      kbp->logK, alpha/kbp->Lambda, beta, 
4160                                      qlen,
4161                                      dblen, nseq,
4162                                      &length_adjustment );
4163     } else {
4164         BlastComputeLengthAdjustment(kbp->K, kbp->logK, 1/kbp->H, 0.0, 
4165                                      qlen, 
4166                                      dblen, nseq,
4167                                      &length_adjustment );
4168     }
4169 
4170     kbp = BlastKarlinBlkDestruct(kbp);
4171     
4172     qlen_eff   = qlen - length_adjustment;
4173     dblen_eff  = dblen - nseq*length_adjustment;
4174     searchsp   = ((Nlm_FloatHi) qlen_eff) * ((Nlm_FloatHi) dblen_eff);
4175     
4176     return searchsp;
4177 }
4178 
4179 #define DROPOFF_NUMBER_OF_BITS 10.0
4180 #define INDEX_THR_MIN_SIZE 20000
4181 #define DEFAULT_LONGEST_INTRON 122
4182 
4183 Int2 LIBCALL BLASTSetUpSearchInternalByLoc (BlastSearchBlkPtr search, SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)(Int4 done, Int4 positives))
4184 
4185 {
4186         BioseqPtr bsp_temp, bsp;
4187         Boolean mask_at_hash=FALSE, private_slp_delete;
4188         Boolean query_is_na, db_is_na;
4189         Char buffer[128];
4190         Int2 retval = 0, status, last_index;
4191         Int4 effective_query_length, query_length, full_query_length,
4192                 index, length, length_adjustment=0;
4193         Int4 max_length, block_width;
4194         Nlm_FloatHi avglen;
4195         ReadDBFILEPtr rdfp;
4196         SeqIdPtr query_id;
4197         SeqPortPtr spp=NULL, spp_reverse=NULL;
4198         SeqLocPtr filter_slp=NULL, private_slp=NULL, private_slp_rev=NULL, private_slp_double=NULL;
4199         GeneticCodePtr gcp;
4200         Uint1 residue, strand;
4201         Uint1Ptr sequence;
4202         Uint1Ptr query_seq, query_seq_start, query_seq_rev, query_seq_start_rev;
4203         ValNodePtr vnp;
4204         Int4 query_loc_start;
4205 
4206         /* AM: Temporaries to compute effective lengths of individual queries. */
4207         IntArray lengths_eff=NULL; 
4208         IntArray length_adj_tmp=NULL;
4209         Int4 le_iter, length_tmp;
4210         Int4 i;
4211         BLAST_ScoreBlkPtr sbptmp = NULL; /* AM: query concatenation */
4212 
4213         /* AM: To support individual masking in the case of query multiplexing. */
4214         SeqLocPtr *concat_filter_slp=NULL, *concat_private_slp=NULL, *concat_private_slp_rev=NULL,
4215                   * indiv_filter_slp=NULL, *indiv_private_slp=NULL, *indiv_private_slp_rev=NULL;
4216         SeqLocPtr ConcatLCaseMask;
4217         Boolean * indiv_mask_at_hash=NULL;
4218         QueriesPtr mult_queries = NULL;
4219 
4220         if (options == NULL)
4221         {
4222                 ErrPostEx(SEV_FATAL, 1, 0, "BLAST_OptionsBlkPtr is NULL\n");
4223                 return 1;
4224         }
4225 
4226         if (query_slp == NULL && query_bsp == NULL)
4227         {
4228                 ErrPostEx(SEV_FATAL, 1, 0, "Query is NULL\n");
4229                 return 1;
4230         }
4231 
4232         /* AM: Support for query multiplexing. */
4233         mult_queries = search->mult_queries;
4234 
4235         if( mult_queries )
4236         {
4237           concat_filter_slp 
4238             = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4239           indiv_filter_slp 
4240             = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4241           concat_private_slp 
4242             = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4243           concat_private_slp_rev 
4244             = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4245           indiv_private_slp 
4246             = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4247           indiv_private_slp_rev 
4248             = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4249           indiv_mask_at_hash
4250             = (Boolean *)MemNew( mult_queries->NumQueries*sizeof( Boolean ) );
4251         }
4252 
4253         query_seq = NULL;       /* Gets rid of warning. */
4254         query_seq_rev = NULL;   /* Gets rid of warning. */
4255         query_seq_start = NULL; /* Gets rid of warning. */
4256         query_seq_start_rev = NULL;     /* Gets rid of warning. */
4257 
4258         /* These parameters are used by translated RPS Blast */
4259         search->pbp->filter_string = StringSave(options->filter_string);
4260         search->pbp->is_rps_blast = options->is_rps_blast;
4261 
4262         /* Restrict lower case mask to the query interval, if it is 
4263            not a whole Bioseq. */
4264         if (query_slp) {
4265            options->query_lcase_mask = 
4266               AdjustOffsetsInMaskLoc(options->query_lcase_mask, 
4267                                      SeqLocStart(query_slp), 
4268                                      SeqLocStop(query_slp));
4269         }
4270         search->pbp->query_lcase_mask = options->query_lcase_mask;
4271         search->pbp->is_ooframe = options->is_ooframe;
4272         search->pbp->shift_pen = options->shift_pen; 
4273 
4274         if (query_slp)
4275         {
4276                 query_loc_start = SeqLocStart(query_slp);
4277                 strand = SeqLocStrand(query_slp);
4278                 if (strand == Seq_strand_unknown || strand == Seq_strand_plus || strand == Seq_strand_both)
4279                 {
4280                         private_slp = SeqLocIntNew(query_loc_start, SeqLocStop(query_slp), Seq_strand_plus, SeqLocId(query_slp));
4281 
4282                   /* AM: Support for query multiplexing. */
4283                   if( mult_queries )
4284                     for( i = 0; i < mult_queries->NumQueries; ++i )
4285                     {
4286                       indiv_private_slp[i] 
4287                         = SeqLocIntNew( 0,
4288                                         mult_queries->QueryEnds[i] - mult_queries->QueryStarts[i],
4289                                         Seq_strand_plus,
4290                                         mult_queries->FakeBsps[i]->id );
4291                       concat_private_slp[i]
4292                         = SeqLocIntNew( mult_queries->QueryStarts[i],
4293                                         mult_queries->QueryEnds[i],
4294                                         Seq_strand_plus,
4295                                         SeqLocId( query_slp ) );
4296                     }
4297                 }
4298                 if (strand == Seq_strand_minus || strand == Seq_strand_both)
4299                 {
4300                         private_slp_rev = SeqLocIntNew(query_loc_start, SeqLocStop(query_slp), Seq_strand_minus, SeqLocId(query_slp));
4301 
4302                   /* AM: Support for query multiplexing. */
4303                   if( mult_queries )
4304                     for( i = 0; i < mult_queries->NumQueries; ++i )
4305                     {
4306                       indiv_private_slp_rev[i] 
4307                         = SeqLocIntNew( 0,
4308                                         mult_queries->QueryEnds[i] - mult_queries->QueryStarts[i],
4309                                         Seq_strand_minus,
4310                                         mult_queries->FakeBsps[i]->id );
4311                       concat_private_slp_rev[i] 
4312                         = SeqLocIntNew( mult_queries->QueryStarts[i],
4313                                         mult_queries->QueryEnds[i],
4314                                         Seq_strand_minus,
4315                                         SeqLocId( query_slp ) );
4316                     }
4317                 }
4318                 private_slp_delete = TRUE;
4319                 if (search->prog_number==blast_type_blastn)
4320                         search = BlastFillQueryOffsets(search, query_slp, 1); 
4321 
4322         }
4323         else
4324         {
4325                 private_slp = SeqLocIntNew(0, query_bsp->length-1 , Seq_strand_plus, SeqIdFindBest(query_bsp->id, SEQID_GI));
4326                 private_slp_rev = SeqLocIntNew(0, query_bsp->length-1 , Seq_strand_minus, SeqIdFindBest(query_bsp->id, SEQID_GI));
4327                 private_slp_delete = FALSE;
4328 
4329                 private_slp_double = SeqLocIntNew(0, query_bsp->length-1 , Seq_strand_both, SeqIdFindBest(query_bsp->id, SEQID_GI));
4330                 if (search->prog_number==blast_type_blastn)
4331                         search = BlastFillQueryOffsets(search,
4332                                                        private_slp_double, 1); 
4333                 SeqLocFree(private_slp_double);
4334         }
4335 
4336         query_length = 0;
4337         if (private_slp)
4338                 query_length = SeqLocLen(private_slp);
4339         else if (private_slp_rev)
4340                 query_length = SeqLocLen(private_slp_rev);
4341         if (query_length == 0)
4342         {
4343                 sprintf(buffer, "No valid query sequence");
4344                 BlastConstructErrorMessage("Blast", buffer, 2,
4345                                            &(search->error_return));
4346                 retval = 1;
4347                 goto BlastSetUpReturn;
4348         }
4349 
4350         bsp = NULL;
4351         if (private_slp)
4352            bsp = BioseqLockById(SeqLocId(private_slp));
4353         else if (private_slp_rev)
4354            bsp = BioseqLockById(SeqLocId(private_slp_rev));
4355         
4356         if (bsp == NULL) {
4357            ErrPostEx(SEV_WARNING, 0, 0, "No valid query sequence, BioseqLockById returned NULL\n");
4358            retval = 1;
4359            goto BlastSetUpReturn;
4360         }
4361         full_query_length = bsp->length;
4362         
4363         BlastGetTypes(prog_name, &query_is_na, &db_is_na);
4364         if (query_is_na != ISA_na(bsp->mol))    {
4365            ErrPostEx(SEV_WARNING, 0, 0, "Query molecule is incompatible with %s program", prog_name);
4366            BioseqUnlock(bsp);
4367            retval = 1;
4368            goto BlastSetUpReturn;
4369         }
4370         if (bsp && bsp->repr == Seq_repr_virtual) {
4371            BioseqUnlock(bsp);
4372            ErrPostEx(SEV_WARNING, 0, 0, "Virtual sequence detected\n");
4373            retval = 1;
4374            goto BlastSetUpReturn;
4375         }
4376         BioseqUnlock(bsp);
4377 
4378         if (query_slp)  
4379         {
4380                 search->query_slp = query_slp;
4381         }
4382         else
4383         {
4384                 search->query_slp = private_slp;
4385                 search->allocated += BLAST_SEARCH_ALLOC_QUERY_SLP;
4386         }
4387                 
4388 
4389         search->translation_buffer = NULL;
4390         search->translation_buffer_size = 0;
4391 
4392         /* 
4393         Get genetic codes (should be determined from BLAST_OptionsBlkPtr.
4394         Only needed for blastx, tblast[nx] 
4395         */
4396         if (StringCmp(prog_name, "blastp") != 0 && StringCmp(prog_name, "blastn") != 0)
4397         {
4398 
4399                 if (StringCmp(prog_name, "tblastx") == 0 
4400                     || StringCmp(prog_name, "tblastn") == 0 
4401                     ||StringCmp(prog_name, "psitblastn") == 0)
4402 
4403                 {
4404                         gcp = GeneticCodeFind(options->db_genetic_code, NULL);
4405                         for (vnp = (ValNodePtr)gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next)
4406                         {
4407                                 if (vnp->choice == 3)   /* ncbieaa */
4408                                 {
4409                                         search->db_genetic_code = (CharPtr)vnp->data.ptrvalue;
4410                                         break;
4411                                 }
4412                         }
4413                         search->translation_table = GetPrivatTranslationTable(search->db_genetic_code, FALSE);
4414                         search->translation_table_rc = GetPrivatTranslationTable(search->db_genetic_code, TRUE);
4415                         max_length = 0;
4416                         rdfp = search->rdfp;
4417                         while (rdfp)
4418                         {
4419                                 max_length = MAX(max_length, readdb_get_maxlen(rdfp));
4420                                 rdfp = rdfp->next;
4421                         }
4422                         search->translation_buffer = MemNew((3+(max_length/3))*sizeof(Uint1));
4423                         search->translation_buffer_size = 1+(max_length/3);
4424                         search->allocated += BLAST_SEARCH_ALLOC_TRANS_INFO;
4425                 }
4426 
4427                 if (StringCmp(prog_name, "blastx") == 0 || StringCmp(prog_name, "tblastx") == 0)
4428                 {
4429                         gcp = GeneticCodeFind(options->genetic_code, NULL);
4430                         for (vnp = (ValNodePtr)gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next)
4431                         {
4432                                 if (vnp->choice == 3)   /* ncbieaa */
4433                                 {
4434                                         search->genetic_code = (CharPtr)vnp->data.ptrvalue;
4435                                         break;
4436                                 }
4437                         }
4438                 }
4439         }
4440 
4441         if (options->filter && !options->filter_string)
4442                 options->filter_string = BlastConstructFilterString(options->filter);
4443 
4444         /* If the query is translated do this below. */ 
4445         if (StringCmp(prog_name, "blastx") && 
4446             StringCmp(prog_name, "tblastx")) {
4447                                 /* Futamura */
4448             if(!(search->pbp->is_rps_blast &&
4449                                   (!StringCmp(prog_name, "tblastn")||
4450                                    !StringCmp(prog_name, "psitblastn")))) {
4451                 /* AM: Query multiplexing. */
4452                 if( !mult_queries )
4453                 {
4454                   if (private_slp)
4455                       filter_slp = BlastSeqLocFilterEx(private_slp, options->filter_string, &mask_at_hash);
4456                   else if (private_slp_rev)
4457                       filter_slp = BlastSeqLocFilterEx(private_slp_rev, options->filter_string, &mask_at_hash);
4458 
4459                   /* If lower case characters were detected in the input
4460                      their locations will be masked out */
4461                 
4462                   if(search->pbp->query_lcase_mask != NULL) {
4463                       filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, FALSE, 0, 0);
4464                   }
4465                 }
4466                 else
4467                   for( i = 0; i < mult_queries->NumQueries; ++i )
4468                   {
4469                     if( indiv_private_slp[i] )
4470                     {
4471                       indiv_filter_slp[i]
4472                         = BlastSeqLocFilterEx( indiv_private_slp[i], 
4473                                                options->filter_string, 
4474                                                indiv_mask_at_hash + i );
4475                       concat_filter_slp[i]
4476                         = BlastSeqLocFilterEx( concat_private_slp[i],
4477                                                options->filter_string,
4478                                                indiv_mask_at_hash + i );
4479                     }
4480                     else if( indiv_private_slp_rev[i] )
4481                     {
4482                       indiv_filter_slp[i] 
4483                         = BlastSeqLocFilterEx( indiv_private_slp_rev[i],
4484                                                options->filter_string,
4485                                                indiv_mask_at_hash + i );
4486                       concat_filter_slp[i]
4487                         = BlastSeqLocFilterEx( concat_private_slp_rev[i],
4488                                                options->filter_string,
4489                                                indiv_mask_at_hash + i );
4490                     }
4491 
4492                     if( mult_queries->LCaseMasks && mult_queries->LCaseMasks[i] )
4493                     {
4494                       indiv_filter_slp[i] = blastMergeFilterLocs( indiv_filter_slp[i],
4495                                                                   (SeqLocPtr)mult_queries->LCaseMasks[i]->data.ptrvalue, 
4496                                                                   FALSE, 0, 0 );
4497                       ConcatLCaseMask = ConcatSeqLoc( mult_queries, mult_queries->LCaseMasks[i], 
4498                                                       SeqLocId( query_slp ), i );
4499                       concat_filter_slp[i] = blastMergeFilterLocs( concat_filter_slp[i],
4500                                                                    (SeqLocPtr)ConcatLCaseMask->data.ptrvalue,
4501                                                                    FALSE, 0, 0 );
4502                     }
4503                   }
4504             }
4505         }
4506 
4507         if( mult_queries ) { /* AM: query concatenation: free resources */
4508             for( i = 0; i < mult_queries->NumQueries; ++i ) {
4509                 SeqLocFree( indiv_private_slp_rev[i] );
4510                 SeqLocFree( indiv_private_slp[i] );
4511                 SeqLocFree( concat_private_slp_rev[i] );
4512                 SeqLocFree( concat_private_slp[i] );
4513             }
4514 
4515             MemFree( indiv_private_slp_rev );
4516             MemFree( indiv_private_slp );
4517             MemFree( concat_private_slp_rev );
4518             MemFree( concat_private_slp );
4519         }
4520 
4521         /* 
4522            Dusting of query sequence. Only needed for blastn, optional
4523         */
4524 
4525         if(StringCmp(prog_name, "blastn") == 0) {
4526           /* AM: Changed to support query multiplexing. */
4527           if( !mult_queries )
4528                 if (filter_slp && !mask_at_hash)
4529                         ValNodeAddPointer(&(search->mask), SEQLOC_MASKING_NOTSET, filter_slp);
4530                 else
4531                         ValNodeAddPointer(&(search->mask1), SEQLOC_MASKING_NOTSET, filter_slp);
4532           else
4533             for( i = 0; i < mult_queries->NumQueries; ++i )
4534               if( !indiv_mask_at_hash[i] )
4535                 ValNodeAddPointer( &(search->mask), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
4536               else
4537                 ValNodeAddPointer( &(search->mask1), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
4538         }
4539 
4540 
4541         if (StringCmp(prog_name, "blastp") == 0
4542             || StringCmp(prog_name, "tblastn") == 0
4543             || StringCmp(prog_name, "psitblastn") == 0)
4544         {
4545                 spp = SeqPortNewByLoc(private_slp, Seq_code_ncbistdaa);
4546                 SeqPortSet_do_virtual(spp, TRUE);
4547 
4548                 /* AM: Changed to support query multiplexing. */
4549                 if( !mult_queries )
4550                   if (filter_slp && !mask_at_hash)
4551                         ValNodeAddPointer(&(search->mask), SEQLOC_MASKING_NOTSET, filter_slp);
4552                   else
4553                         ValNodeAddPointer(&(search->mask1), SEQLOC_MASKING_NOTSET, filter_slp);
4554                 else
4555                   for( i = 0; i < mult_queries->NumQueries; ++i )
4556                     if( !indiv_mask_at_hash[i] )
4557                       ValNodeAddPointer( &(search->mask), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
4558                     else
4559                       ValNodeAddPointer( &(search->mask1), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
4560         }
4561         else if (StringCmp(prog_name, "blastx") == 0 || StringCmp(prog_name, "tblastx") == 0 || StringCmp(prog_name, "blastn") == 0)
4562         {
4563                 if (private_slp)
4564                 {
4565                         spp = SeqPortNewByLoc(private_slp, Seq_code_ncbi4na);
4566                         SeqPortSet_do_virtual(spp, TRUE);
4567                 }
4568                 if (private_slp_rev)
4569                 {
4570                         spp_reverse = SeqPortNewByLoc(private_slp_rev, Seq_code_ncbi4na);
4571                         SeqPortSet_do_virtual(spp_reverse, TRUE);
4572                 }
4573         }
4574         else
4575         {
4576                 ErrPostEx(SEV_FATAL, 1, 0, "Only blastn, blastp, blastx, tblastn tblastx is allowed\n");
4577                 retval = 1;
4578                 goto BlastSetUpReturn;
4579         }
4580 
4581         /* AM: query concatenation: free resources */
4582         MemFree( indiv_mask_at_hash );
4583         MemFree( indiv_filter_slp );
4584 
4585         if (spp)
4586         {
4587                 query_seq_start = (Uint1Ptr) MemNew(2*((query_length)+2)*sizeof(Char));
4588                 query_seq_start[0] = NULLB;
4589                 query_seq = query_seq_start+1;
4590                 index=0;
4591                 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
4592                 {
4593 
4594                         if (IS_residue(residue))
4595                         {
4596                                 if (residue == 24) /* 24 is Selenocysteine. */
4597                                 {
4598                                         residue = 21; /* change Selenocysteine to X. */
4599                                         sprintf(buffer, "Selenocysteine (U) at position %ld replaced by X", 
4600                                                 (long) index+1);
4601                                         BlastConstructErrorMessage("Blast", buffer, 1, &(search->error_return));
4602                                 }
4603                                 query_seq[index] = residue;
4604                                 index++;
4605                         }
4606                 }
4607                 query_seq[index] = NULLB;
4608                 spp = SeqPortFree(spp);
4609                 if (StringCmp(prog_name, "blastn") == 0)
4610                 {
4611                         if (filter_slp)
4612                         {
4613                                 if (mask_at_hash)
4614                                         search->context[0].location =
4615                                                 BlastSeqLocFillDoubleIntEx(filter_slp, full_query_length, query_length, FALSE, SeqLocStart(private_slp));
4616                                 else
4617                                         BlastMaskTheResidues(query_seq, full_query_length, 15, filter_slp, FALSE, SeqLocStart(private_slp));
4618                         }
4619 
4620                         /* AM: query multiplexing. */
4621                         if( mult_queries )
4622                           for( i = 0; i < mult_queries->NumQueries; ++i )
4623                             if( concat_filter_slp[i] )
4624                               BlastMaskTheResidues( query_seq, 
4625                                                     full_query_length,
4626                                                     15, concat_filter_slp[i], FALSE, 
4627                                                     SeqLocStart( private_slp ) );
4628 
4629                         for (index=0; index<=query_length+1; index++)
4630                                 query_seq_start[index] = ncbi4na_to_blastna[query_seq_start[index]];
4631                 }
4632         }
4633 
4634         if (spp_reverse)
4635         {
4636                 query_seq_start_rev = (Uint1Ptr) MemNew(((query_length)+2)*sizeof(Char));
4637                 query_seq_start_rev[0] = NULLB;
4638                 query_seq_rev = query_seq_start_rev+1;
4639                 index=0;
4640                 while ((residue=SeqPortGetResidue(spp_reverse)) != SEQPORT_EOF)
4641                 {
4642                         if (IS_residue(residue))
4643                         {
4644                                 if (residue == 24) /* 24 is Selenocysteine. */
4645                                 {
4646                                         residue = 21; /* change Selenocysteine to X. */
4647                                         sprintf(buffer, "Selenocysteine (U) at position %ld replaced by X", 
4648                                                 (long) index+1);
4649                                         BlastConstructErrorMessage("Blast", buffer, 1, &(search->error_return));
4650                                 }
4651                                 query_seq_rev[index] = residue;
4652                                 index++;
4653                         }
4654                 }
4655                 query_seq_rev[index] = NULLB;
4656                 spp_reverse = SeqPortFree(spp_reverse);
4657                 if (StringCmp(prog_name, "blastn") == 0)
4658                 {
4659                         if (filter_slp)
4660                         {
4661                            if (mask_at_hash) 
4662                              search->context[0].location =
4663                                 BlastSeqLocFillDoubleIntRev(search->context[0].location, filter_slp, query_length, full_query_length, full_query_length - SeqLocStop(private_slp_rev) - 1);
4664                            else
4665                               BlastMaskTheResidues(query_seq_rev, full_query_length, 15, filter_slp, TRUE, full_query_length - SeqLocStop(private_slp_rev) - 1);
4666                         }
4667 
4668                         /* AM: query multiplexing. */
4669                         if( mult_queries )
4670                           for( i = 0; i < mult_queries->NumQueries; ++i )
4671                             if( concat_filter_slp[i] )
4672                               BlastMaskTheResidues( query_seq_rev, 
4673                                                     full_query_length,
4674                                                     15, concat_filter_slp[i], TRUE, 
4675                                                     full_query_length 
4676                                                       - SeqLocStop( private_slp_rev ) - 1 );
4677 
4678                         for (index=0; index<=query_length+1; index++)
4679                                 query_seq_start_rev[index] =
4680                                    ncbi4na_to_blastna[query_seq_start_rev[index]];
4681                         if (query_seq_start)
4682                            MemCpy(query_seq_start+query_length+1, 
4683                                   query_seq_start_rev,query_length+2);
4684                 }
4685         }
4686 
4687 /*
4688         Set the context_factor, which specifies how many different 
4689         ways the query or db is examined (e.g., blastn looks at both
4690         strands of query, context_factor is 2).
4691 */
4692         if (StringCmp(prog_name, "blastp") == 0)
4693         {
4694                 search->context_factor = 1;
4695                 length = query_length;
4696         }
4697         else if (StringCmp(prog_name, "blastn") == 0)
4698         {       /* two strands concatenated in one sequence */
4699                 search->context_factor = 1;
4700                 length = query_length;
4701         }
4702         else if (StringCmp(prog_name, "blastx") == 0)
4703         {       /* query translated in six frames. */
4704                 search->context_factor = search->last_context-search->first_context+1;
4705                 length = query_length/3;
4706         }
4707         else if ( (StringCmp(prog_name, "tblastn") == 0)
4708                   || (StringCmp(prog_name, "psitblastn") == 0))
4709         {       /* db translated in six frames. */
4710                 search->context_factor = 6;
4711                 length = query_length;
4712         }
4713         else if (StringCmp(prog_name, "tblastx") == 0)
4714         {       /* db and query each translated in six frames. */
4715                 search->context_factor = 6*CODON_LENGTH*(search->last_context-search->first_context+1);
4716                 length = query_length/3;
4717         }
4718         else
4719         {
4720                 sprintf(buffer, "%s is not a valid program name", prog_name);
4721                 BlastConstructErrorMessage("BLASTSetUpSearch", buffer, 2, &(search->error_return));
4722                 retval = 1;
4723                 goto BlastSetUpReturn;
4724         }
4725 
4726         if (private_slp)
4727                 query_id = SeqIdFindBest(SeqLocId(private_slp), SEQID_GI);
4728         else
4729                 query_id = SeqIdFindBest(SeqLocId(private_slp_rev), SEQID_GI);
4730 
4731         search->query_id = SeqIdDup(query_id);
4732 
4733 /* Store the query sequence, or the translation thereof. */
4734         if (StringCmp(prog_name, "blastp") == 0
4735             || StringCmp(prog_name, "tblastn") == 0
4736             || StringCmp(prog_name, "psitblastn") == 0)
4737         {       /* One blastp context for now. */
4738                 if (filter_slp)
4739                 {
4740                         if (mask_at_hash)
4741                                 search->context[0].location =
4742                                         BlastSeqLocFillDoubleInt(filter_slp, query_length, FALSE);
4743                         else
4744                                 BlastMaskTheResidues(query_seq, full_query_length, 21, filter_slp, FALSE, SeqLocStart(private_slp));
4745                 }
4746 
4747                 /* AM: query multiplexing. */
4748                 if( mult_queries )
4749                   for( i = 0; i < mult_queries->NumQueries; ++i )
4750                     if( concat_filter_slp[i] )
4751                       BlastMaskTheResidues( query_seq, full_query_length,
4752                                             21, concat_filter_slp[i], FALSE,
4753                                             SeqLocStart( private_slp ) );
4754 
4755                 BlastSequenceAddSequence(search->context[0].query, NULL, query_seq_start, query_length, query_length, 0);
4756         }
4757         else if (StringCmp(prog_name, "blastx") == 0  || StringCmp(prog_name, "tblastx") == 0)
4758         {
4759                 
4760                 for (index=search->first_context; index<=search->last_context; index++)
4761                 {
4762                    if (search->context[index].query->frame > 0)
4763                    {
4764                         sequence = GetTranslation(query_seq, query_length, search->context[index].query->frame, &length, search->genetic_code);
4765                    }
4766                    else
4767                    {
4768                         sequence = GetTranslation(query_seq_rev, query_length, search->context[index].query->frame, &length, search->genetic_code);
4769                    }
4770                    if (options->filter_string && length > 0)
4771                    {
4772                         bsp_temp = BlastMakeTempProteinBioseq(sequence+1, length, Seq_code_ncbistdaa);
4773                         
4774                         filter_slp = BlastBioseqFilterEx(bsp_temp, options->filter_string, &mask_at_hash);
4775                         HackSeqLocId(filter_slp, search->query_id);
4776 
4777                         /* If FASTA filtering is set - updating this SeqLoc */
4778                         if(search->pbp->query_lcase_mask != NULL) {
4779                             filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, TRUE, search->context[index].query->frame, query_length);
4780                         }
4781 
4782                         /* SeqMgrDeleteFromBioseqIndex(bsp_temp); */
4783                         
4784                         /* bsp_temp->id = SeqIdSetFree(bsp_temp->id); */
4785 
4786                         bsp_temp = BioseqFree(bsp_temp);
4787                         if (mask_at_hash)
4788                         {
4789                                 search->context[index].location = 
4790                                         BlastSeqLocFillDoubleInt(filter_slp, query_length, FALSE);
4791                         }
4792                         else
4793                         {
4794                                 BlastMaskTheResidues(sequence+1, length, 21, filter_slp, FALSE, 0);
4795                                 BlastConvertProteinSeqLoc(filter_slp, search->context[index].query->frame, query_length);
4796                         }
4797                         if (filter_slp && !mask_at_hash)
4798                                 ValNodeAddPointer(&(search->mask), FrameToDefine(search->context[index].query->frame), filter_slp);
4799                         else
4800                                 ValNodeAddPointer(&(search->mask1), FrameToDefine(search->context[index].query->frame), filter_slp);
4801                    }
4802                    BlastSequenceAddSequence(search->context[index].query, NULL, sequence, length, query_length, 0);
4803                 }
4804                 query_seq_start = MemFree(query_seq_start);
4805                 query_seq_start_rev = MemFree(query_seq_start_rev);
4806 
4807                 if(search->pbp->is_ooframe) {
4808                     search->query_dnap = BlastCreateQueryDNAP(search, query_length);
4809                 }
4810         } else if (StringCmp(prog_name, "blastn") == 0) {
4811            if (search->last_context - search->first_context > 0) {
4812               /* Both strands are searched */
4813               BlastSequenceAddSequence(search->context[search->first_context].query, NULL, query_seq_start, 2*query_length+2, 2*query_length+2, 0);
4814               BlastSequenceAddSequence(search->context[search->last_context].query, NULL,
4815                                        query_seq_start_rev, query_length,
4816                                        query_length, 0);
4817            } else if (search->first_context==0) 
4818               /* Only first strand is searched */
4819               BlastSequenceAddSequence(search->context[search->first_context].query, NULL, query_seq_start, query_length+1, query_length+1, 0);
4820            else {/* Only second strand is searched */
4821               BlastSequenceAddSequence(search->context[search->first_context].query, NULL,
4822                                        query_seq_start_rev, query_length+1,
4823                                        query_length+1, 0);
4824            }
4825         }
4826 
4827         if( mult_queries ) { /* AM: query concatenation: free resources */
4828             for( i = 0; i < mult_queries->NumQueries; ++i )
4829                 SeqLocFree( concat_filter_slp[i] );
4830 
4831             MemFree( concat_filter_slp );
4832         }
4833 
4834         if (mask_at_hash)
4835         { /* No longer needed. */
4836 /*
4837                 filter_slp = SeqLocSetFree(filter_slp);
4838 */
4839         }
4840         
4841 /* Set the ambiguous residue before the ScoreBlk is filled. */
4842         if (StringCmp(prog_name, "blastn") != 0)
4843         {
4844                 search->sbp->read_in_matrix = TRUE;
4845                 BlastScoreSetAmbigRes(search->sbp, 'X');
4846         }
4847         else
4848         {
4849                 if(options->matrix!=NULL && *(options->matrix) != NULLB) {
4850                      search->sbp->read_in_matrix = TRUE;
4851                 } else {
4852                      search->sbp->read_in_matrix = FALSE;
4853                 }
4854                 BlastScoreSetAmbigRes(search->sbp, 'N');
4855         }
4856 
4857 
4858         search->sbp->penalty = options->penalty;
4859         search->sbp->reward = options->reward;
4860         
4861         /* option is to use alignments chosen by user in PSM computation API (used in WWW PSI-Blast); */
4862         search->pbp->use_best_align = options->use_best_align;
4863 
4864 
4865         /* Should culling be used at all? */
4866         search->pbp->perform_culling = options->perform_culling;
4867         search->pbp->hsp_range_max = options->hsp_range_max;
4868         /* This assures that search->pbp->max_pieces is at least one wide. */
4869         block_width = MIN(query_length, options->block_width);
4870         if (block_width > 0)
4871            search->pbp->max_pieces = query_length/block_width;
4872 
4873         search->sbp->query_length = query_length;
4874 
4875         search->result_struct = BLASTResultsStructNew(search->result_size,
4876                                                       search->pbp->max_pieces,
4877                                                       search->pbp->hsp_range_max);
4878            
4879         if (options->matrix != NULL)
4880                 status = BlastScoreBlkMatFill(search->sbp, options->matrix);
4881         else 
4882                 status = BlastScoreBlkMatFill(search->sbp, "BLOSUM62");
4883 
4884         if (status != 0)
4885         {
4886                 ErrPostEx(SEV_WARNING, 0, 0, "BlastScoreBlkMatFill returned non-zero status");
4887                 retval = 1;
4888                 goto BlastSetUpReturn;
4889         }
4890 
4891         /* This is used right below. */
4892         search->pbp->gapped_calculation = options->gapped_calculation;
4893         search->pbp->do_not_reevaluate = options->do_not_reevaluate;
4894 
4895     /* Set up sum statistics */
4896     search->pbp->do_sum_stats = options->do_sum_stats;
4897     if(search->prog_number == blast_type_blastx  ||
4898        search->prog_number == blast_type_tblastn ||
4899        search->prog_number == blast_type_psitblastn)
4900     {
4901         /* The program may use new_link_hsps to evaluate sum
4902            statistics. */
4903         Int4 max_protein_gap; /* the largest gap permitted in the
4904                                * translated sequence */
4905 
4906         max_protein_gap = (options->longest_intron - 2)/3;
4907         if(search->pbp->gapped_calculation) {
4908             if(options->longest_intron == 0) {
4909                 /* a zero value of longest_intron
4910                  * invokes the default behavior, which for gapped
4911                  * calculation is to set longest_intron to a
4912                  * predefined value. */
4913                 search->pbp->longest_intron = (DEFAULT_LONGEST_INTRON - 2) / 3;
4914             } else if(max_protein_gap <= 0) {
4915                 /* A nonpositive value of max_protein_gap turns linking off */
4916                 search->pbp->do_sum_stats = FALSE;
4917                 search->pbp->longest_intron = 0;
4918             } else { /* the value of max_protein_gap is positive */
4919                 search->pbp->longest_intron = max_protein_gap;
4920             }
4921         } else { /* This is an ungapped calculation. */
4922             /* For ungapped calculations, we preserve the old behavior
4923              * of the longest_intron parameter to maintain
4924              * backward-compatibility with older versions of BLAST. */
4925             search->pbp->longest_intron = MAX(max_protein_gap, 0);
4926         }
4927     }
4928         search->pbp->first_db_seq = options->first_db_seq;
4929         search->pbp->final_db_seq = options->final_db_seq;
4930 
4931         retval = 0;
4932         for (index=search->first_context; index<=search->last_context; index++)
4933         {
4934            /* AM: Changed to support query multiplexing. */
4935            if (search->prog_number != blast_type_blastn || 
4936                index>search->first_context || 
4937                search->last_context==search->first_context)
4938            {
4939              if( search->prog_number == blast_type_tblastn
4940                  && search->mult_queries )
4941              {
4942                for( i = 0; i < search->mult_queries->NumQueries; ++i )
4943                {
4944                    sbptmp = BLAST_ScoreBlkNew( 
4945                            Seq_code_ncbistdaa, search->last_context + 1 );
4946                    sbptmp->read_in_matrix = TRUE;
4947                    BlastScoreSetAmbigRes( sbptmp, 'X' );
4948                    sbptmp->penalty = options->penalty;
4949                    sbptmp->reward = options->reward;
4950                    sbptmp->query_length = query_length;
4951 
4952                    if (options->matrix != NULL)
4953                            status = BlastScoreBlkMatFill(sbptmp, options->matrix);
4954                    else 
4955                            status = BlastScoreBlkMatFill(sbptmp, "BLOSUM62");
4956 
4957                  status = BlastScoreBlkFill( 
4958                          sbptmp,
4959                    ((CharPtr)search->context[index].query->sequence)
4960                      + search->mult_queries->QueryStarts[i],
4961                    search->mult_queries->QueryEnds[i]
4962                      - search->mult_queries->QueryStarts[i] + 1,
4963                    index );
4964 
4965                  if( status ) break;
4966 
4967                  search->mult_queries->lambda_array[i]
4968                    = sbptmp->kbp_std[search->first_context]->Lambda;
4969 
4970                  if( i )
4971                  {
4972                    if( search->mult_queries->LambdaMin
4973                        > sbptmp->kbp_std[search->first_context]->Lambda )
4974                      search->mult_queries->LambdaMin
4975                        = sbptmp->kbp_std[search->first_context]->Lambda;
4976 
4977                    if( search->mult_queries->LambdaMax
4978                        < sbptmp->kbp_std[search->first_context]->Lambda )
4979                      search->mult_queries->LambdaMax
4980                        = sbptmp->kbp_std[search->first_context]->Lambda;
4981 
4982                    if( search->mult_queries->LogKMin
4983                        > sbptmp->kbp_std[search->first_context]->logK )
4984                      search->mult_queries->LogKMin
4985                        = sbptmp->kbp_std[search->first_context]->logK;
4986 
4987                    if( search->mult_queries->LogKMax
4988                        < sbptmp->kbp_std[search->first_context]->logK )
4989                      search->mult_queries->LogKMax
4990                        = sbptmp->kbp_std[search->first_context]->logK;
4991                  }
4992                  else
4993                  {
4994                    search->mult_queries->LambdaMin
4995                      = search->mult_queries->LambdaMax
4996                      = sbptmp->kbp_std[search->first_context]->Lambda;
4997                    search->mult_queries->LogKMin 
4998                      = search->mult_queries->LogKMax
4999                      = sbptmp->kbp_std[search->first_context]->logK;
5000                  }
5001 
5002                  sbptmp = BLAST_ScoreBlkDestruct( sbptmp );
5003                }
5004              }
5005 
5006              status 
5007                = BlastScoreBlkFill(search->sbp, (CharPtr)
5008                                    search->context[index].query->sequence,
5009                                    search->context[index].query->length, 
5010                                    index);
5011            }
5012            else
5013            {
5014              status 
5015                = BlastScoreBlkFill(search->sbp, (CharPtr)
5016                                    search->context[index].query->sequence, 
5017                                    search->context[index+1].query->length, 
5018                                    index);
5019            }
5020 
5021                 if (status != 0)
5022                 {
5023                         sprintf(buffer, "Unable to calculate Karlin-Altschul params, check query sequence");
5024                         BlastConstructErrorMessage("BLASTSetUpSearch", buffer, 2, &(search->error_return));
5025                         retval = 1;
5026                 }
5027                 if (search->pbp->gapped_calculation)
5028                 {
5029                     if (StringCmp(search->prog_name, "blastn") != 0)
5030                     {
5031                         search->sbp->kbp_gap_std[index] = BlastKarlinBlkCreate();
5032                         status = BlastKarlinBlkGappedCalcEx(search->sbp->kbp_gap_std[index], options->gap_open, options->gap_extend, options->decline_align, search->sbp->name, &(search->error_return));
5033                         if (status != 0)
5034                         {
5035                                 retval = 1;
5036                         }
5037                         search->sbp->kbp_gap_psi[index] = BlastKarlinBlkCreate();
5038                         status = BlastKarlinBlkGappedCalcEx(search->sbp->kbp_gap_psi[index], options->gap_open, options->gap_extend, options->decline_align, search->sbp->name, &(search->error_return));
5039                         if (status != 0)
5040                         {
5041                                 retval = 1;
5042                         }
5043                    }
5044                    else
5045                    {
5046                         search->sbp->kbp_gap_std[index] = BlastKarlinBlkCreate();
5047                         status = BlastKarlinBlkNuclGappedCalc(search->sbp->kbp_gap_std[index], options->gap_open, options->gap_extend, options->reward, options->penalty, search->sbp->kbp_std[index], &(search->sbp->round_down), &(search->error_return));
5048                         if (status != 0)
5049                               retval = 1;
5050                    }
5051                }
5052         }
5053 
5054         search->sbp->kbp_gap = search->sbp->kbp_gap_std;
5055         search->sbp->kbp = search->sbp->kbp_std;
5056         if (search->pbp->gapped_calculation && StringCmp(prog_name, "blastn") != 0)
5057         {
5058         Int4 array_size = BlastKarlinGetMatrixValues(search->sbp->name, 
5059                                                      NULL, NULL, NULL, NULL, 
5060                                                      NULL, NULL);
5061                 if ( !(array_size > 0)) {
5062            /* This can only happen in case of unsupported matrix! */
5063            sprintf(buffer, 
5064                    "matrix %s is not supported\n",
5065                    search->sbp->name);
5066            BlastConstructErrorMessage("BLASTSetUpSearch", buffer, 2,
5067                                       &search->error_return);
5068            retval = 1;
5069         }
5070                 if (search->sbp->kbp_ideal == NULL)
5071                         search->sbp->kbp_ideal = BlastKarlinBlkStandardCalcEx(search->sbp);
5072         }
5073 
5074         /* Adjust the Karlin parameters. */
5075         if (StringCmp(prog_name, "blastx") == 0  || 
5076             StringCmp(prog_name, "tblastx") == 0 ||
5077             (search->pbp->is_rps_blast && !StringCmp(prog_name, "tblastn")))
5078         {
5079             /* Make sure ideal values are used for RPS tblastn, because the previously
5080                obtained values are for the fake protein. */
5081             if (search->pbp->is_rps_blast && !StringCmp(prog_name, "tblastn"))
5082                search->sbp->kbp[0]->Lambda = search->sbp->kbp_ideal->Lambda;
5083             BlastKarlinBlkStandardCalc(search->sbp, search->first_context, search->last_context);
5084         }
5085 
5086         /* If retval was set non-zero above (by the routines calculating Karlin-Altschul params),
5087            return here before these values are used.
5088         */
5089         if (retval)
5090            goto BlastSetUpReturn;
5091 
5092         if (options->gapped_calculation) {
5093         
5094         BLAST_KarlinBlkPtr kbp_gap =
5095           search->sbp->kbp_gap_std[search->first_context];
5096         Nlm_FloatHi alpha, beta; /*alpha and beta for the scoring system */
5097         if (StringCmp(options->program_name, "blastn") != 0)
5098             getAlphaBeta(options->matrix,&alpha,&beta,options->gapped_calculation,
5099                      options->gap_open, options->gap_extend);
5100         else
5101             BlastKarlinGetNuclAlphaBeta(options->reward, options->penalty, options->gap_open, 
5102                      options->gap_extend, kbp_gap, options->gapped_calculation, &alpha, &beta);
5103 
5104         BlastComputeLengthAdjustment(kbp_gap->K,
5105                                      kbp_gap->logK,
5106                                      alpha/kbp_gap->Lambda, beta, 
5107                                      length,
5108                                      search->dblen, search->dbseq_num,
5109                                      &length_adjustment );
5110         
5111         effective_query_length = length - length_adjustment;
5112 
5113         /* AM: If concatenating queries, then compute effective lengths of 
5114            individual queries. */
5115         if( search->mult_queries )
5116                 {
5117             search->mult_queries->TotalLength = length;
5118             lengths_eff =
5119                 (IntArray) MemNew( sizeof( Int4 )*
5120                                    search->mult_queries->NumQueries );
5121             length_adj_tmp =
5122                 (IntArray)MemNew( sizeof( Int4 )*
5123                                   search->mult_queries->NumQueries );
5124 
5125             for( le_iter = 0;
5126                  le_iter < search->mult_queries->NumQueries;
5127                  ++le_iter ) {
5128                 length_tmp = search->mult_queries->QueryEnds[le_iter]
5129                     - search->mult_queries->QueryStarts[le_iter] 
5130                     + 1;
5131                 length_adj_tmp[le_iter] = 0;
5132                 
5133                 BlastComputeLengthAdjustment(kbp_gap->K,
5134                                              kbp_gap->logK,
5135                                              alpha/kbp_gap->Lambda,
5136                                              beta, 
5137                                              length_tmp,
5138                                              search->dblen, search->dbseq_num,
5139                                              &length_adj_tmp[le_iter] );
5140 
5141                 lengths_eff[le_iter] = length_tmp - length_adj_tmp[le_iter];
5142                 
5143                 search->mult_queries->EffLengths[le_iter] =
5144                     lengths_eff[le_iter];
5145                 search->mult_queries->Adjustments[le_iter] =
5146                     length_adj_tmp[le_iter];
5147             
5148                     if( search->mult_queries->MinLen > length_tmp )
5149                       search->mult_queries->MinLen = length_tmp;
5150 
5151             if( search->mult_queries->MinLenEff > lengths_eff[le_iter] )
5152                 search->mult_queries->MinLenEff = lengths_eff[le_iter];
5153                   }
5154                 }
5155     }
5156         else /* this is an ungapped alignment */
5157         {
5158         BLAST_KarlinBlkPtr kbp = search->sbp->kbp[search->first_context];
5159 
5160         BlastComputeLengthAdjustment( kbp->K, kbp->logK, 1/kbp->H, 0.0, 
5161                                  length, 
5162                                  search->dblen, search->dbseq_num,
5163                                  &length_adjustment );
5164 
5165         effective_query_length = length - length_adjustment;
5166 
5167         /* AM: If concatenating queries, then compute effective lengths of 
5168            individual queries. */
5169         if( search->mult_queries ) {
5170             search->mult_queries->TotalLength = length;
5171             lengths_eff =
5172                 (IntArray)MemNew( sizeof( Int4 )*
5173                                   search->mult_queries->NumQueries );
5174             length_adj_tmp =
5175                 (IntArray)MemNew( sizeof( Int4 )*
5176                                   search->mult_queries->NumQueries );
5177 
5178             for( le_iter = 0;
5179                  le_iter < search->mult_queries->NumQueries;
5180                  ++le_iter ) {
5181                 length_tmp = search->mult_queries->QueryEnds[le_iter]
5182                     - search->mult_queries->QueryStarts[le_iter] 
5183                     + 1;
5184                 length_adj_tmp[le_iter] = 0;
5185 
5186                 BlastComputeLengthAdjustment( kbp->K, kbp->logK,
5187                                               1/kbp->H, 0.0, 
5188                                               length_tmp, 
5189                                               search->dblen, search->dbseq_num,
5190                                               &(length_adj_tmp[le_iter]) );
5191 
5192                 lengths_eff[le_iter] = length_tmp - length_adj_tmp[le_iter];
5193                 search->mult_queries->EffLengths[le_iter] =
5194                     lengths_eff[le_iter];
5195                 search->mult_queries->Adjustments[le_iter] =
5196                     length_adj_tmp[le_iter];
5197                     
5198                 if( search->mult_queries->MinLen > length_tmp )
5199                     search->mult_queries->MinLen = length_tmp;
5200                 
5201                 if( search->mult_queries->MinLenEff > lengths_eff[le_iter] )
5202                     search->mult_queries->MinLenEff = lengths_eff[le_iter];
5203             }
5204                 }
5205     }
5206 
5207         search->length_adjustment = MAX(length_adjustment, 0);
5208 
5209     if (!search->dblen_eff) {
5210         search->dblen_eff =
5211             search->dblen - search->dbseq_num*search->length_adjustment;
5212         /* AM: If concatenating queries find effective db lengths for each query. */
5213            if( search->mult_queries )
5214            {
5215              for( le_iter = 0; le_iter < search->mult_queries->NumQueries; 
5216                   ++le_iter )
5217              {
5218                if( search->prog_number == blast_type_blastn )
5219                  search->mult_queries->DbLenEff[le_iter]
5220                    = MAX( 1, search->dblen 
5221                              - search->dbseq_num*length_adj_tmp[le_iter] );
5222                else
5223                  search->mult_queries->DbLenEff[le_iter]
5224                    = MAX( search->dbseq_num, 
5225                           search->dblen 
5226                           - search->dbseq_num*length_adj_tmp[le_iter] );
5227              }
5228 
5229              MemFree( length_adj_tmp );
5230            }
5231         }
5232 
5233         for (index=search->first_context; index<=search->last_context; index++)
5234         {
5235                 search->context[index].query->effective_length = effective_query_length;
5236         }
5237 
5238         /* AM: Setting up effective search spaces for individual queries. */
5239         if (search->searchsp_eff == 0)
5240         {
5241                 search->searchsp_eff = ((Nlm_FloatHi) search->dblen_eff)*((Nlm_FloatHi) effective_query_length);
5242 
5243                 if( search->mult_queries )
5244                   for( le_iter = 0; le_iter < search->mult_queries->NumQueries; ++le_iter )
5245                   {
5246                     search->mult_queries->SearchSpEff[le_iter]
5247                       = ((Nlm_FloatHi)search->mult_queries->DbLenEff[le_iter])
5248                       * ((Nlm_FloatHi)lengths_eff[le_iter]);
5249 
5250                     if( lengths_eff[le_iter] == search->mult_queries->MinLenEff )
5251                       search->mult_queries->MinSearchSpEff
5252                         = search->mult_queries->SearchSpEff[le_iter];
5253                   }
5254         }
5255         else if( search->mult_queries )
5256           for( le_iter = 0; le_iter < search->mult_queries->NumQueries; ++le_iter )
5257             search->mult_queries->SearchSpEff[le_iter] = search->searchsp_eff;
5258 
5259         /* The default is that cutoff_s was not set and is zero. */
5260         if (options->cutoff_s == 0)
5261         {
5262                 search->pbp->cutoff_e = options->expect_value;
5263                 search->pbp->cutoff_e_set = TRUE;
5264                 search->pbp->cutoff_s = options->cutoff_s;
5265                 search->pbp->cutoff_s_set = FALSE;
5266         }
5267         else
5268         {
5269                 search->pbp->cutoff_e = options->expect_value;
5270                 search->pbp->cutoff_e_set = FALSE;
5271                 search->pbp->cutoff_s = options->cutoff_s;
5272                 search->pbp->cutoff_s_set = TRUE;
5273         }
5274 
5275         MemFree( lengths_eff ); /* AM: query concatenation: free resources */
5276 
5277 /* For now e2 is set to 0.5 and cutoff_e2_set is FALSE.  This is then
5278 changed to the proper values in blast_set_parameters.  In the final version
5279 of this program (where more blast programs and command-line options are
5280 available) this needs to be set higher up. */
5281         if (options->cutoff_s2 == 0)
5282         {
5283                 search->pbp->cutoff_e2 = options->e2;
5284                 search->pbp->cutoff_e2_set = TRUE;
5285                 search->pbp->cutoff_s2 = options->cutoff_s2;
5286                 search->pbp->cutoff_s2_set = FALSE;
5287         }
5288         else
5289         {
5290                 search->pbp->cutoff_e2 = options->e2;
5291                 search->pbp->cutoff_e2_set = FALSE;
5292                 search->pbp->cutoff_s2 = options->cutoff_s2;
5293                 search->pbp->cutoff_s2_set = TRUE;
5294         }
5295         
5296         search->pbp->discontinuous = options->discontinuous;
5297 
5298         
5299         /* For position-based BLAST. */
5300         search->pbp->ethresh = options->ethresh;
5301         search->pbp->maxNumPasses = options->maxNumPasses;
5302         search->pbp->pseudoCountConst = options->pseudoCountConst;
5303 
5304         if (NlmThreadsAvailable()) /* ONly allow more than one cpu if MT compiled. */
5305                 search->pbp->process_num = options->number_of_cpus;
5306         else
5307                 search->pbp->process_num = 1;
5308 
5309         search->pbp->cpu_limit = options->cpu_limit;
5310         search->pbp->gap_decay_rate = options->gap_decay_rate;
5311         search->pbp->gap_size = options->gap_size;
5312         search->pbp->gap_prob = options->gap_prob;
5313         search->pbp->old_stats = options->old_stats;
5314         search->pbp->use_large_gaps = options->use_large_gaps;
5315         search->pbp->number_of_bits = options->number_of_bits;
5316         search->pbp->two_pass_method = options->two_pass_method;
5317         search->pbp->multiple_hits_only = options->multiple_hits_only;
5318         search->pbp->gap_open = options->gap_open;
5319         search->pbp->gap_extend = options->gap_extend;
5320         search->pbp->decline_align = options->decline_align;
5321         search->pbp->total_hsp_limit = options->total_hsp_limit;
5322 
5323         search->pbp->hsp_num_max = options->hsp_num_max;
5324 /* CHANGE HERE??? */
5325         if (search->pbp->gapped_calculation && StringCmp(search->prog_name, "blastn"))
5326         {
5327 /*
5328                 search->pbp->cutoff_s2_set = TRUE;
5329 */
5330                 if (StringCmp(search->prog_name, "blastn") != 0)
5331                 {
5332                         search->pbp->gap_x_dropoff = (BLAST_Score) (options->gap_x_dropoff*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
5333                         search->pbp->gap_x_dropoff_final = (BLAST_Score) (options->gap_x_dropoff_final*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
5334 
5335                   /* AM: Change to support query multiplexing. */
5336                   if( StringCmp( search->prog_name, "tblastn" ) == 0
5337                       && search->mult_queries )
5338                   {
5339                     search->pbp->gap_trigger 
5340                       = (BLAST_Score)( ( options->gap_trigger*NCBIMATH_LN2
5341                                            + search->mult_queries->LogKMin )
5342                                        /search->mult_queries->LambdaMax );
5343                   }
5344                   else
5345                         search->pbp->gap_trigger = (BLAST_Score) ((options->gap_trigger*NCBIMATH_LN2+search->sbp->kbp[search->first_context]->logK)/ search->sbp->kbp[search->first_context]->Lambda);
5346                 }
5347                 else
5348                 {
5349                         search->pbp->gap_x_dropoff = (BLAST_Score) (options->gap_x_dropoff*NCBIMATH_LN2 / search->sbp->kbp[search->first_context]->Lambda);
5350                         search->pbp->gap_x_dropoff_final = (BLAST_Score) (options->gap_x_dropoff_final*NCBIMATH_LN2 / search->sbp->kbp[search->first_context]->Lambda);
5351                         search->pbp->gap_trigger = (BLAST_Score) ((options->gap_trigger*NCBIMATH_LN2+search->sbp->kbp[search->first_context]->logK)/ search->sbp->kbp[search->first_context]->Lambda);
5352                 }
5353                 /* The trigger value sets the s2 cutoff. */
5354                 search->pbp->cutoff_s2 = (Int4) search->pbp->gap_trigger;
5355         }
5356         else
5357         {
5358                 search->pbp->gap_x_dropoff = (BLAST_Score) (options->gap_x_dropoff*NCBIMATH_LN2 / search->sbp->kbp[search->first_context]->Lambda);
5359                 search->pbp->gap_x_dropoff_final = (BLAST_Score) (options->gap_x_dropoff_final*NCBIMATH_LN2 / search->sbp->kbp[search->first_context]->Lambda);
5360                 search->pbp->gap_trigger = (BLAST_Score) ((options->gap_trigger*NCBIMATH_LN2+search->sbp->kbp[search->first_context]->logK)/ search->sbp->kbp[search->first_context]->Lambda);
5361                 /* Set S and S2 equal if not sum stats. */
5362                 if (search->pbp->do_sum_stats == FALSE)
5363                         search->pbp->cutoff_s2 = search->pbp->cutoff_s;
5364         }
5365         /* Ensures that gap_x_dropoff_final is at least as large as gap_x_dropoff. */
5366         search->pbp->gap_x_dropoff_final = MAX(search->pbp->gap_x_dropoff_final, search->pbp->gap_x_dropoff);
5367 
5368 /* "threshold" (first and second) must be set manually for two-pass right now.*/
5369         search->pbp->threshold_set = TRUE;
5370         search->pbp->threshold_second = options->threshold_second;
5371 
5372         search->pbp->window_size = options->window_size;
5373         search->pbp->window_size_set = TRUE;
5374 
5375         search->whole_query = TRUE;
5376         if (options->required_start != 0 || options->required_end != -1)
5377         {
5378                 search->whole_query = FALSE;
5379                 search->required_start = options->required_start;
5380                 if (options->required_end != -1)
5381                         search->required_end = options->required_end;
5382                 else
5383                         search->required_end = query_length;
5384         }
5385 
5386         if (qlen <= 0)
5387                 qlen = query_length;
5388 
5389         /* Use DROPOFF_NUMBER_OF_BITS as the default if it's set to zero. */
5390         if (options->dropoff_1st_pass == 0)
5391                 options->dropoff_1st_pass = DROPOFF_NUMBER_OF_BITS;
5392 
5393         if (options->dropoff_2nd_pass == 0)
5394                 options->dropoff_2nd_pass = DROPOFF_NUMBER_OF_BITS;
5395 
5396         if (StringCmp(search->prog_name, "blastn") != 0)
5397         {
5398                 avglen = BLAST_AA_AVGLEN;
5399         }
5400         else
5401         {
5402                 avglen = BLAST_NT_AVGLEN;
5403                 /* Use only one type of gap for blastn */
5404                 search->pbp->ignore_small_gaps = FALSE;
5405         }
5406 
5407         if (search->rdfp)
5408         {
5409                 Int4 total_number;
5410                 Int8 total_length;
5411 
5412                 readdb_get_totals(search->rdfp, &total_length, &total_number);
5413                 if (total_number > 0)
5414                         avglen = ((Nlm_FloatHi) total_length)/total_number;
5415         }
5416         else if (search->dblen > 0 && search->dbseq_num == 1)
5417         {
5418                 avglen = search->dblen;
5419         }
5420 
5421         if (blast_set_parameters(search, options->dropoff_1st_pass, options->dropoff_2nd_pass, avglen, search->searchsp_eff, options->window_size) != 0) {
5422            retval = 1;
5423            goto BlastSetUpReturn;
5424         }
5425         if (options->scalingFactor == 0.0)
5426                 options->scalingFactor = 1.0;
5427 
5428         if (options->scalingFactor != 0.0 && options->scalingFactor != 1.0)
5429         {
5430                 search->pbp->gap_open *= options->scalingFactor;
5431                 search->pbp->gap_extend *= options->scalingFactor;
5432                 search->pbp->dropoff_1st_pass *= options->scalingFactor;
5433                 search->pbp->dropoff_2nd_pass *= options->scalingFactor;
5434                 search->pbp->gap_x_dropoff *= options->scalingFactor;
5435                 search->pbp->gap_x_dropoff_final *= options->scalingFactor;
5436                 search->pbp->decline_align *= options->scalingFactor;
5437                 search->pbp->gap_trigger *= options->scalingFactor;
5438                 search->pbp->cutoff_s *= options->scalingFactor;
5439                 search->pbp->cutoff_s1 *= options->scalingFactor;
5440                 search->pbp->cutoff_s2 *= options->scalingFactor;
5441                 search->pbp->cutoff_s2_max *= options->scalingFactor;
5442                 search->pbp->cutoff_s_first *= options->scalingFactor;
5443                 search->pbp->cutoff_s_second *= options->scalingFactor;
5444         }
5445         search->pbp->scalingFactor = options->scalingFactor;
5446         if (options->is_megablast_search) 
5447            search->pbp->mb_params = MegaBlastParameterBlkNew(options);
5448         search->pbp->explode_seqids = options->explode_seqids;
5449 
5450         if (search->pbp->multiple_hits_only)
5451         {       
5452                 if (search->context[search->first_context].query->length < 2*options->wordsize)
5453                 {
5454                         BlastConstructErrorMessage("Blast", 
5455                                 "Query must be at least twice wordsize for two hit mode", 2, &(search->error_return));
5456                         retval = 1;
5457                         goto BlastSetUpReturn;
5458                 }
5459         }
5460         else
5461         {
5462                 if (search->context[search->first_context].query->length < options->wordsize)
5463                 {
5464                    Char tmp_buffer[128];
5465                    sprintf(tmp_buffer, 
5466                            "Query length %d is less than wordsize %d",
5467                        search->context[search->first_context].query->length,
5468                            options->wordsize);
5469                    BlastConstructErrorMessage("Blast", buffer, 2,
5470                                               &(search->error_return));
5471                    BlastConstructErrorMessage("Blast", 
5472                       tmp_buffer, 2, &(search->error_return));
5473                    retval = 1;
5474                    goto BlastSetUpReturn;
5475                 }
5476         }
5477                 
5478         search->thr_info->awake_index = FALSE;
5479         if (NlmThreadsAvailable() && (search->context_factor*query_length) > INDEX_THR_MIN_SIZE) {
5480             search->thr_info->awake_index = TRUE;
5481             search->thr_info->last_tick = Nlm_GetSecs();
5482             search->thr_info->index_thr = 
5483                 NlmThreadCreate(index_proc, search->thr_info);
5484             search->thr_info->index_callback = callback;
5485         }
5486         
5487         /* Only do this if this is not a pattern search. */
5488         if (options->isPatternSearch == FALSE && search->pbp->is_rps_blast == FALSE)
5489         {
5490            if (StrCmp(search->prog_name, "blastn")) 
5491               last_index = search->last_context;
5492            else 
5493               last_index = search->first_context;
5494            for (index=search->first_context; index<=last_index; index++)
5495            {
5496                 if (options->threshold_second > 0)
5497                 {
5498                         search->wfp = search->wfp_first;
5499                         if (!(search->positionBased)) /*AAS*/
5500                             status = BlastFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
5501                         else
5502                             status = BlastNewFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
5503                         if (status < 0) {
5504                             search->thr_info->awake_index = FALSE;
5505                             ErrPostEx(SEV_WARNING, 0, 0, 
5506                                "BlastFindWords returned non-zero status");
5507                             retval = 1;
5508                             goto BlastSetUpReturn;
5509                         }
5510                 }
5511                 search->wfp = search->wfp_second;
5512                 if (StringCmp(prog_name, "blastn") != 0)
5513                 {
5514                     if (search->allocated & BLAST_SEARCH_ALLOC_WFP_SECOND)
5515                     {
5516                         if (!(search->positionBased))
5517                             status = BlastFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
5518                         else
5519                             status = BlastNewFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
5520                     }
5521                 }
5522                 else
5523                 { 
5524                         status = BlastNtFindWords(search, 0, search->context[index].query->length, 
5525                                                       (Uint1) index);
5526                 }
5527 
5528                 search->context[index].location = ValNodeFree(search->context[index].location);
5529 
5530                 if (status > 0)
5531                 {
5532                         search->thr_info->awake_index = FALSE;
5533                         sprintf(buffer, "No valid letters to be indexed on context %d", index);
5534                         /* This is just a warning */
5535                         BlastConstructErrorMessage("Blast", buffer, 1,
5536                                                    &(search->error_return));
5537                 }
5538                 else if (status < 0)
5539                 {
5540                         search->thr_info->awake_index = FALSE;
5541                         sprintf(buffer, "Error finding words");
5542                         BlastConstructErrorMessage("Blast", buffer, 2, &(search->error_return));
5543                         retval = 1;
5544                         goto BlastSetUpReturn;
5545                 }
5546            }
5547            if (StrCmp(search->prog_name, "blastn"))
5548               lookup_position_aux_destruct(search->wfp->lookup);
5549            else
5550               mb_lookup_position_aux_destruct(search->wfp->lookup);
5551         }
5552 
5553 
5554         /* 
5555         Turn off the index thread by setting this flag.  Don't wait for a join, as the
5556         search will take much longer than the one second for this to die.
5557         */
5558         search->thr_info->awake_index = FALSE;
5559  BlastSetUpReturn:
5560         if (private_slp && private_slp_delete)
5561                 private_slp = SeqLocFree(private_slp);
5562         if (private_slp_rev)
5563                 private_slp_rev = SeqLocFree(private_slp_rev);
5564 
5565         return retval;
5566 }
5567 
5568 Boolean 
5569 BlastGetFirstAndLastContext(CharPtr prog_name, SeqLocPtr query_slp, Int2Ptr first_context, Int2Ptr last_context, Uint1 strand_options)
5570 {
5571         Uint1 strand;
5572 
5573         if (query_slp == NULL)
5574         {       /* Query was a BioseqPtr, Check strand_options. */
5575                 strand = Seq_strand_both;
5576         }
5577         else
5578         {
5579                 strand = SeqLocStrand(query_slp);
5580         }
5581         
5582         /* 
5583         Check the strand_options and use that if top or bottom is specified. 
5584         otherwise use what's specified above. 
5585         */
5586         if (strand_options == BLAST_TOP_STRAND)
5587                 strand = Seq_strand_plus;
5588         else if (strand_options == BLAST_BOTTOM_STRAND)
5589                 strand = Seq_strand_minus;
5590         
5591         if (StringCmp(prog_name, "blastp") == 0
5592             || StringCmp(prog_name, "tblastn") == 0
5593             ||  StringCmp(prog_name, "psitblastn") == 0)
5594         {
5595                 *first_context = 0;
5596                 *last_context = 0;
5597         }
5598         else if (StringCmp(prog_name, "blastx") == 0 || StringCmp(prog_name, "tblastx") == 0)
5599         {
5600                 if (strand == Seq_strand_unknown || strand == Seq_strand_plus || strand == Seq_strand_both)
5601                         *first_context = 0;
5602                 else 
5603                         *first_context = 3;
5604                         
5605                 if (strand == Seq_strand_minus || strand == Seq_strand_both)
5606                         *last_context = 5;
5607                 else
5608                         *last_context = 2;
5609         }
5610         else if (StringCmp(prog_name, "blastn") == 0)
5611         {
5612                 if (strand == Seq_strand_unknown || strand == Seq_strand_plus || strand == Seq_strand_both)
5613                         *first_context = 0;
5614                 else 
5615                         *first_context = 1;
5616                         
5617                 if (strand == Seq_strand_minus || strand == Seq_strand_both)
5618                         *last_context = 1;
5619                 else
5620                         *last_context = 0;
5621         }
5622         return TRUE;
5623 }
5624 
5625 BlastDoubleInt4Ptr 
5626 GetGisFromFile (CharPtr gifile, Int4Ptr gi_list_size)
5627 {
5628     BlastDoubleInt4Ptr retval = NULL;
5629     Int4ListPtr gilist = NULL;
5630     register Int4 i;
5631 
5632     if ( !(gilist = Int4ListReadFromFile(gifile)))
5633         return NULL;
5634 
5635     retval = (BlastDoubleInt4Ptr) MemNew(sizeof(BlastDoubleInt4)*gilist->count);
5636     if (!retval)
5637         return retval;
5638 
5639     if (gi_list_size)
5640         *gi_list_size = gilist->count;
5641 
5642     for (i = 0; i < gilist->count; i++)
5643         retval[i].gi = gilist->i[i];
5644 
5645     gilist = Int4ListFree(gilist);
5646 
5647     return retval;
5648 }
5649 
5650 BlastSearchBlkPtr LIBCALL
5651 BLASTSetUpSearchWithReadDbInternalEx (SeqLocPtr query_slp, BioseqPtr query_bsp,
5652                                       CharPtr prog_name, Int4 qlen, CharPtr
5653                                       dbname, BLAST_OptionsBlkPtr options, int
5654                                       (LIBCALLBACK *callback)PROTO((Int4 done,
5655                                                                     Int4
5656                                                                     positives)),
5657                                       SeqIdPtr seqid_list, BlastDoubleInt4Ptr
5658                                       gi_list, Int4 gi_list_total, ReadDBFILEPtr
5659                                       rdfp)
5660 {
5661    if (options->is_megablast_search)
5662       return MegaBlastSetUpSearchWithReadDbInternal(query_slp, query_bsp,
5663                                                     prog_name, 0,
5664                                                     dbname, options, callback,
5665                                                     seqid_list, gi_list,
5666                                                     gi_list_total, rdfp);
5667    else
5668       return BLASTSetUpSearchWithReadDbInternal(query_slp, query_bsp,
5669                                                 prog_name, qlen,
5670                                                 dbname, options, callback,
5671                                                 seqid_list, gi_list,
5672                                                 gi_list_total, rdfp);
5673 }
5674 
5675 
5676 BlastSearchBlkPtr
5677 BLASTSetUpSearchWithReadDbInternal (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, ReadDBFILEPtr rdfp)
5678 {
5679         return BLASTSetUpSearchWithReadDbInternalMult(query_slp, query_bsp, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, rdfp, NULL);
5680 }
5681 
5682 
5683 /**
5684  * Calculate the hitlist size for preliminary alignments with a single
5685  * query, i.e. all but the final alignment with traceback.  This size
5686  * is generally somewhat larger than the final hitlist size because:
5687  * - the final alignment is the most sensitive, and may improve the
5688  *   score of alignments that would not otherwise be reported; and
5689  * - when composition-based statitics is used, many hits may be
5690  *   dropped in the final phase
5691  */
5692 Int4
5693 BlastSingleQueryResultSize(BLAST_OptionsBlkPtr options)
5694 {
5695     Int4 result_size = /* size to be returned */
5696         options->hitlist_size;
5697 
5698     if (options->tweak_parameters) {
5699         /* Composition based statistics are being used. */
5700         result_size *= 2;
5701     }
5702     if ((options->is_megablast_search && options->no_traceback) ||
5703         (!options->is_megablast_search && options->gapped_calculation)) {
5704         /* This search uses preliminary alignments before the final
5705          * gapped calculation with traceback; increase the results
5706          * size. */
5707         result_size = MIN(2*result_size, result_size + 50);
5708     }
5709     return result_size;
5710 }
5711 
5712 
5713 BlastSearchBlkPtr
5714 BLASTSetUpSearchWithReadDbInternalMult (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, ReadDBFILEPtr rdfp, QueriesPtr mult_queries)
5715 /* --KM added mult_queries param */
5716 
5717 {
5718 
5719     BlastSearchBlkPtr search = NULL;
5720     Boolean multiple_hits, options_alloc=FALSE;
5721     Int2 status, first_context = 0, last_context = 0;
5722     Int8        dblen = 0;
5723     Int4        query_length = 0;
5724     Nlm_FloatHi searchsp_eff = 0;
5725     Int4        hitlist_size = 0;
5726     Int4 i = 0; /* AM: Query multiplexing. */
5727 
5728     /* Allocate default options if none are allocated yet. */
5729     if (options == NULL) {
5730         options = BLASTOptionNew(prog_name, FALSE);
5731         options_alloc = TRUE;
5732     }
5733     
5734     multiple_hits = options->multiple_hits_only;
5735     /*
5736     if (options->window_size != 0)
5737         multiple_hits = TRUE;
5738     else
5739         multiple_hits = FALSE;
5740     */
5741     BlastGetFirstAndLastContext(prog_name, query_slp, &first_context, &last_context, options->strand_option);
5742     
5743     if (query_slp)
5744         query_length = SeqLocLen(query_slp);
5745     else
5746         query_length = query_bsp->length;
5747 
5748     hitlist_size = BlastSingleQueryResultSize(options);
5749 
5750     /* AM: Query multiplexing */
5751     if( mult_queries )
5752     {
5753       for( i = 0; i < mult_queries->NumQueries; ++i )
5754         mult_queries->result_info[i].results 
5755           = (BLASTResultHitlistPtr *)MemNew( 
5756               (hitlist_size + 1)*sizeof( BLASTResultHitlistPtr ) );
5757 
5758       mult_queries->max_results_per_query = hitlist_size;
5759       hitlist_size *= mult_queries->NumQueries;
5760     }
5761     
5762     /* On the first call query length is used for the subject length. */
5763     search = BlastSearchBlkNewExtra(options->wordsize, query_length, dbname, multiple_hits, 0, options->threshold_second, hitlist_size, prog_name, NULL, first_context, last_context, rdfp, options->window_size);
5764     
5765     if (search) {
5766        readdb_get_totals_ex(search->rdfp, &(dblen), &(search->dbseq_num), TRUE);
5767 
5768        if (!options->ignore_gilist)
5769        {
5770            Boolean looking_for_gis = FALSE;
5771            /* Create virtual database if any of the databases have gi lists or 
5772               ordinal id masks, or if gi list is provided from options */
5773            looking_for_gis = BlastProcessGiLists(search, options, gi_list, gi_list_total);
5774 
5775            /* search->thr_info->blast_gi_list will be non-NULL if gi_list or 
5776             * options->gilist or options->gifile was non-NULL and therefore
5777             * intersected with any oidlists in the search->rdfp(s). If this is the
5778             * case, we need to recalculate the database length and number of
5779             * sequences */
5780            if (search->thr_info->blast_gi_list && !options->use_real_db_size)
5781                readdb_get_totals_ex3(search->rdfp, &dblen, &search->dbseq_num,
5782                                  FALSE, TRUE, eApproximate);
5783 
5784            if (looking_for_gis && search->thr_info->blast_gi_list == NULL)
5785            {
5786                ErrPostEx(SEV_WARNING, 0, 0, "Intersection of gilist and BLAST database ID's empty");
5787                search->query_invalid = TRUE;
5788            }
5789        }
5790 
5791         /* command-line/options trump alias file. */
5792         if (options->db_length > 0)
5793            dblen = options->db_length;
5794         if (options->dbseq_num > 0)
5795             search->dbseq_num = options->dbseq_num;
5796         if (options->searchsp_eff > 0)
5797             searchsp_eff = options->searchsp_eff;
5798 
5799         if (StringCmp(prog_name, "tblastn") == 0 ||
5800             StringCmp(prog_name, "tblastx") == 0 ||
5801             StringCmp(prog_name, "psitblastn") == 0) {
5802             dblen /= 3;
5803             searchsp_eff /= 3.0;
5804         }
5805         search->dblen = dblen;
5806         if (options->db_length > 0)
5807            search->dblen_eff = dblen;
5808         search->searchsp_eff = searchsp_eff;
5809         /* AM: Moved next two lines here to be able to use mult_queries 
5810                in BLASTSetUpSearchInternalByLoc() */
5811         /* --KM put mult_queries, from Main, into the search structure */
5812         search->mult_queries = mult_queries;
5813         status = BLASTSetUpSearchInternalByLoc (search, query_slp, query_bsp, prog_name, qlen, options, callback);
5814         if (status != 0) {
5815             ErrPostEx(SEV_WARNING, 0, 0, "SetUpBlastSearch failed.");
5816             search->query_invalid = TRUE;
5817         }
5818 
5819         if (search->pbp->mb_params) 
5820             search = GreedyAlignMemAlloc(search);
5821         else 
5822             search->abmp = NULL;
5823         
5824         if (search->rdfp->parameters & READDB_CONTENTS_ALLOCATED)
5825             search->rdfp = ReadDBCloseMHdrAndSeqFiles(search->rdfp);
5826     }
5827     
5828     if (options_alloc)
5829         options = BLASTOptionDelete(options);
5830     
5831     return search;
5832 }
5833 
5834 /*
5835         Performs setup for a BLAST search.  This function must be used
5836         with a search file accessed through readdb.
5837 */
5838 
5839 BlastSearchBlkPtr LIBCALL 
5840 BLASTSetUpSearchWithReadDb(BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
5841 
5842 {
5843         return BLASTSetUpSearchWithReadDbInternal(NULL, query_bsp, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL);
5844 }
5845 
5846 BlastSearchBlkPtr LIBCALL 
5847 BLASTSetUpSearchWithReadDbEx(BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total)
5848 
5849 {
5850         return BLASTSetUpSearchWithReadDbInternal (NULL, query_bsp, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, NULL);
5851 }
5852 
5853 /*
5854         Performs setup for a BLAST search.  This function must be used
5855         with a search file accessed through readdb.
5856 */
5857 
5858 BlastSearchBlkPtr LIBCALL 
5859 BLASTSetUpSearchByLocWithReadDb(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
5860 
5861 {
5862         return BLASTSetUpSearchWithReadDbInternalMult (query_slp, NULL, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL, NULL);
5863         /* --KM pass NULL mult_queries */
5864 }
5865 
5866 
5867 BlastSearchBlkPtr LIBCALL 
5868 BLASTSetUpSearchByLocWithReadDbEx(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, QueriesPtr mult_queries)  
5869 /* --KM added mult_queries param */
5870 
5871 {
5872         return BLASTSetUpSearchWithReadDbInternalMult (query_slp, NULL, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, NULL, mult_queries);
5873         /* --KM pass mult_queries */
5874 }
5875 static BlastSearchBlkPtr
5876 BLASTSetUpSearchEx (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
5877 
     /*
             Shared implementation behind BLASTSetUpSearch and
             BLASTSetUpSearchByLoc: allocates a search block and runs the
             (Mega)BLAST set-up on it.

              - query_slp, query_bsp: the query as a SeqLoc or as a Bioseq.
                     At least one must be non-NULL, otherwise NULL is
                     returned.  When both are given the SeqLoc determines
                     the actual query length.
              - prog_name: program name.  For "tblastn", "tblastx" and
                     "psitblastn" the database length and the effective
                     search space are divided by three.
              - qlen: query length to use; if <= 0 the actual query length
                     is used.
              - dblen: database length; if <= 0 qlen is used.  A positive
                     options->db_length overrides it.
              - all_words: handed to BlastSearchBlkNew.
              - options: search options; if NULL, defaults are created with
                     BLASTOptionNew and deleted again before returning.
              - callback: handed to the set-up routine.

             Returns the new search block, or NULL if no query was given or
             no options/search block could be obtained.  If the set-up
             routine reports a non-zero status a warning is posted and
             search->query_invalid is set, but the block is still returned.
     */
5878 {
5879         BlastSearchBlkPtr search;
5880         Boolean options_alloc=FALSE, multiple_hits;
5881         Int2 status, first_context, last_context;
5882         Int4 actual_query_length=0;
5883         Nlm_FloatHi searchsp_eff=0;
5884         Int4        hitlist_size;
5885 
5886         /* Allocate default options if none were supplied. */
5887         if (options == NULL)
5888         {
5889                 options = BLASTOptionNew(prog_name, FALSE);
                     /* Everything below dereferences options. */
                     if (options == NULL)
                             return NULL;
5890                 options_alloc = TRUE;
5891         }
5892 
5893         multiple_hits = options->multiple_hits_only;
5894         /*
5895         if (options->window_size != 0)
5896                 multiple_hits = TRUE;
5897         else
5898                 multiple_hits = FALSE;
5899         */
5900         if (query_slp == NULL && query_bsp == NULL)
             {
                     /* Do not leak the default options allocated above. */
                     if (options_alloc)
                             options = BLASTOptionDelete(options);
5901                 return NULL;
             }
5902 
5903         if (query_slp)
5904                 actual_query_length = SeqLocLen(query_slp);
5905         else if (query_bsp)
5906                 actual_query_length = query_bsp->length;
5907 
5908         if (qlen <= 0)
5909         {
5910                 qlen = actual_query_length;
5911         }
5912 
5913         /* If dblen is not set, use qlen. */
5914         if (dblen <= 0)
5915                 dblen = qlen;
5916 
5917         BlastGetFirstAndLastContext(prog_name, query_slp, &first_context, &last_context, options->strand_option);
5918 
5919         hitlist_size = BlastSingleQueryResultSize(options);
5920 
5921         /* On the first call query length is used for the subject length. */
5922         search = BlastSearchBlkNew(options->wordsize, actual_query_length, NULL, multiple_hits, 0, options->threshold_second, hitlist_size, prog_name, all_words, first_context, last_context, options->window_size);
5923 
5924         if (search)
5925         {
5926                 search->subject->length = dblen; 
5927                 /* Options setting overrides parameter. */
5928                 if (options->db_length > 0)
5929                         dblen = options->db_length;
5930                 if (options->searchsp_eff > 0)
5931                         searchsp_eff = options->searchsp_eff;
5932                 if (StringCmp(prog_name, "tblastn") == 0
5933                     || StringCmp(prog_name, "tblastx") == 0
5934                     || StringCmp(prog_name, "psitblastn") == 0)
5935                 {
5936                         dblen /= 3;
5937                         searchsp_eff /= 3.0;
5938                 }
5939                 if (options->dbseq_num > 0)
5940                         search->dbseq_num = options->dbseq_num;
5941                 else
                     {
                             /* Estimate the number of database sequences.
                                Divide in Int8 and narrow afterwards (the
                                cast used to bind to dblen alone), and never
                                divide by a zero or negative query length. */
                             if (qlen > 0)
5942                                 search->dbseq_num = (Int4) (dblen/qlen);
                             else
                                     search->dbseq_num = 1;
                     }
5943         
5944                 if (search->dbseq_num <=0)
5945                         search->dbseq_num = 1;
5946 
5947                 search->dblen = dblen;
5948                 /* If searchsp_eff is > 0 it will be used. */
5949                 search->searchsp_eff = searchsp_eff;
5950                 if (options->is_megablast_search)
5951                    search->pbp->mb_params = MegaBlastParameterBlkNew(options);
                     /* The presence of mb_params selects the MegaBLAST set-up. */
5952                 if (search->pbp->mb_params)
5953                    status = MegaBlastSetUpSearchInternalByLoc (search, query_slp, query_bsp, prog_name, qlen, options, callback);
5954                 else
5955                    status = BLASTSetUpSearchInternalByLoc(search, query_slp, query_bsp, prog_name, qlen, options, callback);
5956                 if (status != 0)
5957                 {
5958                         ErrPostEx(SEV_WARNING, 0, 0, "SetUpBlastSearch failed.");
5959                         search->query_invalid = TRUE;
5960                 }
5961 
5962                 if (search->pbp->mb_params) 
5963                         search = GreedyAlignMemAlloc(search);
5964         }
5965 
             /* Only options created here are deleted; caller-owned ones are kept. */
5966         if (options_alloc)
5967                 options = BLASTOptionDelete(options);
5968 
5969         return search;
5970 }
5971 
5972 /*
5973         Performs necessary setup for a BLAST search.  The arguments are:
5974 
5975          - search: BlastSearchBlkPtr created by BlastSearchBlkNew
5976          - query_bsp: BioseqPtr for the query
5977          - matrix: CharPtr containing the name of the matrix
5978          - prog_name: CharPtr containing name of the program
5979          - qlen: Int4 with length of the query, if a length should be
5980                 specified (for statistical calculations); if this argument is
5981                 zero, then query_bsp->length is used.
5982          -dblen: Int8 with length of the database.
5983          - e_cutoff: BLAST_Score specifying the "expect" value.
5984          - number_of_processors: number of processors to use.
5985          - gap_decay_rate: between zero and one, related to prob. of # of HSP's.
5986          - gap_size: largest allowable gap if "small" gaps are used.
5987          - gap_prob: probability of "small" gap model being correct.
5988          - multiple_hits: if TRUE, multiple hits method is used.
5989          - window: window size for multiple hits method
5990          - threshold_second: initial hit threshold for 2nd pass
5991          - discontiguous: should discontiguous words be used?
5992          - old_stats: should the old statistics be used?
5993          - is_prot: is this a protein?
5994 
5995         
             NOTE(review): only query_bsp, prog_name, qlen and dblen from the
             list above are parameters of BLASTSetUpSearch as declared below
             (together with all_words, options and callback, which the list
             does not mention).  The remaining entries are presumably carried
             by the options block nowadays -- confirm.

             The function itself only forwards its arguments to
             BLASTSetUpSearchEx, passing NULL for the SeqLoc form of the
             query, and returns that routine's result.
5996 */
5997 
5998 BlastSearchBlkPtr LIBCALL 
5999 BLASTSetUpSearch (BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
6000 
6001 {
6002         return BLASTSetUpSearchEx (NULL, query_bsp, prog_name, qlen, dblen, all_words, options, callback);
6003 }
6004 
6005 BlastSearchBlkPtr LIBCALL 
6006 BLASTSetUpSearchByLoc (SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
6007 
     /* Set-up entry point for a query given as a SeqLoc: forwards all
        arguments to BLASTSetUpSearchEx, passing NULL for the Bioseq form of
        the query, and returns that routine's result. */
6008 {
6009         return BLASTSetUpSearchEx (query_slp, NULL, prog_name, qlen, dblen, all_words, options, callback);
6010 }
6011 
6012 static int LIBCALLBACK
6013 diag_compare_hsps(VoidPtr v1, VoidPtr v2)
6014 {
        /* Comparison callback ordering HSPs by diagonal, where the diagonal
           is query.offset - subject.offset.  v1 and v2 each point to a
           BLAST_HSPPtr array element.  Returns the difference of the two
           diagonals: negative, zero or positive. */
6015    BLAST_HSPPtr h1, h2;
6016 
6017    h1 = *((BLAST_HSPPtr PNTR) v1);
6018    h2 = *((BLAST_HSPPtr PNTR) v2);
6019    
6020    return (h1->query.offset - h1->subject.offset) - 
6021       (h2->query.offset - h2->subject.offset);
6022 }
6023 
6024 /* 
6025    Shifts all HSP coordinates according to where the partial sequence
6026    started in the large sequence

        For every HSP in hitlist, start is added to subject.offset,
        subject.end and subject.gapped_start, and to gap_info->start2 when
        the HSP has gap_info.  Query coordinates are not changed.
        hitlist is dereferenced without a NULL check.
6027 */
6028 void AdjustOffsetsInBLASTHitList(BLAST_HitListPtr hitlist, Int4 start)
6029 {
6030    Int4 index;
6031    BLAST_HSPPtr hsp;
6032 
6033    for (index=0; index<hitlist->hspcnt; index++) {
6034       hsp = hitlist->hsp_array[index];
6035       hsp->subject.offset += start;
6036       hsp->subject.end += start;
6037       hsp->subject.gapped_start += start;
6038       if (hsp->gap_info)
6039          hsp->gap_info->start2 += start;
6040    }
6041 }
6042 
6043 #define DBSEQ_CHUNK_OVERLAP 100 /* length of the overlap strip, in subject coordinates */
6044 #define OVERLAP_DIAG_CLOSE 10 /* HSPs are merge candidates only if their diagonals differ by less than this */
6045 /* This is a hard merge, i.e. the two HSPs will be merged only 
6046    if they intersect 

        - hsp1: HSP that is extended in place when the merge succeeds; its
                query/subject end and length are replaced using hsp2's ends.
        - hsp2: HSP to be merged into hsp1.  For gapped HSPs parts of its
                edit script are linked into hsp1's script.
        - start: subject offset at which the overlap strip begins; the strip
                ends at start + DBSEQ_CHUNK_OVERLAP - 1.

        Returns non-zero if the HSPs were merged, zero otherwise.  When
        either HSP has no gap_info, the HSPs are merged only if they lie on
        the same diagonal and hsp1's query end reaches hsp2's query start.
6047 */
6048 static Boolean
6049 BLASTMergeHsps(BLAST_HSPPtr hsp1, BLAST_HSPPtr hsp2, Int4 start)
6050 {
6051    BLASTHSPSegmentPtr segments1, segments2, new_segment1, new_segment2;
6052    GapXEditScriptPtr esp1, esp2, esp;
6053    Int4 end = start + DBSEQ_CHUNK_OVERLAP - 1;
6054    Int4 min_diag, max_diag, num1, num2, dist, next_dist=0;
6055    Int4 diag1_start, diag1_end, diag2_start, diag2_end;
6056    Int4 index;
        /* 0: none; 1: two substitution segments on one diagonal;
           2: a gap segment crossing the other segment's diagonal;
           3: two gap segments. */
6057    Uint1 intersection_found;
6058    Uint1 op_type;
6059 
6060    if (!hsp1->gap_info || !hsp2->gap_info) {
6061       /* Assume that this is an ungapped alignment, hence simply compare 
6062          diagonals. Do not merge if they are on different diagonals */
6063       if (diag_compare_hsps(&hsp1, &hsp2) == 0 &&
6064           hsp1->query.end >= hsp2->query.offset) {
6065          hsp1->query.end = hsp2->query.end;
6066          hsp1->subject.end = hsp2->subject.end;
6067          hsp1->query.length = hsp1->query.end - hsp1->query.offset;
6068          hsp1->subject.length = hsp1->subject.end - hsp1->subject.offset;
6069          return TRUE;
6070       } else
6071          return FALSE;
6072    }
6073    /* Find whether these HSPs have an intersection point */
6074    segments1 = (BLASTHSPSegmentPtr) MemNew(sizeof(BLASTHSPSegment));
6075    
6076    esp1 = hsp1->gap_info->esp;
6077    esp2 = hsp2->gap_info->esp;
6078    
6079    segments1->q_start = hsp1->query.offset;
6080    segments1->s_start = hsp1->subject.offset;
        /* Walk hsp1's edit script up to the operation that reaches the
           overlap strip, advancing the running query/subject position. */
6081    while (segments1->s_start < start) {
6082       if (esp1->op_type == GAPALIGN_INS)
6083          segments1->q_start += esp1->num;
6084       else if (segments1->s_start + esp1->num < start) {
6085          if (esp1->op_type == GAPALIGN_SUB) { 
6086             segments1->s_start += esp1->num;
6087             segments1->q_start += esp1->num;
6088          } else if (esp1->op_type == GAPALIGN_DEL)
6089             segments1->s_start += esp1->num;
6090       } else 
6091          break;
6092       esp1 = esp1->next;
6093    }
6094    /* Current esp is the first segment within the overlap region */
6095    segments1->s_end = segments1->s_start + esp1->num - 1;
6096    if (esp1->op_type == GAPALIGN_SUB)
6097       segments1->q_end = segments1->q_start + esp1->num - 1;
6098    else
6099       segments1->q_end = segments1->q_start;
6100    
6101    new_segment1 = segments1;
6102    
        /* One segment per remaining edit operation of hsp1.
           NOTE(review): the "+=" below are applied to fields of a segment
           that was just allocated; if MemNew returns zeroed memory (TODO
           confirm) the ends are not based on q_start/s_start, unlike the
           second list built further down.  Left unchanged here. */
6103    for (esp = esp1->next; esp; esp = esp->next) {
6104       new_segment1->next = (BLASTHSPSegmentPtr)
6105          MemNew(sizeof(BLASTHSPSegment));
6106       new_segment1->next->q_start = new_segment1->q_end + 1;
6107       new_segment1->next->s_start = new_segment1->s_end + 1;
6108       new_segment1 = new_segment1->next;
6109       if (esp->op_type == GAPALIGN_SUB) {
6110          new_segment1->q_end += esp->num - 1;
6111          new_segment1->s_end += esp->num - 1;
6112       } else if (esp->op_type == GAPALIGN_INS) {
6113          new_segment1->q_end += esp->num - 1;
6114          new_segment1->s_end = new_segment1->s_start;
6115       } else {
6116          new_segment1->s_end += esp->num - 1;
6117          new_segment1->q_end = new_segment1->q_start;
6118       }
6119    }
6120    
6121    /* Now create the second segments list */
6122    
6123    segments2 = (BLASTHSPSegmentPtr) MemNew(sizeof(BLASTHSPSegment));
6124    segments2->q_start = hsp2->query.offset;
6125    segments2->s_start = hsp2->subject.offset;
6126    segments2->q_end = segments2->q_start + esp2->num - 1;
6127    segments2->s_end = segments2->s_start + esp2->num - 1;
6128    
6129    new_segment2 = segments2;
6130    
        /* Only hsp2's operations up to the end of the overlap strip matter. */
6131    for (esp = esp2->next; esp && new_segment2->s_end < end; 
6132         esp = esp->next) {
6133       new_segment2->next = (BLASTHSPSegmentPtr)
6134          MemNew(sizeof(BLASTHSPSegment));
6135       new_segment2->next->q_start = new_segment2->q_end + 1;
6136       new_segment2->next->s_start = new_segment2->s_end + 1;
6137       new_segment2 = new_segment2->next;
6138       if (esp->op_type == GAPALIGN_INS) {
6139          new_segment2->s_end = new_segment2->s_start;
6140          new_segment2->q_end = new_segment2->q_start + esp->num - 1;
6141       } else if (esp->op_type == GAPALIGN_DEL) {
6142          new_segment2->s_end = new_segment2->s_start + esp->num - 1;
6143          new_segment2->q_end = new_segment2->q_start;
6144       } else if (esp->op_type == GAPALIGN_SUB) {
6145          new_segment2->s_end = new_segment2->s_start + esp->num - 1;
6146          new_segment2->q_end = new_segment2->q_start + esp->num - 1;
6147       }
6148    }
6149    
6150    new_segment1 = segments1;
6151    new_segment2 = segments2;
6152    intersection_found = 0;
6153    num1 = num2 = 0;
        /* Step through both lists in parallel; num1/num2 count how many
           segments of each list were passed before the intersection. */
6154    while (new_segment1 && new_segment2 && !intersection_found) {
6155       if (new_segment1->s_end < new_segment2->s_start || 
6156           new_segment1->q_end < new_segment2->q_start) {
6157          new_segment1 = new_segment1->next;
6158          num1++;
6159          continue;
6160       }
6161       if (new_segment2->s_end < new_segment1->s_start || 
6162           new_segment2->q_end < new_segment1->q_start) {
6163          new_segment2 = new_segment2->next;
6164          num2++;
6165          continue;
6166       }
6167       diag1_start = new_segment1->s_start - new_segment1->q_start;
6168       diag2_start = new_segment2->s_start - new_segment2->q_start;
6169       diag1_end = new_segment1->s_end - new_segment1->q_end;
6170       diag2_end = new_segment2->s_end - new_segment2->q_end;
6171       
6172       if (diag1_start == diag1_end && diag2_start == diag2_end &&  
6173           diag1_start == diag2_start) {
6174          /* Both segments substitutions, on same diagonal */
6175          intersection_found = 1;
6176          dist = new_segment2->s_end - new_segment1->s_start + 1;
6177          break;
6178       } else if (diag1_start != diag1_end && diag2_start != diag2_end) {
6179          /* Both segments gaps - must intersect */
6180          intersection_found = 3;
6181 
6182          dist = new_segment2->s_end - new_segment1->s_start + 1;
6183          op_type = GAPALIGN_INS;
6184          next_dist = new_segment2->q_end - new_segment1->q_start - dist + 1;
6185          if (new_segment2->q_end - new_segment1->q_start < dist) {
6186             dist = new_segment2->q_end - new_segment1->q_start + 1;
6187             op_type = GAPALIGN_DEL;
6188             next_dist = new_segment2->s_end - new_segment1->s_start - dist + 1;
6189          }
6190          break;
6191       } else if (diag1_start != diag1_end) {
             /* Segment 1 is a gap: it intersects if segment 2's diagonal
                lies within the diagonals the gap spans. */
6192          max_diag = MAX(diag1_start, diag1_end);
6193          min_diag = MIN(diag1_start, diag1_end);
6194          if (diag2_start >= min_diag && diag2_start <= max_diag) {
6195             intersection_found = 2;
6196             dist = diag2_start - min_diag + 1;
6197             if (new_segment1->s_end == new_segment1->s_start)
6198                next_dist = new_segment2->s_end - new_segment1->s_end + 1;
6199             else
6200                next_dist = new_segment2->q_end - new_segment1->q_end + 1;
6201             break;
6202          }
6203       } else if (diag2_start != diag2_end) {
             /* Segment 2 is a gap: the mirror image of the case above. */
6204          max_diag = MAX(diag2_start, diag2_end);
6205          min_diag = MIN(diag2_start, diag2_end);
6206          if (diag1_start >= min_diag && diag1_start <= max_diag) {
6207             intersection_found = 2;
6208             next_dist = max_diag - diag1_start + 1;
6209             if (new_segment2->s_end == new_segment2->s_start)
6210                dist = new_segment2->s_start - new_segment1->s_start + 1;
6211             else
6212                dist = new_segment2->q_start - new_segment1->q_start + 1;
6213             break;
6214          }
6215       }
6216       if (new_segment1->s_end <= new_segment2->s_end) {
6217          new_segment1 = new_segment1->next;
6218          num1++;
6219       } else {
6220          new_segment2 = new_segment2->next;
6221          num2++;
6222       }
6223    }
6224 
6225    if (intersection_found) {
           /* Advance esp1/esp2 to the edit operations corresponding to the
              intersecting segments; esp trails one step behind esp2 so that
              hsp2's script can be cut off in front of the spliced part. */
6226       esp = NULL;
6227       for (index = 0; index < num1-1; index++)
6228          esp1 = esp1->next;
6229       for (index = 0; index < num2-1; index++) {
6230          esp = esp2;
6231          esp2 = esp2->next;
6232       }
6233       if (intersection_found < 3) {
6234          if (num1 > 0)
6235             esp1 = esp1->next;
6236          if (num2 > 0) {
6237             esp = esp2;
6238             esp2 = esp2->next;
6239          }
6240       }
6241       switch (intersection_found) {
6242       case 1:
6243          esp1->num = dist;
6244          esp1->next = esp2->next;
6245          esp2->next = NULL;
6246          break;
6247       case 2:
6248          esp1->num = dist;
6249          esp2->num = next_dist;
6250          esp1->next = esp2;
6251          if (esp)
6252             esp->next = NULL;
6253          break;
6254       case 3:
6255          esp1->num += dist;
6256          esp2->op_type = op_type;
6257          esp2->num = next_dist;
6258          esp1->next = esp2;
6259          if (esp)
6260             esp->next = NULL;
6261          break;
6262       default: break;
6263       }
6264       hsp1->query.end = hsp2->query.end;
6265       hsp1->subject.end = hsp2->subject.end;
6266       hsp1->query.length = hsp1->query.end - hsp1->query.offset;
6267       hsp1->subject.length = hsp1->subject.end - hsp1->subject.offset;
6268    }
6269 
        /* The segment lists were only scaffolding for locating the
           intersection; release them so that a merge attempt does not leak. */
        while (segments1) {
           new_segment1 = segments1->next;
           MemFree(segments1);
           segments1 = new_segment1;
        }
        while (segments2) {
           new_segment2 = segments2->next;
           MemFree(segments2);
           segments2 = new_segment2;
        }

6270    return (Boolean) intersection_found;
6271 }
6272 
6273 static Boolean BLASTHspContained(BLAST_HSPPtr hsp1, BLAST_HSPPtr hsp2)
6274 {
        /* Tests whether hsp1 is contained in hsp2.  Returns FALSE at once
           if hsp1 scores higher than hsp2 or if the signs of the query or
           subject frames differ.  Otherwise returns TRUE only when both the
           start point (query.offset, subject.offset) and the end point
           (query.end, subject.end) of hsp1 satisfy CONTAINED_IN_HSP with
           respect to hsp2's query and subject ranges. */
6275    Boolean hsp_start_is_contained=FALSE, hsp_end_is_contained=FALSE;
6276 
6277    if (hsp1->score > hsp2->score || 
6278        SIGN(hsp2->query.frame) != SIGN(hsp1->query.frame) || 
6279        SIGN(hsp2->subject.frame) != SIGN(hsp1->subject.frame))
6280       return FALSE;
6281 
6282    if (CONTAINED_IN_HSP(hsp2->query.offset, hsp2->query.end, hsp1->query.offset, hsp2->subject.offset, hsp2->subject.end, hsp1->subject.offset) == TRUE) {
6283       hsp_start_is_contained = TRUE;
6284    }
6285    if (CONTAINED_IN_HSP(hsp2->query.offset, hsp2->query.end, hsp1->query.end, hsp2->subject.offset, hsp2->subject.end, hsp1->subject.end) == TRUE) {
6286       hsp_end_is_contained = TRUE;
6287    }
6288    
6289    return (hsp_start_is_contained && hsp_end_is_contained);
6290 }
6291 
6292 /*
6293   Merges the hits from different chunks of the subject sequence that
6294   have been searched separately

       - search: supplies pbp->hsp_num_max (limit on the number of HSPs kept,
               0 meaning no limit) and subject_id for the warning message.
       - hitlist1: accumulated hit list; if NULL, a copy of hitlist2 with its
               own HSP pointer array is returned instead.
       - hitlist2: hit list of the newest chunk.  On return from a merge it
               no longer owns any HSPs (hspcnt is set to 0).
       - start: subject offset at which the overlap strip begins.
       - merge_hsps: if TRUE, close HSPs are joined with BLASTMergeHsps;
               otherwise an HSP contained in another one is dropped.

       Returns the merged hit list (hitlist1, or the new copy).
6295 */
6296 static BLAST_HitListPtr
6297 BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1, 
6298                    BLAST_HitListPtr hitlist2, Int4 start, Boolean merge_hsps)
6299 {
6300    BLAST_HSPPtr hsp, hsp_var, PNTR hspp1, PNTR hspp2;
6301    Int4 index, index1, index2;
6302    Int4 hspcnt1, hspcnt2, new_hspcnt = 0;
6303    BLAST_HSPPtr PNTR new_hsp_array;
6304 
6305    if (hitlist1 == NULL) {
6306       hitlist1 = (BLAST_HitListPtr)
6307          MemDup(hitlist2, sizeof(BLAST_HitList));
6308       hitlist1->hsp_array = (BLAST_HSPPtr PNTR) 
6309          MemNew(hitlist2->hspmax*sizeof(BLAST_HSPPtr));
6310       MemCpy(hitlist1->hsp_array, hitlist2->hsp_array, 
6311              hitlist2->hspcnt*sizeof(BLAST_HSPPtr));
6312       return hitlist1;
6313    } else {
6314       /* In case these have changed */
6315       hitlist1->exact_match_array = hitlist2->exact_match_array;
6316       hitlist1->exact_match_max = hitlist2->exact_match_max;
6317    }
6318 
6319    hspcnt1 = hspcnt2 = 0;
6320 
6321    /* Put all HSPs that intersect the overlap region at the front of the
6322       respective HSP arrays. */
6323    for (index = 0; index < hitlist1->hspcnt; index++) {
6324       hsp = hitlist1->hsp_array[index];
6325       if (hsp->subject.end > start) {
6326          /* At least part of this HSP lies in the overlap strip. */
6327          hsp_var = hitlist1->hsp_array[hspcnt1];
6328          hitlist1->hsp_array[hspcnt1] = hsp;
6329          hitlist1->hsp_array[index] = hsp_var;
6330          ++hspcnt1;
6331       }
6332    }
6333    for (index = 0; index < hitlist2->hspcnt; index++) {
6334       hsp = hitlist2->hsp_array[index];
6335       if (hsp->subject.offset < start + DBSEQ_CHUNK_OVERLAP) {
6336          /* At least part of this HSP lies in the overlap strip. */
6337          hsp_var = hitlist2->hsp_array[hspcnt2];
6338          hitlist2->hsp_array[hspcnt2] = hsp;
6339          hitlist2->hsp_array[index] = hsp_var;
6340          ++hspcnt2;
6341       }
6342    }
6343    hspp1 = hitlist1->hsp_array;
6344    hspp2 = hitlist2->hsp_array;
6345 
        /* Order the overlap-strip HSPs of both lists by diagonal. */
6346    HeapSort(hspp1, hspcnt1, sizeof(BLAST_HSPPtr), diag_compare_hsps);
6347    HeapSort(hspp2, hspcnt2, sizeof(BLAST_HSPPtr), diag_compare_hsps);
6348 
        /* NOTE(review): the inner loop stops at the first hspp2 entry that
           is NULL or fails the frame/diagonal test, so entries behind an
           HSP freed in an earlier pass are not examined -- confirm that
           this is intended. */
6349    for (index=0; index<hspcnt1; index++) {
6350       for (index1=0; index1<hspcnt2; index1++) {
6351          if (hspp2[index1] && 
6352              hspp2[index1]->query.frame == hspp1[index]->query.frame &&
6353              hspp2[index1]->subject.frame == hspp1[index]->subject.frame &&
6354              ABS(diag_compare_hsps(&hspp1[index], &hspp2[index1])) < 
6355              OVERLAP_DIAG_CLOSE) {
6356             if (merge_hsps) {
6357                if (BLASTMergeHsps(hspp1[index], hspp2[index1], start)) {
6358                   /* Free the second HSP. */
6359                   hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
6360                }
6361             } else { /* No gap information available */
6362                if (BLASTHspContained(hspp1[index], hspp2[index1])) {
6363                   /* Point the first HSP to the new HSP; */
6364                   hspp1[index] = BLAST_HSPFree(hspp1[index]);
6365                   hspp1[index] = hspp2[index1];
6366                   hspp2[index1] = NULL;
6367                   /* This HSP has been removed, so break out of the inner 
6368                      loop */
6369                   break;
6370                } else if (BLASTHspContained(hspp2[index1], hspp1[index])) {
6371                   hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
6372                }
6373             }
6374          } else {
6375             /* This and remaining HSPs are too far from the one being 
6376                checked */
6377             break;
6378          }
6379       }
6380    }
6381 
        /* Drop the entries freed above and record how many HSPs remain;
           the count is passed by value, so the purged count has to be
           stored back or the code below would still count, and later
           dereference, the NULL entries. */
6382    hitlist2->hspcnt = HspArrayPurge(hitlist2->hsp_array, hitlist2->hspcnt, FALSE);
6383 
6384    /* The new number of HSPs is now the sum of the remaining counts in the 
6385       two lists, but if there is a restriction on the number of HSPs to keep,
6386       it might have to be reduced. */
6387    new_hspcnt = hitlist2->hspcnt + hitlist1->hspcnt;
6388    if (search->pbp->hsp_num_max)
6389       new_hspcnt = MIN(new_hspcnt, search->pbp->hsp_num_max);
6390    
6391    if (new_hspcnt >= hitlist1->hspmax-1 && hitlist1->do_not_reallocate == FALSE) {
6392       Int4 new_allocated = 2*new_hspcnt;
6393       if (search->pbp->hsp_num_max)
6394          new_allocated = MIN(new_allocated, search->pbp->hsp_num_max);
6395       new_hsp_array = (BLAST_HSPPtr PNTR) 
6396          Realloc(hitlist1->hsp_array, new_allocated*sizeof(BLAST_HSPPtr));
6397       if (new_hsp_array == NULL) {
6398          ErrPostEx(SEV_WARNING, 0, 0, "UNABLE to reallocate in BlastSaveCurrentHsp for ordinal id %ld, continuing with fixed array of %ld HSP's", (long) search->subject_id, (long) hitlist1->hspmax);
6399          hitlist1->do_not_reallocate = TRUE; 
6400       } else {
6401          hitlist1->hsp_array = new_hsp_array;
6402          hitlist1->hspmax = new_allocated;
6403       }
6404       new_hspcnt = MIN(new_hspcnt, hitlist1->hspmax);
6405    } else {
           /* The array is not grown here (it is large enough, or an earlier
              reallocation failed); never plan to keep more HSPs than it can
              hold, otherwise the copy loop below would write past its end. */
           new_hspcnt = MIN(new_hspcnt, hitlist1->hspmax);
        }
6406 
6407    if (new_hspcnt >= hitlist2->hspcnt + hitlist1->hspcnt) {
6408       /* All HSPs from both arrays are saved */
6409       for (index=hitlist1->hspcnt, index1=0; 
6410            index1<hitlist2->hspcnt; index1++) {
6411          if (hitlist2->hsp_array[index1] != NULL)
6412             hitlist1->hsp_array[index++] = hitlist2->hsp_array[index1];
6413       }
6414    } else {
6415       /* Not all HSPs are be saved; sort both arrays by score and save only
6416          the new_hspcnt best ones. 
6417          For the merged set of HSPs, allocate array the same size as in the 
6418          old HSP list. */
           /* NOTE(review): the result of malloc is used unchecked, and the
              array is obtained with malloc while the one it replaces is
              released with MemFree -- confirm that the two are compatible. */
6419       new_hsp_array = (BLAST_HSP**) 
6420          malloc(hitlist1->hspmax*sizeof(BLAST_HSP*));
6421       HeapSort(hitlist1->hsp_array, hitlist1->hspcnt, 
6422                sizeof(BLAST_HSP*), score_compare_hsps);
6423       HeapSort(hitlist2->hsp_array, hitlist2->hspcnt, sizeof(BLAST_HSP*),
6424                score_compare_hsps);
6425       index1 = index2 = 0;
           /* Two-way merge by score, preferring hitlist1 on ties. */
6426       for (index = 0; index < new_hspcnt; ++index) {
6427          if (index1 < hitlist1->hspcnt &&
6428              (index2 >= hitlist2->hspcnt ||
6429              (hitlist1->hsp_array[index1]->score >= 
6430              hitlist2->hsp_array[index2]->score))) {
6431             new_hsp_array[index] = hitlist1->hsp_array[index1];
6432             ++index1;
6433          } else {
6434             new_hsp_array[index] = hitlist2->hsp_array[index2];
6435             ++index2;
6436          }
6437       }
6438       /* Free the extra HSPs that could not be saved */
6439       for ( ; index1 < hitlist1->hspcnt; ++index1) {
6440          hitlist1->hsp_array[index1] = 
6441             BLAST_HSPFree(hitlist1->hsp_array[index1]);
6442       }
6443       for ( ; index2 < hitlist2->hspcnt; ++index2) {
6444          hitlist2->hsp_array[index2] = 
6445             BLAST_HSPFree(hitlist2->hsp_array[index2]);
6446       }
6447       /* Point hitlist1's HSP array to the new one */
6448       hitlist1->hsp_array = (BLAST_HSP**) MemFree(hitlist1->hsp_array);
6449       hitlist1->hsp_array = new_hsp_array;
6450    }
6451    
        /* In both branches index ends up as the number of HSPs now held. */
6452    hitlist1->hspcnt = index;
6453    /* Second HSP list now does not own any HSPs */
6454    hitlist2->hspcnt = 0;
6455 
6456    return hitlist1;
6457 }
6458 
6459 /* Remove HSPs that do not touch the overlap region and have initial evalue 
6460    estimate more than 10 times higher than the cutoff.

        Works on search->current_hitlist.  An HSP is examined only if its
        subject.offset is greater than start + DBSEQ_CHUNK_OVERLAP.  For such
        an HSP the e-value is recomputed with BlastKarlinStoE_simple and
        stored in hsp->evalue; the HSP is freed if that value exceeds
        10 * search->pbp->cutoff_e.  HSPs whose context has no Karlin block
        are left alone.  Afterwards the array is purged and the hit list's
        hspcnt is updated.  Returns search.
6461 */
6462 static BlastSearchBlkPtr 
6463 BlastReapPartialHitlistByEvalue(BlastSearchBlkPtr search, Int4 start)
6464 {
6465    BLAST_HSPPtr hsp;
6466    Int4 index, hspcnt;
6467    FloatHi searchsp_eff;
6468    BLAST_KarlinBlkPtr PNTR kbp;
6469    Int4 context;
6470    Uint4 query_num; /* AM: Support for query concatenation. */
6471 
        /* Use the gapped Karlin blocks when gapped statistics are in use. */
6472    if (search->pbp->gapped_calculation)
6473       kbp = search->sbp->kbp_gap;
6474    else
6475       kbp = search->sbp->kbp;
6476 
6477    hspcnt = search->current_hitlist->hspcnt;
6478    for (index=0; index<hspcnt; index++) {
6479       hsp = search->current_hitlist->hsp_array[index];
6480       
6481       if (hsp->subject.offset > start + DBSEQ_CHUNK_OVERLAP) {
              /* With mb_params set the context is looked up from the query
                 offset; otherwise the one stored in the HSP is used. */
6482          if (search->pbp->mb_params)
6483             context = BinarySearchInt4(hsp->query.offset,
6484                                        search->query_context_offsets, 
6485                                        (Int4) (search->last_context+1));
6486          else
6487             context = (Int4) hsp->context;
6488 
                 /* The statements below are indented as if they belonged to
                    the else above, but they run for every HSP that reaches
                    this point. */
6489             /* AM: Changed to support query concatenation. */
6490             if( !search->mult_queries )
6491               searchsp_eff = (FloatHi) search->dblen_eff *
6492                              (FloatHi) search->context[context].query->effective_length;
6493             else
6494             {
6495               query_num = GetQueryNum( search->mult_queries,
6496                                        hsp->query.offset,
6497                                        hsp->query.end,
6498                                        hsp->query.frame );
6499               searchsp_eff = search->mult_queries->SearchSpEff[query_num];
6500             }
6501          
6502          if (kbp[context]) {
6503             /* kbp[context] == NULL means that this alignment has been 
6504                extended across the boundary between different query sequences.
6505                Leave it like this for now */
6506             hsp->evalue = BlastKarlinStoE_simple(hsp->score, kbp[context], 
6507                                                  searchsp_eff);
6508 
6509             if (hsp->evalue > 10*search->pbp->cutoff_e) {
6510                hsp = BLAST_HSPFree(hsp);
6511                search->current_hitlist->hsp_array[index] = NULL;
6512             }
6513          }
6514       }
6515    }
        /* Remove the slots emptied above; the purge returns the new count. */
6516    search->current_hitlist->hspcnt = 
6517       HspArrayPurge(search->current_hitlist->hsp_array, hspcnt, FALSE);
6518    return search;
6519 }
6520 
6521 /*
6522         Performs a BLAST search using a sequence obtained from readdb.

             - search: search block; search->rdfp is the database the
                     sequence is read from.
             - sequence_number: passed to readdb_get_sequence and stored in
                     search->subject_id.

             Adds MAX(subject_length - search->length_adjustment, 1) to
             search->dblen_eff_real and returns the status of
             BLASTPerformSearch for the fetched sequence.
6523 */
6524 Int2 LIBCALL
6525 BLASTPerformSearchWithReadDb (BlastSearchBlkPtr search, Int4 sequence_number)
6526 
6527 {
6528         Int4 subject_length;
6529         Uint1Ptr subject_seq=NULL;
6530         
6531         /* This mutex should not be necessary - readdb seems to have
6532          * synchronization issues when dealing with multiple volumes
6533          * from multiple threads.  This mutex fixes the symptom. */
6534         
6535         static int init_mutex = 0;
6536         static TNlmMutex wrap_readdb_mutex = 0;
6537         
             /* NOTE(review): init_mutex is tested and incremented without
                any synchronization, so two threads making their first call
                at the same time could both reach NlmMutexInit -- confirm
                that NlmMutexInit tolerates this. */
6538         if (! init_mutex) {
6539             init_mutex++;
6540             NlmMutexInit(& wrap_readdb_mutex);
6541         }
6542         
6543         NlmMutexLock(wrap_readdb_mutex);
6544         
6545         subject_length = readdb_get_sequence(search->rdfp, sequence_number, &subject_seq);
6546         
6547         NlmMutexUnlock(wrap_readdb_mutex);
6548         
             /* NOTE(review): subject_length is used without checking for an
                error value -- TODO confirm what readdb_get_sequence returns
                on failure. */
6549         search->dblen_eff_real += MAX(subject_length-search->length_adjustment, 1);
6550         search->subject_id = sequence_number;
6551 
6552         return BLASTPerformSearch(search, subject_length, subject_seq); 
6553 }
6554 
6555 /*
6556         Performs a BLAST search with a subject sequence that is passed in.
6557         Used when an entire database is being scanned (by 
6558         BLASTPerformSearchWithReadDb) and when only two seqs are being
6559         compared.

             Dispatches on search->pbp->two_pass_method: if it is set the
             search is run by BLASTPerform2PassSearch, otherwise by
             BLASTPerformFinalSearch.  The status of the routine called is
             returned.
6560 */
6561 Int2 LIBCALL
6562 BLASTPerformSearch (BlastSearchBlkPtr search, Int4 subject_length, Uint1Ptr subject_seq)
6563 
6564 {
6565         Int2 status;
6566 
6567         if (search->pbp->two_pass_method)
6568         {
6569                 status = BLASTPerform2PassSearch(search, subject_length, subject_seq);
6570         }
6571         else
6572         {
6573                 status = BLASTPerformFinalSearch(search, subject_length, subject_seq);
6574         }
6575         
6576         return status;
6577 }
6578 
6579 /*
6580 
6581         Performs a BLAST search using the two-pass method: the first pass
6582         looks for multiple initial hits and then performs a second pass
6583         (with single hits extended) with a lower T value.
6584 
6585          Arguments are:
6586 
6587          - search: BlastSearchBlkPtr returned by SetUpBlastSearch, call
6588                 SetUpBlastSearch before calling this function.
6589          - sequence_number: number assigned to sequence (by user).  The
6590                 "readdb" library uses this number to access the sequence.
6591                 This number should be zero if it's not important.
                     NOTE(review): the function as declared below has no
                     sequence_number parameter; this entry appears stale.
6592          - subject_length: the length of the database sequence (not the length
6593                 allocated in *subject_seq).
6594          - subject_seq: CharPtr pointing to the sequence.
6595 
6596         NOTE: static variables in PerformBlastSearch for subject_seq and 
6597         allocated_length are not an option as they can't be deallocated 
6598         after the last call and they are NOT MP-safe.

             Returns 1 if the first pass reports an error (negative status),
             0 otherwise.
6599 */
6600 
6601 Int2 LIBCALL 
6602 BLASTPerform2PassSearch (BlastSearchBlkPtr search, Int4 subject_length, Uint1Ptr subject_seq)
6603 
6604 {
6605         Int2 outer_frame, outer_frame_max, status, outer_frame_min;
6606         Int4 prot_length;
6607         Uint1Ptr prot_seq;
6608 
6609         search->current_hitlist_purge = TRUE; /* The default. */
             /* This value is overwritten in both branches just below. */
6610         outer_frame_max = 1;
6611 
             /* Programs with a translated subject go through frames -3..3
                (frame 0 is skipped inside the loop); all others use only
                frame 0. */
6612         if (StringCmp(search->prog_name, "tblastn") == 0
6613             || StringCmp(search->prog_name, "tblastx") == 0
6614             || StringCmp(search->prog_name, "psitblastn") == 0)
6615         {
6616                 outer_frame_min = -3;
6617                 outer_frame_max = 3;
6618         }
6619         else
6620         {
6621                 outer_frame_min = 0;
6622                 outer_frame_max = 0;
6623         }
6624 
6625         for (outer_frame=outer_frame_min; outer_frame<=outer_frame_max; outer_frame++)
6626         {
6627                 search->subject->frame = outer_frame;
6628                if (StringCmp("tblastn", search->prog_name) == 0
6629                     || StringCmp("tblastx", search->prog_name) == 0
6630                     || StringCmp("psitblastn", search->prog_name) == 0)
6631                 {
6632                         if (outer_frame == 0)
6633                                 continue;
                             /* Translate the subject in this frame into the
                                search block's translation buffer. */
6634                         prot_seq = search->translation_buffer;
6635                         prot_length = BlastTranslateUnambiguousSequence(search, subject_length, prot_seq, subject_seq, outer_frame);
6636 
6637                         if(search->pbp->is_rps_blast) {
6638                             /* SEG Filtering of query DNA sequence */
6639                             
6640                             rpsBlastFilterSequence(search, outer_frame,
6641                                                    prot_seq, prot_length,
6642                                                    subject_length);
6643                         }
6644                         
6645                         BlastSequenceAddSequence(search->subject, NULL, prot_seq, prot_length, subject_length, 0);
6646                 }
6647                 else
6648                 {
                             /* NOTE(review): subject_seq-1 is passed here,
                                presumably because the callee expects a
                                pointer one position before the first
                                residue -- confirm. */
6649                         BlastSequenceAddSequence(search->subject, NULL, subject_seq-1, subject_length, subject_length, 0);
6650                 }
6651 
6652                 search->prelim = TRUE;
6653                 search->wfp = search->wfp_first;
6654 
6655 /* First pass with multiple hits. */
6656                 status = BlastExtendWordSearch(search, TRUE);
6657         /* status = 0 means NO significant matches found on first pass.*/
6658                 if (status > 0)
6659                 {       /* Match found on initial pass, DO second pass. */
                             /* NOTE(review): the status of the final search
                                is stored but not examined; after the break
                                the function returns 0 -- confirm that this
                                is intended. */
6660                         status = BLASTPerformFinalSearch(search, subject_length, subject_seq);
6661                         break;
6662                 }
6663                 else
6664                 { /* NULL out the sequence to prevent unintentional FREE's
6665                         (it's in "*subject_seq"), but delete the descriptor. */
6666                         search->subject->sequence = NULL; 
6667                 }
6668 
6669                 if (status < 0)
6670                 {               /* Error */
6671                         ErrPostEx(SEV_FATAL, 1, 0, "BlastExtendWordSearch returned non-zero status");
6672                         return 1;
6673                 }
6674         }
6675 
6676 /* NULL out the sequence, leave in the proper length which is still needed
6677 for the significance evaluation. */
6678         search->subject->length = subject_length;
6679         search->subject->sequence = NULL;
6680         search->subject->sequence_start = NULL;
6681 
6682         return 0;
6683 }
6684 
6685 /*
6686 
6687         Performs a BLAST search using the two-pass method: the first pass
6688         looks for multiple initial hits and then performs a second pass
6689         (with single hits extended) with a lower T value.
6690 
6691          Arguments are:
6692 
6693          - search: BlastSearchBlkPtr returned by SetUpBlastSearch, call
6694                 SetUpBlastSearch before calling this function.
6695          - sequence_number: number assigned to sequence (by user).  The
6696                 "readdb" library uses this number to access the sequence.
6697                 This number should be zero if it's not important.
6698          - subject_length: the length of the database sequence (not the length
6699                 allocated in *subject_seq).
6700          - subject_seq: CharPtr pointing to the sequence.
6701 
6702         NOTE: static variables in PerformBlastSearch for subject_seq and 
6703         allocated_length are not an option as they can't be deallocated 
6704         after the last call and they are NOT MP-safe.
6705 */
6706 
6707 Int2 LIBCALL 
6708 BLASTPerformFinalSearch (BlastSearchBlkPtr search, Int4 subject_length, Uint1Ptr subject_seq)
6709 /* Second-pass search of one subject sequence; the HSPs found are left in search->current_hitlist. */
6710 {
6711     BLAST_HitListPtr current_hitlist, hitlist = NULL; /* hitlist: HSPs merged over chunks and frames. */
6712     Int2 inner_frame, inner_frame_max, inner_frame_min, status;
6713     Int4 real_length, length, start = 0, num_chunks, index; /* start: offset of the current chunk. */
6714     Uint1Ptr prot_seq;
6715     /* Returns 0 on success, 1 if BlastExtendWordSearch fails, or the negative status of a BlastNT gapped call. */
6716     BlastHitListPurge(search->current_hitlist);
6717     if (subject_length == 0)
6718        /* Normal return */
6719         return 0;
6720 
6721     BlastSequenceAddSequence(search->subject, NULL, subject_seq-1, subject_length, subject_length, 0);
6722     search->current_hitlist_purge = TRUE; /* The default. */
6723     inner_frame_max = 1; /* Assigned again by every branch below. */
6724     if (search->prog_number == blast_type_tblastn
6725         || search->prog_number == blast_type_tblastx
6726         || search->prog_number == blast_type_psitblastn) {
6727         inner_frame_min = -3;
6728         inner_frame_max = 3;
6729     } else if (search->prog_number == blast_type_blastn) {
6730         inner_frame_min = 1;
6731         inner_frame_max = 1;
6732     } else {
6733         inner_frame_min = 0;
6734         inner_frame_max = 0;
6735     }
6736 
6737     /* Match found on initial pass, DO second pass. */
6738     for (inner_frame=inner_frame_min; inner_frame<=inner_frame_max; inner_frame++) {
6739         search->subject->frame = inner_frame;
6740         if (search->prog_number == blast_type_tblastn
6741             || search->prog_number == blast_type_tblastx
6742             || search->prog_number == blast_type_psitblastn) {
6743             if (inner_frame == inner_frame_min) /* Purge on 1st call. */
6744                 search->current_hitlist_purge = TRUE; 
6745             else
6746                 search->current_hitlist_purge = FALSE; 
6747             if (inner_frame == 0)
6748                 continue;
6749             start = 0; /* Every frame is chunked from its beginning. */
6750             prot_seq = search->translation_buffer;
6751             real_length = BlastTranslateUnambiguousSequence(search, subject_length, prot_seq, subject_seq, inner_frame);
6752             
6753             if(search->pbp->is_rps_blast) {
6754                 /* SEG Filtering of query DNA sequence */
6755                 
6756                 rpsBlastFilterSequence(search, inner_frame,
6757                                        prot_seq, real_length,
6758                                        subject_length);
6759             }
6760             
6761             /* subject seq stays the same, except for tblast[nx]. */
6762             BlastSequenceAddSequence(search->subject, NULL, prot_seq, real_length, subject_length, 0);
6763             if (real_length == 0) /* Nothing to search in this frame. */
6764                 continue;
6765         } else 
6766            real_length = subject_length;
6767         
6768         search->prelim = FALSE;
6769         /* Calculate some cutoff scores, these depend upon the seq lengths.*/
6770         /* For blastn  and gapped calc. use the cutoff's originally found. */
6771         if (!search->pbp->gapped_calculation && 
6772             search->prog_number != blast_type_blastn) {
6773             CalculateSecondCutoffScore(search, search->subject->length, &search->pbp->ignore_small_gaps, &search->pbp->cutoff_s_second, &search->pbp->cutoff_big_gap);
6774         }
6775         
6776 #ifdef BLAST_COLLECT_STATS
6777         search->second_pass_trys++;
6778 #endif
6779 
6780         if (search->pbp->mb_params)
6781            /* sequence_start is reserved for ncbi4na encoded sequence
6782               in this case */
6783            search->subject->sequence_start = NULL;
6784 
6785         length = real_length;
6786         /* Split subject sequence into chunks if it is too long */
6787         num_chunks = (length - DBSEQ_CHUNK_OVERLAP) / 
6788            (MAX_DBSEQ_LEN - DBSEQ_CHUNK_OVERLAP) + 1;
6789         search->subject->original_length = 0;
6790         if (search->pbp->mb_params && !search->rdfp) { 
6791            /* Coming from the 2 sequences engine: save the entire 
6792               ncbi4na sequence in search->subject->sequence_start
6793            */
6794            Uint1Ptr seq_blastna, seq_2na;
6795            Uint1 rem;
6796            
6797            search->subject->sequence_start = 
6798               (Uint1Ptr) MemNew(subject_length + 1); /* NOTE(review): the result is not checked for NULL. */
6799            seq_blastna = search->subject->sequence_start;
6800            seq_2na = search->subject->sequence;
6801            rem = 3; /* Selects the base within the current packed byte; counts 3 down to 0. */
6802            *seq_blastna = (Uint1) ncbi4na_to_blastna[NULLB]; /* First byte, ahead of the sequence itself. */
6803            seq_blastna++;
6804            for (index=0; index<subject_length; index++) {
6805               *seq_blastna = 
6806                  (Uint1) ncbi4na_to_blastna[(1 << READDB_UNPACK_BASE_N(*seq_2na, rem))];
6807               seq_blastna++;
6808               if (rem>0) rem--;
6809               else {
6810                  rem = 3;
6811                     seq_2na++;
6812               }
6813            }
6814         }
6815 
6816         for (index=0; index<num_chunks; index++) {
6817            length = MIN(real_length-start, MAX_DBSEQ_LEN);
6818            search->subject->length = length;
6819            /* THE BLAST SEARCH _IS_ HERE! */
6820            if (BlastExtendWordSearch(search, search->pbp->multiple_hits_only) < 0) {
6821               /* Error occurred in BlastExtendWordSearch */
6822               return 1; /* NOTE(review): returns without merging or releasing "hitlist". */
6823            }
6824            /* HSP's were not saved in any special order, sort. */
6825            current_hitlist = search->current_hitlist;
6826            if (current_hitlist && current_hitlist->do_not_reallocate == FALSE)
6827               HeapSort(current_hitlist->hsp_array, current_hitlist->hspcnt,sizeof(BLAST_HSPPtr), score_compare_hsps);
6828            if (search->pbp->gapped_calculation &&
6829                search->prog_number != blast_type_blastn) {
6830               status = BlastPreliminaryGappedScore(search, search->subject->sequence, search->subject->length, inner_frame); /* NOTE(review): overwritten on the next line without being checked. */
6831               status = BlastGetGappedScore(search, search->subject->length, search->subject->sequence, inner_frame); /* NOTE(review): not checked either. */
6832            }
6833 #if 1
6834            else if (!search->pbp->do_sum_stats && !search->pbp->mb_params) {
6835               status = BlastNTPreliminaryGappedScore(search, search->subject->sequence, search->subject->length);
6836               if (status < 0)
6837                  return status;
6838               status = BlastNTGetGappedScore(search, search->subject->length, search->subject->sequence);
6839               if (status < 0)
6840                  return status;
6841            }  
6842 #endif
6843            if (num_chunks > 1) { /* Chunked subject: shift the HSP offsets by the chunk start and merge. */
6844               AdjustOffsetsInBLASTHitList(search->current_hitlist, start);
6845               
6846               if (search->current_hitlist->hspcnt > 0) {
6847                  search = BlastReapPartialHitlistByEvalue(search, start);
6848                  hitlist = BLASTMergeHitLists(search, hitlist,
6849                                               search->current_hitlist, start,
6850                                               (search->pbp->mb_params != NULL)); 
6851               }
6852               start += length - DBSEQ_CHUNK_OVERLAP; /* Consecutive chunks overlap by DBSEQ_CHUNK_OVERLAP. */
6853               search->subject->original_length = start;
6854               if (search->prog_number == blast_type_blastn) /* Presumably a packed subject, READDB_COMPRESSION_RATIO bases per byte. */
6855                  search->subject->sequence += 
6856                     (length - DBSEQ_CHUNK_OVERLAP)/READDB_COMPRESSION_RATIO;
6857               else
6858                  search->subject->sequence += length - DBSEQ_CHUNK_OVERLAP;
6859               search->current_hitlist->hspcnt = 
6860                  search->current_hitlist->hspcnt_max = 0;
6861            }
6862            else if (search->prog_number == blast_type_tblastn ||
6863                     search->prog_number == blast_type_psitblastn)
6864            {     /* Unchunked: collect this frame's HSPs in "hitlist" and empty current_hitlist. */
6865                  hitlist = BLASTMergeHitLists(search, hitlist, search->current_hitlist, 0, FALSE);
6866                  MemSet((VoidPtr) search->current_hitlist->hsp_array, 0,
6867                            sizeof(BLAST_HSPPtr)*(search->current_hitlist->hspcnt_max));
6868                  search->current_hitlist->hspcnt = search->current_hitlist->hspcnt_max = 0;
6869            }
6870         }
6871     } /* for (inner_frame=inner_frame_min; inner_frame */
6872 
6873     if (hitlist) { /* Replace the contents of current_hitlist with the merged list. */
6874        MemFree(search->current_hitlist->hsp_array);
6875        MemCpy(search->current_hitlist, hitlist, sizeof(BLAST_HitList));
6876        MemFree(hitlist);
6877        if (!search->pbp->mb_params)
6878           search->subject->sequence = search->subject->sequence_start + 1; /* NOTE(review): set to NULL again just below. */
6879     }
6880         
6881     /* NULL out the sequence, leave in the proper length which is still 
6882        needed for the significance evaluation. */
6883     search->subject->length = subject_length;
6884     search->subject->sequence = NULL;
6885     if (!search->pbp->mb_params)
6886        /* This holds the ncbi4na-encoded sequence for Mega BLAST */
6887        search->subject->sequence_start = NULL;
6888 
6889     return 0;
6890 }
6891 
6892 
6893 
6894 /*
6895         Gets the translation array for a given genetic code.  
6896         This array is optimized for the NCBI2na alphabet.
6897         The reverse complement can also be specified.
6898 
6899         CharPtr genetic_code: The genetic code table to translate with;
6900                 it is indexed by codon, each base numbered T=0, C=1, A=2, G=3
6901                 (a NULL genetic_code makes the function return NULL).
6902         Boolean reverse_complement: translations for reverse
6903                 complement are needed.
6904 */
6905 
6906 Uint1Ptr
6907 GetPrivatTranslationTable(CharPtr genetic_code, Boolean reverse_complement)
6908 
6909 {
6910         /* Turns a 2-bit ncbi2na base into the base numbering used by the
6911            genetic code tables, which is T=0, C=1, A=2, G=3. */
6912         static Uint1 code_base[4] = {2, /* A in ncbi2na */
6913                        1, /* C in ncbi2na. */
6914                        3, /* G in ncbi2na. */
6915                        0 /* T in ncbi2na. */ };
6916         SeqMapTablePtr residue_map;
6917         Uint1Ptr table;
6918         Int2 first, second, third;      /* The three ncbi2na bases of a codon. */
6919         Int2 code_index;                /* Codon number in the genetic code table. */
6920         Int2 slot;                      /* Position written in the returned table. */
6921 
6922         /* Without a genetic code there is nothing to translate with. */
6923         if (genetic_code == NULL)
6924                 return NULL;
6925 
6926         /* One translated residue for each of the 4*4*4 codons. */
6927         table = MemNew(64*sizeof(Uint1));
6928         if (table == NULL)
6929                 return NULL;
6930 
6931         /* The map is used as returned; it is not checked here. */
6932         residue_map = SeqMapTableFind(Seq_code_ncbistdaa, Seq_code_ncbieaa);
6933 
6934         for (first=0; first<4; first++)
6935         {
6936             for (second=0; second<4; second++)
6937             {
6938                 for (third=0; third<4; third++)
6939                 {
6940                     if (!reverse_complement)
6941                     {
6942                         /* Plus strand: the codon is looked up and stored
6943                            under its own three bases. */
6944                         code_index = (code_base[first]<<4) + (code_base[second]<<2) + (code_base[third]);
6945                         slot = (first<<4) + (second<<2) + third;
6946                     }
6947                     else
6948                     {
6949                         /* Minus strand: every base is complemented (3 - base)
6950                            for the lookup, and the entry is stored with the
6951                            first and third bases swapped, which is how the
6952                            codon appears in the sequence. */
6953                         code_index = (code_base[3 - first]<<4) + (code_base[3 - second]<<2) + (code_base[3 - third]);
6954                         slot = (third<<4) + (second<<2) + first;
6955                     }
6956                     table[slot] = SeqMapTableConvert(residue_map, genetic_code[code_index]);
6957                 }
6958             }
6959         }
6960         return table;
6961 }       /* GetPrivatTranslationTable */
6962 
6963 /* Attach the "sequence" pointer to the BlastSequenceBlkPtr. sequence_start may be the
6964 actual start of the sequence (this pointer is kept for deallocation purposes).  The
6965 sequence may start before "sequence" starts as there may be a sentinel (i.e., NULLB)
6966 before the start of the sequence.  When the extension function extends this way it
6967 can tell that there is a NULLB there and stop the extension.
6968 
6969 */
6970 
6971 Int2 LIBCALL
6972 BlastSequenceAddSequence (BlastSequenceBlkPtr sequence_blk, Uint1Ptr sequence, Uint1Ptr sequence_start, Int4 length, Int4 original_length, Int4 effective_length)
6973 
6974 {
6975         /* No block to fill in: report this with a non-zero status. */
6976         if (sequence_blk == NULL)
6977                 return 1;
6978 
6979         /* An explicit "sequence" is used as given; without one the sequence
6980            is taken to begin one byte past "sequence_start".  If both are
6981            NULL the block's current "sequence" pointer is left untouched. */
6982         if (sequence != NULL)
6983                 sequence_blk->sequence = sequence;
6984         else if (sequence_start != NULL)
6985                 sequence_blk->sequence = sequence_start+1;
6986 
6987         sequence_blk->effective_length = effective_length;
6988         sequence_blk->original_length = original_length;
6989         sequence_blk->length = length;
6990         sequence_blk->sequence_start = sequence_start;
6991         return 0;
6992 }
6993 
6994 /*
6995         Select the appropriate wordfinder and then perform the search.
6996         The "wordfinder's" called here look through the already found
6997         words and extend those above a set limit ("T").
6998 
6999         These wordfinders operate in two modes.  One is the "preliminary"
7000         mode (search->prelim is TRUE); the wordfinders attempt to extend
7001         an initial hit.  If they succeed at all, they return a positive
7002         return status.  On the second pass (search->prelim is FALSE)
7003         only those db seqs with hits are further investigated.
7004 
7005 */
7006 static Int4
7007 BlastExtendWordSearch(BlastSearchBlkPtr search, Boolean multiple_hits)
7008 {
7009         /* Runs the word finder selected by "multiple_hits" and hands its
7010            status back to the caller unchanged. */
7011 
7012         /* Single-hit mode: the search goes to BlastWordFinder. */
7013         if (multiple_hits != TRUE)
7014                 return BlastWordFinder(search);
7015 
7016         /* Multiple-hit extensions need the multiple hits structure; the
7017            call fails with -1 when the search was set up without it. */
7018         if (search->ewp_params->multiple_hits == FALSE)
7019                 return -1;
7020 
7021         return BlastWordFinder_mh(search);
7022 }
7023 
7024 /*----------   search a sequence with 1 Context, 1 Letter per byte  ---------*/
7025 static Int4 
7026 BlastWordFinder(BlastSearchBlkPtr search)
7027 {
7028         BLAST_ParameterBlkPtr   params = search->pbp;
7029         BLAST_WordFinderPtr     finder;
7030 
7031         /* Select the word finder of the current pass and load that pass's
7032            extension settings into the parameter block: the dropoff "X" and
7033            the cutoff "cutoff_s2".  The cutoff is cutoff_s2_max when it was
7034            set explicitly (cutoff_s2_set), otherwise the smaller of the
7035            pass's own cutoff and cutoff_s2_max. */
7036         if (search->prelim != TRUE)
7037         {       /* Second pass. */
7038                 finder = search->wfp_second;
7039                 params->X = params->dropoff_2nd_pass;
7040                 /* With mb_params set the current cutoff_s2 is left alone. */
7041                 if (!params->mb_params)
7042                 {
7043                         if (params->cutoff_s2_set != TRUE)
7044                                 params->cutoff_s2 = MIN(params->cutoff_s_second, params->cutoff_s2_max);
7045                         else
7046                                 params->cutoff_s2 = params->cutoff_s2_max;
7047                 }
7048         }
7049         else
7050         {       /* Preliminary (first) pass. */
7051                 finder = search->wfp_first;
7052                 params->X = params->dropoff_1st_pass;
7053                 if (params->cutoff_s2_set != TRUE)
7054                         params->cutoff_s2 = MIN(params->cutoff_s_first, params->cutoff_s2_max);
7055                 else
7056                         params->cutoff_s2 = params->cutoff_s2_max;
7057         }
7058 
7059         /* blastn uses a nucleotide word finder (the Mega BLAST one when
7060            mb_params is set); all other programs use the contiguous-word
7061            finder. */
7062         if (search->prog_number != blast_type_blastn)
7063                 return BlastWordFinder_contig(search, finder->lookup);
7064 
7065         if (params->mb_params)
7066                 return MegaBlastWordFinder(search, finder->lookup);
7067         return BlastNtWordFinder(search, finder->lookup);
7068 }
7069 
7070 /* This function was updated to use mod_lt instead of the original lookup table, 
7071  * but was not heavily optimized or tested.
7072  * (Modifications listed in comments before BlastWordFinder_mh_contig.)
7073  * -cfj
7074  */
7075 
7076 /*
7077         Search a sequence with contiguous words.
7078 */
7079 static Int4
7080 BlastWordFinder_contig(BlastSearchBlkPtr search, LookupTablePtr lookup)
7081 {
7082         register Uint1Ptr       s, s_end;
7083         register Int4   char_size, lookup_index, mask;
7084         register BLAST_Diag     diag, diag_tmp, real_diag;
7085         BLAST_ExtendWordPtr     ewp;
7086         BLAST_ExtendWordParamsPtr       ewp_params;
7087         Boolean                 prelim, succeed_to_right;
7088         Uint1Ptr                        subject0; /* Start of the subject; s_off is measured from it. */
7089         CfjModStruct *combo_array;
7090         Int4     index=0;
7091         register ModLookupPosition hit_info;
7092         Int2            context;
7093         Int4            q_off, s_off, offset, word_width;
7094         register Int4 bits_to_shift, min_diag_length, min_diag_mask; /* bits_to_shift is assigned below but not used otherwise. */
7095         Int8    number_of_hits=0; /* Only counted when BLAST_COLLECT_STATS is defined. */
7096         register Int4 num_hits;
7097         register ModLookupPositionPtr lookup_pos;
7098         ModLAEntry *mod_lt=lookup->mod_lt;
7099         ewp_params=search->ewp_params;
7100         prelim = search->prelim;
7101         /* Scans the subject for words found in the lookup table and tries to extend each hit. */
7102 /* this function only does final run, prelim is done by BlastWordFinder_mh_contig */
7103         if (prelim)
7104                 return 1; /* Positive status, returned without searching. */
7105 
7106         char_size = lookup->char_size;
7107         mask = lookup->mask;
7108         offset = ewp_params->offset;
7109         subject0 = s = search->subject->sequence;
7110         min_diag_length = ewp_params->min_diag_length;
7111         bits_to_shift = ewp_params->bits_to_shift;
7112         min_diag_mask = ewp_params->min_diag_mask;
7113 
7114 /* The word_width tells how "long" a word is; if it's contiguous then it's
7115 the size of the word. */
7116         word_width = lookup->wordsize;
7117 
7118         /* Make sure there is a hitlist to save into; an existing one is emptied only if requested. */
7119         if (search->current_hitlist == NULL)
7120         {
7121                 search->current_hitlist = BlastHitListNew(search); 
7122         }
7123         else
7124         { /* Scrub the hitlist. */
7125                 if (search->current_hitlist_purge)
7126                         BlastHitListPurge(search->current_hitlist);
7127         }
7128 
7129         /* subject is too short to find anything! */
7130         if (word_width > search->subject->length)
7131                 return 0; /* This return does not go through BlastExtendWordExit. */
7132 
7133         s = lookup_find_init(lookup, &index, s);
7134         lookup_index = index;
7135 
7136         /* Determines when to stop scanning the database. */
7137         s_end = subject0 + search->subject->length;
7138         if ((search->last_context-search->first_context+1) > 1)
7139         {   /* Several contexts: each hit carries its own context. */
7140             for (;;) 
7141             {
7142                 do {
7143                         /* lookup a contiguous word. */
7144                         s++;
7145                         lookup_index = (((lookup_index) & mask)<<char_size) + *s; /* Shift the kept letters up and append the new one. */
7146                         if (s == s_end) /* *s has already been read above, so the byte at s_end is read too. */
7147                                 goto NormalReturn;
7148                 } while (mod_lt[lookup_index].num_used == 0);
7149 
7150                 num_hits = mod_lt[lookup_index].num_used;
7151                 lookup_pos = mod_lt[lookup_index].entries;
7152                 hit_info = *((Uint4 *) lookup_pos);
7153                 lookup_pos++;
7154                 /* More than 3 hits: the next entry gives the location of the rest (an offset for RPS BLAST, else a pointer). */
7155                 if(num_hits > 3){
7156                     if(search->pbp->is_rps_blast) {
7157                         lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
7158                     } else {
7159                         lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
7160                     }
7161                 }
7162 
7163                 s_off = s-subject0;
7164                 diag_tmp = s_off + min_diag_length;
7165                 /* Extend each hit in the linked list */
7166                 do {
7167 #ifdef BLAST_COLLECT_STATS
7168                     number_of_hits++;
7169 #endif
7170                     q_off = hinfo_get_pos(hit_info);
7171                     context = hinfo_get_context(hit_info);
7172                     num_hits--;
7173                     hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
7174                     lookup_pos++;
7175                     /* NOTE(review): the load above also runs after the last hit (num_hits == 0). */
7176                     diag = diag_tmp - q_off;
7177                     /* real_diag is diag masked down to an index into combo_array. */
7178                     real_diag = diag & min_diag_mask;
7179                     ewp=search->context[context].ewp;
7180                     combo_array = ewp->combo_array;
7181 
7182                     if (combo_array[real_diag].diag_level > (s_off+offset))
7183                     {   /* Presumably this diagonal was already extended past the hit -- TODO confirm. */
7184                         continue;
7185                     }
7186                     if (!(search->positionBased)) {
7187                       if (BlastWordExtend(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
7188                         goto ErrorReturn;
7189                     }
7190                     else { /* Position-based searches use BlastNewWordExtend. */
7191                       if (BlastNewWordExtend(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
7192                         goto ErrorReturn;
7193                     }
7194                 } while (num_hits>0);
7195            }
7196         }
7197         else    /* only one context. */
7198         {
7199            ewp=search->context[search->first_context].ewp;
7200            combo_array = ewp->combo_array;
7201            for (;;) 
7202            {
7203                 do {
7204                         /* lookup a contiguous word. */
7205                         lookup_index = (((lookup_index) & mask)<<char_size);
7206                         s++;
7207                         lookup_index += *s;
7208                         if (s == s_end) /* As above, *s is read before this test. */
7209                                 goto NormalReturn;
7210                 } while (mod_lt[lookup_index].num_used == 0);
7211 
7212 
7213                 num_hits = mod_lt[lookup_index].num_used;
7214                 lookup_pos = mod_lt[lookup_index].entries;
7215                 hit_info = *((Uint4 *) lookup_pos);
7216                 lookup_pos++;
7217                 /* More than 3 hits: the next entry gives the location of the rest (an offset for RPS BLAST, else a pointer). */
7218                 if(num_hits > 3){
7219                     if(search->pbp->is_rps_blast) {
7220                         lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
7221                     } else {
7222                         lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
7223                     }
7224                 }
7225 
7226                 s_off = s-subject0;
7227                 diag_tmp = s_off + min_diag_length;
7228                 /* Extend each hit in the linked list */
7229                 do {
7230 #ifdef BLAST_COLLECT_STATS
7231                     number_of_hits++;
7232 #endif
7233                     q_off = hinfo_get_pos(hit_info);
7234                     num_hits--;
7235                     hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
7236                     lookup_pos++;
7237                     /* NOTE(review): the load above also runs after the last hit (num_hits == 0). */
7238                     diag = diag_tmp - q_off;
7239                     real_diag = diag & min_diag_mask;
7240                     if (combo_array[real_diag].diag_level > (s_off+offset))
7241                     {   /* Presumably this diagonal was already extended past the hit -- TODO confirm. */
7242                         continue;
7243                     }
7244                     if (!(search->positionBased)) { /* NOTE(review): context 0 is passed, while ewp came from first_context. */
7245                       if (BlastWordExtend(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
7246                         goto ErrorReturn;
7247                     }
7248                     else {
7249                       if (BlastNewWordExtend(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
7250                         goto ErrorReturn;
7251                     }
7252                 } while (num_hits>0);
7253            }
7254         }
7255 
7256 NormalReturn: /* Whole subject scanned: record the statistics and return the number of HSPs saved. */
7257         if (search->prelim) /* The local "prelim" was FALSE to get here; presumably this still is -- TODO confirm. */
7258                 search->first_pass_hits += number_of_hits;
7259         else
7260                 search->second_pass_hits += number_of_hits;
7261         BlastExtendWordExit(search);
7262         return search->current_hitlist->hspcnt;
7263 
7264 ErrorReturn: /* An extension call returned non-zero. */
7265         BlastExtendWordExit(search);
7266         return 3; /* NOTE(review): positive; BLASTPerformFinalSearch only treats a negative status as an error. */
7267 }
7268 
7269 /***************************************************************************
7270 *       This function is called once for each subject sequence.
7271 *
7272 *       New (experimental) version of the Word Finder that makes use of
7273 *       an idea of Stephen Altschul's.  Multiple hits are found before a
7274 *       hit is extended.
7275 
7276 *       "diagpos" is an Int4 array that is as long as the query sequence
7277 *       and the longest database sequence.   An efficient comparison of
7278 *       whether a new hit is in the same window as the last one is done
7279 *       by keeping track of how far along an "imaginary" array (i.e.,
7280 *       increment) one is; this array changes every time this function is 
7281 *       called by the subject length plus window.
7282 *
7283 ***************************************************************************/
7284 /*----------   search a sequence with 1 Context, 1 Letter per byte  ---------*/
7285 static Int4
7286 BlastWordFinder_mh(BlastSearchBlkPtr search)
7287 {
7288         BLAST_ParameterBlkPtr   params = search->pbp;
7289         BLAST_WordFinderPtr     finder;
7290 
7291         /* Select the word finder of the current pass and load that pass's
7292            extension settings into the parameter block: the dropoff "X" and
7293            the cutoff "cutoff_s2".  The cutoff is cutoff_s2_max when it was
7294            set explicitly (cutoff_s2_set), otherwise the smaller of the
7295            pass's own cutoff and cutoff_s2_max. */
7296         if (search->prelim != TRUE)
7297         {       /* Second pass. */
7298                 finder = search->wfp_second;
7299                 params->X = params->dropoff_2nd_pass;
7300                 if (params->cutoff_s2_set != TRUE)
7301                         params->cutoff_s2 = MIN(params->cutoff_s_second, params->cutoff_s2_max);
7302                 else
7303                         params->cutoff_s2 = params->cutoff_s2_max;
7304         }
7305         else
7306         {       /* Preliminary (first) pass. */
7307                 finder = search->wfp_first;
7308                 params->X = params->dropoff_1st_pass;
7309                 if (params->cutoff_s2_set != TRUE)
7310                         params->cutoff_s2 = MIN(params->cutoff_s_first, params->cutoff_s2_max);
7311                 else
7312                         params->cutoff_s2 = params->cutoff_s2_max;
7313         }
7314 
7315         /* blastn has its own multiple-hit word finder; every other
7316            program goes through the contiguous-word multiple-hit
7317            finder. */
7318         if (search->prog_number != blast_type_blastn)
7319                 return BlastWordFinder_mh_contig(search, finder->lookup);
7320 
7321         return BlastNtWordFinder_mh(search, finder->lookup);
7322 }
7323 
7324 /****************************************************************************
7325 
7326         This function scans the database, looking for matches to the words in
7327         the 'lookup_index'.  
7328 
7329         In order to keep track of how far along a certain diagonal has already
7330         been extended an Int4 array that is twice as long as the shortest sequence
7331         is used (actually it is the power of two that is more than twice as long as the 
7332         shortest sequence).  There is a need for a mapping from 'true' diagonals (which would
7333         be the length of both query and database sequence) to the pseudo-diagonals
7334         used here (i.e., the Int4 array).  This is done below with the 'version'.
7335         The procedure is as follows:
7336 
7337         1.) diag_tmp is calculated with the 'subject' offset + min_diag_length: s_off + min_diag_length
7338         (min_diag_length is 2**n such that n is large enough to make min_diag_length larger
7339         than the shorter of the query and database sequence).
7340 
7341         2.) diag is calculated with diag_tmp - q_off.  This is the 'real' diagonal, except
7342         for the sum min_diag_length.  
7343 
7344         3.) real_diag is calculated by keeping only those bits in diag that are less than 
7345         min_diag_length-1.  This provides a unique number within a range.
7346 
7347         4.) the version is calculated by shifting over 'bits_to_shift', which 
7348         corresponds to dividing by min_diag_length.
7349 
7350         5.) the combination of the version and the 'real_diag' provide a unique location
7351         for the diagonal.
7352 
7353 
7354 
7355         modifications (cfj):
7356          - changed hash_table entries to reduce cache misses (see comments in lookup.c)
7357          - when walking through sequence, precompute next_index and prefetch the entry
7358          - combined last_hit/version/diag_level into array of struct for better locality.
7359          - eliminated the need for the version[] array by changing the value stored as diag_level.
7360              (This is done by measuring diag_level along s (rather than q) -- With this measure, 
7361              previous hits found in XX[real_diag] will either really be from the same diag, or will 
7362              have a diag_level and last_hit much smaller (by at least min_diag_length) than the current 
7363              position.)
7364 
7365 
7366 ******************************************************************************/
7367 
7368 
7369 
/* BlastWordFinder_mh_contig -- scan the subject sequence for contiguous word
   hits and start ungapped extensions on pairs of hits on one diagonal.

   search: supplies the subject sequence, the extension parameters
           (ewp_params), the per-context extension state and the hitlist.
   lookup: lookup table; wordsize, char_size, mask, mod_lt, pv_array and
           (for RPS-BLAST) mod_lookup_table_memory are read from it.

   The scan loop is written out four times: with or without a pv_array, and
   for several contexts or for a single context.  All four variants advance
   's' one position at a time, roll the lookup index forward with the next
   subject byte, and compute the index of the following position one step
   ahead so that its table data is loaded early.

   Returns 0 when the subject is shorter than the word width, 3 when an
   extension routine reports an error, and otherwise the hspcnt of
   search->current_hitlist.  When search->prelim is set the scan stops as
   soon as the hitlist holds at least one HSP. */
7370 static Int4
7371 BlastWordFinder_mh_contig(BlastSearchBlkPtr search, LookupTablePtr lookup)
7372 {
7373     register Uint1Ptr   s;
7374     register Uint1Ptr s_end;
7375     Uint1Ptr    subject0;
7376     BLAST_Diag  diag, diag_tmp, real_diag;
         /* NOTE(review): ewp_pointer and ca_ptr (below) hold 40 entries and are
            indexed by context number; no bound on search->last_context is
            checked in this function -- confirm the caller guarantees it. */
7377     BLAST_ExtendWordPtr     ewp, ewp_pointer[40];
7378     Uint4 q_off;
7379     register Int4 s_off;
7380     Uint2 context;
7381     Int4 diff, offset, s_pos, window;
7382     Int4 min_diag_length, min_diag_mask;
7383     Int4 *last_hit_p;
7384     CfjModStruct *combo_array;
7385     CfjModStruct *ca_ptr[40];
7386     register ModLookupPositionPtr lookup_pos;
7387     register Uint4 hit_info;
7388     
7389     Int4 char_size, lookup_index, mask, wordsize;
7390     Int4 next_lindex;
7391     Int4 * next_nhits_addr;
7392     Int4 word_width, index=0; 
         /* Only incremented when BLAST_COLLECT_STATS is defined; otherwise it
            stays 0 and the pass counters at NormalReturn are left unchanged. */
7393     Int8 number_of_hits=0;
7394     register Int4 num_hits;
7395     register Int4 next_nhits;
7396     
7397     BLAST_ExtendWordParamsPtr     ewp_params;
7398     Boolean                     prelim, succeed_to_right;
7399     ModLAEntry *mod_lt=lookup->mod_lt;
7400     PV_ARRAY_TYPE *pv_array = lookup->pv_array;
7401     register PV_ARRAY_TYPE PNTR next_pv_array_addr;
7402     register PV_ARRAY_TYPE next_pv_val,pv_val;
7403     
7404     ewp = NULL; /* Gets rid of a warning. */
7405     
7406     ewp_params=search->ewp_params;
7407     prelim = search->prelim;
7408     
7409     /* The word_width tells how "long" a word is; for a contiguous word it's
7410        the length of the word. */
7411     word_width = lookup->wordsize;
7412     
7413     wordsize = lookup->wordsize;
7414     char_size = lookup->char_size;
7415     mask = lookup->mask;
7416     subject0 = s = (Uint1Ptr) search->subject->sequence;
7417     
7418     window = ewp_params->window;
7419     offset = ewp_params->offset;
7420     min_diag_length = ewp_params->min_diag_length;
7421     min_diag_mask = ewp_params->min_diag_mask;
7422     
7423     if (search->current_hitlist == NULL) {
7424         search->current_hitlist = BlastHitListNew(search); 
7425     } else { /* Scrub the hitlist. */
7426         if (search->current_hitlist_purge)
7427             BlastHitListPurge(search->current_hitlist);
7428     }
7429     
7430     /* subject is too short to find anything! */
         /* This early exit returns directly; it does not pass through
            NormalReturn, so BlastExtendWordExit is not called on this path. */
7431     if (word_width > search->subject->length)
7432         return 0;
7433     
7434     /* Move along string to appropriate starting point. */
7435     s = lookup_find_init(lookup, &index, s);
7436     lookup_pos=NULL;
7437     lookup_index = index;
7438     /* Determines when to stop scanning the database. */
7439     s_end = subject0 + search->subject->length;
7440     
         /* Variants 1 and 2: a pv_array is present.  One bit per lookup index
            is tested and mod_lt is only consulted for indices whose bit is set. */
7441     if (pv_array) {
7442         if ((search->last_context-search->first_context+1) > 1) { 
7443             /* Only used if more than one context. */
7444             for (index=search->first_context; index<=search->last_context; index++){
7445                 ewp_pointer[index] = search->context[index].ewp;
7446                 ca_ptr[index]=ewp_pointer[index]->combo_array;
7447             }
7448             s_off = (Int4) (s - subject0); /* recomputed inside the loop before any use */
                 /* Prime the one-step-ahead index and its pv_array word. */
7449             next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
7450             next_pv_val = pv_array[next_lindex>>PV_ARRAY_BTS];
7451 
7452             for (;;) { 
7453                 do {
7454                     /* lookup a contiguous word. */
7455                     s++;
7456                     lookup_index = next_lindex;
7457                     
7458                     if (s == s_end)
7459                         goto NormalReturn;
7460                     
                         /* Roll the index forward with the byte after 's'.  At the
                            last subject position this reads the byte at s_end;
                            presumably a sentinel byte follows the sequence --
                            TODO confirm. */
7461                     next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
7462                     next_pv_array_addr = &pv_array[next_lindex>>PV_ARRAY_BTS];
7463                     pv_val = next_pv_val;
7464                     next_pv_val = *next_pv_array_addr;
7465                     
7466                 } while ((pv_val&(((PV_ARRAY_TYPE) 1)<<(lookup_index&PV_ARRAY_MASK))) == 0); 
7467 
7468                 num_hits = mod_lt[lookup_index].num_used;
7469                 
7470                 /* Changed by TLM. */
7471                 lookup_pos = mod_lt[lookup_index].entries;
7472                 hit_info = *((Uint4 *) lookup_pos);
7473                 lookup_pos++;
7474                 
                     /* With more than 3 hits the slot after the first entry is not
                        a hit: it is read as a pointer (or, for RPS-BLAST, as an
                        offset from mod_lookup_table_memory) to where the
                        remaining hits are stored. */
7475                 if(num_hits > 3){
7476                     if(search->pbp->is_rps_blast) {
7477                         lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
7478                     } else {
7479                         lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
7480                     }
7481                 }
7482                 /* Changed by TLM. */
7483                 s_off = (Int4) (s - subject0);
7484                 
7485                 s_pos = s_off + offset;
7486                 diag_tmp = s_off + min_diag_length;
7487                 
7488                 /* Extend each hit in the linked list */
7489                 /* Each link corresponds to different hits on the query sequence */
7490                 do {  /* for each hit */
7491                     
7492 #ifdef BLAST_COLLECT_STATS
7493                     number_of_hits++;
7494 #endif
                         /* hit_info packs the query offset and the context. */
7495                     q_off = hinfo_get_pos(hit_info);
7496                     context = hinfo_get_context(hit_info);
                         /* This load also runs after the last hit, so it reads one
                            slot beyond the hits that are actually processed. */
7497                     hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
7498                     
7499                     diag = diag_tmp - q_off;
7500                     real_diag = (diag_tmp - q_off) & min_diag_mask;
7501                     /* context-dependent values */
7502                     combo_array = ca_ptr[context];
7503                     
7504                     last_hit_p = &combo_array[real_diag].last_hit;
7505                     diff = s_pos - *last_hit_p;
7506                     num_hits--;
7507                     lookup_pos++;
7508                     
7509                     /* diff is always greater than window for the first time in a function. */
                         /* Previous hit too far back: just remember this one. */
7510                     if (diff >= window) {
7511                         *last_hit_p = s_pos;
                         /* Second hit within the window and not overlapping the first. */
7512                     } else if (diff >= wordsize) {
7513                         succeed_to_right = TRUE;
                             /* Extend only if the stored diag_level does not lie beyond
                                the current position (presumably the point up to which
                                this diagonal was already extended -- confirm). */
7514                         if (combo_array[real_diag].diag_level <= (s_off+offset)) {
7515                             ewp = ewp_pointer[context];
7516                             ewp->actual_window = diff;
7517                             if (!(search->positionBased)) {
7518                                 if (BlastWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
7519                                     goto ErrorReturn;
7520                             } else {
7521                                 if (BlastNewWordExtend_prelim(search, q_off, s_off, word_width, diag,  real_diag, &succeed_to_right, context) != 0)
7522                                     goto ErrorReturn;
7523                             }
                                 /* In the preliminary pass one saved HSP is enough. */
7524                             if (search->current_hitlist->hspcnt > 0 && prelim)
7525                                 goto NormalReturn;
7526                             
7527                         } 
                             /* succeed_to_right keeps its TRUE preset when no extension
                                was attempted above. */
7528                         if (succeed_to_right)
7529                             *last_hit_p = 0;
7530                         else
7531                             *last_hit_p = s_pos;
7532                     }
7533                 } while(num_hits>0); /* end for pos_cnt... */
7534             }
7535         } else { /* Only one context. */
7536         
                 /* Variant 2: pv_array, single context.  hit_info is used directly
                    as the query offset and the extension routines are called
                    with context 0. */
                 /* NOTE(review): ewp comes from context[first_context] while 0 is
                    passed as context below; these agree only if first_context
                    is 0 -- confirm. */
7537             ewp=search->context[search->first_context].ewp;
7538             combo_array=ewp->combo_array;
7539             
7540             next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
7541             next_pv_val = pv_array[next_lindex>>PV_ARRAY_BTS];
7542 
7543             for (;;) {
7544                 do {
7545                     /* lookup a contiguous word. */
7546                     s++;
7547                     lookup_index = next_lindex;
7548                     
7549                     if (s == s_end)
7550                         goto NormalReturn;
7551                     
7552                     next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
7553                     next_pv_array_addr = &pv_array[next_lindex>>PV_ARRAY_BTS];
7554                     pv_val = next_pv_val;
7555                     next_pv_val = *next_pv_array_addr;
7556                     
7557                 } while ((pv_val&(((PV_ARRAY_TYPE) 1)<<(lookup_index&PV_ARRAY_MASK))) == 0); 
7558                 
7559                 num_hits = mod_lt[lookup_index].num_used;
7560                 
7561                 /* Changed by TLM. */
7562                 lookup_pos = mod_lt[lookup_index].entries;
7563                 hit_info = *((Uint4 *) lookup_pos);
7564                 lookup_pos++;
7565                 
7566                 if(num_hits > 3){
7567                     if(search->pbp->is_rps_blast) {
7568                         lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
7569                     } else {
7570                         lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
7571                     }
7572                 }
7573                 
7574                 /* Changed by TLM. */
7575                 s_off = (Int4) (s - subject0);
7576                 s_pos = s_off + offset;
7577                 diag_tmp = s_off + min_diag_length;
7578                 
7579                 /* Extend each hit in the linked list */
7580                 /* Each link corresponds to different hits on the query sequence */
7581                 do {  /* for each hit */
7582                     
7583 #ifdef BLAST_COLLECT_STATS
7584                     number_of_hits++;
7585 #endif
7586                     /* Changed by TLM. */
7587                     q_off = hit_info;
7588                     num_hits--;
7589                     hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
7590                     lookup_pos++;
7591                     
7592                     diag = diag_tmp - q_off;
7593                     real_diag = diag & min_diag_mask;
7594                     
7595                     last_hit_p = &combo_array[real_diag].last_hit;
7596                     diff = s_pos - *last_hit_p;
7597                     
7598                     
7599                     /* diff is always greater than window for the first time in a function. */
7600                     if (diff >= window) {
7601                         *last_hit_p = s_pos;
7602                     } else if (diff >= wordsize) {
7603                         succeed_to_right = TRUE;
7604                         if (combo_array[real_diag].diag_level <= (s_off+offset)) {
7605                             ewp->actual_window = diff;
7606                             if (!(search->positionBased)) {
7607                                 if (BlastWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
7608                                     goto ErrorReturn;
7609                             } else {
7610                                 if (BlastNewWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
7611                                     goto ErrorReturn;
7612                             }
7613                             if (search->current_hitlist->hspcnt > 0 && prelim)
7614                                 goto NormalReturn;
7615                             
7616                         } 
7617                         if (succeed_to_right)
7618                             *last_hit_p = 0;
7619                         else
7620                             *last_hit_p = s_pos;
7621                     }
7622                 } while(num_hits > 0); /* end for pos_cnt... */
7623                 
7624                 
7625             }
7626         }
7627     } else {
             /* Variants 3 and 4: no pv_array.  The num_used count of the next
                index is loaded one step ahead and positions with a count of 0
                are skipped. */
7628         if ((search->last_context-search->first_context+1) > 1) {
7629             /* Only used if more than one context. */
7630             for (index=search->first_context; index<=search->last_context; index++){
7631                 ewp_pointer[index] = search->context[index].ewp;
7632                 ca_ptr[index]=ewp_pointer[index]->combo_array;
7633             }
7634             next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
7635             next_nhits_addr=&mod_lt[next_lindex].num_used ;
7636             next_nhits=*next_nhits_addr;
7637             s_off = (Int4) (s - subject0); /* recomputed inside the loop before any use */
7638             for (;;) {
7639                 do {
7640                     /* lookup a contiguous word. */
7641                     lookup_index = next_lindex;
7642                     s++; 
7643                     
7644                     if (s == s_end) goto NormalReturn;
7645                     
7646                     next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
7647                     
7648                     next_nhits_addr = &mod_lt[next_lindex].num_used;
7649                     
7650                     num_hits = next_nhits;
7651                     next_nhits=*next_nhits_addr;
7652                     
7653                 } while (num_hits == 0); 
7654                 
7655                 /* Changed by TLM. */
7656                 lookup_pos = mod_lt[lookup_index].entries;
7657                 hit_info = *((Uint4 *) lookup_pos);
7658                 lookup_pos++;
7659                 
7660                 if(num_hits>3){
7661                     if(search->pbp->is_rps_blast) {
7662                         lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
7663                     } else {
7664                         lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
7665                     }
7666                 }
7667                 /* Changed by TLM. */
7668                 s_off = (Int4) (s - subject0);
7669                 
7670                 s_pos = s_off + offset;
7671                 diag_tmp = s_off + min_diag_length;
7672                 
7673                 /* Extend each hit in the linked list */
7674                 /* Each link corresponds to different hits on the query sequence */
7675                 /* printf(" dtmp:%3d     ",diag_tmp); */
7676                 do{  /* for each hit */
7677                     
7678 #ifdef BLAST_COLLECT_STATS
7679                     number_of_hits++;
7680 #endif
7681                     q_off = hinfo_get_pos(hit_info);
7682                     context = hinfo_get_context(hit_info);
7683                     hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
7684                     
7685                     diag = diag_tmp - q_off;
7686                     real_diag = (diag_tmp - q_off) & min_diag_mask;
7687                     /* context-dependent values */
7688                     combo_array = ca_ptr[context];
7689                     
7690                     last_hit_p = &combo_array[real_diag].last_hit;
7691                     diff = s_pos - *last_hit_p;
7692                     num_hits--;
7693                     lookup_pos++;
7694                     
7695                     /* diff is always greater than window for the first time in a function. */
7696                     if (diff >= window) {
7697                         *last_hit_p = s_pos;
7698                     } else if (diff >= wordsize) {
7699                         succeed_to_right = TRUE;
7700                         if (combo_array[real_diag].diag_level <= (s_off+offset)) {
7701                             ewp = ewp_pointer[context];
7702                             ewp->actual_window = diff;
7703                             if (!(search->positionBased)) {
7704                                 if (BlastWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
7705                                     goto ErrorReturn;
7706                             } else {
7707                                 if (BlastNewWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
7708                                     goto ErrorReturn;
7709                             }
7710                             if (search->current_hitlist->hspcnt > 0 && prelim)
7711                                 goto NormalReturn;
7712                             
7713                         } 
7714                         if (succeed_to_right)
7715                             *last_hit_p = 0;
7716                         else
7717                             *last_hit_p = s_pos;
7718                     }
7719                 } while(num_hits>0); /* end for pos_cnt... */
7720             }
7721         } else { /* Only one context. */
7722             
                 /* Variant 4: no pv_array, single context; hit_info is the query
                    offset and context 0 is passed to the extension routines
                    (same first_context caveat as in the pv_array variant). */
7723             ewp=search->context[search->first_context].ewp;
7724             combo_array=ewp->combo_array;
7725             
7726             next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
7727             /* CHanged by TLM. 
7728                next_nhits_addr=&mod_lt[next_lindex].num_used ;
7729                next_nhits=*next_nhits_addr;
7730             */
7731             next_nhits=mod_lt[next_lindex].num_used ;
7732             for (;;) {
7733                 do {
7734                     /* lookup a contiguous word. */
7735                     lookup_index = next_lindex;
7736                     s++; 
7737                     if (s == s_end) goto NormalReturn;
7738                     
7739                     next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
7740                     /* CHanged by TLM. 
7741                        next_nhits_addr = &mod_lt[next_lindex].num_used;
7742                     */
7743                     
7744                     num_hits = next_nhits;
7745                     next_nhits=mod_lt[next_lindex].num_used;
7746                     
7747                 } while (num_hits == 0); 
7748                 
7749                 
7750                 /* Changed by TLM. */
7751                 lookup_pos = mod_lt[lookup_index].entries;
7752                 hit_info = *((Uint4 *) lookup_pos);
7753                 lookup_pos++;
7754                 
7755                 if(num_hits > 3){
7756                     if(search->pbp->is_rps_blast) {
7757                         lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
7758                     } else {
7759                         lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
7760                     }
7761                 }
7762                 
7763                 /* Changed by TLM. */
7764                 s_off = (Int4) (s - subject0);
7765                 s_pos = s_off + offset;
7766                 diag_tmp = s_off + min_diag_length;
7767                 
7768                 /* Extend each hit in the linked list */
7769                 /* Each link corresponds to different hits on the query sequence */
7770                 do {  /* for each hit */
7771                     
7772 #ifdef BLAST_COLLECT_STATS
7773                     number_of_hits++;
7774 #endif
7775                     /* Changed by TLM. */
7776                     q_off = hit_info;
7777                     num_hits--;
7778                     hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
7779                     lookup_pos++;
7780                     
7781                     diag = diag_tmp - q_off;
7782                     real_diag = diag & min_diag_mask;
7783                     
7784                     last_hit_p = &combo_array[real_diag].last_hit;
7785                     diff = s_pos - *last_hit_p;
7786                     
7787 /* diff is always greater than window for the first time in a function. */
7788                     if (diff >= window) {
7789                         *last_hit_p = s_pos;
7790                     } else if (diff >= wordsize) {
7791                         succeed_to_right = TRUE;
7792                         if (combo_array[real_diag].diag_level <= (s_off+offset)) {
7793                             ewp->actual_window = diff;
7794                             if (!(search->positionBased)) {
7795                                 if (BlastWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
7796                                     goto ErrorReturn;
7797                             } else {
7798                                 if (BlastNewWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
7799                                     goto ErrorReturn;
7800                             }
7801                             if (search->current_hitlist->hspcnt > 0 && prelim)
7802                                 goto NormalReturn;
7803                             
7804                         } 
7805                         if (succeed_to_right)
7806                             *last_hit_p = 0;
7807                         else
7808                             *last_hit_p = s_pos;
7809                     }
7810                 } while(num_hits>0); /* end for pos_cnt... */
7811             }  /* for(;;) */
7812         }
7813     }
7814     
         /* Reached at the end of the subject or, in the preliminary pass, as
            soon as an HSP has been saved. */
7815  NormalReturn:
7816     if (search->prelim)
7817         search->first_pass_hits += number_of_hits;
7818     else
7819         search->second_pass_hits += number_of_hits;
7820     BlastExtendWordExit(search);
7821     return search->current_hitlist->hspcnt;
7822     
         /* Reached when an extension routine returns non-zero. */
7823  ErrorReturn:
7824     BlastExtendWordExit(search);
7825     return 3;
7826 }
7827 
7828 /* BlastWordExtend -- extend a word-sized hit to a longer match */
/* Ungapped extension of a word hit along its diagonal, scored with
   search->sbp->matrix and stopped by the drop-off value search->pbp->X.

   q_off, s_off:     query/subject offsets of the hit; the word is scanned
                     backwards from these positions, so they index its last
                     residue.
   word_width:       number of residues in the initial word.
   diag:             diagonal including the min_diag_length bias, which is
                     removed on entry; afterwards a subject position is the
                     query position plus diag.
   real_diag:        index of this diagonal in ewp->combo_array.
   succeed_to_right: set to FALSE on entry and not changed again here.
   context:          index into search->context.

   Side effects: stores combo_array[real_diag].diag_level and, when the score
   reaches pbp->cutoff_s2, saves an HSP through BlastSaveCurrentHsp.
   Always returns 0. */
7829 static Int2
7830 BlastWordExtend(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context)
7831 {
7832         BLAST_ExtendWordPtr     ewp;
7833         BLAST_ParameterBlkPtr   pbp;
7834         BLAST_ScoreBlkPtr       sbp;
7835         BLAST_Score             leftsum, rightsum, rightscore, leftscore;
7836         Uint1Ptr                query;
7837         register Uint1Ptr       q, s;
7838         register Uint1Ptr       q_right, q_left, s_left, q_best_right, q_best_left;
7839         register BLAST_Score    score, sum; 
7840         register BLAST_ScorePtr PNTR    matrix;
7841         register BLAST_Score    x, X;
7842 
7843 
             /* NOTE(review): both pointers stay NULL if no running sum in the word
                scan below exceeds 0, yet they are used unconditionally after the
                scan.  BlastNewWordExtend starts them at q instead -- confirm that
                callers always pass a word with a positive-scoring stretch. */
7844         q_best_left = NULL;     /* Gets rid of warning. */
7845         q_best_right = NULL;    /* Gets rid of warning. */
7846 
7847 #ifdef BLAST_COLLECT_STATS
7848         if (search->prelim)
7849                 search->first_pass_extends++;
7850         else
7851                 search->second_pass_extends++;
7852 #endif
7853 
7854         *succeed_to_right = FALSE;
7855 
7856         ewp=search->context[context].ewp;
7857 
             /* Remove the bias so that diag is the plain subject-minus-query offset. */
7858         diag -= search->ewp_params->min_diag_length;
7859 
7860         sbp=search->sbp;
7861         pbp=search->pbp;
7862 
7863         query = search->context[context].query->sequence;
7864         q = query + q_off;
7865         s = search->subject->sequence + s_off; 
7866 
7867         X=pbp->X;
7868         matrix = sbp->matrix;
7869 
7870         score=0;
7871         sum = 0;
7872         q_left = q - word_width;
7873         q_right = q;
7874 
7875 /* Look for the highest scoring region in the initial word. */
             /* Walks backwards over the word; the running sum restarts (and the
                candidate right end moves left) whenever it drops to 0 or below. */
7876         while (q > q_left)
7877         {
7878                 if ((sum += matrix[*q][*s]) > score)
7879                 {
7880                         score = sum;
7881                         q_best_right = q_right;
7882                         q_best_left = q;
7883                 }
7884                 else if (sum <= 0)
7885                 {
7886                         sum = 0;
7887                         q_right = q-1;
7888                 }
7889                 q--; s--;
7890         }
7891 
7892         leftsum = rightsum = rightscore = 0;
7893 
7894 /* q_left is where the "attempted" extension along the query was 
7895 stopped (and may be picked up again if the "goto Extend_Left" is used).
7896 q_best_left is the "best" extension along the query that should be
7897 reported. Analogous logic applies to q_right and q_best_right. */
             /* No "goto Extend_Left" is present in this function. */
7898 
7899         q_left = q_best_left;
7900         q_right = q_best_right;
7901 
7902         q = q_left;
7903         s = search->subject->sequence + (q - query) + diag;
7904         sum = leftsum;
7905 
             /* Left extension.  sum starts at 0, so the loop is entered only when
                X is negative.  There is no explicit check against the start of
                either sequence; presumably sentinel residues with strongly
                negative scores end the walk -- TODO confirm. */
7906         x = X;
7907         while (sum > x)
7908         {
7909                 q--; s--;
7910                 if ((sum += matrix[*q][*s]) > 0)
7911                 {
7912                         do {
7913                                 score += sum;
7914                                 q_best_left = q;
7915                                 q--; s--;
7916                         } while ((sum = matrix[*q][*s]) > 0);
7917                 }
7918         } 
7919 
             /* rightsum and rightscore are still 0 here, so this test reduces to
                score > 0 && X < 0. */
7920         if (score > rightscore && rightsum > X && -rightscore > X)
7921         {
                     /* leftscore, leftsum and q_left are stored here but not read
                        again in this function. */
7922                 leftscore = score;
7923                 leftsum = sum;
7924                 q_left = q;
7925 
7926                 q = q_right;
7927                 s = search->subject->sequence + (q - query) + diag;
7928                 sum = rightsum;
7929 
7930 /* "score" is actually the "maxscore", if sum drops by "score", then the
7931 total new score is zero and the extension can stop. */
7932                 if ((x = -score) < X)
7933                         x = X;
7934 
                     /* Right extension; like the left one it has no explicit check
                        against the end of either sequence. */
7935                 while (sum > x)
7936                 {
7937                         q++; s++;
7938                         if ((sum += matrix[*q][*s]) > 0)
7939                         {
7940                                 do {
7941                                         score += sum;
7942                                         q_best_right = q;
7943                                         q++; s++;
7944                                 } while ((sum = matrix[*q][*s]) > 0);
7945                                 /* do this if score changes. */
7946                                 if ((x = -score) < X)
7947                                         x = X;
7948                         }
7949                 } 
7950 
7951                 q_right = q;
7952         }
7953 
7954         /* Record how far this diagonal has been traversed,
7955         "q_right" was the last position on the query sequence.
7956         ewp_params->offset is added to provide the proper "zero-point" */       
7957         ewp->combo_array[real_diag].diag_level = q_right - query - q_off + word_width + s_off + search->ewp_params->offset;
7958 
7959         if (score >= pbp->cutoff_s2) /* Score is reportable */
7960         {
7961 
7962 #ifdef BLAST_COLLECT_STATS
7963                 if (search->prelim)
7964                         search->first_pass_good_extends++;
7965                 else
7966                         search->second_pass_good_extends++;
7967 #endif
                     /* Saved as: score, query start, subject start, length
                        (q_best_right - q_best_left + 1), context. */
7968                 s_left = search->subject->sequence + (q_best_left - query) + diag;
7969                 BlastSaveCurrentHsp(search, score, (q_best_left-query), (s_left-search->subject->sequence), (q_best_right-q_best_left+1), context);
7970         }
7971 
7972         return 0;
7973 }
7974 /*AAS*/
7975 /* BlastNewWordExtend -- extend a word-sized hit to a longer match;
7976    this is the position-based counterpart of BlastWordExtend */
7977 static Int2
7978 BlastNewWordExtend(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context)
7979 {
7980         BLAST_ExtendWordPtr     ewp;
7981         BLAST_ParameterBlkPtr   pbp;
7982         BLAST_Score             leftsum, rightsum, rightscore, leftscore;
7983         Uint1Ptr                query;
7984         register Uint1Ptr       q, s;
7985         register Uint1Ptr       q_right, q_left, s_left, q_best_right, q_best_left;
7986         register BLAST_Score    score, sum; 
7987         register BLAST_Score    x, X;
7988 
7989 
7990 #ifdef BLAST_COLLECT_STATS
7991         if (search->prelim)
7992                 search->first_pass_extends++;
7993         else
7994                 search->second_pass_extends++;
7995 #endif
7996 
7997         *succeed_to_right = FALSE;
7998 
7999         ewp=search->context[context].ewp;
8000 
8001         diag -= search->ewp_params->min_diag_length;
8002 
8003         pbp=search->pbp;
8004 
8005         query = search->context[context].query->sequence;
8006         q = query + q_off;
8007         s = search->subject->sequence + s_off; 
8008 
8009         X=pbp->X;
8010 
8011         score=0;
8012         sum = 0;
8013         q_left = q - word_width;
8014         q_right = q;
8015         q_best_left = q;
8016         q_best_right = q; /*AAS*/
8017 
8018 /* Look for the highest scoring region in the initial word. */
8019         while (q > q_left)
8020         {
8021                 if ((sum += MtrxScorePosSearch(search->sbp,
8022                                 (Int4) (q - query),*s)) > score)
8023                 {
8024                         score = sum;
8025                         q_best_right = q_right;
8026                         q_best_left = q;
8027                 }
8028                 else if (sum <= 0)
8029                 {
8030                         sum = 0;
8031                         q_right = q-1;
8032                 }
8033                 q--; s--;
8034         }
8035 
8036         if ((x = -score) < X)
8037                 x = X;
8038 
8039         leftsum = rightsum = rightscore = 0;
8040 
8041 /* q_left is the where the "attempted" extension along the query was 
8042 stopped (and may be picked up again if the "goto Extend_Left" is used).
8043 q_best_left is the "best" extension along the query that should be
8044 reported. Analogous logic applies to q_right and q_best_right. */
8045 
8046         q_left = q_best_left;
8047         q_right = q_best_right;
8048 
8049         q = q_left;
8050         s = search->subject->sequence + (q - query) + diag;
8051         sum = leftsum;
8052         x = X;
8053 
8054         do
8055         {
8056                 q--; s--;
8057                 if (((q -query) >=0) &&
8058                     (sum += MtrxScorePosSearch(search->sbp,
8059                                 (Int4) (q - query),*s)) > 0)
8060                 {
8061                         do {
8062                                 score += sum;
8063                                 q_best_left = q;
8064                                 q--; s--;
8065                         } while (((q -query) >= 0) &&
8066                            ((sum = MtrxScorePosSearch(search->sbp,
8067                                         (Int4) (q - query),*s)) > 0));
8068                 }
8069         } while (((q -query) >= 0) && (sum >= x));
8070 
8071 
8072         if (score > rightscore && rightsum > X && -rightscore > X)
8073         {
8074                 leftscore = score;
8075                 leftsum = sum;
8076                 q_left = q;
8077 
8078                 q = q_right;
8079                 s = search->subject->sequence + (q - query) + diag;
8080                 sum = rightsum;
8081 
8082 /* "score" is actually the "maxscore", if sum drops by "score", then the
8083 total new score is zero and the extension can stop. */
8084                 if ((x = -score) < X)
8085                         x = X;
8086 
8087                 do
8088                 {
8089                         q++; s++;
8090                         if ((sum += MtrxScorePosSearch(search->sbp,
8091                                         (Int4) (q - query),*s)) > 0)
8092                         {
8093                                 do {
8094                                         score += sum;
8095                                         q_best_right = q;
8096                                         q++; s++;
8097                                 } while ((sum = MtrxScorePosSearch(search->sbp,
8098                                         (Int4) (q - query),*s)) > 0);
8099                                 /* do this if score changes. */
8100                                 if ((x = -score) < X)
8101                                         x = X;
8102                         }
8103                 } while (sum >= x);
8104 
8105                 q_right = q;
8106         }
8107 
8108         /* Record how far this diagonal has been traversed,
8109         "q_right" was the last position on the query sequence.
8110         ewp_params->offset is added to provide the proper "zero-point" */       
8111         ewp->combo_array[real_diag].diag_level = q_right - query - q_off + word_width + s_off + search->ewp_params->offset;
8112 
8113         if (score >= pbp->cutoff_s2) /* Score is reportable */
8114         {
8115 
8116 #ifdef BLAST_COLLECT_STATS
8117                 if (search->prelim)
8118                         search->first_pass_good_extends++;
8119                 else
8120                         search->second_pass_good_extends++;
8121 #endif
8122                 s_left = search->subject->sequence + (q_best_left - query) + diag;
8123                 BlastSaveCurrentHsp(search, score, (q_best_left-query), (s_left-search->subject->sequence), (q_best_right-q_best_left+1), context);
8124         }
8125 
8126         return 0;
8127 }
8128 
8129 
8130 
8131 /* BlastWordExtend_prelim -- for timing purposes.

        Ungapped extension of one word hit, scoring each aligned residue
        pair with search->sbp->matrix[*q][*s].  The routine

        1.) scans the word_width positions that end at (q_off, s_off), from
        right to left, for the best-scoring segment of the initial word;
        2.) extends that segment to the left while the running sum stays
        above the dropoff X;
        3.) extends to the right only if the left end then lies at least
        ewp->actual_window positions to the left of query+q_off, in which
        case *succeed_to_right is set to TRUE (it is FALSE otherwise);
        4.) stores the traversal level in ewp->combo_array[real_diag].diag_level
        and, if score >= pbp->cutoff_s2, passes the segment to
        BlastSaveCurrentHsp.

        BlastSearchBlkPtr search: main BLAST structure,
        Int4 q_off: offset into the query where the right-to-left scan starts,
        Int4 s_off: offset into the subject where the right-to-left scan starts,
        Int4 word_width: number of positions scanned in the initial word,
        BLAST_Diag diag: after search->ewp_params->min_diag_length has been
                subtracted, used as the subject-minus-query offset,
        BLAST_Diag real_diag: index into ewp->combo_array,
        Boolean PNTR succeed_to_right: output flag, see 3.) above,
        Int2 context: index into search->context (selects query and ewp).
        Return: always 0.
*/
8132 static Int2
8133 BlastWordExtend_prelim(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context)
8134 {
8135         BLAST_ExtendWordPtr     ewp;
8136         BLAST_ParameterBlkPtr   pbp;
8137         BLAST_ScoreBlkPtr       sbp;
8138         register Uint1Ptr       q, s, query;
8139         register Uint1Ptr       q_right, q_left, s_left, q_best_right, q_best_left;
8140         register BLAST_Score    score, sum;
8141         register BLAST_ScorePtr PNTR    matrix;
8142         register BLAST_Score    x, X;
8143         Uint4 query_num; /* AM: Support for query multiplexing. */
8144         
8145         
8146 
             /* NOTE(review): if the scan of the initial word below never sees
             a positive running sum, both pointers stay NULL and are still
             used afterwards -- presumably callers only pass word hits with a
             positive score; confirm. */
8147         q_best_left = NULL;     /* Gets rid of warning. */
8148         q_best_right = NULL;    /* Gets rid of warning. */
8149 
8150 #ifdef BLAST_COLLECT_STATS
8151         if (search->prelim)
8152                 search->first_pass_extends++;
8153         else
8154                 search->second_pass_extends++;
8155 #endif
8156 
8157         *succeed_to_right = FALSE;
8158 
8159         ewp=search->context[context].ewp;
8160 
8161         diag -= search->ewp_params->min_diag_length;
8162 
8163         sbp=search->sbp;
8164         pbp=search->pbp;
8165 
8166         query = search->context[context].query->sequence;
8167         q = query + q_off;
8168         s =  search->subject->sequence + s_off; 
8169 
8170         /* AM: Support for query multiplexing. */
             /* For tblastn with multiplexed queries the dropoff comes from the
             per-query dropoff_2nd_pass_array entry; otherwise from pbp->X. */
8171         if( search->prog_number == blast_type_tblastn && search->mult_queries )
8172         {
8173           query_num = GetQueryNum( search->mult_queries, q_off - word_width + 1, 
8174                                    q_off + 1, 0 );
8175           X = search->mult_queries->dropoff_2nd_pass_array[query_num];
8176         }
8177         else X=pbp->X;
8178 
8179         matrix = sbp->matrix;
8180 
8181         score=0;
8182         sum = 0;
8183         q_left = q - word_width;
8184         q_right = q;
8185 
8186 /* Look for the highest scoring region in the initial word. */
             /* Scans right to left; whenever the running sum falls to zero or
             below it is reset and the candidate right end moves to q-1. */
8187         while (q > q_left)
8188         {
8189                 sum += matrix[*q][*s];
8190                 if (sum > score)
8191                 {
8192                         score = sum;
8193                         q_best_right = q_right;
8194                         q_best_left = q;
8195                 }
8196                 else if (sum <= 0)
8197                 {
8198                         sum = 0;
8199                         q_right = q-1;
8200                 }
8201                 q--; s--;
8202         }
8203 
             /* Restart from the left end of the best segment; the subject
             pointer is re-derived from the diagonal offset. */
8204         q = q_left = q_best_left;
8205         s = s_left = search->subject->sequence + (q_left - query) + diag;
8206 
             /* Pre-decrement to balance the unconditional q_left++ that
             follows the left-extension loop. */
8207         q_left--;
8208 
8209 /******************************************************************
8210 
8211 The extension procedure used here is to:
8212 
8213 1.) keep on extending as long as it increases the total score so far, record this
8214 maximum score and the corresponding extents as each new maximum score is reached.
8215 
8216 2.) if extending decreases the total score so far then keep on extending
8217 until the score has dropped by "X" from the last maximum score to explore
8218 whether it is only a local minima that has been encountered:
8219 
8220         a.) if the score drops by "X" from the last maximum score, then stop
8221         the extension and record the last maximum score as well as the 
8222         corresponding extents for query and subject.
8223 
8224         b.) if the score recovers again and becomes higher than the last maximum
8225         score, reset the maximum score so far as well as the corresponding
8226         query and subject offsets.
8227 
8228 
8229 3.) When the end of a sequence (either query or subject) is encountered record the last 
8230 maximum score as well as the corresponding extents.
8231         
8232 
8233 
8234 In the "while" loop below the maximum score is the variable "score" and "sum"
8235 is the change since the maximum score was last recorded (i.e., the variable
8236 "score" was modified).  
8237 
8238 Both x and X are negative and the outer "while" loops continues
8239 as long as sum is less negative than x.  Iterations of the "while" 
8240 loop with "sum" containing a negative value corresponds to 2.) above.
8241 
8242 The inner do-while loop is executed only as long as each extension
8243 increases the maximum score, corresponding to 1.) above.
8244 
8245 There is no explicit check for the end of a sequence here, but
8246 between sequences in the blast database there is a "sentinel"
8247 byte.  If this sentinel byte is encountered then matrix[*q][*s]
8248 will be much more negative than "X" so that the extension will
8249 stop.  This corresponds to 3.) above.
8250 
8251 *******************************************************************/
8252 
8253         sum = 0;
8254         x = X;
8255         while (sum > x)
8256         {
8257                 q--; s--;
8258                 if ((sum += matrix[*q][*s]) > 0)
8259                 {
8260                         do {
8261                                 score += sum;
8262                                 q--; s--;
8263                         } while ((sum = matrix[*q][*s]) > 0);
8264                         q_left = q;
8265                 }
8266         }
8267         /* Adjust for extra decrement in do-while loop above. */
8268         q_left++;
8269         s_left = search->subject->sequence + (q_left - query) + diag;
8270 
8271 
8272 /* Extend towards the right (for this preliminary run) if
8273 q_off - q_left is greater than the window. */
             /* (The test below is ">=", so a distance equal to the window
             also qualifies.) */
8274         if (((query+q_off)-q_left) >= ewp->actual_window)
8275         {
8276                 *succeed_to_right = TRUE;
8277                 q = q_right = q_best_right;
8278                 s = search->subject->sequence + (q - query) + diag;
8279                 sum = 0;
8280                 q_right++;  /* pre-increment in case while() loop doesn't run */
8281 
8282 /**************************************************************
8283 
8284 The extension to the right is performed in the same way as the extension
8285 to the left, except that the extension can stop if the score
8286 drops by X or becomes negative, in which case the last maximum score
8287 is recorded.
8288 
8289 *****************************************************************/
                     /* x is the less negative of -score and X. */
8290                 if ((x = -score) < X)
8291                         x = X;
8292                 while (sum > x)
8293                 {
8294                         q++; s++;
8295                         if ((sum += matrix[*q][*s]) > 0)
8296                         {
8297                                 do {
8298                                         score += sum;
8299                                         q++; s++;
8300                                 } while ((sum = matrix[*q][*s]) > 0);
8301                                 q_right = q;
8302                                 /* do this if score changes. */
8303                                 if ((x = -score) < X)
8304                                         x = X;
8305                         }
8306                 }
8307                 /* Adjust for extra increment in do-while loop above. */
8308                 q_right--;
8309         }
8310 
8311         /* Record how far this diagonal has been traversed,
8312         "q" was the last position on the query sequence (where the last
             extension loop that ran stopped: the left loop if no rightward
             extension was attempted).
8313         search->ewp_params->offset is added to provide the proper "zero-point" */      
8314         ewp->combo_array[real_diag].diag_level = q - query - q_off + word_width + s_off + search->ewp_params->offset;
8315 
8316         if (score >= pbp->cutoff_s2) /* Score is reportable */
8317         {
8318 
8319 #ifdef BLAST_COLLECT_STATS
8320                 if (search->prelim)
8321                         search->first_pass_good_extends++;
8322                 else
8323                         search->second_pass_good_extends++;
8324 #endif
8325 
                     /* Arguments after score: query start, subject start,
                     length (both ends inclusive, hence the +1), context. */
8326                 BlastSaveCurrentHsp(search, score, (q_left-query), (s_left-search->subject->sequence), (q_right-q_left+1), context);
8327         }
8328 
8329         return 0;
8330 }
8331 
8332 /*AAS*/
8333 /* BlastNewWordExtend_prelim -- for timing purposes.

        Ungapped extension of one word hit in which every score is obtained
        from MtrxScorePosSearch(search->sbp, <query position>, <subject
        residue>), i.e. the score is looked up by position in the query
        rather than by the query residue itself.

        The routine scans the word_width positions that end at (q_off, s_off)
        for the best-scoring segment, extends it to the left (never past
        query position 0), and extends to the right only if the left end then
        lies at least ewp->actual_window positions to the left of query+q_off.
        *succeed_to_right reports whether the rightward extension was
        attempted.  ewp->combo_array[real_diag].diag_level is always updated;
        the segment is passed to BlastSaveCurrentHsp if score >= pbp->cutoff_s2.

        BlastSearchBlkPtr search: main BLAST structure,
        Int4 q_off: offset into the query where the right-to-left scan starts,
        Int4 s_off: offset into the subject where the right-to-left scan starts,
        Int4 word_width: number of positions scanned in the initial word,
        BLAST_Diag diag: after search->ewp_params->min_diag_length has been
                subtracted, used as the subject-minus-query offset,
        BLAST_Diag real_diag: index into ewp->combo_array,
        Boolean PNTR succeed_to_right: output flag, see above,
        Int2 context: index into search->context (selects query and ewp).
        Return: always 0.
*/
8334 static Int2
8335 BlastNewWordExtend_prelim(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context)
8336 {
8337         BLAST_ExtendWordPtr     ewp;
8338         BLAST_ParameterBlkPtr   pbp;
8339         register Uint1Ptr       q, s, query;
8340         register Uint1Ptr       q_right, q_left, s_left, q_best_right, q_best_left;
8341         register BLAST_Score    score, sum;
8342         register BLAST_Score    x, X;
8343         
8344         
8345 
8346 #ifdef BLAST_COLLECT_STATS
8347         if (search->prelim)
8348                 search->first_pass_extends++;
8349         else
8350                 search->second_pass_extends++;
8351 #endif
8352 
8353         *succeed_to_right = FALSE;
8354 
8355         ewp=search->context[context].ewp;
8356 
8357         diag -= search->ewp_params->min_diag_length;
8358 
8359         pbp=search->pbp;
8360 
8361         query = search->context[context].query->sequence;
8362         q = query + q_off;
8363         s = search->subject->sequence + s_off; 
8364 
8365         X=pbp->X;
8366 
8367         score=0;
8368         sum = 0;
8369         q_left = q - word_width;
             /* During the scan q_right is kept one position to the right of
             the candidate segment's right end (q+1 here, q after a reset). */
8370         q_right = q+1;
             /* Both "best" pointers start at q, so they are valid even if the
             scan below never finds a positive running sum. */
8371         q_best_left = q;
8372         q_best_right = q; /*AAS*/
8373 
8374 /* Look for the highest scoring region in the initial word. */
8375         while (q > q_left)
8376         {
8377                 sum += MtrxScorePosSearch(search->sbp,(Int4) (q - query),*s);
8378                 if (sum > score)
8379                 {
8380                         score = sum;
8381                         q_best_right = q_right;
8382                         q_best_left = q;
8383                 }
8384                 else if (sum <= 0)
8385                 {
8386                         sum = 0;
8387                         q_right = q;
8388                 }
8389                 q--; s--;
8390         }
8391 
             /* Restart from the left end of the best segment; the subject
             pointer is re-derived from the diagonal offset. */
8392         q = q_left = q_best_left;
8393         s = s_left = search->subject->sequence + (q_left - query) + diag;
8394 
             /* Pre-decrement to balance the unconditional q_left++ that
             follows the left-extension loop. */
8395         q_left--;
8396 
             /* Extend to the left.  "score" is the maximum so far and "sum"
             the change since it was last raised; the loop stops once sum is
             no longer greater than X or query position 0 has been passed. */
8397         sum = 0;
8398         x = X;
8399         while (((q - query) >= 0) && (sum > x))
8400         {
8401                 q--; s--;
8402                 if (((q - query) >= 0) && 
8403                     ((sum += MtrxScorePosSearch(search->sbp,
8404                                         (Int4) (q - query),*s)) > 0))
8405                 {
8406                         do {
8407                                 score += sum;
8408                                 q--; s--;
8409                         } while (((q -query) >= 0) &&
8410                                  ((sum = MtrxScorePosSearch(search->sbp,
8411                                         (Int4) ( q- query),*s)) > 0));
8412                         q_left = q;
8413                 }
8414         }
8415         /* Adjust for extra decrement in do-while loop above. */
8416         q_left++;
8417         s_left = search->subject->sequence + (q_left - query) + diag;
8418 
8419 /* Extend towards the right (for this preliminary run) if
8420 q_off - q_left is greater than the window. */
             /* (The test below is ">=", so a distance equal to the window
             also qualifies.) */
8421         if (((query+q_off)-q_left) >= ewp->actual_window)
8422         {
8423                 *succeed_to_right = TRUE;
8424                 q = q_right = q_best_right;
                     /* Step q back by one before extending; q_right keeps the
                     un-decremented value and is adjusted after the loop. */
8425                 q--;
8426                 s = search->subject->sequence + (q - query) + diag;
8427                 sum = 0;
8428 
8429 /* "score" is actually the "maxscore", if sum drops by "score", then the
8430 total new score is zero and the extension can stop. */
8431                 if ((x = -score) < X)
8432                         x = X;
                     /* NOTE(review): unlike the left extension there is no
                     explicit bound on q here -- presumably the scores at the
                     end of the sequences stop the loop; confirm. */
8433                 while (sum > x)
8434                 {
8435                         q++; s++;
8436                         if ((sum += MtrxScorePosSearch(search->sbp,
8437                                         (Int4) (q - query),*s)) > 0)
8438                         {
8439                                 do {
8440                                         score += sum;
8441                                         q++; s++;
8442                                 } while ((sum = MtrxScorePosSearch(search->sbp,
8443                                                 (Int4) (q - query),*s)) > 0);
8444                                 q_right = q;
8445                                 /* do this if score changes. */
8446                                 if ((x = -score) < X)
8447                                         x = X;
8448                         }
8449                 }
8450                 /* Adjust for extra increment in do-while loop above. */
8451                 q_right--;
8452         }
8453 
8454         /* Record how far this diagonal has been traversed,
8455         "q" was the last position on the query sequence (where the last
             extension loop that ran stopped: the left loop if no rightward
             extension was attempted).
8456         search->ewp_params->offset is added to provide the proper "zero-point" */      
8457         ewp->combo_array[real_diag].diag_level = q - query -q_off + word_width + s_off + search->ewp_params->offset;
8458 
8459         if (score >= pbp->cutoff_s2) /* Score is reportable */
8460         {
8461 
8462 #ifdef BLAST_COLLECT_STATS
8463                 if (search->prelim)
8464                         search->first_pass_good_extends++;
8465                 else
8466                         search->second_pass_good_extends++;
8467 #endif
8468 
                     /* Arguments after score: query start, subject start,
                     length (q_right-q_left+1), context. */
8469                 BlastSaveCurrentHsp(search, score, (q_left-query), (s_left-search->subject->sequence), (q_right-q_left+1), context);
8470         }
8471 
8472         return 0;
8473 }
8474 
8475 
8476 /* Ungapped extension of a blastn type word hit, to be used in Mega BLAST with
8477    discontiguous word models.  
8478 
        The query of context search->first_context is compared, one base at
        a time, against the packed subject sequence.  Starting at
        (q_off, s_off) the hit is extended to the left and then, unless the
        left extension alone already reached the cutoff, to the right.
        Nothing is saved; only the verdict is returned.

8479         BlastSearchBlkPtr search: main BLAST structure,
8480         Int4 q_off: offset of query sequence,
8481         Int4 s_off: offset of subject sequence in bases, NOT divided by
                four: the packed byte is found as s_off/READDB_COMPRESSION_RATIO
                and the base inside that byte from s_off % 4,
        Int4 cutoff: score the ungapped extension has to reach,
8482         Return: FALSE as soon as the accumulated score reaches cutoff,
8483                 TRUE if the ungapped score stays below cutoff (to indicate
                that this HSP should be deleted).
8484 */
8485 
8486 Boolean
8487 BlastNtWordUngappedExtend(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, 
8488                           Int4 cutoff)
8489 {
8490         register Uint1Ptr       q;
8491         register BLAST_ScorePtr PNTR    matrix;
8492         register BLAST_Score    sum, score;
8493         Uint1   ch;
             /* q_beg and s_beg are assigned below but never read in this
             function. */
8494         Uint1Ptr query0, subject0, sf, q_beg, q_end, s_beg, s_end, s, start;
8495         BLAST_Score     X;
8496         Int2            remainder, base;
8497         BLAST_ParameterBlkPtr   pbp;
8498         BLAST_ScoreBlkPtr       sbp;
8499         Int4 q_avail, s_avail;
8500 
             /* base is the index handed to READDB_UNPACK_BASE_N for the base
             at s_off inside its packed byte.  In the loops below it is
             decremented when moving right (wrapping 0 -> 3 with s++) and
             incremented when moving left (wrapping 3 -> 0 with s--). */
8501         base = 3 - (s_off % 4);
8502 
8503         sbp=search->sbp;
8504         pbp=search->pbp;
8505 
             /* The assignment is repeated; the second one is redundant. */
8506         matrix = sbp->matrix;
8507         matrix = sbp->matrix;
8508         query0 = (Uint1Ptr) search->context[search->first_context].query->sequence;
8509         subject0 = (Uint1Ptr) search->subject->sequence;
             /* Number of positions from the hit to the end of each sequence. */
8510         q_avail = search->context[search->first_context].query->length - q_off;
8511         s_avail = search->subject->length - s_off;
8512 
8513         q = q_beg = q_end = query0 + q_off;
8514         s = s_beg = s_end = subject0 + s_off/READDB_COMPRESSION_RATIO;
             /* start/remainder give the packed byte and base index that bound
             the left extension: the subject base aligned with query offset 0
             when q_off < s_off, otherwise the first base of the subject. */
8515         if (q_off < s_off) {
8516            start = (Uint1Ptr) search->subject->sequence + 
8517               (s_off-q_off)/READDB_COMPRESSION_RATIO;
8518            remainder = 3 - ((s_off-q_off)%READDB_COMPRESSION_RATIO);
8519         } else {
8520            start = (Uint1Ptr) search->subject->sequence;
8521            remainder = 3;
8522         }
8523 
8524         /* Find where positive scoring starts & ends within the word hit */
8525         score = 0;
8526         sum = 0;
8527 
8528         X = pbp->X;
8529 
8530         /* extend to the left */
             /* Each time the running sum becomes positive it is added to
             score and reset to zero; the loop gives up once sum < X. */
8531         do {
8532            if (base == 3) {
8533               s--;
8534               base = 0;
8535            } else
8536               base++;
8537            ch = *s;
8538            if ((sum += matrix[*--q][READDB_UNPACK_BASE_N(ch, base)]) > 0) {
8539               q_beg = q;
8540               score += sum;
8541               sum = 0;
8542            } else if (sum < X)
8543               break;
8544         } while ((s > start) || (s == start && base <= remainder));
8545 
             /* The left extension alone is good enough: keep the HSP. */
8546         if (score >= cutoff) 
8547            return FALSE;
8548 
             /* sf/remainder give the packed byte and base index that bound
             the right extension: whichever of query and subject ends first. */
8549         if (q_avail < s_avail) {
8550            sf = subject0 + (s_off + q_avail)/READDB_COMPRESSION_RATIO;
8551            remainder = 3 - ((s_off + q_avail)%READDB_COMPRESSION_RATIO);
8552         } else {
8553            sf = subject0 + (search->subject->length)/READDB_COMPRESSION_RATIO;
8554            remainder = 3 - ((search->subject->length)%READDB_COMPRESSION_RATIO);
8555         }
8556         /* extend to the right */
             /* Restart from the original hit position; score carries over
             from the left extension. */
8557         q = q_end;
8558         s = s_end;
8559         sum = 0;
8560         base = 3 - (s_off % 4);
8561 
8562         while (s < sf || (s == sf && base >= remainder)) {
8563            ch = *s;
8564            if ((sum += matrix[*q++][READDB_UNPACK_BASE_N(ch, base)]) > 0) {
8565               q_end = q;
8566               score += sum;
8567               sum = 0;
8568            } else if (sum < X)
8569               break;
8570            if (base == 0) {
8571               base = 3;
8572               s++;
8573            } else
8574               base--;
8575         }
8576         
8577         return (score < cutoff);
8578 }
8579 
8580 /* Extend a blastn type word hit.  
8581 
        Ungapped extension against the packed subject sequence, one packed
        byte (READDB_COMPRESSION_RATIO bases) at a time, first to the left
        and then to the right of (q_off, s_off).  Each time the running sum
        becomes positive it is added to score and reset to zero; an extension
        gives up once the running sum falls below pbp->X.  Afterwards
        ewp->combo_array[real_diag].diag_level is updated and, if
        score >= pbp->cutoff_s2, the segment is saved as an HSP.

8582         BlastSearchBlkPtr search: main BLAST structure,
8583         Int4 q_off: offset of query sequence,
8584         Int4 s_off: offset of subject sequence, divided by four!
8585         BLAST_Diag real_diag: diagonal,
8586         Int2 context: must be 0 (plus strand) or 1 (minus strand).
        Return: always 0.
8587 */
8588 Int2
8589 BlastNtWordExtend(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, BLAST_Diag real_diag, Int2 context)
8590 {
8591         register Uint1Ptr       q;
8592         register BLAST_ScorePtr PNTR    matrix;
8593         register BLAST_Score    sum, score;
8594         Uint1   ch;
             /* s_beg is assigned below but never read in this function. */
8595         Uint1Ptr query0, subject0, sf, q_beg, q_end, s_beg, s_end, s, start;
8596         BLAST_Score     X;
8597         Int2            remainder;
8598         BLAST_ExtendWordPtr     ewp;
8599         BLAST_ParameterBlkPtr   pbp;
8600         BLAST_ScoreBlkPtr       sbp;
8601         Int4 q_avail, s_avail;
8602 
8603 #ifdef BLAST_COLLECT_STATS
8604         search->second_pass_extends++;
8605 #endif
8606         ewp=search->context[context].ewp;
8607 
8608         sbp=search->sbp;
8609         pbp=search->pbp;
8610 
             /* The assignment is repeated; the second one is redundant. */
8611         matrix = sbp->matrix;
8612         matrix = sbp->matrix;
8613         query0 = (Uint1Ptr) search->context[context].query->sequence;
8614         subject0 = (Uint1Ptr) search->subject->sequence;
             /* Number of bases from the hit to the end of each sequence. */
8615         q_avail = search->context[context].query->length - q_off;
8616         s_avail = search->subject->length - s_off*READDB_COMPRESSION_RATIO;
             /* sf is the packed byte at which the whole-byte right extension
             stops; remainder is the number of bases still to be examined in
             that final byte. */
8617         if (q_avail < s_avail)
8618         {
8619                 sf = subject0 + s_off + q_avail/READDB_COMPRESSION_RATIO;
8620                 remainder = q_avail%READDB_COMPRESSION_RATIO;
8621         }
8622         else
8623         {
8624                 sf = subject0 + (search->subject->length)/READDB_COMPRESSION_RATIO;
8625                 remainder = (search->subject->length)%READDB_COMPRESSION_RATIO;
8626         }
8627 
8628         q = q_beg = q_end = query0 + q_off;
8629         s = s_beg = s_end = subject0 + s_off;
             /* start is the last packed byte the whole-byte left extension
             may consume: limited by the query when the query has fewer bases
             to the left than the subject, otherwise the subject's first byte. */
8630         if (q_off < s_off*READDB_COMPRESSION_RATIO)
8631         {
8632                 start = (Uint1Ptr) search->subject->sequence + (s_off-q_off/READDB_COMPRESSION_RATIO);
8633         }
8634         else
8635         {
8636                 start = (Uint1Ptr) search->subject->sequence;
8637         }
8638 
8639         /* Find where positive scoring starts & ends within the word hit */
8640         score = sum = 0;
8641 
8642         X = pbp->X;
8643 
8644         /* extend to the left */
             /* One packed byte per iteration, unrolled by hand.  Moving left
             the bases are taken in the order BASE_4, BASE_3, BASE_2, BASE_1
             while q is pre-decremented for each of them. */
8645         do {
8646                 s--;
8647                 ch = *s;
8648                 if ((sum += matrix[*--q][READDB_UNPACK_BASE_4(ch)]) > 0) {
8649                         q_beg = q;
8650                         score += sum;
8651                         sum = 0;
8652                 }
8653                 else
8654             if (sum < X)
8655                                 break;
8656                 if ((sum += matrix[*--q][READDB_UNPACK_BASE_3(ch)]) > 0) {
8657                         q_beg = q;
8658                         score += sum;
8659                         sum = 0;
8660                 }
8661                 else
8662             if (sum < X)
8663                                 break;
8664                 if ((sum += matrix[*--q][READDB_UNPACK_BASE_2(ch)]) > 0) {
8665                         q_beg = q;
8666                         score += sum;
8667                         sum = 0;
8668                 }
8669                 else
8670             if (sum < X)
8671                                 break;
8672                 if ((sum += matrix[*--q][READDB_UNPACK_BASE_1(ch)]) > 0) {
8673                         q_beg = q;
8674                         score += sum;
8675                         sum = 0;
8676                 }
8677                 else
8678             if (sum < X)
8679                                 break;
8680         } while (s > start);
8681 
8682         /* There is still another partial byte to be extended through. */
             /* Only entered when the loop above did not give up (sum >= X)
             and start was limited by the query rather than by the beginning
             of the subject.  The remaining query bases are matched against
             the low-order bases of the preceding packed byte. */
             /* NOTE(review): the partial-byte loop of the right extension
             switches its shift direction on OLD_BYTE_ORDER; this loop always
             shifts right -- confirm that is intended. */
8683     if (sum >= X && start != (Uint1Ptr) search->subject->sequence)
8684         {
8685                 s--;
8686                 ch = *s;
8687                 while (q > query0)
8688                 {
8689                         if ((sum += matrix[*--q][READDB_UNPACK_BASE_4(ch)]) > 0) 
8690                         {
8691                                 q_beg = q;
8692                                 score += sum;
8693                                 sum = 0;
8694                         }
8695             else if (sum < X)
8696                         {
8697                                 break;
8698                         }
8699                         ch >>= 2;
8700                 }
8701         }
8702 
8703         /* extend to the right */
             /* Restart from the original hit position; score carries over
             from the left extension.  Moving right the bases are taken in the
             order BASE_1 .. BASE_4 and q is post-incremented, so q_end always
             points one past the last base that raised the score. */
8704         q = q_end;
8705         s = s_end;
8706         sum = 0;
8707         while (s < sf) 
8708         {
8709                 ch = *s;
8710                 if ((sum += matrix[*q++][READDB_UNPACK_BASE_1(ch)]) > 0) 
8711                 {
8712                         q_end = q;
8713                         score += sum;
8714                         sum = 0;
8715                 }
8716                 else if (sum < X)
8717                 {
8718                                 break;
8719                 }
8720 
8721                 if ((sum += matrix[*q++][READDB_UNPACK_BASE_2(ch)]) > 0) 
8722                 {
8723                         q_end = q;
8724                         score += sum;
8725                         sum = 0;
8726                 }
8727                 else if (sum < X)
8728                 {
8729                                 break;
8730                 }
8731 
8732                 if ((sum += matrix[*q++][READDB_UNPACK_BASE_3(ch)]) > 0) 
8733                 {
8734                         q_end = q;
8735                         score += sum;
8736                         sum = 0;
8737                 }
8738                 else if (sum < X)
8739                 {
8740                                 break;
8741                 }
8742 
8743                 if ((sum += matrix[*q++][READDB_UNPACK_BASE_4(ch)]) > 0) 
8744                 {
8745                         q_end = q;
8746                         score += sum;
8747                         sum = 0;
8748                 }
8749                 else if (sum < X)
8750                 {
8751                                 break;
8752                 }
8753                 s++;
8754         }
8755 
8756         /* extend into the final, partially packed byte (if one exists) */
8757 /* If the query ends before the subject, then don't extend any more as the query 
8758 has no remainder. */
8759         if (remainder > 0 && sum >= X)
8760         {
8761                 ch = *sf;
8762 
                     /* The byte is shifted after every base so that
                     READDB_UNPACK_BASE_1 always yields the next base. */
8763                 while (remainder > 0)
8764                 {
8765                         if ((sum += matrix[*q++][READDB_UNPACK_BASE_1(ch)]) > 0) 
8766                         {
8767                                 q_end = q;
8768                                 score += sum;
8769                                 sum = 0;
8770                         }
8771                         else if (sum < X)
8772                         {
8773                                         break;
8774                         }
8775 #ifdef OLD_BYTE_ORDER
8776                         ch >>= 2;
8777 #else
8778                         ch <<= 2;
8779 #endif
8780                         remainder--;
8781                 }
8782         } /* End ungapped alignment */
8783 
8784         /* Record how far this diagonal has been traversed */
8785         /*      ewp->combo_array[real_diag].diag_level = q_end - query0 + search->ewp_params->offset; */
8786         ewp->combo_array[real_diag].diag_level = (q_end - query0 - q_off) + s_off*READDB_COMPRESSION_RATIO + search->ewp_params->offset;
8787 
8788 
8789 
8790     if (score >= pbp->cutoff_s2) /* Score is reportable */
8791     {
8792 #ifdef BLAST_COLLECT_STATS
8793         search->second_pass_good_extends++;
8794 #endif
             /* Both calls receive the query start, the subject start on the
             same diagonal (in bases) and the length q_end-q_beg; no +1 is
             needed because q_end is one past the last base.  The gapped
             variant additionally gets two offsets placed 5 before the hit --
             presumably a starting point for the later gapped alignment;
             confirm against BlastNtSaveCurrentHsp. */
8795         if(search->pbp->gapped_calculation)
8796             BlastNtSaveCurrentHsp(search, score, (q_beg-query0),
8797                 (q_beg-query0+READDB_COMPRESSION_RATIO*s_off-q_off),
8798                 (q_end-q_beg), context,
8799                 q_off - 5, READDB_COMPRESSION_RATIO*s_off - 5);
8800         else
8801             BlastSaveCurrentHsp(search, score,
8802                 (q_beg-query0),
8803                 (q_beg-query0+READDB_COMPRESSION_RATIO*s_off-q_off),
8804                 (q_end-q_beg), context);
8805     }
8806 
8807         return 0;
8808 }
8809 
8810 /*
8811         search_nt_orig -- an adaptation of the original search_nt() function
8812         of BLASTN
8813 
8814         * Can this ever be called?
8815         *  - It is only called for blastn, only from BlastWordFinder_mh().
8816         *  - BlastWordFinder_mh() is only called if BlastExtendWordSearch() is called w/ multiple_hits==TRUE
8817         *  - BlastExtendWordSearch() is called in 2 places:
8818         *       BLASTPerform2PassSearch(called w/ multiple_hits=TRUE)
8819         *         which is called from BLASTPerformSearch if search->pbp->two_pass_method
8820         *       BLASTPerformFinalSearch(called w/ search->pbp->multiple_hits_only)
8821         *  * so multiple_hits_only, or two_pass_method must be set for this to be called.
8822         *  For blastn, these are set to false in blastool, blastutl. 
8823         *  These can be set to TRUE in blastpgp.c, but can blastn also be true in this case?
8824         *
8825         *  I have updated the array accesses to use the new mod_lt[], as the other WordFinder routines
8826         *  now do, but I have not been able to test this change.</