NCBI C Toolkit Cross Reference

C/tools/blastutl.c


  1 static char const rcsid[] = "$Id: blastutl.c,v 6.471 2007/05/08 19:03:33 kans Exp $";
  2 
  3 /* ===========================================================================
  4 *
  5 *                            PUBLIC DOMAIN NOTICE
  6 *               National Center for Biotechnology Information
  7 *
  8 *  This software/database is a "United States Government Work" under the
  9 *  terms of the United States Copyright Act.  It was written as part of
 10 *  the author's official duties as a United States Government employee and
 11 *  thus cannot be copyrighted.  This software/database is freely available
 12 *  to the public for use. The National Library of Medicine and the U.S.
 13 *  Government have not placed any restriction on its use or reproduction.
 14 *
 15 *  Although all reasonable efforts have been taken to ensure the accuracy
 16 *  and reliability of the software and data, the NLM and the U.S.
 17 *  Government do not and cannot warrant the performance or results that
 18 *  may be obtained by using this software or data. The NLM and the U.S.
 19 *  Government disclaim all warranties, express or implied, including
 20 *  warranties of performance, merchantability or fitness for any particular
 21 *  purpose.
 22 *
 23 *  Please cite the author in any work or product based on this material.
 24 *
 25 * ===========================================================================*/
 26 
 27 /*****************************************************************************
 28 
 29 File name: blastutl.c
 30 
 31 Author: Tom Madden
 32 
 33 Contents: Utilities for BLAST
 34 
 35 $Revision: 6.471 $
 36 
 37 ******************************************************************************/
 38 /*
 39  *
 40 * $Log: blastutl.c,v $
 41 * Revision 6.471  2007/05/08 19:03:33  kans
 42 * in FilterWithSeg added SeqDataPtr and ByteStorePtr casts for seq_data
 43 *
 44 * Revision 6.470  2007/03/13 20:40:24  madden
 45 *   - In s_ComputeAverageLength, compute the floating point value retval
 46 *     using floating point division.
 47 *
 48 *   - In BioseqBlastEngineCore, call blast_set_parameters for rounds > 1
 49 *     of PSI-BLAST.
 50 *
 51 *   - In GetDbSubjRatio, use floating point operations to compute the
 52 *     floating point value db_subj_ratio.
 53 *   [from Mike Gertz]
 54 *
 55 * Revision 6.469  2007/03/05 14:51:24  camacho
 56 * - Make s_ComputeAverageLength static.
 57 *
 58 * Revision 6.468  2007/01/23 15:25:44  madden
 59 * Use SeqLocDustEx rather than SeqLocDust
 60 *
 61 * Revision 6.467  2007/01/17 15:46:00  madden
 62 * remove FilterDNA
 63 *
 64 * Revision 6.466  2006/08/10 17:34:38  merezhuk
 65 * Fix for reading -z advanced option by StringToInt8; RT # 15187990
 66 *
 67 * Revision 6.465  2006/02/15 18:23:47  madden
 68 * Made changes so that CheckStartForGappedAlignment by default
 69 * checks ungapped alignments of length 11, rather than length 10.
 70 * Made changes to the rules used when the starting point is close to
 71 * the edge of the preliminary gapped alignment.
 72 * (from Mike Gertz)
 73 *
 74 * Revision 6.464  2005/12/01 15:10:23  madden
 75 * Gave BLASTCheckHSPInclusion external linkage (i.e. removed the static specifier).
 76 *
 77 * Revision 6.463  2005/10/13 15:59:06  camacho
 78 * Add code to fix cutoff scores in PSI-BLAST.
 79 *
 80 * Revision 6.462  2005/07/28 14:57:09  coulouri
 81 * remove dead code
 82 *
 83 * Revision 6.461  2005/07/27 15:51:54  coulouri
 84 * remove unused queue_callback
 85 *
 86 * Revision 6.460  2005/05/02 16:03:14  coulouri
 87 * refactor code to set db_chunk_size
 88 *
 89 * Revision 6.459  2005/04/25 14:16:36  coulouri
 90 * set db_chunk_size adaptively
 91 *
 92 * Revision 6.458  2005/04/04 20:44:27  camacho
 93 * Do not overwrite the effective search space in Pssm2Sequences if specified in the options structure
 94 *
 95 * Revision 6.457  2005/02/07 15:30:08  dondosha
 96 * Removed restriction on the value of longest intron option
 97 *
 98 * Revision 6.456  2005/01/24 20:37:37  camacho
 99 * Added conditional compilation to structs needed for BLAST_CLUSTER_HITS
100 *
101 * Revision 6.455  2005/01/18 14:54:13  camacho
102 * Change in tie-breakers for score comparison, suggestion by Mike Gertz
103 *
104 * Revision 6.454  2004/12/20 15:22:16  camacho
105 * Calculate kbp_ideal values rather than loading them from pre-computed values
106 *
107 * Revision 6.453  2004/12/01 17:24:15  coulouri
108 * do not dereference null pointer
109 *
110 * Revision 6.452  2004/11/22 16:10:11  dondosha
111 * Minor fix to make sure that "evalue" score type is always used when hsp is not part of a linked set
112 *
113 * Revision 6.451  2004/11/04 15:51:55  bealer
114 * - bl2seq should use dblen as average length if database is not available.
115 *
116 * Revision 6.450  2004/11/01 14:07:56  madden
117 * From Mike Gertz:
118 *
119 *    - In query_offset_compare_hsp and query_end_compare_hsp, use the
120 *      subject query/offset as a tie-breaker.  Without this tie-breaker
121 *      CheckGappedAlignmentsForOverlap won't work properly.
122 *
123 *    - In CheckGappedAlignmentsForOverlap check that hsp_array, rather
124 *      than *hsp_array, is not nil.
125 *
126 *    - In BlastSaveCurrentHsp, rewrote the binary search to use
127 *      score_compare_hsps, so that the answers are consistent with the
128 *      heap code used in the algo/blast/core code.
129 *
130 *    - In BlastGappedScoreInternal delete gapped extensions that don't
131 *      reach the cutoff score (cutoff_s1).
132 *
133 * Revision 6.449  2004/10/25 18:36:17  papadopo
134 * From Michael Gertz: remove unneeded decrement of alignment offsets in BlastNtSaveCurrentHsp
135 *
136 * Revision 6.448  2004/10/19 19:42:17  dondosha
137 * Optimized algorithm in BlastPruneSeqAlignByGiList to make it up to 25 times faster; Added new function BlastPruneSeqAlignBySortedGiList
138 *
139 * Revision 6.447  2004/10/18 13:02:41  madden
140 * Changes from Mike Gertz:
141 *         - In score_compare_hsps, query_offset_compare_hsp and
142 *           query_end_compare_hsp, change the comparison tests so that
143 *           nil HSPs are less than any non-nil HSP.  Previously, these
144 *           comparison functions would return 0 if either HSP was nil,
145 *           which would result in sort routines terminating before the
146 *           non-nil HSPs in the list were fully sorted.
147 *
148 *         - In score_compare_hsps, copied the set of tie-breakers from
149 *           the corresponding routine in algo/blast/core/blast_hits.c.
150 *
151 *         - In RealBlastGetGappedAlignmentTraceback, the HSP list must
152 *           be sorted before BLASTCheckHSPInclusion is invoked.
153 *
154 * Revision 6.446  2004/09/28 16:05:32  papadopo
155 * From Michael Gertz: In BlastGappedScoreInternal, changed a
156 * reference to the sumscore field of an HSP to a reference to the
157 * xsum field of an HSP.
158 *
159 * Revision 6.445  2004/08/23 17:05:42  papadopo
160 * From Michael Gertz: make CopyResultHspToHSP public
161 *
162 * Revision 6.444  2004/08/16 19:37:26  dondosha
163 * Enabled uneven gap HSP linking for blastx
164 *
165 * Revision 6.443  2004/08/05 21:52:28  camacho
166 * Gracefully handle inability to calculate ungapped lambda for PSSM in psiblast2sequences
167 *
168 * Revision 6.442  2004/07/24 18:55:29  camacho
169 * Fix to GetSequenceWithDenseSeg when sequence cannot be found
170 *
171 * Revision 6.441  2004/07/19 17:05:36  papadopo
172 * specify (unused) 'output-to-scoremat' parameter
173 *
174 * Revision 6.440  2004/06/30 12:29:39  madden
175 * Moved some functions to blfmtutl.c
176 *
177 * Revision 6.439  2004/06/22 14:16:55  camacho
178 * Changed invocation of posFreqsToMatrix to conform with new signature
179 *
180 * Revision 6.438  2004/06/01 20:34:06  dondosha
181 * Fix in previous change; memory leak fix
182 *
183 * Revision 6.437  2004/05/27 17:36:24  dondosha
184 * Minor fix for previous 2 changes
185 *
186 * Revision 6.436  2004/05/25 21:42:47  dondosha
187 * Fix in previous change: in some cases edit block should not be freed when BLAST_HSP is freed
188 *
189 * Revision 6.435  2004/05/21 13:53:04  dondosha
190 * Use BLAST_HSPFree to free BLAST_HSP structures, hence no need to call GapXEditBlockDelete in multiple places
191 *
192 * Revision 6.434  2004/04/22 16:40:32  dondosha
193 * Set search->subject_id to correct ordinal id, needed for finding splice junctions in HSP links at traceback stage
194 *
195 * Revision 6.433  2004/03/22 22:10:38  dondosha
196 * Use kbp_gap instead of kbp pointers in megablast traceback
197 *
198 * Revision 6.432  2004/02/26 15:52:30  papadopo
199 * Mike Gertz' modifications to unify handling of gapped Karlin blocks between protein and nucleotide searches
200 *
201 * Revision 6.431  2004/02/04 15:35:03  camacho
202 * Rollback to fix problems in release 2.2.7
203 *
204 * Revision 6.429  2004/01/30 16:54:45  dondosha
205 * Check if HSP needs to be deleted after reevaluation with ambiguities, after greedy traceback
206 *
207 * Revision 6.428  2004/01/28 16:54:03  dondosha
208 * Restored the code that shifts subject coordinates for blastn traceback with long subject sequences
209 *
210 * Revision 6.427  2004/01/25 05:06:21  dondosha
211 * Translate only relevant parts of long subject sequences for tblastn traceback
212 *
213 * Revision 6.426  2004/01/16 23:43:44  dondosha
214 * No more need for special argument for partial search: it is set in options
215 *
216 * Revision 6.425  2004/01/14 17:01:06  dondosha
217 * Gapped alignment is position based only if posMatrix exists
218 *
219 * Revision 6.424  2004/01/09 18:13:24  dondosha
220 * In [Get,Check]StartForGappedAlignment: if posMatrix not available, use square matrix for calculations
221 *
222 * Revision 6.423  2004/01/06 22:37:40  dondosha
223 * Use BLAST_HSPfree function; in particular fixes a bug with wrong memory being freed
224 *
225 * Revision 6.422  2003/12/11 23:46:28  dondosha
226 * Correction in setting hit ranges after repeats filtering
227 *
228 * Revision 6.421  2003/12/10 17:05:28  dondosha
229 * Added function ReevaluateScoreWithAmbiguities to reevaluate score for one HSP; use it after greedy traceback
230 *
231 * Revision 6.420  2003/11/24 22:06:41  madden
232 * Tblastn optimization, only fetch part of sequence needed
233 *
234 * Revision 6.419  2003/10/30 18:37:19  dondosha
235 * Fix for megablast with non-greedy traceback
236 *
237 * Revision 6.418  2003/10/29 17:46:59  dondosha
238 * Allow 2-stage greedy extension in megablast
239 *
240 * Revision 6.417  2003/08/20 22:14:08  dondosha
241 * Little correction in call to OOFBlastHSPGetNumIdentical
242 *
243 * Revision 6.416  2003/08/04 16:19:16  dondosha
244 * Added effective HSP length (length adjustment) to other returns, so it can be reported in XML output
245 *
246 * Revision 6.415  2003/05/30 17:25:36  coulouri
247 * add rcsid
248 *
249 * Revision 6.414  2003/05/23 22:12:11  camacho
250 * Fix memory leak in PsiBlast2Sequences
251 *
252 * Revision 6.413  2003/04/22 21:52:13  dondosha
253 * Added function OOFBlastHSPGetNumIdentical
254 *
255 * Revision 6.412  2003/04/10 19:21:16  dondosha
256 * Memory leak fix for megablast with limited number of HSPs per hit
257 *
258 * Revision 6.411  2003/03/24 19:42:14  madden
259 * Changes to support query concatenation for blastn and tblastn
260 *
261 * Revision 6.410  2003/03/11 14:33:48  madden
262 * Sort HSPs after array is no longer reallocated
263 *
264 * Revision 6.409  2003/02/21 02:52:16  madden
265 * Ensure stable sorting in score_compare_hsp (change from Morgulis)
266 *
267 * Revision 6.408  2003/01/24 22:26:03  camacho
268 * RPSInit is deprecated, use RPSInitEx instead
269 *
270 * Revision 6.407  2002/12/09 17:22:16  dondosha
271 * When alignment jumps beyond a strand boundary, keep the part of it where initial word is
272 *
273 * Revision 6.406  2002/12/04 23:32:50  camacho
274 * Do not set use_this_gi with nucleotide dbs (redundant)
275 *
276 * Revision 6.405  2002/12/04 18:42:22  camacho
277 * Minor change to previous commit
278 *
279 * Revision 6.404  2002/12/04 18:38:58  camacho
280 * Use correct effective search space in B2SPssmMultipleQueries
281 *
282 * Revision 6.403  2002/12/04 17:08:33  camacho
283 * Minor change to B2SPssmCleanUpSearch
284 *
285 * Revision 6.402  2002/11/27 15:41:51  dondosha
286 * Added -t, -g and -n megablast options to parse_blast_options
287 *
288 * Revision 6.401  2002/11/26 23:02:07  madden
289 * Add w option to parse_blast_options (OOF for blastx)
290 *
291 * Revision 6.400  2002/11/25 19:57:30  dondosha
292 * Further fix to the HSP limit (-H) megablast option
293 *
294 * Revision 6.399  2002/11/22 23:31:43  dondosha
295 * 1. Use array of structures instead of array of pointers for initial offset pairs;
296 * 2. Sort the HSP array when maximal number of HSPs is reached for a sequence
297 *
298 * Revision 6.398  2002/11/13 23:23:53  dondosha
299 * Correction for getting number of identities in tblastn
300 *
301 * Revision 6.397  2002/11/07 22:25:34  dondosha
302 * Correction in calculating number of identities for very long database sequences
303 *
304 * Revision 6.396  2002/11/04 23:00:54  dondosha
305 * Calculate number of identities while computing the traceback, and save it in the seqalign
306 *
307 * Revision 6.395  2002/10/22 21:03:42  camacho
308 * Calculate the effective search space correctly for rpsblast in BlastOtherReturnsPrepare
309 *
310 * Revision 6.394  2002/10/22 17:57:48  camacho
311 * Changes to B2SPssmMultipleQueries
312 *
313 * Revision 6.393  2002/10/22 15:28:45  kans
314 * SeqAlignCompare takes LIBCALLBACK
315 *
316 * Revision 6.392  2002/10/21 23:13:36  camacho
317 * Added B2SPssmOnTheFly functions
318 *
319 * Revision 6.391  2002/10/18 15:08:28  dondosha
320 * Correction in SaveCurrentHsp functions when maximal number of HSPs is reached
321 *
322 * Revision 6.390  2002/10/17 14:33:12  dondosha
323 * Correction for the maximal number of HSPs option
324 *
325 * Revision 6.389  2002/09/19 22:22:18  camacho
326 * Sanity checks in BlastTwoSequencesByLocWithCallback
327 *
328 * Revision 6.388  2002/09/16 15:54:59  camacho
329 * Turn off RedoAlignmentCore from psi-bl2seq
330 *
331 * Revision 6.387  2002/09/13 20:05:43  camacho
332 * Set the dbseq_num to 1 in BlastTwoSequencesByLocWithCallback
333 *
334 * Revision 6.386  2002/09/11 20:46:25  camacho
335 * Removed deprecated BlastSeqIdListPtr code
336 *
337 * Revision 6.385  2002/09/03 14:22:45  camacho
338 * Changes to pacify mac compiler
339 *
340 * Revision 6.384  2002/09/02 21:54:41  camacho
341 * Correction to previous revision
342 *
343 * Revision 6.383  2002/09/02 20:44:56  camacho
344 * Allow pssm rescaling if scalingFactor is non-zero
345 *
346 * Revision 6.382  2002/08/30 15:42:49  dondosha
347 * In blastn, use ewp structure only for the first context
348 *
349 * Revision 6.381  2002/08/29 19:22:20  camacho
350 * Save karlinK parameter when rescaling pssm
351 *
352 * Revision 6.380  2002/08/29 16:23:42  camacho
353 * Removed debugging code
354 *
355 * Revision 6.379  2002/08/29 15:49:56  camacho
356 * Added matrix rescaling code for psi-blast2sequences
357 *
358 * Revision 6.378  2002/08/26 16:55:52  madden
359 * Fix for scaling with translated searches
360 *
361 * Revision 6.376  2002/08/05 20:07:37  dondosha
362 * Correction for bl2seq with megablast option: convert gap info to seqalign after search
363 *
364 * Revision 6.375  2002/08/02 21:49:56  vakatov
365 * + LIBCALL
366 *
367 * Revision 6.374  2002/08/01 21:33:12  madden
368 * Do not put p-value and small_gap into SeqAlign
369 *
370 * Revision 6.373  2002/08/01 20:45:34  dondosha
371 * Changed prototype of the BLASTPostSearchLogic function to make it
372 * more convenient
373 *
374 * Revision 6.372  2002/07/18 19:40:45  dondosha
375 * Added an option to restrict number of HSPs per database sequence
376 *
377 * Revision 6.371  2002/07/11 22:31:54  camacho
378 * Added sanity check to BlastTwoSequencesByLocWithCallback with PSSM
379 *
380 * Revision 6.370  2002/07/02 17:08:01  dondosha
381 * Reverse previous change - not needed
382 *
383 * Revision 6.369  2002/07/02 01:41:31  dondosha
384 * Typo fix
385 *
386 * Revision 6.368  2002/07/02 01:36:40  dondosha
387 * For megablast use larger window in CheckStartForGappedAlignment
388 *
389 * Revision 6.367  2002/06/21 21:43:01  camacho
390 * Removed obsolete BlastSeqIdList structure and functions
391 *
392 * Revision 6.366  2002/06/13 16:51:41  madden
393 * BlastTwoSequencesCore and BlastTwoSequencesCoreEx return status instead of SearchBlk
394 *
395 * Revision 6.365  2002/06/12 20:34:50  coulouri
396 * Don't dereference possibly NULL pointer
397 *
398 * Revision 6.364  2002/06/11 20:40:05  dondosha
399 * Correction to previous change
400 *
401 * Revision 6.363  2002/06/11 14:44:46  dondosha
402 * Return status from some functions instead of search block pointer
403 *
404 * Revision 6.362  2002/05/31 16:06:20  kans
405 * changed MemSet (..., NULL, ...) to MemSet (..., 0, ...) for Mac compiler
406 *
407 * Revision 6.361  2002/05/29 17:14:49  dondosha
408 * Check whether an id found by SeqIdFindBest is indeed a gi
409 *
410 * Revision 6.360  2002/05/28 22:00:12  camacho
411 * *** empty log message ***
412 *
413 * Revision 6.359  2002/05/13 13:51:32  dondosha
414 * Made two functions public
415 *
416 * Revision 6.358  2002/05/08 22:51:11  dondosha
417 * Do the starting positions check for final gapped alignment in Mega BLAST case as well
418 *
419 * Revision 6.357  2002/04/23 20:41:21  dondosha
420 * In case of non-affine extension in megablast, check percent identity cutoff after the traceback is obtained
421 *
422 * Revision 6.356  2002/04/19 17:26:07  madden
423 * Fix for last update
424 *
425 * Revision 6.355  2002/04/18 20:16:52  madden
426 * Fix problem with FUM for SeqLoc
427 *
428 * Revision 6.354  2002/04/17 20:42:23  madden
429 * Fix typo for mask1
430 *
431 * Revision 6.353  2002/04/04 21:19:15  dondosha
432 * Corrections for megablast with non-greedy extensions
433 *
434 * Revision 6.352  2002/03/28 18:51:39  madden
435 * All threads get access to (query) masking seqloc, merge overlapping segments for seg
436 *
437 * Revision 6.351  2002/03/26 23:18:00  dondosha
438 * Duplicate mb_endpoint_results structure on all threads
439 *
440 * Revision 6.350  2002/03/26 16:49:33  madden
441 * Use scaled up/down Lambda
442 *
443 * Revision 6.349  2002/03/14 16:11:40  camacho
444 * Extended BlastTwoSequences to allow comparison between sequence and PSSM
445 *
446 * Revision 6.348  2002/03/05 17:58:56  dondosha
447 * Set same offsets for the traceback as for preliminary extension for megablast with non-greedy extensions
448 *
449 * Revision 6.347  2002/02/15 23:36:22  dondosha
450 * Correction for megablast with non-greedy extensions
451 *
452 * Revision 6.346  2002/01/11 20:14:28  madden
453 * Put the use_this_gi into the SeqAlign
454 *
455 * Revision 6.345  2002/01/07 23:16:00  dondosha
456 * Fixed several memory leaks and allocation/freeing bugs in multithreaded megablast
457 *
458 * Revision 6.344  2001/12/28 20:38:40  dondosha
459 * Moved Mega BLAST related parameters into a separate structure
460 *
461 * Revision 6.343  2001/12/13 16:06:54  dondosha
462 * Use separate mb_endpoint_results list for each of multiple threads
463 *
464 * Revision 6.342  2001/11/26 20:19:25  madden
465 * Add call to BLASTOptionValidateEx to BlastTwoSequencesWithCallback
466 *
467 * Revision 6.341  2001/11/16 15:44:26  dondosha
468 * In BlastPruneSeqAlignByGiList: retrieve bioseq only if seqid in seqalign is not a gi
469 *
470 * Revision 6.340  2001/11/14 00:31:44  camacho
471 * Updated BlastGetAllowedGis and BlastGetFirstGiofSubset functions
472 * to return the correct seqid's when dealing with the new database
473 * format and mask (subset) databases.
474 *
475 * Revision 6.339  2001/11/13 18:20:33  dondosha
476 * Use GapxEditScript structure instead of edit_script_t in higher level function calls
477 *
478 * Revision 6.338  2001/10/12 16:10:07  dondosha
479 * 1. Made BLASTResultFreeHsp public
480 * 2. Added BioseqBlastEngineCoreEx with partial search option
481 *
482 * Revision 6.337  2001/10/05 18:10:29  madden
483 * Add threshold_second to parse_blast_options
484 *
485 * Revision 6.336  2001/09/19 17:24:17  kans
486 * removed extra parameter from BioseqMegaBlastEngineCore
487 *
488 * Revision 6.335  2001/09/07 14:46:43  dondosha
489 * Roll back removal of threshold_first from functions and structures
490 *
491 * Revision 6.334  2001/09/06 20:24:33  dondosha
492 * Removed threshold_first
493 *
494 * Revision 6.333  2001/07/27 20:04:09  dondosha
495 * Small correction in passing effective db length for two sequences engine
496 *
497 * Revision 6.332  2001/07/26 18:19:03  dondosha
498 * Added a few more letter options in parse_blast_options
499 *
500 * Revision 6.331  2001/07/20 18:55:58  dondosha
501 * 1. Use effective db length option in 2 sequences engine
502 * 2. Create diagonal array for megablast when needed
503 *
504 * Revision 6.330  2001/07/09 14:17:24  madden
505 * Fix PC-lint complaints from R. Williams
506 *
507 * Revision 6.329  2001/07/09 13:12:03  madden
508 * Removed unused variables
509 *
510 * Revision 6.328  2001/06/25 18:30:24  madden
511 * Add define for NLM_GENERATED_CODE_PROTO to get prototypes in fdlobj.h
512 *
513 * Revision 6.327  2001/06/25 16:03:31  madden
514 * Comment out CheckGappedAlignmentsForOverlap
515 *
516 * Revision 6.326  2001/06/12 19:48:55  madden
517 * Introduce total_hsp_limit, check before making SeqAlign
518 *
519 * Revision 6.325  2001/06/04 21:29:42  dondosha
520 * Add message about deleted hits with e-value below the low threshold
521 *
522 * Revision 6.324  2001/05/07 13:18:24  madden
523 * Fix to really remove deleted HSPs from (culling) heap
524 *
525 * Revision 6.323  2001/05/04 19:50:45  dondosha
526 * Improved error message when all queries are shorter than word size
527 *
528 * Revision 6.322  2001/05/03 21:48:28  dondosha
529 * Handle some cases when memory allocation fails
530 *
531 * Revision 6.321  2001/04/16 21:28:11  dondosha
532 * Added function BlastPruneSeqAlignByEvalueRange
533 *
534 * Revision 6.320  2001/04/12 21:34:50  dondosha
535 * Added function BlastPruneSeqAlignByGiList
536 *
537 * Revision 6.319  2001/04/12 17:17:15  madden
538 * Fixes core-dump for small query
539 *
540 * Revision 6.318  2001/04/12 15:01:25  madden
541 * change repeat filtering db
542 *
543 * Revision 6.317  2001/04/11 20:56:06  madden
544 * Added scalingFactor for rpsblast
545 *
546 * Revision 6.316  2001/04/11 18:22:13  dondosha
547 * Copy query_slp in BlastSearchBlkDuplicate for all programs
548 *
549 * Revision 6.315  2001/04/03 21:59:49  dondosha
550 * Implemented tabulated output for non-megablast bl2seq
551 *
552 * Revision 6.314  2001/03/28 21:05:23  dondosha
553 * Set dbinfo->is_protein in other returns
554 *
555 * Revision 6.313  2001/03/27 21:27:01  madden
556 * Minor efficiency in how lookup table is made
557 *
558 * Revision 6.312  2001/03/27 21:13:56  dondosha
559 * Do not print error if OID list exists without CommonIndex
560 *
561 * Revision 6.311  2001/03/27 20:35:10  dondosha
562 * Small bug fix
563 *
564 * Revision 6.310  2001/03/26 15:03:25  madden
565 * Fix number warnings and two bugs found by PC compiler
566 *
567 * Revision 6.309  2001/03/21 15:46:32  dondosha
568 * Added missing parentheses in previous change
569 *
570 * Revision 6.308  2001/03/20 20:06:13  dondosha
571 * Added protection from crossing strand boundary for blastn
572 *
573 * Revision 6.307  2001/03/19 18:51:39  madden
574 * HitRangeToSeqLoc returns values appropriate for subsequences
575 *
576 * Revision 6.306  2001/03/12 14:53:46  dondosha
577 * Uninitialized variable corrections
578 *
579 * Revision 6.305  2001/03/08 22:05:48  dondosha
580 * Split very long database sequences in all BLAST programs
581 *
582 * Revision 6.304  2001/02/16 18:45:39  dondosha
583 * Fixed minor purify errors
584 *
585 * Revision 6.303  2001/02/08 20:41:16  dondosha
586 * Implemented tabulated output for all translated programs
587 *
588 * Revision 6.302  2001/02/07 21:12:05  dondosha
589 * 1. Added Blast Engine functions with callback argument
590 * 2. Pass output stream from options block to search
591 *
592 * Revision 6.301  2001/01/29 22:23:00  madden
593 * Do not recreate hsp_array
594 *
595 * Revision 6.300  2001/01/26 17:43:09  madden
596 * Comment out unneeded memset
597 *
598 * Revision 6.299  2001/01/23 20:25:43  dondosha
599 * 1. Renamed BlastParceInputString to BlastParseInputString
600 * 2. Recognize a double quoted string as an option value in
601 *    BlastParseInputString
602 *
603 * Revision 6.298  2001/01/23 18:23:57  madden
604 * Fix memory leak
605 *
606 * Revision 6.297  2001/01/19 16:49:37  madden
607 * Added helper array to BlastNtGappedScoreInternal
608 *
609 * Revision 6.296  2001/01/16 23:16:51  dondosha
610 * Added 2 arguments and several options to parse_blast_options
611 *
612 * Revision 6.295  2001/01/16 20:32:46  kans
613 * included simutil.h to suppress Mac error
614 *
615 * Revision 6.294  2001/01/12 17:10:04  dondosha
616 * If subject SeqLoc is on a single strand and query on both, swap the strands
617 *
618 * Revision 6.293  2001/01/11 18:34:20  dondosha
619 * Changed error level for nonexistent database from ERROR to FATAL
620 *
621 * Revision 6.292  2001/01/09 20:16:27  dondosha
622 * Implemented from-to location options for both sequences in bl2seq
623 *
624 * Revision 6.291  2001/01/05 17:12:48  dondosha
625 * Correction in previous memory leak fix
626 *
627 * Revision 6.290  2001/01/04 15:01:25  dondosha
628 * Fix for tblastx in blast two sequences engine
629 *
630 * Revision 6.289  2001/01/03 21:45:30  dondosha
631 * Fixed a memory leak - some edit blocks not freed in megablast
632 *
633 * Revision 6.288  2000/12/28 18:23:05  madden
634 * Add -P and -A to parse_blast_options
635 *
636 * Revision 6.287  2000/12/19 15:52:47  dondosha
637 * Forbid reversing query and subject for two sequences megablast
638 *
639 * Revision 6.286  2000/12/19 14:52:59  dondosha
640 * Previous change wrong
641 *
642 * Revision 6.285  2000/12/15 15:38:38  dondosha
643 * Call AdjustOffSetsInSeqAlign with correct query and subject SeqLocs
644 *
645 * Revision 6.284  2000/12/15 14:25:41  madden
646 * Optimization to BlastTranslateUnambiguousSequence
647 *
648 * Revision 6.283  2000/12/15 14:23:34  madden
649 * Use readdb_get_sequence_ex to get sequence faster
650 *
651 * Revision 6.282  2000/12/13 22:26:44  dondosha
652 * Free the ncbi4na-encoded subject sequence after search in two sequences megablast engine
653 *
654 * Revision 6.281  2000/12/13 13:51:35  madden
655 * Free SeqLocPtr in BlastSequencesOnTheFly
656 *
657 * Revision 6.280  2000/12/07 17:46:56  dondosha
658 * Call AdjustOffSetsInSeqAlign for megablast too
659 *
660 * Revision 6.279  2000/12/04 18:51:24  madden
661 * Fix memory leaks
662 *
663 * Revision 6.278  2000/11/29 23:05:00  dondosha
664 * Keep ncbi4na-encoded subject sequence in search->subject for megablast
665 *
666 * Revision 6.277  2000/11/16 19:15:31  dondosha
667 * Pass back endpoint results in other_returns for Mega BLAST with no traceback
668 *
669 * Revision 6.276  2000/11/09 17:28:35  dondosha
670 * Set block_width to 0 for Mega BLAST in BlastTwoSequences engine
671 *
672 * Revision 6.275  2000/11/08 22:21:33  dondosha
673 * Enabled new tblastn by adding a longest_intron option
674 *
675 * Revision 6.274  2000/11/08 20:20:31  dondosha
676 * Do not free subject in BlastTwoSequencesCore for new tblastn - done elsewhere
677 *
678 * Revision 6.273  2000/11/07 16:30:27  madden
679 * Introduce intermediate score (before linking of HSPs) for blastx and tblastn
680 *
681 * Revision 6.272  2000/11/03 20:15:19  dondosha
682 * Pass the subject sequence to new_link_hsps from two sequences engine
683 *
684 * Revision 6.271  2000/11/02 20:15:38  dondosha
685 * Added functions BlastTwoSequencesByLocWithCallback and BlastTwoSequencesWithCallback
686 *
687 * Revision 6.270  2000/11/02 16:36:12  madden
688 * Fixed another minor problem from merge
689 *
690 * Revision 6.269  2000/11/02 16:12:37  madden
691 * fix Errors during merge of code
692 *
693 * Revision 6.268  2000/11/01 16:25:57  madden
694 * Changes from Futamura for psitblastn
695 *
696 * Revision 6.267  2000/10/31 17:51:44  dondosha
697 * Copy the necessary search block data for multi-threaded megablast
698 *
699 * Revision 6.266  2000/10/23 22:17:54  shavirin
700 * Added creation of "no database found" message in case the database is
701 * not found.
702 *
703 * Revision 6.265  2000/10/18 19:46:29  dondosha
704 * Fixed bug in BlastTwoSequencesCore for partial subject sequence search
705 *
706 * Revision 6.264  2000/10/16 19:34:16  shavirin
707 * Added possibility to run RPS Blast search from function BioseqBlastEngineByLocEx().
708 *
709 * Revision 6.263  2000/10/13 17:32:50  shavirin
710 * Adjusted calls to readdb_get_header for ASN.1 structured deflines.
711 *
712 * Revision 6.262  2000/10/13 16:05:44  shavirin
713 * Fixed minor bug with reporting database name.
714 *
715 * Revision 6.261  2000/10/12 14:45:34  madden
716 * Break out of loop if hsp is freed
717 *
718 * Revision 6.260  2000/10/11 17:14:02  dondosha
719 * For tblastn traceback convert subject sequence to ncbi4na encoding in BlastTwoSequencesCore
720 *
721 * Revision 6.259  2000/10/10 16:11:15  shavirin
722 * Added check for NULL in the function BLASTCheckHSPInclusion().
723 *
724 * Revision 6.258  2000/10/06 19:32:02  shavirin
725 * Added call to SeqMgrAddToBioseqIndex() for created fake Bioseq.
726 *
727 * Revision 6.257  2000/10/05 22:43:10  dondosha
728 * Use mb_result_struct for Mega BLAST results in two sequences functions
729 *
730 * Revision 6.256  2000/10/05 19:57:08  dondosha
731 * In Mega BLAST, results are saved in and freed from mb_result_struct, not result_struct
732 *
733 * Revision 6.255  2000/10/03 21:28:54  shavirin
734 * Added check for search->pbp for not NULL in BlastSearchBlkDestruct().
735 *
736 * Revision 6.254  2000/09/29 21:14:47  shavirin
737 * Added additional check for inclusion of HSPs after traceback for
738 * OOF gapped alignment case.
739 *
740 * Revision 6.253  2000/09/28 14:57:50  dondosha
741 * Initialize exact match array for megablast in BlastHitListNew
742 *
743 * Revision 6.252  2000/09/25 15:43:36  madden
744 * Fix for rpsblast, too high expect values getting through
745 *
746 * Revision 6.251  2000/09/14 15:05:46  dondosha
747 * For new tblastn, reset evalues to individual ones before relinking HSPs
748 *
749 * Revision 6.250  2000/09/07 13:41:42  madden
750 * Fix if first start is -1 in DenseSeg
751 *
752 * Revision 6.249  2000/09/01 18:29:12  dondosha
753 * Removed calls to ReadDBFreeSharedInfo and ReadDBCloseMHdrAndSeqFiles
754 *
755 * Revision 6.248  2000/08/31 18:37:21  shavirin
756 * Added check for NULL in BlastMakeCopyQueryDNAP().
757 *
758 * Revision 6.247  2000/08/31 16:55:17  shavirin
759 * Fixed problem with OOF alignment of negative strand HSPs.
760 *
761 * Revision 6.246  2000/08/28 21:53:12  shavirin
762 * Added function BlastOtherReturnsFree(). Cleaned memory in case of
763 * tweak_parameters = TRUE. (Freed SeqAlign calculated before RedoAlignmentCore.)
764 *
765 * Revision 6.245  2000/08/22 20:02:27  dondosha
766 * Previous change not quite right: use real subject length for all programs
767 *
768 * Revision 6.244  2000/08/22 19:42:25  dondosha
769 * Divide search->subject->length by 3 for tblastn in RealBlastGetGappedAlignmentTraceback
770 *
771 * Revision 6.243  2000/08/18 21:27:59  madden
772 * undo change 6.240 when smith_waterman is not set, the extra alignment is needed when only tweak_parameters is set
773 *
774 * Revision 6.242  2000/08/18 20:12:29  dondosha
775 * Do not use search->query_id in megablast, use only qid_array
776 *
777 * Revision 6.241  2000/08/08 21:43:35  shavirin
778 * Initialized GapAlignBlkPtr for the value of discontinuous parameters.
779 *
780 * Revision 6.240  2000/08/03 22:25:36  shavirin
781 * Removed redundant gapped Traceback in case when tweak_parameters or
782 * smith_waterman is set.
783 *
784 * Revision 6.239  2000/07/31 23:08:13  dondosha
785 * Do not go over the end of the HSP in subject sequence when computing start for gapped alignment
786 *
787 * Revision 6.238  2000/07/25 18:12:03  shavirin
788 * WARNING: This is a no-turning-back change related to S&W Blast from
789 * Alejandro Schaffer
790 *
791 * Revision 6.237  2000/07/25 16:54:26  shavirin
792 * Corrected functions initializing gap_align in case of OOF gapping.
793 *
794 * Revision 6.236  2000/07/18 22:33:02  shavirin
795 * Adjusted start for gapped alignment in OOF case.
796 *
797 * Revision 6.235  2000/07/17 14:26:08  shavirin
798 * Added support for Out of frame gapping.
799 *
800 * Revision 6.234  2000/07/13 18:33:28  madden
801 * Fix for exploded hits with pdb
802 *
803 * Revision 6.233  2000/07/11 18:38:02  madden
804 * decreased size of helper array, added prefetch to BlastGappedScoreInternal
805 *
806 * Revision 6.232  2000/07/10 15:23:30  dondosha
807 * Moved check query_invalid from BlastTwoSequencesCoreEx to BlastTwoSequencesCore
808 *
809 * Revision 6.231  2000/07/10 15:06:23  madden
810 * Use helper array in BlastGappedScoreInternal to reduce cache misses
811 *
812 * Revision 6.230  2000/06/30 17:52:44  madden
813 * Move AWAKE_THR_MIN_SIZE to blastdef.h
814 *
815 * Revision 6.229  2000/06/29 21:27:02  dondosha
816 * Fixed memory leaks in culling by similarity
817 *
818 * Revision 6.228  2000/06/29 19:19:39  madden
819 * Fix minus strand offset in BlastConvertDNASeqLoc
820 *
821 * Revision 6.227  2000/06/26 20:15:34  shavirin
822 * Fixed coordinates transfer in the function BlastConvertDNASeqLoc().
823 *
824 * Revision 6.226  2000/06/23 20:17:42  madden
825 * Optimization for CheckGappedAlignmentsForOverlap (remove n-squared hsp check)
826 *
827 * Revision 6.225  2000/06/23 15:22:43  madden
828 * Fix problem with removing translated hits with different frames
829 *
830 * Revision 6.224  2000/06/21 18:02:25  dondosha
831 * In BlastSaveCurrentHspGapped no need to allocate new memory for hsp_array
832 *
833 * Revision 6.223  2000/06/21 15:10:27  madden
834 * efficiency in BlastGappedScoreInternal
835 *
836 * Revision 6.222  2000/06/21 12:53:22  madden
837 * Do each frame separately in CheckGappedScoreInternal for efficiency
838 *
839 * Revision 6.221  2000/06/20 16:45:36  dondosha
840 * Fixed a minor bug in revision 6.219
841 *
842 * Revision 6.220  2000/06/19 20:07:19  madden
843 * Skip transferring sequence to blastna format
844 *
845 * Revision 6.219  2000/06/19 19:16:19  dondosha
846 * Optimized reallocation of hsp array when it is overflowing
847 *
848 * Revision 6.218  2000/06/15 15:31:26  dondosha
849 * Added two sequences BLAST functions returning SearchBlk instead of SeqAlign;added code to cluster hits and keep only one hit per cluster - disabled so far; enabled two sequences BLAST for tblastn
850 *
851 * Revision 6.217  2000/06/13 20:54:38  shavirin
852 * Added return of EFF_SEARCH_SPACE in the function BlastOtherReturnsPrepare
853 *
854 * Revision 6.216  2000/06/08 20:34:15  madden
855 * add explode_seqids option to show all ids in a defline
856 *
857 * Revision 6.215  2000/05/24 20:53:48  dondosha
858 * Fixed a bug in previous change
859 *
860 * Revision 6.214  2000/05/24 19:49:07  dondosha
861 * Create qid_array for the new search in BlastSearchDuplicate, if megablast
862 *
863 * Revision 6.213  2000/05/22 19:49:35  dondosha
864 * Initialize vnp to NULL in BlastSeqLocFilterEx
865 *
866 * Revision 6.212  2000/05/16 20:00:02  madden
867 * fix for formatting db names
868 *
869 * Revision 6.211  2000/05/12 19:41:54  dondosha
870 * Free qid_array in BlastSearchBlkDestruct
871 *
872 * Revision 6.210  2000/05/05 20:10:22  madden
873 * Add vecscreen filtering capability
874 *
875 * Revision 6.209  2000/04/29 18:55:53  wheelan
876 * temporary fix for BlastTwoSequences NULL return problem
877 *
878 * Revision 6.208  2000/04/28 16:52:31  madden
879 * Fix for ungapped search of subset databases
880 *
881 * Revision 6.207  2000/04/10 17:26:28  madden
882 * Add BLASTResultFreeHsp to free memory as it is no longer needed
883 *
884 * Revision 6.206  2000/04/10 15:24:49  dondosha
885 * Enabled use of MegaBlast for BlastTwoSequences
886 *
887 * Revision 6.205  2000/04/07 16:57:45  shavirin
888 * Transferred queue parameters in BlastSearchBlkDuplicate() function.
889 *
890 * Revision 6.204  2000/04/06 17:33:57  madden
891 * Check if pointer is NULL in BlastGetAllowedGis
892 *
893 * Revision 6.203  2000/04/03 21:23:18  dondosha
894 * Do not construct ewp_params and ewp for MegaBlast search
895 *
896 * Revision 6.202  2000/04/03 20:05:27  madden
897 * Free lh_helper on tmp_hitlist, fixes leak
898 *
899 * Revision 6.201  2000/03/31 19:11:06  dondosha
900 * Changed some names related to MegaBlast
901 *
902 * Revision 6.200  2000/03/31 16:45:43  dondosha
903 * Enabled blastx for BlastTwoSequences search
904 *
905 * Revision 6.199  2000/03/30 21:44:22  madden
906 * Add BLASTResultHitlistFreeEx that checks Heap integrity
907 *
908 * Revision 6.198  2000/03/29 22:18:02  dondosha
909 * Moved adjustment of offsets in blastn to BlastSaveCurrentHitlist, added gap info processing for MegaBlast
910 *
911 * Revision 6.197  2000/03/22 17:58:54  dondosha
912 * Duplicate entire list of query_ids in BlastSearchBlkDuplicate
913 *
914 * Revision 6.196  2000/03/08 20:34:30  madden
915 * Add BlastGetFirstGiofSubset, BlastGetAllowedGis returns primary SeqId
916 *
917 * Revision 6.195  2000/03/03 18:15:52  dondosha
918 * Fixed bugs and memory leaks in MegaBlast related code
919 *
920 * Revision 6.194  2000/03/03 17:58:23  shavirin
921 * Added new function BlastConvertDNASeqLoc()
922 *
923 * Revision 6.193  2000/03/01 14:37:45  dondosha
924 * Adjust query offsets after search for all 3 versions of blastn
925 *
926 * Revision 6.192  2000/02/29 18:06:07  dondosha
927 * In case of MegaBlast save correct query ids in seqaligns
928 *
929 * Revision 6.191  2000/02/24 23:21:27  dondosha
930 * Adjust context offsets before gapped alignment to avoid strand crossover
931 *
932 * Revision 6.190  2000/02/23 20:51:05  dondosha
933 * Modifications for blastn to concatenate strands - handling of query offsets
934 *
935 * Revision 6.189  2000/02/17 21:23:10  shavirin
936 * Added parameter is_rps_blast.
937 *
938 * Revision 6.188  2000/02/17 19:02:09  shavirin
939 * Removed all references to obsolete theCacheSize variable.
940 *
941 * Revision 6.187  2000/02/17 18:30:56  shavirin
942 * Added translated DNA filtering for RPS Blast
943 *
944 * Revision 6.186  2000/02/17 14:38:27  madden
945 * Duplicate filter_string for multiple threads
946 *
947 * Revision 6.185  2000/02/16 21:49:16  shavirin
948 * Fixed some memory leaks.
949 *
950 * Revision 6.184  2000/02/15 19:16:26  shavirin
951 * MemFree(pbp->filter_string) in BlastSearchBlkDestruct
952 *
953 * Revision 6.183  2000/02/14 16:15:50  madden
954 * Revert to 6.179
955 *
956 * Revision 6.182  2000/02/11 22:03:03  shavirin
957 * Returned back previous change.
958 *
959 * Revision 6.181  2000/02/11 21:25:58  shavirin
960 * Removed call to BlastLinkHsps() function for tblastn program.
961 *
962 * Revision 6.180  2000/02/11 20:45:54  dondosha
963 * Adjust the second strand offsets after blastn search
964 *
965 * Revision 6.179  2000/02/11 16:40:53  egorov
966 * The parse_blast_options is made public.
967 *
968 * Revision 6.178  2000/02/04 22:31:38  kans
969 * test subject_bsp for NULL before dereferencing in BlastTwoSequencesByLocEx
970 *
971 * Revision 6.177  2000/02/04 16:13:15  shavirin
972 * Returned changes done in Revision 6.172.
973 *
974 * Revision 6.176  2000/02/02 18:22:05  madden
975 * Free memory for LinkHelpStruct
976 *
977 * Revision 6.175  2000/02/01 22:13:26  dondosha
978 * Added code related to greedy basic gapped alignment
979 *
980 * Revision 6.174  2000/01/28 16:45:53  madden
981 * HitRangeToSeqLoc called with combine TRUE
982 *
983 * Revision 6.173  2000/01/26 22:01:56  madden
984 * Add function BlastGetProgramName
985 *
986 * Revision 6.172  2000/01/14 18:28:11  shavirin
987 * Some WordExtention* functions made external.
988 *
989 * Revision 6.171  2000/01/12 21:46:19  dondosha
990 * Minor memory leak clean-up (routine BlastSeqLocFilterEx)
991 *
992 * Revision 6.170  2000/01/12 18:54:44  madden
993 * Do not free bestid to fix problem
994 *
995 * Revision 6.169  2000/01/11 17:12:51  shavirin
996 * Added handling of the new parameter theCacheSize.
997 *
998 * Revision 6.168  2000/01/11 15:32:47  dondosha
999 * Fixed memory leaks in opening shared header and sequence file memory maps
1000 *
1001 * Revision 6.167  2000/01/04 21:56:59  madden
1002 * Add NULLB to both ends of db sequence before gap extend, use dynamic buffer for blast options in repeat filtering
1003 *
1004 * Revision 6.166  2000/01/03 17:38:33  shavirin
1005 * Added check for rdfp in BlastGetAllowedGis() function.
1006 *
1007 * Revision 6.165  1999/12/31 14:23:20  egorov
1008 * Add support for using mixture of real and mask database with gi-list files:
1009 * 1. Change logic of creating rdfp list.
1010 * 2. BlastGetDbChunk gets real databases first, then masks.
1011 * 3. Proper calculation of database sizes using alias files.
1012 * 4. Change to CommonIndex to support using of mask databases.
1013 * 5. Use correct gis in formatted output (BlastGetAllowedGis()).
1014 * 6. Other small changes
1015 *
1016 * Revision 6.164  1999/12/22 22:00:35  dondosha
1017 * Destruct the header and sequence memory maps separately before destructing the search structure
1018 *
1019 * Revision 6.163  1999/12/22 21:08:36  shavirin
1020 * Rewritten function BlastNewFindWords() added function BlastNewFindWordsEx()
1021 *
1022 * Revision 6.160  1999/12/21 20:02:45  egorov
1023 * Fix memory leak.
1024 *
1025 * Revision 6.159  1999/12/17 22:22:57  madden
1026 * New masking parameters from Wojtek
1027 *
1028 * Revision 6.158  1999/12/16 19:08:36  egorov
1029 * Check rdfp for NULL before using.  Bug reported by Patrick and Sergei Sh.
1030 *
1031 * Revision 6.157  1999/12/15 17:42:26  egorov
1032 * Change BlastGetAllowedGis() to handle gi's belonging to a database alias.
1033 *
1034 * Revision 6.156  1999/12/13 21:53:02  madden
1035 * Some fixes for repeat masking
1036 *
1037 * Revision 6.155  1999/11/26 22:11:26  madden
1038 * Added BlastNT functions for nucl. extensions
1039 *
1040 * Revision 6.154  1999/11/24 15:21:38  egorov
1041 * Avoid GCC warning
1042 *
1043 * Revision 6.153  1999/11/09 14:14:12  madden
1044 * Start alive thread for masking only if query is above min size
1045 *
1046 * Revision 6.152  1999/11/02 15:32:36  madden
1047 * Allow setting of repeat filtering options and database
1048 *
1049 * Revision 6.151  1999/11/01 20:18:22  egorov
1050 * New format of filter_string
1051 *
1052 * Revision 6.150  1999/10/27 21:33:02  madden
1053 * Use housekeeping threads only for larger sequences
1054 *
1055 * Revision 6.149  1999/10/18 20:06:52  shavirin
1056 * evalue_compare_hits() : In case of equal scores and E-values order
1057 * will be determined by subject id
1058 *
1059 * Revision 6.148  1999/10/18 16:15:04  egorov
1060 * Bug fixed
1061 *
1062 * Revision 6.147  1999/10/15 20:52:10  shavirin
1063 * Fixed bug with seq_id_list initialization
1064 *
1065 * Revision 6.146  1999/10/12 21:50:47  shavirin
1066 * Added initialization of db_chunk_size in BlastThrInfoNew().
1067 *
1068 * Revision 6.145  1999/10/05 17:42:55  shavirin
1069 * Removed global variables from blast.c
1070 *
1071 * Revision 6.144  1999/10/01 18:26:56  madden
1072 * Check for search->rdfp before search->rdfp->oidlist
1073 *
1074 * Revision 6.143  1999/09/28 20:14:33  madden
1075 * Joerg changes to minimize cache misses
1076 *
1077 * Revision 6.142  1999/09/22 20:58:49  egorov
1078 * OID list change
1079 *
1080 * Revision 6.141  1999/09/16 16:55:12  madden
1081 * Changes for long words in blastn
1082 *
1083 * Revision 6.140  1999/09/03 17:23:25  madden
1084 * Fixed bug in CheckStartForGappedAlignment
1085 *
1086 * Revision 6.139  1999/09/01 19:21:06  shavirin
1087 * Added propagation of the score for discontinuous alignment in
1088 * functions: RealBlastGetGappedAlignmentTraceback() and BioseqBlastEngineCore()
1089 *
1090 * Revision 6.138  1999/08/27 18:07:34  shavirin
1091 * Passed parameter decline_align from top to the engine.
1092 *
1093 * Revision 6.137  1999/08/20 20:54:12  madden
1094 * place sentinel byte at beginning of nt sequence for ALIGN
1095 *
1096 * Revision 6.136  1999/08/20 19:48:13  madden
1097 * Changed call to BlastSearchBlkNew(Extra), removed use of version array
1098 *
1099 * Revision 6.135  1999/08/20 16:35:25  shavirin
1100 * Added protection against invalid program name in BlastGetTypes().
1101 *
1102 * Revision 6.134  1999/08/06 18:53:57  madden
1103 * Added calls to lookup_position_aux_destruct
1104 *
1105 * Revision 6.133  1999/08/05 19:01:29  madden
1106 * Add check for NULL search or invalid query in BlastTwoSequencesCore
1107 *
1108 * Revision 6.132  1999/07/01 13:03:24  sicotte
1109 * Updated for DenseDiag and Moved seqalign_reverse_strand from blastutl.c(blast.h) to SeqAlignListReverseStrand in salpedit.ch and fixed call in salutil.c
1110 *
1111 * Revision 6.131  1999/06/24 17:24:12  madden
1112 * Fix bug in GetSeqAlignCount when SeqAlignPtr is NULL
1113 *
1114 * Revision 6.130  1999/06/18 21:17:58  madden
1115 * Check that an exact match gives a positive value when making words for blast2seqs
1116 *
1117 * Revision 6.129  1999/06/14 15:20:26  madden
1118 * Produce temporary BLAST_HitList to fix blastx core-dump
1119 *
1120 * Revision 6.128  1999/05/27 17:33:05  madden
1121 * Fixed Int2 (should have been Int4) problem
1122 *
1123 * Revision 6.127  1999/05/25 13:37:49  madden
1124 * Make smallest float 1.0e-180
1125 *
1126 * Revision 6.126  1999/05/19 12:44:00  madden
1127 * Change in longest_db_seq for multiple db search
1128 *
1129 * Revision 6.125  1999/05/13 13:48:11  madden
1130 * Only filter out hits if on same strand
1131 *
1132 * Revision 6.124  1999/04/15 13:24:35  madden
1133 * Fix for sum stats problems
1134 *
1135 * Revision 6.123  1999/04/13 19:16:47  madden
1136 * Check that two HSPs are on same strand before deleting one
1137 *
1138 * Revision 6.122  1999/04/12 20:24:54  egorov
1139 * Fix MT problem
1140 *
1141 * Revision 6.121  1999/04/01 21:42:46  madden
1142 * Fix memory leaks when gi list is used
1143 *
1144 * Revision 6.120  1999/04/01 14:18:58  madden
1145 * Fixed memory leaks with gi_list
1146 *
1147 * Revision 6.119  1999/03/31 15:46:52  madden
1148 * Removed unused code and variables
1149 *
1150 * Revision 6.118  1999/03/17 13:21:06  madden
1151 * Fix comment in comment problem
1152 *
1153 * Revision 6.117  1999/03/16 19:27:36  egorov
1154 * More type castings
1155 *
1156 * Revision 6.116  1999/03/12 17:19:59  egorov
1157 * More type casting fixes
1158 *
1159 * Revision 6.115  1999/03/12 15:03:45  egorov
1160 * Add proper Int4-long type casting
1161 *
1162 * Revision 6.114  1999/03/04 14:18:09  egorov
1163 * Do correct filter masking when query is seqloc
1164 * The only BlastMaskTheResidues() function is changed:
1165 *
1166 * Revision 6.113  1999/02/22 21:59:05  madden
1167 * binary search in GetAllowedGis function
1168 *
1169 * Revision 6.112  1999/02/22 17:32:46  madden
1170 * Fix memory leak
1171 *
1172 * Revision 6.111  1999/02/18 21:18:23  madden
1173 * Optimization
1174 *
1175 * Revision 6.110  1999/02/17 13:23:01  madden
1176 * Added hsp_num_max
1177 *
1178 * Revision 6.109  1999/02/11 13:53:46  madden
1179 * Added combine Boolean to HitRangeToSeqLoc, fixed mem leak
1180 *
1181 * Revision 6.108  1999/01/28 17:20:57  madden
1182 * Check do_sum_stats for linking, Int2 to Int4, UMR
1183 *
1184 * Revision 6.107  1999/01/28 16:05:49  madden
1185 * HspArrayPurge change, HSPs saved more efficiently
1186 *
1187 * Revision 6.106  1999/01/26 18:27:23  madden
1188 * handle delta sequences correctly
1189 *
1190 * Revision 6.105  1999/01/26 17:59:26  madden
1191 * ContextToFrame no longer static
1192 *
1193 * Revision 6.104  1999/01/25 21:31:25  madden
1194 * Check for illegal chars when nucl. query is translated
1195 *
1196 * Revision 6.103  1999/01/25 19:04:37  madden
1197 * prevent core-dump when query is empty
1198 *
1199 * Revision 6.102  1999/01/20 21:05:33  madden
1200 * Look for repeats on both strands
1201 *
1202 * Revision 6.101  1999/01/19 13:29:24  madden
1203 * Change to HspArrayPurge
1204 *
1205  * Revision 6.100  1998/12/31 18:17:08  madden
1206  * Added strand option
1207  *
1208  * Revision 6.99  1998/12/31 15:36:07  victorov
1209  * filtering internals is now based on SeqLoc instead of Bioseq
1210  *
1211  * Revision 6.98  1998/12/18 16:20:18  madden
1212  * efficiencies
1213  *
1214  * Revision 6.97  1998/12/15 14:11:29  madden
1215  * Change to permit an arbitrary number of HSPs
1216  *
1217  * Revision 6.96  1998/11/30 15:58:20  madden
1218  * Added CheckStartForGappedAlignment
1219  *
1220  * Revision 6.95  1998/11/27 15:24:12  madden
1221  * Duplicated handle_results and query_id if SearchBlk duplicated
1222  *
1223  * Revision 6.94  1998/11/16 17:39:23  kans
1224  * added FALSE for new parameter to FilterCC
1225  *
1226  * Revision 6.93  1998/11/06 14:13:01  madden
1227  * Added call to AdjustOffSetsInSeqAlign in BioseqBlastEngineByLocEx
1228  *
1229  * Revision 6.92  1998/10/21 13:44:16  madden
1230  * Fixed UMR found by purify
1231  *
1232  * Revision 6.91  1998/10/20 19:57:21  madden
1233  * Run dust if filtering is selected for nt
1234  *
1235  * Revision 6.90  1998/10/13 20:37:53  madden
1236  * Use IS_residue after call to SeqPortGetResidue
1237  *
1238  * Revision 6.89  1998/09/24 15:26:38  egorov
1239  * Fix lint complaints
1240  *
1241  * Revision 6.88  1998/09/16 19:00:16  madden
1242  * Added subset Boolean
1243  *
1244  * Revision 6.87  1998/09/15 13:12:29  madden
1245  * Fixed memory leak
1246  *
1247  * Revision 6.86  1998/09/14 15:11:18  egorov
1248  * Add support for Int8 length databases; remove unused variables
1249  *
1250  * Revision 6.85  1998/09/04 20:48:48  madden
1251  * typo fix (= instead of ==)
1252  *
1253  * Revision 6.84  1998/09/03 20:23:42  madden
1254  * Copied seq_ext and seq_ext_type in MakeFakeBioseq
1255  *
1256  * Revision 6.83  1998/09/03 19:41:09  madden
1257  * do not switch sequences for Blast2Sequences if filtering is performed
1258  *
1259  * Revision 6.82  1998/08/24 14:59:59  madden
1260  * readdb_get_sequence_ex function
1261  *
1262  * Revision 6.81  1998/07/30 19:00:56  madden
1263  * Fix memory leak
1264  *
1265  * Revision 6.80  1998/07/29 21:29:45  madden
1266  * Fixed UMR with longest_db_seq that showed up in Blast 2 sequences
1267  *
1268  * Revision 6.79  1998/07/28 21:18:35  madden
1269  * Change to BLAST_ExtendWordParamsNew saves memory
1270  *
1271  * Revision 6.78  1998/07/24 14:58:53  madden
1272  * Jinqhuis call to SeqLocRevCmp put back
1273  *
1274  * Revision 6.77  1998/07/22 20:31:51  madden
1275  * Replaced cutvalue of 1000000 with INT4_MAX
1276  *
1277  * Revision 6.76  1998/07/22 12:17:03  madden
1278  * Added BioseqHitRange call for repeat filtering
1279  *
1280  * Revision 6.75  1998/07/21 20:58:10  madden
1281  * Changes to allow masking at hash only
1282  *
1283  * Revision 6.74  1998/07/20 15:51:28  zjing
1284  * add a check for plus-minus before SeqLocRevCmp
1285  *
1286  * Revision 6.73  1998/07/17 15:39:59  madden
1287  * Changes for Effective search space.
1288  *
1289  * Revision 6.72  1998/07/14 21:31:43  madden
1290  * Fix for incorrectly sorted HSP bug and speed-up of CheckHspOverlap
1291  *
1292  * Revision 6.71  1998/07/06 13:39:04  madden
1293  * Fixed improper use of Int4 in parse_seg_options
1294  *
1295  * Revision 6.70  1998/07/02 21:00:39  egorov
1296  * Remove memory leak in threaded version
1297  *
1298  * Revision 6.69  1998/06/12 22:09:14  madden
1299  * Added call to SegParamsFree
1300  *
1301  * Revision 6.68  1998/06/12 16:08:51  madden
1302  * BlastHitRange stuff
1303  *
1304  * Revision 6.67  1998/06/08 15:07:32  madden
1305  * Fixed bug in BlastConvertProteinSeqLoc
1306  *
1307  * Revision 6.66  1998/06/04 16:23:17  madden
1308  * Use new seg
1309  *
1310  * Revision 6.65  1998/05/28 19:59:58  madden
1311  * Zhengs new culling code
1312  *
1313  * Revision 6.64  1998/05/22 20:20:38  madden
1314  * Added BlastTwoSequencesByLocEx and BlastTwoSequencesEx
1315  *
1316  * Revision 6.63  1998/05/18 17:58:31  madden
1317  * fixed parsing of coil-coil options, added parsing of dust options
1318  *
1319  * Revision 6.62  1998/05/17 16:28:41  madden
1320  * Allow changes to filter options and cc filtering.
1321  *
1322  * Revision 6.61  1998/05/05 14:05:35  madden
1323  * Added functions BlastStartAwakeThread and BlastStopAwakeThread
1324  *
1325  * Revision 6.60  1998/04/28 21:04:19  madden
1326  * Reset number of HSPs to zero if relinking
1327  *
1328  * Revision 6.59  1998/04/24 21:52:09  madden
1329  * Protection against NULL pointers
1330  *
1331  * Revision 6.58  1998/04/24 19:10:59  egorov
1332  * Fix bug when if wordsize == 2 blastall produces extra alignments
1333  *
1334  * Revision 6.57  1998/04/23 21:15:09  egorov
1335  * Show exact matching even if score is below threshold (case of two sequences)
1336  *
1337  * Revision 6.56  1998/04/15 20:24:54  madden
1338  * BlastMaskTheResidues optimized
1339  *
1340  * Revision 6.55  1998/04/10 17:46:58  madden
1341  * Changed FALSE to NULL in BioseqSeg
1342  *
1343  * Revision 6.54  1998/04/02 21:12:55  madden
1344  * Properly set value for linking HSPs in blastx and tblastn
1345  *
1346  * Revision 6.53  1998/04/01 22:47:35  madden
1347  * Check for query_invalid flag
1348  *
1349  * Revision 6.52  1998/03/26 14:20:20  madden
1350  * Changed GetScoreSetFromBlastResultHsp1 from static to LIBCALL
1351  *
1352  * Revision 6.51  1998/03/25 22:28:16  madden
1353  * Changes to allow random access BLAST by gi
1354  *
1355  * Revision 6.50  1998/03/24 15:38:25  madden
1356  * Use BlastDoubleInt4Ptr to keep track of gis and ordinal_ids
1357  *
1358  * Revision 6.49  1998/03/19 22:16:24  madden
1359  * Changes to allow blasting by gi list
1360  *
1361  * Revision 6.48  1998/03/18 14:14:11  madden
1362  * Support random access by gi list
1363  *
1364  * Revision 6.47  1998/03/16 17:41:59  madden
1365  * Fixed leaks
1366  *
1367  * Revision 6.46  1998/03/14 18:28:10  madden
1368  * Added BioseqBlastEngineEx
1369  *
1370  * Revision 6.45  1998/03/09 16:35:10  madden
1371  * Fixed bug with tblastn and blastx gapped searches
1372  *
1373  * Revision 6.44  1998/02/27 14:32:33  madden
1374  * Functions moved to blastool.c
1375  *
1376  * Revision 6.43  1998/02/26 22:34:27  madden
1377  * Changes for 16 bit windows
1378  *
1379  * Revision 6.42  1998/02/26 19:12:39  madden
1380  *  Removed AdjustOffSetsInSeqAlign, added BlastNtFindWords BlastPopulateAllWordArrays BlastFindWords and BlastNewFindWords
1381  *
1382  * Revision 6.41  1998/02/24 22:47:06  madden
1383  * Fixed problem with Option validation
1384  *
1385  * Revision 6.40  1998/02/23 16:09:57  madden
1386  * Corrected from offset for subject in tblastx search
1387  *
1388  * Revision 6.39  1998/02/19 17:17:05  madden
1389  * Use of Int4 rather than Int2 when pruning SeqAlign
1390  *
1391  * Revision 6.38  1998/02/12 21:50:39  madden
1392  * protection against NULL hitlist in blastx and tblastn
1393  *
1394  * Revision 6.37  1998/02/11 17:18:19  madden
1395  * Made BlastGetGappedAlignmentTraceback functions to BlastGetGapAlgnTbck (shorter than 32 chars)
1396  *
1397  * Revision 6.36  1998/01/31 21:34:09  madden
1398  * Fix to SeqAlign pruning
1399  *
1400  * Revision 6.35  1998/01/06 18:26:22  madden
1401  * Use SeqLocLen rather than bsp->length, wordsize done properly for nucl
1402  *
1403  * Revision 6.34  1998/01/05 22:41:40  madden
1404  * Added seqalign_reverse_strand
1405  *
1406  * Revision 6.33  1998/01/05 20:53:16  madden
1407  * Added ability to align minus-minus or plus-minus in BlastTwoSeqsByLoc
1408  *
1409  * Revision 6.32  1998/01/05 16:46:55  madden
1410  * One or both strands can be searched, as opposed to only both, changes to number of contexts
1411  *
1412  * Revision 6.31  1997/12/31 17:52:09  madden
1413  * Change to BLAST_WordFinderNew
1414  *
1415  * Revision 6.30  1997/12/23 19:16:52  madden
1416  * Minor efficiency in ExtendWordExit
1417  *
1418  * Revision 6.29  1997/12/23 18:12:34  madden
1419  * Changes for range-dependent blast
1420  *
1421  * Revision 6.28  1997/12/12 20:38:55  madden
1422  * ContextToFrame lost last parameter, fix to sprintf
1423  *
1424  * Revision 6.27  1997/12/11 22:22:24  madden
1425  * Proper casting of variables
1426  *
1427  * Revision 6.26  1997/12/10 22:43:09  madden
1428  * proper casting
1429  *
1430  * Revision 6.25  1997/12/01 22:07:10  madden
1431  * Changed call to BLASTOptionValidateEx
1432  *
1433  * Revision 6.24  1997/11/28 18:19:33  madden
1434  * Changes to TxDfDbInfoNew
1435  *
1436  * Revision 6.23  1997/11/18 22:23:20  madden
1437  * Added BLASTOptionSetGapParams
1438  *
1439  * Revision 6.22  1997/11/14 17:15:29  madden
1440  * Realign matches when they contain ambiguities in blastx/tblastn
1441  *
1442  * Revision 6.21  1997/11/07 00:49:02  madden
1443  * Added call to BLAST_MatrixFill
1444  *
1445  * Revision 6.20  1997/10/29 22:11:13  madden
1446  * ABS value of frames
1447  *
1448  * Revision 6.19  1997/10/24 20:44:52  madden
1449  * Removed BlastSetReadDB and BlastGetReadDB_ID
1450  *
1451  * Revision 6.18  1997/10/22 21:46:34  madden
1452  * Changed default values
1453  *
1454  * Revision 6.17  1997/10/21 20:39:18  madden
1455  * Fix for more alignments than descriptions.
1456  *
1457  * Revision 6.16  1997/10/21 19:50:00  madden
1458  * Fix for no valid query sequence and hitlist_max of 1
1459  *
1460  * Revision 6.15  1997/10/03 21:27:28  madden
1461  * Added BlastGetTypes
1462  *
1463  * Revision 6.14  1997/10/02 17:29:29  madden
1464  * Added PrintDbInformationBasic
1465  *
1466  * Revision 6.13  1997/10/01 13:35:31  madden
1467  * Changed BLAST_VERSION to BLAST_ENGINE_VERSION
1468  *
1469  * Revision 6.12  1997/09/30 20:03:07  madden
1470  * Saved db filename in dbinfo
1471  *
1472  * Revision 6.11  1997/09/24 22:36:35  madden
1473  * Fixes for MT multidb searches
1474  *
1475  * Revision 6.10  1997/09/23 16:43:41  madden
1476  * removed unneeded DenseSegPtr
1477  *
1478  * Revision 6.9  1997/09/22 18:18:35  madden
1479  * Added umlaut to Schaffer in reference
1480  *
1481  * Revision 6.8  1997/09/18 22:22:03  madden
1482  * Added prune functions
1483  *
1484  * Revision 6.7  1997/09/16 16:54:09  kans
1485  * return FALSE instead of NULL for Boolean value
1486  *
1487  * Revision 6.6  1997/09/16 16:31:28  madden
1488  * More changes for multiple db runs
1489  *
1490  * Revision 6.5  1997/09/11 18:49:31  madden
1491  * Changes to enable searches against multiple databases.
1492  *
1493  * Revision 6.4  1997/09/10 21:28:00  madden
1494  * Changes to set CPU limits
1495  *
1496  * Revision 6.3  1997/09/08 16:25:32  madden
1497  * Fixed bug that did not mask low-complexity regions at the end of a query
1498  *
1499  * Revision 6.2  1997/08/27 14:46:51  madden
1500  * Changes to enable multiple DB searches
1501  *
1502  * Revision 6.1  1997/08/26 15:05:26  madden
1503  * Fix for negative effective search space
1504  *
1505  * Revision 6.0  1997/08/25 18:52:49  madden
1506  * Revision changed to 6.0
1507  *
1508  * Revision 1.105  1997/08/22 18:37:43  madden
1509  * Added function BlastOtherReturnsPrepare
1510  *
1511  * Revision 1.104  1997/08/20 21:43:34  madden
1512  * Added page numbers
1513  *
1514  * Revision 1.103  1997/08/14 21:07:08  madden
1515  * ignored gapped for tblastx
1516  *
1517  * Revision 1.102  1997/08/14 14:30:35  madden
1518  * BlastNewFindWords called with range set for ranged blast
1519  *
1520  * Revision 1.101  1997/07/31 21:18:11  madden
1521  * Removed left-over file from seg
1522  *
1523  * Revision 1.100  1997/07/30 16:39:30  madden
1524  * Print gap existence and extension parameters for blastn
1525  *
1526  * Revision 1.99  1997/07/30 16:31:37  madden
1527  * tblastx prepares StdSeg
1528  *
1529  * Revision 1.98  1997/07/29 17:07:27  madden
1530  * better tblastx error messages.
1531  *
1532  * Revision 1.97  1997/07/25 15:39:49  madden
1533  * Corrected citation
1534  *
1535  * Revision 1.96  1997/07/25 13:47:46  madden
1536  * Made buffer longer to avoid ABR
1537  *
1538  * Revision 1.95  1997/07/23 20:59:02  madden
1539  * Changed blastn defaults for gap opening and extension
1540  *
1541  * Revision 1.94  1997/07/22 17:22:41  madden
1542  * Added NULL arg (for index callback) to BLASTSetUpSearch funcs
1543  *
1544  * Revision 1.93  1997/07/21 17:36:42  madden
1545  * Added BlastGetReleaseDate
1546  *
1547  * Revision 1.92  1997/07/18 20:57:02  madden
1548  * Added functions BlastGetVersionNumber and BlastGetReference
1549  *
1550  * Revision 1.91  1997/07/18 14:26:20  madden
1551  * call to AcknowledgeBlastQuery changed, SeqId no longer deleted there.
1552  *
1553  * Revision 1.90  1997/07/16 20:34:35  madden
1554  * Added function BlastConvertProteinSeqLoc
1555  *
1556  * Revision 1.89  1997/07/15 20:36:14  madden
1557  * Added BioseqSeg and SeqLocSeg
1558  *
1559  * Revision 1.88  1997/07/14 20:11:10  madden
1560  * Removed unused variables
1561  *
1562  * Revision 1.87  1997/07/14 16:15:41  madden
1563  * call to BLASTOptionValidateEx in BlastBioseqEngine
1564  *
1565  * Revision 1.86  1997/07/14 15:31:49  madden
1566  * Added BlastErrorMessage functions
1567  *
1568  * Revision 1.85  1997/07/11 19:29:37  madden
1569  * Added function BioseqBlastEngineByLoc
1570  *
1571  * Revision 1.84  1997/07/10 20:35:43  madden
1572  * Changed parameter output
1573  *
1574  * Revision 1.83  1997/07/02 20:18:39  madden
1575  * Made continuous SeqAlign the default
1576  *
1577  * Revision 1.82  1997/07/02 18:31:39  madden
1578  * changed defaults
1579  *
1580  * Revision 1.81  1997/07/01 19:15:44  madden
1581  * More changes to FormatBlastParameters
1582  *
1583  * Revision 1.80  1997/07/01 17:51:36  madden
1584  * changed gap_decay rate, gap_prob
1585  *
1586  * Revision 1.79  1997/07/01 15:44:44  madden
1587  * Changes to FormatBlastParameters per S. Altschul
1588  *
1589  * Revision 1.78  1997/06/30 15:50:06  madden
1590  * Changes to FormatBlastParameters
1591  *
1592  * Revision 1.77  1997/06/27 22:18:51  madden
1593  * Updated default parameters
1594  *
1595  * Revision 1.76  1997/06/27 14:31:08  madden
1596  * Added functions BlastAddSeqIdToList and BlastSeqIdListDestruct
1597  *
1598  * Revision 1.75  1997/06/24 13:51:27  madden
1599  * Fixed SeqLoc leak
1600  *
1601  * Revision 1.74  1997/06/23 20:49:31  madden
1602  * BLASTOptionValidate checks for proper gapping parameters
1603  *
1604  * Revision 1.73  1997/06/20 13:11:33  madden
1605  * Made AdjustOffSetsInSeqAlign non-static, Fixed purify error
1606  *
1607  * Revision 1.72  1997/06/06 21:29:48  madden
1608  * Added Boolean html to AcknowledgeBlastQuery and PrintDbInformation
1609  *
1610  * Revision 1.71  1997/06/06 19:49:46  madden
1611  * Added BlastMakeFakeBioseq and BlastDeleteFakeBioseq
1612  *
1613  * Revision 1.70  1997/05/30 21:05:59  madden
1614  * corrected call to readdb_new
1615  *
1616  * Revision 1.69  1997/05/27 20:20:02  madden
1617  * Added function BlastMaskTheResidues
1618  *
1619  * Revision 1.68  1997/05/22 21:24:55  madden
1620  * Added support for final gapX dropoff value
1621  *
1622  * Revision 1.67  1997/05/20 17:52:58  madden
1623  * Added functions BlastTwoSequencesByLoc and BlastSequencesOnTheFlyByLoc
1624  *
1625  * Revision 1.66  1997/05/12 21:34:16  madden
1626  * readdb_new allows indeterminate database type
1627  *
1628  * Revision 1.65  1997/05/06 22:17:59  madden
1629  * Duplicate dblen_eff, dbseq_num, and length_adjustment
1630  *
1631  * Revision 1.64  1997/05/01  15:53:19  madden
1632  * Addition of extra KarlinBlk's for psi-blast
1633  *
1634  * Revision 1.63  1997/04/29  14:07:45  madden
1635  * Fixed problem with hits failing PreliminaryGapping; fixed UMR.
1636  *
1637  * Revision 1.62  1997/04/25  20:23:06  madden
1638  * Freed SeqPort to clear mem leak.
1639  *
1640  * Revision 1.61  1997/04/24  14:43:07  madden
1641  * Fix for minus strand (ungapped) tblastn runs.
1642  *
1643  * Revision 1.60  1997/04/23  21:56:07  madden
1644  * Changes in BlastGetGappedAlignmentTraceback for in-frame gapping tblastn.
1645  *
1646  * Revision 1.59  1997/04/22  14:00:14  madden
1647  * Removed unused variables.
1648  *
1649  * Revision 1.58  1997/04/22  13:04:19  madden
1650  * Changes for in-frame blastx gapping.
1651  *
1652  * Revision 1.57  1997/04/21  15:35:26  madden
1653  * Fixes for 'gapped' StdSegs.
1654  *
1655  * Revision 1.56  1997/04/18  17:08:35  madden
1656  * Corrected printing of threshold values.
1657  *
1658  * Revision 1.55  1997/04/17  22:12:43  madden
1659  * Fix for offset in GetStartForGappedAlignment.
1660  *
1661  * Revision 1.54  1997/04/17  22:07:48  madden
1662  * Changes to allow in-frame gapped tblastn.
1663  *
1664  * Revision 1.53  1997/04/15  22:02:59  madden
1665  * Set original_length1 for translating searches.
1666  *
1667  * Revision 1.52  1997/04/14  21:31:58  madden
1668  * Checking for NULL pointer.
1669  *
1670  * Revision 1.51  1997/04/14  15:59:47  madden
1671  * Changes for ungapped psi-blast.
1672  *
1673  * Revision 1.50  1997/04/11  21:18:45  madden
1674  * Added GetSequenceWithDenseSeg.
1675  *
1676  * Revision 1.49  1997/04/11  19:02:49  madden
1677  * Changes for in-frame blastx, tblastn gapping.
1678  *
1679  * Revision 1.48  1997/04/09  20:01:53  madden
1680  * Copied seqid_list from search structure to duplicate, for use on threads.
1681  *
1682  * Revision 1.47  1997/04/08  16:27:28  madden
1683  * Fixed leaks; fix for blastn formatting of parameters.
1684  *
1685  * Revision 1.46  1997/04/07  21:42:56  madden
1686  * Freed SeqLocPtr used for dust.
1687  *
1688  * Revision 1.45  1997/04/07  18:17:09  madden
1689  * Formatted parameters for Stephen.
1690  *
1691  * Revision 1.44  1997/04/04  20:44:09  madden
1692  * Added check for NULL return.
1693  *
1694  * Revision 1.43  1997/04/04  20:42:35  madden
1695  * Added function BioseqBlastEngineCore.
1696  *
1697  * Revision 1.42  1997/04/03  19:50:56  madden
1698  * Changes to use effective database length instead of the length of each
1699  * sequence in statistical calculations.
1700  *
1701  * Revision 1.41  1997/03/27  22:30:51  madden
1702  * Correctly checked for overlapping HSP's.
1703  *
1704  * Revision 1.40  1997/03/20  22:56:24  madden
1705  * Added gap_info to hsp.
1706  *
1707  * Revision 1.39  1997/03/20  21:52:10  madden
1708  * Fix for segmented query BioseqPtr when gapped alignment is performed.
1709  *
1710  * Revision 1.39  1997/03/20  21:52:10  madden
1711  * Fix for segmented query BioseqPtr when gapped alignment is performed.
1712  *
1713  * Revision 1.38  1997/03/14  22:06:11  madden
1714  * fixed MT bug in BlastReevaluateWithAmbiguities.
1715  *
1716  * Revision 1.37  1997/03/14  15:57:23  madden
1717  * Removed superfluous call to SeqAlignNew
1718  *
1719  * Revision 1.36  1997/03/14  15:22:11  madden
1720  * Fixed UMR of seqalign in BlastTwoSequencesCore.
1721  *
1722  * Revision 1.35  1997/03/11  14:38:40  madden
1723  * Added BlastSequencesOnTheFly and BlastTwoSequencesCore.
1724  *
1725  * Revision 1.34  1997/03/07  22:35:54  madden
1726  * Fix for BLASTOptionNew.
1727  *
1728  * Revision 1.33  1997/03/07  21:58:36  madden
1729  * Added Boolean gapped argument to BLASTOptionNew.
1730  *
1731  * Revision 1.32  1997/03/07  21:11:22  madden
1732  * Added in check for blastn on gapped calculations.
1733  *
1734  * Revision 1.31  1997/03/06  21:47:27  madden
1735  * Made FormatBlastParameters non-static.
1736  *
1737  * Revision 1.30  1997/03/05  18:16:16  madden
1738  * SeqIdFree replaced by SeqIdSetFree, fixed memory leak.
1739  *
1740  * Revision 1.29  1997/03/05  14:29:46  madden
1741  * Moved BlastSaveCurrentHsp from blast.c; Added function CheckHspOverlap.
1742  *
1743  * Revision 1.28  1997/03/04  21:34:59  madden
1744  * Added in HspArrayPurge.
1745  *
1746  * Revision 1.27  1997/03/04  20:08:19  madden
1747  * Moved gapped alignment code from blast.c to blastutl.c
1748  *
1749  * Revision 1.26  1997/03/03  22:39:45  madden
1750  * Moved code from blast.c to blastutl.c.
1751  *
1752  * Revision 1.25  1997/03/03  21:47:22  madden
1753  * Moved functions from blast.c to blastutl.c for 16-bit windows.
1754  *
1755  * Revision 1.24  1997/03/03  20:58:09  madden
1756  * Fixed offsets for minus strands.
1757  *
1758  * Revision 1.23  1997/03/03  17:30:21  madden
1759  * Set SeqAlignPtr to NULL in BlastTwoSequences and BlastBioseqEngine, possible UMR.
1760  *
1761  * Revision 1.22  1997/03/01  18:25:33  madden
1762  * reverse flag added to BlastGetGappedAlignmentTraceback functions.
1763  *
1764  * Revision 1.21  1997/02/27  22:47:07  madden
1765  * Replaced tblastx with tblastn in BioseqBlastEngine.
1766  *
1767  * Revision 1.20  1997/02/26  23:39:54  madden
1768  * Added Txdfline stuff.
1769  *
1770  * Revision 1.19  1997/02/26  20:37:31  madden
1771  * Added *error_returns to BioseqBlastEngine.
1772  *
1773  * Revision 1.18  1997/02/25  19:17:05  madden
1774  * Changes to BioseqBlastEngine.
1775  *
1776  * Revision 1.17  1997/02/20  23:00:34  madden
1777  * Checked for NULL return in BlastTwoSequences.
1778  *
1779  * Revision 1.16  1997/02/20  18:38:34  madden
1780  * Set Default db_length to zero in Options.
1781  *
1782  * Revision 1.15  1997/02/19  16:25:22  madden
1783  * Reset gapped_calculation for blastn; returned proper SeqAlign for blastx, tblastn
1784  * in BioseqBlastEngine.
1785  *
1786  * Revision 1.14  1997/02/19  13:45:13  madden
1787  * replaced zero in call to BlastGetGappedAlignmentTraceback with FALSE.
1788  *
1789  * Revision 1.13  1997/02/18  22:09:02  madden
1790  * Removed unused variable.
1791  *
1792  * Revision 1.12  1997/02/18  21:03:00  madden
1793  * Changes to BioseqBlastEngine for gapped calculations.
1794  *
1795  * Revision 1.11  1997/02/18  18:31:34  madden
1796  * Used SeqIdFindBest in BlastTwoSequences.
1797  *
1798  * Revision 1.10  1997/02/18  17:58:52  madden
1799  * Added BioseqBlastEngine.
1800  *
1801  * Revision 1.9  1997/02/14  17:17:59  madden
1802  * Changes to default options and BlastTwoSequences for nucl.
1803  * sequences with ambiguites.
1804  *
1805  * Revision 1.8  1997/02/13  18:23:56  madden
1806  * Fixed ID type from BlastTwoSequences.
1807  *
1808  * Revision 1.7  1997/02/11  19:30:54  madden
1809  * Changes to BlastTwoSequences for gapped alignments.
1810  *
1811  * Revision 1.6  1997/02/10  20:03:58  madden
1812  * BlastTwoSequences indexes only the subject.
1813  *
1814  * Revision 1.5  1997/02/10  15:24:26  madden
1815  * Removed unused variable.
1816  *
1817  * Revision 1.4  1997/02/07  22:43:03  madden
1818  * Moved BLAST_WordFinderNew and Destruct from blast.c to blastutl.c, made
1819  * non-static.
1820  *
1821  * Revision 1.3  1997/02/07  22:32:40  madden
1822  * Changed prototypes for BlastGetSubjectId and GetSeqAlignForResultHitList.
1823  *
1824  * Revision 1.2  1997/02/05  13:36:48  madden
1825  * Removed Unused variable.
1826  *
1827  * Revision 1.1  1997/02/04  18:23:58  madden
1828  * Initial revision
1829  *
1830 */
1831 
1832 #define NLM_GENERATED_CODE_PROTO
1833 #include <ncbi.h>
1834 #include <blast.h>
1835 #include <blastpri.h>
1836 #include <objcode.h>
1837 #include <objseq.h>
1838 #include <sequtil.h>
1839 #include <tofasta.h>
1840 #include <seqport.h>
1841 #include <readdb.h>
1842 #include <ncbithr.h>
1843 #include <blast_dust.h>
1844 #include <urkpcc.h>
1845 #include <txalign.h>
1846 #include <seg.h>
1847 #include <salpedit.h>
1848 #include <mbalign.h>
1849 #include <mblast.h>
1850 #include <vecscrn.h>
1851 #include <rpsutil.h>
1852 #include <simutil.h>
1853 #include <blfmtutl.h>
1854 
1855 typedef struct _pgp_blast_options {
1856     BLAST_OptionsBlkPtr options;
1857     CharPtr blast_database;
1858     BioseqPtr query_bsp, fake_bsp;
1859     Int4 number_of_descriptions, number_of_alignments;
1860     FILE *infp, *outfp;
1861     AsnIoPtr aip_out;
1862     Boolean html;
1863     Boolean believe_query;
1864     Uint4 align_options, print_options;
1865   /* PHI-PSI Blast variables */
1866     Uint1 featureOrder[FEATDEF_ANY];
1867     Uint1 groupOrder[FEATDEF_ANY];
1868     Int4 program_flag;
1869     CharPtr patfile;
1870     FILE *patfp;
1871     seedSearchItems *seedSearch;
1872 } PGPBlastOptions, PNTR PGPBlastOptionsPtr;
1873 
/* Width of the window slid along an HSP to find its highest-scoring
   region; the gapped extension is started from that region. */
#define HSP_MAX_WINDOW 11

/* Filesystem path for BLAST filter files.
   NOTE(review): the code that uses this path is outside this part of the
   file - confirm what is expected to live there. */
#define BLASTFILTER_DIR "/usr/ncbi/blast/filter"
1879 
1880 static SeqIdPtr
1881 BlastGetFirstGiofSubset(ReadDBFILEPtr rdfp, Int4 ordinal_id, Int2 aliasfilebit)
1882 {
1883     Boolean     not_done = TRUE;
1884     SeqIdPtr    bestid = NULL, tmp_seqid, seqid=NULL;
1885     Uint4       header_index = 0;
1886     Int4        gi = 0;
1887     Int4        alias_mask;
1888     BlastDefLinePtr bdfp;
1889     
1890     if (!rdfp->cih && rdfp->formatdb_ver < FORMATDB_VER) {
1891         /* FORMATDB_VER_TEXT version requires the common index
1892          * to determine the subset databases */
1893         ErrPostEx(SEV_ERROR, 0, 0, "Database mask cannot be used without CommonIndex");
1894         return NULL;
1895     }
1896     
1897     alias_mask = (0x1 << rdfp->aliasfilebit);
1898     
1899     bdfp = NULL;
1900     if(rdfp->formatdb_ver > FORMATDB_VER_TEXT) {
1901         bdfp = FDReadDeflineAsn(rdfp, ordinal_id);
1902         if(bdfp == NULL) {
1903             ErrPostEx(SEV_ERROR, 0, 0, "Failure to read defline ASN for %d", 
1904                       ordinal_id);
1905             return NULL;
1906         }
1907 
1908         bestid = SeqIdFindBest(bdfp->seqid, SEQID_GI);
1909         if (bestid->choice == SEQID_GI) {
1910             gi = bestid->data.intvalue;
1911             ValNodeAddInt(&seqid, SEQID_GI, gi);
1912         }
1913         bdfp = BlastDefLineSetFree(bdfp);
1914 
1915         return seqid;
1916     }
1917     
1918     while (not_done) {
1919         CommonIndexPtr  cigi;
1920 
1921         /* get seqid from database headers file */
1922         not_done = readdb_get_header (rdfp, ordinal_id, &header_index, &tmp_seqid, NULL);
1923 
1924         if (not_done == FALSE)
1925             break;
1926 
1927         if (not_done) {
1928             /* get gi number */
1929             bestid = SeqIdFindBest(tmp_seqid, SEQID_GI);
1930             if (bestid->choice != SEQID_GI) {
1931                 tmp_seqid = SeqIdSetFree(tmp_seqid);
1932                 break;
1933             }
1934             gi = bestid->data.intvalue;
1935             
1936             /* get database commonindex mask */
1937             cigi = rdfp->cih->ci + gi;
1938             if (alias_mask & SwapUint4(cigi->dbmask)) {
1939                 ValNodeAddInt(&seqid, SEQID_GI, gi);
1940                 break;
1941             }
1942             tmp_seqid = SeqIdSetFree(tmp_seqid);
1943         }
1944     }
1945     tmp_seqid = SeqIdSetFree(tmp_seqid);
1946 
1947     return seqid;
1948 }
1949 
1950 #define BLAST_ITER_MAX 30
1951 
1952 /*
1953   Goes through the list of gi's/ordinal id's looking for matches
1954   to the ordinal ID.  Returns those acceptable gi's as SeqIdPtr's.
1955 */
1956 SeqIdPtr
1957 BlastGetAllowedGis (BlastSearchBlkPtr search, Int4 ordinal_id, SeqIdPtr PNTR seqid)
1958 {
1959     BlastGiListPtr blast_gi_list;
1960     Boolean found=FALSE;
1961     BlastDoubleInt4Ptr *gi_list_pointer;
1962     Int4 index, total, first, last, current;
1963     ValNodePtr gi_list=NULL;
1964     
1965     if (seqid)
1966         *seqid = NULL;
1967     gi_list = NULL;
1968     if (search->thr_info->blast_gi_list) {
1969         blast_gi_list = search->thr_info->blast_gi_list;
1970         total = blast_gi_list->total;
1971         found = FALSE;
1972         gi_list_pointer = blast_gi_list->gi_list_pointer;
1973         first = 0;
1974         last = total;
1975         for (index=0; index<BLAST_ITER_MAX; index++) {
1976             current = (first+last)/2;
1977             if (ordinal_id < gi_list_pointer[current]->ordinal_id)
1978                 last = current;
1979             else if (ordinal_id > gi_list_pointer[current]->ordinal_id)
1980                 first = current;
1981             else {      /* back up looking for all gi's associated with this oid. */
1982                 while (current > 0 && 
1983                        ordinal_id == gi_list_pointer[current-1]->ordinal_id)
1984                     current--;
1985                 found = TRUE;
1986                 break;
1987             }
1988         }
1989         
1990         if (found) {
1991             while (current < total) {
1992                 if (ordinal_id == gi_list_pointer[current]->ordinal_id) {
1993                     ValNodeAddInt(&gi_list, SEQID_GI, blast_gi_list->gi_list_pointer[current]->gi);
1994                 } else {
1995                     break;
1996                 }
1997                 current++;
1998             }
1999         }
2000 
2001         if (seqid && search->rdfp && search->rdfp->aliasfilebit != 0) {
2002             *seqid = BlastGetFirstGiofSubset(search->rdfp, ordinal_id, search->rdfp->aliasfilebit);
2003         }
2004         return (SeqIdPtr) gi_list;
2005     } else  if (search->rdfp != NULL && search->rdfp->oidlist != NULL) {
2006         /* if we have at least one mask, then we need print only those gis, which
2007            are in the database list (reals and masks) */
2008         
2009         Boolean not_done = TRUE;
2010         SeqIdPtr        bestid = NULL, tmp_seqid = NULL;
2011         Uint4   header_index = 0;
2012         Int4    gi = 0;
2013         Int4    mask;
2014         Int2    firstpos, curfirstpos;
2015         ReadDBFILEPtr   rdfp = search->rdfp, tmprdfp;
2016         BlastDefLinePtr bdfp, bdfp_head;
2017         
2018         if (!rdfp->cih && rdfp->formatdb_ver < FORMATDB_VER) {
2019             /* FORMATDB_VER_TEXT version requires the common index
2020              * to determine the subset databases */
2021            /*ErrPostEx(SEV_ERROR, 0, 0, "Database mask cannot be used without CommonIndex");*/
2022             return NULL;
2023         }
2024 
2025         /* kludge: only protein databases are non-redundant */
2026         if (readdb_is_prot(search->rdfp) == FALSE)
2027             return NULL;
2028         
2029         bdfp = NULL; bdfp_head = NULL;
2030         if(rdfp->formatdb_ver > FORMATDB_VER_TEXT) {
2031             /* just chain the seqid's returned, as they are filtered in 
2032              * FDReadDeflineAsn according to the membership_bit in the 
2033              * rdfp */
2034             bdfp = FDReadDeflineAsn(rdfp, ordinal_id);
2035             if(bdfp == NULL) {
2036                 ErrPostEx(SEV_ERROR, 0, 0, "Failure to read defline ASN for %d", ordinal_id);
2037                 return NULL;
2038             }
2039             for (bdfp_head = bdfp; bdfp; bdfp = bdfp->next) {
2040                 bestid = SeqIdFindBest(bdfp->seqid, SEQID_GI);
2041                 if (bestid->choice == SEQID_GI) {
2042                     gi = bestid->data.intvalue;
2043                     ValNodeAddInt(&gi_list, SEQID_GI, gi);
2044                 }
2045             }
2046 
2047             BlastDefLineSetFree(bdfp_head);
2048             
2049         } else {
2050         
2051             while (not_done) {
2052                 CommonIndexPtr  cigi;
2053 
2054                 /* get seqid from database headers file */
2055                 not_done = readdb_get_header (search->rdfp, ordinal_id, &header_index, &tmp_seqid, NULL);
2056 
2057                 if (not_done == FALSE)
2058                     break;
2059 
2060                 if (not_done) {
2061                     /* get gi number */
2062                     bestid = SeqIdFindBest(tmp_seqid, SEQID_GI);
2063                     if (bestid->choice != SEQID_GI) {
2064                         tmp_seqid = SeqIdSetFree(tmp_seqid);
2065                         break;
2066                     }
2067                     gi = bestid->data.intvalue;
2068                     
2069                     /* get database commonindex mask */
2070                     cigi = search->rdfp->cih->ci + gi;
2071                     mask = SwapUint4(cigi->dbmask);
2072 
2073                     firstpos = 0;
2074                     while (((curfirstpos = bit_engine_firstbit(mask)) != -1)) {
2075                         CharPtr         dbname;
2076 
2077                         firstpos += curfirstpos;
2078 
2079                         dbname = DBName(search->rdfp->cih->num_of_DBs,
2080                                         search->rdfp->cih->dbids, firstpos);
2081 
2082                         /* search in rdfp list this database */
2083                         tmprdfp = search->rdfp;
2084                         while (tmprdfp) {
2085                             if (tmprdfp->aliasfilename) {
2086                                 /* use mask name, if exists */
2087                                 if (!StrCmp(dbname, tmprdfp->aliasfilename)) {
2088                                     ValNodeAddInt(&gi_list, SEQID_GI, gi);
2089                                 }
2090                             } else {
2091                                 /* use real file name */
2092                                 if (!StrCmp(dbname, tmprdfp->filename)) {
2093                                     ValNodeAddInt(&gi_list, SEQID_GI, gi);
2094                                 }
2095                             }
2096                             tmprdfp = tmprdfp->next;
2097                         }
2098                         mask >>= (curfirstpos + 1);
2099                         firstpos++;
2100                     }
2101                 }
2102                 
2103                 if (tmp_seqid) {
2104                     tmp_seqid = SeqIdSetFree(tmp_seqid);
2105                 }
2106             }
2107         }
2108         if (seqid)
2109             *seqid = BlastGetFirstGiofSubset(search->rdfp, ordinal_id, search->rdfp->aliasfilebit);
2110 
2111         return (SeqIdPtr) gi_list;
2112     }
2113         
2114     return NULL;
2115 }
2116 
2117 /* 
2118         SOME FUNCTIONS TO PRODUCE A SeqAlign from the BLAST results.
2119 */
2120 
2121 /*****************************************************************************
2122 
2123         Finds the best SeqId for the SeqAlign.  Looks for the GI, then takes
2124         anything if that's not found and makes up a local ID if no ID is
2125         found at all.
2126 *****************************************************************************/
2127 
2128 SeqIdPtr
2129 GetTheSeqAlignID(SeqIdPtr seq_id)
2130 {
2131         SeqIdPtr new_id, ret_id;
2132         ObjectIdPtr obidp;
2133         
2134         ret_id = NULL;
2135         if (seq_id)
2136         {
2137                 /* Get the gi from the chain, if it's there. */
2138                 new_id = SeqIdFindBest(seq_id, SEQID_GI);
2139                 if (new_id)
2140                 {
2141                         ret_id = SeqIdDup(new_id);
2142                 }
2143                 else
2144                 {       /* No Gi was found, use any ID. */
2145                         ret_id = SeqIdDup(seq_id);
2146                 }
2147         }
2148 
2149         if (ret_id == NULL)
2150         {       /* make up an ID. */
2151                 obidp = ObjectIdNew();
2152                 obidp->str = StringSave("lcl|unknown");
2153                 ValNodeAddPointer(&ret_id, SEQID_LOCAL, obidp);
2154         }
2155 
2156         return ret_id;
2157 }
2158 static SeqAlignPtr 
2159 FillInSegsInfo(SeqAlignPtr sap_head, StdSegPtr ssp_head, DenseDiagPtr ddp_head)
2160 
2161 {
2162         SeqAlignPtr sap;
2163 
2164         if (ddp_head || ssp_head)
2165         {
2166                 if (sap_head)
2167                 {
2168                         sap = sap_head;
2169                         while (sap->next)
2170                                 sap = sap->next;
2171                         sap->next = SeqAlignNew();
2172                         sap = sap->next;
2173                 }
2174                 else
2175                 {
2176                         sap_head = sap = SeqAlignNew();
2177                 }
2178 
2179                 if (ddp_head)
2180                 {
2181                         sap->type = 2;
2182                         sap->segs = ddp_head;
2183                         sap->segtype = 1;
2184                 }
2185                 else if (ssp_head)
2186                 {
2187                         sap->type = 2;
2188                         sap->segs = ssp_head;
2189                         sap->segtype = 3;
2190                 }
2191         }
2192         return sap_head;
2193 }
2194 
2195 
2196 /*************************************************************************
2197 *
2198 *       This function fills in the DenseDiag Information from the variable
2199 *       hsp.  On the first call to this function *old should be
2200 *       NULL, after that pass in the head of the DenseDiagPtr chain.
2201 *       The newest DenseDiagPtr is returned.
2202 *
2203 ************************************************************************/
2204 
2205 static DenseDiagPtr
2206 FillInDenseDiagInfo(DenseDiagPtr PNTR old, BLASTResultHspPtr hsp, Boolean reverse, Int4 query_length, Int4 subject_length, SeqIdPtr gi_list)
2207 
2208 {
2209         DenseDiagPtr            ddp, new;
2210 
2211         new = DenseDiagNew();
2212         
2213         new->dim = 2;   /* Only 2 is supported in spec. */
2214         new->len = hsp->query_length;
2215         new->starts = (Int4Ptr) MemNew(2 * sizeof(Int4));
2216         new->strands = (Uint1Ptr) MemNew(2 * sizeof(Uint1));
2217         if (reverse)
2218         {
2219                 if (hsp->subject_frame >= 0)
2220                 {
2221                         new->strands[0] = Seq_strand_plus;
2222                         new->starts[0] = hsp->subject_offset;
2223                 }
2224                 else
2225                 {
2226                         new->strands[0] = Seq_strand_minus;
2227                         new->starts[0] = subject_length - hsp->subject_offset - hsp->subject_length;
2228                 }
2229                 if (hsp->query_frame >= 0)
2230                 {
2231                         new->strands[1] = Seq_strand_plus;
2232                         new->starts[1] = hsp->query_offset;
2233                 }
2234                 else
2235                 {
2236                         new->strands[1] = Seq_strand_minus;
2237                         new->starts[1] = query_length - hsp->query_offset - hsp->query_length;
2238                 }
2239         }
2240         else
2241         {
2242                 if (hsp->query_frame >= 0)
2243                 {
2244                         new->strands[0] = Seq_strand_plus;
2245                         new->starts[0] = hsp->query_offset;
2246                 }
2247                 else
2248                 {
2249                         new->strands[0] = Seq_strand_minus;
2250                         new->starts[0] = query_length - hsp->query_offset - hsp->query_length;
2251                 }
2252                 if (hsp->subject_frame >= 0)
2253                 {
2254                         new->strands[1] = Seq_strand_plus;
2255                         new->starts[1] = hsp->subject_offset;
2256                 }
2257                 else
2258                 {
2259                         new->strands[1] = Seq_strand_minus;
2260                         new->starts[1] = subject_length - hsp->subject_offset - hsp->subject_length;
2261                 }
2262         }
2263         new->scores = GetScoreSetFromBlastResultHsp(hsp, gi_list);
2264 
2265 /* Go to the end of the chain, and then attach "new" */
2266         if (*old)
2267         {
2268                 ddp = *old;
2269                 while (ddp->next)
2270                         ddp = ddp->next;
2271                 ddp->next = new;
2272         }
2273         else
2274         {
2275                 *old = new;
2276         }
2277 
2278         new->next = NULL;
2279 
2280         return new;
2281 }
2282 
2283 /*************************************************************************
2284 *
2285 *       This function fills in the StdSeg Information from the variable
2286 *       hsp.  On the first call to this function *old should be
2287 *       NULL, after that pass in the head of the DenseDiagPtr chain.
2288 *       The newest StdSegPtr is returned.
2289 *
2290 ************************************************************************/
2291 static StdSegPtr
2292 FillInStdSegInfo(BlastSearchBlkPtr search, Int4 subject_id, Int4 length, StdSegPtr PNTR old, BLASTResultHspPtr hsp, SeqIdPtr sip, Boolean reverse, SeqIdPtr gi_list)
2293 
2294 {
2295         Int4                    subject_length;
2296         StdSegPtr               ssp, new;
2297         SeqIdPtr                query_sip, subject_sip;
2298         SeqIntPtr               seq_int1, seq_int2;
2299         SeqLocPtr               slp=NULL;
2300 
2301         new = StdSegNew();
2302 /* Duplicate the id and split it up into query and subject parts */
2303         query_sip = SeqIdDup(sip);
2304         subject_sip = SeqIdDup(sip->next);
2305         
2306         new->dim = 2;   /* Only 2 is supported in spec. */
2307         seq_int1 = SeqIntNew();
2308         if (hsp->query_frame == 0)
2309         {
2310                 seq_int1->from = hsp->query_offset;
2311                 seq_int1->to = hsp->query_offset + hsp->query_length - 1;
2312                 seq_int1->strand = Seq_strand_unknown;
2313         }
2314         else if (hsp->query_frame < 0)
2315         {
2316                 seq_int1->to = search->context[hsp->context].query->original_length - CODON_LENGTH*hsp->query_offset + hsp->query_frame;
2317                 seq_int1->from = search->context[hsp->context].query->original_length - CODON_LENGTH*(hsp->query_offset+hsp->query_length) + hsp->query_frame + 1;
2318                 seq_int1->strand = Seq_strand_minus;
2319         }
2320         else if (hsp->query_frame > 0)
2321         {
2322                 seq_int1->from = CODON_LENGTH*(hsp->query_offset) + hsp->query_frame - 1;
2323                 seq_int1->to = CODON_LENGTH*(hsp->query_offset+hsp->query_length) + hsp->query_frame - 2;
2324                 seq_int1->strand = Seq_strand_plus;
2325         }
2326         seq_int1->id = query_sip;
2327         seq_int2 = SeqIntNew();
2328         if (hsp->subject_frame == 0)
2329         {
2330                 seq_int2->from = hsp->subject_offset;
2331                 seq_int2->to = hsp->subject_offset + hsp->subject_length - 1;
2332                 seq_int2->strand = Seq_strand_unknown;
2333         } 
2334         else if (hsp->subject_frame < 0)
2335         {
2336                 if (search->rdfp)
2337                         subject_length = readdb_get_sequence_length(search->rdfp, subject_id);
2338                 else
2339                    subject_length = length;
2340 
2341                 seq_int2->from = subject_length - CODON_LENGTH*(hsp->subject_offset + hsp->subject_length) + hsp->subject_frame + 1;
2342                 seq_int2->to = subject_length - CODON_LENGTH*(hsp->subject_offset) + hsp->subject_frame;
2343                 seq_int2->strand = Seq_strand_minus;
2344         }
2345         else if (hsp->subject_frame > 0)
2346         {
2347                 seq_int2->from = CODON_LENGTH*(hsp->subject_offset) + hsp->subject_frame - 1;
2348                 seq_int2->to = CODON_LENGTH*(hsp->subject_offset + hsp->subject_length) + hsp->subject_frame - 2;
2349                 seq_int2->strand = Seq_strand_plus;
2350         }
2351         seq_int2->id = subject_sip;
2352 
2353         if (reverse)
2354         {
2355                 ValNodeAddPointer(&slp, SEQLOC_INT, seq_int2); 
2356                 ValNodeAddPointer(&slp, SEQLOC_INT, seq_int1); 
2357         }
2358         else
2359         {
2360                 ValNodeAddPointer(&slp, SEQLOC_INT, seq_int1); 
2361                 ValNodeAddPointer(&slp, SEQLOC_INT, seq_int2); 
2362         }
2363         new->loc = slp;
2364 
2365         search->subject->sequence = MemFree(search->subject->sequence);
2366         new->scores = GetScoreSetFromBlastResultHsp(hsp, gi_list);
2367 
2368 /* Go to the end of the chain, and then attach "new" */
2369         if (*old)
2370         {
2371                 ssp = *old;
2372                 while (ssp->next)
2373                         ssp = ssp->next;
2374                 ssp->next = new;
2375         }
2376         else
2377         {
2378                 *old = new;
2379         }
2380 
2381         new->next = NULL;
2382 
2383         return new;
2384 }
2385 
2386 /************************************************************************
2387 *
2388 *       This function assembles all the components of the Seq-align from
2389 *       a "sparse" BLAST HitList.  "sparse" means that the hitlist 
2390 *       may contain no sequence and not even a descriptor.  It is only 
2391 *       required to contain the sequence_number that readdb refers to
2392 *       and scoring/alignment information.
2393 *
2394 *       If dbname is non-NULL, then only a general ("gnl") ID is 
2395 *       issued, with the ordinal number of the subject sequence in
2396 *       the ObjectIdPtr.
2397 *
2398 *       Boolean reverse: reverse the query and db order in SeqAlign.
2399 *
2400 ************************************************************************/
/*
 * Summary of the code below:
 *
 * Walks every hit list stored in search->result_struct and returns the head
 * of a linked list of SeqAligns (NULL when hitlist_count is 0).  A new
 * SeqAlign is created for each subject SeqId of each hit list, and one
 * FillInDenseDiagInfo/FillInStdSegInfo call is made per HSP of that hit.
 *
 *   getdensediag        TRUE:  segments are built with FillInDenseDiagInfo
 *                              (segtype 1 when not discontinuous);
 *                       FALSE: segments are built with FillInStdSegInfo
 *                              (segtype 3 when not discontinuous).
 *   ordinal_number      forwarded unchanged to BlastGetSubjectId.
 *   discontinuous       TRUE: HSPs are grouped by hsp->hspset_cnt through
 *                       FillInSegsInfo and the SeqAlign gets segtype 5.
 *   reverse             TRUE: the subject id is placed before the query id
 *                       in each segment's id pair; also forwarded to the
 *                       FillIn* helpers.
 *   get_redundant_seqs  TRUE: BlastGetSubjectId is asked for a ValNode list
 *                       of ids and each one is converted with
 *                       GetTheSeqAlignID.
 */
2401 SeqAlignPtr LIBCALL
2402 GetSeqAlignForResultHitList(BlastSearchBlkPtr search, Boolean getdensediag, Boolean ordinal_number, Boolean discontinuous, Boolean reverse, Boolean get_redundant_seqs)
2403 
2404 {
2405         BLASTResultHspPtr       hsp;
2406         BLASTResultHitlistPtr   results;
2407         BLASTResultsStructPtr   result_struct;
2408         DenseDiagPtr            ddp_head=NULL, ddp;
2409         SeqIdPtr                gi_list=NULL, sip, sip_subject,
2410            sip_subject_start, query_id, new_sip;
2411         StdSegPtr               ssp_head=NULL, ssp;
2412         SeqAlignPtr             last, seqalign_head, seqalign, sap_head;
2413         Int4                    hsp_cnt, index, index2, hspset_cnt_old, i;
2414         Int4                    hitlist_count;
2415         Int4                    subject_length;
2416         ValNodePtr              vnp, vnp_start;
2417 
2418         ddp_head = NULL;
2419         ssp_head = NULL;
2420         sap_head = NULL;
2421         seqalign_head = NULL;
2422 
2423         /* discontinuous = FALSE; */
2424         result_struct = search->result_struct;
2425         hitlist_count = result_struct->hitlist_count;
2426 
2427         last = NULL;
2428         sip = NULL;
2429         sip_subject_start = NULL;
        /* One pass per saved hit list. */
2430         for (index=0; index<hitlist_count; index++)
2431         {
2432             results = result_struct->results[index];
2433             sip_subject_start = NULL;
            /* Build the chain of subject ids (sip_subject_start) for this hit. */
2434             if (get_redundant_seqs)
2435             {
2436                 vnp = NULL;
2437                 sip = BlastGetSubjectId(search, index, ordinal_number, &vnp);
2438                 vnp_start = vnp;
                /* Convert every id on the ValNode list and append it to the
                   chain; the ValNode's own id is freed once converted. */
2439                 while (vnp)
2440                 {
2441                         sip = GetTheSeqAlignID(vnp->data.ptrvalue);
2442                         SeqIdFree(vnp->data.ptrvalue);
2443                         if (sip_subject_start == NULL)
2444                         {
2445                                 sip_subject_start = sip;
2446                         }
2447                         else
2448                         {
2449                                 sip_subject = sip_subject_start;
2450                                 while (sip_subject->next)
2451                                         sip_subject = sip_subject->next;
2452                                 sip_subject->next = sip;
2453                         }
2454                         vnp = vnp->next;
2455                 }
2456                 vnp_start = vnp = ValNodeFree(vnp_start);
2457             }
2458             else
2459             {
                /* Single id: convert it, then release the id set returned
                   by BlastGetSubjectId. */
2460                 sip = BlastGetSubjectId(search, index, ordinal_number, NULL);
2461                 sip_subject_start = sip_subject = GetTheSeqAlignID(sip);
2462                 sip = SeqIdSetFree(sip);
2463             }
2464 
2465             results = result_struct->results[index];
            /* Subject length comes from the database handle when there is
               one, otherwise from the saved subject_info, otherwise 0. */
2466             if (search->rdfp)
2467                 subject_length = readdb_get_sequence_length(search->rdfp, results->subject_id);
2468             else if (results->subject_info)
2469                         subject_length = results->subject_info->length;
2470             else
2471                         subject_length = 0;
2472 
        /* NOTE(review): new_sip is not initialized in this function; this
           assumes BlastGetAllowedGis always stores through its last
           argument -- TODO confirm. */
2473         gi_list = BlastGetAllowedGis(search, results->subject_id, &new_sip);
2474         /* right now sip_subject should only contain one ID.  At some
2475         point it will contain multiple ID's for identical sequences. */
            /* An id handed back in new_sip takes precedence over the chain
               built above. */
2476             if (new_sip != NULL)
2477                sip_subject = new_sip;
2478             else
2479                sip_subject = sip_subject_start;
            /* One SeqAlign per subject id, appended to the returned list. */
2480             while (sip_subject)
2481             {
2482                 seqalign = SeqAlignNew();
2483                 seqalign->type = 2;             /* alignment is diags */
2484                 if (last == NULL)       /* First sequence. */
2485                         seqalign_head = seqalign;
2486                 else
2487                         last->next = seqalign;
2488 
2489                 last = seqalign;
2490                 
2491                 hspset_cnt_old = -1;
2492                 hsp_cnt = results->hspcnt;
2493                 for (index2=0; index2<hsp_cnt; index2++)
2494                 {
2495                         hsp = &(results->hsp_array[index2]);
                        /* In discontinuous mode a change of hspset_cnt closes
                           the current group: the segments collected so far
                           are handed to FillInSegsInfo and a new group starts. */
2496                         if (discontinuous && hspset_cnt_old != hsp->hspset_cnt)
2497                         {
2498                             hspset_cnt_old = hsp->hspset_cnt;
2499                             if (index2 != 0)
2500                             { /* nothing to save on first pass. */
2501                                 if (getdensediag)
2502                                 {
2503                                         sap_head = FillInSegsInfo(sap_head, NULL, ddp_head);
2504                                         ddp_head = NULL;
2505                                 }
2506                                 else
2507                                 {
2508                                         sap_head = FillInSegsInfo(sap_head, ssp_head, NULL);
2509                                         ssp_head = NULL;
2510                                 }
2511                             }
2512                         }
2513 
                        /* For blastn, step hsp->context/2 entries along the
                           search->query_id list to reach this HSP's query id. */
2514                         query_id = search->query_id;
2515                         if (search->prog_number==blast_type_blastn) {
2516                            for (i=0; i<hsp->context/2; i++)
2517                               query_id = query_id->next;
2518                         }
                        /* Two-element id chain for the segment: query then
                           subject, or subject then query when reverse is set. */
2519                         if (reverse)
2520                         {
2521                                 sip = SeqIdDup(sip_subject);
2522                                 sip->next = GetTheSeqAlignID(query_id);
2523                         }
2524                         else
2525                         {
2526                                 sip = GetTheSeqAlignID(query_id);
2527                                 sip->next = SeqIdDup(sip_subject);
2528                         }
2529 
2530                         if (getdensediag)
2531                         {
2532                                 ddp = FillInDenseDiagInfo(&ddp_head, hsp, reverse, search->context[hsp->context].query->length, subject_length, gi_list);
2533                                 ddp->id = sip;
2534                         }
2535                         else
2536                         {
2537                             Int4 length = 0;
2538 
2539                             if (results->subject_info)
2540                                 length = results->subject_info->length;
2541 
2542                             ssp = FillInStdSegInfo(search, results->subject_id, length, &ssp_head, hsp, sip, reverse, gi_list);
2543                             ssp->ids = sip;
2544                         }
2545                         sip = NULL; /* This SeqIdPtr is now on the SeqAlign. */
2546                 }
2547 
                /* Attach the collected segments to the SeqAlign and reset
                   the per-SeqAlign list heads. */
2548                 if (discontinuous)
2549                 {
2550                         if (getdensediag)
2551                         {
2552                                 sap_head = FillInSegsInfo(sap_head, NULL, ddp_head);
2553                                 ddp_head = NULL;
2554                         }
2555                         else
2556                         {
2557                                 sap_head = FillInSegsInfo(sap_head, ssp_head, NULL);
2558                                 ssp_head = NULL;
2559                         }
2560                         seqalign->segs = sap_head;
2561                         seqalign->segtype = 5;  /* Discontinuous */
2562                 }
2563                 else
2564                 {
2565                         if (getdensediag)
2566                         {
2567                                 seqalign->segs = ddp_head;
2568                                 seqalign->segtype = 1;  /* DenseDiag */
2569                                 ddp_head = NULL;
2570                         }
2571                         else
2572                         {
2573                                 seqalign->segs = ssp_head;
2574                                 seqalign->segtype = 3;  /* StdSeg */
2575                                 ssp_head = NULL;
2576                         }
2577                 }
2578 
2579                 sap_head = NULL;
2580 
2581                 sip_subject = sip_subject->next;
2582              }
             /* NOTE(review): sip_subject_start (and possibly new_sip) can be
                a chain of several ids, while the id list above is released
                with SeqIdSetFree; confirm that SeqIdFree releases the whole
                chain here. */
2583              if (sip_subject_start)
2584                         sip_subject_start = SeqIdFree(sip_subject_start);
2585              if (new_sip)
2586                         new_sip = SeqIdFree(new_sip);
2587              gi_list = SeqIdSetFree(gi_list);
2588         }
2589 
2590         return seqalign_head;
2591 }
2592 
2593 /*
2594         "Core" function to compare two sequences, for use by 
2595         BlastTwoSequences and BlastSequencesOnTheFly.
2596 
2597         The subject_bsp is redundant with the subject_seq_start and
2598         subject_length (or vice versa), but the subject must be
2599         extracted from the subject_bsp for BlastTwoSequences anyway, while
2600         the title and ID are needed from subject_bsp.
2601 */
/*
 * Summary of the code below:
 *
 * Searches the query already set up in "search" against one subject and
 * saves the resulting hit list.
 *
 *   search          search block; its subject_info is replaced, and for a
 *                   blastp search with a posMatrix its result_struct and
 *                   word finders are rebuilt.
 *   subject_bsp     supplies the id(s) and title stored in subject_info.
 *   subject_seq     subject sequence handed to BLASTPerformSearch.
 *   subject_length  length handed to BLASTPerformSearch.
 *
 * Returns the non-zero status of BLASTPerformSearch when the search fails;
 * otherwise the value returned by BlastReapHitlistByEvalue.
 */
2602 static Int2
2603 BlastTwoSequencesCoreEx (BlastSearchBlkPtr search, BioseqPtr subject_bsp, Uint1Ptr subject_seq, Int4 subject_length)
2604 {
2605         Int2 status=0;
2606 
        /* Replace the subject description: without a results callback only
           the best id (SeqIdFindBest with SEQID_GI) is copied, with one the
           whole id set is copied. */
2607         search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
2608     if (!search->handle_results)
2609        search->subject_info = BLASTSubjectInfoNew(SeqIdDup(SeqIdFindBest(subject_bsp->id, SEQID_GI)), StringSave(BioseqGetTitle(subject_bsp)), subject_length);
2610     else
2611        search->subject_info = BLASTSubjectInfoNew(SeqIdSetDup(subject_bsp->id), StringSave(BioseqGetTitle(subject_bsp)), subject_length);
2612 
2613     /*CC: if search->sbp->posMatrix, we're comparing a pssm with a subject
2614      * sequence, thus we need to do some set up */
2615     if (search->sbp->posMatrix && search->prog_number == blast_type_blastp) {
2616         Int4 hitlist_max;
2617         BLAST_ScoreBlkPtr sbp = search->sbp;
2618         BLAST_ParameterBlkPtr pbp = search->pbp;
2619 
        /* Switch to the position-based Karlin blocks. */
2620         search->positionBased = TRUE;
2621         sbp->kbp = sbp->kbp_psi;
2622         sbp->kbp_gap = sbp->kbp_gap_psi;
        /* Start from an empty results structure with the same hitlist_max. */
2623         hitlist_max = search->result_struct->hitlist_max;
2624         search->result_struct =
2625             BLASTResultsStructDelete(search->result_struct);
2626                 search->result_struct = BLASTResultsStructNew(hitlist_max, 
2627             pbp->max_pieces, pbp->hsp_range_max);
2628 
        /* Rebuild each word finder that the "allocated" flags mark as
           belonging to this search block. */
2629         if (search->allocated & BLAST_SEARCH_ALLOC_WFP_FIRST) {
2630             search->wfp_first = BLAST_WordFinderDestruct(search->wfp_first);
2631                     search->wfp_first = BLAST_WordFinderNew(sbp->alphabet_size,
2632                     search->all_words->wordsize, 1, FALSE);
2633                 }
2634 
2635                 if (search->allocated & BLAST_SEARCH_ALLOC_WFP_SECOND) {
2636                     search->wfp_second = BLAST_WordFinderDestruct(search->wfp_second);
2637                     search->wfp_second = BLAST_WordFinderNew(sbp->alphabet_size,
2638                     search->all_words->wordsize, 1, FALSE);
2639                 }
2640 
2641                 /* threshold_first is defunct ! */
        /* Find words over the whole query or only the required range, using
           threshold_second; both word finders then share wfp_first. */
2642         search->wfp = search->wfp_first;
2643                 if (search->whole_query == TRUE)
2644             BlastNewFindWords(search, 0, search->context[0].query->length, 
2645                     pbp->threshold_second, (Uint1) 0);
2646                 else
2647                 BlastNewFindWords(search, search->required_start, 
2648                     search->required_end, pbp->threshold_second, (Uint1) 0);
2649         lookup_position_aux_destruct(search->wfp->lookup);
2650         search->wfp_second = search->wfp_first;
2651     }
2652         status = BLASTPerformSearch(search, subject_length, subject_seq);
2653         
        /* A failed search is recorded in search->error_return and ends the
           function. */
2654         if (status) {
2655                 BlastConstructErrorMessage("BlastTwoSequencesCoreEx", "non-zero status", 2, &(search->error_return));
2656                 return status;
2657         }
2658 
        /* tblastn with a positive longest_intron: expand the subject to one
           byte per base (1 << unpacked base), attach that buffer to
           search->subject and link the HSPs. */
2659         if (search->prog_number == blast_type_tblastn &&
2660             search->pbp->longest_intron > 0) {
2661            Uint1 rem;
2662            Uint1Ptr seq_4na, seq_2na, subject;
2663            Int4 i;
2664            /* Need to convert from ncbi2na to ncbi4na encoding */
2665            subject = (Uint1Ptr) MemNew(subject_length + 1);
2666            seq_4na = subject;
2667            seq_2na = subject_seq;
2668            rem = 3;
           /* rem counts down 3..0 over the bases of one input byte, then the
              input pointer advances. */
2669            for (i=0; i<subject_length; i++) {
2670               *seq_4na = (Uint1) (1 << READDB_UNPACK_BASE_N(*seq_2na, rem));
2671               seq_4na++;
2672               if (rem>0) rem--;
2673               else {
2674                  rem = 3;
2675                  seq_2na++;
2676               }
2677            }
           /* NOTE(review): the address passed is one byte before the start
              of the allocated buffer; presumably the callee treats it as a
              sequence_start that precedes the data -- confirm. */
2678            BlastSequenceAddSequence(search->subject, NULL, subject-1, subject_length, subject_length, 0);
2679            status = BlastLinkHsps(search);
2680         }
2681 
        /* blastn or ungapped searches: sum statistics (never for megablast)
           or plain per-HSP e-values. */
2682         if (StringCmp(search->prog_name, "blastn") == 0 || search->pbp->gapped_calculation == FALSE)
2683         {
2684             if (search->pbp->do_sum_stats == TRUE && 
2685                 !search->pbp->mb_params)
2686                 status = BlastLinkHsps(search);
2687             else
2688                 status = BlastGetNonSumStatsEvalue(search);
2689         }
2690         if (search->pbp->mb_params) {
2691            search->subject->sequence = subject_seq;
2692            MegaBlastReevaluateWithAmbiguities(search);
2693         }
        /* NOTE(review): the status values assigned by BlastLinkHsps and
           BlastGetNonSumStatsEvalue above are overwritten here without
           being checked. */
2694         status = BlastReapHitlistByEvalue(search);
2695 
        /* Hand the hits to the caller's callback if one is installed,
           otherwise save them with the regular or megablast routine. */
2696         if (search->handle_results)
2697            search->handle_results((VoidPtr) search);
2698         else if (!search->pbp->mb_params)
2699            BlastSaveCurrentHitlist(search);
2700         else
2701            MegaBlastSaveCurrentHitlist(search);
2702         if (search->pbp->mb_params)
2703            /* Free the ncbi4na-encoded sequence */
2704            search->subject->sequence_start = (Uint1Ptr)
2705               MemFree(search->subject->sequence_start);
2706 
        /* The subject block never keeps pointers to this subject's data. */
2707         search->subject->sequence = NULL;
2708         search->subject->sequence_start = NULL;
2709         if (search->prog_number==blast_type_blastn) {
2710            /* Unconcatenate the strands by adjusting the query offsets in
2711               all hsps */
2712            search->context[search->first_context].query->length = 
2713               search->query_context_offsets[search->first_context+1] - 1;
2714         }
2715 
2716         return status;
2717 }
2718 
2719 static BLAST_ScorePtr *RPS2SeqImpalaStatCorrections
2720         (BlastSearchBlkPtr search, Uint1Ptr subject_seq, Int4 subject_length)
2721 {
2722     BLAST_ScorePtr *retval = NULL;
2723     Nlm_FloatHi *scoreArray; /*array of score probabilities*/
2724     Nlm_FloatHi *resProb; /*array of probabilities for each residue*/
2725     BLAST_ScoreFreqPtr this_sfp, return_sfp; /*score frequency pointers to compute lambda*/
2726     BLAST_ScorePtr *posMatrix; /* position-specific matrix. */
2727     Nlm_FloatHi initialUngappedLambda, scaledInitialUngappedLambda, 
2728                   correctUngappedLambda, scalingFactor, lambdaRatio;
2729     Nlm_FloatHi temp1; /*intermediate variable for adjusting matrix*/
2730     Int4 temp2; /*intermediate variable for adjusting matrix*/
2731     Int4 seqlength; /* length of posMatrix (or target sequence). */
2732     Int4 i, j; /* loop indices */
2733 
2734     if (search == NULL)
2735            return retval;
2736 
2737     posMatrix = search->sbp->posMatrix;
2738     scalingFactor = search->pbp->scalingFactor;
2739  
2740     resProb = (Nlm_FloatHi *) MemNew (PRO_ALPHABET_SIZE * sizeof(Nlm_FloatHi));
2741     scoreArray = (Nlm_FloatHi *) MemNew(scoreRange * sizeof(Nlm_FloatHi));
2742     return_sfp = (BLAST_ScoreFreqPtr) MemNew(1 * sizeof(BLAST_ScoreFreq));
2743  
2744     seqlength = search->sbp->query_length;
2745  
2746     IMPALAfillResidueProbability(subject_seq, subject_length, resProb);
2747     this_sfp = IMPALAfillSfp(posMatrix, seqlength, resProb, scoreArray, 
2748                      return_sfp, scoreRange);
2749     initialUngappedLambda = IMPALAfindUngappedLambda(search->sbp->name);
2750     scaledInitialUngappedLambda = initialUngappedLambda/scalingFactor;
2751     correctUngappedLambda = impalaKarlinLambdaNR(this_sfp, scaledInitialUngappedLambda);
2752     if(correctUngappedLambda == -1.0) {
2753         ErrPostEx(SEV_ERROR, 0, 0, 
2754                   "RPS2SeqImpalaStatCorrections: Could not calculate ungapped "
2755                   "lambda for PSSM");
2756         MemFree(resProb);
2757         MemFree(scoreArray);
2758         MemFree(return_sfp);
2759         return retval;
2760     }
2761  
2762     lambdaRatio = correctUngappedLambda/scaledInitialUngappedLambda;
2763  
2764     retval = (BLAST_Score **) MemNew((seqlength+1) * sizeof(BLAST_Score *));
2765     for (i = 0; i < seqlength+1; i++)
2766         retval[i] = (BLAST_Score *)MemNew(PRO_ALPHABET_SIZE * 
2767                 sizeof(BLAST_Score));
2768  
2769     for (i = 0; i < seqlength+1; i++) {
2770         for (j = 0; j < PRO_ALPHABET_SIZE; j++) {
2771             if ((posMatrix[i][j] == BLAST_SCORE_MIN) || (Xchar == j))
2772                 retval[i][j] = posMatrix[i][j];
2773             else {
2774                 temp1 = ((Nlm_FloatHi) (posMatrix[i][j]));
2775                 temp1 = temp1 * (lambdaRatio);
2776                 temp2 = Nlm_Nint(temp1);
2777                 retval[i][j] = temp2;
2778             }
2779         }
2780     }
2781 
2782     resProb = MemFree(resProb);
2783     scoreArray = MemFree(scoreArray);
2784     return_sfp = MemFree(return_sfp);
2785 
2786     return retval;
2787 }
2788 
2789 static SeqAlignPtr 
2790 BlastTwoSequencesCore (BlastSearchBlkPtr search, SeqLocPtr slp, Uint1Ptr subject_seq, Int4 subject_length, Boolean reverse)
2791 
2792 {
2793         BLASTResultsStructPtr result_struct;
2794         BioseqPtr subject_bsp;
2795         Int2 status;
2796         Int4 index, hitlist_count, rev_subject_length=0;
2797         SeqAlignPtr seqalign=NULL;
2798         SeqPortPtr spp;
2799         Uint1 residue;
2800         Uint1Ptr sequence, sequence_start, rev_subject=NULL;
2801         SeqIdPtr sip;
2802     BLAST_ScorePtr *scaledMatrix = NULL, *copyMatrix = NULL;
2803 
2804         if (search == NULL || search->query_invalid)
2805                 return NULL;
2806 
2807         sip = SeqLocId(slp);
2808         subject_bsp = BioseqLockById(sip);
2809 
2810     /* Save subject sequence location for tabulated output */
2811     if (search->handle_results && SeqLocLen(slp) < subject_bsp->length)
2812        search->query_slp->next = slp;
2813 
2814         status = BlastTwoSequencesCoreEx(search, subject_bsp, subject_seq,
2815                                          subject_length);
2816 
2817         if (status == 0) {
2818         /*CC: if we're emulating rpsblast, do the impala style matrix
2819          * rescaling */
2820         if (search->positionBased && search->pbp->scalingFactor != 0.0) {
2821             scaledMatrix = RPS2SeqImpalaStatCorrections(search, subject_seq, 
2822                     subject_length);
2823             if ( !scaledMatrix ) {
2824                 BioseqUnlock(subject_bsp);
2825                 return NULL;
2826             }
2827             copyMatrix = search->sbp->posMatrix;
2828             search->sbp->posMatrix = scaledMatrix;
2829             
2830             if (search->sbp->karlinK != 0.0)
2831                 search->sbp->kbp_gap[0]->K =
2832                     PRO_K_MULTIPLIER*search->sbp->karlinK;
2833             search->sbp->kbp_gap[0]->logK = log(search->sbp->kbp_gap[0]->K);
2834             search->sbp->kbp_gap[0]->Lambda /= search->pbp->scalingFactor;
2835         }
2836           if (search->pbp->mb_params && !search->pbp->mb_params->no_traceback
2837               && !search->pbp->mb_params->use_dyn_prog) {
2838              seqalign = MegaBlastGapInfoToSeqAlign(search, 0, 0);
2839           } else if (StringCmp(search->prog_name, "blastn") == 0 &&
2840                    search->pbp->gapped_calculation == TRUE) {
2841              result_struct = search->result_struct;
2842              hitlist_count = result_struct->hitlist_count;
2843              if (hitlist_count > 0)
2844              {
2845                 spp = SeqPortNewByLoc(slp, Seq_code_ncbi4na);
2846                 if (subject_bsp->repr == Seq_repr_delta) 
2847                    SeqPortSet_do_virtual(spp, TRUE);
2848                 
2849                 /* make one longer to "protect" ALIGN. */
2850                 sequence_start = MemNew((2+SeqLocLen(slp))*sizeof(Uint1));
2851                 sequence_start[0] = ncbi4na_to_blastna[0];
2852                 sequence = sequence_start+1;
2853                 index=0;
2854                 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
2855                 {
2856                    if (IS_residue(residue))
2857                    {
2858                       sequence[index] = ncbi4na_to_blastna[residue];
2859                       index++;
2860                    }
2861                 }
2862                 /* Gap character in last space. */
2863                 sequence[index] = ncbi4na_to_blastna[0];
2864                 
2865                 if (!search->pbp->mb_params) {
2866                    /* Traditional Blastn */
2867                    seqalign = SumBlastGetGappedAlignmentTraceback(
2868                                  search, 0, reverse, FALSE, sequence, 
2869                                  SeqLocLen(slp));
2870                 } else if (!search->pbp->mb_params->no_traceback) {
2871                    /* Mega BLAST with non-greedy extension */
2872                    SumBlastGetGappedAlignmentEx(search, 0, FALSE, FALSE, 
2873                       sequence, SeqLocLen(slp), TRUE, &seqalign, NULL, 0);
2874                 }
2875 
2876                 sequence_start = MemFree(sequence_start);
2877                 spp = SeqPortFree(spp);
2878              }
2879           }
2880           else if (search->pbp->gapped_calculation == TRUE)
2881           {
2882         result_struct = search->result_struct;
2883         hitlist_count = result_struct->hitlist_count;
2884                 if (hitlist_count > 0) {
2885 
2886                    if (!StringCmp(search->prog_name, "tblastn")
2887                        || !StringCmp(search->prog_name, "psitblastn")) {
2888                       Uint1Ptr subject = NULL;
2889                       SeqPortPtr rev_spp;
2890                       if (slp->choice == SEQLOC_WHOLE) {
2891                          spp = SeqPortNew(subject_bsp, 0, -1, Seq_strand_plus, 
2892                                           Seq_code_ncbi4na);
2893                          rev_spp = SeqPortNew(subject_bsp, 0, -1, Seq_strand_minus,
2894                                               Seq_code_ncbi4na);
2895                       } else {
2896                          spp = SeqPortNew(subject_bsp, SeqLocStart(slp), 
2897                                           SeqLocStop(slp), Seq_strand_plus, 
2898                                           Seq_code_ncbi4na);
2899                          rev_spp = SeqPortNew(subject_bsp, SeqLocStart(slp), 
2900                                               SeqLocStop(slp), Seq_strand_minus,
2901                                               Seq_code_ncbi4na);
2902                       }
2903                       /* make one longer to "protect" ALIGN. */
2904                       subject = (Uint1Ptr) MemNew((1+subject_length)*sizeof(Uint1));
2905                       rev_subject = (Uint1Ptr) MemNew((1+subject_length)*sizeof(Uint1));
2906                       for (index=0; index<subject_length; index++) {
2907                          subject[index] = SeqPortGetResidue(spp);
2908                          rev_subject[index] = SeqPortGetResidue(rev_spp);
2909                       }
2910                       /* Gap character in last space. */
2911                       subject[subject_length] = NULLB;
2912                       rev_subject[subject_length] = NULLB;
2913                       rev_subject_length = subject_length;
2914                       spp = SeqPortFree(spp);
2915                       rev_spp = SeqPortFree(rev_spp);
2916                                       
2917                    
2918                       seqalign = BlastGetGapAlgnTbck(search, 0, reverse,
2919                           FALSE, subject, subject_length, 
2920                           rev_subject, rev_subject_length);
2921 
2922                       if (search->pbp->longest_intron <= 0)
2923                          MemFree(subject);
2924                       MemFree(rev_subject);
2925                    } else {
2926                       seqalign = BlastGetGapAlgnTbck(search, 0, reverse,
2927                           FALSE, subject_seq, subject_length, 
2928                           rev_subject, rev_subject_length);
2929                       result_struct->results[0]->seqalign = seqalign;
2930                    }
2931                 }
2932           }
2933           else /* Ungapped case, any program */
2934           {
2935              if (search->prog_number == blast_type_blastn || 
2936                  search->prog_number == blast_type_blastp)
2937                 seqalign = GetSeqAlignForResultHitList(search, TRUE, FALSE,
2938                               search->pbp->discontinuous, reverse, FALSE);
2939              else
2940                 seqalign = GetSeqAlignForResultHitList(search, FALSE, FALSE,
2941                               search->pbp->discontinuous, reverse, FALSE); 
2942           }
2943       /*CC: Revert changes done for psi-blast2sequences */
2944       if (search->positionBased && search->pbp->scalingFactor != 0.0) {
2945           if (scaledMatrix) {
2946               for (index = 0; index < search->sbp->query_length + 1; index++)
2947                   MemFree(scaledMatrix[index]);
2948               MemFree(scaledMatrix);
2949               search->sbp->posMatrix = copyMatrix;
2950           }
2951           if (search->sbp->karlinK != 0.0)
2952               search->sbp->kbp_gap[0]->K = search->sbp->karlinK;
2953           search->sbp->kbp_gap[0]->logK = log(search->sbp->kbp_gap[0]->K);
2954       }
2955         }
2956         BioseqUnlock(subject_bsp);
2957 
2958         return seqalign;
2959 }
2960 
2961 BlastSearchBlkPtr LIBCALL
2962 BlastQuerySequenceSetUp(BioseqPtr bsp, CharPtr progname, 
2963                             BLAST_OptionsBlkPtr options)
2964 {
2965    BlastSearchBlkPtr search;
2966    SeqLocPtr slp=NULL;
2967 
2968    if (bsp == NULL)
2969       return NULL;
2970 
2971    ValNodeAddPointer(&slp, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBest(bsp->id, SEQID_GI)));
2972    if (progname == NULL && options == NULL)
2973       return NULL;
2974    
2975    if (progname == NULL)
2976       progname = options->program_name;
2977 
2978    if (!StringCmp(progname, "blastp") || 
2979        !StringCmp(progname, "blastx")) {
2980       if (options->gapped_calculation == TRUE) { 
2981          options->two_pass_method  = FALSE;
2982          options->multiple_hits_only  = TRUE;
2983       }
2984    }
2985         
2986    search = BLASTSetUpSearchByLoc(slp, progname, bsp->length, 0, NULL, options, NULL);
2987    
2988    search->allocated += BLAST_SEARCH_ALLOC_QUERY_SLP;
2989 
2990    if (search == NULL)
2991       return NULL;
2992         
2993    return search;
2994 }
2995 
2996 /*
2997         Runs blast between two sequences
2998 */
2999 SeqAlignPtr LIBCALL
3000 BlastTwoSequencesByLocEx(SeqLocPtr slp1, SeqLocPtr slp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns)
3001 {
3002    return BlastTwoSequencesByLocWithCallback(slp1, slp2, progname, options,
3003            other_returns, error_returns, NULL, NULL);
3004 }
3005 
3006 /************************************************************************/
3007 /*        PSIBLAST2Sequences API                                        */
3008 /************************************************************************/
3009 
3010 static BLAST_ScorePtr *B2SAllocateScoreMatrix(Int4 rows, Int4 cols)
3011 {
3012     BLAST_ScorePtr *matrix = NULL;
3013     Int4 i;
3014 
3015     if (!(matrix = (BLAST_ScorePtr *) MemNew(rows*sizeof(BLAST_ScorePtr)))) {
3016         return NULL;
3017     }
3018 
3019     for (i = 0; i < rows; i++) {
3020         matrix[i] = (BLAST_ScorePtr) MemNew(cols*sizeof(BLAST_Score));
3021         if (matrix[i] == NULL) {
3022             while (--i >= 0)
3023                 MemFree(matrix[i]);
3024             MemFree(matrix);
3025             return NULL;
3026         }
3027     }
3028     return matrix;
3029 }
3030 
3031 /* Convert a set of residue frequencies into a scaled PSSM (using
3032  * scalingFactor). */
3033 static BLAST_ScorePtr *B2SCalculateScaledPSSM(BlastSearchBlkPtr search,
3034         Nlm_FloatHiPtr *posFreqs, compactSearchItems *compactSearch,
3035         Nlm_FloatHiPtr karlinK)
3036 {
3037     BLAST_ScorePtr *retval = NULL;
3038     posSearchItems *posSearch = NULL;
3039     Int4 qlen, alphabet_sz, rv;
3040     Nlm_FloatHi scalingFactor = search->pbp->scalingFactor;
3041     BLAST_ScoreBlkPtr sbp = NULL;
3042     ValNodePtr error_return;
3043     Int4 i, gap_open, gap_extend;
3044 
3045     if (!search || !compactSearch || !posFreqs)
3046         return NULL;
3047 
3048     if (!(posSearch = (posSearchItems *)MemNew(sizeof(posSearchItems)))) {
3049         ErrPostEx(SEV_ERROR, 0, 0, "B2SCalculateScaledPSSM: Out of memory");
3050         return NULL;
3051     }
3052 
3053     qlen = compactSearch->qlength;
3054     alphabet_sz = compactSearch->alphabetSize;
3055     gap_open = search->pbp->gap_open / scalingFactor;
3056     gap_extend = search->pbp->gap_extend / scalingFactor;
3057 
3058     if (!(sbp = BLAST_ScoreBlkNew(Seq_code_ncbistdaa, 1))) {
3059         ErrPostEx(SEV_ERROR, 0, 0, "B2SCalculateScaledPSSM: Out of memory");
3060         MemFree(posSearch);
3061         return NULL;
3062     }
3063     sbp->read_in_matrix = TRUE;
3064     sbp->protein_alphabet = TRUE;
3065     sbp->posMatrix = NULL;
3066     sbp->number_of_contexts = 1;
3067     BlastScoreBlkMatFill(sbp, search->sbp->name);
3068     compactSearch->matrix = sbp->matrix;
3069     compactSearch->gapped_calculation = TRUE;
3070     compactSearch->pseudoCountConst = search->pbp->pseudoCountConst;
3071     compactSearch->ethresh = 0.001;
3072     BlastScoreBlkFill(sbp, (CharPtr) compactSearch->query, qlen, 0);
3073     
3074     sbp->kbp_gap_std[0] = BlastKarlinBlkCreate();
3075     rv = BlastKarlinBlkGappedCalc(sbp->kbp_gap_std[0], gap_open, gap_extend, 
3076             sbp->name, &error_return);
3077     if (rv == 1) {
3078         BlastErrorPrint(error_return);
3079         BLAST_ScoreBlkDestruct(sbp);
3080         MemFree(posSearch);
3081         return NULL;
3082     }
3083     sbp->kbp_gap_psi[0] = BlastKarlinBlkCreate();
3084     rv = BlastKarlinBlkGappedCalc(sbp->kbp_gap_psi[0], gap_open, gap_extend, 
3085             sbp->name, &error_return);
3086     if (rv == 1) {
3087         BlastErrorPrint(error_return);
3088         BLAST_ScoreBlkDestruct(sbp);
3089         MemFree(posSearch);
3090         return NULL;
3091     }
3092 
3093     if (sbp->kbp_ideal == NULL)
3094         sbp->kbp_ideal = BlastKarlinBlkStandardCalcEx(sbp);
3095     compactSearch->lambda =  sbp->kbp_gap_std[0]->Lambda;
3096     compactSearch->kbp_std = sbp->kbp_std;
3097     compactSearch->kbp_psi = sbp->kbp_psi;
3098     compactSearch->kbp_gap_psi = sbp->kbp_gap_psi;
3099     compactSearch->kbp_gap_std = sbp->kbp_gap_std;
3100     compactSearch->lambda_ideal = sbp->kbp_ideal->Lambda;
3101     compactSearch->K_ideal = sbp->kbp_ideal->K;
3102 
3103     /* Initialize the posSearch structure */
3104     posSearch->posFreqs = posFreqs;
3105     posSearch->posMatrix = B2SAllocateScoreMatrix(qlen+1, alphabet_sz);
3106     posSearch->posPrivateMatrix = B2SAllocateScoreMatrix(qlen+1, alphabet_sz);
3107     if (!posSearch->posMatrix || !posSearch->posPrivateMatrix) {
3108         ErrPostEx(SEV_ERROR, 0, 0, "B2SCalculateScaledPSSM: Out of memory");
3109         BLAST_ScoreBlkDestruct(sbp);
3110         MemFree(posSearch->posMatrix); MemFree(posSearch->posPrivateMatrix);
3111         MemFree(posSearch);
3112         return NULL;
3113     }
3114 
3115     posFreqsToMatrix(posSearch, compactSearch);
3116     impalaScaling(posSearch, compactSearch, scalingFactor, TRUE);
3117     if (karlinK)
3118         *karlinK = compactSearch->kbp_gap_psi[0]->K;
3119 
3120     for (i = 0; i <= qlen; i++)
3121         MemFree(posSearch->posMatrix[i]);
3122     MemFree(posSearch->posMatrix);
3123     BLAST_ScoreBlkDestruct(sbp);
3124     retval = posSearch->posPrivateMatrix;
3125     MemFree(posSearch);
3126 
3127     return retval;
3128 }
3129 
3130 /* Calculates the PSSM for a given SeqLocPtr */
3131 static BLAST_ScorePtr *B2SCalculatePSSM(SeqLocPtr slp, BlastSearchBlkPtr search,
3132         BLAST_MatrixPtr matrix, Nlm_FloatHiPtr karlinK)
3133 {
3134     BLAST_ScorePtr *posMatrix = NULL;
3135     compactSearchItems *compactSearch = NULL;
3136     Boolean replaced_sequence = FALSE;
3137     Int4 query_length, full_query_length;
3138     SeqLocPtr filter_slp = NULL, full_slp = NULL;
3139     Uint1Ptr sequence = NULL;
3140     BlastSequenceBlk bseq;
3141     Nlm_FloatHi scalingFactor = search->pbp->scalingFactor;
3142 
3143     query_length = SeqLocLen(slp);
3144 
3145     /* if the slp is not the whole sequence, retrieve the whole sequence and
3146      * use it to compute the pssm */
3147     if (matrix->rows != (query_length+1)) {
3148         SeqPortPtr spp = NULL;
3149         SeqIdPtr sip = NULL;
3150         Uint1 residue;
3151         BioseqPtr bsp = NULL;
3152         Char tmp[256];
3153         Int4 index = 0;
3154 
3155         sip = SeqLocId(slp);
3156         if ((bsp = BioseqLockById(SeqIdFindBest(sip, SEQID_GI))) == NULL) {
3157             SeqIdWrite(SeqLocId(slp),tmp,PRINTID_FASTA_LONG, 
3158                     sizeof(tmp));
3159 
3160             ErrPostEx(SEV_ERROR,0,0,"Could not retrieve full bioseq "
3161                     "for %s",tmp);
3162             BioseqUnlock(bsp);
3163             return NULL;
3164         }
3165                     
3166         /* get full sequence to be used in WposComputation */
3167         spp = SeqPortNew(bsp, FIRST_RESIDUE, LAST_RESIDUE, Seq_strand_unknown,
3168                 Seq_code_ncbistdaa);
3169 
3170         full_query_length = bsp->length;
3171         sequence = (Uint1Ptr) MemNew(2*((bsp->length)+2)*sizeof(Char));
3172         BioseqUnlock(bsp);
3173 
3174         sequence[index++] = NULLB;
3175         while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
3176             if (IS_residue(residue)) {
3177                 if (residue == 24) { /* change selenocysteine to X */
3178                     residue = 21;
3179                     ErrPostEx(SEV_WARNING,0,0, "Selenocysteine (U) at "
3180                         "position %ld replaced by X", (long) index+1);
3181                 }
3182                 sequence[index++] = residue;
3183             }
3184         }
3185         sequence[index] = NULLB;
3186         spp = SeqPortFree(spp);
3187 
3188         /* Filter the sequence if necessary */
3189         ValNodeAddPointer(&full_slp, SEQLOC_WHOLE, SeqIdDup(SeqLocId(slp)));
3190         filter_slp = BlastSeqLocFilter(full_slp, search->pbp->filter_string);
3191         if(search->pbp->query_lcase_mask != NULL)
3192             filter_slp = blastMergeFilterLocs(filter_slp, 
3193                     search->pbp->query_lcase_mask, FALSE, 0, 0);
3194 
3195         BlastMaskTheResidues(sequence+1, full_query_length, 21, filter_slp, 
3196                 FALSE, SeqLocStart(full_slp));
3197 
3198         /* Save the current query sequence */
3199         MemCpy(&bseq, search->context[0].query, sizeof(BlastSequenceBlk));
3200 
3201                 BlastSequenceAddSequence(search->context[0].query, NULL, sequence, 
3202                                  full_query_length, full_query_length, 0);
3203 
3204         SeqLocSetFree(full_slp);
3205         SeqLocSetFree(filter_slp);
3206         replaced_sequence = TRUE;
3207     }
3208 
3209     compactSearch = compactSearchNew(compactSearch);
3210     copySearchItems(compactSearch, search, search->sbp->name);
3211     compactSearch->pseudoCountConst = search->pbp->pseudoCountConst;
3212     if (scalingFactor != 0.0 && scalingFactor != 1.0) {
3213         /* build pssm {make,copy}mat/rpsblast style */
3214         posMatrix = B2SCalculateScaledPSSM(search, search->sbp->posFreqs,
3215                 compactSearch, karlinK);
3216     } else {
3217         /* build pssm psiblast style */
3218         posMatrix = WposComputation(compactSearch, NULL, search->sbp->posFreqs);
3219     }
3220     compactSearchDestruct(compactSearch);
3221 
3222     if (replaced_sequence) {
3223         MemCpy(search->context[0].query, &bseq, sizeof(BlastSequenceBlk));
3224         MemFree(sequence);
3225     }
3226 
3227     return posMatrix;
3228 }
3229 
3230 /* Checks if the dimensions of the pssm attached to the search->sbp are
3231  * consistent with the length of the master query (slp), and trims the matrix
3232  * if necessary */
3233 static Boolean B2SVerifyPSSM(SeqLocPtr slp, BlastSearchBlkPtr search,
3234         BLAST_MatrixPtr matrix)
3235 {
3236     Int4 i, query_length = SeqLocLen(slp);
3237 
3238     if ((query_length+1) > matrix->rows) {
3239         ErrPostEx(SEV_WARNING,0,0,"Ignoring PSSM because it seems not to "
3240             "correspond to query sequence (query length  = %ld, PSSM's "
3241             "number of rows = %ld)", query_length+1, matrix->rows);
3242         search->positionBased = FALSE;
3243 
3244         if (matrix->matrix == NULL) {
3245             BLAST_ScorePtr *posMatrix = search->sbp->posMatrix;
3246 
3247             for (i = 0; i < matrix->rows; i++)
3248                 posMatrix[i] = MemFree(posMatrix[i]);
3249             posMatrix = MemFree(posMatrix);
3250         }
3251         search->sbp->posMatrix = NULL;
3252         search->sbp->posFreqs = NULL;
3253         return FALSE;
3254     } else if ((query_length+1) < matrix->rows) {
3255         /* Assume BLAST_Matrix corresponds to the entire sequence, so trim
3256          * it */
3257         Int4 from, to, i, j, alphabet_sz;
3258         BLAST_ScorePtr *pssm = NULL;
3259 
3260         if (slp->choice != SEQLOC_INT) {
3261             ErrPostEx(SEV_ERROR,0,0,"B2SVerifyPSSM: SeqLocPtr is not a "
3262                     "SEQLOC_INT, cannot trim matrix");
3263             return FALSE;
3264         }
3265 
3266         from = SeqLocStart(slp);
3267         to = SeqLocStop(slp);
3268         alphabet_sz = matrix->columns;
3269         
3270         /* Adjust the pssm */
3271         pssm = (BLAST_ScorePtr *)MemNew(sizeof(BLAST_ScorePtr) *
3272                 (query_length+1));
3273         for (i = 0; i <= query_length; i++) {
3274             pssm[i] = (BLAST_ScorePtr)MemNew(sizeof(BLAST_Score) *
3275                     alphabet_sz);
3276         }
3277 
3278         for (i = from; i <= to; i++) {
3279             for (j = 0; j < alphabet_sz; j++)
3280                 pssm[(i-from)][j] = search->sbp->posMatrix[i][j];
3281         }
3282         for (j = 0; j < alphabet_sz; j++)
3283             pssm[query_length][j] = BLAST_SCORE_MIN;
3284 
3285         if (matrix->matrix == NULL) {
3286             /* Free the matrix we calculated originally */
3287             BLAST_ScorePtr *posMatrix = search->sbp->posMatrix;
3288 
3289             for (i = 0; i < matrix->rows; i++)
3290                 posMatrix[i] = MemFree(posMatrix[i]);
3291             posMatrix = MemFree(posMatrix);
3292         }
3293         search->sbp->posMatrix = pssm;
3294 
3295     }
3296     return TRUE;
3297 }
3298 
3299 /* psi-blast2sequences setup: matrix must contain at least the residue
3300  * frequencies to calculate the PSSM. Otherwise, if the PSSM is given, that
3301  * will be used. */
3302 Boolean LIBCALL B2SPssmSetupSearch(BlastSearchBlkPtr search, 
3303         SeqLocPtr pssm_slp, BLAST_MatrixPtr matrix)
3304 {
3305     Nlm_FloatHi karlinK = 0.0;
3306     Int4 npos, alphabet_size;
3307 
3308     if (!search || !matrix)
3309         return FALSE;
3310 
3311     if (search->prog_number != blast_type_blastp) {
3312         ErrPostEx(SEV_ERROR, 0, 0, "B2SPssmSetupSearch: only blastp is "
3313                 "supported");
3314         return FALSE;
3315     }
3316 
3317     search->positionBased = TRUE;
3318     npos = SeqLocLen(pssm_slp);
3319     alphabet_size = search->sbp->alphabet_size;
3320 
3321     if (npos <= 0) {
3322         ErrPostEx(SEV_ERROR, 0, 0, "B2SPssmSetupSearch: length of pssm_slp "
3323                 "must be positive");
3324         return FALSE;
3325     }
3326 
3327     /* save the residue frequencies, we might need them later */
3328     if (matrix->posFreqs) {
3329         search->sbp->posFreqs = allocatePosFreqs(npos, alphabet_size);
3330         copyPosFreqs(matrix->posFreqs, search->sbp->posFreqs, npos,
3331                 alphabet_size);
3332     }
3333 
3334     if (matrix->posFreqs && !matrix->matrix) {
3335         search->sbp->posMatrix = B2SCalculatePSSM(pssm_slp, search, matrix,
3336                 &karlinK);
3337         /* if we calculated the pssm, and use did not provide one, save it*/
3338         if (matrix->karlinK == 0.0 && karlinK != 0.0)
3339             matrix->karlinK = karlinK;
3340     } else {
3341         search->sbp->posMatrix = matrix->matrix;
3342     }
3343 
3344     search->sbp->mat_dim1 = search->sbp->query_length + 1;
3345     search->sbp->mat_dim2 = search->sbp->alphabet_size;
3346 
3347     /* Sanity check */
3348     if (!search->sbp->posMatrix) {
3349         ErrPostEx(SEV_ERROR, 0, 0, "B2SPssmSetupSearch: "
3350                 "Could not create or obtain PSSM! Please verify "
3351                 "BLAST_Matrix parameter");
3352         search->positionBased = FALSE;
3353         return FALSE;
3354     }
3355 
3356     /* Make sure the BLAST_Matrix number of rows is consistent with
3357      * pssm_slp */
3358     B2SVerifyPSSM(pssm_slp, search, matrix);
3359 
3360     if (matrix->karlinK != 0.0) {
3361         search->sbp->karlinK = matrix->karlinK;
3362         search->sbp->kbp_gap_psi[0]->K = matrix->karlinK;
3363         search->sbp->kbp_gap_psi[0]->logK = log(matrix->karlinK);
3364     }
3365 
3366     return TRUE;
3367 }
3368 
3369 /* clean up psi-blast2sequences */
3370 Boolean LIBCALL B2SPssmCleanUpSearch(BlastSearchBlkPtr search, 
3371         BLAST_MatrixPtr matrix)
3372 {
3373     Int4 i, rows = search->sbp->query_length + 1;
3374     BLAST_ScorePtr *posMatrix = search->sbp->posMatrix;
3375     Nlm_FloatHiPtr *posFreqs = search->sbp->posFreqs;
3376 
3377     if (!matrix)
3378         return FALSE;
3379     
3380     if ((matrix->matrix == NULL) || /* B2SPssmSetupSearch created PSSM */
3381         (posMatrix != matrix->matrix)) { /* B2SVerifyPSSM trimmed PSSM */
3382         for (i = 0; i < rows; i++)
3383             posMatrix[i] = MemFree(posMatrix[i]);
3384         posMatrix = MemFree(posMatrix);
3385     }
3386     if (matrix->posFreqs) {
3387         for (i = 0; i < rows; i++)
3388             posFreqs[i] = MemFree(posFreqs[i]);
3389         posFreqs = MemFree(posFreqs);
3390     }
3391     search->sbp->posMatrix = NULL;
3392     search->sbp->posFreqs = NULL;
3393     search->positionBased = FALSE;
3394     return TRUE;
3395 }
3396 
3397 SeqAlignPtr LIBCALL B2SPssmOnTheFlyByLoc(BlastSearchBlkPtr search, 
3398             SeqLocPtr subj_slp) 
3399 {
3400     Int4 index, subject_length;
3401     SeqAlignPtr seqalign = NULL;
3402     Uint1Ptr subject_seq = NULL, subject_seq_start = NULL;
3403     SeqPortPtr spp;
3404     Uint1 residue;
3405 
3406     if (!search || search->query_invalid || !subj_slp)
3407         return NULL;
3408 
3409     if (search->result_struct)
3410         search->result_struct = BLASTResultsStructDelete(search->result_struct);
3411     search->result_struct = BLASTResultsStructNew(search->result_size, 
3412                  search->pbp->max_pieces, search->pbp->hsp_range_max);
3413     BlastHitListPurge(search->current_hitlist);
3414 
3415     subject_length = SeqLocLen(subj_slp);
3416 
3417     if (search->prog_number == blast_type_blastp) {
3418         subject_seq_start = (Uint1Ptr) MemNew(
3419                 ((subject_length)+2)*sizeof(Uint1));
3420         /* The first residue is the sentinel. */
3421         subject_seq_start[0] = NULLB;
3422         subject_seq = subject_seq_start+1;
3423         index = 0;
3424         spp = SeqPortNewByLoc(subj_slp, Seq_code_ncbistdaa);
3425         while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
3426             if (IS_residue(residue))
3427                 subject_seq[index++] = residue;
3428         }
3429         subject_seq[index] = NULLB;
3430         spp = SeqPortFree(spp);
3431     } else {
3432         return NULL;
3433     }
3434 
3435     seqalign = BlastTwoSequencesCore(search, subj_slp, subject_seq, 
3436             subject_length, FALSE);
3437 
3438     MemFree(subject_seq_start);
3439     AdjustOffSetsInSeqAlign(seqalign, search->query_slp, subj_slp);
3440 
3441     return seqalign;
3442 }
3443 
3444 SeqAlignPtr LIBCALL B2SPssmOnTheFly(BlastSearchBlkPtr search, 
3445         BioseqPtr subj_bsp) 
3446 {
3447     SeqAlignPtr salp = NULL;
3448     SeqLocPtr slp = NULL;
3449 
3450     if (!search || search->query_invalid || !subj_bsp)
3451         return NULL;
3452 
3453     ValNodeAddPointer(&slp, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBest(subj_bsp->id,
3454                     SEQID_GI)));
3455     salp = B2SPssmOnTheFlyByLoc(search, slp);
3456     SeqLocFree(slp);
3457     return salp;
3458 }
3459 
3460 SeqAlignPtr * LIBCALL B2SPssmMultipleQueries(SeqLocPtr pssm_slp,
3461         BLAST_MatrixPtr matrix, SeqLocPtr *target_seqs, Int4 ntargets,
3462         BLAST_OptionsBlkPtr options)
3463 {
3464     SeqAlignPtr *sa_array = NULL;
3465     BlastSearchBlkPtr search = NULL;
3466     Int4 i;
3467 
3468     if (!matrix || !pssm_slp || !target_seqs || ntargets <= 0 || !options)
3469         return NULL;
3470 
3471     /* Set up search structure */
3472     search = BLASTSetUpSearchByLoc(pssm_slp, options->program_name,
3473             SeqLocLen(pssm_slp), 0, NULL, options, NULL);
3474     B2SPssmSetupSearch(search, pssm_slp, matrix);
3475 
3476     /* Allocate memory for return value */
3477     if (!(sa_array = (SeqAlignPtr*)MemNew(sizeof(SeqAlignPtr)*ntargets))) {
3478         ErrPostEx(SEV_ERROR, 0, 0, "B2SPssmMultipleQueries: Out of memory");
3479         BlastSearchBlkDestruct(search);
3480         return NULL;
3481     }
3482 
3483 
3484     /* Iterate over seqlocs in target_seqs, using effective search space in
3485      * rpsblast style */
3486     for (i = 0; i < ntargets; i++) {
3487         Int8 dblen = (options->db_length != 0) ? 
3488                         options->db_length : SeqLocLen(pssm_slp);
3489         Int4 nseqs = (options->dbseq_num != 0) ?  options->dbseq_num : 1;
3490 
3491         /* If search space has been specified in the options structure, the it
3492          * must have been set in BLASTSetUpSearchEx, so don't overwrite it */
3493         if ( ! (options->searchsp_eff > 0) ) {
3494             search->searchsp_eff  = BLASTCalculateSearchSpace(options, nseqs, 
3495                     dblen, SeqLocLen(target_seqs[i]));
3496         }
3497         sa_array[i] = B2SPssmOnTheFlyByLoc(search, target_seqs[i]);
3498     }
3499 
3500     /* Clean up */
3501     B2SPssmCleanUpSearch(search, matrix);
3502     BlastSearchBlkDestruct(search);
3503 
3504     return sa_array;
3505 }
3506 
3507 /************************************************************************/
3508 /* END    PSIBLAST2Sequences API                                        */
3509 /************************************************************************/
3510 
3511 /* Note that the matrix parameter should correspond to the full master
3512  * sequence */
3513 SeqAlignPtr LIBCALL
3514 BlastTwoSequencesByLocWithCallback(SeqLocPtr slp1, SeqLocPtr slp2, CharPtr
3515         progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns,
3516         ValNodePtr *error_returns, int (LIBCALLBACK
3517             *handle_results)PROTO((VoidPtr srch)), BLAST_MatrixPtr matrix)
3518 {
3519         BlastAllWordPtr all_words;
3520         BlastSearchBlkPtr search;
3521         BioseqPtr subject_bsp;
3522         Boolean complement=FALSE, reverse, reverse_forbidden, options_alloc;
3523         Int2 status;
3524         Int4 index, subject_length, num_of_cols;
3525         SeqAlignPtr seqalign=NULL;
3526         SeqLocPtr query_slp, subject_slp;
3527         SeqPortPtr spp;
3528         SPCompressPtr spc=NULL;
3529         Uint1 residue;
3530         Uint1Ptr subject_seq, subject_seq_start;
3531         Uint1Ptr *array;
3532 
3533         if (slp1 == NULL || slp2 == NULL)
3534                 return NULL;
3535 
3536         if (error_returns)
3537         {
3538                 *error_returns = NULL;
3539         }
3540 
3541         if (other_returns)
3542         {
3543                 *other_returns = NULL;
3544         }
3545 
3546         if (progname == NULL && options == NULL)
3547                 return NULL;
3548 
3549         /* If filtering is performed, do not reverse the sequence.  
3550            In this case the wrong sequence would be filtered. */
3551         reverse_forbidden = FALSE;
3552         if ((options && ((options->filter_string &&
3553                         StringCmp(options->filter_string, "F")) ||
3554                         options->is_megablast_search)) ||
3555                         matrix != NULL)
3556         {
3557                 reverse_forbidden = TRUE;
3558         }
3559 
3560         /* Select the shorter sequence as the query, provided they are 
3561            of the same type. */
3562         if ((StringCmp(progname, "blastn") && StringCmp(progname, "blastp")) ||
3563             (reverse_forbidden || SeqLocLen(slp1) < SeqLocLen(slp2)))
3564         {
3565                 query_slp = slp1;
3566                 subject_slp = slp2;
3567                 reverse = FALSE;
3568         }
3569         else
3570         {
3571                 query_slp = slp2;
3572                 subject_slp = slp1;
3573                 reverse = TRUE;
3574         }
3575 
3576     /* Make sure strands are handled correctly */
3577     if (!StringCmp(progname, "blastn") &&
3578         SeqLocStrand(query_slp) != Seq_strand_both && 
3579         SeqLocStrand(subject_slp) == Seq_strand_both) {
3580        Change_Loc_Strand(subject_slp, SeqLocStrand(query_slp));
3581        Change_Loc_Strand(query_slp, Seq_strand_both);
3582     }
3583 
3584         if (progname == NULL)
3585         {
3586                 progname = options->program_name;
3587         }
3588 
3589         /* If the subject strand is minus, turn it into plus for blastn. */
3590         /* Complement the other strand to keep things straight. */
3591         if (StringCmp(progname, "blastn") == 0 && SeqLocStrand(subject_slp) == Seq_strand_minus)
3592         {
3593                 complement = TRUE;
3594                 if(SeqLocStrand(query_slp) == Seq_strand_plus ||
3595                         SeqLocStrand(query_slp) == Seq_strand_minus)
3596                                 SeqLocRevCmp(query_slp);
3597                 SeqLocRevCmp(subject_slp);
3598         }
3599 
3600         subject_seq_start = subject_seq = NULL;
3601 
3602     /* Allocate default options if none are allocated yet. */
3603     options_alloc = FALSE;
3604     if (options == NULL)
3605     {
3606             options = BLASTOptionNew(progname, FALSE);
3607             options_alloc = TRUE;
3608     }
3609 
3610     status = BLASTOptionValidateEx(options, progname, error_returns);
3611     if (status != 0)
3612     {       /* error messages in other_returns? */
3613             return NULL;
3614     }
3615 
3616         all_words = NULL;
3617 
3618         subject_length = SeqLocLen(subject_slp);
3619 
3620         if (!StringCmp(progname, "blastp") || 
3621             !StringCmp(progname, "blastx"))
3622         {
3623                 subject_seq_start = (Uint1Ptr) MemNew(((subject_length)+2)*sizeof(Uint1));
3624                 /* The first residue is the sentinel. */
3625                 subject_seq_start[0] = NULLB;
3626                 subject_seq = subject_seq_start+1;
3627                 index = 0;
3628                 spp = SeqPortNewByLoc(subject_slp, Seq_code_ncbistdaa);
3629                 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
3630                 {
3631                         if (IS_residue(residue))
3632                         {
3633                                 subject_seq[index] = residue;
3634                                 index++;
3635                         }
3636                 }
3637                 subject_seq[index] = NULLB;
3638 
3639                 num_of_cols = subject_length+1-options->wordsize;
3640                 all_words = BlastAllWordNew(num_of_cols, options->wordsize, FALSE, TRUE);
3641                 array = (Uint1Ptr *) MemNew(num_of_cols*sizeof(Uint1Ptr));
3642                 for (index=0; index<num_of_cols; index++)
3643                 {
3644                         array[index] = subject_seq+index;
3645                 }
3646                 all_words->array = array;
3647                 spp = SeqPortFree(spp);
3648                 if (options->gapped_calculation == TRUE)
3649                 { 
3650                         options->two_pass_method  = FALSE;
3651                         options->multiple_hits_only  = TRUE;
3652                 }
3653         }
3654         else if (!StringCmp(progname, "blastn") || 
3655                  !StringCmp(progname, "tblastn") ||
3656                  !StringCmp(progname, "psitblastn") ||
3657                  !StringCmp(progname, "tblastx"))
3658         {
3659                 spp = SeqPortNewByLoc(subject_slp, Seq_code_ncbi4na);
3660                 subject_bsp = BioseqFindCore(SeqLocId(subject_slp));
3661                 if (subject_bsp != NULL && subject_bsp->repr == Seq_repr_delta)
3662                         SeqPortSet_do_virtual(spp, TRUE);
3663                 spc = SPCompressDNA(spp);
3664                 if (spc == NULL)
3665                         return NULL;
3666                 subject_seq_start = subject_seq = spc->buffer;
3667                 spp = SeqPortFree(spp);
3668         }
3669         else /* Impossible! */
3670         {
3671                 return NULL;
3672         }
3673         
3674     if (options->is_megablast_search)
3675         /* This has a different meaning in Mega BLAST and must be 0 */
3676         options->block_width = 0;
3677 
3678     if (options->db_length == 0)
3679         options->db_length = subject_length;
3680 
3681     options->dbseq_num = 1;
3682 
3683     search = BLASTSetUpSearchByLoc(query_slp, progname, SeqLocLen(query_slp), subject_length, all_words, options, NULL);
3684 
3685         if (search == NULL) 
3686         return NULL;
3687 
3688     if (search->query_invalid) {
3689         search = BlastSearchBlkDestruct(search);
3690                 return NULL;
3691     }
3692 
3693         if (!StringCmp(progname, "tblastn") ||
3694             !StringCmp(progname, "tblastx") ||
3695             !StringCmp(progname, "psitblastn")) {
3696            MemFree(search->translation_buffer);
3697            search->translation_buffer = MemNew((3+(subject_length/3))*sizeof(Uint1));
3698            search->translation_buffer_size = 1+(subject_length/3);
3699         }
3700 
3701     B2SPssmSetupSearch(search, slp1, matrix);
3702 
3703     search->handle_results = handle_results;
3704     search->output = options->output;
3705 
3706         seqalign = BlastTwoSequencesCore(search, subject_slp, subject_seq, subject_length, reverse);
3707 
3708         if (complement)
3709         {
3710                 seqalign = SeqAlignListReverseStrand(seqalign);
3711                 SeqLocRevCmp(query_slp);
3712                 SeqLocRevCmp(subject_slp);
3713         }
3714 
3715         if (spc)
3716         {
3717                 SPCompressFree(spc);
3718                 spc = NULL;
3719         }
3720         else
3721         {
3722                 subject_seq_start = MemFree(subject_seq_start);
3723         }
3724         
3725         if (search->error_return)
3726         {
3727                 ValNodeLink(error_returns, search->error_return);
3728                 search->error_return = NULL;
3729         }
3730 
3731         if (other_returns)
3732         { /* format dbinfo etc.  */
3733                 *other_returns = BlastOtherReturnsPrepare(search);
3734         }
3735 
3736     if (options_alloc)
3737         options = BLASTOptionDelete(options);
3738 
3739     AdjustOffSetsInSeqAlign(seqalign, slp1, slp2);
3740 
3741     B2SPssmCleanUpSearch(search, matrix);
3742 
3743         search = BlastSearchBlkDestruct(search);
3744 
3745         return seqalign;
3746 }
3747 
3748 SeqAlignPtr LIBCALL
3749 BlastTwoSequencesByLoc(SeqLocPtr slp1, SeqLocPtr slp2, CharPtr progname, BLAST_OptionsBlkPtr options)
3750 {
3751         return BlastTwoSequencesByLocEx(slp1, slp2, progname, options, NULL, NULL);
3752 }
3753 
3754 SeqAlignPtr LIBCALL
3755 BlastTwoSequencesEx(BioseqPtr bsp1, BioseqPtr bsp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns)
3756 {
3757    return BlastTwoSequencesWithCallback(bsp1, bsp2, progname, options,
3758            other_returns, error_returns, NULL);
3759 }
3760 
3761 SeqAlignPtr LIBCALL
3762 BlastTwoSequencesWithCallback(BioseqPtr bsp1, BioseqPtr bsp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)))
3763 {
3764         SeqAlignPtr seqalign;
3765         SeqLocPtr slp1=NULL, slp2=NULL;
3766 
3767         if (bsp1 == NULL || bsp2 == NULL)
3768                 return NULL;
3769 
3770         slp1 = NULL;
3771         slp2 = NULL;
3772     if (!handle_results) {
3773        ValNodeAddPointer(&slp1, SEQLOC_WHOLE,
3774                          SeqIdDup(SeqIdFindBest(bsp1->id, SEQID_GI)));
3775        ValNodeAddPointer(&slp2, SEQLOC_WHOLE,
3776                          SeqIdDup(SeqIdFindBest(bsp2->id, SEQID_GI)));
3777     } else {
3778        ValNodeAddPointer(&slp1, SEQLOC_WHOLE, 
3779                          SeqIdDup(SeqIdFindBestAccession(bsp1->id)));
3780        ValNodeAddPointer(&slp2, SEQLOC_WHOLE, 
3781                          SeqIdDup(SeqIdFindBestAccession(bsp2->id)));
3782     }
3783     seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, progname,
3784             options, other_returns, error_returns, handle_results, NULL);
3785 
3786     slp1 = SeqLocFree(slp1);
3787     slp2 = SeqLocFree(slp2);
3788     
3789     return seqalign;
3790 }
3791 
3792 SeqAlignPtr LIBCALL
3793 BlastTwoSequences(BioseqPtr bsp1, BioseqPtr bsp2, CharPtr progname, BLAST_OptionsBlkPtr options)
3794 {
3795         return BlastTwoSequencesEx(bsp1, bsp2, progname, options, NULL, NULL);
3796 }
3797 
3798 /*
3799         Runs blast on the fly between the query BioseqPtr (specified with a
3800         call to BLASTSetUpSearch) and the subject BioseqPtr.
3801 */
3802 
3803 
3804 BlastSearchBlkPtr LIBCALL
3805 BlastSequencesOnTheFlyEx(BlastSearchBlkPtr search, BioseqPtr subject_bsp)
3806 {
3807         Int4 index, subject_length;
3808         SeqPortPtr spp;
3809         SPCompressPtr spc=NULL;
3810         Uint1Ptr subject_seq, subject_seq_start;
3811         Uint1 residue;
3812 
3813         if (subject_bsp == NULL)
3814                 return NULL;
3815 
3816         if (search == NULL || search->query_invalid)
3817                 return NULL;
3818 
3819         if (!search->pbp->mb_params) {
3820            if (search->result_struct)
3821               search->result_struct =
3822                  BLASTResultsStructDelete(search->result_struct);
3823            search->result_struct = 
3824               BLASTResultsStructNew(search->result_size, 
3825                  search->pbp->max_pieces, search->pbp->hsp_range_max);
3826         } else {
3827            if (search->mb_result_struct && search->mb_result_struct[0])
3828               search->mb_result_struct[0] = 
3829                  BLASTResultsStructDelete(search->mb_result_struct[0]);
3830            if (!search->mb_result_struct)
3831               search->mb_result_struct = (BLASTResultsStructPtr PNTR) 
3832                  MemNew(sizeof(BLASTResultsStructPtr));
3833         }
3834 
3835         BlastHitListPurge(search->current_hitlist);
3836 
3837         subject_seq_start = subject_seq = NULL;
3838 
3839         subject_length = subject_bsp->length;
3840 
3841         if (StringCmp(search->prog_name, "blastp") == 0)
3842         {
3843                 subject_seq_start = (Uint1Ptr) MemNew(((subject_length)+2)*sizeof(Uint1));
3844                 /* The first residue is the sentinel. */
3845                 subject_seq_start[0] = NULLB;
3846                 subject_seq = subject_seq_start+1;
3847                 index = 0;
3848                 spp = SeqPortNew(subject_bsp, FIRST_RESIDUE, LAST_RESIDUE,
3849                                  0, Seq_code_ncbistdaa);
3850                 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
3851                 {
3852                         if (IS_residue(residue))
3853                         {
3854                                 subject_seq[index] = residue;
3855                                 index++;
3856                         }
3857                 }
3858                 subject_seq[index] = NULLB;
3859                 spp = SeqPortFree(spp);
3860         }
3861         else if (StringCmp(search->prog_name, "blastn") == 0)
3862         {
3863                 spp = SeqPortNew(subject_bsp, FIRST_RESIDUE, LAST_RESIDUE,
3864                                  0, Seq_code_ncbi4na);
3865                 spc = SPCompressDNA(spp);
3866                 subject_seq = spc->buffer;
3867                 spp = SeqPortFree(spp);
3868         }
3869         else
3870         {
3871                 return NULL;
3872         }
3873 
3874         BlastTwoSequencesCoreEx(search, subject_bsp, subject_seq, 
3875                                          subject_length);
3876 
3877         if (spc)
3878         {
3879                 SPCompressFree(spc);
3880                 spc = NULL;
3881         }
3882         else
3883         {
3884                 subject_seq_start = MemFree(subject_seq_start);
3885         }
3886         
3887    return search;
3888 }
3889 
3890 SeqAlignPtr LIBCALL
3891 BlastSequencesOnTheFlyByLoc(BlastSearchBlkPtr search, SeqLocPtr subject_slp)
3892 {
3893         Int4 index, subject_length;
3894         SeqAlignPtr seqalign=NULL;
3895         SeqPortPtr spp;
3896         SPCompressPtr spc=NULL;
3897         Uint1Ptr subject_seq, subject_seq_start;
3898         Uint1 residue;
3899 
3900         if (subject_slp == NULL)
3901                 return NULL;
3902 
3903         if (search == NULL || search->query_invalid)
3904                 return NULL;
3905 
3906 
3907         if (!search->pbp->mb_params) {
3908            if (search->result_struct)
3909               search->result_struct = BLASTResultsStructDelete(search->result_struct);
3910            search->result_struct = 
3911               BLASTResultsStructNew(search->result_size, 
3912                  search->pbp->max_pieces, search->pbp->hsp_range_max);
3913         } else {
3914            if (search->mb_result_struct && search->mb_result_struct[0])
3915               search->mb_result_struct[0] = 
3916                  BLASTResultsStructDelete(search->mb_result_struct[0]);
3917            if (!search->mb_result_struct)
3918               search->mb_result_struct = (BLASTResultsStructPtr PNTR) 
3919                  MemNew(sizeof(BLASTResultsStructPtr));
3920         }
3921         BlastHitListPurge(search->current_hitlist);
3922 
3923         subject_seq_start = subject_seq = NULL;
3924 
3925         subject_length = SeqLocLen(subject_slp);
3926 
3927         if (StringCmp(search->prog_name, "blastp") == 0)
3928         {
3929                 subject_seq_start = (Uint1Ptr) MemNew(((subject_length)+2)*sizeof(Uint1));
3930                 /* The first residue is the sentinel. */
3931                 subject_seq_start[0] = NULLB;
3932                 subject_seq = subject_seq_start+1;
3933                 index = 0;
3934                 spp = SeqPortNewByLoc(subject_slp, Seq_code_ncbistdaa);
3935                 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
3936                 {
3937                         if (IS_residue(residue))
3938                         {
3939                                 subject_seq[index] = residue;
3940                                 index++;
3941                         }
3942                 }
3943                 subject_seq[index] = NULLB;
3944                 spp = SeqPortFree(spp);
3945         }
3946         else if (StringCmp(search->prog_name, "blastn") == 0)
3947         {
3948                 spp = SeqPortNewByLoc(subject_slp, Seq_code_ncbi4na);
3949                 spc = SPCompressDNA(spp);
3950                 subject_seq = spc->buffer;
3951                 spp = SeqPortFree(spp);
3952         }
3953         else
3954         {
3955                 return NULL;
3956         }
3957 
3958         seqalign = BlastTwoSequencesCore(search, subject_slp, subject_seq, subject_length, FALSE);
3959 
3960         if (spc)
3961         {
3962                 SPCompressFree(spc);
3963                 spc = NULL;
3964         }
3965         else
3966         {
3967                 subject_seq_start = MemFree(subject_seq_start);
3968         }
3969         
3970         AdjustOffSetsInSeqAlign(seqalign, search->query_slp, subject_slp);
3971 
3972         return seqalign;
3973 }
3974 
3975 SeqAlignPtr LIBCALL
3976 BlastSequencesOnTheFly(BlastSearchBlkPtr search, BioseqPtr subject_bsp)
3977 {
3978         SeqAlignPtr seqalign;
3979         SeqLocPtr slp;
3980 
3981         slp = NULL;
3982         ValNodeAddPointer(&slp, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBest(subject_bsp->id, SEQID_GI)));
3983         seqalign = BlastSequencesOnTheFlyByLoc(search, slp);
3984         SeqLocFree(slp);
3985         return seqalign;
3986 }
3987 /*
3988         Translate a nucleotide sequence without ambiguity codes.
3989         This is used for the first-pass translation of the database.
3990         
3991         BlastSearchBlkPtr search: overall BLAST structure.
3992         Int4 length: length of the nucl. sequence
3993         Uint1Ptr prot_seq: the (translated) protein sequence, with NULLB
3994                 sentinels on either end.  This array should be allocated
3995                 with sufficient memory before the function is called.
3996         Uint1Ptr nt_seq: the original nucl. sequence.
3997         
3998         The genetic code to be used is determined by the translation_table
3999         on the BlastSearchBlkPtr.
4000 
4001         This function translates a packed (ncbi2na) nucl. alphabet.  It
4002         views a basepair as being in one of four sets of 2-bits:
4003 
4004         |0|1|2|3||0|1|2|3||0|1|2|3||...
4005 
4006         1st byte | 2 byte | 3rd byte...
4007 
4008         A codon that starts at the beginning of the above sequence starts in
4009         state "0" and includes basepairs 0, 1, and 2.  The next codon, in the
4010         same frame, after that starts in state "3" and includes 3, 0, and 1.
4011 
4012         ** Optimization:
4013           changed the single main loop to 
4014              - advance to state 0, 
4015              - optimized inner loop does two (3 byte->4 codon) translation per iteration
4016                    (loads are moved earlier so they can be done in advance.)
4017              - do remainder
4018 */
4019 
/*
        Contract summary for BlastTranslateUnambiguousSequence.

        Int2 frame: selects strand and starting offset.  frame > 0 uses
                search->translation_table and walks nt_seq forward,
                starting in state (frame-1); any other value uses
                search->translation_table_rc and walks nt_seq backward
                from its last byte.  (Presumably frame is one of
                +/-1, +/-2, +/-3 -- confirm against callers.)

        Each codon is assembled as a 6-bit value (three 2-bit bases)
        and used directly as an index into the translation table.

        Returns the number of residues written to prot_seq, not counting
        the NULLB sentinel written before the first residue and the
        NULLB written after the last one.  Returns 0, writing nothing,
        if nt_seq or prot_seq is NULL or if fewer than CODON_LENGTH
        bases remain for the requested frame.

        NOTE(review): when length is 3 (the only length below 4 that
        passes the guard, with frame +1 or -1) length/4 is 0.  The
        forward branch then sets nt_seq_end to nt_seq-1 and reads
        *(nt_seq_end); the reverse branch decrements nt_seq below
        nt_seq_start and dereferences it.  Confirm that callers never
        pass sequences that short.
*/
4020 Int4 LIBCALL
4021 BlastTranslateUnambiguousSequence(BlastSearchBlkPtr search, Int4 length, Uint1Ptr prot_seq, Uint1Ptr nt_seq, Int2 frame)
4022 
4023 {
4024         register int state;
4025         Int2 total_remainder;
4026         Int4 prot_length;
4027         register int byte_value, codon=0;
4028         Uint1 last_remainder, last_byte, remainder;
4029         register Uint1Ptr translation, nt_seq_end, nt_seq_start;
4030         Uint1Ptr prot_seq_start;
4031         int byte_value1,byte_value2,byte_value3,byte_value4,byte_value5;
4032   
4033         prot_length=0;
        /* Nothing to translate: missing buffers or less than one codon in this frame. */
4034         if (nt_seq == NULL || prot_seq == NULL || (length-ABS(frame)+1) < CODON_LENGTH)
4035         return prot_length;
4036 
        /* Leading sentinel; residues are written after it. */
4037         *prot_seq = NULLB;
4038         prot_seq++;
4039 
4040 /* record to determine protein length. */
4041         prot_seq_start = prot_seq;
4042   
4043         if (frame > 0)
4044                 translation = search->translation_table;
4045         else
4046                 translation = search->translation_table_rc;
4047 
        /* Number of bases beyond the last full (4-base) byte. */
4048         remainder = length%4;
4049 
4050         if (frame > 0)
4051         {
                /* nt_seq_end: last byte that holds four bases.
                   last_remainder: bases of the full bytes left over after
                   taking whole codons in this frame.
                   total_remainder: those plus the bases of the trailing
                   partial byte; if it reaches CODON_LENGTH one more codon
                   is translated after the main loop. */
4052                 nt_seq_end = nt_seq + (length)/4 - 1;
4053                 last_remainder = (4*(length/4) - frame + 1)%CODON_LENGTH;
4054                 total_remainder = last_remainder+remainder;
4055                         
4056                 state = frame-1;
4057                 byte_value = *nt_seq;
4058 
4059                 /* If there's lots to do, advance to state 0, then enter fast loop */
4060                 while (nt_seq < nt_seq_end)
4061                 {
4062                         switch (state)
4063                         {
4064                                 case 0:
4065                                         codon = (byte_value >> 2);
4066                                         *prot_seq = translation[codon];
4067                                         prot_seq++;
4068                                 /* do state = 3 now, break is NOT missing. */
4069                                 case 3:
4070                                         codon = ((byte_value & 3) << 4);
4071                                         nt_seq++;
4072                                         byte_value = *nt_seq;   
4073                                         codon += (byte_value >> 4);
4074                                         *prot_seq = translation[codon];
4075                                         prot_seq++;
4076                                         if (nt_seq >= nt_seq_end)
4077                                         {
4078                                                 state = 2;
4079                                                 break;
4080                                         }
4081                                 /* Go on to state = 2 if not at end. */
4082                                 case 2:
4083                                         codon = ((byte_value & 15) << 2);
4084                                         nt_seq++;
4085                                         byte_value = *nt_seq;   
4086                                         codon += (byte_value >> 6);
4087                                         *prot_seq = translation[codon];
4088                                         prot_seq++;
4089                                         if (nt_seq >= nt_seq_end)
4090                                         {
4091                                                 state = 1;
4092                                                 break;
4093                                         }
4094                                 /* Go on to state = 1 if not at end. */
4095                                 case 1:
4096                                         codon = byte_value & 63;
4097                                         *prot_seq = translation[codon];
4098                                         prot_seq++;
4099                                         nt_seq++;
4100                                         byte_value = *nt_seq;   
4101                                         state = 0;
4102                                         break;
4103                         } /* end switch */
4104                         /* switch ends at state 0, except when at end */
4105 
4106 
4107                         /********************************************/
4108                         /* optimized loop: start in state 0. continue til near end */
                        /* Each pass consumes six bytes (byte_value plus byte_value1..5)
                           and emits eight residues, then preloads byte_value for the
                           next pass. */
4109                         while (nt_seq < (nt_seq_end-10))
4110                           {
4111                             byte_value1 = *(++nt_seq);
4112                             byte_value2 = *(++nt_seq);
4113                             byte_value3 = *(++nt_seq);
4114                             /* case 0: */
4115                             codon = (byte_value >> 2);
4116                             *prot_seq = translation[codon];
4117                             prot_seq++;
4118 
4119                             /* case 3: */
4120                             codon = ((byte_value & 3) << 4);
4121                             codon += (byte_value1 >> 4);
4122                             *prot_seq = translation[codon];
4123                             prot_seq++;
4124 
4125                             byte_value4 = *(++nt_seq);
4126                             /* case 2: */
4127                             codon = ((byte_value1 & 15) << 2);
4128 
4129                             codon += (byte_value2 >> 6);
4130                             *prot_seq = translation[codon];
4131                             prot_seq++;
4132                             /* case 1: */
4133                             codon = byte_value2 & 63;
4134                             byte_value5 = *(++nt_seq);
4135                             *prot_seq = translation[codon];
4136                             prot_seq++;
4137 
4138                             /* case 0: */
4139                             codon = (byte_value3 >> 2);
4140                             *prot_seq = translation[codon];
4141                             prot_seq++;
4142                             /* case 3: */
4143                             byte_value = *(++nt_seq);
4144                             codon = ((byte_value3 & 3) << 4);
4145                             codon += (byte_value4 >> 4);
4146                             *prot_seq = translation[codon];
4147                             prot_seq++;
4148                             /* case 2: */
4149                             codon = ((byte_value4 & 15) << 2);
4150                             codon += (byte_value5 >> 6);
4151                             *prot_seq = translation[codon];
4152                             prot_seq++;
4153                             /* case 1: */
4154                             codon = byte_value5 & 63;
4155                             *prot_seq = translation[codon];
4156                             prot_seq++;
4157                             state=0;
4158                           } /* end optimized while */
4159                 /********************************************/
4160                 } /* end while */
4161 
4162 
                /* Finish any codon that lies wholly inside the byte at nt_seq. */
4163                 if (state == 1)
4164                 { 
4165                 /* This doesn't get done above, DON'T do the state = 0
4166                    below if this is done. */
4167                         byte_value = *nt_seq;
4168                         codon = byte_value & 63;
4169                         state = 0;
4170                         *prot_seq = translation[codon];
4171                         prot_seq++;
4172                 }
4173                 else if (state == 0)
4174                 { /* This one doesn't get done above. */
4175                         byte_value = *nt_seq;
4176                         codon = ((byte_value) >> 2);
4177                         state = 3;
4178                         *prot_seq = translation[codon];
4179                         prot_seq++;
4180                 }
4181 
                /* One more codon is available that uses bases from the byte
                   after nt_seq_end (the trailing partial byte). */
4182                 if (total_remainder >= CODON_LENGTH)
4183                 {
4184                         byte_value = *(nt_seq_end);
4185                         last_byte = *(nt_seq_end+1);
4186                         if (state == 0)
4187                         {
4188                                 codon = (last_byte >> 2);
4189                         }
4190                         else if (state == 2)
4191                         {
4192                                 codon = ((byte_value & 15) << 2);
4193                                 codon += (last_byte >> 6);
4194                         }
4195                         else if (state == 3)
4196                         {
4197                                 codon = ((byte_value & 3) << 4);
4198                                 codon += (last_byte >> 4);
4199                         }
4200                         *prot_seq = translation[codon];
4201                         prot_seq++;
4202                 }
4203                 *prot_seq = NULLB;
4204         }
4205         else
4206         {
                /* Reverse direction: start at the trailing byte and walk
                   back toward nt_seq_start. */
4207                 nt_seq_start = nt_seq;
4208                 nt_seq += length/4;
4209                 state = remainder+frame;
4210         /* Do we start in the last byte?  This one has the lowest order
4211         bits set to represent the remainder, hence the odd coding here. */
4212                 if (state >= 0)
4213                 {
4214                         last_byte = *nt_seq;
4215                         nt_seq--;
4216                         if (state == 0)
4217                         {
4218                                 codon = (last_byte >> 6);
4219                                 byte_value = *nt_seq;
4220                                 codon += ((byte_value & 15) << 2);
4221                                 state = 1;
4222                         }
4223                         else if (state == 1)
4224                         {
4225                                 codon = (last_byte >> 4);
4226                                 byte_value = *nt_seq;
4227                                 codon += ((byte_value & 3) << 4);
4228                                 state = 2;
4229                         }
4230                         else if (state == 2)
4231                         {
4232                                 codon = (last_byte >> 2);
4233                                 state = 3;
4234                         }
4235                         *prot_seq = translation[codon];
4236                         prot_seq++;
4237 
4238                 }
4239                 else
4240                 {
                        /* The first codon lies entirely in the full bytes;
                           skip the trailing byte without translating from it. */
4241                         state = 3 + (remainder + frame + 1);
4242                         nt_seq--;
4243                 }
4244 
4245                 byte_value = *nt_seq;   
4246 
4247                 /* If there's lots to do, advance to state 3, then enter fast loop */
4248                 while (nt_seq > nt_seq_start)
4249                 {
4250                         switch (state)
4251                         {
4252                                 case 3:
4253                                         codon = (byte_value & 63);
4254                                         *prot_seq = translation[codon];
4255                                         prot_seq++;
4256                                 /* do state = 0 now, break is NOT missing. */
4257                                 case 0:
4258                                         codon = (byte_value >> 6);
4259                                         nt_seq--;
4260                                         byte_value = *nt_seq;   
4261                                         codon += ((byte_value & 15) << 2);
4262                                         *prot_seq = translation[codon];
4263                                         prot_seq++;
4264                                         if (nt_seq <= nt_seq_start)
4265                                         {
4266                                                 state = 1;
4267                                                 break;
4268                                         }
4269                                 /* Go on to state = 1 if not at end. */
4270                                 case 1:
4271                                         codon = (byte_value >> 4);
4272                                         nt_seq--;
4273                                         byte_value = *nt_seq;
4274                                         codon += ((byte_value & 3) << 4);
4275                                         *prot_seq = translation[codon];
4276                                         prot_seq++;
4277                                         if (nt_seq <= nt_seq_start)
4278                                         {
4279                                                 state = 2;
4280                                                 break;
4281                                         }
4282                                 /* Go on to state = 2 if not at end. */
4283                                 case 2:
4284                                         codon = (byte_value >> 2);
4285                                         *prot_seq = translation[codon];
4286                                         prot_seq++;
4287                                         nt_seq--;
4288                                         byte_value = *nt_seq;   
4289                                         state = 3;
4290                                         break;
4291                         } /* end switch */
4292                         /* switch ends at state 3, except when at end */
4293 
4294 
4295                         /********************************************/
4296                         /* optimized area: start in state 3. continue til near end */
                        /* Mirror of the forward fast loop: six bytes in, eight
                           residues out per pass; 'state' is left unchanged. */
4297                         while (nt_seq > (nt_seq_start+10))
4298                           {
4299                             byte_value1 = *(--nt_seq);  
4300                             byte_value2 = *(--nt_seq);
4301                             byte_value3 = *(--nt_seq);
4302 
4303                             codon = (byte_value & 63);
4304                             *prot_seq = translation[codon];
4305                             prot_seq++;
4306                             codon = (byte_value >> 6);
4307                             codon += ((byte_value1 & 15) << 2);
4308                             *prot_seq = translation[codon];
4309                             prot_seq++;
4310                             byte_value4 = *(--nt_seq);
4311                             codon = (byte_value1 >> 4);
4312                             codon += ((byte_value2 & 3) << 4);
4313                             *prot_seq = translation[codon];
4314                             prot_seq++;
4315                             codon = (byte_value2 >> 2);
4316                             *prot_seq = translation[codon];
4317                             prot_seq++;
4318                             byte_value5 = *(--nt_seq);
4319 
4320                             codon = (byte_value3 & 63);
4321                             *prot_seq = translation[codon];
4322                             prot_seq++;
4323                             byte_value = *(--nt_seq);
4324                             codon = (byte_value3 >> 6);
4325                             codon += ((byte_value4 & 15) << 2);
4326                             *prot_seq = translation[codon];
4327                             prot_seq++;
4328                             codon = (byte_value4 >> 4);
4329                             codon += ((byte_value5 & 3) << 4);
4330                             *prot_seq = translation[codon];
4331                             prot_seq++;
4332                             codon = (byte_value5 >> 2);
4333                             *prot_seq = translation[codon];
4334                             prot_seq++;
4335                           } /* end optimized while */
4336                         /********************************************/
4337 
4338                 } /* end while */
4339 
                /* Translate the last codon if it lies wholly inside the
                   byte at nt_seq (states 3 and 2 only). */
4340                 byte_value = *nt_seq;
4341                 if (state == 3)
4342                 {
4343                         codon = (byte_value & 63);
4344                         *prot_seq = translation[codon];
4345                         prot_seq++;
4346                 }
4347                 else if (state == 2)
4348                 {
4349                         codon = (byte_value >> 2);
4350                         *prot_seq = translation[codon];
4351                         prot_seq++;
4352                 }
4353         }
4354 
        /* Trailing sentinel; not counted in the returned length. */
4355         *prot_seq = NULLB;
4356 
4357         return (prot_seq - prot_seq_start);
4358 }       /* BlastTranslateUnambiguousSequence */
4359 
4360 
4361 
4362 /*
4363         Gets an appropriate ID for the database (subject) sequence.
4364         Int4 hit_number is the index into the BLASTResultHitlistPtr,
4365         Boolean ordinal_number specifies whether an ordinal number (the
4366         db sequence number) or a real ID should be used.
4367 */
4368 SeqIdPtr LIBCALL
4369 BlastGetSubjectIdEx(BlastSearchBlkPtr search, Int4 hit_number, Boolean ordinal_number, ValNodePtr *vnpp, Int2 query_number)
4370 {
4371     BLASTResultHitlistPtr   results;
4372     DbtagPtr dbtagptr;
4373     ObjectIdPtr obidp;
4374     SeqIdPtr subject_id=NULL, sip;
4375     Uint4       header;
4376     BLASTResultsStructPtr result_struct;
4377     
4378     if (search->pbp->mb_params)
4379        result_struct = search->mb_result_struct[query_number];
4380     else
4381        result_struct = search->result_struct;
4382 
4383     results = result_struct->results[hit_number];
4384     if (ordinal_number) {
4385         
4386         obidp = ObjectIdNew();
4387         obidp->str = NULL;
4388         obidp->id = results->subject_id;
4389         dbtagptr = DbtagNew();
4390         if (search->rdfp) {
4391             dbtagptr->db = StringSave(search->rdfp->filename);
4392         }
4393         dbtagptr->tag = obidp;
4394         ValNodeAddPointer(&subject_id, SEQID_GENERAL, dbtagptr);
4395     }  else if (search->rdfp) {
4396         if (vnpp == NULL) {
4397             readdb_get_descriptor(search->rdfp, results->subject_id, &subject_id, NULL);
4398         } else {
4399             header = 0;
4400             sip = NULL;
4401             
4402             if(search->rdfp->formatdb_ver == FORMATDB_VER_TEXT) {
4403                 while (readdb_get_header(search->rdfp, results->subject_id, &header, &sip, NULL) == TRUE)
4404                     ValNodeAddPointer(vnpp, 0, sip);
4405             } else {
4406                 BlastDefLinePtr bdfp, bdfp_head;
4407                 
4408                 bdfp_head = FDReadDeflineAsn(search->rdfp, results->subject_id);
4409                 
4410                 if(bdfp_head == NULL) {
4411                     ErrPostEx(SEV_ERROR, 0, 0, "Failure to read defline ASN for %d", results->subject_id);
4412                     return NULL;
4413                 }
4414 
4415                 for(bdfp = bdfp_head; bdfp != NULL; bdfp = bdfp->next) {
4416                     sip = SeqIdSetDup(bdfp->seqid);
4417                     ValNodeAddPointer(vnpp, 0, sip);
4418                 }
4419 
4420                 BlastDefLineSetFree(bdfp_head);
4421             }
4422         }
4423     } else {
4424         if (results->subject_info)
4425             subject_id = SeqIdDup(results->subject_info->sip);
4426     }
4427     
4428     return subject_id;
4429 }
4430 
4431 SeqIdPtr LIBCALL
4432 BlastGetSubjectId(BlastSearchBlkPtr search, Int4 hit_number, Boolean ordinal_number, ValNodePtr *vnpp)
4433 {
4434    return BlastGetSubjectIdEx(search, hit_number, ordinal_number, vnpp, 0);
4435 }
4436 
4437 /*
4438         Use by HeapSort (in BioseqBlastEngine) to rank Hitlist's.
4439 */
4440 
4441 int LIBCALLBACK
4442 evalue_compare_hits(VoidPtr v1, VoidPtr v2)
4443 
4444 {
4445     BLASTResultHitlistPtr h1, h2;
4446     BLASTResultHitlistPtr *hp1, *hp2;
4447     
4448     hp1 = (BLASTResultHitlistPtr *) v1;
4449     hp2 = (BLASTResultHitlistPtr *) v2;
4450     h1 = *hp1;
4451     h2 = *hp2;
4452     
4453     /* Sort first by evalue, then by score in case all evalues are zero. */
4454 
4455     if (h1->best_evalue < h2->best_evalue)
4456         return -1;
4457     if (h1->best_evalue > h2->best_evalue)
4458         return 1;
4459     if (h1->high_score > h2->high_score)
4460         return -1;
4461     if (h1->high_score < h2->high_score)
4462         return 1;
4463     
4464     /* In case of equal scores and E-values order will be determined by
4465        subject id */
4466     
4467     if (h1->subject_id > h2->subject_id)
4468         return -1;
4469     if (h1->subject_id < h2->subject_id)
4470         return 1;
4471     
4472     return 0;
4473 }
4474 
4475 /* Code in BLAST_CLUSTER_HITS is not currently in use */
4476 
4477 #ifdef BLAST_CLUSTER_HITS        
4478 typedef struct _blast_result_with_subject_id {
4479    BLASTResultHspPtr hsp;
4480    Int4 hitlist_index, hsp_index;
4481 } BlastResultHspWithId, PNTR BlastResultHspWithIdPtr;
4482 
4483 static int LIBCALLBACK BLASTResultHspScoreCmp(VoidPtr v1, VoidPtr v2)
4484 {
4485    BLASTResultHspPtr h1, h2;
4486    
4487    h1 = (*(BlastResultHspWithIdPtr PNTR) v1)->hsp;
4488    h2 = (*(BlastResultHspWithIdPtr PNTR) v2)->hsp;
4489 
4490    if (h1->score < h2->score)
4491       return 1;
4492    else if (h1->score > h2->score)
4493       return -1;
4494    else return 0;
4495 }
4496 
4497 static int LIBCALLBACK ResultHspWithIdIndexCmp(VoidPtr v1, VoidPtr v2)
4498 {
4499    BlastResultHspWithIdPtr h1, h2;
4500 
4501    h1 = *(BlastResultHspWithIdPtr PNTR) v1;
4502    h2 = *(BlastResultHspWithIdPtr PNTR) v2;
4503 
4504    if (h1->hitlist_index < h2->hitlist_index)
4505       return -1;
4506    else if (h1->hitlist_index > h2->hitlist_index)
4507       return 1;
4508    else if (h1->hsp_index < h2->hsp_index)
4509       return -1;
4510    else if (h1->hsp_index > h2->hsp_index)
4511       return 1;
4512    else /* Should never happen */
4513       return 0;
4514 }
4515 #endif
4516 
/* Thresholds referenced by the hit-clustering code, which is compiled
   only when BLAST_CLUSTER_HITS is defined. */
/* Two hits are not cluster candidates if the difference of their subject
   lengths, relative to the shorter one, exceeds this value. */
4517 #define CLUSTER_LENGTH_THRESH 0.1
/* Two hits are not cluster candidates if their overlap on the query,
   relative to the longer of the two query spans, is below this value. */
4518 #define CLUSTER_OVERLAP_THRESH 0.9
/* NOTE(review): the use of this score threshold is not visible in this
   part of the file -- confirm its meaning where it is referenced. */
4519 #define CLUSTER_SCORE_THRESH 1.6
4520 
4521 static Nlm_FloatHi
4522 s_ComputeAverageLength(const BlastSearchBlk* search)
4523 {
4524     Nlm_FloatHi retval = 0.0;
4525 
4526         if (StringCmp(search->prog_name, "blastn") != 0) {
4527                 retval = BLAST_AA_AVGLEN;
4528         } else {
4529                 retval = BLAST_NT_AVGLEN;
4530         }
4531 
4532     if (search->rdfp) {
4533         Int4 total_number = 0;
4534         Int8 total_length = 0;
4535 
4536         readdb_get_totals(search->rdfp, &total_length, &total_number);
4537         if (total_number > 0)
4538             retval = ((Nlm_FloatHi) total_length)/total_number;
4539     } else if (search->dblen > 0 && search->dbseq_num == 1) {
4540         retval = search->dblen;
4541     }
4542 
4543     return retval;
4544 }
4545 
4546 SeqAlignPtr LIBCALL
4547 BioseqBlastEngineCore(BlastSearchBlkPtr search, BLAST_OptionsBlkPtr options,
4548                         Int4Ptr *pos_matrix)
4549 {
4550         Int4 hitlist_max;
4551         SeqAlignPtr head, seqalign;
4552 #ifdef BLAST_CLUSTER_HITS
4553         BLASTResultHspPtr hsp, hsp1;
4554         BlastResultHspWithIdPtr PNTR hspp;
4555         BLASTResultsStructPtr result_struct;
4556         BLASTResultHitlistPtr   result_hitlist;
4557         Int4 hspcnt, index, index1, index2;
4558         Int4 q_overlap;
4559         BioseqPtr bsp1, bsp2, PNTR bspp;
4560         BlastSearchBlkPtr search1;
4561         BLAST_KarlinBlkPtr kbp;
4562         FloatHi bit_score;
4563 #endif
4564 
4565         head = seqalign = NULL;
4566 
4567         if (search == NULL || search->query_invalid)
4568                 return NULL;
4569 
4570         /* If pos_matrix is not NULL, then psi-blast iterations are being 
4571         performed.  The first psi-blast iteration should be with normal
4572         blast. */
4573         if (pos_matrix)
4574         {
4575                 search->sbp->posMatrix = pos_matrix;
4576                 search->positionBased = TRUE;
4577                 search->sbp->kbp = search->sbp->kbp_psi;
4578                 search->sbp->kbp_gap = search->sbp->kbp_gap_psi;
4579                 hitlist_max = search->result_struct->hitlist_max;
4580                 search->result_struct = BLASTResultsStructDelete(search->result_struct);
4581                 search->result_struct = BLASTResultsStructNew(hitlist_max, search->pbp->max_pieces, search->pbp->hsp_range_max);
4582                 if (search->allocated & BLAST_SEARCH_ALLOC_WFP_FIRST)
4583                 {
4584                        search->wfp_first = BLAST_WordFinderDestruct(search->wfp_first);
4585                        search->wfp_first = BLAST_WordFinderNew(search->sbp->alphabet_size,options->wordsize,1, FALSE);
4586                 }
4587 
4588                 if (search->allocated & BLAST_SEARCH_ALLOC_WFP_SECOND)
4589                 {
4590                        search->wfp_second = BLAST_WordFinderDestruct(search->wfp_second);
4591                        search->wfp_second = BLAST_WordFinderNew(search->sbp->alphabet_size,options->wordsize,1, FALSE);
4592                 }
4593 
4594 
4595         /* Only find words once if thresholds are the same. */
4596         search->wfp = search->wfp_first;
4597         if (search->whole_query == TRUE) {
4598             BlastNewFindWords(search, 0, search->context[search->first_context].query->length, search->pbp->threshold_second, (Uint1) 0);
4599         } else {
4600             BlastNewFindWords(search, search->required_start, search->required_end, search->pbp->threshold_second, (Uint1) 0);
4601         }
4602         lookup_position_aux_destruct(search->wfp->lookup);
4603         search->wfp_second = search->wfp_first;
4604 
4605         /* Unless search->pbp->cutoff_s[2]_set is set, we wish to calculate
4606            cutoff_s[2] from cutoff_e[2], rather than the other way around.
4607            Setting cutoff_s[2] to zero, as was the case in the first call to
4608            blast_set_parameters, accomplishes this.
4609         */
4610         if (!search->pbp->cutoff_s_set) {
4611             search->pbp->cutoff_s = 0;
4612         }
4613         if (!search->pbp->cutoff_s2_set) {
4614             search->pbp->cutoff_s2 = 0;
4615         }
4616         /* recalculate the cutoff scores with the newly calculated
4617            Karlin-Altschul parameters. */
4618         blast_set_parameters(search, 
4619                              options->dropoff_1st_pass,
4620                              options->dropoff_2nd_pass,
4621                              s_ComputeAverageLength(search),
4622                              search->searchsp_eff,
4623                              options->window_size);
4624         }
4625 
4626         /* Starting awake thread if multithreaded. */
4627         if (search->searchsp_eff > AWAKE_THR_MIN_SIZE)
4628                 BlastStartAwakeThread(search->thr_info);
4629 
4630         /* THE BLAST SEARCH IS HERE */
4631         do_the_blast_run(search);
4632 
4633 #ifdef BLAST_CLUSTER_HITS        
4634         if (!search->pbp->mb_params) {
4635         /* Cluster hits by region within the query */
4636         /* Assume that hits are already sorted in each hitlist by score */
4637            ValNodePtr mask;
4638            result_struct = search->result_struct;
4639            hspcnt = 0;
4640            /* Collect all HSPs in one array */
4641            
4642            bspp = (BioseqPtr PNTR) Malloc(result_struct->hitlist_count*
4643                                           sizeof(BioseqPtr));
4644            for (index=0; index<result_struct->hitlist_count; index++) {
4645               hspcnt += result_struct->results[index]->hspcnt;
4646               bspp[index] = readdb_get_bioseq(search->rdfp, 
4647                                               result_struct->results[index]->subject_id);
4648            }
4649            
4650            hspp = (BlastResultHspWithIdPtr PNTR)
4651               Malloc(hspcnt*sizeof(BlastResultHspWithIdPtr)); 
4652            index2 = 0;
4653            for (index=0; index<result_struct->hitlist_count; index++) {
4654               result_hitlist = result_struct->results[index];
4655               for (index1=0; index1<result_hitlist->hspcnt; index1++) {
4656                  hspp[index2] = (BlastResultHspWithIdPtr) 
4657                     Malloc(sizeof(BlastResultHspWithId));
4658                  hspp[index2]->hitlist_index = index;
4659                  hspp[index2]->hsp_index = index1;
4660                  hspp[index2++]->hsp = &(result_hitlist->hsp_array[index1]);
4661               }
4662            }
4663            /* Sort by score */
4664            HeapSort((VoidPtr)hspp, hspcnt, sizeof(BLASTResultHspPtr), 
4665                     BLASTResultHspScoreCmp);
4666            index = 0;
4667            while (index<hspcnt) {
4668               hsp = hspp[index]->hsp;
4669               index2 = 0;
4670               
4671               result_hitlist = 
4672                  search->result_struct->results[hspp[index]->hitlist_index];
4673               bsp1 = bspp[hspp[index]->hitlist_index];
4674               
4675               search1 = 
4676                  BlastQuerySequenceSetUp(bsp1, search->prog_name, 
4677                                          options);
4678               for (index1=index+1; index1<hspcnt; index1++) {
4679                  /* Check if the next hit passes a simple test to be a
4680                     candidate to belong to this cluster */
4681                  if (hspp[index1]->hsp==NULL)
4682                     continue;
4683                  hsp1 = hspp[index1]->hsp;
4684                  result_hitlist = 
4685                     search->result_struct->results[hspp[index1]->hitlist_index];
4686                  bsp2 = bspp[hspp[index1]->hitlist_index];
4687                  if (((FloatHi)ABS(bsp1->length - bsp2->length)) / 
4688                      MIN(bsp1->length, bsp2->length) > CLUSTER_LENGTH_THRESH)
4689                     continue;
4690                  q_overlap = 
4691                     MIN(hsp->query_offset+hsp->query_length, 
4692                         hsp1->query_offset+hsp1->query_length) - 
4693                     MAX(hsp->query_offset, hsp1->query_offset);
4694                  if (((FloatHi)q_overlap) / 
4695                      MAX(hsp->query_length, hsp1->query_length) <
4696                      CLUSTER_OVERLAP_THRESH)
4697                     continue;
4698                  
4699                  /* We have a candidate for attaching to the cluster */
4700                  if (hspp[index]->hitlist_index == hspp[index1]->hitlist_index) {
4701                     /* Almost identical hit from same subject in the same 
4702                        area of the query - remove! */
4703                     result_hitlist = 
4704                        search->result_struct->results[hspp[index1]->hitlist_index];
4705                     hspp[index1]->hsp = NULL;
4706                  }
4707                  
4708                  /* Do the two sequences search to determine whether this 
4709                     candidate in fact belongs to this cluster */
4710                  search1 = BlastSequencesOnTheFlyEx(search1, bsp2); 
4711                  
4712                  if (search1 && search1->result_struct->results[0]) {
4713                     if (search1->pbp->gapped_calculation)
4714                        kbp = search1->sbp->kbp_gap[search1->first_context];
4715                     else
4716                        kbp = search1->sbp->kbp[search1->first_context]; 
4717                     bit_score = ((search1->result_struct->results[0]->high_score *
4718                                   kbp->Lambda) - kbp->logK)/NCBIMATH_LN2;
4719                     if (bit_score > CLUSTER_SCORE_THRESH * 
4720                         MAX(bsp1->length, bsp2->length)) {
4721                        /* remove the respective hit */
4722                        hspp[index1]->hsp = NULL;
4723                     }
4724                  }
4725               }
4726               mask = search1->mask;
4727               while (mask) {
4728                  SeqLocSetFree(mask->data.ptrvalue);
4729                  mask = mask->next;
4730               }
4731               ValNodeFree(search1->mask);
4732               search1 = BlastSearchBlkDestruct(search1);
4733               for (++index; index<hspcnt && hspp[index]->hsp==NULL; index++);
4734            }
4735            
4736            for (index=0; index<result_struct->hitlist_count; index++)
4737               BioseqFree(bspp[index]);
4738            MemFree(bspp);
4739            /* Remove all NULLs from hspp array */
4740            for (index=0, index1=0; index<hspcnt; index++) {
4741               if (hspp[index]->hsp != NULL) {
4742                  if (index != index1)
4743                     hspp[index1] = hspp[index];
4744                  index1++;
4745               } else
4746                  hspp[index] = MemFree(hspp[index]);
4747            }
4748            hspcnt = index1;
4749            /* Sort according to original hitlist and hsp indices */
4750            HeapSort((VoidPtr)hspp, hspcnt, sizeof(BLASTResultHspPtr), 
4751                     ResultHspWithIdIndexCmp);
4752            
4753            /* Rearrange the hsp_arrays for all hitlists */
4754            index = 0;
4755            for (index2=0; index2<result_struct->hitlist_count; index2++) {
4756               index1 = 0;
4757               while (index<hspcnt && hspp[index]->hitlist_index == index2) {
4758                  result_struct->results[index2]->hsp_array[index1] = 
4759                     *(hspp[index]->hsp);
4760                  index++;
4761                  index1++;
4762               }
4763               result_struct->results[index2]->hspcnt = index1;
4764            }
4765            
4766            for (index=0; index<hspcnt; index++) 
4767               hspp[index] = MemFree(hspp[index]);
4768            hspp = MemFree(hspp);
4769         }
4770 #endif  /* Clustering hits */
4771 
4772     if (options->no_traceback) {
4773        BlastStopAwakeThread(search->thr_info);
4774        return NULL;
4775     }
4776 
4777     BLASTPostSearchLogic(search, options, &head, TRUE);
4778 
4779         /* Stop the awake thread. */
4780         BlastStopAwakeThread(search->thr_info);
4781 
4782         return head;
4783 }
4784 
4785 /*
4786         Deallocates all memory involved with the BlastHitRangePtr.
4787 */
4788 
4789 BlastHitRangePtr LIBCALL
4790 BlastHitRangeDestruct(BlastHitRangePtr old)
4791 
4792 {
4793         if (old == NULL)
4794                 return NULL;
4795 
4796         MemFree(old->range_list);
4797         MemFree(old->range_list_pointer);
4798 
4799         return MemFree(old);
4800 }
4801 
4802 /*
4803         Allocates a a BlastHitRangePtr, with two 'total' 
4804         BlastDoubleInt4Ptr's.
4805 */
4806 
4807 BlastHitRangePtr LIBCALL
4808 BlastHitRangeNew(Int4 total)
4809 
4810 {
4811         BlastHitRangePtr bhrp;
4812         Int4 index;
4813 
4814         bhrp = MemNew(sizeof(BlastHitRange));
4815 
4816         bhrp->range_list = (BlastDoubleInt4Ptr) MemNew(total*sizeof(BlastDoubleInt4));
4817         bhrp->range_list_pointer = (BlastDoubleInt4Ptr PNTR) MemNew(total*sizeof(BlastDoubleInt4Ptr));
4818         for (index=0; index<total; index++)
4819         {
4820                 bhrp->range_list_pointer[index] = &(bhrp->range_list[index]);
4821         }
4822 
4823         bhrp->current = 0;
4824         bhrp->total = total;
4825 
4826         return bhrp;
4827 }
4828 
4829 static int LIBCALLBACK
4830 bhrp_compare(VoidPtr v1, VoidPtr v2)
4831 
4832 {
4833         BlastDoubleInt4Ptr h1, h2;
4834         BlastDoubleInt4Ptr *hp1, *hp2;
4835 
4836         hp1 = (BlastDoubleInt4Ptr PNTR) v1;
4837         hp2 = (BlastDoubleInt4Ptr PNTR) v2;
4838         h1 = *hp1;
4839         h2 = *hp2;
4840 
4841         if (h1->gi < h2->gi)
4842                 return -1;
4843         if (h1->gi > h2->gi)
4844                 return 1;
4845 
4846         return 0;
4847 }
4848 
4849 BlastHitRangePtr LIBCALL
4850 BioseqHitRangeEngineCore(BlastSearchBlkPtr search, BLAST_OptionsBlkPtr options)
4851 
4852 {
4853         BlastHitRangePtr bhrp=NULL;
4854         BLASTResultsStructPtr result_struct;
4855         Int4 hitlist_count, index, total_hsps;
4856         Int4 sequence_length, length;
4857         Uint1Ptr sequence;
4858 
4859         if (search == NULL || search->query_invalid)
4860                 return NULL;
4861 
4862         /* Starting awake thread if multithreaded. */
4863         if (search->searchsp_eff > AWAKE_THR_MIN_SIZE)
4864                 BlastStartAwakeThread(search->thr_info);
4865 
4866         do_the_blast_run(search);
4867 
4868         if (search->prog_number==blast_type_blastn) {
4869            /* Unconcatenate the strands by adjusting the query offsets in
4870               all hsps */
4871            search->context[search->first_context].query->length = 
4872               search->query_context_offsets[search->first_context+1] - 1;
4873            /*BlastAdjustHitOffsets(search);*/
4874         }
4875         
4876         if (StringCmp(search->prog_name, "blastn") == 0 && 
4877                 search->pbp->gapped_calculation)
4878         {
4879                 search->pbp->gap_open = options->gap_open;
4880                 search->pbp->gap_extend = options->gap_extend;
4881 /*
4882                 search->pbp->gap_x_dropoff = (BLAST_Score) (options->gap_x_dropoff*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
4883                 search->pbp->gap_x_dropoff_final = (BLAST_Score) (options->gap_x_dropoff_final*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
4884 */
4885 
4886 
4887                 result_struct = search->result_struct;
4888                 hitlist_count = result_struct->hitlist_count;
4889                 total_hsps = 0;
4890                 for (index=0; index<hitlist_count; index++)
4891                 {
4892                         total_hsps += result_struct->results[index]->hspcnt;
4893                 }
4894                 bhrp = BlastHitRangeNew(total_hsps);
4895                 bhrp->query_id = search->query_id;
4896                 
4897                 result_struct = search->result_struct;
4898                 hitlist_count = result_struct->hitlist_count;
4899 
4900                 sequence=NULL;
4901                 sequence_length=0;
4902                 
4903                 for (index=0; index<hitlist_count; index++)
4904                 {
4905                         length = readdb_get_sequence_ex(search->rdfp, result_struct->results[index]->subject_id, &sequence, &sequence_length, TRUE);
4906                         SumBlastGetGappedAlignmentEx(search, index, FALSE, FALSE, sequence+1, length, FALSE, NULL, bhrp, 0);
4907                 }
4908                 sequence = MemFree(sequence);
4909         }
4910         else
4911         {
4912                 return NULL;
4913         }
4914 
4915         HeapSort(bhrp->range_list_pointer, bhrp->current, sizeof(BlastHitRangePtr PNTR), bhrp_compare);
4916 
4917         /* Stop the awake thread. */
4918         BlastStopAwakeThread(search->thr_info);
4919 
4920         return bhrp;
4921 }
4922 
4923 SeqAlignPtr LIBCALL
4924 BioseqBlastEngineEx(BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total)
4925 
4926 {
4927         SeqLocPtr slp;
4928         SeqAlignPtr seqalign;
4929 
4930         slp = NULL;
4931         ValNodeAddPointer(&slp, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBest(bsp->id, SEQID_GI)));
4932         seqalign = BioseqBlastEngineByLocEx(slp, progname, database, options, other_returns, error_returns, callback, seqid_list, gi_list, gi_list_total);
4933         SeqLocFree(slp);
4934         
4935         return seqalign;
4936 }
4937 
4938 SeqAlignPtr LIBCALL
4939 BioseqBlastEngine(BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
4940 {
4941    /* --KM added NULL mult_queries param to call */ 
4942    return BioseqBlastEngineWithCallbackMult(bsp, progname, database, options, other_returns, error_returns, callback, NULL, NULL);
4943 }
4944 
4945 SeqAlignPtr LIBCALL 
4946 BioseqBlastEngineWithCallback(BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)))
4947 {
4948    return BioseqBlastEngineWithCallbackMult(bsp, progname, database, options, other_returns, error_returns, callback, NULL, NULL);
4949 }
4950 
4951 /* --KM added mult_queries parameter */
4952 SeqAlignPtr LIBCALL 
4953 BioseqBlastEngineWithCallbackMult(BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)), QueriesPtr mult_queries)
4954 {
4955         SeqLocPtr slp;
4956         SeqAlignPtr seqalign;
4957 
4958         slp = NULL;
4959         ValNodeAddPointer(&slp, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBest(bsp->id, SEQID_GI)));
4960         seqalign = BioseqBlastEngineByLocWithCallbackMult(slp, progname, database, options, other_returns, error_returns, callback, NULL, NULL, 0, handle_results, mult_queries);/* --KM pass mult_queries */
4961         SeqLocFree(slp);
4962         
4963         return seqalign;
4964 }
4965 
4966 
4967 
4968 SeqAlignPtr LIBCALL
4969 BioseqBlastEngineByLoc(SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
4970 
4971 {
4972         return BioseqBlastEngineByLocEx(slp, progname, database, options, other_returns, error_returns, callback, NULL, NULL, 0);
4973 
4974 }
4975 
4976 SeqAlignPtr LIBCALL
4977 BioseqBlastEngineByLocEx(SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total)
4978 
4979 {
4980    return BioseqBlastEngineByLocWithCallback(slp, progname, database, options, other_returns, error_returns, callback, seqid_list, gi_list, gi_list_total, NULL); /* --KM pass NULL mult_queries */
4981 }
4982 
4983 SeqAlignPtr LIBCALL
4984 BioseqBlastEngineByLocWithCallback(SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)))
4985 {
4986         return BioseqBlastEngineByLocWithCallbackMult(slp, progname, database, options, other_returns, error_returns, callback, seqid_list, gi_list, gi_list_total, handle_results, NULL);
4987 }
4988 
4989 /* --KM added mult_queries param */
4990 SeqAlignPtr LIBCALL
4991 BioseqBlastEngineByLocWithCallbackMult(SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)), QueriesPtr mult_queries)
4992 {
4993         Boolean options_allocated=FALSE;
4994         BlastSearchBlkPtr search;
4995         Int2 status;
4996         SeqAlignPtr head;
4997         SeqLocPtr whole_slp=NULL;
4998                 /* Futamura */
4999         posSearchItems *posSearch;
5000         compactSearchItems *compactSearch = NULL;
5001         Boolean  checkReturn = FALSE;
5002 
5003         head = NULL;
5004 
5005         if (error_returns)
5006         {
5007                 *error_returns = NULL;
5008         }
5009 
5010         if (other_returns)
5011         {
5012                 *other_returns = NULL;
5013         }
5014 
5015         if (progname == NULL)
5016                 return NULL;
5017 
5018         /* If no options, use default. */
5019         if (options == NULL)
5020         {
5021                 options = BLASTOptionNew(progname, FALSE);
5022                 options_allocated = TRUE;
5023         }
5024 
5025         status = BLASTOptionValidateEx(options, progname, error_returns);
5026         if (status != 0)
5027         {       /* error messages in other_returns? */
5028                 return NULL;
5029         }
5030 
5031         if (slp == NULL || database == NULL)
5032                 return NULL;
5033 
5034     if(options->is_rps_blast) {
5035         RPSInfoPtr rpsinfo;
5036         BioseqPtr bsp, fake_bsp;
5037         Boolean query_is_na;
5038         
5039         if((bsp = BioseqLockById(SeqLocId(slp))) == NULL)
5040             return NULL;
5041         
5042         /* RPS Blast discard program name and use specific RPS Blast
5043            logic for this */    
5044         
5045         if(bsp->mol == Seq_mol_aa) {
5046             query_is_na = FALSE;
5047             progname = "blastp";
5048         } else {
5049             query_is_na = TRUE;
5050             progname = "tblastn";
5051         }
5052         if((rpsinfo = RPSInitEx(database, !query_is_na, options)) == NULL) {
5053         
5054             ErrPostEx(SEV_ERROR, 0, 0, "Failure to initialize RPS: %s %s",
5055                       progname, database);
5056             return NULL;
5057         }
5058         /* Update size of the database in accordance with RPS Database size */
5059         RPSUpdateDbSize(options, rpsinfo, bsp->length);
5060         
5061         if(!query_is_na)
5062             fake_bsp = bsp;
5063         else {
5064             options->db_genetic_code = options->genetic_code;
5065             fake_bsp = createFakeProtein();
5066         }
5067         search = BLASTSetUpSearch (fake_bsp, progname, fake_bsp->length, 0, 
5068                                    NULL, options, NULL);
5069         
5070         if (search == NULL)
5071             return NULL;
5072         
5073         search->thr_info->tick_callback = NULL;
5074         search->thr_info->star_callback = NULL;
5075         
5076         head = RPSBlastSearch(search, bsp, rpsinfo);
5077         
5078         if(query_is_na)
5079             BioseqFree(fake_bsp);
5080         BioseqUnlock(bsp);
5081         RPSClose(rpsinfo);
5082     } else {
5083         
5084         search = BLASTSetUpSearchByLocWithReadDbEx(slp, progname, SeqLocLen(slp), database, options, NULL, seqid_list, gi_list, gi_list_total, mult_queries);
5085         /* --KM pass mult_queries */
5086         
5087         if (search == NULL) {
5088            /* We need to veryfy if database name is wrong and to set error
5089                returns correctly */
5090             Boolean is_prot;
5091             BlastErrorMsgPtr error_msg;
5092             CharPtr chptr;
5093             ReadDBFILEPtr rdfp=NULL;
5094 
5095             if(!StringICmp(progname, "blastp") ||
5096                !StringICmp(progname, "blastx")) {
5097                 is_prot = TRUE;
5098             } else {
5099                 is_prot = FALSE;
5100             }
5101 
5102             rdfp = readdb_new(database, is_prot);
5103             if(rdfp == NULL) {
5104                 error_msg = MemNew(sizeof(BlastErrorMsg));
5105                 chptr = MemNew(StringLen(database) + 256);
5106                 sprintf(chptr, "Database %s was not found or does not exist",
5107                         database);
5108                 error_msg->msg = chptr;
5109                 error_msg->level = 3; /* FATAL */
5110                 ValNodeAddPointer(error_returns, 0, error_msg);
5111             }
5112 
5113             readdb_destruct(rdfp);
5114             return NULL;
5115         }
5116         
5117         search->thr_info->tick_callback = callback;
5118         search->thr_info->star_callback = callback;
5119         search->handle_results = handle_results;
5120         search->output = options->output;
5121 
5122         /* Futamura psitblastn */
5123         if (options->recoverCheckpoint)
5124           search->positionBased = TRUE;
5125         else
5126           search->positionBased = FALSE;
5127 
5128         if (options->recoverCheckpoint) {
5129           posSearch = (posSearchItems *) MemNew(1 * sizeof(posSearchItems));
5130           compactSearch = compactSearchNew(compactSearch);
5131           copySearchItems(compactSearch, search, options->matrix);
5132           posInitializeInformation(posSearch,search);
5133           /*AAS*/
5134 
5135           checkReturn = posReadCheckpoint(posSearch, compactSearch,
5136                                           options->CheckpointFileName,
5137                                           NO_SCOREMAT_IO,
5138                                           &(search->error_return));
5139           /* Reading the checkpoint changes the statistical parameters
5140              kbp_psi and kbp_gap_psi.  Recalculate the cutoffs by calling
5141              blast_set_parameters. */
5142 
5143           /* Unless search->pbp->cutoff_s[2]_set is set, we wish to calculate
5144              cutoff_s[2] from cutoff_e[2], rather than the other way around.
5145              Setting cutoff_s[2] to zero, as was the case in the first call to
5146              blast_set_parameters, accomplishes this.
5147           */
5148           if (!search->pbp->cutoff_s_set) {
5149               search->pbp->cutoff_s = 0;
5150           }
5151           if (!search->pbp->cutoff_s2_set) {
5152               search->pbp->cutoff_s2 = 0;
5153           }
5154           search->sbp->kbp = search->sbp->kbp_psi;
5155           search->sbp->kbp_gap = search->sbp->kbp_gap_psi;
5156           blast_set_parameters(search,
5157                                options->dropoff_1st_pass,
5158                                options->dropoff_2nd_pass,
5159                                s_ComputeAverageLength(search),
5160                                search->searchsp_eff,
5161                                options->window_size);
5162 
5163           search->sbp->posMatrix = posSearch->posMatrix;
5164           if (NULL == search->sbp->posFreqs)
5165             search->sbp->posFreqs =  allocatePosFreqs(compactSearch->qlength,
5166                                                       compactSearch->alphabetSize);
5167           copyPosFreqs(posSearch->posFreqs,search->sbp->posFreqs,
5168                        compactSearch->qlength, compactSearch->alphabetSize);
5169 
5170           if (!checkReturn) {
5171                 BlastConstructErrorMessage("BioseqBlastEngineByLocEx",
5172                         "Error recovering from checkpoint", 3, error_returns);
5173                 return NULL;
5174           }
5175         }
5176 
5177         /* ----- Here is real BLAST search done ------- */
5178         if (search->positionBased)
5179           head = BioseqBlastEngineCore(search, options, search->sbp->posMatrix);
5180         else if (options->is_megablast_search) {
5181            SeqAlignPtr PNTR seqalignp;
5182            seqalignp = BioseqMegaBlastEngineCore(search, options);
5183            head = *seqalignp;
5184         } else
5185           head = BioseqBlastEngineCore(search, options, NULL);
5186         /* end Futamura */
5187         
5188     }
5189 
5190     if (search->error_return) {
5191         ValNodeLink(error_returns, search->error_return);
5192         search->error_return = NULL;
5193     }
5194     
5195     if (other_returns) { /* format dbinfo etc.  */
5196         *other_returns = BlastOtherReturnsPrepare(search);
5197     }
5198     
5199     if (options_allocated) {
5200         options = BLASTOptionDelete(options);
5201     }
5202     
5203     search = BlastSearchBlkDestruct(search);
5204 
5205     if(!options->is_rps_blast) {
5206     
5207         /* Adjsut the offset if the query does not cover the entire sequence. */
5208         if (slp->choice != SEQLOC_WHOLE) {
5209             ValNodeAddPointer(&whole_slp, SEQLOC_WHOLE, SeqIdFindBest(SeqLocId(slp), SEQID_GI));
5210             if (SeqLocAinB(whole_slp, slp) != 0) {
5211                 AdjustOffSetsInSeqAlign(head, slp, NULL);
5212             }
5213             ValNodeFree(whole_slp);
5214         }
5215     }
5216 
5217     return head;
5218 }
5219 
5220 SeqLocPtr LIBCALL
5221 BioseqHitRangeEngine(BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total)
5222 
5223 {
5224         SeqLocPtr slp;
5225 
5226         slp = NULL;
5227         ValNodeAddPointer(&slp, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBest(bsp->id, SEQID_GI)));
5228         return BioseqHitRangeEngineByLoc(slp, progname, database, options, other_returns, error_returns, callback, seqid_list, gi_list, gi_list_total);
5229 }
5230 
5231 SeqLocPtr 
5232 HitRangeToSeqLoc(BlastHitRangePtr bhrp, Int4 link_value, Boolean combine)
5233 
5234 {
5235         Boolean make_seqloc, start=TRUE;
5236         Int4 index, total, start_pos=0, stop_pos, largest_stop_pos=0;
5237         SeqIntPtr sint;
5238         SeqLocPtr retval=NULL;
5239 
5240         if (bhrp == NULL)
5241                 return NULL;
5242 
5243         total = bhrp->current;
5244         index=0;
5245         while (index < total)
5246         {
5247            if (combine)
5248            {
5249                 if (start == TRUE)
5250                 {
5251                         start_pos = bhrp->range_list_pointer[index]->gi + bhrp->base_offset;
5252                         start = FALSE;
5253                         largest_stop_pos = 0;
5254                 }
5255                 else
5256                 {
5257                         /* Keep track of largest stop position. */
5258                         largest_stop_pos = MAX(largest_stop_pos, bhrp->range_list_pointer[index]->ordinal_id + bhrp->base_offset);
5259                         make_seqloc = FALSE;
5260                         if (index == total-1)   /* Last one. */
5261                         {
5262                                 stop_pos = bhrp->range_list_pointer[index]->ordinal_id + bhrp->base_offset;
5263                                 start = TRUE;
5264                                 make_seqloc = TRUE;
5265                         }
5266                         else if (largest_stop_pos+link_value < bhrp->range_list_pointer[index+1]->gi + bhrp->base_offset)
5267                         { /* Check overlap with next one. */
5268                                 stop_pos = bhrp->range_list_pointer[index]->ordinal_id + bhrp->base_offset;
5269                                 start = TRUE;
5270                                 make_seqloc = TRUE;
5271                         }
5272                         
5273                         if (make_seqloc)
5274                         {
5275                                 sint = SeqIntNew();
5276                                 sint->from = start_pos;
5277                                 sint->to = MAX(largest_stop_pos, stop_pos);
5278                                 sint->strand = Seq_strand_plus;
5279                                 sint->id = SeqIdDup(SeqIdFindBest(bhrp->query_id, SEQID_GI));
5280                                 ValNodeAddPointer(&retval, SEQLOC_INT, sint);
5281                         }
5282                         index++;
5283                 }
5284            }
5285            else
5286            {
5287                 sint = SeqIntNew();
5288                 sint->from = bhrp->range_list_pointer[index]->gi + bhrp->base_offset;
5289                 sint->to = bhrp->range_list_pointer[index]->ordinal_id + bhrp->base_offset;
5290                 sint->strand = Seq_strand_plus;
5291                 sint->id = SeqIdDup(SeqIdFindBest(bhrp->query_id, SEQID_GI));
5292                 ValNodeAddPointer(&retval, SEQLOC_INT, sint);
5293                 index++;
5294            }
5295         }
5296 
5297         return retval;
5298 }
5299 
5300 #define HITRANGE_LINKVALUE 5
5301 
5302 SeqLocPtr LIBCALL
5303 BioseqHitRangeEngineByLoc(SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total)
5304 
5305 {
5306         Boolean options_allocated=FALSE;
5307         BlastHitRangePtr bhrp;
5308         BlastSearchBlkPtr search;
5309         Int2 status;
5310         SeqLocPtr seqloc, whole_slp=NULL;
5311 
5312         if (error_returns)
5313         {
5314                 *error_returns = NULL;
5315         }
5316 
5317         if (other_returns)
5318         {
5319                 *other_returns = NULL;
5320         }
5321 
5322         if (progname == NULL)
5323                 return NULL;
5324 
5325         /* If no options, use default. */
5326         if (options == NULL)
5327         {
5328                 options = BLASTOptionNew(progname, FALSE);
5329                 options_allocated = TRUE;
5330         }
5331 
5332         status = BLASTOptionValidateEx(options, progname, error_returns);
5333         if (status != 0)
5334         {       /* error messages in other_returns? */
5335                 return NULL;
5336         }
5337 
5338         if (slp == NULL || database == NULL)
5339                 return NULL;
5340 
5341         search = BLASTSetUpSearchByLocWithReadDbEx(slp, progname, SeqLocLen(slp), database, options, NULL, seqid_list, gi_list, gi_list_total, NULL); /* --KM pass NULL mult_queries */
5342 
5343         if (search == NULL)
5344         {
5345                 return NULL;
5346         }
5347 
5348         search->thr_info->tick_callback = callback;
5349         search->thr_info->star_callback = callback;
5350 
5351         bhrp = BioseqHitRangeEngineCore(search, options);
5352         if (bhrp == NULL) /* can happen for invalid queries. */
5353                 return NULL;
5354 
5355         if (slp->choice != SEQLOC_WHOLE) {
5356                 ValNodeAddPointer(&whole_slp, SEQLOC_WHOLE, SeqIdFindBest(SeqLocId(slp), SEQID_GI));
5357                 bhrp->base_offset = GetOffsetInLoc(slp, whole_slp, SEQLOC_START);
5358                 ValNodeFree(whole_slp);
5359         }
5360         
5361         seqloc = HitRangeToSeqLoc(bhrp, HITRANGE_LINKVALUE, TRUE);
5362         bhrp = BlastHitRangeDestruct(bhrp);
5363         if (search->error_return)
5364         {
5365                 ValNodeLink(error_returns, search->error_return);
5366                 search->error_return = NULL;
5367         }
5368 
5369         if (other_returns)
5370         { /* format dbinfo etc.  */
5371                 *other_returns = BlastOtherReturnsPrepare(search);
5372         }
5373 
5374         if (options_allocated)
5375         {
5376                 options = BLASTOptionDelete(options);
5377         }
5378         search = BlastSearchBlkDestruct(search);
5379 
5380         return seqloc;
5381 }
5382 
5383 void LIBCALL BlastOtherReturnsFree(ValNodePtr other_returns)
5384 {
5385     BLAST_KarlinBlkPtr ka_params;
5386     BLAST_MatrixPtr matrix;
5387     CharPtr params_buffer;
5388     TxDfDbInfoPtr dbinfo;
5389     ValNodePtr  mask_loc, mask_loc_start, vnp;
5390     
5391     mask_loc = NULL;
5392     
5393     for (vnp=other_returns; vnp; vnp = vnp->next) {
5394         switch (vnp->choice) {
5395         case TXDBINFO:
5396             dbinfo = vnp->data.ptrvalue;
5397             dbinfo = TxDfDbInfoDestruct(dbinfo);
5398             break;
5399         case TXKABLK_NOGAP:
5400             ka_params = vnp->data.ptrvalue;
5401             MemFree(ka_params);
5402             break;
5403         case TXKABLK_GAP:
5404             ka_params = vnp->data.ptrvalue;
5405             MemFree(ka_params);
5406             break;
5407         case TXPARAMETERS:
5408             params_buffer = vnp->data.ptrvalue;
5409             MemFree(params_buffer);
5410             break;
5411         case TXMATRIX:
5412             matrix = vnp->data.ptrvalue;
5413             matrix = BLAST_MatrixDestruct(matrix);
5414             
5415             break;
5416         case SEQLOC_MASKING_NOTSET:
5417         case SEQLOC_MASKING_PLUS1:
5418         case SEQLOC_MASKING_PLUS2:
5419         case SEQLOC_MASKING_PLUS3:
5420         case SEQLOC_MASKING_MINUS1:
5421         case SEQLOC_MASKING_MINUS2:
5422         case SEQLOC_MASKING_MINUS3:
5423             ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
5424             break;
5425         default:
5426             break;
5427         }
5428     }
5429     
5430     mask_loc_start = mask_loc;
5431     while (mask_loc) {
5432         SeqLocSetFree(mask_loc->data.ptrvalue);
5433         mask_loc = mask_loc->next;
5434     }
5435     ValNodeFree(mask_loc_start);
5436 
5437     other_returns = ValNodeFree(other_returns);
5438 
5439     return;
5440 }
5441 
5442 ValNodePtr LIBCALL
5443 BlastOtherReturnsPrepare(BlastSearchBlkPtr search)
5444 
5445 {
5446     BLAST_KarlinBlkPtr ka_params;
5447     BLAST_MatrixPtr blast_matrix;
5448     CharPtr parameters, chptr;
5449     ReadDBFILEPtr rdfp_var;
5450     TxDfDbInfoPtr dbinfo, head, dbinfo_var=NULL;
5451     ValNodePtr other_returns=NULL;
5452     
5453     head = NULL;
5454     if (search->thr_info->blast_gi_list) {
5455         dbinfo = MemNew(sizeof(TxDfDbInfo));
5456         dbinfo->total_length = search->dblen;
5457         dbinfo->number_seqs = search->dbseq_num;
5458         dbinfo->subset = TRUE;
5459         head = dbinfo;
5460         dbinfo_var = dbinfo;
5461     }
5462     
5463     rdfp_var = search->rdfp;
5464     while (rdfp_var) {
5465         dbinfo = MemNew(sizeof(TxDfDbInfo));
5466         dbinfo->name = StringSave(readdb_get_filename(rdfp_var));       
5467 
5468         if((chptr = readdb_get_title(rdfp_var)) == NULL)
5469             chptr = readdb_get_filename(rdfp_var);
5470         dbinfo->definition = StringSave(chptr); 
5471         
5472         dbinfo->date = StringSave(readdb_get_date(rdfp_var));   
5473 
5474         dbinfo->is_protein = readdb_is_prot(rdfp_var);
5475 
5476         if (rdfp_var->aliaslen)
5477             dbinfo->total_length = rdfp_var->aliaslen;
5478         else
5479             dbinfo->total_length = readdb_get_dblen(rdfp_var);
5480         if (rdfp_var->aliasnseq)
5481             dbinfo->number_seqs = rdfp_var->aliasnseq;
5482         else
5483             dbinfo->number_seqs = readdb_get_num_entries(rdfp_var);
5484         if (head == NULL) {
5485             head = dbinfo;
5486             dbinfo_var = dbinfo;
5487         } else {
5488             dbinfo_var->next = dbinfo;
5489             dbinfo_var = dbinfo_var->next;
5490         }
5491         rdfp_var = rdfp_var->next;
5492     }
5493     if (head)
5494         ValNodeAddPointer (&other_returns, TXDBINFO, head);
5495     
5496     if (search->sbp->kbp && search->sbp->kbp[search->first_context]) {
5497         ka_params = BlastKarlinBlkCreate();
5498         ka_params->Lambda = search->sbp->kbp[search->first_context]->Lambda;
5499         ka_params->K = search->sbp->kbp[search->first_context]->K;
5500         ka_params->H = search->sbp->kbp[search->first_context]->H;
5501         ValNodeAddPointer (&other_returns, TXKABLK_NOGAP, ka_params);
5502     }
5503     
5504     if (search->pbp->gapped_calculation == TRUE) {
5505         if (search->sbp->kbp_gap && search->sbp->kbp_gap[search->first_context]) {
5506                 ka_params = BlastKarlinBlkCreate();
5507                 ka_params->Lambda = search->sbp->kbp_gap[search->first_context]->Lambda;
5508                 ka_params->K = search->sbp->kbp_gap[search->first_context]->K;
5509                 ka_params->H = search->sbp->kbp_gap[search->first_context]->H;
5510                 ValNodeAddPointer (&other_returns, TXKABLK_GAP, ka_params);
5511         }
5512     }
5513     
5514     if (search->query_invalid == FALSE) {
5515         parameters = FormatBlastParameters(search);
5516         ValNodeAddPointer (&other_returns, TXPARAMETERS, parameters);
5517     }
5518     
5519     blast_matrix = BLAST_MatrixFill(search->sbp, search->positionBased);
5520     ValNodeAddPointer (&other_returns, TXMATRIX, blast_matrix);
5521     
5522     if (search->mask)
5523         ValNodeLink(&other_returns, search->mask);
5524     
5525     if (search->pbp->is_rps_blast) {
5526         ValNodeAddFloat(&other_returns, EFF_SEARCH_SPACE,
5527             ((Nlm_FloatHi) search->dblen_eff)*
5528             ((Nlm_FloatHi) (search->rps_qlen - search->length_adjustment)));
5529     } else {
5530         ValNodeAddFloat(&other_returns, EFF_SEARCH_SPACE,
5531             ((Nlm_FloatHi) search->dblen_eff)*
5532             ((Nlm_FloatHi) search->context[search->first_context].query->effective_length));
5533     }
5534     ValNodeAddInt(&other_returns, EFF_HSP_LENGTH, search->length_adjustment);
5535  
5536     /* If Mega BLAST endpoint results, save them here */
5537     if (search->mb_endpoint_results && search->pbp->mb_params && 
5538         search->pbp->mb_params->no_traceback)
5539        /* Here 21 = BlastResponse_mbalign (see file objblst3.h) */
5540        ValNodeAddPointer(&other_returns, 21, 
5541                          search->mb_endpoint_results->data.ptrvalue);
5542        
5543     return other_returns;
5544 }
5545 
5546 
5547 /*
5548         Deallocates memory for BLAST_ExtendWordParamsPtr
5549         
5550 */
5551 
5552 static BLAST_ExtendWordParamsPtr
5553 BLAST_ExtendWordParamsDestruct (BLAST_ExtendWordParamsPtr ewp_params)
5554 
5555 {
5556         ewp_params = MemFree(ewp_params);
5557 
5558         return ewp_params;
5559 }
5560 
5561 
5562 /*
5563         Allocates memory for the BLAST_ExtendWordParamsPtr.  
5564 
5565         This function also sets many of the parametes such as min_diag_length etc.
5566 
5567         Int4 qlen: length of the query.
5568         Boolean multiple_hits: specifies whether multiple hits method is used.
5569         Int4 window_size: the max. distance between two hits that are extended.
5570 */
5571 
5572 BLAST_ExtendWordParamsPtr
5573 BLAST_ExtendWordParamsNew (Int4 qlen, Boolean multiple_hits, Int4 window_size)
5574 
5575 {
5576         BLAST_ExtendWordParamsPtr ewp_params;
5577         Int4 min_diag_length, bits_to_shift;
5578 
5579         ewp_params= MemNew(sizeof(BLAST_ExtendWordParams));
5580 
5581         if (ewp_params)
5582         {
5583                 min_diag_length = 1;
5584                 bits_to_shift = 0;
5585                 /* What power of 2 is just longer than the query? */
5586                 while (min_diag_length < (qlen+window_size))
5587                 {
5588                         min_diag_length = min_diag_length << 1;
5589                         bits_to_shift++;
5590                 }
5591                 /* These are used in the word finders to shift and mask 
5592                 rather than dividing and taking the remainder. */
5593                 ewp_params->bits_to_shift = bits_to_shift;
5594                 ewp_params->min_diag_length = min_diag_length;
5595                 ewp_params->min_diag_mask = min_diag_length-1;
5596                 ewp_params->multiple_hits = multiple_hits;
5597                 ewp_params->offset = window_size;
5598                 ewp_params->window = window_size;
5599         }
5600         return ewp_params;
5601 }
5602 
5603 /*
5604         Deallocates memory for the BLAST_ExtendWordPtr.
5605 
5606 */
5607 BLAST_ExtendWordPtr LIBCALL 
5608 BLAST_ExtendWordDestruct (BLAST_ExtendWordPtr ewp)
5609 
5610 {
5611         if (ewp)
5612         {
5613                 if (ewp->_buffer)
5614                         ewp->_buffer = MemFree(ewp->_buffer);
5615 
5616                 ewp = MemFree(ewp);
5617         }
5618 
5619         return ewp;
5620 
5621 }
5622 
5623 /*
5624         Allocates memory for the BLAST_ExtendWordPtr.  
5625 
5626         All of the memory for the arrays is allocated in one chunk
5627         called "_buffer".  If multiple_hits is specified them room
5628         for "diag_level", "last_hit", and "version" is allocated and
5629         pointers into the array for these are set.  If multiple_hits
5630         is not set, then only room for diag_level and version is allocated;
5631         last_hit is not needed.
5632 
5633         Int4 qlen, dblen: length of the query and the LONGEST subject sequence.
5634         Boolean multiple_hits: specifies whether multiple hits method is used.
5635 
5636         ** CFJ
5637         ** - previously buffer contained diag_level array, last_hit array, and version array
5638         **   change to contain array of struct {dl,lh,v}.
5639         **
5640         ** - Now that version is no longer used, combining the remaining 2 is probably not a big win.
5641 
5642 */
5643 BLAST_ExtendWordPtr
5644 BLAST_ExtendWordNew (BLAST_ExtendWordParamsPtr ewp_params)
5645 
5646 {
5647         BLAST_ExtendWordPtr ewp;
5648         int i;
5649 
5650         ewp = MemNew(sizeof(BLAST_ExtendWord));
5651 
5652         if (ewp)
5653         {
5654                 /* Allocate the buffer to be used for Combo array. */
5655                 ewp->_buffer = (Int4Ptr) MemNew(ewp_params->min_diag_length*sizeof(CfjModStruct));
5656 
5657                 if (ewp->_buffer == NULL)
5658                 {
5659                         ewp = BLAST_ExtendWordDestruct(ewp);
5660                         return NULL;
5661                 }
5662 
5663                 ewp->combo_array= (CfjModStruct *) ewp->_buffer;
5664                 ewp_params->offset=0;
5665                 for(i=0;i<ewp_params->min_diag_length;i++){
5666                   ewp->combo_array[i].diag_level=0;
5667                   ewp->combo_array[i].last_hit = -ewp_params->window;
5668                 }
5669         }
5670 
5671         return ewp;
5672 }
5673 
5674 /*****************************************************************************
5675 *
5676 *       Zeroe's out the memory in the array _buffer, if offset is greater than
5677 *       INT4_MAX/2.  The first "min_diag_length" spaces in the array are used 
5678 *       by the array "diag_level", the second "min_diag_length" spaces are used 
5679 *       by "last_hit".  All of these are zeroed out.  The last "min_diag_length" 
5680 *       spaces are used by "version"; these are not zeroed out.
5681 *
5682 *       If offset is not greater than INT4_MAX/2, then the memory is not
5683 *       zeroed out.  Rather "offset" is used as a "zero-point" that is
5684 *       always greater than the next possible value when the word finder
5685 *       starts working on a new subject sequence.
5686 *
5687 ******************************************************************************/
5688 void LIBCALL
5689 BlastExtendWordExit(BlastSearchBlkPtr search)
5690 
5691 {
5692         BLAST_ExtendWordPtr ewp;
5693         BLAST_ExtendWordParamsPtr ewp_params;
5694         Int2 index;
5695         Int4 i, min_diag_length;
5696 
5697         ewp_params = search->ewp_params;
5698 
5699         for (index=search->first_context; index<=search->last_context; index++)
5700         {
5701 
5702                 if (ewp_params->offset >= INT4_MAX/2)
5703                 {
5704                         ewp = search->context[index].ewp;
5705                         if (ewp) {
5706                            min_diag_length = ewp_params->min_diag_length;
5707                            for(i=0;i<min_diag_length;i++)
5708                            {
5709                                 ewp->combo_array[i].diag_level=0;
5710                                 ewp->combo_array[i].last_hit = -ewp_params->window;
5711                            }
5712                         }
5713                 }
5714         }
5715 
5716         if (ewp_params->offset < INT4_MAX/2)
5717         {
5718                 ewp_params->offset += search->subject->length + ewp_params->window ;
5719         }
5720         else
5721         {
5722                 ewp_params->offset = 0;
5723         }
5724 }
5725 
5726 
5727 BlastSequenceBlkPtr LIBCALL
5728 BlastSequenceBlkDestruct(BlastSequenceBlkPtr seq_blk)
5729 
5730 {
5731 
5732         if (seq_blk == NULL)
5733                 return NULL;
5734 
5735         /* Free from the start of sequence if it's filled in. */
5736         if (seq_blk->sequence_start != NULL)
5737         {
5738                 seq_blk->sequence_start = MemFree(seq_blk->sequence_start);
5739         }
5740         else
5741         {       
5742                 seq_blk->sequence = MemFree(seq_blk->sequence);
5743         }
5744 
5745         seq_blk = MemFree(seq_blk);
5746 
5747         return seq_blk;
5748 }
5749 
5750 
5751 
5752 static BLASTContextStructPtr 
5753 BLASTContextFree(BLASTContextStructPtr context, Int2 number)
5754 
5755 {
5756         Int2 index;
5757 
5758         if (context == NULL)
5759           return NULL;
5760 
5761         for (index=0; index<number; index++)
5762         {
5763                 context[index].ewp = BLAST_ExtendWordDestruct(context[index].ewp);
5764                 if (context[index].query_allocated == TRUE)
5765                 {
5766                         context[index].query = BlastSequenceBlkDestruct(context[index].query);
5767                 }
5768         }
5769         context = MemFree(context);
5770 
5771         return context;
5772 }
5773 
5774 void BlastThrInfoFree(BlastThrInfoPtr thr_info)
5775 {
5776     VoidPtr status=NULL;
5777 
5778     if (thr_info == NULL)
5779         return;
5780 
5781     if (thr_info->index_thr)
5782     {
5783                 NlmThreadJoin(thr_info->index_thr, &status);
5784                 thr_info->index_thr = NULL;
5785     }
5786 
5787     if (thr_info->awake_thr) 
5788     {
5789         NlmThreadJoin(thr_info->awake_thr, &status);
5790         thr_info->awake_thr = NULL;
5791         if (thr_info->callback_mutex)
5792         {
5793                 NlmMutexDestroy(thr_info->callback_mutex);
5794                 thr_info->callback_mutex = NULL;
5795         }
5796     }
5797     BlastGiListDestruct(thr_info->blast_gi_list, TRUE);
5798     
5799     NlmMutexDestroy(thr_info->db_mutex);
5800     NlmMutexDestroy(thr_info->results_mutex);
5801     NlmMutexDestroy(thr_info->callback_mutex);
5802 
5803     MemFree(thr_info);
5804     
5805     return;
5806 }
5807 
5808 BlastThrInfoPtr BlastThrInfoNew(void)
5809 {
5810     BlastThrInfoPtr thr_info;
5811     
5812     thr_info = MemNew(sizeof(BlastThrInfo));
5813     
5814     return thr_info;
5815 }
5816 
5817 
5818 /* 
5819         Allocates space for a copy of the BlastSearchBlk for use in
5820         multi-processing BLAST.
5821 */
5822 
5823 BlastSearchBlkPtr LIBCALL
5824 BlastSearchBlkDuplicate (BlastSearchBlkPtr search)
5825 
5826 {
5827 
5828         BlastSearchBlkPtr new_search;
5829         Int2 index;
5830 
5831         if (search == NULL)
5832                 return NULL;
5833 
5834         new_search = (BlastSearchBlkPtr) MemNew(sizeof(BlastSearchBlk));
5835         if (new_search == NULL)
5836                 return NULL;
5837 
5838         /* What's allocated here? */
5839         new_search->allocated = 0;      
5840         new_search->allocated += BLAST_SEARCH_ALLOC_SUBJECT;
5841         new_search->allocated += BLAST_SEARCH_ALLOC_PBP;
5842         new_search->allocated += BLAST_SEARCH_ALLOC_CONTEXT;
5843         new_search->allocated += BLAST_SEARCH_ALLOC_READDB;
5844         new_search->allocated += BLAST_SEARCH_ALLOC_EWPPARAMS;
5845                 
5846         /* AM: Support for query multiplexing. */
5847         if( search->mult_queries )
5848           new_search->mult_queries = BlastDuplicateMultQueries( search->mult_queries );
5849                 
5850         /* Duplicate the rfdp struct, but not the contents. */
5851         new_search->rdfp = readdb_attach(search->rdfp);
5852         if (new_search->rdfp == NULL)
5853         {
5854                 new_search = BlastSearchBlkDestruct(new_search);
5855                 return NULL;
5856         }
5857 
5858         new_search->positionBased = search->positionBased;
5859 
5860         /* Changes, need to allocate. */
5861         new_search->pbp = MemDup(search->pbp, sizeof(BLAST_ParameterBlk));
5862         if (search->pbp->mb_params)
5863           new_search->pbp->mb_params = 
5864             MemDup(search->pbp->mb_params, sizeof(MegaBlastParameterBlk));
5865         new_search->pbp->filter_string = StringSave(search->pbp->filter_string);
5866         new_search->sbp = search->sbp;
5867         new_search->wfp_first = search->wfp_first;
5868         if (search->prog_number==blast_type_blastn && 
5869             search->pbp->mb_params) {
5870            new_search->wfp_second = 
5871               MemDup(search->wfp_second, sizeof(BLAST_WordFinder));
5872            new_search->wfp_second->lookup = 
5873               MegaBlastLookupTableDup(search->wfp_second->lookup);
5874            new_search->wfp = new_search->wfp_second;
5875         } else
5876            new_search->wfp_second = search->wfp_second;
5877         new_search->prog_name = StringSave(search->prog_name);
5878         new_search->prog_number = search->prog_number;
5879         new_search->first_context = search->first_context;
5880         new_search->last_context = search->last_context;
5881         new_search->query_slp = search->query_slp;
5882         if (search->prog_number==blast_type_blastn) {
5883            new_search->query_context_offsets =
5884               MemDup(search->query_context_offsets, 
5885                      (search->last_context-search->first_context+2)*sizeof(Int4));
5886         }
5887         if (search->ewp_params)
5888            new_search->ewp_params = MemDup(search->ewp_params, sizeof(BLAST_ExtendWordParams));
5889         new_search->dblen = search->dblen;
5890         new_search->dblen_eff = search->dblen_eff;
5891         new_search->dblen_eff_real = search->dblen_eff_real;
5892         new_search->dbseq_num = search->dbseq_num;
5893         new_search->length_adjustment = search->length_adjustment;
5894         new_search->searchsp_eff = search->searchsp_eff;
5895 
5896         /* Allocate last_context+1 elements, even if there are only last_context-first_context
5897         being used. */
5898         new_search->context = (BLASTContextStructPtr) MemNew((search->last_context+1)*sizeof(BLASTContextStruct));
5899         for (index=new_search->first_context; index<=new_search->last_context; index++)
5900         {
5901            if (new_search->ewp_params)
5902               new_search->context[index].ewp = BLAST_ExtendWordNew(new_search->ewp_params);
5903                 new_search->context[index].query = search->context[index].query;
5904                 new_search->context[index].query->frame = ContextToFrame(new_search, index);
5905                 new_search->context[index].query_allocated = FALSE;
5906         }
5907 
5908         new_search->context_factor = search->context_factor;
5909 
5910         new_search->subject = (BlastSequenceBlkPtr) MemNew(sizeof(BlastSequenceBlk));
5911         /* 100 is the size limit in the present BLAST for hsp's. */
5912         new_search->hsp_array_size = search->hsp_array_size;
5913         /* The results are held here. */
5914         new_search->result_struct = search->result_struct;
5915         new_search->mb_result_struct = search->mb_result_struct;
5916         new_search->result_size = search->result_size;
5917         new_search->worst_evalue = DBL_MAX;
5918 
5919         new_search->translation_table = search->translation_table;
5920         new_search->translation_table_rc = search->translation_table_rc;
5921         new_search->genetic_code = search->genetic_code;
5922         new_search->db_genetic_code = search->db_genetic_code;
5923 
5924         if (search->translation_buffer_size > 0)
5925         {       /* two extra for the NULLB's on end. */
5926                 new_search->translation_buffer = MemNew((2+search->translation_buffer_size)*sizeof(Uint1));
5927                 new_search->translation_buffer_size = search->translation_buffer_size;
5928         }
5929 
5930         new_search->gap_align = NULL;   /* Allocated automatically. */
5931 
5932         new_search->whole_query = search->whole_query;
5933         new_search->required_start = search->required_start;
5934         new_search->required_end = search->required_end;
5935 
5936         new_search->handle_results = search->handle_results;
5937         if (!search->pbp->mb_params) 
5938            new_search->query_id = SeqIdSetDup(search->query_id);
5939         else {
5940            new_search->qid_array = (SeqIdPtr PNTR) 
5941               Malloc((search->last_context/2 + 1)*sizeof(SeqIdPtr));
5942            
5943            for (index=0; index<=search->last_context/2; index++)
5944               new_search->qid_array[index] = SeqIdSetDup(search->qid_array[index]);
5945         }
5946 
5947         /* Duplicating DNAP sequence used in OOF search */
5948         if(search->pbp->is_ooframe)
5949             new_search->query_dnap = BlastMakeCopyQueryDNAP(search->query_dnap);
5950 
5951         new_search->thr_info = search->thr_info;
5952         new_search->semid = search->semid;
5953         
5954 #ifdef BLAST_COLLECT_STATS
5955         new_search->first_pass_hits = 0;
5956         new_search->second_pass_hits = 0;
5957         new_search->second_pass_trys = 0;
5958         new_search->first_pass_extends = 0;
5959         new_search->second_pass_extends = 0;
5960         new_search->first_pass_good_extends = 0;
5961         new_search->second_pass_good_extends = 0;
5962         new_search->number_of_seqs_better_E = 0;
5963         new_search->prelim_gap_no_contest = 0;
5964         new_search->prelim_gap_passed = 0;
5965         new_search->prelim_gap_attempts = 0;
5966         new_search->real_gap_number_of_hsps = 0;
5967 #endif
5968         new_search->output = search->output;
5969 
5970         if (search->abmp) {
5971            new_search = GreedyAlignMemAlloc(new_search);
5972            if (new_search->abmp == NULL) {
5973               new_search = BlastSearchBlkDestruct(new_search);
5974               return NULL;
5975            }
5976         }
5977         if (search->mb_endpoint_results) {
5978            new_search->mb_endpoint_results = ValNodeNew(NULL);
5979            new_search->mb_endpoint_results->data.ptrvalue = 
5980               search->mb_endpoint_results->data.ptrvalue;
5981         }
5982         new_search->mask1 = search->mask1;
5983 
5984         return new_search;
5985 }
5986 /* 
5987         Allocates space for the new BlastSearchBlk and some sturctures
5988         attached to it.
5989 */
5990 
5991 BlastSearchBlkPtr LIBCALL
5992 BlastSearchBlkNew (Int2 wordsize, Int4 qlen, CharPtr dbname, Boolean multiple_hits, BLAST_Score threshold_first, BLAST_Score threshold_second, Int4 result_size, CharPtr prog_name, BlastAllWordPtr all_words, Int2 first_context, Int2 last_context, Int4 window_size)
5993 
5994 {
5995         return BlastSearchBlkNewExtra(wordsize, qlen, dbname, multiple_hits, threshold_first, threshold_second, result_size, prog_name, all_words, first_context, last_context, NULL, window_size);
5996 
5997 }
5998 
5999 /* 
6000         Allocates space for the new BlastSearchBlk and some sturctures
6001         attached to it.
6002 */
6003 
6004 BlastSearchBlkPtr LIBCALL
6005 BlastSearchBlkNewExtra (Int2 wordsize, Int4 qlen, CharPtr dbname, Boolean multiple_hits, BLAST_Score threshold_first, BLAST_Score threshold_second, Int4 result_size, CharPtr prog_name, BlastAllWordPtr all_words, Int2 first_context, Int2 last_context, ReadDBFILEPtr rdfp, Int4 window_size)
6006 
6007 {
6008 
6009         BlastSearchBlkPtr search;
6010         BLASTContextStructPtr context;
6011         Uint1 is_prot;
6012         Int2 index;
6013         Uint1 alphabet;
6014         Int4 longest_db_seq=INT4_MAX;
6015         ReadDBFILEPtr rdfp_var;
6016         Int4 last_ewp_index;
6017 
6018         search = (BlastSearchBlkPtr) MemNew(sizeof(BlastSearchBlk));
6019 
6020         if (search != NULL)
6021         {
6022                 search->allocated = 0;  /* everything's allocated here. */
6023                 search->allocated += BLAST_SEARCH_ALLOC_QUERY;
6024                 search->allocated += BLAST_SEARCH_ALLOC_SUBJECT;
6025                 search->allocated += BLAST_SEARCH_ALLOC_PBP;
6026                 search->allocated += BLAST_SEARCH_ALLOC_SBP;
6027                 search->allocated += BLAST_SEARCH_ALLOC_EWPPARAMS;
6028                 search->allocated += BLAST_SEARCH_ALLOC_CONTEXT;
6029                 search->allocated += BLAST_SEARCH_ALLOC_RESULTS;
6030                 search->allocated += BLAST_SEARCH_ALLOC_READDB;
6031                 search->allocated += BLAST_SEARCH_ALLOC_ALL_WORDS;
6032                 search->allocated += BLAST_SEARCH_ALLOC_THRINFO;
6033                 search->allocated += BLAST_SEARCH_ALLOC_MASK1;
6034                 
6035                 search->positionBased = FALSE;
6036 
6037                 if (StringCmp(prog_name, "blastn") == 0)
6038                 {
6039                         alphabet = BLASTNA_SEQ_CODE;
6040                 }
6041                 else
6042                 {
6043                         alphabet = Seq_code_ncbistdaa;
6044                 }
6045 
6046                 if (dbname != NULL)
6047                 {
6048                         
6049                         if (rdfp == NULL)
6050                         {
6051                                 if (StringCmp(prog_name, "blastp") == 0 || StringCmp(prog_name, "blastx") == 0)
6052                                 { /* Protein DB for blastp and blastx. */
6053                                         is_prot = READDB_DB_IS_PROT;
6054                                 }
6055                                 else
6056                                 {
6057                                         is_prot = READDB_DB_IS_NUC;
6058                                 }
6059                         
6060                                 if ((search->rdfp=readdb_new(dbname, is_prot)) == NULL)
6061                                 {
6062                                         return NULL;
6063                                 }
6064                         }
6065                         else
6066                         {       /* Attaches to the rdfp, rather than reallocating it. */
6067                                 search->rdfp = readdb_attach(rdfp);
6068                         }
6069 
6070                         rdfp_var = search->rdfp;
6071                         longest_db_seq = 0;
6072                         while (rdfp_var)
6073                         {
6074                                 longest_db_seq = MAX(longest_db_seq, readdb_get_maxlen(rdfp_var));
6075                                 rdfp_var = rdfp_var->next;
6076                         }
6077                 }
6078 
6079                 search->first_context = first_context;
6080                 search->last_context = last_context;
6081 
6082                 search->pbp = 
6083                    (BLAST_ParameterBlkPtr) MemNew(sizeof(BLAST_ParameterBlk));
6084 
6085                 search->sbp = BLAST_ScoreBlkNew(alphabet, last_context+1);
6086 
6087                 /* Only allocate these if thresholds are above zero, i.e. they will be used. */
6088                 if (StringCmp(prog_name, "blastn") != 0)
6089                 {
6090                         if (threshold_second > 0)
6091                         {
6092                                 search->wfp_first = BLAST_WordFinderNew(search->sbp->alphabet_size, wordsize, 1, FALSE);
6093                                 search->allocated += BLAST_SEARCH_ALLOC_WFP_FIRST;
6094                 /* Only allocate a new WFP if 2nd th differs from 1st. */
6095                                 search->wfp_second = search->wfp_first;
6096                         }
6097                 }
6098                 else
6099                 {
6100                         if (multiple_hits)
6101                                 search->wfp_second = BLAST_WordFinderNew(256, wordsize, READDB_COMPRESSION_RATIO, FALSE);
6102                         else
6103                                 search->wfp_second = BLAST_WordFinderNew(256, wordsize, READDB_COMPRESSION_RATIO, TRUE);
6104                         search->allocated += BLAST_SEARCH_ALLOC_WFP_SECOND;
6105                 }
6106 
6107                 search->prog_name = StringSave(prog_name);
6108                 search->prog_number = BlastGetProgramNumber(prog_name);
6109                 if (qlen > 0)
6110                    search->ewp_params = BLAST_ExtendWordParamsNew(qlen, multiple_hits, window_size);
6111                 else
6112                    search->ewp_params = NULL;
6113                 context = search->context = (BLASTContextStructPtr)
6114                    MemNew((1+search->last_context)*sizeof(BLASTContextStruct));
6115                 if (search->prog_number != blast_type_blastn)
6116                    last_ewp_index = search->last_context;
6117                 else /* All queries (Mega BLAST) and strands are concatenated
6118                         in a single sequence */
6119                    last_ewp_index = search->first_context;
6120 
6121                 for (index=search->first_context; index<=search->last_context; index++)
6122                 {
6123                    if (search->ewp_params && index <= last_ewp_index)
6124                       context[index].ewp = BLAST_ExtendWordNew(search->ewp_params);
6125                    context[index].query = (BlastSequenceBlkPtr) MemNew(sizeof(BlastSequenceBlk));
6126                    context[index].query->frame = ContextToFrame(search, index);
6127                    context[index].query_allocated = TRUE;
6128                 }
6129 
6130                 search->subject = (BlastSequenceBlkPtr) MemNew(sizeof(BlastSequenceBlk));
6131                 /* 100 is the size limit in the present BLAST for hsp's. */
6132                 search->hsp_array_size = 100;
6133                 /* The results are held here. */
6134                 search->result_size = result_size;
6135 /*
6136                 search->result_struct = BLASTResultsStructNew(result_size, search->pbp->max_pieces, search->pbp->hsp_range_max);
6137 */
6138 
6139                 search->worst_evalue = DBL_MAX;
6140 
6141                 search->whole_query = TRUE;
6142                 search->required_start = 0;
6143                 search->required_end = -1;
6144 
6145                 search->all_words = all_words;
6146 
6147                 search->thr_info = BlastThrInfoNew();
6148 #ifdef BLAST_COLLECT_STATS
6149                 search->first_pass_hits = 0;
6150                 search->second_pass_hits = 0;
6151                 search->second_pass_trys = 0;
6152                 search->first_pass_extends = 0;
6153                 search->second_pass_extends = 0;
6154                 search->first_pass_good_extends = 0;
6155                 search->second_pass_good_extends = 0;
6156                 search->number_of_seqs_better_E = 0;
6157                 search->prelim_gap_no_contest = 0;
6158                 search->prelim_gap_passed = 0;
6159                 search->prelim_gap_attempts = 0;
6160                 search->real_gap_number_of_hsps = 0;
6161 #endif
6162         }
6163 
6164         return search;
6165 }
6166 
6167 /*
6168         Deallocates memory associated with the BlastSearchBlkPtr.
6169 */
6170 
6171 BlastSearchBlkPtr LIBCALL 
6172 BlastSearchBlkDestruct (BlastSearchBlkPtr search)
6173 
6174 {
6175 
6176     if (search != NULL) {
6177         if (search->allocated & BLAST_SEARCH_ALLOC_QUERY)
6178             search->original_seq = MemFree(search->original_seq);
6179         
6180         if (search->allocated & BLAST_SEARCH_ALLOC_SUBJECT)
6181             search->subject = BlastSequenceBlkDestruct(search->subject);
6182         
6183         if (search->allocated & BLAST_SEARCH_ALLOC_SBP)
6184             search->sbp = BLAST_ScoreBlkDestruct(search->sbp);
6185         
6186         if (search->allocated & BLAST_SEARCH_ALLOC_WFP_FIRST)
6187             search->wfp_first = BLAST_WordFinderDestruct(search->wfp_first);
6188         
6189         if (search->allocated & BLAST_SEARCH_ALLOC_WFP_SECOND) {
6190             search->wfp_second = BLAST_WordFinderDestruct(search->wfp_second);
6191         } else if (search->prog_number==blast_type_blastn && 
6192                    search->pbp->mb_params) {
6193             search->wfp_second = 
6194                 MegaBlastWordFinderDeallocate(search->wfp_second);
6195         }  
6196         
6197         /* Freeing DNAP sequence used in OOF */
6198         
6199         if(search->pbp != NULL && search->pbp->is_ooframe) {
6200             BlastFreeQueryDNAP(search->query_dnap);
6201             search->query_dnap = NULL;
6202         }
6203         
6204         if (search->allocated & BLAST_SEARCH_ALLOC_EWPPARAMS) {
6205             search->ewp_params = BLAST_ExtendWordParamsDestruct(search->ewp_params);
6206         }
6207         
6208         if (search->allocated & BLAST_SEARCH_ALLOC_CONTEXT) {
6209             search->context = BLASTContextFree(search->context, 1+search->last_context);
6210         }
6211         
6212         if (search->allocated & BLAST_SEARCH_ALLOC_RESULTS) {
6213            if (!search->pbp->mb_params)
6214               search->result_struct =
6215                  BLASTResultsStructDelete(search->result_struct);
6216            else {
6217               Int2 index;
6218               for (index=0; index<=search->last_context/2; index++)
6219                  search->mb_result_struct[index] = 
6220                     BLASTResultsStructDelete(search->mb_result_struct[index]);
6221               search->mb_result_struct = MemFree(search->mb_result_struct);
6222            }
6223         }
6224         
6225         if (search->allocated & BLAST_SEARCH_ALLOC_PBP) {
6226             search->pbp->mb_params = MemFree(search->pbp->mb_params);
6227             MemFree(search->pbp->filter_string);
6228             search->pbp = MemFree(search->pbp);
6229         }
6230         
6231         if (search->allocated & BLAST_SEARCH_ALLOC_READDB) {
6232             search->rdfp = readdb_destruct(search->rdfp);
6233         }
6234         
6235         if (search->current_hitlist) {
6236             search->current_hitlist = BlastHitListDestruct(search->current_hitlist);
6237         }
6238         search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
6239         
6240         
6241         if (search->prog_name) {
6242             search->prog_name = MemFree(search->prog_name);
6243         }
6244         
6245         if (search->query_id) {
6246             search->query_id = SeqIdSetFree(search->query_id);
6247         }
6248         if (search->qid_array) {
6249             Int4 index;
6250             for (index=0; index<=search->last_context/2; index++)
6251                 SeqIdSetFree(search->qid_array[index]);
6252             search->qid_array = MemFree(search->qid_array);
6253         }
6254         if (search->translation_buffer_size > 0) {
6255             search->translation_buffer = MemFree(search->translation_buffer);
6256         }
6257         
6258         if (search->allocated & BLAST_SEARCH_ALLOC_TRANS_INFO) {
6259             
6260             if (search->translation_table) {
6261                 search->translation_table = MemFree(search->translation_table);
6262             }
6263             
6264             if (search->translation_table_rc) {
6265                 search->translation_table_rc = MemFree(search->translation_table_rc);
6266             }
6267         }
6268         
6269         if (search->allocated & BLAST_SEARCH_ALLOC_ALL_WORDS) {
6270             search->all_words = BlastAllWordDestruct(search->all_words);
6271         }
6272         
6273         search->gap_align = GapAlignBlkDelete(search->gap_align);
6274         
6275         if (search->allocated & BLAST_SEARCH_ALLOC_QUERY_SLP) {
6276             if (search->query_slp)
6277                 search->query_slp = SeqLocFree(search->query_slp);
6278         }
6279         
6280         
6281         if(search->allocated & BLAST_SEARCH_ALLOC_THRINFO)
6282             BlastThrInfoFree(search->thr_info);
6283         
6284         if (search->abmp)
6285             search->abmp = GreedyAlignMemFree(search->abmp);
6286         
6287         search->query_context_offsets = MemFree(search->query_context_offsets);
6288         
6289         MemFree(search->mb_endpoint_results);
6290 
6291         if (search->allocated & BLAST_SEARCH_ALLOC_MASK1)
6292         {
6293                 if (search->mask1)      
6294                 {
6295                         SeqLocSetFree(search->mask1->data.ptrvalue);
6296                         search->mask1 = ValNodeFree(search->mask1);
6297                 }
6298         }
6299 
6300         search = MemFree(search);
6301     }
6302     
6303     return search;
6304 }
6305 
6306 
6307 /* 
6308         Deallocates all the memory associated with the BlastAllWordPtr.
6309 */
6310 
6311 BlastAllWordPtr LIBCALL
6312 BlastAllWordDestruct(BlastAllWordPtr all_words)
6313 
6314 {
6315         if (all_words == NULL)
6316                 return NULL;
6317 
6318         if (all_words->array)
6319         {
6320                 all_words->array = MemFree(all_words->array);
6321         }
6322 
6323         if (all_words->rows_allocated && all_words->array_storage)
6324         {
6325                 all_words->array_storage = MemFree(all_words->array_storage);
6326         }
6327 
6328         MemFree(all_words);
6329 
6330         return NULL;
6331 }
6332 
6333 /*
6334         Allocates the BlastAllWordPtr and sets some flags.
6335 */
6336 BlastAllWordPtr LIBCALL
6337 BlastAllWordNew(Int4 num_of_cols, Int4 wordsize, Boolean rows_allocated, Boolean specific)
6338 
6339 {
6340         BlastAllWordPtr all_words;
6341 
6342         all_words = MemNew(sizeof(BlastAllWord));
6343         if (all_words)
6344         {
6345                 all_words->rows_allocated = rows_allocated;
6346                 all_words->specific = specific;
6347                 all_words->num_of_cols = num_of_cols;
6348                 all_words->wordsize = wordsize;
6349         }
6350 
6351         return all_words;
6352 }
6353 
6354 BLAST_HitListPtr LIBCALL
6355 BlastHitListDestruct(BLAST_HitListPtr hitlist)
6356 {
6357         BLAST_HSPPtr PNTR hsp_array;
6358         Int4 hspcnt_max, index;
6359 
6360         if (hitlist == NULL)
6361                 return NULL;
6362 
6363         hspcnt_max = hitlist->hspcnt_max;
6364         hsp_array = hitlist->hsp_array;
6365 
6366         for (index=0; index<hspcnt_max; index++)
6367         {
6368            hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
6369         }
6370 
6371         hitlist->hsp_array = MemFree(hsp_array);
6372         hitlist->lh_helper = MemFree(hitlist->lh_helper);
6373 
6374         MemFree(hitlist->exact_match_array);
6375            
6376         hitlist = MemFree(hitlist);
6377 
6378         return hitlist;
6379 }
6380 
6381 /****************************************************************
6382 
6383         Functions to allocate and destroy the BLAST_HitList.
6384 
6385 ***************************************************************/
6386 BLAST_HitListPtr LIBCALL
6387 BlastHitListNew(BlastSearchBlkPtr search)
6388 {
6389         BLAST_HitListPtr hitlist;
6390 
6391         hitlist = (BLAST_HitListPtr) MemNew(sizeof(BLAST_HitList));
6392 
6393         if (hitlist == NULL)
6394                 return hitlist;
6395 
6396         hitlist->hspmax = search->hsp_array_size;
6397         hitlist->hsp_array = (BLAST_HSPPtr PNTR) MemNew(hitlist->hspmax*sizeof
6398 (BLAST_HSPPtr));
6399 
6400         if (hitlist->hsp_array == NULL)
6401         {
6402                 hitlist = BlastHitListDestruct(hitlist);
6403                 return NULL;
6404         }
6405 
6406         if (search->pbp->mb_params) {
6407            hitlist->exact_match_array = (MegaBlastExactMatchPtr) 
6408               MemNew(hitlist->hspmax*sizeof(MegaBlastExactMatch));
6409            hitlist->exact_match_max = hitlist->hspmax;
6410         }
6411 
6412         return hitlist;
6413 }
6414 
6415 
6416 /*
6417         This function translates the context number of a context into
6418         the frame of the sequence.
6419 
6420         Arguments:
6421         
6422         BlastSearchBlkPtr search: search structure,
6423         Int2 context_number: context number used by BLASTContextStruct array
6424         Boolean is_query: if TRUE, refers to query, otherwise the subject.
6425 */
6426 
6427 Int2
6428 ContextToFrame(BlastSearchBlkPtr search, Int2 context_number)
6429 
6430 {
6431         Int2 frame=255;
6432         Uint1 prog_number = search->prog_number;
6433 
6434         if (prog_number == blast_type_blastn)
6435         {
6436                 if (context_number % 2 == 0)
6437                         frame = 1;
6438                 else
6439                         frame = -1;
6440         }
6441         else if (prog_number == blast_type_blastp ||
6442                  prog_number == blast_type_tblastn ||
6443                  prog_number == blast_type_psitblastn)
6444         {       /* Query and subject are protein, no frame. */
6445                 frame = 0;
6446         }
6447         else if (prog_number == blast_type_blastx || prog_number == blast_type_tblastx)
6448         {
6449                 frame = context_number < 3 ? context_number+1 : -context_number+2;
6450         }
6451 
6452         return frame;
6453 }
6454 
6455 /*
6456         Allocates and fills in the BLASTSubjectInfo structure.
6457 */
6458 
6459 BLASTSubjectInfoPtr LIBCALL
6460 BLASTSubjectInfoNew(SeqIdPtr sip, CharPtr defline, Int4 length)
6461 
6462 {
6463         BLASTSubjectInfoPtr subject_info;
6464 
6465         subject_info = (BLASTSubjectInfoPtr) MemNew(sizeof(BLASTSubjectInfo));  
6466 
6467         if (subject_info == NULL)
6468                 return NULL;
6469 
6470         subject_info->sip = sip;
6471         subject_info->defline = defline;
6472         subject_info->length = length;
6473 
6474         return subject_info;
6475 }
6476 
6477 /*
6478         Deallocates the BLASTSubjectInfo structure and the
6479         SeqIdPtr, as well as the defline.
6480 */
6481 
6482 BLASTSubjectInfoPtr LIBCALL
6483 BLASTSubjectInfoDestruct(BLASTSubjectInfoPtr subject_info)
6484 
6485 {
6486 
6487         if (subject_info == NULL)
6488                 return NULL;
6489 
6490         SeqIdFree(subject_info->sip);
6491         MemFree(subject_info->defline);
6492         subject_info = MemFree(subject_info);
6493 
6494         return subject_info;
6495 }
6496 
6497 
6498 
6499 /*
6500         Destroys BLASTResultsStructure and associated memory.
6501 */
6502 
6503 BLASTResultsStructPtr LIBCALL
6504 BLASTResultsStructDelete(BLASTResultsStructPtr result_struct)
6505 
6506 {
6507         Int4 index;
6508         BLASTResultHitlistPtr PNTR results;
6509         BLASTHeapPtr hp, hpt;
6510 
6511         if (result_struct == NULL)
6512                 return NULL;
6513         
6514         results = result_struct->results;
6515         for (index=0; index<result_struct->hitlist_max; index++)
6516         {
6517                 if (results[index])
6518                 {
6519                         results[index] = BLASTResultHitlistFree(results[index]);
6520                 }
6521         }
6522 
6523 
6524         for (hp = result_struct->heap_ptr; hp; ) 
6525         {
6526           hpt = hp->next;
6527           hp->heap = MemFree(hp->heap);
6528           hp = MemFree(hp);
6529           hp = hpt;
6530         }
6531         result_struct->results = MemFree(result_struct->results);
6532         result_struct = MemFree(result_struct);
6533 
6534         return result_struct;
6535 }
6536 
6537 /*
6538         returns BLASTResultsStruct.
6539 */
6540 
6541 BLASTResultsStructPtr
6542 BLASTResultsStructNew(Int4 results_size, Int4 max_pieces, Int4 range_max)
6543 
6544 {
6545         BLASTResultsStructPtr new;
6546         Int4 index;
6547 
6548         new = MemNew(sizeof(BLASTResultsStruct));
6549         new->results = (BLASTResultHitlistPtr PNTR) MemNew(results_size*sizeof(BLASTResultHitlistPtr));
6550 
6551         for (index=0; index<results_size; index++)
6552                 new->results[index] = NULL;
6553 
6554         new->hitlist_max = results_size;
6555         new->hitlist_count = 0;
6556         new->max_pieces = max_pieces;
6557         if (range_max > 0) {
6558            new->heap_ptr = (BLASTHeapPtr) MemNew(sizeof(BLASTHeapStruct));
6559            new->heap_ptr->cutvalue = INT4_MAX;
6560            new->heap_ptr->num_in_heap = new->heap_ptr->num_of_ref = 0;
6561            new->heap_ptr->prev = new->heap_ptr->next = NULL;
6562            new->heap_ptr->heap = (BLASTResultHspPtr PNTR) MemNew(sizeof(BLASTResultHspPtr)*range_max);
6563         }
6564         return new;
6565 }
6566 
6567 
6568 Uint1 AAForCodon (Uint1Ptr codon, CharPtr codes);
6569 
6570 /*
6571         GetTranslation to get the translation of the nucl. sequence in the
6572         appropriate frame and with the appropriate GeneticCode.
6573 
6574         The function return an allocated CharPtr, the caller must delete this.
6575         The first and last spaces of this CharPtr contain NULLB's.
6576 */
6577 
6578 Uint1Ptr LIBCALL
6579 GetTranslation(Uint1Ptr query_seq, Int4 nt_length, Int2 frame, Int4Ptr length, CharPtr genetic_code)
6580 {
6581         Uint1 codon[CODON_LENGTH];
6582         Int4 index, index_prot;
6583         SeqMapTablePtr smtp;
6584         Uint1 residue, new_residue;
6585         Uint1Ptr prot_seq;
6586 
6587         smtp = SeqMapTableFind(Seq_code_ncbistdaa, Seq_code_ncbieaa);
6588 
6589         /* Allocate two extra spaces for NULLB's at beginning and end of seq. */
6590         prot_seq = (Uint1Ptr) MemNew((2+(nt_length+2)/CODON_LENGTH)*sizeof(Uint1));
6591 
6592         /* The first character in the protein is the NULLB sentinel. */
6593         prot_seq[0] = NULLB;
6594         index_prot = 1;
6595         for (index=ABS(frame)-1; index<nt_length-2; index += CODON_LENGTH)
6596         {
6597                 codon[0] = query_seq[index];
6598                 codon[1] = query_seq[index+1];
6599                 codon[2] = query_seq[index+2];
6600                 residue = AAForCodon(codon, genetic_code);
6601                 new_residue = SeqMapTableConvert(smtp, residue);
6602                 if (IS_residue(new_residue))
6603                 {
6604                         prot_seq[index_prot] = new_residue;
6605                 }
6606                 index_prot++;
6607         }
6608         prot_seq[index_prot] = NULLB;
6609         *length = index_prot-1;
6610         
6611         return prot_seq;
6612 }
6613 
6614 
6615 /*************************************************************************
6616 *
6617 *       MaskTheResidues masks up to max_length residues in buffer.
6618 *       The residue to be used for masking (generally 'N' for nucleotides
6619 *       and 'X' for proteins) is mask_residue.  offset tells how far
6620 *       along the sequence the first residue in buffer is.  mask_slp
6621 *       specifies which parts of the sequence to mask.  'max_length is
6622 *       the total length of the sequence.
6623 *
6624 *************************************************************************/
6625 
6626 void
6627 BlastMaskTheResidues(Uint1Ptr buffer, Int4 max_length, Uint1 mask_residue, SeqLocPtr mask_slp, Boolean reverse, Int4 offset)
6628 
6629 {
6630         SeqLocPtr slp=NULL;
6631         Int4 index, start, stop;
6632        
6633         while (mask_slp)
6634         {
6635                 slp=NULL;
6636                 while((slp = SeqLocFindNext(mask_slp, slp))!=NULL)
6637                 {
6638                         if (reverse)
6639                         {
6640                                 start = max_length - 1 - SeqLocStop(slp);
6641                                 stop = max_length - 1 - SeqLocStart(slp);
6642                         }
6643                         else
6644                         {
6645                                 start = SeqLocStart(slp);
6646                                 stop = SeqLocStop(slp);
6647                         }
6648 
6649                         start -= offset;
6650                         stop  -= offset;
6651 
6652                         for (index=start; index<=stop; index++)
6653                         {
6654                                 buffer[index] = mask_residue;
6655                         }
6656                 }
6657                 mask_slp = mask_slp->next;
6658         }
6659 
6660 }
6661 
6662 /*
6663         COnverts a protein (translated) SeqLocPtr from the protein
6664         coordinates to the nucl. coordinates.
6665 
6666         Only works on a SeqLocPtr of type SeqIntPtr right now.
6667 */
6668 
6669 Boolean
6670 BlastConvertProteinSeqLoc(SeqLocPtr slp, Int2 frame, Int4 full_length)
6671 
6672 {
6673         SeqIntPtr seq_int;
6674         Int4 from, to;
6675 
6676         if (slp == NULL)
6677                 return TRUE;
6678 
6679         if (slp->choice == SEQLOC_PACKED_INT)
6680                 slp = slp->data.ptrvalue;
6681 
6682         while (slp)
6683         {
6684                 if (slp->choice != SEQLOC_INT)
6685                         return FALSE;
6686 
6687                 seq_int = slp->data.ptrvalue;
6688                 from = seq_int->from;
6689                 to = seq_int->to;
6690 
6691                 if (frame < 0)
6692                 {
6693                         seq_int->to = full_length - CODON_LENGTH*from + frame;
6694                         seq_int->from = full_length - CODON_LENGTH*to + frame + 1;
6695                         seq_int->strand = Seq_strand_minus;
6696                 }
6697                 else
6698                 {
6699                         seq_int->from = CODON_LENGTH*from + frame - 1;
6700                         seq_int->to = CODON_LENGTH*to + frame - 1;
6701                         seq_int->strand = Seq_strand_plus;
6702                 }
6703                 slp = slp->next;
6704         }
6705         
6706         return TRUE;
6707 }
6708 
6709 /*
6710   COnverts a DNA SeqLocPtr from the nucl. coordinates to 
6711   the protein (translated) coordinates.
6712   Only works on a SeqLocPtr of type SEQLOC_INT or SEQLOC_PACKED_INT right now.
6713 */
6714 
6715 Boolean
6716 BlastConvertDNASeqLoc(SeqLocPtr slp, Int2 frame, Int4 full_length) 
6717 {
6718     SeqIntPtr seq_int;
6719     Int4 from, to;
6720     
6721     if (slp == NULL)
6722         return TRUE;
6723     
6724     if (slp->choice == SEQLOC_PACKED_INT)
6725         slp = slp->data.ptrvalue;
6726     
6727     while (slp) {
6728         if (slp->choice != SEQLOC_INT)
6729             return FALSE;
6730         
6731         seq_int = slp->data.ptrvalue;
6732         from = seq_int->from;
6733         to = seq_int->to;
6734         
6735         if (frame < 0) {
6736             seq_int->from = (full_length + frame - to)/CODON_LENGTH;
6737             seq_int->to = (full_length + frame - from)/CODON_LENGTH;
6738             seq_int->strand = Seq_strand_minus;
6739         } else {
6740             seq_int->from = (from - frame + 1)/CODON_LENGTH;
6741             seq_int->to = (to-frame + 1)/CODON_LENGTH;
6742             seq_int->strand = Seq_strand_plus;
6743         }
6744         slp = slp->next;
6745     }
6746     
6747     return TRUE;
6748 }
6749 
6750 SeqLocPtr
6751 BioseqSegEx(BioseqPtr bsp_unfilter, CharPtr options)
6752 
6753 {
6754         BioseqPtr bsp_filter;
6755         Boolean mask_state;
6756         Char cmd_buf[2*PATH_MAX], temp_file[PATH_MAX];
6757         CharPtr filter_dir;
6758         Int4 index, mask_begin=0;
6759         SeqEntryPtr sep;
6760         SeqLocPtr slp_mask;
6761         SeqPortPtr spp_filter, spp_unfilter;
6762         Uint1 res_filter, res_unfilter;
6763         FILE *fp;
6764 
6765 
6766         if (bsp_unfilter == NULL)
6767                 return NULL;
6768 
6769 #ifdef OS_UNIX
6770 
6771         TmpNam(temp_file);
6772         fp = FileOpen(temp_file, "w");
6773         if (BioseqToFasta(bsp_unfilter, fp, FALSE) == FALSE)
6774         {
6775                 BioseqUnlock(bsp_unfilter);
6776                 FileClose(fp);
6777                 return NULL;
6778         }
6779         FileClose(fp);
6780 
6781         filter_dir = getenv("BLASTFILTER");
6782         if (filter_dir == NULL)
6783                 filter_dir = BLASTFILTER_DIR;
6784 
6785         if (options != NULL)
6786                 sprintf(cmd_buf, "%s%s%s%s %s%s", filter_dir, DIRDELIMSTR, "seg ", temp_file, options, " -x");
6787         else
6788                 sprintf(cmd_buf, "%s%s%s%s%s", filter_dir, DIRDELIMSTR, "seg ", temp_file, " -x");
6789 
6790         fp = popen(cmd_buf, "r");
6791         if (fp == NULL)
6792         {
6793                 ErrPostEx(SEV_WARNING, 0, 0, "Call to seg failed.");
6794                 return NULL;
6795         }
6796         
6797         sep = FastaToSeqEntry(fp, FALSE);
6798         FileClose(fp);
6799         if (sep == NULL)
6800         {
6801                 ErrPostEx(SEV_WARNING, 0, 0, "Call to seg failed.");
6802                 return NULL;
6803         }
6804         bsp_filter = sep->data.ptrvalue;
6805 
6806         spp_filter = SeqPortNew(bsp_filter, 0, -1, Seq_strand_plus, Seq_code_ncbistdaa);
6807         spp_unfilter = SeqPortNew(bsp_unfilter, 0, -1, Seq_strand_plus, Seq_code_ncbistdaa);
6808 
6809         mask_state = FALSE;
6810         index = 0;
6811         slp_mask = NULL;
6812         while ((res_filter=SeqPortGetResidue(spp_filter)) != SEQPORT_EOF)
6813         {
6814                 res_unfilter=SeqPortGetResidue(spp_unfilter);
6815                 if (res_filter != res_unfilter)
6816                 {
6817                         if (mask_state == FALSE)
6818                         {
6819                                 mask_begin = index;
6820                                 mask_state = TRUE;
6821                         }
6822                 }
6823                 else if (mask_state == TRUE)
6824                 {
6825                         ValNodeLink(&slp_mask, SeqLocIntNew(mask_begin, index-1, Seq_strand_plus, bsp_filter->id));
6826                         mask_state = FALSE;
6827                 }
6828                 index++;
6829         }
6830 
6831         /* If the last portion of the sequence was masked. */
6832         if (mask_state == TRUE)
6833         {
6834                 ValNodeLink(&slp_mask, SeqLocIntNew(mask_begin, index-1, Seq_strand_plus, bsp_filter->id));
6835         }
6836 
6837         sep = SeqEntryFree(sep);
6838         SeqPortFree(spp_filter);
6839         SeqPortFree(spp_unfilter);
6840 
6841         pclose(fp);
6842         FileRemove(temp_file);
6843 
6844         return slp_mask;
6845 #else
6846         return NULL;
6847 #endif
6848 }
6849 
6850 /*
6851         Runs seg and obtains a SeqLocPtr from it.
6852 */
6853 static SeqLocPtr 
6854 SeqLocSegEx(SeqLocPtr slp, CharPtr instructions)
6855 
6856 {
6857         BioseqPtr bsp_unfilter;
6858         SeqLocPtr slp_mask;
6859         SeqIdPtr sip;
6860 
6861 
6862         if (slp == NULL)
6863                 return NULL;
6864 
6865         sip = SeqIdFindBest(SeqLocId(slp), SEQID_GI);
6866         bsp_unfilter = BioseqLockById(sip);
6867         slp_mask = BioseqSegEx(bsp_unfilter, instructions);
6868 
6869         BioseqUnlock(bsp_unfilter);
6870 
6871         return slp_mask;
6872 }
6873 
6874 SeqLocPtr 
6875 SeqLocSeg(SeqLocPtr slp)
6876 
6877 {
6878         return SeqLocSegEx(slp, NULL);
6879 }
6880 
6881 SeqLocPtr
6882 MyBioseqSeg(BioseqPtr bsp_unfilter)
6883 
6884 {
6885         return BioseqSegEx(bsp_unfilter, NULL);
6886 }
6887 
6888 #define BLASTSEQLOC_BUFFER_SIZE 128
6889 
6890 Boolean
6891 parse_blast_options(BLAST_OptionsBlkPtr options, CharPtr string_options, 
6892                     CharPtr PNTR error_message, CharPtr PNTR database, 
6893                     Int4Ptr descriptions, Int4Ptr alignments)
6894 {
6895         CharPtr opt_str = "GErqeWdyXZPAIvbYzcFsSpfwtgn", *values;
6896         Int4 index;
6897 
6898         if (options == NULL)
6899                 return FALSE;
6900 
6901         if(!BlastParseInputString(string_options, opt_str, &values, error_message)) 
6902         {
6903             return FALSE;
6904         }
6905 
6906         /* -G  gap open cost */
6907 
6908         index = BlastGetLetterIndex(opt_str, 'G');
6909         if(values[index] != NULL) {
6910             options->gap_open = atoi(values[index]);
6911         }
6912 
6913         /* -E gap extend cost */
6914 
6915         index = BlastGetLetterIndex(opt_str, 'E');
6916         if(values[index] != NULL) {
6917             options->gap_extend = atoi(values[index]);
6918         }
6919 
6920         /* -q penalty for nucleotide mismatch. */
6921 
6922         index = BlastGetLetterIndex(opt_str, 'q');
6923         if(values[index] != NULL) {
6924             options->penalty = atoi(values[index]);
6925         }
6926 
6927         /* -r reward for nucleotide match. */
6928 
6929         index = BlastGetLetterIndex(opt_str, 'r');
6930         if(values[index] != NULL) {
6931             options->reward = atoi(values[index]);
6932         }
6933 
6934         /* -e expect value. */
6935 
6936         index = BlastGetLetterIndex(opt_str, 'e');
6937         if(values[index] != NULL) {
6938             options->expect_value = atof(values[index]);
6939         }
6940 
6941         /* -W wordsize. */
6942 
6943         index = BlastGetLetterIndex(opt_str, 'W');
6944         if(values[index] != NULL) {
6945             options->wordsize = atoi(values[index]);
6946         }
6947 
6948         /* -d database. */
6949         if (database) {
6950            index = BlastGetLetterIndex(opt_str, 'd');
6951            if(values[index] != NULL) {
6952               *database = values[index];
6953               values[index] = NULL;
6954            }
6955         }
6956 
6957         /* -y  Dropoff (X) for blast extensions in bits (default if zero) */
6958 
6959         index = BlastGetLetterIndex(opt_str, 'y');
6960         if(values[index] != NULL) {
6961             options->dropoff_2nd_pass = atof(values[index]);
6962         }
6963 
6964         /* -X  X dropoff value for gapped alignment (in bits) */
6965 
6966         index = BlastGetLetterIndex(opt_str, 'X');
6967         if(values[index] != NULL) {
6968             options->gap_x_dropoff = atof(values[index]);
6969         }
6970 
6971         /* -Z  final X dropoff value for gapped alignment (in bits) */
6972 
6973         index = BlastGetLetterIndex(opt_str, 'Z');
6974         if(values[index] != NULL) {
6975             options->gap_x_dropoff_final = atof(values[index]);
6976         }
6977 
6978         /* -P multiple hits/two-pass. */
6979 
6980         index = BlastGetLetterIndex(opt_str, 'P');
6981         if(values[index] != NULL) {
6982            if (atoi(values[index]) == 0) 
6983            {
6984                  options->two_pass_method  = FALSE;
6985                  options->multiple_hits_only  = TRUE;
6986            }
6987            else if (atoi(values[index]) == 1)
6988            {
6989                  options->two_pass_method  = FALSE;
6990                  options->multiple_hits_only  = FALSE;
6991            }
6992            else
6993            {
6994                  options->two_pass_method  = TRUE;
6995                  options->multiple_hits_only  = FALSE;
6996            }
6997         }
6998 
6999         /* -A window size. */
7000 
7001         index = BlastGetLetterIndex(opt_str, 'A');
7002         if(values[index] != NULL) {
7003             options->window_size = atoi(values[index]);
7004         }
7005 
7006         /* -I Hitlist size */
7007         index = BlastGetLetterIndex(opt_str, 'I');
7008         if (values[index] != NULL)
7009            options->hitlist_size = atoi(values[index]);
7010 
7011         /* -v Number of descriptions */
7012         if (descriptions) {
7013            *descriptions = -1;
7014            index = BlastGetLetterIndex(opt_str, 'v');
7015            if (values[index] != NULL) {
7016               *descriptions = atoi(values[index]);
7017               options->hitlist_size = 
7018                  MAX(options->hitlist_size, *descriptions);
7019            }
7020         }
7021 
7022         /* -b Number of alignments */
7023         if (alignments) {
7024            *alignments = -1;
7025            index = BlastGetLetterIndex(opt_str, 'b');
7026            if (values[index] != NULL) {
7027               *alignments = atoi(values[index]);
7028               options->hitlist_size = 
7029                  MAX(options->hitlist_size, *alignments);
7030            }
7031         }
7032 
7033         /* -Y Effective search space */
7034         index = BlastGetLetterIndex(opt_str, 'Y');
7035         if (values[index] != NULL)
7036            options->searchsp_eff = atof(values[index]);
7037 
7038         /* -z Effective database length */
7039         index = BlastGetLetterIndex(opt_str, 'z');
7040         if (values[index] != NULL) {
7041            const char *dummy=NULL;
7042            options->db_length =  StringToInt8(values[index], &dummy);
7043         }
7044 
7045         /* -c Constant in pseudocounts for multipass version */
7046         index = BlastGetLetterIndex(opt_str, 'c');
7047         if (values[index] != NULL)
7048            options->pseudoCountConst = atoi(values[index]);
7049         
7050         /* -F Filter string */
7051         index = BlastGetLetterIndex(opt_str, 'F');
7052         if (values[index] != NULL)
7053            options->filter_string = values[index];
7054 
7055         /* -s Score cut off for megablast */
7056         index = BlastGetLetterIndex(opt_str, 's');
7057         if (values[index] != NULL)
7058            options->cutoff_s2 = atoi(values[index]);
7059 
7060         /* -S Strand option */
7061         index = BlastGetLetterIndex(opt_str, 'S');
7062         if (values[index] != NULL)
7063            options->strand_option = (Uint1) atoi(values[index]);
7064         
7065         /* -p Percentage of identity cut-off */
7066         index = BlastGetLetterIndex(opt_str, 'p');
7067         if (values[index] != NULL)
7068            options->perc_identity = (FloatLo) atof(values[index]);
7069 
7070         /* -f  threshold for hits */
7071 
7072         index = BlastGetLetterIndex(opt_str, 'f');
7073         if(values[index] != NULL) {
7074             options->threshold_second = atoi(values[index]);
7075         }
7076 
7077         /* -w  Frame shift penalty (OOF algorithm for blastx) */
7078 
7079         index = BlastGetLetterIndex(opt_str, 'w');
7080         if(values[index] != NULL) {
7081             options->shift_pen = atoi(values[index]);
7082             options->is_ooframe = TRUE;
7083         }
7084 
7085         /* -t  Discontiguous word template length for megablast;
7086                Longest intron length for sum statistics in tblastn */
7087 
7088         index = BlastGetLetterIndex(opt_str, 't');
7089         if(values[index] != NULL) {
7090            if (options->is_megablast_search)
7091               options->mb_template_length = atoi(values[index]);
7092            else 
7093               options->longest_intron = atoi(values[index]);
7094         }
7095 
7096         /* -g  Scan every base of the database for megablast */
7097 
7098         index = BlastGetLetterIndex(opt_str, 'g');
7099         if(values[index] != NULL) {
7100             options->mb_one_base_step = (TO_UPPER(*values[index]) == 'T');
7101         }
7102 
7103         /* -n  Use dynamic programming algorithm in megablast for gapped 
7104                extensions instead of greedy algorithm */
7105 
7106         index = BlastGetLetterIndex(opt_str, 'n');
7107         if(values[index] != NULL) {
7108             options->mb_use_dyn_prog = (TO_UPPER(*values[index]) == 'T');
7109         }
7110 
7111         values = MemFree(values);
7112 
7113         return TRUE;
7114 }
7115 
7116 static Boolean
7117 parse_dust_options(CharPtr ptr, Int4Ptr level, Int4Ptr window, Int4Ptr cutoff, Int4Ptr linker)
7118 
7119 {
7120         Char buffer[BLASTSEQLOC_BUFFER_SIZE];
7121         Int4 arg, index, index1, window_pri=-1, linker_pri=-1, level_pri=-1, cutoff_pri=-1;
7122         long    tmplong;
7123 
7124         arg = 0;
7125         index1 = 0;
7126         for (index=0; index<BLASTSEQLOC_BUFFER_SIZE; index++)
7127         {
7128                 if (*ptr == ' ' || *ptr == NULLB)
7129                 {
7130                         buffer[index1] = NULLB;
7131                         index1 = 0;
7132                         switch(arg) {
7133                                 case 0:
7134                                         sscanf(buffer, "%ld", &tmplong);
7135                                         level_pri = tmplong;
7136                                         break;
7137                                 case 1:
7138                                         sscanf(buffer, "%ld", &tmplong);
7139                                         window_pri = tmplong;
7140                                         break;
7141                                 case 2:
7142                                         sscanf(buffer, "%ld", &tmplong);
7143                                         cutoff_pri = tmplong;
7144                                         break;
7145                                 case 3:
7146                                         sscanf(buffer, "%ld", &tmplong);
7147                                         linker_pri = tmplong;
7148                                         break;
7149                                 default:
7150                                         break;
7151                         }
7152 
7153                         arg++;
7154                         while (*ptr == ' ')
7155                                 ptr++;
7156 
7157                         /* end of the buffer. */
7158                         if (*ptr == NULLB)
7159                                 break;
7160                 }
7161                 else
7162                 {
7163                         buffer[index1] = *ptr; ptr++;
7164                         index1++;
7165                 }
7166         }
7167 
7168         *level = level_pri; 
7169         *window = window_pri; 
7170         *cutoff = cutoff_pri; 
7171         *linker = linker_pri; 
7172 
7173         return TRUE;
7174 }
7175 
7176 
7177 static Boolean
7178 parse_seg_options(CharPtr ptr, Int4Ptr window, FloatHiPtr locut, FloatHiPtr hicut)
7179 
7180 {
7181         Char buffer[BLASTSEQLOC_BUFFER_SIZE];
7182         Int4 arg, index, index1; 
7183         long    tmplong;
7184         FloatHi tmpdouble;
7185 
7186         arg = 0;
7187         index1 = 0;
7188         for (index=0; index<BLASTSEQLOC_BUFFER_SIZE; index++)
7189         {
7190                 if (*ptr == ' ' || *ptr == NULLB)
7191                 {
7192                         buffer[index1] = NULLB;
7193                         index1 = 0;
7194                         switch(arg) {
7195                                 case 0:
7196                                         sscanf(buffer, "%ld", &tmplong);
7197                                         *window = tmplong;
7198                                         break;
7199                                 case 1:
7200                                         sscanf(buffer, "%le", &tmpdouble);
7201                                         *locut = tmpdouble;
7202                                         break;
7203                                 case 2:
7204                                         sscanf(buffer, "%le", &tmpdouble);
7205                                         *hicut = tmpdouble;
7206                                         break;
7207                                 default:
7208                                         break;
7209                         }
7210 
7211                         arg++;
7212                         while (*ptr == ' ')
7213                                 ptr++;
7214 
7215                         /* end of the buffer. */
7216                         if (*ptr == NULLB)
7217                                 break;
7218                 }
7219                 else
7220                 {
7221                         buffer[index1] = *ptr; ptr++;
7222                         index1++;
7223                 }
7224         }
7225 
7226         return TRUE;
7227 }
7228 
7229 static Boolean
7230 parse_cc_options(CharPtr ptr, Int4Ptr window, FloatHiPtr cutoff, Int4Ptr linker)
7231 
7232 {
7233         Char buffer[BLASTSEQLOC_BUFFER_SIZE];
7234         Int4 arg, index, index1;
7235         long    tmplong;
7236         FloatHi tmpdouble;
7237 
7238         arg = 0;
7239         index1 = 0;
7240         for (index=0; index<BLASTSEQLOC_BUFFER_SIZE; index++)
7241         {
7242                 if (*ptr == ' ' || *ptr == NULLB)
7243                 {
7244                         buffer[index1] = NULLB;
7245                         index1 = 0;
7246                         switch(arg) {
7247                                 case 0:
7248                                         sscanf(buffer, "%ld", &tmplong);
7249                                         *window = tmplong;
7250                                         break;
7251                                 case 1:
7252                                         sscanf(buffer, "%le", &tmpdouble);
7253                                         *cutoff = tmpdouble;
7254                                         break;
7255                                 case 2:
7256                                         sscanf(buffer, "%ld", &tmplong);
7257                                         *linker = tmplong;
7258                                         break;
7259                                 default:
7260                                         break;
7261                         }
7262 
7263                         arg++;
7264                         while (*ptr == ' ')
7265                                 ptr++;
7266 
7267                         /* end of the buffer. */
7268                         if (*ptr == NULLB)
7269                                 break;
7270                 }
7271                 else
7272                 {
7273                         buffer[index1] = *ptr; ptr++;
7274                         index1++;
7275                 }
7276         }
7277 
7278         return TRUE;
7279 }
7280 
7281 CharPtr
7282 load_options_to_buffer(CharPtr instructions, CharPtr buffer)
7283 {
7284         Boolean not_started=TRUE;
7285         CharPtr buffer_ptr, ptr;
7286         Int4 index;
7287 
7288         ptr = instructions;
7289         buffer_ptr = buffer;
7290         for (index=0; index<BLASTSEQLOC_BUFFER_SIZE && *ptr != NULLB; index++)
7291         {
7292                 if (*ptr == ';')
7293                 {
7294                         ptr++;
7295                         break;
7296                 }
7297                 /* Remove blanks at the beginning. */
7298                 if (not_started && *ptr == ' ')
7299                 {
7300                         ptr++;
7301                 }
7302                 else
7303                 {
7304                         not_started = FALSE;
7305                         *buffer_ptr = *ptr;
7306                         buffer_ptr++; ptr++;
7307                 }
7308         }
7309 
7310         *buffer_ptr = NULLB;
7311 
7312         if (not_started == FALSE)
7313         {       /* Remove trailing blanks. */
7314                 buffer_ptr--;
7315                 while (*buffer_ptr == ' ' && buffer_ptr > buffer)
7316                 {
7317                         *buffer_ptr = NULLB;
7318                         buffer_ptr--;
7319                 }
7320         }
7321 
7322         return ptr;
7323 }
7324 
7325 #define CC_WINDOW 22
7326 #define CC_CUTOFF 40.0
7327 #define CC_LINKER 32
7328 
7329 /*
7330         This function parses the 'instructions' string and then calls the appopriate
7331         filtering functions.
7332 */
7333 SeqLocPtr
7334 BlastBioseqFilter(BioseqPtr bsp, CharPtr instructions)
7335 
7336 {
7337         return BlastBioseqFilterEx(bsp, instructions, NULL);
7338 }
7339 
7340 SeqLocPtr
7341 BlastBioseqFilterEx(BioseqPtr bsp, CharPtr instructions, BoolPtr mask_at_hash)
7342 
7343 {
7344         SeqLocPtr slp = NULL;
7345         SeqLocPtr slp_mask;
7346         
7347         ValNodeAddPointer(&slp, SEQLOC_WHOLE,
7348                 SeqIdDup(SeqIdFindBest(bsp->id, SEQID_GI)));
7349         slp_mask = BlastSeqLocFilterEx(slp, instructions, mask_at_hash);
7350         slp = SeqLocFree(slp);
7351         return slp_mask;
7352 }
7353 
7354 SeqLocPtr
7355 BlastSeqLocFilter(SeqLocPtr slp, CharPtr instructions)
7356 
7357 {
7358         return BlastSeqLocFilterEx(slp, instructions, NULL);
7359 }
7360 
7361 SeqLocPtr
7362 BlastSeqLocFilterEx(SeqLocPtr slp, CharPtr instructions, BoolPtr mask_at_hash)
7363 
7364 {
7365         BioseqPtr bsp;
7366         BLAST_OptionsBlkPtr repeat_options, vs_options;
7367         Boolean do_all=FALSE, do_seg=FALSE, do_coil_coil=FALSE, do_dust=FALSE, do_repeats=FALSE, do_vecscreen=FALSE;
7368         Boolean myslp_allocated;
7369         CharPtr buffer=NULL;
7370         CharPtr ptr, repeat_database=NULL, vs_database=NULL, error_msg;
7371         Int2 seqloc_num;
7372         Int4 window_cc, linker_cc, window_dust, level_dust, minwin_dust, linker_dust;
7373         SeqLocPtr cc_slp=NULL, dust_slp=NULL, seg_slp=NULL, seqloc_head=NULL, repeat_slp=NULL, vs_slp=NULL;
7374         PccDatPtr pccp;
7375         Nlm_FloatHiPtr scores;
7376         Nlm_FloatHi cutoff_cc;
7377         SegParamsPtr sparamsp=NULL;
7378         SeqAlignPtr seqalign;
7379         SeqIdPtr sip;
7380         SeqLocPtr myslp, seqloc_var, seqloc_tmp;
7381         ValNodePtr vnp=NULL, vnp_var;
7382 
7383         cutoff_cc = CC_CUTOFF;
7384 
7385         if (instructions == NULL || StringICmp(instructions, "F") == 0)
7386                 return NULL;
7387 
7388         /* FALSE is the default right now. */
7389         if (mask_at_hash)
7390                 *mask_at_hash = FALSE;
7391 
7392         /* parameters for dust. */
7393         /* -1 indicates defaults. */
7394         level_dust = -1;
7395         window_dust = -1;
7396         minwin_dust = -1;
7397         linker_dust = -1;
7398         if (StringICmp(instructions, "T") == 0)
7399         { /* do_all actually means seg for proteins and dust for nt. */
7400                 do_all = TRUE;
7401         }
7402         else
7403         {
7404                 buffer = MemNew(StringLen(instructions)*sizeof(Char));
7405                 ptr = instructions;
7406                 /* allow old-style filters when m cannot be followed by the ';' */
7407                 if (*ptr == 'm' && ptr[1] == ' ')
7408                 {
7409                         if (mask_at_hash)
7410                                 *mask_at_hash = TRUE;
7411                         ptr += 2;
7412                 }
7413                 while (*ptr != NULLB)
7414                 {
7415                         if (*ptr == 'S')
7416                         {
7417                                 sparamsp = SegParamsNewAa();
7418                                 sparamsp->overlaps = TRUE;      /* merge overlapping segments. */
7419                                 ptr = load_options_to_buffer(ptr+1, buffer);
7420                                 if (buffer[0] != NULLB)
7421                                 {
7422                                         parse_seg_options(buffer, &sparamsp->window, &sparamsp->locut, &sparamsp->hicut);
7423                                 }
7424                                 do_seg = TRUE;
7425                         }
7426                         else if (*ptr == 'C')
7427                         {
7428                                 ptr = load_options_to_buffer(ptr+1, buffer);
7429                                 window_cc = CC_WINDOW;
7430                                 cutoff_cc = CC_CUTOFF;
7431                                 linker_cc = CC_LINKER;
7432                                 if (buffer[0] != NULLB)
7433                                         parse_cc_options(buffer, &window_cc, &cutoff_cc, &linker_cc);
7434                                 do_coil_coil = TRUE;
7435                         }
7436                         else if (*ptr == 'D')
7437                         {
7438                                 ptr = load_options_to_buffer(ptr+1, buffer);
7439                                 if (buffer[0] != NULLB)
7440                                         parse_dust_options(buffer, &level_dust, &window_dust, &minwin_dust, &linker_dust);
7441                                 do_dust = TRUE;
7442                         }
7443                         else if (*ptr == 'R')
7444                         {
7445                                 repeat_options = BLASTOptionNew("blastn", TRUE);
7446                                 repeat_options->expect_value = 0.1;
7447                                 repeat_options->penalty = -1;
7448                                 repeat_options->wordsize = 11;
7449                                 repeat_options->gap_x_dropoff_final = 90;
7450                                 repeat_options->dropoff_2nd_pass = 40;
7451                                 repeat_options->gap_open = 2;
7452                                 repeat_options->gap_extend = 1;
7453                                 ptr = load_options_to_buffer(ptr+1, buffer);
7454                                 if (buffer[0] != NULLB)
7455                                    parse_blast_options(repeat_options,
7456                                       buffer, &error_msg, &repeat_database,
7457                                       NULL, NULL);
7458                                 if (repeat_database == NULL)
7459                                    repeat_database = StringSave("humlines.lib humsines.lib retrovir.lib");
7460                                 do_repeats = TRUE;
7461                         }
7462                         else if (*ptr == 'V')
7463                         {
7464                                 vs_options = VSBlastOptionNew();
7465                                 ptr = load_options_to_buffer(ptr+1, buffer);
7466                                 if (buffer[0] != NULLB)
7467                                    parse_blast_options(vs_options, buffer,
7468                                       &error_msg, &vs_database, NULL, NULL); 
7469                                 vs_options = BLASTOptionDelete(vs_options);
7470                                 if (vs_database == NULL)
7471                                    vs_database = StringSave("UniVec_Core");
7472                                 do_vecscreen = TRUE;
7473                         }
7474                         else if (*ptr == 'L')
7475                         { /* do low-complexity filtering; dust for blastn, otherwise seg.*/
7476                                 do_all = TRUE;
7477                                 ptr++;
7478                         }
7479                         else if (*ptr == 'm')
7480                         {
7481                                 if (mask_at_hash)
7482                                         *mask_at_hash = TRUE;
7483                                 ptr++;
7484                         }
7485                         else
7486                         {       /* Nothing applied. */
7487                                 ptr++;
7488                         }
7489                 }
7490                 buffer = MemFree(buffer);
7491         }
7492 
7493         seqloc_num = 0;
7494         seqloc_head = NULL;
7495         sip = SeqLocId(slp);
7496         bsp = BioseqLockById(SeqIdFindBest(sip, SEQID_GI));
7497         if (ISA_aa(bsp->mol))
7498         {
7499                 if (do_all || do_seg)
7500                 {
7501                         seg_slp = SeqlocSegAa(slp, sparamsp);
7502                         SegParamsFree(sparamsp);
7503                         sparamsp = NULL;
7504                         seqloc_num++;
7505                 }
7506                 if (do_coil_coil)
7507                 {
7508                         pccp = PccDatNew ();
7509                         pccp->window = window_cc;
7510                         ReadPccData (pccp);
7511                         /*scores = PredictCCBioseq(bsp, 0, bsp->length-1, pccp);*/
7512                         scores = PredictCCSeqLoc(slp, pccp);
7513                         cc_slp = FilterCC(scores, cutoff_cc, SeqLocLen(slp), linker_cc, SeqIdDup(sip), FALSE);
7514                         MemFree(scores);
7515                         PccDatFree (pccp);
7516                         seqloc_num++;
7517                 }
7518         }
7519         else
7520         {
7521                 if (do_all || do_dust)
7522                 {
7523                         dust_slp = SeqLocDustEx(slp, level_dust, window_dust, linker_dust);
7524                         seqloc_num++;
7525                 }
7526                 if (do_repeats)
7527                 {
7528                 /* Either the SeqLocPtr is SEQLOC_WHOLE (both strands) or SEQLOC_INT (probably 
7529 one strand).  In that case we make up a double-stranded one as we wish to look at both strands. */
7530                         myslp_allocated = FALSE;
7531                         if (slp->choice == SEQLOC_INT)
7532                         {
7533                                 myslp = SeqLocIntNew(SeqLocStart(slp), SeqLocStop(slp), Seq_strand_both, SeqLocId(slp));
7534                                 myslp_allocated = TRUE;
7535                         }
7536                         else
7537                         {
7538                                 myslp = slp;
7539                         }
7540 start_timer;
7541                         repeat_slp = BioseqHitRangeEngineByLoc(myslp, "blastn", repeat_database, repeat_options, NULL, NULL, NULL, NULL, NULL, 0);
7542 stop_timer("after repeat filtering");
7543                         repeat_options = BLASTOptionDelete(repeat_options);
7544                         repeat_database = MemFree(repeat_database);
7545                         if (myslp_allocated)
7546                                 SeqLocFree(myslp);
7547                         seqloc_num++;
7548                 }
7549                 if (do_vecscreen)
7550                 {
7551                 /* Either the SeqLocPtr is SEQLOC_WHOLE (both strands) or SEQLOC_INT (probably 
7552 one strand).  In that case we make up a double-stranded one as we wish to look at both strands. */
7553                         myslp_allocated = FALSE;
7554                         if (slp->choice == SEQLOC_INT)
7555                         {
7556                                 myslp = SeqLocIntNew(SeqLocStart(slp), SeqLocStop(slp), Seq_strand_both, SeqLocId(slp));
7557                                 myslp_allocated = TRUE;
7558                         }
7559                         else
7560                         {
7561                                 myslp = slp;
7562                         }
7563                         VSScreenSequenceByLoc(myslp, NULL, vs_database, &seqalign, &vnp, NULL, NULL);
7564                         vnp_var = vnp;
7565                         while (vnp_var)
7566                         {
7567                                 seqloc_tmp = vnp_var->data.ptrvalue;
7568                                 if (vs_slp == NULL)
7569                                 {
7570                                         vs_slp = seqloc_tmp;
7571                                 }
7572                                 else
7573                                 {
7574                                         seqloc_var = vs_slp;
7575                                         while (seqloc_var->next)
7576                                                 seqloc_var = seqloc_var->next;
7577                                         seqloc_var->next = seqloc_tmp;
7578                                 }
7579                                 vnp_var->data.ptrvalue = NULL;
7580                                 vnp_var = vnp_var->next;
7581                         }
7582                         vnp = ValNodeFree(vnp);
7583                         seqalign = SeqAlignSetFree(seqalign);
7584                         vs_database = MemFree(vs_database);
7585                         if (myslp_allocated)
7586                                 SeqLocFree(myslp);
7587                         seqloc_num++;
7588                 }
7589         }
7590 
7591         if (seqloc_num == 0)
7592         { /* nothing. */
7593                 ;
7594         } 
7595         else if (seqloc_num == 1)
7596         {
7597                 if (seg_slp)
7598                         seqloc_head = seg_slp;
7599                 if (cc_slp)
7600                         seqloc_head = cc_slp;
7601                 if (dust_slp)
7602                         seqloc_head = dust_slp;
7603                 if (repeat_slp)
7604                         seqloc_head = repeat_slp;
7605                 if (vs_slp)
7606                         seqloc_head = vs_slp;
7607         }
7608         else
7609         {
7610                 if (seg_slp)
7611                         ValNodeAddPointer(&seqloc_head, SEQLOC_MIX, seg_slp);
7612                 if (cc_slp)
7613                         ValNodeAddPointer(&seqloc_head, SEQLOC_MIX, cc_slp);
7614                 if (dust_slp)
7615                         ValNodeAddPointer(&seqloc_head, SEQLOC_MIX, dust_slp);
7616                 if (repeat_slp)
7617                         ValNodeAddPointer(&seqloc_head, SEQLOC_MIX, repeat_slp);
7618                 if (vs_slp)
7619                         ValNodeAddPointer(&seqloc_head, SEQLOC_MIX, vs_slp);
7620         }
7621 
7622         BioseqUnlock(bsp);
7623         return seqloc_head;
7624 }
7625 
7626 /*
7627         Program to run seg on a sequence.  Note that this program only
7628         really works in UNIX systems.
7629 */
7630 Boolean LIBCALL
7631 FilterWithSeg (Uint1Ptr sequence, Int4 length, Uint1 alphabet)
7632 
7633 {
7634 
7635 #ifdef OS_UNIX
7636 
7637         BioseqPtr bsp;
7638         Char cmd_buf[2*PATH_MAX], temp_file[PATH_MAX];
7639         CharPtr filter_dir;
7640         FILE PNTR fp;
7641         Int4 byte_store_length;
7642         Nlm_ByteStorePtr byte_store;
7643         SeqEntryPtr sep;
7644 
7645         if (sequence == NULL || length == 0)
7646                 return FALSE;
7647 
7648         byte_store = Nlm_BSNew(length);
7649 
7650         byte_store_length = Nlm_BSWrite(byte_store, (VoidPtr) sequence, length);
7651         if (length != byte_store_length)
7652         {
7653                 Nlm_BSDelete(byte_store, length);
7654                 return FALSE;
7655         }
7656 
7657         bsp = BioseqNew();
7658         bsp->seq_data = (SeqDataPtr) byte_store;
7659         bsp->length = length;
7660         bsp->seq_data_type = alphabet;
7661         bsp->mol = Seq_mol_aa;
7662         bsp->repr = Seq_repr_raw;
7663 
7664         TmpNam(temp_file);
7665         fp = FileOpen(temp_file, "w");
7666         if (BioseqToFasta(bsp, fp, FALSE) == FALSE)
7667         {
7668                 bsp = BioseqFree(bsp);
7669                 return FALSE;
7670         }
7671         FileClose(fp);
7672 
7673         bsp = BioseqFree(bsp);
7674 
7675         filter_dir = getenv("BLASTFILTER");
7676         if (filter_dir != NULL)
7677                 sprintf(cmd_buf, "%s%s%s%s%s", filter_dir, DIRDELIMSTR, "seg ", temp_file, " -x");
7678         else
7679                 sprintf(cmd_buf, "%s%s%s%s%s", BLASTFILTER_DIR, DIRDELIMSTR, "seg ", temp_file, " -x");
7680 
7681         fp = popen(cmd_buf, "r");
7682         if (fp == NULL)
7683         {
7684                 ErrPostEx(SEV_WARNING, 0, 0, "Call to seg failed.");
7685                 return FALSE;
7686         }
7687         
7688         sep = FastaToSeqEntry(fp, FALSE);
7689         if (sep == NULL)
7690         {
7691                 ErrPostEx(SEV_WARNING, 0, 0, "Call to seg failed.");
7692                 return FALSE;
7693         }
7694 
7695         pclose(fp);
7696 
7697         bsp = sep->data.ptrvalue;
7698         BioseqRawConvert(bsp, Seq_code_ncbistdaa);
7699 
7700         BSSeek((ByteStorePtr) bsp->seq_data, 0, SEEK_SET);
7701         Nlm_BSRead((ByteStorePtr) bsp->seq_data, (VoidPtr) sequence, length);
7702 
7703         SeqEntryFree(sep);
7704 
7705         FileRemove(temp_file);
7706 
7707         return TRUE;
7708 #else
7709         return FALSE;
7710 #endif
7711 }
7712 
7713 
7714 BLAST_HSPPtr BLAST_HSPFree(BLAST_HSPPtr hsp)
7715 {
7716 if (hsp)
7717    hsp->gap_info = GapXEditBlockDelete(hsp->gap_info);
7718 
7719 return (BLAST_HSPPtr) MemFree(hsp);
7720 }
7721 
7722 /* 
7723         Frees memory used for HSP's on the ResultHitlist.
7724         Should be called as the SeqAlignPtr for a hitlist
7725         is produced to save memory.
7726 */
7727 
7728 void
7729 BLASTResultFreeHsp(BLASTResultHitlistPtr result)
7730 
7731 {
7732         BLASTResultHspPtr hsp;
7733         Int4 index;
7734 
7735         if (result == NULL || result->hsp_array == NULL)
7736                 return;
7737 
7738         for(index=0; index < result->hspcnt; index++) {
7739           hsp = &result->hsp_array[index];
7740           if (hsp)
7741              hsp->gap_info = GapXEditBlockDelete(hsp->gap_info);
7742         }
7743 
7744         if (result->hspcnt != 0)
7745                 result->hsp_array = MemFree(result->hsp_array);
7746 
7747         result->hspcnt = 0;
7748 
7749         return;
7750 }
7751 
7752 /* 
7753         Free's the hitlist without performing a check 
7754         on the integrity of the heap (used for culling).
7755 */
7756 BLASTResultHitlistPtr LIBCALL
7757 BLASTResultHitlistFree(BLASTResultHitlistPtr result)
7758 
7759 {
7760         return BLASTResultHitlistFreeEx(NULL, result);
7761         
7762 }
7763 
7764 
7765 BLASTResultHitlistPtr LIBCALL
7766 BLASTResultHitlistFreeEx(BlastSearchBlkPtr search, BLASTResultHitlistPtr result)
7767 
7768 {
7769         BLASTHeapPtr hp;
7770         Int4 index;
7771         register Int4 subject_id;
7772         
7773         if (result == NULL)
7774                 return NULL;
7775 
7776 
7777         /* 
7778         Check the integrity of the heap used for culling.  Occassionally
7779         HSP's that have been saved (in the heap before the start of
7780         the HSP) are missed. 
7781         Only do this if the BlastSearchBlkPtr was provided.
7782         */
7783         if (search && search->pbp->perform_culling == TRUE && result->num_ref > 0)
7784         {
7785                 subject_id = result->subject_id;
7786 
7787                 /* result->num_ref can change in the loop. */
7788                 for (hp = search->result_struct->heap_ptr; hp && result->num_ref>0; hp = hp->next)
7789                 {
7790                         index=0; /* Note that hp->num_in_heap can change in the loop */
7791                         while (index < hp->num_in_heap)
7792                         {
7793                                 if (hp->heap[index]->point_back->subject_id == subject_id)
7794                                 {               
7795                                         BlastDeleteHeap(hp, index);
7796                                 }
7797                                 else
7798                                         index++;
7799                         }
7800                 }
7801         }
7802 
7803         /* In case it was not freed before. */
7804         BLASTResultFreeHsp(result);
7805 
7806         BLASTSubjectInfoDestruct(result->subject_info); 
7807 
7808         result = MemFree(result);
7809 
7810         return result;
7811 }
7812 
7813 /*
7814         Creates a new BLASTResultHitlist, with the an hsp-array of length hspcnt.  If the
7815         allocation fails, then NULL is returned.
7816 */
7817 
7818 BLASTResultHitlistPtr LIBCALL
7819 BLASTResultHitlistNew(Int4 hspcnt)
7820 
7821 {
7822 
7823         BLASTResultHitlistPtr new;
7824 
7825         new = (BLASTResultHitlistPtr) MemNew(sizeof(BLASTResultHitlist));
7826         if (new == NULL)
7827                 return NULL;
7828 
7829         new->hsp_array = (BLASTResultHspPtr) MemNew(hspcnt*sizeof(BLASTResultHsp));
7830         if (new->hsp_array == NULL)
7831         {
7832                 new = BLASTResultHitlistFree(new);
7833                 return NULL;
7834         }
7835         new->hspcnt = hspcnt;
7836 
7837         return new; 
7838 }
7839 
7840 
7841 static Boolean 
7842 CopyHSPToResultHsp(BLAST_KarlinBlkPtr kbp, BLAST_HSPPtr hsp, BLASTResultHspPtr result_hsp)
7843 {
7844         if (result_hsp == NULL || hsp == NULL)
7845                 return FALSE;
7846 
7847         result_hsp->ordering_method = hsp->ordering_method;
7848         result_hsp->number = hsp->num;
7849         result_hsp->score = hsp->score;
7850         result_hsp->bit_score = ((hsp->score*kbp->Lambda) - kbp->logK)/NCBIMATH_LN2;
7851         result_hsp->e_value = hsp->evalue;
7852         result_hsp->num_ident = hsp->num_ident;
7853         result_hsp->query_offset = hsp->query.offset;
7854         result_hsp->query_length = hsp->query.length;
7855         result_hsp->query_frame = hsp->query.frame;
7856         result_hsp->query_gapped_start = hsp->query.gapped_start;
7857         result_hsp->subject_offset = hsp->subject.offset;
7858         result_hsp->subject_length = hsp->subject.length;
7859         result_hsp->subject_frame = hsp->subject.frame;
7860         result_hsp->subject_gapped_start = hsp->subject.gapped_start;
7861         result_hsp->context = hsp->context;
7862         result_hsp->gap_info = hsp->gap_info;
7863         /* Not set in the other type of HSP? */
7864         result_hsp->hspset_cnt = 0;
7865 
7866         return TRUE;
7867 }
7868 
7869 Boolean LIBCALL
7870 CopyResultHspToHSP(BLASTResultHspPtr result_hsp, BLAST_HSPPtr hsp)
7871 {
7872         if (result_hsp == NULL || hsp == NULL)
7873                 return FALSE;
7874 
7875         hsp->ordering_method = result_hsp->ordering_method;
7876         hsp->num = result_hsp->number;
7877         hsp->score = result_hsp->score;
7878         hsp->evalue = result_hsp->e_value;
7879         hsp->num_ident = result_hsp->num_ident;
7880         hsp->query.offset = result_hsp->query_offset;
7881         hsp->query.length = result_hsp->query_length;
7882         hsp->query.end = result_hsp->query_offset + result_hsp->query_length;
7883         hsp->query.frame = result_hsp->query_frame;
7884         hsp->query.gapped_start = result_hsp->query_gapped_start;
7885         hsp->subject.offset = result_hsp->subject_offset;
7886         hsp->subject.length = result_hsp->subject_length;
7887         hsp->subject.end = result_hsp->subject_offset + result_hsp->subject_length;
7888         hsp->subject.frame = result_hsp->subject_frame;
7889         hsp->subject.gapped_start = result_hsp->subject_gapped_start;
7890         hsp->context = result_hsp->context;
7891 
7892         return TRUE;
7893 }
7894 
7895 /* Same as FillInStdSegInfo, only taking BLAST_HSPPtr argument instead of
7896    BlastResultHspPtr */
7897 StdSegPtr
7898 BLASTHspToStdSeg(BlastSearchBlkPtr search, Int4 subject_length, BLAST_HSPPtr hsp, SeqIdPtr sip, Boolean reverse, SeqIdPtr gi_list)
7899 {
7900    StdSegPtr ssp = NULL;
7901    BLASTResultHspPtr result_hsp = 
7902       (BLASTResultHspPtr) Malloc(sizeof(BLASTResultHsp));
7903 
7904    CopyHSPToResultHsp(search->sbp->kbp[search->first_context], 
7905                       hsp, result_hsp);
7906    ssp = FillInStdSegInfo(search, search->subject_id, subject_length, &ssp, 
7907                              result_hsp, sip, reverse, gi_list);
7908    MemFree(result_hsp);   
7909    return ssp;
7910 }
7911 
7912 /*
7913         Sort the HSP's by score.
7914 */
7915 
7916 int LIBCALLBACK
7917 score_compare_hsps(VoidPtr v1, VoidPtr v2)
7918 
7919 {
7920     BLAST_HSPPtr hsp1, hsp2;    /* the HSPs to be compared */
7921     int result = 0;             /* the result of the comparison */
7922 
7923     hsp1 = *((BLAST_HSPPtr PNTR) v1);
7924     hsp2 = *((BLAST_HSPPtr PNTR) v2);
7925 
7926     /* Null HSPs are "greater" than any non-null ones, so they go to the end
7927        of a sorted list. */
7928     if (!hsp1 && !hsp2)
7929         return 0;
7930     else if (!hsp1)
7931         return 1;
7932     else if (!hsp2)
7933         return -1;
7934 
7935     if (0 == (result = BLAST_CMP(hsp2->score,          hsp1->score)) &&
7936         0 == (result = BLAST_CMP(hsp1->subject.offset, hsp2->subject.offset)) &&
7937         0 == (result = BLAST_CMP(hsp2->subject.end,    hsp1->subject.end)) &&
7938         0 == (result = BLAST_CMP(hsp1->query  .offset, hsp2->query  .offset))) {
7939         /* if all other test can't distinguish the HSPs, then the final
7940            test is the result */
7941         result = BLAST_CMP(hsp2->query.end, hsp1->query.end);
7942     }
7943     return result;
7944 }
7945 
7946 /*
7947         Function to look for the highest scoring window (of size HSP_MAX_WINDOW)
7948         in an HSP and return the middle of this.  Used by the gapped-alignment
7949         functions to start the gapped alignments.
7950 */
7951 
7952 Int4 GetStartForGappedAlignment (BlastSearchBlkPtr search, BLAST_HSPPtr hsp, Uint1Ptr query, Uint1Ptr subject, Int4Ptr PNTR matrix)
7953 {
7954     Int4 index1, max_offset, score, max_score, hsp_end;
7955     Uint1Ptr query_var, subject_var;
7956     Boolean positionBased = (search->positionBased && search->sbp->posMatrix);
7957     
7958     if (hsp->query.length <= HSP_MAX_WINDOW) {
7959         max_offset = hsp->query.offset + hsp->query.length/2;
7960         return max_offset;
7961     }
7962 
7963     hsp_end = hsp->query.offset + HSP_MAX_WINDOW;
7964     query_var = query + hsp->query.offset;
7965     subject_var = subject + hsp->subject.offset;
7966     score=0;
7967     if (!positionBased) {
7968        for (index1=hsp->query.offset; index1<hsp_end; index1++) {
7969           score += matrix[*query_var][*subject_var];
7970           query_var++; subject_var++;
7971        }
7972     } else {
7973        for (index1=hsp->query.offset; index1<hsp_end; index1++) {
7974           score += search->sbp->posMatrix[index1][*subject_var];
7975           query_var++; subject_var++;
7976        }
7977     }
7978     max_score = score;
7979     max_offset = hsp_end - 1;
7980     hsp_end = hsp->query.end - 
7981         MAX(0, hsp->query.length - hsp->subject.length);
7982     for (index1=hsp->query.offset + HSP_MAX_WINDOW; index1<hsp_end; index1++) {
7983         if (!positionBased) {
7984             score -= matrix[*(query_var-HSP_MAX_WINDOW)][*(subject_var-HSP_MAX_WINDOW)];
7985             score += matrix[*query_var][*subject_var];
7986         } else {
7987             score -= search->sbp->posMatrix[index1-HSP_MAX_WINDOW][*(subject_var-HSP_MAX_WINDOW)];
7988             score += search->sbp->posMatrix[index1][*subject_var];
7989         }
7990         if (score > max_score) {
7991             max_score = score;
7992             max_offset = index1;
7993         }
7994         query_var++; subject_var++;
7995     }
7996     if (max_score > 0)
7997        max_offset -= HSP_MAX_WINDOW/2;
7998     else 
7999        max_offset = hsp->query.offset;
8000 
8001     return max_offset;
8002 }
8003 
8004 /*
8005    Check whether the starting point for gapped alignment lies in
8006    region that has positive score.  This routine is called after a
8007    preliminary gapped alignment has been computed, but before the
8008    traceback is computed.  The score of the region containing the
8009    starting point may have changed due to the introduction of
8010    ambiguity characters, further filtering of the sequences or the
8011    application of composition based statistics.
8012 
8013    Usually, we check an ungapped alignment of length 11 about the
8014    starting point: 5 characters to the left and 5 to the right.
8015    However, the actual region checked is occassionally shorter because
8016    we don't check characters before the start, or after the end, of
8017    the preliminarily aligned regions in the query or subject.
8018 */
8019 Boolean
8020 CheckStartForGappedAlignment (BlastSearchBlkPtr search, BLAST_HSPPtr hsp,
8021                               Uint1Ptr query, Uint1Ptr subject,
8022                               Int4Ptr PNTR matrix)
8023 {
8024     Int4 left, right;       /* Number of aligned characters to the
8025                                left and right of the starting point */
8026     Int4 score;             /* Score of the word alignment */
8027     Uint1Ptr subject_var;   /* Current character in the subject sequence */
8028     Uint1Ptr subject_right; /* last character to be considered in the subject
8029                                sequence */
8030     Boolean positionBased =
8031         (search->positionBased && search->sbp->posMatrix);
8032 
8033     /* Compute the number of characters to the left of the start
8034        to include in the word */
8035     left = -HSP_MAX_WINDOW/2;
8036     if (left < hsp->query.offset - hsp->query.gapped_start) {
8037         left = hsp->query.offset - hsp->query.gapped_start;
8038     }
8039     if (left < hsp->subject.offset - hsp->subject.gapped_start) {
8040         left = hsp->subject.offset - hsp->subject.gapped_start;
8041     }
8042 
8043     /* Compute the number of characters to right to include in the word,
8044        including the starting point itself. */
8045     right = HSP_MAX_WINDOW/2 + 1;
8046     if (right > hsp->query.end - hsp->query.gapped_start) {
8047         right = hsp->query.end - hsp->query.gapped_start;
8048     }
8049     if (right > hsp->subject.end - hsp->subject.gapped_start) {
8050         right = hsp->subject.end - hsp->subject.gapped_start;
8051     }
8052 
8053     /* Calculate the score of the word */
8054     score = 0;
8055     subject_var   = subject + hsp->subject.gapped_start + left;
8056     subject_right = subject + hsp->subject.gapped_start + right;
8057     if ( !positionBased ) {
8058         Uint1Ptr query_var;     /* Current character in the query */
8059         query_var = query + hsp->query.gapped_start + left;
8060         for ( ; subject_var < subject_right; subject_var++, query_var++) {
8061            score += matrix[*query_var][*subject_var];
8062         }
8063     } else {
8064         Int4 query_index;       /* Current position in the query */
8065         query_index = hsp->query.gapped_start + left;
8066         for ( ;  subject_var < subject_right;  subject_var++, query_index++) {
8067             score += search->sbp->posMatrix[query_index][*subject_var];
8068         }
8069     }
8070     if (score <= 0) {
8071         return FALSE;
8072     } else {
8073         return TRUE;
8074     }
8075 }
8076 
8077 
8078 /*
8079         Gets the ratio used to change an evalue calculated with the subject
8080         sequence length to one with a db length.
8081 */
8082 
8083 Nlm_FloatHi LIBCALL
8084 GetDbSubjRatio(BlastSearchBlkPtr search, Int4 subject_length)
8085 {
8086         Nlm_FloatHi db_subj_ratio;
8087 
8088         db_subj_ratio =
8089             ((Nlm_FloatHi) search->context_factor * search->dblen) /
8090             ((Nlm_FloatHi) subject_length);
8091         if (StringCmp(search->prog_name, "tblastn") == 0 ||
8092             StringCmp(search->prog_name, "tblastx") == 0 ||
8093             StringCmp(search->prog_name, "psitblastn") == 0)
8094         {
8095                 db_subj_ratio *= 3;
8096         }
8097         
8098         return db_subj_ratio;
8099 }
8100 
8101 /* The following value should be divisible by 3, to make sure that frames stay
8102    the same when translations are restricted to partial sequence. */
8103 #define SUBJECT_ADJUSTMENT 2100
8104 SeqAlignPtr LIBCALL 
8105 BlastGetGapAlgnTbckWithReaddb (BlastSearchBlkPtr search, Int4 hit_number, Boolean ordinal_number)
8106 
8107 {
8108         BLASTResultHitlistPtr   result_hitlist;
8109         BioseqPtr subject_bsp;
8110         Boolean subject_allocated = FALSE;
8111         Int4 index1, subject_length, rev_subject_length;
8112         Int4 subject_start, subject_end;
8113         Int4 hsp_count;
8114         BLASTResultHspPtr hsp_array;
8115         SeqAlignPtr seqalign;
8116         SeqPortPtr spp;
8117         Uint1Ptr subject, rev_subject;
8118 
8119         result_hitlist = search->result_struct->results[hit_number];
8120 
8121         if (StringCmp(search->prog_name, "tblastn") == 0 ||
8122             StringCmp(search->prog_name, "psitblastn") == 0)
8123         {
8124                 subject_bsp = readdb_get_bioseq(search->rdfp, result_hitlist->subject_id);
8125                 spp = SeqPortNew(subject_bsp, 0, -1, Seq_strand_plus, Seq_code_ncbi4na);
8126                 /* make one longer to "protect" ALIGN. */
8127                 subject = MemNew((1+subject_bsp->length)*sizeof(Uint1));
8128                 hsp_array = result_hitlist->hsp_array;
8129                 hsp_count = result_hitlist->hspcnt;
8130                 for (index1=0; index1<hsp_count; index1++)
8131                 {
8132                         if (hsp_array[index1].subject_frame > 0)
8133                         { /* Get subsequence corresponding to this hsp. */
8134                                 Int4 offset;
8135 
8136                                 subject_start = 3*hsp_array[index1].subject_offset;
8137                                 subject_end = subject_start + 3*hsp_array[index1].subject_length;
8138 
8139                                 /* add SUBJECT_ADJUSTMENT bases to either end. */
8140                                 subject_start = MAX(subject_start - SUBJECT_ADJUSTMENT, 0);
8141                                 subject_end = MIN(subject_end + SUBJECT_ADJUSTMENT, subject_bsp->length);
8142 
8143                                 SeqPortSeek(spp, subject_start, SEEK_SET);
8144 
8145                                 for (offset=subject_start; offset<subject_end; offset++)
8146                                         subject[offset] = SeqPortGetResidue(spp);
8147 
8148                                 if (subject_start == 0 && subject_end == subject_bsp->length)
8149                                         break;    /* entire sequence has been fetched. */
8150                         }
8151                 }
8152                 /* Gap character in last space. */
8153                 subject[subject_bsp->length] = NULLB;
8154                 subject_length = subject_bsp->length;
8155                 spp = SeqPortFree(spp);
8156 
8157                 spp = SeqPortNew(subject_bsp, 0, -1, Seq_strand_minus, Seq_code_ncbi4na);
8158                 /* make one longer to "protect" ALIGN. */
8159                 rev_subject = MemNew((1+subject_bsp->length)*sizeof(Uint1));
8160                 hsp_array = result_hitlist->hsp_array;
8161                 hsp_count = result_hitlist->hspcnt;
8162                 for (index1=0; index1<hsp_count; index1++)
8163                 {
8164                         if (hsp_array[index1].subject_frame < 0)
8165                         { /* Get subsequence corresponding to this hsp. */
8166                                 Int4 offset;
8167 
8168                                 subject_start = 3*hsp_array[index1].subject_offset;
8169                                 subject_end = subject_start + 3*hsp_array[index1].subject_length;
8170 
8171                                 /* add SUBJECT_ADJUSTMENT bases to either end. */
8172                                 subject_start = MAX(subject_start - SUBJECT_ADJUSTMENT, 0);
8173                                 subject_end = MIN(subject_end + SUBJECT_ADJUSTMENT, subject_bsp->length);
8174 
8175                                 SeqPortSeek(spp, subject_start, SEEK_SET);
8176 
8177                                 for (offset=subject_start; offset<subject_end; offset++)
8178                                         rev_subject[offset] = SeqPortGetResidue(spp);
8179 
8180                                 if (subject_start == 0 && subject_end == subject_bsp->length)
8181                                         break;    /* entire sequence has been fetched. */
8182                         }
8183                 }
8184                 /* Gap character in last space. */
8185                 rev_subject[subject_bsp->length] = NULLB;
8186                 rev_subject_length = subject_bsp->length;
8187                 spp = SeqPortFree(spp);
8188                 subject_bsp = BioseqFree(subject_bsp);
8189                 subject_allocated = TRUE;
8190         }
8191         else
8192         {
8193                 subject_length = readdb_get_sequence(search->rdfp, result_hitlist->subject_id, (Uint1Ptr PNTR) &subject);
8194                 rev_subject = NULL;
8195                 rev_subject_length = 0;
8196         }
8197 
8198         seqalign = BlastGetGapAlgnTbck (search, hit_number,  FALSE, ordinal_number, subject, subject_length, rev_subject, rev_subject_length);
8199                 
8200         if (subject_allocated)
8201         {
8202                 subject = MemFree(subject);
8203                 rev_subject = MemFree(rev_subject);
8204         }
8205 
8206         return seqalign;
8207 }
8208 
8209 int LIBCALLBACK
8210 query_offset_compare_hsp(VoidPtr v1, VoidPtr v2)
8211 
8212 {
8213         BLAST_HSPPtr h1, h2;
8214         BLAST_HSPPtr PNTR hp1, PNTR hp2;
8215 
8216         hp1 = (BLAST_HSPPtr PNTR) v1;
8217         hp2 = (BLAST_HSPPtr PNTR) v2;
8218         h1 = *hp1;
8219         h2 = *hp2;
8220 
8221     if (h1 == NULL) {
8222         return (h2 == NULL) ? 0 : 1;
8223     } else if (h2 == NULL) {
8224       return -1;
8225     }
8226 
8227         if (h1->query.offset < h2->query.offset)
8228                 return -1;
8229         if (h1->query.offset > h2->query.offset)
8230                 return 1;
8231 
8232         if (h1->subject.offset < h2->subject.offset)
8233                 return -1;
8234         if (h1->subject.offset > h2->subject.offset)
8235                 return 1;
8236 
8237         return 0;
8238 }
8239 
8240 int LIBCALLBACK
8241 query_end_compare_hsp(VoidPtr v1, VoidPtr v2)
8242 
8243 {
8244         BLAST_HSPPtr h1, h2;
8245         BLAST_HSPPtr PNTR hp1, PNTR hp2;
8246 
8247         hp1 = (BLAST_HSPPtr PNTR) v1;
8248         hp2 = (BLAST_HSPPtr PNTR) v2;
8249         h1 = *hp1;
8250         h2 = *hp2;
8251 
8252     if (h1 == NULL) {
8253         return (h2 == NULL) ? 0 : 1;
8254     } else if (h2 == NULL) {
8255       return -1;
8256     }
8257 
8258         if (h1->query.end < h2->query.end)
8259                 return -1;
8260         if (h1->query.end > h2->query.end)
8261                 return 1;
8262 
8263         if (h1->subject.end < h2->subject.end)
8264                 return -1;
8265         if (h1->subject.end > h2->subject.end)
8266                 return 1;
8267 
8268         return 0;
8269 }
8270 /*
8271         Check the gapped alignments for an overlap of two different alignments.
8272         A sufficient overlap is when two alignments have the same start values
8273         of have the same final values.
8274 
8275         The number of valid alignments remaining is returned.
8276 */
8277 
8278 static Int4
8279 CheckGappedAlignmentsForOverlap(BlastSearchBlkPtr search, BLAST_HSPPtr *hsp_array, Int4 hsp_count, Int2 frame)
8280 
8281 {
8282         Int4 index1, index, increment;
8283 
8284         if (search == NULL || hsp_array == NULL || hsp_count == 0)
8285                 return 0;
8286 
8287         HeapSort(hsp_array, hsp_count, sizeof(BLAST_HSPPtr), query_offset_compare_hsp);
8288         index=0;
8289         increment=1;
8290         while (index < hsp_count-increment)
8291         { /* Check if both HSP's start on or end on the same digonal. */
8292                 if (hsp_array[index+increment] == NULL)
8293                 {
8294                         increment++;
8295                         continue;
8296                 }
8297 
8298                 if (frame != 0 && hsp_array[index+increment]->subject.frame != frame)
8299                                 break;
8300 
8301                 if (hsp_array[index] && hsp_array[index]->query.offset == hsp_array[index+increment]->query.offset &&
8302                           hsp_array[index]->subject.offset == hsp_array[index+increment]->subject.offset &&
8303                             SIGN(hsp_array[index]->query.frame) == SIGN(hsp_array[index+increment]->query.frame))
8304                 {
8305                         if (hsp_array[index]->score > hsp_array[index+increment]->score)
8306                         {
8307                                 hsp_array[index+increment] = 
8308                                    BLAST_HSPFree(hsp_array[index+increment]);
8309                                 increment++;
8310                         }
8311                         else
8312                         {
8313                                 hsp_array[index] = 
8314                                    BLAST_HSPFree(hsp_array[index]);
8315                                 index++;
8316                                 increment = 1;
8317                         }
8318                 }
8319                 else
8320                 {
8321                         index++;
8322                         increment = 1;
8323                 }
8324         }
8325 
8326         HeapSort(hsp_array, hsp_count, sizeof(BLAST_HSPPtr), query_end_compare_hsp);
8327         index=0;
8328         increment=1;
8329         while (index < hsp_count-increment)
8330         { /* Check if both HSP's start on or end on the same digonal. */
8331                 if (hsp_array[index+increment] == NULL)
8332                 {
8333                         increment++;
8334                         continue;
8335                 }
8336 
8337                 if (frame != 0 && hsp_array[index+increment]->subject.frame != frame)
8338                                 break;
8339 
8340                 if (hsp_array[index] &&
8341                         hsp_array[index]->query.end == hsp_array[index+increment]->query.end &&
8342                           hsp_array[index]->subject.end == hsp_array[index+increment]->subject.end &&
8343                             SIGN(hsp_array[index]->query.frame) == SIGN(hsp_array[index+increment]->query.frame))
8344                 {
8345                         if (hsp_array[index]->score > hsp_array[index+increment]->score)
8346                         {
8347                                 hsp_array[index+increment] = 
8348                                    BLAST_HSPFree(hsp_array[index+increment]);
8349                                 increment++;
8350                         }
8351                         else
8352                         {
8353                                 hsp_array[index] = 
8354                                    BLAST_HSPFree(hsp_array[index]);
8355                                 index++;
8356                                 increment = 1;
8357                         }
8358                 }
8359                 else
8360                 {
8361                         index++;
8362                         increment = 1;
8363                 }
8364         }
8365 
8366         HeapSort(hsp_array,hsp_count,sizeof(BLAST_HSPPtr), score_compare_hsps);
8367 
8368         index1 = 0;
8369         for (index=0; index<hsp_count; index++)
8370         {
8371                 if (hsp_array[index] != NULL)
8372                         index1++;
8373         }
8374 
8375 
8376         return index1;
8377 
8378 }
8379 
8380 /*
8381         Sort the HSP's by frame.
8382 */
8383 
8384 int LIBCALLBACK
8385 frame_compare_hsp_m3(VoidPtr v1, VoidPtr v2)
8386 
8387 {
8388         BLAST_HSPPtr h1, h2;
8389         BLAST_HSPPtr PNTR hp1, PNTR hp2;
8390 
8391         hp1 = (BLAST_HSPPtr PNTR) v1;
8392         hp2 = (BLAST_HSPPtr PNTR) v2;
8393         h1 = *hp1;
8394         h2 = *hp2;
8395 
8396         if (h1->subject.frame == -3 && h2->subject.frame != -3)
8397                 return -1;
8398         if (h2->subject.frame == -3 && h1->subject.frame != -3)
8399                 return 1;
8400 
8401         return 0;
8402 }
8403 int LIBCALLBACK
8404 frame_compare_hsp_m2(VoidPtr v1, VoidPtr v2)
8405 
8406 {
8407         BLAST_HSPPtr h1, h2;
8408         BLAST_HSPPtr PNTR hp1, PNTR hp2;
8409 
8410         hp1 = (BLAST_HSPPtr PNTR) v1;
8411         hp2 = (BLAST_HSPPtr PNTR) v2;
8412         h1 = *hp1;
8413         h2 = *hp2;
8414 
8415         if (h1->subject.frame == -2 && h2->subject.frame != -2)
8416                 return -1;
8417         if (h2->subject.frame == -2 && h1->subject.frame != -2)
8418                 return 1;
8419 
8420         return 0;
8421 }
8422 
8423 int LIBCALLBACK
8424 frame_compare_hsp_m1(VoidPtr v1, VoidPtr v2)
8425 
8426 {
8427         BLAST_HSPPtr h1, h2;
8428         BLAST_HSPPtr PNTR hp1, PNTR hp2;
8429 
8430         hp1 = (BLAST_HSPPtr PNTR) v1;
8431         hp2 = (BLAST_HSPPtr PNTR) v2;
8432         h1 = *hp1;
8433         h2 = *hp2;
8434 
8435         if (h1->subject.frame == -1 && h2->subject.frame != -1)
8436                 return -1;
8437         if (h2->subject.frame == -1 && h1->subject.frame != -1)
8438                 return 1;
8439 
8440         return 0;
8441 }
8442 int LIBCALLBACK
8443 frame_compare_hsp_p1(VoidPtr v1, VoidPtr v2)
8444 
8445 {
8446         BLAST_HSPPtr h1, h2;
8447         BLAST_HSPPtr PNTR hp1, PNTR hp2;
8448 
8449         hp1 = (BLAST_HSPPtr PNTR) v1;
8450         hp2 = (BLAST_HSPPtr PNTR) v2;
8451         h1 = *hp1;
8452         h2 = *hp2;
8453 
8454         if (h1->subject.frame == 1 && h2->subject.frame != 1)
8455                 return -1;
8456         if (h2->subject.frame == 1 && h1->subject.frame != 1)
8457                 return 1;
8458 
8459         return 0;
8460 }
8461 int LIBCALLBACK
8462 frame_compare_hsp_p2(VoidPtr v1, VoidPtr v2)
8463 
8464 {
8465         BLAST_HSPPtr h1, h2;
8466         BLAST_HSPPtr PNTR hp1, PNTR hp2;
8467 
8468         hp1 = (BLAST_HSPPtr PNTR) v1;
8469         hp2 = (BLAST_HSPPtr PNTR) v2;
8470         h1 = *hp1;
8471         h2 = *hp2;
8472 
8473         if (h1->subject.frame == 2 && h2->subject.frame != 2)
8474                 return -1;
8475         if (h2->subject.frame == 2 && h1->subject.frame != 2)
8476                 return 1;
8477 
8478         return 0;
8479 }
8480 int LIBCALLBACK
8481 frame_compare_hsp_p3(VoidPtr v1, VoidPtr v2)
8482 
8483 {
8484         BLAST_HSPPtr h1, h2;
8485         BLAST_HSPPtr PNTR hp1, PNTR hp2;
8486 
8487         hp1 = (BLAST_HSPPtr PNTR) v1;
8488         hp2 = (BLAST_HSPPtr PNTR) v2;
8489         h1 = *hp1;
8490         h2 = *hp2;
8491 
8492         if (h1->subject.frame == 3 && h2->subject.frame != 3)
8493                 return -1;
8494         if (h2->subject.frame == 3 && h1->subject.frame != 3)
8495                 return 1;
8496 
8497         return 0;
8498 }
8499 /*
8500         Engine to get the gapped scores from an array of HSP's.
8501 */
8502 static BLAST_HSPPtr PNTR
8503 BlastGappedScoreInternal(BlastSearchBlkPtr search, Uint1Ptr subject, Int4 subject_length, GapAlignBlkPtr gap_align, BLAST_HSPPtr *hsp_array, Int4Ptr hspcnt, Int4Ptr hspcnt_max, Int4 hspmax, Int2 frame)
8504 
8505 {
8506         BLAST_HSPPtr hsp, hsp1=NULL;
8507         BLAST_HSPPtr PNTR hsp_array_new;
8508         BLAST_HSP_helperPtr helper;
8509         Boolean hsp_start_is_contained, hsp_end_is_contained;
8510         Int4 hsp_cnt=0, index, index1;
8511         Int4 max_offset = 0, next_offset;
8512         Int4 query_num; /* AM: Added to support query concatenation */
8513 
8514         /* helper contains most frequently used information to speed up access. */
8515         helper = Malloc((*hspcnt)*sizeof(BLAST_HSP_helper));
8516         for (index=0; index<(*hspcnt); index++)
8517         {
8518                 hsp_start_is_contained = FALSE;
8519                 hsp_end_is_contained = FALSE;
8520                 hsp = hsp_array[index];
8521         /* This prefetches this value for the test below. */
8522                 next_offset = hsp->query.offset;
8523 
8524                 if (frame != 0 && hsp->subject.frame != frame)
8525                         break;
8526 
8527                 for (index1=0; index1<index; index1++)
8528                 {
8529                         hsp_start_is_contained = FALSE;
8530                         hsp_end_is_contained = FALSE;
8531 
8532                         hsp1 = hsp_array[index1];
8533                         if (hsp1 == NULL)
8534                                 continue;
8535 
8536                         /* Check with the helper array whether further
8537                                 tests are warranted.  Having only two ints
8538                                 in the helper array speeds up access. */
8539                         if (helper[index1].qoffset <= next_offset &&
8540                                 helper[index1].qend >= next_offset)
8541                         {
8542                            if (CONTAINED_IN_HSP(hsp1->query.offset, hsp1->query.end, hsp->query.offset, hsp1->subject.offset, hsp1->subject.end, hsp->subject.offset) == TRUE)
8543 
8544                            {    /* Check that it's on diff. strands. */
8545                                 if (SIGN(hsp1->query.frame) == SIGN(hsp->query.frame) &&
8546                                         SIGN(hsp1->subject.frame) == SIGN(hsp->subject.frame))
8547                                         hsp_start_is_contained = TRUE;
8548                            }
8549                            if (hsp_start_is_contained && CONTAINED_IN_HSP(hsp1->query.offset, hsp1->query.end, hsp->query.end, hsp1->subject.offset, hsp1->subject.end, hsp->subject.end) == TRUE)
8550 
8551                            {    /* Check that it's on diff. strands. */
8552                                 if (SIGN(hsp1->query.frame) == SIGN(hsp->query.frame) &&
8553                                         SIGN(hsp1->subject.frame) == SIGN(hsp->subject.frame))
8554                                         hsp_end_is_contained = TRUE;
8555                                 if (hsp_start_is_contained && hsp_end_is_contained && hsp->score <= hsp1->score)
8556                                 {
8557                                         break;
8558                                 }
8559                            }
8560                         }
8561                 }
8562                 
8563                 if (hsp_start_is_contained == FALSE ||
8564                     hsp_end_is_contained   == FALSE || 
8565                     (hsp1 == NULL) || (hsp->score > hsp1->score))
8566                 {
8567                         gap_align->include_query = 0;
8568 
8569                         if(!search->pbp->is_ooframe) {
8570                             max_offset = GetStartForGappedAlignment(search, hsp, search->context[hsp->context].query->sequence, subject, search->sbp->matrix);
8571                         }
8572                         
8573 #ifdef BLAST_COLLECT_STATS
8574                         search->real_gap_number_of_hsps++;
8575 #endif
8576                         Nlm_MemSet((VoidPtr) &(hsp_array[index]->hsp_link), 0, sizeof(BLAST_HSP_LINK));
8577                         hsp_array[index]->linked_set = FALSE;
8578                         hsp_array[index]->start_of_chain = FALSE;
8579                         hsp_array[index]->num = 0;
8580                         hsp_array[index]->xsum = 0.0;
8581 
8582                         if(search->pbp->is_ooframe) {
8583                             gap_align->is_ooframe = TRUE;
8584                             gap_align->query = subject;
8585                             if(hsp->query.frame > 0) {
8586                                 gap_align->subject = search->query_dnap[0]->sequence;
8587                                 gap_align->subject_length = search->query_dnap[0]->length;
8588                             } else {
8589                                 gap_align->subject = search->query_dnap[1]->sequence;
8590                                 gap_align->subject_length = search->query_dnap[1]->length;
8591                             }
8592 
8593                             gap_align->query_length = subject_length;
8594 
8595                             gap_align->q_start = hsp->subject.offset;
8596                             gap_align->s_start = hsp->query.offset; 
8597 
8598                             hsp->query.gapped_start = gap_align->s_start;
8599                             hsp->subject.gapped_start = gap_align->q_start;
8600 
8601                         } else {
8602                             gap_align->query = search->context[hsp->context].query->sequence;
8603                             gap_align->query_length = search->context[hsp->context].query->length;
8604                             gap_align->q_start = max_offset;
8605                             gap_align->s_start = 
8606                                (hsp->subject.offset - hsp->query.offset) + max_offset;
8607                             hsp->query.gapped_start = gap_align->q_start;
8608                             hsp->subject.gapped_start = gap_align->s_start;
8609 
8610                                gap_align->subject = subject;
8611                                gap_align->subject_length = subject_length;
8612                         }
8613                                                
8614                         /* For out-of frame gapping - query is protein
8615                            and subject is DNA translated into 3 frames */
8616 
8617                         PerformGappedAlignment(gap_align);
8618 
8619                         if(search->pbp->is_ooframe) {
8620                             hsp->query.offset = gap_align->subject_start;
8621                             hsp->subject.offset = gap_align->query_start;
8622                             /* The end is one further for BLAST than for the gapped align. */
8623                             hsp->query.end = gap_align->subject_stop + 1;
8624                             hsp->subject.end = gap_align->query_stop + 1;
8625                         } else {
8626                             hsp->query.offset = gap_align->query_start;
8627                             hsp->query.end = gap_align->query_stop + 1;
8628                             hsp->subject.offset = gap_align->subject_start;
8629                             hsp->subject.end = gap_align->subject_stop + 1;
8630                             /* The end is one further for BLAST than for the gapped align. */
8631                         }
8632 
8633                         hsp->query.length = hsp->query.end - hsp->query.offset;
8634                         hsp->subject.length = hsp->subject.end - hsp->subject.offset;
8635                         hsp->score = gap_align->score;
8636             if( hsp->score >= search->pbp->cutoff_s1 ) {
8637                 /* AM: Changed to support query concatenation */
8638                 if( !search->mult_queries )
8639                     hsp->evalue =
8640                         BlastKarlinStoE_simple(hsp->score,
8641                                                search->sbp->
8642                                                kbp_gap[search->first_context],
8643                                                search->searchsp_eff);
8644                 else {
8645                     query_num = GetQueryNum( search->mult_queries,
8646                                              hsp->query.offset,
8647                                              hsp->query.end,
8648                                              hsp->query.frame );
8649                     hsp->evalue =
8650                         BlastKarlinStoE_simple(hsp->score,
8651                                                search->sbp->
8652                                                kbp_gap[search->first_context],
8653                                                search->mult_queries->
8654                                                SearchSpEff[query_num]);
8655                 }
8656 
8657                 hsp_cnt++;
8658                 /* Fill in the helper structure. */
8659                 helper[index].qoffset = hsp->query.offset;
8660                 helper[index].qend = hsp->query.end;
8661             } else {
8662                 /* Score of the gapped extension is below the required
8663                    cutoff, delete this hsp */
8664                 hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
8665             }
8666         }
8667                 else
8668                 { /* Contained within another HSP, delete. */
8669                         hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
8670                 }
8671         }
8672         helper = MemFree(helper);
8673 
8674         hsp_cnt = CheckGappedAlignmentsForOverlap(search, hsp_array, *hspcnt, frame);
8675 
8676         if (hsp_cnt < (*hspcnt))
8677         {
8678 /* Save HSPs again, discarding those that have been NULLed out. */
8679                 hsp_array_new = MemNew(hspmax*sizeof(BLAST_HSPPtr));
8680                 index1 = 0;
8681                 for (index=0; index<(*hspcnt_max); index++)
8682                 {
8683                         if (hsp_array[index] != NULL)
8684                         {
8685                                 hsp_array_new[index1] = hsp_array[index];
8686                                 index1++;