NCBI C Toolkit Cross Reference

C/tools/blastool.c


  1 static char const rcsid[] = "$Id: blastool.c,v 6.292 2007/03/15 20:39:17 madden Exp $";
  2 
  3 /* ===========================================================================
  4 *
  5 *                            PUBLIC DOMAIN NOTICE
  6 *               National Center for Biotechnology Information
  7 *
  8 *  This software/database is a "United States Government Work" under the
  9 *  terms of the United States Copyright Act.  It was written as part of
 10 *  the author's official duties as a United States Government employee and
 11 *  thus cannot be copyrighted.  This software/database is freely available
 12 *  to the public for use. The National Library of Medicine and the U.S.
 13 *  Government have not placed any restriction on its use or reproduction.
 14 *
 15 *  Although all reasonable efforts have been taken to ensure the accuracy
 16 *  and reliability of the software and data, the NLM and the U.S.
 17 *  Government do not and cannot warrant the performance or results that
 18 *  may be obtained by using this software or data. The NLM and the U.S.
 19 *  Government disclaim all warranties, express or implied, including
 20 *  warranties of performance, merchantability or fitness for any particular
 21 *  purpose.
 22 *
 23 *  Please cite the author in any work or product based on this material.
 24 *
 25 * ===========================================================================*/
 26 
 27 /*****************************************************************************
 28 
 29 File name: blastool.c
 30 
 31 Author: Tom Madden
 32 
 33 Contents: Utilities for BLAST
 34 
 35 ******************************************************************************/
 36 /*
 37 * $Revision: 6.292 $
 38 * $Log: blastool.c,v $
 39 * Revision 6.292  2007/03/15 20:39:17  madden
 40 * Fix incorrect setting of filter in BLAST_WizardOptionsMaskInit
 41 *
 42 * Revision 6.291  2006/09/21 13:42:37  madden
 43 * BlastProcessGiLists returns a boolean to specify that an attempt was made to process a list of GIs.  If no matches were found this can be reported back to the user
 44 *
 45 * Revision 6.290  2006/04/26 12:42:36  madden
 46 * BlastSetUserErrorString and BlastDeleteUserErrorString moved from blastool.c to blfmtutl.c
 47 *
 48 * Revision 6.289  2006/03/21 22:35:27  camacho
 49 * Add support for setting database length in BLAST_WizardOptions{Blk,Mask}
 50 *
 51 * Revision 6.288  2006/03/02 21:01:54  camacho
 52 * Use lowercase masking in BLAST_Wizard
 53 *
 54 * Revision 6.287  2005/12/29 19:56:06  madden
 55 * Moved functions to print tabular output to blfmtutl
 56 *
 57 * Revision 6.286  2005/11/18 14:19:25  madden
 58 * Check pointer before dereferencing
 59 *
 60 * Revision 6.285  2005/10/06 12:52:23  madden
 61 * Changes to support correct gapped stats for blastn
 62 *
 63 * Revision 6.284  2005/09/29 17:39:58  coulouri
 64 * from mike gertz:
 65 *    Refactored BLASTPostSearchLogic so that RedoAlignmentCore may be
 66 *    called when concatenated queries are being used.
 67 *
 68 * Revision 6.283  2005/08/31 20:32:58  coulouri
 69 * From Mike Gertz:
 70 *     Removed manipulation of options->hitlist_size,
 71 *     options->expect_value and search->pbp->cutoff_e; these should no
 72 *     longer be manipulated by this routine.
 73 *
 74 * Revision 6.282  2005/07/25 19:01:00  coulouri
 75 * correction to previous commit
 76 *
 77 * Revision 6.281  2005/07/13 16:18:49  coulouri
 78 * correct logic error when dealing with oid 0. fixes rt#15077917.
 79 *
 80 * Revision 6.280  2005/02/15 21:10:47  dondosha
 81 * Set X-dropoff for the traceback in MegaBlastPrintAlignInfo to final X-dropoff parameter
 82 *
 83 * Revision 6.279  2004/12/09 16:21:21  camacho
 84 * Do not do the traceback for non-Smith-Waterman blastpgp runs [by Mike Gertz]
 85 *
 86 * Revision 6.278  2004/11/19 13:22:05  madden
 87 * Remove no_check_score completely (from Mike Gertz)
 88 *
 89 * Revision 6.277  2004/11/17 13:03:56  madden
 90 * Always set no_score_check to TRUE
 91 *
 92 * Revision 6.276  2004/11/01 20:43:15  camacho
 93 * + BlastErrorToString
 94 *
 95 * Revision 6.275  2004/08/31 17:07:14  dondosha
 96 * For PRINT_SEQUENCES option in tabular output, always show query on forward strand; reverse complement subject is necessary
 97 *
 98 * Revision 6.274  2004/08/26 19:01:35  dondosha
 99 * Retrieve Bioseq before attempting to calculate number of identities in alignment segments
100 *
101 * Revision 6.273  2004/08/16 19:37:26  dondosha
102 * Enabled uneven gap HSP linking for blastx
103 *
104 * Revision 6.272  2004/07/15 21:00:17  dondosha
105 * Print Accession.Version in megablast tabular output subject ids; print number of sequences in # Query comment if multiple queries
106 *
107 * Revision 6.271  2004/06/30 13:42:27  kans
108 * include <blfmtutl.h> to clear up Mac compiler missing prototype errors
109 *
110 * Revision 6.270  2004/06/30 12:29:39  madden
111 * Moved some functions to blfmtutl.c
112 *
113 * Revision 6.269  2004/06/24 21:16:34  dondosha
114 * Boolean argument in ScoreAndEvalueToBuffers changed to Uint1, so pass 0 instead of FALSE
115 *
116 * Revision 6.268  2004/05/21 13:53:04  dondosha
117 * Use BLAST_HSPFree to free BLAST_HSP structures, hence no need to call GapXEditBlockDelete in multiple places
118 *
119 * Revision 6.267  2004/05/14 15:38:11  dondosha
120 * Use newly public function ScoreAndEvalueToBuffers from txalign.h instead of a static function
121 *
122 * Revision 6.266  2004/05/14 14:41:03  bealer
123 * - Er. I mean .002, as per blastpgp.
124 *
125 * Revision 6.265  2004/05/14 14:39:45  bealer
126 * - Adjust ethresh to .001 for PSI blast.
127 *
128 * Revision 6.264  2004/04/30 15:25:20  dondosha
129 * Added argument in call to BXMLGetHspFromSeqAlign
130 *
131 * Revision 6.263  2004/04/28 20:32:55  bealer
132 * - Fix BLAST_Wizard defaults for PSI-Blast: comp-based-stats=TRUE, E=0.005.
133 *
134 * Revision 6.262  2004/03/22 22:04:43  dondosha
135 * Do not reassign kbp_gap pointers for megablast traceback, as they are already set due to recent changes
136 *
137 * Revision 6.261  2004/03/22 15:24:56  dondosha
138 * Do not set gapped-search-specific options for tblastx
139 *
140 * Revision 6.260  2004/02/26 15:52:30  papadopo
141 * Mike Gertz' modifications to unify handling of gapped Karlin blocks between protein and nucleotide searches
142 *
143 * Revision 6.259  2004/02/24 14:07:42  camacho
144 * BlastAdjustDbNumbers is DEPRECATED
145 *
146 * Revision 6.258  2003/12/30 15:14:40  camacho
147 * Fix to MergeDbGiListsWithOIDLists:
148 * When searching gi lists for matches in rdfp linked list, isolate each
149 * element of the linked list first.
150 *
151 * Revision 6.257  2003/12/23 23:28:27  dondosha
152 * Use number of identities from the seqalign to calculate percent identity in tabular output
153 *
154 * Revision 6.256  2003/12/12 16:01:23  madden
155 * Change to signature of BlastCutoffs, remove BlastCutoffs_simple
156 *
157 * Revision 6.255  2003/11/10 16:59:42  dondosha
158 * Print the seqid in AcknowledgeBlastQuery for any non-local id
159 *
160 * Revision 6.254  2003/10/29 17:46:59  dondosha
161 * Allow 2-stage greedy extension in megablast
162 *
163 * Revision 6.253  2003/09/12 16:02:06  dondosha
164 * If gap open is non-zero for megablast, require gap extension option to be non-zero too
165 *
166 * Revision 6.252  2003/08/21 16:06:37  dondosha
167 * Correction to previous change
168 *
169 * Revision 6.251  2003/08/20 22:13:18  dondosha
170 * Added BlastPrintTabularResults with an extra boolean parameter for OOF alignments
171 *
172 * Revision 6.250  2003/07/15 20:33:23  madden
173 * set is_rps_blast if service is rpsblast
174 *
175 * Revision 6.249  2003/07/15 14:33:43  dondosha
176 * Added a #define for fprintf substitute, needed for gzip compression of Web BLAST results
177 *
178 * Revision 6.248  2003/06/12 16:47:40  madden
179 * Fix so all query info on one line for tab. report
180 *
181 * Revision 6.247  2003/06/11 20:19:16  madden
182 * Fixes to PrintTabularOutputHeader
183 *
184 * Revision 6.246  2003/05/30 17:25:36  coulouri
185 * add rcsid
186 *
187 * Revision 6.245  2003/05/22 21:39:23  dondosha
188 * Correction to previous change
189 *
190 * Revision 6.244  2003/05/22 20:45:27  dondosha
191 * Fix in BlastCreateVirtualOIDList when rdfps with oidlist exist before rdfps with gi lists
192 *
193 * Revision 6.243  2003/05/13 16:02:53  coulouri
194 * make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
195 *
196 * Revision 6.242  2003/05/06 15:19:19  dondosha
197 * Removed extra memory freeing statement for megablast -D3 option
198 *
199 * Revision 6.241  2003/05/05 16:48:40  camacho
200 * Removed warning about gi list restrictions
201 *
202 * Revision 6.240  2003/04/25 18:56:14  camacho
203 * Updated MergeDbGiListsWithOIDLists to use gilists as opposed to gifiles
204 *
205 * Revision 6.239  2003/04/24 17:10:15  dondosha
206 * Fixed FastaCheckDna: check only one sequence at a time
207 *
208 * Revision 6.238  2003/04/23 23:34:07  boemker
209 * Bug fixes: BLAST_Wizard ignored mask->use_best_align,
210 * mask->use_real_db_size, mask->window_size.
211 *
212 * Revision 6.237  2003/04/23 23:22:37  boemker
213 * Bug fix: BLAST_Wizard ignored strand_option.
214 *
215 * Revision 6.236  2003/04/23 23:11:16  boemker
216 * Bug fix: BLAST_Wizard ignored pseudoCountConst.
217 *
218 * Revision 6.235  2003/04/23 22:55:27  boemker
219 * Bug fixes: BLAST_Wizard ignored first_db_seq, final_db_seq.
220 *
221 * Revision 6.234  2003/04/23 22:48:25  boemker
222 * Bug fix: BLAST_Wizard ignored is_ooframe.
223 *
224 * Revision 6.233  2003/04/22 21:52:13  dondosha
225 * Added function OOFBlastHSPGetNumIdentical
226 *
227 * Revision 6.232  2003/04/17 20:51:33  camacho
228 * Rolled back previous changes
229 *
230 * Revision 6.230  2003/04/14 19:53:18  camacho
231 * Use ISAMUninitSearch to avoid running into memory problems
232 *
233 * Revision 6.229  2003/04/04 20:02:38  dondosha
234 * ABR bug fix in BlastPrintTabulatedResultsEx
235 *
236 * Revision 6.228  2003/03/26 20:29:50  boemker
237 * By default, set required_end = 0 to match behavior of blastcgicmd.cpp
238 *
239 * Revision 6.227  2003/03/26 15:47:52  boemker
240 * sizeof x -> sizeof(x)
241 *
242 * Revision 6.226  2003/03/26 15:46:18  boemker
243 * In BLAST_OptionsBlkInit, use memset to clear structure.
244 *
245 * Revision 6.225  2003/03/26 13:36:06  boemker
246 * Corrected misspelling (opts => options) in BLAST_WizardOptionsBlkInit.
247 *
248 * Revision 6.224  2003/03/26 02:45:25  boemker
249 * Rewrote BLAST_WizardOptionsBlkInit to reflect changes in
250 * BLAST_WizardOptionsBlk.
251 *
252 * Revision 6.223  2003/03/25 22:25:57  boemker
253 * Replaced cutoff_s2, which isn't used, with cutoff_s, which is.  Added
254 * query_lcase_mask.  Made alignments and descriptions (arguments of function
255 * BLAST_Wizard) optional.  Rewrote to avoid use of snprintf, which isn't
256 * portable.
257 *
258 * Revision 6.222  2003/03/25 19:58:18  boemker
259 * Moved code to initialize search options from blastcgicmd.cpp to here, as
260 * BLAST_Wizard et al.
261 *
262 * Revision 6.221  2003/03/24 19:42:14  madden
263 * Changes to support query concatenation for blastn and tblastn
264 *
265 * Revision 6.220  2003/02/28 18:34:41  dondosha
266 * Added a possibility to print sequences in the -D3 output
267 *
268 * Revision 6.219  2003/02/18 19:50:09  madden
269 * Correctly report number of sequences better than expect value for gapped search
270 *
271 * Revision 6.218  2003/01/31 17:57:27  camacho
272 * Fix to MergeDbGiFilesWithOIDLists
273 *
274 * Revision 6.217  2003/01/15 19:06:30  dondosha
275 * Correction in copying defline in BlastPrintTabulatedResultsEx
276 *
277 * Revision 6.216  2003/01/14 20:28:53  madden
278 * New function BLASTAddBlastDBTitleToSeqAnnotEx
279 *
280 * Revision 6.215  2002/12/04 23:32:49  camacho
281 * Do not set use_this_gi with nucleotide dbs (redundant)
282 *
283 * Revision 6.214  2002/11/26 23:02:34  madden
284 * Make ErrorMessage optional in ParseBlastOptions
285 *
286 * Revision 6.213  2002/11/21 21:28:27  camacho
287 * Mask the entire volume if no gis in rdfp->gilist belong to rdfp
288 *
289 * Revision 6.212  2002/11/08 20:52:01  dondosha
290 * After the traceback, reduce hitlist size back to the original
291 *
292 * Revision 6.211  2002/11/04 22:53:32  dondosha
293 * 1. Memory leak fixed in BlastClusterHitsFromSeqAlign
294 * 2. Removed redundant score sets from the seqaligns produced for translated searches
295 * 3. Added BlastHSPGetNumIdentical function
296 *
297 * Revision 6.210  2002/09/23 16:49:00  dondosha
298 * Include subject gis in megablast tabular output with full seqids option
299 *
300 * Revision 6.209  2002/09/19 18:12:46  dondosha
301 * Do not move around Karlin blocks if there are no results for some query in megablast
302 *
303 * Revision 6.208  2002/09/13 19:12:53  camacho
304 * Use correct query length in FormatBlastParameters
305 *
306 * Revision 6.207  2002/09/11 16:54:29  dondosha
307 * Removed erroneous assignment of cutoff_s for megablast in BLASTOptionNewEx that was restored in revision 6.204
308 *
309 * Revision 6.206  2002/08/22 12:36:36  madden
310 * Removed unused variables
311 *
312 * Revision 6.205  2002/08/19 18:24:20  camacho
313 * Removed unused variables
314 *
315 * Revision 6.204  2002/08/09 19:39:20  camacho
316 * Added constants for some blast search parameters
317 *
318 * Revision 6.203  2002/08/07 02:06:07  camacho
319 * Fixed problem when merging lists of gis
320 *
321 * Revision 6.202  2002/08/06 15:43:20  dondosha
322 * Set window size to 0 for megablast in BLASTOptionNewEx
323 *
324 * Revision 6.201  2002/08/06 15:41:49  camacho
325 * Changed return type of MergeDbGiFilesWithOIDLists to void
326 *
327 * Revision 6.200  2002/08/01 20:45:34  dondosha
328 * Changed prototype of the BLASTPostSearchLogic function to make it
329 * more convenient
330 *
331 * Revision 6.199  2002/07/24 21:11:38  kans
332 * reverted ncbi URL
333 *
334 * Revision 6.198  2002/07/24 15:38:07  dondosha
335 * Corrections in BlastPostSearchLogic for processing multiple-query intermediate results from megablast
336 *
337 * Revision 6.197  2002/07/23 16:50:04  kans
338 * changed www.ncbi.nlm.nih.gov to www.ncbi.nih.gov
339 *
340 * Revision 6.196  2002/07/15 17:34:18  camacho
341 * Fixed little/big endian problem in IntersectDoubleInt4ListWithOIDLists
342 *
343 * Revision 6.195  2002/07/09 16:09:43  camacho
344 * Changed interface to BlastCreateVirtualOIDList
345 *
346 * Revision 6.194  2002/07/02 17:08:01  dondosha
347 * Reverse previous change - not needed
348 *
349 * Revision 6.193  2002/07/02 01:36:40  dondosha
350 * For megablast use larger window in CheckStartForGappedAlignment
351 *
352 * Revision 6.192  2002/06/26 00:56:30  camacho
353 *
354 * 1. Fixed bug when searching a mixture of real and mask databases.
355 * 2. Clean up of code that calculates the number of sequences and database
356 *    length.
357 *
358 * Revision 6.191  2002/06/25 19:39:38  camacho
359 * Made BlastCreateVirtualOIDList public for use by neighboring software
360 *
361 * Revision 6.190  2002/06/23 06:57:45  camacho
362 * Minor fix to previous commit
363 *
364 * Revision 6.189  2002/06/21 21:48:19  camacho
365 * Reorganized BlastProcessGiLists
366 *
367 * Revision 6.188  2002/06/19 22:50:33  dondosha
368 * Added all queries information for tabular output with multiple queries
369 *
370 * Revision 6.187  2002/06/13 20:48:45  dondosha
371 * Do not assign cutoff_s for megablast in BLASTOptionNewEx - it is not needed anyway
372 *
373 * Revision 6.186  2002/06/06 22:03:06  camacho
374 *
375 * 1. Fixed bug in BlastDbGiListToOidList
376 * 2. Removed statement in BlastProcessGiLists that sets num_seqs to 0
377 *
378 * Revision 6.185  2002/05/28 21:41:03  dondosha
379 * Correction for megablast -D3 output with non-affine extension when database has a gi list
380 *
381 * Revision 6.184  2002/05/10 22:38:49  dondosha
382 * Always do preliminary and then traceback gapped extension if dynamic programming extension is used
383 *
384 * Revision 6.183  2002/05/09 15:35:51  dondosha
385 * Added BLASTOptionNewEx function with an extra argument for megablast
386 *
387 * Revision 6.182  2002/04/18 19:05:08  camacho
388 * Modified BlastProcessGiLists to deal with multi-volume databases with empty oidlists
389 *
390 * Revision 6.181  2002/04/18 16:18:21  dondosha
391 * Added BlastPrintTabulatedResultsEx with extra argument to keep track of progress
392 *
393 * Revision 6.180  2002/04/15 20:51:38  jianye
394 * Added getFastaStyleTitle(BioseqPtr bsp)
395 *
396 * Revision 6.179  2002/04/10 17:42:02  madden
397 * Fix for blastn matrix check (last checkin)
398 *
399 * Revision 6.178  2002/04/10 15:50:15  dondosha
400 * Added validity checks for some megablast options
401 *
402 * Revision 6.177  2002/04/09 18:44:44  madden
403 * Set matrix for blastn correctly
404 *
405 * Revision 6.176  2002/03/22 17:05:09  dondosha
406 * For megablast tabular formatting of seqalign, count number of alignments for each query separately
407 *
408 * Revision 6.175  2002/02/27 20:13:56  dondosha
409 * Added checks for NULL pointer in BlastInsertList2Heap
410 *
411 * Revision 6.174  2002/02/14 20:37:38  dondosha
412 * Bug fix in callback for blastn tabular output when only reverse strand is searched
413 *
414 * Revision 6.173  2002/01/07 23:16:00  dondosha
415 * Fixed several memory leaks and allocation/freeing bugs in multithreaded megablast
416 *
417 * Revision 6.172  2001/12/28 20:41:09  dondosha
418 * 1. Mega BLAST related parameters moved to a separate structure
419 * 2. Disallow word sizes < 8 for Mega BLAST
420 *
421 * Revision 6.171  2001/11/26 20:40:15  camacho
422 * Used the correct maximum oid in BlastProcessGiLists
423 *
424 * Revision 6.170  2001/11/14 23:41:32  dondosha
425 * Minor change in MegaBlastPrintAlignInfo
426 *
427 * Revision 6.169  2001/11/13 14:13:02  egorov
428 * The change fixes a bug when database specified by a list of GIs is
429 * smaller than list of GIs returned from Entrez what causes segmentation fault.
430 *
431 * Revision 6.168  2001/11/05 17:30:24  egorov
432 * Initialize 'cutoff' variable. This eliminates UMR problem
433 * and some of blastsrv3.REAL crashes.
434 *
435 * Revision 6.167  2001/10/30 17:27:00  dondosha
436 * Check if title exists before printing it in PrintTabularOutputHeader
437 *
438 * Revision 6.166  2001/10/24 15:02:48  dondosha
439 * Correction in BlastPrintAlignInfo for 2 sequences case
440 *
441 * Revision 6.165  2001/10/15 19:53:20  vakatov
442 * Get rid of the extra LIBCALL at BLASTPostSearchLogic()
443 *
444 * Revision 6.164  2001/10/12 15:26:29  dondosha
445 * 1. Improvements to BlastClusterHitsFromSeqAlign
446 * 2. Added BLASTPostSearchLogic function
447 *
448 * Revision 6.163  2001/10/02 16:03:58  dondosha
449 * Fix for ungapped search tabular output
450 *
451 * Revision 6.162  2001/09/06 20:24:33  dondosha
452 * Removed threshold_first
453 *
454 * Revision 6.161  2001/09/06 17:11:16  dondosha
455 * Truncate query defline in tabular output header if it is too long
456 *
457 * Revision 6.160  2001/09/05 20:31:04  dondosha
458 * Fixed tiny memory leak in PrintTabularOutputHeader function
459 *
460 * Revision 6.159  2001/08/15 21:37:09  dondosha
461 * Added functions for clustering of hits from a seqalign
462 *
463 * Revision 6.158  2001/07/31 16:42:39  dondosha
464 * Added function FastaCheckDna
465 *
466 * Revision 6.157  2001/07/20 18:38:27  dondosha
467 * Minor bug fix in MegaBlastPrintAlignInfo
468 *
469 * Revision 6.156  2001/07/09 14:17:24  madden
470 * Fix PC-lint complaints from R. Williams
471 *
472 * Revision 6.155  2001/07/09 13:12:04  madden
473 * Removed unused variables
474 *
475 * Revision 6.154  2001/07/06 13:59:02  madden
476 * Fixed compiler and lint warnings
477 *
478 * Revision 6.153  2001/06/29 20:45:21  dondosha
479 * If percent identity close but less than 100, print 99.99 in tabular output
480 *
481 * Revision 6.152  2001/06/28 21:22:20  dondosha
482 * Do not print query information in tabular output header if it is not available
483 *
484 * Revision 6.151  2001/06/28 13:42:09  madden
485 * Fixes to prevent overflow on number of hits reporting
486 *
487 * Revision 6.150  2001/06/25 18:04:24  madden
488 * Fix to BlastComputeProbs to ignore score > max or < min
489 *
490 * Revision 6.149  2001/06/18 21:45:16  dondosha
491 * Adjusted PrintTabularOutputHeader for web BLAST
492 *
493 * Revision 6.148  2001/06/15 21:18:39  dondosha
494 * Added function PrintTabularOutputHeader
495 *
496 * Revision 6.147  2001/06/15 16:38:46  dondosha
497 * Correction to previous changes
498 *
499 * Revision 6.146  2001/06/15 15:48:38  dondosha
500 * Fixed uninitialized variable bug
501 *
502 * Revision 6.145  2001/06/14 22:09:15  dondosha
503 * Rearranged code for gi lists and oid masks processing to get rid of duplication
504 *
505 * Revision 6.144  2001/06/14 16:33:35  dondosha
506 * Fixed bug in the new function BlastDbGiListToOidList
507 *
508 * Revision 6.143  2001/06/13 21:45:09  dondosha
509 * Search of multiple databases with gi files implemented
510 *
511 * Revision 6.142  2001/06/07 19:30:03  dondosha
512 * Pass believe query argument to BlastPrintTabulatedResults
513 *
514 * Revision 6.141  2001/06/06 21:22:43  dondosha
515 * Added (query) Bioseq and SeqLoc arguments to function BlastPrintTabulatedResults
516 *
517 * Revision 6.140  2001/06/05 22:16:15  dondosha
518 * Check if KarlinBlk exists in FormatBlastParameters
519 *
520 * Revision 6.139  2001/05/29 22:00:46  dondosha
521 * Tabulated output bug fixes
522 *
523 * Revision 6.138  2001/05/24 16:25:22  dondosha
524 * Correction for query and subject ids printed with post-search tabulated format
525 *
526 * Revision 6.137  2001/05/23 22:38:48  dondosha
527 * Added option -m 9 to print post-search tabulated output
528 *
529 * Revision 6.136  2001/05/15 19:19:26  shavirin
530 * Fixed minor bug in the function convertSeqAlignListToValNodeList().
531 *
532 * Revision 6.135  2001/05/11 22:05:20  dondosha
533 * Added BlastPrintTabulatedResults function for post-search formatting
534 *
535 * Revision 6.134  2001/05/10 21:58:58  dondosha
536 * Always print full ids in non-megablast tabulated output
537 *
538 * Revision 6.133  2001/05/04 22:14:41  dondosha
539 * Do not call BioseqLockById from MegaBlastPrintAlignInfo in case of local ids in the database
540 *
541 * Revision 6.132  2001/05/04 14:14:28  madden
542 * Fixes for multiple patterns in phi-blast
543 *
544 * Revision 6.131  2001/04/26 16:43:10  madden
545 * Correction to convertValNodeListToSeqAlignList from AS
546 *
547 * Revision 6.130  2001/04/25 13:04:03  madden
548 * Fix from AS to convertValNodeListToSeqAlignList for multiple occurrences of a pattern
549 *
550 * Revision 6.129  2001/04/10 17:01:15  dondosha
551 * Fixed purify errors, including problem with multithreaded tabulated output
552 *
553 * Revision 6.128  2001/04/06 18:15:08  madden
554 * Move UNIX-specific stuff (HeyIAmInMemory) to bqueue.[ch]
555 *
556 * Revision 6.127  2001/04/04 20:32:40  dondosha
557 * Correction for tabulated output from bl2seq for translated searches
558 *
559 * Revision 6.126  2001/04/03 22:35:24  kans
560 * include simutil.h to get prototype for Change_Loc_Strand for Mac compiler
561 *
562 * Revision 6.125  2001/04/03 21:59:49  dondosha
563 * Implemented tabulated output for non-megablast bl2seq
564 *
565 * Revision 6.124  2001/03/27 15:46:16  madden
566 * Fix problem in BlastSetUserErrorString
567 *
568 * Revision 6.123  2001/03/22 21:37:41  madden
569 * Raise hsp_num_max to 400
570 *
571 * Revision 6.122  2001/03/19 22:39:24  dondosha
572 * Allow location on the first query sequence for megablast
573 *
574 * Revision 6.121  2001/03/19 18:54:16  madden
575 * Added BlastSeqLocFillDoubleIntEx, changed BlastSeqLocFillDoubleIntRev
576 *
577 * Revision 6.120  2001/03/13 14:00:24  madden
578 * Make ungapped case consistent with stand-alone binary
579 *
580 * Revision 6.119  2001/03/07 14:09:35  madden
581 * Default window size for blastn is 30
582 *
583 * Revision 6.118  2001/02/28 19:55:30  dondosha
584 * Corrected handling of local query IDs for megablast tabulated output
585 *
586 * Revision 6.117  2001/02/21 15:05:31  dondosha
587 * Round scaled matrix values to nearest integer instead of integer cast
588 *
589 * Revision 6.116  2001/02/16 22:16:22  dondosha
590 * BlastScaleMatrix: if new lambda is -1, stop changing factor
591 *
592 * Revision 6.115  2001/02/16 18:45:40  dondosha
593 * Fixed minor purify errors
594 *
595 * Revision 6.114  2001/02/08 20:41:15  dondosha
596 * Implemented tabulated output for all translated programs
597 *
598 * Revision 6.113  2001/02/08 16:33:03  kans
599 * include mblast.h for Mac compiler
600 *
601 * Revision 6.112  2001/02/07 21:16:05  dondosha
602 * Added callbacks to produce tabulated output
603 *
604 * Revision 6.111  2001/01/25 14:34:46  madden
605 * Fix for ungapped blastn
606 *
607 * Revision 6.110  2001/01/23 21:01:01  dondosha
608 * Change double quotes to single quotes in previous change
609 *
610 * Revision 6.109  2001/01/23 20:25:42  dondosha
611 * 1. Renamed BlastParceInputString to BlastParseInputString
612 * 2. Recognize a double quoted string as an option value in
613 *    BlastParseInputString
614 *
615 * Revision 6.108  2001/01/17 17:44:34  madden
616 * Set default blastn window size to zero
617 *
618 * Revision 6.107  2001/01/16 14:03:54  madden
619 * Enable gapped check for blastn immediately after finding hits
620 *
621 * Revision 6.106  2001/01/12 16:57:29  shavirin
622 * Checked for NULL score frequencies to avoid coredump in case of empty
623 * sequence.
624 *
625 * Revision 6.105  2000/12/19 18:39:50  madden
626 * Add function BlastSetUserErrorString and BlastDeleteUserErrorString
627 *
628 * Revision 6.104  2000/12/15 19:27:17  dondosha
629 * Corrected the reference link and display for Mega BLAST
630 *
631 * Revision 6.103  2000/12/04 18:51:24  madden
632 * Fix memory leaks
633 *
634 * Revision 6.102  2000/11/29 16:32:59  dondosha
635 * Fixed minor memory leak in MegaBlastPrintReference
636 *
637 * Revision 6.101  2000/11/21 18:46:18  madden
638 * Remove settings for blosum62_20[ab]
639 *
640 * Revision 6.100  2000/11/09 16:12:36  madden
641 * Set do_sum_stats TRUE for ungapped blastp
642 *
643 * Revision 6.99  2000/11/07 16:30:26  madden
644 * Introduce intermediate score (before linking of HSPs) for blastx and tblastn
645 *
646 * Revision 6.98  2000/11/01 16:25:56  madden
647 * Changes from Futamura for psitblastn
648 *
649 * Revision 6.97  2000/10/26 18:52:56  dondosha
650 * Added function MegaBlastPrintReference
651 *
652 * Revision 6.96  2000/10/13 18:49:22  lewisg
653 * move structures out of unix ifdef
654 *
655 * Revision 6.95  2000/10/12 21:39:39  shavirin
656 * Added support for OOF in the function  BLASTFilterOverlapRegions().
657 *
658 * Revision 6.94  2000/10/12 17:55:06  shavirin
659 * Fixed NT error for invalid structure identifier.
660 *
661 * Revision 6.93  2000/10/11 21:49:58  shavirin
662 * Added set of functions around BLASTFilterOverlapRegions() to sort and
663 * filter output SeqAlign by overlapping reagions.
664 *
665 * Revision 6.92  2000/10/05 19:41:43  shavirin
666 * Rewritten function BlastErrorPrintExtra().
667 *
668 * Revision 6.91  2000/09/15 17:11:53  dondosha
669 * Set gap parameters to zeros for megablast in BLASTOptionSetGapParams
670 *
671 * Revision 6.90  2000/08/29 19:35:49  madden
672 * Add gilist_not_owned to blast_gi_list
673 *
674 * Revision 6.89  2000/08/28 17:09:53  madden
675 * query_lcase_mask no longer freed in BLASTOptionDelete
676 *
677 * Revision 6.88  2000/08/23 18:48:44  madden
678 * Use BlastKarlinBlkGappedCalcEx in place of BlastKarlinBlkGappedCalc
679 *
680 * Revision 6.87  2000/08/18 19:34:38  madden
681 * Do not print lcl| in AcknowledgeBlastQuery
682 *
683 * Revision 6.86  2000/08/04 15:02:25  shavirin
684 * Changed default threshhold values for matrixes BLOSUM62_20,
685 * BLOSUM62_20A, BLOSUM62_20B.
686 *
687 * Revision 6.85  2000/07/31 16:41:00  shavirin
688 * Reduced POSIT_SCALE_FACTOR from 1000 to 200 to avoid overflow
689 * with BLOSUM80; moved declaration os POSIT_SCALE_FACTOR to posit.h
690 *
691 * Revision 6.84  2000/07/21 15:23:24  madden
692 * Set default threshold for BLOSUM62_20
693 *
694 * Revision 6.83  2000/07/12 23:07:30  kans
695 * reverse_seq moved from pseed3.c to blastool.c, placed in blast.h header, called by gapxdrop.c
696 *
697 * Revision 6.82  2000/06/20 15:50:46  shavirin
698 * Added new functions: BLASTAddBlastDBTitleToSeqAnnot and
699 * BLASTGetDatabaseTitleFromSeqAnnot().
700 *
701 * Revision 6.81  2000/04/18 16:29:27  madden
702 * Free gifile name
703 *
704 * Revision 6.80  2000/04/10 19:58:06  dondosha
705 * Added BlastSeqLocFillDoubleIntRev to fill mask locations for concatenated reverse strand in blastn
706 *
707 * Revision 6.79  2000/03/31 19:10:57  dondosha
708 * Changed some names related to MegaBlast
709 *
710 * Revision 6.78  2000/03/30 21:45:36  madden
711 * Make BlastDeleteHeap non-static
712 *
713 * Revision 6.77  2000/03/14 13:44:01  madden
714 * Set default of sort_gi_list to TRUE
715 *
716 * Revision 6.76  2000/03/01 20:45:15  shavirin
717 * Clear some memory in BLASTOptionsDelete() function
718 *
719 * Revision 6.75  2000/02/23 20:45:45  dondosha
720 * Modified heap operations for blastn to handle concatenated contexts
721 *
722 * Revision 6.74  2000/01/21 22:24:09  madden
723 * Use Nlm_Int8tostr in place of Ltostr
724 *
725 * Revision 6.73  2000/01/13 18:10:43  madden
726 * Fix problem with incorrect stat values for blastn and missing hits
727 *
728 * Revision 6.72  2000/01/07 16:01:24  madden
729 * Use readdb_get_totals_ex to get db number to report
730 *
731 * Revision 6.71  1999/12/31 14:23:19  egorov
732 * Add support for using mixture of real and maks database with gi-list files:
733 * 1. Change logic of creating rdfp list.
734 * 2. BlastGetDbChunk gets real databases first, then masks.
735 * 3. Propoper calculation of database sizes using alias files.
736 * 4. Change to CommonIndex to support using of mask databases.
737 * 5. Use correct gis in formated output (BlastGetAllowedGis()).
738 * 6. Other small changes
739 *
740 * Revision 6.70  1999/12/21 20:03:23  egorov
741 * readdb_gi2seq() has new parameter.  Use NULL here.
742 *
743 * Revision 6.69  1999/12/21 16:58:00  madden
744 * Fix command-line parser for options in case there is no space between option and value
745 *
746 * Revision 6.68  1999/12/17 20:47:04  egorov
747 * Fix 'gcc -Wall' warnings
748 *
749 * Revision 6.67  1999/12/16 19:16:49  egorov
750 * Return a value from not-void function
751 *
752 * Revision 6.66  1999/12/14 15:35:13  madden
753 * Added BlastPrintFilterWarning
754 *
755 * Revision 6.65  1999/11/30 19:00:51  madden
756 * Added Nlm_SwapUint4 calls for the ordinal ID list
757 *
758 * Revision 6.64  1999/11/26 22:26:49  madden
759 * Change gap_x_dropoff value for blastn
760 *
761 * Revision 6.63  1999/11/09 14:16:53  madden
762 * made sum_stats default again, rolling back rev 6.61
763 *
764 * Revision 6.62  1999/11/02 15:22:18  madden
765 * Add BlastParseInputString and BlastGetLetterIndex
766 *
767 * Revision 6.61  1999/10/27 21:01:32  madden
768 * made do_sum_stats not default
769 *
770 * Revision 6.60  1999/09/29 17:14:50  shavirin
771 * Fixed memory leak in BLASTOptionsDelete()
772 *
773 * Revision 6.59  1999/09/22 20:59:20  egorov
774 * Add blast db mask stuff
775 *
776 * Revision 6.58  1999/09/16 16:54:43  madden
777 * Allow longer wordsizes
778 *
779 * Revision 6.57  1999/09/14 19:56:39  shavirin
780 * Fixed bug in PHI-Blast when number of hits to DB == 0
781 *
782 * Revision 6.56  1999/09/09 18:00:25  madden
783 * formatting problem for effective db length
784 *
785 * Revision 6.55  1999/08/27 20:22:51  shavirin
786 * Added default value for decline_align in the function BLASTOptionNew().
787 *
788 * Revision 6.53  1999/08/17 14:10:05  madden
789 * Validation for smith_waterman and tweak_parameters options
790 *
791 * Revision 6.52  1999/05/25 13:37:15  madden
792 * Call readdb_get_sequence_length only for seqs in db
793 *
794 * Revision 6.51  1999/04/27 17:22:27  madden
795 * Set hsp_num_max for ungapped BLAST
796 *
797 * Revision 6.50  1999/04/14 14:53:48  madden
798 * Correction for databases over 2 Gig
799 *
800 * Revision 6.49  1999/04/01 21:42:47  madden
801 * Fix memory leaks when gi list is used
802 *
803 * Revision 6.48  1999/03/22 15:19:03  beloslyu
804 * corrections to compile on Red Hat Linux v5.2
805 *
806 * Revision 6.47  1999/03/18 16:43:57  shavirin
807 * Added function Boolean HeyIAmInMemory(Int4 program)
808 *
809 * Revision 6.46  1999/03/17 13:21:07  madden
810 * Fix comment in comment problem
811 *
812 * Revision 6.45  1999/03/12 15:03:45  egorov
813 * Add proper Int4-long type casting
814 *
815 * Revision 6.44  1999/02/19 14:18:25  madden
816 * Added back check for negative nucl. penalty
817 *
818 * Revision 6.43  1999/02/19 14:16:20  madden
819 * list manipulation bug for seed
820 *
821 * Revision 6.41  1999/01/26 18:26:54  madden
822 * make updateLambdaK public
823 *
824 * Revision 6.40  1999/01/08 22:08:42  madden
825 * BlastScaleMatrix returns factor as FloatHi
826 *
827  * Revision 6.39  1998/12/31 18:17:06  madden
828  * Added strand option
829  *
830  * Revision 6.38  1998/12/29 17:45:07  madden
831  * Add do_sum_stats flag
832  *
833  * Revision 6.37  1998/12/03 15:19:32  madden
834  * Changes to speed up BlastFreeHeap and InsertToHeap
835  *
836  * Revision 6.36  1998/11/04 01:36:06  egorov
837  * Add support for entrez-query and org-name to blast3
838  *
839  * Revision 6.35  1998/10/13 22:06:24  madden
840  * Fixed AdjustDbNumbers
841  *
842  * Revision 6.34  1998/09/28 12:29:24  madden
843  * Check for problem in rescaling code
844  *
845  * Revision 6.33  1998/09/22 18:46:43  egorov
846  * Add BlastErrorPrintExtra()
847  *
848  * Revision 6.32  1998/09/17 19:53:02  madden
849  * Added fillCandLambda
850  *
851  * Revision 6.31  1998/09/16 18:59:35  madden
852  * Print subset information if entire database not searched
853  *
854  * Revision 6.30  1998/09/14 15:48:36  madden
855  * Fixed PHI-BLAST reference
856  *
857  * Revision 6.29  1998/09/14 15:11:15  egorov
858  * Add support for Int8 length databases; remove unused variables
859  *
860  * Revision 6.28  1998/09/10 22:36:09  madden
861  * Added convertSeqAlignListToValNodeList and convertValNodeListToSeqAlignList
862  *
863  * Revision 6.27  1998/09/09 21:18:09  madden
864  * Added PrintKAParametersExtra
865  *
866  * Revision 6.26  1998/09/04 14:45:42  madden
867  * Moved code from blast.c blastool.c
868  *
869  * Revision 6.25  1998/08/28 21:21:29  madden
870  * Changed PhiBlast ref
871  *
872  * Revision 6.24  1998/08/25 14:16:22  madden
873  * Added BlastGetPhiReference and BlastPrintPhiReference
874  *
875  * Revision 6.23  1998/07/28 15:23:47  madden
876  * Changed Number of sequences better printout again
877  *
878  * Revision 6.22  1998/07/27 21:54:02  madden
879  * Fixed printing of non-integral E-values
880  *
881  * Revision 6.21  1998/07/21 20:58:06  madden
882  * Changes to allow masking at hash only
883  *
884  * Revision 6.20  1998/07/17 15:40:01  madden
885  * Changes for Effective search space.
886  *
887  * Revision 6.19  1998/07/02 22:15:31  madden
888  * Fixed bug in BlastAdjustDbNumbers
889  *
890  * Revision 6.18  1998/06/17 18:10:24  madden
891  * Validate for isPatternSearch
892  *
893  * Revision 6.17  1998/06/12 15:52:49  madden
894  * Fixed warnings
895  *
896  * Revision 6.16  1998/06/05 15:50:35  madden
897  * Return 1 from BLASTOptionValidateEx if wordsize incorrect.
898  *
899  * Revision 6.15  1998/06/03 17:40:48  madden
900  * Added blastn check for wordsize in BLASTOptionValidateEx
901  *
902  * Revision 6.14  1998/05/28 19:59:42  madden
903  * Changed hsp_range_max
904  *
905  * Revision 6.13  1998/05/21 19:44:45  egorov
906  * Make word "Reference" be HTML link in case HTML output requested
907  *
908  * Revision 6.12  1998/05/03 17:21:04  madden
909  * Added error message for expect_value <= 0, fix typo setting open and expect values
910  *
911  * Revision 6.11  1998/05/01 18:33:56  egorov
912  * Add new parametes to BLASTOptionSetGapParam()
913  *
914  * Revision 6.10  1998/04/30 14:28:46  madden
915  * Raise thresholds for blastx, tblast[nx]
916  *
917  * Revision 6.9  1998/04/29 14:28:07  madden
918  * Fix reference formatting problem
919  *
920  * Revision 6.8  1998/04/27 16:47:34  madden
921  * Added window and threshold to BLASTOptionSetGapParams
922  *
923  * Revision 6.7  1998/04/24 19:28:29  madden
924  * Added BlastScaleMatrix (and other rescaling code moved from posit.c)
925  *
926  * Revision 6.6  1998/04/13 20:29:57  madden
927  * Add one to length of array for NULLB at end
928  *
929  * Revision 6.5  1998/03/24 15:38:23  madden
930  * Use BlastDoubleInt4Ptr to keep track of gis and ordinal_ids
931  *
932  * Revision 6.4  1998/03/18 14:14:18  madden
933  * Support random access by gi list
934  *
935  * Revision 6.3  1998/02/28 17:24:30  madden
936  * Default window_size zero for blastn
937  *
938  * Revision 6.2  1998/02/27 16:52:05  madden
939  * Added BlastGetSequenceFromBioseq
940  *
941  * Revision 6.1  1998/02/27 14:30:30  madden
942  * Tools (or utilities) for the BLAST programs
943  *
944 */
945 
946 #include <ncbi.h>
947 #include <blastpri.h>
948 #include <objcode.h>
949 #include <objseq.h>
950 #include <sequtil.h>
951 #include <readdb.h>
952 #include <ncbithr.h>
953 #include <txalign.h>
954 #include <posit.h>
955 #include <seed.h>
956 #include <xmlblast.h>
957 #include <mblast.h>
958 #include <simutil.h>
959 #include <vecscrn.h>
960 #include <blfmtutl.h>
961 
962 int (*blastool_fprintf)(FILE*, const char *, ...) = fprintf;
963 #define fprintf blastool_fprintf
964 
965 typedef struct _FilterAlign {
966     SeqAlignPtr  sap;
967     Char         subject_sip[64];
968     HspPtr       hsp;
969 } FilterAlign, PNTR FilterAlignPtr;
970 
971 
972 typedef struct _SappSet {
973     Int4               count;
974     FilterAlignPtr PNTR fapp;
975 } SappSet, PNTR SappSetPtr;
976 
977 
978 #define BUFFER_LENGTH 255
979 
980 /*
981         Formats the BLAST parameters for the BLAST report.
982         One CharPtr is returned, newlines are indicated by tildes ('~').
983 */      
984 
985 CharPtr
986 FormatBlastParameters(BlastSearchBlkPtr search)
987 
988 {
989         BLAST_ParameterBlkPtr   pbp;
990         BLAST_Score cutoff = 0;
991         Char buffer[128];
992         CharPtr ret_buffer;
993         Int2 ret_buffer_length;
994         Int4 num_entries;
995         Int8 total_length;
996         Nlm_FloatHi evalue;
997 
998         pbp = search->pbp;
999 
1000         ret_buffer = NULL;
1001         ret_buffer_length = 0;
1002 
1003         
1004         sprintf(buffer, "Matrix: %s", search->sbp->name);
1005         add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1006         
1007         if (pbp->gapped_calculation)
1008         {
1009                 sprintf(buffer, "Gap Penalties: Existence: %ld, Extension: %ld", (long) search->pbp->gap_open, (long) search->pbp->gap_extend);
1010                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1011         }
1012 
1013         if (pbp->two_pass_method == FALSE)
1014         {
1015                 sprintf(buffer, "Number of Hits to DB: %s", Nlm_Int8tostr((Int8) search->second_pass_hits, 1));
1016                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1017                 
1018                 readdb_get_totals_ex(search->rdfp, &total_length, &num_entries, TRUE);
1019                 
1020                 sprintf(buffer, "Number of Sequences: %ld", (long) num_entries);
1021                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1022                 sprintf(buffer, "Number of extensions: %ld", (long) search->second_pass_extends);
1023                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1024                 sprintf(buffer, "Number of successful extensions: %ld", (long) search->second_pass_good_extends);
1025                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1026         }
1027         else
1028         {
1029                 sprintf(buffer, "Number of Hits to DB: 1st pass: %s, 2nd pass: %s", 
1030                         Nlm_Int8tostr((Int8) search->first_pass_hits, 1), Nlm_Int8tostr((Int8) search->second_pass_hits, 1));
1031                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1032                 readdb_get_totals_ex(search->rdfp, &total_length, &num_entries, TRUE);
1033                 sprintf(buffer, "Number of Sequences: 1st pass: %ld, 2nd pass: %ld", 
1034                         (long) num_entries, (long) search->second_pass_trys);
1035                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1036                 sprintf(buffer, "Number of extensions: 1st pass: %ld, 2nd pass: %ld", 
1037                         (long) search->first_pass_extends, (long) search->second_pass_extends);
1038                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1039                 sprintf(buffer, "Number of successful extensions: 1st pass: %ld, 2nd pass: %ld", 
1040                         (long) search->first_pass_good_extends, (long) search->second_pass_good_extends);
1041                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1042         }
1043 
1044         if (pbp->cutoff_e > 0.1)
1045         {
1046                 sprintf(buffer, "Number of sequences better than %4.1f: %ld", 
1047                         pbp->cutoff_e, (long) search->number_of_seqs_better_E);
1048         }
1049         else
1050         {
1051                 sprintf(buffer, "Number of sequences better than %3.1e: %ld", 
1052                         pbp->cutoff_e, (long) search->number_of_seqs_better_E);
1053         }
1054         add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1055 
1056         if (pbp->gapped_calculation)
1057         {
1058                 sprintf(buffer, "Number of HSP's better than %4.1f without gapping: %ld", 
1059                         pbp->cutoff_e, (long) search->prelim_gap_no_contest);
1060                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1061                 sprintf(buffer, "Number of HSP's successfully gapped in prelim test: %ld", 
1062                         (long) search->prelim_gap_passed);
1063                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1064                 sprintf(buffer, "Number of HSP's that attempted gapping in prelim test: %ld", (long) search->prelim_gap_attempts);
1065                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1066                 sprintf(buffer, "Number of HSP's gapped (non-prelim): %ld", (long) search->real_gap_number_of_hsps);
1067                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1068         }
1069         /* The following makes sense only when there is only one query */
1070         if (search->last_context <= 1) {
1071            Int8 qlen;
1072            if (search->pbp->is_rps_blast) {
1073                qlen = (Int8) search->rps_qlen;
1074            } else {
1075                qlen = (Int8)
1076                    search->context[search->first_context].query->length;
1077            }
1078            sprintf(buffer, "length of query: %s", Nlm_Int8tostr(qlen, 1));
1079            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1080         }
1081         sprintf(buffer, "length of database: %s", Nlm_Int8tostr ((Int8) search->dblen, 1));
1082         add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1083 
1084         sprintf(buffer, "effective HSP length: %ld", (long) search->length_adjustment);
1085         add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1086 
1087         if (search->last_context <= 1) {
1088            Int8 eff_qlen;
1089            if (search->pbp->is_rps_blast) {
1090                eff_qlen = (Int8) search->rps_qlen - search->length_adjustment;
1091             } else {
1092                eff_qlen = (Int8)
1093                search->context[search->first_context].query->effective_length;
1094            }
1095 
1096            sprintf(buffer, "effective length of query: %s", 
1097                    Nlm_Int8tostr(eff_qlen, 1));
1098            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1099         }
1100         sprintf(buffer, "effective length of database: %s", Nlm_Int8tostr ((Int8) search->dblen_eff, 1));
1101         add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1102         if (search->last_context <= 1) {
1103            Int8 eff_qlen;
1104            if (search->pbp->is_rps_blast) {
1105                eff_qlen = (Int8) search->rps_qlen - search->length_adjustment;
1106             } else {
1107                eff_qlen = (Int8)
1108                search->context[search->first_context].query->effective_length;
1109            }
1110            sprintf(buffer, "effective search space: %8.0f", ((Nlm_FloatHi)
1111                        search->dblen_eff)*((Nlm_FloatHi) eff_qlen));
1112            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1113         }
1114         sprintf(buffer, "effective search space used: %8.0f", (Nlm_FloatHi) search->searchsp_eff);
1115         add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1116 
1117         if (StringCmp(search->prog_name, "blastx") == 0
1118             || StringCmp(search->prog_name, "tblastn") == 0
1119             || StringCmp(search->prog_name, "tblastx") == 0
1120             || StringCmp(search->prog_name, "psitblastn") == 0 )
1121         {
1122                 sprintf(buffer, "frameshift window, decay const: %ld, %4.1f",
1123                         (long) pbp->gap_size, pbp->gap_decay_rate);
1124                 add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1125         }
1126 
1127         if (!search->pbp->mb_params) {
1128            sprintf(buffer, "T: %ld", (long) search->pbp->threshold_second);
1129            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1130            sprintf(buffer, "A: %ld", (long) search->pbp->window_size);
1131            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1132            sprintf(buffer, "X1: %ld (%4.1f bits)", (long) (-search->pbp->dropoff_2nd_pass), ((-search->pbp->dropoff_2nd_pass)*(search->sbp->kbp[search->first_context]->Lambda/NCBIMATH_LN2)));
1133            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1134            if (StringCmp(search->prog_name, "blastn") == 0 || search->pbp->gapped_calculation == FALSE) {
1135               sprintf(buffer, "X2: %ld (%4.1f bits)", (long) search->pbp->gap_x_dropoff, ((search->pbp->gap_x_dropoff)*(search->sbp->kbp[search->first_context]->Lambda/NCBIMATH_LN2)));
1136               add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1137            } else {
1138               sprintf(buffer, "X2: %ld (%4.1f bits)", (long) search->pbp->gap_x_dropoff, ((search->pbp->gap_x_dropoff)*(search->sbp->kbp_gap[search->first_context]->Lambda/NCBIMATH_LN2)));
1139               add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1140               sprintf(buffer, "X3: %ld (%4.1f bits)", (long) search->pbp->gap_x_dropoff_final, ((search->pbp->gap_x_dropoff_final)*(search->sbp->kbp_gap[search->first_context]->Lambda/NCBIMATH_LN2)));
1141               add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1142            }
1143            sprintf(buffer, "S1: %ld (%4.1f bits)", (long) search->pbp->gap_trigger, ((((search->pbp->gap_trigger)*(search->sbp->kbp[search->first_context]->Lambda))-(search->sbp->kbp[search->first_context]->logK))/NCBIMATH_LN2));
1144            add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1145            cutoff = 0;
1146         }
1147         if (search->last_context <= 1) {
1148           evalue = pbp->cutoff_e;
1149           if (search->pbp->gapped_calculation == FALSE) {
1150             /* AM: Changed to support query concatenation. */
1151             if( !search->mult_queries ) {
1152               Nlm_FloatHi search_sp =
1153                 search->context[search->first_context]
1154                 .query->effective_length *
1155                 (Nlm_FloatHi) search->dblen_eff;
1156               BlastCutoffs(&cutoff, &evalue,
1157                            search->sbp->kbp[search->first_context],
1158                            search_sp, FALSE, 0.0 );
1159             } else {
1160               Nlm_FloatHi search_sp =
1161                 search->mult_queries->MinLenEff *
1162                 (Nlm_FloatHi) search->dblen_eff;
1163               BlastCutoffs(&cutoff, &evalue,
1164                            search->sbp->kbp[search->first_context],
1165                            search_sp, FALSE, 0.0 );
1166             }
1167             
1168             sprintf(buffer, "S2: %ld (%4.1f bits)", (long) cutoff, (((cutoff)*(search->sbp->kbp[search->first_context]->Lambda))-(search->sbp->kbp[search->first_context]->logK))/NCBIMATH_LN2);
1169           } else {
1170             if( !search->mult_queries ) {
1171               Nlm_FloatHi search_sp =
1172                 search->context[search->first_context]
1173                 .query->effective_length *
1174                 (Nlm_FloatHi) search->dblen_eff;
1175               BlastCutoffs(&cutoff, &evalue,
1176                            search->sbp->kbp_gap[search->first_context],
1177                            search_sp, FALSE, 0.0  );
1178             } else {
1179               Nlm_FloatHi search_sp =
1180                 search->mult_queries->MinLenEff *
1181                 (Nlm_FloatHi)search->dblen_eff;
1182               BlastCutoffs( &cutoff, &evalue, 
1183                             search->sbp->kbp_gap[search->first_context],
1184                             search_sp, FALSE, 0.0 );
1185             }
1186             sprintf(buffer, "S2: %ld (%4.1f bits)", (long) cutoff, (((cutoff)*(search->sbp->kbp_gap[search->first_context]->Lambda))-(search->sbp->kbp_gap[search->first_context]->logK))/NCBIMATH_LN2);
1187           }
1188           add_string_to_buffer(buffer, &ret_buffer, &ret_buffer_length);
1189         }
1190         return ret_buffer;
1191 }
1192 
1193 /*
1194         Deallocates *BlastErrorMsgPtr produced by BlastConstructErrorMessage.
1195 */
1196 
1197 BlastErrorMsgPtr BlastDestroyErrorMessage(BlastErrorMsgPtr error_msg)
1198 
1199 {
1200         if (error_msg == NULL)
1201                 return NULL;
1202 
1203         MemFree(error_msg->msg);
1204         MemFree(error_msg);
1205 
1206         return NULL;
1207 }
1208 
1209 /* 
1210         Prepares error message and appends ValNodePtr, containing BlastErrorMsgPtr, to
1211         end of chain.  The beginning of the ValNodePtr chain is returned.
1212 */
1213 
1214 ValNodePtr BlastConstructErrorMessage(CharPtr function, CharPtr message, Uint1 level, ValNodePtr PNTR vnpp)
1215 
1216 {
1217         CharPtr buffer;
1218         CharPtr ptr;
1219         BlastErrorMsgPtr error_msg;
1220         int length = 10;
1221 
1222         if (vnpp == NULL)
1223                 return NULL;
1224 
1225         if (function != NULL)
1226                 length += StringLen(function);
1227 
1228         if (message != NULL)
1229                 length += StringLen(message);
1230 
1231         ptr = buffer = (Char*) MemNew(length*sizeof(char));
1232         if (function != NULL)
1233         {
1234                 sprintf(buffer, "%s: ", function);
1235                 ptr = buffer + StringLen(buffer);
1236         }
1237         
1238         if (message != NULL)
1239         {
1240                 sprintf(ptr, "%s", message);
1241         }
1242 
1243         error_msg = (BlastErrorMsgPtr) MemNew(sizeof(BlastErrorMsg));
1244         error_msg->msg = buffer;
1245         error_msg->level = level;
1246 
1247         ValNodeAddPointer(vnpp, 0, error_msg);
1248 
1249         return *vnpp;
1250 }
1251 
1252 /*
1253         Destroys a chain of ValNodes and the BlastErrorMsgPtr data.
1254 */
1255 
1256 ValNodePtr BlastErrorChainDestroy(ValNodePtr vnp)
1257 
1258 {
1259         ValNodePtr start = vnp;
1260 
1261         while (vnp)
1262         {
1263                 BlastDestroyErrorMessage(vnp->data.ptrvalue);
1264                 vnp->data.ptrvalue = NULL;
1265                 vnp = vnp->next;
1266         }
1267 
1268         ValNodeFree(start);
1269 
1270         return NULL;
1271 }
1272 
1273 /*
1274         Prints the error messages.
1275 */
1276 
1277 void LIBCALL BlastErrorPrint(ValNodePtr error_return)
1278 {
1279         BlastErrorMsgPtr error_msg;
1280 
1281         if (error_return == NULL)
1282                 return;
1283 
1284         while (error_return)
1285         {
1286                 error_msg = error_return->data.ptrvalue;
1287                 switch (error_msg->level)
1288                 {
1289                         case 0:
1290                                 ErrPostEx(SEV_INFO, 0, 0, "%s", error_msg->msg);
1291                                 break;
1292                         case 1:
1293                                 ErrPostEx(SEV_WARNING, 0, 0, "%s", error_msg->msg);
1294                                 break;
1295                         case 2:
1296                                 ErrPostEx(SEV_ERROR, 0, 0, "%s", error_msg->msg);
1297                                 break;
1298                         case 3:
1299                                 ErrPostEx(SEV_FATAL, 1, 0, "%s", error_msg->msg);
1300                                 break;
1301                         default:
1302                                 ErrPostEx(SEV_WARNING, 0, 0, "Unknown BLAST error level");
1303                                 break;
1304                 }
1305                 error_return = error_return->next;
1306         }
1307         return;
1308         
1309 }
1310 
1311 CharPtr LIBCALL BlastErrorToString(ValNodePtr error_return)
1312 {
1313         BlastErrorMsgPtr error_msg;
1314     ValNodePtr tmp = NULL;
1315     Uint4 length = 0;
1316     CharPtr retval = NULL;
1317 
1318         if ( !error_return ) {
1319                 return NULL;
1320     }
1321 
1322     /* First, determine the length of the error messages */
1323     for (tmp = error_return; tmp; tmp = tmp->next) {
1324         error_msg = (BlastErrorMsgPtr) tmp->data.ptrvalue;
1325         length += StringLen(error_msg->msg);
1326         switch (error_msg->level) {
1327         case 0: length += 4; break;     /* INFO */
1328         case 1: length += 7; break;     /* WARNING */
1329         case 2: length += 5; break;     /* ERROR */
1330         case 3: length += 5; break;     /* FATAL */
1331         default: break;
1332         }
1333         length += 2;        /* for space and ':' */
1334         length += 2;        /* for newline/NULL byte */
1335     }
1336 
1337     retval = (CharPtr) Malloc(sizeof(Char)*length);
1338     if ( !retval ) {
1339         return NULL;
1340     }
1341     *retval = NULLB;
1342 
1343     for (tmp = error_return; tmp; tmp = tmp->next) {
1344         Char buffer[BUFFER_LENGTH] = { NULLB };
1345         error_msg = (BlastErrorMsgPtr) tmp->data.ptrvalue;
1346 
1347                 switch (error_msg->level)
1348                 {
1349         case 0:
1350             sprintf(buffer, "%s: %s\n", "INFO", error_msg->msg);
1351             break;
1352         case 1:
1353             sprintf(buffer, "%s: %s\n", "WARNING", error_msg->msg);
1354             break;
1355         case 2:
1356             sprintf(buffer, "%s: %s\n", "ERROR", error_msg->msg);
1357             break;
1358         case 3:
1359             sprintf(buffer, "%s: %s\n", "FATAL", error_msg->msg);
1360             break;
1361         default:
1362             ErrPostEx(SEV_WARNING, 0, 0, "Unknown BLAST error level");
1363             break;
1364                 }
1365         StringNCat(retval, buffer, length);
1366         }
1367         return retval;
1368 }
1369 
1370 void LIBCALL BlastErrorPrintExtra(ValNodePtr error_return, Boolean errpostex, FILE* fp)
1371 {
1372     BlastErrorMsgPtr error_msg;
1373     ErrSev           err_sev;
1374     CharPtr          msg;
1375     
1376     CharPtr errsevmsg[] = { "UNKNOWN","INFO","WARNING","ERROR",
1377                             "FATAL"};
1378     
1379     if (error_return == NULL)
1380         return;
1381     
1382     while (error_return) {
1383         error_msg = error_return->data.ptrvalue;
1384         msg = error_msg->msg;
1385         
1386         if(error_msg->level > 5)
1387             error_msg->level = 0;
1388         
1389         if(error_msg->level == 4)
1390             err_sev = SEV_FATAL;
1391         else
1392             err_sev = error_msg->level;
1393 
1394         if (errpostex)
1395             ErrPostEx(err_sev, 0, 0, "%s", msg);
1396 
1397         if (fp)
1398             fprintf(fp, "%s: %s\n", errsevmsg[error_msg->level], msg);
1399         
1400         error_return = error_return->next;
1401     }
1402     return;
1403 }
1404 
1405 CharPtr scan_to_break (CharPtr ptr)
1406 {
1407 
1408         while (*ptr != NULLB)
1409         {
1410                 if (*ptr == ';')
1411                 {
1412                         ptr++;
1413                         break;
1414                 }
1415                 ptr++;
1416         }
1417 
1418         return ptr;
1419 }
1420 
1421 Boolean LIBCALL
1422 BlastPrintFilterWarning (CharPtr filter_string, Int4 line_length, FILE *outfp, Boolean html)
1423 
1424 {
1425         CharPtr ptr;
1426 
1427         ptr = filter_string;
1428 
1429         if (filter_string == NULL || outfp == NULL)
1430                 return FALSE;
1431 
1432         while (*ptr != NULLB)
1433         {
1434                 if (*ptr == 'S')
1435                 {
1436                         ptr = scan_to_break(ptr);
1437                 }
1438                 else if (*ptr == 'C')
1439                 {
1440                         ptr = scan_to_break(ptr);
1441                 }
1442                 else if (*ptr == 'D')
1443                 {
1444                         ptr = scan_to_break(ptr);
1445                 }
1446                 else if (*ptr == 'R')
1447                 {
1448                         ptr = scan_to_break(ptr);
1449                         if (html)
1450                                 fprintf(outfp, "<B>NOTE:</B>");
1451                         else
1452                                 fprintf(outfp, "NOTE:");
1453                         fprintf(outfp, " This query has been filtered for human repeats.\n");
1454                         fprintf(outfp, "This filtering is effective for 70-90%% of all repeats.\n\n");
1455                 }
1456                 else if (*ptr == 'L')
1457                 { /* do low-complexity filtering; dust for blastn, otherwise seg.*/
1458                         ptr = scan_to_break(ptr);
1459                 }
1460                 else
1461                 {
1462                         ptr++;
1463                 }
1464         }
1465 
1466         return TRUE;
1467 
1468 }
1469 
1470 #define BLAST_SMALLEST_EVALUE 1.0e-300
1471 /*
1472         Initialize the options structure.
1473 
1474         The fields should be set to default values, that depend on the program.
1475 */
1476 BLAST_OptionsBlkPtr LIBCALL 
1477 BLASTOptionNew(CharPtr progname, Boolean gapped)
1478 
1479 {
1480    return BLASTOptionNewEx(progname, gapped, FALSE);
1481 }
1482 
1483 BLAST_OptionsBlkPtr LIBCALL 
1484 BLASTOptionNewEx(CharPtr progname, Boolean gapped, Boolean is_megablast)
1485 
1486 {
1487         BLAST_OptionsBlkPtr options;
1488 
1489         options = (BLAST_OptionsBlkPtr) MemNew(sizeof(BLAST_OptionsBlk));
1490 
1491         options->perform_culling = FALSE;       /* Results should not be culled at all right now. */
1492 
1493         options->program_name = StringSave(progname);
1494         options->required_start = 0;
1495         options->required_end = -1;     /* -1 indicates the end of the query. */
1496         options->db_length = 0;         /* zero means that real size will be used. */
1497         options->searchsp_eff = 0;      /* zero means that real size will be used. */
1498 
1499         options->hsp_range_max = 100;
1500         options->entrez_query = NULL;
1501         options->gifile = NULL;
1502         options->gilist = NULL;
1503         options->sort_gi_list = TRUE;
1504     options->gilist_already_calculated = FALSE;
1505 
1506         if (gapped)
1507         {
1508                 options->gapped_calculation = TRUE;
1509                 options->do_sum_stats = FALSE;
1510         }
1511         else
1512         {
1513                 options->gapped_calculation = FALSE;
1514                 options->do_sum_stats = TRUE;
1515                 options->hsp_num_max = 400;
1516         }
1517 
1518         options->discontinuous = FALSE; /* discontinuous is default. */
1519         if (StringICmp(progname, "blastn") == 0)
1520         {
1521                 options->gap_decay_rate = 0.5;
1522                 options->gap_prob = 0.5;
1523                 options->gap_size = 40;
1524                 options->threshold_second = WORD_THRESHOLD_BLASTN;
1525                 options->expect_value  = 10;
1526                 options->hitlist_size = 500;
1527                 options->two_pass_method  = FALSE;
1528                 options->multiple_hits_only  = FALSE;
1529                 options->number_of_bits  = 0.0;
1530                 /* 1st pass not done for blastn. */
1531                 options->dropoff_2nd_pass  = UNGAPPED_X_DROPOFF_NUCL;
1532                 options->matrix  = NULL;
1533                 options->old_stats  = FALSE;
1534                 options->penalty  = PENALTY;
1535                 options->reward  = REWARD;
1536                 options->e2 = 0.05;
1537                 /* Used in the post-process gapping of the blastn result. */
1538                 if (!is_megablast) {
1539                    options->wordsize  = WORDSIZE_NUCL;
1540                    options->gap_open  = GAP_OPEN_NUCL;
1541                    options->gap_extend  = GAP_EXTN_NUCL;
1542                    options->block_width = 20;
1543                    options->window_size = WINDOW_SIZE_NUCL;
1544                    options->cutoff_s = 0;
1545                    options->cutoff_s2 = 0;
1546                    options->gap_x_dropoff  = GAP_X_DROPOFF_NUCL;
1547                 } else {
1548                    options->is_megablast_search = TRUE;
1549                    options->wordsize = WORDSIZE_MEGABLAST;
1550                    options->gap_open  = GAP_OPEN_MEGABLAST;
1551                    options->gap_extend  = GAP_EXTN_MEGABLAST;
1552                    options->cutoff_s2 = 28;
1553                    options->window_size = WINDOW_SIZE_MEGABLAST;
1554                    options->gap_x_dropoff  = GAP_X_DROPOFF_MEGABLAST;
1555                    options->dropoff_2nd_pass = UNGAPPED_X_DROPOFF_MEGABLAST;
1556                 }
1557                 options->decline_align = INT2_MAX;
1558                 options->gap_x_dropoff_final  = GAP_X_DROPOFF_FINAL_NUCL;
1559                 options->gap_trigger  = 25.0;
1560                 options->strand_option  = BLAST_BOTH_STRAND;
1561                 if (gapped)
1562                    options->do_sum_stats = FALSE;
1563                 else
1564                    options->do_sum_stats = TRUE;
1565         }
1566         else
1567         {
1568                 options->gap_size = 40;
1569                 options->window_size = WINDOW_SIZE_PROT;
1570                 options->expect_value  = 10;
1571                 options->hitlist_size = 500;
1572                 options->two_pass_method  = FALSE;
1573                 options->multiple_hits_only  = TRUE;
1574                 options->number_of_bits  = 0.0;
1575                 options->dropoff_1st_pass  = 7;
1576                 options->dropoff_2nd_pass  = UNGAPPED_X_DROPOFF_PROT;
1577                 options->matrix  = StringSave("BLOSUM62");
1578                 options->old_stats  = FALSE;
1579                 options->wordsize  = WORDSIZE_PROT;
1580                 options->penalty  = 0;
1581                 options->reward  = 0;
1582                 options->gap_decay_rate = 0.5;
1583                 options->gap_prob = 0.5;
1584 
1585                 options->gap_open  = GAP_OPEN_PROT;
1586                 options->gap_extend  = GAP_EXTN_PROT;
1587         options->decline_align = INT2_MAX;
1588                 options->gap_x_dropoff  = GAP_X_DROPOFF_PROT;
1589                 options->gap_x_dropoff_final  = GAP_X_DROPOFF_FINAL_PROT;
1590                 options->gap_trigger  = 22.0;
1591 
1592                 if (StringICmp(progname, "blastp") == 0)
1593                 {
1594                         options->e2 = BLAST_SMALLEST_EVALUE;
1595                         options->threshold_second = WORD_THRESHOLD_BLASTP;
1596                 }
1597                 else if (StringICmp(progname, "blastx") == 0)
1598                 {
1599                         options->e2 = 1.0;
1600                         options->genetic_code = 1;
1601                         options->threshold_second = WORD_THRESHOLD_BLASTX;
1602                         options->do_sum_stats = TRUE;
1603                         options->strand_option  = BLAST_BOTH_STRAND;
1604                 }
1605 
1606         else if ( StringICmp(progname, "tblastn") == 0
1607                          || StringICmp(progname, "psitblastn") == 0 )
1608                 {
1609                         options->e2 = 1.0;
1610                         options->db_genetic_code = 1;
1611                         options->threshold_second = WORD_THRESHOLD_TBLASTN;
1612                         options->do_sum_stats = TRUE;
1613                 }
1614                 else if (StringICmp(progname, "tblastx") == 0)
1615                 {
1616                         options->e2 = BLAST_SMALLEST_EVALUE;
1617                         options->genetic_code = 1;
1618                         options->db_genetic_code = 1;
1619                         options->threshold_second = WORD_THRESHOLD_TBLASTX;
1620                         options->gap_open  = 0;
1621                         options->gap_extend  = 0;
1622                         options->gap_x_dropoff  = 0;
1623                         options->gap_x_dropoff_final  = 0;
1624                         options->gapped_calculation = gapped = FALSE;
1625                         options->do_sum_stats = TRUE;
1626                         options->strand_option  = BLAST_BOTH_STRAND;
1627                         options->hsp_num_max = 100;
1628                 }
1629                 if (gapped)
1630                 {
1631                         options->two_pass_method = FALSE;
1632                         options->multiple_hits_only  = TRUE;
1633                         options->dropoff_2nd_pass  = options->dropoff_1st_pass;
1634                         options->gap_decay_rate = 0.1;
1635                         options->gap_prob = 1.0;
1636                 }
1637         }
1638 
1639         return options;
1640 }
1641 
1642 /*
1643         Delete the Options structure.
1644 */
1645 BLAST_OptionsBlkPtr LIBCALL 
1646 BLASTOptionDelete(BLAST_OptionsBlkPtr options)
1647 {
1648     if (options == NULL)
1649         return NULL;
1650     
1651     if (options->matrix != NULL)
1652         MemFree(options->matrix);
1653     
1654     if (options->program_name != NULL)
1655         MemFree(options->program_name);
1656     
1657     MemFree(options->filter_string);
1658 
1659     MemFree(options->gifile);
1660 
1661     options = MemFree(options);
1662     return options;
1663 }
1664 
1665 
1666 /*
1667         Validate the Options structure.  If an invalid option is found,
1668         call BLASTOptionDelete and issue an error message.
1669 */
1670 BLAST_OptionsBlkPtr LIBCALL 
1671 BLASTOptionValidate(BLAST_OptionsBlkPtr options, CharPtr progname)
1672 
1673 {
1674         Int2 status;
1675         ValNodePtr error_return=NULL;
1676 
1677         status = BLASTOptionValidateEx(options, progname, &error_return);
1678 
1679         if (status != 0)
1680                 options = BLASTOptionDelete(options);
1681         
1682         BlastErrorPrint(error_return);
1683         BlastErrorChainDestroy(error_return);
1684 
1685         return options;
1686 }
1687 
1688 /*
1689         Validate the Options structure.  If an invalid option is found,
1690         call BLASTOptionDelete and issue an error message.
1691 */
1692 Int2 LIBCALL 
1693 BLASTOptionValidateEx (BLAST_OptionsBlkPtr options, CharPtr progname, ValNodePtr PNTR error_return)
1694 
1695 {
1696         Int2 status=0;
1697 
1698         if (options->hitlist_size < 1)
1699         {
1700                 BlastConstructErrorMessage("BLASTOptionValidateEx", "No hits are being saved", 1, error_return);
1701                 return 1;
1702         }
1703 
1704         if (options->expect_value <= 0.0 && options->cutoff_s == 0)
1705         {
1706                 BlastConstructErrorMessage("BLASTOptionValidateEx", "expect value must be greater than zero", 1, error_return);
1707                 return 1;
1708         }
1709 
1710         if (options->wordsize <= 0)
1711         {
1712                 BlastConstructErrorMessage("BLASTOptionValidateEx", "wordsize must be non-zero", 1, error_return);
1713                 return 1;
1714         }
1715 
1716         if (StringICmp(progname, "blastn") == 0)
1717         {
1718                if (options->penalty >= 0)
1719                {
1720                        BlastConstructErrorMessage("BLASTOptionValidateEx", "BLASTN penalty must be negative", 1, error_return);
1721                        return 1;
1722                }
1723 
1724                if (options->is_megablast_search && options->wordsize < 8) {
1725                   BlastConstructErrorMessage("BLASTOptionValidateEx", 
1726                      "Wordsize must be 8 or greater", 1, error_return);
1727                   return 1;
1728                } else if (options->wordsize < 7) {
1729                   BlastConstructErrorMessage("BLASTOptionValidateEx", "Wordsize must be 7 or greater", 1, error_return);
1730                   return 1;
1731                }
1732 
1733                if (options->is_megablast_search && options->gap_open > 0
1734                    && options->gap_extend == 0) 
1735                {
1736                   BlastConstructErrorMessage("BLASTOptionValidateEx", 
1737                      "Gap extension penalty must be non-zero if gap open"
1738                      " penalty specified", 1, error_return);
1739                   return 1;
1740                }
1741 
1742                 if (options->threshold_second != 0)
1743                 {
1744                         BlastConstructErrorMessage("BLASTOptionValidateEx", "non-zero threshold not permitted with blastn", 1, error_return);
1745                         return 1;
1746                 }
1747 
1748                 if (options->two_pass_method == TRUE || options->number_of_bits != 0.0)
1749                 {
1750                         BlastConstructErrorMessage("BLASTOptionValidateEx", "two-passes not available for blastn", 1, error_return);
1751                         return 1;
1752                 }
1753 
1754                 if ((options->strand_option | BLAST_BOTH_STRAND) == 0)
1755                 {
1756                         BlastConstructErrorMessage("BLASTOptionValidateEx", "invalid strand specified", 1, error_return);
1757                         return 1;
1758                 }
1759 
1760                 if (options->gapped_calculation == TRUE)
1761                 {
1762                       BLAST_KarlinBlk* kbp = BlastKarlinBlkCreate();
1763                       BLAST_KarlinBlk* kbp_ungap = BlastKarlinBlkCreate();
1764                       Boolean round_down;
1765                       /* We call this function to find out if these are supported parameters. */
1766                       status = BlastKarlinBlkNuclGappedCalc(kbp, options->gap_open, options->gap_extend, options->reward,
1767                           options->penalty, kbp_ungap, &round_down, error_return);
1768                       kbp = BlastKarlinBlkDestruct(kbp);
1769                       kbp_ungap = BlastKarlinBlkDestruct(kbp_ungap);
1770                       if (!status) 
1771                           return status;
1772                 }
1773 
1774 /*
1775                 if (options->multiple_hits_only == TRUE)
1776                 {
1777                         BlastConstructErrorMessage("BLASTOptionValidateEx", "multiple hits not available for blastn", 1, error_return);
1778                         return 1;
1779                 }
1780 */
1781         }
1782         else
1783         {
1784                 if (StringICmp(progname, "blastx") == 0 || StringICmp(progname, "tblastx") == 0)
1785                 {
1786                         if ((options->strand_option | BLAST_BOTH_STRAND) == 0)
1787                         {
1788                                 BlastConstructErrorMessage("BLASTOptionValidateEx", "invalid strand specified", 1, error_return);
1789                                 return 1;
1790                         }
1791                 }
1792                 if (options->wordsize < 2 || options->wordsize > 3)
1793                 {
1794                         BlastConstructErrorMessage("BLASTOptionValidateEx", "Valid wordsize range is 2 to 3", 1, error_return);
1795                         return 1;
1796                 }
1797                 if (options->threshold_second == 0)
1798                 {
1799                         BlastConstructErrorMessage("BLASTOptionValidateEx", "non-zero threshold required", 1, error_return );
1800                         return 1;
1801                 }
1802                 if (options->penalty != 0 || options->reward != 0)
1803                 {
1804                         BlastConstructErrorMessage("BLASTOptionValidateEx", "penalty or reward can only be non-zero for blastn", 1, error_return);
1805                         return 1;
1806                 }
1807 
1808                 if (StringICmp(progname, "tblastx") == 0)
1809                 {
1810                         if (options->gapped_calculation == TRUE)
1811                         {
1812                                 BlastConstructErrorMessage("BLASTOptionValidateEx", "gapped calculations not available with tblastx", 1, error_return);
1813                                 return 1;
1814                         }
1815                 }
1816                 
1817                 if (options->gapped_calculation == TRUE)
1818                 {
1819                         status = BlastKarlinBlkGappedCalcEx(NULL, options->gap_open, options->gap_extend, options->decline_align, options->matrix, error_return);
1820                 }
1821         }
1822 
1823         if (options->isPatternSearch && (!options->gapped_calculation))
1824             {
1825                 BlastConstructErrorMessage("BLASTOptionValidateEx", "PHI-BLAST cannot use ungapped alignments", 1, error_return);
1826                 return 1;
1827             }
1828 
1829         if (options->tweak_parameters && (!options->gapped_calculation))
1830             {
1831                 BlastConstructErrorMessage("BLASTOptionValidateEx", "parameter adjustment not supported with ungapped alignments", 1, error_return);
1832                 return 1;
1833             }
1834 
1835         if (options->smith_waterman && (!options->gapped_calculation))
1836             {
1837                 BlastConstructErrorMessage("BLASTOptionValidateEx", "locally optimal alignments not supported with ungapped alignments", 1, error_return);
1838                 return 1;
1839             }
1840         if ((StringICmp(progname, "psitblastn") != 0) && (options->CheckpointFileName !=NULL))
1841           {
1842               BlastConstructErrorMessage("BLASTOptionValidateEx",
1843                                          "-R option is available only for psitblastn", 1, error_return);
1844               return 1;
1845           }
1846 
1847         /* Check the validity of the megablast specific options */
1848         if (options->is_megablast_search) {
1849            if (options->mb_template_length > 0) {
1850               if (options->mb_template_length != 16 &&
1851                   options->mb_template_length != 18 &&
1852                   options->mb_template_length != 21) {
1853                  BlastConstructErrorMessage("BLASTOptionValidateEx",
1854                      "Only discontiguous template lengths 16, 18 and 21 are supported", 
1855                                             1, error_return);
1856                  return 1;
1857               }
1858 
1859               if (options->wordsize < 11 || options->wordsize > 12) {
1860                  BlastConstructErrorMessage("BLASTOptionValidateEx",
1861                      "Only discontiguous word sizes 11 and 12 are supported", 
1862                                             1, error_return);
1863                  return 1;
1864               }
1865 
1866               if (options->mb_disc_type > MB_TWO_TEMPLATES) {
1867                  BlastConstructErrorMessage("BLASTOptionValidateEx",
1868                      "Allowed discontiguous word types are: 0 - coding, 1 - maximal, 2 - two templates", 
1869                                             1, error_return);
1870                  return 1;
1871               }
1872            }
1873         }
1874 
1875         /* --KM check -B option: query concatenation only with certain
1876            BLAST flavors */
1877         if (options->NumQueries>0 &&
1878         !( (StringICmp("blastn",  progname) == 0) ||
1879         (StringICmp("tblastn", progname) == 0)   ) ) {
1880 
1881                 BlastConstructErrorMessage("BLASTOptionValidateEx",
1882                     "Query concatenation can only be used with blastn and tblastn",
1883                     1, error_return);
1884                 return 1;
1885         }
1886     
1887 
1888 
1889 
1890         return status;
1891 }
1892 
1893 /*
1894         Changes the matrix value to the one given and sets the 
1895         default parameters for that Matrix. 
1896 */
1897 
1898 Int2 LIBCALL 
1899 BLASTOptionSetGapParams (BLAST_OptionsBlkPtr options, 
1900                          CharPtr matrix_name, Int4 open, Int4 extended)
1901 
1902 {
1903     Boolean found_matrix=FALSE, threshold_set=FALSE;
1904     Int4 threshold, window_size;
1905     
1906     if (options == NULL || matrix_name == NULL)
1907         return -1;
1908     
1909 /* Save these in case we need to restore (for blastn). */
1910     threshold =  options->threshold_second;
1911     window_size =  options->window_size;
1912 
1913 /* We check for protein matrices also for blastn in case a nucl.-nucl. matrix really
1914 has been specified. */
1915     if (StringICmp(matrix_name, "BLOSUM62") == 0) {
1916         options->gap_open  = 11;
1917         options->gap_extend  = 1;
1918         options->window_size = 40;
1919         options->threshold_second = 11;
1920         found_matrix = TRUE;
1921         threshold_set = TRUE;
1922     } else if (StringICmp(matrix_name, "BLOSUM45") == 0) {
1923         options->gap_open  = 14;
1924         options->gap_extend  = 2;
1925         options->window_size = 60;
1926         options->threshold_second = 14;
1927         found_matrix = TRUE;
1928         threshold_set = TRUE;
1929     } else if (StringICmp(matrix_name, "BLOSUM50") == 0) {
1930         options->gap_open  = 13;
1931         options->gap_extend  = 2;
1932         found_matrix = TRUE;
1933     } else if (StringICmp(matrix_name, "PAM250") == 0) {
1934         options->gap_open  = 14;
1935         options->gap_extend  = 2;
1936         found_matrix = TRUE;
1937     } else if (StringICmp(matrix_name, "BLOSUM62_20") == 0) {
1938         options->gap_open  = 100;
1939         options->gap_extend  = 10;
1940         options->threshold_second = 100;
1941         found_matrix = TRUE;
1942     } else if (StringICmp(matrix_name, "BLOSUM90") == 0) {
1943         options->gap_open  = 10;
1944         options->gap_extend  = 1;
1945         found_matrix = TRUE;
1946     } else if (StringICmp(matrix_name, "BLOSUM80") == 0) {
1947         options->gap_open  = 10;
1948         options->gap_extend  = 1;
1949         options->window_size = 25;
1950         options->threshold_second = 12;
1951         found_matrix = TRUE;
1952         threshold_set = TRUE;
1953     } else if (StringICmp(matrix_name, "PAM30") == 0) {
1954         options->gap_open  = 9;
1955         options->gap_extend  = 1;
1956         options->window_size = 15;
1957         options->threshold_second = 16;
1958         found_matrix = TRUE;
1959         threshold_set = TRUE;
1960     } else if (StringICmp(matrix_name, "PAM70") == 0) {
1961         options->gap_open  = 10;
1962         options->gap_extend  = 1;
1963         options->window_size = 20;
1964         options->threshold_second = 14;
1965         found_matrix = TRUE;
1966         threshold_set = TRUE;
1967     }
1968     
1969     if (open)
1970         options->gap_open  = open;
1971     if (extended)
1972         options->gap_extend  = extended;
1973     
1974     
1975     /* blastn and megablast are different. */
1976     if (StringICmp("blastn", options->program_name) == 0) {
1977         options->threshold_second = threshold;
1978         options->window_size = window_size;
1979         if (!options->is_megablast_search) {
1980            options->gap_open  = 5;
1981            options->gap_extend  = 2;
1982            if (found_matrix == FALSE) {
1983                /* probably not a protein-protein matrix. */
1984                options->matrix = StringSave(matrix_name);
1985                }
1986         } else {
1987            options->gap_open   = GAP_OPEN_MEGABLAST;
1988            options->gap_extend = GAP_EXTN_MEGABLAST;
1989         }
1990         return 0;
1991     }
1992 
1993     if (matrix_name) 
1994     {
1995         if (options->matrix)
1996             MemFree(options->matrix);
1997         options->matrix = StringSave(matrix_name);
1998     }
1999     
2000     if (!found_matrix)
2001         return -1;
2002     
2003     if (threshold_set) {
2004         if (StringICmp(options->program_name, "blastx") == 0) {
2005             options->threshold_second++;
2006 
2007         } else if (StringICmp(options->program_name, "tblastn") == 0
2008                    || StringICmp(options->program_name, "tblastx") == 0
2009                    || StringICmp(options->program_name, "psitblastn") == 0) {
2010             options->threshold_second += 2;
2011         }
2012     }
2013     return 0;
2014 }
2015 
2016 /*
2017   This function obtains the sequence from a BioseqPtr in ASCII alphabet.
2018   The return value is a Uint1Ptr containing the sequence, the Int4Ptr
2019   length inidcates the length of the seqeunce.
2020 */      
2021 
2022 Uint1Ptr
2023 BlastGetSequenceFromBioseq (BioseqPtr bsp, Int4Ptr length)
2024 
2025 {
2026         Int4 index;
2027         SeqPortPtr spp;
2028         Uint1 residue;
2029         Uint1Ptr sequence;
2030 
2031         *length = 0;
2032         if (bsp == NULL)
2033                 return NULL;
2034 
2035         sequence = MemNew((1+bsp->length)*sizeof(Uint1));
2036         if (sequence == NULL)
2037                 return NULL;
2038 
2039         if (ISA_na(bsp->mol))
2040                 spp = SeqPortNew(bsp, 0, -1, Seq_strand_plus, Seq_code_iupacna);
2041         else
2042                 spp = SeqPortNew(bsp, 0, -1, Seq_strand_unknown, Seq_code_ncbieaa);
2043 
2044         index=0;
2045         while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
2046         {
2047                 if (residue == SEQPORT_VIRT)
2048                         continue;
2049                 sequence[index] = residue;
2050                 index++;
2051         }
2052         sequence[index] = NULLB;
2053         spp = SeqPortFree(spp);
2054 
2055         *length = index;
2056         return sequence;
2057 }
2058 
2059 /*
2060         Adjusts the length and number of sequences in a database according
2061         to the gi_list or seqIdPtr list given.
2062 
2063     06/24/2002: This function returns the number of sequences and number of
2064     bases/residues based on the information on the rdfp parameter (all others
2065     are ignored). This function should be called after BlastProcessGiLists and
2066     before calculating the effective search space.
2067 
2068     02/12/2004: This function is *deprecated*
2069 */
2070 
2071 Boolean
2072 BlastAdjustDbNumbers (ReadDBFILEPtr rdfp, Int8Ptr db_length, Int4Ptr db_number, SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, OIDListPtr oidlist, BlastDoubleInt4Ptr PNTR gi_list_pointers, Int4 gi_list_total)
2073 
2074 {
2075     return readdb_get_totals_ex2(rdfp, db_length, db_number, FALSE, TRUE);
2076 }
2077 
2078 /*
2079         Deletes only the BlastGiListPtr, not
2080         the associated arrays.
2081 */
2082 BlastGiListPtr 
2083 BlastGiListDestruct(BlastGiListPtr blast_gi_list, Boolean contents)
2084 {
2085         if (blast_gi_list == NULL)
2086                 return NULL;
2087 
2088         if (contents)
2089         { /* On the main thread.  Deallocate contents. */
2090                 if (blast_gi_list->gilist_not_owned == FALSE)
2091                         MemFree(blast_gi_list->gi_list);
2092                 MemFree(blast_gi_list->gi_list_pointer);
2093         }
2094 
2095         return MemFree(blast_gi_list);
2096 }
2097 
2098 /*
2099         Allocates BlastGiListPtr.  The caller still owns
2100         the gi_list and must delete it.
2101 */
2102 
2103 BlastGiListPtr BlastGiListNew(BlastDoubleInt4Ptr gi_list, Int4 tot)
2104 {
2105         BlastGiListPtr blast_gi_list = NULL;
2106     register Int4 i;
2107 
2108         if (gi_list == NULL || tot == 0)
2109                 return NULL;
2110 
2111         if ((blast_gi_list = MemNew(sizeof(BlastGiList))) == NULL)  {
2112         ErrPostEx(SEV_ERROR,0,0,"BlastGiListNew: Out of memory\n");
2113         return NULL;
2114     }
2115 
2116     blast_gi_list->gi_list = MemNew(sizeof(BlastDoubleInt4)*tot);
2117     blast_gi_list->gi_list_pointer = MemNew(sizeof(BlastDoubleInt4Ptr)*tot);
2118     if ((blast_gi_list->gi_list == NULL) || 
2119         (blast_gi_list->gi_list_pointer == NULL)) {
2120         ErrPostEx(SEV_ERROR,0,0,"BlastGiListNew: Out of memory\n");
2121         return NULL;
2122     }
2123 
2124     for (i = 0; i < tot; i++) {
2125         MemCpy(&(blast_gi_list->gi_list[i]),&gi_list[i],
2126                 sizeof(BlastDoubleInt4));
2127         blast_gi_list->gi_list_pointer[i] = &(blast_gi_list->gi_list[i]);
2128     }
2129         blast_gi_list->total = tot;
2130         blast_gi_list->current = 0;
2131     blast_gi_list->gilist_not_owned = FALSE; /* yes, we own this */
2132         
2133         return blast_gi_list;
2134 }
2135 
2136 /* This is a simple callback comparison function for sorting */
2137 static int LIBCALLBACK
2138 blast_double_int_oid_compare(VoidPtr v1, VoidPtr v2)
2139 {
2140         BlastDoubleInt4Ptr h1, h2;
2141         BlastDoubleInt4Ptr *hp1, *hp2;
2142 
2143         hp1 = (BlastDoubleInt4Ptr PNTR) v1;
2144         hp2 = (BlastDoubleInt4Ptr PNTR) v2;
2145         h1 = *hp1;
2146         h2 = *hp2;
2147 
2148         if (h1->ordinal_id < h2->ordinal_id)
2149                 return -1;
2150         if (h1->ordinal_id > h2->ordinal_id)
2151                 return 1;
2152     if (h1->start == h2->start) { /* break the tie with gis */
2153         if (h1->gi < h2->gi)
2154             return -1;
2155         if (h1->gi > h2->gi)
2156             return 1;
2157     }
2158 
2159         return 0;
2160 }
2161 static int LIBCALLBACK
2162 blast_double_int_gi_compare(VoidPtr v1, VoidPtr v2)
2163 {
2164         BlastDoubleInt4Ptr h1, h2;
2165         BlastDoubleInt4Ptr *hp1, *hp2;
2166 
2167         hp1 = (BlastDoubleInt4Ptr PNTR) v1;
2168         hp2 = (BlastDoubleInt4Ptr PNTR) v2;
2169         h1 = *hp1;
2170         h2 = *hp2;
2171 
2172         if (h1->gi < h2->gi)
2173                 return -1;
2174         if (h1->gi > h2->gi)
2175                 return 1;
2176 
2177         return 0;
2178 }
2179 static int LIBCALLBACK
2180 blast_double_int_start_compare(VoidPtr v1, VoidPtr v2)
2181 {
2182         BlastDoubleInt4Ptr h1, h2;
2183         BlastDoubleInt4Ptr *hp1, *hp2;
2184 
2185         hp1 = (BlastDoubleInt4Ptr PNTR) v1;
2186         hp2 = (BlastDoubleInt4Ptr PNTR) v2;
2187         h1 = *hp1;
2188         h2 = *hp2;
2189 
2190         if (h1->start < h2->start)
2191                 return -1;
2192         if (h1->start > h2->start)
2193                 return 1;
2194     if (h1->start == h2->start) { /* break the tie with oids */
2195         if (h1->ordinal_id < h2->ordinal_id)
2196             return -1;
2197         if (h1->ordinal_id > h2->ordinal_id)
2198             return 1;
2199     }
2200 
2201         return 0;
2202 }
2203 
2204 /* Returns the intersection of list1 and list2. Caller is reponsible to
2205  * deallocate the return value */
2206 static BlastGiListPtr IntersectBlastGiLists(
2207                         BlastDoubleInt4Ptr list1, Int4 total1,
2208                         BlastDoubleInt4Ptr list2, Int4 total2)
2209 {
2210     BlastGiListPtr retval = NULL;
2211     BlastDoubleInt4Ptr new_list = NULL;
2212     BlastDoubleInt4Ptr *list_ptrs1 = NULL, *list_ptrs2 = NULL;
2213     Int4 idx1, idx2, new_idx = 0, short_list_sz = 0, gi;
2214 
2215     if ((!list1 && !list2) || (total1 <= 0 && total2 <= 0)) {
2216         ErrPostEx(SEV_WARNING, 0, 0, "IntersectBlastGiLists: No lists to "
2217                 "combine?");
2218         return NULL;
2219     }
2220     if (!list1) total1 = 0;
2221     if (!list2) total2 = 0;
2222 
2223     if (total1 > total2)
2224         short_list_sz = total2;
2225     else
2226         short_list_sz = total1;
2227 
2228     new_list = (BlastDoubleInt4Ptr)
2229         MemNew(short_list_sz*sizeof(BlastDoubleInt4));
2230     if (!new_list) {
2231         ErrPostEx(SEV_WARNING, 0, 0, "IntersectBlastGiLists: Failed to "
2232                 "allocate memory for gi list");
2233         return NULL;
2234     }
2235 
2236     /* sort the two lists by gi to facilitate intersection */
2237     list_ptrs1 = (BlastDoubleInt4Ptr *)
2238                  Malloc(sizeof(BlastDoubleInt4Ptr)*total1);
2239     list_ptrs2 = (BlastDoubleInt4Ptr *)
2240                  Malloc(sizeof(BlastDoubleInt4Ptr)*total2);
2241 
2242     for (idx1 = 0; idx1 < total1; idx1++)
2243         list_ptrs1[idx1] = &(list1[idx1]);
2244     for (idx2 = 0; idx2 < total2; idx2++)
2245         list_ptrs2[idx2] = &(list2[idx2]);
2246     HeapSort(list_ptrs1, total1, sizeof(BlastDoubleInt4Ptr *),
2247              blast_double_int_gi_compare);
2248     HeapSort(list_ptrs2, total2, sizeof(BlastDoubleInt4Ptr *),
2249              blast_double_int_gi_compare);
2250 
2251     /* Intersect! */
2252     for (idx1 = 0, idx2 = 0; idx1 < total1; idx1++) {
2253         gi = list_ptrs1[idx1]->gi;
2254 
2255         for (; idx2 < total2 && list_ptrs2[idx2]->gi < gi; idx2++);
2256 
2257         if (idx2 < total2 && list_ptrs2[idx2]->gi == gi) {
2258             MemCpy((VoidPtr)&new_list[new_idx], list_ptrs1[idx1],
2259                    sizeof(BlastDoubleInt4));
2260             new_idx++;
2261         }
2262     }
2263 
2264     /* Save the intersection and return it */
2265     if (new_idx > 0)
2266         retval = BlastGiListNew(new_list, new_idx);
2267 
2268     if (list_ptrs1)
2269         MemFree(list_ptrs1);
2270     if (list_ptrs2)
2271         MemFree(list_ptrs2);
2272     MemFree(new_list);
2273 
2274     return retval;
2275 }
2276 
2277 /* Return the union of list1 and list2. Size of the return value will be
2278  * total1 + total2. This function does not eliminate repeated entries.
2279  * Caller is responsible for deallocating the return value.*/
2280 static
2281 BlastGiListPtr CombineDoubleInt4Lists(
2282                    BlastDoubleInt4Ptr list1, Int4 total1, 
2283                    BlastDoubleInt4Ptr list2, Int4 total2)
2284 {
2285     BlastGiListPtr retval = NULL;
2286     BlastDoubleInt4Ptr new_list;
2287  
2288     if ((!list1 && !list2) || (total1 <= 0 && total2 <= 0))
2289         return NULL;
2290  
2291     if (!list1) total1 = 0;
2292     if (!list2) total2 = 0;
2293  
2294     new_list = (BlastDoubleInt4Ptr) 
2295                Malloc((total1+total2)*sizeof(BlastDoubleInt4));
2296     if (!new_list) {
2297         ErrPostEx(SEV_WARNING, 0, 0, "CombineDoubleInt4Lists: Failed to "
2298                 "allocate memory for gi list");
2299         return NULL;
2300     }
2301     MemCpy(new_list, list1, total1*sizeof(BlastDoubleInt4));
2302     MemCpy(&(new_list[total1]), list2, total2*sizeof(BlastDoubleInt4));
2303 
2304     retval = BlastGiListNew(new_list, total1+total2);
2305     MemFree(new_list);
2306 
2307     return retval;
2308 }
2309 
2310 /* Intersects (or creates) oidlists and attaches them to individual rdfp's in
2311  * the rdfp_chain. If any rdfp->oidlist is populated before calling this
2312  * function, it will be freed and replaced by the intersection of the gilist
2313  * parameter and itself. */
2314 static 
2315 Boolean IntersectDoubleInt4ListWithOIDLists(
2316            BlastDoubleInt4Ptr gilist, Int4 total, 
2317            ReadDBFILEPtr rdfp_chain)
2318 {
2319     OIDListPtr new_oidlist = NULL, lcl_oidlist = NULL;
2320     ReadDBFILEPtr rdfp = NULL;
2321     BlastDoubleInt4Ptr *gilist_ptrs = NULL;
2322     Int4 gilist_idx = 0; /* index into gilist_ptrs */
2323     Int4 i, new_oidlistsz = 0, new_oidlist_count = 0;
2324     Int4 maxoid, lcl_mask_index, lcl_oid; 
2325     Uint4 lcl_oid_bit = 0, lcl_mask = 0;
2326 
2327     gilist_ptrs = (BlastDoubleInt4Ptr *) 
2328                     Malloc(total*sizeof(BlastDoubleInt4Ptr));
2329     for (i=0; i < total; i++)
2330         gilist_ptrs[i] = &(gilist[i]);
2331 
2332     /* Sort by start field */
2333     HeapSort(gilist_ptrs, total, 
2334               sizeof(BlastDoubleInt4Ptr PNTR), blast_double_int_start_compare);
2335 
2336    /* merge gis from gilist and oids as you walk through rdfp_chain */
2337    for (rdfp = rdfp_chain; rdfp; rdfp = rdfp->next) {
2338 
2339        lcl_oidlist = rdfp->oidlist;
2340 
2341        while (gilist_idx < total &&
2342               gilist_ptrs[gilist_idx]->start == rdfp->start) {
2343 
2344            if (!new_oidlist) {
2345                /* determine the maxoid for this rdfp */
2346                for (i = gilist_idx; 
2347                     i < total && gilist_ptrs[i]->start == rdfp->start; 
2348                     i++);
2349 
2350                if (i == total && rdfp->start != gilist_ptrs[i-1]->start)
2351                    break;
2352                maxoid = gilist_ptrs[i-1]->ordinal_id - rdfp->start;
2353                
2354                new_oidlist = (OIDListPtr) MemNew(sizeof(OIDList));
2355                new_oidlist->total = maxoid + 1;
2356                new_oidlistsz = maxoid/MASK_WORD_SIZE + 2;
2357                new_oidlist->list = (Uint4Ptr) MemNew(
2358                                    (new_oidlistsz)*sizeof(Uint4));
2359                new_oidlist->memory = new_oidlist->list;
2360            }
2361 
2362            lcl_oid = gilist_ptrs[gilist_idx]->ordinal_id - rdfp->start;
2363 
2364            if (lcl_oid >= 0) {
2365 
2366                lcl_mask_index = lcl_oid/MASK_WORD_SIZE;
2367                lcl_oid_bit = 0x1 << (MASK_WORD_SIZE - 1 - 
2368                                        lcl_oid % MASK_WORD_SIZE);
2369 
2370                /* In a database with volumes and oidlists we might have empty 
2371                 * masks (ie: month est subset in a multi-volume est real 
2372                 * database), so reset the mask index to avoid accessing 
2373                 * outside the bounds of the lcl_oidlist->list */
2374                if (lcl_oidlist) {
2375                    if (lcl_oidlist->total == 0)
2376                        lcl_mask_index = 0;
2377                    /* also remember the mask */
2378                    lcl_mask = SwapUint4(lcl_oidlist->list[lcl_mask_index]);
2379                }
2380 
2381                /* if there is an oidlist already, make sure this gi is in both
2382                 * the gilist and the oidlist */
2383                if (!lcl_oidlist || 
2384                    (lcl_oidlist && lcl_oidlist->list &&
2385                    (lcl_oid < new_oidlist->total) && (lcl_mask & lcl_oid_bit))) 
2386                {
2387                    new_oidlist->list[lcl_mask_index] |= lcl_oid_bit;
2388                    new_oidlist_count++;
2389                }
2390            }
2391            gilist_idx++;
2392        }
2393        /* Save the newly created (or combined) oidlist */
2394        if (new_oidlist_count != 0) {
2395            for (i = 0; i < new_oidlistsz; i++)
2396                new_oidlist->list[i] = SwapUint4(new_oidlist->list[i]);
2397            rdfp->oidlist = OIDListFree(rdfp->oidlist);
2398            rdfp->oidlist = new_oidlist;
2399            new_oidlist = NULL;
2400            new_oidlist_count = 0;
2401        } else {
2402            new_oidlist = OIDListFree(new_oidlist);
2403        }
2404    }
2405 
2406    MemFree(gilist_ptrs);
2407     
2408    return TRUE;
2409 }
2410 
2411 /* The purpose of this function is to merge (or convert) any rdfp->gifile(s)
2412    in the rdfp_chain to rdfp->oidlist(s). These oidlists would be local to
2413    each rdfp (ie: if one gi list is given for a multivolume database, 
2414    multiple oidlists could be created corresponding to each of the rdfp in 
2415    the rdfp_chain).
2416    If there are any ordinal id lists present in the rdfp_chain, these will be
2417    intersected with the oidlists created from the gilists.
2418    The gis that were listed in each rdfp->gilist that belong to this database
2419    are returned in the bglp. Caller is responsible for deallocating this
2420    value.
2421 */
2422 static
2423 void MergeDbGiListsWithOIDLists(ReadDBFILEPtr rdfp_chain, ValNodePtr *err_ret,
2424         BlastGiListPtr *bglpp) 
2425 {
2426     ReadDBFILEPtr rdfp = NULL;  /* holds an individual element of the
2427                                    rdfp_chain linked list */
2428     ReadDBFILEPtr rdfp_tmp = NULL; /* holds the rest of the rdfp linked list
2429                                       when searching for gis in a rdfp element
2430                                       */
2431     BlastDoubleInt4Ptr  list=NULL, global_list = NULL;
2432     BlastGiListPtr tmp_list = NULL;
2433     Int4        i, index, ngis = 0, total_num_gis = 0, start;
2434     Char buf[256];
2435  
2436     /* If this argument is not passed in, any gis from rdfp->gifile will
2437      * be lost for formatting purposes */
2438     if (!bglpp)
2439         return;
2440 
2441     /** 
2442      * Gather all gis from all rdfp->gilist(s).
2443      */
2444     for (rdfp = rdfp_chain; rdfp; rdfp = rdfp->next) {
2445         if (!rdfp->gilist)
2446            continue;
2447   
2448         ngis = rdfp->gilist->count;
2449         list = (BlastDoubleInt4Ptr) Malloc(sizeof(BlastDoubleInt4)*ngis);
2450         if (!list) {
2451             ErrPostEx(SEV_FATAL, 1, 0, "Out of memory");
2452             return;
2453         }
2454 
2455         /* Isolate the current rdfp element */
2456         rdfp_tmp = rdfp->next;
2457         rdfp->next = NULL;
2458            
2459         /* See which gis in rdfp->gilist belong to this rdfp */
2460         for (index=0, i=0; i < ngis; i++) {
2461             list[index].ordinal_id = readdb_gi2seq(rdfp_chain, 
2462                     rdfp->gilist->i[i], &start);
2463             if (list[index].ordinal_id >= 0) {
2464                 list[index].gi = rdfp->gilist->i[i];
2465                 list[index].start = start;
2466                 index++;
2467             }
2468         }
2469 
2470         /* Restore the rdfp_chain linked list */
2471         rdfp->next = rdfp_tmp;
2472 
2473         /* After this point we don't really need the rdfp->gilist */
2474         rdfp->gilist = Int4ListFree(rdfp->gilist);
2475 
2476         /* No gis in rdfp->gilist belong to this rdfp, so restrict this entire
2477          * rdfp (mask all of its sequences) */
2478         if ( (ngis = index) == 0) {
2479             Int4 maxoid = rdfp->stop - rdfp->start;
2480 
2481             rdfp->oidlist = (OIDListPtr) MemNew(sizeof(OIDList));
2482             rdfp->oidlist->total = maxoid + 1;
2483             rdfp->oidlist->list = (Uint4Ptr)
2484                 MemNew(sizeof(Uint4)*(maxoid/MASK_WORD_SIZE + 2));
2485             rdfp->oidlist->memory = rdfp->oidlist->list;
2486             sprintf(buf,"%s database restricted with gis not present in the "
2487                     "database. Restricting the entire database", 
2488                     FileNameFind(rdfp->filename));
2489             ErrPostEx(SEV_INFO, 0, 0, buf);
2490             /*BlastConstructErrorMessage("MergeDbGiListsWithOIDLists", buf, 
2491                     SEV_WARNING, err_ret);*/
2492             list = MemFree(list);
2493             continue;
2494         }
2495         
2496         tmp_list = CombineDoubleInt4Lists(global_list, total_num_gis, 
2497                                           list, ngis);
2498         if (tmp_list) {
2499             global_list = MemFree(global_list);
2500             global_list = tmp_list->gi_list;
2501             tmp_list->gilist_not_owned = TRUE;
2502             BlastGiListDestruct(tmp_list, TRUE);
2503             total_num_gis += ngis;
2504         }
2505         list = MemFree(list);
2506 
2507     }
2508     
2509     /**
2510      * If there are any lists, create (or intersect) oidlists for the 
2511      * appropriate rdfp's
2512      */
2513     if (global_list && total_num_gis > 0) {
2514  
2515         if (IntersectDoubleInt4ListWithOIDLists(global_list, total_num_gis,
2516                     rdfp_chain) == FALSE) {
2517             ErrPostEx(SEV_WARNING, 0, 0, "MergeDbGiListsWithOIDLists: Could "
2518                     "not intersect gi lists with oidlists");
2519             MemFree(global_list);
2520             return;
2521         }
2522   
2523         *bglpp = BlastGiListNew(global_list, total_num_gis);
2524         MemFree(global_list);
2525     }
2526     return;
2527 }
2528 
2529 /* Purpose: Create the virtual oidlist to limit this blast search.
2530    Parameters:
2531    bglp contains the list of gis to limit the search with. It is freed by this
2532    function.
2533    rdfp_chain must be the head of the linked list of rdfps attached to the 
2534    BlastSearchBlk structure. 
2535    oidlist_forall_rdfp determines whether the gis in bglp mask only
2536    certain rdfps or mask the entire rdfp_chain. 
2537    Finally, in the options parameter, the the gilist_already_calculated 
2538    field is a flag to determine whether the ordinal ids for the bglp should 
2539    be calculated. This is not the case for the neighboring software 
2540    (ie: nabrd.cpp), so we do not calculate them again. This assumes that *
2541    only* real databases are used and that the virtual oidlist is freed for 
2542    every subsequent invocation of the BLASTSetUpSearch function.
2543    The options->sort_gi_list flag is also used by the neighboring software
2544    where no sorting of the gi list (bglp parameter) is needed.
2545 
2546    The bglp parameter is used to construct the virtual oidlist. If there are
2547    any local oidlists in the rdfp_chain, these are intersected with the bglp.
2548    If the bglp is NULL, any oidlists in the rdfp will be consolidated into
2549    the virtual oidlist but these will not be part of the return value. If 
2550    there are no oidlists and bglp is NULL, the search is not limited at all.
2551 */
2552 BlastGiListPtr 
2553 BlastCreateVirtualOIDList(BlastGiListPtr bglp, ReadDBFILEPtr rdfp_chain,
2554                 Boolean oidlist_forall_rdfp, BLAST_OptionsBlkPtr options)
2555 {
2556     BlastDoubleInt4Ptr gilist, real_gilist = NULL, *real_gilist_ptrs = NULL;
2557     ReadDBFILEPtr rdfp, mask_rdfp = NULL; 
2558     OIDListPtr virtual_oidlist = NULL, lcl_oidlist = NULL;
2559     Int4 start, maxoid = 0, virtual_oidlistsz, real_ngis = 0, real_idx = 0;
2560     Int4 virtual_oid, virtual_mask_index, virtual_oid_bit;
2561     Int4 lcl_mask_index, lcl_bit, lcl_oid;
2562     Uint4 lcl_mask = 0;
2563     register Int4 i;
2564 
2565         {
2566         Boolean there_are_oidlists = FALSE;
2567         for(rdfp=rdfp_chain;rdfp;rdfp=rdfp->next) {
2568                 if (rdfp->oidlist) {
2569                         there_are_oidlists = TRUE;
2570                         break;
2571                 }
2572         }
2573         if ( (bglp == NULL) && (there_are_oidlists == FALSE) )
2574                 return NULL;
2575         }
2576 
2577     /* initialize the start and oid fields of gilist, as well as maxoid */
2578     if (bglp) {
2579         gilist = bglp->gi_list;
2580         for (i = 0; i < bglp->total; i++) {
2581             if (!options->gilist_already_calculated && /* nabrd does this */
2582                 gilist[i].ordinal_id <= 0) { /* assumes gilist was
2583                                                 memset'd to zero */
2584                 gilist[i].ordinal_id = readdb_gi2seq(rdfp_chain, gilist[i].gi, 
2585                                                  &start);
2586                 gilist[i].start = start;
2587             }
2588             maxoid = MAX(maxoid, gilist[i].ordinal_id);
2589             if (gilist[i].ordinal_id != -1)
2590                 real_ngis++; /* keep track of how many gis are actually found */
2591         }
2592     } else {
2593         for (rdfp = rdfp_chain; rdfp; rdfp = rdfp->next) {
2594             if (rdfp->oidlist)
2595                 maxoid = MAX(maxoid, rdfp->oidlist->total+rdfp->start-1);
2596         }
2597     }
2598 
2599     /* Allocate the virtual oidlist */
2600     if (!(virtual_oidlist = (OIDListPtr) MemNew(sizeof(OIDList)))) {
2601         ErrPostEx(SEV_WARNING, 0, 0,"BlastCreateVirtualOIDList: Could not "
2602                 "allocate memory for global oidlist");
2603         return NULL;
2604     }
2605     virtual_oidlist->total = maxoid + 1;
2606     virtual_oidlistsz = maxoid/MASK_WORD_SIZE+2;
2607     virtual_oidlist->list = (Uint4Ptr) MemNew(virtual_oidlistsz*sizeof(Uint4));
2608     virtual_oidlist->memory = virtual_oidlist->list;
2609     if (!virtual_oidlist->list) {
2610         ErrPostEx(SEV_WARNING, 0, 0,"BlastCreateVirtualOIDList: Could not "
2611                 "allocate memory for global oidlist");
2612         return NULL;
2613     }
2614 
2615     /* real_gilist will store the gilist that corresponds to the intersection
2616      * of the bglp and the oidlists in the rdfp_chain */
2617     if (real_ngis > 0) {
2618         real_gilist = (BlastDoubleInt4Ptr)
2619             MemNew(sizeof(BlastDoubleInt4)*real_ngis);
2620         real_gilist_ptrs = (BlastDoubleInt4Ptr *)
2621             MemNew(sizeof(BlastDoubleInt4 *)*real_ngis);
2622         if (!real_gilist || !real_gilist_ptrs) {
2623             ErrPostEx(SEV_WARNING, 0, 0, "BlastCreateVirtualOIDList: Out of "
2624                     "memory");
2625             MemFree(virtual_oidlist);
2626             return NULL;
2627         }
2628     }
2629 
2630     if (bglp) {
2631        Boolean first_gi_found = FALSE;
2632        Int4 j, virtual_oid1;
2633 
2634 
2635        /* Iterate through the gilist, initializing the virtual oidlist */
2636         for (i = 0; i < bglp->total; i++) {
2637             if ((virtual_oid = gilist[i].ordinal_id) < 0) 
2638                 continue;
2639 
2640             for (rdfp = rdfp_chain; rdfp; rdfp = rdfp->next) {
2641                if (!first_gi_found && rdfp->start < gilist[i].start) {
2642                   if ((lcl_oidlist = rdfp->oidlist) && !oidlist_forall_rdfp) {
2643 
2644                      for (j = 0; j < lcl_oidlist->total; j++) {
2645                         lcl_mask_index = j/MASK_WORD_SIZE;
2646                         lcl_bit = 0x1 << 
2647                            (MASK_WORD_SIZE - 1 - j % MASK_WORD_SIZE);
2648                         virtual_oid1 = j + rdfp->start;
2649                         virtual_mask_index = virtual_oid1/MASK_WORD_SIZE;
2650                         virtual_oid_bit = 0x1 << 
2651                            (MASK_WORD_SIZE - 1 - virtual_oid1 % MASK_WORD_SIZE);
2652 
2653                         lcl_mask = 
2654                            SwapUint4(lcl_oidlist->list[lcl_mask_index]);
2655                         if (lcl_mask & lcl_bit) {
2656                            virtual_oidlist->list[virtual_mask_index] |=
2657                               virtual_oid_bit;
2658                         }
2659                      }
2660                   }
2661                } else if (rdfp->start == gilist[i].start) {
2662                   first_gi_found = TRUE;
2663                   break;
2664                }
2665             }
2666             if (!rdfp) continue;
2667 
2668             virtual_mask_index = virtual_oid/MASK_WORD_SIZE;
2669             virtual_oid_bit = 0x1 << (MASK_WORD_SIZE - 1 -
2670                                       virtual_oid % MASK_WORD_SIZE);
2671             if ((lcl_oidlist = rdfp->oidlist)) {
2672                 lcl_oid = gilist[i].ordinal_id - gilist[i].start;
2673                 lcl_mask_index = lcl_oid/MASK_WORD_SIZE;
2674                 lcl_bit = 0x1 << (MASK_WORD_SIZE - 1 -
2675                                       lcl_oid % MASK_WORD_SIZE);
2676                 if (lcl_oidlist->total == 0)
2677                     lcl_mask_index = 0;
2678             }
2679 
2680             if (!lcl_oidlist ||
2681                 (lcl_oidlist && lcl_oidlist->list &&
2682                  lcl_oid < lcl_oidlist->total &&
2683                  (SwapUint4(lcl_oidlist->list[lcl_mask_index])&lcl_bit))) 
2684             {
2685                 virtual_oidlist->list[virtual_mask_index] |= virtual_oid_bit;
2686                 MemCpy((VoidPtr) &real_gilist[real_idx], (VoidPtr) &gilist[i],
2687                         sizeof(BlastDoubleInt4));
2688                 real_idx++;
2689             }
2690         }
2691     } else {
2692         /* Create virtual oidlist by combining existing oidlists */
2693 
2694         for (rdfp = rdfp_chain; rdfp; rdfp = rdfp->next) {
2695 
2696             if (!(lcl_oidlist = rdfp->oidlist) || oidlist_forall_rdfp)
2697                 continue;
2698 
2699             for (i = 0; i < lcl_oidlist->total; i++) {
2700                 lcl_mask_index = i/MASK_WORD_SIZE;
2701                 lcl_bit = 0x1 << (MASK_WORD_SIZE - 1 - i % MASK_WORD_SIZE);
2702                 virtual_oid = i + rdfp->start;
2703                 virtual_mask_index = virtual_oid/MASK_WORD_SIZE;
2704                 virtual_oid_bit = 0x1 << (MASK_WORD_SIZE - 1 -
2705                         virtual_oid % MASK_WORD_SIZE);
2706 
2707                 lcl_mask = SwapUint4(lcl_oidlist->list[lcl_mask_index]);
2708                 if (lcl_mask & lcl_bit) {
2709                     virtual_oidlist->list[virtual_mask_index] |=
2710                         virtual_oid_bit;
2711                 }
2712             }
2713         }
2714     }
2715     for (i = 0; i < virtual_oidlistsz; i++)
2716         virtual_oidlist->list[i] = SwapUint4(virtual_oidlist->list[i]);
2717     
2718     /* Determine the first rdfp with an oidlist, and free the local oidlists */
2719     for (rdfp = rdfp_chain; rdfp; rdfp = rdfp->next) {
2720         if (rdfp->oidlist && !mask_rdfp)
2721             mask_rdfp = rdfp;
2722         rdfp->oidlist = OIDListFree(rdfp->oidlist);
2723     }
2724 
2725     /* attach mask to appropriate place */
2726     if (oidlist_forall_rdfp && real_ngis > 0) {
2727         rdfp_chain->oidlist = virtual_oidlist;
2728     } else {
2729         if (mask_rdfp)
2730             mask_rdfp->oidlist = virtual_oidlist;
2731         else {
2732             /* Should never happen */
2733             ErrPostEx(SEV_ERROR, 0, 0, "BlastCreateVirtualOIDList: Missing "
2734                     "oidlists to attach virtual_oidlist");
2735             OIDListFree(virtual_oidlist);
2736             return NULL;
2737         }
2738     }
2739 
2740     /* Discard bglp and use real_gilist to store the gilist that corresponds
2741      * to the virtual oidlist */
2742     if (bglp) {
2743         bglp = BlastGiListDestruct(bglp, TRUE);
2744         for (i = 0; i < real_idx; i++)
2745             real_gilist_ptrs[i] = &(real_gilist[i]);
2746         if (options->sort_gi_list == TRUE) {
2747             /* sort by oid to allow quick searches in BlastGetAllowedGis */
2748             HeapSort(real_gilist_ptrs, real_idx,
2749                  sizeof(BlastDoubleInt4Ptr PNTR), blast_double_int_oid_compare);
2750         }
2751         gilist = (BlastDoubleInt4Ptr)MemNew(real_idx*sizeof(BlastDoubleInt4));
2752         for (i = 0; i < real_idx; i++)
2753             MemCpy((VoidPtr)&gilist[i], real_gilist_ptrs[i],
2754                     sizeof(BlastDoubleInt4));
2755         bglp = BlastGiListNew(gilist, real_idx);
2756         MemFree(real_gilist);
2757         MemFree(real_gilist_ptrs);
2758         MemFree(gilist);
2759     }
2760 
2761     return bglp;
2762 }
2763 
2764 /* These are the criteria that can restrict a blast search:
2765    a) OIDListPtr rdfp->oidlist;
2766    b) Int4ListPtr rdfp->gilist;
2767    c) ValNodePtr options->gilist;
2768    d) CharPtr options->gifile;
2769    e) BlastDoubleInt4Ptr gi_list;
2770    
2771    The policy to restrict blast searches follows:
2772    All rdfp->gilist(s) are merged (union). The resulting gilist is then 
2773    intersected with any rdfp->oidlist(s) in (a)
2774    (see MergeDbGiListsWithOIDLists). Gis from rdfp->gilist(s) will be returned
2775    so that they can be added to the search->thr_info->blast_gi_list structure.
2776 
2777    If non-NULL, (c), (d), and (e) are intersected and this result is 
2778    intersected with the gilist obtained previously.
2779 
2780    A single 'virtual' oidlist is created and attached to the first rdfp in the
2781    search->rdfp chain that has been restricted (see BlastCreateVirtualOIDList).
2782     
2783    The resulting gi list from (b), (c), (d), and (e) is attached to the 
2784    BlastGiListPtr search->thr_info->blast_gi_list field. The definite source
2785    for the actual search resides in the virtual oidlist (if any).
2786 */
2787 Boolean
2788 BlastProcessGiLists(BlastSearchBlkPtr search, BLAST_OptionsBlkPtr options,
2789                     BlastDoubleInt4Ptr gi_list, Int4 gi_list_size)
2790 {
2791     BlastGiListPtr bglp = NULL, bglp_tmp = NULL;
2792     BlastDoubleInt4Ptr tmp_list = NULL;
2793     ValNodePtr vnp;
2794     Int4 ngis = 0;
2795     /* determine if final oidlist should cover all rdfps or it should start
2796      * with the first rdfp that had a rdfp->oidlist or a rdfp->gilist */
2797     Boolean oidlist_forall_rdfp = FALSE; 
2798     Boolean looking_for_gis = FALSE;
2799 
2800     /* if any of the following parameters is non-NULL, we need to create an 
2801      * oidlist that covers all rdfp(s) in search->rdfp. */
2802     if (gi_list || options->gifile || options->gilist)
2803         oidlist_forall_rdfp = TRUE;
2804 
2805     /* Create individual oidlists for those databases which have gi lists */
2806     MergeDbGiListsWithOIDLists(search->rdfp, &search->error_return, &bglp);
2807 
2808     if (gi_list)
2809     {
2810         bglp = CombineDoubleInt4Lists(gi_list, gi_list_size, NULL, 0);
2811         looking_for_gis = TRUE;
2812     }
2813 
2814     if (options->gifile) {
2815 
2816         if ((tmp_list = GetGisFromFile(options->gifile, &ngis))) {
2817             if (bglp) {
2818                 bglp_tmp = IntersectBlastGiLists(tmp_list, ngis,
2819                             bglp->gi_list, bglp->total);
2820             } else {
2821                 bglp_tmp = CombineDoubleInt4Lists(tmp_list, ngis, NULL, 0);
2822             }
2823 
2824             BlastGiListDestruct(bglp, TRUE);
2825             bglp = bglp_tmp;
2826             bglp_tmp = NULL;
2827             tmp_list = (BlastDoubleInt4Ptr) MemFree(tmp_list);
2828             ngis = 0;
2829         }
2830         looking_for_gis = TRUE;
2831     }
2832 
2833     /* this is very inefficient, needs to be changed in ASN.1 spec? */
2834     if (options->gilist) {
2835         Int4 alloc_chunk = 1024, alloc = 1024; 
2836         looking_for_gis = TRUE;
2837 
2838         tmp_list = (BlastDoubleInt4Ptr) MemNew(sizeof(BlastDoubleInt4)*alloc);
2839         if (!tmp_list) {
2840             ErrPostEx(SEV_WARNING, 0, 0, "BlastProcessGiLists: Out of memory");
2841             BlastGiListDestruct(bglp, TRUE);
2842             return looking_for_gis;
2843         }
2844 
2845         for (vnp = options->gilist; vnp; vnp = vnp->next) {
2846             if (ngis >= alloc) {
2847                 alloc += alloc_chunk;
2848                 tmp_list = (BlastDoubleInt4Ptr) 
2849                     Realloc(tmp_list, sizeof(BlastDoubleInt4)*alloc);
2850                 MemSet(&tmp_list[ngis], 0, alloc_chunk*sizeof(BlastDoubleInt4));
2851             }
2852             tmp_list[ngis++].gi = vnp->data.intvalue;
2853         }
2854 
2855         if (bglp) {
2856             bglp_tmp = IntersectBlastGiLists(tmp_list, ngis,
2857                        bglp->gi_list, bglp->total);
2858         } else {
2859             /* (options->gifile intersection gi_list) resulted in an empty
2860              * set, then we don't recreate bglp. */
2861             if (!options->gifile && !gi_list) 
2862                 bglp_tmp = CombineDoubleInt4Lists(tmp_list, ngis, NULL, 0);
2863         }
2864 
2865         BlastGiListDestruct(bglp, TRUE);
2866         bglp = bglp_tmp;
2867         bglp_tmp = NULL;
2868         tmp_list = (BlastDoubleInt4Ptr) MemFree(tmp_list);
2869     }
2870 
2871     /* Save the final gi list for formatting purposes */
2872     search->thr_info->blast_gi_list = 
2873         BlastCreateVirtualOIDList(bglp, search->rdfp, oidlist_forall_rdfp,
2874                 options);
2875 
2876    return looking_for_gis;
2877 }
2878 
2879 
2880 #define POSIT_PERCENT 0.05
2881 #define POSIT_NUM_ITERATIONS 10
2882 
2883 #define Xchar   21    /*character for low-complexity columns*/
2884 
2885 
2886 /* This should be more than large enough for any alphabet. */
2887 #define POSIT_MAX_ALPHABET_SIZE 30
2888 
2889 /*Compute probabilities for each score in posMatrix,
2890 also sets minScore and maxScore*/
2891 static BLAST_ScoreFreqPtr 
2892 BlastComputeProbs(BlastMatrixRescalePtr matrix_rescale, Boolean position_dependent)
2893 {
2894    Int2 alphabet_total;
2895    Int4 c;  /*index on characters*/
2896    Int4 p;  /*index on positions*/
2897    Int4 s;  /*index on scores */
2898    Int4 dim1, dim2;
2899    BLAST_Score score_min, score_max;
2900    BLAST_ScoreFreqPtr sfp;
2901    Int4 score;  /*one score in the matrix*/
2902    Nlm_FloatHi increment;  /*Increment in probability due to one score*/
2903    Int4 effectiveLength;
2904    Int4Ptr *matrix;
2905    Uint1 std_alphabet[POSIT_MAX_ALPHABET_SIZE];
2906 
2907    alphabet_total = BlastGetStdAlphabet(Seq_code_ncbistdaa, std_alphabet, POSIT_MAX_ALPHABET_SIZE);
2908    if (alphabet_total <= 0)
2909         return NULL;
2910     
2911    matrix = matrix_rescale->matrix;
2912    score_min = 0;
2913    score_max = 0;
2914    if(position_dependent)
2915    {
2916         dim1 = matrix_rescale->query_length;
2917         dim2 = alphabet_total;
2918    }
2919    else
2920    {
2921         dim1 = alphabet_total;
2922         dim2 = alphabet_total;
2923    }
2924 
2925    effectiveLength = 0;
2926    for (p = 0; p < matrix_rescale->query_length; p++)
2927      if (Xchar != matrix_rescale->query[p])
2928        effectiveLength++;
2929    for (p = 0; p < dim1; p++)
2930      if (Xchar != matrix_rescale->query[p])
2931        for (c = 0; c < dim2; c++) {
2932          if (matrix[p][std_alphabet[c]] <= BLAST_SCORE_MIN || matrix[p][std_alphabet[c]] >= BLAST_SCORE_MAX)
2933                 continue;
2934          if (matrix[p][std_alphabet[c]] < (score_min))
2935            (score_min) = matrix[p][std_alphabet[c]];
2936          if (matrix[p][std_alphabet[c]] > (score_max))
2937            (score_max) = matrix[p][std_alphabet[c]];
2938        }
2939 
2940    if((sfp = BlastScoreFreqNew(score_min, score_max)) == NULL) {
2941        ErrPostEx(SEV_ERROR, 0, 0, 
2942                  "BlastComputeProbs(): Unable to create Score Frequencies");
2943        return NULL;
2944    }
2945 
2946    sfp->obs_min = sfp->score_min;
2947    sfp->obs_max = sfp->score_max;
2948    for (p = 0; p < dim1; p++)
2949      if (Xchar != matrix_rescale->query[p])
2950        for (c = 0; c < dim2; c++) {
2951          /*Increment the weight for the score in position [p][std_alphabet[c]] */
2952          score = matrix[p][std_alphabet[c]];
2953          if (score <= BLAST_SCORE_MIN || score >= BLAST_SCORE_MAX)
2954                 continue;
2955          increment =
2956            (matrix_rescale->standardProb[std_alphabet[c]]/ effectiveLength);
2957          sfp->sprob[score]+= increment;
2958        }
2959 
2960    sfp->score_avg = 0.0;
2961    for(s = sfp->score_min; s <= sfp->score_max; s++)
2962      sfp->score_avg += s*sfp->sprob[s];
2963    return(sfp);
2964 }
2965 
2966 void LIBCALL
2967 updateLambdaK(BlastMatrixRescalePtr matrix_rescale, Boolean position_dependent)
2968 {
2969   BLAST_ScoreFreqPtr sfp;
2970  
2971   if((sfp = BlastComputeProbs(matrix_rescale, position_dependent)) == NULL)
2972       return;
2973 
2974   BlastKarlinBlkCalc(matrix_rescale->kbp_psi[0], sfp);
2975   matrix_rescale->kbp_gap_psi[0]->K = (matrix_rescale->kbp_psi[0]->K)*(matrix_rescale->kbp_gap_std[0]->K)/matrix_rescale->K_ideal;
2976   matrix_rescale->kbp_gap_psi[0]->logK = log(matrix_rescale->kbp_gap_psi[0]->K);
2977   sfp = BlastScoreFreqDestruct(sfp);
2978 }
2979 
2980 Nlm_FloatHi 
2981 BlastScaleMatrix(BlastMatrixRescalePtr matrix_rescale, Boolean position_dependent)
2982 {
2983    Int4 dim1, dim2;
2984    Int4 a,c; /*loop indices*/
2985    Boolean too_high=TRUE, first_time;
2986    Nlm_FloatHi factor, factor_low=1.0, factor_high=1.0;
2987    Nlm_FloatHi lambda, new_lambda; /*Karlin-Altschul parameter*/
2988    Int4 index; /*loop index for binary search*/
2989    Int4Ptr *private_matrix;
2990    Int4Ptr *matrix;
2991 
2992    private_matrix = matrix_rescale->private_matrix;
2993    matrix = matrix_rescale->matrix;
2994 
2995 
2996 /* Bracket the values. */
2997    if (position_dependent)
2998    {
2999         dim1 = matrix_rescale->query_length;
3000         dim2 = matrix_rescale->alphabet_size;
3001    }
3002    else
3003    {
3004         dim1 = matrix_rescale->alphabet_size;
3005         dim2 = matrix_rescale->alphabet_size;
3006    }
3007    lambda = matrix_rescale->lambda_ideal;
3008 
3009    first_time = TRUE;
3010    factor = 1.0;
3011    while (1)
3012    {
3013         for(c = 0; c < dim1; c++)
3014         {
3015         for(a = 0; a < dim2; a++)
3016         {
3017             if (private_matrix[c][a] == BLAST_SCORE_MIN)
3018             {
3019                 matrix[c][a] = BLAST_SCORE_MIN;
3020             }
3021             else
3022             {
3023                 matrix[c][a] = Nlm_Nint(((FloatHi)(factor*private_matrix[c][a]))/POSIT_SCALE_FACTOR);
3024             }
3025         }
3026     }
3027 
3028     updateLambdaK(matrix_rescale, position_dependent);
3029 
3030     if(matrix_rescale->kbp_psi[0] != NULL)
3031         new_lambda = matrix_rescale->kbp_psi[0]->Lambda;
3032     else
3033         return 0.0;
3034 
3035         if (new_lambda > lambda)
3036         {
3037                 if (first_time)
3038                 {
3039                         factor_high = 1.0 + POSIT_PERCENT;
3040                         factor = factor_high;
3041                         factor_low = 1.0;
3042                         too_high = TRUE;
3043                         first_time = FALSE;
3044                 }
3045                 else
3046                 {
3047                         if (too_high == FALSE)
3048                                 break;
3049                         factor_high += (factor_high-1.0);
3050                         factor = factor_high;
3051                 }
3052         }
3053         else if (new_lambda > 0) 
3054         {
3055                 if (first_time)
3056                 {
3057                         factor_high = 1.0;
3058                         factor_low = 1.0 - POSIT_PERCENT;
3059                         factor = factor_low;
3060                         too_high = FALSE;
3061                         first_time = FALSE;
3062                 }
3063                 else
3064                 {
3065                         if (too_high == TRUE)
3066                                 break;
3067                         factor_low += (factor_low-1.0);
3068                         factor = factor_low;
3069                 }
3070         } else {
3071            ErrPostEx(SEV_FATAL, 1, 0, "Matrix has positive average score");
3072            return 0.0;
3073         }
3074                 
3075    }
3076 
3077 /* binary search for ten times. */
3078    for (index=0; index<POSIT_NUM_ITERATIONS; index++)
3079    {
3080     factor = 0.5*(factor_high+factor_low);
3081         for(c = 0; c < dim1; c++)
3082         {
3083             for(a = 0; a < dim2; a++)
3084             {
3085                 if (private_matrix[c][a] == BLAST_SCORE_MIN)
3086                 {
3087                         matrix[c][a] = BLAST_SCORE_MIN;
3088                 }
3089                 else
3090                 {
3091                         matrix[c][a] = Nlm_Nint(((FloatHi)(factor*private_matrix[c][a]))/POSIT_SCALE_FACTOR);
3092                 }
3093             }
3094         }
3095         updateLambdaK(matrix_rescale, position_dependent);
3096 
3097         if(matrix_rescale->kbp_psi[0] != NULL)
3098             new_lambda = matrix_rescale->kbp_psi[0]->Lambda;
3099         else
3100             return 0.0;
3101 
3102         if (new_lambda > lambda)
3103         {
3104                 factor_low = factor;
3105         }
3106         else
3107         {
3108                 factor_high = factor;
3109         }
3110     }
3111 
3112    for(a = 0; a < matrix_rescale->alphabet_size; a++) 
3113      matrix[dim1][a] = BLAST_SCORE_MIN;
3114 
3115         return factor;
3116 }
3117 
3118 
3119 /*
3120         Deallocates only memory for BlastMatrixRescalePtr.
3121 */
3122 BlastMatrixRescalePtr
3123 BlastMatrixRescaleDestruct (BlastMatrixRescalePtr matrix_rescale)
3124 
3125 {
3126         if (matrix_rescale == NULL)
3127                 return NULL;
3128 
3129         return MemFree(matrix_rescale);
3130 }
3131 
3132 /*
3133         Allocates and fills the BlastMatrixRescalePtr.
3134 */
3135 
3136 BlastMatrixRescalePtr
3137 BlastMatrixRescaleNew(Int4 alphabet_size, Int4 query_length, Uint1Ptr query,  Nlm_FloatHiPtr standardProb, Int4Ptr *matrix, Int4Ptr *private_matrix, BLAST_KarlinBlkPtr *kbp_std, BLAST_KarlinBlkPtr *kbp_psi, BLAST_KarlinBlkPtr *kbp_gap_std, BLAST_KarlinBlkPtr *kbp_gap_psi, Nlm_FloatHi lambda_ideal,  Nlm_FloatHi K_ideal)
3138 
3139 {
3140         BlastMatrixRescalePtr matrix_rescale;
3141 
3142         matrix_rescale = (BlastMatrixRescalePtr) MemNew(sizeof(BlastMatrixRescale));
3143 
3144         matrix_rescale->alphabet_size = alphabet_size;
3145         matrix_rescale->query_length = query_length;
3146         matrix_rescale->query = query;
3147         matrix_rescale->standardProb = standardProb;
3148         matrix_rescale->matrix = matrix;
3149         matrix_rescale->private_matrix = private_matrix;
3150         matrix_rescale->kbp_std = kbp_std;
3151         matrix_rescale->kbp_psi = kbp_psi;
3152         matrix_rescale->kbp_gap_std = kbp_gap_std;
3153         matrix_rescale->kbp_gap_psi = kbp_gap_psi;
3154         matrix_rescale->lambda_ideal = lambda_ideal;
3155         matrix_rescale->K_ideal = K_ideal;
3156 
3157         return matrix_rescale;
3158 }
3159 
3160 /*
3161         BlastSeqLocCount counts the number of SeqLoc's 
3162 */
3163 static Int4 
3164 BlastSeqLocCount (SeqLocPtr mask_slp)
3165 
3166 {
3167         Int4 index=0;
3168         SeqLocPtr slp;
3169        
3170         while (mask_slp)
3171         {
3172                 slp=NULL;
3173                 while((slp = SeqLocFindNext(mask_slp, slp))!=NULL)
3174                 {
3175                         index++;
3176                 }
3177                 mask_slp = mask_slp->next;
3178         }
3179 
3180         return index;
3181 }
3182 
3183 static Boolean 
3184 BlastIntervalSort (BlastDoubleInt4Ptr *link_ptr, Int4 link_value, Int4 total, int (LIBCALLBACK *callback )PROTO ((Nlm_VoidPtr, Int4 start, Int4 stop)), VoidPtr ptr)
3185 
3186 {
3187         Boolean do_callback, start=TRUE;
3188         Int4 index, start_pos, stop_pos, largest_stop_pos;
3189 
3190         index=0;
3191         while (index < total)
3192         {
3193                 if (start == TRUE)
3194                 {
3195                         start_pos = link_ptr[index]->gi;
3196                         start = FALSE;
3197                         largest_stop_pos = 0;
3198                 }
3199                 else
3200                 {
3201                         /* Keep track of largest stop position. */
3202                         largest_stop_pos = MAX(largest_stop_pos, link_ptr[index]->ordinal_id);
3203                         do_callback = FALSE;
3204                         if (index == total-1)   /* Last one. */
3205                         {
3206                                 stop_pos = link_ptr[index]->ordinal_id;
3207                                 start = TRUE;
3208                                 do_callback = TRUE;
3209                         }
3210                         else if (largest_stop_pos+link_value < link_ptr[index+1]->gi)
3211                         { /* Check overlap with next one. */
3212                                 stop_pos = link_ptr[index]->ordinal_id;
3213                                 start = TRUE;
3214                                 do_callback = TRUE;
3215                         }
3216                         
3217                         if (do_callback)
3218                         {
3219                                 callback(ptr, start_pos, MAX(largest_stop_pos, stop_pos));
3220                         }
3221                         index++;
3222                 }
3223         }
3224 
3225         return TRUE;
3226 }
3227 
3228 static int LIBCALLBACK
3229 list_ptr_compare(VoidPtr v1, VoidPtr v2)
3230 
3231 {
3232         BlastDoubleInt4Ptr h1, h2;
3233         BlastDoubleInt4Ptr *hp1, *hp2;
3234 
3235         hp1 = (BlastDoubleInt4Ptr PNTR) v1;
3236         hp2 = (BlastDoubleInt4Ptr PNTR) v2;
3237         h1 = *hp1;
3238         h2 = *hp2;
3239 
3240         if (h1->gi < h2->gi)
3241                 return -1;
3242         if (h1->gi > h2->gi)
3243                 return 1;
3244 
3245         return 0;
3246 }
3247 
3248 int LIBCALLBACK
3249 slp_callback(VoidPtr ptr, Int4 start, Int4 stop)
3250 
3251 {
3252         ValNodePtr *vnpp;
3253 
3254 
3255         vnpp = (ValNodePtr PNTR) ptr;   
3256 
3257         ValNodeAddInt(vnpp, 0, start);
3258         ValNodeAddInt(vnpp, 1, stop);
3259 
3260         return 1;
3261 }
3262 
3263 
3264 ValNodePtr
3265 BlastSeqLocFillDoubleInt (SeqLocPtr mask_slp, Int4 max_length, Boolean reverse)
3266 
3267 {
3268         return BlastSeqLocFillDoubleIntEx(mask_slp, 0, max_length, reverse, 0);
3269 
3270 }
3271 
3272 /*
3273         SeqLocPtr mask_slp: location to be masked
3274         Int4 full_query_length: length of the query, not just portion being blasted.
3275         Int4 max_length: length of query being blasted.
3276         Boolean reverse: do minus strand.
3277         Int4 offset: offset in full query to portion being blasted.
3278 
3279         the coordinates reported here are in terms of the portion of the query
3280         being blasted for use by the function that makes up words.
3281 */
3282 
3283 ValNodePtr
3284 BlastSeqLocFillDoubleIntEx (SeqLocPtr mask_slp, Int4 full_query_length, Int4 max_length, Boolean reverse, Int4 offset)
3285 
3286 {
3287         Int4 count, index, start, stop;
3288         BlastDoubleInt4Ptr list_pri, *list_ptr_pri;
3289         SeqLocPtr slp;
3290         ValNodePtr vnp;
3291 
3292         vnp = NULL;
3293         if (mask_slp == NULL)
3294         {
3295                 ValNodeAddInt(&vnp, 1, -1);
3296                 ValNodeAddInt(&vnp, 0, max_length);
3297                 return vnp;
3298         }
3299 
3300         /* use length of segment if full_query_length no set. */
3301         if (full_query_length == 0)
3302                 full_query_length = max_length;
3303 
3304         count = BlastSeqLocCount (mask_slp);
3305         list_pri = (BlastDoubleInt4Ptr) MemNew(count*sizeof(BlastDoubleInt4)); 
3306         list_ptr_pri = (BlastDoubleInt4Ptr PNTR) MemNew(count*sizeof(BlastDoubleInt4Ptr)); 
3307 
3308         index=0;
3309         while (mask_slp)
3310         {
3311                 slp=NULL;
3312                 while((slp = SeqLocFindNext(mask_slp, slp))!=NULL)
3313                 {
3314                         if (reverse)
3315                         {
3316                                 start = full_query_length - 1 - SeqLocStop(slp);
3317                                 stop = full_query_length - 1 - SeqLocStart(slp);
3318                         }
3319                         else
3320                         {
3321                                 start = SeqLocStart(slp);
3322                                 stop = SeqLocStop(slp);
3323                         }
3324 
3325                         list_pri[index].gi = start - offset;
3326                         list_pri[index].ordinal_id = stop -offset;
3327                         list_ptr_pri[index] = &(list_pri[index]);
3328                         index++;
3329                 }
3330                 mask_slp = mask_slp->next;
3331         }
3332 
3333         HeapSort(list_ptr_pri, count, sizeof(BlastHitRangePtr PNTR), list_ptr_compare);
3334 
3335         /* Allows the proper start. */
3336         ValNodeAddInt(&vnp, 1, -1);
3337         BlastIntervalSort(list_ptr_pri, 0, count, slp_callback, (VoidPtr) &vnp);
3338 
3339         list_pri = MemFree(list_pri);
3340         list_ptr_pri = MemFree(list_ptr_pri);
3341 
3342         return vnp;
3343 }
3344 
3345 ValNodePtr
3346 BlastSeqLocFillDoubleIntRev (ValNodePtr location, SeqLocPtr mask_slp, Int4 max_length, Int4 full_query_length, Int4 offset)
3347 
3348 {
3349    Int4 count, index, start, stop;
3350    BlastDoubleInt4Ptr list_pri, *list_ptr_pri;
3351    SeqLocPtr slp;
3352    ValNodePtr vnp;
3353    
3354    vnp = location;
3355    if (mask_slp == NULL) {
3356       if (!vnp) {
3357          ValNodeAddInt(&vnp, 1, -1);
3358          ValNodeAddInt(&vnp, 0, max_length);
3359       }
3360       return vnp;
3361    }
3362 
3363    /* use length of segment if full_query_length no set. */
3364    if (full_query_length == 0)
3365         full_query_length = max_length;
3366 
3367    
3368    count = BlastSeqLocCount (mask_slp);
3369    list_pri = (BlastDoubleInt4Ptr) MemNew(count*sizeof(BlastDoubleInt4)); 
3370    list_ptr_pri = (BlastDoubleInt4Ptr PNTR) MemNew(count*sizeof(BlastDoubleInt4Ptr)); 
3371    
3372    index=0;
3373    while (mask_slp) {
3374       slp=NULL;
3375       while((slp = SeqLocFindNext(mask_slp, slp))!=NULL) {
3376          start = full_query_length + max_length - SeqLocStop(slp);
3377          stop = full_query_length + max_length - SeqLocStart(slp);
3378          
3379          list_pri[index].gi = start - offset;
3380          list_pri[index].ordinal_id = stop - offset;
3381          list_ptr_pri[index] = &(list_pri[index]);
3382          index++;
3383       }
3384       mask_slp = mask_slp->next;
3385    }
3386    
3387    HeapSort(list_ptr_pri, count, sizeof(BlastHitRangePtr PNTR), list_ptr_compare);
3388    BlastIntervalSort(list_ptr_pri, 0, count, slp_callback, (VoidPtr) &vnp);
3389    
3390         list_pri = MemFree(list_pri);
3391         list_ptr_pri = MemFree(list_ptr_pri);
3392 
3393    return vnp;
3394 }
3395 
3396 
3397 
3398 static Boolean
3399 small(BLASTResultHspPtr a, BLASTResultHspPtr b)
3400 {
3401   if (a->e_value > b->e_value) return TRUE;
3402   if (a->e_value < b->e_value) return FALSE;
3403   if (a->score > b->score) return FALSE;
3404   if (a->score < b->score) return TRUE;
3405   if (a->point_back->subject_id > b->point_back->subject_id) return FALSE;
3406   return TRUE;
3407 }
3408 
3409 
3410 /*
3411         this is some sort of HeapSort (or it makes the heap
3412         as it is used in HeapSort).
3413 */
3414 static void
3415 BlastHeapify(BLASTHeapPtr which_heap, Int4 position)
3416 {
3417   Int4 heap_size, index, lim, left_son, small_son;
3418   BLASTResultHspPtr tmp, PNTR heap;
3419 
3420   heap_size = which_heap->num_in_heap;
3421   heap = which_heap->heap;
3422   index = position; lim = heap_size/2;
3423 
3424   while (index < lim) {
3425     left_son = 2*index + 1;
3426     if (left_son == heap_size-1)
3427       small_son = left_son;
3428     else {
3429       if (small(heap[left_son],heap[left_son+1])) small_son = left_son;
3430       else small_son = left_son+1;
3431     }
3432 /* If heap[small_son] is less significant than heap[index], then
3433    switch them.  Otherwise exit the loop. 
3434 */
3435     if (small(heap[small_son], heap[index])) {
3436       tmp = heap[index];
3437       heap[index] = heap[small_son];
3438       heap[small_son] = tmp;
3439       index = small_son;
3440     } else
3441       break;
3442   }
3443 }
3444 
3445 /*
3446         Insert a new element into the BLASTHeapPtr list, which
3447         appears to be ordered (here) from least to most significant.
3448         the order does not seem to be strictly from least to
3449         most significant, or is it?
3450 */
3451 static void
3452 BlastInsertHeap(BLASTHeapPtr which_heap, BLASTResultHspPtr hp)
3453 {
3454   Int4 heap_size, index, father;
3455   BLASTResultHspPtr PNTR heap;
3456 
3457   hp->point_back->num_ref+= 1;
3458   /* Increase heap size by one for new entry. */
3459   heap_size = (which_heap->num_in_heap+=1);
3460   heap = which_heap->heap;
3461   index = heap_size-1; 
3462  
3463   while (index > 0) {
3464     father = (index-1)/2;
3465     /* If heap[father] is LESS significant than hp, then exit loop. */
3466     if (small(heap[father], hp)) {
3467       break;
3468     }
3469     /* The 1st time this is called heap[index] is NULL as it's one past
3470         the last filled in element.  The idea here is obviously to
3471         move the more significant elements to the end, but don't
3472         they get out of order then?  
3473     */
3474     heap[index] = heap[father];
3475     index = father;
3476   }
3477   /* Fill in new element. */
3478   heap[index] = hp;
3479 } 
3480 
3481 Int4
3482 BlastDeleteHeap(BLASTHeapPtr which_heap, Int4 position)
3483 {
3484   Int4 last, return_value;
3485   BLASTResultHspPtr PNTR heap;
3486 
3487   last = (which_heap->num_in_heap-=1);
3488   heap = which_heap->heap;
3489   return_value = (heap[position]->point_back->num_ref -= 1);
3490   if (position != last) {
3491     heap[position] = heap[last];
3492     BlastHeapify(which_heap, position);
3493   }
3494   return return_value;
3495 }
3496 
3497 static void
3498 BlastInsertWholeHeap(BlastSearchBlkPtr search, BLASTHeapPtr which_heap, Int4 cutvalue)
3499 {
3500   BLASTHeapPtr hp;
3501   Int4 i;
3502 
3503   hp = (BLASTHeapPtr) MemNew(sizeof(BLASTHeapStruct));
3504   hp->heap = (BLASTResultHspPtr PNTR) MemNew(sizeof(BLASTResultHspPtr)*search->pbp->hsp_range_max);
3505   hp->next = which_heap;
3506   hp->num_of_ref = 0;
3507   hp->cutvalue = cutvalue;
3508   if (which_heap->prev) {
3509     hp->prev = which_heap->prev;
3510     which_heap->prev->next = hp;
3511   } else {
3512     hp->prev = NULL;
3513     search->result_struct->heap_ptr = hp;
3514   }
3515   which_heap->prev = hp;
3516   hp->num_in_heap = which_heap->num_in_heap;
3517   for (i = 0; i < which_heap->num_in_heap; i++) {
3518     hp->heap[i] = which_heap->heap[i];
3519     which_heap->heap[i]->point_back->num_ref +=1;
3520   }
3521 }
3522 
3523 static void
3524 BlastDeleteWholeHeap(BlastSearchBlkPtr search, BLASTHeapPtr which_heap)
3525 {
3526   Int4 i;
3527   if ((which_heap->num_of_ref -= 1) >0) return;
3528   if (which_heap->next) which_heap->next->prev = which_heap->prev;
3529   if (which_heap->prev) which_heap->prev->next = which_heap->next;
3530   else search->result_struct->heap_ptr = which_heap->next;
3531   for (i = 0; i < which_heap->num_in_heap; i++) {
3532      which_heap->heap[i]->point_back->num_ref -= 1;
3533   }
3534   which_heap->heap = MemFree(which_heap->heap);
3535   which_heap = MemFree(which_heap);
3536 }
3537 
3538 static Int2
3539 BlastPossibleDeleteWholeHeap(BlastSearchBlkPtr search, BLASTHeapPtr PNTR hhp,  BLASTResultHspPtr heap0)
3540      /* if the deleted hit result a remove of the right end point, 
3541         return 1 to make the program rerun 
3542         */
3543 {
3544   BLASTHeapPtr hp = *hhp;
3545 
3546   if (heap0 == NULL || hp == NULL)
3547         return 0;
3548 
3549   if (heap0->back_left && hp->prev && heap0->back_left == hp->prev) {
3550     heap0->back_left = NULL;
3551     if ((hp->prev->num_of_ref -= 1) == 0) {
3552       BlastDeleteWholeHeap(search, hp->prev);
3553     }
3554   }
3555   if (heap0->back_right == hp) {
3556     heap0->back_right = NULL;
3557     if ((hp->num_of_ref -= 1)==0) {
3558       *hhp = hp->next;    
3559       BlastDeleteWholeHeap(search, hp);
3560       *hhp = (*hhp)->prev;
3561       return 1;
3562     }
3563   }
3564   return 0;
3565 }
3566 
3567 
3568 
3569 Int2
3570 BlastInsertList2Heap(BlastSearchBlkPtr search, BLASTResultHitlistPtr result_hitlist)
3571 {
3572     BLASTResultHspPtr PNTR heap, hsp;
3573     BLASTResultHspPtr hsp_array;
3574     Int4 index, hsp_range_max;
3575     Int4 begin, end, hspcnt, query_length;
3576     Int2 context;
3577     Boolean hsp_deleted, new_inserted;
3578     BLASTHeapPtr hp;
3579 
3580     if (search->pbp->perform_culling == FALSE || 
3581         search->pbp->mb_params)  /* Culling is turned off. */
3582         return 3;
3583 
3584     hsp_deleted = new_inserted  = FALSE;  
3585     hsp_range_max = search->pbp->hsp_range_max;
3586     hspcnt = result_hitlist->hspcnt;
3587     hsp_array = result_hitlist->hsp_array;
3588 
3589     if (search->prog_number==blast_type_blastn)
3590        query_length = search->query_context_offsets[1] - 1;
3591 
3592     for (index = 0; index < hspcnt; index++) {
3593       hsp = &hsp_array[index];
3594       if (search->prog_number==blast_type_blastn) {
3595          context = hsp->query_offset / query_length;
3596          if (context == 0) {
3597             begin = hsp->query_offset;
3598             end = (hsp->query_offset+hsp->query_length-1);
3599          } else {
3600             end = 2*query_length - hsp->query_offset;
3601             begin = end - hsp->query_length + 1;
3602          }
3603       } else {
3604          begin = hsp->query_offset;
3605          end = (hsp->query_offset+hsp->query_length-1);
3606       }
3607       for (hp = search->result_struct->heap_ptr; hp; hp = hp->next) 
3608         if (hp->cutvalue >= begin) break;
3609       if (hp && (hp->num_in_heap < hsp_range_max || small(hp->heap[0], hsp))) {
3610         if (!hp->prev || hp->prev->cutvalue != begin-1) {
3611           BlastInsertWholeHeap(search, hp, begin-1);
3612         }
3613         hp->prev->num_of_ref +=1;
3614         hsp->back_left = hp->prev;
3615       } else hsp->back_left = NULL;
3616       for (; hp; hp = hp->next) {
3617         if (end <= hp->cutvalue) break;
3618         heap = hp->heap;
3619         if (hp->num_in_heap >= hsp_range_max) {
3620           if (small(heap[0], hsp)) {
3621             if (BlastPossibleDeleteWholeHeap(search, &hp, heap[0])) continue;
3622             if ((heap[0]->point_back->num_ref-=1)==0) 
3623               hsp_deleted = TRUE;
3624             heap = hp->heap;
3625             heap[0] = hsp;
3626             hsp->point_back->num_ref +=1;
3627             BlastHeapify(hp, 0);
3628             new_inserted = TRUE;
3629           } 
3630         } else {
3631           BlastInsertHeap(hp, hsp);
3632           new_inserted = TRUE;
3633         } 
3634       }
3635       hp = search->result_struct->heap_ptr;
3636       if (hp && (hp->num_in_heap < hsp_range_max || small(hp->heap[0], hsp))) {
3637         if (end != hp->cutvalue) {
3638           BlastInsertWholeHeap(search, hp, end);
3639           hp = hp->prev;
3640         } 
3641         hp->num_of_ref += 1;
3642         heap = hp->heap;
3643         if (hp->num_in_heap >= hsp_range_max) {
3644             BlastPossibleDeleteWholeHeap(search, &hp, heap[0]);
3645             if ((heap[0]->point_back->num_ref-=1)==0) 
3646               hsp_deleted = TRUE;
3647             heap = hp->heap;
3648             heap[0] = hsp;
3649             hsp->point_back->num_ref+=1;
3650             BlastHeapify(hp, 0);
3651             new_inserted = TRUE; 
3652         } else {
3653           BlastInsertHeap(hp, hsp);
3654           new_inserted = TRUE;
3655         }
3656         hsp->back_right = hp;
3657       } else hsp->back_right = NULL;
3658     }
3659     if (hsp_deleted) return 1;
3660     if (new_inserted) return 2;
3661     return 0;
3662 }
3663 
3664 void
3665 BlastFreeHeap(BlastSearchBlkPtr search, BLASTResultHitlistPtr result_hitlist)
3666 { 
3667     BLASTResultHspPtr PNTR heap, hsp;
3668     BLASTResultHspPtr hsp_array;
3669     Int4 index;
3670     Int4 begin, end, i, hspcnt, query_length;
3671     Int2 context;
3672     BLASTHeapPtr hp;
3673 
3674     if (search->pbp->perform_culling == FALSE || 
3675         search->pbp->mb_params)  /* Culling is turned off. */
3676         return;
3677 
3678     hspcnt = result_hitlist->hspcnt;
3679     hsp_array = result_hitlist->hsp_array;
3680 
3681     if (search->prog_number==blast_type_blastn)
3682        query_length = search->query_context_offsets[1] - 1;
3683     
3684     for (index = 0; index < hspcnt; index++) {
3685       hsp = &hsp_array[index];
3686       if (search->prog_number==blast_type_blastn) {
3687          context = hsp->query_offset / query_length;
3688          if (context == 0) {
3689             begin = hsp->query_offset;
3690             end = (hsp->query_offset+hsp->query_length-1);
3691          } else {
3692             end = 2*query_length - hsp->query_offset;
3693             begin = end - hsp->query_length + 1;
3694          }
3695       } else {
3696          begin = hsp->query_offset;
3697          end = (hsp->query_offset+hsp->query_length-1);
3698       }
3699       if (hsp->back_left) {
3700         hp = hsp->back_left->next;
3701         if (BlastPossibleDeleteWholeHeap(search, &hp, hsp)) continue;
3702       } else {
3703         for (hp = search->result_struct->heap_ptr; hp; hp = hp->next) {
3704           if (hp->cutvalue >= begin) break;
3705         }
3706       }
3707       for (; hp; hp = hp->next) {
3708         if (hp->cutvalue > end ) break;
3709         heap = hp->heap;
3710         for (i = 0; i < hp->num_in_heap; i++) {
3711           if (heap[i] == hsp) {
3712             BlastDeleteHeap(hp, i);
3713             break;
3714           }
3715         }
3716       }
3717       if (hp)
3718          hp = hp->prev;
3719       BlastPossibleDeleteWholeHeap(search, &hp, hsp);
3720     }
3721 }
3722 
3723 /*converts a 1-level list of SeqAligns to a 2-level
3724 list store in a ValNodePtr; the list lastSeqAligns
3725 specifies where to break up seqAlignList */
3726 ValNodePtr convertSeqAlignListToValNodeList(SeqAlignPtr seqAlignList, SeqAlignPtr * lastSeqAligns, Int4 numLastSeqAligns)
3727 {
3728     ValNodePtr  returnValNodePtr, thisValNodePtr, nextValNodePtr;
3729     SeqAlignPtr thisSeqAlign;
3730     Int4 lastAlignIndex;
3731     Int4 lastNonEmptyAlignIndex;
3732     
3733     if(seqAlignList == NULL)
3734         return NULL;
3735     
3736     lastAlignIndex = 0;
3737     lastNonEmptyAlignIndex = 0;
3738     returnValNodePtr = (ValNodePtr) MemNew (sizeof(ValNode));
3739     if (NULL == lastSeqAligns[lastAlignIndex]) 
3740         returnValNodePtr->data.ptrvalue = NULL;
3741     else
3742         returnValNodePtr->data.ptrvalue = seqAlignList;
3743     returnValNodePtr->next = NULL;
3744     thisValNodePtr = returnValNodePtr;
3745     thisSeqAlign = seqAlignList;
3746     
3747     /*Find first sub-list that is non-NULL and set it
3748       to point to seqAlignList*/
3749     while ((lastAlignIndex < (numLastSeqAligns -1) &&
3750             (NULL == lastSeqAligns[lastAlignIndex]))) {
3751         nextValNodePtr = (ValNodePtr) MemNew (sizeof(ValNode));
3752         if (NULL == lastSeqAligns[lastAlignIndex+1]) 
3753             nextValNodePtr->data.ptrvalue = NULL;
3754         else {
3755             nextValNodePtr->data.ptrvalue = seqAlignList;
3756             lastNonEmptyAlignIndex = lastAlignIndex +1;
3757         }
3758         thisValNodePtr->next = nextValNodePtr;
3759         nextValNodePtr->next = NULL;
3760         thisValNodePtr = nextValNodePtr;
3761         lastAlignIndex++;
3762     }
3763     
3764     while ((NULL != thisSeqAlign) || (lastAlignIndex < (numLastSeqAligns -1))) { 
3765         if ((lastAlignIndex < (numLastSeqAligns -1)) &&
3766             ((NULL == lastSeqAligns[lastAlignIndex])  ||
3767              (thisSeqAlign == lastSeqAligns[lastAlignIndex]))) {
3768             nextValNodePtr = (ValNodePtr) MemNew (sizeof(ValNode));
3769             
3770             if (NULL != lastSeqAligns[lastAlignIndex+1]) {
3771                 nextValNodePtr->data.ptrvalue = thisSeqAlign->next;
3772                 thisSeqAlign = thisSeqAlign->next;
3773                 lastSeqAligns[lastNonEmptyAlignIndex]->next = NULL;
3774                 lastNonEmptyAlignIndex = lastAlignIndex +1;
3775             } else {
3776                 nextValNodePtr->data.ptrvalue = NULL;
3777             }
3778             thisValNodePtr->next = nextValNodePtr;
3779             nextValNodePtr->next = NULL;
3780             thisValNodePtr = nextValNodePtr;
3781             lastAlignIndex++;
3782         } else {
3783             if (NULL != thisSeqAlign)
3784                 thisSeqAlign = thisSeqAlign->next;
3785         }
3786     }
3787     return (returnValNodePtr);
3788 }
3789 
3790 /*converts a 2-level list of SeqAligns stored as a ValNodePtr */
3791  
3792 SeqAlignPtr 
3793 convertValNodeListToSeqAlignList(ValNodePtr seqAlignDoubleList, 
3794                                  SeqAlignPtr ** lastSeqAligns, 
3795                                  Int4 * numLastSeqAligns)
3796 {
3797     ValNodePtr thisValNodePtr, secondValNodePtr;
3798     SeqAlignPtr returnSeqAlign, thisSeqAlign;
3799     Int4 numValNodePtrs, indexValNodePtrs;
3800 
3801     
3802     thisValNodePtr = seqAlignDoubleList;
3803     numValNodePtrs = 0;
3804 
3805     while (NULL != thisValNodePtr) {
3806         numValNodePtrs++;
3807         thisValNodePtr = thisValNodePtr->next;
3808     }
3809 
3810     if (seqAlignDoubleList == NULL || 
3811         seqAlignDoubleList->data.ptrvalue == NULL) {
3812         *numLastSeqAligns = 0;
3813         *lastSeqAligns = NULL;
3814         return NULL;
3815     } else {
3816         indexValNodePtrs = 0;
3817         *numLastSeqAligns = numValNodePtrs;
3818         *lastSeqAligns = (SeqAlignPtr *) MemNew (numValNodePtrs * sizeof(SeqAlignPtr));
3819         returnSeqAlign = seqAlignDoubleList->data.ptrvalue;
3820         thisValNodePtr = seqAlignDoubleList;
3821         do {
3822           thisSeqAlign = thisValNodePtr->data.ptrvalue;
3823           if (thisSeqAlign != NULL) {
3824             while (thisSeqAlign->next != NULL) 
3825               thisSeqAlign = thisSeqAlign->next;
3826           }
3827           (*lastSeqAligns)[indexValNodePtrs] = thisSeqAlign;
3828           indexValNodePtrs++;
3829           /*find next non-empty list to link in*/
3830           if (thisSeqAlign != NULL) {
3831             secondValNodePtr = thisValNodePtr->next;
3832             while (NULL != secondValNodePtr) {
3833               if (NULL != secondValNodePtr->data.ptrvalue)
3834               {
3835                 thisSeqAlign->next = secondValNodePtr->data.ptrvalue;
3836                 break;
3837               }
3838               else
3839                 secondValNodePtr = secondValNodePtr->next;
3840             }
3841             /*if all remaning ValNodePtrs have NULL seqAligns, finish off
3842               list of seqAligns with a NULL*/
3843             if (NULL == secondValNodePtr)
3844               thisSeqAlign->next = NULL;
3845           }
3846           thisValNodePtr = thisValNodePtr->next;
3847         } while (NULL != thisValNodePtr);
3848         return returnSeqAlign;
3849     }
3850 }
3851 
3852 void LIBCALL
3853 fillCandLambda(seedSearchItems * seedSearch, Char *matrixName, BLAST_OptionsBlkPtr options)
3854 {
3855   if (0 == StringCmp("BLOSUM62", matrixName)) {
3856     seedSearch->paramC = 0.50;
3857     if ((11 == options->gap_open) && (1 == options->gap_extend)) {
3858       seedSearch->paramLambda = 0.270;
3859       seedSearch->paramK = 0.047;
3860       return;
3861     }
3862     if ((9 == options->gap_open) && (2 == options->gap_extend)) {
3863       seedSearch->paramLambda = 0.285;
3864       seedSearch->paramK = 0.075;
3865       return;
3866     }
3867     if ((8 == options->gap_open) && (2 == options->gap_extend)) {
3868       seedSearch->paramLambda = 0.265;
3869       seedSearch->paramK = 0.046;
3870       return;
3871     }
3872     if ((7 == options->gap_open) && (2 == options->gap_extend)) {
3873       seedSearch->paramLambda = 0.243;
3874       seedSearch->paramK = 0.032;
3875       return;
3876     }
3877     if ((12 == options->gap_open) && (1 == options->gap_extend)) {
3878       seedSearch->paramLambda = 0.281;
3879       seedSearch->paramK = 0.057;
3880       return;
3881     }
3882     if ((10 == options->gap_open) && (1 == options->gap_extend)) {
3883       seedSearch->paramLambda = 0.250;
3884       seedSearch->paramK = 0.033;
3885       return;
3886     }
3887     ErrPostEx(SEV_FATAL, 1, 0, "The combination %d for gap opening cost and %d for gap extension is not supported in PHI-BLAST with matrix %s\n", options->gap_open, options->gap_extend, matrixName);
3888   }
3889   else {
3890     if (0 == StringCmp("PAM30", matrixName)) { 
3891       seedSearch->paramC = 0.30;
3892       if ((9 == options->gap_open) && (1 == options->gap_extend)) {
3893         seedSearch->paramLambda = 0.295;
3894         seedSearch->paramK = 0.13;
3895         return;
3896       }
3897       if ((7 == options->gap_open) && (2 == options->gap_extend)) {
3898         seedSearch->paramLambda = 0.306;
3899         seedSearch->paramK = 0.15;
3900         return;
3901       }
3902       if ((6 == options->gap_open) && (2 == options->gap_extend)) {
3903         seedSearch->paramLambda = 0.292;
3904         seedSearch->paramK = 0.13;
3905         return;
3906       }
3907       if ((5 == options->gap_open) && (2 == options->gap_extend)) {
3908         seedSearch->paramLambda = 0.263;
3909         seedSearch->paramK = 0.077;
3910         return;
3911       }
3912       if ((10 == options->gap_open) && (1 == options->gap_extend)) {
3913         seedSearch->paramLambda = 0.309;
3914         seedSearch->paramK = 0.15;
3915         return;
3916       }
3917       if ((8 == options->gap_open) && (1 == options->gap_extend)) {
3918         seedSearch->paramLambda = 0.270;
3919         seedSearch->paramK = 0.070;
3920         return;
3921       }
3922       ErrPostEx(SEV_FATAL, 1, 0, "The combination %d for gap opening cost and %d for gap extension is not supported in PHI-BLAST with matrix %s\n", options->gap_open, options->gap_extend, matrixName);
3923     }
3924     else {
3925       if (0 == StringCmp("PAM70", matrixName)) { 
3926         seedSearch->paramC = 0.35;
3927         if ((10 == options->gap_open) && (1 == options->gap_extend)) {
3928           seedSearch->paramLambda = 0.291;
3929           seedSearch->paramK = 0.089;
3930           return;
3931         }
3932         if ((8 == options->gap_open) && (2 == options->gap_extend)) {
3933           seedSearch->paramLambda = 0.303;
3934           seedSearch->paramK = 0.13;
3935           return;
3936         }
3937         if ((7 == options->gap_open) && (2 == options->gap_extend)) {
3938           seedSearch->paramLambda = 0.287;
3939           seedSearch->paramK = 0.095;
3940           return;
3941         }
3942         if ((6 == options->gap_open) && (2 == options->gap_extend)) {
3943           seedSearch->paramLambda = 0.269;
3944           seedSearch->paramK = 0.079;
3945           return;
3946         }
3947         if ((11 == options->gap_open) && (1 == options->gap_extend)) {
3948           seedSearch->paramLambda = 0.307;
3949           seedSearch->paramK = 0.13;
3950           return;
3951         }
3952         if ((9 == options->gap_open) && (1 == options->gap_extend)) {
3953           seedSearch->paramLambda = 0.269;
3954           seedSearch->paramK = 0.058;
3955           return;
3956         }
3957         ErrPostEx(SEV_FATAL, 1, 0, "The combination %d for gap opening cost and %d for gap extension is not supported in PHI-BLAST with matrix %s\n", options->gap_open, options->gap_extend, matrixName);
3958       }
3959       else {
3960         if (0 == StringCmp("BLOSUM80", matrixName)) { 
3961           seedSearch->paramC = 0.40;
3962           if ((10 == options->gap_open) && (1 == options->gap_extend)) {
3963             seedSearch->paramLambda = 0.300;
3964             seedSearch->paramK = 0.072;
3965             return;
3966           }
3967           if ((8 == options->gap_open) && (2 == options->gap_extend)) {
3968             seedSearch->paramLambda = 0.308;
3969             seedSearch->paramK = 0.089;
3970             return;
3971           }
3972           if ((7 == options->gap_open) && (2 == options->gap_extend)) {
3973             seedSearch->paramLambda = 0.295;
3974             seedSearch->paramK = 0.077;
3975             return;
3976           }
3977           if ((6 == options->gap_open) && (2 == options->gap_extend)) {
3978             seedSearch->paramLambda = 0.271;
3979             seedSearch->paramK = 0.051;
3980             return;
3981           }
3982           if ((11 == options->gap_open) && (1 == options->gap_extend)) {
3983             seedSearch->paramLambda = 0.314;
3984             seedSearch->paramK = 0.096;
3985             return;
3986           }
3987           if ((9 == options->gap_open) && (1 == options->gap_extend)) {
3988             seedSearch->paramLambda = 0.277;
3989             seedSearch->paramK = 0.046;
3990             return;
3991           }
3992           ErrPostEx(SEV_FATAL, 1, 0, "The combination %d for gap opening cost and %d for gap extension is not supported in PHI-BLAST with matrix %s\n", options->gap_open, options->gap_extend, matrixName);
3993         }
3994         else {
3995           if (0 == StringCmp("BLOSUM45", matrixName)) { 
3996             seedSearch->paramC = 0.60;
3997             if ((14 == options->gap_open) && (2 == options->gap_extend)) {
3998               seedSearch->paramLambda = 0.199;
3999               seedSearch->paramK = 0.040;
4000               return;
4001             }
4002             if ((13 == options->gap_open) && (3 == options->gap_extend)) {
4003               seedSearch->paramLambda = 0.209;
4004               seedSearch->paramK = 0.057;
4005               return;
4006             }
4007             if ((12 == options->gap_open) && (3 == options->gap_extend)) {
4008               seedSearch->paramLambda = 0.203;
4009               seedSearch->paramK = 0.049;
4010               return;
4011             }
4012             if ((11 == options->gap_open) && (3 == options->gap_extend)) {
4013               seedSearch->paramLambda = 0.193;
4014               seedSearch->paramK = 0.037;
4015               return;
4016             }
4017             if ((10 == options->gap_open) && (3 == options->gap_extend)) {
4018               seedSearch->paramLambda = 0.182;
4019               seedSearch->paramK = 0.029;
4020               return;
4021             }
4022             if ((15 == options->gap_open) && (2 == options->gap_extend)) {
4023               seedSearch->paramLambda = 0.206;
4024               seedSearch->paramK = 0.049;
4025               return;
4026             }
4027             if ((13 == options->gap_open) && (2 == options->gap_extend)) {
4028               seedSearch->paramLambda = 0.190;
4029               seedSearch->paramK = 0.032;
4030               return;
4031             }
4032             if ((12 == options->gap_open) && (2 == options->gap_extend)) {
4033               seedSearch->paramLambda = 0.177;
4034               seedSearch->paramK = 0.023;
4035               return;
4036             }
4037             if ((19 == options->gap_open) && (1 == options->gap_extend)) {
4038               seedSearch->paramLambda = 0.209;
4039               seedSearch->paramK = 0.049;
4040               return;
4041             }
4042             if ((18 == options->gap_open) && (1 == options->gap_extend)) {
4043               seedSearch->paramLambda = 0.202;
4044               seedSearch->paramK = 0.041;
4045               return;
4046             }
4047             if ((17 == options->gap_open) && (1 == options->gap_extend)) {
4048               seedSearch->paramLambda = 0.195;
4049               seedSearch->paramK = 0.034;
4050               return;
4051             }
4052             if ((16 == options->gap_open) && (1 == options->gap_extend)) {
4053               seedSearch->paramLambda = 0.183;
4054               seedSearch->paramK = 0.024;
4055               return;
4056             }
4057             ErrPostEx(SEV_FATAL, 1, 0, "The combination %d for gap opening cost and %d for gap extension is not supported in PHI-BLAST with matrix %s\n", options->gap_open, options->gap_extend, matrixName);
4058 
4059           }
4060           else {
4061             ErrPostEx(SEV_FATAL, 1, 0, "Matrix %s not allowed in PHI-BLAST\n", matrixName);
4062           }
4063         }
4064       }
4065     }
4066   }
4067 }
4068 
4069 /* 
4070         Used by BlastParseInputString to get the 'index' of the
4071         option and to check it's validity.
4072 */
4073 Int4 BlastGetLetterIndex(CharPtr letters, Char ch)
4074 {
4075     Int4 index;
4076 
4077     for(index = 0; letters[index] != NULLB; index++) {
4078         if (letters[index] == ch) {
4079             return index;
4080         }
4081     }
4082     return -1;
4083 }
4084 
4085 /*
4086         Parses a string of input options.
4087         For use in the Web page and filtering options. 
4088         Not for use in command-line programs - use GetArgs.
4089 */
4090 
4091 Boolean BlastParseInputString(CharPtr string, 
4092         CharPtr letters, 
4093         CharPtr PNTR *values_in,
4094         CharPtr PNTR ErrorMessage)
4095 {
4096     CharPtr chptr;
4097     Int4 i, index = 0, max_par_num;
4098     Char option[1024];
4099     CharPtr PNTR values;
4100     Char message[1024];
4101 
4102     if(string == NULL || letters == NULL || 
4103             *letters == '\0' || values_in == NULL) {
4104         return FALSE;
4105     }
4106 
4107     max_par_num = StringLen(letters);
4108 
4109     values = (CharPtr PNTR)MemNew(max_par_num * sizeof(CharPtr));
4110     *values_in = values;
4111 
4112     chptr = string;
4113 
4114     while(1) {
4115         while(IS_WHITESP(*chptr)) /* Rolling spaces */
4116             chptr++;
4117 
4118         if(*chptr == NULLB)   /* Check for NULLB */
4119             break;
4120 
4121         if (*chptr != '-') {   /* Check for the option sign */
4122             sprintf(message, "Invalid input string started from \"%s\"", 
4123                     chptr);
4124             if (ErrorMessage)
4125                 *ErrorMessage = StringSave(message);
4126             return FALSE;
4127         } else {
4128             chptr++;
4129         }
4130 
4131         /* checking index in options table */
4132 
4133         if((index = BlastGetLetterIndex(letters, *chptr)) < 0) {
4134             sprintf(message, "Character \'%c\' is not a valid option", 
4135                     *chptr);
4136             if (ErrorMessage)
4137                 *ErrorMessage = StringSave(message);
4138             return FALSE;
4139         }
4140 
4141         if(*chptr == NULLB)   /* Check for NULLB */
4142             break;
4143 
4144         chptr++;
4145 
4146         while(IS_WHITESP(*chptr)) /* Rolling spaces */
4147             chptr++;
4148 
4149         if(*chptr == NULLB)   /* Check for NULLB */
4150             break;
4151 
4152         /* If option is in double quotes, get the entire string between the
4153            quotes */
4154         if (*chptr == '\'') {
4155            for (i=0; *++chptr != NULLB && *chptr != '\''; i++)
4156               option[i] = *chptr;
4157            if (*chptr == '\'')
4158               chptr++;
4159         } else {
4160            for(i=0; !IS_WHITESP(*chptr) && *chptr != NULLB; i++, chptr++)
4161               option[i] = *chptr;
4162         }
4163         option[i] = NULLB;
4164 
4165         MemFree(values[index]);
4166         values[index] = StringSave(option);
4167     }
4168 
4169     return TRUE;
4170 }
4171 
4172 CharPtr BLASTGetDatabaseTitleFromSeqAnnot(SeqAnnotPtr seqannot)
4173 {
4174     ValNodePtr vnp;
4175     
4176     UserObjectPtr uop;
4177     UserFieldPtr ufp;
4178     ObjectIdPtr oip;
4179     
4180     for(vnp = seqannot->desc; vnp != NULL; vnp = vnp->next) {
4181         
4182         if (vnp->choice == Annot_descr_user) {
4183             uop = (UserObjectPtr) vnp->data.ptrvalue;
4184             oip = uop->type;
4185             if(!StringICmp(oip->str, "BLAST database title")) {
4186                 ufp = uop->data;
4187                 oip = ufp->label;
4188                 return oip->str;
4189             }
4190         }
4191     }
4192     return NULL;
4193 }
4194 
4195 
4196 void BLASTAddBlastDBTitleToSeqAnnotEx(SeqAnnotPtr seqannot, CharPtr title, Boolean is_na)
4197 {
4198     UserObjectPtr uop;
4199     UserFieldPtr ufp;
4200     ObjectIdPtr oip;
4201     
4202     uop = UserObjectNew();
4203     oip = ObjectIdNew();
4204     uop->type = oip;
4205     oip->str = StringSave("BLAST database title");
4206     
4207     ufp = UserFieldNew();
4208     oip = ObjectIdNew();
4209     ufp->label = oip;
4210     uop->data = ufp;
4211     
4212     oip->str = StringSave(title);
4213     ufp->choice = 4;
4214     ufp->data.boolvalue = is_na; /* Nucleotide if TRUE. */
4215     
4216     ValNodeAddPointer(&(seqannot->desc), Annot_descr_user, (Pointer)uop);
4217 }
4218 
4219 /* BLASTAddBlastDBTitleToSeqAnnotEx is preferred if the type of database is known. */
4220 
4221 void BLASTAddBlastDBTitleToSeqAnnot(SeqAnnotPtr seqannot, CharPtr title)
4222 {
4223         BLASTAddBlastDBTitleToSeqAnnotEx(seqannot, title, TRUE);
4224         return;
4225 }
4226 
4227 /* pos is assumed to be the address of a chracter in the array seq
4228    if so, copy from pos backwards to the start of seq
4229    into target.
4230    return the number of characters copied*/
4231 Int4 reverse_seq(Uint1 *seq, Uint1 *pos, Uint1 *target) 
4232 {
4233     Uint1 *c; /*index over addresses of characters in seq*/
4234     Int4 numCopied; /*number of characters copied*/
4235 
4236     for (c = pos, numCopied = 0; c >= seq; c--, numCopied++) 
4237         *target++ = *c;
4238     *target = '\0';
4239     return numCopied;
4240 }
4241 
4242 static void FilterAlignFree(FilterAlignPtr fap)
4243 {
4244     HspFree(fap->hsp);
4245     
4246     if(fap->sap != NULL)
4247         SeqAlignSetFree(fap->sap);
4248     
4249     MemFree(fap);
4250 
4251     return;
4252 }
4253 
4254 static void SappSetFree(SappSetPtr ssp)
4255 {
4256     Int4 i;
4257     FilterAlignPtr fap;
4258     
4259     for(i = 0; i < ssp->count; i++) {
4260         if ((fap = ssp->fapp[i]) != NULL) {
4261             FilterAlignFree(fap);
4262         }
4263     }
4264 
4265     MemFree(ssp->fapp);
4266     MemFree(ssp);
4267 
4268     return;
4269 }
4270 static int LIBCALLBACK SappArrayScoreCmp(VoidPtr v1, VoidPtr v2)
4271 {
4272     FilterAlignPtr f1, f2;
4273     
4274     f1 = (*(FilterAlignPtr PNTR) v1);
4275     f2 = (*(FilterAlignPtr PNTR) v2);
4276 
4277     if (f1->hsp->score < f2->hsp->score)
4278         return 1;
4279     else if (f1->hsp->score > f2->hsp->score)
4280         return -1;
4281 
4282     if (f1->hsp->evalue > f2->hsp->evalue)
4283         return 1;
4284     else if (f1->hsp->evalue < f2->hsp->evalue)
4285         return -1;
4286     
4287     else return 0;
4288 }
4289 
4290 static Boolean BLASTSortSapArray(SappSetPtr ssp)
4291 {
4292 
4293     HeapSort((VoidPtr)ssp->fapp, ssp->count, sizeof(FilterAlignPtr), 
4294              SappArrayScoreCmp);
4295 
4296     return TRUE;
4297 }
4298 
4299 static Boolean BLASTTestHSPPosition(HspPtr hsp1, HspPtr hsp2, Int4 pct)
4300 {
4301     Boolean from_is_inside, to_is_inside;
4302     Int4 length;
4303     Nlm_FloatHi pct_overlap = 0.0;
4304 
4305     if(hsp1 == NULL || hsp2 == NULL)
4306         return TRUE;
4307     
4308     from_is_inside = FALSE;
4309     to_is_inside   = FALSE;
4310 
4311     if((SIGN(hsp1->query_frame) != SIGN(hsp2->query_frame)) || 
4312        (SIGN(hsp1->hit_frame) != SIGN(hsp2->hit_frame)))
4313         return TRUE;
4314 
4315 
4316     if(SIGN(hsp1->query_frame) == SIGN(hsp1->hit_frame)) {
4317 
4318         if(CONTAINED_IN_HSP(hsp1->query_from, hsp1->query_to, hsp2->query_from,hsp1->hit_from, hsp1->hit_to, hsp2->hit_from)) {
4319             from_is_inside = TRUE;
4320         }
4321         
4322         if(CONTAINED_IN_HSP(hsp1->query_from, hsp1->query_to, hsp2->query_from, hsp1->hit_from, hsp1->hit_to, hsp2->hit_from)) {
4323             to_is_inside = TRUE;
4324         }
4325     } else {
4326         if(CONTAINED_IN_HSP(hsp1->query_to, hsp1->query_from, hsp2->query_from, hsp1->hit_from, hsp1->hit_to, hsp2->hit_from)) {
4327             from_is_inside = TRUE;
4328         }
4329 
4330         if(CONTAINED_IN_HSP(hsp1->query_to, hsp1->query_from, hsp2->query_from, hsp1->hit_from, hsp1->hit_to, hsp2->hit_from)) {
4331             to_is_inside = TRUE;
4332         }
4333         
4334     }
4335 
4336     if(from_is_inside && to_is_inside)
4337         return FALSE;
4338 
4339     length = abs(hsp2->query_to -  hsp2->query_from);
4340     
4341     if(from_is_inside) {
4342         pct_overlap = (length - (MIN(abs(hsp2->query_to - hsp1->query_from), abs(hsp2->query_to - hsp1->query_to))))*100.0/length;
4343         
4344     } else if (to_is_inside) {
4345         
4346         pct_overlap = (length - (MIN(abs(hsp2->query_from - hsp1->query_from), abs(hsp2->query_from - hsp1->query_to))))*100.0/length;
4347         
4348     }
4349 
4350     if(pct_overlap > pct)
4351         return FALSE;
4352     
4353     return TRUE;
4354 }
4355 static SeqAlignPtr BLASTFilterSapArray(SappSetPtr ssp,  Int4 pct, 
4356                                        SeqAlignPtr PNTR next_tail)
4357 {
4358     Int4 i, j;
4359     HspPtr hsp1, hsp2;
4360     SeqAlignPtr sap_head;
4361 
4362     if(ssp == NULL || ssp->count == 0)
4363         return NULL;
4364 
4365     
4366     for(i = 0; i < ssp->count; i++) {
4367         
4368         if(ssp->fapp[i] == NULL)
4369             continue;
4370         
4371         hsp1 = ssp->fapp[i]->hsp;
4372         
4373         for(j = 0; j < i; j++) {
4374             
4375             if(ssp->fapp[j] == NULL)
4376                 continue;
4377             
4378             hsp2 = ssp->fapp[j]->hsp;
4379             
4380             if(!BLASTTestHSPPosition(hsp1, hsp2, pct)) { 
4381                 /* FALSE mean hsp2 to be removed */
4382                 if(ssp->fapp[j] != NULL) {
4383                     FilterAlignFree(ssp->fapp[j]);    
4384                     ssp->fapp[j] = NULL;
4385                 }
4386                 break;
4387             }
4388         }
4389     }
4390 
4391     sap_head   = NULL;
4392     *next_tail = NULL;
4393     
4394     for(i = 0; i < ssp->count; i++) {
4395         if(ssp->fapp[i] != NULL) {
4396             if(sap_head == NULL) {
4397                 sap_head = ssp->fapp[i]->sap;
4398                 *next_tail = sap_head;
4399             } else {
4400                 (*next_tail)->next = ssp->fapp[i]->sap;
4401                 *next_tail = (*next_tail)->next;
4402             }
4403             ssp->fapp[i]->sap = NULL;
4404         }
4405     }
4406     
4407     return sap_head;
4408 }
4409 
4410 static SappSetPtr BLASTCreateSappArray(SeqAlignPtr sap_in, 
4411                                        Boolean subject_is_aa, 
4412                                        Boolean is_ooframe)
4413 {
4414     Int4 count;
4415     SeqAlignPtr sap;
4416     SappSetPtr ssp;
4417 
4418     if(sap_in == NULL)
4419         return NULL;
4420     
4421     for(count = 0, sap = sap_in; sap != NULL; sap = sap->next)
4422         count++;
4423     
4424     ssp = (SappSetPtr) MemNew(sizeof(SappSet));
4425     
4426     ssp->count = count;
4427     
4428     ssp->fapp = (FilterAlignPtr PNTR) MemNew(count*sizeof(FilterAlignPtr));
4429     
4430     for(count = 0, sap = sap_in; sap != NULL; count++) {
4431         ssp->fapp[count] = MemNew(sizeof(FilterAlign));
4432         ssp->fapp[count]->sap = sap;
4433         SeqIdWrite(TxGetSubjectIdFromSeqAlign(sap), 
4434                    ssp->fapp[count]->subject_sip, 
4435                    PRINTID_FASTA_LONG,
4436                    sizeof(ssp->fapp[count]->subject_sip));
4437         ssp->fapp[count]->hsp = BXMLGetHspFromSeqAlign(sap, subject_is_aa, 0,
4438                                                        is_ooframe, NULL);
4439 
4440         sap = sap->next;
4441         ssp->fapp[count]->sap->next = NULL; /* Single SAP */
4442     }
4443 
4444     return ssp;
4445 }
4446 
4447 static SappSetPtr BLASTGetNextSapArray(SappSetPtr ssp_in, Int4 pct)
4448 {
4449     CharPtr current_sip;
4450     Int4 count, i;
4451     SappSetPtr ssp;
4452 
4453     /* First - calculating number of SeqAligns for the next sap array */
4454     current_sip = NULL;
4455     count = 0;
4456     for(i = 0; i < ssp_in->count; i++) {
4457         
4458         if (ssp_in->fapp[i] != NULL) {
4459             if(current_sip == NULL) {
4460                 current_sip = ssp_in->fapp[i]->subject_sip;
4461                 count++;
4462                 continue;
4463             }
4464             
4465             if(!StringCmp(current_sip, ssp_in->fapp[i]->subject_sip))
4466                 count++;
4467         }
4468     }
4469 
4470     if(count == 0)
4471         return NULL;
4472 
4473     ssp = (SappSetPtr) MemNew(sizeof(SappSet));
4474     ssp->count = count;
4475     ssp->fapp = (FilterAlignPtr PNTR) MemNew(sizeof(FilterAlignPtr) * ssp->count);
4476 
4477     for(i = 0, count = 0; i < ssp_in->count && count < ssp->count; i++) {
4478         if (ssp_in->fapp[i] != NULL) {
4479             if(!StringCmp(current_sip, ssp_in->fapp[i]->subject_sip)) {
4480                 ssp->fapp[count] = ssp_in->fapp[i];
4481                 ssp_in->fapp[i] = NULL;
4482                 count++;
4483             }
4484         }
4485     }
4486     return ssp;
4487 }
4488         
4489 /* -----------------------------------------------------------------
4490    This function will filter given SeqAlignPtr for overlaping
4491    regions for the same query/subject pair. Another input parameter is
4492    percentage of overlapping required for the alignment to be removed
4493    from SeqAlignPtr
4494    ---------------------------------------------------------------- */
4495 
4496 SeqAlignPtr BLASTFilterOverlapRegions(SeqAlignPtr sap, Int4 pct, 
4497                                       Boolean subject_is_aa, 
4498                                       Boolean is_ooframe,
4499                                       Boolean sort_array)
4500 {
4501     SeqAlignPtr seqalign, head, tail=NULL, next_tail;
4502     SappSetPtr  ssp, ssp_tmp;
4503     
4504     if(sap == NULL)
4505         return NULL;
4506     
4507     head = NULL;
4508     
4509     ssp = BLASTCreateSappArray(sap, subject_is_aa, is_ooframe);
4510     
4511     /* Generally speaking SeqAlignPtr coming from Blast engine should be
4512        correctly sorted - if not sorting below may be used */
4513     
4514     if(sort_array)
4515         BLASTSortSapArray(ssp);
4516     
4517     while(TRUE) {
4518         
4519         if((ssp_tmp = BLASTGetNextSapArray(ssp, pct)) == NULL)
4520             break;
4521         
4522         BLASTSortSapArray(ssp_tmp);
4523         
4524         seqalign = BLASTFilterSapArray(ssp_tmp, pct, &next_tail);
4525 
4526         SappSetFree(ssp_tmp);
4527         
4528         if(head == NULL) {
4529             head = seqalign;
4530         } else {
4531             tail->next = seqalign;
4532         }
4533         
4534         tail = next_tail;
4535     }
4536     
4537     SappSetFree(ssp);
4538     
4539     return head;
4540 }
4541 
4542 Int4
4543 BlastGetNumIdentical(Uint1Ptr query, Uint1Ptr subject, Int4 q_start, 
4544                      Int4 s_start, Int4 length, Boolean reverse)
4545 {
4546    Int4 i, ident = 0;
4547    Uint1Ptr q, s;
4548 
4549    q = &query[q_start];
4550    if (!reverse) 
4551       s = &subject[s_start];
4552    else
4553       s = &subject[s_start + length - 1];
4554 
4555    for (i=0; i<length; i++) {
4556       if (*q == *s)
4557          ident++;
4558       q++;
4559       if (!reverse)
4560          s++;
4561       else
4562          s--;
4563    }
4564 
4565    return ident;
4566 }
4567 
4568 
4569 int LIBCALLBACK BlastPrintAlignInfo(VoidPtr srch)
4570 {
4571    BlastSearchBlkPtr search = (BlastSearchBlkPtr) srch;
4572    BLAST_HSPPtr hsp, PNTR hsp_array; 
4573    Int4 i;
4574    CharPtr title, ptr;
4575    Char query_buffer[BUFFER_LENGTH+1], subject_buffer[BUFFER_LENGTH+1];
4576    SeqIdPtr sip, subject_id, query_id; 
4577    Int4 index, score;
4578    Int4 num_mismatches, num_gap_opens, align_length, num_ident;
4579    Uint1Ptr query_seq, subject_start=NULL, subject_seq, rev_subject=NULL;
4580    FloatHi perc_ident, bit_score, evalue;
4581    Char bit_score_buff[10];
4582    CharPtr eval_buff = NULL;
4583    Int4 length=0, query_length, subject_length=0, rev_subject_length=0;
4584    Int4 q_start, q_end, s_start, s_end, q_shift=0, s_shift=0;
4585    CharPtr subject_descr = NULL;
4586    Int4 cutoff_s;
4587    Boolean is_na, is_translated, db_is_na;
4588    Int4 hspcnt, number, numseg;
4589    SeqAlignPtr seqalign = NULL, sap;
4590    FILE *fp = (FILE *)search->output;
4591    BioseqPtr bsp = NULL, query_bsp;
4592    SeqPortPtr spp;
4593    SeqLocPtr subject_slp = NULL;
4594    Uint1 residue;
4595    AlignSumPtr asp = NULL;
4596 
4597    if (search->current_hitlist == NULL || search->current_hitlist->hspcnt <= 0) {
4598       search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
4599       return 0;
4600    }
4601 
4602    hspcnt = search->current_hitlist->hspcnt;
4603    is_na = (search->prog_number == blast_type_blastn);
4604    is_translated = (search->prog_number != blast_type_blastn &&
4605                     search->prog_number != blast_type_blastp);
4606 
4607    subject_slp = search->query_slp->next;
4608    /* Only for the two sequences case, get offset shift if subject 
4609       is a subsequence */
4610    if (!search->rdfp && subject_slp)
4611        s_shift = SeqLocStart(subject_slp);
4612    /* Check if query is a subsequence */
4613    q_shift = SeqLocStart(search->query_slp);
4614 
4615    if (is_na)
4616       cutoff_s = search->pbp->cutoff_s;
4617    else
4618       cutoff_s = 0;
4619   
4620    db_is_na = (search->prog_number != blast_type_blastp && 
4621                search->prog_number != blast_type_tblastn);
4622 
4623    if (search->rdfp) {
4624       ReadDBBioseqFetchEnable("blastall", search->rdfp->filename, 
4625                               db_is_na, TRUE);
4626       readdb_get_descriptor(search->rdfp, search->subject_id, &sip,
4627                             &subject_descr);
4628    } else
4629       sip = SeqIdSetDup(search->subject_info->sip);
4630 
4631    if (search->prog_number == blast_type_tblastn) {
4632       if (subject_slp) {
4633          spp = SeqPortNewByLoc(subject_slp, Seq_code_ncbi4na);
4634          subject_length = SeqLocLen(subject_slp);
4635       } else {
4636          if (search->rdfp)
4637             bsp = readdb_get_bioseq(search->rdfp, search->subject_id);
4638          else
4639             bsp = BioseqLockById(sip);
4640          spp = SeqPortNew(bsp, 0, -1, Seq_strand_plus, Seq_code_ncbi4na);
4641          subject_length = bsp->length;
4642       }
4643       /* make one longer to "protect" ALIGN. */
4644       subject_start = subject_seq = 
4645          MemNew((1+subject_length)*sizeof(Uint1));
4646       for (index=0; index<subject_length; index++) {
4647          subject_seq[index] = SeqPortGetResidue(spp);
4648       }
4649       /* Gap character in last space. */
4650       subject_seq[subject_length] = NULLB;
4651       spp = SeqPortFree(spp);
4652       
4653       if (subject_slp && !bsp) {
4654          Change_Loc_Strand(subject_slp, Seq_strand_minus);
4655          spp = SeqPortNewByLoc(subject_slp, Seq_code_ncbi4na);
4656          Change_Loc_Strand(subject_slp, Seq_strand_both);
4657       } else if (bsp)
4658          spp = SeqPortNew(bsp, 0, -1, Seq_strand_minus, Seq_code_ncbi4na);
4659 
4660       /* make one longer to "protect" ALIGN. */
4661       rev_subject = MemNew((1+subject_length)*sizeof(Uint1));
4662       for (index=0; index<subject_length; index++)
4663          rev_subject[index] = SeqPortGetResidue(spp);
4664       /* Gap character in last space. */
4665       rev_subject[subject_length] = NULLB;
4666       rev_subject_length = subject_length;
4667       spp = SeqPortFree(spp);
4668       if (search->rdfp)
4669          bsp = BioseqFree(bsp);
4670       else if (bsp) 
4671          BioseqUnlock(bsp);
4672    } else if (search->rdfp) {
4673       if (is_na) {
4674          subject_length = 
4675             readdb_get_sequence_ex(search->rdfp, search->subject_id, 
4676                                    &subject_start, &length, TRUE);
4677          subject_seq = subject_start + 1;
4678       } else {
4679          subject_length = 
4680             readdb_get_sequence(search->rdfp, search->subject_id, 
4681                                 &subject_seq);
4682       }
4683    } else if (search->prog_number == blast_type_blastn) {
4684       bsp = BioseqLockById(sip);
4685       if (subject_slp) {
4686          spp = SeqPortNewByLoc(subject_slp, Seq_code_ncbi4na);
4687          subject_length = SeqLocLen(subject_slp);
4688       } else {
4689          spp = SeqPortNew(bsp, 0, -1, Seq_strand_plus, Seq_code_ncbi4na);
4690          subject_length = bsp->length;
4691       }
4692       if (bsp->repr == Seq_repr_delta) 
4693          SeqPortSet_do_virtual(spp, TRUE);
4694       /* make one longer to "protect" ALIGN. */
4695       subject_start = MemNew((2+subject_length)*sizeof(Uint1));
4696       subject_start[0] = ncbi4na_to_blastna[0];
4697       subject_seq = subject_start+1;
4698       index=0;
4699       while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
4700          if (IS_residue(residue))
4701             subject_seq[index++] = ncbi4na_to_blastna[residue];
4702       }
4703       /* Gap character in last space. */
4704       subject_seq[index] = ncbi4na_to_blastna[0];
4705       spp = SeqPortFree(spp);
4706       BioseqUnlock(bsp);
4707    } else {
4708       if (subject_slp) {
4709          subject_length = SeqLocLen(subject_slp);
4710          spp = SeqPortNewByLoc(subject_slp, Seq_code_ncbistdaa);
4711       } else {
4712          bsp = BioseqLockById(sip);
4713          subject_length = bsp->length;
4714          spp = SeqPortNew(bsp, 0, -1, Seq_strand_unknown, 
4715                           Seq_code_ncbistdaa);
4716          BioseqUnlock(bsp);
4717       }
4718       subject_start = (Uint1Ptr) MemNew(((subject_length)+2)*sizeof(Uint1));
4719       /* The first residue is the sentinel. */
4720       subject_start[0] = NULLB;
4721       subject_seq = subject_start+1;
4722       index = 0;
4723       while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
4724          if (IS_residue(residue))
4725             subject_seq[index++] = residue;
4726       }
4727       subject_seq[index] = NULLB;
4728       spp = SeqPortFree(spp);
4729    }
4730 
4731    if (sip->choice != SEQID_GENERAL ||
4732        StringCmp(((DbtagPtr)sip->data.ptrvalue)->db, "BL_ORD_ID")) {
4733       ptr = (CharPtr) Malloc(BUFFER_LENGTH+1);
4734       SeqIdWrite(sip, ptr, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4735       if (StringNCmp(ptr, "lcl|", 4))
4736          StringCpy(subject_buffer, ptr);
4737       else
4738          StringCpy(subject_buffer, ptr+4);
4739       ptr = MemFree(ptr);
4740    } else {
4741       ptr = StringTokMT(subject_descr, " \t", &subject_descr);
4742       subject_descr = ptr;
4743       StringCpy(subject_buffer, ptr);
4744    }
4745 
4746    query_id = search->query_id;
4747    if (is_na) 
4748       query_length = search->query_context_offsets[search->first_context+1] -
4749          search->query_context_offsets[search->first_context] - 1;
4750    else
4751       query_length = search->context[0].query->length;
4752 
4753    query_bsp = BioseqLockById(query_id);
4754    if (query_id->choice == SEQID_LOCAL) {
4755       title = BioseqGetTitle(query_bsp);
4756       if (title) {
4757          ptr = StringTokMT(title, " ", &title);
4758          StringCpy(query_buffer, ptr);
4759       } else {
4760          ptr = (CharPtr) Malloc(BUFFER_LENGTH+1);
4761          SeqIdWrite(query_bsp->id, ptr, PRINTID_FASTA_LONG, 
4762                     BUFFER_LENGTH);
4763          if (StringNCmp(ptr, "lcl|", 4))
4764             StringCpy(query_buffer, ptr);
4765          else
4766             StringCpy(query_buffer, ptr+4);
4767          ptr = MemFree(ptr);
4768          
4769       }  
4770    } else {
4771       SeqIdWrite(query_bsp->id, query_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4772    }
4773    BioseqUnlock(query_bsp);
4774    
4775    hsp_array = search->current_hitlist->hsp_array;
4776 
4777    subject_id = GetTheSeqAlignID(sip);
4778    sip = SeqIdSetFree(sip);
4779    if (search->prog_number != blast_type_tblastx && 
4780        search->pbp->gapped_calculation)
4781       RealBlastGetGappedAlignmentTraceback(search, subject_seq, subject_length, rev_subject, rev_subject_length, subject_id, hsp_array, hspcnt, &seqalign, NULL, cutoff_s, FALSE, search->subject_id, TRUE);
4782    else {
4783       SeqIdPtr new_sip, gi_list=NULL;
4784       StdSegPtr ssp;
4785 
4786       gi_list = BlastGetAllowedGis(search, search->subject_id, &new_sip);
4787       sip = SeqIdDup(query_id);
4788       if (new_sip)
4789          sip->next = new_sip;
4790       else
4791          sip->next = subject_id;
4792       for (index=0; index<hspcnt; index++) {
4793          hsp = hsp_array[index];
4794          ssp = BLASTHspToStdSeg(search, subject_length, hsp, sip, 
4795                                 FALSE, gi_list);
4796          ssp->ids = SeqIdSetDup(sip);
4797          if (!seqalign)
4798             sap = seqalign = SeqAlignNew();
4799          else {
4800             sap->next = SeqAlignNew();
4801             sap = sap->next;
4802          }
4803          sap->type = 2;
4804          sap->segtype = 3;
4805          sap->segs = ssp;
4806       }
4807       SeqIdFree(sip);
4808    }
4809 
4810    subject_id = SeqIdSetFree(subject_id);
4811 
4812    if (is_translated || !search->pbp->gapped_calculation) {
4813       asp = MemNew(sizeof(AlignSum));
4814       asp->matrix = NULL;
4815       asp->matrix = load_default_matrix();
4816       asp->is_aa = !is_na;
4817       if (is_translated)
4818          AdjustOffSetsInSeqAlign(seqalign, search->query_slp, subject_slp);
4819    }
4820 
4821    /* Evalue buffer is dynamically allocated to avoid compiler warnings 
4822       in calls to ScoreAndEvalueToBuffers. */
4823    eval_buff = Malloc(10);
4824 
4825    /* Now print the tab-delimited fields, using seqalign */
4826    for (sap = seqalign; sap; sap = sap->next) {
4827       perc_ident = 0;
4828       align_length = 0;
4829       num_gap_opens = 0;
4830       num_mismatches = 0;
4831 
4832       GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
4833 
4834       /* Do not allow knocking off digit in evalue buffer, so parsers are 
4835          not confused. */
4836       ScoreAndEvalueToBuffers(bit_score, evalue, 
4837                               bit_score_buff, &eval_buff, 0);
4838 
4839       query_seq = search->context[search->first_context].query->sequence;
4840 
4841       if (!is_translated && search->pbp->gapped_calculation) {
4842          DenseSegPtr dsp = (DenseSegPtr) sap->segs;
4843          Boolean reverse = (dsp->strands[0] != dsp->strands[1]);
4844          numseg = dsp->numseg;
4845          
4846          for (i=0; i<numseg; i++) {
4847             align_length += dsp->lens[i];
4848             if (dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1) {
4849                if (reverse) {
4850                   q_start = 
4851                      query_length - dsp->starts[2*i] - dsp->lens[i] + 1;
4852                } else {
4853                   q_start = dsp->starts[2*i];
4854                }
4855                num_ident = BlastGetNumIdentical(query_seq, subject_seq, 
4856                   q_start, dsp->starts[2*i+1], dsp->lens[i], FALSE);
4857                perc_ident += num_ident;
4858                num_mismatches += dsp->lens[i] - num_ident;
4859             } else
4860                num_gap_opens++;
4861          }
4862          perc_ident = perc_ident / align_length * 100;
4863          
4864          /* Adjust offsets for (concatenated) minus strand */
4865          if (is_na && dsp->starts[0] >= query_length) {
4866             q_start = 2*query_length - dsp->starts[2*numseg-2] - 
4867                dsp->lens[numseg-1] + 2;
4868             q_end = 2*query_length - dsp->starts[0] + 1;
4869             s_start = dsp->starts[2*numseg-1] + dsp->lens[numseg-1];
4870             s_end = dsp->starts[1]+1;
4871          } else if (reverse) {
4872             q_start = dsp->starts[0];
4873             q_end = dsp->starts[2*numseg-2] + dsp->lens[numseg-1] - 1;
4874             s_end = dsp->starts[2*numseg-1] + dsp->lens[numseg-1];
4875             s_start = dsp->starts[1]+1;
4876          } else {
4877             q_start = dsp->starts[0] + 1;
4878             q_end = dsp->starts[2*numseg-2] + dsp->lens[numseg-1];
4879             s_start = dsp->starts[1] + 1;
4880             s_end = dsp->starts[2*numseg-1] + dsp->lens[numseg-1];
4881          }
4882       } else {
4883          StdSegPtr ssp = (StdSegPtr) sap->segs;
4884          
4885          find_score_in_align(sap, 1, asp);
4886 
4887          if (asp->m_frame < 0)
4888             q_start = SeqLocStop(ssp->loc) + 1;
4889          else
4890             q_start = SeqLocStart(ssp->loc) + 1;
4891 
4892          if (asp->t_frame < 0)
4893             s_start = SeqLocStop(ssp->loc->next) + 1;
4894          else
4895             s_start = SeqLocStart(ssp->loc->next) + 1;
4896          
4897          for (index=1; ssp->next; index++)
4898             ssp = ssp->next;
4899          
4900          if (asp->m_frame < 0)
4901             q_end = SeqLocStart(ssp->loc) + 1;
4902          else
4903             q_end = SeqLocStop(ssp->loc) + 1;
4904 
4905          if (asp->t_frame < 0)
4906             s_end = SeqLocStart(ssp->loc->next) + 1;
4907          else
4908             s_end = SeqLocStop(ssp->loc->next) + 1;
4909 
4910          align_length = asp->totlen;
4911          num_gap_opens = index / 2;
4912          num_mismatches = asp->totlen - asp->gaps - asp->identical;
4913          perc_ident = ((FloatHi) 100*asp->identical)/ (asp->totlen);
4914       }
4915 
4916       if (!is_translated) {
4917          /* Adjust coordinates if query and/or subject is a subsequence */
4918          q_start += q_shift;
4919          q_end += q_shift;
4920          s_start += s_shift;
4921          s_end += s_shift;
4922       }
4923 
4924       if (perc_ident >= 99.995 && perc_ident < 100.00)
4925          perc_ident = 99.99;
4926          
4927       fprintf(fp, 
4928               "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\n",
4929               query_buffer, subject_buffer, perc_ident, align_length, 
4930               num_mismatches, num_gap_opens, q_start, 
4931               q_end, s_start, s_end, eval_buff, bit_score_buff);
4932    }
4933 
4934    eval_buff = MemFree(eval_buff);
4935 
4936    if (is_translated) {
4937       free_default_matrix(asp->matrix);
4938       MemFree(asp);
4939    }
4940 
4941    if (search->rdfp)
4942       ReadDBBioseqFetchDisable();
4943    
4944    MemFree(subject_start);
4945    MemFree(rev_subject);
4946    SeqAlignSetFree(seqalign);
4947    MemFree(subject_descr);
4948    fflush(fp);
4949    return 0;
4950 }
4951 
4952 static void 
4953 FillSequenceBuffers(Uint1* query_seq, Uint1* subject_seq, 
4954                     Int4* starts, Int4* lengths, Int4 numseg,
4955                     char* query_buffer, char* subject_buffer, 
4956                     Boolean reverse)
4957 {
4958    Int4 index, index1;
4959    const char* blastna_to_iupacna     = "ACGTRYMKWSBDHVN-";
4960    const char* blastna_to_iupacna_rev = "TGCAYRKMSWVHDBN-"; 
4961    Uint1* query_ptr;
4962    Uint1* subject_ptr;
4963    Int4 offset;
4964    char* buffer;
4965 
4966    offset = 0;
4967    if (!reverse) {
4968       for (index = 0; index < numseg; ++index) {
4969          buffer = &query_buffer[offset];
4970          if (starts[2*index] != -1) {
4971             query_ptr = &query_seq[starts[2*index]];
4972             for (index1 = 0; index1 < lengths[index]; ++index1) {
4973                *buffer = blastna_to_iupacna[*query_ptr];
4974                buffer++;
4975                query_ptr++;
4976             }
4977          } else {
4978             memset(buffer, '-', lengths[index]);
4979          }
4980          buffer = &subject_buffer[offset];
4981          if (starts[2*index+1] != -1) {
4982             subject_ptr = &subject_seq[starts[2*index+1]];
4983             for (index1 = 0; index1 < lengths[index]; ++index1) {
4984                *buffer = blastna_to_iupacna[*subject_ptr];
4985                buffer++;
4986                subject_ptr++;
4987             }
4988          } else {
4989             memset(buffer, '-', lengths[index]);
4990          }
4991          offset += lengths[index];
4992       }
4993    } else {
4994       for (index = numseg-1; index >=0; --index) {
4995          buffer = &query_buffer[offset];
4996          if (starts[2*index] != -1) {
4997             query_ptr = &query_seq[starts[2*index]+lengths[index]-1];
4998             for (index1 = 0; index1 < lengths[index]; ++index1) {
4999                *buffer = blastna_to_iupacna_rev[*query_ptr];
5000                buffer++;
5001                query_ptr--;
5002             }
5003          } else {
5004             memset(buffer, '-', lengths[index]);
5005          }
5006          buffer = &subject_buffer[offset];
5007          if (starts[2*index+1] != -1) {
5008             subject_ptr = &subject_seq[starts[2*index+1]+lengths[index]-1];
5009             for (index1 = 0; index1 < lengths[index]; ++index1) {
5010                *buffer = blastna_to_iupacna_rev[*subject_ptr];
5011                buffer++;
5012                subject_ptr--;
5013             }
5014          } else {
5015             memset(buffer, '-', lengths[index]);
5016          }
5017          offset += lengths[index];
5018       }
5019    }
5020 }
5021 
5022 int LIBCALLBACK
5023 MegaBlastPrintAlignInfo(VoidPtr ptr)
5024 {
5025    BlastSearchBlkPtr search = (BlastSearchBlkPtr) ptr;
5026    BLAST_HSPPtr hsp; 
5027    Int4 i, subject_gi;
5028    Int2 context;
5029    CharPtr query_buffer, title;
5030    SeqIdPtr sip, subject_id, query_id; 
5031    Int4 hsp_index;
5032    Int4 num_mismatches, num_gap_opens, align_length, num_ident;
5033    BLAST_KarlinBlkPtr kbp;
5034    Uint1Ptr query_seq, subject_seq = NULL;
5035    FloatHi perc_ident, bit_score;
5036    Char bit_score_buff[10];
5037    CharPtr eval_buff = NULL;
5038    GapXEditScriptPtr esp;
5039    Int4 q_start, q_end, s_start, s_end, query_length, numseg;
5040    Int4 q_off, q_shift = 0, s_off, s_shift = 0;
5041    Int4Ptr length, start;
5042    Uint1Ptr strands;
5043    CharPtr subject_descr=NULL, subject_buffer, buffer;
5044    Boolean numeric_sip_type = FALSE;
5045    FILE *fp = (FILE *) search->output;
5046    CharPtr query_seq_buffer, subject_seq_buffer;
5047    Boolean print_sequences;
5048 
5049    if (search->current_hitlist == NULL || search->current_hitlist->hspcnt <= 0) {
5050       search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
5051       return 0;
5052    }
5053 
5054    print_sequences = (getenv("PRINT_SEQUENCES") != NULL);
5055 
5056    subject_seq = search->subject->sequence_start + 1;
5057 
5058    if (search->rdfp) {
5059       readdb_get_descriptor(search->rdfp, search->subject_id, &sip,
5060                             &subject_descr);
5061    } else 
5062       sip = SeqIdSetDup(search->subject_info->sip);
5063 
5064    if (!search->pbp->mb_params->full_seqids)
5065       subject_id = SeqIdFindBestAccession(sip);
5066    else 
5067       subject_id = sip;
5068    
5069    if (subject_id->choice == SEQID_GI) {
5070       SeqIdPtr new_subject_seqid = NULL;
5071       ValNodePtr gi_list = 
5072          BlastGetAllowedGis(search, search->subject_id, &new_subject_seqid);
5073       
5074       if (gi_list) {
5075          /* change subject's gi with this 'use_this_gi' gi */
5076          subject_id->data.intvalue = gi_list->data.intvalue;
5077       }
5078    }
5079 
5080    if (subject_id->choice != SEQID_GENERAL ||
5081        StringCmp(((DbtagPtr)subject_id->data.ptrvalue)->db, "BL_ORD_ID")) {
5082       if (search->pbp->mb_params->full_seqids) { 
5083          subject_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
5084          SeqIdWrite(subject_id, subject_buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
5085       } else {
5086          numeric_sip_type = 
5087             GetAccessionVersionFromSeqId(subject_id, &subject_gi, 
5088                                          &subject_buffer, TRUE);
5089       }
5090    } else {
5091       subject_buffer = StringTokMT(subject_descr, " \t", &subject_descr);
5092       subject_descr = subject_buffer;
5093    }
5094 
5095    buffer = (CharPtr) Malloc(LARGE_BUFFER_LENGTH);
5096 
5097    /* Only for the two sequences case, get offset shift if subject 
5098       is a subsequence */
5099    if (!search->rdfp && search->query_slp->next)
5100        s_shift = SeqLocStart(search->query_slp->next);
5101    /* Get offset shift if query is a subsequence */
5102    q_shift = SeqLocStart(search->query_slp);
5103 
5104    /* Evalue buffer is dynamically allocated to avoid compiler warnings 
5105       in calls to ScoreAndEvalueToBuffers. */
5106    eval_buff = Malloc(10);
5107 
5108    for (hsp_index=0; hsp_index<search->current_hitlist->hspcnt; hsp_index++) {
5109       hsp = search->current_hitlist->hsp_array[hsp_index];
5110       if (hsp==NULL || (search->pbp->cutoff_e > 0 && 
5111                         hsp->evalue > search->pbp->cutoff_e)) {
5112          continue;
5113       }
5114       context = hsp->context;
5115       query_id = search->qid_array[context/2];
5116 
5117      
5118       if (query_id == NULL) /* Bad hsp, something wrong */
5119          continue; 
5120       hsp->context = context & 1;
5121 
5122       query_length = search->query_context_offsets[context+1] -
5123          search->query_context_offsets[context] - 1;
5124 
5125       query_seq = search->context[context].query->sequence;
5126       kbp = search->sbp->kbp[context];
5127 
5128       /* If traceback hasn't been done yet, do it now */
5129       if (!hsp->gap_info) {
5130          /* This must have already been allocated */
5131          GapAlignBlkPtr gap_align = search->gap_align; 
5132          FloatHi searchsp_eff;
5133          Int4 max_offset, max_start = MAX_DBSEQ_LEN / 2, start_shift;
5134 
5135          /* Set the X-dropoff to the final X dropoff parameter. */
5136          gap_align->x_parameter = search->pbp->gap_x_dropoff_final;
5137          gap_align->query = query_seq;
5138          gap_align->query_length = query_length;
5139          gap_align->subject = subject_seq;
5140          gap_align->subject_length = search->subject->length;
5141          if ((hsp->query.gapped_start == 0 && hsp->subject.gapped_start == 0) ||
5142              CheckStartForGappedAlignment(search, hsp, gap_align->query, gap_align->subject, search->sbp->matrix) == FALSE) {
5143             max_offset = GetStartForGappedAlignment(search, hsp, gap_align->query, gap_align->subject, search->sbp->matrix);
5144             gap_align->q_start = max_offset;
5145             gap_align->s_start = (hsp->subject.offset - hsp->query.offset) + max_offset;
5146             hsp->query.gapped_start = gap_align->q_start;
5147             hsp->subject.gapped_start = gap_align->s_start;
5148          } else {
5149             gap_align->q_start = hsp->query.gapped_start;
5150             gap_align->s_start = hsp->subject.gapped_start;
5151          }
5152             
5153          if (gap_align->s_start > max_start) {
5154             start_shift = (gap_align->s_start / max_start) * max_start;
5155             gap_align->subject = gap_align->subject + start_shift;
5156             
5157             gap_align->s_start %= max_start;
5158          } else
5159             start_shift = 0;
5160          
5161          gap_align->subject_length =
5162             MIN(gap_align->subject_length - start_shift, 
5163                 gap_align->s_start + hsp->subject.length + max_start);
5164          PerformGappedAlignmentWithTraceback(gap_align);
5165          hsp->query.offset = gap_align->query_start;
5166          hsp->subject.offset = gap_align->subject_start + start_shift;
5167          /* The end is one further for BLAST than for the gapped align. */
5168          hsp->query.end = gap_align->query_stop + 1;
5169          hsp->subject.end = gap_align->subject_stop + 1 + start_shift;
5170          if (gap_align->edit_block && start_shift > 0) {
5171             gap_align->edit_block->start2 += start_shift;
5172             gap_align->edit_block->length2 += start_shift;
5173          }
5174          hsp->query.length = hsp->query.end - hsp->query.offset;
5175          hsp->subject.length = hsp->subject.end - hsp->subject.offset;
5176          hsp->score = gap_align->score;
5177          hsp->gap_info = gap_align->edit_block;
5178          searchsp_eff = (FloatHi) search->dblen_eff *
5179             (FloatHi) search->context[hsp->context].query->effective_length;
5180          
5181          hsp->evalue = 
5182             BlastKarlinStoE_simple(hsp->score, kbp, searchsp_eff);
5183          if (hsp->evalue > search->pbp->cutoff_e) {
5184             continue;
5185          }
5186       }
5187 
5188       if (query_id->choice == SEQID_LOCAL && 
5189           search->pbp->mb_params->full_seqids) {
5190          Int4 local_index;
5191          if (GetAccessionVersionFromSeqId(query_id, &local_index,
5192                                           &query_buffer, TRUE)) {
5193             /* If local id is numeric, it is a fake id, so try to retrieve the
5194                real one */
5195             BioseqPtr query_bsp = BioseqLockById(query_id);
5196             title = StringSave(BioseqGetTitle(query_bsp));
5197             if (title)
5198                query_buffer = StringTokMT(title, " ", &title);
5199             else /* Failed to find any string for this id */
5200                query_buffer = StringSave("QUERY");
5201             BioseqUnlock(query_bsp);
5202          }
5203       } else {
5204          query_buffer = (CharPtr) Malloc(BUFFER_LENGTH + 1);
5205          if (!search->pbp->mb_params->full_seqids) {
5206             SeqIdWrite(SeqIdFindBestAccession(query_id), query_buffer,
5207                        PRINTID_TEXTID_ACC_VER, BUFFER_LENGTH);
5208          } else {
5209             SeqIdWrite(query_id, query_buffer, PRINTID_FASTA_LONG,
5210                        BUFFER_LENGTH);
5211          }
5212       }
5213 
5214       bit_score = (hsp->score*kbp->Lambda - kbp->logK) / NCBIMATH_LN2;
5215 
5216       esp = hsp->gap_info->esp;
5217         
5218       for (numseg=0; esp; esp = esp->next, numseg++);
5219 
5220       q_off = hsp->query.offset;
5221       s_off = hsp->subject.offset;
5222 
5223       GXECollectDataForSeqalign(hsp->gap_info, hsp->gap_info->esp, numseg,
5224                                 &start, &length, &strands, 
5225                                 &q_off, &s_off);
5226 
5227       if (start[0] < 0) {
5228          length[0] += start[0];
5229          start[1] -= start[0];
5230          start[0] = 0;
5231       } 
5232       if (start[2*(numseg-1)] + length[numseg-1] > query_length) 
5233          length[numseg-1] = query_length - start[2*(numseg-1)];
5234       
5235       perc_ident = 0;
5236       align_length = 0;
5237       num_gap_opens = 0;
5238       num_mismatches = 0;
5239       for (i=0; i<numseg; i++) {
5240          if (start[2*i] != -1 && start[2*i+1] != -1) {
5241             num_ident = MegaBlastGetNumIdentical(query_seq, subject_seq, 
5242                                                  start[2*i], start[2*i+1], 
5243                                                  length[i], FALSE);
5244             perc_ident += num_ident;
5245             num_mismatches += length[i] - num_ident;
5246          } else {
5247             num_gap_opens++;
5248          }
5249          align_length += length[i];
5250       }
5251       perc_ident = perc_ident / align_length * 100;
5252       /* Avoid printing 100.00 when the hit is not an exact match */
5253       if (perc_ident >= 99.995 && perc_ident < 100.00)
5254          perc_ident = 99.99;
5255 
5256       if (perc_ident < search->pbp->mb_params->perc_identity) {
5257          MemFree(start);
5258          MemFree(length);
5259          MemFree(strands);
5260          MemFree(query_buffer);
5261          continue;
5262       }
5263 
5264       if (print_sequences) {
5265          /* Fill the query and subject sequence buffers */
5266          query_seq_buffer = MemNew((align_length+1));
5267          subject_seq_buffer = MemNew((align_length+1));
5268          FillSequenceBuffers(query_seq, subject_seq, start, length, numseg, 
5269                              query_seq_buffer, subject_seq_buffer, 
5270                              (context%2 == 1));
5271       }
5272 
5273       if (hsp->context) {
5274          hsp->query.end = query_length - hsp->query.offset;
5275          hsp->query.offset = 
5276             hsp->query.end - hsp->query.length;
5277          s_end = hsp->subject.offset + 1;
5278          s_start = hsp->subject.end;
5279       } else {
5280          hsp->query.end = hsp->query.offset + hsp->query.length;
5281          s_start = hsp->subject.offset + 1;
5282          s_end = hsp->subject.offset + hsp->subject.length;
5283       }
5284 
5285       q_start = hsp->query.offset + 1;
5286       q_end = hsp->query.end;
5287          
5288       /* Adjust offsets if query is a subsequence, only for first query */
5289       if (context < 2) {
5290           q_start += q_shift;
5291           q_end += q_shift;
5292       }
5293 
5294       s_start += s_shift;
5295       s_end += s_shift;
5296 
5297       /* Do not allow knocking off digit in evalue buffer, so parsers are 
5298          not confused. */
5299       ScoreAndEvalueToBuffers(bit_score, hsp->evalue, 
5300                               bit_score_buff, &eval_buff, 0);
5301 
5302       if (print_sequences) {
5303          if (numeric_sip_type) {
5304             fprintf(fp, 
5305        "%s\t%ld\t%.2f\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%s\t%s\t%s\t%s\n",
5306                query_buffer, (long) subject_gi, perc_ident, 
5307                (long) align_length, (long) num_mismatches, 
5308                (long) num_gap_opens, (long) q_start, (long) q_end, 
5309                (long) s_start, (long) s_end, eval_buff, bit_score_buff,
5310                query_seq_buffer, subject_seq_buffer);
5311          } else {
5312             fprintf(fp, 
5313         "%s\t%s\t%.2f\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%s\t%s\t%s\t%s\n",
5314                query_buffer, subject_buffer, perc_ident, 
5315                (long) align_length, (long) num_mismatches, 
5316                (long) num_gap_opens, (long) q_start, (long) q_end, 
5317                (long) s_start, (long) s_end, eval_buff, bit_score_buff,
5318                query_seq_buffer, subject_seq_buffer);
5319          }
5320          MemFree(query_seq_buffer);
5321          MemFree(subject_seq_buffer);
5322       } else {
5323          if (numeric_sip_type) {
5324             fprintf(fp, 
5325                "%s\t%ld\t%.2f\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%s\t%s\n",
5326                query_buffer, (long) subject_gi, perc_ident, 
5327                (long) align_length, (long) num_mismatches, 
5328                (long) num_gap_opens, (long) q_start, (long) q_end, 
5329                (long) s_start, (long) s_end, eval_buff, bit_score_buff);
5330          } else {
5331             fprintf(fp, 
5332                "%s\t%s\t%.2f\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%s\t%s\n",
5333                query_buffer, subject_buffer, perc_ident, 
5334                (long) align_length, (long) num_mismatches, 
5335                (long) num_gap_opens, (long) q_start, (long) q_end, 
5336                (long) s_start, (long) s_end, eval_buff, bit_score_buff);
5337          }
5338       }
5339 
5340       MemFree(start);
5341       MemFree(length);
5342       MemFree(strands);
5343       MemFree(query_buffer);
5344    } /* End loop on hsp's */
5345    if (!numeric_sip_type && subject_buffer != subject_descr)
5346       MemFree(subject_buffer);
5347    MemFree(subject_descr);
5348    MemFree(buffer);
5349    MemFree(eval_buff);
5350 
5351    sip = SeqIdSetFree(sip);
5352    fflush(fp);
5353    return 0;
5354 }
5355 
5356 
5357 Boolean FastaCheckDna(CharPtr seq)
5358 {
5359     Uint2 len = 100;
5360     CharPtr ptr = NULL;
5361 
5362     if (*seq == '>') {
5363         for (++seq; *seq != NULLB && *seq != '\n'; seq++) ;
5364     }
5365 
5366     if ((ptr = StrChr(seq, '>')) != NULL)
5367        len = MIN(len, ptr - seq);
5368     else 
5369        len = MIN(len, StringLen(seq));
5370 
5371     return (CheckDnaResidue(seq, len, NULL));
5372 }
5373 
5374 BLASTHSPSegmentPtr BLASTHSPSegmentFromSeqAlign (SeqAlignPtr sap)
5375 {
5376    BLASTHSPSegmentPtr hsp = (BLASTHSPSegmentPtr) 
5377       MemNew(sizeof(BLASTHSPSegment));
5378    Int4 numseg;
5379    DenseDiagPtr ddp;
5380    DenseSegPtr dsp;
5381    StdSegPtr ssp;
5382 
5383    switch(sap->segtype) {
5384    case SAS_DENDIAG:
5385       ddp = (DenseDiagPtr) sap->segs;
5386       if (ddp->strands == NULL || ddp->strands[0] != Seq_strand_minus) {
5387          hsp->q_start = ddp->starts[0] + 1;
5388          hsp->q_end = ddp->starts[0] + ddp->len;
5389       } else {
5390          hsp->q_start = ddp->starts[0] + ddp->len;
5391          hsp->q_end = ddp->starts[0] + 1;
5392       }
5393       if (ddp->strands == NULL || ddp->strands[1] != Seq_strand_minus) {
5394          hsp->s_start = ddp->starts[1] + 1;
5395          hsp->s_end = ddp->starts[1] + ddp->len;
5396       } else {
5397          hsp->s_start = ddp->starts[1] + ddp->len;
5398          hsp->s_end = ddp->starts[1] + 1;
5399       }
5400       break;
5401    case SAS_DENSEG:
5402       dsp = (DenseSegPtr) sap->segs;
5403       numseg = dsp->numseg;
5404       if (dsp->strands == NULL || dsp->strands[0] != Seq_strand_minus) {
5405          hsp->q_start = dsp->starts[0] + 1;
5406          hsp->q_end = dsp->starts[2*numseg-2] + dsp->lens[numseg-1];
5407       } else {
5408          hsp->q_start = dsp->starts[2*numseg-2] + 1;
5409          hsp->q_end = dsp->starts[0] + dsp->lens[0];
5410       }
5411       
5412       if (dsp->strands == NULL || dsp->strands[1] != Seq_strand_minus) {
5413          hsp->s_start = dsp->starts[1] + 1;
5414          hsp->s_end = dsp->starts[2*numseg-1] + dsp->lens[numseg-1];
5415       } else {
5416          hsp->s_start = dsp->starts[2*numseg-1] + 1;
5417          hsp->s_end = dsp->starts[1] + dsp->lens[0];
5418       }
5419       break;
5420    case SAS_STD:
5421       ssp = (StdSegPtr) sap->segs;
5422       
5423       if (SeqLocStrand(ssp->loc) != Seq_strand_minus)
5424          hsp->q_start = SeqLocStart(ssp->loc);
5425       else
5426          hsp->q_end = SeqLocStop(ssp->loc);
5427       if (SeqLocStrand(ssp->loc->next) != Seq_strand_minus)
5428          hsp->s_start = SeqLocStart(ssp->loc->next);
5429       else
5430          hsp->s_end = SeqLocStop(ssp->loc->next);
5431 
5432       while (ssp->next)
5433          ssp = ssp->next;
5434 
5435       if (SeqLocStrand(ssp->loc) != Seq_strand_minus)
5436          hsp->q_end = SeqLocStop(ssp->loc);
5437       else
5438          hsp->q_start = SeqLocStart(ssp->loc);
5439       if (SeqLocStrand(ssp->loc->next) != Seq_strand_minus)
5440          hsp->s_end = SeqLocStop(ssp->loc->next);
5441       else
5442          hsp->s_start = SeqLocStart(ssp->loc->next);
5443       break;
5444    default:
5445       break;
5446    }
5447    return hsp;
5448 }
5449 
5450 #define CLUSTER_GOOD_HIT_LTHRESH 50
5451 
5452 SeqAlignPtr BlastClusterHitsFromSeqAlign(SeqAlignPtr seqalign, 
5453                                          CharPtr prog_name, 
5454                                          CharPtr database,
5455                                          BLAST_OptionsBlkPtr options,
5456                                          FloatHi length_thresh,
5457                                          FloatHi score_thresh,
5458                                          FloatHi overlap_thresh,
5459                                          Boolean two_sided)
5460 {
5461    SeqAlignPtr sap, prev_sap, head, next_sap;
5462    SeqAlignPtr PNTR sapp;
5463    SeqIdPtr sip, next_sip;
5464    Int4 index, hspcnt, index1, q_overlap;
5465    ValNodePtr mask;
5466    BLASTHSPSegmentPtr PNTR hit_array;
5467    BLASTHSPSegmentPtr hsp, hsp1;
5468    BioseqPtr PNTR bspp;
5469    BioseqPtr bsp1, bsp2;
5470    BlastSearchBlkPtr search1;
5471    BLASTResultHitlistPtr hitlist;
5472    BLASTResultHspPtr best_hsp;
5473    BLAST_KarlinBlkPtr kbp;
5474    FloatHi bit_score, length_coverage, score_coverage;
5475    Boolean db_is_na;
5476    Int4 root1, root2;
5477    Int4Ptr root;
5478 
5479    for (sap = seqalign, hspcnt=0; sap; sap = sap->next, hspcnt++);
5480       
5481    sapp = (SeqAlignPtr PNTR) Malloc(hspcnt*sizeof(SeqAlignPtr));
5482    bspp = (BioseqPtr PNTR) Malloc(hspcnt*sizeof(BioseqPtr));
5483    hit_array = (BLASTHSPSegmentPtr PNTR) 
5484       Malloc(hspcnt*sizeof(BLASTHSPSegmentPtr));
5485 
5486    db_is_na = TRUE;
5487    if (!StrCmp(prog_name, "blastp") || !StrCmp(prog_name, "blastx"))
5488       db_is_na = FALSE;
5489 
5490    ReadDBBioseqFetchEnable ("blastall", database, db_is_na, TRUE);
5491 
5492    sip = next_sip = NULL;
5493    prev_sap = NULL;
5494    for (sap = seqalign, hspcnt=0; sap; sap = sap->next) {
5495       next_sip = TxGetSubjectIdFromSeqAlign(sap);
5496       if (SeqIdComp(next_sip, sip) != SIC_YES) {
5497          sapp[hspcnt] = sap;
5498          if (prev_sap)
5499             prev_sap->next = NULL;
5500          bspp[hspcnt]= BioseqLockById(next_sip);
5501          hit_array[hspcnt] = BLASTHSPSegmentFromSeqAlign(sap);
5502          hspcnt++;
5503          sip = next_sip;
5504       }         
5505       prev_sap = sap;
5506    }
5507 
5508    root = (Int4Ptr) Malloc(hspcnt*sizeof(Int4));
5509    for (index=0; index<hspcnt; index++)
5510       root[index] = index;
5511 
5512    index = 0;
5513    while (index<hspcnt) {
5514       hsp = hit_array[index];
5515       
5516       bsp1 = bspp[index];
5517       
5518       search1 = 
5519          BlastQuerySequenceSetUp(bsp1, prog_name, options);
5520       root2 = index;
5521       while (root[root2] != root2)
5522          root2 = root[root2];
5523 
5524       for (index1=index+1; index1<hspcnt; index1++) {
5525          root1 = index1;
5526          while (root[root1] != root1)
5527             root1 = root[root1];
5528          /* If these hits are already in one cluster, no need to compare them */
5529          if (root1 == root2)
5530             continue;
5531          hsp1 = hit_array[index1];
5532          bsp2 = bspp[index1];
5533 
5534          /* For blastn, check if these hits are good enough to be involved in 
5535             clustering, to avoid pairwise comparison of too many sequences */
5536          if (!StrCmp(prog_name, "blastn") && 
5537              MIN(hsp->q_end-hsp->q_start, hsp1->q_end-hsp1->q_start) < 
5538              CLUSTER_GOOD_HIT_LTHRESH)
5539             continue;
5540          /* Check if the two lengths are comparable, for the 2-sided case */
5541          if (two_sided && ((FloatHi)ABS(bsp1->length - bsp2->length)) / 
5542              MIN(bsp1->length, bsp2->length) > length_thresh)
5543             continue;
5544          /* Check if the next hit passes a simple query overlap test to be a
5545             candidate to belong to this cluster */
5546          q_overlap = 
5547             MIN(hsp->q_end, hsp1->q_end) - 
5548             MAX(hsp->q_start, hsp1->q_start);
5549          if (((FloatHi)q_overlap) / 
5550              (MAX(hsp->q_end-hsp->q_start, hsp1->q_end-hsp1->q_start) + 1) <
5551              overlap_thresh)
5552             continue;
5553          
5554          /* Do the two sequences search to determine whether this 
5555             candidate in fact belongs to this cluster */
5556          search1 = BlastSequencesOnTheFlyEx(search1, bsp2); 
5557          
5558          if (search1) {
5559             hitlist = search1->result_struct->results[0];
5560             if (hitlist && hitlist->hspcnt > 0) {
5561                best_hsp = &hitlist->hsp_array[0];
5562                if (search1->pbp->gapped_calculation)
5563                   kbp = search1->sbp->kbp_gap[search1->first_context];
5564                else
5565                   kbp = search1->sbp->kbp[search1->first_context]; 
5566                bit_score = ((hitlist->high_score *
5567                              kbp->Lambda) - kbp->logK)/NCBIMATH_LN2;
5568 
5569                if (two_sided)
5570                   length_coverage = MIN(((FloatHi)best_hsp->query_length) / 
5571                                         bsp1->length, 
5572                                         ((FloatHi)best_hsp->subject_length) / 
5573                                         bsp2->length);
5574                else
5575                   length_coverage = MAX(((FloatHi)best_hsp->query_length) / 
5576                                         bsp1->length, 
5577                                         ((FloatHi)best_hsp->subject_length) / 
5578                                         bsp2->length);
5579                score_coverage = bit_score /
5580                   MAX(best_hsp->query_length, best_hsp->subject_length);
5581                if (score_coverage > score_thresh && 
5582                    length_coverage > length_thresh) {
5583                   /* remove the respective hit */
5584                   root1 = index1;
5585                   while (root[root1] != root1)
5586                      root1 = root[root1];
5587                   root[root1] = root[root2];
5588                }
5589             }
5590          }
5591       }
5592       if (search1) {
5593          mask = search1->mask;
5594          while (mask) {
5595             SeqLocSetFree(mask->data.ptrvalue);
5596             mask = mask->next;
5597          }
5598          ValNodeFree(search1->mask);
5599          search1 = BlastSearchBlkDestruct(search1);
5600       }
5601       for (++index; index<hspcnt && hit_array[index]==NULL; index++);
5602    }
5603 
5604    /* Remove the unneeded seqaligns */
5605    head = NULL;
5606 
5607    for (index=0; index<hspcnt; index++) {
5608       if (root[index] == index) {
5609          if (!head) {
5610             head = (SeqAlignPtr) Malloc(sizeof(SeqAlign));
5611             MemCpy(head, sapp[index], sizeof(SeqAlign));
5612             for (sap = head; sap->next; sap = sap->next) {
5613                next_sap = sap->next;
5614                sap->next = (SeqAlignPtr) Malloc(sizeof(SeqAlign));
5615                MemCpy(sap->next, next_sap, sizeof(SeqAlign));
5616             }
5617             sapp[index] = head;
5618          } else {
5619             sap->next = sapp[index];
5620             for (index1=0; sap->next; sap = sap->next, index1=1) {
5621                next_sap = sap->next;
5622                sap->next = (SeqAlignPtr) Malloc(sizeof(SeqAlign));
5623                MemCpy(sap->next, next_sap, sizeof(SeqAlign));
5624                if (index1==0)
5625                   sapp[index] = sap->next;
5626             }
5627          }
5628       } else {
5629          /* Save the ids of sequences in the cluster in the 'master' field of
5630             SeqAlign structure (kludge) */
5631          if (!sapp[root[index]]->master) {
5632             sapp[root[index]]->master = 
5633                SeqIdDup(TxGetSubjectIdFromSeqAlign(sapp[index]));
5634          } else {
5635             sip = sapp[root[index]]->master;
5636             while (sip->next)
5637                sip = sip->next;
5638             sip->next = SeqIdDup(TxGetSubjectIdFromSeqAlign(sapp[index]));
5639          }
5640       }
5641    }
5642 
5643    for (index=0; index<hspcnt; index++)
5644       BioseqUnlock(bspp[index]);
5645    MemFree(bspp);
5646 
5647    ReadDBBioseqFetchDisable();
5648   
5649    return head;
5650 }
5651 
5652 /* This function does everything necessary to produce a SeqAlign from the
5653    intermediate BLAST results structures;
5654    In case of multiple queries, the SeqAlign array must be allocated outside.
5655 */
5656 void
5657 BLASTPostSearchLogic(BlastSearchBlkPtr search, BLAST_OptionsBlkPtr options,
5658                      SeqAlignPtr PNTR seqalignp, Boolean single_chain)
5659 {
5660    BLASTResultHspPtr hsp;
5661    GapXEditBlockPtr edit_block;
5662    SeqIdPtr gi_list=NULL,subject_id;
5663    SeqAlignPtr head,seqalign,tail;
5664    BLASTResultsStructPtr result_struct;
5665    BLASTResultHitlistPtr   result_hitlist;
5666    Uint1Ptr sequence;
5667    Int4 length,hitlist_count,sequence_length,index,index1,total_num_hsp=0,hspcnt;
5668    Char buffer[512];
5669    Int2 num_queries, query_index;
5670 
5671    /* AM: Support for query multiplexing. */
5672    MQ_DivideResultsInfoPtr subjects = NULL;
5673    Int4 index2;
5674    Int4 nhlists = 0;
5675    
5676    head = NULL;
5677    tail = NULL;
5678    if (single_chain)
5679       *seqalignp = NULL;
5680    
5681    if (search->prog_number==blast_type_blastn) {
5682       /* Unconcatenate the strands by adjusting the query offsets in
5683          all hsps */
5684       search->context[search->first_context].query->length = 
5685          search->query_context_offsets[search->first_context+1] - 1;
5686    }
5687    
5688    if (StringCmp(search->prog_name, "blastn") == 0 && 
5689        search->pbp->gapped_calculation)
5690    {
5691       num_queries = search->last_context / 2 + 1;
5692 
5693       for (query_index=0; query_index<num_queries; query_index++) {
5694          if (options) {
5695             search->pbp->gap_open = options->gap_open;
5696             search->pbp->gap_extend = options->gap_extend;
5697          }
5698          /*
5699            search->pbp->gap_x_dropoff = (BLAST_Score) (options->gap_x_dropoff*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
5700            search->pbp->gap_x_dropoff_final = (BLAST_Score) (options->gap_x_dropoff_final*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
5701          */
5702          if (!search->pbp->mb_params) {
5703             result_struct = search->result_struct;
5704          } else {
5705             result_struct = search->mb_result_struct[query_index];
5706          }
5707          
5708          if (!result_struct)
5709             continue;
5710          hitlist_count = result_struct->hitlist_count;
5711          
5712          sequence=NULL;
5713          sequence_length=0;
5714          
5715          for (index=0; index<hitlist_count; index++)
5716          {
5717             if (search->pbp->mb_params && !search->pbp->mb_params->no_traceback
5718                 && !search->pbp->mb_params->use_dyn_prog) {
5719                /* Traceback has already been computed */
5720                seqalign = 
5721                   MegaBlastGapInfoToSeqAlign(search, index, query_index);
5722             } else {
5723                length = readdb_get_sequence_ex(search->rdfp, 
5724                            result_struct->results[index]->subject_id,
5725                            &sequence, &sequence_length, TRUE);
5726                
5727                if (!search->pbp->mb_params) {
5728                   /* Traditional Blastn */
5729                   seqalign = SumBlastGetGappedAlignmentTraceback(
5730                                 search, index, FALSE, FALSE, 
5731                                 sequence+1, length);
5732                } else {
5733                   /* Mega BLAST with non-greedy extension */
5734                   SumBlastGetGappedAlignmentEx(search, index, FALSE, FALSE, 
5735                                                sequence+1, length, TRUE,
5736                                                &seqalign, NULL, query_index);
5737                }
5738             }
5739             result_struct->results[index]->seqalign = seqalign;
5740             total_num_hsp += result_struct->results[index]->hspcnt;
5741             BLASTResultFreeHsp(result_struct->results[index]);
5742             if (search->pbp->total_hsp_limit > 0 && total_num_hsp > search->pbp->total_hsp_limit)
5743             {
5744                sprintf(buffer, "Only alignments to %ld best database sequences returned", (long) index+1);
5745                BlastConstructErrorMessage("EngineCore", buffer, 1, &(search->error_return));
5746                break;
5747             }   
5748          }
5749          sequence = MemFree(sequence);
5750          sequence_length = 0;
5751          
5752          HeapSort(result_struct->results, hitlist_count, sizeof(BLASTResultHitlistPtr), evalue_compare_hits);
5753 
5754          /* 
5755             The next loop organizes the SeqAligns (and the alignments
5756             in the BLAST report) in the same order as the deflines.
5757          */
5758          if (!single_chain) {
5759             head = tail = NULL;
5760          }
5761 
5762          /* AM: Support for query multiplexing. */
5763          if( search->mult_queries )
5764          {
5765            subjects = (MQ_DivideResultsInfoPtr)MemNew( sizeof( MQ_DivideResultsInfo )*hitlist_count );
5766            nhlists = hitlist_count;
5767          }
5768          else nhlists = MIN(hitlist_count, options->hitlist_size);
5769 
5770          for (index=0, index2 = 0; index<nhlists; index++) {
5771             seqalign = result_struct->results[index]->seqalign;
5772             if (seqalign) {
5773 
5774               /* AM: Support for query multiplexing. */
5775               if( search->mult_queries )
5776               {
5777                 subjects[index2].evalue
5778                   = result_struct->results[index]->best_evalue;
5779                 subjects[index2++].subject_id 
5780                   = result_struct->results[index]->subject_id;
5781               }
5782 
5783                if (head == NULL) {
5784                   head = seqalign;
5785                } else {
5786                   for (; tail->next; tail = tail->next);
5787                   tail->next = seqalign;
5788                }
5789                tail = seqalign;
5790             }
5791          }
5792 
5793          /* AM: Support for query multiplexing. */
5794          if( search->mult_queries )
5795          {
5796            search->mult_queries->sap_array_data->sap_array
5797              = DivideSeqAligns( options, head, 
5798                                 search->mult_queries, subjects );
5799            subjects = MemFree( subjects );
5800          }
5801 
5802          /* Free the extra seqaligns */
5803          for ( ; index < hitlist_count; index++)
5804             result_struct->results[index]->seqalign = 
5805                SeqAlignSetFree(result_struct->results[index]->seqalign);
5806          
5807          if (!single_chain)
5808             seqalignp[query_index] = head;
5809       }
5810       if (!single_chain)
5811          head = NULL;
5812    }
5813    else if (search->pbp->gapped_calculation)
5814       /* Non-blastn programs */
5815    {
5816       result_struct = search->result_struct;
5817       hitlist_count = result_struct->hitlist_count;
5818 
5819       if (!options || !(options->smith_waterman ||
5820                         options->tweak_parameters)) {
5821 
5822          for (index=0; index<hitlist_count; index++) {
5823             seqalign = BlastGetGapAlgnTbckWithReaddb(search, index, FALSE);
5824             result_struct->results[index]->seqalign = seqalign;
5825             /*BLASTResultFreeHsp(result_struct->results[index]);*/
5826             total_num_hsp += result_struct->results[index]->hspcnt;
5827             if (search->pbp->total_hsp_limit > 0 && total_num_hsp > search->pbp->total_hsp_limit)
5828             {
5829                sprintf(buffer, "Only alignments to %ld best database sequences returned", (long) index+1);
5830                BlastConstructErrorMessage("EngineCore", buffer, 1, &(search->error_return));
5831                break;
5832             }
5833          }
5834          HeapSort(result_struct->results, hitlist_count,
5835                   sizeof(BLASTResultHitlistPtr), evalue_compare_hits);
5836 
5837          for (index=0;  index<hitlist_count;  index++) {
5838              BLASTResultFreeHsp(result_struct->results[index]);
5839          }
5840          index = 0;
5841          /* AM: Support for query multiplexing. */
5842          if (search->mult_queries) {
5843              subjects = (MQ_DivideResultsInfoPtr) 
5844                  MemNew(sizeof(MQ_DivideResultsInfo) * hitlist_count);
5845              nhlists = hitlist_count;
5846          } else {
5847              nhlists = MIN(hitlist_count, options->hitlist_size);
5848          }
5849          /* Only save alignments up to the original hitlist size */
5850          for (index2 = 0;  index < nhlists;  index++) {
5851              seqalign = result_struct->results[index]->seqalign;
5852              /* AM: Support for query multiplexing. */
5853              if (search->mult_queries) {
5854                  subjects[index2].evalue
5855                      = result_struct->results[index]->best_evalue;
5856                  subjects[index2++].subject_id 
5857                      = result_struct->results[index]->subject_id;
5858              }
5859              if (seqalign) {
5860                  if (head == NULL) {
5861                      head = seqalign;
5862                  } else {
5863                      for ( ;  tail->next;  tail = tail->next) ;
5864                      tail->next = seqalign;
5865                  }
5866                  tail = seqalign;
5867              }
5868          }
5869          /* AM: Support for query multiplexing. */
5870          if (search->mult_queries) {
5871              search->mult_queries->sap_array_data->sap_array
5872                  = DivideSeqAligns(options, head, 
5873                                    search->mult_queries, subjects);
5874              subjects = MemFree(subjects);
5875          }
5876       } else {
5877           SeqAlignPtr * sap =
5878               RedoAlignmentCore(search, options, hitlist_count, 
5879                                    options->tweak_parameters,
5880                                    options->smith_waterman);
5881           if (search->mult_queries) {
5882               int query_index;
5883               search->mult_queries->sap_array_data->sap_array = sap;
5884               /* Set head to the first non-null list of seqAligns; or 
5885                * NULL if none exists -- a nonnil value of head
5886                * indicates that results were found, even if head
5887                * doesn't contain all results. */
5888               head = NULL;
5889               for (query_index = 0;
5890                    query_index < search->mult_queries->NumQueries;
5891                    query_index++) {
5892                   if (sap[query_index] != NULL) {
5893                       head = sap[query_index];
5894                       break;
5895                   }
5896               }
5897           } else {
5898               if (sap)
5899               {
5900                  head = sap[0];
5901                  MemFree(sap);
5902               }
5903           }  
5904           for (index=0;  index<hitlist_count;  index++) {
5905               BLASTResultFreeHsp(result_struct->results[index]);
5906           }
5907           /* Set index = 0 to free any seqaligns remaining in the
5908            * results_struct; RedoAlignmentCore doesn't add seqaligns
5909            * to results_struct, so if there are any they are the
5910            * result of an early phase of blast. */
5911           index = 0;
5912       }
5913       /* Free the unused seqaligns (all in case of PSI BLAST, 
5914          extra ones otherwise */
5915       for ( ; index<hitlist_count; index++) {
5916          result_struct->results[index]->seqalign = 
5917             SeqAlignSetFree(result_struct->results[index]->seqalign);
5918       }
5919       search->number_of_seqs_better_E =
5920           MIN(hitlist_count, options->hitlist_size);
5921    } else {
5922       /* Ungapped psi-blast. */
5923       if (StringCmp("blastp", search->prog_name) == 0 && search->positionBased == TRUE)
5924       {
5925          result_struct = search->result_struct;
5926          hitlist_count = result_struct->hitlist_count;
5927          for (index=0; index<hitlist_count; index++)
5928          {
5929             result_hitlist = result_struct->results[index];
5930             gi_list = BlastGetAllowedGis(search, result_hitlist->subject_id, NULL);
5931             hspcnt = result_hitlist->hspcnt;
5932             subject_id = BlastGetSubjectId(search, index, TRUE, NULL);
5933             for (index1=0; index1<hspcnt; index1++)
5934             {
5935                hsp = &(result_hitlist->hsp_array[index1]);
5936                edit_block = SimpleIntervalToGapXEditBlock(hsp->query_offset, hsp->subject_offset, hsp->query_length);
5937                seqalign = GapXEditBlockToSeqAlign(edit_block, SeqIdDup(subject_id), SeqIdDup(search->query_id));
5938                seqalign->score = GetScoreSetFromBlastResultHsp(hsp, gi_list);
5939                
5940                if (head == NULL) {
5941                   head = seqalign;
5942                } else {
5943                   for ( ; tail->next; tail = tail->next);
5944                   tail->next = seqalign;
5945                }
5946                tail = seqalign;
5947                total_num_hsp += result_struct->results[index]->hspcnt;
5948                if (search->pbp->total_hsp_limit > 0 && total_num_hsp > search->pbp->total_hsp_limit)
5949                {
5950                   sprintf(buffer, "Only alignments to %ld best database sequences returned", (long) index+1);
5951                   BlastConstructErrorMessage("EngineCore", buffer, 1, &(search->error_return));
5952                   break;
5953                }        
5954             }
5955          }
5956       } /* end blastp */
5957       else
5958       {
5959          Boolean discontinuous = ((options != NULL) && options->discontinuous);
5960          if (StringCmp("blastn", search->prog_name) == 0 || StringCmp("blastp", search->prog_name) == 0)
5961             head = GetSeqAlignForResultHitList(search, TRUE, FALSE, discontinuous, FALSE, FALSE);
5962          else
5963             head = GetSeqAlignForResultHitList(search, FALSE, FALSE, discontinuous, FALSE, FALSE);
5964       }
5965    }
5966    
5967    gi_list = SeqIdSetFree(gi_list);
5968 
5969    if (head)
5970       *seqalignp = head;
5971 }
5972 
5973 /*return query fasta style title(id+title). New memory was allocated for this title*/
5974 CharPtr getFastaStyleTitle(BioseqPtr bsp){
5975   
5976   CharPtr titleBuf=NULL;
5977 
5978   if(bsp){
5979     
5980     if (bsp->id)
5981       {
5982         Char idBuf[BUFFER_LENGTH+1];
5983         CharPtr seqTitle, temp=NULL;
5984 
5985         SeqIdWrite(bsp->id, idBuf, PRINTID_FASTA_LONG, BUFFER_LENGTH);
5986         temp=idBuf;
5987         if (StringNCmp(idBuf, "lcl|", 4) == 0)
5988           temp=StrStr(idBuf, "gi|");
5989                 
5990         titleBuf=MemNew(sizeof(Char)*(BUFFER_LENGTH+1));
5991         StringCpy(titleBuf,temp); 
5992         seqTitle= BioseqGetTitle(bsp);
5993         StringNCat(titleBuf, seqTitle, BUFFER_LENGTH-StringLen(temp));
5994        
5995       }
5996   }
5997    return titleBuf;
5998 }
5999 
6000 Int2
6001 BlastHSPGetNumIdentical(BlastSearchBlkPtr search, BLAST_HSPPtr hsp,
6002                         BLASTResultHspPtr result_hsp,
6003                         Int4Ptr num_ident_ptr, Int4Ptr align_length_ptr)
6004 {
6005    Int4 i, num_ident, align_length, q_off, s_off;
6006    Int2 context;
6007    Uint1Ptr query_seq, subject_seq = NULL, q, s;
6008    GapXEditBlockPtr gap_info;
6009    GapXEditScriptPtr esp;
6010 
6011    gap_info = (hsp ? hsp->gap_info : result_hsp->gap_info);
6012 
6013    if (!gap_info && search->pbp->gapped_calculation)
6014       return -1;
6015 
6016    context = (hsp ? hsp->context : result_hsp->context);
6017    q_off = (hsp ? hsp->query.offset : result_hsp->query_offset);
6018    s_off = (hsp ? hsp->subject.offset : result_hsp->subject_offset);
6019 
6020    if (search->pbp->is_ooframe)
6021      q_off /= CODON_LENGTH;
6022    
6023    query_seq = search->context[context].query->sequence;
6024    if (search->subject->sequence_start)
6025       subject_seq = search->subject->sequence_start + 1;
6026    else 
6027       subject_seq = search->subject->sequence;
6028 
6029    if (!subject_seq)
6030       return -1;
6031 
6032    q = &query_seq[q_off];
6033    s = &subject_seq[s_off];
6034 
6035    num_ident = 0;
6036    align_length = 0;
6037 
6038 
6039    if (!gap_info) {
6040       /* Ungapped case */
6041       align_length = (hsp ? hsp->query.length : result_hsp->query_length); 
6042       for (i=0; i<align_length; i++) {
6043          if (*q++ == *s++)
6044             num_ident++;
6045       }
6046    } else {
6047       for (esp = gap_info->esp; esp; esp = esp->next) {
6048          align_length += esp->num;
6049          switch (esp->op_type) {
6050          case GAPALIGN_SUB:
6051             for (i=0; i<esp->num; i++) {
6052                if (*q++ == *s++)
6053                   num_ident++;
6054             }
6055             break;
6056          case GAPALIGN_DEL:
6057             s += esp->num;
6058             break;
6059          case GAPALIGN_INS:
6060             q += esp->num;
6061             break;
6062          default: 
6063             s += esp->num;
6064             q += esp->num;
6065             break;
6066          }
6067       }
6068    }
6069 
6070    *align_length_ptr = align_length;
6071    *num_ident_ptr = num_ident;
6072    return 0;
6073 }
6074 
6075 Int2
6076 OOFBlastHSPGetNumIdentical(Uint1Ptr query_seq, Uint1Ptr subject_seq, 
6077    BLAST_HSPPtr hsp, BLASTResultHspPtr result_hsp,
6078    Int4Ptr num_ident_ptr, Int4Ptr align_length_ptr)
6079 {
6080    Int4 i, num_ident, align_length, q_off, s_off;
6081    Int2 context;
6082    Uint1Ptr q, s;
6083    GapXEditBlockPtr gap_info;
6084    GapXEditScriptPtr esp;
6085 
6086    gap_info = (hsp ? hsp->gap_info : result_hsp->gap_info);
6087 
6088    if (!gap_info)
6089       return -1;
6090 
6091    context = (hsp ? hsp->context : result_hsp->context);
6092    s_off = (hsp ? hsp->query.offset : result_hsp->query_offset);
6093    q_off = (hsp ? hsp->subject.offset : result_hsp->subject_offset);
6094 
6095    if (!subject_seq || !query_seq)
6096       return -1;
6097 
6098    q = &query_seq[q_off];
6099    s = &subject_seq[s_off];
6100 
6101    num_ident = 0;
6102    align_length = 0;
6103 
6104 
6105    for (esp = gap_info->esp; esp; esp = esp->next) {
6106       switch (esp->op_type) {
6107       case 3: /* Substitution */
6108          align_length += esp->num;
6109          for (i=0; i<esp->num; i++) {
6110             if (*q == *s)
6111                num_ident++;
6112             ++q;
6113             s += CODON_LENGTH;
6114          }
6115          break;
6116       case 6: /* Insertion */
6117          align_length += esp->num;
6118          s += esp->num * CODON_LENGTH;
6119          break;
6120       case 0: /* Deletion */
6121          align_length += esp->num;
6122          q += esp->num;
6123          break;
6124       case 1: /* Gap of two nucleotides. */
6125          s -= 2;
6126          break;
6127       case 2: /* Gap of one nucleotide. */
6128          s -= 1;
6129          break;
6130       case 4: /* Insertion of one nucleotide. */
6131          s += 1;
6132          break;
6133       case 5: /* Insertion of two nucleotides. */
6134          s += 2;
6135          break;
6136       default: 
6137          s += esp->num * CODON_LENGTH;
6138          q += esp->num;
6139          break;
6140       }
6141    }
6142 
6143    *align_length_ptr = align_length;
6144    *num_ident_ptr = num_ident;
6145    return 0;
6146 }
6147 
6148 
6149 /*  --------------------------------------------------------------------
6150  *
6151  *  BLAST_Wizard & related functions
6152  *
6153  *  --------------------------------------------------------------------
6154  */
6155 
6156 void
6157 BLAST_WizardOptionsBlkInit(BLAST_WizardOptionsBlkPtr options)
6158 {
6159     memset(options, 0, sizeof(BLAST_WizardOptionsBlk));
6160 }
6161 
6162 void
6163 BLAST_WizardOptionsBlkDone(BLAST_WizardOptionsBlkPtr options)
6164 {
6165     if(options->entrez_query) {
6166         MemFree(options->entrez_query);
6167         options->entrez_query = 0;
6168     }
6169     if(options->filter_string) {
6170         MemFree(options->filter_string);
6171         options->filter_string = 0;
6172     }
6173     ValNodeFree(options->gilist);
6174     options->gilist = 0;
6175     if(options->matrix) {
6176         MemFree(options->matrix);
6177         options->matrix = 0;
6178     }
6179     if(options->phi_pattern) {
6180         MemFree(options->phi_pattern);
6181         options->phi_pattern = 0;
6182     }
6183     if(options->query_lcase_mask) {
6184         ValNodeFreeData(options->query_lcase_mask);
6185         options->query_lcase_mask = 0;
6186     }
6187     if(options->string_options) {
6188         MemFree(options->string_options);
6189         options->string_options = 0;
6190     }
6191 }
6192 
6193 void
6194 BLAST_WizardOptionsMaskInit(BLAST_WizardOptionsMaskPtr mask)
6195 {
6196     mask->block_width               = FALSE;
6197     mask->cutoff_s                  = FALSE;
6198     mask->db_genetic_code           = FALSE;
6199     mask->ethresh                   = FALSE;
6200     mask->expect_value              = FALSE;
6201     mask->first_db_seq              = FALSE;
6202     mask->final_db_seq              = FALSE;
6203     mask->gap_extend                = FALSE;
6204     mask->gap_open                  = FALSE;
6205     mask->gapped_calculation        = FALSE;
6206     mask->genetic_code              = FALSE;
6207     mask->hitlist_size              = FALSE;
6208     mask->hsp_range_max             = FALSE;
6209     mask->is_ooframe                = FALSE;
6210     mask->mb_disc_type              = FALSE;
6211     mask->mb_template_length        = FALSE;
6212     mask->no_traceback              = FALSE;
6213     mask->penalty                   = FALSE;
6214     mask->perc_identity             = FALSE;
6215     mask->perform_culling           = FALSE;
6216     mask->pseudoCountConst          = FALSE;
6217     mask->required_end              = FALSE;
6218     mask->required_start            = FALSE;
6219     mask->reward                    = FALSE;
6220     mask->db_length                 = FALSE;
6221     mask->searchsp_eff              = FALSE;
6222     mask->smith_waterman            = FALSE;
6223     mask->strand_option             = FALSE;
6224     mask->threshold_first           = FALSE;
6225     mask->threshold_second          = FALSE;
6226     mask->tweak_parameters          = FALSE;
6227     mask->use_best_align            = FALSE;
6228     mask->use_real_db_size          = FALSE;
6229     mask->window_size               = FALSE;
6230     mask->wordsize                  = FALSE;
6231 
6232     mask->two_hits                  = FALSE;
6233 }
6234 
6235 BLAST_OptionsBlkPtr
6236 BLAST_Wizard(
6237     const char*                 program,
6238     const char*                 service,
6239     BLAST_WizardOptionsBlkPtr   options,
6240     BLAST_WizardOptionsMaskPtr  mask,
6241     int*                        alignments,
6242     int*                        descriptions,
6243     char**                      error)
6244 {
6245     int dummy_alignments = -1;
6246     int dummy_descriptions = -1;
6247     BLAST_OptionsBlkPtr out = 0;
6248     Boolean ungapped_alignment;
6249     *error = 0;
6250     if(!alignments)
6251         alignments = &dummy_alignments;
6252     if(!descriptions)
6253         descriptions = &dummy_descriptions;
6254     if(mask->expect_value && mask->cutoff_s) {
6255         *error = StringSave("can't specify both expect_value and cutoff_s");
6256         goto end;
6257     }
6258     ungapped_alignment = mask->gapped_calculation ?
6259         !options->gapped_calculation :
6260         FALSE;
6261     if(!StringCmp(service, "vecscreen")) {
6262         out = VSBlastOptionNew();
6263         out->gapped_calculation = !ungapped_alignment;
6264     }
6265     else {
6266         Boolean is_megablast_search = !StringICmp(service, "megablast");
6267         out = BLASTOptionNewEx(
6268             (char*) program, !ungapped_alignment, is_megablast_search);
6269 
6270         /* set some defaults for backward compat. with blastcgicmd.cpp */
6271         if(!StringCmp(service, "psi"))
6272             out->ethresh = 0.002;
6273         else if (!StringCmp(service, "rpsblast"))
6274             out->is_rps_blast = TRUE;
6275 
6276         /* ignore some fields for backward compat. with blastcgicmd.cpp */
6277         if(is_megablast_search)
6278             mask->block_width = FALSE;
6279         else {
6280             mask->mb_disc_type = FALSE;
6281             mask->mb_template_length = FALSE;
6282             mask->no_traceback = FALSE;
6283             mask->two_hits = FALSE;
6284         }
6285     }
6286     if(out->is_megablast_search) {
6287         out->no_traceback = mask->no_traceback ?
6288             options->no_traceback :
6289             FALSE;
6290     }
6291     if(mask->is_ooframe)
6292         out->is_ooframe = options->is_ooframe;
6293     if(mask->first_db_seq)
6294         out->first_db_seq = options->first_db_seq;
6295     if(mask->final_db_seq)
6296         out->final_db_seq = options->final_db_seq;
6297     if(mask->pseudoCountConst)
6298         out->pseudoCountConst = options->pseudoCountConst;
6299     if(mask->strand_option)
6300         out->strand_option = options->strand_option;
6301     if(mask->use_best_align)
6302         out->use_best_align = options->use_best_align;
6303     if(mask->use_real_db_size)
6304         out->use_real_db_size = options->use_real_db_size;
6305     if(mask->window_size)
6306         out->window_size = options->window_size;
6307     if(mask->expect_value)
6308         out->expect_value = options->expect_value;
6309     if(mask->cutoff_s)
6310         out->cutoff_s = options->cutoff_s;
6311     if(mask->threshold_first)
6312         out->threshold_first = options->threshold_first;
6313     if(mask->threshold_second)
6314         out->threshold_second = options->threshold_second;
6315     if(mask->perc_identity)
6316         out->perc_identity = options->perc_identity;
6317     if(out->is_megablast_search) {
6318         if (out->perc_identity >= 95.0) {
6319             out->reward = 1;
6320             out->penalty = -3;
6321         }
6322         else if(out->perc_identity >= 85.0
6323                 || out->perc_identity == 0.0) {
6324             out->reward = 1;
6325             out->penalty = -2;
6326         }
6327         else if (out->perc_identity >= 80.0) {
6328             out->reward = 2;
6329             out->penalty = -3;
6330         }
6331         else if (out->perc_identity >= 75.0) {
6332             out->reward = 4;
6333             out->penalty = -5;
6334         }
6335         else if (out->perc_identity >= 60.0) {
6336             out->reward = 2;
6337             out->penalty = -2;
6338         }
6339     }
6340     if(mask->penalty)
6341         out->penalty = options->penalty;
6342     if(mask->reward)
6343         out->reward = options->reward;
6344     if(mask->wordsize)
6345         out->wordsize = options->wordsize;
6346     if(mask->db_length)
6347         out->db_length = options->db_length;
6348     if(mask->searchsp_eff)
6349         out->searchsp_eff = options->searchsp_eff;
6350     if(mask->hsp_range_max)
6351         out->hsp_range_max = options->hsp_range_max;
6352     if(!out->is_megablast_search) {
6353         if(mask->block_width)
6354             out->block_width = options->block_width;
6355     }
6356     else {
6357         if(mask->mb_template_length)
6358             out->mb_template_length = options->mb_template_length;
6359         if(mask->two_hits && options->two_hits)
6360             out->window_size = 40;
6361         if(mask->mb_disc_type)
6362             out->mb_disc_type = options->mb_disc_type;
6363         if(out->mb_template_length > 0
6364            && out->wordsize <= 12
6365            && out->wordsize >= 11
6366            && out->perc_identity < 90.0) {
6367             if(out->perc_identity >= 85.0) {
6368                 out->gap_open = 4;
6369                 out->gap_extend = 1;
6370             }
6371             else if(out->perc_identity >= 80.0) {
6372                 out->gap_open = 5;
6373                 out->gap_extend = 2;
6374             }
6375             else if(out->perc_identity >= 75) {
6376                 out->gap_open = 7;
6377                 out->gap_extend = 2;
6378             }
6379             else if(out->perc_identity >= 60) {
6380                 out->gap_open = 3;
6381                 out->gap_extend = 1;
6382             }
6383         }
6384     }
6385     if(!out->perform_culling)
6386         out->perform_culling = mask->perform_culling ?
6387             options->perform_culling :
6388             FALSE;
6389     if(mask->gap_open)
6390         out->gap_open = options->gap_open;
6391     if(mask->gap_extend)
6392         out->gap_extend = options->gap_extend;
6393     if(mask->genetic_code)
6394         out->genetic_code = options->genetic_code;
6395     if(mask->db_genetic_code)
6396         out->db_genetic_code = options->db_genetic_code;
6397     if(!out->tweak_parameters)
6398         out->tweak_parameters = mask->tweak_parameters ?
6399             options->tweak_parameters :
6400             FALSE;
6401     if(!strcmp(service, "psi")) {
6402         out->ethresh = mask->ethresh ?
6403             options->ethresh :
6404             0.002;
6405         out->tweak_parameters = mask->tweak_parameters ?
6406             options->tweak_parameters :
6407             TRUE;
6408     }
6409     if(options->entrez_query) {
6410         out->entrez_query = options->entrez_query; /* take ownership */
6411         options->entrez_query = 0;
6412         out->gifile = 0;
6413     }
6414     if(options->matrix) {
6415         out->matrix = options->matrix; /* take ownership */
6416         options->matrix = 0;
6417     }
6418     if(options->phi_pattern) {
6419         out->phi_pattern = options->phi_pattern; /* take ownership */
6420         options->phi_pattern = 0;
6421     }
6422     if(out->is_megablast_search) {
6423         if(out->wordsize < 8)
6424             out->wordsize = 8;
6425         out->cutoff_s2 = out->wordsize * out->reward;
6426     }
6427     out->hitlist_size = -1;
6428     if(mask->hitlist_size) {
6429         out->hitlist_size = options->hitlist_size;
6430         *alignments = MIN(out->hitlist_size, *alignments);
6431         *descriptions = MIN(out->hitlist_size, *descriptions);
6432     }
6433     else
6434         out->hitlist_size = -1;
6435     if(options->string_options) {
6436         int other_adv_descriptions = -1;
6437         int other_adv_alignments = -1;
6438         parse_blast_options(
6439             out,
6440             options->string_options,
6441             error,
6442             NULL,
6443             &other_adv_descriptions,
6444             &other_adv_alignments);
6445         if(*error)
6446             goto end;
6447         if (other_adv_alignments != -1) {
6448             *alignments = other_adv_alignments;
6449             out->hitlist_size =
6450                 MAX(out->hitlist_size, other_adv_alignments);
6451         }
6452         if (other_adv_descriptions != -1) {
6453             *descriptions = other_adv_descriptions;
6454             out->hitlist_size =
6455                 MAX(out->hitlist_size, other_adv_descriptions);
6456         }
6457         if(!StringCmp(out->filter_string, "T")) {
6458             MemFree(out->filter_string);
6459             if(StringICmp(program, "blastn"))
6460                 out->filter_string = StringSave("S");
6461             else
6462                 out->filter_string = StringSave("D");
6463         }
6464     }
6465     /* append ; for backward compat. with blastcgicmd.cpp */
6466     if(out->filter_string) {
6467         char buf[10240];
6468         if(strlen(out->filter_string)+2 > sizeof buf) {
6469             *error = StringSave("internal error: buffer too small");
6470             goto end;
6471         }
6472         sprintf(buf, "%s;", out->filter_string);
6473         MemFree(out->filter_string);
6474         out->filter_string = StringSave(buf);
6475     }
6476     if(options->filter_string) {
6477         char buf[10240];
6478         char* s = out->filter_string ? out->filter_string : "";
6479         if(strlen(s)+strlen(options->filter_string)+1 > sizeof buf) {
6480             *error = StringSave("internal error: buffer too small");
6481             goto end;
6482         }
6483         sprintf(buf, "%s%s", s, options->filter_string);
6484         if(out->filter_string)
6485             MemFree(out->filter_string);
6486         out->filter_string = StringSave(buf);
6487     }
6488     if(out->hitlist_size != -1) {
6489         if(*descriptions == -1)
6490             *descriptions = MIN(100, out->hitlist_size);
6491         if(*alignments == -1)
6492             *alignments = MIN(100, out->hitlist_size);
6493     } else {
6494         if(*descriptions == -1)
6495             *descriptions = 100;
6496         if(*alignments == -1)
6497             *alignments = 100;
6498         out->hitlist_size = MAX(*descriptions, *alignments);
6499     }
6500     if(!strcmp(service, "vecscreen"))
6501         out->hitlist_size = 1000;
6502     if(mask->required_start)
6503         out->required_start = options->required_start;
6504     if(mask->required_end)
6505         out->required_end = options->required_end;
6506         else
6507                 out->required_end = 0; /* backward compat. with blastcgicmd.cpp */
6508     if(options->gilist) {
6509         out->gilist = options->gilist; /* take ownership */
6510         options->gilist = 0;
6511     }
6512     if (options->query_lcase_mask) {
6513         out->query_lcase_mask = options->query_lcase_mask;  /* take ownership */
6514         options->query_lcase_mask = 0;
6515     }
6516 end:
6517     if(*error) {
6518         BLASTOptionDelete(out);
6519         out = 0;
6520     }
6521     return out;
6522 }
6523 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.