NCBI C Toolkit Cross Reference

C/demo/blastall.c


  1 static char const rcsid[] = "$Id: blastall.c,v 6.205 2009/05/28 18:37:11 kans Exp $"; /* RCS Id keyword string: file name, revision, check-in date/time and author of this source file */
  2 
  3 /* $Id: blastall.c,v 6.205 2009/05/28 18:37:11 kans Exp $
  4 **************************************************************************
  5 *                                                                         *
  6 *                             COPYRIGHT NOTICE                            *
  7 *                                                                         *
  8 * This software/database is categorized as "United States Government      *
  9 * Work" under the terms of the United States Copyright Act.  It was       *
 10 * produced as part of the author's official duties as a Government        *
 11 * employee and thus can not be copyrighted.  This software/database is    *
 12 * freely available to the public for use without a copyright notice.      *
 13 * Restrictions can not be placed on its present or future use.            *
 14 *                                                                         *
 15 * Although all reasonable efforts have been taken to ensure the accuracy  *
 16 * and reliability of the software and data, the National Library of       *
 17 * Medicine (NLM) and the U.S. Government do not and can not warrant the   *
 18 * performance or results that may be obtained by using this software,     *
 19 * data, or derivative works thereof.  The NLM and the U.S. Government     *
 20 * disclaim any and all warranties, expressed or implied, as to the        *
 21 * performance, merchantability or fitness for any particular purpose or   *
 22 * use.                                                                    *
 23 *                                                                         *
 24 * In any work or product derived from this material, proper attribution   *
 25 * of the author(s) as the source of the software or data would be         *
 26 * appreciated.                                                            *
 27 *                                                                         *
 28 ************************************************************************** 
 29  * 
 30  * $Log: blastall.c,v $
 31  * Revision 6.205  2009/05/28 18:37:11  kans
 32  * remove include of hspstream_queue.h
 33  *
 34  * Revision 6.204  2009/05/28 14:55:09  camacho
 35  * Fix compilation issues following merge with BLAST sources from the C++ toolkit JIRA SB-166
 36  *
 37  * Revision 6.203  2009/01/26 14:00:56  madden
 38  * env variable sets BLAST_MAXQUERY_SIZE
 39  *
 40  * Revision 6.202  2008/07/01 18:38:14  madden
 41  * Correct X3 value for blastn/megablast
 42  *
 43  * Revision 6.201  2008/01/02 14:02:06  madden
 44  * Make composition-based score adjustments the default for blastp and tblastn
 45  *
 46  * Revision 6.200  2007/11/13 20:31:51  madden
 47  * Enable ARG_BESTHITS arg (culling)
 48  *
 49  * Revision 6.199  2007/10/10 13:16:46  madden
 50  * Fix composition-based command-lines for blastall_old (from Alejandro Schaffer)
 51  *
 52  * Revision 6.198  2007/05/07 13:29:11  kans
 53  * added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
 54  *
 55  * Revision 6.197  2007/05/04 15:37:39  papadopo
 56  * 1. update usage
 57  * 2. fix Smith-Waterman configuration
 58  *
 59  * Revision 6.196  2007/04/24 20:59:40  papadopo
 60  * 1. Increase the query batch size for blastp with a compressed
 61  *    lookup table
 62  * 2. Make composition-based statistics the default for blastp and tblastn
 63  *
 64  * Revision 6.195  2007/03/23 14:37:20  madden
 65  * Move SBlastOptionsFree after BlastFormattingInfoFree to prevent reading deallocated memory
 66  *
 67  * Revision 6.194  2007/03/20 14:56:58  camacho
 68  * Call GeneticCodeSingletonInit/GeneticCodeSingletonFini
 69  *
 70  * Revision 6.193  2007/03/13 18:37:06  kans
 71  * added call to FreeSeqLocSetComponents to free query sequences referenced by query_slp list
 72  *
 73  * Revision 6.192  2007/03/12 23:06:26  papadopo
 74  * fix accidentally reverted change
 75  *
 76  * Revision 6.191  2007/03/12 23:00:50  papadopo
 77  * fix accidentally reverted change
 78  *
 79  * Revision 6.190  2007/03/12 16:14:51  madden
 80  *    - #include string.h to get a declaration for the (nonstandard but
 81  *      common) function strcasecmp
 82  *    - In Main_new, use a Blast_PsiCheckpointLoc to specify the
 83  *      location, if any, of a PSI-BLAST checkpoint file.
 84  *    - In Main_new, use the file extension of the ARG_PSITCHKPNT file
 85  *      to determine the file format if a PSI-BLAST checkpoint file.  Warn
 86  *      if the extension is not .chk, .asn, or .txt, but read unknown
 87  *      extension as standard PSI-BLAST checkpoints.
 88  *    [from Mike Gertz]
 89  *
 90  * Revision 6.188  2007/03/05 14:54:39  camacho
 91  * - Call Blast_FindRepeatFilterSeqLoc with a NULL pointer for a PSI-BLAST
 92  *   checkpoint file.
 93  *
 94  * Revision 6.187  2007/02/08 17:07:22  papadopo
 95  * change signature of FillInitialWordOptions; ungapped extensions are always turned on now by default
 96  *
 97  * Revision 6.186  2007/01/19 14:31:47  madden
 98  * In Main_new, add a case label for psitblastn (from Mike Gertz).
 99  *
100  * Revision 6.185  2006/10/06 12:23:01  madden
101  * Use head_on_every_query boolean on BlastFormattingInfo for backwards compatibility in new engine
102  *
103  * Revision 6.184  2006/10/02 18:13:10  papadopo
104  * make new engine use the -P option
105  *
106  * Revision 6.183  2006/09/13 15:21:28  papadopo
107  * add switch to turn on use of only Smith-Waterman alignments (currently ifdef'ed out)
108  *
109  * Revision 6.182  2006/07/28 21:09:18  papadopo
110  * allow database length to override the real value when using the rewritten blast engine
111  *
112  * Revision 6.181  2006/07/20 20:48:43  madden
113  * Fix for truncated SeqAlign when multiple runs are done
114  *
115  * Revision 6.180  2006/06/09 17:45:36  papadopo
116  * change signature of BlastTabularFormatDataNew
117  *
118  * Revision 6.179  2006/06/07 16:52:54  madden
119  * Enable the use of composition-based statistics and unified P-values for blastp.
120  * (from Mike Gertz)
121  *
122  * Revision 6.178  2006/05/18 16:29:13  papadopo
123  * do not set search space field directly
124  *
125  * Revision 6.177  2006/04/26 12:47:48  madden
126  * Use SBlastMessage in place of Blast_Message
127  *
128  * Revision 6.176  2006/04/25 18:00:19  papadopo
129  * change signature of BlastTabularFormatDataNew
130  *
131  * Revision 6.175  2006/04/21 14:34:50  madden
132  * BLAST_GetQuerySeqLoc prototype change
133  *
134  * Revision 6.174  2006/04/20 15:32:36  papadopo
135  * if query IDs are actually used, verify that there are no duplicate IDs
136  *
137  * Revision 6.173  2006/04/04 13:13:48  madden
138  * 1.) Add range check to ARG_FORMAT argument.
139  * 2.) Rework error reporting so as to not truncate results if there is a warning.
140  *
141  * Revision 6.172  2006/03/08 15:41:26  coulouri
142  * tune query concatenation limits
143  *
144  * Revision 6.171  2006/01/24 18:33:44  papadopo
145  * from Mike Gertz: Use enumerated values, rather than #define'd constants, to specify the composition adjustment method
146  *
147  * Revision 6.170  2006/01/23 16:44:05  papadopo
148  * change signature of FillHitSavingOptions
149  *
150  * Revision 6.169  2006/01/13 15:39:17  madden
151  * - Enabled the use of composition-based statistics for tblastn (and
152  *   only tblastn) using the new engine.
153  * - Disabled setting evalue to a larger number when mode > 1
154  *   composition-based statistics is used by setting EVALUE_EXPAND to 1.
155  * - In Main_new, don't create a BlastSeqSrc.  It is apparently never
156  *   used and is a memory leak.  (from Mike Gertz).
157  *
158  * Revision 6.168  2006/01/10 20:44:10  madden
159  * Use SBlastSeqalignArray
160  *
161  * Revision 6.167  2005/12/22 14:22:19  papadopo
162  * change signature of BLAST_FillLookupTableOptions
163  *
164  * Revision 6.166  2005/12/16 18:30:08  coulouri
165  * disable new engine for smith-waterman and composition-based statistics until they are implemented
166  *
167  * Revision 6.165  2005/12/14 14:43:12  coulouri
168  * enable new engine by default
169  *
170  * Revision 6.164  2005/12/12 13:42:59  madden
171  * SBlastOptionsSetRewardPenaltyAndGapCosts now has new greedy Boolean, BLAST_FillScoringOptions no longer called
172  *
173  * Revision 6.163  2005/10/31 14:15:10  madden
174  * Call SBlastOptionsSetRewardPenaltyAndGapCosts
175  *
176  * Revision 6.162  2005/10/17 14:07:13  madden
177  * Use -1 rather than zero for unset gap parameters
178  *
179  * Revision 6.161  2005/09/29 17:39:29  coulouri
180  * from mike gertz:
181  *    - Removed the unused static routine GetLambdaFast.
182  *    - Removed unused variables from s_FillOptions.
183  *    - Removed unused variables from Main_new.
184  *    - For tblastn, enabled query concatenation when composition-based
185  *      statistics or Smith-Waterman are used.
186  *    - Free seq_annot_arr only if query concatenation is being used.
187  *    - In Nlm_Main, add preprocessor directives around the declaration
188  *      of use_new_engine to suppress a compiler warning.
189  *
190  * Revision 6.159  2005/09/26 15:02:58  morgulis
191  * Fixing some memory leaks when using query concatenation in blastn and tblastn.
192  *
193  * Revision 6.158  2005/09/16 14:10:03  madden
194  * Print out more informative message when Blast_DatabaseSearch has non-zero return if available, add call to SBlastOptionsSetRewardPenaltyAndGapCosts
195  *
196  * Revision 6.157  2005/09/13 17:39:05  kans
197  * include repeats_filter.h
198  *
199  * Revision 6.156  2005/09/08 14:02:06  coulouri
200  * From Mike Gertz:
201  *   - Introduced the new options -C for using composition-based
202  *     statistics with tblastn; and -s for using Smith-Waterman alignments
203  *     with tblastn.
204  *   - Forbid the use of the -B option when -C or -s is present; we
205  *     expect to remove this restriction.
206  *
207  * Revision 6.155  2005/09/01 12:28:52  madden
208  * 1.) add new function Main_new and put old Main in Main_old, which one is called
209  * depends upon the -V option as well as the other params.
210  * 2.) Main_new now runs searches with the new engine.
211  * 3.) Add headers to allow new engine.
212  * 4.) all of the above can be turned off at compile time with a BLASTALL_TOOLS_ONLY define
213  *
214  * Revision 6.154  2005/08/17 12:42:31  madden
215  * Set TXALIGN_SHOW_NO_OF_SEGS for tblastx
216  *
217  * Revision 6.153  2005/08/08 15:47:41  dondosha
218  * Added call to TransTableFreeAll, fixing a memory leak
219  *
220  * Revision 6.152  2005/06/15 21:37:23  dondosha
221  * Do not trigger on-the-fly output with -m8 option for megablast
222  *
223  * Revision 6.151  2005/05/05 14:41:32  coulouri
224  * plug object manager entity id leak - rt ticket 15084082
225  *
226  * Revision 6.150  2005/02/07 15:30:39  dondosha
227  * Removed restriction on the value of longest intron option
228  *
229  * Revision 6.149  2005/01/10 18:52:28  coulouri
230  * fixes from morgulis to allow concatenation of >255 queries in [t]blastn
231  *
232  * Revision 6.148  2004/09/28 16:06:38  papadopo
233  * From Michael Gertz:
234  * 1. Disabled ungapped psitblastn.
235  * 2. The longest_intron parameter no longer has a minimum value of 4000.
236  * 3. Changed the command line help for the longest_intron parameter.
237  *
238  * Revision 6.147  2004/08/17 17:22:33  madden
239  * Add BlastArguments enum for command-line arguments
240  *
241  * Revision 6.146  2004/07/29 00:05:57  coulouri
242  * fix blastcl3 umr
243  *
244  * Revision 6.145  2004/07/28 18:49:56  coulouri
245  * fix printf specifier
246  *
247  * Revision 6.144  2004/06/30 12:33:30  madden
248  * Add include for blfmtutl.h
249  *
250  * Revision 6.143  2004/05/13 18:42:44  coulouri
251  * disable -B for blastcl3
252  *
253  * Revision 6.142  2004/04/29 19:56:00  dondosha
254  * Mask filtered locations in query sequence lines in XML output
255  *
256  * Revision 6.141  2004/04/20 14:55:47  morgulis
257  * 1. Fixed query offsets in results when -B option is used.
258  * 2. Fixes for lower case masking handling with -B option.
259  *
260  * Revision 6.140  2004/03/26 21:42:19  coulouri
261  * remove unused variables
262  *
263  * Revision 6.139  2004/03/18 15:14:21  coulouri
264  * do not dereference null seqalignptr
265  *
266  * Revision 6.138  2004/02/27 14:22:47  coulouri
267  * Correct typo
268  *
269  * Revision 6.137  2004/02/10 18:49:06  coulouri
270  * do not allow 1-hit blastn searches
271  *
272  * Revision 6.136  2003/11/05 22:28:06  dondosha
273  * No need to shift subsequence coordinates in tabular output, since they are already shifted in the seqalign
274  *
275  * Revision 6.135  2003/08/21 15:37:54  dondosha
276  * Corrections for out-of-frame tabular output and megablast XML output
277  *
278  * Revision 6.134  2003/05/30 17:31:09  coulouri
279  * add rcsid
280  *
281  * Revision 6.133  2003/05/09 18:44:49  coulouri
282  * make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
283  *
284  * Revision 6.132  2003/05/06 18:57:46  dondosha
285  * Do not set cutoff_s for megablast, it is not needed
286  *
287  * Revision 6.131  2003/04/08 17:33:42  dondosha
288  * Scale the default values of gap costs if match reward is > 1
289  *
290  * Revision 6.130  2003/04/07 14:46:25  madden
291  * Disallow query concatenation if XML, tabular, or ASN.1
292  *
293  * Revision 6.129  2003/04/01 22:40:09  dondosha
294  * Check lower case masking option if megablast option is on
295  *
296  * Revision 6.128  2003/03/25 15:28:08  dondosha
297  * Print tabular output header before checking if seqalign is NULL
298  *
299  * Revision 6.127  2003/03/24 21:17:08  madden
300  * XML fix, remove random printf statements
301  *
302  * Revision 6.126  2003/03/24 19:43:05  madden
303  * Changes to support query concatenation for blastn and tblastn
304  *
305  * Revision 6.125  2003/03/20 13:44:23  madden
306  * Fix -m 10/11 output to make them SeqAnnots
307  *
308  * Revision 6.124  2002/12/31 22:47:16  boemker
309  * Added support for printing output as ASN (text, with -m 10, or binary, with
310  * -m 11).
311  *
312  * Revision 6.123  2002/09/18 20:34:30  camacho
313  * Restored -P option
314  *
315  * Revision 6.122  2002/08/23 16:45:36  madden
316  * Issue WARNING for out-of-frame alignments
317  *
318  * Revision 6.121  2002/08/14 15:09:59  camacho
319  * Only change default window size if its command-line value is non-zero
320  *
321  * Revision 6.120  2002/08/09 19:41:25  camacho
322  * 1) Added blast version number to command-line options
323  * 2) Added explanations for some default parameters
324  *
325  * Revision 6.119  2002/06/19 22:50:17  dondosha
326  * Added all queries information for tabular output with multiple queries
327  *
328  * Revision 6.118  2002/05/09 15:37:52  dondosha
329  * Call BLASTOptionNewEx instead of BLASTOptionNew, so megablast defaults are set in a central place
330  *
331  * Revision 6.117  2002/05/04 13:04:43  madden
332  * Unsuppress options
333  *
334  * Revision 6.116  2002/04/29 19:55:26  madden
335  * Use ARG_FLOAT for db length
336  *
337  * Revision 6.115  2002/04/25 21:57:45  madden
338  * Strip options for release
339  *
340  * Revision 6.114  2002/04/25 21:49:28  madden
341  * Reset mask_loc_start to NULL for every query
342  *
343  * Revision 6.113  2002/04/24 19:55:13  madden
344  * Rolled back last change
345  *
346  * Revision 6.112  2002/04/23 20:58:52  madden
347  * Suppress options for release
348  *
349  * Revision 6.111  2002/04/18 20:18:22  dondosha
350  * Separate mask locations when formatting results for multiple queries
351  *
352  * Revision 6.110  2002/04/16 21:10:58  madden
353  * Change placement of ReadDBBioseqFetchEnable so db open only once (for HPUX)
354  *
355  * Revision 6.109  2002/04/16 14:06:00  madden
356  * Do not print headers for XML or tabular output
357  *
358  * Revision 6.108  2002/03/19 23:29:38  dondosha
359  * Do not increment options->wordsize by 4 for megablast any more
360  *
361  * Revision 6.107  2002/02/19 23:21:45  dondosha
362  * Fix for XML output if megablast option is used
363  *
364  * Revision 6.106  2001/12/20 21:51:06  madden
365  * Uncomment DO_NOT_SUPPRESS_BLAST_OP
366  *
367  * Revision 6.105  2001/12/17 20:23:44  madden
368  * comment out DO_NOT_SUPPRESS_BLAST_OP
369  *
370  * Revision 6.104  2001/09/06 20:24:34  dondosha
371  * Removed threshold_first
372  *
373  * Revision 6.103  2001/08/28 17:34:34  madden
374  * Add -m 9 as tabular output with comments
375  *
376  * Revision 6.102  2001/08/28 16:23:12  madden
377  * Do not suppress args
378  *
379  * Revision 6.101  2001/07/27 21:47:35  dondosha
380  * Fixed dummy variable declaration for call to StringToInt8
381  *
382  * Revision 6.100  2001/07/26 18:21:04  dondosha
383  * Dummy variable type correction
384  *
385  * Revision 6.99  2001/07/20 13:31:23  dondosha
386  * Undeclared variable correction
387  *
388  * Revision 6.98  2001/07/19 22:05:47  dondosha
389  * Made db_length option a string, to convert to Int8 value
390  *
391  * Revision 6.97  2001/07/05 15:40:33  madden
392  * Comment out DO_NOT_SUPPRESS_BLAST_OP for release
393  *
394  * Revision 6.96  2001/07/03 20:50:33  madden
395  * Commented out call to PrintTabularOutputHeader
396  *
397  * Revision 6.95  2001/06/21 21:49:55  dondosha
398  * No need to declare extra variable vnp
399  *
400  * Revision 6.94  2001/06/21 21:29:08  dondosha
401  * Fixed memory leaks: destroy all error returns, free private_slp
402  *
403  * Revision 6.93  2001/06/15 21:20:19  dondosha
404  * Moved -m9 option to -m8; added header for tabular output
405  *
406  * Revision 6.92  2001/06/07 19:30:03  dondosha
407  * Pass believe query argument to BlastPrintTabulatedResults
408  *
409  * Revision 6.91  2001/06/06 21:22:44  dondosha
410  * Added (query) Bioseq and SeqLoc arguments to function BlastPrintTabulatedResults
411  *
412  * Revision 6.90  2001/05/25 19:26:36  vakatov
413  * Nested comment typo fixed
414  *
415  * Revision 6.89  2001/05/23 22:38:47  dondosha
416  * Added option -m 9 to print post-search tabulated output
417  *
418  * Revision 6.88  2001/04/10 19:20:52  madden
419  * Unsuppress some options suppressed for the release
420  *
421  * Revision 6.87  2001/04/02 13:52:15  madden
422  * Fix for last checkin, properly suppress some options
423  *
424  * Revision 6.85  2001/03/19 22:39:24  dondosha
425  * Allow location on the first query sequence for megablast
426  *
427  * Revision 6.84  2001/03/13 21:58:23  madden
428  * add support for multiple hits blastn, add option for window size
429  *
430  * Revision 6.83  2001/02/22 20:26:03  dondosha
431  * If location stop is -1, make it end of sequence
432  *
433  * Revision 6.82  2001/02/22 20:11:58  dondosha
434  * Previous change reversed; added option to set location on query sequence
435  *
436  * Revision 6.81  2001/02/22 16:16:43  shavirin
437  * Added options for required start and required stop of the query to be
438  * used in the Blast search.
439  *
440  * Revision 6.80  2001/02/22 15:38:48  dondosha
441  * Corrected the argument number for longest intron length
442  *
443  * Revision 6.79  2001/02/09 22:22:36  madden
444  * Do not use BlastPruneHitsFromSeqAlign for printing DefLines
445  *
446  * Revision 6.78  2001/02/08 20:41:17  dondosha
447  * Implemented tabulated output for all translated programs
448  *
449  * Revision 6.77  2001/02/07 21:17:22  dondosha
450  * Added support to produce tabulated output (-m 8 option)
451  *
452  * Revision 6.76  2001/01/19 20:03:47  dondosha
453  * Uninitialized variable seqannot caused core dump with XML output
454  *
455  * Revision 6.75  2000/12/19 18:40:47  madden
456  * Add calls to BlastSetUserErrorString and BlastDeleteUserErrorString
457  *
458  * Revision 6.74  2000/12/15 21:32:12  dondosha
459  * Appended getargs explanation of new tblastn (-t) option
460  *
461  * Revision 6.73  2000/11/21 15:47:21  dondosha
462  * Corrected default wordsize for megablast option
463  *
464  * Revision 6.72  2000/11/17 21:56:26  dondosha
465  * Do not free query_lcase_mask in client-server case - already freed
466  *
467  * Revision 6.71  2000/11/17 20:56:50  dondosha
468  * Returned Mega BLAST option which existed in blastcl3 and was removed
469  *
470  * Revision 6.70  2000/11/17 17:54:50  dondosha
471  * Added argument to allow greedy (a la Mega BLAST) extension in blastn
472  *
473  * Revision 6.69  2000/11/15 15:10:27  shavirin
474  * This revision is result of merge between blastall.c and blastcl3.c
475  * programs. Using define BLAST_CS_API - client/server version may be
476  * created.
477  *
478  * Revision 6.68  2000/11/09 15:01:00  dondosha
479  * Set longest intron length in options in nucleotide coordinates
480  *
481  * Revision 6.67  2000/11/08 22:24:07  dondosha
482  * Enabled new tblastn by adding longest intron option
483  *
484  * Revision 6.66  2000/11/01 16:26:50  madden
485  * Changes from Futamura for psitblastn
486  *
487  * Revision 6.65  2000/10/27 19:14:40  madden
488  * Change description of -b option
489  *
490  * Revision 6.64  2000/10/23 22:14:04  shavirin
491  * Added possibility to pass valid error message into XML output in case
492  * of failure or no hits.
493  *
494  * Revision 6.63  2000/10/23 19:58:22  dondosha
495  * Open and close AsnIo outside of call(s) to BXMLPrintOutput
496  *
497  * Revision 6.62  2000/10/17 19:37:41  shavirin
498  * Fixed compilation problems detected on Mac.
499  *
500  * Revision 6.61  2000/10/17 17:19:49  shavirin
501  * Temporary - for toolkit release - commented OOF shift penalty parameter.
502  *
503  * Revision 6.60  2000/10/06 17:54:28  shavirin
504  * Added usage of correct matrix in case of OOF alignment.
505  *
506  * Revision 6.59  2000/09/26 15:48:15  dondosha
507  * Put back printing of header before results of every search when multiple queries are submitted
508  *
509  * Revision 6.58  2000/09/13 22:26:23  dondosha
510  * Removed extra </PRE> that is now printed in PrintDefLinesFromSeqAlign
511  *
512  * Revision 6.57  2000/09/13 21:39:31  dondosha
513  * Corrected html output when input contains multiple queries
514  *
515  * Revision 6.56  2000/09/12 16:08:43  dondosha
516  * Create txalign style matrix from search matrix
517  *
518  * Revision 6.55  2000/09/12 16:02:13  madden
519  * do not allow -P with blastn, fix typo
520  *
521  * Revision 6.54  2000/09/07 20:25:59  madden
522  * Remove L option, turn off K (culling) by default, add -P option
523  *
524  * Revision 6.53  2000/09/07 16:27:07  shavirin
525  * Added option for OOF gap alignment for blastx.
526  *
527  * Revision 6.52  2000/08/24 14:13:23  shavirin
528  * Added return 1 if database does not exist on any path.
529  *
530  * Revision 6.51  2000/08/11 18:03:58  shavirin
531  * Added possibility to make blastx and tblastx with XML output.
532  *
533  * Revision 6.50  2000/08/11 17:54:08  shavirin
534  * Added possibility to print XML output (with -m 7 option)
535  *
536  * Revision 6.49  2000/08/01 16:35:34  madden
537  * Append Seq-annot, do not overwrite
538  *
539  * Revision 6.48  2000/06/27 15:25:18  madden
540  * Changed master-slave to query-anchored
541  *
542  * Revision 6.47  2000/06/13 19:38:46  shavirin
543  * Added ability to print XML Blast output.
544  *
545  * Revision 6.46  2000/06/05 19:31:31  madden
546  * Free query->lcase_mask between searches
547  *
548  * Revision 6.45  2000/05/26 19:28:44  shavirin
549  * Added adjustment of dropoff_1st_pass if dropoff_1st_pass > dropoff_2nd_pass
550  *
551  * Revision 6.44  2000/05/26 18:48:23  shavirin
552  * Added two new parameters; '-y' and '-Z'
553  *
554  * Revision 6.43  2000/05/09 15:57:26  shavirin
555  * Added call to the function ReadDBBioseqSetDbGeneticCode().
556  *
557  * Revision 6.42  2000/04/25 20:50:45  dondosha
558  * Removed unavailable option to use greedy algorithm
559  *
560  * Revision 6.41  2000/04/13 13:34:19  shavirin
561  * Added call to ObjMgrFreeCache() back after fixes in API.
562  *
563  * Revision 6.40  2000/04/04 18:29:13  shavirin
564  * Added some missing HTML tags.
565  *
566  * Revision 6.39  2000/03/31 19:13:33  dondosha
567  * Changed some names related to MegaBlast
568  *
569  * Revision 6.38  2000/03/24 21:49:30  madden
570  * Comment out ObjMgrFreeCache
571  *
572  * Revision 6.37  2000/03/02 21:06:09  shavirin
573  * Added -U option, that allows to consider low characters in FASTA files
574  * as filtered regions (for blastn, blastp and tblastn).
575  *
576  * Revision 6.36  2000/02/01 20:05:31  dondosha
577  * Added option -B: use greedy basic alignment search if set to T
578  *
579  * Revision 6.35  2000/01/28 16:46:54  madden
580  * Added function BlastGetMaskingLoc
581  *
582  * Revision 6.34  1999/12/17 20:48:53  egorov
583  * Fix 'gcc -Wall' warnings and remove old stuff.
584  *
585  * Revision 6.33  1999/10/12 19:35:26  madden
586  * Deallocate Mask information
587  *
588  * Revision 6.32  1999/08/26 14:58:06  madden
589  * Use float for db length
590  *
591  * Revision 6.31  1999/05/26 13:12:56  madden
592  * Initialized matrix to NULL
593  *
594  * Revision 6.30  1999/03/31 16:58:04  madden
595  * Removed static FindProt and FindNuc
596  *
597  * Revision 6.29  1999/02/10 21:12:26  madden
598  * Added HTML and GI list option, fixed filtering
599  *
600  * Revision 6.28  1999/01/22 17:24:51  madden
601  * added line breaks for alignment views
602  *
603  * Revision 6.27  1998/12/31 18:18:27  madden
604  * Added strand option
605  *
606  * Revision 6.26  1998/12/29 20:03:14  kans
607  * calls UseLocalAsnloadDataAndErrMsg at startup
608  *
609  * Revision 6.25  1998/11/19 14:04:34  madden
610  * Changed message level to SEV_WARNING
611  *
612  * Revision 6.24  1998/11/16 16:29:19  madden
613  * Added ErrSetMessageLevel(SEV_INFO)
614  *
615  * Revision 6.23  1998/07/17 15:41:36  madden
616  * Added effective search space flag
617  *
618  * Revision 6.22  1998/06/29 13:02:01  madden
619  * Deallocate matrix
620  *
621  * Revision 6.21  1998/06/10 13:33:14  madden
622  * Change -K from zero to 100
623  *
624  * Revision 6.20  1998/06/05 21:48:42  madden
625  * Added -K and -L options
626  *
627  * Revision 6.19  1998/05/18 18:01:04  madden
628  * Changed args to allow filter options to be changed
629  *
630  * Revision 6.18  1998/05/01 18:31:02  egorov
631  * Add new parameters to BLASTOptionSetGapParam()
632  *
633  * Revision 6.17  1998/04/30 14:32:32  madden
634  * init_buff_ex arg changed to 90 for reference
635  *
636  * Revision 6.16  1998/04/29 14:29:30  madden
637  * Made reference line longer
638  *
639  * Revision 6.15  1998/04/01 22:49:12  madden
640  * Print No hits found message
641  *
642  * Revision 6.14  1998/02/25 20:50:48  madden
643  * Added arg for db length
644  *
645  * Revision 6.13  1998/02/24 22:48:34  madden
646  * Removed options for culling
647  *
648  * Revision 6.12  1998/01/31 21:35:17  madden
649  * zeroed out values between searches
650  *
651  * Revision 6.11  1997/12/31 17:48:52  madden
652  * Added wordsize option
653  *
654  * Revision 6.10  1997/12/23 21:09:47  madden
655  * Added -K and -L for range-dependent blast
656  *
657  * Revision 6.9  1997/11/19 14:26:43  madden
658  * Removed extra break statement
659  *
660  * Revision 6.8  1997/11/18 22:24:22  madden
661  * Added call to BLASTOptionSetGapParams
662  *
663  * Revision 6.7  1997/10/27 22:26:52  madden
664  * Added call to ObjMgrFreeCache(0)
665  *
666  * Revision 6.6  1997/10/23 20:26:12  madden
667  * Use of init_buff_ex rather than init_buff
668  *
669  * Revision 6.5  1997/10/22 21:56:04  madden
670  * Added matrix option
671  *
672  * Revision 6.3  1997/10/07 21:33:38  madden
673  * Added BLUNT option
674  *
675  * Revision 6.2  1997/09/23 22:13:19  madden
676  * enabled descriptions and alignment options
677  *
678  * Revision 6.1  1997/09/16 16:34:32  madden
679  * Dbinfo printing changed for multiple db searches
680  *
681  * Revision 6.0  1997/08/25 18:19:14  madden
682  * Revision changed to 6.0
683  *
684  * Revision 1.16  1997/07/29 19:33:02  madden
685  * Added TXALIGN_SHOW_QS flag
686  *
687  * Revision 1.15  1997/07/28 17:01:23  madden
688  * Added include for simutil.h
689  *
690  * Revision 1.14  1997/07/28 14:31:09  madden
691  * Changes for masking alignments.
692  *
693  * Revision 1.13  1997/07/22 19:06:35  madden
694  * Option changes, Printing of version info
695  *
696  * Revision 1.12  1997/07/18 20:09:22  madden
697  * Conversion from blast2 output to new output
698  *
699  * Revision 1.3  1997/02/24  22:08:38  madden
700  * Added reward and penalty for match and mismatch.
701  *
702  * Revision 1.2  1997/02/23  16:48:52  madden
703  * Call to AcknowledgeBlastQuery added.
704  *
705  * Revision 1.1  1997/02/19  21:44:28  madden
706  * Initial revision
707  *
708  *
709 */
710 
711 #include <string.h>
712 
713 #include <ncbi.h>
714 #include <objseq.h>
715 #include <objsset.h>
716 #include <sequtil.h>
717 #include <seqport.h>
718 #include <tofasta.h>
719 #include <blast.h>
720 #include <blastpri.h>
721 #include <simutil.h>
722 #include <txalign.h>
723 #include <gapxdrop.h>
724 #include <sqnutils.h>
725 #include <xmlblast.h>
726 #include <mblast.h>
727 #include <blfmtutl.h>
728 #include <algo/blast/composition_adjustment/composition_constants.h>
729 #ifdef BLAST_CS_API
730 #include <objblst3.h>
731 #include <netblap3.h>
732 #endif
733 #ifndef BLASTALL_TOOLS_ONLY
734 #include <algo/blast/core/blast_options.h>
735 #include <algo/blast/core/blast_setup.h>
736 #include <algo/blast/core/blast_message.h>
737 #include <algo/blast/core/blast_filter.h>
738 #include <algo/blast/core/blast_util.h>
739 #include <algo/blast/core/blast_engine.h>
740 #include <algo/blast/core/blast_stat.h>
741 #include <algo/blast/api/blast_seq.h>
742 #include <algo/blast/api/blast_input.h>
743 #include <algo/blast/api/blast_format.h>
744 #include <algo/blast/api/blast_seqalign.h>
745 #include <algo/blast/api/seqsrc_readdb.h>
746 #include <algo/blast/api/blast_tabular.h>
747 #include <algo/blast/api/blast_mtlock.h>
748 #include <algo/blast/api/blast_prelim.h>
749 #include <algo/blast/api/blast_api.h>
750 #include <algo/blast/api/repeats_filter.h>
751 #endif   /* BLASTALL_TOOLS_ONLY */
752 
753 #define DEFLINE_BUF 255
754 /* NOTE(review): DEFLINE_BUF is not referenced in the visible part of this file; presumably a definition-line buffer size -- confirm against the rest of the file. */
755 
756 /* Stream used by the progress callback: the non-BLAST_CS_API tick_callback prints a "." to it and flushes it on OS_UNIX builds. Starts out NULL. */
757 FILE *global_fp=NULL;
758 /*
759         Callback to print out ticks, in UNIX only due to file systems
760         portability issues.
761 */
762 
763 #ifdef BLAST_CS_API
764 static  Boolean LIBCALLBACK
765 tick_callback (BlastResponsePtr brp, Boolean PNTR cancel)
766 {
767     /* BLAST_CS_API build of the progress callback: brp and cancel are never touched, and the tick output below is compiled out by #if 0. */
768 #if 0
769     fprintf(global_fp, ".");
770     fflush(global_fp);
771 #endif
772     /* Unconditionally returns TRUE. NOTE(review): how the caller interprets the value is not visible here -- confirm. */
773     return TRUE;    
774 }
775 
776 #else
777 static int LIBCALLBACK
778 tick_callback(Int4 sequence_number, Int4 number_of_positive_hits)
779 /* Progress callback for builds without BLAST_CS_API: on OS_UNIX it prints one "." to global_fp and flushes it; both arguments are ignored. Always returns 0. */
780 {
781 #ifdef OS_UNIX
782     /* global_fp is used without a NULL check, so it must already refer to an open stream when this runs. */
783     fprintf(global_fp, "%s", ".");
784     fflush(global_fp);
785     /* NOTE(review): the flush after every tick is presumably there so each dot appears immediately -- confirm. */
786 #endif
787     return 0;
788 }
789 #endif
790 
791 static Int2
792 BlastGetMaskingLoc(FILE *infp, FILE *outfp, CharPtr instructions)
793 {
794         BioseqPtr bsp;
795         Char buffer[50];
796         SeqEntryPtr sep;
797         SeqLocPtr slp, slp_start, tmp_slp;
798 
799         if (infp == NULL || outfp == NULL || instructions == NULL)
800                 return 1;
801 
802         while ((sep=FastaToSeqEntryEx(infp, TRUE, NULL, TRUE)) != NULL) 
803         {
804                 bsp = NULL;
805                 SeqEntryExplore(sep, &bsp, FindNuc);
806 
807                 if (bsp == NULL)
808                 {
809                         ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
810                         return 2;
811                 }
812                 SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, 50);
813                 fprintf(outfp, ">%s\n", buffer);
814                 slp_start = slp = BlastBioseqFilter(bsp, instructions);
815                 while (slp)
816                 {
817                         tmp_slp=NULL;
818                         while((tmp_slp = SeqLocFindNext(slp, tmp_slp))!=NULL)
819                         {
820                                 fprintf(outfp, "%ld %ld\n", (long) (1+SeqLocStart(tmp_slp)), (long) (1+SeqLocStop(tmp_slp)));
821                         }
822                         slp = slp->next;
823                 }
824 
825 /* used for debugging. */
826 #if 0
827 {{
828         BioseqPtr bsp_tmp;
829         ByteStorePtr byte_sp;
830         Int4 index;
831         SeqLocPtr tmp_slp_1, tmp_filter_slp;
832         SeqPortPtr spp;
833         Uint1Ptr tmp_query_seq, tmp_query_seq_start;
834         Uint1 residue;
835         FILE *tmp_fp;
836 
837                 spp = SeqPortNew(bsp, 0, -1, 0, Seq_code_iupacna);
838                 SeqPortSet_do_virtual(spp, TRUE);
839                 tmp_query_seq_start = (Uint1Ptr) MemNew(((BioseqGetLen(bsp))+2)*sizeof(Uint1));
840                 tmp_query_seq_start[0] = NULLB;
841                 tmp_query_seq = tmp_query_seq_start+1;
842                 index=0;
843                 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
844                 {
845 
846                         if (IS_residue(residue))
847                         {
848                                 tmp_query_seq[index] = residue;
849                                 index++;
850                         }
851                 }
852                 BlastMaskTheResidues(tmp_query_seq, BioseqGetLen(bsp), 78, slp_start, FALSE, 0);
853                 bsp_tmp = BioseqNew();
854                 bsp_tmp->length = BioseqGetLen(bsp);
855                 byte_sp = BSNew(1);
856                 BSWrite(byte_sp, tmp_query_seq, bsp->length);
857                 bsp_tmp->seq_data = byte_sp;
858                 bsp_tmp->repr = Seq_repr_raw;
859                 bsp_tmp->seq_data_type = Seq_code_iupacna;
860                 bsp_tmp->mol = 1;
861 
862                 bsp_tmp->id = bsp->id;
863                 bsp_tmp->descr = bsp->descr;
864 
865                 tmp_fp = FileOpen("masked.fsa", "w");
866                 BioseqRawToFastaExtra(bsp_tmp, tmp_fp, 50);
867 
868                 bsp_tmp->id = NULL;
869                 bsp_tmp->descr = NULL;
870 
871                 spp = SeqPortFree(spp);
872                 bsp_tmp = BioseqFree(bsp_tmp);
873                 tmp_query_seq_start = MemFree(tmp_query_seq_start);
874                 FileClose(tmp_fp);
875 
876                 tmp_filter_slp = slp_start;
877                 tmp_fp = FileOpen("locations.msk", "w");
878                 while (tmp_filter_slp)
879                 {
880                  tmp_slp_1=NULL;
881                  while((tmp_slp_1 = SeqLocFindNext(tmp_filter_slp, tmp_slp_1))!=NULL)
882                  {
883                         fprintf(tmp_fp, "%ld %ld\n", (long) (1+SeqLocStart(tmp_slp_1)), (long) (1+SeqLocStop(tmp_slp_1)));
884 
885                  }
886                         tmp_filter_slp = tmp_filter_slp->next;
887                 }
888 
889 
890                 FileClose(tmp_fp);
891 }}
892 #endif
893                 slp_start = SeqLocSetFree(slp_start);
894                 sep = SeqEntryFree(sep);
895         }
896 
897         return 0;
898 }
899 
900 /* Breaks up a location like "2000 3000" into two integers 
901    that are returned.
902 
903    If location is NULL then the integers are set to 0.
904 */
905 
906 /* FIXME: better name, move to API directory?? */
907 static Boolean
908 sGetLoc(char* location, Int4* start, Int4* end)
909 {
910         CharPtr delimiters = " ,;";
911 
912         if (start == NULL || end == NULL)
913            return FALSE;
914 
915         *start = 0;
916         *end = 0;
917 
918         if (location == NULL)
919            return TRUE;
920 
921         *start =  atoi(StringTokMT(location, delimiters, &location));
922         *end = atoi(location);
923 
924         return TRUE;
925 }
926 
/* Symbolic indices into the myargs[] command-line table defined right after
   this enum.  The code reads options as myargs[ARG_xxx], so the enumerators
   must appear in exactly the same order, and under the same preprocessor
   conditions, as the entries of myargs[].  The letter noted next to each
   enumerator is the option tag of the matching myargs[] entry. */
typedef enum {
ARG_PROGRAM = 0,        /* -p */
ARG_DB,                 /* -d */
ARG_QUERY,              /* -i */
ARG_EVALUE,             /* -e */
ARG_FORMAT,             /* -m */
ARG_OUT,                /* -o */
ARG_FILTER,             /* -F */
ARG_GAPOPEN,            /* -G */
ARG_GAPEXT,             /* -E */
ARG_XDROP,              /* -X */
ARG_SHOWGIS,            /* -I */
ARG_MISMATCH,           /* -q */
ARG_MATCH,              /* -r */
ARG_DESCRIPTIONS,       /* -v */
ARG_ALIGNMENTS,         /* -b */
ARG_THRESHOLD,          /* -f */
ARG_GAPPED,             /* -g */
ARG_QGENETIC_CODE,      /* -Q */
ARG_DBGENCODE,          /* -D */
ARG_THREADS,            /* -a */
ARG_ASNOUT,             /* -O */
ARG_BELIEVEQUERY,       /* -J */
ARG_MATRIX,             /* -M */
ARG_WORDSIZE,           /* -W */
ARG_DBSIZE,             /* -z */
ARG_BESTHITS,           /* -K */
ARG_MULTIPLEHITS,       /* -P */
ARG_SEARCHSP,           /* -Y */
ARG_STRAND,             /* -S */
ARG_HTML,               /* -T */
/* The same slot is the Entrez query in the network client and the GI list
   otherwise. */
#ifdef BLAST_CS_API
ARG_ENTREZQ,            /* -u */
#else
ARG_GILIST,             /* -l */
#endif
ARG_LCASE,              /* -U */
ARG_XDROP_UNGAPPED,     /* -y */
ARG_XDROP_FINAL,        /* -Z */
/* -R is the RPS-BLAST switch in the network client and the PSI-TBLASTN
   checkpoint file otherwise. */
#ifdef BLAST_CS_API
ARG_RPSBLAST,           /* -R */
#else
ARG_PSITCHKPNT,         /* -R */
#endif
ARG_USEMEGABLAST,       /* -n */
ARG_QUERYLOC,           /* -L */
ARG_WINDOW,             /* -A */
ARG_FRAMESHIFT,         /* -w */
ARG_INTRON,             /* -t */
#ifndef BLAST_CS_API
ARG_NUMQUERIES,         /* -B */
#ifndef BLASTALL_TOOLS_ONLY
ARG_FORCE_OLD,          /* -V */
#endif
#endif
ARG_COMP_BASED_STATS,   /* -C */
ARG_SMITH_WATERMAN,     /* -s */
#ifdef ALLOW_FULL_SMITH_WATERMAN
ARG_SMITH_WATERMAN_ALL  /* -h */
#endif
} BlastArguments;
988 
989 #define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
990 
991 static Args myargs[] = {
992     { "Program Name",           
993       NULL, NULL, NULL, FALSE, 'p', ARG_STRING, 0.0, 0, NULL},    /* ARG_PROGRAM */
994     { "Database",               
995       "nr", NULL, NULL, FALSE, 'd', ARG_STRING, 0.0, 0, NULL},    /* ARG_DB */
996     { "Query File",            
997       "stdin", NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_QUERY */
998     { "Expectation value (E)",  
999       "10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL},    /* ARG_EVALUE */
1000     { "alignment view options:\n0 = pairwise,\n1 = query-anchored showing identities,\n2 = query-anchored no identities,\n3 = flat query-anchored, show identities,\n4 = flat query-anchored, no identities,\n5 = query-anchored no identities and blunt ends,\n6 = flat query-anchored, no identities and blunt ends,\n7 = XML Blast output,\n8 = tabular, \n9 tabular with comment lines\n10 ASN, text\n11 ASN, binary", /* 4 */
1001       "0", "0", "11", FALSE, 'm', ARG_INT, 0.0, 0, NULL},         /* ARG_FORMAT */
1002     { "BLAST report Output File", 
1003       "stdout", NULL, NULL, TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}, /* ARG_OUT */
1004     { "Filter query sequence (DUST with blastn, SEG with others)", 
1005       "T", NULL, NULL, FALSE, 'F', ARG_STRING, 0.0, 0, NULL},       /* ARG_FILTER */
1006     { "Cost to open a gap (-1 invokes default behavior)", 
1007       "-1", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL},          /* ARG_GAPOPEN */
1008     { "Cost to extend a gap (-1 invokes default behavior)", 
1009       "-1", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL},          /* ARG_GAPEXT */
1010     { "X dropoff value for gapped alignment (in bits) (zero invokes default "
1011       "behavior)\n      blastn 30, megablast 20, tblastx 0, all others 15", 
1012       "0", NULL, NULL, FALSE, 'X', ARG_INT, 0.0, 0, NULL},          /* ARG_XDROP */
1013     { "Show GI's in deflines",  /* 10 */
1014       "F", NULL, NULL, FALSE, 'I', ARG_BOOLEAN, 0.0, 0, NULL},      /* ARG_SHOWGIS */
1015     { "Penalty for a nucleotide mismatch (blastn only)", 
1016       "-3", NULL, NULL, FALSE, 'q', ARG_INT, 0.0, 0, NULL},         /* ARG_MISMATCH */
1017     { "Reward for a nucleotide match (blastn only)", 
1018       "1", NULL, NULL, FALSE, 'r', ARG_INT, 0.0, 0, NULL},          /* ARG_MATCH */
1019     { "Number of database sequences to show one-line descriptions for (V)", 
1020       "500", NULL, NULL, FALSE, 'v', ARG_INT, 0.0, 0, NULL},         /*  ARG_DESCRIPTIONS */
1021     { "Number of database sequence to show alignments for (B)", 
1022       "250", NULL, NULL, FALSE, 'b', ARG_INT, 0.0, 0, NULL},        /* ARG_ALIGNMENTS */
1023     { "Threshold for extending hits, default if zero\n" 
1024       "      blastp 11, blastn 0, blastx 12, tblastn 13\n"
1025       "      tblastx 13, megablast 0",
1026       "0", NULL, NULL, FALSE, 'f', ARG_FLOAT, 0.0, 0, NULL},           /* ARG_THRESHOLD */
1027     { "Perform gapped alignment (not available with tblastx)", 
1028         "T", NULL, NULL, FALSE, 'g', ARG_BOOLEAN, 0.0, 0, NULL},     /* ARG_GAPPED */
1029     { "Query Genetic code to use", /* 17 */
1030       "1", NULL, NULL, FALSE, 'Q', ARG_INT, 0.0, 0, NULL},           /* ARG_QGENETIC_CODE */
1031     { "DB Genetic code (for tblast[nx] only)", /* 18 */
1032       "1", NULL, NULL, FALSE, 'D', ARG_INT, 0.0, 0, NULL},           /* ARG_DBGENCODE */
1033     { "Number of processors to use", /* 19 */
1034       "1", NULL, NULL, FALSE, 'a', ARG_INT, 0.0, 0, NULL},           /* ARG_THREADS */
1035     { "SeqAlign file",          /* 20 */
1036       NULL, NULL, NULL, TRUE, 'O', ARG_FILE_OUT, 0.0, 0, NULL},      /* ARG_ASNOUT */
1037     { "Believe the query defline", /* 21 */
1038       "F", NULL, NULL, FALSE, 'J', ARG_BOOLEAN, 0.0, 0, NULL},        /* ARG_BELIEVEQUERY */
1039     { "Matrix",                 /* 22 */
1040       "BLOSUM62", NULL, NULL, FALSE, 'M', ARG_STRING, 0.0, 0, NULL},  /* ARG_MATRIX */
1041     { "Word size, default if zero (blastn 11, megablast 28, "
1042         "all others 3)", /* 23 */
1043       "0", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL},            /* ARG_WORDSIZE */
1044     { "Effective length of the database (use zero for the real size)", 
1045       "0", NULL, NULL, FALSE, 'z', ARG_FLOAT, 0.0, 0, NULL},          /* ARG_DBSIZE */
1046     { "Number of best hits from a region to keep. Off by default.\nIf used a value of 100 is recommended.  Very high values of -v or -b is also suggested", 
1047       "0", NULL, NULL, FALSE, 'K', ARG_INT, 0.0, 0, NULL},            /* ARG_BESTHITS */
1048     { "0 for multiple hit, 1 for single hit (does not apply to blastn)",
1049        "0",  NULL, NULL, FALSE, 'P', ARG_INT, 0.0, 0, NULL},           /* ARG_MULTIPLEHITS */
1050     { "Effective length of the search space (use zero for the real size)", 
1051       "0", NULL, NULL, FALSE, 'Y', ARG_FLOAT, 0.0, 0, NULL},           /* ARG_SEARCHSP */
1052     { "Query strands to search against database (for blast[nx], and tblastx)\n"
1053       "       3 is both, 1 is top, 2 is bottom", 
1054       "3", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL},             /* ARG_STRAND */
1055     { "Produce HTML output",    /* 29 */
1056       "F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},         /* ARG_HTML */
1057 #ifdef BLAST_CS_API
1058     { "Restrict search of database to results of Entrez2 lookup", 
1059       NULL, NULL, NULL, TRUE, 'u', ARG_STRING, 0.0, 0, NULL},          /* ARG_ENTREZQ */
1060 #else
1061     { "Restrict search of database to list of GI's",             
1062       NULL, NULL, NULL, TRUE, 'l', ARG_STRING, 0.0, 0, NULL},          /* ARG_GILIST */
1063 #endif
1064     {"Use lower case filtering of FASTA sequence", 
1065      NULL, NULL, NULL, TRUE, 'U', ARG_BOOLEAN, 0.0, 0, NULL},          /* ARG_LCASE */
1066     { "X dropoff value for ungapped extensions in bits (0.0 invokes default "
1067       "behavior)\n      blastn 20, megablast 10, all others 7", 
1068       "0.0", NULL, NULL, FALSE, 'y', ARG_FLOAT, 0.0, 0, NULL},         /* ARG_XDROP_UNGAPPED */       
1069     { "X dropoff value for final gapped alignment in bits " 
1070       "(0.0 invokes default behavior)\n"
1071       "      blastn/megablast 100, tblastx 0, all others 25",
1072       "0", NULL, NULL, FALSE, 'Z', ARG_INT, 0.0, 0, NULL},             /* ARG_XDROP_FINAL */
1073 #ifdef BLAST_CS_API
1074     { "RPS Blast search",            /* 34 */
1075       "F", NULL, NULL, FALSE, 'R', ARG_BOOLEAN, 0.0, 0, NULL},          /* ARG_RPSBLAST */
1076 #else
1077     { "PSI-TBLASTN checkpoint file", /* 34 */
1078       NULL, NULL, NULL, TRUE, 'R', ARG_FILE_IN, 0.0, 0, NULL},         /* ARG_PSITCHKPNT */
1079 #endif
1080     { "MegaBlast search",       /* 35 */
1081       "F", NULL, NULL, FALSE, 'n', ARG_BOOLEAN, 0.0, 0, NULL},         /* ARG_USEMEGABLAST */
1082     { "Location on query sequence",/* 36 */
1083       NULL, NULL, NULL, TRUE, 'L', ARG_STRING, 0.0, 0, NULL},          /* ARG_QUERYLOC */
1084     { "Multiple Hits window size, default if zero (blastn/megablast 0, "
1085         "all others 40", /* 37 */
1086       "0", NULL, NULL, FALSE, 'A', ARG_INT, 0.0, 0, NULL},             /* ARG_WINDOW */
1087     { "Frame shift penalty (OOF algorithm for blastx)", 
1088       "0", NULL, NULL, FALSE, 'w', ARG_INT, 0.0, 0, NULL},             /* ARG_FRAMESHIFT */
1089     { "Length of the largest intron allowed in a translated nucleotide "
1090       "sequence when "
1091       "linking multiple distinct alignments. (0 invokes default behavior; a "
1092       "negative value disables linking.)", 
1093       "0", NULL, NULL, FALSE, 't', ARG_INT, 0.0, 0, NULL},             /* ARG_INTRON */
1094 /*--KM
1095    seems ok to add another param b/c NUMARG is defined based on 
1096     sizeof(myargs) itself
1097    made optional=TRUE but this may change?
1098 */
1099 #ifndef BLAST_CS_API
1100     { "Number of concatenated queries, for blastn and tblastn", 
1101       "0", NULL, NULL, TRUE, 'B', ARG_INT, 0.0, 0, NULL},               /* ARG_NUMQUERIES */
1102 #ifndef BLASTALL_TOOLS_ONLY
1103     { "Force use of the legacy BLAST engine", 
1104       "F", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL},              /* ARG_FORCE_OLD */
1105 #endif  /* BLASTALL_TOOLS_ONLY */
1106 #endif
1107     { "Use composition-based score adjustments for blastp or tblastn:\n"                /* ARG_COMP_BASED_STATS */
1108       "      As first character:\n"
1109       "      D or d: default (equivalent to T)\n"
1110       "      0 or F or f: no composition-based statistics\n"
1111       "      2 or T or t: Composition-based score adjustments as in "
1112       "Bioinformatics 21:902-911,\n"
1113       "      1: Composition-based statistics as in "
1114       "NAR 29:2994-3005, 2001\n"
1115       "          2005, conditioned on sequence properties\n"
1116       "      3: Composition-based score adjustment as in "
1117       "Bioinformatics 21:902-911,\n"
1118       "          2005, unconditionally\n"
1119       "      For programs other than tblastn, must either be absent "
1120       "or be D, F or 0.\n     "
1121       "      As second character, if first character is "
1122       "equivalent to 1, 2, or 3:\n"
1123       "      U or u: unified p-value combining alignment p-value "
1124       "and compositional p-value in round 1 only\n",
1125       "D", NULL, NULL, FALSE, 'C', ARG_STRING, 0.0, 0, NULL},
1126     { "Compute locally optimal Smith-Waterman alignments "
1127         "(This option is only\n"
1128       "      available for gapped tblastn.)",                          /* ARG_SMITH_WATERMAN */
1129       "F", NULL, NULL, FALSE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
1130 #ifdef ALLOW_FULL_SMITH_WATERMAN
1131     { "Compute only Smith-Waterman alignments (new engine only)",
1132       "F", NULL, NULL, FALSE, 'h', ARG_BOOLEAN, 0.0, 0, NULL},         /* ARG_SMITH_WATERMAN_ALL */
1133 #endif
1134 };
1135 
1136 
#ifdef BLAST_CS_API
/* Opens a connection to the network BLAST service via BlastInit, enables
   bioseq fetching for `database`, and, when `header` is set, prints the
   version banner to outfp.

   For RPS-BLAST only the version line is printed; for other programs the
   version line is followed by a blank line and the reference block.

   Returns the service handle filled in by BlastInit, or NULL if BlastInit
   fails or the first response is not a BlastResponse_init message. */
static BlastNet3Hptr BNETInitializeBlast(CharPtr database, CharPtr program,
                                  FILE *outfp, Boolean db_is_na,
                                  Boolean is_rps_blast, Boolean html, Boolean header)
{
    BlastNet3Hptr    bl3hp;
    BlastResponsePtr response = NULL;
    BlastVersionPtr  blast_version;

    if (! BlastInit("blastcl3", &bl3hp, &response)) {
        ErrPostEx(SEV_FATAL, 1, 0, "Unable to initialize BLAST service");
        return NULL;
    }

    if (response == NULL || response->choice != BlastResponse_init) {
        ErrPostEx(SEV_FATAL, 1, 0, "Unable to connect to the BLAST service");
        /* A response of an unexpected kind still has to be released; the
           original returned here without freeing it. */
        if (response != NULL)
            BlastResponseFree(response);
        return NULL;
    }
    blast_version = response->data.ptrvalue;

    BlastNetBioseqFetchEnable(bl3hp, database, db_is_na, TRUE);

    if (header)
    {
        if (is_rps_blast == TRUE)
        {
            BlastPrintVersionInfoEx("RPS-BLAST", html, blast_version->version,
                                    blast_version->date, outfp);
        }
        else
        {
            init_buff_ex(90);
            BlastPrintVersionInfoEx(program, html, blast_version->version,
                                    blast_version->date, outfp);
            fprintf(outfp, "\n");
            BlastPrintReference(html, 80, outfp);
            free_buff();
        }
    }

    /* blast_version points into the response, so free it only after the
       banner has been printed. */
    BlastResponseFree(response);

    return bl3hp;
}
#endif
1179 
1180 /* Needed for Mega BLAST only */
1181 #define MAX_NUM_QUERIES 16383 /* == 1/2 INT2_MAX */
1182 
1183 #ifndef BLASTALL_TOOLS_ONLY
1184 
1185 /** Fills all the options structures with user defined values. Uses the 
1186  * myargs global structure obtained from GetArgs.
1187  * @param lookup_options Lookup table options [in]
1188  * @param query_setup_options Query options [in]
1189  * @param word_options Initial word processing options [in]
1190  * @param ext_options Extension options [in]
1191  * @param hit_options Hit saving options [out]
1192  * @param score_options Scoring options [out]
1193  * @param eff_len_options Effective length options [out]
1194  * @param psi_options Protein BLAST options [out]
1195  * @param db_options BLAST database options [out]
1196  */
1197 static Int2 
1198 s_FillOptions(SBlastOptions* options)
1199 {
1200    LookupTableOptions* lookup_options = options->lookup_options;
1201    QuerySetUpOptions* query_setup_options = options->query_options; 
1202    BlastInitialWordOptions* word_options = options->word_options;
1203    BlastExtensionOptions* ext_options = options->ext_options;
1204    BlastHitSavingOptions* hit_options = options->hit_options ;
1205    BlastScoringOptions* score_options = options->score_options;
1206    BlastEffectiveLengthsOptions* eff_len_options = options->eff_len_options;
1207 
1208    Boolean mb_lookup = FALSE;
1209    Boolean greedy = FALSE;
1210    Boolean is_gapped = FALSE;
1211    EBlastProgramType program_number = options->program;
1212 
1213    if (myargs[ARG_USEMEGABLAST].intvalue != 0)
1214    {
1215        greedy = TRUE;
1216        mb_lookup = TRUE;
1217    }
1218 
1219    BLAST_FillLookupTableOptions(lookup_options, program_number, mb_lookup,
1220       myargs[ARG_THRESHOLD].floatvalue, (Int2)myargs[ARG_WORDSIZE].intvalue);
1221 
1222    BLAST_FillQuerySetUpOptions(query_setup_options, program_number, 
1223       myargs[ARG_FILTER].strvalue, (Uint1)myargs[ARG_STRAND].intvalue);
1224 
1225    if (myargs[ARG_QGENETIC_CODE].intvalue &&
1226        (program_number == eBlastTypeBlastx || 
1227         program_number == eBlastTypeTblastx))
1228       query_setup_options->genetic_code = myargs[ARG_QGENETIC_CODE].intvalue;
1229 
1230    BLAST_FillInitialWordOptions(word_options, program_number, 
1231                     myargs[ARG_WINDOW].intvalue, 
1232                     myargs[ARG_XDROP_UNGAPPED].intvalue);
1233 
1234    BLAST_FillExtensionOptions(ext_options, program_number, greedy, 
1235       myargs[ARG_XDROP].intvalue, myargs[ARG_XDROP_FINAL].intvalue);
1236 
1237    /* if both gap_open and gap_extend are zero then they are set to suggested values */
1238    SBlastOptionsSetMatrixAndGapCosts(options, myargs[ARG_MATRIX].strvalue,
1239         myargs[ARG_GAPOPEN].intvalue, myargs[ARG_GAPEXT].intvalue);
1240 
1241    SBlastOptionsSetRewardPenaltyAndGapCosts(options,
1242         myargs[ARG_MATCH].intvalue,
1243         myargs[ARG_MISMATCH].intvalue,
1244         myargs[ARG_GAPOPEN].intvalue,
1245         myargs[ARG_GAPEXT].intvalue,
1246         FALSE);
1247 
1248    if (myargs[ARG_MULTIPLEHITS].intvalue == 1 ||
1249        myargs[ARG_WINDOW].intvalue < 0)
1250        word_options->window_size = 0;
1251    else
1252        SBlastOptionsSetWindowSize(options, myargs[ARG_WINDOW].intvalue);
1253 
1254    SBlastOptionsSetThreshold(options, myargs[ARG_THRESHOLD].floatvalue);
1255 
1256    if (program_number != eBlastTypeTblastx)
1257       is_gapped = myargs[ARG_GAPPED].intvalue;
1258    else
1259       is_gapped = FALSE;
1260 
1261    score_options->gapped_calculation = is_gapped;
1262    if (myargs[ARG_FRAMESHIFT].intvalue) {
1263       score_options->shift_pen = myargs[ARG_FRAMESHIFT].intvalue;
1264       score_options->is_ooframe = TRUE;
1265    }
1266 
1267    BLAST_FillHitSavingOptions(hit_options, 
1268       myargs[ARG_EVALUE].floatvalue, 
1269       MAX(myargs[ARG_DESCRIPTIONS].intvalue, 
1270           myargs[ARG_ALIGNMENTS].intvalue),
1271           is_gapped, 
1272       myargs[ARG_BESTHITS].intvalue,  /* culling limit */
1273       0);               /* min diag separation */
1274  
1275    hit_options->longest_intron = MIN(myargs[ARG_INTRON].intvalue, MAX_INTRON_LENGTH);
1276 
1277    if (myargs[ARG_SEARCHSP].floatvalue != 0 ||
1278        myargs[ARG_DBSIZE].floatvalue != 0) {
1279       Int8 searchsp = (Int8)myargs[ARG_SEARCHSP].floatvalue; 
1280       Int8 dbsize = (Int8)myargs[ARG_DBSIZE].floatvalue; 
1281       BLAST_FillEffectiveLengthsOptions(eff_len_options, 0, dbsize, &searchsp, 1);
1282    }
1283 
1284    if (program_number == eBlastTypeTblastn ||
1285        program_number == eBlastTypeRpsTblastn ||
1286        program_number == eBlastTypeTblastx) {
1287        SBlastOptionsSetDbGeneticCode(options, myargs[ARG_DBGENCODE].intvalue);
1288    }
1289    if ((program_number == eBlastTypeTblastn ||
1290         program_number == eBlastTypeBlastp) && is_gapped) {
1291        /* Set options specific to gapped tblastn  and blastp */
1292        switch (myargs[ARG_COMP_BASED_STATS].strvalue[0]) {
1293        case '0': case 'F': case 'f':
1294            ext_options->compositionBasedStats = eNoCompositionBasedStats;
1295            break;
1296        case '1':
1297            ext_options->compositionBasedStats = eCompositionBasedStats;
1298            break;
1299        case 'D': case 'd':
1300        case '2': case 'T': case 't':
1301            ext_options->compositionBasedStats = eCompositionMatrixAdjust;
1302            break;
1303        case '3':
1304            ErrPostEx(SEV_WARNING, 1, 0, "the -C 3 argument "
1305                      "is currently experimental\n");
1306            ext_options->compositionBasedStats = eCompoForceFullMatrixAdjust;
1307            break;
1308        default:
1309            ErrPostEx(SEV_FATAL, 1, 0, "invalid argument for composition-"
1310                      "based statistics; see -C options\n");
1311            break;
1312        }
1313        if (ext_options->compositionBasedStats > eNoCompositionBasedStats) {
1314            switch (myargs[ARG_COMP_BASED_STATS].strvalue[1]) {
1315            case 'U':
1316            case 'u':
1317                if (program_number == eBlastTypeBlastp) {
1318                    ext_options->unifiedP = 1;
1319                    ErrPostEx(SEV_WARNING, 1, 0, "unified p-values "
1320                              "are currently experimental\n");
1321                } else {
1322                    ErrPostEx(SEV_FATAL, 1, 0, "unified p-values "
1323                              "are currently only available for blastp\n");
1324                }
1325                break;
1326            case '\0':
1327                break;
1328            default:
1329                ErrPostEx(SEV_WARNING, 1, 0, "unrecognized second character"
1330                          "in value of -t, ignoring it\n");
1331                break;
1332            }
1333        }
1334        if (myargs[ARG_SMITH_WATERMAN].intvalue) {
1335            ext_options->eTbackExt = eSmithWatermanTbck;
1336        }
1337    } else {
1338        /* Make sure tblastn and blastp parameters were not set for
1339         * other programs */
1340        
1341        switch (myargs[ARG_COMP_BASED_STATS].strvalue[0]) {
1342        case '0': case 'D': case 'd': case 'F': case 'f':
1343            break;
1344        default:
1345            ErrPostEx(SEV_FATAL, 1, 0,
1346                      "Invalid option -C: only gapped blastp or gapped tblastn "
1347                      "may use composition based statistics.");
1348            break;
1349        }
1350        if(myargs[ARG_SMITH_WATERMAN].intvalue) {
1351            ErrPostEx(SEV_FATAL, 1, 0,
1352                      "Invalid option -s: Smith-Waterman alignments are only "
1353                      "available for gapped blastp and gapped tblastn.");
1354        }
1355    }
1356 
1357 #ifdef ALLOW_FULL_SMITH_WATERMAN
1358    if (myargs[ARG_SMITH_WATERMAN_ALL].intvalue) {
1359        ext_options->ePrelimGapExt = eSmithWatermanScoreOnly;
1360        ext_options->eTbackExt = eSmithWatermanTbckFull;
1361        ext_options->compositionBasedStats = eNoCompositionBasedStats;
1362    }
1363 #endif
1364 
1365    if (lookup_options->lut_type == eCompressedAaLookupTable) {
1366        if (lookup_options->threshold < 16) {
1367            ErrPostEx(SEV_WARNING, 1, 0,
1368                      "Threshold is probably too small for protein "
1369                      "searches with a compressed alphabet");
1370        }
1371        if (word_options->window_size > 0) {
1372            ErrPostEx(SEV_WARNING, 1, 0,
1373                      "Multiple hits may not work with compressed alphabets");
1374        }
1375    }
1376    return 0;
1377 }
1378 
1379 #ifndef TX_MATRIX_SIZE
1380 #define TX_MATRIX_SIZE 128
1381 #endif
1382 
1383 Int4** LIBCALL BlastMatrixConvert(Int4** old)
1384 {
1385    Int4 i, j, index1, index2;
1386    Int4** new;
1387    SeqMapTablePtr smtp;
1388    SeqCodeTablePtr sctp;
1389 
1390    if (!old)
1391       return NULL;
1392 
1393    sctp = SeqCodeTableFindObj(Seq_code_ncbistdaa);
1394    smtp = SeqMapTableFind(Seq_code_ncbieaa, Seq_code_ncbistdaa);
1395 
1396    new = malloc(TX_MATRIX_SIZE*sizeof(Int4Ptr));
1397 
1398    for (i=0; i<TX_MATRIX_SIZE; i++) {
1399       new[i] = malloc(TX_MATRIX_SIZE*sizeof(Int4));
1400       for (j=0; j<TX_MATRIX_SIZE; j++)
1401          new[i][j] = BLAST_SCORE_MIN;
1402    }
1403 
1404    for (i=sctp->start_at; i < sctp->start_at + sctp->num; i++) {
1405       for (j=sctp->start_at; j < sctp->start_at + sctp->num; j++) {
1406          index1 = SeqMapTableConvert(smtp, i);
1407          index2 = SeqMapTableConvert(smtp, j);
1408          new[index1][index2] = old[i][j];
1409       }
1410    }
1411 
1412    return new;
1413 }
1414 
1415 Int2 Main_new (void)
1416 
1417 {
1418    Boolean query_is_na;
1419    Boolean db_is_na;
1420    Boolean believe_query = FALSE;
1421    EBlastProgramType program_number;
1422    Int2 status = 0;
1423    Int4 start=0, end=0;   /* start and end of sequence to be searched as specified by ARG_QUERYLOC */
1424    FILE *infp=NULL, *outfp=NULL;
1425    SBlastOptions* options = NULL;
1426    BlastFormattingInfo* format_info = NULL;
1427    BlastFormattingInfo* asn_format_info = NULL;  /* For ASN.1 output. */  /* For ASN.1 output. */
1428    Int4 ctr = 1;
1429    Boolean tabular_output = FALSE;
1430    Blast_SummaryReturn* sum_returns = Blast_SummaryReturnNew();
1431    Blast_SummaryReturn* full_sum_returns = NULL;
1432    char* blast_program = myargs[ARG_PROGRAM].strvalue;
1433    char* dbname = myargs[ARG_DB].strvalue;
1434    Int4 maxquery = 0; /* maximum number of bases/residues to concatenate per
1435                          database pass */
1436    /* A file that contains a PSI-BLAST "checkpoint", the frequency
1437       ratios computed from a prior run of PSI-BLAST.  These
1438       frequencies may be used to compute a PSSM for PSI-BLAST or
1439       PSI-TBLASTN. The name of the file is specified by the -R
1440       option; the FILE * is NULL if no file is specified. */
1441    Blast_PsiCheckpointLoc * psi_checkpoint = NULL;
1442    char* max_query_string = NULL;
1443 
1444    GeneticCodeSingletonInit();
1445 
1446    status = SBlastOptionsNew(blast_program, &options, sum_returns);
1447 
1448    if (status) {
1449        if (sum_returns->error) {
1450            SBlastMessageErrPost(sum_returns->error);
1451            sum_returns = Blast_SummaryReturnFree(sum_returns);
1452        }
1453        return -1;
1454    }
1455 
1456    s_FillOptions(options);
1457    program_number = options->program;
1458 
1459    switch(program_number) {
1460        case eBlastTypeBlastn:
1461            maxquery = 40000;
1462            if (myargs[ARG_USEMEGABLAST].intvalue)
1463                maxquery = 5000000;
1464            break;
1465        case eBlastTypeTblastn:
1466        case eBlastTypePsiTblastn:
1467            maxquery = 20000;
1468            break;
1469        case eBlastTypeBlastp:
1470            maxquery = 10000;
1471            if (options->lookup_options->lut_type == 
1472                            eCompressedAaLookupTable) {
1473                maxquery = 20000;
1474            }
1475            break;
1476        case eBlastTypeBlastx:
1477        case eBlastTypeTblastx:
1478        default:
1479            maxquery = 10000;
1480    }
1481 
1482    max_query_string = getenv("BLAST_MAXQUERY_SIZE");
1483    if (max_query_string)
1484         sscanf (max_query_string, "%ld", &maxquery);
1485 
1486    BlastGetTypes(myargs[ARG_PROGRAM].strvalue, &query_is_na, &db_is_na);
1487 
1488    if (myargs[ARG_BELIEVEQUERY].intvalue != 0)
1489         believe_query = TRUE;
1490 
1491    SBlastOptionsSetBelieveQuery(options, believe_query);
1492 
1493    if (myargs[ARG_FORMAT].intvalue == 8 && myargs[ARG_USEMEGABLAST].intvalue)
1494         tabular_output = TRUE;
1495 
1496    if (!tabular_output) {
1497        Int2 finfo_status = BlastFormattingInfoNew(myargs[ARG_FORMAT].intvalue, options,
1498                               blast_program, dbname,
1499                               myargs[ARG_OUT].strvalue, &format_info);
1500        if (finfo_status != 0)
1501        {
1502            ErrPostEx(SEV_FATAL, 1, 0, "BlastFormattingInfoNew returned non-zero status");
1503        }
1504 
1505        /* Pass TRUE for the "is megablast" argument. Since megablast is always
1506           gapped, pass FALSE for the "is ungapped" argument. */
1507        BlastFormattingInfoSetUpOptions(format_info,
1508                                        myargs[ARG_DESCRIPTIONS].intvalue,
1509                                        myargs[ARG_ALIGNMENTS].intvalue,
1510                                        (Boolean) myargs[ARG_HTML].intvalue,
1511                                        FALSE,
1512                                        (Boolean) myargs[ARG_SHOWGIS].intvalue,
1513                                        believe_query);
1514        format_info->head_on_every_query = TRUE;
1515    }
1516    else
1517    { /* tabular output requires raw FILE*. */
1518        if ((outfp = FileOpen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
1519             ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", 
1520                 myargs[ARG_OUT].strvalue);
1521             return (1);
1522        }
1523        believe_query = TRUE;
1524        /* FetchEnable/Disable called in blast_format.c for non-tabular output. */
1525        ReadDBBioseqFetchEnable ("blastall", myargs[ARG_DB].strvalue, db_is_na, TRUE);
1526    }
1527 
1528    if (myargs[ARG_ASNOUT].strvalue) {
1529         /* This just prints out the ASN.1 to a secondary file. */
1530         BlastFormattingInfoNew(eAlignViewAsnText, options,
1531               blast_program, dbname, myargs[ARG_ASNOUT].strvalue, &asn_format_info);
1532         BlastFormattingInfoSetUpOptions(asn_format_info,
1533               myargs[ARG_DESCRIPTIONS].intvalue,
1534               myargs[ARG_ALIGNMENTS].intvalue,
1535               FALSE,
1536               FALSE,
1537               (Boolean) myargs[ARG_SHOWGIS].intvalue,
1538               believe_query);
1539    }
1540 
1541 
1542    if ((infp = FileOpen(myargs[ARG_QUERY].strvalue, "r")) == NULL) {
1543       ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", 
1544                 myargs[ARG_QUERY].strvalue);
1545       return (1);
1546    }
1547 
1548    sGetLoc(myargs[ARG_QUERYLOC].strvalue, &start, &end);
1549 
1550 
1551    /* Get the query (queries), loop if necessary. */
1552    while (1) {
1553       SBlastSeqalignArray* seqalign_arr=NULL;
1554       BlastTabularFormatData* tf_data = NULL;
1555       SeqLoc* lcase_mask = NULL;
1556       SeqLoc* repeat_mask = NULL; /* Repeat mask locations */
1557       SeqLoc* query_slp = NULL;
1558       SeqLoc* filter_loc=NULL;  /* All masking locations */
1559       Int4 num_queries; /* Number of queries read this time. */
1560       Int4  letters_read;  /* number of letters (bases/residues) read. */
1561 
1562       if ((Boolean)myargs[ARG_LCASE].intvalue) {
1563          letters_read = BLAST_GetQuerySeqLoc(infp, query_is_na, 
1564                    myargs[ARG_STRAND].intvalue, maxquery, start, end,
1565                    &lcase_mask, &query_slp, &ctr, &num_queries, believe_query,
1566                    myargs[ARG_QGENETIC_CODE].intvalue);
1567       } else {
1568          letters_read = BLAST_GetQuerySeqLoc(infp, query_is_na,
1569                    myargs[ARG_STRAND].intvalue, maxquery, start, end, 
1570                    NULL, &query_slp, &ctr, &num_queries, believe_query,
1571                    myargs[ARG_QGENETIC_CODE].intvalue);
1572       }
1573 
1574       if (letters_read == 0)
1575           break;
1576 
1577       if (letters_read < 0)
1578       {
1579            ErrPostEx(SEV_FATAL, 1, 0, "BLAST_GetQuerySeqLoc returned an error\n");
1580            return -1;
1581       }
1582 
1583       if (believe_query && BlastSeqlocsHaveDuplicateIDs(query_slp)) {
1584          ErrPostEx(SEV_FATAL, 1, 0, 
1585                  "Duplicate IDs detected; please ensure that "
1586                  "all query sequence identifiers are unique");
1587       }
1588 #ifndef BLAST_CS_API
1589       /* Now, if this is PSI-TBLASTN (and eventually PSI-BLAST) look for
1590          a restart */
1591       if (program_number == eBlastTypePsiTblastn &&
1592           !myargs[ARG_PSITCHKPNT].strvalue) {
1593           ErrPostEx(SEV_FATAL, 1, 0,
1594                     "PSI-TBLASTN requires that a checkpoint file be "
1595                     "specified (use the -R option).");
1596       } else if (program_number == eBlastTypePsiTblastn &&
1597                  myargs[ARG_PSITCHKPNT].strvalue) {
1598           EPsiCheckpointType checkpoint_type;
1599           char * checkpoint_file_extension =
1600               strrchr(myargs[ARG_PSITCHKPNT].strvalue, '.');
1601           if (NULL == checkpoint_file_extension) {
1602               /* No extension */
1603               checkpoint_file_extension = "";
1604           }
1605           if (0 == strcasecmp(checkpoint_file_extension, ".asn")) {
1606               checkpoint_type = eAsnBinaryCheckpoint;
1607           } else if (0 == strcasecmp(checkpoint_file_extension, ".asnt")) {
1608               checkpoint_type = eAsnTextCheckpoint;
1609           } else if (0 == strcasecmp(checkpoint_file_extension, ".chk")) {
1610               checkpoint_type = eStandardCheckpoint;
1611           } else {
1612               checkpoint_type = eStandardCheckpoint;
1613               ErrPostEx(SEV_WARNING, 1, 0, "The name of the PSI-BLAST "
1614                         "checkpoint file does not end with .chk, .asnt, or "
1615                         ".txt.  Trying to read the file using standard "
1616                         "PSI-BLAST format.");
1617           }
1618           psi_checkpoint =
1619               Blast_PsiCheckpointLocNew(checkpoint_type,
1620                                         myargs[ARG_PSITCHKPNT].strvalue);
1621           if (!psi_checkpoint) {
1622               ErrPostEx(SEV_FATAL, 1, 0,
1623                         "Cannot open the checkpoint file %s for reading.",
1624                         myargs[ARG_PSITCHKPNT].strvalue);
1625           }
1626       }
1627 #endif
1628       if (tabular_output) {
1629            EBlastTabularFormatOptions tab_option = eBlastTabularDefault;
1630            if (tabular_output == 2) {
1631                if (program_number == eBlastTypeBlastn) {
1632                    tab_option = eBlastTabularAddSequences;
1633                } else {
1634                    fprintf(stderr, 
1635                            "WARNING: Sequences printout in tabular output"
1636                            " allowed only for blastn\n");
1637                }
1638            } 
1639            
1640            /* Print the header of tabular output. */
1641            PrintTabularOutputHeader(dbname, NULL, query_slp, 
1642                                     blast_program, 0, FALSE, outfp);
1643            
1644            tf_data = BlastTabularFormatDataNew(outfp, NULL, query_slp, 
1645                                                tab_option, believe_query);
1646            tf_data->show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue;
1647            tf_data->show_accession = TRUE;
1648       }
1649 
1650       options->num_cpus = myargs[ARG_THREADS].intvalue;
1651 
1652       /* Find repeat mask, if necessary */
1653       if ((status = Blast_FindRepeatFilterSeqLoc(query_slp, myargs[ARG_FILTER].strvalue,
1654                                 &repeat_mask, &sum_returns->error)) != 0)
1655       {
1656             if (sum_returns && sum_returns->error)
1657             {
1658                    ErrSev max_sev = SBlastMessageErrPost(sum_returns->error);
1659                    if (max_sev >= SEV_ERROR)
1660                          return status;
1661             }
1662       }
1663 
1664       /* Combine repeat mask with lower case mask */
1665       if (repeat_mask)
1666           lcase_mask = ValNodeLink(&lcase_mask, repeat_mask);
1667 
1668       status = Blast_DatabaseSearch(query_slp, psi_checkpoint,
1669                                     dbname, lcase_mask, options,
1670                                     tf_data, &seqalign_arr,
1671                                     &filter_loc, sum_returns);
1672       if (status != 0) {
1673             /* Jump out if fatal error or unknown reason for exit. */
1674             if (sum_returns && sum_returns->error)
1675             {
1676                 ErrSev max_severity = SBlastMessageErrPost(sum_returns->error);
1677                 if (max_severity >= SEV_ERROR)
1678                    return status;
1679             }
1680             else if (!sum_returns || !sum_returns->error)
1681             {
1682                    ErrPostEx(SEV_ERROR, 1, 0, "Non-zero return from Blast_DatabaseSearch\n");
1683                    return status;
1684             }
1685       }
1686 
1687        /* Deallocate the formatting thread data structure. */
1688        if (tabular_output)
1689            BlastTabularFormatDataFree(tf_data);
1690 
1691        /* Free the lower case mask in SeqLoc form. */
1692        lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);
1693 
1694       /* If masking was done for lookup table only, free the masking locations,
1695           because they will not be used for formatting. */
1696        if (SBlastOptionsGetMaskAtHash(options))
1697            filter_loc = Blast_ValNodeMaskListFree(filter_loc);
1698 
1699        /* Post warning or error messages, no matter what the search status was. */
1700        SBlastMessageErrPost(sum_returns->error);
1701 
1702        if (!status && !tabular_output) {
1703 /*   FIXME:
1704            Int4** ascii_matrix = BlastMatrixConvert(sbp->matrix);
1705 */
1706            if (myargs[ARG_ASNOUT].strvalue) {
1707                    /* This just prints out the ASN.1 to a secondary file. */
1708                    status = 
1709                        BLAST_FormatResults(seqalign_arr, num_queries, query_slp, 
1710                                    NULL, asn_format_info, sum_returns);
1711            }
1712            
1713            /* Format the results */
1714            status = 
1715                BLAST_FormatResults(seqalign_arr, num_queries, query_slp, 
1716                                    filter_loc, format_info, sum_returns);
1717        }
1718 
1719        seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
1720        /* Update the cumulative summary returns structure and clean the returns
1721           substructures for the current search iteration. */
1722        Blast_SummaryReturnUpdate(sum_returns, &full_sum_returns);
1723        Blast_SummaryReturnClean(sum_returns);
1724        filter_loc = Blast_ValNodeMaskListFree(filter_loc);
1725        FreeSeqLocSetComponents (query_slp);
1726        query_slp = SeqLocSetFree(query_slp);
1727        if (psi_checkpoint)
1728            Blast_PsiCheckpointLocFree(&psi_checkpoint);
1729    } /* End loop on sets of queries */
1730 
1731    Blast_PrintOutputFooter(format_info, full_sum_returns);
1732 
1733    sum_returns = Blast_SummaryReturnFree(sum_returns);
1734    full_sum_returns = Blast_SummaryReturnFree(full_sum_returns);
1735    GeneticCodeSingletonFini();
1736 
1737    if (!tabular_output)
1738       format_info = BlastFormattingInfoFree(format_info);
1739    else
1740    {
1741       FileClose(outfp);
1742       /* FetchEnable/Disable called in blast_format.c for non-tabular output. */
1743       ReadDBBioseqFetchDisable();
1744    }
1745 
1746    options = SBlastOptionsFree(options); /* Must come after call to BlastFormattingInfoFree. */
1747 
1748    if (asn_format_info)
1749       asn_format_info = BlastFormattingInfoFree(asn_format_info);
1750 
1751    if (infp)
1752       FileClose(infp);
1753    
1754    return status;
1755 }
1756 
1757 #endif /* BLASTALL_TOOLS_ONLY */
1758 
1759 
1760 /* Multiplier applied to options->expect_value to relax the evalue threshold
1761  * for preliminary alignments when compositionally adjusted score matrices are used. */
1762 #define EVALUE_EXPAND 1 /* a factor of 1 leaves the threshold unchanged */
1763 
1764 
1765 Int2 Main_old (void)
1766  
1767 {
1768     AsnIoPtr aip, xml_aip;
1769     BioseqPtr fake_bsp = NULL, query_bsp, bsp;
1770     BioSourcePtr source;
1771     BLAST_MatrixPtr matrix;
1772     Int4Ptr PNTR txmatrix;
1773     BLAST_OptionsBlkPtr options;
1774     BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
1775     BlastPruneSapStructPtr prune;
1776     Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE;
1777     Boolean html = FALSE;
1778     CharPtr params_buffer=NULL;
1779     Int4 number_of_descriptions, number_of_alignments;
1780     SeqAlignPtr  seqalign;
1781     SeqAnnotPtr seqannot = NULL;
1782     SeqEntryPtr sep;
1783     TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
1784     Uint1 align_type, align_view, err_ticket;
1785     Uint4 align_options, print_options;
1786     ValNodePtr mask_loc, mask_loc_start = NULL, vnp, next_mask_loc = NULL;
1787     ValNodePtr other_returns, error_returns;
1788     CharPtr blast_program, blast_database, blast_inputfile, blast_outputfile;
1789     FILE *infp, *outfp;
1790     /* Mega BLAST related variables */
1791     SeqAlignPtr sap, next_seqalign, PNTR seqalignp;
1792     Int4 num_bsps, index;
1793     SeqLocPtr last_mask, mask_slp, slp = NULL, tmp_slp;
1794     Int2 ctr = 1;
1795     Char prefix[2];
1796     Boolean done = TRUE;
1797     int (LIBCALLBACK *handle_results)(VoidPtr srch);       
1798     Int4 from = 0, to = -1;
1799     Uint4 num_queries;          /*--KM for concatenated queries in blastn, tblastn */
1800     Uint4 num_iters;
1801     Uint4 sap_iter;
1802     SeqAlignPtr curr_seqalign;
1803     SeqAlignPtrArray sap_array;         /*--KM for separating seqaligns to test concat printing, temporary?*/
1804     SeqAnnotPtr curr_seqannot;
1805     SeqAnnotPtrArray seq_annot_arr;
1806     Uint4 bsp_iter;
1807     BspArray fake_bsp_arr;      /*--KM the array of fake_bsps for indiv. queries */ 
1808     SeqLocPtr PNTR lcase_mask_arr = NULL;       /* AM: information about lower case masked parts of queries */
1809     Boolean concat_done, nuc_concat;
1810     QueriesPtr mult_queries = NULL;     /*--KM, AM: stores information related to 
1811                                                     query multiplexing, to put in search */
1812     BioseqPtr curr_bsp;
1813 
1814     /* AM: Support for query multiplexing. */
1815     Uint4 num_spacers;
1816     ValNodePtr orig_mask_loc = NULL;
1817 
1818 #ifdef BLAST_CS_API
1819     BlastNet3Hptr    bl3hp;
1820     Boolean status;
1821 #endif
1822     
1823     blast_program = myargs[ARG_PROGRAM].strvalue;
1824 
1825 #ifdef BLAST_CS_API
1826     /* For RPS Blast - anything not "blastp" - is "tblastn" */    
1827     if(myargs[ARG_RPSBLAST].intvalue) {
1828         if(StringICmp(blast_program, "blastp")) {
1829             StringCpy(blast_program, "blastx");
1830         }
1831     }
1832 #endif
1833 
1834     blast_database = myargs[ARG_DB].strvalue;
1835     blast_inputfile = myargs[ARG_QUERY].strvalue;
1836     blast_outputfile = myargs[ARG_OUT].strvalue;
1837 
1838     if (myargs[ARG_HTML].intvalue)
1839         html = TRUE;
1840     
1841     if ((infp = FileOpen(blast_inputfile, "r")) == NULL) {
1842         ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", blast_inputfile);
1843         return (1);
1844     }
1845 
1846     align_view = (Int1) myargs[ARG_FORMAT].intvalue;
1847     outfp = NULL;
1848     if (align_view != 7 && align_view != 10 && align_view != 11 && blast_outputfile != NULL) {
1849         if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) {
1850             ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
1851             return (1);
1852         }
1853     }
1854     
1855     if (StringCmp("filter", blast_program) == 0) {
1856         BlastGetMaskingLoc(infp, outfp, myargs[ARG_FILTER].strvalue);
1857         FileClose(outfp);
1858         FileClose(infp);        
1859         return 0;
1860     }
1861     
1862     align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
1863 
1864     if(align_view < 7) {
1865         if (StringICmp("blastx", blast_program) == 0) {
1866             if (align_view != 0) {
1867                 ErrPostEx(SEV_FATAL, 1, 0, "This option is not available with blastx");
1868                 return 1;
1869             }
1870         } else if (StringICmp("tblastx", blast_program) == 0) {
1871             if (align_view != 0) {
1872                 ErrPostEx(SEV_FATAL, 1, 0, "This option is not available with tblastx");
1873                 return 1;
1874             }
1875         }
1876     }
1877     
1878     believe_query = FALSE;
1879     if (myargs[ARG_BELIEVEQUERY].intvalue != 0)
1880         believe_query = TRUE;
1881     
1882     if (believe_query == FALSE && (myargs[ARG_ASNOUT].strvalue || align_view == 10 || align_view ==11)) {
1883         ErrPostEx(SEV_FATAL, 1, 0, "-J option must be TRUE to produce a SeqAlign file");
1884     }
1885     
1886     options = BLASTOptionNewEx(blast_program, (Boolean) myargs[ARG_GAPPED].intvalue, (Boolean) myargs[ARG_USEMEGABLAST].intvalue);
1887     if (options == NULL)
1888         return 3;
1889 
1890 #ifdef BLAST_CS_API
1891     if(myargs[ARG_RPSBLAST].intvalue) 
1892         options->is_rps_blast = TRUE;
1893 #endif
1894     
1895     handle_results = NULL;
1896 
1897     BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0); 
1898     options->kappa_expect_value =
1899         options->expect_value  = (Nlm_FloatHi) myargs[ARG_EVALUE].floatvalue;
1900     number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue; 
1901     number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue;     
1902     options->hitlist_size = MAX(number_of_descriptions, number_of_alignments);
1903 
1904     if (StringICmp("blastn", blast_program) == 0) {
1905         options->penalty = myargs[ARG_MISMATCH].intvalue;
1906         options->reward = myargs[ARG_MATCH].intvalue;
1907         if (options->reward > 1) {
1908            /* Scale the default values for gap costs; will be overridden
1909               later, if command line values are non-zero */
1910            options->gap_open *= options->reward;
1911            options->gap_extend *= options->reward;
1912         }
1913     } else {
1914         if ((Int4)myargs[ARG_THRESHOLD].floatvalue != 0) {
1915             options->threshold_second = (Int4)myargs[ARG_THRESHOLD].floatvalue;
1916         }
1917     }
1918     
1919     if (myargs[ARG_GAPOPEN].intvalue >= 0)
1920         options->gap_open = myargs[ARG_GAPOPEN].intvalue;
1921     if (myargs[ARG_GAPEXT].intvalue >= 0)
1922         options->gap_extend = myargs[ARG_GAPEXT].intvalue;
1923     if (myargs[ARG_XDROP].intvalue != 0)
1924         options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
1925 
1926     /* use one-hit if specified or it's a blastn search */
1927     if ( (myargs[ARG_MULTIPLEHITS].intvalue == 1) || (StringICmp("blastn", blast_program) == 0 ) )
1928       {
1929         options->two_pass_method  = FALSE;
1930         options->multiple_hits_only  = FALSE;
1931       }
1932     /* otherwise, use two-hit */
1933     else
1934       { 
1935         /* all other inputs, including the default 0 use 2-hit method */
1936         options->two_pass_method  = FALSE;
1937         options->multiple_hits_only  = TRUE;
1938       }
1939     
1940     if(myargs[ARG_XDROP_FINAL].intvalue != 0) 
1941         options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue;
1942 
1943     if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0) {
1944         if (StringICmp("blastn", blast_program) == 0)
1945             options->filter_string = StringSave("D");
1946         else
1947             options->filter_string = StringSave("S");
1948     } else {
1949         options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
1950     }
1951     
1952     show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue;
1953 
1954     options->genetic_code = myargs[ARG_QGENETIC_CODE].intvalue;
1955     options->db_genetic_code = myargs[ARG_DBGENCODE].intvalue;
1956     options->number_of_cpus = myargs[ARG_THREADS].intvalue;
1957     if (myargs[ARG_WORDSIZE].intvalue != 0) {
1958         options->wordsize = myargs[ARG_WORDSIZE].intvalue;
1959     }
1960     
1961     if (options->is_megablast_search) {
1962        options->cutoff_s2 = options->wordsize*options->reward;
1963     }
1964 
1965     options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
1966     
1967     options->hsp_range_max  = myargs[ARG_BESTHITS].intvalue;
1968     if (options->hsp_range_max != 0)
1969         options->perform_culling = TRUE;
1970     if (myargs[ARG_SEARCHSP].floatvalue)
1971         options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;
1972 
1973     if ((0 != StringICmp("tblastn", blast_program) &&
1974          0 != StringICmp("blastp", blast_program)) ||
1975         !options->gapped_calculation) {
1976         /* Set some gapped tblastn-specific options to the correct
1977          * defaults for non-tblastn or non-gapped modes of operation.
1978          */
1979         options->tweak_parameters = eNoCompositionBasedStats;
1980         options->smith_waterman = 0;
1981         options->unified_p = 0;
1982         
1983         switch (myargs[ARG_COMP_BASED_STATS].strvalue[0]) {
1984         case '0': case 'D': case 'd': case 'F': case 'f':
1985             options->tweak_parameters = eNoCompositionBasedStats;
1986             break;
1987         default:
1988             ErrPostEx(SEV_FATAL, 1, 0,
1989                "Invalid option -C: only gapped blastp or gapped tblastn "
1990                "may use composition based statistics.");
1991             break;
1992         }
1993         if(myargs[ARG_SMITH_WATERMAN].intvalue) {
1994             ErrPostEx(SEV_FATAL, 1, 0,
1995                "Invalid option -s: Smith-Waterman alignments are only "
1996                "available for gapped blastp or gapped tblastn.");
1997         }
1998     } else {
1999         /* Set options specific to gapped tblastn and blastp */
2000         switch (myargs[ARG_COMP_BASED_STATS].strvalue[0]) {
2001         case '0': case 'F': case 'f':
2002             options->tweak_parameters = eNoCompositionBasedStats;
2003             break;
2004         case 'D': case 'd':
2005         case '1': case 'T': case 't':
2006             options->tweak_parameters = eCompositionBasedStats;
2007             break;
2008         case '2':
2009             ErrPostEx(SEV_WARNING, 1, 0, "the -C 2 argument "
2010                       "is currently experimental\n");
2011             options->tweak_parameters = eCompositionMatrixAdjust;
2012             break;
2013         case '3':
2014             ErrPostEx(SEV_WARNING, 1, 0, "the -C 3 argument "
2015                       "is currently experimental\n");
2016             options->tweak_parameters = eCompoForceFullMatrixAdjust;
2017         break;
2018         default:
2019             ErrPostEx(SEV_FATAL, 1, 0, "invalid argument for composition-"
2020                       "based statistics; see -C options\n");
2021             break;
2022         }
2023         if (options->tweak_parameters > 0) {
2024             switch (myargs[ARG_COMP_BASED_STATS].strvalue[1]) {
2025             case 'U':
2026             case 'u': 
2027                 if (0 == StringICmp("blastp", blast_program)) {
2028                     options->unified_p = 1;
2029                     ErrPostEx(SEV_WARNING, 1, 0, "unified p-values "
2030                               "are currently experimental\n");
2031                 } else {
2032                     ErrPostEx(SEV_FATAL, 1, 0, "unified p-values "
2033                               "are currently only available for blastp\n");
2034                 }
2035                 break;
2036           case '\0':
2037             break;
2038           default:
2039             ErrPostEx(SEV_WARNING, 1, 0, "unrecognized second character"
2040                       "in value of -t, ignoring it\n");
2041             break;
2042           }
2043         }
2044         options->smith_waterman =
2045             (Boolean) myargs[ARG_SMITH_WATERMAN].intvalue;
2046     }
2047     if (options->tweak_parameters > 1) {
2048         /* Compositionally adjusted score matrices are being used, and
2049          * these can improve evalue, so relax the evalue cutoff for
2050          * the preliminary alignments.  (Note that traditional
2051          * composition based statistics can only make evalues larger.)
2052          */
2053         options->expect_value *= EVALUE_EXPAND;
2054     }
2055 
2056     options->strand_option = myargs[ARG_STRAND].intvalue;
2057 
2058     if(myargs[ARG_XDROP_UNGAPPED].floatvalue != 0.0) {
2059         options->dropoff_2nd_pass  = myargs[ARG_XDROP_UNGAPPED].floatvalue;
2060         if(options->dropoff_1st_pass > options->dropoff_2nd_pass)
2061             options->dropoff_1st_pass = options->dropoff_2nd_pass;
2062     }
2063 
2064     if (myargs[ARG_WINDOW].intvalue != 0)
2065         options->window_size = myargs[ARG_WINDOW].intvalue;
2066 
2067     print_options = 0;
2068     align_options = 0;
2069     align_options += TXALIGN_COMPRESS;
2070     align_options += TXALIGN_END_NUM;
2071     if (StringICmp("blastx", blast_program) == 0) {
2072         align_options += TXALIGN_BLASTX_SPECIAL;
2073     }
2074     if (show_gi) {
2075         align_options += TXALIGN_SHOW_GI;
2076         print_options += TXALIGN_SHOW_GI;
2077     }
2078     if (myargs[ARG_GAPPED].intvalue == 0 || StringICmp("tblastx", blast_program) == 0)
2079         print_options += TXALIGN_SHOW_NO_OF_SEGS;
2080     
2081     if (align_view) {
2082         align_options += TXALIGN_MASTER;
2083         if (align_view == 1 || align_view == 3)
2084             align_options += TXALIGN_MISMATCH;
2085         if (align_view == 3 || align_view == 4 || align_view == 6)
2086             align_options += TXALIGN_FLAT_INS;
2087         if (align_view == 5 || align_view == 6)
2088             align_options += TXALIGN_BLUNT_END;
2089     } else {
2090         align_options += TXALIGN_MATRIX_VAL;
2091         align_options += TXALIGN_SHOW_QS;
2092     }
2093     
2094     if (html) {
2095         align_options += TXALIGN_HTML;
2096         print_options += TXALIGN_HTML;
2097     }
2098 
2099 #ifdef BLAST_CS_API
2100     if(myargs[ARG_ENTREZQ].strvalue)
2101         options->entrez_query = StringSave(myargs[ARG_ENTREZQ].strvalue);
2102 #else    
2103     if (myargs[ARG_GILIST].strvalue) {
2104         options->gifile = StringSave(myargs[ARG_GILIST].strvalue);
2105     }
2106 #endif
2107     
2108     /* 
2109        Out-of-frame option is valid only for blastx, tblastn and 
2110        psitblastn searches
2111     */
2112 
2113     if(myargs[ARG_FRAMESHIFT].intvalue > 0) {
2114         if (!StringICmp("blastx", blast_program) || 
2115             !StringICmp("tblastn", blast_program)||
2116             !StringICmp("psitblastn", blast_program)) {
2117            if (!StringICmp("blastx", blast_program)) {
2118               options->is_ooframe = TRUE;
2119               options->shift_pen = myargs[ARG_FRAMESHIFT].intvalue;
2120            }
2121         }
2122     }
2123         
2124     /* Input longest intron length is in nucleotide scale; in the lower level
2125        code it will be used in protein scale */
2126     options->longest_intron = myargs[ARG_INTRON].intvalue;
2127 
2128     aip = NULL;
2129     if (myargs[ARG_ASNOUT].strvalue != NULL) {
2130         if ((aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w")) == NULL) {
2131                 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue);
2132                 return 1;
2133         }
2134     }
2135     else if (align_view == 10 || align_view == 11) 
2136     {
2137         const char* mode = (align_view == 10) ? "w" : "wb";
2138         if ((aip = AsnIoOpen (blast_outputfile, (char*) mode)) == NULL) {
2139                 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue);
2140                 return 1;
2141         }
2142     }
2143 
2144     if(align_view < 7) {
2145        if (html) {
2146           fprintf(outfp, "<HTML>\n<TITLE>BLAST Search Results</TITLE>\n");
2147           fprintf(outfp, "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" "
2148                   "VLINK=\"#660099\" ALINK=\"#660099\">\n");
2149           fprintf(outfp, "<PRE>\n");
2150        }
2151     } else if (align_view == 7 ) {
2152         xml_aip = AsnIoOpen(blast_outputfile, "wx");
2153     }
2154 
2155 #ifndef BLAST_CS_API
2156     if(align_view >= 7 && myargs[ARG_NUMQUERIES].intvalue > 1)
2157     {
2158       ErrPostEx(SEV_FATAL, 1, 0, 
2159                  "blast: Query concatenation is currently not supported with -m > 7");
2160       return 1;
2161     }
2162 #endif
2163 
2164 
2165                   /* Futamura: Setting up the psitblastn options */
2166 #ifndef BLAST_CS_API
2167     if (NULL != myargs[ARG_PSITCHKPNT].strvalue) {
2168           options->recoverCheckpoint = TRUE;
2169           options->freqCheckpoint = TRUE;
2170     }
2171     options->CheckpointFileName=myargs[ARG_PSITCHKPNT].strvalue;
2172 #endif
2173 
2174 #ifdef BLAST_CS_API
2175     if (align_view < 7)
2176         bl3hp = BNETInitializeBlast(blast_database, blast_program, outfp, 
2177                                 db_is_na, options->is_rps_blast, html, TRUE);
2178     else
2179         bl3hp = BNETInitializeBlast(blast_database, blast_program, outfp, 
2180                                 db_is_na, options->is_rps_blast, html, FALSE);
2181 #endif
2182 
2183     /*--KM get number of queries for concatenated blastn/tblastn queries */
2184 
2185 #ifndef BLAST_CS_API
2186     options->NumQueries=myargs[ARG_NUMQUERIES].intvalue;  
2187 #endif
2188 
2189     num_queries = options->NumQueries;
2190     if (num_queries>0 && 
2191         !( (StringICmp("blastn",  blast_program) == 0) || 
2192            (StringICmp("tblastn", blast_program) == 0)   ) ) {
2193 
2194         ErrPostEx(SEV_FATAL, 1, 0, "blast: Can't concat with program %s\n", myargs[ARG_PROGRAM].strvalue);
2195        return 1;
2196     }
2197     
2198     /* AM: Query concatenation is not consistent with ungapped search */
2199     if( num_queries > 0 && !myargs[ARG_GAPPED].intvalue )
2200     {
2201       ErrPostEx(SEV_FATAL, 1, 0, 
2202                  "blast: Query concatenation is inconsistent with ungapped search\n" );
2203       return 1;
2204     }
2205     if( !myargs[ARG_GAPPED].intvalue &&
2206         0 == StringCmp("psitblastn", blast_program ) ) {
2207       ErrPostEx(SEV_FATAL, 1, 0,"blast: Ungapped alignment is not appropriate "
2208                 "for PSI-tBLASTn.\n" );
2209     }
2210 
2211     /* --KM set bool value if DNA and concat needed, need for Fasta->seq functions */
2212     if (num_queries>0 && query_is_na == TRUE) {
2213         nuc_concat = TRUE;
2214     } else {
2215         nuc_concat = FALSE;
2216     }
2217  
2218     /* --- Main loop over all FASTA entries in the input file ---- */
2219 
2220     concat_done = FALSE;        /*--KM */
2221 
2222     sGetLoc(myargs[ARG_QUERYLOC].strvalue, &from, &to);
2223 
2224     while (TRUE) {
2225        if (options->is_megablast_search) {
2226           StrCpy(prefix, "");
2227           slp = NULL;
2228           num_bsps = 0;
2229           done = TRUE;
2230           SeqMgrHoldIndexing(TRUE);
2231           mask_slp = last_mask = NULL;
2232           while ((sep=FastaToSeqEntryForDb(infp, query_is_na, NULL,
2233                                            believe_query, prefix, &ctr, 
2234                                            &mask_slp)) != NULL) {
2235              if ((Boolean)myargs[ARG_LCASE].intvalue) {
2236                 if (mask_slp) {
2237                    if (!last_mask)
2238                       options->query_lcase_mask = last_mask = mask_slp;
2239                    else {
2240                       last_mask->next = mask_slp;
2241                       last_mask = last_mask->next;
2242                    }
2243                    mask_slp = NULL;
2244                 }
2245              } else {
2246                 mask_slp = SeqLocSetFree(mask_slp);
2247              }
2248              query_bsp = NULL;
2249              if (query_is_na) 
2250                 SeqEntryExplore(sep, &query_bsp, FindNuc);
2251              else
2252                 SeqEntryExplore(sep, &query_bsp, FindProt);
2253              
2254              if (query_bsp == NULL) {
2255                 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
2256                 return 2;
2257              }
2258              
2259              /* Only for the first query */
2260              if (num_bsps == 0) {
2261                  to = MIN(to, query_bsp->length - 1);
2262                  
2263                  /* -1 means end of sequence */
2264                  if (to < 0)
2265                      to = query_bsp->length - 1;
2266                  if (from >= query_bsp->length || to < 0) {
2267                      ErrPostEx(SEV_FATAL, 1, 0, 
2268                                "Location outside of the query sequence range\n");
2269                      return 3;
2270                  }
2271                  slp = SeqLocIntNew(from, to, options->strand_option, 
2272                                     SeqIdFindBest(query_bsp->id, SEQID_GI));
2273              } else 
2274                  ValNodeAddPointer(&slp, SEQLOC_WHOLE,
2275                                    SeqIdDup(SeqIdFindBest(query_bsp->id,
2276                                                           SEQID_GI)));
2277              num_bsps++;
2278              if (num_bsps >= MAX_NUM_QUERIES) {
2279                 done = FALSE;
2280                 break;
2281              }
2282              /*sep = MemFree(sep);*/ /* Do not free the underlying Bioseq */
2283           }
2284           SeqMgrHoldIndexing(FALSE);
2285           if (num_bsps == 0) 
2286              break;
2287        } else {
2288           /* not megablast */
2289 
2290           /*--KM make array of fake_bsp's if concat. query */
2291           if (concat_done)
2292              break;
2293           if (num_queries > 0)  {
2294              fake_bsp_arr = (BspArray) MemNew(sizeof(BioseqPtr)*num_queries); 
2295 
2296              if( myargs[ARG_LCASE].intvalue )
2297                lcase_mask_arr = (SeqLocPtr PNTR)MemNew( sizeof( SeqLocPtr )*num_queries );
2298           }
2299           num_iters = (num_queries>0) ? num_queries : 1; 
2300           for (bsp_iter=0; bsp_iter<num_iters; bsp_iter++) {
2301 
2302              if(myargs[ARG_LCASE].intvalue) {
2303                 /* AM: query multiplexing */
2304                 if( !num_queries )
2305                   sep = FastaToSeqEntryForDb (infp, query_is_na, NULL, believe_query, NULL, NULL, &options->query_lcase_mask);
2306                 else
2307                   sep = FastaToSeqEntryInternalEx( infp, FASTA_FILE_IO, NULL, query_is_na, NULL, believe_query,
2308                                                    NULL, NULL, NULL, lcase_mask_arr + bsp_iter );
2309                 
2310              } else {
2311                 sep = FastaToSeqEntryEx(infp, query_is_na, NULL, believe_query);
2312              }
2313           
2314              /* if concat and num_queries has not been reached and sep is NULL, crap out */
2315              if (sep == NULL && bsp_iter < num_queries) {   /* implies num_queries>0 */
2316                 ErrPostEx(SEV_FATAL, 1, 0, "blast: Only %d queries found!\n", bsp_iter); 
2317                 return (1);
2318              }
2319                
2320              if(sep == NULL)
2321                 break;  /* no more queries, can go to finish with next break */
2322           
2323              query_bsp = NULL;
2324              if (query_is_na) {
2325                 SeqEntryExplore(sep, &query_bsp, FindNuc);
2326              } else {
2327                 SeqEntryExplore(sep, &query_bsp, FindProt);
2328              }
2329           
2330              if (query_bsp == NULL) {
2331                 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
2332                 return 2;
2333              }
2334 
2335              if (num_queries>0) {
2336                 *(fake_bsp_arr + bsp_iter) = query_bsp;
2337              }
2338           }
2339           if ( (sep == NULL && num_queries ==0) || (num_queries>0 && concat_done) )
2340              break;  /* go to finish */
2341 
2342           /* --KM */
2343           
2344           if (num_queries>0) {
2345              concat_done = TRUE;   /* --KM to prevent futher looping */
2346 
2347              /* AM: Determine the number of query separators. */
2348              num_spacers = GetNumSpacers( options, believe_query, fake_bsp_arr ); 
2349 
2350              if( num_spacers%2 ) ++num_spacers;
2351 
2352              /* --KM make the concatenated fake_bsp */
2353              /* AM: Added num_spacers. */
2354              if( query_is_na )
2355                fake_bsp = (BioseqPtr) 
2356                           BlastMakeFakeBspConcat(fake_bsp_arr, num_queries, query_is_na, num_spacers); 
2357              else
2358                fake_bsp = (BioseqPtr) 
2359                           BlastMakeFakeBspConcat(fake_bsp_arr, num_queries, query_is_na, num_spacers); 
2360              
2361              /* construct the MultQueries struct here*/
2362              mult_queries = (QueriesPtr) BlastMakeMultQueries(fake_bsp_arr, num_queries, query_is_na, num_spacers,
2363                                                               lcase_mask_arr);
2364           } else {
2365              if(believe_query)
2366                 fake_bsp = query_bsp;
2367              else 
2368                 fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
2369           }
2370 
2371           err_ticket = BlastSetUserErrorString(NULL, query_bsp->id, believe_query);
2372         
2373           /* If fake_bsp created mask should be updated to use it's id */
2374           /* AM: query multiplexing */
2375           if( !mult_queries )
2376             BLASTUpdateSeqIdInSeqInt(options->query_lcase_mask, fake_bsp->id);
2377           else for( bsp_iter = 0; bsp_iter < num_iters; ++bsp_iter )
2378                  if( mult_queries->LCaseMasks )
2379                    BLASTUpdateSeqIdInSeqInt( mult_queries->LCaseMasks[bsp_iter],
2380                                              mult_queries->FakeBsps[bsp_iter]->id );
2381         
2382           source = BioSourceNew();
2383           source->org = OrgRefNew();
2384           source->org->orgname = OrgNameNew();
2385           source->org->orgname->gcode = options->genetic_code;
2386           ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source);
2387 
2388        /* free sep later when done. --KM remember to free all if array*/
2389        }
2390 
2391        global_fp = outfp;
2392           
2393        if(align_view < 7) {
2394 #ifndef BLAST_CS_API
2395            init_buff_ex(90);
2396            BlastPrintVersionInfo(blast_program, html, outfp);
2397            fprintf(outfp, "\n");
2398            BlastPrintReference(html, 90, outfp);
2399            fprintf(outfp, "\n");
2400 #else
2401            fprintf(outfp, "\n");
2402 #endif            
2403            if (!options->is_megablast_search) {
2404               /* KM added loop here for concat case */
2405               num_iters = (num_queries>0) ? num_queries : 1;
2406               for (bsp_iter=0; bsp_iter<num_iters; bsp_iter++) {
2407                  curr_bsp = (num_queries>0) ? *(fake_bsp_arr + bsp_iter) : query_bsp; 
2408                  AcknowledgeBlastQuery(curr_bsp, 70, outfp, believe_query, html);
2409               }
2410            }
2411 
2412             /* Here we first check, that database do no exists */
2413 
2414 #ifndef BLAST_CS_API
2415            if(!PrintDbInformation(blast_database, !db_is_na, 70, outfp, html))
2416                 return 1;
2417 #else
2418 
2419             {{
2420                 BlastDbinfoPtr dbinfo;
2421                 static Boolean not_first_time;
2422 
2423                 /* For CS version we will print database info ones to 
2424                    decrease network traffic */                
2425 
2426                 if(!not_first_time) {
2427                     dbinfo = BlastRequestDbInfo(bl3hp, blast_database, !db_is_na);
2428                     if (dbinfo)
2429                         PrintDbInformationBasic(blast_database, !db_is_na, 70, dbinfo->definition, dbinfo->number_seqs, dbinfo->total_length, outfp, html);
2430                     dbinfo = BlastDbinfoFree(dbinfo);
2431                     not_first_time = TRUE;
2432                 }
2433             }}
2434 #endif    /* BLAST_CS_API */        
2435             free_buff();
2436                 if (options->is_ooframe)
2437                         ErrPostEx(SEV_WARNING, 0, 0, "Out-of-frame option selected, Expect values are only approximate and calculated not assuming out-of-frame alignments");
2438         }
2439 #ifdef OS_UNIX
2440         if(align_view < 7) { /*--KM why not fold into previous if statement? */
2441 #ifdef BLAST_CS_API
2442             fprintf(global_fp, "%s", "Searching... please wait.. ");
2443 #else
2444             fprintf(global_fp, "%s", "Searching");
2445 #endif
2446         }
2447 #endif
2448         other_returns = NULL;
2449         error_returns = NULL;
2450 
2451         if (options->is_megablast_search) {
2452 #ifdef BLAST_CS_API
2453            seqalign = MegaBlastSeqLocNetCore(bl3hp, slp, blast_program, 
2454                                    blast_database, options, 
2455                                    &other_returns, &error_returns,
2456                                    align_view < 7 ? tick_callback : NULL,
2457                                    &status);
2458 #else
2459            seqalignp = BioseqMegaBlastEngineByLoc(slp, blast_program,
2460                                    blast_database, options, &other_returns, 
2461                                    &error_returns, 
2462                                    align_view < 7 ? tick_callback : NULL,
2463                                    NULL, NULL, 0, handle_results);
2464            seqalign = NULL;
2465            for (index=0; index<num_bsps; index++) { 
2466               if (seqalignp && seqalignp[index]) {
2467                  if (seqalign == NULL) 
2468                     sap = seqalign = seqalignp[index];
2469                  else
2470                     sap->next = seqalignp[index];
2471                  while (sap->next != NULL)
2472                     sap = sap->next;
2473               }
2474            }
2475            seqalignp = MemFree(seqalignp);
2476 #endif
2477         } else if (!myargs[ARG_QUERYLOC].strvalue) {       
2478 #ifdef BLAST_CS_API
2479            seqalign = BlastBioseqNetCore(bl3hp, fake_bsp, blast_program, 
2480                                       blast_database, options,
2481                                       &other_returns, &error_returns,
2482                                       align_view < 7 ? tick_callback : NULL,
2483                                       NULL, &status);
2484 #else
2485            /* KM added mult_queries param */
2486            seqalign = BioseqBlastEngineWithCallbackMult(fake_bsp, blast_program, blast_database, options, &other_returns, &error_returns, align_view < 7 ? tick_callback : NULL, handle_results, mult_queries);
2487 #endif
2488         } else { /* Location on query provided */
2489            to = MIN(to, fake_bsp->length - 1);
2490            
2491            /* -1 means end of sequence */
2492            if (to < 0)
2493               to = fake_bsp->length - 1;
2494            if (from >= fake_bsp->length || to < 0) {
2495               ErrPostEx(SEV_FATAL, 1, 0, 
2496                         "Location outside of the query sequence range\n");
2497               return 3;
2498            }
2499            slp = SeqLocIntNew(from, to, options->strand_option, 
2500                               fake_bsp->id);
2501            
2502 #ifdef BLAST_CS_API
2503            seqalign = BlastSeqLocNetCore(bl3hp, slp, blast_program, 
2504                                          blast_database, options,
2505                                          &other_returns, &error_returns,
2506                                          align_view < 7 ? tick_callback : NULL,
2507                                          NULL, &status);
2508 #else
2509            seqalign = BioseqBlastEngineByLocWithCallbackMult(slp, blast_program, blast_database, options, &other_returns, &error_returns, align_view < 7 ? tick_callback : NULL, NULL, NULL, 0, handle_results, mult_queries);
2510 #endif
2511            
2512         }
2513 #if 0
2514         seqalign = BLASTFilterOverlapRegions(seqalign, 0, !db_is_na, 
2515                                              options->is_ooframe, FALSE);
2516 #endif
2517         
2518         BlastErrorPrint(error_returns);
2519 
2520         dbinfo = NULL;
2521         ka_params = NULL;
2522         ka_params_gap = NULL;
2523         params_buffer = NULL;
2524         mask_loc = NULL;
2525         matrix = NULL;
2526         txmatrix = NULL;
2527         for (vnp=other_returns; vnp; vnp = vnp->next) {
2528             switch (vnp->choice) {
2529             case TXDBINFO:
2530                 dbinfo = vnp->data.ptrvalue;
2531                 break;
2532             case TXKABLK_NOGAP:
2533                 ka_params = vnp->data.ptrvalue;
2534                 break;
2535             case TXKABLK_GAP:
2536                 ka_params_gap = vnp->data.ptrvalue;
2537                 break;
2538             case TXPARAMETERS:
2539                 params_buffer = vnp->data.ptrvalue;
2540                 break;
2541             case TXMATRIX:
2542                 matrix = vnp->data.ptrvalue;
2543                 if (matrix)
2544                    txmatrix = BlastMatrixToTxMatrix(matrix);
2545                 break;
2546             case SEQLOC_MASKING_NOTSET:
2547             case SEQLOC_MASKING_PLUS1:
2548             case SEQLOC_MASKING_PLUS2:
2549             case SEQLOC_MASKING_PLUS3:
2550             case SEQLOC_MASKING_MINUS1:
2551             case SEQLOC_MASKING_MINUS2:
2552             case SEQLOC_MASKING_MINUS3:
2553                 ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
2554                 break;
2555             default:
2556                 break;
2557             }
2558         }       
2559 
2560 #ifdef OS_UNIX
2561         fflush(global_fp);
2562 #endif
2563         
2564 #ifdef OS_UNIX
2565         if(align_view < 7) {
2566             fprintf(global_fp, "%s", "done");
2567         }
2568 #endif
2569         
2570 #ifndef BLAST_CS_API
2571     ReadDBBioseqFetchEnable ("blastall", blast_database, db_is_na, TRUE);
2572 #endif
2573         ReadDBBioseqSetDbGeneticCode(options->db_genetic_code);
2574 
2575         tmp_slp = slp;
2576         if (slp)
2577            query_bsp = NULL;
2578 
2579         if (getenv("POST_BLAST_CLUSTER_HITS") != NULL)
2580            BlastClusterHitsFromSeqAlign(seqalign, blast_program, blast_database, 
2581                                         options, 0.9, 1.6, 0.5, TRUE);
2582 
2583         if (mask_loc) {
2584            mask_loc_start = mask_loc;
2585         }
2586         else
2587         {       /* Could have become non-NUll for last query. */
2588            mask_loc_start = NULL;
2589         }
2590         /* Print header in any case */
2591         if (align_view == 9) {
2592            PrintTabularOutputHeader(blast_database, query_bsp, slp, 
2593               blast_program, 0, believe_query, global_fp);
2594         }
2595 
2596         if (seqalign) {
2597            if (num_queries > 0) { /* AM: Support for query multiplexing. */
2598               sap_array = mult_queries->sap_array_data->sap_array;
2599            }   
2600         
2601            if (align_view == 8 || align_view == 9) {
2602 /* --KM need to put a loop around this. seqaligns already broken up
2603    note the method for looping if num_aligns > 0 - reuse this method everywhere */
2604               num_iters = (num_queries>0) ? num_queries : 1;
2605               for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2606                  curr_seqalign = (num_queries>0) ? *(sap_array + sap_iter) : seqalign;
2607                  BlastPrintTabularResults(curr_seqalign, query_bsp, slp, 
2608                number_of_alignments, blast_program, 
2609                !options->gapped_calculation, options->is_ooframe,
2610                believe_query, 0, 0, global_fp, NULL, (align_view == 9));
2611 
2612                  SeqAlignSetFree(curr_seqalign);
2613               }
2614            } else {
2615            while (seqalign) {
2616                         
2617               if (!options->is_megablast_search){
2618                  next_seqalign = NULL;
2619               } else {
2620                  SeqIdPtr sip, next_sip = NULL;
2621                  
2622                  sap = seqalign;
2623                  sip = TxGetQueryIdFromSeqAlign(seqalign);
2624                  while (sap != NULL) { 
2625                     if (sap->next != NULL) {
2626                        next_sip = TxGetQueryIdFromSeqAlign(sap->next);
2627 
2628                        if (SeqIdComp(sip, next_sip) != SIC_YES) {
2629                           next_seqalign = sap->next;
2630                           sap->next = NULL;
2631                        }
2632                     } else
2633                        next_seqalign = NULL;
2634                     sap = sap->next;
2635                  }
2636                  
2637                  while (tmp_slp && SeqIdComp(sip, SeqLocId(tmp_slp)) != SIC_YES)
2638                     tmp_slp = tmp_slp->next;
2639                  if (tmp_slp == NULL) /* Should never happen */
2640                     break;
2641                  /* Separate the mask locations list for this query */
2642                  if (!mask_loc && next_mask_loc) {
2643                     mask_loc = next_mask_loc;
2644                     next_mask_loc = NULL;
2645                  }
2646                  if (mask_loc) {
2647                     if (next_mask_loc) {
2648                        mask_loc->next = next_mask_loc;
2649                        mask_loc = next_mask_loc;
2650                     }
2651                     mask_slp = (SeqLocPtr) mask_loc->data.ptrvalue;
2652                     next_mask_loc = mask_loc;
2653                     while (SeqIdComp(SeqLocId(mask_slp), sip) != SIC_YES) {
2654                        mask_loc = mask_loc->next;
2655                        if (!mask_loc)
2656                           break;
2657                        mask_slp = (SeqLocPtr) mask_loc->data.ptrvalue;
2658                     }
2659                     if (mask_loc) {
2660                        next_mask_loc = mask_loc->next;
2661                        mask_loc->next = NULL;
2662                     }
2663                  }
2664                  if (align_view < 7) {
2665                      bsp = BioseqLockById(SeqLocId(tmp_slp));
2666                      init_buff_ex(85);
2667                      fprintf(outfp, "\n");
2668                      AcknowledgeBlastQuery(bsp, 70, outfp, believe_query, 
2669                                            html);
2670                      free_buff();
2671                      BioseqUnlock(bsp);
2672                  }
2673               }
2674               if((align_view == 7) && !options->is_ooframe) {
2675                  if (options->is_megablast_search) {
2676                     bsp = BioseqLockById(SeqLocId(tmp_slp));
2677                     BXMLPrintOutput(xml_aip, seqalign, 
2678                                     options, blast_program, blast_database, 
2679                                     bsp, other_returns, 0, NULL, mask_loc);
2680                     BioseqUnlock(bsp);
2681                     AsnIoReset(xml_aip);
2682                     SeqAlignSetFree(seqalign);
2683                  } else {
2684                     num_iters = (num_queries>0) ? num_queries : 1;
2685                     for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2686                        curr_seqalign = (num_queries > 0) ? *(sap_array + sap_iter) : seqalign;
2687                        BXMLPrintOutput(xml_aip, curr_seqalign, 
2688                                     options, blast_program, blast_database, 
2689                                     fake_bsp, other_returns, 0, NULL, mask_loc);
2690                        AsnIoReset(xml_aip);
2691                        SeqAlignSetFree(curr_seqalign);
2692                     } /* for loop over sap-array (concat) */
2693                  } /* not MBlast case */
2694               } else {
2695                  /* create the array of SeqAnnotPtrs, if necessary */
2696 
2697                  num_iters = (num_queries > 0) ? num_queries : 1; 
2698                  for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2699                     curr_seqalign = (num_queries > 0) ? *(sap_array + sap_iter) : seqalign;
2700                     if ( (num_queries > 0) && (sap_iter == 0) ) {
2701                        seq_annot_arr = (SeqAnnotPtrArray) MemNew(sizeof(SeqAnnotPtr)*num_queries);
2702                     }
2703                     seqannot = SeqAnnotNew();
2704                     seqannot->type = 2;
2705                     AddAlignInfoToSeqAnnot(seqannot, align_type);
2706                     seqannot->data = curr_seqalign;
2707                     if (aip) {
2708                        SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
2709                        AsnIoReset(aip);
2710                     }
2711                     if (num_queries > 0) {
2712                        *(seq_annot_arr + sap_iter) = seqannot;
2713                     }
2714                  } /* make seqannots over the sap_iters from concat, or the single seqalign */
2715                     
2716                  if (outfp) { /* Uncacheing causes problems with ordinal nos. vs. gi's. */
2717                     ObjMgrSetHold();
2718                     /* print deflines */
2719                     for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2720                        curr_seqalign = (num_queries > 0) ? *(sap_array + sap_iter) : seqalign;
2721 
2722                        init_buff_ex(85);
2723 
2724                        PrintDefLinesFromSeqAlignEx2(curr_seqalign, 80, outfp, 
2725                                         print_options, FIRST_PASS, NULL, 
2726                                         number_of_descriptions, NULL, NULL);
2727                        free_buff();
2728                     } /* print deflines, looped if concat */
2729 
2730                     for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2731                        /* AM: Query concatenation. */
2732                        if( mult_queries && mask_loc )
2733                        {
2734                          orig_mask_loc = mask_loc;
2735                          
2736                          if( !mask_loc->data.ptrvalue ) mask_loc = NULL;
2737                        }
2738 
2739                        curr_seqalign = (num_queries > 0) ? *(sap_array + sap_iter) : seqalign;
2740                        curr_seqannot = (num_queries > 0) ? *(seq_annot_arr + sap_iter) : seqannot;
2741 
2742                        prune = BlastPruneHitsFromSeqAlign(curr_seqalign,
2743                                         number_of_alignments, NULL);
2744                        curr_seqannot->data = prune->sap;
2745 
2746                        if(options->is_ooframe) {
2747                           OOFShowBlastAlignment(curr_seqalign, /*mask*/ NULL,
2748                                         outfp, align_options, txmatrix);
2749                        } else {
2750                           if (align_view != 0)
2751                              ShowTextAlignFromAnnot(curr_seqannot, 60, outfp, NULL, NULL, 
2752                                         align_options, txmatrix, mask_loc, NULL);
2753                           else
2754                              ShowTextAlignFromAnnot(curr_seqannot, 60, outfp, NULL, NULL,
2755                                         align_options, txmatrix, mask_loc, 
2756                                         FormatScoreFunc);
2757                        }
2758                     
2759                        curr_seqannot->data = curr_seqalign;
2760                        prune = BlastPruneSapStructDestruct(prune);
2761 
2762                        /* AM: Query concatenation. */
2763                        if( mult_queries && orig_mask_loc ) 
2764                        {
2765                          mask_loc = orig_mask_loc;
2766                          mask_loc = mask_loc->next;
2767                        }
2768                     } /* show text align, loop over seqalign/seqannots for concat */
2769                     ObjMgrClearHold();
2770                  } /* if outfp */
2771                  for (sap_iter=0; sap_iter < num_queries; sap_iter++) {
2772                     /* upper bound is num_queries, take care not to do this unless concat */
2773                     *(seq_annot_arr + sap_iter) = SeqAnnotFree(*(seq_annot_arr + sap_iter)); 
2774                  }
2775                  if (mult_queries) 
2776                      seq_annot_arr = MemFree(seq_annot_arr);
2777         /*--KM free seqalign array and all seqaligns?? */
2778 
2779               } /* end of else (not XML Printing) */
2780               if (options->is_megablast_search)
2781                  tmp_slp = tmp_slp->next;
2782         /* --KM watch for memory leaks */
2783               if (seqannot && num_queries == 0)   
2784                  seqannot = SeqAnnotFree(seqannot);
2785               seqalign = next_seqalign;
2786            } /* End of loop on all seqaligns */
2787            if (mask_loc && next_mask_loc)
2788               mask_loc->next = next_mask_loc;
2789 
2790            } /* end of align_view not tabular case */
2791         } else {         /* seqalign is NULL */
2792            if((align_view == 7) && !options->is_ooframe) {
2793               BlastErrorMsgPtr error_msg;
2794               CharPtr message;
2795               
2796               if (error_returns == NULL) {
2797                  message = "No hits found";
2798               } else {
2799                  error_msg = error_returns->data.ptrvalue;
2800                  message = error_msg->msg;
2801               }
2802               if (options->is_megablast_search) {
2803                  bsp = BioseqLockById(SeqLocId(tmp_slp));
2804                  BXMLPrintOutput(xml_aip, seqalign, 
2805                                  options, blast_program, blast_database, 
2806                                  bsp, other_returns, 0, NULL, mask_loc);
2807                  BioseqUnlock(bsp);
2808               } else {
2809                  BXMLPrintOutput(xml_aip, NULL, options, blast_program, 
2810                                  blast_database, fake_bsp, other_returns, 0, 
2811                                  message, mask_loc);
2812               }
2813               AsnIoReset(xml_aip);
2814            } else if (align_view < 8) {
2815               fprintf(outfp, "\n\n ***** No hits found ******\n\n");
2816            }
2817            if (error_returns != NULL) {
2818               for (vnp = error_returns; vnp; vnp = vnp->next) {
2819                  BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
2820               }
2821               ValNodeFree(error_returns);
2822            }
2823         }
2824         
2825         slp = SeqLocSetFree(slp);
2826         matrix = BLAST_MatrixDestruct(matrix);
2827         if (txmatrix)
2828            txmatrix = TxMatrixDestruct(txmatrix);
2829         
2830         if(html) {
2831            fprintf(outfp, "<PRE>\n");
2832         }
2833         
2834         init_buff_ex(85);
2835         dbinfo_head = dbinfo;
2836         
2837         if(align_view < 7 && done) {
2838            while (dbinfo) {
2839               PrintDbReport(dbinfo, 70, outfp);
2840               dbinfo = dbinfo->next;
2841            }
2842         }
2843         dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);
2844         
2845         if (ka_params) {
2846            if(align_view < 7 && done) {
2847               PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
2848            }
2849            MemFree(ka_params);
2850         }
2851         
2852         if (ka_params_gap) {
2853            if(align_view < 7 && done) {
2854               PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
2855            }
2856            MemFree(ka_params_gap);
2857         }
2858         
2859         if(align_view < 7 && done) {
2860            PrintTildeSepLines(params_buffer, 70, outfp);
2861         }
2862         
2863         MemFree(params_buffer);
2864         free_buff();
2865         mask_loc = mask_loc_start;
2866         while (mask_loc) {
2867            SeqLocSetFree(mask_loc->data.ptrvalue);
2868            mask_loc = mask_loc->next;
2869         }
2870         ValNodeFree(mask_loc_start);
2871         
2872         if(num_queries > 0) { /* AM: query concatenation */
2873             SeqDataFree(fake_bsp->seq_data, fake_bsp->seq_data_type);
2874             fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
2875         } else if(!believe_query ) {
2876             fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
2877         }
2878         other_returns = ValNodeFree(other_returns);
2879         if (done) 
2880            sep = SeqEntryFree(sep);
2881 #ifndef BLAST_CS_API
2882         /* This is freed earlier in client-server case */
2883         options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
2884         /* Free the database translation tables, if applicable. */
2885         TransTableFreeAll();
2886         ReadDBBioseqFetchDisable();
2887 #endif
2888         if (html)
2889            fprintf(outfp, "</PRE>\n<P><HR><BR>\n<PRE>");
2890         
2891         if (!options->is_megablast_search) 
2892            BlastDeleteUserErrorString(err_ticket);
2893                     
2894         ObjMgrFreeCache(0);
2895     } /* while(TRUE)  - main loop of the program over all FASTA entries */
2896     
2897 #ifdef BLAST_CS_API
2898     BlastNetBioseqFetchDisable(bl3hp, blast_database, db_is_na);
2899     BlastFini(bl3hp);
2900 #endif
2901     
2902     aip = AsnIoClose(aip);
2903     
2904     if(align_view < 7) {
2905         if (html) {
2906             fprintf(outfp, "</PRE>\n</BODY>\n</HTML>\n");
2907         }
2908     } else if (align_view == 7)
2909         xml_aip = AsnIoClose(xml_aip);
2910     
2911     /* AM: query concatenation. */
2912     mult_queries = BlastMultQueriesDestruct( mult_queries );
2913 
2914     options = BLASTOptionDelete(options);
2915     FileClose(infp);
2916     return 0;
2917 }
2918 
2919 /*
2920         This function decides whether the new blast code can handle this database or not.
2921         Currently it should return FALSE for any database that uses a gilist.
2922         This implementation only works for nucleotide databases.
2923 
2924         If it is not possible to initialize the database or some error condition exists then FALSE
2925         will also be returned and the old engine should deal with this.
2926 */
2927 static Boolean
2928 readdb_use_new_blast(char* dbname)
2929 {
2930       Boolean db_is_na, query_is_na;
2931       Boolean retval=TRUE;
2932       ReadDBFILEPtr rdfp=NULL;
2933       ReadDBFILEPtr rdfp_var=NULL;
2934 
2935       if (!dbname)
2936            return FALSE;
2937 
2938       BlastGetTypes(myargs[ARG_PROGRAM].strvalue, &query_is_na, &db_is_na);
2939       rdfp = readdb_new(dbname, !db_is_na);
2940       if (!rdfp)
2941            return FALSE;
2942 
2943       rdfp_var = rdfp;
2944       while (rdfp_var)
2945       {
2946             if (rdfp_var->gilist != NULL)
2947             {
2948                    retval = FALSE;
2949                    break;  /* Break out and free rdfp. */
2950             }
2951             rdfp_var = rdfp_var->next;
2952       }
2953       rdfp = readdb_destruct(rdfp);
2954       return retval;
2955 }
2956 
2957 Int2 Nlm_Main(void)
2958 {
2959 #ifndef BLASTALL_TOOLS_ONLY
2960     Boolean use_new_engine=FALSE;
2961 #endif
2962     char buf[256] = { '\0' };
2963 
2964 #ifdef BLAST_CS_API
2965     StringCpy(buf, "blastcl3 ");
2966     StringNCat(buf, BlastGetVersionNumber(), sizeof(buf)-StringLen(buf)-1);
2967     if (! GetArgs (buf, NUMARG, myargs)) {
2968         return (1);
2969     }
2970 #else
2971     StringCpy(buf, "blastall ");
2972     StringNCat(buf, BlastGetVersionNumber(), sizeof(buf)-StringLen(buf));
2973     if (! GetArgs (buf, NUMARG, myargs)) {
2974         return (1);
2975     }
2976 #endif
2977 
2978     UseLocalAsnloadDataAndErrMsg ();
2979 
2980     if (! SeqEntryLoad())
2981                 return 1;
2982 
2983     ErrSetMessageLevel(SEV_WARNING);
2984 
2985 #ifdef BLAST_CS_API
2986     return Main_old();
2987 #else
2988 #ifndef BLASTALL_TOOLS_ONLY
2989     if (myargs[ARG_FORCE_OLD].intvalue == 0 &&
2990                       myargs[ARG_GILIST].strvalue == NULL)
2991           use_new_engine = readdb_use_new_blast(myargs[ARG_DB].strvalue);
2992 
2993     if (use_new_engine)
2994         return Main_new();
2995     else
2996 #endif /* BLASTALL_TOOLS_ONLY */
2997         return Main_old();
2998 #endif
2999 }
3000 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.