|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/demo/blastall.c |
source navigation diff markup identifier search freetext search file search |
/* Revision-control identification string for this source file. */
static const char rcsid[] =
    "$Id: blastall.c,v 6.205 2009/05/28 18:37:11 kans Exp $";
2
3 /* $Id: blastall.c,v 6.205 2009/05/28 18:37:11 kans Exp $
4 **************************************************************************
5 * *
6 * COPYRIGHT NOTICE *
7 * *
8 * This software/database is categorized as "United States Government *
9 * Work" under the terms of the United States Copyright Act. It was *
10 * produced as part of the author's official duties as a Government *
11 * employee and thus can not be copyrighted. This software/database is *
12 * freely available to the public for use without a copyright notice. *
13 * Restrictions can not be placed on its present or future use. *
14 * *
15 * Although all reasonable efforts have been taken to ensure the accuracy *
16 * and reliability of the software and data, the National Library of *
17 * Medicine (NLM) and the U.S. Government do not and can not warrant the *
18 * performance or results that may be obtained by using this software, *
19 * data, or derivative works thereof. The NLM and the U.S. Government *
20 * disclaim any and all warranties, expressed or implied, as to the *
21 * performance, merchantability or fitness for any particular purpose or *
22 * use. *
23 * *
24 * In any work or product derived from this material, proper attribution *
25 * of the author(s) as the source of the software or data would be *
26 * appreciated. *
27 * *
28 **************************************************************************
29 *
30 * $Log: blastall.c,v $
31 * Revision 6.205 2009/05/28 18:37:11 kans
32 * remove include hspstream_queue.h
33 *
34 * Revision 6.204 2009/05/28 14:55:09 camacho
35 * Fix compilation issues following merge with BLAST sources from the C++ toolkit JIRA SB-166
36 *
37 * Revision 6.203 2009/01/26 14:00:56 madden
38 * env variable sets BLAST_MAXQUERY_SIZE
39 *
40 * Revision 6.202 2008/07/01 18:38:14 madden
41 * Correct X3 value for blastn/megablast
42 *
43 * Revision 6.201 2008/01/02 14:02:06 madden
44 * Make composition-based score adjustments the default for blastp and tblastn
45 *
46 * Revision 6.200 2007/11/13 20:31:51 madden
47 * Enable ARG_BESTHITS arg (culling)
48 *
49 * Revision 6.199 2007/10/10 13:16:46 madden
50 * Fix composition-based command-lines for blastall_old (from Alejandro Schaffer)
51 *
52 * Revision 6.198 2007/05/07 13:29:11 kans
53 * added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
54 *
55 * Revision 6.197 2007/05/04 15:37:39 papadopo
56 * 1. update usage
57 * 2. fix Smith-Waterman configuration
58 *
59 * Revision 6.196 2007/04/24 20:59:40 papadopo
60 * 1. Increase the query batch size for blastp with a compressed
61 * lookup table
62 * 2. Make composition-based statistics the default for blastp and tblastn
63 *
64 * Revision 6.195 2007/03/23 14:37:20 madden
65 * Move SBlastOptionsFree after BlastFormattingInfoFree to prevent reading deallocated memory
66 *
67 * Revision 6.194 2007/03/20 14:56:58 camacho
68 * Call GeneticCodeSingletonInit/GeneticCodeSingletonFini
69 *
70 * Revision 6.193 2007/03/13 18:37:06 kans
71 * added call to FreeSeqLocSetComponents to free query sequences referenced by query_slp list
72 *
73 * Revision 6.192 2007/03/12 23:06:26 papadopo
74 * fix accidentally reverted change
75 *
76 * Revision 6.191 2007/03/12 23:00:50 papadopo
77 * fix accidentally reverted change
78 *
79 * Revision 6.190 2007/03/12 16:14:51 madden
80 * - #include string.h to get a declaration for the (nonstandard but
81 * common) function strcasecmp
82 * - In Main_new, use a Blast_PsiCheckpointLoc to specify the
83 * location, if any, of a PSI-BLAST checkpoint file.
84 * - In Main_new, use the file extension of the ARG_PSITCHKPNT file
85 * to determine the file format if a PSI-BLAST checkpoint file. Warn
86 * if the extension is not .chk, .asn, or .txt, but read unknown
87 * extension as standard PSI-BLAST checkpoints.
88 * [from Mike Gertz]
89 *
90 * Revision 6.188 2007/03/05 14:54:39 camacho
91 * - Call Blast_FindRepeatFilterSeqLoc with a NULL pointer for a PSI-BLAST
92 * checkpoint file.
93 *
94 * Revision 6.187 2007/02/08 17:07:22 papadopo
95 * change signature of FillInitialWordOptions; ungapped extensions are always turned on now by default
96 *
97 * Revision 6.186 2007/01/19 14:31:47 madden
98 * In Main_new, add a case label for psitblastn (from Mike Gertz).
99 *
100 * Revision 6.185 2006/10/06 12:23:01 madden
101 * Use head_on_every_query boolean on BlastFormattingInfo for backwards compatibility in new engine
102 *
103 * Revision 6.184 2006/10/02 18:13:10 papadopo
104 * make new engine use the -P option
105 *
106 * Revision 6.183 2006/09/13 15:21:28 papadopo
107 * add switch to turn on use of only Smith-Waterman alignments (currently ifdef'ed out)
108 *
109 * Revision 6.182 2006/07/28 21:09:18 papadopo
110 * allow database length to override the real value when using the rewritten blast engine
111 *
112 * Revision 6.181 2006/07/20 20:48:43 madden
113 * Fix for truncated SeqAlign when multiple runs are done
114 *
115 * Revision 6.180 2006/06/09 17:45:36 papadopo
116 * change signature of BlastTabularFormatDataNew
117 *
118 * Revision 6.179 2006/06/07 16:52:54 madden
119 * Enable the use of composition-based statistics and unified P-values for blastp.
120 * (from Mike Gertz)
121 *
122 * Revision 6.178 2006/05/18 16:29:13 papadopo
123 * do not set search space field directly
124 *
125 * Revision 6.177 2006/04/26 12:47:48 madden
126 * Use SBlastMessage in place of Blast_Message
127 *
128 * Revision 6.176 2006/04/25 18:00:19 papadopo
129 * change signature of BlastTabularFormatDataNew
130 *
131 * Revision 6.175 2006/04/21 14:34:50 madden
132 * BLAST_GetQuerySeqLoc prototype change
133 *
134 * Revision 6.174 2006/04/20 15:32:36 papadopo
135 * if query IDs are actually used, verify that there are no duplicate IDs
136 *
137 * Revision 6.173 2006/04/04 13:13:48 madden
138 * 1.) Add range check to ARG_FORMAT argument.
139 * 2.) Rework error reporting so as to not truncate results if there is a warning.
140 *
141 * Revision 6.172 2006/03/08 15:41:26 coulouri
142 * tune query concatenation limits
143 *
144 * Revision 6.171 2006/01/24 18:33:44 papadopo
145 * from Mike Gertz: Use enumerated values, rather than #define'd constants, to specify the composition adjustment method
146 *
147 * Revision 6.170 2006/01/23 16:44:05 papadopo
148 * change signature of FillHitSavingOptions
149 *
150 * Revision 6.169 2006/01/13 15:39:17 madden
151 * - Enabled the use of composition-based statistics for tblastn (and
152 * only tblastn) using the new engine.
153 * - Disabled setting evalue to a larger number when mode > 1
154 * composition-based statistics is used by setting EVALUE_EXPAND to 1.
155 * - In Main_new, don't create a BlastSeqSrc. It is apparently never
156 * used and is a memory leak. (from Mike Gertz).
157 *
158 * Revision 6.168 2006/01/10 20:44:10 madden
159 * Use SBlastSeqalignArray
160 *
161 * Revision 6.167 2005/12/22 14:22:19 papadopo
162 * change signature of BLAST_FillLookupTableOptions
163 *
164 * Revision 6.166 2005/12/16 18:30:08 coulouri
165 * disable new engine for smith-waterman and composition-based statistics until they are implemented
166 *
167 * Revision 6.165 2005/12/14 14:43:12 coulouri
168 * enable new engine by default
169 *
170 * Revision 6.164 2005/12/12 13:42:59 madden
171 * SBlastOptionsSetRewardPenaltyAndGapCosts now has new greedy Boolean, BLAST_FillScoringOptions no longer called
172 *
173 * Revision 6.163 2005/10/31 14:15:10 madden
174 * Call SBlastOptionsSetRewardPenaltyAndGapCosts
175 *
176 * Revision 6.162 2005/10/17 14:07:13 madden
177 * Use -1 rather than zero for unset gap parameters
178 *
179 * Revision 6.161 2005/09/29 17:39:29 coulouri
180 * from mike gertz:
181 * - Removed the unused static routing GetLambdaFast.
182 * - Removed unused variables from s_FillOptions.
183 * - Removed unused variables from Main_new.
184 * - For tblastn, enabled query concatenation when composition-based
185 * statistics or Smith-Waterman are used.
186 * - Free seq_annot_arr only if query concatenation is being used.
187 * - In Nlm_Main, add preprocessor directives around the declaration
188 * of use_new_engine to suppress a compiler warning.
189 *
190 * Revision 6.159 2005/09/26 15:02:58 morgulis
191 * Fixing some memory leaks when using query concatenation in blastn and tblastn.
192 *
193 * Revision 6.158 2005/09/16 14:10:03 madden
194 * Print out more informative message when Blast_DatabaseSearch has non-zero return if available, add call to SBlastOptionsSetRewardPenaltyAndGapCosts
195 *
196 * Revision 6.157 2005/09/13 17:39:05 kans
197 * include repeats_filter.h
198 *
199 * Revision 6.156 2005/09/08 14:02:06 coulouri
200 * From Mike Gertz:
201 * - Introduced the new options -C for using composition-based
202 * statistics with tblastn; and -s for using Smith-Waterman alignments
203 * with tblastn.
204 * - Forbid the use of the -B option when -C or -s is present; we
205 * expect to remove this restriction.
206 *
207 * Revision 6.155 2005/09/01 12:28:52 madden
208 * 1.) add new function Main_new and put old Main in Main_old, which one is called
209 * depends upon the -V option as well as the other params.
210 * 2.) Main_new now runs searches with the new engine.
211 * 3.) Add headers to allow new engine.
212 * 4.) all of the above can be turned off at compile time with a BLASTALL_TOOLS_ONLY define
213 *
214 * Revision 6.154 2005/08/17 12:42:31 madden
215 * Set TXALIGN_SHOW_NO_OF_SEGS for tblastx
216 *
217 * Revision 6.153 2005/08/08 15:47:41 dondosha
218 * Added call to TransTableFreeAll, fixing a memory leak
219 *
220 * Revision 6.152 2005/06/15 21:37:23 dondosha
221 * Do not trigger on-the-fly output with -m8 option for megablast
222 *
223 * Revision 6.151 2005/05/05 14:41:32 coulouri
224 * plug object manager entity id leak - rt ticket 15084082
225 *
226 * Revision 6.150 2005/02/07 15:30:39 dondosha
227 * Removed restriction on the value of longest intron option
228 *
229 * Revision 6.149 2005/01/10 18:52:28 coulouri
230 * fixes from morgulis to allow concatenation of >255 queries in [t]blastn
231 *
232 * Revision 6.148 2004/09/28 16:06:38 papadopo
233 * From Michael Gertz:
234 * 1. Disabled ungapped psitblastn.
235 * 2. The longest_intron parameter no longer has a minimum value of 4000.
236 * 3. Changed the command line help for the longest_intron parameter.
237 *
238 * Revision 6.147 2004/08/17 17:22:33 madden
239 * Add BlastArguments enum for command-line arguments
240 *
241 * Revision 6.146 2004/07/29 00:05:57 coulouri
242 * fix blastcl3 umr
243 *
244 * Revision 6.145 2004/07/28 18:49:56 coulouri
245 * fix printf specifier
246 *
247 * Revision 6.144 2004/06/30 12:33:30 madden
248 * Add include for blfmtutl.h
249 *
250 * Revision 6.143 2004/05/13 18:42:44 coulouri
251 * disable -B for blastcl3
252 *
253 * Revision 6.142 2004/04/29 19:56:00 dondosha
254 * Mask filtered locations in query sequence lines in XML output
255 *
256 * Revision 6.141 2004/04/20 14:55:47 morgulis
257 * 1. Fixed query offsets in results when -B option is used.
258 * 2. Fixes for lower case masking handling with -B option.
259 *
260 * Revision 6.140 2004/03/26 21:42:19 coulouri
261 * remove unused variables
262 *
263 * Revision 6.139 2004/03/18 15:14:21 coulouri
264 * do not dereference null seqalignptr
265 *
266 * Revision 6.138 2004/02/27 14:22:47 coulouri
267 * Correct typo
268 *
269 * Revision 6.137 2004/02/10 18:49:06 coulouri
270 * do not allow 1-hit blastn searches
271 *
272 * Revision 6.136 2003/11/05 22:28:06 dondosha
273 * No need to shift subsequence coordinates in tabular output, since they are already shifted in the seqalign
274 *
275 * Revision 6.135 2003/08/21 15:37:54 dondosha
276 * Corrections for out-of-frame tabular output and megablast XML output
277 *
278 * Revision 6.134 2003/05/30 17:31:09 coulouri
279 * add rcsid
280 *
281 * Revision 6.133 2003/05/09 18:44:49 coulouri
282 * make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
283 *
284 * Revision 6.132 2003/05/06 18:57:46 dondosha
285 * Do not set cutoff_s for megablast, it is not needed
286 *
287 * Revision 6.131 2003/04/08 17:33:42 dondosha
288 * Scale the default values of gap costs if match reward is > 1
289 *
290 * Revision 6.130 2003/04/07 14:46:25 madden
291 * Disallow query concatenation if XML, tabular, or ASN.1
292 *
293 * Revision 6.129 2003/04/01 22:40:09 dondosha
294 * Check lower case masking option if megablast option is on
295 *
296 * Revision 6.128 2003/03/25 15:28:08 dondosha
297 * Print tabular output header before checking if seqalign is NULL
298 *
299 * Revision 6.127 2003/03/24 21:17:08 madden
300 * XML fix, remove random printf statements
301 *
302 * Revision 6.126 2003/03/24 19:43:05 madden
303 * Changes to support query concatenation for blastn and tblastn
304 *
305 * Revision 6.125 2003/03/20 13:44:23 madden
306 * Fix -m 10/11 output to make them SeqAnnots
307 *
308 * Revision 6.124 2002/12/31 22:47:16 boemker
309 * Added support for printing output as ASN (text, with -m 10, or binary, with
310 * -m 11).
311 *
312 * Revision 6.123 2002/09/18 20:34:30 camacho
313 * Restored -P option
314 *
315 * Revision 6.122 2002/08/23 16:45:36 madden
316 * Issue WARNING for out-of-frame alignments
317 *
318 * Revision 6.121 2002/08/14 15:09:59 camacho
319 * Only change default window size if its command-line value is non-zero
320 *
321 * Revision 6.120 2002/08/09 19:41:25 camacho
322 * 1) Added blast version number to command-line options
323 * 2) Added explanations for some default parameters
324 *
325 * Revision 6.119 2002/06/19 22:50:17 dondosha
326 * Added all queries information for tabular output with multiple queries
327 *
328 * Revision 6.118 2002/05/09 15:37:52 dondosha
329 * Call BLASTOptionNewEx instead of BLASTOptionNew, so megablast defaults are set in a central place
330 *
331 * Revision 6.117 2002/05/04 13:04:43 madden
332 * Unsuppress options
333 *
334 * Revision 6.116 2002/04/29 19:55:26 madden
335 * Use ARG_FLOAT for db length
336 *
337 * Revision 6.115 2002/04/25 21:57:45 madden
338 * Strip options for release
339 *
340 * Revision 6.114 2002/04/25 21:49:28 madden
341 * Reset mask_loc_start to NULL for every query
342 *
343 * Revision 6.113 2002/04/24 19:55:13 madden
344 * Rolled back last change
345 *
346 * Revision 6.112 2002/04/23 20:58:52 madden
347 * Suppress options for release
348 *
349 * Revision 6.111 2002/04/18 20:18:22 dondosha
350 * Separate mask locations when formatting results for multiple queries
351 *
352 * Revision 6.110 2002/04/16 21:10:58 madden
353 * Change placement of ReadDBBioseqFetchEnable so db open only once (for HPUX)
354 *
355 * Revision 6.109 2002/04/16 14:06:00 madden
356 * Do not print headers for XML or tabular output
357 *
358 * Revision 6.108 2002/03/19 23:29:38 dondosha
359 * Do not increment options->wordsize by 4 for megablast any more
360 *
361 * Revision 6.107 2002/02/19 23:21:45 dondosha
362 * Fix for XML output if megablast option is used
363 *
364 * Revision 6.106 2001/12/20 21:51:06 madden
365 * Uncomment DO_NOT_SUPPRESS_BLAST_OP
366 *
367 * Revision 6.105 2001/12/17 20:23:44 madden
368 * comment out DO_NOT_SUPPRESS_BLAST_OP
369 *
370 * Revision 6.104 2001/09/06 20:24:34 dondosha
371 * Removed threshold_first
372 *
373 * Revision 6.103 2001/08/28 17:34:34 madden
374 * Add -m 9 as tabular output with comments
375 *
376 * Revision 6.102 2001/08/28 16:23:12 madden
377 * Do not suppress args
378 *
379 * Revision 6.101 2001/07/27 21:47:35 dondosha
380 * Fixed dummy variable declaration for call to StringToInt8
381 *
382 * Revision 6.100 2001/07/26 18:21:04 dondosha
383 * Dummy variable type correction
384 *
385 * Revision 6.99 2001/07/20 13:31:23 dondosha
386 * Undeclared variable correction
387 *
388 * Revision 6.98 2001/07/19 22:05:47 dondosha
389 * Made db_length option a string, to convert to Int8 value
390 *
391 * Revision 6.97 2001/07/05 15:40:33 madden
392 * Comment out DO_NOT_SUPPRESS_BLAST_OP for release
393 *
394 * Revision 6.96 2001/07/03 20:50:33 madden
395 * Commented out call to PrintTabularOutputHeader
396 *
397 * Revision 6.95 2001/06/21 21:49:55 dondosha
398 * No need to declare extra variable vnp
399 *
400 * Revision 6.94 2001/06/21 21:29:08 dondosha
401 * Fixed memory leaks: destroy all error returns, free private_slp
402 *
403 * Revision 6.93 2001/06/15 21:20:19 dondosha
404 * Moved -m9 option to -m8; added header for tabular output
405 *
406 * Revision 6.92 2001/06/07 19:30:03 dondosha
407 * Pass believe query argument to BlastPrintTabulatedResults
408 *
409 * Revision 6.91 2001/06/06 21:22:44 dondosha
410 * Added (query) Bioseq and SeqLoc arguments to function BlastPrintTabulatedResults
411 *
412 * Revision 6.90 2001/05/25 19:26:36 vakatov
413 * Nested comment typo fixed
414 *
415 * Revision 6.89 2001/05/23 22:38:47 dondosha
416 * Added option -m 9 to print post-search tabulated output
417 *
418 * Revision 6.88 2001/04/10 19:20:52 madden
419 * Unsuppress some options suppressed for the release
420 *
421 * Revision 6.87 2001/04/02 13:52:15 madden
422 * Fix for last checkin, properly suppress some options
423 *
424 * Revision 6.85 2001/03/19 22:39:24 dondosha
425 * Allow location on the first query sequence for megablast
426 *
427 * Revision 6.84 2001/03/13 21:58:23 madden
428 * add support for multiple hits blastn, add option for window size
429 *
430 * Revision 6.83 2001/02/22 20:26:03 dondosha
431 * If location stop is -1, make it end of sequence
432 *
433 * Revision 6.82 2001/02/22 20:11:58 dondosha
434 * Previous change reversed; added option to set location on query sequence
435 *
436 * Revision 6.81 2001/02/22 16:16:43 shavirin
437 * Added options for required start and required stop of the query to be
438 * used in the Blast search.
439 *
440 * Revision 6.80 2001/02/22 15:38:48 dondosha
441 * Corrected the argument number for longest intron length
442 *
443 * Revision 6.79 2001/02/09 22:22:36 madden
444 * Do not use BlastPruneHitsFromSeqAlign for printing DefLines
445 *
446 * Revision 6.78 2001/02/08 20:41:17 dondosha
447 * Implemented tabulated output for all translated programs
448 *
449 * Revision 6.77 2001/02/07 21:17:22 dondosha
450 * Added support to produce tabulated output (-m 8 option)
451 *
452 * Revision 6.76 2001/01/19 20:03:47 dondosha
453 * Uninitialized variable seqannot caused core dump with XML output
454 *
455 * Revision 6.75 2000/12/19 18:40:47 madden
456 * Add calls to BlastSetUserErrorString and BlastDeleteUserErrorString
457 *
458 * Revision 6.74 2000/12/15 21:32:12 dondosha
459 * Appended getargs explanation of new tblastn (-t) option
460 *
461 * Revision 6.73 2000/11/21 15:47:21 dondosha
462 * Corrected default wordsize for megablast option
463 *
464 * Revision 6.72 2000/11/17 21:56:26 dondosha
465 * Do not free query_lcase_mask in client-server case - already freed
466 *
467 * Revision 6.71 2000/11/17 20:56:50 dondosha
468 * Returned Mega BLAST option which existed in blastcl3 and was removed
469 *
470 * Revision 6.70 2000/11/17 17:54:50 dondosha
471 * Added argument to allow greedy (a la Mega BLAST) extension in blastn
472 *
473 * Revision 6.69 2000/11/15 15:10:27 shavirin
474 * This revision is result of merge between blastall.c and blastcl3.c
475 * programs. Using define BLAST_CS_API - client/server version may be
476 * created.
477 *
478 * Revision 6.68 2000/11/09 15:01:00 dondosha
479 * Set longest intron length in options in nucleotide coordinates
480 *
481 * Revision 6.67 2000/11/08 22:24:07 dondosha
482 * Enabled new tblastn by adding longest intron option
483 *
484 * Revision 6.66 2000/11/01 16:26:50 madden
485 * Changes from Futamura for psitblastn
486 *
487 * Revision 6.65 2000/10/27 19:14:40 madden
488 * Change description of -b option
489 *
490 * Revision 6.64 2000/10/23 22:14:04 shavirin
491 * Added possibility to pass valid error message into XML output in case
492 * of failure or no hits.
493 *
494 * Revision 6.63 2000/10/23 19:58:22 dondosha
495 * Open and close AsnIo outside of call(s) to BXMLPrintOutput
496 *
497 * Revision 6.62 2000/10/17 19:37:41 shavirin
498 * Fixed compilation problems detected on Mac.
499 *
500 * Revision 6.61 2000/10/17 17:19:49 shavirin
501 * Temporary - for toolkit release - commented OOF shift penalty parameter.
502 *
503 * Revision 6.60 2000/10/06 17:54:28 shavirin
504 * Added usage of correct matrix in case of OOF alignment.
505 *
506 * Revision 6.59 2000/09/26 15:48:15 dondosha
507 * Put back printing of header before results of every search when multiple queries are submitted
508 *
509 * Revision 6.58 2000/09/13 22:26:23 dondosha
510 * Removed extra </PRE> that is now printed in PrintDefLinesFromSeqAlign
511 *
512 * Revision 6.57 2000/09/13 21:39:31 dondosha
513 * Corrected html output when input contains multiple queries
514 *
515 * Revision 6.56 2000/09/12 16:08:43 dondosha
516 * Create txalign style matrix from search matrix
517 *
518 * Revision 6.55 2000/09/12 16:02:13 madden
519 * do not allow -P with blastn, fix typo
520 *
521 * Revision 6.54 2000/09/07 20:25:59 madden
522 * Remove L option, turn off K (culling) by default, add -P option
523 *
524 * Revision 6.53 2000/09/07 16:27:07 shavirin
525 * Added option for OOF gap alignment for blastx.
526 *
527 * Revision 6.52 2000/08/24 14:13:23 shavirin
528 * Added return 1 if database do not exists on any path.
529 *
530 * Revision 6.51 2000/08/11 18:03:58 shavirin
531 * Added possibility to make blastx and tblastx with XML output.
532 *
533 * Revision 6.50 2000/08/11 17:54:08 shavirin
534 * Added possibility to print XML output (with -m 7 option)
535 *
536 * Revision 6.49 2000/08/01 16:35:34 madden
537 * Append Seq-annot, do not overwrite
538 *
539 * Revision 6.48 2000/06/27 15:25:18 madden
540 * Changed master-slave to query-anchored
541 *
542 * Revision 6.47 2000/06/13 19:38:46 shavirin
543 * Added ability to print XML Blast output.
544 *
545 * Revision 6.46 2000/06/05 19:31:31 madden
546 * Free query->lcase_mask between searches
547 *
548 * Revision 6.45 2000/05/26 19:28:44 shavirin
549 * Added adjustment of dropoff_1st_pass if dropoff_1st_pass > dropoff_2nd_pass
550 *
551 * Revision 6.44 2000/05/26 18:48:23 shavirin
552 * Added two new parameters; '-y' and '-Z'
553 *
554 * Revision 6.43 2000/05/09 15:57:26 shavirin
555 * Added call to the function ReadDBBioseqSetDbGeneticCode().
556 *
557 * Revision 6.42 2000/04/25 20:50:45 dondosha
558 * Removed unavailable option to use greedy algorithm
559 *
560 * Revision 6.41 2000/04/13 13:34:19 shavirin
561 * Added call to ObjMgrFreeCache() back after fixes in API.
562 *
563 * Revision 6.40 2000/04/04 18:29:13 shavirin
564 * Added some missing HTML tags.
565 *
566 * Revision 6.39 2000/03/31 19:13:33 dondosha
567 * Changed some names related to MegaBlast
568 *
569 * Revision 6.38 2000/03/24 21:49:30 madden
570 * Comment out ObjMgrFreeCache
571 *
572 * Revision 6.37 2000/03/02 21:06:09 shavirin
573 * Added -U option, that allows to consider low characters in FASTA files
574 * as filtered regions (for blastn, blastp and tblastn).
575 *
576 * Revision 6.36 2000/02/01 20:05:31 dondosha
577 * Added option -B: use greedy basic alignment search if set to T
578 *
579 * Revision 6.35 2000/01/28 16:46:54 madden
580 * Added function BlastGetMaskingLoc
581 *
582 * Revision 6.34 1999/12/17 20:48:53 egorov
583 * Fix 'gcc -Wall' warnings and remove old stuff.
584 *
585 * Revision 6.33 1999/10/12 19:35:26 madden
586 * Deallocate Mask information
587 *
588 * Revision 6.32 1999/08/26 14:58:06 madden
589 * Use float for db length
590 *
591 * Revision 6.31 1999/05/26 13:12:56 madden
592 * Initialized matrix to NULL
593 *
594 * Revision 6.30 1999/03/31 16:58:04 madden
595 * Removed static FindProt and FindNuc
596 *
597 * Revision 6.29 1999/02/10 21:12:26 madden
598 * Added HTML and GI list option, fixed filtering
599 *
600 * Revision 6.28 1999/01/22 17:24:51 madden
601 * added line breaks for alignment views
602 *
603 * Revision 6.27 1998/12/31 18:18:27 madden
604 * Added strand option
605 *
606 * Revision 6.26 1998/12/29 20:03:14 kans
607 * calls UseLocalAsnloadDataAndErrMsg at startup
608 *
609 * Revision 6.25 1998/11/19 14:04:34 madden
610 * Changed message level to SEV_WARNING
611 *
612 * Revision 6.24 1998/11/16 16:29:19 madden
613 * Added ErrSetMessageLevel(SEV_INFO)
614 *
615 * Revision 6.23 1998/07/17 15:41:36 madden
616 * Added effective search space flag
617 *
618 * Revision 6.22 1998/06/29 13:02:01 madden
619 * Deallocate matrix
620 *
621 * Revision 6.21 1998/06/10 13:33:14 madden
622 * Change -K from zero to 100
623 *
624 * Revision 6.20 1998/06/05 21:48:42 madden
625 * Added -K and -L options
626 *
627 * Revision 6.19 1998/05/18 18:01:04 madden
628 * Changed args to allow filter options to be changed
629 *
630 * Revision 6.18 1998/05/01 18:31:02 egorov
631 * Add new parameters to BLASTOptionSetGapParam()
632 *
633 * Revision 6.17 1998/04/30 14:32:32 madden
634 * init_buff_ex arg changed to 90 for reference
635 *
636 * Revision 6.16 1998/04/29 14:29:30 madden
637 * Made reference line longer
638 *
639 * Revision 6.15 1998/04/01 22:49:12 madden
640 * Print No hits found message
641 *
642 * Revision 6.14 1998/02/25 20:50:48 madden
643 * Added arg for db length
644 *
645 * Revision 6.13 1998/02/24 22:48:34 madden
646 * Removed options for culling
647 *
648 * Revision 6.12 1998/01/31 21:35:17 madden
649 * zeroed out values between searches
650 *
651 * Revision 6.11 1997/12/31 17:48:52 madden
652 * Added wordsize option
653 *
654 * Revision 6.10 1997/12/23 21:09:47 madden
655 * Added -K and -L for range-dependent blast
656 *
657 * Revision 6.9 1997/11/19 14:26:43 madden
658 * Removed extra break statement
659 *
660 * Revision 6.8 1997/11/18 22:24:22 madden
661 * Added call to BLASTOptionSetGapParams
662 *
663 * Revision 6.7 1997/10/27 22:26:52 madden
664 * Added call to ObjMgrFreeCache(0)
665 *
666 * Revision 6.6 1997/10/23 20:26:12 madden
667 * Use of init_buff_ex rather than init_buff
668 *
669 * Revision 6.5 1997/10/22 21:56:04 madden
670 * Added matrix option
671 *
672 * Revision 6.3 1997/10/07 21:33:38 madden
673 * Added BLUNT option
674 *
675 * Revision 6.2 1997/09/23 22:13:19 madden
676 * enabled descriptions and alignment options
677 *
678 * Revision 6.1 1997/09/16 16:34:32 madden
679 * Dbinfo printing changed for multiple db searches
680 *
681 * Revision 6.0 1997/08/25 18:19:14 madden
682 * Revision changed to 6.0
683 *
684 * Revision 1.16 1997/07/29 19:33:02 madden
685 * Added TXALIGN_SHOW_QS flag
686 *
687 * Revision 1.15 1997/07/28 17:01:23 madden
688 * Added include for simutil.h
689 *
690 * Revision 1.14 1997/07/28 14:31:09 madden
691 * Changes for masking alignments.
692 *
693 * Revision 1.13 1997/07/22 19:06:35 madden
694 * Option changes, Printing of version info
695 *
696 * Revision 1.12 1997/07/18 20:09:22 madden
697 * Conversion from blast2 output to new output
698 *
699 * Revision 1.3 1997/02/24 22:08:38 madden
700 * Added reward and penalty for match and mismatch.
701 *
702 * Revision 1.2 1997/02/23 16:48:52 madden
703 * Call to AcknowledgeBlastQuery added.
704 *
705 * Revision 1.1 1997/02/19 21:44:28 madden
706 * Initial revision
707 *
708 *
709 */
710
711 #include <string.h>
712
713 #include <ncbi.h>
714 #include <objseq.h>
715 #include <objsset.h>
716 #include <sequtil.h>
717 #include <seqport.h>
718 #include <tofasta.h>
719 #include <blast.h>
720 #include <blastpri.h>
721 #include <simutil.h>
722 #include <txalign.h>
723 #include <gapxdrop.h>
724 #include <sqnutils.h>
725 #include <xmlblast.h>
726 #include <mblast.h>
727 #include <blfmtutl.h>
728 #include <algo/blast/composition_adjustment/composition_constants.h>
729 #ifdef BLAST_CS_API
730 #include <objblst3.h>
731 #include <netblap3.h>
732 #endif
733 #ifndef BLASTALL_TOOLS_ONLY
734 #include <algo/blast/core/blast_options.h>
735 #include <algo/blast/core/blast_setup.h>
736 #include <algo/blast/core/blast_message.h>
737 #include <algo/blast/core/blast_filter.h>
738 #include <algo/blast/core/blast_util.h>
739 #include <algo/blast/core/blast_engine.h>
740 #include <algo/blast/core/blast_stat.h>
741 #include <algo/blast/api/blast_seq.h>
742 #include <algo/blast/api/blast_input.h>
743 #include <algo/blast/api/blast_format.h>
744 #include <algo/blast/api/blast_seqalign.h>
745 #include <algo/blast/api/seqsrc_readdb.h>
746 #include <algo/blast/api/blast_tabular.h>
747 #include <algo/blast/api/blast_mtlock.h>
748 #include <algo/blast/api/blast_prelim.h>
749 #include <algo/blast/api/blast_api.h>
750 #include <algo/blast/api/repeats_filter.h>
751 #endif /* BLASTALL_TOOLS_ONLY */
752
/* Buffer size constant; presumably for definition-line buffers (its use
   is not visible in this part of the file -- confirm). */
#define DEFLINE_BUF 255


/* Stream the tick callback writes to.  Starts out NULL. */
FILE *global_fp = NULL;
758 /*
759 Callback to print out ticks, in UNIX only due to file systems
760 portability issues.
761 */
762
763 #ifdef BLAST_CS_API
764 static Boolean LIBCALLBACK
765 tick_callback (BlastResponsePtr brp, Boolean PNTR cancel)
766 {
767
768 #if 0
769 fprintf(global_fp, ".");
770 fflush(global_fp);
771 #endif
772
773 return TRUE;
774 }
775
776 #else
777 static int LIBCALLBACK
778 tick_callback(Int4 sequence_number, Int4 number_of_positive_hits)
779
780 {
781 #ifdef OS_UNIX
782 /* #ifndef BLAST_CS_API */
783 fprintf(global_fp, "%s", ".");
784 fflush(global_fp);
785 /* #endif */
786 #endif
787 return 0;
788 }
789 #endif
790
791 static Int2
792 BlastGetMaskingLoc(FILE *infp, FILE *outfp, CharPtr instructions)
793 {
794 BioseqPtr bsp;
795 Char buffer[50];
796 SeqEntryPtr sep;
797 SeqLocPtr slp, slp_start, tmp_slp;
798
799 if (infp == NULL || outfp == NULL || instructions == NULL)
800 return 1;
801
802 while ((sep=FastaToSeqEntryEx(infp, TRUE, NULL, TRUE)) != NULL)
803 {
804 bsp = NULL;
805 SeqEntryExplore(sep, &bsp, FindNuc);
806
807 if (bsp == NULL)
808 {
809 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
810 return 2;
811 }
812 SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, 50);
813 fprintf(outfp, ">%s\n", buffer);
814 slp_start = slp = BlastBioseqFilter(bsp, instructions);
815 while (slp)
816 {
817 tmp_slp=NULL;
818 while((tmp_slp = SeqLocFindNext(slp, tmp_slp))!=NULL)
819 {
820 fprintf(outfp, "%ld %ld\n", (long) (1+SeqLocStart(tmp_slp)), (long) (1+SeqLocStop(tmp_slp)));
821 }
822 slp = slp->next;
823 }
824
825 /* used for debugging. */
826 #if 0
827 {{
828 BioseqPtr bsp_tmp;
829 ByteStorePtr byte_sp;
830 Int4 index;
831 SeqLocPtr tmp_slp_1, tmp_filter_slp;
832 SeqPortPtr spp;
833 Uint1Ptr tmp_query_seq, tmp_query_seq_start;
834 Uint1 residue;
835 FILE *tmp_fp;
836
837 spp = SeqPortNew(bsp, 0, -1, 0, Seq_code_iupacna);
838 SeqPortSet_do_virtual(spp, TRUE);
839 tmp_query_seq_start = (Uint1Ptr) MemNew(((BioseqGetLen(bsp))+2)*sizeof(Uint1));
840 tmp_query_seq_start[0] = NULLB;
841 tmp_query_seq = tmp_query_seq_start+1;
842 index=0;
843 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
844 {
845
846 if (IS_residue(residue))
847 {
848 tmp_query_seq[index] = residue;
849 index++;
850 }
851 }
852 BlastMaskTheResidues(tmp_query_seq, BioseqGetLen(bsp), 78, slp_start, FALSE, 0);
853 bsp_tmp = BioseqNew();
854 bsp_tmp->length = BioseqGetLen(bsp);
855 byte_sp = BSNew(1);
856 BSWrite(byte_sp, tmp_query_seq, bsp->length);
857 bsp_tmp->seq_data = byte_sp;
858 bsp_tmp->repr = Seq_repr_raw;
859 bsp_tmp->seq_data_type = Seq_code_iupacna;
860 bsp_tmp->mol = 1;
861
862 bsp_tmp->id = bsp->id;
863 bsp_tmp->descr = bsp->descr;
864
865 tmp_fp = FileOpen("masked.fsa", "w");
866 BioseqRawToFastaExtra(bsp_tmp, tmp_fp, 50);
867
868 bsp_tmp->id = NULL;
869 bsp_tmp->descr = NULL;
870
871 spp = SeqPortFree(spp);
872 bsp_tmp = BioseqFree(bsp_tmp);
873 tmp_query_seq_start = MemFree(tmp_query_seq_start);
874 FileClose(tmp_fp);
875
876 tmp_filter_slp = slp_start;
877 tmp_fp = FileOpen("locations.msk", "w");
878 while (tmp_filter_slp)
879 {
880 tmp_slp_1=NULL;
881 while((tmp_slp_1 = SeqLocFindNext(tmp_filter_slp, tmp_slp_1))!=NULL)
882 {
883 fprintf(tmp_fp, "%ld %ld\n", (long) (1+SeqLocStart(tmp_slp_1)), (long) (1+SeqLocStop(tmp_slp_1)));
884
885 }
886 tmp_filter_slp = tmp_filter_slp->next;
887 }
888
889
890 FileClose(tmp_fp);
891 }}
892 #endif
893 slp_start = SeqLocSetFree(slp_start);
894 sep = SeqEntryFree(sep);
895 }
896
897 return 0;
898 }
899
900 /* Breaks up a location like "2000 3000" into two integers
901 that are returned.
902
903 If location is NULL then the integers are set to 0.
904 */
905
906 /* FIXME: better name, move to API directory?? */
907 static Boolean
908 sGetLoc(char* location, Int4* start, Int4* end)
909 {
910 CharPtr delimiters = " ,;";
911
912 if (start == NULL || end == NULL)
913 return FALSE;
914
915 *start = 0;
916 *end = 0;
917
918 if (location == NULL)
919 return TRUE;
920
921 *start = atoi(StringTokMT(location, delimiters, &location));
922 *end = atoi(location);
923
924 return TRUE;
925 }
926
/* Indices of the command-line arguments inside the myargs[] table.
   Every myargs[] entry is tagged with the enumerator it belongs to, so
   the two lists must be kept in the same order, including the sections
   that depend on BLAST_CS_API, BLASTALL_TOOLS_ONLY and
   ALLOW_FULL_SMITH_WATERMAN.

   The optional last enumerator carries its own leading comma so that the
   list never ends with a trailing comma, which C89 compilers reject. */
typedef enum {
    ARG_PROGRAM = 0,
    ARG_DB,
    ARG_QUERY,
    ARG_EVALUE,
    ARG_FORMAT,
    ARG_OUT,
    ARG_FILTER,
    ARG_GAPOPEN,
    ARG_GAPEXT,
    ARG_XDROP,
    ARG_SHOWGIS,
    ARG_MISMATCH,
    ARG_MATCH,
    ARG_DESCRIPTIONS,
    ARG_ALIGNMENTS,
    ARG_THRESHOLD,
    ARG_GAPPED,
    ARG_QGENETIC_CODE,
    ARG_DBGENCODE,
    ARG_THREADS,
    ARG_ASNOUT,
    ARG_BELIEVEQUERY,
    ARG_MATRIX,
    ARG_WORDSIZE,
    ARG_DBSIZE,
    ARG_BESTHITS,
    ARG_MULTIPLEHITS,
    ARG_SEARCHSP,
    ARG_STRAND,
    ARG_HTML,
#ifdef BLAST_CS_API
    ARG_ENTREZQ,
#else
    ARG_GILIST,
#endif
    ARG_LCASE,
    ARG_XDROP_UNGAPPED,
    ARG_XDROP_FINAL,
#ifdef BLAST_CS_API
    ARG_RPSBLAST,
#else
    ARG_PSITCHKPNT,
#endif
    ARG_USEMEGABLAST,
    ARG_QUERYLOC,
    ARG_WINDOW,
    ARG_FRAMESHIFT,
    ARG_INTRON,
#ifndef BLAST_CS_API
    ARG_NUMQUERIES,
#ifndef BLASTALL_TOOLS_ONLY
    ARG_FORCE_OLD,
#endif
#endif
    ARG_COMP_BASED_STATS,
    ARG_SMITH_WATERMAN
#ifdef ALLOW_FULL_SMITH_WATERMAN
    , ARG_SMITH_WATERMAN_ALL
#endif
} BlastArguments;
988
989 #define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
990
991 static Args myargs[] = {
992 { "Program Name",
993 NULL, NULL, NULL, FALSE, 'p', ARG_STRING, 0.0, 0, NULL}, /* ARG_PROGRAM */
994 { "Database",
995 "nr", NULL, NULL, FALSE, 'd', ARG_STRING, 0.0, 0, NULL}, /* ARG_DB */
996 { "Query File",
997 "stdin", NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_QUERY */
998 { "Expectation value (E)",
999 "10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_EVALUE */
1000 { "alignment view options:\n0 = pairwise,\n1 = query-anchored showing identities,\n2 = query-anchored no identities,\n3 = flat query-anchored, show identities,\n4 = flat query-anchored, no identities,\n5 = query-anchored no identities and blunt ends,\n6 = flat query-anchored, no identities and blunt ends,\n7 = XML Blast output,\n8 = tabular, \n9 tabular with comment lines\n10 ASN, text\n11 ASN, binary", /* 4 */
1001 "0", "0", "11", FALSE, 'm', ARG_INT, 0.0, 0, NULL}, /* ARG_FORMAT */
1002 { "BLAST report Output File",
1003 "stdout", NULL, NULL, TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}, /* ARG_OUT */
1004 { "Filter query sequence (DUST with blastn, SEG with others)",
1005 "T", NULL, NULL, FALSE, 'F', ARG_STRING, 0.0, 0, NULL}, /* ARG_FILTER */
1006 { "Cost to open a gap (-1 invokes default behavior)",
1007 "-1", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL}, /* ARG_GAPOPEN */
1008 { "Cost to extend a gap (-1 invokes default behavior)",
1009 "-1", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL}, /* ARG_GAPEXT */
1010 { "X dropoff value for gapped alignment (in bits) (zero invokes default "
1011 "behavior)\n blastn 30, megablast 20, tblastx 0, all others 15",
1012 "0", NULL, NULL, FALSE, 'X', ARG_INT, 0.0, 0, NULL}, /* ARG_XDROP */
1013 { "Show GI's in deflines", /* 10 */
1014 "F", NULL, NULL, FALSE, 'I', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_SHOWGIS */
1015 { "Penalty for a nucleotide mismatch (blastn only)",
1016 "-3", NULL, NULL, FALSE, 'q', ARG_INT, 0.0, 0, NULL}, /* ARG_MISMATCH */
1017 { "Reward for a nucleotide match (blastn only)",
1018 "1", NULL, NULL, FALSE, 'r', ARG_INT, 0.0, 0, NULL}, /* ARG_MATCH */
1019 { "Number of database sequences to show one-line descriptions for (V)",
1020 "500", NULL, NULL, FALSE, 'v', ARG_INT, 0.0, 0, NULL}, /* ARG_DESCRIPTIONS */
1021 { "Number of database sequence to show alignments for (B)",
1022 "250", NULL, NULL, FALSE, 'b', ARG_INT, 0.0, 0, NULL}, /* ARG_ALIGNMENTS */
1023 { "Threshold for extending hits, default if zero\n"
1024 " blastp 11, blastn 0, blastx 12, tblastn 13\n"
1025 " tblastx 13, megablast 0",
1026 "0", NULL, NULL, FALSE, 'f', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_THRESHOLD */
1027 { "Perform gapped alignment (not available with tblastx)",
1028 "T", NULL, NULL, FALSE, 'g', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_GAPPED */
1029 { "Query Genetic code to use", /* 17 */
1030 "1", NULL, NULL, FALSE, 'Q', ARG_INT, 0.0, 0, NULL}, /* ARG_QGENETIC_CODE */
1031 { "DB Genetic code (for tblast[nx] only)", /* 18 */
1032 "1", NULL, NULL, FALSE, 'D', ARG_INT, 0.0, 0, NULL}, /* ARG_DBGENCODE */
1033 { "Number of processors to use", /* 19 */
1034 "1", NULL, NULL, FALSE, 'a', ARG_INT, 0.0, 0, NULL}, /* ARG_THREADS */
1035 { "SeqAlign file", /* 20 */
1036 NULL, NULL, NULL, TRUE, 'O', ARG_FILE_OUT, 0.0, 0, NULL}, /* ARG_ASNOUT */
1037 { "Believe the query defline", /* 21 */
1038 "F", NULL, NULL, FALSE, 'J', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_BELIEVEQUERY */
1039 { "Matrix", /* 22 */
1040 "BLOSUM62", NULL, NULL, FALSE, 'M', ARG_STRING, 0.0, 0, NULL}, /* ARG_MATRIX */
1041 { "Word size, default if zero (blastn 11, megablast 28, "
1042 "all others 3)", /* 23 */
1043 "0", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL}, /* ARG_WORDSIZE */
1044 { "Effective length of the database (use zero for the real size)",
1045 "0", NULL, NULL, FALSE, 'z', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_DBSIZE */
1046 { "Number of best hits from a region to keep. Off by default.\nIf used a value of 100 is recommended. Very high values of -v or -b is also suggested",
1047 "0", NULL, NULL, FALSE, 'K', ARG_INT, 0.0, 0, NULL}, /* ARG_BESTHITS */
1048 { "0 for multiple hit, 1 for single hit (does not apply to blastn)",
1049 "0", NULL, NULL, FALSE, 'P', ARG_INT, 0.0, 0, NULL}, /* ARG_MULTIPLEHITS */
1050 { "Effective length of the search space (use zero for the real size)",
1051 "0", NULL, NULL, FALSE, 'Y', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_SEARCHSP */
1052 { "Query strands to search against database (for blast[nx], and tblastx)\n"
1053 " 3 is both, 1 is top, 2 is bottom",
1054 "3", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL}, /* ARG_STRAND */
1055 { "Produce HTML output", /* 29 */
1056 "F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_HTML */
1057 #ifdef BLAST_CS_API
1058 { "Restrict search of database to results of Entrez2 lookup",
1059 NULL, NULL, NULL, TRUE, 'u', ARG_STRING, 0.0, 0, NULL}, /* ARG_ENTREZQ */
1060 #else
1061 { "Restrict search of database to list of GI's",
1062 NULL, NULL, NULL, TRUE, 'l', ARG_STRING, 0.0, 0, NULL}, /* ARG_GILIST */
1063 #endif
1064 {"Use lower case filtering of FASTA sequence",
1065 NULL, NULL, NULL, TRUE, 'U', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_LCASE */
1066 { "X dropoff value for ungapped extensions in bits (0.0 invokes default "
1067 "behavior)\n blastn 20, megablast 10, all others 7",
1068 "0.0", NULL, NULL, FALSE, 'y', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_XDROP_UNGAPPED */
1069 { "X dropoff value for final gapped alignment in bits "
1070 "(0.0 invokes default behavior)\n"
1071 " blastn/megablast 100, tblastx 0, all others 25",
1072 "0", NULL, NULL, FALSE, 'Z', ARG_INT, 0.0, 0, NULL}, /* ARG_XDROP_FINAL */
1073 #ifdef BLAST_CS_API
1074 { "RPS Blast search", /* 34 */
1075 "F", NULL, NULL, FALSE, 'R', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_RPSBLAST */
1076 #else
1077 { "PSI-TBLASTN checkpoint file", /* 34 */
1078 NULL, NULL, NULL, TRUE, 'R', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_PSITCHKPNT */
1079 #endif
1080 { "MegaBlast search", /* 35 */
1081 "F", NULL, NULL, FALSE, 'n', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_USEMEGABLAST */
1082 { "Location on query sequence",/* 36 */
1083 NULL, NULL, NULL, TRUE, 'L', ARG_STRING, 0.0, 0, NULL}, /* ARG_QUERYLOC */
1084 { "Multiple Hits window size, default if zero (blastn/megablast 0, "
1085 "all others 40", /* 37 */
1086 "0", NULL, NULL, FALSE, 'A', ARG_INT, 0.0, 0, NULL}, /* ARG_WINDOW */
1087 { "Frame shift penalty (OOF algorithm for blastx)",
1088 "0", NULL, NULL, FALSE, 'w', ARG_INT, 0.0, 0, NULL}, /* ARG_FRAMESHIFT */
1089 { "Length of the largest intron allowed in a translated nucleotide "
1090 "sequence when "
1091 "linking multiple distinct alignments. (0 invokes default behavior; a "
1092 "negative value disables linking.)",
1093 "0", NULL, NULL, FALSE, 't', ARG_INT, 0.0, 0, NULL}, /* ARG_INTRON */
1094 /*--KM
1095 seems ok to add another param b/c NUMARG is defined based on
1096 sizeof(myargs) itself
1097 made optional=TRUE but this may change?
1098 */
1099 #ifndef BLAST_CS_API
1100 { "Number of concatenated queries, for blastn and tblastn",
1101 "0", NULL, NULL, TRUE, 'B', ARG_INT, 0.0, 0, NULL}, /* ARG_NUMQUERIES */
1102 #ifndef BLASTALL_TOOLS_ONLY
1103 { "Force use of the legacy BLAST engine",
1104 "F", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_FORCE_OLD */
1105 #endif /* BLASTALL_TOOLS_ONLY */
1106 #endif
1107 { "Use composition-based score adjustments for blastp or tblastn:\n" /* ARG_COMP_BASED_STATS */
1108 " As first character:\n"
1109 " D or d: default (equivalent to T)\n"
1110 " 0 or F or f: no composition-based statistics\n"
1111 " 2 or T or t: Composition-based score adjustments as in "
1112 "Bioinformatics 21:902-911,\n"
1113 " 1: Composition-based statistics as in "
1114 "NAR 29:2994-3005, 2001\n"
1115 " 2005, conditioned on sequence properties\n"
1116 " 3: Composition-based score adjustment as in "
1117 "Bioinformatics 21:902-911,\n"
1118 " 2005, unconditionally\n"
1119 " For programs other than tblastn, must either be absent "
1120 "or be D, F or 0.\n "
1121 " As second character, if first character is "
1122 "equivalent to 1, 2, or 3:\n"
1123 " U or u: unified p-value combining alignment p-value "
1124 "and compositional p-value in round 1 only\n",
1125 "D", NULL, NULL, FALSE, 'C', ARG_STRING, 0.0, 0, NULL},
1126 { "Compute locally optimal Smith-Waterman alignments "
1127 "(This option is only\n"
1128 " available for gapped tblastn.)", /* ARG_SMITH_WATERMAN */
1129 "F", NULL, NULL, FALSE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
1130 #ifdef ALLOW_FULL_SMITH_WATERMAN
1131 { "Compute only Smith-Waterman alignments (new engine only)",
1132 "F", NULL, NULL, FALSE, 'h', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_SMITH_WATERMAN_ALL */
1133 #endif
1134 };
1135
1136
#ifdef BLAST_CS_API
/* Opens the network BLAST service under the client name "blastcl3",
   enables Bioseq fetching for the given database and, when header is
   set, prints the version banner to outfp: the "RPS-BLAST" banner alone
   for an RPS search, otherwise the program banner followed by the BLAST
   reference.

   Returns the service handle, or NULL after posting a SEV_FATAL message
   when the service cannot be initialised or its first response is not an
   init response. */
static BlastNet3Hptr BNETInitializeBlast(CharPtr database, CharPtr program,
                                         FILE *outfp, Boolean db_is_na,
                                         Boolean is_rps_blast, Boolean html, Boolean header)
{
    BlastNet3Hptr service_handle;
    BlastResponsePtr init_response = NULL;
    BlastVersionPtr version_info;

    if (! BlastInit("blastcl3", &service_handle, &init_response)) {
        ErrPostEx(SEV_FATAL, 1, 0, "Unable to initialize BLAST service");
        return NULL;
    }

    /* Anything but an init response means there is no usable connection. */
    if (init_response == NULL || init_response->choice != BlastResponse_init) {
        ErrPostEx(SEV_FATAL, 1, 0, "Unable to connect to the BLAST service");
        return NULL;
    }
    version_info = init_response->data.ptrvalue;

    BlastNetBioseqFetchEnable(service_handle, database, db_is_na, TRUE);

    if (header) {
        if (is_rps_blast == TRUE) {
            BlastPrintVersionInfoEx("RPS-BLAST", html, version_info->version,
                                    version_info->date, outfp);
        } else {
            init_buff_ex(90);
            BlastPrintVersionInfoEx(program, html, version_info->version,
                                    version_info->date, outfp);
            fprintf(outfp, "\n");
            BlastPrintReference(html, 80, outfp);
            free_buff();
        }
    }

    /* version_info points into init_response, so release it only now. */
    BlastResponseFree(init_response);

    return service_handle;
}
#endif
1179
1180 /* Needed for Mega BLAST only */
1181 #define MAX_NUM_QUERIES 16383 /* == 1/2 INT2_MAX */
1182
1183 #ifndef BLASTALL_TOOLS_ONLY
1184
1185 /** Fills all the options structures with user defined values. Uses the
1186 * myargs global structure obtained from GetArgs.
1187 * @param lookup_options Lookup table options [in]
1188 * @param query_setup_options Query options [in]
1189 * @param word_options Initial word processing options [in]
1190 * @param ext_options Extension options [in]
1191 * @param hit_options Hit saving options [out]
1192 * @param score_options Scoring options [out]
1193 * @param eff_len_options Effective length options [out]
1194 * @param psi_options Protein BLAST options [out]
1195 * @param db_options BLAST database options [out]
1196 */
1197 static Int2
1198 s_FillOptions(SBlastOptions* options)
1199 {
1200 LookupTableOptions* lookup_options = options->lookup_options;
1201 QuerySetUpOptions* query_setup_options = options->query_options;
1202 BlastInitialWordOptions* word_options = options->word_options;
1203 BlastExtensionOptions* ext_options = options->ext_options;
1204 BlastHitSavingOptions* hit_options = options->hit_options ;
1205 BlastScoringOptions* score_options = options->score_options;
1206 BlastEffectiveLengthsOptions* eff_len_options = options->eff_len_options;
1207
1208 Boolean mb_lookup = FALSE;
1209 Boolean greedy = FALSE;
1210 Boolean is_gapped = FALSE;
1211 EBlastProgramType program_number = options->program;
1212
1213 if (myargs[ARG_USEMEGABLAST].intvalue != 0)
1214 {
1215 greedy = TRUE;
1216 mb_lookup = TRUE;
1217 }
1218
1219 BLAST_FillLookupTableOptions(lookup_options, program_number, mb_lookup,
1220 myargs[ARG_THRESHOLD].floatvalue, (Int2)myargs[ARG_WORDSIZE].intvalue);
1221
1222 BLAST_FillQuerySetUpOptions(query_setup_options, program_number,
1223 myargs[ARG_FILTER].strvalue, (Uint1)myargs[ARG_STRAND].intvalue);
1224
1225 if (myargs[ARG_QGENETIC_CODE].intvalue &&
1226 (program_number == eBlastTypeBlastx ||
1227 program_number == eBlastTypeTblastx))
1228 query_setup_options->genetic_code = myargs[ARG_QGENETIC_CODE].intvalue;
1229
1230 BLAST_FillInitialWordOptions(word_options, program_number,
1231 myargs[ARG_WINDOW].intvalue,
1232 myargs[ARG_XDROP_UNGAPPED].intvalue);
1233
1234 BLAST_FillExtensionOptions(ext_options, program_number, greedy,
1235 myargs[ARG_XDROP].intvalue, myargs[ARG_XDROP_FINAL].intvalue);
1236
1237 /* if both gap_open and gap_extend are zero then they are set to suggested values */
1238 SBlastOptionsSetMatrixAndGapCosts(options, myargs[ARG_MATRIX].strvalue,
1239 myargs[ARG_GAPOPEN].intvalue, myargs[ARG_GAPEXT].intvalue);
1240
1241 SBlastOptionsSetRewardPenaltyAndGapCosts(options,
1242 myargs[ARG_MATCH].intvalue,
1243 myargs[ARG_MISMATCH].intvalue,
1244 myargs[ARG_GAPOPEN].intvalue,
1245 myargs[ARG_GAPEXT].intvalue,
1246 FALSE);
1247
1248 if (myargs[ARG_MULTIPLEHITS].intvalue == 1 ||
1249 myargs[ARG_WINDOW].intvalue < 0)
1250 word_options->window_size = 0;
1251 else
1252 SBlastOptionsSetWindowSize(options, myargs[ARG_WINDOW].intvalue);
1253
1254 SBlastOptionsSetThreshold(options, myargs[ARG_THRESHOLD].floatvalue);
1255
1256 if (program_number != eBlastTypeTblastx)
1257 is_gapped = myargs[ARG_GAPPED].intvalue;
1258 else
1259 is_gapped = FALSE;
1260
1261 score_options->gapped_calculation = is_gapped;
1262 if (myargs[ARG_FRAMESHIFT].intvalue) {
1263 score_options->shift_pen = myargs[ARG_FRAMESHIFT].intvalue;
1264 score_options->is_ooframe = TRUE;
1265 }
1266
1267 BLAST_FillHitSavingOptions(hit_options,
1268 myargs[ARG_EVALUE].floatvalue,
1269 MAX(myargs[ARG_DESCRIPTIONS].intvalue,
1270 myargs[ARG_ALIGNMENTS].intvalue),
1271 is_gapped,
1272 myargs[ARG_BESTHITS].intvalue, /* culling limit */
1273 0); /* min diag separation */
1274
1275 hit_options->longest_intron = MIN(myargs[ARG_INTRON].intvalue, MAX_INTRON_LENGTH);
1276
1277 if (myargs[ARG_SEARCHSP].floatvalue != 0 ||
1278 myargs[ARG_DBSIZE].floatvalue != 0) {
1279 Int8 searchsp = (Int8)myargs[ARG_SEARCHSP].floatvalue;
1280 Int8 dbsize = (Int8)myargs[ARG_DBSIZE].floatvalue;
1281 BLAST_FillEffectiveLengthsOptions(eff_len_options, 0, dbsize, &searchsp, 1);
1282 }
1283
1284 if (program_number == eBlastTypeTblastn ||
1285 program_number == eBlastTypeRpsTblastn ||
1286 program_number == eBlastTypeTblastx) {
1287 SBlastOptionsSetDbGeneticCode(options, myargs[ARG_DBGENCODE].intvalue);
1288 }
1289 if ((program_number == eBlastTypeTblastn ||
1290 program_number == eBlastTypeBlastp) && is_gapped) {
1291 /* Set options specific to gapped tblastn and blastp */
1292 switch (myargs[ARG_COMP_BASED_STATS].strvalue[0]) {
1293 case '0': case 'F': case 'f':
1294 ext_options->compositionBasedStats = eNoCompositionBasedStats;
1295 break;
1296 case '1':
1297 ext_options->compositionBasedStats = eCompositionBasedStats;
1298 break;
1299 case 'D': case 'd':
1300 case '2': case 'T': case 't':
1301 ext_options->compositionBasedStats = eCompositionMatrixAdjust;
1302 break;
1303 case '3':
1304 ErrPostEx(SEV_WARNING, 1, 0, "the -C 3 argument "
1305 "is currently experimental\n");
1306 ext_options->compositionBasedStats = eCompoForceFullMatrixAdjust;
1307 break;
1308 default:
1309 ErrPostEx(SEV_FATAL, 1, 0, "invalid argument for composition-"
1310 "based statistics; see -C options\n");
1311 break;
1312 }
1313 if (ext_options->compositionBasedStats > eNoCompositionBasedStats) {
1314 switch (myargs[ARG_COMP_BASED_STATS].strvalue[1]) {
1315 case 'U':
1316 case 'u':
1317 if (program_number == eBlastTypeBlastp) {
1318 ext_options->unifiedP = 1;
1319 ErrPostEx(SEV_WARNING, 1, 0, "unified p-values "
1320 "are currently experimental\n");
1321 } else {
1322 ErrPostEx(SEV_FATAL, 1, 0, "unified p-values "
1323 "are currently only available for blastp\n");
1324 }
1325 break;
1326 case '\0':
1327 break;
1328 default:
1329 ErrPostEx(SEV_WARNING, 1, 0, "unrecognized second character"
1330 "in value of -t, ignoring it\n");
1331 break;
1332 }
1333 }
1334 if (myargs[ARG_SMITH_WATERMAN].intvalue) {
1335 ext_options->eTbackExt = eSmithWatermanTbck;
1336 }
1337 } else {
1338 /* Make sure tblastn and blastp parameters were not set for
1339 * other programs */
1340
1341 switch (myargs[ARG_COMP_BASED_STATS].strvalue[0]) {
1342 case '0': case 'D': case 'd': case 'F': case 'f':
1343 break;
1344 default:
1345 ErrPostEx(SEV_FATAL, 1, 0,
1346 "Invalid option -C: only gapped blastp or gapped tblastn "
1347 "may use composition based statistics.");
1348 break;
1349 }
1350 if(myargs[ARG_SMITH_WATERMAN].intvalue) {
1351 ErrPostEx(SEV_FATAL, 1, 0,
1352 "Invalid option -s: Smith-Waterman alignments are only "
1353 "available for gapped blastp and gapped tblastn.");
1354 }
1355 }
1356
1357 #ifdef ALLOW_FULL_SMITH_WATERMAN
1358 if (myargs[ARG_SMITH_WATERMAN_ALL].intvalue) {
1359 ext_options->ePrelimGapExt = eSmithWatermanScoreOnly;
1360 ext_options->eTbackExt = eSmithWatermanTbckFull;
1361 ext_options->compositionBasedStats = eNoCompositionBasedStats;
1362 }
1363 #endif
1364
1365 if (lookup_options->lut_type == eCompressedAaLookupTable) {
1366 if (lookup_options->threshold < 16) {
1367 ErrPostEx(SEV_WARNING, 1, 0,
1368 "Threshold is probably too small for protein "
1369 "searches with a compressed alphabet");
1370 }
1371 if (word_options->window_size > 0) {
1372 ErrPostEx(SEV_WARNING, 1, 0,
1373 "Multiple hits may not work with compressed alphabets");
1374 }
1375 }
1376 return 0;
1377 }
1378
1379 #ifndef TX_MATRIX_SIZE
1380 #define TX_MATRIX_SIZE 128
1381 #endif
1382
1383 Int4** LIBCALL BlastMatrixConvert(Int4** old)
1384 {
1385 Int4 i, j, index1, index2;
1386 Int4** new;
1387 SeqMapTablePtr smtp;
1388 SeqCodeTablePtr sctp;
1389
1390 if (!old)
1391 return NULL;
1392
1393 sctp = SeqCodeTableFindObj(Seq_code_ncbistdaa);
1394 smtp = SeqMapTableFind(Seq_code_ncbieaa, Seq_code_ncbistdaa);
1395
1396 new = malloc(TX_MATRIX_SIZE*sizeof(Int4Ptr));
1397
1398 for (i=0; i<TX_MATRIX_SIZE; i++) {
1399 new[i] = malloc(TX_MATRIX_SIZE*sizeof(Int4));
1400 for (j=0; j<TX_MATRIX_SIZE; j++)
1401 new[i][j] = BLAST_SCORE_MIN;
1402 }
1403
1404 for (i=sctp->start_at; i < sctp->start_at + sctp->num; i++) {
1405 for (j=sctp->start_at; j < sctp->start_at + sctp->num; j++) {
1406 index1 = SeqMapTableConvert(smtp, i);
1407 index2 = SeqMapTableConvert(smtp, j);
1408 new[index1][index2] = old[i][j];
1409 }
1410 }
1411
1412 return new;
1413 }
1414
1415 Int2 Main_new (void)
1416
1417 {
1418 Boolean query_is_na;
1419 Boolean db_is_na;
1420 Boolean believe_query = FALSE;
1421 EBlastProgramType program_number;
1422 Int2 status = 0;
1423 Int4 start=0, end=0; /* start and end of sequence to be searched as specified by ARG_QUERYLOC */
1424 FILE *infp=NULL, *outfp=NULL;
1425 SBlastOptions* options = NULL;
1426 BlastFormattingInfo* format_info = NULL;
1427 BlastFormattingInfo* asn_format_info = NULL; /* For ASN.1 output. */ /* For ASN.1 output. */
1428 Int4 ctr = 1;
1429 Boolean tabular_output = FALSE;
1430 Blast_SummaryReturn* sum_returns = Blast_SummaryReturnNew();
1431 Blast_SummaryReturn* full_sum_returns = NULL;
1432 char* blast_program = myargs[ARG_PROGRAM].strvalue;
1433 char* dbname = myargs[ARG_DB].strvalue;
1434 Int4 maxquery = 0; /* maximum number of bases/residues to concatenate per
1435 database pass */
1436 /* A file that contains a PSI-BLAST "checkpoint", the frequency
1437 ratios computed from a prior run of PSI-BLAST. These
1438 frequencies may be used to compute a PSSM for PSI-BLAST or
1439 PSI-TBLASTN. The name of the file is specified by the -R
1440 option; the FILE * is NULL if no file is specified. */
1441 Blast_PsiCheckpointLoc * psi_checkpoint = NULL;
1442 char* max_query_string = NULL;
1443
1444 GeneticCodeSingletonInit();
1445
1446 status = SBlastOptionsNew(blast_program, &options, sum_returns);
1447
1448 if (status) {
1449 if (sum_returns->error) {
1450 SBlastMessageErrPost(sum_returns->error);
1451 sum_returns = Blast_SummaryReturnFree(sum_returns);
1452 }
1453 return -1;
1454 }
1455
1456 s_FillOptions(options);
1457 program_number = options->program;
1458
1459 switch(program_number) {
1460 case eBlastTypeBlastn:
1461 maxquery = 40000;
1462 if (myargs[ARG_USEMEGABLAST].intvalue)
1463 maxquery = 5000000;
1464 break;
1465 case eBlastTypeTblastn:
1466 case eBlastTypePsiTblastn:
1467 maxquery = 20000;
1468 break;
1469 case eBlastTypeBlastp:
1470 maxquery = 10000;
1471 if (options->lookup_options->lut_type ==
1472 eCompressedAaLookupTable) {
1473 maxquery = 20000;
1474 }
1475 break;
1476 case eBlastTypeBlastx:
1477 case eBlastTypeTblastx:
1478 default:
1479 maxquery = 10000;
1480 }
1481
1482 max_query_string = getenv("BLAST_MAXQUERY_SIZE");
1483 if (max_query_string)
1484 sscanf (max_query_string, "%ld", &maxquery);
1485
1486 BlastGetTypes(myargs[ARG_PROGRAM].strvalue, &query_is_na, &db_is_na);
1487
1488 if (myargs[ARG_BELIEVEQUERY].intvalue != 0)
1489 believe_query = TRUE;
1490
1491 SBlastOptionsSetBelieveQuery(options, believe_query);
1492
1493 if (myargs[ARG_FORMAT].intvalue == 8 && myargs[ARG_USEMEGABLAST].intvalue)
1494 tabular_output = TRUE;
1495
1496 if (!tabular_output) {
1497 Int2 finfo_status = BlastFormattingInfoNew(myargs[ARG_FORMAT].intvalue, options,
1498 blast_program, dbname,
1499 myargs[ARG_OUT].strvalue, &format_info);
1500 if (finfo_status != 0)
1501 {
1502 ErrPostEx(SEV_FATAL, 1, 0, "BlastFormattingInfoNew returned non-zero status");
1503 }
1504
1505 /* Pass TRUE for the "is megablast" argument. Since megablast is always
1506 gapped, pass FALSE for the "is ungapped" argument. */
1507 BlastFormattingInfoSetUpOptions(format_info,
1508 myargs[ARG_DESCRIPTIONS].intvalue,
1509 myargs[ARG_ALIGNMENTS].intvalue,
1510 (Boolean) myargs[ARG_HTML].intvalue,
1511 FALSE,
1512 (Boolean) myargs[ARG_SHOWGIS].intvalue,
1513 believe_query);
1514 format_info->head_on_every_query = TRUE;
1515 }
1516 else
1517 { /* tabular output requires raw FILE*. */
1518 if ((outfp = FileOpen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
1519 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n",
1520 myargs[ARG_OUT].strvalue);
1521 return (1);
1522 }
1523 believe_query = TRUE;
1524 /* FetchEnable/Disable called in blast_format.c for non-tabular output. */
1525 ReadDBBioseqFetchEnable ("blastall", myargs[ARG_DB].strvalue, db_is_na, TRUE);
1526 }
1527
1528 if (myargs[ARG_ASNOUT].strvalue) {
1529 /* This just prints out the ASN.1 to a secondary file. */
1530 BlastFormattingInfoNew(eAlignViewAsnText, options,
1531 blast_program, dbname, myargs[ARG_ASNOUT].strvalue, &asn_format_info);
1532 BlastFormattingInfoSetUpOptions(asn_format_info,
1533 myargs[ARG_DESCRIPTIONS].intvalue,
1534 myargs[ARG_ALIGNMENTS].intvalue,
1535 FALSE,
1536 FALSE,
1537 (Boolean) myargs[ARG_SHOWGIS].intvalue,
1538 believe_query);
1539 }
1540
1541
1542 if ((infp = FileOpen(myargs[ARG_QUERY].strvalue, "r")) == NULL) {
1543 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n",
1544 myargs[ARG_QUERY].strvalue);
1545 return (1);
1546 }
1547
1548 sGetLoc(myargs[ARG_QUERYLOC].strvalue, &start, &end);
1549
1550
1551 /* Get the query (queries), loop if necessary. */
1552 while (1) {
1553 SBlastSeqalignArray* seqalign_arr=NULL;
1554 BlastTabularFormatData* tf_data = NULL;
1555 SeqLoc* lcase_mask = NULL;
1556 SeqLoc* repeat_mask = NULL; /* Repeat mask locations */
1557 SeqLoc* query_slp = NULL;
1558 SeqLoc* filter_loc=NULL; /* All masking locations */
1559 Int4 num_queries; /* Number of queries read this time. */
1560 Int4 letters_read; /* number of letters (bases/residues) read. */
1561
1562 if ((Boolean)myargs[ARG_LCASE].intvalue) {
1563 letters_read = BLAST_GetQuerySeqLoc(infp, query_is_na,
1564 myargs[ARG_STRAND].intvalue, maxquery, start, end,
1565 &lcase_mask, &query_slp, &ctr, &num_queries, believe_query,
1566 myargs[ARG_QGENETIC_CODE].intvalue);
1567 } else {
1568 letters_read = BLAST_GetQuerySeqLoc(infp, query_is_na,
1569 myargs[ARG_STRAND].intvalue, maxquery, start, end,
1570 NULL, &query_slp, &ctr, &num_queries, believe_query,
1571 myargs[ARG_QGENETIC_CODE].intvalue);
1572 }
1573
1574 if (letters_read == 0)
1575 break;
1576
1577 if (letters_read < 0)
1578 {
1579 ErrPostEx(SEV_FATAL, 1, 0, "BLAST_GetQuerySeqLoc returned an error\n");
1580 return -1;
1581 }
1582
1583 if (believe_query && BlastSeqlocsHaveDuplicateIDs(query_slp)) {
1584 ErrPostEx(SEV_FATAL, 1, 0,
1585 "Duplicate IDs detected; please ensure that "
1586 "all query sequence identifiers are unique");
1587 }
1588 #ifndef BLAST_CS_API
1589 /* Now, if this is PSI-TBLASTN (and eventually PSI-BLAST) look for
1590 a restart */
1591 if (program_number == eBlastTypePsiTblastn &&
1592 !myargs[ARG_PSITCHKPNT].strvalue) {
1593 ErrPostEx(SEV_FATAL, 1, 0,
1594 "PSI-TBLASTN requires that a checkpoint file be "
1595 "specified (use the -R option).");
1596 } else if (program_number == eBlastTypePsiTblastn &&
1597 myargs[ARG_PSITCHKPNT].strvalue) {
1598 EPsiCheckpointType checkpoint_type;
1599 char * checkpoint_file_extension =
1600 strrchr(myargs[ARG_PSITCHKPNT].strvalue, '.');
1601 if (NULL == checkpoint_file_extension) {
1602 /* No extension */
1603 checkpoint_file_extension = "";
1604 }
1605 if (0 == strcasecmp(checkpoint_file_extension, ".asn")) {
1606 checkpoint_type = eAsnBinaryCheckpoint;
1607 } else if (0 == strcasecmp(checkpoint_file_extension, ".asnt")) {
1608 checkpoint_type = eAsnTextCheckpoint;
1609 } else if (0 == strcasecmp(checkpoint_file_extension, ".chk")) {
1610 checkpoint_type = eStandardCheckpoint;
1611 } else {
1612 checkpoint_type = eStandardCheckpoint;
1613 ErrPostEx(SEV_WARNING, 1, 0, "The name of the PSI-BLAST "
1614 "checkpoint file does not end with .chk, .asnt, or "
1615 ".txt. Trying to read the file using standard "
1616 "PSI-BLAST format.");
1617 }
1618 psi_checkpoint =
1619 Blast_PsiCheckpointLocNew(checkpoint_type,
1620 myargs[ARG_PSITCHKPNT].strvalue);
1621 if (!psi_checkpoint) {
1622 ErrPostEx(SEV_FATAL, 1, 0,
1623 "Cannot open the checkpoint file %s for reading.",
1624 myargs[ARG_PSITCHKPNT].strvalue);
1625 }
1626 }
1627 #endif
1628 if (tabular_output) {
1629 EBlastTabularFormatOptions tab_option = eBlastTabularDefault;
1630 if (tabular_output == 2) {
1631 if (program_number == eBlastTypeBlastn) {
1632 tab_option = eBlastTabularAddSequences;
1633 } else {
1634 fprintf(stderr,
1635 "WARNING: Sequences printout in tabular output"
1636 " allowed only for blastn\n");
1637 }
1638 }
1639
1640 /* Print the header of tabular output. */
1641 PrintTabularOutputHeader(dbname, NULL, query_slp,
1642 blast_program, 0, FALSE, outfp);
1643
1644 tf_data = BlastTabularFormatDataNew(outfp, NULL, query_slp,
1645 tab_option, believe_query);
1646 tf_data->show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue;
1647 tf_data->show_accession = TRUE;
1648 }
1649
1650 options->num_cpus = myargs[ARG_THREADS].intvalue;
1651
1652 /* Find repeat mask, if necessary */
1653 if ((status = Blast_FindRepeatFilterSeqLoc(query_slp, myargs[ARG_FILTER].strvalue,
1654 &repeat_mask, &sum_returns->error)) != 0)
1655 {
1656 if (sum_returns && sum_returns->error)
1657 {
1658 ErrSev max_sev = SBlastMessageErrPost(sum_returns->error);
1659 if (max_sev >= SEV_ERROR)
1660 return status;
1661 }
1662 }
1663
1664 /* Combine repeat mask with lower case mask */
1665 if (repeat_mask)
1666 lcase_mask = ValNodeLink(&lcase_mask, repeat_mask);
1667
1668 status = Blast_DatabaseSearch(query_slp, psi_checkpoint,
1669 dbname, lcase_mask, options,
1670 tf_data, &seqalign_arr,
1671 &filter_loc, sum_returns);
1672 if (status != 0) {
1673 /* Jump out if fatal error or unknown reason for exit. */
1674 if (sum_returns && sum_returns->error)
1675 {
1676 ErrSev max_severity = SBlastMessageErrPost(sum_returns->error);
1677 if (max_severity >= SEV_ERROR)
1678 return status;
1679 }
1680 else if (!sum_returns || !sum_returns->error)
1681 {
1682 ErrPostEx(SEV_ERROR, 1, 0, "Non-zero return from Blast_DatabaseSearch\n");
1683 return status;
1684 }
1685 }
1686
1687 /* Deallocate the formatting thread data structure. */
1688 if (tabular_output)
1689 BlastTabularFormatDataFree(tf_data);
1690
1691 /* Free the lower case mask in SeqLoc form. */
1692 lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);
1693
1694 /* If masking was done for lookup table only, free the masking locations,
1695 because they will not be used for formatting. */
1696 if (SBlastOptionsGetMaskAtHash(options))
1697 filter_loc = Blast_ValNodeMaskListFree(filter_loc);
1698
1699 /* Post warning or error messages, no matter what the search status was. */
1700 SBlastMessageErrPost(sum_returns->error);
1701
1702 if (!status && !tabular_output) {
1703 /* FIXME:
1704 Int4** ascii_matrix = BlastMatrixConvert(sbp->matrix);
1705 */
1706 if (myargs[ARG_ASNOUT].strvalue) {
1707 /* This just prints out the ASN.1 to a secondary file. */
1708 status =
1709 BLAST_FormatResults(seqalign_arr, num_queries, query_slp,
1710 NULL, asn_format_info, sum_returns);
1711 }
1712
1713 /* Format the results */
1714 status =
1715 BLAST_FormatResults(seqalign_arr, num_queries, query_slp,
1716 filter_loc, format_info, sum_returns);
1717 }
1718
1719 seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
1720 /* Update the cumulative summary returns structure and clean the returns
1721 substructures for the current search iteration. */
1722 Blast_SummaryReturnUpdate(sum_returns, &full_sum_returns);
1723 Blast_SummaryReturnClean(sum_returns);
1724 filter_loc = Blast_ValNodeMaskListFree(filter_loc);
1725 FreeSeqLocSetComponents (query_slp);
1726 query_slp = SeqLocSetFree(query_slp);
1727 if (psi_checkpoint)
1728 Blast_PsiCheckpointLocFree(&psi_checkpoint);
1729 } /* End loop on sets of queries */
1730
1731 Blast_PrintOutputFooter(format_info, full_sum_returns);
1732
1733 sum_returns = Blast_SummaryReturnFree(sum_returns);
1734 full_sum_returns = Blast_SummaryReturnFree(full_sum_returns);
1735 GeneticCodeSingletonFini();
1736
1737 if (!tabular_output)
1738 format_info = BlastFormattingInfoFree(format_info);
1739 else
1740 {
1741 FileClose(outfp);
1742 /* FetchEnable/Disable called in blast_format.c for non-tabular output. */
1743 ReadDBBioseqFetchDisable();
1744 }
1745
1746 options = SBlastOptionsFree(options); /* Must come after call to BlastFormattingInfoFree. */
1747
1748 if (asn_format_info)
1749 asn_format_info = BlastFormattingInfoFree(asn_format_info);
1750
1751 if (infp)
1752 FileClose(infp);
1753
1754 return status;
1755 }
1756
1757 #endif /* BLASTALL_TOOLS_ONLY */
1758
1759
1760 /* Factor by which options->expect_value is multiplied in Main_old (relaxing the evalue threshold for preliminary alignments)
1761 * when compositionally adjusted score matrices are used, i.e. when options->tweak_parameters > 1. NOTE: with the current value of 1 the multiplication leaves the threshold unchanged. */
1762 #define EVALUE_EXPAND 1
1763
1764
1765 Int2 Main_old (void)
1766
1767 {
1768 AsnIoPtr aip, xml_aip;
1769 BioseqPtr fake_bsp = NULL, query_bsp, bsp;
1770 BioSourcePtr source;
1771 BLAST_MatrixPtr matrix;
1772 Int4Ptr PNTR txmatrix;
1773 BLAST_OptionsBlkPtr options;
1774 BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
1775 BlastPruneSapStructPtr prune;
1776 Boolean db_is_na, query_is_na, show_gi, believe_query=FALSE;
1777 Boolean html = FALSE;
1778 CharPtr params_buffer=NULL;
1779 Int4 number_of_descriptions, number_of_alignments;
1780 SeqAlignPtr seqalign;
1781 SeqAnnotPtr seqannot = NULL;
1782 SeqEntryPtr sep;
1783 TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
1784 Uint1 align_type, align_view, err_ticket;
1785 Uint4 align_options, print_options;
1786 ValNodePtr mask_loc, mask_loc_start = NULL, vnp, next_mask_loc = NULL;
1787 ValNodePtr other_returns, error_returns;
1788 CharPtr blast_program, blast_database, blast_inputfile, blast_outputfile;
1789 FILE *infp, *outfp;
1790 /* Mega BLAST related variables */
1791 SeqAlignPtr sap, next_seqalign, PNTR seqalignp;
1792 Int4 num_bsps, index;
1793 SeqLocPtr last_mask, mask_slp, slp = NULL, tmp_slp;
1794 Int2 ctr = 1;
1795 Char prefix[2];
1796 Boolean done = TRUE;
1797 int (LIBCALLBACK *handle_results)(VoidPtr srch);
1798 Int4 from = 0, to = -1;
1799 Uint4 num_queries; /*--KM for concatenated queries in blastn, tblastn */
1800 Uint4 num_iters;
1801 Uint4 sap_iter;
1802 SeqAlignPtr curr_seqalign;
1803 SeqAlignPtrArray sap_array; /*--KM for separating seqaligns to test concat printing, temporary?*/
1804 SeqAnnotPtr curr_seqannot;
1805 SeqAnnotPtrArray seq_annot_arr;
1806 Uint4 bsp_iter;
1807 BspArray fake_bsp_arr; /*--KM the array of fake_bsps for indiv. queries */
1808 SeqLocPtr PNTR lcase_mask_arr = NULL; /* AM: information about lower case masked parts of queries */
1809 Boolean concat_done, nuc_concat;
1810 QueriesPtr mult_queries = NULL; /*--KM, AM: stores information related to
1811 query multiplexing, to put in search */
1812 BioseqPtr curr_bsp;
1813
1814 /* AM: Support for query multiplexing. */
1815 Uint4 num_spacers;
1816 ValNodePtr orig_mask_loc = NULL;
1817
1818 #ifdef BLAST_CS_API
1819 BlastNet3Hptr bl3hp;
1820 Boolean status;
1821 #endif
1822
1823 blast_program = myargs[ARG_PROGRAM].strvalue;
1824
1825 #ifdef BLAST_CS_API
1826 /* For RPS Blast - any program other than "blastp" is treated as "blastx" */
1827 if(myargs[ARG_RPSBLAST].intvalue) {
1828 if(StringICmp(blast_program, "blastp")) {
1829 StringCpy(blast_program, "blastx");
1830 }
1831 }
1832 #endif
1833
1834 blast_database = myargs[ARG_DB].strvalue;
1835 blast_inputfile = myargs[ARG_QUERY].strvalue;
1836 blast_outputfile = myargs[ARG_OUT].strvalue;
1837
1838 if (myargs[ARG_HTML].intvalue)
1839 html = TRUE;
1840
1841 if ((infp = FileOpen(blast_inputfile, "r")) == NULL) {
1842 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", blast_inputfile);
1843 return (1);
1844 }
1845
1846 align_view = (Int1) myargs[ARG_FORMAT].intvalue;
1847 outfp = NULL;
1848 if (align_view != 7 && align_view != 10 && align_view != 11 && blast_outputfile != NULL) {
1849 if ((outfp = FileOpen(blast_outputfile, "w")) == NULL) {
1850 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
1851 return (1);
1852 }
1853 }
1854
1855 if (StringCmp("filter", blast_program) == 0) {
1856 BlastGetMaskingLoc(infp, outfp, myargs[ARG_FILTER].strvalue);
1857 FileClose(outfp);
1858 FileClose(infp);
1859 return 0;
1860 }
1861
1862 align_type = BlastGetTypes(blast_program, &query_is_na, &db_is_na);
1863
1864 if(align_view < 7) {
1865 if (StringICmp("blastx", blast_program) == 0) {
1866 if (align_view != 0) {
1867 ErrPostEx(SEV_FATAL, 1, 0, "This option is not available with blastx");
1868 return 1;
1869 }
1870 } else if (StringICmp("tblastx", blast_program) == 0) {
1871 if (align_view != 0) {
1872 ErrPostEx(SEV_FATAL, 1, 0, "This option is not available with tblastx");
1873 return 1;
1874 }
1875 }
1876 }
1877
1878 believe_query = FALSE;
1879 if (myargs[ARG_BELIEVEQUERY].intvalue != 0)
1880 believe_query = TRUE;
1881
1882 if (believe_query == FALSE && (myargs[ARG_ASNOUT].strvalue || align_view == 10 || align_view ==11)) {
1883 ErrPostEx(SEV_FATAL, 1, 0, "-J option must be TRUE to produce a SeqAlign file");
1884 }
1885
1886 options = BLASTOptionNewEx(blast_program, (Boolean) myargs[ARG_GAPPED].intvalue, (Boolean) myargs[ARG_USEMEGABLAST].intvalue);
1887 if (options == NULL)
1888 return 3;
1889
1890 #ifdef BLAST_CS_API
1891 if(myargs[ARG_RPSBLAST].intvalue)
1892 options->is_rps_blast = TRUE;
1893 #endif
1894
1895 handle_results = NULL;
1896
1897 BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0);
1898 options->kappa_expect_value =
1899 options->expect_value = (Nlm_FloatHi) myargs[ARG_EVALUE].floatvalue;
1900 number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue;
1901 number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue;
1902 options->hitlist_size = MAX(number_of_descriptions, number_of_alignments);
1903
1904 if (StringICmp("blastn", blast_program) == 0) {
1905 options->penalty = myargs[ARG_MISMATCH].intvalue;
1906 options->reward = myargs[ARG_MATCH].intvalue;
1907 if (options->reward > 1) {
1908 /* Scale the default values for gap costs; will be overridden
1909 later, if command line values are non-zero */
1910 options->gap_open *= options->reward;
1911 options->gap_extend *= options->reward;
1912 }
1913 } else {
1914 if ((Int4)myargs[ARG_THRESHOLD].floatvalue != 0) {
1915 options->threshold_second = (Int4)myargs[ARG_THRESHOLD].floatvalue;
1916 }
1917 }
1918
1919 if (myargs[ARG_GAPOPEN].intvalue >= 0)
1920 options->gap_open = myargs[ARG_GAPOPEN].intvalue;
1921 if (myargs[ARG_GAPEXT].intvalue >= 0)
1922 options->gap_extend = myargs[ARG_GAPEXT].intvalue;
1923 if (myargs[ARG_XDROP].intvalue != 0)
1924 options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
1925
1926 /* use one-hit if specified or it's a blastn search */
1927 if ( (myargs[ARG_MULTIPLEHITS].intvalue == 1) || (StringICmp("blastn", blast_program) == 0 ) )
1928 {
1929 options->two_pass_method = FALSE;
1930 options->multiple_hits_only = FALSE;
1931 }
1932 /* otherwise, use two-hit */
1933 else
1934 {
1935 /* all other inputs, including the default 0 use 2-hit method */
1936 options->two_pass_method = FALSE;
1937 options->multiple_hits_only = TRUE;
1938 }
1939
1940 if(myargs[ARG_XDROP_FINAL].intvalue != 0)
1941 options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue;
1942
1943 if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0) {
1944 if (StringICmp("blastn", blast_program) == 0)
1945 options->filter_string = StringSave("D");
1946 else
1947 options->filter_string = StringSave("S");
1948 } else {
1949 options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
1950 }
1951
1952 show_gi = (Boolean) myargs[ARG_SHOWGIS].intvalue;
1953
1954 options->genetic_code = myargs[ARG_QGENETIC_CODE].intvalue;
1955 options->db_genetic_code = myargs[ARG_DBGENCODE].intvalue;
1956 options->number_of_cpus = myargs[ARG_THREADS].intvalue;
1957 if (myargs[ARG_WORDSIZE].intvalue != 0) {
1958 options->wordsize = myargs[ARG_WORDSIZE].intvalue;
1959 }
1960
1961 if (options->is_megablast_search) {
1962 options->cutoff_s2 = options->wordsize*options->reward;
1963 }
1964
1965 options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
1966
1967 options->hsp_range_max = myargs[ARG_BESTHITS].intvalue;
1968 if (options->hsp_range_max != 0)
1969 options->perform_culling = TRUE;
1970 if (myargs[ARG_SEARCHSP].floatvalue)
1971 options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;
1972
1973 if ((0 != StringICmp("tblastn", blast_program) &&
1974 0 != StringICmp("blastp", blast_program)) ||
1975 !options->gapped_calculation) {
1976 /* Set some gapped tblastn-specific options to the correct
1977 * defaults for non-tblastn or non-gapped modes of operation.
1978 */
1979 options->tweak_parameters = eNoCompositionBasedStats;
1980 options->smith_waterman = 0;
1981 options->unified_p = 0;
1982
1983 switch (myargs[ARG_COMP_BASED_STATS].strvalue[0]) {
1984 case '0': case 'D': case 'd': case 'F': case 'f':
1985 options->tweak_parameters = eNoCompositionBasedStats;
1986 break;
1987 default:
1988 ErrPostEx(SEV_FATAL, 1, 0,
1989 "Invalid option -C: only gapped blastp or gapped tblastn "
1990 "may use composition based statistics.");
1991 break;
1992 }
1993 if(myargs[ARG_SMITH_WATERMAN].intvalue) {
1994 ErrPostEx(SEV_FATAL, 1, 0,
1995 "Invalid option -s: Smith-Waterman alignments are only "
1996 "available for gapped blastp or gapped tblastn.");
1997 }
1998 } else {
1999 /* Set options specific to gapped tblastn and blastp */
2000 switch (myargs[ARG_COMP_BASED_STATS].strvalue[0]) {
2001 case '0': case 'F': case 'f':
2002 options->tweak_parameters = eNoCompositionBasedStats;
2003 break;
2004 case 'D': case 'd':
2005 case '1': case 'T': case 't':
2006 options->tweak_parameters = eCompositionBasedStats;
2007 break;
2008 case '2':
2009 ErrPostEx(SEV_WARNING, 1, 0, "the -C 2 argument "
2010 "is currently experimental\n");
2011 options->tweak_parameters = eCompositionMatrixAdjust;
2012 break;
2013 case '3':
2014 ErrPostEx(SEV_WARNING, 1, 0, "the -C 3 argument "
2015 "is currently experimental\n");
2016 options->tweak_parameters = eCompoForceFullMatrixAdjust;
2017 break;
2018 default:
2019 ErrPostEx(SEV_FATAL, 1, 0, "invalid argument for composition-"
2020 "based statistics; see -C options\n");
2021 break;
2022 }
2023 if (options->tweak_parameters > 0) {
2024 switch (myargs[ARG_COMP_BASED_STATS].strvalue[1]) {
2025 case 'U':
2026 case 'u':
2027 if (0 == StringICmp("blastp", blast_program)) {
2028 options->unified_p = 1;
2029 ErrPostEx(SEV_WARNING, 1, 0, "unified p-values "
2030 "are currently experimental\n");
2031 } else {
2032 ErrPostEx(SEV_FATAL, 1, 0, "unified p-values "
2033 "are currently only available for blastp\n");
2034 }
2035 break;
2036 case '\0':
2037 break;
2038 default:
2039 ErrPostEx(SEV_WARNING, 1, 0, "unrecognized second character"
2040 "in value of -t, ignoring it\n");
2041 break;
2042 }
2043 }
2044 options->smith_waterman =
2045 (Boolean) myargs[ARG_SMITH_WATERMAN].intvalue;
2046 }
2047 if (options->tweak_parameters > 1) {
2048 /* Compositionally adjusted score matrices are being used, and
2049 * these can improve evalue, so relax the evalue cutoff for
2050 * the preliminary alignments. (Note that traditional
2051 * composition based statistics can only make evalues larger.)
2052 */
2053 options->expect_value *= EVALUE_EXPAND;
2054 }
2055
2056 options->strand_option = myargs[ARG_STRAND].intvalue;
2057
2058 if(myargs[ARG_XDROP_UNGAPPED].floatvalue != 0.0) {
2059 options->dropoff_2nd_pass = myargs[ARG_XDROP_UNGAPPED].floatvalue;
2060 if(options->dropoff_1st_pass > options->dropoff_2nd_pass)
2061 options->dropoff_1st_pass = options->dropoff_2nd_pass;
2062 }
2063
2064 if (myargs[ARG_WINDOW].intvalue != 0)
2065 options->window_size = myargs[ARG_WINDOW].intvalue;
2066
2067 print_options = 0;
2068 align_options = 0;
2069 align_options += TXALIGN_COMPRESS;
2070 align_options += TXALIGN_END_NUM;
2071 if (StringICmp("blastx", blast_program) == 0) {
2072 align_options += TXALIGN_BLASTX_SPECIAL;
2073 }
2074 if (show_gi) {
2075 align_options += TXALIGN_SHOW_GI;
2076 print_options += TXALIGN_SHOW_GI;
2077 }
2078 if (myargs[ARG_GAPPED].intvalue == 0 || StringICmp("tblastx", blast_program) == 0)
2079 print_options += TXALIGN_SHOW_NO_OF_SEGS;
2080
2081 if (align_view) {
2082 align_options += TXALIGN_MASTER;
2083 if (align_view == 1 || align_view == 3)
2084 align_options += TXALIGN_MISMATCH;
2085 if (align_view == 3 || align_view == 4 || align_view == 6)
2086 align_options += TXALIGN_FLAT_INS;
2087 if (align_view == 5 || align_view == 6)
2088 align_options += TXALIGN_BLUNT_END;
2089 } else {
2090 align_options += TXALIGN_MATRIX_VAL;
2091 align_options += TXALIGN_SHOW_QS;
2092 }
2093
2094 if (html) {
2095 align_options += TXALIGN_HTML;
2096 print_options += TXALIGN_HTML;
2097 }
2098
2099 #ifdef BLAST_CS_API
2100 if(myargs[ARG_ENTREZQ].strvalue)
2101 options->entrez_query = StringSave(myargs[ARG_ENTREZQ].strvalue);
2102 #else
2103 if (myargs[ARG_GILIST].strvalue) {
2104 options->gifile = StringSave(myargs[ARG_GILIST].strvalue);
2105 }
2106 #endif
2107
2108 /*
2109 Out-of-frame option is valid only for blastx, tblastn and
2110 psitblastn searches
2111 */
2112
2113 if(myargs[ARG_FRAMESHIFT].intvalue > 0) {
2114 if (!StringICmp("blastx", blast_program) ||
2115 !StringICmp("tblastn", blast_program)||
2116 !StringICmp("psitblastn", blast_program)) {
2117 if (!StringICmp("blastx", blast_program)) {
2118 options->is_ooframe = TRUE;
2119 options->shift_pen = myargs[ARG_FRAMESHIFT].intvalue;
2120 }
2121 }
2122 }
2123
2124 /* Input longest intron length is in nucleotide scale; in the lower level
2125 code it will be used in protein scale */
2126 options->longest_intron = myargs[ARG_INTRON].intvalue;
2127
2128 aip = NULL;
2129 if (myargs[ARG_ASNOUT].strvalue != NULL) {
2130 if ((aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w")) == NULL) {
2131 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue);
2132 return 1;
2133 }
2134 }
2135 else if (align_view == 10 || align_view == 11)
2136 {
2137 const char* mode = (align_view == 10) ? "w" : "wb";
2138 if ((aip = AsnIoOpen (blast_outputfile, (char*) mode)) == NULL) {
2139 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_ASNOUT].strvalue);
2140 return 1;
2141 }
2142 }
2143
2144 if(align_view < 7) {
2145 if (html) {
2146 fprintf(outfp, "<HTML>\n<TITLE>BLAST Search Results</TITLE>\n");
2147 fprintf(outfp, "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" "
2148 "VLINK=\"#660099\" ALINK=\"#660099\">\n");
2149 fprintf(outfp, "<PRE>\n");
2150 }
2151 } else if (align_view == 7 ) {
2152 xml_aip = AsnIoOpen(blast_outputfile, "wx");
2153 }
2154
2155 #ifndef BLAST_CS_API
2156 if(align_view >= 7 && myargs[ARG_NUMQUERIES].intvalue > 1)
2157 {
2158 ErrPostEx(SEV_FATAL, 1, 0,
2159 "blast: Query concatenation is currently not supported with -m > 7");
2160 return 1;
2161 }
2162 #endif
2163
2164
2165 /* Futamura: Setting up the psitblastn options */
2166 #ifndef BLAST_CS_API
2167 if (NULL != myargs[ARG_PSITCHKPNT].strvalue) {
2168 options->recoverCheckpoint = TRUE;
2169 options->freqCheckpoint = TRUE;
2170 }
2171 options->CheckpointFileName=myargs[ARG_PSITCHKPNT].strvalue;
2172 #endif
2173
2174 #ifdef BLAST_CS_API
2175 if (align_view < 7)
2176 bl3hp = BNETInitializeBlast(blast_database, blast_program, outfp,
2177 db_is_na, options->is_rps_blast, html, TRUE);
2178 else
2179 bl3hp = BNETInitializeBlast(blast_database, blast_program, outfp,
2180 db_is_na, options->is_rps_blast, html, FALSE);
2181 #endif
2182
2183 /*--KM get number of queries for concatenated blastn/tblastn queries */
2184
2185 #ifndef BLAST_CS_API
2186 options->NumQueries=myargs[ARG_NUMQUERIES].intvalue;
2187 #endif
2188
2189 num_queries = options->NumQueries;
2190 if (num_queries>0 &&
2191 !( (StringICmp("blastn", blast_program) == 0) ||
2192 (StringICmp("tblastn", blast_program) == 0) ) ) {
2193
2194 ErrPostEx(SEV_FATAL, 1, 0, "blast: Can't concat with program %s\n", myargs[ARG_PROGRAM].strvalue);
2195 return 1;
2196 }
2197
2198 /* AM: Query concatenation is not consistent with ungapped search */
2199 if( num_queries > 0 && !myargs[ARG_GAPPED].intvalue )
2200 {
2201 ErrPostEx(SEV_FATAL, 1, 0,
2202 "blast: Query concatenation is inconsistent with ungapped search\n" );
2203 return 1;
2204 }
2205 if( !myargs[ARG_GAPPED].intvalue &&
2206 0 == StringCmp("psitblastn", blast_program ) ) {
2207 ErrPostEx(SEV_FATAL, 1, 0,"blast: Ungapped alignment is not appropriate "
2208 "for PSI-tBLASTn.\n" );
2209 }
2210
2211 /* --KM set bool value if DNA and concat needed, need for Fasta->seq functions */
2212 if (num_queries>0 && query_is_na == TRUE) {
2213 nuc_concat = TRUE;
2214 } else {
2215 nuc_concat = FALSE;
2216 }
2217
2218 /* --- Main loop over all FASTA entries in the input file ---- */
2219
2220 concat_done = FALSE; /*--KM */
2221
2222 sGetLoc(myargs[ARG_QUERYLOC].strvalue, &from, &to);
2223
2224 while (TRUE) {
2225 if (options->is_megablast_search) {
2226 StrCpy(prefix, "");
2227 slp = NULL;
2228 num_bsps = 0;
2229 done = TRUE;
2230 SeqMgrHoldIndexing(TRUE);
2231 mask_slp = last_mask = NULL;
2232 while ((sep=FastaToSeqEntryForDb(infp, query_is_na, NULL,
2233 believe_query, prefix, &ctr,
2234 &mask_slp)) != NULL) {
2235 if ((Boolean)myargs[ARG_LCASE].intvalue) {
2236 if (mask_slp) {
2237 if (!last_mask)
2238 options->query_lcase_mask = last_mask = mask_slp;
2239 else {
2240 last_mask->next = mask_slp;
2241 last_mask = last_mask->next;
2242 }
2243 mask_slp = NULL;
2244 }
2245 } else {
2246 mask_slp = SeqLocSetFree(mask_slp);
2247 }
2248 query_bsp = NULL;
2249 if (query_is_na)
2250 SeqEntryExplore(sep, &query_bsp, FindNuc);
2251 else
2252 SeqEntryExplore(sep, &query_bsp, FindProt);
2253
2254 if (query_bsp == NULL) {
2255 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
2256 return 2;
2257 }
2258
2259 /* Only for the first query */
2260 if (num_bsps == 0) {
2261 to = MIN(to, query_bsp->length - 1);
2262
2263 /* -1 means end of sequence */
2264 if (to < 0)
2265 to = query_bsp->length - 1;
2266 if (from >= query_bsp->length || to < 0) {
2267 ErrPostEx(SEV_FATAL, 1, 0,
2268 "Location outside of the query sequence range\n");
2269 return 3;
2270 }
2271 slp = SeqLocIntNew(from, to, options->strand_option,
2272 SeqIdFindBest(query_bsp->id, SEQID_GI));
2273 } else
2274 ValNodeAddPointer(&slp, SEQLOC_WHOLE,
2275 SeqIdDup(SeqIdFindBest(query_bsp->id,
2276 SEQID_GI)));
2277 num_bsps++;
2278 if (num_bsps >= MAX_NUM_QUERIES) {
2279 done = FALSE;
2280 break;
2281 }
2282 /*sep = MemFree(sep);*/ /* Do not free the underlying Bioseq */
2283 }
2284 SeqMgrHoldIndexing(FALSE);
2285 if (num_bsps == 0)
2286 break;
2287 } else {
2288 /* not megablast */
2289
2290 /*--KM make array of fake_bsp's if concat. query */
2291 if (concat_done)
2292 break;
2293 if (num_queries > 0) {
2294 fake_bsp_arr = (BspArray) MemNew(sizeof(BioseqPtr)*num_queries);
2295
2296 if( myargs[ARG_LCASE].intvalue )
2297 lcase_mask_arr = (SeqLocPtr PNTR)MemNew( sizeof( SeqLocPtr )*num_queries );
2298 }
2299 num_iters = (num_queries>0) ? num_queries : 1;
2300 for (bsp_iter=0; bsp_iter<num_iters; bsp_iter++) {
2301
2302 if(myargs[ARG_LCASE].intvalue) {
2303 /* AM: query multiplexing */
2304 if( !num_queries )
2305 sep = FastaToSeqEntryForDb (infp, query_is_na, NULL, believe_query, NULL, NULL, &options->query_lcase_mask);
2306 else
2307 sep = FastaToSeqEntryInternalEx( infp, FASTA_FILE_IO, NULL, query_is_na, NULL, believe_query,
2308 NULL, NULL, NULL, lcase_mask_arr + bsp_iter );
2309
2310 } else {
2311 sep = FastaToSeqEntryEx(infp, query_is_na, NULL, believe_query);
2312 }
2313
2314 /* if concat and num_queries has not been reached and sep is NULL, crap out */
2315 if (sep == NULL && bsp_iter < num_queries) { /* implies num_queries>0 */
2316 ErrPostEx(SEV_FATAL, 1, 0, "blast: Only %d queries found!\n", bsp_iter);
2317 return (1);
2318 }
2319
2320 if(sep == NULL)
2321 break; /* no more queries, can go to finish with next break */
2322
2323 query_bsp = NULL;
2324 if (query_is_na) {
2325 SeqEntryExplore(sep, &query_bsp, FindNuc);
2326 } else {
2327 SeqEntryExplore(sep, &query_bsp, FindProt);
2328 }
2329
2330 if (query_bsp == NULL) {
2331 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
2332 return 2;
2333 }
2334
2335 if (num_queries>0) {
2336 *(fake_bsp_arr + bsp_iter) = query_bsp;
2337 }
2338 }
2339 if ( (sep == NULL && num_queries ==0) || (num_queries>0 && concat_done) )
2340 break; /* go to finish */
2341
2342 /* --KM */
2343
2344 if (num_queries>0) {
2345 concat_done = TRUE; /* --KM to prevent futher looping */
2346
2347 /* AM: Determine the number of query separators. */
2348 num_spacers = GetNumSpacers( options, believe_query, fake_bsp_arr );
2349
2350 if( num_spacers%2 ) ++num_spacers;
2351
2352 /* --KM make the concatenated fake_bsp */
2353 /* AM: Added num_spacers. */
2354 if( query_is_na )
2355 fake_bsp = (BioseqPtr)
2356 BlastMakeFakeBspConcat(fake_bsp_arr, num_queries, query_is_na, num_spacers);
2357 else
2358 fake_bsp = (BioseqPtr)
2359 BlastMakeFakeBspConcat(fake_bsp_arr, num_queries, query_is_na, num_spacers);
2360
2361 /* construct the MultQueries struct here*/
2362 mult_queries = (QueriesPtr) BlastMakeMultQueries(fake_bsp_arr, num_queries, query_is_na, num_spacers,
2363 lcase_mask_arr);
2364 } else {
2365 if(believe_query)
2366 fake_bsp = query_bsp;
2367 else
2368 fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
2369 }
2370
2371 err_ticket = BlastSetUserErrorString(NULL, query_bsp->id, believe_query);
2372
2373 /* If a fake_bsp was created, the mask should be updated to use its id */
2374 /* AM: query multiplexing */
2375 if( !mult_queries )
2376 BLASTUpdateSeqIdInSeqInt(options->query_lcase_mask, fake_bsp->id);
2377 else for( bsp_iter = 0; bsp_iter < num_iters; ++bsp_iter )
2378 if( mult_queries->LCaseMasks )
2379 BLASTUpdateSeqIdInSeqInt( mult_queries->LCaseMasks[bsp_iter],
2380 mult_queries->FakeBsps[bsp_iter]->id );
2381
2382 source = BioSourceNew();
2383 source->org = OrgRefNew();
2384 source->org->orgname = OrgNameNew();
2385 source->org->orgname->gcode = options->genetic_code;
2386 ValNodeAddPointer(&(query_bsp->descr), Seq_descr_source, source);
2387
2388 /* free sep later when done. --KM remember to free all if array*/
2389 }
2390
2391 global_fp = outfp;
2392
2393 if(align_view < 7) {
2394 #ifndef BLAST_CS_API
2395 init_buff_ex(90);
2396 BlastPrintVersionInfo(blast_program, html, outfp);
2397 fprintf(outfp, "\n");
2398 BlastPrintReference(html, 90, outfp);
2399 fprintf(outfp, "\n");
2400 #else
2401 fprintf(outfp, "\n");
2402 #endif
2403 if (!options->is_megablast_search) {
2404 /* KM added loop here for concat case */
2405 num_iters = (num_queries>0) ? num_queries : 1;
2406 for (bsp_iter=0; bsp_iter<num_iters; bsp_iter++) {
2407 curr_bsp = (num_queries>0) ? *(fake_bsp_arr + bsp_iter) : query_bsp;
2408 AcknowledgeBlastQuery(curr_bsp, 70, outfp, believe_query, html);
2409 }
2410 }
2411
2412 /* Here we first check whether the database exists */
2413
2414 #ifndef BLAST_CS_API
2415 if(!PrintDbInformation(blast_database, !db_is_na, 70, outfp, html))
2416 return 1;
2417 #else
2418
2419 {{
2420 BlastDbinfoPtr dbinfo;
2421 static Boolean not_first_time;
2422
2423 /* For CS version we will print database info once to
2424 decrease network traffic */
2425
2426 if(!not_first_time) {
2427 dbinfo = BlastRequestDbInfo(bl3hp, blast_database, !db_is_na);
2428 if (dbinfo)
2429 PrintDbInformationBasic(blast_database, !db_is_na, 70, dbinfo->definition, dbinfo->number_seqs, dbinfo->total_length, outfp, html);
2430 dbinfo = BlastDbinfoFree(dbinfo);
2431 not_first_time = TRUE;
2432 }
2433 }}
2434 #endif /* BLAST_CS_API */
2435 free_buff();
2436 if (options->is_ooframe)
2437 ErrPostEx(SEV_WARNING, 0, 0, "Out-of-frame option selected, Expect values are only approximate and calculated not assuming out-of-frame alignments");
2438 }
2439 #ifdef OS_UNIX
2440 if(align_view < 7) { /*--KM why not fold into previous if statement? */
2441 #ifdef BLAST_CS_API
2442 fprintf(global_fp, "%s", "Searching... please wait.. ");
2443 #else
2444 fprintf(global_fp, "%s", "Searching");
2445 #endif
2446 }
2447 #endif
2448 other_returns = NULL;
2449 error_returns = NULL;
2450
2451 if (options->is_megablast_search) {
2452 #ifdef BLAST_CS_API
2453 seqalign = MegaBlastSeqLocNetCore(bl3hp, slp, blast_program,
2454 blast_database, options,
2455 &other_returns, &error_returns,
2456 align_view < 7 ? tick_callback : NULL,
2457 &status);
2458 #else
2459 seqalignp = BioseqMegaBlastEngineByLoc(slp, blast_program,
2460 blast_database, options, &other_returns,
2461 &error_returns,
2462 align_view < 7 ? tick_callback : NULL,
2463 NULL, NULL, 0, handle_results);
2464 seqalign = NULL;
2465 for (index=0; index<num_bsps; index++) {
2466 if (seqalignp && seqalignp[index]) {
2467 if (seqalign == NULL)
2468 sap = seqalign = seqalignp[index];
2469 else
2470 sap->next = seqalignp[index];
2471 while (sap->next != NULL)
2472 sap = sap->next;
2473 }
2474 }
2475 seqalignp = MemFree(seqalignp);
2476 #endif
2477 } else if (!myargs[ARG_QUERYLOC].strvalue) {
2478 #ifdef BLAST_CS_API
2479 seqalign = BlastBioseqNetCore(bl3hp, fake_bsp, blast_program,
2480 blast_database, options,
2481 &other_returns, &error_returns,
2482 align_view < 7 ? tick_callback : NULL,
2483 NULL, &status);
2484 #else
2485 /* KM added mult_queries param */
2486 seqalign = BioseqBlastEngineWithCallbackMult(fake_bsp, blast_program, blast_database, options, &other_returns, &error_returns, align_view < 7 ? tick_callback : NULL, handle_results, mult_queries);
2487 #endif
2488 } else { /* Location on query provided */
2489 to = MIN(to, fake_bsp->length - 1);
2490
2491 /* -1 means end of sequence */
2492 if (to < 0)
2493 to = fake_bsp->length - 1;
2494 if (from >= fake_bsp->length || to < 0) {
2495 ErrPostEx(SEV_FATAL, 1, 0,
2496 "Location outside of the query sequence range\n");
2497 return 3;
2498 }
2499 slp = SeqLocIntNew(from, to, options->strand_option,
2500 fake_bsp->id);
2501
2502 #ifdef BLAST_CS_API
2503 seqalign = BlastSeqLocNetCore(bl3hp, slp, blast_program,
2504 blast_database, options,
2505 &other_returns, &error_returns,
2506 align_view < 7 ? tick_callback : NULL,
2507 NULL, &status);
2508 #else
2509 seqalign = BioseqBlastEngineByLocWithCallbackMult(slp, blast_program, blast_database, options, &other_returns, &error_returns, align_view < 7 ? tick_callback : NULL, NULL, NULL, 0, handle_results, mult_queries);
2510 #endif
2511
2512 }
2513 #if 0
2514 seqalign = BLASTFilterOverlapRegions(seqalign, 0, !db_is_na,
2515 options->is_ooframe, FALSE);
2516 #endif
2517
2518 BlastErrorPrint(error_returns);
2519
2520 dbinfo = NULL;
2521 ka_params = NULL;
2522 ka_params_gap = NULL;
2523 params_buffer = NULL;
2524 mask_loc = NULL;
2525 matrix = NULL;
2526 txmatrix = NULL;
2527 for (vnp=other_returns; vnp; vnp = vnp->next) {
2528 switch (vnp->choice) {
2529 case TXDBINFO:
2530 dbinfo = vnp->data.ptrvalue;
2531 break;
2532 case TXKABLK_NOGAP:
2533 ka_params = vnp->data.ptrvalue;
2534 break;
2535 case TXKABLK_GAP:
2536 ka_params_gap = vnp->data.ptrvalue;
2537 break;
2538 case TXPARAMETERS:
2539 params_buffer = vnp->data.ptrvalue;
2540 break;
2541 case TXMATRIX:
2542 matrix = vnp->data.ptrvalue;
2543 if (matrix)
2544 txmatrix = BlastMatrixToTxMatrix(matrix);
2545 break;
2546 case SEQLOC_MASKING_NOTSET:
2547 case SEQLOC_MASKING_PLUS1:
2548 case SEQLOC_MASKING_PLUS2:
2549 case SEQLOC_MASKING_PLUS3:
2550 case SEQLOC_MASKING_MINUS1:
2551 case SEQLOC_MASKING_MINUS2:
2552 case SEQLOC_MASKING_MINUS3:
2553 ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
2554 break;
2555 default:
2556 break;
2557 }
2558 }
2559
2560 #ifdef OS_UNIX
2561 fflush(global_fp);
2562 #endif
2563
2564 #ifdef OS_UNIX
2565 if(align_view < 7) {
2566 fprintf(global_fp, "%s", "done");
2567 }
2568 #endif
2569
2570 #ifndef BLAST_CS_API
2571 ReadDBBioseqFetchEnable ("blastall", blast_database, db_is_na, TRUE);
2572 #endif
2573 ReadDBBioseqSetDbGeneticCode(options->db_genetic_code);
2574
2575 tmp_slp = slp;
2576 if (slp)
2577 query_bsp = NULL;
2578
2579 if (getenv("POST_BLAST_CLUSTER_HITS") != NULL)
2580 BlastClusterHitsFromSeqAlign(seqalign, blast_program, blast_database,
2581 options, 0.9, 1.6, 0.5, TRUE);
2582
2583 if (mask_loc) {
2584 mask_loc_start = mask_loc;
2585 }
2586 else
2587 { /* Could have become non-NUll for last query. */
2588 mask_loc_start = NULL;
2589 }
2590 /* Print header in any case */
2591 if (align_view == 9) {
2592 PrintTabularOutputHeader(blast_database, query_bsp, slp,
2593 blast_program, 0, believe_query, global_fp);
2594 }
2595
2596 if (seqalign) {
2597 if (num_queries > 0) { /* AM: Support for query multiplexing. */
2598 sap_array = mult_queries->sap_array_data->sap_array;
2599 }
2600
2601 if (align_view == 8 || align_view == 9) {
2602 /* --KM need to put a loop around this. seqaligns already broken up
2603 note the method for looping if num_aligns > 0 - reuse this method everywhere */
2604 num_iters = (num_queries>0) ? num_queries : 1;
2605 for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2606 curr_seqalign = (num_queries>0) ? *(sap_array + sap_iter) : seqalign;
2607 BlastPrintTabularResults(curr_seqalign, query_bsp, slp,
2608 number_of_alignments, blast_program,
2609 !options->gapped_calculation, options->is_ooframe,
2610 believe_query, 0, 0, global_fp, NULL, (align_view == 9));
2611
2612 SeqAlignSetFree(curr_seqalign);
2613 }
2614 } else {
2615 while (seqalign) {
2616
2617 if (!options->is_megablast_search){
2618 next_seqalign = NULL;
2619 } else {
2620 SeqIdPtr sip, next_sip = NULL;
2621
2622 sap = seqalign;
2623 sip = TxGetQueryIdFromSeqAlign(seqalign);
2624 while (sap != NULL) {
2625 if (sap->next != NULL) {
2626 next_sip = TxGetQueryIdFromSeqAlign(sap->next);
2627
2628 if (SeqIdComp(sip, next_sip) != SIC_YES) {
2629 next_seqalign = sap->next;
2630 sap->next = NULL;
2631 }
2632 } else
2633 next_seqalign = NULL;
2634 sap = sap->next;
2635 }
2636
2637 while (tmp_slp && SeqIdComp(sip, SeqLocId(tmp_slp)) != SIC_YES)
2638 tmp_slp = tmp_slp->next;
2639 if (tmp_slp == NULL) /* Should never happen */
2640 break;
2641 /* Separate the mask locations list for this query */
2642 if (!mask_loc && next_mask_loc) {
2643 mask_loc = next_mask_loc;
2644 next_mask_loc = NULL;
2645 }
2646 if (mask_loc) {
2647 if (next_mask_loc) {
2648 mask_loc->next = next_mask_loc;
2649 mask_loc = next_mask_loc;
2650 }
2651 mask_slp = (SeqLocPtr) mask_loc->data.ptrvalue;
2652 next_mask_loc = mask_loc;
2653 while (SeqIdComp(SeqLocId(mask_slp), sip) != SIC_YES) {
2654 mask_loc = mask_loc->next;
2655 if (!mask_loc)
2656 break;
2657 mask_slp = (SeqLocPtr) mask_loc->data.ptrvalue;
2658 }
2659 if (mask_loc) {
2660 next_mask_loc = mask_loc->next;
2661 mask_loc->next = NULL;
2662 }
2663 }
2664 if (align_view < 7) {
2665 bsp = BioseqLockById(SeqLocId(tmp_slp));
2666 init_buff_ex(85);
2667 fprintf(outfp, "\n");
2668 AcknowledgeBlastQuery(bsp, 70, outfp, believe_query,
2669 html);
2670 free_buff();
2671 BioseqUnlock(bsp);
2672 }
2673 }
2674 if((align_view == 7) && !options->is_ooframe) {
2675 if (options->is_megablast_search) {
2676 bsp = BioseqLockById(SeqLocId(tmp_slp));
2677 BXMLPrintOutput(xml_aip, seqalign,
2678 options, blast_program, blast_database,
2679 bsp, other_returns, 0, NULL, mask_loc);
2680 BioseqUnlock(bsp);
2681 AsnIoReset(xml_aip);
2682 SeqAlignSetFree(seqalign);
2683 } else {
2684 num_iters = (num_queries>0) ? num_queries : 1;
2685 for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2686 curr_seqalign = (num_queries > 0) ? *(sap_array + sap_iter) : seqalign;
2687 BXMLPrintOutput(xml_aip, curr_seqalign,
2688 options, blast_program, blast_database,
2689 fake_bsp, other_returns, 0, NULL, mask_loc);
2690 AsnIoReset(xml_aip);
2691 SeqAlignSetFree(curr_seqalign);
2692 } /* for loop over sap-array (concat) */
2693 } /* not MBlast case */
2694 } else {
2695 /* create the array of SeqAnnotPtrs, if necessary */
2696
2697 num_iters = (num_queries > 0) ? num_queries : 1;
2698 for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2699 curr_seqalign = (num_queries > 0) ? *(sap_array + sap_iter) : seqalign;
2700 if ( (num_queries > 0) && (sap_iter == 0) ) {
2701 seq_annot_arr = (SeqAnnotPtrArray) MemNew(sizeof(SeqAnnotPtr)*num_queries);
2702 }
2703 seqannot = SeqAnnotNew();
2704 seqannot->type = 2;
2705 AddAlignInfoToSeqAnnot(seqannot, align_type);
2706 seqannot->data = curr_seqalign;
2707 if (aip) {
2708 SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
2709 AsnIoReset(aip);
2710 }
2711 if (num_queries > 0) {
2712 *(seq_annot_arr + sap_iter) = seqannot;
2713 }
2714 } /* make seqannots over the sap_iters from concat, or the single seqalign */
2715
2716 if (outfp) { /* Uncacheing causes problems with ordinal nos. vs. gi's. */
2717 ObjMgrSetHold();
2718 /* print deflines */
2719 for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2720 curr_seqalign = (num_queries > 0) ? *(sap_array + sap_iter) : seqalign;
2721
2722 init_buff_ex(85);
2723
2724 PrintDefLinesFromSeqAlignEx2(curr_seqalign, 80, outfp,
2725 print_options, FIRST_PASS, NULL,
2726 number_of_descriptions, NULL, NULL);
2727 free_buff();
2728 } /* print deflines, looped if concat */
2729
2730 for (sap_iter=0; sap_iter < num_iters; sap_iter++) {
2731 /* AM: Query concatenation. */
2732 if( mult_queries && mask_loc )
2733 {
2734 orig_mask_loc = mask_loc;
2735
2736 if( !mask_loc->data.ptrvalue ) mask_loc = NULL;
2737 }
2738
2739 curr_seqalign = (num_queries > 0) ? *(sap_array + sap_iter) : seqalign;
2740 curr_seqannot = (num_queries > 0) ? *(seq_annot_arr + sap_iter) : seqannot;
2741
2742 prune = BlastPruneHitsFromSeqAlign(curr_seqalign,
2743 number_of_alignments, NULL);
2744 curr_seqannot->data = prune->sap;
2745
2746 if(options->is_ooframe) {
2747 OOFShowBlastAlignment(curr_seqalign, /*mask*/ NULL,
2748 outfp, align_options, txmatrix);
2749 } else {
2750 if (align_view != 0)
2751 ShowTextAlignFromAnnot(curr_seqannot, 60, outfp, NULL, NULL,
2752 align_options, txmatrix, mask_loc, NULL);
2753 else
2754 ShowTextAlignFromAnnot(curr_seqannot, 60, outfp, NULL, NULL,
2755 align_options, txmatrix, mask_loc,
2756 FormatScoreFunc);
2757 }
2758
2759 curr_seqannot->data = curr_seqalign;
2760 prune = BlastPruneSapStructDestruct(prune);
2761
2762 /* AM: Query concatenation. */
2763 if( mult_queries && orig_mask_loc )
2764 {
2765 mask_loc = orig_mask_loc;
2766 mask_loc = mask_loc->next;
2767 }
2768 } /* show text align, loop over seqalign/seqannots for concat */
2769 ObjMgrClearHold();
2770 } /* if outfp */
2771 for (sap_iter=0; sap_iter < num_queries; sap_iter++) {
2772 /* upper bound is num_queries, take care not to do this unless concat */
2773 *(seq_annot_arr + sap_iter) = SeqAnnotFree(*(seq_annot_arr + sap_iter));
2774 }
2775 if (mult_queries)
2776 seq_annot_arr = MemFree(seq_annot_arr);
2777 /*--KM free seqalign array and all seqaligns?? */
2778
2779 } /* end of else (not XML Printing) */
2780 if (options->is_megablast_search)
2781 tmp_slp = tmp_slp->next;
2782 /* --KM watch for memory leaks */
2783 if (seqannot && num_queries == 0)
2784 seqannot = SeqAnnotFree(seqannot);
2785 seqalign = next_seqalign;
2786 } /* End of loop on all seqaligns */
2787 if (mask_loc && next_mask_loc)
2788 mask_loc->next = next_mask_loc;
2789
2790 } /* end of align_view not tabular case */
2791 } else { /* seqalign is NULL */
2792 if((align_view == 7) && !options->is_ooframe) {
2793 BlastErrorMsgPtr error_msg;
2794 CharPtr message;
2795
2796 if (error_returns == NULL) {
2797 message = "No hits found";
2798 } else {
2799 error_msg = error_returns->data.ptrvalue;
2800 message = error_msg->msg;
2801 }
2802 if (options->is_megablast_search) {
2803 bsp = BioseqLockById(SeqLocId(tmp_slp));
2804 BXMLPrintOutput(xml_aip, seqalign,
2805 options, blast_program, blast_database,
2806 bsp, other_returns, 0, NULL, mask_loc);
2807 BioseqUnlock(bsp);
2808 } else {
2809 BXMLPrintOutput(xml_aip, NULL, options, blast_program,
2810 blast_database, fake_bsp, other_returns, 0,
2811 message, mask_loc);
2812 }
2813 AsnIoReset(xml_aip);
2814 } else if (align_view < 8) {
2815 fprintf(outfp, "\n\n ***** No hits found ******\n\n");
2816 }
2817 if (error_returns != NULL) {
2818 for (vnp = error_returns; vnp; vnp = vnp->next) {
2819 BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
2820 }
2821 ValNodeFree(error_returns);
2822 }
2823 }
2824
2825 slp = SeqLocSetFree(slp);
2826 matrix = BLAST_MatrixDestruct(matrix);
2827 if (txmatrix)
2828 txmatrix = TxMatrixDestruct(txmatrix);
2829
2830 if(html) {
2831 fprintf(outfp, "<PRE>\n");
2832 }
2833
2834 init_buff_ex(85);
2835 dbinfo_head = dbinfo;
2836
2837 if(align_view < 7 && done) {
2838 while (dbinfo) {
2839 PrintDbReport(dbinfo, 70, outfp);
2840 dbinfo = dbinfo->next;
2841 }
2842 }
2843 dbinfo_head = TxDfDbInfoDestruct(dbinfo_head);
2844
2845 if (ka_params) {
2846 if(align_view < 7 && done) {
2847 PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
2848 }
2849 MemFree(ka_params);
2850 }
2851
2852 if (ka_params_gap) {
2853 if(align_view < 7 && done) {
2854 PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
2855 }
2856 MemFree(ka_params_gap);
2857 }
2858
2859 if(align_view < 7 && done) {
2860 PrintTildeSepLines(params_buffer, 70, outfp);
2861 }
2862
2863 MemFree(params_buffer);
2864 free_buff();
2865 mask_loc = mask_loc_start;
2866 while (mask_loc) {
2867 SeqLocSetFree(mask_loc->data.ptrvalue);
2868 mask_loc = mask_loc->next;
2869 }
2870 ValNodeFree(mask_loc_start);
2871
2872 if(num_queries > 0) { /* AM: query concatenation */
2873 SeqDataFree(fake_bsp->seq_data, fake_bsp->seq_data_type);
2874 fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
2875 } else if(!believe_query ) {
2876 fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
2877 }
2878 other_returns = ValNodeFree(other_returns);
2879 if (done)
2880 sep = SeqEntryFree(sep);
2881 #ifndef BLAST_CS_API
2882 /* This is freed earlier in client-server case */
2883 options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
2884 /* Free the database translation tables, if applicable. */
2885 TransTableFreeAll();
2886 ReadDBBioseqFetchDisable();
2887 #endif
2888 if (html)
2889 fprintf(outfp, "</PRE>\n<P><HR><BR>\n<PRE>");
2890
2891 if (!options->is_megablast_search)
2892 BlastDeleteUserErrorString(err_ticket);
2893
2894 ObjMgrFreeCache(0);
2895 } /* while(TRUE) - main loop of the program over all FASTA entries */
2896
2897 #ifdef BLAST_CS_API
2898 BlastNetBioseqFetchDisable(bl3hp, blast_database, db_is_na);
2899 BlastFini(bl3hp);
2900 #endif
2901
2902 aip = AsnIoClose(aip);
2903
2904 if(align_view < 7) {
2905 if (html) {
2906 fprintf(outfp, "</PRE>\n</BODY>\n</HTML>\n");
2907 }
2908 } else if (align_view == 7)
2909 xml_aip = AsnIoClose(xml_aip);
2910
2911 /* AM: query concatenation. */
2912 mult_queries = BlastMultQueriesDestruct( mult_queries );
2913
2914 options = BLASTOptionDelete(options);
2915 FileClose(infp);
2916 return 0;
2917 }
2918
2919 /*
2920 This function decides whether the new blast code can handle this database or not.
2921 Currently it should return FALSE for any database that uses a gilist.
2922 This implementation only works for nucleotide databases.
2923
2924 If it is not possible to initialize the database or some error condition exists then FALSE
2925 will also be returned and the old engine should deal with this.
2926 */
2927 static Boolean
2928 readdb_use_new_blast(char* dbname)
2929 {
2930 Boolean db_is_na, query_is_na;
2931 Boolean retval=TRUE;
2932 ReadDBFILEPtr rdfp=NULL;
2933 ReadDBFILEPtr rdfp_var=NULL;
2934
2935 if (!dbname)
2936 return FALSE;
2937
2938 BlastGetTypes(myargs[ARG_PROGRAM].strvalue, &query_is_na, &db_is_na);
2939 rdfp = readdb_new(dbname, !db_is_na);
2940 if (!rdfp)
2941 return FALSE;
2942
2943 rdfp_var = rdfp;
2944 while (rdfp_var)
2945 {
2946 if (rdfp_var->gilist != NULL)
2947 {
2948 retval = FALSE;
2949 break; /* Break out and free rdfp. */
2950 }
2951 rdfp_var = rdfp_var->next;
2952 }
2953 rdfp = readdb_destruct(rdfp);
2954 return retval;
2955 }
2956
2957 Int2 Nlm_Main(void)
2958 {
2959 #ifndef BLASTALL_TOOLS_ONLY
2960 Boolean use_new_engine=FALSE;
2961 #endif
2962 char buf[256] = { '\0' };
2963
2964 #ifdef BLAST_CS_API
2965 StringCpy(buf, "blastcl3 ");
2966 StringNCat(buf, BlastGetVersionNumber(), sizeof(buf)-StringLen(buf)-1);
2967 if (! GetArgs (buf, NUMARG, myargs)) {
2968 return (1);
2969 }
2970 #else
2971 StringCpy(buf, "blastall ");
2972 StringNCat(buf, BlastGetVersionNumber(), sizeof(buf)-StringLen(buf));
2973 if (! GetArgs (buf, NUMARG, myargs)) {
2974 return (1);
2975 }
2976 #endif
2977
2978 UseLocalAsnloadDataAndErrMsg ();
2979
2980 if (! SeqEntryLoad())
2981 return 1;
2982
2983 ErrSetMessageLevel(SEV_WARNING);
2984
2985 #ifdef BLAST_CS_API
2986 return Main_old();
2987 #else
2988 #ifndef BLASTALL_TOOLS_ONLY
2989 if (myargs[ARG_FORCE_OLD].intvalue == 0 &&
2990 myargs[ARG_GILIST].strvalue == NULL)
2991 use_new_engine = readdb_use_new_blast(myargs[ARG_DB].strvalue);
2992
2993 if (use_new_engine)
2994 return Main_new();
2995 else
2996 #endif /* BLASTALL_TOOLS_ONLY */
2997 return Main_old();
2998 #endif
2999 }
3000 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |