|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/demo/bl2seq.c |
source navigation diff markup identifier search freetext search file search |
1 static char const rcsid[] = "$Id: bl2seq.c,v 6.89 2009/05/27 19:29:17 camacho Exp $";
2
3 /**************************************************************************
4 * *
5 * COPYRIGHT NOTICE *
6 * *
7 * This software/database is categorized as "United States Government *
8 * Work" under the terms of the United States Copyright Act. It was *
9 * produced as part of the author's official duties as a Government *
10 * employee and thus can not be copyrighted. This software/database is *
11 * freely available to the public for use without a copyright notice. *
12 * Restrictions can not be placed on its present or future use. *
13 * *
14 * Although all reasonable efforts have been taken to ensure the accuracy *
15 * and reliability of the software and data, the National Library of *
16 * Medicine (NLM) and the U.S. Government do not and can not warrant the *
17 * performance or results that may be obtained by using this software, *
18 * data, or derivative works thereof. The NLM and the U.S. Government *
19 * disclaim any and all warranties, expressed or implied, as to the *
20 * performance, merchantability or fitness for any particular purpose or *
21 * use. *
22 * *
23 * In any work or product derived from this material, proper attribution *
24 * of the author(s) as the source of the software or data would be *
25 * appreciated. *
26 * *
27 ***************************************************************************
28 *
29 * $Log: bl2seq.c,v $
30 * Revision 6.89 2009/05/27 19:29:17 camacho
31 * Disambiguate eNone enumeration
32 *
33 * Revision 6.88 2007/05/07 13:29:11 kans
34 * added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
35 *
36 * Revision 6.87 2007/03/20 14:56:58 camacho
37 * Call GeneticCodeSingletonInit/GeneticCodeSingletonFini
38 *
39 * Revision 6.86 2006/08/28 14:11:37 coulouri
40 * correct seqids in asn.1 output when input sequences are specified as accessions; fixes rt#15192840
41 *
42 * Revision 6.85 2006/08/21 21:47:06 camacho
43 * Allocate Blast_SummaryReturn structure to avoid dereferencing NULL pointer
44 *
45 * Revision 6.84 2006/08/16 16:03:59 coulouri
46 * do not strlen NULL pointer
47 *
48 * Revision 6.83 2006/04/26 12:47:29 madden
49 * Use SBlastMessage in place of Blast_Message
50 *
51 * Revision 6.82 2006/01/13 16:00:02 madden
52 * BLAST_TwoSeqLocSets now takes SBlastSeqalignArray rather than SeqAlignPtr, remove unused variable
53 *
54 * Revision 6.81 2006/01/10 20:43:15 madden
55 * BLAST_FormatResults now takes SBlastSeqalignArray
56 *
57 * Revision 6.80 2005/10/17 14:06:44 madden
58 * Change message on gap parameter arg
59 *
60 * Revision 6.79 2005/08/29 14:45:34 camacho
61 * From Ilya Dondoshansky:
62 * Retrieve mask_at_hash option from the SBlastOptions structure instead of
63 * passing as argument in search API calls
64 *
65 * Revision 6.78 2005/06/08 20:32:48 dondosha
66 * Fixed masking locations memory leak and added comment
67 *
68 * Revision 6.77 2005/06/02 20:45:32 dondosha
69 * Use BlastFormattingInfo structure for formatting
70 *
71 * Revision 6.76 2005/05/02 17:00:27 coulouri
72 * change default to new engine
73 *
74 * Revision 6.75 2005/03/16 00:43:40 dondosha
75 * Correction to previous commit to make reported deflines the same as before
76 *
77 * Revision 6.74 2005/03/15 20:59:16 dondosha
78 * When retrieving Bioseq by gi, try BLAST databases first
79 *
80 * Revision 6.73 2005/03/03 15:05:47 dondosha
81 * Blast_FindRepeatFilterLoc renamed to Blast_FindRepeatFilterSeqLoc
82 *
83 * Revision 6.72 2005/02/08 20:43:03 dondosha
84 * Added repeats filtering for new engine
85 *
86 * Revision 6.71 2005/02/07 15:30:39 dondosha
87 * Removed restriction on the value of longest intron option
88 *
89 * Revision 6.70 2005/02/03 18:02:07 dondosha
90 * Pass summary returns to BLAST_FormatResults, needed for XML output
91 *
92 * Revision 6.69 2005/02/02 19:01:36 dondosha
93 * Use new high level API for performing the search
94 *
95 * Revision 6.68 2004/12/29 16:06:20 dondosha
96 * Small memory leak fix; correction in mask array allocation for non-translated search
97 *
98 * Revision 6.67 2004/12/29 15:20:55 dondosha
99 * Set gapped_calculation option for new engine; allocate appropriate size BlastSeqLoc array in BlastMaskLoc before formatting
100 *
101 * Revision 6.66 2004/10/04 14:05:06 madden
102 * Use Blast_PrintOutputFooter rather than BLAST_PrintSummaryReturns
103 *
104 * Revision 6.65 2004/09/13 15:15:54 madden
105 * Changes for BlastSeqLoc and BlastMaskLoc
106 *
107 * Revision 6.64 2004/08/16 14:26:47 madden
108 * Fix bad frees for blastx filtering locations
109 *
110 * Revision 6.63 2004/08/09 13:08:15 madden
111 * Bl2seq now can run queries with new blast engine
112 *
113 * Revision 6.62 2004/08/03 14:02:02 madden
114 * Introduce enum to make myargs more readable
115 *
116 * Revision 6.61 2004/06/30 16:03:48 kans
117 * include <blfmtutl.h>
118 *
119 * Revision 6.60 2004/02/26 16:25:06 camacho
120 * Fix uninitialized variable errors that caused core dumps on empty input files
121 *
122 * Revision 6.59 2003/06/26 18:56:05 coulouri
123 * remove unnecessary variables
124 *
125 * Revision 6.58 2003/05/30 17:31:09 coulouri
126 * add rcsid
127 *
128 * Revision 6.57 2003/05/13 16:02:42 coulouri
129 * make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
130 *
131 * Revision 6.56 2003/05/06 18:57:46 dondosha
132 * Do not set cutoff_s for megablast, it is not needed
133 *
134 * Revision 6.55 2003/04/22 19:38:00 dondosha
135 * Fix for ungapped search with tabular output
136 *
137 * Revision 6.54 2003/01/16 19:46:28 kans
138 * include accid1.h to fix Mac compiler error on missing symbols
139 *
140 * Revision 6.53 2003/01/10 20:55:36 dondosha
141 * Enable the Entrez client option without a special definition
142 *
143 * Revision 6.52 2003/01/07 15:47:11 dondosha
144 * Hide the -A argument if NCBI_ENTREZ_CLIENT is not defined
145 *
146 * Revision 6.51 2002/09/18 18:21:04 camacho
147 * Fixed memory leak when using the -U option
148 *
149 * Revision 6.50 2002/09/13 18:53:26 dondosha
150 * Corrected the way query and subject deflines are shown in all types of output
151 *
152 * Revision 6.49 2002/08/09 19:41:25 camacho
153 * 1) Added blast version number to command-line options
154 * 2) Added explanations for some default parameters
155 *
156 * Revision 6.48 2002/06/26 21:44:57 dondosha
157 * Set default gap penalties before checking command line options
158 *
159 * Revision 6.47 2002/06/24 18:24:05 dondosha
160 * Multiple memory freeing bug fixed
161 *
162 * Revision 6.46 2002/05/09 15:37:52 dondosha
163 * Call BLASTOptionNewEx instead of BLASTOptionNew, so megablast defaults are set in a central place
164 *
165 * Revision 6.45 2002/05/02 22:18:25 dondosha
166 * Copy bioseq if getting it from ID1, removing non-residue characters from sequence
167 *
168 * Revision 6.44 2002/05/01 16:43:53 dondosha
169 * Call BLASTOptionSetGapParams instead of setting options->matrix
170 *
171 * Revision 6.43 2002/04/29 19:55:26 madden
172 * Use ARG_FLOAT for db length
173 *
174 * Revision 6.42 2002/03/19 23:29:38 dondosha
175 * Do not increment options->wordsize by 4 for megablast any more
176 *
177 * Revision 6.41 2002/03/14 16:11:41 camacho
178 * Extended BlastTwoSequences to allow comparison between sequence and PSSM
179 *
180 * Revision 6.40 2001/07/19 22:05:47 dondosha
181 * Made db_length option a string, to convert to Int8 value
182 *
183 * Revision 6.39 2001/06/21 21:49:26 dondosha
184 * Destroy all error returns
185 *
186 * Revision 6.38 2001/05/11 22:03:21 dondosha
187 * Do not use fake Bioseqs in case of tabulated output
188 *
189 * Revision 6.37 2001/05/09 20:15:31 dondosha
190 * 1. Made program argument non-optional
191 * 2. Added -U argument for masking lower case
192 *
193 * Revision 6.36 2001/05/02 20:00:05 dondosha
194 * Create subject SeqLoc with plus strand (not both) when location is given
195 *
196 * Revision 6.35 2001/04/16 20:46:00 dondosha
197 * Improved error message when sequence not found in Entrez
198 *
199 * Revision 6.34 2001/04/04 20:30:05 dondosha
200 * Fixed a typo
201 *
202 * Revision 6.33 2001/04/03 21:59:50 dondosha
203 * Implemented tabulated output for non-megablast bl2seq
204 *
205 * Revision 6.32 2001/03/19 22:39:24 dondosha
206 * Allow location on the first query sequence for megablast
207 *
208 * Revision 6.31 2001/03/02 20:30:20 dondosha
209 * Typo fix
210 *
211 * Revision 6.30 2001/02/16 18:45:39 dondosha
212 * Fixed minor purify errors
213 *
214 * Revision 6.29 2001/02/07 21:18:42 dondosha
215 * Moved the MegaBlastPrintAlignInfo callback to blastool.c
216 *
217 * Revision 6.28 2001/01/24 20:51:50 dondosha
218 * Enabled splitting of the second sequence for 2 sequences with megablast
219 *
220 * Revision 6.27 2001/01/23 22:20:04 dondosha
221 * Do not free subject id in megablast callback
222 *
223 * Revision 6.26 2001/01/17 20:34:11 dondosha
224 * Use the subject id with best accession in the MegaBlastPrintAlignInfo callback
225 *
226 * Revision 6.25 2001/01/12 18:28:09 dondosha
227 * Pass the best accession ids in the SeqLocs in case of partial sequences
228 *
229 * Revision 6.24 2001/01/09 20:53:01 dondosha
230 * Locations start from 1; added handling of erroneous location input
231 *
232 * Revision 6.23 2001/01/09 20:16:27 dondosha
233 * Implemented from-to location options for both sequences in bl2seq
234 *
235 * Revision 6.22 2000/12/13 22:28:06 dondosha
236 * Unlock bioseqs in the end if they were obtained from entrez lookup
237 *
238 * Revision 6.21 2000/11/21 15:47:21 dondosha
239 * Corrected default wordsize for megablast option
240 *
241 * Revision 6.20 2000/11/15 22:21:47 dondosha
242 * Corrected default wordsize for Mega BLAST
243 *
244 * Revision 6.19 2000/11/09 15:01:00 dondosha
245 * Set longest intron length in options in nucleotide coordinates
246 *
247 * Revision 6.18 2000/11/08 22:24:07 dondosha
248 * Enabled new tblastn by adding longest intron option
249 *
250 * Revision 6.17 2000/11/08 18:22:46 kans
251 * includes <mblast.h> for Mac compiler
252 *
253 * Revision 6.16 2000/11/02 20:23:27 dondosha
254 * Allow megablast one-line style output; allow accession or gi input if NCBI_ENTREZ_CLIENT is defined
255 *
256 * Revision 6.15 2000/10/30 19:05:44 madden
257 * Added -Y search space option
258 *
259 * Revision 6.14 2000/10/20 20:10:01 dondosha
260 * Revert previous change - would fail on some platforms
261 *
262 * Revision 6.13 2000/10/20 19:45:16 dondosha
263 * Changed -d option type to string to allow 8-byte integer db sizes
264 *
265 * Revision 6.12 2000/10/13 20:32:32 madden
266 * Added call to AcknowledgeBlastQuery
267 *
268 * Revision 6.11 2000/10/06 21:37:08 dondosha
269 * Set Mega BlAST default parameters correctly before calling the engine
270 *
271 * Revision 6.10 2000/10/05 22:44:28 dondosha
272 * Set block_width to 0 for Mega BLAST - it has different meaning
273 *
274 * Revision 6.9 2000/10/03 15:16:12 madden
275 * Set program name in call to BlastTwoSequencesEx
276 *
277 * Revision 6.8 2000/09/12 21:48:41 dondosha
278 * Pass the correct scoring matrix to ShowTextAlignFromAnnot
279 *
280 * Revision 6.7 2000/06/15 15:29:57 dondosha
281 * Fixed several memory leaks; tblastn and tblastx enabled
282 *
283 * Revision 6.6 2000/05/24 20:36:08 dondosha
284 * If megablast is used, set cutoff_s and cutoff_s2 parameters
285 *
286 * Revision 6.5 2000/04/10 15:23:33 dondosha
287 * Added option to use MegaBlast for search
288 *
289 * Revision 6.2 1999/11/26 20:16:11 vakatov
290 * Added <sqnutils.h> to pick up proto of 'UseLocalAsnloadDataAndErrMsg()'
291 *
292 * Revision 6.1 1999/07/06 18:48:20 madden
293 * Compares two sequences
294 */
295
296 #include <ncbi.h>
297 #include <objseq.h>
298 #include <objsset.h>
299 #include <sequtil.h>
300 #include <seqport.h>
301 #include <tofasta.h>
302 #include <blast.h>
303 #include <blastpri.h>
304 #include <txalign.h>
305 #include <sqnutils.h>
306 #include <mblast.h>
307 #include <accid1.h>
308 #include <blfmtutl.h>
309
310 #include <algo/blast/api/twoseq_api.h>
311 #include <algo/blast/api/blast_format.h>
312 #include <algo/blast/api/blast_seq.h>
313 #include <algo/blast/api/repeats_filter.h>
314 #include <algo/blast/core/blast_util.h>
315 #include <algo/blast/api/blast_api.h>
316
317 #define LOCAL_BUFLEN 255
318 static BioseqPtr
319 BioseqFromAccession(CharPtr accver, Boolean is_na)
320 {
321 CharPtr accession, version_str;
322 Int4 version=0, gi, number;
323 SeqIdPtr sip = NULL;
324 TextSeqIdPtr tsip;
325 PDBSeqIdPtr psip;
326 BioseqPtr bsp = NULL, bsp_tmp = NULL;
327 SeqPortPtr spp;
328 Int2 retval, buf_length=512;
329 Uint1 buf[512];
330 char* defline = NULL;
331 char* dummy_ptr = NULL;
332
333 if (!ID1BioseqFetchEnable ("bl2seq", TRUE))
334 ErrPostEx(SEV_FATAL, 1, 0,
335 "Entrez access interface currently unavailable\n");
336 if (!IS_DIGIT(*accver)) {
337 accession = StringTokMT(accver, ".", &version_str);
338 if (version_str)
339 version = atoi(version_str);
340
341 if((sip = ValNodeNew (NULL)) == NULL)
342 return NULL;
343 if((tsip = TextSeqIdNew ()) == NULL)
344 return NULL;
345
346 tsip->accession = StringSave(accession);
347 tsip->version = version;
348 /* GenBank, EMBL, and DDBJ. */
349 sip->choice = SEQID_GENBANK;
350 sip->data.ptrvalue = (Pointer) tsip;
351 gi = ID1FindSeqId (sip);
352
353 if (gi == 0) {
354 /* SwissProt. */
355 sip->choice = SEQID_SWISSPROT;
356 gi = ID1FindSeqId (sip);
357 }
358 if (gi == 0) {
359 /* PIR */
360 sip->choice = SEQID_PIR;
361 gi = ID1FindSeqId (sip);
362 }
363
364 if (gi == 0) {
365 /* PRF */
366 sip->choice = SEQID_PRF;
367 gi = ID1FindSeqId (sip);
368 }
369
370 if (gi == 0) {
371 /* OTHER, probably 'ref' */
372 sip->choice = SEQID_OTHER;
373 gi = ID1FindSeqId (sip);
374 }
375
376 if(gi == 0) {
377 /* OK. We failed to find gi using string as TextSeqId. Now trying
378 last time - with PDBSeqIdPtr */
379
380 if((psip = PDBSeqIdNew()) == NULL)
381 return NULL;
382
383 sip->choice = SEQID_PDB;
384 tsip = TextSeqIdFree(tsip);
385 sip->data.ptrvalue = psip;
386
387 psip->mol = accession;
388 psip->chain = version;
389
390 gi = ID1FindSeqId (sip);
391 }
392
393 if (gi == 0) {
394 ErrPostEx(SEV_WARNING, 0, 0, "Sequence %s not found\n", accver);
395 return NULL;
396 }
397 sip = SeqIdFree(sip);
398 } else
399 gi = atoi(accver);
400
401 ID1BioseqFetchDisable();
402
403 if (gi > 0) {
404 /* First attempt to retrieve Bioseq from BLAST databases. */
405 char* db_name = (is_na ? "nucl_dbs" : "prot_dbs");
406
407 ValNodeAddInt(&sip, SEQID_GI, gi);
408 ReadDBBioseqFetchEnable ("bl2seq", db_name, is_na, TRUE);
409 bsp_tmp = BioseqLockById(sip);
410 ReadDBBioseqFetchDisable();
411
412 if (!bsp_tmp) {
413 /* Try ID1 again as a last resort. */
414 ID1BioseqFetchEnable("bl2seq", TRUE);
415 bsp_tmp = BioseqLockById(sip);
416 ID1BioseqFetchDisable();
417 }
418 sip = SeqIdFree(sip);
419 }
420
421 if (!bsp_tmp) {
422 ErrPostEx(SEV_WARNING, 0, 0, "Gi %ld not found", gi);
423 return NULL;
424 }
425
426 if (ISA_na(bsp_tmp->mol) != is_na) {
427 BioseqUnlock(bsp_tmp);
428 if (is_na)
429 ErrPostEx(SEV_FATAL, 1, 0,
430 "%s is a protein sequence, program requires nucleotide",
431 accver);
432 else
433 ErrPostEx(SEV_FATAL, 1, 0,
434 "%s is a nucleotide sequence, program requires protein",
435 accver);
436 return NULL;
437 }
438
439 bsp = AsnIoMemCopy(bsp_tmp, (AsnReadFunc) BioseqAsnRead, (AsnWriteFunc) BioseqAsnWrite);
440 SeqMgrDeleteFromBioseqIndex(bsp_tmp);
441 BioseqUnlock(bsp_tmp);
442 BioseqPack(bsp);
443
444 return bsp;
445 }
446
#define NUMARG (sizeof(myargs)/sizeof(myargs[0]))

/* Symbolic indices into myargs[]. The enumerators must stay in the same
 * order as the entries of that table. */
typedef enum {
    ARG_QUERY = 0,      /* -i */
    ARG_SUBJECT,        /* -j */
    ARG_PROGRAM,        /* -p */
    ARG_GAPPED,         /* -g */
    ARG_OUT,            /* -o */
    ARG_DBSIZE,         /* -d */
    ARG_ASNOUT,         /* -a */
    ARG_GAPOPEN,        /* -G */
    ARG_GAPEXT,         /* -E */
    ARG_XDROP,          /* -X */
    ARG_WORDSIZE,       /* -W */
    ARG_MATRIX,         /* -M */
    ARG_MISMATCH,       /* -q */
    ARG_MATCH,          /* -r */
    ARG_FILTER,         /* -F */
    ARG_EVALUE,         /* -e */
    ARG_STRAND,         /* -S */
    ARG_HTML,           /* -T */
    ARG_USEMEGABLAST,   /* -m */
    ARG_SEARCHSP,       /* -Y */
    ARG_INTRON,         /* -t */
    ARG_LOC1,           /* -I */
    ARG_LOC2,           /* -J */
    ARG_FORMAT,         /* -D */
    ARG_LCASE,          /* -U */
    ARG_ACCN,           /* -A */
    ARG_FORCE_OLD       /* -V */
} BlastArguments;
478
479 static Args myargs [] = {
480 { "First sequence",
481 NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_QUERY */
482 { "Second sequence",
483 NULL, NULL, NULL, FALSE, 'j', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_SUBJECT */
484 { "Program name: blastp, blastn, blastx, tblastn, tblastx. For blastx 1st sequence should be nucleotide, tblastn 2nd sequence nucleotide",
485 NULL, NULL, NULL, FALSE, 'p', ARG_STRING, 0.0, 0, NULL}, /* ARG_PROGRAM */
486 { "Gapped",
487 "T", NULL, NULL, FALSE, 'g', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_GAPPED */
488 { "alignment output file",
489 "stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}, /* ARG_OUT */
490 { "theor. db size (zero is real size)",
491 "0", NULL, NULL, FALSE, 'd', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_DBSIZE */
492 { "Text ASN.1 output file",
493 NULL, NULL, NULL, TRUE, 'a', ARG_FILE_OUT, 0.0, 0, NULL}, /* ARG_ASNOUT */
494 { "Cost to open a gap (-1 invokes default behavior)",
495 "-1", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL}, /* ARG_GAPOPEN */
496 { "Cost to extend a gap (-1 invokes default behavior)",
497 "-1", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL}, /* ARG_GAPEXT */
498 { "X dropoff value for gapped alignment (in bits) (zero invokes default "
499 "behavior)\n blastn 30, megablast 20, tblastx 0, all others 15",
500 "0", NULL, NULL, FALSE, 'X', ARG_INT, 0.0, 0, NULL}, /* ARG_XDROP */
501 { "Word size, default if zero (blastn 11, megablast 28, "
502 "all others 3)",
503 "0", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL}, /* ARG_WORDSIZE */
504 { "Matrix",
505 "BLOSUM62", NULL, NULL, FALSE, 'M', ARG_STRING, 0.0, 0, NULL}, /* ARG_MATRIX */
506 { "Penalty for a nucleotide mismatch (blastn only)",
507 "-3", NULL, NULL, FALSE, 'q', ARG_INT, 0.0, 0, NULL}, /* ARG_MISMATCH */
508 { "Reward for a nucleotide match (blastn only)",
509 "1", NULL, NULL, FALSE, 'r', ARG_INT, 0.0, 0, NULL}, /* ARG_MATCH */
510 { "Filter query sequence (DUST with blastn, SEG with others)",
511 "T", NULL, NULL, FALSE, 'F', ARG_STRING, 0.0, 0, NULL}, /* ARG_FILTER */
512 { "Expectation value (E)",
513 "10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_EVALUE */
514 { "Query strands to search against database (blastn only). 3 is both, 1 is top, 2 is bottom",
515 "3", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL}, /* ARG_STRAND */
516 { "Produce HTML output",
517 "F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_HTML */
518 { "Use Mega Blast for search",
519 "F", NULL, NULL, TRUE, 'm', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_USEMEGABLAST */
520 { "Effective length of the search space (use zero for the real size)",
521 "0", NULL, NULL, FALSE, 'Y', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_SEARCHSP */
522 { "Length of the largest intron allowed in tblastn for linking HSPs",
523 "0", NULL, NULL, FALSE, 't', ARG_INT, 0.0, 0, NULL}, /* ARG_INTRON */
524 { "Location on first sequence",
525 NULL, NULL, NULL, TRUE, 'I', ARG_STRING, 0.0, 0, NULL}, /* ARG_LOC1 */
526 { "Location on second sequence",
527 NULL, NULL, NULL, TRUE, 'J', ARG_STRING, 0.0, 0, NULL}, /* ARG_LOC2 */
528 { "Output format: 0 - traditional, 1 - tabular",
529 "0", NULL, NULL, FALSE, 'D', ARG_INT, 0.0, 0, NULL}, /* ARG_FORMAT */
530 { "Use lower case filtering for the query sequence",
531 "F", NULL, NULL, TRUE, 'U', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_LCASE */
532 { "Input sequences in the form of accession.version",
533 "F", NULL, NULL, FALSE, 'A', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_ACCN */
534 {"Force use of the legacy BLAST engine",
535 "F", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL} /* ARG_FORCE_OLD */
536 };
537
538 /**
539 * Fetches sequences filling in either just the Bioseq's (if fetch from Entrez) or
540 * both the BioseqPtr's and the SeqEntryPtr's (if read from FASTA). The lcase_mask
541 * is also filled in with letters in query that were lower-case if myargs[ARG_LCASE].intvalue
542 * is non-zero.
543 *
544 * @param seq1_is_na the query sequence is DNA if true [in]
545 * @param seq2_is_na the subject sequence is DNA if true [in]
546 * @param query_bsp pointer to query BioseqPtr, to be filled in [out]
547 * @param subject_bsp pointer to subject BioseqPtr, to be filled in [out]
548 * @param sep pointer to query SeqEntryPtr, to be filled in [out]
549 * @param sep1 pointer to subject SeqEntryPtr, to be filled in [out]
550 * @param lcase_mask pointer to lower-case masking data to be filled in [out]
551 * @return TRUE on success, FALSE on failure.
552 */
553
554 static Int4
555 BL2SEQ_GetSequences(Boolean seq1_is_na, Boolean seq2_is_na, BioseqPtr *query_bsp, BioseqPtr *subject_bsp,
556 SeqEntryPtr *sep, SeqEntryPtr *sep1, SeqLocPtr *lcase_mask, Boolean believe_query)
557 {
558 Boolean entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
559 char *query_accver = NULL, *subject_accver = NULL; /* Used if entrez_lookup. */
560 char *blast_inputfile = NULL, *blast_inputfile1 = NULL; /* Used if FASTA read. */
561
562 if (entrez_lookup) {
563 query_accver = myargs [ARG_QUERY].strvalue;
564 subject_accver = myargs [ARG_SUBJECT].strvalue;
565 } else {
566 blast_inputfile = myargs [ARG_QUERY].strvalue;
567 blast_inputfile1 = myargs [ARG_SUBJECT].strvalue;
568 }
569
570 if (entrez_lookup) {
571 *query_bsp = BioseqFromAccession(query_accver, seq1_is_na);
572 } else {
573 FILE *infp;
574 if ((infp = FileOpen(blast_inputfile, "r")) == NULL)
575 {
576 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", blast_inputfile);
577 return FALSE;
578 }
579 if (myargs[ARG_LCASE].intvalue)
580 *sep = FastaToSeqEntryForDb(infp, seq1_is_na, NULL,
581 believe_query, NULL, NULL,
582 lcase_mask);
583 else
584 *sep = FastaToSeqEntryEx(infp, seq1_is_na, NULL, believe_query);
585
586 FileClose(infp);
587
588 if (*sep != NULL) {
589 *query_bsp = NULL;
590 if (seq1_is_na)
591 SeqEntryExplore(*sep, query_bsp, FindNuc);
592 else
593 SeqEntryExplore(*sep, query_bsp, FindProt);
594
595 }
596 }
597 if (*query_bsp == NULL) {
598 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
599 return FALSE;
600 }
601
602 if (entrez_lookup) {
603 *subject_bsp =
604 BioseqFromAccession(subject_accver, seq2_is_na);
605 } else {
606 FILE *infp1;
607 if ((infp1 = FileOpen(blast_inputfile1, "r")) == NULL)
608 {
609 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", blast_inputfile1);
610 return FALSE;
611 }
612 *sep1 = FastaToSeqEntryEx(infp1, seq2_is_na, NULL, FALSE);
613
614 FileClose(infp1);
615
616 if (*sep1 != NULL) {
617 *subject_bsp = NULL;
618 if (seq2_is_na)
619 SeqEntryExplore(*sep1, subject_bsp, FindNuc);
620 else
621 SeqEntryExplore(*sep1, subject_bsp, FindProt);
622
623 }
624 }
625
626 if (*subject_bsp == NULL) {
627 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
628 return FALSE;
629 }
630
631 return TRUE;
632 }
633
634 /**
635 * Creates SeqLoc's from the given BioseqPtr's. if myargs for ARG_LOC1 or ARG_LOC2 are non-NULL,
636 * these are used in the creation of the SeqLoc's.
637 *
638 * @param bsp1 the query BioseqPtr [in]
639 * @param bsp2 the subject BioseqPtr [in]
640 * @param slp1 the query SeqLocPtr to be filled in [out]
641 * @param slp2 the subject SeqLocPtr to be filled in [out]
642 * @param strand_option specifies strand of slp1 [in]
643 * @return TRUE on success, FALSE on failure.
644 */
645
646 static Boolean
647 BL2SEQ_MakeSeqLoc(const BioseqPtr bsp1, const BioseqPtr bsp2, SeqLocPtr *slp1, SeqLocPtr *slp2, Uint1 strand_option)
648 {
649 const char* k_delimiters = " ,;";
650 CharPtr location;
651 Int4 from, to;
652
653 *slp1 = NULL;
654 *slp2 = NULL;
655
656 location = myargs[ARG_LOC1].strvalue;
657 if (location) {
658 from = atoi(StringTokMT(location, k_delimiters, &location)) - 1;
659 to = atoi(location) - 1;
660
661 from = MAX(from, 0);
662 if (to < 0)
663 to = bsp1->length - 1;
664 to = MIN(to, bsp1->length - 1);
665 if (from >= bsp1->length) {
666 ErrPostEx(SEV_FATAL, 1, 0,
667 "Location outside of the first sequence range\n");
668 return FALSE;
669 }
670 *slp1 = SeqLocIntNew(from, to, strand_option,
671 SeqIdFindBestAccession(bsp1->id));
672 } else if (strand_option != Seq_strand_both) {
673 *slp1 = SeqLocIntNew(0, bsp1->length-1, strand_option,
674 SeqIdFindBestAccession(bsp1->id));
675 } else
676 ValNodeAddPointer(slp1, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp1->id)));
677
678 location = myargs[ARG_LOC2].strvalue;
679 if (location) {
680 from = atoi(StringTokMT(location, k_delimiters, &location)) - 1;
681 to = atoi(location) - 1;
682
683 from = MAX(from, 0);
684 if (to < 0)
685 to = bsp2->length - 1;
686 to = MIN(to, bsp2->length - 1);
687 if (from >= bsp2->length) {
688 ErrPostEx(SEV_FATAL, 1, 0,
689 "Location outside of the second sequence range\n");
690 return FALSE;
691 }
692 *slp2 = SeqLocIntNew(from, to, Seq_strand_plus, SeqIdFindBestAccession(bsp2->id));
693 } else
694 ValNodeAddPointer(slp2, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp2->id)));
695
696 return TRUE;
697 }
698
699 /**
700 * Initializes and sets the summary options based upon the command-line args.
701 *
702 * @param ret_options object to be initialized and filled in [out]
703 * @param program_number specifies blastn/blastp/blastx etc. [in]
704 * @return TRUE on success, FALSE on failure.
705 */
706 static Boolean
707 Bl2SEQ_SummaryOptionsSet(BLAST_SummaryOptions* *ret_options, EBlastProgramType program_number)
708 {
709 BLAST_SummaryOptions* options;
710
711 if (BLAST_SummaryOptionsInit(&options) != 0)
712 {
713 ErrPostEx(SEV_FATAL, 1, 0, "SummaryOptionsInit failed.");
714 return FALSE;
715 }
716
717 options->hint = eBlastHint_None;
718
719 switch (program_number) {
720 case eBlastTypeBlastn:
721 options->program = eBlastn;
722 break;
723 case eBlastTypeBlastp:
724 options->program = eBlastp;
725 break;
726 case eBlastTypeBlastx:
727 options->program = eBlastx;
728 break;
729 case eBlastTypeTblastn:
730 options->program = eTblastn;
731 break;
732 case eBlastTypeTblastx:
733 options->program = eTblastx;
734 break;
735 default:
736 ErrPostEx(SEV_FATAL, 1, 0, "Program_number (%ld) not valid in Bl2SEQ_SummaryOptionsSet", (long) program_number);
737 BLAST_SummaryOptionsFree(options);
738 return FALSE;
739 }
740
741 options->cutoff_evalue = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
742
743 if (options->program == eBlastn)
744 {
745 options->nucleotide_mismatch = myargs[ARG_MISMATCH].intvalue;
746 options->nucleotide_match = myargs[ARG_MATCH].intvalue;
747 if (myargs[ARG_USEMEGABLAST].intvalue > 0)
748 options->use_megablast = TRUE;
749 }
750
751 if (myargs[ARG_GAPOPEN].intvalue != -1)
752 options->gap_open = myargs[ARG_GAPOPEN].intvalue;
753
754 if (myargs[ARG_GAPEXT].intvalue != -1)
755 options->gap_extend = myargs[ARG_GAPEXT].intvalue;
756
757 options->strand = myargs[ARG_STRAND].intvalue;
758
759 if (myargs[ARG_WORDSIZE].intvalue != 0)
760 options->word_size = myargs[ARG_WORDSIZE].intvalue;
761
762 if (myargs[ARG_MATRIX].strvalue)
763 options->matrix = StringSave(myargs[ARG_MATRIX].strvalue);
764
765 if (myargs[ARG_FILTER].strvalue)
766 options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
767
768 if (myargs[ARG_XDROP].intvalue != 0)
769 {
770 options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
771 }
772
773 if (program_number != eBlastTypeTblastx)
774 options->gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
775 else
776 options->gapped_calculation = FALSE;
777
778 options->db_length = myargs[ARG_DBSIZE].floatvalue;
779
780 *ret_options = options;
781
782 return TRUE;
783 }
784
785 Int2 Main_new(void)
786
787 {
788 BioseqPtr query_bsp=NULL, subject_bsp=NULL;
789 BioseqPtr bsp1=NULL, bsp2=NULL;
790 BioseqPtr fake_bsp=NULL, fake_subject_bsp=NULL;
791 BlastFormattingInfo* format_info = NULL;
792 BLAST_SummaryOptions* options=NULL;
793 Blast_SummaryReturn* extra_returns = Blast_SummaryReturnNew();
794 Boolean believe_query= FALSE;
795 Boolean seq1_is_na, seq2_is_na; /* seq1/2 is DNA if TRUE. */
796 Boolean seqannot_output; /* SeqAlign will be output. */
797 Boolean entrez_lookup; /* QUery/subject fetched from Entrez. */
798 Boolean mask_at_hash=FALSE; /* masking only on lookup table if TRUE. */
799 DbtagPtr dbtagptr;
800 EBlastProgramType program_number;
801 Int2 status; /* return value */
802 EAlignView align_view = eAlignViewPairwise; /* Used for formatting */
803 SeqAlignPtr seqalign=NULL;
804 SeqEntryPtr sep=NULL, sep1=NULL;
805 SeqLocPtr slp1, slp2; /* Used for actual search. */
806 SeqLocPtr filter_loc=NULL; /* Location of regions filtered (returned by engine) */
807 SeqLocPtr lcase_mask=NULL; /* For lower-case masking info from query FASTA. */
808 SeqLoc* repeat_mask = NULL; /* Repeat mask locations */
809 Uint1 strand_option = 0; /* FIXME */
810 SBlastOptions* search_options = NULL; /* Needed for formatting. */
811 SBlastSeqalignArray* seqalign_arr = NULL;
812 GeneticCodeSingletonInit();
813
814 strand_option = (Uint1) myargs[ARG_STRAND].intvalue;
815
816 entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
817 seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
818 believe_query = (seqannot_output || entrez_lookup);
819 /* Non-zero value for the -D option means tabular output. */
820 if (myargs[ARG_FORMAT].intvalue != 0)
821 align_view = eAlignViewTabularWithComments;
822
823 BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number);
824
825 seq1_is_na = (program_number == eBlastTypeBlastn ||
826 program_number == eBlastTypeBlastx ||
827 program_number == eBlastTypeRpsTblastn ||
828 program_number == eBlastTypeTblastx);
829
830 seq2_is_na = (program_number == eBlastTypeBlastn ||
831 program_number == eBlastTypeTblastn ||
832 program_number == eBlastTypeTblastx);
833
834 if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
835 &sep, &sep1, &lcase_mask, believe_query)
836 == FALSE)
837 {
838 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
839 return (1);
840 }
841
842 if (!entrez_lookup) {
843 if (!believe_query)
844 fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
845
846 fake_subject_bsp = BioseqNew();
847 fake_subject_bsp->descr = subject_bsp->descr;
848 fake_subject_bsp->repr = subject_bsp->repr;
849 fake_subject_bsp->mol = subject_bsp->mol;
850 fake_subject_bsp->length = subject_bsp->length;
851 fake_subject_bsp->seq_data = subject_bsp->seq_data;
852 fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
853 dbtagptr = DbtagNew();
854 dbtagptr->db = StringSave("BL_ORD_ID");
855 dbtagptr->tag = ObjectIdNew();
856
857 if (BioseqGetTitle(subject_bsp) != NULL)
858 dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
859 else
860 dbtagptr->tag->str = StringSave("No definition line found");
861
862 ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
863 bsp1 = (believe_query ? query_bsp : fake_bsp);
864 bsp2 = fake_subject_bsp;
865 } else { /* Query and subject Bioseqs are already "fake". */
866 bsp1 = query_bsp;
867 bsp2 = subject_bsp;
868 }
869
870 if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, strand_option) == FALSE)
871 return 1;
872
873 if (Bl2SEQ_SummaryOptionsSet(&options, program_number) == FALSE)
874 return 1;
875
876 /* Find repeat mask, if necessary */
877 if ((status = Blast_FindRepeatFilterSeqLoc(slp1, myargs[ARG_FILTER].strvalue,
878 &repeat_mask, &extra_returns->error)) != 0)
879 {
880 if (extra_returns && extra_returns->error)
881 {
882 ErrSev max_sev = SBlastMessageErrPost(extra_returns->error);
883 if (max_sev >= SEV_ERROR)
884 return status;
885 }
886 }
887
888 /* Combine repeat mask with lower case mask */
889 if (repeat_mask)
890 lcase_mask = ValNodeLink(&lcase_mask, repeat_mask);
891
892 status = BLAST_TwoSeqLocSets(options, slp1, slp2, lcase_mask, &seqalign_arr,
893 &filter_loc, &mask_at_hash,
894 &extra_returns);
895
896 /* Free the lower case mask in SeqLoc form. */
897 lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);
898
899 /* Post warning or error messages, no matter what the search status
900 was. */
901 SBlastMessageErrPost(extra_returns->error);
902
903 if (status != 0)
904 {
905 ErrPostEx(SEV_FATAL, 1, 0, "BLAST_TwoSeqLocSets failed");
906 return status;
907 }
908
909 if (myargs[ARG_ASNOUT].strvalue && seqalign_arr) {
910 AsnIoPtr asnout =
911 AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w");
912 GenericSeqAlignSetAsnWrite(seqalign_arr->array[0], asnout);
913 asnout = AsnIoClose(asnout);
914 }
915
916 /* Pass NULL for the database name, since there is no database. */
917 BlastFormattingInfoNewBasic(align_view, options, slp1,
918 myargs[ARG_OUT].strvalue, &search_options,
919 &format_info);
920
921 /* Always show gis in the output, hence pass TRUE for respective
922 argument. */
923 BlastFormattingInfoSetUpOptions(format_info, 0, 1,
924 (Boolean) myargs[ARG_HTML].intvalue,
925 (Boolean) myargs[ARG_USEMEGABLAST].intvalue,
926 TRUE, believe_query);
927
928 /* If masking was at hash only, free the masking locations,
929 * to prevent them from being used for formatting.
930 */
931 if (SBlastOptionsGetMaskAtHash(search_options))
932 filter_loc = Blast_ValNodeMaskListFree(filter_loc);
933
934 /* Format the results */
935 status =
936 BLAST_FormatResults(seqalign_arr, 1, slp1, filter_loc, format_info,
937 extra_returns);
938
939 status = Blast_PrintOutputFooter(format_info, extra_returns);
940
941 /* Free masking locations if they haven't been freed already. */
942 filter_loc = Blast_ValNodeMaskListFree(filter_loc);
943
944 format_info = BlastFormattingInfoFree(format_info);
945 extra_returns = Blast_SummaryReturnFree(extra_returns);
946 search_options = SBlastOptionsFree(search_options);
947
948 if (entrez_lookup) {
949 BioseqFree(query_bsp);
950 BioseqFree(subject_bsp);
951 } else {
952 SeqEntryFree(sep);
953 SeqEntryFree(sep1);
954 }
955
956 options = BLAST_SummaryOptionsFree(options);
957 seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
958 slp1 = SeqLocSetFree(slp1);
959 slp2 = SeqLocSetFree(slp2);
960
961 fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
962 GeneticCodeSingletonFini();
963
964 return 0;
965
966 }
967
968 Int2 Main_old (void)
969
970 {
971
972 AsnIoPtr aip;
973 BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL,
974 subject_bsp = NULL;
975 BioseqPtr bsp1, bsp2;
976 BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
977 BLAST_OptionsBlkPtr options=NULL;
978 Boolean seq1_is_na, seq2_is_na;
979 CharPtr params_buffer=NULL;
980 DbtagPtr dbtagptr;
981 Uint1 align_type;
982 Uint4 align_options;
983 SeqAlignPtr seqalign;
984 SeqAnnotPtr seqannot;
985 SeqEntryPtr sep = NULL, sep1 = NULL;
986 CharPtr program_name, blast_outputfile;
987 FILE *outfp;
988 ValNodePtr mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL;
989 BLAST_MatrixPtr matrix;
990 Int4Ptr PNTR txmatrix;
991 int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL;
992 Boolean entrez_lookup = FALSE;
993 Boolean html, seqannot_output, believe_query;
994 Uint1 tabular_output;
995 Boolean gapped_calculation;
996
997 entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
998 html = (Boolean) myargs[ARG_HTML].intvalue;
999 seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
1000
1001 blast_outputfile = myargs [ARG_OUT].strvalue;
1002
1003 program_name = StringSave(myargs[ARG_PROGRAM].strvalue);
1004 if (StringCmp(program_name, "blastn") &&
1005 StringCmp(program_name, "blastp") &&
1006 StringCmp(program_name, "blastx") &&
1007 StringCmp(program_name, "tblastn") &&
1008 StringCmp(program_name, "tblastx")) {
1009 ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n");
1010 return (1);
1011 }
1012
1013 align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na);
1014
1015 if ((outfp = FileOpen(blast_outputfile, "w")) == NULL)
1016 {
1017 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
1018 return (1);
1019 }
1020
1021 gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
1022 believe_query = (seqannot_output || entrez_lookup);
1023
1024 options = BLASTOptionNewEx(program_name, gapped_calculation,
1025 (Boolean) myargs[ARG_USEMEGABLAST].intvalue);
1026
1027 if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
1028 &sep, &sep1, &(options->query_lcase_mask),
1029 believe_query) == FALSE)
1030 {
1031 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
1032 return (1);
1033 }
1034
1035 if (!entrez_lookup) {
1036 if (!believe_query)
1037 fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
1038
1039 fake_subject_bsp = BioseqNew();
1040 fake_subject_bsp->descr = subject_bsp->descr;
1041 fake_subject_bsp->repr = subject_bsp->repr;
1042 fake_subject_bsp->mol = subject_bsp->mol;
1043 fake_subject_bsp->length = subject_bsp->length;
1044 fake_subject_bsp->seq_data = subject_bsp->seq_data;
1045 fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
1046 dbtagptr = DbtagNew();
1047 dbtagptr->db = StringSave("BL_ORD_ID");
1048 dbtagptr->tag = ObjectIdNew();
1049
1050 if (BioseqGetTitle(subject_bsp) != NULL)
1051 dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
1052 else
1053 dbtagptr->tag->str = StringSave("No definition line found");
1054
1055 ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
1056 bsp1 = (believe_query ? query_bsp : fake_bsp);
1057 bsp2 = fake_subject_bsp;
1058 } else {
1059 bsp1 = query_bsp;
1060 bsp2 = subject_bsp;
1061 }
1062
1063 tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue;
1064
1065
1066 if (myargs[ARG_SEARCHSP].floatvalue)
1067 options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;
1068
1069
1070 options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
1071 options->expect_value = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
1072
1073 if (StringICmp("blastn", program_name) == 0)
1074 {
1075 options->penalty = myargs[ARG_MISMATCH].intvalue;
1076 options->reward = myargs[ARG_MATCH].intvalue;
1077 }
1078
1079 options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
1080
1081 options->discontinuous = FALSE;
1082
1083 if (myargs[ARG_XDROP].intvalue != 0)
1084 {
1085 options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
1086 }
1087 if (myargs[ARG_WORDSIZE].intvalue != 0)
1088 options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue;
1089
1090 if (options->is_megablast_search) {
1091 options->cutoff_s2 = options->wordsize*options->reward;
1092 }
1093 options->matrix = MemFree(options->matrix);
1094 BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0);
1095
1096 if (myargs[ARG_GAPOPEN].intvalue != -1)
1097 options->gap_open = myargs[ARG_GAPOPEN].intvalue;
1098 if (myargs[ARG_GAPEXT].intvalue != -1)
1099 options->gap_extend = myargs[ARG_GAPEXT].intvalue;
1100
1101 options->strand_option = myargs[ARG_STRAND].intvalue;
1102
1103 /* Input longest intron length is in nucleotide scale; in the lower
1104 level code it will be used in protein scale */
1105 if (myargs[ARG_INTRON].intvalue > 0)
1106 options->longest_intron = myargs[ARG_INTRON].intvalue;
1107
1108
1109 if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) {
1110 seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name,
1111 options, &other_returns, &error_returns, handle_results);
1112 } else {
1113 SeqLocPtr slp1=NULL, slp2=NULL;
1114 if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE)
1115 return 1;
1116 seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL);
1117 SeqLocFree(slp1);
1118 SeqLocFree(slp2);
1119 }
1120
1121 if (error_returns) {
1122 BlastErrorPrint(error_returns);
1123 for (vnp = error_returns; vnp; vnp = vnp->next) {
1124 BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
1125 }
1126 ValNodeFree(error_returns);
1127 }
1128
1129 ka_params = NULL;
1130 ka_params_gap = NULL;
1131 params_buffer = NULL;
1132 mask_loc = NULL;
1133 matrix = NULL;
1134 txmatrix = NULL;
1135 for (vnp=other_returns; vnp; vnp = vnp->next) {
1136 switch (vnp->choice) {
1137 case TXKABLK_NOGAP:
1138 ka_params = vnp->data.ptrvalue;
1139 break;
1140 case TXKABLK_GAP:
1141 ka_params_gap = vnp->data.ptrvalue;
1142 break;
1143 case TXPARAMETERS:
1144 params_buffer = vnp->data.ptrvalue;
1145 break;
1146 case TXMATRIX:
1147 matrix = vnp->data.ptrvalue;
1148 if (matrix && !tabular_output)
1149 txmatrix = BlastMatrixToTxMatrix(matrix);
1150 break;
1151 case SEQLOC_MASKING_NOTSET:
1152 case SEQLOC_MASKING_PLUS1:
1153 case SEQLOC_MASKING_PLUS2:
1154 case SEQLOC_MASKING_PLUS3:
1155 case SEQLOC_MASKING_MINUS1:
1156 case SEQLOC_MASKING_MINUS2:
1157 case SEQLOC_MASKING_MINUS3:
1158 ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
1159 break;
1160 default:
1161 break;
1162 }
1163 }
1164 if (!tabular_output || seqannot_output) {
1165 align_options = 0;
1166 align_options += TXALIGN_MATRIX_VAL;
1167 align_options += TXALIGN_SHOW_QS;
1168 align_options += TXALIGN_COMPRESS;
1169 align_options += TXALIGN_END_NUM;
1170 if (StringICmp("blastx", program_name) == 0) {
1171 align_options += TXALIGN_BLASTX_SPECIAL;
1172 }
1173
1174 if (html)
1175 align_options += TXALIGN_HTML;
1176
1177 seqannot = SeqAnnotNew();
1178 seqannot->type = 2;
1179 AddAlignInfoToSeqAnnot(seqannot, align_type);
1180 seqannot->data = seqalign;
1181 aip = NULL;
1182 if (seqannot_output)
1183 aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w");
1184
1185 if (aip && seqannot) {
1186 SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
1187 AsnIoReset(aip);
1188 aip = AsnIoClose(aip);
1189 }
1190 }
1191 if (!tabular_output) {
1192 AcknowledgeBlastQuery(query_bsp, 70, outfp, believe_query, html);
1193 ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, txmatrix, mask_loc, FormatScoreFunc);
1194
1195 seqannot = SeqAnnotFree(seqannot);
1196 if (txmatrix)
1197 txmatrix = TxMatrixDestruct(txmatrix);
1198 init_buff_ex(85);
1199
1200 if (ka_params) {
1201 PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
1202 }
1203
1204 if (ka_params_gap) {
1205 PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
1206 }
1207
1208 PrintTildeSepLines(params_buffer, 70, outfp);
1209 free_buff();
1210 } else {
1211 PrintTabularOutputHeader(NULL, query_bsp, NULL,
1212 program_name, 0, believe_query, outfp);
1213
1214 BlastPrintTabulatedResults(seqalign, query_bsp, NULL,
1215 1, program_name, !gapped_calculation,
1216 believe_query, 0, 0, outfp, FALSE);
1217 SeqAlignSetFree(seqalign);
1218 }
1219
1220 matrix = BLAST_MatrixDestruct(matrix);
1221 MemFree(ka_params);
1222 MemFree(ka_params_gap);
1223 MemFree(params_buffer);
1224
1225 mask_loc_start = mask_loc;
1226 while (mask_loc) {
1227 SeqLocSetFree(mask_loc->data.ptrvalue);
1228 mask_loc = mask_loc->next;
1229 }
1230 ValNodeFree(mask_loc_start);
1231
1232 fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
1233
1234 other_returns = ValNodeFree(other_returns);
1235 options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
1236 options = BLASTOptionDelete(options);
1237 MemFree(program_name);
1238 FileClose(outfp);
1239
1240 if (entrez_lookup) {
1241 BioseqFree(query_bsp);
1242 BioseqFree(subject_bsp);
1243 } else {
1244 SeqEntryFree(sep);
1245 SeqEntryFree(sep1);
1246 }
1247 return 0;
1248 }
1249
1250
1251 Int2 Main (void)
1252
1253 {
1254 Char buf[256] = { '\0' }; /* Used below for name and version. */
1255 Int2 status = 0; /* return value of function. */
1256
1257 StringCpy(buf, "bl2seq ");
1258 StringNCat(buf, BlastGetVersionNumber(), sizeof(buf)-StringLen(buf)-1);
1259 if (! GetArgs (buf, NUMARG, myargs)) {
1260 return (1);
1261 }
1262
1263 UseLocalAsnloadDataAndErrMsg ();
1264
1265 if (! SeqEntryLoad())
1266 return 1;
1267
1268 ErrSetMessageLevel(SEV_WARNING);
1269
1270 if (myargs[ARG_FORCE_OLD].intvalue != 0)
1271 status = Main_old();
1272 else
1273 status = Main_new();
1274
1275 FreeArgs(NUMARG, myargs);
1276
1277 return status;
1278 }
1279 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |