NCBI C Toolkit Cross Reference

C/demo/bl2seq.c


  1 static char const rcsid[] = "$Id: bl2seq.c,v 6.89 2009/05/27 19:29:17 camacho Exp $";
  2 
  3 /**************************************************************************
  4 *                                                                         *
  5 *                             COPYRIGHT NOTICE                            *
  6 *                                                                         *
  7 * This software/database is categorized as "United States Government      *
  8 * Work" under the terms of the United States Copyright Act.  It was       *
  9 * produced as part of the author's official duties as a Government        *
 10 * employee and thus can not be copyrighted.  This software/database is    *
 11 * freely available to the public for use without a copyright notice.      *
 12 * Restrictions can not be placed on its present or future use.            *
 13 *                                                                         *
 14 * Although all reasonable efforts have been taken to ensure the accuracy  *
 15 * and reliability of the software and data, the National Library of       *
 16 * Medicine (NLM) and the U.S. Government do not and can not warrant the   *
 17 * performance or results that may be obtained by using this software,     *
 18 * data, or derivative works thereof.  The NLM and the U.S. Government     *
 19 * disclaim any and all warranties, expressed or implied, as to the        *
 20 * performance, merchantability or fitness for any particular purpose or   *
 21 * use.                                                                    *
 22 *                                                                         *
 23 * In any work or product derived from this material, proper attribution   *
 24 * of the author(s) as the source of the software or data would be         *
 25 * appreciated.                                                            *
 26 *                                                                         *
 27 ***************************************************************************
 28 *
 29 * $Log: bl2seq.c,v $
 30 * Revision 6.89  2009/05/27 19:29:17  camacho
 31 * Disambiguate eNone enumeration
 32 *
 33 * Revision 6.88  2007/05/07 13:29:11  kans
 34 * added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
 35 *
 36 * Revision 6.87  2007/03/20 14:56:58  camacho
 37 * Call GeneticCodeSingletonInit/GeneticCodeSingletonFini
 38 *
 39 * Revision 6.86  2006/08/28 14:11:37  coulouri
 40 * correct seqids in asn.1 output when input sequences are specified as accessions; fixes rt#15192840
 41 *
 42 * Revision 6.85  2006/08/21 21:47:06  camacho
 43 * Allocate Blast_SummaryReturn structure to avoid dereferencing NULL pointer
 44 *
 45 * Revision 6.84  2006/08/16 16:03:59  coulouri
 46 * do not strlen NULL pointer
 47 *
 48 * Revision 6.83  2006/04/26 12:47:29  madden
 49 * Use SBlastMessage in place of Blast_Message
 50 *
 51 * Revision 6.82  2006/01/13 16:00:02  madden
 52 * BLAST_TwoSeqLocSets now takes SBlastSeqalignArray rather than SeqAlignPtr, remove unused variable
 53 *
 54 * Revision 6.81  2006/01/10 20:43:15  madden
 55 * BLAST_FormatResults now takes SBlastSeqalignArray
 56 *
 57 * Revision 6.80  2005/10/17 14:06:44  madden
 58 * Change message on gap parameter arg
 59 *
 60 * Revision 6.79  2005/08/29 14:45:34  camacho
 61 * From Ilya Dondoshansky:
 62 * Retrieve mask_at_hash option from the SBlastOptions structure instead of
 63 * passing as argument in search API calls
 64 *
 65 * Revision 6.78  2005/06/08 20:32:48  dondosha
 66 * Fixed masking locations memory leak and added comment
 67 *
 68 * Revision 6.77  2005/06/02 20:45:32  dondosha
 69 * Use BlastFormattingInfo structure for formatting
 70 *
 71 * Revision 6.76  2005/05/02 17:00:27  coulouri
 72 * change default to new engine
 73 *
 74 * Revision 6.75  2005/03/16 00:43:40  dondosha
 75 * Correction to previous commit to make reported deflines the same as before
 76 *
 77 * Revision 6.74  2005/03/15 20:59:16  dondosha
 78 * When retrieving Bioseq by gi, try BLAST databases first
 79 *
 80 * Revision 6.73  2005/03/03 15:05:47  dondosha
 81 * Blast_FindRepeatFilterLoc renamed to Blast_FindRepeatFilterSeqLoc
 82 *
 83 * Revision 6.72  2005/02/08 20:43:03  dondosha
 84 * Added repeats filtering for new engine
 85 *
 86 * Revision 6.71  2005/02/07 15:30:39  dondosha
 87 * Removed restriction on the value of longest intron option
 88 *
 89 * Revision 6.70  2005/02/03 18:02:07  dondosha
 90 * Pass summary returns to BLAST_FormatResults, needed for XML output
 91 *
 92 * Revision 6.69  2005/02/02 19:01:36  dondosha
 93 * Use new high level API for performing the search
 94 *
 95 * Revision 6.68  2004/12/29 16:06:20  dondosha
 96 * Small memory leak fix; correction in mask array allocation for non-translated search
 97 *
 98 * Revision 6.67  2004/12/29 15:20:55  dondosha
 99 * Set gapped_calculation option for new engine; allocate appropriate size BlastSeqLoc array in BlastMaskLoc before formatting
100 *
101 * Revision 6.66  2004/10/04 14:05:06  madden
102 * Use Blast_PrintOutputFooter rather than BLAST_PrintSummaryReturns
103 *
104 * Revision 6.65  2004/09/13 15:15:54  madden
105 * Changes for BlastSeqLoc and BlastMaskLoc
106 *
107 * Revision 6.64  2004/08/16 14:26:47  madden
108 * Fix bad frees for blastx filtering locations
109 *
110 * Revision 6.63  2004/08/09 13:08:15  madden
111 * Bl2seq now can run queries with new blast engine
112 *
113 * Revision 6.62  2004/08/03 14:02:02  madden
114 * Introduce enum to make myargs more readable
115 *
116 * Revision 6.61  2004/06/30 16:03:48  kans
117 * include <blfmtutl.h>
118 *
119 * Revision 6.60  2004/02/26 16:25:06  camacho
120 * Fix uninitialized variable errors that caused core dumps on empty input files
121 *
122 * Revision 6.59  2003/06/26 18:56:05  coulouri
123 * remove unnecessary variables
124 *
125 * Revision 6.58  2003/05/30 17:31:09  coulouri
126 * add rcsid
127 *
128 * Revision 6.57  2003/05/13 16:02:42  coulouri
129 * make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
130 *
131 * Revision 6.56  2003/05/06 18:57:46  dondosha
132 * Do not set cutoff_s for megablast, it is not needed
133 *
134 * Revision 6.55  2003/04/22 19:38:00  dondosha
135 * Fix for ungapped search with tabular output
136 *
137 * Revision 6.54  2003/01/16 19:46:28  kans
138 * include accid1.h to fix Mac compiler error on missing symbols
139 *
140 * Revision 6.53  2003/01/10 20:55:36  dondosha
141 * Enable the Entrez client option without a special definition
142 *
143 * Revision 6.52  2003/01/07 15:47:11  dondosha
144 * Hide the -A argument if NCBI_ENTREZ_CLIENT is not defined
145 *
146 * Revision 6.51  2002/09/18 18:21:04  camacho
147 * Fixed memory leak when using the -U option
148 *
149 * Revision 6.50  2002/09/13 18:53:26  dondosha
150 * Corrected the way query and subject deflines are shown in all types of output
151 *
152 * Revision 6.49  2002/08/09 19:41:25  camacho
153 * 1) Added blast version number to command-line options
154 * 2) Added explanations for some default parameters
155 *
156 * Revision 6.48  2002/06/26 21:44:57  dondosha
157 * Set default gap penalties before checking command line options
158 *
159 * Revision 6.47  2002/06/24 18:24:05  dondosha
160 * Multiple memory freeing bug fixed
161 *
162 * Revision 6.46  2002/05/09 15:37:52  dondosha
163 * Call BLASTOptionNewEx instead of BLASTOptionNew, so megablast defaults are set in a central place
164 *
165 * Revision 6.45  2002/05/02 22:18:25  dondosha
166 * Copy bioseq if getting it from ID1, removing non-residue characters from sequence
167 *
168 * Revision 6.44  2002/05/01 16:43:53  dondosha
169 * Call BLASTOptionSetGapParams instead of setting options->matrix
170 *
171 * Revision 6.43  2002/04/29 19:55:26  madden
172 * Use ARG_FLOAT for db length
173 *
174 * Revision 6.42  2002/03/19 23:29:38  dondosha
175 * Do not increment options->wordsize by 4 for megablast any more
176 *
177 * Revision 6.41  2002/03/14 16:11:41  camacho
178 * Extended BlastTwoSequences to allow comparison between sequence and PSSM
179 *
180 * Revision 6.40  2001/07/19 22:05:47  dondosha
181 * Made db_length option a string, to convert to Int8 value
182 *
183 * Revision 6.39  2001/06/21 21:49:26  dondosha
184 * Destroy all error returns
185 *
186 * Revision 6.38  2001/05/11 22:03:21  dondosha
187 * Do not use fake Bioseqs in case of tabulated output
188 *
189 * Revision 6.37  2001/05/09 20:15:31  dondosha
190 * 1. Made program argument non-optional
191 * 2. Added -U argument for masking lower case
192 *
193 * Revision 6.36  2001/05/02 20:00:05  dondosha
194 * Create subject SeqLoc with plus strand (not both) when location is given
195 *
196 * Revision 6.35  2001/04/16 20:46:00  dondosha
197 * Improved error message when sequence not found in Entrez
198 *
199 * Revision 6.34  2001/04/04 20:30:05  dondosha
200 * Fixed a typo
201 *
202 * Revision 6.33  2001/04/03 21:59:50  dondosha
203 * Implemented tabulated output for non-megablast bl2seq
204 *
205 * Revision 6.32  2001/03/19 22:39:24  dondosha
206 * Allow location on the first query sequence for megablast
207 *
208 * Revision 6.31  2001/03/02 20:30:20  dondosha
209 * Typo fix
210 *
211 * Revision 6.30  2001/02/16 18:45:39  dondosha
212 * Fixed minor purify errors
213 *
214 * Revision 6.29  2001/02/07 21:18:42  dondosha
215 * Moved the MegaBlastPrintAlignInfo callback to blastool.c
216 *
217 * Revision 6.28  2001/01/24 20:51:50  dondosha
218 * Enabled splitting of the second sequence for 2 sequences with megablast
219 *
220 * Revision 6.27  2001/01/23 22:20:04  dondosha
221 * Do not free subject id in megablast callback
222 *
223 * Revision 6.26  2001/01/17 20:34:11  dondosha
224 * Use the subject id with best accession in the MegaBlastPrintAlignInfo callback
225 *
226 * Revision 6.25  2001/01/12 18:28:09  dondosha
227 * Pass the best accession ids in the SeqLocs in case of partial sequences
228 *
229 * Revision 6.24  2001/01/09 20:53:01  dondosha
230 * Locations start from 1; added handling of erroneous location input
231 *
232 * Revision 6.23  2001/01/09 20:16:27  dondosha
233 * Implemented from-to location options for both sequences in bl2seq
234 *
235 * Revision 6.22  2000/12/13 22:28:06  dondosha
236 * Unlock bioseqs in the end if they were obtained from entrez lookup
237 *
238 * Revision 6.21  2000/11/21 15:47:21  dondosha
239 * Corrected default wordsize for megablast option
240 *
241 * Revision 6.20  2000/11/15 22:21:47  dondosha
242 * Corrected default wordsize for Mega BLAST
243 *
244 * Revision 6.19  2000/11/09 15:01:00  dondosha
245 * Set longest intron length in options in nucleotide coordinates
246 *
247 * Revision 6.18  2000/11/08 22:24:07  dondosha
248 * Enabled new tblastn by adding longest intron option
249 *
250 * Revision 6.17  2000/11/08 18:22:46  kans
251 * includes <mblast.h> for Mac compiler
252 *
253 * Revision 6.16  2000/11/02 20:23:27  dondosha
254 * Allow megablast one-line style output; allow accession or gi input if NCBI_ENTREZ_CLIENT is defined
255 *
256 * Revision 6.15  2000/10/30 19:05:44  madden
257 * Added -Y search space option
258 *
259 * Revision 6.14  2000/10/20 20:10:01  dondosha
260 * Revert previous change - would fail on some platforms
261 *
262 * Revision 6.13  2000/10/20 19:45:16  dondosha
263 * Changed -d option type to string to allow 8-byte integer db sizes
264 *
265 * Revision 6.12  2000/10/13 20:32:32  madden
266 * Added call to AcknowledgeBlastQuery
267 *
268 * Revision 6.11  2000/10/06 21:37:08  dondosha
269 * Set Mega BlAST default parameters correctly before calling the engine
270 *
271 * Revision 6.10  2000/10/05 22:44:28  dondosha
272 * Set block_width to 0 for Mega BLAST - it has different meaning
273 *
274 * Revision 6.9  2000/10/03 15:16:12  madden
275 * Set program name in call to BlastTwoSequencesEx
276 *
277 * Revision 6.8  2000/09/12 21:48:41  dondosha
278 * Pass the correct scoring matrix to ShowTextAlignFromAnnot
279 *
280 * Revision 6.7  2000/06/15 15:29:57  dondosha
281 * Fixed several memory leaks; tblastn and tblastx enabled
282 *
283 * Revision 6.6  2000/05/24 20:36:08  dondosha
284 * If megablast is used, set cutoff_s and cutoff_s2 parameters
285 *
286 * Revision 6.5  2000/04/10 15:23:33  dondosha
287 * Added option to use MegaBlast for search
288 *
289 * Revision 6.2  1999/11/26 20:16:11  vakatov
290 * Added <sqnutils.h> to pick up proto of 'UseLocalAsnloadDataAndErrMsg()'
291 *
292 * Revision 6.1  1999/07/06 18:48:20  madden
293 * Compares two sequences
294 */
295 
296 #include <ncbi.h>
297 #include <objseq.h>
298 #include <objsset.h>
299 #include <sequtil.h>
300 #include <seqport.h>
301 #include <tofasta.h>
302 #include <blast.h>
303 #include <blastpri.h>
304 #include <txalign.h>
305 #include <sqnutils.h>
306 #include <mblast.h>
307 #include <accid1.h>
308 #include <blfmtutl.h>
309 
310 #include <algo/blast/api/twoseq_api.h>
311 #include <algo/blast/api/blast_format.h>
312 #include <algo/blast/api/blast_seq.h>
313 #include <algo/blast/api/repeats_filter.h>
314 #include <algo/blast/core/blast_util.h>
315 #include <algo/blast/api/blast_api.h>
316 
317 #define LOCAL_BUFLEN 255
318 static BioseqPtr
319 BioseqFromAccession(CharPtr accver, Boolean is_na)
320 {
321    CharPtr accession, version_str;
322    Int4 version=0, gi, number;
323    SeqIdPtr sip = NULL;
324    TextSeqIdPtr tsip;
325    PDBSeqIdPtr  psip;
326    BioseqPtr bsp = NULL, bsp_tmp = NULL;
327    SeqPortPtr spp;
328    Int2 retval, buf_length=512;
329    Uint1 buf[512];
330    char* defline = NULL;
331    char* dummy_ptr = NULL;
332 
333    if (!ID1BioseqFetchEnable ("bl2seq", TRUE))
334       ErrPostEx(SEV_FATAL, 1, 0, 
335                 "Entrez access interface currently unavailable\n");
336    if (!IS_DIGIT(*accver)) {
337       accession = StringTokMT(accver, ".", &version_str);
338       if (version_str)
339          version = atoi(version_str);
340       
341       if((sip = ValNodeNew (NULL)) == NULL)
342          return NULL;
343       if((tsip = TextSeqIdNew ()) == NULL)
344          return NULL;
345       
346       tsip->accession = StringSave(accession);
347       tsip->version = version;
348       /* GenBank, EMBL, and DDBJ. */
349       sip->choice = SEQID_GENBANK;
350       sip->data.ptrvalue = (Pointer) tsip;
351       gi = ID1FindSeqId (sip);
352       
353       if (gi == 0) {
354          /* SwissProt. */
355          sip->choice = SEQID_SWISSPROT;
356          gi = ID1FindSeqId (sip);
357       }
358       if (gi == 0) {
359         /* PIR */
360          sip->choice = SEQID_PIR;
361          gi = ID1FindSeqId (sip);
362       }
363       
364       if (gi == 0) {
365          /* PRF */
366          sip->choice = SEQID_PRF;
367          gi = ID1FindSeqId (sip);
368       }
369       
370       if (gi == 0) {
371          /* OTHER, probably 'ref' */
372          sip->choice = SEQID_OTHER;
373          gi = ID1FindSeqId (sip);
374       }
375       
376       if(gi == 0) {
377          /* OK. We failed to find gi using string as TextSeqId. Now trying
378             last time - with PDBSeqIdPtr */
379          
380          if((psip = PDBSeqIdNew()) == NULL)
381             return NULL;
382          
383          sip->choice = SEQID_PDB;
384          tsip = TextSeqIdFree(tsip);
385          sip->data.ptrvalue = psip;
386          
387          psip->mol = accession;
388          psip->chain = version;
389          
390          gi = ID1FindSeqId (sip);
391       }
392 
393       if (gi == 0) {
394          ErrPostEx(SEV_WARNING, 0, 0, "Sequence %s not found\n", accver);
395          return NULL;
396       }
397       sip = SeqIdFree(sip);
398    } else
399       gi = atoi(accver);
400 
401    ID1BioseqFetchDisable();
402 
403    if (gi > 0) {
404        /* First attempt to retrieve Bioseq from BLAST databases. */
405        char* db_name = (is_na ? "nucl_dbs" : "prot_dbs");
406 
407        ValNodeAddInt(&sip, SEQID_GI, gi);
408        ReadDBBioseqFetchEnable ("bl2seq", db_name, is_na, TRUE);
409        bsp_tmp = BioseqLockById(sip);
410        ReadDBBioseqFetchDisable();
411 
412        if (!bsp_tmp) {
413            /* Try ID1 again as a last resort. */
414            ID1BioseqFetchEnable("bl2seq", TRUE);
415            bsp_tmp = BioseqLockById(sip);
416            ID1BioseqFetchDisable();
417        }
418        sip = SeqIdFree(sip);
419    }
420    
421    if (!bsp_tmp) {
422        ErrPostEx(SEV_WARNING, 0, 0, "Gi %ld not found", gi);
423        return NULL;
424    }
425    
426    if (ISA_na(bsp_tmp->mol) != is_na) {
427       BioseqUnlock(bsp_tmp);
428       if (is_na)
429          ErrPostEx(SEV_FATAL, 1, 0, 
430                    "%s is a protein sequence, program requires nucleotide", 
431                    accver);
432       else
433          ErrPostEx(SEV_FATAL, 1, 0, 
434                    "%s is a nucleotide sequence, program requires protein", 
435                    accver);
436       return NULL;
437    }
438 
439    bsp = AsnIoMemCopy(bsp_tmp, (AsnReadFunc) BioseqAsnRead, (AsnWriteFunc) BioseqAsnWrite);
440    SeqMgrDeleteFromBioseqIndex(bsp_tmp);
441    BioseqUnlock(bsp_tmp);
442    BioseqPack(bsp);
443    
444    return bsp;
445 }
446                 
/* Number of entries in the myargs command-line table. */
#define NUMARG (sizeof(myargs)/sizeof(myargs[0]))

/**
 * Symbolic indices into the myargs table.  The enumerators must stay in the
 * same order as the entries of myargs; the flag letter listed next to each
 * one is the tag of the corresponding myargs entry.
 */
typedef enum {
   ARG_QUERY = 0,      /* -i  first sequence */
   ARG_SUBJECT,        /* -j  second sequence */
   ARG_PROGRAM,        /* -p  program name */
   ARG_GAPPED,         /* -g  gapped alignment */
   ARG_OUT,            /* -o  alignment output file */
   ARG_DBSIZE,         /* -d  theoretical database size */
   ARG_ASNOUT,         /* -a  text ASN.1 output file */
   ARG_GAPOPEN,        /* -G  cost to open a gap */
   ARG_GAPEXT,         /* -E  cost to extend a gap */
   ARG_XDROP,          /* -X  X dropoff for gapped alignment */
   ARG_WORDSIZE,       /* -W  word size */
   ARG_MATRIX,         /* -M  scoring matrix */
   ARG_MISMATCH,       /* -q  nucleotide mismatch penalty */
   ARG_MATCH,          /* -r  nucleotide match reward */
   ARG_FILTER,         /* -F  query filtering */
   ARG_EVALUE,         /* -e  expectation value */
   ARG_STRAND,         /* -S  query strands to search */
   ARG_HTML,           /* -T  HTML output */
   ARG_USEMEGABLAST,   /* -m  use Mega BLAST */
   ARG_SEARCHSP,       /* -Y  effective search space length */
   ARG_INTRON,         /* -t  longest intron length */
   ARG_LOC1,           /* -I  location on first sequence */
   ARG_LOC2,           /* -J  location on second sequence */
   ARG_FORMAT,         /* -D  output format */
   ARG_LCASE,          /* -U  lower case filtering */
   ARG_ACCN,           /* -A  inputs are accession.version */
   ARG_FORCE_OLD       /* -V  force legacy BLAST engine */
} BlastArguments;
478 
479 static Args myargs [] = {
480   { "First sequence",            
481         NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_QUERY */
482   { "Second sequence",
483         NULL, NULL, NULL, FALSE, 'j', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_SUBJECT */
484   { "Program name: blastp, blastn, blastx, tblastn, tblastx. For blastx 1st sequence should be nucleotide, tblastn 2nd sequence nucleotide",
485         NULL, NULL, NULL, FALSE, 'p', ARG_STRING, 0.0, 0, NULL}, /* ARG_PROGRAM */
486   { "Gapped",
487         "T", NULL, NULL, FALSE, 'g', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_GAPPED */
488   { "alignment output file",
489         "stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}, /* ARG_OUT */
490   { "theor. db size (zero is real size)", 
491         "0", NULL, NULL, FALSE, 'd', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_DBSIZE */
492   { "Text ASN.1 output file",
493         NULL, NULL, NULL, TRUE, 'a', ARG_FILE_OUT, 0.0, 0, NULL}, /* ARG_ASNOUT */
494   { "Cost to open a gap (-1 invokes default behavior)",
495         "-1", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL}, /* ARG_GAPOPEN */
496   { "Cost to extend a gap (-1 invokes default behavior)",
497         "-1", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL}, /* ARG_GAPEXT */
498   { "X dropoff value for gapped alignment (in bits) (zero invokes default "
499     "behavior)\n      blastn 30, megablast 20, tblastx 0, all others 15",
500         "0", NULL, NULL, FALSE, 'X', ARG_INT, 0.0, 0, NULL}, /* ARG_XDROP */
501   { "Word size, default if zero (blastn 11, megablast 28, "
502         "all others 3)",
503         "0", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL}, /* ARG_WORDSIZE */
504   { "Matrix",
505         "BLOSUM62", NULL, NULL, FALSE, 'M', ARG_STRING, 0.0, 0, NULL}, /* ARG_MATRIX */
506   { "Penalty for a nucleotide mismatch (blastn only)",
507         "-3", NULL, NULL, FALSE, 'q', ARG_INT, 0.0, 0, NULL}, /* ARG_MISMATCH */
508   { "Reward for a nucleotide match (blastn only)",
509         "1", NULL, NULL, FALSE, 'r', ARG_INT, 0.0, 0, NULL}, /* ARG_MATCH */
510   { "Filter query sequence (DUST with blastn, SEG with others)",
511         "T", NULL, NULL, FALSE, 'F', ARG_STRING, 0.0, 0, NULL}, /* ARG_FILTER */
512   { "Expectation value (E)",
513         "10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_EVALUE */
514   { "Query strands to search against database (blastn only).  3 is both, 1 is top, 2 is bottom",
515         "3", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL}, /* ARG_STRAND */
516   { "Produce HTML output",
517         "F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_HTML */
518   { "Use Mega Blast for search",
519         "F", NULL, NULL, TRUE, 'm', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_USEMEGABLAST */
520   { "Effective length of the search space (use zero for the real size)",
521         "0", NULL, NULL, FALSE, 'Y', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_SEARCHSP */
522   { "Length of the largest intron allowed in tblastn for linking HSPs",
523         "0", NULL, NULL, FALSE, 't', ARG_INT, 0.0, 0, NULL}, /* ARG_INTRON */
524   { "Location on first sequence",
525         NULL, NULL, NULL, TRUE, 'I', ARG_STRING, 0.0, 0, NULL}, /* ARG_LOC1 */
526   { "Location on second sequence",
527         NULL, NULL, NULL, TRUE, 'J', ARG_STRING, 0.0, 0, NULL}, /* ARG_LOC2 */
528   { "Output format: 0 - traditional, 1 - tabular", 
529         "0", NULL, NULL, FALSE, 'D', ARG_INT, 0.0, 0, NULL}, /* ARG_FORMAT */
530   { "Use lower case filtering for the query sequence",
531         "F", NULL, NULL, TRUE, 'U', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_LCASE */
532   { "Input sequences in the form of accession.version",
533         "F", NULL, NULL, FALSE, 'A', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_ACCN */
534   {"Force use of the legacy BLAST engine",
535         "F", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL}  /* ARG_FORCE_OLD */
536 };
537 
538 /**
539  * Fetches sequences filling in either just the Bioseq's (if fetch from Entrez) or
540  * both the BioseqPtr's and the SeqEntryPtr's (if read from FASTA).  The lcase_mask 
541  * is also filled in with letters in query that were lower-case if myargs[ARG_LCASE].intvalue
542  * is non-zero.
543  *
544  * @param seq1_is_na the query sequence is DNA if true [in]
545  * @param seq2_is_na the subject sequence is DNA if true [in]
546  * @param query_bsp pointer to query BioseqPtr, to be filled in [out]
547  * @param subject_bsp pointer to subject BioseqPtr, to be filled in [out]
548  * @param sep pointer to query SeqEntryPtr, to be filled in [out]
549  * @param sep1 pointer to subject SeqEntryPtr, to be filled in [out]
550  * @param lcase_mask pointer to lower-case masking data to be filled in [out]
551  * @return TRUE on success, FALSE on failure.
552 */
553 
554 static Int4
555 BL2SEQ_GetSequences(Boolean seq1_is_na, Boolean seq2_is_na, BioseqPtr *query_bsp, BioseqPtr *subject_bsp, 
556     SeqEntryPtr *sep, SeqEntryPtr *sep1, SeqLocPtr *lcase_mask, Boolean believe_query)
557 {
558         Boolean entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
559         char *query_accver = NULL, *subject_accver = NULL;  /* Used if entrez_lookup. */
560         char *blast_inputfile = NULL, *blast_inputfile1 = NULL;  /* Used if FASTA read. */
561         
562         if (entrez_lookup) {
563            query_accver = myargs [ARG_QUERY].strvalue;
564            subject_accver = myargs [ARG_SUBJECT].strvalue;
565         } else {
566            blast_inputfile = myargs [ARG_QUERY].strvalue;
567            blast_inputfile1 = myargs [ARG_SUBJECT].strvalue;
568         }
569 
570         if (entrez_lookup) {
571            *query_bsp = BioseqFromAccession(query_accver, seq1_is_na);
572         } else {
573            FILE *infp;
574            if ((infp = FileOpen(blast_inputfile, "r")) == NULL)
575            {
576                 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", blast_inputfile);
577                 return FALSE;
578            }
579            if (myargs[ARG_LCASE].intvalue)
580               *sep = FastaToSeqEntryForDb(infp, seq1_is_na, NULL, 
581                                          believe_query, NULL, NULL, 
582                                          lcase_mask);
583            else
584               *sep = FastaToSeqEntryEx(infp, seq1_is_na, NULL, believe_query);
585 
586            FileClose(infp);
587 
588            if (*sep != NULL) {
589               *query_bsp = NULL;
590               if (seq1_is_na)
591                  SeqEntryExplore(*sep, query_bsp, FindNuc);
592               else
593                  SeqEntryExplore(*sep, query_bsp, FindProt);
594 
595            }
596         }
597         if (*query_bsp == NULL) {
598            ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
599            return FALSE;
600         }
601 
602         if (entrez_lookup) {
603            *subject_bsp = 
604               BioseqFromAccession(subject_accver, seq2_is_na);
605         } else {
606            FILE *infp1;
607            if ((infp1 = FileOpen(blast_inputfile1, "r")) == NULL)
608            {
609                 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", blast_inputfile1);
610                 return FALSE;
611            }
612            *sep1 = FastaToSeqEntryEx(infp1, seq2_is_na, NULL, FALSE);
613 
614            FileClose(infp1);
615 
616            if (*sep1 != NULL) {
617               *subject_bsp = NULL;
618               if (seq2_is_na)
619                  SeqEntryExplore(*sep1, subject_bsp, FindNuc);
620               else
621                  SeqEntryExplore(*sep1, subject_bsp, FindProt);
622               
623            }
624         }
625         
626         if (*subject_bsp == NULL) {
627            ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
628            return FALSE;
629         }
630 
631         return TRUE;
632 }
633 
634 /**
635  * Creates SeqLoc's from the given BioseqPtr's.  if myargs for ARG_LOC1 or ARG_LOC2 are non-NULL, 
636  * these are used in the creation of the SeqLoc's.
637  *
638  * @param bsp1 the query BioseqPtr [in]
639  * @param bsp2 the subject BioseqPtr [in]
640  * @param slp1 the query SeqLocPtr to be filled in [out]
641  * @param slp2 the subject SeqLocPtr to be filled in [out]
642  * @param strand_option specifies strand of slp1 [in]
643  * @return TRUE on success, FALSE on failure.
644 */
645 
646 static Boolean
647 BL2SEQ_MakeSeqLoc(const BioseqPtr bsp1, const BioseqPtr bsp2, SeqLocPtr *slp1, SeqLocPtr *slp2, Uint1 strand_option)
648 {
649        const char* k_delimiters = " ,;";
650        CharPtr location;
651        Int4 from, to;
652 
653        *slp1 = NULL;
654        *slp2 = NULL;
655 
656        location = myargs[ARG_LOC1].strvalue;
657        if (location) {
658            from = atoi(StringTokMT(location, k_delimiters, &location)) - 1;
659            to = atoi(location) - 1;
660 
661             from = MAX(from, 0);
662             if (to < 0) 
663                  to = bsp1->length - 1;
664             to = MIN(to, bsp1->length - 1);
665             if (from >= bsp1->length) {
666                  ErrPostEx(SEV_FATAL, 1, 0, 
667                            "Location outside of the first sequence range\n");
668                  return FALSE;
669             }
670             *slp1 = SeqLocIntNew(from, to, strand_option,
671                                  SeqIdFindBestAccession(bsp1->id));
672         } else if (strand_option != Seq_strand_both) {
673             *slp1 = SeqLocIntNew(0, bsp1->length-1, strand_option,
674                                  SeqIdFindBestAccession(bsp1->id));
675         } else
676               ValNodeAddPointer(slp1, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp1->id)));
677 
678         location = myargs[ARG_LOC2].strvalue;
679         if (location) {
680             from = atoi(StringTokMT(location, k_delimiters, &location)) - 1;
681             to = atoi(location) - 1;
682 
683             from = MAX(from, 0);
684             if (to < 0) 
685                 to = bsp2->length - 1;
686             to = MIN(to, bsp2->length - 1);
687             if (from >= bsp2->length) {
688                ErrPostEx(SEV_FATAL, 1, 0, 
689                            "Location outside of the second sequence range\n");
690                return FALSE;
691             }
692             *slp2 = SeqLocIntNew(from, to, Seq_strand_plus, SeqIdFindBestAccession(bsp2->id));
693          } else
694             ValNodeAddPointer(slp2, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp2->id)));
695 
696        return TRUE;
697 }
698 
699 /**
700  * Initializes and sets the summary options based upon the command-line args.
701  *
702  * @param ret_options object to be initialized and filled in [out]
703  * @param program_number specifies blastn/blastp/blastx etc. [in]
704  * @return TRUE on success, FALSE on failure.
705  */
706 static Boolean
707 Bl2SEQ_SummaryOptionsSet(BLAST_SummaryOptions* *ret_options, EBlastProgramType program_number)
708 {
709         BLAST_SummaryOptions* options;
710 
711         if (BLAST_SummaryOptionsInit(&options) != 0)
712         {
713                 ErrPostEx(SEV_FATAL, 1, 0, "SummaryOptionsInit failed.");
714                 return FALSE;
715         }
716 
717         options->hint = eBlastHint_None;
718 
719         switch (program_number) {
720             case eBlastTypeBlastn:
721                options->program = eBlastn;
722                break;
723             case eBlastTypeBlastp:
724                options->program = eBlastp;
725                break;
726             case eBlastTypeBlastx:
727                options->program = eBlastx;
728                break;
729             case eBlastTypeTblastn:
730                options->program = eTblastn;
731                break;
732             case eBlastTypeTblastx:
733                options->program = eTblastx;
734                break;
735             default:  
736                ErrPostEx(SEV_FATAL, 1, 0, "Program_number (%ld) not valid in Bl2SEQ_SummaryOptionsSet", (long) program_number);
737                BLAST_SummaryOptionsFree(options);
738                return FALSE;
739         }
740 
741         options->cutoff_evalue = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
742 
743         if (options->program == eBlastn)
744         {
745             options->nucleotide_mismatch = myargs[ARG_MISMATCH].intvalue;
746             options->nucleotide_match = myargs[ARG_MATCH].intvalue;
747             if (myargs[ARG_USEMEGABLAST].intvalue > 0)
748                 options->use_megablast = TRUE;
749         }
750 
751         if (myargs[ARG_GAPOPEN].intvalue != -1)
752               options->gap_open = myargs[ARG_GAPOPEN].intvalue;
753 
754         if (myargs[ARG_GAPEXT].intvalue != -1)
755                options->gap_extend = myargs[ARG_GAPEXT].intvalue;
756 
757         options->strand = myargs[ARG_STRAND].intvalue;
758 
759         if (myargs[ARG_WORDSIZE].intvalue != 0)
760                options->word_size = myargs[ARG_WORDSIZE].intvalue;
761 
762         if (myargs[ARG_MATRIX].strvalue)
763                options->matrix = StringSave(myargs[ARG_MATRIX].strvalue);
764 
765         if (myargs[ARG_FILTER].strvalue)
766                options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
767 
768         if (myargs[ARG_XDROP].intvalue != 0)
769         {
770                options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
771         }
772 
773         if (program_number != eBlastTypeTblastx)
774             options->gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
775         else
776             options->gapped_calculation = FALSE;
777 
778         options->db_length = myargs[ARG_DBSIZE].floatvalue;
779 
780         *ret_options = options;
781 
782         return TRUE;
783 }
784 
785 Int2 Main_new(void)
786 
787 {
788         BioseqPtr query_bsp=NULL, subject_bsp=NULL;
789         BioseqPtr bsp1=NULL, bsp2=NULL;
790         BioseqPtr fake_bsp=NULL, fake_subject_bsp=NULL;
791         BlastFormattingInfo* format_info = NULL;
792         BLAST_SummaryOptions* options=NULL;
793         Blast_SummaryReturn* extra_returns = Blast_SummaryReturnNew();
794         Boolean believe_query= FALSE;
795         Boolean seq1_is_na, seq2_is_na;  /* seq1/2 is DNA if TRUE. */
796         Boolean seqannot_output;   /* SeqAlign will be output. */
797         Boolean entrez_lookup;     /* QUery/subject fetched from Entrez. */
798         Boolean mask_at_hash=FALSE;  /* masking only on lookup table if TRUE. */
799         DbtagPtr        dbtagptr;
800         EBlastProgramType program_number;
801         Int2 status; /* return value */
802         EAlignView align_view = eAlignViewPairwise; /* Used for formatting */
803         SeqAlignPtr seqalign=NULL;
804         SeqEntryPtr sep=NULL, sep1=NULL;
805         SeqLocPtr slp1, slp2;   /* Used for actual search. */
806         SeqLocPtr filter_loc=NULL;  /* Location of regions filtered (returned by engine) */
807         SeqLocPtr lcase_mask=NULL;    /* For lower-case masking info from query FASTA. */
808         SeqLoc* repeat_mask = NULL; /* Repeat mask locations */
809         Uint1 strand_option = 0; /* FIXME */
810         SBlastOptions* search_options = NULL; /* Needed for formatting. */
811         SBlastSeqalignArray* seqalign_arr = NULL;
812         GeneticCodeSingletonInit();
813         
814         strand_option = (Uint1) myargs[ARG_STRAND].intvalue;
815 
816         entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
817         seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
818         believe_query = (seqannot_output || entrez_lookup); 
819         /* Non-zero value for -m option means tabular output. */
820         if (myargs[ARG_FORMAT].intvalue != 0)
821            align_view = eAlignViewTabularWithComments; 
822 
823         BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number);
824 
825         seq1_is_na = (program_number == eBlastTypeBlastn ||
826                   program_number == eBlastTypeBlastx ||
827                   program_number == eBlastTypeRpsTblastn ||
828                   program_number == eBlastTypeTblastx);
829 
830         seq2_is_na = (program_number == eBlastTypeBlastn ||
831                program_number == eBlastTypeTblastn ||
832                program_number == eBlastTypeTblastx);
833 
834         if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
835                                 &sep, &sep1, &lcase_mask, believe_query) 
836             == FALSE)
837         {
838                 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
839                 return (1);
840         }
841 
842         if (!entrez_lookup) {
843             if (!believe_query)
844                 fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
845             
846             fake_subject_bsp = BioseqNew();
847             fake_subject_bsp->descr = subject_bsp->descr;
848             fake_subject_bsp->repr = subject_bsp->repr;
849             fake_subject_bsp->mol = subject_bsp->mol;
850             fake_subject_bsp->length = subject_bsp->length;
851             fake_subject_bsp->seq_data = subject_bsp->seq_data;
852             fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
853             dbtagptr = DbtagNew();
854             dbtagptr->db = StringSave("BL_ORD_ID");
855             dbtagptr->tag = ObjectIdNew();
856 
857             if (BioseqGetTitle(subject_bsp) != NULL)
858               dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
859             else
860               dbtagptr->tag->str = StringSave("No definition line found");
861 
862             ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
863             bsp1 = (believe_query ? query_bsp : fake_bsp);
864             bsp2 = fake_subject_bsp;
865         } else { /* Query and subject Bioseqs are already "fake". */
866             bsp1 = query_bsp;
867             bsp2 = subject_bsp;
868         }
869 
870         if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, strand_option) == FALSE)
871                 return 1;
872 
873         if (Bl2SEQ_SummaryOptionsSet(&options, program_number) == FALSE)
874                 return 1;
875 
876         /* Find repeat mask, if necessary */
877         if ((status = Blast_FindRepeatFilterSeqLoc(slp1, myargs[ARG_FILTER].strvalue,
878                                 &repeat_mask, &extra_returns->error)) != 0)
879         {
880             if (extra_returns && extra_returns->error)
881             {
882                    ErrSev max_sev = SBlastMessageErrPost(extra_returns->error);
883                    if (max_sev >= SEV_ERROR)
884                          return status;
885             }
886         }
887 
888         /* Combine repeat mask with lower case mask */
889         if (repeat_mask)
890             lcase_mask = ValNodeLink(&lcase_mask, repeat_mask);
891         
892         status = BLAST_TwoSeqLocSets(options, slp1, slp2, lcase_mask, &seqalign_arr, 
893                                      &filter_loc, &mask_at_hash, 
894                                      &extra_returns);
895 
896         /* Free the lower case mask in SeqLoc form. */
897         lcase_mask = Blast_ValNodeMaskListFree(lcase_mask);
898 
899         /* Post warning or error messages, no matter what the search status 
900            was. */
901         SBlastMessageErrPost(extra_returns->error);
902 
903         if (status != 0)
904         {
905                 ErrPostEx(SEV_FATAL, 1, 0, "BLAST_TwoSeqLocSets failed");
906                 return status;
907         }
908 
909         if (myargs[ARG_ASNOUT].strvalue && seqalign_arr) {
910             AsnIoPtr asnout =
911                AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w");
912             GenericSeqAlignSetAsnWrite(seqalign_arr->array[0], asnout);
913             asnout = AsnIoClose(asnout);
914         }
915 
916         /* Pass NULL for the database name, since there is no database. */
917         BlastFormattingInfoNewBasic(align_view, options, slp1, 
918                                     myargs[ARG_OUT].strvalue, &search_options,
919                                     &format_info);
920         
921         /* Always show gis in the output, hence pass TRUE for respective 
922            argument. */
923         BlastFormattingInfoSetUpOptions(format_info, 0, 1,
924                                         (Boolean) myargs[ARG_HTML].intvalue,
925                                         (Boolean) myargs[ARG_USEMEGABLAST].intvalue,
926                                         TRUE, believe_query);
927 
928         /* If masking was at hash only, free the masking locations,
929          * to prevent them from being used for formatting.
930          */
931         if (SBlastOptionsGetMaskAtHash(search_options))
932             filter_loc = Blast_ValNodeMaskListFree(filter_loc);
933 
934         /* Format the results */
935         status = 
936             BLAST_FormatResults(seqalign_arr, 1, slp1, filter_loc, format_info, 
937                                 extra_returns);
938         
939         status = Blast_PrintOutputFooter(format_info, extra_returns);
940 
941         /* Free masking locations if they haven't been freed already. */
942         filter_loc = Blast_ValNodeMaskListFree(filter_loc);
943 
944         format_info = BlastFormattingInfoFree(format_info);
945         extra_returns = Blast_SummaryReturnFree(extra_returns);
946         search_options = SBlastOptionsFree(search_options);
947 
948         if (entrez_lookup) {
949            BioseqFree(query_bsp);
950            BioseqFree(subject_bsp);
951         } else {
952            SeqEntryFree(sep);
953            SeqEntryFree(sep1);
954         }
955 
956         options = BLAST_SummaryOptionsFree(options);
957         seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
958         slp1 = SeqLocSetFree(slp1);
959         slp2 = SeqLocSetFree(slp2);
960 
961         fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
962         GeneticCodeSingletonFini();
963 
964         return 0;
965 
966 }
967 
968 Int2 Main_old (void)
969  
970 {
971         
972         AsnIoPtr aip;
973         BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL, 
974                   subject_bsp = NULL;
975         BioseqPtr bsp1, bsp2;
976         BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
977         BLAST_OptionsBlkPtr options=NULL;
978         Boolean seq1_is_na, seq2_is_na;
979         CharPtr params_buffer=NULL;
980         DbtagPtr        dbtagptr;
981         Uint1 align_type;
982         Uint4 align_options;
983         SeqAlignPtr  seqalign;
984         SeqAnnotPtr seqannot;
985         SeqEntryPtr sep = NULL, sep1 = NULL;
986         CharPtr program_name, blast_outputfile;
987         FILE *outfp;
988         ValNodePtr  mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL;
989         BLAST_MatrixPtr matrix;
990         Int4Ptr PNTR txmatrix;
991         int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL;
992         Boolean entrez_lookup = FALSE;
993         Boolean html, seqannot_output, believe_query;
994         Uint1 tabular_output;
995         Boolean gapped_calculation;
996 
997         entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
998         html = (Boolean) myargs[ARG_HTML].intvalue;
999         seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
1000 
1001         blast_outputfile = myargs [ARG_OUT].strvalue;
1002 
1003         program_name = StringSave(myargs[ARG_PROGRAM].strvalue);
1004         if (StringCmp(program_name, "blastn") && 
1005             StringCmp(program_name, "blastp") && 
1006             StringCmp(program_name, "blastx") && 
1007             StringCmp(program_name, "tblastn") && 
1008             StringCmp(program_name, "tblastx")) {
1009                 ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n");
1010                 return (1);
1011         }
1012            
1013         align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na);
1014 
1015         if ((outfp = FileOpen(blast_outputfile, "w")) == NULL)
1016         {
1017                 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
1018                 return (1);
1019         }
1020 
1021         gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
1022         believe_query = (seqannot_output || entrez_lookup); 
1023 
1024         options = BLASTOptionNewEx(program_name, gapped_calculation,
1025                                    (Boolean) myargs[ARG_USEMEGABLAST].intvalue);
1026 
1027         if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp,
1028                                 &sep, &sep1, &(options->query_lcase_mask), 
1029                                 believe_query) == FALSE)
1030         {
1031             ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
1032             return (1);
1033         }
1034 
1035         if (!entrez_lookup) {
1036             if (!believe_query)
1037                 fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
1038             
1039             fake_subject_bsp = BioseqNew();
1040             fake_subject_bsp->descr = subject_bsp->descr;
1041             fake_subject_bsp->repr = subject_bsp->repr;
1042             fake_subject_bsp->mol = subject_bsp->mol;
1043             fake_subject_bsp->length = subject_bsp->length;
1044             fake_subject_bsp->seq_data = subject_bsp->seq_data;
1045             fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
1046             dbtagptr = DbtagNew();
1047             dbtagptr->db = StringSave("BL_ORD_ID");
1048             dbtagptr->tag = ObjectIdNew();
1049 
1050             if (BioseqGetTitle(subject_bsp) != NULL)
1051               dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
1052             else
1053               dbtagptr->tag->str = StringSave("No definition line found");
1054 
1055             ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
1056             bsp1 = (believe_query ? query_bsp : fake_bsp);
1057             bsp2 = fake_subject_bsp;
1058         } else {
1059             bsp1 = query_bsp;
1060             bsp2 = subject_bsp;
1061         }
1062 
1063         tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue; 
1064 
1065 
1066         if (myargs[ARG_SEARCHSP].floatvalue)
1067            options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;
1068 
1069 
1070         options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
1071         options->expect_value  = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
1072 
1073         if (StringICmp("blastn", program_name) == 0)
1074         {
1075                 options->penalty = myargs[ARG_MISMATCH].intvalue;
1076                 options->reward = myargs[ARG_MATCH].intvalue;
1077         }
1078 
1079         options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
1080 
1081         options->discontinuous = FALSE;
1082 
1083         if (myargs[ARG_XDROP].intvalue != 0)
1084         {
1085                options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
1086         }
1087         if (myargs[ARG_WORDSIZE].intvalue != 0)
1088                options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue;
1089 
1090         if (options->is_megablast_search) {
1091            options->cutoff_s2 = options->wordsize*options->reward;
1092         }
1093         options->matrix = MemFree(options->matrix);
1094         BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0); 
1095 
1096         if (myargs[ARG_GAPOPEN].intvalue != -1)
1097               options->gap_open = myargs[ARG_GAPOPEN].intvalue;
1098         if (myargs[ARG_GAPEXT].intvalue != -1)
1099                options->gap_extend = myargs[ARG_GAPEXT].intvalue;
1100 
1101         options->strand_option = myargs[ARG_STRAND].intvalue;
1102 
1103         /* Input longest intron length is in nucleotide scale; in the lower 
1104            level code it will be used in protein scale */
1105         if (myargs[ARG_INTRON].intvalue > 0) 
1106            options->longest_intron = myargs[ARG_INTRON].intvalue;
1107 
1108 
1109         if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) {
1110            seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name, 
1111               options, &other_returns, &error_returns, handle_results);
1112         } else {
1113             SeqLocPtr slp1=NULL, slp2=NULL;
1114             if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE)
1115                 return 1;
1116            seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL);
1117            SeqLocFree(slp1);
1118            SeqLocFree(slp2);
1119         }
1120 
1121         if (error_returns) {
1122            BlastErrorPrint(error_returns);
1123            for (vnp = error_returns; vnp; vnp = vnp->next) {
1124               BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
1125            }
1126            ValNodeFree(error_returns);
1127         }
1128        
1129         ka_params = NULL;
1130         ka_params_gap = NULL;
1131         params_buffer = NULL;
1132         mask_loc = NULL;
1133         matrix = NULL;
1134         txmatrix = NULL;
1135         for (vnp=other_returns; vnp; vnp = vnp->next) {
1136            switch (vnp->choice) {
1137            case TXKABLK_NOGAP:
1138               ka_params = vnp->data.ptrvalue;
1139               break;
1140            case TXKABLK_GAP:
1141               ka_params_gap = vnp->data.ptrvalue;
1142               break;
1143            case TXPARAMETERS:
1144               params_buffer = vnp->data.ptrvalue;
1145               break;
1146            case TXMATRIX:
1147               matrix = vnp->data.ptrvalue;
1148               if (matrix && !tabular_output)
1149                  txmatrix = BlastMatrixToTxMatrix(matrix);
1150               break;
1151            case SEQLOC_MASKING_NOTSET:
1152            case SEQLOC_MASKING_PLUS1:
1153            case SEQLOC_MASKING_PLUS2:
1154            case SEQLOC_MASKING_PLUS3:
1155            case SEQLOC_MASKING_MINUS1:
1156            case SEQLOC_MASKING_MINUS2:
1157            case SEQLOC_MASKING_MINUS3:
1158               ValNodeAddPointer(&mask_loc, vnp->choice, vnp->data.ptrvalue);
1159               break;
1160            default:
1161               break;
1162            }
1163         }       
1164         if (!tabular_output || seqannot_output) {
1165            align_options = 0;
1166            align_options += TXALIGN_MATRIX_VAL;
1167            align_options += TXALIGN_SHOW_QS;
1168            align_options += TXALIGN_COMPRESS;
1169            align_options += TXALIGN_END_NUM;
1170            if (StringICmp("blastx", program_name) == 0) {
1171               align_options += TXALIGN_BLASTX_SPECIAL;
1172            }
1173            
1174            if (html)
1175               align_options += TXALIGN_HTML;
1176 
1177            seqannot = SeqAnnotNew();
1178            seqannot->type = 2;
1179            AddAlignInfoToSeqAnnot(seqannot, align_type);
1180            seqannot->data = seqalign;
1181            aip = NULL;
1182            if (seqannot_output)
1183               aip = AsnIoOpen (myargs[ARG_ASNOUT].strvalue,"w");
1184            
1185            if (aip && seqannot) {
1186               SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, aip, NULL);
1187               AsnIoReset(aip);
1188               aip = AsnIoClose(aip);
1189            }
1190         }
1191         if (!tabular_output) {    
1192            AcknowledgeBlastQuery(query_bsp, 70, outfp, believe_query, html);
1193            ShowTextAlignFromAnnot(seqannot, 60, outfp, NULL, NULL, align_options, txmatrix, mask_loc, FormatScoreFunc);
1194            
1195            seqannot = SeqAnnotFree(seqannot);
1196            if (txmatrix)
1197               txmatrix = TxMatrixDestruct(txmatrix);
1198            init_buff_ex(85);
1199         
1200            if (ka_params) {
1201               PrintKAParameters(ka_params->Lambda, ka_params->K, ka_params->H, 70, outfp, FALSE);
1202            }
1203         
1204            if (ka_params_gap) {
1205               PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K, ka_params_gap->H, 70, outfp, TRUE);
1206            }
1207         
1208            PrintTildeSepLines(params_buffer, 70, outfp);
1209            free_buff();
1210         } else {
1211            PrintTabularOutputHeader(NULL, query_bsp, NULL, 
1212               program_name, 0, believe_query, outfp);
1213 
1214            BlastPrintTabulatedResults(seqalign, query_bsp, NULL, 
1215               1, program_name, !gapped_calculation,
1216               believe_query, 0, 0, outfp, FALSE);
1217            SeqAlignSetFree(seqalign);
1218         }
1219 
1220         matrix = BLAST_MatrixDestruct(matrix);
1221         MemFree(ka_params);
1222         MemFree(ka_params_gap);
1223         MemFree(params_buffer);
1224     
1225         mask_loc_start = mask_loc;
1226         while (mask_loc) {
1227            SeqLocSetFree(mask_loc->data.ptrvalue);
1228            mask_loc = mask_loc->next;
1229         }
1230         ValNodeFree(mask_loc_start);
1231         
1232         fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
1233 
1234         other_returns = ValNodeFree(other_returns);
1235     options->query_lcase_mask = SeqLocSetFree(options->query_lcase_mask);
1236         options = BLASTOptionDelete(options);
1237         MemFree(program_name);
1238         FileClose(outfp);
1239 
1240         if (entrez_lookup) {
1241            BioseqFree(query_bsp);
1242            BioseqFree(subject_bsp);
1243         } else {
1244            SeqEntryFree(sep);
1245            SeqEntryFree(sep1);
1246         }
1247         return 0;
1248 }
1249 
1250 
1251 Int2 Main (void)
1252 
1253 {
1254     Char buf[256] = { '\0' };  /* Used below for name and version. */
1255     Int2 status = 0;    /* return value of function. */
1256 
1257     StringCpy(buf, "bl2seq ");
1258     StringNCat(buf, BlastGetVersionNumber(), sizeof(buf)-StringLen(buf)-1);
1259     if (! GetArgs (buf, NUMARG, myargs)) {
1260         return (1);
1261     }
1262 
1263     UseLocalAsnloadDataAndErrMsg ();
1264 
1265     if (! SeqEntryLoad())
1266                 return 1;
1267 
1268     ErrSetMessageLevel(SEV_WARNING);
1269 
1270     if (myargs[ARG_FORCE_OLD].intvalue != 0)
1271         status = Main_old();
1272     else
1273         status = Main_new();
1274 
1275     FreeArgs(NUMARG, myargs);
1276 
1277     return status;
1278 }
1279 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.