NCBI C Toolkit Cross Reference

C/tools/blast.h


  1 /* ===========================================================================
  2 *
  3 *                            PUBLIC DOMAIN NOTICE
  4 *               National Center for Biotechnology Information
  5 *
  6 *  This software/database is a "United States Government Work" under the
  7 *  terms of the United States Copyright Act.  It was written as part of
  8 *  the author's official duties as a United States Government employee and
  9 *  thus cannot be copyrighted.  This software/database is freely available
 10 *  to the public for use. The National Library of Medicine and the U.S.
 11 *  Government have not placed any restriction on its use or reproduction.
 12 *
 13 *  Although all reasonable efforts have been taken to ensure the accuracy
 14 *  and reliability of the software and data, the NLM and the U.S.
 15 *  Government do not and cannot warrant the performance or results that
 16 *  may be obtained by using this software or data. The NLM and the U.S.
 17 *  Government disclaim all warranties, express or implied, including
 18 *  warranties of performance, merchantability or fitness for any particular
 19 *  purpose.
 20 *
 21 *  Please cite the author in any work or product based on this material.
 22 *
 23 * ===========================================================================*/
 24 /*****************************************************************************
 25 
 26 File name: blast.h
 27 
 28 Author: Tom Madden
 29 
 30 Contents: prototypes for "public" BLAST functions (ones that other utilities
 31         may safely call).
 32 
 33 ******************************************************************************/
 34 
 35 /* $Revision: 6.74 $ 
 36 * $Log: blast.h,v $
 37 * Revision 6.74  2005/12/29 19:56:06  madden
 38 * Moved functions to print tabular output to blfmtutl
 39 *
 40 * Revision 6.73  2005/08/02 14:40:29  coulouri
 41 * From Mike Gertz:
 42 * - Fixes to comments
 43 * - Added enum constant eGapChar; renamed eStarChar to eStopChar.
 44 * - Change the integer type of some variables to suppress warnings about
 45 *   assigning a wider type to a narrower type.
 46 * - Made the routines BLbasicSmithWatermanScoreOnly and BLSmithWatermanFindStart
 47 *   static.
 48 * - Renamed s_ScatterFreqRatios -> s_ScatterScores; renamed parameters.
 49 * - In NewAlignmentUsingXdrop, use the translate2 field from gap_align
 50 *   to set the same field in the edit block.
 51 * - Changed the Kappa_WindowInfo datatype to hold a list of HSPs in the
 52 *   window; added logic in several places, notably WindowsFromHSPs, to
 53 *   generate and maintain these lists.
 54 * - Refactored Kappa_AdjustSearch.  Use a more sophisticated rule,
 55 *   implemented in the new Kappa_GetSubjectComposition routine, to
 56 *   determine the subject sequence composition for tblastn.
 57 * - Removed unused parameters from several routines.
 58 * - In RedoAlignmentCore, delete NRrecord to avoid a memory leak.
 59 *
 60 * Revision 6.72  2005/01/24 21:17:36  camacho
 61 * 1. Changed implementation of RPSBlastResultHspScoreCmp to have the same
 62 *    tie-breakers as score_compare_hsps
 63 * 2. Renamed RPSBlastResultHspScoreCmp to BLASTResultHspScoreCmp
 64 *
 65 * Revision 6.71  2005/01/18 14:54:13  camacho
 66 * Change in tie-breakers for score comparison, suggestion by Mike Gertz
 67 *
 68 * Revision 6.70  2004/11/01 20:43:15  camacho
 69 * + BlastErrorToString
 70 *
 71 * Revision 6.69  2004/10/19 19:41:33  dondosha
 72 * Added hitlist_size argument to BlastPruneSeqAlignByGiList; Added new function BlastPruneSeqAlignBySortedGiList
 73 *
 74 * Revision 6.68  2004/06/30 12:28:20  madden
 75 * Removed some function prototypes and moved to blfmtutl.h
 76 *
 77 * Revision 6.67  2003/03/25 22:21:53  boemker
 78 * Clarified behavior of BLAST_Wizard.
 79 *
 80 * Revision 6.66  2003/03/25 19:58:18  boemker
 81 * Moved code to initialize search options from blastcgicmd.cpp to here, as
 82 * BLAST_Wizard et al.
 83 *
 84 * Revision 6.65  2003/03/24 19:42:14  madden
 85 * Changes to support query concatenation for blastn and tblastn
 86 *
 87 * Revision 6.64  2003/01/14 20:28:54  madden
 88 * New function BLASTAddBlastDBTitleToSeqAnnotEx
 89 *
 90 * Revision 6.63  2002/10/22 17:57:48  camacho
 91 * Changes to B2SPssmMultipleQueries
 92 *
 93 * Revision 6.62  2002/10/21 23:13:36  camacho
 94 * Added B2SPssmOnTheFly functions
 95 *
 96 * Revision 6.61  2002/09/18 20:23:20  camacho
 97 * Added BLASTCalculateSearchSpace
 98 *
 99 * Revision 6.60  2002/09/02 21:15:20  camacho
100 * Changed comment for psi-blast2sequences
101 *
102 * Revision 6.59  2002/08/30 18:56:02  dondosha
103 * Made BlastMakeTempProteinBioseq and HackSeqLocId public: needed for Cn3D
104 *
105 * Revision 6.58  2002/08/29 20:44:38  camacho
106 * Added description of psi-blast2sequences
107 *
108 * Revision 6.57  2002/08/09 19:39:20  camacho
109 * Added constants for some blast search parameters
110 *
111 * Revision 6.56  2002/08/01 20:47:24  dondosha
112 * Prototypes changed for megablast functions related to traceback
113 *
114 * Revision 6.55  2002/07/02 17:08:00  dondosha
115 * Reverse previous change - not needed
116 *
117 * Revision 6.54  2002/07/01 22:52:06  dondosha
118 * Added CheckStartForGappedAlignmentEx with an extra window size parameter
119 *
120 * Revision 6.53  2002/05/28 22:00:12  camacho
121 * *** empty log message ***
122 *
123 * Revision 6.52  2002/05/13 13:51:33  dondosha
124 * Made two functions public
125 *
126 * Revision 6.51  2002/05/09 15:35:51  dondosha
127 * Added BLASTOptionNewEx function with an extra argument for megablast
128 *
129 * Revision 6.50  2002/03/14 16:11:40  camacho
130 * Extended BlastTwoSequences to allow comparison between sequence and PSSM
131 *
132 * Revision 6.49  2002/02/15 23:36:23  dondosha
133 * Correction for megablast with non-greedy extensions
134 *
135 * Revision 6.48  2001/07/09 15:12:47  shavirin
136 * Functions BLbasicSmithWatermanScoreOnly() and BLSmithWatermanFindStart()
137 * used to calculate Smith-waterman alignments on low level become external.
138 *
139 * Revision 6.47  2001/06/18 16:09:25  dondosha
140 * Added prototype for PrintTabularOutputHeader
141 *
142 * Revision 6.46  2001/06/13 21:40:54  dondosha
143 * Moved GetGisFromFile declaration from mblast.h to blast.h
144 *
145 * Revision 6.45  2001/04/16 21:28:11  dondosha
146 * Added function BlastPruneSeqAlignByEvalueRange
147 *
148 * Revision 6.44  2001/04/12 21:34:50  dondosha
149 * Added function BlastPruneSeqAlignByGiList
150 *
151 * Revision 6.43  2001/04/06 18:15:08  madden
152 * Move UNIX-specific stuff (HeyIAmInMemory) to bqueue.[ch]
153 *
154 * Revision 6.42  2001/02/07 21:10:10  dondosha
155 * Added prototypes of Blast Engine functions with callback
156 *
157 * Revision 6.41  2001/01/16 23:16:51  dondosha
158 * Added 2 arguments and several options to parse_blast_options
159 *
160 * Revision 6.40  2001/01/09 20:10:39  shavirin
161 * Added sorting of all hits in result_struct for every element in
162 * results. Added function RPSResultHspScoreCmp.
163 *
164 * Revision 6.39  2000/11/02 20:16:34  dondosha
165 * Added prototypes for BlastTwoSequencesByLocWithCallback and BlastTwoSequencesWithCallback
166 *
167 * Revision 6.38  2000/10/31 16:30:58  shavirin
168 * Function BLASTSetUpSearchInternalByLoc became external.
169 *
170 * Revision 6.37  2000/10/26 18:52:42  dondosha
171 * Added prototype for MegaBlastPrintReference
172 *
173 * Revision 6.36  2000/10/24 19:00:39  dondosha
174 * Removed UniqueLocalId() prototype - goes to sequtil.h
175 *
176 * Revision 6.35  2000/10/24 18:57:22  dondosha
177 * Added prototype of UniqueLocalId(), removed from mblast.h
178 *
179 * Revision 6.34  2000/08/31 16:27:20  shavirin
180 * Added definition of the function BlastSequenceBlkDestruct().
181 *
182 * Revision 6.33  2000/07/12 23:07:30  kans
183 * reverse_seq moved from pseed3.c to blastool.c, placed in blast.h header, called by gapxdrop.c
184 *
185 * Revision 6.32  2000/07/07 21:20:07  vakatov
186 * Get all "#include" out of the 'extern "C" { }' scope!
187 *
188 * Revision 6.31  2000/06/20 15:50:45  shavirin
189 * Added new functions: BLASTAddBlastDBTitleToSeqAnnot and
190 * BLASTGetDatabaseTitleFromSeqAnnot().
191 *
192 * Revision 6.30  2000/06/14 22:21:57  dondosha
193 * Added prototypes for BlastQuerySequenceSetUp and BlastSequencesOnTheFlyEx
194 *
195 * Revision 6.29  2000/04/28 19:49:36  shavirin
196 * Added definition of the function DefineToFrame().
197 *
198 * Revision 6.28  2000/03/31 17:01:26  dondosha
199 * Added explanation for use of blastx in two sequences search
200 *
201 * Revision 6.27  2000/03/24 17:01:27  kans
202 * added BLASTUpdateSeqIdInSeqInt - needed to compile blastall.c on Mac, which requires prototypes
203 *
204 * Revision 6.26  2000/02/23 20:37:45  dondosha
205 * Added prototype for MegaBlastBuildLookupTable and BlastNtWordExtend
206 *
207 * Revision 6.25  2000/02/11 16:40:53  egorov
208 * The parse_blast_options is made public.
209 *
210 * Revision 6.24  2000/02/01 18:04:59  dondosha
211 * Added prototype for GreedyAlignMemAlloc
212 *
213 * Revision 6.23  2000/01/26 22:01:57  madden
214 * Add function BlastGetProgramName
215 *
216 * Revision 6.22  2000/01/14 18:27:45  shavirin
217 * Added definitions of WordExtend* functions.
218 *
219 * Revision 6.21  1999/12/29 18:55:29  shavirin
220 * Added definition of non-static function BlastSequenceAddSequence().
221 *
222 * Revision 6.20  1999/09/22 20:55:07  egorov
223 * Add time measure stuff
224 *
225 * Revision 6.19  1999/07/01 13:03:24  sicotte
226 * Updated for DenseDiag and Moved seqalign_reverse_strand from blastutl.c(blast.h) to SeqAlignListReverseStrand in salpedit.ch and fixed call in salutil.c
227 *
228 * Revision 6.18  1999/03/18 16:43:31  shavirin
229 * Added definition of the function Boolean HeyIAmInMemory(Int4 program)
230 *
231 * Revision 6.17  1999/03/17 16:49:11  madden
232 * Removed comment within comment
233 *
234 * Revision 6.16  1999/01/26 18:26:23  madden
235 * Add updateLambdaK prototype
236 *
237 * Revision 6.15  1998/09/22 16:56:12  egorov
238 * Add prototype for BlastErrorPrintExtra()
239 *
240  * Revision 6.14  1998/09/14 15:11:14  egorov
241  * Add support for Int8 length databases; remove unused variables
242  *
243  * Revision 6.13  1998/08/25 14:16:23  madden
244  * Added BlastGetPhiReference and BlastPrintPhiReference
245  *
246  * Revision 6.12  1998/06/12 16:08:49  madden
247  * BlastHitRange stuff
248  *
249  * Revision 6.11  1998/05/22 20:20:37  madden
250  * Added BlastTwoSequencesByLocEx and BlastTwoSequencesEx
251  *
252  * Revision 6.10  1998/05/01 18:34:37  egorov
253  * Add new parametes to BLASTOptionSetGapParam()
254  *
255  * Revision 6.9  1998/03/24 15:38:22  madden
256  * Use BlastDoubleInt4Ptr to keep track of gis and ordinal_ids
257  *
258  * Revision 6.8  1998/03/18 14:14:14  madden
259  * Support random access by gi list
260  *
261  * Revision 6.7  1998/03/14 18:28:08  madden
262  * Added BioseqBlastEngineEx
263  *
264  * Revision 6.6  1998/02/26 19:09:28  madden
265  * Removed AdjustOffSetsInSeqAlign prototype
266  *
267  * Revision 6.5  1998/01/05 22:41:44  madden
268  * Added seqalign_reverse_strand
269  *
270  * Revision 6.4  1997/12/10 22:41:20  madden
271  * prototype for BlastGetProgramNumber
272  *
273  * Revision 6.3  1997/12/01 22:07:15  madden
274  * Changed call to BLASTOptionValidateEx
275  *
276  * Revision 6.2  1997/11/18 22:23:17  madden
277  * Added BLASTOptionSetGapParams
278  *
279  * Revision 6.1  1997/10/02 17:28:55  madden
280  * Added prototype for BlastPrintVersionInfoEx
281  *
282  * Revision 6.0  1997/08/25 18:52:29  madden
283  * Revision changed to 6.0
284  *
285  * Revision 1.26  1997/07/22 17:21:55  madden
286  * Added index callbacks to SetUp function prototypes
287  *
288  * Revision 1.25  1997/07/21 17:36:47  madden
289  * Added BlastGetReleaseDate
290  *
291  * Revision 1.24  1997/07/18 20:55:25  madden
292  * Added prototypes for BlastGetVersionNumber and BlastGetReference
293  *
294  * Revision 1.23  1997/07/14 16:15:09  madden
295  * Added prototype for BlastErrorPrint
296  *
297  * Revision 1.22  1997/07/14 15:33:32  madden
298  * Prototype for BLASTOptionValidateEx
299  *
300  * Revision 1.21  1997/07/11 19:29:08  madden
301  * Added prototypes for BLASTSetUpSearchByLocWithReadDb and BioseqBlastEngineByLoc
302  *
303  * Revision 1.20  1997/06/20 13:11:53  madden
304  * added prototype for AdjustOffSetsInSeqAlign
305  *
306  * Revision 1.19  1997/05/20 17:51:02  madden
307  * Added prototypes for BLASTSetUpSearchByLoc, BlastTwoSequencesByLoc and BlastSequencesOnTheFlyByLoc
308  *
309  * Revision 1.18  1997/03/11 14:38:40  madden
310  * Added BlastSequencesOnTheFly prototype.
311  *
312  * Revision 1.17  1997/03/07  21:58:36  madden
313  * Added Boolean gapped argument to BLASTOptionNew.
314  *
315  * Revision 1.16  1997/03/03  21:48:52  madden
316  * *** empty log message ***
317  *
318  * Revision 1.15  1997/03/03  14:48:57  madden
319  * Changes prototype for SumBlastGetGappedAlignmentTraceback
320  *
321  * Revision 1.14  1997/02/26  20:37:31  madden
322  * Added *error_returns to BioseqBlastEngine.
323  *
324  * Revision 1.13  1997/02/18  17:58:52  madden
325  * Added BioseqBlastEngine.
326  *
327  * Revision 1.12  1997/02/10  20:03:58  madden
328  * Added all_words to BLASTSetUpSearch.
329  *
330  * Revision 1.11  1997/02/05  19:54:59  madden
331  * Removed defunct prototype.
332  *
333  * Revision 1.10  1997/02/03  13:02:12  madden
334  * Added length to BLASTSubjectInfoNew call.
335  *
336  * Revision 1.9  1997/01/28  22:38:56  madden
337  * Added function BLASTOptionValidate.
338  *
339  * Revision 1.8  1997/01/11  18:58:29  madden
340  * Removed defunct PerformBlastSearch... functions.
341  *
342  * Revision 1.7  1996/12/23  22:02:05  madden
343  * Changes to allow two sequences to be compared.
344  *
345  * Revision 1.6  1996/09/26  20:18:43  madden
346  * Added prototype for ExperimentalLocalBlastSearch.
347  *
348  * Revision 1.5  1996/09/25  19:59:10  madden
349  * Removed prototype for for GetParameterStack.
350  *
351  * Revision 1.4  1996/09/11  22:21:51  madden
352  * *** empty log message ***
353  *
354  * Revision 1.3  1996/09/11  19:14:09  madden
355  * Added BLAST_OptionsBlkPtr structure and use thereof.
356  *
357  * Revision 1.2  1996/08/23  16:30:54  shavirin
358  * Fixed NT compiler warnings type mismatch
359  *
360  * Revision 1.1  1996/08/05  19:46:34  madden
361  * Initial revision
362  *
363  * Revision 1.34  1996/08/02  14:20:06  madden
364  * Add prototype for do_the_blast_run.
365  *
366  * Revision 1.33  1996/06/26  15:53:54  madden
367  * Second dropoff score parameter added.
368  *
369  * Revision 1.32  1996/06/24  17:57:39  madden
370  * Added dropoff_number_of_bits argument to SetUpBlastSearch.
371  *
372  * Revision 1.31  1996/06/20  16:15:57  madden
373  * Replaced int's with Int4's.
374  *
375  * Revision 1.30  1996/06/19  14:19:12  madden
376  * Changed prototypes for SetUpBlastSearch.
377  *
378  * Revision 1.29  1996/06/06  17:54:09  madden
379  * number_of_bits added to SetUpBlastSearch and SetUpBlastSearchWithReadDb.
380  *
381  * Revision 1.28  1996/06/04  15:33:12  madden
382  * Changed prototype for GetParameterStack.
383  *
384  * Revision 1.27  1996/05/28  14:12:53  madden
385  * prototype for GetParameterStack changed.
386  *
387  * Revision 1.26  1996/05/16  19:50:15  madden
388  * Added documentation block.
389  *
390  * Revision 1.25  1996/05/16  13:29:38  madden
391  * Changed prototype for SetUpBlastSearchWithReadDb.
392  *
393  * Revision 1.24  1996/05/03  19:55:07  madden
394  * *** empty log message ***
395  *
396  * Revision 1.23  1996/05/01  14:58:22  madden
397  * Changed prototypes for SetUpBlastSearchWithReadDb
398  *
399  * Revision 1.22  1996/04/24  12:52:06  madden
400  * wordsize new parameter for SetUpBlastSearch.
401  *
402  * Revision 1.21  1996/04/22  21:40:07  madden
403  * New prototypes for performing blast searches.
404  *
405  * Revision 1.20  1996/04/03  19:15:28  madden
406  * *** empty log message ***
407  *
408  * Revision 1.19  1996/03/29  21:26:01  madden
409  * Added prototype for SortSeqAlignByPvalue.
410  *
411  * Revision 1.18  1996/03/29  14:08:40  madden
412  * prototype for SetUpBlastSearchWithReadDb added.
413  *
414  * Revision 1.17  1996/03/27  23:19:24  madden
415  * changed parameters for PerformBlastSearch and Perform2PassBlastSearch.
416  *
417  * Revision 1.16  1996/03/26  19:36:42  madden
418  * Changes to read databases formatted with formatdb.
419  *
420  * Revision 1.15  1996/03/25  16:34:19  madden
421  * Changes to mimic old statistics.
422  *
423  * Revision 1.14  1996/02/28  21:36:54  madden
424  * changes for discontiguous words.
425  *
426  * Revision 1.13  1996/02/15  15:22:52  madden
427  * renamed Perform2HitBlastSearch to Perform2PassBlastSearch.
428  *
429  * Revision 1.12  1996/02/09  13:50:45  madden
430  * Added prototype for Perform2HitBlastSearch.
431  *
432  * Revision 1.11  1996/02/05  18:46:30  madden
433  * Added second threshold value to SetUpBlastSearch.
434  *
435  * Revision 1.10  1996/01/29  21:11:38  madden
436  * Changes for MultipleHits BLAST.
437  *
438  * Revision 1.9  1996/01/23  16:31:23  madden
439  *  e_cutoff changed from BLAST_Score to double in SetUpBlastSearch.
440  *
441  * Revision 1.8  1996/01/17  23:18:01  madden
442  * *** empty log message ***
443  *
444  * Revision 1.7  1996/01/17  17:00:24  madden
445  * Added gap arguments to SetUpBlastSearch.
446  *
447  * Revision 1.6  1996/01/17  13:45:25  madden
448  * Added "gap_decay_rate" to SetUpBlastSearch.
449  *
450  * Revision 1.5  1996/01/16  15:28:54  madden
451  * Changed call to SetUpBlastSearch.
452  *
453  * Revision 1.4  1995/12/30  19:22:04  madden
454  * Added prototype for PerformBlastSearch.
455  *
456  * Revision 1.3  1995/12/30  18:39:27  madden
457  * Added prototype for SetUpBlastSearch.
458  *
459  * Revision 1.2  1995/12/19  22:31:05  madden
460  * *** empty log message ***
461  *
462  * Revision 1.1  1995/12/08  15:48:23  madden
463  * Initial revision
464  *
465  * */
466 #ifndef __BLAST__
467 #define __BLAST__
468 
469 #include <ncbi.h>
470 #include <blastdef.h>
471 
472 /* AM: Support for query multiplexing. */
473 #include "blastconcatdef.h"
474 
475 #ifdef __cplusplus
476 extern "C" {
477 #endif
478 
479 /*
480         Call this function to allocate the "options" structure.  The
481         fields will be filled in with the default values, which depend
482         on the program. Defaults are #defined constants in blastdef.h
            progname names the BLAST program and gapped is a Boolean flag.
            BLASTOptionNewEx takes one extra Boolean, is_megablast (the
            revision log describes it as "an extra argument for megablast").
483 */
484 BLAST_OptionsBlkPtr LIBCALL BLASTOptionNew PROTO((CharPtr progname, Boolean gapped));
485 BLAST_OptionsBlkPtr LIBCALL BLASTOptionNewEx PROTO((CharPtr progname, Boolean gapped, Boolean is_megablast));
486 
/* Takes an options block and returns a BLAST_OptionsBlkPtr.  Presumably this
   releases a block obtained from BLASTOptionNew or BLASTOptionNewEx and
   returns NULL -- TODO confirm against the definition, which is not in this
   header. */
487 BLAST_OptionsBlkPtr LIBCALL BLASTOptionDelete PROTO((BLAST_OptionsBlkPtr));
488 
/* Validation entry points; both take the options block and the program name.
   NOTE(review): BLASTOptionValidate is declared here as returning
   BLAST_OptionsBlkPtr while BLASTOptionValidateEx returns Int2 -- check the
   return type against the definition. */
489 BLAST_OptionsBlkPtr LIBCALL BLASTOptionValidate PROTO((BLAST_OptionsBlkPtr options, CharPtr progname));
490 
/* error_return is a pointer to a ValNodePtr (presumably filled with messages
   of the kind BlastErrorPrint accepts -- TODO confirm). */
491 Int2 LIBCALL BLASTOptionValidateEx PROTO((BLAST_OptionsBlkPtr options, CharPtr progname, ValNodePtr PNTR error_return));
492 
/* Takes the options block, a matrix name and two Int4 values, open and
   extended (presumably the gap-opening and gap-extension costs -- confirm
   against the implementation).  Returns Int2. */
493 Int2 LIBCALL BLASTOptionSetGapParams PROTO((BLAST_OptionsBlkPtr options, CharPtr matrix, Int4 open, Int4 extended));
494 
495 /********************* BLASTCalculateSearchSpace **************************
496 Purpose: Calculate the effective search space for a gapped search with a
497          minimal set of options. Assumes the query is a protein sequence
498          (i.e., there are no multiple contexts to consider).
499 Parameters: options [in]: Blast options structure
500             nseq [in]: Number of sequences in the database (Int4)
501             dblen [in]: Length of the database (Int8)
502             qlen [in]: Length of the query sequence (Int4)
503 Returns: Effective search space, as a FloatHi
504 **************************************************************************/
505 FloatHi LIBCALL BLASTCalculateSearchSpace PROTO((BLAST_OptionsBlkPtr options, 
506             Int4 nseq, Int8 dblen, Int4 qlen)); 
507 
508 /* 
509         The setup functions; call one of these before running BLAST.
            All but BLASTSetUpSearchInternalByLoc return a BlastSearchBlkPtr.
            Guide to the variants, as far as the signatures show:
              - The query is a BioseqPtr (query_bsp) or, for the ByLoc
                variants, a SeqLocPtr.
              - WithReadDb variants take a database name (dbname); the others
                take the database length (dblen, Int8) and all_words.
              - Ex variants add seqid_list, gi_list and gi_list_total
                (presumably to restrict the search to the listed sequences --
                TODO confirm).
              - index_callback / callback is invoked with two Int4 values,
                done and positives, and returns int.
510 */
511 
512 BlastSearchBlkPtr LIBCALL BLASTSetUpSearchWithReadDb PROTO((BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives))));
513 
514 BlastSearchBlkPtr LIBCALL BLASTSetUpSearchWithReadDbEx PROTO((BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total));
515 
516 BlastSearchBlkPtr LIBCALL BLASTSetUpSearchByLocWithReadDb PROTO((SeqLocPtr slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives))));
517 
518 BlastSearchBlkPtr LIBCALL BLASTSetUpSearchByLocWithReadDbEx PROTO((SeqLocPtr slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, QueriesPtr mult_queries)); 
519 /* --KM added mult_queries param: struct holding info about the individual queries that were concatenated when the -B option is used */
520 
521 BlastSearchBlkPtr LIBCALL BLASTSetUpSearch PROTO((BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives))));
522 
523 BlastSearchBlkPtr LIBCALL BLASTSetUpSearchByLoc PROTO((SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives))));
524 
/* Made external in revision 6.38.  Unlike the functions above it takes an
   existing search block, accepts both a SeqLocPtr and a BioseqPtr for the
   query, and returns Int2 (presumably a status code -- TODO confirm). */
525 Int2 LIBCALL BLASTSetUpSearchInternalByLoc  PROTO((BlastSearchBlkPtr search, SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives))));
526 /*
527         Use these functions to perform the search.
            Both take a search block, the subject sequence (subject_seq) and
            its length (subject_length), and return Int2 (presumably a status
            code -- TODO confirm against the implementation).
528 */
529 Int2 LIBCALL BLASTPerform2PassSearch PROTO((BlastSearchBlkPtr search, Int4 subject_length, Uint1Ptr subject_seq));
530 
531 Int2 LIBCALL BLASTPerformFinalSearch PROTO((BlastSearchBlkPtr search, Int4 subject_length, Uint1Ptr subject_seq));
532 
533 
/* Subject-info helpers.  BLASTSubjectInfoNew takes a SeqIdPtr, a defline
   string and an Int4 length and returns a BLASTSubjectInfoPtr.
   BLASTSubjectInfoDestruct takes such a pointer and returns the same pointer
   type (presumably NULL after freeing -- TODO confirm). */
534 BLASTSubjectInfoPtr LIBCALL BLASTSubjectInfoNew PROTO((SeqIdPtr sip, CharPtr defline, Int4 length));
535 
536 BLASTSubjectInfoPtr LIBCALL BLASTSubjectInfoDestruct PROTO((BLASTSubjectInfoPtr subject_info));
537 
/* Takes the search block and returns nothing.  The name suggests it runs the
   search, but its behavior is defined outside this header -- see the
   implementation. */
538 void LIBCALL do_the_blast_run PROTO((BlastSearchBlkPtr search));
539 
/* Non-static as of revision 6.21.  Takes a sequence block, two Uint1
   pointers (sequence and sequence_start) and three Int4 values; returns
   Int2 (presumably a status code -- TODO confirm). */
540 Int2 LIBCALL BlastSequenceAddSequence PROTO((BlastSequenceBlkPtr sequence_blk, Uint1Ptr sequence, Uint1Ptr sequence_start, Int4 length, Int4 original_seq, Int4 effective_length));
541 
/* Takes a sequence block and returns a BlastSequenceBlkPtr (presumably NULL
   after freeing -- TODO confirm). */
542 BlastSequenceBlkPtr LIBCALL
543 BlastSequenceBlkDestruct PROTO((BlastSequenceBlkPtr seq_blk));
544 
/* Declared without PROTO or LIBCALL, unlike its neighbors.  The revision log
   (6.27) says this prototype was added so that blastall.c compiles on Mac. */
545 void BLASTUpdateSeqIdInSeqInt(SeqLocPtr mask, SeqIdPtr sip);
546 
547 /*
548         Blast two sequences and return a SeqAlign. For the blastx program
549         the first sequence must be nucleotide and the second protein.
            The ByLoc variants take SeqLocPtr arguments instead of BioseqPtr.
550 */
551 SeqAlignPtr LIBCALL BlastTwoSequences PROTO((BioseqPtr bsp1, BioseqPtr bsp2, CharPtr progname, BLAST_OptionsBlkPtr options));
552 
553 SeqAlignPtr LIBCALL BlastTwoSequencesByLoc PROTO((SeqLocPtr slp1, SeqLocPtr slp2, CharPtr progname, BLAST_OptionsBlkPtr options));
554 
555 
/* Ex variant: additionally takes other_returns and error_returns, both
   pointers to ValNodePtr (presumably output lists set by the call --
   TODO confirm ownership against the implementation). */
556 SeqAlignPtr LIBCALL BlastTwoSequencesByLocEx PROTO((SeqLocPtr slp1, SeqLocPtr slp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns));
557 
558 /* Notes for psi-blast2sequences (compare a PSSM with sequence slp2):  (CC)
559  * =============================
560  * 1) This functionality requires (at least) the residue frequencies
561  * (BLAST_MatrixPtr->posFreqs) to compute the PSSM. If the PSSM is 
562  * provided (BLAST_MatrixPtr->matrix), then this takes precedence and the 
563  * PSSM will not be recomputed. Please note that the PSSM and residue 
564  * frequencies are matrices with dimensions 26 by query_length+1. The last 
565  * row should be set to BLAST_SCORE_MIN (for PSSMs) or 0.0 (for residue 
566  * frequencies). 26 is the alphabet size (also defined as PRO_ALPHABET_SIZE in
567  * profiles.h).
568  *
569  * 2) The slp1 parameter is the master sequence for the PSSM (used to display 
570  * the alignment) and can only be shorter than the PSSM (the PSSM will be
571  * trimmed accordingly).
572  *
573  * 3) If the scalingFactor is set to 0.0 (default in the options structure),
574  * the PSSM will be calculated in the same way as psiblast (blastpgp) does it
575  * (that is, without scaling the PSSM). In order to use composition-based
576  * statistics (default in psiblast), please set the options->tweak_parameters
577  * option to TRUE. This is *not* the default in the options structure.
578  *
579  * 4) Also, if the scalingFactor is not 0.0 in the options parameter, this
580  * value will be used to scale the PSSM only if it is calculated by this
581  * function (if the PSSM is calculated outside this function it is assumed
582  * that the PSSM has been scaled already and that the same scalingFactor 
583  * that was used to create it is passed into this function).  This value is 
584  * also used to multiply various parameters such as gap costs, X dropoff 
585  * values, when performing the matrix rescaling, and to adjust the scores 
586  * and Lambda parameters when performing the traceback stage. 
587  * The matrix rescaling step will take place prior to the traceback stage. 
588  * This functionality resembles what rpsblast/impala do.
589  */
/* Takes the BlastTwoSequencesByLocEx arguments plus a handle_results
   callback (called with a VoidPtr named srch; presumably the search block --
   TODO confirm) and matrix, the BLAST_MatrixPtr discussed in the
   psi-blast2sequences notes above. */
590 SeqAlignPtr LIBCALL BlastTwoSequencesByLocWithCallback PROTO((SeqLocPtr slp1, SeqLocPtr slp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)), BLAST_MatrixPtr matrix));
591 
/* BioseqPtr counterpart of BlastTwoSequencesByLocEx: same trailing
   other_returns / error_returns arguments. */
592 SeqAlignPtr LIBCALL BlastTwoSequencesEx PROTO((BioseqPtr bsp1, BioseqPtr bsp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns));
593 
/* As BlastTwoSequencesEx plus a handle_results callback.  Note that, unlike
   the ByLoc callback variant, this one has no matrix parameter. */
594 SeqAlignPtr LIBCALL BlastTwoSequencesWithCallback PROTO((BioseqPtr bsp1, BioseqPtr bsp2, CharPtr progname, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch))));
595 
/* "On the fly" searches: each takes an existing search block plus a subject
   given as a BioseqPtr or (ByLoc) a SeqLocPtr, and returns a SeqAlignPtr. */
596 SeqAlignPtr LIBCALL BlastSequencesOnTheFly PROTO((BlastSearchBlkPtr search, BioseqPtr subject_bsp));
597 
598 SeqAlignPtr LIBCALL BlastSequencesOnTheFlyByLoc PROTO((BlastSearchBlkPtr search, SeqLocPtr subject_slp));
599 
/* Takes a query Bioseq, the program name and the options block; returns a
   BlastSearchBlkPtr (presumably one usable with the OnTheFly calls --
   TODO confirm). */
600 BlastSearchBlkPtr LIBCALL
601 BlastQuerySequenceSetUp PROTO((BioseqPtr bsp, CharPtr progname,  
602                                BLAST_OptionsBlkPtr options));
603 
/* Ex variant: same arguments as BlastSequencesOnTheFly, but returns a
   BlastSearchBlkPtr instead of a SeqAlignPtr. */
604 BlastSearchBlkPtr LIBCALL
605 BlastSequencesOnTheFlyEx PROTO((BlastSearchBlkPtr search, BioseqPtr subject_bsp));
606    
607 /*** PSIBLAST2Sequences API ***/
/* Setup and cleanup both take the search block and the BLAST_MatrixPtr and
   return a Boolean (presumably TRUE on success -- TODO confirm).  Setup
   additionally takes pssm_slp, a SeqLocPtr for the PSSM. */
608 Boolean LIBCALL 
609 B2SPssmSetupSearch PROTO((BlastSearchBlkPtr search, SeqLocPtr pssm_slp, 
610 BLAST_MatrixPtr matrix));
611 
612 Boolean LIBCALL 
613 B2SPssmCleanUpSearch PROTO((BlastSearchBlkPtr search, BLAST_MatrixPtr matrix));
614 
/* Each takes the search block and a subject (a BioseqPtr, or a SeqLocPtr for
   the ByLoc variant) and returns a SeqAlignPtr. */
615 SeqAlignPtr LIBCALL 
616 B2SPssmOnTheFly PROTO((BlastSearchBlkPtr search, BioseqPtr subj_bsp));
617 
618 SeqAlignPtr LIBCALL 
619 B2SPssmOnTheFlyByLoc PROTO((BlastSearchBlkPtr search, SeqLocPtr subj_slp));
620 
621 /* Compare the PSSM (pssm_slp, matrix) against all sequences in target_seqs.
622    Returns an array of length ntargets with the corresponding alignments.
623    The caller is responsible for deallocating the return value. */
624 SeqAlignPtr * LIBCALL 
625 B2SPssmMultipleQueries PROTO((SeqLocPtr pssm_slp, BLAST_MatrixPtr matrix, 
626 SeqLocPtr *target_seqs, Int4 ntargets, BLAST_OptionsBlkPtr options));
627 
628 /*** END PSIBLAST2Sequences API ***/
629 
630 
/* Takes the search block, a hit number, two Boolean flags (reverse and
   ordinal_number) and the subject sequence with its length; returns a
   SeqAlignPtr. */
631 SeqAlignPtr LIBCALL SumBlastGetGappedAlignmentTraceback PROTO((BlastSearchBlkPtr search, Int4 hit_number, Boolean reverse, Boolean ordinal_number, Uint1Ptr subject, Int4 subject_length));
632 
/* Ex variant: returns a Boolean instead, and adds do_traceback, seqalignP (a
   pointer to a SeqAlignPtr; presumably where the alignment is stored --
   TODO confirm), bhrp (a BlastHitRangePtr) and query_number. */
633 Boolean LIBCALL SumBlastGetGappedAlignmentEx PROTO((BlastSearchBlkPtr search, Int4 hit_number, Boolean reverse, Boolean ordinal_number, Uint1Ptr subject, Int4 subject_length, Boolean do_traceback, SeqAlignPtr PNTR seqalignP, BlastHitRangePtr bhrp, Int2 query_number));
634 
635 /*
636         Performs a complete BLAST search and returns a SeqAlign.
            Guide to the variants, as far as the signatures show:
              - The query is a BioseqPtr (bsp) or, for the ByLoc variants,
                a SeqLocPtr (slp).
              - database is the database name; other_returns and
                error_returns are pointers to ValNodePtr.
              - callback is invoked with two Int4 values, done and positives.
              - Ex variants add seqid_list, gi_list and gi_list_total.
              - WithCallback variants add handle_results, called with a
                VoidPtr named srch.
              - Mult variants add mult_queries (see the AM and KM notes).
637 */
638 
639 SeqAlignPtr LIBCALL BioseqBlastEngine PROTO((BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives))));
640 
641 SeqAlignPtr LIBCALL BioseqBlastEngineWithCallback PROTO((BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch))));
642 
643 SeqAlignPtr LIBCALL BioseqBlastEngineWithCallbackMult PROTO((BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)), QueriesPtr mult_queries)); /* AM: Added mult_queries param. */
644 
645 SeqAlignPtr LIBCALL BioseqBlastEngineEx PROTO((BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total));
646 
647 SeqAlignPtr LIBCALL BioseqBlastEngineByLoc PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives))));
648 
649 SeqAlignPtr LIBCALL BioseqBlastEngineByLocEx PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total));
650 
651 SeqAlignPtr LIBCALL BioseqBlastEngineByLocWithCallback PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)))); 
652 
653 SeqAlignPtr LIBCALL BioseqBlastEngineByLocWithCallbackMult PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, int (LIBCALLBACK *handle_results)PROTO((VoidPtr srch)), QueriesPtr mult_queries));  /* --KM added mult_queries param */
654 /*
655         Prints error messages.
            Each function takes a ValNodePtr named error_return.
            BlastErrorPrintExtra additionally takes a Boolean errpostex and a
            FILE pointer fp (presumably the stream to print to -- TODO confirm).
656 */
657 
658 void LIBCALL BlastErrorPrint PROTO((ValNodePtr error_return));
659 void LIBCALL BlastErrorPrintExtra PROTO((ValNodePtr error_return,  Boolean errpostex, FILE* fp));
660 /* Returns the errors as a string (added in revision 6.70).  The caller is
       responsible for deallocating the return value. */
661 CharPtr LIBCALL BlastErrorToString PROTO((ValNodePtr error_return));
662 
663 
/* Map between a BLAST program name string and a Uint1 program number.  The
   mapping itself, and who owns the string returned by BlastGetProgramName,
   are not stated in this header -- see the implementation. */
664 Uint1 LIBCALL BlastGetProgramNumber PROTO((CharPtr blast_program));
665 CharPtr LIBCALL BlastGetProgramName PROTO((Uint1 number));
666 
667 
668 
/* BlastHitRange allocation helpers.  BlastHitRangeNew takes an Int4 total
   and returns a BlastHitRangePtr; BlastHitRangeDestruct takes such a pointer
   and returns the same pointer type (presumably NULL -- TODO confirm). */
669 BlastHitRangePtr LIBCALL BlastHitRangeDestruct PROTO((BlastHitRangePtr old));
670 BlastHitRangePtr LIBCALL BlastHitRangeNew PROTO((Int4 total));
671 
/* Takes a search block and the options block; returns a BlastHitRangePtr. */
672 BlastHitRangePtr LIBCALL BioseqHitRangeEngineCore PROTO((BlastSearchBlkPtr search, BLAST_OptionsBlkPtr options));
673 
/* The two functions below take the same argument list as
   BioseqBlastEngineByLocEx and BioseqBlastEngineEx respectively, but return
   a SeqLocPtr rather than a SeqAlignPtr. */
674 SeqLocPtr LIBCALL BioseqHitRangeEngineByLoc PROTO((SeqLocPtr slp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total));
675 
676 SeqLocPtr LIBCALL BioseqHitRangeEngine PROTO((BioseqPtr bsp, CharPtr progname, CharPtr database, BLAST_OptionsBlkPtr options, ValNodePtr *other_returns, ValNodePtr *error_returns, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total));
677 
/*
 * Creation and destruction of BLAST_ExtendWord objects.
 * BLAST_ExtendWordParamsNew builds the parameter object from qlen,
 * multiple_hits and window_size; BLAST_ExtendWordNew takes such a
 * parameter object; BLAST_ExtendWordDestruct takes a BLAST_ExtendWordPtr
 * and returns one (presumably NULL -- confirm).
 * NOTE(review): unlike the surrounding declarations these three are written
 * without PROTO(()), and only BLAST_ExtendWordDestruct carries LIBCALL;
 * check that this matches the definitions.
 */
678 BLAST_ExtendWordParamsPtr BLAST_ExtendWordParamsNew (Int4 qlen, Boolean multiple_hits, Int4 window_size);
679 BLAST_ExtendWordPtr BLAST_ExtendWordNew (BLAST_ExtendWordParamsPtr ewp_params);
680 BLAST_ExtendWordPtr LIBCALL BLAST_ExtendWordDestruct (BLAST_ExtendWordPtr ewp);
681 
682 
/*
 * Miscellaneous helpers.  Only the signatures are visible in this header;
 * the notes below restate what the declarations themselves show.
 */
683 void LIBCALL updateLambdaK PROTO((BlastMatrixRescalePtr matrix_rescale, Boolean position_dependent));
684 
/* Takes a search block and returns a BlastSearchBlkPtr. */
685 BlastSearchBlkPtr GreedyAlignMemAlloc PROTO((BlastSearchBlkPtr search));
686 
/*
 * error_message and database are passed as CharPtr PNTR and descriptions
 * and alignments as Int4Ptr, i.e. all four are passed by pointer
 * (presumably filled in while parsing string_options -- confirm in the
 * definition, including who frees *error_message and *database).
 * NOTE(review): declared without PROTO(()), unlike its neighbours.
 */
687 Boolean parse_blast_options(BLAST_OptionsBlkPtr options, CharPtr string_options, CharPtr PNTR error_message, CharPtr PNTR database, Int4Ptr descriptions, Int4Ptr alignments);
688 
689 Int2
690 BlastNtWordExtend PROTO((BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, BLAST_Diag real_diag, Int2 context));
691 
692 Boolean MegaBlastBuildLookupTable PROTO((BlastSearchBlkPtr search));
693 
/* Maps a Uint1 value to an Int2 (a frame, judging by the name -- confirm). */
694 Int2 DefineToFrame PROTO((Uint1 define));
695 
/*
 * Database title stored with a SeqAnnot: one getter returning a CharPtr and
 * two functions that add a title.  The Ex form takes the same arguments as
 * BLASTAddBlastDBTitleToSeqAnnot plus a Boolean is_na.
 * NOTE(review): ownership of the returned CharPtr, and whether title is
 * copied, cannot be determined from this header.
 */
696 CharPtr BLASTGetDatabaseTitleFromSeqAnnot PROTO((SeqAnnotPtr seqannot));
697 void BLASTAddBlastDBTitleToSeqAnnot PROTO((SeqAnnotPtr seqannot, CharPtr title));
698 void BLASTAddBlastDBTitleToSeqAnnotEx PROTO((SeqAnnotPtr seqannot, CharPtr title, Boolean is_na));
699 
/*
 * NOTE(review): the roles of seq, pos and target and the meaning of the
 * Int4 result are not documented in this header -- see the definition.
 */
700 Int4 reverse_seq (Uint1 *seq, Uint1 *pos, Uint1 *target);
701 
/*
 * Comparison callback taking two untyped pointers and returning int.
 * NOTE(review): the pointed-to type and the sign convention of the result
 * are not visible here -- confirm against the definition.
 */
702 /* CC: Changed to have the same tie-breakers as score_compare_hsps */
703 int LIBCALLBACK BLASTResultHspScoreCmp(VoidPtr v1, VoidPtr v2);
704 
/*
 * SeqAlign pruning functions.  Each takes a SeqAlignPtr and returns a
 * SeqAlignPtr.  The two gi-list forms take the list as an Int4Ptr plus its
 * length (gi_list_total); BlastPruneSeqAlignByGiList also takes
 * hitlist_size.  The e-value form takes the bounds expect_low and
 * expect_high.
 * NOTE(review): whether the input seqalign is modified in place or a new
 * list is returned, and whether the bounds are inclusive, is not visible
 * here -- confirm in the definitions.
 */
705 SeqAlignPtr 
706 BlastPruneSeqAlignByGiList PROTO((SeqAlignPtr seqalign, Int4Ptr gi_list, 
707                                   Int4 gi_list_total, Int4 hitlist_size));
708 SeqAlignPtr 
709 BlastPruneSeqAlignBySortedGiList PROTO((SeqAlignPtr seqalign, Int4Ptr gi_list,
710                                         Int4 gi_list_total));
711 SeqAlignPtr 
712 BlastPruneSeqAlignByEvalueRange PROTO((SeqAlignPtr seqalign, FloatHi expect_low, FloatHi expect_high));
713 
/*
 * Takes a file name and an Int4Ptr gi_list_size (passed by pointer,
 * presumably to receive the number of entries -- confirm) and returns a
 * BlastDoubleInt4Ptr.
 */
714 BlastDoubleInt4Ptr GetGisFromFile PROTO((CharPtr file_name, Int4Ptr gi_list_size));
715 
716 /* ------ Functions related to Smith-Waterman algorithm ------ */
717 
/*
 * CheckStartForGappedAlignment and GetStartForGappedAlignment take the same
 * arguments (search, hsp, query, subject, matrix); the former returns a
 * Boolean and the latter an Int4.
 */
718 Boolean
719 CheckStartForGappedAlignment PROTO((BlastSearchBlkPtr search, BLAST_HSPPtr hsp, Uint1Ptr query, Uint1Ptr subject, Int4Ptr PNTR matrix));
720 
721 Int4 GetStartForGappedAlignment PROTO((BlastSearchBlkPtr search, BLAST_HSPPtr hsp, Uint1Ptr query, Uint1Ptr subject, Int4Ptr PNTR matrix));
722 
/*
 * Returns a BioseqPtr built from a sequence buffer, its length and an
 * alphabet code.
 * NOTE(review): whether the sequence buffer is copied and who frees the
 * returned Bioseq is not visible here -- confirm.
 */
723 BioseqPtr BlastMakeTempProteinBioseq PROTO((Uint1Ptr sequence, Int4 length, 
724                                             Uint1 alphabet));
725 
726 void HackSeqLocId PROTO((SeqLocPtr slp, SeqIdPtr id));
727 
728 /*  --------------------------------------------------------------------
729  *
730  *  BLAST_Wizard & related functions.
731  *
732  *  Use BLAST_WizardOptionsBlkInit to initialize a
733  *  BLAST_WizardOptionsBlk by setting every field to zero or FALSE.
734  *
735  *  Use BLAST_WizardOptionsBlkDone to free any memory owned by a
736  *  BLAST_WizardOptionsBlk, excluding the memory for the
737  *  BLAST_WizardOptionsBlk itself, which should be allocated on the
738  *  stack.
739  *
740  *  Use BLAST_WizardOptionsMaskInit to initialize a
741  *  BLAST_WizardOptionsMask by setting every field to FALSE, indicating
742  *  that the corresponding fields of some BLAST_WizardOptionsBlk aren't
743  *  set.
744  *
745  *  Use BLAST_Wizard to obtain a BLAST_OptionsBlk (the return value)
746  *  set up according to program, service, options, and mask.  The
747  *  alignments, descriptions, and error arguments are output parameters;
748  *  alignments and descriptions are optional, error is required.
749  *  BLAST_Wizard returns a null pointer if and only if *error != 0 on
750  *  exit.  The returned object must be freed with BLASTOptionDelete; if
751  *  *error != 0, then *error must be freed with MemFree.
752  *
753  *  --------------------------------------------------------------------
754  */
755 
756 void
757 BLAST_WizardOptionsBlkInit(
758     BLAST_WizardOptionsBlkPtr   options);
759 
760 void
761 BLAST_WizardOptionsBlkDone(
762     BLAST_WizardOptionsBlkPtr   options);
763 
764 void
765 BLAST_WizardOptionsMaskInit(
766     BLAST_WizardOptionsMaskPtr  mask);
767 
768 BLAST_OptionsBlkPtr
769 BLAST_Wizard(
770     const char*                 program,
771     const char*                 service,
772     BLAST_WizardOptionsBlkPtr   options,
773     BLAST_WizardOptionsMaskPtr  mask,
774     int*                        alignments,
775     int*                        descriptions,
776     char**                      error);
777 
778 /**
779  * A macro expression that returns 1, 0, -1 if a is greater than,
780  * equal to or less than b, respectively.  This macro may evaluate its
781  * arguments more than once, so do not pass expressions with side effects.
 *
 * The result is 0 whenever neither (a)>(b) nor (a)<(b) holds.  The
 * #ifndef guard keeps any definition of BLAST_CMP made before this point.
782  */
783 #ifndef BLAST_CMP
784 #define BLAST_CMP(a,b) ((a)>(b) ? 1 : ((a)<(b) ? -1 : 0))
785 #endif
786 
787 
788 /* ----------------------------------------------------------- */
789 
/* DEBUG */
/*
 * Time measuring utilities.
 *
 * Usage:
 *     start_timer;
 *     ... work ...
 *     stop_timer("label");
 *
 * With BLAST_TIMER defined, start_timer stores the current clock() value in
 * last_clock, and stop_timer(msg) prints to stderr the time elapsed since
 * the stored value (in seconds, clock() ticks divided by CLOCKS_PER_SEC),
 * labelled with msg, and then stores the new clock() value so that
 * consecutive stop_timer calls report consecutive intervals.
 *
 * Without BLAST_TIMER both macros expand to no-ops; msg is not evaluated.
 *
 * stop_timer is function-like in both configurations and expands to a
 * single statement, so it is safe in un-braced if/else bodies and must be
 * followed by a semicolon.  start_timer keeps its historical trailing
 * semicolon so that call sites written without one still compile; for
 * that reason do not use it as the un-braced body of an if that has an
 * else.
 */

/* #define BLAST_TIMER */

#ifdef BLAST_TIMER
/*
 * static: this header may be included from several translation units, and
 * an external definition here would be defined once per unit.  Each
 * translation unit therefore times its own start/stop pairs.
 */
static clock_t last_clock;
#define start_timer     last_clock = clock();

#define stop_timer(msg) do { \
    clock_t     current_clock = clock(), since; \
    since = current_clock - last_clock; \
    fprintf(stderr, "TIME [%s] - %5.2f\n", (msg), ((double) since) / CLOCKS_PER_SEC); \
    last_clock = current_clock; \
} while (0)
#else
#define start_timer ;
#define stop_timer(msg) ((void)0)
#endif
/* end of DEBUG block */
810 
811 
812 #ifdef __cplusplus
813 }
814 #endif
815 #endif /* !__BLAST__ */
816 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.