NCBI C Toolkit Cross Reference

C/tools/blastdef.h


  1 /* ===========================================================================
  2 *
  3 *                            PUBLIC DOMAIN NOTICE
  4 *               National Center for Biotechnology Information
  5 *
  6 *  This software/database is a "United States Government Work" under the
  7 *  terms of the United States Copyright Act.  It was written as part of
  8 *  the author's official duties as a United States Government employee and
  9 *  thus cannot be copyrighted.  This software/database is freely available
 10 *  to the public for use. The National Library of Medicine and the U.S.
 11 *  Government have not placed any restriction on its use or reproduction.
 12 *
 13 *  Although all reasonable efforts have been taken to ensure the accuracy
 14 *  and reliability of the software and data, the NLM and the U.S.
 15 *  Government do not and cannot warrant the performance or results that
 16 *  may be obtained by using this software or data. The NLM and the U.S.
 17 *  Government disclaim all warranties, express or implied, including
 18 *  warranties of performance, merchantability or fitness for any particular
 19 *  purpose.
 20 *
 21 *  Please cite the author in any work or product based on this material.
 22 *
 23 * ===========================================================================*/
 24 /*****************************************************************************
 25 
 26 File name: blastdef.h
 27 
 28 Author: Tom Madden
 29 
 30 Contents: #defines and definitions for structures used by BLAST.
 31 
 32 ******************************************************************************/
 33 /* $Revision: 6.169 $ 
 34 * $Log: blastdef.h,v $
 35 * Revision 6.169  2007/03/13 20:39:58  madden
 36 *  - Change the type of the dropoff_1st_pass, dropoff_2nd_pass,
 37 *    gap_x_dropoff, and gap_x_dropoff_final fields of the
 38 *    BLAST_OptionsBlk struct to Nlm_FloatHi.
 39 *  [from Mike Gertz]
 40 *
 41 * Revision 6.168  2006/05/03 14:41:50  madden
 42 * Added a Boolean field "unified_p" to the BLAST_OptionsBlk
 43 * structure.  The field indicates whether to use a combination of
 44 * alignment and compositional p-values when evaluating significance;
 45 * the field is ignored unless composition-based statistics is on.
 46 * (from Mike Gertz).
 47 *
 48 * Revision 6.167  2006/03/21 22:35:27  camacho
 49 * Add support for setting database length in BLAST_WizardOptions{Blk,Mask}
 50 *
 51 * Revision 6.166  2006/01/24 18:38:15  papadopo
 52 * from Mike Gertz: Remove #define'd constants that specify the composition adjustment mode.  These have been replaced by an enum in the composition adjustment library
 53 *
 54 * Revision 6.165  2005/08/31 20:32:47  coulouri
 55 * From Mike Gertz:
 56 *    - Added the kappa_expect_value field to the
 57 *      BLAST_OptionsBlk datatype.  This new field holds the cutoff value
 58 *      used by RedoAlignmentCore; it should equal expect_value if
 59 *      RedoAlignmentCore will not be called.
 60 *    - removed the now unused original_expect_value field of the
 61 *      BLAST_OptionsBlk datatype.
 62 *
 63 * Revision 6.164  2005/07/27 15:51:54  coulouri
 64 * remove unused queue_callback
 65 *
 66 * Revision 6.163  2005/05/16 17:43:29  papadopo
 67 * From Alejandro Schaffer: Added support for compositional score
 68 * matrix adjustment
 69 *
 70 * Revision 6.162  2005/04/25 14:16:36  coulouri
 71 * set db_chunk_size adaptively
 72 *
 73 * Revision 6.161  2005/01/10 18:52:29  coulouri
 74 * fixes from morgulis to allow concatenation of >255 queries in [t]blastn
 75 *
 76 * Revision 6.160  2004/11/19 13:22:05  madden
 77 * Remove no_check_score completely (from Mike Gertz)
 78 *
 79 * Revision 6.159  2004/09/28 16:02:29  papadopo
 80 * From Michael Gertz: Changed the "sumscore" field of an HSP to
 81 * "xsum" to represent a normalized sum score of linked HSPs;
 82 * the normalized score is more appropriate/useful in doing linking.
 83 *
 84 * Revision 6.158  2004/06/30 12:29:00  madden
 85 * Removed typedef for BlastPruneSapStruct and some defines, moved to blfmtutl.h
 86 *
 87 * Revision 6.157  2004/04/30 12:45:45  coulouri
 88 * bump version to 2.2.9
 89 *
 90 * Revision 6.156  2004/04/13 21:02:52  madden
 91 * Add ignore_gilist Boolean to Options for use in formatting
 92 *
 93 * Revision 6.155  2004/02/04 15:35:03  camacho
 94 * Rollback to fix problems in release 2.2.7
 95 *
 96 * Revision 6.154  2004/01/27 20:46:06  dondosha
 97 * Allow values 0, 1, 2 for no_traceback megablast option
 98 *
 99 * Revision 6.153  2004/01/05 22:09:26  madden
100 * Put back dashes in date
101 *
102 * Revision 6.152  2004/01/02 13:44:32  coulouri
103 * Revert to hardcoded BLAST_RELEASE_DATE
104 *
105 * Revision 6.151  2003/12/29 15:51:18  coulouri
106 * Bump version, use __DATE__ instead of hardcoded date
107 *
108 * Revision 6.150  2003/11/06 19:52:13  dondosha
109 * Added error MBTemplateType, so it can be returned when wordsize/template length combination is wrong
110 *
111 * Revision 6.149  2003/10/02 19:30:11  madden
112 * add field seAlign to SWResults for use in kappa.c
113 *
114 * Revision 6.148  2003/04/09 14:18:34  madden
115 * Update version and release-date
116 *
117 * Revision 6.147  2003/03/26 15:45:48  boemker
118 * Documented relationships among BLAST_OptionsBlk, BLAST_WizardOptionsBlk,
119 * and BLAST_WizardOptionsMask.
120 *
121 * Revision 6.146  2003/03/25 22:23:06  boemker
122 * Replaced cutoff_s2, which isn't used, with cutoff_s, which is.
123 * Added query_lcase_mask.
124 *
125 * Revision 6.145  2003/03/25 19:58:18  boemker
126 * Moved code to initialize search options from blastcgicmd.cpp to here, as
127 * BLAST_Wizard et al.
128 *
129 * Revision 6.144  2003/03/24 19:42:14  madden
130 * Changes to support query concatenation for blastn and tblastn
131 *
132 * Revision 6.143  2002/11/22 23:28:43  dondosha
133 * Use array of structures instead of array of pointers for initial offset pairs
134 *
135 * Revision 6.142  2002/11/16 17:12:55  madden
136 * Change version and date
137 *
138 * Revision 6.141  2002/11/04 22:51:13  dondosha
139 * Changed FloatHi pvalue to Int4 num_ident in HSP structures
140 *
141 * Revision 6.140  2002/09/13 19:11:02  camacho
142 * Added rps_qlen field
143 *
144 * Revision 6.139  2002/09/11 21:15:23  camacho
145 * Removed obsolete #define and comment about BlastSeqIdList structure
146 *
147 * Revision 6.138  2002/09/11 20:46:25  camacho
148 * Removed deprecated BlastSeqIdListPtr code
149 *
150 * Revision 6.137  2002/08/26 15:49:51  madden
151 * Change release date and version
152 *
153 * Revision 6.136  2002/08/09 19:39:20  camacho
154 * Added constants for some blast search parameters
155 *
156 * Revision 6.135  2002/06/21 21:43:01  camacho
157 * Removed obsolete BlastSeqIdList structure and functions
158 *
159 * Revision 6.134  2002/05/17 21:40:13  dondosha
160 * Added 2 optimal Mega BLAST word templates for length 21
161 *
162 * Revision 6.133  2002/05/14 22:20:20  dondosha
163 * Renamed maximal discontiguous template type into optimal
164 *
165 * Revision 6.132  2002/04/23 20:59:53  madden
166 * Change version and date for release
167 *
168 * Revision 6.131  2002/04/09 18:16:43  dondosha
169 * Added more options/parameters for megablast
170 *
171 * Revision 6.130  2002/03/28 18:53:18  madden
172 * Add ValNodePtr mask1 to BlastSearch structure
173 *
174 * Revision 6.129  2001/12/28 20:38:39  dondosha
175 * Moved Mega BLAST related parameters into a separate structure
176 *
177 * Revision 6.128  2001/12/28 18:01:26  dondosha
178 * Added field scoreThisAlign to SWResults to allow more tie-breaking options
179 *
180 * Revision 6.127  2001/12/14 22:05:40  madden
181 * Changed version and release date
182 *
183 * Revision 6.126  2001/09/11 14:28:31  madden
184 * Added timed_out Boolean to SearchBlk
185 *
186 * Revision 6.125  2001/09/07 14:46:44  dondosha
187 * Roll back removal of threshold_first from functions and structures
188 *
189 * Revision 6.124  2001/09/06 20:24:34  dondosha
190 * Removed threshold_first
191 *
192 * Revision 6.123  2001/08/06 12:50:51  madden
193 * Change release date
194 *
195 * Revision 6.122  2001/07/12 19:50:24  madden
196 * Changed release date
197 *
198 * Revision 6.121  2001/06/28 13:42:09  madden
199 * Fixes to prevent overflow on number of hits reporting
200 *
201 * Revision 6.120  2001/06/12 19:48:56  madden
202 * Introduce total_hsp_limit, check before making SeqAlign
203 *
204 * Revision 6.119  2001/04/13 20:56:08  madden
205 * Updated version to 2.2.1, changed date
206 *
207 * Revision 6.118  2001/04/11 20:56:21  madden
208 * Added scalingFactor for rpsblast, changed release date
209 *
210 * Revision 6.117  2001/03/30 21:58:18  madden
211 * Change release date and version
212 *
213 * Revision 6.116  2001/03/27 21:27:01  madden
214 * Minor efficiency in how lookup table is made
215 *
216 * Revision 6.115  2001/03/19 18:52:57  madden
217 * Add base_offset element to structure for BlastHitRange
218 *
219 * Revision 6.114  2001/02/07 21:05:33  dondosha
220 * Added an output stream to BlastOptionsBlk
221 *
222 * Revision 6.113  2000/12/21 22:28:17  dondosha
223 * Added option and parameter for percent identity cutoff
224 *
225 * Revision 6.112  2000/11/29 16:17:56  dondosha
226 * Added a definition of small structure BLASTHSPSegment
227 *
228 * Revision 6.111  2000/11/14 18:14:00  madden
229 * release date to Nov-13-2000
230 *
231 * Revision 6.110  2000/11/08 22:18:05  dondosha
232 * Added longest_intron integer option and parameter
233 *
234 * Revision 6.109  2000/11/07 16:30:25  madden
235 * Introduce intermediate score (before linking of HSPs) for blastx and tblastn
236 *
237 * Revision 6.108  2000/11/03 20:16:24  dondosha
238 * Changed one_line_results option and parameter to more meaningful no_traceback
239 *
240 * Revision 6.107  2000/11/01 16:25:56  madden
241 * Changes from Futamura for psitblastn
242 *
243 * Revision 6.106  2000/10/18 19:53:19  shavirin
244 * Empty log message.
245 *
246 * Revision 6.105  2000/10/18 19:17:56  shavirin
247 * Changed BLAST_ENGINE_VERSION and BLAST_RELEASE_DATE
248 *
249 * Revision 6.104  2000/10/05 19:50:49  dondosha
250 * Added mb_result_struct to the BlastSearchBlk to be used instead of result_struct in Mega BLAST
251 *
252 * Revision 6.103  2000/09/28 14:48:20  dondosha
253 * Added exact_match_array to hitlist structure for megablast initial hits
254 *
255 * Revision 6.102  2000/09/21 19:16:30  madden
256 * increase AWAKE_THR_MIN_SIZE by 100
257 *
258 * Revision 6.101  2000/08/29 19:35:49  madden
259 * Add gilist_not_owned to blast_gi_list
260 *
261 * Revision 6.100  2000/08/08 20:37:21  madden
262 * increase version number to 2.1.1 and release date
263 *
264 * Revision 6.99  2000/07/17 14:05:22  shavirin
265 * Added parameter Out-Of-Frame shift penalty and query DNAP sequence
266 *
267 * Revision 6.98  2000/07/11 18:38:02  madden
268 * decreased size of helper array, added prefetch to BlastGappedScoreInternal
269 *
270 * Revision 6.97  2000/07/11 17:16:20  shavirin
271 * Added new parameter is_ooframe for Out-Of-Frame gapping algorithm.
272 *
273 * Revision 6.96  2000/07/10 15:41:28  madden
274 * Add typedef for BLAST_HSP_helper
275 *
276 * Revision 6.95  2000/07/07 21:20:07  vakatov
277 * Get all "#include" out of the 'extern "C" { }' scope!
278 *
279 * Revision 6.94  2000/07/06 17:24:55  dondosha
280 * Added option and parameter megablast_full_deflines
281 *
282 * Revision 6.93  2000/06/30 17:52:45  madden
283 * Move AWAKE_THR_MIN_SIZE to blastdef.h
284 *
285 * Revision 6.92  2000/06/29 20:30:03  madden
286 * Update version and date
287 *
288 * Revision 6.91  2000/06/08 20:34:18  madden
289 * add explode_seqids option to show all ids in a defline
290 *
291 * Revision 6.90  2000/05/26 20:04:57  madden
292 * Raise version and date
293 *
294 * Revision 6.89  2000/05/12 19:40:59  dondosha
295 * Added qid_array element to BlastSearchBlk
296 *
297 * Revision 6.88  2000/05/01 19:04:31  shavirin
298 * Changed parameter level in BlastErrorMsg structure from Uint1 to Uint2.
299 *
300 * Revision 6.87  2000/04/21 20:48:05  madden
301 * Change version and date
302 *
303 * Revision 6.86  2000/04/06 14:47:10  madden
304 * Added original_expect_value
305 *
306 * Revision 6.85  2000/04/03 21:20:03  dondosha
307 * Added option and parameter is_neighboring
308 *
309 * Revision 6.84  2000/03/31 19:10:44  dondosha
310 * Changed some names related to MegaBlast
311 *
312 * Revision 6.83  2000/03/13 21:01:24  dondosha
313 * Added boolean option sort_gi_list to options block structure
314 *
315 * Revision 6.82  2000/02/29 18:17:23  shavirin
316 * Variable query_dna_mask changed to query_lcase_mask.
317 *
318 * Revision 6.81  2000/02/18 15:30:36  shavirin
319 * Added parameter query_dna_mask into options and parameters.
320 *
321 * Revision 6.80  2000/02/17 21:23:09  shavirin
322 * Added parameter is_rps_blast.
323 *
324 * Revision 6.79  2000/02/17 19:00:44  shavirin
325 * Removed theCacheSize parameter from everywhere.
326 *
327 * Revision 6.78  2000/02/15 19:06:09  shavirin
328 * Added parameter filter_string into BLAST_ParameterBlk structure.
329 *
330 * Revision 6.77  2000/02/02 18:21:51  madden
331 * Add LinkHelpStruct definition
332 *
333 * Revision 6.76  2000/02/02 16:52:43  dondosha
334 * Added option one_line_results to BLAST_OptionsBlk and BLAST_ParameterBlk
335 *
336 * Revision 6.75  2000/02/01 18:02:22  dondosha
337 * Added greedy alignment option to BLAST_OptionsBlk and query context offsets array to BlastSearchBlk
338 *
339 * Revision 6.74  2000/01/26 22:00:52  madden
340 * Added subject_index field to SWResults
341 *
342 * Revision 6.73  2000/01/20 19:12:00  madden
343 * Change BLAST version and date
344 *
345 * Revision 6.72  2000/01/13 18:10:43  madden
346 * Fix problem with incorrect stat values for blastn and missing hits
347 *
348 * Revision 6.71  2000/01/11 17:02:48  shavirin
349 * Added element theCacheSize into BLAST_OptionsBlk and BLAST_ParameterBlk.
350 *
351 * Revision 6.70  1999/12/31 14:23:19  egorov
352 * Add support for using a mixture of real and mask databases with gi-list files:
353 * 1. Change logic of creating rdfp list.
354 * 2. BlastGetDbChunk gets real databases first, then masks.
355 * 3. Proper calculation of database sizes using alias files.
356 * 4. Change to CommonIndex to support use of mask databases.
357 * 5. Use correct gis in formatted output (BlastGetAllowedGis()).
358 * 6. Other small changes
359 *
360 * Revision 6.69  1999/12/21 20:04:15  egorov
361 * gi_list now contains start position for corresponding database
362 *
363 * Revision 6.68  1999/11/30 18:23:08  shavirin
364 * Added parameter max_num_patterns to the BLAST_OptionsBlkPtr structure
365 *
366 * Revision 6.67  1999/11/15 22:03:31  madden
367 * added Boolean isFirstAlignment to SWResults
368 *
369 * Revision 6.66  1999/11/12 20:57:39  shavirin
370 * Added parameter use_best_align into BLAST_ParameterBlkPtr
371 *
372 * Revision 6.65  1999/11/12 16:37:30  shavirin
373 * Added new option use_best_align into Blast options.
374 *
375 * Revision 6.64  1999/10/26 20:45:19  madden
376 * Add use_real_db_size option
377 *
378 * Revision 6.63  1999/10/05 17:42:54  shavirin
379 * Removed global variables from blast.c
380 *
381 * Revision 6.62  1999/09/28 20:14:31  madden
382 * Joerg changes to minimize cache misses
383 *
384 * Revision 6.61  1999/08/31 13:42:23  madden
385 * Moved SWResults to blastdef.h from profiles.h
386 *
387 * Revision 6.60  1999/08/27 18:07:33  shavirin
388 * Passed parameter decline_align from top to the engine.
389 *
390 * Revision 6.59  1999/08/26 14:56:49  madden
391 * Raise version and date
392 *
393 * Revision 6.58  1999/08/26 14:55:16  madden
394 * Fixed Int8 problem
395 *
396 * Revision 6.57  1999/08/20 19:47:41  madden
397 * removed version element
398 *
399 * Revision 6.56  1999/08/17 18:37:12  shavirin
400 * Added phi_pattern element into options block.
401 *
402 * Revision 6.55  1999/08/17 14:02:34  madden
403 * add smith_waterman and tweak_parameters fields to Options
404 *
405 * Revision 6.54  1999/05/10 18:47:52  madden
406 * Changed version to 2.0.9
407 *
408 * Revision 6.53  1999/05/08 15:04:24  madden
409 * Changed version and release date
410 *
411 * Revision 6.52  1999/04/23 19:25:01  madden
412 * Fixes a prototype complaint
413 *
414 * Revision 6.51  1999/04/23 16:45:54  madden
415 * call BQ_IncSemaphore as callback
416 *
417 * Revision 6.50  1999/04/22 16:46:13  shavirin
418 * Added semaphore ID to the search_blk structure.
419 *
420 * Revision 6.49  1999/04/01 21:42:47  madden
421 * Fix memory leaks when gi list is used
422 *
423 * Revision 6.48  1999/03/18 21:13:32  egorov
424 * The "output" field added to search block.  This is a VoidPtr and an application can
425 * use it as a stream, ASNIO, etc. to output blast results.
426 *
427 * Revision 6.47  1999/03/17 16:49:11  madden
428 * Removed comment within comment
429 *
430 * Revision 6.46  1999/02/17 13:23:01  madden
431 * Added hsp_num_max
432 *
433 * Revision 6.45  1999/01/28 16:04:56  madden
434 * do_not_reallocate Boolean for HSPs
435 *
436 * Revision 6.44  1999/01/26 17:56:37  madden
437 * query_id added to HitRange
438 *
439 * Revision 6.43  1999/01/05 13:57:19  madden
440 * Changed version and release date
441 *
442  * Revision 6.42  1998/12/31 18:17:03  madden
443  * Added strand option
444  *
445  * Revision 6.41  1998/12/29 17:45:06  madden
446  * Add do_sum_stats flag
447  *
448  * Revision 6.40  1998/12/21 13:09:53  madden
449  * Changed version and release date
450  *
451  * Revision 6.39  1998/11/04 01:36:05  egorov
452  * Add support for entrez-query and org-name to blast3
453  *
454  * Revision 6.38  1998/09/16 18:58:57  madden
455  * Changed release number and date
456  *
457  * Revision 6.37  1998/09/14 15:11:15  egorov
458  * Add support for Int8 length databases; remove unused variables
459  *
460  * Revision 6.36  1998/07/30 19:00:32  madden
461  * Change to allow search of subset of database
462  *
463  * Revision 6.35  1998/07/28 21:17:59  madden
464  * Added do_not_reevaluate
465  *
466  * Revision 6.34  1998/07/25 14:26:38  madden
467  * Added comments
468  *
469  * Revision 6.33  1998/07/22 12:16:25  madden
470  * Added handle_results
471  *
472  * Revision 6.32  1998/07/21 20:58:04  madden
473  * Changes to allow masking at hash only
474  *
475  * Revision 6.31  1998/07/17 15:39:56  madden
476  * Changes for Effective search space.
477  *
478  * Revision 6.30  1998/07/14 20:17:05  egorov
479  * Add two new parameters (gilist and gifile) to BLAST_OptionsBlk
480  *
481  * Revision 6.29  1998/06/17 18:10:07  madden
482  * Added isPatternSearch to Options
483  *
484  * Revision 6.28  1998/06/12 16:08:49  madden
485  * BlastHitRange stuff
486  *
487  * Revision 6.27  1998/05/28 19:59:16  madden
488  * Added typedef for BLASTHeapStruct
489  *
490  * Revision 6.26  1998/05/17 16:28:43  madden
491  * Allow changes to filter options and cc filtering.
492  *
493  * Revision 6.25  1998/05/05 13:56:38  madden
494  * Raised version to 2.0.5 and changed date
495  *
496  * Revision 6.24  1998/04/24 19:27:05  madden
497  * Added BlastMatrixRescalePtr
498  *
499  * Revision 6.23  1998/04/01 22:47:14  madden
500  * Added query_invalid flag
501  *
502  * Revision 6.22  1998/03/24 15:38:22  madden
503  * Use BlastDoubleInt4Ptr to keep track of gis and ordinal_ids
504  *
505  * Revision 6.21  1998/03/18 14:14:20  madden
506  * Support random access by gi list
507  *
508  * Revision 6.20  1998/03/14 18:29:21  madden
509  * Added BlastSeqIdListPtr
510  *
511  * Revision 6.19  1998/02/26 22:34:37  madden
512  * Changes for 16 bit windows
513  *
514  * Revision 6.18  1998/02/26 19:10:37  madden
515  * Removed elements with BLAST_COLLECT_SPECIAL_STATS defines
516  *
517  * Revision 6.17  1998/02/24 22:46:29  madden
518  * Added perform_culling Boolean and changed release date
519  *
520  * Revision 6.16  1998/02/19 17:17:10  madden
521  * Use of Int4 rather than Int2 when pruning SeqAlign
522  *
523  * Revision 6.15  1998/01/05 16:46:52  madden
524  * One or both strands can be searched, as opposed to only both, changes to number of contexts
525  *
526  * Revision 6.14  1997/12/23 19:14:14  madden
527  * release version to 2.0.4
528  *
529  * Revision 6.13  1997/12/23 18:12:32  madden
530  * Changes for range-dependent blast
531  *
532  * Revision 6.12  1997/12/12 20:38:02  madden
533  * Fix to comments
534  *
535  * Revision 6.11  1997/12/11 22:20:16  madden
536  * Corrected blast_type defines
537  *
538  * Revision 6.10  1997/12/10 22:41:40  madden
539  * program number defines
540  *
541  * Revision 6.9  1997/11/14 21:30:16  madden
542  * Changed version and date
543  *
544  * Revision 6.8  1997/10/26 17:26:59  madden
545  * Changes for range dependent limits
546  *
547  * Revision 6.7  1997/10/01 13:35:28  madden
548  * Changed BLAST_VERSION to BLAST_ENGINE_VERSION
549  *
550  * Revision 6.6  1997/09/22 17:36:24  madden
551  * MACROS for position-specific matrices from Andy Neuwald
552  *
553  * Revision 6.5  1997/09/18 22:22:12  madden
554  * Added prune functions
555  *
556  * Revision 6.4  1997/09/11 18:49:26  madden
557  * Changes to enable searches against multiple databases.
558  *
559  * Revision 6.3  1997/09/10 21:27:57  madden
560  * Changes to set CPU limits
561  *
562  * Revision 6.2  1997/09/03 19:06:35  madden
563  * changed BLAST_VERSION and BLAST_RELEASE_DATE
564  *
565  * Revision 6.1  1997/08/27 14:46:48  madden
566  * Changes to enable multiple DB searches
567  *
568  * Revision 6.0  1997/08/25 18:52:32  madden
569  * Revision changed to 6.0
570  *
571  * Revision 1.63  1997/08/20 21:43:10  madden
572  * Updated release date
573  *
574  * Revision 1.62  1997/07/21 17:37:15  madden
575  * Added define for BLAST_RELEASE_DATE
576  *
577  * Revision 1.61  1997/07/18 20:55:45  madden
578  * Added BLAST_VERSION
579  *
580  * Revision 1.60  1997/07/15 20:36:43  madden
581  * Added ValNodePtr mask
582  *
583  * Revision 1.59  1997/07/14 15:33:00  madden
584  * typedef for BlastErrorMsg
585  *
586  * Revision 1.58  1997/05/22 21:24:52  madden
587  * Added support for final gapX dropoff value
588  *
589  * Revision 1.57  1997/05/20 17:51:33  madden
590  * Added element SeqLocPtr query_slp to BlastSearch
591  *
592  * Revision 1.56  1997/05/06 22:19:35  madden
593  * Added use_large_gaps and subject_length
594  *
595  * Revision 1.55  1997/04/09  20:01:53  madden
596  * Added seqid_list to SearchBlk
597  *
598  * Revision 1.54  1997/04/03  19:48:13  madden
599  * Changes to use effective database length instead of the length of each
600  * sequence in statistical calculations.
601  *
602  * Revision 1.53  1997/03/31  17:07:57  madden
603  * Added BLAST_COLLECT_STATS define.
604  *
605  * Revision 1.52  1997/03/20  22:56:24  madden
606  * Added gap_info to hsp.
607  *
608  * Revision 1.51  1997/03/14  22:06:11  madden
609  * fixed MT bug in BlastReevaluateWithAmbiguities.
610  *
611  * Revision 1.50  1997/03/08  16:52:16  madden
612  * y
613  * Added discontinuous option to ParameterBlk.
614  *
615  * Revision 1.49  1997/02/25  19:17:05  madden
616  * Added discontinuous flag to options.
617  *
618  * Revision 1.48  1997/02/23  16:44:47  madden
619  * GapAlignBlkPtr added to search structure.
620  *
621  * Revision 1.47  1997/02/20  18:38:34  madden
622  * Added Int4 db_length to Options block.
623  *
624  * Revision 1.46  1997/02/18  21:03:00  madden
625  * Added #define FILTER_NONE 0.
626  *
627  * Revision 1.45  1997/02/17  17:40:18  madden
628  * Added seqalign to ResultHitlistptr
629  *
630  * Revision 1.44  1997/02/11  19:30:54  madden
631  * Added program_name to Options.
632  *
633  * Revision 1.43  1997/02/10  20:27:01  madden
634  * Changed some CharPtr's into Uint1Ptr's.
635  *
636  * Revision 1.42  1997/02/10  20:14:23  madden
637  * replaced doubles by Nlm_FloatHi's.
638  *
639  * Revision 1.41  1997/02/10  20:03:58  madden
640  * Added specific to BlastAllWordsPtr.
641  *
642  * Revision 1.40  1997/02/10  15:36:40  madden
643  * added posConverged to the BlastSearchBlk.
644  *
645  * Revision 1.39  1997/02/06  14:27:15  madden
646  * Addition of BlastAllWord structure.
647  *
648  * Revision 1.38  1997/02/03  13:02:12  madden
649  * Added length to BLASTSubjectInfo.
650  *
651  * Revision 1.37  1997/01/17  17:41:44  madden
652  * Added flags for position based BLAST.
653  *
654  * Revision 1.36  1997/01/13  15:37:05  madden
655  * Changed prototypes for star_callback and tick_callback.
656  *
657  * Revision 1.35  1997/01/11  18:22:10  madden
658  * Changes to allow S2 to be set.
659  *
660  * Revision 1.34  1997/01/09  17:44:35  madden
661  * Added "bit_score" to BLASTResultHsp.
662  *
663  * Revision 1.33  1996/12/27  20:44:10  madden
664  * Changes to require that part of the query be included.
665  *
666  * Revision 1.32  1996/12/23  14:04:44  madden
667  * Added gap_trigger.
668  *
669  * Revision 1.31  1996/12/20  21:11:40  madden
670  * Changes to allow multiple hits runs only.
671  *
672  * Revision 1.30  1996/12/18  14:33:13  madden
673  * Added high_score element.
674  *
675  * Revision 1.29  1996/12/17  17:27:03  madden
676  * Count number of attempted gappings.
677  *
678  * Revision 1.28  1996/12/17  13:47:57  madden
679  * Added star_proc.
680  *
681  * Revision 1.27  1996/12/16  14:35:48  madden
682  * Added gapped_calculation Boolean
683  *
684  * Revision 1.26  1996/12/13  22:00:23  madden
685  * Corrected starting point for gapped extension with traceback.
686  *
687  * Revision 1.25  1996/12/13  18:13:56  madden
688  * Added tick callback functions
689  *
690  * Revision 1.24  1996/12/13  15:09:31  madden
691  * Changes to parameters used for gapped extensions.
692  *
693  * Revision 1.23  1996/12/09  23:24:05  madden
694  * Added parameters to control which sequences get a gapped alignment.
695  *
696  * Revision 1.22  1996/12/08  15:19:59  madden
697  * Added parameters for gapped alignments.
698  *
699  * Revision 1.21  1996/11/27  21:56:57  madden
700  * Removed define for XNU.
701  *
702  * Revision 1.20  1996/11/18  18:07:57  madden
703  * *** empty log message ***
704  *
705  * Revision 1.19  1996/11/18  17:28:13  madden
706  * Added BLAST_SEARCH_ALLOC_TRANS_INFO define.
707  *
708  * Revision 1.18  1996/11/18  15:45:40  madden
709  * Defines for filter type added (by S. Shavirin),.
710  *
711  * Revision 1.17  1996/11/15  17:54:54  madden
712  * Added support for alternate genetic codes for blastx, tblast[nx].
713  *
714  * Revision 1.16  1996/11/13  22:35:18  madden
715  * Added genetic_code and db_genetic_code elements to blastdef.h
716  *
717  * Revision 1.15  1996/11/12  16:21:53  madden
718  * Added context_factor
719  *
720  * Revision 1.14  1996/11/06  22:10:01  madden
721  * translation_buffer changed from CharPtr to Uint1Ptr.
722  *
723  * Revision 1.13  1996/11/04  16:59:43  madden
724  * Added translation_table and translation_table_rc elements
725  * to BlastSearchBlk.
726  *
727  * Revision 1.12  1996/10/03  20:49:29  madden
728  * Added xsum member to HSP_Link structure.
729  * ,.
730  *
731  * Revision 1.11  1996/10/01  21:24:02  madden
732  * Added e2.
733  *
734  * Revision 1.10  1996/09/26  13:02:32  madden
735  * Removed ifdef for BLAST_COLLECT_STATS with counters.
736  *
737  * Revision 1.9  1996/09/12  21:13:46  madden
738  * *** empty log message ***
739  *
740  * Revision 1.8  1996/09/11  22:21:51  madden
741  * *** empty log message ***
742  *
743  * Revision 1.7  1996/09/11  19:14:09  madden
744  * Added BLAST_OptionsBlkPtr structure and use thereof.
745  *
746  * Revision 1.6  1996/08/14  18:16:13  madden
747  * removed frame from Context.
748  *
749  * Revision 1.5  1996/08/14  17:19:02  madden
750  * Added frame to BlastSeqBlkPtr.
751  *
752  * Revision 1.4  1996/08/13  15:26:29  madden
753  * Changes for tblastn.
754  *
755  * Revision 1.3  1996/08/09  22:11:12  madden
756  * Added original_sequence to BlastSequenceBlk.
757  *
758  * Revision 1.2  1996/08/07  14:24:42  madden
759  * Removed include for blast18p.h and objblst2.h
760  *
761  * Revision 1.1  1996/08/05  20:32:18  madden
762  * Initial revision
763  *
764  * Revision 1.51  1996/08/02  14:20:06  madden
765  * Removed multiproc structure.
766  *
767  * Revision 1.50  1996/07/31  13:09:17  madden
768  * Changes for threaded blast.
769  *
770  * Revision 1.49  1996/07/24  12:01:28  madden
771  * Changes for blastx
772  *
773  * Revision 1.48  1996/07/18  22:00:49  madden
774  * Addition of BLAST_ExtendWordParams structure.
775  *
776  * Revision 1.47  1996/07/18  13:36:34  madden
777  * Addition of the BLASTContextStructPtr.
778  *
779  * Revision 1.46  1996/07/16  14:37:42  madden
780  * Removed _blast_link_structure .
781  *
782  * Revision 1.45  1996/07/11  16:03:58  madden
783  * SaveCurrentHitlist keeps track of which set an HSP belongs to.
784  *
785  * Revision 1.44  1996/07/02  14:33:16  madden
786  * Added hspcnt_max.
787  *
788  * Revision 1.43  1996/07/02  12:04:15  madden
789  * HSP's saved on array, rather than linked list.
790  *
791  * Revision 1.42  1996/06/26  19:38:12  madden
792  * Removed ifdef.
793  *
794  * Revision 1.41  1996/06/24  20:26:46  madden
795  * Added dropoff_1st_pass and dropoff_2nd_pass to ParameterBlkPtr.
796  *
797  * Revision 1.40  1996/06/24  17:58:21  madden
798  * Removed X_set parameter, added right and left dropoff's.
799  *
800  * Revision 1.39  1996/06/20  16:15:57  madden
801  * Replaced int's with Int4's.
802  *
803  * Revision 1.38  1996/06/19  14:19:53  madden
804  * Added define for BLASTSubjectInfoPtr.
805  *
806  * Revision 1.37  1996/06/17  19:03:07  madden
807  * Removed unused structure.
808  *
809  * Revision 1.36  1996/06/14  17:58:13  madden
810  * Changes to avoid nulling out arrays for every sequence.
811  *
812  * Revision 1.35  1996/06/13  21:03:06  madden
813  * Added actual_window element to ExtendWord structure.
814  *
815  * Revision 1.34  1996/06/11  17:58:31  madden
816  * Changes to allow shorter arrays for multiple hits type blast.
817  *
818  * Revision 1.33  1996/06/10  16:52:16  madden
819  * Use bit-shifting and masking instead of dividing and remainder.
820  *
821  * Revision 1.32  1996/06/10  13:44:07  madden
822  * Changes to reduce the size of the "already visited" array.
823  *
824  * Revision 1.31  1996/06/06  17:55:16  madden
825  * Added number_of_bits to ParameterBlkPtr.
826  *
827  * Revision 1.30  1996/06/06  13:23:17  madden
828  * Added elements cutoff_big_gap and ignore_small_gaps to ParameterBlkPt.
829  *
830  * Revision 1.29  1996/05/29  12:44:04  madden
831  * Added structure BlastTimeKeeper.
832  *
833  * Revision 1.28  1996/05/28  14:16:32  madden
834  * Added Int4's to collect statistics info.
835  *
836  * Revision 1.27  1996/05/23  21:55:04  madden
837  * Removed unused variable initlen
838  *
839  * Revision 1.26  1996/05/23  21:48:23  madden
840  * Removed unused defines.
841  *
842  * Revision 1.25  1996/05/16  19:51:09  madden
843  * Added documentation block.
844  *
845  * Revision 1.24  1996/05/16  13:29:38  madden
846  * Added defines for contiguous or discontiguous calls.
847  *
848  * Revision 1.23  1996/05/01  15:00:00  madden
849  * Added BlastResults structure defs.
850  *
851  * Revision 1.22  1996/04/24  16:17:26  madden
852  * Added new structure, BLAST_Link.
853  *
854  * Revision 1.21  1996/04/24  12:52:48  madden
855  * ID's for sequences simplified.
856  *
857  * Revision 1.20  1996/04/03  19:14:35  madden
858  * Removed defunct HSP ptr's.
859  *
860  * Revision 1.19  1996/03/29  21:27:43  madden
861  * "hitlist" now kept on SeqAlign rather than HitList.
862  *
863  * Revision 1.17  1996/03/27  19:51:53  madden
864  * "current_hitlist" added to Search Structure.
865  *
866  * Revision 1.16  1996/03/26  19:36:59  madden
867  * Added  ReadDBFILEPtr to Search structure.
868  *
869  * Revision 1.15  1996/03/25  16:35:18  madden
870  * Added old_stats.
871  *
872  * Revision 1.14  1996/02/28  21:37:43  madden
873  * Added "trim" variables to segments for HSP.
874  *
875  * Revision 1.13  1996/02/06  22:51:13  madden
876  * Added "prelim" to BlastSearch
877  *
878  * Revision 1.12  1996/02/02  19:25:32  madden
879  * Added wfp_first and wfp_second to BlastParameterBlk for first and second pass.
880  *
881  * Revision 1.11  1996/01/29  21:12:07  madden
882  * *** empty log message ***
883  *
884  * Revision 1.10  1996/01/23  16:31:47  madden
885  * e_cutoff changed from BLAST_Score to double in ParameterBlk.
886  *
887  * Revision 1.9  1996/01/17  17:00:40  madden
888  * Added gap parameters to ParameterBlk, dblen to SearchBlk.
889  *
890  * Revision 1.8  1996/01/17  13:45:58  madden
891  * Added gap_prob and gap_decay_rate to ParameterBlk.
892  *
893  * Revision 1.7  1996/01/11  15:17:36  madden
894  * Added process_num to ParameterBlk.
895  *
896  * Revision 1.6  1996/01/08  23:23:55  madden
897  * removed "len" from HSP.
898  *
899  * Revision 1.5  1996/01/06  18:57:47  madden
900  * Added BLAST_HSP_LINK structure.
901  *
902  * Revision 1.4  1995/12/28  21:26:05  madden
903  * *** empty log message ***
904  *
905  * Revision 1.3  1995/12/26  23:04:14  madden
906  * Added parameters to BlastParameterBlk.
907  *
908  * Revision 1.2  1995/12/21  23:10:41  madden
909  * BLAST_Score prototypes moved to blastkar.h.
910  *
911  * Revision 1.1  1995/12/19  22:33:06  madden
912  * Initial revision
913  *
914  * Revision 1.1  1995/12/08  15:48:23  madden
915  * Initial revision
916  *
917  * */
918 #ifndef __BLASTSTR__
919 #define __BLASTSTR__
920 
921 #include <ncbi.h>
922 #include <lookup.h>
923 #include <blastkar.h>
924 #include <objalign.h>
925 #include <sequtil.h>
926 #include <readdb.h>
927 #include <gapxdrop.h>
928 #include <mbalign.h>
929 
930 #ifdef __cplusplus
931 extern "C" {
932 #endif
933 
934 /* Defines for program numbers. (Translated in BlastGetProgramNumber). */
935 #define blast_type_undefined 0
936 #define blast_type_blastn 1
937 #define blast_type_blastp 2
938 #define blast_type_blastx 3
939 #define blast_type_tblastn 4
940 #define blast_type_tblastx 5
941 #define blast_type_psitblastn 6
942 
943 
944 /* Defines for strand_option; determines which strand of the query to compare.
       Numerically, BLAST_BOTH_STRAND (3) equals
       BLAST_TOP_STRAND (1) | BLAST_BOTTOM_STRAND (2). */
945 #define BLAST_TOP_STRAND 1
946 #define BLAST_BOTTOM_STRAND 2
947 #define BLAST_BOTH_STRAND 3
948 
949 /* Defines that specify whether or not BLAST should delete some memory, or
950         leave it up to the caller.
951 */
952 #define BLAST_OWN 0
953 #define BLAST_NOT_OWN 1
954 
955 /* Specifies the minimum search space size for an awake thread.
       This is a floating-point constant (2e12). */
956 #define AWAKE_THR_MIN_SIZE 2000000000000.0
957 
958 #ifndef _BLASTCONCAT_
959 #include "blastconcat.h"
960 #endif
961 /* --KM concat */
962 
963 /* Some default values (used when creating the blast options block and for
964  * command-line program defaults). When changing these defaults, please
965  * remember to update the defaults in the command-line programs.
     * Suffixes: _PROT = protein, _NUCL = nucleotide, _MEGABLAST = megablast.
     * A window size of zero corresponds to the single hit algorithm (see the
     * window_size field of BLAST_OptionsBlk). */
966 #define WINDOW_SIZE_PROT 40
967 #define WINDOW_SIZE_NUCL 0
968 #define WINDOW_SIZE_MEGABLAST 0
969 
970 #define WORDSIZE_PROT 3
971 #define WORDSIZE_NUCL 11
972 #define WORDSIZE_MEGABLAST 28
973 
974 /* Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
975  * More gap costs are listed in BLASTOptionSetGapParams. */
976 #define GAP_OPEN_PROT 11
977 #define GAP_OPEN_NUCL 5
978 #define GAP_OPEN_MEGABLAST 0
979 
980 #define GAP_EXTN_PROT 1
981 #define GAP_EXTN_NUCL 2
982 #define GAP_EXTN_MEGABLAST 0
983 
984 #define WORD_THRESHOLD_BLASTP 11
985 #define WORD_THRESHOLD_BLASTN 0
986 #define WORD_THRESHOLD_BLASTX 12
987 #define WORD_THRESHOLD_TBLASTN 13
988 #define WORD_THRESHOLD_TBLASTX 13
989 #define WORD_THRESHOLD_MEGABLAST 0
990 
991 #define UNGAPPED_X_DROPOFF_PROT 7
992 #define UNGAPPED_X_DROPOFF_NUCL 20
993 #define UNGAPPED_X_DROPOFF_MEGABLAST 10
994 
995 #define GAP_X_DROPOFF_PROT 15
996 #define GAP_X_DROPOFF_NUCL 30 
997 #define GAP_X_DROPOFF_MEGABLAST 20 
998 #define GAP_X_DROPOFF_TBLASTX 0
999 
1000 #define GAP_X_DROPOFF_FINAL_PROT 25
1001 #define GAP_X_DROPOFF_FINAL_NUCL 50 
1002 #define GAP_X_DROPOFF_FINAL_TBLASTX 0
1003 
1004 /* Reward and penalty only apply to blastn/megablast. */
1005 #define PENALTY -3
1006 #define REWARD 1
1007 
1008 /********************************************************************
1009 *
1010 *       Define for collecting BLAST stats.  The macro is defined with
        *       no value; NOTE(review): presumably tested with #ifdef by
        *       the code that gathers statistics -- confirm.
1011 *
1012 ***********************************************************************/
1013 
1014 #define BLAST_COLLECT_STATS
1015 
1016 /********************************************************************
1017 *
1018 *       Structure to save timing information in.  Right now this only
1019 *       works for UNIX.  All three values are stored as FloatLo;
        *       the time unit is not stated here (presumably seconds --
        *       TODO confirm).
1020 *
1021 ********************************************************************/
1022 
1023 typedef struct _blast_time_keeper {
1024                 FloatLo user,   /* CPU time in user space of the process. */
1025                         system, /* CPU time used by system. */
1026                         total;  /* total CPU time (i.e., both of the above). */
1027         } BlastTimeKeeper, PNTR BlastTimeKeeperPtr;
1028 
1029 
1030 /***************************************************************************
1031   Macros added by Andy Neuwald in order to allow easy modification of matrices.

      S is a pointer to a structure that has a posMatrix member; x and y
      are indices into it.
        MtrxScorePosSearch(S,x,y)  expands to the element (S)->posMatrix[x][y].
        PtrMtrxScorePosSearch(S,x) expands to the row     (S)->posMatrix[x].
      Every argument is parenthesized in the expansion and used exactly once.
1032 ***************************************************************************/
1033 
1034 #define  MtrxScorePosSearch(S,x,y)      ((S)->posMatrix[(x)][(y)])
1035 #define  PtrMtrxScorePosSearch(S,x)     ((S)->posMatrix[(x)])
1036 
1037 /***** Disabled variants that reduce x modulo (S)->query_length:
1038 #define  MtrxScorePosSearchi2(S,x,y)    \
1039         ((S)->posMatrix[( (x) %(S)->query_length)][(y)])
1040 #define  PtrMtrxScorePosSearch2(S,x)    \
1041         ((S)->posMatrix[( (x) %(S)->query_length)])
1042 *****/
1043 
1044 /********************************************************************
1045 
1046         Defines for discontiguous word hits on 1st and 2nd pass.
             Numerically, BLAST_BOTH_PASS_DISCONTIG (3) equals
             BLAST_1ST_PASS_DISCONTIG (1) | BLAST_2ND_PASS_DISCONTIG (2).
1047 
1048 ********************************************************************/
1049 
1050 #define BLAST_NO_PASS_DISCONTIG 0
1051 #define BLAST_1ST_PASS_DISCONTIG 1
1052 #define BLAST_2ND_PASS_DISCONTIG 2
1053 #define BLAST_BOTH_PASS_DISCONTIG 3
1054 
1055 #define CODON_LENGTH 3  /* three is always the codon length. */
1056 
     /* NOTE(review): the four constants below carry no description in this
        file; their meaning is presumed from their names only -- confirm
        against the code that uses them. */
1057 #define BLAST_SMALL_GAPS 0
1058 #define BLAST_LARGE_GAPS 1
1059 #define MAX_INTRON_LENGTH 4000
1060 #define MAX_DBSEQ_LEN 5000000
1061 
1062 /*********************************************************************
1063     Filter type definitions.
         NOTE(review): presumably the values stored in the "filter" field
         of BLAST_OptionsBlk (0 = no filter, non-zero = filter type) --
         confirm.
1064 *********************************************************************/
1065 
1066 #define FILTER_NONE 0
1067 #define FILTER_DUST 1
1068 #define FILTER_SEG  2
1069 
/* Enumeration used for the mb_disc_type field of BLAST_OptionsBlk and
   BLAST_WizardOptionsBlk.
   NOTE(review): presumably selects the kind of discontiguous word template
   used by MegaBLAST (coding, optimal, or two templates) -- confirm. */
1070 typedef enum {
1071    MB_WORD_CODING = 0,
1072    MB_WORD_OPTIMAL = 1,
1073    MB_TWO_TEMPLATES = 2
1074 } MBDiscWordType;
1075 
1076 /**********************************************************************
1077         Structure for the blast options (available to user/programmer).
1078         This should be filled in by the "Main" program before blast
1079         is called.
1080 
1081     If changes are made to this structure, corresponding changes should
1082     likely be made to BLAST_WizardOptionsBlk and BLAST_WizardOptionsMask.
1083 ***********************************************************************/
1084 
1085 typedef struct _blast_optionsblk {
1086         Nlm_FloatHi gap_decay_rate,     /* decay rate. */
1087                     gap_prob;   /* Prob of decay. */
1088         Int4        gap_size,   /* Small gap size. */
1089                     window_size,/* Multiple Hits window size (zero for single hit algorithm) */
1090                 threshold_first, /* Threshold for extending hits (preliminary pass), zero if one-pass algorithm is used. */ 
1091                 threshold_second;/* Threshold for extending hits (second pass) */
1092         Nlm_FloatHi     expect_value,   /* Expectation value (E) */
1093                         e2;             /* Expect value for a single HSP */
1094         /* These two scores are zero, unless they've been set, then they set
1095         the expect_value and e2 above. */
1096         Int4            cutoff_s,       /* score corresponds to expect_value above.*/
1097                         cutoff_s2;      /* score corresponds to e2 above. */
1098         Boolean two_pass_method; /* should two passes be used? */
1099         Boolean multiple_hits_only; /* Only the multiple hits alg. used. */
1100         Int4    hitlist_size;   /* How many hits should be returned. */
1101         Nlm_FloatHi number_of_bits; /* Number of bits to initiate 2nd pass (default is used if zero) */
1102         Nlm_FloatHi     dropoff_1st_pass, /* dropoff ("X") used for 1st pass. */
1103                 dropoff_2nd_pass; /* dropoff ("X") used for 2nd pass. */
1104         Int2    number_of_cpus; /* How many CPU's. */
1105         CharPtr matrix;         /* name of matrix to use. */
1106         Boolean old_stats; /* Use old stats (option may disappear later) */
1107         Boolean do_sum_stats;   /* Should sum statistics be used? */
1108         Boolean use_large_gaps; /* Use only large gaps for linking HSP's with sum stats. */
1109         Int2    wordsize;       /* size of word used to find hits. */
1110         Int2    penalty, reward; /* penalty and reward, only for blastn */
1111         /* The ID numbers from gc.prt are used for the genetic codes. */
1112         Int4    genetic_code,           /* genetic code for query (blastx, tblastx) */
1113                 db_genetic_code;        /* genetic code for db (tblast[nx]). */
1114         Int4 filter;          /* filter type: 0 means no filter,
1115                                  a non-zero value indicates the filter type */
1116         CharPtr filter_string;  /* String specifying the type of filtering and filter options. */
1117         Boolean         gapped_calculation; /* Is a gapped calc. being done? */
1118         /* The next three are used ONLY for gapped alignments. */
1119         Int4            gap_open,       /* Cost to open a gap (NO extension). */
1120                         gap_extend;     /* Cost to extend a gap one letter. */
1121         Nlm_FloatHi     gap_x_dropoff,  /* X-dropoff (in bits) used by Gapped align routine. */
1122                         gap_x_dropoff_final;    /* X-dropoff (in bits) used by Gapped align routine for FINAL alignment. */
1123         Int4            decline_align;  /* Cost for declining alignment */
1124         Nlm_FloatHi     gap_trigger; /* Score (in bits) to gap, if an HSP gaps well. */
1125 
1126         Boolean         discontinuous;  /* Should the SeqAlign be discontinuous.*/
1127         /* What region of the query is required for the alignment.  If start is
1128         zero and end is -1 (the entire query), then these are not checked. */
1129         Int4            required_start,
1130                         required_end;
1131         Int8            db_length;      /* database size used for stat. calcul. */
1132         Int4            dbseq_num;      /* number of database sequences used for stat. calcul. */
1133         Nlm_FloatHi     searchsp_eff;   /* Effective search space to be used. */
1134                         
1135         /* Options for position based blast. */
1136         Nlm_FloatHi     ethresh;
1137         Int4            maxNumPasses,
1138                         pseudoCountConst;
1139         CharPtr program_name;           /* program name, for reference. */
1140         Int4 cpu_limit; /* timeout total. */
1141         /* Used for region-dependent limits when storing hits. */
1142         Int4    hsp_range_max,          /* maximum hits for a range */
1143                 block_width;            /* width of a block */
1144         Boolean perform_culling;        /* Should results be culled at all? */
1145         Boolean isPatternSearch;        /* Is this a use of PHI-BLAST?*/
1146         CharPtr         gifile;         /* name of file containing list of gis on server */
1147         ValNodePtr      gilist;         /* list of gis specified by client */
1148         Boolean         do_not_reevaluate;      /* Don't perform BlastReevaluateWithAmbiguities. */
1149         /* These options allow a subset of the database to be examined.  IF they
1150                 are set to zero, then the entire database is examined. */
1151         Int4            first_db_seq,           /* 1st sequence in db to be compared. */
1152                         final_db_seq;           /* Final sequence to be compared. */
1153         CharPtr         entrez_query;   /* user specified Entrez query to make selection from databases */
1154         CharPtr         org_name;       /* user specified name of organism;  corresponding .gil file will be used */
1155         Uint1           strand_option;  /* BLAST_TOP_STRAND, BLAST_BOTTOM_STRAND, or BLAST_BOTH_STRAND.  used by blast[nx] and tblastx */
1156         Int4            hsp_num_max;    /* maximum number of HSP's allowed.  Zero indicates no limit. */
1157         Uint1           tweak_parameters, /* For composition-based statistics. */
1158                         smith_waterman;
1159         Boolean         unified_p;    /* use a combination of alignment and 
1160                                          compositional p-values when evaluating
1161                                          significance; ignored unless 
1162                                          composition-based statistics is on. */
1163         CharPtr         phi_pattern;      /* Pattern for PHI-Blast search */
1164         Boolean         use_real_db_size; /* Use real DB size.  meant for use if a list of gis' is submitted, 
1165                                         but statistics should be based upon the real database. */
1166         Boolean         use_best_align;   /* option is to use alignments chosen by user in PSM computation API (used in WWW PSI-Blast); */
1167         Int4            max_num_patterns; /* Maximum number of patterns to be used in PHI-Blast search */
1168         Boolean         is_megablast_search; /* Is this a MegaBlast search? */
1169         Uint1         no_traceback;    /* No traceback in MegaBLAST extension */
1170         Boolean         is_rps_blast;     /* Is this RPS Blast? */
1171         SeqLocPtr       query_lcase_mask; /* Masking of input DNA regions */
1172         Boolean         sort_gi_list;     /* Should the gi list be sorted? */
1173         Boolean         is_neighboring;   /* Is this a neighboring task? */
1174         Nlm_FloatHi kappa_expect_value;   /* E-value threshold for
1175                                              hits to be saved when
1176                                              RedoAlignmentCore is used
1177                                              to compute final alignments;
1178                                              should equal expect_value for
1179                                              other types of alignment. */
1180         Boolean         explode_seqids; /* make one SeqAlign for every gi on a
1181                                            redundant sequence. */
1182         Boolean         megablast_full_deflines; /* Print full deflines in
1183                                                    megablast one-line output */
1184         Boolean         is_ooframe;  /* Use Out-Of-Frame gapping algorithm */
1185         Int4            shift_pen;   /* Out-Of-Frame shift penalty */
1186         Boolean         gilist_already_calculated; /* translation of gis to ordinalID's already done (used for neighboring). */
1187         Boolean  recoverCheckpoint;  /* For psitblastn */
1188         Boolean  freqCheckpoint;     /* For psitblastn */
1189         CharPtr  CheckpointFileName; /* For psitblastn */
1190         Int4     longest_intron;     /* the length of longest intron for linking HSPs */
1191         FloatLo  perc_identity;      /* Identity percentage cut-off */
1192         VoidPtr  output;             /* Output stream to put results to */
1193         FloatHi scalingFactor;       /* scaling factor used when constructing pssm for rpsblast. */ 
1194         Int4    total_hsp_limit;        /* total number of HSP's that will be processed to SeqAligns, zero means no limit. */
1195         Boolean mb_one_base_step; /* Scan every base of the database */
1196         Int2 mb_template_length;  /* Length of the discontiguous word */
1197         Boolean mb_use_dyn_prog;  /* Use dynamic programming gapped extension in
1198                                      megablast with affine gap scores */ 
1199         MBDiscWordType mb_disc_type;
1200         Uint4 NumQueries;               /*--KM for query concatenation in [t]blastn */
1201         Boolean ignore_gilist;    /* Used in traceback stage to not lookup gi's */
1202       } BLAST_OptionsBlk, PNTR BLAST_OptionsBlkPtr;
1203 
1204 
1205 /*  --------------------------------------------------------------------
1206  *
1207  *  BLAST_WizardOptionsBlk contains those fields of BLAST_OptionsBlk
1208  *  that a user can set.
1209  *
1210  *  BLAST_WizardOptionsMask contains a Boolean for each field defined in
1211  *  BLAST_WizardOptionsBlk, except those holding pointers.  TRUE means
1212  *  that the corresponding field in BLAST_WizardOptionsBlk is set.
1213  *
1214  *  These structures are used only in conjunction with BLAST_Wizard.
1215  *
1216  *  --------------------------------------------------------------------
1217  */
1218 
1219 struct _blast_wizardoptionsblk {
1220     Int4            block_width;
1221     Int4            cutoff_s;
1222     Int4            db_genetic_code;
1223     CharPtr         entrez_query;
1224     Nlm_FloatHi     ethresh;
1225     Nlm_FloatHi     expect_value;
1226     CharPtr         filter_string;
1227     Int4            first_db_seq;
1228     Int4            final_db_seq;
1229     Int4            gap_extend;
1230     Int4            gap_open;
1231     Boolean         gapped_calculation;
1232     Int4            genetic_code;
1233     ValNodePtr      gilist;
1234     Int4            hitlist_size;
1235     Int4            hsp_range_max;
1236     Boolean         is_ooframe;
1237     CharPtr         matrix;
1238     MBDiscWordType  mb_disc_type;
1239     Int2            mb_template_length;
1240     Uint1           no_traceback;
1241     Int2            penalty;
1242     FloatLo         perc_identity;
1243     Boolean         perform_culling;
1244     CharPtr         phi_pattern;
1245     Int4            pseudoCountConst;
1246     SeqLocPtr       query_lcase_mask;
1247     Int4            required_end;
1248     Int4            required_start;
1249     Int2            reward;
1250     Int8            db_length;
1251     Nlm_FloatHi     searchsp_eff;
1252     Boolean         smith_waterman;     /* declared Uint1 in BLAST_OptionsBlk */
1253     Uint1           strand_option;
1254     Int4            threshold_first;
1255     Int4            threshold_second;
1256     Uint1           tweak_parameters;
1257     Boolean         use_best_align;
1258     Boolean         use_real_db_size;
1259     Int4            window_size;
1260     Int2            wordsize;
1261     /* The two fields below have no same-named field in BLAST_OptionsBlk. */
1262     Boolean         two_hits;
1263     CharPtr         string_options;
1264 };
1265 
1266 typedef struct _blast_wizardoptionsblk
1267             BLAST_WizardOptionsBlk,
1268     PNTR    BLAST_WizardOptionsBlkPtr;
1269 
/* One Boolean per non-pointer field of BLAST_WizardOptionsBlk, with the same
   field name; TRUE means that the corresponding field in
   BLAST_WizardOptionsBlk is set.  The pointer-valued fields (entrez_query,
   filter_string, gilist, matrix, phi_pattern, query_lcase_mask,
   string_options) have no entry here. */
1270 struct _blast_wizardoptionsmask {
1271     Boolean         block_width;
1272     Boolean         cutoff_s;
1273     Boolean         db_genetic_code;
1274     Boolean         ethresh;
1275     Boolean         expect_value;
1276     Boolean         first_db_seq;
1277     Boolean         final_db_seq;
1278     Boolean         gap_extend;
1279     Boolean         gap_open;
1280     Boolean         gapped_calculation;
1281     Boolean         genetic_code;
1282     Boolean         hitlist_size;
1283     Boolean         hsp_range_max;
1284     Boolean         is_ooframe;
1285     Boolean         mb_disc_type;
1286     Boolean         mb_template_length;
1287     Boolean         no_traceback;
1288     Boolean         penalty;
1289     Boolean         perc_identity;
1290     Boolean         perform_culling;
1291     Boolean         pseudoCountConst;
1292     Boolean         required_end;
1293     Boolean         required_start;
1294     Boolean         reward;
1295     Boolean         db_length;
1296     Boolean         searchsp_eff;
1297     Boolean         smith_waterman;
1298     Boolean         strand_option;
1299     Boolean         threshold_first;
1300     Boolean         threshold_second;
1301     Boolean         tweak_parameters;
1302     Boolean         use_best_align;
1303     Boolean         use_real_db_size;
1304     Boolean         window_size;
1305     Boolean         wordsize;
1306 
1307     Boolean         two_hits;
1308 };
1309 
1310 typedef struct _blast_wizardoptionsmask
1311             BLAST_WizardOptionsMask,
1312     PNTR    BLAST_WizardOptionsMaskPtr;
1313 
/* Type of a discontiguous template; stored in the template_type field of
   MegaBlastParameterBlk.  Values 0-11 name templates, TEMPL_ERROR is -1.
   NOTE(review): the two numbers in each name presumably are the word weight
   and the template length (e.g. TEMPL_11_16 = weight 11, length 16), and
   the _OPT suffix presumably marks the "optimal" variants -- confirm. */
1314 typedef enum {
1315    TEMPL_11_16 = 0,
1316    TEMPL_12_16 = 1,
1317    TEMPL_11_18 = 2,
1318    TEMPL_12_18 = 3,
1319    TEMPL_11_21 = 4,
1320    TEMPL_12_21 = 5,
1321    TEMPL_11_16_OPT = 6,
1322    TEMPL_12_16_OPT = 7,
1323    TEMPL_11_18_OPT = 8,
1324    TEMPL_12_18_OPT = 9,
1325    TEMPL_11_21_OPT = 10,
1326    TEMPL_12_21_OPT = 11,
1327    TEMPL_ERROR = -1
1328 } MBTemplateType;
1329 
/* MegaBLAST-specific search parameters.  BLAST_ParameterBlk refers to this
   structure through its mb_params pointer. */
1330 typedef struct _mb_parameter_blk_ {
1331    Uint1 no_traceback;    /* No traceback in greedy extension */
1332    Boolean is_neighboring;  /* Is this a neighboring task? */ 
1333    Boolean full_seqids;     /* Print full seqids in tabular output? */
1334    FloatLo perc_identity;   /* Identity percentage cut-off */
1335    Int4    max_positions;   /* Maximal number of positions in query of a given word */
1336    Boolean disc_word;       /* Use a discontiguous word template to find initial 
1337                                matches */
1338    Boolean one_base_step;   /* Form words for every position in the database
1339                                sequence (default is every 4th position) */
1340    Int2    word_weight;     /* Number of identical nucleotides in a word match */
1341    Int2    template_length; /* Length of a discontiguous word template */
1342    Boolean use_dyn_prog;    /* Use dynamic programming extension for affine gap
1343                                scores */
1344    MBTemplateType template_type; /* Type of a discontiguous template */
1345    Boolean use_two_templates; /* NOTE(review): presumably set when two
                                  discontiguous templates are used -- confirm */
1346 } MegaBlastParameterBlk, PNTR MegaBlastParameterBlkPtr;
1347 
1348 /****************************************************************************
1349 
1350         PARAMETER BLOCK: parameters for the BLAST search entered on the
1351         command line by the user.
1352 
1353 *****************************************************************************/
1354 
1355 typedef struct _blast_parameterblk {
1356         BLAST_Score     threshold,      /* threshold for extending a word hit*/
1357                         threshold_first, /* threshold for 1st pass. */
1358                         threshold_second, /* threshold for 2nd pass. */
1359                         X,              /* drop-off score for extension. */
1360                         dropoff_1st_pass, /* dropoff ("X") used for 1st pass. */
1361                         dropoff_2nd_pass, /* dropoff ("X") used for 2nd pass. */
1362                         cutoff_s,       /* Final Score to report a hit. */
1363                         cutoff_s1,      /* Score to save an HSP after a gapped extension. */
1364                         cutoff_s2,      /* Score to save an HSP after an ungapped extension. */
1365                         cutoff_s_first, /* Score (S2) to use on 1st pass */
1366                         cutoff_s_second, /* Score (S2) to use on 2nd pass and
1367                            for "small" gaps in link_hsps (in blast.c) */
1368         /* Max value of s2, used if s2 is set or s2 becomes larger than s. */
1369                         cutoff_s2_max,  
1370                         cutoff_big_gap; /* cutoff value for a "big" gap in
1371                            link_hsps (in blast.c). */
1372         Nlm_FloatHi     cutoff_e,       /* Expect value to report a hit. */
1373                         cutoff_e2,      /* Expect value to report a hsp. */
1374                         number_of_bits; /* number of bits of significance, used
1375                            to calculate cutoff_s_first (above). */
1376         Boolean         threshold_set, /*TRUE if threshold set on command-line*/
1377                         cutoff_s_set,   /* TRUE if cutoff score set on c-l */
1378                         cutoff_s2_set,  /* TRUE if cutoff score2 set on c-l */
1379                         cutoff_e_set,   /* TRUE if cutoff expect set on c-l */
1380                         cutoff_e2_set,  /* TRUE if cutoff expect2 set on c-l */
1381                         ignore_small_gaps, /* ignore small gaps if TRUE, set by
1382                            CalculateSecondCutoffScore in blast.c if the search 
1383                            space is smaller than 8*gap_size*gap_size. */
1384                         window_size_set;/* TRUE if window size set for MHBLAST*/
1385         Boolean         sump_option;    /* TRUE if sump is used. */
1386         Int4            gap_size,       /* max. gap allowed for small gaps.*/
1387                         window_size;    /* used for multiple hits BLAST. */
1388         Nlm_FloatHi     gap_prob;       /* prob. of gap of size "gap" (above).*/
1389         Nlm_FloatHi     gap_decay_rate; /* prob. of only one HSP */
1390         Int2            process_num;    /* max # processors permitted (for MP).*/
1391         Boolean         old_stats;      /* Use "old" stats if TRUE. */
1392         Boolean         do_sum_stats;   /* Should sum statistics be used? */
1393         Boolean         use_large_gaps; /* Use only large gaps for linking HSP's with sum stats. */
1394         Boolean         two_pass_method; /* should two passes be used? */
1395         Boolean         multiple_hits_only; /* Only the multiple hits alg. used. */
1396         Boolean         discontinuous;  /* Should discontinuous SeqAlign's be produced? */
1397         Boolean         gapped_calculation; /* Is a gapped calc. being done? */
1398         Boolean         do_not_reevaluate;      /* Don't perform BlastReevaluateWithAmbiguities. */
1399         /* The next three are used ONLY for gapped alignments. */
1400         Int4            gap_open,       /* Cost to open a gap (NO extension). */
1401                         gap_extend,     /* Cost to extend a gap one letter. */
1402                         gap_x_dropoff,  /* X-dropoff used by Gapped align routine. */
1403                         gap_x_dropoff_final;    /* X-dropoff (in bits) used by Gapped align routine for FINAL alignment. */
1404         Int4            decline_align;  /* Cost for declining alignment */
1405 
1406         Nlm_FloatHi     gap_trigger; /* Score (in bits) to gap, if an HSP gaps well.*/
1407 
1408         /* Options for position based blast. */
1409         Nlm_FloatHi     ethresh;
1410         Int4            maxNumPasses,
1411                         pseudoCountConst;
1412         Int4 cpu_limit; /* timeout total. */
1413         Int4    hsp_range_max,          /* maximum hits for a range */
1414                 max_pieces;             /* Max number of pieces allowed (query_length/block_width) */
1415         Boolean perform_culling;        /* determines whether culling should be used or not.
1416                                         If not, then hsp_range_max, block_width, and max_pieces are ignored. */
1417         /* These options allow a subset of the database to be examined.  IF they
1418                 are set to zero, then the entire database is examined. */
1419         Int4            first_db_seq,           /* 1st sequence in db to be compared. */
1420                         final_db_seq;           /* Final sequence to be compared. */
1421         Int4            hsp_num_max;    /* maximum number of HSP's allowed.  Zero indicates no limit. */
1422         Boolean   use_best_align;   /* option is to use alignments chosen by user in PSM computation API (used in WWW PSI-Blast); */
1423         MegaBlastParameterBlkPtr mb_params;  /* MegaBlast parameter block (MegaBlastParameterBlk). NOTE(review): presumably NULL unless this is a MegaBlast search -- confirm. */
1424         CharPtr filter_string;  /* String specifying the type of filtering and filter options. - used with Translated RPS Blast */
1425         Boolean is_rps_blast;      /* Is this RPS Blast? */
1426         SeqLocPtr  query_lcase_mask; /* Masking of input DNA regions */
1427         Boolean         explode_seqids; /* make one SeqAlign for every gi on a
1428                                            redundant sequence. */
1429         Boolean         is_ooframe;  /* Use Out-Of-Frame gapping algorithm */
1430         Int4            shift_pen;  /* Out-Of-Frame shift penalty */
1431         Int4    longest_intron;     /* the length of longest intron for linking HSPs */
1432         FloatHi scalingFactor;       /* scaling factor used when constructing pssm for rpsblast. */ 
1433         Int4    total_hsp_limit;        /* total number of HSP's that will be processed to SeqAligns, zero means no limit. */
1434         } BLAST_ParameterBlk, PNTR BLAST_ParameterBlkPtr;
1435 
/* BLAST_Diag is an alias of Nlm_Int4; BLAST_DiagPtr is the matching pointer
   type.  NOTE(review): presumably holds a diagonal value -- confirm. */
1436 typedef Nlm_Int4        BLAST_Diag, PNTR BLAST_DiagPtr;
1437 
1438 /* Structure to keep track of the last hit and diag level.  BLAST_ExtendWord
        refers to it through its combo_array pointer. */
1439 
1440 typedef struct cfj_mod_struct{
1441     Int4 last_hit;
1442     Int4 diag_level;
1443 } CfjModStruct;
1444 /*
1445         BLAST_ExtendWord contains information about which diagonals
1446         have been extended over (i.e., which diagonals have been 
1447         tested).  This structure will be duplicated once for each
1448         context as every context is different.
1449 */
1450 typedef struct _blast_extend_word {
1451                 Int4Ptr _buffer; /* The "real" buffer for diag_level, version,
1452                                 and last_hit arrays. */
1453                 CfjModStruct *combo_array; /* last_hit/diag_level pairs (see CfjModStruct). */
1454                 Int4Ptr version; /* still needed?? */
1455                 Int4    actual_window; /* The actual window used if the multiple
1456                                 hits method was used and a hit was found. */    
1457         } BLAST_ExtendWord, PNTR BLAST_ExtendWordPtr;
1458 
1459 /*
1460         BLAST_ExtendWordParams contains parameters about the extensions.
1461         Only one copy of this structure is needed, regardless of how many
1462         contexts there are.
1463 */
1464 typedef struct _blast_extend_word_params {
1465                 Int4    bits_to_shift; /* how many bits should the diagonal be
1466                                 shifted to get the "version" */
1467                 Int4    min_diag_length, /* Min. length of diagonal, actually
1468                                 2**bits_to_shift. */
1469                         min_diag_mask; /* Used to mask off everything above
1470                                 min_diag_length (mask = min_diag_length-1). */
1471                 Int4    offset; /* "offset" added to query and subject position
1472                                 so that "diag_level" and "last_hit" don't have
1473                                 to be zeroed out every time. */
1474                 Int4    window; /* The "window" size, within which two (or more)
1475                                 hits must be found in order to be extended. */
1476                 /* Used by BLAST_ExtendWordNew to decide whether or not
1477                 to prepare the structure for multiple-hit type searches.
1478                 If TRUE, multiple hits are not necessary, but possible. */
1479                 Boolean multiple_hits;  
1480         } BLAST_ExtendWordParams, PNTR BLAST_ExtendWordParamsPtr;
1481 /*
1482         Data block to describe a single sequence: pointers to the
             sequence data, its lengths, and its frame.
1483 */
1484 
1485 typedef struct blast_sequence_block {
1486         Uint1Ptr        sequence,       /* Actual (perhaps transl.) sequence. */
1487                 sequence_start; /* Start of sequence, used if the sequence is preceded by a NULLB.  Sequences
1488                                 starting with a NULLB are used by BlastWordExtend_L1. */
1489         Int4    length,         /* length of sequence. */
1490                 original_length,/* length before translation. */
1491                 effective_length;/* effective length, used only by query. */
1492         Int2 frame;             /* frame of the sequence. */
1493 } BlastSequenceBlk, PNTR BlastSequenceBlkPtr;
1494 
1495 
1496 typedef struct _blast_seg { /* Coordinates of one side of an HSP; BLAST_HSP holds one for the query and one for the subject. */
1497                 Int2            frame; /* NOTE(review): presumably the frame of this side's sequence -- confirm. */
1498                 Int4            offset; /* start of hsp */
1499                 Int4            length; /* length of hsp */
1500                 Int4            end;    /* end of HSP */
1501                 Int4            offset_trim;    /* start of trimmed hsp */
1502                 Int4            end_trim;       /* end of trimmed HSP */
1503                 /* Where the gapped extension (with X-dropoff) started. */
1504                 Int4            gapped_start;
1505         } BLAST_Seg, PNTR BLAST_SegPtr;
1506 
1507 #define BLAST_NUMBER_OF_ORDERING_METHODS 2 /* How many methods are used to "order" the HSP's; dimensions the per-method arrays declared in this header. */
1508 
1509 
1510 /*
1511         The following structure is used in "link_hsps" to decide between
1512         two different "gapping" models.  Here link is used to hook up
1513         a chain of HSP's (this is a VoidPtr as _blast_hsp is not yet
1514         defined), num is the number of links, and sum is the sum score.
1515         Once the best gapping model has been found, this information is
1516         transferred up to the BLAST_HSP.  This structure should not be
1517         used outside of the function link_hsps.
         Every array below has one slot per ordering method.
1518 */
1519 typedef struct _blast_hsp_link {
1520                 /* Used to order the HSP's (i.e., hook-up w/o overlapping). */ 
1521         VoidPtr link[BLAST_NUMBER_OF_ORDERING_METHODS]; 
1522                 /* number of HSP in the ordering. */
1523         Int2    num[BLAST_NUMBER_OF_ORDERING_METHODS];
1524                 /* Sum-Score of HSP. */
1525         Int4    sum[BLAST_NUMBER_OF_ORDERING_METHODS]; 
1526                 /* Sum-Score of HSP, multiplied by the appropriate Lambda. */
1527         Nlm_FloatHi     xsum[BLAST_NUMBER_OF_ORDERING_METHODS]; 
1528         Int4 changed; /* NOTE(review): undocumented; presumably flags that this link data was modified -- confirm in link_hsps. */
1529         } BLAST_HSP_LINK, PNTR BLAST_HSP_LINKPtr;
1530 /*
1531         BLAST_NUMBER_OF_ORDERING_METHODS tells how many methods are used
1532         to "order" the HSP's.
1533 */
1534 
1535 typedef struct _blast_hsp { /* One HSP: score, e-value, query/subject coordinates and linking data. */
1536                 struct _blast_hsp PNTR next, /* the next HSP */
1537                                   PNTR prev; /* the previous one. */
1538                 BLAST_HSP_LINK  hsp_link; /* per-ordering-method link data (see BLAST_HSP_LINK). */
1539 /* Is this HSP part of a linked set? */
1540                 Boolean         linked_set;
1541 /* which method (max or no max for gaps) was used? */
1542                 Int2            ordering_method; 
1543 /* how many HSP's make up this (sum) segment */
1544                 Int4            num;
1545 /* normalized score of a set of "linked" HSP's */
1546         Nlm_FloatHi xsum;
1547                 /* If TRUE this HSP starts a chain along the "link" pointer. */
1548                 Boolean         start_of_chain;
1549                 BLAST_Score     score;
1550                 Int4            num_ident; /* NOTE(review): presumably the number of identities, as in BLASTResultHsp.num_ident -- confirm. */
1551                 Nlm_FloatHi     evalue;
1552                 BLAST_Seg query,        /* query sequence info. */
1553                         subject;        /* subject sequence info. */
1554                 Int2            context;        /* Context number of query */
1555                 GapXEditBlockPtr gap_info; /* ALL gapped alignment is here */
1556                 Int4 num_ref; /* NOTE(review): undocumented; presumably a reference count -- confirm. */
1557                 Int4 linked_to; /* NOTE(review): undocumented; meaning not evident from this header. */
1558 /* which method, if any, was used for compositional adjustment?
1559   relevant only for blastp */
1560                 Int2            comp_adjustment_method; 
1561         } BLAST_HSP, PNTR BLAST_HSPPtr;
1562 
1563 /* The helper arrays contain the info used frequently in the inner for loops. -cfj
1564  * One array of helpers will be allocated for each thread. See comments preceding
1565  * link_hsps in blast.c for more info.
1566  */
1567 
1568 typedef struct link_help_struct{
1569   BLAST_HSPPtr ptr; /* NOTE(review): presumably the HSP this helper entry describes -- confirm. */
1570   Int4 q_off_trim; /* NOTE(review): presumably a copy of the HSP's query.offset_trim -- confirm. */
1571   Int4 s_off_trim; /* NOTE(review): presumably a copy of the HSP's subject.offset_trim -- confirm. */
1572   Int4 sum[BLAST_NUMBER_OF_ORDERING_METHODS]; /* one value per ordering method. */
1573   Int4 maxsum1;
1574   Int4 next_larger;
1575 } LinkHelpStruct;
1576 
1577 /* Orders information for HSP accesses. */
1578 typedef struct hsp_helper{
1579         Int4    qoffset, /* NOTE(review): presumably the HSP's query start offset -- confirm. */
1580                 qend; /* NOTE(review): presumably the HSP's query end -- confirm. */
1581 } BLAST_HSP_helper, PNTR BLAST_HSP_helperPtr;
1582                 
1583 
1584 typedef struct _exact_match { /* Element type of BLAST_HitList.exact_match_array (initial exact match hits). */
1585    Int4 q_off; /* NOTE(review): presumably the query offset of the match -- confirm. */
1586    Int4 s_off; /* NOTE(review): presumably the subject offset of the match -- confirm. */
1587 } MegaBlastExactMatch, PNTR MegaBlastExactMatchPtr;
1588 
1589 typedef struct _blast_hitlist { /* The HSPs collected for one hit, plus bookkeeping. */
1590         struct _blast_hitlist   PNTR next;
1591         BLAST_HSPPtr PNTR       hsp_array; /* array of pointers to HSPs (declared BLAST_HSPPtr PNTR; earlier wording said "head of linked list of HSPs") */
1592         Int4            hspmax, /* max no. of HSPs allowed per hit list */
1593                         hspcnt, /* no. of HSPs in hit list */
1594                         hspcnt_max; /* no. of HSPs in hitlist, before reaping */
1595         Boolean         further_process; /* This sequence has been found interesting,
1596                                             it should be further processed by a gapped
1597                                             alignment etc. */
1598         Boolean         do_not_reallocate; /* Don't reallocate the HSP's, probably because
1599                                    there is no more memory for this. */
1600                 /* added -cfj */
1601         LinkHelpStruct *lh_helper; /* helper array (see LinkHelpStruct). */
1602         Int4 lh_helper_size; /* NOTE(review): presumably the allocated length of lh_helper -- confirm. */
1603         MegaBlastExactMatchPtr exact_match_array; /* Array to hold initial
1604                                                           exact match hits */
1605         Int4 exact_match_max; /* NOTE(review): presumably the capacity of exact_match_array -- confirm. */
1606         } BLAST_HitList, PNTR BLAST_HitListPtr;
1607 
1608 /*
1609         The next two structures are the final output produced by BLAST.  Formatters should then
1610         convert the data into SeqAligns or the BLAST ASN.1 spec.  
1611 */
1612 
1613 typedef struct _blast_results_hsp {
1614                 Int2            ordering_method;/* determines whether large or small gap was used. */
1615                 Int4            number; /* number of HSP's used to calculate the p-value. */
1616                 BLAST_Score     score;  /* score of this HSP. */
1617                 Nlm_FloatHi     e_value,/* expect value of this set of HSP's. */
1618                                 bit_score; /* above score * lambda/ln2 */
1619                 Int4            num_ident;/* number of identities in this HSP. */
1620                 Int2            context;        /* context number of query. */
1621                 Int2            query_frame, /* frame of query, non-zero if translated. */
1622                                 subject_frame; /* frame of subject, non-zero if translated. */
1623                 Int4            query_offset,   /* Start of the query HSP. */
1624                                 query_length,   /* Length of the query HSP. */
1625                                 subject_offset, /* Start of the subject HSP. */
1626                                 subject_length, /* Length of the subject HSP.*/
1627                                 hspset_cnt;     /* which set of HSP's? */
1628         /* Starting points (on original HSP) for a gapped extension with X dropoff. */
1629                 Int4            query_gapped_start,
1630                                 subject_gapped_start;
1631 
1632                 GapXEditBlockPtr gap_info; /* ALL gapped alignment is here */
1633                 struct _blast_result_hitlist PNTR point_back; /* NOTE(review): presumably the result hitlist that owns this HSP -- confirm. */
1634                 struct _blast_heap_struct PNTR back_left, PNTR back_right; /* NOTE(review): presumably the heaps (BLASTHeapStruct) referencing this HSP -- confirm. */
1635                 } BLASTResultHsp, PNTR BLASTResultHspPtr;
1636 
1637 /*
1638         The following structure contains the subject info when the readdb
1639         facility is not being used; in that case the subject information is
1640         kept here.  Otherwise the pointer to this structure is NULL.
1641 */
1642 typedef struct _blast_subject_info {
1643                 SeqIdPtr sip;   /* ID of the subject. */
1644                 CharPtr defline; /* Defline of the subject. */
1645                 Int4 length;    /* untranslated length of the database sequence. */
1646                 } BLASTSubjectInfo, PNTR BLASTSubjectInfoPtr;
1647 
1648 typedef struct _blast_result_hitlist { /* Saved results for one subject sequence: its HSP's plus summary values. */
1649                 BLASTResultHspPtr hsp_array;    /* An array holding the HSP's. */
1650                 Nlm_FloatHi     best_evalue;    /* best evalue in all the HSP's. */
1651                 Int4    high_score;     /* highest score (NOTE(review): earlier wording was "HSP with highest score", but the field is an Int4 -- confirm it holds the score value). */
1652                 Int4    hspcnt,         /* Number of HSP's. */
1653                         subject_id;     /* ID of the subject. */
1654                 Int2    db_id;          /* ID (0,1,2...) of the db if multiple db's searched. */
1655                 Int4    subject_length; /* length of the database sequence. */
1656                 BLASTSubjectInfoPtr subject_info; /* Subject info if the readdb facility is not being used. */
1657                 SeqAlignPtr seqalign; /* alignment, if this is a gapped calculation. */
1658                 Int4 num_ref; /* NOTE(review): undocumented; presumably a reference count -- confirm. */
1659                 } BLASTResultHitlist, PNTR BLASTResultHitlistPtr;
1660 
1661 
1662 typedef struct _blast_heap_struct { /* Node of a doubly linked list (next/prev); each node holds an array of result-HSP pointers. */
1663   Int4 cutvalue;        /* start of a region? */
1664   BLASTResultHspPtr PNTR heap; /* array of pointers to result HSPs. */
1665   Int4 num_in_heap;     /* Number in 'heap' */
1666   Int4 num_of_ref; /* NOTE(review): undocumented; presumably a reference count -- confirm. */
1667   struct _blast_heap_struct PNTR next, PNTR prev;
1668 } BLASTHeapStruct, PNTR BLASTHeapPtr;
1669 
1670 /*
1671         Holds the results already saved.
1672 */
1673 
1674 typedef struct _blast_results_struct {
1675 
1676                 BLASTResultHitlistPtr PNTR results; /* the results array: pointers to saved hitlists. */
1677                 Int4    hitlist_count,  /* Number of hitlists saved on results array already. */
1678                         hitlist_max,    /* Length of results array. */
1679                         max_pieces;     /* For range-dependent limits. */
1680                 BLASTResultHspPtr **heap; /* NOTE(review): undocumented; presumably per-piece heaps for the range-dependent limits -- confirm. */
1681                 Int4 *num_in_heap; /* NOTE(review): presumably the element count for each entry of heap -- confirm. */
1682                 BLASTHeapPtr heap_ptr; /* NOTE(review): presumably the head of the BLASTHeapStruct list -- confirm. */
1683                 } BLASTResultsStruct, PNTR BLASTResultsStructPtr;
1684 
1685 /*
1686         Holds the data for all possible words that might be used by BLAST.
1687 */
1688 
1689 typedef struct _blast_all_words {
1690                 Uint1Ptr *array,        /* All the possible words */
1691                          array_storage; /* Storage for the words in array. */
1692                 Int4    num_of_cols, 
1693                         wordsize; /* NOTE(review): presumably the length of each word, per the rows_allocated note -- confirm. */
1694                 Boolean rows_allocated, /* are the rows (each of length wordsize) allocated? */
1695                         specific;       /* specific (limited) words are to be indexed. */
1696         } BlastAllWord, *BlastAllWordPtr;
1697                 
1698 /*
1699         Contains gi and ordinal number for use by random access BLAST.
         Also the element type of BlastHitRange.range_list.
1700 */
1701 typedef struct _double_int4 {
1702         Int4    gi,
1703                 ordinal_id,
1704                 start; /* NOTE(review): undocumented third member; its meaning is not stated in this header. */
1705 } BlastDoubleInt4, *BlastDoubleInt4Ptr;
1706 
1707 
1708 typedef struct _blast_gi_list { /* A list of gi's plus a cursor into it. */
1709     BlastDoubleInt4Ptr gi_list; /* List of gi's. */
1710     BlastDoubleInt4Ptr *gi_list_pointer;        /* Pointer to above list. */
1711     Int4 current;              /* Current position in gi list. */
1712     Int4 total;                /* total number of gi's. */
1713     Boolean gilist_not_owned; /* do not delete gilist at end. */
1714 } BlastGiList, *BlastGiListPtr;
1715         
1716 /*
1717         Used for keeping start and stop of hits to query, for ALU filtering.
1718 */
1719 typedef struct _blast_hit_range {
1720         BlastDoubleInt4Ptr      range_list;        /* ranges. */
1721         BlastDoubleInt4Ptr      *range_list_pointer;       /* Pointer to above list. */
1722         Int4            current,        /* current position in list. */
1723                         total;          /* total number in list. */
1724         SeqIdPtr        query_id;       /* ID to be put on SeqLoc's that are produced. */
1725         Int4            base_offset;    /* used if a SeqLoc is searched and it does not start at the beginning
1726                                         of the sequence. */
1727         } BlastHitRange, *BlastHitRangePtr;
1728 
1729 /*
1730         Contains BLAST error messages.
1731 */
1732 
1733 typedef struct _blast_error_msg {
1734     Uint2 level;/* corresponds to levels of ErrPostEx [none(0), info(1), warn(2), error(3) and fatal(4)] */
1735     CharPtr msg; /* the message text. */
1736 } BlastErrorMsg, *BlastErrorMsgPtr;
1737 
1738 /*
1739   Holds data for each "context" (which is generally equal to
1740   one frame of the query).  blastx would have six contexts,
1741   blastp would have one.
1742   */
1743 
1744 typedef struct _blast_context_structure {
1745     Boolean query_allocated;/* The BlastSequenceBlkPtr IS allocated. */
1746     BlastSequenceBlkPtr query;  /* query sequence. */
1747     BLAST_ExtendWordPtr ewp;/* keeps track of diagonal etc. for each frame */
1748     ValNodePtr location;    /* Where to start/stop masking. */
1749 } BLASTContextStruct, PNTR BLASTContextStructPtr;
1750     
1751     /* Structure used for full Smith-Waterman results; nodes form a singly linked list through "next". */
1752     
1753 typedef struct SWResults {
1754     Uint1Ptr seq;
1755     Int4 seqStart;
1756     Int4 seqEnd;
1757     Int4 queryStart;
1758     Int4 queryEnd;
1759     Int4 *reverseAlignScript; /* NOTE(review): presumably the alignment edit script stored in reverse order -- confirm. */
1760     BLAST_Score score;
1761     BLAST_Score scoreThisAlign;
1762     Nlm_FloatHi eValue;
1763     Nlm_FloatHi eValueThisAlign;
1764     Nlm_FloatHi Lambda;
1765     Nlm_FloatHi logK;
1766     SeqIdPtr subject_id;  /* used to display the sequence in alignment */
1767     struct SWResults *next; /* next result in the list. */
1768     Boolean isFirstAlignment;
1769     Int4 subject_index;  /* needed to break ties on rare occasions */
1770     SeqAlignPtr seqAlign; /* needed when seqAlign is already computed */
1771 } SWResults;
1772     
1773 /* Average sizes of protein (AA) and nucleotide (NT) sequences. */
1774 #define BLAST_AA_AVGLEN 300
1775 #define BLAST_NT_AVGLEN 1000
1776 
1777 /* How many ticks should be emitted in total. */
1778 #define BLAST_NTICKS 50
1779 
1780 /* Period of sending out a star/message (NOTE(review): units not stated here; presumably seconds -- confirm). */
1781 #define STAR_MSG_PERIOD 60
1782 
1783 typedef struct _BlastThrInfo { /* State shared by search threads: mutexes, database-chunk bookkeeping and progress callbacks. */
1784 
1785     TNlmMutex db_mutex;  /* lock for access to database */
1786     TNlmMutex results_mutex; /* lock for storing results */
1787     TNlmMutex callback_mutex; /* lock for issuing update ticks on the screen */
1788     /* Mutex for recalculation of ambiguities, in BlastReevaluateWithAmbiguities */
1789     TNlmMutex ambiguities_mutex;
1790 
1791     /*
1792       GI List to be used if database will be searched by GI.
1793       gi_current is the current element in the array being worked on.
1794       (NOTE(review): this comment used to mention "global_gi_being_used"; no field of that name exists in this struct.)
1795       */
1796     Int4 gi_current;
1797     BlastGiListPtr blast_gi_list;
1798     
1799     /* Number of database sequences for each thread to process. */
1800     Int4 db_chunk_size;
1801 
1802     /* The last db sequence to be assigned.  Used only in get_db_chunk after
1803        the acquisition of the "db_mutex" (above). */
1804     Int4 db_chunk_last;
1805 
1806     /* the last sequence in the database to be compared against. */
1807     Int4 final_db_seq;
1808     Int4 number_seqs_done;  /* number of sequences already tested */
1809     Int4 db_incr;  /* size of a database chunk to get */
1810     Int4 last_db_seq;
1811 
1812     /* How many positive hits were found (set by ReapHitlist, read by tick_proc
1813        and star_proc). */
1814     Int4 number_of_pos_hits;
1815 
1816     /* Used by star_proc to determine whether to emit a star. */
1817     time_t last_tick;
1818     
1819     /* tells star_proc to check that a star should be emitted. */
1820     TNlmThread awake_thr;
1821     Boolean awake;
1822     
1823     /* tells index_proc to check that a message should be emitted. */
1824     TNlmThread index_thr;
1825     Boolean awake_index;
1826     
1827     /*
1828       Callback functions to indicate progress, or lack thereof.
1829       */
1830     int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives));
1831     int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives));
1832     int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives));
1833 
1834     /* whether real databases are done */
1835     Boolean     realdb_done;
1836 
1837 } BlastThrInfo, PNTR BlastThrInfoPtr;
1838     
1839 /*
1840         Structure used for matrix rescaling. 
1841 */
1842 
1843 typedef struct _blast_matrix_rescale {
1844         Int4            alphabet_size,
1845                         query_length;   /* length of query. */
1846         Uint1Ptr        query; /* NOTE(review): presumably the query residues, query_length long -- confirm. */
1847         Nlm_FloatHi     *standardProb;
1848         Int4Ptr         *matrix;
1849         Int4Ptr         *private_matrix;
1850         BLAST_KarlinBlkPtr      *kbp_std, 
1851                                 *kbp_psi, 
1852                                 *kbp_gap_std, 
1853                                 *kbp_gap_psi;
1854         Nlm_FloatHi     lambda_ideal,
1855                         K_ideal;
1856 } BlastMatrixRescale, *BlastMatrixRescalePtr;
1857         
1858                 
1859 /*
1860         The central structure for the BLAST search.  This structure
1861         should contain data (or pointers to data) for all the
1862         information in a BLAST search.
1863 */
1864 
1865 
1866 #define BLAST_SEARCH_ALLOC_QUERY 1 /* Bit flags (one distinct power of two each) for the "allocated" member of BlastSearchBlk. */
1867 #define BLAST_SEARCH_ALLOC_SUBJECT 2
1868 #define BLAST_SEARCH_ALLOC_PBP 4
1869 #define BLAST_SEARCH_ALLOC_SBP 8
1870 #define BLAST_SEARCH_ALLOC_WFP_FIRST 16
1871 #define BLAST_SEARCH_ALLOC_WFP_SECOND 32
1872 #define BLAST_SEARCH_ALLOC_EWPPARAMS 64
1873 #define BLAST_SEARCH_ALLOC_CONTEXT 128
1874 #define BLAST_SEARCH_ALLOC_RESULTS 256
1875 #define BLAST_SEARCH_ALLOC_READDB 512
1876 #define BLAST_SEARCH_ALLOC_TRANS_INFO 1024
1877 #define BLAST_SEARCH_ALLOC_ALL_WORDS 2048
1878 #define BLAST_SEARCH_ALLOC_QUERY_SLP 4096
1879 #define BLAST_SEARCH_ALLOC_THRINFO 8192
1880 #define BLAST_SEARCH_ALLOC_MASK1 16384
1881 
1882 typedef struct blast_search_block {
1883     Int4 allocated; 
1884 /* bit fields specify which structures from below are allocated.  If 
1885 a field is allocated, then its bit is non-zero. 
1886 
1887                 structure               bit-field (define)
1888                 -----------------------------------------
1889                 query                   BLAST_SEARCH_ALLOC_QUERY
1890                 subject                 BLAST_SEARCH_ALLOC_SUBJECT
1891                 pbp                     BLAST_SEARCH_ALLOC_PBP
1892                 sbp                     BLAST_SEARCH_ALLOC_SBP
1893                 wfp_first               BLAST_SEARCH_ALLOC_WFP_FIRST
1894                 wfp_second              BLAST_SEARCH_ALLOC_WFP_SECOND
1895                 ewp_params              BLAST_SEARCH_ALLOC_EWPPARAMS
1896                 context                 BLAST_SEARCH_ALLOC_CONTEXT
1897                 result_struct           BLAST_SEARCH_ALLOC_RESULTS
1898                 rdfp                    BLAST_SEARCH_ALLOC_READDB
1899                 translation_table       BLAST_SEARCH_ALLOC_TRANS_INFO
1900                 translation_table_rc
1901                 all_words               BLAST_SEARCH_ALLOC_ALL_WORDS
1902                 query_slp               BLAST_SEARCH_ALLOC_QUERY_SLP
1903                 mask1                   BLAST_SEARCH_ALLOC_MASK1
1904                 thr_info                BLAST_SEARCH_ALLOC_THRINFO (NOTE(review): row added by name match; it was missing from this table -- confirm) */
1905 
1906 /*
1907   Specifies whether the search is position based or not.
1908   */
1909     Boolean positionBased;
1910     Boolean posConverged;
1911     /*
1912       Specifies that the query sequence was invalid (e.g., XXXXXXXXXXXXXXXXXXXXXX).
1913       */
1914     Boolean query_invalid;
1915     /* Specifies that the search timed out (i.e., cpu time limit was reached). */
1916     Boolean timed_out;
1917     /*
1918       The BLASTContextStructPtr is an array and each element contains
1919       information about the query sequence and the frame number.
1920       If there are six frames (e.g., blastx) then the BLASTContextStructPtr
1921       is six elements long; if there's one frame (e.g., blastp) then
1922       BLASTContextStructPtr is one element long.
1923       
1924       number_of_contexts states how long the context array is.
1925       (NOTE(review): no member named number_of_contexts exists in this struct; first_context and last_context below presumably bound the array -- confirm.) */        
1926     BLASTContextStructPtr context;
1927     Int2        first_context,
1928         last_context;
1929     /* 
1930        The GapAlignBlkPtr used by ALIGN (in gapxdrop.c) for gapped alignments.
1931        */
1932     
1933     GapAlignBlkPtr gap_align;
1934     
1935     /*
1936       All the possible words.
1937       */
1938     BlastAllWordPtr all_words;
1939     /*
1940         Set the context_factor, which specifies how many different 
1941         ways the query or db is examined (e.g., blastn looks at both
1942         strands of query, context_factor is 2).
1943         */
1944     Int2 context_factor;
1945     
1946     /*
1947       What type of search (e.g., blastp, blastx, etc.)?
1948       */
1949     CharPtr prog_name;
1950     Uint1 prog_number;
1951     /*
1952       translation_table and translation_table_rc hold the translation
1953       from ncbi2na to ncbistdaa for normal and reverse-complement
1954       translations.  Only used and initialized with tblast[nx].
1955       Initialized by GetPrivatTranslationTable
1956       */
1957     Uint1Ptr translation_table,
1958         translation_table_rc;
1959     
1960     /*
1961       ValNodePtr containing error messages. 
1962       */
1963     ValNodePtr error_return;
1964     
1965     /*
1966       ValNodePtr containing masking SeqLocPtr's
1967       */
1968     ValNodePtr mask;
1969     ValNodePtr mask1;
1970     /*
1971       What genetic codes are we using to translate the query or database
1972       when needed.  Based upon NCBI genetic codes.
1973       */
1974     CharPtr genetic_code,               /* genetic code used for query. */
1975         db_genetic_code;        /* genetic code used for database. */
1976     
1977     /*  
1978         The BlastSequenceBlk "subject" holds info about the subject.  
1979         Info about the original sequence is in original_seq.  This will
1980         be NULL if the sequence was not translated. 
1981         */
1982     Uint1Ptr translation_buffer;        /* Buffer for (tblast[nx]) db translations*/
1983     Int4 translation_buffer_size;       /* size of translation_buffer. */
1984     CharPtr original_seq;       /* Original (i.e.,  untransl.) sequence. */
1985     BlastSequenceBlkPtr subject;/* subject sequence. */
1986     
1987 
1988     /* KM-- info about individual queries from a concatenated query in
1989        blastn or tblastn */
1990     struct queries PNTR mult_queries;   /* struct defined in blastconcat.h */ 
1991 
1992 
1993     /*
1994       SeqLocPtr for the query, owned by the caller and not by BLAST.
1995       */
1996     SeqLocPtr query_slp;
1997     
1998     /* Id's for the query and subject. */
1999     SeqIdPtr            query_id;       /* ID for the query, any form. */
2000     Int4                        subject_id;     /* the number of the subject, in the DB. */
2001     BLAST_ParameterBlkPtr pbp;  /* options selected. */
2002     BLAST_ScoreBlkPtr sbp;              /* info on scoring. */
2003     BLAST_ExtendWordParamsPtr ewp_params; /* parameters for extensions.*/
2004     
2005     /*  For the two-pass method two BLAST_WordFinderPtr's are required.
2006         The actual wfp's are in wfp_first and wfp_second.  "wfp" is just
2007         a pointer to one of those two.  Whether they have been allocated (at all)
2008         is signified by setting the bit-fields above. 
2009         */
2010     BLAST_WordFinderPtr     wfp,        /* find initial words. */
2011         wfp_first, /* words for first pass. */
2012         wfp_second;/* words for second pass. */
2013     /*  For the two-pass this should be set to TRUE on the first (preliminary)
2014         pass and FALSE on the second pass.
2015         */
2016     Boolean                     prelim;
2017 /*
2018   The "current" hit, that is the one being worked on right now.  
2019   If a hitlist is deemed significant, then "current_hitlist" is 
2020   moved to "seqalign".  current_hitlist_purge specifies 
2021   whether the hitlist should be purged after each call to a
2022   WordFinder; it will generally be purged except for non-initial
2023   frames of tblast[nx].
2024   */
2025     Boolean                     current_hitlist_purge;
2026     BLAST_HitListPtr    current_hitlist;
2027 
2028     BlastSequenceBlkPtr PNTR query_dnap; /* query DNAP sequence. */
2029 
2030     /*
2031       The worst evalue seen by this thread so far.
2032       Only filled in if the hitlist is already full, otherwise
2033       it should be DBL_MAX.
2034       */
2035     Nlm_FloatHi worst_evalue;
2036     /*
2037       Size of the HSP array on the "current_hitlist"
2038       */
2039     Int4 hsp_array_size;
2040     /*
2041       Contains hits that are significant. 
2042       */
2043     Int4                        result_size;
2044     BLASTResultsStructPtr       result_struct;
2045     
2046     Int8                        dblen;  /* total length of the database. */
2047     Int8                    dblen_eff;      /* effective length of the database. */
2048     Int8                    dblen_eff_real;      /* effective length of the database. NOTE(review): same description as dblen_eff; how the two differ is not stated here. */
2049     Int4                    dbseq_num;      /* number of sequences in the database. */
2050     Int4                    length_adjustment; /* amount removed from end of query and db sequences. */
2051     Nlm_FloatHi         searchsp_eff;   /* Effective search space (used for statistics). */
2052     Int4            rps_qlen; /* original query sequence length (RPS-BLAST only) */
2053     ReadDBFILEPtr               rdfp, /* I/O PTR for database files. */
2054         rdfp_list;      /* linked rdfp list of all databases. */
2055     /* The subject info (id and defline) is kept here for the current sequence
2056        if the readdb facility is not used.  This structure should only
2057        be used if rdfp is NULL.
2058        */
2059     BLASTSubjectInfoPtr subject_info;
2060     
2061     /* Data used in threads - previously global variables */
2062 
2063     BlastThrInfoPtr thr_info;
2064     
2065     /*
2066       start and stop of query that must be included for an alignment
2067       to be counted.  The Boolean whole_query specifies whether these
2068       are valid (i.e., have been set) or not.
2069       */
2070     Boolean whole_query;
2071     Int4 required_start, required_end;
2072     
2073     /*
2074       Callback functions to indicate progress, or lack thereof.
2075       (These members are commented out here; BlastThrInfo declares callbacks with the same names.) */
2076     /* int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives)); */
2077     /* int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives)); */
2078     /*
2079       Callback function to handle results (e.g., print them out for neighboring)
2080       in place of BlastSaveCurrentHitlist.
2081       */
2082     int (LIBCALLBACK *handle_results)PROTO((VoidPtr search));
2083     /*  
2084         Output stream to put results to
2085         */
2086     VoidPtr             output;
2087     /*
2088       These "counters" keep track of how often certain operations
2089       were performed.
2090       
2091       This counting is performed only if BLAST_COLLECT_STATS is defined.
2092       */
2093     Int8        first_pass_hits,        /* no. of hits on 1st pass. */
2094         second_pass_hits,       /* no. of hits on 2nd pass. */
2095         second_pass_trys,       /* no. of seqs that made it to 2nd pass. */
2096         first_pass_extends,     /* no. extended on 1st pass. */
2097         second_pass_extends,    /* no. extended on 2nd pass. */
2098         first_pass_good_extends,/* no. successfully extended on 1st pass. */
2099         second_pass_good_extends,/* no. successfully extended on 2nd pass. */
2100         number_of_seqs_better_E,/* how many sequences were better than E. */
2101         prelim_gap_no_contest,  /* No. of HSP's under E=10 alone. */
2102         prelim_gap_passed,      /* No. of HSP's that passed prelim gapping. */
2103         prelim_gap_attempts,    /* No. of HSP's we attempted to gap. */
2104         real_gap_number_of_hsps, /* How many HSP's were gapped in BlastGetGappedScore. */
2105         semid;                  /* ID of the load-balance semaphore is stored here (not a counter, although declared with them). */
2106     GreedyAlignMemPtr abmp; /* Memory for megablast greedy extension */
2107     Int4 PNTR query_context_offsets; /* offsets for all queries and strands in a 
2108                                         concatenated sequence */
2109     SeqIdPtr PNTR qid_array; /* Ids of all queries in Mega BLAST search */
2110     BLASTResultsStructPtr PNTR mb_result_struct; /* one result struct per query
2111                                                     for Mega BLAST */
2112     ValNodePtr mb_endpoint_results; /* Points to linked list of results  */
2113 } BlastSearchBlk, PNTR BlastSearchBlkPtr;
2114     
2115 typedef struct _blast_hsp_segment { /* Node of a singly linked list (via "next") of HSP segments. */
2116    Int4 q_start, q_end; /* NOTE(review): presumably the segment's start/end on the query -- confirm. */
2117    Int4 s_start, s_end; /* NOTE(review): presumably the segment's start/end on the subject -- confirm. */
2118    struct _blast_hsp_segment PNTR next;
2119 } BLASTHSPSegment, PNTR BLASTHSPSegmentPtr;
2120 
2121 #ifdef __cplusplus
2122 }
2123 #endif
2124 #endif /* !__BLASTSTR__ */
2125 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.