|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/tools/blastdef.h |
source navigation diff markup identifier search freetext search file search |
1 /* ===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================*/
24 /*****************************************************************************
25
26 File name: blastdef.h
27
28 Author: Tom Madden
29
30 Contents: #defines and definitions for structures used by BLAST.
31
32 ******************************************************************************/
33 /* $Revision: 6.169 $
34 * $Log: blastdef.h,v $
35 * Revision 6.169 2007/03/13 20:39:58 madden
36 * - Change the type of the dropoff_1st_pass, dropoff_2nd_pass,
37 * gap_x_dropoff, and gap_x_dropoff_final fields of the
38 * BLAST_OptionsBlk struct to Nlm_FloatHi.
39 * [from Mike Gertz]
40 *
41 * Revision 6.168 2006/05/03 14:41:50 madden
42 * Added a Boolean field "unified_p" to the BLAST_OptionsBlk
43 * structure. The field indicates whether to use a combination of
44 * alignment and compositional p-values when evaluating significance;
45 * the field is ignored unless composition-based statistics is on.
46 * (from Mike Gertz).
47 *
48 * Revision 6.167 2006/03/21 22:35:27 camacho
49 * Add support for setting database length in BLAST_WizardOptions{Blk,Mask}
50 *
51 * Revision 6.166 2006/01/24 18:38:15 papadopo
52 * from Mike Gertz: Remove #define'd constants that specify the composition adjustment mode. These have been replaced by an enum in the composition adjustment library
53 *
54 * Revision 6.165 2005/08/31 20:32:47 coulouri
55 * From Mike Gertz:
56 * - Added the kappa_expect_value field to the
57 * BLAST_OptionsBlk datatype. This new field holds the cutoff value
58 * used by RedoAlignmentCore; it should equal expect_value if
59 * RedoAlignmentCore will not be called.
60 * - removed the now unused original_expect_value field of the
61 * BLAST_OptionsBlk datatype.
62 *
63 * Revision 6.164 2005/07/27 15:51:54 coulouri
64 * remove unused queue_callback
65 *
66 * Revision 6.163 2005/05/16 17:43:29 papadopo
67 * From Alejandro Schaffer: Added support for compositional score
68 * matrix adjustment
69 *
70 * Revision 6.162 2005/04/25 14:16:36 coulouri
71 * set db_chunk_size adaptively
72 *
73 * Revision 6.161 2005/01/10 18:52:29 coulouri
74 * fixes from morgulis to allow concatenation of >255 queries in [t]blastn
75 *
76 * Revision 6.160 2004/11/19 13:22:05 madden
77 * Remove no_check_score completely (from Mike Gertz)
78 *
79 * Revision 6.159 2004/09/28 16:02:29 papadopo
80 * From Michael Gertz: Changed the "sumscore" field of an HSP to
81 * "xsum" to represent a normalized sum score of linked HSPs;
82 * the normalized score is more appropriate/useful in doing linking.
83 *
84 * Revision 6.158 2004/06/30 12:29:00 madden
85 * Removed typedef for BlastPruneSapStruct and some defines, moved to blfmtutl.h
86 *
87 * Revision 6.157 2004/04/30 12:45:45 coulouri
88 * bump version to 2.2.9
89 *
90 * Revision 6.156 2004/04/13 21:02:52 madden
91 * Add ignore_gilist Boolean to Options for use in formatting
92 *
93 * Revision 6.155 2004/02/04 15:35:03 camacho
94 * Rollback to fix problems in release 2.2.7
95 *
96 * Revision 6.154 2004/01/27 20:46:06 dondosha
97 * Allow values 0, 1, 2 for no_traceback megablast option
98 *
99 * Revision 6.153 2004/01/05 22:09:26 madden
100 * Put back dashes in date
101 *
102 * Revision 6.152 2004/01/02 13:44:32 coulouri
103 * Revert to hardcoded BLAST_RELEASE_DATE
104 *
105 * Revision 6.151 2003/12/29 15:51:18 coulouri
106 * Bump version, use __DATE__ instead of hardcoded date
107 *
108 * Revision 6.150 2003/11/06 19:52:13 dondosha
109 * Added error MBTemplateType, so it can be returned when wordsize/template length combination is wrong
110 *
111 * Revision 6.149 2003/10/02 19:30:11 madden
112 * add field seAlign to SWResults for use in kappa.c
113 *
114 * Revision 6.148 2003/04/09 14:18:34 madden
115 * Update version and release-date
116 *
117 * Revision 6.147 2003/03/26 15:45:48 boemker
118 * Documented relationships among BLAST_OptionsBlk, BLAST_WizardOptionsBlk,
119 * and BLAST_WizardOptionsMask.
120 *
121 * Revision 6.146 2003/03/25 22:23:06 boemker
122 * Replaced cutoff_s2, which isn't used, with cutoff_s, which is.
123 * Added query_lcase_mask.
124 *
125 * Revision 6.145 2003/03/25 19:58:18 boemker
126 * Moved code to initialize search options from blastcgicmd.cpp to here, as
127 * BLAST_Wizard et al.
128 *
129 * Revision 6.144 2003/03/24 19:42:14 madden
130 * Changes to support query concatenation for blastn and tblastn
131 *
132 * Revision 6.143 2002/11/22 23:28:43 dondosha
133 * Use array of structures instead of array of pointers for initial offset pairs
134 *
135 * Revision 6.142 2002/11/16 17:12:55 madden
136 * Change version and date
137 *
138 * Revision 6.141 2002/11/04 22:51:13 dondosha
139 * Changed FloatHi pvalue to Int4 num_ident in HSP structures
140 *
141 * Revision 6.140 2002/09/13 19:11:02 camacho
142 * Added rps_qlen field
143 *
144 * Revision 6.139 2002/09/11 21:15:23 camacho
145 * Removed obsolete #define and comment about BlastSeqIdList structure
146 *
147 * Revision 6.138 2002/09/11 20:46:25 camacho
148 * Removed deprecated BlastSeqIdListPtr code
149 *
150 * Revision 6.137 2002/08/26 15:49:51 madden
151 * Change release date and version
152 *
153 * Revision 6.136 2002/08/09 19:39:20 camacho
154 * Added constants for some blast search parameters
155 *
156 * Revision 6.135 2002/06/21 21:43:01 camacho
157 * Removed obsolete BlastSeqIdList structure and functions
158 *
159 * Revision 6.134 2002/05/17 21:40:13 dondosha
160 * Added 2 optimal Mega BLAST word templates for length 21
161 *
162 * Revision 6.133 2002/05/14 22:20:20 dondosha
163 * Renamed maximal discontiguous template type into optimal
164 *
165 * Revision 6.132 2002/04/23 20:59:53 madden
166 * Change version and date for release
167 *
168 * Revision 6.131 2002/04/09 18:16:43 dondosha
169 * Added more options/parameters for megablast
170 *
171 * Revision 6.130 2002/03/28 18:53:18 madden
172 * Add ValNodePtr mask1 to BlastSearch structure
173 *
174 * Revision 6.129 2001/12/28 20:38:39 dondosha
175 * Moved Mega BLAST related parameters into a separate structure
176 *
177 * Revision 6.128 2001/12/28 18:01:26 dondosha
178 * Added field scoreThisAlign to SWResults to allow more tie-breaking options
179 *
180 * Revision 6.127 2001/12/14 22:05:40 madden
181 * Changed version and release date
182 *
183 * Revision 6.126 2001/09/11 14:28:31 madden
184 * Added timed_out Boolean to SearchBlk
185 *
186 * Revision 6.125 2001/09/07 14:46:44 dondosha
187 * Roll back removal of threshold_first from functions and structures
188 *
189 * Revision 6.124 2001/09/06 20:24:34 dondosha
190 * Removed threshold_first
191 *
192 * Revision 6.123 2001/08/06 12:50:51 madden
193 * Change release date
194 *
195 * Revision 6.122 2001/07/12 19:50:24 madden
196 * Changed release date
197 *
198 * Revision 6.121 2001/06/28 13:42:09 madden
199 * Fixes to prevent overflow on number of hits reporting
200 *
201 * Revision 6.120 2001/06/12 19:48:56 madden
202 * Introduce total_hsp_limit, check before making SeqAlign
203 *
204 * Revision 6.119 2001/04/13 20:56:08 madden
205 * Updated version to 2.2.1, changed date
206 *
207 * Revision 6.118 2001/04/11 20:56:21 madden
208 * Added scalingFactor for rpsblast, changed release date
209 *
210 * Revision 6.117 2001/03/30 21:58:18 madden
211 * Change release date and version
212 *
213 * Revision 6.116 2001/03/27 21:27:01 madden
214 * Minor efficiency in how lookup table is made
215 *
216 * Revision 6.115 2001/03/19 18:52:57 madden
217 * Add base_offset element to structure for BlastHitRange
218 *
219 * Revision 6.114 2001/02/07 21:05:33 dondosha
220 * Added an output stream to BlastOptionsBlk
221 *
222 * Revision 6.113 2000/12/21 22:28:17 dondosha
223 * Added option and parameter for percent identity cutoff
224 *
225 * Revision 6.112 2000/11/29 16:17:56 dondosha
226 * Added a definition of small structure BLASTHSPSegment
227 *
228 * Revision 6.111 2000/11/14 18:14:00 madden
229 * release date to Nov-13-2000
230 *
231 * Revision 6.110 2000/11/08 22:18:05 dondosha
232 * Added longest_intron integer option and parameter
233 *
234 * Revision 6.109 2000/11/07 16:30:25 madden
235 * Introduce intermediate score (before linking of HSPs) for blastx and tblastn
236 *
237 * Revision 6.108 2000/11/03 20:16:24 dondosha
238 * Changed one_line_results option and parameter to more meaningful no_traceback
239 *
240 * Revision 6.107 2000/11/01 16:25:56 madden
241 * Changes from Futamura for psitblastn
242 *
243 * Revision 6.106 2000/10/18 19:53:19 shavirin
244 * Empty log message.
245 *
246 * Revision 6.105 2000/10/18 19:17:56 shavirin
247 * Changed BLAST_ENGINE_VERSION and BLAST_RELEASE_DATE
248 *
249 * Revision 6.104 2000/10/05 19:50:49 dondosha
250 * Added mb_result_struct to the BlastSearchBlk to be used instead of result_struct in Mega BLAST
251 *
252 * Revision 6.103 2000/09/28 14:48:20 dondosha
253 * Added exact_match_array to hitlist structure for megablast initial hits
254 *
255 * Revision 6.102 2000/09/21 19:16:30 madden
256 * increase AWAKE_THR_MIN_SIZE by 100
257 *
258 * Revision 6.101 2000/08/29 19:35:49 madden
259 * Add gilist_not_owned to blast_gi_list
260 *
261 * Revision 6.100 2000/08/08 20:37:21 madden
262 * increase version number to 2.1.1 and release date
263 *
264 * Revision 6.99 2000/07/17 14:05:22 shavirin
265 * Added parameter Out-Of-Frame shift penalty and query DNAP sequence
266 *
267 * Revision 6.98 2000/07/11 18:38:02 madden
268 * decreased size of helper array, added prefetch to BlastGappedScoreInternal
269 *
270 * Revision 6.97 2000/07/11 17:16:20 shavirin
271 * Added new parameter is_ooframe for Out-Of-Frame gapping algorithm.
272 *
273 * Revision 6.96 2000/07/10 15:41:28 madden
274 * Add typedef for BLAST_HSP_helper
275 *
276 * Revision 6.95 2000/07/07 21:20:07 vakatov
277 * Get all "#include" out of the 'extern "C" { }' scope!
278 *
279 * Revision 6.94 2000/07/06 17:24:55 dondosha
280 * Added option and parameter megablast_full_deflines
281 *
282 * Revision 6.93 2000/06/30 17:52:45 madden
283 * Move AWAKE_THR_MIN_SIZE to blastdef.h
284 *
285 * Revision 6.92 2000/06/29 20:30:03 madden
286 * Update version and date
287 *
288 * Revision 6.91 2000/06/08 20:34:18 madden
289 * add explode_seqids option to show all ids in a defline
290 *
291 * Revision 6.90 2000/05/26 20:04:57 madden
292 * Raise version and date
293 *
294 * Revision 6.89 2000/05/12 19:40:59 dondosha
295 * Added qid_array element to BlastSearchBlk
296 *
297 * Revision 6.88 2000/05/01 19:04:31 shavirin
298 * Changed parameter level in BlastErrorMsg structure from Uint1 to Uint2.
299 *
300 * Revision 6.87 2000/04/21 20:48:05 madden
301 * Change version and date
302 *
303 * Revision 6.86 2000/04/06 14:47:10 madden
304 * Added original_expect_value
305 *
306 * Revision 6.85 2000/04/03 21:20:03 dondosha
307 * Added option and parameter is_neighboring
308 *
309 * Revision 6.84 2000/03/31 19:10:44 dondosha
310 * Changed some names related to MegaBlast
311 *
312 * Revision 6.83 2000/03/13 21:01:24 dondosha
313 * Added boolean option sort_gi_list to options block structure
314 *
315 * Revision 6.82 2000/02/29 18:17:23 shavirin
316 * Variable query_dna_mask changed to query_lcase_mask.
317 *
318 * Revision 6.81 2000/02/18 15:30:36 shavirin
319 * Added parameter query_dna_mask into options and parameters.
320 *
321 * Revision 6.80 2000/02/17 21:23:09 shavirin
322 * Added parameter is_rps_blast.
323 *
324 * Revision 6.79 2000/02/17 19:00:44 shavirin
325 * Removed theCacheSize parameter from everywhere.
326 *
327 * Revision 6.78 2000/02/15 19:06:09 shavirin
328 * Added parameter filter_string into BLAST_ParameterBlk structure.
329 *
330 * Revision 6.77 2000/02/02 18:21:51 madden
331 * Add LinkHelpStruct definition
332 *
333 * Revision 6.76 2000/02/02 16:52:43 dondosha
334 * Added option one_line_results to BLAST_OptionsBlk and BLAST_ParameterBlk
335 *
336 * Revision 6.75 2000/02/01 18:02:22 dondosha
337 * Added greedy alignment option to BLAST_OptionsBlk and query context offsets array to BlastSearchBlk
338 *
339 * Revision 6.74 2000/01/26 22:00:52 madden
340 * Added subject_index field to SWResults
341 *
342 * Revision 6.73 2000/01/20 19:12:00 madden
343 * Change BLAST version and date
344 *
345 * Revision 6.72 2000/01/13 18:10:43 madden
346 * Fix problem with incorrect stat values for blastn and missing hits
347 *
348 * Revision 6.71 2000/01/11 17:02:48 shavirin
349 * Added element theCacheSize into BLAST_OptionsBlk and BLAST_ParameterBlk.
350 *
351 * Revision 6.70 1999/12/31 14:23:19 egorov
352 * Add support for using mixture of real and mask database with gi-list files:
353 * 1. Change logic of creating rdfp list.
354 * 2. BlastGetDbChunk gets real databases first, then masks.
355 * 3. Proper calculation of database sizes using alias files.
356 * 4. Change to CommonIndex to support using of mask databases.
357 * 5. Use correct gis in formatted output (BlastGetAllowedGis()).
358 * 6. Other small changes
359 *
360 * Revision 6.69 1999/12/21 20:04:15 egorov
361 * gi_list now contains start position for corresponding database
362 *
363 * Revision 6.68 1999/11/30 18:23:08 shavirin
364 * Added parameter max_num_patterns to the BLAST_OptionsBlkPtr structure
365 *
366 * Revision 6.67 1999/11/15 22:03:31 madden
367 * added Boolean isFirstAlignment to SWResults
368 *
369 * Revision 6.66 1999/11/12 20:57:39 shavirin
370 * Added parameter use_best_align into BLAST_ParameterBlkPtr
371 *
372 * Revision 6.65 1999/11/12 16:37:30 shavirin
373 * Added new option use_best_align into Blast options.
374 *
375 * Revision 6.64 1999/10/26 20:45:19 madden
376 * Add use_real_db_size option
377 *
378 * Revision 6.63 1999/10/05 17:42:54 shavirin
379 * Removed global variables from blast.c
380 *
381 * Revision 6.62 1999/09/28 20:14:31 madden
382 * Joerg changes to minimize cache misses
383 *
384 * Revision 6.61 1999/08/31 13:42:23 madden
385 * Moved SWResults to blastdef.h from profiles.h
386 *
387 * Revision 6.60 1999/08/27 18:07:33 shavirin
388 * Passed parameter decline_align from top to the engine.
389 *
390 * Revision 6.59 1999/08/26 14:56:49 madden
391 * Raise version and date
392 *
393 * Revision 6.58 1999/08/26 14:55:16 madden
394 * Fixed Int8 problem
395 *
396 * Revision 6.57 1999/08/20 19:47:41 madden
397 * removed version element
398 *
399 * Revision 6.56 1999/08/17 18:37:12 shavirin
400 * Added phi_pattern element into options block.
401 *
402 * Revision 6.55 1999/08/17 14:02:34 madden
403 * add smith_waterman and tweak_parameters fields to Options
404 *
405 * Revision 6.54 1999/05/10 18:47:52 madden
406 * Changed version to 2.0.9
407 *
408 * Revision 6.53 1999/05/08 15:04:24 madden
409 * Changed version and release date
410 *
411 * Revision 6.52 1999/04/23 19:25:01 madden
412 * Fixes a prototype complaint
413 *
414 * Revision 6.51 1999/04/23 16:45:54 madden
415 * call BQ_IncSemaphore as callback
416 *
417 * Revision 6.50 1999/04/22 16:46:13 shavirin
418 * Added semaphore ID to the search_blk structure.
419 *
420 * Revision 6.49 1999/04/01 21:42:47 madden
421 * Fix memory leaks when gi list is used
422 *
423 * Revision 6.48 1999/03/18 21:13:32 egorov
424 * The "output" field added to search block. This is VoidPtr and an application can
425 * use it as stream, ASNIO, etc to output blast results.
426 *
427 * Revision 6.47 1999/03/17 16:49:11 madden
428 * Removed comment within comment
429 *
430 * Revision 6.46 1999/02/17 13:23:01 madden
431 * Added hsp_num_max
432 *
433 * Revision 6.45 1999/01/28 16:04:56 madden
434 * do_not_reallocate Boolean for HSPs
435 *
436 * Revision 6.44 1999/01/26 17:56:37 madden
437 * query_id added to HitRange
438 *
439 * Revision 6.43 1999/01/05 13:57:19 madden
440 * Changed version and release date
441 *
442 * Revision 6.42 1998/12/31 18:17:03 madden
443 * Added strand option
444 *
445 * Revision 6.41 1998/12/29 17:45:06 madden
446 * Add do_sum_stats flag
447 *
448 * Revision 6.40 1998/12/21 13:09:53 madden
449 * Changed version and release date
450 *
451 * Revision 6.39 1998/11/04 01:36:05 egorov
452 * Add support for entrez-query and org-name to blast3
453 *
454 * Revision 6.38 1998/09/16 18:58:57 madden
455 * Changed release number and date
456 *
457 * Revision 6.37 1998/09/14 15:11:15 egorov
458 * Add support for Int8 length databases; remove unused variables
459 *
460 * Revision 6.36 1998/07/30 19:00:32 madden
461 * Change to allow search of subset of database
462 *
463 * Revision 6.35 1998/07/28 21:17:59 madden
464 * Added do_not_reevaluate
465 *
466 * Revision 6.34 1998/07/25 14:26:38 madden
467 * Added comments
468 *
469 * Revision 6.33 1998/07/22 12:16:25 madden
470 * Added handle_results
471 *
472 * Revision 6.32 1998/07/21 20:58:04 madden
473 * Changes to allow masking at hash only
474 *
475 * Revision 6.31 1998/07/17 15:39:56 madden
476 * Changes for Effective search space.
477 *
478 * Revision 6.30 1998/07/14 20:17:05 egorov
479 * Add two new parameters (gilist and gifile) to BLAST_OptionsBlk
480 *
481 * Revision 6.29 1998/06/17 18:10:07 madden
482 * Added isPatternSearch to Options
483 *
484 * Revision 6.28 1998/06/12 16:08:49 madden
485 * BlastHitRange stuff
486 *
487 * Revision 6.27 1998/05/28 19:59:16 madden
488 * Added typedef for BLASTHeapStruct
489 *
490 * Revision 6.26 1998/05/17 16:28:43 madden
491 * Allow changes to filter options and cc filtering.
492 *
493 * Revision 6.25 1998/05/05 13:56:38 madden
494 * Raised version to 2.0.5 and changed date
495 *
496 * Revision 6.24 1998/04/24 19:27:05 madden
497 * Added BlastMatrixRescalePtr
498 *
499 * Revision 6.23 1998/04/01 22:47:14 madden
500 * Added query_invalid flag
501 *
502 * Revision 6.22 1998/03/24 15:38:22 madden
503 * Use BlastDoubleInt4Ptr to keep track of gis and ordinal_ids
504 *
505 * Revision 6.21 1998/03/18 14:14:20 madden
506 * Support random access by gi list
507 *
508 * Revision 6.20 1998/03/14 18:29:21 madden
509 * Added BlastSeqIdListPtr
510 *
511 * Revision 6.19 1998/02/26 22:34:37 madden
512 * Changes for 16 bit windows
513 *
514 * Revision 6.18 1998/02/26 19:10:37 madden
515 * Removed elements with BLAST_COLLECT_SPECIAL_STATS defines
516 *
517 * Revision 6.17 1998/02/24 22:46:29 madden
518 * Added perform_culling Boolean and changed release date
519 *
520 * Revision 6.16 1998/02/19 17:17:10 madden
521 * Use of Int4 rather than Int2 when pruning SeqAlign
522 *
523 * Revision 6.15 1998/01/05 16:46:52 madden
524 * One or both strands can be searched, as opposed to only both, changes to number of contexts
525 *
526 * Revision 6.14 1997/12/23 19:14:14 madden
527 * release version to 2.0.4
528 *
529 * Revision 6.13 1997/12/23 18:12:32 madden
530 * Changes for range-dependent blast
531 *
532 * Revision 6.12 1997/12/12 20:38:02 madden
533 * Fix to comments
534 *
535 * Revision 6.11 1997/12/11 22:20:16 madden
536 * Corrected blast_type defines
537 *
538 * Revision 6.10 1997/12/10 22:41:40 madden
539 * program number defines
540 *
541 * Revision 6.9 1997/11/14 21:30:16 madden
542 * Changed version and date
543 *
544 * Revision 6.8 1997/10/26 17:26:59 madden
545 * Changes for range dependent limits
546 *
547 * Revision 6.7 1997/10/01 13:35:28 madden
548 * Changed BLAST_VERSION to BLAST_ENGINE_VERSION
549 *
550 * Revision 6.6 1997/09/22 17:36:24 madden
551 * MACROS for position-specific matrices from Andy Neuwald
552 *
553 * Revision 6.5 1997/09/18 22:22:12 madden
554 * Added prune functions
555 *
556 * Revision 6.4 1997/09/11 18:49:26 madden
557 * Changes to enable searches against multiple databases.
558 *
559 * Revision 6.3 1997/09/10 21:27:57 madden
560 * Changes to set CPU limits
561 *
562 * Revision 6.2 1997/09/03 19:06:35 madden
563 * changed BLAST_VERSION and BLAST_RELEASE_DATE
564 *
565 * Revision 6.1 1997/08/27 14:46:48 madden
566 * Changes to enable multiple DB searches
567 *
568 * Revision 6.0 1997/08/25 18:52:32 madden
569 * Revision changed to 6.0
570 *
571 * Revision 1.63 1997/08/20 21:43:10 madden
572 * Updated release date
573 *
574 * Revision 1.62 1997/07/21 17:37:15 madden
575 * Added define for BLAST_RELEASE_DATE
576 *
577 * Revision 1.61 1997/07/18 20:55:45 madden
578 * Added BLAST_VERSION
579 *
580 * Revision 1.60 1997/07/15 20:36:43 madden
581 * Added ValNodePtr mask
582 *
583 * Revision 1.59 1997/07/14 15:33:00 madden
584 * typedef for BlastErrorMsg
585 *
586 * Revision 1.58 1997/05/22 21:24:52 madden
587 * Added support for final gapX dropoff value
588 *
589 * Revision 1.57 1997/05/20 17:51:33 madden
590 * Added element SeqLocPtr query_slp to BlastSearch
591 *
592 * Revision 1.56 1997/05/06 22:19:35 madden
593 * Added use_large_gaps and subject_length
594 *
595 * Revision 1.55 1997/04/09 20:01:53 madden
596 * Added seqid_list to SearchBlk
597 *
598 * Revision 1.54 1997/04/03 19:48:13 madden
599 * Changes to use effective database length instead of the length of each
600 * sequence in statistical calculations.
601 *
602 * Revision 1.53 1997/03/31 17:07:57 madden
603 * Added BLAST_COLLECT_STATS define.
604 *
605 * Revision 1.52 1997/03/20 22:56:24 madden
606 * Added gap_info to hsp.
607 *
608 * Revision 1.51 1997/03/14 22:06:11 madden
609 * fixed MT bug in BlastReevaluateWithAmbiguities.
610 *
611 * Revision 1.50 1997/03/08 16:52:16 madden
612 * y
613 * Added discontinuous option to ParameterBlk.
614 *
615 * Revision 1.49 1997/02/25 19:17:05 madden
616 * Added discontinuous flag to options.
617 *
618 * Revision 1.48 1997/02/23 16:44:47 madden
619 * GapAlignBlkPtr added to search structure.
620 *
621 * Revision 1.47 1997/02/20 18:38:34 madden
622 * Added Int4 db_length to Options block.
623 *
624 * Revision 1.46 1997/02/18 21:03:00 madden
625 * Added #define FILTER_NONE 0.
626 *
627 * Revision 1.45 1997/02/17 17:40:18 madden
628 * Added seqalign to ResultHitlistptr
629 *
630 * Revision 1.44 1997/02/11 19:30:54 madden
631 * Added program_name to Options.
632 *
633 * Revision 1.43 1997/02/10 20:27:01 madden
634 * Changed some CharPtr's into Uint1Ptr's.
635 *
636 * Revision 1.42 1997/02/10 20:14:23 madden
637 * replaced doubles by Nlm_FloatHi's.
638 *
639 * Revision 1.41 1997/02/10 20:03:58 madden
640 * Added specific to BlastAllWordsPtr.
641 *
642 * Revision 1.40 1997/02/10 15:36:40 madden
643 * added posConverged to the BlastSearchBlk.
644 *
645 * Revision 1.39 1997/02/06 14:27:15 madden
646 * Addition of BlastAllWord structure.
647 *
648 * Revision 1.38 1997/02/03 13:02:12 madden
649 * Added length to BLASTSubjectInfo.
650 *
651 * Revision 1.37 1997/01/17 17:41:44 madden
652 * Added flags for position based BLAST.
653 *
654 * Revision 1.36 1997/01/13 15:37:05 madden
655 * Changed prototypes for star_callback and tick_callback.
656 *
657 * Revision 1.35 1997/01/11 18:22:10 madden
658 * Changes to allow S2 to be set.
659 *
660 * Revision 1.34 1997/01/09 17:44:35 madden
661 * Added "bit_score" to BLASTResultHsp.
662 *
663 * Revision 1.33 1996/12/27 20:44:10 madden
664 * Changes to require that part of the query be included.
665 *
666 * Revision 1.32 1996/12/23 14:04:44 madden
667 * Added gap_trigger.
668 *
669 * Revision 1.31 1996/12/20 21:11:40 madden
670 * Changes to allow multiple hits runs only.
671 *
672 * Revision 1.30 1996/12/18 14:33:13 madden
673 * Added high_score element.
674 *
675 * Revision 1.29 1996/12/17 17:27:03 madden
676 * Count number of attempted gappings.
677 *
678 * Revision 1.28 1996/12/17 13:47:57 madden
679 * Added star_proc.
680 *
681 * Revision 1.27 1996/12/16 14:35:48 madden
682 * Added gapped_calculation Boolean
683 *
684 * Revision 1.26 1996/12/13 22:00:23 madden
685 * Corrected starting point for gapped extension with traceback.
686 *
687 * Revision 1.25 1996/12/13 18:13:56 madden
688 * Added tick callback functions
689 *
690 * Revision 1.24 1996/12/13 15:09:31 madden
691 * Changes to parameters used for gapped extensions.
692 *
693 * Revision 1.23 1996/12/09 23:24:05 madden
694 * Added parameters to control which sequences get a gapped alignment.
695 *
696 * Revision 1.22 1996/12/08 15:19:59 madden
697 * Added parameters for gapped alignments.
698 *
699 * Revision 1.21 1996/11/27 21:56:57 madden
700 * Removed define for XNU.
701 *
702 * Revision 1.20 1996/11/18 18:07:57 madden
703 * *** empty log message ***
704 *
705 * Revision 1.19 1996/11/18 17:28:13 madden
706 * Added BLAST_SEARCH_ALLOC_TRANS_INFO define.
707 *
708 * Revision 1.18 1996/11/18 15:45:40 madden
709 * Defines for filter type added (by S. Shavirin).
710 *
711 * Revision 1.17 1996/11/15 17:54:54 madden
712 * Added support for alternate genetic codes for blastx, tblast[nx].
713 *
714 * Revision 1.16 1996/11/13 22:35:18 madden
715 * Added genetic_code and db_genetic_code elements to blastdef.h
716 *
717 * Revision 1.15 1996/11/12 16:21:53 madden
718 * Added context_factor
719 *
720 * Revision 1.14 1996/11/06 22:10:01 madden
721 * translation_buffer changed from CharPtr to Uint1Ptr.
722 *
723 * Revision 1.13 1996/11/04 16:59:43 madden
724 * Added translation_table and translation_table_rc elements
725 * to BlastSearchBlk.
726 *
727 * Revision 1.12 1996/10/03 20:49:29 madden
728 * Added xsum member to HSP_Link structure.
729 * ,.
730 *
731 * Revision 1.11 1996/10/01 21:24:02 madden
732 * Added e2.
733 *
734 * Revision 1.10 1996/09/26 13:02:32 madden
735 * Removed ifdef for BLAST_COLLECT_STATS with counters.
736 *
737 * Revision 1.9 1996/09/12 21:13:46 madden
738 * *** empty log message ***
739 *
740 * Revision 1.8 1996/09/11 22:21:51 madden
741 * *** empty log message ***
742 *
743 * Revision 1.7 1996/09/11 19:14:09 madden
744 * Added BLAST_OptionsBlkPtr structure and use thereof.
745 *
746 * Revision 1.6 1996/08/14 18:16:13 madden
747 * removed frame from Context.
748 *
749 * Revision 1.5 1996/08/14 17:19:02 madden
750 * Added frame to BlastSeqBlkPtr.
751 *
752 * Revision 1.4 1996/08/13 15:26:29 madden
753 * Changes for tblastn.
754 *
755 * Revision 1.3 1996/08/09 22:11:12 madden
756 * Added original_sequence to BlastSequenceBlk.
757 *
758 * Revision 1.2 1996/08/07 14:24:42 madden
759 * Removed include for blast18p.h and objblst2.h
760 *
761 * Revision 1.1 1996/08/05 20:32:18 madden
762 * Initial revision
763 *
764 * Revision 1.51 1996/08/02 14:20:06 madden
765 * Removed multiproc structure.
766 *
767 * Revision 1.50 1996/07/31 13:09:17 madden
768 * Changes for threaded blast.
769 *
770 * Revision 1.49 1996/07/24 12:01:28 madden
771 * Changes for blastx
772 *
773 * Revision 1.48 1996/07/18 22:00:49 madden
774 * Addition of BLAST_ExtendWordParams structure.
775 *
776 * Revision 1.47 1996/07/18 13:36:34 madden
777 * Addition of the BLASTContextStructPtr.
778 *
779 * Revision 1.46 1996/07/16 14:37:42 madden
780 * Removed _blast_link_structure .
781 *
782 * Revision 1.45 1996/07/11 16:03:58 madden
783 * SaveCurrentHitlist keeps track of which set an HSP belongs to.
784 *
785 * Revision 1.44 1996/07/02 14:33:16 madden
786 * Added hspcnt_max.
787 *
788 * Revision 1.43 1996/07/02 12:04:15 madden
789 * HSP's saved on array, rather than linked list.
790 *
791 * Revision 1.42 1996/06/26 19:38:12 madden
792 * Removed ifdef.
793 *
794 * Revision 1.41 1996/06/24 20:26:46 madden
795 * Added dropoff_1st_pass and dropoff_2nd_pass to ParameterBlkPtr.
796 *
797 * Revision 1.40 1996/06/24 17:58:21 madden
798 * Removed X_set parameter, added right and left dropoff's.
799 *
800 * Revision 1.39 1996/06/20 16:15:57 madden
801 * Replaced int's with Int4's.
802 *
803 * Revision 1.38 1996/06/19 14:19:53 madden
804 * Added define for BLASTSubjectInfoPtr.
805 *
806 * Revision 1.37 1996/06/17 19:03:07 madden
807 * Removed unused structure.
808 *
809 * Revision 1.36 1996/06/14 17:58:13 madden
810 * Changes to avoid nulling out arrays for every sequence.
811 *
812 * Revision 1.35 1996/06/13 21:03:06 madden
813 * Added actual_window element to ExtendWord structure.
814 *
815 * Revision 1.34 1996/06/11 17:58:31 madden
816 * Changes to allow shorter arrays for multiple hits type blast.
817 *
818 * Revision 1.33 1996/06/10 16:52:16 madden
819 * Use bit-shifting and masking instead of dividing and remainder.
820 *
821 * Revision 1.32 1996/06/10 13:44:07 madden
822 * Changes to reduce the size of the "already visited" array.
823 *
824 * Revision 1.31 1996/06/06 17:55:16 madden
825 * Added number_of_bits to ParameterBlkPtr.
826 *
827 * Revision 1.30 1996/06/06 13:23:17 madden
828 * Added elements cutoff_big_gap and ignore_small_gaps to ParameterBlkPtr.
829 *
830 * Revision 1.29 1996/05/29 12:44:04 madden
831 * Added structure BlastTimeKeeper.
832 *
833 * Revision 1.28 1996/05/28 14:16:32 madden
834 * Added Int4's to collect statistics info.
835 *
836 * Revision 1.27 1996/05/23 21:55:04 madden
837 * Removed unused variable initlen
838 *
839 * Revision 1.26 1996/05/23 21:48:23 madden
840 * Removed unused defines.
841 *
842 * Revision 1.25 1996/05/16 19:51:09 madden
843 * Added documentation block.
844 *
845 * Revision 1.24 1996/05/16 13:29:38 madden
846 * Added defines for contiguous or discontiguous calls.
847 *
848 * Revision 1.23 1996/05/01 15:00:00 madden
849 * Added BlastResults structure defs.
850 *
851 * Revision 1.22 1996/04/24 16:17:26 madden
852 * Added new structure, BLAST_Link.
853 *
854 * Revision 1.21 1996/04/24 12:52:48 madden
855 * ID's for sequences simplified.
856 *
857 * Revision 1.20 1996/04/03 19:14:35 madden
858 * Removed defunct HSP ptr's.
859 *
860 * Revision 1.19 1996/03/29 21:27:43 madden
861 * "hitlist" now kept on SeqAlign rather than HitList.
862 *
863 * Revision 1.17 1996/03/27 19:51:53 madden
864 * "current_hitlist" added to Search Structure.
865 *
866 * Revision 1.16 1996/03/26 19:36:59 madden
867 * Added ReadDBFILEPtr to Search structure.
868 *
869 * Revision 1.15 1996/03/25 16:35:18 madden
870 * Added old_stats.
871 *
872 * Revision 1.14 1996/02/28 21:37:43 madden
873 * Added "trim" variables to segments for HSP.
874 *
875 * Revision 1.13 1996/02/06 22:51:13 madden
876 * Added "prelim" to BlastSearch
877 *
878 * Revision 1.12 1996/02/02 19:25:32 madden
879 * Added wfp_first and wfp_second to BlastParameterBlk for first and second pass.
880 *
881 * Revision 1.11 1996/01/29 21:12:07 madden
882 * *** empty log message ***
883 *
884 * Revision 1.10 1996/01/23 16:31:47 madden
885 * e_cutoff changed from BLAST_Score to double in ParameterBlk.
886 *
887 * Revision 1.9 1996/01/17 17:00:40 madden
888 * Added gap parameters to ParameterBlk, dblen to SearchBlk.
889 *
890 * Revision 1.8 1996/01/17 13:45:58 madden
891 * Added gap_prob and gap_decay_rate to ParameterBlk.
892 *
893 * Revision 1.7 1996/01/11 15:17:36 madden
894 * Added process_num to ParameterBlk.
895 *
896 * Revision 1.6 1996/01/08 23:23:55 madden
897 * removed "len" from HSP.
898 *
899 * Revision 1.5 1996/01/06 18:57:47 madden
900 * Added BLAST_HSP_LINK structure.
901 *
902 * Revision 1.4 1995/12/28 21:26:05 madden
903 * *** empty log message ***
904 *
905 * Revision 1.3 1995/12/26 23:04:14 madden
906 * Added parameters to BlastParameterBlk.
907 *
908 * Revision 1.2 1995/12/21 23:10:41 madden
909 * BLAST_Score prototypes moved to blastkar.h.
910 *
911 * Revision 1.1 1995/12/19 22:33:06 madden
912 * Initial revision
913 *
914 * Revision 1.1 1995/12/08 15:48:23 madden
915 * Initial revision
916 *
917 * */
918 #ifndef __BLASTSTR__
919 #define __BLASTSTR__
920
921 #include <ncbi.h>
922 #include <lookup.h>
923 #include <blastkar.h>
924 #include <objalign.h>
925 #include <sequtil.h>
926 #include <readdb.h>
927 #include <gapxdrop.h>
928 #include <mbalign.h>
929
930 #ifdef __cplusplus
931 extern "C" {
932 #endif
933
934 /* Defines for program numbers. (Translated in BlastGetProgramNumber). */
935 #define blast_type_undefined 0
936 #define blast_type_blastn 1
937 #define blast_type_blastp 2
938 #define blast_type_blastx 3
939 #define blast_type_tblastn 4
940 #define blast_type_tblastx 5
941 #define blast_type_psitblastn 6
942
943
944 /* defines for strand_option, determines which strand of query to compare. */
945 #define BLAST_TOP_STRAND 1
946 #define BLAST_BOTTOM_STRAND 2
947 #define BLAST_BOTH_STRAND 3
948
949 /* Defines that specify whether or not BLAST should delete some memory, or
950 leave it up to the caller.
951 */
952 #define BLAST_OWN 0
953 #define BLAST_NOT_OWN 1
954
955 /* Specifies minimum search space size for an awake thread. */
956 #define AWAKE_THR_MIN_SIZE 2000000000000.0
957
958 #ifndef _BLASTCONCAT_
959 #include "blastconcat.h"
960 #endif
961 /* --KM concat */
962
963 /* Some default values (used when creating blast options block and for
964 * command-line program defaults). When changing these defaults, please
965 * remember to update the defaults in the command-line programs. */
966 #define WINDOW_SIZE_PROT 40
967 #define WINDOW_SIZE_NUCL 0
968 #define WINDOW_SIZE_MEGABLAST 0
969
970 #define WORDSIZE_PROT 3
971 #define WORDSIZE_NUCL 11
972 #define WORDSIZE_MEGABLAST 28
973
974 /* Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
975 * More gap costs are listed in BLASTOptionSetGapParams */
976 #define GAP_OPEN_PROT 11
977 #define GAP_OPEN_NUCL 5
978 #define GAP_OPEN_MEGABLAST 0
979
980 #define GAP_EXTN_PROT 1
981 #define GAP_EXTN_NUCL 2
982 #define GAP_EXTN_MEGABLAST 0
983
984 #define WORD_THRESHOLD_BLASTP 11
985 #define WORD_THRESHOLD_BLASTN 0
986 #define WORD_THRESHOLD_BLASTX 12
987 #define WORD_THRESHOLD_TBLASTN 13
988 #define WORD_THRESHOLD_TBLASTX 13
989 #define WORD_THRESHOLD_MEGABLAST 0
990
991 #define UNGAPPED_X_DROPOFF_PROT 7
992 #define UNGAPPED_X_DROPOFF_NUCL 20
993 #define UNGAPPED_X_DROPOFF_MEGABLAST 10
994
995 #define GAP_X_DROPOFF_PROT 15
996 #define GAP_X_DROPOFF_NUCL 30
997 #define GAP_X_DROPOFF_MEGABLAST 20
998 #define GAP_X_DROPOFF_TBLASTX 0
999
1000 #define GAP_X_DROPOFF_FINAL_PROT 25
1001 #define GAP_X_DROPOFF_FINAL_NUCL 50
1002 #define GAP_X_DROPOFF_FINAL_TBLASTX 0
1003
1004 /* reward and penalty only apply to blastn/megablast */
1005 #define PENALTY -3
1006 #define REWARD 1
1007
1008 /********************************************************************
1009 *
1010 * define for collecting BLAST stats.
1011 *
1012 ***********************************************************************/
1013
1014 #define BLAST_COLLECT_STATS
1015
1016 /********************************************************************
1017 *
1018 * Structure to save timing info. in. Right now this only
1019 * works for UNIX.
1020 *
1021 ********************************************************************/
1022
1023 typedef struct _blast_time_keeper {
1024 FloatLo user, /* CPU time in user space of the process. */
1025 system, /* CPU time used by system. */
1026 total; /* total CPU time (i.e., both of the above). */
1027 } BlastTimeKeeper, PNTR BlastTimeKeeperPtr;
1028
1029
1030 /***************************************************************************
1031 Macros added by Andy Neuwald in order to allow easy modification of matrices.
1032 ***************************************************************************/
1033
1034 #define MtrxScorePosSearch(S,x,y) ((S)->posMatrix[(x)][(y)]) /* element [x][y] of (S)->posMatrix */
1035 #define PtrMtrxScorePosSearch(S,x) ((S)->posMatrix[(x)]) /* row x of (S)->posMatrix, i.e. posMatrix[x] */
1036
1037 /*****
1038 #define MtrxScorePosSearchi2(S,x,y) \
1039 ((S)->posMatrix[( (x) %(S)->query_length)][(y)])
1040 #define PtrMtrxScorePosSearch2(S,x) \
1041 ((S)->posMatrix[( (x) %(S)->query_length)])
1042 *****/
1043
1044 /********************************************************************
1045
1046 Defines for discontiguous word hits on 1st and 2nd pass.
1047
1048 ********************************************************************/
1049
1050 #define BLAST_NO_PASS_DISCONTIG 0
1051 #define BLAST_1ST_PASS_DISCONTIG 1
1052 #define BLAST_2ND_PASS_DISCONTIG 2
1053 #define BLAST_BOTH_PASS_DISCONTIG 3
1054
1055 #define CODON_LENGTH 3 /* three is always the codon length. */
1056
1057 #define BLAST_SMALL_GAPS 0
1058 #define BLAST_LARGE_GAPS 1
1059 #define MAX_INTRON_LENGTH 4000
1060 #define MAX_DBSEQ_LEN 5000000
1061
1062 /*********************************************************************
1063 Filter types definitions
1064 *********************************************************************/
1065
1066 #define FILTER_NONE 0
1067 #define FILTER_DUST 1
1068 #define FILTER_SEG 2
1069
1070 typedef enum { /* Type of the mb_disc_type field in BLAST_OptionsBlk and BLAST_WizardOptionsBlk. NOTE(review): presumably selects the kind of MegaBLAST discontiguous word -- confirm. */
1071 MB_WORD_CODING = 0,
1072 MB_WORD_OPTIMAL = 1,
1073 MB_TWO_TEMPLATES = 2
1074 } MBDiscWordType;
1075
1076 /**********************************************************************
1077 Structure for the blast options (available to user/programmer).
1078 This should be filled in by the "Main" program before blast
1079 is called.
1080
1081 If changes are made to this structure, corresponding changes should
1082 likely be made to BLAST_WizardOptionsBlk and BLAST_WizardOptionsMask.
1083 ***********************************************************************/
1084
1085 typedef struct _blast_optionsblk {
1086 Nlm_FloatHi gap_decay_rate, /* decay rate. */
1087 gap_prob; /* Prob of decay. */
1088 Int4 gap_size, /* Small gap size. */
1089 window_size,/* Multiple Hits window size (zero for single hit algorithm) */
1090 threshold_first, /* Threshold for extending hits (preliminary pass), zero if one-pass algorithm is used. */
1091 threshold_second;/* Threshold for extending hits (second pass) */
1092 Nlm_FloatHi expect_value, /* Expectation value (E) */
1093 e2; /* Expect value for a single HSP */
1094 /* These two scores are zero, unless they've been set, then they set
1095 the expect_value and e2 above. */
1096 Int4 cutoff_s, /* score corresponds to expect_value above.*/
1097 cutoff_s2; /* score corresponds to e2 above. */
1098 Boolean two_pass_method; /* should two passes be used? */
1099 Boolean multiple_hits_only; /* Only the multiple hits alg. used. */
1100 Int4 hitlist_size; /* How many hits should be returned. */
1101 Nlm_FloatHi number_of_bits; /* Number of bits to initiate 2nd pass (default is used if zero) */
1102 Nlm_FloatHi dropoff_1st_pass, /* dropoff ("X") used for 1st pass. */
1103 dropoff_2nd_pass; /* dropoff ("X") used for 2nd pass. */
1104 Int2 number_of_cpus; /* How many CPU's. */
1105 CharPtr matrix; /* name of matrix to use. */
1106 Boolean old_stats; /* Use old stats (option may disappear later) */
1107 Boolean do_sum_stats; /* Should sum statistics be used? */
1108 Boolean use_large_gaps; /* Use only large gaps for linking HSP's with sum stats. */
1109 Int2 wordsize; /* size of word used to find hits. */
1110 Int2 penalty, reward; /* penalty and reward, only for blastn */
1111 /* The ID numbers from gc.prt are used for the genetic codes. */
1112 Int4 genetic_code, /* genetic code for query (blastx, tblastx) */
1113 db_genetic_code; /* genetic code for db (tblast[nx]). */
1114 Int4 filter; /* filter type: 0 means no filter,
1115 a non-zero value indicates the filter type */
1116 CharPtr filter_string; /* String specifying the type of filtering and filter options. */
1117 Boolean gapped_calculation; /* Is a gapped calc. being done? */
1118 /* The next three are used ONLY for gapped alignments. */
1119 Int4 gap_open, /* Cost to open a gap (NO extension). */
1120 gap_extend; /* Cost to extend a gap one letter. */
1121 Nlm_FloatHi gap_x_dropoff, /* X-dropoff (in bits) used by Gapped align routine. */
1122 gap_x_dropoff_final; /* X-dropoff (in bits) used by Gapped align routine for FINAL alignment. */
1123 Int4 decline_align; /* Cost for declining alignment */
1124 Nlm_FloatHi gap_trigger; /* Score (in bits) to gap, if an HSP gaps well. */
1125
1126 Boolean discontinuous; /* Should the SeqAlign be discontinuous.*/
1127 /* What region of the query is required for the alignment. If start is
1128 zero and end is -1 (the entire query), then these are not checked. */
1129 Int4 required_start,
1130 required_end;
1131 Int8 db_length; /* database size used for stat. calcul. */
1132 Int4 dbseq_num; /* number of database sequences used for stat. calcul. */
1133 Nlm_FloatHi searchsp_eff; /* Effective search space to be used. */
1134
1135 /* Options for position based blast. */
1136 Nlm_FloatHi ethresh;
1137 Int4 maxNumPasses,
1138 pseudoCountConst;
1139 CharPtr program_name; /* program name, for reference. */
1140 Int4 cpu_limit; /* timeout total. */
1141 /* Used for region-dependent limits when storing hits. */
1142 Int4 hsp_range_max, /* maximum hits for a range */
1143 block_width; /* width of a block */
1144 Boolean perform_culling; /* Should results be culled at all? */
1145 Boolean isPatternSearch; /* Is this a use of PHI-BLAST?*/
1146 CharPtr gifile; /* name of file containing list of gis on server */
1147 ValNodePtr gilist; /* list of gis specified by client */
1148 Boolean do_not_reevaluate; /* Don't perform BlastReevaluateWithAmbiguities. */
1149 /* These options allow a subset of the database to be examined. If they
1150 are set to zero, then the entire database is examined. */
1151 Int4 first_db_seq, /* 1st sequence in db to be compared. */
1152 final_db_seq; /* Final sequence to be compared. */
1153 CharPtr entrez_query; /* user specified Entrez query to make selection from databases */
1154 CharPtr org_name; /* user specified name of organism; corresponding .gil file will be used */
1155 Uint1 strand_option; /* BLAST_TOP_STRAND, BLAST_BOTTOM_STRAND, or BLAST_BOTH_STRAND. used by blast[nx] and tblastx */
1156 Int4 hsp_num_max; /* maximum number of HSP's allowed. Zero indicates no limit. */
1157 Uint1 tweak_parameters, /* For composition-based statistics. */
1158 smith_waterman;
1159 Boolean unified_p; /* use a combination of alignment and
1160 compositional p-values when evaluating
1161 significance; ignored unless
1162 composition-based statistics is on. */
1163 CharPtr phi_pattern; /* Pattern for PHI-Blast search */
1164 Boolean use_real_db_size; /* Use real DB size. Meant for use if a list of gis is submitted,
1165 but statistics should be based upon the real database. */
1166 Boolean use_best_align; /* option is to use alignments chosen by user in PSM computation API (used in WWW PSI-Blast); */
1167 Int4 max_num_patterns; /* Maximum number of patterns to be used in PHI-Blast search */
1168 Boolean is_megablast_search; /* Is this a MegaBlast search? */
1169 Uint1 no_traceback; /* No traceback in MegaBLAST extension */
1170 Boolean is_rps_blast; /* Is this RPS Blast? */
1171 SeqLocPtr query_lcase_mask; /* Masking of input DNA regions */
1172 Boolean sort_gi_list; /* Should the gi list be sorted? */
1173 Boolean is_neighboring; /* Is this a neighboring task? */
1174 Nlm_FloatHi kappa_expect_value; /* E-value threshold for
1175 hits to be saved when
1176 RedoAlignmentCore is used
1177 to compute final alignments;
1178 should equal expect_value for
1179 other types of alignment. */
1180 Boolean explode_seqids; /* make one SeqAlign for every gi on a
1181 redundant sequence. */
1182 Boolean megablast_full_deflines; /* Print full deflines in
1183 megablast one-line output */
1184 Boolean is_ooframe; /* Use Out-Of-Frame gapping algorithm */
1185 Int4 shift_pen; /* Out-Of-Frame shift penalty */
1186 Boolean gilist_already_calculated; /* translation of gis to ordinalID's already done (used for neighboring). */
1187 Boolean recoverCheckpoint; /* For psitblastn */
1188 Boolean freqCheckpoint; /* For psitblastn */
1189 CharPtr CheckpointFileName; /* For psitblastn */
1190 Int4 longest_intron; /* the length of longest intron for linking HSPs */
1191 FloatLo perc_identity; /* Identity percentage cut-off */
1192 VoidPtr output; /* Output stream to put results to */
1193 FloatHi scalingFactor; /* scaling factor used when constructing pssm for rpsblast. */
1194 Int4 total_hsp_limit; /* total number of HSP's that will be processed to SeqAligns, zero means no limit. */
1195 Boolean mb_one_base_step; /* Scan every base of the database */
1196 Int2 mb_template_length; /* Length of the discontiguous word */
1197 Boolean mb_use_dyn_prog; /* Use dynamic programming gapped extension in
1198 megablast with affine gap scores */
1199 MBDiscWordType mb_disc_type; /* Discontiguous word type; one of the MBDiscWordType values */
1200 Uint4 NumQueries; /*--KM for query concatenation in [t]blastn */
1201 Boolean ignore_gilist; /* Used in traceback stage to not lookup gi's */
1202 } BLAST_OptionsBlk, PNTR BLAST_OptionsBlkPtr;
1203
1204
1205 /* --------------------------------------------------------------------
1206 *
1207 * BLAST_WizardOptionsBlk contains those fields of BLAST_OptionsBlk
1208 * that a user can set.
1209 *
1210 * BLAST_WizardOptionsMask contains a Boolean for each field defined in
1211 * BLAST_WizardOptionsBlk, except those holding pointers. TRUE means
1212 * that the corresponding field in BLAST_WizardOptionsBlk is set.
1213 *
1214 * These structures are used only in conjunction with BLAST_Wizard.
1215 *
1216 * --------------------------------------------------------------------
1217 */
1218
1219 struct _blast_wizardoptionsblk {
1220 Int4 block_width;
1221 Int4 cutoff_s;
1222 Int4 db_genetic_code;
1223 CharPtr entrez_query;
1224 Nlm_FloatHi ethresh;
1225 Nlm_FloatHi expect_value;
1226 CharPtr filter_string;
1227 Int4 first_db_seq;
1228 Int4 final_db_seq;
1229 Int4 gap_extend;
1230 Int4 gap_open;
1231 Boolean gapped_calculation;
1232 Int4 genetic_code;
1233 ValNodePtr gilist;
1234 Int4 hitlist_size;
1235 Int4 hsp_range_max;
1236 Boolean is_ooframe;
1237 CharPtr matrix;
1238 MBDiscWordType mb_disc_type;
1239 Int2 mb_template_length;
1240 Uint1 no_traceback;
1241 Int2 penalty;
1242 FloatLo perc_identity;
1243 Boolean perform_culling;
1244 CharPtr phi_pattern;
1245 Int4 pseudoCountConst;
1246 SeqLocPtr query_lcase_mask;
1247 Int4 required_end;
1248 Int4 required_start;
1249 Int2 reward;
1250 Int8 db_length;
1251 Nlm_FloatHi searchsp_eff;
1252 Boolean smith_waterman;
1253 Uint1 strand_option;
1254 Int4 threshold_first;
1255 Int4 threshold_second;
1256 Uint1 tweak_parameters;
1257 Boolean use_best_align;
1258 Boolean use_real_db_size;
1259 Int4 window_size;
1260 Int2 wordsize;
1261 /* The two fields below have no same-named counterpart in BLAST_OptionsBlk. */
1262 Boolean two_hits;
1263 CharPtr string_options;
1264 };
1265
1266 typedef struct _blast_wizardoptionsblk
1267 BLAST_WizardOptionsBlk,
1268 PNTR BLAST_WizardOptionsBlkPtr;
1269
1270 struct _blast_wizardoptionsmask {
1271 Boolean block_width;
1272 Boolean cutoff_s;
1273 Boolean db_genetic_code;
1274 Boolean ethresh;
1275 Boolean expect_value;
1276 Boolean first_db_seq;
1277 Boolean final_db_seq;
1278 Boolean gap_extend;
1279 Boolean gap_open;
1280 Boolean gapped_calculation;
1281 Boolean genetic_code;
1282 Boolean hitlist_size;
1283 Boolean hsp_range_max;
1284 Boolean is_ooframe;
1285 Boolean mb_disc_type;
1286 Boolean mb_template_length;
1287 Boolean no_traceback;
1288 Boolean penalty;
1289 Boolean perc_identity;
1290 Boolean perform_culling;
1291 Boolean pseudoCountConst;
1292 Boolean required_end;
1293 Boolean required_start;
1294 Boolean reward;
1295 Boolean db_length;
1296 Boolean searchsp_eff;
1297 Boolean smith_waterman;
1298 Boolean strand_option;
1299 Boolean threshold_first;
1300 Boolean threshold_second;
1301 Boolean tweak_parameters;
1302 Boolean use_best_align;
1303 Boolean use_real_db_size;
1304 Boolean window_size;
1305 Boolean wordsize;
1306 /* Flag for BLAST_WizardOptionsBlk.two_hits; the pointer field string_options has no flag here. */
1307 Boolean two_hits;
1308 };
1309
1310 typedef struct _blast_wizardoptionsmask
1311 BLAST_WizardOptionsMask,
1312 PNTR BLAST_WizardOptionsMaskPtr;
1313
1314 typedef enum { /* Type of MegaBlastParameterBlk.template_type. NOTE(review): names appear to encode TEMPL_<word weight>_<template length>, with _OPT variants -- confirm. */
1315 TEMPL_11_16 = 0,
1316 TEMPL_12_16 = 1,
1317 TEMPL_11_18 = 2,
1318 TEMPL_12_18 = 3,
1319 TEMPL_11_21 = 4,
1320 TEMPL_12_21 = 5,
1321 TEMPL_11_16_OPT = 6,
1322 TEMPL_12_16_OPT = 7,
1323 TEMPL_11_18_OPT = 8,
1324 TEMPL_12_18_OPT = 9,
1325 TEMPL_11_21_OPT = 10,
1326 TEMPL_12_21_OPT = 11,
1327 TEMPL_ERROR = -1
1328 } MBTemplateType;
1329
1330 typedef struct _mb_parameter_blk_ {
1331 Uint1 no_traceback; /* No traceback in greedy extension */
1332 Boolean is_neighboring; /* Is this a neighboring task? */
1333 Boolean full_seqids; /* Print full seqids in tabular output? */
1334 FloatLo perc_identity; /* Identity percentage cut-off */
1335 Int4 max_positions; /* Maximal number of positions in query of a given word */
1336 Boolean disc_word; /* Use a discontiguous word template to find initial
1337 matches */
1338 Boolean one_base_step; /* Form words for every position in the database
1339 sequence (default is every 4th position) */
1340 Int2 word_weight; /* Number of identical nucleotides in a word match */
1341 Int2 template_length; /* Length of a discontiguous word template */
1342 Boolean use_dyn_prog; /* Use dynamic programming extension for affine gap
1343 scores */
1344 MBTemplateType template_type; /* Type of a discontiguous template */
1345 Boolean use_two_templates; /* NOTE(review): presumably TRUE when two discontiguous templates are used (cf. MB_TWO_TEMPLATES) -- confirm */
1346 } MegaBlastParameterBlk, PNTR MegaBlastParameterBlkPtr;
1347
1348 /****************************************************************************
1349
1350 PARAMETER BLOCK: parameters for the BLAST search entered on the
1351 command line by the user.
1352
1353 *****************************************************************************/
1354
1355 typedef struct _blast_parameterblk {
1356 BLAST_Score threshold, /* threshold for extending a word hit*/
1357 threshold_first, /* threshold for 1st pass. */
1358 threshold_second, /* threshold for 2nd pass. */
1359 X, /* drop-off score for extension. */
1360 dropoff_1st_pass, /* dropoff ("X") used for 1st pass. */
1361 dropoff_2nd_pass, /* dropoff ("X") used for 2nd pass. */
1362 cutoff_s, /* Final Score to report a hit. */
1363 cutoff_s1, /* Score to save an HSP after a gapped extension. */
1364 cutoff_s2, /* Score to save an HSP after an ungapped extension. */
1365 cutoff_s_first, /* Score (S2) to use on 1st pass */
1366 cutoff_s_second, /* Score (S2) to use on 2nd pass and
1367 for "small" gaps in link_hsps (in blast.c) */
1368 /* Max value of s2, used if s2 is set or s2 becomes larger than s. */
1369 cutoff_s2_max,
1370 cutoff_big_gap; /* cutoff value for a "big" gap in
1371 link_hsps (in blast.c). */
1372 Nlm_FloatHi cutoff_e, /* Expect value to report a hit. */
1373 cutoff_e2, /* Expect value to report a hsp. */
1374 number_of_bits; /* number of bits of significance, used
1375 to calculate cutoff_s_first (above). */
1376 Boolean threshold_set, /*TRUE if threshold set on command-line*/
1377 cutoff_s_set, /* TRUE if cutoff score set on c-l */
1378 cutoff_s2_set, /* TRUE if cutoff score2 set on c-l */
1379 cutoff_e_set, /* TRUE if cutoff expect set on c-l */
1380 cutoff_e2_set, /* TRUE if cutoff expect2 set on c-l */
1381 ignore_small_gaps, /* ignore small gaps if TRUE, set by
1382 CalculateSecondCutoffScore in blast.c if the search
1383 space is smaller than 8*gap_size*gap_size. */
1384 window_size_set;/* TRUE if window size set for MHBLAST*/
1385 Boolean sump_option; /* TRUE if sump is used. */
1386 Int4 gap_size, /* max. gap allowed for small gaps.*/
1387 window_size; /* used for multiple hits BLAST. */
1388 Nlm_FloatHi gap_prob; /* prob. of gap of size "gap" (above).*/
1389 Nlm_FloatHi gap_decay_rate; /* prob. of only one HSP */
1390 Int2 process_num; /* max # processors permitted (for MP).*/
1391 Boolean old_stats; /* Use "old" stats if TRUE. */
1392 Boolean do_sum_stats; /* Should sum statistics be used? */
1393 Boolean use_large_gaps; /* Use only large gaps for linking HSP's with sum stats. */
1394 Boolean two_pass_method; /* should two passes be used? */
1395 Boolean multiple_hits_only; /* Only the multiple hits alg. used. */
1396 Boolean discontinuous; /* Should discontinuous SeqAlign's be produced? */
1397 Boolean gapped_calculation; /* Is a gapped calc. being done? */
1398 Boolean do_not_reevaluate; /* Don't perform BlastReevaluateWithAmbiguities. */
1399 /* The next three are used ONLY for gapped alignments. */
1400 Int4 gap_open, /* Cost to open a gap (NO extension). */
1401 gap_extend, /* Cost to extend a gap one letter. */
1402 gap_x_dropoff, /* X-dropoff used by Gapped align routine. */
1403 gap_x_dropoff_final; /* X-dropoff (in bits) used by Gapped align routine for FINAL alignment. */
1404 Int4 decline_align; /* Cost for declining alignment */
1405
1406 Nlm_FloatHi gap_trigger; /* Score (in bits) to gap, if an HSP gaps well.*/
1407
1408 /* Options for position based blast. */
1409 Nlm_FloatHi ethresh;
1410 Int4 maxNumPasses,
1411 pseudoCountConst;
1412 Int4 cpu_limit; /* timeout total. */
1413 Int4 hsp_range_max, /* maximum hits for a range */
1414 max_pieces; /* Max number of pieces allowed (query_length/block_width) */
1415 Boolean perform_culling; /* determines whether culling should be used or not.
1416 If not, then hsp_range_max, block_width, and max_pieces are ignored. */
1417 /* These options allow a subset of the database to be examined. If they
1418 are set to zero, then the entire database is examined. */
1419 Int4 first_db_seq, /* 1st sequence in db to be compared. */
1420 final_db_seq; /* Final sequence to be compared. */
1421 Int4 hsp_num_max; /* maximum number of HSP's allowed. Zero indicates no limit. */
1422 Boolean use_best_align; /* option is to use alignments chosen by user in PSM computation API (used in WWW PSI-Blast); */
1423 MegaBlastParameterBlkPtr mb_params; /* MegaBlast-specific parameters (a MegaBlastParameterBlk). NOTE(review): the earlier comment read "Is this a MegaBlast search?", so presumably this is NULL for non-MegaBlast searches -- confirm */
1424 CharPtr filter_string; /* String specifying the type of filtering and filter options. - used with Translated RPS Blast */
1425 Boolean is_rps_blast; /* Is this RPS Blast? */
1426 SeqLocPtr query_lcase_mask; /* Masking of input DNA regions */
1427 Boolean explode_seqids; /* make one SeqAlign for every gi on a
1428 redundant sequence. */
1429 Boolean is_ooframe; /* Use Out-Of-Frame gapping algorithm */
1430 Int4 shift_pen; /* Out-Of-Frame shift penalty */
1431 Int4 longest_intron; /* the length of longest intron for linking HSPs */
1432 FloatHi scalingFactor; /* scaling factor used when constructing pssm for rpsblast. */
1433 Int4 total_hsp_limit; /* total number of HSP's that will be processed to SeqAligns, zero means no limit. */
1434 } BLAST_ParameterBlk, PNTR BLAST_ParameterBlkPtr;
1435
1436 typedef Nlm_Int4 BLAST_Diag, PNTR BLAST_DiagPtr;
1437
1438 /* Structure to keep track of the last hit and diag level. */
1439
1440 typedef struct cfj_mod_struct{
1441 Int4 last_hit;
1442 Int4 diag_level;
1443 } CfjModStruct;
1444 /*
1445 BLAST_ExtendWord contains information about which diagonals
1446 have been extended over (i.e., which diagonals have been
1447 tested). This structure will be duplicated once for each
1448 context as every context is different.
1449 */
1450 typedef struct _blast_extend_word {
1451 Int4Ptr _buffer; /* The "real" buffer for diag_level, version,
1452 and last_hit arrays. */
1453 CfjModStruct *combo_array; /* last_hit / diag_level records (see CfjModStruct). NOTE(review): presumably one entry per diagonal -- confirm */
1454 Int4Ptr version; /* still needed?? */
1455 Int4 actual_window; /* The actual window used if the multiple
1456 hits method was used and a hit was found. */
1457 } BLAST_ExtendWord, PNTR BLAST_ExtendWordPtr;
1458
1459 /*
1460 BLAST_ExtendWordParams contains parameters about the extensions.
1461 Only one copy of this structure is needed, regardless of how many
1462 contexts there are.
1463 */
1464 typedef struct _blast_extend_word_params {
1465 Int4 bits_to_shift; /* how many bits should the diagonal be
1466 shifted to get the "version" */
1467 Int4 min_diag_length, /* Min. length of diagonal, actually
1468 2**bits_to_shift. */
1469 min_diag_mask; /* Used to mask off everything above
1470 min_diag_length (mask = min_diag_length-1). */
1471 Int4 offset; /* "offset" added to query and subject position
1472 so that "diag_level" and "last_hit" don't have
1473 to be zeroed out every time. */
1474 Int4 window; /* The "window" size, within which two (or more)
1475 hits must be found in order to be extended. */
1476 /* Used by BLAST_ExtendWordNew to decide whether or not
1477 to prepare the structure for multiple-hit type searches.
1478 If TRUE, multiple hits are not necessary, but possible. */
1479 Boolean multiple_hits;
1480 } BLAST_ExtendWordParams, PNTR BLAST_ExtendWordParamsPtr;
1481 /*
1482 Data block to describe a single sequence.
1483 */
1484
1485 typedef struct blast_sequence_block {
1486 Uint1Ptr sequence, /* Actual (perhaps transl.) sequence. */
1487 sequence_start; /* Start of sequence, used if the sequence is preceded by a NULLB. Sequences
1488 starting with a NULLB are used by BlastWordExtend_L1. */
1489 Int4 length, /* length of sequence. */
1490 original_length,/* length before translation. */
1491 effective_length;/* effective length, used only by query. */
1492 Int2 frame; /* frame of the sequence. */
1493 } BlastSequenceBlk, PNTR BlastSequenceBlkPtr;
1494
1495
1496 typedef struct _blast_seg {
1497 Int2 frame;
1498 Int4 offset; /* start of hsp */
1499 Int4 length; /* length of hsp */
1500 Int4 end; /* end of HSP */
1501 Int4 offset_trim; /* start of trimmed hsp */
1502 Int4 end_trim; /* end of trimmed HSP */
1503 /* Where the gapped extension (with X-dropoff) started. */
1504 Int4 gapped_start;
1505 } BLAST_Seg, PNTR BLAST_SegPtr;
1506
1507 #define BLAST_NUMBER_OF_ORDERING_METHODS 2
1508
1509
1510 /*
1511 The following structure is used in "link_hsps" to decide between
1512 two different "gapping" models. Here link is used to hook up
1513 a chain of HSP's (this is a VoidPtr as _blast_hsp is not yet
1514 defined), num is the number of links, and sum is the sum score.
1515 Once the best gapping model has been found, this information is
1516 transferred up to the BLAST_HSP. This structure should not be
1517 used outside of the function link_hsps.
1518 */
1519 typedef struct _blast_hsp_link {
1520 /* Used to order the HSP's (i.e., hook-up w/o overlapping). */
1521 VoidPtr link[BLAST_NUMBER_OF_ORDERING_METHODS];
1522 /* number of HSP in the ordering. */
1523 Int2 num[BLAST_NUMBER_OF_ORDERING_METHODS];
1524 /* Sum-Score of HSP. */
1525 Int4 sum[BLAST_NUMBER_OF_ORDERING_METHODS];
1526 /* Sum-Score of HSP, multiplied by the appropriate Lambda. */
1527 Nlm_FloatHi xsum[BLAST_NUMBER_OF_ORDERING_METHODS];
1528 Int4 changed;
1529 } BLAST_HSP_LINK, PNTR BLAST_HSP_LINKPtr;
1530 /*
1531 BLAST_NUMBER_OF_ORDERING_METHODS tells how many methods are used
1532 to "order" the HSP's.
1533 */
1534
1535 typedef struct _blast_hsp {
1536 struct _blast_hsp PNTR next, /* the next HSP */
1537 PNTR prev; /* the previous one. */
1538 BLAST_HSP_LINK hsp_link;
1539 /* Is this HSP part of a linked set? */
1540 Boolean linked_set;
1541 /* which method (max or no max for gaps) was used? */
1542 Int2 ordering_method;
1543 /* how many HSP's make up this (sum) segment */
1544 Int4 num;
1545 /* normalized score of a set of "linked" HSP's */
1546 Nlm_FloatHi xsum;
1547 /* If TRUE this HSP starts a chain along the "link" pointer. */
1548 Boolean start_of_chain;
1549 BLAST_Score score;
1550 Int4 num_ident;
1551 Nlm_FloatHi evalue;
1552 BLAST_Seg query, /* query sequence info. */
1553 subject; /* subject sequence info. */
1554 Int2 context; /* Context number of query */
1555 GapXEditBlockPtr gap_info; /* ALL gapped alignment is here */
1556 Int4 num_ref;
1557 Int4 linked_to;
1558 /* Which method, if any, was used for compositional adjustment?
1559 Relevant only for blastp. */
1560 Int2 comp_adjustment_method;
1561 } BLAST_HSP, PNTR BLAST_HSPPtr;
1562
1563 /* The helper arrays contain the info used frequently in the inner for loops. -cfj
1564 * One array of helpers will be allocated for each thread. See comments preceding
1565 * link_hsps in blast.c for more info.
1566 */
1567
1568 typedef struct link_help_struct{
1569 BLAST_HSPPtr ptr;
1570 Int4 q_off_trim;
1571 Int4 s_off_trim;
1572 Int4 sum[BLAST_NUMBER_OF_ORDERING_METHODS];
1573 Int4 maxsum1;
1574 Int4 next_larger;
1575 } LinkHelpStruct;
1576
1577 /* Orders information for HSP accesses. */
1578 typedef struct hsp_helper{
1579 Int4 qoffset,
1580 qend;
1581 } BLAST_HSP_helper, PNTR BLAST_HSP_helperPtr;
1582
1583
1584 typedef struct _exact_match {
1585 Int4 q_off;
1586 Int4 s_off;
1587 } MegaBlastExactMatch, PNTR MegaBlastExactMatchPtr;
1588
1589 typedef struct _blast_hitlist {
1590 struct _blast_hitlist PNTR next;
1591 BLAST_HSPPtr PNTR hsp_array; /* the HSPs of this hit list. NOTE(review): earlier comment said "head of linked list of HSPs", but this is declared as a pointer to BLAST_HSPPtr and named as an array -- confirm */
1592 Int4 hspmax, /* max no. of HSPs allowed per hit list */
1593 hspcnt, /* no. of HSPs in hit list */
1594 hspcnt_max; /* no. of HSPs in hitlist, before reaping */
1595 Boolean further_process; /* This sequence has been found interesting,
1596 it should be further processed by a gapped
1597 alignment etc. */
1598 Boolean do_not_reallocate; /* Don't reallocate the HSP's, probably because
1599 there is no more memory for this. */
1600 /* added -cfj */
1601 LinkHelpStruct *lh_helper;
1602 Int4 lh_helper_size;
1603 MegaBlastExactMatchPtr exact_match_array; /* Array to hold initial
1604 exact match hits */
1605 Int4 exact_match_max;
1606 } BLAST_HitList, PNTR BLAST_HitListPtr;
1607
1608 /*
1609 The next two structures are the final output produced by BLAST. Formatters should then
1610 convert the data into SeqAligns or the BLAST ASN.1 spec.
1611 */
1612
1613 typedef struct _blast_results_hsp {
1614 Int2 ordering_method;/* determines whether large or small gap was used. */
1615 Int4 number; /* number of HSP's used to calculate the p-value. */
1616 BLAST_Score score; /* score of this HSP. */
1617 Nlm_FloatHi e_value,/* expect value of this set of HSP's. */
1618 bit_score; /* above score * lambda/ln2 */
1619 Int4 num_ident;/* number of identities in this HSP. */
1620 Int2 context; /* context number of query. */
1621 Int2 query_frame, /* frame of query, non-zero if transl. */
1622 subject_frame; /* frame of subject, non-zero if transl. */
1623 Int4 query_offset, /* Start of the query HSP. */
1624 query_length, /* Length of the query HSP. */
1625 subject_offset, /* Start of the subject HSP. */
1626 subject_length, /* Length of the subject HSP.*/
1627 hspset_cnt; /* which set of HSP's? */
1628 /* Starting points (on original HSP) for a gapped extension with X dropoff. */
1629 Int4 query_gapped_start,
1630 subject_gapped_start;
1631
1632 GapXEditBlockPtr gap_info; /* ALL gapped alignment is here */
1633 struct _blast_result_hitlist PNTR point_back;
1634 struct _blast_heap_struct PNTR back_left, PNTR back_right;
1635 } BLASTResultHsp, PNTR BLASTResultHspPtr;
1636
1637 /*
1638 The following structure contains the subject info, if the readdb
1639 facility is not being used. Then the subject information is
1640 kept here. Otherwise this structure is NULL.
1641 */
1642 typedef struct _blast_subject_info {
1643 SeqIdPtr sip; /* ID of the subject. */
1644 CharPtr defline; /* Defline of the subject. */
1645 Int4 length; /* untranslated length of the database sequence. */
1646 } BLASTSubjectInfo, PNTR BLASTSubjectInfoPtr;
1647
1648 typedef struct _blast_result_hitlist {
1649 BLASTResultHspPtr hsp_array; /* An array holding the HSP's. */
1650 Nlm_FloatHi best_evalue; /* best evalue in all the HSP's. */
1651 Int4 high_score; /* highest score. NOTE(review): earlier comment said "HSP with highest score", but the field is an Int4, so presumably the score value itself -- confirm */
1652 Int4 hspcnt, /* Number of HSP's. */
1653 subject_id; /* ID of the subject. */
1654 Int2 db_id; /* ID (0,1,2...) of the db if multiple db's searched. */
1655 Int4 subject_length; /* length of the database sequence. */
1656 BLASTSubjectInfoPtr subject_info; /* Subject info if the readdb facility is not being used. */
1657 SeqAlignPtr seqalign; /* alignment, if this is a gapped calculation. */
1658 Int4 num_ref;
1659 } BLASTResultHitlist, PNTR BLASTResultHitlistPtr;
1660
1661
1662 typedef struct _blast_heap_struct { /* One element of a doubly linked (next/prev) list of HSP heaps. */
1663 Int4 cutvalue; /* start of a region? (the question mark is the original author's) */
1664 BLASTResultHspPtr PNTR heap; /* Pointers to result HSP's; num_in_heap gives the count. */
1665 Int4 num_in_heap; /* Number in 'heap' */
1666 Int4 num_of_ref; /* NOTE(review): undocumented; presumably a reference count -- confirm. */
1667 struct _blast_heap_struct PNTR next, PNTR prev; /* Neighbouring elements of the list. */
1668 } BLASTHeapStruct, PNTR BLASTHeapPtr;
1669
1670 /*
1671 Holds the results already saved.
1672 */
1673
1674 typedef struct _blast_results_struct {
1675
1676 BLASTResultHitlistPtr PNTR results; /* The results array: pointers to the saved hitlists. */
1677 Int4 hitlist_count, /* Number of hitlists saved on results array already. */
1678 hitlist_max, /* Length of results array. */
1679 max_pieces; /* For range-dependent limits. */
1680 BLASTResultHspPtr **heap; /* NOTE(review): undocumented; presumably one HSP heap per piece (see max_pieces) -- confirm. */
1681 Int4 *num_in_heap; /* NOTE(review): undocumented; presumably the entry count of each 'heap' -- confirm. */
1682 BLASTHeapPtr heap_ptr; /* Pointer to a BLASTHeapStruct, which links to others through its next/prev fields. */
1683 } BLASTResultsStruct, PNTR BLASTResultsStructPtr;
1684
1685 /*
1686 Holds the data for all possible words that might be used by BLAST.
1687 */
1688
1689 typedef struct _blast_all_words {
1690 Uint1Ptr *array, /* All the possible words. */
1691 array_storage; /* Storage for the words in array. This is a plain Uint1Ptr: the '*' above binds only to 'array'. */
1692 Int4 num_of_cols, /* NOTE(review): undocumented; presumably the number of entries in array -- confirm. */
1693 wordsize; /* Length of each row/word (see rows_allocated). */
1694 Boolean rows_allocated, /* Are the rows (each of length wordsize) allocated? */
1695 specific; /* Specific (limited) words are to be indexed. */
1696 } BlastAllWord, *BlastAllWordPtr;
1697
1698 /*
1699 Contains gi and ordinal number for use by random access BLAST.
1700 */
1701 typedef struct _double_int4 {
1702 Int4 gi, /* The gi. */
1703 ordinal_id, /* The ordinal number. */
1704 start; /* NOTE(review): not covered by the description preceding this struct; BlastHitRange also uses this type for its ranges -- confirm meaning. */
1705 } BlastDoubleInt4, *BlastDoubleInt4Ptr;
1706
1707
1708 typedef struct _blast_gi_list { /* A list of gi's together with a current position. */
1709 BlastDoubleInt4Ptr gi_list; /* List of gi's. */
1710 BlastDoubleInt4Ptr *gi_list_pointer; /* Pointer to above list. NOTE(review): the type is a pointer to BlastDoubleInt4Ptr, so presumably an array of pointers to the entries -- confirm. */
1711 Int4 current; /* Current position in gi list. */
1712 Int4 total; /* Total number of gi's. */
1713 Boolean gilist_not_owned; /* If set, do not delete the gi list at end. */
1714 } BlastGiList, *BlastGiListPtr;
1715
1716 /*
1717 Used for keeping the start and stop of hits to the query, for ALU filtering.
1718 */
1719 typedef struct _blast_hit_range {
1720 BlastDoubleInt4Ptr range_list; /* Ranges. */
1721 BlastDoubleInt4Ptr *range_list_pointer; /* Pointer to above list. */
1722 Int4 current, /* Current position in list. */
1723 total; /* Total number in list. */
1724 SeqIdPtr query_id; /* ID to be put on SeqLoc's that are produced. */
1725 Int4 base_offset; /* Used if a SeqLoc is searched and it does not start at the beginning
1726 of the sequence. */
1727 } BlastHitRange, *BlastHitRangePtr;
1728
1729 /*
1730 Contains BLAST error messages.
1731 */
1732
1733 typedef struct _blast_error_msg {
1734 Uint2 level;/* Corresponds to the levels of ErrPostEx [none(0), info(1), warn(2), error(3) and fatal(4)]. */
1735 CharPtr msg; /* Text of the message. */
1736 } BlastErrorMsg, *BlastErrorMsgPtr;
1737
1738 /*
1739 Holds data for each "context" (which is generally equal to
1740 one frame of the query). blastx would have six contexts,
1741 blastp would have one.
1742 */
1743
1744 typedef struct _blast_context_structure {
1745 Boolean query_allocated;/* TRUE if the BlastSequenceBlkPtr 'query' IS allocated. */
1746 BlastSequenceBlkPtr query; /* Query sequence. */
1747 BLAST_ExtendWordPtr ewp;/* Keeps track of diagonal etc. for each frame. */
1748 ValNodePtr location; /* Where to start/stop masking. */
1749 } BLASTContextStruct, PNTR BLASTContextStructPtr;
1750
1751 /* Structure used for full Smith-Waterman results. */
1752
1753 typedef struct SWResults { /* One Smith-Waterman result; results chain through 'next'. Unlike its neighbours, no pointer typedef is declared for it here. */
1754 Uint1Ptr seq; /* NOTE(review): undocumented; presumably the matched (subject) sequence -- confirm. */
1755 Int4 seqStart; /* NOTE(review): presumably start of the alignment within seq -- confirm. */
1756 Int4 seqEnd; /* NOTE(review): presumably end of the alignment within seq -- confirm. */
1757 Int4 queryStart; /* NOTE(review): presumably start of the alignment within the query -- confirm. */
1758 Int4 queryEnd; /* NOTE(review): presumably end of the alignment within the query -- confirm. */
1759 Int4 *reverseAlignScript; /* NOTE(review): undocumented; the name suggests an alignment script stored in reverse order -- confirm. */
1760 BLAST_Score score; /* NOTE(review): score vs. scoreThisAlign (and eValue vs. eValueThisAlign) are not distinguished here -- confirm. */
1761 BLAST_Score scoreThisAlign;
1762 Nlm_FloatHi eValue;
1763 Nlm_FloatHi eValueThisAlign;
1764 Nlm_FloatHi Lambda; /* NOTE(review): presumably the statistical parameter lambda used for this result -- confirm. */
1765 Nlm_FloatHi logK; /* NOTE(review): presumably the logarithm of the statistical parameter K -- confirm. */
1766 SeqIdPtr subject_id; /* Used to display the sequence in the alignment. */
1767 struct SWResults *next; /* Next result in the list. */
1768 Boolean isFirstAlignment;
1769 Int4 subject_index; /* Needed to break ties on rare occasions. */
1770 SeqAlignPtr seqAlign; /* Needed when seqAlign is already computed. */
1771 } SWResults;
1772
1773 /* Average sizes of protein (AA) and nucleotide (NT) sequences. */
1774 #define BLAST_AA_AVGLEN 300
1775 #define BLAST_NT_AVGLEN 1000
1776
1777 /* How many ticks should be emitted in total. */
1778 #define BLAST_NTICKS 50
1779
1780 /* Period of sending out a star/message. NOTE(review): units are not stated here; presumably seconds -- confirm. */
1781 #define STAR_MSG_PERIOD 60
1782
1783 typedef struct _BlastThrInfo { /* State shared between search threads: locks, db-chunk bookkeeping and progress callbacks. */
1784
1785 TNlmMutex db_mutex; /* Lock for access to database. */
1786 TNlmMutex results_mutex; /* Lock for storing results. */
1787 TNlmMutex callback_mutex; /* Lock for issuing update ticks on the screen. */
1788 /* Mutex for recalculation of ambiguities, in BlastReevaluateWithAmbiguities */
1789 TNlmMutex ambiguities_mutex;
1790
1791 /*
1792 GI List to be used if database will be searched by GI.
1793 gi_current is the current element in the array being worked on.
1794 global_gi_being_used specifies that it will be used.
NOTE(review): no field named global_gi_being_used is declared in this
struct; that sentence may be stale -- confirm.
1795 */
1796 Int4 gi_current;
1797 BlastGiListPtr blast_gi_list;
1798
1799 /* Number of database sequences for each thread to process. */
1800 Int4 db_chunk_size;
1801
1802 /* The last db sequence to be assigned. Used only in get_db_chunk after
1803 the acquisition of the "db_mutex" (above). */
1804 Int4 db_chunk_last;
1805
1806 /* The last sequence in the database to be compared against. */
1807 Int4 final_db_seq;
1808 Int4 number_seqs_done; /* Number of sequences already tested. */
1809 Int4 db_incr; /* Size of a database chunk to get. */
1810 Int4 last_db_seq; /* NOTE(review): undocumented; relation to final_db_seq and db_chunk_last is not stated here -- confirm. */
1811
1812 /* How many positive hits were found (set by ReapHitlist, read by tick_proc
1813 and star_proc). */
1814 Int4 number_of_pos_hits;
1815
1816 /* Used by star_proc to determine whether to emit a star. */
1817 time_t last_tick;
1818
1819 /* Tells star_proc to check that a star should be emitted. */
1820 TNlmThread awake_thr;
1821 Boolean awake;
1822
1823 /* Tells index_proc to check that a message should be emitted. */
1824 TNlmThread index_thr;
1825 Boolean awake_index;
1826
1827 /*
1828 Callback functions to indicate progress, or lack thereof.
1829 */
1830 int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives));
1831 int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives));
1832 int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives));
1833
1834 /* Whether real databases are done. */
1835 Boolean realdb_done;
1836
1837 } BlastThrInfo, PNTR BlastThrInfoPtr;
1838
1839 /*
1840 Structure used for matrix rescaling.
1841 */
1842
1843 typedef struct _blast_matrix_rescale {
1844 Int4 alphabet_size, /* Size of the alphabet. */
1845 query_length; /* Length of query. */
1846 Uint1Ptr query; /* The query (its length is query_length). */
1847 Nlm_FloatHi *standardProb; /* NOTE(review): undocumented; presumably standard residue probabilities -- confirm. */
1848 Int4Ptr *matrix; /* NOTE(review): undocumented; presumably the score matrix as an array of Int4 rows -- confirm. */
1849 Int4Ptr *private_matrix; /* NOTE(review): undocumented; relation to 'matrix' is not stated here -- confirm. */
1850 BLAST_KarlinBlkPtr *kbp_std, /* NOTE(review): the four kbp_* names suggest standard vs. PSI and ungapped vs. gapped Karlin blocks -- confirm. */
1851 *kbp_psi,
1852 *kbp_gap_std,
1853 *kbp_gap_psi;
1854 Nlm_FloatHi lambda_ideal, /* NOTE(review): presumably the ideal lambda the matrix is rescaled towards -- confirm. */
1855 K_ideal; /* NOTE(review): presumably the ideal K -- confirm. */
1856 } BlastMatrixRescale, *BlastMatrixRescalePtr;
1857
1858
1859 /*
1860 The central structure for the BLAST search. This structure
1861 should contain data (or pointers to data) for all the
1862 information in a BLAST search.
1863 */
1864
1865
1866 #define BLAST_SEARCH_ALLOC_QUERY 1 /* Bit flags for BlastSearchBlk.allocated; each value is a distinct power of two. This one: query. */
1867 #define BLAST_SEARCH_ALLOC_SUBJECT 2 /* subject */
1868 #define BLAST_SEARCH_ALLOC_PBP 4 /* pbp */
1869 #define BLAST_SEARCH_ALLOC_SBP 8 /* sbp */
1870 #define BLAST_SEARCH_ALLOC_WFP_FIRST 16 /* wfp_first */
1871 #define BLAST_SEARCH_ALLOC_WFP_SECOND 32 /* wfp_second */
1872 #define BLAST_SEARCH_ALLOC_EWPPARAMS 64 /* ewp_params */
1873 #define BLAST_SEARCH_ALLOC_CONTEXT 128 /* context */
1874 #define BLAST_SEARCH_ALLOC_RESULTS 256 /* result_struct */
1875 #define BLAST_SEARCH_ALLOC_READDB 512 /* rdfp */
1876 #define BLAST_SEARCH_ALLOC_TRANS_INFO 1024 /* translation_table (translation_table_rc is listed with it and has no flag of its own) */
1877 #define BLAST_SEARCH_ALLOC_ALL_WORDS 2048 /* all_words */
1878 #define BLAST_SEARCH_ALLOC_QUERY_SLP 4096 /* query_slp */
1879 #define BLAST_SEARCH_ALLOC_THRINFO 8192 /* NOTE(review): absent from the table inside BlastSearchBlk; presumably thr_info -- confirm. */
1880 #define BLAST_SEARCH_ALLOC_MASK1 16384 /* mask1 */
1881
1882 typedef struct blast_search_block {
1883 Int4 allocated;
1884 /* Bit fields specify which structures from below are allocated. If
1885 a field is allocated, then its bit is non-zero.
1886
1887 structure bit-field (define)
1888 -----------------------------------------
1889 query BLAST_SEARCH_ALLOC_QUERY
1890 subject BLAST_SEARCH_ALLOC_SUBJECT
1891 pbp BLAST_SEARCH_ALLOC_PBP
1892 sbp BLAST_SEARCH_ALLOC_SBP
1893 wfp_first BLAST_SEARCH_ALLOC_WFP_FIRST
1894 wfp_second BLAST_SEARCH_ALLOC_WFP_SECOND
1895 ewp_params BLAST_SEARCH_ALLOC_EWPPARAMS
1896 context BLAST_SEARCH_ALLOC_CONTEXT
1897 result_struct BLAST_SEARCH_ALLOC_RESULTS
1898 rdfp BLAST_SEARCH_ALLOC_READDB
1899 translation_table BLAST_SEARCH_ALLOC_TRANS_INFO
1900 translation_table_rc
1901 all_words BLAST_SEARCH_ALLOC_ALL_WORDS
1902 query_slp BLAST_SEARCH_ALLOC_QUERY_SLP
1903 mask1 BLAST_SEARCH_ALLOC_MASK1
NOTE(review): BLAST_SEARCH_ALLOC_THRINFO is defined with the flags above
but is not listed in this table; presumably it covers thr_info -- confirm.
1904 */
1905
1906 /*
1907 Specifies whether the search is position based or not.
1908 */
1909 Boolean positionBased;
1910 Boolean posConverged;
1911 /*
1912 Specifies that the query sequence was invalid (e.g., XXXXXXXXXXXXXXXXXXXXXX).
1913 */
1914 Boolean query_invalid;
1915 /* Specifies that the search timed out (i.e., cpu time limit was reached). */
1916 Boolean timed_out;
1917 /*
1918 The BLASTContextStructPtr is an array and each element contains
1919 information about the query sequence and the frame number.
1920 If there are six frames (e.g., blastx) then the BLASTContextStructPtr
1921 is six elements long; if there's one frame (e.g., blastp) then
1922 BLASTContextStructPtr is one element long.
1923
1924 number_of_contexts states how long the context array is.
NOTE(review): no field named number_of_contexts is declared in this
struct; first_context and last_context below presumably delimit the
contexts in use -- confirm.
1925 */
1926 BLASTContextStructPtr context;
1927 Int2 first_context,
1928 last_context;
1929 /*
1930 The GapAlignBlkPtr used by ALIGN (in gapxdrop.c) for gapped alignments.
1931 */
1932
1933 GapAlignBlkPtr gap_align;
1934
1935 /*
1936 All the possible words.
1937 */
1938 BlastAllWordPtr all_words;
1939 /*
1940 Set the context_factor, which specifies how many different
1941 ways the query or db is examined (e.g., blastn looks at both
1942 strands of the query, so context_factor is 2).
1943 */
1944 Int2 context_factor;
1945
1946 /*
1947 What type of search (e.g., blastp, blastx, etc.)?
1948 */
1949 CharPtr prog_name;
1950 Uint1 prog_number;
1951 /*
1952 translation_table and translation_table_rc hold the translation
1953 from ncbi2na to ncbistdaa for normal and reverse-complement
1954 translations. Only used and initialized with tblast[nx].
1955 Initialized by GetPrivatTranslationTable
1956 */
1957 Uint1Ptr translation_table,
1958 translation_table_rc;
1959
1960 /*
1961 ValNodePtr containing error messages.
1962 */
1963 ValNodePtr error_return;
1964
1965 /*
1966 ValNodePtr containing masking SeqLocPtr's
1967 */
1968 ValNodePtr mask;
1969 ValNodePtr mask1;
1970 /*
1971 What genetic codes are we using to translate the query or database
1972 when needed. Based upon NCBI genetic codes.
1973 */
1974 CharPtr genetic_code, /* Genetic code used for query. */
1975 db_genetic_code; /* Genetic code used for database. */
1976
1977 /*
1978 The BlastSequenceBlk "subject" holds info about the subject.
1979 Info about the original sequence is in original_seq. This will
1980 be NULL if the sequence was not translated.
1981 */
1982 Uint1Ptr translation_buffer; /* Buffer for (tblast[nx]) db translations. */
1983 Int4 translation_buffer_size; /* Size of translation_buffer. */
1984 CharPtr original_seq; /* Original (i.e., untranslated) sequence. */
1985 BlastSequenceBlkPtr subject;/* Subject sequence. */
1986
1987
1988 /* KM-- info about individual queries from a concatenated query in
1989 blastn or tblastn */
1990 struct queries PNTR mult_queries; /* struct defined in blastconcat.h */
1991
1992
1993 /*
1994 SeqLocPtr for the query, owned by the caller and not by BLAST.
1995 */
1996 SeqLocPtr query_slp;
1997
1998 /* Id's for the query and subject. */
1999 SeqIdPtr query_id; /* ID for the query, any form. */
2000 Int4 subject_id; /* The number of the subject, in the DB. */
2001 BLAST_ParameterBlkPtr pbp; /* Options selected. */
2002 BLAST_ScoreBlkPtr sbp; /* Info on scoring. */
2003 BLAST_ExtendWordParamsPtr ewp_params; /* Parameters for extensions. */
2004
2005 /* For the two-pass method two BLAST_WordFinderPtr's are required.
2006 The actual wfp's are in wfp_first and wfp_second. "wfp" is just
2007 a pointer to one of those two. Whether they have been allocated (at all)
2008 is signified by setting the bit-fields above.
2009 */
2010 BLAST_WordFinderPtr wfp, /* Find initial words. */
2011 wfp_first, /* Words for first pass. */
2012 wfp_second;/* Words for second pass. */
2013 /* For the two-pass method this should be set to TRUE on the first (preliminary)
2014 pass and FALSE on the second pass.
2015 */
2016 Boolean prelim;
2017 /*
2018 The "current" hit, that is the one being worked on right now.
2019 If a hitlist is deemed significant, then "current_hitlist" is
2020 moved to "seqalign". current_hitlist_purge specifies
2021 whether the hitlist should be purged after each call to a
2022 WordFinder; it will generally be purged except for non-initial
2023 frames of tblast[nx].
2024 */
2025 Boolean current_hitlist_purge;
2026 BLAST_HitListPtr current_hitlist;
2027
2028 BlastSequenceBlkPtr PNTR query_dnap; /* Query DNAP sequence. */
2029
2030 /*
2031 The worst evalue seen by this thread so far.
2032 Only filled in if the hitlist is already full, otherwise
2033 it should be DBL_MAX.
2034 */
2035 Nlm_FloatHi worst_evalue;
2036 /*
2037 Size of the HSP array on the "current_hitlist"
2038 */
2039 Int4 hsp_array_size;
2040 /*
2041 Contains hits that are significant.
2042 */
2043 Int4 result_size;
2044 BLASTResultsStructPtr result_struct;
2045
2046 Int8 dblen; /* Total length of the database. */
2047 Int8 dblen_eff; /* Effective length of the database. */
2048 Int8 dblen_eff_real; /* Effective length of the database. NOTE(review): same description as dblen_eff; the difference between the two is not stated here -- confirm. */
2049 Int4 dbseq_num; /* Number of sequences in the database. */
2050 Int4 length_adjustment; /* Amount removed from end of query and db sequences. */
2051 Nlm_FloatHi searchsp_eff; /* Effective search space (used for statistics). */
2052 Int4 rps_qlen; /* Original query sequence length (RPS-BLAST only). */
2053 ReadDBFILEPtr rdfp, /* I/O PTR for database files. */
2054 rdfp_list; /* Linked rdfp list of all databases. */
2055 /* The subject info (id and defline) is kept here for the current sequence
2056 if the readdb facility is not used. This structure should only
2057 be used if rdfp is NULL.
2058 */
2059 BLASTSubjectInfoPtr subject_info;
2060
2061 /* Data used in threads - previously global variables */
2062
2063 BlastThrInfoPtr thr_info;
2064
2065 /*
2066 Start and stop of query that must be included for an alignment
2067 to be counted. The Boolean whole_query specifies whether these
2068 are valid (i.e., have been set) or not.
2069 */
2070 Boolean whole_query;
2071 Int4 required_start, required_end;
2072
2073 /*
2074 Callback functions to indicate progress, or lack thereof.
The two declarations below are commented out in this struct; members
named tick_callback and star_callback are declared in BlastThrInfo.
2075 */
2076 /* int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives)); */
2077 /* int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives)); */
2078 /*
2079 Callback function to handle results (e.g., print them out for neighboring)
2080 in place of BlastSaveCurrentHitlist.
2081 */
2082 int (LIBCALLBACK *handle_results)PROTO((VoidPtr search));
2083 /*
2084 Output stream to put results to
2085 */
2086 VoidPtr output;
2087 /*
2088 These "counters" keep track of how often certain operations
2089 were performed.
2090
2091 This counting is performed only if BLAST_COLLECT_STATS is defined.
2092 */
2093 Int8 first_pass_hits, /* No. of hits on 1st pass. */
2094 second_pass_hits, /* No. of hits on 2nd pass. */
2095 second_pass_trys, /* No. of seqs that made it to 2nd pass. */
2096 first_pass_extends, /* No. extended on 1st pass. */
2097 second_pass_extends, /* No. extended on 2nd pass. */
2098 first_pass_good_extends,/* No. successfully extended on 1st pass. */
2099 second_pass_good_extends,/* No. successfully extended on 2nd pass. */
2100 number_of_seqs_better_E,/* How many sequences were better than E. */
2101 prelim_gap_no_contest, /* No. of HSP's under E=10 alone. */
2102 prelim_gap_passed, /* No. of HSP's that passed prelim gapping. */
2103 prelim_gap_attempts, /* No. of HSP's we attempted to gap. */
2104 real_gap_number_of_hsps, /* How many HSP's were gapped in BlastGetGappedScore. */
2105 semid; /* ID of the load-balance semaphore is stored here; it shares the Int8 declaration with the counters above. */
2106 GreedyAlignMemPtr abmp; /* Memory for megablast greedy extension. */
2107 Int4 PNTR query_context_offsets; /* Offsets for all queries and strands in a
2108 concatenated sequence. */
2109 SeqIdPtr PNTR qid_array; /* Ids of all queries in Mega BLAST search. */
2110 BLASTResultsStructPtr PNTR mb_result_struct; /* One result struct per query
2111 for Mega BLAST. */
2112 ValNodePtr mb_endpoint_results; /* Points to linked list of results. */
2113 } BlastSearchBlk, PNTR BlastSearchBlkPtr;
2114
2115 typedef struct _blast_hsp_segment { /* One element of a singly linked list of segments. */
2116 Int4 q_start, q_end; /* NOTE(review): presumably the segment's start/end on the query -- confirm. */
2117 Int4 s_start, s_end; /* NOTE(review): presumably the segment's start/end on the subject -- confirm. */
2118 struct _blast_hsp_segment PNTR next; /* Next segment in the list. */
2119 } BLASTHSPSegment, PNTR BLASTHSPSegmentPtr;
2120
2121 #ifdef __cplusplus
2122 }
2123 #endif
2124 #endif /* !__BLASTSTR__ */
2125 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |