|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/algo/blast/api/blast_api.c |
source navigation diff markup identifier search freetext search file search |
1 /* $Id: blast_api.c,v 1.53 2008/02/13 22:35:49 camacho Exp $
2 ***************************************************************************
3 * *
4 * COPYRIGHT NOTICE *
5 * *
6 * This software/database is categorized as "United States Government *
7 * Work" under the terms of the United States Copyright Act. It was *
8 * produced as part of the author's official duties as a Government *
9 * employee and thus can not be copyrighted. This software/database is *
10 * freely available to the public for use without a copyright notice. *
11 * Restrictions can not be placed on its present or future use. *
12 * *
13 * Although all reasonable efforts have been taken to ensure the accuracy *
14 * and reliability of the software and data, the National Library of *
15 * Medicine (NLM) and the U.S. Government do not and can not warrant the *
16 * performance or results that may be obtained by using this software, *
17 * data, or derivative works thereof. The NLM and the U.S. Government *
18 * disclaim any and all warranties, expressed or implied, as to the *
19 * performance, merchantability or fitness for any particular purpose or *
20 * use. *
21 * *
22 * In any work or product derived from this material, proper attribution *
23 * of the author(s) as the source of the software or data would be *
24 * appreciated. *
25 *
26 * ===========================================================================
27 *
28 * Author: Ilya Dondoshansky
29 *
30 * ===========================================================================
31 */
32
33 /** @file blast_api.c
34 * Functions for C toolkit applications to perform a BLAST search
35 * against a BLAST database, using the rewritten blast engine.
36 */
37
38 #include <algo/blast/api/blast_api.h>
39 #include <algo/blast/core/blast_setup.h>
40 #include <algo/blast/core/blast_filter.h>
41 #include <algo/blast/core/blast_util.h>
42 #include <algo/blast/core/blast_message.h>
43 #include <algo/blast/core/blast_engine.h>
44 #include <algo/blast/core/blast_traceback.h>
45 #include <algo/blast/core/hspstream_collector.h>
46 #include <algo/blast/core/phi_lookup.h>
47 #include <algo/blast/core/blast_psi.h>
48 #include <algo/blast/api/hspstream_queue.h>
49 #include <algo/blast/api/blast_mtlock.h>
50 #include <algo/blast/api/blast_prelim.h>
51 #include <algo/blast/api/blast_seq.h>
52 #include <algo/blast/api/seqsrc_readdb.h>
53 #include <algo/blast/api/seqsrc_multiseq.h>
54 #include <algo/blast/api/blast_seqalign.h>
55 #include <algo/blast/api/dust_filter.h>
56 #include <algo/blast/api/blast_message_api.h>
57 #include <algo/blast/core/gencode_singleton.h>
58
59 /** @addtogroup CToolkitAlgoBlast
60 *
61 * @{
62 */
63
64 /** Initializes the auxiliary structure with RPS BLAST database information.
65 * @param ppinfo Resulting structure. [out]
66 * @param rps_mmap Memory mapped lookup table [out]
67 * @param rps_pssm_mmap Memory mapped PSSM [out]
68 * @param dbname Name of the database [in]
69 */
70 static Int2
71 s_BlastRPSInfoInit(BlastRPSInfo **ppinfo, Nlm_MemMap **rps_mmap,
72 Nlm_MemMap **rps_pssm_mmap, const char* dbname)
73 {
74 char filename[PATH_MAX];
75 char pathname[PATH_MAX];
76 BlastRPSInfo *info;
77 FILE *auxfile;
78 Int4 i;
79 Int4 seq_size;
80 Int4 num_db_seqs;
81 Nlm_MemMapPtr lut_mmap;
82 Nlm_MemMapPtr pssm_mmap;
83 char buffer[PATH_MAX];
84 ReadDBFILEPtr rdfp;
85 char *tmp_dbname;
86 Uint4 version;
87
88 info = (BlastRPSInfo *)malloc(sizeof(BlastRPSInfo));
89 if (info == NULL)
90 ErrPostEx(SEV_FATAL, 1, 0, "Memory allocation failed");
91
92 /* find the path to the RPS database */
93 tmp_dbname = strdup(dbname);
94 rdfp = readdb_new_ex2(tmp_dbname, READDB_DB_IS_PROT,
95 READDB_NEW_DO_REPORT, NULL, NULL);
96 sfree(tmp_dbname);
97 if (rdfp == NULL)
98 ErrPostEx(SEV_FATAL, 1, 0, "Cannot map RPS BLAST database");
99 sprintf(pathname, "%s", rdfp->full_filename);
100 rdfp = readdb_destruct(rdfp);
101
102 sprintf(filename, "%s.loo", (char *)pathname);
103 lut_mmap = Nlm_MemMapInit(filename);
104 if (lut_mmap == NULL)
105 ErrPostEx(SEV_FATAL, 1, 0, "Cannot map RPS BLAST lookup file");
106
107 info->lookup_header = (BlastRPSLookupFileHeader *)lut_mmap->mmp_begin;
108 version = info->lookup_header->magic_number;
109 if (version != RPS_MAGIC_NUM && version != RPS_MAGIC_NUM_28) {
110
111 version = Nlm_SwitchUint4(version);
112 if (version == RPS_MAGIC_NUM || version == RPS_MAGIC_NUM_28) {
113 ErrPostEx(SEV_FATAL, 1, 0, "RPS BLAST lookup file was created "
114 "on an incompatible platform");
115 }
116 else {
117 ErrPostEx(SEV_FATAL, 1, 0, "RPS BLAST lookup file is corrupt");
118 }
119 }
120
121 sprintf(filename, "%s.rps", (char *)pathname);
122 pssm_mmap = Nlm_MemMapInit(filename);
123 if (pssm_mmap == NULL)
124 ErrPostEx(SEV_FATAL, 1, 0, "Cannot map RPS BLAST profile file");
125
126 info->profile_header = (BlastRPSProfileHeader *)pssm_mmap->mmp_begin;
127 version = info->profile_header->magic_number;
128 if (version != RPS_MAGIC_NUM && version != RPS_MAGIC_NUM_28) {
129
130 version = Nlm_SwitchUint4(version);
131 if (version == RPS_MAGIC_NUM || version == RPS_MAGIC_NUM_28) {
132 ErrPostEx(SEV_FATAL, 1, 0, "RPS BLAST profile file was created "
133 "on an incompatible platform");
134 }
135 else {
136 ErrPostEx(SEV_FATAL, 1, 0, "RPS BLAST profile file is corrupt");
137 }
138 }
139
140 num_db_seqs = info->profile_header->num_profiles;
141
142 sprintf(filename, "%s.aux", (char *)pathname);
143 auxfile = FileOpen(filename, "r");
144 if (auxfile == NULL)
145 ErrPostEx(SEV_FATAL, 1, 0,"Cannot open RPS BLAST parameters file");
146
147 fscanf(auxfile, "%s", buffer);
148 info->aux_info.orig_score_matrix = strdup(buffer);
149 fscanf(auxfile, "%d", &info->aux_info.gap_open_penalty);
150 fscanf(auxfile, "%d", &info->aux_info.gap_extend_penalty);
151 fscanf(auxfile, "%le", &info->aux_info.ungapped_k);
152 fscanf(auxfile, "%le", &info->aux_info.ungapped_h);
153 fscanf(auxfile, "%d", &info->aux_info.max_db_seq_length);
154 fscanf(auxfile, "%d", &info->aux_info.db_length);
155 fscanf(auxfile, "%lf", &info->aux_info.scale_factor);
156
157 info->aux_info.karlin_k = (double *)malloc(num_db_seqs * sizeof(double));
158 for (i = 0; i < num_db_seqs && !feof(auxfile); i++) {
159 fscanf(auxfile, "%d", &seq_size); /* not used */
160 fscanf(auxfile, "%le", &info->aux_info.karlin_k[i]);
161 }
162
163 if (i < num_db_seqs)
164 ErrPostEx(SEV_FATAL, 1, 0, "Missing Karlin parameters");
165
166 FileClose(auxfile);
167 *ppinfo = info;
168 *rps_mmap = lut_mmap;
169 *rps_pssm_mmap = pssm_mmap;
170 return 0;
171 }
172
173 /** Initializes and populates the RPS BLAST specific structures.
174 * @param seq_src Database sequences source [in]
175 * @param options All search options [in]
176 * @param rps_options Copy of options, with RPS-specific modifications for
177 * scoring and hit saving options. All other options pointers
178 * are left the same as in input "options". [out]
179 * @param rps_info_out Auxiliary structure with RPS-specific information [out]
180 * @param rps_mmap Memory mapped lookup table [out]
181 * @param rps_pssm_mmap Memory mapped PSSM [out]
182 * @param scale_factor Scaling factor for RPS matrix. [out]
183 * @param extra_returns Structure containing error information [in] [out]
184 * @return Status.
185 */
186 static Int2
187 s_RPSExtraStructsSetUp(const BlastSeqSrc* seq_src, const SBlastOptions* options,
188 SBlastOptions* *rps_options, BlastRPSInfo* *rps_info_out,
189 Nlm_MemMapPtr *rps_mmap, Nlm_MemMapPtr *rps_pssm_mmap,
190 double *scale_factor, Blast_SummaryReturn* extra_returns)
191 {
192 const char* kDbName;
193 BlastRPSInfo* rps_info = NULL;
194 BlastScoringOptions* rps_score_options;
195 BlastHitSavingOptions* rps_hit_options;
196 Int2 status = 0;
197
198 /* The caller has already checked these, so we are just asserting it here. */
199 ASSERT(seq_src && options && extra_returns);
200
201 kDbName = BlastSeqSrcGetName(seq_src);
202
203 if (kDbName == NULL ||
204 (status = s_BlastRPSInfoInit(&rps_info, rps_mmap,
205 rps_pssm_mmap, kDbName)) != 0) {
206 SBlastMessageWrite(&extra_returns->error, SEV_WARNING, "RPS BLAST database setup failed", NULL, FALSE);
207 return status;
208 }
209 *rps_info_out = rps_info;
210 *scale_factor = rps_info->aux_info.scale_factor;
211 rps_score_options = (BlastScoringOptions*)
212 BlastMemDup(options->score_options, sizeof(BlastScoringOptions));
213 rps_hit_options = (BlastHitSavingOptions*)
214 BlastMemDup(options->hit_options, sizeof(BlastHitSavingOptions));
215 rps_score_options->gap_open =
216 rps_info->aux_info.gap_open_penalty;
217 rps_score_options->gap_extend =
218 rps_info->aux_info.gap_extend_penalty;
219 rps_score_options->matrix =
220 strdup(rps_info->aux_info.orig_score_matrix);
221
222 *rps_options = (SBlastOptions*) BlastMemDup(options, sizeof(SBlastOptions));
223 (*rps_options)->score_options = rps_score_options;
224 (*rps_options)->hit_options = rps_hit_options;
225
226 return 0;
227 }
228
229 /** Frees the RPS BLAST specific extra structures.
230 * @param rps_info Auxiliary structure with RPS-specific information [in]
231 * @param rps_mmap Memory mapped lookup table [in]
232 * @param rps_pssm_mmap Memory mapped PSSM [in]
233 * @param options Copy of the options wrapper structure, containing scoring
234 * and hit saving options, specially modified for RPS search.
235 * All other options are the same as in the original structure,
236 * so they should not be freed here. [in]
237 */
238 static void
239 s_RPSExtraStructsFree(BlastRPSInfo* rps_info, Nlm_MemMapPtr rps_mmap,
240 Nlm_MemMapPtr rps_pssm_mmap, SBlastOptions* options)
241 {
242 Nlm_MemMapFini(rps_mmap);
243 Nlm_MemMapFini(rps_pssm_mmap);
244
245 if (rps_info) {
246 sfree(rps_info->aux_info.karlin_k);
247 sfree(rps_info->aux_info.orig_score_matrix);
248 sfree(rps_info);
249 }
250 if (options) {
251 if (options->score_options) {
252 sfree(options->score_options->matrix);
253 sfree(options->score_options);
254 }
255 sfree(options->hit_options);
256 sfree(options);
257 }
258 }
259
260 /** Sets up the HSP stream, depending on whether the search is single or
261 * multithreaded, and whether on-the-fly tabular output option is set.
262 */
263 static Int2
264 s_BlastHSPStreamSetUp(BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
265 const BlastSeqSrc* seq_src, const SBlastOptions* options,
266 BlastScoreBlk* sbp, BlastTabularFormatData* tf_data,
267 BlastHSPStream* *hsp_stream,
268 Blast_SummaryReturn* extra_returns)
269 {
270 Int2 status = 0;
271
272 /* If any of the required inputs were NULL, the caller would have exited
273 before getting to this point. ASSERT this here. */
274 ASSERT(query && query_info && seq_src && options && sbp);
275
276 if (!tf_data) {
277 const Int4 kNumResults = query_info->num_queries;
278 SBlastHitsParameters* blasthit_params=NULL;
279 MT_LOCK lock = NULL;
280 if (options->num_cpus > 1)
281 lock = Blast_MT_LOCKInit();
282
283 SBlastHitsParametersNew(options->hit_options, options->ext_options,
284 options->score_options, &blasthit_params);
285 *hsp_stream =
286 Blast_HSPListCollectorInitMT(options->program, blasthit_params,
287 options->ext_options, TRUE,
288 kNumResults, lock);
289 } else {
290 /* Initialize the queue HSP stream for tabular formatting. */
291 *hsp_stream = Blast_HSPListQueueInit();
292 if ((status = Blast_TabularFormatDataSetUp(tf_data, options->program,
293 *hsp_stream, seq_src, query, query_info,
294 options->score_options, sbp, options->eff_len_options,
295 options->ext_options, options->hit_options,
296 options->db_options)) != 0) {
297 SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
298 "Failed to set up tabular formatting data structure", NULL, FALSE);
299 return status;
300 }
301 }
302 return status;
303 }
304
305 /** Starts and joins all threads performing a multi-threaded search, with or
306 * without on-the-fly output, or performs a single-threaded search.
307 */
308 static Int2
309 s_BlastThreadManager(BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
310 const BlastSeqSrc* seq_src, const SBlastOptions* options,
311 LookupTableWrap* lookup_wrap, BlastScoreBlk* sbp,
312 BlastHSPStream* hsp_stream, BlastRPSInfo* rps_info,
313 BlastTabularFormatData* tf_data, BlastHSPResults **results,
314 Blast_SummaryReturn* extra_returns)
315 {
316 Int2 status = 0;
317 /* The options input cannot be NULL here. The program would have exited
318 before entering this function if it was. */
319 const BlastInitialWordOptions* word_options = options->word_options;
320 const BlastScoringOptions* score_options = options->score_options;
321 const BlastExtensionOptions* ext_options = options->ext_options;
322 const BlastHitSavingOptions* hit_options = options->hit_options;
323 const BlastEffectiveLengthsOptions* eff_len_options =
324 options->eff_len_options;
325 const PSIBlastOptions* psi_options = options->psi_options;
326 const BlastDatabaseOptions* db_options = options->db_options;
327 TNlmThread format_thread = NULL;
328 BlastDiagnostics* diagnostics = NULL;
329 const EBlastProgramType kProgram = options->program;
330 const int kNumCpus = options->num_cpus;
331
332 /* Assert that all required inputs are not NULL. They must be - otherwise
333 the program should have exited before entering this function. */
334 ASSERT(query && query_info && seq_src && lookup_wrap && sbp &&
335 hsp_stream && extra_returns);
336
337 BlastSeqSrcResetChunkIterator((BlastSeqSrc*) seq_src);
338
339 /* Start the formatting thread */
340 if(tf_data && NlmThreadsAvailable() &&
341 (format_thread =
342 NlmThreadCreate(Blast_TabularFormatThread, (void*) tf_data))
343 == NULL_thread) {
344 SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
345 "Cannot create thread for formatting tabular output\n", NULL, options->believe_query);
346 return -1;
347 }
348
349 if (NlmThreadsAvailable() && kNumCpus > 1) {
350 TNlmThread* thread_array =
351 (TNlmThread*) calloc(kNumCpus, sizeof(TNlmThread));
352 BlastPrelimSearchThreadData* search_data = NULL;
353 void* join_status = NULL;
354 int index;
355
356 diagnostics = Blast_DiagnosticsInitMT(Blast_MT_LOCKInit());
357
358 for (index = 0; index < kNumCpus; index++) {
359 search_data =
360 BlastPrelimSearchThreadDataInit(kProgram, query,
361 query_info, seq_src, lookup_wrap, score_options,
362 word_options, ext_options, hit_options, eff_len_options,
363 psi_options, db_options, sbp, diagnostics, hsp_stream);
364
365 thread_array[index] =
366 NlmThreadCreate(Blast_PrelimSearchThreadRun,
367 (void*) search_data);
368 }
369 for (index = 0; index < kNumCpus; index++)
370 NlmThreadJoin(thread_array[index], &join_status);
371
372 MemFree(thread_array);
373
374 if (!tf_data) {
375 SPHIPatternSearchBlk* pattern_blk = NULL;
376 if (Blast_ProgramIsPhiBlast(kProgram)) {
377 pattern_blk = (SPHIPatternSearchBlk*) lookup_wrap->lut;
378 pattern_blk->num_patterns_db =
379 (Int4)diagnostics->ungapped_stat->lookup_hits;
380 }
381
382 if ((status = Blast_RunTracebackSearch(kProgram, query,
383 query_info, seq_src, score_options,
384 ext_options, hit_options, eff_len_options,
385 db_options, psi_options, sbp, hsp_stream,
386 rps_info, pattern_blk, results)) != 0) {
387 SBlastMessageWrite(&extra_returns->error, SEV_ERROR,
388 "Traceback engine failed\n", NULL, options->believe_query);
389 }
390 }
391 } else {
392 diagnostics = Blast_DiagnosticsInit();
393
394 if (tf_data) { /* Single thread, tabular */
395 if ((status =
396 Blast_RunPreliminarySearch(kProgram, query, query_info,
397 seq_src, score_options, sbp, lookup_wrap, word_options,
398 ext_options, hit_options, eff_len_options, psi_options,
399 db_options, hsp_stream, diagnostics)) != 0) {
400 SBlastMessageWrite(&extra_returns->error, SEV_ERROR,
401 "Preliminary search engine failed\n", NULL, options->believe_query);
402 }
403 } else { /* Single thread, non-tabular */
404 if ((status=Blast_RunFullSearch(kProgram, query, query_info,
405 seq_src, sbp, score_options, lookup_wrap,
406 word_options, ext_options, hit_options,
407 eff_len_options, psi_options, db_options, hsp_stream,
408 rps_info, diagnostics, results, 0, 0)) != 0) {
409 SBlastMessageWrite(&extra_returns->error, SEV_ERROR,
410 "Blast_RunFullSearch failed\n", NULL, options->believe_query);
411 }
412 }
413 }
414
415 if (tf_data) {
416 void* join_status = NULL;
417 BlastHSPStreamClose(hsp_stream);
418 NlmThreadJoin(format_thread, &join_status);
419 /* Free the internally allocated structures used for tabular
420 formatting. */
421 BlastTabularFormatDataClean(tf_data);
422 }
423
424 hsp_stream = BlastHSPStreamFree(hsp_stream);
425 Blast_SummaryReturnFill(kProgram, score_options, sbp, options->lookup_options,
426 word_options, ext_options, hit_options,
427 eff_len_options, options->query_options, query_info,
428 seq_src, &diagnostics, extra_returns);
429
430 return status;
431 }
432
433 /** GET_MATRIX_PATH callback to find the path to a specified matrix.
434 * Looks first in current directory, then one specified by
435 * .ncbirc, then in local data directory, then env
436 * variables.
437 * @param matrix_name name of the matrix (e.g., BLOSUM50) [in]
438 * @param is_prot protein matrix if TRUE [in]
439 * @return path to matrix if found, or NULL.
440 */
441 static char*
442 s_BlastFindMatrixPath(const char* matrix_name, Boolean is_prot)
443 {
444 char* matrix_path = NULL; /* return value. */
445 char buf_path[PATH_MAX]; /* Used for path without matrix filename. */
446 char buf_full[PATH_MAX]; /* used for full path with filename. */
447 char* ptr = NULL;
448
449 if (matrix_name == NULL)
450 return NULL;
451
452 /* current directory */
453 if (Nlm_FileLength((char*) matrix_name) > 0)
454 {
455 char buf_path_2[PATH_MAX];
456 Nlm_ProgramPath(buf_path, PATH_MAX);
457 ptr = StringRChr (buf_path, DIRDELIMCHR);
458 if (ptr != NULL)
459 *ptr = '\0';
460 sprintf(buf_path_2, "%s%s", buf_path, DIRDELIMSTR);
461 matrix_path = StringSave(buf_path_2);
462 return matrix_path;
463 }
464
465 /* local data directory. */
466 sprintf(buf_full, "data%s%s", DIRDELIMSTR, matrix_name);
467 if (Nlm_FileLength(buf_full) > 0)
468 {
469 char buf_path_2[PATH_MAX];
470 Nlm_ProgramPath(buf_path, PATH_MAX);
471 ptr = StringRChr (buf_path, DIRDELIMCHR);
472 if (ptr != NULL)
473 *ptr = '\0';
474 sprintf(buf_path_2, "%s%sdata%s", buf_path, DIRDELIMSTR, DIRDELIMSTR);
475 matrix_path = StringSave(buf_path_2);
476 return matrix_path;
477 }
478
479 if(FindPath("ncbi", "ncbi", "data", buf_path, PATH_MAX)) {
480 sprintf(buf_full, "%s%s", buf_path, matrix_name);
481 if(FileLength(buf_full) > 0) {
482 matrix_path = StringSave(buf_path);
483 return matrix_path;
484 } else {
485 char alphabet_type[3]; /* aa or nt */
486 if (is_prot)
487 Nlm_StringNCpy(alphabet_type, "aa", 2);
488 else
489 Nlm_StringNCpy(alphabet_type, "nt", 2);
490 alphabet_type[2] = NULLB;
491
492 sprintf(buf_full, "%s%s%s%s", buf_path,
493 alphabet_type, DIRDELIMSTR, matrix_name);
494 if(FileLength(buf_full) > 0)
495 {
496 matrix_path = StringSave(buf_path);
497 return matrix_path;
498 }
499 }
500 }
501
502 return NULL;
503 }
504
505
506 /**
507 * Read a checkpoint file and set the necessary structures in a
508 * BlastScoreBlk: the psi_matrix, kbp_psi[0], and kbp_gap_psi[0].
509 *
510 * @param sbp a BlastScoreBlk to receive a PSSM [in/out]
511 * @param query query sequence data
512 * @param psi_matrix_file checkpoint file to read
513 * @pcore_msg a pointer to receive error and warning messages
514 */
515 static int
516 s_SetupScoreBlkPssmFromChkpt(BlastScoreBlk * sbp,
517 BLAST_SequenceBlk * query,
518 Blast_PsiCheckpointLoc * psi_checkpoint,
519 Blast_Message* *pcore_msg)
520 {
521 int status = 0;
522 /* An intermediate representation of the PSSM data that is used
523 in PSIBlast routines */
524 PSIMatrix * pssm = NULL;
525 /* The actual PSSM that is saved in the BlastScoreBlk */
526 SPsiBlastScoreMatrix * psi_matrix = NULL;
527 size_t i, j;
528
529 psi_matrix = SPsiBlastScoreMatrixNew(query->length);
530 if (!psi_matrix) {
531 ErrPostEx(SEV_FATAL, 1, 0,
532 "Out-of-memory: cannot allocate a PSSM of length %d.\n",
533 query->length);
534 status = -1;
535 goto error_return;
536 }
537 status = Blast_PosReadCheckpoint(psi_matrix->freq_ratios,
538 query->length, query->sequence,
539 psi_checkpoint,
540 pcore_msg);
541 if (status != 0) {
542 goto error_return;
543 }
544 Blast_KarlinBlkCopy(psi_matrix->kbp, sbp->kbp_gap_std[0]);
545 status = PSICreatePssmFromFrequencyRatios(query->sequence,
546 query->length, sbp,
547 psi_matrix->freq_ratios,
548 kPSSM_NoImpalaScaling,
549 &pssm);
550 if (0 != status) {
551 goto error_return;
552 }
553 for (i = 0; i < psi_matrix->pssm->ncols; i++) {
554 for (j = 0; j < psi_matrix->pssm->nrows; j++) {
555 psi_matrix->pssm->data[i][j] = pssm->pssm[i][j];
556 }
557 }
558 PSIMatrixFree(pssm);
559 sbp->psi_matrix = psi_matrix;
560 return 0;
561 error_return:
562 if (psi_matrix)
563 SPsiBlastScoreMatrixFree(psi_matrix);
564 return status;
565 }
566
567
568 Int2
569 Blast_RunSearch(SeqLoc* query_seqloc,
570 Blast_PsiCheckpointLoc * psi_checkpoint,
571 const BlastSeqSrc* seq_src,
572 SeqLoc* masking_locs,
573 const SBlastOptions* options,
574 BlastTabularFormatData* tf_data,
575 BlastHSPResults **results,
576 SeqLoc** filter_out,
577 Blast_SummaryReturn* extra_returns)
578 {
579 Int2 status = 0;
580 BLAST_SequenceBlk *query = NULL;
581 BlastQueryInfo* query_info = NULL;
582 double scale_factor = 1.0;
583 BlastSeqLoc* lookup_segments = NULL;
584 BlastScoreBlk* sbp = NULL;
585 LookupTableWrap* lookup_wrap = NULL;
586 BlastMaskLoc* mask_loc = NULL;
587 BlastHSPStream* hsp_stream = NULL;
588 const EBlastProgramType kProgram = options->program;
589 const Boolean kRpsBlast =
590 (kProgram == eBlastTypeRpsBlast ||
591 kProgram == eBlastTypeRpsTblastn);
592 BlastRPSInfo* rps_info = NULL;
593 Nlm_MemMapPtr rps_mmap = NULL;
594 Nlm_MemMapPtr rps_pssm_mmap = NULL;
595 const QuerySetUpOptions* query_options = options->query_options;
596 const LookupTableOptions* lookup_options = options->lookup_options;
597 const BlastScoringOptions* score_options = options->score_options;
598 const BlastHitSavingOptions* hit_options = options->hit_options;
599 SBlastOptions* rps_options = NULL;
600 const Boolean kPhiBlast = Blast_ProgramIsPhiBlast(kProgram);
601 const Uint1 kDeallocateMe = 253;
602 Blast_Message *core_msg = NULL;
603
604 if (!query_seqloc || !seq_src || !options || !extra_returns)
605 return -1;
606
607 if ((status =
608 BLAST_ValidateOptions(kProgram, options->ext_options, score_options,
609 lookup_options, options->word_options, hit_options,
610 &core_msg)) != 0) {
611 extra_returns->error = Blast_MessageToSBlastMessage(core_msg, NULL, NULL, options->believe_query);
612 core_msg = Blast_MessageFree(core_msg);
613
614 return status;
615 }
616
617 if (options->program == eBlastTypeBlastn)
618 {
619 SeqLoc* dust_mask = NULL; /* Dust mask locations */
620 Blast_FindDustSeqLoc(query_seqloc, options, &dust_mask);
621 /* Combine dust mask with lower case mask
622 The dust mask will be deallocated by the end of this function
623 though as it's copied in BLAST_MainSetUp
624 Not deallocating it will result in a memory leak if masking_locs
625 was NULL at the start of this function */
626 if (dust_mask)
627 {
628 SeqLoc* dust_mask_var = dust_mask;
629 while (dust_mask_var)
630 {
631 dust_mask_var->choice = kDeallocateMe;
632 dust_mask_var = dust_mask_var->next;
633 }
634 ValNodeLink(&masking_locs, dust_mask);
635 }
636 }
637
638 if (kRpsBlast) {
639 if ((status =
640 s_RPSExtraStructsSetUp(seq_src, options, &rps_options, &rps_info,
641 &rps_mmap, &rps_pssm_mmap, &scale_factor,
642 extra_returns)))
643 return status;
644 score_options = rps_options->score_options;
645 hit_options = rps_options->hit_options;
646 options = rps_options; /* This will not change the caller's pointer. */
647 }
648
649 if ((status = BLAST_SetUpQuery(kProgram, query_seqloc, query_options,
650 masking_locs, &query_info, &query))) {
651 SBlastMessageWrite(&extra_returns->error, SEV_ERROR,
652 "BLAST_SetUpQuery returned non-zero status\n", NULL, FALSE);
653 return status;
654 }
655
656 status =
657 BLAST_MainSetUp(kProgram, query_options, score_options, query,
658 query_info, scale_factor, &lookup_segments, &mask_loc,
659 &sbp, &core_msg, s_BlastFindMatrixPath);
660 if (core_msg)
661 {
662 extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query);
663 core_msg = Blast_MessageFree(core_msg);
664 }
665
666 if (status)
667 return status;
668
669 if (psi_checkpoint) {
670 core_msg = NULL;
671 status = s_SetupScoreBlkPssmFromChkpt(sbp, query, psi_checkpoint,
672 &core_msg);
673 if (core_msg) {
674 extra_returns->error =
675 Blast_MessageToSBlastMessage(core_msg, query_seqloc,
676 query_info,
677 options->believe_query);
678 core_msg = Blast_MessageFree(core_msg);
679 }
680 if (status)
681 return status;
682 }
683 if (filter_out) {
684 *filter_out =
685 BlastMaskLocToSeqLoc(kProgram, mask_loc, query_seqloc);
686 }
687
688 /* Mask locations in BlastMaskLoc form are no longer needed. */
689 BlastMaskLocFree(mask_loc);
690
691 if (masking_locs)
692 {
693 SeqLocPtr slp_var = masking_locs;
694 SeqLocPtr last = NULL;
695 while (slp_var)
696 {
697 if (slp_var->choice == kDeallocateMe)
698 {
699 if (last == NULL)
700 {
701 masking_locs = slp_var->next;
702 slp_var->next = NULL;
703 Blast_ValNodeMaskListFree(slp_var);
704 slp_var = masking_locs;
705 }
706 else
707 {
708 last->next = slp_var->next;
709 slp_var->next = NULL;
710 Blast_ValNodeMaskListFree(slp_var);
711 slp_var = last->next;
712 }
713 }
714 else
715 {
716 last = slp_var;
717 slp_var = slp_var->next;
718 }
719 }
720 }
721
722 status = LookupTableWrapInit(query, lookup_options, query_options,
723 lookup_segments, sbp, &lookup_wrap, rps_info, &core_msg);
724 if (core_msg)
725 {
726 extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query);
727 core_msg = Blast_MessageFree(core_msg);
728 }
729 if (status)
730 return status;
731
732 /* For PHI BLAST, save information about pattern occurrences in
733 query in the BlastQueryInfo structure. */
734 if (kPhiBlast) {
735 SPHIPatternSearchBlk* pattern_blk =
736 (SPHIPatternSearchBlk*) lookup_wrap->lut;
737 Blast_SetPHIPatternInfo(kProgram, pattern_blk, query, lookup_segments,
738 query_info, &core_msg);
739 if (core_msg)
740 {
741 extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query);
742 core_msg = Blast_MessageFree(core_msg);
743 }
744
745 }
746 /* Only need for the setup of lookup table. */
747 lookup_segments = BlastSeqLocFree(lookup_segments);
748
749 if ((status = s_BlastHSPStreamSetUp(query, query_info, seq_src, options, sbp,
750 tf_data, &hsp_stream, extra_returns)))
751 return status;
752
753 if ((status = s_BlastThreadManager(query, query_info, seq_src, options,
754 lookup_wrap, sbp, hsp_stream, rps_info,
755 tf_data, results, extra_returns)))
756 return status;
757
758 lookup_wrap = LookupTableWrapFree(lookup_wrap);
759
760 query = BlastSequenceBlkFree(query);
761 query_info = BlastQueryInfoFree(query_info);
762 BlastScoreBlkFree(sbp);
763
764 if (kRpsBlast)
765 s_RPSExtraStructsFree(rps_info, rps_mmap, rps_pssm_mmap, rps_options);
766
767 return status;
768 }
769
770 Int2
771 Blast_DatabaseSearch(SeqLoc* query_seqloc,
772 Blast_PsiCheckpointLoc * psi_checkpoint,
773 char* db_name,
774 SeqLoc* masking_locs,
775 const SBlastOptions* options,
776 BlastTabularFormatData* tf_data,
777 SBlastSeqalignArray* *seqalign_arr,
778 SeqLoc** filter_out,
779 Blast_SummaryReturn* extra_returns)
780 {
781 BlastSeqSrc *seq_src = NULL;
782 Boolean db_is_prot;
783 Int2 status = 0;
784 BlastHSPResults* results = NULL;
785 ReadDBFILE* rdfp = NULL;
786
787 if (!options || !query_seqloc || !db_name || !extra_returns)
788 return -1;
789
790 db_is_prot =
791 (options->program == eBlastTypeBlastp ||
792 options->program == eBlastTypeBlastx ||
793 options->program == eBlastTypeRpsBlast ||
794 options->program == eBlastTypeRpsTblastn);
795
796 rdfp = readdb_new(db_name, db_is_prot);
797
798 seq_src = ReaddbBlastSeqSrcAttach(rdfp);
799
800 if (seq_src == NULL) {
801 SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
802 "Initialization of subject sequences source failed",
803 NULL, options->believe_query);
804 } else if (BlastSeqSrcGetNumSeqs(seq_src) == 0) {
805 SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
806 "Database is empty", NULL, options->believe_query);
807 } else {
808 char* error_str = BlastSeqSrcGetInitError(seq_src);
809 if (error_str)
810 SBlastMessageWrite(&extra_returns->error, SEV_WARNING, error_str, NULL, options->believe_query);
811 }
812
813 /* If there was an error initializing the sequence source, return without
814 doing the search. */
815 if (extra_returns->error)
816 return -1;
817
818 status =
819 Blast_RunSearch(query_seqloc, psi_checkpoint, seq_src,
820 masking_locs, options, tf_data, &results,
821 filter_out, extra_returns);
822
823 /* The ReadDBFILE structure will not be destroyed here, because the
824 initialising function used readdb_attach */
825 BlastSeqSrcFree(seq_src);
826
827 if (!status && !tf_data) {
828 status =
829 BLAST_ResultsToSeqAlign(options->program, &results,
830 query_seqloc, rdfp, NULL,
831 options->score_options->gapped_calculation,
832 options->score_options->is_ooframe,
833 seqalign_arr);
834 }
835
836 readdb_destruct(rdfp);
837
838 if (status)
839 return status;
840
841 return status;
842 }
843
844 /** Splits the PHI BLAST results corresponding to different pattern occurrences
845 * in query, converts them to Seq-aligns and puts in a list of ValNodes.
846 * @param results All results from different pattern occurrences
847 * mixed together. On return points to NULL. [in]
848 * @param pattern_info Query pattern occurrences information [in]
849 * @param program Program type (phiblastp or phiblastn) [in]
850 * @param query_seqloc List of query locations [in]
851 * @param rdfp blast db object [in]
852 * @param phivnps List of ValNodes containing Seq-aligns. [out]
853 * @return Status, 0 on success, -1 on failure.
854 */
855 static Int2
856 s_PHIResultsToSeqAlign(const BlastHSPResults* results,
857 const SPHIQueryInfo* pattern_info,
858 EBlastProgramType program, SeqLoc* query_seqloc,
859 ReadDBFILE* rdfp, ValNode* *phivnps)
860 {
861 Int2 status = 0;
862 /* Split results into an array of BlastHSPResults structures corresponding
863 to different pattern occurrences. */
864 BlastHSPResults* *phi_results =
865 PHIBlast_HSPResultsSplit(results, pattern_info);
866
867 if (phi_results) {
868 int pattern_index; /* Index over pattern occurrences. */
869
870 for (pattern_index = 0; pattern_index < pattern_info->num_patterns;
871 ++pattern_index) {
872 SBlastSeqalignArray* seqalign_arr = NULL;
873 SeqAlign* seqalign = NULL;
874 BlastHSPResults* one_phi_results = phi_results[pattern_index];
875
876 if (one_phi_results) {
877 /* PHI BLAST is always gapped, and never out-of-frame, hence
878 * TRUE and FALSE values for the respective booleans in the next
879 * call.
880 */
881 status =
882 BLAST_ResultsToSeqAlign(program, &one_phi_results,
883 query_seqloc, rdfp, NULL, TRUE,
884 FALSE, &seqalign_arr);
885 if (seqalign_arr)
886 {
887 seqalign = seqalign_arr->array[0];
888 seqalign_arr->array[0] = NULL;
889 SBlastSeqalignArrayFree(seqalign_arr);
890 }
891 ValNodeAddPointer(phivnps, pattern_index, seqalign);
892 }
893 }
894 sfree(phi_results);
895 }
896 return status;
897 }
898
899 Int2
900 PHIBlastRunSearch(SeqLoc* query_seqloc, char* db_name, SeqLoc* masking_locs,
901 const SBlastOptions* options, ValNode* *phivnps,
902 SeqLoc** filter_out, Blast_SummaryReturn* extra_returns)
903 {
904 BlastSeqSrc *seq_src = NULL;
905 Boolean is_prot;
906 Int2 status = 0;
907 BlastHSPResults* results = NULL;
908 ReadDBFILE* rdfp = NULL;
909
910 if (!options || !query_seqloc || !db_name || !extra_returns || !phivnps)
911 return -1;
912
913 ASSERT(Blast_ProgramIsPhiBlast(options->program));
914
915 is_prot = (options->program == eBlastTypePhiBlastp);
916
917 rdfp = readdb_new(db_name, is_prot);
918
919 seq_src = ReaddbBlastSeqSrcAttach(rdfp);
920
921 if (seq_src == NULL) {
922 SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
923 "Initialization of subject sequences source failed", NULL, options->believe_query);
924 } else {
925 char* error_str = BlastSeqSrcGetInitError(seq_src);
926 if (error_str)
927 SBlastMessageWrite(&extra_returns->error, SEV_WARNING, error_str, NULL, options->believe_query);
928 }
929
930 /* If there was an error initializing the sequence source, return without
931 doing the search. */
932 if (extra_returns->error)
933 return -1;
934
935 /* Masking at hash and on-the-fly tabular output are not applicable for
936 PHI BLAST, so pass NULL in corresponding arguments. */
937 status =
938 Blast_RunSearch(query_seqloc, (Blast_PsiCheckpointLoc *) NULL,
939 seq_src, masking_locs, options,
940 (BlastTabularFormatData*) NULL, &results,
941 filter_out, extra_returns);
942
943 /* The ReadDBFILE structure will not be destroyed here, because the
944 initialising function used readdb_attach */
945 BlastSeqSrcFree(seq_src);
946
947 *phivnps = NULL;
948
949 if (!status) {
950 status =
951 s_PHIResultsToSeqAlign(results, extra_returns->pattern_info,
952 options->program, query_seqloc, rdfp,
953 phivnps);
954 }
955
956 results = Blast_HSPResultsFree(results);
957
958 readdb_destruct(rdfp);
959 return status;
960 }
961
962 Int2
963 Blast_TwoSeqLocSetsAdvanced(SeqLoc* query_seqloc,
964 SeqLoc* subject_seqloc,
965 SeqLoc* masking_locs,
966 const SBlastOptions* options,
967 BlastTabularFormatData* tf_data,
968 SBlastSeqalignArray* *seqalign_arr,
969 SeqLoc** filter_out,
970 Blast_SummaryReturn* extra_returns)
971 {
972 BlastSeqSrc *seq_src = NULL;
973 Int2 status = 0;
974 BlastHSPResults* results = NULL;
975
976
977 if (!options || !query_seqloc || !subject_seqloc || !extra_returns)
978 return -1;
979
980
981 seq_src = MultiSeqBlastSeqSrcInit(subject_seqloc, options->program);
982
983 if (seq_src == NULL) {
984 SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
985 "Initialization of subject sequences source failed", NULL, options->believe_query);
986 } else {
987 char* error_str = BlastSeqSrcGetInitError(seq_src);
988 if (error_str)
989 SBlastMessageWrite(&extra_returns->error, SEV_WARNING, error_str, NULL, options->believe_query);
990 }
991
992 /* If there was an error initializing the sequence source, return without
993 doing the search. */
994 if (extra_returns->error)
995 return -1;
996
997 status =
998 Blast_RunSearch(query_seqloc, (Blast_PsiCheckpointLoc *) NULL,
999 seq_src, masking_locs, options, tf_data,
1000 &results, filter_out, extra_returns);
1001
1002 /* The ReadDBFILE structure will not be destroyed here, because the
1003 initialising function used readdb_attach */
1004 BlastSeqSrcFree(seq_src);
1005
1006 if (!status) {
1007 status =
1008