NCBI C Toolkit Cross Reference

C/algo/blast/api/blast_api.c


  1 /* $Id: blast_api.c,v 1.53 2008/02/13 22:35:49 camacho Exp $
  2 ***************************************************************************
  3 *                                                                         *
  4 *                             COPYRIGHT NOTICE                            *
  5 *                                                                         *
  6 * This software/database is categorized as "United States Government      *
  7 * Work" under the terms of the United States Copyright Act.  It was       *
  8 * produced as part of the author's official duties as a Government        *
  9 * employee and thus can not be copyrighted.  This software/database is    *
 10 * freely available to the public for use without a copyright notice.      *
 11 * Restrictions can not be placed on its present or future use.            *
 12 *                                                                         *
 13 * Although all reasonable efforts have been taken to ensure the accuracy  *
 14 * and reliability of the software and data, the National Library of       *
 15 * Medicine (NLM) and the U.S. Government do not and can not warrant the   *
 16 * performance or results that may be obtained by using this software,     *
 17 * data, or derivative works thereof.  The NLM and the U.S. Government     *
 18 * disclaim any and all warranties, expressed or implied, as to the        *
 19 * performance, merchantability or fitness for any particular purpose or   *
 20 * use.                                                                    *
 21 *                                                                         *
 22 * In any work or product derived from this material, proper attribution   *
 23 * of the author(s) as the source of the software or data would be         *
 24 * appreciated.                                                            *
 25 *
 26 * ===========================================================================
 27 *
 28 * Author:  Ilya Dondoshansky
 29 *
 30 * ===========================================================================
 31 */
 32 
 33 /** @file blast_api.c
 34  * Functions for C toolkit applications to perform a BLAST search 
 35  * against a BLAST database, using the rewritten blast engine.
 36  */
 37 
 38 #include <algo/blast/api/blast_api.h>
 39 #include <algo/blast/core/blast_setup.h>
 40 #include <algo/blast/core/blast_filter.h>
 41 #include <algo/blast/core/blast_util.h>
 42 #include <algo/blast/core/blast_message.h>
 43 #include <algo/blast/core/blast_engine.h>
 44 #include <algo/blast/core/blast_traceback.h>
 45 #include <algo/blast/core/hspstream_collector.h>
 46 #include <algo/blast/core/phi_lookup.h>
 47 #include <algo/blast/core/blast_psi.h>
 48 #include <algo/blast/api/hspstream_queue.h>
 49 #include <algo/blast/api/blast_mtlock.h>
 50 #include <algo/blast/api/blast_prelim.h>
 51 #include <algo/blast/api/blast_seq.h>
 52 #include <algo/blast/api/seqsrc_readdb.h>
 53 #include <algo/blast/api/seqsrc_multiseq.h>
 54 #include <algo/blast/api/blast_seqalign.h>
 55 #include <algo/blast/api/dust_filter.h>
 56 #include <algo/blast/api/blast_message_api.h>
 57 #include <algo/blast/core/gencode_singleton.h>
 58 
 59 /** @addtogroup CToolkitAlgoBlast
 60  *
 61  * @{
 62  */
 63 
 64 /** Initializes the auxiliary structure with RPS BLAST database information.
 65  * @param ppinfo Resulting structure. [out]
 66  * @param rps_mmap Memory mapped lookup table [out]
 67  * @param rps_pssm_mmap Memory mapped PSSM [out]
 68  * @param dbname Name of the database [in]
 69  */
 70 static Int2 
 71 s_BlastRPSInfoInit(BlastRPSInfo **ppinfo, Nlm_MemMap **rps_mmap,
 72                    Nlm_MemMap **rps_pssm_mmap, const char* dbname)
 73 {
 74    char filename[PATH_MAX];
 75    char pathname[PATH_MAX];
 76    BlastRPSInfo *info;
 77    FILE *auxfile;
 78    Int4 i;
 79    Int4 seq_size;
 80    Int4 num_db_seqs;
 81    Nlm_MemMapPtr lut_mmap;
 82    Nlm_MemMapPtr pssm_mmap;
 83    char buffer[PATH_MAX];
 84    ReadDBFILEPtr rdfp;
 85    char *tmp_dbname;
 86    Uint4 version;
 87 
 88    info = (BlastRPSInfo *)malloc(sizeof(BlastRPSInfo));
 89    if (info == NULL)
 90       ErrPostEx(SEV_FATAL, 1, 0, "Memory allocation failed");
 91 
 92    /* find the path to the RPS database */
 93    tmp_dbname = strdup(dbname);
 94    rdfp = readdb_new_ex2(tmp_dbname, READDB_DB_IS_PROT, 
 95                          READDB_NEW_DO_REPORT, NULL, NULL);
 96    sfree(tmp_dbname);
 97    if (rdfp == NULL)
 98       ErrPostEx(SEV_FATAL, 1, 0, "Cannot map RPS BLAST database");
 99    sprintf(pathname, "%s", rdfp->full_filename);
100    rdfp = readdb_destruct(rdfp);
101 
102    sprintf(filename, "%s.loo", (char *)pathname);
103    lut_mmap = Nlm_MemMapInit(filename);
104    if (lut_mmap == NULL)
105       ErrPostEx(SEV_FATAL, 1, 0, "Cannot map RPS BLAST lookup file");
106 
107    info->lookup_header = (BlastRPSLookupFileHeader *)lut_mmap->mmp_begin;
108    version = info->lookup_header->magic_number; 
109    if (version != RPS_MAGIC_NUM && version != RPS_MAGIC_NUM_28) {
110 
111       version = Nlm_SwitchUint4(version);
112       if (version == RPS_MAGIC_NUM || version == RPS_MAGIC_NUM_28) {
113          ErrPostEx(SEV_FATAL, 1, 0, "RPS BLAST lookup file was created "
114                            "on an incompatible platform");
115       }
116       else {
117          ErrPostEx(SEV_FATAL, 1, 0, "RPS BLAST lookup file is corrupt");
118       }
119    }
120 
121    sprintf(filename, "%s.rps", (char *)pathname);
122    pssm_mmap = Nlm_MemMapInit(filename);
123    if (pssm_mmap == NULL)
124       ErrPostEx(SEV_FATAL, 1, 0, "Cannot map RPS BLAST profile file");
125 
126    info->profile_header = (BlastRPSProfileHeader *)pssm_mmap->mmp_begin;
127    version = info->profile_header->magic_number;
128    if (version != RPS_MAGIC_NUM && version != RPS_MAGIC_NUM_28) {
129 
130       version = Nlm_SwitchUint4(version);
131       if (version == RPS_MAGIC_NUM || version == RPS_MAGIC_NUM_28) {
132          ErrPostEx(SEV_FATAL, 1, 0, "RPS BLAST profile file was created "
133                            "on an incompatible platform");
134       }
135       else {
136          ErrPostEx(SEV_FATAL, 1, 0, "RPS BLAST profile file is corrupt");
137       }
138    }
139 
140    num_db_seqs = info->profile_header->num_profiles;
141 
142    sprintf(filename, "%s.aux", (char *)pathname);
143    auxfile = FileOpen(filename, "r");
144    if (auxfile == NULL)
145       ErrPostEx(SEV_FATAL, 1, 0,"Cannot open RPS BLAST parameters file");
146 
147    fscanf(auxfile, "%s", buffer);
148    info->aux_info.orig_score_matrix = strdup(buffer);
149    fscanf(auxfile, "%d", &info->aux_info.gap_open_penalty);
150    fscanf(auxfile, "%d", &info->aux_info.gap_extend_penalty);
151    fscanf(auxfile, "%le", &info->aux_info.ungapped_k);
152    fscanf(auxfile, "%le", &info->aux_info.ungapped_h);
153    fscanf(auxfile, "%d", &info->aux_info.max_db_seq_length);
154    fscanf(auxfile, "%d", &info->aux_info.db_length);
155    fscanf(auxfile, "%lf", &info->aux_info.scale_factor);
156 
157    info->aux_info.karlin_k = (double *)malloc(num_db_seqs * sizeof(double));
158    for (i = 0; i < num_db_seqs && !feof(auxfile); i++) {
159       fscanf(auxfile, "%d", &seq_size); /* not used */
160       fscanf(auxfile, "%le", &info->aux_info.karlin_k[i]);
161    }
162 
163    if (i < num_db_seqs)
164       ErrPostEx(SEV_FATAL, 1, 0, "Missing Karlin parameters");
165 
166    FileClose(auxfile);
167    *ppinfo = info;
168    *rps_mmap = lut_mmap;
169    *rps_pssm_mmap = pssm_mmap;
170    return 0;
171 }
172 
173 /** Initializes and populates the RPS BLAST specific structures. 
174  * @param seq_src Database sequences source [in]
175  * @param options All search options [in]
176  * @param rps_options Copy of options, with RPS-specific modifications for 
177  *                    scoring and hit saving options. All other options pointers
178  *                    are left the same as in input "options". [out]
179  * @param rps_info_out Auxiliary structure with RPS-specific information [out]
180  * @param rps_mmap Memory mapped lookup table [out]
181  * @param rps_pssm_mmap Memory mapped PSSM [out]
182  * @param scale_factor Scaling factor for RPS matrix. [out]
183  * @param extra_returns Structure containing error information [in] [out]
184  * @return Status.
185  */
186 static Int2 
187 s_RPSExtraStructsSetUp(const BlastSeqSrc* seq_src, const SBlastOptions* options,
188                        SBlastOptions* *rps_options, BlastRPSInfo* *rps_info_out,
189                        Nlm_MemMapPtr *rps_mmap, Nlm_MemMapPtr *rps_pssm_mmap,
190                        double *scale_factor, Blast_SummaryReturn* extra_returns)
191 {
192     const char* kDbName;
193     BlastRPSInfo* rps_info = NULL;
194     BlastScoringOptions* rps_score_options;
195     BlastHitSavingOptions* rps_hit_options;
196     Int2 status = 0;
197 
198     /* The caller has already checked these, so we are just asserting it here. */
199     ASSERT(seq_src && options && extra_returns);
200 
201     kDbName = BlastSeqSrcGetName(seq_src);
202 
203     if (kDbName == NULL ||
204         (status = s_BlastRPSInfoInit(&rps_info, rps_mmap, 
205                                      rps_pssm_mmap, kDbName)) != 0) {
206         SBlastMessageWrite(&extra_returns->error, SEV_WARNING, "RPS BLAST database setup failed", NULL, FALSE);
207         return status;
208     }
209     *rps_info_out = rps_info;
210     *scale_factor = rps_info->aux_info.scale_factor;
211     rps_score_options = (BlastScoringOptions*)
212         BlastMemDup(options->score_options, sizeof(BlastScoringOptions));
213     rps_hit_options = (BlastHitSavingOptions*)
214         BlastMemDup(options->hit_options, sizeof(BlastHitSavingOptions));
215     rps_score_options->gap_open = 
216         rps_info->aux_info.gap_open_penalty;
217     rps_score_options->gap_extend = 
218         rps_info->aux_info.gap_extend_penalty;
219     rps_score_options->matrix = 
220         strdup(rps_info->aux_info.orig_score_matrix);
221 
222     *rps_options = (SBlastOptions*) BlastMemDup(options, sizeof(SBlastOptions));
223     (*rps_options)->score_options = rps_score_options;
224     (*rps_options)->hit_options = rps_hit_options;
225 
226     return 0;
227 }
228 
229 /** Frees the RPS BLAST specific extra structures. 
230  * @param rps_info Auxiliary structure with RPS-specific information [in]
231  * @param rps_mmap Memory mapped lookup table [in]
232  * @param rps_pssm_mmap Memory mapped PSSM [in]
233  * @param options Copy of the options wrapper structure, containing scoring 
234  *                and hit saving options, specially modified for RPS search.
235  *                All other options are the same as in the original structure,
236  *                so they should not be freed here. [in]
237  */
238 static void
239 s_RPSExtraStructsFree(BlastRPSInfo* rps_info, Nlm_MemMapPtr rps_mmap,
240                       Nlm_MemMapPtr rps_pssm_mmap, SBlastOptions* options)
241 {
242     Nlm_MemMapFini(rps_mmap);
243     Nlm_MemMapFini(rps_pssm_mmap);
244 
245     if (rps_info) {
246         sfree(rps_info->aux_info.karlin_k);
247         sfree(rps_info->aux_info.orig_score_matrix);
248         sfree(rps_info);
249     }
250     if (options) {
251         if (options->score_options) {
252             sfree(options->score_options->matrix);
253             sfree(options->score_options);
254         }
255         sfree(options->hit_options);
256         sfree(options);
257     }
258 }
259 
260 /** Sets up the HSP stream, depending on whether the search is single or
261  * multithreaded, and whether on-the-fly tabular output option is set. 
262  */
263 static Int2
264 s_BlastHSPStreamSetUp(BLAST_SequenceBlk* query, BlastQueryInfo* query_info, 
265                       const BlastSeqSrc* seq_src, const SBlastOptions* options, 
266                       BlastScoreBlk* sbp, BlastTabularFormatData* tf_data, 
267                       BlastHSPStream* *hsp_stream,
268                       Blast_SummaryReturn* extra_returns)
269 {
270     Int2 status = 0;
271 
272     /* If any of the required inputs were NULL, the caller would have exited 
273        before getting to this point. ASSERT this here. */
274     ASSERT(query && query_info && seq_src && options && sbp);
275 
276     if (!tf_data) {
277         const Int4 kNumResults = query_info->num_queries;
278         SBlastHitsParameters* blasthit_params=NULL;
279         MT_LOCK lock = NULL;
280         if (options->num_cpus > 1)
281             lock = Blast_MT_LOCKInit();
282         
283         SBlastHitsParametersNew(options->hit_options, options->ext_options,
284                                 options->score_options, &blasthit_params);
285         *hsp_stream =
286             Blast_HSPListCollectorInitMT(options->program, blasthit_params,
287                                          options->ext_options, TRUE,
288                                          kNumResults, lock);
289     } else {
290         /* Initialize the queue HSP stream for tabular formatting. */
291         *hsp_stream = Blast_HSPListQueueInit();
292         if ((status = Blast_TabularFormatDataSetUp(tf_data, options->program,
293                           *hsp_stream, seq_src, query, query_info,
294                           options->score_options, sbp, options->eff_len_options,
295                           options->ext_options, options->hit_options, 
296                           options->db_options)) != 0) {
297             SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
298                 "Failed to set up tabular formatting data structure", NULL, FALSE);
299             return status;
300         }
301     }
302     return status;
303 }
304 
305 /** Starts and joins all threads performing a multi-threaded search, with or 
306  * without on-the-fly output, or performs a single-threaded search.
307  */
308 static Int2
309 s_BlastThreadManager(BLAST_SequenceBlk* query, BlastQueryInfo* query_info, 
310                      const BlastSeqSrc* seq_src, const SBlastOptions* options, 
311                      LookupTableWrap* lookup_wrap, BlastScoreBlk* sbp, 
312                      BlastHSPStream* hsp_stream, BlastRPSInfo* rps_info, 
313                      BlastTabularFormatData* tf_data, BlastHSPResults **results,
314                      Blast_SummaryReturn* extra_returns)
315 {
316     Int2 status = 0;
317     /* The options input cannot be NULL here. The program would have exited 
318        before entering this function if it was. */
319     const BlastInitialWordOptions* word_options = options->word_options;
320     const BlastScoringOptions* score_options = options->score_options;
321     const BlastExtensionOptions* ext_options = options->ext_options;
322     const BlastHitSavingOptions* hit_options = options->hit_options;
323     const BlastEffectiveLengthsOptions* eff_len_options = 
324         options->eff_len_options;
325     const PSIBlastOptions* psi_options = options->psi_options;
326     const BlastDatabaseOptions* db_options = options->db_options;
327     TNlmThread format_thread = NULL;
328     BlastDiagnostics* diagnostics = NULL;
329     const EBlastProgramType kProgram = options->program;
330     const int kNumCpus = options->num_cpus;
331 
332     /* Assert that all required inputs are not NULL. They must be - otherwise 
333        the program should have exited before entering this function. */
334     ASSERT(query && query_info && seq_src && lookup_wrap && sbp && 
335            hsp_stream && extra_returns);
336 
337     BlastSeqSrcResetChunkIterator((BlastSeqSrc*) seq_src);
338 
339     /* Start the formatting thread */
340     if(tf_data && NlmThreadsAvailable() &&
341        (format_thread =
342         NlmThreadCreate(Blast_TabularFormatThread, (void*) tf_data))
343        == NULL_thread) {
344         SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
345                            "Cannot create thread for formatting tabular output\n", NULL, options->believe_query);
346         return -1;
347     }
348 
349     if (NlmThreadsAvailable() && kNumCpus > 1) {
350         TNlmThread* thread_array =
351             (TNlmThread*) calloc(kNumCpus, sizeof(TNlmThread));
352         BlastPrelimSearchThreadData* search_data = NULL;
353         void* join_status = NULL;
354         int index;
355         
356         diagnostics = Blast_DiagnosticsInitMT(Blast_MT_LOCKInit());
357 
358         for (index = 0; index < kNumCpus; index++) {
359             search_data = 
360                 BlastPrelimSearchThreadDataInit(kProgram, query, 
361                     query_info, seq_src, lookup_wrap, score_options, 
362                     word_options, ext_options, hit_options, eff_len_options, 
363                     psi_options, db_options, sbp, diagnostics, hsp_stream);
364 
365             thread_array[index] =
366                NlmThreadCreate(Blast_PrelimSearchThreadRun, 
367                                (void*) search_data);
368         }
369         for (index = 0; index < kNumCpus; index++)
370             NlmThreadJoin(thread_array[index], &join_status);
371   
372         MemFree(thread_array);
373         
374         if (!tf_data) {
375             SPHIPatternSearchBlk* pattern_blk = NULL;
376             if (Blast_ProgramIsPhiBlast(kProgram)) {
377                 pattern_blk = (SPHIPatternSearchBlk*) lookup_wrap->lut;
378                 pattern_blk->num_patterns_db = 
379                                 (Int4)diagnostics->ungapped_stat->lookup_hits;
380             }
381 
382             if ((status = Blast_RunTracebackSearch(kProgram, query, 
383                              query_info, seq_src, score_options, 
384                              ext_options, hit_options, eff_len_options, 
385                              db_options, psi_options, sbp, hsp_stream, 
386                              rps_info, pattern_blk, results)) != 0) {
387                 SBlastMessageWrite(&extra_returns->error, SEV_ERROR,
388                                    "Traceback engine failed\n", NULL, options->believe_query);
389             }
390         }
391     } else {
392         diagnostics = Blast_DiagnosticsInit();
393 
394         if (tf_data) { /* Single thread, tabular */
395             if ((status = 
396                  Blast_RunPreliminarySearch(kProgram, query, query_info, 
397                      seq_src, score_options, sbp, lookup_wrap, word_options, 
398                      ext_options, hit_options, eff_len_options, psi_options, 
399                      db_options, hsp_stream, diagnostics)) != 0) {
400                 SBlastMessageWrite(&extra_returns->error, SEV_ERROR,
401                                    "Preliminary search engine failed\n", NULL, options->believe_query);
402             }
403         } else { /* Single thread, non-tabular */
404             if ((status=Blast_RunFullSearch(kProgram, query, query_info, 
405                             seq_src, sbp, score_options, lookup_wrap, 
406                             word_options, ext_options, hit_options, 
407                             eff_len_options, psi_options, db_options, hsp_stream,
408                             rps_info, diagnostics, results, 0, 0)) != 0) {
409                 SBlastMessageWrite(&extra_returns->error, SEV_ERROR,  
410                                     "Blast_RunFullSearch failed\n", NULL, options->believe_query);
411             }
412         }
413     }
414 
415     if (tf_data) {
416         void* join_status = NULL;
417         BlastHSPStreamClose(hsp_stream);
418         NlmThreadJoin(format_thread, &join_status);
419         /* Free the internally allocated structures used for tabular
420            formatting. */
421         BlastTabularFormatDataClean(tf_data);
422     }
423 
424     hsp_stream = BlastHSPStreamFree(hsp_stream);
425     Blast_SummaryReturnFill(kProgram, score_options, sbp, options->lookup_options, 
426                             word_options, ext_options, hit_options,
427                             eff_len_options, options->query_options, query_info, 
428                             seq_src, &diagnostics, extra_returns);
429 
430     return status;
431 }
432 
433 /** GET_MATRIX_PATH callback to find the path to a specified matrix.
434  * Looks first in current directory, then one specified by
435  * .ncbirc, then in local data directory, then env
436  * variables.
437  * @param matrix_name name of the matrix (e.g., BLOSUM50) [in]
438  * @param is_prot protein matrix if TRUE [in]
439  * @return path to matrix if found, or NULL.
440  */
441 static char*
442 s_BlastFindMatrixPath(const char* matrix_name, Boolean is_prot)
443 {
444      char* matrix_path = NULL;  /* return value. */
445      char buf_path[PATH_MAX];  /* Used for path without matrix filename. */
446      char buf_full[PATH_MAX];  /* used for full path with filename. */
447      char* ptr = NULL;
448 
449      if (matrix_name == NULL)
450        return NULL;
451 
452      /* current directory */
453      if (Nlm_FileLength((char*) matrix_name) > 0)
454      {
455          char buf_path_2[PATH_MAX];
456          Nlm_ProgramPath(buf_path, PATH_MAX);
457          ptr = StringRChr (buf_path, DIRDELIMCHR);
458          if (ptr != NULL)
459              *ptr = '\0';
460          sprintf(buf_path_2, "%s%s", buf_path, DIRDELIMSTR);
461          matrix_path = StringSave(buf_path_2);
462          return matrix_path;
463      }
464      
465      /* local data directory. */
466      sprintf(buf_full, "data%s%s", DIRDELIMSTR, matrix_name);
467      if (Nlm_FileLength(buf_full) > 0)
468      {
469          char buf_path_2[PATH_MAX];
470          Nlm_ProgramPath(buf_path, PATH_MAX);
471          ptr = StringRChr (buf_path, DIRDELIMCHR);
472          if (ptr != NULL)
473              *ptr = '\0';
474          sprintf(buf_path_2, "%s%sdata%s", buf_path, DIRDELIMSTR, DIRDELIMSTR);
475          matrix_path = StringSave(buf_path_2);
476          return matrix_path;
477      }
478 
479      if(FindPath("ncbi", "ncbi", "data", buf_path, PATH_MAX)) {
480             sprintf(buf_full, "%s%s", buf_path, matrix_name);
481             if(FileLength(buf_full) > 0) {
482                 matrix_path = StringSave(buf_path);
483                 return matrix_path;
484             } else {
485                  char alphabet_type[3];     /* aa or nt */
486                  if (is_prot)
487                       Nlm_StringNCpy(alphabet_type, "aa", 2);
488                  else
489                       Nlm_StringNCpy(alphabet_type, "nt", 2);
490                  alphabet_type[2] = NULLB;
491 
492                  sprintf(buf_full, "%s%s%s%s", buf_path,
493                           alphabet_type, DIRDELIMSTR, matrix_name);
494                  if(FileLength(buf_full) > 0)
495                  {
496                     matrix_path = StringSave(buf_path);
497                     return matrix_path;
498                  }
499             }
500      }
501 
502      return NULL;
503 }
504 
505 
506 /**
507  * Read a checkpoint file and set the necessary structures in a
508  * BlastScoreBlk: the psi_matrix, kbp_psi[0], and kbp_gap_psi[0].
509  *
510  * @param sbp              a BlastScoreBlk to receive a PSSM [in/out]
511  * @param query            query sequence data
512  * @param psi_matrix_file  checkpoint file to read
513  * @pcore_msg              a pointer to receive error and warning messages
514  */
515 static int
516 s_SetupScoreBlkPssmFromChkpt(BlastScoreBlk * sbp,
517                              BLAST_SequenceBlk * query,
518                              Blast_PsiCheckpointLoc * psi_checkpoint,
519                              Blast_Message* *pcore_msg)
520 {
521     int status = 0;
522     /* An intermediate representation of the PSSM data that is used
523        in PSIBlast routines */
524     PSIMatrix * pssm = NULL;
525     /* The actual PSSM that is saved in the BlastScoreBlk */
526     SPsiBlastScoreMatrix * psi_matrix = NULL;
527     size_t i, j;
528 
529     psi_matrix = SPsiBlastScoreMatrixNew(query->length);
530     if (!psi_matrix) {
531         ErrPostEx(SEV_FATAL, 1, 0,
532             "Out-of-memory: cannot allocate a PSSM of length %d.\n",
533             query->length);
534         status = -1;
535         goto error_return;
536     }
537     status = Blast_PosReadCheckpoint(psi_matrix->freq_ratios,
538                                      query->length, query->sequence,
539                                      psi_checkpoint,
540                                      pcore_msg);
541     if (status != 0) {
542         goto error_return;
543     }
544     Blast_KarlinBlkCopy(psi_matrix->kbp, sbp->kbp_gap_std[0]);
545     status = PSICreatePssmFromFrequencyRatios(query->sequence,
546                                               query->length, sbp,
547                                               psi_matrix->freq_ratios,
548                                               kPSSM_NoImpalaScaling,
549                                               &pssm);
550     if (0 != status) {
551         goto error_return;
552     }
553     for (i = 0;  i < psi_matrix->pssm->ncols;  i++) {
554         for (j = 0;  j < psi_matrix->pssm->nrows;  j++) {
555             psi_matrix->pssm->data[i][j] = pssm->pssm[i][j];
556         }
557     }
558     PSIMatrixFree(pssm);
559     sbp->psi_matrix = psi_matrix;
560     return 0;
561 error_return:
562     if (psi_matrix)
563         SPsiBlastScoreMatrixFree(psi_matrix);
564     return status;
565 }
566 
567 
568 Int2
569 Blast_RunSearch(SeqLoc* query_seqloc,
570                 Blast_PsiCheckpointLoc * psi_checkpoint,
571                 const BlastSeqSrc* seq_src,
572                 SeqLoc* masking_locs,
573                 const SBlastOptions* options,
574                 BlastTabularFormatData* tf_data,
575                 BlastHSPResults **results,
576                 SeqLoc** filter_out,
577                 Blast_SummaryReturn* extra_returns)
578 {
579     Int2 status = 0;
580     BLAST_SequenceBlk *query = NULL;
581     BlastQueryInfo* query_info = NULL;
582     double scale_factor = 1.0;
583     BlastSeqLoc* lookup_segments = NULL;
584     BlastScoreBlk* sbp = NULL;
585     LookupTableWrap* lookup_wrap = NULL;
586     BlastMaskLoc* mask_loc = NULL;
587     BlastHSPStream* hsp_stream = NULL;
588     const EBlastProgramType kProgram = options->program;
589     const Boolean kRpsBlast = 
590         (kProgram == eBlastTypeRpsBlast ||
591          kProgram == eBlastTypeRpsTblastn);
592     BlastRPSInfo* rps_info = NULL;
593     Nlm_MemMapPtr rps_mmap = NULL;
594     Nlm_MemMapPtr rps_pssm_mmap = NULL;
595     const QuerySetUpOptions* query_options = options->query_options;
596     const LookupTableOptions* lookup_options = options->lookup_options;
597     const BlastScoringOptions* score_options = options->score_options;
598     const BlastHitSavingOptions* hit_options = options->hit_options;
599     SBlastOptions* rps_options = NULL;
600     const Boolean kPhiBlast = Blast_ProgramIsPhiBlast(kProgram);
601     const Uint1 kDeallocateMe = 253;
602     Blast_Message *core_msg = NULL;
603 
604     if (!query_seqloc || !seq_src || !options || !extra_returns) 
605         return -1;
606 
607     if ((status = 
608          BLAST_ValidateOptions(kProgram, options->ext_options, score_options, 
609                                lookup_options, options->word_options, hit_options, 
610                                &core_msg)) != 0) {
611          extra_returns->error = Blast_MessageToSBlastMessage(core_msg, NULL, NULL, options->believe_query);
612          core_msg = Blast_MessageFree(core_msg);
613          
614         return status;
615     }
616 
617     if (options->program == eBlastTypeBlastn)
618     {
619          SeqLoc* dust_mask = NULL; /* Dust mask locations */
620          Blast_FindDustSeqLoc(query_seqloc, options, &dust_mask);
621          /* Combine dust mask with lower case mask 
622             The dust mask will be deallocated by the end of this function
623             though as it's copied in BLAST_MainSetUp 
624             Not deallocating it will result in a memory leak if masking_locs
625             was NULL at the start of this function */
626          if (dust_mask)
627          {
628             SeqLoc* dust_mask_var = dust_mask;
629             while (dust_mask_var)
630             {
631                dust_mask_var->choice = kDeallocateMe;
632                dust_mask_var = dust_mask_var->next;
633             }
634             ValNodeLink(&masking_locs, dust_mask);
635          }
636     }
637 
638     if (kRpsBlast) {
639         if ((status = 
640              s_RPSExtraStructsSetUp(seq_src, options, &rps_options, &rps_info, 
641                                     &rps_mmap, &rps_pssm_mmap, &scale_factor, 
642                                     extra_returns)))
643             return status;
644         score_options = rps_options->score_options;
645         hit_options = rps_options->hit_options;
646         options = rps_options; /* This will not change the caller's pointer. */
647     }
648 
649     if ((status = BLAST_SetUpQuery(kProgram, query_seqloc, query_options, 
650                                    masking_locs, &query_info, &query))) {
651         SBlastMessageWrite(&extra_returns->error, SEV_ERROR,  
652                 "BLAST_SetUpQuery returned non-zero status\n", NULL, FALSE);
653         return status;
654     }
655 
656     status = 
657         BLAST_MainSetUp(kProgram, query_options, score_options, query, 
658                         query_info, scale_factor, &lookup_segments, &mask_loc,
659                         &sbp, &core_msg, s_BlastFindMatrixPath);
660     if (core_msg)
661     {
662        extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query);
663        core_msg = Blast_MessageFree(core_msg);
664     }
665 
666     if (status)
667         return status;
668 
669     if (psi_checkpoint) {
670         core_msg = NULL;
671         status = s_SetupScoreBlkPssmFromChkpt(sbp, query, psi_checkpoint,
672                                               &core_msg);
673         if (core_msg) {
674             extra_returns->error =
675                 Blast_MessageToSBlastMessage(core_msg, query_seqloc,
676                                              query_info,
677                                              options->believe_query);
678             core_msg = Blast_MessageFree(core_msg);
679         }
680         if (status)
681             return status;
682     }
683     if (filter_out) {
684         *filter_out = 
685             BlastMaskLocToSeqLoc(kProgram, mask_loc, query_seqloc);
686     }
687 
688     /* Mask locations in BlastMaskLoc form are no longer needed. */
689     BlastMaskLocFree(mask_loc);
690 
691     if (masking_locs)
692     {
693           SeqLocPtr slp_var = masking_locs;
694           SeqLocPtr last = NULL;
695           while (slp_var)
696           {
697               if (slp_var->choice == kDeallocateMe)
698               {
699                   if (last == NULL)
700                   {
701                      masking_locs = slp_var->next;
702                      slp_var->next = NULL;
703                      Blast_ValNodeMaskListFree(slp_var);
704                      slp_var = masking_locs;
705                   }
706                   else
707                   {
708                      last->next = slp_var->next;
709                      slp_var->next = NULL;
710                      Blast_ValNodeMaskListFree(slp_var);
711                      slp_var = last->next;
712                   }
713               } 
714               else
715               {
716                   last = slp_var;
717                   slp_var = slp_var->next;
718               }
719           }
720     }
721 
722     status = LookupTableWrapInit(query, lookup_options, query_options,
723                         lookup_segments, sbp, &lookup_wrap, rps_info, &core_msg);
724     if (core_msg)
725     {
726           extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query);
727           core_msg = Blast_MessageFree(core_msg);
728     }
729     if (status)
730         return status;
731 
732     /* For PHI BLAST, save information about pattern occurrences in
733        query in the BlastQueryInfo structure. */
734     if (kPhiBlast) {
735         SPHIPatternSearchBlk* pattern_blk = 
736             (SPHIPatternSearchBlk*) lookup_wrap->lut;
737         Blast_SetPHIPatternInfo(kProgram, pattern_blk, query, lookup_segments, 
738                                 query_info, &core_msg);
739         if (core_msg)
740         {
741              extra_returns->error = Blast_MessageToSBlastMessage(core_msg, query_seqloc, query_info, options->believe_query);
742              core_msg = Blast_MessageFree(core_msg);
743         }
744 
745     }
746     /* Only need for the setup of lookup table. */
747     lookup_segments = BlastSeqLocFree(lookup_segments);
748 
749     if ((status = s_BlastHSPStreamSetUp(query, query_info, seq_src, options, sbp,
750                                         tf_data, &hsp_stream, extra_returns)))
751         return status;
752 
753     if ((status = s_BlastThreadManager(query, query_info, seq_src, options,
754                                        lookup_wrap, sbp, hsp_stream, rps_info, 
755                                        tf_data, results, extra_returns)))
756         return status;
757     
758     lookup_wrap = LookupTableWrapFree(lookup_wrap);
759     
760     query = BlastSequenceBlkFree(query);
761     query_info = BlastQueryInfoFree(query_info);
762     BlastScoreBlkFree(sbp);
763     
764     if (kRpsBlast)
765         s_RPSExtraStructsFree(rps_info, rps_mmap, rps_pssm_mmap, rps_options);
766     
767     return status;
768 }
769 
770 Int2
771 Blast_DatabaseSearch(SeqLoc* query_seqloc,
772                      Blast_PsiCheckpointLoc * psi_checkpoint,
773                      char* db_name,
774                      SeqLoc* masking_locs,
775                      const SBlastOptions* options,
776                      BlastTabularFormatData* tf_data,
777                      SBlastSeqalignArray* *seqalign_arr,
778                      SeqLoc** filter_out,
779                      Blast_SummaryReturn* extra_returns)
780 {
781     BlastSeqSrc *seq_src = NULL;
782     Boolean db_is_prot;
783     Int2 status = 0;
784     BlastHSPResults* results = NULL;
785     ReadDBFILE* rdfp = NULL;
786 
787     if (!options || !query_seqloc || !db_name || !extra_returns)
788         return -1;
789 
790     db_is_prot = 
791         (options->program == eBlastTypeBlastp   ||
792          options->program == eBlastTypeBlastx   ||
793          options->program == eBlastTypeRpsBlast ||
794          options->program == eBlastTypeRpsTblastn);
795 
796     rdfp = readdb_new(db_name, db_is_prot);
797 
798     seq_src = ReaddbBlastSeqSrcAttach(rdfp);
799 
800     if (seq_src == NULL) {
801         SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
802                            "Initialization of subject sequences source failed",
803                            NULL, options->believe_query);
804     } else if (BlastSeqSrcGetNumSeqs(seq_src) == 0) {
805         SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
806                            "Database is empty", NULL, options->believe_query);
807     } else {
808         char* error_str = BlastSeqSrcGetInitError(seq_src);
809         if (error_str)
810             SBlastMessageWrite(&extra_returns->error, SEV_WARNING, error_str, NULL, options->believe_query); 
811     }
812 
813     /* If there was an error initializing the sequence source, return without 
814        doing the search. */
815     if (extra_returns->error)
816         return -1;
817 
818     status =
819         Blast_RunSearch(query_seqloc, psi_checkpoint, seq_src,
820                         masking_locs, options, tf_data, &results,
821                         filter_out, extra_returns);
822 
823     /* The ReadDBFILE structure will not be destroyed here, because the 
824        initialising function used readdb_attach */
825     BlastSeqSrcFree(seq_src);
826 
827     if (!status && !tf_data) {
828         status = 
829             BLAST_ResultsToSeqAlign(options->program, &results, 
830                                     query_seqloc, rdfp, NULL, 
831                                     options->score_options->gapped_calculation,
832                                     options->score_options->is_ooframe, 
833                                     seqalign_arr);
834     }
835 
836     readdb_destruct(rdfp);
837 
838     if (status)
839         return status;
840 
841     return status;
842 }
843 
844 /** Splits the PHI BLAST results corresponding to different pattern occurrences
845  * in query, converts them to Seq-aligns and puts in a list of ValNodes.
846  * @param results All results from different pattern occurrences 
847  *                mixed together. On return points to NULL. [in]
848  * @param pattern_info Query pattern occurrences information [in]
849  * @param program Program type (phiblastp or phiblastn) [in]
850  * @param query_seqloc List of query locations [in]
851  * @param rdfp blast db object [in]
852  * @param phivnps List of ValNodes containing Seq-aligns. [out]
853  * @return Status, 0 on success, -1 on failure.
854  */
855 static Int2
856 s_PHIResultsToSeqAlign(const BlastHSPResults* results, 
857                        const SPHIQueryInfo* pattern_info,
858                        EBlastProgramType program, SeqLoc* query_seqloc, 
859                        ReadDBFILE* rdfp, ValNode* *phivnps)
860 {
861     Int2 status = 0;
862     /* Split results into an array of BlastHSPResults structures corresponding
863        to different pattern occurrences. */
864     BlastHSPResults* *phi_results = 
865         PHIBlast_HSPResultsSplit(results, pattern_info);
866 
867     if (phi_results) {
868         int pattern_index; /* Index over pattern occurrences. */
869 
870         for (pattern_index = 0; pattern_index < pattern_info->num_patterns;
871              ++pattern_index) {
872             SBlastSeqalignArray* seqalign_arr = NULL;
873             SeqAlign* seqalign = NULL;
874             BlastHSPResults* one_phi_results = phi_results[pattern_index];
875 
876             if (one_phi_results) {
877                 /* PHI BLAST is always gapped, and never out-of-frame, hence
878                  * TRUE and FALSE values for the respective booleans in the next
879                  * call.
880                  */
881                 status =
882                     BLAST_ResultsToSeqAlign(program, &one_phi_results, 
883                                             query_seqloc, rdfp, NULL, TRUE, 
884                                             FALSE, &seqalign_arr);
885                 if (seqalign_arr)
886                 {
887                     seqalign = seqalign_arr->array[0];
888                     seqalign_arr->array[0] = NULL;
889                     SBlastSeqalignArrayFree(seqalign_arr);
890                 }
891                 ValNodeAddPointer(phivnps, pattern_index, seqalign);
892             }
893         }
894         sfree(phi_results);
895     }
896     return status;
897 }
898 
899 Int2
900 PHIBlastRunSearch(SeqLoc* query_seqloc, char* db_name, SeqLoc* masking_locs,
901                   const SBlastOptions* options, ValNode* *phivnps,
902                   SeqLoc** filter_out, Blast_SummaryReturn* extra_returns)
903 {
904     BlastSeqSrc *seq_src = NULL;
905     Boolean is_prot;
906     Int2 status = 0;
907     BlastHSPResults* results = NULL;
908     ReadDBFILE* rdfp = NULL;
909 
910     if (!options || !query_seqloc || !db_name || !extra_returns || !phivnps)
911         return -1;
912 
913     ASSERT(Blast_ProgramIsPhiBlast(options->program));
914 
915     is_prot = (options->program == eBlastTypePhiBlastp);
916 
917     rdfp = readdb_new(db_name, is_prot);
918 
919     seq_src = ReaddbBlastSeqSrcAttach(rdfp);
920 
921     if (seq_src == NULL) {
922         SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
923                            "Initialization of subject sequences source failed", NULL, options->believe_query);
924     } else {
925         char* error_str = BlastSeqSrcGetInitError(seq_src);
926         if (error_str)
927             SBlastMessageWrite(&extra_returns->error, SEV_WARNING, error_str, NULL, options->believe_query);
928     }
929 
930     /* If there was an error initializing the sequence source, return without
931        doing the search. */
932     if (extra_returns->error)
933         return -1;
934 
935     /* Masking at hash and on-the-fly tabular output are not applicable for 
936        PHI BLAST, so pass NULL in corresponding arguments. */
937     status =
938         Blast_RunSearch(query_seqloc, (Blast_PsiCheckpointLoc *) NULL,
939                         seq_src, masking_locs, options,
940                         (BlastTabularFormatData*) NULL, &results,
941                         filter_out, extra_returns);
942 
943     /* The ReadDBFILE structure will not be destroyed here, because the
944        initialising function used readdb_attach */
945     BlastSeqSrcFree(seq_src);
946 
947     *phivnps = NULL;
948 
949     if (!status) {
950         status = 
951             s_PHIResultsToSeqAlign(results, extra_returns->pattern_info, 
952                                    options->program, query_seqloc, rdfp,
953                                    phivnps);
954     }
955 
956     results = Blast_HSPResultsFree(results);
957 
958     readdb_destruct(rdfp);
959     return status;
960 }
961 
962 Int2 
963 Blast_TwoSeqLocSetsAdvanced(SeqLoc* query_seqloc, 
964                             SeqLoc* subject_seqloc,
965                             SeqLoc* masking_locs,
966                             const SBlastOptions* options,
967                             BlastTabularFormatData* tf_data,
968                             SBlastSeqalignArray* *seqalign_arr,
969                             SeqLoc** filter_out,
970                             Blast_SummaryReturn* extra_returns)
971 {
972     BlastSeqSrc *seq_src = NULL;
973     Int2 status = 0;
974     BlastHSPResults* results = NULL;
975 
976 
977     if (!options || !query_seqloc || !subject_seqloc || !extra_returns)
978         return -1;
979 
980 
981     seq_src = MultiSeqBlastSeqSrcInit(subject_seqloc, options->program);
982 
983     if (seq_src == NULL) {
984         SBlastMessageWrite(&extra_returns->error, SEV_WARNING,
985                            "Initialization of subject sequences source failed", NULL, options->believe_query);
986     } else {
987         char* error_str = BlastSeqSrcGetInitError(seq_src);
988         if (error_str)
989             SBlastMessageWrite(&extra_returns->error, SEV_WARNING, error_str, NULL, options->believe_query); 
990     }
991 
992     /* If there was an error initializing the sequence source, return without 
993        doing the search. */
994     if (extra_returns->error)
995         return -1;
996 
997     status =
998         Blast_RunSearch(query_seqloc, (Blast_PsiCheckpointLoc *) NULL,
999                         seq_src, masking_locs, options, tf_data,
1000                         &results, filter_out, extra_returns);
1001 
1002     /* The ReadDBFILE structure will not be destroyed here, because the 
1003        initialising function used readdb_attach */
1004     BlastSeqSrcFree(seq_src);
1005 
1006     if (!status) {
1007         status = 
1008