src/algo/blast/core/blast_kappa.c

Go to the documentation of this file.
00001 /* $Id: blast_kappa.c 148871 2009-01-05 16:51:12Z camacho $
00002  * ==========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Authors: Alejandro Schaffer, Mike Gertz (ported to algo/blast by Tom Madden)
00027  *
00028  */
00029 
00030 /** @file blast_kappa.c
00031  * Utilities for doing Smith-Waterman alignments and adjusting the scoring
00032  * system for each match in blastpgp
00033  */
00034 
00035 #ifndef SKIP_DOXYGEN_PROCESSING
00036 static char const rcsid[] =
00037 "$Id: blast_kappa.c 148871 2009-01-05 16:51:12Z camacho $";
00038 #endif /* SKIP_DOXYGEN_PROCESSING */
00039 
00040 #include <float.h>
00041 #include <algo/blast/core/ncbi_math.h>
00042 #include <algo/blast/core/blast_hits.h>
00043 #include <algo/blast/core/blast_kappa.h>
00044 #include <algo/blast/core/blast_util.h>
00045 #include <algo/blast/core/blast_gapalign.h>
00046 #include <algo/blast/core/blast_filter.h>
00047 #include <algo/blast/core/blast_traceback.h>
00048 #include <algo/blast/core/link_hsps.h>
00049 #include <algo/blast/core/gencode_singleton.h>
00050 #include "blast_psi_priv.h"
00051 #include "blast_gapalign_priv.h"
00052 #include "blast_hits_priv.h"
00053 #include "blast_posit.h"
00054 
00055 #include <algo/blast/composition_adjustment/nlm_linear_algebra.h>
00056 #include <algo/blast/composition_adjustment/compo_heap.h>
00057 #include <algo/blast/composition_adjustment/redo_alignment.h>
00058 #include <algo/blast/composition_adjustment/matrix_frequency_data.h>
00059 #include <algo/blast/composition_adjustment/unified_pvalues.h>
00060 
00061 /* Define KAPPA_PRINT_DIAGNOSTICS to turn on printing of
00062  * diagnostic information from some routines. */
00063 
00064 /** Compile-time option; if set to a true value, then blastp runs
00065     that use Blast_RedoAlignmentCore to compute the traceback will not
00066     SEG the subject sequence */
00067 #ifndef KAPPA_BLASTP_NO_SEG_SEQUENCE
00068 #define KAPPA_BLASTP_NO_SEG_SEQUENCE 0
00069 #endif
00070 
00071 
00072 /** Compile-time option; if set to a true value, then blastp runs
00073     that use Blast_RedoAlignmentCore to compute the traceback will not
00074     SEG the subject sequence */
00075 #ifndef KAPPA_TBLASTN_NO_SEG_SEQUENCE
00076 #define KAPPA_TBLASTN_NO_SEG_SEQUENCE 0
00077 #endif
00078 
00079 
00080 /**
00081  * Given a list of HSPs with (possibly) high-precision scores, rescale
00082  * the scores to have standard precision and set the scale-independent
00083  * bit scores.  This routine does *not* resort the list; it is assumed
00084  * that the list is already sorted according to e-values that have been
00085  * computed using the initial, higher-precision scores.
00086  *
00087  * @param hsp_list          the HSP list
00088  * @param logK              Karlin-Altschul statistical parameter [in]
00089  * @param lambda            Karlin-Altschul statistical parameter [in]
00090  * @param scoreDivisor      the value by which reported scores are to be
00091  */
00092 static void
00093 s_HSPListNormalizeScores(BlastHSPList * hsp_list,
00094                          double lambda,
00095                          double logK,
00096                          double scoreDivisor)
00097 {
00098     int hsp_index;
00099     for(hsp_index = 0; hsp_index < hsp_list->hspcnt; hsp_index++) {
00100         BlastHSP * hsp = hsp_list->hsp_array[hsp_index];
00101 
00102         hsp->score = BLAST_Nint(((double) hsp->score) / scoreDivisor);
00103         /* Compute the bit score using the newly computed scaled score. */
00104         hsp->bit_score = (hsp->score*lambda*scoreDivisor - logK)/NCBIMATH_LN2;
00105     }
00106 }
00107 
00108 
00109 /**
00110  * Adjusts the E-values in a BLAST_HitList to be composites of
00111  * a composition-based P-value and a score/alignment-based P-value
00112  *
00113  * @param hsp_list           the hitlist whose E-values need to be adjusted
00114  * @param comp_p_value      P-value from sequence composition
00115  * @param seqSrc            a source of sequence data
00116  * @param subject_length    length of database sequence
00117  * @param query_context     info about this query context; needed when
00118  *                          multiple queries are being used
00119  * @param LambdaRatio       the ratio between the observed value of Lambda
00120  *                          and the predicted value of lambda (used to print
00121  *                          diagnostics)
00122  * @param subject_id        the subject id of this sequence (used to print
00123  *                          diagnostics)
00124  **/
00125 static void
00126 s_AdjustEvaluesForComposition(
00127     BlastHSPList *hsp_list,
00128     double comp_p_value,
00129     const BlastSeqSrc* seqSrc,
00130     Int4 subject_length,
00131     BlastContextInfo * query_context,
00132     double LambdaRatio,
00133     int subject_id)
00134 {
00135     /* Smallest observed evalue after adjustment */
00136     double best_evalue = DBL_MAX;
00137 
00138     /* True length of the query */
00139     int query_length =      query_context->query_length;
00140     /* Length adjustment to compensate for edge effects */
00141     int length_adjustment = query_context->length_adjustment;
00142 
00143     /* Effective lengths of the query, subject, and database */
00144     double query_eff   = MAX((query_length - length_adjustment), 1);
00145     double subject_eff = MAX((subject_length - length_adjustment), 1.0);
00146     double dblen_eff = (double) query_context->eff_searchsp / query_eff;
00147     
00148     /* Scale factor to convert the database E-value to the sequence E-value */
00149     double db_to_sequence_scale = subject_eff / dblen_eff;
00150 
00151     int        hsp_index;
00152     for (hsp_index = 0;  hsp_index < hsp_list->hspcnt;  hsp_index++) {
00153         /* for all HSPs */
00154         double align_p_value;     /* P-value for the alignment score */
00155         double combined_p_value;  /* combination of two P-values */
00156 
00157         /* HSP for this iteration */
00158         BlastHSP * hsp = hsp_list->hsp_array[hsp_index];
00159 #ifdef KAPPA_PRINT_DIAGNOSTICS
00160         /* Original E-value, saved if diagnostics are printed. */
00161         double old_e_value = hsp->evalue;
00162 #endif
00163         hsp->evalue *= db_to_sequence_scale;
00164 
00165         align_p_value = BLAST_KarlinEtoP(hsp->evalue);
00166         combined_p_value = Blast_Overall_P_Value(comp_p_value,align_p_value);
00167         hsp->evalue = BLAST_KarlinPtoE(combined_p_value);
00168         hsp->evalue /= db_to_sequence_scale;
00169 
00170         if (hsp->evalue < best_evalue) {
00171             best_evalue = hsp->evalue;
00172         }
00173 
00174 #ifdef KAPPA_PRINT_DIAGNOSTICS
00175         {{
00176             int    sequence_gi; /*GI of a sequence*/
00177             Blast_GiList* gi_list; /*list of GI's for a sequence*/
00178             gi_list = BlastSeqSrcGetGis(seqSrc, (void *) (&subject_id));
00179             if ((gi_list) && (gi_list->num_used > 0)) {
00180                 sequence_gi = gi_list->data[0];
00181             } else {
00182                 sequence_gi = (-1);
00183             }
00184             printf("GI %d Lambda ratio %e comp. p-value %e; "
00185                    "adjust E-value of query length %d match length "
00186                    "%d from %e to %e\n",
00187                    sequence_gi, LambdaRatio, comp_p_value,
00188                    query_length, subject_length, old_e_value, hsp->evalue);
00189             Blast_GiListFree(gi_list);
00190         }}
00191 #endif
00192     } /* end for all HSPs */
00193 
00194     hsp_list->best_evalue = best_evalue;
00195 
00196     /* suppress unused parameter warnings if diagnostics are not printed */
00197     (void) seqSrc;
00198     (void) query_length;
00199     (void) LambdaRatio;
00200     (void) subject_id;
00201 }
00202 
00203 
00204 /**
00205  * Remove from a hitlist all HSPs that are completely contained in an
00206  * HSP that occurs earlier in the list and that:
00207  * - is on the same strand; and
00208  * - has equal or greater score.  T
00209  * The hitlist should be sorted by some measure of significance before
00210  * this routine is called.
00211  * @param hsp_array         array to be reaped
00212  * @param hspcnt            length of hsp_array
00213  */
00214 static void
00215 s_HitlistReapContained(BlastHSP * hsp_array[], Int4 * hspcnt)
00216 {
00217     Int4 iread;       /* iteration index used to read the hitlist */
00218     Int4 iwrite;      /* iteration index used to write to the hitlist */
00219     Int4 old_hspcnt;  /* number of HSPs in the hitlist on entry */
00220 
00221     old_hspcnt = *hspcnt;
00222 
00223     for (iread = 1;  iread < *hspcnt;  iread++) {
00224         /* for all HSPs in the hitlist */
00225         Int4      ireadBack;  /* iterator over indices less than iread */
00226         BlastHSP *hsp1;       /* an HSP that is a candidate for deletion */
00227 
00228         hsp1 = hsp_array[iread];
00229         for (ireadBack = 0;  ireadBack < iread && hsp1 != NULL;  ireadBack++) {
00230             /* for all HSPs before hsp1 in the hitlist and while hsp1
00231              * has not been deleted */
00232             BlastHSP *hsp2;    /* an HSP that occurs earlier in hsp_array
00233                                 * than hsp1 */
00234             hsp2 = hsp_array[ireadBack];
00235 
00236             if( hsp2 == NULL ) {  /* hsp2 was deleted in a prior iteration. */
00237                 continue;
00238             }
00239             if (SIGN(hsp2->query.frame)   == SIGN(hsp1->query.frame) &&
00240                 SIGN(hsp2->subject.frame) == SIGN(hsp1->subject.frame)) {
00241                 /* hsp1 and hsp2 are in the same query/subject frame. */
00242                 if (CONTAINED_IN_HSP
00243                     (hsp2->query.offset, hsp2->query.end, hsp1->query.offset,
00244                      hsp2->subject.offset, hsp2->subject.end,
00245                      hsp1->subject.offset) &&
00246                     CONTAINED_IN_HSP
00247                     (hsp2->query.offset, hsp2->query.end, hsp1->query.end,
00248                      hsp2->subject.offset, hsp2->subject.end,
00249                      hsp1->subject.end)    &&
00250                     hsp1->score <= hsp2->score) {
00251                     hsp1 = hsp_array[iread] = Blast_HSPFree(hsp_array[iread]);
00252                 }
00253             } /* end if hsp1 and hsp2 are in the same query/subject frame */
00254         } /* end for all HSPs before hsp1 in the hitlist */
00255     } /* end for all HSPs in the hitlist */
00256 
00257     /* Condense the hsp_array, removing any NULL items. */
00258     iwrite = 0;
00259     for (iread = 0;  iread < *hspcnt;  iread++) {
00260         if (hsp_array[iread] != NULL) {
00261             hsp_array[iwrite++] = hsp_array[iread];
00262         }
00263     }
00264     *hspcnt = iwrite;
00265     /* Fill the remaining memory in hsp_array with NULL pointers. */
00266     for ( ;  iwrite < old_hspcnt;  iwrite++) {
00267         hsp_array[iwrite] = NULL;
00268     }
00269 }
00270 
00271 
00272 /** A callback used to free an EditScript that has been stored in a
00273  * BlastCompo_Alignment. */
00274 static void s_FreeEditScript(void * edit_script)
00275 {
00276     if (edit_script != NULL)
00277         GapEditScriptDelete(edit_script);
00278 }
00279 
00280 
00281 /**
00282  * Converts a list of objects of type BlastCompo_Alignment to an
00283  * new object of type BlastHSPList and returns the result. Conversion
00284  * in this direction is lossless.  The list passed to this routine is
00285  * freed to ensure that there is no aliasing of fields between the
00286  * list of BlastCompo_Alignments and the new hitlist.
00287  *
00288  * @param alignments A list of distinct alignments; freed before return [in]
00289  * @param oid        Ordinal id of a database sequence [in]
00290  * @param queryInfo  information about all queries in this search [in]
00291  * @return Allocated and filled BlastHSPList structure.
00292  */
00293 static BlastHSPList *
00294 s_HSPListFromDistinctAlignments(BlastCompo_Alignment ** alignments,
00295                                 int oid,
00296                                 BlastQueryInfo* queryInfo)
00297 {
00298     int status = 0;                    /* return code for any routine called */
00299     static const int unknown_value = 0;   /* dummy constant to use when a
00300                                       parameter value is not known */
00301     BlastCompo_Alignment * align;  /* an alignment in the list */
00302     BlastHSPList * hsp_list;       /* the new HSP list */
00303 
00304     hsp_list = Blast_HSPListNew(0);
00305     if (hsp_list == NULL) {
00306         return NULL;
00307     }
00308     hsp_list->oid = oid;
00309 
00310     for (align = *alignments;  NULL != align;  align = align->next) {
00311         BlastHSP * new_hsp = NULL;
00312         GapEditScript * editScript = align->context;
00313         int query_offset, queryStart, queryEnd;
00314         align->context = NULL;
00315         
00316         query_offset = queryInfo->contexts[align->queryIndex].query_offset;
00317         queryStart = align->queryStart - query_offset;
00318         queryEnd = align->queryEnd - query_offset;
00319 
00320         status = Blast_HSPInit(queryStart, queryEnd,
00321                                align->matchStart, align->matchEnd,
00322                                unknown_value, unknown_value,
00323                                align->queryIndex, 
00324                                0, (Int2) align->frame, align->score,
00325                                &editScript, &new_hsp);
00326         switch (align->matrix_adjust_rule) {
00327         case eDontAdjustMatrix:
00328             new_hsp->comp_adjustment_method = eNoCompositionBasedStats;
00329             break;
00330         case eCompoScaleOldMatrix:
00331             new_hsp->comp_adjustment_method = eCompositionBasedStats;
00332             break;
00333         default:
00334             new_hsp->comp_adjustment_method = eCompositionMatrixAdjust;
00335             break;
00336         }
00337         if (status != 0)
00338             break;
00339         /* At this point, the subject and possibly the query sequence have
00340          * been filtered; since it is not clear that num_ident of the
00341          * filtered sequences, rather than the original, is desired,
00342          * explicitly leave num_ident blank. */
00343         new_hsp->num_ident = 0;
00344 
00345         status = Blast_HSPListSaveHSP(hsp_list, new_hsp);
00346         if (status != 0)
00347             break;
00348     }
00349     if (status == 0) {
00350         BlastCompo_AlignmentsFree(alignments, s_FreeEditScript);
00351         Blast_HSPListSortByScore(hsp_list);
00352     } else {
00353         hsp_list = Blast_HSPListFree(hsp_list);
00354     }
00355     return hsp_list;
00356 }
00357 
00358 
00359 /**
00360  * Adding evalues to a list of HSPs and remove those that do not have
00361  * sufficiently good (low) evalue.
00362  *
00363  * @param *pbestScore      best (highest) score in the list
00364  * @param *pbestEvalue     best (lowest) evalue in the list
00365  * @param hsp_list         the list
00366  * @param seqSrc            a source of sequence data
00367  * @param subject_length   length of the subject sequence
00368  * @param program_number   the type of BLAST search being performed
00369  * @param queryInfo        information about the queries
00370  * @param query_index      the index of the query corresponding to 
00371  *                         the HSPs in hsp_list
00372  * @param sbp              the score block for this search
00373  * @param hitParams        parameters used to assign evalues and
00374  *                         decide whether to save hits.
00375  * @param pvalueForThisPair  composition p-value
00376  * @param LambdaRatio        lambda ratio, if available
00377  * @param subject_id         index of subject
00378  *
00379  * @return 0 on success; -1 on failure (can fail because some methods
00380  *         of generating evalues use auxiliary structures)
00381  */
00382 static int
00383 s_HitlistEvaluateAndPurge(int * pbestScore, double *pbestEvalue,
00384                           BlastHSPList * hsp_list,
00385                           const BlastSeqSrc* seqSrc,
00386                           int subject_length,
00387                           EBlastProgramType program_number,
00388                           BlastQueryInfo* queryInfo,
00389                           int query_index,
00390                           BlastScoreBlk* sbp,
00391                           const BlastHitSavingParameters* hitParams,
00392                           double pvalueForThisPair,
00393                           double LambdaRatio,
00394                           int subject_id)
00395 {
00396     int status = 0;
00397     *pbestEvalue = DBL_MAX;
00398     *pbestScore  = 0;
00399     if (hitParams->do_sum_stats) {
00400         status = BLAST_LinkHsps(program_number, hsp_list, queryInfo,
00401                                 subject_length, sbp,
00402                                 hitParams->link_hsp_params, TRUE);
00403     } else {
00404         status =
00405             Blast_HSPListGetEvalues(queryInfo, hsp_list, TRUE, sbp,
00406                                     0.0, /* use a non-zero gap decay
00407                                             only when linking HSPs */
00408                                     1.0); /* Use scaling factor equal to
00409                                              1, because both scores and
00410                                              Lambda are scaled, so they
00411                                              will cancel each other. */
00412     }
00413     if (eBlastTypeBlastp == program_number) {
00414         if ((0 <= pvalueForThisPair) && (pvalueForThisPair <= 1)) {
00415             s_AdjustEvaluesForComposition(hsp_list, pvalueForThisPair, seqSrc,
00416                                           subject_length,
00417                                           &queryInfo->contexts[query_index],
00418                                           LambdaRatio, subject_id);
00419         }
00420     }
00421     if (status == 0) {
00422         Blast_HSPListReapByEvalue(hsp_list, hitParams->options);
00423         if (hsp_list->hspcnt > 0) {
00424             *pbestEvalue = hsp_list->best_evalue;
00425             *pbestScore  = hsp_list->hsp_array[0]->score;
00426         }
00427     }
00428     return status == 0 ? 0 : -1;
00429 }
00430 
00431 /** Compute the number of identities for the HSPs in the hsp_list
00432  * @note Should work for blastp and tblastn now.
00433  * 
00434  * @param query_blk the query sequence data [in]
00435  * @param query_info structure describing the query_blk structure [in]
00436  * @param seq_src source of subject sequence data [in]
00437  * @param hsp_list list of HSPs to be processed [in|out]
00438  * @param scoring_options scoring options [in]
00439  * @gen_code_string Genetic code for tblastn [in]
00440  */
00441 static void
00442 s_ComputeNumIdentities(const BLAST_SequenceBlk* query_blk,
00443                        const BlastQueryInfo* query_info,
00444                        const BlastSeqSrc* seq_src,
00445                        BlastHSPList* hsp_list,
00446                        const BlastScoringOptions* scoring_options,
00447                        const Uint1* gen_code_string)
00448 {
00449     Uint1* query = NULL;
00450     Uint1* subject = NULL;
00451     const EBlastProgramType program_number = scoring_options->program_number;
00452     const Boolean kIsOutOfFrame = scoring_options->is_ooframe;
00453     const EBlastEncoding encoding = Blast_TracebackGetEncoding(program_number);
00454     BlastSeqSrcGetSeqArg seq_arg;
00455     BLAST_SequenceBlk* subject_blk = NULL;
00456     Int2 status = 0;
00457     int i;
00458     SBlastTargetTranslation* target_t = NULL;
00459 
00460     if ( !hsp_list || (program_number != eBlastTypeBlastp && program_number != eBlastTypeTblastn)) 
00461         return;
00462 
00463     /* Initialize the subject */
00464     {
00465         memset((void*) &seq_arg, 0, sizeof(seq_arg));
00466         seq_arg.oid = hsp_list->oid;
00467         seq_arg.encoding = encoding;
00468         status = BlastSeqSrcGetSequence(seq_src, (void*) &seq_arg);
00469         ASSERT(status == 0);
00470     }
00471 
00472     if (program_number == eBlastTypeTblastn) {
00473         subject_blk = seq_arg.seq;
00474     BlastTargetTranslationNew(subject_blk, gen_code_string, eBlastTypeTblastn,
00475             kIsOutOfFrame, &target_t);
00476     }
00477     else {
00478         subject = seq_arg.seq->sequence;
00479     }
00480 
00481     for (i = 0; i < hsp_list->hspcnt; i++) {
00482         BlastHSP* hsp = hsp_list->hsp_array[i];
00483 
00484         /* Initialize the query */
00485         if (program_number == eBlastTypeBlastx && kIsOutOfFrame) {
00486             Int4 context = hsp->context - hsp->context % CODON_LENGTH;
00487             Int4 context_offset = query_info->contexts[context].query_offset;
00488             query = query_blk->oof_sequence + CODON_LENGTH + context_offset;
00489         } else {
00490             query = query_blk->sequence + 
00491                 query_info->contexts[hsp->context].query_offset;
00492         }
00493 
00494         /* Translate subject if needed. */
00495         if (program_number == eBlastTypeTblastn) {
00496             const Uint1* target_sequence = Blast_HSPGetTargetTranslation(target_t, hsp, NULL);
00497             status = Blast_HSPGetNumIdentities(query, target_sequence, hsp, scoring_options, 0);
00498         }
00499         else
00500             status = Blast_HSPGetNumIdentities(query, subject, hsp, scoring_options, 0);
00501 
00502         ASSERT(status == 0);
00503     }
00504     target_t = BlastTargetTranslationFree(target_t);
00505     BlastSeqSrcReleaseSequence(seq_src, (void*) &seq_arg);
00506     BlastSequenceBlkFree(seq_arg.seq);
00507 }
00508 
00509 
00510 /**
00511  * A callback routine: compute lambda for the given score
00512  * probabilities.
00513  * (@sa calc_lambda_type).
00514  */
00515 static double
00516 s_CalcLambda(double probs[], int min_score, int max_score, double lambda0)
00517 {
00518    
00519     int i;                 /* loop index */      
00520     int score_range;       /* range of possible scores */
00521     double avg;            /* expected score of aligning two characters */
00522     Blast_ScoreFreq freq;  /* score frequency data */
00523 
00524     score_range = max_score - min_score + 1;
00525     avg = 0.0;
00526     for (i = 0;  i < score_range;  i++) {
00527         avg += (min_score + i) * probs[i];
00528     }
00529     freq.score_min = min_score;
00530     freq.score_max = max_score;
00531     freq.obs_min = min_score;
00532     freq.obs_max = max_score;
00533     freq.sprob0 = probs;
00534     freq.sprob = &probs[-min_score];
00535     freq.score_avg = avg;
00536 
00537     return Blast_KarlinLambdaNR(&freq, lambda0);
00538 }
00539 
00540 
00541 /** Fill a two-dimensional array with the frequency ratios that
00542  * underlie a position specific score matrix (PSSM).
00543  *
00544  * @param returnRatios     a two-dimensional array with BLASTAA_SIZE
00545  *                         columns
00546  * @param numPositions     the number of rows in returnRatios
00547  * @param query            query sequence data, of length numPositions
00548  * @param matrixName       the name of the position independent matrix
00549  *                         corresponding to this PSSM
00550  * @param startNumerator   position-specific data used to generate the
00551  *                         PSSM
00552  * @return   0 on success; -1 if the named matrix isn't known, or if
00553  *           there was a memory error
00554  * @todo find out what start numerator is.
00555  */
00556 static int
00557 s_GetPosBasedStartFreqRatios(double ** returnRatios,
00558                              Int4 numPositions,
00559                              Uint1 * query,
00560                              const char *matrixName,
00561                              double **startNumerator)
00562 {
00563     Int4 i,j;                            /* loop indices */
00564     SFreqRatios * stdFreqRatios = NULL;  /* frequency ratios for the
00565                                             named matrix. */
00566     double *standardProb;                /* probabilities of each
00567                                             letter*/
00568     const double kPosEpsilon = 0.0001;   /* values below this cutoff
00569                                             are treated specially */
00570 
00571     stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
00572     if (stdFreqRatios == NULL) {
00573         return -1;
00574     }
00575     for (i = 0;  i < numPositions;  i++) {
00576         for (j = 0;  j < BLASTAA_SIZE;  j++) {
00577             returnRatios[i][j] = stdFreqRatios->data[query[i]][j];
00578         }
00579     }
00580     stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
00581 
00582     standardProb = BLAST_GetStandardAaProbabilities();
00583     if(standardProb == NULL) {
00584         return -1;
00585     }
00586     /*reverse multiplication done in posit.c*/
00587     for (i = 0;  i < numPositions;  i++) {
00588         for (j = 0;  j < BLASTAA_SIZE;  j++) {
00589             if ((standardProb[query[i]] > kPosEpsilon) &&
00590                 (standardProb[j] > kPosEpsilon) &&
00591                 (j != eStopChar) && (j != eXchar) &&
00592                 (startNumerator[i][j] > kPosEpsilon)) {
00593                 returnRatios[i][j] = startNumerator[i][j] / standardProb[j];
00594             }
00595         }
00596     }
00597     sfree(standardProb);
00598 
00599     return 0;
00600 }
00601 
00602 
00603 /**
00604  * Fill a two-dimensional array with the frequency ratios that underlie the
00605  * named score matrix.
00606  *
00607  * @param returnRatios  a two-dimensional array of size
00608  *                      BLASTAA_SIZE x  BLASTAA_SIZE
00609  * @param matrixName    the name of a matrix
00610  * @return   0 on success; -1 if the named matrix isn't known, or if
00611  *           there was a memory error
00612  */
00613 static int
00614 s_GetStartFreqRatios(double ** returnRatios,
00615                      const char *matrixName)
00616 {
00617     /* Loop indices */
00618     int i,j;
00619     /* Frequency ratios for the matrix */
00620     SFreqRatios * stdFreqRatios = NULL;
00621 
00622     stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
00623     if (stdFreqRatios == NULL) {
00624         return -1;
00625     }
00626     for (i = 0;  i < BLASTAA_SIZE;  i++) {
00627         for (j = 0;  j < BLASTAA_SIZE;  j++) {
00628             returnRatios[i][j] = stdFreqRatios->data[i][j];
00629         }
00630     }
00631     stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
00632 
00633     return 0;
00634 }
00635 
00636 
00637 /** SCALING_FACTOR is a multiplicative factor used to get more bits of
00638  * precision in the integer matrix scores. It cannot be arbitrarily
00639  * large because we do not want total alignment scores to exceed
00640  * -(BLAST_SCORE_MIN) */
00641 #define SCALING_FACTOR 32
00642 
00643 
00644 /**
00645  * Produce a scaled-up version of the position-specific matrix
00646  * with a given set of position-specific residue frequencies.
00647  *
00648  * @param fillPosMatrix     is the matrix to be filled
00649  * @param matrixName        name of the standard substitution matrix [in]
00650  * @param posFreqs          PSSM's frequency ratios [in]
00651  * @param query             Query sequence data [in]
00652  * @param queryLength       Length of the query sequence above [in]
00653  * @param sbp               stores various parameters of the search
00654  * @param scale_factor      amount by which ungapped parameters should be
00655  *                          scaled.
00656  * @return 0 on success; -1 on failure
00657  */
00658 static int
00659 s_ScalePosMatrix(int ** fillPosMatrix,
00660                  const char * matrixName,
00661                  double ** posFreqs,
00662                  Uint1 * query,
00663                  int queryLength,
00664                  BlastScoreBlk* sbp,
00665                  double scale_factor)
00666 {
00667     /* Data used by scaling routines */    
00668     Kappa_posSearchItems *posSearch = NULL;
00669     /* A reduced collection of search parameters used by PSI-blast */
00670     Kappa_compactSearchItems *compactSearch = NULL;
00671     /* Representation of a PSSM internal to PSI-blast */
00672     _PSIInternalPssmData* internal_pssm = NULL;
00673     /* return code */
00674     int status = 0;
00675 
00676     posSearch = Kappa_posSearchItemsNew(queryLength, matrixName,
00677                                         fillPosMatrix, posFreqs);
00678     compactSearch = Kappa_compactSearchItemsNew(query, queryLength, sbp);
00679     /* Copy data into new structures */
00680     internal_pssm = _PSIInternalPssmDataNew(queryLength, BLASTAA_SIZE);
00681     if (posSearch == NULL || compactSearch == NULL || internal_pssm == NULL) {
00682         status = -1;
00683         goto cleanup;
00684     }
00685     _PSICopyMatrix_int(internal_pssm->pssm, posSearch->posMatrix,
00686                        internal_pssm->ncols, internal_pssm->nrows);
00687     _PSICopyMatrix_int(internal_pssm->scaled_pssm,
00688                        posSearch->posPrivateMatrix,
00689                        internal_pssm->ncols, internal_pssm->nrows);
00690     _PSICopyMatrix_double(internal_pssm->freq_ratios,
00691                           posSearch->posFreqs, internal_pssm->ncols,
00692                           internal_pssm->nrows);
00693     status = _PSIConvertFreqRatiosToPSSM(internal_pssm, query, sbp,
00694                                          compactSearch->standardProb);
00695     if (status != 0) {
00696         goto cleanup;
00697     }
00698     /* Copy data from new structures to posSearchItems */
00699     _PSICopyMatrix_int(posSearch->posMatrix, internal_pssm->pssm,
00700                        internal_pssm->ncols, internal_pssm->nrows);
00701     _PSICopyMatrix_int(posSearch->posPrivateMatrix,
00702                        internal_pssm->scaled_pssm,
00703                        internal_pssm->ncols, internal_pssm->nrows);
00704     _PSICopyMatrix_double(posSearch->posFreqs,
00705                           internal_pssm->freq_ratios,
00706                           internal_pssm->ncols, internal_pssm->nrows);
00707     status = Kappa_impalaScaling(posSearch, compactSearch, (double)
00708                                  scale_factor, FALSE, sbp);
00709 cleanup:
00710     internal_pssm = _PSIInternalPssmDataFree(internal_pssm);
00711     posSearch = Kappa_posSearchItemsFree(posSearch);
00712     compactSearch = Kappa_compactSearchItemsFree(compactSearch);
00713 
00714     return status;
00715 }
00716 
00717 
00718 /**
00719  * Convert an array of HSPs to a list of BlastCompo_Alignment objects.
00720  * The context field of each BlastCompo_Alignment is set to point to the
00721  * corresponding HSP.
00722  *
00723  * @param hsp_array             an array of HSPs
00724  * @param hspcnt                the length of hsp_array
00725  * @param queryInfo            information about the concatenated query
00726  * @param localScalingFactor    the amount by which this search is scaled
00727  *
00728  * @return the new list of alignments; or NULL if there is an out-of-memory
00729  *         error (or if the original array is empty)
00730  */
00731 static BlastCompo_Alignment *
00732 s_ResultHspToDistinctAlign(BlastHSP * hsp_array[], Int4 hspcnt,
00733                            BlastQueryInfo* queryInfo,
00734                            double localScalingFactor)
00735 {
00736     BlastCompo_Alignment * aligns = NULL;      /* new list of alignments */
00737     BlastCompo_Alignment * tail = NULL;        /* last element in aligns */
00738     int hsp_index;                             /* loop index */
00739     
00740     for (hsp_index = 0;  hsp_index < hspcnt;  hsp_index++) {
00741         BlastHSP * hsp = hsp_array[hsp_index]; /* current HSP */
00742         BlastCompo_Alignment * new_align;      /* newly-created alignment */
00743         int queryStart;                        /* start of the query context
00744                                                   in the concatenated query */
00745         int queryOffset, queryEnd;             /* coordinates of the
00746                                                   query portion of the
00747                                                   concatenated query */
00748         /* Incoming alignments will have coordinates of the query
00749            portion relative to a particular query context; they must
00750            be shifted for used in the composition_adjustment library.
00751         */
00752         queryStart = queryInfo->contexts[hsp->context].query_offset;
00753         queryOffset = hsp->query.offset + queryStart;
00754         queryEnd = hsp->query.end + queryStart;
00755         new_align =
00756             BlastCompo_AlignmentNew((int) (hsp->score * localScalingFactor),
00757                                     eDontAdjustMatrix,
00758                                     queryOffset, queryEnd, hsp->context,
00759                                     hsp->subject.offset, hsp->subject.end,
00760                                     hsp->subject.frame, hsp);
00761         if (new_align == NULL) /* out of memory */
00762             goto error_return;
00763         if (tail == NULL) { /* if the list aligns is empty; */
00764             /* make new_align the first element in the list */
00765             aligns = new_align;
00766         } else {
00767             /* otherwise add new_align to the end of the list */
00768             tail->next = new_align;
00769         }
00770         tail = new_align;
00771     }
00772     goto normal_return;
00773  error_return:
00774     BlastCompo_AlignmentsFree(&aligns, NULL);
00775  normal_return:
00776     return aligns;
00777 }
00778 
00779 
00780 /**
00781  * Redo a S-W alignment using an x-drop alignment.  The result will
00782  * usually be the same as the S-W alignment. The call to ALIGN_EX
00783  * attempts to force the endpoints of the alignment to match the
00784  * optimal endpoints determined by the Smith-Waterman algorithm.
00785  * ALIGN_EX is used, so that if the data structures for storing BLAST
00786  * alignments are changed, the code will not break
00787  *
00788  * @param query         the query data
00789  * @param queryStart    start of the alignment in the query sequence
00790  * @param queryEnd      end of the alignment in the query sequence,
00791  *                      as computed by the Smith-Waterman algorithm
00792  * @param subject       the subject (database) sequence
00793  * @param matchStart    start of the alignment in the subject sequence
00794  * @param matchEnd      end of the alignment in the query sequence,
00795  *                      as computed by the Smith-Waterman algorithm
00796  * @param gap_align     parameters for a gapped alignment
00797  * @param scoringParams Settings for gapped alignment.[in]
00798  * @param score         score computed by the Smith-Waterman algorithm
00799  * @param queryAlignmentExtent  length of the alignment in the query sequence,
00800  *                              as computed by the x-drop algorithm
00801  * @param matchAlignmentExtent  length of the alignment in the subject
00802  *                              sequence, as computed by the x-drop algorithm
00803  * @param newScore              alignment score computed by the x-drop
00804  *                              algorithm
00805  */
00806 static void
00807 s_SWFindFinalEndsUsingXdrop(BlastCompo_SequenceData * query,
00808                             Int4 queryStart,
00809                             Int4 queryEnd,
00810                             BlastCompo_SequenceData * subject,
00811                             Int4 matchStart,
00812                             Int4 matchEnd,
00813                             BlastGapAlignStruct* gap_align,
00814                             const BlastScoringParameters* scoringParams,
00815                             Int4 score,
00816                             Int4 * queryAlignmentExtent,
00817                             Int4 * matchAlignmentExtent,
00818                             Int4 * newScore)
00819 {
00820     Int4 XdropAlignScore;         /* alignment score obtained using X-dropoff
00821                                    * method rather than Smith-Waterman */
00822     Int4 doublingCount = 0;       /* number of times X-dropoff had to be
00823                                    * doubled */
00824     Int4 gap_x_dropoff_orig = gap_align->gap_x_dropoff;
00825 
00826     GapPrelimEditBlockReset(gap_align->rev_prelim_tback);
00827     GapPrelimEditBlockReset(gap_align->fwd_prelim_tback);
00828     do {
00829         XdropAlignScore =
00830             ALIGN_EX(&(query->data[queryStart]) - 1,
00831                      &(subject->data[matchStart]) - 1,
00832                      queryEnd - queryStart + 1, matchEnd - matchStart + 1,
00833                      queryAlignmentExtent,
00834                      matchAlignmentExtent, gap_align->fwd_prelim_tback,
00835                      gap_align, scoringParams, queryStart - 1, FALSE, FALSE,
00836                      NULL);
00837 
00838         gap_align->gap_x_dropoff *= 2;
00839         doublingCount++;
00840         if((XdropAlignScore < score) && (doublingCount < 3)) {
00841             GapPrelimEditBlockReset(gap_align->fwd_prelim_tback);
00842         }
00843     } while((XdropAlignScore < score) && (doublingCount < 3));
00844 
00845     gap_align->gap_x_dropoff = gap_x_dropoff_orig;
00846     *newScore = XdropAlignScore;
00847 }
00848 
00849 
00850 /**
00851  * BLAST-specific information that is associated with a
00852  * BlastCompo_MatchingSequence.
00853  */
00854 typedef struct
00855 BlastKappa_SequenceInfo {
00856     EBlastProgramType prog_number; /**< identifies the type of blast
00857                                         search being performed. The type
00858                                         of search determines how sequence
00859                                         data should be obtained. */
00860     const BlastSeqSrc* seq_src;    /**< BLAST sequence data source */
00861     BlastSeqSrcGetSeqArg seq_arg;  /**< argument to GetSequence method
00862                                      of the BlastSeqSrc (@todo this
00863                                      structure was designed to be
00864                                      allocated on the stack, i.e.: in
00865                                      Kappa_MatchingSequenceInitialize) */
00866 } BlastKappa_SequenceInfo;
00867 
00868 
00869 /** Release the resources associated with a matching sequence. */
00870 static void
00871 s_MatchingSequenceRelease(BlastCompo_MatchingSequence * self)
00872 {
00873     if (self != NULL) {
00874         BlastKappa_SequenceInfo * local_data = self->local_data;
00875         BlastSeqSrcReleaseSequence(local_data->seq_src,
00876                                    &local_data->seq_arg);
00877         BlastSequenceBlkFree(local_data->seq_arg.seq);
00878         free(self->local_data);
00879         self->local_data = NULL;
00880     }
00881 }
00882 
00883 /**
00884  * Test whether the aligned parts of two sequences that
00885  * have a high-scoring gapless alignment are nearly identical
00886  *
00887  * @param seqData           subject sequence
00888  * @param queryData         query sequence
00889  * @param queryOffset       offset for align if there are multiple queries
00890  * @param align             information about the alignment
00891  * @param rangeOffset       offset for subject sequence (used for tblastn)
00892  *
00893  * @return                  TRUE if the aligned portions are nearly identical
00894  */
00895 static Boolean 
00896 s_TestNearIdentical(const BlastCompo_SequenceData *seqData, 
00897          const BlastCompo_SequenceData *queryData, 
00898             const int queryOffset,   
00899          const BlastCompo_Alignment *align,
00900          const int rangeOffset)
00901 {
00902   int numIdentical = 0;
00903   double fractionIdentical;
00904   int qPos, sPos; /*positions in query and subject;*/
00905   int qEnd; /*end of query*/
00906   const double kMinFractionNearIdentical = 0.98; /* cutoff for this check. */
00907 
00908   qPos = align->queryStart - queryOffset;
00909   qEnd = align->queryEnd - queryOffset;
00910   sPos = align->matchStart - rangeOffset;
00911   while (qPos < qEnd)  {
00912       if (queryData->data[qPos] == seqData->data[sPos])
00913     numIdentical++;
00914       sPos++;
00915       qPos++;
00916   }
00917   fractionIdentical = ((double) numIdentical/
00918   (double) (align->queryEnd - align->queryStart));
00919   if (fractionIdentical >= kMinFractionNearIdentical)
00920     return(TRUE);
00921   else
00922     return(FALSE);
00923 }
00924 
00925 
00926 /**
00927  * Initialize a new matching sequence, obtaining information about the
00928  * sequence from the search.
00929  *
00930  * @param self              object to be initialized
00931  * @param seqSrc            A pointer to a source from which sequence data
00932  *                          may be obtained
00933  * @param program_number    identifies the type of blast search being
00934  *                          performed.
00935  * @param default_db_genetic_code   default genetic code to use when
00936  *                          subject sequences are translated and there is
00937  *                          no other guidance on what code to use
00938  * @param subject_index     index of the matching sequence in the database
00939  */
00940 static int
00941 s_MatchingSequenceInitialize(BlastCompo_MatchingSequence * self,
00942                              EBlastProgramType program_number,
00943                              const BlastSeqSrc* seqSrc,
00944                              Int4 default_db_genetic_code,
00945                              Int4 subject_index)
00946 {
00947     BlastKappa_SequenceInfo * seq_info;  /* BLAST-specific sequence
00948                                             information */
00949     self->length = 0;
00950     self->local_data = NULL;
00951 
00952     seq_info = malloc(sizeof(BlastKappa_SequenceInfo));
00953     if (seq_info != NULL) {
00954         self->local_data = seq_info;
00955 
00956         seq_info->seq_src      = seqSrc;
00957         seq_info->prog_number  = program_number;
00958 
00959         memset((void*) &seq_info->seq_arg, 0, sizeof(seq_info->seq_arg));
00960         seq_info->seq_arg.oid = self->index = subject_index;
00961 
00962         if( program_number == eBlastTypeTblastn ) {
00963             seq_info->seq_arg.encoding = eBlastEncodingNcbi4na;
00964         } else {
00965             seq_info->seq_arg.encoding = eBlastEncodingProtein;
00966         }
00967         if (BlastSeqSrcGetSequence(seqSrc, &seq_info->seq_arg) >= 0) {
00968             self->length =
00969                 BlastSeqSrcGetSeqLen(seqSrc, (void*) &seq_info->seq_arg);
00970 
00971             /* If the subject is translated and the BlastSeqSrc implementation
00972              * doesn't provide a genetic code string, use the default genetic
00973              * code for all subjects (as in the C toolkit) */
00974             if (Blast_SubjectIsTranslated(program_number) &&
00975                 seq_info->seq_arg.seq->gen_code_string == NULL) {
00976                 seq_info->seq_arg.seq->gen_code_string =
00977                              GenCodeSingletonFind(default_db_genetic_code);
00978                 ASSERT(seq_info->seq_arg.seq->gen_code_string);
00979             }
00980         } else {
00981             self->length = 0;
00982         }
00983     }
00984     if (self->length == 0) {
00985         /* Could not obtain the required data */
00986         s_MatchingSequenceRelease(self);
00987         return -1;
00988     } else {
00989         return 0;
00990     }
00991 }
00992 
00993 
00994 /** NCBIstdaa encoding for 'X' character */
00995 #define BLASTP_MASK_RESIDUE 21
00996 /** Default instructions and mask residue for SEG filtering */
00997 #define BLASTP_MASK_INSTRUCTIONS "S 10 1.8 2.1"
00998 
00999 
01000 /**
01001  * Filter low complexity regions from the sequence data; uses the SEG
01002  * algorithm.
01003  *
01004  * @param seqData            data to be filtered
01005  * @param program_name       type of search being performed
01006  * @return   0 for success; -1 for out-of-memory
01007  */
01008 static int
01009 s_DoSegSequenceData(BlastCompo_SequenceData * seqData,
01010                     EBlastProgramType program_name)
01011 {
01012     int status = 0;
01013     BlastSeqLoc* mask_seqloc = NULL;
01014     SBlastFilterOptions* filter_options = NULL;
01015 
01016     status = BlastFilteringOptionsFromString(program_name,
01017                                              BLASTP_MASK_INSTRUCTIONS,
01018                                              &filter_options, NULL);
01019     if (status == 0) {
01020         status = BlastSetUp_Filter(program_name, seqData->data,
01021                                    seqData->length, 0, filter_options,
01022                                    &mask_seqloc, NULL);
01023         filter_options = SBlastFilterOptionsFree(filter_options);
01024     }
01025     if (status == 0) {
01026         Blast_MaskTheResidues(seqData->data, seqData->length,
01027                               FALSE, mask_seqloc, FALSE, 0);
01028     }
01029     if (mask_seqloc != NULL) {
01030         mask_seqloc = BlastSeqLocFree(mask_seqloc);
01031     }
01032     return status;
01033 }
01034 
01035 
01036 /**
01037  * Obtain a string of translated data
01038  *
01039  * @param self          the sequence from which to obtain the data [in]
01040  * @param range         the range and translation frame to get [in]
01041  * @param seqData       the resulting data [out]
01042  * @param queryData     the query sequence [in]
01043  * @param queryOffset   offset for align if there are multiple queries
01044  * @param align          information about the alignment between query and subject
01045  * @param shouldTestIdentical did alignment pass a preliminary test in
01046  *                       redo_alignment.c that indicates the sequence
01047  *                        pieces may be near identical
01048  *
01049  * @return 0 on success; -1 on failure
01050  */
01051 static int
01052 s_SequenceGetTranslatedRange(const BlastCompo_MatchingSequence * self,
01053                              const BlastCompo_SequenceRange * range,
01054                              BlastCompo_SequenceData * seqData,
01055                  const BlastCompo_SequenceData * queryData,
01056                  const int queryOffset,
01057                  const BlastCompo_Alignment *align,
01058                  const Boolean shouldTestIdentical)
01059 {
01060     int status = 0;
01061     BlastKappa_SequenceInfo * local_data; /* BLAST-specific
01062                                              information associated
01063                                              with the sequence */
01064     Uint1 * translation_buffer;           /* a buffer for the translated,
01065                                              amino-acid sequence */
01066     Int4 translated_length;  /* length of the translated sequence */
01067     int translation_frame;   /* frame in which to translate */
01068     Uint1 * na_sequence;     /* the nucleotide sequence */
01069     int translation_start;   /* location in na_sequence to start
01070                                 translating */
01071     int num_nucleotides;     /* the number of nucleotides to be translated */
01072     
01073     local_data = self->local_data;
01074     na_sequence = local_data->seq_arg.seq->sequence_start;
01075 
01076     /* Initialize seqData to nil, in case this routine fails */
01077     seqData->buffer = NULL;
01078     seqData->data = NULL;
01079     seqData->length = 0;
01080 
01081     translation_frame = range->context;
01082     if (translation_frame > 0) {
01083         translation_start = 3 * range->begin;
01084     } else {
01085         translation_start =
01086             self->length - 3 * range->end + translation_frame + 1;
01087     }
01088     num_nucleotides =
01089         3 * (range->end - range->begin) + ABS(translation_frame) - 1;
01090     
01091     status = Blast_GetPartialTranslation(na_sequence + translation_start,
01092                                          num_nucleotides,
01093                                          (Int2) translation_frame,
01094                                          local_data->seq_arg.seq->gen_code_string,
01095                                          &translation_buffer,
01096                                          &translated_length,
01097                                          NULL);
01098     if (status == 0) {
01099         seqData->buffer = translation_buffer;
01100         seqData->data   = translation_buffer + 1;
01101         seqData->length = translated_length;
01102 
01103         if ( !(KAPPA_TBLASTN_NO_SEG_SEQUENCE) ) {
01104       if ((!shouldTestIdentical) || 
01105             (shouldTestIdentical && 
01106          (!s_TestNearIdentical(seqData, queryData, queryOffset, align, range->begin)))) {
01107             status = s_DoSegSequenceData(seqData, eBlastTypeTblastn);
01108             if (status != 0) {
01109                 free(seqData->buffer);
01110                 seqData->buffer = NULL;
01111                 seqData->data = NULL;
01112                 seqData->length = 0;
01113             }
01114       }
01115         }
01116     }
01117     return status;
01118 }
01119 
01120 
01121 /**
01122  * Get a string of protein data from a protein sequence.
01123  *
01124  * @param self          a protein sequence [in]
01125  * @param range         the range to get [in]
01126  * @param seqData       the resulting data [out]
01127  * @param queryData     the query sequence [in]
01128  * @param queryOffset   offset for align if there are multiple queries
01129  * @param align          information about the alignment 
01130  *                         between query and subject [in]
01131  * @param shouldTestIdentical did alignment pass a preliminary test in
01132  *                       redo_alignment.c that indicates the sequence
01133  *                        pieces may be near identical [in]
01134  *
01135  * @return 0 on success; -1 on failure
01136  */
01137 static int
01138 s_SequenceGetProteinRange(const BlastCompo_MatchingSequence * self,
01139                           const BlastCompo_SequenceRange * range,
01140                           BlastCompo_SequenceData * seqData,
01141               const BlastCompo_SequenceData * queryData,
01142               const int queryOffset,
01143               const BlastCompo_Alignment *align,
01144               const Boolean shouldTestIdentical)
01145 
01146 {
01147     int status = 0;       /* return status */
01148     Int4       idx;       /* loop index */
01149     Uint1     *origData;  /* the unfiltered data for the sequence */
01150     /* BLAST-specific sequence information */
01151     BlastKappa_SequenceInfo * local_data = self->local_data;
01152 
01153     seqData->data = NULL;
01154     seqData->length = 0;
01155     /* Copy the entire sequence (necessary for SEG filtering.) */
01156     seqData->buffer  = calloc((self->length + 2), sizeof(Uint1));
01157     if (seqData->buffer == NULL) {
01158         return -1;
01159     }
01160     /* First and last characters of the buffer MUST be '\0', which is
01161      * true here because the buffer was allocated using calloc. */
01162     seqData->data    = seqData->buffer + 1;
01163     seqData->length  = self->length;
01164 
01165     origData = local_data->seq_arg.seq->sequence;
01166     for (idx = 0;  idx < seqData->length;  idx++) {
01167         /* Copy the sequence data, replacing occurrences of amino acid
01168          * number 24 (Selenocysteine) with number 21 (Undetermined or
01169          * atypical). */
01170         if (origData[idx] != 24) {
01171             seqData->data[idx] = origData[idx];
01172         } else {
01173             seqData->data[idx] = 21;
01174             fprintf(stderr, "Selenocysteine (U) at position %ld"
01175                     " replaced by X\n",
01176                     (long) idx + 1);
01177         }
01178     }
01179     if ( !(KAPPA_BLASTP_NO_SEG_SEQUENCE) ) {
01180       if ((!shouldTestIdentical) || 
01181       (shouldTestIdentical && 
01182        (!s_TestNearIdentical(seqData, queryData, queryOffset, align, 0)))) {
01183     status = -1;
01184         status = s_DoSegSequenceData(seqData, eBlastTypeBlastp);
01185       }
01186     }
01187     /* Fit the data to the range. */
01188     seqData ->data    = &seqData->data[range->begin - 1];
01189     *seqData->data++  = '\0';
01190     seqData ->length  = range->end - range->begin;
01191 
01192     if (status != 0) {
01193         free(seqData->buffer);
01194         seqData->buffer = NULL;
01195         seqData->data = NULL;
01196     }
01197     return status;
01198 }
01199 
01200 
01201 /**
01202  * Obtain the sequence data that lies within the given range.
01203  *
01204  * @param self          sequence information [in]
01205  * @param range        range specifying the range of data [in]
01206  * @param seqData       the sequence data obtained [out]
01207  * @param seqData       the resulting data [out]
01208  * @param queryData     the query sequence [in]
01209  * @param queryOffset   offset for align if there are multiple queries
01210  * @param align          information about the alignment between query and subject
01211  * @param shouldTestIdentical did alignment pass a preliminary test in
01212  *                       redo_alignment.c that indicates the sequence
01213  *                        pieces may be near identical
01214  *
01215  * @return 0 on success; -1 on failure
01216  */
01217 static int
01218 s_SequenceGetRange(const BlastCompo_MatchingSequence * self,
01219                    const BlastCompo_SequenceRange * range,
01220                    BlastCompo_SequenceData * seqData,
01221            const BlastCompo_SequenceData * queryData,
01222            const int queryOffset,
01223            const BlastCompo_Alignment *align,
01224            const Boolean shouldTestIdentical)
01225 {
01226     BlastKappa_SequenceInfo * seq_info = self->local_data;
01227     if (seq_info->prog_number ==  eBlastTypeTblastn) {
01228         /* The sequence must be translated. */
01229       return s_SequenceGetTranslatedRange(self, range, seqData,
01230                       queryData, queryOffset, align, shouldTestIdentical);
01231     } else {
01232       return s_SequenceGetProteinRange(self, range, seqData, queryData, queryOffset, align, shouldTestIdentical);
01233     }
01234 }
01235 
01236 
01237 /** Data and data-structures needed to perform a gapped alignment */
01238 typedef struct BlastKappa_GappingParamsContext {
01239     const BlastScoringParameters*
01240         scoringParams;                /**< scoring parameters for a
01241                                            gapped alignment */
01242     BlastGapAlignStruct * gap_align;  /**< additional parameters for a
01243                                            gapped alignment */
01244     BlastScoreBlk* sbp;               /**< the score block for this search */
01245     double localScalingFactor;        /**< the amount by which this
01246                                            search has been scaled */
01247     EBlastProgramType prog_number;    /**< the type of search being
01248                                            performed */
01249 } BlastKappa_GappingParamsContext;
01250 
01251 
01252 /**
01253  * Reads a BlastGapAlignStruct that has been used to compute a
01254  * traceback, and return a BlastCompo_Alignment representing the
01255  * alignment.  The BlastGapAlignStruct is in coordinates local to the
01256  * ranges being aligned; the resulting alignment is in coordinates w.r.t.
01257  * the whole query and subject.
01258  *
01259  * @param gap_align         the BlastGapAlignStruct
01260  * @param *edit_script      the edit script from the alignment; on exit
01261  *                          NULL.  The edit_script is usually
01262  *                          gap_align->edit_script, but we don't want
01263  *                          an implicit side effect on the gap_align.
01264  * @param query_range       the range of the query used in this alignment
01265  * @param subject_range     the range of the subject used in this alignment
01266  * @param matrix_adjust_rule   the rule used to compute the scoring matrix
01267  *
01268  * @return the new alignment on success or NULL on error
01269  */
01270 static BlastCompo_Alignment *
01271 s_NewAlignmentFromGapAlign(BlastGapAlignStruct * gap_align,
01272                            GapEditScript ** edit_script,
01273                            BlastCompo_SequenceRange * query_range,
01274                            BlastCompo_SequenceRange * subject_range,
01275                            EMatrixAdjustRule matrix_adjust_rule)
01276 {
01277     /* parameters to BlastCompo_AlignmentNew */
01278     int queryStart, queryEnd, queryIndex, matchStart, matchEnd, frame;
01279     BlastCompo_Alignment * obj; /* the new alignment */
01280 
01281     /* In the composition_adjustment library, the query start/end are
01282        indices into the concatenated query, and so must be shifted.  */
01283     queryStart = gap_align->query_start + query_range->begin;
01284     queryEnd   = gap_align->query_stop + query_range->begin;
01285     queryIndex = query_range->context;
01286     matchStart = gap_align->subject_start + subject_range->begin;
01287     matchEnd   = gap_align->subject_stop  + subject_range->begin;
01288     frame      = subject_range->context;
01289 
01290     obj = BlastCompo_AlignmentNew(gap_align->score, matrix_adjust_rule,
01291                                   queryStart, queryEnd, queryIndex,
01292                                   matchStart, matchEnd, frame,
01293                                   *edit_script);
01294     if (obj != NULL) {
01295         *edit_script = NULL;
01296     }
01297     return obj;
01298 }
01299 
01300 
01301 /** A callback used when performing SmithWaterman alignments:
01302  * Calculate the traceback for one alignment by performing an x-drop
01303  * alignment in the forward direction, possibly increasing the x-drop
01304  * parameter until the desired score is attained.
01305  *
01306  * The start, end and score of the alignment should be obtained
01307  * using the Smith-Waterman algorithm before this routine is called.
01308  *
01309  * @param *pnewAlign       the new alignment
01310  * @param *pqueryEnd       on entry, the end of the alignment in the
01311  *                         query, as computed by the Smith-Waterman
01312  *                         algorithm.  On exit, the end as computed by
01313  *                         the x-drop algorithm
01314  * @param *pmatchEnd       like as *pqueryEnd, but for the subject
01315  *                         sequence
01316  * @param queryStart       the starting point in the query
01317  * @param matchStart       the starting point in the subject
01318  * @param score            the score of the alignment, as computed by
01319  *                         the Smith-Waterman algorithm
01320  * @param query            query sequence data
01321  * @param query_range      range of this query in the concatenated
01322  *                         query
01323  * @param ccat_query_length   total length of the concatenated query
01324  * @param subject          subject sequence data
01325  * @param subject_range    range of subject_data in the translated
01326  *                         query, in amino acid coordinates
01327  * @param full_subject_length   length of the full subject sequence
01328  * @param gapping_params        parameters used to compute gapped
01329  *                              alignments
01330  * @param matrix_adjust_rule    the rule used to compute the scoring matrix
01331  *
01332  * @returns 0   (posts a fatal error if it fails)
01333  * @sa new_xdrop_align_type
01334  */
01335 static int
01336 s_NewAlignmentUsingXdrop(BlastCompo_Alignment ** pnewAlign,
01337                          Int4 * pqueryEnd, Int4 *pmatchEnd,
01338                          Int4 queryStart, Int4 matchStart, Int4 score,
01339                          BlastCompo_SequenceData * query,
01340                          BlastCompo_SequenceRange * query_range,
01341                          Int4 ccat_query_length,
01342                          BlastCompo_SequenceData * subject,
01343                          BlastCompo_SequenceRange * subject_range,
01344                          Int4 full_subject_length,
01345                          BlastCompo_GappingParams * gapping_params,
01346                          EMatrixAdjustRule matrix_adjust_rule)
01347 {
01348     Int4 newScore;
01349     /* Extent of the alignment as computed by an x-drop alignment
01350      * (usually the same as (queryEnd - queryStart) and (matchEnd -
01351      * matchStart)) */
01352     Int4 queryExtent, matchExtent;
01353     BlastCompo_Alignment * obj = NULL;  /* the new object */
01354     /* BLAST-specific parameters needed compute an X-drop alignment */
01355     BlastKappa_GappingParamsContext * context = gapping_params->context;
01356     /* Auxiliarly structure for computing gapped alignments */
01357     BlastGapAlignStruct * gap_align = context->gap_align;
01358     /* Scoring parameters for gapped alignments */
01359     const BlastScoringParameters* scoringParams = context->scoringParams;
01360     /* A structure containing the traceback of a gapped alignment */
01361     GapEditScript* editScript = NULL;
01362 
01363     /* suppress unused parameter warnings; this is a callback
01364        function, so these parameter cannot be deleted */
01365     (void) ccat_query_length;
01366     (void) full_subject_length;
01367 
01368     gap_align->gap_x_dropoff = gapping_params->x_dropoff;
01369 
01370     s_SWFindFinalEndsUsingXdrop(query,   queryStart, *pqueryEnd,
01371                                 subject, matchStart, *pmatchEnd,
01372                                 gap_align, scoringParams,
01373                                 score, &queryExtent, &matchExtent,
01374                                 &newScore);
01375     *pqueryEnd = queryStart + queryExtent;
01376     *pmatchEnd = matchStart + matchExtent;
01377 
01378     editScript =
01379         Blast_PrelimEditBlockToGapEditScript(gap_align->rev_prelim_tback,
01380                                              gap_align->fwd_prelim_tback);
01381     if (editScript != NULL) {
01382         /* Shifted values of the endpoints */
01383         Int4 aqueryStart =  queryStart + query_range->begin;
01384         Int4 aqueryEnd   = *pqueryEnd  + query_range->begin;
01385         Int4 amatchStart =  matchStart + subject_range->begin;
01386         Int4 amatchEnd   = *pmatchEnd  + subject_range->begin;
01387 
01388         obj = BlastCompo_AlignmentNew(newScore, matrix_adjust_rule,
01389                                       aqueryStart, aqueryEnd,
01390                                       query_range->context,
01391                                       amatchStart, amatchEnd,
01392                                       subject_range->context, editScript);
01393         if (obj == NULL) {
01394             GapEditScriptDelete(editScript);
01395         }
01396     }
01397     *pnewAlign = obj;
01398 
01399     return obj != NULL ? 0 : -1;
01400 }
01401 
01402 
01403 /**
01404  * A callback: calculate the traceback for one alignment by
01405  * performing an x-drop alignment in both directions
01406  *
01407  * @param in_align         the existing alignment, without traceback
01408  * @param matrix_adjust_rule    the rule used to compute the scoring matrix
01409  * @param query_data       query sequence data
01410  * @param query_range      range of this query in the concatenated
01411  *                         query
01412  * @param ccat_query_length   total length of the concatenated query
01413  * @param subject_data     subject sequence data
01414  * @param subject_range    range of subject_data in the translated
01415  *                         query, in amino acid coordinates
01416  * @param full_subject_length   length of the full subject sequence
01417  * @param gapping_params        parameters used to compute gapped
01418  *                              alignments
01419  * @sa redo_one_alignment_type
01420  */
01421 static BlastCompo_Alignment *
01422 s_RedoOneAlignment(BlastCompo_Alignment * in_align,
01423                    EMatrixAdjustRule matrix_adjust_rule,
01424                    BlastCompo_SequenceData * query_data,
01425                    BlastCompo_SequenceRange * query_range,
01426                    int ccat_query_length,
01427                    BlastCompo_SequenceData * subject_data,
01428                    BlastCompo_SequenceRange * subject_range,
01429                    int full_subject_length,
01430                    BlastCompo_GappingParams * gapping_params)
01431 {
01432     int status;                /* return code */
01433     Int4 q_start, s_start;     /* starting point in query and subject */
01434     /* BLAST-specific parameters needed to compute a gapped alignment */
01435     BlastKappa_GappingParamsContext * context = gapping_params->context;
01436     /* Score block for this search */
01437     BlastScoreBlk* sbp = context->sbp;
01438     /* Auxiliary structure for computing gapped alignments */
01439     BlastGapAlignStruct* gapAlign = context->gap_align;
01440     /* The preliminary gapped HSP that were are recomputing */
01441     BlastHSP * hsp = in_align->context;
01442 
01443     /* suppress unused parameter warnings; this is a callback
01444        function, so these parameter cannot be deleted */
01445     (void) ccat_query_length;
01446     (void) full_subject_length;
01447 
01448     /* Shift the subject offset and gapped start to be offsets
01449        into the translated subject_range; shifting in this manner
01450        is necessary for BLAST_CheckStartForGappedAlignment */
01451     hsp->subject.offset       -= subject_range->begin;
01452     hsp->subject.end          -= subject_range->begin;
01453     hsp->subject.gapped_start -= subject_range->begin;
01454 
01455     if(BLAST_CheckStartForGappedAlignment(hsp, query_data->data,
01456                                           subject_data->data, sbp)) {
01457         /* We may use the starting point supplied by the HSP. */
01458         q_start = hsp->query.gapped_start;
01459         s_start = hsp->subject.gapped_start;
01460     } else {
01461         /* We must recompute the start for the gapped alignment, as the
01462            one in the HSP was unacceptable.*/
01463         q_start =
01464             BlastGetStartForGappedAlignment(query_data->data,
01465                                             subject_data->data, sbp,
01466                                             hsp->query.offset,
01467                                             hsp->query.end -
01468                                             hsp->query.offset,
01469                                             hsp->subject.offset,
01470                                             hsp->subject.end -
01471                                             hsp->subject.offset);
01472         s_start =
01473             (hsp->subject.offset - hsp->query.offset) + q_start;
01474     }
01475     /* Undo the shift so there is no side effect on the incoming HSP
01476        list. */
01477     hsp->subject.offset       += subject_range->begin;
01478     hsp->subject.end          += subject_range->begin;
01479     hsp->subject.gapped_start += subject_range->begin;
01480 
01481     gapAlign->gap_x_dropoff = gapping_params->x_dropoff;
01482 
01483     status =
01484         BLAST_GappedAlignmentWithTraceback(context->prog_number,
01485                                            query_data->data,
01486                                            subject_data->data, gapAlign,
01487                                            context->scoringParams,
01488                                            q_start, s_start,
01489                                            query_data->length,
01490                                            subject_data->length,
01491                                            NULL);
01492     if (status == 0) {
01493         return s_NewAlignmentFromGapAlign(gapAlign, &gapAlign->edit_script,
01494                                           query_range, subject_range,
01495                                           matrix_adjust_rule);
01496     } else {
01497         return NULL;
01498     }
01499 }
01500 
01501 
01502 /**
01503  * A BlastKappa_SavedParameters holds the value of certain search
01504  * parameters on entry to RedoAlignmentCore.  These values are
01505  * restored on exit.
01506  */
01507 typedef struct BlastKappa_SavedParameters {
01508     Int4          gap_open;    /**< a penalty for the existence of a gap */
01509     Int4          gapExtend;   /**< a penalty for each residue in the
01510                                     gap */
01511     double        scale_factor;     /**< the original scale factor */
01512     Int4 **origMatrix;              /**< The original matrix values */
01513     double original_expect_value;   /**< expect value on entry */
01514     /** copy of the original gapped Karlin-Altschul block
01515      * corresponding to the first context */
01516     Blast_KarlinBlk** kbp_gap_orig;
01517     Int4             num_queries;   /**< Number of queries in this search */
01518 } BlastKappa_SavedParameters;
01519 
01520 
01521 /**
01522  * Release the data associated with a BlastKappa_SavedParameters and
01523  * delete the object
01524  * @param searchParams the object to be deleted [in][out]
01525  */
01526 static void
01527 s_SavedParametersFree(BlastKappa_SavedParameters ** searchParams)
01528 {
01529     /* for convenience, remove one level of indirection from searchParams */
01530     BlastKappa_SavedParameters *sp = *searchParams;
01531 
01532     if (sp != NULL) {
01533         if (sp->kbp_gap_orig != NULL) {
01534             int i;
01535             for (i = 0;  i < sp->num_queries;  i++) {
01536                 if (sp->kbp_gap_orig[i] != NULL)
01537                     Blast_KarlinBlkFree(sp->kbp_gap_orig[i]);
01538             }
01539             free(sp->kbp_gap_orig);
01540         }
01541         if (sp->origMatrix != NULL)
01542             Nlm_Int4MatrixFree(&sp->origMatrix);
01543     }
01544     sfree(*searchParams);
01545     *searchParams = NULL;
01546 }
01547 
01548 
01549 /**
01550  * Create a new instance of BlastKappa_SavedParameters
01551  *
01552  * @param rows               number of rows in the scoring matrix
01553  * @param numQueries         number of queries in this search
01554  * @param compo_adjust_mode  if >0, use composition-based statistics
01555  * @param positionBased      if true, the search is position-based
01556  */
01557 static BlastKappa_SavedParameters *
01558 s_SavedParametersNew(Int4 rows,
01559                      Int4 numQueries,
01560                      ECompoAdjustModes compo_adjust_mode,
01561                      Boolean positionBased)
01562 {
01563     int i;
01564     BlastKappa_SavedParameters *sp;   /* the new object */
01565     sp = malloc(sizeof(BlastKappa_SavedParameters));
01566 
01567     if (sp == NULL) {
01568         goto error_return;
01569     }
01570     sp->kbp_gap_orig       = NULL;
01571     sp->origMatrix         = NULL;
01572 
01573     sp->kbp_gap_orig = calloc(numQueries, sizeof(Blast_KarlinBlk*));
01574     if (sp->kbp_gap_orig == NULL) {
01575         goto error_return;
01576     }
01577     sp->num_queries = numQueries;
01578     for (i = 0;  i < numQueries;  i++) {
01579         sp->kbp_gap_orig[i] = NULL;
01580     }
01581     if (compo_adjust_mode != eNoCompositionBasedStats) {
01582         if (positionBased) {
01583             sp->origMatrix = Nlm_Int4MatrixNew(rows, BLASTAA_SIZE);
01584         } else {
01585             sp->origMatrix = Nlm_Int4MatrixNew(BLASTAA_SIZE, BLASTAA_SIZE);
01586         }
01587         if (sp->origMatrix == NULL)
01588             goto error_return;
01589     }
01590     return sp;
01591 error_return:
01592     s_SavedParametersFree(&sp);
01593     return NULL;
01594 }
01595 
01596 
01597 /**
01598  * Record the initial value of the search parameters that are to be
01599  * adjusted.
01600  *
01601  * @param searchParams       holds the recorded values [out]
01602  * @param sbp                a score block [in]
01603  * @param scoring            gapped alignment parameters [in]
01604  * @param query_length       length of the concatenated query [in]
01605  * @param compo_adjust_mode  composition adjustment mode [in]
01606  * @param positionBased     is this search position-based [in]
01607  */
01608 static int
01609 s_RecordInitialSearch(BlastKappa_SavedParameters * searchParams,
01610                       BlastScoreBlk* sbp,
01611                       const BlastScoringParameters* scoring,
01612                       int query_length,
01613                       ECompoAdjustModes compo_adjust_mode,
01614                       Boolean positionBased)
01615 {
01616     int i;
01617 
01618     searchParams->gap_open     = scoring->gap_open;
01619     searchParams->gapExtend    = scoring->gap_extend;
01620     searchParams->scale_factor = scoring->scale_factor;
01621 
01622     for (i = 0;  i < searchParams->num_queries;  i++) { 
01623         if (sbp->kbp_gap[i] != NULL) {
01624             /* There is a kbp_gap for query i and it must be copied */
01625             searchParams->kbp_gap_orig[i] = Blast_KarlinBlkNew();
01626             if (searchParams->kbp_gap_orig[i] == NULL) {
01627                 return -1;
01628             }
01629             Blast_KarlinBlkCopy(searchParams->kbp_gap_orig[i],
01630                                 sbp->kbp_gap[i]);
01631         }
01632     }
01633 
01634     if (compo_adjust_mode != eNoCompositionBasedStats) {
01635         Int4 **matrix;              /* scoring matrix */
01636         int j;                      /* iteration index */
01637         int rows;                   /* number of rows in matrix */
01638         if (positionBased) {
01639             matrix = sbp->psi_matrix->pssm->data;
01640             rows = query_length;
01641         } else {
01642             matrix = sbp->matrix->data;
01643             rows = BLASTAA_SIZE;
01644         }
01645         for (i = 0;  i < rows;  i++) {
01646             for (j = 0;  j < BLASTAA_SIZE;  j++) {
01647                 searchParams->origMatrix[i][j] = matrix[i][j];
01648             }
01649         }
01650     }
01651     return 0;
01652 }
01653 
01654 
01655 /**
01656  * Rescale the search parameters in the search object and options
01657  * object to obtain more precision.
01658  *
01659  * @param sbp               score block to be rescaled
01660  * @param sp                scoring parameters to be rescaled
01661  * @param num_queries       number of queries in this search
01662  * @param scale_factor      amount by which to scale this search
01663  */
01664 static void
01665 s_RescaleSearch(BlastScoreBlk* sbp,
01666                 BlastScoringParameters* sp,
01667                 int num_queries,
01668                 double scale_factor)
01669 {
01670     int i;
01671     for (i = 0;  i < num_queries;  i++) {
01672         if (sbp->kbp_gap[i] != NULL) {
01673             Blast_KarlinBlk * kbp = sbp->kbp_gap[i];
01674             kbp->Lambda /= scale_factor;
01675             kbp->logK = log(kbp->K);
01676         }
01677     }
01678 
01679     sp->gap_open = BLAST_Nint(sp->gap_open  * scale_factor);
01680     sp->gap_extend = BLAST_Nint(sp->gap_extend * scale_factor);
01681     sp->scale_factor = scale_factor;
01682 }
01683 
01684 
01685 /**
01686  * Restore the parameters that were adjusted to their original values.
01687  *
01688  * @param sbp                the score block to be restored
01689  * @param scoring            the scoring parameters to be restored
01690  * @param searchParams       the initial recorded values of the parameters
01691  * @param query_length       the concatenated query length
01692  * @param positionBased      is this search position-based
01693  * @param compo_adjust_mode  mode of composition adjustment
01694  */
01695 static void
01696 s_RestoreSearch(BlastScoreBlk* sbp,
01697                 BlastScoringParameters* scoring,
01698                 const BlastKappa_SavedParameters * searchParams,
01699                 int query_length,
01700                 Boolean positionBased,
01701                 ECompoAdjustModes compo_adjust_mode)
01702 {
01703     int i;
01704 
01705     scoring->gap_open = searchParams->gap_open;
01706     scoring->gap_extend = searchParams->gapExtend;
01707     scoring->scale_factor = searchParams->scale_factor;
01708 
01709     for (i = 0;  i < searchParams->num_queries;  i++) {
01710         if (sbp->kbp_gap[i] != NULL) {
01711             Blast_KarlinBlkCopy(sbp->kbp_gap[i],
01712                                 searchParams->kbp_gap_orig[i]);
01713         }
01714     }
01715     if(compo_adjust_mode != eNoCompositionBasedStats) {
01716         int  j;             /* iteration index */
01717         Int4 ** matrix;     /* matrix to be restored */
01718         int rows;           /* number of rows in the matrix */
01719 
01720         if (positionBased) {
01721             matrix = sbp->psi_matrix->pssm->data;
01722             rows = query_length;
01723         } else {
01724             matrix = sbp->matrix->data;
01725             rows = BLASTAA_SIZE;
01726         }
01727         for (i = 0;  i < rows;  i++) {
01728             for (j = 0;  j < BLASTAA_SIZE;  j++) {
01729                 matrix[i][j] = searchParams->origMatrix[i][j];
01730             }
01731         }
01732     }
01733 }
01734 
01735 
01736 /**
01737  * Initialize an object of type Blast_MatrixInfo.
01738  *
01739  * @param self            object being initialized
01740  * @param queryBlk        the query sequence data
01741  * @param sbp             score block for this search
01742  * @param scale_factor    amount by which ungapped parameters should be
01743  *                        scaled
01744  * @param matrixName      name of the matrix
01745  */
01746 static int
01747 s_MatrixInfoInit(Blast_MatrixInfo * self,
01748                  BLAST_SequenceBlk* queryBlk,
01749                  BlastScoreBlk* sbp,
01750                  double scale_factor,
01751                  const char * matrixName)
01752 {
01753     int status = 0;    /* return status */
01754     int lenName;       /* length of matrixName as a string */
01755 
01756     /* copy the matrix name (strdup is not standard C) */
01757     lenName = strlen(matrixName);
01758     if (NULL == (self->matrixName = malloc(lenName + 1))) {
01759         return -1;
01760     }
01761     memcpy(self->matrixName, matrixName, lenName + 1);
01762 
01763     if (self->positionBased) {
01764         status = s_GetPosBasedStartFreqRatios(self->startFreqRatios,
01765                                               queryBlk->length,
01766                                               queryBlk->sequence,
01767                                               matrixName,
01768                                               sbp->psi_matrix->freq_ratios);
01769         if (status == 0) {
01770             status = s_ScalePosMatrix(self->startMatrix, matrixName,
01771                                       sbp->psi_matrix->freq_ratios,
01772                                       queryBlk->sequence,
01773                                       queryBlk->length, sbp, scale_factor);
01774             self->ungappedLambda = sbp->kbp_psi[0]->Lambda / scale_factor;
01775         }
01776     } else {
01777         self->ungappedLambda = sbp->kbp_ideal->Lambda / scale_factor;
01778         status = s_GetStartFreqRatios(self->startFreqRatios, matrixName);
01779         if (status == 0) {
01780             Blast_Int4MatrixFromFreq(self->startMatrix, self->cols,
01781                                      self->startFreqRatios,
01782                                      self->ungappedLambda);
01783         }
01784     }
01785     return status;
01786 }
01787 
01788 
01789 /**
01790  * Save information about all queries in an array of objects of type
01791  * BlastCompo_QueryInfo.
01792  *
01793  * @param query_data        query sequence data
01794  * @param blast_query_info  information about all queries, as an
01795  *                          internal blast data structure
01796  *
01797  * @return the new array on success, or NULL on error
01798  */
01799 static BlastCompo_QueryInfo *
01800 s_GetQueryInfo(Uint1 * query_data, BlastQueryInfo * blast_query_info)
01801 {
01802     int i;                   /* loop index */
01803     BlastCompo_QueryInfo *
01804         compo_query_info;    /* the new array */
01805     int num_queries;         /* the number of queries/elements in
01806                                 compo_query_info */
01807 
01808     num_queries = blast_query_info->num_queries;
01809     compo_query_info = calloc(num_queries, sizeof(BlastCompo_QueryInfo));
01810     if (compo_query_info != NULL) {
01811         for (i = 0;  i < num_queries;  i++) {
01812             BlastCompo_QueryInfo * query_info = &compo_query_info[i];
01813             BlastContextInfo * query_context = &blast_query_info->contexts[i];
01814 
01815             query_info->eff_search_space =
01816                 (double) query_context->eff_searchsp;
01817             query_info->origin = query_context->query_offset;
01818             query_info->seq.data = &query_data[query_info->origin];
01819             query_info->seq.length = query_context->query_length;
01820 
01821             Blast_ReadAaComposition(&query_info->composition, BLASTAA_SIZE,
01822                                     query_info->seq.data,
01823                                     query_info->seq.length);
01824         }
01825     }
01826     return compo_query_info;
01827 }
01828 
01829 
01830 /**
01831  * Create a new object of type BlastCompo_GappingParams.  The new
01832  * object contains the parameters needed by the composition adjustment
01833  * library to compute a gapped alignment.
01834  *
01835  * @param context     the data structures needed by callback functions
01836  *                    that perform the gapped alignments.
01837  * @param extendParams parameters used for a gapped extension
01838  * @param num_queries  the number of queries in the concatenated query
01839  */
01840 static BlastCompo_GappingParams *
01841 s_GappingParamsNew(BlastKappa_GappingParamsContext * context,
01842                    const BlastExtensionParameters* extendParams,
01843                    int num_queries)
01844 {
01845     int i;
01846     double min_lambda = DBL_MAX;   /* smallest gapped Lambda */
01847     const BlastScoringParameters * scoring = context->scoringParams;
01848     const BlastExtensionOptions * options = extendParams->options;
01849     /* The new object */
01850     BlastCompo_GappingParams * gapping_params = NULL;
01851 
01852     gapping_params = malloc(sizeof(BlastCompo_GappingParams));
01853     if (gapping_params != NULL) {
01854         gapping_params->gap_open = scoring->gap_open;
01855         gapping_params->gap_extend = scoring->gap_extend;
01856         gapping_params->context = context;
01857     }
01858     
01859     for (i = 0;  i < num_queries;  i++) {
01860         if (context->sbp->kbp_gap[i] != NULL &&
01861             context->sbp->kbp_gap[i]->Lambda < min_lambda) {
01862             min_lambda = context->sbp->kbp_gap[i]->Lambda;
01863         }
01864     }
01865     gapping_params->x_dropoff = (Int4)
01866         MAX(options->gap_x_dropoff_final*NCBIMATH_LN2 / min_lambda,
01867             extendParams->gap_x_dropoff_final);
01868     context->gap_align->gap_x_dropoff = gapping_params->x_dropoff;
01869 
01870     return gapping_params;
01871 }
01872 
01873 
01874 /** Callbacks used by the Blast_RedoOneMatch* routines */
01875 static const Blast_RedoAlignCallbacks
01876 redo_align_callbacks = {
01877     s_CalcLambda, s_SequenceGetRange, s_RedoOneAlignment,
01878     s_NewAlignmentUsingXdrop, s_FreeEditScript
01879 };
01880 
01881 
01882 /** 
01883  * Read the parameters required for the Blast_RedoOneMatch* functions from
01884  * the corresponding parameters in standard BLAST datatypes.  Return a new
01885  * object representing these parameters.
01886  */
01887 static Blast_RedoAlignParams *
01888 s_GetAlignParams(BlastKappa_GappingParamsContext * context,
01889                  BLAST_SequenceBlk * queryBlk,
01890                  BlastQueryInfo* queryInfo,
01891                  const BlastHitSavingParameters* hitParams,
01892                  const BlastExtensionParameters* extendParams)
01893 {
01894     int status = 0;    /* status code */
01895     int rows;          /* number of rows in the scoring matrix */
01896     int cutoff_s;      /* cutoff score for saving an alignment */
01897     double cutoff_e;   /* cutoff evalue for saving an alignment */
01898     BlastCompo_GappingParams *
01899         gapping_params = NULL;    /* parameters needed to compute a gapped
01900                                      alignment */
01901     Blast_MatrixInfo *
01902         scaledMatrixInfo;         /* information about the scoring matrix */
01903     /* does this kind of search translate the database sequence */
01904     int subject_is_translated = context->prog_number == eBlastTypeTblastn;
01905     /* is this a positiion-based search */
01906     Boolean positionBased = (Boolean) (context->sbp->psi_matrix != NULL);
01907     /* will BLAST_LinkHsps be called to assign e-values */
01908     Boolean do_link_hsps = (hitParams->do_sum_stats);
01909     ECompoAdjustModes compo_adjust_mode =
01910         (ECompoAdjustModes) extendParams->options->compositionBasedStats;
01911     
01912     if (do_link_hsps) {
01913         ASSERT(hitParams->link_hsp_params != NULL);
01914         cutoff_s =
01915             (int) (hitParams->cutoff_score_min * context->localScalingFactor);
01916     } else {
01917         /* There is no cutoff score; we consider e-values instead */
01918         cutoff_s = 1;
01919     }
01920     cutoff_e = hitParams->options->expect_value;
01921     rows = positionBased ? queryInfo->max_length : BLASTAA_SIZE;
01922     scaledMatrixInfo = Blast_MatrixInfoNew(rows, BLASTAA_SIZE, positionBased);
01923     status = s_MatrixInfoInit(scaledMatrixInfo, queryBlk, context->sbp,
01924                               context->localScalingFactor,
01925                               context->scoringParams->options->matrix);
01926     if (status != 0) {
01927         return NULL;
01928     }
01929     gapping_params = s_GappingParamsNew(context, extendParams,
01930                                         queryInfo->num_queries);
01931     if (gapping_params == NULL) {
01932         return NULL;
01933     } else {
01934         return
01935             Blast_RedoAlignParamsNew(&scaledMatrixInfo, &gapping_params,
01936                                      compo_adjust_mode, positionBased,
01937                                      subject_is_translated,
01938                                      queryInfo->max_length, cutoff_s, cutoff_e,
01939                                      do_link_hsps, &redo_align_callbacks);
01940     }
01941 }
01942 
01943 
01944 /**
01945  * Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.
01946  *
01947  * @param results        BLAST core external results structure (pre-SeqAlign)
01948  *                       [out]
01949  * @param heaps          an array of BlastCompo_Heap objects
01950  * @param hitlist_size   size of each list in the results structure above [in]
01951  */
01952 static void
01953 s_FillResultsFromCompoHeaps(BlastHSPResults * results,
01954                             BlastCompo_Heap heaps[],
01955                             Int4 hitlist_size)
01956 {
01957     int query_index;   /* loop index */
01958     int num_queries;   /* Number of queries in this search */
01959 
01960     num_queries = results->num_queries; 
01961     for (query_index = 0;  query_index < num_queries;  query_index++) {
01962         BlastHSPList* hsp_list;
01963         BlastHitList* hitlist;
01964         BlastCompo_Heap * heap = &heaps[query_index];
01965 
01966         results->hitlist_array[query_index] = Blast_HitListNew(hitlist_size);
01967         hitlist = results->hitlist_array[query_index];
01968 
01969         while (NULL != (hsp_list = BlastCompo_HeapPop(heap))) {
01970             Blast_HitListUpdate(hitlist, hsp_list);
01971         }
01972     }
01973     Blast_HSPResultsReverseOrder(results);
01974 }
01975 
01976 
01977 /** Remove all matches from a BlastCompo_Heap. */
01978 static void s_ClearHeap(BlastCompo_Heap * self)
01979 {
01980     BlastHSPList* hsp_list = NULL;   /* an element of the heap */
01981 
01982     while (NULL != (hsp_list = BlastCompo_HeapPop(self))) {
01983         hsp_list = Blast_HSPListFree(hsp_list);
01984     }
01985 }
01986 
01987 
01988 /**
01989  *  Recompute alignments for each match found by the gapped BLAST
01990  *  algorithm.
01991  */
01992 Int2
01993 Blast_RedoAlignmentCore(EBlastProgramType program_number,
01994                         BLAST_SequenceBlk * queryBlk,
01995                         BlastQueryInfo* queryInfo,
01996                         BlastScoreBlk* sbp,
01997                         BlastHSPStream* hsp_stream,
01998                         const BlastSeqSrc* seqSrc,
01999                         Int4 default_db_genetic_code,
02000                         BlastScoringParameters* scoringParams,
02001                         const BlastExtensionParameters* extendParams,
02002                         const BlastHitSavingParameters* hitParams,
02003                         const PSIBlastOptions* psiOptions,
02004                         BlastHSPResults* results)
02005 {
02006     int status_code = 0;                    /* return value code */
02007     /* the factor by which to scale the scoring system in order to
02008      * obtain greater precision */
02009     double localScalingFactor;
02010     /* the values of the search parameters that will be recorded, altered
02011      * in the search structure in this routine, and then restored before
02012      * the routine exits. */
02013     BlastKappa_SavedParameters *savedParams = NULL;
02014     /* forbidden ranges for each database position (used in
02015      * Smith-Waterman alignments) */
02016     Blast_ForbiddenRanges forbidden = {0,};
02017     /* a collection of alignments for each query sequence with
02018      * sequences from the database */
02019     BlastCompo_Heap * redoneMatches = NULL;
02020     /* stores all fields needed for computing a compositionally
02021      * adjusted score matrix using Newton's method */
02022     Blast_CompositionWorkspace *NRrecord = NULL;
02023     /* loop index */
02024     int query_index;
02025     /* number of queries in the concatenated query */
02026     int numQueries = queryInfo->num_queries;
02027     /* keeps track of gapped alignment params */
02028     BlastGapAlignStruct* gapAlign = NULL;
02029     /* All alignments above this value will be reported, no matter how
02030      * many. */
02031     double inclusion_ethresh;
02032     /* array of lists of alignments for each query to this subject */
02033     BlastCompo_Alignment ** alignments = NULL;
02034 
02035     BlastCompo_QueryInfo * query_info = NULL;
02036     Blast_RedoAlignParams * redo_align_params = NULL;
02037     Boolean positionBased = (Boolean) (sbp->psi_matrix != NULL);
02038     ECompoAdjustModes compo_adjust_mode =
02039         (ECompoAdjustModes) extendParams->options->compositionBasedStats;
02040     Boolean smithWaterman =
02041         (Boolean) (extendParams->options->eTbackExt == eSmithWatermanTbck);
02042     /* alignment data for the current query-subject match */
02043     BlastHSPList* thisMatch = NULL;
02044     /* existing alignments for a match */
02045     BlastCompo_Alignment * incoming_aligns = NULL;
02046     Int4      **matrix;                   /* score matrix */
02047     BlastKappa_GappingParamsContext gapping_params_context;
02048 
02049     double pvalueForThisPair = (-1); /* p-value for this match
02050                                         for composition; -1 == no adjustment*/
02051     double LambdaRatio; /*lambda ratio*/
02052     /* which test function do we use to see if a composition-adjusted
02053        p-value is desired; value needs to be passed in eventually*/
02054     int compositionTestIndex = extendParams->options->unifiedP;
02055     Uint1* genetic_code_string = GenCodeSingletonFind(default_db_genetic_code);
02056 
02057     ASSERT(program_number == eBlastTypeBlastp ||
02058            program_number == eBlastTypeTblastn ||
02059            program_number == eBlastTypePsiBlast);
02060 
02061     if (positionBased) {
02062         matrix = sbp->psi_matrix->pssm->data;
02063     } else {
02064         matrix = sbp->matrix->data;
02065     }
02066     /**** Validate parameters *************/
02067     if (matrix == NULL) {
02068         return -1;
02069     }
02070     if (0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20") &&
02071         compo_adjust_mode == eNoCompositionBasedStats) {
02072         return -1;                   /* BLOSUM62_20 only makes sense if
02073                                       * compo_adjust_mode is on */
02074     }
02075     if (positionBased) {
02076         /* Position based searches can only use traditional
02077          * composition based stats */
02078         if ((int) compo_adjust_mode > 1) {
02079             compo_adjust_mode = eCompositionBasedStats;
02080         }
02081         /* A position-based search can only have one query */
02082         ASSERT(queryInfo->num_queries == 1);
02083         ASSERT(queryBlk->length == (Int4)sbp->psi_matrix->pssm->ncols);
02084     }
02085     if ((int) compo_adjust_mode > 1 &&
02086         !Blast_FrequencyDataIsAvailable(scoringParams->options->matrix)) {
02087         return -1;   /* Unsupported matrix */
02088     }
02089     /*****************/
02090     inclusion_ethresh = (psiOptions /* this can be NULL for CBl2Seq */
02091                          ? psiOptions->inclusion_ethresh 
02092                          : PSI_INCLUSION_ETHRESH);
02093     ASSERT(inclusion_ethresh != 0.0);
02094 
02095     /* Initialize savedParams */
02096     savedParams =
02097         s_SavedParametersNew(queryInfo->max_length, queryInfo->num_queries,
02098                              compo_adjust_mode, positionBased);
02099     if (savedParams == NULL) {
02100         status_code = -1;
02101         goto function_cleanup;
02102     }
02103     status_code =
02104         s_RecordInitialSearch(savedParams, sbp, scoringParams,
02105                               queryInfo->max_length, compo_adjust_mode,
02106                               positionBased);
02107     if (status_code != 0) {
02108         goto function_cleanup;
02109     }
02110     if (compo_adjust_mode != eNoCompositionBasedStats) {
02111         if((0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20"))) {
02112             localScalingFactor = SCALING_FACTOR / 10;
02113         } else {
02114             localScalingFactor = SCALING_FACTOR;
02115         }
02116     } else {
02117         localScalingFactor = 1.0;
02118     }
02119     s_RescaleSearch(sbp, scoringParams, queryInfo->num_queries,
02120                     localScalingFactor);
02121     status_code =
02122         BLAST_GapAlignStructNew(scoringParams, extendParams,
02123                                 BlastSeqSrcGetMaxSeqLen(seqSrc), sbp,
02124                                 &gapAlign);
02125     if (status_code != 0) {
02126         return (Int2) status_code;
02127     }
02128     gapping_params_context.gap_align = gapAlign;
02129     gapping_params_context.scoringParams = scoringParams;
02130     gapping_params_context.sbp = sbp;
02131     gapping_params_context.localScalingFactor = localScalingFactor;
02132     gapping_params_context.prog_number = program_number;
02133     redo_align_params =
02134         s_GetAlignParams(&gapping_params_context, queryBlk, queryInfo, 
02135                          hitParams, extendParams);
02136     if (redo_align_params == NULL) {
02137         status_code = -1;
02138         goto function_cleanup;
02139     }
02140     query_info = s_GetQueryInfo(queryBlk->sequence, queryInfo);
02141     if (query_info == NULL) {
02142         status_code = -1;
02143         goto function_cleanup;
02144     }
02145     if(smithWaterman) {
02146         status_code =
02147             Blast_ForbiddenRangesInitialize(&forbidden, queryInfo->max_length);
02148         if (status_code != 0) {
02149             goto function_cleanup;
02150         }
02151     }
02152     redoneMatches = calloc(numQueries, sizeof(BlastCompo_Heap));
02153     if (redoneMatches == NULL) {
02154         status_code = -1;
02155         goto function_cleanup;
02156     }
02157     for (query_index = 0;  query_index < numQueries;  query_index++) {
02158         status_code =
02159             BlastCompo_HeapInitialize(&redoneMatches[query_index],
02160                                       hitParams->options->hitlist_size,
02161                                       inclusion_ethresh);
02162         if (status_code != 0) {
02163             goto function_cleanup;
02164         }
02165     }
02166     if( (int) compo_adjust_mode > 1 && !positionBased ) {
02167         NRrecord = Blast_CompositionWorkspaceNew();
02168         status_code =
02169             Blast_CompositionWorkspaceInit(NRrecord,
02170                                            scoringParams->options->matrix);
02171         if (status_code != 0) {
02172             goto function_cleanup;
02173         }
02174     }
02175     alignments = calloc(numQueries, sizeof(BlastCompo_Alignment *));
02176     if (alignments == NULL) {
02177         status_code = -1;
02178         goto function_cleanup;
02179     }
02180     while (BlastHSPStreamRead(hsp_stream, &thisMatch) != kBlastHSPStream_Eof) {
02181         /* for all matching sequences */
02182         Blast_KarlinBlk * kbp;
02183 
02184         /* the data for a matching database sequence */
02185         BlastCompo_MatchingSequence matchingSeq = {0,};
02186         if(thisMatch->hsp_array == NULL) {
02187             continue;
02188         }
02189         if (BlastCompo_EarlyTermination(thisMatch->best_evalue,
02190                                         redoneMatches, numQueries)) {
02191             break;
02192         }
02193         /* Get the sequence for this match */
02194         status_code =
02195             s_MatchingSequenceInitialize(&matchingSeq, program_number,
02196                                          seqSrc, default_db_genetic_code,
02197                                          thisMatch->oid);
02198         if (status_code != 0) {
02199             goto match_loop_cleanup;
02200         }
02201         incoming_aligns =
02202             s_ResultHspToDistinctAlign(thisMatch->hsp_array, thisMatch->hspcnt,
02203                                        queryInfo, localScalingFactor);
02204         if (incoming_aligns == NULL) {
02205             status_code = -1;
02206             goto match_loop_cleanup;
02207         }
02208         /* All alignments in thisMatch should be to the same query */
02209         kbp = sbp->kbp_gap[thisMatch->query_index];
02210         if (smithWaterman) {
02211             status_code =
02212                 Blast_RedoOneMatchSmithWaterman(alignments,
02213                                                 redo_align_params,
02214                                                 incoming_aligns,
02215                                                 thisMatch->hspcnt,
02216                                                 kbp->Lambda, kbp->logK,
02217                                                 &matchingSeq, query_info,
02218                                                 numQueries,
02219                                                 matrix, BLASTAA_SIZE,
02220                                                 NRrecord, &forbidden,
02221                                                 redoneMatches,
02222                                                 &pvalueForThisPair,
02223                                                 compositionTestIndex,
02224                                                 &LambdaRatio);
02225         } else {
02226             status_code =
02227                 Blast_RedoOneMatch(alignments, redo_align_params,
02228                                    incoming_aligns, thisMatch->hspcnt,
02229                                    kbp->Lambda, &matchingSeq,
02230                                    queryInfo->max_length, query_info,
02231                                    numQueries, matrix, BLASTAA_SIZE,
02232                                    NRrecord, &pvalueForThisPair,
02233                                    compositionTestIndex,
02234                                    &LambdaRatio);
02235         }
02236         if (status_code != 0) {
02237             goto match_loop_cleanup;
02238         }
02239         for (query_index = 0;  query_index < numQueries;  query_index++) {
02240             /* Loop over queries */
02241             if( alignments[query_index] != NULL) { /* alignments were found */
02242                 double bestEvalue;   /* best e-value among alignments in the
02243                                         hitlist */
02244                 Int4 bestScore;      /* best score among alignments in
02245                                         the hitlist */
02246                 /* a hitlist containing the newly-computed alignments */
02247                 BlastHSPList * hsp_list = NULL;
02248                 void * discardedAligns = NULL;
02249                 hsp_list =
02250                     s_HSPListFromDistinctAlignments(&alignments[query_index],
02251                                                     matchingSeq.index,
02252                                                     queryInfo);
02253                 if (hsp_list == NULL) {
02254                     status_code = -1;
02255                     goto match_loop_cleanup;
02256                 }
02257                 if (hsp_list->hspcnt > 1) {
02258                     s_HitlistReapContained(hsp_list->hsp_array,
02259                                            &hsp_list->hspcnt);
02260                 }
02261                 status_code =
02262                     s_HitlistEvaluateAndPurge(&bestScore, &bestEvalue,
02263                                               hsp_list,
02264                                               seqSrc,
02265                                               matchingSeq.length,
02266                                               program_number,
02267                                               queryInfo, query_index,
02268                                               sbp, hitParams,
02269                                               pvalueForThisPair, LambdaRatio,
02270                                               matchingSeq.index);
02271                 if (status_code != 0) {
02272                     goto query_loop_cleanup;
02273                 }
02274                 if (bestEvalue <= hitParams->options->expect_value &&
02275                     BlastCompo_HeapWouldInsert(&redoneMatches[query_index],
02276                                                bestEvalue, bestScore,
02277                                                thisMatch->oid)) {
02278                     /* The best alignment is significant */
02279                     s_HSPListNormalizeScores(hsp_list,
02280                                              kbp->Lambda, kbp->logK,
02281                                              localScalingFactor);
02282                      s_ComputeNumIdentities(queryBlk, queryInfo, seqSrc,
02283                                            hsp_list, scoringParams->options,
02284                         genetic_code_string);
02285                     status_code =
02286                         BlastCompo_HeapInsert(&redoneMatches[query_index],
02287                                               hsp_list, bestEvalue,
02288                                               bestScore, thisMatch->oid,
02289                                               &discardedAligns);
02290                     if (status_code == 0) {
02291                         hsp_list = NULL;
02292                     } else {
02293                         goto query_loop_cleanup;
02294                     }
02295                     if (discardedAligns != NULL) {
02296                         Blast_HSPListFree(discardedAligns);
02297                     }
02298                 }
02299 query_loop_cleanup:
02300                 Blast_HSPListFree(hsp_list);
02301                 if (status_code != 0) {
02302                     goto match_loop_cleanup;
02303                 }
02304             } /* end if any alignments were found */
02305         } /* end loop over queries */
02306 match_loop_cleanup:
02307         if (status_code != 0) {
02308             for (query_index = 0;  query_index < numQueries;  query_index++) {
02309                 BlastCompo_AlignmentsFree(&alignments[query_index],
02310                                           s_FreeEditScript);
02311             }
02312         }
02313         s_MatchingSequenceRelease(&matchingSeq);
02314         thisMatch = Blast_HSPListFree(thisMatch);
02315         BlastCompo_AlignmentsFree(&incoming_aligns, NULL);
02316         if (status_code != 0) {
02317             goto function_cleanup;
02318         }
02319     }
02320     /* end for all matching sequences */
02321 function_cleanup:
02322     sfree(alignments);
02323     if (status_code == 0) {
02324         s_FillResultsFromCompoHeaps(results, redoneMatches,
02325                                     hitParams->options->hitlist_size);
02326     } else {
02327         if (redoneMatches != NULL) {
02328             s_ClearHeap(&redoneMatches[0]);
02329         }
02330     }
02331     free(query_info);
02332     Blast_RedoAlignParamsFree(&redo_align_params);
02333     if (redoneMatches != NULL) {
02334         for (query_index = 0;  query_index < numQueries;  query_index++) {
02335             BlastCompo_HeapRelease(&redoneMatches[query_index]);
02336         }
02337         sfree(redoneMatches); redoneMatches = NULL;
02338     }
02339     if (smithWaterman) {
02340         Blast_ForbiddenRangesRelease(&forbidden);
02341     }
02342     if (gapAlign != NULL) {
02343         gapAlign = BLAST_GapAlignStructFree(gapAlign);
02344     }
02345     s_RestoreSearch(sbp, scoringParams, savedParams, queryBlk->length,
02346                     positionBased, compo_adjust_mode);
02347     s_SavedParametersFree(&savedParams);
02348     Blast_CompositionWorkspaceFree(&NRrecord);
02349 
02350     return (Int2) status_code;
02351 }
02352 
02353 

Generated on Sun Dec 6 22:17:18 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:50 2009 by modify_doxy.py rev. 173732