src/algo/blast/unit_tests/api/linkhsp_unit_test.cpp

Go to the documentation of this file.
00001 /*  $Id: linkhsp_unit_test.cpp 171622 2009-09-25 15:08:10Z avagyanv $
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================
00025 *
00026 * Author: Ilya Dondoshansky
00027 *
00028 * File Description:
00029 *   Unit test module to test the algorithms for linking HSPs
00030 *
00031 * ===========================================================================
00032 */
00033 #include <ncbi_pch.hpp>
00034 #include <corelib/test_boost.hpp>
00035 
00036 #include <corelib/ncbitime.hpp>
00037 #include <objmgr/object_manager.hpp>
00038 #include <objmgr/scope.hpp>
00039 
00040 #include <objects/seqloc/Seq_loc.hpp>
00041 #include <objmgr/util/sequence.hpp>
00042 
00043 #include "test_objmgr.hpp"
00044 
00045 #include <algo/blast/core/blast_encoding.h>
00046 #include <algo/blast/core/blast_options.h>
00047 #include <algo/blast/core/blast_setup.h>
00048 #include <algo/blast/core/blast_hits.h>
00049 #include <algo/blast/core/link_hsps.h>
00050 #include <algo/blast/api/blast_options.hpp>
00051 #include <blast_objmgr_priv.hpp>
00052 #include <algo/blast/api/seqsrc_seqdb.hpp>
00053 
00054 using namespace std;
00055 using namespace ncbi;
00056 using namespace ncbi::objects;
00057 using namespace ncbi::blast;
00058 
00059 struct AllCutoffScores {
00060     Int4 x_drop_ungapped;
00061     Int4 x_drop_gapped;
00062     Int4 x_drop_final;
00063     Int4 gap_trigger;
00064     Int4 cutoff_score_ungapped;
00065     Int4 cutoff_score_final;
00066     bool do_sum_stats;
00067     Int4 cutoff_small_gap;
00068     Int4 cutoff_big_gap;
00069 };
00070 
00071 /// Sets up the query information structure without a real sequence. Used 
00072 /// only for blastn test below, where query sequence is not available. 
00073 static void 
00074 s_SetupNuclQueryInfo(Uint4 query_length, BlastQueryInfo* *query_info)
00075 {
00076     (*query_info) = BlastQueryInfoNew(eBlastTypeBlastn, 1);
00077     (*query_info)->contexts[0].query_offset = 0;
00078     (*query_info)->contexts[0].query_length = query_length;
00079     (*query_info)->contexts[1].query_offset = query_length + 1;
00080     (*query_info)->contexts[1].query_length = query_length;
00081     (*query_info)->max_length = query_length;
00082 }
00083 
00084 struct LinkHspTestFixture {
00085 
00086     EBlastProgramType m_ProgramType;
00087     EProgram m_Program;
00088     BlastHSPList* m_HspList;
00089     BlastScoreBlk* m_ScoreBlk;
00090     CBlastQueryInfo m_QueryInfo;
00091     Int4 m_SubjectLength;
00092     BlastHitSavingParameters* m_HitParams;
00093 
00094     ~LinkHspTestFixture() {
00095         freeStructures();
00096     }
00097 
00098     /// Sets up the input list of HSPs. These must be sorted by score.
00099     void setupHSPListTransl()
00100     {
00101         const int kNumHsps = 10;
00102         const int kScores[kNumHsps] = 
00103             { 1023, 282, 246, 202, 142, 117, 98, 92, 63, 53 };
00104         const int kQueryOffsets[kNumHsps] = 
00105             { 11, 346, 399, 244, 287, 224, 311, 218, 0, 404};
00106         const int kQueryLengths[kNumHsps] = 
00107             { 244, 56, 49, 49, 104, 29, 36, 37, 12, 25 };
00108         const int kSubjectFrames[kNumHsps] = 
00109             { 2, 2, 3, 2, 1, 1, 2, 3, 3, 2 };
00110         const int kSubjectOffsets[kNumHsps] = 
00111             { 1372, 2677, 2756, 2062, 2209, 1832, 2351, 1732, 1140, 2683 };
00112         const int kSubjectLengths[kNumHsps] = 
00113             {300, 56, 49, 50, 75, 29, 32, 36, 12, 26 };
00114 
00115         m_HspList = Blast_HSPListNew(0);       
00116         Int4 index;
00117         BlastHSP* hsp;
00118 
00119         for (index = 0; index < kNumHsps; ++index) {
00120             m_HspList->hsp_array[index] = hsp = 
00121                 (BlastHSP*) calloc(1, sizeof(BlastHSP));
00122             hsp->score = kScores[index];
00123             if (m_ProgramType == eBlastTypeTblastn) {
00124                 hsp->query.offset = kQueryOffsets[index];
00125                 hsp->query.end = kQueryOffsets[index] + kQueryLengths[index];
00126                 hsp->subject.offset = kSubjectOffsets[index];
00127                 hsp->subject.end = 
00128                     kSubjectOffsets[index] + kSubjectLengths[index];
00129                 hsp->subject.frame = kSubjectFrames[index];
00130             } else {
00131                 hsp->query.offset = kSubjectOffsets[index];
00132                 hsp->query.end = 
00133                     kSubjectOffsets[index] + kSubjectLengths[index];
00134                 hsp->subject.offset = kQueryOffsets[index];
00135                 hsp->subject.end = kQueryOffsets[index] + kQueryLengths[index];
00136                 hsp->query.frame = kSubjectFrames[index];
00137             }
00138         }
00139 
00140         m_HspList->hspcnt = kNumHsps;
00141     }
00142 
00143     /// Sets up the scoring block with the Karlin-Altschul parameters
00144     void setupScoreBlk(Uint1* seqbuf, bool gapped,
00145                        BlastScoringOptions** score_options_ptr) 
00146     {
00147         Int2 status;
00148         BlastScoringOptions* score_options = NULL;
00149         m_ScoreBlk = 
00150             BlastScoreBlkNew((m_ProgramType==eBlastTypeBlastn ? 
00151                               BLASTNA_SEQ_CODE : BLASTAA_SEQ_CODE), 
00152                              m_QueryInfo->last_context+1);
00153 
00154         BlastScoringOptionsNew(m_ProgramType, &score_options);
00155         score_options->gapped_calculation = (gapped ? TRUE : FALSE);
00156 
00157         if (m_ProgramType != eBlastTypeBlastn) {
00158             BOOST_REQUIRE(!strcmp("BLOSUM62", score_options->matrix));
00159         }
00160         status = Blast_ScoreBlkMatrixInit(m_ProgramType, score_options, 
00161             m_ScoreBlk, &BlastFindMatrixPath);
00162 
00163         BOOST_REQUIRE(status == 0);
00164 
00165         Blast_Message* message = NULL;
00166         status = Blast_ScoreBlkKbpUngappedCalc(m_ProgramType, m_ScoreBlk, 
00167                                            seqbuf, m_QueryInfo, &message);
00168         message = Blast_MessageFree(message);
00169 
00170         BOOST_REQUIRE(status == 0);
00171 
00172         if (gapped) {
00173             status = Blast_ScoreBlkKbpGappedCalc(m_ScoreBlk, score_options, 
00174                                          m_ProgramType, m_QueryInfo, NULL);
00175             BOOST_REQUIRE(status == 0);
00176             m_ScoreBlk->kbp_gap = m_ScoreBlk->kbp_gap_std;
00177         }
00178 
00179         m_ScoreBlk->kbp = m_ScoreBlk->kbp_std;
00180 
00181         if (score_options_ptr)
00182             *score_options_ptr = score_options;
00183         else
00184             BlastScoringOptionsFree(score_options);
00185     }
00186 
00187     /// Sets up the hit saving parameters structures. Only the fields relevant
00188     /// to linking HSPs are filled.
00189     void setupHitParams(int longest_intron, double evalue)
00190     {
00191         int cutoff_small_gap = (m_ProgramType == eBlastTypeBlastn ? 16 : 42);
00192         m_HitParams = 
00193             (BlastHitSavingParameters*) calloc(1, sizeof(BlastHitSavingParameters));
00194         m_HitParams->options = (BlastHitSavingOptions *)
00195             calloc(1, sizeof(BlastHitSavingOptions));
00196         m_HitParams->options->expect_value = evalue;
00197         BlastLinkHSPParametersNew(m_ProgramType, TRUE,
00198                                   &m_HitParams->link_hsp_params);
00199         m_HitParams->link_hsp_params->cutoff_big_gap = 0;
00200         m_HitParams->link_hsp_params->cutoff_small_gap = cutoff_small_gap;
00201         m_HitParams->link_hsp_params->longest_intron = longest_intron;
00202     }
00203 
00204     /// Fills the effective lengths data into the query information structure
00205     void 
00206     fillEffectiveLengths(const BlastScoringOptions* score_options,
00207                          Int8 db_length, Int4 db_num_seq)
00208     {
00209         BlastEffectiveLengthsOptions* eff_len_options = NULL;
00210         BlastEffectiveLengthsOptionsNew(&eff_len_options);
00211         BlastEffectiveLengthsParameters* eff_len_params = NULL;
00212         BlastEffectiveLengthsParametersNew(eff_len_options, db_length, 
00213                                            db_num_seq, &eff_len_params);
00214         BLAST_CalcEffLengths(m_ProgramType, score_options, eff_len_params, 
00215                              m_ScoreBlk, m_QueryInfo, NULL);
00216         BlastEffectiveLengthsParametersFree(eff_len_params);
00217         BlastEffectiveLengthsOptionsFree(eff_len_options);
00218     }
00219 
00220     /// Complete set-up before calling the HSP linking algorithm
00221     void setupLinkHspInputTblastn()
00222     {
00223         const string kProtGi = "9930103";
00224         const string kNuclGi = "9930102";
00225         const Uint4 kProtLength = 448;
00226         const Uint4 kNuclLength = 8872;
00227 
00228         string qid_str = "gi|" + ((m_ProgramType == eBlastTypeTblastn) ? 
00229                                   kProtGi : kNuclGi);
00230         CSeq_id query_id(qid_str);
00231         TSeqLocVector query_v;
00232 
00233         if (m_ProgramType == eBlastTypeBlastx) {
00234             auto_ptr<SSeqLoc> qsl(
00235                 CTestObjMgr::Instance().CreateSSeqLoc(query_id, 
00236                                                       eNa_strand_both));
00237             query_v.push_back(*qsl);
00238         } else {
00239             auto_ptr<SSeqLoc> qsl(
00240                 CTestObjMgr::Instance().CreateSSeqLoc(query_id));
00241             query_v.push_back(*qsl);
00242         }
00243 
00244         CBlastOptions options;
00245         options.SetStrandOption(eNa_strand_unknown);
00246         if (m_ProgramType == eBlastTypeBlastx)
00247             options.SetQueryGeneticCode(1);
00248         
00249         options.SetProgram(m_Program);
00250         CBLAST_SequenceBlk query_blk;
00251         TSearchMessages blast_msg;
00252 
00253         ENa_strand strand_opt = options.GetStrandOption();
00254  
00255         SetupQueryInfo(query_v, m_ProgramType, strand_opt, &m_QueryInfo);
00256         SetupQueries(query_v, m_QueryInfo, &query_blk, 
00257                      m_ProgramType, strand_opt, blast_msg);
00258         ITERATE(TSearchMessages, m, blast_msg) {
00259             BOOST_REQUIRE(m->empty());
00260         }
00261 
00262         BlastScoringOptions* score_options = NULL;
00263         setupScoreBlk(query_blk->sequence, true, &score_options);
00264 
00265         m_SubjectLength = (m_ProgramType == eBlastTypeTblastn ?
00266                            kNuclLength / 3 : kProtLength);
00267         
00268         fillEffectiveLengths(score_options, (Int8)m_SubjectLength, 1);
00269         BlastScoringOptionsFree(score_options);
00270 
00271     }
00272     
00273     /// Frees all the C structures used in the test
00274     void freeStructures()
00275     {
00276         m_HspList = Blast_HSPListFree(m_HspList);
00277 
00278         if (m_HitParams) {
00279             BlastHitSavingOptionsFree(m_HitParams->options);
00280             m_HitParams = BlastHitSavingParametersFree(m_HitParams);
00281         }
00282         m_ScoreBlk = BlastScoreBlkFree(m_ScoreBlk);
00283     }
00284 
00285     /// Test linking with uneven gap sum statistics
00286     void testUnevenGapLinkHsps() {
00287         const int kNumHsps = 8;
00288         const int kLongestIntron = 4000;
00289         const double kEvalue = 1e-10;
00290         const int kNumsLinked[kNumHsps] = { 4, 4, 4, 4, 4, 4, 4, 4 };
00291         const int kScores[kNumHsps] = { 1023, 282, 246, 202, 142, 117, 98, 63 };
00292 
00293         setupLinkHspInputTblastn();
00294         setupHSPListTransl();
00295         setupHitParams(kLongestIntron, kEvalue);
00296 
00297         BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength, 
00298                        m_ScoreBlk, m_HitParams->link_hsp_params, TRUE);
00299 
00300         Blast_HSPListReapByEvalue(m_HspList, m_HitParams->options);
00301 
00302         BOOST_REQUIRE_EQUAL(kNumHsps, m_HspList->hspcnt);
00303 
00304         for (int index = 0; index < kNumHsps; ++index) {
00305             BOOST_REQUIRE_EQUAL(kNumsLinked[index], m_HspList->hsp_array[index]->num);
00306             BOOST_REQUIRE_EQUAL(kScores[index], m_HspList->hsp_array[index]->score); 
00307         }
00308     }
00309 
00310     void setupHSPListForMiddleInsertTest()
00311     {
00312         const int kNumHsps = 5;
00313         const int kScores[kNumHsps] =
00314             { 80, 60, 55, 54, 52 };
00315         const int kQueryOffsets[kNumHsps] =
00316             { 100, 130, 239, 239, 191 };
00317         const int kLengths[kNumHsps] =
00318             { 100, 50, 100, 9, 57 };
00319         const int kSubjectOffsets[kNumHsps] =
00320             { 1100, 1130, 3240, 3240, 2195 };
00321 
00322         m_HspList = Blast_HSPListNew(0);
00323         Int4 index;
00324         BlastHSP* hsp;
00325 
00326         for (index = 0; index < kNumHsps; ++index) {
00327             m_HspList->hsp_array[index] = hsp =
00328                 (BlastHSP*) calloc(1, sizeof(BlastHSP));
00329             hsp->score = kScores[index];
00330             hsp->query.offset = kQueryOffsets[index];
00331             hsp->subject.offset = kSubjectOffsets[index];
00332             hsp->subject.frame = 1;
00333             hsp->query.end = hsp->query.offset + kLengths[index];
00334             hsp->subject.end = hsp->subject.offset + kLengths[index];
00335         }
00336 
00337         m_HspList->hspcnt = kNumHsps;
00338     }
00339 
00340     /// HSP list setup for blastn
00341     void setupHSPListNucl()
00342     {
00343        const int kNumHsps = 8;
00344        const int kScores[kNumHsps] = { 35, 31, 22, 21, 20, 20, 20, 20 };
00345        const int kQueryFrames[kNumHsps] = { 1, 1, 1, -1, 1, -1, -1, -1 };
00346        const int kQueryStarts[kNumHsps] = 
00347            { 790, 790, 791, 4606, 870, 4572, 4526, 4589 }; 
00348        const int kQueryEnds[kNumHsps] = 
00349            { 865, 865, 833, 4635, 894, 4604, 4550, 4629 };
00350        const int kSubjectStarts[kNumHsps] = 
00351            { 453, 3469, 5837, 12508, 5951, 11005, 9899, 7397 };
00352        const int kSubjectEnds[kNumHsps] = 
00353            { 528, 3544, 5879, 12537, 5975, 11037, 9923, 7437 };
00354        Int4 index;
00355        BlastHSP* hsp;
00356 
00357        m_HspList = Blast_HSPListNew(0);       
00358 
00359        for (index = 0; index < kNumHsps; ++index) {
00360       hsp = m_HspList->hsp_array[index] = 
00361          (BlastHSP*) calloc(1, sizeof(BlastHSP));
00362       hsp->score = kScores[index];
00363       hsp->query.offset = kQueryStarts[index];
00364       hsp->query.end = kQueryEnds[index];
00365       hsp->query.frame = kQueryFrames[index];
00366       hsp->context = (kQueryFrames[index] > 0 ? 0 : 1);
00367       hsp->subject.offset = kSubjectStarts[index];
00368       hsp->subject.end = kSubjectEnds[index];
00369       hsp->subject.frame = 1;
00370        }
00371        m_HspList->hspcnt = kNumHsps;
00372     }
00373 
00374     /// Complete set-up before calling the HSP linking algorithm
00375     void setupLinkHspInputBlastn()
00376     {
00377         const Uint4 kQueryLength = 5419;
00378         const Int8 kEffDbLength = 122632232;
00379 
00380         m_ProgramType = eBlastTypeBlastn;
00381         m_Program = eBlastn;
00382         
00383         // In subject sequence block, we only need to fill sequence length.
00384         s_SetupNuclQueryInfo(kQueryLength, &m_QueryInfo); 
00385         m_SubjectLength = 12991;
00386 
00387         CSeq_id seqid("gi|24638835");
00388         pair<TSeqPos, TSeqPos> range(26993,32411);
00389 
00390         auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(seqid, range));
00391 
00392         SBlastSequence sequence(
00393             GetSequence(*sl->seqloc, eBlastEncodingNucleotide,
00394                         sl->scope, eNa_strand_both, eSentinels));
00395         BlastScoringOptions* score_options = NULL;
00396         setupScoreBlk(sequence.data.get(), false, &score_options);
00397         
00398         fillEffectiveLengths(score_options, kEffDbLength, 1);
00399         BlastScoringOptionsFree(score_options);
00400 
00401         setupHSPListNucl();
00402     }
00403 
00404     AllCutoffScores* 
00405     setupCutoffScores(bool gapped, Int8 db_length, Uint4 db_num_seq,
00406                       Uint4 subj_length, int longest_intron=0)
00407     {
00408         BlastInitialWordOptions* word_options = NULL;
00409         BlastExtensionOptions* ext_options = NULL;
00410         BlastHitSavingOptions* hit_options = NULL;
00411 
00412         BlastInitialWordOptionsNew(m_ProgramType, &word_options);
00413         BlastExtensionOptionsNew(m_ProgramType, &ext_options, true);
00414         if (m_ProgramType == eBlastTypeBlastn) {
00415             word_options->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_NUCL;
00416             ext_options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
00417             ext_options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
00418         }
00419         BlastHitSavingOptionsNew(m_ProgramType, &hit_options, gapped);
00420         if (longest_intron > 0)
00421              hit_options->longest_intron = longest_intron;
00422 
00423         BlastInitialWordParameters* word_params = NULL;
00424         BlastExtensionParameters* ext_params = NULL;
00425 
00426         CRef<CSeq_id> qid;
00427         TSeqLocVector qv;
00428         
00429         if (m_ProgramType == eBlastTypeBlastn || m_ProgramType == eBlastTypeBlastx || 
00430             m_ProgramType == eBlastTypeTblastx) {
00431             qid.Reset(new CSeq_id("gi|555"));
00432             auto_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(*qid, 
00433                                                              eNa_strand_both));
00434             qv.push_back(*qsl);
00435         } else {
00436             qid.Reset(new CSeq_id("gi|129295"));
00437             auto_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(*qid));
00438             qv.push_back(*qsl);
00439         }
00440 
00441         CBlastOptions options;
00442         options.SetStrandOption(eNa_strand_unknown);
00443         if (m_ProgramType == eBlastTypeBlastx || 
00444             m_ProgramType == eBlastTypeTblastx)
00445             options.SetQueryGeneticCode(1);
00446 
00447         options.SetProgram(m_Program);
00448         CBLAST_SequenceBlk query_blk;
00449         TSearchMessages blast_msg;
00450 
00451         ENa_strand strand_opt = options.GetStrandOption();
00452 
00453         SetupQueryInfo(qv, m_ProgramType, strand_opt, &m_QueryInfo);
00454         SetupQueries(qv, m_QueryInfo, &query_blk, 
00455                      m_ProgramType, strand_opt, blast_msg);
00456         ITERATE(TSearchMessages, m, blast_msg) {
00457             BOOST_REQUIRE(m->empty());
00458         }
00459 
00460         BlastScoringOptions* score_options = NULL;
00461         setupScoreBlk(query_blk->sequence, gapped, &score_options);
00462 
00463         BlastExtensionParametersNew(m_ProgramType, ext_options, m_ScoreBlk, 
00464                                     m_QueryInfo, &ext_params);
00465         fillEffectiveLengths(score_options, (Int8)db_length, db_num_seq);
00466         score_options = BlastScoringOptionsFree(score_options);
00467 
00468         BlastHitSavingParametersNew(m_ProgramType, hit_options,
00469                                     m_ScoreBlk, m_QueryInfo, subj_length, &m_HitParams);
00470 
00471 
00472         QuerySetUpOptions* query_options = NULL;
00473         BlastQuerySetUpOptionsNew(&query_options);
00474         LookupTableWrap* lookup_wrap = NULL;
00475         LookupTableOptions* lookup_options = NULL;
00476         BlastSeqLoc* blast_seq_loc = BlastSeqLocNew(NULL, 0, m_QueryInfo->contexts[0].query_length-1);
00477         LookupTableOptionsNew(m_ProgramType, &lookup_options);
00478         LookupTableWrapInit(query_blk, lookup_options, query_options, blast_seq_loc, m_ScoreBlk, &lookup_wrap, NULL, NULL);
00479         query_options = BlastQuerySetUpOptionsFree(query_options);
00480 
00481         Uint4 avg_subj_length = (Uint4)(db_length/db_num_seq);
00482         BlastInitialWordParametersNew(m_ProgramType, word_options, m_HitParams, lookup_wrap, 
00483            m_ScoreBlk, m_QueryInfo, avg_subj_length, &word_params);
00484 
00485         blast_seq_loc = BlastSeqLocFree(blast_seq_loc);
00486         lookup_wrap = LookupTableWrapFree(lookup_wrap);
00487         lookup_options = LookupTableOptionsFree(lookup_options);
00488 
00489         BlastLinkHSPParametersUpdate(word_params, m_HitParams, (gapped ? TRUE : FALSE));
00490 
00491         
00492         if (m_HitParams->link_hsp_params && 
00493             m_ProgramType != eBlastTypeBlastn && !gapped) {
00494             CalculateLinkHSPCutoffs(m_ProgramType, m_QueryInfo, m_ScoreBlk, 
00495                m_HitParams->link_hsp_params, word_params, db_length, 
00496                subj_length); 
00497         }
00498 
00499         AllCutoffScores* retval = 
00500             (AllCutoffScores*) calloc(1, sizeof(AllCutoffScores));
00501         retval->x_drop_ungapped = word_params->x_dropoff_max;
00502         retval->x_drop_gapped = ext_params->gap_x_dropoff;
00503         retval->x_drop_final = ext_params->gap_x_dropoff_final;
00504         retval->cutoff_score_ungapped = word_params->cutoff_score_min;
00505         retval->cutoff_score_final = m_HitParams->cutoff_score_min;
00506         retval->do_sum_stats = m_HitParams->do_sum_stats;
00507         if (retval->do_sum_stats) {
00508             retval->cutoff_small_gap = 
00509                 m_HitParams->link_hsp_params->cutoff_small_gap;
00510             retval->cutoff_big_gap = 
00511                 m_HitParams->link_hsp_params->cutoff_big_gap;
00512         }
00513 
00514         BlastInitialWordParametersFree(word_params);
00515         BlastInitialWordOptionsFree(word_options);
00516         BlastExtensionParametersFree(ext_params);
00517         BlastExtensionOptionsFree(ext_options);
00518         // Set to NULL those member fields that are not used in these tests.
00519         m_HspList = NULL;
00520 
00521         return retval;
00522     }
00523 
00524 };
00525 
00526 BOOST_FIXTURE_TEST_SUITE(linkhsp, LinkHspTestFixture)
00527 
00528 /// Test linking with uneven gap sum statistics
00529 BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsTblastn) {
00530     m_ProgramType = eBlastTypeTblastn;
00531     m_Program = eTblastn;
00532     testUnevenGapLinkHsps();
00533 }
00534 
00535 /// Test linking with uneven gap sum statistics
00536 BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsBlastx) {
00537     m_ProgramType = eBlastTypeBlastx;
00538     m_Program = eBlastx;
00539     testUnevenGapLinkHsps();
00540 }
00541 
00542 /// Tests the uneven gap linking where an HSP has to be inserted in the 
00543 /// middle between two higher scoring HSPs that can be linked by themselves.
00544 BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsMiddleInsertion) {
00545     const int kNumHsps = 5;
00546     const int kLongestIntron = 3000;
00547     const double kEvalue = 10;
00548     const int kLinkNums[kNumHsps] = { 3, 1, 3, 1, 3 };
00549     m_ProgramType = eBlastTypeTblastn;
00550     m_Program = eTblastn;
00551 
00552     setupLinkHspInputTblastn();
00553     setupHSPListForMiddleInsertTest();
00554     setupHitParams(kLongestIntron, kEvalue);
00555 
00556     BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength, 
00557                    m_ScoreBlk, m_HitParams->link_hsp_params, TRUE);
00558     for (int index = 0; index < m_HspList->hspcnt; ++index) {
00559         BOOST_REQUIRE_EQUAL(kLinkNums[index], 
00560                              m_HspList->hsp_array[index]->num);
00561     }
00562 }
00563 
00564 /// Test linking with small/large gap sum statistics for tblastn
00565 BOOST_AUTO_TEST_CASE(testEvenGapLinkHspsTblastn) {
00566     const int kNumHsps = 5;
00567     const double kEvalue = 1e-10;
00568     const int kNumsLinked[kNumHsps] = { 1, 2, 2, 1, 1 };
00569     const int kScores[kNumHsps] = { 1023, 282, 246, 202, 142 };
00570 
00571     m_ProgramType = eBlastTypeTblastn;
00572     m_Program = eTblastn;
00573     setupLinkHspInputTblastn();
00574     setupHSPListTransl();
00575 
00576     setupHitParams(0, kEvalue);
00577 
00578     BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength, 
00579                    m_ScoreBlk, m_HitParams->link_hsp_params, TRUE);
00580 
00581     Blast_HSPListReapByEvalue(m_HspList, m_HitParams->options);
00582 
00583     BOOST_REQUIRE_EQUAL(kNumHsps, m_HspList->hspcnt);
00584 
00585     Int4 index;
00586     for (index = 0; index < kNumHsps; ++index) {
00587         BOOST_REQUIRE_EQUAL(kNumsLinked[index], m_HspList->hsp_array[index]->num);
00588         BOOST_REQUIRE_EQUAL(kScores[index], 
00589                              m_HspList->hsp_array[index]->score); 
00590     }
00591 }
00592 
00593 /// Test linking with small/large gap sum statistics for blastn
00594 BOOST_AUTO_TEST_CASE(testEvenGapLinkHspsBlastn) {
00595     const int kNumHsps = 8;
00596     const double kEvalue = 10;
00597     const int kNumsLinked[kNumHsps] = 
00598     { 2, 1, 1, 3, 2, 1, 3, 3 };
00599     const double kEvalues[kNumHsps] = 
00600     { 3e-12, 3e-7, 0.07, 1e-7, 3e-12, 1.1, 1e-7, 1e-7 };
00601 
00602     setupLinkHspInputBlastn();
00603     setupHitParams(0, kEvalue);
00604 
00605     BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength, 
00606                    m_ScoreBlk, m_HitParams->link_hsp_params, FALSE);
00607 
00608     Blast_HSPListReapByEvalue(m_HspList, m_HitParams->options);
00609     BOOST_REQUIRE_EQUAL(kNumHsps, m_HspList->hspcnt);
00610 
00611     for (Int4 index = 0; index < kNumHsps; ++index) {
00612         BOOST_REQUIRE_EQUAL(kNumsLinked[index], 
00613                              m_HspList->hsp_array[index]->num);
00614         BOOST_REQUIRE(fabs(kEvalues[index] - m_HspList->hsp_array[index]->evalue)/kEvalues[index] < 0.5); 
00615     }
00616 }
00617 
00618 static void 
00619 testAllCutoffs(const AllCutoffScores& good_cutoffs, 
00620                AllCutoffScores& cutoffs)
00621 {
00622     BOOST_REQUIRE_EQUAL(good_cutoffs.x_drop_ungapped, 
00623                          cutoffs.x_drop_ungapped);
00624     BOOST_REQUIRE_EQUAL(good_cutoffs.x_drop_gapped, 
00625                          cutoffs.x_drop_gapped);
00626     BOOST_REQUIRE_EQUAL(good_cutoffs.x_drop_final, 
00627                          cutoffs.x_drop_final);
00628     BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_score_ungapped, 
00629                          cutoffs.cutoff_score_ungapped);
00630     BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_score_final, 
00631                          cutoffs.cutoff_score_final);
00632     BOOST_REQUIRE_EQUAL(good_cutoffs.do_sum_stats, 
00633                          cutoffs.do_sum_stats);
00634     BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_small_gap, 
00635                          cutoffs.cutoff_small_gap);
00636     BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_big_gap, 
00637                          cutoffs.cutoff_big_gap);
00638 }
00639 
00640 BOOST_AUTO_TEST_CASE(UngappedBlastnCutoffs)
00641 {
00642     const int kNumDbs = 4;
00643     const Int8 kDbLengths[kNumDbs] = 
00644         { 10000000000LL, 10000000000LL, 3000000000LL, 10000LL };
00645     const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500, 100 };
00646     const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000, 100 };
00647     const AllCutoffScores kGoodCutoffs[kNumDbs] = { 
00648         { 11, 0, 0, 0, 14, 20, true, 14, 0 },
00649         { 11, 0, 0, 0, 12, 20, true, 12, 0 },
00650         { 11, 0, 0, 0, 19, 19, true, 19, 0 },
00651         { 10, 0, 0, 0, 10, 10, true, 10, 0 } };
00652     
00653     AllCutoffScores* cutoffs = NULL;
00654     int index;
00655     m_ProgramType = eBlastTypeBlastn;
00656     m_Program = eBlastn;
00657     for (index = 0; index < kNumDbs; ++index) { 
00658         cutoffs = setupCutoffScores(false, kDbLengths[index], 
00659                      kDbNumSeqs[index], kSubjectLengths[index]);
00660         testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00661         sfree(cutoffs);
00662         freeStructures();
00663         if (index < kNumDbs-1)
00664             BlastQueryInfoFree(m_QueryInfo);
00665     }
00666 }
00667 
00668 BOOST_AUTO_TEST_CASE(UngappedBlastpCutoffs)
00669 {
00670     const Int8 kDbLength =  500000000;
00671     const Uint4 kDbNumSeqs = 1000000;
00672     const int kNumSubjects = 3;
00673     const Uint4 kSubjectLengths[kNumSubjects] = {400, 60, 3000 };
00674     const AllCutoffScores kGoodCutoffs[kNumSubjects] = { 
00675         { 16, 0, 0, 0, 41, 66, true, 41, 38 },
00676         { 16, 0, 0, 0, 41, 66, true, 0, 29 },
00677         { 16, 0, 0, 0, 41, 66, true, 41, 44 } };
00678     AllCutoffScores* cutoffs = NULL;
00679     int index;
00680     m_ProgramType = eBlastTypeBlastp;
00681     m_Program = eBlastp;
00682     for (index = 0; index < kNumSubjects; ++index) { 
00683         cutoffs = setupCutoffScores(false, kDbLength,
00684                           kDbNumSeqs, kSubjectLengths[index]);
00685         testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00686         sfree(cutoffs);
00687         freeStructures();
00688         if (index < kNumSubjects-1)
00689             BlastQueryInfoFree(m_QueryInfo);
00690     }
00691 }
00692 
00693 BOOST_AUTO_TEST_CASE(UngappedBlastxCutoffs)
00694 {
00695     const Int8 kDbLength =  /*500000000*/227102922;
00696     const Uint4 kDbNumSeqs = /*1000000*/761886;
00697     const int kNumSubjects = 3;
00698     const Uint4 kSubjectLengths[kNumSubjects] = { 400, 100, 3000 };
00699     const AllCutoffScores kGoodCutoffs[kNumSubjects] = { 
00700         { 16, 0, 0, 0, 31, 63, true, 31, 37 },
00701         { 16, 0, 0, 0, 31, 63, true,  0, 31 },
00702         { 16, 0, 0, 0, 31, 63, true, 31, 43 } };
00703     AllCutoffScores* cutoffs = NULL;
00704     int index;
00705     m_ProgramType = eBlastTypeBlastx;
00706     m_Program = eBlastx;
00707     for (index = 0; index < kNumSubjects; ++index) {  
00708         cutoffs = setupCutoffScores(false, kDbLength, kDbNumSeqs,
00709                                     kSubjectLengths[index]);
00710         testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00711         sfree(cutoffs);
00712         freeStructures();
00713         if (index < kNumSubjects-1)
00714             BlastQueryInfoFree(m_QueryInfo);
00715     }
00716 }
00717 
00718 BOOST_AUTO_TEST_CASE(UngappedTblastnCutoffs)
00719 {
00720     const int kNumDbs = 3;
00721     const Int8 kDbLengths[kNumDbs] = 
00722         { 10000000000LL, 10000000000LL, 3000000000LL };
00723     const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500 };
00724     const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000 };
00725     const AllCutoffScores kGoodCutoffs[kNumDbs] = { 
00726         { 16, 0, 0, 0, 40, 72, true, 40, 40 },
00727         { 16, 0, 0, 0, 33, 71, true, 33, 35 },
00728         { 16, 0, 0, 0, 41, 69, true, 41, 60 } };
00729 
00730     AllCutoffScores* cutoffs = NULL;
00731     int index;
00732     m_ProgramType = eBlastTypeTblastn;
00733     m_Program = eTblastn;
00734     for (index = 0; index < kNumDbs; ++index) { 
00735         cutoffs = setupCutoffScores(false, kDbLengths[index],
00736                           kDbNumSeqs[index], kSubjectLengths[index]);
00737         testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00738         sfree(cutoffs);
00739         freeStructures();
00740         if (index < kNumDbs-1)
00741             BlastQueryInfoFree(m_QueryInfo);
00742     }
00743 }
00744 
00745 BOOST_AUTO_TEST_CASE(UngappedTblastxCutoffs)
00746 {
00747     const int kNumDbs = 4;
00748     const Int8 kDbLengths[kNumDbs] = 
00749         { 10000000000LL, 10000000000LL, 10000000000LL, 3000000000LL };
00750     const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 2000000, 20000000, 500 };
00751     const Uint4 kSubjectLengths[kNumDbs] = { 2000, 100, 400, 3000000 };
00752     const AllCutoffScores kGoodCutoffs[kNumDbs] = { 
00753         { 16, 0, 0, 0, 41, 72, true, 41, 40 },
00754         { 16, 0, 0, 0, 41, 72, true,  0, 27 },
00755         { 16, 0, 0, 0, 41, 70, true, 41, 34 },
00756         { 16, 0, 0, 0, 41, 68, true, 41, 60 } };
00757 
00758     AllCutoffScores* cutoffs = NULL;
00759     int index;
00760     m_ProgramType = eBlastTypeTblastx;
00761     m_Program = eTblastx;
00762     for (index = 0; index < kNumDbs; ++index) { 
00763         cutoffs = setupCutoffScores(false, kDbLengths[index],
00764                           kDbNumSeqs[index], kSubjectLengths[index]);
00765         testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00766         sfree(cutoffs);
00767         freeStructures();
00768         if (index < kNumDbs-1)
00769             BlastQueryInfoFree(m_QueryInfo);
00770     }
00771 }
00772 
00773 BOOST_AUTO_TEST_CASE(GappedBlastnCutoffs)
00774 {
00775     const int kNumDbs = 4;
00776     const Int8 kDbLengths[kNumDbs] = 
00777         { 10000000000LL, 10000000000LL, 3000000000LL, 10000LL };
00778     const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500, 200 };
00779     const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000, 60 };
00780     const AllCutoffScores kGoodCutoffs[kNumDbs] = { 
00781         { 11, 15, 50, 0, 14, 20, false, 0, 0 },
00782         { 11, 15, 50, 0, 11, 20, false, 0, 0 },
00783         { 11, 15, 50, 0, 19, 19, false, 0, 0 },
00784         {  8, 15, 50, 0,  8, 10, false, 0, 0 } };
00785 
00786     AllCutoffScores* cutoffs = NULL;
00787     int index;
00788     m_ProgramType = eBlastTypeBlastn;
00789     m_Program = eBlastn;
00790     for (index = 0; index < kNumDbs; ++index) { 
00791         cutoffs = setupCutoffScores(true, kDbLengths[index],
00792                           kDbNumSeqs[index], kSubjectLengths[index]);
00793         testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00794         sfree(cutoffs);
00795         freeStructures();
00796         if (index < kNumDbs-1)
00797             BlastQueryInfoFree(m_QueryInfo);
00798     }
00799 }
00800 
00801 BOOST_AUTO_TEST_CASE(GappedBlastpCutoffs)
00802 {
00803     const Int8 kDbLength = 600000000;
00804     const Uint4 kDbNumSeqs = 1800000;
00805     const Uint4 kSubjectLength = 200;
00806     m_ProgramType = eBlastTypeBlastp;
00807     m_Program = eBlastp;
00808     const AllCutoffScores kGoodCutoffs =
00809         { 16, 38, 64, 41, 41, 72, false, 0, 0 };
00810     AllCutoffScores* cutoffs = 
00811         setupCutoffScores(true, kDbLength, kDbNumSeqs, kSubjectLength);
00812     testAllCutoffs(kGoodCutoffs, *cutoffs);
00813     sfree(cutoffs);
00814     freeStructures();
00815 }
00816 
00817 BOOST_AUTO_TEST_CASE(GappedBlastxCutoffs)
00818 {
00819     const int kNumDbs = 2;
00820     const Int8 kDbLengths[kNumDbs] = 
00821           {600000000, 6000000000LL};
00822     const Uint4 kDbNumSeqs = 1800000;
00823     const Uint4 kSubjectLength[kNumDbs] = {500, 2000};
00824     const AllCutoffScores kGoodCutoffs[kNumDbs] = {
00825         { 16, 38, 64, 0, 41, 32, true, 41, 0 },
00826         { 16, 38, 64, 0, 41, 37, true, 41, 0 } };
00827     m_ProgramType = eBlastTypeBlastx;
00828     m_Program = eBlastx;
00829     for (int index = 0; index < kNumDbs; ++index) { 
00830         AllCutoffScores* cutoffs = setupCutoffScores(true, 
00831               kDbLengths[index], kDbNumSeqs, kSubjectLength[index]);
00832         testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00833         sfree(cutoffs);
00834         freeStructures();
00835         if (index < kNumDbs-1)
00836             BlastQueryInfoFree(m_QueryInfo);
00837     }
00838 }
00839 
00840 BOOST_AUTO_TEST_CASE(GappedTblastnCutoffs)
00841 {
00842     const int kNumDbs = 3;
00843     const Int8 kDbLengths[kNumDbs] = 
00844         { 10000000000LL, 10000000000LL, 3000000000LL };
00845     const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500 };
00846     const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000 };
00847     const AllCutoffScores kGoodCutoffs[kNumDbs] = { 
00848         { 16, 38, 64, 41, 41, 38, true, 41, 0 },
00849         { 16, 38, 64, 41, 41, 32, true, 41, 0 },
00850         { 16, 38, 64, 41, 41, 65, true, 41, 0 } };
00851 
00852     AllCutoffScores* cutoffs = NULL;
00853     int index;
00854     m_ProgramType = eBlastTypeTblastn;
00855     m_Program = eTblastn;
00856     for (index = 0; index < kNumDbs; ++index) { 
00857         cutoffs = setupCutoffScores(true, kDbLengths[index],
00858                           kDbNumSeqs[index], kSubjectLengths[index]);
00859         testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00860         sfree(cutoffs);
00861         freeStructures();
00862         if (index < kNumDbs-1)
00863             BlastQueryInfoFree(m_QueryInfo);
00864     }
00865 }
00866 
00867 BOOST_AUTO_TEST_CASE(GappedTblastnVeryShortIntron)
00868 {
00869     const int kNumDbs = 3;
00870     const Int8 kDbLengths[kNumDbs] = 
00871         { 10000000000LL, 10000000000LL, 3000000000LL };
00872     const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500 };
00873     const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000 };
00874 
00875     AllCutoffScores* cutoffs = NULL;
00876     int index;
00877     m_ProgramType = eBlastTypeTblastn;
00878     m_Program = eTblastn;
00879     for (index = 0; index < kNumDbs; ++index) { 
00880         cutoffs = setupCutoffScores(true, kDbLengths[index],
00881                           kDbNumSeqs[index], kSubjectLengths[index], 1);
00882         
00883         BOOST_REQUIRE_EQUAL((int) false, (int) cutoffs->do_sum_stats);
00884         sfree(cutoffs);
00885         freeStructures();
00886         if (index < kNumDbs-1)
00887             BlastQueryInfoFree(m_QueryInfo);
00888     }
00889 }
00890 BOOST_AUTO_TEST_SUITE_END()
00891 
00892 

Generated on Wed Dec 9 03:57:12 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:17:46 2009 by modify_doxy.py rev. 173732