src/algo/blast/unit_tests/api/bl2seq_unit_test.cpp

Go to the documentation of this file.
00001 /*  $Id: bl2seq_unit_test.cpp 171622 2009-09-25 15:08:10Z avagyanv $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Authors: Christiam Camacho
00027  *
00028  */
00029 
00030 /** @file bl2seq_unit_test.cpp
00031  * Unit tests for the CBl2Seq class
00032  */
00033 
00034 #include <ncbi_pch.hpp>
00035 #include <corelib/test_boost.hpp>
00036 #include <algo/blast/api/bl2seq.hpp>
00037 #include <objects/seqalign/Seq_align.hpp>
00038 #include <objects/seqalign/Seq_align_set.hpp>
00039 #include <objects/seqalign/Std_seg.hpp>
00040 #include <objects/seqalign/Dense_seg.hpp>
00041 #include <objects/seqalign/Score.hpp>
00042 #include <objects/general/Object_id.hpp>
00043 
00044 #include <serial/serial.hpp>
00045 #include <serial/iterator.hpp>
00046 #include <serial/objostr.hpp>
00047 
00048 #include <algo/blast/api/tblastn_options.hpp>
00049 #include <algo/blast/format/blastfmtutil.hpp>
00050 
00051 #include <algo/blast/api/blast_options_handle.hpp>
00052 #include <algo/blast/api/blast_prot_options.hpp>
00053 #include <algo/blast/api/blastx_options.hpp>
00054 #include <algo/blast/api/tblastn_options.hpp>
00055 #include <algo/blast/api/blast_nucl_options.hpp>
00056 #include <algo/blast/api/disc_nucl_options.hpp>
00057 #include <algo/blast/api/local_blast.hpp>       // for CLocalBlast
00058 #include <algo/blast/api/local_db_adapter.hpp>  // for CLocalDbAdapter
00059 #include <algo/blast/api/objmgr_query_data.hpp> // for CObjMgr_QueryFactory
00060 #include <algo/blast/blastinput/blast_input.hpp>
00061 #include <algo/blast/blastinput/blast_fasta_input.hpp>
00062 
00063 #include <objtools/simple/simple_om.hpp>        // for CSimpleOM
00064 #include <objtools/readers/fasta.hpp>           // for CFastaReader
00065 #include <objmgr/util/seq_loc_util.hpp>
00066 
00067 #include "test_objmgr.hpp"
00068 
00069 #ifdef NCBI_OS_DARWIN
00070 #include <corelib/plugin_manager_store.hpp>
00071 #include <objmgr/data_loader_factory.hpp>
00072 #include <objtools/data_loaders/genbank/processors.hpp>
00073 #endif
00074 
00075 #include <util/random_gen.hpp>
00076 
00077 #include <corelib/test_boost.hpp>
00078 
00079 #ifndef SKIP_DOXYGEN_PROCESSING
00080 
00081 USING_NCBI_SCOPE;
00082 USING_SCOPE(blast);
00083 USING_SCOPE(objects);
00084 
00085 BOOST_AUTO_TEST_SUITE(bl2seq)
00086 
00087 BOOST_AUTO_TEST_CASE(ProteinBlastInvalidSeqIdSelfHit)
00088 {
00089     CRef<CSeq_loc> loc(new CSeq_loc());
00090     loc->SetWhole().SetGi(-1);
00091 
00092     CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
00093     scope->AddDefaults();
00094     SSeqLoc query(loc, scope);
00095 
00096     TSeqLocVector subjects;
00097     {
00098         CRef<CSeq_loc> local_loc(new CSeq_loc());
00099         local_loc->SetWhole().SetGi(-1);
00100 
00101         CScope* local_scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
00102         local_scope->AddDefaults();
00103         subjects.push_back(SSeqLoc(local_loc, local_scope));
00104     }
00105 
00106     // BLAST by concatenating all queries
00107     CBl2Seq blaster4all(query, subjects, eBlastp);
00108     TSeqAlignVector sas_v;
00109     BOOST_CHECK_THROW(sas_v = blaster4all.Run(), CBlastException);
00110 }
00111 
/// Identifies the search scenario whose expected diagnostics are verified
/// by testRawCutoffs() and testBlastHitCounts(). Enumerator names encode the
/// program and the sequences (or kind of input) involved. The numeric values
/// are sequential starting at 0 and the order must be preserved.
enum EBl2seqTest {
    eBlastp_129295_129295 = 0,
    eBlastn_555_555,
    eMegablast_555_555,
    eDiscMegablast_555_555,
    eBlastx_555_129295,
    eTblastn_129295_555,
    eTblastn_129295_555_large_word,
    eTblastx_555_555,
    eTblastx_many_hits,
    eBlastp_129295_7662354,
    eBlastn_555_3090,
    eBlastp_multi_q,
    eBlastn_multi_q,
    eBlastp_multi_q_s,
    eTblastn_oof,
    eBlastx_oof,
    eDiscMegablast_U02544_U61969,
    eMegablast_chrom_mrna
};
00132 
00133 /* The following functions are used to test the functionality to interrupt
00134  * CBl2Seq runs */
00135 
00136 /// Returns true so that the processing stops upon the first invocation of this
00137 /// callback
00138 extern "C" Boolean interrupt_immediately(SBlastProgress* /*progress_info*/)
00139 {
00140     return TRUE;
00141 }
00142 
00143 /// Returns false so that the processing never stops in spite of a callback
00144 /// function to interrupt the process is provided
00145 extern "C" Boolean do_not_interrupt(SBlastProgress* /*progress_info*/)
00146 {
00147     return FALSE;
00148 }
00149 
00150 /// This callback never interrupts the BLAST search, its only purpose is to
00151 /// count the number of times this is invoked for the given input. Also to be
00152 /// used in CBl2SeqTest::testInterruptXExitAtRandom.
00153 extern "C" Boolean callback_counter(SBlastProgress* progress_info)
00154 {
00155     int& counter = *reinterpret_cast<int*>(progress_info->user_data);
00156     counter++;
00157     return FALSE;
00158 }
00159 
00160 /// This callback interrupts the BLAST search after the callback has been
00161 /// executed the requested number of times in the pair's second member.
00162 /// This is used in CBl2SeqTest::testInterruptXExitAtRandom.
00163 extern "C" Boolean interrupt_at_random(SBlastProgress* progress_info)
00164 {
00165     pair<int, int>& progress_pair =
00166         *reinterpret_cast< pair<int, int>* >(progress_info->user_data);
00167 
00168     if (++progress_pair.first == progress_pair.second) {
00169         return TRUE;
00170     } else {
00171         return FALSE;
00172     }
00173 }
00174 
00175 /// The interruption occurs after 3 invokations of this callback
00176 extern "C" Boolean interrupt_after3calls(SBlastProgress* /*progress_info*/)
00177 {
00178     static int num_calls = 0;
00179     if (++num_calls < 3) {
00180         return FALSE;
00181     } else {
00182         return TRUE;
00183     }
00184 }
00185 
00186 /// The interruption occurs after starting the traceback stage
00187 extern "C" Boolean interrupt_on_traceback(SBlastProgress* progress_info)
00188 {
00189     if (progress_info->stage == eTracebackSearch) {
00190         return TRUE;
00191     } else {
00192         return FALSE;
00193     }
00194 }
00195 
00196 void testRawCutoffs(CBl2Seq& blaster, EProgram program, 
00197                     EBl2seqTest test_id)
00198 {
00199     BlastRawCutoffs* raw_cutoffs = 
00200         blaster.GetDiagnostics()->cutoffs;
00201     int x_drop_ungapped;
00202     int gap_trigger;
00203 
00204     if (program == eBlastn || program == eDiscMegablast) {
00205         x_drop_ungapped = 16;
00206         gap_trigger = 16;
00207     } else if (program == eMegablast) {
00208         x_drop_ungapped = 8;
00209         gap_trigger = 8;
00210     } else {
00211         x_drop_ungapped = 16;
00212         gap_trigger = 41;
00213     }
00214 
00215     switch (test_id) {
00216     case eBlastn_555_3090:
00217         x_drop_ungapped = 18; 
00218         gap_trigger = 18;
00219         break;
00220     case eBlastn_multi_q:
00221         x_drop_ungapped = 18; 
00222         gap_trigger = 18;
00223         break;
00224     case eMegablast_chrom_mrna: 
00225         x_drop_ungapped = 7;
00226         gap_trigger = 7; 
00227         break;
00228     case eDiscMegablast_U02544_U61969:
00229         x_drop_ungapped = 20; 
00230         gap_trigger = 20; 
00231         break;
00232     case eBlastp_multi_q:
00233         gap_trigger = 23;
00234         break;
00235     case eBlastp_multi_q_s:
00236         gap_trigger = 19;
00237         break;
00238     case eBlastp_129295_129295:
00239     case eTblastn_129295_555:
00240     case eTblastn_129295_555_large_word:
00241         gap_trigger = 20; break;
00242     case eBlastp_129295_7662354:
00243         gap_trigger = 23; break;
00244     case eBlastx_555_129295:
00245         gap_trigger = 19; break;
00246     case eTblastn_oof:
00247         gap_trigger = 43;
00248     default:
00249         break;
00250     }
00251 
00252     switch (program) {
00253     case eBlastn: case eDiscMegablast:
00254         BOOST_CHECK_EQUAL(x_drop_ungapped, 
00255                              raw_cutoffs->x_drop_ungapped);
00256         BOOST_CHECK_EQUAL(33, raw_cutoffs->x_drop_gap);
00257         // CC changed 08/07/08
00258         //BOOST_CHECK_EQUAL(55, raw_cutoffs->x_drop_gap_final);
00259         BOOST_CHECK_EQUAL(110, raw_cutoffs->x_drop_gap_final);
00260         BOOST_CHECK_EQUAL(gap_trigger, raw_cutoffs->ungapped_cutoff);
00261         break;
00262     case eMegablast:
00263         BOOST_CHECK_EQUAL(x_drop_ungapped, 
00264                              raw_cutoffs->x_drop_ungapped);
00265         BOOST_CHECK_EQUAL(16, raw_cutoffs->x_drop_gap);
00266         // CC changed 08/07/08
00267         //BOOST_CHECK_EQUAL(27, raw_cutoffs->x_drop_gap_final);
00268         BOOST_CHECK_EQUAL(54, raw_cutoffs->x_drop_gap_final);
00269         BOOST_CHECK_EQUAL(gap_trigger, raw_cutoffs->ungapped_cutoff);
00270         break;
00271     case eBlastp: case eBlastx: case eTblastn:
00272         BOOST_CHECK_EQUAL(38, raw_cutoffs->x_drop_gap);
00273         BOOST_CHECK_EQUAL(64, raw_cutoffs->x_drop_gap_final);
00274         BOOST_CHECK_EQUAL(gap_trigger, raw_cutoffs->ungapped_cutoff);
00275         /* No break intentional: next test is valid for all the above 
00276            programs */
00277     case eTblastx:
00278         BOOST_CHECK_EQUAL(x_drop_ungapped,
00279                              raw_cutoffs->x_drop_ungapped);
00280         break;
00281     default: break;
00282     }
00283 }
00284 
00285 void testResultAlignments(size_t num_queries,
00286                           size_t num_subjects,
00287                           TSeqAlignVector result_alnvec)
00288 {
00289     size_t num_total_alns = num_queries * num_subjects;
00290 
00291     // test the number of resulting alignments
00292     BOOST_REQUIRE_EQUAL(result_alnvec.size(), num_total_alns);
00293 
00294     // test the correct ordering of resulting alignments
00295     // (q1 s1 q1 s2 ... q2 s1 q2 s2 ...)
00296 
00297     CConstRef<CSeq_id> id_query, id_prev_query;
00298     CConstRef<CSeq_id> id_subject;
00299     vector< CConstRef<CSeq_id> > id_prev_subjects;
00300     id_prev_subjects.resize(num_subjects);
00301 
00302     bool prev_query_available = false;
00303     vector<bool> prev_subjects_available(num_subjects, false);
00304 
00305     /* DEBUG OUTPUT
00306     cerr << "................................................" << endl;
00307     for (size_t i = 0; i < result_alnvec.size(); i++)
00308         cerr << "\n<" << i << ">\n"
00309             << MSerial_AsnText << result_alnvec[i].GetObject() << endl;
00310     cerr << "................................................" << endl;
00311     ------------ */
00312 
00313     for (size_t i_query = 0; i_query < num_queries; i_query++)
00314     {
00315         prev_query_available = false;
00316         for (size_t i_subject = 0; i_subject < num_subjects; i_subject++)
00317         {
00318             size_t i_lin_index = i_query * num_subjects + i_subject;
00319             CRef<CSeq_align_set> aln_set = result_alnvec[i_lin_index];
00320 
00321             // test if the alignment set is available (even if empty)
00322             BOOST_REQUIRE(aln_set.NotNull());
00323 
00324             // if the alignment set is not empty, take the first alignment
00325             // and see if the ID's are in correct order
00326             if (aln_set->Get().size() > 0)
00327             {
00328                 CRef<CSeq_align> aln = aln_set->Get().front();
00329                 id_query.Reset(&(aln->GetSeq_id(0)));
00330                 id_subject.Reset(&(aln->GetSeq_id(1)));
00331 
00332                 // check if the query id was the same
00333                 // for the previous subject
00334                 if (i_subject > 0 &&
00335                     prev_query_available)
00336                 {
00337                     BOOST_REQUIRE(
00338                         id_query->Match(
00339                         id_prev_query.GetObject()));
00340                 }
00341 
00342                 // check if the subject id was the same
00343                 // on the same position for the previous query
00344                 if (i_query > 0 &&
00345                     prev_subjects_available[i_subject])
00346                 {
00347                     BOOST_REQUIRE(
00348                         id_subject->Match(
00349                         id_prev_subjects[i_subject].GetObject()));
00350                 }
00351 
00352                 // update the entry in previous subjects vector
00353                 prev_subjects_available[i_subject] = true;
00354                 id_prev_subjects[i_subject] = id_subject;
00355 
00356                 // update the previous query entry
00357                 prev_query_available = true;
00358                 id_prev_query = id_query;
00359             }
00360         }
00361     }
00362 }
00363 
00364 void testBlastHitCounts(CBl2Seq& blaster, EBl2seqTest test_id)
00365 {
00366     BlastUngappedStats* ungapped_stats = 
00367         blaster.GetDiagnostics()->ungapped_stat;
00368     BlastGappedStats* gapped_stats = 
00369         blaster.GetDiagnostics()->gapped_stat;
00370     
00371     switch (test_id) {
00372     case eBlastp_129295_129295:
00373         BOOST_CHECK_EQUAL(314, (int)ungapped_stats->lookup_hits);
00374         BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
00375         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00376         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00377         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00378         break;
00379     case eBlastn_555_555:
00380         BOOST_CHECK_EQUAL(157, (int)ungapped_stats->lookup_hits);
00381         BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
00382         BOOST_CHECK_EQUAL(3, ungapped_stats->good_init_extends);
00383         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00384         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00385         break;
00386     case eMegablast_555_555:
00387         BOOST_CHECK_EQUAL(30, (int)ungapped_stats->lookup_hits);
00388         BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
00389         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00390         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00391         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00392         break;
00393     case eDiscMegablast_555_555:
00394         BOOST_CHECK_EQUAL(582, (int)ungapped_stats->lookup_hits);
00395         // CC changed 08/07/08
00396         //BOOST_CHECK_EQUAL(32, ungapped_stats->init_extends);
00397         BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
00398         // CC changed 08/07/08
00399         //BOOST_CHECK_EQUAL(32, ungapped_stats->good_init_extends);
00400         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00401         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00402         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00403         break;
00404     case eBlastx_555_129295:
00405         BOOST_CHECK_EQUAL(280, (int)ungapped_stats->lookup_hits);
00406         BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
00407         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00408         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00409         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00410         break;
00411     case eTblastn_129295_555:
00412         BOOST_CHECK_EQUAL(157, (int)ungapped_stats->lookup_hits);
00413         BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
00414         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00415         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00416         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00417         break;
00418     case eTblastn_129295_555_large_word:
00419         BOOST_CHECK_EQUAL(5, (int)ungapped_stats->lookup_hits);
00420         BOOST_CHECK_EQUAL(4, ungapped_stats->init_extends);
00421         BOOST_CHECK_EQUAL(2, ungapped_stats->good_init_extends);
00422         BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
00423         BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
00424         break;
00425     case eTblastx_555_555:
00426         BOOST_CHECK_EQUAL(2590, (int)ungapped_stats->lookup_hits);
00427         BOOST_CHECK_EQUAL(61, ungapped_stats->init_extends);
00428         BOOST_CHECK_EQUAL(41, ungapped_stats->good_init_extends);
00429         break;
00430     case eTblastx_many_hits:
00431         BOOST_CHECK_EQUAL(18587, (int)ungapped_stats->lookup_hits);
00432         BOOST_CHECK_EQUAL(362, ungapped_stats->init_extends);
00433         BOOST_CHECK_EQUAL(66, ungapped_stats->good_init_extends);
00434         break;
00435     case eBlastp_129295_7662354:
00436         BOOST_CHECK_EQUAL(210, (int)ungapped_stats->lookup_hits);
00437         BOOST_CHECK_EQUAL(10, ungapped_stats->init_extends);
00438         BOOST_CHECK_EQUAL(3, ungapped_stats->good_init_extends);
00439         BOOST_CHECK_EQUAL(3, gapped_stats->extensions);
00440         BOOST_CHECK_EQUAL(3, gapped_stats->good_extensions);
00441         break;
00442     case eBlastn_555_3090:
00443         BOOST_CHECK_EQUAL(15, (int)ungapped_stats->lookup_hits);
00444         BOOST_CHECK_EQUAL(2, ungapped_stats->init_extends);
00445         BOOST_CHECK_EQUAL(2, ungapped_stats->good_init_extends);
00446         BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
00447         BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
00448         break;
00449     case eBlastp_multi_q:
00450         BOOST_CHECK_EQUAL(2129, (int)ungapped_stats->lookup_hits);
00451         BOOST_CHECK_EQUAL(78, ungapped_stats->init_extends);
00452         BOOST_CHECK_EQUAL(14, ungapped_stats->good_init_extends);
00453         BOOST_CHECK_EQUAL(8, gapped_stats->extensions);
00454         BOOST_CHECK_EQUAL(8, gapped_stats->good_extensions);
00455         break;
00456     case eBlastn_multi_q:
00457         BOOST_CHECK_EQUAL(963, (int)ungapped_stats->lookup_hits);
00458         BOOST_CHECK_EQUAL(13, ungapped_stats->init_extends);
00459         BOOST_CHECK_EQUAL(13, ungapped_stats->good_init_extends);
00460         BOOST_CHECK_EQUAL(5, gapped_stats->extensions);
00461         BOOST_CHECK_EQUAL(5, gapped_stats->good_extensions);
00462         break;
00463     case eBlastp_multi_q_s:
00464 #if 0
00465         // The following 2 numbers are different in Release and Debug modes
00466         // due to a minor discrepancy in locations masked by seg filtering.
00467         // The latter is due to a tiny difference in values involved in 
00468         // a comparison of real numbers inside the seg algorithm.
00469         // In Debug mode:
00470         BOOST_CHECK_EQUAL(3579, (int)ungapped_stats->lookup_hits);
00471         BOOST_CHECK_EQUAL(138, ungapped_stats->init_extends);
00472         // In Release mode:
00473         BOOST_CHECK_EQUAL(3580, (int)ungapped_stats->lookup_hits);
00474         BOOST_CHECK_EQUAL(140, ungapped_stats->init_extends);
00475 #endif
00476         // Note: Seg is not enabled for this case anymore
00477         // (changed blastp defaults)
00478         BOOST_CHECK_EQUAL(3939, (int)ungapped_stats->lookup_hits);
00479         BOOST_CHECK_EQUAL(159, ungapped_stats->init_extends);
00480         BOOST_CHECK_EQUAL(59, ungapped_stats->good_init_extends);
00481         BOOST_CHECK_EQUAL(25, gapped_stats->extensions);
00482         BOOST_CHECK_EQUAL(24, gapped_stats->good_extensions);
00483         break;
00484     case eTblastn_oof:
00485         BOOST_CHECK_EQUAL(2666, (int)ungapped_stats->lookup_hits);
00486         BOOST_CHECK_EQUAL(50, ungapped_stats->init_extends);
00487         BOOST_CHECK_EQUAL(4, ungapped_stats->good_init_extends);
00488         BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
00489         BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
00490         break;
00491     case eBlastx_oof:
00492         BOOST_CHECK_EQUAL(5950, (int)ungapped_stats->lookup_hits);
00493         BOOST_CHECK_EQUAL(159, ungapped_stats->init_extends);
00494         BOOST_CHECK_EQUAL(6, ungapped_stats->good_init_extends);
00495         BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
00496         BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
00497         break;
00498     case eDiscMegablast_U02544_U61969:
00499         BOOST_CHECK_EQUAL(108, (int)ungapped_stats->lookup_hits);
00500         // CC changed 08/07/08
00501         //BOOST_CHECK_EQUAL(15, ungapped_stats->init_extends);
00502         //BOOST_CHECK_EQUAL(15, ungapped_stats->good_init_extends);
00503         BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
00504         BOOST_CHECK_EQUAL(3, ungapped_stats->good_init_extends);
00505         BOOST_CHECK_EQUAL(3, gapped_stats->extensions);
00506         BOOST_CHECK_EQUAL(3, gapped_stats->good_extensions);
00507         break;
00508     case eMegablast_chrom_mrna:
00509         BOOST_CHECK_EQUAL(14, (int)ungapped_stats->lookup_hits);
00510         BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
00511         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00512         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00513         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00514         break;
00515     default: break;
00516     }
00517 }
00518 
00519 BOOST_AUTO_TEST_CASE(ProteinBlastSelfHit)
00520 {
00521     //const int kSeqLength = 232;
00522     CSeq_id id("gi|129295");
00523     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
00524 
00525     CBl2Seq blaster(*sl, *sl, eBlastp);
00526     TSeqAlignVector sav(blaster.Run());
00527     BOOST_REQUIRE(sav[0].NotEmpty());
00528     BOOST_REQUIRE( !sav[0]->IsEmpty() );
00529     BOOST_REQUIRE(sav[0]->Get().begin()->NotEmpty());
00530     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00531     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
00532     testBlastHitCounts(blaster, eBlastp_129295_129295);
00533     testRawCutoffs(blaster, eBlastp, eBlastp_129295_129295);
00534 
00535     // the number of identities is NOT calculated when composition based
00536     // statistics is turned on (default for blastp)
00537     int num_ident = 0;
00538     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00539 #if 0
00540     ofstream o("0.asn");
00541     o << MSerial_AsnText << *sar ;
00542     o.close();
00543 #endif
00544     BOOST_CHECK_EQUAL(232, num_ident);
00545 
00546     // calculate the number of identities using the BLAST formatter
00547 /*
00548     double percent_identity = 
00549         CBlastFormatUtil::GetPercentIdentity(*sar, *sl->scope, false);
00550     BOOST_CHECK_EQUAL(1, (int) percent_identity);
00551 */
00552 
00553     // Check the ancillary results
00554     CSearchResultSet::TAncillaryVector ancillary_data;
00555     blaster.GetAncillaryResults(ancillary_data);
00556     BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
00557     BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
00558     BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
00559     BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
00560 
00561 }
00562 
00563 BOOST_AUTO_TEST_CASE(TBlastn2Seqs)
00564 {
00565     CSeq_id qid("gi|129295");
00566     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00567 
00568     CSeq_id sid("gi|555");
00569     auto_ptr<SSeqLoc> subj(
00570         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
00571 
00572     CBl2Seq blaster(*query, *subj, eTblastn);
00573     TSeqAlignVector sav(blaster.Run());
00574     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00575 
00576 #if 0
00577     ofstream o("1.asn");
00578     o << MSerial_AsnText << *sar ;
00579     o.close();
00580 #endif
00581 
00582     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
00583     testBlastHitCounts(blaster, eTblastn_129295_555);
00584     testRawCutoffs(blaster, eTblastn, eTblastn_129295_555);
00585 
00586     int score = 0, comp_adj = 0;
00587     sar->GetNamedScore(CSeq_align::eScore_Score, score);
00588     sar->GetNamedScore(CSeq_align::eScore_CompAdjMethod, comp_adj);
00589     BOOST_CHECK_EQUAL(26, score);
00590     BOOST_CHECK_EQUAL(2, comp_adj);
00591 
00592     // Check the ancillary results
00593     CSearchResultSet::TAncillaryVector ancillary_data;
00594     blaster.GetAncillaryResults(ancillary_data);
00595     BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
00596     BOOST_REQUIRE( ancillary_data.front().NotEmpty() );
00597     BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
00598     BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
00599     BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
00600 }
00601 
00602 BOOST_AUTO_TEST_CASE(TBlastn2SeqsRevStrand1)
00603 {
00604     CSeq_id qid("gi|1945390");
00605     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00606 
00607     pair<TSeqPos, TSeqPos> range(150000, 170000);
00608     CSeq_id sid("gi|4755212");
00609     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_minus));
00610 
00611     CBl2Seq blaster(*query, *subj, eTblastn);
00612     TSeqAlignVector sav(blaster.Run());
00613     BOOST_CHECK_EQUAL(12, (int) sav[0]->Get().size());
00614     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00615     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
00616     vector < CRef< CSeq_loc > > locs = sar->GetSegs().GetStd().front()->GetLoc();
00617     BOOST_CHECK_EQUAL(eNa_strand_minus, (int) (locs[1])->GetStrand());
00618     int num_ident = 0;
00619     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00620     BOOST_CHECK_EQUAL(155, num_ident);
00621 #if 0
00622 ofstream o("minus1.asn");
00623 o << MSerial_AsnText << *sar ;
00624 o.close();
00625 #endif
00626 }
00627 
00628 BOOST_AUTO_TEST_CASE(TBlastn2SeqsRevStrand2)
00629 {
00630     CSeq_id qid("gi|1945390");
00631     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00632 
00633     CSeq_id sid("gi|1945388");
00634     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_minus));
00635 
00636     CBl2Seq blaster(*query, *subj, eTblastn);
00637     TSeqAlignVector sav(blaster.Run());
00638     BOOST_CHECK_EQUAL(2, (int) sav[0]->Get().size());
00639     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00640     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
00641     vector < CRef< CSeq_loc > > locs = sar->GetSegs().GetStd().front()->GetLoc();
00642     BOOST_CHECK_EQUAL(eNa_strand_minus, (int) (locs[1])->GetStrand());
00643     int num_ident = 0;
00644     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00645     BOOST_CHECK_EQUAL(11, num_ident);
00646 #if 0
00647 ofstream o("minus2.asn");
00648 o << MSerial_AsnText << *sar ;
00649 o.close();
00650 #endif
00651 }
00652 
00653 
00654 BOOST_AUTO_TEST_CASE(TBlastn2SeqsCompBasedStats)
00655 {
00656     CSeq_id qid("gi|68737"); // "pir|A01243|DXCH"
00657     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00658 
00659     CSeq_id sid("gi|118086484");
00660     auto_ptr<SSeqLoc> subj(
00661         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
00662 
00663     CTBlastnOptionsHandle opts;
00664     opts.SetOptions().SetCompositionBasedStats(eCompositionBasedStats);
00665 
00666     CBl2Seq blaster(*query, *subj, opts);
00667     TSeqAlignVector sav(blaster.Run());
00668     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00669     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
00670 
00671     int num_ident = 0;
00672     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00673     BOOST_CHECK_EQUAL(229, num_ident);
00674 #if 0
00675 ofstream o("2.asn");
00676 o << MSerial_AsnText << *sar ;
00677 o.close();
00678 #endif
00679 
00680     // Check the ancillary results
00681     CSearchResultSet::TAncillaryVector ancillary_data;
00682     blaster.GetAncillaryResults(ancillary_data);
00683     BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
00684     BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
00685     BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
00686     BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
00687 }
00688 
00689 BOOST_AUTO_TEST_CASE(TBlastn2SeqsLargeWord)
00690 {
00691     CSeq_id qid("gi|129295");
00692     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00693 
00694     CSeq_id sid("gi|555");
00695     auto_ptr<SSeqLoc> subj(
00696         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
00697 
00698     CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(eTblastn));
00699     opts->SetOptions().SetWordSize(6);
00700     opts->SetOptions().SetLookupTableType(eCompressedAaLookupTable);
00701     opts->SetOptions().SetWordThreshold(21.69);
00702     opts->SetOptions().SetWindowSize(0);
00703     opts->SetOptions().SetCompositionBasedStats(eNoCompositionBasedStats);
00704 
00705     CBl2Seq blaster(*query, *subj, *opts);
00706     TSeqAlignVector sav(blaster.Run());
00707     BOOST_CHECK_EQUAL(2, (int)sav[0]->Size());
00708     testBlastHitCounts(blaster, eTblastn_129295_555_large_word);
00709     testRawCutoffs(blaster, eTblastn, eTblastn_129295_555_large_word);
00710 
00711     int num_ident = 0;
00712     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00713     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00714 #if 0
00715 ofstream o("3.asn");
00716 o << MSerial_AsnText << *sar ;
00717 o.close();
00718 #endif
00719     BOOST_CHECK_EQUAL(5, num_ident);
00720 }
00721 
00722 BOOST_AUTO_TEST_CASE(IdenticalProteins)
00723 {
00724     //const int kSeqLength = 377;
00725     CSeq_id qid("gi|34810917");
00726     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00727     CSeq_id sid("gi|34810916");
00728     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
00729 
00730     CBl2Seq blaster(*query, *subj, eBlastp);
00731     TSeqAlignVector sav(blaster.Run());
00732     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00733     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
00734 
00735     // the number of identities is NOT calculated when composition based
00736     // statistics is turned on (default for blastp)
00737     int num_ident = 0;
00738     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00739 #if 0
00740     ofstream o("4.asn");
00741     o << MSerial_AsnText << *sar ;
00742     o.close();
00743 #endif
00744     BOOST_CHECK_EQUAL(377, num_ident);
00745 
00746     // calculate the number of identities using the BLAST formatter
00747 /*
00748     double percent_identity = 
00749         CBlastFormatUtil::GetPercentIdentity(*sar, *query->scope, false);
00750     BOOST_CHECK_EQUAL(1, (int) percent_identity);
00751 */
00752 
00753     // Check the ancillary results
00754     CSearchResultSet::TAncillaryVector ancillary_data;
00755     blaster.GetAncillaryResults(ancillary_data);
00756     BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
00757     BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
00758     BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
00759     BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
00760 }
00761 
00762 BOOST_AUTO_TEST_CASE(UnsupportedOption) {
00763     CDiscNucleotideOptionsHandle opts_handle;
00764     BOOST_REQUIRE_THROW(opts_handle.SetTraditionalBlastnDefaults(),
00765                         CBlastException);
00766 }
00767 
00768 BOOST_AUTO_TEST_CASE(PositiveMismatchOption) {
00769     CSeq_id qid("gi|408478");  // zebrafish sequence U02544
00770     CSeq_id sid("gi|1546012"); // mouse sequence U61969
00771 
00772     auto_ptr<SSeqLoc> query(
00773         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
00774     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
00775 
00776     const int kMatch = 2;
00777     const int kMismatch = 5;  // Positive mismatch not allowed.
00778 
00779     CBlastNucleotideOptionsHandle nucl_options_handle;
00780 
00781     nucl_options_handle.SetMatchReward(kMatch);
00782     nucl_options_handle.SetMismatchPenalty(kMismatch);
00783     CBl2Seq blaster(*query, *subj, nucl_options_handle);
00784     try {
00785        TSeqAlignVector sav(blaster.Run());
00786     } catch (CBlastException& exptn) {
00787         BOOST_REQUIRE(
00788             !strcmp("BLASTN penalty must be negative",  
00789                     exptn.GetMsg().c_str()));
00790     }
00791 }
00792 
00793 BOOST_AUTO_TEST_CASE(FullyMaskedSequence) {
00794     CSeq_id qid("ref|NT_024524.13");
00795     pair<TSeqPos, TSeqPos> range(27886902, 27886932);
00796     auto_ptr<SSeqLoc> query(
00797         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
00798                                                eNa_strand_plus));
00799     range.first = 2052;
00800     range.second = 2082;
00801     CSeq_id sid("emb|BX641126.1");
00802     auto_ptr<SSeqLoc> subj(
00803         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
00804                                                eNa_strand_minus));
00805     CBlastNucleotideOptionsHandle options;
00806     options.SetTraditionalBlastnDefaults();
00807     options.SetMismatchPenalty(-1);
00808     options.SetMatchReward(1);
00809     options.SetGapXDropoff(100);
00810     options.SetMaskAtHash(false);
00811     CBl2Seq blaster(*query, *subj, options);
00812     try { blaster.Run(); }
00813     catch (const CException& e) {
00814         const string msg1("invalid query sequence");
00815         const string msg2("verify the query sequence(s) and/or filtering "
00816                           "options");
00817         BOOST_REQUIRE(string(e.what()).find(msg1) != NPOS);
00818         BOOST_REQUIRE(string(e.what()).find(msg2) != NPOS);
00819     }
00820 }
00821 
00822 BOOST_AUTO_TEST_CASE(testInterruptBlastpExitImmediately) {
00823     CSeq_id id("gi|129295");
00824     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
00825 
00826     CBl2Seq blaster(*sl, *sl, eBlastp);
00827     TInterruptFnPtr fnptr =
00828         blaster.SetInterruptCallback(interrupt_immediately);
00829     BOOST_REQUIRE(fnptr == NULL);
00830 
00831     TSeqAlignVector sav;
00832     try { sav = blaster.Run(); }
00833     catch (...) {
00834         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00835     }
00836 }
00837 
00838 BOOST_AUTO_TEST_CASE(testInterruptBlastnExitImmediately) {
00839     CSeq_id id("gi|555");
00840     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
00841 
00842     CBl2Seq blaster(*sl, *sl, eBlastn);
00843     TInterruptFnPtr fnptr =
00844         blaster.SetInterruptCallback(interrupt_immediately);
00845     BOOST_REQUIRE(fnptr == NULL);
00846 
00847     TSeqAlignVector sav;
00848     try { sav = blaster.Run(); }
00849     catch (...) {
00850         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00851     }
00852 }
00853 
00854 BOOST_AUTO_TEST_CASE(testInterruptBlastxExitImmediately) {
00855     CSeq_id query_id("gi|555");
00856     auto_ptr<SSeqLoc> slq(CTestObjMgr::Instance().CreateSSeqLoc(query_id));
00857     CSeq_id subj_id("gi|129295");
00858     auto_ptr<SSeqLoc> sls(CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
00859 
00860     CBl2Seq blaster(*slq, *sls, eBlastx);
00861     TInterruptFnPtr fnptr =
00862         blaster.SetInterruptCallback(interrupt_immediately);
00863     BOOST_REQUIRE(fnptr == NULL);
00864 
00865     TSeqAlignVector sav;
00866     try { sav = blaster.Run(); }
00867     catch (...) {
00868         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00869     }
00870 }
00871 
00872 BOOST_AUTO_TEST_CASE(testInterruptTblastxExitImmediately) {
00873     CSeq_id query_id("gi|555");
00874     auto_ptr<SSeqLoc> slq(CTestObjMgr::Instance().CreateSSeqLoc(query_id));
00875     CSeq_id subj_id("gi|555");
00876     auto_ptr<SSeqLoc> sls(CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
00877 
00878     CBl2Seq blaster(*slq, *sls, eTblastx);
00879     TInterruptFnPtr fnptr =
00880         blaster.SetInterruptCallback(interrupt_immediately);
00881     BOOST_REQUIRE(fnptr == NULL);
00882 
00883     TSeqAlignVector sav;
00884     try { sav = blaster.Run(); }
00885     catch (...) {
00886         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00887     }
00888 }
00889 
00890 BOOST_AUTO_TEST_CASE(testInterruptTblastnExitImmediately) {
00891     CSeq_id query_id("gi|129295");
00892     auto_ptr<SSeqLoc> slq(CTestObjMgr::Instance().CreateSSeqLoc(query_id));
00893     CSeq_id subj_id("gi|555");
00894     auto_ptr<SSeqLoc> sls(CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
00895 
00896     CBl2Seq blaster(*slq, *sls, eTblastn);
00897     TInterruptFnPtr fnptr =
00898         blaster.SetInterruptCallback(interrupt_immediately);
00899     BOOST_REQUIRE(fnptr == NULL);
00900 
00901     TSeqAlignVector sav;
00902     try { sav = blaster.Run(); }
00903     catch (...) {
00904         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00905     }
00906 }
00907 
/// Number of elements in the statically sized array @a a.  The argument is
/// parenthesized so that any array-valued expression can be passed safely.
/// Not meaningful for pointers.
#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*(a)))
00909 static
00910 CRef<CBl2Seq> s_SetupWithMultipleQueriesAndSubjects(bool query_is_nucl,
00911                                                     bool subj_is_nucl,
00912                                                     EProgram program) {
00913 
00914     int protein_gis[] = { 6, 129295, 15606659, 4336138, 5556 };
00915     int nucl_gis[] = { 272208, 272217, 272211, 272247, 272227, 272236, 
00916         272219 };
00917 
00918     vector<int> q_gis, s_gis;
00919     if (query_is_nucl) {
00920         copy(&nucl_gis[0],
00921              &nucl_gis[ARRAY_SIZE(nucl_gis)],
00922              back_inserter(q_gis));
00923     } else {
00924         copy(&protein_gis[0],
00925              &protein_gis[ARRAY_SIZE(protein_gis)],
00926              back_inserter(q_gis));
00927     }
00928 
00929     if (subj_is_nucl) {
00930         copy(&nucl_gis[0],
00931              &nucl_gis[ARRAY_SIZE(nucl_gis)],
00932              back_inserter(s_gis));
00933     } else {
00934         copy(&protein_gis[0],
00935              &protein_gis[ARRAY_SIZE(protein_gis)],
00936              back_inserter(s_gis));
00937     }
00938 
00939 
00940     TSeqLocVector queries;
00941     ITERATE(vector<int>, itr, q_gis) {
00942         CRef<CSeq_loc> loc(new CSeq_loc());
00943         loc->SetWhole().SetGi(*itr);
00944 
00945         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
00946         scope->AddDefaults();
00947         queries.push_back(SSeqLoc(loc, scope));
00948     }
00949 
00950     TSeqLocVector subjects;
00951     ITERATE(vector<int>, itr, s_gis) {
00952         CRef<CSeq_loc> loc(new CSeq_loc());
00953         loc->SetWhole().SetGi(*itr);
00954 
00955         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
00956         scope->AddDefaults();
00957         subjects.push_back(SSeqLoc(loc, scope));
00958     }
00959 
00960     return CRef<CBl2Seq>(new CBl2Seq(queries, subjects, program));
00961 }
00962 
00963 BOOST_AUTO_TEST_CASE(testInterruptBlastpExitAtRandom) {
00964 
00965     CRef<CBl2Seq> blaster = s_SetupWithMultipleQueriesAndSubjects(false,
00966                                                                   false,
00967                                                                   eBlastp);
00968 
00969     int num_callbacks_executed(0);
00970     TInterruptFnPtr fnptr =
00971         blaster->SetInterruptCallback(callback_counter, 
00972                                       (void*) &num_callbacks_executed);
00973     BOOST_REQUIRE(fnptr == NULL);
00974 
00975     TSeqAlignVector sav(blaster->Run()); // won't throw
00976     CRandom r(time(0));
00977     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
00978     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
00979 
00980     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
00981                                           (void*)&progress_pair);
00982     BOOST_REQUIRE(fnptr == callback_counter);
00983     sav.clear();
00984 
00985     try { sav = blaster->Run(); }
00986     catch (...) {
00987         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00988     }
00989 }
00990 
00991 BOOST_AUTO_TEST_CASE(testInterruptBlastnExitAtRandom) {
00992 
00993     CRef<CBl2Seq> blaster =
00994         s_SetupWithMultipleQueriesAndSubjects(true, true, eBlastn);
00995 
00996     int num_callbacks_executed(0);
00997     TInterruptFnPtr fnptr =
00998         blaster->SetInterruptCallback(callback_counter,
00999                                       (void*)&num_callbacks_executed);
01000     BOOST_REQUIRE(fnptr == NULL);
01001 
01002     TSeqAlignVector sav(blaster->Run()); // won't throw
01003     CRandom r(time(0));
01004     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
01005     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
01006 
01007     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
01008                                           (void*)&progress_pair);
01009     BOOST_REQUIRE(fnptr == callback_counter);
01010     sav.clear();
01011 
01012     try { sav = blaster->Run(); }
01013     catch (...) {
01014         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01015     }
01016 }
01017 
01018 // interrupt_at_random.
01019 BOOST_AUTO_TEST_CASE(testInterruptBlastxExitAtRandom) {
01020 
01021     CRef<CBl2Seq> blaster =
01022         s_SetupWithMultipleQueriesAndSubjects(true, false, eBlastx);
01023 
01024     int num_callbacks_executed(0);
01025     TInterruptFnPtr fnptr =
01026         blaster->SetInterruptCallback(callback_counter,
01027                                       (void*) & num_callbacks_executed);
01028     BOOST_REQUIRE(fnptr == NULL);
01029 
01030     TSeqAlignVector sav(blaster->Run()); // won't throw
01031     CRandom r(time(0));
01032     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
01033     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
01034 
01035     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
01036                                           (void*)&progress_pair);
01037     BOOST_REQUIRE(fnptr == callback_counter);
01038     sav.clear();
01039 
01040     try { sav = blaster->Run(); }
01041     catch (...) {
01042         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01043     }
01044 }
01045 
01046 BOOST_AUTO_TEST_CASE(testInterruptTblastnExitAtRandom) {
01047 
01048     CRef<CBl2Seq> blaster =
01049         s_SetupWithMultipleQueriesAndSubjects(false, true, eTblastn);
01050 
01051     int num_callbacks_executed(0);
01052     TInterruptFnPtr fnptr =
01053         blaster->SetInterruptCallback(callback_counter,
01054                                       (void*)&num_callbacks_executed);
01055     BOOST_REQUIRE(fnptr == NULL);
01056 
01057     TSeqAlignVector sav(blaster->Run()); // won't throw
01058     CRandom r(time(0));
01059     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
01060     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
01061 
01062     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
01063                                           (void*)&progress_pair);
01064     BOOST_REQUIRE(fnptr == callback_counter);
01065     sav.clear();
01066 
01067     try { sav = blaster->Run(); }
01068     catch (...) {
01069         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01070     }
01071 }
01072 
01073 BOOST_AUTO_TEST_CASE(testInterruptTblastxExitAtRandom) {
01074 
01075     CRef<CBl2Seq> blaster =
01076         s_SetupWithMultipleQueriesAndSubjects(true, true, eTblastx);
01077 
01078     int num_callbacks_executed(0);
01079     TInterruptFnPtr fnptr =
01080         blaster->SetInterruptCallback(callback_counter,
01081                                       (void*) & num_callbacks_executed);
01082     BOOST_REQUIRE(fnptr == NULL);
01083 
01084     TSeqAlignVector sav(blaster->Run()); // won't throw
01085     CRandom r(time(0));
01086     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
01087     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
01088 
01089     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
01090                                           (void*)&progress_pair);
01091     BOOST_REQUIRE(fnptr == callback_counter);
01092     sav.clear();
01093 
01094     try { sav = blaster->Run(); }
01095     catch (...) {
01096         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01097     }
01098 }
01099 
01100 BOOST_AUTO_TEST_CASE(testInterruptBlastpExitAfter3Callbacks) {
01101     CSeq_id id("gi|129295");
01102     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
01103 
01104     CBl2Seq blaster(*sl, *sl, eBlastp);
01105     TInterruptFnPtr fnptr =
01106         blaster.SetInterruptCallback(interrupt_after3calls);
01107     BOOST_REQUIRE(fnptr == NULL);
01108 
01109     TSeqAlignVector sav;
01110     try { sav = blaster.Run(); }
01111     catch (...) {
01112         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01113     }
01114 }
01115 
01116 BOOST_AUTO_TEST_CASE(testInterruptBlastxExitOnTraceback) {
01117 
01118     CRef<CBl2Seq> blaster = s_SetupWithMultipleQueriesAndSubjects(true,
01119                                                                   false,
01120                                                                   eBlastx);
01121     TInterruptFnPtr fnptr =
01122         blaster->SetInterruptCallback(interrupt_on_traceback);
01123     BOOST_REQUIRE(fnptr == NULL);
01124 
01125     TSeqAlignVector sav;
01126     try { sav = blaster->Run(); }
01127     catch (...) {
01128         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01129     }
01130 }
01131 
01132 BOOST_AUTO_TEST_CASE(testInterruptTblastxExitOnTraceback) {
01133 
01134     CRef<CBl2Seq> blaster = s_SetupWithMultipleQueriesAndSubjects
01135         (true, true, eTblastx);
01136     TInterruptFnPtr fnptr =
01137         blaster->SetInterruptCallback(interrupt_on_traceback);
01138     BOOST_REQUIRE(fnptr == NULL);
01139 
01140     TSeqAlignVector sav;
01141     try { sav = blaster->Run(); }
01142     catch (...) {
01143         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01144     }
01145 }
01146 
01147 BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueriesWithInvalidSeqId) {
01148     vector<int> q_gis, s_gis;
01149 
01150     // Setup the queries
01151     q_gis.push_back(129295);
01152     q_gis.push_back(-1);        // invalid seqid
01153 
01154     // setup the subjects
01155     s_gis.push_back(129295);
01156     s_gis.push_back(4336138);   // no hits with gi 129295
01157 
01158     TSeqLocVector queries;
01159     ITERATE(vector<int>, itr, q_gis) {
01160         CRef<CSeq_loc> loc(new CSeq_loc());
01161         loc->SetWhole().SetGi(*itr);
01162 
01163         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
01164         scope->AddDefaults();
01165         queries.push_back(SSeqLoc(loc, scope));
01166     }
01167 
01168     TSeqLocVector subjects;
01169     ITERATE(vector<int>, itr, s_gis) {
01170         CRef<CSeq_loc> loc(new CSeq_loc());
01171         loc->SetWhole().SetGi(*itr);
01172 
01173         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
01174         scope->AddDefaults();
01175         subjects.push_back(SSeqLoc(loc, scope));
01176     }
01177 
01178     // BLAST by concatenating all queries
01179     CBl2Seq blaster4all(queries, subjects, eBlastp);
01180     TSeqAlignVector sas_v = blaster4all.Run(); 
01181 
01182     TSearchMessages m;
01183     blaster4all.GetMessages(m);
01184     BOOST_REQUIRE_EQUAL(subjects.size()*queries.size(), sas_v.size());
01185     BOOST_REQUIRE_EQUAL(queries.size(), m.size()); 
01186 
01187     BOOST_REQUIRE(m[0].empty());
01188     BOOST_REQUIRE(!m[1].empty());
01189 
01190     // Verify the error message
01191     TQueryMessages qm = m[1];
01192     BOOST_REQUIRE(qm.front()->GetMessage().find("Cannot resolve") !=
01193                    string::npos);
01194 
01195     // Verify that the alignments corresponding to the 2nd query are indeed empty
01196     // in older version this was sas_v[1], order has changed
01197     BOOST_REQUIRE_EQUAL(0, (int) sas_v[2]->Size());
01198 }
01199 
01200 BOOST_AUTO_TEST_CASE(NucleotideBlastMultipleQueriesWithInvalidSeqId) {
01201     CSeq_id id1(CSeq_id::e_Gi, 555);
01202     auto_ptr<SSeqLoc> sl1(CTestObjMgr::Instance().CreateSSeqLoc(id1));
01203     CSeq_id id2(CSeq_id::e_Gi, 556);
01204     auto_ptr<SSeqLoc> sl2(CTestObjMgr::Instance().CreateSSeqLoc(id2));
01205 
01206     const TSeqPos kFakeBioseqLength = 12;
01207     const char byte(0);   // string of 4 A's in ncbi2na
01208     vector<char> na_data(kFakeBioseqLength/4, byte);
01209 
01210     CRef<CSeq_id> fake_id(new CSeq_id("lcl|77"));
01211     CBioseq fake_bioseq;
01212     fake_bioseq.SetInst().SetLength(kFakeBioseqLength);
01213     fake_bioseq.SetInst().SetSeq_data().SetNcbi2na().Set().swap(na_data);
01214     fake_bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
01215     fake_bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
01216     fake_bioseq.SetId().push_back(fake_id);
01217     CRef<CSeq_loc> fake_loc(new CSeq_loc);
01218     fake_loc->SetWhole(*fake_id);
01219 
01220     CRef<CScope> scope(CSimpleOM::NewScope(false));
01221     scope->AddBioseq(fake_bioseq);
01222     auto_ptr<SSeqLoc> sl_bad(new SSeqLoc(*fake_loc, *scope));
01223 
01224     TSeqPos len = sequence::GetLength(*sl_bad->seqloc, sl_bad->scope);
01225     BOOST_REQUIRE_EQUAL(kFakeBioseqLength, len);
01226 
01227     TSeqLocVector queries;
01228     queries.push_back(*sl1);
01229     queries.push_back(*sl_bad);
01230     queries.push_back(*sl2);
01231 
01232     // All subjects have matches against this gi
01233     CSeq_id subj_id(CSeq_id::e_Gi, 555);
01234     auto_ptr<SSeqLoc> subj_loc
01235         (CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
01236     TSeqLocVector subject;
01237     subject.push_back(*subj_loc);;
01238     
01239     CBlastNucleotideOptionsHandle opts_handle;
01240     opts_handle.SetMaskAtHash(false);
01241     CBl2Seq bl2seq(queries, subject, opts_handle);
01242     TSeqAlignVector sas_v = bl2seq.Run(); 
01243     TSearchMessages m;
01244     bl2seq.GetMessages(m);
01245     BOOST_REQUIRE_EQUAL(sas_v.size(), m.size());
01246     BOOST_REQUIRE_EQUAL(queries.size(), sas_v.size());
01247 
01248     BOOST_REQUIRE(m[0].empty());
01249     BOOST_REQUIRE(!m[1].empty());
01250     BOOST_REQUIRE(m[2].empty());
01251 
01252     TQueryMessages qm = m[1];
01253 
01254     // no duplicate messages for the contexts
01255     BOOST_REQUIRE(qm.size() == 1); 
01256 
01257     // Verify the error message
01258     ITERATE(TQueryMessages, itr, qm) {
01259         BOOST_REQUIRE((*itr)->GetMessage().find("Could not calculate "
01260                                                  "ungapped Karlin-Altschul "
01261                                                  "parameters") 
01262                        != string::npos);
01263     }
01264 
01265     // Verify that the alignments corresponding to the 2nd query are indeed
01266     // empty
01267     ITERATE(CSeq_align_set::Tdata, alignments, sas_v[1]->Get()) {
01268         BOOST_REQUIRE((*alignments)->GetSegs().IsDisc());
01269         BOOST_REQUIRE((*alignments)->GetSegs().GetDisc().Get().empty());
01270     }
01271 }
01272 
01273 BOOST_AUTO_TEST_CASE(ProteinSelfHitWithMask) {
01274     CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 129295));
01275     CRef<CSeq_loc> sl(new CSeq_loc());
01276     sl->SetWhole(*id);
01277     CRef<CSeq_loc> mask(new CSeq_loc(*id, 50, 100));
01278     CRef<CScope> scope(CSimpleOM::NewScope());
01279     SSeqLoc seqloc(sl, scope, mask);
01280 
01281     CBl2Seq bl2seq(seqloc, seqloc, eBlastp);
01282     TSeqAlignVector sav(bl2seq.Run());
01283     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01284     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01285 }
01286 
01287 // Inspired by SB-285
01288 BOOST_AUTO_TEST_CASE(NucleotideMaskedLocation) {
01289     CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 83219349));
01290     CRef<CSeq_loc> sl(new CSeq_loc());
01291     sl->SetWhole(*id);
01292     CRef<CSeq_loc> mask(new CSeq_loc(*id, 57, 484));
01293     CRef<CScope> scope(CSimpleOM::NewScope());
01294     SSeqLoc query_seqloc(sl, scope, mask);
01295 
01296     CRef<CSeq_id> sid(new CSeq_id(CSeq_id::e_Gi, 88954065));
01297     CRef<CSeq_loc> ssl(new CSeq_loc(*sid, 9909580-100, 9909607+100));
01298     SSeqLoc subj_seqloc(ssl, scope);
01299 
01300     CBl2Seq bl2seq(query_seqloc, subj_seqloc, eMegablast);
01301     TSeqAlignVector sav(bl2seq.Run());
01302     BOOST_REQUIRE_EQUAL(0, sav[0]->Get().size());
01303 }
01304 
01305 // Inspired by SB-285
01306 BOOST_AUTO_TEST_CASE(NucleotideMaskedLocation_FromFile) {
01307     CNcbiIfstream infile("data/masked.fsa");
01308     const bool is_protein(false);
01309     CBlastInputSourceConfig iconfig(is_protein);
01310     iconfig.SetLowercaseMask(true);
01311     CRef<CBlastFastaInputSource> fasta_src
01312         (new CBlastFastaInputSource(infile, iconfig));
01313     CRef<CBlastInput> input(new CBlastInput(&*fasta_src));
01314     //CRef<CScope> scope(new CScope(*CObjectManager::GetInstance()));
01315     //scope->AddDefaults();
01316     CRef<CScope> scope = CBlastScopeSource(is_protein).NewScope();
01317 
01318     CRef<blast::CBlastQueryVector> seqs = input->GetNextSeqBatch(*scope);
01319     CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*seqs));
01320 
01321     TSeqLocVector subj_vec;
01322     CRef<CSeq_id> sid(new CSeq_id(CSeq_id::e_Gi, 88954065));
01323     CRef<CSeq_loc> ssl(new CSeq_loc(*sid, 9909580-100, 9909607+100));
01324     subj_vec.push_back(SSeqLoc(ssl, scope));
01325     CRef<IQueryFactory> subj_qf(new CObjMgr_QueryFactory(subj_vec));
01326     CRef<CBlastOptionsHandle>
01327         opts_handle(CBlastOptionsFactory::Create(eBlastn));
01328     CRef<CLocalDbAdapter> subjects(new CLocalDbAdapter(subj_qf,
01329                                                        opts_handle));
01330 
01331     size_t num_queries = seqs->Size();
01332     size_t num_subjects = subj_vec.size();
01333     BOOST_REQUIRE_EQUAL((size_t)1, num_queries);
01334     BOOST_REQUIRE_EQUAL((size_t)1, num_subjects);
01335 
01336     // BLAST by concatenating all queries
01337     CLocalBlast blaster(queries, opts_handle, subjects);
01338     CRef<CSearchResultSet> results = blaster.Run();
01339     BOOST_REQUIRE(results->GetResultType() == eSequenceComparison);
01340     BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
01341                         results->GetNumResults());
01342     BOOST_REQUIRE_EQUAL((num_queries*num_subjects), results->size());
01343     BOOST_REQUIRE_EQUAL(num_queries, results->GetNumQueries());
01344     BOOST_REQUIRE_EQUAL(num_subjects,
01345                         results->GetNumResults()/results->GetNumQueries());
01346 
01347     CSearchResults& res = (*results)[0];
01348     BOOST_REQUIRE(res.HasAlignments() == false);
01349 }
01350 
01351 // test for the case where the use of composition based
01352 // satistics should have deleted a hit but did not (used to crash)
01353 BOOST_AUTO_TEST_CASE(ProteinCompBasedStats) {
01354 
01355     CRef<CObjectManager> kObjMgr = CObjectManager::GetInstance();
01356     CRef<CScope> scope(new CScope(*kObjMgr));
01357     CRef<CSeq_entry> seq_entry1;
01358     const string kFileName("data/blastp_compstats.fa");
01359     ifstream in1(kFileName.c_str());
01360     if ( !in1 )
01361         throw runtime_error("Failed to open " + kFileName);
01362     if ( !(seq_entry1 = CFastaReader(in1).ReadOneSeq()))
01363         throw runtime_error("Failed to read sequence from " + kFileName);
01364     scope->AddTopLevelSeqEntry(*seq_entry1);
01365     CRef<CSeq_loc> seqloc1(new CSeq_loc);
01366     const string kSeqIdString1("lcl|1");
01367     CRef<CSeq_id> id1(new CSeq_id(kSeqIdString1));
01368     seqloc1->SetWhole(*id1);
01369     SSeqLoc ss1(seqloc1, scope);
01370 
01371     CSeq_id id("gi|4503637");
01372     auto_ptr<SSeqLoc> ss2(CTestObjMgr::Instance().CreateSSeqLoc(id));
01373 
01374     CBlastProteinOptionsHandle opts_handle;
01375     opts_handle.SetWordSize(2);
01376     opts_handle.SetEvalueThreshold(20000);
01377     opts_handle.SetFilterString("F");/* NCBI_FAKE_WARNING */
01378     opts_handle.SetMatrixName("PAM30");
01379     opts_handle.SetGapOpeningCost(9);
01380     opts_handle.SetGapExtensionCost(1);
01381     opts_handle.SetOptions().SetCompositionBasedStats(
01382                                           eCompositionBasedStats);
01383 
01384     CBl2Seq blaster(ss1, *ss2, opts_handle);
01385     TSeqAlignVector sav(blaster.Run());
01386     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01387     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01388 }
01389 
01390 BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryBothStrands) {
01391     CSeq_id qid("gi|555");
01392     auto_ptr<SSeqLoc> query(
01393         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01394     query->genetic_code_id = 1;
01395 
01396     CSeq_id sid("gi|129295");
01397     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01398 
01399     CBl2Seq blaster(*query, *subj, eBlastx);
01400     TSeqAlignVector sav(blaster.Run());
01401     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01402     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetStd().size());
01403     testBlastHitCounts(blaster, eBlastx_555_129295);
01404     testRawCutoffs(blaster, eBlastx, eBlastx_555_129295);
01405 }
01406 
01407 BOOST_AUTO_TEST_CASE(NucleotideSelfHitWithSubjectMask) {
01408     CRef<CSeq_id> query_id(new CSeq_id(CSeq_id::e_Gi, 148727250));
01409     CRef<CSeq_id> subj_id(new CSeq_id(CSeq_id::e_Gi, 89059606));
01410     CRef<CSeq_loc> qsl(new CSeq_loc(*query_id, 0, 1000));
01411     CRef<CSeq_loc> ssl(new CSeq_loc(*subj_id, 0, 1000));
01412     CPacked_seqint::TRanges mask_vector;
01413     mask_vector.push_back(TSeqRange(0, 44));
01414     mask_vector.push_back(TSeqRange(69, 582));
01415     mask_vector.push_back(TSeqRange(610, 834));
01416     mask_vector.push_back(TSeqRange(854, 1000));
01417     CRef<CPacked_seqint> masks(new CPacked_seqint(*subj_id,
01418                                                   mask_vector));
01419     CRef<CSeq_loc> subj_mask(new CSeq_loc());
01420     subj_mask->SetPacked_int(*masks);
01421     CRef<CScope> scope(CSimpleOM::NewScope());
01422     SSeqLoc query(qsl, scope);
01423     auto_ptr<SSeqLoc> subject(new SSeqLoc(ssl, scope, subj_mask));
01424     {
01425         CBl2Seq bl2seq(query, *subject, eBlastn);
01426         TSeqAlignVector sav(bl2seq.Run());
01427         BOOST_REQUIRE_EQUAL((size_t)1, sav.front()->Get().size());
01428     }
01429 
01430     // Now compare the same sequences, without the subject masks
01431     subject.reset(new SSeqLoc(ssl, scope));
01432     {
01433         CBl2Seq bl2seq(query, *subject, eBlastn);
01434         TSeqAlignVector sav(bl2seq.Run());
01435         BOOST_REQUIRE_EQUAL((size_t)4, sav.front()->Get().size());
01436     }
01437 }
01438 
01439 BOOST_AUTO_TEST_CASE(NucleotideBlastSelfHit) {
01440     CSeq_id id("gi|555");
01441     auto_ptr<SSeqLoc> sl(
01442         CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
01443 
01444     // Traditional blastn search
01445     CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(eBlastn));
01446     CBl2Seq blaster(*sl, *sl, *opts);
01447     TSeqAlignVector sav = blaster.Run();
01448     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01449     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01450     testBlastHitCounts(blaster, eBlastn_555_555);
01451     testRawCutoffs(blaster, eBlastn, eBlastn_555_555);
01452 
01453     // Change the options to megablast
01454     opts.Reset(CBlastOptionsFactory::Create(eMegablast));
01455     blaster.SetOptionsHandle() = *opts;
01456     sav = blaster.Run();
01457     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
01458     sar = *(sav[0]->Get().begin());
01459     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01460     testBlastHitCounts(blaster, eMegablast_555_555);
01461     testRawCutoffs(blaster, eMegablast, eMegablast_555_555);
01462 
01463     // Change the options to discontiguous megablast
01464     opts.Reset(CBlastOptionsFactory::Create(eDiscMegablast));
01465     blaster.SetOptionsHandle() = *opts;
01466     sav = blaster.Run();
01467     sar = *(sav[0]->Get().begin());
01468     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01469     testBlastHitCounts(blaster, eDiscMegablast_555_555);
01470     testRawCutoffs(blaster, eDiscMegablast, eDiscMegablast_555_555);
01471 }
01472 
01473 BOOST_AUTO_TEST_CASE(MegablastGreedyTraceback) {
01474     CSeq_id query_id("gi|2655203");
01475     auto_ptr<SSeqLoc> ql(
01476         CTestObjMgr::Instance().CreateSSeqLoc(query_id, 
01477                                               eNa_strand_plus));
01478 
01479     CSeq_id subject_id("gi|200811");
01480     auto_ptr<SSeqLoc> sl(
01481         CTestObjMgr::Instance().CreateSSeqLoc(subject_id, 
01482                                               eNa_strand_minus));
01483 
01484     // test a fix for a bug that corrupted the traceback
01485     // in the one hit returned from this search
01486 
01487     CBlastNucleotideOptionsHandle opts;
01488     opts.SetTraditionalMegablastDefaults();
01489     opts.SetMatchReward(1);
01490     opts.SetMismatchPenalty(-2);
01491     opts.SetGapOpeningCost(3);
01492     opts.SetGapExtensionCost(1);
01493     opts.SetWordSize(24);
01494     opts.SetGapExtnAlgorithm(eGreedyScoreOnly);
01495     opts.SetGapTracebackAlgorithm(eGreedyTbck);
01496 
01497     CBl2Seq blaster(*ql, *sl, opts);
01498     blaster.RunWithoutSeqalignGeneration(); /* NCBI_FAKE_WARNING */
01499     BlastHSPResults *results = blaster.GetResults(); /* NCBI_FAKE_WARNING */
01500     BlastHSPList *hsplist = results->hitlist_array[0]->hsplist_array[0];
01501     BOOST_REQUIRE_EQUAL(1, hsplist->hspcnt);
01502     BlastHSP *hsp = hsplist->hsp_array[0];
01503     BOOST_REQUIRE_EQUAL(832, hsp->score);
01504 }
01505 
01506 
01507 BOOST_AUTO_TEST_CASE(MegablastGreedyTraceback2) {
01508     CRef<CObjectManager> kObjMgr = CObjectManager::GetInstance();
01509     CRef<CScope> scope(new CScope(*kObjMgr));
01510 
01511     CRef<CSeq_entry> seq_entry1;
01512     ifstream in1("data/greedy1a.fsa");
01513     if ( !in1 )
01514         throw runtime_error("Failed to open file1");
01515     if ( !(seq_entry1 = CFastaReader(in1).ReadOneSeq()))
01516         throw runtime_error("Failed to read sequence from file1");
01517     scope->AddTopLevelSeqEntry(*seq_entry1);
01518     CRef<CSeq_loc> seqloc1(new CSeq_loc);
01519     const string kSeqIdString1("lcl|1");
01520     CRef<CSeq_id> id1(new CSeq_id(kSeqIdString1));
01521     seqloc1->SetWhole(*id1);
01522     SSeqLoc ss1(seqloc1, scope);
01523 
01524     CRef<CSeq_entry> seq_entry2;
01525     ifstream in2("data/greedy1b.fsa");
01526     if ( !in2 )
01527         throw runtime_error("Failed to open file2");
01528     if ( !(seq_entry2 = CFastaReader(in2).ReadOneSeq()))
01529         throw runtime_error("Failed to read sequence from file2");
01530     scope->AddTopLevelSeqEntry(*seq_entry2);
01531     CRef<CSeq_loc> seqloc2(new CSeq_loc);
01532     const string kSeqIdString2("lcl|2");
01533     CRef<CSeq_id> id2(new CSeq_id(kSeqIdString2));
01534     seqloc2->SetWhole(*id2);
01535     SSeqLoc ss2(seqloc2, scope);
01536 
01537     CBlastNucleotideOptionsHandle handle;
01538     handle.SetGapOpeningCost(0);
01539     handle.SetGapExtensionCost(0);
01540     handle.SetDustFiltering(false);
01541 
01542     // test multiple bug fixes in greedy gapped alignment
01543 
01544     CBl2Seq blaster1(ss1, ss2, handle);
01545     TSeqAlignVector sav(blaster1.Run());
01546     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01547     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
01548 
01549     const CSeq_align& seqalign1 = *sar;
01550     BOOST_REQUIRE(seqalign1.IsSetScore());
01551     ITERATE(CSeq_align::TScore, itr, seqalign1.GetScore()) {
01552         BOOST_REQUIRE((*itr)->IsSetId());
01553         if ((*itr)->GetId().GetStr() == "score") {
01554             BOOST_REQUIRE_EQUAL(619, (*itr)->GetValue().GetInt());
01555             break;
01556         }
01557     }
01558 
01559     handle.SetMatchReward(10);
01560     handle.SetMismatchPenalty(-25);
01561     handle.SetGapXDropoff(100.0);
01562     handle.SetGapXDropoffFinal(100.0);
01563 
01564     CBl2Seq blaster2(ss1, ss2, handle);
01565     sav = blaster2.Run();
01566     sar = *(sav[0]->Get().begin());
01567     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
01568 
01569     const CSeq_align& seqalign2 = *sar;
01570     BOOST_REQUIRE(seqalign2.IsSetScore());
01571     ITERATE(CSeq_align::TScore, itr, seqalign2.GetScore()) {
01572         BOOST_REQUIRE((*itr)->IsSetId());
01573         if ((*itr)->GetId().GetStr() == "score") {
01574             BOOST_REQUIRE_EQUAL(6034, (*itr)->GetValue().GetInt());
01575             break;
01576         }
01577     }
01578 }
01579 
01580 BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryPlusStrand) {
01581     CSeq_id qid("gi|555");
01582     auto_ptr<SSeqLoc> query(
01583         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_plus));
01584 
01585     CSeq_id sid("gi|129295");
01586     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01587 
01588     CBl2Seq blaster(*query, *subj, eBlastx);
01589     TSeqAlignVector sav(blaster.Run());
01590     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01591     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetStd().size());
01592 }
01593 
01594 BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryMinusStrand) {
01595     CSeq_id qid("gi|555");
01596     auto_ptr<SSeqLoc> query(
01597         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_minus));
01598 
01599     CSeq_id sid("gi|129295");
01600     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01601 
01602     CBl2Seq blaster(*query, *subj, eBlastx);
01603     TSeqAlignVector sav(blaster.Run());
01604     // No hits.  Empty CSeq_align_set returned.
01605     BOOST_REQUIRE(sav[0]->IsEmpty() == true);
01606 }
01607 
01608 
01609 BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryBothStrands) {
01610     CSeq_id id("gi|555");
01611     auto_ptr<SSeqLoc> sl(
01612         CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
01613 
01614     CBl2Seq blaster(*sl, *sl, eTblastx);
01615     TSeqAlignVector sav(blaster.Run());
01616     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01617     BOOST_REQUIRE_EQUAL(39, (int)sar->GetSegs().GetStd().size());
01618     testBlastHitCounts(blaster, eTblastx_555_555);
01619     testRawCutoffs(blaster, eTblastx, eTblastx_555_555);
01620 }
01621 
01622 BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryPlusStrand) {
01623     CSeq_id id("gi|555");
01624     auto_ptr<SSeqLoc> sl(
01625         CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_plus));
01626 
01627     CBl2Seq blaster(*sl, *sl, eTblastx);
01628     TSeqAlignVector sav(blaster.Run());
01629     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01630     BOOST_REQUIRE_EQUAL(11, (int)sar->GetSegs().GetStd().size());
01631 }
01632 
01633 BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryMinusStrand) {
01634     CSeq_id id("gi|555");
01635     auto_ptr<SSeqLoc> sl(
01636         CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_minus));
01637 
01638     CBl2Seq blaster(*sl, *sl, eTblastx);
01639     TSeqAlignVector sav(blaster.Run());
01640     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01641     BOOST_REQUIRE_EQUAL(12, (int)sar->GetSegs().GetStd().size());
01642 }
01643 
01644 
01645 BOOST_AUTO_TEST_CASE(TblastxManyHits) {
01646     const int total_num_hsps = 50;
01647     const int num_hsps_to_check = 8;
01648     const int score_array[num_hsps_to_check] = 
01649         { 947, 125, 820, 113, 624, 221, 39, 778};
01650     const int sum_n_array[num_hsps_to_check] = 
01651         { 2, 2, 2, 2, 3, 3, 3, 0};
01652     CSeq_id qid("gi|24719404");
01653     auto_ptr<SSeqLoc> qsl(
01654         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01655     CSeq_id sid("gi|29807292");
01656     pair<TSeqPos, TSeqPos> range(15185000, 15195000);
01657     auto_ptr<SSeqLoc> ssl(
01658         CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
01659     CBl2Seq blaster(*qsl, *ssl, eTblastx);
01660     blaster.SetOptionsHandle().SetMaxNumHspPerSequence(total_num_hsps);
01661 
01662     TSeqAlignVector sav(blaster.Run());
01663 
01664     testBlastHitCounts(blaster, eTblastx_many_hits);
01665     testRawCutoffs(blaster, eTblastx, eTblastx_many_hits);
01666 
01667     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01668     list< CRef<CStd_seg> >& segs = sar->SetSegs().SetStd();
01669     BOOST_REQUIRE_EQUAL(total_num_hsps, (int)segs.size());
01670     int index = 0;
01671     ITERATE(list< CRef<CStd_seg> >, itr, segs) {
01672         const vector< CRef< CScore > >& score_v = (*itr)->GetScores();
01673         ITERATE(CSeq_align::TScore, sitr, score_v) {
01674             BOOST_REQUIRE((*sitr)->IsSetId());
01675             if ((*sitr)->GetId().GetStr() == "score") {
01676                 BOOST_REQUIRE_EQUAL(score_array[index], 
01677                                      (*sitr)->GetValue().GetInt());
01678             } else if ((*sitr)->GetId().GetStr() == "sum_n") {
01679                 BOOST_REQUIRE_EQUAL(sum_n_array[index], 
01680                                      (*sitr)->GetValue().GetInt());
01681             }
01682         }
01683         if (++index == num_hsps_to_check)
01684             break;
01685     }
01686 }
01687 
01688 BOOST_AUTO_TEST_CASE(ProteinBlast2Seqs) {
01689     CSeq_id id("gi|129295");
01690     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(id));
01691 
01692     id.SetGi(7662354);
01693     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(id));
01694 
01695     CBl2Seq blaster(*query, *subj, eBlastp);
01696     TSeqAlignVector sav(blaster.Run());
01697     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01698     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01699     testBlastHitCounts(blaster, eBlastp_129295_7662354);
01700     testRawCutoffs(blaster, eBlastp, eBlastp_129295_7662354);
01701 }
01702 
01703 BOOST_AUTO_TEST_CASE(BlastnWithRepeatFiltering_InvalidDB) {
01704     CSeq_id qid("gi|555");
01705     auto_ptr<SSeqLoc> query(
01706         CTestObjMgr::Instance().CreateSSeqLoc(qid));
01707 
01708     CBlastNucleotideOptionsHandle opts;
01709     opts.SetTraditionalMegablastDefaults();
01710     const string kRepeatDb("junk");
01711     opts.SetRepeatFilteringDB(kRepeatDb.c_str());
01712     bool is_repeat_filtering_on = opts.GetRepeatFiltering();
01713     BOOST_REQUIRE(is_repeat_filtering_on);
01714     string repeat_db(opts.GetRepeatFilteringDB() 
01715                      ? opts.GetRepeatFilteringDB()
01716                      : kEmptyStr);
01717     BOOST_REQUIRE_EQUAL(kRepeatDb, repeat_db);
01718 
01719     CBl2Seq blaster(*query, *query, opts);
01720     try {
01721         TSeqAlignVector sav(blaster.Run());
01722         CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01723         BOOST_REQUIRE(sar.NotEmpty());
01724         BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01725     } catch (const CBlastException& e) {
01726         BOOST_REQUIRE(e.GetErrCode() == CBlastException::eSeqSrcInit);
01727     }
01728 }
01729 
01730 BOOST_AUTO_TEST_CASE(BlastnWithRepeatFiltering) {
01731     CSeq_id qid("gi|555");
01732     auto_ptr<SSeqLoc> query(
01733         CTestObjMgr::Instance().CreateSSeqLoc(qid));
01734 
01735     CBlastNucleotideOptionsHandle opts;
01736     opts.SetTraditionalMegablastDefaults();
01737     opts.SetRepeatFiltering(true);
01738     string repeat_db(opts.GetRepeatFilteringDB() 
01739                      ? opts.GetRepeatFilteringDB()
01740                      : kEmptyStr);
01741     BOOST_REQUIRE_EQUAL(string(kDefaultRepeatFilterDb), repeat_db);
01742     // it's harmless to set them both, but only the latter one will be used
01743     const string kRepeatDb("repeat/repeat_9606");
01744     opts.SetRepeatFilteringDB(kRepeatDb.c_str());
01745     repeat_db.assign(opts.GetRepeatFilteringDB() 
01746                      ? opts.GetRepeatFilteringDB()
01747                      : kEmptyStr);
01748     BOOST_REQUIRE_EQUAL(kRepeatDb, repeat_db);
01749 
01750     bool is_repeat_filtering_on = opts.GetRepeatFiltering();
01751     BOOST_REQUIRE(is_repeat_filtering_on);
01752 
01753     CBl2Seq blaster(*query, *query, opts);
01754     TSeqAlignVector sav(blaster.Run());
01755     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01756     BOOST_REQUIRE(sar.NotEmpty());
01757     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01758 }
01759 
01760 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_Db) {
01761     CSeq_id qid("gi|555");
01762     auto_ptr<SSeqLoc> query(
01763         CTestObjMgr::Instance().CreateSSeqLoc(qid));
01764 
01765     CBlastNucleotideOptionsHandle opts;
01766     opts.SetTraditionalMegablastDefaults();
01767     const string kWindowMaskerDb("9606");
01768     opts.SetWindowMaskerDatabase(kWindowMaskerDb.c_str());
01769     string wmdb(opts.GetWindowMaskerDatabase()
01770                 ? opts.GetWindowMaskerDatabase() : kEmptyStr);
01771     BOOST_REQUIRE_EQUAL(kWindowMaskerDb, wmdb);
01772     BOOST_REQUIRE_EQUAL(0, opts.GetWindowMaskerTaxId());
01773     CBl2Seq blaster(*query, *query, opts);
01774     TSeqAlignVector sav(blaster.Run());
01775     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01776     BOOST_REQUIRE(sar.NotEmpty());
01777     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01778 }
01779 
01780 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_Taxid) {
01781     CSeq_id qid("gi|555");
01782     auto_ptr<SSeqLoc> query(
01783         CTestObjMgr::Instance().CreateSSeqLoc(qid));
01784 
01785     CBlastNucleotideOptionsHandle opts;
01786     opts.SetTraditionalMegablastDefaults();
01787     opts.SetWindowMaskerTaxId(9606);
01788     CBl2Seq blaster(*query, *query, opts);
01789     TSeqAlignVector sav(blaster.Run());
01790     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01791     BOOST_REQUIRE(sar.NotEmpty());
01792     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01793 }
01794 
01795 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_InvalidDb) {
01796     CSeq_id qid("gi|555");
01797     auto_ptr<SSeqLoc> query(
01798         CTestObjMgr::Instance().CreateSSeqLoc(qid));
01799 
01800     CBlastNucleotideOptionsHandle opts;
01801     opts.SetTraditionalMegablastDefaults();
01802     const string kWindowMaskerDb("Dummydb");
01803     opts.SetWindowMaskerDatabase(kWindowMaskerDb.c_str());
01804     string wmdb(opts.GetWindowMaskerDatabase()
01805                 ? opts.GetWindowMaskerDatabase() : kEmptyStr);
01806     BOOST_REQUIRE_EQUAL(kWindowMaskerDb, wmdb);
01807     CBl2Seq blaster(*query, *query, opts);
01808     TSeqAlignVector sav(blaster.Run());
01809     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01810     BOOST_REQUIRE(sar.NotEmpty());
01811     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() == 1);
01812 }
01813 
01814 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_InvalidTaxid) {
01815     CSeq_id qid("gi|555");
01816     auto_ptr<SSeqLoc> query(
01817         CTestObjMgr::Instance().CreateSSeqLoc(qid));
01818 
01819     CBlastNucleotideOptionsHandle opts;
01820     opts.SetTraditionalMegablastDefaults();
01821     const int kInvalidTaxId = -1;
01822     opts.SetWindowMaskerTaxId(kInvalidTaxId);
01823     BOOST_REQUIRE_EQUAL(kInvalidTaxId, opts.GetWindowMaskerTaxId());
01824     CBl2Seq blaster(*query, *query, opts);
01825     TSeqAlignVector sav(blaster.Run());
01826     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01827     BOOST_REQUIRE(sar.NotEmpty());
01828     // find self hit, silently ignoring the failed filtering
01829     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() == 1);
01830 }
01831 
01832 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_DbAndTaxid) {
01833     CSeq_id qid("gi|555");
01834     auto_ptr<SSeqLoc> query(
01835         CTestObjMgr::Instance().CreateSSeqLoc(qid));
01836 
01837     CBlastNucleotideOptionsHandle opts;
01838     opts.SetTraditionalMegablastDefaults();
01839     // if both are set, the database name will be given preference
01840     opts.SetWindowMaskerDatabase("9606");
01841     opts.SetWindowMaskerTaxId(-1);
01842     CBl2Seq blaster(*query, *query, opts);
01843     TSeqAlignVector sav(blaster.Run());
01844     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01845     BOOST_REQUIRE(sar.NotEmpty());
01846     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01847 }
01848 
01849 // Bug report from Alex Astashyn
01850 BOOST_AUTO_TEST_CASE(Alex) {
01851     CSeq_id qid("NG_007092.2");
01852     TSeqRange qr(0, 2311633);
01853     auto_ptr<SSeqLoc> query(
01854         CTestObjMgr::Instance().CreateSSeqLoc(qid, qr, eNa_strand_plus));
01855 
01856     CSeq_id sid("NT_007914.14");
01857     TSeqRange sr(5233652, 9849919);
01858     auto_ptr<SSeqLoc> subj(
01859         CTestObjMgr::Instance().CreateSSeqLoc(sid, sr));
01860 
01861     CBlastNucleotideOptionsHandle opts;
01862     opts.SetTraditionalMegablastDefaults();
01863     opts.SetRepeatFiltering(true);
01864     CBl2Seq blaster(*query, *subj, opts);
01865     TSeqAlignVector sav(blaster.Run());
01866     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01867     BOOST_REQUIRE(sar.NotEmpty());
01868     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01869 }
01870 
01871 BOOST_AUTO_TEST_CASE(NucleotideBlast2Seqs) {
01872     CSeq_id qid("gi|555");
01873     auto_ptr<SSeqLoc> query(
01874         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01875 
01876     CSeq_id sid("gi|3090");
01877     auto_ptr<SSeqLoc> subj(
01878         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
01879 
01880     CBlastNucleotideOptionsHandle opts;
01881     opts.SetTraditionalBlastnDefaults();
01882     CBl2Seq blaster(*query, *subj, opts);
01883     TSeqAlignVector sav(blaster.Run());
01884     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01885     BOOST_REQUIRE_EQUAL(3, (int)sar->GetSegs().GetDenseg().GetNumseg());
01886     testBlastHitCounts(blaster, eBlastn_555_3090);
01887     testRawCutoffs(blaster, eBlastn, eBlastn_555_3090);
01888 }
01889 
01890 BOOST_AUTO_TEST_CASE(ProteinBlastChangeQuery) {
01891     CSeq_id id("gi|129295");
01892     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(id));
01893 
01894     id.SetGi(7662354);
01895     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(id));
01896 
01897     // Run self hit first
01898     CBl2Seq blaster(*subj, *subj, eBlastp);
01899     TSeqAlignVector sav(blaster.Run());
01900     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01901     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01902 
01903     // Change the query sequence (recreates the lookup table)
01904     blaster.SetQuery(*query);
01905     sav = blaster.Run();
01906     sar = *(sav[0]->Get().begin());
01907     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01908 }
01909 
01910 BOOST_AUTO_TEST_CASE(ProteinBlastChangeSubject) {
01911     CSeq_id qid("gi|129295");
01912     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
01913 
01914     CSeq_id sid("gi|7662354");
01915     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01916 
01917     // Run self hit first
01918     CBl2Seq blaster(*query, *query, eBlastp);
01919     TSeqAlignVector sav(blaster.Run());
01920     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01921     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01922 
01923     // Change the subject sequence
01924     blaster.SetSubject(*subj);
01925     sav = blaster.Run();
01926     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
01927     sar = *(sav[0]->Get().begin());
01928     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01929 }
01930 
01931 BOOST_AUTO_TEST_CASE(NucleotideBlastChangeQuery) {
01932     CSeq_id qid("gi|555");
01933     auto_ptr<SSeqLoc> query(
01934         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01935 
01936     CSeq_id sid("gi|3090");
01937     auto_ptr<SSeqLoc> subj(
01938         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
01939 
01940     // Run self hit first
01941     CBlastNucleotideOptionsHandle opts;
01942     opts.SetTraditionalBlastnDefaults();
01943     CBl2Seq blaster(*subj, *subj, opts);
01944     TSeqAlignVector sav(blaster.Run());
01945     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01946     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01947 
01948     // Change the query sequence (recreates the lookup table)
01949     blaster.SetQuery(*query);
01950     sav = blaster.Run();
01951     BOOST_REQUIRE_EQUAL(2, (int)sav[0]->Size());
01952     sar = *(sav[0]->Get().begin());
01953     BOOST_REQUIRE_EQUAL(3, (int)sar->GetSegs().GetDenseg().GetNumseg());
01954 }
01955 
01956 BOOST_AUTO_TEST_CASE(NucleotideBlastChangeSubject) {
01957     CSeq_id qid("gi|555");
01958     auto_ptr<SSeqLoc> query(
01959         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01960 
01961     CSeq_id sid("gi|3090");
01962     auto_ptr<SSeqLoc> subj(
01963         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
01964 
01965     // Run self hit first
01966     CBlastNucleotideOptionsHandle opts;
01967     opts.SetTraditionalBlastnDefaults();
01968     CBl2Seq blaster(*query, *query, opts);
01969     TSeqAlignVector sav(blaster.Run());
01970     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01971     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01972 
01973     // Change the subject sequence
01974     blaster.SetSubject(*subj);
01975     sav = blaster.Run();
01976     sar = *(sav[0]->Get().begin());
01977     BOOST_REQUIRE_EQUAL(3, (int)sar->GetSegs().GetDenseg().GetNumseg());
01978 }
01979 
01980 
01981 BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueries) {
01982     TSeqLocVector sequences;
01983 
01984     CSeq_id qid("gi|129295");
01985     auto_ptr<SSeqLoc> sl1(CTestObjMgr::Instance().CreateSSeqLoc(qid));
01986     sequences.push_back(*sl1);
01987 
01988     CSeq_id sid("gi|7662354");
01989     auto_ptr<SSeqLoc> sl2(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01990     sequences.push_back(*sl2);
01991 
01992     CBl2Seq blaster(sequences, sequences, eBlastp);
01993     TSeqAlignVector seqalign_v = blaster.Run();
01994 
01995     BOOST_REQUIRE_EQUAL(4, (int)seqalign_v.size());
01996     BOOST_REQUIRE_EQUAL(2, (int)sequences.size());
01997 
01998     CRef<CSeq_align> sar;
01999     
02000     BOOST_REQUIRE_EQUAL(1, seqalign_v[0]->Get().size());
02001     sar = *(seqalign_v[0]->Get().begin());
02002     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02003 
02004     BOOST_REQUIRE_EQUAL(2, seqalign_v[1]->Get().size());
02005     sar = *(seqalign_v[1]->Get().begin());
02006     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02007     sar = *(++(seqalign_v[1]->Get().begin()));
02008     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02009 
02010     BOOST_REQUIRE_EQUAL(2, seqalign_v[2]->Get().size());
02011     sar = *(seqalign_v[2]->Get().begin());
02012     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02013     sar = *(++(seqalign_v[2]->Get().begin()));
02014     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02015 
02016     BOOST_REQUIRE_EQUAL(1, seqalign_v[3]->Get().size());
02017     sar = *(seqalign_v[3]->Get().begin());
02018     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02019 
02020 
02021     /* DEBUG OUTPUT
02022     for (size_t i = 0; i < seqalign_v.size(); i++)
02023         cerr << "\n<" << i << ">\n"
02024             << MSerial_AsnText << seqalign_v[i].GetObject() << endl;
02025     */
02026     
02027     testBlastHitCounts(blaster, eBlastp_multi_q);
02028     testRawCutoffs(blaster, eBlastp, eBlastp_multi_q);
02029 
02030     // test the order of queries and subjects:
02031     testResultAlignments(sequences.size(), sequences.size(),
02032                             seqalign_v);
02033 }
02034 
02035 BOOST_AUTO_TEST_CASE(NucleotideBlastMultipleQueries) {
02036     TSeqLocVector sequences;
02037 
02038     CSeq_id qid("gi|555");
02039     auto_ptr<SSeqLoc> sl1(
02040         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
02041     sequences.push_back(*sl1);
02042     BOOST_REQUIRE(sl1->mask.Empty());
02043 
02044     CSeq_id sid("gi|3090");
02045     auto_ptr<SSeqLoc> sl2(
02046         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
02047     sequences.push_back(*sl2);
02048     BOOST_REQUIRE(sl2->mask.Empty());
02049 
02050     CBl2Seq blaster(sequences, sequences, eBlastn);
02051     TSeqAlignVector seqalign_v = blaster.Run();
02052     BOOST_REQUIRE_EQUAL(2, (int)sequences.size());
02053     BOOST_REQUIRE_EQUAL(4, (int)seqalign_v.size());
02054 
02055     CRef<CSeq_align> sar = *(seqalign_v[0]->Get().begin());
02056     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02057 
02058     // in older version this was seqalign_v[1], order has changed
02059     sar = *(seqalign_v[2]->Get().begin());
02060     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02061 
02062     testBlastHitCounts(blaster, eBlastn_multi_q);
02063     testRawCutoffs(blaster, eBlastn, eBlastn_multi_q);
02064 
02065     // test the order of queries and subjects:
02066     testResultAlignments(sequences.size(), sequences.size(),
02067                             seqalign_v);
02068 }
02069 
02070 void DoSearchWordSize4(const char *file1, const char *file2) {
02071     CRef<CObjectManager> kObjMgr = CObjectManager::GetInstance();
02072     CRef<CScope> scope(new CScope(*kObjMgr));
02073 
02074     CRef<CSeq_entry> seq_entry1;
02075     ifstream in1(file1);
02076     if ( !in1 )
02077         throw runtime_error("Failed to open file1");
02078     if ( !(seq_entry1 = CFastaReader(in1).ReadOneSeq()))
02079         throw runtime_error("Failed to read sequence from file1");
02080     scope->AddTopLevelSeqEntry(*seq_entry1);
02081     CRef<CSeq_loc> seqloc1(new CSeq_loc);
02082     const string kSeqIdString1("lcl|1");
02083     CRef<CSeq_id> id1(new CSeq_id(kSeqIdString1));
02084     seqloc1->SetWhole(*id1);
02085     SSeqLoc ss1(seqloc1, scope);
02086 
02087     CRef<CSeq_entry> seq_entry2;
02088     ifstream in2(file2);
02089     if ( !in2 )
02090         throw runtime_error("Failed to open file2");
02091     if ( !(seq_entry2 = CFastaReader(in2).ReadOneSeq()))
02092         throw runtime_error("Failed to read sequence from file2");
02093     scope->AddTopLevelSeqEntry(*seq_entry2);
02094     CRef<CSeq_loc> seqloc2(new CSeq_loc);
02095     const string kSeqIdString2("lcl|2");
02096     CRef<CSeq_id> id2(new CSeq_id(kSeqIdString2));
02097     seqloc2->SetWhole(*id2);
02098     SSeqLoc ss2(seqloc2, scope);
02099 
02100     CBlastNucleotideOptionsHandle handle;
02101     handle.SetTraditionalBlastnDefaults();
02102     handle.SetWordSize(4);
02103     handle.SetDustFiltering(false);
02104     handle.SetMismatchPenalty(-1);
02105     handle.SetMatchReward(1);
02106     handle.SetEvalueThreshold(10000);
02107 
02108     CBl2Seq blaster(ss1, ss2, handle);
02109     blaster.RunWithoutSeqalignGeneration(); /* NCBI_FAKE_WARNING */
02110     BlastHSPResults *results = blaster.GetResults(); /* NCBI_FAKE_WARNING */
02111     BOOST_REQUIRE(results != NULL);
02112     BOOST_REQUIRE(results->hitlist_array[0] != NULL);
02113     BOOST_REQUIRE(results->hitlist_array[0]->hsplist_array[0] != NULL);
02114     BlastHSPList *hsp_list = results->hitlist_array[0]->hsplist_array[0];
02115     BOOST_REQUIRE(hsp_list->hspcnt > 0);
02116     BOOST_REQUIRE(hsp_list->hsp_array[0] != NULL);
02117 
02118     // verify that all hits are properly formed, and
02119     // at least as long as the word size
02120 
02121     for (int i = 0; i < hsp_list->hspcnt; i++) {
02122         BlastHSP *hsp = hsp_list->hsp_array[i];
02123         BOOST_REQUIRE(hsp != NULL);
02124         BOOST_REQUIRE(hsp->query.offset < hsp->query.end);
02125         BOOST_REQUIRE(hsp->subject.offset < hsp->subject.end);
02126         BOOST_REQUIRE(hsp->query.gapped_start >= hsp->query.offset &&
02127                        hsp->query.gapped_start < hsp->query.end);
02128         BOOST_REQUIRE(hsp->subject.gapped_start >= hsp->subject.offset &&
02129                        hsp->subject.gapped_start < hsp->subject.end);
02130         BOOST_REQUIRE(hsp->query.end - hsp->query.offset >= 4);
02131         BOOST_REQUIRE(hsp->subject.end - hsp->subject.offset >= 4);
02132     }
02133 }
02134 
02135 BOOST_AUTO_TEST_CASE(NucleotideBlastWordSize4) {
02136     DoSearchWordSize4("data/blastn_size4a.fsa",
02137                       "data/blastn_size4b.fsa");
02138 }
02139 
02140 // test bug fix when size-4 seed falls at the end of
02141 // the subject sequence
02142 BOOST_AUTO_TEST_CASE(NucleotideBlastWordSize4_EOS) {
02143     DoSearchWordSize4("data/blastn_size4c.fsa",
02144                       "data/blastn_size4d.fsa");
02145 }
02146 
02147 BOOST_AUTO_TEST_CASE(TblastnOutOfFrame) {
02148     CSeq_id qid("NP_647642.2"); // Protein sequence
02149     CSeq_id sid("BC042576.1");  // DNA sequence
02150 
02151     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
02152     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02153 
02154     // Set the options
02155     CTBlastnOptionsHandle opts;
02156     opts.SetOutOfFrameMode();
02157     opts.SetFrameShiftPenalty(10);
02158     opts.SetFilterString("m;L");/* NCBI_FAKE_WARNING */
02159     opts.SetEvalueThreshold(0.01);
02160     opts.SetCompositionBasedStats(eNoCompositionBasedStats);
02161 
02162     CBl2Seq blaster(*query, *subj, opts);
02163     TSeqAlignVector sav(blaster.Run());
02164     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
02165     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02166     BOOST_REQUIRE_EQUAL(2, (int)sav[0]->Size());
02167     testBlastHitCounts(blaster, eTblastn_oof);
02168     testRawCutoffs(blaster, eTblastn, eTblastn_oof);
02169 }
02170 
02171 // test multiple fixes for OOF alignments on both subject strands
02172 BOOST_AUTO_TEST_CASE(TblastnOutOfFrame2) {
02173     CSeq_id qid("gi|38111923"); // Protein sequence
02174     CSeq_id sid("gi|6648925");  // DNA sequence
02175 
02176     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
02177     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02178 
02179     // Set the options
02180     CTBlastnOptionsHandle opts;
02181     opts.SetOutOfFrameMode();
02182     opts.SetFrameShiftPenalty(5);
02183     opts.SetCompositionBasedStats(eNoCompositionBasedStats);
02184     opts.SetFilterString("L");/* NCBI_FAKE_WARNING */
02185 
02186     CBl2Seq blaster(*query, *subj, opts);
02187     TSeqAlignVector sav(blaster.Run());
02188     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02189     BOOST_REQUIRE_EQUAL(5, (int)sav[0]->Size());
02190 
02191     // test fix for a bug generating OOF traceback
02192 
02193     const CSeq_align& seqalign = *sar;
02194     BOOST_REQUIRE(seqalign.IsSetScore());
02195     ITERATE(CSeq_align::TScore, itr, seqalign.GetScore()) {
02196         BOOST_REQUIRE((*itr)->IsSetId());
02197         if ((*itr)->GetId().GetStr() == "num_ident") {
02198             BOOST_REQUIRE_EQUAL(55, (*itr)->GetValue().GetInt());
02199             break;
02200         }
02201     }
02202 }
02203 
02204 BOOST_AUTO_TEST_CASE(BlastxOutOfFrame) {
02205     CSeq_id qid("BC042576.1");  // DNA sequence
02206     CSeq_id sid("NP_647642.2"); // Protein sequence
02207 
02208     auto_ptr<SSeqLoc> query(
02209         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
02210     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02211 
02212     // Set the options
02213     CBlastxOptionsHandle opts;
02214     opts.SetOutOfFrameMode();
02215     opts.SetFrameShiftPenalty(10);
02216     opts.SetFilterString("m;L");/* NCBI_FAKE_WARNING */
02217     opts.SetEvalueThreshold(0.01);
02218 
02219     CBl2Seq blaster(*query, *subj, opts);
02220     TSeqAlignVector sav(blaster.Run());
02221     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02222     BOOST_REQUIRE_EQUAL(2, (int)sav[0]->Size());
02223     testBlastHitCounts(blaster, eBlastx_oof);
02224     testRawCutoffs(blaster, eBlastx, eBlastx_oof);
02225 }
02226 
02227 // test for a bug computing OOF sequence lengths during traceback
02228 
02229 BOOST_AUTO_TEST_CASE(BlastxOutOfFrame_DifferentFrames) {
02230     CSeq_id qid("gi|27486285");  // DNA sequence
02231     CSeq_id sid("gi|7331210"); // Protein sequence
02232 
02233     auto_ptr<SSeqLoc> query(
02234         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
02235     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02236 
02237     // Set the options
02238     CBlastxOptionsHandle opts;
02239     opts.SetOutOfFrameMode();
02240     opts.SetFrameShiftPenalty(10);
02241 
02242     CBl2Seq blaster(*query, *subj, opts);
02243     TSeqAlignVector sav(blaster.Run());
02244     BOOST_REQUIRE_EQUAL(5, (int)sav[0]->Size());
02245 }
02246 
02247 // The following 3 functions are for checking results in the strand 
02248 // combinations tests.
02249 
02250 void x_TestAlignmentQuerySubjStrandCombinations(TSeqAlignVector& sav, 
02251                                                 string aligned_strands) {
02252 
02253     // Starting offsets in alignment as query/subject pairs
02254     vector< pair<TSignedSeqPos, TSignedSeqPos> > starts;
02255     starts.push_back(make_pair(7685759, 10));
02256     starts.push_back(make_pair(7685758, -1));
02257     starts.push_back(make_pair(7685718, 269));
02258     starts.push_back(make_pair(7685717, -1));
02259     starts.push_back(make_pair(7685545, 309));
02260 
02261     const size_t kNumSegments(starts.size());
02262 
02263     // Lengths of the aligned regions defined in starts vector
02264     vector<TSeqPos> lengths;
02265     lengths.reserve(kNumSegments);
02266     lengths.push_back(259);
02267     lengths.push_back(1);
02268     lengths.push_back(40);
02269     lengths.push_back(1);
02270     lengths.push_back(172);
02271 
02272     // Strands of the involved aligned segments as query/subject pairs
02273     typedef vector< pair<ENa_strand, ENa_strand> > TStrandPairs;
02274     TStrandPairs strands(kNumSegments, 
02275                          make_pair(eNa_strand_minus, eNa_strand_plus));
02276 
02277     // Reverse the contents of the vectors if necessary
02278     if (aligned_strands == "plus-minus") {
02279         reverse(starts.begin(), starts.end());
02280         reverse(lengths.begin(), lengths.end());
02281         NON_CONST_ITERATE(TStrandPairs, itr, strands) {
02282             swap(itr->first, itr->second);
02283         }
02284     }
02285     BOOST_REQUIRE_EQUAL(kNumSegments, lengths.size());
02286     BOOST_REQUIRE_EQUAL(kNumSegments, strands.size());
02287 
02288     // Obtain the data from the Seq-align's dense segs ...
02289     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02290     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
02291     // BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02292     const CDense_seg& ds = sar->GetSegs().GetDenseg();
02293     // CTypeIterator<CDense_seg> segs_itr(Begin(*sar));
02294     const size_t kNumDim(ds.GetDim());
02295     vector< TSignedSeqPos > seg_starts = ds.GetStarts();
02296     vector< TSeqPos> seg_lengths = ds.GetLens();
02297     vector< ENa_strand> seg_strands = ds.GetStrands();
02298     BOOST_REQUIRE_EQUAL(kNumSegments, seg_lengths.size());
02299     BOOST_REQUIRE_EQUAL(kNumSegments*kNumDim, seg_starts.size());
02300 
02301     // ... and compare it to what is expected
02302     for (size_t index = 0; index < kNumSegments; ++index) {
02303         ostringstream os;
02304         os << "Segment " << index << ": expected " << lengths[index]
02305            << " actual " << seg_lengths[index];
02306         BOOST_REQUIRE_MESSAGE(lengths[index] == seg_lengths[index],
02307                               os.str());
02308 
02309         os.str("");
02310         os << "Segment " << index << ": expected " << starts[index].first
02311            << " actual " << seg_starts[2*index];
02312         BOOST_REQUIRE_MESSAGE(starts[index].first == seg_starts[2*index],
02313                               os.str());
02314         os.str("");
02315         os << "Segment " << index << ": expected " << starts[index].second
02316            << " actual " << seg_starts[2*index];
02317         BOOST_REQUIRE_MESSAGE(starts[index].second == seg_starts[2*index+1],
02318                               os.str());
02319         os.str("");
02320         os << "Segment " << index << ": expected " << strands[index].first
02321            << " actual " << seg_strands[2*index];
02322         BOOST_REQUIRE_MESSAGE(strands[index].first == seg_strands[2*index],
02323                               os.str());
02324         os.str("");
02325         os << "Segment " << index << ": expected " << strands[index].second
02326            << " actual " << seg_strands[2*index];
02327         BOOST_REQUIRE_MESSAGE(strands[index].second == seg_strands[2*index+1],
02328                               os.str());
02329     }
02330 }
02331 
02332 static void testIntervalWholeAlignment(TSeqAlignVector& sav)
02333 {
02334     const int num_segs = 5;
02335     const int num_starts = 10;
02336     const int starts[num_starts] = { 7685759, 0, 7685758, -1, 7685718,
02337                                      269, 7685717, -1, 7685545, 309 };
02338     const int lengths[num_segs] = { 269, 1, 40, 1, 172 };
02339     int index;
02340 
02341     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02342     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
02343     CTypeIterator<CDense_seg> segs_itr(Begin(*sar));
02344     vector< TSignedSeqPos > seg_starts = segs_itr->GetStarts();
02345     vector< TSeqPos> seg_lengths = segs_itr->GetLens();
02346     vector< ENa_strand> seg_strands = segs_itr->GetStrands();
02347     BOOST_REQUIRE_EQUAL(num_segs, (int)seg_lengths.size());
02348     BOOST_REQUIRE_EQUAL(num_starts, (int)seg_starts.size());
02349     for (index = 0; index < num_segs; ++index) {
02350         BOOST_REQUIRE_EQUAL(lengths[index], (int)seg_lengths[index]);
02351         BOOST_REQUIRE_EQUAL(starts[2*index], (int)seg_starts[2*index]);
02352         BOOST_REQUIRE_EQUAL(starts[2*index+1], (int)seg_starts[2*index+1]);
02353         BOOST_REQUIRE(seg_strands[2*index] == eNa_strand_minus);
02354         BOOST_REQUIRE(seg_strands[2*index+1] == eNa_strand_plus);
02355     }
02356 }
02357 
02358 static void testWholeIntervalAlignment(TSeqAlignVector& sav)
02359 {
02360     const int num_segs = 5;
02361     const int num_starts = 10;
02362     const int starts[num_starts] = { 309, 7685545, -1, 7685717, 269, 7685718,
02363                                      -1, 7685758, 0, 7685759 };
02364     const int lengths[num_segs] = { 172, 1, 40, 1, 269 };
02365     int index;
02366 
02367     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02368     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
02369     CTypeIterator<CDense_seg> segs_itr(Begin(*sar));
02370     vector< TSignedSeqPos > seg_starts = segs_itr->GetStarts();
02371     vector< TSeqPos> seg_lengths = segs_itr->GetLens();
02372     vector< ENa_strand> seg_strands = segs_itr->GetStrands();
02373     BOOST_REQUIRE_EQUAL(num_segs, (int)seg_lengths.size());
02374     BOOST_REQUIRE_EQUAL(num_starts, (int)seg_starts.size());
02375     for (index = 0; index < num_segs; ++index) {
02376         BOOST_REQUIRE_EQUAL(lengths[index], (int)seg_lengths[index]);
02377         BOOST_REQUIRE_EQUAL(starts[2*index], (int)seg_starts[2*index]);
02378         BOOST_REQUIRE_EQUAL(starts[2*index+1], (int)seg_starts[2*index+1]);
02379         BOOST_REQUIRE(seg_strands[2*index] == eNa_strand_minus);
02380         BOOST_REQUIRE(seg_strands[2*index+1] == eNa_strand_plus);
02381     }
02382 }
02383 
02384 // should find alignments
02385 BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjBothStrands) {
02386     // Alignment in these sequences is from plus/minus strands
02387     CSeq_id qid("NT_004487.15");
02388     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02389     auto_ptr<SSeqLoc> query(
02390         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02391                                                eNa_strand_both));
02392 
02393     CSeq_id sid("AA441981.1");
02394     range.first = 10;
02395     range.second = 480;
02396     auto_ptr<SSeqLoc> subj(
02397         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02398                                                eNa_strand_both));
02399 
02400     CBlastNucleotideOptionsHandle* opts = new CBlastNucleotideOptionsHandle;
02401     opts->SetTraditionalBlastnDefaults();
02402     CBl2Seq blaster(*query, *subj, *opts);
02403     TSeqAlignVector sav(blaster.Run());
02404     x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
02405 }
02406 
02407 // should find alignment
02408 BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjPlusStrand) {
02409     // Alignment in these sequences is from plus/minus strands
02410     CSeq_id qid("NT_004487.15");
02411     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02412     auto_ptr<SSeqLoc> query(
02413         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02414                                                eNa_strand_both));
02415 
02416     CSeq_id sid("AA441981.1");
02417     range.first = 10;
02418     range.second = 480;
02419     auto_ptr<SSeqLoc> subj(
02420         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02421                                                eNa_strand_plus));
02422 
02423     CBlastNucleotideOptionsHandle opts;
02424     opts.SetTraditionalBlastnDefaults();
02425     CBl2Seq blaster(*query, *subj, opts);
02426     TSeqAlignVector sav(blaster.Run());
02427     x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
02428 }
02429 
02430 // should find alignment
02431 BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjMinusStrand) {
02432     // Alignment in these sequences is from plus/minus strands
02433     CSeq_id qid("NT_004487.15");
02434     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02435     auto_ptr<SSeqLoc> query(
02436         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02437                                                eNa_strand_both));
02438 
02439     CSeq_id sid("AA441981.1");
02440     range.first = 10;
02441     range.second = 480;
02442     auto_ptr<SSeqLoc> subj(
02443         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02444                                                eNa_strand_minus));
02445 
02446     CBlastNucleotideOptionsHandle opts;
02447     opts.SetTraditionalBlastnDefaults();
02448     CBl2Seq blaster(*query, *subj, opts);
02449     TSeqAlignVector sav(blaster.Run());
02450     x_TestAlignmentQuerySubjStrandCombinations(sav, "plus-minus");
02451 }
02452 
02453 // shouldn't find an alignment
02454 BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjPlusStrand) {
02455     // Alignment in these sequences is from plus/minus strands
02456     CSeq_id qid("NT_004487.15");
02457     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02458     auto_ptr<SSeqLoc> query(
02459         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02460                                                eNa_strand_plus));
02461 
02462     CSeq_id sid("AA441981.1");
02463     range.first = 10;
02464     range.second = 480;
02465     auto_ptr<SSeqLoc> subj(
02466         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02467                                                eNa_strand_plus));
02468 
02469     CBlastNucleotideOptionsHandle opts;
02470     opts.SetTraditionalBlastnDefaults();
02471     CBl2Seq blaster(*query, *subj, opts);
02472     TSeqAlignVector sav(blaster.Run());
02473     BOOST_REQUIRE(sav[0]->IsEmpty() == true);
02474 }
02475 
02476 // should find an alignment
02477 BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjMinusStrand) {
02478     // Alignment in these sequences is from plus/minus strands
02479     CSeq_id qid("NT_004487.15");
02480     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02481     auto_ptr<SSeqLoc> query(
02482         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02483                                                eNa_strand_plus));
02484 
02485     CSeq_id sid("AA441981.1");
02486     range.first = 10;
02487     range.second = 480;
02488     auto_ptr<SSeqLoc> subj(
02489         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02490                                                eNa_strand_minus));
02491 
02492     CBlastNucleotideOptionsHandle opts;
02493     opts.SetTraditionalBlastnDefaults();
02494     CBl2Seq blaster(*query, *subj, opts);
02495     TSeqAlignVector sav(blaster.Run());
02496     x_TestAlignmentQuerySubjStrandCombinations(sav, "plus-minus");
02497 }
02498 
02499 // should NOT find an alignment because we only search the plus strand of
02500 // the subject sequence
02501 BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjBothStrands) {
02502     // Alignment in these sequences is from plus/minus strands
02503     CSeq_id qid("NT_004487.15");
02504     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02505     auto_ptr<SSeqLoc> query(
02506         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02507                                                eNa_strand_plus));
02508 
02509     CSeq_id sid("AA441981.1");
02510     range.first = 10;
02511     range.second = 480;
02512     auto_ptr<SSeqLoc> subj(
02513         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02514                                                eNa_strand_both));
02515 
02516     CBlastNucleotideOptionsHandle opts;
02517     opts.SetTraditionalBlastnDefaults();
02518     CBl2Seq blaster(*query, *subj, opts);
02519     TSeqAlignVector sav(blaster.Run());
02520     BOOST_REQUIRE(sav[0]->IsEmpty() == true);
02521 }
02522 
02523 // should not find an alignment because alignment is on opposite strands
02524 BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjMinusStrand) {
02525     // Alignment in these sequences is from plus/minus strands
02526     CSeq_id qid("NT_004487.15");
02527     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02528     auto_ptr<SSeqLoc> query(
02529         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02530                                                eNa_strand_minus));
02531 
02532     CSeq_id sid("AA441981.1");
02533     range.first = 10;
02534     range.second = 480;
02535     auto_ptr<SSeqLoc> subj(
02536         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02537                                                eNa_strand_minus));
02538 
02539     CBlastNucleotideOptionsHandle opts;
02540     opts.SetTraditionalBlastnDefaults();
02541     CBl2Seq blaster(*query, *subj, opts);
02542     TSeqAlignVector sav(blaster.Run());
02543     BOOST_REQUIRE(sav[0]->IsEmpty() == true);
02544 }
02545 
02546 // should find alignment
02547 BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjPlusStrand) {
02548     // Alignment in these sequences is from plus/minus strands
02549     CSeq_id qid("NT_004487.15");
02550     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02551     auto_ptr<SSeqLoc> query(
02552         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02553                                                eNa_strand_minus));
02554 
02555     CSeq_id sid("AA441981.1");
02556     range.first = 10;
02557     range.second = 480;
02558     auto_ptr<SSeqLoc> subj(
02559         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02560                                                eNa_strand_plus));
02561 
02562     CBlastNucleotideOptionsHandle opts;
02563     opts.SetTraditionalBlastnDefaults();
02564     CBl2Seq blaster(*query, *subj, opts);
02565     TSeqAlignVector sav(blaster.Run());
02566     x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
02567 }
02568 
02569 // should find alignment
02570 BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjBothStrands) {
02571     // Alignment in these sequences is from plus/minus strands
02572     CSeq_id qid("NT_004487.15");
02573     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02574     auto_ptr<SSeqLoc> query(
02575         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02576                                                eNa_strand_minus));
02577 
02578     CSeq_id sid("AA441981.1");
02579     range.first = 10;
02580     range.second = 480;
02581     auto_ptr<SSeqLoc> subj(
02582         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02583                                                eNa_strand_both));
02584 
02585     CBlastNucleotideOptionsHandle opts;
02586     opts.SetTraditionalBlastnDefaults();
02587     CBl2Seq blaster(*query, *subj, opts);
02588     TSeqAlignVector sav(blaster.Run());
02589     x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
02590 }
02591 
02592 // Should properly find alignment
02593 BOOST_AUTO_TEST_CASE(Blastn_QueryWhole_SubjInterval)
02594 {
02595     CSeq_id qid("AA441981.1");
02596     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateWholeSSeqLoc(qid));
02597 
02598     CSeq_id sid("NT_004487.15");
02599     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02600     auto_ptr<SSeqLoc> subj(
02601         CTestObjMgr::Instance().CreateSSeqLoc(sid, range, 
02602                                                eNa_strand_both));
02603 
02604     CBlastNucleotideOptionsHandle opts;
02605     opts.SetTraditionalBlastnDefaults();
02606     CBl2Seq blaster(*query, *subj, opts);
02607     TSeqAlignVector sav(blaster.Run());
02608     testWholeIntervalAlignment(sav);
02609 }
02610 
02611 BOOST_AUTO_TEST_CASE(Blastn_QueryInterval_SubjWhole)
02612 {
02613     CSeq_id qid("NT_004487.15");
02614     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02615     auto_ptr<SSeqLoc> query(
02616         CTestObjMgr::Instance().CreateSSeqLoc(qid, range, 
02617                                                eNa_strand_both));
02618 
02619     CSeq_id sid("AA441981.1");
02620     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateWholeSSeqLoc(sid));
02621 
02622     CBlastNucleotideOptionsHandle opts;
02623     opts.SetTraditionalBlastnDefaults();
02624     CBl2Seq blaster(*query, *subj, opts);
02625     TSeqAlignVector sav(blaster.Run());
02626     testIntervalWholeAlignment(sav);
02627 }
02628 
02629 BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs) {
02630     vector<int> q_gis, s_gis;
02631 
02632     // Setup the queries
02633     q_gis.push_back(6);
02634     q_gis.push_back(129295);
02635     q_gis.push_back(15606659);
02636 
02637     // setup the subjects
02638     s_gis.push_back(129295);
02639     s_gis.push_back(6);
02640     s_gis.push_back(4336138); // no hits with gis 6 and 129295
02641     s_gis.push_back(15606659);
02642     s_gis.push_back(5556);
02643 
02644     TSeqLocVector queries;
02645     ITERATE(vector<int>, itr, q_gis) {
02646         CRef<CSeq_loc> loc(new CSeq_loc());
02647         loc->SetWhole().SetGi(*itr);
02648 
02649         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02650         scope->AddDefaults();
02651         queries.push_back(SSeqLoc(loc, scope));
02652     }
02653 
02654     TSeqLocVector subjects;
02655     ITERATE(vector<int>, itr, s_gis) {
02656         CRef<CSeq_loc> loc(new CSeq_loc());
02657         loc->SetWhole().SetGi(*itr);
02658 
02659         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02660         scope->AddDefaults();
02661         subjects.push_back(SSeqLoc(loc, scope));
02662     }
02663 
02664     size_t num_queries = queries.size();
02665     size_t num_subjects = subjects.size();
02666 
02667     // BLAST by concatenating all queries
02668     CBl2Seq blaster4all(queries, subjects, eBlastp);
02669     TSeqAlignVector sas_v = blaster4all.Run();
02670     BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
02671     testBlastHitCounts(blaster4all, eBlastp_multi_q_s);
02672     testRawCutoffs(blaster4all, eBlastp, eBlastp_multi_q_s);
02673 
02674     // test the order of queries and subjects:
02675     testResultAlignments(num_queries, num_subjects,
02676                             sas_v);
02677 }
02678 
02679 BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs_RunEx) {
02680     vector<int> q_gis, s_gis;
02681 
02682     // Setup the queries
02683     q_gis.push_back(6);
02684     q_gis.push_back(129295);
02685     q_gis.push_back(15606659);
02686 
02687     // setup the subjects
02688     s_gis.push_back(129295);
02689     s_gis.push_back(6);
02690     s_gis.push_back(4336138); // no hits with gis 6 and 129295
02691     s_gis.push_back(15606659);
02692     s_gis.push_back(5556);
02693 
02694     TSeqLocVector queries;
02695     ITERATE(vector<int>, itr, q_gis) {
02696         CRef<CSeq_loc> loc(new CSeq_loc());
02697         loc->SetWhole().SetGi(*itr);
02698 
02699         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02700         scope->AddDefaults();
02701         queries.push_back(SSeqLoc(loc, scope));
02702     }
02703 
02704     TSeqLocVector subjects;
02705     ITERATE(vector<int>, itr, s_gis) {
02706         CRef<CSeq_loc> loc(new CSeq_loc());
02707         loc->SetWhole().SetGi(*itr);
02708 
02709         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02710         scope->AddDefaults();
02711         subjects.push_back(SSeqLoc(loc, scope));
02712     }
02713 
02714     size_t num_queries = queries.size();
02715     size_t num_subjects = subjects.size();
02716 
02717     // BLAST by concatenating all queries
02718     CBl2Seq blaster4all(queries, subjects, eBlastp);
02719     CRef<CSearchResultSet> results = blaster4all.RunEx();
02720     BOOST_REQUIRE(results->GetResultType() == eSequenceComparison);
02721     BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
02722                         results->GetNumResults());
02723 
02724     // build the seqalign vector from the result set
02725     TSeqAlignVector sas_v;
02726     for (size_t i = 0; i < num_queries; i++)
02727     {
02728         for (size_t j = 0; j < num_subjects; j++)
02729         {
02730             CSearchResults& res_ij = results->GetResults(i, j);
02731             CRef<CSeq_align_set> aln_set;
02732             aln_set.Reset(const_cast<CSeq_align_set*>
02733                           (res_ij.GetSeqAlign().GetPointer()));
02734             sas_v.push_back(aln_set);
02735         }
02736     }
02737     
02738     // do the rest of the tests on sas_v as in the
02739     // BlastpMultipleQueries_MultipleSubjs function:
02740 
02741     BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
02742     testBlastHitCounts(blaster4all, eBlastp_multi_q_s);
02743     testRawCutoffs(blaster4all, eBlastp, eBlastp_multi_q_s);
02744 
02745     // test the order of queries and subjects:
02746     testResultAlignments(num_queries, num_subjects,
02747                             sas_v);
02748 }
02749 
02750 // This closely resembles how the command line applications invoke bl2seq
02751 BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs_CLocalBlast) {
02752     vector<int> q_gis, s_gis;
02753 
02754     // Setup the queries
02755     q_gis.push_back(6);
02756     q_gis.push_back(129295);
02757     q_gis.push_back(15606659);
02758 
02759     // setup the subjects
02760     s_gis.push_back(129295);
02761     s_gis.push_back(6);
02762     s_gis.push_back(4336138); // no hits with gis 6 and 129295
02763     s_gis.push_back(15606659);
02764     s_gis.push_back(5556);
02765 
02766     TSeqLocVector query_vec;
02767     ITERATE(vector<int>, itr, q_gis) {
02768         CRef<CSeq_loc> loc(new CSeq_loc());
02769         loc->SetWhole().SetGi(*itr);
02770 
02771         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02772         scope->AddDefaults();
02773         query_vec.push_back(SSeqLoc(loc, scope));
02774     }
02775     CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(query_vec));
02776 
02777     CRef<CBlastOptionsHandle>
02778         opts_handle(CBlastOptionsFactory::Create(eBlastp));
02779 
02780     TSeqLocVector subj_vec;
02781     ITERATE(vector<int>, itr, s_gis) {
02782         CRef<CSeq_loc> loc(new CSeq_loc());
02783         loc->SetWhole().SetGi(*itr);
02784 
02785         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02786         scope->AddDefaults();
02787         subj_vec.push_back(SSeqLoc(loc, scope));
02788     }
02789     CRef<IQueryFactory> subj_qf(new CObjMgr_QueryFactory(subj_vec));
02790     CRef<CLocalDbAdapter> subjects(new CLocalDbAdapter(subj_qf,
02791                                                        opts_handle));
02792 
02793     size_t num_queries = query_vec.size();
02794     size_t num_subjects = subj_vec.size();
02795 
02796     // BLAST by concatenating all queries
02797     CLocalBlast blaster(queries, opts_handle, subjects);
02798     CRef<CSearchResultSet> results = blaster.Run();
02799     BOOST_REQUIRE(results->GetResultType() == eSequenceComparison);
02800     BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
02801                         results->GetNumResults());
02802     BOOST_REQUIRE_EQUAL((num_queries*num_subjects), results->size());
02803     BOOST_REQUIRE_EQUAL(num_queries, results->GetNumQueries());
02804     BOOST_REQUIRE_EQUAL(num_subjects,
02805                         results->GetNumResults()/results->GetNumQueries());
02806 
02807     // build the seqalign vector from the result set
02808     TSeqAlignVector sas_v;
02809     for (size_t i = 0; i < num_queries; i++)
02810     {
02811         for (size_t j = 0; j < num_subjects; j++)
02812         {
02813             CSearchResults& res_ij = results->GetResults(i, j);
02814             CRef<CSeq_align_set> aln_set;
02815             aln_set.Reset(const_cast<CSeq_align_set*>
02816                           (res_ij.GetSeqAlign().GetPointer()));
02817             sas_v.push_back(aln_set);
02818         }
02819     }
02820     
02821     // do the rest of the tests on sas_v as in the
02822     // BlastpMultipleQueries_MultipleSubjs function:
02823     BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
02824 
02825     // test the order of queries and subjects:
02826     testResultAlignments(num_queries, num_subjects, sas_v);
02827 }
02828 
02829 BOOST_AUTO_TEST_CASE(BlastOptionsEquality) {
02830     // Create options object through factory
02831     auto_ptr<CBlastOptionsHandle> megablast_options_handle(
02832         CBlastOptionsFactory::Create(eMegablast));
02833     CBlastNucleotideOptionsHandle nucl_options_handle;
02834     BOOST_REQUIRE(megablast_options_handle->GetOptions() == 
02835                    nucl_options_handle.GetOptions());
02836 }
02837 
02838 BOOST_AUTO_TEST_CASE(BlastOptionsInequality) {
02839     CBlastProteinOptionsHandle prot_options_handle;
02840     CBlastNucleotideOptionsHandle nucl_options_handle;
02841     BOOST_REQUIRE(prot_options_handle.GetOptions() != 
02842                    nucl_options_handle.GetOptions());
02843 
02844     // Blastn and Megablast are different
02845     auto_ptr<CBlastOptionsHandle> blastn_options_handle(
02846         CBlastOptionsFactory::Create(eBlastn));
02847     BOOST_REQUIRE(blastn_options_handle->GetOptions() != 
02848                    nucl_options_handle.GetOptions());
02849 
02850     // Change the matrix and compare
02851     CBlastProteinOptionsHandle prot_options_handle2;
02852     prot_options_handle.SetMatrixName("pam30");
02853     BOOST_REQUIRE(prot_options_handle.GetOptions() !=
02854                    prot_options_handle2.GetOptions());
02855 }
02856 
02857 BOOST_AUTO_TEST_CASE(DiscontiguousMB) {
02858     CSeq_id qid("gi|408478");  // zebrafish sequence U02544
02859     CSeq_id sid("gi|1546012"); // mouse sequence U61969
02860     auto_ptr<SSeqLoc> query(
02861         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
02862     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02863 
02864     CBl2Seq blaster(*query, *subj, eDiscMegablast);
02865     TSeqAlignVector sav(blaster.Run());
02866     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
02867 
02868     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02869     BOOST_REQUIRE_EQUAL(13, (int)sar->GetSegs().GetDenseg().GetNumseg());
02870     testBlastHitCounts(blaster, eDiscMegablast_U02544_U61969);
02871     testRawCutoffs(blaster, eDiscMegablast, eDiscMegablast_U02544_U61969);
02872 }
02873 
02874 BOOST_AUTO_TEST_CASE(BlastnHumanChrom_MRNA) {
02875     CSeq_id qid("NT_004487.16");
02876     CSeq_id sid("AA621478.1");
02877     pair<TSeqPos, TSeqPos> qrange(7868209-1, 7868602-1);
02878     pair<TSeqPos, TSeqPos> srange(2-1, 397-1);
02879     auto_ptr<SSeqLoc> query(
02880         CTestObjMgr::Instance().CreateSSeqLoc(qid, 
02881                                                qrange, eNa_strand_plus));
02882     auto_ptr<SSeqLoc> subj(
02883         CTestObjMgr::Instance().CreateSSeqLoc(sid, 
02884                                                srange, eNa_strand_plus));
02885 
02886     CBlastNucleotideOptionsHandle options;
02887     CBl2Seq blaster(*query, *subj, options);
02888     TSeqAlignVector sav(blaster.Run());
02889     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
02890 
02891     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02892     BOOST_REQUIRE_EQUAL(5, (int)sar->GetSegs().GetDenseg().GetNumseg());
02893     testBlastHitCounts(blaster, eMegablast_chrom_mrna);
02894     testRawCutoffs(blaster, eMegablast, eMegablast_chrom_mrna);
02895 }
02896 
02897 // Checks that results for multiple subjects are put in correct places
02898 // in the vector of Seq-aligns
02899 BOOST_AUTO_TEST_CASE(testOneSubjectResults2CSeqAlign)
02900 {
02901     const int num_subjects = 15;
02902     const int results_size[num_subjects] = 
02903         { 1, 1, 0, 1, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 1 };
02904     const int query_gi = 7274302;
02905     const int gi_diff = 28;
02906     string seqid_str("gi|");
02907     CRef<CSeq_id> id(new CSeq_id(seqid_str + NStr::IntToString(query_gi)));
02908     auto_ptr<SSeqLoc> sl(
02909         CTestObjMgr::Instance().CreateSSeqLoc(*id, eNa_strand_both));
02910     TSeqLocVector query;
02911     query.push_back(*sl);
02912     TSeqLocVector subjects;
02913     int index;
02914     for (index = 0; index < num_subjects; ++index) {
02915         id.Reset(new CSeq_id(seqid_str + 
02916                  NStr::IntToString(query_gi + gi_diff + index)));
02917         sl.reset(CTestObjMgr::Instance().CreateSSeqLoc(*id, 
02918                                                        eNa_strand_both));
02919         subjects.push_back(*sl);
02920     }
02921     CBl2Seq blaster(query, subjects, eMegablast);
02922     TSeqAlignVector seqalign_v = blaster.Run();
02923     BOOST_REQUIRE_EQUAL(num_subjects, (int)seqalign_v.size());
02924 
02925     index = 0;
02926     ITERATE(TSeqAlignVector, itr, seqalign_v)
02927     {
02928         BOOST_REQUIRE_EQUAL(results_size[index], (int) (*itr)->Get().size());
02929         index++;
02930     }
02931 }
02932 
02933 BOOST_AUTO_TEST_CASE(testMultiSeqSearchSymmetry)
02934 {
02935     const int num_seqs = 19;
02936     const int gi_list[num_seqs] = 
02937         { 1346057, 125527, 121064, 1711551, 125412, 128337, 2507199,
02938           1170625, 1730070, 585365, 140977, 1730069, 20455504, 125206,
02939           125319, 114152, 1706450, 1706307, 125565 };
02940     const int score_cutoff = 70;
02941 
02942     string seqid_str("gi|");
02943     TSeqLocVector seq_vec;
02944     int index;
02945     for (index = 0; index < num_seqs; ++index) {
02946         CRef<CSeq_id> id(new CSeq_id(seqid_str + 
02947                          NStr::IntToString(gi_list[index])));
02948         auto_ptr<SSeqLoc> sl(
02949             CTestObjMgr::Instance().CreateSSeqLoc(*id, eNa_strand_both));
02950         seq_vec.push_back(*sl);
02951     }
02952     
02953     CBlastProteinOptionsHandle prot_opts;
02954     prot_opts.SetSegFiltering(false);
02955     CBl2Seq blaster(seq_vec, seq_vec, prot_opts);
02956     blaster.RunWithoutSeqalignGeneration(); /* NCBI_FAKE_WARNING */
02957     BlastHSPResults* results = blaster.GetResults(); /* NCBI_FAKE_WARNING */
02958 
02959     int qindex, sindex, qindex1, sindex1;
02960     for (qindex = 0; qindex < num_seqs; ++qindex) {
02961         for (sindex = 0; sindex < results->hitlist_array[qindex]->hsplist_count;
02962              ++sindex) {
02963             BlastHSPList* hsp_list1, *hsp_list2 = NULL;
02964             hsp_list1 = results->hitlist_array[qindex]->hsplist_array[sindex];
02965             qindex1 = hsp_list1->oid;
02966             BlastHitList* hitlist = results->hitlist_array[qindex1];
02967             for (sindex1 = 0; sindex1 < hitlist->hsplist_count; ++sindex1) {
02968                 if (hitlist->hsplist_array[sindex1]->oid == qindex) {
02969                     hsp_list2 = hitlist->hsplist_array[sindex1];
02970                     break;
02971                 }
02972             }
02973             BOOST_REQUIRE(hsp_list2 != NULL);
02974             int hindex;
02975             for (hindex = 0; hindex < hsp_list1->hspcnt; ++hindex) {
02976                 if (hsp_list1->hsp_array[hindex]->score <= score_cutoff)
02977                     break;
02978                 BOOST_REQUIRE(hindex < hsp_list2->hspcnt);
02979                 BOOST_REQUIRE_EQUAL(hsp_list1->hsp_array[hindex]->score,
02980                                      hsp_list2->hsp_array[hindex]->score);
02981             }
02982         }
02983     }
02984 }
02985 
02986 BOOST_AUTO_TEST_CASE(testInterruptCallbackWithNull) {
02987     CSeq_id id("gi|129295");
02988     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
02989 
02990     CBl2Seq blaster(*sl, *sl, eBlastp);
02991     TInterruptFnPtr null_fnptr = 0;
02992     TInterruptFnPtr fnptr = blaster.SetInterruptCallback(null_fnptr);
02993     BOOST_REQUIRE(fnptr == NULL);
02994 
02995     TSeqAlignVector sav(blaster.Run());
02996     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02997     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02998 
02999     fnptr = blaster.SetInterruptCallback(interrupt_immediately);
03000     // make sure we get the previous interrupt callback
03001     BOOST_REQUIRE(fnptr == null_fnptr);
03002 
03003     fnptr = blaster.SetInterruptCallback(null_fnptr);
03004     // make sure we get the previous interrupt callback
03005     BOOST_REQUIRE(fnptr == interrupt_immediately);
03006 
03007     // Retry the search now that we've removed the interrupt callback
03008     sav = blaster.Run();
03009     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
03010     sar = *(sav[0]->Get().begin());
03011     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
03012 }
03013 
03014 BOOST_AUTO_TEST_CASE(testInterruptCallbackDoNotInterrupt) {
03015     CSeq_id id("gi|129295");
03016     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
03017 
03018     CBl2Seq blaster(*sl, *sl, eBlastp);
03019     TInterruptFnPtr fnptr = blaster.SetInterruptCallback(do_not_interrupt);
03020     BOOST_REQUIRE(fnptr == NULL);
03021 
03022     TSeqAlignVector sav(blaster.Run());
03023     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
03024     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
03025     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
03026 }
03027 
// Compiled only when SEQLOC_MIX_QUERY_OK evaluates to non-zero.
#if SEQLOC_MIX_QUERY_OK
/// Builds a query location that is a mix of kNumInts intervals on gi|3417288,
/// runs blastn against a sub-range of gi|51511732 (both strands) and requires
/// the first alignment to be a discontinuous alignment with 60 pieces.
BOOST_AUTO_TEST_CASE(MultiIntervalLoc) {
    const size_t kNumInts = 20;
    // Start and end coordinate of each query interval (parallel arrays)
    const size_t kStarts[kNumInts] = 
        { 838, 1838, 6542, 7459, 9246, 10431, 14807, 16336, 19563, 
          20606, 21232, 22615, 23822, 27941, 29597, 30136, 31287, 
          31786, 33315, 35402 };
    const size_t kEnds[kNumInts] = 
        { 961, 2010, 6740, 7573, 9408, 10609, 15043, 16511, 19783, 
          20748, 21365, 22817, 24049, 28171, 29839, 30348, 31362, 
          31911, 33485, 37952 };

    CSeq_id qid("gi|3417288");
    CRef<CSeq_loc> qloc(new CSeq_loc());
    // Append one interval location per (start, end) pair to the mix
    for (size_t index = 0; index < kNumInts; ++index) {
        CRef<CSeq_loc> next_loc(new CSeq_loc());
        next_loc->SetInt().SetFrom(kStarts[index]);
        next_loc->SetInt().SetTo(kEnds[index]);
        next_loc->SetInt().SetId(qid);
        qloc->SetMix().Set().push_back(next_loc);
    }

    CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
    scope->AddDefaults();

    auto_ptr<SSeqLoc> query(new SSeqLoc(qloc, scope));

    CSeq_id sid("gi|51511732");
    pair<TSeqPos, TSeqPos> range(15595732, 15705419);
    auto_ptr<SSeqLoc> subject(
        CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
    CBl2Seq blaster(*query, *subject, eBlastn);
    TSeqAlignVector sav(blaster.Run());
    // Validate the result containers before indexing / dereferencing them,
    // so that an unexpected empty result is reported as a test failure
    // rather than causing undefined behaviour.
    BOOST_REQUIRE(!sav.empty());
    BOOST_REQUIRE(!sav[0]->Get().empty());
    CRef<CSeq_align> sar = *(sav[0]->Get().begin());
    BOOST_REQUIRE_EQUAL(60, (int)sar->GetSegs().GetDisc().Get().size());
}
#endif
03066 
03067 BOOST_AUTO_TEST_CASE(QueryMaskIgnoredInMiniExtension) {
03068     CRef<CSeq_loc> qloc(new CSeq_loc());
03069     qloc->SetWhole().SetGi(4505696);
03070     CSeq_id sid("gi|29809252");
03071     pair<TSeqPos, TSeqPos> range(662070, 662129);
03072 
03073     CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
03074     scope->AddDefaults();
03075 
03076     auto_ptr<SSeqLoc> query(new SSeqLoc(qloc, scope));
03077     auto_ptr<SSeqLoc> subject(
03078         CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
03079 
03080     CBl2Seq blaster(*query, *subject, eMegablast);
03081     TSeqAlignVector sav(blaster.Run());
03082     CRef<CSeq_align_set> sas = sav.front();
03083     BOOST_REQUIRE(sas->Get().empty());
03084 }
03085 
03086 #endif /* SKIP_DOXYGEN_PROCESSING */
03087 
03088 BOOST_AUTO_TEST_SUITE_END()
03089 
03090 

Generated on Sun Dec 6 22:17:33 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:51 2009 by modify_doxy.py rev. 173732