00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include <ncbi_pch.hpp>
00035 #include <corelib/test_boost.hpp>
00036 #include <algo/blast/api/bl2seq.hpp>
00037 #include <objects/seqalign/Seq_align.hpp>
00038 #include <objects/seqalign/Seq_align_set.hpp>
00039 #include <objects/seqalign/Std_seg.hpp>
00040 #include <objects/seqalign/Dense_seg.hpp>
00041 #include <objects/seqalign/Score.hpp>
00042 #include <objects/general/Object_id.hpp>
00043
00044 #include <serial/serial.hpp>
00045 #include <serial/iterator.hpp>
00046 #include <serial/objostr.hpp>
00047
00048 #include <algo/blast/api/tblastn_options.hpp>
00049 #include <algo/blast/format/blastfmtutil.hpp>
00050
00051 #include <algo/blast/api/blast_options_handle.hpp>
00052 #include <algo/blast/api/blast_prot_options.hpp>
00053 #include <algo/blast/api/blastx_options.hpp>
00054 #include <algo/blast/api/tblastn_options.hpp>
00055 #include <algo/blast/api/blast_nucl_options.hpp>
00056 #include <algo/blast/api/disc_nucl_options.hpp>
00057 #include <algo/blast/api/local_blast.hpp>
00058 #include <algo/blast/api/local_db_adapter.hpp>
00059 #include <algo/blast/api/objmgr_query_data.hpp>
00060 #include <algo/blast/blastinput/blast_input.hpp>
00061 #include <algo/blast/blastinput/blast_fasta_input.hpp>
00062
00063 #include <objtools/simple/simple_om.hpp>
00064 #include <objtools/readers/fasta.hpp>
00065 #include <objmgr/util/seq_loc_util.hpp>
00066
00067 #include "test_objmgr.hpp"
00068
00069 #ifdef NCBI_OS_DARWIN
00070 #include <corelib/plugin_manager_store.hpp>
00071 #include <objmgr/data_loader_factory.hpp>
00072 #include <objtools/data_loaders/genbank/processors.hpp>
00073 #endif
00074
00075 #include <util/random_gen.hpp>
00076
00077 #include <corelib/test_boost.hpp>
00078
00079 #ifndef SKIP_DOXYGEN_PROCESSING
00080
00081 USING_NCBI_SCOPE;
00082 USING_SCOPE(blast);
00083 USING_SCOPE(objects);
00084
00085 BOOST_AUTO_TEST_SUITE(bl2seq)
00086
00087 BOOST_AUTO_TEST_CASE(ProteinBlastInvalidSeqIdSelfHit)
00088 {
00089 CRef<CSeq_loc> loc(new CSeq_loc());
00090 loc->SetWhole().SetGi(-1);
00091
00092 CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
00093 scope->AddDefaults();
00094 SSeqLoc query(loc, scope);
00095
00096 TSeqLocVector subjects;
00097 {
00098 CRef<CSeq_loc> local_loc(new CSeq_loc());
00099 local_loc->SetWhole().SetGi(-1);
00100
00101 CScope* local_scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
00102 local_scope->AddDefaults();
00103 subjects.push_back(SSeqLoc(local_loc, local_scope));
00104 }
00105
00106
00107 CBl2Seq blaster4all(query, subjects, eBlastp);
00108 TSeqAlignVector sas_v;
00109 BOOST_CHECK_THROW(sas_v = blaster4all.Run(), CBlastException);
00110 }
00111
// Identifies a bl2seq test scenario. The helpers testRawCutoffs() and
// testBlastHitCounts() use these values to select the expected diagnostics.
// NOTE(review): names appear to follow <program>_<query>_<subject>; confirm
// against the test cases that pass them.
enum EBl2seqTest {
    // Single query vs. single subject
    eBlastp_129295_129295 = 0,
    eBlastn_555_555,
    eMegablast_555_555,
    eDiscMegablast_555_555,
    eBlastx_555_129295,
    eTblastn_129295_555,
    eTblastn_129295_555_large_word,
    eTblastx_555_555,
    eTblastx_many_hits,
    eBlastp_129295_7662354,
    eBlastn_555_3090,
    // Multiple queries and/or subjects
    eBlastp_multi_q,
    eBlastn_multi_q,
    eBlastp_multi_q_s,
    // Remaining scenarios
    eTblastn_oof,
    eBlastx_oof,
    eDiscMegablast_U02544_U61969,
    eMegablast_chrom_mrna
};
00132
00133
00134
00135
00136
00137
00138 extern "C" Boolean interrupt_immediately(SBlastProgress* )
00139 {
00140 return TRUE;
00141 }
00142
00143
00144
00145 extern "C" Boolean do_not_interrupt(SBlastProgress* )
00146 {
00147 return FALSE;
00148 }
00149
00150
00151
00152
00153 extern "C" Boolean callback_counter(SBlastProgress* progress_info)
00154 {
00155 int& counter = *reinterpret_cast<int*>(progress_info->user_data);
00156 counter++;
00157 return FALSE;
00158 }
00159
00160
00161
00162
00163 extern "C" Boolean interrupt_at_random(SBlastProgress* progress_info)
00164 {
00165 pair<int, int>& progress_pair =
00166 *reinterpret_cast< pair<int, int>* >(progress_info->user_data);
00167
00168 if (++progress_pair.first == progress_pair.second) {
00169 return TRUE;
00170 } else {
00171 return FALSE;
00172 }
00173 }
00174
00175
00176 extern "C" Boolean interrupt_after3calls(SBlastProgress* )
00177 {
00178 static int num_calls = 0;
00179 if (++num_calls < 3) {
00180 return FALSE;
00181 } else {
00182 return TRUE;
00183 }
00184 }
00185
00186
00187 extern "C" Boolean interrupt_on_traceback(SBlastProgress* progress_info)
00188 {
00189 if (progress_info->stage == eTracebackSearch) {
00190 return TRUE;
00191 } else {
00192 return FALSE;
00193 }
00194 }
00195
00196 void testRawCutoffs(CBl2Seq& blaster, EProgram program,
00197 EBl2seqTest test_id)
00198 {
00199 BlastRawCutoffs* raw_cutoffs =
00200 blaster.GetDiagnostics()->cutoffs;
00201 int x_drop_ungapped;
00202 int gap_trigger;
00203
00204 if (program == eBlastn || program == eDiscMegablast) {
00205 x_drop_ungapped = 16;
00206 gap_trigger = 16;
00207 } else if (program == eMegablast) {
00208 x_drop_ungapped = 8;
00209 gap_trigger = 8;
00210 } else {
00211 x_drop_ungapped = 16;
00212 gap_trigger = 41;
00213 }
00214
00215 switch (test_id) {
00216 case eBlastn_555_3090:
00217 x_drop_ungapped = 18;
00218 gap_trigger = 18;
00219 break;
00220 case eBlastn_multi_q:
00221 x_drop_ungapped = 18;
00222 gap_trigger = 18;
00223 break;
00224 case eMegablast_chrom_mrna:
00225 x_drop_ungapped = 7;
00226 gap_trigger = 7;
00227 break;
00228 case eDiscMegablast_U02544_U61969:
00229 x_drop_ungapped = 20;
00230 gap_trigger = 20;
00231 break;
00232 case eBlastp_multi_q:
00233 gap_trigger = 23;
00234 break;
00235 case eBlastp_multi_q_s:
00236 gap_trigger = 19;
00237 break;
00238 case eBlastp_129295_129295:
00239 case eTblastn_129295_555:
00240 case eTblastn_129295_555_large_word:
00241 gap_trigger = 20; break;
00242 case eBlastp_129295_7662354:
00243 gap_trigger = 23; break;
00244 case eBlastx_555_129295:
00245 gap_trigger = 19; break;
00246 case eTblastn_oof:
00247 gap_trigger = 43;
00248 default:
00249 break;
00250 }
00251
00252 switch (program) {
00253 case eBlastn: case eDiscMegablast:
00254 BOOST_CHECK_EQUAL(x_drop_ungapped,
00255 raw_cutoffs->x_drop_ungapped);
00256 BOOST_CHECK_EQUAL(33, raw_cutoffs->x_drop_gap);
00257
00258
00259 BOOST_CHECK_EQUAL(110, raw_cutoffs->x_drop_gap_final);
00260 BOOST_CHECK_EQUAL(gap_trigger, raw_cutoffs->ungapped_cutoff);
00261 break;
00262 case eMegablast:
00263 BOOST_CHECK_EQUAL(x_drop_ungapped,
00264 raw_cutoffs->x_drop_ungapped);
00265 BOOST_CHECK_EQUAL(16, raw_cutoffs->x_drop_gap);
00266
00267
00268 BOOST_CHECK_EQUAL(54, raw_cutoffs->x_drop_gap_final);
00269 BOOST_CHECK_EQUAL(gap_trigger, raw_cutoffs->ungapped_cutoff);
00270 break;
00271 case eBlastp: case eBlastx: case eTblastn:
00272 BOOST_CHECK_EQUAL(38, raw_cutoffs->x_drop_gap);
00273 BOOST_CHECK_EQUAL(64, raw_cutoffs->x_drop_gap_final);
00274 BOOST_CHECK_EQUAL(gap_trigger, raw_cutoffs->ungapped_cutoff);
00275
00276
00277 case eTblastx:
00278 BOOST_CHECK_EQUAL(x_drop_ungapped,
00279 raw_cutoffs->x_drop_ungapped);
00280 break;
00281 default: break;
00282 }
00283 }
00284
00285 void testResultAlignments(size_t num_queries,
00286 size_t num_subjects,
00287 TSeqAlignVector result_alnvec)
00288 {
00289 size_t num_total_alns = num_queries * num_subjects;
00290
00291
00292 BOOST_REQUIRE_EQUAL(result_alnvec.size(), num_total_alns);
00293
00294
00295
00296
00297 CConstRef<CSeq_id> id_query, id_prev_query;
00298 CConstRef<CSeq_id> id_subject;
00299 vector< CConstRef<CSeq_id> > id_prev_subjects;
00300 id_prev_subjects.resize(num_subjects);
00301
00302 bool prev_query_available = false;
00303 vector<bool> prev_subjects_available(num_subjects, false);
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313 for (size_t i_query = 0; i_query < num_queries; i_query++)
00314 {
00315 prev_query_available = false;
00316 for (size_t i_subject = 0; i_subject < num_subjects; i_subject++)
00317 {
00318 size_t i_lin_index = i_query * num_subjects + i_subject;
00319 CRef<CSeq_align_set> aln_set = result_alnvec[i_lin_index];
00320
00321
00322 BOOST_REQUIRE(aln_set.NotNull());
00323
00324
00325
00326 if (aln_set->Get().size() > 0)
00327 {
00328 CRef<CSeq_align> aln = aln_set->Get().front();
00329 id_query.Reset(&(aln->GetSeq_id(0)));
00330 id_subject.Reset(&(aln->GetSeq_id(1)));
00331
00332
00333
00334 if (i_subject > 0 &&
00335 prev_query_available)
00336 {
00337 BOOST_REQUIRE(
00338 id_query->Match(
00339 id_prev_query.GetObject()));
00340 }
00341
00342
00343
00344 if (i_query > 0 &&
00345 prev_subjects_available[i_subject])
00346 {
00347 BOOST_REQUIRE(
00348 id_subject->Match(
00349 id_prev_subjects[i_subject].GetObject()));
00350 }
00351
00352
00353 prev_subjects_available[i_subject] = true;
00354 id_prev_subjects[i_subject] = id_subject;
00355
00356
00357 prev_query_available = true;
00358 id_prev_query = id_query;
00359 }
00360 }
00361 }
00362 }
00363
00364 void testBlastHitCounts(CBl2Seq& blaster, EBl2seqTest test_id)
00365 {
00366 BlastUngappedStats* ungapped_stats =
00367 blaster.GetDiagnostics()->ungapped_stat;
00368 BlastGappedStats* gapped_stats =
00369 blaster.GetDiagnostics()->gapped_stat;
00370
00371 switch (test_id) {
00372 case eBlastp_129295_129295:
00373 BOOST_CHECK_EQUAL(314, (int)ungapped_stats->lookup_hits);
00374 BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
00375 BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00376 BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00377 BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00378 break;
00379 case eBlastn_555_555:
00380 BOOST_CHECK_EQUAL(157, (int)ungapped_stats->lookup_hits);
00381 BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
00382 BOOST_CHECK_EQUAL(3, ungapped_stats->good_init_extends);
00383 BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00384 BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00385 break;
00386 case eMegablast_555_555:
00387 BOOST_CHECK_EQUAL(30, (int)ungapped_stats->lookup_hits);
00388 BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
00389 BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00390 BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00391 BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00392 break;
00393 case eDiscMegablast_555_555:
00394 BOOST_CHECK_EQUAL(582, (int)ungapped_stats->lookup_hits);
00395
00396
00397 BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
00398
00399
00400 BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00401 BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00402 BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00403 break;
00404 case eBlastx_555_129295:
00405 BOOST_CHECK_EQUAL(280, (int)ungapped_stats->lookup_hits);
00406 BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
00407 BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00408 BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00409 BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00410 break;
00411 case eTblastn_129295_555:
00412 BOOST_CHECK_EQUAL(157, (int)ungapped_stats->lookup_hits);
00413 BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
00414 BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00415 BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00416 BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00417 break;
00418 case eTblastn_129295_555_large_word:
00419 BOOST_CHECK_EQUAL(5, (int)ungapped_stats->lookup_hits);
00420 BOOST_CHECK_EQUAL(4, ungapped_stats->init_extends);
00421 BOOST_CHECK_EQUAL(2, ungapped_stats->good_init_extends);
00422 BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
00423 BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
00424 break;
00425 case eTblastx_555_555:
00426 BOOST_CHECK_EQUAL(2590, (int)ungapped_stats->lookup_hits);
00427 BOOST_CHECK_EQUAL(61, ungapped_stats->init_extends);
00428 BOOST_CHECK_EQUAL(41, ungapped_stats->good_init_extends);
00429 break;
00430 case eTblastx_many_hits:
00431 BOOST_CHECK_EQUAL(18587, (int)ungapped_stats->lookup_hits);
00432 BOOST_CHECK_EQUAL(362, ungapped_stats->init_extends);
00433 BOOST_CHECK_EQUAL(66, ungapped_stats->good_init_extends);
00434 break;
00435 case eBlastp_129295_7662354:
00436 BOOST_CHECK_EQUAL(210, (int)ungapped_stats->lookup_hits);
00437 BOOST_CHECK_EQUAL(10, ungapped_stats->init_extends);
00438 BOOST_CHECK_EQUAL(3, ungapped_stats->good_init_extends);
00439 BOOST_CHECK_EQUAL(3, gapped_stats->extensions);
00440 BOOST_CHECK_EQUAL(3, gapped_stats->good_extensions);
00441 break;
00442 case eBlastn_555_3090:
00443 BOOST_CHECK_EQUAL(15, (int)ungapped_stats->lookup_hits);
00444 BOOST_CHECK_EQUAL(2, ungapped_stats->init_extends);
00445 BOOST_CHECK_EQUAL(2, ungapped_stats->good_init_extends);
00446 BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
00447 BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
00448 break;
00449 case eBlastp_multi_q:
00450 BOOST_CHECK_EQUAL(2129, (int)ungapped_stats->lookup_hits);
00451 BOOST_CHECK_EQUAL(78, ungapped_stats->init_extends);
00452 BOOST_CHECK_EQUAL(14, ungapped_stats->good_init_extends);
00453 BOOST_CHECK_EQUAL(8, gapped_stats->extensions);
00454 BOOST_CHECK_EQUAL(8, gapped_stats->good_extensions);
00455 break;
00456 case eBlastn_multi_q:
00457 BOOST_CHECK_EQUAL(963, (int)ungapped_stats->lookup_hits);
00458 BOOST_CHECK_EQUAL(13, ungapped_stats->init_extends);
00459 BOOST_CHECK_EQUAL(13, ungapped_stats->good_init_extends);
00460 BOOST_CHECK_EQUAL(5, gapped_stats->extensions);
00461 BOOST_CHECK_EQUAL(5, gapped_stats->good_extensions);
00462 break;
00463 case eBlastp_multi_q_s:
00464 #if 0
00465
00466
00467
00468
00469
00470 BOOST_CHECK_EQUAL(3579, (int)ungapped_stats->lookup_hits);
00471 BOOST_CHECK_EQUAL(138, ungapped_stats->init_extends);
00472
00473 BOOST_CHECK_EQUAL(3580, (int)ungapped_stats->lookup_hits);
00474 BOOST_CHECK_EQUAL(140, ungapped_stats->init_extends);
00475 #endif
00476
00477
00478 BOOST_CHECK_EQUAL(3939, (int)ungapped_stats->lookup_hits);
00479 BOOST_CHECK_EQUAL(159, ungapped_stats->init_extends);
00480 BOOST_CHECK_EQUAL(59, ungapped_stats->good_init_extends);
00481 BOOST_CHECK_EQUAL(25, gapped_stats->extensions);
00482 BOOST_CHECK_EQUAL(24, gapped_stats->good_extensions);
00483 break;
00484 case eTblastn_oof:
00485 BOOST_CHECK_EQUAL(2666, (int)ungapped_stats->lookup_hits);
00486 BOOST_CHECK_EQUAL(50, ungapped_stats->init_extends);
00487 BOOST_CHECK_EQUAL(4, ungapped_stats->good_init_extends);
00488 BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
00489 BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
00490 break;
00491 case eBlastx_oof:
00492 BOOST_CHECK_EQUAL(5950, (int)ungapped_stats->lookup_hits);
00493 BOOST_CHECK_EQUAL(159, ungapped_stats->init_extends);
00494 BOOST_CHECK_EQUAL(6, ungapped_stats->good_init_extends);
00495 BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
00496 BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
00497 break;
00498 case eDiscMegablast_U02544_U61969:
00499 BOOST_CHECK_EQUAL(108, (int)ungapped_stats->lookup_hits);
00500
00501
00502
00503 BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
00504 BOOST_CHECK_EQUAL(3, ungapped_stats->good_init_extends);
00505 BOOST_CHECK_EQUAL(3, gapped_stats->extensions);
00506 BOOST_CHECK_EQUAL(3, gapped_stats->good_extensions);
00507 break;
00508 case eMegablast_chrom_mrna:
00509 BOOST_CHECK_EQUAL(14, (int)ungapped_stats->lookup_hits);
00510 BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
00511 BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
00512 BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
00513 BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
00514 break;
00515 default: break;
00516 }
00517 }
00518
00519 BOOST_AUTO_TEST_CASE(ProteinBlastSelfHit)
00520 {
00521
00522 CSeq_id id("gi|129295");
00523 auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
00524
00525 CBl2Seq blaster(*sl, *sl, eBlastp);
00526 TSeqAlignVector sav(blaster.Run());
00527 BOOST_REQUIRE(sav[0].NotEmpty());
00528 BOOST_REQUIRE( !sav[0]->IsEmpty() );
00529 BOOST_REQUIRE(sav[0]->Get().begin()->NotEmpty());
00530 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00531 BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
00532 testBlastHitCounts(blaster, eBlastp_129295_129295);
00533 testRawCutoffs(blaster, eBlastp, eBlastp_129295_129295);
00534
00535
00536
00537 int num_ident = 0;
00538 sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00539 #if 0
00540 ofstream o("0.asn");
00541 o << MSerial_AsnText << *sar ;
00542 o.close();
00543 #endif
00544 BOOST_CHECK_EQUAL(232, num_ident);
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554 CSearchResultSet::TAncillaryVector ancillary_data;
00555 blaster.GetAncillaryResults(ancillary_data);
00556 BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
00557 BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
00558 BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
00559 BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
00560
00561 }
00562
00563 BOOST_AUTO_TEST_CASE(TBlastn2Seqs)
00564 {
00565 CSeq_id qid("gi|129295");
00566 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00567
00568 CSeq_id sid("gi|555");
00569 auto_ptr<SSeqLoc> subj(
00570 CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
00571
00572 CBl2Seq blaster(*query, *subj, eTblastn);
00573 TSeqAlignVector sav(blaster.Run());
00574 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00575
00576 #if 0
00577 ofstream o("1.asn");
00578 o << MSerial_AsnText << *sar ;
00579 o.close();
00580 #endif
00581
00582 BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
00583 testBlastHitCounts(blaster, eTblastn_129295_555);
00584 testRawCutoffs(blaster, eTblastn, eTblastn_129295_555);
00585
00586 int score = 0, comp_adj = 0;
00587 sar->GetNamedScore(CSeq_align::eScore_Score, score);
00588 sar->GetNamedScore(CSeq_align::eScore_CompAdjMethod, comp_adj);
00589 BOOST_CHECK_EQUAL(26, score);
00590 BOOST_CHECK_EQUAL(2, comp_adj);
00591
00592
00593 CSearchResultSet::TAncillaryVector ancillary_data;
00594 blaster.GetAncillaryResults(ancillary_data);
00595 BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
00596 BOOST_REQUIRE( ancillary_data.front().NotEmpty() );
00597 BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
00598 BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
00599 BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
00600 }
00601
00602 BOOST_AUTO_TEST_CASE(TBlastn2SeqsRevStrand1)
00603 {
00604 CSeq_id qid("gi|1945390");
00605 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00606
00607 pair<TSeqPos, TSeqPos> range(150000, 170000);
00608 CSeq_id sid("gi|4755212");
00609 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_minus));
00610
00611 CBl2Seq blaster(*query, *subj, eTblastn);
00612 TSeqAlignVector sav(blaster.Run());
00613 BOOST_CHECK_EQUAL(12, (int) sav[0]->Get().size());
00614 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00615 BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
00616 vector < CRef< CSeq_loc > > locs = sar->GetSegs().GetStd().front()->GetLoc();
00617 BOOST_CHECK_EQUAL(eNa_strand_minus, (int) (locs[1])->GetStrand());
00618 int num_ident = 0;
00619 sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00620 BOOST_CHECK_EQUAL(155, num_ident);
00621 #if 0
00622 ofstream o("minus1.asn");
00623 o << MSerial_AsnText << *sar ;
00624 o.close();
00625 #endif
00626 }
00627
00628 BOOST_AUTO_TEST_CASE(TBlastn2SeqsRevStrand2)
00629 {
00630 CSeq_id qid("gi|1945390");
00631 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00632
00633 CSeq_id sid("gi|1945388");
00634 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_minus));
00635
00636 CBl2Seq blaster(*query, *subj, eTblastn);
00637 TSeqAlignVector sav(blaster.Run());
00638 BOOST_CHECK_EQUAL(2, (int) sav[0]->Get().size());
00639 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00640 BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
00641 vector < CRef< CSeq_loc > > locs = sar->GetSegs().GetStd().front()->GetLoc();
00642 BOOST_CHECK_EQUAL(eNa_strand_minus, (int) (locs[1])->GetStrand());
00643 int num_ident = 0;
00644 sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00645 BOOST_CHECK_EQUAL(11, num_ident);
00646 #if 0
00647 ofstream o("minus2.asn");
00648 o << MSerial_AsnText << *sar ;
00649 o.close();
00650 #endif
00651 }
00652
00653
00654 BOOST_AUTO_TEST_CASE(TBlastn2SeqsCompBasedStats)
00655 {
00656 CSeq_id qid("gi|68737");
00657 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00658
00659 CSeq_id sid("gi|118086484");
00660 auto_ptr<SSeqLoc> subj(
00661 CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
00662
00663 CTBlastnOptionsHandle opts;
00664 opts.SetOptions().SetCompositionBasedStats(eCompositionBasedStats);
00665
00666 CBl2Seq blaster(*query, *subj, opts);
00667 TSeqAlignVector sav(blaster.Run());
00668 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00669 BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
00670
00671 int num_ident = 0;
00672 sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00673 BOOST_CHECK_EQUAL(229, num_ident);
00674 #if 0
00675 ofstream o("2.asn");
00676 o << MSerial_AsnText << *sar ;
00677 o.close();
00678 #endif
00679
00680
00681 CSearchResultSet::TAncillaryVector ancillary_data;
00682 blaster.GetAncillaryResults(ancillary_data);
00683 BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
00684 BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
00685 BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
00686 BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
00687 }
00688
00689 BOOST_AUTO_TEST_CASE(TBlastn2SeqsLargeWord)
00690 {
00691 CSeq_id qid("gi|129295");
00692 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00693
00694 CSeq_id sid("gi|555");
00695 auto_ptr<SSeqLoc> subj(
00696 CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
00697
00698 CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(eTblastn));
00699 opts->SetOptions().SetWordSize(6);
00700 opts->SetOptions().SetLookupTableType(eCompressedAaLookupTable);
00701 opts->SetOptions().SetWordThreshold(21.69);
00702 opts->SetOptions().SetWindowSize(0);
00703 opts->SetOptions().SetCompositionBasedStats(eNoCompositionBasedStats);
00704
00705 CBl2Seq blaster(*query, *subj, *opts);
00706 TSeqAlignVector sav(blaster.Run());
00707 BOOST_CHECK_EQUAL(2, (int)sav[0]->Size());
00708 testBlastHitCounts(blaster, eTblastn_129295_555_large_word);
00709 testRawCutoffs(blaster, eTblastn, eTblastn_129295_555_large_word);
00710
00711 int num_ident = 0;
00712 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00713 sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00714 #if 0
00715 ofstream o("3.asn");
00716 o << MSerial_AsnText << *sar ;
00717 o.close();
00718 #endif
00719 BOOST_CHECK_EQUAL(5, num_ident);
00720 }
00721
00722 BOOST_AUTO_TEST_CASE(IdenticalProteins)
00723 {
00724
00725 CSeq_id qid("gi|34810917");
00726 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00727 CSeq_id sid("gi|34810916");
00728 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
00729
00730 CBl2Seq blaster(*query, *subj, eBlastp);
00731 TSeqAlignVector sav(blaster.Run());
00732 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
00733 BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
00734
00735
00736
00737 int num_ident = 0;
00738 sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
00739 #if 0
00740 ofstream o("4.asn");
00741 o << MSerial_AsnText << *sar ;
00742 o.close();
00743 #endif
00744 BOOST_CHECK_EQUAL(377, num_ident);
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754 CSearchResultSet::TAncillaryVector ancillary_data;
00755 blaster.GetAncillaryResults(ancillary_data);
00756 BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
00757 BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
00758 BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
00759 BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
00760 }
00761
00762 BOOST_AUTO_TEST_CASE(UnsupportedOption) {
00763 CDiscNucleotideOptionsHandle opts_handle;
00764 BOOST_REQUIRE_THROW(opts_handle.SetTraditionalBlastnDefaults(),
00765 CBlastException);
00766 }
00767
00768 BOOST_AUTO_TEST_CASE(PositiveMismatchOption) {
00769 CSeq_id qid("gi|408478");
00770 CSeq_id sid("gi|1546012");
00771
00772 auto_ptr<SSeqLoc> query(
00773 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
00774 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
00775
00776 const int kMatch = 2;
00777 const int kMismatch = 5;
00778
00779 CBlastNucleotideOptionsHandle nucl_options_handle;
00780
00781 nucl_options_handle.SetMatchReward(kMatch);
00782 nucl_options_handle.SetMismatchPenalty(kMismatch);
00783 CBl2Seq blaster(*query, *subj, nucl_options_handle);
00784 try {
00785 TSeqAlignVector sav(blaster.Run());
00786 } catch (CBlastException& exptn) {
00787 BOOST_REQUIRE(
00788 !strcmp("BLASTN penalty must be negative",
00789 exptn.GetMsg().c_str()));
00790 }
00791 }
00792
00793 BOOST_AUTO_TEST_CASE(FullyMaskedSequence) {
00794 CSeq_id qid("ref|NT_024524.13");
00795 pair<TSeqPos, TSeqPos> range(27886902, 27886932);
00796 auto_ptr<SSeqLoc> query(
00797 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
00798 eNa_strand_plus));
00799 range.first = 2052;
00800 range.second = 2082;
00801 CSeq_id sid("emb|BX641126.1");
00802 auto_ptr<SSeqLoc> subj(
00803 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
00804 eNa_strand_minus));
00805 CBlastNucleotideOptionsHandle options;
00806 options.SetTraditionalBlastnDefaults();
00807 options.SetMismatchPenalty(-1);
00808 options.SetMatchReward(1);
00809 options.SetGapXDropoff(100);
00810 options.SetMaskAtHash(false);
00811 CBl2Seq blaster(*query, *subj, options);
00812 try { blaster.Run(); }
00813 catch (const CException& e) {
00814 const string msg1("invalid query sequence");
00815 const string msg2("verify the query sequence(s) and/or filtering "
00816 "options");
00817 BOOST_REQUIRE(string(e.what()).find(msg1) != NPOS);
00818 BOOST_REQUIRE(string(e.what()).find(msg2) != NPOS);
00819 }
00820 }
00821
00822 BOOST_AUTO_TEST_CASE(testInterruptBlastpExitImmediately) {
00823 CSeq_id id("gi|129295");
00824 auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
00825
00826 CBl2Seq blaster(*sl, *sl, eBlastp);
00827 TInterruptFnPtr fnptr =
00828 blaster.SetInterruptCallback(interrupt_immediately);
00829 BOOST_REQUIRE(fnptr == NULL);
00830
00831 TSeqAlignVector sav;
00832 try { sav = blaster.Run(); }
00833 catch (...) {
00834 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00835 }
00836 }
00837
00838 BOOST_AUTO_TEST_CASE(testInterruptBlastnExitImmediately) {
00839 CSeq_id id("gi|555");
00840 auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
00841
00842 CBl2Seq blaster(*sl, *sl, eBlastn);
00843 TInterruptFnPtr fnptr =
00844 blaster.SetInterruptCallback(interrupt_immediately);
00845 BOOST_REQUIRE(fnptr == NULL);
00846
00847 TSeqAlignVector sav;
00848 try { sav = blaster.Run(); }
00849 catch (...) {
00850 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00851 }
00852 }
00853
00854 BOOST_AUTO_TEST_CASE(testInterruptBlastxExitImmediately) {
00855 CSeq_id query_id("gi|555");
00856 auto_ptr<SSeqLoc> slq(CTestObjMgr::Instance().CreateSSeqLoc(query_id));
00857 CSeq_id subj_id("gi|129295");
00858 auto_ptr<SSeqLoc> sls(CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
00859
00860 CBl2Seq blaster(*slq, *sls, eBlastx);
00861 TInterruptFnPtr fnptr =
00862 blaster.SetInterruptCallback(interrupt_immediately);
00863 BOOST_REQUIRE(fnptr == NULL);
00864
00865 TSeqAlignVector sav;
00866 try { sav = blaster.Run(); }
00867 catch (...) {
00868 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00869 }
00870 }
00871
00872 BOOST_AUTO_TEST_CASE(testInterruptTblastxExitImmediately) {
00873 CSeq_id query_id("gi|555");
00874 auto_ptr<SSeqLoc> slq(CTestObjMgr::Instance().CreateSSeqLoc(query_id));
00875 CSeq_id subj_id("gi|555");
00876 auto_ptr<SSeqLoc> sls(CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
00877
00878 CBl2Seq blaster(*slq, *sls, eTblastx);
00879 TInterruptFnPtr fnptr =
00880 blaster.SetInterruptCallback(interrupt_immediately);
00881 BOOST_REQUIRE(fnptr == NULL);
00882
00883 TSeqAlignVector sav;
00884 try { sav = blaster.Run(); }
00885 catch (...) {
00886 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00887 }
00888 }
00889
00890 BOOST_AUTO_TEST_CASE(testInterruptTblastnExitImmediately) {
00891 CSeq_id query_id("gi|129295");
00892 auto_ptr<SSeqLoc> slq(CTestObjMgr::Instance().CreateSSeqLoc(query_id));
00893 CSeq_id subj_id("gi|555");
00894 auto_ptr<SSeqLoc> sls(CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
00895
00896 CBl2Seq blaster(*slq, *sls, eTblastn);
00897 TInterruptFnPtr fnptr =
00898 blaster.SetInterruptCallback(interrupt_immediately);
00899 BOOST_REQUIRE(fnptr == NULL);
00900
00901 TSeqAlignVector sav;
00902 try { sav = blaster.Run(); }
00903 catch (...) {
00904 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00905 }
00906 }
00907
00908 #define ARRAY_SIZE(a) (sizeof(a)/sizeof(*a))
00909 static
00910 CRef<CBl2Seq> s_SetupWithMultipleQueriesAndSubjects(bool query_is_nucl,
00911 bool subj_is_nucl,
00912 EProgram program) {
00913
00914 int protein_gis[] = { 6, 129295, 15606659, 4336138, 5556 };
00915 int nucl_gis[] = { 272208, 272217, 272211, 272247, 272227, 272236,
00916 272219 };
00917
00918 vector<int> q_gis, s_gis;
00919 if (query_is_nucl) {
00920 copy(&nucl_gis[0],
00921 &nucl_gis[ARRAY_SIZE(nucl_gis)],
00922 back_inserter(q_gis));
00923 } else {
00924 copy(&protein_gis[0],
00925 &protein_gis[ARRAY_SIZE(protein_gis)],
00926 back_inserter(q_gis));
00927 }
00928
00929 if (subj_is_nucl) {
00930 copy(&nucl_gis[0],
00931 &nucl_gis[ARRAY_SIZE(nucl_gis)],
00932 back_inserter(s_gis));
00933 } else {
00934 copy(&protein_gis[0],
00935 &protein_gis[ARRAY_SIZE(protein_gis)],
00936 back_inserter(s_gis));
00937 }
00938
00939
00940 TSeqLocVector queries;
00941 ITERATE(vector<int>, itr, q_gis) {
00942 CRef<CSeq_loc> loc(new CSeq_loc());
00943 loc->SetWhole().SetGi(*itr);
00944
00945 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
00946 scope->AddDefaults();
00947 queries.push_back(SSeqLoc(loc, scope));
00948 }
00949
00950 TSeqLocVector subjects;
00951 ITERATE(vector<int>, itr, s_gis) {
00952 CRef<CSeq_loc> loc(new CSeq_loc());
00953 loc->SetWhole().SetGi(*itr);
00954
00955 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
00956 scope->AddDefaults();
00957 subjects.push_back(SSeqLoc(loc, scope));
00958 }
00959
00960 return CRef<CBl2Seq>(new CBl2Seq(queries, subjects, program));
00961 }
00962
00963 BOOST_AUTO_TEST_CASE(testInterruptBlastpExitAtRandom) {
00964
00965 CRef<CBl2Seq> blaster = s_SetupWithMultipleQueriesAndSubjects(false,
00966 false,
00967 eBlastp);
00968
00969 int num_callbacks_executed(0);
00970 TInterruptFnPtr fnptr =
00971 blaster->SetInterruptCallback(callback_counter,
00972 (void*) &num_callbacks_executed);
00973 BOOST_REQUIRE(fnptr == NULL);
00974
00975 TSeqAlignVector sav(blaster->Run());
00976 CRandom r(time(0));
00977 int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
00978 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
00979
00980 fnptr = blaster->SetInterruptCallback(interrupt_at_random,
00981 (void*)&progress_pair);
00982 BOOST_REQUIRE(fnptr == callback_counter);
00983 sav.clear();
00984
00985 try { sav = blaster->Run(); }
00986 catch (...) {
00987 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
00988 }
00989 }
00990
00991 BOOST_AUTO_TEST_CASE(testInterruptBlastnExitAtRandom) {
00992
00993 CRef<CBl2Seq> blaster =
00994 s_SetupWithMultipleQueriesAndSubjects(true, true, eBlastn);
00995
00996 int num_callbacks_executed(0);
00997 TInterruptFnPtr fnptr =
00998 blaster->SetInterruptCallback(callback_counter,
00999 (void*)&num_callbacks_executed);
01000 BOOST_REQUIRE(fnptr == NULL);
01001
01002 TSeqAlignVector sav(blaster->Run());
01003 CRandom r(time(0));
01004 int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
01005 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
01006
01007 fnptr = blaster->SetInterruptCallback(interrupt_at_random,
01008 (void*)&progress_pair);
01009 BOOST_REQUIRE(fnptr == callback_counter);
01010 sav.clear();
01011
01012 try { sav = blaster->Run(); }
01013 catch (...) {
01014 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01015 }
01016 }
01017
01018
01019 BOOST_AUTO_TEST_CASE(testInterruptBlastxExitAtRandom) {
01020
01021 CRef<CBl2Seq> blaster =
01022 s_SetupWithMultipleQueriesAndSubjects(true, false, eBlastx);
01023
01024 int num_callbacks_executed(0);
01025 TInterruptFnPtr fnptr =
01026 blaster->SetInterruptCallback(callback_counter,
01027 (void*) & num_callbacks_executed);
01028 BOOST_REQUIRE(fnptr == NULL);
01029
01030 TSeqAlignVector sav(blaster->Run());
01031 CRandom r(time(0));
01032 int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
01033 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
01034
01035 fnptr = blaster->SetInterruptCallback(interrupt_at_random,
01036 (void*)&progress_pair);
01037 BOOST_REQUIRE(fnptr == callback_counter);
01038 sav.clear();
01039
01040 try { sav = blaster->Run(); }
01041 catch (...) {
01042 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01043 }
01044 }
01045
01046 BOOST_AUTO_TEST_CASE(testInterruptTblastnExitAtRandom) {
01047
01048 CRef<CBl2Seq> blaster =
01049 s_SetupWithMultipleQueriesAndSubjects(false, true, eTblastn);
01050
01051 int num_callbacks_executed(0);
01052 TInterruptFnPtr fnptr =
01053 blaster->SetInterruptCallback(callback_counter,
01054 (void*)&num_callbacks_executed);
01055 BOOST_REQUIRE(fnptr == NULL);
01056
01057 TSeqAlignVector sav(blaster->Run());
01058 CRandom r(time(0));
01059 int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
01060 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
01061
01062 fnptr = blaster->SetInterruptCallback(interrupt_at_random,
01063 (void*)&progress_pair);
01064 BOOST_REQUIRE(fnptr == callback_counter);
01065 sav.clear();
01066
01067 try { sav = blaster->Run(); }
01068 catch (...) {
01069 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01070 }
01071 }
01072
01073 BOOST_AUTO_TEST_CASE(testInterruptTblastxExitAtRandom) {
01074
01075 CRef<CBl2Seq> blaster =
01076 s_SetupWithMultipleQueriesAndSubjects(true, true, eTblastx);
01077
01078 int num_callbacks_executed(0);
01079 TInterruptFnPtr fnptr =
01080 blaster->SetInterruptCallback(callback_counter,
01081 (void*) & num_callbacks_executed);
01082 BOOST_REQUIRE(fnptr == NULL);
01083
01084 TSeqAlignVector sav(blaster->Run());
01085 CRandom r(time(0));
01086 int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
01087 pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
01088
01089 fnptr = blaster->SetInterruptCallback(interrupt_at_random,
01090 (void*)&progress_pair);
01091 BOOST_REQUIRE(fnptr == callback_counter);
01092 sav.clear();
01093
01094 try { sav = blaster->Run(); }
01095 catch (...) {
01096 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01097 }
01098 }
01099
01100 BOOST_AUTO_TEST_CASE(testInterruptBlastpExitAfter3Callbacks) {
01101 CSeq_id id("gi|129295");
01102 auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
01103
01104 CBl2Seq blaster(*sl, *sl, eBlastp);
01105 TInterruptFnPtr fnptr =
01106 blaster.SetInterruptCallback(interrupt_after3calls);
01107 BOOST_REQUIRE(fnptr == NULL);
01108
01109 TSeqAlignVector sav;
01110 try { sav = blaster.Run(); }
01111 catch (...) {
01112 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01113 }
01114 }
01115
01116 BOOST_AUTO_TEST_CASE(testInterruptBlastxExitOnTraceback) {
01117
01118 CRef<CBl2Seq> blaster = s_SetupWithMultipleQueriesAndSubjects(true,
01119 false,
01120 eBlastx);
01121 TInterruptFnPtr fnptr =
01122 blaster->SetInterruptCallback(interrupt_on_traceback);
01123 BOOST_REQUIRE(fnptr == NULL);
01124
01125 TSeqAlignVector sav;
01126 try { sav = blaster->Run(); }
01127 catch (...) {
01128 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01129 }
01130 }
01131
01132 BOOST_AUTO_TEST_CASE(testInterruptTblastxExitOnTraceback) {
01133
01134 CRef<CBl2Seq> blaster = s_SetupWithMultipleQueriesAndSubjects
01135 (true, true, eTblastx);
01136 TInterruptFnPtr fnptr =
01137 blaster->SetInterruptCallback(interrupt_on_traceback);
01138 BOOST_REQUIRE(fnptr == NULL);
01139
01140 TSeqAlignVector sav;
01141 try { sav = blaster->Run(); }
01142 catch (...) {
01143 BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
01144 }
01145 }
01146
01147 BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueriesWithInvalidSeqId) {
01148 vector<int> q_gis, s_gis;
01149
01150
01151 q_gis.push_back(129295);
01152 q_gis.push_back(-1);
01153
01154
01155 s_gis.push_back(129295);
01156 s_gis.push_back(4336138);
01157
01158 TSeqLocVector queries;
01159 ITERATE(vector<int>, itr, q_gis) {
01160 CRef<CSeq_loc> loc(new CSeq_loc());
01161 loc->SetWhole().SetGi(*itr);
01162
01163 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
01164 scope->AddDefaults();
01165 queries.push_back(SSeqLoc(loc, scope));
01166 }
01167
01168 TSeqLocVector subjects;
01169 ITERATE(vector<int>, itr, s_gis) {
01170 CRef<CSeq_loc> loc(new CSeq_loc());
01171 loc->SetWhole().SetGi(*itr);
01172
01173 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
01174 scope->AddDefaults();
01175 subjects.push_back(SSeqLoc(loc, scope));
01176 }
01177
01178
01179 CBl2Seq blaster4all(queries, subjects, eBlastp);
01180 TSeqAlignVector sas_v = blaster4all.Run();
01181
01182 TSearchMessages m;
01183 blaster4all.GetMessages(m);
01184 BOOST_REQUIRE_EQUAL(subjects.size()*queries.size(), sas_v.size());
01185 BOOST_REQUIRE_EQUAL(queries.size(), m.size());
01186
01187 BOOST_REQUIRE(m[0].empty());
01188 BOOST_REQUIRE(!m[1].empty());
01189
01190
01191 TQueryMessages qm = m[1];
01192 BOOST_REQUIRE(qm.front()->GetMessage().find("Cannot resolve") !=
01193 string::npos);
01194
01195
01196
01197 BOOST_REQUIRE_EQUAL(0, (int) sas_v[2]->Size());
01198 }
01199
01200 BOOST_AUTO_TEST_CASE(NucleotideBlastMultipleQueriesWithInvalidSeqId) {
01201 CSeq_id id1(CSeq_id::e_Gi, 555);
01202 auto_ptr<SSeqLoc> sl1(CTestObjMgr::Instance().CreateSSeqLoc(id1));
01203 CSeq_id id2(CSeq_id::e_Gi, 556);
01204 auto_ptr<SSeqLoc> sl2(CTestObjMgr::Instance().CreateSSeqLoc(id2));
01205
01206 const TSeqPos kFakeBioseqLength = 12;
01207 const char byte(0);
01208 vector<char> na_data(kFakeBioseqLength/4, byte);
01209
01210 CRef<CSeq_id> fake_id(new CSeq_id("lcl|77"));
01211 CBioseq fake_bioseq;
01212 fake_bioseq.SetInst().SetLength(kFakeBioseqLength);
01213 fake_bioseq.SetInst().SetSeq_data().SetNcbi2na().Set().swap(na_data);
01214 fake_bioseq.SetInst().SetMol(CSeq_inst::eMol_na);
01215 fake_bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
01216 fake_bioseq.SetId().push_back(fake_id);
01217 CRef<CSeq_loc> fake_loc(new CSeq_loc);
01218 fake_loc->SetWhole(*fake_id);
01219
01220 CRef<CScope> scope(CSimpleOM::NewScope(false));
01221 scope->AddBioseq(fake_bioseq);
01222 auto_ptr<SSeqLoc> sl_bad(new SSeqLoc(*fake_loc, *scope));
01223
01224 TSeqPos len = sequence::GetLength(*sl_bad->seqloc, sl_bad->scope);
01225 BOOST_REQUIRE_EQUAL(kFakeBioseqLength, len);
01226
01227 TSeqLocVector queries;
01228 queries.push_back(*sl1);
01229 queries.push_back(*sl_bad);
01230 queries.push_back(*sl2);
01231
01232
01233 CSeq_id subj_id(CSeq_id::e_Gi, 555);
01234 auto_ptr<SSeqLoc> subj_loc
01235 (CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
01236 TSeqLocVector subject;
01237 subject.push_back(*subj_loc);;
01238
01239 CBlastNucleotideOptionsHandle opts_handle;
01240 opts_handle.SetMaskAtHash(false);
01241 CBl2Seq bl2seq(queries, subject, opts_handle);
01242 TSeqAlignVector sas_v = bl2seq.Run();
01243 TSearchMessages m;
01244 bl2seq.GetMessages(m);
01245 BOOST_REQUIRE_EQUAL(sas_v.size(), m.size());
01246 BOOST_REQUIRE_EQUAL(queries.size(), sas_v.size());
01247
01248 BOOST_REQUIRE(m[0].empty());
01249 BOOST_REQUIRE(!m[1].empty());
01250 BOOST_REQUIRE(m[2].empty());
01251
01252 TQueryMessages qm = m[1];
01253
01254
01255 BOOST_REQUIRE(qm.size() == 1);
01256
01257
01258 ITERATE(TQueryMessages, itr, qm) {
01259 BOOST_REQUIRE((*itr)->GetMessage().find("Could not calculate "
01260 "ungapped Karlin-Altschul "
01261 "parameters")
01262 != string::npos);
01263 }
01264
01265
01266
01267 ITERATE(CSeq_align_set::Tdata, alignments, sas_v[1]->Get()) {
01268 BOOST_REQUIRE((*alignments)->GetSegs().IsDisc());
01269 BOOST_REQUIRE((*alignments)->GetSegs().GetDisc().Get().empty());
01270 }
01271 }
01272
01273 BOOST_AUTO_TEST_CASE(ProteinSelfHitWithMask) {
01274 CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 129295));
01275 CRef<CSeq_loc> sl(new CSeq_loc());
01276 sl->SetWhole(*id);
01277 CRef<CSeq_loc> mask(new CSeq_loc(*id, 50, 100));
01278 CRef<CScope> scope(CSimpleOM::NewScope());
01279 SSeqLoc seqloc(sl, scope, mask);
01280
01281 CBl2Seq bl2seq(seqloc, seqloc, eBlastp);
01282 TSeqAlignVector sav(bl2seq.Run());
01283 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01284 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01285 }
01286
01287
01288 BOOST_AUTO_TEST_CASE(NucleotideMaskedLocation) {
01289 CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 83219349));
01290 CRef<CSeq_loc> sl(new CSeq_loc());
01291 sl->SetWhole(*id);
01292 CRef<CSeq_loc> mask(new CSeq_loc(*id, 57, 484));
01293 CRef<CScope> scope(CSimpleOM::NewScope());
01294 SSeqLoc query_seqloc(sl, scope, mask);
01295
01296 CRef<CSeq_id> sid(new CSeq_id(CSeq_id::e_Gi, 88954065));
01297 CRef<CSeq_loc> ssl(new CSeq_loc(*sid, 9909580-100, 9909607+100));
01298 SSeqLoc subj_seqloc(ssl, scope);
01299
01300 CBl2Seq bl2seq(query_seqloc, subj_seqloc, eMegablast);
01301 TSeqAlignVector sav(bl2seq.Run());
01302 BOOST_REQUIRE_EQUAL(0, sav[0]->Get().size());
01303 }
01304
01305
01306 BOOST_AUTO_TEST_CASE(NucleotideMaskedLocation_FromFile) {
01307 CNcbiIfstream infile("data/masked.fsa");
01308 const bool is_protein(false);
01309 CBlastInputSourceConfig iconfig(is_protein);
01310 iconfig.SetLowercaseMask(true);
01311 CRef<CBlastFastaInputSource> fasta_src
01312 (new CBlastFastaInputSource(infile, iconfig));
01313 CRef<CBlastInput> input(new CBlastInput(&*fasta_src));
01314
01315
01316 CRef<CScope> scope = CBlastScopeSource(is_protein).NewScope();
01317
01318 CRef<blast::CBlastQueryVector> seqs = input->GetNextSeqBatch(*scope);
01319 CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*seqs));
01320
01321 TSeqLocVector subj_vec;
01322 CRef<CSeq_id> sid(new CSeq_id(CSeq_id::e_Gi, 88954065));
01323 CRef<CSeq_loc> ssl(new CSeq_loc(*sid, 9909580-100, 9909607+100));
01324 subj_vec.push_back(SSeqLoc(ssl, scope));
01325 CRef<IQueryFactory> subj_qf(new CObjMgr_QueryFactory(subj_vec));
01326 CRef<CBlastOptionsHandle>
01327 opts_handle(CBlastOptionsFactory::Create(eBlastn));
01328 CRef<CLocalDbAdapter> subjects(new CLocalDbAdapter(subj_qf,
01329 opts_handle));
01330
01331 size_t num_queries = seqs->Size();
01332 size_t num_subjects = subj_vec.size();
01333 BOOST_REQUIRE_EQUAL((size_t)1, num_queries);
01334 BOOST_REQUIRE_EQUAL((size_t)1, num_subjects);
01335
01336
01337 CLocalBlast blaster(queries, opts_handle, subjects);
01338 CRef<CSearchResultSet> results = blaster.Run();
01339 BOOST_REQUIRE(results->GetResultType() == eSequenceComparison);
01340 BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
01341 results->GetNumResults());
01342 BOOST_REQUIRE_EQUAL((num_queries*num_subjects), results->size());
01343 BOOST_REQUIRE_EQUAL(num_queries, results->GetNumQueries());
01344 BOOST_REQUIRE_EQUAL(num_subjects,
01345 results->GetNumResults()/results->GetNumQueries());
01346
01347 CSearchResults& res = (*results)[0];
01348 BOOST_REQUIRE(res.HasAlignments() == false);
01349 }
01350
01351
01352
01353 BOOST_AUTO_TEST_CASE(ProteinCompBasedStats) {
01354
01355 CRef<CObjectManager> kObjMgr = CObjectManager::GetInstance();
01356 CRef<CScope> scope(new CScope(*kObjMgr));
01357 CRef<CSeq_entry> seq_entry1;
01358 const string kFileName("data/blastp_compstats.fa");
01359 ifstream in1(kFileName.c_str());
01360 if ( !in1 )
01361 throw runtime_error("Failed to open " + kFileName);
01362 if ( !(seq_entry1 = CFastaReader(in1).ReadOneSeq()))
01363 throw runtime_error("Failed to read sequence from " + kFileName);
01364 scope->AddTopLevelSeqEntry(*seq_entry1);
01365 CRef<CSeq_loc> seqloc1(new CSeq_loc);
01366 const string kSeqIdString1("lcl|1");
01367 CRef<CSeq_id> id1(new CSeq_id(kSeqIdString1));
01368 seqloc1->SetWhole(*id1);
01369 SSeqLoc ss1(seqloc1, scope);
01370
01371 CSeq_id id("gi|4503637");
01372 auto_ptr<SSeqLoc> ss2(CTestObjMgr::Instance().CreateSSeqLoc(id));
01373
01374 CBlastProteinOptionsHandle opts_handle;
01375 opts_handle.SetWordSize(2);
01376 opts_handle.SetEvalueThreshold(20000);
01377 opts_handle.SetFilterString("F");
01378 opts_handle.SetMatrixName("PAM30");
01379 opts_handle.SetGapOpeningCost(9);
01380 opts_handle.SetGapExtensionCost(1);
01381 opts_handle.SetOptions().SetCompositionBasedStats(
01382 eCompositionBasedStats);
01383
01384 CBl2Seq blaster(ss1, *ss2, opts_handle);
01385 TSeqAlignVector sav(blaster.Run());
01386 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01387 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01388 }
01389
01390 BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryBothStrands) {
01391 CSeq_id qid("gi|555");
01392 auto_ptr<SSeqLoc> query(
01393 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01394 query->genetic_code_id = 1;
01395
01396 CSeq_id sid("gi|129295");
01397 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01398
01399 CBl2Seq blaster(*query, *subj, eBlastx);
01400 TSeqAlignVector sav(blaster.Run());
01401 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01402 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetStd().size());
01403 testBlastHitCounts(blaster, eBlastx_555_129295);
01404 testRawCutoffs(blaster, eBlastx, eBlastx_555_129295);
01405 }
01406
01407 BOOST_AUTO_TEST_CASE(NucleotideSelfHitWithSubjectMask) {
01408 CRef<CSeq_id> query_id(new CSeq_id(CSeq_id::e_Gi, 148727250));
01409 CRef<CSeq_id> subj_id(new CSeq_id(CSeq_id::e_Gi, 89059606));
01410 CRef<CSeq_loc> qsl(new CSeq_loc(*query_id, 0, 1000));
01411 CRef<CSeq_loc> ssl(new CSeq_loc(*subj_id, 0, 1000));
01412 CPacked_seqint::TRanges mask_vector;
01413 mask_vector.push_back(TSeqRange(0, 44));
01414 mask_vector.push_back(TSeqRange(69, 582));
01415 mask_vector.push_back(TSeqRange(610, 834));
01416 mask_vector.push_back(TSeqRange(854, 1000));
01417 CRef<CPacked_seqint> masks(new CPacked_seqint(*subj_id,
01418 mask_vector));
01419 CRef<CSeq_loc> subj_mask(new CSeq_loc());
01420 subj_mask->SetPacked_int(*masks);
01421 CRef<CScope> scope(CSimpleOM::NewScope());
01422 SSeqLoc query(qsl, scope);
01423 auto_ptr<SSeqLoc> subject(new SSeqLoc(ssl, scope, subj_mask));
01424 {
01425 CBl2Seq bl2seq(query, *subject, eBlastn);
01426 TSeqAlignVector sav(bl2seq.Run());
01427 BOOST_REQUIRE_EQUAL((size_t)1, sav.front()->Get().size());
01428 }
01429
01430
01431 subject.reset(new SSeqLoc(ssl, scope));
01432 {
01433 CBl2Seq bl2seq(query, *subject, eBlastn);
01434 TSeqAlignVector sav(bl2seq.Run());
01435 BOOST_REQUIRE_EQUAL((size_t)4, sav.front()->Get().size());
01436 }
01437 }
01438
01439 BOOST_AUTO_TEST_CASE(NucleotideBlastSelfHit) {
01440 CSeq_id id("gi|555");
01441 auto_ptr<SSeqLoc> sl(
01442 CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
01443
01444
01445 CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(eBlastn));
01446 CBl2Seq blaster(*sl, *sl, *opts);
01447 TSeqAlignVector sav = blaster.Run();
01448 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01449 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01450 testBlastHitCounts(blaster, eBlastn_555_555);
01451 testRawCutoffs(blaster, eBlastn, eBlastn_555_555);
01452
01453
01454 opts.Reset(CBlastOptionsFactory::Create(eMegablast));
01455 blaster.SetOptionsHandle() = *opts;
01456 sav = blaster.Run();
01457 BOOST_REQUIRE_EQUAL(1, (int)sav.size());
01458 sar = *(sav[0]->Get().begin());
01459 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01460 testBlastHitCounts(blaster, eMegablast_555_555);
01461 testRawCutoffs(blaster, eMegablast, eMegablast_555_555);
01462
01463
01464 opts.Reset(CBlastOptionsFactory::Create(eDiscMegablast));
01465 blaster.SetOptionsHandle() = *opts;
01466 sav = blaster.Run();
01467 sar = *(sav[0]->Get().begin());
01468 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01469 testBlastHitCounts(blaster, eDiscMegablast_555_555);
01470 testRawCutoffs(blaster, eDiscMegablast, eDiscMegablast_555_555);
01471 }
01472
01473 BOOST_AUTO_TEST_CASE(MegablastGreedyTraceback) {
01474 CSeq_id query_id("gi|2655203");
01475 auto_ptr<SSeqLoc> ql(
01476 CTestObjMgr::Instance().CreateSSeqLoc(query_id,
01477 eNa_strand_plus));
01478
01479 CSeq_id subject_id("gi|200811");
01480 auto_ptr<SSeqLoc> sl(
01481 CTestObjMgr::Instance().CreateSSeqLoc(subject_id,
01482 eNa_strand_minus));
01483
01484
01485
01486
01487 CBlastNucleotideOptionsHandle opts;
01488 opts.SetTraditionalMegablastDefaults();
01489 opts.SetMatchReward(1);
01490 opts.SetMismatchPenalty(-2);
01491 opts.SetGapOpeningCost(3);
01492 opts.SetGapExtensionCost(1);
01493 opts.SetWordSize(24);
01494 opts.SetGapExtnAlgorithm(eGreedyScoreOnly);
01495 opts.SetGapTracebackAlgorithm(eGreedyTbck);
01496
01497 CBl2Seq blaster(*ql, *sl, opts);
01498 blaster.RunWithoutSeqalignGeneration();
01499 BlastHSPResults *results = blaster.GetResults();
01500 BlastHSPList *hsplist = results->hitlist_array[0]->hsplist_array[0];
01501 BOOST_REQUIRE_EQUAL(1, hsplist->hspcnt);
01502 BlastHSP *hsp = hsplist->hsp_array[0];
01503 BOOST_REQUIRE_EQUAL(832, hsp->score);
01504 }
01505
01506
01507 BOOST_AUTO_TEST_CASE(MegablastGreedyTraceback2) {
01508 CRef<CObjectManager> kObjMgr = CObjectManager::GetInstance();
01509 CRef<CScope> scope(new CScope(*kObjMgr));
01510
01511 CRef<CSeq_entry> seq_entry1;
01512 ifstream in1("data/greedy1a.fsa");
01513 if ( !in1 )
01514 throw runtime_error("Failed to open file1");
01515 if ( !(seq_entry1 = CFastaReader(in1).ReadOneSeq()))
01516 throw runtime_error("Failed to read sequence from file1");
01517 scope->AddTopLevelSeqEntry(*seq_entry1);
01518 CRef<CSeq_loc> seqloc1(new CSeq_loc);
01519 const string kSeqIdString1("lcl|1");
01520 CRef<CSeq_id> id1(new CSeq_id(kSeqIdString1));
01521 seqloc1->SetWhole(*id1);
01522 SSeqLoc ss1(seqloc1, scope);
01523
01524 CRef<CSeq_entry> seq_entry2;
01525 ifstream in2("data/greedy1b.fsa");
01526 if ( !in2 )
01527 throw runtime_error("Failed to open file2");
01528 if ( !(seq_entry2 = CFastaReader(in2).ReadOneSeq()))
01529 throw runtime_error("Failed to read sequence from file2");
01530 scope->AddTopLevelSeqEntry(*seq_entry2);
01531 CRef<CSeq_loc> seqloc2(new CSeq_loc);
01532 const string kSeqIdString2("lcl|2");
01533 CRef<CSeq_id> id2(new CSeq_id(kSeqIdString2));
01534 seqloc2->SetWhole(*id2);
01535 SSeqLoc ss2(seqloc2, scope);
01536
01537 CBlastNucleotideOptionsHandle handle;
01538 handle.SetGapOpeningCost(0);
01539 handle.SetGapExtensionCost(0);
01540 handle.SetDustFiltering(false);
01541
01542
01543
01544 CBl2Seq blaster1(ss1, ss2, handle);
01545 TSeqAlignVector sav(blaster1.Run());
01546 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01547 BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
01548
01549 const CSeq_align& seqalign1 = *sar;
01550 BOOST_REQUIRE(seqalign1.IsSetScore());
01551 ITERATE(CSeq_align::TScore, itr, seqalign1.GetScore()) {
01552 BOOST_REQUIRE((*itr)->IsSetId());
01553 if ((*itr)->GetId().GetStr() == "score") {
01554 BOOST_REQUIRE_EQUAL(619, (*itr)->GetValue().GetInt());
01555 break;
01556 }
01557 }
01558
01559 handle.SetMatchReward(10);
01560 handle.SetMismatchPenalty(-25);
01561 handle.SetGapXDropoff(100.0);
01562 handle.SetGapXDropoffFinal(100.0);
01563
01564 CBl2Seq blaster2(ss1, ss2, handle);
01565 sav = blaster2.Run();
01566 sar = *(sav[0]->Get().begin());
01567 BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
01568
01569 const CSeq_align& seqalign2 = *sar;
01570 BOOST_REQUIRE(seqalign2.IsSetScore());
01571 ITERATE(CSeq_align::TScore, itr, seqalign2.GetScore()) {
01572 BOOST_REQUIRE((*itr)->IsSetId());
01573 if ((*itr)->GetId().GetStr() == "score") {
01574 BOOST_REQUIRE_EQUAL(6034, (*itr)->GetValue().GetInt());
01575 break;
01576 }
01577 }
01578 }
01579
01580 BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryPlusStrand) {
01581 CSeq_id qid("gi|555");
01582 auto_ptr<SSeqLoc> query(
01583 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_plus));
01584
01585 CSeq_id sid("gi|129295");
01586 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01587
01588 CBl2Seq blaster(*query, *subj, eBlastx);
01589 TSeqAlignVector sav(blaster.Run());
01590 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01591 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetStd().size());
01592 }
01593
01594 BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryMinusStrand) {
01595 CSeq_id qid("gi|555");
01596 auto_ptr<SSeqLoc> query(
01597 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_minus));
01598
01599 CSeq_id sid("gi|129295");
01600 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01601
01602 CBl2Seq blaster(*query, *subj, eBlastx);
01603 TSeqAlignVector sav(blaster.Run());
01604
01605 BOOST_REQUIRE(sav[0]->IsEmpty() == true);
01606 }
01607
01608
01609 BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryBothStrands) {
01610 CSeq_id id("gi|555");
01611 auto_ptr<SSeqLoc> sl(
01612 CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
01613
01614 CBl2Seq blaster(*sl, *sl, eTblastx);
01615 TSeqAlignVector sav(blaster.Run());
01616 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01617 BOOST_REQUIRE_EQUAL(39, (int)sar->GetSegs().GetStd().size());
01618 testBlastHitCounts(blaster, eTblastx_555_555);
01619 testRawCutoffs(blaster, eTblastx, eTblastx_555_555);
01620 }
01621
01622 BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryPlusStrand) {
01623 CSeq_id id("gi|555");
01624 auto_ptr<SSeqLoc> sl(
01625 CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_plus));
01626
01627 CBl2Seq blaster(*sl, *sl, eTblastx);
01628 TSeqAlignVector sav(blaster.Run());
01629 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01630 BOOST_REQUIRE_EQUAL(11, (int)sar->GetSegs().GetStd().size());
01631 }
01632
01633 BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryMinusStrand) {
01634 CSeq_id id("gi|555");
01635 auto_ptr<SSeqLoc> sl(
01636 CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_minus));
01637
01638 CBl2Seq blaster(*sl, *sl, eTblastx);
01639 TSeqAlignVector sav(blaster.Run());
01640 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01641 BOOST_REQUIRE_EQUAL(12, (int)sar->GetSegs().GetStd().size());
01642 }
01643
01644
01645 BOOST_AUTO_TEST_CASE(TblastxManyHits) {
01646 const int total_num_hsps = 50;
01647 const int num_hsps_to_check = 8;
01648 const int score_array[num_hsps_to_check] =
01649 { 947, 125, 820, 113, 624, 221, 39, 778};
01650 const int sum_n_array[num_hsps_to_check] =
01651 { 2, 2, 2, 2, 3, 3, 3, 0};
01652 CSeq_id qid("gi|24719404");
01653 auto_ptr<SSeqLoc> qsl(
01654 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01655 CSeq_id sid("gi|29807292");
01656 pair<TSeqPos, TSeqPos> range(15185000, 15195000);
01657 auto_ptr<SSeqLoc> ssl(
01658 CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
01659 CBl2Seq blaster(*qsl, *ssl, eTblastx);
01660 blaster.SetOptionsHandle().SetMaxNumHspPerSequence(total_num_hsps);
01661
01662 TSeqAlignVector sav(blaster.Run());
01663
01664 testBlastHitCounts(blaster, eTblastx_many_hits);
01665 testRawCutoffs(blaster, eTblastx, eTblastx_many_hits);
01666
01667 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01668 list< CRef<CStd_seg> >& segs = sar->SetSegs().SetStd();
01669 BOOST_REQUIRE_EQUAL(total_num_hsps, (int)segs.size());
01670 int index = 0;
01671 ITERATE(list< CRef<CStd_seg> >, itr, segs) {
01672 const vector< CRef< CScore > >& score_v = (*itr)->GetScores();
01673 ITERATE(CSeq_align::TScore, sitr, score_v) {
01674 BOOST_REQUIRE((*sitr)->IsSetId());
01675 if ((*sitr)->GetId().GetStr() == "score") {
01676 BOOST_REQUIRE_EQUAL(score_array[index],
01677 (*sitr)->GetValue().GetInt());
01678 } else if ((*sitr)->GetId().GetStr() == "sum_n") {
01679 BOOST_REQUIRE_EQUAL(sum_n_array[index],
01680 (*sitr)->GetValue().GetInt());
01681 }
01682 }
01683 if (++index == num_hsps_to_check)
01684 break;
01685 }
01686 }
01687
01688 BOOST_AUTO_TEST_CASE(ProteinBlast2Seqs) {
01689 CSeq_id id("gi|129295");
01690 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(id));
01691
01692 id.SetGi(7662354);
01693 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(id));
01694
01695 CBl2Seq blaster(*query, *subj, eBlastp);
01696 TSeqAlignVector sav(blaster.Run());
01697 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01698 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01699 testBlastHitCounts(blaster, eBlastp_129295_7662354);
01700 testRawCutoffs(blaster, eBlastp, eBlastp_129295_7662354);
01701 }
01702
01703 BOOST_AUTO_TEST_CASE(BlastnWithRepeatFiltering_InvalidDB) {
01704 CSeq_id qid("gi|555");
01705 auto_ptr<SSeqLoc> query(
01706 CTestObjMgr::Instance().CreateSSeqLoc(qid));
01707
01708 CBlastNucleotideOptionsHandle opts;
01709 opts.SetTraditionalMegablastDefaults();
01710 const string kRepeatDb("junk");
01711 opts.SetRepeatFilteringDB(kRepeatDb.c_str());
01712 bool is_repeat_filtering_on = opts.GetRepeatFiltering();
01713 BOOST_REQUIRE(is_repeat_filtering_on);
01714 string repeat_db(opts.GetRepeatFilteringDB()
01715 ? opts.GetRepeatFilteringDB()
01716 : kEmptyStr);
01717 BOOST_REQUIRE_EQUAL(kRepeatDb, repeat_db);
01718
01719 CBl2Seq blaster(*query, *query, opts);
01720 try {
01721 TSeqAlignVector sav(blaster.Run());
01722 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01723 BOOST_REQUIRE(sar.NotEmpty());
01724 BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01725 } catch (const CBlastException& e) {
01726 BOOST_REQUIRE(e.GetErrCode() == CBlastException::eSeqSrcInit);
01727 }
01728 }
01729
01730 BOOST_AUTO_TEST_CASE(BlastnWithRepeatFiltering) {
01731 CSeq_id qid("gi|555");
01732 auto_ptr<SSeqLoc> query(
01733 CTestObjMgr::Instance().CreateSSeqLoc(qid));
01734
01735 CBlastNucleotideOptionsHandle opts;
01736 opts.SetTraditionalMegablastDefaults();
01737 opts.SetRepeatFiltering(true);
01738 string repeat_db(opts.GetRepeatFilteringDB()
01739 ? opts.GetRepeatFilteringDB()
01740 : kEmptyStr);
01741 BOOST_REQUIRE_EQUAL(string(kDefaultRepeatFilterDb), repeat_db);
01742
01743 const string kRepeatDb("repeat/repeat_9606");
01744 opts.SetRepeatFilteringDB(kRepeatDb.c_str());
01745 repeat_db.assign(opts.GetRepeatFilteringDB()
01746 ? opts.GetRepeatFilteringDB()
01747 : kEmptyStr);
01748 BOOST_REQUIRE_EQUAL(kRepeatDb, repeat_db);
01749
01750 bool is_repeat_filtering_on = opts.GetRepeatFiltering();
01751 BOOST_REQUIRE(is_repeat_filtering_on);
01752
01753 CBl2Seq blaster(*query, *query, opts);
01754 TSeqAlignVector sav(blaster.Run());
01755 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01756 BOOST_REQUIRE(sar.NotEmpty());
01757 BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01758 }
01759
01760 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_Db) {
01761 CSeq_id qid("gi|555");
01762 auto_ptr<SSeqLoc> query(
01763 CTestObjMgr::Instance().CreateSSeqLoc(qid));
01764
01765 CBlastNucleotideOptionsHandle opts;
01766 opts.SetTraditionalMegablastDefaults();
01767 const string kWindowMaskerDb("9606");
01768 opts.SetWindowMaskerDatabase(kWindowMaskerDb.c_str());
01769 string wmdb(opts.GetWindowMaskerDatabase()
01770 ? opts.GetWindowMaskerDatabase() : kEmptyStr);
01771 BOOST_REQUIRE_EQUAL(kWindowMaskerDb, wmdb);
01772 BOOST_REQUIRE_EQUAL(0, opts.GetWindowMaskerTaxId());
01773 CBl2Seq blaster(*query, *query, opts);
01774 TSeqAlignVector sav(blaster.Run());
01775 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01776 BOOST_REQUIRE(sar.NotEmpty());
01777 BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01778 }
01779
01780 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_Taxid) {
01781 CSeq_id qid("gi|555");
01782 auto_ptr<SSeqLoc> query(
01783 CTestObjMgr::Instance().CreateSSeqLoc(qid));
01784
01785 CBlastNucleotideOptionsHandle opts;
01786 opts.SetTraditionalMegablastDefaults();
01787 opts.SetWindowMaskerTaxId(9606);
01788 CBl2Seq blaster(*query, *query, opts);
01789 TSeqAlignVector sav(blaster.Run());
01790 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01791 BOOST_REQUIRE(sar.NotEmpty());
01792 BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01793 }
01794
01795 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_InvalidDb) {
01796 CSeq_id qid("gi|555");
01797 auto_ptr<SSeqLoc> query(
01798 CTestObjMgr::Instance().CreateSSeqLoc(qid));
01799
01800 CBlastNucleotideOptionsHandle opts;
01801 opts.SetTraditionalMegablastDefaults();
01802 const string kWindowMaskerDb("Dummydb");
01803 opts.SetWindowMaskerDatabase(kWindowMaskerDb.c_str());
01804 string wmdb(opts.GetWindowMaskerDatabase()
01805 ? opts.GetWindowMaskerDatabase() : kEmptyStr);
01806 BOOST_REQUIRE_EQUAL(kWindowMaskerDb, wmdb);
01807 CBl2Seq blaster(*query, *query, opts);
01808 TSeqAlignVector sav(blaster.Run());
01809 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01810 BOOST_REQUIRE(sar.NotEmpty());
01811 BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() == 1);
01812 }
01813
01814 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_InvalidTaxid) {
01815 CSeq_id qid("gi|555");
01816 auto_ptr<SSeqLoc> query(
01817 CTestObjMgr::Instance().CreateSSeqLoc(qid));
01818
01819 CBlastNucleotideOptionsHandle opts;
01820 opts.SetTraditionalMegablastDefaults();
01821 const int kInvalidTaxId = -1;
01822 opts.SetWindowMaskerTaxId(kInvalidTaxId);
01823 BOOST_REQUIRE_EQUAL(kInvalidTaxId, opts.GetWindowMaskerTaxId());
01824 CBl2Seq blaster(*query, *query, opts);
01825 TSeqAlignVector sav(blaster.Run());
01826 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01827 BOOST_REQUIRE(sar.NotEmpty());
01828
01829 BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() == 1);
01830 }
01831
01832 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_DbAndTaxid) {
01833 CSeq_id qid("gi|555");
01834 auto_ptr<SSeqLoc> query(
01835 CTestObjMgr::Instance().CreateSSeqLoc(qid));
01836
01837 CBlastNucleotideOptionsHandle opts;
01838 opts.SetTraditionalMegablastDefaults();
01839
01840 opts.SetWindowMaskerDatabase("9606");
01841 opts.SetWindowMaskerTaxId(-1);
01842 CBl2Seq blaster(*query, *query, opts);
01843 TSeqAlignVector sav(blaster.Run());
01844 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01845 BOOST_REQUIRE(sar.NotEmpty());
01846 BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01847 }
01848
01849
01850 BOOST_AUTO_TEST_CASE(Alex) {
01851 CSeq_id qid("NG_007092.2");
01852 TSeqRange qr(0, 2311633);
01853 auto_ptr<SSeqLoc> query(
01854 CTestObjMgr::Instance().CreateSSeqLoc(qid, qr, eNa_strand_plus));
01855
01856 CSeq_id sid("NT_007914.14");
01857 TSeqRange sr(5233652, 9849919);
01858 auto_ptr<SSeqLoc> subj(
01859 CTestObjMgr::Instance().CreateSSeqLoc(sid, sr));
01860
01861 CBlastNucleotideOptionsHandle opts;
01862 opts.SetTraditionalMegablastDefaults();
01863 opts.SetRepeatFiltering(true);
01864 CBl2Seq blaster(*query, *subj, opts);
01865 TSeqAlignVector sav(blaster.Run());
01866 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01867 BOOST_REQUIRE(sar.NotEmpty());
01868 BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
01869 }
01870
01871 BOOST_AUTO_TEST_CASE(NucleotideBlast2Seqs) {
01872 CSeq_id qid("gi|555");
01873 auto_ptr<SSeqLoc> query(
01874 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01875
01876 CSeq_id sid("gi|3090");
01877 auto_ptr<SSeqLoc> subj(
01878 CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
01879
01880 CBlastNucleotideOptionsHandle opts;
01881 opts.SetTraditionalBlastnDefaults();
01882 CBl2Seq blaster(*query, *subj, opts);
01883 TSeqAlignVector sav(blaster.Run());
01884 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01885 BOOST_REQUIRE_EQUAL(3, (int)sar->GetSegs().GetDenseg().GetNumseg());
01886 testBlastHitCounts(blaster, eBlastn_555_3090);
01887 testRawCutoffs(blaster, eBlastn, eBlastn_555_3090);
01888 }
01889
01890 BOOST_AUTO_TEST_CASE(ProteinBlastChangeQuery) {
01891 CSeq_id id("gi|129295");
01892 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(id));
01893
01894 id.SetGi(7662354);
01895 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(id));
01896
01897
01898 CBl2Seq blaster(*subj, *subj, eBlastp);
01899 TSeqAlignVector sav(blaster.Run());
01900 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01901 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01902
01903
01904 blaster.SetQuery(*query);
01905 sav = blaster.Run();
01906 sar = *(sav[0]->Get().begin());
01907 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01908 }
01909
01910 BOOST_AUTO_TEST_CASE(ProteinBlastChangeSubject) {
01911 CSeq_id qid("gi|129295");
01912 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
01913
01914 CSeq_id sid("gi|7662354");
01915 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01916
01917
01918 CBl2Seq blaster(*query, *query, eBlastp);
01919 TSeqAlignVector sav(blaster.Run());
01920 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01921 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01922
01923
01924 blaster.SetSubject(*subj);
01925 sav = blaster.Run();
01926 BOOST_REQUIRE_EQUAL(1, (int)sav.size());
01927 sar = *(sav[0]->Get().begin());
01928 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01929 }
01930
01931 BOOST_AUTO_TEST_CASE(NucleotideBlastChangeQuery) {
01932 CSeq_id qid("gi|555");
01933 auto_ptr<SSeqLoc> query(
01934 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01935
01936 CSeq_id sid("gi|3090");
01937 auto_ptr<SSeqLoc> subj(
01938 CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
01939
01940
01941 CBlastNucleotideOptionsHandle opts;
01942 opts.SetTraditionalBlastnDefaults();
01943 CBl2Seq blaster(*subj, *subj, opts);
01944 TSeqAlignVector sav(blaster.Run());
01945 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01946 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01947
01948
01949 blaster.SetQuery(*query);
01950 sav = blaster.Run();
01951 BOOST_REQUIRE_EQUAL(2, (int)sav[0]->Size());
01952 sar = *(sav[0]->Get().begin());
01953 BOOST_REQUIRE_EQUAL(3, (int)sar->GetSegs().GetDenseg().GetNumseg());
01954 }
01955
01956 BOOST_AUTO_TEST_CASE(NucleotideBlastChangeSubject) {
01957 CSeq_id qid("gi|555");
01958 auto_ptr<SSeqLoc> query(
01959 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
01960
01961 CSeq_id sid("gi|3090");
01962 auto_ptr<SSeqLoc> subj(
01963 CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
01964
01965
01966 CBlastNucleotideOptionsHandle opts;
01967 opts.SetTraditionalBlastnDefaults();
01968 CBl2Seq blaster(*query, *query, opts);
01969 TSeqAlignVector sav(blaster.Run());
01970 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
01971 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
01972
01973
01974 blaster.SetSubject(*subj);
01975 sav = blaster.Run();
01976 sar = *(sav[0]->Get().begin());
01977 BOOST_REQUIRE_EQUAL(3, (int)sar->GetSegs().GetDenseg().GetNumseg());
01978 }
01979
01980
01981 BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueries) {
01982 TSeqLocVector sequences;
01983
01984 CSeq_id qid("gi|129295");
01985 auto_ptr<SSeqLoc> sl1(CTestObjMgr::Instance().CreateSSeqLoc(qid));
01986 sequences.push_back(*sl1);
01987
01988 CSeq_id sid("gi|7662354");
01989 auto_ptr<SSeqLoc> sl2(CTestObjMgr::Instance().CreateSSeqLoc(sid));
01990 sequences.push_back(*sl2);
01991
01992 CBl2Seq blaster(sequences, sequences, eBlastp);
01993 TSeqAlignVector seqalign_v = blaster.Run();
01994
01995 BOOST_REQUIRE_EQUAL(4, (int)seqalign_v.size());
01996 BOOST_REQUIRE_EQUAL(2, (int)sequences.size());
01997
01998 CRef<CSeq_align> sar;
01999
02000 BOOST_REQUIRE_EQUAL(1, seqalign_v[0]->Get().size());
02001 sar = *(seqalign_v[0]->Get().begin());
02002 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02003
02004 BOOST_REQUIRE_EQUAL(2, seqalign_v[1]->Get().size());
02005 sar = *(seqalign_v[1]->Get().begin());
02006 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02007 sar = *(++(seqalign_v[1]->Get().begin()));
02008 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02009
02010 BOOST_REQUIRE_EQUAL(2, seqalign_v[2]->Get().size());
02011 sar = *(seqalign_v[2]->Get().begin());
02012 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02013 sar = *(++(seqalign_v[2]->Get().begin()));
02014 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02015
02016 BOOST_REQUIRE_EQUAL(1, seqalign_v[3]->Get().size());
02017 sar = *(seqalign_v[3]->Get().begin());
02018 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02019
02020
02021
02022
02023
02024
02025
02026
02027 testBlastHitCounts(blaster, eBlastp_multi_q);
02028 testRawCutoffs(blaster, eBlastp, eBlastp_multi_q);
02029
02030
02031 testResultAlignments(sequences.size(), sequences.size(),
02032 seqalign_v);
02033 }
02034
02035 BOOST_AUTO_TEST_CASE(NucleotideBlastMultipleQueries) {
02036 TSeqLocVector sequences;
02037
02038 CSeq_id qid("gi|555");
02039 auto_ptr<SSeqLoc> sl1(
02040 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
02041 sequences.push_back(*sl1);
02042 BOOST_REQUIRE(sl1->mask.Empty());
02043
02044 CSeq_id sid("gi|3090");
02045 auto_ptr<SSeqLoc> sl2(
02046 CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
02047 sequences.push_back(*sl2);
02048 BOOST_REQUIRE(sl2->mask.Empty());
02049
02050 CBl2Seq blaster(sequences, sequences, eBlastn);
02051 TSeqAlignVector seqalign_v = blaster.Run();
02052 BOOST_REQUIRE_EQUAL(2, (int)sequences.size());
02053 BOOST_REQUIRE_EQUAL(4, (int)seqalign_v.size());
02054
02055 CRef<CSeq_align> sar = *(seqalign_v[0]->Get().begin());
02056 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02057
02058
02059 sar = *(seqalign_v[2]->Get().begin());
02060 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02061
02062 testBlastHitCounts(blaster, eBlastn_multi_q);
02063 testRawCutoffs(blaster, eBlastn, eBlastn_multi_q);
02064
02065
02066 testResultAlignments(sequences.size(), sequences.size(),
02067 seqalign_v);
02068 }
02069
02070 void DoSearchWordSize4(const char *file1, const char *file2) {
02071 CRef<CObjectManager> kObjMgr = CObjectManager::GetInstance();
02072 CRef<CScope> scope(new CScope(*kObjMgr));
02073
02074 CRef<CSeq_entry> seq_entry1;
02075 ifstream in1(file1);
02076 if ( !in1 )
02077 throw runtime_error("Failed to open file1");
02078 if ( !(seq_entry1 = CFastaReader(in1).ReadOneSeq()))
02079 throw runtime_error("Failed to read sequence from file1");
02080 scope->AddTopLevelSeqEntry(*seq_entry1);
02081 CRef<CSeq_loc> seqloc1(new CSeq_loc);
02082 const string kSeqIdString1("lcl|1");
02083 CRef<CSeq_id> id1(new CSeq_id(kSeqIdString1));
02084 seqloc1->SetWhole(*id1);
02085 SSeqLoc ss1(seqloc1, scope);
02086
02087 CRef<CSeq_entry> seq_entry2;
02088 ifstream in2(file2);
02089 if ( !in2 )
02090 throw runtime_error("Failed to open file2");
02091 if ( !(seq_entry2 = CFastaReader(in2).ReadOneSeq()))
02092 throw runtime_error("Failed to read sequence from file2");
02093 scope->AddTopLevelSeqEntry(*seq_entry2);
02094 CRef<CSeq_loc> seqloc2(new CSeq_loc);
02095 const string kSeqIdString2("lcl|2");
02096 CRef<CSeq_id> id2(new CSeq_id(kSeqIdString2));
02097 seqloc2->SetWhole(*id2);
02098 SSeqLoc ss2(seqloc2, scope);
02099
02100 CBlastNucleotideOptionsHandle handle;
02101 handle.SetTraditionalBlastnDefaults();
02102 handle.SetWordSize(4);
02103 handle.SetDustFiltering(false);
02104 handle.SetMismatchPenalty(-1);
02105 handle.SetMatchReward(1);
02106 handle.SetEvalueThreshold(10000);
02107
02108 CBl2Seq blaster(ss1, ss2, handle);
02109 blaster.RunWithoutSeqalignGeneration();
02110 BlastHSPResults *results = blaster.GetResults();
02111 BOOST_REQUIRE(results != NULL);
02112 BOOST_REQUIRE(results->hitlist_array[0] != NULL);
02113 BOOST_REQUIRE(results->hitlist_array[0]->hsplist_array[0] != NULL);
02114 BlastHSPList *hsp_list = results->hitlist_array[0]->hsplist_array[0];
02115 BOOST_REQUIRE(hsp_list->hspcnt > 0);
02116 BOOST_REQUIRE(hsp_list->hsp_array[0] != NULL);
02117
02118
02119
02120
02121 for (int i = 0; i < hsp_list->hspcnt; i++) {
02122 BlastHSP *hsp = hsp_list->hsp_array[i];
02123 BOOST_REQUIRE(hsp != NULL);
02124 BOOST_REQUIRE(hsp->query.offset < hsp->query.end);
02125 BOOST_REQUIRE(hsp->subject.offset < hsp->subject.end);
02126 BOOST_REQUIRE(hsp->query.gapped_start >= hsp->query.offset &&
02127 hsp->query.gapped_start < hsp->query.end);
02128 BOOST_REQUIRE(hsp->subject.gapped_start >= hsp->subject.offset &&
02129 hsp->subject.gapped_start < hsp->subject.end);
02130 BOOST_REQUIRE(hsp->query.end - hsp->query.offset >= 4);
02131 BOOST_REQUIRE(hsp->subject.end - hsp->subject.offset >= 4);
02132 }
02133 }
02134
02135 BOOST_AUTO_TEST_CASE(NucleotideBlastWordSize4) {
02136 DoSearchWordSize4("data/blastn_size4a.fsa",
02137 "data/blastn_size4b.fsa");
02138 }
02139
02140
02141
02142 BOOST_AUTO_TEST_CASE(NucleotideBlastWordSize4_EOS) {
02143 DoSearchWordSize4("data/blastn_size4c.fsa",
02144 "data/blastn_size4d.fsa");
02145 }
02146
02147 BOOST_AUTO_TEST_CASE(TblastnOutOfFrame) {
02148 CSeq_id qid("NP_647642.2");
02149 CSeq_id sid("BC042576.1");
02150
02151 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
02152 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02153
02154
02155 CTBlastnOptionsHandle opts;
02156 opts.SetOutOfFrameMode();
02157 opts.SetFrameShiftPenalty(10);
02158 opts.SetFilterString("m;L");
02159 opts.SetEvalueThreshold(0.01);
02160 opts.SetCompositionBasedStats(eNoCompositionBasedStats);
02161
02162 CBl2Seq blaster(*query, *subj, opts);
02163 TSeqAlignVector sav(blaster.Run());
02164 BOOST_REQUIRE_EQUAL(1, (int)sav.size());
02165 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02166 BOOST_REQUIRE_EQUAL(2, (int)sav[0]->Size());
02167 testBlastHitCounts(blaster, eTblastn_oof);
02168 testRawCutoffs(blaster, eTblastn, eTblastn_oof);
02169 }
02170
02171
02172 BOOST_AUTO_TEST_CASE(TblastnOutOfFrame2) {
02173 CSeq_id qid("gi|38111923");
02174 CSeq_id sid("gi|6648925");
02175
02176 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
02177 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02178
02179
02180 CTBlastnOptionsHandle opts;
02181 opts.SetOutOfFrameMode();
02182 opts.SetFrameShiftPenalty(5);
02183 opts.SetCompositionBasedStats(eNoCompositionBasedStats);
02184 opts.SetFilterString("L");
02185
02186 CBl2Seq blaster(*query, *subj, opts);
02187 TSeqAlignVector sav(blaster.Run());
02188 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02189 BOOST_REQUIRE_EQUAL(5, (int)sav[0]->Size());
02190
02191
02192
02193 const CSeq_align& seqalign = *sar;
02194 BOOST_REQUIRE(seqalign.IsSetScore());
02195 ITERATE(CSeq_align::TScore, itr, seqalign.GetScore()) {
02196 BOOST_REQUIRE((*itr)->IsSetId());
02197 if ((*itr)->GetId().GetStr() == "num_ident") {
02198 BOOST_REQUIRE_EQUAL(55, (*itr)->GetValue().GetInt());
02199 break;
02200 }
02201 }
02202 }
02203
02204 BOOST_AUTO_TEST_CASE(BlastxOutOfFrame) {
02205 CSeq_id qid("BC042576.1");
02206 CSeq_id sid("NP_647642.2");
02207
02208 auto_ptr<SSeqLoc> query(
02209 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
02210 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02211
02212
02213 CBlastxOptionsHandle opts;
02214 opts.SetOutOfFrameMode();
02215 opts.SetFrameShiftPenalty(10);
02216 opts.SetFilterString("m;L");
02217 opts.SetEvalueThreshold(0.01);
02218
02219 CBl2Seq blaster(*query, *subj, opts);
02220 TSeqAlignVector sav(blaster.Run());
02221 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02222 BOOST_REQUIRE_EQUAL(2, (int)sav[0]->Size());
02223 testBlastHitCounts(blaster, eBlastx_oof);
02224 testRawCutoffs(blaster, eBlastx, eBlastx_oof);
02225 }
02226
02227
02228
02229 BOOST_AUTO_TEST_CASE(BlastxOutOfFrame_DifferentFrames) {
02230 CSeq_id qid("gi|27486285");
02231 CSeq_id sid("gi|7331210");
02232
02233 auto_ptr<SSeqLoc> query(
02234 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
02235 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02236
02237
02238 CBlastxOptionsHandle opts;
02239 opts.SetOutOfFrameMode();
02240 opts.SetFrameShiftPenalty(10);
02241
02242 CBl2Seq blaster(*query, *subj, opts);
02243 TSeqAlignVector sav(blaster.Run());
02244 BOOST_REQUIRE_EQUAL(5, (int)sav[0]->Size());
02245 }
02246
02247
02248
02249
02250 void x_TestAlignmentQuerySubjStrandCombinations(TSeqAlignVector& sav,
02251 string aligned_strands) {
02252
02253
02254 vector< pair<TSignedSeqPos, TSignedSeqPos> > starts;
02255 starts.push_back(make_pair(7685759, 10));
02256 starts.push_back(make_pair(7685758, -1));
02257 starts.push_back(make_pair(7685718, 269));
02258 starts.push_back(make_pair(7685717, -1));
02259 starts.push_back(make_pair(7685545, 309));
02260
02261 const size_t kNumSegments(starts.size());
02262
02263
02264 vector<TSeqPos> lengths;
02265 lengths.reserve(kNumSegments);
02266 lengths.push_back(259);
02267 lengths.push_back(1);
02268 lengths.push_back(40);
02269 lengths.push_back(1);
02270 lengths.push_back(172);
02271
02272
02273 typedef vector< pair<ENa_strand, ENa_strand> > TStrandPairs;
02274 TStrandPairs strands(kNumSegments,
02275 make_pair(eNa_strand_minus, eNa_strand_plus));
02276
02277
02278 if (aligned_strands == "plus-minus") {
02279 reverse(starts.begin(), starts.end());
02280 reverse(lengths.begin(), lengths.end());
02281 NON_CONST_ITERATE(TStrandPairs, itr, strands) {
02282 swap(itr->first, itr->second);
02283 }
02284 }
02285 BOOST_REQUIRE_EQUAL(kNumSegments, lengths.size());
02286 BOOST_REQUIRE_EQUAL(kNumSegments, strands.size());
02287
02288
02289 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02290 BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
02291
02292 const CDense_seg& ds = sar->GetSegs().GetDenseg();
02293
02294 const size_t kNumDim(ds.GetDim());
02295 vector< TSignedSeqPos > seg_starts = ds.GetStarts();
02296 vector< TSeqPos> seg_lengths = ds.GetLens();
02297 vector< ENa_strand> seg_strands = ds.GetStrands();
02298 BOOST_REQUIRE_EQUAL(kNumSegments, seg_lengths.size());
02299 BOOST_REQUIRE_EQUAL(kNumSegments*kNumDim, seg_starts.size());
02300
02301
02302 for (size_t index = 0; index < kNumSegments; ++index) {
02303 ostringstream os;
02304 os << "Segment " << index << ": expected " << lengths[index]
02305 << " actual " << seg_lengths[index];
02306 BOOST_REQUIRE_MESSAGE(lengths[index] == seg_lengths[index],
02307 os.str());
02308
02309 os.str("");
02310 os << "Segment " << index << ": expected " << starts[index].first
02311 << " actual " << seg_starts[2*index];
02312 BOOST_REQUIRE_MESSAGE(starts[index].first == seg_starts[2*index],
02313 os.str());
02314 os.str("");
02315 os << "Segment " << index << ": expected " << starts[index].second
02316 << " actual " << seg_starts[2*index];
02317 BOOST_REQUIRE_MESSAGE(starts[index].second == seg_starts[2*index+1],
02318 os.str());
02319 os.str("");
02320 os << "Segment " << index << ": expected " << strands[index].first
02321 << " actual " << seg_strands[2*index];
02322 BOOST_REQUIRE_MESSAGE(strands[index].first == seg_strands[2*index],
02323 os.str());
02324 os.str("");
02325 os << "Segment " << index << ": expected " << strands[index].second
02326 << " actual " << seg_strands[2*index];
02327 BOOST_REQUIRE_MESSAGE(strands[index].second == seg_strands[2*index+1],
02328 os.str());
02329 }
02330 }
02331
02332 static void testIntervalWholeAlignment(TSeqAlignVector& sav)
02333 {
02334 const int num_segs = 5;
02335 const int num_starts = 10;
02336 const int starts[num_starts] = { 7685759, 0, 7685758, -1, 7685718,
02337 269, 7685717, -1, 7685545, 309 };
02338 const int lengths[num_segs] = { 269, 1, 40, 1, 172 };
02339 int index;
02340
02341 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02342 BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
02343 CTypeIterator<CDense_seg> segs_itr(Begin(*sar));
02344 vector< TSignedSeqPos > seg_starts = segs_itr->GetStarts();
02345 vector< TSeqPos> seg_lengths = segs_itr->GetLens();
02346 vector< ENa_strand> seg_strands = segs_itr->GetStrands();
02347 BOOST_REQUIRE_EQUAL(num_segs, (int)seg_lengths.size());
02348 BOOST_REQUIRE_EQUAL(num_starts, (int)seg_starts.size());
02349 for (index = 0; index < num_segs; ++index) {
02350 BOOST_REQUIRE_EQUAL(lengths[index], (int)seg_lengths[index]);
02351 BOOST_REQUIRE_EQUAL(starts[2*index], (int)seg_starts[2*index]);
02352 BOOST_REQUIRE_EQUAL(starts[2*index+1], (int)seg_starts[2*index+1]);
02353 BOOST_REQUIRE(seg_strands[2*index] == eNa_strand_minus);
02354 BOOST_REQUIRE(seg_strands[2*index+1] == eNa_strand_plus);
02355 }
02356 }
02357
02358 static void testWholeIntervalAlignment(TSeqAlignVector& sav)
02359 {
02360 const int num_segs = 5;
02361 const int num_starts = 10;
02362 const int starts[num_starts] = { 309, 7685545, -1, 7685717, 269, 7685718,
02363 -1, 7685758, 0, 7685759 };
02364 const int lengths[num_segs] = { 172, 1, 40, 1, 269 };
02365 int index;
02366
02367 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02368 BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
02369 CTypeIterator<CDense_seg> segs_itr(Begin(*sar));
02370 vector< TSignedSeqPos > seg_starts = segs_itr->GetStarts();
02371 vector< TSeqPos> seg_lengths = segs_itr->GetLens();
02372 vector< ENa_strand> seg_strands = segs_itr->GetStrands();
02373 BOOST_REQUIRE_EQUAL(num_segs, (int)seg_lengths.size());
02374 BOOST_REQUIRE_EQUAL(num_starts, (int)seg_starts.size());
02375 for (index = 0; index < num_segs; ++index) {
02376 BOOST_REQUIRE_EQUAL(lengths[index], (int)seg_lengths[index]);
02377 BOOST_REQUIRE_EQUAL(starts[2*index], (int)seg_starts[2*index]);
02378 BOOST_REQUIRE_EQUAL(starts[2*index+1], (int)seg_starts[2*index+1]);
02379 BOOST_REQUIRE(seg_strands[2*index] == eNa_strand_minus);
02380 BOOST_REQUIRE(seg_strands[2*index+1] == eNa_strand_plus);
02381 }
02382 }
02383
02384
02385 BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjBothStrands) {
02386
02387 CSeq_id qid("NT_004487.15");
02388 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02389 auto_ptr<SSeqLoc> query(
02390 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02391 eNa_strand_both));
02392
02393 CSeq_id sid("AA441981.1");
02394 range.first = 10;
02395 range.second = 480;
02396 auto_ptr<SSeqLoc> subj(
02397 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02398 eNa_strand_both));
02399
02400 CBlastNucleotideOptionsHandle* opts = new CBlastNucleotideOptionsHandle;
02401 opts->SetTraditionalBlastnDefaults();
02402 CBl2Seq blaster(*query, *subj, *opts);
02403 TSeqAlignVector sav(blaster.Run());
02404 x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
02405 }
02406
02407
02408 BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjPlusStrand) {
02409
02410 CSeq_id qid("NT_004487.15");
02411 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02412 auto_ptr<SSeqLoc> query(
02413 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02414 eNa_strand_both));
02415
02416 CSeq_id sid("AA441981.1");
02417 range.first = 10;
02418 range.second = 480;
02419 auto_ptr<SSeqLoc> subj(
02420 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02421 eNa_strand_plus));
02422
02423 CBlastNucleotideOptionsHandle opts;
02424 opts.SetTraditionalBlastnDefaults();
02425 CBl2Seq blaster(*query, *subj, opts);
02426 TSeqAlignVector sav(blaster.Run());
02427 x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
02428 }
02429
02430
02431 BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjMinusStrand) {
02432
02433 CSeq_id qid("NT_004487.15");
02434 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02435 auto_ptr<SSeqLoc> query(
02436 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02437 eNa_strand_both));
02438
02439 CSeq_id sid("AA441981.1");
02440 range.first = 10;
02441 range.second = 480;
02442 auto_ptr<SSeqLoc> subj(
02443 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02444 eNa_strand_minus));
02445
02446 CBlastNucleotideOptionsHandle opts;
02447 opts.SetTraditionalBlastnDefaults();
02448 CBl2Seq blaster(*query, *subj, opts);
02449 TSeqAlignVector sav(blaster.Run());
02450 x_TestAlignmentQuerySubjStrandCombinations(sav, "plus-minus");
02451 }
02452
02453
02454 BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjPlusStrand) {
02455
02456 CSeq_id qid("NT_004487.15");
02457 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02458 auto_ptr<SSeqLoc> query(
02459 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02460 eNa_strand_plus));
02461
02462 CSeq_id sid("AA441981.1");
02463 range.first = 10;
02464 range.second = 480;
02465 auto_ptr<SSeqLoc> subj(
02466 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02467 eNa_strand_plus));
02468
02469 CBlastNucleotideOptionsHandle opts;
02470 opts.SetTraditionalBlastnDefaults();
02471 CBl2Seq blaster(*query, *subj, opts);
02472 TSeqAlignVector sav(blaster.Run());
02473 BOOST_REQUIRE(sav[0]->IsEmpty() == true);
02474 }
02475
02476
02477 BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjMinusStrand) {
02478
02479 CSeq_id qid("NT_004487.15");
02480 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02481 auto_ptr<SSeqLoc> query(
02482 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02483 eNa_strand_plus));
02484
02485 CSeq_id sid("AA441981.1");
02486 range.first = 10;
02487 range.second = 480;
02488 auto_ptr<SSeqLoc> subj(
02489 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02490 eNa_strand_minus));
02491
02492 CBlastNucleotideOptionsHandle opts;
02493 opts.SetTraditionalBlastnDefaults();
02494 CBl2Seq blaster(*query, *subj, opts);
02495 TSeqAlignVector sav(blaster.Run());
02496 x_TestAlignmentQuerySubjStrandCombinations(sav, "plus-minus");
02497 }
02498
02499
02500
02501 BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjBothStrands) {
02502
02503 CSeq_id qid("NT_004487.15");
02504 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02505 auto_ptr<SSeqLoc> query(
02506 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02507 eNa_strand_plus));
02508
02509 CSeq_id sid("AA441981.1");
02510 range.first = 10;
02511 range.second = 480;
02512 auto_ptr<SSeqLoc> subj(
02513 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02514 eNa_strand_both));
02515
02516 CBlastNucleotideOptionsHandle opts;
02517 opts.SetTraditionalBlastnDefaults();
02518 CBl2Seq blaster(*query, *subj, opts);
02519 TSeqAlignVector sav(blaster.Run());
02520 BOOST_REQUIRE(sav[0]->IsEmpty() == true);
02521 }
02522
02523
02524 BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjMinusStrand) {
02525
02526 CSeq_id qid("NT_004487.15");
02527 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02528 auto_ptr<SSeqLoc> query(
02529 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02530 eNa_strand_minus));
02531
02532 CSeq_id sid("AA441981.1");
02533 range.first = 10;
02534 range.second = 480;
02535 auto_ptr<SSeqLoc> subj(
02536 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02537 eNa_strand_minus));
02538
02539 CBlastNucleotideOptionsHandle opts;
02540 opts.SetTraditionalBlastnDefaults();
02541 CBl2Seq blaster(*query, *subj, opts);
02542 TSeqAlignVector sav(blaster.Run());
02543 BOOST_REQUIRE(sav[0]->IsEmpty() == true);
02544 }
02545
02546
02547 BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjPlusStrand) {
02548
02549 CSeq_id qid("NT_004487.15");
02550 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02551 auto_ptr<SSeqLoc> query(
02552 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02553 eNa_strand_minus));
02554
02555 CSeq_id sid("AA441981.1");
02556 range.first = 10;
02557 range.second = 480;
02558 auto_ptr<SSeqLoc> subj(
02559 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02560 eNa_strand_plus));
02561
02562 CBlastNucleotideOptionsHandle opts;
02563 opts.SetTraditionalBlastnDefaults();
02564 CBl2Seq blaster(*query, *subj, opts);
02565 TSeqAlignVector sav(blaster.Run());
02566 x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
02567 }
02568
02569
02570 BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjBothStrands) {
02571
02572 CSeq_id qid("NT_004487.15");
02573 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02574 auto_ptr<SSeqLoc> query(
02575 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02576 eNa_strand_minus));
02577
02578 CSeq_id sid("AA441981.1");
02579 range.first = 10;
02580 range.second = 480;
02581 auto_ptr<SSeqLoc> subj(
02582 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02583 eNa_strand_both));
02584
02585 CBlastNucleotideOptionsHandle opts;
02586 opts.SetTraditionalBlastnDefaults();
02587 CBl2Seq blaster(*query, *subj, opts);
02588 TSeqAlignVector sav(blaster.Run());
02589 x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
02590 }
02591
02592
02593 BOOST_AUTO_TEST_CASE(Blastn_QueryWhole_SubjInterval)
02594 {
02595 CSeq_id qid("AA441981.1");
02596 auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateWholeSSeqLoc(qid));
02597
02598 CSeq_id sid("NT_004487.15");
02599 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02600 auto_ptr<SSeqLoc> subj(
02601 CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
02602 eNa_strand_both));
02603
02604 CBlastNucleotideOptionsHandle opts;
02605 opts.SetTraditionalBlastnDefaults();
02606 CBl2Seq blaster(*query, *subj, opts);
02607 TSeqAlignVector sav(blaster.Run());
02608 testWholeIntervalAlignment(sav);
02609 }
02610
02611 BOOST_AUTO_TEST_CASE(Blastn_QueryInterval_SubjWhole)
02612 {
02613 CSeq_id qid("NT_004487.15");
02614 pair<TSeqPos, TSeqPos> range(7685545, 7686027);
02615 auto_ptr<SSeqLoc> query(
02616 CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
02617 eNa_strand_both));
02618
02619 CSeq_id sid("AA441981.1");
02620 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateWholeSSeqLoc(sid));
02621
02622 CBlastNucleotideOptionsHandle opts;
02623 opts.SetTraditionalBlastnDefaults();
02624 CBl2Seq blaster(*query, *subj, opts);
02625 TSeqAlignVector sav(blaster.Run());
02626 testIntervalWholeAlignment(sav);
02627 }
02628
02629 BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs) {
02630 vector<int> q_gis, s_gis;
02631
02632
02633 q_gis.push_back(6);
02634 q_gis.push_back(129295);
02635 q_gis.push_back(15606659);
02636
02637
02638 s_gis.push_back(129295);
02639 s_gis.push_back(6);
02640 s_gis.push_back(4336138);
02641 s_gis.push_back(15606659);
02642 s_gis.push_back(5556);
02643
02644 TSeqLocVector queries;
02645 ITERATE(vector<int>, itr, q_gis) {
02646 CRef<CSeq_loc> loc(new CSeq_loc());
02647 loc->SetWhole().SetGi(*itr);
02648
02649 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02650 scope->AddDefaults();
02651 queries.push_back(SSeqLoc(loc, scope));
02652 }
02653
02654 TSeqLocVector subjects;
02655 ITERATE(vector<int>, itr, s_gis) {
02656 CRef<CSeq_loc> loc(new CSeq_loc());
02657 loc->SetWhole().SetGi(*itr);
02658
02659 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02660 scope->AddDefaults();
02661 subjects.push_back(SSeqLoc(loc, scope));
02662 }
02663
02664 size_t num_queries = queries.size();
02665 size_t num_subjects = subjects.size();
02666
02667
02668 CBl2Seq blaster4all(queries, subjects, eBlastp);
02669 TSeqAlignVector sas_v = blaster4all.Run();
02670 BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
02671 testBlastHitCounts(blaster4all, eBlastp_multi_q_s);
02672 testRawCutoffs(blaster4all, eBlastp, eBlastp_multi_q_s);
02673
02674
02675 testResultAlignments(num_queries, num_subjects,
02676 sas_v);
02677 }
02678
02679 BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs_RunEx) {
02680 vector<int> q_gis, s_gis;
02681
02682
02683 q_gis.push_back(6);
02684 q_gis.push_back(129295);
02685 q_gis.push_back(15606659);
02686
02687
02688 s_gis.push_back(129295);
02689 s_gis.push_back(6);
02690 s_gis.push_back(4336138);
02691 s_gis.push_back(15606659);
02692 s_gis.push_back(5556);
02693
02694 TSeqLocVector queries;
02695 ITERATE(vector<int>, itr, q_gis) {
02696 CRef<CSeq_loc> loc(new CSeq_loc());
02697 loc->SetWhole().SetGi(*itr);
02698
02699 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02700 scope->AddDefaults();
02701 queries.push_back(SSeqLoc(loc, scope));
02702 }
02703
02704 TSeqLocVector subjects;
02705 ITERATE(vector<int>, itr, s_gis) {
02706 CRef<CSeq_loc> loc(new CSeq_loc());
02707 loc->SetWhole().SetGi(*itr);
02708
02709 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02710 scope->AddDefaults();
02711 subjects.push_back(SSeqLoc(loc, scope));
02712 }
02713
02714 size_t num_queries = queries.size();
02715 size_t num_subjects = subjects.size();
02716
02717
02718 CBl2Seq blaster4all(queries, subjects, eBlastp);
02719 CRef<CSearchResultSet> results = blaster4all.RunEx();
02720 BOOST_REQUIRE(results->GetResultType() == eSequenceComparison);
02721 BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
02722 results->GetNumResults());
02723
02724
02725 TSeqAlignVector sas_v;
02726 for (size_t i = 0; i < num_queries; i++)
02727 {
02728 for (size_t j = 0; j < num_subjects; j++)
02729 {
02730 CSearchResults& res_ij = results->GetResults(i, j);
02731 CRef<CSeq_align_set> aln_set;
02732 aln_set.Reset(const_cast<CSeq_align_set*>
02733 (res_ij.GetSeqAlign().GetPointer()));
02734 sas_v.push_back(aln_set);
02735 }
02736 }
02737
02738
02739
02740
02741 BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
02742 testBlastHitCounts(blaster4all, eBlastp_multi_q_s);
02743 testRawCutoffs(blaster4all, eBlastp, eBlastp_multi_q_s);
02744
02745
02746 testResultAlignments(num_queries, num_subjects,
02747 sas_v);
02748 }
02749
02750
02751 BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs_CLocalBlast) {
02752 vector<int> q_gis, s_gis;
02753
02754
02755 q_gis.push_back(6);
02756 q_gis.push_back(129295);
02757 q_gis.push_back(15606659);
02758
02759
02760 s_gis.push_back(129295);
02761 s_gis.push_back(6);
02762 s_gis.push_back(4336138);
02763 s_gis.push_back(15606659);
02764 s_gis.push_back(5556);
02765
02766 TSeqLocVector query_vec;
02767 ITERATE(vector<int>, itr, q_gis) {
02768 CRef<CSeq_loc> loc(new CSeq_loc());
02769 loc->SetWhole().SetGi(*itr);
02770
02771 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02772 scope->AddDefaults();
02773 query_vec.push_back(SSeqLoc(loc, scope));
02774 }
02775 CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(query_vec));
02776
02777 CRef<CBlastOptionsHandle>
02778 opts_handle(CBlastOptionsFactory::Create(eBlastp));
02779
02780 TSeqLocVector subj_vec;
02781 ITERATE(vector<int>, itr, s_gis) {
02782 CRef<CSeq_loc> loc(new CSeq_loc());
02783 loc->SetWhole().SetGi(*itr);
02784
02785 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
02786 scope->AddDefaults();
02787 subj_vec.push_back(SSeqLoc(loc, scope));
02788 }
02789 CRef<IQueryFactory> subj_qf(new CObjMgr_QueryFactory(subj_vec));
02790 CRef<CLocalDbAdapter> subjects(new CLocalDbAdapter(subj_qf,
02791 opts_handle));
02792
02793 size_t num_queries = query_vec.size();
02794 size_t num_subjects = subj_vec.size();
02795
02796
02797 CLocalBlast blaster(queries, opts_handle, subjects);
02798 CRef<CSearchResultSet> results = blaster.Run();
02799 BOOST_REQUIRE(results->GetResultType() == eSequenceComparison);
02800 BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
02801 results->GetNumResults());
02802 BOOST_REQUIRE_EQUAL((num_queries*num_subjects), results->size());
02803 BOOST_REQUIRE_EQUAL(num_queries, results->GetNumQueries());
02804 BOOST_REQUIRE_EQUAL(num_subjects,
02805 results->GetNumResults()/results->GetNumQueries());
02806
02807
02808 TSeqAlignVector sas_v;
02809 for (size_t i = 0; i < num_queries; i++)
02810 {
02811 for (size_t j = 0; j < num_subjects; j++)
02812 {
02813 CSearchResults& res_ij = results->GetResults(i, j);
02814 CRef<CSeq_align_set> aln_set;
02815 aln_set.Reset(const_cast<CSeq_align_set*>
02816 (res_ij.GetSeqAlign().GetPointer()));
02817 sas_v.push_back(aln_set);
02818 }
02819 }
02820
02821
02822
02823 BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
02824
02825
02826 testResultAlignments(num_queries, num_subjects, sas_v);
02827 }
02828
02829 BOOST_AUTO_TEST_CASE(BlastOptionsEquality) {
02830
02831 auto_ptr<CBlastOptionsHandle> megablast_options_handle(
02832 CBlastOptionsFactory::Create(eMegablast));
02833 CBlastNucleotideOptionsHandle nucl_options_handle;
02834 BOOST_REQUIRE(megablast_options_handle->GetOptions() ==
02835 nucl_options_handle.GetOptions());
02836 }
02837
02838 BOOST_AUTO_TEST_CASE(BlastOptionsInequality) {
02839 CBlastProteinOptionsHandle prot_options_handle;
02840 CBlastNucleotideOptionsHandle nucl_options_handle;
02841 BOOST_REQUIRE(prot_options_handle.GetOptions() !=
02842 nucl_options_handle.GetOptions());
02843
02844
02845 auto_ptr<CBlastOptionsHandle> blastn_options_handle(
02846 CBlastOptionsFactory::Create(eBlastn));
02847 BOOST_REQUIRE(blastn_options_handle->GetOptions() !=
02848 nucl_options_handle.GetOptions());
02849
02850
02851 CBlastProteinOptionsHandle prot_options_handle2;
02852 prot_options_handle.SetMatrixName("pam30");
02853 BOOST_REQUIRE(prot_options_handle.GetOptions() !=
02854 prot_options_handle2.GetOptions());
02855 }
02856
02857 BOOST_AUTO_TEST_CASE(DiscontiguousMB) {
02858 CSeq_id qid("gi|408478");
02859 CSeq_id sid("gi|1546012");
02860 auto_ptr<SSeqLoc> query(
02861 CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
02862 auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
02863
02864 CBl2Seq blaster(*query, *subj, eDiscMegablast);
02865 TSeqAlignVector sav(blaster.Run());
02866 BOOST_REQUIRE_EQUAL(1, (int)sav.size());
02867
02868 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02869 BOOST_REQUIRE_EQUAL(13, (int)sar->GetSegs().GetDenseg().GetNumseg());
02870 testBlastHitCounts(blaster, eDiscMegablast_U02544_U61969);
02871 testRawCutoffs(blaster, eDiscMegablast, eDiscMegablast_U02544_U61969);
02872 }
02873
02874 BOOST_AUTO_TEST_CASE(BlastnHumanChrom_MRNA) {
02875 CSeq_id qid("NT_004487.16");
02876 CSeq_id sid("AA621478.1");
02877 pair<TSeqPos, TSeqPos> qrange(7868209-1, 7868602-1);
02878 pair<TSeqPos, TSeqPos> srange(2-1, 397-1);
02879 auto_ptr<SSeqLoc> query(
02880 CTestObjMgr::Instance().CreateSSeqLoc(qid,
02881 qrange, eNa_strand_plus));
02882 auto_ptr<SSeqLoc> subj(
02883 CTestObjMgr::Instance().CreateSSeqLoc(sid,
02884 srange, eNa_strand_plus));
02885
02886 CBlastNucleotideOptionsHandle options;
02887 CBl2Seq blaster(*query, *subj, options);
02888 TSeqAlignVector sav(blaster.Run());
02889 BOOST_REQUIRE_EQUAL(1, (int)sav.size());
02890
02891 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02892 BOOST_REQUIRE_EQUAL(5, (int)sar->GetSegs().GetDenseg().GetNumseg());
02893 testBlastHitCounts(blaster, eMegablast_chrom_mrna);
02894 testRawCutoffs(blaster, eMegablast, eMegablast_chrom_mrna);
02895 }
02896
02897
02898
02899 BOOST_AUTO_TEST_CASE(testOneSubjectResults2CSeqAlign)
02900 {
02901 const int num_subjects = 15;
02902 const int results_size[num_subjects] =
02903 { 1, 1, 0, 1, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 1 };
02904 const int query_gi = 7274302;
02905 const int gi_diff = 28;
02906 string seqid_str("gi|");
02907 CRef<CSeq_id> id(new CSeq_id(seqid_str + NStr::IntToString(query_gi)));
02908 auto_ptr<SSeqLoc> sl(
02909 CTestObjMgr::Instance().CreateSSeqLoc(*id, eNa_strand_both));
02910 TSeqLocVector query;
02911 query.push_back(*sl);
02912 TSeqLocVector subjects;
02913 int index;
02914 for (index = 0; index < num_subjects; ++index) {
02915 id.Reset(new CSeq_id(seqid_str +
02916 NStr::IntToString(query_gi + gi_diff + index)));
02917 sl.reset(CTestObjMgr::Instance().CreateSSeqLoc(*id,
02918 eNa_strand_both));
02919 subjects.push_back(*sl);
02920 }
02921 CBl2Seq blaster(query, subjects, eMegablast);
02922 TSeqAlignVector seqalign_v = blaster.Run();
02923 BOOST_REQUIRE_EQUAL(num_subjects, (int)seqalign_v.size());
02924
02925 index = 0;
02926 ITERATE(TSeqAlignVector, itr, seqalign_v)
02927 {
02928 BOOST_REQUIRE_EQUAL(results_size[index], (int) (*itr)->Get().size());
02929 index++;
02930 }
02931 }
02932
02933 BOOST_AUTO_TEST_CASE(testMultiSeqSearchSymmetry)
02934 {
02935 const int num_seqs = 19;
02936 const int gi_list[num_seqs] =
02937 { 1346057, 125527, 121064, 1711551, 125412, 128337, 2507199,
02938 1170625, 1730070, 585365, 140977, 1730069, 20455504, 125206,
02939 125319, 114152, 1706450, 1706307, 125565 };
02940 const int score_cutoff = 70;
02941
02942 string seqid_str("gi|");
02943 TSeqLocVector seq_vec;
02944 int index;
02945 for (index = 0; index < num_seqs; ++index) {
02946 CRef<CSeq_id> id(new CSeq_id(seqid_str +
02947 NStr::IntToString(gi_list[index])));
02948 auto_ptr<SSeqLoc> sl(
02949 CTestObjMgr::Instance().CreateSSeqLoc(*id, eNa_strand_both));
02950 seq_vec.push_back(*sl);
02951 }
02952
02953 CBlastProteinOptionsHandle prot_opts;
02954 prot_opts.SetSegFiltering(false);
02955 CBl2Seq blaster(seq_vec, seq_vec, prot_opts);
02956 blaster.RunWithoutSeqalignGeneration();
02957 BlastHSPResults* results = blaster.GetResults();
02958
02959 int qindex, sindex, qindex1, sindex1;
02960 for (qindex = 0; qindex < num_seqs; ++qindex) {
02961 for (sindex = 0; sindex < results->hitlist_array[qindex]->hsplist_count;
02962 ++sindex) {
02963 BlastHSPList* hsp_list1, *hsp_list2 = NULL;
02964 hsp_list1 = results->hitlist_array[qindex]->hsplist_array[sindex];
02965 qindex1 = hsp_list1->oid;
02966 BlastHitList* hitlist = results->hitlist_array[qindex1];
02967 for (sindex1 = 0; sindex1 < hitlist->hsplist_count; ++sindex1) {
02968 if (hitlist->hsplist_array[sindex1]->oid == qindex) {
02969 hsp_list2 = hitlist->hsplist_array[sindex1];
02970 break;
02971 }
02972 }
02973 BOOST_REQUIRE(hsp_list2 != NULL);
02974 int hindex;
02975 for (hindex = 0; hindex < hsp_list1->hspcnt; ++hindex) {
02976 if (hsp_list1->hsp_array[hindex]->score <= score_cutoff)
02977 break;
02978 BOOST_REQUIRE(hindex < hsp_list2->hspcnt);
02979 BOOST_REQUIRE_EQUAL(hsp_list1->hsp_array[hindex]->score,
02980 hsp_list2->hsp_array[hindex]->score);
02981 }
02982 }
02983 }
02984 }
02985
02986 BOOST_AUTO_TEST_CASE(testInterruptCallbackWithNull) {
02987 CSeq_id id("gi|129295");
02988 auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
02989
02990 CBl2Seq blaster(*sl, *sl, eBlastp);
02991 TInterruptFnPtr null_fnptr = 0;
02992 TInterruptFnPtr fnptr = blaster.SetInterruptCallback(null_fnptr);
02993 BOOST_REQUIRE(fnptr == NULL);
02994
02995 TSeqAlignVector sav(blaster.Run());
02996 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
02997 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
02998
02999 fnptr = blaster.SetInterruptCallback(interrupt_immediately);
03000
03001 BOOST_REQUIRE(fnptr == null_fnptr);
03002
03003 fnptr = blaster.SetInterruptCallback(null_fnptr);
03004
03005 BOOST_REQUIRE(fnptr == interrupt_immediately);
03006
03007
03008 sav = blaster.Run();
03009 BOOST_REQUIRE_EQUAL(1, (int)sav.size());
03010 sar = *(sav[0]->Get().begin());
03011 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
03012 }
03013
03014 BOOST_AUTO_TEST_CASE(testInterruptCallbackDoNotInterrupt) {
03015 CSeq_id id("gi|129295");
03016 auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
03017
03018 CBl2Seq blaster(*sl, *sl, eBlastp);
03019 TInterruptFnPtr fnptr = blaster.SetInterruptCallback(do_not_interrupt);
03020 BOOST_REQUIRE(fnptr == NULL);
03021
03022 TSeqAlignVector sav(blaster.Run());
03023 BOOST_REQUIRE_EQUAL(1, (int)sav.size());
03024 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
03025 BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
03026 }
03027
03028 #if SEQLOC_MIX_QUERY_OK
03029 BOOST_AUTO_TEST_CASE(MultiIntervalLoc) {
03030 const size_t kNumInts = 20;
03031 const size_t kStarts[kNumInts] =
03032 { 838, 1838, 6542, 7459, 9246, 10431, 14807, 16336, 19563,
03033 20606, 21232, 22615, 23822, 27941, 29597, 30136, 31287,
03034 31786, 33315, 35402 };
03035 const size_t kEnds[kNumInts] =
03036 { 961, 2010, 6740, 7573, 9408, 10609, 15043, 16511, 19783,
03037 20748, 21365, 22817, 24049, 28171, 29839, 30348, 31362,
03038 31911, 33485, 37952 };
03039 size_t index;
03040
03041 CSeq_id qid("gi|3417288");
03042 CRef<CSeq_loc> qloc(new CSeq_loc());
03043 for (index = 0; index < kNumInts; ++index) {
03044 CRef<CSeq_loc> next_loc(new CSeq_loc());
03045 next_loc->SetInt().SetFrom(kStarts[index]);
03046 next_loc->SetInt().SetTo(kEnds[index]);
03047 next_loc->SetInt().SetId(qid);
03048 qloc->SetMix().Set().push_back(next_loc);
03049 }
03050
03051 CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
03052 scope->AddDefaults();
03053
03054 auto_ptr<SSeqLoc> query(new SSeqLoc(qloc, scope));
03055
03056 CSeq_id sid("gi|51511732");
03057 pair<TSeqPos, TSeqPos> range(15595732, 15705419);
03058 auto_ptr<SSeqLoc> subject(
03059 CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
03060 CBl2Seq blaster(*query, *subject, eBlastn);
03061 TSeqAlignVector sav(blaster.Run());
03062 CRef<CSeq_align> sar = *(sav[0]->Get().begin());
03063 BOOST_REQUIRE_EQUAL(60, (int)sar->GetSegs().GetDisc().Get().size());
03064 }
03065 #endif
03066
03067 BOOST_AUTO_TEST_CASE(QueryMaskIgnoredInMiniExtension) {
03068 CRef<CSeq_loc> qloc(new CSeq_loc());
03069 qloc->SetWhole().SetGi(4505696);
03070 CSeq_id sid("gi|29809252");
03071 pair<TSeqPos, TSeqPos> range(662070, 662129);
03072
03073 CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
03074 scope->AddDefaults();
03075
03076 auto_ptr<SSeqLoc> query(new SSeqLoc(qloc, scope));
03077 auto_ptr<SSeqLoc> subject(
03078 CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
03079
03080 CBl2Seq blaster(*query, *subject, eMegablast);
03081 TSeqAlignVector sav(blaster.Run());
03082 CRef<CSeq_align_set> sas = sav.front();
03083 BOOST_REQUIRE(sas->Get().empty());
03084 }
03085
03086 #endif
03087
03088 BOOST_AUTO_TEST_SUITE_END()
03089
03090