00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <ncbi_pch.hpp>
00034 #include <corelib/test_boost.hpp>
00035
00036 #include <corelib/ncbitime.hpp>
00037 #include <objmgr/object_manager.hpp>
00038 #include <objmgr/scope.hpp>
00039
00040 #include <objects/seqloc/Seq_loc.hpp>
00041 #include <objmgr/util/sequence.hpp>
00042
00043 #include "test_objmgr.hpp"
00044
00045 #include <algo/blast/core/blast_encoding.h>
00046 #include <algo/blast/core/blast_options.h>
00047 #include <algo/blast/core/blast_setup.h>
00048 #include <algo/blast/core/blast_hits.h>
00049 #include <algo/blast/core/link_hsps.h>
00050 #include <algo/blast/api/blast_options.hpp>
00051 #include <blast_objmgr_priv.hpp>
00052 #include <algo/blast/api/seqsrc_seqdb.hpp>
00053
00054 using namespace std;
00055 using namespace ncbi;
00056 using namespace ncbi::objects;
00057 using namespace ncbi::blast;
00058
00059 struct AllCutoffScores {
00060 Int4 x_drop_ungapped;
00061 Int4 x_drop_gapped;
00062 Int4 x_drop_final;
00063 Int4 gap_trigger;
00064 Int4 cutoff_score_ungapped;
00065 Int4 cutoff_score_final;
00066 bool do_sum_stats;
00067 Int4 cutoff_small_gap;
00068 Int4 cutoff_big_gap;
00069 };
00070
00071
00072
00073 static void
00074 s_SetupNuclQueryInfo(Uint4 query_length, BlastQueryInfo* *query_info)
00075 {
00076 (*query_info) = BlastQueryInfoNew(eBlastTypeBlastn, 1);
00077 (*query_info)->contexts[0].query_offset = 0;
00078 (*query_info)->contexts[0].query_length = query_length;
00079 (*query_info)->contexts[1].query_offset = query_length + 1;
00080 (*query_info)->contexts[1].query_length = query_length;
00081 (*query_info)->max_length = query_length;
00082 }
00083
00084 struct LinkHspTestFixture {
00085
00086 EBlastProgramType m_ProgramType;
00087 EProgram m_Program;
00088 BlastHSPList* m_HspList;
00089 BlastScoreBlk* m_ScoreBlk;
00090 CBlastQueryInfo m_QueryInfo;
00091 Int4 m_SubjectLength;
00092 BlastHitSavingParameters* m_HitParams;
00093
00094 ~LinkHspTestFixture() {
00095 freeStructures();
00096 }
00097
00098
00099 void setupHSPListTransl()
00100 {
00101 const int kNumHsps = 10;
00102 const int kScores[kNumHsps] =
00103 { 1023, 282, 246, 202, 142, 117, 98, 92, 63, 53 };
00104 const int kQueryOffsets[kNumHsps] =
00105 { 11, 346, 399, 244, 287, 224, 311, 218, 0, 404};
00106 const int kQueryLengths[kNumHsps] =
00107 { 244, 56, 49, 49, 104, 29, 36, 37, 12, 25 };
00108 const int kSubjectFrames[kNumHsps] =
00109 { 2, 2, 3, 2, 1, 1, 2, 3, 3, 2 };
00110 const int kSubjectOffsets[kNumHsps] =
00111 { 1372, 2677, 2756, 2062, 2209, 1832, 2351, 1732, 1140, 2683 };
00112 const int kSubjectLengths[kNumHsps] =
00113 {300, 56, 49, 50, 75, 29, 32, 36, 12, 26 };
00114
00115 m_HspList = Blast_HSPListNew(0);
00116 Int4 index;
00117 BlastHSP* hsp;
00118
00119 for (index = 0; index < kNumHsps; ++index) {
00120 m_HspList->hsp_array[index] = hsp =
00121 (BlastHSP*) calloc(1, sizeof(BlastHSP));
00122 hsp->score = kScores[index];
00123 if (m_ProgramType == eBlastTypeTblastn) {
00124 hsp->query.offset = kQueryOffsets[index];
00125 hsp->query.end = kQueryOffsets[index] + kQueryLengths[index];
00126 hsp->subject.offset = kSubjectOffsets[index];
00127 hsp->subject.end =
00128 kSubjectOffsets[index] + kSubjectLengths[index];
00129 hsp->subject.frame = kSubjectFrames[index];
00130 } else {
00131 hsp->query.offset = kSubjectOffsets[index];
00132 hsp->query.end =
00133 kSubjectOffsets[index] + kSubjectLengths[index];
00134 hsp->subject.offset = kQueryOffsets[index];
00135 hsp->subject.end = kQueryOffsets[index] + kQueryLengths[index];
00136 hsp->query.frame = kSubjectFrames[index];
00137 }
00138 }
00139
00140 m_HspList->hspcnt = kNumHsps;
00141 }
00142
00143
00144 void setupScoreBlk(Uint1* seqbuf, bool gapped,
00145 BlastScoringOptions** score_options_ptr)
00146 {
00147 Int2 status;
00148 BlastScoringOptions* score_options = NULL;
00149 m_ScoreBlk =
00150 BlastScoreBlkNew((m_ProgramType==eBlastTypeBlastn ?
00151 BLASTNA_SEQ_CODE : BLASTAA_SEQ_CODE),
00152 m_QueryInfo->last_context+1);
00153
00154 BlastScoringOptionsNew(m_ProgramType, &score_options);
00155 score_options->gapped_calculation = (gapped ? TRUE : FALSE);
00156
00157 if (m_ProgramType != eBlastTypeBlastn) {
00158 BOOST_REQUIRE(!strcmp("BLOSUM62", score_options->matrix));
00159 }
00160 status = Blast_ScoreBlkMatrixInit(m_ProgramType, score_options,
00161 m_ScoreBlk, &BlastFindMatrixPath);
00162
00163 BOOST_REQUIRE(status == 0);
00164
00165 Blast_Message* message = NULL;
00166 status = Blast_ScoreBlkKbpUngappedCalc(m_ProgramType, m_ScoreBlk,
00167 seqbuf, m_QueryInfo, &message);
00168 message = Blast_MessageFree(message);
00169
00170 BOOST_REQUIRE(status == 0);
00171
00172 if (gapped) {
00173 status = Blast_ScoreBlkKbpGappedCalc(m_ScoreBlk, score_options,
00174 m_ProgramType, m_QueryInfo, NULL);
00175 BOOST_REQUIRE(status == 0);
00176 m_ScoreBlk->kbp_gap = m_ScoreBlk->kbp_gap_std;
00177 }
00178
00179 m_ScoreBlk->kbp = m_ScoreBlk->kbp_std;
00180
00181 if (score_options_ptr)
00182 *score_options_ptr = score_options;
00183 else
00184 BlastScoringOptionsFree(score_options);
00185 }
00186
00187
00188
00189 void setupHitParams(int longest_intron, double evalue)
00190 {
00191 int cutoff_small_gap = (m_ProgramType == eBlastTypeBlastn ? 16 : 42);
00192 m_HitParams =
00193 (BlastHitSavingParameters*) calloc(1, sizeof(BlastHitSavingParameters));
00194 m_HitParams->options = (BlastHitSavingOptions *)
00195 calloc(1, sizeof(BlastHitSavingOptions));
00196 m_HitParams->options->expect_value = evalue;
00197 BlastLinkHSPParametersNew(m_ProgramType, TRUE,
00198 &m_HitParams->link_hsp_params);
00199 m_HitParams->link_hsp_params->cutoff_big_gap = 0;
00200 m_HitParams->link_hsp_params->cutoff_small_gap = cutoff_small_gap;
00201 m_HitParams->link_hsp_params->longest_intron = longest_intron;
00202 }
00203
00204
00205 void
00206 fillEffectiveLengths(const BlastScoringOptions* score_options,
00207 Int8 db_length, Int4 db_num_seq)
00208 {
00209 BlastEffectiveLengthsOptions* eff_len_options = NULL;
00210 BlastEffectiveLengthsOptionsNew(&eff_len_options);
00211 BlastEffectiveLengthsParameters* eff_len_params = NULL;
00212 BlastEffectiveLengthsParametersNew(eff_len_options, db_length,
00213 db_num_seq, &eff_len_params);
00214 BLAST_CalcEffLengths(m_ProgramType, score_options, eff_len_params,
00215 m_ScoreBlk, m_QueryInfo, NULL);
00216 BlastEffectiveLengthsParametersFree(eff_len_params);
00217 BlastEffectiveLengthsOptionsFree(eff_len_options);
00218 }
00219
00220
00221 void setupLinkHspInputTblastn()
00222 {
00223 const string kProtGi = "9930103";
00224 const string kNuclGi = "9930102";
00225 const Uint4 kProtLength = 448;
00226 const Uint4 kNuclLength = 8872;
00227
00228 string qid_str = "gi|" + ((m_ProgramType == eBlastTypeTblastn) ?
00229 kProtGi : kNuclGi);
00230 CSeq_id query_id(qid_str);
00231 TSeqLocVector query_v;
00232
00233 if (m_ProgramType == eBlastTypeBlastx) {
00234 auto_ptr<SSeqLoc> qsl(
00235 CTestObjMgr::Instance().CreateSSeqLoc(query_id,
00236 eNa_strand_both));
00237 query_v.push_back(*qsl);
00238 } else {
00239 auto_ptr<SSeqLoc> qsl(
00240 CTestObjMgr::Instance().CreateSSeqLoc(query_id));
00241 query_v.push_back(*qsl);
00242 }
00243
00244 CBlastOptions options;
00245 options.SetStrandOption(eNa_strand_unknown);
00246 if (m_ProgramType == eBlastTypeBlastx)
00247 options.SetQueryGeneticCode(1);
00248
00249 options.SetProgram(m_Program);
00250 CBLAST_SequenceBlk query_blk;
00251 TSearchMessages blast_msg;
00252
00253 ENa_strand strand_opt = options.GetStrandOption();
00254
00255 SetupQueryInfo(query_v, m_ProgramType, strand_opt, &m_QueryInfo);
00256 SetupQueries(query_v, m_QueryInfo, &query_blk,
00257 m_ProgramType, strand_opt, blast_msg);
00258 ITERATE(TSearchMessages, m, blast_msg) {
00259 BOOST_REQUIRE(m->empty());
00260 }
00261
00262 BlastScoringOptions* score_options = NULL;
00263 setupScoreBlk(query_blk->sequence, true, &score_options);
00264
00265 m_SubjectLength = (m_ProgramType == eBlastTypeTblastn ?
00266 kNuclLength / 3 : kProtLength);
00267
00268 fillEffectiveLengths(score_options, (Int8)m_SubjectLength, 1);
00269 BlastScoringOptionsFree(score_options);
00270
00271 }
00272
00273
00274 void freeStructures()
00275 {
00276 m_HspList = Blast_HSPListFree(m_HspList);
00277
00278 if (m_HitParams) {
00279 BlastHitSavingOptionsFree(m_HitParams->options);
00280 m_HitParams = BlastHitSavingParametersFree(m_HitParams);
00281 }
00282 m_ScoreBlk = BlastScoreBlkFree(m_ScoreBlk);
00283 }
00284
00285
00286 void testUnevenGapLinkHsps() {
00287 const int kNumHsps = 8;
00288 const int kLongestIntron = 4000;
00289 const double kEvalue = 1e-10;
00290 const int kNumsLinked[kNumHsps] = { 4, 4, 4, 4, 4, 4, 4, 4 };
00291 const int kScores[kNumHsps] = { 1023, 282, 246, 202, 142, 117, 98, 63 };
00292
00293 setupLinkHspInputTblastn();
00294 setupHSPListTransl();
00295 setupHitParams(kLongestIntron, kEvalue);
00296
00297 BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength,
00298 m_ScoreBlk, m_HitParams->link_hsp_params, TRUE);
00299
00300 Blast_HSPListReapByEvalue(m_HspList, m_HitParams->options);
00301
00302 BOOST_REQUIRE_EQUAL(kNumHsps, m_HspList->hspcnt);
00303
00304 for (int index = 0; index < kNumHsps; ++index) {
00305 BOOST_REQUIRE_EQUAL(kNumsLinked[index], m_HspList->hsp_array[index]->num);
00306 BOOST_REQUIRE_EQUAL(kScores[index], m_HspList->hsp_array[index]->score);
00307 }
00308 }
00309
00310 void setupHSPListForMiddleInsertTest()
00311 {
00312 const int kNumHsps = 5;
00313 const int kScores[kNumHsps] =
00314 { 80, 60, 55, 54, 52 };
00315 const int kQueryOffsets[kNumHsps] =
00316 { 100, 130, 239, 239, 191 };
00317 const int kLengths[kNumHsps] =
00318 { 100, 50, 100, 9, 57 };
00319 const int kSubjectOffsets[kNumHsps] =
00320 { 1100, 1130, 3240, 3240, 2195 };
00321
00322 m_HspList = Blast_HSPListNew(0);
00323 Int4 index;
00324 BlastHSP* hsp;
00325
00326 for (index = 0; index < kNumHsps; ++index) {
00327 m_HspList->hsp_array[index] = hsp =
00328 (BlastHSP*) calloc(1, sizeof(BlastHSP));
00329 hsp->score = kScores[index];
00330 hsp->query.offset = kQueryOffsets[index];
00331 hsp->subject.offset = kSubjectOffsets[index];
00332 hsp->subject.frame = 1;
00333 hsp->query.end = hsp->query.offset + kLengths[index];
00334 hsp->subject.end = hsp->subject.offset + kLengths[index];
00335 }
00336
00337 m_HspList->hspcnt = kNumHsps;
00338 }
00339
00340
00341 void setupHSPListNucl()
00342 {
00343 const int kNumHsps = 8;
00344 const int kScores[kNumHsps] = { 35, 31, 22, 21, 20, 20, 20, 20 };
00345 const int kQueryFrames[kNumHsps] = { 1, 1, 1, -1, 1, -1, -1, -1 };
00346 const int kQueryStarts[kNumHsps] =
00347 { 790, 790, 791, 4606, 870, 4572, 4526, 4589 };
00348 const int kQueryEnds[kNumHsps] =
00349 { 865, 865, 833, 4635, 894, 4604, 4550, 4629 };
00350 const int kSubjectStarts[kNumHsps] =
00351 { 453, 3469, 5837, 12508, 5951, 11005, 9899, 7397 };
00352 const int kSubjectEnds[kNumHsps] =
00353 { 528, 3544, 5879, 12537, 5975, 11037, 9923, 7437 };
00354 Int4 index;
00355 BlastHSP* hsp;
00356
00357 m_HspList = Blast_HSPListNew(0);
00358
00359 for (index = 0; index < kNumHsps; ++index) {
00360 hsp = m_HspList->hsp_array[index] =
00361 (BlastHSP*) calloc(1, sizeof(BlastHSP));
00362 hsp->score = kScores[index];
00363 hsp->query.offset = kQueryStarts[index];
00364 hsp->query.end = kQueryEnds[index];
00365 hsp->query.frame = kQueryFrames[index];
00366 hsp->context = (kQueryFrames[index] > 0 ? 0 : 1);
00367 hsp->subject.offset = kSubjectStarts[index];
00368 hsp->subject.end = kSubjectEnds[index];
00369 hsp->subject.frame = 1;
00370 }
00371 m_HspList->hspcnt = kNumHsps;
00372 }
00373
00374
00375 void setupLinkHspInputBlastn()
00376 {
00377 const Uint4 kQueryLength = 5419;
00378 const Int8 kEffDbLength = 122632232;
00379
00380 m_ProgramType = eBlastTypeBlastn;
00381 m_Program = eBlastn;
00382
00383
00384 s_SetupNuclQueryInfo(kQueryLength, &m_QueryInfo);
00385 m_SubjectLength = 12991;
00386
00387 CSeq_id seqid("gi|24638835");
00388 pair<TSeqPos, TSeqPos> range(26993,32411);
00389
00390 auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(seqid, range));
00391
00392 SBlastSequence sequence(
00393 GetSequence(*sl->seqloc, eBlastEncodingNucleotide,
00394 sl->scope, eNa_strand_both, eSentinels));
00395 BlastScoringOptions* score_options = NULL;
00396 setupScoreBlk(sequence.data.get(), false, &score_options);
00397
00398 fillEffectiveLengths(score_options, kEffDbLength, 1);
00399 BlastScoringOptionsFree(score_options);
00400
00401 setupHSPListNucl();
00402 }
00403
00404 AllCutoffScores*
00405 setupCutoffScores(bool gapped, Int8 db_length, Uint4 db_num_seq,
00406 Uint4 subj_length, int longest_intron=0)
00407 {
00408 BlastInitialWordOptions* word_options = NULL;
00409 BlastExtensionOptions* ext_options = NULL;
00410 BlastHitSavingOptions* hit_options = NULL;
00411
00412 BlastInitialWordOptionsNew(m_ProgramType, &word_options);
00413 BlastExtensionOptionsNew(m_ProgramType, &ext_options, true);
00414 if (m_ProgramType == eBlastTypeBlastn) {
00415 word_options->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_NUCL;
00416 ext_options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
00417 ext_options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
00418 }
00419 BlastHitSavingOptionsNew(m_ProgramType, &hit_options, gapped);
00420 if (longest_intron > 0)
00421 hit_options->longest_intron = longest_intron;
00422
00423 BlastInitialWordParameters* word_params = NULL;
00424 BlastExtensionParameters* ext_params = NULL;
00425
00426 CRef<CSeq_id> qid;
00427 TSeqLocVector qv;
00428
00429 if (m_ProgramType == eBlastTypeBlastn || m_ProgramType == eBlastTypeBlastx ||
00430 m_ProgramType == eBlastTypeTblastx) {
00431 qid.Reset(new CSeq_id("gi|555"));
00432 auto_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(*qid,
00433 eNa_strand_both));
00434 qv.push_back(*qsl);
00435 } else {
00436 qid.Reset(new CSeq_id("gi|129295"));
00437 auto_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(*qid));
00438 qv.push_back(*qsl);
00439 }
00440
00441 CBlastOptions options;
00442 options.SetStrandOption(eNa_strand_unknown);
00443 if (m_ProgramType == eBlastTypeBlastx ||
00444 m_ProgramType == eBlastTypeTblastx)
00445 options.SetQueryGeneticCode(1);
00446
00447 options.SetProgram(m_Program);
00448 CBLAST_SequenceBlk query_blk;
00449 TSearchMessages blast_msg;
00450
00451 ENa_strand strand_opt = options.GetStrandOption();
00452
00453 SetupQueryInfo(qv, m_ProgramType, strand_opt, &m_QueryInfo);
00454 SetupQueries(qv, m_QueryInfo, &query_blk,
00455 m_ProgramType, strand_opt, blast_msg);
00456 ITERATE(TSearchMessages, m, blast_msg) {
00457 BOOST_REQUIRE(m->empty());
00458 }
00459
00460 BlastScoringOptions* score_options = NULL;
00461 setupScoreBlk(query_blk->sequence, gapped, &score_options);
00462
00463 BlastExtensionParametersNew(m_ProgramType, ext_options, m_ScoreBlk,
00464 m_QueryInfo, &ext_params);
00465 fillEffectiveLengths(score_options, (Int8)db_length, db_num_seq);
00466 score_options = BlastScoringOptionsFree(score_options);
00467
00468 BlastHitSavingParametersNew(m_ProgramType, hit_options,
00469 m_ScoreBlk, m_QueryInfo, subj_length, &m_HitParams);
00470
00471
00472 QuerySetUpOptions* query_options = NULL;
00473 BlastQuerySetUpOptionsNew(&query_options);
00474 LookupTableWrap* lookup_wrap = NULL;
00475 LookupTableOptions* lookup_options = NULL;
00476 BlastSeqLoc* blast_seq_loc = BlastSeqLocNew(NULL, 0, m_QueryInfo->contexts[0].query_length-1);
00477 LookupTableOptionsNew(m_ProgramType, &lookup_options);
00478 LookupTableWrapInit(query_blk, lookup_options, query_options, blast_seq_loc, m_ScoreBlk, &lookup_wrap, NULL, NULL);
00479 query_options = BlastQuerySetUpOptionsFree(query_options);
00480
00481 Uint4 avg_subj_length = (Uint4)(db_length/db_num_seq);
00482 BlastInitialWordParametersNew(m_ProgramType, word_options, m_HitParams, lookup_wrap,
00483 m_ScoreBlk, m_QueryInfo, avg_subj_length, &word_params);
00484
00485 blast_seq_loc = BlastSeqLocFree(blast_seq_loc);
00486 lookup_wrap = LookupTableWrapFree(lookup_wrap);
00487 lookup_options = LookupTableOptionsFree(lookup_options);
00488
00489 BlastLinkHSPParametersUpdate(word_params, m_HitParams, (gapped ? TRUE : FALSE));
00490
00491
00492 if (m_HitParams->link_hsp_params &&
00493 m_ProgramType != eBlastTypeBlastn && !gapped) {
00494 CalculateLinkHSPCutoffs(m_ProgramType, m_QueryInfo, m_ScoreBlk,
00495 m_HitParams->link_hsp_params, word_params, db_length,
00496 subj_length);
00497 }
00498
00499 AllCutoffScores* retval =
00500 (AllCutoffScores*) calloc(1, sizeof(AllCutoffScores));
00501 retval->x_drop_ungapped = word_params->x_dropoff_max;
00502 retval->x_drop_gapped = ext_params->gap_x_dropoff;
00503 retval->x_drop_final = ext_params->gap_x_dropoff_final;
00504 retval->cutoff_score_ungapped = word_params->cutoff_score_min;
00505 retval->cutoff_score_final = m_HitParams->cutoff_score_min;
00506 retval->do_sum_stats = m_HitParams->do_sum_stats;
00507 if (retval->do_sum_stats) {
00508 retval->cutoff_small_gap =
00509 m_HitParams->link_hsp_params->cutoff_small_gap;
00510 retval->cutoff_big_gap =
00511 m_HitParams->link_hsp_params->cutoff_big_gap;
00512 }
00513
00514 BlastInitialWordParametersFree(word_params);
00515 BlastInitialWordOptionsFree(word_options);
00516 BlastExtensionParametersFree(ext_params);
00517 BlastExtensionOptionsFree(ext_options);
00518
00519 m_HspList = NULL;
00520
00521 return retval;
00522 }
00523
00524 };
00525
00526 BOOST_FIXTURE_TEST_SUITE(linkhsp, LinkHspTestFixture)
00527
00528
00529 BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsTblastn) {
00530 m_ProgramType = eBlastTypeTblastn;
00531 m_Program = eTblastn;
00532 testUnevenGapLinkHsps();
00533 }
00534
00535
00536 BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsBlastx) {
00537 m_ProgramType = eBlastTypeBlastx;
00538 m_Program = eBlastx;
00539 testUnevenGapLinkHsps();
00540 }
00541
00542
00543
00544 BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsMiddleInsertion) {
00545 const int kNumHsps = 5;
00546 const int kLongestIntron = 3000;
00547 const double kEvalue = 10;
00548 const int kLinkNums[kNumHsps] = { 3, 1, 3, 1, 3 };
00549 m_ProgramType = eBlastTypeTblastn;
00550 m_Program = eTblastn;
00551
00552 setupLinkHspInputTblastn();
00553 setupHSPListForMiddleInsertTest();
00554 setupHitParams(kLongestIntron, kEvalue);
00555
00556 BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength,
00557 m_ScoreBlk, m_HitParams->link_hsp_params, TRUE);
00558 for (int index = 0; index < m_HspList->hspcnt; ++index) {
00559 BOOST_REQUIRE_EQUAL(kLinkNums[index],
00560 m_HspList->hsp_array[index]->num);
00561 }
00562 }
00563
00564
00565 BOOST_AUTO_TEST_CASE(testEvenGapLinkHspsTblastn) {
00566 const int kNumHsps = 5;
00567 const double kEvalue = 1e-10;
00568 const int kNumsLinked[kNumHsps] = { 1, 2, 2, 1, 1 };
00569 const int kScores[kNumHsps] = { 1023, 282, 246, 202, 142 };
00570
00571 m_ProgramType = eBlastTypeTblastn;
00572 m_Program = eTblastn;
00573 setupLinkHspInputTblastn();
00574 setupHSPListTransl();
00575
00576 setupHitParams(0, kEvalue);
00577
00578 BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength,
00579 m_ScoreBlk, m_HitParams->link_hsp_params, TRUE);
00580
00581 Blast_HSPListReapByEvalue(m_HspList, m_HitParams->options);
00582
00583 BOOST_REQUIRE_EQUAL(kNumHsps, m_HspList->hspcnt);
00584
00585 Int4 index;
00586 for (index = 0; index < kNumHsps; ++index) {
00587 BOOST_REQUIRE_EQUAL(kNumsLinked[index], m_HspList->hsp_array[index]->num);
00588 BOOST_REQUIRE_EQUAL(kScores[index],
00589 m_HspList->hsp_array[index]->score);
00590 }
00591 }
00592
00593
00594 BOOST_AUTO_TEST_CASE(testEvenGapLinkHspsBlastn) {
00595 const int kNumHsps = 8;
00596 const double kEvalue = 10;
00597 const int kNumsLinked[kNumHsps] =
00598 { 2, 1, 1, 3, 2, 1, 3, 3 };
00599 const double kEvalues[kNumHsps] =
00600 { 3e-12, 3e-7, 0.07, 1e-7, 3e-12, 1.1, 1e-7, 1e-7 };
00601
00602 setupLinkHspInputBlastn();
00603 setupHitParams(0, kEvalue);
00604
00605 BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength,
00606 m_ScoreBlk, m_HitParams->link_hsp_params, FALSE);
00607
00608 Blast_HSPListReapByEvalue(m_HspList, m_HitParams->options);
00609 BOOST_REQUIRE_EQUAL(kNumHsps, m_HspList->hspcnt);
00610
00611 for (Int4 index = 0; index < kNumHsps; ++index) {
00612 BOOST_REQUIRE_EQUAL(kNumsLinked[index],
00613 m_HspList->hsp_array[index]->num);
00614 BOOST_REQUIRE(fabs(kEvalues[index] - m_HspList->hsp_array[index]->evalue)/kEvalues[index] < 0.5);
00615 }
00616 }
00617
00618 static void
00619 testAllCutoffs(const AllCutoffScores& good_cutoffs,
00620 AllCutoffScores& cutoffs)
00621 {
00622 BOOST_REQUIRE_EQUAL(good_cutoffs.x_drop_ungapped,
00623 cutoffs.x_drop_ungapped);
00624 BOOST_REQUIRE_EQUAL(good_cutoffs.x_drop_gapped,
00625 cutoffs.x_drop_gapped);
00626 BOOST_REQUIRE_EQUAL(good_cutoffs.x_drop_final,
00627 cutoffs.x_drop_final);
00628 BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_score_ungapped,
00629 cutoffs.cutoff_score_ungapped);
00630 BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_score_final,
00631 cutoffs.cutoff_score_final);
00632 BOOST_REQUIRE_EQUAL(good_cutoffs.do_sum_stats,
00633 cutoffs.do_sum_stats);
00634 BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_small_gap,
00635 cutoffs.cutoff_small_gap);
00636 BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_big_gap,
00637 cutoffs.cutoff_big_gap);
00638 }
00639
00640 BOOST_AUTO_TEST_CASE(UngappedBlastnCutoffs)
00641 {
00642 const int kNumDbs = 4;
00643 const Int8 kDbLengths[kNumDbs] =
00644 { 10000000000LL, 10000000000LL, 3000000000LL, 10000LL };
00645 const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500, 100 };
00646 const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000, 100 };
00647 const AllCutoffScores kGoodCutoffs[kNumDbs] = {
00648 { 11, 0, 0, 0, 14, 20, true, 14, 0 },
00649 { 11, 0, 0, 0, 12, 20, true, 12, 0 },
00650 { 11, 0, 0, 0, 19, 19, true, 19, 0 },
00651 { 10, 0, 0, 0, 10, 10, true, 10, 0 } };
00652
00653 AllCutoffScores* cutoffs = NULL;
00654 int index;
00655 m_ProgramType = eBlastTypeBlastn;
00656 m_Program = eBlastn;
00657 for (index = 0; index < kNumDbs; ++index) {
00658 cutoffs = setupCutoffScores(false, kDbLengths[index],
00659 kDbNumSeqs[index], kSubjectLengths[index]);
00660 testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00661 sfree(cutoffs);
00662 freeStructures();
00663 if (index < kNumDbs-1)
00664 BlastQueryInfoFree(m_QueryInfo);
00665 }
00666 }
00667
00668 BOOST_AUTO_TEST_CASE(UngappedBlastpCutoffs)
00669 {
00670 const Int8 kDbLength = 500000000;
00671 const Uint4 kDbNumSeqs = 1000000;
00672 const int kNumSubjects = 3;
00673 const Uint4 kSubjectLengths[kNumSubjects] = {400, 60, 3000 };
00674 const AllCutoffScores kGoodCutoffs[kNumSubjects] = {
00675 { 16, 0, 0, 0, 41, 66, true, 41, 38 },
00676 { 16, 0, 0, 0, 41, 66, true, 0, 29 },
00677 { 16, 0, 0, 0, 41, 66, true, 41, 44 } };
00678 AllCutoffScores* cutoffs = NULL;
00679 int index;
00680 m_ProgramType = eBlastTypeBlastp;
00681 m_Program = eBlastp;
00682 for (index = 0; index < kNumSubjects; ++index) {
00683 cutoffs = setupCutoffScores(false, kDbLength,
00684 kDbNumSeqs, kSubjectLengths[index]);
00685 testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00686 sfree(cutoffs);
00687 freeStructures();
00688 if (index < kNumSubjects-1)
00689 BlastQueryInfoFree(m_QueryInfo);
00690 }
00691 }
00692
00693 BOOST_AUTO_TEST_CASE(UngappedBlastxCutoffs)
00694 {
00695 const Int8 kDbLength = 227102922;
00696 const Uint4 kDbNumSeqs = 761886;
00697 const int kNumSubjects = 3;
00698 const Uint4 kSubjectLengths[kNumSubjects] = { 400, 100, 3000 };
00699 const AllCutoffScores kGoodCutoffs[kNumSubjects] = {
00700 { 16, 0, 0, 0, 31, 63, true, 31, 37 },
00701 { 16, 0, 0, 0, 31, 63, true, 0, 31 },
00702 { 16, 0, 0, 0, 31, 63, true, 31, 43 } };
00703 AllCutoffScores* cutoffs = NULL;
00704 int index;
00705 m_ProgramType = eBlastTypeBlastx;
00706 m_Program = eBlastx;
00707 for (index = 0; index < kNumSubjects; ++index) {
00708 cutoffs = setupCutoffScores(false, kDbLength, kDbNumSeqs,
00709 kSubjectLengths[index]);
00710 testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00711 sfree(cutoffs);
00712 freeStructures();
00713 if (index < kNumSubjects-1)
00714 BlastQueryInfoFree(m_QueryInfo);
00715 }
00716 }
00717
00718 BOOST_AUTO_TEST_CASE(UngappedTblastnCutoffs)
00719 {
00720 const int kNumDbs = 3;
00721 const Int8 kDbLengths[kNumDbs] =
00722 { 10000000000LL, 10000000000LL, 3000000000LL };
00723 const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500 };
00724 const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000 };
00725 const AllCutoffScores kGoodCutoffs[kNumDbs] = {
00726 { 16, 0, 0, 0, 40, 72, true, 40, 40 },
00727 { 16, 0, 0, 0, 33, 71, true, 33, 35 },
00728 { 16, 0, 0, 0, 41, 69, true, 41, 60 } };
00729
00730 AllCutoffScores* cutoffs = NULL;
00731 int index;
00732 m_ProgramType = eBlastTypeTblastn;
00733 m_Program = eTblastn;
00734 for (index = 0; index < kNumDbs; ++index) {
00735 cutoffs = setupCutoffScores(false, kDbLengths[index],
00736 kDbNumSeqs[index], kSubjectLengths[index]);
00737 testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00738 sfree(cutoffs);
00739 freeStructures();
00740 if (index < kNumDbs-1)
00741 BlastQueryInfoFree(m_QueryInfo);
00742 }
00743 }
00744
00745 BOOST_AUTO_TEST_CASE(UngappedTblastxCutoffs)
00746 {
00747 const int kNumDbs = 4;
00748 const Int8 kDbLengths[kNumDbs] =
00749 { 10000000000LL, 10000000000LL, 10000000000LL, 3000000000LL };
00750 const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 2000000, 20000000, 500 };
00751 const Uint4 kSubjectLengths[kNumDbs] = { 2000, 100, 400, 3000000 };
00752 const AllCutoffScores kGoodCutoffs[kNumDbs] = {
00753 { 16, 0, 0, 0, 41, 72, true, 41, 40 },
00754 { 16, 0, 0, 0, 41, 72, true, 0, 27 },
00755 { 16, 0, 0, 0, 41, 70, true, 41, 34 },
00756 { 16, 0, 0, 0, 41, 68, true, 41, 60 } };
00757
00758 AllCutoffScores* cutoffs = NULL;
00759 int index;
00760 m_ProgramType = eBlastTypeTblastx;
00761 m_Program = eTblastx;
00762 for (index = 0; index < kNumDbs; ++index) {
00763 cutoffs = setupCutoffScores(false, kDbLengths[index],
00764 kDbNumSeqs[index], kSubjectLengths[index]);
00765 testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00766 sfree(cutoffs);
00767 freeStructures();
00768 if (index < kNumDbs-1)
00769 BlastQueryInfoFree(m_QueryInfo);
00770 }
00771 }
00772
00773 BOOST_AUTO_TEST_CASE(GappedBlastnCutoffs)
00774 {
00775 const int kNumDbs = 4;
00776 const Int8 kDbLengths[kNumDbs] =
00777 { 10000000000LL, 10000000000LL, 3000000000LL, 10000LL };
00778 const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500, 200 };
00779 const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000, 60 };
00780 const AllCutoffScores kGoodCutoffs[kNumDbs] = {
00781 { 11, 15, 50, 0, 14, 20, false, 0, 0 },
00782 { 11, 15, 50, 0, 11, 20, false, 0, 0 },
00783 { 11, 15, 50, 0, 19, 19, false, 0, 0 },
00784 { 8, 15, 50, 0, 8, 10, false, 0, 0 } };
00785
00786 AllCutoffScores* cutoffs = NULL;
00787 int index;
00788 m_ProgramType = eBlastTypeBlastn;
00789 m_Program = eBlastn;
00790 for (index = 0; index < kNumDbs; ++index) {
00791 cutoffs = setupCutoffScores(true, kDbLengths[index],
00792 kDbNumSeqs[index], kSubjectLengths[index]);
00793 testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00794 sfree(cutoffs);
00795 freeStructures();
00796 if (index < kNumDbs-1)
00797 BlastQueryInfoFree(m_QueryInfo);
00798 }
00799 }
00800
00801 BOOST_AUTO_TEST_CASE(GappedBlastpCutoffs)
00802 {
00803 const Int8 kDbLength = 600000000;
00804 const Uint4 kDbNumSeqs = 1800000;
00805 const Uint4 kSubjectLength = 200;
00806 m_ProgramType = eBlastTypeBlastp;
00807 m_Program = eBlastp;
00808 const AllCutoffScores kGoodCutoffs =
00809 { 16, 38, 64, 41, 41, 72, false, 0, 0 };
00810 AllCutoffScores* cutoffs =
00811 setupCutoffScores(true, kDbLength, kDbNumSeqs, kSubjectLength);
00812 testAllCutoffs(kGoodCutoffs, *cutoffs);
00813 sfree(cutoffs);
00814 freeStructures();
00815 }
00816
00817 BOOST_AUTO_TEST_CASE(GappedBlastxCutoffs)
00818 {
00819 const int kNumDbs = 2;
00820 const Int8 kDbLengths[kNumDbs] =
00821 {600000000, 6000000000LL};
00822 const Uint4 kDbNumSeqs = 1800000;
00823 const Uint4 kSubjectLength[kNumDbs] = {500, 2000};
00824 const AllCutoffScores kGoodCutoffs[kNumDbs] = {
00825 { 16, 38, 64, 0, 41, 32, true, 41, 0 },
00826 { 16, 38, 64, 0, 41, 37, true, 41, 0 } };
00827 m_ProgramType = eBlastTypeBlastx;
00828 m_Program = eBlastx;
00829 for (int index = 0; index < kNumDbs; ++index) {
00830 AllCutoffScores* cutoffs = setupCutoffScores(true,
00831 kDbLengths[index], kDbNumSeqs, kSubjectLength[index]);
00832 testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00833 sfree(cutoffs);
00834 freeStructures();
00835 if (index < kNumDbs-1)
00836 BlastQueryInfoFree(m_QueryInfo);
00837 }
00838 }
00839
00840 BOOST_AUTO_TEST_CASE(GappedTblastnCutoffs)
00841 {
00842 const int kNumDbs = 3;
00843 const Int8 kDbLengths[kNumDbs] =
00844 { 10000000000LL, 10000000000LL, 3000000000LL };
00845 const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500 };
00846 const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000 };
00847 const AllCutoffScores kGoodCutoffs[kNumDbs] = {
00848 { 16, 38, 64, 41, 41, 38, true, 41, 0 },
00849 { 16, 38, 64, 41, 41, 32, true, 41, 0 },
00850 { 16, 38, 64, 41, 41, 65, true, 41, 0 } };
00851
00852 AllCutoffScores* cutoffs = NULL;
00853 int index;
00854 m_ProgramType = eBlastTypeTblastn;
00855 m_Program = eTblastn;
00856 for (index = 0; index < kNumDbs; ++index) {
00857 cutoffs = setupCutoffScores(true, kDbLengths[index],
00858 kDbNumSeqs[index], kSubjectLengths[index]);
00859 testAllCutoffs(kGoodCutoffs[index], *cutoffs);
00860 sfree(cutoffs);
00861 freeStructures();
00862 if (index < kNumDbs-1)
00863 BlastQueryInfoFree(m_QueryInfo);
00864 }
00865 }
00866
00867 BOOST_AUTO_TEST_CASE(GappedTblastnVeryShortIntron)
00868 {
00869 const int kNumDbs = 3;
00870 const Int8 kDbLengths[kNumDbs] =
00871 { 10000000000LL, 10000000000LL, 3000000000LL };
00872 const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500 };
00873 const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000 };
00874
00875 AllCutoffScores* cutoffs = NULL;
00876 int index;
00877 m_ProgramType = eBlastTypeTblastn;
00878 m_Program = eTblastn;
00879 for (index = 0; index < kNumDbs; ++index) {
00880 cutoffs = setupCutoffScores(true, kDbLengths[index],
00881 kDbNumSeqs[index], kSubjectLengths[index], 1);
00882
00883 BOOST_REQUIRE_EQUAL((int) false, (int) cutoffs->do_sum_stats);
00884 sfree(cutoffs);
00885 freeStructures();
00886 if (index < kNumDbs-1)
00887 BlastQueryInfoFree(m_QueryInfo);
00888 }
00889 }
00890 BOOST_AUTO_TEST_SUITE_END()
00891
00892