33 #define NCBI_TEST_APPLICATION
/// Calculate the number of elements in a static array.
///
/// Divides the total byte size of the array by the byte size of its first
/// element. The macro argument is parenthesized everywhere it is expanded so
/// that an arbitrary array-valued expression can be passed safely.
///
/// @param array  An object of array type. NOTE: passing a pointer (e.g. an
///               array that has decayed through a function parameter) yields
///               sizeof(pointer)/sizeof(element), not the element count.
#define STATIC_ARRAY_SIZE(array) (sizeof(array) / sizeof(*(array)))
92 const string kSeqEntryFile(
"data/7450545.seqentry.asn");
93 m_SeqEntry = TestUtil::ReadObject<CSeq_entry>(kSeqEntryFile);
97 BOOST_REQUIRE(m_Pssm &&
105 const string kPssmFile(
"data/pssm_freq_ratios.asn");
106 m_Pssm = TestUtil::ReadObject<CPssmWithParameters>(kPssmFile);
119 x_ReadPssmFromFile();
123 x_ReadSeqEntriesFromFile();
140 const CSeq_id& seqid = (*itr)->GetSeq_id(1);
143 if (new_gi != last_gi)
149 else if ( !seqid.
Equals(last_id) ) {
165 subjects.push_back(ssl);
179 subjects.push_back(ssl);
207 m_OptHandle->SetEvalueThreshold(1.5);
211 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
214 BOOST_REQUIRE(results[0].GetErrors().
empty());
216 const int kNumExpectedMatchingSeqs = 3;
219 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
221 const size_t kNumExpectedHSPs = 3;
225 expected_results[0].score = 64;
226 expected_results[0].evalue = 2.46806e-1;
227 expected_results[0].bit_score = 292610051e-7;
228 expected_results[0].num_ident = 18;
229 expected_results[0].starts.push_back(34);
230 expected_results[0].starts.push_back(95);
231 expected_results[0].lengths.push_back(53);
235 int starts[] = { 203, 280, 220, -1, 226, 297, 258, -1, 265, 329 };
236 int lengths[] = { 17, 6, 32, 7, 39 };
237 expected_results[1].score = 63;
238 expected_results[1].evalue = 3.48342e-1;
239 expected_results[1].bit_score = 288758055e-7;
240 expected_results[1].num_ident = 24;
242 back_inserter(expected_results[1].starts));
244 back_inserter(expected_results[1].lengths));
249 int starts[] = { 180, 97, 204, -1, 205, 121, -1, 127, 211, 128,
250 241, -1, 242, 158, -1, 197, 281, 201, 306, -1, 318, 226, 323,
252 int lengths[] = { 24, 1, 6, 1, 30, 1, 39, 4, 25, 12, 5, 4, 35 };
253 expected_results[2].score = 60;
254 expected_results[2].evalue = 7.36231e-1;
255 expected_results[2].bit_score = 277202068e-7;
256 expected_results[2].num_ident = 42;
258 back_inserter(expected_results[2].starts));
260 back_inserter(expected_results[2].lengths));
284 qa::CSeqAlignCmpOpts opts;
285 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
287 bool identical_results =
cmp.Run(&errors);
289 BOOST_REQUIRE_MESSAGE(identical_results, errors);
297 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
300 BOOST_REQUIRE(results[0].GetErrors().
empty());
302 const int kNumExpectedMatchingSeqs = 4;
305 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
307 const size_t kNumExpectedHSPs = 4;
311 expected_results[0].score = 63;
312 expected_results[0].evalue = 2.96035e-1;
313 expected_results[0].bit_score = 288758056e-7;
314 expected_results[0].starts.push_back(34);
315 expected_results[0].starts.push_back(95);
316 expected_results[0].lengths.push_back(53);
317 expected_results[0].num_ident = 18;
320 expected_results[1].score = 59;
321 expected_results[1].evalue = 9.29330e-1;
322 expected_results[1].bit_score = 273350073e-7;
323 expected_results[1].starts.push_back(203);
324 expected_results[1].starts.push_back(280);
325 expected_results[1].starts.push_back(220);
326 expected_results[1].starts.push_back(-1);
327 expected_results[1].starts.push_back(226);
328 expected_results[1].starts.push_back(297);
329 expected_results[1].starts.push_back(258);
330 expected_results[1].starts.push_back(-1);
331 expected_results[1].starts.push_back(265);
332 expected_results[1].starts.push_back(329);
333 expected_results[1].lengths.push_back(17);
334 expected_results[1].lengths.push_back(6);
335 expected_results[1].lengths.push_back(32);
336 expected_results[1].lengths.push_back(7);
337 expected_results[1].lengths.push_back(39);
338 expected_results[1].num_ident = 24;
341 expected_results[2].score = 52;
342 expected_results[2].evalue = 6.67208;
343 expected_results[2].bit_score = 246386102e-7;
344 expected_results[2].starts.push_back(322);
345 expected_results[2].starts.push_back(46);
346 expected_results[2].lengths.push_back(28);
347 expected_results[2].num_ident = 10;
351 expected_results[3].score = 50;
352 expected_results[3].evalue = 6.81719;
353 expected_results[3].bit_score = 23.8682;
354 expected_results[3].starts.push_back(295);
355 expected_results[3].starts.push_back(23);
356 expected_results[3].starts.push_back(301);
357 expected_results[3].starts.push_back(-1);
358 expected_results[3].starts.push_back(304);
359 expected_results[3].starts.push_back(29);
360 expected_results[3].lengths.push_back(6);
361 expected_results[3].lengths.push_back(3);
362 expected_results[3].lengths.push_back(23);
363 expected_results[3].num_ident = 16;
368 qa::CSeqAlignCmpOpts opts;
369 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
371 bool identical_results =
cmp.Run(&errors);
373 BOOST_REQUIRE_MESSAGE(identical_results, errors);
378 m_OptHandle->SetEvalueThreshold(5.0);
382 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
385 BOOST_REQUIRE(results[0].GetErrors().
empty());
387 const int kNumExpectedMatchingSeqs = 3;
390 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
392 const size_t kNumExpectedHSPs = 3;
396 expected_results[0].score = 59;
397 expected_results[0].evalue = 8.66100e-1;
398 expected_results[0].bit_score = 273350073e-7;
399 expected_results[0].starts.push_back(34);
400 expected_results[0].starts.push_back(95);
401 expected_results[0].lengths.push_back(53);
402 expected_results[0].sequence_gis.SetQuery(7450545);
403 expected_results[0].sequence_gis.SetSubject(22982149);
404 expected_results[0].num_ident = 18;
408 int starts[] = { 322 , 46 , -1 , 75 , 351 , 81 , -1 , 94 , 364 ,
409 97 , -1 , 106 , 373 , 109 };
410 int lengths[] = { 29 , 6 , 13 , 3 , 9 , 3 , 17 };
411 expected_results[1].score = 53;
412 expected_results[1].evalue = 4.15768;
413 expected_results[1].bit_score = 250238098e-7;
414 expected_results[1].sequence_gis.SetQuery(7450545);
415 expected_results[1].sequence_gis.SetSubject(43121985);
416 expected_results[1].num_ident = 19;
418 back_inserter(expected_results[1].starts));
420 back_inserter(expected_results[1].lengths));
425 int starts[] = { 125 , 199 , 146 , -1 , 148 , 220 , -1 , 228 ,
426 156 , 233 , -1 , 250 , 173 , 252 , 179 , -1 , 181 , 258 ,
427 220 , -1 , 226 , 297 , 258 , -1 , 265 , 329 };
428 int lengths[] = { 21 , 2 , 8 , 5 , 17 , 2 , 6 , 2 , 39 , 6 , 32 ,
430 expected_results[2].score = 54;
431 expected_results[2].evalue = 4.40967;
432 expected_results[2].bit_score = 254090094e-7;
433 expected_results[2].sequence_gis.SetQuery(7450545);
434 expected_results[2].sequence_gis.SetSubject(13242404);
435 expected_results[2].num_ident = 39;
437 back_inserter(expected_results[2].starts));
439 back_inserter(expected_results[2].lengths));
461 qa::CSeqAlignCmpOpts opts;
462 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
464 bool identical_results =
cmp.Run(&errors);
466 BOOST_REQUIRE_MESSAGE(identical_results, errors);
474 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
477 BOOST_REQUIRE(results[0].GetErrors().
empty());
479 const int kNumExpectedMatchingSeqs = 6;
482 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
484 const size_t kNumExpectedHSPs = 6;
488 expected_results[0].score = 59;
489 expected_results[0].evalue = 8.66100e-1;
490 expected_results[0].bit_score = 273350073e-7;
491 expected_results[0].starts.push_back(34);
492 expected_results[0].starts.push_back(95);
493 expected_results[0].lengths.push_back(53);
494 expected_results[0].sequence_gis.SetQuery(7450545);
495 expected_results[0].sequence_gis.SetSubject(22982149);
496 expected_results[0].num_ident = 18;
500 int starts[] = { 322 , 46 , -1 , 75 , 351 , 81 , -1 , 94 , 364 ,
501 97 , -1 , 106 , 373 , 109 };
502 int lengths[] = { 29 , 6 , 13 , 3 , 9 , 3 , 17 };
503 expected_results[1].score = 53;
504 expected_results[1].evalue = 4.15768;
505 expected_results[1].bit_score = 250238098e-7;
506 expected_results[1].sequence_gis.SetQuery(7450545);
507 expected_results[1].sequence_gis.SetSubject(43121985);
508 expected_results[1].num_ident = 19;
510 back_inserter(expected_results[1].starts));
512 back_inserter(expected_results[1].lengths));
518 int starts[] = { 125 , 199 , 146 , -1 , 148 , 220 , -1 , 228 ,
519 156 , 233 , -1 , 250 , 173 , 252 , 179 , -1 , 181 , 258 ,
520 220 , -1 , 226 , 297 , 258 , -1 , 265 , 329 };
521 int lengths[] = { 21 , 2 , 8 , 5 , 17 , 2 , 6 , 2 , 39 , 6 , 32 ,
523 expected_results[2].score = 54;
524 expected_results[2].evalue = 4.40967;
525 expected_results[2].bit_score = 254090094e-7;
526 expected_results[2].sequence_gis.SetQuery(7450545);
527 expected_results[2].sequence_gis.SetSubject(13242404);
528 expected_results[2].num_ident = 39;
530 back_inserter(expected_results[2].starts));
532 back_inserter(expected_results[2].lengths));
549 qa::CSeqAlignCmpOpts opts;
550 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
552 bool identical_results =
cmp.Run(&errors);
554 BOOST_REQUIRE_MESSAGE(identical_results, errors);
558 const int kNumIterations = 4;
559 const int kNumExpectedIterations = 2;
566 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
568 int hits_below_threshold[kNumIterations] = { 0, 0, 0, 0 };
569 size_t number_hits[kNumIterations] = { 11, 14, 0, 0 };
571 int iteration_counter = 0;
574 BOOST_REQUIRE(results[0].GetErrors().
empty());
576 BOOST_REQUIRE_EQUAL(number_hits[iteration_counter],
577 alignment->
Get().size());
582 string m(
"On round ");
585 BOOST_REQUIRE_MESSAGE(
586 hits_below_threshold[iteration_counter]==(
int)ids.
size(), m);
591 x_ComputePssmForNextIteration(*bioseq, alignment,
592 m_OptHandle, results[0].GetAncillaryData());
598 BOOST_REQUIRE_EQUAL(kNumExpectedIterations, iteration_counter);
605 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
608 BOOST_REQUIRE(results[0].GetErrors().
empty());
610 const int kNumExpectedMatchingSeqs = 6;
612 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
614 const size_t kNumExpectedHSPs = 7;
619 int starts[] = { 0, 941, -1, 1093, 152, 1094 };
620 int lengths[] = { 152, 1, 80 };
621 expected_results[0].score = 595;
622 expected_results[0].evalue = 2.70189-71;
623 expected_results[0].bit_score = 233623298e-6;
624 expected_results[0].num_ident = 101;
626 back_inserter(expected_results[0].starts));
628 back_inserter(expected_results[0].lengths));
633 int starts[] = { 0, 154, -1, 308, 154, 309 };
634 int lengths[] = { 154, 1, 24 };
635 expected_results[1].score = 424;
636 expected_results[1].evalue = 6.54598e-49;
637 expected_results[1].bit_score = 167754171e-6;
638 expected_results[1].num_ident = 73;
640 back_inserter(expected_results[1].starts));
642 back_inserter(expected_results[1].lengths));
648 { 0, 190, 65, -1, 67, 255, 91, -1, 92, 279, 111, -1, 113, 298,
649 -1, 304, 119, 305, 151, -1, 152, 337, 163, -1, 164, 348,
650 -1, 374, 190, 380, 200, -1, 202, 390 };
652 { 65, 2, 24, 1, 19, 2, 6, 1, 32, 1, 11, 1, 26, 6, 10, 2, 30 };
653 expected_results[2].score = 372;
654 expected_results[2].evalue = 1.67386e-43;
655 expected_results[2].bit_score = 147723793e-6;
656 expected_results[2].num_ident = 87;
658 back_inserter(expected_results[2].starts));
660 back_inserter(expected_results[2].lengths));
664 expected_results[3].score = 53;
665 expected_results[3].evalue = 2.43336;
666 expected_results[3].bit_score = 248451288e-7;
667 expected_results[3].num_ident = 8;
668 expected_results[3].starts.push_back(206);
669 expected_results[3].starts.push_back(46);
670 expected_results[3].lengths.push_back(19);
674 int starts[] = { 177, 100, -1, 106, 183, 107, 205, -1, 215, 129 };
675 int lengths[] = { 6, 1, 22, 10, 14 };
676 expected_results[4].score = 52;
677 expected_results[4].evalue = 3.12771;
678 expected_results[4].bit_score = 244599292e-7;
679 expected_results[4].num_ident = 11;
681 back_inserter(expected_results[4].starts));
683 back_inserter(expected_results[4].lengths));
688 int starts[] = { 74, 181, 108, -1, 109, 215 };
689 int lengths[] = { 34, 1, 23 };
690 expected_results[5].score = 49;
691 expected_results[5].evalue = 8.37737;
692 expected_results[5].bit_score = 233043305e-7;
693 expected_results[5].num_ident = 14;
695 back_inserter(expected_results[5].starts));
697 back_inserter(expected_results[5].lengths));
701 expected_results[6].score = 49;
702 expected_results[6].evalue = 8.62465;
703 expected_results[6].bit_score = 233043305e-7;
704 expected_results[6].num_ident = 6;
705 expected_results[6].starts.push_back(188);
706 expected_results[6].starts.push_back(709);
707 expected_results[6].lengths.push_back(30);
712 qa::CSeqAlignCmpOpts opts;
713 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
715 bool identical_results =
cmp.Run(&errors);
717 BOOST_REQUIRE_MESSAGE(identical_results, errors);
724 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
727 BOOST_REQUIRE(results[0].GetErrors().
empty());
729 const int kNumExpectedMatchingSeqs = 4;
731 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
733 const size_t kNumExpectedHSPs = 5;
738 int starts[] = { 0, 941, -1, 1093, 152, 1094 };
739 int lengths[] = { 152, 1, 80 };
740 expected_results[0].score = 593;
741 expected_results[0].evalue = 1.15934e-71;
742 expected_results[0].bit_score = 232843196e-6;
743 expected_results[0].sequence_gis.SetQuery(129295);
744 expected_results[0].sequence_gis.SetSubject(34878800);
745 expected_results[0].num_ident = 101;
747 back_inserter(expected_results[0].starts));
749 back_inserter(expected_results[0].lengths));
754 int starts[] = { 0, 154, -1, 308, 154, 309 };
755 int lengths[] = { 154, 1, 24 };
756 expected_results[1].score = 417;
757 expected_results[1].evalue = 5.13954e-48;
758 expected_results[1].bit_score = 165048071e-6;
759 expected_results[1].sequence_gis.SetQuery(129295);
760 expected_results[1].sequence_gis.SetSubject(34878800);
761 expected_results[1].num_ident = 73;
763 back_inserter(expected_results[1].starts));
765 back_inserter(expected_results[1].lengths));
771 { 0, 190, 65, -1, 67, 255, 93, -1, 94, 281, 111, -1, 113, 298,
772 -1, 304, 119, 305, 153, -1, 154, 339, 164, -1, 165, 349,
775 { 65, 2, 26, 1, 17, 2, 6, 1, 34, 1, 10, 1, 29, 4, 38 };
776 expected_results[2].score = 359;
777 expected_results[2].evalue = 1.18595e-41;
778 expected_results[2].bit_score = 142706496e-6;
779 expected_results[2].sequence_gis.SetQuery(129295);
780 expected_results[2].sequence_gis.SetSubject(20092202);
781 expected_results[2].num_ident = 85;
783 back_inserter(expected_results[2].starts));
785 back_inserter(expected_results[2].lengths));
789 expected_results[3].score = 53;
790 expected_results[3].evalue = 2.14427;
791 expected_results[3].bit_score = 248354256e-7;
792 expected_results[3].sequence_gis.SetQuery(129295);
793 expected_results[3].sequence_gis.SetSubject(44343511);
794 expected_results[3].starts.push_back(206);
795 expected_results[3].starts.push_back(46);
796 expected_results[3].lengths.push_back(19);
797 expected_results[3].num_ident = 8;
800 expected_results[4].score = 51;
801 expected_results[4].evalue = 5.09267;
802 expected_results[4].bit_score = 240650265e-7;
803 expected_results[4].sequence_gis.SetQuery(129295);
804 expected_results[4].sequence_gis.SetSubject(23481125);
805 expected_results[4].starts.push_back(188);
806 expected_results[4].starts.push_back(709);
807 expected_results[4].lengths.push_back(30);
808 expected_results[4].num_ident = 6;
829 qa::CSeqAlignCmpOpts opts;
830 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
832 bool identical_results =
cmp.Run(&errors);
834 BOOST_REQUIRE_MESSAGE(identical_results, errors);
843 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
846 BOOST_REQUIRE(results[0].GetErrors().
empty());
848 const int kNumExpectedMatchingSeqs = 6;
850 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
852 const size_t kNumExpectedHSPs = 7;
857 int starts[] = { 0, 941, -1, 1093, 152, 1094 };
858 int lengths[] = { 152, 1, 80 };
859 expected_results[0].score = 595;
860 expected_results[0].evalue = 307180919e-71;
861 expected_results[0].bit_score = 233623298e-6;
862 expected_results[0].num_ident = 101;
864 back_inserter(expected_results[0].starts));
866 back_inserter(expected_results[0].lengths));
871 int starts[] = { 0, 154, -1, 308, 154, 309 };
872 int lengths[] = { 154, 1, 24 };
873 expected_results[1].score = 424;
874 expected_results[1].evalue = 20700336e-50;
875 expected_results[1].bit_score = 167754171e-6;
876 expected_results[1].num_ident = 73;
878 back_inserter(expected_results[1].starts));
880 back_inserter(expected_results[1].lengths));
886 { 0, 190, 65, -1, 67, 255, 91, -1, 92, 279, 111, -1, 113, 298,
887 -1, 304, 119, 305, 151, -1, 152, 337, 163, -1, 164, 348,
888 -1, 374, 190, 380, 200, -1, 202, 390 };
890 { 65, 2, 24, 1, 19, 2, 6, 1, 32, 1, 11, 1, 26, 6, 10, 2, 30 };
891 expected_results[2].score = 372;
892 expected_results[2].evalue = 221677687e-45;
893 expected_results[2].bit_score = 147723793e-6;
894 expected_results[2].num_ident = 87;
896 back_inserter(expected_results[2].starts));
898 back_inserter(expected_results[2].lengths));
902 expected_results[3].score = 53;
903 expected_results[3].evalue = 216713461e-8;
904 expected_results[3].bit_score = 248451288e-7;
905 expected_results[3].num_ident = 8;
906 expected_results[3].starts.push_back(206);
907 expected_results[3].starts.push_back(46);
908 expected_results[3].lengths.push_back(19);
912 int starts[] = { 177, 100, -1, 106, 183, 107, 205, -1, 215, 129 };
913 int lengths[] = { 6, 1, 22, 10, 14 };
914 expected_results[4].score = 52;
915 expected_results[4].evalue = 283036546e-8;
916 expected_results[4].bit_score = 244599292e-7;
917 expected_results[4].num_ident = 11;
919 back_inserter(expected_results[4].starts));
921 back_inserter(expected_results[4].lengths));
926 int starts[] = { 74, 181, 108, -1, 109, 215 };
927 int lengths[] = { 34, 1, 23 };
928 expected_results[5].score = 49;
929 expected_results[5].evalue = 630539642e-8;
930 expected_results[5].bit_score = 233043305e-7;
931 expected_results[5].num_ident = 14;
933 back_inserter(expected_results[5].starts));
935 back_inserter(expected_results[5].lengths));
939 expected_results[6].score = 49;
940 expected_results[6].evalue = 630539642e-8;
941 expected_results[6].bit_score = 233043305e-7;
942 expected_results[6].num_ident = 6;
943 expected_results[6].starts.push_back(188);
944 expected_results[6].starts.push_back(709);
945 expected_results[6].lengths.push_back(30);
950 qa::CSeqAlignCmpOpts opts;
951 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
953 bool identical_results =
cmp.Run(&errors);
955 BOOST_REQUIRE_MESSAGE(identical_results, errors);
962 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
965 BOOST_REQUIRE(results[0].GetErrors().
empty());
967 const int kNumExpectedMatchingSeqs = 6;
969 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
971 const size_t kNumExpectedHSPs = 7;
976 int starts[] = { 0, 941, -1, 1093, 152, 1094 };
977 int lengths[] = { 152, 1, 80 };
978 expected_results[0].score = 595;
979 expected_results[0].evalue = 307180919e-71;
980 expected_results[0].bit_score = 233623298e-6;
981 expected_results[0].num_ident = 101;
983 back_inserter(expected_results[0].starts));
985 back_inserter(expected_results[0].lengths));
990 int starts[] = { 0, 154, -1, 308, 154, 309 };
991 int lengths[] = { 154, 1, 24 };
992 expected_results[1].score = 424;
993 expected_results[1].evalue = 20700336e-50;
994 expected_results[1].bit_score = 167754171e-6;
995 expected_results[1].num_ident = 73;
997 back_inserter(expected_results[1].starts));
999 back_inserter(expected_results[1].lengths));
1005 { 0, 190, 65, -1, 67, 255, 91, -1, 92, 279, 111, -1, 113, 298,
1006 -1, 304, 119, 305, 151, -1, 152, 337, 163, -1, 164, 348,
1007 -1, 374, 190, 380, 200, -1, 202, 390 };
1009 { 65, 2, 24, 1, 19, 2, 6, 1, 32, 1, 11, 1, 26, 6, 10, 2, 30 };
1010 expected_results[2].score = 372;
1011 expected_results[2].evalue = 221677687e-45;
1012 expected_results[2].bit_score = 147723793e-6;
1013 expected_results[2].num_ident = 87;
1015 back_inserter(expected_results[2].starts));
1017 back_inserter(expected_results[2].lengths));
1021 expected_results[3].score = 53;
1022 expected_results[3].evalue = 216713461e-8;
1023 expected_results[3].bit_score = 248451288e-7;
1024 expected_results[3].num_ident = 8;
1025 expected_results[3].starts.push_back(206);
1026 expected_results[3].starts.push_back(46);
1027 expected_results[3].lengths.push_back(19);
1031 int starts[] = { 177, 100, -1, 106, 183, 107, 205, -1, 215, 129 };
1032 int lengths[] = { 6, 1, 22, 10, 14 };
1033 expected_results[4].score = 52;
1034 expected_results[4].evalue = 283036546e-8;
1035 expected_results[4].bit_score = 244599292e-7;
1036 expected_results[4].num_ident = 11;
1038 back_inserter(expected_results[4].starts));
1040 back_inserter(expected_results[4].lengths));
1045 int starts[] = { 74, 181, 108, -1, 109, 215 };
1046 int lengths[] = { 34, 1, 23 };
1047 expected_results[5].score = 49;
1048 expected_results[5].evalue = 630539642e-8;
1049 expected_results[5].bit_score = 233043305e-7;
1050 expected_results[5].num_ident = 14;
1052 back_inserter(expected_results[5].starts));
1054 back_inserter(expected_results[5].lengths));
1058 expected_results[6].score = 49;
1059 expected_results[6].evalue = 630539642e-8;
1060 expected_results[6].bit_score = 233043305e-7;
1061 expected_results[6].num_ident = 6;
1062 expected_results[6].starts.push_back(188);
1063 expected_results[6].starts.push_back(709);
1064 expected_results[6].lengths.push_back(30);
1069 qa::CSeqAlignCmpOpts opts;
1070 qa::CSeqAlignCmp
cmp(expected_results, actual_results, opts);
1072 bool identical_results =
cmp.Run(&errors);
1074 BOOST_REQUIRE_MESSAGE(identical_results, errors);
1081 const int kNumIterations = 4;
1082 const int kNumExpectedIterations = 2;
1086 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1088 int hits_below_threshold[kNumIterations] = { 2, 2, 0, 0 };
1089 int number_hits[kNumIterations] = { 6, 5, 0, 0 };
1091 int iteration_counter = 0;
1094 BOOST_REQUIRE(results[0].GetErrors().
empty());
1097 BOOST_REQUIRE_EQUAL(number_hits[iteration_counter],
1098 s_CountNumberUniqueGIs(alignment));
1103 string m(
"On round ");
1106 BOOST_REQUIRE_EQUAL(hits_below_threshold[iteration_counter],
1111 const CBioseq&
query = m_Pssm->GetPssm().GetQuery().GetSeq();
1113 x_ComputePssmForNextIteration(
query, alignment,
1114 m_OptHandle, results[0].GetAncillaryData());
1117 iteration_counter++;
1120 BOOST_REQUIRE_EQUAL(kNumExpectedIterations, iteration_counter);
1126 const int kNumIterations = 4;
1127 const int kNumExpectedIterations = 2;
1131 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1133 int hits_below_threshold[kNumIterations] = { 2, 2, 0, 0 };
1134 int number_hits[kNumIterations] = { 4, 3, 0, 0 };
1136 int iteration_counter = 0;
1139 BOOST_REQUIRE(results[0].GetErrors().
empty());
1141 BOOST_REQUIRE(alignment.
NotEmpty());
1142 BOOST_REQUIRE_EQUAL(number_hits[iteration_counter],
1143 s_CountNumberUniqueGIs(alignment));
1148 string m(
"On round ");
1151 BOOST_REQUIRE_EQUAL(hits_below_threshold[iteration_counter],
1156 const CBioseq&
query = m_Pssm->GetPssm().GetQuery().GetSeq();
1158 x_ComputePssmForNextIteration(
query, alignment,
1159 m_OptHandle, results[0].GetAncillaryData());
1162 iteration_counter++;
1165 BOOST_REQUIRE_EQUAL(kNumExpectedIterations, iteration_counter);
1179 BOOST_REQUIRE_THROW(
CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle),
1185 m_Pssm->SetPssm().SetIntermediateData().SetFreqRatios()) {
1190 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1193 BOOST_REQUIRE( !messages.empty() );
1195 string expected_warning(
"Frequency ratios for PSSM are all zeros");
1199 ((*m)->GetMessage().find(expected_warning) != string::npos)) {
1200 warning = (*m)->GetMessage();
1204 BOOST_REQUIRE_MESSAGE(!warning.empty(),
"Did not find expected warning");
1210 BOOST_REQUIRE_THROW(
CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle),
1216 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1222 m_SearchDb->SetDatabaseName(
"dummy");
1224 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1229 m_OptHandle.Reset();
1233 BOOST_REQUIRE_THROW(
CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle),
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Definitions which are dependent on the NCBI C++ Object Manager.
PSIDiagnosticsRequest * PSIDiagnosticsRequestNew(void)
Allocates a PSIDiagnosticsRequest structure, setting all fields to false.
Declares CBlastScopeSource class to create properly configured CScope objects to invoke the BLAST dat...
Defines BLAST error codes (user errors included)
Class whose purpose is to create CScope objects which have data loaders added with different prioriti...
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Handle to the protein-protein options to the BLAST algorithm.
Wrapper class for PSIDiagnosticsRequest .
Represents the iteration state in PSI-BLAST.
Runs a single iteration of the PSI-BLAST algorithm on a BLAST database.
Search Results for All Queries.
Template class for iteration on objects of class C (non-modifiable version)
Source of query sequence data for BLAST. Provides an interface for search classes to retrieve sequence...
Class for the messages for an individual query sequence.
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eCompoForceFullMatrixAdjust
Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally.
@ eNoCompositionBasedStats
Don't use composition based statistics.
@ eCompositionMatrixAdjust
Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence pro...
CRef< CSearchResultSet > Run()
Run the PSI-BLAST engine for one iteration.
unsigned int GetIterationNumber() const
Return the number of the current iteration.
void SetPssm(CConstRef< objects::CPssmWithParameters > pssm)
This method allows the same object to be reused when performing multiple iterations.
ECompoAdjustModes GetCompositionBasedStats() const
Returns this mode, which mostly specifies whether composition of db sequence is taken into account wh...
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
CRef< objects::CPssmWithParameters > PsiBlastComputePssmFromAlignment(const objects::CBioseq &query, CConstRef< objects::CSeq_align_set > alignment, CRef< objects::CScope > database_scope, const CPSIBlastOptionsHandle &opts_handle, CConstRef< CBlastAncillaryData > ancillary_data, PSIDiagnosticsRequest *diagnostics_req=0)
Computes a PSSM from the result of a PSI-BLAST iteration.
void PsiBlastComputePssmScores(CRef< objects::CPssmWithParameters > pssm, const CBlastOptions &opts)
Given a PSSM with frequency ratios and options, invoke the PSSM engine to compute the scores.
void Advance(const TSeqIds &list)
Advance the iterator by passing it the list of Seq-ids which passed the inclusion criteria for the cu...
static void GetSeqIds(CConstRef< objects::CSeq_align_set > seqalign, CConstRef< CPSIBlastOptionsHandle > opts, TSeqIds &retval)
Extract the sequence ids from the sequence alignment which identify those sequences that will be used...
@ eBlastDbIsProtein
protein
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static bool PreferAccessionOverGi(void)
Check if the option to prefer accession.version over GI is enabled (SeqId/PreferAccessionOverGi or SE...
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
bool CanGetIntermediateData(void) const
Check if it is safe to call GetIntermediateData method.
const TFinalData & GetFinalData(void) const
Get the FinalData member data.
bool CanGetFinalData(void) const
Check if it is safe to call GetFinalData method.
void SetPssm(TPssm &value)
Assign a value to Pssm data member.
bool CanGetQuery(void) const
Check if it is safe to call GetQuery method.
bool CanGetScores(void) const
Check if it is safe to call GetScores method.
list< double > TFreqRatios
bool CanGetPssm(void) const
Check if it is safe to call GetPssm method.
const TPssm & GetPssm(void) const
Get the Pssm member data.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
TGi GetGi(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
Declares class which provides internal BLAST database representations to the internal BLAST APIs.
constexpr bool empty(list< Ts... >) noexcept
Magic spell ;-) needed for some weird compilers... very empiric.
std::vector< SeqAlign > TSeqAlignSet
Vector of neutral sequence alignments.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares CPsiBlast, the C++ API for the PSI-BLAST engine.
Declarations of auxiliary functions/classes for PSI-BLAST.
Defines class which represents the iteration state in PSI-BLAST.
#define STATIC_ARRAY_SIZE(array)
Calculate the size of a static array.
BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_NoCBS)
C++ API for the PSI-BLAST PSSM engine.
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
API to compare CSeq-aligns produced by BLAST.
void SeqAlignSetConvert(const objects::CSeq_align_set &ss, std::vector< SeqAlign > &retval)
Converts a Seq-align-set into a neutral seqalign for use with the CSeqAlignCmp class.
Defines BLAST database access classes.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
CRef< CPssmWithParameters > x_ComputePssmForNextIteration(const CBioseq &query, CConstRef< CSeq_align_set > sset, CConstRef< CPSIBlastOptionsHandle > opts_handle, CConstRef< CBlastAncillaryData > ancillary_data)
IQueryFactory * s_SetupSubject(CConstRef< CBioseq_set > bioseq_set)
IQueryFactory * s_SetupSubject(CConstRef< CBioseq > bioseq)
void x_ReadSeqEntriesFromFile()
CRef< CPSIBlastOptionsHandle > m_OptHandle
CSearchDatabase * m_SearchDb
CRef< CSeq_entry > m_SeqEntry
Contains a single Bioseq.
void x_ReadPssmFromFile()
int s_CountNumberUniqueGIs(CConstRef< CSeq_align_set > sas)
CRef< CSeq_entry > m_SeqSet
Contains a Bioseq-set with two Bioseqs, gi 7450545 and gi 129295.
CRef< CPssmWithParameters > m_Pssm
Boolean frequency_ratios
request frequency ratios
Structure to represent a single sequence to be fed to BLAST.
Utilities for more convenient use of the Boost.Test library.