95 typedef vector< CRef<CSeq_id> > TSeqIds;
96 TSeqIds seqid_v(num_seqs);
97 generate(seqid_v.begin(), seqid_v.end(),
100 ITERATE(TSeqIds, seqid, seqid_v) {
108 const TSeqPos kOffsetLength(30);
109 for (
int index = 0; index <
mask->total_size; ++index) {
111 index+kOffsetLength);
115 BOOST_REQUIRE_EQUAL(num_seqs, mask_v.size());
117 unsigned int qindex(0);
121 BOOST_REQUIRE_MESSAGE( kNumMasks == query_masks_list->size(),
122 "Failed on " + kProgName);
126 ss <<
"Error in query number " << qindex <<
", context "
127 <<
context <<
" ('" << kProgName <<
"')";
132 BOOST_REQUIRE_MESSAGE(frame == (*itr)->GetFrame(),
138 BOOST_REQUIRE(loc !=
NULL);
140 BOOST_REQUIRE_MESSAGE
141 (offsets.
GetFrom() == (*itr)->GetInterval().GetFrom(),
143 BOOST_REQUIRE_MESSAGE
144 (offsets.
GetTo() == (*itr)->GetInterval().GetTo(),
148 BOOST_REQUIRE_EQUAL(kNumMasks,
context);
156 BOOST_CHECK(start <= stop);
157 for (
int i = start;
i < stop;
i++) {
168 bool ignore_strand_in_mask)
170 const int kNumLcaseLocs = 11;
171 const int kLcaseStarts[kNumLcaseLocs] =
172 { 0, 78, 217, 380, 694, 1018, 1128, 2817, 3084, 3428, 3782 };
173 const int kLcaseEnds[kNumLcaseLocs] =
174 { 75, 208, 316, 685, 1004, 1122, 1298, 2952, 3409, 3733, 3916 };
177 const int kQuerySize = 9180;
178 vector<int> kLcaseStartsNegStrand, kLcaseEndsNegStrand;
179 kLcaseStartsNegStrand.reserve(kNumLcaseLocs);
180 kLcaseEndsNegStrand.reserve(kNumLcaseLocs);
181 for (
i = 0;
i < kNumLcaseLocs;
i++) {
182 int start = kQuerySize - 1 - kLcaseEnds[
i];
183 int stop = kQuerySize - 1 - kLcaseStarts[
i];
184 kLcaseStartsNegStrand.push_back(start);
185 kLcaseEndsNegStrand.push_back(stop);
189 unique_ptr<SSeqLoc> qsl(
192 CSeq_loc* seqloc =
new CSeq_loc();
193 for (
int index = 0; index < kNumLcaseLocs; ++index) {
194 seqloc->SetPacked_int().AddInterval(
id, kLcaseStarts[index],
196 BOOST_CHECK(!seqloc->GetPacked_int().Get().back()->CanGetStrand());
197 seqloc->SetPacked_int().Set().back()->SetStrand(strand);
199 qsl->mask.Reset(seqloc);
200 qsl->ignore_strand_in_mask = ignore_strand_in_mask;
203 query_v.push_back(*qsl);
205 nucl_handle->SetDustFiltering(
false);
206 nucl_handle->SetMaskAtHash(
false);
210 blast::CBl2Seq blaster(*qsl.get(), *qsl.get(), *nucl_handle);
211 (void) blaster.Run();
215 BOOST_CHECK_EQUAL(
false, nucl_handle->GetMaskAtHash());
216 for (
i = 0;
i < kNumLcaseLocs;
i++) {
217 const pair<int, int> range_plus(kLcaseStarts[
i], kLcaseEnds[
i]);
218 const pair<int, int> range_minus(kLcaseStartsNegStrand[
i],
219 kLcaseEndsNegStrand[
i]);
220 int starting_offset = 0;
224 blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[0].query_offset;
226 (blaster.m_Blast->m_InternalData->m_Queries->sequence,
227 starting_offset + range_plus.first,
228 starting_offset + range_plus.second));
231 blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[1].query_offset;
233 (blaster.m_Blast->m_InternalData->m_Queries->sequence,
234 starting_offset + range_minus.first,
235 starting_offset + range_minus.second));
240 blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[0].query_offset;
242 (blaster.m_Blast->m_InternalData->m_Queries->sequence,
243 starting_offset + range_plus.first,
244 starting_offset + range_plus.second));
247 blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[1].query_offset;
249 (blaster.m_Blast->m_InternalData->m_Queries->sequence,
250 starting_offset + range_minus.first,
251 starting_offset + range_minus.second));
254 blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[0].query_offset;
256 (blaster.m_Blast->m_InternalData->m_Queries->sequence,
257 starting_offset + range_plus.first,
258 starting_offset + range_plus.second));
260 blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[1].query_offset;
262 (blaster.m_Blast->m_InternalData->m_Queries->sequence,
263 starting_offset + range_minus.first,
264 starting_offset + range_minus.second));
275 blaster.GetFilteredQueryRegions();
276 BOOST_CHECK(masked_regions_vector.size() == 1);
277 BOOST_CHECK_EQUAL(masked_regions_vector.front().size(),
278 (
size_t)kNumLcaseLocs);
280 BOOST_CHECK(query_v[0].
mask->IsPacked_int());
281 BOOST_CHECK_EQUAL(query_v[0].
mask->GetPacked_int().Get().size(),
282 masked_regions_vector.front().size());
285 query_v[0].
mask->GetPacked_int().Get()) {
286 BOOST_CHECK_EQUAL(kLcaseStarts[loc_index], (
int)(*itr)->GetFrom());
287 BOOST_CHECK_EQUAL(kLcaseEnds[loc_index], (
int)(*itr)->GetTo());
290 BOOST_CHECK_EQUAL(kNumLcaseLocs, loc_index);
295 BOOST_CHECK_EQUAL(kLcaseStarts[loc_index], (
int)intv.
GetFrom());
296 BOOST_CHECK_EQUAL(kLcaseEnds[loc_index], (
int)intv.
GetTo());
298 BOOST_CHECK_EQUAL((*itr)->GetFrame(),
303 BOOST_CHECK_EQUAL(kNumLcaseLocs, loc_index);
310 typedef vector< pair<TSeqPos, TSeqPos> > TSegments;
311 TSegments masked_offsets;
312 masked_offsets.push_back(make_pair(298U, 305U));
313 masked_offsets.push_back(make_pair(875U, 882U));
314 masked_offsets.push_back(make_pair(1018U, 1115U));
315 masked_offsets.push_back(make_pair(1449U, 1479U));
316 masked_offsets.push_back(make_pair(3113U, 3133U));
317 masked_offsets.push_back(make_pair(3282U, 3298U));
318 masked_offsets.push_back(make_pair(3428U, 3441U));
319 masked_offsets.push_back(make_pair(3598U, 3606U));
320 masked_offsets.push_back(make_pair(4704U, 4710U));
321 masked_offsets.push_back(make_pair(6364U, 6373U));
322 masked_offsets.push_back(make_pair(6512U, 6573U));
323 masked_offsets.push_back(make_pair(7600U, 7672U));
324 masked_offsets.push_back(make_pair(7766U, 7772U));
325 masked_offsets.push_back(make_pair(8873U, 8880U));
326 masked_offsets.push_back(make_pair(9109U, 9179U));
328 const size_t kNumQueries(1);
329 const size_t kNumLocs(masked_offsets.size());
333 unique_ptr<SSeqLoc> qsl(
341 BOOST_CHECK(query_reference[0].
mask->IsPacked_int());
343 query_reference[0].mask->GetPacked_int().Get();
344 BOOST_CHECK_EQUAL(kNumLocs, seqinterval_list.size());
346 bool reverse =
IsReverse(query_reference[0].
mask->GetStrand());
347 index = reverse ? masked_offsets.size() - 1 : 0;
349 BOOST_CHECK_EQUAL(masked_offsets[index].
first,
351 BOOST_CHECK_EQUAL(masked_offsets[index].second,
353 reverse ? index-- : index++;
359 (void) blaster.Run();
361 blaster.GetFilteredQueryRegions();
363 BOOST_CHECK_EQUAL(kNumQueries, query_reference.size());
364 BOOST_CHECK_EQUAL(kNumQueries,
query_test.size());
365 BOOST_CHECK_EQUAL(kNumQueries, masked_regions_vector.size());
368 BOOST_CHECK_EQUAL(kNumLocs, masked_regions.size());
371 BOOST_CHECK_EQUAL(masked_offsets[index].
first,
372 (*itr)->GetInterval().GetFrom());
373 BOOST_CHECK_EQUAL(masked_offsets[index].second,
374 (*itr)->GetInterval().GetTo());
381 vector< CRef<CSeq_id> > gis;
386 vector<TSeqRange> ranges;
391 BOOST_REQUIRE(gis.size() == ranges.size());
394 for (
i = 0;
i < gis.size();
i++) {
404 BOOST_REQUIRE(gis[
i]->Match((*query_interval)->GetId()));
405 BOOST_REQUIRE_EQUAL(ranges[
i].GetFrom(),
406 (*query_interval)->GetFrom());
407 BOOST_REQUIRE_EQUAL(ranges[
i].GetTo(),
408 (*query_interval)->GetTo());
414 typedef pair<TGi, TSeqPos> TGiLength;
415 vector<TGiLength> gis;
416 gis.push_back(make_pair(
GI_CONST(6), 342U));
417 gis.push_back(make_pair(
GI_CONST(129295), 232U));
418 gis.push_back(make_pair(
GI_CONST(15606659), 443U));
421 input.reserve(gis.size());
422 ITERATE(vector<TGiLength>, gi, gis) {
424 seqloc->
SetWhole().SetGi(gi->first);
430 const TSeqPos kStartingPosition(0);
432 const TGiLength& kGiLength = gis[
i++];
434 BOOST_REQUIRE(kTargetId.
Match((*query_interval)->GetId()));
435 BOOST_REQUIRE_EQUAL(kStartingPosition,
436 (*query_interval)->GetFrom());
437 BOOST_REQUIRE_EQUAL(kGiLength.second,
438 (*query_interval)->GetTo());
445 BOOST_REQUIRE(retval.
Empty());
461 prog, strand_opt, blast_msg);
464 BOOST_REQUIRE(m->empty());
469 const int kNumLocs = 3;
470 const int kSegStarts[kNumLocs] = { 15, 55, 495 };
471 const int kSegEnds[kNumLocs] = { 27, 68, 513 };
475 query_v.push_back(*qsl);
481 &query_blk, &query_info);
493 BOOST_REQUIRE(filtering_options ==
NULL);
494 BOOST_REQUIRE(status == 0);
498 for (loc_index=0, loc = filter_slp; loc; loc = loc->
next, ++loc_index) {
500 BOOST_REQUIRE_EQUAL(kSegStarts[loc_index], di->
left);
501 BOOST_REQUIRE_EQUAL(kSegEnds[loc_index], di->
right);
505 BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
509 const size_t kNumLocs = 4;
510 const TSeqPos kRepeatStarts[kNumLocs] = { 0, 380, 2851, 3113 };
511 const TSeqPos kRepeatEnds[kNumLocs] = { 212, 1297, 2953, 3764 };
513 unique_ptr<SSeqLoc> qsl(
516 query_v.push_back(*qsl);
522 BOOST_REQUIRE(query_v[0].
mask.NotEmpty());
523 BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
525 query_v[0].mask->GetPacked_int().Get();
527 size_t loc_index = 0;
528 BOOST_REQUIRE_EQUAL(kNumLocs, seqinterval_list.size());
531 BOOST_REQUIRE_EQUAL(kRepeatStarts[loc_index], (*itr)->GetFrom());
532 BOOST_REQUIRE_EQUAL(kRepeatEnds[loc_index], (*itr)->GetTo());
533 BOOST_REQUIRE(!(*itr)->CanGetStrand());
537 BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
542 int pair_size =
sizeof(
TSeqPos) * 2;
571 size_t num_locs =
sizeof(intervals) / pair_size;
572 BOOST_REQUIRE(0 == (
sizeof(intervals) % pair_size));
579 query_v.push_back(*qsl);
585 BOOST_REQUIRE(query_v[0].
mask.NotEmpty());
586 BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
588 query_v[0].mask->GetPacked_int().Get();
590 size_t loc_index = 0;
591 BOOST_REQUIRE_EQUAL(num_locs, seqinterval_list.size());
595 BOOST_REQUIRE_EQUAL(intervals[loc_index], (*itr)->GetFrom());
596 BOOST_REQUIRE_EQUAL(intervals[loc_index+1], (*itr)->GetTo());
597 BOOST_REQUIRE(! (*itr)->CanGetStrand());
601 BOOST_REQUIRE_EQUAL(num_locs*2, loc_index);
605 vector<TSeqRange> masked_regions;
606 masked_regions.push_back(
TSeqRange(85028, 85528));
607 masked_regions.push_back(
TSeqRange(85539, 85736));
608 masked_regions.push_back(
TSeqRange(86334, 86461));
609 masked_regions.push_back(
TSeqRange(86487, 86585));
610 masked_regions.push_back(
TSeqRange(86730, 87050));
611 masked_regions.push_back(
TSeqRange(87313, 87370));
612 masked_regions.push_back(
TSeqRange(88134, 88140));
613 masked_regions.push_back(
TSeqRange(88171, 88483));
614 masked_regions.push_back(
TSeqRange(89032, 89152));
615 masked_regions.push_back(
TSeqRange(91548, 91704));
616 masked_regions.push_back(
TSeqRange(92355, 92539));
617 masked_regions.push_back(
TSeqRange(92550, 92973));
618 masked_regions.push_back(
TSeqRange(92983, 93283));
619 masked_regions.push_back(
TSeqRange(93296, 93384));
620 masked_regions.push_back(
TSeqRange(93472, 93642));
621 masked_regions.push_back(
TSeqRange(93685, 94026));
622 masked_regions.push_back(
TSeqRange(94435, 94545));
625 unique_ptr<SSeqLoc> qsl(
627 make_pair<TSeqPos, TSeqPos>(84999, 94637),
630 query_v.push_back(*qsl);
638 BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
640 query_v[0].mask->GetPacked_int().Get();
642 size_t loc_index = 0;
643 BOOST_REQUIRE_EQUAL(masked_regions.size(), seqinterval_list.size());
646 BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetFrom(),
648 BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetTo(),
650 BOOST_REQUIRE(!(*itr)->CanGetStrand());
654 BOOST_REQUIRE_EQUAL(masked_regions.size(), loc_index);
663 BOOST_REQUIRE(
a ==
b);
666 BOOST_REQUIRE(
a !=
b);
676 query->AddMask(lower_case_mask);
688 BOOST_REQUIRE( !mqr.empty() );
692 BOOST_REQUIRE(e.
GetMsg().find(
"lossy direction") !=
NPOS);
697 BOOST_REQUIRE(*sli == *lower_case_mask);
698 BOOST_REQUIRE_EQUAL((
int)2, (
int)mqr.size());
699 BOOST_REQUIRE(mqr.front()->GetFrame() == 1);
700 BOOST_REQUIRE(mqr.back()->GetFrame() == -1);
709 query_v1.push_back(*qsl1);
714 query_v2.push_back(*qsl2);
736 query_v1.push_back(*qsl1);
741 query_v2.push_back(*qsl2);
761 vector<TSeqRange> masked_regions;
762 masked_regions.push_back(
TSeqRange(85019, 85172));
763 masked_regions.push_back(
TSeqRange(85190, 85345));
764 masked_regions.push_back(
TSeqRange(85385, 85452));
765 masked_regions.push_back(
TSeqRange(85483, 85505));
766 masked_regions.push_back(
TSeqRange(85511, 85533));
767 masked_regions.push_back(
TSeqRange(85575, 85596));
768 masked_regions.push_back(
TSeqRange(85673, 85694));
769 masked_regions.push_back(
TSeqRange(85725, 85745));
777 query_v.push_back(*qsl);
786 BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
788 query_v[0].mask->GetPacked_int().Get();
790 size_t loc_index = 0;
791 BOOST_REQUIRE_EQUAL(masked_regions.size(), seqinterval_list.size());
794 BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetFrom(),
796 BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetTo(),
798 BOOST_REQUIRE(!(*itr)->CanGetStrand());
802 BOOST_REQUIRE_EQUAL(masked_regions.size(), loc_index);
807 unique_ptr<SSeqLoc> qsl(
810 query_v.push_back(*qsl);
817 BOOST_REQUIRE(query_v[0].
mask.Empty());
822 unique_ptr<SSeqLoc> qsl
826 query_v.push_back(*qsl);
833 BOOST_REQUIRE(query_v[0].
mask.Empty());
840 query_v.push_back(*qsl);
853 query_v.push_back(*qsl);
875 vector<TSeqRange> masks;
908 BOOST_REQUIRE_EQUAL((
size_t)4, restricted_mask.size());
909 BOOST_REQUIRE_EQUAL((
TSeqPos)624,
910 restricted_mask.back()->GetInterval().GetTo());
912 (restricted_mask.front()->GetInterval().GetId()));
913 BOOST_REQUIRE(!(restricted_mask.front()->GetInterval().CanGetStrand()));
916 restriction.
SetTo(2000);
919 BOOST_REQUIRE_EQUAL((
size_t)3, restricted_mask.size());
920 TMaskedQueryRegions::iterator itr = restricted_mask.begin();
922 BOOST_REQUIRE_EQUAL((
TSeqPos)1000, (*itr)->GetInterval().GetFrom());
923 BOOST_REQUIRE_EQUAL((
TSeqPos)1004, (*itr)->GetInterval().GetTo()-1);
924 BOOST_REQUIRE(id->
Match((*itr)->GetInterval().GetId()));
925 BOOST_REQUIRE(!(*itr)->GetInterval().CanGetStrand());
928 BOOST_REQUIRE_EQUAL((
TSeqPos)1018, (*itr)->GetInterval().GetFrom());
929 BOOST_REQUIRE_EQUAL((
TSeqPos)1122, (*itr)->GetInterval().GetTo()-1);
930 BOOST_REQUIRE(id->
Match((*itr)->GetInterval().GetId()));
931 BOOST_REQUIRE(!(*itr)->GetInterval().CanGetStrand());
934 BOOST_REQUIRE_EQUAL((
TSeqPos)1128, (*itr)->GetInterval().GetFrom());
935 BOOST_REQUIRE_EQUAL((
TSeqPos)1298, (*itr)->GetInterval().GetTo()-1);
936 BOOST_REQUIRE(id->
Match((*itr)->GetInterval().GetId()));
937 BOOST_REQUIRE(!(*itr)->GetInterval().CanGetStrand());
940 BOOST_REQUIRE(itr == restricted_mask.end());
943 restriction.
SetTo(20000);
945 BOOST_REQUIRE(restricted_mask.empty());
950 vector<TSeqRange> masks;
974 BOOST_REQUIRE(!bqff.
Empty());
976 const set<CSeqLocInfo::ETranslationFrame>& frames = bqff.
ListFrames();
977 ITERATE(set<CSeqLocInfo::ETranslationFrame>, fr, frames) {
978 BOOST_REQUIRE(bqff[*fr] !=
NULL);
986 vector<TSeqRange> masks;
997 mqr.push_back(sli_plus);
1000 mqr.push_back(sli_minus);
1006 BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 0);
1007 BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 25);
1010 BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 0);
1011 BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 24);
1014 BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 0);
1015 BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 24);
1018 BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 3034);
1019 BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 3059);
1022 BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 3034);
1023 BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 3058);
1026 BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 3034);
1027 BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 3058);
1044 BOOST_REQUIRE(!bqff.
Empty());
1046 const set<CSeqLocInfo::ETranslationFrame>& frames = bqff.
ListFrames();
1047 const int kExpectedNumFrames = 2;
1049 ITERATE(set<CSeqLocInfo::ETranslationFrame>, fr, frames) {
1050 BOOST_REQUIRE(bqff[*fr] !=
NULL);
1053 BOOST_REQUIRE_EQUAL(kExpectedNumFrames, bqff.
GetNumFrames());
1054 BOOST_REQUIRE_EQUAL(1, frame_ctr);
1055 BOOST_REQUIRE_EQUAL(1, frames.size());
1074 BOOST_REQUIRE(!bqff.
Empty());
1076 const set<CSeqLocInfo::ETranslationFrame>& frames = bqff.
ListFrames();
1077 const int kExpectedNumFrames = 2;
1079 ITERATE(set<CSeqLocInfo::ETranslationFrame>, fr, frames) {
1080 BOOST_REQUIRE(bqff[*fr] !=
NULL);
1083 BOOST_REQUIRE_EQUAL(kExpectedNumFrames, bqff.
GetNumFrames());
1084 BOOST_REQUIRE_EQUAL(kExpectedNumFrames, frame_ctr);
1085 BOOST_REQUIRE_EQUAL(kExpectedNumFrames, frames.size());
1089 const bool ignore_strand_in_mask =
true;
1091 ignore_strand_in_mask);
1095 const bool ignore_strand_in_mask =
true;
1097 ignore_strand_in_mask);
1101 const bool ignore_strand_in_mask =
true;
1103 ignore_strand_in_mask);
1107 const bool ignore_strand_in_mask =
false;
1109 ignore_strand_in_mask);
1113 const bool ignore_strand_in_mask =
false;
1115 ignore_strand_in_mask);
1119 const bool ignore_strand_in_mask =
false;
1121 ignore_strand_in_mask);
1125 const int kNumLcaseLocs = 11;
1126 const int kLcaseStarts[kNumLcaseLocs] =
1127 { 0, 78, 217, 380, 694, 1018, 1128, 2817, 3084, 3428, 3782 };
1128 const int kLcaseEnds[kNumLcaseLocs] =
1129 { 75, 208, 316, 685, 1004, 1122, 1298, 2952, 3409, 3733, 3916 };
1131 const int kNumLocs = 6;
1132 const int kStarts[kNumLocs] = { 0, 217, 380, 2817, 3084, 3782 };
1133 const int kEnds[kNumLocs] = { 212, 316, 1298, 2953, 3764, 3916 };
1135 unique_ptr<SSeqLoc> qsl(
1139 CSeq_loc* seqloc =
new CSeq_loc();
1140 for (
int index = 0; index < kNumLcaseLocs; ++index) {
1141 seqloc->SetPacked_int().AddInterval(
id, kLcaseStarts[index],
1143 BOOST_REQUIRE(!seqloc->GetPacked_int().Get().back()->CanGetStrand());
1145 qsl->mask.Reset(seqloc);
1148 query_v.push_back(*qsl);
1153 BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
1157 BOOST_REQUIRE(query_v[0].
mask.NotEmpty());
1159 query_v[0].
mask->GetPacked_int().Get()) {
1161 BOOST_REQUIRE_EQUAL(kStarts[loc_index], (
int)(*itr)->GetFrom());
1162 BOOST_REQUIRE_EQUAL(kEnds[loc_index], (
int)(*itr)->GetTo());
1166 BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
1170 const int kNumLocs = 13;
1171 const int kStarts[kNumLocs] =
1172 { 0, 298, 380, 1449, 2851, 3113, 4704, 6364, 6512, 7600,
1174 const int kEnds[kNumLocs] =
1175 { 212, 305, 1297, 1479, 2953, 3764, 4710, 6373, 6573, 7672,
1178 unique_ptr<SSeqLoc> qsl(
1181 query_v.push_back(*qsl);
1191 BOOST_REQUIRE(query_v[0].
mask.NotEmpty());
1193 query_v[0].
mask->GetPacked_int().Get()) {
1195 BOOST_REQUIRE_EQUAL(kStarts[loc_index], (
int)(*itr)->GetFrom());
1196 BOOST_REQUIRE_EQUAL(kEnds[loc_index], (
int)(*itr)->GetTo());
1199 BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
1203 const int kNumLocs = 15;
1204 const int kDustStarts[kNumLocs] =
1205 { 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
1206 6512, 7600, 7766, 8873, 9109};
1207 const int kDustEnds[kNumLocs] =
1208 { 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1209 6573, 7672, 7772, 8880 , 9179};
1212 unique_ptr<SSeqLoc> qsl(
1215 query_v.push_back(*qsl);
1223 query_v[0].
mask->GetPacked_int().Get()) {
1224 BOOST_REQUIRE_EQUAL(kDustStarts[loc_index], (
int)(*itr)->GetFrom());
1225 BOOST_REQUIRE_EQUAL(kDustEnds[loc_index], (
int)(*itr)->GetTo());
1229 BOOST_REQUIRE_EQUAL(loc_index, kNumLocs);
1233 const int kNumLocs = 15;
1234 const int kDustStarts[kNumLocs] =
1235 { 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
1236 6512, 7600, 7766, 8873, 9109};
1237 const int kDustEnds[kNumLocs] =
1238 { 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1239 6573, 7672, 7772, 8880 , 9179};
1242 unique_ptr<SSeqLoc> qsl(
1245 query_v.push_back(*qsl);
1253 query_v[0].
mask->GetPacked_int().Get()) {
1254 BOOST_REQUIRE_EQUAL(kDustStarts[loc_index], (
int)(*itr)->GetFrom());
1255 BOOST_REQUIRE_EQUAL(kDustEnds[loc_index], (
int)(*itr)->GetTo());
1259 BOOST_REQUIRE_EQUAL(loc_index, kNumLocs);
1263 const int kNumLocs = 15;
1264 const int kDustStarts[kNumLocs] =
1265 { 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
1266 6512, 7600, 7766, 8873, 9109};
1267 const int kDustEnds[kNumLocs] =
1268 { 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1269 6573, 7672, 7772, 8880 , 9179};
1272 unique_ptr<SSeqLoc> qsl(
1275 query_v.push_back(*qsl);
1282 int loc_index = reverse ? kNumLocs - 1 : 0;
1284 query_v[0].
mask->GetPacked_int().Get()) {
1285 BOOST_REQUIRE_EQUAL(kDustStarts[loc_index], (
int)(*itr)->GetFrom());
1286 BOOST_REQUIRE_EQUAL(kDustEnds[loc_index], (
int)(*itr)->GetTo());
1287 reverse ? --loc_index : ++loc_index;
1292 BOOST_REQUIRE_EQUAL(loc_index, kNumLocs);
1295 BOOST_REQUIRE_EQUAL(loc_index, -1);
1301 const int kNumLocs = 3;
1302 const int kSegStarts[kNumLocs] = { 15, 55, 495 };
1303 const int kSegEnds[kNumLocs] = { 27, 68, 513 };
1307 query_v.push_back(*qsl);
1313 &query_blk, &query_info);
1323 &filter_out, &blast_message);
1325 BOOST_REQUIRE(filter_options ==
NULL);
1326 BOOST_REQUIRE(status == 0);
1332 for (loc_index=0, loc = filter_slp; loc; loc = loc->
next, ++loc_index) {
1334 BOOST_REQUIRE_EQUAL(kSegStarts[loc_index], di->
left);
1335 BOOST_REQUIRE_EQUAL(kSegEnds[loc_index], di->
right);
1338 BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
1341 BOOST_REQUIRE(filter_out ==
NULL);
1345 const int kNumLocs = 3;
1346 const int kSegStarts[kNumLocs] = { 15, 55, 495 };
1347 const int kSegEnds[kNumLocs] = { 27, 68, 513 };
1351 query_v.push_back(*qsl);
1357 &query_blk, &query_info);
1361 for (
Int4 loc_index=0; loc_index<kNumLocs; ++loc_index) {
1364 kSegEnds[loc_index]);
1367 kSegEnds[loc_index]);
1376 BOOST_REQUIRE(filter_maskloc ==
NULL);
1381 for (
int index=0; index<query_length; index++)
1386 BOOST_REQUIRE_EQUAL(-241853716, (
int)
hash);
1390 const int kNumLocs = 15;
1391 const int kDustStarts[kNumLocs] =
1392 { 298, 875, 1018, 1064, 1448, 3113, 3282, 3428, 3598, 4704, 6364,
1393 6511, 7766, 8873, 9108 };
1394 const int kDustEnds[kNumLocs] =
1395 { 305, 882, 1045, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1396 6573, 7772, 8880, 9179 };
1402 query_v.push_back(*qsl);
1408 &query_blk, &query_info);
1412 for (
Int4 loc_index=0; loc_index<kNumLocs; ++loc_index) {
1415 kDustEnds[loc_index]);
1418 kDustEnds[loc_index]);
1428 BOOST_REQUIRE(filter_maskloc ==
NULL);
1433 for (
int index=0; index<query_length; index++)
1438 BOOST_REQUIRE_EQUAL(-1261879517, (
int)
hash);
1442 const int kNumLocs0 = 15;
1443 const int kNumLocs1 = 80;
1444 const int kNumLocs2 = 1;
1446 int dust_starts0[kNumLocs0] =
1447 { 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
1448 6512, 7600, 7766, 8873, 9109};
1449 int dust_ends0[kNumLocs0] =
1450 { 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1451 6573, 7672, 7772, 8880 , 9179};
1452 int dust_starts1[kNumLocs1] =
1453 { 189, 862, 1717, 1880, 2301, 2850, 3074, 3301, 4865, 5231, 5397,
1454 5825, 5887, 6560, 6806, 7178, 7709, 8000, 8275, 8441, 9449, 9779,
1455 10297, 10457, 11033, 11242, 12271, 12410, 12727, 13803, 14743, 15052,
1456 15153, 15262, 16201, 16968, 17318, 18470, 20179, 21513, 21569,
1457 22034, 22207, 22657, 22890, 23326, 27984, 28305, 28581, 28960, 29678,
1458 30553, 31195, 32347, 33641, 33785, 34138, 34861, 34872, 35028,
1459 35676, 35727, 36105, 36312, 36841, 38459, 38610, 38997, 39217, 39428,
1460 39629, 42243, 42584, 43157, 43346, 43619, 44040, 44617, 46791, 47213};
1461 int dust_ends1[kNumLocs1] =
1462 { 230, 876, 1741, 1898, 2315, 2868, 3117, 3308, 4886, 5255, 5433, 5860,
1463 5943, 6566, 6857, 7245, 7737, 8014, 8286, 8479, 9496, 9830, 10306,
1464 10581, 11082, 11255, 12277, 12432, 12748, 13809, 14750, 15121, 15171,
1465 15345, 16237, 16992, 17332, 18482, 20185, 21524, 21688, 22072, 22220,
1466 22672, 22898, 23348, 27996, 28311, 28626, 28998, 29690, 30596, 31220,
1467 32359, 33683, 33815, 34203, 34870, 34894, 35039, 35725, 35797, 36114,
1468 36318, 36869, 38497, 38632, 39035, 39223, 39477, 39635, 42249, 42591,
1469 43175, 43410, 43648, 44049, 44630, 46811, 47219};
1470 int dust_starts2[kNumLocs2] = {156};
1471 int dust_ends2[kNumLocs2] = {172};
1473 typedef pair<int*, int*> TStartEndPair;
1474 TStartEndPair pair0(dust_starts0, dust_ends0);
1475 TStartEndPair pair1(dust_starts1, dust_ends1);
1476 TStartEndPair pair2(dust_starts2, dust_ends2);
1478 vector< TStartEndPair > start_end_v;
1479 start_end_v.push_back(pair0);
1480 start_end_v.push_back(pair1);
1481 start_end_v.push_back(pair2);
1484 unique_ptr<SSeqLoc> qsl1(
1487 unique_ptr<SSeqLoc> qsl2(
1490 unique_ptr<SSeqLoc> qsl3(
1495 query_v.push_back(*qsl1);
1496 query_v.push_back(*qsl2);
1497 query_v.push_back(*qsl3);
1506 ITERATE(vector< TStartEndPair >, vec_iter, start_end_v)
1508 TStartEndPair local_pair = *vec_iter;
1509 int* start = local_pair.first;
1510 int* stop = local_pair.second;
1513 query_v[query_number].
mask->GetPacked_int().Get()) {
1514 BOOST_REQUIRE_EQUAL(start[loc_index], (
int)(*itr)->GetFrom());
1515 BOOST_REQUIRE_EQUAL(stop[loc_index], (
int)(*itr)->GetTo());
1524 const int kNumLocs = 4;
1525 const int kMaskStarts[kNumLocs] = { 10, 20, 30, 40 };
1526 const int kMaskEnds[kNumLocs] = { 15, 25, 35, 45 };
1527 const int kRange[2] = { 12, 22 };
1531 for (index = 0; index < kNumLocs; ++index) {
1539 for (index = 0, loc_var = mask_loc; loc_var;
1540 ++index, loc_var = loc_var->
next) {
1541 BOOST_REQUIRE_EQUAL(kMaskStarts[index], (
int)loc_var->ssr->left);
1542 BOOST_REQUIRE_EQUAL(kMaskEnds[index], (
int)loc_var->ssr->right);
1544 BOOST_REQUIRE_EQUAL(kNumLocs, index);
1547 for (index = 0, loc_var = mask_loc; loc_var;
1548 ++index, loc_var = loc_var->
next);
1549 BOOST_REQUIRE_EQUAL(2, index);
1550 BOOST_REQUIRE_EQUAL(kMaskEnds[0]-kRange[0], (
int)mask_loc->
ssr->
right);
1551 BOOST_REQUIRE_EQUAL(kMaskStarts[1]-kRange[0],
1553 BOOST_REQUIRE_EQUAL(kRange[1]-kRange[0],
1558 BOOST_REQUIRE(mask_loc ==
NULL);
1563 const int kNumQueries = 3;
1565 const int kQueryLengths[kNumQueries] = { 1639, 1151, 1164 };
1569 for (
int index = 0; index < kNumQueries; ++index) {
1571 loc->
SetWhole().SetGi(kQueryGis[index]);
1574 query_v.push_back(
SSeqLoc(loc, scope));
1584 for (
int i = 0;
i < kNumQueries;
i++) {
1587 BOOST_REQUIRE_EQUAL(kQueryLengths[
i],
len);
1593 const int kNumQueries = 3;
1595 const int kNumContexts = kNumQueries*
NUM_FRAMES;
1598 BOOST_REQUIRE_EQUAL(kNumContexts, query_info->
last_context + 1);
1601 { { 660, 686 }, { 92, 119 }, { 1156, 1163 } };
1604 BOOST_REQUIRE_EQUAL(kNumContexts, mask_loc->
total_size);
1606 for (
int index = 0; index < kNumQueries; index++) {
1616 BOOST_REQUIRE_EQUAL(kNumContexts, mask_loc->
total_size);
1618 const int kProtStarts[kNumContexts] =
1619 { 220, 219, 219, 317, 317, 316, 30, 30, 30, 343, 343, 343, 385, 385,
1621 const int kProtEnds[kNumContexts] =
1622 { 228, 228, 228, 326, 325, 325, 39, 39, 39, 352, 352, 352, 387, 386,
1625 for (
int index = 0; index < kNumContexts; ++index) {
1628 os <<
"Context " << index <<
" has no mask!";
1634 os <<
"Context " << index;
1635 BOOST_REQUIRE_MESSAGE(kProtStarts[index] ==
range->left,
1637 BOOST_REQUIRE_MESSAGE(kProtEnds[index] ==
range->right,
1643 BOOST_REQUIRE_EQUAL(kNumContexts, mask_loc->
total_size);
1644 const int kNuclStarts[kNumContexts] =
1645 { 660, 658, 659, 661, 663, 662, 90, 91, 92, 95, 94, 93, 1155, 1156,
1646 1154, 1158, 1157, 1159 };
1647 const int kNuclEnds[kNumContexts] =
1648 { 684, 685, 686, 687, 686, 688, 117, 118, 119, 121, 120, 119, 1161,
1649 1159, 1160, 1163, 1162, 1161 };
1651 for (
int index = 0; index < kNumContexts; ++index) {
1654 os <<
"Context " << index <<
" has no mask!";
1660 os <<
"Context " << index;
1661 BOOST_REQUIRE_MESSAGE(kNuclStarts[index] ==
range->left,
1663 BOOST_REQUIRE_MESSAGE(kNuclEnds[index] ==
range->right,
1672 BOOST_REQUIRE(
strcmp(retval.
get(),
"F") == 0);
1680 BOOST_REQUIRE(
strcmp(retval.
get(),
"m;") == 0);
1689 string(4096,
'X').c_str());
1696 "D 20 128 1;R -d XXXXXXXXXXXXXXXXXXXX"));
1704 &filtering_options,
NULL);
1705 BOOST_REQUIRE(status == 0);
1706 BOOST_REQUIRE(filtering_options !=
NULL);
1707 BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1711 BOOST_REQUIRE(filtering_options ==
NULL);
1719 &filtering_options,
NULL);
1720 BOOST_REQUIRE(status == 0);
1721 BOOST_REQUIRE_EQUAL(
true, !!filtering_options->
mask_at_hash);
1726 BOOST_REQUIRE_EQUAL(
string(
"L;m;"),
string(retval.
get()));
1729 BOOST_REQUIRE(filtering_options ==
NULL);
1737 &filtering_options,
NULL);
1738 BOOST_REQUIRE(status == 0);
1739 BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1744 BOOST_REQUIRE(
strcmp(retval.
get(),
"L;") == 0);
1747 BOOST_REQUIRE(filtering_options ==
NULL);
1755 BOOST_REQUIRE(status == 0);
1756 BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1758 BOOST_REQUIRE(filtering_options->
segOptions);
1764 BOOST_REQUIRE(
strcmp(retval.
get(),
"S 10 1.0 1.5;") == 0);
1767 BOOST_REQUIRE(filtering_options ==
NULL);
1776 BOOST_REQUIRE_EQUAL(1, (
int) status);
1777 BOOST_REQUIRE(filtering_options ==
NULL);
1785 BOOST_REQUIRE(status == 0);
1786 BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1791 BOOST_REQUIRE(
strcmp(retval.
get(),
"L;") == 0);
1794 BOOST_REQUIRE(filtering_options ==
NULL);
1801 BOOST_REQUIRE(status == 0);
1802 BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1804 BOOST_REQUIRE(filtering_options->
segOptions);
1807 BOOST_REQUIRE(
strcmp(retval.
get(),
"L;") == 0);
1810 BOOST_REQUIRE(filtering_options ==
NULL);
1817 BOOST_REQUIRE(status == 0);
1820 BOOST_REQUIRE(! filtering_options->
segOptions);
1825 BOOST_REQUIRE(
strcmp(retval.
get(),
"W -t 9606;") == 0);
1828 BOOST_REQUIRE(filtering_options ==
NULL);
1833 const int kNewLevel = 21;
1834 const int kNewWindow = 68;
1848 BOOST_REQUIRE_EQUAL(0, (
int) status);
1850 BOOST_REQUIRE_EQUAL(
true, !!
result->mask_at_hash);
1851 BOOST_REQUIRE_EQUAL(kNewLevel,
result->dustOptions->level);
1852 BOOST_REQUIRE_EQUAL(kNewWindow,
result->dustOptions->window);
1853 BOOST_REQUIRE(
result->repeatFilterOptions);
1858 BOOST_REQUIRE_EQUAL(0, (
int) status);
1860 BOOST_REQUIRE_EQUAL(kNewLevel,
result->dustOptions->level);
1861 BOOST_REQUIRE_EQUAL(kNewWindow,
result->dustOptions->window);
1866 BOOST_REQUIRE_EQUAL(0, (
int) status);
1868 BOOST_REQUIRE_EQUAL(
true, !!
result->mask_at_hash);
1869 BOOST_REQUIRE(
result->repeatFilterOptions);
1917 BOOST_REQUIRE_THROW(
1926 const size_t kNumSeqs = 10;
1940 BOOST_REQUIRE_EQUAL((
size_t)kNumSeqs, (
size_t)mask_v.size());
1942 BOOST_REQUIRE_EQUAL((
size_t)0U, query_masks_list->size());
1947 const int kNumberLocIn = 7;
1948 const int kLocStartIn[kNumberLocIn] =
1949 { 281312, 281356, 281416, 281454, 281895, 282435, 282999};
1950 const int kLocEndIn[kNumberLocIn] =
1951 { 281736, 281406, 281446, 281878, 282423, 282968, 283191};
1953 const int kNumberLocOut = 4;
1954 const int kLocStartOut[kNumberLocOut] =
1955 { 281312, 281895, 282435, 282999};
1956 const int kLocEndOut[kNumberLocOut] =
1957 { 281878, 282423, 282968, 283191};
1960 for (
int index=0; index<kNumberLocIn; index++)
1977 BOOST_REQUIRE_EQUAL(count, kNumberLocOut);
1984 BOOST_REQUIRE_EQUAL(ssr->
left, kLocStartOut[count]);
1985 BOOST_REQUIRE_EQUAL(ssr->
right, kLocEndOut[count]);
1995 vector<EBlastProgramType> programs =
2000 vector<int> num_seqs_array;
2001 num_seqs_array.reserve(3);
2002 num_seqs_array.push_back(random_gen.
GetRand(1,10));
2003 num_seqs_array.push_back(random_gen.
GetRand(1,10));
2004 num_seqs_array.push_back(random_gen.
GetRand(1,10));
2006 ITERATE(vector<EBlastProgramType>, program, programs) {
2007 ITERATE(vector<int>, num_seqs, num_seqs_array) {
2014 #if SEQLOC_MIX_QUERY_OK
2017 const int kNumInts = 20;
2018 const int kStarts[kNumInts] =
2019 { 838, 1838, 6542, 7459, 9246, 10431, 14807, 16336, 19563,
2020 20606, 21232, 22615, 23822, 27941, 29597, 30136, 31287,
2021 31786, 33315, 35402 };
2022 const int kEnds[kNumInts] =
2023 { 961, 2010, 6740, 7573, 9408, 10609, 15043, 16511, 19783,
2024 20748, 21365, 22817, 24049, 28171, 29839, 30348, 31362,
2025 31911, 33485, 37952 };
2027 const int kNumMaskLocs = 7;
2028 const int kMaskStarts[kNumMaskLocs] =
2029 { 2607, 3000, 3739, 4238, 5211, 5602, 5716 };
2030 const int kMaskStops[kNumMaskLocs] =
2031 { 2769, 3006, 3809, 4244, 5218, 5608, 5722 };
2033 const int kNumMaskLocs = 8;
2034 const int kMaskStarts[kNumMaskLocs] =
2035 { 29678, 30136, 31305, 35786, 36285, 37258, 37649, 37763 };
2036 const int kMaskStops[kNumMaskLocs] =
2037 { 29839, 30136, 31311, 35856, 36291, 37265, 37655, 37769 };
2044 for (index = 0; index < kNumInts; ++index) {
2046 next_loc->SetInt().SetFrom(kStarts[index]);
2047 next_loc->SetInt().SetTo(kEnds[index]);
2048 next_loc->SetInt().SetId(qid);
2049 qloc->SetMix().Set().push_back(next_loc);
2053 scope->AddDefaults();
2057 query_v.push_back(*
query);
2065 query_v[0].
mask->GetPacked_int().Get()) {
2066 BOOST_REQUIRE_EQUAL(kMaskStarts[loc_index],
2067 (
int) (*itr)->GetFrom());
2068 BOOST_REQUIRE_EQUAL(kMaskStops[loc_index],
2069 (
int) (*itr)->GetTo());
2072 BOOST_REQUIRE_EQUAL(kNumMaskLocs, loc_index);
2092 BOOST_REQUIRE(mask_itr !=
NULL);
2093 BOOST_REQUIRE_EQUAL((
int)itr->GetFrom(), (
int)mask_itr->
ssr->
left);
2094 BOOST_REQUIRE_EQUAL((
int)itr->GetTo(), (
int)mask_itr->
ssr->
right);
2095 mask_itr = mask_itr->
next;
2097 BOOST_REQUIRE(mask_itr ==
NULL);
2123 BOOST_REQUIRE(mask_itr !=
NULL);
2124 BOOST_REQUIRE_EQUAL((
int)itr->GetFrom(), (
int)mask_itr->
ssr->
left);
2125 BOOST_REQUIRE_EQUAL((
int)itr->GetTo(), (
int)mask_itr->
ssr->
right);
2126 mask_itr = mask_itr->
next;
2128 BOOST_REQUIRE(mask_itr ==
NULL);
2150 BOOST_REQUIRE(mask_itr !=
NULL);
2151 BOOST_REQUIRE_EQUAL((
int)itr->GetFrom(), (
int)mask_itr->
ssr->
left);
2152 BOOST_REQUIRE_EQUAL((
int)itr->GetTo(), (
int)mask_itr->
ssr->
right);
2153 mask_itr = mask_itr->
next;
2155 BOOST_REQUIRE(mask_itr ==
NULL);
2172 reverse(rv.begin(), rv.end());
2176 BOOST_REQUIRE(mask_itr !=
NULL);
2177 BOOST_REQUIRE_EQUAL((
int)itr->GetFrom(), (
int)mask_itr->
ssr->
left);
2178 BOOST_REQUIRE_EQUAL((
int)itr->GetTo(), (
int)mask_itr->
ssr->
right);
2179 mask_itr = mask_itr->
next;
2181 BOOST_REQUIRE(mask_itr ==
NULL);
2191 BOOST_REQUIRE(taxids.
empty() ==
false);
2192 BOOST_REQUIRE(taxids.
find(9606) != taxids.
end());
bool IsReverse(ENa_strand s)
Declares the CBl2Seq (BLAST 2 Sequences) class.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
void BlastSetUp_MaskQuery(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastMaskLoc *filter_maskloc, EBlastProgramType program_number)
Masks the sequence given a BlastMaskLoc.
BlastMaskLoc * BlastMaskLocFree(BlastMaskLoc *mask_loc)
Deallocate memory for a BlastMaskLoc structure as well as the BlastSeqLoc's pointed to.
Int2 BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, EBlastProgramType program_number, const SBlastFilterOptions *filter_options, BlastMaskLoc **filter_out, Blast_Message **blast_message)
Does preparation for filtering and then calls BlastSetUp_Filter.
Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)
Produces SBlastFilterOptions from a string that has been traditionally supported in blast.
const Uint1 kNuclMask
BLASTNA element used to mask bases in BLAST.
Int2 BlastSetUp_Filter(EBlastProgramType program_number, Uint1 *sequence, Int4 length, Int4 offset, const SBlastFilterOptions *filter_options, BlastSeqLoc **seqloc_retval, Blast_Message **blast_message)
Runs seg filtering functions, according to the filtering options, returns BlastSeqLoc*.
Int2 BlastMaskLocDNAToProtein(BlastMaskLoc *mask_loc, const BlastQueryInfo *query_info)
Given a BlastMaskLoc with an array of lists of DNA mask locations, substitutes that array by a new ar...
void BlastSeqLocCombine(BlastSeqLoc **mask_loc, Int4 link_value)
Go through all mask locations in one sequence and combine any that overlap, deallocating the unneeded...
BlastMaskLoc * BlastMaskLocNew(Int4 total)
Allocate memory for a BlastMaskLoc.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Int2 BlastMaskLocProteinToDNA(BlastMaskLoc *mask_loc, const BlastQueryInfo *query_info)
Given a BlastMaskLoc with an array of lists of mask locations per protein frame, recalculates all mas...
char * BlastFilteringOptionsToString(const SBlastFilterOptions *filtering_options)
Convert the filtering options structure to a string.
Declares the CBlastNucleotideOptionsHandle class.
Definitions which are dependent on the NCBI C++ Object Manager.
SDustOptions * SDustOptionsFree(SDustOptions *dust_options)
Frees SDustOptions.
Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions **repeat_options, const char *dbname)
Resets name of db for repeat filtering.
SRepeatFilterOptions * SRepeatFilterOptionsFree(SRepeatFilterOptions *repeat_options)
Frees SRepeatFilterOptions.
SBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)
Frees SBlastFilterOptions and all subservient structures.
Int2 SBlastFilterOptionsMerge(SBlastFilterOptions **combined, const SBlastFilterOptions *opt1, const SBlastFilterOptions *opt2)
Merges two sets of options together, taking the non-default one as preferred.
Int2 SDustOptionsNew(SDustOptions **dust_options)
Allocates memory for SDustOptions, fills in defaults.
@ eRepeats
Repeat filtering for nucleotides.
@ eDust
low-complexity for nucleotides.
@ eSeg
low-complexity for proteins.
Int2 SBlastFilterOptionsNew(SBlastFilterOptions **filter_options, EFilterOptions type)
Allocates memory for SBlastFilterOptions.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)
Obtains the sequence length for a given query in the query, without taking into consideration any app...
Utilities initialize/setup BLAST.
void BlastSeqLoc_RestrictToInterval(BlastSeqLoc **mask, Int4 from, Int4 to)
Adjusts the mask locations coordinates to a sequence interval.
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eBlastp
Protein-Protein.
@ eBlastx
Translated nucl-Protein.
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
BOOST_AUTO_TEST_CASE(TSeqLocVector2Packed_seqint_TestIntervals)
vector< TSeqRange > TRangeVector
void setupQueryStructures(TSeqLocVector &query_vector, const CBlastOptions &kOpts, BLAST_SequenceBlk **query_blk, BlastQueryInfo **qinfo)
void setupQueryInfoForOffsetTranslation(CBlastQueryInfo &query_info)
static BlastSeqLoc * s_RangeVector2BlastSeqLoc(const TRangeVector &rv)
static void x_TestGetFilteredQueryRegions(ENa_strand strand)
static bool x_AreAllBasesMasked(const Uint1 *sequence, int start, int stop)
void BlastSeqLocListReverse(BlastSeqLoc **head)
Reverse elements in the list.
static void x_TestGetSeqLocInfoVector(EBlastProgramType program, size_t num_seqs)
Wrapper class for BLAST_SequenceBlk .
Defines BLAST error codes (user errors included)
static void x_TestLowerCaseMaskWith(ENa_strand strand, bool ignore_strand_in_mask)
Wrapper class for BlastMaskLoc .
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Encapsulates ALL the BLAST algorithm's options.
Collection of BlastSeqLoc lists for filtering processing.
Wrapper class for BlastQueryInfo .
void AddQuery(CRef< CBlastSearchQuery > q)
Add a query to the set.
TMaskedQueryRegions GetMaskedRegions(size_type i) const
Get the masked regions for a query by number.
CRef< objects::CSeq_loc > GetMasks(size_type i) const
Convenience method to get a CSeq_loc representing the masking locations.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
vector< CRange< TSeqPos > > TRanges
void AddInterval(const CSeq_interval &ival)
for convenience
structure for seqloc info
static CRef< CScope > NewScope(bool with_defaults=true)
Return a new scope, possibly (by default) with default loaders, which will include the Genbank loader...
CRef< blast::CBlastSearchQuery > CreateBlastSearchQuery(objects::CSeq_id &id, objects::ENa_strand s=objects::eNa_strand_unknown)
static CTestObjMgr & Instance()
Collection of masked regions for a single query sequence.
TMaskedQueryRegions RestrictToSeqInt(const objects::CSeq_interval &location) const
Return a new instance of this object that is restricted to the location specified.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
const_iterator find(const key_type &key) const
const_iterator end() const
Calls sym dust lib in algo/dustmask and returns CSeq_locs for use by BLAST.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static void query_test(int prepare, SQLRETURN expected, const char *expected_status)
CRef< objects::CPacked_seqint > TSeqLocVector2Packed_seqint(const TSeqLocVector &sequences)
Converts a TSeqLocVector into a CPacked_seqint.
int GetDustFilteringLinker() const
Get linker parameter for dust.
void Blast_GetSeqLocInfoVector(EBlastProgramType program, const objects::CPacked_seqint &queries, const BlastMaskLoc *mask, TSeqLocInfoVector &mask_v)
Converts a BlastMaskLoc internal structure into an object returned by the C++ API.
BlastQueryInfo * Release()
bool GetDustFiltering() const
Is dust filtering enabled?
void SetWindowMaskerTaxId(int taxid)
Enable window masker and select a taxid (or 0 to disable).
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
bool Empty()
Returns true if this object contains no masking information.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
int GetDustFilteringLevel() const
Get level parameter for dust.
const char * GetWindowMaskerDatabase() const
Get the window masker database name (or NULL if not set).
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
void GetTaxIdWithWindowMaskerSupport(set< int > &supported_taxids)
This function returns a list of NCBI taxonomy IDs for which there exists windowmasker masking data to...
void SetRepeatFilteringDB(const char *db)
Enable repeat filtering.
bool QueryHasMultipleFrames() const
Check whether the query is multiframe for this type of search.
void Blast_FindWindowMaskerLoc(CBlastQueryVector &query, const CBlastOptions *opts)
Find Window Masker filtered locations using a BlastOptions.
size_t GetNumFrames() const
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
bool GetMaskAtHash() const
Returns whether masking should only be done for lookup table creation.
void SetRepeatFiltering(bool val)
Enable repeat filtering.
bool GetRepeatFiltering() const
Is repeat filtering enabled?
void Blast_FindRepeatFilterLoc(TSeqLocVector &query_loc, const CBlastOptionsHandle *opts_handle)
Finds repeats locations for a given set of sequences.
unsigned int GetNumberOfContexts(EBlastProgramType p)
Returns the number of contexts for a given BLAST program.
const set< ETranslationFrame > & ListFrames()
Returns the list of frame values for which this object contains masking information.
int GetDustFilteringWindow() const
Get window parameter for dust.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
void UseProteinCoords(TSeqPos dna_length)
Adjusts all stored masks from nucleotide to protein offsets.
int GetWindowMaskerTaxId() const
Get the window masker taxid (or 0 if not set).
void SetMaskAtHash(bool m=true)
Sets MaskAtHash.
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
void Blast_FindDustFilterLoc(TSeqLocVector &queries, const CBlastNucleotideOptionsHandle *nucl_handle)
Finds dust locations for a given set of sequences by calling the symmetric dust lib.
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
void SetDustFiltering(bool val)
Enable dust filtering.
@ eNotSupported
Feature not supported.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
element_type * get(void) const
Get pointer.
TErrCode GetErrCode(void) const
Get error code.
const string & GetMsg(void) const
Get message string.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
@ e_YES
SeqIds compared, are equivalent.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eSame
CSeq_locs contain each other.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
Uint4 TValue
Type of the generated integer value and/or the seed value.
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
bool CanGetStrand(void) const
Check if it is safe to call GetStrand method.
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
void SetStrand(TStrand value)
Assign a value to Strand data member.
@ eNa_strand_both
in forward orientation
@ e_Gi
GenInfo Integrated Database.
objects::CSeq_id * GenerateRandomSeqid_Gi()
vector< EBlastProgramType > GetAllBlastProgramTypes()
range(_Ty, _Ty) -> range< _Ty >
constexpr bool empty(list< Ts... >) noexcept
double value_type
The numeric datatype used by the parser.
Magic spell ;-) needed for some weird compilers... very empiric.
int strcmp(const char *str1, const char *str2)
Defines: CTimeFormat - storage class for time format.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static pcre_uint8 * buffer
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
C++ implementation of repeats filtering for C++ BLAST.
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Structure to hold a sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Int4 query_length
Length of this query, strand or frame.
Structure for keeping the query masking information.
Int4 total_size
Total size of the BlastSeqLoc array below.
BlastSeqLoc ** seqloc_array
Array of masked locations.
The query related information.
BlastContextInfo * contexts
Information per context.
Int4 last_context
Index of the last element of the context array.
Used to hold a set of positions, mostly used for filtering.
SSeqRange * ssr
location data on the sequence.
struct BlastSeqLoc * next
next in linked list
Structure to hold a message from the core of the BLAST engine.
SRepeatFilterOptions * repeatFilterOptions
for organism specific repeat filtering.
SSegOptions * segOptions
low-complexity filtering for proteins sequences (includes translated nucleotides).
Boolean mask_at_hash
mask query only for lookup table creation
SWindowMaskerOptions * windowMaskerOptions
organism specific filtering with window masker.
SDustOptions * dustOptions
low-complexity filtering for nucleotides.
int window
initial window to trigger further work.
Structure to represent a single sequence to be fed to BLAST.
A structure containing two integers, used e.g.
Int4 left
left endpoint of range (zero based)
Int4 right
right endpoint of range (zero based)
Utilities for more convenient use of the Boost.Test library.
static CS_CONTEXT * context
Interface to retrieve list of available windowmasker filtering.
Blast wrappers for WindowMasker filtering.
voidp calloc(uInt items, uInt size)