$Id: blastfilter_unit_test.cpp 95564 2021-11-26 14:52:02Z grichenk $
2  * ===========================================================================
3  *
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
Author: Ilya Dondoshansky
27  *
28  * File Description:
29  * Unit test for low complexity filtering
30  *
31  * ===========================================================================
32  */
34 #include <ncbi_pch.hpp>
35 #include <corelib/test_boost.hpp>
36 #include <corelib/ncbitime.hpp>
40 #include <serial/iterator.hpp>
41 #include <util/random_gen.hpp>
42 #include <objmgr/util/sequence.hpp>
46 #include "blast_objmgr_priv.hpp"
54 // For repeats and dust filtering only
57 #include "winmask_filter.hpp"
58 #include "dust_filter.hpp"
62 #include "test_objmgr.hpp"
63 #include "blast_test_util.hpp"
65 using namespace std;
66 using namespace ncbi;
67 using namespace ncbi::objects;
68 using namespace ncbi::blast;
70 typedef vector<TSeqRange> TRangeVector;
// Builds a BlastSeqLoc chain from the ranges in `rv`, calling BlastSeqLocNew
// once per range with that range's From/To values.
// Returns NULL when `rv` is empty, otherwise the head of the chain.
72 static BlastSeqLoc*
74 {
75  BlastSeqLoc* retval = NULL;
77  if (rv.empty()) {
78  return retval;
79  }
    // `tail` holds the value returned by the previous BlastSeqLocNew call.
    // The first call is given &retval (so the head gets recorded); every
    // later call is given &tail instead -- presumably so that appending does
    // not have to walk the list from the head each time.
81  BlastSeqLoc* tail = NULL;
82  ITERATE(TRangeVector, itr, rv) {
83  tail = BlastSeqLocNew(tail ? &tail : &retval,
84  itr->GetFrom(),
85  itr->GetTo());
86  }
88  return retval;
89 }
92  size_t num_seqs)
93 {
94  const string kProgName(Blast_ProgramNameFromType(program));
    // One Seq-id slot per query; each id contributes a 0..100000 interval to
    // the packed-int that describes the queries.
95  typedef vector< CRef<CSeq_id> > TSeqIds;
96  TSeqIds seqid_v(num_seqs);
97  generate(seqid_v.begin(), seqid_v.end(),
99  CPacked_seqint seqintervals;
100  ITERATE(TSeqIds, seqid, seqid_v) {
101  seqintervals.AddInterval(**seqid, 0, 100000);
102  }
    // The mask structure is sized as (number of queries) x (contexts per
    // query for this program).
104  const size_t kNumContexts(GetNumberOfContexts(program));
105  CBlastMaskLoc mask(BlastMaskLocNew(num_seqs*kNumContexts));
107  // Fill the masks
    // Slot `index` receives a single synthetic range
    // [index, index + kOffsetLength], so every slot is distinguishable.
108  const TSeqPos kOffsetLength(30);
109  for (int index = 0; index < mask->total_size; ++index) {
110  mask->seqloc_array[index] = BlastSeqLocNew(NULL, index,
111  index+kOffsetLength);
112  }
    // Convert to the TSeqLocInfoVector representation; one entry per query is
    // required.
113  TSeqLocInfoVector mask_v;
114  Blast_GetSeqLocInfoVector(program, seqintervals, mask, mask_v);
115  BOOST_REQUIRE_EQUAL(num_seqs, mask_v.size());
117  unsigned int qindex(0); // query index
118  ITERATE(TSeqLocInfoVector, query_masks_list, mask_v) {
    // blastn is expected to yield exactly one mask per query; every other
    // program is expected to yield one mask per context.
119  const size_t kNumMasks = program == eBlastTypeBlastn
120  ? 1 : kNumContexts;
121  BOOST_REQUIRE_MESSAGE( kNumMasks == query_masks_list->size(),
122  "Failed on " + kProgName);
123  size_t context = 0;
124  ITERATE(TMaskedQueryRegions, itr, *query_masks_list) {
    // Failure message shared by all the checks for this (query, context)
125  CNcbiOstrstream ss;
126  ss << "Error in query number " << qindex << ", context "
127  << context << " ('" << kProgName << "')";
128  // Validate the frame
129  int frame = program == eBlastTypeBlastn
131  : BLAST_ContextToFrame(program, context);
132  BOOST_REQUIRE_MESSAGE(frame == (*itr)->GetFrame(),
133  (string)CNcbiOstrstreamToString(ss));
135  // Validate the artificially built offsets of the mask
    // The reference range is read back from the seqloc_array slot at
    // kNumContexts*qindex + context and compared with the converted interval.
136  const BlastSeqLoc* loc =
137  mask->seqloc_array[kNumContexts*qindex+context];
138  BOOST_REQUIRE(loc != NULL);
139  TSeqRange offsets(loc->ssr->left, loc->ssr->right);;
141  (offsets.GetFrom() == (*itr)->GetInterval().GetFrom(),
142  (string)CNcbiOstrstreamToString(ss));
144  (offsets.GetTo() == (*itr)->GetInterval().GetTo(),
145  (string)CNcbiOstrstreamToString(ss));
146  ++context;
147  }
    // Every expected mask for this query must have been visited
148  BOOST_REQUIRE_EQUAL(kNumMasks, context);
149  ++qindex;
150  }
151 }
153 // Returns true if *all* bases in the range provided are masked
154 static bool x_AreAllBasesMasked(const Uint1* sequence, int start, int stop)
155 {
156  BOOST_CHECK(start <= stop);
157  for (int i = start; i < stop; i++) {
158  if (sequence[i] != kNuclMask) {
159  return false;
160  }
161  }
162  return true;
163 }
166 public:
168  bool ignore_strand_in_mask)
169  {
    // Start/end offsets of the lower-case mask locations attached to the
    // query below (entry i of each array describes one interval).
170  const int kNumLcaseLocs = 11;
171  const int kLcaseStarts[kNumLcaseLocs] =
172  { 0, 78, 217, 380, 694, 1018, 1128, 2817, 3084, 3428, 3782 };
173  const int kLcaseEnds[kNumLcaseLocs] =
174  { 75, 208, 316, 685, 1004, 1122, 1298, 2952, 3409, 3733, 3916 };
176  int i = 0; // loop index
177  const int kQuerySize = 9180;
    // Mirror every interval for the minus strand: offsets are reflected
    // about kQuerySize - 1, so the original end yields the mirrored start and
    // the original start yields the mirrored end.
178  vector<int> kLcaseStartsNegStrand, kLcaseEndsNegStrand;
179  kLcaseStartsNegStrand.reserve(kNumLcaseLocs);
180  kLcaseEndsNegStrand.reserve(kNumLcaseLocs);
181  for (i = 0; i < kNumLcaseLocs; i++) {
182  int start = kQuerySize - 1 - kLcaseEnds[i];
183  int stop = kQuerySize - 1 - kLcaseStarts[i];
184  kLcaseStartsNegStrand.push_back(start);
185  kLcaseEndsNegStrand.push_back(stop);
186  }
188  CSeq_id id("gi|1945388");
189  unique_ptr<SSeqLoc> qsl(
190  CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
191  // Fill the lower case mask into the SSeqLoc
    // Each interval is checked to have no strand right after being added,
    // and is then given the strand under test.
192  CSeq_loc* seqloc = new CSeq_loc();
193  for (int index = 0; index < kNumLcaseLocs; ++index) {
194  seqloc->SetPacked_int().AddInterval(id, kLcaseStarts[index],
195  kLcaseEnds[index]);
196  BOOST_CHECK(!seqloc->GetPacked_int().Get().back()->CanGetStrand());
197  seqloc->SetPacked_int().Set().back()->SetStrand(strand);
198  }
199  qsl->mask.Reset(seqloc);
200  qsl->ignore_strand_in_mask = ignore_strand_in_mask;
    // query_v keeps a copy of the masked query; its mask is inspected again
    // after the search.
202  TSeqLocVector query_v;
203  query_v.push_back(*qsl);
    // Dust filtering and mask-at-hash are both switched off for this search.
205  nucl_handle->SetDustFiltering(false);
206  nucl_handle->SetMaskAtHash(false);
208  // Run a self hit BLAST search, discard the return value, and get the
209  // masked query regions
210  blast::CBl2Seq blaster(*qsl.get(), *qsl.get(), *nucl_handle);
211  (void) blaster.Run();
213  // check that the actual query sequence was masked at the proper
214  // locations
215  BOOST_CHECK_EQUAL(false, nucl_handle->GetMaskAtHash());
    // In the checks below the plus-strand range is always offset by
    // contexts[0].query_offset and the mirrored (minus-strand) range by
    // contexts[1].query_offset.
216  for (i = 0; i < kNumLcaseLocs; i++) {
217  const pair<int, int> range_plus(kLcaseStarts[i], kLcaseEnds[i]);
218  const pair<int, int> range_minus(kLcaseStartsNegStrand[i],
219  kLcaseEndsNegStrand[i]);
220  int starting_offset = 0;
    // Strand ignored, or mask given on both strands: both ranges must be
    // fully masked.
222  if (ignore_strand_in_mask || strand == eNa_strand_both) {
223  starting_offset =
224  blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[0].query_offset;
225  BOOST_CHECK(x_AreAllBasesMasked
226  (blaster.m_Blast->m_InternalData->m_Queries->sequence,
227  starting_offset + range_plus.first,
228  starting_offset + range_plus.second));
230  starting_offset =
231  blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[1].query_offset;
232  BOOST_CHECK(x_AreAllBasesMasked
233  (blaster.m_Blast->m_InternalData->m_Queries->sequence,
234  starting_offset + range_minus.first,
235  starting_offset + range_minus.second));
236  } else {
    // Plus-strand mask: the plus range must be masked, the mirrored range
    // must not be entirely masked.
238  if (strand == eNa_strand_plus) {
239  starting_offset =
240  blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[0].query_offset;
241  BOOST_CHECK(x_AreAllBasesMasked
242  (blaster.m_Blast->m_InternalData->m_Queries->sequence,
243  starting_offset + range_plus.first,
244  starting_offset + range_plus.second));
246  starting_offset =
247  blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[1].query_offset;
248  BOOST_CHECK(!x_AreAllBasesMasked
249  (blaster.m_Blast->m_InternalData->m_Queries->sequence,
250  starting_offset + range_minus.first,
251  starting_offset + range_minus.second));
    // Minus-strand mask: the expectations are swapped.
252  } else if (strand == eNa_strand_minus) {
253  starting_offset =
254  blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[0].query_offset;
255  BOOST_CHECK(!x_AreAllBasesMasked
256  (blaster.m_Blast->m_InternalData->m_Queries->sequence,
257  starting_offset + range_plus.first,
258  starting_offset + range_plus.second));
259  starting_offset =
260  blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[1].query_offset;
261  BOOST_CHECK(x_AreAllBasesMasked
262  (blaster.m_Blast->m_InternalData->m_Queries->sequence,
263  starting_offset + range_minus.first,
264  starting_offset + range_minus.second));
265  } else {
    // Any other strand value is not handled by this helper: terminate.
266  abort();
267  }
268  }
269  }
271  // Check that the masked regions (returned as part of the original
272  // SSeqLoc.mask field or from CBl2Seq::GetFilteredQueryRegions) are
273  // those on the plus strand only
274  TSeqLocInfoVector masked_regions_vector =
275  blaster.GetFilteredQueryRegions();
276  BOOST_CHECK(masked_regions_vector.size() == 1);
277  BOOST_CHECK_EQUAL(masked_regions_vector.front().size(),
278  (size_t)kNumLcaseLocs);
    // First pass: the mask stored in the copied query must still list the
    // original plus-strand offsets, in the original order.
280  BOOST_CHECK(query_v[0].mask->IsPacked_int());
281  BOOST_CHECK_EQUAL(query_v[0].mask->GetPacked_int().Get().size(),
282  masked_regions_vector.front().size());
283  int loc_index = 0;
284  ITERATE(list< CRef<CSeq_interval> >, itr,
285  query_v[0].mask->GetPacked_int().Get()) {
286  BOOST_CHECK_EQUAL(kLcaseStarts[loc_index], (int)(*itr)->GetFrom());
287  BOOST_CHECK_EQUAL(kLcaseEnds[loc_index], (int)(*itr)->GetTo());
288  ++loc_index;
289  }
290  BOOST_CHECK_EQUAL(kNumLcaseLocs, loc_index);
    // Second pass: the regions reported by the search must list the same
    // offsets and must not carry a strand.
292  loc_index = 0;
293  ITERATE(TMaskedQueryRegions, itr, masked_regions_vector[0]) {
294  const CSeq_interval& intv = (*itr)->GetInterval();
295  BOOST_CHECK_EQUAL(kLcaseStarts[loc_index], (int)intv.GetFrom());
296  BOOST_CHECK_EQUAL(kLcaseEnds[loc_index], (int)intv.GetTo());
297  BOOST_CHECK(!intv.CanGetStrand());
298  BOOST_CHECK_EQUAL((*itr)->GetFrame(),
300  loc_index++;
301  }
303  BOOST_CHECK_EQUAL(kNumLcaseLocs, loc_index);
304  }
305 };
307 BOOST_AUTO_TEST_SUITE(blastfilter)
    // Expected dust-masked intervals for the query as (from, to) pairs,
    // listed in ascending order.
310  typedef vector< pair<TSeqPos, TSeqPos> > TSegments;
311  TSegments masked_offsets;
312  masked_offsets.push_back(make_pair(298U, 305U));
313  masked_offsets.push_back(make_pair(875U, 882U));
314  masked_offsets.push_back(make_pair(1018U, 1115U));
315  masked_offsets.push_back(make_pair(1449U, 1479U));
316  masked_offsets.push_back(make_pair(3113U, 3133U));
317  masked_offsets.push_back(make_pair(3282U, 3298U));
318  masked_offsets.push_back(make_pair(3428U, 3441U));
319  masked_offsets.push_back(make_pair(3598U, 3606U));
320  masked_offsets.push_back(make_pair(4704U, 4710U));
321  masked_offsets.push_back(make_pair(6364U, 6373U));
322  masked_offsets.push_back(make_pair(6512U, 6573U));
323  masked_offsets.push_back(make_pair(7600U, 7672U));
324  masked_offsets.push_back(make_pair(7766U, 7772U));
325  masked_offsets.push_back(make_pair(8873U, 8880U));
326  masked_offsets.push_back(make_pair(9109U, 9179U));
328  const size_t kNumQueries(1);
329  const size_t kNumLocs(masked_offsets.size());
330  size_t index(0);
    // Two independent copies of the same query: one is filtered directly,
    // the other is run through a search.
332  CSeq_id id("gi|1945388");
333  unique_ptr<SSeqLoc> qsl(
334  CTestObjMgr::Instance().CreateSSeqLoc(id, strand));
335  TSeqLocVector query_reference(kNumQueries, *qsl);
336  TSeqLocVector query_test(kNumQueries, *qsl);
339  // Filter the query regions using the C++ APIs
340  Blast_FindDustFilterLoc(query_reference, &(*nucl_handle));
341  BOOST_CHECK(query_reference[0].mask->IsPacked_int());
342  const CPacked_seqint::Tdata& seqinterval_list =
343  query_reference[0].mask->GetPacked_int().Get();
344  BOOST_CHECK_EQUAL(kNumLocs, seqinterval_list.size());
345  // CSeq_loc_mapper returns intervals sorted in reverse order if on minus strand.
    // NOTE(review): when `reverse` is set, the final `index--` wraps the
    // unsigned index past zero; it is reset before its next use. The size
    // check above is a non-fatal BOOST_CHECK, so a list longer than expected
    // would index masked_offsets out of range -- confirm this is acceptable.
346  bool reverse = IsReverse(query_reference[0].mask->GetStrand());
347  index = reverse ? masked_offsets.size() - 1 : 0;
348  ITERATE(CPacked_seqint::Tdata, itr, seqinterval_list) {
349  BOOST_CHECK_EQUAL(masked_offsets[index].first,
350  (*itr)->GetFrom());
351  BOOST_CHECK_EQUAL(masked_offsets[index].second,
352  (*itr)->GetTo());
353  reverse ? index-- : index++;
354  }
356  // Run a self hit BLAST search, discard the return value, and get the
357  // masked query regions
358  blast::CBl2Seq blaster(query_test, query_test, *nucl_handle);
359  (void) blaster.Run();
360  TSeqLocInfoVector masked_regions_vector =
361  blaster.GetFilteredQueryRegions();
363  BOOST_CHECK_EQUAL(kNumQueries, query_reference.size());
364  BOOST_CHECK_EQUAL(kNumQueries, query_test.size());
365  BOOST_CHECK_EQUAL(kNumQueries, masked_regions_vector.size());
    // The regions reported by the search are compared in forward order,
    // regardless of the strand.
367  TMaskedQueryRegions& masked_regions = *masked_regions_vector.begin();
368  BOOST_CHECK_EQUAL(kNumLocs, masked_regions.size());
369  index = 0;
370  ITERATE(TMaskedQueryRegions, itr, masked_regions) {
371  BOOST_CHECK_EQUAL(masked_offsets[index].first,
372  (*itr)->GetInterval().GetFrom());
373  BOOST_CHECK_EQUAL(masked_offsets[index].second,
374  (*itr)->GetInterval().GetTo());
375  index++;
376  }
377 }
// Builds three interval Seq-locs (one per GI, each with its own range) and
// requires the intervals found in `packed_seqint` to match them one-to-one,
// in order.
379 BOOST_AUTO_TEST_CASE(TSeqLocVector2Packed_seqint_TestIntervals) {
381  vector< CRef<CSeq_id> > gis;
382  gis.push_back(CRef<CSeq_id>(new CSeq_id(CSeq_id::e_Gi, 6)));
383  gis.push_back(CRef<CSeq_id>(new CSeq_id(CSeq_id::e_Gi, 129295)));
384  gis.push_back(CRef<CSeq_id>(new CSeq_id(CSeq_id::e_Gi, 15606659)));
    // ranges[i] is the interval requested on gis[i]
386  vector<TSeqRange> ranges;
387  ranges.push_back(TSeqRange(10, 100));
388  ranges.push_back(TSeqRange(100, 200));
389  ranges.push_back(TSeqRange(50, 443));
391  BOOST_REQUIRE(gis.size() == ranges.size());
    // Each input entry gets a scope of its own from CSimpleOM::NewScope()
392  TSeqLocVector input(gis.size());
393  size_t i(0);
394  for (i = 0; i < gis.size(); i++) {
395  CRef<CSeq_loc> seqloc(new CSeq_loc(*gis[i],
396  ranges[i].GetFrom(),
397  ranges[i].GetTo()));
398  input[i] = SSeqLoc(seqloc, CSimpleOM::NewScope());
399  }
    // Id, From and To of every resulting interval must equal the input at
    // the same position.
402  i = 0;
403  ITERATE(CPacked_seqint::Tdata, query_interval, packed_seqint->Get()) {
404  BOOST_REQUIRE(gis[i]->Match((*query_interval)->GetId()));
405  BOOST_REQUIRE_EQUAL(ranges[i].GetFrom(),
406  (*query_interval)->GetFrom());
407  BOOST_REQUIRE_EQUAL(ranges[i].GetTo(),
408  (*query_interval)->GetTo());
409  i++;
410  }
411 }
// Builds whole-sequence Seq-locs (no explicit interval) for three GIs and
// requires every interval found in `packed_seqint` to start at 0 and to end
// at the value recorded next to its GI.
413 BOOST_AUTO_TEST_CASE(TSeqLocVector2Packed_seqint_TestNoIntervals) {
    // Pair of (GI, value the test expects GetTo() to return for that GI)
414  typedef pair<TGi, TSeqPos> TGiLength;
415  vector<TGiLength> gis;
416  gis.push_back(make_pair(GI_CONST(6), 342U));
417  gis.push_back(make_pair(GI_CONST(129295), 232U));
418  gis.push_back(make_pair(GI_CONST(15606659), 443U));
421  input.reserve(gis.size());
422  ITERATE(vector<TGiLength>, gi, gis) {
423  CRef<CSeq_loc> seqloc(new CSeq_loc);
424  seqloc->SetWhole().SetGi(gi->first);
425  input.push_back(SSeqLoc(seqloc, CSimpleOM::NewScope()));
426  }
    // Results are matched against `gis` by position
429  int i(0);
430  const TSeqPos kStartingPosition(0);
431  ITERATE(CPacked_seqint::Tdata, query_interval, packed_seqint->Get()) {
432  const TGiLength& kGiLength = gis[i++];
433  const CSeq_id kTargetId(CSeq_id::e_Gi, kGiLength.first);
434  BOOST_REQUIRE(kTargetId.Match((*query_interval)->GetId()));
435  BOOST_REQUIRE_EQUAL(kStartingPosition,
436  (*query_interval)->GetFrom());
437  BOOST_REQUIRE_EQUAL(kGiLength.second,
438  (*query_interval)->GetTo());
439  }
440 }
// Empty-input case: the only visible requirement is that `retval` reports
// Empty().
442 BOOST_AUTO_TEST_CASE(TSeqLocVector2Packed_seqint_TestEmptyInput) {
445  BOOST_REQUIRE(retval.Empty());
446 }
449  const CBlastOptions& kOpts,
450  BLAST_SequenceBlk** query_blk,
451  BlastQueryInfo** qinfo)
452 {
    // Fills *qinfo via SetupQueryInfo and *query_blk via SetupQueries for
    // `query_vector`, using the strand option taken from `kOpts`, then
    // requires that no search messages were produced.
453  TSearchMessages blast_msg;
456  ENa_strand strand_opt = kOpts.GetStrandOption();
458  SetupQueryInfo(query_vector, prog, strand_opt, qinfo);
    // *qinfo is wrapped only for the duration of the SetupQueries call and
    // released right after -- presumably so the wrapper does not free the
    // structure that the caller keeps using (confirm against
    // CBlastQueryInfo).
459  CBlastQueryInfo qi_tmp(*qinfo);
460  SetupQueries(query_vector, qi_tmp, query_blk,
461  prog, strand_opt, blast_msg);
462  qi_tmp.Release();
    // Every per-query message container must be empty
463  ITERATE(TSearchMessages, m, blast_msg) {
464  BOOST_REQUIRE(m->empty());
465  }
466 }
    // Expected intervals (start/end per entry) produced by the eSeg filter
    // on the query below.
469  const int kNumLocs = 3;
470  const int kSegStarts[kNumLocs] = { 15, 55, 495 };
471  const int kSegEnds[kNumLocs] = { 27, 68, 513 };
472  CSeq_id id("gi|3091");
473  unique_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(id));
474  TSeqLocVector query_v;
475  query_v.push_back(*qsl);
476  CBlastQueryInfo query_info;
477  CBLAST_SequenceBlk query_blk;
480  setupQueryStructures(query_v, opts->GetOptions(),
481  &query_blk, &query_info);
    // Run the filter directly on the first context of the query; the
    // resulting locations come back through filter_slp.
483  BlastSeqLoc *filter_slp = NULL, *loc;
484  SBlastFilterOptions* filtering_options;
485  SBlastFilterOptionsNew(&filtering_options, eSeg);
486  Int2 status = BlastSetUp_Filter(opts->GetOptions().GetProgramType(),
487  query_blk->sequence,
488  query_info->contexts[0].query_length,
489  0,
490  filtering_options,
491  & filter_slp, NULL);
    // The free call's return value is required to be NULL
492  filtering_options = SBlastFilterOptionsFree(filtering_options);
493  BOOST_REQUIRE(filtering_options == NULL);
494  BOOST_REQUIRE(status == 0);
    // Walk the returned list and compare each node with the expected
    // interval at the same position.
    // NOTE(review): loc_index is not bounded inside the loop; a list longer
    // than kNumLocs would read past kSegStarts/kSegEnds before the count
    // check below runs.
496  Int4 loc_index;
497  SSeqRange* di;
498  for (loc_index=0, loc = filter_slp; loc; loc = loc->next, ++loc_index) {
499  di = loc->ssr;
500  BOOST_REQUIRE_EQUAL(kSegStarts[loc_index], di->left);
501  BOOST_REQUIRE_EQUAL(kSegEnds[loc_index], di->right);
502  }
503  BlastSeqLocFree(filter_slp);
505  BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
506 }
508 BOOST_AUTO_TEST_CASE(RepeatsFilter) {
509  const size_t kNumLocs = 4;
510  const TSeqPos kRepeatStarts[kNumLocs] = { 0, 380, 2851, 3113 };
511  const TSeqPos kRepeatEnds[kNumLocs] = { 212, 1297, 2953, 3764 };
512  CSeq_id id("gi|1945388");
513  unique_ptr<SSeqLoc> qsl(
514  CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
515  TSeqLocVector query_v;
516  query_v.push_back(*qsl);
518  CBlastNucleotideOptionsHandle nucl_handle;
519  nucl_handle.SetRepeatFiltering(true);
520  Blast_FindRepeatFilterLoc(query_v, &nucl_handle);
522  BOOST_REQUIRE(query_v[0].mask.NotEmpty());
523  BOOST_REQUIRE(query_v[0].mask->IsPacked_int());
524  const CPacked_seqint::Tdata& seqinterval_list =
525  query_v[0].mask->GetPacked_int().Get();
527  size_t loc_index = 0;
528  BOOST_REQUIRE_EQUAL(kNumLocs, seqinterval_list.size());
529  ITERATE(CPacked_seqint::Tdata, itr, seqinterval_list) {
530 // cerr << (*itr)->GetFrom() << " " << (*itr)->GetTo() << endl;
531  BOOST_REQUIRE_EQUAL(kRepeatStarts[loc_index], (*itr)->GetFrom());
532  BOOST_REQUIRE_EQUAL(kRepeatEnds[loc_index], (*itr)->GetTo());
533  BOOST_REQUIRE(!(*itr)->CanGetStrand());
534  ++loc_index;
535  }
537  BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
538 }
540 BOOST_AUTO_TEST_CASE(WindowMasker)
541 {
542  int pair_size = sizeof(TSeqPos) * 2;
544  const TSeqPos intervals[] =
545  { 0, 79,
546  100, 122,
547  146, 169,
548  225, 247,
549  286, 329,
550  348, 366,
551  373, 688,
552  701, 1303,
553  1450, 1485,
554  2858, 2887,
555  3103, 3212,
556  3217, 3735,
557  4142, 4162,
558  5423, 5443,
559  5797, 5817,
560  6333, 6383,
561  6458, 6477,
562  6519, 6539,
563  7043, 7063,
564  7170, 7189,
565  7604, 7623,
566  8454, 8476,
567  8829, 8851,
568  8860, 8889
569  };
571  size_t num_locs = sizeof(intervals) / pair_size;
572  BOOST_REQUIRE(0 == (sizeof(intervals) % pair_size));
574  CSeq_id id("gi|1945388");
575  unique_ptr<SSeqLoc>
576  qsl(CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
578  TSeqLocVector query_v;
579  query_v.push_back(*qsl);
581  CBlastNucleotideOptionsHandle nucl_handle;
582  nucl_handle.SetWindowMaskerTaxId(9606);
583  Blast_FindWindowMaskerLoc(query_v, &nucl_handle);
585  BOOST_REQUIRE(query_v[0].mask.NotEmpty());
586  BOOST_REQUIRE(query_v[0].mask->IsPacked_int());
587  const CPacked_seqint::Tdata& seqinterval_list =
588  query_v[0].mask->GetPacked_int().Get();
590  size_t loc_index = 0;
591  BOOST_REQUIRE_EQUAL(num_locs, seqinterval_list.size());
593  ITERATE(CPacked_seqint::Tdata, itr, seqinterval_list) {
594  //cout << (*itr)->GetFrom() << " " << (*itr)->GetTo() << endl;
595  BOOST_REQUIRE_EQUAL(intervals[loc_index], (*itr)->GetFrom());
596  BOOST_REQUIRE_EQUAL(intervals[loc_index+1], (*itr)->GetTo());
597  BOOST_REQUIRE(! (*itr)->CanGetStrand());
598  loc_index += 2;
599  }
601  BOOST_REQUIRE_EQUAL(num_locs*2, loc_index);
602 }
604 BOOST_AUTO_TEST_CASE(RepeatsFilter_OnSeqInterval) {
605  vector<TSeqRange> masked_regions;
606  masked_regions.push_back(TSeqRange(85028, 85528));
607  masked_regions.push_back(TSeqRange(85539, 85736));
608  masked_regions.push_back(TSeqRange(86334, 86461));
609  masked_regions.push_back(TSeqRange(86487, 86585));
610  masked_regions.push_back(TSeqRange(86730, 87050));
611  masked_regions.push_back(TSeqRange(87313, 87370));
612  masked_regions.push_back(TSeqRange(88134, 88140));
613  masked_regions.push_back(TSeqRange(88171, 88483));
614  masked_regions.push_back(TSeqRange(89032, 89152));
615  masked_regions.push_back(TSeqRange(91548, 91704));
616  masked_regions.push_back(TSeqRange(92355, 92539));
617  masked_regions.push_back(TSeqRange(92550, 92973));
618  masked_regions.push_back(TSeqRange(92983, 93283));
619  masked_regions.push_back(TSeqRange(93296, 93384));
620  masked_regions.push_back(TSeqRange(93472, 93642));
621  masked_regions.push_back(TSeqRange(93685, 94026));
622  masked_regions.push_back(TSeqRange(94435, 94545));
624  CSeq_id id("gi|20196551");
625  unique_ptr<SSeqLoc> qsl(
626  CTestObjMgr::Instance().CreateSSeqLoc(id,
627  make_pair<TSeqPos, TSeqPos>(84999, 94637),
628  eNa_strand_both));
629  TSeqLocVector query_v;
630  query_v.push_back(*qsl);
632  CBlastNucleotideOptionsHandle nucl_handle;
633  nucl_handle.SetDustFiltering(true);
634  nucl_handle.SetRepeatFiltering(true);
635  Blast_FindDustFilterLoc(query_v, &nucl_handle);
636  Blast_FindRepeatFilterLoc(query_v, &nucl_handle);
638  BOOST_REQUIRE(query_v[0].mask->IsPacked_int());
639  const CPacked_seqint::Tdata& seqinterval_list =
640  query_v[0].mask->GetPacked_int().Get();
642  size_t loc_index = 0;
643  BOOST_REQUIRE_EQUAL(masked_regions.size(), seqinterval_list.size());
644  ITERATE(CPacked_seqint::Tdata, itr, seqinterval_list) {
645 // cerr << (*itr)->GetFrom() << " " << (*itr)->GetTo() << endl;
646  BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetFrom(),
647  (*itr)->GetFrom());
648  BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetTo(),
649  (*itr)->GetTo());
650  BOOST_REQUIRE(!(*itr)->CanGetStrand());
651  ++loc_index;
652  }
654  BOOST_REQUIRE_EQUAL(masked_regions.size(), loc_index);
655 }
// Exercises operator== and operator!= on two objects `a` and `b`: they must
// compare equal at first and unequal once the frame of `b` is set to 2.
657 BOOST_AUTO_TEST_CASE(CSeqLocInfo_EqualityOperators)
658 {
659  CSeq_id id("gi|197670657");
660  TSeqRange r(1, 100);
663  BOOST_REQUIRE(a == b);
    // Changing only the frame must be enough to break equality
665  b.SetFrame(2);
666  BOOST_REQUIRE(a != b);
667 }
// Adds a lower-case mask to a query, runs dust filtering on the enclosing
// CBlastQueryVector and checks the masked regions afterwards.
669 BOOST_AUTO_TEST_CASE(CombineDustAndLowerCaseMasking_WithBlastQueryVector) {
670  CSeq_id id("gi|197670657");
    // Lower-case mask over positions 2..299 with frame eFramePlus1
671  TSeqRange r(2, 299);
672  CRef<CSeqLocInfo> lower_case_mask
673  (new CSeqLocInfo(id, r, (int)CSeqLocInfo::eFramePlus1));
676  query->AddMask(lower_case_mask);
677  CBlastQueryVector queries;
678  queries.AddQuery(query);
    // Dust parameters are taken from a handle with dust filtering enabled
680  CBlastNucleotideOptionsHandle nucl_handle;
681  nucl_handle.SetDustFiltering(true);
682  Blast_FindDustFilterLoc(queries,
683  nucl_handle.GetDustFilteringLevel(),
684  nucl_handle.GetDustFilteringWindow(),
685  nucl_handle.GetDustFilteringLinker());
686  TMaskedQueryRegions mqr = queries.GetMaskedRegions(0);
688  BOOST_REQUIRE( !mqr.empty() );
    // If GetMasks() throws, the exception must be eNotSupported and mention
    // "lossy direction".
    // NOTE(review): nothing is asserted when GetMasks() does not throw.
689  try { CRef<CSeq_loc> masks = queries.GetMasks(0); }
690  catch (const CBlastException& e) {
691  BOOST_REQUIRE(e.GetErrCode() == CBlastException::eNotSupported);
692  BOOST_REQUIRE(e.GetMsg().find("lossy direction") != NPOS);
693  }
    // The first region must equal the lower-case mask; exactly two regions
    // are expected, the first with frame 1 and the last with frame -1.
695  CRef<CSeqLocInfo> sli = mqr.front();
696  BOOST_REQUIRE(sli.NotEmpty());
697  BOOST_REQUIRE(*sli == *lower_case_mask);
698  BOOST_REQUIRE_EQUAL((int)2, (int)mqr.size());
699  BOOST_REQUIRE(mqr.front()->GetFrame() == 1);
700  BOOST_REQUIRE(mqr.back()->GetFrame() == -1);
701 }
// Applies dust and repeat filtering to two copies of the same query in
// opposite orders and compares the two resulting masks with
// sequence::Compare.
704 BOOST_AUTO_TEST_CASE(RepeatsAndDustFilter) {
706  CSeq_id id1("gi|197333738");
707  unique_ptr<SSeqLoc> qsl1(CTestObjMgr::Instance().CreateSSeqLoc(id1));
708  TSeqLocVector query_v1;
709  query_v1.push_back(*qsl1);
    // Second copy of the same sequence (same GI as id1)
711  CSeq_id id2("gi|197333738");
712  unique_ptr<SSeqLoc> qsl2(CTestObjMgr::Instance().CreateSSeqLoc(id2));
713  TSeqLocVector query_v2;
714  query_v2.push_back(*qsl2);
716  CBlastNucleotideOptionsHandle nucl_handle;
717  nucl_handle.SetDustFiltering(true);
718  nucl_handle.SetRepeatFiltering(true);
    // Copy 1: dust first, then repeats
720  Blast_FindDustFilterLoc(query_v1, &nucl_handle);
721  Blast_FindRepeatFilterLoc(query_v1, &nucl_handle);
    // Copy 2: repeats first, then dust
724  Blast_FindRepeatFilterLoc(query_v2, &nucl_handle);
725  Blast_FindDustFilterLoc(query_v2, &nucl_handle);
727  BOOST_REQUIRE_EQUAL(sequence::Compare(*(query_v1[0].mask), *(query_v2[0].mask),
729 }
// Applies dust filtering and window masker (tax id 9606) to two copies of
// the same query in opposite orders and compares the two resulting masks
// with sequence::Compare.
731 BOOST_AUTO_TEST_CASE(WindowMaskerAndDustFilter) {
733  CSeq_id id1("gi|197333738");
734  unique_ptr<SSeqLoc> qsl1(CTestObjMgr::Instance().CreateSSeqLoc(id1));
735  TSeqLocVector query_v1;
736  query_v1.push_back(*qsl1);
    // Second copy of the same sequence (same GI as id1)
738  CSeq_id id2("gi|197333738");
739  unique_ptr<SSeqLoc> qsl2(CTestObjMgr::Instance().CreateSSeqLoc(id2));
740  TSeqLocVector query_v2;
741  query_v2.push_back(*qsl2);
743  CBlastNucleotideOptionsHandle nucl_handle;
744  nucl_handle.SetDustFiltering(true);
745  nucl_handle.SetWindowMaskerTaxId(9606);
    // Copy 1: dust first, then window masker
747  Blast_FindDustFilterLoc(query_v1, &nucl_handle);
748  Blast_FindWindowMaskerLoc(query_v1, &nucl_handle);
    // Copy 2: window masker first, then dust
751  Blast_FindWindowMaskerLoc(query_v2, &nucl_handle);
752  Blast_FindDustFilterLoc(query_v2, &nucl_handle);
754  BOOST_REQUIRE_EQUAL(sequence::Compare(*(query_v1[0].mask), *(query_v2[0].mask),
756 }
758 BOOST_AUTO_TEST_CASE(WindowMasker_OnSeqInterval)
759 {
760  // these are from window masker and dust
761  vector<TSeqRange> masked_regions;
762  masked_regions.push_back(TSeqRange(85019, 85172));
763  masked_regions.push_back(TSeqRange(85190, 85345));
764  masked_regions.push_back(TSeqRange(85385, 85452));
765  masked_regions.push_back(TSeqRange(85483, 85505));
766  masked_regions.push_back(TSeqRange(85511, 85533));
767  masked_regions.push_back(TSeqRange(85575, 85596));
768  masked_regions.push_back(TSeqRange(85673, 85694));
769  masked_regions.push_back(TSeqRange(85725, 85745));
771  CSeq_id id("gi|20196551");
772  unique_ptr<SSeqLoc>
773  qsl(CTestObjMgr::Instance().CreateSSeqLoc
774  (id, make_pair<TSeqPos, TSeqPos>(85000, 86200), eNa_strand_both));
776  TSeqLocVector query_v;
777  query_v.push_back(*qsl);
779  CBlastNucleotideOptionsHandle nucl_handle;
780  nucl_handle.SetDustFiltering(true);
781  nucl_handle.SetWindowMaskerTaxId(9606);
783  Blast_FindDustFilterLoc(query_v, &nucl_handle);
784  Blast_FindWindowMaskerLoc(query_v, &nucl_handle);
786  BOOST_REQUIRE(query_v[0].mask->IsPacked_int());
787  const CPacked_seqint::Tdata& seqinterval_list =
788  query_v[0].mask->GetPacked_int().Get();
790  size_t loc_index = 0;
791  BOOST_REQUIRE_EQUAL(masked_regions.size(), seqinterval_list.size());
793  ITERATE(CPacked_seqint::Tdata, itr, seqinterval_list) {
794  BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetFrom(),
795  (*itr)->GetFrom());
796  BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetTo(),
797  (*itr)->GetTo());
798  BOOST_REQUIRE(!(*itr)->CanGetStrand());
799  ++loc_index;
800  }
802  BOOST_REQUIRE_EQUAL(masked_regions.size(), loc_index);
803 }
805 BOOST_AUTO_TEST_CASE(RepeatsFilter_NoHitsFound) {
806  CSeq_id id("gi|33079743");
807  unique_ptr<SSeqLoc> qsl(
808  CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
809  TSeqLocVector query_v;
810  query_v.push_back(*qsl);
812  CBlastNucleotideOptionsHandle nucl_handle;
813  nucl_handle.SetRepeatFiltering(true);
814  nucl_handle.SetRepeatFilteringDB("repeat/repeat_9606");
815  Blast_FindRepeatFilterLoc(query_v, &nucl_handle);
817  BOOST_REQUIRE(query_v[0].mask.Empty());
818 }
820 BOOST_AUTO_TEST_CASE(WindowMasker_NoHitsFound) {
821  CSeq_id id("gi|33079743");
822  unique_ptr<SSeqLoc> qsl
823  (CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
825  TSeqLocVector query_v;
826  query_v.push_back(*qsl);
828  CBlastNucleotideOptionsHandle nucl_handle;
829  nucl_handle.SetWindowMaskerTaxId(9606);
831  Blast_FindRepeatFilterLoc(query_v, &nucl_handle);
833  BOOST_REQUIRE(query_v[0].mask.Empty());
834 }
// Supplies a filter string whose repeats option ("R -d ") has no database
// argument and requires Blast_FindRepeatFilterLoc to throw.
836 BOOST_AUTO_TEST_CASE(RepeatsFilterWithMissingParameter) {
837  CSeq_id id("gi|1945388");
838  unique_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(id));
839  TSeqLocVector query_v;
840  query_v.push_back(*qsl);
842  CBlastNucleotideOptionsHandle nucl_handle;
843  // note the missing argument to the repeats database
844  nucl_handle.SetFilterString("m L; R -d ");/* NCBI_FAKE_WARNING */
845  BOOST_REQUIRE_THROW(Blast_FindRepeatFilterLoc(query_v, &nucl_handle),
847 }
// Supplies a filter string whose window masker option ("W -d ") has no
// argument after -d and requires Blast_FindWindowMaskerLoc to throw.
849 BOOST_AUTO_TEST_CASE(WindowMaskerWithMissingParameter) {
850  CSeq_id id("gi|1945388");
851  unique_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(id));
852  TSeqLocVector query_v;
853  query_v.push_back(*qsl);
855  CBlastNucleotideOptionsHandle nucl_handle;
856  // note the missing argument to the window masker database option (-d)
857  nucl_handle.SetFilterString("m L; W -d ");/* NCBI_FAKE_WARNING */
858  BOOST_REQUIRE_THROW(Blast_FindWindowMaskerLoc(query_v, &nucl_handle),
860 }
862 /// Test the conversion of a BlastMaskLoc internal structure to the
863 /// TSeqLocInfoVector type, used in formatting.
/// This case is the both-strands, single-query variant (per its name).
864 BOOST_AUTO_TEST_CASE(TestGetFilteredQueryRegions_BothStrandsOneQuery) {
866 }
/// Plus-strand, single-query variant of the filtered-query-regions test
/// (per its name).
867 BOOST_AUTO_TEST_CASE(TestGetFilteredQueryRegions_PlusStrandsOneQuery) {
869 }
/// Minus-strand, single-query variant of the filtered-query-regions test
/// (per its name).
870 BOOST_AUTO_TEST_CASE(TestGetFilteredQueryRegions_MinusStrandsOneQuery) {
872 }
// Builds a set of masked regions from fixed ranges and checks what
// RestrictToSeqInt() returns for three different restriction intervals.
874 BOOST_AUTO_TEST_CASE(RestrictLowerCaseMask) {
875  vector<TSeqRange> masks;
876  masks.push_back(TSeqRange(0, 75));
877  masks.push_back(TSeqRange(78, 208));
878  masks.push_back(TSeqRange(217, 316));
879  masks.push_back(TSeqRange(380, 685));
880  masks.push_back(TSeqRange(694, 1004));
881  masks.push_back(TSeqRange(1018, 1122));
882  masks.push_back(TSeqRange(1128, 1298));
883  masks.push_back(TSeqRange(2817, 2952));
    // NOTE(review): this start (2084) lies before the previous range and
    // differs from the 3084 used for the otherwise identical lower-case
    // table elsewhere in this file -- confirm it is intentional.
884  masks.push_back(TSeqRange(2084, 3409));
885  masks.push_back(TSeqRange(3428, 3733));
886  masks.push_back(TSeqRange(3782, 3916));
889  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 1945388));
890  ITERATE(vector<TSeqRange>, range, masks) {
891  CRef<CSeq_interval> intv(new CSeq_interval(*id,
892  range->GetFrom(),
893  range->GetTo()));
894  // N.B.: this is deliberate, because of this the return value of
895  // TMaskedQueryRegions::RestrictToSeqInt() will have its strand
896  // unset (see CSeq_interval parametrized constructor for that)
897  BOOST_REQUIRE(intv->CanGetStrand() == false);
898  CRef<CSeqLocInfo> sli(new CSeqLocInfo(intv,
900  mqr.push_back(sli);
901  }
    // Restriction 1: 0..624 on an unrelated Seq-id; four regions expected,
    // still carrying the original id and no strand.
903  // N.B.: even a different Seq-id will work!
904  CSeq_id other_id(CSeq_id::e_Gi, 555);
905  CSeq_interval restriction(other_id, 0, 624);
906  TMaskedQueryRegions restricted_mask;
907  restricted_mask = mqr.RestrictToSeqInt(restriction);
908  BOOST_REQUIRE_EQUAL((size_t)4, restricted_mask.size());
910  restricted_mask.back()->GetInterval().GetTo());
911  BOOST_REQUIRE_EQUAL(CSeq_id::e_YES, id->Compare
912  (restricted_mask.front()->GetInterval().GetId()));
913  BOOST_REQUIRE(!(restricted_mask.front()->GetInterval().CanGetStrand()));
    // Restriction 2: 1000..2000; three regions expected. Each region is
    // checked for its From value, its To value (compared as GetTo()-1), the
    // original id, no strand and frame eFrameNotSet.
915  restriction.SetFrom(1000);
916  restriction.SetTo(2000);
917  restriction.SetStrand(eNa_strand_plus); // this is irrelevant
918  restricted_mask = mqr.RestrictToSeqInt(restriction);
919  BOOST_REQUIRE_EQUAL((size_t)3, restricted_mask.size());
920  TMaskedQueryRegions::iterator itr = restricted_mask.begin();
922  BOOST_REQUIRE_EQUAL((TSeqPos)1000, (*itr)->GetInterval().GetFrom());
923  BOOST_REQUIRE_EQUAL((TSeqPos)1004, (*itr)->GetInterval().GetTo()-1);
924  BOOST_REQUIRE(id->Match((*itr)->GetInterval().GetId()));
925  BOOST_REQUIRE(!(*itr)->GetInterval().CanGetStrand());
926  BOOST_REQUIRE_EQUAL((int)CSeqLocInfo::eFrameNotSet, (*itr)->GetFrame());
927  ++itr;
928  BOOST_REQUIRE_EQUAL((TSeqPos)1018, (*itr)->GetInterval().GetFrom());
929  BOOST_REQUIRE_EQUAL((TSeqPos)1122, (*itr)->GetInterval().GetTo()-1);
930  BOOST_REQUIRE(id->Match((*itr)->GetInterval().GetId()));
931  BOOST_REQUIRE(!(*itr)->GetInterval().CanGetStrand());
932  BOOST_REQUIRE_EQUAL((int)CSeqLocInfo::eFrameNotSet, (*itr)->GetFrame());
933  ++itr;
934  BOOST_REQUIRE_EQUAL((TSeqPos)1128, (*itr)->GetInterval().GetFrom());
935  BOOST_REQUIRE_EQUAL((TSeqPos)1298, (*itr)->GetInterval().GetTo()-1);
936  BOOST_REQUIRE(id->Match((*itr)->GetInterval().GetId()));
937  BOOST_REQUIRE(!(*itr)->GetInterval().CanGetStrand());
938  BOOST_REQUIRE_EQUAL((int)CSeqLocInfo::eFrameNotSet, (*itr)->GetFrame());
939  ++itr;
940  BOOST_REQUIRE(itr == restricted_mask.end());
    // Restriction 3: 10000..20000 lies beyond every mask range, so the
    // result must be empty.
942  restriction.SetFrom(10000);
943  restriction.SetTo(20000);
944  restricted_mask = mqr.RestrictToSeqInt(restriction);
945  BOOST_REQUIRE(restricted_mask.empty());
946 }
948 // Inspired by JIRA SB-264
// Builds eleven lower-case mask ranges on gi 1945388, appends one CSeqLocInfo
// per range to `mqr`, and then checks the per-frame view `bqff`: it must be
// non-empty, report multiple frames, return a non-NULL entry for every frame
// it lists, and report NUM_FRAMES frames in total.
// NOTE(review): the declarations of `mqr` and `bqff` and the trailing
// argument(s) of the CSeqLocInfo constructor are on lines not visible here.
949 BOOST_AUTO_TEST_CASE(BlastxLowerCaseMask) {
950  vector<TSeqRange> masks;
951  masks.push_back(TSeqRange(0, 75));
952  masks.push_back(TSeqRange(78, 208));
953  masks.push_back(TSeqRange(217, 316));
954  masks.push_back(TSeqRange(380, 685));
955  masks.push_back(TSeqRange(694, 1004));
956  masks.push_back(TSeqRange(1018, 1122));
957  masks.push_back(TSeqRange(1128, 1298));
958  masks.push_back(TSeqRange(2817, 2952));
// NOTE(review): start 2084 breaks the ascending order of the other ranges and
// overlaps the previous one; CombineRepeatAndLowerCaseMask uses 3084 for the
// matching interval -- confirm whether 2084 is intentional.
959  masks.push_back(TSeqRange(2084, 3409));
960  masks.push_back(TSeqRange(3428, 3733));
961  masks.push_back(TSeqRange(3782, 3916));
964  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 1945388));
// Wrap every range in a Seq-interval on `id` and store it as a mask entry.
965  ITERATE(vector<TSeqRange>, range, masks) {
966  CRef<CSeq_interval> intv(new CSeq_interval(*id,
967  range->GetFrom(),
968  range->GetTo()));
969  CRef<CSeqLocInfo> sli(new CSeqLocInfo(intv,
971  mqr.push_back(sli);
972  }
974  BOOST_REQUIRE(!bqff.Empty());
975  BOOST_REQUIRE(bqff.QueryHasMultipleFrames());
976  const set<CSeqLocInfo::ETranslationFrame>& frames = bqff.ListFrames();
// Every frame reported by ListFrames() must be retrievable via operator[].
977  ITERATE(set<CSeqLocInfo::ETranslationFrame>, fr, frames) {
978  BOOST_REQUIRE(bqff[*fr] != NULL);
979  }
980  BOOST_REQUIRE(bqff.GetNumFrames() == NUM_FRAMES);
981 }
983 // Inspired by SB-597
// Stores one mask range (0-75) on gi 1945388 twice (as `sli_plus` and
// `sli_minus`), calls UseProteinCoords(9180) on `bqff`, and checks the
// left/right bounds of the first BlastSeqLoc of each of the six frames.
// NOTE(review): the declarations of `mqr` and `bqff` and the trailing
// argument(s) of both CSeqLocInfo constructors are on lines not visible here;
// presumably the two entries differ only in the frame they carry -- confirm.
984 BOOST_AUTO_TEST_CASE(BlastxLowerCaseMaskProteinLocations)
985 {
986  vector<TSeqRange> masks;
987  masks.push_back(TSeqRange(0, 75));
990  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 1945388));
991  ITERATE(vector<TSeqRange>, range, masks) {
992  CRef<CSeq_interval> intv(new CSeq_interval(*id,
993  range->GetFrom(),
994  range->GetTo()));
995  CRef<CSeqLocInfo> sli_plus(new CSeqLocInfo(intv,
997  mqr.push_back(sli_plus);
998  CRef<CSeqLocInfo> sli_minus(new CSeqLocInfo(intv,
1000  mqr.push_back(sli_minus);
1001  }
1003  bqff.UseProteinCoords(9180); // 9180 is length of GI|1945388
// Plus frames: the mask is expected to start at 0 in every frame.
1005  BlastSeqLoc* bsl = *bqff[CSeqLocInfo::eFramePlus1];
1006  BOOST_REQUIRE_EQUAL(bsl->ssr->left, 0);
1007  BOOST_REQUIRE_EQUAL(bsl->ssr->right, 25);
1009  bsl = *bqff[CSeqLocInfo::eFramePlus2];
1010  BOOST_REQUIRE_EQUAL(bsl->ssr->left, 0);
1011  BOOST_REQUIRE_EQUAL(bsl->ssr->right, 24);
1013  bsl = *bqff[CSeqLocInfo::eFramePlus3];
1014  BOOST_REQUIRE_EQUAL(bsl->ssr->left, 0);
1015  BOOST_REQUIRE_EQUAL(bsl->ssr->right, 24);
// Minus frames: the mask is expected to start at 3034 in every frame.
1017  bsl = *bqff[CSeqLocInfo::eFrameMinus1];
1018  BOOST_REQUIRE_EQUAL(bsl->ssr->left, 3034);
1019  BOOST_REQUIRE_EQUAL(bsl->ssr->right, 3059);
1021  bsl = *bqff[CSeqLocInfo::eFrameMinus2];
1022  BOOST_REQUIRE_EQUAL(bsl->ssr->left, 3034);
1023  BOOST_REQUIRE_EQUAL(bsl->ssr->right, 3058);
1025  bsl = *bqff[CSeqLocInfo::eFrameMinus3];
1026  BOOST_REQUIRE_EQUAL(bsl->ssr->left, 3034);
1027  BOOST_REQUIRE_EQUAL(bsl->ssr->right, 3058);
1028 }
1030 // Inspired by SB-285
// Stores a single mask entry (range 0-75 on gi 1945388) in `mqr` and checks
// the frame bookkeeping of `bqff`: GetNumFrames() must report 2, while
// ListFrames() yields exactly one frame (the two "NOTE!!" assertions).
// NOTE(review): the trailing CSeqLocInfo constructor argument(s) and the
// declaration of `bqff` are on lines not visible here.
1031 BOOST_AUTO_TEST_CASE(BlastnLowerCaseMask_SingleStrand) {
1032  TSeqRange mask(TSeqRange(0, 75));
1034  TMaskedQueryRegions mqr;
1035  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 1945388));
1036  CRef<CSeq_interval> intv(new CSeq_interval(*id,
1037  mask.GetFrom(),
1038  mask.GetTo()));
1039  CRef<CSeqLocInfo> sli(new CSeqLocInfo(intv,
1041  mqr.push_back(sli);
1044  BOOST_REQUIRE(!bqff.Empty());
1045  BOOST_REQUIRE(bqff.QueryHasMultipleFrames());
1046  const set<CSeqLocInfo::ETranslationFrame>& frames = bqff.ListFrames();
1047  const int kExpectedNumFrames = 2;
// Count the listed frames by hand so the count can be compared with
// frames.size() and GetNumFrames() below.
1048  int frame_ctr = 0;
1049  ITERATE(set<CSeqLocInfo::ETranslationFrame>, fr, frames) {
1050  BOOST_REQUIRE(bqff[*fr] != NULL);
1051  frame_ctr++;
1052  }
1053  BOOST_REQUIRE_EQUAL(kExpectedNumFrames, bqff.GetNumFrames());
1054  BOOST_REQUIRE_EQUAL(1, frame_ctr); // NOTE!!
1055  BOOST_REQUIRE_EQUAL(1, frames.size()); // NOTE!!
1056 }
1058 // Inspired by SB-285
// Stores two mask entries for range 0-75 on gi 1945388 in `mqr` and checks
// that `bqff` reports two frames consistently: GetNumFrames(), the number of
// frames iterated, and frames.size() must all equal kExpectedNumFrames.
// NOTE(review): the trailing CSeqLocInfo constructor argument(s), whatever
// happens to `sli` between the two push_back calls, and the declaration of
// `bqff` are on lines not visible here; presumably the second entry carries
// the opposite strand/frame -- confirm.
1059 BOOST_AUTO_TEST_CASE(BlastnLowerCaseMask_BothStrands) {
1060  TSeqRange mask(TSeqRange(0, 75));
1062  TMaskedQueryRegions mqr;
1063  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 1945388));
1064  CRef<CSeq_interval> intv(new CSeq_interval(*id,
1065  mask.GetFrom(),
1066  mask.GetTo()));
1067  CRef<CSeqLocInfo> sli(new CSeqLocInfo(intv,
1069  mqr.push_back(sli);
1071  mqr.push_back(sli);
1074  BOOST_REQUIRE(!bqff.Empty());
1075  BOOST_REQUIRE(bqff.QueryHasMultipleFrames());
1076  const set<CSeqLocInfo::ETranslationFrame>& frames = bqff.ListFrames();
1077  const int kExpectedNumFrames = 2;
// Count the listed frames by hand so the count can be compared with
// frames.size() and GetNumFrames() below.
1078  int frame_ctr = 0;
1079  ITERATE(set<CSeqLocInfo::ETranslationFrame>, fr, frames) {
1080  BOOST_REQUIRE(bqff[*fr] != NULL);
1081  frame_ctr++;
1082  }
1083  BOOST_REQUIRE_EQUAL(kExpectedNumFrames, bqff.GetNumFrames());
1084  BOOST_REQUIRE_EQUAL(kExpectedNumFrames, frame_ctr); // NOTE!!
1085  BOOST_REQUIRE_EQUAL(kExpectedNumFrames, frames.size()); // NOTE!!
1086 }
// Lower-case mask check with ignore_strand_in_mask = true.
// NOTE(review): the line opening the call that receives this flag is not
// visible here; presumably a shared helper invoked for the plus strand --
// confirm.
1088 BOOST_AUTO_TEST_CASE(LowerCaseMask_PlusStrand) {
1089  const bool ignore_strand_in_mask = true;
1091  ignore_strand_in_mask);
1092 }
// Lower-case mask check with ignore_strand_in_mask = true.
// NOTE(review): the line opening the call that receives this flag is not
// visible here; presumably a shared helper invoked for the minus strand --
// confirm.
1094 BOOST_AUTO_TEST_CASE(LowerCaseMask_MinusStrand) {
1095  const bool ignore_strand_in_mask = true;
1097  ignore_strand_in_mask);
1098 }
// Lower-case mask check with ignore_strand_in_mask = true.
// NOTE(review): the line opening the call that receives this flag is not
// visible here; presumably a shared helper invoked for both strands --
// confirm.
1100 BOOST_AUTO_TEST_CASE(LowerCaseMask_BothStrands) {
1101  const bool ignore_strand_in_mask = true;
1103  ignore_strand_in_mask);
1104 }
// Lower-case mask check with ignore_strand_in_mask = false, i.e. the
// "_Explicit" counterpart of LowerCaseMask_PlusStrand.
// NOTE(review): the line opening the call that receives this flag is not
// visible here; presumably a shared helper invoked for the plus strand --
// confirm.
1106 BOOST_AUTO_TEST_CASE(LowerCaseMask_PlusStrand_Explicit) {
1107  const bool ignore_strand_in_mask = false;
1109  ignore_strand_in_mask);
1110 }
// Lower-case mask check with ignore_strand_in_mask = false, i.e. the
// "_Explicit" counterpart of LowerCaseMask_MinusStrand.
// NOTE(review): the line opening the call that receives this flag is not
// visible here; presumably a shared helper invoked for the minus strand --
// confirm.
1112 BOOST_AUTO_TEST_CASE(LowerCaseMask_MinusStrand_Explicit) {
1113  const bool ignore_strand_in_mask = false;
1115  ignore_strand_in_mask);
1116 }
// Lower-case mask check with ignore_strand_in_mask = false, i.e. the
// "_Explicit" counterpart of LowerCaseMask_BothStrands.
// NOTE(review): the line opening the call that receives this flag is not
// visible here; presumably a shared helper invoked for both strands --
// confirm.
1118 BOOST_AUTO_TEST_CASE(LowerCaseMask_BothStrands_Explicit) {
1119  const bool ignore_strand_in_mask = false;
1121  ignore_strand_in_mask);
1122 }
// Attaches an 11-interval lower-case mask to the query gi|1945388 (both
// strands), runs repeat filtering on it, and checks that the resulting mask
// consists of exactly the kNumLocs intervals given by kStarts/kEnds.
// NOTE(review): the line that opens the final interval loop (it declares
// `itr`) is not visible here.
1124 BOOST_AUTO_TEST_CASE(CombineRepeatAndLowerCaseMask) {
// Input: lower-case mask intervals placed on the query before filtering.
1125  const int kNumLcaseLocs = 11;
1126  const int kLcaseStarts[kNumLcaseLocs] =
1127  { 0, 78, 217, 380, 694, 1018, 1128, 2817, 3084, 3428, 3782 };
1128  const int kLcaseEnds[kNumLcaseLocs] =
1129  { 75, 208, 316, 685, 1004, 1122, 1298, 2952, 3409, 3733, 3916 };
// Expected: mask intervals after Blast_FindRepeatFilterLoc has run.
1131  const int kNumLocs = 6;
1132  const int kStarts[kNumLocs] = { 0, 217, 380, 2817, 3084, 3782 };
1133  const int kEnds[kNumLocs] = { 212, 316, 1298, 2953, 3764, 3916 };
1134  CSeq_id id("gi|1945388");
1135  unique_ptr<SSeqLoc> qsl(
1136  CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
1138  // Fill the lower case mask into the SSeqLoc
// NOTE(review): `seqloc` is a raw pointer until mask.Reset() below; if the
// BOOST_REQUIRE inside the loop fails it is presumably never released.
1139  CSeq_loc* seqloc = new CSeq_loc();
1140  for (int index = 0; index < kNumLcaseLocs; ++index) {
1141  seqloc->SetPacked_int().AddInterval(id, kLcaseStarts[index],
1142  kLcaseEnds[index]);
// Each interval just added must not have a strand available.
1143  BOOST_REQUIRE(!seqloc->GetPacked_int().Get().back()->CanGetStrand());
1144  }
1145  qsl->mask.Reset(seqloc);
1147  TSeqLocVector query_v;
1148  query_v.push_back(*qsl);
1149  CBlastNucleotideOptionsHandle nucl_handle;
1150  nucl_handle.SetRepeatFiltering(true);
1151  Blast_FindRepeatFilterLoc(query_v, &nucl_handle);
1153  BOOST_REQUIRE(query_v[0].mask->IsPacked_int());
1155  int loc_index = 0;
1157  BOOST_REQUIRE(query_v[0].mask.NotEmpty());
1159  query_v[0].mask->GetPacked_int().Get()) {
1160  // cerr << (*itr)->GetFrom() << " " << (*itr)->GetTo() << endl;
1161  BOOST_REQUIRE_EQUAL(kStarts[loc_index], (int)(*itr)->GetFrom());
1162  BOOST_REQUIRE_EQUAL(kEnds[loc_index], (int)(*itr)->GetTo());
1163  ++loc_index;
1164  }
// The number of intervals visited must match the expected count.
1166  BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
1167 }
// Runs dust filtering followed by repeat filtering on the query gi|1945388
// (both strands) and checks that the resulting mask consists of exactly the
// kNumLocs intervals given by kStarts/kEnds.
// NOTE(review): the line that opens the interval loop (it declares `itr`) is
// not visible here.
1169 BOOST_AUTO_TEST_CASE(CombineRepeatAndDustFilter) {
1170  const int kNumLocs = 13;
1171  const int kStarts[kNumLocs] =
1172  { 0, 298, 380, 1449, 2851, 3113, 4704, 6364, 6512, 7600,
1173  7766, 8873, 9109};
1174  const int kEnds[kNumLocs] =
1175  { 212, 305, 1297, 1479, 2953, 3764, 4710, 6373, 6573, 7672,
1176  7772, 8880, 9179};
1177  CSeq_id id("gi|1945388");
1178  unique_ptr<SSeqLoc> qsl(
1179  CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
1180  TSeqLocVector query_v;
1181  query_v.push_back(*qsl);
// Both filters are enabled on the same handle; dust runs first, then repeats.
1183  CBlastNucleotideOptionsHandle nucl_handle;
1184  nucl_handle.SetRepeatFiltering(true);
1185  nucl_handle.SetDustFiltering(true);
1186  Blast_FindDustFilterLoc(query_v, &nucl_handle);
1187  Blast_FindRepeatFilterLoc(query_v, &nucl_handle);
1189  int loc_index = 0;
1191  BOOST_REQUIRE(query_v[0].mask.NotEmpty());
1193  query_v[0].mask->GetPacked_int().Get()) {
1194  // cerr << (*itr)->GetFrom() << " " << (*itr)->GetTo() << endl;
1195  BOOST_REQUIRE_EQUAL(kStarts[loc_index], (int)(*itr)->GetFrom());
1196  BOOST_REQUIRE_EQUAL(kEnds[loc_index], (int)(*itr)->GetTo());
1197  ++loc_index;
1198  }
// The number of intervals visited must match the expected count.
1199  BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
1200 }
1202 BOOST_AUTO_TEST_CASE(FilterLocNuclBoth) {
1203  const int kNumLocs = 15;
1204  const int kDustStarts[kNumLocs] =
1205  { 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
1206  6512, 7600, 7766, 8873, 9109};
1207  const int kDustEnds[kNumLocs] =
1208  { 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1209  6573, 7672, 7772, 8880 , 9179};
1211  CSeq_id id("gi|1945388");
1212  unique_ptr<SSeqLoc> qsl(
1213  CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
1214  TSeqLocVector query_v;
1215  query_v.push_back(*qsl);
1217  CBlastNucleotideOptionsHandle nucl_handle;
1218  nucl_handle.SetDustFiltering(true);
1219  Blast_FindDustFilterLoc(query_v, &nucl_handle);
1221  int loc_index=0;
1222  ITERATE(list< CRef<CSeq_interval> >, itr,
1223  query_v[0].mask->GetPacked_int().Get()) {
1224  BOOST_REQUIRE_EQUAL(kDustStarts[loc_index], (int)(*itr)->GetFrom());
1225  BOOST_REQUIRE_EQUAL(kDustEnds[loc_index], (int)(*itr)->GetTo());
1226  ++loc_index;
1227  }
1229  BOOST_REQUIRE_EQUAL(loc_index, kNumLocs);
1230 }
1232 BOOST_AUTO_TEST_CASE(FilterLocNuclPlus) {
1233  const int kNumLocs = 15;
1234  const int kDustStarts[kNumLocs] =
1235  { 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
1236  6512, 7600, 7766, 8873, 9109};
1237  const int kDustEnds[kNumLocs] =
1238  { 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1239  6573, 7672, 7772, 8880 , 9179};
1241  CSeq_id id("gi|1945388");
1242  unique_ptr<SSeqLoc> qsl(
1243  CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_plus));
1244  TSeqLocVector query_v;
1245  query_v.push_back(*qsl);
1247  CBlastNucleotideOptionsHandle nucl_handle;
1248  nucl_handle.SetDustFiltering(true);
1249  Blast_FindDustFilterLoc(query_v, &nucl_handle);
1251  int loc_index=0;
1252  ITERATE(list< CRef<CSeq_interval> >, itr,
1253  query_v[0].mask->GetPacked_int().Get()) {
1254  BOOST_REQUIRE_EQUAL(kDustStarts[loc_index], (int)(*itr)->GetFrom());
1255  BOOST_REQUIRE_EQUAL(kDustEnds[loc_index], (int)(*itr)->GetTo());
1256  ++loc_index;
1257  }
1259  BOOST_REQUIRE_EQUAL(loc_index, kNumLocs);
1260 }
1262 BOOST_AUTO_TEST_CASE(FilterLocNuclMinus) {
1263  const int kNumLocs = 15;
1264  const int kDustStarts[kNumLocs] =
1265  { 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
1266  6512, 7600, 7766, 8873, 9109};
1267  const int kDustEnds[kNumLocs] =
1268  { 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1269  6573, 7672, 7772, 8880 , 9179};
1271  CSeq_id id("gi|1945388");
1272  unique_ptr<SSeqLoc> qsl(
1273  CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_minus));
1274  TSeqLocVector query_v;
1275  query_v.push_back(*qsl);
1277  CBlastNucleotideOptionsHandle nucl_handle;
1278  nucl_handle.SetDustFiltering(true);
1279  Blast_FindDustFilterLoc(query_v, &nucl_handle);
1280  // CSeq_loc_mapper sorts intervals in reverse order if on minus strand.
1281  bool reverse = IsReverse(query_v[0].mask->GetStrand());
1282  int loc_index = reverse ? kNumLocs - 1 : 0;
1283  ITERATE(list< CRef<CSeq_interval> >, itr,
1284  query_v[0].mask->GetPacked_int().Get()) {
1285  BOOST_REQUIRE_EQUAL(kDustStarts[loc_index], (int)(*itr)->GetFrom());
1286  BOOST_REQUIRE_EQUAL(kDustEnds[loc_index], (int)(*itr)->GetTo());
1287  reverse ? --loc_index : ++loc_index;
1288  }
1290  // Check that we finished loop on reverse strand is that loc_index is -1.
1291  if ( !reverse ) {
1292  BOOST_REQUIRE_EQUAL(loc_index, kNumLocs);
1293  }
1294  else {
1295  BOOST_REQUIRE_EQUAL(loc_index, -1);
1296  }
1297 }
// Computes SEG filtering locations for the protein query gi|3091 through
// BlastSetUp_GetFilteringLocations and checks that the first context's mask
// list matches kSegStarts/kSegEnds, in order and in number.
// NOTE(review): `opts` is declared on lines not visible here.
1300 BOOST_AUTO_TEST_CASE(FilterLocProtein) {
1301  const int kNumLocs = 3;
1302  const int kSegStarts[kNumLocs] = { 15, 55, 495 };
1303  const int kSegEnds[kNumLocs] = { 27, 68, 513 };
1304  CSeq_id id("gi|3091");
1305  unique_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(id));
1306  TSeqLocVector query_v;
1307  query_v.push_back(*qsl);
1308  CBlastQueryInfo query_info;
1309  CBLAST_SequenceBlk query_blk;
1312  setupQueryStructures(query_v, opts->GetOptions(),
1313  &query_blk, &query_info);
1315  BlastMaskLoc* filter_out = NULL;
// NOTE(review): `blast_message` is not released anywhere in the visible code.
1316  Blast_Message *blast_message=NULL;
1317  SBlastFilterOptions* filter_options;
1318  SBlastFilterOptionsNew(&filter_options, eSeg);
1320  Int2 status =
1321  BlastSetUp_GetFilteringLocations(query_blk, query_info,
1322  eBlastTypeBlastp, filter_options,
1323  &filter_out, &blast_message);
// The options are only needed for the call above; free them before asserting.
1324  filter_options = SBlastFilterOptionsFree(filter_options);
1325  BOOST_REQUIRE(filter_options == NULL);
1326  BOOST_REQUIRE(status == 0);
// NOTE(review): `filter_out` is dereferenced without a NULL check.
1328  BlastSeqLoc *filter_slp = filter_out->seqloc_array[0];
1329  Int4 loc_index;
1330  SSeqRange* di;
1331  BlastSeqLoc *loc = NULL;
// Walk the linked list of mask ranges for context 0.
1332  for (loc_index=0, loc = filter_slp; loc; loc = loc->next, ++loc_index) {
1333  di = loc->ssr;
1334  BOOST_REQUIRE_EQUAL(kSegStarts[loc_index], di->left);
1335  BOOST_REQUIRE_EQUAL(kSegEnds[loc_index], di->right);
1336  }
1338  BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
1340  filter_out = BlastMaskLocFree(filter_out);
1341  BOOST_REQUIRE(filter_out == NULL);
1342 }
// Masks the protein query gi|3091 at three fixed ranges via
// BlastSetUp_MaskQuery and verifies the masked sequence buffer by comparing a
// hash of its bytes with a known value.
// NOTE(review): `opts` is declared on lines not visible here, and the
// trailing argument(s) of the BlastSetUp_MaskQuery call are also not visible.
1344 BOOST_AUTO_TEST_CASE(MaskProteinSequence) {
1345  const int kNumLocs = 3;
1346  const int kSegStarts[kNumLocs] = { 15, 55, 495 };
1347  const int kSegEnds[kNumLocs] = { 27, 68, 513 };
1348  CSeq_id id("gi|3091");
1349  unique_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(id));
1350  TSeqLocVector query_v;
1351  query_v.push_back(*qsl);
1354  CBlastQueryInfo query_info;
1355  CBLAST_SequenceBlk query_blk;
1356  setupQueryStructures(query_v, opts->GetOptions(),
1357  &query_blk, &query_info);
// Build the list of mask ranges: the first call goes through `head`, later
// calls pass the previously returned node in `last`.
1359  BlastSeqLoc *head = NULL;
1360  BlastSeqLoc *last = NULL;
1361  for (Int4 loc_index=0; loc_index<kNumLocs; ++loc_index) {
1362  if (head == NULL)
1363  last = BlastSeqLocNew(&head, kSegStarts[loc_index],
1364  kSegEnds[loc_index]);
1365  else
1366  last = BlastSeqLocNew(&last, kSegStarts[loc_index],
1367  kSegEnds[loc_index]);
1368  }
// One-slot mask structure holding the list built above.
1370  BlastMaskLoc* filter_maskloc = BlastMaskLocNew(1);
1371  filter_maskloc->seqloc_array[0] = head;
1373  BlastSetUp_MaskQuery(query_blk, query_info, filter_maskloc,
1375  filter_maskloc = BlastMaskLocFree(filter_maskloc);
1376  BOOST_REQUIRE(filter_maskloc == NULL);
// Hash the first context's sequence bytes: multiply by 1103515245, then add
// the byte value plus 12345, in 32-bit unsigned arithmetic.
1378  Uint1* buffer = &query_blk->sequence[0];
1379  Int4 query_length = query_info->contexts[0].query_length;
1380  Uint4 hash = 0;
1381  for (int index=0; index<query_length; index++)
1382  {
1383  hash *= 1103515245;
1384  hash += (Uint4)buffer[index] + 12345;
1385  }
1386  BOOST_REQUIRE_EQUAL(-241853716, (int) hash);
1387 }
// Masks the nucleotide query gi|1945388 (both strands) at fifteen fixed
// ranges via BlastSetUp_MaskQuery and verifies the masked sequence buffer by
// comparing a hash of its bytes with a known value.
// NOTE(review): `opts` is declared on lines not visible here, and the
// trailing argument(s) of the BlastSetUp_MaskQuery call are also not visible.
1389 BOOST_AUTO_TEST_CASE(MaskNucleotideBothStrands) {
1390  const int kNumLocs = 15;
1391  const int kDustStarts[kNumLocs] =
1392  { 298, 875, 1018, 1064, 1448, 3113, 3282, 3428, 3598, 4704, 6364,
1393  6511, 7766, 8873, 9108 };
1394  const int kDustEnds[kNumLocs] =
1395  { 305, 882, 1045, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1396  6573, 7772, 8880, 9179 };
1398  CSeq_id id("gi|1945388");
1399  unique_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(id,
1400  eNa_strand_both));
1401  TSeqLocVector query_v;
1402  query_v.push_back(*qsl);
1405  CBlastQueryInfo query_info;
1406  CBLAST_SequenceBlk query_blk;
1407  setupQueryStructures(query_v, opts->GetOptions(),
1408  &query_blk, &query_info);
// Build the list of mask ranges: the first call goes through `head`, later
// calls pass the previously returned node in `last`.
1410  BlastSeqLoc *head = NULL;
1411  BlastSeqLoc *last = NULL;
1412  for (Int4 loc_index=0; loc_index<kNumLocs; ++loc_index) {
1413  if (head == NULL)
1414  last = BlastSeqLocNew(&head, kDustStarts[loc_index],
1415  kDustEnds[loc_index]);
1416  else
1417  last = BlastSeqLocNew(&last, kDustStarts[loc_index],
1418  kDustEnds[loc_index]);
1419  }
// One slot per context; only slot 0 is filled in the visible code.
1421  BlastMaskLoc* filter_maskloc =
1422  BlastMaskLocNew(query_info->last_context+1);
1423  filter_maskloc->seqloc_array[0] = head;
1425  BlastSetUp_MaskQuery(query_blk, query_info, filter_maskloc,
1427  filter_maskloc = BlastMaskLocFree(filter_maskloc);
1428  BOOST_REQUIRE(filter_maskloc == NULL);
// Hash the first context's sequence bytes: multiply by 1103515245, then add
// the byte value plus 12345, in 32-bit unsigned arithmetic.
1430  Uint1* buffer = &query_blk->sequence[0];
1431  Int4 query_length = query_info->contexts[0].query_length;
1432  Uint4 hash = 0;
1433  for (int index=0; index<query_length; index++)
1434  {
1435  hash *= 1103515245;
1436  hash += (Uint4)buffer[index] + 12345;
1437  }
1438  BOOST_REQUIRE_EQUAL(-1261879517, (int) hash);
1439 }
1441 BOOST_AUTO_TEST_CASE(FilterMultipleQueriesLocNuclPlus) {
1442  const int kNumLocs0 = 15;
1443  const int kNumLocs1 = 80;
1444  const int kNumLocs2 = 1;
1446  int dust_starts0[kNumLocs0] =
1447  { 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
1448  6512, 7600, 7766, 8873, 9109};
1449  int dust_ends0[kNumLocs0] =
1450  { 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
1451  6573, 7672, 7772, 8880 , 9179};
1452  int dust_starts1[kNumLocs1] =
1453  { 189, 862, 1717, 1880, 2301, 2850, 3074, 3301, 4865, 5231, 5397,
1454  5825, 5887, 6560, 6806, 7178, 7709, 8000, 8275, 8441, 9449, 9779,
1455  10297, 10457, 11033, 11242, 12271, 12410, 12727, 13803, 14743, 15052,
1456  15153, 15262, 16201, 16968, 17318, 18470, 20179, 21513, 21569,
1457  22034, 22207, 22657, 22890, 23326, 27984, 28305, 28581, 28960, 29678,
1458  30553, 31195, 32347, 33641, 33785, 34138, 34861, 34872, 35028,
1459  35676, 35727, 36105, 36312, 36841, 38459, 38610, 38997, 39217, 39428,
1460  39629, 42243, 42584, 43157, 43346, 43619, 44040, 44617, 46791, 47213};
1461  int dust_ends1[kNumLocs1] =
1462  { 230, 876, 1741, 1898, 2315, 2868, 3117, 3308, 4886, 5255, 5433, 5860,
1463  5943, 6566, 6857, 7245, 7737, 8014, 8286, 8479, 9496, 9830, 10306,
1464  10581, 11082, 11255, 12277, 12432, 12748, 13809, 14750, 15121, 15171,
1465  15345, 16237, 16992, 17332, 18482, 20185, 21524, 21688, 22072, 22220,
1466  22672, 22898, 23348, 27996, 28311, 28626, 28998, 29690, 30596, 31220,
1467  32359, 33683, 33815, 34203, 34870, 34894, 35039, 35725, 35797, 36114,
1468  36318, 36869, 38497, 38632, 39035, 39223, 39477, 39635, 42249, 42591,
1469  43175, 43410, 43648, 44049, 44630, 46811, 47219};
1470  int dust_starts2[kNumLocs2] = {156};
1471  int dust_ends2[kNumLocs2] = {172};
1473  typedef pair<int*, int*> TStartEndPair;
1474  TStartEndPair pair0(dust_starts0, dust_ends0);
1475  TStartEndPair pair1(dust_starts1, dust_ends1);
1476  TStartEndPair pair2(dust_starts2, dust_ends2);
1478  vector< TStartEndPair > start_end_v;
1479  start_end_v.push_back(pair0);
1480  start_end_v.push_back(pair1);
1481  start_end_v.push_back(pair2);
1483  CSeq_id qid1("gi|1945388");
1484  unique_ptr<SSeqLoc> qsl1(
1485  CTestObjMgr::Instance().CreateSSeqLoc(qid1, eNa_strand_both));
1486  CSeq_id qid2("gi|2655203");
1487  unique_ptr<SSeqLoc> qsl2(
1488  CTestObjMgr::Instance().CreateSSeqLoc(qid2, eNa_strand_both));
1489  CSeq_id qid3("gi|557");
1490  unique_ptr<SSeqLoc> qsl3(
1491  CTestObjMgr::Instance().CreateSSeqLoc(qid3, eNa_strand_both));
1493  TSeqLocVector query_v;
1495  query_v.push_back(*qsl1);
1496  query_v.push_back(*qsl2);
1497  query_v.push_back(*qsl3);
1500  CBlastNucleotideOptionsHandle nucl_handle;
1501  nucl_handle.SetDustFiltering(true);
1502  Blast_FindDustFilterLoc(query_v, &nucl_handle);
1505  int query_number=0;
1506  ITERATE(vector< TStartEndPair >, vec_iter, start_end_v)
1507  {
1508  TStartEndPair local_pair = *vec_iter;
1509  int* start = local_pair.first;
1510  int* stop = local_pair.second;
1511  int loc_index=0;
1512  ITERATE(list< CRef<CSeq_interval> >, itr,
1513  query_v[query_number].mask->GetPacked_int().Get()) {
1514  BOOST_REQUIRE_EQUAL(start[loc_index], (int)(*itr)->GetFrom());
1515  BOOST_REQUIRE_EQUAL(stop[loc_index], (int)(*itr)->GetTo());
1516  ++loc_index;
1517  }
1518  ++query_number;
1519  }
1520 }
1522 BOOST_AUTO_TEST_CASE(MaskRestrictToInterval)
1523 {
1524  const int kNumLocs = 4;
1525  const int kMaskStarts[kNumLocs] = { 10, 20, 30, 40 };
1526  const int kMaskEnds[kNumLocs] = { 15, 25, 35, 45 };
1527  const int kRange[2] = { 12, 22 };
1528  BlastSeqLoc* mask_loc = NULL, *loc_var;
1529  int index;
1531  for (index = 0; index < kNumLocs; ++index) {
1532  BlastSeqLocNew(&mask_loc, kMaskStarts[index], kMaskEnds[index]);
1533  }
1535  // Test that restricting to a full sequence does not change anything;
1536  // this also checks that negative ending offset indicates full
1537  // sequence.
1538  BlastSeqLoc_RestrictToInterval(&mask_loc, 0, -2);
1539  for (index = 0, loc_var = mask_loc; loc_var;
1540  ++index, loc_var = loc_var->next) {
1541  BOOST_REQUIRE_EQUAL(kMaskStarts[index], (int)loc_var->ssr->left);
1542  BOOST_REQUIRE_EQUAL(kMaskEnds[index], (int)loc_var->ssr->right);
1543  }
1544  BOOST_REQUIRE_EQUAL(kNumLocs, index);
1546  BlastSeqLoc_RestrictToInterval(&mask_loc, kRange[0], kRange[1]);
1547  for (index = 0, loc_var = mask_loc; loc_var;
1548  ++index, loc_var = loc_var->next);
1549  BOOST_REQUIRE_EQUAL(2, index);
1550  BOOST_REQUIRE_EQUAL(kMaskEnds[0]-kRange[0], (int)mask_loc->ssr->right);
1551  BOOST_REQUIRE_EQUAL(kMaskStarts[1]-kRange[0],
1552  (int)mask_loc->next->ssr->left);
1553  BOOST_REQUIRE_EQUAL(kRange[1]-kRange[0],
1554  (int)mask_loc->next->ssr->right);
1556  BlastSeqLoc_RestrictToInterval(&mask_loc, kRange[0], kRange[1]);
1558  BOOST_REQUIRE(mask_loc == NULL);
1559 }
// Body of a test case whose header line is not visible here. It builds three
// whole-sequence queries from kQueryGis, sets up the query info for them, and
// checks that BlastQueryInfoGetQueryLength (called with eBlastTypeBlastx)
// returns the length listed in kQueryLengths for each query.
// NOTE(review): `opts`, `prog`, and `query_info` are declared on lines not
// visible here.
1562 {
1563  const int kNumQueries = 3;
1564  const TGi kQueryGis[kNumQueries] = { GI_CONST(215041), GI_CONST(441158), GI_CONST(214981) };
1565  const int kQueryLengths[kNumQueries] = { 1639, 1151, 1164 };
1567  TSeqLocVector query_v;
// Each query is a "whole" Seq-loc for one GI, paired with its own scope.
1569  for (int index = 0; index < kNumQueries; ++index) {
1570  CRef<CSeq_loc> loc(new CSeq_loc());
1571  loc->SetWhole().SetGi(kQueryGis[index]);
1572  CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
1573  scope->AddDefaults();
1574  query_v.push_back(SSeqLoc(loc, scope));
1575  }
1579  const CBlastOptions& kOpts = opts->GetOptions();
1581  ENa_strand strand_opt = kOpts.GetStrandOption();
1583  SetupQueryInfo(query_v, prog, strand_opt, &query_info);
1584  for (int i = 0; i < kNumQueries; i++) {
1585  int len = BlastQueryInfoGetQueryLength(query_info,
1586  eBlastTypeBlastx, i);
1587  BOOST_REQUIRE_EQUAL(kQueryLengths[i], len);
1588  }
1589 }
// Round-trips mask offsets between nucleotide and protein coordinates for
// three queries: one nucleotide mask is placed in the first frame slot of
// each query, BlastMaskLocDNAToProtein is applied and every context is
// checked against kProtStarts/kProtEnds, then BlastMaskLocProteinToDNA is
// applied and every context is checked against kNuclStarts/kNuclEnds.
// NOTE(review): the code that fills `query_info` is on lines not visible here.
1591 BOOST_AUTO_TEST_CASE(ConvertTranslatedFilterOffsets)
1592 {
1593  const int kNumQueries = 3;
1594  CBlastQueryInfo query_info;
1595  const int kNumContexts = kNumQueries*NUM_FRAMES;
1598  BOOST_REQUIRE_EQUAL(kNumContexts, query_info->last_context + 1);
// One nucleotide-coordinate mask range per query.
1600  const SSeqRange kMasks[kNumQueries] =
1601  { { 660, 686 }, { 92, 119 }, { 1156, 1163 } };
1603  CBlastMaskLoc mask_loc(BlastMaskLocNew(kNumContexts));
1604  BOOST_REQUIRE_EQUAL(kNumContexts, mask_loc->total_size);
// Hand-build a single-node list in slot index*NUM_FRAMES for each query.
// NOTE(review): nodes come from calloc/malloc; presumably released when
// `mask_loc` is destroyed -- confirm.
1606  for (int index = 0; index < kNumQueries; index++) {
1607  BlastSeqLoc* seqloc = mask_loc->seqloc_array[index*NUM_FRAMES] =
1608  (BlastSeqLoc*) calloc(1, sizeof(BlastSeqLoc));
1609  seqloc->ssr = (SSeqRange*) malloc(sizeof(SSeqRange));
1610  seqloc->ssr->left = kMasks[index].left;
1611  seqloc->ssr->right = kMasks[index].right;
1612  }
1614  BlastMaskLocDNAToProtein(mask_loc, query_info);
1616  BOOST_REQUIRE_EQUAL(kNumContexts, mask_loc->total_size);
// Expected protein-coordinate ranges, one per context.
1618  const int kProtStarts[kNumContexts] =
1619  { 220, 219, 219, 317, 317, 316, 30, 30, 30, 343, 343, 343, 385, 385,
1620  384, 0, 0, 0 };
1621  const int kProtEnds[kNumContexts] =
1622  { 228, 228, 228, 326, 325, 325, 39, 39, 39, 352, 352, 352, 387, 386,
1623  386, 2, 2, 1 };
// Every context must now have a mask, and it must match the expected range.
1625  for (int index = 0; index < kNumContexts; ++index) {
1626  {{
1627  CNcbiOstrstream os;
1628  os << "Context " << index << " has no mask!";
1629  BOOST_REQUIRE_MESSAGE(mask_loc->seqloc_array[index],
1630  (string)CNcbiOstrstreamToString(os));
1631  }}
1632  const SSeqRange* range = mask_loc->seqloc_array[index]->ssr;
1633  CNcbiOstrstream os;
1634  os << "Context " << index;
1635  BOOST_REQUIRE_MESSAGE(kProtStarts[index] == range->left,
1636  (string)CNcbiOstrstreamToString(os));
1637  BOOST_REQUIRE_MESSAGE(kProtEnds[index] == range->right,
1638  (string)CNcbiOstrstreamToString(os));
1639  }
1641  BlastMaskLocProteinToDNA(mask_loc, query_info);
1643  BOOST_REQUIRE_EQUAL(kNumContexts, mask_loc->total_size);
// Expected nucleotide-coordinate ranges after converting back, one per
// context.
1644  const int kNuclStarts[kNumContexts] =
1645  { 660, 658, 659, 661, 663, 662, 90, 91, 92, 95, 94, 93, 1155, 1156,
1646  1154, 1158, 1157, 1159 };
1647  const int kNuclEnds[kNumContexts] =
1648  { 684, 685, 686, 687, 686, 688, 117, 118, 119, 121, 120, 119, 1161,
1649  1159, 1160, 1163, 1162, 1161 };
1651  for (int index = 0; index < kNumContexts; ++index) {
1652  {{
1653  CNcbiOstrstream os;
1654  os << "Context " << index << " has no mask!";
1655  BOOST_REQUIRE_MESSAGE(mask_loc->seqloc_array[index],
1656  (string)CNcbiOstrstreamToString(os));
1657  }}
1658  const SSeqRange* range = mask_loc->seqloc_array[index]->ssr;
1659  CNcbiOstrstream os;
1660  os << "Context " << index;
1661  BOOST_REQUIRE_MESSAGE(kNuclStarts[index] == range->left,
1662  (string)CNcbiOstrstreamToString(os));
1663  BOOST_REQUIRE_MESSAGE(kNuclEnds[index] == range->right,
1664  (string)CNcbiOstrstreamToString(os));
1665  }
1667 }
// Checks that the filter string held in `retval` equals "F".
// NOTE(review): the line declaring `retval` is not visible here; presumably
// it is the result of BlastFilteringOptionsToString called with NULL --
// confirm.
1669 BOOST_AUTO_TEST_CASE(FilterOptionsToStringFromNULL)
1670 {
1672  BOOST_REQUIRE(strcmp(retval.get(), "F") == 0);
1673 }
1675 BOOST_AUTO_TEST_CASE(FilterOptionsToStringFromMaskAtHashOnly)
1676 {
1677  SBlastFilterOptions filtering_options = { '\0' };
1678  filtering_options.mask_at_hash = true;
1679  TAutoCharPtr retval = BlastFilteringOptionsToString(&filtering_options);
1680  BOOST_REQUIRE(strcmp(retval.get(), "m;") == 0);
1681 }
// Serializes filter options that combine dust options (default window
// doubled) with repeat-filter options involving a 4096-character string of
// 'X', and checks only the beginning of the resulting filter string.
// NOTE(review): the line opening the call that receives the 4096-character
// string is not visible here; presumably it sets the repeat-filter database
// name -- confirm.
1683 BOOST_AUTO_TEST_CASE(FilterOptionsToStringLargeData)
1684 {
1685  SBlastFilterOptions filtering_options = { '\0' };
1686  SDustOptionsNew(&filtering_options.dustOptions);
1687  filtering_options.dustOptions->window *= 2;
1689  string(4096, 'X').c_str());
1691  TAutoCharPtr retval = BlastFilteringOptionsToString(&filtering_options);
// The sub-option structures are freed by hand; `filtering_options` itself is
// a stack object.
1692  SDustOptionsFree(filtering_options.dustOptions);
1693  SRepeatFilterOptionsFree(filtering_options.repeatFilterOptions);
1694  //cerr << "FilterStr ='" << retval.get() << "'" << endl;
1695  BOOST_REQUIRE(NStr::StartsWith(string(retval.get()),
1696  "D 20 128 1;R -d XXXXXXXXXXXXXXXXXXXX"));
1697 }
1699 BOOST_AUTO_TEST_CASE(FilterOptionsFromNULLString)
1700 {
1701  const EBlastProgramType kProgram = eBlastTypeBlastn;
1702  SBlastFilterOptions* filtering_options;
1703  Int2 status = BlastFilteringOptionsFromString(kProgram, NULL,
1704  &filtering_options, NULL);
1705  BOOST_REQUIRE(status == 0);
1706  BOOST_REQUIRE(filtering_options != NULL);
1707  BOOST_REQUIRE_EQUAL(false, !!filtering_options->mask_at_hash);
1708  BOOST_REQUIRE(filtering_options->segOptions == NULL);
1709  BOOST_REQUIRE(filtering_options->dustOptions == NULL);
1710  filtering_options = SBlastFilterOptionsFree(filtering_options);
1711  BOOST_REQUIRE(filtering_options == NULL);
1712 }
1714 BOOST_AUTO_TEST_CASE(FilterOptionsFromStringDustMaskAtHash)
1715 {
1716  const EBlastProgramType kProgram = eBlastTypeBlastn;
1717  SBlastFilterOptions* filtering_options;
1718  Int2 status = BlastFilteringOptionsFromString(kProgram, (char*) "m D",
1719  &filtering_options, NULL);
1720  BOOST_REQUIRE(status == 0);
1721  BOOST_REQUIRE_EQUAL(true, !!filtering_options->mask_at_hash);
1722  BOOST_REQUIRE(filtering_options->dustOptions);
1723  BOOST_REQUIRE(filtering_options->segOptions == NULL);
1725  TAutoCharPtr retval = BlastFilteringOptionsToString(filtering_options);
1726  BOOST_REQUIRE_EQUAL(string("L;m;"), string(retval.get()));
1728  filtering_options = SBlastFilterOptionsFree(filtering_options);
1729  BOOST_REQUIRE(filtering_options == NULL);
1730 }
1732 BOOST_AUTO_TEST_CASE(FilterOptionsFromStringDust)
1733 {
1734  const EBlastProgramType kProgram = eBlastTypeBlastn;
1735  SBlastFilterOptions* filtering_options;
1736  Int2 status = BlastFilteringOptionsFromString(kProgram, (char*) "D",
1737  &filtering_options, NULL);
1738  BOOST_REQUIRE(status == 0);
1739  BOOST_REQUIRE_EQUAL(false, !!filtering_options->mask_at_hash);
1740  BOOST_REQUIRE(filtering_options->dustOptions);
1741  BOOST_REQUIRE(filtering_options->segOptions == NULL);
1743  TAutoCharPtr retval = BlastFilteringOptionsToString(filtering_options);
1744  BOOST_REQUIRE(strcmp(retval.get(), "L;") == 0);
1746  filtering_options = SBlastFilterOptionsFree(filtering_options);
1747  BOOST_REQUIRE(filtering_options == NULL);
1748 }
1750 BOOST_AUTO_TEST_CASE(FilterOptionsFromStringSEGWithParams)
1751 {
1752  const EBlastProgramType kProgram = eBlastTypeBlastp;
1753  SBlastFilterOptions* filtering_options;
1754  Int2 status = BlastFilteringOptionsFromString(kProgram, (char*) "S 10 1.0 1.5", &filtering_options, NULL);
1755  BOOST_REQUIRE(status == 0);
1756  BOOST_REQUIRE_EQUAL(false, !!filtering_options->mask_at_hash);
1757  BOOST_REQUIRE(filtering_options->dustOptions == NULL);
1758  BOOST_REQUIRE(filtering_options->segOptions);
1759  BOOST_REQUIRE_EQUAL(10, filtering_options->segOptions->window);
1760  BOOST_REQUIRE_CLOSE(1.0, filtering_options->segOptions->locut, 0.01);
1761  BOOST_REQUIRE_CLOSE(1.5, filtering_options->segOptions->hicut, 0.01);
1763  TAutoCharPtr retval = BlastFilteringOptionsToString(filtering_options);
1764  BOOST_REQUIRE(strcmp(retval.get(), "S 10 1.0 1.5;") == 0);
1766  filtering_options = SBlastFilterOptionsFree(filtering_options);
1767  BOOST_REQUIRE(filtering_options == NULL);
1768 }
1770 BOOST_AUTO_TEST_CASE(FilterOptionsFromBadStringSEGWithParams)
1771 {
1772  const EBlastProgramType kProgram = eBlastTypeBlastp;
1773  SBlastFilterOptions* filtering_options;
1774  // Only three numbers are allowed.
1775  Int2 status = BlastFilteringOptionsFromString(kProgram, (char*) "S 10 1.0 1.5 1.0", &filtering_options, NULL);
1776  BOOST_REQUIRE_EQUAL(1, (int) status);
1777  BOOST_REQUIRE(filtering_options == NULL);
1778 }
1780 BOOST_AUTO_TEST_CASE(FilterOptionsFromStringBlastnL)
1781 {
1782  const EBlastProgramType kProgram = eBlastTypeBlastn;
1783  SBlastFilterOptions* filtering_options;
1784  Int2 status = BlastFilteringOptionsFromString(kProgram, (char*) "L", &filtering_options, NULL);
1785  BOOST_REQUIRE(status == 0);
1786  BOOST_REQUIRE_EQUAL(false, !!filtering_options->mask_at_hash);
1787  BOOST_REQUIRE(filtering_options->dustOptions);
1788  BOOST_REQUIRE(filtering_options->segOptions == NULL);
1790  TAutoCharPtr retval = BlastFilteringOptionsToString(filtering_options);
1791  BOOST_REQUIRE(strcmp(retval.get(), "L;") == 0);
1793  filtering_options = SBlastFilterOptionsFree(filtering_options);
1794  BOOST_REQUIRE(filtering_options == NULL);
1795 }
1796 BOOST_AUTO_TEST_CASE(FilterOptionsFromStringBlastpL)
1797 {
1798  const EBlastProgramType kProgram = eBlastTypeBlastp;
1799  SBlastFilterOptions* filtering_options;
1800  Int2 status = BlastFilteringOptionsFromString(kProgram, (char*) "L", &filtering_options, NULL);
1801  BOOST_REQUIRE(status == 0);
1802  BOOST_REQUIRE_EQUAL(false, !!filtering_options->mask_at_hash);
1803  BOOST_REQUIRE(filtering_options->dustOptions == NULL);
1804  BOOST_REQUIRE(filtering_options->segOptions);
1806  TAutoCharPtr retval = BlastFilteringOptionsToString(filtering_options);
1807  BOOST_REQUIRE(strcmp(retval.get(), "L;") == 0);
1809  filtering_options = SBlastFilterOptionsFree(filtering_options);
1810  BOOST_REQUIRE(filtering_options == NULL);
1811 }
// Parses the filter string "W -t 9606" for blastn and checks that only
// windowMaskerOptions is set (mask_at_hash, dust, seg and repeat options are
// all unset), and that converting back to a string yields "W -t 9606;".
1812 BOOST_AUTO_TEST_CASE(FilterOptionsFromStringBlastnW)
1813 {
1814  const EBlastProgramType kProgram = eBlastTypeBlastn;
1815  SBlastFilterOptions* filtering_options = NULL;
1816  Int2 status = BlastFilteringOptionsFromString(kProgram, (char*) "W -t 9606", &filtering_options, NULL);
1817  BOOST_REQUIRE(status == 0);
      // Fail the case cleanly instead of dereferencing a NULL pointer below.
      BOOST_REQUIRE(filtering_options != NULL);
1818  BOOST_REQUIRE(! filtering_options->mask_at_hash);
1819  BOOST_REQUIRE(! filtering_options->dustOptions);
1820  BOOST_REQUIRE(! filtering_options->segOptions);
1821  BOOST_REQUIRE(! filtering_options->repeatFilterOptions);
1822  BOOST_REQUIRE(filtering_options->windowMaskerOptions);
      // Round trip: the options structure must serialize back to the input
      // string followed by ';'.
1824  TAutoCharPtr retval = BlastFilteringOptionsToString(filtering_options);
1825  BOOST_REQUIRE(strcmp(retval.get(), "W -t 9606;") == 0);
      // The free function's return value is stored back and must be NULL.
1827  filtering_options = SBlastFilterOptionsFree(filtering_options);
1828  BOOST_REQUIRE(filtering_options == NULL);
1829 }
// Body of a test that exercises SBlastFilterOptionsMerge with (opt1, opt2),
// (opt1, NULL) and (NULL, opt2).
// NOTE(review): the test-case header for this body and several statements
// inside it are not visible in this excerpt; the notes below mark each gap.
1832 {
1833  const int kNewLevel = 21;
1834  const int kNewWindow = 68;
      // First operand: created with eDust, then given the level/window above.
1836  SBlastFilterOptions* opt1 = NULL;
1837  SBlastFilterOptionsNew(&opt1, eDust);
1838  opt1->dustOptions->level = kNewLevel;
1839  opt1->dustOptions->window = kNewWindow;
      // Second operand.
      // NOTE(review): as shown, opt2 is still NULL when dereferenced on the next
      // statement; the call that allocates it is presumably on a line not
      // visible here (the later checks on repeatFilterOptions suggest it is
      // created with repeat filtering) -- confirm against the full file.
1841  SBlastFilterOptions* opt2 = NULL;
1843  opt2->mask_at_hash = true;
      // NOTE(review): `result` is not declared in the visible lines.
      // Case 1: both operands. The merged result must have mask_at_hash set,
      // opt1's dust level/window, and repeat filter options.
1847  Int2 status = SBlastFilterOptionsMerge(&result, opt1, opt2);
1848  BOOST_REQUIRE_EQUAL(0, (int) status);
1849  BOOST_REQUIRE(result);
1850  BOOST_REQUIRE_EQUAL(true, !!result->mask_at_hash);
1851  BOOST_REQUIRE_EQUAL(kNewLevel, result->dustOptions->level);
1852  BOOST_REQUIRE_EQUAL(kNewWindow, result->dustOptions->window);
1853  BOOST_REQUIRE(result->repeatFilterOptions);
      // NOTE(review): result was just required to be non-NULL; the statement
      // that releases and resets it is presumably on a line not visible here.
1855  BOOST_REQUIRE(result == NULL);
      // Case 2: second operand NULL. The result keeps opt1's dust settings.
1857  status = SBlastFilterOptionsMerge(&result, opt1, NULL);
1858  BOOST_REQUIRE_EQUAL(0, (int) status);
1859  BOOST_REQUIRE(result);
1860  BOOST_REQUIRE_EQUAL(kNewLevel, result->dustOptions->level);
1861  BOOST_REQUIRE_EQUAL(kNewWindow, result->dustOptions->window);
      // NOTE(review): same gap as above before this NULL check.
1863  BOOST_REQUIRE(result == NULL);
      // Case 3: first operand NULL. The result has mask_at_hash set and repeat
      // filter options.
1865  status = SBlastFilterOptionsMerge(&result, NULL, opt2);
1866  BOOST_REQUIRE_EQUAL(0, (int) status);
1867  BOOST_REQUIRE(result);
1868  BOOST_REQUIRE_EQUAL(true, !!result->mask_at_hash);
1869  BOOST_REQUIRE(result->repeatFilterOptions);
      // NOTE(review): same gap as above before this NULL check; no release of
      // opt1/opt2 is visible before the closing brace either -- confirm.
1871  BOOST_REQUIRE(result == NULL);
1875 }
// Sets the filter string "F" on a nucleotide options handle and checks that
// mask-at-hash and dust filtering both read back false, the WindowMasker
// tax id reads back 0, and the WindowMasker database reads back NULL.
1877 BOOST_AUTO_TEST_CASE(FilterStringFalse)
1878 {
1879  CBlastNucleotideOptionsHandle nucl_handle;
1880  nucl_handle.SetFilterString("F");/* NCBI_FAKE_WARNING */
1881  BOOST_REQUIRE_EQUAL(false, nucl_handle.GetMaskAtHash());
1882  BOOST_REQUIRE_EQUAL(false, nucl_handle.GetDustFiltering());
1883  BOOST_REQUIRE_EQUAL(0, nucl_handle.GetWindowMaskerTaxId());
1884  BOOST_REQUIRE(nucl_handle.GetWindowMaskerDatabase() == NULL);
1885 }
// Sets a repeat filter string on a nucleotide options handle, then turns on
// mask-at-hash and dust filtering through their setters, and checks that both
// getters report true afterwards.
1887 BOOST_AUTO_TEST_CASE(MergeOptionHandle) {
1889  CBlastNucleotideOptionsHandle nucl_handle;
1890  nucl_handle.SetFilterString("R -d repeat/repeat_9606");/* NCBI_FAKE_WARNING */
1891  nucl_handle.SetMaskAtHash(true);
1892  nucl_handle.SetDustFiltering(true);
1893  BOOST_REQUIRE_EQUAL(true, nucl_handle.GetMaskAtHash());
1894  BOOST_REQUIRE_EQUAL(true, nucl_handle.GetDustFiltering());
1895 }
// Calls SetFilterString with a repeat filter string and `false` as the second
// argument, then checks that both dust and repeat filtering read back true.
// NOTE(review): going by the test name, the second argument presumably means
// "do not clear the existing filtering options" -- confirm against the
// SetFilterString declaration.
1897 BOOST_AUTO_TEST_CASE(OptionsHandleNotClear) {
1898  CBlastNucleotideOptionsHandle nucl_handle;
1899  nucl_handle.SetFilterString("R -d repeat/repeat_9606", false);/* NCBI_FAKE_WARNING */
1900  BOOST_REQUIRE_EQUAL(true, nucl_handle.GetDustFiltering());
1901  BOOST_REQUIRE_EQUAL(true, nucl_handle.GetRepeatFiltering());
1902 }
// Calls SetFilterString with only a repeat filter string (no second argument)
// and checks that dust filtering reads back false, repeat filtering reads back
// true, the WindowMasker tax id is 0 and the WindowMasker database is NULL.
1904 BOOST_AUTO_TEST_CASE(OptionsHandleClear) {
1905  CBlastNucleotideOptionsHandle nucl_handle;
1906  nucl_handle.SetFilterString("R -d repeat/repeat_9606");/* NCBI_FAKE_WARNING */
1907  BOOST_REQUIRE_EQUAL(false, nucl_handle.GetDustFiltering());
1908  BOOST_REQUIRE_EQUAL(true, nucl_handle.GetRepeatFiltering());
1909  BOOST_REQUIRE_EQUAL(0, nucl_handle.GetWindowMaskerTaxId());
1910  BOOST_REQUIRE(nucl_handle.GetWindowMaskerDatabase() == NULL);
1911 }
// Passes a default-constructed CPacked_seqint to Blast_GetSeqLocInfoVector.
// NOTE(review): the trailing ", CBlastException);" shows the call is the first
// argument of an enclosing macro whose opening line is not visible in this
// excerpt (presumably an exception-expecting check); the declaration of
// `mask` is not visible either -- confirm against the full file.
1913 BOOST_AUTO_TEST_CASE(GetSeqLocInfoVector_EmptyQueryIdVector) {
1915  CPacked_seqint empty_seqids;
1916  TSeqLocInfoVector mask_v;
1918  Blast_GetSeqLocInfoVector(eBlastTypeBlastp, empty_seqids, mask, mask_v),
1919  CBlastException);
1920 }
1922 // Check that the conversion function will now create a vector of empty
1923 // mask lists.
// The test converts a mask structure sized for kNumSeqs queries and requires
// one output entry per query, each with size 0.
1924 BOOST_AUTO_TEST_CASE(GetSeqLocInfoVector_EmptyMasks) {
1925  const EBlastProgramType kProgram = eBlastTypeBlastn;
1926  const size_t kNumSeqs = 10;
      // NOTE(review): the start of this declaration (the type and the name
      // `mask`) is not visible in this excerpt. The visible initializer
      // allocates kNumSeqs * GetNumberOfContexts(kProgram) entries.
1928  (BlastMaskLocNew(kNumSeqs*GetNumberOfContexts(kProgram)));
1930  // since the masks won't have any data in them, we don't care about the
1931  // Seq-id's passed in
      // kNumSeqs identical ranges, all attached to the same Seq-id.
1932  const CPacked_seqint::TRanges ranges(kNumSeqs, TSeqRange(0, 100000));
1933  CSeq_id seqid(CSeq_id::e_Gi, 555);
1934  CPacked_seqint seqintervals(seqid, ranges);
1936  TSeqLocInfoVector mask_v;
1938  Blast_GetSeqLocInfoVector(kProgram, seqintervals, mask, mask_v);
      // One (empty) mask list is expected per query sequence.
1940  BOOST_REQUIRE_EQUAL((size_t)kNumSeqs, (size_t)mask_v.size());
1941  ITERATE(TSeqLocInfoVector, query_masks_list, mask_v) {
1942  BOOST_REQUIRE_EQUAL((size_t)0U, query_masks_list->size());
1943  }
1944 }
// Builds a BlastSeqLoc list from seven intervals, runs BlastSeqLocCombine on
// it, and checks that the result has exactly four nodes whose left/right
// bounds match the expected arrays in order.
1946 BOOST_AUTO_TEST_CASE(BlastSeqLocCombineTest) {
      // Input intervals (start/end pairs by index).
1947  const int kNumberLocIn = 7;
1948  const int kLocStartIn[kNumberLocIn] =
1949  { 281312, 281356, 281416, 281454, 281895, 282435, 282999};
1950  const int kLocEndIn[kNumberLocIn] =
1951  { 281736, 281406, 281446, 281878, 282423, 282968, 283191};
      // Expected output: the first four inputs collapse into 281312-281878;
      // the remaining three are expected unchanged.
1953  const int kNumberLocOut = 4;
1954  const int kLocStartOut[kNumberLocOut] =
1955  { 281312, 281895, 282435, 282999};
1956  const int kLocEndOut[kNumberLocOut] =
1957  { 281878, 282423, 282968, 283191};
1959  BlastSeqLoc *head = NULL;
1960  for (int index=0; index<kNumberLocIn; index++)
1961  {
1962  BlastSeqLocNew(&head, kLocStartIn[index],
1963  kLocEndIn[index]);
1964  }
      // Combine in place; from here on the list is tracked through `result`
      // and `head` is cleared.
1966  BlastSeqLocCombine(&head, 0);
1967  BlastSeqLoc* result = head;
1968  head = NULL;
      // First pass: count the nodes. BOOST_REQUIRE_EQUAL aborts the case on a
      // mismatch, so the second pass cannot index past the expected arrays.
1970  int count = 0;
1971  BlastSeqLoc* var = result;
1972  while (var)
1973  {
1974  var = var->next;
1975  count++;
1976  }
1977  BOOST_REQUIRE_EQUAL(count, kNumberLocOut);
      // Second pass: compare each node's range with the expected bounds.
1979  var = result;
1980  count = 0;
1981  while (var)
1982  {
1983  SSeqRange* ssr = var->ssr;
1984  BOOST_REQUIRE_EQUAL(ssr->left, kLocStartOut[count]);
1985  BOOST_REQUIRE_EQUAL(ssr->right, kLocEndOut[count]);
1986  var = var->next;
1987  count++;
1988  }
      // NOTE(review): the statement that frees the list and resets `result` is
      // presumably on a line not visible in this excerpt -- confirm.
1991  BOOST_REQUIRE(result == NULL);
1992 }
// Runs x_TestGetSeqLocInfoVector for every entry of `programs` combined with
// three randomly chosen sequence counts.
1994 BOOST_AUTO_TEST_CASE(GetSeqLocInfoVector_AllPrograms) {
      // NOTE(review): the initializer of `programs` is not visible in this
      // excerpt -- confirm against the full file.
1995  vector<EBlastProgramType> programs =
1998  // Generate the different number of sequences to pass to test function
      // NOTE(review): the generator is seeded from time(0), so the three counts
      // (each drawn with GetRand(1,10)) differ between runs; a failure may not
      // reproduce unless the counts are reported or the seed is fixed.
1999  CRandom random_gen((CRandom::TValue)time(0));
2000  vector<int> num_seqs_array;
2001  num_seqs_array.reserve(3);
2002  num_seqs_array.push_back(random_gen.GetRand(1,10));
2003  num_seqs_array.push_back(random_gen.GetRand(1,10));
2004  num_seqs_array.push_back(random_gen.GetRand(1,10));
      // Cross product: every program with every sequence count.
2006  ITERATE(vector<EBlastProgramType>, program, programs) {
2007  ITERATE(vector<int>, num_seqs, num_seqs_array) {
2008  x_TestGetSeqLocInfoVector(*program, *num_seqs);
2009  }
2010  }
2012 }
2015  /// Test the dust filtering API on a mixed Seqloc input.
      // Builds a Seq-loc "mix" of kNumInts intervals on one Seq-id, runs
      // Blast_FindDustFilterLoc with dust filtering enabled, and compares the
      // packed-int mask intervals with the expected start/stop arrays.
2016  BOOST_AUTO_TEST_CASE(DustSeqlocMix) {
      // From/To of the intervals that make up the query location.
2017  const int kNumInts = 20;
2018  const int kStarts[kNumInts] =
2019  { 838, 1838, 6542, 7459, 9246, 10431, 14807, 16336, 19563,
2020  20606, 21232, 22615, 23822, 27941, 29597, 30136, 31287,
2021  31786, 33315, 35402 };
2022  const int kEnds[kNumInts] =
2023  { 961, 2010, 6740, 7573, 9408, 10609, 15043, 16511, 19783,
2024  20748, 21365, 22817, 24049, 28171, 29839, 30348, 31362,
2025  31911, 33485, 37952 };
      // Expected From/To of the mask intervals; the #else branch is the one
      // compiled.
2026 #if 0 // These are the locations produced directly by CSymDustMasker
2027  const int kNumMaskLocs = 7;
2028  const int kMaskStarts[kNumMaskLocs] =
2029  { 2607, 3000, 3739, 4238, 5211, 5602, 5716 };
2030  const int kMaskStops[kNumMaskLocs] =
2031  { 2769, 3006, 3809, 4244, 5218, 5608, 5722 };
2032 #else // These are locations that have been mapped to the full sequence scale
2033  const int kNumMaskLocs = 8;
2034  const int kMaskStarts[kNumMaskLocs] =
2035  { 29678, 30136, 31305, 35786, 36285, 37258, 37649, 37763 };
2036  const int kMaskStops[kNumMaskLocs] =
2037  { 29839, 30136, 31311, 35856, 36291, 37265, 37655, 37769 };
2038 #endif
2040  int index;
      // Assemble the mixed location: one interval Seq-loc per start/end pair.
2042  CSeq_id qid("gi|3417288");
2043  CRef<CSeq_loc> qloc(new CSeq_loc());
2044  for (index = 0; index < kNumInts; ++index) {
2045  CRef<CSeq_loc> next_loc(new CSeq_loc());
2046  next_loc->SetInt().SetFrom(kStarts[index]);
2047  next_loc->SetInt().SetTo(kEnds[index]);
2048  next_loc->SetInt().SetId(qid);
2049  qloc->SetMix().Set().push_back(next_loc);
2050  }
2052  CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
2053  scope->AddDefaults();
      // A copy of the SSeqLoc is stored in the query vector.
2055  unique_ptr<SSeqLoc> query(new SSeqLoc(qloc, scope));
2056  TSeqLocVector query_v;
2057  query_v.push_back(*query);
2059  CBlastNucleotideOptionsHandle nucl_handle;
2060  nucl_handle.SetDustFiltering(true);
2061  Blast_FindDustFilterLoc(query_v, &nucl_handle);
      // Compare every returned mask interval with the expected bounds.
2063  int loc_index = 0;
2064  ITERATE(list< CRef<CSeq_interval> >, itr,
2065  query_v[0].mask->GetPacked_int().Get()) {
      // Guard the array accesses: if more intervals come back than expected,
      // fail here rather than read past the end of kMaskStarts/kMaskStops
      // (the count check after the loop would fire too late).
      BOOST_REQUIRE(loc_index < kNumMaskLocs);
2066  BOOST_REQUIRE_EQUAL(kMaskStarts[loc_index],
2067  (int) (*itr)->GetFrom());
2068  BOOST_REQUIRE_EQUAL(kMaskStops[loc_index],
2069  (int) (*itr)->GetTo());
2070  ++loc_index;
2071  }
      // Exactly kNumMaskLocs intervals must have been visited.
2072  BOOST_REQUIRE_EQUAL(kNumMaskLocs, loc_index);
2073  }
2074 #endif
// Checks that BlastSeqLocCombine reduces five ranges, all lying within
// [0, 100], to the single range (0, 100).
2076 BOOST_AUTO_TEST_CASE(TestBlastSeqLocCombine_MergeElems)
2077 {
2078  TRangeVector rv;
2079  rv.push_back(TRangeVector::value_type(10, 77));
2080  rv.push_back(TRangeVector::value_type(0, 100));
2081  rv.push_back(TRangeVector::value_type(20, 45));
2082  rv.push_back(TRangeVector::value_type(3, 50));
2083  rv.push_back(TRangeVector::value_type(10, 77));
      // NOTE(review): the declaration of `mask` is not visible in this
      // excerpt; it is presumably built from `rv` -- confirm.
2086  BlastSeqLocCombine(&mask, 0);
      // Expected result after combining.
2087  TRangeVector merged_rv;
2088  merged_rv.push_back(TRangeVector::value_type(0, 100));
      // Walk the list and the expected ranges in lockstep; the list must end
      // exactly when the expected ranges do.
2090  BlastSeqLoc* mask_itr = mask;
2091  ITERATE(TRangeVector, itr, merged_rv) {
2092  BOOST_REQUIRE(mask_itr != NULL);
2093  BOOST_REQUIRE_EQUAL((int)itr->GetFrom(), (int)mask_itr->ssr->left);
2094  BOOST_REQUIRE_EQUAL((int)itr->GetTo(), (int)mask_itr->ssr->right);
2095  mask_itr = mask_itr->next;
2096  }
2097  BOOST_REQUIRE(mask_itr == NULL);
      // NOTE(review): the statement that frees and resets `mask` is presumably
      // on a line not visible here -- confirm.
2100  BOOST_REQUIRE(mask == NULL);
2101 }
// Checks BlastSeqLocCombine on input that contains a duplicated range
// (380, 684) and two ranges sharing a start (78, 207) / (78, 212). The
// expected output has four ranges in ascending order, with the duplicate
// removed and the shared-start pair reduced to (78, 212).
2103 BOOST_AUTO_TEST_CASE(TestBlastSeqLocCombine_MergeIdenticals)
2104 {
2105  TRangeVector rv;
2106  rv.push_back(TRangeVector::value_type(380, 684));
2107  rv.push_back(TRangeVector::value_type(0, 74));
2108  rv.push_back(TRangeVector::value_type(78, 207));
2109  rv.push_back(TRangeVector::value_type(695, 776));
2110  rv.push_back(TRangeVector::value_type(380, 684));
2111  rv.push_back(TRangeVector::value_type(78, 212));
      // NOTE(review): the declaration of `mask` is not visible in this
      // excerpt; it is presumably built from `rv` -- confirm.
2114  BlastSeqLocCombine(&mask, 0);
      // Expected result after combining.
2115  TRangeVector merged_rv;
2116  merged_rv.push_back(TRangeVector::value_type(0, 74));
2117  merged_rv.push_back(TRangeVector::value_type(78, 212));
2118  merged_rv.push_back(TRangeVector::value_type(380, 684));
2119  merged_rv.push_back(TRangeVector::value_type(695, 776));
      // Walk the list and the expected ranges in lockstep; the list must end
      // exactly when the expected ranges do.
2121  BlastSeqLoc* mask_itr = mask;
2122  ITERATE(TRangeVector, itr, merged_rv) {
2123  BOOST_REQUIRE(mask_itr != NULL);
2124  BOOST_REQUIRE_EQUAL((int)itr->GetFrom(), (int)mask_itr->ssr->left);
2125  BOOST_REQUIRE_EQUAL((int)itr->GetTo(), (int)mask_itr->ssr->right);
2126  mask_itr = mask_itr->next;
2127  }
2128  BOOST_REQUIRE(mask_itr == NULL);
      // NOTE(review): the statement that frees and resets `mask` is presumably
      // on a line not visible here -- confirm.
2131  BOOST_REQUIRE(mask == NULL);
2132 }
// Checks BlastSeqLocCombine on three ranges that do not overlap: the expected
// output contains the same three ranges, listed in ascending order.
2134 BOOST_AUTO_TEST_CASE(TestBlastSeqLocCombine_NoMerging)
2135 {
2136  TRangeVector rv;
2137  rv.push_back(TRangeVector::value_type(10, 77));
2138  rv.push_back(TRangeVector::value_type(250, 3400));
2139  rv.push_back(TRangeVector::value_type(3, 8));
      // NOTE(review): the declaration of `mask` is not visible in this
      // excerpt; it is presumably built from `rv` -- confirm.
2142  BlastSeqLocCombine(&mask, 0);
      // Expected result after combining.
2143  TRangeVector merged_rv;
2144  merged_rv.push_back(TRangeVector::value_type(3, 8));
2145  merged_rv.push_back(TRangeVector::value_type(10, 77));
2146  merged_rv.push_back(TRangeVector::value_type(250, 3400));
      // Walk the list and the expected ranges in lockstep; the list must end
      // exactly when the expected ranges do.
2148  BlastSeqLoc* mask_itr = mask;
2149  ITERATE(TRangeVector, itr, merged_rv) {
2150  BOOST_REQUIRE(mask_itr != NULL);
2151  BOOST_REQUIRE_EQUAL((int)itr->GetFrom(), (int)mask_itr->ssr->left);
2152  BOOST_REQUIRE_EQUAL((int)itr->GetTo(), (int)mask_itr->ssr->right);
2153  mask_itr = mask_itr->next;
2154  }
2155  BOOST_REQUIRE(mask_itr == NULL);
      // NOTE(review): the statement that frees and resets `mask` is presumably
      // on a line not visible here -- confirm.
2158  BOOST_REQUIRE(mask == NULL);
2159 }
// Declared here with C linkage so the test below can use it.
2161 extern "C" void BlastSeqLocListReverse(BlastSeqLoc** head);
// Compares a BlastSeqLoc list against the input ranges taken in reverse order.
// NOTE(review): the lines that build `mask` and call BlastSeqLocListReverse
// are not visible in this excerpt -- confirm against the full file.
2163 BOOST_AUTO_TEST_CASE(TestBlastSeqLocListReverse)
2164 {
2165  TRangeVector rv;
2166  rv.push_back(TRangeVector::value_type(10, 77));
2167  rv.push_back(TRangeVector::value_type(0, 100));
2168  rv.push_back(TRangeVector::value_type(3, 50));
      // Reverse the reference vector so it can be compared element by element
      // with the list.
2172  reverse(rv.begin(), rv.end());
      // Walk the list and the reversed ranges in lockstep; the list must end
      // exactly when the ranges do.
2174  BlastSeqLoc* mask_itr = mask;
2175  ITERATE(TRangeVector, itr, rv) {
2176  BOOST_REQUIRE(mask_itr != NULL);
2177  BOOST_REQUIRE_EQUAL((int)itr->GetFrom(), (int)mask_itr->ssr->left);
2178  BOOST_REQUIRE_EQUAL((int)itr->GetTo(), (int)mask_itr->ssr->right);
2179  mask_itr = mask_itr->next;
2180  }
2181  BOOST_REQUIRE(mask_itr == NULL);
      // NOTE(review): the statement that frees and resets `mask` is presumably
      // on a line not visible here -- confirm.
2184  BOOST_REQUIRE(mask == NULL);
2185 }
// Requires that the set of tax ids is non-empty and contains 9606.
// NOTE(review): the call that fills `taxids` is not visible in this excerpt;
// as shown the set is default-constructed and would be empty -- confirm
// against the full file.
2187 BOOST_AUTO_TEST_CASE(TestGetTaxIdWithWindowMaskerSupport)
2188 {
2189  set<int> taxids;
2191  BOOST_REQUIRE(taxids.empty() == false);
2192  BOOST_REQUIRE(taxids.find(9606) != taxids.end());
2193 }
