57 const string & summary_name,
58 GA::EGapType eGapType,
59 const GA::TVectorGapLengthSummary & summary )
61 cerr <<
"BEGIN " << GA::s_GapTypeToStr(eGapType)
62 <<
" SUMMARY for " << summary_name << endl;
63 ITERATE( GA::TVectorGapLengthSummary, summary_iter,
66 const GA::SOneGapLengthSummary & one_summary =
68 cerr <<
"\tlen: " << one_summary.gap_length
69 <<
"\tnum seqs: " << one_summary.num_seqs
70 <<
"\tnum gaps: " << one_summary.num_gaps << endl;
72 cerr <<
"END GAP SUMMARY" << endl;
75 void s_PrintSummaryForAllGapTypes(
76 const string & summary_name,
80 cout <<
"GAP SUMMARY FOR ALL GAP_TYPES" << endl;
82 GA::EGapType gap_types[] = {
84 GA::eGapType_UnknownBases
87 GA::EGapType eGapType = gap_types[gap_idx];
108 s_scope->AddDefaults();
112 void LoadSeqEntryIntoGapAnalysis(
115 GA::TAddFlag add_flags,
116 GA::TFlag fFlags = 0,
118 bool do_rev_comp =
false)
123 : CreateScope().GetPointer() );
129 if( entry->IsSeq() ) {
131 entry->SetSeq().SetInst(), scope.GetPointer());
137 scope->AddTopLevelSeqEntry(*pSeqEntry);
138 BOOST_REQUIRE( entry_h );
150 GA::TAddFlag add_flags = GA::fAddFlag_All,
151 GA::EGapType gap_type = GA::eGapType_All,
152 GA::TFlag fFlags = 0)
154 in_text_asn.seekg(0);
160 LoadSeqEntryIntoGapAnalysis(
161 gap_analysis, pSeqEntry, add_flags, fFlags);
166 struct SExpectedResult {
171 bool empty(
void)
const {
173 return 0 == (
gap_length | num_seqs | num_gaps); }
176 static const SExpectedResult s_ExpectedResultEnd = {0, 0, 0};
179 ostream& s,
const SExpectedResult & expected_result)
181 s <<
"SExpectedResult ("
182 <<
"gap_length: " << expected_result.gap_length
183 <<
", num_seqs: " << expected_result.num_seqs
184 <<
", num_gaps: " << expected_result.num_gaps <<
")";
189 ostream& s,
const SExpectedResult expected_results[] )
191 s <<
"The expected results: (" << endl;
193 for(
size_t idx = 0; ! expected_results[idx].empty(); ++idx ) {
194 const SExpectedResult & one_expected_result =
195 expected_results[idx];
196 s << one_expected_result << endl;
203 void CheckExpectedResults(
204 const GA::TVectorGapLengthSummary & basic_gap_summary,
207 const SExpectedResult expected_results[])
209 cout <<
"CheckExpectedResults basic_gap_summary: "
210 << basic_gap_summary << endl;
213 for( ; idx < basic_gap_summary.size(); ++idx) {
214 const GA::SOneGapLengthSummary & one_gap_summary =
215 *basic_gap_summary[idx];
216 const SExpectedResult & one_expected_result =
217 expected_results[idx];
219 one_gap_summary.gap_length, one_expected_result.gap_length);
221 one_gap_summary.num_seqs, one_expected_result.num_seqs);
223 one_gap_summary.num_gaps, one_expected_result.num_gaps);
225 BOOST_CHECK( expected_results[idx].
empty() );
229 struct SGapTypeExpectedResult {
230 const GA::EGapType gap_type;
233 const SExpectedResult expected_result[5];
236 void CheckGapTypeExpectedResult(
238 const SGapTypeExpectedResult & gap_type_expected_result,
239 const GA::TVectorGapLengthSummary & basic_gap_summary)
241 cout <<
"In " <<
test_name <<
" running expected results for "
242 << GA::s_GapTypeToStr(gap_type_expected_result.gap_type) <<
": "
243 << gap_type_expected_result.expected_result << endl;
245 CheckExpectedResults(
247 gap_type_expected_result.expected_result);
256 arg_desc->AddKey(
"basic-data",
"InputFile",
257 "This is the basic input file used to run the test",
260 "in-letter-gap-data",
"InputFile",
261 "This is the input file used to run the "
262 "'gaps as run of unknown bases' test",
265 "mixed-gap-type-data",
"InputFile",
266 "This is the input file used to run the "
267 "'gaps as run of unknown bases and seq-gaps' test, distinguishing "
277 CNcbiIfstream basic_data_fstrm(args[
"basic-data"].AsString().c_str());
279 AnalyzeSeqEntryTextAsn(
280 basic_data_fstrm, GA::fAddFlag_All, GA::eGapType_All,
281 GA::fFlag_IncludeEndGaps);
283 "TestBasic - basic-data", GA::eGapType_All, *basic_gap_summary);
285 SExpectedResult expected_results[] = {
292 CheckExpectedResults(*basic_gap_summary, expected_results);
303 const GA::TGapLength kGapLength = 10;
304 const TSeqPos kBioseqlen = 100;
308 GA::eGapType_All, pSeqId1, kGapLength, kBioseqlen, 2, 12);
310 GA::eGapType_All, pSeqId2, kGapLength, kBioseqlen, 20, 30);
312 GA::eGapType_All, pSeqId3, kGapLength, kBioseqlen, 40, 50);
316 kGapLength)->second.size(),
324 CNcbiIfstream gap_data_strm(args[
"in-letter-gap-data"].AsString().c_str());
326 AnalyzeSeqEntryTextAsn(
327 gap_data_strm, GA::fAddFlag_IncludeUnknownBases,
328 GA::eGapType_All, GA::fFlag_IncludeEndGaps);
330 "TestGapsAsLetters - in-letter-gap-data",
331 GA::eGapType_All, *basic_gap_summary);
333 SExpectedResult expected_results[] = {
339 CheckExpectedResults(*basic_gap_summary, expected_results);
347 args[
"mixed-gap-type-data"].AsString().c_str());
352 LoadSeqEntryIntoGapAnalysis(
353 gap_analysis, pSeqEntry, GA::fAddFlag_All,
354 GA::fFlag_IncludeEndGaps);
355 s_PrintSummaryForAllGapTypes(
356 "TestEndGaps - mixed-gap-type-data", gap_analysis);
358 const SGapTypeExpectedResult gap_type_expected_results[] = {
376 GA::eGapType_UnknownBases,
388 CheckGapTypeExpectedResult(
390 gap_type_expected_results[gap_type_idx],
392 gap_type_expected_results[gap_type_idx].gap_type));
403 const SGapTypeExpectedResult gap_type_expected_results[] = {
419 GA::eGapType_UnknownBases,
431 cout <<
"TestAllGapTypes "
432 << (is_minus_strand ?
"minus" :
"plus")
433 <<
" strand" << endl;
436 args[
"mixed-gap-type-data"].AsString().c_str());
441 LoadSeqEntryIntoGapAnalysis(
443 gap_analysis, pSeqEntry, GA::fAddFlag_All, 0,
NULL,
true);
444 s_PrintSummaryForAllGapTypes(
445 "TestAllGapTypes - mixed-gap-type-data", gap_analysis);
449 const SGapTypeExpectedResult & gap_type_expected_result =
450 gap_type_expected_results[gap_type_idx];
452 CheckGapTypeExpectedResult(
454 gap_type_expected_result,
456 gap_type_expected_result.gap_type));
467 cout <<
"(NULL)" << endl;
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Give this gaps, or handles containing gaps, and you can then get statistics on those gaps.
const TMapGapLengthToSeqIds & GetGapLengthSeqIds(EGapType eGapType) const
Returns a map of gap_length to the set of all seq-ids that contain at least one gap of that length.
void AddSeqEntryGaps(const CSeq_entry_Handle &entry_h, CSeq_inst::EMol filter=CSeq_inst::eMol_not_set, CBioseq_CI::EBioseqLevelFlag level=CBioseq_CI::eLevel_All, TAddFlag add_flags=fAddFlag_All, TFlag fFlags=0, size_t max_resolve_count=kMax_Int)
Calls AddGap for each gap anywhere under the given CSeq_entry.
void AddGap(EGapType eGapType, TSeqIdConstRef pSeqId, TGapLength iGapLength, TSeqPos iBioseqLength, TSeqPos iGapStartPos, TSeqPos iGapEndPosExclusive, TFlag fFlags=0)
AddSeqEntryGaps is more convenient, but if you want finer-grained control you can use this function t...
AutoPtr< TVectorGapLengthSummary > GetGapLengthSummary(EGapType eGapType, ESortGapLength eSortGapLength=eSortGapLength_Length, ESortDir eSortDir=eSortDir_Ascending) const
This gives summary information about every gap-length encountered so far.
static CNcbiApplication * Instance(void)
Singleton method.
Base class for all serializable objects.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Template class for iteration on objects of class C.
const_iterator find(const key_type &key) const
CNcbiOstream & operator<<(CNcbiOstream &out, const CEquivRange &range)
static char test_name[128]
Analyzes gaps and produces various statistics.
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
#define ITERATE_0_IDX(idx, up_to)
idx loops from 0 (inclusive) to up_to (exclusive)
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ITERATE_BOTH_BOOL_VALUES(BoolVar)
The body of the loop will be run with Var equal to false and then true.
@ eInputFile
Name of file (must exist and be readable)
#define MSerial_AsnText
I/O stream manipulators.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void Reset(void)
Reset reference object.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
#define DEFINE_STATIC_FAST_MUTEX(id)
Define static fast mutex and initialize it.
@ eMol_not_set
> cdna = rna
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returns GAP block length.
constexpr bool empty(list< Ts... >) noexcept
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
CRef< objects::CObjectManager > om
Utility stuff for more convenient use of the Boost.Test library.
BOOST_AUTO_TEST_CASE(TestBasic)
NCBITEST_INIT_CMDLINE(arg_desc)
void PS(const CSerialObject *obj)