NCBI C++ ToolKit
unit_test_fasta_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_fasta_reader.cpp 78088 2017-05-24 15:31:29Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Michael Kornbluh, NCBI
27 * (initial skeleton generated by script written by Pavel Ivanov)
28 *
29 * File Description:
30 * Does misc tests on the CFastaReader that aren't already covered by
31 * test_fasta_round_trip, etc.
32 *
33 *
34 * ===========================================================================
35 */
36 
37 // TODO: This test shows a lot of NULL pointer exceptions when it runs which,
38 // although they do not indicate a problem, could cause confusion.
39 
40 #include <ncbi_pch.hpp>
41 
42 #include <corelib/ncbi_system.hpp>
43 #include <corelib/ncbiapp.hpp>
44 
45 // This header must be included before all Boost.Test headers if there are any
46 #include <corelib/test_boost.hpp>
47 
48 #include <corelib/rwstream.hpp>
49 #include <corelib/stream_utils.hpp>
50 #include <corelib/ncbimisc.hpp>
51 
53 
56 
57 #include <serial/objistr.hpp>
58 
60 
61 #include <objmgr/seq_vector.hpp>
62 
// Evaluates the given expression and swallows any exception it throws.
// Meant for call sites where the test does not care whether it throws.
#define IGNORE_ANY_THROWS(expr_) \
    try {                        \
        ((expr_), true);         \
    } catch (...) {              \
    }
65 
68 
69 namespace {
70 
71  class CIgnoreBelowWarningMessageListener :
73  {
74  public:
75  CIgnoreBelowWarningMessageListener() {};
76  ~CIgnoreBelowWarningMessageListener() {};
77 
78  bool PutError(
79  const ILineError& err )
80  {
81  switch( err.Severity() ) {
82  case eDiag_Info:
83  case eDiag_Trace:
84  // don't store things below warning
85  return true;
86  case eDiag_Warning:
87  StoreError(err);
88  return true;
89  default:
90  StoreError(err);
91  return false;
92  }
93  };
94  };
95 }
96 
97 namespace {
98 
99  // Each SWarningTest has one SOneWarningsInfo for each
100  // warning that might appear.
101  struct SOneWarningsInfo {
102  ILineError::EProblem m_eType;
103  string m_sFeatureName; // can be empty
104  unsigned int m_iLineNumExpected; // might be zero for multiple-line errors
105 
106  int Compare(const SOneWarningsInfo & rhs) const;
107 
108  bool operator == (const SOneWarningsInfo & rhs) const { return 0 == Compare(rhs); }
109  bool operator != (const SOneWarningsInfo & rhs) const { return 0 != Compare(rhs); }
110  bool operator < (const SOneWarningsInfo & rhs) const { return Compare(rhs) < 0; }
111  };
112 
113  int SOneWarningsInfo::Compare(const SOneWarningsInfo & rhs) const {
114  if( m_eType != rhs.m_eType ) {
115  return static_cast<int>(m_eType) -
116  static_cast<int>(rhs.m_eType);
117  }
118  int feat_comparison = m_sFeatureName.compare(rhs.m_sFeatureName);
119  if( 0 != feat_comparison ) {
120  return feat_comparison;
121  }
122  return m_iLineNumExpected - rhs.m_iLineNumExpected;
123  }
124 
125  ostream & operator <<( ostream & ostrm, const SOneWarningsInfo & info )
126  {
127  ostrm << "(problem: " << ILineError::ProblemStr(info.m_eType)
128  << ", feature: " << info.m_sFeatureName
129  << ", line num: " << info.m_iLineNumExpected << ")";
130  return ostrm;
131  }
132 
133  // represents information about one test of the CFastaReader warning system
134  struct SWarningTest {
135 
136  string m_sName; // easier than array index for humans to understand
137  // In m_warnings_expected, the list of warnings ends with a problem of value 0
138  // (although that's really enum eProblem_UnrecognizedFeatureName, that problem
139  // is irrelevant for FASTA)
140  SOneWarningsInfo m_warnings_expected[ILineError::eProblem_GeneralParsingError];
141  CFastaReader::TFlags m_fFastaFlags;
142  string m_sInputFASTA;
143  };
144 
145  const static CFastaReader::TFlags kDefaultFastaReaderFlags =
148 
149 
150  const static CFastaReader::TFlags kProtFastaReaderFlags =
154 
155 
156  // list of FASTA warning tests
157  const SWarningTest fasta_warning_test_arr[] = {
158 
159  {
160  "test case of no warnings",
161 
162  { },
163  kDefaultFastaReaderFlags, // CFastaReader flags
164  "> blah \n"
165  "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n"
166  },
167 
168  {
169  "title too long",
170 
171  {
172  { ILineError::eProblem_TooLong, "defline", 1 },
173  },
174  kDefaultFastaReaderFlags, // CFastaReader flags
175  "> blah ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ ABCDEFGHIJ\n"
176  "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n"
177  },
178 
179  {
180  "nucs in title",
181 
182  {
184  },
185  kDefaultFastaReaderFlags, // CFastaReader flags
186  "> blah ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n"
187  "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n"
188  },
189 
190  {
191  "too many ambig on first line",
192 
193  {
194  { ILineError::eProblem_TooManyAmbiguousResidues, "first data line", 2 },
195  },
196  kDefaultFastaReaderFlags, // CFastaReader flags
197  "> blah\n"
198  "ACGTACGTACGTNNNNNNNNNNNNNNNTUYYYYYYYYYYYYYYYYYYYYYYYYYYTACGT\n"
199  },
200 
201  {
202  "invalid residue on first line",
203 
204  {
206  },
207  kDefaultFastaReaderFlags, // CFastaReader flags
208  "> blah\n"
209  "ACEACGTAEEEACGTACGTACGTACGTACGTACGTACGTACGTACGT\n"
210  },
211 
212  {
213  "invalid residue on subsequent line",
214 
215  {
217  },
218  kDefaultFastaReaderFlags, // CFastaReader flags
219  "> blah\n"
220  "ACGACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n"
221  "ACEACGTACGTACGTAEETACGTACGTACGTACGTACGTACGTACGT\n"
222  },
223  {
224  "amino acids in title",
225 
226  {
228  },
229  kProtFastaReaderFlags, // CFastaReader flags
230  "> blah ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ\n"
231  },
232  {
233  "trigger as many warnings as possible",
234 
235  {
236  { ILineError::eProblem_TooLong, "defline", 1 },
238  { ILineError::eProblem_TooManyAmbiguousResidues, "first data line", 2 },
241  },
242  kDefaultFastaReaderFlags, // CFastaReader flags
243  "> blah [topology=linear] ACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTACACGTACGTAC\n"
244  "ACGACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGTEACGTACGTACGT\n"
245  },
246 
247  {
248  "invalid residue on multiple lines",
249 
250  {
252  },
253  kDefaultFastaReaderFlags, // CFastaReader flags
254  "> blah\n"
255  "ACACEACGTACGTACGTEEEETACGTACGTACGTACGTACGTACGTA\n"
256  "ACEACGTACGTACGTAEETACGTACGTACGTACGTACGTACGTACGT\n"
257  },
258 
259  {
260  "Make sure it reads modifiers if requested",
261 
262  {
263  },
264  // Note: non-default flags
265  CFastaReader::fAddMods | kDefaultFastaReaderFlags, // CFastaReader flags
266  "> blah [topology=linear]\n"
267  "ACACAACGTACGTACGTAAAATACGTACGTACGTACGTACGTACGTA\n"
268  "ACAACGTACGTACGTAAATACGTACGTACGTACGTACGTACGTACGT\n"
269  },
270 
271  {
272  "Test unexpected mods on line other than first line",
273 
274  {
276  },
277  kDefaultFastaReaderFlags, // CFastaReader flags
278  ">blah \n"
279  "ACACAACGTACGTACGTAAAATACGTACGTACGTACGTACGTACGTA\n"
280  "ACAACGTACGTACGTAAATACGTACGTACGTACGTACGTACGTACGT\n"
281  ">blahblah [topology=linear]\n"
282  "TCACAACGTACGTACGTAAAATACGTACGTACGTACGTACGTACGTA\n"
283  "ACAACGTACGTACGTAAATACGTACGTACGTACGTACGTACGTACGT\n"
284  ">blahblah2 \n"
285  "GCACAACGTACGTACGTAAAATACGTACGTACGTACGTACGTACGTA\n"
286  "ACAACGTACGTACGTAAATACGTACGTACGTACGTACGTACGTACGT\n"
287  },
288 
289  {
290  "Test that having no residues is fine if the right flag is set.",
291 
292  {
293  },
294  kDefaultFastaReaderFlags | CFastaReader::fNoSeqData, // CFastaReader flags
295  ">blah \n"
296  ">blah2 \n"
297  ">blah3 \n"
298  }
299  };
300 }
301 
302 namespace {
303  template<typename TObjRef>
304  void s_LoadObjectRefFromTextASN(
305  TObjRef & pObj, const CTempString & sTextASN )
306  {
307  auto_ptr<CObjectIStream> pObjIStrm(
310  sTextASN.data(), sTextASN.length() ) );
311  pObjIStrm->Read( &*pObj,
312  pObj->GetThisTypeInfo() );
313  }
314 }
315 
316 BOOST_AUTO_TEST_CASE(TestBadResidues)
317 {
318  const string kData =
319  ">Seq1\n"
320  "AC/TACGTACGTACGTACGTACGTACGTAC/TACGTACGTACGTACGT\n"
321  "AC/TACGTACGTACGTUCGTACGTACGTACGTACGTACGTACGTACGT\n"
322  "AC/TACGTACGTACGTACGTACGTAC/TACGTACGTACGTACGTACGT\n";
323  const static CFastaReader::TFlags kFlags =
327 
328  CMemoryLineReader line_reader( kData.c_str(), kData.length() );
329  CFastaReader fasta_reader( line_reader, kFlags );
330  try {
331  fasta_reader.ReadOneSeq();
332  BOOST_ERROR("Bad residue did not cause exception to be thrown");
333  } catch(const CBadResiduesException & bad_residue_ex) {
334 
336  TBadIndexMap;
337  const TBadIndexMap & bad_index_map =
338  bad_residue_ex.GetBadResiduePositions().m_BadIndexMap;
339  TBadIndexMap::const_iterator bad_index_it = bad_index_map.begin();
340 
341  BOOST_CHECK_EQUAL(bad_index_it->first, 2);
342  vector<TSeqPos> vecExpectedPositions;
343  vecExpectedPositions.push_back(2);
344  vecExpectedPositions.push_back(30);
345  BOOST_CHECK_EQUAL_COLLECTIONS(
346  bad_index_it->second.begin(),
347  bad_index_it->second.end(),
348  vecExpectedPositions.begin(),
349  vecExpectedPositions.end());
350 
351  ++bad_index_it;
352  BOOST_CHECK_EQUAL(bad_index_it->first, 3);
353  vecExpectedPositions.clear();
354  vecExpectedPositions.push_back(2);
355  BOOST_CHECK_EQUAL_COLLECTIONS(
356  bad_index_it->second.begin(),
357  bad_index_it->second.end(),
358  vecExpectedPositions.begin(),
359  vecExpectedPositions.end());
360 
361  ++bad_index_it;
362  BOOST_CHECK_EQUAL(bad_index_it->first, 4);
363  vecExpectedPositions.clear();
364  vecExpectedPositions.push_back(2);
365  vecExpectedPositions.push_back(26);
366  BOOST_CHECK_EQUAL_COLLECTIONS(
367  bad_index_it->second.begin(),
368  bad_index_it->second.end(),
369  vecExpectedPositions.begin(),
370  vecExpectedPositions.end());
371  }
372 }
373 
374 // Test that the right warnings appear under the right conditions
375 BOOST_AUTO_TEST_CASE(TestWarnings)
376 {
377 
378  for( size_t warn_test_idx = 0;
379  warn_test_idx < ArraySize(fasta_warning_test_arr);
380  ++warn_test_idx )
381  {
382  const SWarningTest & warning_test = fasta_warning_test_arr[warn_test_idx];
383 
384  cout << endl;
385  cout << "Running test case '" << warning_test.m_sName << "'" << endl;
386 
387  // this will hold warnings found
389  new CIgnoreBelowWarningMessageListener );
390 
391  // create fasta reader
392  CStringReader fastaStringReader( warning_test.m_sInputFASTA );
393  CRStream fastaRStream( &fastaStringReader );
394  CFastaReader fasta_reader( fastaRStream, warning_test.m_fFastaFlags );
395  // do the parsing
396  BOOST_CHECK_NO_THROW( fasta_reader.ReadSet(kMax_Int, pMessageListener.GetPointer()) );
397 
398  typedef set<SOneWarningsInfo> TWarningInfoSet;
399  TWarningInfoSet setWarningsSeen;
400 
401 // load the warnings that were seen into setWarningsSeen
402  ITERATE_0_IDX(ii, pMessageListener->Count() ) {
403  const ILineError & line_error = pMessageListener->GetError(ii);
404  SOneWarningsInfo warning_info = {
405  line_error.Problem(),
406  line_error.FeatureName(),
407  line_error.Line()
408  };
409  setWarningsSeen.insert(warning_info);
410  }
411 
412  // load the warnings that are expected
413  set<SOneWarningsInfo> setExpectedWarnings;
414  ITERATE_0_IDX(warning_check_idx,
415  ArraySize(warning_test.m_warnings_expected) )
416  {
417  const SOneWarningsInfo & one_warning_info =
418  warning_test.m_warnings_expected[warning_check_idx];
419  const ILineError::EProblem eExpectedType =
420  one_warning_info.m_eType;
421  if( static_cast<int>(eExpectedType) <= 0 ) {
422  continue;
423  }
424  setExpectedWarnings.insert( one_warning_info );
425  }
426 
427  BOOST_CHECK_EQUAL_COLLECTIONS(
428  setWarningsSeen.begin(),
429  setWarningsSeen.end(),
430  setExpectedWarnings.begin(),
431  setExpectedWarnings.end() );
432  }
433 }
434 
435 namespace {
436 
437  typedef vector<ILineError::EProblem> TWarnVec;
438 
439  // returns empty reference on error
440  // (should never let exceptions escape)
441  CRef<CBioseq> s_ParseFasta( const string & sFasta,
442  CFastaReader::TFlags fFlags,
443  const string & sExpectedExceptionErrCode = kEmptyStr,
444  const TWarnVec & pExpectedWarningTypes = TWarnVec(),
446  set<string> expected_unused_mods = set<string>() )
447  {
448  CRef<CBioseq> pRetvalBioseq;
449  string sErrCodeThatOccurred;
451  new CIgnoreBelowWarningMessageListener );
452 
453  try {
454  CMemoryLineReader line_reader( sFasta.c_str(), sFasta.length() );
455  CFastaReader fasta_reader( line_reader, fFlags );
456  if( pModFilter ) {
457  fasta_reader.SetModFilter( pModFilter );
458  }
459 
460  CRef<CSeq_entry> pEntry = fasta_reader.ReadOneSeq(pMessageListener.GetPointer());
461  BOOST_REQUIRE(pEntry->IsSeq());
462  pRetvalBioseq.Reset( & pEntry->SetSeq() );
463 
464  CSourceModParser::TMods unused_mods = fasta_reader.GetUnusedMods();
465  set<string> unused_mods_as_strings;
466  ITERATE(CSourceModParser::TMods, unused_mod_it, unused_mods) {
467  unused_mods_as_strings.insert( unused_mod_it->key );
468  }
469  BOOST_CHECK_EQUAL_COLLECTIONS(
470  unused_mods_as_strings.begin(), unused_mods_as_strings.end(),
471  expected_unused_mods.begin(), expected_unused_mods.end() );
472  } catch(const CException & ex ) {
473  sErrCodeThatOccurred = ex.GetErrCodeString();
474  } catch(...) {
475  sErrCodeThatOccurred = "UNKNOWN";
476  }
477 
478  // extract the warning codes
479  TWarnVec pWarningTypes;
480  ITERATE_0_IDX(ii, pMessageListener->Count()) {
481  pWarningTypes.push_back(
482  pMessageListener->GetError(ii).Problem() );
483  }
484 
485  // check warnings
486  BOOST_CHECK_EQUAL_COLLECTIONS(
487  pExpectedWarningTypes.begin(),
488  pExpectedWarningTypes.end(),
489  pWarningTypes.begin(),
490  pWarningTypes.end() );
491 
492  // check error
493  BOOST_CHECK_EQUAL( sExpectedExceptionErrCode, sErrCodeThatOccurred );
494 
495  return pRetvalBioseq;
496  }
497 
498  // kludge to work around the inability to have templated typedefs
499  template<typename TType>
500  class TRefStd : public CConstRef<CObjectFor<TType> > { };
501 
502  // turns even a literal into a reference
503  template<typename TType>
504  TRefStd<TType> s_RefStd(const TType & value )
505  {
506  TRefStd<TType> pAnswer;
507  pAnswer.Reset( new CObjectFor<TType>(value) );
508  return pAnswer;
509  }
510 
511  // lets us have nullable non-CObject data
512  template<typename TType>
513  TRefStd<TType> s_RefOrNull(
514  bool bReturnNonNull,
515  const TType & valueIfNonNull )
516  {
517  if( bReturnNonNull ) {
518  return s_RefStd(valueIfNonNull);
519  }
520  return TRefStd<TType>();
521  }
522 
// Builds a vector whose only element is a copy of `value`.
template<typename TType>
std::vector<TType> s_VecOfOne(
    const TType & value)
{
    return std::vector<TType>(1, value);
}
531 
532  // The given bioseq should have exactly one gap
533  // The TRefStd's are NULL for "should be unset"
534  void s_CheckOnlyBioseqGap(
535  CConstRef<CBioseq> pBioseq,
536  TRefStd<TSeqPos> pNumDeltasExpected,
537  TRefStd<TSeqPos> pGapLenExpected,
538  TRefStd<CInt_fuzz::ELim> pLimExpected,
539  TRefStd<CSeq_gap::EType> pGapTypeExpected,
540  TRefStd< vector<CLinkage_evidence::EType> > pLinkEvidsExpected )
541  {
542  // check number of deltas
543  const CDelta_ext::Tdata * pDeltaData = NULL;
544  if (pBioseq->IsSetInst() && pBioseq->GetInst().IsSetExt() && pBioseq->GetInst().GetExt().IsDelta())
545  pDeltaData = & pBioseq->GetInst().GetExt().GetDelta().Get();
546  if( pNumDeltasExpected ) {
547  if( pDeltaData ) {
548  NCBITEST_CHECK_EQUAL(pDeltaData->size(), *pNumDeltasExpected);
549  } else {
550  BOOST_ERROR("no delta-ext's");
551  }
552  } else {
553  NCBITEST_CHECK( ! pDeltaData );
554  }
555 
556  // find the one gap seq-literal
557  CConstRef<CSeq_literal> pGapLiteral;
558  if( pDeltaData ) {
559  ITERATE(CDelta_ext::Tdata, delta_it, *pDeltaData) {
560  const CSeq_literal & seq_literal =
561  (*delta_it)->GetLiteral();
562  if( ! seq_literal.IsSetSeq_data() ||
563  FIELD_IS_SET_AND_IS(seq_literal, Seq_data, Gap) )
564  {
565  // it's a gap
566  BOOST_REQUIRE_MESSAGE( ! pGapLiteral,
567  "There should be only one gap" );
568  pGapLiteral.Reset( & seq_literal );
569  }
570  }
571  }
572 
573  // check gap len
574  if( pGapLenExpected ) {
575  NCBITEST_CHECK_EQUAL( pGapLiteral->GetLength(), *pGapLenExpected );
576  } else {
577  NCBITEST_CHECK( ! pGapLiteral->IsSetLength() );
578  }
579 
580  // check fuzz
581  if( pLimExpected ) {
583  pGapLiteral->GetFuzz().GetLim(), CInt_fuzz::eLim_unk);
584  } else {
585  BOOST_CHECK( ! pGapLiteral || ! pGapLiteral->IsSetFuzz() );
586  }
587 
588  // extract CSeq_gap, if any
589  CConstRef<CSeq_gap> pSeqGap;
590  if (pGapLiteral->IsSetSeq_data() && pGapLiteral->GetSeq_data().IsGap())
591  pSeqGap.Reset( & pGapLiteral->GetSeq_data().GetGap() );
592 
593  if( pGapTypeExpected ) {
594  NCBITEST_CHECK_EQUAL(pSeqGap->GetType(), *pGapTypeExpected);
595  } else {
596  NCBITEST_CHECK( ! pSeqGap || ! pSeqGap->IsSetType() );
597  }
598 
599  // check linkage and linkage-evidence
600  if( pLinkEvidsExpected && pLinkEvidsExpected->GetData().empty() ) {
601  // consider empty and unset to be the same thing
602  // for our expected array and our actual array
603  pLinkEvidsExpected.Reset();
604  }
605  if( pLinkEvidsExpected ) {
606  NCBITEST_CHECK_EQUAL( pSeqGap->GetLinkage(),
608  } else {
609  NCBITEST_CHECK( ! pSeqGap ||
610  ! FIELD_EQUALS(*pSeqGap, Linkage, CSeq_gap::eLinkage_linked) );
611  }
612  const CSeq_gap::TLinkage_evidence * pLinkEvidObjs = NULL;
613  if (pSeqGap.NotNull() && pSeqGap->IsSetLinkage_evidence())
614  pLinkEvidObjs = & pSeqGap->GetLinkage_evidence();
615  if( pLinkEvidsExpected ) {
616  vector<CLinkage_evidence::EType> vecLinkEvids;
617  if( pLinkEvidObjs ) {
618  ITERATE(CSeq_gap::TLinkage_evidence, evid_obj_it, *pLinkEvidObjs) {
619  BOOST_CHECK_NO_THROW(
620  vecLinkEvids.push_back(
621  static_cast<CLinkage_evidence::EType>(
622  (*evid_obj_it)->GetType() ) ) );
623  }
624  }
625  BOOST_CHECK_EQUAL_COLLECTIONS(
626  pLinkEvidsExpected->GetData().begin(),
627  pLinkEvidsExpected->GetData().end(),
628  vecLinkEvids.begin(), vecLinkEvids.end());
629  } else {
630  NCBITEST_CHECK( ! pLinkEvidObjs ||
631  pLinkEvidObjs->empty() );
632  }
633  }
634 }
635 
636 
637 // Put in a bunch here for the ParseIDs
638 BOOST_AUTO_TEST_CASE(TestDefLineParser)
639 {
641  parseInfo.maxIdLength = 40;
642  parseInfo.lineNumber = 0;
644  parseInfo.fBaseFlags = 0;
645 
646  CFastaReader::TIgnoredProblems noIgnoredErrors;
647  list<CRef<CSeq_id>> ids;
648  bool hasRange;
649  TSeqPos rangeStart, rangeEnd;
650  CFastaReader::TSeqTitles seqTitles;
651 
652  // Anything following a title\1 is interpreted as an ID
653  // unless a range has already been specified.
654  // The string below thus contains three IDs.
655  {
656  static const string kFastaDefLine =
657  ">ID1 Title1\1ID2 Title2.1 Title2.2 \1[ID3]\n";
658  CFastaReader::ParseDefLine(kFastaDefLine,
659  parseInfo,
660  noIgnoredErrors,
661  ids,
662  hasRange,
663  rangeStart,
664  rangeEnd,
665  seqTitles,
666  nullptr);
667 
668  BOOST_CHECK( ids.size() == 3 );
669  BOOST_CHECK( !hasRange );
670  BOOST_CHECK( seqTitles.size() == 2 );
671  BOOST_CHECK( seqTitles[1].m_sLineText == "Title2.1 Title2.2 " );
672 
673  ids.clear();
674  seqTitles.clear();
675  }
676 
677 
678  // If a range is encountered, do not search for any more IDs.
679  // ID2 is ignored in the string below.
680  {
681  static const string kFastaDefLine =
682  ">ID1:123-456 Title\11D2\n";
683  CFastaReader::ParseDefLine(kFastaDefLine,
684  parseInfo,
685  noIgnoredErrors,
686  ids,
687  hasRange,
688  rangeStart,
689  rangeEnd,
690  seqTitles,
691  nullptr);
692 
693 
694  BOOST_CHECK( ids.size() == 1 );
695  BOOST_CHECK( hasRange );
696  BOOST_CHECK( rangeStart == 122 );
697  BOOST_CHECK( rangeEnd == 455 );
698  BOOST_CHECK( seqTitles.size() == 1 &&
699  seqTitles.front().m_sLineText == "Title" );
700  ids.clear();
701  seqTitles.clear();
702  }
703 
704 
705 // ID3 is ignored in the string below, due to the appearance of
706  // a range after ID2.
707  {
708  static const string kFastaDefLine =
709  ">ID1 Title1 \1ID2:12-34 Title2 \1ID3 \n";
710  CFastaReader::ParseDefLine(kFastaDefLine,
711  parseInfo,
712  noIgnoredErrors,
713  ids,
714  hasRange,
715  rangeStart,
716  rangeEnd,
717  seqTitles,
718  nullptr);
719 
720 
721 
722  BOOST_CHECK( ids.size() == 2 );
723  BOOST_CHECK( hasRange );
724  BOOST_CHECK( rangeStart == 11 );
725  BOOST_CHECK( rangeEnd == 33 );
726  BOOST_CHECK( seqTitles.size() == 2 );
727  BOOST_CHECK( seqTitles.front().m_sLineText == "Title1 " );
728  BOOST_CHECK( seqTitles.back().m_sLineText == "Title2 " );
729 
730  ids.clear();
731  seqTitles.clear();
732  }
733 
734 
735  // Check that lcl|... is handled correctly
736  {
737  static const string kFastaDefLine =
738  ">lcl|ID1 Title\n";
739  CFastaReader::ParseDefLine(kFastaDefLine,
740  parseInfo,
741  noIgnoredErrors,
742  ids,
743  hasRange,
744  rangeStart,
745  rangeEnd,
746  seqTitles,
747  nullptr);
748 
749  BOOST_CHECK ( ids.size() == 1 );
750  const CRef<CSeq_id>& localId = ids.front();
751  BOOST_CHECK( localId->GetLocal().IsStr() &&
752  localId->GetLocal().GetStr() == "ID1" );
753 
754  ids.clear();
755  seqTitles.clear();
756  }
757 
758  // Check that Genbank accessions are read correctly
759  {
760  static const string kFastaDefLine =
761  ">gb|M73307";
762  CFastaReader::ParseDefLine(kFastaDefLine,
763  parseInfo,
764  noIgnoredErrors,
765  ids,
766  hasRange,
767  rangeStart,
768  rangeEnd,
769  seqTitles,
770  nullptr);
771 
772  BOOST_CHECK( ids.size() == 1 );
773  BOOST_CHECK( !hasRange );
774  BOOST_CHECK( seqTitles.empty() );
775  BOOST_CHECK( ids.front()->IsGenbank() &&
776  ids.front()->GetGenbank().GetAccession() == "M73307");
777 
778  ids.clear();
779  seqTitles.clear();
780  }
781 
782 
783  // Check that the following is interpreted as a defline
784  // modifier, not an ID.
785  {
786  static const string kFastaDefLine =
787  ">[topology=linear]";
788  CFastaReader::ParseDefLine(kFastaDefLine,
789  parseInfo,
790  noIgnoredErrors,
791  ids,
792  hasRange,
793  rangeStart,
794  rangeEnd,
795  seqTitles,
796  nullptr);
797 
798  BOOST_CHECK( ids.empty() );
799  BOOST_CHECK( !hasRange );
800  BOOST_CHECK( seqTitles.size() == 1 );
801 
802  ids.clear();
803  seqTitles.clear();
804  }
805 }
806 
807 
808 
809 
810 BOOST_AUTO_TEST_CASE(TestTitleRemovedIfEmpty)
811 {
812  static const string kFastaWhereAllModsRemoved =
813  ">Seq1 [topology=circular]\n"
814  "ACGTACGTACGTACGTACGTACGTACGTACGTACGT\n";
815  CRef<CBioseq> pBioseq = s_ParseFasta( kFastaWhereAllModsRemoved,
817  BOOST_REQUIRE(pBioseq);
818 
819  FOR_EACH_SEQDESC_ON_BIOSEQ(desc_it, *pBioseq) {
820  BOOST_CHECK( ! (*desc_it)->IsTitle() );
821  }
822 }
823 
824 
825 
826 
827 
828 BOOST_AUTO_TEST_CASE(TestProteinSeqGapChar)
829 {
830  static const string kFastaWithProtGap =
831  ">Dobi [organism=Canis familiaris] [breed=Doberman pinscher]\n"
832  "MMMTGCMTGGGTMMMMGTMGTMGMMGMGMMGGCTTTTMGCCCMGMMGTMMTMCCCMTGTTTTCMGCMTTM\n"
833  "GGMMMMMGGGCTGTTG\n"
834  ">?unk100\n"
835  "TGGMTGMCMGMMMCCTTGTTGGTCCMMMMTGCMMMCCCMGMTKGTMMGMCCMTTTTMMMMGCMTTGGGTC\n"
836  "TTMGMMMTMGGGCMMCMCMGMMCMMMMMT\n"
837  ">?234\n"
838  "MMMMMTMMMMGCMTTMGTMGMMMTTTGTMCMGMMCTGGMMMMGGMMGGMMMMMTTTCMMMMMTTGGGCCT\n";
839 
840  CFastaReader::TFlags fFastaReaderFlags =
844 
845  // test with and without nosplit
846  ITERATE_BOTH_BOOL_VALUES(bSetNoSplit) {
847  cout << "Trying with" << (bSetNoSplit ? "" : "out") << " CFastaReader::fNoSplit" << endl;
848  BOOST_CHECK(s_ParseFasta( kFastaWithProtGap,
849  fFastaReaderFlags | (bSetNoSplit ? CFastaReader::fNoSplit : 0)));
850  }
851 }
852 
853 // Make sure [protein=whatever] doesn't work
854 // on nuc sequences but does work on prots.
855 // Also, "[gene=...]" becomes a feature on a nuc,
856 // but is an xref on a prot
857 BOOST_AUTO_TEST_CASE(TestGeneAndProtein)
858 {
859  {{
860  static const string kFastaNuc =
861  ">Seq1 [gene=some_gene] [protein=foo]\n"
862  "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC\n";
863 
864  set<string> expected_unused_mods;
865  expected_unused_mods.insert("protein");
866 
867  CRef<CBioseq> pBioseq = s_ParseFasta(
868  kFastaNuc, CFastaReader::fAddMods,
869  kEmptyStr, TWarnVec(),
871  expected_unused_mods );
872  BOOST_REQUIRE(pBioseq);
873 
874  bool bFoundGene = false;
875 
876  CTypeConstIterator<CSeqFeatData> seqfeatdat_ci(Begin(*pBioseq));
877  for( ; seqfeatdat_ci; ++seqfeatdat_ci ) {
878  BOOST_REQUIRE( ! seqfeatdat_ci->IsProt() );
879  if( FIELD_IS_AND_IS_SET(*seqfeatdat_ci, Gene, Locus) &&
880  seqfeatdat_ci->GetGene().GetLocus() == "some_gene" )
881  {
882  bFoundGene = true;
883  }
884  }
885 
886  BOOST_CHECK(bFoundGene);
887  }}
888 
889  {{
890  static const string kFastaProt =
891  ">Seq1 [gene=some_gene] [protein=foo]\n"
892  "MALWMHLLTVLALLALWGPNTNQAFVSRHLCGSNLVETLYSVCQDDGFFYIPKDRRELED\n";
893 
894  set<string> expected_unused_mods;
895  expected_unused_mods.insert("gene");
896 
897  CRef<CBioseq> pBioseq = s_ParseFasta(
898  kFastaProt, CFastaReader::fAddMods,
899  kEmptyStr, TWarnVec(),
901  expected_unused_mods );
902  BOOST_REQUIRE(pBioseq);
903 
904  bool bHasProt = false;
905  CTypeConstIterator<CSeq_feat> seqfeat_ci(Begin(*pBioseq));
906  for( ; seqfeat_ci; ++seqfeat_ci ) {
907  BOOST_CHECK( ! FIELD_IS_SET_AND_IS(*seqfeat_ci, Data, Gene) );
908  if( FIELD_IS_SET_AND_IS(*seqfeat_ci, Data, Prot) ) {
909  bHasProt = true;
910  NCBITEST_CHECK( RAW_FIELD_IS_EMPTY_OR_UNSET(*seqfeat_ci, Xref) );
911  }
912  }
913  BOOST_CHECK(bHasProt);
914  }}
915 }
916 
918 {
919  const string kPreGapNucs =
920  "GATTACAACGTGATTACAACGTGATTACAACGTGATTACAACGTGATTACAACGTGATTACA";
921  const string kPostGapNucs[2] = {
922  "TCGACCCACGCGTCCGGAGAAGTTTTTCACCTACTGGAACCCGCCTAGGGTACGGGAAAC",
923  "AGGTGCCCTCCAAAACGAGAGCGCGAACTGCAGCCTACGTCCCACTGCAGCTCAGGAGCA"
924  };
925 
926  const string kLinesBeforeGap =
927  ">Seq1\n" +
928  kPreGapNucs + "\n";
929  const string kLinesAfterGap =
930  kPostGapNucs[0] + "\n" +
931  kPostGapNucs[1] + "\n";
932 
933  // the 2 above, plus a gap line added in-between
934  const TSeqPos kNumDeltasExpected = 3;
935 
936  // arbitrary gap length to use when the length doesn't matter
937  const TSeqPos kArbGapLen = 42;
938 
939  const CFastaReader::TFlags kDefaultFastaFlags =
941 
942  // test that numbers other than kUnknownGapLen will work
943  // (no values provided should imply [gap-type=unknown][linkage-evidence=unspecified])
944  // (and test that negative or zero fails)
945  ITERATE_BOTH_BOOL_VALUES(bIsUnknown) {
946  const int arrGapLensToTry[] = {-250, -8, 0, 1, 20, 84, 100, 158, 2093};
947  ITERATE_0_IDX(gapLenIdx, ArraySize(arrGapLensToTry) ) {
948  const int iGapLen = arrGapLensToTry[gapLenIdx];
949  const string sDataToRead = kLinesBeforeGap + ">?" +
950  ( bIsUnknown ? "unk" : "") +
951  NStr::NumericToString(iGapLen) + "\n" + kLinesAfterGap;
952 
953  cerr << "Testing with " << (bIsUnknown ? "unknown" : "known")
954  << " gap size of " << iGapLen << endl;
955 
956  // non-positive gap sizes should create a warning
957  TWarnVec expectedWarningsVec;
958 
959  if( iGapLen <= 0 ) {
960  expectedWarningsVec.push_back(
962  }
963 
964  if( iGapLen < 0 ) {
965  // in this case, the negative length is
966  // interpreted as a
967  expectedWarningsVec.push_back(
969  }
970 
971  CRef<CBioseq> pBioseq = s_ParseFasta(
972  sDataToRead, kDefaultFastaFlags,
973  kEmptyStr, expectedWarningsVec );
974 
975  // non-positive gap sizes should create a format error
976  if( iGapLen <= 0 ) {
977  continue;
978  }
979 
980  const int iExpectedGapLen = iGapLen;
981 
982  s_CheckOnlyBioseqGap(
983  pBioseq,
984  s_RefStd(kNumDeltasExpected),
985  s_RefStd( static_cast<TSeqPos>(iExpectedGapLen) ),
986  s_RefOrNull(bIsUnknown, CInt_fuzz::eLim_unk),
987  TRefStd<CSeq_gap::EType>(),
988  TRefStd<vector<CLinkage_evidence::EType> >() );
989  }
990  }
991 
992  // test possible gap types
994 
995  ITERATE_BOTH_BOOL_VALUES(bPutLinkEvidInInput) {
996  ITERATE( CSeq_gap::TGapTypeMap, gap_type_text_it, gapTypeMap )
997  {
998  const char *pchGapType = gap_type_text_it->first;
999  const CSeq_gap::SGapTypeInfo & gapTypeInfo =
1000  gap_type_text_it->second;
1001 
1002  // build the data we're reading;
1003  CNcbiOstrstream fasta_in_strm;
1004  fasta_in_strm << kLinesBeforeGap
1005  << ">?unk" << kArbGapLen
1006  << " [gap-type=" << pchGapType << ']';
1007  if( bPutLinkEvidInInput ) {
1008  fasta_in_strm << " [linkage-evidence=pcr]";
1009  }
1010  fasta_in_strm << '\n';
1011  fasta_in_strm << kLinesAfterGap;
1012 
1013  // print what we're doing here
1014  cerr << "Testing gap-type " << pchGapType << "("
1015  << (bPutLinkEvidInInput ? "with" : "without" )
1016  << " a linkage-evidence)" << endl;
1017 
1018  TWarnVec expectedWarningsVec;
1019  if( bPutLinkEvidInInput )
1020  {
1021  if( gapTypeInfo.m_eLinkEvid == CSeq_gap::eLinkEvid_Forbidden ) {
1022  expectedWarningsVec.push_back(
1024  } else if( gapTypeInfo.m_eLinkEvid == CSeq_gap::eLinkEvid_UnspecifiedOnly ) {
1025  expectedWarningsVec.push_back(
1027  }
1028  } else {
1029  if(gapTypeInfo.m_eLinkEvid == CSeq_gap::eLinkEvid_Required )
1030  {
1031  expectedWarningsVec.push_back(
1033  }
1034  }
1035 
1036  CRef<CBioseq> pBioseq =
1037  s_ParseFasta(
1038  CNcbiOstrstreamToString(fasta_in_strm),
1039  kDefaultFastaFlags,
1040  kEmptyStr,
1041  expectedWarningsVec);
1042 
1043  // add checking function
1044  s_CheckOnlyBioseqGap(
1045  pBioseq,
1046  s_RefStd(kNumDeltasExpected),
1047  s_RefStd(kArbGapLen),
1048  s_RefStd(CInt_fuzz::eLim_unk),
1049  s_RefStd(gapTypeInfo.m_eType),
1050  s_RefOrNull(gapTypeInfo.m_eLinkEvid != CSeq_gap::eLinkEvid_Forbidden,
1051  s_VecOfOne(
1052  gapTypeInfo.m_eLinkEvid == CSeq_gap::eLinkEvid_Required &&
1053  bPutLinkEvidInInput ?
1056  }
1057  }
1058 
1059  // test interaction with "unspecified"
1060  ITERATE( CSeq_gap::TGapTypeMap, gap_type_text_it, gapTypeMap )
1061  {
1062  const char *pchGapType = gap_type_text_it->first;
1063  const CSeq_gap::SGapTypeInfo & gapTypeInfo = gap_type_text_it->second;
1064 
1065  // build the data we're reading;
1066  CNcbiOstrstream fasta_in_strm;
1067  fasta_in_strm << kLinesBeforeGap
1068  << ">?unk" << kArbGapLen
1069  << " [gap-type=" << pchGapType << ']'
1070  << " [linkage-evidence=unspecified]\n"
1071  << kLinesAfterGap;
1072 
1073  // print what we're doing here
1074  cerr << "Testing gap-type " << pchGapType << " with 'unspecified'" << endl;
1075 
1076  TWarnVec expectedWarningsVec;
1077  switch( gapTypeInfo.m_eLinkEvid ) {
1079  // no problem
1080  break;
1082  expectedWarningsVec.push_back(
1084  break;
1086  expectedWarningsVec.push_back(
1088  break;
1089  default:
1090  BOOST_FAIL("Unknown CSeq_gap::ELinkEvid: "
1091  << static_cast<int>(gapTypeInfo.m_eLinkEvid) );
1092  break;
1093  }
1094 
1095  CRef<CBioseq> pBioseq =
1096  s_ParseFasta(
1097  CNcbiOstrstreamToString(fasta_in_strm),
1098  kDefaultFastaFlags,
1099  kEmptyStr,
1100  expectedWarningsVec);
1101 
1102  // add checking function
1103  s_CheckOnlyBioseqGap(
1104  pBioseq,
1105  s_RefStd(kNumDeltasExpected),
1106  s_RefStd(kArbGapLen),
1107  s_RefStd(CInt_fuzz::eLim_unk),
1108  s_RefStd(gapTypeInfo.m_eType),
1109  s_RefOrNull(
1111  s_VecOfOne(CLinkage_evidence::eType_unspecified)) );
1112  }
1113 
1114  // test format errors after gap length
1115  {
1116  struct {
1117  const char * gap_mods;
1118  ILineError::EProblem problem_arr[2];
1119  }
1120  arrBadGapMods[] = {
1121  // bogus mod key
1122  { " [foo=baz]",
1125  // bogus gap type
1126  { " [gap-type=foo]",
1129  // bogus linkage-evidence
1130  { " [gap-type=between scaffolds] [linkage-evidence=foo]",
1133  // extra junk on gap line (even if good mods)
1134  { " extra junk",
1137  { " [gap-type=short arm] extra junk",
1140  { " [gap-type=short arm] extra junk [linkage-evidence=map]",
1143  // conflicting gap types
1144  { " [gap-type=short arm] [gap-type=heterochromatin]",
1147  };
1148  const size_t arrBadGapMods_len = sizeof(arrBadGapMods) / sizeof(arrBadGapMods[0]);
1149  ITERATE_0_IDX( ii, arrBadGapMods_len ) {
1150  const string sDataToRead = kLinesBeforeGap +
1151  ">?unk" + NStr::NumericToString(kArbGapLen) +
1152  arrBadGapMods[ii].gap_mods + "\n" + kLinesAfterGap;
1153 
1154  TWarnVec expectedWarningsVec;
1155  expectedWarningsVec.push_back(
1156  arrBadGapMods[ii].problem_arr[0] );
1157  if( arrBadGapMods[ii].problem_arr[1] !=
1159  {
1160  expectedWarningsVec.push_back(
1161  arrBadGapMods[ii].problem_arr[1] );
1162  }
1163 
1164  CRef<CBioseq> pBioseq =
1165  s_ParseFasta(
1166  sDataToRead,
1167  kDefaultFastaFlags,
1168  kEmptyStr,
1169  expectedWarningsVec );
1170  }
1171  }
1172 
1173  // test multiple linkage evidences (both approaches)
1174  {
1175  // all ways should give the same result
1176  const char * arrLinkageEvidences[] = {
1177  // shouldn't matter if all in one semi-colon-separated mod
1178  // or split across multiple ones
1179  "[linkage-evidence=pcr;strobe;map]",
1180  "[linkage-evidence=pcr][linkage-evidence=strobe][linkage-evidence=map]",
1181  "[linkage-evidence=pcr;strobe][linkage-evidence=map]",
1182  "[linkage-evidence=pcr][linkage-evidence=strobe;map]",
1183  // dups should be ignored
1184  "[linkage-evidence=pcr;strobe;pcr;map]",
1185  "[linkage-evidence=pcr][linkage-evidence=strobe][linkage-evidence=pcr][linkage-evidence=map]"
1186  };
1187 
1188  // same result every iteration, so just build it here
1189  vector<CLinkage_evidence::EType> vecExpectedLinkEvids;
1190  vecExpectedLinkEvids.push_back(CLinkage_evidence::eType_pcr);
1191  vecExpectedLinkEvids.push_back(CLinkage_evidence::eType_strobe);
1192  vecExpectedLinkEvids.push_back(CLinkage_evidence::eType_map);
1193  sort( vecExpectedLinkEvids.begin(), vecExpectedLinkEvids.end() );
1194 
1195  ITERATE_0_IDX(linkEvidIdx, ArraySize(arrLinkageEvidences) ) {
1196  const char * pchLinkageEvidences =
1197  arrLinkageEvidences[linkEvidIdx];
1198 
1199  cerr << "Trying: " << pchLinkageEvidences << endl;
1200 
1201  const string sDataToRead = kLinesBeforeGap +
1202  ">?" + NStr::NumericToString(kArbGapLen) +
1203  " [gap-type=between scaffolds] " + pchLinkageEvidences + "\n" +
1204  kLinesAfterGap;
1205 
1206  CRef<CBioseq> pBioseq =
1207  s_ParseFasta(sDataToRead, kDefaultFastaFlags);
1208 
1209  BOOST_REQUIRE( pBioseq );
1210 
1211  s_CheckOnlyBioseqGap(
1212  pBioseq,
1213  s_RefStd(kNumDeltasExpected),
1214  s_RefStd(kArbGapLen),
1215  TRefStd<CInt_fuzz::ELim>(),
1216  s_RefStd(CSeq_gap::eType_contig),
1217  s_RefStd(vecExpectedLinkEvids) );
1218  }
1219  }
1220 
1221  // test that canonicalization does work
1222  // (that is, case, etc. is ignored)
1223  {
1224  const string sDataToRead = kLinesBeforeGap +
1225  ">?" + NStr::NumericToString(kArbGapLen) +
1226  " [ GAP tYpe = Between_Scaffolds ] [ linkage_evidence = Align xgenus ]\n" +
1227  kLinesAfterGap;
1228 
1229  CRef<CBioseq> pBioseq =
1230  s_ParseFasta(
1231  sDataToRead,
1232  kDefaultFastaFlags);
1233 
1234  BOOST_REQUIRE( pBioseq );
1235 
1236  vector<CLinkage_evidence::EType> vecExpectedLinkEvids(
1238 
1239  s_CheckOnlyBioseqGap(
1240  pBioseq,
1241  s_RefStd(kNumDeltasExpected),
1242  s_RefStd(kArbGapLen),
1243  TRefStd<CInt_fuzz::ELim>(),
1244  s_RefStd(CSeq_gap::eType_contig),
1245  s_RefStd(s_VecOfOne(CLinkage_evidence::eType_align_xgenus)) );
1246  }
1247 }
1248 
// Test case: feeds several textual gap notations (runs of 'N', runs of '-',
// and ">?<len>" / ">?unk<len>" gap lines with and without modifiers) to
// s_ParseFasta with a flags value of 0, and checks that each one yields a
// single plain sequence whose bases equal the flanking nucleotides joined by
// kArbGapSize 'N' characters.
1249 BOOST_AUTO_TEST_CASE(TestNonDeltaGaps)
1250 {
1251  const string kDefline = ">Seq1";
1252  const string kNucsBeforeGap =
1253  "GATTACAACGTGATTACAACGTGATTACAAC";
1254  // arbitrary kArbGapSize, but used prime to make accidentally correct
1255  // results less likely
1256  const TSeqPos kArbGapSize = 17;
1257  const string kNucsAfterGap = "GTGATTACAACGTGATTACAACGTGATTACA";
1258 
1259  const string kExpectedBasesIfKnown = kNucsBeforeGap +
1260  string(kArbGapSize, 'N') +
1261  kNucsAfterGap;
     // NOTE(review): kExpectedBasesIfUnknown is built from exactly the same
     // expression as kExpectedBasesIfKnown and is not referenced again in
     // this test case; only kExpectedBasesIfKnown is compared below.
1262  const string kExpectedBasesIfUnknown = kNucsBeforeGap +
1263  string(kArbGapSize, 'N') +
1264  kNucsAfterGap;
1265  // test if the fParseGaps flag is NOT set, which would result in
1266  // one big sequence (no deltas).
1267 
1268  // all these different representations should result in a gap
1269  // of the same size
1270  const string arrGapStrings[] = {
1271  string(kArbGapSize, 'N'),
1272  string(kArbGapSize, '-'),
1273  "\n>?" + NStr::NumericToString(kArbGapSize) + "\n",
1274  "\n>?unk" + NStr::NumericToString(kArbGapSize) + "\n",
1275  "\n>?unk" + NStr::NumericToString(kArbGapSize) +
1276  " [gap-type=unknown] [linkage-evidence=unspecified]\n",
1277  "\n>?unk" + NStr::NumericToString(kArbGapSize) +
1278  " [gap-type=repeat within scaffold] [linkage-evidence=pcr]\n"
1279  };
     // One parse per gap notation: defline, bases, gap text, bases.
1280  ITERATE_0_IDX( gap_str_idx, ArraySize(arrGapStrings) ) {
1281  const string & sGapString = arrGapStrings[gap_str_idx];
1282  const string sDataToRead =
1283  kDefline + "\n" +
1284  kNucsBeforeGap + sGapString + kNucsAfterGap + "\n";
1285 
1286  // if there are substantive mods on the gaps, a warning
1287  // is expected
     // "Substantive" is detected textually: the input contains a '[' but
     // not the word "unknown". Of the notations above, only the
     // "repeat within scaffold" entry satisfies that condition.
1288  TWarnVec expectedWarnings;
1289  if( sDataToRead.find('[') != string::npos &&
1290  sDataToRead.find("unknown") == string::npos )
1291  {
     // NOTE(review): the argument of this push_back (listing line 1293)
     // is absent from this extract.
1292  expectedWarnings.push_back(
1294  }
1295 
     // Flags argument is 0, i.e. no reader flags are requested.
1296  CRef<CBioseq> pBioseq =
1297  s_ParseFasta(sDataToRead, 0,
1298  kEmptyStr, expectedWarnings);
1299 
     // BOOST_REQUIRE guards the dereferences of pBioseq that follow.
1300  BOOST_REQUIRE( pBioseq );
1301 
1302  // should NOT contain a gap per se
1303  NCBITEST_CHECK( ! pBioseq->GetInst().GetSeq_data().IsGap() );
1304 
     // Read the parsed sequence back in IUPAC coding for comparison.
1305  CSeqVector seq_vec(*pBioseq, NULL, CBioseq_Handle::eCoding_Iupac);
1306 
1307  const string & kExpectedBases = kExpectedBasesIfKnown;
1308 
1309  BOOST_CHECK_EQUAL_COLLECTIONS(
1310  kExpectedBases.begin(), kExpectedBases.end(),
1311  seq_vec.begin(), seq_vec.end() );
1312  }
1313 }
1314 
// Test case: parses a FASTA record that mixes in-sequence gap letters
// ('-' and 'N') with explicit ">?<len>" / ">?unk<len>" gap lines, and checks
// that the resulting Delta-ext equals the text-ASN value spelled out in
// kExpectedDeltaExt.
1315 BOOST_AUTO_TEST_CASE(TestLetterGaps)
1316 {
     // Input under test. Note the three consecutive gap lines near the end
     // (">?42", ">?37", ">?unk20") with no bases between them.
1317  const string kFasta =
1318  ">lcl|Seq1\n"
1319  "AC---NNNNNNNNNACGTGATTACAN\n"
1320  ">?48\n"
1321  "ACGTACGT\n"
1322  "GATTACA\n"
1323  ">?unk50\n"
1324  "ACGT---GA-TTNACAN\n"
1325  ">?1\n"
1326  "ACGTACGT\n"
1327  ">?unk1\n"
1328  "GATTAACGTTATGC\n"
1329  "CGATTAACGTTATGCN\n"
1330  "GGATTAACGTTATGC-N\n"
1331  "TGATTAACGTTATGC\n"
1332  ">?42\n"
1333  ">?37\n"
1334  ">?unk20\n"
1335  "ACGTTGCA\n";
1336 
     // Expected result as text ASN.1. Literals without seq-data are the
     // gaps; those carrying "fuzz lim unk" correspond to the ">?unk<len>"
     // lines in kFasta (lengths 50, 1 and 20).
1337  const string kExpectedDeltaExt =
1338  "Delta-ext ::= {\n"
1339  " literal {\n"
1340  " length 2,\n"
1341  " seq-data ncbi4na '12'H\n"
1342  " },\n"
1343  " literal {\n"
1344  " length 12\n"
1345  " },\n"
1346  " literal {\n"
1347  " length 11,\n"
1348  " seq-data ncbi2na '1B8F10'H\n"
1349  " },\n"
1350  " literal {\n"
1351  " length 1\n"
1352  " },\n"
1353  " literal {\n"
1354  " length 48\n"
1355  " },\n"
1356  " literal {\n"
1357  " length 15,\n"
1358  " seq-data ncbi2na '1B1B8F10'H\n"
1359  " },\n"
1360  " literal {\n"
1361  " length 50,\n"
1362  " fuzz lim unk\n"
1363  " },\n"
1364  " literal {\n"
1365  " length 4,\n"
1366  " seq-data ncbi2na '1B'H\n"
1367  " },\n"
1368  " literal {\n"
1369  " length 3\n"
1370  " },\n"
1371  " literal {\n"
1372  " length 2,\n"
1373  " seq-data ncbi4na '41'H\n"
1374  " },\n"
1375  " literal {\n"
1376  " length 1\n"
1377  " },\n"
1378  " literal {\n"
1379  " length 2,\n"
1380  " seq-data ncbi4na '88'H\n"
1381  " },\n"
1382  " literal {\n"
1383  " length 1\n"
1384  " },\n"
1385  " literal {\n"
1386  " length 3,\n"
1387  " seq-data ncbi2na '10'H\n"
1388  " },\n"
1389  " literal {\n"
1390  " length 1\n"
1391  " },\n"
1392  " literal {\n"
1393  " length 1\n"
1394  " },\n"
1395  " literal {\n"
1396  " length 8,\n"
1397  " seq-data ncbi2na '1B1B'H\n"
1398  " },\n"
1399  " literal {\n"
1400  " length 1,\n"
1401  " fuzz lim unk\n"
1402  " },\n"
1403  " literal {\n"
1404  " length 29,\n"
1405  " seq-data ncbi2na '8F06F3963C1BCE40'H\n"
1406  " },\n"
1407  " literal {\n"
1408  " length 1\n"
1409  " },\n"
1410  " literal {\n"
1411  " length 15,\n"
1412  " seq-data ncbi2na 'A3C1BCE4'H\n"
1413  " },\n"
1414  " literal {\n"
1415  " length 2\n"
1416  " },\n"
1417  " literal {\n"
1418  " length 15,\n"
1419  " seq-data ncbi2na 'E3C1BCE4'H\n"
1420  " },\n"
1421  " literal {\n"
1422  " length 42\n"
1423  " },\n"
1424  " literal {\n"
1425  " length 37\n"
1426  " },\n"
1427  " literal {\n"
1428  " length 20,\n"
1429  " fuzz lim unk\n"
1430  " },\n"
1431  " literal {\n"
1432  " length 8,\n"
1433  " seq-data ncbi2na '1BE4'H\n"
1434  " }\n"
1435  "}\n";
1436 
     // Deserialize the expected value from the text above.
1437  CAutoInitRef<CDelta_ext> pExpectedDeltaExt;
1438  s_LoadObjectRefFromTextASN(pExpectedDeltaExt, kExpectedDeltaExt);
1439 
     // Left empty: no warnings are expected from this parse.
1440  TWarnVec expectedWarningsVec;
1441 
     // NOTE(review): the flags argument of this call (listing lines
     // 1444-1446) is absent from this extract.
1442  CRef<CBioseq> pBioseq =
1443  s_ParseFasta(kFasta,
1447  kEmptyStr,
1448  expectedWarningsVec );
1449 
     // NOTE(review): pBioseq is dereferenced here without a preceding
     // null check in this test case; confirm s_ParseFasta cannot hand
     // back an empty ref on this path.
1450  CConstRef<CDelta_ext> pResultingDeltaExt(
1451  & pBioseq->GetInst().GetExt().GetDelta() );
     // On mismatch, record a test error and dump both values as text ASN
     // to cerr so the difference can be inspected.
1452  if( ! pResultingDeltaExt->Equals(
1453  *pExpectedDeltaExt ) )
1454  {
1455  BOOST_ERROR("Delta-ext differs from expected.");
1456  cerr << "Expected: " << MSerial_AsnText << *pExpectedDeltaExt << endl;
1457  cerr << "Received: " << MSerial_AsnText << *pResultingDeltaExt << endl;
1458  }
1459 }
1460 
1461 // Not sure what to do about this since lone end-of-line hyphens
1462 // produce weird results
1463 
1464 //BOOST_AUTO_TEST_CASE(TestLoneEndOfLineHyphens)
1465 //{
1466 // const string kFasta =
1467 // ">Seq1\n"
1468 // "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA-\n"
1469 // "GTACGTACGTACGT\n"
1470 // ">?58\n"
1471 // "CGGTACGTACGTACGT\n"
1472 // ">?unk90\n"
1473 // "AAACGGTACGTACGTACGT\n";
1474 //
1475 // const string kCompletelyUnknownGap =
1476 // "Delta-seq ::= \n"
1477 // "loc null NULL\n";
1478 // CAutoInitRef<CDelta_seq> pCompletelyUnknownGap;
1479 // s_LoadObjectRefFromTextASN(pCompletelyUnknownGap, kCompletelyUnknownGap);
1480 //
1481 // const string kRegularUnknownGap =
1482 // "Delta-seq ::= \n"
1483 // "literal {\n"
1484 // " length 100,\n"
1485 // " fuzz lim unk\n"
1486 // "}\n";
1487 // // length of pRegularUnknownGap may be adjusted as we go
1488 // CAutoInitRef<CDelta_seq> pRegularUnknownGap;
1489 // s_LoadObjectRefFromTextASN(pRegularUnknownGap, kRegularUnknownGap);
1490 //
1491 // const string kRegularGap =
1492 // "Delta-seq ::= \n"
1493 // "literal {\n"
1494 // " length 100\n"
1495 // "}\n";
1496 // // length of pRegularGap may be adjusted as we go
1497 // CAutoInitRef<CDelta_seq> pRegularGap;
1498 // s_LoadObjectRefFromTextASN(pRegularGap, kRegularGap);
1499 //
1500 // ITERATE_BOTH_BOOL_VALUES(bSetCompletelyUnknownGapLen) {
1501 // CRef<CBioseq> pBioseq =
1502 // s_ParseFasta(kFasta,
1503 // CFastaReader::fParseGaps | CFastaReader::fAssumeNuc,
1504 // kEmptyStr,
1505 // TWarnVec(),
1506 // ( bSetCompletelyUnknownGapLen ? 100 : 0 ) );
1507 //
1508 // const CDelta_ext::Tdata & delta_seqs =
1509 // pBioseq->GetInst().GetExt().GetDelta().Get();
1510 // CDelta_ext::Tdata::const_iterator delta_seq_it = delta_seqs.begin();
1511 //
1512 // NCBITEST_CHECK(
1513 // FIELD_IS_AND_IS_SET( **delta_seq_it, Literal, Seq_data ) );
1514 //
1515 // ++delta_seq_it;
1516 // if( bSetCompletelyUnknownGapLen ) {
1517 // pRegularUnknownGap->SetLiteral().SetLength(100);
1518 // NCBITEST_CHECK(
1519 // (*delta_seq_it)->Equals(*pRegularUnknownGap) );
1520 // } else {
1521 // NCBITEST_CHECK(
1522 // (*delta_seq_it)->Equals(*pCompletelyUnknownGap) );
1523 // }
1524 //
1525 // ++delta_seq_it;
1526 // NCBITEST_CHECK(
1527 // FIELD_IS_AND_IS_SET( **delta_seq_it, Literal, Seq_data ) );
1528 //
1529 // ++delta_seq_it;
1530 // pRegularGap->SetLiteral().SetLength(58);
1531 // NCBITEST_CHECK(
1532 // (*delta_seq_it)->Equals(*pRegularGap) );
1533 //
1534 // ++delta_seq_it;
1535 // NCBITEST_CHECK(
1536 // FIELD_IS_AND_IS_SET( **delta_seq_it, Literal, Seq_data ) );
1537 //
1538 // ++delta_seq_it;
1539 // pRegularUnknownGap->SetLiteral().SetLength(90);
1540 // NCBITEST_CHECK(
1541 // (*delta_seq_it)->Equals(*pRegularUnknownGap) );
1542 //
1543 // ++delta_seq_it;
1544 // NCBITEST_CHECK(
1545 // FIELD_IS_AND_IS_SET( **delta_seq_it, Literal, Seq_data ) );
1546 // }
1547 //}
1548 
// Test case: parses sequence data containing hyphens, expects two
// eProblem_IgnoredResidue warnings, and checks that the parsed length equals
// the number of alphabetic characters in the sequence lines (i.e. hyphens
// contribute nothing to the length).
1549 BOOST_AUTO_TEST_CASE(TestHyphensIgnoreAndWarn)
1550 {
     // First and third sequence lines contain hyphens; the second does not.
1551  const string kFasta =
1552  ">Seq1\n"
1553  "ACGTACGTACGTACGTACGTACGTA---CGTACGTACGTACGTACGTACGTA-\n"
1554  "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA\n"
1555  "ACGTACGTACGTACG----TACGTA---CGTACGTACGTACGTACGTACGTA-\n";
1556 
     // Two identical warnings are expected -- presumably one per sequence
     // line that contains hyphens (TODO confirm against the reader).
1557  TWarnVec expectedWarnings;
1558  ITERATE_0_IDX(dummy, 2) {
1559  expectedWarnings.push_back( ILineError::eProblem_IgnoredResidue );
1560  }
1561 
     // NOTE(review): the flags argument of this call (listing line 1564)
     // is absent from this extract.
1562  CRef<CBioseq> pBioseq =
1563  s_ParseFasta(kFasta,
1565  kEmptyStr,
1566  expectedWarnings );
1567 
1568  // ignored, but shouldn't cause an error
     // NOTE(review): BOOST_CHECK does not stop the test on failure, yet
     // pBioseq is dereferenced further down (GetLength).
1569  BOOST_CHECK( pBioseq );
1570 
1571  // make sure answer is the correct length
1572  {{
1573  // calculate num bases expected
1574  string::size_type next_char_idx = 0;
1575  // skip first line, which is a defline
1576  next_char_idx = kFasta.find('\n');
1577  BOOST_CHECK_NE( next_char_idx, string::npos );
1578  ++next_char_idx;
1579  BOOST_CHECK_LT( next_char_idx, kFasta.length() );
1580 
     // Count only alphabetic characters; hyphens and newlines are skipped.
1581  size_t uNumBasesExpected = 0;
1582  for( ; next_char_idx < kFasta.length(); ++next_char_idx ) {
1583  const char ch = kFasta[next_char_idx];
1584  if( isalpha(ch) ) {
1585  ++uNumBasesExpected;
1586  }
1587  }
1588 
1589  BOOST_CHECK_EQUAL( uNumBasesExpected, pBioseq->GetLength() );
1590  }}
1591 }
1592 
// Test case: builds deflines with 0, 1 and 2 spaces between '>' and the
// local id, parses each with kDefaultFastaReaderFlags, and checks that the
// first Seq-id's local string and the IUPAC sequence are what was supplied.
1593 BOOST_AUTO_TEST_CASE(TestIgnoringSpacesAfterGreaterThanInDefline)
1594 {
1595  const string kLocalId = "Seq1";
1596  const string kSeq = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA";
1597 
     // num_spaces takes the values 0, 1, 2.
1598  ITERATE_0_IDX(num_spaces, 3) {
1599  const string sDefline =
1600  ">" + string(num_spaces, ' ') + kLocalId;
1601 
1602  cout << "Trying with defline '" << sDefline << "'" << endl;
1603 
1604  const string sFastaToParse =
1605  sDefline + "\n" +
1606  kSeq + "\n";
1607 
1608  CRef<CBioseq> pBioseq =
1609  s_ParseFasta(sFastaToParse,
1610  kDefaultFastaReaderFlags );
1611 
     // NOTE(review): BOOST_CHECK does not stop the test on failure, yet
     // pBioseq is dereferenced immediately afterwards.
1612  BOOST_CHECK( pBioseq );
1613 
     // NOTE(review): the opening of this comparison (listing line 1614)
     // is absent from this extract; the two remaining lines are its
     // arguments: the parsed local id string and the expected kLocalId.
1615  pBioseq->GetFirstId()->GetLocal().GetStr(),
1616  kLocalId );
1617 
     // The parsed residues, read back in IUPAC coding, must equal kSeq.
1618  CSeqVector seqvec( *pBioseq, NULL, CBioseq_Handle::eCoding_Iupac );
1619  BOOST_CHECK_EQUAL_COLLECTIONS(
1620  kSeq.begin(), kSeq.end(),
1621  seqvec.begin(), seqvec.end() );
1622  }
1623 }
1624 
// Test case: parses a defline carrying [topology], [org] and [taxid] mods
// twice -- once without a mod filter and once with a filter that rejects
// "org" and "taxid" -- and checks that the resulting Bioseq has an org
// exactly when the filter is NOT in use.
1625 BOOST_AUTO_TEST_CASE(TestModFilter)
1626 {
1627  const string kData = ">Seq1 Seq2 [topology=circular] [org=ia io] [taxid=123]\n"
1628  "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA\n";
1629 
1630  // a filter that filters out org mods only.
     // operator() returns false for "org" and "taxid" and true for every
     // other mod name (presumably true means "let the mod through" --
     // confirm against CSourceModParser::CModFilter).
1631  class COrgModFilter : public CSourceModParser::CModFilter
1632  {
1633  public:
1634  virtual bool operator()( const CTempString & mod_name ) {
1635  return ( mod_name != "org" && mod_name != "taxid" );
1636  }
1637  };
1638  CRef<CSourceModParser::CModFilter> pModFilter( new COrgModFilter );
1639 
     // Runs the body with bUseFilter == false, then true.
1640  ITERATE_BOTH_BOOL_VALUES( bUseFilter ) {
1641 
     // With the filter active, the two rejected mods are expected to be
     // reported as unused; without it, the set stays empty.
1642  set<string> expected_unused_mods;
1643  if( bUseFilter ) {
1644  expected_unused_mods.insert( "org" );
1645  expected_unused_mods.insert( "taxid" );
1646  }
1647 
     // NOTE(review): the flags argument of this call (listing line 1650)
     // is absent from this extract. An empty CRef is passed as the filter
     // when bUseFilter is false.
1648  CRef<CBioseq> pBioseq =
1649  s_ParseFasta( kData,
1651  kEmptyStr,
1652  TWarnVec(),
1653  ( bUseFilter ? pModFilter : CRef<CSourceModParser::CModFilter>() ),
1654  expected_unused_mods );
1655 
     // Dump the parsed Bioseq as text ASN for inspection in the test log.
1656  cout << MSerial_AsnText << *pBioseq << endl;
1657 
1658  // check if pBioseq has an org
     // Stops at the first descriptor for which the Source/Org macro check
     // succeeds.
1659  bool has_org = false;
1660  FOR_EACH_SEQDESC_ON_BIOSEQ(desc_it, *pBioseq) {
1661  if( FIELD_IS_AND_IS_SET(**desc_it, Source, Org) ) {
1662  has_org = true;
1663  break;
1664  }
1665  }
1666 
     // An org must be present exactly when the filter was not applied.
1667  BOOST_CHECK_EQUAL( has_org, ! bUseFilter );
1668  }
1669 }
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
Definition: iterator.hpp:958
only the "unspecified" linkage-evidence is allowed
Definition: Seq_gap.hpp:77
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
Definition: Seq_inst_.hpp:802
bool IsSetLinkage_evidence(void) const
Check if a value has been assigned to Linkage_evidence data member.
Definition: Seq_gap_.hpp:347
Parse runs of Ns when splitting data.
Definition: fasta.hpp:102
CConstRef –.
Definition: ncbiobj.hpp:1192
Set coding to printable coding (Iupacna or Iupacaa)
bool IsSetExt(void) const
extensions for special types. Check if a value has been assigned to Ext data member.
Definition: Seq_inst_.hpp:811
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:641
When a hyphen is encountered in seq data, ignore it but warn.
Definition: fasta.hpp:109
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:62
const SBadResiduePositions & GetBadResiduePositions(void) const THROWS_NONE
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:970
Force specified type regardless of accession.
Definition: fasta.hpp:86
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:836
CNcbiOstream & operator<<(CNcbiOstream &out, const CEquivRange &range)
Definition: equiv_range.cpp:51
BOOST_AUTO_TEST_CASE(TestBadResidues)
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:740
ELinkEvid m_eLinkEvid
Indicates what linkage-evidences are compatible with this.
Definition: Seq_gap.hpp:88
int TFlags
binary OR of EFlags
Definition: fasta.hpp:112
ASN.1 text.
Definition: serialdef.hpp:73
virtual bool operator()(const CTempString &mod_name)=0
bool IsSetSeq_data(void) const
may have the data Check if a value has been assigned to Seq_data data member.
virtual EDiagSev Severity(void) const
Definition: line_error.hpp:347
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
Check (alphabetic) residue validity.
Definition: fasta.hpp:97
static const TGapTypeMap & GetNameToGapTypeInfoMap(void)
This is for if the user needs to get the gap-type string to SGapTypeInfo info directly (For example...
Definition: Seq_gap.cpp:176
string
Definition: cgiapp.hpp:498
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
const TExt & GetExt(void) const
Get the Ext member data.
Definition: Seq_inst_.hpp:823
USING_SCOPE(objects)
static CBioSource dummy
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
Warning message.
Definition: ncbidiag.hpp:646
static const string kLocalId
Parse defline mods and add to SeqEntry.
Definition: fasta.hpp:101
CSeq_gap::EType m_eType
The underlying type that the string corresponds to.
Definition: Seq_gap.hpp:86
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>...
Definition: Seq_gap.hpp:54
#define NULL
Definition: ncbistd.hpp:225
TLinkage GetLinkage(void) const
Get the Linkage member data.
Definition: Seq_gap_.hpp:319
Don't split out ambiguous sequence regions.
Definition: fasta.hpp:96
const_iterator end() const
Definition: set.hpp:136
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1343
#define kEmptyStr
Definition: ncbistr.hpp:120
bool NotNull(void) const THROWS_NONE
Check if pointer is not null – same effect as NotEmpty().
Definition: ncbiobj.hpp:1314
Make a delta sequence if gaps found.
Definition: fasta.hpp:88
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:776
Reader-writer based streams.
Note about the "buf_size" parameter for streams in this API.
Definition: rwstream.hpp:105
static CObjectIStream * CreateFromBuffer(ESerialDataFormat format, const char *buffer, size_t size)
Create serial object reader and attach it to a data source.
Definition: objistr.cpp:168
CRef< CSeq_entry > ReadSet(int max_seqs=kMax_Int, ILineErrorListener *pMessageListener=0)
Read multiple sequences (by default, as many as are available.)
Definition: fasta.cpp:466
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:314
vector< SLineTextAndLoc > TSeqTitles
Definition: fasta.hpp:198
#define FIELD_EQUALS(Var, Fld, Value)
FIELD_EQUALS base macro.
Parse the deflines but skip the data.
Definition: fasta.hpp:91
#define FOR_EACH_SEQDESC_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQDESC_ON_BIOSEQ EDIT_EACH_SEQDESC_ON_BIOSEQ.
Definition: seq_macros.hpp:218
bool IsGap(void) const
Check if variant Gap is selected.
Definition: Seq_data_.hpp:704
If Prot, use iupacaa instead of the default ncbieaa.
Definition: fasta.hpp:108
virtual const std::string & FeatureName(void) const =0
bool IsSetFuzz(void) const
could be unsure Check if a value has been assigned to Fuzz data member.
const_iterator begin() const
Definition: set.hpp:135
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
Informational message.
Definition: ncbidiag.hpp:645
virtual std::string ProblemStr(void) const
Definition: line_error.hpp:174
static struct @828 kFlags[]
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
no linkage-evidence is allowed
Definition: Seq_gap.hpp:79
#define NCBITEST_CHECK(P)
Definition: test_boost.hpp:357
Utility stuff for more convenient using of Boost.Test library.
CSeqVector –.
Definition: seq_vector.hpp:64
Operators to edit gaps in sequences.
list< CRef< CLinkage_evidence > > TLinkage_evidence
Definition: Seq_gap_.hpp:117
#define NCBITEST_CHECK_EQUAL(L, R)
Definition: test_boost.hpp:368
const TGene & GetGene(void) const
Get the variant data.
any linkage-evidence is allowed, and at least one is required
Definition: Seq_gap.hpp:81
If no residues found do not raise an error.
Definition: fasta.hpp:110
Trace message.
Definition: ncbidiag.hpp:651
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: Seq_gap_.hpp:253
const string kData
static void ParseDefLine(const TStr &defLine, const SDefLineParseInfo &info, const TIgnoredProblems &ignoredErrors, list< CRef< CSeq_id >> &ids, bool &hasRange, TSeqPos &rangeStart, TSeqPos &rangeEnd, TSeqTitles &seqTitles, ILineErrorListener *pMessageListener)
Definition: fasta.cpp:512
const TGap & GetGap(void) const
Get the variant data.
Definition: Seq_data_.cpp:184
bool operator!=(const _Ht_iterator< _Val, _Nonconst_traits< _Val >, _Key, _HF, _ExK, _EqK, _All > &__x, const _Ht_iterator< _Val, _Const_traits< _Val >, _Key, _HF, _ExK, _EqK, _All > &__y)
Definition: _hashtable.h:173
bool operator<(const CEquivRange &A, const CEquivRange &B)
Definition: equiv_range.cpp:90
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
Miscellaneous common-use basic types and functionality.
CException –.
Definition: ncbiexpt.hpp:709
bool IsProt(void) const
Check if variant Prot is selected.
Base class for reading FASTA sequences.
Definition: fasta.hpp:77
Simple implementation of ILineReader for regions of memory (such as memory-mapped files)...
void StoreError(const ILineError &err)
CRef –.
Definition: ncbiobj.hpp:616
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:485
const CSeq_id * GetFirstId() const
Definition: Bioseq.cpp:271
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
Assume nucs unless accns indicate otherwise.
Definition: fasta.hpp:84
CObjectFor –.
Definition: ncbiobj.hpp:2023
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
#define FIELD_IS_AND_IS_SET(Var, Chs, Fld)
FIELD_IS_AND_IS_SET base macro.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1360
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
TLength GetLength(void) const
Get the Length member data.
Read Seq-ids past the first ^A (see note)
Definition: fasta.hpp:90
string-based IReader
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
virtual unsigned int Line(void) const =0
bool IsSetLength(void) const
must give a length in residues Check if a value has been assigned to Length data member.
vector< ILineError::EProblem > TIgnoredProblems
Definition: fasta.hpp:197
CAutoInitRef<>::
const TLinkage_evidence & GetLinkage_evidence(void) const
Get the Linkage_evidence member data.
Definition: Seq_gap_.hpp:359
virtual EProblem Problem(void) const =0
IO_PREFIX::ostrstream CNcbiOstrstream
Portable alias for ostrstream.
Definition: ncbistre.hpp:155
TType GetType(void) const
Get the Type member data.
Definition: Seq_gap_.hpp:272
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:326
bool operator==(const CEquivRange &A, const CEquivRange &B)
Definition: equiv_range.cpp:86
Holds information about a given gap-type string.
Definition: Seq_gap.hpp:84
const_iterator begin() const
Definition: map.hpp:151
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:165
size_type length(void) const
Return the length of the represented array.
Definition: tempstr.hpp:360
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:353
#define RAW_FIELD_IS_EMPTY_OR_UNSET(Var, Fld)
RAW_FIELD_IS_EMPTY_OR_UNSET macro.
Definition: set.hpp:44
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:634
#define ITERATE_BOTH_BOOL_VALUES(BoolVar)
The body of the loop will be run with Var equal to false and then true.
Definition: ncbimisc.hpp:822
#define kMax_Int
Definition: ncbi_limits.h:184
static string NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:4277
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:756
#define ITERATE_0_IDX(idx, up_to)
idx loops from 0 (inclusive) to up_to (exclusive)
Definition: ncbimisc.hpp:826
Assume prots unless accns indicate otherwise.
Definition: fasta.hpp:85
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:426
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Modified on Wed May 24 16:20:05 2017 by modify_doxy.py rev. 533848