NCBI C++ ToolKit
unit_test_fasta_ostream.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_fasta_ostream.cpp 77569 2017-04-25 15:53:49Z drozdov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Pavel Ivanov, NCBI
27 *
28 * File Description:
29 * Unit tests for CFastaOstream: raw output, gap rendering, soft/hard
30 * masking, and defline/gap modifiers.
31 *
32 * The tests use the Ncbi.Test framework, which is based on the Boost.Test
33 * framework. For more advanced techniques see the sample unit_test_alt_sample.cpp.
34 *
35 * ===========================================================================
36 */
37 
38 #include <ncbi_pch.hpp>
39 
40 #include <corelib/ncbi_system.hpp>
41 
42 // This macro should be defined before including test_boost.hpp in every
43 // "*.cpp" file of the executable except one. Just as main() is defined in
44 // only one *.cpp file of a non-Boost.Test executable, exactly one file must
45 // leave it undefined: if NCBI_BOOST_NO_AUTO_TEST_MAIN is not defined,
46 // test_boost.hpp defines the "main()" function for the tests.
47 //
48 // Usually if your unit tests contain only one *.cpp file you should not
49 // care about this macro at all.
50 //
51 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
52 
53 
54 // This header must be included before all Boost.Test headers if there are any
55 #include <corelib/test_boost.hpp>
56 
58 
61 #include <objmgr/scope.hpp>
62 #include <objmgr/bioseq_ci.hpp>
63 #include <objmgr/feat_ci.hpp>
64 #include <objmgr/seq_vector.hpp>
65 #include <objmgr/util/sequence.hpp>
67 #include <objects/seq/Seq_inst.hpp>
68 #include <objects/seq/Seq_ext.hpp>
69 #include <objects/seq/Seq_gap.hpp>
73 #include <objects/seq/IUPACna.hpp>
75 
78 
79 extern const char* sc_TestEntry;
80 
/// Body of s_ReadData(): parses sc_TestEntry once and returns the cached entry.
///
/// NOTE(review): the signature line (81) is missing from this listing; the
/// name is taken from the call sites in the test cases below.
/// NOTE(review): every caller receives the same CSeq_entry instance, so
/// in-place edits made by one test (e.g. Test_GapMods setting gap data on the
/// literals) remain visible to any test that runs afterwards - confirm this
/// is intended.
82 {
83  static CRef<CSeq_entry> s_entry;
84  if ( ! s_entry ) {
85  s_entry.Reset(new CSeq_entry);
86 
    // first call only: deserialize the ASN.1 text held in sc_TestEntry
87  CNcbiIstrstream istr(sc_TestEntry);
88  istr >> MSerial_AsnText >> *s_entry;
89  }
90 
91  return s_entry;
92 }
93 
/// Writes the whole test entry through CFastaOstream and compares the text
/// with a fixed expectation: a ">lcl|test-seq test sequence" defline, 70
/// characters per sequence line, and the 20-base gap shown inline as 20 '-'.
///
/// NOTE(review): lines 102 and 114 are missing from this listing; `om` is
/// declared on the former, and the latter presumably configures fasta_os -
/// verify against the real source.
94 BOOST_AUTO_TEST_CASE(Test_FastaRaw)
95 {
96  CRef<CSeq_entry> entry = s_ReadData();
97 
98  ///
99  /// we have one bioseq
100  /// add this to a scope and get it back as a handle so we can format it
101  ///
103  CRef<CScope> scope(new CScope(*om));
104 
105  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
106 
107  ///
108  /// first: raw formatting
109  ///
110  {{
111  CNcbiOstrstream os;
112  {{
113  CFastaOstream fasta_os(os);
115  /// FIXME: this should be the default!!
116  //fasta_os.SetFlag(CFastaOstream::fInstantiateGaps);
117  //fasta_os.SetFlag(CFastaOstream::fAssembleParts);
118  fasta_os.Write(seh);
119  }}
    // fasta_os has gone out of scope here; flush and capture what was written
120  os.flush();
121  string s = string(CNcbiOstrstreamToString(os));
122  static const char* sc_Expected =
123 ">lcl|test-seq test sequence\n"
124 "CGGTTGCTTGGGTTTTATAACATCAGTCAGTGACAGGCATTTCCAGAGTTGCCCTGTTCAACAATCGATA\n"
125 "GCTGCCTTTGGCCACCAAAATCCCAAACT--------------------AATTAAAGAATTAAATAATTC\n"
126 "GAATAATAATTAAGCCCAGTAACCTACGCAGCTTGAGTGCGTAACCGATATCTAGTATACATTTCGATAC\n"
127 "ATCGAAAT\n";
128  BOOST_CHECK_EQUAL(s, string(sc_Expected));
129  }}
130 
131 }
132 
/// Checks the alternate gap rendering: the expected text ends the line before
/// the gap with a single '-' and starts the following segment on a new line.
///
/// NOTE(review): lines 141 and 155-156 are missing from this listing; `om` is
/// declared on the former, and the latter presumably select the gap mode -
/// verify against the real source.
133 BOOST_AUTO_TEST_CASE(Test_FastaGap)
134 {
135  CRef<CSeq_entry> entry = s_ReadData();
136 
137  ///
138  /// we have one bioseq
139  /// add this to a scope and get it back as a handle so we can format it
140  ///
142  CRef<CScope> scope(new CScope(*om));
143 
144  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
145 
146 
147  ///
148  /// next: alternate gap processing
149  ///
150  {{
151  CNcbiOstrstream os;
152  {{
153  CFastaOstream fasta_os(os);
154  /// FIXME: this should be the default!!
157  fasta_os.Write(seh);
158  }}
    // fasta_os has gone out of scope here; flush and capture what was written
159  os.flush();
160  string s = string(CNcbiOstrstreamToString(os));
161  static const char* sc_Expected =
162 ">lcl|test-seq test sequence\n"
163 "CGGTTGCTTGGGTTTTATAACATCAGTCAGTGACAGGCATTTCCAGAGTTGCCCTGTTCAACAATCGATA\n"
164 "GCTGCCTTTGGCCACCAAAATCCCAAACT-\n"
165 "AATTAAAGAATTAAATAATTCGAATAATAATTAAGCCCAGTAACCTACGCAGCTTGAGTGCGTAACCGAT\n"
166 "ATCTAGTATACATTTCGATACATCGAAAT\n";
167  BOOST_CHECK_EQUAL(s, string(sc_Expected));
168  }}
169 }
170 
/// Applies a soft mask made of non-overlapping ten-base intervals and expects
/// exactly those positions in lower case. In the expected text the gap is
/// written with N letters, lower-cased where a mask interval covers it.
///
/// NOTE(review): lines 179 and 192 are missing from this listing; they
/// declare `om` and `ival` respectively.
171 BOOST_AUTO_TEST_CASE(Test_FastaMask_SimpleSoft)
172 {
173  CRef<CSeq_entry> entry = s_ReadData();
174 
175  ///
176  /// we have one bioseq
177  /// add this to a scope and get it back as a handle so we can format it
178  ///
180  CRef<CScope> scope(new CScope(*om));
181 
182  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
183 
184 
185  ///
186  /// next: simple lower-case masking
187  ///
188  {{
189  CRef<CSeq_id> id(new CSeq_id("lcl|test-seq"));
190  CRef<CSeq_loc> loc(new CSeq_loc);
    // mask intervals [pos, pos+9] for pos = 10, 37, 64, ..., 199
191  for (TSeqPos pos = 10; pos < 200; pos += 27) {
193  ival->SetFrom(pos);
194  ival->SetTo(pos + 9);
195  loc->SetPacked_int().Set().push_back(ival);
196  }
    // stamp the sequence id onto every interval collected above
197  loc->SetId(*id);
198 
199  CNcbiOstrstream os;
200  {{
201  CFastaOstream fasta_os(os);
202  // fasta_os.SetGapMode(CFastaOstream::eGM_letters);
203  fasta_os.SetMask(CFastaOstream::eSoftMask, loc);
204  fasta_os.Write(seh);
205  }}
206  os.flush();
207  string s = string(CNcbiOstrstreamToString(os));
208  static const char* sc_Expected =
209 ">lcl|test-seq test sequence\n"
210 "CGGTTGCTTGggttttataaCATCAGTCAGTGACAGGcatttccagaGTTGCCCTGTTCAACAAtcgata\n"
211 "gctgCCTTTGGCCACCAAAATcccaaactnnNNNNNNNNNNNNNNNNNnaattaaagaATTAAATAATTC\n"
212 "GAATAataattaagcCCAGTAACCTACGCAGCttgagtgcgtAACCGATATCTAGTATAcatttcgataC\n"
213 "ATCGAAAT\n";
214  BOOST_CHECK_EQUAL(s, string(sc_Expected));
215  }}
216 }
217 
218 
/// Applies a hard mask made of non-overlapping ten-base intervals and expects
/// the masked bases to be written as 'N'. In the expected text the gap stays
/// as dashes even where a mask interval overlaps it.
///
/// NOTE(review): lines 227, 240 and 250 are missing from this listing; the
/// first two declare `om` and `ival`, and the last presumably configures
/// fasta_os - verify against the real source.
219 BOOST_AUTO_TEST_CASE(Test_FastaMask_SimpleHard)
220 {
221  CRef<CSeq_entry> entry = s_ReadData();
222 
223  ///
224  /// we have one bioseq
225  /// add this to a scope and get it back as a handle so we can format it
226  ///
228  CRef<CScope> scope(new CScope(*om));
229 
230  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
231 
232 
233  ///
234  /// next: simple hard ('N') masking
235  ///
236  {{
237  CRef<CSeq_id> id(new CSeq_id("lcl|test-seq"));
238  CRef<CSeq_loc> loc(new CSeq_loc);
    // mask intervals [pos, pos+9] for pos = 10, 37, 64, ..., 199
239  for (TSeqPos pos = 10; pos < 200; pos += 27) {
241  ival->SetFrom(pos);
242  ival->SetTo(pos + 9);
243  loc->SetPacked_int().Set().push_back(ival);
244  }
    // stamp the sequence id onto every interval collected above
245  loc->SetId(*id);
246 
247  CNcbiOstrstream os;
248  {{
249  CFastaOstream fasta_os(os);
251  fasta_os.SetMask(CFastaOstream::eHardMask, loc);
252  fasta_os.Write(seh);
253  }}
254  os.flush();
255  string s = string(CNcbiOstrstreamToString(os));
256  static const char* sc_Expected =
257 ">lcl|test-seq test sequence\n"
258 "CGGTTGCTTGNNNNNNNNNNCATCAGTCAGTGACAGGNNNNNNNNNNGTTGCCCTGTTCAACAANNNNNN\n"
259 "NNNNCCTTTGGCCACCAAAATNNNNNNNN--------------------NNNNNNNNNATTAAATAATTC\n"
260 "GAATANNNNNNNNNNCCAGTAACCTACGCAGCNNNNNNNNNNAACCGATATCTAGTATANNNNNNNNNNC\n"
261 "ATCGAAAT\n";
262  BOOST_CHECK_EQUAL(s, string(sc_Expected));
263  }}
264 }
265 
266 
/// Soft-masks with overlapping intervals (the ten-base intervals plus
/// 100-150) and writes the full sequence, then writes the sub-range 0-24.
/// The second record is expected as ">lcl|test-seq:1-25" in all upper case.
///
/// NOTE(review): lines 275, 289, 295 and 307 are missing from this listing.
/// The first three declare `om` and `ival`; line 307 presumably clears the
/// soft mask before the second Write, which would explain the upper-case
/// second record - verify against the real source.
267 BOOST_AUTO_TEST_CASE(Test_FastaMask_ComplexSoft)
268 {
269  CRef<CSeq_entry> entry = s_ReadData();
270 
271  ///
272  /// we have one bioseq
273  /// add this to a scope and get it back as a handle so we can format it
274  ///
276  CRef<CScope> scope(new CScope(*om));
277 
278  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
279 
280  ///
281  /// next: complex lower-case masking
282  /// we do two things - first, provide a set of overlapping locations and
283  /// second, clear the masks
284  ///
285  {{
286  CRef<CSeq_id> id(new CSeq_id("lcl|test-seq"));
287  CRef<CSeq_loc> loc(new CSeq_loc);
    // mask intervals [pos, pos+9] for pos = 10, 37, 64, ..., 199
288  for (TSeqPos pos = 10; pos < 200; pos += 27) {
290  ival->SetFrom(pos);
291  ival->SetTo(pos + 9);
292  loc->SetPacked_int().Set().push_back(ival);
293  }
294 
    // one more interval, 100-150, overlapping several of the ones above
296  ival->SetFrom(100);
297  ival->SetTo(150);
298  loc->SetPacked_int().Set().push_back(ival);
299  loc->SetId(*id);
300 
301  CNcbiOstrstream os;
302  {{
303  CFastaOstream fasta_os(os);
304  // fasta_os.SetGapMode(CFastaOstream::eGM_letters);
305  fasta_os.SetMask(CFastaOstream::eSoftMask, loc);
306  fasta_os.Write(seh);
    // second record: only positions 0-24 of the same sequence
308  CSeq_loc loc2(*id, (TSeqPos)0, 24);
309  fasta_os.Write(seh, &loc2);
310  }}
311  os.flush();
312  string s = string(CNcbiOstrstreamToString(os));
313  static const char* sc_Expected =
314 ">lcl|test-seq test sequence\n"
315 "CGGTTGCTTGggttttataaCATCAGTCAGTGACAGGcatttccagaGTTGCCCTGTTCAACAAtcgata\n"
316 "gctgCCTTTGGCCACCAAAATcccaaactnnnnnnnnnnnnnnnnnnnnaattaaagaattaaataattc\n"
317 "gaataataattaagcCCAGTAACCTACGCAGCttgagtgcgtAACCGATATCTAGTATAcatttcgataC\n"
318 "ATCGAAAT\n"
319 ">lcl|test-seq:1-25 test sequence\n"
320 "CGGTTGCTTGGGTTTTATAACATCA\n";
321  BOOST_CHECK_EQUAL(s, string(sc_Expected));
322  }}
323 }
324 
325 
326 
/// Combines a soft mask (the ten-base intervals plus 100-150) with a hard
/// mask (105-145). Part one writes the sequence once with both masks set;
/// where both apply, the expected text shows lower-case 'n'. Part two writes
/// four records and expects, in order: both masks, hard mask only, no mask,
/// soft mask only.
///
/// NOTE(review): lines 335, 348, 354, 392, 398, 416 and 419 are missing from
/// this listing. Lines 416 and 419 presumably clear the soft and then the
/// hard mask between the Write calls; the others declare `om` and `ival` -
/// verify against the real source.
327 BOOST_AUTO_TEST_CASE(Test_FastaMask_ComplexSoftHard)
328 {
329  CRef<CSeq_entry> entry = s_ReadData();
330 
331  ///
332  /// we have one bioseq
333  /// add this to a scope and get it back as a handle so we can format it
334  ///
336  CRef<CScope> scope(new CScope(*om));
337 
338  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
339 
340  ///
341  /// next: complex lower-case masking
342  /// first, test just supplying hard and soft masks
343  ///
344  {{
345  CRef<CSeq_id> id(new CSeq_id("lcl|test-seq"));
346  CRef<CSeq_loc> soft_loc(new CSeq_loc);
    // soft mask intervals [pos, pos+9] for pos = 10, 37, 64, ..., 199
347  for (TSeqPos pos = 10; pos < 200; pos += 27) {
349  ival->SetFrom(pos);
350  ival->SetTo(pos + 9);
351  soft_loc->SetPacked_int().Set().push_back(ival);
352  }
353 
    // one more soft interval, 100-150, overlapping several of the ones above
355  ival->SetFrom(100);
356  ival->SetTo(150);
357  soft_loc->SetPacked_int().Set().push_back(ival);
358  soft_loc->SetId(*id);
359 
    // hard mask 105-145 lies entirely inside the 100-150 soft interval
360  CRef<CSeq_loc> hard_loc(new CSeq_loc);
361  hard_loc->SetInt().SetFrom(105);
362  hard_loc->SetInt().SetTo(145);
363  hard_loc->SetId(*id);
364 
365  CNcbiOstrstream os;
366  {{
367  CFastaOstream fasta_os(os);
368  // fasta_os.SetGapMode(CFastaOstream::eGM_letters);
369  fasta_os.SetMask(CFastaOstream::eSoftMask, soft_loc);
370  fasta_os.SetMask(CFastaOstream::eHardMask, hard_loc);
371  fasta_os.Write(seh);
372  }}
373  os.flush();
374  string s = string(CNcbiOstrstreamToString(os));
375  static const char* sc_Expected =
376 ">lcl|test-seq test sequence\n"
377 "CGGTTGCTTGggttttataaCATCAGTCAGTGACAGGcatttccagaGTTGCCCTGTTCAACAAtcgata\n"
378 "gctgCCTTTGGCCACCAAAATcccaaactnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn\n"
379 "nnnnnntaattaagcCCAGTAACCTACGCAGCttgagtgcgtAACCGATATCTAGTATAcatttcgataC\n"
380 "ATCGAAAT\n";
381  BOOST_CHECK_EQUAL(s, string(sc_Expected));
382  }}
383 
384  ///
385  /// complex, part deux:
386  /// test setting and clearing masks
387  ///
388  {{
389  CRef<CSeq_id> id(new CSeq_id("lcl|test-seq"));
390  CRef<CSeq_loc> soft_loc(new CSeq_loc);
    // same soft and hard mask locations as in part one
391  for (TSeqPos pos = 10; pos < 200; pos += 27) {
393  ival->SetFrom(pos);
394  ival->SetTo(pos + 9);
395  soft_loc->SetPacked_int().Set().push_back(ival);
396  }
397 
399  ival->SetFrom(100);
400  ival->SetTo(150);
401  soft_loc->SetPacked_int().Set().push_back(ival);
402  soft_loc->SetId(*id);
403 
404  CRef<CSeq_loc> hard_loc(new CSeq_loc);
405  hard_loc->SetInt().SetFrom(105);
406  hard_loc->SetInt().SetTo(145);
407  hard_loc->SetId(*id);
408 
409  CNcbiOstrstream os;
410  {{
411  CFastaOstream fasta_os(os);
412  // fasta_os.SetGapMode(CFastaOstream::eGM_letters);
413  fasta_os.SetMask(CFastaOstream::eSoftMask, soft_loc);
414  fasta_os.SetMask(CFastaOstream::eHardMask, hard_loc);
    // record 1: whole entry, both masks set
415  fasta_os.Write(seh);
    // records 2-4 are written through an explicit location covering 0-217
417  CSeq_loc loc2(*id, (TSeqPos)0, 217);
418  fasta_os.Write(seh, &loc2);
420  fasta_os.Write(seh, &loc2);
    // soft mask set again before the fourth record
421  fasta_os.SetMask(CFastaOstream::eSoftMask, soft_loc);
422  fasta_os.Write(seh, &loc2);
423  }}
424  os.flush();
425  string s = string(CNcbiOstrstreamToString(os));
426  static const char* sc_Expected =
427 ">lcl|test-seq test sequence\n"
428 "CGGTTGCTTGggttttataaCATCAGTCAGTGACAGGcatttccagaGTTGCCCTGTTCAACAAtcgata\n"
429 "gctgCCTTTGGCCACCAAAATcccaaactnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn\n"
430 "nnnnnntaattaagcCCAGTAACCTACGCAGCttgagtgcgtAACCGATATCTAGTATAcatttcgataC\n"
431 "ATCGAAAT\n"
432 ">lcl|test-seq:1-218 test sequence\n"
433 "CGGTTGCTTGGGTTTTATAACATCAGTCAGTGACAGGCATTTCCAGAGTTGCCCTGTTCAACAATCGATA\n"
434 "GCTGCCTTTGGCCACCAAAATCCCAAACTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n"
435 "NNNNNNTAATTAAGCCCAGTAACCTACGCAGCTTGAGTGCGTAACCGATATCTAGTATACATTTCGATAC\n"
436 "ATCGAAAT\n"
437 ">lcl|test-seq:1-218 test sequence\n"
438 "CGGTTGCTTGGGTTTTATAACATCAGTCAGTGACAGGCATTTCCAGAGTTGCCCTGTTCAACAATCGATA\n"
439 "GCTGCCTTTGGCCACCAAAATCCCAAACTNNNNNNNNNNNNNNNNNNNNAATTAAAGAATTAAATAATTC\n"
440 "GAATAATAATTAAGCCCAGTAACCTACGCAGCTTGAGTGCGTAACCGATATCTAGTATACATTTCGATAC\n"
441 "ATCGAAAT\n"
442 ">lcl|test-seq:1-218 test sequence\n"
443 "CGGTTGCTTGggttttataaCATCAGTCAGTGACAGGcatttccagaGTTGCCCTGTTCAACAAtcgata\n"
444 "gctgCCTTTGGCCACCAAAATcccaaactnnnnnnnnnnnnnnnnnnnnaattaaagaattaaataattc\n"
445 "gaataataattaagcCCAGTAACCTACGCAGCttgagtgcgtAACCGATATCTAGTATAcatttcgataC\n"
446 "ATCGAAAT\n";
447  BOOST_CHECK_EQUAL(s, string(sc_Expected));
448  }}
449 }
450 
451 
452 
/// Sets a soft mask (50-75) and a hard mask (60-80) that partly overlap and
/// writes the sequence once. The expected text shows lower-case bases where
/// only the soft mask applies (50-59), lower-case 'n' where both apply
/// (60-75) and upper-case 'N' where only the hard mask applies (76-80).
///
/// NOTE(review): line 461, which declares `om`, is missing from this listing.
453 BOOST_AUTO_TEST_CASE(Test_FastaMask_SoftHardSimpleOverlap)
454 {
455  CRef<CSeq_entry> entry = s_ReadData();
456 
457  ///
458  /// we have one bioseq
459  /// add this to a scope and get it back as a handle so we can format it
460  ///
462  CRef<CScope> scope(new CScope(*om));
463 
464  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
465 
466  ///
467  /// next: partially overlapping soft and hard masks
468  /// one soft interval and one hard interval are set together;
469  /// no mask is cleared in this test
470  ///
471  {{
472  CRef<CSeq_id> id(new CSeq_id("lcl|test-seq"));
473  CRef<CSeq_loc> soft_loc(new CSeq_loc);
474  soft_loc->SetInt().SetFrom(50);
475  soft_loc->SetInt().SetTo(75);
476  soft_loc->SetId(*id);
477 
478  CRef<CSeq_loc> hard_loc(new CSeq_loc);
479  hard_loc->SetInt().SetFrom(60);
480  hard_loc->SetInt().SetTo(80);
481  hard_loc->SetId(*id);
482 
483  CNcbiOstrstream os;
484  {{
485  CFastaOstream fasta_os(os);
486  // fasta_os.SetGapMode(CFastaOstream::eGM_letters);
487  fasta_os.SetMask(CFastaOstream::eSoftMask, soft_loc);
488  fasta_os.SetMask(CFastaOstream::eHardMask, hard_loc);
489  fasta_os.Write(seh);
490  }}
491  os.flush();
492  string s = string(CNcbiOstrstreamToString(os));
493  static const char* sc_Expected =
494 ">lcl|test-seq test sequence\n"
495 "CGGTTGCTTGGGTTTTATAACATCAGTCAGTGACAGGCATTTCCAGAGTTgccctgttcannnnnnnnnn\n"
496 "nnnnnnNNNNNCCACCAAAATCCCAAACTNNNNNNNNNNNNNNNNNNNNAATTAAAGAATTAAATAATTC\n"
497 "GAATAATAATTAAGCCCAGTAACCTACGCAGCTTGAGTGCGTAACCGATATCTAGTATACATTTCGATAC\n"
498 "ATCGAAAT\n";
499  BOOST_CHECK_EQUAL(s, string(sc_Expected));
500  }}
501 }
502 
/// Checks deflines that carry key-value modifiers. The first expectation has
/// [organism=...] [strain=...] [gcode=1] [tech=physical map] before the
/// title; the organism value is quoted and the double quote embedded in the
/// taxname appears as a single quote. After the topology is set to circular,
/// the second expectation additionally starts with [topology=circular].
///
/// NOTE(review): lines 511, 523, 526, 547 and 550 are missing from this
/// listing. Line 511 declares `om`; the others presumably configure fasta_os
/// (e.g. switch modifiers on) - verify against the real source.
503 BOOST_AUTO_TEST_CASE(Test_FastaMods)
504 {
505  CRef<CSeq_entry> entry = s_ReadData();
506 
507  ///
508  /// we have one bioseq
509  /// add this to a scope and get it back as a handle so we can format it
510  ///
512  CRef<CScope> scope(new CScope(*om));
513 
514  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
515 
516  ///
517  /// formatting with modifiers
518  ///
519  {{
520  CNcbiOstrstream os;
521  {{
522  CFastaOstream fasta_os(os);
524  /// FIXME: this should be the default!!
525  //fasta_os.SetFlag(CFastaOstream::fInstantiateGaps);
527  fasta_os.Write(seh);
528  }}
529  os.flush();
530  string s = string(CNcbiOstrstreamToString(os));
531  static const char* sc_Expected =
532 ">lcl|test-seq [organism=\"Sarcophilus='harrisii\"] [strain=some strain] [gcode=1] [tech=physical map] test sequence\n"
533 "CGGTTGCTTGGGTTTTATAACATCAGTCAGTGACAGGCATTTCCAGAGTTGCCCTGTTCAACAATCGATA\n"
534 "GCTGCCTTTGGCCACCAAAATCCCAAACT--------------------AATTAAAGAATTAAATAATTC\n"
535 "GAATAATAATTAAGCCCAGTAACCTACGCAGCTTGAGTGCGTAACCGATATCTAGTATACATTTCGATAC\n"
536 "ATCGAAAT\n";
537  BOOST_CHECK_EQUAL(s, string(sc_Expected));
538  }}
539 
540  // check with topology circular
541  {{
    // NOTE(review): this edits the sequence through the handle; whether the
    // change also reaches the entry cached by s_ReadData() is not visible here
542  seh.GetEditHandle().SetSeq().SetInst_Topology( CSeq_inst::eTopology_circular );
543 
544  CNcbiOstrstream os;
545  {{
546  CFastaOstream fasta_os(os);
548  /// FIXME: this should be the default!!
549  //fasta_os.SetFlag(CFastaOstream::fInstantiateGaps);
551  fasta_os.Write(seh);
552  }}
553  os.flush();
554  string s = string(CNcbiOstrstreamToString(os));
555  static const char* sc_Expected =
556  ">lcl|test-seq [topology=circular] [organism=\"Sarcophilus='harrisii\"] [strain=some strain] [gcode=1] [tech=physical map] test sequence\n"
557  "CGGTTGCTTGGGTTTTATAACATCAGTCAGTGACAGGCATTTCCAGAGTTGCCCTGTTCAACAATCGATA\n"
558  "GCTGCCTTTGGCCACCAAAATCCCAAACT--------------------AATTAAAGAATTAAATAATTC\n"
559  "GAATAATAATTAAGCCCAGTAACCTACGCAGCTTGAGTGCGTAACCGATATCTAGTATACATTTCGATAC\n"
560  "ATCGAAAT\n";
561  BOOST_CHECK_EQUAL(s, string(sc_Expected));
562  }}
563 }
564 
/// Turns every data-less literal of the entry into an explicit gap with
/// linkage evidence, then formats the entry twice (bShowGapModifiers false,
/// then true). The expected text puts the gap on its own ">?20" line; when
/// bShowGapModifiers is true that line also carries
/// " [gap-type=within scaffold] [linkage-evidence=map;strobe]".
///
/// NOTE(review): lines 575, 577, 581-582, 584-585, 594, 607 and 611 are
/// missing from this listing. They presumably set the gap type/linkage,
/// create mapEvid/strobeEvid, declare `om`, and configure fasta_os - verify
/// against the real source.
/// NOTE(review): `entry` is the instance cached by s_ReadData(), so the gap
/// edits below are made on the shared object.
565 BOOST_AUTO_TEST_CASE(Test_GapMods)
566 {
567  CRef<CSeq_entry> entry = s_ReadData();
568 
569  /// Give the seq entry's gaps gap-type and linkage-evidence
570  CTypeIterator<CSeq_literal> literal_iter(Begin(*entry));
571  for( ; literal_iter; ++literal_iter) {
    // a literal without seq-data is the gap segment of the delta sequence
572  if( ! literal_iter->IsSetSeq_data() ) {
573  CSeq_gap & seq_gap = literal_iter->SetSeq_data().SetGap();
574 
576 
578 
579  CSeq_gap::TLinkage_evidence & linkage_evidence_vec =
580  seq_gap.SetLinkage_evidence();
583  linkage_evidence_vec.push_back( Ref(&*mapEvid) );
586  linkage_evidence_vec.push_back( Ref(&*strobeEvid) );
587  }
588  }
589 
590  ///
591  /// we have one bioseq
592  /// add this to a scope and get it back as a handle so we can format it
593  ///
595  CRef<CScope> scope(new CScope(*om));
596 
597  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
598 
599  ///
600  /// formatting with and without gap modifiers
601  ///
602  ITERATE_BOTH_BOOL_VALUES(bShowGapModifiers)
603  {
604  CNcbiOstrstream os;
605  {{
606  CFastaOstream fasta_os(os);
608  /// FIXME: this should be the default!!
609  //fasta_os.SetFlag(CFastaOstream::fInstantiateGaps);
610  if( bShowGapModifiers ) {
612  }
613  fasta_os.Write(seh);
614  }}
615  os.flush();
616  string s = string(CNcbiOstrstreamToString(os));
617 
    // build the expectation piecewise so the modifier text is conditional
618  CNcbiOstrstream expected_os;
619  expected_os << ">lcl|test-seq test sequence\n"
620  "CGGTTGCTTGGGTTTTATAACATCAGTCAGTGACAGGCATTTCCAGAGTTGCCCTGTTCAACAATCGATA\n"
621  "GCTGCCTTTGGCCACCAAAATCCCAAACT\n"
622  ">?20";
623  if( bShowGapModifiers ) {
624  expected_os << " [gap-type=within scaffold] [linkage-evidence=map;strobe]";
625  }
626  expected_os << '\n';
627  expected_os << "AATTAAAGAATTAAATAATTCGAATAATAATTAAGCCCAGTAACCTACGCAGCTTGAGTGCGTAACCGAT\n"
628  "ATCTAGTATACATTTCGATACATCGAAAT\n";
629 
630  BOOST_CHECK_EQUAL(s, string(CNcbiOstrstreamToString(expected_os)));
631  }
632 }
633 
// Disabled (#if 0) helper, kept for reference. It reads a Seq-entry from the
// "data-in" argument, rebuilds its Seq-inst as a delta sequence made of a
// literal, a 20-base gap and a second literal, and dumps the result to cerr
// as ASN.1 text.
// NOTE(review): lines 637 and 655-656 are missing from this listing; they
// presumably declare `args` and set repr/mol on `inst` - verify against the
// real source.
634 #if 0
635 BOOST_AUTO_TEST_CASE(Test_AutoGenerateData)
636 {
638 
639  CNcbiIstream& istr = args["data-in"].AsInputFile();
640  CSeq_entry entry;
641  istr >> MSerial_AsnText >> entry;
642 
643  /// modify the seq-entry - truncate the sequence, add a delta
644  string fasta1;
645  string fasta2;
646 
647  {{
648  CSeqVector vec(entry.GetSeq(), NULL, CBioseq_Handle::eCoding_Iupac);
    // GetSeqData fills the half-open interval [start, stop)
649  vec.GetSeqData(0, 99, fasta1);
650  vec.GetSeqData(100, 199, fasta2);
651  }}
652 
653  CSeq_inst& inst = entry.SetSeq().SetInst();
654  inst.Reset();
    // total length = both literals plus the 20-base gap added below
657  inst.SetLength(fasta1.size() + fasta2.size() + 20);
658 
    // first literal: fasta1 as IUPACna, then packed
659  CRef<CDelta_seq> del1(new CDelta_seq);
660  del1->SetLiteral().SetLength(fasta1.size());
661  del1->SetLiteral().SetSeq_data().SetIupacna(*new CIUPACna(fasta1));
662  CSeqportUtil::Pack(&del1->SetLiteral().SetSeq_data());
663  inst.SetExt().SetDelta().Set().push_back(del1);
664 
    // gap: a literal with a length but no seq-data
665  CRef<CDelta_seq> del_gap(new CDelta_seq);
666  del_gap->SetLiteral().SetLength(20);
667  inst.SetExt().SetDelta().Set().push_back(del_gap);
668 
    // second literal: fasta2 as IUPACna, then packed
669  CRef<CDelta_seq> del2(new CDelta_seq);
670  del2->SetLiteral().SetLength(fasta2.size());
671  del2->SetLiteral().SetSeq_data().SetIupacna(*new CIUPACna(fasta2));
672  CSeqportUtil::Pack(&del2->SetLiteral().SetSeq_data());
673  inst.SetExt().SetDelta().Set().push_back(del2);
674 
675  cerr << MSerial_AsnText << entry;
676 }
677 #endif
678 
679 
/// ASN.1 text of the Seq-entry that s_ReadData() parses for every test.
/// It describes one DNA bioseq with local id "test-seq", title
/// "test sequence", and a delta representation of length 218: a 99-base
/// ncbi2na literal, a 20-base literal without data (the gap), and another
/// 99-base ncbi2na literal.
680 const char* sc_TestEntry = "\
681 Seq-entry ::= seq {\
682  id {\
683  local str \"test-seq\"\
684  },\
685  descr {\
686  title \"test sequence\" ,\
687  source {\
688  org {\
689  taxname \"Sarcophilus=\"\"harrisii\" ,\
690  orgname {\
691  mod {\
692  { subtype pathovar, subname \"fake data\" } ,\
693  { subtype strain, subname \"some strain\" }\
694  } ,\
695  gcode 1\
696  }\
697  }\
698  } ,\
699  molinfo {\
700  tech physmap\
701  }\
702  },\
703  inst {\
704  repr delta,\
705  mol dna,\
706  length 218,\
707  ext delta {\
708  literal {\
709  length 99,\
710  seq-data ncbi2na '6BE7EAFF304D2D2E1293F522F95EF410D8C9E5FE94500D501C'H\
711  },\
712  literal {\
713  length 20\
714  },\
715  literal {\
716  length 99,\
717  seq-data ncbi2na '0F020F030F60C30F0952C171927E2E6C163372CC4FD8C4D80C'H\
718  }\
719  }\
720  }\
721 }";
722 
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
Definition: iterator.hpp:958
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:968
void SetRepr(TRepr value)
Assign a value to Repr data member.
Definition: Seq_inst_.hpp:559
void SetMask(EMaskType type, CConstRef< CSeq_loc > location)
Definition: sequence.cpp:3280
Set coding to printable coding (Iupacna or Iupacaa)
void SetTo(TTo value)
Assign a value to To data member.
const char * sc_TestEntry
void SetFrom(TFrom value)
Assign a value to From data member.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:1849
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:653
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:681
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:836
Template class for iteration on objects of class C.
Definition: iterator.hpp:691
FASTA-format output; see also ReadFasta in
Definition: sequence.hpp:743
TSeq SetSeq(void) const
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_gap_.hpp:281
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CDelta_seq –.
Definition: Delta_seq.hpp:65
bool IsSetSeq_data(void) const
may have the data Check if a value has been assigned to Seq_data data member.
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
honor specifed gap mode; on by default
Definition: sequence.hpp:747
string
Definition: cgiapp.hpp:498
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:606
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:99
Deprecated. Used only for AGP 1.1.
Definition: Seq_gap_.hpp:91
#define NULL
Definition: ncbistd.hpp:225
>?N or >?unk100, as appropriate.
Definition: sequence.hpp:774
show key-value pair modifiers (e.g. "[organism=Homo sapiens]")
Definition: sequence.hpp:754
User-defined methods of the data storage class.
virtual void Reset(void)
Reset the whole object.
Definition: Seq_inst_.cpp:176
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
User-defined methods of the data storage class.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:479
User-defined methods of the data storage class.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
void SetExt(TExt &value)
Assign a value to Ext data member.
Definition: Seq_inst_.cpp:147
void SetLinkage(TLinkage value)
Assign a value to Linkage data member.
Definition: Seq_gap_.hpp:328
IO_PREFIX::istrstream CNcbiIstrstream
Portable alias for istrstream.
Definition: ncbistre.hpp:151
User-defined methods of the data storage class.
Utility stuff for more convenient using of Boost.Test library.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
Definition: sequence.cpp:2607
CSeqVector –.
Definition: seq_vector.hpp:64
show gap key-value pair modifiers (e.g. "[linkage-evidence=map;strobe]"). Only works if gap mode is e...
Definition: sequence.hpp:756
list< CRef< CLinkage_evidence > > TLinkage_evidence
Definition: Seq_gap_.hpp:117
User-defined methods of the data storage class.
CRef< objects::CObjectManager > om
CSeq_entry_Handle –.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:196
void ResetFlag(EFlags flag)
Definition: sequence.hpp:827
The Object manager core.
assemble FAR delta sequences; on by dflt
Definition: sequence.hpp:746
User-defined methods of the data storage class.
CIUPACna –.
Definition: IUPACna.hpp:65
void SetType(TType value)
Assign a value to Type data member.
TLinkage_evidence & SetLinkage_evidence(void)
Assign a value to Linkage_evidence data member.
Definition: Seq_gap_.hpp:365
BOOST_AUTO_TEST_CASE(Test_FastaRaw)
CScope –.
Definition: scope.hpp:90
CArgs –.
Definition: ncbiargs.hpp:356
User-defined methods of the data storage class.
void SetInt(TInt &v)
Definition: Seq_loc.hpp:967
Definition: Seq_entry.hpp:55
Multiple inline dashes.
Definition: sequence.hpp:772
CAutoInitRef<>::
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
IO_PREFIX::ostrstream CNcbiOstrstream
Portable alias for ostrstream.
Definition: ncbistre.hpp:155
USING_SCOPE(objects)
void SetGapMode(EGapMode mode)
Definition: sequence.hpp:828
CRef< CSeq_entry > s_ReadData()
write as N for nucleotides, X for peptides
Definition: sequence.hpp:816
write as lowercase rather than uppercase
Definition: sequence.hpp:815
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:139
#define ITERATE_BOTH_BOOL_VALUES(BoolVar)
The body of the loop will be run with Var equal to false and then true.
Definition: ncbimisc.hpp:822
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
Definition: Seq_loc.cpp:3457
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:756
void SetFlag(EFlags flag)
Definition: sequence.hpp:826
Modified on Wed Aug 16 06:03:02 2017 by modify_doxy.py rev. 533848