NCBI C++ ToolKit
unit_test_seq_translator.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_seq_translator.cpp 75991 2017-01-05 12:28:33Z bollin $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Pavel Ivanov, NCBI
27 *
28 * File Description:
29 * Sample unit test file for mainstream test development.
30 *
31 * This file demonstrates the basic, most common usage of the Ncbi.Test
32 * framework, which is based on the Boost.Test framework. For more advanced
33 * techniques, look into another sample - unit_test_alt_sample.cpp.
34 *
35 * ===========================================================================
36 */
37 
38 #include <ncbi_pch.hpp>
39 
40 #include <corelib/ncbi_system.hpp>
41 
42 // This macro should be defined before the inclusion of test_boost.hpp in all
43 // "*.cpp" files of an executable except one. This is similar to how function
44 // main() of a non-Boost.Test executable is defined in only one *.cpp file -
45 // other files should not define it. If NCBI_BOOST_NO_AUTO_TEST_MAIN is not
46 // defined, test_boost.hpp will define such a "main()" function for the tests.
47 //
48 // Usually if your unit tests contain only one *.cpp file you should not
49 // care about this macro at all.
50 //
51 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
52 
53 
54 // This header must be included before all Boost.Test headers if there are any
55 #include <corelib/test_boost.hpp>
56 
59 #include <objmgr/scope.hpp>
60 #include <objmgr/bioseq_ci.hpp>
61 #include <objmgr/feat_ci.hpp>
62 #include <objmgr/seq_vector.hpp>
63 #include <objmgr/util/sequence.hpp>
65 #include <objects/seq/Seq_inst.hpp>
66 #include <objects/seq/Seq_ext.hpp>
73 
76 
// Text fixtures used by the test cases in this file. They are only declared
// here (extern); the tests read them through CNcbiIstrstream with
// MSerial_AsnText into a CSeq_entry.
// NOTE(review): sc_TestEntry_GB_2236 is not referenced in the visible part of
// this file - presumably used further down; confirm.
77 extern const char* sc_TestEntry; //
78 extern const char* sc_TestEntry_code_break; //
79 extern const char* sc_TestEntry_alt_frame; //
80 extern const char* sc_TestEntry_internal_stop; //
81 extern const char* sc_TestEntry_5prime_partial;
82 extern const char* sc_TestEntry_3prime_partial;
83 extern const char* sc_TestEntry_5prime_partial_minus;
84 extern const char* sc_TestEntry_TerminalTranslExcept;
85 extern const char* sc_TestEntry_ShortCDS;
86 extern const char* sc_TestEntry_FirstCodon;
87 extern const char* sc_TestEntry_FirstCodon2;
88 extern const char* sc_TestEntry_GapInSeq1;
89 extern const char* sc_TestEntry_GapInSeq2;
90 extern const char* sc_TestEntry_GapInSeq3;
91 extern const char* sc_TestEntry_GapInSeq4;
92 extern const char* sc_TestEntry_GapInSeq5;
93 extern const char* sc_TestEntry_CodeBreakForStopCodon;
94 extern const char* sc_TestEntry_GB_2236;
95 
// Returns the sequence data of the product of the feature that `fi` currently
// points at.
//
// fi     feature iterator; its current feature's GetProduct().GetId() is used
//        to look up the product Bioseq.
// scope  scope used to resolve the product's Bioseq handle.
//
// The data is read over [0, bsh.GetBioseqLength()) after the coding has been
// switched to CSeq_data::e_Ncbieaa.
96 static string GetProteinString (CFeat_CI fi, CScope& scope)
97 {
98  string real_prot_seq;
99  CBioseq_Handle bsh =
100  scope.GetBioseqHandle(*(fi->GetProduct().GetId()));
// NOTE(review): `vec` is not declared in the visible lines (listing line 101
// is absent from this extract); presumably a CSeqVector over `bsh` - confirm
// against the repository copy.
102  vec.SetCoding(CSeq_data::e_Ncbieaa); // allow extensions
103  vec.GetSeqData(0, bsh.GetBioseqLength(), real_prot_seq);
104  return real_prot_seq;
105 }
106 
107 
// Legacy test, compiled only when TEST_DEPRECATED is defined. For every
// feature selected from sc_TestEntry it compares the product protein with a
// translation written into `tmp`, first as-is and then with '*' appended to
// the expected string.
// NOTE(review): the declaration of `scope`, the argument of
// IncludeFeatSubtype and the name of the translation function being called
// are on listing lines (118, 123, 137, 144) that are absent from this extract.
108 #ifdef TEST_DEPRECATED
109 // removed, CCdregion_translate::TranslateCdregion is deprecated, so discontinue unit test
110 BOOST_AUTO_TEST_CASE(Test_TranslateCdregion)
111 {
112  CSeq_entry entry;
113  {{
114  CNcbiIstrstream istr(sc_TestEntry);
115  istr >> MSerial_AsnText >> entry;
116  }}
117 
119  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
120  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
121  CFeat_CI feat_iter(*bs_iter,
122  SAnnotSelector().IncludeFeatSubtype
124  for ( ; feat_iter; ++feat_iter) {
125  ///
126  /// retrieve the actual protein sequence
127  ///
128  string real_prot_seq = GetProteinString(feat_iter, scope);
129 
130  ///
131  /// translate the CDRegion directly
132  ///
133  string tmp;
134 
135  /// use CCdregion_translate
136  tmp.clear();
138  (tmp, feat_iter->GetOriginalFeature(), scope, false);
139  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
140 
141  /// use CCdregion_translate, include the stop codon
142  real_prot_seq += '*';
143  tmp.clear();
145  (tmp, feat_iter->GetOriginalFeature(), scope, true);
146  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
147  }
148  }
149 }
150 #endif
151 
152 
// Translates a raw nucleotide string with CSeqTranslator::Translate().
// For each selected feature of sc_TestEntry, the nucleotides covered by the
// feature's total range are extracted into a string, translated, and compared
// with the product protein - first as-is, then with '*' appended to the
// expected string.
// NOTE(review): the declarations of `scope` and `vec`, the IncludeFeatSubtype
// argument, the tail of the first Translate() call and the whole second call
// are on listing lines (161, 165, 169, 189-190, 197) absent from this extract.
153 BOOST_AUTO_TEST_CASE(Test_Translator_Raw)
154 {
155  CSeq_entry entry;
156  {{
157  CNcbiIstrstream istr(sc_TestEntry);
158  istr >> MSerial_AsnText >> entry;
159  }}
160 
162  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
163  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
164  CBioseq_Handle bsh = *bs_iter;
166 
167  CFeat_CI feat_iter(*bs_iter,
168  SAnnotSelector().IncludeFeatSubtype
170  for ( ; feat_iter; ++feat_iter) {
171  ///
172  /// retrieve the actual protein sequence
173  ///
174  string real_prot_seq = GetProteinString (feat_iter, scope);
175 
176  string nucleotide_sequence;
// The stop argument is GetTo() + 1, i.e. one past the last covered position.
177  vec.GetSeqData(feat_iter->GetTotalRange().GetFrom(),
178  feat_iter->GetTotalRange().GetTo() + 1,
179  nucleotide_sequence);
180 
181  ///
182  /// translate the CDRegion directly
183  ///
184  string tmp;
185 
186  /// use CSeqTranslator::Translate()
187  tmp.clear();
188  CSeqTranslator::Translate(nucleotide_sequence, tmp,
191 
192  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
193 
194  /// use CSeqTranslator::Translate(), include the stop codon
195  real_prot_seq += '*';
196  tmp.clear();
198 
199  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
200  }
201  }
202 }
203 
204 
// Translates a CSeqVector with CSeqTranslator::Translate().
// For each selected feature of sc_TestEntry, a CSeqVector is built from the
// feature location and translated; the result is compared with the product
// protein, first as-is and then with '*' appended to the expected string.
// NOTE(review): the declaration of `scope`, the IncludeFeatSubtype argument,
// the tail of the first Translate() call and the whole second call are on
// listing lines (213, 218, 236-237, 243) absent from this extract.
205 BOOST_AUTO_TEST_CASE(Test_Translator_CSeqVector)
206 {
207  CSeq_entry entry;
208  {{
209  CNcbiIstrstream istr(sc_TestEntry);
210  istr >> MSerial_AsnText >> entry;
211  }}
212 
214  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
215  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
216  CFeat_CI feat_iter(*bs_iter,
217  SAnnotSelector().IncludeFeatSubtype
219  for ( ; feat_iter; ++feat_iter) {
220  ///
221  /// retrieve the actual protein sequence
222  ///
223  string real_prot_seq = GetProteinString (feat_iter, scope);
224 
225 
226  CSeqVector vec(feat_iter->GetLocation(), scope);
227 
228  ///
229  /// translate the CDRegion directly
230  ///
231  string tmp;
232 
233  /// use CSeqTranslator::Translate()
234  tmp.clear();
235  CSeqTranslator::Translate(vec, tmp,
238  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
239 
240  /// use CSeqTranslator::Translate()
241  real_prot_seq += '*';
242  tmp.clear();
244  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
245  }
246  }
247 }
248 
// Translates a feature location with the CSeq_loc overload of
// CSeqTranslator::Translate(), passing the scope obtained from the Bioseq
// handle (bs_iter->GetScope()). The result is compared with the product
// protein, first with the fifth argument false and the expected string as-is,
// then with it true and '*' appended to the expected string.
// NOTE(review): the trailing (NULL, bool, bool) arguments are presumably the
// genetic code, "include stops" and "remove trailing X" - confirm against the
// CSeqTranslator declaration. The declaration of `scope` and the
// IncludeFeatSubtype argument are on listing lines (257, 262) absent here.
249 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_loc_1)
250 {
251  CSeq_entry entry;
252  {{
253  CNcbiIstrstream istr(sc_TestEntry);
254  istr >> MSerial_AsnText >> entry;
255  }}
256 
258  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
259  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
260  CFeat_CI feat_iter(*bs_iter,
261  SAnnotSelector().IncludeFeatSubtype
263  for ( ; feat_iter; ++feat_iter) {
264  ///
265  /// retrieve the actual protein sequence
266  ///
267  string real_prot_seq = GetProteinString (feat_iter, scope);
268 
269  ///
270  /// translate the CDRegion directly
271  ///
272  string tmp;
273 
274  /// use CSeqTranslator::Translate()
275  tmp.clear();
276  CSeqTranslator::Translate(feat_iter->GetLocation(), bs_iter->GetScope(), tmp,
277  NULL, false, true);
278  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
279 
280  /// use CSeqTranslator::Translate()
281  real_prot_seq += '*';
282  tmp.clear();
283  CSeqTranslator::Translate(feat_iter->GetLocation(), bs_iter->GetScope(), tmp,
284  NULL, true, true);
285  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
286  }
287  }
288 }
289 
290 
// Same checks as Test_Translator_CSeq_loc_1, but the local `scope` object is
// passed to CSeqTranslator::Translate() instead of bs_iter->GetScope().
// NOTE(review): the declaration of `scope` and the IncludeFeatSubtype argument
// are on listing lines (299, 304) absent from this extract.
291 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_loc_2)
292 {
293  CSeq_entry entry;
294  {{
295  CNcbiIstrstream istr(sc_TestEntry);
296  istr >> MSerial_AsnText >> entry;
297  }}
298 
300  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
301  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
302  CFeat_CI feat_iter(*bs_iter,
303  SAnnotSelector().IncludeFeatSubtype
305  for ( ; feat_iter; ++feat_iter) {
306  ///
307  /// retrieve the actual protein sequence
308  ///
309  string real_prot_seq = GetProteinString (feat_iter, scope);
310 
311  ///
312  /// translate the CDRegion directly
313  ///
314  string tmp;
315 
316  /// use CSeqTranslator::Translate()
317  tmp.clear();
318  CSeqTranslator::Translate(feat_iter->GetLocation(), scope, tmp,
319  NULL, false, true);
320  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
321 
322  /// use CSeqTranslator::Translate()
323  real_prot_seq += '*';
324  tmp.clear();
325  CSeqTranslator::Translate(feat_iter->GetLocation(), scope, tmp,
326  NULL, true, true);
327  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
328  }
329  }
330 }
331 
332 
// Checks translation driven by a whole feature, using sc_TestEntry.
// For each selected feature the result written to `tmp` is compared with the
// product protein: first with trailing arguments (false, true) and the
// expected string as-is, then with (true, true) and '*' appended.
// NOTE(review): the opening line of each translation call (listing lines 360
// and 368), the declaration of `scope` (341) and the IncludeFeatSubtype
// argument (346) are absent from this extract; the calls are presumably
// CSeqTranslator::Translate(<feature>, scope, tmp, include_stops,
// remove_trailing_X) - confirm.
333 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat)
334 {
335  CSeq_entry entry;
336  {{
337  CNcbiIstrstream istr(sc_TestEntry);
338  istr >> MSerial_AsnText >> entry;
339  }}
340 
342  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
343  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
344  CFeat_CI feat_iter(*bs_iter,
345  SAnnotSelector().IncludeFeatSubtype
347  for ( ; feat_iter; ++feat_iter) {
348  ///
349  /// retrieve the actual protein sequence
350  ///
351  string real_prot_seq = GetProteinString (feat_iter, scope);
352 
353  ///
354  /// translate the CDRegion directly
355  ///
356  string tmp;
357 
358  /// use CSeqTranslator::Translate()
359  tmp.clear();
361  scope, tmp,
362  false, true);
363  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
364 
365  /// use CSeqTranslator::Translate()
366  real_prot_seq += '*';
367  tmp.clear();
369  scope, tmp,
370  true, true);
371  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
372  }
373  }
374 }
375 
376 
// Same feature-driven translation check as Test_Translator_CSeq_feat, run
// against the sc_TestEntry_code_break fixture.
// NOTE(review): judging by the fixture name this exercises code-break
// handling - confirm against the fixture definition. The opening line of each
// translation call (listing lines 404, 412), the declaration of `scope` (385)
// and the IncludeFeatSubtype argument (390) are absent from this extract.
377 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_code_break)
378 {
379  CSeq_entry entry;
380  {{
381  CNcbiIstrstream istr(sc_TestEntry_code_break);
382  istr >> MSerial_AsnText >> entry;
383  }}
384 
386  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
387  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
388  CFeat_CI feat_iter(*bs_iter,
389  SAnnotSelector().IncludeFeatSubtype
391  for ( ; feat_iter; ++feat_iter) {
392  ///
393  /// retrieve the actual protein sequence
394  ///
395  string real_prot_seq = GetProteinString (feat_iter, scope);
396 
397  ///
398  /// translate the CDRegion directly
399  ///
400  string tmp;
401 
402  /// use CSeqTranslator::Translate()
403  tmp.clear();
405  scope, tmp,
406  false, true);
407  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
408 
409  /// use CSeqTranslator::Translate()
410  real_prot_seq += '*';
411  tmp.clear();
413  scope, tmp,
414  true, true);
415  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
416  }
417  }
418 }
419 
420 
// Same feature-driven translation check as Test_Translator_CSeq_feat, run
// against the sc_TestEntry_alt_frame fixture.
// NOTE(review): judging by the fixture name this exercises a non-default
// reading frame - confirm against the fixture definition. The opening line of
// each translation call (listing lines 448, 456), the declaration of `scope`
// (429) and the IncludeFeatSubtype argument (434) are absent from this extract.
421 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_alt_frame)
422 {
423  CSeq_entry entry;
424  {{
425  CNcbiIstrstream istr(sc_TestEntry_alt_frame);
426  istr >> MSerial_AsnText >> entry;
427  }}
428 
430  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
431  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
432  CFeat_CI feat_iter(*bs_iter,
433  SAnnotSelector().IncludeFeatSubtype
435  for ( ; feat_iter; ++feat_iter) {
436  ///
437  /// retrieve the actual protein sequence
438  ///
439  string real_prot_seq = GetProteinString (feat_iter, scope);
440 
441  ///
442  /// translate the CDRegion directly
443  ///
444  string tmp;
445 
446  /// use CSeqTranslator::Translate()
447  tmp.clear();
449  scope, tmp,
450  false, true);
451  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
452 
453  /// use CSeqTranslator::Translate()
454  real_prot_seq += '*';
455  tmp.clear();
457  scope, tmp,
458  true, true);
459  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
460  }
461  }
462 }
463 
464 
// Feature-driven translation check against sc_TestEntry_internal_stop.
// The expected string is the product protein with position 51 overwritten by
// '*'. With "include stops" true, the expectation also has a trailing '*'.
// With "include stops" false, the expectation is cut at the first '*'.
// NOTE(review): the opening line of each translation call (listing lines 494,
// 503), the declaration of `scope` (473) and the IncludeFeatSubtype argument
// (478) are absent from this extract.
465 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_internal_stop)
466 {
467  CSeq_entry entry;
468  {{
469  CNcbiIstrstream istr(sc_TestEntry_internal_stop);
470  istr >> MSerial_AsnText >> entry;
471  }}
472 
474  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
475  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
476  CFeat_CI feat_iter(*bs_iter,
477  SAnnotSelector().IncludeFeatSubtype
479  for ( ; feat_iter; ++feat_iter) {
480  ///
481  /// retrieve the actual protein sequence
482  ///
483  string real_prot_seq = GetProteinString (feat_iter, scope);
// NOTE(review): unchecked index - assumes the product protein has at least
// 52 residues; a shorter fixture would make this write out of bounds.
484  real_prot_seq[51] = '*';
485 
486  ///
487  /// translate the CDRegion directly
488  ///
489  string tmp;
490 
491  /// use CSeqTranslator::Translate()
492  real_prot_seq += '*';
493  tmp.clear();
495  scope, tmp,
496  true /*include stops*/,
497  true /*remove trailing X*/);
498  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
499 
500  /// use CSeqTranslator::Translate()
501  tmp.clear();
// Keep only the part before the first '*' (erase from that position to end).
502  real_prot_seq.erase(real_prot_seq.find_first_of("*"));
504  scope, tmp,
505  false /*include stops*/,
506  true /*remove trailing X*/);
507  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
508 
509  }
510  }
511 }
512 
513 
// Feature-driven translation check against sc_TestEntry_5prime_partial.
// First expects the product protein plus a trailing '*' ("include stops"
// true). Then expects the string cut at the first '*' ("include stops"
// false). After the second check, every differing position is logged to help
// diagnose a mismatch.
// NOTE(review): the opening line of each translation call (listing lines 542,
// 551), the declaration of `scope` (522) and the IncludeFeatSubtype argument
// (527) are absent from this extract.
514 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_5prime_partial)
515 {
516  CSeq_entry entry;
517  {{
518  CNcbiIstrstream istr(sc_TestEntry_5prime_partial);
519  istr >> MSerial_AsnText >> entry;
520  }}
521 
523  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
524  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
525  CFeat_CI feat_iter(*bs_iter,
526  SAnnotSelector().IncludeFeatSubtype
528  for ( ; feat_iter; ++feat_iter) {
529  ///
530  /// retrieve the actual protein sequence
531  ///
532  string real_prot_seq = GetProteinString (feat_iter, scope);
533 
534  ///
535  /// translate the CDRegion directly
536  ///
537  string tmp;
538 
539  /// use CSeqTranslator::Translate()
540  real_prot_seq += '*';
541  tmp.clear();
543  scope, tmp,
544  true /*include stops*/,
545  true /*remove trailing X*/);
546  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
547 
548  /// use CSeqTranslator::Translate()
// A '*' is always present here because one was appended above.
549  real_prot_seq.erase(real_prot_seq.find_first_of("*"));
550  tmp.clear();
552  scope, tmp,
553  false /*include stops*/,
554  true /*remove trailing X*/);
555  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
// Diagnostic only: report each mismatching character over the common prefix.
556  for (size_t i = 0; i < real_prot_seq.size() && i < tmp.size(); ++i) {
557  if (real_prot_seq[i] != tmp[i]) {
558  LOG_POST(Error << "char " << i << ": "
559  << real_prot_seq[i] << " != "
560  << tmp[i]);
561  }
562  }
563  }
564  }
565 }
566 
567 
568 
// Feature-driven translation check against sc_TestEntry_3prime_partial.
// The expected string is the product protein with position 51 overwritten by
// '*'; no trailing '*' is appended. Only one translation is checked, with
// trailing arguments (true, true).
// NOTE(review): the opening line of the translation call (listing line 597),
// the declaration of `scope` (577) and the IncludeFeatSubtype argument (582)
// are absent from this extract.
569 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_3prime_partial)
570 {
571  CSeq_entry entry;
572  {{
573  CNcbiIstrstream istr(sc_TestEntry_3prime_partial);
574  istr >> MSerial_AsnText >> entry;
575  }}
576 
578  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
579  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
580  CFeat_CI feat_iter(*bs_iter,
581  SAnnotSelector().IncludeFeatSubtype
583  for ( ; feat_iter; ++feat_iter) {
584  ///
585  /// retrieve the actual protein sequence
586  ///
587  string real_prot_seq = GetProteinString (feat_iter, scope);
// NOTE(review): unchecked index - assumes the product protein has at least
// 52 residues.
588  real_prot_seq[51] = '*';
589 
590  ///
591  /// translate the CDRegion directly
592  ///
593  string tmp;
594 
595  /// use CSeqTranslator::Translate()
596  tmp.clear();
598  scope, tmp,
599  true, true);
600  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
601  }
602  }
603 }
604 
605 
// Feature-driven translation check against sc_TestEntry_5prime_partial_minus.
// Both translations - trailing arguments (false, true) and (true, true) - are
// compared with the unmodified product protein. Unlike most sibling tests, no
// '*' is appended to the expected string for the second call. Mismatching
// characters are logged after the first check.
// NOTE(review): the opening line of each translation call (listing lines 633,
// 647), the declaration of `scope` (614) and the IncludeFeatSubtype argument
// (619) are absent from this extract.
606 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_5prime_partial_minus)
607 {
608  CSeq_entry entry;
609  {{
610  CNcbiIstrstream istr(sc_TestEntry_5prime_partial_minus);
611  istr >> MSerial_AsnText >> entry;
612  }}
613 
615  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
616  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
617  CFeat_CI feat_iter(*bs_iter,
618  SAnnotSelector().IncludeFeatSubtype
620  for ( ; feat_iter; ++feat_iter) {
621  ///
622  /// retrieve the actual protein sequence
623  ///
624  string real_prot_seq = GetProteinString (feat_iter, scope);
625 
626  ///
627  /// translate the CDRegion directly
628  ///
629  string tmp;
630 
631  /// use CSeqTranslator::Translate()
632  tmp.clear();
634  scope, tmp,
635  false, true);
636  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
// Diagnostic only: report each mismatching character over the common prefix.
637  for (size_t i = 0; i < real_prot_seq.size() && i < tmp.size(); ++i) {
638  if (real_prot_seq[i] != tmp[i]) {
639  LOG_POST(Error << "char " << i << ": "
640  << real_prot_seq[i] << " != "
641  << tmp[i]);
642  }
643  }
644 
645  /// use CSeqTranslator::Translate()
646  tmp.clear();
648  scope, tmp,
649  true, true);
650  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
651  }
652  }
653 }
654 
655 
// Feature-driven translation check against sc_TestEntry_TerminalTranslExcept.
// First compares with the product protein as-is (trailing arguments false,
// true) and logs any mismatching characters. Then appends '*' to the expected
// string and compares again (true, true).
// NOTE(review): the opening line of each translation call (listing lines 683,
// 698), the declaration of `scope` (664) and the IncludeFeatSubtype argument
// (669) are absent from this extract.
656 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_TerminalTranslExcept)
657 {
658  CSeq_entry entry;
659  {{
660  CNcbiIstrstream istr(sc_TestEntry_TerminalTranslExcept);
661  istr >> MSerial_AsnText >> entry;
662  }}
663 
665  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
666  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
667  CFeat_CI feat_iter(*bs_iter,
668  SAnnotSelector().IncludeFeatSubtype
670  for ( ; feat_iter; ++feat_iter) {
671  ///
672  /// retrieve the actual protein sequence
673  ///
674  string real_prot_seq = GetProteinString (feat_iter, scope);
675 
676  ///
677  /// translate the CDRegion directly
678  ///
679  string tmp;
680 
681  /// use CSeqTranslator::Translate()
682  tmp.clear();
684  scope, tmp,
685  false, true);
686  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
// Diagnostic only: report each mismatching character over the common prefix.
687  for (size_t i = 0; i < real_prot_seq.size() && i < tmp.size(); ++i) {
688  if (real_prot_seq[i] != tmp[i]) {
689  LOG_POST(Error << "char " << i << ": "
690  << real_prot_seq[i] << " != "
691  << tmp[i]);
692  }
693  }
694 
695  /// use CSeqTranslator::Translate()
696  real_prot_seq += '*';
697  tmp.clear();
699  scope, tmp,
700  true, true);
701  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
702  }
703  }
704 }
705 
706 
// Feature-driven translation check against sc_TestEntry_ShortCDS.
// No product protein is read here; the translation of every selected feature
// is expected to be exactly "-".
// NOTE(review): the opening line of the translation call (listing line 730),
// the declaration of `scope` (715) and the IncludeFeatSubtype argument (720)
// are absent from this extract.
707 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_ShortCDS)
708 {
709  CSeq_entry entry;
710  {{
711  CNcbiIstrstream istr(sc_TestEntry_ShortCDS);
712  istr >> MSerial_AsnText >> entry;
713  }}
714 
716  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
717  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
718  CFeat_CI feat_iter(*bs_iter,
719  SAnnotSelector().IncludeFeatSubtype
721  for ( ; feat_iter; ++feat_iter) {
722 
723  ///
724  /// translate the CDRegion directly
725  ///
726  string tmp;
727 
728  /// use CSeqTranslator::Translate()
729  tmp.clear();
731  scope, tmp,
732  false, true);
733  BOOST_CHECK_EQUAL("-", tmp);
734  }
735  }
736 }
737 
738 
// Checks how the first codon is translated for 5'-complete versus 5'-partial
// input, using sc_TestEntry_FirstCodon.
// A coding-region feature over positions 0..38 of local id "FirstCodon" is
// built by hand and appended to the entry. The expected translations are
// "-MGMCFLRGWKGV" when 5' complete and "KMGMCFLRGWKGV" when 5' partial.
// The same expectations are checked for the CSeqVector input, the plain
// string input and the feature itself (before and after marking the location
// start as partial).
// NOTE(review): the declaration of `scope` (listing line 747) and most of the
// non-deprecated Translate() calls (767-768, 772, 784, 798, 810, 820, 828)
// are on lines absent from this extract, so several BOOST_CHECK_EQUALs below
// appear without the call that fills `tmp`.
739 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_FirstCodon)
740 {
741  CSeq_entry entry;
742  {{
743  CNcbiIstrstream istr(sc_TestEntry_FirstCodon);
744  istr >> MSerial_AsnText >> entry;
745  }}
746 
748  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
749 
750  CRef<CSeq_feat> feat (new CSeq_feat());
751  feat->SetData().SetCdregion();
752  feat->SetLocation().SetInt().SetId().SetLocal().SetStr("FirstCodon");
753  feat->SetLocation().SetInt().SetFrom(0);
754  feat->SetLocation().SetInt().SetTo(38);
755  CRef<CSeq_annot> annot(new CSeq_annot());
756  annot->SetData().SetFtable().push_back(feat);
757  entry.SetSeq().SetAnnot().push_back(annot);
758 
759  string tmp;
760  string complete_trans = "-MGMCFLRGWKGV";
761  string partial_trans = "KMGMCFLRGWKGV";
762 
763  // translate with vector
764  tmp.clear();
765  CSeqVector vec(feat->GetLocation(), scope);
766  // default value for 5' complete is true
769  BOOST_CHECK_EQUAL(complete_trans, tmp);
770  // try it with flag version
771  tmp.clear();
773  BOOST_CHECK_EQUAL(complete_trans, tmp);
774 
775  // set 5' complete false
776  tmp.clear();
777 #ifdef TEST_DEPRECATED
778  CSeqTranslator::Translate(vec, tmp,
779  NULL, false, true, 0, false);
780  BOOST_CHECK_EQUAL(partial_trans, tmp);
781 #endif
782  // try it with flag version
783  tmp.clear();
785  BOOST_CHECK_EQUAL(partial_trans, tmp);
786 
787  // translate with string
788  string seq_str;
789  vec.GetSeqData(0, entry.GetSeq().GetLength(), seq_str);
790  // default value for 5' complete is true
791 #ifdef TEST_DEPRECATED
792  CSeqTranslator::Translate(seq_str, tmp,
793  NULL, false, true);
794  BOOST_CHECK_EQUAL(complete_trans, tmp);
795 #endif
796  // try it with flag version
797  tmp.clear();
799  BOOST_CHECK_EQUAL(complete_trans, tmp);
800 
801  // set 5' complete false
802  tmp.clear();
803 #ifdef TEST_DEPRECATED
804  CSeqTranslator::Translate(seq_str, tmp,
805  NULL, false, true, 0, false);
806  BOOST_CHECK_EQUAL(partial_trans, tmp);
807 #endif
808  // try it with flag version
809  tmp.clear();
811  BOOST_CHECK_EQUAL(partial_trans, tmp);
812 
813 
814  ///
815  /// translate the CDRegion directly
816  ///
817 
818  /// use CSeqTranslator::Translate()
819  tmp.clear();
821  scope, tmp,
822  false, true);
823  BOOST_CHECK_EQUAL(complete_trans, tmp);
824 
825  // if partial, should translate first codon
826  feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
827  tmp.clear();
829  scope, tmp,
830  false, true);
831  BOOST_CHECK_EQUAL(partial_trans, tmp);
832 
833 
834 
835 }
836 
837 
// Second first-codon check, using sc_TestEntry_FirstCodon2.
// A coding-region feature over positions 0..26 of local id "FirstCodon2" is
// built by hand and appended to the entry. The expected translations are
// "MP*K*E*N*" when 5' complete and "LP*K*E*N*" when 5' partial; both contain
// internal and trailing '*'.
// The same expectations are checked for the CSeqVector input, the plain
// string input and the feature itself (before and after marking the location
// start as partial).
// NOTE(review): the declaration of `scope` (listing line 848) and several
// Translate() calls (892, 919, 929, 937) are on lines absent from this
// extract.
838 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_FirstCodon2)
839 {
840  // here, the first codon translates to M if complete, because it's an alternate start,
841  // but L if partial
842  CSeq_entry entry;
843  {{
844  CNcbiIstrstream istr(sc_TestEntry_FirstCodon2);
845  istr >> MSerial_AsnText >> entry;
846  }}
847 
849  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
850 
851  CRef<CSeq_feat> feat (new CSeq_feat());
852  feat->SetData().SetCdregion();
853  feat->SetLocation().SetInt().SetId().SetLocal().SetStr("FirstCodon2");
854  feat->SetLocation().SetInt().SetFrom(0);
855  feat->SetLocation().SetInt().SetTo(26);
856  CRef<CSeq_annot> annot(new CSeq_annot());
857  annot->SetData().SetFtable().push_back(feat);
858  entry.SetSeq().SetAnnot().push_back(annot);
859 
860  string tmp;
861  string complete_trans = "MP*K*E*N*";
862  string partial_trans = "LP*K*E*N*";
863 
864  // translate with vector
865  tmp.clear();
866  CSeqVector vec(feat->GetLocation(), scope);
867 
868  //
869  // default value for 5' complete is true
870 #ifdef TEST_DEPRECATED
871  CSeqTranslator::Translate(vec, tmp,
872  NULL, true, true);
873  BOOST_CHECK_EQUAL(complete_trans, tmp);
874 #endif
875 
876  // try it with flag version
877  tmp.clear();
// Flags value 0 is expected to yield the 5'-complete translation.
878  CSeqTranslator::Translate(vec, tmp, 0);
879  BOOST_CHECK_EQUAL(complete_trans, tmp);
880 
881  //
882  // set 5' complete false
883  tmp.clear();
884 #ifdef TEST_DEPRECATED
885  CSeqTranslator::Translate(vec, tmp,
886  NULL, true, true, 0, false);
887  BOOST_CHECK_EQUAL(partial_trans, tmp);
888 #endif
889 
890  // try it with flag version
891  tmp.clear();
893  BOOST_CHECK_EQUAL(partial_trans, tmp);
894 
895 
896  // translate with string
897  string seq_str;
898  vec.GetSeqData(0, entry.GetSeq().GetLength(), seq_str);
899  // default value for 5' complete is true
900 #ifdef TEST_DEPRECATED
901  CSeqTranslator::Translate(seq_str, tmp,
902  NULL, true, true);
903  BOOST_CHECK_EQUAL(complete_trans, tmp);
904 #endif
905  // try it with flag version
906  tmp.clear();
907  CSeqTranslator::Translate(seq_str, tmp, 0);
908  BOOST_CHECK_EQUAL(complete_trans, tmp);
909 
910  // set 5' complete false
911  tmp.clear();
912 #ifdef TEST_DEPRECATED
913  CSeqTranslator::Translate(seq_str, tmp,
914  NULL, true, true, 0, false);
915  BOOST_CHECK_EQUAL(partial_trans, tmp);
916 #endif
917  // try it with flag version
918  tmp.clear();
920  BOOST_CHECK_EQUAL(partial_trans, tmp);
921 
922 
923  ///
924  /// translate the CDRegion directly
925  ///
926 
927  /// use CSeqTranslator::Translate()
928  tmp.clear();
930  scope, tmp,
931  true, true);
932  BOOST_CHECK_EQUAL(complete_trans, tmp);
933 
934  // if partial, should translate first codon
935  feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
936  tmp.clear();
938  scope, tmp,
939  true, true);
940  BOOST_CHECK_EQUAL(partial_trans, tmp);
941 
942 }
943 
944 
// Verifies that `bioseq` is a delta sequence made of three segments:
// literal / gap / literal.
//
// bioseq    translated protein to inspect; a null CRef is reported as a
//           failed check.
// seg1      expected Iupacaa data of the first literal.
// mid_fuzz  expected value of IsSetFuzz() on the middle (gap) literal.
// seg2      expected Iupacaa data of the third literal.
//
// Failures that are not value comparisons (missing segment, wrong
// representation, null Bioseq) are reported by comparing two different string
// literals with BOOST_CHECK_EQUAL, which always fails and prints both texts.
945 static void CheckTranslatedBioseq (CRef<CBioseq> bioseq, string seg1, bool mid_fuzz, string seg2)
946 {
947  if (bioseq) {
948  BOOST_CHECK_EQUAL(CSeq_inst::eRepr_delta, bioseq->GetInst().GetRepr());
949  if (bioseq->GetInst().IsSetExt()
950  && bioseq->GetInst().GetExt().IsDelta()) {
// NOTE(review): the first segment is dereferenced without checking that the
// delta list is non-empty; an empty list would dereference end().
951  CDelta_ext::Tdata::iterator seg_it = bioseq->SetInst().SetExt().SetDelta().Set().begin();
952  CRef<CDelta_seq> seg = *seg_it;
953  const CSeq_literal& lit1 = seg->GetLiteral();
954  string p1 = lit1.GetSeq_data().GetIupacaa().Get();
955  BOOST_CHECK_EQUAL(seg1, p1);
956 
957  ++seg_it;
958  if (seg_it != bioseq->SetInst().SetExt().SetDelta().Set().end()) {
959  seg = *seg_it;
960 
// Second segment must be a gap; its fuzz presence must match mid_fuzz.
961  BOOST_CHECK_EQUAL(true, seg->GetLiteral().GetSeq_data().IsGap());
962  BOOST_CHECK_EQUAL(mid_fuzz, seg->GetLiteral().IsSetFuzz());
963  ++seg_it;
964  } else {
965  BOOST_CHECK_EQUAL("Missing segment", "Missing segment in Bioseq");
966  }
967 
// If the gap segment was missing, seg_it is still at end() and this also
// reports a missing segment.
968  if (seg_it != bioseq->SetInst().SetExt().SetDelta().Set().end()) {
969  seg = *seg_it;
970  const CSeq_literal& lit2 = seg->GetLiteral();
971  string p2 = lit2.GetSeq_data().GetIupacaa().Get();
972  BOOST_CHECK_EQUAL(seg2, p2);
973  } else {
974  BOOST_CHECK_EQUAL("Missing segment", "Missing segment in Bioseq");
975  }
976  } else {
977  BOOST_CHECK_EQUAL("Expected delta seq", "Result not delta seq");
978  }
979  } else {
980  BOOST_CHECK_EQUAL("Expected Bioseq creation", "Bioseq creation failed");
981  }
982 }
983 
984 
// Verifies that `bioseq` is a raw sequence whose data equals `seqdata`.
//
// bioseq   translated protein to inspect; a null CRef is reported as a
//          failed check.
// seqdata  expected residues; compared against the Iupacaa or Ncbieaa
//          string, whichever encoding the Bioseq carries.
//
// Any other encoding, missing sequence data, or a null Bioseq is reported by
// comparing two different string literals with BOOST_CHECK_EQUAL, which
// always fails.
985 static void CheckTranslatedBioseq (CRef<CBioseq> bioseq, string seqdata)
986 {
987  if (bioseq) {
988  BOOST_CHECK_EQUAL(CSeq_inst::eRepr_raw, bioseq->GetInst().GetRepr());
989  if (bioseq->GetInst().IsSetSeq_data()) {
990  if (bioseq->GetInst().GetSeq_data().IsIupacaa()) {
991  BOOST_CHECK_EQUAL(seqdata, bioseq->GetInst().GetSeq_data().GetIupacaa().Get());
992  } else if (bioseq->GetInst().GetSeq_data().IsNcbieaa()) {
993  BOOST_CHECK_EQUAL(seqdata, bioseq->GetInst().GetSeq_data().GetNcbieaa().Get());
994  } else {
995  BOOST_CHECK_EQUAL("Unexpected encoding", "Result not Iupacaa or Ncbieaa");
996  }
997  } else {
998  BOOST_CHECK_EQUAL("Expected raw seq", "Result not raw seq");
999  }
1000  } else {
1001  BOOST_CHECK_EQUAL("Expected Bioseq creation", "Bioseq creation failed");
1002  }
1003 }
1004 
1005 
// Replaces the location of `feat` with a mix of intervals, one per delta
// segment of `bioseq` that carries Iupacna sequence data; segments without
// such data (e.g. gaps) are skipped but still advance the running position.
//
// feat    feature whose location is reset and rebuilt.
// bioseq  delta sequence; its first Seq-id is read as a local string id and
//         every delta element is read as a literal.
//
// NOTE(review): assumes the first id is a local str id and that the instance
// has a delta extension made only of literals - the accessors are called
// without checks; confirm they throw rather than misbehave otherwise.
1006 static void SetLocationSkipGap (CRef<CSeq_feat> feat, const CBioseq& bioseq)
1007 {
1008  string local_id = bioseq.GetId().front()->GetLocal().GetStr();
1009 
1010  feat->ResetLocation();
1011  CDelta_ext::Tdata::const_iterator nuc_it = bioseq.GetInst().GetExt().GetDelta().Get().begin();
1012  size_t pos = 0;
1013  while (nuc_it != bioseq.GetInst().GetExt().GetDelta().Get().end()) {
1014  size_t lit_len = (*nuc_it)->GetLiteral().GetLength();
1015  if ((*nuc_it)->GetLiteral().IsSetSeq_data() && (*nuc_it)->GetLiteral().GetSeq_data().IsIupacna()) {
1016  CRef<CSeq_id> id(new CSeq_id());
1017  id->SetLocal().SetStr(local_id);
// Interval is inclusive: [pos, pos + lit_len - 1].
// NOTE(review): for a zero-length literal at pos 0 this wraps (size_t).
1018  feat->SetLocation().SetMix().AddInterval(*id, pos, pos + lit_len - 1);
1019  }
1020  pos += lit_len;
1021  ++nuc_it;
1022  }
1023 }
1024 
1025 
// Translates a coding region spanning a whole gapped sequence and checks the
// resulting literal/gap/literal protein.
//
// asn   ASN.1 text of a Seq-entry holding a single delta Bioseq with a
//       local string id.
// seg1  expected protein residues before the gap.
// seg2  expected protein residues after the gap.
//
// The translation is run twice: once on the entry as read (the middle protein
// segment is expected to have no fuzz), and once after fuzz "lim unk" has
// been set on the second nucleotide delta segment (the middle protein segment
// is then expected to have fuzz set).
// NOTE(review): the declaration of `scope` (listing line 1045) is absent from
// this extract.
1026 static void TestOneGapSeq(const char *asn, string seg1, string seg2)
1027 {
1028  CSeq_entry entry;
1029  {{
1030  CNcbiIstrstream istr(asn);
1031  istr >> MSerial_AsnText >> entry;
1032  }}
1033 
1034  string local_id = entry.GetSeq().GetId().front()->GetLocal().GetStr();
1035 
1036  CRef<CSeq_feat> feat (new CSeq_feat());
1037  feat->SetData().SetCdregion();
1038  feat->SetLocation().SetInt().SetId().SetLocal().SetStr(local_id);
1039  feat->SetLocation().SetInt().SetFrom(0);
1040  feat->SetLocation().SetInt().SetTo(entry.GetSeq().GetLength() - 1);
1041  CRef<CSeq_annot> annot(new CSeq_annot());
1042  annot->SetData().SetFtable().push_back(feat);
1043  entry.SetSeq().SetAnnot().push_back(annot);
1044 
1046  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1047 
1048  CRef<CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*feat, scope);
1049  CheckTranslatedBioseq (bioseq, seg1, false, seg2);
1050 
1051  // take sequence out of scope, so that change in fuzz will be noted
1052  scope.RemoveTopLevelSeqEntry(seh);
// Second delta element (begin() + 1) is the one that receives the fuzz.
1053  CDelta_ext::Tdata::iterator nuc_it = entry.SetSeq().SetInst().SetExt().SetDelta().Set().begin();
1054  ++nuc_it;
1055  CRef<CDelta_seq> nuc_mid = *nuc_it;
1056  nuc_mid->SetLiteral().SetFuzz().SetLim(CInt_fuzz::eLim_unk);
1057  seh = scope.AddTopLevelSeqEntry(entry);
1058 
1059  bioseq = CSeqTranslator::TranslateToProtein(*feat, scope);
1060  CheckTranslatedBioseq (bioseq, seg1, true, seg2);
1061 }
1062 
1063 
// Translation of coding regions over sequences that contain a gap.
// The first three fixtures go through TestOneGapSeq with their expected
// pre-gap and post-gap protein segments. The fourth fixture gets a location
// built by SetLocationSkipGap (gap excluded from the location) and is
// expected to translate to a single raw protein "MPKPK".
// NOTE(review): the declaration of `scope` (listing line 1079) is absent from
// this extract.
1064 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_GapInSeq)
1065 {
1066  TestOneGapSeq (sc_TestEntry_GapInSeq1, "MPK", "PK");
1067  // try with gap not on codon boundary
1068  TestOneGapSeq (sc_TestEntry_GapInSeq2, "MPX", "XPK");
1069  // try with 2 leftover nt, no stop codon
1070  TestOneGapSeq (sc_TestEntry_GapInSeq3, "MPK", "PKI");
1071 
1072  // try with coding region that has gap in intron
1073  CSeq_entry entry;
1074  {{
1075  CNcbiIstrstream istr(sc_TestEntry_GapInSeq4);
1076  istr >> MSerial_AsnText >> entry;
1077  }}
1078 
1080  CRef<CSeq_feat> feat (new CSeq_feat());
1081  feat->SetData().SetCdregion();
1082  SetLocationSkipGap (feat, entry.SetSeq());
1083  CRef<CSeq_annot> annot(new CSeq_annot());
1084  annot->SetData().SetFtable().push_back(feat);
1085  entry.SetSeq().SetAnnot().push_back(annot);
1086  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1087 
1088  CRef<CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*feat, scope);
1089  CheckTranslatedBioseq (bioseq, "MPKPK");
1090 }
1091 
1092 
// Translation of a coding region over a sequence with a zero-length gap,
// using sc_TestEntry_GapInSeq5.
// A coding-region feature over positions 0..17 of local id "GapInSeq5" is
// built by hand. The translated protein is expected to be a delta sequence
// "MPK" / gap / "PK" whose gap segment has fuzz set.
// NOTE(review): the declaration of `scope` (listing line 1103) is absent from
// this extract.
1093 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_ZeroGap)
1094 {
1095  // try with coding region that has zero-length gap
1096 
1097  CSeq_entry entry;
1098  {{
1099  CNcbiIstrstream istr(sc_TestEntry_GapInSeq5);
1100  istr >> MSerial_AsnText >> entry;
1101  }}
1102 
1104  CRef<CSeq_feat> feat (new CSeq_feat());
1105  feat->SetData().SetCdregion();
1106  feat->SetLocation().SetInt().SetId().SetLocal().SetStr("GapInSeq5");
1107  feat->SetLocation().SetInt().SetFrom(0);
1108  feat->SetLocation().SetInt().SetTo(17);
1109  CRef<CSeq_annot> annot(new CSeq_annot());
1110  annot->SetData().SetFtable().push_back(feat);
1111  entry.SetSeq().SetAnnot().push_back(annot);
1112  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1113 
1114  CRef<CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*feat, scope);
1115  CheckTranslatedBioseq (bioseq, "MPK", true, "PK");
1116 }
1117 
1118 
1119 
// Feature-driven translation check against
// sc_TestEntry_CodeBreakForStopCodon.
// Both translations (last argument false, then true) are compared with the
// unmodified product protein; the existing note below explains that the
// fixture lacks a trailing stop, so no '*' is appended.
// After the checks, the last translation is stored into a locally built
// protein Bioseq `prot` (raw, aa, Ncbieaa). `prot` is only consumed by the
// commented-out dump at the end of the test.
// NOTE(review): the opening line of each translation call (listing lines
// 1149, 1158), the declaration of `scope` (1131) and the IncludeFeatSubtype
// argument (1136) are absent from this extract.
1120 BOOST_AUTO_TEST_CASE(Test_Translate_CodeBreakForStopCodon)
1121 {
1122  CSeq_entry entry;
1123  {{
1124  CNcbiIstrstream istr(sc_TestEntry_CodeBreakForStopCodon);
1125  istr >> MSerial_AsnText >> entry;
1126  }}
1127 
1128  CRef<CBioseq> prot(new CBioseq);
1129  prot->SetId().push_back(CRef<CSeq_id>(new CSeq_id("gnl|GNOMON|912063.p")));
1130 
1132  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1133  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
1134  CFeat_CI feat_iter(*bs_iter,
1135  SAnnotSelector().IncludeFeatSubtype
1137  for ( ; feat_iter; ++feat_iter) {
1138  ///
1139  /// retrieve the actual protein sequence
1140  ///
1141  string real_prot_seq = GetProteinString (feat_iter, scope);
1142 
1143  ///
1144  /// translate the CDRegion directly
1145  ///
1146  string tmp;
1147 
1148  tmp.clear();
1150  (feat_iter->GetOriginalFeature(), scope, tmp, false);
1151  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
1152 
1153 
1154  /// use CCdregion_translate, include the stop codon
1155  //NOTE: the test case lacks a trailing stop!
1156  //real_prot_seq += '*';
1157  tmp.clear();
1159  (feat_iter->GetOriginalFeature(), scope, tmp, true);
1160  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
1161 
// Overwritten on every feature iteration; the last feature's translation wins.
1162  prot->SetInst().SetRepr(CSeq_inst::eRepr_raw);
1163  prot->SetInst().SetMol(CSeq_inst::eMol_aa);
1164  prot->SetInst().SetLength(tmp.size());
1165  prot->SetInst().SetSeq_data().SetNcbieaa().Set(tmp);
1166  }
1167  }
1168 
1169  /**
1170  CRef<CSeq_entry> nuc_se(new CSeq_entry);
1171  nuc_se->Assign(entry);
1172 
1173  CRef<CSeq_entry> prot_se(new CSeq_entry);
1174  prot_se->SetSeq(*prot);
1175 
1176  CRef<CSeq_entry> e(new CSeq_entry);
1177  e->SetSet().SetSeq_set().push_back(nuc_se);
1178  e->SetSet().SetSeq_set().push_back(prot_se);
1179  cerr << MSerial_AsnText << *e;
1180  **/
1181 }
1182 
1183 
// Test: CSeqTranslator::FindBestFrame on the coding region of sc_TestEntry.
// The feature is taken from the first annotation on the nuc-prot set. With
// its original location (from 14 in the fixture) frame one is expected;
// moving the start to 15 and then 16 is expected to yield frame three and
// frame two respectively.
1184 BOOST_AUTO_TEST_CASE(Test_FindBestFrame)
1185 {
1186  CSeq_entry entry;
1187  {{
1188  CNcbiIstrstream istr(sc_TestEntry);
1189  istr >> MSerial_AsnText >> entry;
1190  }}
1191 
    // First feature of the first set-level annotation: the coding region.
1192  CRef<CSeq_feat> cds = entry.SetSet().SetAnnot().front()->SetData().SetFtable().front();
1193 
1195  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1196 
    // The location is edited in place on the same feature object between checks.
1197  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope), CCdregion::eFrame_one);
1198  cds->SetLocation().SetInt().SetFrom(15);
1199  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope), CCdregion::eFrame_three);
1200  cds->SetLocation().SetInt().SetFrom(16);
1201  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope), CCdregion::eFrame_two);
1202 }
1203 
// ASN.1 text fixture for Test_FindFrame2: a single 45-base DNA sequence
// (local id "nuc1") carrying one coding region with frame one, genetic
// code 1 and location 5..43. Test_FindFrame2 expects FindBestFrame to
// report frame one for it, with no ambiguity.
1204 const char* sc_TestBestFrameEntry ="\
1205 Seq-entry ::= seq {\
1206  id { local str \"nuc1\" } , \
1207  inst { repr raw, mol dna, length 45,\
1208  seq-data iupacna \"TTTTTATGGAGTAATCGCTAACTTGTAATGCCCAGGCTGGAGTGC\"\
1209  },\
1210  annot { { data ftable {\
1211  {\
1212  data cdregion { frame one, code { id 1 } },\
1213  location int { from 5, to 43, id local str \"nuc1\" }\
1214  }\
1215  } } }\
1216 }";
1217 
1218 
// Test: FindBestFrame on sc_TestBestFrameEntry.
// The two-argument call must return frame one, and the overload that takes
// an `ambiguous` out-flag must leave that flag false.
1219 BOOST_AUTO_TEST_CASE(Test_FindFrame2)
1220 {
1221  CSeq_entry entry;
1222  // only change if new frame has no internal stops
1223  {{
1224  CNcbiIstrstream istr(sc_TestBestFrameEntry);
1225  istr >> MSerial_AsnText >> entry;
1226  }}
    // The fixture is a bare sequence, so the coding region is the first
    // feature of the first annotation on the Bioseq itself.
1227  CRef<CSeq_feat> cds = entry.SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1228 
1230  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1231  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope), CCdregion::eFrame_one);
1232 
    // Second call only inspects the ambiguity flag; its return value is ignored.
1233  bool ambiguous = false;
1234  CSeqTranslator::FindBestFrame(*cds, scope, ambiguous);
1235  BOOST_CHECK_EQUAL(ambiguous, false);
1236 }
1237 
1238 
// ASN.1 text fixture for Test_FindFrame3: same layout as
// sc_TestBestFrameEntry (45-base DNA "nuc1", coding region 5..43, frame one,
// genetic code 1) but with a different nucleotide sequence. Test_FindFrame3
// expects FindBestFrame to return frame two and to flag the result as
// ambiguous.
1239 const char* sc_TestAmbiguousBestFrameEntry ="\
1240 Seq-entry ::= seq {\
1241  id { local str \"nuc1\" } , \
1242  inst { repr raw, mol dna, length 45,\
1243  seq-data iupacna \"TTTTTATGGAGAAATCGCAAACTTGAAATGCCCAGGCTGGAGTGC\"\
1244  },\
1245  annot { { data ftable {\
1246  {\
1247  data cdregion { frame one, code { id 1 } },\
1248  location int { from 5, to 43, id local str \"nuc1\" }\
1249  }\
1250  } } }\
1251 }";
1252 
1253 
// Test: FindBestFrame on sc_TestAmbiguousBestFrameEntry.
// The overload with the `ambiguous` out-flag must return frame two and set
// the flag to true.
1254 BOOST_AUTO_TEST_CASE(Test_FindFrame3)
1255 {
1256  CSeq_entry entry;
1257  // only change if new frame has no internal stops
1258  {{
1259  CNcbiIstrstream istr(sc_TestAmbiguousBestFrameEntry);
1260  istr >> MSerial_AsnText >> entry;
1261  }}
    // Coding region: first feature of the first annotation on the Bioseq.
1262  CRef<CSeq_feat> cds = entry.SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1263 
1265  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
    // Start from false so the check below proves the call set the flag.
1266  bool ambiguous = false;
1267  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope, ambiguous), CCdregion::eFrame_two);
1268  BOOST_CHECK_EQUAL(ambiguous, true);
1269 }
1270 
1271 
1272 
// ASN.1 text fixture for
// Test_FindOverlappingFeatureForMinusStrandCrossingOrigin: a circular
// 20-base DNA sequence (local id "test") with two features, a coding region
// and a gene. Both have the same location: a mix of two minus-strand
// intervals, 0..8 and 17..19, i.e. pieces at both ends of the circular
// sequence.
1273 const char * sc_MinusOrigin = "\
1274 Seq-entry ::= seq {\
1275  id { \
1276  local str \"test\" } , \
1277  inst { \
1278  repr raw , \
1279  mol dna , \
1280  length 20 , \
1281  topology circular , \
1282  seq-data iupacna \"AAAATTTTGGGGCCCCAAAA\" } , \
1283  annot {\
1284  {\
1285  data ftable {\
1286  {\
1287  data cdregion {\
1288  },\
1289  location mix { \
1290  int {\
1291  from 0,\
1292  to 8,\
1293  strand minus,\
1294  id local str \"test\" } , \
1295  int { \
1296  from 17 , \
1297  to 19 , \
1298  strand minus,\
1299  id local str \"test\" } } } , \
1300  {\
1301  data gene {\
1302  },\
1303  location mix { \
1304  int {\
1305  from 0,\
1306  to 8,\
1307  strand minus,\
1308  id local str \"test\" } , \
1309  int { \
1310  from 17 , \
1311  to 19 , \
1312  strand minus,\
1313  id local str \"test\" } \
1314  } \
1315  }\
1316  }\
1317  }\
1318  }\
1319  }\
1320 }";
1321 
1322 
// Test: overlap lookup for features on the minus strand of a circular
// sequence (fixture sc_MinusOrigin).
// For every feature visited, the score list `cds` is expected to hold
// exactly one entry. The check is made twice, with the counter reset and
// the list cleared in between.
1323 BOOST_AUTO_TEST_CASE(Test_FindOverlappingFeatureForMinusStrandCrossingOrigin)
1324 {
1325 
1326  CSeq_entry entry;
1327  {{
1328  CNcbiIstrstream istr(sc_MinusOrigin);
1329  istr >> MSerial_AsnText >> entry;
1330  }}
1331 
1333  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
    // Visit every Bioseq under the added entry and the selected features on it.
1334  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
1335  CBioseq_Handle bsh = *bs_iter;
1336  CFeat_CI feat_iter(*bs_iter,
1337  SAnnotSelector().IncludeFeatSubtype
1339  for ( ; feat_iter; ++feat_iter) {
    // Count the entries in `cds` by iterating; exactly one is expected.
1340  size_t num_cds = 0;
1344  ITERATE (sequence::TFeatScores, s, cds) {
1345  num_cds++;
1346  }
1347  BOOST_CHECK_EQUAL(num_cds, 1u);
    // Start over for the second check: zero the counter and empty the list.
1348  num_cds = 0;
1349  cds.clear();
1352  ITERATE (sequence::TFeatScores, s, cds) {
1353  num_cds++;
1354  }
1355  BOOST_CHECK_EQUAL(num_cds, 1u);
1356  }
1357  }
1358 }
1359 
1360 
// ASN.1 text fixture for Test_FindOverlappingFeaturesOnMultipleSeqs: a
// circular 20-base DNA sequence (local id "test") with an mRNA feature
// (partial TRUE) and a gene feature. Both have the same mix location:
// interval 0..19 on local "test", a NULL, then interval 0..19 on gi 1213148,
// so each location refers to two different sequence ids.
1361 const char * sc_TooManyOverlap = "\
1362 Seq-entry ::= seq {\
1363  id { \
1364  local str \"test\" } , \
1365  inst { \
1366  repr raw , \
1367  mol dna , \
1368  length 20 , \
1369  topology circular , \
1370  seq-data iupacna \"AAAATTTTGGGGCCCCAAAA\" } , \
1371  annot {\
1372  {\
1373  data ftable {\
1374  {\
1375  data rna {\
1376  type mRNA },\
1377  partial TRUE , \
1378  location mix { \
1379  int {\
1380  from 0,\
1381  to 19,\
1382  id local str \"test\" } , \
1383  null NULL , \
1384  int { \
1385  from 0 , \
1386  to 19 , \
1387  id gi 1213148 } } } , \
1388  {\
1389  data gene {\
1390  },\
1391  location mix { \
1392  int {\
1393  from 0,\
1394  to 19,\
1395  id local str \"test\" } , \
1396  null NULL , \
1397  int { \
1398  from 0 , \
1399  to 19 , \
1400  id gi 1213148 } \
1401  } \
1402  }\
1403  }\
1404  }\
1405  }\
1406  }\
1407 }";
1408 
1409 
// Test: overlap lookup for features whose locations refer to more than one
// sequence id (fixture sc_TooManyOverlap).
// The same two expectations are checked twice, first by walking the parsed
// CSeq_entry directly and then through scope handles with CFeat_CI:
//   - for the RNA feature, the gene score list must hold exactly one entry;
//   - the gene feature must not have its partial flag set.
1410 BOOST_AUTO_TEST_CASE(Test_FindOverlappingFeaturesOnMultipleSeqs)
1411 {
1412 
1413  CSeq_entry entry;
1414  {{
1415  CNcbiIstrstream istr(sc_TooManyOverlap);
1416  istr >> MSerial_AsnText >> entry;
1417  }}
1418 
1420  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1421 
    // Pass 1: iterate the feature tables on the raw Bioseq object.
1422  FOR_EACH_ANNOT_ON_BIOSEQ (annot, entry.GetSeq()) {
1423  if ((*annot)->IsFtable()) {
1424  FOR_EACH_FEATURE_ON_ANNOT (feat, **annot) {
1425  if ((*feat)->GetData().IsRna()) {
1426  sequence::TFeatScores gene;
1427  GetOverlappingFeatures ((*feat)->GetLocation(), CSeqFeatData::e_Gene,
1429  BOOST_CHECK_EQUAL(gene.size(), 1u);
1430  } else if ((*feat)->GetData().IsGene()) {
1431  BOOST_CHECK_EQUAL((*feat)->IsSetPartial(), false);
1432  }
1433  }
1434  }
1435  }
1436 
1437 
    // Pass 2: same expectations, using Bioseq handles and feature iterators.
1438  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
1439  CBioseq_Handle bsh = *bs_iter;
1440 
1441  CFeat_CI mrna_iter(*bs_iter,
1442  SAnnotSelector().IncludeFeatSubtype
1444  for ( ; mrna_iter; ++mrna_iter) {
1445  sequence::TFeatScores gene;
1448  BOOST_CHECK_EQUAL(gene.size(), 1u);
1449  }
1450 
1451  CFeat_CI gene_iter(*bs_iter,
1452  SAnnotSelector().IncludeFeatSubtype
1454  for ( ; gene_iter; ++gene_iter) {
1455  BOOST_CHECK_EQUAL(gene_iter->IsSetPartial(), false);
1456  }
1457 
1458  }
1459 }
1460 
1461 
1463 {
    // Parses sc_TestEntry_GB_2236, builds a coding region made of three
    // intervals on the entry's first sequence id, translates it with
    // CSeqTranslator::TranslateToProtein and compares the protein with a
    // fixed expected string.
1464  CSeq_entry entry;
1465  {{
1466  CNcbiIstrstream istr(sc_TestEntry_GB_2236);
1467  istr >> MSerial_AsnText >> entry;
1468  }}
1469 
1471  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1472 
1473  CRef<CSeq_feat> cds(new CSeq_feat());
1474 
1475  // set genetic code
1477  ce->SetId(1);
    // NOTE(review): gcode does not appear to be used after this line; the
    // code element `ce` is pushed straight into the feature's Cdregion.
    // Candidate for removal -- confirm.
1478  CRef<CGenetic_code> gcode(new CGenetic_code());
1479  cds->SetData().SetCdregion().SetCode().Set().push_back(ce);
1480 
1481  // set location
    // Three intervals on the same id (0..40, 121..175, 201..416), combined
    // below into one mix location in this order.
1482  CRef<CSeq_loc> int1(new CSeq_loc());
1483  int1->SetInt().SetId().Assign(*(entry.GetSeq().GetId().front()));
1484  int1->SetInt().SetFrom(0);
1485  int1->SetInt().SetTo(40);
1486  CRef<CSeq_loc> int2(new CSeq_loc());
1487  int2->SetInt().SetId().Assign(*(entry.GetSeq().GetId().front()));
1488  int2->SetInt().SetFrom(121);
1489  int2->SetInt().SetTo(175);
1490  CRef<CSeq_loc> int3(new CSeq_loc());
1491  int3->SetInt().SetId().Assign(*(entry.GetSeq().GetId().front()));
1492  int3->SetInt().SetFrom(201);
1493  int3->SetInt().SetTo(416);
1494  cds->SetLocation().SetMix().Set().push_back(int1);
1495  cds->SetLocation().SetMix().Set().push_back(int2);
1496  cds->SetLocation().SetMix().Set().push_back(int3);
1497 
    // The feature is not added to the entry; it is passed to the translator
    // directly together with the scope.
1498  CRef<CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*cds, scope);
1499  string seg1 = "-TISGEHGLDSNGVYNGTSELQLERMNVYFNESSHHPASCLLLPSGLLWPDRACPSDAFLVQASGNKYVPRAVLVDLEPGTMDAVRAGPFGQLFRPDNFVFGQS";
1500  CheckTranslatedBioseq (bioseq, seg1);
1501 
1502 }
1503 
1504 
1505 
1506 //////////////////////////////////////////////////////////////////////////////
1507 
// Baseline ASN.1 text fixture: a nuc-prot set holding
//   - an RNA sequence (genbank AF010144.1, gi 3002526, length 1442),
//   - its protein (genbank AAC08737.1, gi 3002527, length 375) with a single
//     prot feature over 0..374,
//   - a set-level coding region (frame one, genetic code 1) at 14..1141 on
//     the plus strand of gi 3002526, with product gi 3002527.
// The other AF010144-based fixtures in this file are variations of this one.
1508 const char* sc_TestEntry ="\
1509 Seq-entry ::= set {\
1510  class nuc-prot,\
1511  seq-set {\
1512  seq {\
1513  id {\
1514  genbank {\
1515  name \"AF010144\",\
1516  accession \"AF010144\",\
1517  version 1\
1518  },\
1519  gi 3002526\
1520  },\
1521  inst {\
1522  repr raw,\
1523  mol rna,\
1524  length 1442,\
1525  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1526 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1527 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTATTTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1528 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1529 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1530 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1531 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1532 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1533 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1534 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1535 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1536 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1537 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1538 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1539 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1540 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1541 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1542 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1543 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1544  }\
1545  },\
1546  seq {\
1547  id {\
1548  genbank {\
1549  accession \"AAC08737\",\
1550  version 1\
1551  },\
1552  gi 3002527\
1553  },\
1554  inst {\
1555  repr raw,\
1556  mol aa,\
1557  length 375,\
1558  topology not-set,\
1559  seq-data ncbieaa \"MEFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILY\
1560 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1561 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1562 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1563 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1564 LSGWSQTPDLR\"\
1565  },\
1566  annot {\
1567  {\
1568  data ftable {\
1569  {\
1570  data prot {\
1571  name {\
1572  \"neuronal thread protein AD7c-NTP\"\
1573  }\
1574  },\
1575  location int {\
1576  from 0,\
1577  to 374,\
1578  strand plus,\
1579  id gi 3002527\
1580  }\
1581  }\
1582  }\
1583  }\
1584  }\
1585  }\
1586  },\
1587  annot {\
1588  {\
1589  data ftable {\
1590  {\
1591  data cdregion {\
1592  frame one,\
1593  code {\
1594  id 1\
1595  }\
1596  },\
1597  product whole gi 3002527,\
1598  location int {\
1599  from 14,\
1600  to 1141,\
1601  strand plus,\
1602  id gi 3002526\
1603  }\
1604  }\
1605  }\
1606  }\
1607  }\
1608 }";
1609 
// Variation of sc_TestEntry with a code-break on the coding region:
// positions 17..19 of gi 3002526 are assigned amino acid ncbieaa 81
// (ASCII 'Q'). The stored protein correspondingly starts with "MQFSLL..."
// where sc_TestEntry has "MEFSLL...". Everything else (sequence, frame one,
// genetic code 1, location 14..1141) matches sc_TestEntry.
1610 const char* sc_TestEntry_code_break ="\
1611 Seq-entry ::= set {\
1612  class nuc-prot,\
1613  seq-set {\
1614  seq {\
1615  id {\
1616  genbank {\
1617  name \"AF010144\",\
1618  accession \"AF010144\",\
1619  version 1\
1620  },\
1621  gi 3002526\
1622  },\
1623  inst {\
1624  repr raw,\
1625  mol rna,\
1626  length 1442,\
1627  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1628 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1629 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTATTTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1630 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1631 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1632 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1633 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1634 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1635 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1636 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1637 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1638 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1639 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1640 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1641 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1642 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1643 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1644 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1645 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1646  }\
1647  },\
1648  seq {\
1649  id {\
1650  genbank {\
1651  accession \"AAC08737\",\
1652  version 1\
1653  },\
1654  gi 3002527\
1655  },\
1656  inst {\
1657  repr raw,\
1658  mol aa,\
1659  length 375,\
1660  topology not-set,\
1661  seq-data ncbieaa \"MQFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILY\
1662 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1663 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1664 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1665 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1666 LSGWSQTPDLR\"\
1667  },\
1668  annot {\
1669  {\
1670  data ftable {\
1671  {\
1672  data prot {\
1673  name {\
1674  \"neuronal thread protein AD7c-NTP\"\
1675  }\
1676  },\
1677  location int {\
1678  from 0,\
1679  to 374,\
1680  strand plus,\
1681  id gi 3002527\
1682  }\
1683  }\
1684  }\
1685  }\
1686  }\
1687  }\
1688  },\
1689  annot {\
1690  {\
1691  data ftable {\
1692  {\
1693  data cdregion {\
1694  frame one,\
1695  code {\
1696  id 1\
1697  },\
1698  code-break {\
1699  {\
1700  loc int {\
1701  from 17,\
1702  to 19,\
1703  strand plus,\
1704  id gi 3002526\
1705  },\
1706  aa ncbieaa 81\
1707  }\
1708  }\
1709  },\
1710  product whole gi 3002527,\
1711  location int {\
1712  from 14,\
1713  to 1141,\
1714  strand plus,\
1715  id gi 3002526\
1716  }\
1717  }\
1718  }\
1719  }\
1720  }\
1721 }";
1722 
1723 
// Variation of sc_TestEntry in which the coding region is declared with
// frame two and starts one base earlier (location 13..1141 instead of
// 14..1141). Sequence data and the stored protein are the same as in
// sc_TestEntry.
1724 const char* sc_TestEntry_alt_frame ="\
1725 Seq-entry ::= set {\
1726  class nuc-prot,\
1727  seq-set {\
1728  seq {\
1729  id {\
1730  genbank {\
1731  name \"AF010144\",\
1732  accession \"AF010144\",\
1733  version 1\
1734  },\
1735  gi 3002526\
1736  },\
1737  inst {\
1738  repr raw,\
1739  mol rna,\
1740  length 1442,\
1741  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1742 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1743 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTATTTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1744 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1745 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1746 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1747 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1748 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1749 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1750 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1751 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1752 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1753 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1754 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1755 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1756 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1757 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1758 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1759 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1760  }\
1761  },\
1762  seq {\
1763  id {\
1764  genbank {\
1765  accession \"AAC08737\",\
1766  version 1\
1767  },\
1768  gi 3002527\
1769  },\
1770  inst {\
1771  repr raw,\
1772  mol aa,\
1773  length 375,\
1774  topology not-set,\
1775  seq-data ncbieaa \"MEFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILY\
1776 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1777 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1778 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1779 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1780 LSGWSQTPDLR\"\
1781  },\
1782  annot {\
1783  {\
1784  data ftable {\
1785  {\
1786  data prot {\
1787  name {\
1788  \"neuronal thread protein AD7c-NTP\"\
1789  }\
1790  },\
1791  location int {\
1792  from 0,\
1793  to 374,\
1794  strand plus,\
1795  id gi 3002527\
1796  }\
1797  }\
1798  }\
1799  }\
1800  }\
1801  }\
1802  },\
1803  annot {\
1804  {\
1805  data ftable {\
1806  {\
1807  data cdregion {\
1808  frame two,\
1809  code {\
1810  id 1\
1811  }\
1812  },\
1813  product whole gi 3002527,\
1814  location int {\
1815  from 13,\
1816  to 1141,\
1817  strand plus,\
1818  id gi 3002526\
1819  }\
1820  }\
1821  }\
1822  }\
1823  }\
1824 }";
1825 
// Variation of sc_TestEntry with an edited nucleotide sequence: the third
// sequence line reads "...TTTTGTGATTTTTTTT..." where sc_TestEntry has
// "...TTTTGTATTTTTTTTT...". The stored protein has 'X' at the end of its
// first line where sc_TestEntry has 'Y'. Per the fixture name this models an
// internal stop codon. The coding region (frame one, genetic code 1,
// 14..1141) is unchanged.
1826 const char* sc_TestEntry_internal_stop ="\
1827 Seq-entry ::= set {\
1828  class nuc-prot,\
1829  seq-set {\
1830  seq {\
1831  id {\
1832  genbank {\
1833  name \"AF010144\",\
1834  accession \"AF010144\",\
1835  version 1\
1836  },\
1837  gi 3002526\
1838  },\
1839  inst {\
1840  repr raw,\
1841  mol rna,\
1842  length 1442,\
1843  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1844 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1845 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTGATTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1846 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1847 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1848 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1849 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1850 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1851 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1852 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1853 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1854 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1855 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1856 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1857 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1858 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1859 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1860 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1861 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1862  }\
1863  },\
1864  seq {\
1865  id {\
1866  genbank {\
1867  accession \"AAC08737\",\
1868  version 1\
1869  },\
1870  gi 3002527\
1871  },\
1872  inst {\
1873  repr raw,\
1874  mol aa,\
1875  length 375,\
1876  topology not-set,\
1877  seq-data ncbieaa \"MEFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILX\
1878 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1879 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1880 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1881 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1882 LSGWSQTPDLR\"\
1883  },\
1884  annot {\
1885  {\
1886  data ftable {\
1887  {\
1888  data prot {\
1889  name {\
1890  \"neuronal thread protein AD7c-NTP\"\
1891  }\
1892  },\
1893  location int {\
1894  from 0,\
1895  to 374,\
1896  strand plus,\
1897  id gi 3002527\
1898  }\
1899  }\
1900  }\
1901  }\
1902  }\
1903  }\
1904  },\
1905  annot {\
1906  {\
1907  data ftable {\
1908  {\
1909  data cdregion {\
1910  frame one,\
1911  code {\
1912  id 1\
1913  }\
1914  },\
1915  product whole gi 3002527,\
1916  location int {\
1917  from 14,\
1918  to 1141,\
1919  strand plus,\
1920  id gi 3002526\
1921  }\
1922  }\
1923  }\
1924  }\
1925  }\
1926 }";
1927 
// Variation of sc_TestEntry for a coding region that is partial at its
// start: the location begins at 17 (instead of 14) and carries
// "fuzz-from lim tr". The nucleotide sequence is the edited one also used
// by sc_TestEntry_internal_stop ("...TTTTGTGATTTTTTTT..."). The stored
// protein has declared length 374, begins with '-' and contains '*' at the
// end of its first line.
// NOTE(review): the prot feature still ends at 374 although the declared
// protein length is 374 -- confirm this is intentional.
1928 const char* sc_TestEntry_5prime_partial ="\
1929 Seq-entry ::= set {\
1930  class nuc-prot,\
1931  seq-set {\
1932  seq {\
1933  id {\
1934  genbank {\
1935  name \"AF010144\",\
1936  accession \"AF010144\",\
1937  version 1\
1938  },\
1939  gi 3002526\
1940  },\
1941  inst {\
1942  repr raw,\
1943  mol rna,\
1944  length 1442,\
1945  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1946 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1947 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTGATTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1948 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1949 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1950 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1951 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1952 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1953 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1954 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1955 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1956 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1957 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1958 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1959 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1960 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1961 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1962 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1963 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1964  }\
1965  },\
1966  seq {\
1967  id {\
1968  genbank {\
1969  accession \"AAC08737\",\
1970  version 1\
1971  },\
1972  gi 3002527\
1973  },\
1974  inst {\
1975  repr raw,\
1976  mol aa,\
1977  length 374,\
1978  topology not-set,\
1979  seq-data ncbieaa \"-FSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLIL*\
1980 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1981 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1982 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1983 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1984 LSGWSQTPDLR\"\
1985  },\
1986  annot {\
1987  {\
1988  data ftable {\
1989  {\
1990  data prot {\
1991  name {\
1992  \"neuronal thread protein AD7c-NTP\"\
1993  }\
1994  },\
1995  location int {\
1996  from 0,\
1997  to 374,\
1998  strand plus,\
1999  id gi 3002527\
2000  }\
2001  }\
2002  }\
2003  }\
2004  }\
2005  }\
2006  },\
2007  annot {\
2008  {\
2009  data ftable {\
2010  {\
2011  data cdregion {\
2012  frame one,\
2013  code {\
2014  id 1\
2015  }\
2016  },\
2017  product whole gi 3002527,\
2018  location int {\
2019  from 17,\
2020  to 1141,\
2021  strand plus,\
2022  id gi 3002526,\
2023  fuzz-from lim tr\
2024  }\
2025  }\
2026  }\
2027  }\
2028  }\
2029 }";
2030 
// Variation of sc_TestEntry for a coding region that stops early: the
// location is 14..1135 (instead of 14..1141) and carries "fuzz-from lim tl".
// The nucleotide sequence is the edited one also used by
// sc_TestEntry_internal_stop ("...TTTTGTGATTTTTTTT..."). The stored protein
// has declared length 374, has 'X' at the end of its first line and ends
// with "...TPDL" (no final 'R').
// NOTE(review): although the fixture is named 3prime_partial, the fuzz is
// attached to `from`, not `to` -- confirm this is what the test intends.
2031 const char* sc_TestEntry_3prime_partial ="\
2032 Seq-entry ::= set {\
2033  class nuc-prot,\
2034  seq-set {\
2035  seq {\
2036  id {\
2037  genbank {\
2038  name \"AF010144\",\
2039  accession \"AF010144\",\
2040  version 1\
2041  },\
2042  gi 3002526\
2043  },\
2044  inst {\
2045  repr raw,\
2046  mol rna,\
2047  length 1442,\
2048  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
2049 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
2050 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTGATTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
2051 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
2052 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
2053 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
2054 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
2055 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
2056 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
2057 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
2058 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
2059 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
2060 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
2061 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
2062 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
2063 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
2064 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
2065 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
2066 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
2067  }\
2068  },\
2069  seq {\
2070  id {\
2071  genbank {\
2072  accession \"AAC08737\",\
2073  version 1\
2074  },\
2075  gi 3002527\
2076  },\
2077  inst {\
2078  repr raw,\
2079  mol aa,\
2080  length 374,\
2081  topology not-set,\
2082  seq-data ncbieaa \"MEFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILX\
2083 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
2084 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
2085 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
2086 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
2087 LSGWSQTPDL\"\
2088  },\
2089  annot {\
2090  {\
2091  data ftable {\
2092  {\
2093  data prot {\
2094  name {\
2095  \"neuronal thread protein AD7c-NTP\"\
2096  }\
2097  },\
2098  location int {\
2099  from 0,\
2100  to 374,\
2101  strand plus,\
2102  id gi 3002527\
2103  }\
2104  }\
2105  }\
2106  }\
2107  }\
2108  }\
2109  },\
2110  annot {\
2111  {\
2112  data ftable {\
2113  {\
2114  data cdregion {\
2115  frame one,\
2116  code {\
2117  id 1\
2118  }\
2119  },\
2120  product whole gi 3002527,\
2121  location int {\
2122  from 14,\
2123  to 1135,\
2124  strand plus,\
2125  id gi 3002526,\
2126  fuzz-from lim tl\
2127  }\
2128  }\
2129  }\
2130  }\
2131  }\
2132 }";
2133 
// Small ASN.1 text fixture for a partial coding region on the minus strand:
//   - a 20-base DNA sequence (local id "minus_5_prime_partial"),
//   - a 5-residue protein "-FAQI" (local id "minus_5_prime_partial_prot")
//     with a prot feature declared over 0..5,
//   - a coding region (frame one, genetic code 1) at 0..15 on the minus
//     strand, carrying "fuzz-from lim tr", with the protein as its product.
2134 const char* sc_TestEntry_5prime_partial_minus ="\
2135 Seq-entry ::= set {\
2136  class nuc-prot,\
2137  seq-set {\
2138  seq {\
2139  id {\
2140  local str \"minus_5_prime_partial\" },\
2141  inst {\
2142  repr raw,\
2143  mol dna,\
2144  length 20,\
2145  seq-data iupacna \"AAATTTGGGCAAATTTGGGC\"\
2146  }\
2147  },\
2148  seq {\
2149  id {\
2150  local str \"minus_5_prime_partial_prot\" },\
2151  inst {\
2152  repr raw,\
2153  mol aa,\
2154  length 5,\
2155  seq-data ncbieaa \"-FAQI\"\
2156  },\
2157  annot {\
2158  {\
2159  data ftable {\
2160  {\
2161  data prot {\
2162  name {\
2163  \"test protein\"\
2164  }\
2165  },\
2166  location int {\
2167  from 0,\
2168  to 5,\
2169  strand plus,\
2170  id local str \"minus_5_prime_partial_prot\"\
2171  }\
2172  }\
2173  }\
2174  }\
2175  }\
2176  }\
2177  },\
2178  annot {\
2179  {\
2180  data ftable {\
2181  {\
2182  data cdregion {\
2183  frame one,\
2184  code {\
2185  id 1\
2186  }\
2187  },\
2188  product whole local str \"minus_5_prime_partial_prot\",\
2189  location int {\
2190  from 0,\
2191  to 15,\
2192  strand minus,\
2193  id local str \"minus_5_prime_partial\",\
2194  fuzz-from lim tr\
2195  }\
2196  }\
2197  }\
2198  }\
2199  }\
2200 }";
2201 
// ASN.1 text fixture: nuc-prot set for the Takifugu fasciatus mitochondrial
// genome (GQ409967, 16444 bases, circular, ncbi2na-packed) together with its
// cytochrome c oxidase subunit II protein (PROT_4_bankit1246641, 230 aa).
// The coding region covers 7147..7837 on the plus strand, i.e. 691 bases =
// 230 full codons + 1 trailing base, and uses genetic code id 2. A code-break
// on that single trailing base (7837..7837) assigns ncbieaa 42 (ASCII '*'),
// matching the feature comment about the stop codon being completed on the mRNA.
2202 const char *sc_TestEntry_TerminalTranslExcept = "\
2203 Seq-entry ::= set {\
2204  class nuc-prot ,\
2205  descr {\
2206  source {\
2207  genome mitochondrion ,\
2208  org {\
2209  taxname \"Takifugu fasciatus\" ,\
2210  common \"obscure pufferfish\" ,\
2211  db {\
2212  {\
2213  db \"taxon\" ,\
2214  tag\
2215  id 301270 } } ,\
2216  orgname {\
2217  name\
2218  binomial {\
2219  genus \"Takifugu\" ,\
2220  species \"fasciatus\" } ,\
2221  lineage \"Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;\
2222  Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Euteleostei;\
2223  Neoteleostei; Acanthomorpha; Acanthopterygii; Percomorpha; Tetraodontiformes;\
2224  Tetradontoidea; Tetraodontidae; Takifugu\" ,\
2225  gcode 1 ,\
2226  mgcode 2 ,\
2227  div \"VRT\" } } } } ,\
2228  seq-set {\
2229  seq {\
2230  id {\
2231  local\
2232  str \"bankit1246641\" ,\
2233  general {\
2234  db \"BankIt\" ,\
2235  tag\
2236  id 1246641 } ,\
2237  general {\
2238  db \"TMSMART\" ,\
2239  tag\
2240  id 10764938 } ,\
2241  genbank {\
2242  accession \"GQ409967\" } } ,\
2243  descr {\
2244  title \"Takifugu fasciatus mitochondrion, complete genome.\" ,\
2245  molinfo {\
2246  biomol genomic ,\
2247  completeness complete } } ,\
2248  inst {\
2249  repr raw ,\
2250  mol dna ,\
2251  length 16444 ,\
2252  topology circular ,\
2253  strand ds ,\
2254  seq-data\
2255  ncbi2na '9C9B27C140922C78239C23A15E00B564A44027EB5E1FC7041DE3407\
2256 C44E42CD64D52E00E5559556D680CA8BEB34A4440FEC953845C9FE519550A83D24B8C04F094C2E\
2257 007E1F2D38DC08B6B0076E525166BCC622150BEF25069B02AEBC81CC04101E216045F429EF319F\
2258 58241820430602C97470760546027284407A8F2315473971570118CE01C6C4CD65EBC71893C9F0\
2259 0540A1FA6B9FC014DC8A25EFF0058C755BD05D1575FBFC165CCC516DB49715EE0A1032C8403E91\
2260 25001B4AD8AEC983A2AA100EA713DDE5C8811808EE780E44560A28FC92C2420322ED39E0169CE0\
2261 999111656D1DD54077C3F003070C25140200AA290B6C13AC2EC5A0AE47E8005A24C9F049F0245D\
2262 5F1162F8656E40D89E55817049C95515454410147303155C231F07010040D3FC515CB3288C8028\
2263 1CA2732302C590A819E0220380C152C0B00009223C45DB17FE4D38FC9CB30F2902247F2DC11560\
2264 1E0E271D42125FCCA9135B77BA4022E8089FE2C8AE3017162F4BCC9EBE562078B309D25FFA7DF0\
2265 753073F33C1587F0805008BC340AAB1255FE3108041FF04A28C28F3000F0A4799F0B29F2092514\
2266 42026F0277244D4E510314300475C157D5716A7FDCE7D4C8203CE700E2C30AA5855750910BB134\
2267 8181551603D068541408A80C0CF01D10420013F047FD6F15711EBB940CA021C020020A076901D0\
2268 25D97BF14004D977E7D2E0C22B465E57B8733BF06966B3FE16E40AC990D1FB5FC0EE85ECE0E930\
2269 62A7C9EDD7F742D0E07E3755B9209AA30053086208573A27F210004957B43015C0C0A030172E05\
2270 EFF0EDFEBEA9859AAC1000554EE8380115FFC01422D147728C48137850C38D69C0963C18162F15\
2271 CA8C126435DFF22D4CD8422AFC61763BE8D284D70EB9259CF02BDBFBD063C0B5C6E378BD2168B0\
2272 D4AD2FDCDCE0B17FF52C602858008AA50ED4042551DD1F9E0549D0903088B10070B4081384EF2E\
2273 E92256B3E4025F015F6048AF41D7757073871AC70F151F0F4557053CFB57BDF725B27FFC13C3E0\
2274 600B7CAF1390F1802A54136CA17CE875D4153658E9BC07FD340895B595371759D535CFCF3E55C4\
2275 F257C776530C338114397757C5535F87C0DE94F73FB5C9CDDC9FA4B71DCF729D68E25D41D00CE5\
2276 7CCA3771892F91030DD3182C2DDA1C35FF34F8F3FF12907F15F404FC1B454A024FE1C34D506E15\
2277 76490CE3137519C96010162753F85C120AA2D607ACDEAF41BE0CE4A2957F95CF7F72483394135D\
2278 F0C04477510D73D72895C4CE5277D481ED77343F8CD001253FCD5D37D738958975C5463D633852\
2279 70E45D138001F5C51F13C93D3CDE137E47541C4324A5F55740CC0281EE5E0102851FE38AE0DC8A\
2280 BC0F575275F2020A87E054156223401DD2E7D471147F72C0B4AC0C27FCA54C57003A2BC03555FC\
2281 40E057CCF1E5D5FF3FA5E7BCAC41CF107149111E1C3E4E0EA5C80F0457273CF57C325410451518\
2282 90F8251C4031F7C1D09E592490575D764944141978304A50E2071242715152D50414C3C53E4F25\
2283 F0036872559D445E1D520B0C4287217C41E87CD7A513840036553DD5F71C40F4150C140477C35D\
2284 76535DD4C7EDA68E2AA5C0D0150B1B00D725C75D4D951729E0CB4F3DF43DD1415E9EC305724771\
2285 34D304D7517D7E7F4E3010051043E817C10DD3894015634FD3570C55C35F774FA8AFE554707AFD\
2286 3970387CD7D081F100421729304944F2495F2E571C2DFCFDCDC65CD719CC15C14DD540C1784A04\
2287 715E1815010100013C50594D73ED77D4F75D757F05560B1F11DFC1130887CAF045214225F409DD\
2288 2E82E00DFD2D5E3087E4253CDD137DE4E401211FC3C27025F072321297635E4077CBC127099500\
2289 50627D4DC1FD565C900900A6A80956921B4379F7F23F90DC13B04552A78C808A1F0177B33AA710\
2290 1459F057494F717BA4344678FFDD050D102136915CC5CBFFAE5E25A0CB2912470B7DF3DA96074B\
2291 415A6477EA638523710EC36F1254E4F6C38FF7F32C31434E3E8A7FA81E3CBD57CC36895484E97D\
2292 55830104C27DE1C7D5535F5D7DE764D77A2C825A26AC6A781AFC5455C9280DF9519289F7B21745\
2293 37DDDF4DF92AB75DCFF2A90D07D3441CD3C138055490DD10C501177FDBB896FF0F1E7B1F75E775\
2294 7D4B5F924A8D10C7DD1E1601703105F7F854928A2885535EC5247CF78F7FA515E0B713DF3DD5E9\
2295 F6A30FD1336C971C76A4002053DAF13A93ADE2538E94FADF7E9FCFB3895144EFC4B693A1B21158\
2296 971FC5D6D10C3CF94D584A2D02CFC9E1F905F93A28D0D0388055CC73895DA7D37D73F12EA69705\
2297 A0FB5E941D3572136FF11845C71B2F953F51CED7753AB9ECFE43CEAE4F6C478F5473FD28C44751\
2298 247E0700D47FA2C3BD3EBB41705F7D55047D7EB727A8C5D8631D61C55865C65738075B75D0FA74\
2299 32DDDCBA4B0F3BD77F35DE2097D16509882D434BE07043860EC838711AB95D75F14C4F6089597D\
2300 B501D017735880A8A0D8155B01EBF4252710051DED1FDF4C21DCB00E90F11C7FB42B20FBABC855\
2301 46EDF3CF0E9135F441CABF42B927D14B0C82075F47D4614E770C36CF5F3C913CB1FC4F3EF9CCB7\
2302 5140707042C4F721DD088F80F37814D7149CF3DF35D3E5757D5D60F7F170C8603438554DC10F02\
2303 532B450E31E09CE0CC4873261725FE1D3130D5444217A556943D65FF201E14D83ADBD4BA1D553D\
2304 835CB749208ED751D7896D57772BB003A396C5696DC050125FCD7B5617AEFF73A50E7780DE689C\
2305 1449F4C50DBEF825B5572047F801E35D1C39D823974708270068449BC97FC27003AE5C50115F2E\
2306 039741D055917E3D74F3ADF77E3BB3D70DF5D5D400D32747CF540E2575D50047E5502034057857\
2307 8D384709F7D850FFC255165FE935578F95D9DD73C5FA15DF551141416F81410DB7FC11D40943F3\
2308 C363F1D041E75D51C04CA2A440E257CCF65D3C32EF5C3C53C131CA1D7D4C44F455C51127F5B033\
2309 25727B157BA76412C34DA0C60105412495CA517DF520AC55419DC35535C3CF3601ED25DF4F6177\
2310 A6728BD87859C17449294DCF0F41C3671249FDB9D7557CE507B653DC13415EF3D75E11F720BE5B\
2311 6430D425CEFF6C7DF709773710801B70E953424454C5132C854254E17704A24B6795F7DF135A5C\
2312 94FE3D47F0741CF70C15CA1CB5C7FC74731F438E188CFB1882913D4294511575ED400A5F631A0C\
2313 3DCFCF17582CF7DF5FA7DF7893DC51D097255475E0F28A79E15544A4F35570357D82F51D701124\
2314 B5E7A4D28BC5B05E2745124F3A0AA2600424352D5D15C10D7DCAFDC7F13D79090C8B3CE0955F44\
2315 3648EBBF1A7417DF6D9C5A7D4697D1B4D368D05F7C96CEDC71840D63D47D17581147FA7D824965\
2316 E31E11FEC8EFB78F3DF31374371E38A74C37F72CC0B4B17B87D43454B7EBC0750A0230E07C7041\
2317 0CF0F345127F3574FF0E16DD3DE1D5657C4521C5001CD14CE0EE9F6155CA34958757F74719F7DF\
2318 2D90D7BD75FF61F2036575D755757E28850754D5515E11F33A17595F70F75D10FA5C971838750A\
2319 297E0E25832B0F2FC3C013F8FDA74001CEBC0B530F05E38574D41DD3D174B7DF5CA1CFE97E4FF1\
2320 62D45F73791DF3B7E024C33C95CF5D91FD04E09790335D4527FD25914F1D7DC93DD24EE09EBB2A\
2321 7A7F0CB651256C5468D2345C40170175C439C00D70D5051CC7CD724138F04557038F38577D1D74\
2322 32DD70F9DD1C971CE9C003177804A381FD741571F2516151CD0555FF0D774E7875F55C30D72525\
2323 00511374E250F05B4183333447DF975790F7D78D7E5F7594480E34EFF1B0CFE094770F545F0D74\
2324 F159EA80509206DF064AC4C7D7FDCC5724A7571575D36471C77C401D43A3573575F31C551F2941\
2325 F81C134CE48C2378EA4A3B35E93D72F00C55FC68BD177871502746C82754F928DCCB1C967B1C70\
2326 0728A71A4C3183FCB4572157C14081D27153D3ECF25778A6E3CC1EBD4DE4C6D0121700D3C3E5C7\
2327 4D2D2513287ACBE8ACF74D407578A3D16991C35D30F951A3E136D67F3DE5C9431C9C6264444960\
2328 531C7E762B39030C75770C925E38F4D9097650FC91C51575507C32881C30F3C5D5CF43E3557810\
2329 FA5F1EA72911F3C7969C7477333D74C15062A56554145D7277E057444562245F7CF95D45D75470\
2330 D71F3C401481C378AB81E7EB2132FC34013C8FB8F700122BC015DFB516022BD9043821E70DF4D5\
2331 5DAF01D6A9D3D8549FF028C3270D6FADF28140077EB90750B009CE4751D770F304DC570F3CFF64\
2332 71D3F152FF0417FD54014401501E2474D42E00492C03197D76D25F7957397517C18281E2DCD347\
2333 07C07830115D17F84D04D25F03D875C743CDFC552C91CC6C178D4FF20FE4D78C4C474854F4C058\
2334 F7F0B17DE37D70D90C3CF7ED16504133D41DF4DA7882ABEB3CCD3D7DD369E38C646268670C4965\
2335 71092D373058B6884FA3C34D9CC9F8325151701DF889C41037DFF51C004C84C15D53C3E578DF25\
2336 904A40D2743DA5F4D6E1F5F790C82B5C45ADDE571D47724532DBE4A0DF7C30D6CDD5577C801054\
2337 12574477973972A95C14573F16517997C15206334000FB27FF41F52D072970E32C14DA5C0D2550\
2338 7E5F5E44DE45467FFD02533CF7CE77AB73CD449F038E042135800EA2831147E157BC5D74E5C10F\
2339 A49F25705A0557D725A7DFF502394D3E0DFC145D50F0197895CE5D15D7241F7F449CDC49718B6C\
2340 F71B3532945767F0D57F450D0E0041535B0F05534061C938A493CF9E973C3451017DD5040045EC\
2341 3343173AFBC07C79DF3ED10DFA875C3E5720F27D5C1750107C05C45117B5D5147DDC331FA7DF54\
2342 10FB116E5DD700F0DD3FEA1010F9C540F3E1705E3C800BE854090FD373411D574DDC53CB04D410\
2343 A343421317ED77D74711F99CD0576F7DD17079D8225558541559141DC31C4241BC30401509550C\
2344 B0C755451C83330B8055353B45D80079374373775201574D0177D3428533FBB108C500100000CD\
2345 40020C0145ED05ECA65F28D7CDE48499E183030110504C55533034F04114084202F554E401E095\
2346 449020B90524503E007530028928FEC84110F3441413570C020010993004CBF794A3FC14A173A6\
2347 E0014DBEF1D0710011C3A525C64015155C70036C06132C3E17D5157D013F5978E01FE9D5C768F3\
2348 97CF11034D1287BD7E4311C44D61377165FF4D6C9533F9621B01C69E1C3D90DC46406B974F7FFC\
2349 DE7CCF54CDA58ADDC73A77C5C2C0805E01B2AB2D77C7FCB0E9459FDB28C6D7D4E2840CD7FE2894\
2350 7B0F10179DDE5B55C6CA0119FBD0E2CE28A7DD2C84994770463DFE5F51F5D754FCDBE496794FB1\
2351 37CFFDF460129D410557287C3D0112300D53D454C7DDF102175EA7D10D39DD2576431D95CF7550\
2352 1C5DA215E107D1149435DDB455654CF014838CFD73F93190D79ADCD54309CA2BBDE95F7E5D0DF0\
2353 F7CCB2F5FDF11177010609704F5951CD10F5CF7815C3E786C94D705E0FA29315B608D7C4F3CDA1\
2354 03E5D0C7F1F7577D7ADF0E58C96BE1C80100C701C1093CB2748F489B6B7EC0583B6AAF00D557CE\
2355 740082A1F4155147A75409493DF0F01C77F8C31333B3CD54F4CCCF013C310E4C1C2132CF333D1F\
2356 30C5F41130242C4801C02E720930F200F57003CF04078180FD21604301D34B423314A1D04D7300\
2357 C50B3C3B2C2058534BE3F7C3933CF3E0AE2A10C12EAAFD1C0E073D7A4FEBD71F4A94F0F8F475D3\
2358 DFD3619E130BEFAE8B534B88C3544E589BDDD44AAD2BCFFFDDFD7F43E13F48B9266D06BD342BE0\
2359 4FFDFAFCEB0EF2F0E0F88CD33083C4F3E334284C0C30C63D0434C43F4557DF7FF008FC1B31555C\
2360 555400328805FCA7E05095D47C3C0CF4D33CF34CCF30CF30C330F33'H } } ,\
2361  seq {\
2362  id {\
2363  local\
2364  str \"PROT_4_bankit1246641\" ,\
2365  general {\
2366  db \"TMSMART\" ,\
2367  tag\
2368  id 10764942 } } ,\
2369  descr {\
2370  title \"cytochrome c oxidase subunit II [Takifugu fasciatus]\" ,\
2371  molinfo {\
2372  biomol peptide ,\
2373  tech concept-trans-a } } ,\
2374  inst {\
2375  repr raw ,\
2376  mol aa ,\
2377  length 230 ,\
2378  topology not-set ,\
2379  seq-data\
2380  ncbieaa \"MAHPSQLGFQGAASPVMEELLHFHDHALMIVFLISTLVLYIIVAMVSTKLTNKYI\
2381 LDSQEIEIIWTILPAIILILIALPSLRILYLMDEINDPHLTIKAMGHQWYWSYEYTDYSDLAFDSYMIPTQDLAPGQF\
2382 RLLETDHRMVVPVDSPIRILVSAEDVLHSWAVPSLGVKMDAVPGRLNQTAFILSRPGVFYGQCSEICGANHSFMPIVV\
2383 EAVPLEHFENWSSLMLEDA\" } ,\
2384  annot {\
2385  {\
2386  data\
2387  ftable {\
2388  {\
2389  data\
2390  prot {\
2391  name {\
2392  \"cytochrome c oxidase subunit II\" } } ,\
2393  location\
2394  int {\
2395  from 0 ,\
2396  to 229 ,\
2397  id\
2398  local\
2399  str \"PROT_4_bankit1246641\" } } } } } } } ,\
2400  annot {\
2401  {\
2402  data\
2403  ftable {\
2404  {\
2405  data\
2406  cdregion {\
2407  frame one ,\
2408  code {\
2409  id 2 } ,\
2410  code-break {\
2411  {\
2412  loc\
2413  int {\
2414  from 7837 ,\
2415  to 7837 ,\
2416  strand plus ,\
2417  id\
2418  genbank {\
2419  accession \"GQ409967\" } } ,\
2420  aa\
2421  ncbieaa 42 } } } ,\
2422  comment \"TAA stop codon is completed by the addition of 3' A\
2423  residues to the mRNA\" ,\
2424  product\
2425  whole\
2426  local\
2427  str \"PROT_4_bankit1246641\" ,\
2428  location\
2429  int {\
2430  from 7147 ,\
2431  to 7837 ,\
2432  strand plus ,\
2433  id\
2434  genbank {\
2435  accession \"GQ409967\" } } } } } } }\
2436 ";
2437 
// ASN.1 text fixture: a 20-base mRNA Bioseq (local id "ShortCDS") annotated
// with two coding-region features that both cover only bases 12..13, one on
// the plus strand and one on the minus strand. Each location is two bases
// long, i.e. shorter than a single codon; the cdregion bodies are empty
// (no frame, genetic code or code-breaks are specified).
2438 const char *sc_TestEntry_ShortCDS = "\
2439 Seq-entry ::= seq {\
2440  id {\
2441  local\
2442  str \"ShortCDS\" } ,\
2443  descr {\
2444  molinfo {\
2445  biomol mRNA } } ,\
2446  inst {\
2447  repr raw ,\
2448  mol rna ,\
2449  length 20 ,\
2450  seq-data\
2451  iupacna \"ATGTTTAAACATGTTTAAAC\" } ,\
2452  annot {\
2453  {\
2454  data\
2455  ftable {\
2456  {\
2457  data\
2458  cdregion {\
2459  } ,\
2460  location\
2461  int {\
2462  from 12 ,\
2463  to 13 ,\
2464  strand plus ,\
2465  id\
2466  local\
2467  str \"ShortCDS\" } } ,\
2468  {\
2469  data\
2470  cdregion {\
2471  } ,\
2472  location\
2473  int {\
2474  from 12 ,\
2475  to 13 ,\
2476  strand minus ,\
2477  id\
2478  local\
2479  str \"ShortCDS\" } } } } } }\
2480 ";
2481 
// ASN.1 text fixture: a 39-base raw mRNA Bioseq (local id "FirstCodon") with
// no annotations. The sequence opens with AAA and has its first ATG at
// offset 3, so the first in-frame codon at position 0 is not ATG.
2482 const char *sc_TestEntry_FirstCodon = "\
2483 Seq-entry ::= seq {\
2484  id {\
2485  local\
2486  str \"FirstCodon\" } ,\
2487  descr {\
2488  molinfo {\
2489  biomol mRNA } } ,\
2490  inst {\
2491  repr raw ,\
2492  mol rna ,\
2493  length 39 ,\
2494  seq-data\
2495  iupacna \"AAAATGGGAATGTGCTTTTTGAGAGGATGGAAAGGTGTT\" } }\
2496 ";
2497 
// ASN.1 text fixture: a 27-base raw genomic DNA Bioseq (local id
// "FirstCodon2") with no annotations. The sequence begins with TTG rather
// than ATG and contains TAA triplets at offsets 6, 12, 18 and 24.
2498 const char *sc_TestEntry_FirstCodon2 = "\
2499 Seq-entry ::= seq {\
2500  id {\
2501  local\
2502  str \"FirstCodon2\" } ,\
2503  descr {\
2504  molinfo {\
2505  biomol genomic } } ,\
2506  inst {\
2507  repr raw ,\
2508  mol dna ,\
2509  length 27 ,\
2510  seq-data\
2511  iupacna \"TTGCCCTAAAAATAAGAGTAAAACTAA\" } }\
2512 ";
2513 
2514 
// ASN.1 text fixture: 27-base genomic DNA delta sequence (local id
// "GapInSeq1") built from three literals: 9 bases (ATGCCCAAA), a 9-long
// literal that carries no seq-data (the gap), and 9 bases (CCCAAATAA).
// Every segment length is a multiple of 3, so the gap starts and ends on a
// codon boundary when reading from position 0.
2515 const char *sc_TestEntry_GapInSeq1 = "\
2516 Seq-entry ::= seq {\
2517  id {\
2518  local\
2519  str \"GapInSeq1\" } ,\
2520  descr {\
2521  molinfo {\
2522  biomol genomic } } ,\
2523  inst {\
2524  repr delta ,\
2525  mol dna ,\
2526  length 27 ,\
2527  ext \
2528  delta { \
2529  literal { \
2530  length 9 , \
2531  seq-data \
2532  iupacna \"ATGCCCAAA\" } , \
2533  literal { \
2534  length 9 } , \
2535  literal { \
2536  length 9 , \
2537  seq-data \
2538  iupacna \"CCCAAATAA\" } } } } \
2539 ";
2540 
2541 
// ASN.1 text fixture: 27-base genomic DNA delta sequence (local id
// "GapInSeq2") built from three literals: 8 bases (ATGCCCAA), a 9-long
// literal that carries no seq-data (the gap), and 10 bases (ACCCAAATAA).
// The gap therefore occupies positions 8..16 and is not aligned to a codon
// boundary when reading from position 0.
2542 const char *sc_TestEntry_GapInSeq2 = "\
2543 Seq-entry ::= seq {\
2544  id {\
2545  local\
2546  str \"GapInSeq2\" } ,\
2547  descr {\
2548  molinfo {\
2549  biomol genomic } } ,\
2550  inst {\
2551  repr delta ,\
2552  mol dna ,\
2553  length 27 ,\
2554  ext \
2555  delta { \
2556  literal { \
2557  length 8 , \
2558  seq-data \
2559  iupacna \"ATGCCCAA\" } , \
2560  literal { \
2561  length 9 } , \
2562  literal { \
2563  length 10 , \
2564  seq-data \
2565  iupacna \"ACCCAAATAA\" } } } } \
2566 ";
2567 
// ASN.1 text fixture: 29-base genomic DNA delta sequence (local id
// "GapInSeq3") built from three literals: 9 bases (ATGCCCAAA), a 9-long
// literal that carries no seq-data (the gap), and 11 bases (CCCAAAATAAA).
// The total length (29) is not a multiple of 3, leaving two bases after the
// last complete codon when reading from position 0.
2568 const char *sc_TestEntry_GapInSeq3 = "\
2569 Seq-entry ::= seq {\
2570  id {\
2571  local\
2572  str \"GapInSeq3\" } ,\
2573  descr {\
2574  molinfo {\
2575  biomol genomic } } ,\
2576  inst {\
2577  repr delta ,\
2578  mol dna ,\
2579  length 29 ,\
2580  ext \
2581  delta { \
2582  literal { \
2583  length 9 , \
2584  seq-data \
2585  iupacna \"ATGCCCAAA\" } , \
2586  literal { \
2587  length 9 } , \
2588  literal { \
2589  length 11 , \
2590  seq-data \
2591  iupacna \"CCCAAAATAAA\" } } } } \
2592 ";
2593 
2594 
// ASN.1 text fixture: 27-base genomic DNA delta sequence (local id
// "GapInSeq4") built from three literals: 9 bases (ATGCCCAAA), a 9-long
// literal that carries no seq-data (the gap), and 9 bases (CCCAAATAA).
// NOTE(review): apart from the local id, this payload is identical to
// sc_TestEntry_GapInSeq1 -- confirm whether a distinct layout was intended.
2595 const char *sc_TestEntry_GapInSeq4 = "\
2596 Seq-entry ::= seq {\
2597  id {\
2598  local\
2599  str \"GapInSeq4\" } ,\
2600  descr {\
2601  molinfo {\
2602  biomol genomic } } ,\
2603  inst {\
2604  repr delta ,\
2605  mol dna ,\
2606  length 27 ,\
2607  ext \
2608  delta { \
2609  literal { \
2610  length 9 , \
2611  seq-data \
2612  iupacna \"ATGCCCAAA\" } , \
2613  literal { \
2614  length 9 } , \
2615  literal { \
2616  length 9 , \
2617  seq-data \
2618  iupacna \"CCCAAATAA\" } } } } \
2619 ";
2620 
2621 
// ASN.1 text fixture: 18-base genomic DNA delta sequence (local id
// "GapInSeq5") built from three literals: 9 bases (ATGCCCAAA), a literal of
// length 0 with no seq-data (a zero-length gap), and 9 bases (CCCAAATAA).
2622 const char *sc_TestEntry_GapInSeq5 = "\
2623 Seq-entry ::= seq {\
2624  id {\
2625  local\
2626  str \"GapInSeq5\" } ,\
2627  descr {\
2628  molinfo {\
2629  biomol genomic } } ,\
2630  inst {\
2631  repr delta ,\
2632  mol dna ,\
2633  length 18 ,\
2634  ext \
2635  delta { \
2636  literal { \
2637  length 9 , \
2638  seq-data \
2639  iupacna \"ATGCCCAAA\" } , \
2640  literal { \
2641  length 0 } , \
2642  literal { \
2643  length 9 , \
2644  seq-data \
2645  iupacna \"CCCAAATAA\" } } } } \
2646 ";
2647 
// ASN.1 text fixture: set holding a 1674-base mRNA (general id GNOMON
// "912063.m", ncbi4na-packed) and its 558-residue protein product (GNOMON
// "912063.p"; 1674 / 3 = 558).
// The coding region spans the whole mRNA (0..1673, frame one, genetic code
// id 1) and is partial at both ends (fuzz-from lt, fuzz-to gt, partial TRUE).
// A code-break on bases 879..881 -- TGA in the ncbi4na data (8,4,1) -- assigns
// ncbieaa 88 (ASCII 'X'); the product sequence has X at the corresponding
// residue (0-based index 293).
2648 const char* sc_TestEntry_CodeBreakForStopCodon = "\
2649 Seq-entry ::= set {\
2650  seq-set {\
2651  seq {\
2652  id {\
2653  general {\
2654  db \"GNOMON\",\
2655  tag str \"912063.m\"\
2656  }\
2657  },\
2658  descr {\
2659  molinfo {\
2660  biomol mRNA,\
2661  completeness no-ends\
2662  }\
2663  },\
2664  inst {\
2665  repr raw,\
2666  mol rna,\
2667  length 1674,\
2668  seq-data ncbi4na '2481822428821148121184414284124281824121844848888241\
2669 141141141484144141411144128442828241148441842444141141114121411142142144412884\
2670 284114118182828124121144121282418824821288821144188824821182484118844842828828\
2671 241182824121124142488211882218482884844184488821142214412112441144882828824248\
2672 428821141418214121142184444821114844282118844224414484824284181848284288122182\
2673 12441411214824288184121121188484218282422214418812488441822112112182112F842882\
2674 882211824411848824481282482882144484411144121484288214282148121828881282112848\
2675 242214821282481212828822248282124184181184882842488882888184114111182142411884\
2676 142214148448188482211881482211844882824821184414282114441824181214418442421241\
2677 121822281188121182212484112844821141121821142482881824224424142242141114228814\
2678 282218448121141128822424411141821882111824122242482418884218428184484114884284\
2679 842884184121121221884141821224148842211821112148411282114188121114141144888844\
2680 144418841844141428244124141141112242284881824844128122144148182121214181211284\
2681 821118824824841142844842214411112882484118844142421142144182882182114842822142\
2682 822118284821828421841428221844842888824184284224418482882421248221288242214144\
2683 211881282114148828124181422422114141148122842141411144148122822824114882882144\
2684 282141421142482821281122144282488821822824221141821121114141884818824141121141\
2685 141144814221884824184188841821111844488884184284182214841141148441144144142111\
2686 142821142882484118284414111844124114184182814281284824224824114184124114424888\
2687 228284284241144224884111221142818214428124148122888824411841281822824124888244\
2688 114114141281121142821821184118244144141141841214142882414842841141141141228442\
2689 11418288844218414418284412112882288822414282214211141144284142'H\
2690  },\
2691  annot {\
2692  {\
2693  data ftable {\
2694  {\
2695  data cdregion {\
2696  frame one,\
2697  code {\
2698  id 1\
2699  },\
2700  code-break {\
2701  {\
2702  loc int {\
2703  from 879,\
2704  to 881,\
2705  strand plus,\
2706  id general {\
2707  db \"GNOMON\",\
2708  tag str \"912063.m\"\
2709  }\
2710  },\
2711  aa ncbieaa 88\
2712  }\
2713  }\
2714  },\
2715  partial TRUE,\
2716  product whole general {\
2717  db \"GNOMON\",\
2718  tag str \"912063.p\"\
2719  },\
2720  location int {\
2721  from 0,\
2722  to 1673,\
2723  strand plus,\
2724  id general {\
2725  db \"GNOMON\",\
2726  tag str \"912063.m\"\
2727  },\
2728  fuzz-from lim lt,\
2729  fuzz-to lim gt\
2730  }\
2731  }\
2732  }\
2733  }\
2734  }\
2735  },\
2736  seq {\
2737  id {\
2738  general {\
2739  db \"GNOMON\",\
2740  tag str \"912063.p\"\
2741  }\
2742  },\
2743  inst {\
2744  repr raw,\
2745  mol aa,\
2746  length 558,\
2747  seq-data ncbieaa \"RIRFKYNGADAIDMVFSKKKSEERKDWLSKWMREKKDRKQQGLAEEYLYDKD\
2748 TRFVTFKDFVNRELVLFSNLDNERSIPCLVDGFKPGQRKVLFACFKRSDKHGVKVAQLAGGVADMSAYHHGEQSLMTT\
2749 IVHLAQDYVGSNNINXLLPIGMFGTRLQGGKDSASAQYIFTQLSPVTRTLFPSHDDNVLRFLYEENQRIEPEWYCPIS\
2750 PMVLVNGAQGIDTGWRTNIPNYNPRELVKNIKRLIAGEPQKALAPWYKNFRGKIIQIDPRRFACYGEVAVLDDNTIEI\
2751 TELPIKQXTQDYKEKVLEGLMESSDEKKPPVIVDYQEYHTDTTVKFVVKLVPGKLRELERKQDLHQVLQLQSVICMSS\
2752 MVLFDAAGCLRTSTSPEAITQEFYDSRQEKYLQRKEYLLEVLQAQSKRLTNQARFILAKINKEIVFENKKKVAIVDDL\
2753 IKMGFDADPVKKWKEEQKLKLRESGEMDEDDLATVAVEDDEGVSSAAKAVETKLSGYEYLFGMTILDVSEEETNKLIN\
2754 ESEEKMTELRVLKKKTWQDLWHEDLDNFLSELQQRRLS\"\
2755  }\
2756  }\
2757  }\
2758 }\
2759 ";
2760 
// ASN.1 text fixture: 987-base genomic DNA delta sequence (local id
// "CMW8541SA", Holocryphia eucalypti, genetic code 1) with no annotations.
// It is built from three literals: 417 bases of iupacna data, a 100-long gap
// of type unknown whose length is marked uncertain (fuzz lim unk), and 470
// bases of iupacna data (417 + 100 + 470 = 987).
2761 const char* sc_TestEntry_GB_2236 = "\
2762 Seq-entry ::= seq {\
2763  id {\
2764  local str \"CMW8541SA\"\
2765  },\
2766  descr {\
2767  source {\
2768  genome genomic,\
2769  org {\
2770  taxname \"Holocryphia eucalypti\",\
2771  orgname {\
2772  mod {\
2773  {\
2774  subtype isolate,\
2775  subname \"CMW8541\"\
2776  }\
2777  },\
2778  lineage \"Cryphonectriaceae Diaporthales\",\
2779  gcode 1\
2780  }\
2781  },\
2782  subtype {\
2783  {\
2784  subtype country,\
2785  name \"South Africa\"\
2786  }\
2787  }\
2788  },\
2789  title \"Holocryphia eucalypti\",\
2790  molinfo {\
2791  biomol genomic\
2792  },\
2793  create-date std {\
2794  year 2009,\
2795  month 8,\
2796  day 5\
2797  }\
2798  },\
2799  inst {\
2800  repr delta,\
2801  mol dna,\
2802  length 987,\
2803  ext delta {\
2804  literal {\
2805  length 417,\
2806  seq-data iupacna \"CAAACCATCTCGGGCGAGCACGGCCTCGACAGCAATGGCGTGTA\
2807 TGTACCACACCATACCCTACACGGCGGCCCACGCAAGATGGACGCGGCTCGGGCTTTCCTGCTAACCACCCGCGTAGC\
2808 TACAACGGCACCTCCGAGCTCCAGCTCGAGCGCATGAACGTCTACTTCAACGAGGTATGTCTTGTCGGCTGACCAGGC\
2809 CTCCAGCCATCATCCTGCCTCCTGCCTCCTCCTTCCATCGGGACTTCTGTGGCCTGACCGAGCTTGCCCTTCTGACGC\
2810 GTTTCTCGTCCAGGCCTCCGGCAACAAGTATGTTCCCCGCGCCGTCCTCGTCGATCTCGAGCCCGGTACCATGGATGC\
2811 CGTCCGCGCCGGCCCCTTCGGCCAGCTGTTCCGTCCCGACAACTTCGTCTTCGGCCAGTCC\"\
2812  },\
2813  literal {\
2814  length 100,\
2815  fuzz lim unk,\
2816  seq-data gap {\
2817  type unknown\
2818  }\
2819  },\
2820  literal {\
2821  length 470,\
2822  seq-data iupacna \"TGACCAGCCGTGGCGCCCACTCCTTCCGCGCCCTCACCGTGCCC\
2823 GAGTTGACCCAGCAAATGTTCGACCCCAAGAACATGATGGCTGCCTCGGACTTCCGCAACGGCCGCTACCTGACGTGC\
2824 TCTGCCATCTTGTACGTTTTTGTCTTCTCTGTCTCACACATCTCGGATCCACCTCTCGGGCTTGTTTTTGCTAACCCT\
2825 GCTTTCCTCTCTCCCCTACAGCCGTGGCAAGGTCTCCATGAAGGAGGTCGAGGACCAGATGCGCAACGTCCAGAGCAA\
2826 GAACTCGTCCTACTTCGTCGAGTGGATCCCCAACAACGTCCAGACCGCCCTCTGCTCCATCCCCCCCAAGGGCCTCAA\
2827 GATGTCCTCCACCTTTGTCGGCAACTCCACCGCCATCCAGGAGCTCTTCAAGCGTGTTGGCGAGCAGTTCACCGCCAT\
2828 GTTCCGGCGCAAGGCTTTCTTGCATTGGTACACTGG\"\
2829  }\
2830  }\
2831  }\
2832 }\
2833 ";
static char tmp[2048]
Definition: utf8.c:62
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336
const char * sc_TestEntry_GapInSeq2
CBioseq_Handle –.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
Definition: Seq_inst_.hpp:802
BOOST_AUTO_TEST_CASE(Test_Translator_Raw)
Set coding to printable coding (Iupacna or Iupacaa)
TId & SetId(void)
Select the variant.
const char * sc_TestEntry_ShortCDS
void SetCoding(TCoding coding)
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
Definition: Seq_inst_.hpp:811
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:641
const char * sc_TestEntry_5prime_partial
#define FOR_EACH_ANNOT_ON_BIOSEQ
Definition: seq_macros.hpp:286
const CSeq_loc & GetProduct(void) const
any overlap of extremes
const char * sc_TestEntry_TerminalTranslExcept
#define fi
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
User-defined methods of the data storage class.
static void TranslateCdregion(string &prot, const CBioseq_Handle &bsh, const CSeq_loc &loc, const CCdregion &cdr, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=0, ETranslationLengthProblemOptions options=eThrowException)
translation coding region into ncbieaa protein sequence
Definition: sequence.cpp:4156
const char * sc_TestEntry_FirstCodon2
const char * sc_TestEntry_GapInSeq5
= 0x2 Remove trailing Xs from protein
Definition: sequence.hpp:947
const char * sc_TestEntry_CodeBreakForStopCodon
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
bool IsSetPartial(void) const
const TExt & GetExt(void) const
Get the Ext member data.
Definition: Seq_inst_.hpp:823
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
SAnnotSelector –.
#define NULL
Definition: ncbistd.hpp:225
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:893
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:280
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
Definition: scope.cpp:350
int i
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1082
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:479
= 0x1 Do not include stop in translation
Definition: sequence.hpp:946
const char * sc_TestEntry_GapInSeq1
User-defined methods of the data storage class.
const CSeq_loc & GetLocation(void) const
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3681
= 0x4 Translate first codon even if not start codon (because sequence is 5' partial) ...
Definition: sequence.hpp:948
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
bool IsGap(void) const
Check if variant Gap is selected.
Definition: Seq_data_.hpp:704
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:362
#define FOR_EACH_FEATURE_ON_ANNOT
Definition: seq_macros.hpp:433
bool IsSetFuzz(void) const
could be unsure Check if a value has been assigned to Fuzz data member.
const char * sc_TestEntry_FirstCodon
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:550
const char * sc_TestEntry_5prime_partial_minus
CFeat_CI –.
Definition: feat_ci.hpp:63
IO_PREFIX::istrstream CNcbiIstrstream
Portable alias for istrstream.
Definition: ncbistre.hpp:159
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
User-defined methods of the data storage class.
Utility stuff for more convenient using of Boost.Test library.
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
Definition: sequence.cpp:859
static string GetProteinString(CFeat_CI fi, CScope &scope)
CSeqVector –.
Definition: seq_vector.hpp:64
2nd contained within 1st extremes
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
Definition: Seq_data_.hpp:644
USING_SCOPE(objects)
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:198
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
CSeq_entry_Handle –.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
The Object manager core.
User-defined methods of the data storage class.
position_type GetTo(void) const
Definition: range.hpp:142
bool IsIupacaa(void) const
Check if variant Iupacaa is selected.
Definition: Seq_data_.hpp:524
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:286
void ResetLocation(void)
Reset Location data member.
Definition: Seq_feat_.cpp:122
TRange GetTotalRange(void) const
Definition: mapped_feat.hpp:93
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:3929
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
const TLiteral & GetLiteral(void) const
Get the variant data.
Definition: Delta_seq_.cpp:124
position_type GetFrom(void) const
Definition: range.hpp:134
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1014
static void TestOneGapSeq(const char *asn, string seg1, string seg2)
const char * sc_TestEntry_internal_stop
TSeqPos GetBioseqLength(void) const
CScope –.
Definition: scope.hpp:90
User-defined methods of the data storage class.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
void SetInt(TInt &v)
Definition: Seq_loc.hpp:965
Definition: Seq_entry.hpp:55
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
const TIupacaa & GetIupacaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:530
const char * sc_TestEntry_GapInSeq3
const char * sc_TestEntry_GapInSeq4
const char * sc_TestEntry_3prime_partial
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:923
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
Definition: sequence.cpp:4148
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
const char * sc_TestEntry_alt_frame
continuous sequence
Definition: Seq_inst_.hpp:94
namespace ncbi::objects::
Definition: Seq_feat.hpp:56
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:326
CBioseq_CI –.
Definition: bioseq_ci.hpp:68
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:243
const char * sc_TestEntry_GB_2236
const char * sc_TestEntry
vector< TFeatScore > TFeatScores
Definition: sequence.hpp:351
const char * sc_TestEntry_code_break
static void CheckTranslatedBioseq(CRef< CBioseq > bioseq, string seg1, bool mid_fuzz, string seg2)
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:165
TLiteral & SetLiteral(void)
Select the variant.
Definition: Delta_seq_.cpp:130
bool IsSetSeq_data(void) const
the sequence Check if a value has been assigned to Seq_data data member.
Definition: Seq_inst_.hpp:790
const TPrim & Get(void) const
Definition: serialbase.hpp:306
static void SetLocationSkipGap(CRef< CSeq_feat > feat, const CBioseq &bioseq)
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:650
Modified on Thu Mar 30 17:14:32 2017 by modify_doxy.py rev. 506947