NCBI C++ ToolKit
gff2_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff2_reader.cpp 76674 2017-02-27 15:14:06Z foleyjp $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * GFF file reader
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbithr.hpp>
37 #include <corelib/ncbiutil.hpp>
38 #include <corelib/ncbiexpt.hpp>
39 #include <corelib/stream_utils.hpp>
40 
41 #include <util/static_map.hpp>
42 #include <util/line_reader.hpp>
43 
44 #include <serial/iterator.hpp>
45 #include <serial/objistrasn.hpp>
46 
47 // Objects includes
53 
58 
60 #include <objects/seq/Annot_id.hpp>
64 #include <objects/seq/Seq_inst.hpp>
67 
84 
87 
88 #include <objmgr/feat_ci.hpp>
89 
97 #include <objtools/error_codes.hpp>
98 
103 
104 
105 #include <algorithm>
106 
107 //#include "gff3_data.hpp"
108 
109 #define NCBI_USE_ERRCODE_X Objtools_Rd_RepMask
110 
112 
113 BEGIN_objects_SCOPE // namespace ncbi::objects::
114 
115 // ----------------------------------------------------------------------------
// Fetches the local string id of `annot`.
// Returns the address of the string obtained through GetLocal().GetStr() on
// the annot's only id. Returns 0 when the annot has no id, does not have
// exactly one id, or when that id is not a local id.
// NOTE(review): the opening line of this signature (return type and name) is
// not present in this listing; later code calls s_GetAnnotId() and stores the
// result in a `const string*` -- confirm against the real file.
117  const CSeq_annot& annot)
118 // ----------------------------------------------------------------------------
119 {
120  if ( ! annot.CanGetId() || annot.GetId().size() != 1 ) {
121  // internal error
122  return 0;
123  }
124 
125  CRef< CAnnot_id > pId = *( annot.GetId().begin() );
126  if ( ! pId->IsLocal() ) {
127  // internal error
128  return 0;
129  }
130  return &pId->GetLocal().GetStr();
131 }
132 
133 // ----------------------------------------------------------------------------
// Constructor fragment: passes iFlags, name and title on to the CReaderBase
// base, and starts out with m_pErrors set to 0, a feature count of zero and
// the "parsing alignment" flag cleared. The body itself is empty.
// NOTE(review): the line carrying the constructor's name is absent from this
// listing.
135  int iFlags,
136  const string& name,
137  const string& title):
138 // ----------------------------------------------------------------------------
139  CReaderBase(iFlags, name, title),
140  m_pErrors(0),
141  mCurrentFeatureCount(0),
142  mParsingAlignment(false)
143 {
144 }
145 
146 // ----------------------------------------------------------------------------
// NOTE(review): the signature line is absent from this listing. Given the
// empty body and the position right after the constructor this is presumably
// the destructor -- confirm. No work is done here.
148 // ----------------------------------------------------------------------------
149 {
150 }
151 
152 // ---------------------------------------------------------------------------
// Stream based entry point: calls xReadInit(), wraps `istr` in a
// CStreamLineReader and hands over to the ReadSeqAnnots() call that takes a
// line reader, passing `annots` and `pMessageListener` through unchanged.
// NOTE(review): the line carrying this function's name is absent from this
// listing.
153 void
155  TAnnotList& annots,
156  CNcbiIstream& istr,
157  ILineErrorListener* pMessageListener )
158 // ---------------------------------------------------------------------------
159 {
160  xReadInit();
161  CStreamLineReader lr( istr );
162  ReadSeqAnnots( annots, lr, pMessageListener );
163 }
164 
165 // ---------------------------------------------------------------------------
// Reads annots from the line reader `lr` and appends them to `annots`.
// With fNewCode set in m_iFlags the work is delegated to ReadSeqAnnotsNew().
// Otherwise a complete Seq-entry is read through ReadSeqEntry() and every
// CSeq_annot found inside it by a CTypeIterator is appended to `annots`.
// NOTE(review): the line carrying this function's name is absent from this
// listing.
166 void
168  TAnnotList& annots,
169  ILineReader& lr,
170  ILineErrorListener* pMessageListener )
171 // ----------------------------------------------------------------------------
172 {
173  xProgressInit(lr);
174 
175  if ( m_iFlags & fNewCode ) {
176  ReadSeqAnnotsNew(annots, lr, pMessageListener);
177  }
178  else {
179  CRef< CSeq_entry > entry = ReadSeqEntry(lr, pMessageListener);
180  CTypeIterator<CSeq_annot> annot_iter( *entry );
181  for (; annot_iter; ++annot_iter) {
182  annots.push_back(CRef<CSeq_annot>(annot_iter.operator->()));
183  }
184  }
185 }
186 
187 // ----------------------------------------------------------------------------
// Reads lines from `lr` and assembles a single CSeq_annot from them.
// Returns the annot after xPostProcessAnnot(), or a null CRef when no feature
// was counted (mCurrentFeatureCount is zero). If IsCanceled() reports true the
// annot built so far is returned right after the error has been processed.
// NOTE(review): several lines of this function are absent from this listing
// (the return type and name, the declaration of `alignments`, and the
// construction of `pErr`); the comments below cover the visible lines only.
190  ILineReader& lr,
191  ILineErrorListener* pEC )
192 // ----------------------------------------------------------------------------
193 {
194  CRef<CSeq_annot> pAnnot;
195  pAnnot.Reset(new CSeq_annot);
196 
198  mParsingAlignment = false;
199 
201  list<string> id_list;
202 
203 
204  string line;
205  while (xGetLine(lr, line)) {
206 
207  if (IsCanceled()) {
210  eDiag_Info,
211  0,
212  "Reader stopped by user.",
214  ProcessError(*pErr, pEC);
215  return pAnnot;
216  }
217  xReportProgress(pEC);
218  if ( xParseStructuredComment(line) ) {
219  continue;
220  }
// A track line is only consumed while no feature has been counted yet;
// otherwise it is pushed back onto the reader and reading stops here.
221  if (xIsTrackLine(line)) {
222  if (!mCurrentFeatureCount) {
223  xParseTrackLine(line, pEC);
224  continue;
225  }
226  xUngetLine(lr);
227  break;
228  }
229  if (xParseBrowserLine(line, pAnnot, pEC)) {
230  continue;
231  }
232 
// A line for which xIsCurrentDataType() is false is pushed back and ends the
// loop.
233  if (!xIsCurrentDataType(line)) {
234  xUngetLine(lr);
235  break;
236  }
237 
// Alignment lines are collected per id in `alignments`; they are merged into
// the annot after the loop.
238  if ( CGff2Reader::IsAlignmentData(line) &&
239  x_ParseAlignmentGff(line, id_list, alignments)) {
240  continue;
241  }
242 
243  if (xParseFeature(line, pAnnot, pEC)) {
244  continue;
245  }
246  }
247 
248 
249  if (!mCurrentFeatureCount) {
250  return CRef<CSeq_annot>();
251  }
252 
253  if (!alignments.empty()) {
254  x_ProcessAlignmentsGff(id_list, alignments, pAnnot);
255  }
256 
257  xPostProcessAnnot(pAnnot, pEC);
258  return pAnnot;
259 }
260 
261 // ---------------------------------------------------------------------------
// Reads annots from `lr` into `annots`.
// With fGenbankMode set in m_iFlags, every line is run through the structured
// comment check and then x_ParseDataGff(); a CObjReaderLineException raised
// while handling a line is passed to ProcessError() and reading goes on. If
// IsCanceled() reports true, `annots` is emptied and the function returns.
// Without fGenbankMode, ReadSeqAnnot() is called repeatedly and each non-null
// result is appended to `annots`.
// NOTE(review): several lines of this function are absent from this listing
// (its name, the declaration of `alignments`, the construction of `pErr`, two
// `if` conditions inside the try block, and the first line of the catch
// body); the comments here cover the visible lines only.
262 void
264  TAnnots& annots,
265  ILineReader& lr,
266  ILineErrorListener* pEC )
267 // ----------------------------------------------------------------------------
268 {
269  xProgressInit(lr);
270 
271  if (m_iFlags&fGenbankMode) {
272  CRef<CSeq_annot> pAnnot;
273  pAnnot.Reset(new CSeq_annot);
274 
276  list<string> id_list;
277 
278  string line;
279  while (xGetLine(lr, line)) {
280  if (IsCanceled()) {
283  eDiag_Info,
284  0,
285  "Reader stopped by user.",
287  ProcessError(*pErr, pEC);
288  annots.clear();
289  return;
290  }
291  xReportProgress(pEC);
292 
293  try {
294  if (xParseStructuredComment(line)) {
295  continue;
296  }
298  continue;
299  }
301  continue;
302  }
303 
304  if ( ! x_ParseDataGff(line, annots, pEC) ) {
305  continue;
306  }
307  }
308  catch( CObjReaderLineException& err ) {
310  ProcessError(err, pEC);
311  }
312  }
313 
// NOTE(review): in the visible lines pAnnot is handed to
// x_ProcessAlignmentsGff() but never appended to `annots` -- confirm whether
// that is intended.
314  if (!alignments.empty()) {
315  x_ProcessAlignmentsGff(id_list, alignments, pAnnot);
316  }
317  return;
318  }
319 
320  //main line code:
321  CRef<CSeq_annot> pAnnot = ReadSeqAnnot(lr, pEC);
322  while (pAnnot) {
323  annots.push_back(pAnnot);
324  pAnnot = ReadSeqAnnot(lr, pEC);
325  }
326  return;
327 }
328 
329 // ----------------------------------------------------------------------------
// Reads all annots from `lr` through ReadSeqAnnotsNew() and packs them into a
// Seq-entry of the "set" kind, which is returned.
// Each annot gets its own CBioseq carrying the local id "gff-import", with
// representation and molecule type left at "not set"; that bioseq is wrapped
// in a CSeq_entry and appended to the set.
// NOTE(review): the return type and name of this function are absent from
// this listing; other code calls ReadSeqEntry(lr, ...) and stores the result
// in a CRef<CSeq_entry> -- confirm.
332  ILineReader& lr,
333  ILineErrorListener* pMessageListener )
334 // ----------------------------------------------------------------------------
335 {
336  xProgressInit(lr);
337 
338  TAnnots annots;
339  ReadSeqAnnotsNew( annots, lr, pMessageListener );
340 
341  CRef<CSeq_entry> pSeqEntry(new CSeq_entry());
342  pSeqEntry->SetSet();
343 
344  for (TAnnots::iterator it = annots.begin();
345  it != annots.end(); ++it) {
346  CRef<CBioseq> pSeq( new CBioseq() );
347  pSeq->SetAnnot().push_back(*it);
348  pSeq->SetId().push_back( CRef<CSeq_id>(
349  new CSeq_id(CSeq_id::e_Local, "gff-import") ) );
350  pSeq->SetInst().SetRepr(CSeq_inst::eRepr_not_set);
351  pSeq->SetInst().SetMol(CSeq_inst::eMol_not_set);
352 
353  CRef<CSeq_entry> pEntry(new CSeq_entry());
354  pEntry->SetSeq(*pSeq);
355  pSeqEntry->SetSet().SetSeq_set().push_back( pEntry );
356  }
357  return pSeqEntry;
358 }
359 
360 // ----------------------------------------------------------------------------
// Generic-object entry point: runs ReadSeqEntry() on `lr` and returns the
// result as a CRef<CSerialObject>.
// NOTE(review): the return type and name of this function are absent from
// this listing.
363  ILineReader& lr,
364  ILineErrorListener* pMessageListener )
365 // ----------------------------------------------------------------------------
366 {
367  CRef<CSerialObject> object(
368  ReadSeqEntry( lr, pMessageListener ).ReleaseOrNull() );
369  return object;
370 }
371 
372 // ----------------------------------------------------------------------------
// Finishing steps applied to a freshly read annot: conversion info, track
// data and the annot id are added, in that order, by the helpers called below.
// NOTE(review): the line carrying this function's name and the last statement
// of the body are absent from this listing; ReadSeqAnnot() calls
// xPostProcessAnnot(pAnnot, pEC), which matches this parameter list.
374  CRef<CSeq_annot>& pAnnot,
375  ILineErrorListener *pEC)
376 // ----------------------------------------------------------------------------
377 {
378  xAddConversionInfo(pAnnot, pEC);
379  xAssignTrackData(pAnnot);
380  xAssignAnnotId(pAnnot);
382 }
383 
384 // ----------------------------------------------------------------------------
// Appends a local Annot-id to pAnnot.
//  - givenId empty and the annot holds alignments: nothing is added.
//  - givenId empty and the annot holds a feature table: the id string of the
//    first feature's location is used; an empty table adds nothing.
//  - otherwise the id string is givenId (which may still be empty when the
//    annot data is neither of the two kinds above).
// NOTE(review): the line carrying this function's name is absent from this
// listing; xPostProcessAnnot() calls xAssignAnnotId(pAnnot) with one argument.
386  CRef<CSeq_annot>& pAnnot,
387  const string& givenId)
388 // ----------------------------------------------------------------------------
389 {
390  if (givenId.empty() && pAnnot->GetData().IsAlign()) {
391  return;
392  }
393 
394  string annotId(givenId);
395  if (annotId.empty() && pAnnot->GetData().IsFtable()) {
// `ftable` is declared without `&`, so this takes a copy of the table.
396  const CSeq_annot::TData::TFtable ftable = pAnnot->GetData().GetFtable();
397  if (ftable.empty()) {
398  return;
399  }
400  const CSeq_feat& front = *ftable.front();
401  annotId = front.GetLocation().GetId()->GetSeqIdString(true);
402  }
403 
404  CRef< CAnnot_id > pAnnotId(new CAnnot_id);
405  pAnnotId->SetLocal().SetStr(annotId);
406  pAnnot->SetId().push_back(pAnnotId);
407 }
408 
409 
410 // ----------------------------------------------------------------------------
// Stores one key/value pair of track data.
// Key "name" becomes a name descriptor on the entry, key "description" becomes
// a title descriptor on the entry; any other key is added as a field to
// `trackdata`.
// NOTE(review): the line carrying this function's return type and name is
// absent from this listing.
412  CRef<CSeq_entry>& entry,
413  CRef<CUser_object>& trackdata,
414  const string& strKey,
415  const string& strValue )
416 // ----------------------------------------------------------------------------
417 {
418  CSeq_descr& descr = entry->SetDescr();
419 
420  if ( strKey == "name" ) {
421  CRef<CSeqdesc> name( new CSeqdesc() );
422  name->SetName( strValue );
423  descr.Set().push_back( name );
424  return;
425  }
426  if ( strKey == "description" ) {
427  CRef<CSeqdesc> title( new CSeqdesc() );
428  title->SetTitle( strValue );
429  descr.Set().push_back( title );
430  return;
431  }
432  trackdata->AddField( strKey, strValue );
433 }
434 
435 // ----------------------------------------------------------------------------
// Returns true exactly when strLine starts with "##"; the line content is not
// examined any further here.
// NOTE(review): the line carrying this function's return type and name is
// absent from this listing.
437  const string& strLine)
438 // ----------------------------------------------------------------------------
439 {
440  if ( ! NStr::StartsWith( strLine, "##" ) ) {
441  return false;
442  }
443  return true;
444 }
445 
446 // ----------------------------------------------------------------------------
// Handles one data line.
// Lines recognized by IsAlignmentData() are accepted (true is returned)
// without being parsed -- both branches below return true and the alignment
// parsing call is commented out. All other lines go to x_ParseFeatureGff(),
// whose result is returned.
// NOTE(review): the line carrying this function's return type and name is
// absent from this listing; ReadSeqAnnotsNew() calls
// x_ParseDataGff(line, annots, pEC), which matches this parameter list.
448  const string& strLine,
449  TAnnots& annots,
450  ILineErrorListener* pEC)
451 // ----------------------------------------------------------------------------
452 {
453  if (CGff2Reader::IsAlignmentData(strLine)) {
454  if (m_iFlags&fGenbankMode) {
455  return true;
456  }
457  //return x_ParseAlignmentGff(strLine, annots);
458  return true;
459  }
460  return x_ParseFeatureGff(strLine, annots, pEC);
461 }
462 
463 // ----------------------------------------------------------------------------
// Parses one feature line and appends the resulting feature to pAnnot.
// Returns false for alignment lines, for lines the record cannot be assigned
// from (a CObjReaderLineException is passed to ProcessError()), and when
// x_UpdateAnnotFeature() fails. Returns true when the feature was added, and
// also when its type is one that xIsIgnoredFeatureType() reports as ignored.
// NOTE(review): the line carrying this function's name, the creation of
// `pRecord`, and one statement before `mParsingAlignment = false` are absent
// from this listing.
464 bool
466  const string& line,
467  CRef<CSeq_annot>& pAnnot,
468  ILineErrorListener* pEC)
469 // ----------------------------------------------------------------------------
470 {
471  if (CGff2Reader::IsAlignmentData(line)) {
472  return false;
473  }
474 
475  //parse record:
477  try {
478  if (!pRecord->AssignFromGff(line)) {
479  return false;
480  }
481  }
482  catch(CObjReaderLineException& err) {
483  ProcessError(err, pEC);
484  return false;
485  }
486 
487  //make sure we are interested:
488  string ftype = pRecord->Type();
489  if (xIsIgnoredFeatureType(ftype)) {
490  return true;
491  }
492 
493  //append feature to annot:
494  if (!x_UpdateAnnotFeature(*pRecord, pAnnot, pEC)) {
495  return false;
496  }
497 
499  mParsingAlignment = false;
500  return true;
501 }
502 
503 
504 // ----------------------------------------------------------------------------
// Fills score_values with a copy of every score of `alignment` that has a
// string id and a value, keyed by that id string. score_values is emptied
// first and stays empty when the alignment has no scores set.
// NOTE(review): the opening line of this signature (return type, name and the
// declaration of `alignment`) is absent from this listing.
506  TScoreValueMap& score_values) const
507 // ----------------------------------------------------------------------------
508 {
509  // Start with empty scores
510  score_values.clear();
511 
512  if (!alignment.IsSetScore()) {
513  return;
514  }
515 
516  for (const CRef<CScore>& score : alignment.GetScore()) {
517 
// Scores without an id, with a non-string id, or without a value are skipped.
518  if (!score->IsSetId() ||
519  !score->GetId().IsStr() ||
520  !score->IsSetValue()) {
521  continue;
522  }
523  const string name = score->GetId().GetStr();
524  const CScore::TValue& value = score->GetValue();
// A later score with the same name replaces the earlier entry.
525  score_values[name] = Ref(new CScore::TValue());
526  score_values[name]->Assign(value);
527  }
528 }
529 
530 
531 // ----------------------------------------------------------------------------
532 bool s_CompareValues(const CScore::TValue& score_val1,
533  const CScore::TValue& score_val2)
534 // ----------------------------------------------------------------------------
535 {
536 
537  if (score_val1.IsInt() &&
538  score_val2.IsInt() &&
539  score_val1.GetInt() == score_val2.GetInt()) {
540  return true;
541  }
542 
543  if (score_val1.IsReal() &&
544  score_val2.IsReal() &&
545  score_val1.GetReal() == score_val2.GetReal()) {
546  return true;
547  }
548 
549  return false;
550 }
551 
552 // Result is a set of matching scores
553 // ----------------------------------------------------------------------------
// Collects into matching_scores the names of all scores of scores_1 that are
// also present in scores_2 with a value that s_CompareValues() accepts as
// equal. matching_scores is emptied first.
// NOTE(review): the opening line of this signature (return type, name and the
// declaration of `scores_1`) is absent from this listing.
555  const TScoreValueMap& scores_2,
556  set<string>& matching_scores) const
557 // ----------------------------------------------------------------------------
558 {
559  matching_scores.clear();
560 
561  for (const auto& score1 : scores_1) {
562  const string& name = score1.first;
563  const CScore::TValue& value = *(score1.second);
564 
565  const auto& it = scores_2.find(name);
566  if (it != scores_2.end() &&
567  s_CompareValues(value, *(it->second))) {
568  matching_scores.insert(name);
569  }
570  }
571 }
572 
573 
574 // ----------------------------------------------------------------------------
575 void CGff2Reader::x_ProcessAlignmentsGff(const list<string>& id_list,
576  const map<string, list<CRef<CSeq_align>>>& alignments,
577  CRef<CSeq_annot> pAnnot)
578 // ----------------------------------------------------------------------------
579 {
580  if (pAnnot.IsNull()) {
581  pAnnot = Ref(new CSeq_annot());
582  }
583 
584  for (const auto id : id_list) {
585  CRef<CSeq_align> pAlign = Ref(new CSeq_align());
586  if (x_MergeAlignments(alignments.at(id), pAlign)) {
587  // if available, add current browser information
588  if ( m_CurrentBrowserInfo ) {
589  pAnnot->SetDesc().Set().push_back( m_CurrentBrowserInfo );
590  }
591 
592  pAnnot->SetNameDesc("alignments");
593 
594  if ( !m_AnnotTitle.empty() ) {
595  pAnnot->SetTitleDesc(m_AnnotTitle);
596  }
597  // Add alignment
598  pAnnot->SetData().SetAlign().push_back(pAlign);
599  }
600  }
601 }
602 
603 
604 // ----------------------------------------------------------------------------
// Parses one alignment line and files the resulting alignment in `alignments`
// under the record's "ID" attribute, or under the record's Id() when there is
// no such attribute. Returns false when the record cannot be assigned from
// the line or when x_CreateAlignment() fails.
// NOTE(review): the id is appended to id_list before x_CreateAlignment() is
// attempted, so after a failure id_list can hold an id that has no entry in
// `alignments`, and the same id can be appended again by a later line.
// NOTE(review): the opening line of this signature (return type and name) and
// one statement before `mParsingAlignment = true` are absent from this
// listing.
606  const string& strLine,
607  list<string>& id_list, // Add id to alignment
608  map<string, list<CRef<CSeq_align>>>& alignments)
609 // ----------------------------------------------------------------------------
610 {
611  unique_ptr<CGff2Record> pRecord(x_CreateRecord());
612 
613  if ( !pRecord->AssignFromGff(strLine) ) {
614  return false;
615  }
616 
617  string id;
618  if ( !pRecord->GetAttribute("ID", id) ) {
619  id = pRecord->Id();
620  }
621 
// Remember the order in which ids are seen for the first time.
622  if (alignments.find(id) == alignments.end()) {
623  id_list.push_back(id);
624  }
625 
626  CRef<CSeq_align> alignment;
627  if (!x_CreateAlignment(*pRecord, alignment)) {
628  return false;
629  }
630 
631  alignments[id].push_back(alignment);
632 
634  mParsingAlignment = true;
635  return true;
636 }
637 
638 
639 
640 // ----------------------------------------------------------------------------
// Seeds summed_scores with the integer values of the "num_ident" and
// "num_mismatch" scores, for whichever of the two is present in score_values.
// Existing entries of summed_scores under other names are left alone.
// NOTE(review): the opening line of this signature (return type, name and the
// declaration of `score_values`) is absent from this listing.
642  map<string, TSeqPos>& summed_scores) const
643 // ----------------------------------------------------------------------------
644 {
645  const list<string> score_names {"num_ident", "num_mismatch"};
646 
647  for (const string& score_name : score_names) {
648  if (score_values.find(score_name) != score_values.end()) {
649  summed_scores[score_name] = score_values.at(score_name)->GetInt();
650  }
651  }
652 }
653 
654 
655 // ----------------------------------------------------------------------------
// Folds the scores of one more alignment into the running state.
//  - For "num_ident" and "num_mismatch": when the alignment lacks the score,
//    its running sum is dropped from summed_scores; when a running sum exists,
//    the alignment's integer value is added to it and the score is taken out
//    of the local score map.
//  - common_scores is then reduced to those of its scores that the alignment
//    also carries with an equal value (see x_FindMatchingScores).
// NOTE(review): the opening line of this signature (return type, name and the
// declaration of `alignment`) is absent from this listing.
657  map<string, TSeqPos>& summed_scores,
658  TScoreValueMap& common_scores) const
659 // ----------------------------------------------------------------------------
660 {
661  const list<string> summed_score_names {"num_ident", "num_mismatch"};
662 
663  TScoreValueMap new_scores;
664  x_GetAlignmentScores(alignment, new_scores);
665 
666  for (const string& score_name : summed_score_names) {
667  if (new_scores.find(score_name) == new_scores.end()) {
668  summed_scores.erase(score_name);
669  } else if (summed_scores.find(score_name) != summed_scores.end()) {
670  summed_scores[score_name] += new_scores[score_name]->GetInt();
671  new_scores.erase(score_name);
672  }
673  }
674 
675  set<string> matching_score_names;
676  x_FindMatchingScores(common_scores,
677  new_scores,
678  matching_score_names);
679 
// Rebuild common_scores from scratch, keeping only the matching names.
680  common_scores.clear();
681  for (string score_name : matching_score_names) {
682  common_scores[score_name] = Ref(new CScore::TValue());
683  common_scores[score_name]->Assign(*new_scores[score_name]);
684  }
685 }
686 
687 
688 // ----------------------------------------------------------------------------
// Combines the alignments of alignment_list into `processed`.
//  - Empty list: returns false.
//  - One alignment: `processed` is pointed at that alignment; returns true.
//  - Several alignments: scores that are equal across the rows are factored
//    out and "num_ident"/"num_mismatch" are summed. When the first alignment
//    has spliced segments, `processed` becomes a global alignment whose
//    spliced segment is a copy of the first one's, with the exons of the
//    remaining alignments appended. Otherwise `processed` becomes a disc
//    alignment holding a copy of every alignment, each stripped of the
//    factored-out scores. Returns true.
// In the multi-alignment case `processed` is written through, so it must
// refer to an object on entry.
// NOTE(review): the opening line of this signature (return type and name) is
// absent from this listing; x_ProcessAlignmentsGff() calls
// x_MergeAlignments(list, pAlign) and tests the result as a boolean.
690  const list<CRef<CSeq_align>>& alignment_list,
691  CRef<CSeq_align>& processed)
692 // ----------------------------------------------------------------------------
693 {
694  if (alignment_list.empty()) {
695  return false;
696  }
697 
698  if (alignment_list.size() == 1) {
699  processed = alignment_list.front();
700  return true;
701  }
702 
703  map<string, TSeqPos> summed_scores;
// NOTE(review): summed_score_names is not referenced again in this function.
704  const list<string> summed_score_names {"num_ident", "num_mismatch"};
705 
706  // Factor out identical scores
707  list<CRef<CSeq_align>>::const_iterator align_it = alignment_list.begin();
708  TScoreValueMap score_values;
709  x_GetAlignmentScores(**align_it, score_values);
710 
711  x_InitializeScoreSums(score_values,
712  summed_scores);
713  ++align_it;
714 
// NOTE(review): the loop also stops as soon as score_values becomes empty, in
// which case the remaining alignments are not added to summed_scores --
// confirm that this is intended.
715  while (align_it != alignment_list.end() &&
716  !score_values.empty()) {
717 
718  x_ProcessAlignmentScores(**align_it, summed_scores, score_values);
719  ++align_it;
720  }
721  // At this point, the score_values map should contain the scores that
722  // do not change over the rows
723 
724  const auto first_alignment = alignment_list.front();
725  if (first_alignment->IsSetSegs() &&
726  first_alignment->GetSegs().IsSpliced()) {
727 
728  processed->SetType(CSeq_align::eType_global);
729 
730  if (first_alignment->IsSetDim()) {
731  processed->SetDim(first_alignment->GetDim());
732  }
733 
// Summed scores first, then the scores shared by all rows.
734  for (auto& kv : summed_scores) {
735  auto score = Ref(new CScore());
736  score->SetId().SetStr(kv.first);
737  score->SetValue().SetInt(kv.second);
738  processed->SetScore().push_back(score);
739  }
740 
741  for (auto& kv : score_values) {
742  auto score = Ref(new CScore());
743  score->SetId().SetStr(kv.first);
744  score->SetValue().Assign(*(kv.second));
745  processed->SetScore().push_back(score);
746  }
747 
748  CRef<CSpliced_seg> spliced = Ref(new CSpliced_seg());
749  spliced->Assign(first_alignment->GetSegs().GetSpliced());
750  processed->SetSegs().SetSpliced(*spliced);
751 
// This align_it shadows the iterator declared above; it restarts at the
// second alignment to collect the remaining exons.
752  auto align_it = alignment_list.cbegin();
753  ++align_it;
754 
755  while(align_it != alignment_list.end()) {
756  const auto& spliced_seg = (*align_it)->GetSegs().GetSpliced();
757  if (spliced_seg.IsSetExons()) {
758  for (auto exon : spliced_seg.GetExons()) {
759  processed->SetSegs().SetSpliced().SetExons().push_back(exon);
760  }
761  }
762  ++align_it;
763  }
764  return true;
765  }
766 
767 
768  processed->SetType(CSeq_align::eType_disc);
769 
770  for (auto& kv : summed_scores) {
771  auto score = Ref(new CScore());
772  score->SetId().SetStr(kv.first);
773  score->SetValue().SetInt(kv.second);
774  processed->SetScore().push_back(score);
775  }
776 
777  for (auto& kv : score_values) {
778  auto score = Ref(new CScore());
779  score->SetId().SetStr(kv.first);
780  score->SetValue().Assign(*(kv.second));
781  processed->SetScore().push_back(score);
782  }
783 
// Each row is copied; only the scores that were not factored out into
// score_values are kept on the copy.
784  for (auto current : alignment_list) {
785  auto new_align = Ref(new CSeq_align());
786  new_align->Assign(*current);
787  new_align->ResetScore();
788 
789  for (CRef<CScore> score : current->GetScore()) {
790  const string& score_name = score->GetId().GetStr();
791  if (score_values.find(score_name) == score_values.end()) {
792  new_align->SetScore().push_back(score);
793  }
794  }
795  processed->SetSegs().SetDisc().Set().push_back(new_align);
796  }
797 
798  return true;
799 }
800 
801 
802 // ----------------------------------------------------------------------------
// Parses one alignment line and adds the alignment to pAnnot through
// x_UpdateAnnotAlignment(). Returns false for lines that are not alignment
// data, for lines the record cannot be assigned from (a
// CObjReaderLineException is passed to ProcessError()), and when the annot
// update fails. On success the "parsing alignment" flag is set.
// NOTE(review): the line carrying this function's name, the creation of
// `pRecord`, and one statement before `mParsingAlignment = true` are absent
// from this listing.
803 bool
805  const string& line,
806  CRef<CSeq_annot>& pAnnot,
807  ILineErrorListener* pEC)
808 // ----------------------------------------------------------------------------
809 {
810  if (!CGff2Reader::IsAlignmentData(line)) {
811  return false;
812  }
813 
814  //parse record:
816  try {
817  if ( ! pRecord->AssignFromGff(line) ) {
818  return false;
819  }
820  }
821  catch(CObjReaderLineException& err) {
822  ProcessError(err, pEC);
823  return false;
824  }
825 
826  if (!x_UpdateAnnotAlignment(*pRecord, pAnnot, pEC)) {
827  return false;
828  }
829 
831  mParsingAlignment = true;
832  return true;
833 }
834 
835 // ----------------------------------------------------------------------------
// Decides something about `line` depending on whether IsAlignmentData()
// recognizes it.
// NOTE(review): the line carrying this function's name and both return
// statements are absent from this listing, so the actual results cannot be
// read off here; ReadSeqAnnot() uses xIsCurrentDataType(line) to decide
// whether to stop reading -- confirm against the real file.
836 bool
838  const string& line)
839 // ----------------------------------------------------------------------------
840 {
841  if (CGff2Reader::IsAlignmentData(line)) {
843  }
845 }
846 
847 // ----------------------------------------------------------------------------
// Parses one feature line and adds the feature to the feature-table annot in
// `annots` whose local id equals the record's Id(); when there is no such
// annot, a new one is created through x_InitAnnot() and appended to `annots`.
// Returns true on success and for feature types reported as ignored; returns
// false when the record cannot be assigned from the line, when a feature
// table annot without a usable local id is met during the search, or when
// updating/initializing the annot fails.
// NOTE(review): the opening line of this signature (return type and name) and
// the creation of `pRecord` are absent from this listing; x_ParseDataGff()
// calls x_ParseFeatureGff(strLine, annots, pEC).
849  const string& strLine,
850  TAnnots& annots,
851  ILineErrorListener* pEC)
852 // ----------------------------------------------------------------------------
853 {
854  //
855  // Parse the record and determine which ID the given feature will pertain
856  // to:
857  //
859  try {
860  if (!pRecord->AssignFromGff(strLine)) {
861  return false;
862  }
863  }
864  catch(CObjReaderLineException& err) {
865  ProcessError(err, pEC);
866  return false;
867  }
868  string ftype = pRecord->Type();
869  if (xIsIgnoredFeatureType(ftype)) {
870  return true;
871  }
872 
873  //
874  // Search annots for a pre-existing annot pertaining to the same ID:
875  //
876  TAnnotIt it = annots.begin();
877  for ( /*NOOP*/; it != annots.end(); ++it ) {
878  if (!(**it).IsFtable()) continue;
879  const string* strAnnotId = s_GetAnnotId(**it);
880  if (strAnnotId == 0) {
881  return false;
882  }
883  if ( pRecord->Id() == *strAnnotId ) {
884  break;
885  }
886  }
887 
888  //
889  // If a preexisting annot was found, update it with the new feature
890  // information:
891  //
892  if (it != annots.end()) {
893  if ( ! x_UpdateAnnotFeature( *pRecord, *it, pEC ) ) {
894  return false;
895  }
896  }
897 
898  //
899  // Otherwise, create a new annot pertaining to the new ID and initialize it
900  // with the given feature information:
901  //
902  else {
903  CRef< CSeq_annot > pAnnot( new CSeq_annot );
904  if ( ! x_InitAnnot( *pRecord, pAnnot, pEC ) ) {
905  return false;
906  }
907  annots.push_back(pAnnot);
908  //annots.insert(annots.end(), pAnnot );
909  }
910  return true;
911 };
912 
913 
914 
915 // ----------------------------------------------------------------------------
// Parses one alignment line and adds the alignment to the alignment annot in
// `annots` whose local id equals the record's Id(); when there is no such
// annot, a new one is created through x_InitAnnot() and inserted at the front
// of `annots`. Returns false when the record cannot be assigned from the
// line, when an alignment annot without a usable local id is met during the
// search, or when updating/initializing the annot fails.
// NOTE(review): the opening line of this signature (return type and name) and
// the creation of `pRecord` are absent from this listing.
917  const string& strLine,
918  TAnnots& annots )
919 // ----------------------------------------------------------------------------
920 {
921  //
922  // Parse the record and determine which ID the given feature will pertain
923  // to:
924  //
926  if ( ! pRecord->AssignFromGff( strLine ) ) {
927  return false;
928  }
929 
930  //
931  // Search annots for a pre-existing annot pertaining to the same ID:
932  //
933  TAnnotIt it = annots.begin();
934  for ( /*NOOP*/; it != annots.end(); ++it ) {
935  if (!(**it).IsAlign()) continue;
936  const string* strAnnotId = s_GetAnnotId(**it);
937  if (!strAnnotId) {
938  return false;
939  }
940  if ( pRecord->Id() == *strAnnotId ) {
941  break;
942  }
943  }
944 
945  //
946  // If a preexisting annot was found, update it with the new feature
947  // information:
948  //
949  if ( it != annots.end() ) {
950  if ( ! x_UpdateAnnotAlignment( *pRecord, *it ) ) {
951  return false;
952  }
953  }
954 
955  //
956  // Otherwise, create a new annot pertaining to the new ID and initialize it
957  // with the given feature information:
958  //
959  else {
960  CRef< CSeq_annot > pAnnot( new CSeq_annot );
961  if ( ! x_InitAnnot( *pRecord, pAnnot ) ) {
962  return false;
963  }
964  annots.insert(annots.begin(), pAnnot );
965  }
966 
967  return true;
968 };
969 
970 // ----------------------------------------------------------------------------
// Recognizes a "browser" line and turns it into a user-object descriptor.
// Returns false when strRawInput does not start with "browser", true
// otherwise. The line is split on blanks and tabs; the columns after the
// leading word are taken as key/value pairs and stored as fields of a user
// object of type "browser". When there is only one column or the column count
// is even (pairs do not come out evenly), pAnnotDesc is reset to null
// instead.
// NOTE(review): the line carrying this function's return type and name is
// absent from this listing.
972  const string& strRawInput,
973  CRef< CAnnotdesc >& pAnnotDesc )
974 // ----------------------------------------------------------------------------
975 {
976  if ( ! NStr::StartsWith( strRawInput, "browser" ) ) {
977  return false;
978  }
979  vector< string > columns;
980  NStr::Split( strRawInput, " \t", columns, NStr::eMergeDelims );
981 
982  if ( columns.size() <= 1 || 1 != ( columns.size() % 2 ) ) {
983  // don't know how to unwrap this
984  pAnnotDesc.Reset();
985  return true;
986  }
987  pAnnotDesc.Reset( new CAnnotdesc );
988  CUser_object& user = pAnnotDesc->SetUser();
989  user.SetType().SetStr( "browser" );
990 
991  for ( size_t u = 1 /* skip "browser" */; u < columns.size(); u += 2 ) {
992  user.AddField( columns[ u ], columns[ u+1 ] );
993  }
994  return true;
995 };
996 
997 // ----------------------------------------------------------------------------
// Recognizes a "track" line and turns it into a user-object descriptor.
// Returns false when strRawInput does not start with "track", true otherwise.
// Blanks inside double-quoted stretches are temporarily replaced by '+' so
// that splitting on blanks and tabs keeps quoted values in one piece. Each
// column after the leading word is split at "=" into key and value; enclosing
// double quotes are removed from the value, '+' is turned back into a blank,
// and the pair is stored as a field of a user object of type "track". When
// there is only one column, pAnnotDesc is reset to null instead.
// NOTE(review): every '+' in a value is turned into a blank, including one
// that was already in the input -- confirm that this is acceptable.
// NOTE(review): the line carrying this function's return type and name and
// two lines inside the column loop are absent from this listing.
999  const string& strRawInput,
1000  CRef< CAnnotdesc >& pAnnotDesc )
1001 // ----------------------------------------------------------------------------
1002 {
1003  const char cBlankReplace( '+' );
1004 
1005  if ( ! NStr::StartsWith( strRawInput, "track" ) ) {
1006  return false;
1007  }
1008 
1009  string strCookedInput( strRawInput );
1010  bool bInString = false;
1011  for ( size_t u=0; u < strCookedInput.length(); ++u ) {
1012  if ( strCookedInput[u] == ' ' && bInString ) {
1013  strCookedInput[u] = cBlankReplace;
1014  }
1015  if ( strCookedInput[u] == '\"' ) {
1016  bInString = !bInString;
1017  }
1018  }
1019  vector< string > columns;
1020  NStr::Split( strCookedInput, " \t", columns, NStr::eMergeDelims );
1021 
1022  if ( columns.size() <= 1 ) {
1023  pAnnotDesc.Reset();
1024  return true;
1025  }
1026  pAnnotDesc.Reset( new CAnnotdesc );
1027  CUser_object& user = pAnnotDesc->SetUser();
1028  user.SetType().SetStr( "track" );
1029 
1030  for ( size_t u = 1 /* skip "track" */; u < columns.size(); ++u ) {
1031  string strKey;
1032  string strValue;
1033  NStr::SplitInTwo( columns[u], "=", strKey, strValue );
1035  if ( NStr::StartsWith( strValue, "\"" ) && NStr::EndsWith( strValue, "\"" ) ) {
1036  strValue = strValue.substr( 1, strValue.length() - 2 );
1037  }
// The inner `u` shadows the column index of the enclosing loop.
1038  for ( unsigned u = 0; u < strValue.length(); ++u ) {
1039  if ( strValue[u] == cBlankReplace ) {
1040  strValue[u] = ' ';
1041  }
1042  }
1044  user.AddField( strKey, strValue );
1045  }
1046 
1047  return true;
1048 };
1049 
1050 // ----------------------------------------------------------------------------
// Sets up a new annot for the sequence of `gff` and stores the record in it.
// The annot gets the record's Id() as local id, the current browser info (if
// any), the track defaults (if they contain data), and the current annot name
// and title (if not empty). Alignment records are then added through
// x_UpdateAnnotAlignment(), all others through x_UpdateAnnotFeature(); the
// result of that call is returned.
// NOTE(review): the line carrying this function's return type and name is
// absent from this listing; other code calls x_InitAnnot(record, pAnnot, pEC)
// and tests the result as a boolean.
1052  const CGff2Record& gff,
1053  CRef< CSeq_annot > pAnnot,
1054  ILineErrorListener* pEC )
1055 // ----------------------------------------------------------------------------
1056 {
1057  CRef< CAnnot_id > pAnnotId( new CAnnot_id );
1058  pAnnotId->SetLocal().SetStr( gff.Id() );
1059  pAnnot->SetId().push_back( pAnnotId );
1060  //pAnnot->SetData().SetFtable();
1061 
1062  // if available, add current browser information
1063  if ( m_CurrentBrowserInfo ) {
1064  pAnnot->SetDesc().Set().push_back( m_CurrentBrowserInfo );
1065  }
1066 
1067  // if available, add current track information
1068  if (m_pTrackDefaults->ContainsData() ) {
1069  m_pTrackDefaults->WriteToAnnot(*pAnnot);
1070  }
1071 
1072  if ( !m_AnnotName.empty() ) {
1073  pAnnot->SetNameDesc(m_AnnotName);
1074  }
1075  if ( !m_AnnotTitle.empty() ) {
1076  pAnnot->SetTitleDesc(m_AnnotTitle);
1077  }
1078 
1079  if (gff.IsAlignmentRecord()) {
1080  pAnnot->SetData().SetAlign();
// pEC is not passed on in the alignment case.
1081  return x_UpdateAnnotAlignment( gff, pAnnot );
1082  }
1083  else {
1084  pAnnot->SetData().SetFtable();
1085  return x_UpdateAnnotFeature( gff, pAnnot, pEC );
1086  }
1087 }
1088 
1089 // ----------------------------------------------------------------------------
// Builds a feature from `gff` and adds it to pAnnot.
// The feature's id, location, data, GFF info and qualifiers are filled in by
// the helpers below, in that order; the first helper that fails makes this
// function return false. After the feature has been added to the annot it is
// also registered in m_MapIdToFeature under the record's "ID" attribute,
// unless that ID is already registered.
// NOTE(review): the line carrying this function's return type and name is
// absent from this listing; other code calls
// x_UpdateAnnotFeature(record, pAnnot, pEC) and tests the result as a boolean.
1091  const CGff2Record& gff,
1092  CRef< CSeq_annot > pAnnot,
1093  ILineErrorListener* pEC)
1094 // ----------------------------------------------------------------------------
1095 {
1096  CRef< CSeq_feat > pFeature( new CSeq_feat );
1097 
1098  if ( ! x_FeatureSetId( gff, pFeature ) ) {
1099  return false;
1100  }
1101  if ( ! x_FeatureSetLocation( gff, pFeature ) ) {
1102  return false;
1103  }
1104  if ( ! x_FeatureSetData( gff, pFeature ) ) {
1105  return false;
1106  }
1107  if ( ! x_FeatureSetGffInfo( gff, pFeature ) ) {
1108  return false;
1109  }
1110  if ( ! x_FeatureSetQualifiers( gff, pFeature ) ) {
1111  return false;
1112  }
1113  if (!xAddFeatureToAnnot( pFeature, pAnnot )) {
1114  return false;
1115  }
1116  string strId;
1117  if (gff.GetAttribute("ID", strId) ) {
// The first feature seen for an ID stays registered.
1118  if (m_MapIdToFeature.find(strId) == m_MapIdToFeature.end()) {
1119  m_MapIdToFeature[strId] = pFeature;
1120  }
1121  }
1122  return true;
1123 }
1124 
1125 
// Creates a new two-row alignment for `gff` in pAlign and fills in its score
// and segment through xAlignmentSetScore() and xAlignmentSetSegment().
// Returns false as soon as one of the two fails; pAlign has been replaced by
// the new object even then.
// NOTE(review): the opening line of this signature (return type and name) and
// one statement after the allocation are absent from this listing.
1127  const CGff2Record& gff,
1128  CRef<CSeq_align>& pAlign )
1129 {
1130  pAlign = Ref(new CSeq_align());
1132  pAlign->SetDim(2);
1133 
1134  //score
1135  if (!xAlignmentSetScore(gff, pAlign)) {
1136  return false;
1137  }
1138 
1139  if (!xAlignmentSetSegment(gff, pAlign)) {
1140  return false;
1141  }
1142 
1143  return true;
1144 }
1145 
1146 
1147 // ----------------------------------------------------------------------------
// Builds a two-row alignment from `gff` and appends it to the alignment data
// of pAnnot. Returns false, without touching the annot, when setting the
// score or the segment fails.
// pEC is not used in the visible lines.
// NOTE(review): the opening line of this signature (return type and name) and
// one statement after the allocation are absent from this listing.
1149  const CGff2Record& gff,
1150  CRef< CSeq_annot > pAnnot,
1151  ILineErrorListener* pEC)
1152 // ----------------------------------------------------------------------------
1153 {
1154  CRef<CSeq_align> pAlign( new CSeq_align );
1156  pAlign->SetDim(2);
1157 
1158  //score
1159  if (!xAlignmentSetScore(gff, pAlign)) {
1160  return false;
1161  }
1162  if (!xAlignmentSetSegment(gff, pAlign)) {
1163  return false;
1164  }
1165  pAnnot->SetData().SetAlign().push_back( pAlign ) ;
1166  return true;
1167 }
1168 
1169 
1170 
// Fills the spliced segment of pAlign from `gff` through
// xUpdateSplicedSegment() and returns that call's success.
// NOTE(review): the opening line of this signature (return type, name and the
// declaration of `gff`) and the statement inside the IsSetType() check are
// absent from this listing.
1172  CRef<CSeq_align> pAlign) const
1173 {
1174  if (!pAlign->IsSetType()) {
1176  }
1177  // Need to set a whole bunch of things
1178 
1179  if (!xUpdateSplicedSegment(gff, pAlign->SetSegs().SetSpliced())) {
1180  return false;
1181  }
1182 
1183  return true;
1184 }
1185 
1186 
1187 
// Adds one exon, built from `gff` by xSetSplicedExon(), to the exon list of
// `segment`. Returns false, leaving the exon list untouched, when the exon
// cannot be built.
// NOTE(review): the opening line of this signature (return type and name) and
// the statement inside the IsSetProduct_type() check are absent from this
// listing.
1189  const CGff2Record& gff,
1190  CSpliced_seg& segment) const
1191 {
1192  if (segment.IsSetProduct_type()) {
1194  }
1195 
1196 
1197  CRef<CSpliced_exon> pExon = Ref(new CSpliced_exon());
1198  if (!xSetSplicedExon(gff, pExon)) {
1199  return false;
1200  }
1201 
1202  segment.SetExons().push_back(pExon);
1203 
1204  return true;
1205 }
1206 
1207 
1208 
1209 // ----------------------------------------------------------------------------
// Fills pExon from the record `gff`.
// Genomic start and end are the record's SeqStart() and SeqStop(), each minus
// one; the genomic strand is copied when the record has one. Product start
// and end are entries 1 and 2 of the record's Target parts, converted to int
// and reduced by one; the product strand is minus when entry 3 is "-" and
// plus otherwise. Returns false when xGetTargetParts() fails.
// NOTE(review): the opening line of this signature (return type and name) is
// absent from this listing; xUpdateSplicedSegment() calls
// xSetSplicedExon(gff, pExon) and tests the result as a boolean.
1211  const CGff2Record& gff,
1212  CRef<CSpliced_exon> pExon) const
1213 // ----------------------------------------------------------------------------
1214 {
1215  vector<string> targetParts;
1216  if (!xGetTargetParts(gff, targetParts)) {
1217  return false;
1218  }
1219 
1220 
1221  pExon->SetGenomic_start(gff.SeqStart()-1);
1222  pExon->SetGenomic_end(gff.SeqStop()-1);
1223  if (gff.IsSetStrand()) {
1224  pExon->SetGenomic_strand(gff.Strand());
1225  }
1226 
1227 
1228  const int product_start = NStr::StringToInt(targetParts[1])-1;
1229  const int product_end = NStr::StringToInt(targetParts[2])-1;
1230 
1231  // Check to see that product start and product end are
1232  // non-negative and that product_end >= product_start
// NOTE(review): no such check is performed in the lines that follow.
1233 
1234  pExon->SetProduct_start().SetNucpos(product_start);
1235  pExon->SetProduct_end().SetNucpos(product_end);
1236 
1237  ENa_strand targetStrand = eNa_strand_plus;
1238  if (targetParts[3] == "-") {
1239  targetStrand = eNa_strand_minus;
1240  }
1241  pExon->SetProduct_strand(targetStrand);
1242 
1243  return true;
1244 }
1245 
1246 
1247 // ----------------------------------------------------------------------------
1248 bool CGff2Reader::xGetTargetParts(const CGff2Record& gff, vector<string>& targetParts) const
1249 // ----------------------------------------------------------------------------
1250 {
1251  string targetInfo;
1252  if (!gff.GetAttribute("Target", targetInfo)) {
1253  return false;
1254  }
1255 
1256  NStr::Split(targetInfo, " ", targetParts);
1257  if (targetParts.size() != 4) {
1258  return false;
1259  }
1260 
1261  return true;
1262 }
1263 
1264 
1265 // ----------------------------------------------------------------------------
// Computes one start value per gap part for a row that runs downward from
// `offset` (this definition is invoked as xGetStartsOnMinusStrand by
// xSetDensegStarts). `starts` is cleared and then receives exactly one entry
// per element of gapParts; -1 marks a part that does not consume this row.
// Returns false as soon as a part begins with a letter other than M, I or D.
// NOTE(review): `offset` is used below but is declared on a signature line
// that is not visible in this listing.
1267  const vector<string>& gapParts,
1268  const bool isTarget,
1269  vector<int>& starts) const
1270 // ----------------------------------------------------------------------------
1271 {
1272  starts.clear();
1273  const auto gapCount = gapParts.size();
1274 
    // NOTE(review): `auto i=0` deduces int while gapCount is unsigned.
1275  for (auto i=0; i<gapCount; ++i) {
    // Each part is one operation letter followed by a decimal length.
1276  char changeType = gapParts[i][0];
1277  int changeSize = NStr::StringToInt(gapParts[i].substr(1));
1278  switch (changeType) {
1279  default:
1280  return false;
1281 
    // 'M' consumes this row regardless of isTarget: record the lowest
    // position of the run, then move offset down past it.
1282  case 'M':
1283  starts.push_back(offset+1-changeSize);
1284  offset -= changeSize;
1285  break;
1286 
    // 'I' consumes the row only when isTarget is true.
1287  case 'I':
1288  if (isTarget) {
1289  starts.push_back(offset+1-changeSize);
1290  offset -= changeSize;
1291  } else {
1292  starts.push_back(-1);
1293  }
1294  break;
1295 
    // 'D' is the mirror image: it consumes the row only when isTarget is false.
1296  case 'D':
1297  if (isTarget) {
1298  starts.push_back(-1);
1299  } else {
1300  starts.push_back(offset+1-changeSize);
1301  offset -= changeSize;
1302  }
1303  break;
1304  }
1305  }
1306  return true;
1307 }
1308 
1309 
1310 // ----------------------------------------------------------------------------
// Computes one start value per gap part for a row that runs upward from
// `offset` (this definition is invoked as xGetStartsOnPlusStrand by
// xSetDensegStarts). `starts` is cleared and then receives exactly one entry
// per element of gapParts; -1 marks a part that does not consume this row.
// Returns false as soon as a part begins with a letter other than M, I or D.
// NOTE(review): `offset` is used below but is declared on a signature line
// that is not visible in this listing.
1312  const vector<string>& gapParts,
1313  const bool isTarget,
1314  vector<int>& starts) const
1315 // ----------------------------------------------------------------------------
1316 {
1317  starts.clear();
1318  const auto gapCount = gapParts.size();
1319 
    // NOTE(review): `auto i=0` deduces int while gapCount is unsigned.
1320  for (auto i=0; i<gapCount; ++i) {
    // Each part is one operation letter followed by a decimal length.
1321  char changeType = gapParts[i][0];
1322  int changeSize = NStr::StringToInt(gapParts[i].substr(1));
1323  switch (changeType) {
1324  default:
1325  return false;
1326 
    // 'M' consumes this row regardless of isTarget: record the current
    // offset, then advance past the run.
1327  case 'M':
1328  starts.push_back(offset);
1329  offset += changeSize;
1330  break;
1331 
    // 'I' consumes the row only when isTarget is true.
1332  case 'I':
1333  if (isTarget) {
1334  starts.push_back(offset);
1335  offset += changeSize;
1336  } else {
1337  starts.push_back(-1);
1338  }
1339  break;
1340 
    // 'D' is the mirror image: it consumes the row only when isTarget is false.
1341  case 'D':
1342  if (isTarget) {
1343  starts.push_back(-1);
1344  } else {
1345  starts.push_back(offset);
1346  offset += changeSize;
1347  }
1348  break;
1349  }
1350  }
1351  return true;
1352 }
1353 
1354 
1355 // ----------------------------------------------------------------------------
// Computes the per-segment start positions for both rows of a dense-seg and
// appends them to `denseg`, target row first, then the row of the GFF record
// itself ("ident"). Returns false if either start computation rejects
// gapParts.
// NOTE(review): `denseg` is used below but is declared on a signature line
// that is not visible in this listing.
1356 bool CGff2Reader::xSetDensegStarts(const vector<string>& gapParts,
1357  const ENa_strand identStrand,
1358  const ENa_strand targetStrand,
1359  const TSeqPos targetStart,
1360  const TSeqPos targetEnd,
1361  const CGff2Record& gff,
1363 // ----------------------------------------------------------------------------
1364 {
1365  const size_t gapCount = gapParts.size();
1366 
    // Target row: a minus-strand row is walked down from targetEnd, any other
    // strand up from targetStart.
1367  const bool isTarget = true;
1368  vector<int> targetStarts;
1369  if (targetStrand == eNa_strand_minus) {
1370  if( !xGetStartsOnMinusStrand(targetEnd,
1371  gapParts,
1372  isTarget,
1373  targetStarts)) {
1374  return false;
1375  }
1376  }
1377  else {
1378  if (!xGetStartsOnPlusStrand(targetStart,
1379  gapParts,
1380  isTarget,
1381  targetStarts)) {
1382  return false;
1383  }
1384  }
1385 
    // Ident row: same scheme, anchored on the record's own SeqStop()/SeqStart().
1386  vector<int> identStarts;
1387  const bool isIdent = !isTarget;
1388 
1389  if (identStrand == eNa_strand_minus) {
1390 
1391  if ( !xGetStartsOnMinusStrand(gff.SeqStop(),
1392  gapParts,
1393  isIdent,
1394  identStarts)) {
1395  return false;
1396  }
1397  }
1398  else {
1399  if ( !xGetStartsOnPlusStrand(gff.SeqStart(),
1400  gapParts,
1401  isIdent,
1402  identStarts)) {
1403  return false;
1404  }
1405  }
1406 
    // Interleave the two rows segment by segment: target start, then ident start.
1407  for (auto i=0; i<gapCount; ++i) {
1408  denseg.SetStarts().push_back(targetStarts[i]);
1409  denseg.SetStarts().push_back(identStarts[i]);
1410  }
1411  return true;
1412 }
1413 
1414 
1415 // ----------------------------------------------------------------------------
// Chooses the segment representation for an alignment record by its GFF type
// (this definition is invoked as xAlignmentSetSegment elsewhere in this
// file): "cDNA_match", "EST_match" and "translated_nucleotide_match" are
// handed to xAlignmentSetSpliced_seg(), every other type to
// xAlignmentSetDenseg(). The callee's result is returned unchanged.
1417  const CGff2Record& gff,
1418  CRef<CSeq_align> pAlign)
1419 // ----------------------------------------------------------------------------
1420 {
1421  const string& type = gff.Type();
1422 
1423  if (type == "cDNA_match" ||
1424  type == "EST_match" ||
1425  type == "translated_nucleotide_match") {
1426  return xAlignmentSetSpliced_seg(gff, pAlign);
1427  }
1428 
1429  return xAlignmentSetDenseg(gff, pAlign);
1430 }
1431 
1432 
1433 // ----------------------------------------------------------------------------
1435  const CGff2Record& gff,
1436  CRef<CSeq_align> pAlign)
1437 // ----------------------------------------------------------------------------
1438 {
1439  vector<string> targetParts;
1440  if (!xGetTargetParts(gff, targetParts)) {
1441  return false;
1442  }
1443 
1444  CSeq_align::TSegs& segs = pAlign->SetSegs();
1445 
1446  auto& spliced_seg = segs.SetSpliced();
1447 
1448  const string& type = gff.Type();
1449  if (type == "translated_nucleotide_match") {
1450  spliced_seg.SetProduct_type(CSpliced_seg::eProduct_type_protein);
1451  }
1452  else {
1453  spliced_seg.SetProduct_type(CSpliced_seg::eProduct_type_transcript);
1454  }
1455  CRef<CSeq_id> product_id = CReadUtil::AsSeqId(targetParts[0]);
1456  spliced_seg.SetProduct_id(*product_id);
1457 
1458  CRef<CSeq_id> genomic_id = CReadUtil::AsSeqId(gff.Id());
1459  spliced_seg.SetGenomic_id(*genomic_id);
1460 
1461  if (targetParts[3] == "+") {
1462  spliced_seg.SetProduct_strand(eNa_strand_plus);
1463  }
1464  else
1465  if (targetParts[3] == "-") {
1466  spliced_seg.SetProduct_strand(eNa_strand_minus);
1467  }
1468 
1469  if (gff.IsSetStrand()) {
1470  ENa_strand ident_strand = gff.Strand();
1471  spliced_seg.SetGenomic_strand(ident_strand);
1472  }
1473 
1474  CRef<CSpliced_exon> exon(new CSpliced_exon());
1475  exon->SetProduct_start().SetNucpos(NStr::StringToInt(targetParts[1])-1);
1476  exon->SetProduct_end().SetNucpos(NStr::StringToInt(targetParts[2])-1);
1477 
1478  const auto genomic_start = gff.SeqStart();
1479  const auto genomic_end = gff.SeqStop();
1480  exon->SetGenomic_start(genomic_start);
1481  exon->SetGenomic_end(genomic_end);
1482 
1483  string gapInfo;
1484  vector<string> gapParts;
1485  if (gff.GetAttribute("Gap", gapInfo)) {
1486  NStr::Split(gapInfo, " ", gapParts);
1487  }
1488  else {
1489  gapParts.push_back(string("M") + NStr::NumericToString(gff.SeqStop()-gff.SeqStart()+1));
1490  }
1491 
1492  const auto gapCount = gapParts.size();
1493 
1494  for (auto i=0; i<gapCount; ++i) {
1496  char changeType = gapParts[i][0];
1497  int changeSize = NStr::StringToInt(gapParts[i].substr(1));
1498  switch (changeType) {
1499  default:
1500  return false;
1501 
1502  case 'M':
1503  chunk->SetMatch(changeSize);
1504  break;
1505 
1506  case 'I':
1507  chunk->SetProduct_ins(changeSize);
1508  break;
1509 
1510  case 'D':
1511  chunk->SetGenomic_ins(changeSize);
1512  break;
1513 
1514  }
1515  exon->SetParts().push_back(chunk);
1516  }
1517 
1518  spliced_seg.SetExons().push_back(exon);
1519 
1520  return true;
1521 }
1522 
1523 
1524 // ----------------------------------------------------------------------------
// Builds a two-row dense-seg on pAlign from one GFF record (this definition
// is invoked as xAlignmentSetDenseg by the segment dispatcher in this file).
// Row order is target first, then the record's own sequence, and that order is
// used consistently for ids, starts and strands. Returns false when the
// "Target" attribute cannot be split or when xSetDensegStarts() fails.
1526  const CGff2Record& gff,
1527  CRef<CSeq_align> pAlign)
1528 // ----------------------------------------------------------------------------
1529 {
1530  vector<string> targetParts;
1531  if (!xGetTargetParts(gff, targetParts)) {
1532  return false;
1533  }
1534 
    // Both strands default to plus; the target is minus only for a literal "-".
1535  //strands
1536  ENa_strand targetStrand = eNa_strand_plus;
1537  if (targetParts[3] == "-") {
1538  targetStrand = eNa_strand_minus;
1539  }
1540  ENa_strand identStrand = eNa_strand_plus;
1541  if (gff.IsSetStrand()) {
1542  identStrand = gff.Strand();
1543  }
1544 
1545 
    // Without a "Gap" attribute the record is treated as a single 'M' part
    // spanning SeqStop()-SeqStart()+1 positions.
1546  string gapInfo;
1547  vector<string> gapParts;
1548  if (gff.GetAttribute("Gap", gapInfo)) {
1549  NStr::Split(gapInfo, " ", gapParts);
1550  }
1551  else {
1552  gapParts.push_back(string("M") + NStr::NumericToString(gff.SeqStop()-gff.SeqStart()+1));
1553  }
1554 
    // One dense-seg segment per gap part.
1555  int gapCount = gapParts.size();
1556 
1557  //meta
1558  CSeq_align::TSegs& segs = pAlign->SetSegs();
1559  CSeq_align::C_Segs::TDenseg& denseg = segs.SetDenseg();
1560  denseg.SetDim(2);
1561  denseg.SetNumseg(gapCount);
1562 
1563  //ids
1564  denseg.SetIds().push_back(
1565  CReadUtil::AsSeqId(targetParts[0]));
1566  denseg.SetIds().push_back(
1567  CReadUtil::AsSeqId(gff.Id()));
1568 
    // Target start/end tokens are parsed as integers and shifted down by one.
1569  const TSeqPos targetStart = NStr::StringToInt(targetParts[1])-1;
1570  const TSeqPos targetEnd = NStr::StringToInt(targetParts[2])-1;
1571 
1572  if (!xSetDensegStarts(gapParts,
1573  identStrand,
1574  targetStrand,
1575  targetStart,
1576  targetEnd,
1577  gff,
1578  denseg)) {
1579  return false;
1580  }
1581 
    // Segment length = the number following the operation letter of each part.
1582  //lengths
1583  for (int i=0; i < gapCount; ++i) {
1584  denseg.SetLens().push_back(NStr::StringToInt(CTempString(gapParts[i],1,string::npos)));
1585  }
1586 
    // The same strand pair (target, ident) is repeated for every segment.
1587  for (int i=0; i < gapCount; ++i) {
1588  denseg.SetStrands().push_back(targetStrand);
1589  denseg.SetStrands().push_back(identStrand);
1590  }
1591  return true;
1592 }
1593 
1594 
1595 
1596 
1597 // ----------------------------------------------------------------------------
1599  const CGff2Record& gff,
1600  CRef<CSeq_align> pAlign)
1601 // ----------------------------------------------------------------------------
1602 {
1603  if (gff.IsSetScore()) {
1605  int(gff.Score()));
1606  }
1607 
1608  string extraScore;
1609 
1610  const string intScores[] = {
1611  //official
1612  "score",
1613  "align_length",
1614  "num_ident",
1615  "num_positives",
1616  "num_negatives",
1617  "num_mismatch",
1618  "num_gap",
1619 
1620  //picked up from real data files
1621  "common_component",
1622  "filter_score",
1623  "for_remapping",
1624  "merge_aligner",
1625  "rank",
1626  "reciprocity",
1627  "batch_id",
1628  "align_id",
1629  };
1630 
1631  const size_t intCount(sizeof(intScores)/sizeof(string));
1632  for (size_t i=0; i < intCount; ++i) {
1633  if (gff.GetAttribute(intScores[i], extraScore)) {
1634  pAlign->SetNamedScore(
1635  intScores[i], int(NStr::StringToDouble(extraScore)));
1636  }
1637  }
1638 
1639  const string realScores[] = {
1640  //official
1641  "bit_score",
1642  "e_value",
1643  "pct_identity_gap",
1644  "pct_identity_ungap",
1645  "pct_identity_gapopen_only",
1646  "pct_coverage",
1647  "sum_e",
1648  "comp_adjustment_method",
1649  "pct_coverage_hiqual",
1650 
1651  //picked up from real data files
1652  "inversion_merge_alignmer",
1653  "expansion",
1654  };
1655 
1656  const size_t realCount(sizeof(realScores)/sizeof(string));
1657  for (size_t i=0; i < realCount; ++i) {
1658  if (gff.GetAttribute(realScores[i], extraScore)) {
1659  pAlign->SetNamedScore(
1660  realScores[i], NStr::StringToDouble(extraScore));
1661  }
1662  }
1663 
1664  return true;
1665 }
1666 
1667 // ----------------------------------------------------------------------------
1669  const CGff2Record& record,
1670  CRef< CSeq_feat > pFeature )
1671 // ----------------------------------------------------------------------------
1672 {
1673  string strId;
1674  if ( record.GetAttribute( "ID", strId ) ) {
1675  pFeature->SetId().SetLocal().SetStr( strId );
1676  }
1677  return true;
1678 }
1679 
1680 // ----------------------------------------------------------------------------
1682  const CGff2Record& record,
1683  CRef< CSeq_feat > pFeature )
1684 // ----------------------------------------------------------------------------
1685 {
1686  CRef< CSeq_id > pId = CReadUtil::AsSeqId(record.Id(), m_iFlags);
1687  CRef< CSeq_loc > pLocation( new CSeq_loc );
1688  pLocation->SetInt().SetId( *pId );
1689  pLocation->SetInt().SetFrom( record.SeqStart() );
1690  pLocation->SetInt().SetTo( record.SeqStop() );
1691  if ( record.IsSetStrand() ) {
1692  pLocation->SetInt().SetStrand( record.Strand() );
1693  }
1694  pFeature->SetLocation( *pLocation );
1695 
1696  return true;
1697 }
1698 
1699 // ----------------------------------------------------------------------------
1702  CRef< CSeq_feat > pFeature )
1703 // ----------------------------------------------------------------------------
1704 {
1705  return false;
1706 }
1707 
1708 // ----------------------------------------------------------------------------
1710  const CGff2Record& record,
1711  CRef< CSeq_feat > pFeature )
1712 // ----------------------------------------------------------------------------
1713 {
1714  typedef CSeq_feat::TQual TQual;
1715  //task:
1716  // for each attribute of the new piece check if we already got a feature
1717  // qualifier
1718  // if so, and with the same value, then the qualifier is allowed to live
1719  // otherwise it is subfeature specific and hence removed from the feature
1720  TQual& quals = pFeature->SetQual();
1721  for (TQual::iterator it = quals.begin(); it != quals.end(); /**/) {
1722  const string& qualKey = (*it)->GetQual();
1723  if (NStr::StartsWith(qualKey, "gff_")) {
1724  it++;
1725  continue;
1726  }
1727  if (qualKey == "locus_tag") {
1728  it++;
1729  continue;
1730  }
1731  if (qualKey == "old_locus_tag") {
1732  it++;
1733  continue;
1734  }
1735  if (qualKey == "product") {
1736  it++;
1737  continue;
1738  }
1739  if (qualKey == "protein_id") {
1740  it++;
1741  continue;
1742  }
1743  const string& qualVal = (*it)->GetVal();
1744  string attrVal;
1745  if (!record.GetAttribute(qualKey, attrVal)) {
1746  //superfluous qualifier- squish
1747  it = quals.erase(it);
1748  continue;
1749  }
1750  if (qualVal != attrVal) {
1751  //ambiguous qualifier- squish
1752  it = quals.erase(it);
1753  continue;
1754  }
1755  it++;
1756  }
1757  return true;
1758 }
1759 
1760 // ----------------------------------------------------------------------------
1762  const CGff2Record& record,
1763  CRef< CSeq_feat > pFeature )
1764 // ----------------------------------------------------------------------------
1765 {
1766  //
1767  // Create GB qualifiers for the record attributes:
1768  //
1769  CRef< CGb_qual > pQual(0);
1770  const CGff2Record::TAttributes& attrs = record.Attributes();
1771  CGff2Record::TAttrCit it = attrs.begin();
1772  for (/*NOOP*/; it != attrs.end(); ++it) {
1773  // special case some well-known attributes
1774  if (x_ProcessQualifierSpecialCase(it, pFeature)) {
1775  continue;
1776  }
1777 
1778  // turn everything else into a qualifier
1779  pQual.Reset(new CGb_qual);
1780  pQual->SetQual(it->first);
1781  pQual->SetVal(it->second);
1782  pFeature->SetQual().push_back(pQual);
1783  }
1784  return true;
1785 }
1786 
1787 // ----------------------------------------------------------------------------
1789  const string& key,
1790  const string& value,
1791  CRef<CSeq_feat> pTargetFeature)
1792 // ----------------------------------------------------------------------------
1793 {
1794  if (!pTargetFeature) {
1795  return false;
1796  }
1797  pTargetFeature->AddOrReplaceQualifier(key, value);
1798  return true;
1799 }
1800 
1801 // ----------------------------------------------------------------------------
1803  const CGff2Record& record,
1804  CRef< CSeq_feat > pFeature )
1805 // ----------------------------------------------------------------------------
1806 {
1807  CRef< CUser_object > pGffInfo( new CUser_object );
1808  pGffInfo->SetType().SetStr( "gff-info" );
1809  pGffInfo->AddField( "gff-attributes", record.AttributesLiteral() );
1810  pGffInfo->AddField( "gff-start", NStr::NumericToString( record.SeqStart() ) );
1811  pGffInfo->AddField( "gff-stop", NStr::NumericToString( record.SeqStop() ) );
1812  pGffInfo->AddField( "gff-cooked", string( "false" ) );
1813 
1814  pFeature->SetExts().push_back( pGffInfo );
1815  return true;
1816 }
1817 
1818 // ----------------------------------------------------------------------------
1820  const CGff2Record& record,
1821  CRef< CSeq_feat > pFeature)
1822 // ----------------------------------------------------------------------------
1823 {
1824  //
1825  // Do something with the phase information --- but only for CDS features!
1826  //
1827 
1829  record.Type());
1830 
1831  switch(iGenbankType) {
1832  default:
1833  return x_FeatureSetDataMiscFeature(record, pFeature);
1834 
1836  return x_FeatureSetDataCDS(record, pFeature);
1838  return x_FeatureSetDataExon(record, pFeature);
1840  return x_FeatureSetDataGene(record, pFeature);
1850  return x_FeatureSetDataRna(record, pFeature, iGenbankType);
1851  }
1852  return true;
1853 }
1854 
1855 // ----------------------------------------------------------------------------
// Selects the gene variant of the feature's data (this definition is invoked
// as x_FeatureSetDataGene by the feature-data dispatcher in this file).
// `record` is not consulted; always returns true.
1857  const CGff2Record& record,
1858  CRef< CSeq_feat > pFeature )
1859 // ----------------------------------------------------------------------------
1860 {
1861  pFeature->SetData().SetGene();
1862  return true;
1863 }
1864 
1865 // ----------------------------------------------------------------------------
1867  const CGff2Record& record,
1868  CRef< CSeq_feat > pFeature,
1869  CSeqFeatData::ESubtype subType)
1870 // ----------------------------------------------------------------------------
1871 {
1872  CRNA_ref& rnaRef = pFeature->SetData().SetRna();
1873  switch (subType){
1874  default:
1876  break;
1878  rnaRef.SetType(CRNA_ref::eType_mRNA);
1879  break;
1881  rnaRef.SetType(CRNA_ref::eType_rRNA);
1882  break;
1883  }
1884  return true;
1885 }
1886 
1887 // ----------------------------------------------------------------------------
// Selects the cdregion variant of the feature's data (this definition is
// invoked as x_FeatureSetDataCDS by the feature-data dispatcher in this file).
// `record` is not consulted; always returns true.
1889  const CGff2Record& record,
1890  CRef< CSeq_feat > pFeature )
1891 // ----------------------------------------------------------------------------
1892 {
1893  pFeature->SetData().SetCdregion();
1894  return true;
1895 }
1896 
1897 // ----------------------------------------------------------------------------
// Marks the feature as an import feature with key "exon" (this definition is
// invoked as x_FeatureSetDataExon by the feature-data dispatcher in this
// file). `record` is not consulted; always returns true.
1899  const CGff2Record& record,
1900  CRef< CSeq_feat > pFeature )
1901 // ----------------------------------------------------------------------------
1902 {
1903  CSeqFeatData& data = pFeature->SetData();
1904  data.SetImp().SetKey( "exon" );
1905 
1906  return true;
1907 }
1908 
1909 // ----------------------------------------------------------------------------
// Marks the feature as an import feature with key "misc_feature" (this
// definition is invoked as x_FeatureSetDataMiscFeature by the feature-data
// dispatcher in this file, as its default case). Always returns true.
1911  const CGff2Record& record,
1912  CRef< CSeq_feat > pFeature )
1913 // ----------------------------------------------------------------------------
1914 {
1915  CSeqFeatData& data = pFeature->SetData();
1916  data.SetImp().SetKey( "misc_feature" );
    // When the record carries a phase, keep it as a "gff_phase" qualifier.
1917  if ( record.IsSetPhase() ) {
1918  CRef< CGb_qual > pQual( new CGb_qual );
1919  pQual->SetQual( "gff_phase" );
1920  pQual->SetVal( NStr::UIntToString( record.Phase() ) );
1921  pFeature->SetQual().push_back( pQual );
1922  }
1923 
1924  return true;
1925 }
1926 
1927 // ----------------------------------------------------------------------------
// Looks strId up in m_MapIdToFeature (this definition is invoked as
// x_GetFeatureById elsewhere in this file). On a hit, pFeature is set to the
// mapped feature and true is returned; on a miss, pFeature is left untouched
// and false is returned.
1929  const string & strId,
1930  ncbi::CRef<CSeq_feat>& pFeature )
1931 // ----------------------------------------------------------------------------
1932 {
1933  map< string, CRef< CSeq_feat > >::iterator it;
1934  it = m_MapIdToFeature.find(strId);
1935  if(it != m_MapIdToFeature.end()) {
1936  pFeature = it->second;
1937  return true;
1938  }
1939  return false;
1940 }
1941 
1942 // ----------------------------------------------------------------------------
// Reports whether the feature's "gff-info" user object still has its
// "gff-cooked" field set to "false" (this definition is invoked as
// x_HasTemporaryLocation by the exon-merge code in this file). Only the first
// extension whose type string is "gff-info" is examined. Returns false when
// there are no extensions, no such object, or no "gff-cooked" field.
1944  const CSeq_feat& feature )
1945 // ----------------------------------------------------------------------------
1946 {
1947  if ( ! feature.CanGetExts() ) {
1948  return false;
1949  }
    // pExts is declared by value, so this is a copy of the extension list.
1950  list< CRef< CUser_object > > pExts = feature.GetExts();
1951  list< CRef< CUser_object > >::iterator it;
1952  for ( it = pExts.begin(); it != pExts.end(); ++it ) {
    // Skip extensions whose type is absent or not a string.
1953  if ( ! (*it)->CanGetType() || ! (*it)->GetType().IsStr() ) {
1954  continue;
1955  }
1956  if ( (*it)->GetType().GetStr() != "gff-info" ) {
1957  continue;
1958  }
1959  if ( ! (*it)->HasField( "gff-cooked" ) ) {
1960  return false;
1961  }
1962  return ( (*it)->GetField( "gff-cooked" ).GetData().GetStr() == "false" );
1963  }
1964  return false;
1965 }
1966 
1967 // ----------------------------------------------------------------------------
// True when the feature's data is an import feature whose key is "exon"
// (this definition is invoked as IsExon elsewhere in this file). Returns
// false when the data is unavailable or is not an import feature.
1969  CRef<CSeq_feat> pFeature )
1970 // ----------------------------------------------------------------------------
1971 {
1972  if (!pFeature->CanGetData() || !pFeature->GetData().IsImp()) {
1973  return false;
1974  }
1975  return (pFeature->GetData().GetImp().GetKey() == "exon" );
1976 }
1977 
1978 // ----------------------------------------------------------------------------
// True when the feature's data subtype is CSeqFeatData::eSubtype_cdregion
// (this definition is invoked as IsCds elsewhere in this file). Returns false
// when the data is unavailable.
1980  CRef<CSeq_feat> pFeature)
1981 // ----------------------------------------------------------------------------
1982 {
1983  if (!pFeature->CanGetData()) {
1984  return false;
1985  }
1986  return (pFeature->GetData().GetSubtype() == CSeqFeatData::eSubtype_cdregion);
1987 }
1988 
1989 // ----------------------------------------------------------------------------
1991  CRef< CSeq_feat > pFeature,
1992  CRef< CSeq_annot > pAnnot )
1993 // ----------------------------------------------------------------------------
1994 {
1995  if (IsExon(pFeature)) {
1996  CRef< CSeq_feat > pParent;
1997  if (!xGetParentFeature(*pFeature, pParent) ) {
1998  pAnnot->SetData().SetFtable().push_back(pFeature) ;
1999  return true;
2000  }
2001  return xFeatureMergeExon( pFeature, pParent );
2002  }
2003  if (IsCds(pFeature)) {
2004  CRef<CSeq_feat> pExisting;
2005  if (!xGetExistingFeature(*pFeature, pAnnot, pExisting)) {
2006  pAnnot->SetData().SetFtable().push_back(pFeature) ;
2007  return true;
2008  }
2009  return xFeatureMergeCds(pFeature, pExisting);
2010  }
2011  pAnnot->SetData().SetFtable().push_back( pFeature ) ;
2012  return true;
2013 }
2014 
2015 // ----------------------------------------------------------------------------
// Finds a previously registered feature that has the same "ID" qualifier as
// `feature` (this definition is invoked as xGetExistingFeature elsewhere in
// this file). Returns false when `feature` has no qualifiers, no non-empty
// "ID" qualifier, or when x_GetFeatureById() does not know that id; otherwise
// pExisting receives the match. pAnnot is not referenced in the body.
2017  const CSeq_feat& feature,
2018  CRef<CSeq_annot> pAnnot,
2019  CRef< CSeq_feat >& pExisting )
2020 // ----------------------------------------------------------------------------
2021 {
2022  if (!feature.CanGetQual()) {
2023  return false;
2024  }
2025 
2026  string strExistingId = feature.GetNamedQual("ID");
2027  if (strExistingId.empty()) {
2028  return false;
2029  }
2030  if (!x_GetFeatureById( strExistingId, pExisting)) {
2031  return false;
2032  }
2033  return true;
2034 }
2035 
2036 // ----------------------------------------------------------------------------
// Resolves the feature named by the "Parent" qualifier of `feature` (this
// definition is invoked as xGetParentFeature elsewhere in this file). Returns
// false when `feature` has no qualifiers, no non-empty "Parent" qualifier, or
// when x_GetFeatureById() does not know that id; otherwise pParent receives
// the match. The qualifier value is used as a single id, without splitting.
2038  const CSeq_feat& feature,
2039  CRef< CSeq_feat >& pParent )
2040 // ----------------------------------------------------------------------------
2041 {
2042  if ( ! feature.CanGetQual() ) {
2043  return false;
2044  }
2045 
2046  string strParentId = feature.GetNamedQual("Parent");
2047  if (strParentId.empty()) {
2048  return false;
2049  }
2050  if ( ! x_GetFeatureById( strParentId, pParent ) ) {
2051  return false;
2052  }
2053  return true;
2054 }
2055 
2056 // ---------------------------------------------------------------------------
// Folds an exon's location into its parent feature (this definition is
// invoked as xFeatureMergeExon elsewhere in this file). If the parent still
// has a temporary location (see x_HasTemporaryLocation), that location is
// replaced by the exon's and every "gff-info" user object gets "gff-cooked"
// set to "true"; otherwise the exon's location is added to the parent's.
// Always returns true.
2058  CRef< CSeq_feat > pExon,
2059  CRef< CSeq_feat > pMrna )
2060 // ---------------------------------------------------------------------------
2061 {
2062  if ( x_HasTemporaryLocation( *pMrna ) ) {
2063  // start rebuilding parent location from scratch
2064  pMrna->SetLocation().Assign( pExon->GetLocation() );
    // pExts is declared by value (a copy of the list); the field update below
    // goes through the CRef elements it holds.
2065  list< CRef< CUser_object > > pExts = pMrna->SetExts();
2066  list< CRef< CUser_object > >::iterator it;
2067  for ( it = pExts.begin(); it != pExts.end(); ++it ) {
2068  if ( ! (*it)->CanGetType() || ! (*it)->GetType().IsStr() ) {
2069  continue;
2070  }
2071  if ( (*it)->GetType().GetStr() != "gff-info" ) {
2072  continue;
2073  }
2074  (*it)->SetField( "gff-cooked" ).SetData().SetStr( "true" );
2075  }
2076  }
2077  else {
2078  // add exon location to current parent location
2079  pMrna->SetLocation().Add( pExon->GetLocation() );
2080  }
2081 
2082  return true;
2083 }
2084 
2085 // ---------------------------------------------------------------------------
// Adds the location of pNewPiece to the location of pExisting (this
// definition is invoked as xFeatureMergeCds elsewhere in this file).
// Always returns true.
2087  CRef< CSeq_feat > pNewPiece,
2088  CRef< CSeq_feat > pExisting )
2089 // ---------------------------------------------------------------------------
2090 {
2091  pExisting->SetLocation().Add(pNewPiece->GetLocation());
2092  return true;
2093 }
2094 
2095 // ============================================================================
2098  const string& str )
2099 // ============================================================================
2100 {
2101  CRef< CDbtag > pDbtag( new CDbtag() );
2102  static const char* digits = "0123456789";
2103  string strDb, strTag;
2104  NStr::SplitInTwo( str, ":", strDb, strTag );
2105 
2106  // dbtag names for Gff2 do not always match the names for genbank.
2107  // special case known fixups here:
2108  if ( strDb == "NCBI_gi" ) {
2109  strDb = "GI";
2110  }
2111  // todo: all the other ones
2112 
2113 
2114  if ( ! strTag.empty() ) {
2115  pDbtag->SetDb( strDb );
2116  if (strTag.find_first_not_of(digits, 0) == string::npos)
2117  pDbtag->SetTag().SetId( NStr::StringToUInt( strTag ) );
2118  else
2119  pDbtag->SetTag().SetStr( strTag );
2120 
2121  }
2122  else {
2123  pDbtag->SetDb( "unknown" );
2124  pDbtag->SetTag().SetStr( str );
2125  }
2126  return pDbtag;
2127 }
2128 
2129 // ============================================================================
2131  CRef<CSeq_annot> pAnnot)
2132 // ============================================================================
2133 {
2134  if (!xGenerateParentChildXrefs(pAnnot)) {
2135  return false;
2136  }
2137  return true;
2138 }
2139 
2140 // ============================================================================
// For every feature of a feature-table annot, reads its "Parent" qualifier,
// splits it on commas and calls xSetAncestryLine() once per parent id (this
// definition is invoked as xGenerateParentChildXrefs elsewhere in this file).
// Returns true in all cases, including when the annot is not a feature table.
2142  CRef<CSeq_annot> pAnnot)
2143 // ============================================================================
2144 {
2145  typedef list<CRef<CSeq_feat> > FTABLE;
2146  typedef list<string> PARENTS;
2147 
2148  if (!pAnnot->IsFtable()) {
2149  return true;
2150  }
2151  FTABLE& ftable = pAnnot->SetData().SetFtable();
2152  for (FTABLE::iterator featIt = ftable.begin(); featIt != ftable.end(); ++featIt) {
2153  CSeq_feat& feat = **featIt;
2154  const string& parentStr = feat.GetNamedQual("Parent");
2155  PARENTS parents;
2156  NStr::Split(parentStr, ",", parents, 0);
2157  for (PARENTS::iterator parentIt = parents.begin(); parentIt != parents.end(); ++parentIt) {
2158  const string& parent = *parentIt;
2159  xSetAncestryLine(feat, parent);
2160  }
2161  }
2162  return true;
2163 }
2164 
2165 // ============================================================================
// Cross-references `feat` with the feature named by directParentStr and then
// follows that feature's own "Parent" qualifier upward (this definition is
// invoked as xSetAncestryLine elsewhere in this file). The walk stops when an
// id cannot be resolved by x_GetFeatureById() or when the "Parent" qualifier
// is empty.
2167  CSeq_feat& feat,
2168  const string& directParentStr)
2169 // ============================================================================
2170 {
2171  typedef list<string> PARENTS;
2172 
2173  string ancestorStr(directParentStr);
2174  CRef<CSeq_feat> pAncestor;
2175  while (!ancestorStr.empty()) {
2176  if (!x_GetFeatureById(ancestorStr, pAncestor)) {
2177  return;
2178  }
2179  xSetAncestorXrefs(feat, *pAncestor);
    // From here on ancestorStr holds the raw "Parent" qualifier of the
    // ancestor just processed; it is not split before the next loop test.
2180  ancestorStr = pAncestor->GetNamedQual("Parent");
2181  PARENTS ancestors;
2182  NStr::Split(ancestorStr, ",", ancestors, 0);
2183  for (PARENTS::iterator it = ancestors.begin(); it != ancestors.end(); ++it) {
    // This inner name shadows the outer ancestorStr; each comma-separated id
    // is handled by a recursive call.
2184  const string& ancestorStr = *it;
2185  xSetAncestryLine(feat, ancestorStr);
2186  }
2187  }
2188 }
2189 
2190 // ============================================================================
// True when `feat` already carries an xref whose local integer id equals the
// local integer id of featId (this definition is invoked as sFeatureHasXref
// elsewhere in this file). Returns false when `feat` has no xrefs at all.
// NOTE(review): both ids are read via GetLocal().GetId() with no prior check
// of which variant is set -- confirm callers only pass local integer ids.
2192  const CSeq_feat& feat,
2193  const CFeat_id& featId)
2194 // ============================================================================
2195 {
2196  typedef vector<CRef<CSeqFeatXref> > XREFS;
2197  if (!feat.IsSetXref()) {
2198  return false;
2199  }
2200  int xrefId = featId.GetLocal().GetId();
2201  const XREFS& xrefs = feat.GetXref();
2202  for (XREFS::const_iterator cit = xrefs.begin(); cit != xrefs.end(); ++cit) {
2203  const CSeqFeatXref& ref = **cit;
2204  int contentId = ref.GetId().GetLocal().GetId();
2205  if (contentId == xrefId) {
2206  return true;
2207  }
2208  }
2209  return false;
2210 }
2211 
2212 // ============================================================================
// Links two features to each other through xrefs (this definition is invoked
// as xSetAncestorXrefs elsewhere in this file). For each direction, an xref
// holding a copy of the other feature's id is appended unless
// sFeatureHasXref() reports that one is already present.
2214  CSeq_feat& descendent,
2215  CSeq_feat& ancestor)
2216 // ============================================================================
2217 {
2218 
2219  //xref descendent->ancestor
2220  if (!sFeatureHasXref(descendent, ancestor.GetId())) {
2221  CRef<CFeat_id> pAncestorId(new CFeat_id);
2222  pAncestorId->Assign(ancestor.GetId());
2223  CRef<CSeqFeatXref> pAncestorXref(new CSeqFeatXref);
2224  pAncestorXref->SetId(*pAncestorId);
2225  descendent.SetXref().push_back(pAncestorXref);
2226  }
2227 
2228  //xref ancestor->descendent
2229  if (!sFeatureHasXref(ancestor, descendent.GetId())) {
2230  CRef<CFeat_id> pDescendentId(new CFeat_id);
2231  pDescendentId->Assign(descendent.GetId());
2232  CRef<CSeqFeatXref> pDescendentXref(new CSeqFeatXref);
2233  pDescendentXref->SetId(*pDescendentId);
2234  ancestor.SetXref().push_back(pDescendentXref);
2235  }
2236 }
2237 
2238 // ============================================================================
2240 // ============================================================================
2241 {
2242  if (!CReaderBase::xReadInit()) {
2243  return false;
2244  }
2245  return true;
2246 }
2247 
2248 // ============================================================================
2250  const string& line)
2251 // ============================================================================
2252 {
2253  vector<CTempStringEx> columns;
2254  CGff2Record::TokenizeGFF(columns, line);
2255  if (columns.size() < 9) {
2256  return false;
2257  }
2258  if (NStr::StartsWith(columns[2], "match") ||
2259  NStr::EndsWith(columns[2], "_match")) {
2260  return true;
2261  }
2262  return false;
2263 }
2264 
2265 // ============================================================================
2267  const string& type)
2268 // ============================================================================
2269 {
2270  return false;
2271 }
2272 
2273 END_objects_SCOPE
void xReportProgress(ILineErrorListener *=0)
const TId & GetId(void) const
Get the Id member data.
void SetType(TType value)
Assign a value to Type data member.
Definition: RNA_ref_.hpp:529
Truncate leading spaces only.
Definition: ncbistr.hpp:2175
CSpliced_exon_chunk –.
bool xAlignmentSetScore(const CGff2Record &, CRef< CSeq_align >)
CGff3SofaTypes & SofaTypes()
Definition: gff3_sofa.cpp:53
bool xGetStartsOnPlusStrand(TSeqPos offset, const vector< string > &gapParts, bool isTarget, vector< int > &starts) const
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:889
const TAttributes & Attributes() const
Definition: gff2_data.hpp:114
void SetNamedScore(const string &id, int score)
Definition: Seq_align.cpp:636
void x_InitializeScoreSums(const TScoreValueMap score_values, map< string, TSeqPos > &summed_scores) const
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
Definition: Seq_feat_.hpp:1106
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:718
virtual bool x_ParseFeatureGff(const string &, TAnnots &, ILineErrorListener *)
Definition: dbpivot.c:60
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:1849
virtual void ReadSeqAnnots(TAnnotList &, CNcbiIstream &, ILineErrorListener *=0)
Read all objects from given input stream, returning them as a vector of Seq-annots.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5211
virtual bool xIsTrackLine(const CTempString &)
bool IsInt(void) const
Check if variant Int is selected.
Definition: Score_.hpp:397
static const string * s_GetAnnotId(const CSeq_annot &)
void SetDb(const TDb &value)
Assign a value to Db data member.
Definition: Dbtag_.hpp:220
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:211
TId & SetId(void)
Assign a value to Id data member.
Definition: Seq_annot_.hpp:727
bool WriteToAnnot(CSeq_annot &)
Definition: track_data.cpp:113
virtual bool x_ParseDataGff(const string &, TAnnots &, ILineErrorListener *)
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:62
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3353
bool CanGetData(void) const
Check if it is safe to call GetData method.
Definition: Seq_feat_.hpp:891
TAnnotList TAnnots
Definition: reader_base.hpp:82
size_t SeqStop() const
Definition: gff2_data.hpp:71
bool xGetParentFeature(const CSeq_feat &, CRef< CSeq_feat > &)
const struct ncbi::grid::netcache::search::fields::KEY key
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:953
User-defined methods of the data storage class.
bool xAlignmentSetSegment(const CGff2Record &, CRef< CSeq_align >)
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:852
discontinuous alignment
Definition: Seq_align_.hpp:104
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:173
bool IsAlignmentRecord() const
Definition: gff2_data.hpp:102
Template class for iteration on objects of class C.
Definition: iterator.hpp:691
double Score() const
Definition: gff2_data.hpp:80
CGff2Reader(int iFlags, const string &name="", const string &title="")
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:417
TDenseg & SetDenseg(void)
Select the variant.
Definition: Seq_align_.cpp:159
TMatch & SetMatch(void)
Select the variant.
User-defined methods of the data storage class.
TGenomic_ins & SetGenomic_ins(void)
Select the variant.
static bool IsAlignmentData(const string &)
CAnnotdesc –.
Definition: Annotdesc.hpp:65
bool xGetTargetParts(const CGff2Record &gff, vector< string > &targetParts) const
virtual bool x_ParseAlignmentGff(const string &strLine, list< string > &id_list, map< string, list< CRef< CSeq_align >>> &alignments)
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:53
mapping pieces together
Definition: Seq_align_.hpp:103
const string & AttributesLiteral() const
Definition: gff2_data.hpp:89
void x_ProcessAlignmentsGff(const list< string > &id_list, const map< string, list< CRef< CSeq_align >>> &alignments, CRef< CSeq_annot > pAnnot)
IdToFeatureMap m_MapIdToFeature
bool xFeatureSetQualifier(const string &, const string &, CRef< CSeq_feat >)
Definition: Score.hpp:56
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
bool IsFtable(void) const
Check if variant Ftable is selected.
Definition: Seq_annot_.hpp:603
void AddOrReplaceQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature, or replace the value for the first one if it already exists...
Definition: Seq_feat.cpp:287
TAnnots::iterator TAnnotIt
Definition: reader_base.hpp:83
const TSeqPos offset(200)
User-defined methods of the data storage class.
bool xUpdateSplicedAlignment(const CGff2Record &gff, CRef< CSeq_align > pAlign) const
const NCBI_NS_NCBI::CEnumeratedTypeValues *ENUM_METHOD_NAME() ENa_strand(void)
Access to ENa_strand's attributes (values, names) as defined in spec.
Defines NCBI C++ exception handling.
bool s_CompareValues(const CScore::TValue &score_val1, const CScore::TValue &score_val2)
TAttributes::const_iterator TAttrCit
Definition: gff2_data.hpp:47
virtual CGff2Record * x_CreateRecord()
virtual bool xIsCurrentDataType(const string &)
bool IsReal(void) const
Check if variant Real is selected.
Definition: Score_.hpp:370
void SetDescr(CSeq_descr &value)
Definition: Seq_entry.cpp:134
bool xFeatureMergeExon(CRef< CSeq_feat >, CRef< CSeq_feat >)
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:54
const TKey & GetKey(void) const
Get the Key member data.
Definition: Imp_feat_.hpp:247
string m_AnnotName
bool IsSetStrand() const
Definition: gff2_data.hpp:96
bool xGetExistingFeature(const CSeq_feat &, CRef< CSeq_annot >, CRef< CSeq_feat > &)
static bool IsCds(CRef< CSeq_feat >)
bool x_FeatureSetDataMiscFeature(const CGff2Record &, CRef< CSeq_feat >)
User-defined methods of the data storage class.
int i
string m_AnnotTitle
bool IsSetXref(void) const
Cite other relevant features. Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1254
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1082
User-defined methods of the data storage class.
void SetGenomic_start(TGenomic_start value)
Assign a value to Genomic_start data member.
TUser & SetUser(void)
Select the variant.
Definition: Annotdesc_.cpp:190
CFeat_id –.
Definition: Feat_id.hpp:65
void xSetAncestryLine(CSeq_feat &, const string &)
virtual bool xParseFeature(const string &, CRef< CSeq_annot > &, ILineErrorListener *)
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1272
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_feat_.cpp:73
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
bool ContainsData() const
Definition: track_data.hpp:56
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:543
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
bool IsSetPhase() const
Definition: gff2_data.hpp:99
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TName & SetName(void)
Select the variant.
Definition: Seqdesc_.hpp:1019
const_iterator end() const
Definition: map.hpp:152
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:362
virtual bool x_FeatureSetId(const CGff2Record &, CRef< CSeq_feat >)
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:1435
void SetProduct_end(TProduct_end &value)
Assign a value to Product_end data member.
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:65
bool IsAlign(void) const
Check if variant Align is selected.
Definition: Seq_annot_.hpp:623
Merge the delimiters.
Definition: ncbistr.hpp:2446
User-defined methods of the data storage class.
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:414
unsigned int mCurrentFeatureCount
static bool IsExon(CRef< CSeq_feat >)
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
Informational message.
Definition: ncbidiag.hpp:645
bool IsImp(void) const
Check if variant Imp is selected.
const_iterator find(const key_type &key) const
Definition: map.hpp:153
auto_ptr –
Definition: ncbimisc.hpp:336
virtual void xPostProcessAnnot(CRef< CSeq_annot > &, ILineErrorListener *)
virtual void xAssignAnnotId(CRef< CSeq_annot > &, const string &="")
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3524
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Annot_id_.hpp:318
Lightweight interface for getting lines of data with minimal memory copying.
TReaderFlags m_iFlags
void SetTag(TTag &value)
Assign a value to Tag data member.
Definition: Dbtag_.cpp:66
virtual void xAddConversionInfo(CRef< CSeq_annot > &, ILineErrorListener *)
void SetType(TType &value)
Assign a value to Type data member.
virtual void xAssignTrackData(CRef< CSeq_annot > &)
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Definition: ncbistr.cpp:1288
const string & Type() const
Definition: gff2_data.hpp:77
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:805
Definition: type.c:8
Defines and provides stubs for a general interface to a variety of file readers.
Definition: reader_base.hpp:58
TLocal & SetLocal(void)
Select the variant.
Definition: Annot_id_.cpp:118
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CSeqFeatData::ESubtype MapSofaTermToGenbankType(const string &)
Definition: gff3_sofa.cpp:139
CRef< CAnnotdesc > m_CurrentBrowserInfo
User-defined methods of the data storage class.
.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:53
void SetVal(const TVal &value)
Assign a value to Val data member.
Definition: Gb_qual_.hpp:251
bool IsSetProduct_type(void) const
Check if a value has been assigned to Product_type data member.
virtual bool x_FeatureSetLocation(const CGff2Record &, CRef< CSeq_feat >)
User-defined methods of the data storage class.
TInt GetInt(void) const
Get the variant data.
Definition: Score_.hpp:403
virtual bool x_UpdateAnnotAlignment(const CGff2Record &, CRef< CSeq_annot >, ILineErrorListener *=0)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5519
list< CRef< CSeq_annot > > TAnnotList
Definition: reader_base.hpp:81
C_Value –.
Definition: Score_.hpp:90
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
bool x_FeatureSetDataExon(const CGff2Record &, CRef< CSeq_feat >)
virtual bool xAddFeatureToAnnot(CRef< CSeq_feat >, CRef< CSeq_annot >)
void SetProduct_start(TProduct_start &value)
Assign a value to Product_start data member.
TExons & SetExons(void)
Assign a value to Exons data member.
bool sFeatureHasXref(const CSeq_feat &feat, const CFeat_id &featId)
bool xGetStartsOnMinusStrand(TSeqPos offset, const vector< string > &gapParts, bool isTarget, vector< int > &starts) const
bool xSetDensegStarts(const vector< string > &gapParts, ENa_strand identStrand, ENa_strand targetStrand, const TSeqPos targetStart, const TSeqPos targetEnd, const CGff2Record &gff, CSeq_align::C_Segs::TDenseg &denseg)
Definition: map.hpp:337
CAnnot_id –.
Definition: Annot_id.hpp:65
virtual bool AssignFromGff(const string &)
Definition: gff2_data.cpp:259
virtual bool x_ProcessQualifierSpecialCase(CGff2Record::TAttrCit, CRef< CSeq_feat >)
bool x_FeatureTrimQualifiers(const CGff2Record &, CRef< CSeq_feat >)
virtual bool x_FeatureSetDataGene(const CGff2Record &, CRef< CSeq_feat >)
void SetGenomic_end(TGenomic_end value)
Assign a value to Genomic_end data member.
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:60
virtual bool xParseBrowserLine(const string &, CRef< CSeq_annot > &, ILineErrorListener *)
void x_ProcessAlignmentScores(const CSeq_align &alignment, map< string, TSeqPos > &summed_scores, TScoreValueMap &common_scores) const
virtual bool x_InitAnnot(const CGff2Record &, CRef< CSeq_annot >, ILineErrorListener *=0)
TReal GetReal(void) const
Get the variant data.
Definition: Score_.hpp:376
void SetLineNumber(unsigned int uLineNumber)
Definition: line_error.hpp:519
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:609
bool GetAttribute(const string &, string &) const
Definition: gff2_data.cpp:350
CRef< CAnnotdesc > m_CurrentTrackInfo
const TExts & GetExts(void) const
Get the Exts member data.
Definition: Seq_feat_.hpp:1428
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:286
char value[7]
Definition: config.c:428
virtual bool x_ParseTrackLineGff(const string &, CRef< CAnnotdesc > &)
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
virtual bool xParseStructuredComment(const string &)
static CRef< CDbtag > x_ParseDbtag(const string &)
virtual CRef< CSerialObject > ReadObject(ILineReader &, ILineErrorListener *=0)
Read an object from a given line reader, render it as the most appropriate Genbank object...
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
User-defined methods of the data storage class.
Multi-threading – classes, functions, and features.
virtual bool x_UpdateAnnotFeature(const CGff2Record &, CRef< CSeq_annot >, ILineErrorListener *=0)
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
Useful/utility classes and methods.
CTrackData * m_pTrackDefaults
bool x_ParseBrowserLineGff(const string &, CRef< CAnnotdesc > &)
static void TokenizeGFF(vector< CTempStringEx > &columns, const CTempStringEx &line)
Definition: gff2_data.cpp:213
size_t SeqStart() const
Definition: gff2_data.hpp:68
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:876
CRef –.
Definition: ncbiobj.hpp:616
virtual bool xParseTrackLine(const string &, ILineErrorListener *)
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
void SetNameDesc(const string &name)
Definition: Seq_annot.cpp:66
void SetDesc(TDesc &value)
Assign a value to Desc data member.
Definition: Seq_annot_.cpp:222
AutoPtr –.
Definition: ncbimisc.hpp:483
void erase(iterator pos)
Definition: map.hpp:167
Definition: Seq_entry.hpp:55
virtual CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=0)
Read an object from a given line reader, render it as a single Seq-annot, if possible.
ENa_strand Strand() const
Definition: gff2_data.hpp:83
static const char * str(char *buf, int n)
Definition: stats.c:84
virtual bool xUngetLine(ILineReader &)
static CObjReaderLineException * Create(EDiagSev eSeverity, unsigned int uLine, const std::string &strMessage, EProblem eProblem=eProblem_GeneralParsingError, const std::string &strSeqId=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), CObjReaderLineException::EErrCode eErrCode=eFormat, const TVecOfLines &vecOfOtherLines=TVecOfLines())
Please use this instead of the constructor because the ctor is protected.
Definition: line_error.cpp:103
bool CanGetExts(void) const
Check if it is safe to call GetExts method.
Definition: Seq_feat_.hpp:1422
bool x_FeatureSetData(const CGff2Record &, CRef< CSeq_feat >)
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:923
bool xAlignmentSetDenseg(const CGff2Record &, CRef< CSeq_align >)
void x_GetAlignmentScores(const CSeq_align &alignment, TScoreValueMap &score_values) const
virtual bool xGenerateParentChildXrefs(CRef< CSeq_annot >)
virtual ~CGff2Reader()
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3096
void SetTitleDesc(const string &title)
Definition: Seq_annot.cpp:96
virtual bool xParseAlignment(const string &, CRef< CSeq_annot > &, ILineErrorListener *)
virtual void ReadSeqAnnotsNew(TAnnots &, ILineReader &, ILineErrorListener *=0)
TSpliced & SetSpliced(void)
Select the variant.
Definition: Seq_align_.cpp:225
unsigned int m_uLineNumber
TExts & SetExts(void)
Assign a value to Exts data member.
Definition: Seq_feat_.hpp:1434
bool xSetSplicedExon(const CGff2Record &gff, CRef< CSpliced_exon > pExon) const
bool x_MergeAlignments(const list< CRef< CSeq_align >> &alignment_list, CRef< CSeq_align > &processed)
void x_FindMatchingScores(const TScoreValueMap &scores_1, const TScoreValueMap &scores_2, set< string > &matching_scores) const
bool x_HasTemporaryLocation(const CSeq_feat &)
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: Seq_align_.hpp:777
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_annot_.hpp:721
TFrame Phase() const
Definition: gff2_data.hpp:86
bool CanGetId(void) const
Check if it is safe to call GetId method.
Definition: Seq_annot_.hpp:715
virtual bool xAnnotPostProcess(CRef< CSeq_annot >)
static CRef< CSeq_id > AsSeqId(const string &rawId, unsigned int flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based in given customization flags.
Definition: read_util.cpp:89
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:309
virtual bool x_FeatureSetDataRna(const CGff2Record &, CRef< CSeq_feat >, CSeqFeatData::ESubtype)
void ProcessError(CObjReaderLineException &, ILineErrorListener *)
namespace ncbi::objects::
Definition: Seq_feat.hpp:56
User-defined methods of the data storage class.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5535
void SetImp(TImp &v)
User-defined methods of the data storage class.
const TImp & GetImp(void) const
Get the variant data.
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1266
virtual bool xGetLine(ILineReader &, string &)
virtual bool xIsIgnoredFeatureType(const string &)
virtual bool x_FeatureSetDataCDS(const CGff2Record &, CRef< CSeq_feat >)
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:243
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
virtual bool xReadInit()
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
virtual void x_SetTrackDataToSeqEntry(CRef< CSeq_entry > &, CRef< CUser_object > &, const string &, const string &)
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
Simple implementation of ILineReader for i(o)streams.
void SetId(TId &value)
Assign a value to Id data member.
const_iterator begin() const
Definition: map.hpp:151
bool xAlignmentSetSpliced_seg(const CGff2Record &, CRef< CSeq_align >)
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1118
bool x_FeatureSetGffInfo(const CGff2Record &, CRef< CSeq_feat >)
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:854
bool mParsingAlignment
CRef< CSeq_entry > ReadSeqEntry(ILineReader &, ILineErrorListener *=0)
Read an object from a given line reader, render it as a single Seq-entry, if possible.
bool IsSetScore() const
Definition: gff2_data.hpp:93
Definition: Dbtag.hpp:52
bool xFeatureMergeCds(CRef< CSeq_feat >, CRef< CSeq_feat >)
bool x_FeatureSetQualifiers(const CGff2Record &, CRef< CSeq_feat >)
bool xUpdateSplicedSegment(const CGff2Record &gff, CSpliced_seg &segment) const
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:147
Truncate trailing spaces only.
Definition: ncbistr.hpp:2176
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Annot_id_.cpp:112
TParts & SetParts(void)
Assign a value to Parts data member.
virtual bool xProgressInit(ILineReader &istr)
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
bool x_GetFeatureById(const string &, CRef< CSeq_feat > &)
void xSetAncestorXrefs(CSeq_feat &, CSeq_feat &)
const string & Id() const
Definition: gff2_data.hpp:65
static string NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:4384
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:756
TProduct_ins & SetProduct_ins(void)
Select the variant.
virtual bool xReadInit()
ESubtype GetSubtype(void) const
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:897
bool IsCanceled() const
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:531
virtual bool x_CreateAlignment(const CGff2Record &gff, CRef< CSeq_align > &pAlign)
Modified on Sun Mar 26 18:25:29 2017 by modify_doxy.py rev. 506947