NCBI C++ ToolKit
gff2_write_data.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff2_write_data.cpp 75701 2016-12-07 18:06:34Z foleyjp $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * GFF writer record data (CGffWriteRecord implementation)
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
38 #include <objmgr/mapped_feat.hpp>
39 #include <objmgr/util/feature.hpp>
40 #include <objmgr/util/sequence.hpp>
41 
44 
46 
47 BEGIN_objects_SCOPE // namespace ncbi::objects::
48 
49 // ----------------------------------------------------------------------------
51 // ----------------------------------------------------------------------------
52  = "; "; // Initializer "; " of a definition whose declarator line is absent from this listing; NOTE(review): presumably the static `ATTR_SEPARATOR` member — confirm.
53 
54 // ----------------------------------------------------------------------------
56 // ----------------------------------------------------------------------------
57 {
    // Accessor: returns the m_strId member unchanged.
    // NOTE(review): the signature line is absent from this listing;
    // presumably `string CGffWriteRecord::StrId() const` — confirm.
58  return m_strId;
59 }
60 
61 // ----------------------------------------------------------------------------
63 // ----------------------------------------------------------------------------
64 {
    // Accessor: returns the m_strSource member unchanged.
    // NOTE(review): the signature line is absent from this listing;
    // presumably `string CGffWriteRecord::StrSource() const` — confirm.
65  return m_strSource;
66 }
67 
68 // ----------------------------------------------------------------------------
71  const string& id ):
72  m_fc(fc),
73  m_strId( "" ),
74  m_uSeqStart( 0 ),
75  m_uSeqStop( 0 ),
76  m_strSource( "." ),
77  m_strType( "." ),
78  m_pScore( 0 ),
79  m_peStrand( 0 ),
80  m_puPhase( 0 )
81 // ----------------------------------------------------------------------------
82 {
    // Constructor body. The initializer list above stores `fc`, sets the
    // sequence id to "", start/stop to 0, source and type to ".", and leaves
    // the score/strand/phase pointers null (i.e. "not set").
    // Note that `id` is NOT copied into m_strId; when non-empty it is only
    // recorded as the "ID" attribute.
    // NOTE(review): the first line of the signature (with the `fc` parameter)
    // is absent from this listing.
83  if (!id.empty()) {
84  SetAttribute("ID", id);
85  }
    // The ';' after the closing brace is an empty declaration.
86 };
87 
88 // ----------------------------------------------------------------------------
90  const CGffWriteRecord& other ):
91  m_fc( other.m_fc ),
92  m_strId( other.m_strId ),
93  m_uSeqStart( other.m_uSeqStart ),
94  m_uSeqStop( other.m_uSeqStop ),
95  m_strSource( other.m_strSource ),
96  m_strType( other.m_strType ),
97  m_pScore( 0 ),
98  m_peStrand( 0 ),
99  m_puPhase( 0 )
100 // ----------------------------------------------------------------------------
101 {
     // Copy constructor body. Plain members are copied in the initializer
     // list; the three pointer members start out null and are then given
     // freshly allocated copies of the pointed-to values, so the new record
     // never shares a pointer with `other`.
     // NOTE(review): the first line of the signature is absent from this
     // listing.
102  if ( other.m_pScore ) {
103  m_pScore = new string( *(other.m_pScore) );
104  }
105  if ( other.m_peStrand ) {
106  m_peStrand = new ENa_strand( *(other.m_peStrand) );
107  }
108  if ( other.m_puPhase ) {
109  m_puPhase = new unsigned int( *(other.m_puPhase) );
110  }
111 
     // m_Attributes is default-constructed (empty) at this point, so the
     // range insert copies every key/value list from `other`.
112  this->m_Attributes.insert(
113  other.m_Attributes.begin(), other.m_Attributes.end() );
     // The ';' after the closing brace is an empty declaration.
114 };
115 
116 // ----------------------------------------------------------------------------
118 // ----------------------------------------------------------------------------
119 {
     // Destructor body: releases the score, strand and phase objects.
     // Any of them may be null; `delete` on a null pointer does nothing.
     // NOTE(review): the signature line is absent from this listing;
     // presumably `CGffWriteRecord::~CGffWriteRecord()` — confirm.
120  delete m_pScore;
121  delete m_peStrand;
122  delete m_puPhase;
     // The ';' after the closing brace is an empty declaration.
123 };
124 
125 // ----------------------------------------------------------------------------
127  const string& key,
128  const string& value )
129 // ----------------------------------------------------------------------------
130 {
     // Adds `value` to the list of values stored under `key`.
     // - An unknown key gets a new, empty value list first.
     // - The value is appended only if the list does not already contain it,
     //   so existing values are never replaced and duplicates are not stored.
     // Always returns true.
     // NOTE(review): the first line of the signature is absent from this
     // listing; presumably `bool CGffWriteRecord::SetAttribute(` — confirm.
131  TAttrIt it = m_Attributes.find(key);
132  if (it == m_Attributes.end()) {
133  m_Attributes[key] = vector<string>();
134  }
135  if (std::find(m_Attributes[key].begin(), m_Attributes[key].end(), value) ==
136  m_Attributes[key].end()) {
137  m_Attributes[key].push_back(value);
138  }
139  return true;
140 }
141 
142 // ----------------------------------------------------------------------------
144  const string& key,
145  vector<string>& value ) const
146 // ----------------------------------------------------------------------------
147 {
     // Looks up `key` in m_Attributes.
     // Returns false, leaving `value` untouched, when the key is missing or
     // its value list is empty. Otherwise copies the whole value list into
     // `value` and returns true.
     // NOTE(review): the first line of the signature is absent from this
     // listing; presumably `bool CGffWriteRecord::GetAttribute(` — confirm.
148  TAttrCit it = m_Attributes.find(key);
149  if (it == m_Attributes.end() || it->second.empty()) {
150  return false;
151  }
152  value = it->second;
153  return true;
154 }
155 
156 // ----------------------------------------------------------------------------
158 // ----------------------------------------------------------------------------
159 {
     // Returns the feature type text. A "gff_type" attribute, when present,
     // takes precedence (its first value is used); otherwise m_strType is
     // returned.
     // NOTE(review): the signature line is absent from this listing;
     // presumably `string CGffWriteRecord::StrType() const` — confirm.
160  vector<string> gffType;
161  if ( GetAttribute( "gff_type", gffType ) ) {
         // GetAttribute only returns true for a non-empty list, so front()
         // is safe here.
162  return gffType.front();
163  }
164  return m_strType;
165 }
166 
167 // ----------------------------------------------------------------------------
169 // ----------------------------------------------------------------------------
170 {
     // Returns m_uSeqStart + 1 formatted as a decimal string.
     // NOTE(review): the +1 presumably converts a 0-based stored position to
     // a 1-based output coordinate — confirm. The second ';' below is an
     // empty statement.
     // NOTE(review): the signature line is absent from this listing;
     // presumably `string CGffWriteRecord::StrSeqStart() const`.
171  return NStr::UIntToString( m_uSeqStart + 1 );;
172 }
173 
174 // ----------------------------------------------------------------------------
176 // ----------------------------------------------------------------------------
177 {
     // Returns m_uSeqStop + 1 formatted as a decimal string.
     // NOTE(review): the +1 presumably converts a 0-based stored position to
     // a 1-based output coordinate — confirm.
     // NOTE(review): the signature line is absent from this listing;
     // presumably `string CGffWriteRecord::StrSeqStop() const`.
178  return NStr::UIntToString( m_uSeqStop + 1 );
179 }
180 
181 // ----------------------------------------------------------------------------
183 // ----------------------------------------------------------------------------
184 {
     // Returns the score text, or "." when no score has been set
     // (m_pScore is null).
     // NOTE(review): the signature line is absent from this listing;
     // presumably `string CGffWriteRecord::StrScore() const`.
185  if ( ! m_pScore ) {
186  return ".";
187  }
188  return *m_pScore;
189 }
190 
191 // ----------------------------------------------------------------------------
193 // ----------------------------------------------------------------------------
194 {
     // Returns the strand column text: "-" only when the stored strand is
     // eNa_strand_minus; "+" when no strand is stored or for any other
     // strand value.
     // NOTE(review): the signature line is absent from this listing;
     // presumably `string CGffWriteRecord::StrStrand() const`.
195  if ( ! m_peStrand ) {
196  return "+";
197  }
198  switch ( *m_peStrand ) {
     // `default` is listed first, but label order does not affect matching.
199  default:
200  return "+";
201  case eNa_strand_minus:
202  return "-";
203  }
204 }
205 
206 // ----------------------------------------------------------------------------
208 // ----------------------------------------------------------------------------
209 {
     // Returns the phase as a decimal string, or "." when no phase has been
     // set (m_puPhase is null).
     // NOTE(review): the signature line is absent from this listing;
     // presumably `string CGffWriteRecord::StrPhase() const`.
210  if ( ! m_puPhase ) {
211  return ".";
212  }
213  return NStr::UIntToString( *m_puPhase );
214 }
215 
216 // ----------------------------------------------------------------------------
218 // ----------------------------------------------------------------------------
219 {
     // Builds the attributes column: `key=value` pairs joined by "; ", or "."
     // when there are no attributes. Only the FIRST value of each attribute
     // is written; a value is wrapped in double quotes when NeedsQuoting()
     // says so.
     // NOTE(review): the signature line and the declarations of `attrs` and
     // `it` (listing lines 222 and 224) are absent from this listing.
220  string strAttributes;
221  strAttributes.reserve(256);
     // Work on a copy of the record's attributes.
223  attrs.insert( Attributes().begin(), Attributes().end() );
225 
226  for ( it = attrs.begin(); it != attrs.end(); ++it ) {
227  string strKey = it->first;
228 
     // Separator goes before every pair except the first. The literal is
     // spelled out here rather than taken from a named constant.
229  if ( ! strAttributes.empty() ) {
230  strAttributes += "; ";
231  }
232  strAttributes += strKey;
233  strAttributes += "=";
234 // strAttributes += " ";
235 
     // NOTE(review): front() requires a non-empty value list. SetAttribute
     // never leaves an empty list behind, but nothing here checks for one.
236  bool quote = NeedsQuoting(it->second.front());
237  if ( quote )
238  strAttributes += '\"';
239  strAttributes += it->second.front();
240  if ( quote )
241  strAttributes += '\"';
242  }
243  if ( strAttributes.empty() ) {
244  strAttributes = ".";
245  }
246  return strAttributes;
247 }
248 
249 // ----------------------------------------------------------------------------
251  const string& strKey,
252  const string& attr_separator,
253  const string& multivalue_separator,
254  map<string, vector<string> >& attrs,
255  string& strAttributes ) const
256 // ----------------------------------------------------------------------------
257 {
     // Moves one attribute from `attrs` into the output text.
     //   strKey               - attribute to emit; nothing happens if absent
     //   attr_separator       - inserted before the pair when strAttributes
     //                          already has content
     //   multivalue_separator - placed between the attribute's values
     //   attrs                - working map; the emitted key is erased
     //   strAttributes        - receives `key=value[<sep>value...]`
     // NOTE(review): the first line of the signature is absent from this
     // listing.
258  TAttrIt it = attrs.find( strKey );
259  if ( it == attrs.end() ) {
260  return;
261  }
262  string strValue;
     // `tags` is a copy of the stored values; the map entry is not modified
     // until the erase at the end.
263  vector<string> tags = it->second;
264  for ( vector<string>::iterator pTag = tags.begin(); pTag != tags.end(); pTag++ ) {
265  if ( !strValue.empty() ) {
266  strValue += multivalue_separator;
267  }
     // Each value is URL-encoded first; the quoting decision is made on the
     // encoded text.
268  string strTag = CWriteUtil::UrlEncode( *pTag );
269  if (NeedsQuoting(strTag)) {
270  strTag = string("\"") + strTag + string("\"");
271  }
272  strValue += strTag;
273  }
274 
275  if ( ! strAttributes.empty() ) {
276  strAttributes += attr_separator;
277  }
278  strAttributes += strKey;
279  strAttributes += "=";
280  strAttributes += strValue;
281 
     // Remove the key so the caller's map only holds attributes not yet
     // written.
282  attrs.erase(it);
283 }
284 
285 // ----------------------------------------------------------------------------
287  const CGffWriteRecord& parent,
288  const CSeq_interval& interval,
289  unsigned int seqLength )
290 // ----------------------------------------------------------------------------
291 {
     // Re-targets this record onto `interval`: start, stop and strand are
     // taken from the interval where available, and partial ends are
     // recorded as "start_range" / "end_range" attributes. Always returns
     // true. `parent` and `seqLength` are not used by the code shown here.
     // NOTE(review): the first line of the signature is absent from this
     // listing.
292  if ( interval.CanGetFrom() ) {
293  m_uSeqStart = interval.GetFrom();
294  }
295  if (interval.IsPartialStart(eExtreme_Biological)) {
     // Drop first: SetAttribute appends to an existing value list instead of
     // replacing it.
296  DropAttribute("start_range");
297  string min = NStr::IntToString(m_uSeqStart + 1);
     // Resulting value has the form ".,<start+1>".
298  SetAttribute("start_range", string(".,") + min);
299  }
300  if ( interval.CanGetTo() ) {
301  m_uSeqStop = interval.GetTo();
302  }
303  if (interval.IsPartialStop(eExtreme_Biological)) {
304  DropAttribute("end_range");
305  string max = NStr::IntToString(m_uSeqStop + 1);
     // Resulting value has the form "<stop+1>,.".
306  SetAttribute("end_range", max + string(",."));
307  }
308  if ( interval.IsSetStrand() ) {
     // Allocate the strand holder on first use, otherwise overwrite in place.
309  if ( 0 == m_peStrand ) {
310  m_peStrand = new ENa_strand( interval.GetStrand() );
311  }
312  else {
313  *m_peStrand = interval.GetStrand();
314  }
315  }
316  return true;
317 }
318 
319 // ----------------------------------------------------------------------------
321  int iPhase )
322 // ----------------------------------------------------------------------------
323 {
     // Overwrites an existing phase with (3 + iPhase) % 3.
     // Returns false without doing anything when no phase is stored
     // (m_puPhase is null); returns true after updating otherwise.
     // NOTE(review): the first line of the signature is absent from this
     // listing.
324  if ( 0 == m_puPhase ) {
325  return false;
326  }
     // NOTE(review): for iPhase < -3 the int expression is negative and is
     // then converted to unsigned int; callers presumably keep iPhase in
     // [-3, ...) — confirm.
327  *m_puPhase = (3+iPhase)%3;
328  return true;
329 }
330 
331 // ----------------------------------------------------------------------------
333  const string& strAttr )
334 // ----------------------------------------------------------------------------
335 {
     // Removes the attribute `strAttr` together with all of its values.
     // Returns true if the attribute existed, false if there was nothing to
     // remove.
     // NOTE(review): the first line of the signature is absent from this
     // listing; presumably `bool CGffWriteRecord::DropAttribute(`.
336  TAttrIt it = m_Attributes.find( strAttr );
337  if ( it == m_Attributes.end() ) {
338  return false;
339  }
340  m_Attributes.erase( it );
341  return true;
342 }
343 
344 // ----------------------------------------------------------------------------
346  unsigned int uSequenceNumber,
347  const string& strPrefix )
348 // ----------------------------------------------------------------------------
349 {
     // Builds "<ID>|<strPrefix><uSequenceNumber>" from the first "ID" value.
     // Returns false when there is no "ID" attribute.
     // NOTE(review): `ids` is a local copy filled by GetAttribute; the
     // suffixed value is never written back to m_Attributes in the code
     // shown, and the success path also returns false. As written, the call
     // has no visible effect on the record — confirm whether a write-back
     // and `return true` were intended.
     // NOTE(review): the first line of the signature is absent from this
     // listing.
350  vector<string> ids;
351  if (!GetAttribute("ID", ids)) {
352  return false;
353  }
354  ids.at(0) += string( "|" ) + strPrefix + NStr::UIntToString( uSequenceNumber );
355  return false;
356 }
357 
358 // ----------------------------------------------------------------------------
360  const CMappedFeat& mapped_feature,
361  unsigned int flags )
362 // ----------------------------------------------------------------------------
363 {
     // Fills the record from a mapped feature by running the x_Assign* steps
     // in a fixed order: type, seq-id, source, start, stop, score, strand,
     // phase, attributes. Stops and returns false at the first step that
     // fails (steps already run are not undone); returns true when all
     // succeed. `flags` is forwarded to the type and attribute steps only.
     // NOTE(review): the first line of the signature is absent from this
     // listing.
364  if ( ! x_AssignType( mapped_feature, flags ) ) {
365  return false;
366  }
367  if ( ! x_AssignSeqId( mapped_feature ) ) {
368  return false;
369  }
370  if ( ! x_AssignSource( mapped_feature ) ) {
371  return false;
372  }
373  if ( ! x_AssignStart( mapped_feature ) ) {
374  return false;
375  }
376  if ( ! x_AssignStop( mapped_feature ) ) {
377  return false;
378  }
379  if ( ! x_AssignScore( mapped_feature ) ) {
380  return false;
381  }
382  if ( ! x_AssignStrand( mapped_feature ) ) {
383  return false;
384  }
385  if ( ! x_AssignPhase( mapped_feature ) ) {
386  return false;
387  }
388  if ( ! x_AssignAttributes( mapped_feature, flags ) ) {
389  return false;
390  }
391  return true;
392 }
393 
394 // ----------------------------------------------------------------------------
396  const CMappedFeat& mf )
397 // ----------------------------------------------------------------------------
398 {
     // Sets m_strId via CWriteUtil::GetBestId(mf, m_strId); when that call
     // reports failure the id falls back to ".". Returns true either way.
     // NOTE(review): the first line of the signature is absent from this
     // listing; presumably `bool CGffWriteRecord::x_AssignSeqId(`.
399  if (CWriteUtil::GetBestId(mf, m_strId)) {
400  return true;
401  }
402  m_strId = ".";
403  return true;
404 }
405 
406 // ----------------------------------------------------------------------------
408  const CMappedFeat& mf,
409  unsigned int )
410 // ----------------------------------------------------------------------------
411 {
     // Chooses the type text (m_strType). Precedence:
     //   1. the value of a "standard_name" qualifier, if the feature has one;
     //   2. a fixed name selected by the feature subtype (see switch below);
     //   3. "region" as the fallback.
     // The unnamed unsigned parameter is ignored. Always returns true.
     // NOTE(review): the first line of the signature and every `case` label
     // of the switch (listing lines 431, 434, 437, 440, 443, 446) are absent
     // from this listing, so the subtype-to-name mapping cannot be read here.
412  m_strType = "region";
413 
414  if ( mf.IsSetQual() ) {
415  const vector< CRef< CGb_qual > >& quals = mf.GetQual();
416  vector< CRef< CGb_qual > >::const_iterator it = quals.begin();
417  for( ; it != quals.end(); ++it) {
     // Skip qualifiers lacking either a name or a value.
418  if ( !(*it)->CanGetQual() || !(*it)->CanGetVal() ) {
419  continue;
420  }
421  if ( (*it)->GetQual() == "standard_name" ) {
422  m_strType = (*it)->GetVal();
423  return true;
424  }
425  }
426  }
427 
428  switch ( mf.GetFeatSubtype() ) {
429  default:
430  break;
432  m_strType = "CDS";
433  break;
435  m_strType = "exon";
436  break;
438  m_strType = "transcript";
439  break;
441  m_strType = "gene";
442  break;
444  m_strType = "mRNA";
445  break;
447  m_strType = "scRNA";
448  break;
449  }
450  return true;
451 }
452 
453 // ----------------------------------------------------------------------------
455  const CMappedFeat& mf )
456 // ----------------------------------------------------------------------------
457 {
     // Always returns true.
     // NOTE(review): the signature's first line and the only statement before
     // the return (listing line 458) are absent from this listing. By its
     // position this is presumably x_AssignStart, setting m_uSeqStart from
     // the feature location — confirm against the real file.
459  return true;
460 }
461 
462 // ----------------------------------------------------------------------------
464  const CMappedFeat& mf )
465 // ----------------------------------------------------------------------------
466 {
     // Always returns true.
     // NOTE(review): the signature's first line and the only statement before
     // the return (listing line 467) are absent from this listing. By its
     // position this is presumably x_AssignStop, setting m_uSeqStop from
     // the feature location — confirm against the real file.
468  return true;
469 }
470 
471 // ----------------------------------------------------------------------------
473  const CUser_object& uo,
474  const string& strType )
475 // ----------------------------------------------------------------------------
476 {
     // Searches `uo` and, recursively, the user objects nested in its fields
     // for the first object whose type is the string `strType`.
     // Returns a reference to the match, or an empty CConstRef if none is
     // found. `uo` itself is tested before its children.
     // NOTE(review): the first line of the signature and listing line 489
     // (the start of the statement that declares `recur` and makes the
     // recursive call) are absent from this listing.
477  if ( uo.IsSetType() && uo.GetType().IsStr() &&
478  uo.GetType().GetStr() == strType ) {
479  return CConstRef<CUser_object>( &uo );
480  }
481  const CUser_object::TData& fields = uo.GetData();
482  for ( CUser_object::TData::const_iterator it = fields.begin();
483  it != fields.end();
484  ++it ) {
485  const CUser_field& field = **it;
486  if ( field.IsSetData() ) {
487  const CUser_field::TData& data = field.GetData();
     // Only fields holding a single nested object are descended into.
488  if ( data.Which() == CUser_field::TData::e_Object ) {
490  data.GetObject(), strType );
491  if ( recur ) {
492  return recur;
493  }
494  }
495  }
496  }
497  return CConstRef<CUser_object>();
498 }
499 
500 // ----------------------------------------------------------------------------
502  const list<CRef<CUser_object > >& uos,
503  const string& strType)
504 // ----------------------------------------------------------------------------
505 {
     // List variant: runs the single-object sGetUserObjectByType search on
     // each element of `uos` in order and returns the first non-empty
     // result, or an empty CConstRef when no element yields a match.
     // NOTE(review): the first line of the signature is absent from this
     // listing; the function name is presumably an overload of
     // sGetUserObjectByType — confirm.
506  CConstRef<CUser_object> pResult;
507  typedef list<CRef<CUser_object > >::const_iterator CIT;
508  for (CIT cit=uos.begin(); cit != uos.end(); ++cit) {
509  const CUser_object& uo = **cit;
510  pResult = sGetUserObjectByType(uo, strType);
511  if (pResult) {
512  return pResult;
513  }
514  }
515  return CConstRef<CUser_object>();
516 }
517 
518 // ----------------------------------------------------------------------------
520  const CMappedFeat& mf )
521 // ----------------------------------------------------------------------------
522 {
     // Chooses the source text (m_strSource). Precedence:
     //   1. the value of a "gff_source" qualifier;
     //   2. the "Method" field of a "ModelEvidence" user object found in the
     //      feature's Ext;
     //   3. the same, searched in the feature's Exts list;
     //   4. whatever the final call at the bottom writes into m_strSource.
     // Starts from "." and always returns true.
     // NOTE(review): the first line of the signature and listing lines 540,
     // 553, 566 and 568 are absent from this listing, so the declarations of
     // `model_evidence` and the name of the final fallback call are not
     // visible here.
523  m_strSource = ".";
524 
525  if ( mf.IsSetQual() ) {
526  const vector< CRef< CGb_qual > >& quals = mf.GetQual();
527  vector< CRef< CGb_qual > >::const_iterator it = quals.begin();
528  for ( ; it != quals.end(); ++it ) {
     // Skip qualifiers lacking either a name or a value.
529  if ( !(*it)->CanGetQual() || !(*it)->CanGetVal() ) {
530  continue;
531  }
532  if ( (*it)->GetQual() == "gff_source" ) {
533  m_strSource = (*it)->GetVal();
534  return true;
535  }
536  }
537  }
538 
539  if ( mf.IsSetExt() ) {
541  mf.GetExt(), "ModelEvidence" );
542  if ( model_evidence ) {
     // `strMethod` is declared but not used in the code shown.
543  string strMethod;
544  if ( model_evidence->HasField( "Method" ) ) {
     // NOTE(review): GetStr() presumes the "Method" field holds a string —
     // confirm.
545  m_strSource = model_evidence->GetField(
546  "Method" ).GetData().GetStr();
547  return true;
548  }
549  }
550  }
551 
552  if (mf.IsSetExts()) {
554  mf.GetExts(), "ModelEvidence" );
555  if ( model_evidence ) {
     // `strMethod` is declared but not used in the code shown.
556  string strMethod;
557  if ( model_evidence->HasField( "Method" ) ) {
558  m_strSource = model_evidence->GetField(
559  "Method" ).GetData().GetStr();
560  return true;
561  }
562  }
563  }
564 
     // Fallback: the remaining statements are only partly visible; the last
     // one passes m_strSource as its final argument and its result is not
     // checked here.
565  CScope& scope = mf.GetScope();
567  &mf.GetScope());
569  m_strSource);
570  return true;
571 }
572 
573 // ----------------------------------------------------------------------------
575  const CMappedFeat& mf )
576 // ----------------------------------------------------------------------------
577 {
     // Sets the score from the first "gff_score" qualifier of the feature.
     // Leaves m_pScore untouched when the feature has no qualifiers or no
     // such qualifier. Always returns true.
     // NOTE(review): the first line of the signature is absent from this
     // listing.
578  if ( !mf.IsSetQual() ) {
579  return true;
580  }
581  const vector< CRef< CGb_qual > >& quals = mf.GetQual();
582  vector< CRef< CGb_qual > >::const_iterator it = quals.begin();
583  for ( ; it != quals.end(); ++it ) {
     // Skip qualifiers lacking either a name or a value.
584  if ( !(*it)->CanGetQual() || !(*it)->CanGetVal() ) {
585  continue;
586  }
587  if ( (*it)->GetQual() == "gff_score" ) {
     // NOTE(review): a previously allocated m_pScore is overwritten without
     // being deleted; this leaks if the function can run on a record that
     // already has a score — confirm.
588  m_pScore = new string((*it)->GetVal());
589  return true;
590  }
591  }
592  return true;
593 }
594 
595 // ----------------------------------------------------------------------------
597  const CMappedFeat& mf )
598 // ----------------------------------------------------------------------------
599 {
     // Stores the strand of the feature's location in a newly allocated
     // ENa_strand. Always returns true.
     // NOTE(review): a previously allocated m_peStrand is overwritten without
     // being deleted; this leaks if the function can run on a record that
     // already has a strand — confirm.
     // NOTE(review): the first line of the signature is absent from this
     // listing.
600  m_peStrand = new ENa_strand( mf.GetLocation().GetStrand() );
601  return true;
602 }
603 
604 // ----------------------------------------------------------------------------
606  const CMappedFeat& mf )
607 // ----------------------------------------------------------------------------
608 {
     // Allocates the phase with a placeholder value of 0. Always returns
     // true.
     // NOTE(review): the first line of the signature and listing line 609
     // (the statement opening the block closed at line 611, presumably a
     // condition on the feature) are absent from this listing.
     // NOTE(review): a previously allocated m_puPhase would be overwritten
     // without being deleted — confirm this cannot happen.
610  m_puPhase = new unsigned int( 0 ); // will be corrected by external code
611  }
612  return true;
613 }
614 
615 // ----------------------------------------------------------------------------
617  const CMappedFeat& mapped_feat,
618  unsigned int )
619 // ----------------------------------------------------------------------------
620 {
     // Stub: assigns no attributes. It writes a FIXME line to cerr on every
     // call and returns true. Both parameters are unused.
     // NOTE(review): the first line of the signature is absent from this
     // listing.
621  cerr << "FIXME: CGffWriteRecord::x_AssignAttributes" << endl;
622  return true;
623 }
624 
625 END_objects_SCOPE
TAttributes::const_iterator TAttrCit
const TObject & GetObject(void) const
Get the variant data.
static bool GetBestId(CSeq_id_Handle, CScope &, string &)
Definition: write_util.cpp:642
bool DropAttribute(const string &)
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
const CSeq_feat::TQual & GetQual(void) const
virtual bool AssignFromAsn(const CMappedFeat &, unsigned int=0)
TTo GetTo(void) const
Get the To member data.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:4833
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:866
virtual string StrAttributes() const
virtual bool x_AssignSource(const CMappedFeat &)
bool CanGetTo(void) const
Check if it is safe to call GetTo method.
const struct ncbi::grid::netcache::search::fields::KEY key
CGffWriteRecord(CGffFeatureContext &fc, const string &id="")
const TStr & GetStr(void) const
Get the variant data.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:68
TStrand GetStrand(void) const
Get the Strand member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
virtual string StrSource() const
static string UrlEncode(const string &)
Definition: write_util.cpp:510
string
Definition: cgiapp.hpp:514
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
bool GetAttribute(const string &, vector< string > &) const
bool IsSetExts(void) const
TAttributes m_Attributes
virtual string StrPhase() const
const NCBI_NS_NCBI::CEnumeratedTypeValues *ENUM_METHOD_NAME() ENa_strand(void)
Access to ENa_strand's attributes (values, names) as defined in spec.
virtual string StrId() const
virtual ~CGffWriteRecord()
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
virtual bool x_AssignAttributes(const CMappedFeat &, unsigned int=0)
#define fc
const TAttributes & Attributes() const
vector< CRef< CUser_field > > TData
const TType & GetType(void) const
Get the Type member data.
CScope & GetScope(void) const
Get scope this handle belongs to.
static bool GetIdType(CBioseq_Handle, string &)
Definition: write_util.cpp:161
CSeqFeatData::ESubtype GetFeatSubtype(void) const
virtual void x_StrAttributesAppendValue(const string &, const string &, const string &, map< string, vector< string > > &, string &) const
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:4808
const CSeq_loc & GetLocation(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
const_iterator end() const
Definition: map.hpp:152
bool AssignSequenceNumber(unsigned int, const string &="")
virtual bool x_AssignStop(const CMappedFeat &)
bool IsSetQual(void) const
virtual bool CorrectLocation(const CGffWriteRecord &, const CSeq_interval &, unsigned int)
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:899
const_iterator find(const key_type &key) const
Definition: map.hpp:153
unsigned int m_uSeqStop
ENa_strand * m_peStrand
CConstRef< CUser_object > sGetUserObjectByType(const CUser_object &uo, const string &strType)
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:947
TFrom GetFrom(void) const
Get the From member data.
virtual bool x_AssignScore(const CMappedFeat &)
virtual string StrType() const
virtual string StrStrand() const
virtual bool NeedsQuoting(const string &str) const
TAttributes::iterator TAttrIt
numerical value
Definition: Na_strand.hpp:63
virtual bool x_AssignStart(const CMappedFeat &)
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
T max(T x_, T y_)
virtual string StrSeqStart() const
T min(T x_, T y_)
bool IsSetExt(void) const
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
const CUser_object & GetExt(void) const
CScope –.
Definition: scope.hpp:90
void erase(iterator pos)
Definition: map.hpp:167
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
bool IsPartialStart(ESeqLocExtremes ext) const
static uch flags
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1153
bool CanGetFrom(void) const
Check if it is safe to call GetFrom method.
for using other definitions
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1185
bool IsPartialStop(ESeqLocExtremes ext) const
unsigned int m_uSeqStart
#define const
Definition: zconf.h:217
virtual bool x_AssignStrand(const CMappedFeat &)
virtual bool x_AssignPhase(const CMappedFeat &)
const TData & GetData(void) const
Get the Data member data.
const_iterator begin() const
Definition: map.hpp:151
E_Choice Which(void) const
Which variant is currently selected.
virtual bool x_AssignType(const CMappedFeat &, unsigned int=0)
static const char * ATTR_SEPARATOR
CMappedFeat –.
Definition: mapped_feat.hpp:58
const CSeq_feat::TExts & GetExts(void) const
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
virtual string StrSeqStop() const
unsigned int * m_puPhase
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
virtual bool x_AssignSeqId(const CMappedFeat &)
virtual string StrScore() const
bool SetAttribute(const string &, const string &)
Modified on Fri Apr 20 12:41:24 2018 by modify_doxy.py rev. 546573