NCBI C++ ToolKit
readfeat.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jonathan Kans
27  *
28  * File Description:
29  * Feature table reader
30  *
31  */
32 
33 #ifndef OBJTOOLS_READERS___READFEAT__HPP
34 #define OBJTOOLS_READERS___READFEAT__HPP
35 
36 #include <corelib/ncbistd.hpp>
37 
39 
40 // Objects includes
42 
43 #include <memory> // for unique_ptr<>
44 
45 #include <util/line_reader.hpp>
46 
48 
49 BEGIN_objects_SCOPE // namespace ncbi::objects::
50 
52 class ILineErrorListener;
53 class ITableFilter;
54 class CSeq_entry;
55 
56 // public interface for (single instance) feature table reader
57 
59 {
60 public:
61  /// Some flags that control feature table generation.
62  /// Note that "bad key" could mean many different conditions, from
63  /// unrecognized feature name to qualifier without a feature, and so on.
    /// Each enumerator is a distinct single bit (1<<0 .. 1<<10), so values can be
    /// combined with bitwise OR into a TFlags mask.
64  enum EFlags {
65  fReportBadKey NCBI_STD_DEPRECATED("This flag is redundant. Bad keys are now reported by default"
66  " and fSuppressBadKeyWarning must be specified to silence these warnings.")
67  = (1<<0), ///< = 0x01 (Report bad keys into the error container; marked deprecated above)
68  fKeepBadKey = (1<<1), ///< = 0x02 (As much as possible, try to use bad keys as if they were acceptable)
69  fTranslateBadKey = (1<<2), ///< = 0x04 (yields misc_feature /standard_name="...")
70  fIgnoreWebComments = (1<<3), ///< = 0x08 (ignore web comment lines such as lines that start with " INFO:", or consist of many equals signs)
71  fCreateGenesFromCDSs = (1<<4), ///< = 0x10 (If a CDS has a gene xref, create a gene with the same intervals if one doesn't already exist.)
72  fCDSsMustBeInTheirGenes = (1<<5), ///< = 0x20 (If a CDS has a gene xref, it *must* be inside of that gene)
73  fReportDiscouragedKey = (1<<6), ///< = 0x40 (Report discouraged keys into the error container)
74  fLeaveProteinIds = (1<<7), ///< = 0x80 (Leave all protein_id values as qualifiers)
75  fAllIdsAsLocal = (1<<8), ///< = 0x100 (Do not attempt to parse accessions)
76  fPreferGenbankId = (1<<9), ///< = 0x200 (Prefer Genbank accession ids)
77  fSuppressBadKeyWarning = (1<<10), ///< = 0x400 (Suppress 'bad key' errors; not recommended.)
78  };
79  using TFlags = long; ///< Bitwise OR of EFlags values
/// Apply a list of textual flags to an existing flag mask.
/// NOTE(review): presumably each recognized string is mapped to an EFlags bit
/// and OR-ed into baseFlags -- confirm against the implementation.
///
/// @param stringFlags
///  Flag strings to process (read-only).
/// @param baseFlags
///  Flag mask passed by non-const reference, so it can be modified in place.
80  static void AddStringFlags(
81  const list<string>& stringFlags,
82  TFlags& baseFlags);
83 
/// Construct a reader instance.
/// @param fReaderFlags
///  Reader flags; defaults to 0. TReaderFlags is not declared in this header
///  (NOTE(review): presumably the CReaderBase flag type -- confirm).
84  CFeature_table_reader(TReaderFlags fReaderFlags = 0);
85 
87  ILineErrorListener* pErrors = nullptr);
88 
90 
91 
93 
94  // For CReaderBase
97 
/// Instance (non-static) overload: read a feature table and return it as a
/// Seq-annot. No input source or error listener is passed in here.
/// NOTE(review): presumably the input and listener come from state held by
/// this object (see m_pImpl) -- confirm against the implementation.
///
/// @param flags
///  Binary OR of EFlags; defaults to 0.
/// @param filter
///  Optional feature filter; may be nullptr (the default).
/// @param seqid_prefix
///  Defaults to kEmptyStr. NOTE(review): exact use of the prefix is not
///  visible in this header -- confirm.
98  CRef<CSeq_annot> ReadSequinFeatureTable(const TFlags flags=0,
99  ITableFilter* filter = nullptr,
100  const string& seqid_prefix = kEmptyStr);
101 
102  // Read a 5-column feature table from a line reader and return it as a Seq-annot.
     //
     // reader           - line-by-line input source.
     // flags            - binary OR of EFlags; defaults to 0.
     // pMessageListener - optional error listener; may be nullptr (the default).
     // filter           - optional feature filter; may be nullptr (the default).
     // seqid_prefix     - defaults to kEmptyStr.
     //                    NOTE(review): exact use is not visible in this header -- confirm.
103  static CRef<CSeq_annot> ReadSequinFeatureTable(ILineReader& reader,
104  const TFlags flags = 0,
105  ILineErrorListener* pMessageListener = nullptr,
106  ITableFilter* filter = nullptr,
107  const string& seqid_prefix = kEmptyStr);
108 
// Stream overload: same return type as the ILineReader overload, but takes the
// input as a CNcbiIstream. Note that this overload has no seqid_prefix parameter.
//
// ifs              - input stream to read from.
// flags            - binary OR of EFlags; defaults to 0.
// pMessageListener - optional error listener; may be nullptr (the default).
// filter           - optional feature filter; may be nullptr (the default).
109  static CRef<CSeq_annot> ReadSequinFeatureTable(CNcbiIstream& ifs,
110  const TFlags flags = 0,
111  ILineErrorListener* pMessageListener = nullptr,
112  ITableFilter* filter = nullptr);
113 
// Overload that takes the seqid and annotation name explicitly, in addition to
// the line reader.
// NOTE(review): presumably intended for callers that have already parsed the
// initial ">Feature ..." line (see ParseInitialFeatureLine) -- confirm.
//
// reader           - line-by-line input source.
// seqid            - sequence id for the table.
// annotname        - annotation name for the table.
// flags            - binary OR of EFlags; defaults to 0.
// pMessageListener - optional error listener; may be nullptr (the default).
// filter           - optional feature filter; may be nullptr (the default).
114  static CRef<CSeq_annot> ReadSequinFeatureTable(ILineReader& reader,
115  const string& seqid,
116  const string& annotname,
117  const TFlags flags = 0,
118  ILineErrorListener* pMessageListener = nullptr,
119  ITableFilter* filter = nullptr);
120 
// Stream overload that takes the seqid and annotation name explicitly; the
// parameters otherwise match the ILineReader overload with the same extra arguments.
//
// ifs              - input stream to read from.
// seqid            - sequence id for the table.
// annotname        - annotation name for the table.
// flags            - binary OR of EFlags; defaults to 0.
// pMessageListener - optional error listener; may be nullptr (the default).
// filter           - optional feature filter; may be nullptr (the default).
121  static CRef<CSeq_annot> ReadSequinFeatureTable(CNcbiIstream& ifs,
122  const string& seqid,
123  const string& annotname,
124  const TFlags flags = 0,
125  ILineErrorListener* pMessageListener = nullptr,
126  ITableFilter* filter = nullptr);
127 
128  // Read all feature tables available from the input, attaching each
129  // at an appropriate position within the Seq-entry object.
     // Returns nothing; results are delivered through `entry`.
     //
     // reader           - line-by-line input source.
     // entry            - Seq-entry that receives the tables (non-const reference).
     // flags            - binary OR of EFlags; defaults to 0.
     // pMessageListener - optional error listener; may be nullptr (the default).
     // filter           - optional feature filter; may be nullptr (the default).
130  static void ReadSequinFeatureTables(ILineReader& reader,
131  CSeq_entry& entry,
132  const TFlags flags = 0,
133  ILineErrorListener* pMessageListener = nullptr,
134  ITableFilter* filter = nullptr);
135 
// Stream overload of ReadSequinFeatureTables: identical parameter list except
// that the input is a CNcbiIstream rather than an ILineReader.
//
// ifs              - input stream to read from.
// entry            - Seq-entry that receives the tables (non-const reference).
// flags            - binary OR of EFlags; defaults to 0.
// pMessageListener - optional error listener; may be nullptr (the default).
// filter           - optional feature filter; may be nullptr (the default).
136  static void ReadSequinFeatureTables(CNcbiIstream& ifs,
137  CSeq_entry& entry,
138  const TFlags flags = 0,
139  ILineErrorListener* pMessageListener = nullptr,
140  ITableFilter* filter = nullptr);
141 
142  // Create a single feature from its key and return it as a CRef<CSeq_feat>.
     // NOTE(review): this listing jumps from line 143 to line 145, so one
     // parameter line appears to be missing from this view -- check the real
     // header before relying on the parameter list shown here.
     //
     // feat             - feature key.
     // flags            - binary OR of EFlags; defaults to 0.
     // pMessageListener - optional error listener; may be nullptr (the default).
     // line             - defaults to 0 (presumably the input line number used
     //                    in diagnostics -- confirm).
     // seq_id           - optional pointer to a sequence id string; may be nullptr.
     // filter           - optional feature filter; may be nullptr (the default).
143  static CRef<CSeq_feat> CreateSeqFeat(const string& feat,
145  const TFlags flags = 0,
146  ILineErrorListener* pMessageListener = nullptr,
147  unsigned int line = 0,
148  std::string* seq_id = nullptr,
149  ITableFilter* filter = nullptr);
150 
151  // Add a single qualifier to an existing feature.
     //
     // sfp              - feature to which the qualifier is added.
     // feat_name        - name (key) of the feature.
     // qual             - qualifier name.
     // val              - qualifier value.
     // flags            - binary OR of EFlags; defaults to 0.
     // pMessageListener - optional error listener; may be nullptr (the default).
     // line             - defaults to 0 (presumably the input line number used
     //                    in diagnostics -- confirm). Note this is a signed int,
     //                    whereas CreateSeqFeat takes unsigned int.
     // seq_id           - sequence id; defaults to an empty string.
152  static void AddFeatQual(CRef<CSeq_feat> sfp,
153  const string& feat_name,
154  const string& qual,
155  const string& val,
156  const TFlags flags = 0,
157  ILineErrorListener* pMessageListener = nullptr,
158  int line = 0,
159  const string& seq_id = std::string());
160 
161  /// Test whether line_arg is a feature line (e.g. ">Feature lcl|seq1") and,
162  /// if so, parse it into its seqid and annotname parts.
163  ///
164  /// @param line_arg
165  ///  The input line that will be parsed.
166  /// @param out_seqid
167  ///  If returning true, this will hold the seqid of line_arg
168  ///  (or empty if there is none).
169  /// @param out_annotname
170  ///  If returning true, this will hold the annotname of
171  ///  line_arg (or empty if there is none).
172  /// @returns
173  ///  True if line_arg is a feature line, false otherwise.
174  static bool ParseInitialFeatureLine (
175  const CTempString& line_arg,
176  CTempStringEx& out_seqid,
177  CTempStringEx& out_annotname );
178 
179 private:
// Private helper that reads a feature table through an implementation object
// and returns it as a Seq-annot; this overload takes the seqid and annotation
// name explicitly.
// NOTE(review): presumably the shared back end of the public
// ReadSequinFeatureTable overloads that take seqid/annotname -- confirm.
//
// reader     - implementation object that performs the read.
// seqid      - sequence id for the table.
// annot_name - annotation name for the table.
// flags      - binary OR of EFlags (no default here).
// filter     - feature filter (no default here).
180  static CRef<CSeq_annot> x_ReadFeatureTable(CFeatureTableReader_Imp& reader,
181  const CTempString& seqid,
182  const CTempString& annot_name,
183  const TFlags flags,
184  ITableFilter* filter);
185 
// Private helper overload without explicit seqid/annotation name; it takes an
// optional seqid_prefix instead.
// NOTE(review): presumably the seqid and annotation name are obtained from the
// input's initial feature line -- confirm against the implementation.
//
// reader       - implementation object that performs the read.
// flags        - binary OR of EFlags (no default here).
// filter       - feature filter (no default here).
// seqid_prefix - defaults to kEmptyStr.
186  static CRef<CSeq_annot> x_ReadFeatureTable(CFeatureTableReader_Imp& reader,
187  const TFlags flags,
188  ITableFilter* filter,
189  const string& seqid_prefix=kEmptyStr);
190 
// Implementation object, exclusively owned by this reader via unique_ptr.
191  unique_ptr<CFeatureTableReader_Imp> m_pImpl;
192 };
193 
194 
195 
196 END_objects_SCOPE
198 
199 #endif // OBJTOOLS_READERS___READFEAT__HPP
long TFlags
binary OR of EFlags
Definition: readfeat.hpp:79
unique_ptr< CFeatureTableReader_Imp > m_pImpl
Definition: readfeat.hpp:191
Defines and provides stubs for a general interface to a variety of file readers.
Definition: reader_base.hpp:63
EFlags
Customization flags that are relevant to all CReaderBase derived readers.
Definition: reader_base.hpp:73
virtual CRef< CSerialObject > ReadObject(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as the most appropriate Genbank object.
long TReaderFlags
Definition: reader_base.hpp:84
TAnnotList TAnnots
Definition: reader_base.hpp:91
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
Definition: Seq_entry.hpp:56
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
Use to give a feature filter to CFeature_table_reader.
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
static const char location[]
Definition: config.c:97
string
Definition: cgiapp.hpp:687
#define NCBI_STD_DEPRECATED(message)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
Lightweight interface for getting lines of data with minimal memory copying.
Modified on Wed Apr 17 13:09:45 2024 by modify_doxy.py rev. 669887