NCBI C++ ToolKit
cuCdCore.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuCdCore.hpp 61653 2014-02-05 15:59:24Z lanczyck $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Adapted from CDTree-1 code by Chris Lanczycki
27  *
28  * File Description:
29  *
30  * Subclass of CCdd for use by CDTree
31  *
32  * ===========================================================================
33  */
34 
35 #ifndef CU_CDCORE_HPP
36 #define CU_CDCORE_HPP
37 
41 #include <map>
42 //USING_NCBI_SCOPE;
45 BEGIN_SCOPE(cd_utils)
46 
50 };
51 
52 const int PENDING_ROW_START = 1000000;
53 
55 {
56 public:
57  enum AlignmentSrc {
58  NORMAL_ALIGNMENT = 0,
59  PENDING_ALIGNMENT
60  };
62  USE_NORMAL_ALIGNMENT=0,
64  USE_ALL_ALIGNMENT
65  };
66 
67  CCdCore(void); // constructor
68  virtual ~CCdCore(void); // destructor
69 
70  /* CD identifier methods */
71  string GetAccession(int& Version) const; // get accession and version of CD
72  string GetAccession() const;
73  void SetAccession(string Accession, int Version); // set accession and version of CD
74  void SetAccession(string Accession);
75  void EraseUID(); // erase CD's uid
76  int GetUID() const; // return the first 'uid' found, or '0' if none exist.
77  // (this is the PSSM_id for a published CD)
78  /* ADDED */
79  bool HasCddId(const CCdd_id& id) const; // is 'id' an identifier for this CD
80 
81  /* Basic information about CD */
82  string GetLongDescription(); // long description of CD
83  string GetUpdateDate(); // last update date of CD
84  int GetNumRows() const; // number of rows in CD
85  int GetNumSequences() const; // number of sequences in CD
86  int GetNumRowsWithSequences() const; // number of rows with a valid sequence index
87  int GetAlignmentLength() const; // total number of aligned residues
88  int GetPSSMLength() const; // number of residues in master, from first to last aligned residue
89 
90  /* ADDED: Block information */
91  int GetNumBlocks() const; // return number of blocks in alignment (0 if no alignment)
92  bool GetCDBlockLengths(vector<int>& lengths) const;
93  bool GetBlockStartsForRow(int rowIndex, vector<int>& starts) const;
94 
95  /* Find/convert sequence list and row indices */
96  int GetSeqIndexForRowIndex(int rowIndex) const; // map alignment row to sequence index (-1 if invalid row)
97  int GetMasterSeqIndex() const; // get sequence index of the master sequence (-1 if fails)
98  int GetSeqIndex(const CRef<CSeq_id>& SeqID) const; // map seqId to the first possible sequence list index (-1 if fails)
99  int GetNthMatchFor(CRef<CSeq_id>& ID, int N); // get RowIndex of Nth match
100  /* ADDED: find all row indices for a seqID (return # found) */
101  int GetAllRowIndicesForSeqId(const CRef<CSeq_id>& SeqID, list<int>& rows) const;
102  int GetAllRowIndicesForSeqId(const CRef<CSeq_id>& SeqID, vector<int>& rows) const;
103 
104 
105  /* Access CD info via alignment row number */
106  bool GetGI(int Row, TGi& GI, bool ignorePDBs = true); // get GI of Row (if ignorePDBs = true, don't look at PDBs for the GI)
107  bool GetPDB(int Row, const CPDB_seq_id*& pPDB); // get PDB ID of Row
108  int GetLowerBound(int Row) const; // get Row lower alignment bound; return INVALID_MAPPED_POSITION on failure
109  int GetUpperBound(int Row) const; // get Row upper alignment bound; return INVALID_MAPPED_POSITION on failure
110  bool Get_GI_or_PDB_String_FromAlignment(int RowIndex, std::string& Str, bool Pad, int Len) const ;
111  /* ADDED */
112  string GetSpeciesForRow(int Row); // find the species string for alignment row
113  string GetSequenceStringByRow(int rowId); // return the full sequence for rowId
114  bool GetSeqEntryForRow(int rowId, CRef< CSeq_entry >& seqEntry) const; // get the indicated seq_entry
115  bool GetBioseqForRow(int rowId, CRef< CBioseq >& bioseq);
116 
117 
118  /* Access CD info via sequence list index */
119  TGi GetGIFromSequenceList(int SeqIndex) const; // get GI from sequence list
120  string GetDefline(int SeqIndex) const; // get description from sequence list
121 
122  string GetSequenceStringByIndex(int SeqIndex); // return the full sequence for index SeqIndex
123  string GetSpeciesForIndex(int SeqIndex); // get species name from sequence list
124  bool GetSeqEntryForIndex(int seqIndex, CRef< CSeq_entry > & seqEntry) const; // was cdGetSeq from algMerge
125  bool GetBioseqForIndex(int seqIndex, CRef< CBioseq >& bioseq) ;
126 
127  // Obtain a copy of the first bioseq found that matches the ID passed in.
128  // Returns true if this is possible; false otherwise.
129  bool CopyBioseqForSeqId(const CRef< CSeq_id>& seqId, CRef< CBioseq >& bioseq) const;
130 
131  // Recursively look for a bioseq with the given seqid in the sequence list; return the first instance found.
132  bool GetBioseqWithSeqId(const CRef< CSeq_id>& seqid, const CBioseq*& bioseq) const;
133 
134  /* Examine alignment for a SeqId or footprint */
135  bool HasSeqId(const CRef<CSeq_id>& ID) const; // see if ID matches any ID in alignment (deprecate???)
136  bool HasSeqId(const CRef<CSeq_id>& ID, int& RowIndex) const; // same, but return row that matches
137  int GetNumMatches(const CRef<CSeq_id>& ID) const; // get num-times ID matches an ID in alignment
138 
139  /* SeqID getters ... from alignment info */
140  bool GetSeqIDForRow(int Pair, int DenDiagRow, CRef<CSeq_id>& SeqID) const; // get SeqID from alignment
141  bool GetSeqIDFromAlignment(int RowIndex, CRef<CSeq_id>& SeqID) const;
142 
143  /* SeqID getters ... from sequence list */
144  // CAUTION: the first method here may not give you the CSeq_id you expect/want.
145  // When there are multiple CSeq_ids for the specified index, priority is
146  // given to the PDB-type identifier, then to a GI, and then to 'other'.
147  // If any other type is present, this method returns false and an empty CRef.
148  bool GetSeqIDForIndex(int SeqIndex, CRef<CSeq_id>& SeqID) const; // get SeqID from sequence list
149  bool GetSeqIDs(int SeqIndex, list< CRef< CSeq_id > >& SeqIDs); // get all SeqIDs from sequence list
150  const list< CRef< CSeq_id > >& GetSeqIDs(int SeqIndex) const; // get all SeqIDs from sequence list
151 
152  /* Sequence or row removal */
153  bool EraseOtherRows(const std::vector<int>& KeepRows); // erase all rows from alignment not in KeepRows
154  bool EraseTheseRows(const std::vector<int>& TossRows); // erase all rows from alignment in TossRows
155  void EraseSequence(int SeqIndex); // erase a sequence from the set of seqs
156  void EraseSequences(); // erase sequences not in alignment
157  void ErasePendingRows(set<int>& rows);
158  void ErasePendingRow(int row);
159 
160  /* Methods for structures, structure alignments, MMDB identifiers */
161  bool HasStructure() const; // true if there are any PDB seq-ids in seqlist
162  bool Has3DMaster() const;
163  int Num3DAlignments() const;
164  bool Has3DSuperpos(list<int>& MMDBId_vec) const;
165  bool GetRowsForMmdbId(int mmdbId, list<int>& rows) const; // find all rows with this mmdbId
166  bool GetRowsWithMmdbId(vector<int>& rows) const; // find all rows with a mmdbid
167  bool GetMmdbId(int SeqIndex, int& id) const; // get mmdb-id from sequence list
168  int GetMmdbIdWithEvidence(set<int>& MmdbIds) const;
169  int GetStructuralRowsWithEvidence(vector<int>& rows) const;
170 
171  // Returns true only if one of the following is true:
172  // a) the master is not a structure and master3d is empty,
173  // b) the master is a structure and master3d contains only the Seq-id for the master, or
174  // c) the master is consensus, and row 1 is a structure whose Seq-id is the only entry in master3d.
175  bool IsMaster3DOK() const;
176 
177  // If the master is a structure, fill in the master3d field with its PDB SeqId.
178  // Return true if the field is populated at exit (whether or not it was correctly set
179  // to begin with), or false if master is not a structure or otherwise failed.
180  // If checkRow1WhenConsensusMaster is true and the master is a consensus sequence,
181  // then synchronize master3d based on row 1, as above; otherwise, master3d is always emptied.
182  // *** NOTE: this method *always* resets master3d first. So, when false is returned,
183  // master3d will be empty.
184  bool SynchronizeMaster3D(bool checkRow1WhenConsensusMaster = true);
185 
186  /* CD alignment methods (most added or renamed) */
187 
188  // Returns coordinate on 'otherRow' that is mapped to 'thisPos' on 'thisRow'.
189  // Returns INVALID_MAPPED_POSITION on failure.
190  int MapPositionToOtherRow(int thisRow, int thisPos, int otherRow) const;
191 
192  // Formerly GetSeqPosition(...). Returns INVALID_MAPPED_POSITION on failure.
193  int MapPositionToOtherRow(const CRef< CSeq_align >& seqAlign, int thisPos, CoordMapDir mapDir) const;
194 
195  bool IsSeqAligns() const; // true if CD has alignment
196  bool GetAlignment(CRef< CSeq_annot >& seqAnnot); // return the first Seq_annot of type 'align'
197  const CRef< CSeq_annot >& GetAlignment() const; // return the first seq_annot of type 'align'
198 
199  const list< CRef< CSeq_align > >& GetSeqAligns() const; // get the list of Seq-aligns in alignment
200  list< CRef< CSeq_align > >& GetSeqAligns(); // get the list of Seq-aligns in alignment (editable)
201  bool GetSeqAlign(int Row, CRef< CSeq_align >& seqAlign); // get the Rowth Seq-align (editable)
202  const CRef< CSeq_align >& GetSeqAlign(int Row) const; // get the Rowth Seq-align
203  //int FindDDBySeqId(CRef<CSeq_id>& SeqID, TDendiag* & ResultDD, TDendiag* pNeedOverlapDD, int isSelf,int istart);
204 
205  bool UsesConsensusSequenceAsMaster() const;
206  bool HasConsensusSequence() const;
207  int GetRowsWithConsensus(vector<int>& consensusRows) const;
208  bool FindConsensusInSequenceList(vector<int>* indices = NULL) const;
209 
210  bool IsInPendingList(const CRef<CSeq_id>& ID, vector<int>& listIndex) const; // true if ID is in pending list; returns all indices found
211  int GetNumPending() const {return static_cast<int>(GetPending().size());} // number of elements in the Pending member data; the cast makes the size-type -> int narrowing explicit
212 
213  //add aligns or sequences to CD
214 
215  bool AddSeqAlign(CRef< CSeq_align > seqAlign);
216  bool AddPendingSeqAlign(CRef< CSeq_align > seqAlign);
217  bool AddSequence(CRef< CSeq_entry > seqAntry);
218  void Clear();
219 
220  /* Comment methods (there can be multiple comments) */
221  void SetComment(CCdd_descr::TComment oldComment, CCdd_descr::TComment newComment);
222 
223  /* Old root methods */
224  bool IsOldRoot(); // old-root of CD
225  void SetOldRoot(string Accession, int Version);
226  bool GetOldRoot(int Index, string& Accession, int& Version);
227  int GetNumIdsInOldRoot();
228 
229  /* CD annotations */
230  // These add a specific type of Cdd-descr to the CD.
231  // Typically, duplicates will not be added; functions return
232  // 'false' when attempting to add a duplicate description.
233  bool AddComment(const string& comment);
234  bool AddOthername(const string& othername);
235  bool AddTitle(const string& title);
236  bool AddPmidReference(unsigned int pmid);
237  bool AddSource(const string& source, bool removeExistingSources = true);
238  bool AddCreateDate(); // uses the current time
239 
240  // Get all PMIDs found in the Cdd-descr-set, returning the number
241  // of unique PMIDs. Duplicate PMIDs are ignored by the returned set.
242  unsigned int GetPmidReferences(set<unsigned int>& pmids) const;
243 
244  // Return the first title in the list of CCdd_descrs; by convention there should
245  // be at most one. If there is no title, an empty string is returned.
246  string GetTitle() const;
247 
248  // Return all 'title' strings found in the list of CCdd_descrs.
249  // Return value is the number of such strings returned.
250  unsigned int GetTitles(vector<string>& titles) const;
251 
252  // Removes any CCdd_descr of the specified choice type.
253  bool RemoveCddDescrsOfType(int cddDescrChoice);
254 
255 
256  /* Alignment & structure annotation methods */
257  bool AllResiduesInRangeAligned(int rowId, int from, int to) const;
258  bool AlignAnnotsValid(string* err = NULL) const; // one of the checks for re-mastering/validation
259  int GetNumAlignmentAnnotations();
260  string GetAlignmentAnnotationDescription(int Index);
261  bool DeleteAlignAnnot(int Index);
262  void EraseStructureEvidence(); // scan structure-evidence, erase missing biostruc-ids
263 
264  bool HasParentType(EClassicalOrComponent parentType) const;
265  bool HasParentType(CDomain_parent::EParent_type parentType) const;
266  bool GetClassicalParentId(const CCdd_id*& parentId) const; // get id of classical parent
267  bool GetComponentParentIds(vector< const CCdd_id* >& parentIds) const; // excludes classical parent
268  string GetClassicalParentAccession(int& Version) const; // get accession and version of parent
269  string GetClassicalParentAccession() const;
270  /* Uses the 'ancestor' field in the spec; returns false if has a component parent
271  or couldn't create the Domain_parent object.
272  If 'parent' field is in use, reset 'parent' and create in the 'ancestor' field.
273  If no 'parent' or 'ancestor' field is filled, create it new.
274  */
275  bool SetClassicalParentAccessionNew(string Parent, int Version);// set accession and version of parent
276  /* Deprecated: uses the old 'parent' field in the spec */
277  void SetClassicalParentAccession(string Parent, int Version);// set accession and version of parent
278 
279 protected:
280 
281  // Return true only if 'descr' was added.
282  bool AddCddDescr(CRef< CCdd_descr >& descr);
283 
284 private:
285 
286  static bool GetBioseqWithSeqid(const CRef< CSeq_id>& seqid, const list< CRef< CSeq_entry > >& bsset, const CBioseq*& bioseq);
287 
288  bool IsNoEvidenceFor(list<int>& MmdbIds, list< CRef< CFeature_evidence > >::iterator& FeatureIterator);
289  list< CRef< CFeature_evidence > >& GetFeatureSet(list<int>& MmdbIds);
290 
291  // Prohibit copy constructor and assignment operator
292  CCdCore(const CCdCore& value);
293  CCdCore& operator=(const CCdCore& value);
294 };
295 
296 /////////////////// end of CCd inline methods
297 
298 
299 END_SCOPE(cd_utils)
301 
302 
303 #endif // CU_CDCORE_HPP
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:73
const TPending & GetPending(void) const
Get the Pending member data.
Definition: Cdd_.hpp:1440
static CRef< CSeqdesc > AddSource(CRef< CSeq_entry > entry, string taxname)
AlignmentSrc
Definition: cuCdCore.hpp:57
string
Definition: cgiapp.hpp:407
#define NULL
Definition: ncbistd.hpp:225
AlignmentUsage
Definition: cuCdCore.hpp:61
USING_SCOPE(objects)
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
const int PENDING_ROW_START
Definition: cuCdCore.hpp:52
CoordMapDir
int GetUpperBound(const CRef< CSeq_align > &seqAlign, bool onMaster)
Definition: cuAlign.cpp:471
int GetLowerBound(const CRef< CSeq_align > &seqAlign, bool onMaster)
Definition: cuAlign.cpp:456
const CharType(& source)[N]
Definition: pointer.h:1107
#define Len
Definition: deflate.h:79
int size
CCdd & operator=(const CCdd &value)
NCBI_XOBJUTIL_EXPORT string GetTitle(const CBioseq_Handle &hnd, TGetTitleFlags flags=0)
Definition: seqtitle.cpp:106
static void AddTitle(CRef< CSeq_entry > entry, string defline)
Definition: Cdd.hpp:50
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1185
#define NCBI_CDUTILS_EXPORT
Definition: ncbi_export.h:375
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:70
int GetNumPending() const
Definition: cuCdCore.hpp:211
CCdd_id –.
Definition: Cdd_id.hpp:65
EClassicalOrComponent
Definition: cuCdCore.hpp:47
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
int TGi
Type for sequence GI.
Definition: ncbimisc.hpp:1018
Modified on Tue Jul 28 11:17:55 2015 by modify_doxy.py rev. 426318