NCBI C++ ToolKit
cuPrefTaxNodes.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuPrefTaxNodes.cpp 72676 2016-05-20 15:44:21Z lanczyck $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Repackaged by Chris Lanczycki from Charlie Liu's algTaxDataSource
27  *
28  * File Description:
29  *
30  * Class to maintain lists of preferred and model tax nodes,
31  * using the Cdd-pref-nodes ASN specification.
32  *
33  * ===========================================================================
34  */
35 
36 
37 #include <ncbi_pch.hpp>
41 
44 BEGIN_SCOPE(cd_utils)
45 
46 const string CPriorityTaxNodes::PREF_TAXNODE_FILE = "data/txnodes.asn";
47 
48 CPriorityTaxNodes::CPriorityTaxNodes(TaxNodeInputType inputType) : m_inputType(inputType)
49 {
50  string filename = PREF_TAXNODE_FILE;
51  LoadFromFile(filename, false);
52 }
53 
54 CPriorityTaxNodes::CPriorityTaxNodes(const string& prefTaxnodeFileName, TaxNodeInputType inputType) : m_inputType(inputType)
55 {
56  LoadFromFile(prefTaxnodeFileName, false);
57 }
58 
59 CPriorityTaxNodes::CPriorityTaxNodes(const CCdd_pref_nodes& prefNodes, TaxNodeInputType inputType) : m_inputType(inputType)
60 {
61  BuildMap(prefNodes, false);
62  m_loaded = true;
63 }
64 
65 
66 CPriorityTaxNodes::CPriorityTaxNodes(const vector< int >& taxids, TaxClient& taxClient, TaxNodeInputType inputType) : m_inputType(inputType)
67 {
68  CCdd_org_ref_set cddOrgRefSet;
69  unsigned int nAdded = TaxIdsToCddOrgRefSet(taxids, cddOrgRefSet, taxClient);
70 
71  Reset();
72  putIntoMap(cddOrgRefSet);
73  m_loaded = (nAdded == taxids.size());
74 }
75 
77 {
78 }
79 
// Resets the object's state: clears the error message and marks the object as
// not loaded.
//   inputType             - optional; when non-NULL its value replaces
//                           m_inputType at the end of the call.
//   forceClearAncestorMap - when true, the first conditional branch below is
//                           entered regardless of inputType.
80 void CPriorityTaxNodes::Reset(TaxNodeInputType* inputType, bool forceClearAncestorMap) {
81 
82  m_err = "";
83  m_loaded = false;
85 
// Entered when forced, or when a new input type is supplied that shares no
// bits with the current m_inputType.
// NOTE(review): going by the flag name, this branch presumably clears
// m_ancestralTaxNodeMap -- confirm.
86  if (forceClearAncestorMap || (inputType && !(m_inputType & *inputType))) {
88  }
89 
// Adopt the new input type only after it has been compared with the old one.
90  if (inputType) {
91  m_inputType = *inputType;
92  }
93 }
94 
95 
96 bool CPriorityTaxNodes::LoadFromFile(const string& prefTaxnodeFileName, bool doReset)
97 {
98  bool result = ReadPreferredTaxnodes(prefTaxnodeFileName, doReset);
99 
100  if (!result)
101  m_err = "Failed to read preferred Taxonomy nodes from file '" + prefTaxnodeFileName + "'.\n";
102 
103  m_loaded = result;
104  return result;
105 }
106 
107 unsigned int CPriorityTaxNodes::Load(const CCdd_pref_nodes& prefNodes, bool reset)
108 {
109  unsigned int nInit = (reset) ? 0 : m_selectedTaxNodesMap.size();
110  BuildMap(prefNodes, reset);
111  return m_selectedTaxNodesMap.size() - nInit;
112 }
113 
114 bool CPriorityTaxNodes::ReadPreferredTaxnodes(const string& filename, bool reset)
115 {
116  CCdd_pref_nodes prefNodes;
117  if (!ReadASNFromFile(filename.c_str(), &prefNodes, false, &m_err))
118  {
119  return false;
120  }
121 
122  BuildMap(prefNodes, reset);
123  return true;
124 }
125 
// Adds the organism lists found in prefNodes to the tax node map via
// putIntoMap.
//   prefNodes - source of the preferred, model-organism and optional lists.
//   reset     - when true, Reset() is called before anything is added.
126 void CPriorityTaxNodes::BuildMap(const CCdd_pref_nodes& prefNodes, bool reset) {
127  if (reset)
128  Reset();
129 
130  //build a taxId/taxName map
// NOTE(review): the model-organism and optional lists below are each guarded
// by an m_inputType bit plus a CanGet...() check; confirm that the
// preferred-node list carries an equivalent guard.
132  putIntoMap(prefNodes.GetPreferred_nodes());
133  if ((m_inputType & eCddModelOrgs) && prefNodes.CanGetModel_organisms())
134  putIntoMap(prefNodes.GetModel_organisms());
135  if ((m_inputType & eCddOptional) && prefNodes.CanGetOptional_nodes())
136  putIntoMap(prefNodes.GetOptional_nodes());
137 }
138 
140 {
141  const list< CRef< CCdd_org_ref > >& orgList = orgRefs.Get();
142  list< CRef< CCdd_org_ref > >::const_iterator cit = orgList.begin();
143  int i = m_selectedTaxNodesMap.size();
144  for (; cit != orgList.end(); cit++)
145  {
147  i++;
148  }
149 }
150 
152 {
153  if (orgRef->CanGetReference())
154  {
155  const COrg_ref& org = orgRef->GetReference();
156  if (org.IsSetTaxname()) {
157  return org.GetTaxname();
158  }
159  }
160 
161  return kEmptyStr;
162 }
163 
165 {
166  if (orgRef->CanGetReference())
167  {
168  const COrg_ref& org = orgRef->GetReference();
169  return org.GetTaxId();
170  }
171  else
172  return 0;
173 }
174 
176 {
177  return orgRef->GetActive();
178 }
179 
180 unsigned int CPriorityTaxNodes::TaxIdsToCddOrgRefSet(const vector< int >& taxids, CCdd_org_ref_set& cddOrgRefSet, TaxClient& taxClient, vector<int>* notAddedTaxids)
181 {
182 
183  unsigned int nAdded = 0, nTaxa = taxids.size();
184  CCdd_org_ref_set::Tdata cddOrgRefList = cddOrgRefSet.Set();
185 
186  if (notAddedTaxids) notAddedTaxids->clear();
187 
188  for (unsigned int i = 0; i < nTaxa; ++i) {
189  CRef< CCdd_org_ref > cddOrgRef( new CCdd_org_ref );
190  CRef< CCdd_org_ref::TReference > orgRef( &cddOrgRef->SetReference());
191  if (cddOrgRef.NotEmpty() && taxClient.GetOrgRef(taxids[i], orgRef)) {
192  cddOrgRef->SetActive(true);
193  cddOrgRefList.push_back(cddOrgRef);
194  ++nAdded;
195  } else if (notAddedTaxids) {
196  notAddedTaxids->push_back(taxids[i]);
197  }
198  }
199  return nAdded;
200 }
201 
202 unsigned int CPriorityTaxNodes::CddOrgRefSetToTaxIds(const CCdd_org_ref_set& cddOrgRefSet, vector< int >& taxids, vector<int>* notAddedIndices)
203 {
204  int taxId;
205  unsigned int taxaIndex = 0, nAdded = 0;
206  const CCdd_org_ref_set::Tdata cddOrgRefList = cddOrgRefSet.Get();
207  CCdd_org_ref_set::Tdata::const_iterator cddOrgRefListCit = cddOrgRefList.begin(), citEnd = cddOrgRefList.end();
208 
209  if (notAddedIndices) notAddedIndices->clear();
210 
211  for (; cddOrgRefListCit != citEnd; ++cddOrgRefListCit ) {
212  taxId = getTaxId(*cddOrgRefListCit);
213  if (taxId > 0) {
214  taxids.push_back(taxId);
215  ++nAdded;
216  } else if (notAddedIndices) {
217  notAddedIndices->push_back(taxaIndex);
218  }
219  ++taxaIndex;
220  }
221  return nAdded;
222 }
223 
225 {
226  TaxidToOrgMap::iterator titEnd = m_selectedTaxNodesMap.end(), tit = titEnd;
227  TAncestorMap::iterator ancestorIt;
228 
229  if (taxid != 0) {
230  // First see if this taxid has been seen before; if so, retrieve iterator from toMap...
231  ancestorIt = m_ancestralTaxNodeMap.find(taxid);
232  if (ancestorIt != m_ancestralTaxNodeMap.end() && ancestorIt->second >= 0) {
233  tit = m_selectedTaxNodesMap.find(ancestorIt->second);
234  }
235 
236  // If no ancestralMap, or ancestor not in ancestralMap, use taxClient if present.
237  // Add ancestral taxid to ancestralMap if found.
238  if (taxClient && tit == titEnd) {
239  for (tit = m_selectedTaxNodesMap.begin(); tit != titEnd; tit++)
240  {
241  if (taxClient->IsTaxDescendant(tit->first, taxid)) {
243  break;
244  }
245  }
246  }
247  }
248 
249  return tit;
250 }
251 
253 {
255  return it != m_selectedTaxNodesMap.end();
256 }
257 
258 bool CPriorityTaxNodes::GetPriorityTaxid(int taxidIn, int& priorityTaxid, TaxClient& taxClient)
259 {
260  string nodeName;
261  return GetPriorityTaxidAndName(taxidIn, priorityTaxid, nodeName, taxClient);
262 }
263 
// Finds the priority tax node for taxidIn and reports its taxid and name.
//   taxidIn       - taxid to look up.
//   priorityTaxid - output; taxidIn itself on an exact match, the map key of
//                   the node returned by findAncestor on an ancestral match,
//                   otherwise 0.
//   nodeName      - output; getTaxName() of the matched node, otherwise
//                   kEmptyStr.
//   taxClient     - passed to findAncestor when there is no exact match.
// Returns true when either kind of match was found.
// NOTE(review): 'it' is presumably the result of looking taxidIn up in
// m_selectedTaxNodesMap, with 'itEnd' that map's end() -- confirm.
264 bool CPriorityTaxNodes::GetPriorityTaxidAndName(int taxidIn, int& priorityTaxid, string& nodeName, TaxClient& taxClient)
265 {
266  bool result = false;
268 
// Outputs start from their "not found" values.
269  priorityTaxid = 0;
270  nodeName = kEmptyStr;
271  if (it != itEnd) {
272  priorityTaxid = taxidIn;
273  result = true;
274  } else { // failed to find an exact match; try to find an ancestral match
275  it = findAncestor(taxidIn, &taxClient);
276  if (it != itEnd)
277  {
278  priorityTaxid = it->first;
279  result = true;
280  }
281  }
282 
// 'it' refers to the matched node whichever branch above produced it.
283  if (it != itEnd) { // result = true
284  nodeName = getTaxName(it->second.orgRef);
285  }
286 
287  return result;
288 }
289 
// Looks up the priority tax node for 'taxid'.
//   taxid     - taxid to look up; 0 is never matched.
//   orgNode   - output; set to the matched OrgNode stored in the map, or NULL
//               when nothing matches.
//   taxClient - forwarded to findAncestor when there is no exact match.
// Returns the 'order' field of the matched node.
// NOTE(review): 'it' is presumably the result of looking taxid up in
// m_selectedTaxNodesMap, with 'itEnd' that map's end() -- confirm.
290 // return -1 if fails or taxid = 0
291 int CPriorityTaxNodes::GetPriorityTaxnode(int taxid, const OrgNode*& orgNode, TaxClient* taxClient)
292 {
294 
295  orgNode = NULL;
296  if (taxid != 0) {
297  if (it != itEnd)
298  {
299  orgNode = &(it->second);
300  return it->second.order;
301  }
302  else // failed to find an exact match; try to find an ancestral match
303  {
304  it = findAncestor(taxid, taxClient);
305  if (it != itEnd)
306  {
307  orgNode = &(it->second);
308  return it->second.order;
309  }
310  }
311  }
312  return -1;
313 }
314 
315 // return index into list; -1 if fails
316 int CPriorityTaxNodes::GetPriorityTaxnode(int taxid, string& nodeName, TaxClient* taxClient)
317 {
318  const OrgNode* orgNode = NULL;
319 
320  nodeName = kEmptyStr;
321  if (GetPriorityTaxnode(taxid, orgNode, taxClient) != -1 && orgNode) {
322  nodeName.append(getTaxName(orgNode->orgRef));
323  return orgNode->order;
324  }
325  return -1;
326 }
327 
328 END_SCOPE(cd_utils)
void SetReference(TReference &value)
Assign a value to Reference data member.
TaxidToOrgMap m_selectedTaxNodesMap
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
size_type size() const
Definition: map.hpp:148
void putIntoMap(const CCdd_org_ref_set &orgRefs)
int GetTaxId() const
Definition: Org_ref.cpp:72
bool CanGetPreferred_nodes(void) const
Check if it is safe to call GetPreferred_nodes method.
User-defined methods of the data storage class.
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:73
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
Definition: Org_ref_.hpp:346
void clear()
Definition: map.hpp:169
virtual bool GetOrgRef(int taxId, CRef< COrg_ref > &orgRef)
const Tdata & Get(void) const
Get the member data.
list< CRef< CCdd_org_ref > > Tdata
const TReference & GetReference(void) const
Get the Reference member data.
void SetActive(TActive value)
Assign a value to Active data member.
const TModel_organisms & GetModel_organisms(void) const
Get the Model_organisms member data.
CCdd_pref_nodes –.
USING_SCOPE(objects)
#define NULL
Definition: ncbistd.hpp:225
#define kEmptyStr
Definition: ncbistr.hpp:120
bool CanGetModel_organisms(void) const
Check if it is safe to call GetModel_organisms method.
static bool ReadASNFromFile(const char *filename, ASNClass *ASNobject, bool isBinary, std::string *err)
int i
static bool isActive(const CRef< CCdd_org_ref > &orgRef)
CCdd_org_ref_set –.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
TAncestorMap m_ancestralTaxNodeMap
const_iterator end() const
Definition: map.hpp:152
const TOptional_nodes & GetOptional_nodes(void) const
Get the Optional_nodes member data.
bool LoadFromFile(const string &prefTaxnodeFileName, bool doReset=false)
bool GetPriorityTaxid(int taxidIn, int &priorityTaxid, TaxClient &taxClient)
const_iterator find(const key_type &key) const
Definition: map.hpp:153
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:358
static unsigned int CddOrgRefSetToTaxIds(const CCdd_org_ref_set &cddOrgRefSet, vector< int > &taxids, vector< int > *notAddedIndices=NULL)
void BuildMap(const CCdd_pref_nodes &prefNodes, bool reset=false)
static int getTaxId(const CRef< CCdd_org_ref > &orgRef)
unsigned int Load(const CCdd_pref_nodes &prefNodes, bool doReset=false)
TaxNodeInputType m_inputType
static unsigned int TaxIdsToCddOrgRefSet(const vector< int > &taxids, CCdd_org_ref_set &cddOrgRefSet, TaxClient &taxClient, vector< int > *notAddedTaxids=NULL)
static string getTaxName(const CRef< CCdd_org_ref > &orgRef)
bool CanGetReference(void) const
Check if it is safe to call GetReference method.
virtual bool IsTaxDescendant(int tax1, int tax2)
const TPreferred_nodes & GetPreferred_nodes(void) const
Get the Preferred_nodes member data.
Tdata & Set(void)
Assign a value to data member.
CCdd_org_ref –.
Definition: Cdd_org_ref.hpp:65
else result
Definition: token2.c:20
const CRef< CCdd_org_ref > orgRef
virtual ~CPriorityTaxNodes()
bool CanGetOptional_nodes(void) const
Check if it is safe to call GetOptional_nodes method.
void Reset(TaxNodeInputType *inputType=NULL, bool forceClearAncestorMap=false)
int GetPriorityTaxnode(int taxid, string &nodeName, TaxClient *taxClient=NULL)
bool GetPriorityTaxidAndName(int taxidIn, int &priorityTaxid, string &nodeName, TaxClient &taxClient)
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:70
bool IsPriorityTaxnode(int taxid)
TaxidToOrgMap::iterator findAncestor(int taxid, TaxClient *taxClient)
#define const
Definition: zconf.h:217
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:709
const_iterator begin() const
Definition: map.hpp:151
CPriorityTaxNodes(TaxNodeInputType inputType)
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
bool ReadPreferredTaxnodes(const string &fileName, bool reset=false)
TActive GetActive(void) const
Get the Active member data.
Modified on Sun Jul 23 20:10:21 2017 by modify_doxy.py rev. 533848