NCBI C++ ToolKit
phy_object_loader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: phy_object_loader.cpp 46268 2021-03-01 15:26:08Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Roman Katargin
27 */
28 
29 
30 #include <ncbi_pch.hpp>
31 
32 #include <corelib/ncbifile.hpp>
33 #include <corelib/ncbistr.hpp>
34 
35 #include <gui/objutils/label.hpp>
36 
39 
41 
43 
44 #include <util/icanceled.hpp>
45 #include <util/xregexp/regexp.hpp>
47 
50 
// Member-initializer list: copies the caller's `filenames` into m_FileNames,
// the list that is iterated later when the files are loaded.
// NOTE(review): the signature line is missing from this listing; presumably
// this is CPhyObjectLoader(const vector<wxString>& filenames) - confirm.
52  : m_FileNames(filenames)
53 {
    // Nothing else to do at construction time.
54 }
55 
57 {
    // Hands back the member container m_Objects, which the loading code in
    // this file fills with one SObject per tree read.
    // NOTE(review): the signature line is missing from this listing;
    // presumably this is GetObjects() - confirm.
58  return m_Objects;
59 }
60 
62 {
    // Returns a fixed, human-readable description of this loader's task.
    // NOTE(review): the signature line is missing from this listing;
    // presumably this is GetDescription() const - confirm.
63  return "Loading Newick Tree Files";
64 }
65 
67 {
    // No preparation is performed here; the step unconditionally reports success.
    // NOTE(review): the signature line is missing from this listing;
    // presumably this is PreExecute() - confirm.
68  return true;
69 }
70 
72 {
    // Loads every file named in m_FileNames and appends one SObject (object +
    // label) to m_Objects for each tree that is read.
    //
    // Returns false if `canceled` reports a cancellation request at the start
    // of a file; otherwise returns true, even when some files failed to load.
    // Failures raised while loading a file are caught, recorded through
    // x_UpdateHTMLResults() and the loop moves on to the next file.
    //
    // NOTE(review): several lines of this function are missing from this
    // listing, including the function signature and the declarations of
    // `obj_mgr`, `file` and `btc`. Presumably this is Execute(ICanceled&),
    // `file` is opened from `fn`, and `btc` is built from `tree` - confirm
    // against the real source file.
74  CRef<CScope> scope(new CScope(*obj_mgr));
75  scope->AddDefaults();
76 
77  ITERATE(vector<wxString>, it, m_FileNames) {
    // Cancellation is polled once per file, before any reading starts.
    // NOTE(review): it is not re-checked inside the line-reading loops below.
78  if (canceled.IsCanceled())
79  return false;
80 
81  const wxString& fn = *it;
82 
83  try {
85 
86  // load the tree(s). If the file is a nexus file, have to first find
87  // the tree(s) within the file, then pass them to the newick reader.
88  // Otherwise call the newick reader on the whole file
89  CNcbiIstream& is = file.GetIstream();
    // Format sniffing: a leading '#' is taken to mean NEXUS; anything else
    // is handed to the Newick reader as-is.
90  if (is.peek() == '#') {
91 
92  // Must be a nexus file
93  // find tree section (starts with "begin trees;")
94  string line;
95  while (!is.eof() && is.good()) {
96  NcbiGetlineEOL(is, line);
97 
98  // find tree block
99  if (NPOS != NStr::FindNoCase(line, "BEGIN TREES;")) {
100 
101  // Find line(s) that contain "tree" (case-insensitive, anywhere in
102  // the line) and pass the first '(' onward to ReadNewickTree
    // NOTE(review): this loop only ends when the stream is exhausted; it
    // does not stop at the end of the trees block, so any later line that
    // contains both "tree" and '(' is also treated as a tree.
103  while (!is.eof() && is.good()) {
104  NcbiGetlineEOL(is, line);
105  if (NPOS != NStr::FindNoCase(line, "tree")) {
106  size_t start_pos = line.find_first_of('(');
107  size_t stop_pos = line.find_last_of(';');
108 
    // Lines without '(' carry no tree text and are skipped.
109  if (start_pos != NPOS) {
110  // Read tree into a string. Tree may be on multiple lines
111  string tree_str = line.substr(start_pos);
    // No ';' on the first line: keep appending following lines until one
    // contains ';' or the stream ends.
    // NOTE(review): stop_pos is only tested against NPOS; it is not
    // compared with start_pos and nothing is trimmed after the ';'.
112  if (stop_pos == NPOS) {
113  while (!is.eof() && is.good()) {
114  NcbiGetlineEOL(is, line);
115  tree_str += line;
116  if (NPOS != line.find_last_of(';')) {
117  break;
118  }
119  }
120  }
121 
122  // Load the tree. But first remove any nexus metacomments:
123  // https://code.google.com/p/beast-mcmc/wiki/NexusMetacommentFormat
    // The pattern matches a bracketed "[...]" section sitting between ')'
    // and ':' and replaces the whole match with "):".
124  CRegexpUtil reg(tree_str);
125  reg.Replace("\\)\\[([^]]+)\\]:", "):");
126  string res = reg.GetResult();
127 
128  CNcbiIstrstream tree_istr(res);
129  unique_ptr<TPhyTreeNode> tree(ReadNewickTree(tree_istr));
131 
    // Generate a label for the object and queue object + label as a result.
132  string label;
133  CLabel::GetLabel(*btc, &label, CLabel::eDefault, scope);
134  m_Objects.push_back(SObject(*btc, label));
135  }
136  }
137  }
138 
139  // Done
    // Only the first "BEGIN TREES;" section is processed; leave the outer
    // search loop once the inner loop has finished.
140  break;
141  }
142  }
143  }
144  else {
145  // Standard Newick format - file contains 1 tree
146  unique_ptr<TPhyTreeNode> tree(ReadNewickTree(file.GetIstream()));
148 
149  // not implemented now
150  /*
151  if (m_Params.GetSort()) {
152  CRef<CFeatureDescr> fd(new CFeatureDescr());
153  fd->SetId(btc->SetFdict().Get().size());
154  fd->SetName("do_sort");
155  btc->SetFdict().Set().push_back(fd);
156  }
157  */
158 
    // Same labeling and queuing step as in the NEXUS branch above.
159  string label;
160  CLabel::GetLabel(*btc, &label, CLabel::eDefault, scope);
161  m_Objects.push_back(SObject(*btc, label));
162  }
163  }
    // Record the failure for this file (no error listener is passed, only
    // the message text) and continue with the next file.
164  catch (const CException& e) {
165  x_UpdateHTMLResults(fn, 0, e.GetMsg());
166  }
167  catch (const exception& e) {
168  x_UpdateHTMLResults(fn, 0, e.what());
169  }
170  }
171 
    // Reaching this point means the run was not canceled; per-file errors,
    // if any, were recorded above rather than reported through the result.
172  return true;
173 }
174 
176 {
    // Calls x_ShowErrorsDlg() with the dialog title below, then reports
    // success unconditionally.
    // NOTE(review): the signature line is missing from this listing;
    // presumably this is PostExecute() and the dialog presents the errors
    // recorded via x_UpdateHTMLResults() during loading - confirm.
177  x_ShowErrorsDlg(wxT("Newick tree import errors"));
178  return true;
179 }
180 
CRegexpUtil –.
Definition: regexp.hpp:312
void x_ShowErrorsDlg(const wxString &title)
void x_UpdateHTMLResults(const wxString &object, objects::ILineErrorListener *errCont, const string &exception="", const string &error_msg="", const wxString &objectName=wxT("File:"))
CScope –.
Definition: scope.hpp:92
Interface for testing cancellation request in a long lasting operation.
Definition: icanceled.hpp:51
vector< SObject > TObjects
The NCBI C++ standard methods for dealing with std::string.
CRef< objects::CBioTreeContainer > MakeDistanceSensitiveBioTreeContainer(const TPhyTreeNode *tree)
Conversion from TPhyTreeNode to CBioTreeContainer, potentially without dist feature key.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
virtual bool PreExecute()
vector< wxString > m_FileNames
virtual bool Execute(ICanceled &canceled)
virtual TObjects & GetObjects()
CPhyObjectLoader(const vector< wxString > &filenames)
virtual bool PostExecute()
virtual string GetDescription() const
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
@ eDefault
Definition: label.hpp:73
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
size_t Replace(CTempStringEx search, CTempString replace, CRegexp::TCompile compile_flags=CRegexp::fCompile_default, CRegexp::TMatch match_flags=CRegexp::fMatch_default, size_t max_replace=0)
Replace occurrences of a substring within a string by pattern.
Definition: regexp.cpp:289
string GetResult(void)
Get result string.
Definition: regexp.hpp:582
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
virtual bool IsCanceled(void) const =0
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
#define NPOS
Definition: ncbistr.hpp:133
static const char label[]
FILE * file
#define wxT(x)
Definition: muParser.cpp:41
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
The Object manager core.
static int filenames
Definition: pcregrep.c:172
TPhyTreeNode * ReadNewickTree(CNcbiIstream &is)
Newick format input.
USING_SCOPE(objects)
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
Modified on Wed Apr 17 13:08:05 2024 by modify_doxy.py rev. 669887