NCBI C++ ToolKit
entrez_search_tool.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: entrez_search_tool.cpp 36594 2016-10-12 20:17:36Z evgeniev $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "entrez_search_tool.hpp"
36 
37 #include <gui/objutils/label.hpp>
38 
41 
43 
45 
46 #include <wx/sizer.h>
47 #include <wx/stattext.h>
48 #include <wx/choice.h>
49 #include <wx/srchctrl.h>
50 
52 
55 
/// wxWidgets window identifiers for the controls created in
/// CEntrezSearchForm::GetWidget(): ID_COMBOBOX is passed to the database
/// wxChoice, ID_TEXT to the CSearchControl that holds the query text.
56 #define ID_COMBOBOX 11003
57 #define ID_TEXT 11414
58 
59 ///////////////////////////////////////////////////////////////////////////////
60 /// CDocsumTableModel
61 
/// Pair of strings describing one extra table column: `first` is the name of
/// the document-summary field the value is read from, `second` is the column
/// title shown to the user.
62 typedef pair<string,string> TStringPair;
63 
65 {
66 public:
67  CDocsumTableModel( const string& aDbName );
68 
69  virtual int GetNumExtraColumns() const;
70  virtual wxString GetExtraColumnName( int col ) const;
71  virtual wxVariant GetExtraValueAt( int row, int col ) const;
72 
73  virtual wxString GetImageAlias( int row ) const;
74 
75 protected:
76  string m_DbName;
77 
78  /// maps doc summary field names to columns
79  vector<TStringPair> m_Fields;
80 };
81 
82 
83 ///////////////////////////////////////////////////////////////////////////////
84 /// IDMSearchTool
86 {
87 }
88 
89 
91 {
92  return new CEntrezSearchTool();
93 }
94 
95 
97 {
98  static string s_name("Search NCBI Public Databases");
99  return s_name;
100 }
101 
102 
104 {
105  return "";
106 }
107 
108 
110 {
111  CIRef<IDMSearchForm> form(new CEntrezSearchForm(*this));
112  return form;
113 }
114 
115 
117 {
118  return false;
119 }
120 
121 
123 {
125  CEntrezSearchQuery* e_query = dynamic_cast<CEntrezSearchQuery*>(&query);
126  if(e_query) {
127  job.Reset(new CEntrezSearchJob(*e_query));
128  }
129  return job;
130 }
131 
133 {
134  return "search_tool::entrez_search_tool";
135 }
136 
138 {
139  return "Datamining Tool - Entrez Search search";
140 }
141 
142 
143 ///////////////////////////////////////////////////////////////////////////////
144 /// CEntrezSearchQuery
145 CEntrezSearchQuery::CEntrezSearchQuery(const string& terms, const string& db_name)
146 : m_Terms(terms),
147  m_DbName(db_name)
148 {
149 }
150 
151 
152 ///////////////////////////////////////////////////////////////////////////////
153 /// CEntrezSearchForm
154 
156 : m_Tool(&tool),
157  m_CurrDbName("Entrez Gene"),
158  m_DbCombo(NULL)
159 {
160 }
161 
162 
164 {
165 }
166 
167 
169 {
171 
172  m_DbNames.clear();
174  m_CurrDbName = m_DbNames.begin()->second;
175 }
176 
177 
178 
/// Registry key under which the form stores the selected database name
/// (see x_SaveSettings). The pointer itself is const so the key cannot be
/// re-pointed at run time.
static const char* const kDatabaseTag = "Database";
180 
181 
183 {
184 
187 }
188 
189 
191 {
192 
194 
195  if (m_DbCombo) {
196  view.Set(kDatabaseTag, ToStdString( m_DbCombo->GetStringSelection() ));
197  }
198 }
199 
200 
201 wxSizer* CEntrezSearchForm::GetWidget(wxWindow * parent)
202 {
203  if ( !m_Sizer) {
204  wxFlexGridSizer * sz = new wxFlexGridSizer(1, 3, 0, 0);
205  sz->AddGrowableCol(2);
206  m_Sizer = sz;
207 
208  m_Sizer->Add(new wxStaticText( parent, wxID_STATIC,
209  wxT("Select NCBI Database:"),
210  wxDefaultPosition, wxDefaultSize, 0 ),
211  0, wxALIGN_CENTER_VERTICAL|wxALL, 5);
212 
213  m_DbCombo = new wxChoice(parent, ID_COMBOBOX,
214  wxDefaultPosition, wxDefaultSize,
215  0, (const wxString*)NULL);
216 
217  m_Sizer->Add(m_DbCombo,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
218 
219  m_Text = new CSearchControl(parent, ID_TEXT, wxT(""),
220  wxDefaultPosition, wxDefaultSize,
221  wxTE_PROCESS_ENTER );
222  m_Sizer->Add(m_Text,1, wxGROW|wxALIGN_CENTER_VERTICAL|wxALL, 5);
223  //m_Text->SetAutoOff();
224  }
225  return m_Sizer;
226 }
227 
229 {
230  m_DbCombo->Clear();
231 
232  m_DbNames.clear();
235  m_DbCombo->Append(ToWxString(it->second), (void*)it->first.c_str());
236  }
237 
238  if ( !m_CurrDbName.empty() ) {
239  m_DbCombo->SetStringSelection(ToWxString(m_CurrDbName));
240  int sel = m_DbCombo->GetSelection();
241  if (sel == wxNOT_FOUND) {
242  m_DbCombo->Select(0);
243  }
244  } else {
245  m_DbCombo->Select(0);
246  }
247  m_CurrDbName = ToStdString(m_DbCombo->GetStringSelection());
248 }
249 
251 {
253  // context - independent
255 }
256 
257 
259 {
260  const char * dbname =
261  (const char*)m_DbCombo->GetClientData(m_DbCombo->GetSelection());
262 
263  string q_s = ToStdString(m_Text->GetValue());
264  CIRef<IDMSearchQuery> ref(new CEntrezSearchQuery(q_s, dbname));
265  return ref;
266 }
267 
268 
269 ///////////////////////////////////////////////////////////////////////////////
270 /// CEntrezSearchJob
271 
273 : m_Query(&query)
274 {
275  string vis_db_name = CEntrezDB::GetVisibleName(m_Query->GetDbName());
276  m_Descr = "Query: " + m_Query->GetTerms()
277  + ", database = " + vis_db_name;
278 }
279 
280 
282 {
283  if(m_Query->GetTerms().empty()) {
284  m_Error = new CAppJobError("Invalid input parameters - no search terms specified.");
285  return false;
286  }
287  return true;
288 }
289 
290 
/// Maximum number of document summaries requested from CEntrezDB::Query()
/// for a single search. Declared const: it is a fixed limit, not state.
static const int kMaxResults = 1000;
292 
294 {
295  // prepare search params and search
296  string terms(m_Query->GetTerms());
297  string db_name(m_Query->GetDbName());
298  bool assemblyDB(db_name == "assembly");
299  size_t total_uids = 0;
300 
301  xml::document docsums;
302  CEntrezDB::Query(db_name, terms, total_uids, docsums, kMaxResults);
303  m_ResultsCount = (int)total_uids;
305 
306 
308  CRef<CScope> scope(new CScope(*om));
309  scope->AddDefaults();
310 
311  if(total_uids && !IsCanceled()) {
312  // process results
313  CMutexGuard Guard(m_Mutex);
314 
315  CObjectList * obj_list = m_TempResult->GetObjectList();
316 
317  xml::node_set nodes ( docsums.get_root_node().run_xpath_query("//DocumentSummary") );
318  NON_CONST_ITERATE(xml::node_set, it, nodes) {
319  if (assemblyDB)
320  SetReleaseType(*it);
321  obj_list->AddRow(new CXmlNodeObject(*it, db_name), scope.GetPointer());
322  if(IsCanceled()) {
323  return eCanceled;
324  }
325  }
326  return eCompleted;
327  }
328  return eCanceled;
329 }
330 
332 {
333  return new CDocsumTableModel( m_Query->GetDbName() );
334 }
335 
337 {
338  xml::node::const_iterator itAccession = ds.find("AssemblyAccession");
339  if (itAccession == ds.end())
340  return;
341  if (NPOS != NStr::Find(itAccession->get_content(), "GCF_")) { // RefSeq Accession
342  xml::node releaseType("ReleaseType", "RefSeq");
343  ds.insert(releaseType);
344  }
345  else { // GenBank Accession
346  xml::node releaseType("ReleaseType", "GenBank");
347  ds.insert(releaseType);
348  }
349 }
350 
351 ///////////////////////////////////////////////////////////////////////////////
352 /// CDocsumTableModel
353 
/// Column set the CDocsumTableModel constructor uses for every database name
/// other than "gene", "genome" and "assembly". In each pair, `first` is the
/// document-summary child node looked up by GetExtraValueAt() and `second` is
/// the column title returned by GetExtraColumnName().
354 static const TStringPair skGeneralFields[] = {
355  TStringPair("AccessionVersion", "Label"),
356  TStringPair("Title", "Description"),
357  TStringPair("Extra", "FASTA IDs"),
358  TStringPair("TaxId", "Taxonomic ID")
359 };
360 
/// Column set used by the CDocsumTableModel constructor when the database
/// name is "assembly" (pairs are: document-summary node name, column title).
/// "ReleaseType" is the name of the child node that SetReleaseType() inserts
/// into each assembly summary.
361 static const TStringPair skAssemblyFields[] = {
362  TStringPair("AssemblyName", "Name"),
363  TStringPair("AssemblyAccession", "Accession"),
364  TStringPair("Organism", "Organism"),
365  TStringPair("AssemblyDescription", "Description"),
366  TStringPair("AssemblyClass", "Class"),
367  TStringPair("ReleaseType", "Release Type"),
368  TStringPair("NCBIReleaseDate", "Release Date"),
369 };
370 
/// Column set used by the CDocsumTableModel constructor when the database
/// name is "gene" (pairs are: document-summary node name, column title).
/// A name containing '/' is treated by GetExtraValueAt() as a path of nested
/// child nodes.
371 static const TStringPair skGeneFields[] = {
372  TStringPair("Name", "Label"),
373  TStringPair("Description", "Description"),
374  // The path to the ScientificName (it is nested in Organism node)
375  TStringPair("Organism/ScientificName", "Organism"),
376  TStringPair("Chromosome", "Chromosome"),
377  TStringPair("OtherAliases", "Aliases"),
378  TStringPair("MapLocation", "Map Location")
379 };
380 
/// Column set used by the CDocsumTableModel constructor when the database
/// name is "genome" (pairs are: document-summary node name, column title).
381 static const TStringPair skGenomeFields[] = {
382  TStringPair("Organism_Name", "Name"),
383  TStringPair("Organism_Kingdom", "Kingdom"),
384  TStringPair("Organism_Group", "Group"),
385  TStringPair("Organism_Subgroup", "Subgroup"),
386  TStringPair("Defline", "Defline"),
387  TStringPair("Assembly_Name", "Assembly Name"),
388  TStringPair("Assembly_Accession", "Assembly Accession")
389 };
390 
391 CDocsumTableModel::CDocsumTableModel( const string& aDbName )
392 : m_DbName( aDbName )
393 {
394  const TStringPair* ptr;
395  int size;
396 
397  if( m_DbName == "gene" ){
398  ptr = &skGeneFields[0];
399  size = sizeof(skGeneFields);
400  }
401  else if( m_DbName == "genome" ){
402  ptr = &skGenomeFields[0];
403  size = sizeof(skGenomeFields);
404  }
405  else if (m_DbName == "assembly"){
406  ptr = &skAssemblyFields[0];
407  size = sizeof(skAssemblyFields);
408  }
409  else {
410  ptr = &skGeneralFields[0];
411  size = sizeof(skGeneralFields);
412  }
413 
414  int num = size /sizeof(TStringPair);
415  for( int i = 0; ptr && i < num; i++ ){
416  m_Fields.push_back( *(ptr +i) );
417  }
418 }
419 
421 {
422  return (int)m_Fields.size();
423 }
424 
425 wxString CDocsumTableModel::GetExtraColumnName( int col ) const
426 {
427  if( col < 0 || col >= GetNumExtraColumns() ){
428  _ASSERT(false);
429  NCBI_THROW(CException, eUnknown, "Invalid extra column index");
430  }
431  return ToWxString(m_Fields[col].second);
432 }
433 
434 wxVariant CDocsumTableModel::GetExtraValueAt( int row, int col ) const
435 {
436  if( col < 0 || col >= GetNumExtraColumns() ){
437  _ASSERT(false);
438  NCBI_THROW(CException, eUnknown, "Invalid extra column index");
439  }
440 
441  const CObject* obj = m_ObjectList->GetObject( row );
442  const CXmlNodeObject* doc_sum = dynamic_cast<const CXmlNodeObject*>(obj);
443  if( !doc_sum )
444  return ToWxString( "" );
445 
446  const string& field = m_Fields[col].first;
447  xml::node::const_iterator field_node = doc_sum->GetNode().find(field.c_str());
448  if ((field_node == doc_sum->GetNode().end())) {
449  do {
450  if ((string::npos == field.find('/')))
451  break;
452 
453  // Search for a path (unfortunately XPath queries are supportted only for the root node)
454  vector<string> nodes;
455  NStr::Split(field, "/", nodes);
456  const xml::node* parent = &(doc_sum->GetNode());
457  size_t count = nodes.size();
458  size_t i;
459  for (i = 0; parent && (i<count); ++i) {
460  field_node = parent->find(nodes[i].c_str());
461  if ((field_node == parent->end()))
462  break;
463  parent = &(*field_node);
464  }
465 
466  if (!parent || (i != count))
467  break;
468 
469  return ToWxString(parent->get_content());
470  }
471  while (false);
472  return ToWxString("");
473  }
474 
475  return ToWxString( field_node->get_content() );
476 }
477 
478 wxString CDocsumTableModel::GetImageAlias( int row ) const
479 {
480  if( m_DbName == "gene" ) return wxT("symbol::feature");
481  if( m_DbName == "protein" ) return wxT("symbol::sequence_protein");
482  if( m_DbName == "nucleotide" ) return wxT("symbol::sequence_dna");
483  if( m_DbName == "assembly" ) return wxT("symbol::sequence");
484 
485  return wxT("");
486 }
487 
488 
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" ) ...
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:783
The xml::node_set class is used to store xpath query result set.
Definition: node_set.hpp:67
#define ID_COMBOBOX
virtual void x_LoadSettings(CGuiRegistry::TReadView &view)
virtual string GetName() const
returns unique name of the method that is used in UI to identify it
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:970
virtual IUITool * Clone() const
CSearchControl * m_Text
CObject * GetObject(int row)
access to values (row, column)
pair< string, string > TStringPair
CDocsumTableModel.
wxString ToWxString(const char *s)
Definition: wx_utils.hpp:157
virtual void x_LoadSettings(CGuiRegistry::TReadView &)
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:461
CEntrezSearchJob(CEntrezSearchQuery &query)
CEntrezSearchJob.
string m_Descr
human-readable description of the Job
static int kMaxResults
virtual bool IsCanceled() const
virtual void Init()
#define NULL
Definition: ncbistd.hpp:225
string GetDbName() const
IDMSearchQuery - abstract data mining query.
string GetTerms() const
static const TStringPair skGeneFields[]
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:776
#define NPOS
Definition: ncbistr.hpp:130
CObjectList Data structure representing a list of CObjects with associated Scopes and other optional ...
Definition: object_list.hpp:61
#define ID_TEXT
int i
The xml::document class is used to hold the XML tree and various bits of information about it...
Definition: document.hpp:80
virtual CObjectListTableModel * x_GetNewOLTModel() const
factory method creating new column handler for CObjectListWidget
CMutex m_Mutex
synchronizes access to the Job members
virtual wxSizer * GetWidget(wxWindow *parent)
return a widget associated with the form; the form controls the lifetime of the widget (do not delete...
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
int m_MaxResultsCount
Max possible results count.
virtual int GetNumExtraColumns() const
EJobState
Job states (describe FSM)
Definition: app_job.hpp:86
IUITool represents an abstract algorithm that is bound to a UI component.
Definition: ui_tool.hpp:58
CRef< CAppJobError > m_Error
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2786
CEntrezSearchTool.
IDMSearchFormController * m_Controller
virtual CIRef< IDMSearchQuery > ConstructQuery()
static const TStringPair skAssemblyFields[]
static const TStringPair skGenomeFields[]
CDocsumTableModel(const string &aDbName)
static string query
virtual void OnSearchEnabled(bool)
virtual void x_SaveSettings(CGuiRegistry::TReadWriteView &view) const
virtual void UpdateContexts()
updates m_ContextCombo
virtual string GetDescription() const
returns a detailed description of the method that is used in UI
vector< TStringPair > m_Fields
maps doc summary field names to columns
static void GetDbNames(vector< string > &names)
CTestSearchForm.
static const char * kDatabaseTag
virtual CIRef< IDMSearchForm > CreateSearchForm()
factory method for creating a form representing the tool
CAppJobError Default implementation for IAppJobError - encapsulates a text error message.
virtual bool x_ValidateParams()
returns true if Job params are correct, implement in derived classes
CRef< CObjectList > m_ObjectList
vector< TStrPair > TNamePairs
CRef< CEntrezSearchQuery > m_Query
static string GetVisibleName(const string &db_name)
CRef< objects::CObjectManager > om
The xml::node class is used to hold information about one XML node.
Definition: node.hpp:106
virtual EJobState x_DoSearch()
performs searching, assuming that params are correct; Implement in derived classes ...
int size
const char * get_content(void) const
Get the content for this text node.
Definition: node.cpp:774
virtual wxString GetImageAlias(int row) const
IDataMiningContext IDataMiningContext represents an abstract context for a Search.
USING_SCOPE(objects)
CObjectListTableModel.
string GetString(const string &key, const string &default_val=kEmptyStr, const string &delim=CGuiRegistry::kDecimalDot) const
Definition: registry.cpp:1412
The Object manager core.
virtual wxVariant GetExtraValueAt(int row, int col) const
const node & get_root_node(void) const
Get a reference to the root node of this document.
Definition: document.cpp:539
virtual void x_SaveSettings(CGuiRegistry::TReadWriteView &) const
CSearchControl.
CEntrezSearchTool.
virtual void UpdateContexts()
updates m_ContextCombo
const xml::node & GetNode() const
node_set run_xpath_query(const xpath_expression &expr)
Run the given XPath query.
Definition: node.cpp:1269
iterator find(const char *name, const ns *nspace=NULL)
Find the first child node that has the given name and namespace.
Definition: node.cpp:1235
CRef< CDMSearchResult > m_TempResult
holds temporary results, guarded by Mutex
CEntrezSearchTool()
IDMSearchTool.
The xml::node::const_iterator provides a way to access children nodes similar to a standard C++ conta...
Definition: node.hpp:746
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6854
CScope –.
Definition: scope.hpp:90
CRef –.
Definition: ncbiobj.hpp:616
virtual bool IsCompatible(IDataMiningContext *context)
returns true if the tool is compatible with the provided Search Context
CObject –.
Definition: ncbiobj.hpp:180
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1153
CEntrezSearchQuery(const string &terms, const string &db_name)
CEntrezSearchTool.
static const TStringPair skGeneralFields[]
CDocsumTableModel.
string m_CurrDbName
Entrez db names.
#define wxT(x)
Definition: muParser.cpp:41
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
iterator end(void)
Get an iterator that points one past the last child for this node.
Definition: node.hpp:835
static void Query(const string &db_name, const string &terms, size_t &total_uids, xml::document &docsums, size_t max_return=0)
virtual CRef< CSearchJobBase > x_CreateJob(IDMSearchQuery &query)
implementing CSearchToolBase pure virtual function
void SetReleaseType(xml::node &ds)
Adds an additional child node, indicating the release type (RefSeq or GenBank)
#define _ASSERT
void Set(const string &key, int val, const string &delim=CGuiRegistry::kDecimalDot)
access a named key at this level, with no recursion
Definition: registry.cpp:1813
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string...
Definition: ncbiexpt.hpp:546
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3362
string ToStdString(const wxString &s)
Definition: wx_utils.hpp:145
int AddRow(CObject *obj, objects::CScope *scope)
CObjectList * GetObjectList()
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
int m_ResultsCount
total number of results
CEntrezSearchJob.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:756
size_type size(void) const
Returns the number of children this node has.
Definition: node.cpp:1176
virtual wxString GetExtraColumnName(int col) const
iterator insert(const node &n)
Insert a new child node.
Definition: node.cpp:1440
wxChoice * m_DbCombo
technical name
Modified on Sat Sep 23 14:27:31 2017 by modify_doxy.py rev. 546573