NCBI C++ ToolKit
remote_search.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* ===========================================================================
2  *
3  * PUBLIC DOMAIN NOTICE
4  * National Center for Biotechnology Information
5  *
6  * This software/database is a "United States Government Work" under the
7  * terms of the United States Copyright Act. It was written as part of
8  * the author's official duties as a United States Government employee and
9  * thus cannot be copyrighted. This software/database is freely available
10  * to the public for use. The National Library of Medicine and the U.S.
11  * Government have not placed any restriction on its use or reproduction.
12  *
13  * Although all reasonable efforts have been taken to ensure the accuracy
14  * and reliability of the software and data, the NLM and the U.S.
15  * Government do not and cannot warrant the performance or results that
16  * may be obtained by using this software or data. The NLM and the U.S.
17  * Government disclaim all warranties, express or implied, including
18  * warranties of performance, merchantability or fitness for any particular
19  * purpose.
20  *
21  * Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  * Author: Kevin Bealer
26  *
27  */
28 
29 /** @file remote_search.cpp
30  * This file implements the uniform Blast search interface in terms of
31  * the blast4 network API via the CRemoteBlast library.
32  * NOTE: This file contains work in progress and the APIs are likely to change,
33  * please do not rely on them until this notice is removed.
34  */
35 
36 #include <ncbi_pch.hpp>
41 
42 /** @addtogroup AlgoBlast
43  *
44  * @{
45  */
46 
49 BEGIN_SCOPE(blast)
50 
51 /// Supporting elements
52 
53 //
54 // Factory
55 //
56 
59 {
60  return CRef<ISeqSearch>(new CRemoteSeqSearch());
61 }
62 
65 {
67 }
68 
71 {
74 
75  return opts;
76 }
77 
78 //
79 // Seq Search
80 //
81 
83 {
84  if (m_RemoteBlast.Empty()) {
85  // Verify all parts accounted for....
86  if (m_SearchOpts.Empty()) {
87  NCBI_THROW(CSearchException, eConfigErr, "No options specified");
88  }
89 
90  if (m_Queries.Empty()) {
91  NCBI_THROW(CSearchException, eConfigErr, "No queries specified");
92  }
93 
94  if (m_Subject.Empty() || m_Subject->GetDatabaseName().empty()) {
95  NCBI_THROW(CSearchException, eConfigErr,
96  "No database name specified");
97  }
98 
99  // .. Done...
100 
103  const string& kEntrezQuery = m_Subject->GetEntrezQueryLimitation();
104  if ( !kEntrezQuery.empty() ) {
105  m_RemoteBlast->SetEntrezQuery(kEntrezQuery.c_str());
106  }
107 
108  const CSearchDatabase::TGiList& kGiList =
110  if ( !kGiList.empty() ) {
111  list<TGi> temp(kGiList.begin(), kGiList.end());
112  m_RemoteBlast->SetGIList(temp);
113  }
114 
117 
118  if ((bss.Empty()) && (sll.empty())) {
119  NCBI_THROW(CSearchException, eConfigErr,
120  "Empty queries object specified.");
121  }
122 
123  if (bss.NotEmpty()) {
125  } else {
126  _ASSERT(! sll.empty());
128  }
129  }
130 
131  return *m_RemoteBlast;
132 }
133 
134 /// Build a result set from results in a remote blast search.
135 ///
136 /// The remote blast object will be queried for results and these will
137 /// be used to build a CSearchResultSet. If the search has not yet
138 /// completed, this function will wait until it has.
139 ///
140 /// @param rb The remote blast object representing the search.
141 /// @return The results of the search as a CSearchResultSet.
144 
147 {
148  // Calling Run() directly always queues a new search.
150  //x_RemoteBlast().SetVerbose();
152 
153  const vector<string> & w = x_RemoteBlast().GetWarningVector();
154  m_Warnings.insert(m_Warnings.end(), w.begin(), w.end());
155 
157 }
158 
160 {
161  m_SearchOpts = opts;
162 }
163 
165 {
166  m_Subject = subject;
167 }
168 
170 {
171  if (query_factory.Empty()) {
172  NCBI_THROW(CSearchException, eConfigErr,
173  "CRemoteSeqSearch: empty query factory was specified.");
174  }
175 
176  m_Queries.Reset(query_factory->MakeRemoteQueryData());
177 }
178 
179 /// CRemoteBlast does not separate each hit to the query in discontinuous
180 /// Seq-aligns, so we do it here. This functionality might be merged with
181 /// CRemoteBlast::GetSeqAlignSets() in the future.
    ///
    /// Consecutive HSPs whose subject Seq-id (row 1) compares equal are wrapped
    /// in one discontinuous Seq-align of dimension 2; every Seq-align-set in
    /// the vector is replaced by its regrouped version.
    ///
    /// @param seqaligns One Seq-align-set per query, holding a flat HSP list.
    /// @return The regrouped vector, one discontinuous Seq-align per run of
    ///         HSPs that share a subject.
182 static TSeqAlignVector
184 {
185  // For each query...
186  NON_CONST_ITERATE(TSeqAlignVector, itr, seqaligns) {
187  CRef<CSeq_align_set> seq_align = *itr;
188 
189  CRef<CSeq_align_set> new_seq_align(new CSeq_align_set);
190 
191  // placeholder id; replaced by the first HSP's subject before any comparison
192  CConstRef<CSeq_id> current_subject(new CSeq_id(CSeq_id::e_Gi, 1));
193  // list of HSPs for a single query-subject pair
194  CRef<CSeq_align> current_hsp_list;
195 
196  // for each HSP ...
197  ITERATE(CSeq_align_set::Tdata, hsp_itr, seq_align->Get()) {
198 
199  const int kSubjectIndex = 1;
200  CConstRef<CSeq_id> subj_id(& (*hsp_itr)->GetSeq_id(kSubjectIndex));
201 
202  // new subject sequence (hit) found: either no hit has been opened yet,
     // or the ids are anything other than equivalent. Testing for != e_YES
     // (rather than == e_NO) keeps every other Compare() outcome from
     // reaching the else branch with an empty current_hsp_list.
203  if (current_hsp_list.Empty() || subj_id->Compare(*current_subject) != CSeq_id::e_YES) {
204 
205  current_subject = subj_id;
206 
207  if (current_hsp_list.NotEmpty()) {
208  new_seq_align->Set().push_back(current_hsp_list);
209  }
210  current_hsp_list.Reset(new CSeq_align);
211  current_hsp_list->SetType(CSeq_align::eType_disc);
212  current_hsp_list->SetDim(2);
213  current_hsp_list->SetSegs().SetDisc().Set().push_back(*hsp_itr);
214 
215  } else {
216  // same subject sequence as in previous iteration
217  current_hsp_list->SetSegs().SetDisc().Set().push_back(*hsp_itr);
218  }
219  }
     // flush the last open hit, if any
220  if (current_hsp_list.NotEmpty()) {
221  new_seq_align->Set().push_back(current_hsp_list);
222  }
223 
224  *itr = new_seq_align;
225  }
226  return seqaligns;
227 }
228 
231 {
232  // This cascades the warnings and errors: all queries get all
233  // errors and warnings. At the moment, none of the remote (or for
234  // that matter, local) code seems to have a way to categorize
235  // errors by type and query.
236 
237  // If the query number were known, and the error number were
238  // known, it is possible that the user could (in some cases) cope
239  // with the error or possibly salvage data from the non-failing
240  // requests.
241 
242  // Comments:
243  //
244  // 1. In how many (if any) client code scenarios does error
245  // recovery make sense?
246  //
247  // 2. What kinds of errors are recoverable?
248  //
249  // 3. Does the user ever need to know more than that a request
250  // found results, found nothing, or produced an error message?
251  //
252  // 4. If a single query fails, how do we avoid pairing the fatal
253  // error message with non-failing requests?
254 
255  TQueryMessages msgs;
257 
258  // Convert warnings and errors into CSearchMessage objects.
     // Warnings keep warning severity so callers can tell them apart from
     // the errors appended below.
259 
260  ITERATE(vector<string>, iter, rb.GetWarningVector()) {
261  msg.Reset(new CSearchMessage(eBlastSevWarning, -1, *iter));
262  msgs.push_back(msg);
263  }
264 
265  ITERATE(vector<string>, iter, rb.GetErrorVector()) {
266  msg.Reset(new CSearchMessage(eBlastSevError, -1, *iter));
267  msgs.push_back(msg);
268  }
269 
270  TSeqAlignVector aligns =
272 
273  // Cascade the messages -- this will result in a lot of CRef<>
274  // sharing but hopefully not too much actual computation.
275 
276  TSearchMessages msg_vec;
277 
     // one copy of the full message list per query's alignment set
278  for(size_t i = 0; i<aligns.size(); i++) {
279  msg_vec.push_back(msgs);
280  }
281 
282  return CRef<CSearchResultSet>(new CSearchResultSet(aligns, msg_vec));
283 }
284 
285 
286 //
287 // Psi Search
288 //
289 
291 {
    // Stores the options handle and builds a CRemoteBlast object from it.
292  m_SearchOpts = opts;
    // NOTE(review): the x_RemoteBlast() body in this "Psi Search" section only
    // validates options/PSSM/database and applies the Entrez-query and GI-list
    // limits while m_RemoteBlast is empty. Creating the object here appears to
    // make that guard skip the setup -- confirm this Reset() is intended.
293  m_RemoteBlast.Reset(new CRemoteBlast(& * opts));
294 }
295 
297 {
298  m_Subject = subject;
299 }
300 
302 {
303  if (m_RemoteBlast.Empty()) {
304  // Verify all parts accounted for....
305  if (m_SearchOpts.Empty()) {
306  NCBI_THROW(CSearchException, eConfigErr, "No options specified");
307  }
308 
309  if (m_Pssm.Empty()) {
310  NCBI_THROW(CSearchException, eConfigErr, "No queries specified");
311  }
312 
313  if (m_Subject.Empty() || m_Subject->GetDatabaseName().empty()) {
314  NCBI_THROW(CSearchException, eConfigErr,
315  "No database name specified");
316  }
317 
318  // .. Done...
319 
323 
324  const string& kEntrezQuery = m_Subject->GetEntrezQueryLimitation();
325  if ( !kEntrezQuery.empty() ) {
326  m_RemoteBlast->SetEntrezQuery(kEntrezQuery.c_str());
327  }
328 
329  const CSearchDatabase::TGiList& kGiList =
331  if ( !kGiList.empty() ) {
332  list<TGi> temp(kGiList.begin(), kGiList.end());
333  m_RemoteBlast->SetGIList(temp);
334  }
335  }
336 
337  return *m_RemoteBlast;
338 }
339 
342 {
343  // Calling Run() directly always queues a new search.
345  //x_RemoteBlast().SetVerbose();
346 
348 
349  const vector<string> & w = x_RemoteBlast().GetWarningVector();
350  m_Warnings.insert(m_Warnings.end(), w.begin(), w.end());
351 
353 }
354 
355 
357 {
358  if (pssm.Empty()) {
359  NCBI_THROW(CSearchException, eConfigErr,
360  "CRemotePssmSearch: empty query object was specified.");
361  }
362 
363  m_Pssm = pssm;
364 }
365 
366 END_SCOPE(blast)
368 
369 /* @} */
User-defined methods of the data storage class.
@ eBlastSevError
Definition: blast_message.h:58
Declares the CBlastProteinOptionsHandle class.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eRemote
To be used when running BLAST remotely.
CRef –.
Definition: ncbiobj.hpp:618
API for Remote Blast Requests.
Remote Sequence Search.
Remote Sequence Search.
Exception class.
Error or Warning Message from search.
Search Results for All Queries.
Class for the messages for an individual query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
CRemoteBlast & x_RemoteBlast()
Method to construct and run the remote blast search.
string GetDatabaseName() const
Accessor for the database name.
virtual void SetOptions(CRef< CBlastOptionsHandle > options)
Configure the search.
TSeqAlignVector GetSeqAlignSets()
Get the seqalign vector from the results.
virtual void SetQueryFactory(CRef< IQueryFactory > query_factory)
Set the factory which will return the queries to search for.
virtual CRef< IPssmSearch > GetPssmSearch()
Get an object to manage a remote PSSM search.
void SetDatabase(const string &x)
Set the name of the database to search against.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
list< CRef< objects::CSeq_loc > > TSeqLocs
Type definition for CSeq_loc set used as queries in the BLAST remote search class.
Definition: query_data.hpp:123
void SetGIList(const list< TGi > &gi_list)
This restricts the subject database to this list of GIs (this is not supported yet on the server end)...
static CRef< CSearchResultSet > s_BuildResultsRemote(CRemoteBlast &rb)
Build a result set from results in a remote blast search.
CConstRef< CSearchDatabase > m_Subject
Search subject.
CConstRef< CSearchDatabase > m_Subject
Search subject.
virtual CRef< ISeqSearch > GetSeqSearch()
Get an object to manage a remote sequence search.
vector< TGi > TGiList
Define a list of gis.
void SetQueries(CRef< objects::CBioseq_set > bioseqs)
Set the query as a Bioseq_set.
virtual CRef< CBlastOptionsHandle > GetOptions(EProgram)
Get an options handle for a search of the specified type.
CRemoteBlast & x_RemoteBlast()
Method to construct and run the remote blast search.
virtual CRef< CSearchResultSet > Run()
Run the search.
virtual void SetOptions(CRef< CBlastOptionsHandle > options)
Configure the search.
virtual CRef< objects::CBioseq_set > GetBioseqSet()=0
Accessor for the CBioseq_set.
CRef< CBlastOptionsHandle > m_SearchOpts
Search configuration.
const vector< string > & GetErrorVector()
This returns any errors encountered as a vector of strings.
CRef< IRemoteQueryData > m_Queries
Search queries.
CRef< IRemoteQueryData > MakeRemoteQueryData()
Creates and caches an IRemoteQueryData.
Definition: query_data.cpp:61
CRef< objects::CPssmWithParameters > m_Pssm
Search queries.
const TGiList GetGiListLimitation() const
CRef< CRemoteBlast > m_RemoteBlast
Remote search management object.
virtual CRef< CSearchResultSet > Run()
Run the search.
CRef< CBlastOptionsHandle > m_SearchOpts
Search configuration.
vector< string > m_Warnings
Warnings produced by the search.
const vector< string > & GetWarningVector()
This returns any warnings encountered as a vector of strings.
virtual void SetSubject(CConstRef< CSearchDatabase > subject)
Set the databases to search.
static TSeqAlignVector s_SplitAlignVectorBySubjects(TSeqAlignVector seqaligns)
CRemoteBlast does not separate each hit to the query in discontinuous Seq-aligns, so we do it here.
virtual TSeqLocs GetSeqLocs()=0
Accessor for the TSeqLocs.
virtual void SetQuery(CRef< objects::CPssmWithParameters > query)
Set the query to search with.
vector< string > m_Warnings
Warnings produced by the search.
string GetEntrezQueryLimitation() const
Accessor for the entrez query.
virtual void SetSubject(CConstRef< CSearchDatabase > subject)
Set the databases to search.
void SetEntrezQuery(const char *x)
Restrict search to sequences matching this Entrez query.
CRef< CRemoteBlast > m_RemoteBlast
Remote search management object.
bool SubmitSync(void)
This submits the search (if necessary) and polls for results.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
@ e_NO
different SeqId types-can't compare
Definition: Seq_id.hpp:582
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
@ eType_disc
discontinuous alignment
Definition: Seq_align_.hpp:104
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
int i
Remote implementation of the uniform BLAST search interface.
static string subject
#define _ASSERT
Modified on Thu Apr 25 08:21:41 2024 by modify_doxy.py rev. 669887