|
NCBI C++ ToolKit
|
00001 /* $Id: remote_blast.cpp 53103 2012-02-23 16:01:09Z zaretska $ 00002 * =========================================================================== 00003 * 00004 * PUBLIC DOMAIN NOTICE 00005 * National Center for Biotechnology Information 00006 * 00007 * This software/database is a "United States Government Work" under the 00008 * terms of the United States Copyright Act. It was written as part of 00009 * the author's official duties as a United States Government employee and 00010 * thus cannot be copyrighted. This software/database is freely available 00011 * to the public for use. The National Library of Medicine and the U.S. 00012 * Government have not placed any restriction on its use or reproduction. 00013 * 00014 * Although all reasonable efforts have been taken to ensure the accuracy 00015 * and reliability of the software and data, the NLM and the U.S. 00016 * Government do not and cannot warrant the performance or results that 00017 * may be obtained by using this software or data. The NLM and the U.S. 00018 * Government disclaim all warranties, express or implied, including 00019 * warranties of performance, merchantability or fitness for any particular 00020 * purpose. 00021 * 00022 * Please cite the author in any work or product based on this material. 00023 * 00024 * =========================================================================== 00025 * 00026 * Author: Kevin Bealer 00027 * 00028 * =========================================================================== 00029 */ 00030 00031 /// @file remote_blast.cpp 00032 /// Queueing and Polling code for Remote Blast API. 00033 00034 #include <ncbi_pch.hpp> 00035 #include <corelib/ncbi_system.hpp> 00036 #include <corelib/ncbitime.hpp> 00037 #include <serial/iterator.hpp> 00038 #include <algo/blast/api/remote_blast.hpp> 00039 #include <algo/blast/api/blast_options_builder.hpp> 00040 #include <algo/blast/api/search_strategy.hpp> 00041 00042 #include <objects/blast/blastclient.hpp> 00043 #include <objects/blast/blast__.hpp> 00044 #include <objects/blast/names.hpp> 00045 #include <objects/seq/Seq_data.hpp> 00046 #include <objects/seq/Bioseq.hpp> 00047 #include <objects/scoremat/Pssm.hpp> 00048 #include <objects/scoremat/PssmWithParameters.hpp> 00049 #include <objects/seqalign/seqalign__.hpp> 00050 #include <objects/blast/blastclient.hpp> 00051 #include <objmgr/util/seq_loc_util.hpp> 00052 #include "psiblast_aux_priv.hpp" // For CPsiBlastValidate::Pssm() 00053 #include <util/format_guess.hpp> // for CFormatGuess 00054 #include <serial/objistrxml.hpp> // for CObjectIStreamXml 00055 #include <serial/objistrasnb.hpp> // for CObjectIStreamAsnBinary 00056 #include <serial/objistrasn.hpp> // for CObjectIStreamAsn 00057 #include <algo/blast/api/objmgr_query_data.hpp> 00058 00059 #if defined(NCBI_OS_UNIX) 00060 #include <unistd.h> 00061 #endif 00062 00063 /** @addtogroup AlgoBlast 00064 * 00065 * @{ 00066 */ 00067 00068 BEGIN_NCBI_SCOPE 00069 USING_SCOPE(objects); 00070 BEGIN_SCOPE(blast) 00071 00072 00073 // Static functions 00074 00075 00076 /// Error value type used by Blast4 ASN.1 objects. 00077 typedef list< CRef<objects::CBlast4_error> > TErrorList; 00078 00079 00080 /// Determine whether the search is still running. 00081 /// @param reply Reply from get-search-results request. 00082 /// @return True if search needs more time, false if done or failed. 00083 static bool 00084 s_SearchPending(CRef<objects::CBlast4_reply> reply) 00085 { 00086 const list< CRef<objects::CBlast4_error> > & errors = reply->GetErrors(); 00087 00088 TErrorList::const_iterator i; 00089 00090 for(i = errors.begin(); i != errors.end(); i++) { 00091 if ((*i)->GetCode() == eBlast4_error_code_search_pending) { 00092 return true; 00093 } 00094 } 00095 return false; 00096 } 00097 00098 00099 void CRemoteBlast::x_SearchErrors(CRef<objects::CBlast4_reply> reply) 00100 { 00101 const list< CRef<CBlast4_error> > & errors = reply->GetErrors(); 00102 00103 TErrorList::const_iterator i; 00104 00105 for(i = errors.begin(); i != errors.end(); i++) { 00106 string msg; 00107 00108 if ((*i)->CanGetMessage() && (! (*i)->GetMessage().empty())) { 00109 msg = ": "; 00110 msg += (*i)->GetMessage(); 00111 } 00112 00113 switch((*i)->GetCode()) { 00114 case eBlast4_error_code_conversion_warning: 00115 m_Warn.push_back(string("conversion_warning") + msg); 00116 break; 00117 00118 case eBlast4_error_code_internal_error: 00119 m_Errs.push_back(string("internal_error") + msg); 00120 break; 00121 00122 case eBlast4_error_code_not_implemented: 00123 m_Errs.push_back(string("not_implemented") + msg); 00124 break; 00125 00126 case eBlast4_error_code_not_allowed: 00127 m_Errs.push_back(string("not_allowed") + msg); 00128 break; 00129 00130 case eBlast4_error_code_bad_request: 00131 m_Errs.push_back(string("bad_request") + msg); 00132 break; 00133 00134 case eBlast4_error_code_bad_request_id: 00135 m_Errs.push_back(string("Invalid/unknown RID (bad_request_id)") + 00136 msg); 00137 break; 00138 } 00139 } 00140 } 00141 00142 00143 00144 // CBlast4Option methods 00145 00146 void CRemoteBlast::x_CheckConfig(void) 00147 { 00148 // If not configured, throw an exception - the associated string 00149 // will contain a list of the missing pieces. 00150 00151 if (0 != m_NeedConfig) { 00152 string cfg("Configuration required:"); 00153 00154 if (eProgram & m_NeedConfig) { 00155 cfg += " <program>"; 00156 } 00157 00158 if (eService & m_NeedConfig) { 00159 cfg += " <service>"; 00160 } 00161 00162 if (eQueries & m_NeedConfig) { 00163 cfg += " <queries>"; 00164 } 00165 00166 if (eSubject & m_NeedConfig) { 00167 cfg += " <subject>"; 00168 } 00169 00170 NCBI_THROW(CRemoteBlastException, eIncompleteConfig, cfg); 00171 } 00172 } 00173 00174 CRef<objects::CBlast4_request> 00175 CRemoteBlast::GetSearchStrategy() 00176 { 00177 CRef<CBlast4_request_body> body(x_GetBlast4SearchRequestBody()); 00178 x_CheckConfig(); 00179 string errors(GetErrors()); 00180 if ( !errors.empty() ) { 00181 NCBI_THROW(CRemoteBlastException, eIncompleteConfig, errors); 00182 } 00183 CRef<CBlast4_request> retval(new CBlast4_request); 00184 if ( !m_ClientId.empty() ) { 00185 retval->SetIdent(m_ClientId); 00186 } 00187 retval->SetBody(*body); 00188 return retval; 00189 } 00190 00191 CRef<objects::CBlast4_reply> 00192 CRemoteBlast::x_SendRequest(CRef<objects::CBlast4_request_body> body) 00193 { 00194 // If not configured, throw. 00195 x_CheckConfig(); 00196 00197 // Create the request; optionally echo it 00198 00199 CRef<CBlast4_request> request(new CBlast4_request); 00200 if ( !m_ClientId.empty() ) { 00201 request->SetIdent(m_ClientId); 00202 } 00203 request->SetBody(*body); 00204 00205 if (eDebug == m_Verbose) { 00206 NcbiCout << MSerial_AsnText << *request << endl; 00207 } 00208 00209 // submit to server, get reply; optionally echo it 00210 00211 CRef<CBlast4_reply> reply(new CBlast4_reply); 00212 00213 try { 00214 CStopWatch sw(CStopWatch::eStart); 00215 00216 if (eDebug == m_Verbose) { 00217 NcbiCout << "Starting network transaction (" << sw.Elapsed() << ")" << endl; 00218 } 00219 00220 CBlast4Client().Ask(*request, *reply); 00221 00222 if (eDebug == m_Verbose) { 00223 NcbiCout << "Done network transaction (" << sw.Elapsed() << ")" << endl; 00224 } 00225 } 00226 catch(const CEofException&) { 00227 NCBI_THROW(CRemoteBlastException, eServiceNotAvailable, 00228 "No response from server, cannot complete request."); 00229 } 00230 00231 if (eDebug == m_Verbose) { 00232 NcbiCout << MSerial_AsnText << *reply << endl; 00233 } 00234 00235 return reply; 00236 } 00237 00238 CRef<objects::CBlast4_reply> 00239 CRemoteBlast::x_GetSearchResults(void) 00240 { 00241 CRef<CBlast4_get_search_results_request> 00242 gsrr(new CBlast4_get_search_results_request); 00243 00244 gsrr->SetRequest_id(m_RID); 00245 00246 CRef<CBlast4_request_body> body(new CBlast4_request_body); 00247 body->SetGet_search_results(*gsrr); 00248 00249 return x_SendRequest(body); 00250 } 00251 00252 // Pre: start, wait, or done 00253 // Post: failed or done 00254 00255 // Returns: true if done 00256 00257 bool CRemoteBlast::SubmitSync(int seconds) 00258 { 00259 // eFailed: no work to do, already an error. 00260 // eDone: already done, just return. 00261 00262 EImmediacy immed = ePollAsync; 00263 00264 switch(x_GetState()) { 00265 case eStart: 00266 x_SubmitSearch(); 00267 if (! m_Errs.empty()) { 00268 break; 00269 } 00270 immed = ePollImmed; 00271 // fall through 00272 00273 case eWait: 00274 x_PollUntilDone(immed, seconds); 00275 break; 00276 default: 00277 break; 00278 } 00279 00280 return (x_GetState() == eDone); 00281 } 00282 00283 00284 00285 // Pre: start 00286 // Post: failed, wait or done 00287 00288 // Returns: true if no error so far 00289 00290 bool CRemoteBlast::Submit(void) 00291 { 00292 switch(x_GetState()) { 00293 case eStart: 00294 x_SubmitSearch(); 00295 default: break; 00296 } 00297 00298 return m_Errs.empty(); 00299 } 00300 00301 // 00302 // The following table summarizes how to determine the status of a given 00303 // RID/search submission: 00304 // 00305 // | CheckDone() | CheckDone() 00306 // | returns true | returns false 00307 // ------------------------------------------------------------ 00308 // GetErrors() == kEmptyStr | DONE | PENDING 00309 // ------------------------------------------------------------ 00310 // GetErrors() != kEmptyStr | FAILED | UNKNOWN RID 00311 // ------------------------------------------------------------ 00312 // 00313 CRemoteBlast::ESearchStatus 00314 CRemoteBlast::CheckStatus() 00315 { 00316 ESearchStatus retval = eStatus_Unknown; 00317 00318 bool done = CheckDone(); 00319 string errors = GetErrors(); 00320 00321 if (done && errors == kEmptyStr) { 00322 retval = eStatus_Done; 00323 } else if (!done && errors == kEmptyStr) { 00324 retval = eStatus_Pending; 00325 } else if (!done && errors.find("bad_request_id") != NPOS) { 00326 retval = eStatus_Unknown; 00327 } else if (done && errors != kEmptyStr) { 00328 retval = eStatus_Failed; 00329 } 00330 return retval; 00331 } 00332 00333 bool CRemoteBlast::x_IsUnknownRID(void) 00334 { 00335 bool retval = false; 00336 if (NStr::Find(GetErrors(), "bad_request_id") != NPOS) { 00337 retval = true; 00338 } 00339 return retval; 00340 } 00341 00342 // Pre: start, wait or done 00343 // Post: wait, done, or failed 00344 00345 // Returns: true if done 00346 00347 bool CRemoteBlast::CheckDone(void) 00348 { 00349 switch(x_GetState()) { 00350 case eFailed: 00351 case eDone: 00352 break; 00353 00354 case eStart: 00355 Submit(); 00356 break; 00357 00358 case eWait: 00359 if( m_use_disk_cache ) x_CheckResultsDC(); else x_CheckResults(); 00360 } 00361 00362 int state = x_GetState(); 00363 return (state == eDone || (state == eFailed && !x_IsUnknownRID())); 00364 } 00365 00366 CRemoteBlast::TGSRR * CRemoteBlast::x_GetGSRR(void) 00367 { 00368 TGSRR* rv = NULL; 00369 00370 if (m_ReadFile) 00371 { 00372 rv = &(m_Archive->SetResults()); 00373 } 00374 else if (SubmitSync() && 00375 m_Reply.NotEmpty() && 00376 m_Reply->CanGetBody() && 00377 m_Reply->GetBody().IsGet_search_results()) { 00378 00379 rv = & (m_Reply->SetBody().SetGet_search_results()); 00380 } 00381 00382 return rv; 00383 } 00384 00385 CRef<objects::CSeq_align_set> CRemoteBlast::GetAlignments(void) 00386 { 00387 CRef<CSeq_align_set> rv; 00388 00389 TGSRR * gsrr = x_GetGSRR(); 00390 00391 if (gsrr && gsrr->CanGetAlignments()) { 00392 rv = & (gsrr->SetAlignments()); 00393 } 00394 00395 return rv; 00396 } 00397 00398 TSeqAlignVector CRemoteBlast::GetSeqAlignSets() 00399 { 00400 CRef<CSeq_align_set> al = GetAlignments(); 00401 00402 TSeqAlignVector rv; 00403 00404 CRef<CSeq_align_set> cur_set; 00405 CConstRef<CSeq_id> current_id; 00406 00407 // this loop groups all matches to one target sequences in one vector element. 00408 TSeqAlignVector temp; 00409 00410 if (al.NotEmpty()) 00411 { 00412 ITERATE(CSeq_align_set::Tdata, it, al->Get()) { 00413 // index 0 = query, index 1 = subject 00414 const int query_index = 0; 00415 CConstRef<CSeq_id> this_id( & (*it)->GetSeq_id(query_index) ); 00416 00417 if (current_id.Empty() || (CSeq_id::e_YES != this_id->Compare(*current_id))) { 00418 if (cur_set.NotEmpty()) { 00419 temp.push_back(cur_set); 00420 } 00421 cur_set.Reset(new CSeq_align_set); 00422 current_id = this_id; 00423 } 00424 cur_set->Set().push_back(*it); 00425 } 00426 } 00427 00428 if (cur_set.NotEmpty()) { 00429 temp.push_back(cur_set); 00430 } 00431 00432 CSearchResultSet::TQueryIdVector query_ids; 00433 x_ExtractQueryIds(query_ids); 00434 00435 // Fill out the return value, with empty Seq-align-set if not match for a query. 00436 TSeqAlignVector::size_type sap_index = 0; 00437 ITERATE(CSearchResultSet::TQueryIdVector, it, query_ids) { 00438 const int query_index = 0; 00439 if (sap_index < temp.size()) 00440 { 00441 list< CRef< CSeq_align > > sal = temp[sap_index]->Get(); 00442 CConstRef<CSeq_id> this_id( & (sal.front()->GetSeq_id(query_index) )); 00443 if (CSeq_id::e_YES == (*it)->Compare(sal.front()->GetSeq_id(query_index) )) 00444 { 00445 rv.push_back(temp[sap_index]); 00446 sap_index++; 00447 } 00448 else 00449 { 00450 cur_set.Reset(new CSeq_align_set); 00451 rv.push_back(cur_set); 00452 } 00453 } 00454 else 00455 { 00456 cur_set.Reset(new CSeq_align_set); 00457 rv.push_back(cur_set); 00458 } 00459 } 00460 00461 return rv; 00462 } 00463 00464 CRef<objects::CBlast4_phi_alignments> CRemoteBlast::GetPhiAlignments(void) 00465 { 00466 CRef<CBlast4_phi_alignments> rv; 00467 00468 TGSRR * gsrr = x_GetGSRR(); 00469 00470 if (gsrr && gsrr->CanGetPhi_alignments()) { 00471 rv = & (gsrr->SetPhi_alignments()); 00472 } 00473 00474 return rv; 00475 } 00476 00477 // N.B.: this function assumes that the BLAST 4 server sends the query masked 00478 // locations for each query adjacent to one another in the list of masks (i.e.: 00479 // masks-for-query1-frameA, masks-for-query1-frameB, ..., 00480 // masks-for-query2-frameA, masks-for-query2-frameB, ... etc). 00481 TSeqLocInfoVector 00482 CRemoteBlast::GetMasks(void) 00483 { 00484 TSeqLocInfoVector retval; 00485 retval.resize(GetQueries()->GetNumQueries()); 00486 00487 TGSRR::TMasks network_masks = x_GetMasks(); 00488 if (network_masks.empty()) { 00489 return retval; 00490 } 00491 00492 EBlastProgramType program = NetworkProgram2BlastProgramType(m_Program, 00493 m_Service); 00494 CConstRef<CSeq_id> previous_seqid; 00495 size_t query_index = 0; 00496 00497 ITERATE(TGSRR::TMasks, masks_for_frame, network_masks) { 00498 00499 _ASSERT(masks_for_frame->NotEmpty()); 00500 00501 CConstRef<CSeq_id> current_seqid 00502 ((*masks_for_frame)->GetLocations().front()->GetId()); 00503 if (previous_seqid.Empty()) { 00504 previous_seqid = current_seqid; 00505 } 00506 00507 // determine which query are we setting the masks for... 00508 TMaskedQueryRegions* mqr = NULL; 00509 if (CSeq_id::e_YES == current_seqid->Compare(*previous_seqid)) { 00510 mqr = &retval[query_index]; 00511 } else { 00512 mqr = &retval[++query_index]; 00513 previous_seqid = current_seqid; 00514 } 00515 00516 // all the masks for a given query and frame are in a single 00517 // Packed-seqint 00518 _ASSERT((*masks_for_frame)->GetLocations().size() == (size_t) 1); 00519 _ASSERT((*masks_for_frame)->GetLocations().front().NotEmpty()); 00520 CRef<CSeq_loc> masks = 00521 (*masks_for_frame)->GetLocations().front(); 00522 _ASSERT(masks->IsPacked_int()); 00523 00524 const CPacked_seqint& packed_int = masks->GetPacked_int(); 00525 const EBlast4_frame_type frame = (*masks_for_frame)->GetFrame(); 00526 ITERATE(CPacked_seqint::Tdata, mask, packed_int.Get()) { 00527 CRef<CSeq_interval> si 00528 (new CSeq_interval(const_cast<CSeq_id&>((*mask)->GetId()), 00529 (*mask)->GetFrom(), (*mask)->GetTo())); 00530 CRef<CSeqLocInfo> sli 00531 (new CSeqLocInfo(si, NetworkFrame2FrameNumber(frame, program))); 00532 mqr->push_back(sli); 00533 } 00534 } 00535 00536 // _ASSERT(query_index == GetQueries()->GetNumQueries() - 1); 00537 00538 return retval; 00539 } 00540 00541 CRemoteBlast::TGSRR::TMasks CRemoteBlast::x_GetMasks(void) 00542 { 00543 TGSRR::TMasks rv; 00544 00545 TGSRR * gsrr = x_GetGSRR(); 00546 00547 if (gsrr && gsrr->CanGetMasks()) { 00548 rv = gsrr->SetMasks(); 00549 } 00550 00551 return rv; 00552 } 00553 00554 list< CRef<objects::CBlast4_ka_block > > CRemoteBlast::GetKABlocks(void) 00555 { 00556 list< CRef<CBlast4_ka_block > > rv; 00557 00558 TGSRR * gsrr = x_GetGSRR(); 00559 00560 if (gsrr && gsrr->CanGetKa_blocks()) { 00561 rv = (gsrr->SetKa_blocks()); 00562 } 00563 00564 return rv; 00565 } 00566 00567 list< string > CRemoteBlast::GetSearchStats(void) 00568 { 00569 list< string > rv; 00570 00571 TGSRR * gsrr = x_GetGSRR(); 00572 00573 if (gsrr && gsrr->CanGetSearch_stats()) { 00574 rv = (gsrr->SetSearch_stats()); 00575 } 00576 00577 return rv; 00578 } 00579 00580 CRef<objects::CPssmWithParameters> CRemoteBlast::GetPSSM(void) 00581 { 00582 CRef<CPssmWithParameters> rv; 00583 00584 TGSRR * gsrr = x_GetGSRR(); 00585 00586 if (gsrr && gsrr->CanGetPssm()) { 00587 rv = & (gsrr->SetPssm()); 00588 } 00589 00590 return rv; 00591 } 00592 00593 00594 // Internal CRemoteBlast methods 00595 00596 CRemoteBlast::EState CRemoteBlast::x_GetState(void) 00597 { 00598 // CBlast4Option states: 00599 00600 // 0. start (no rid, no errors) 00601 // 1. failed (errors) 00602 // 2. wait (has rid, no errors, still pending) 00603 // 3. done (has rid, no errors, not pending) 00604 00605 EState rv = eDone; 00606 00607 if (! m_Errs.empty()) { 00608 rv = eFailed; 00609 } else if (m_RID.empty()) { 00610 rv = eStart; 00611 } else if (m_Pending) { 00612 rv = eWait; 00613 } 00614 00615 return rv; 00616 } 00617 00618 CRef<objects::CBlast4_request_body> 00619 CRemoteBlast::x_GetBlast4SearchRequestBody() 00620 { 00621 CRef<CBlast4_request_body> retval; 00622 00623 if (m_QSR.Empty()) { 00624 m_Errs.push_back("No request exists and no RID was specified."); 00625 return retval; 00626 } 00627 00628 x_SetAlgoOpts(); 00629 x_QueryMaskingLocationsToNetwork(); 00630 00631 retval.Reset(new CBlast4_request_body); 00632 retval->SetQueue_search(*m_QSR); 00633 return retval; 00634 } 00635 00636 void CRemoteBlast::x_SubmitSearch(void) 00637 { 00638 CRef<CBlast4_request_body> body(x_GetBlast4SearchRequestBody()); 00639 CRef<CBlast4_reply> reply; 00640 00641 try { 00642 reply = x_SendRequest(body); 00643 } 00644 catch(const CEofException&) { 00645 m_Errs.push_back("No response from server, cannot complete request."); 00646 return; 00647 } 00648 00649 if (reply->CanGetBody() && 00650 reply->GetBody().GetQueue_search().CanGetRequest_id()) { 00651 00652 m_RID = reply->GetBody().GetQueue_search().GetRequest_id(); 00653 } 00654 00655 x_SearchErrors(reply); 00656 00657 if (m_Errs.empty()) { 00658 m_Pending = true; 00659 } 00660 } 00661 00662 void CRemoteBlast::x_CheckResults(void) 00663 { 00664 if (! m_Errs.empty()) { 00665 m_Pending = false; 00666 } 00667 00668 if (! m_Pending) { 00669 return; 00670 } 00671 00672 CRef<CBlast4_reply> r; 00673 00674 bool try_again = true; 00675 00676 while(try_again) { 00677 try { 00678 r = x_GetSearchResults(); 00679 m_Pending = s_SearchPending(r); 00680 try_again = false; 00681 } 00682 catch(const CEofException&) { 00683 --m_ErrIgn; 00684 00685 if (m_ErrIgn == 0) { 00686 m_Errs.push_back("No response from server, " 00687 "cannot complete request."); 00688 return; 00689 } 00690 00691 SleepSec(10); 00692 } 00693 } 00694 00695 if (! m_Pending) { 00696 x_SearchErrors(r); 00697 00698 if (! m_Errs.empty()) { 00699 return; 00700 } else if (r->CanGetBody() && r->GetBody().IsGet_search_results()) { 00701 m_Reply = r; 00702 } else { 00703 m_Errs.push_back("Results were not a get-search-results reply"); 00704 } 00705 } 00706 } 00707 00708 // The input here is a hint as to whether the request might be ready. 00709 // If the flag is true, then we are polling immediately after 00710 // submission. In this case, the results will not be ready, and so we 00711 // skip the first results check to reduce net traffic. If the flag is 00712 // false, then the user is using the asynchronous interface, and we do 00713 // not know how long it has been since the request was submitted. In 00714 // this case, we check the results before sleeping. 00715 // 00716 // If this was always set to 'true' then async mode would -always- 00717 // sleep. This is undesireable in the case where (for example) 100 00718 // requests are batched together - the mandatory sleeps would add to a 00719 // total of 1000 seconds, more than a quarter hour. 00720 // 00721 // If it were always specified as 'false', then synchronous mode would 00722 // shoot off an immediate 'check results' as soon as the "submit" 00723 // returned, which creates unnecessary traffic. 00724 // 00725 // Futher optimizations are no doubt possible. 00726 00727 void CRemoteBlast::x_PollUntilDone(EImmediacy immed, int timeout) 00728 { 00729 if (eDebug == m_Verbose) 00730 cout << "polling " << 0 << endl; 00731 00732 // Configuration - internal for now 00733 00734 double start_sec = 10.0; 00735 double increment = 1.30; 00736 double max_sleep = 300.0; 00737 double max_time = timeout; 00738 00739 if (eDebug == m_Verbose) 00740 cout << "polling " << start_sec << "/" << increment << "/" << max_sleep << "/" << max_time << "/" << endl; 00741 00742 // End config 00743 00744 double sleep_next = start_sec; 00745 double sleep_totl = 0.0; 00746 00747 if (eDebug == m_Verbose) 00748 cout << "line " << __LINE__ << " sleep next " << sleep_next << " sleep totl " << sleep_totl << endl; 00749 00750 if (ePollAsync == immed) { 00751 if( m_use_disk_cache ) x_CheckResultsDC(); else x_CheckResults(); 00752 } 00753 00754 while (m_Pending && (sleep_totl < max_time)) { 00755 if (eDebug == m_Verbose) 00756 cout << " about to sleep " << sleep_next << endl; 00757 00758 double max_left = max_time - sleep_totl; 00759 00760 // Don't oversleep 00761 if (sleep_next > max_left) { 00762 sleep_next = max_left; 00763 00764 // But never sleep less than 2 00765 if (sleep_next < 2.0) 00766 sleep_next = 2.0; 00767 } 00768 00769 SleepSec(int(sleep_next)); 00770 sleep_totl += sleep_next; 00771 00772 if (eDebug == m_Verbose) 00773 cout << " done, total = " << sleep_totl << endl; 00774 00775 if (sleep_next < max_sleep) { 00776 sleep_next *= increment; 00777 if (sleep_next > max_sleep) { 00778 sleep_next = max_sleep; 00779 } 00780 } 00781 00782 if (eDebug == m_Verbose) 00783 cout << " next sleep time = " << sleep_next << endl; 00784 00785 if( m_use_disk_cache ) x_CheckResultsDC(); else x_CheckResults(); 00786 } 00787 } 00788 00789 void CRemoteBlast::x_Init(CNcbiIstream& f) 00790 { 00791 00792 // m_Archive.Reset(new CBlast4_archive); 00793 CFormatGuess::EFormat fmt_type = ncbi::CFormatGuess().Format(f); 00794 switch (fmt_type) { 00795 case CFormatGuess::eBinaryASN: 00796 m_ObjectStream.reset(new CObjectIStreamAsnBinary(f)); 00797 break; 00798 00799 case CFormatGuess::eTextASN: 00800 m_ObjectStream.reset(new CObjectIStreamAsn(f)); 00801 break; 00802 00803 /* What's up here? 00804 case CFormatGuess::eXml: 00805 m_ObjectStream.reset(new CObjectIStreamXml(f)); 00806 break; 00807 */ 00808 00809 default: 00810 NCBI_THROW(CBlastException, eInvalidArgument, 00811 "BLAST archive must be one of text ASN.1, binary ASN.1 or XML."); 00812 } 00813 m_ReadFile = true; 00814 m_ObjectType = fmt_type; 00815 m_ErrIgn = 5; 00816 m_Verbose = eSilent; 00817 m_DbFilteringAlgorithmId = -1; 00818 } 00819 00820 void CRemoteBlast::x_Init(CBlastOptionsHandle * opts) 00821 { 00822 string p; 00823 string s; 00824 opts->GetOptions().GetRemoteProgramAndService_Blast3(p, s); 00825 00826 x_Init(opts, p, s); 00827 } 00828 00829 void CRemoteBlast::x_Init(CBlastOptionsHandle * opts_handle, 00830 const string & program, 00831 const string & service) 00832 { 00833 if ((! opts_handle) || program.empty() || service.empty()) { 00834 if (! opts_handle) { 00835 NCBI_THROW(CBlastException, eInvalidArgument, 00836 "NULL argument specified: options handle"); 00837 } 00838 if (program.empty()) { 00839 NCBI_THROW(CBlastException, eInvalidArgument, 00840 "NULL argument specified: program"); 00841 } 00842 NCBI_THROW(CBlastException, eInvalidArgument, 00843 "NULL argument specified: service"); 00844 } 00845 00846 m_CBOH.Reset( opts_handle ); 00847 m_ErrIgn = 5; 00848 m_Pending = false; 00849 m_Verbose = eSilent; 00850 m_NeedConfig = eNeedAll; 00851 m_QueryMaskingLocations.clear(); 00852 m_ReadFile = false; 00853 m_DbFilteringAlgorithmId = -1; 00854 00855 m_QSR.Reset(new CBlast4_queue_search_request); 00856 00857 m_QSR->SetProgram(m_Program = program); 00858 m_QSR->SetService(m_Service = service); 00859 00860 m_NeedConfig = ENeedConfig(m_NeedConfig & ~(eProgram | eService)); 00861 00862 if (! (opts_handle && opts_handle->SetOptions().GetBlast4AlgoOpts())) { 00863 // This happens if you do not specify eRemote for the 00864 // CBlastOptions subclass constructor. 00865 00866 NCBI_THROW(CBlastException, eInvalidArgument, 00867 "CRemoteBlast: No remote API options."); 00868 } 00869 m_ClientId = kEmptyStr; 00870 } 00871 00872 void CRemoteBlast::x_Init(const string & RID) 00873 { 00874 if (RID.empty()) { 00875 NCBI_THROW(CBlastException, eInvalidArgument, 00876 "Empty RID string specified"); 00877 } 00878 00879 m_RID = RID; 00880 m_ErrIgn = 5; 00881 m_Pending = true; 00882 m_Verbose = eSilent; 00883 m_NeedConfig = eNoConfig; 00884 m_QueryMaskingLocations.clear(); 00885 m_ReadFile = false; 00886 m_DbFilteringAlgorithmId = -1; 00887 } 00888 00889 void CRemoteBlast::x_SetAlgoOpts(void) 00890 { 00891 CBlast4_parameters * algo_opts = 00892 m_CBOH->SetOptions().GetBlast4AlgoOpts(); 00893 00894 m_QSR->SetAlgorithm_options().Set() = *algo_opts; 00895 } 00896 00897 // the "int" version is not actually used (no program options need it.) 00898 void CRemoteBlast::x_SetOneParam(objects::CBlast4Field & field, 00899 const int * x) 00900 { 00901 CRef<CBlast4_value> v(new CBlast4_value); 00902 v->SetInteger(*x); 00903 00904 CRef<CBlast4_parameter> p(new CBlast4_parameter); 00905 p->SetName(field.GetName()); 00906 p->SetValue(*v); 00907 _ASSERT(field.Match(*p)); 00908 00909 m_QSR->SetProgram_options().Set().push_back(p); 00910 } 00911 00912 void CRemoteBlast::x_SetOneParam(objects::CBlast4Field & field, 00913 CRef<objects::CBlast4_mask> mask) 00914 { 00915 CRef<CBlast4_value> v(new CBlast4_value); 00916 v->SetQuery_mask(*mask); 00917 00918 CRef<CBlast4_parameter> p(new CBlast4_parameter); 00919 // as dictated by internal/blast/interfaces/blast4/params.hpp 00920 p->SetName(field.GetName()); 00921 p->SetValue(*v); 00922 _ASSERT(field.Match(*p)); 00923 00924 m_QSR->SetProgram_options().Set().push_back(p); 00925 } 00926 00927 void CRemoteBlast::x_SetOneParam(objects::CBlast4Field & field, 00928 const list<int> * x) 00929 { 00930 CRef<CBlast4_value> v(new CBlast4_value); 00931 v->SetInteger_list() = *x; 00932 00933 CRef<CBlast4_parameter> p(new CBlast4_parameter); 00934 p->SetName(field.GetName()); 00935 p->SetValue(*v); 00936 _ASSERT(field.Match(*p)); 00937 00938 m_QSR->SetProgram_options().Set().push_back(p); 00939 } 00940 00941 void CRemoteBlast::x_SetOneParam(objects::CBlast4Field & field, 00942 const char ** x) 00943 { 00944 CRef<CBlast4_value> v(new CBlast4_value); 00945 v->SetString().assign((x && (*x)) ? (*x) : ""); 00946 00947 CRef<CBlast4_parameter> p(new CBlast4_parameter); 00948 p->SetName(field.GetName()); 00949 p->SetValue(*v); 00950 _ASSERT(field.Match(*p)); 00951 00952 m_QSR->SetProgram_options().Set().push_back(p); 00953 } 00954 00955 void CRemoteBlast::SetQueries(CRef<objects::CBioseq_set> bioseqs) 00956 { 00957 if (bioseqs.Empty()) { 00958 NCBI_THROW(CBlastException, eInvalidArgument, 00959 "Empty reference for query."); 00960 } 00961 00962 m_Queries.Reset(new CBlast4_queries); 00963 m_Queries->SetBioseq_set(*bioseqs); 00964 00965 m_QSR->SetQueries(*m_Queries); 00966 m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eQueries)); 00967 } 00968 00969 void CRemoteBlast::SetQueries(CRef<objects::CBioseq_set> bioseqs, 00970 const TSeqLocInfoVector& masking_locations) 00971 { 00972 SetQueries(bioseqs); 00973 x_SetMaskingLocationsForQueries(masking_locations); 00974 } 00975 00976 void CRemoteBlast::SetQueryMasks(const TSeqLocInfoVector& masking_locations) 00977 { 00978 if (!m_QSR->IsSetQueries()) 00979 { 00980 NCBI_THROW(CBlastException, eInvalidArgument, 00981 "Queries must be set before setting the masks."); 00982 } 00983 x_SetMaskingLocationsForQueries(masking_locations); 00984 } 00985 00986 void CRemoteBlast::SetQueries(CRemoteBlast::TSeqLocList& seqlocs) 00987 { 00988 if (seqlocs.empty()) { 00989 NCBI_THROW(CBlastException, eInvalidArgument, 00990 "Empty list for query."); 00991 } 00992 00993 m_Queries.Reset(new CBlast4_queries); 00994 m_Queries->SetSeq_loc_list() = seqlocs; 00995 00996 m_QSR->SetQueries(*m_Queries); 00997 m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eQueries)); 00998 } 00999 01000 void CRemoteBlast::SetQueries(CRemoteBlast::TSeqLocList& seqlocs, 01001 const TSeqLocInfoVector& masking_locations) 01002 { 01003 SetQueries(seqlocs); 01004 x_SetMaskingLocationsForQueries(masking_locations); 01005 } 01006 01007 void 01008 CRemoteBlast::x_SetMaskingLocationsForQueries(const TSeqLocInfoVector& 01009 masking_locations) 01010 { 01011 _ASSERT(m_QSR->CanGetQueries()); 01012 if (masking_locations.empty()) { 01013 return; 01014 } 01015 01016 if (m_QSR->GetQueries().GetNumQueries() != masking_locations.size()) { 01017 CNcbiOstrstream oss; 01018 oss << "Mismatched number of queries (" 01019 << m_QSR->GetQueries().GetNumQueries() 01020 << ") and masking locations (" << masking_locations.size() << ")"; 01021 NCBI_THROW(CBlastException, eInvalidArgument, 01022 CNcbiOstrstreamToString(oss)); 01023 } 01024 01025 m_QueryMaskingLocations = const_cast<TSeqLocInfoVector&>(masking_locations); 01026 } 01027 01028 /** Creates a Blast4-mask which is supposed to contain all masked locations for 01029 * a given query sequence and frame, all of which are in the packed_int 01030 * argument. 01031 */ 01032 static CRef<CBlast4_mask> 01033 s_CreateBlastMask(const CPacked_seqint& packed_int, EBlastProgramType program) 01034 { 01035 CRef<CBlast4_mask> retval(new CBlast4_mask); 01036 01037 CRef<CSeq_loc> seqloc(new CSeq_loc); 01038 ITERATE(CPacked_seqint::Tdata, masked_region, packed_int.Get()) { 01039 CRef<CSeq_interval> seqint 01040 (new CSeq_interval(const_cast<CSeq_id&>((*masked_region)->GetId()), 01041 (*masked_region)->GetFrom(), 01042 (*masked_region)->GetTo())); 01043 if ((*masked_region)->CanGetStrand() && 01044 (*masked_region)->GetStrand() == eNa_strand_minus) { 01045 // skip this as locations on the negative strand are not 01046 // represented in the remote masking locations 01047 continue; 01048 } 01049 seqloc->SetPacked_int().Set().push_back(seqint); 01050 } 01051 retval->SetLocations().push_back(seqloc); 01052 01053 /// The frame can only be notset for protein queries or plus1 for 01054 /// nucleotide queries 01055 EBlast4_frame_type frame = 01056 (Blast_QueryIsNucleotide(program) || Blast_QueryIsTranslated(program)) 01057 ? eBlast4_frame_type_plus1 01058 : eBlast4_frame_type_notset; 01059 retval->SetFrame(frame); 01060 01061 return retval; 01062 } 01063 01064 CBlast4_get_search_results_reply::TMasks 01065 CRemoteBlast::ConvertToRemoteMasks(const TSeqLocInfoVector& masking_locations, 01066 EBlastProgramType program, 01067 vector<string>* warnings /* = NULL */) 01068 { 01069 CBlast4_get_search_results_reply::TMasks retval; 01070 01071 ITERATE(TSeqLocInfoVector, query_masks, masking_locations) { 01072 CRef<CPacked_seqint> packed_seqint(new CPacked_seqint); 01073 01074 if (query_masks->empty()) { 01075 continue; 01076 } 01077 01078 int current_frame = query_masks->front()->GetFrame(); 01079 ITERATE(TMaskedQueryRegions, mask_locs, *query_masks) { 01080 if (Blast_QueryIsTranslated(program) && current_frame != (*mask_locs)->GetFrame()) 01081 { 01082 if (!packed_seqint.Empty()) 01083 { 01084 CRef<CBlast4_mask> network_mask = s_CreateBlastMask(*packed_seqint, program); 01085 network_mask->SetFrame(FrameNumber2NetworkFrame(current_frame, program)); 01086 retval.push_back(network_mask); 01087 } 01088 current_frame = (*mask_locs)->GetFrame(); 01089 packed_seqint.Reset(new CPacked_seqint); 01090 } 01091 01092 packed_seqint->AddInterval((*mask_locs)->GetSeqId(), 01093 (*mask_locs)->GetInterval().GetFrom(), 01094 (*mask_locs)->GetInterval().GetTo()); 01095 } 01096 01097 if (!packed_seqint.Empty()) 01098 { 01099 CRef<CBlast4_mask> network_mask = s_CreateBlastMask(*packed_seqint, program); 01100 if (Blast_QueryIsTranslated(program)) 01101 network_mask->SetFrame(FrameNumber2NetworkFrame(current_frame, program)); 01102 retval.push_back(network_mask); 01103 } 01104 packed_seqint.Reset(); 01105 } 01106 return retval; 01107 } 01108 // Puts in each Blast4-mask all the masks that correspond to the same query 01109 // and the same frame. 01110 void 01111 CRemoteBlast::x_QueryMaskingLocationsToNetwork() 01112 { 01113 if (m_QueryMaskingLocations.empty()) { 01114 return; 01115 } 01116 01117 m_CBOH->GetOptions().GetRemoteProgramAndService_Blast3(m_Program, 01118 m_Service); 01119 EBlastProgramType program = NetworkProgram2BlastProgramType(m_Program, 01120 m_Service); 01121 01122 const CBlast4_get_search_results_reply::TMasks& network_masks = 01123 CRemoteBlast::ConvertToRemoteMasks(m_QueryMaskingLocations, 01124 program, &m_Warn); 01125 ITERATE(CBlast4_get_search_results_reply::TMasks, itr, network_masks) { 01126 x_SetOneParam(B4Param_LCaseMask, *itr); 01127 } 01128 01129 } 01130 01131 void CRemoteBlast::SetQueries(CRef<objects::CPssmWithParameters> pssm) 01132 { 01133 if (pssm.Empty()) { 01134 NCBI_THROW(CBlastException, eInvalidArgument, 01135 "Empty reference for query pssm."); 01136 } 01137 01138 CPsiBlastValidate::Pssm(*pssm); 01139 01140 string psi_program("blastp"); 01141 string old_service("plain"); 01142 string new_service("psi"); 01143 string delta_service("delta_blast"); 01144 01145 if (m_QSR->GetProgram() != psi_program) { 01146 NCBI_THROW(CBlastException, eNotSupported, 01147 "PSI-Blast is only supported for blastp."); 01148 } 01149 01150 if (m_QSR->GetService().empty()) { 01151 NCBI_THROW(CBlastException, eInvalidArgument, 01152 "Internal error: service is not set."); 01153 } 01154 01155 if ((m_QSR->GetService() != old_service) && 01156 (m_QSR->GetService() != new_service) && 01157 (m_QSR->GetService() != delta_service)) { 01158 01159 // Allowing "psi" allows the matrix to be set, then replaced. 01160 01161 NCBI_THROW(CBlastException, eInvalidArgument, 01162 string("PSI-Blast cannot also be ") + 01163 m_QSR->GetService() + "."); 01164 } 01165 01166 CRef<CBlast4_queries> queries_p(new CBlast4_queries); 01167 queries_p->SetPssm(*pssm); 01168 01169 m_QSR->SetQueries(*queries_p); 01170 m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eQueries)); 01171 01172 if(m_QSR->GetService() != delta_service) { 01173 m_QSR->SetService(new_service); 01174 } 01175 } 01176 01177 string CRemoteBlast::GetErrors(void) 01178 { 01179 if (m_Errs.empty()) { 01180 return string(); 01181 } 01182 01183 string rvalue = m_Errs[0]; 01184 01185 for(unsigned i = 1; i<m_Errs.size(); i++) { 01186 rvalue += "\n"; 01187 rvalue += m_Errs[i]; 01188 } 01189 01190 return rvalue; 01191 } 01192 01193 string CRemoteBlast::GetWarnings(void) 01194 { 01195 if (m_Warn.empty()) { 01196 return string(); 01197 } 01198 01199 string rvalue = m_Warn[0]; 01200 01201 for(unsigned i = 1; i<m_Warn.size(); i++) { 01202 rvalue += "\n"; 01203 rvalue += m_Warn[i]; 01204 } 01205 01206 return rvalue; 01207 } 01208 01209 const vector<string> & CRemoteBlast::GetWarningVector() 01210 { 01211 return m_Warn; 01212 } 01213 01214 const vector<string> & CRemoteBlast::GetErrorVector() 01215 { 01216 return m_Errs; 01217 } 01218 01219 CRemoteBlast::CRemoteBlast(CNcbiIstream& f) 01220 { 01221 x_Init(f); 01222 x_InitDiskCache(); 01223 } 01224 01225 CRemoteBlast::CRemoteBlast(const string & RID) 01226 { 01227 x_Init(RID); 01228 x_InitDiskCache(); 01229 } 01230 01231 CRemoteBlast::CRemoteBlast(CBlastOptionsHandle * algo_opts) 01232 { 01233 x_Init(algo_opts); 01234 x_InitDiskCache(); 01235 } 01236 01237 CRemoteBlast::CRemoteBlast(CRef<IQueryFactory> queries, 01238 CRef<CBlastOptionsHandle> opts_handle, 01239 const CSearchDatabase & db) 01240 { 01241 x_Init(opts_handle, db); 01242 x_InitQueries(queries); 01243 x_InitDiskCache(); 01244 } 01245 01246 void 01247 FlattenBioseqSet(const CBioseq_set & bss, list< CRef<CBioseq> > & seqs) 01248 { 01249 if (bss.CanGetSeq_set()) { 01250 ITERATE(CBioseq_set::TSeq_set, iter, bss.GetSeq_set()) { 01251 if (iter->NotEmpty()) { 01252 const CSeq_entry & entry = **iter; 01253 01254 if (entry.IsSeq()) { 01255 CBioseq & bs = const_cast<CBioseq &>(entry.GetSeq()); 01256 seqs.push_back(CRef<CBioseq>(& bs)); 01257 } else { 01258 _ASSERT(entry.IsSet()); 01259 FlattenBioseqSet(entry.GetSet(), seqs); 01260 } 01261 } 01262 } 01263 } 01264 } 01265 01266 CRemoteBlast::CRemoteBlast(CRef<IQueryFactory> queries, 01267 CRef<CBlastOptionsHandle> opts_handle, 01268 CRef<IQueryFactory> subjects) 01269 { 01270 x_Init(&* opts_handle); 01271 x_InitQueries(queries); 01272 SetSubjectSequences(subjects); 01273 x_InitDiskCache(); 01274 } 01275 01276 void CRemoteBlast::x_InitQueries(CRef<IQueryFactory> queries) 01277 { 01278 if (queries.Empty()) { 01279 NCBI_THROW(CBlastException, 01280 eInvalidArgument, 01281 "Error: No queries specified"); 01282 } 01283 01284 CRef<IRemoteQueryData> Q(queries->MakeRemoteQueryData()); 01285 CRef<CBioseq_set> bss = Q->GetBioseqSet(); 01286 IRemoteQueryData::TSeqLocs sll = Q->GetSeqLocs(); 01287 01288 if (bss.Empty() && sll.empty()) { 01289 NCBI_THROW(CBlastException, 01290 eInvalidArgument, 01291 "Error: No query data."); 01292 } 01293 01294 // Check if there are any range restrictions applied and if local IDs are 01295 // being used to determine how to specify the query sequence(s) 01296 01297 bool has_local_ids = false; 01298 01299 if ( !sll.empty() ) { 01300 // Only one range restriction can be sent in this protocol 01301 if (sll.front()->IsInt()) { 01302 const int kStart((int)sll.front()->GetStart(eExtreme_Positional)); 01303 const int kStop((int)sll.front()->GetStop(eExtreme_Positional)); 01304 const int kRangeLength = kStop - kStart + 1; 01305 01306 _ASSERT(bss->CanGetSeq_set()); 01307 _ASSERT( !bss->GetSeq_set().empty() ); 01308 _ASSERT(bss->GetSeq_set().front()->IsSeq()); 01309 _ASSERT(bss->GetSeq_set().front()->GetSeq().CanGetInst()); 01310 const int kFullLength = 01311 bss->GetSeq_set().front()->GetSeq().GetInst().GetLength(); 01312 01313 if (kFullLength != kRangeLength) { 01314 x_SetOneParam(B4Param_RequiredStart, &kStart); 01315 x_SetOneParam(B4Param_RequiredEnd, &kStop); 01316 } 01317 } 01318 01319 ITERATE(IRemoteQueryData::TSeqLocs, itr, sll) { 01320 if (IsLocalId((*itr)->GetId())) { 01321 has_local_ids = true; 01322 break; 01323 } 01324 } 01325 } 01326 01327 TSeqLocInfoVector user_specified_masks; 01328 x_ExtractUserSpecifiedMasks(queries, user_specified_masks); 01329 01330 if (has_local_ids) { 01331 SetQueries(bss, user_specified_masks); 01332 } else { 01333 SetQueries(sll, user_specified_masks); 01334 } 01335 } 01336 01337 void 01338 CRemoteBlast::x_ExtractUserSpecifiedMasks(CRef<IQueryFactory> query_factory, 01339 TSeqLocInfoVector& masks) 01340 { 01341 masks.clear(); 01342 CObjMgr_QueryFactory* objmgrqf = NULL; 01343 if ( (objmgrqf = dynamic_cast<CObjMgr_QueryFactory*>(&*query_factory))) { 01344 masks = objmgrqf->ExtractUserSpecifiedMasks(); 01345 } 01346 } 01347 01348 CRemoteBlast::CRemoteBlast(CRef<objects::CPssmWithParameters> pssm, 01349 CRef<CBlastOptionsHandle> opts_handle, 01350 const CSearchDatabase & db) 01351 { 01352 if (pssm.Empty()) { 01353 NCBI_THROW(CBlastException, 01354 eInvalidArgument, 01355 "Error: No PSSM specified"); 01356 } 01357 01358 x_Init(opts_handle, db); 01359 01360 SetQueries(pssm); 01361 } 01362 01363 void CRemoteBlast::x_Init(CRef<CBlastOptionsHandle> opts_handle, 01364 const CSearchDatabase & db) 01365 { 01366 if (opts_handle.Empty()) { 01367 NCBI_THROW(CBlastException, 01368 eInvalidArgument, 01369 "Error: No options specified"); 01370 } 01371 01372 if (db.GetDatabaseName().empty()) { 01373 NCBI_THROW(CBlastException, 01374 eInvalidArgument, 01375 "Error: No database specified"); 01376 } 01377 01378 x_Init(&* opts_handle); 01379 01380 SetDatabase(db.GetDatabaseName()); 01381 SetEntrezQuery(db.GetEntrezQueryLimitation().c_str()); 01382 // Set the GI list restriction 01383 {{ 01384 const CSearchDatabase::TGiList& tmplist = db.GetGiListLimitation(); 01385 if ( !tmplist.empty() ) { 01386 list<Int4> gilist; 01387 copy(tmplist.begin(), tmplist.end(), back_inserter(gilist)); 01388 SetGIList(gilist); 01389 } 01390 }} 01391 01392 // Set the negative GI list 01393 {{ 01394 const CSearchDatabase::TGiList& tmplist = 01395 db.GetNegativeGiListLimitation(); 01396 if ( !tmplist.empty() ) { 01397 list<Int4> gilist; 01398 copy(tmplist.begin(), tmplist.end(), back_inserter(gilist)); 01399 SetNegativeGIList(gilist); 01400 } 01401 }} 01402 01403 // Set the filtering algorithms 01404 SetDbFilteringAlgorithmId(db.GetFilteringAlgorithm()); 01405 } 01406 // initialize disk cache support variables 01407 void CRemoteBlast::x_InitDiskCache(void) 01408 { 01409 m_use_disk_cache = false; 01410 m_disk_cache_error_flag = false; 01411 m_disk_cache_error_msg.clear(); 01412 CNcbiEnvironment env; 01413 if( env.Get("BLAST4_DISK_CACHE") != kEmptyStr ) 01414 { 01415 string l_disk_cache_flag = env.Get("BLAST4_DISK_CACHE"); 01416 if( !NStr::CompareNocase(l_disk_cache_flag,"ON") ) 01417 { 01418 m_use_disk_cache = true; 01419 LOG_POST(Info << "CRemoteBlast: DISK CACHE IS ON" ); 01420 } 01421 else{ 01422 LOG_POST(Info << "CRemoteBlast: DISK CACHE IS OFF; KEY: "<<l_disk_cache_flag ); 01423 } 01424 } 01425 else{ 01426 LOG_POST(Info << "CRemoteBlast: DISK CACHE IS OFF; NO ENVIRONMENT SETTINGS FOUND"); 01427 } 01428 } 01429 01430 CRemoteBlast::~CRemoteBlast() 01431 { 01432 } 01433 01434 void CRemoteBlast::SetGIList(const list<Int4> & gi_list) 01435 { 01436 if (gi_list.empty()) { 01437 return; 01438 } else { 01439 NCBI_THROW(CBlastException, eNotSupported, 01440 "Submitting gi lists remotely is currently not supported"); 01441 } 01442 x_SetOneParam(B4Param_GiList, & gi_list); 01443 01444 m_GiList.clear(); 01445 copy(gi_list.begin(), gi_list.end(), back_inserter(m_GiList)); 01446 } 01447 01448 void CRemoteBlast::SetDbFilteringAlgorithmId(int algo_id) 01449 { 01450 if (algo_id == -1) 01451 return; 01452 01453 x_SetOneParam(B4Param_DbFilteringAlgorithmId, &algo_id); 01454 m_DbFilteringAlgorithmId = algo_id; 01455 } 01456 01457 void CRemoteBlast::SetNegativeGIList(const list<Int4> & gi_list) 01458 { 01459 if (gi_list.empty()) { 01460 return; 01461 } else { 01462 NCBI_THROW(CBlastException, eNotSupported, 01463 "Submitting negative gi lists remotely is currently not supported"); 01464 } 01465 x_SetOneParam(B4Param_NegativeGiList, & gi_list); 01466 01467 m_NegativeGiList.clear(); 01468 copy(gi_list.begin(), gi_list.end(), back_inserter(m_NegativeGiList)); 01469 } 01470 01471 void CRemoteBlast::x_SetDatabase(const string & x) 01472 { 01473 EBlast4_residue_type rtype(eBlast4_residue_type_unknown); 01474 01475 if (m_Program == "blastp" || 01476 m_Program == "blastx" || 01477 (m_Program == "tblastn" && m_Service == "rpsblast")) { 01478 01479 rtype = eBlast4_residue_type_protein; 01480 } else { 01481 rtype = eBlast4_residue_type_nucleotide; 01482 } 01483 01484 m_Dbs.Reset(new CBlast4_database); 01485 m_Dbs->SetName(x); 01486 m_Dbs->SetType(rtype); 01487 01488 m_SubjectSequences.clear(); 01489 } 01490 01491 void CRemoteBlast::SetDatabase(const string & x) 01492 { 01493 if (x.empty()) { 01494 NCBI_THROW(CBlastException, eInvalidArgument, 01495 "NULL specified for database."); 01496 } 01497 01498 CRef<CBlast4_subject> subject_p(new CBlast4_subject); 01499 subject_p->SetDatabase(x); 01500 m_QSR->SetSubject(*subject_p); 01501 m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eSubject)); 01502 01503 x_SetDatabase(x); 01504 } 01505 01506 void CRemoteBlast::SetSubjectSequences(CRef<IQueryFactory> subjects) 01507 { 01508 CRef<IRemoteQueryData> Q(subjects->MakeRemoteQueryData()); 01509 CRef<CBioseq_set> bss = Q->GetBioseqSet(); 01510 01511 if (bss.Empty()) { 01512 NCBI_THROW(CBlastException, 01513 eInvalidArgument, 01514 "Error: No query data."); 01515 } 01516 01517 list< CRef<CBioseq> > seqs; 01518 FlattenBioseqSet(*bss, seqs); 01519 01520 SetSubjectSequences(seqs); 01521 } 01522 01523 void 01524 CRemoteBlast::SetSubjectSequences(const list< CRef< objects::CBioseq > > & subj) 01525 { 01526 CRef<CBlast4_subject> subject_p(new CBlast4_subject); 01527 subject_p->SetSequences() = subj; 01528 01529 m_QSR->SetSubject(*subject_p); 01530 m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eSubject)); 01531 01532 x_SetSubjectSequences(subj); 01533 } 01534 01535 void 01536 CRemoteBlast::x_SetSubjectSequences(const list< CRef< objects::CBioseq > > & subj) 01537 { 01538 m_SubjectSequences = subj; 01539 m_Dbs.Reset(); 01540 } 01541 01542 void CRemoteBlast::SetEntrezQuery(const char * x) 01543 { 01544 if (!x) { 01545 NCBI_THROW(CBlastException, eInvalidArgument, 01546 "NULL specified for entrez query."); 01547 } 01548 01549 if (*x) { // Ignore empty strings. 01550 x_SetOneParam(B4Param_EntrezQuery, &x); 01551 m_EntrezQuery.assign(x); 01552 } 01553 } 01554 01555 bool CRemoteBlast::SubmitSync(void) 01556 { 01557 return SubmitSync( x_DefaultTimeout() ); 01558 } 01559 01560 const string & CRemoteBlast::GetRID(void) 01561 { 01562 return m_RID; 01563 } 01564 01565 void CRemoteBlast::SetVerbose(EDebugMode verb) 01566 { 01567 m_Verbose = verb; 01568 } 01569 01570 /// The default timeout is 3.5 hours. 01571 const int CRemoteBlast::x_DefaultTimeout(void) 01572 { 01573 return int(3600*3.5); 01574 } 01575 01576 static const string 01577 kNoRIDSpecified("Cannot fetch query info: No RID was specified."); 01578 01579 static const string 01580 kNoArchiveFile("Cannot fetch query info: No archive file."); 01581 01582 void 01583 CRemoteBlast::x_GetRequestInfo() 01584 { 01585 if(m_ReadFile == true){ 01586 x_GetRequestInfoFromFile(); 01587 } 01588 else{ 01589 x_GetRequestInfoFromRID(); 01590 } 01591 } 01592 01593 bool 01594 CRemoteBlast::LoadFromArchive() 01595 { 01596 if (m_ObjectStream->EndOfData()) 01597 return false; 01598 01599 m_Archive.Reset(new CBlast4_archive); 01600 *m_ObjectStream >> *m_Archive; 01601 x_GetRequestInfoFromFile(); // update info. 01602 01603 return true; 01604 } 01605 01606 01607 void 01608 CRemoteBlast::x_GetRequestInfoFromFile() 01609 { 01610 // Archive file must be present to fetch. 01611 if (!m_Archive || m_Archive.Empty()) { 01612 NCBI_THROW(CRemoteBlastException, eServiceNotAvailable, 01613 kNoArchiveFile); 01614 } 01615 01616 if (m_Archive->CanGetRequest()) 01617 { 01618 CRef<objects::CBlast4_request> request(&m_Archive->SetRequest()); 01619 CImportStrategy strategy(request); 01620 m_Program = strategy.GetProgram(); 01621 m_Service = strategy.GetService(); 01622 m_CreatedBy = strategy.GetCreatedBy(); 01623 m_Queries = strategy.GetQueries(); 01624 m_AlgoOpts.Reset( & strategy.GetAlgoOptions() ); 01625 m_ProgramOpts.Reset( & strategy.GetProgramOptions() ); 01626 01627 if (strategy.GetSubject()->IsDatabase()) 01628 x_SetDatabase(strategy.GetSubject()->GetDatabase()); 01629 else 01630 m_SubjectSequences = strategy.GetSubject()->SetSequences(); 01631 01632 if(m_Service == "psi") 01633 { 01634 // Would have errored out in CImportStrategy if we can't get queue search 01635 CBlast4_queue_search_request& qs = request->SetBody().SetQueue_search(); 01636 if(qs.CanGetFormat_options()) 01637 m_FormatOpts.Reset(&qs.SetFormat_options()); 01638 } 01639 // Ignore return value, want side effect of setting fields. 01640 GetSearchOptions(); 01641 return; 01642 } 01643 01644 NCBI_THROW(CRemoteBlastException, eServiceNotAvailable, 01645 "Could not get information from archive file."); 01646 } 01647 01648 void 01649 CRemoteBlast::x_GetRequestInfoFromRID() 01650 { 01651 // Must have an RID to do this. 01652 01653 if (m_RID.empty()) { 01654 NCBI_THROW(CRemoteBlastException, eServiceNotAvailable, 01655 kNoRIDSpecified); 01656 } 01657 01658 // First... poll until done. 01659 01660 x_PollUntilDone(ePollAsync, x_DefaultTimeout()); 01661 01662 if (x_GetState() != eDone) { 01663 NCBI_THROW(CRemoteBlastException, eServiceNotAvailable, 01664 "Polling terminated, but search is in incomplete state."); 01665 } 01666 01667 // Build the request 01668 01669 CRef<CBlast4_request_body> body(new CBlast4_request_body); 01670 CRef<CBlast4_request> request(new CBlast4_request); 01671 if ( !m_ClientId.empty() ) { 01672 request->SetIdent(m_ClientId); 01673 } 01674 01675 body->SetGet_request_info().SetRequest_id(m_RID); 01676 request->SetBody(*body); 01677 01678 CRef<CBlast4_reply> reply(new CBlast4_reply); 01679 01680 if (eDebug == m_Verbose) { 01681 NcbiCout << MSerial_AsnText << *request << endl; 01682 } 01683 01684 try { 01685 CStopWatch sw(CStopWatch::eStart); 01686 01687 if (eDebug == m_Verbose) { 01688 NcbiCout << "Starting network transaction (" << sw.Elapsed() << ")" << endl; 01689 } 01690 01691 // Send request. 01692 CBlast4Client().Ask(*request, *reply); 01693 01694 if (eDebug == m_Verbose) { 01695 NcbiCout << "Done network transaction (" << sw.Elapsed() << ")" << endl; 01696 } 01697 } 01698 catch(const CEofException&) { 01699 NCBI_THROW(CRemoteBlastException, eServiceNotAvailable, 01700 "No response from server, cannot complete request."); 01701 } 01702 01703 if (eDebug == m_Verbose) { 01704 NcbiCout << MSerial_AsnText << *reply << endl; 01705 } 01706 01707 if (reply->CanGetBody()) { 01708 if (reply->GetBody().IsGet_request_info()) { 01709 CRef<CBlast4_get_request_info_reply> grir 01710 (& reply->SetBody().SetGet_request_info()); 01711 01712 if (grir->GetDatabase().GetName() != "n/a") { 01713 m_Dbs.Reset( & grir->SetDatabase() ); 01714 } else { 01715 x_GetSubjects(); 01716 } 01717 01718 m_Program = grir->GetProgram(); 01719 m_Service = grir->GetService(); 01720 m_CreatedBy = grir->GetCreated_by(); 01721 01722 m_Queries .Reset( & grir->SetQueries() ); 01723 m_AlgoOpts .Reset( & grir->SetAlgorithm_options() ); 01724 m_ProgramOpts.Reset( & grir->SetProgram_options() ); 01725 if( grir->IsSetFormat_options() ) 01726 m_FormatOpts.Reset( & grir->SetFormat_options() ); 01727 01728 return; 01729 } 01730 } 01731 01732 NCBI_THROW(CRemoteBlastException, eServiceNotAvailable, 01733 "Could not get information from search."); 01734 } 01735 01736 01737 CRef<CBlast4_database> 01738 CRemoteBlast::GetDatabases() 01739 { 01740 if (! m_Dbs.Empty()) { 01741 return m_Dbs; 01742 } 01743 01744 x_GetRequestInfo(); 01745 01746 return m_Dbs; 01747 } 01748 01749 bool 01750 CRemoteBlast::IsDbSearch() 01751 { 01752 if (m_Dbs.Empty() && m_SubjectSequences.empty() && m_SubjectSeqLocs.empty()) 01753 x_GetRequestInfo(); 01754 01755 if (! m_Dbs.Empty()) { 01756 return true; 01757 } 01758 return false; 01759 } 01760 01761 list< CRef<objects::CBioseq> > 01762 CRemoteBlast::GetSubjectSequences() 01763 { 01764 if (x_HasRetrievedSubjects()) { 01765 return m_SubjectSequences; 01766 } 01767 01768 x_GetRequestInfo(); 01769 01770 return m_SubjectSequences; 01771 } 01772 01773 CBlast4_subject::TSeq_loc_list 01774 CRemoteBlast::GetSubjectSeqLocs() 01775 { 01776 if (x_HasRetrievedSubjects()) { 01777 return m_SubjectSeqLocs; 01778 } 01779 01780 x_GetRequestInfo(); 01781 01782 return m_SubjectSeqLocs; 01783 } 01784 01785 string 01786 CRemoteBlast::GetProgram() 01787 { 01788 if (! m_Program.empty()) { 01789 return m_Program; 01790 } 01791 01792 x_GetRequestInfo(); 01793 01794 return m_Program; 01795 } 01796 01797 string 01798 CRemoteBlast::GetService() 01799 { 01800 if (! m_Service.empty()) { 01801 return m_Service; 01802 } 01803 01804 x_GetRequestInfo(); 01805 01806 return m_Service; 01807 } 01808 01809 string 01810 CRemoteBlast::GetCreatedBy() 01811 { 01812 if (! m_CreatedBy.empty()) { 01813 return m_CreatedBy; 01814 } 01815 01816 x_GetRequestInfo(); 01817 01818 return m_CreatedBy; 01819 } 01820 01821 CRef<CBlast4_queries> 01822 CRemoteBlast::GetQueries() 01823 { 01824 if (! m_Queries.Empty()) { 01825 return m_Queries; 01826 } 01827 01828 x_GetRequestInfo(); 01829 01830 return m_Queries; 01831 } 01832 01833 EBlastProgramType 01834 NetworkProgram2BlastProgramType(const string& program, const string& service) 01835 { 01836 _ASSERT(!program.empty()); 01837 _ASSERT(!service.empty()); 01838 01839 EBlastProgramType retval = eBlastTypeUndefined; 01840 Int2 rv = BlastProgram2Number(program.c_str(), &retval); 01841 _ASSERT(rv == 0); 01842 rv += 0; // to eliminate compiler warning 01843 _ASSERT(retval != eBlastTypeUndefined); 01844 01845 if (service == "rpsblast") { 01846 01847 if (program == "blastp") { 01848 retval = eBlastTypeRpsBlast; 01849 } else if (program == "tblastn" || program == "blastx") { 01850 retval = eBlastTypeRpsTblastn; 01851 } else { 01852 abort(); 01853 } 01854 01855 } 01856 01857 if (service == "psi") { 01858 _ASSERT(program == "blastp"); 01859 retval = eBlastTypePsiBlast; 01860 } 01861 01862 return retval; 01863 } 01864 01865 01866 EBlast4_frame_type 01867 FrameNumber2NetworkFrame(int frame, EBlastProgramType program) 01868 { 01869 if (Blast_QueryIsTranslated(program)) { 01870 switch (frame) { 01871 case 1: return eBlast4_frame_type_plus1; 01872 case 2: return eBlast4_frame_type_plus2; 01873 case 3: return eBlast4_frame_type_plus3; 01874 case -1: return eBlast4_frame_type_minus1; 01875 case -2: return eBlast4_frame_type_minus2; 01876 case -3: return eBlast4_frame_type_minus3; 01877 default: abort(); 01878 } 01879 _TROUBLE; 01880 } 01881 01882 if (Blast_QueryIsNucleotide(program)) { 01883 _ASSERT(frame == -1 || frame == 1); 01884 // For some reason, the return value here is not set... 01885 return eBlast4_frame_type_notset; 01886 } 01887 01888 return eBlast4_frame_type_notset; 01889 } 01890 01891 CSeqLocInfo::ETranslationFrame 01892 NetworkFrame2FrameNumber(objects::EBlast4_frame_type frame, 01893 EBlastProgramType program) 01894 { 01895 if (Blast_QueryIsTranslated(program)) { 01896 switch (frame) { 01897 case eBlast4_frame_type_plus1: return CSeqLocInfo::eFramePlus1; 01898 case eBlast4_frame_type_plus2: return CSeqLocInfo::eFramePlus2; 01899 case eBlast4_frame_type_plus3: return CSeqLocInfo::eFramePlus3; 01900 case eBlast4_frame_type_minus1: return CSeqLocInfo::eFrameMinus1; 01901 case eBlast4_frame_type_minus2: return CSeqLocInfo::eFrameMinus2; 01902 case eBlast4_frame_type_minus3: return CSeqLocInfo::eFrameMinus3; 01903 default: abort(); 01904 } 01905 _TROUBLE; 01906 } 01907 01908 // The BLAST formatter expects nucleotide masks to have a 'not-set' strand, 01909 // which implies that they're on the plus strand. If they're set to 01910 // anything else, it won't display them. 01911 //if (Blast_QueryIsNucleotide(program)) { 01912 // _ASSERT(frame == eBlast4_frame_type_plus1); 01913 // return CSeqLocInfo::eFramePlus1; 01914 //} 01915 01916 return CSeqLocInfo::eFrameNotSet; 01917 } 01918 01919 CRef<CBlastOptionsHandle> CRemoteBlast::GetSearchOptions() 01920 { 01921 if (m_CBOH.Empty()) { 01922 string program_s = GetProgram(); 01923 string service_s = GetService(); 01924 01925 CBlastOptionsBuilder bob(program_s, service_s, CBlastOptions::eRemote ); 01926 01927 m_CBOH = bob.GetSearchOptions(m_AlgoOpts, m_ProgramOpts, &m_Task); 01928 01929 if (bob.HaveEntrezQuery()) { 01930 m_EntrezQuery = bob.GetEntrezQuery(); 01931 } 01932 01933 if (bob.HaveFirstDbSeq()) { 01934 m_FirstDbSeq = bob.GetFirstDbSeq(); 01935 } 01936 01937 if (bob.HaveFinalDbSeq()) { 01938 m_FinalDbSeq = bob.GetFinalDbSeq(); 01939 } 01940 01941 if (bob.HaveGiList()) { 01942 m_GiList = bob.GetGiList(); 01943 } 01944 01945 if (bob.HasDbFilteringAlgorithmId() && 01946 bob.GetDbFilteringAlgorithmId() != -1) { 01947 m_DbFilteringAlgorithmId = bob.GetDbFilteringAlgorithmId(); 01948 } 01949 01950 if (bob.HaveNegativeGiList()) { 01951 m_NegativeGiList = bob.GetNegativeGiList(); 01952 } 01953 } 01954 01955 return m_CBOH; 01956 } 01957 01958 /// Extract the query IDs from a CBioseq_set 01959 /// @param bss CBioseq_set object used as source [in] 01960 /// @param query_ids where the query_ids will be added [in|out] 01961 static void s_ExtractQueryIdsFromBioseqSet(const CBioseq_set& bss, 01962 CSearchResultSet::TQueryIdVector& 01963 query_ids) 01964 { 01965 // sacrifice speed for protection against infinite loops 01966 CTypeConstIterator<objects::CBioseq> itr(ConstBegin(bss, eDetectLoops)); 01967 for (; itr; ++itr) { 01968 query_ids.push_back(FindBestChoice(itr->GetId(), CSeq_id::BestRank)); 01969 } 01970 } 01971 01972 void 01973 CRemoteBlast::x_ExtractQueryIds(CSearchResultSet::TQueryIdVector& query_ids) 01974 { 01975 query_ids.clear(); 01976 CRef<CBlast4_queries> queries = GetQueries(); 01977 query_ids.reserve(queries->GetNumQueries()); 01978 _ASSERT(queries); 01979 01980 if (queries->IsPssm()) { 01981 const CSeq_entry& seq_entry = queries->GetPssm().GetQuery(); 01982 if (seq_entry.IsSeq()) { 01983 query_ids.push_back(FindBestChoice(seq_entry.GetSeq().GetId(), 01984 CSeq_id::BestRank)); 01985 } else { 01986 _ASSERT(seq_entry.IsSet()); 01987 s_ExtractQueryIdsFromBioseqSet(seq_entry.GetSet(), query_ids); 01988 } 01989 } else if (queries->IsSeq_loc_list()) { 01990 query_ids.reserve(queries->GetSeq_loc_list().size()); 01991 ITERATE(CBlast4_queries::TSeq_loc_list, i, queries->GetSeq_loc_list()) { 01992 CConstRef<CSeq_id> id((*i)->GetId()); 01993 query_ids.push_back(id); 01994 } 01995 } else { 01996 _ASSERT(queries->IsBioseq_set()); 01997 s_ExtractQueryIdsFromBioseqSet(queries->GetBioseq_set(), query_ids); 01998 } 01999 } 02000 02001 /// Submit the search and return the results. 02002 /// @return Search results. 02003 CRef<CSearchResultSet> CRemoteBlast::GetResultSet() 02004 { 02005 CRef<CSearchResultSet> retval; 02006 if (m_ReadFile == false) 02007 SubmitSync(); 02008 02009 TSeqAlignVector alignments = GetSeqAlignSets(); 02010 02011 /* Process errors and warnings */ 02012 TSearchMessages search_messages; 02013 { 02014 const vector<string> & W = GetWarningVector(); 02015 const vector<string> & E = GetErrorVector(); 02016 02017 TQueryMessages query_messages; 02018 02019 // Represents the context of the error, not the error id. 02020 int err = kBlastMessageNoContext; 02021 02022 ITERATE(vector<string>, itw, W) { 02023 CRef<CSearchMessage> 02024 sm(new CSearchMessage(eBlastSevWarning, err, *itw)); 02025 02026 query_messages.push_back(sm); 02027 } 02028 02029 ITERATE(vector<string>, ite, E) { 02030 err = kBlastMessageNoContext; 02031 02032 CRef<CSearchMessage> 02033 sm(new CSearchMessage(eBlastSevError, err, *ite)); 02034 02035 query_messages.push_back(sm); 02036 } 02037 02038 // Since there is no way to report per-query messages, all 02039 // warnings and errors are applied to all queries. 02040 search_messages.insert(search_messages.end(), 02041 alignments.empty() ? 1 : alignments.size(), 02042 query_messages); 02043 02044 if (eDebug == m_Verbose) { 02045 NcbiCout << "Error/Warning messages: '" 02046 << search_messages.ToString() << "'" << endl; 02047 } 02048 } 02049 02050 CSearchResultSet::TQueryIdVector query_ids; 02051 x_ExtractQueryIds(query_ids); 02052 02053 if (alignments.empty()) { 02054 // this is required by the CSearchResultSet ctor 02055 alignments.resize(1); 02056 try { x_ExtractQueryIds(query_ids); } 02057 catch (const CRemoteBlastException& e) { 02058 if (e.GetMsg() == kNoRIDSpecified) { 02059 retval.Reset(new CSearchResultSet(alignments, search_messages)); 02060 return retval; 02061 } 02062 throw; 02063 } 02064 } 02065 02066 /* Build the ancillary data structure */ 02067 CSearchResultSet::TAncillaryVector ancill_vector; 02068 { 02069 /* Get the effective search space */ 02070 const string kTarget("Effective search space used: "); 02071 list<string> search_stats = GetSearchStats(); 02072 Int8 effective_search_space = 0; 02073 NON_CONST_ITERATE(list<string>, itr, search_stats) { 02074 if (NStr::Find(*itr, kTarget) != NPOS) { 02075 NStr::ReplaceInPlace(*itr, kTarget, kEmptyStr); 02076 effective_search_space = 02077 NStr::StringToInt8(*itr, NStr::fConvErr_NoThrow); 02078 break; 02079 } 02080 } 02081 02082 /* Get the Karlin-Altschul parameters */ 02083 bool found_gapped = false, found_ungapped = false; 02084 pair<double, double> lambdas, Ks, Hs; 02085 TKarlinAltschulBlocks ka_blocks = GetKABlocks(); 02086 02087 ITERATE(TKarlinAltschulBlocks, itr, ka_blocks) { 02088 if ((*itr)->GetGapped()) { 02089 lambdas.second = (*itr)->GetLambda(); 02090 Ks.second = (*itr)->GetK(); 02091 Hs.second = (*itr)->GetH(); 02092 found_gapped = true; 02093 } else { 02094 lambdas.first = (*itr)->GetLambda(); 02095 Ks.first = (*itr)->GetK(); 02096 Hs.first = (*itr)->GetH(); 02097 found_ungapped = true; 02098 } 02099 02100 if (found_gapped && found_ungapped) { 02101 break; 02102 } 02103 } 02104 02105 // N.B.: apparently the BLAST3 protocol doesn't send PSI-BLAST Karlin & 02106 // Altschul parameters, so we don't set the is_psiblast 02107 // CBlastAncillaryData constructor argument 02108 CRef<CBlastAncillaryData> ancillary_data 02109 (new CBlastAncillaryData(lambdas, Ks, Hs, effective_search_space, m_Task == "psiblast")); 02110 ancill_vector.insert(ancill_vector.end(), alignments.size(), 02111 ancillary_data); 02112 } 02113 02114 TSeqLocInfoVector masks = GetMasks(); 02115 retval.Reset(new CSearchResultSet(query_ids, alignments, search_messages, 02116 ancill_vector, &masks)); 02117 retval->SetRID(GetRID()); 02118 return retval; 02119 } 02120 02121 CRef<objects::CBlast4_request> 02122 ExtractBlast4Request(CNcbiIstream& in) 02123 { 02124 // First try to read a Blast4-get-search-strategy-reply... 02125 CRef<CBlast4_get_search_strategy_reply> b4_ss_reply; 02126 bool succeeded = false; 02127 try { 02128 switch (CFormatGuess().Format(in)) { 02129 case CFormatGuess::eBinaryASN: 02130 b4_ss_reply.Reset(new CBlast4_get_search_strategy_reply); 02131 in >> MSerial_AsnBinary >> *b4_ss_reply; 02132 succeeded = true; 02133 break; 02134 02135 case CFormatGuess::eTextASN: 02136 b4_ss_reply.Reset(new CBlast4_get_search_strategy_reply); 02137 in >> MSerial_AsnText >> *b4_ss_reply; 02138 succeeded = true; 02139 break; 02140 02141 case CFormatGuess::eXml: 02142 { 02143 auto_ptr<CObjectIStream> is( 02144 CObjectIStream::Open(eSerial_Xml, in)); 02145 dynamic_cast<CObjectIStreamXml*> 02146 (is.get())->SetEnforcedStdXml(true); 02147 b4_ss_reply.Reset(new CBlast4_get_search_strategy_reply); 02148 *is >> *b4_ss_reply; 02149 succeeded = true; 02150 } 02151 break; 02152 02153 default: 02154 _ASSERT(b4_ss_reply.Empty()); 02155 } 02156 } catch (const CException&) { 02157 succeeded = false; 02158 } 02159 02160 CRef<CBlast4_request> retval; 02161 if (succeeded) { 02162 retval.Reset(&b4_ss_reply->Set()); 02163 return retval; 02164 } 02165 b4_ss_reply.Reset(); 02166 in.seekg(0); 02167 02168 // Go for broke and try the Blast4-request... 02169 retval.Reset(new CBlast4_request); 02170 switch (CFormatGuess().Format(in)) { 02171 case CFormatGuess::eBinaryASN: 02172 in >> MSerial_AsnBinary >> *retval; 02173 break; 02174 02175 case CFormatGuess::eTextASN: 02176 in >> MSerial_AsnText >> *retval; 02177 break; 02178 02179 case CFormatGuess::eXml: 02180 { 02181 auto_ptr<CObjectIStream> is( 02182 CObjectIStream::Open(eSerial_Xml, in)); 02183 dynamic_cast<CObjectIStreamXml*> 02184 (is.get())->SetEnforcedStdXml(true); 02185 *is >> *retval; 02186 } 02187 break; 02188 02189 default: 02190 NCBI_THROW(CSerialException, eInvalidData, 02191 "Unrecognized input format "); 02192 } 02193 02194 return retval; 02195 } 02196 02197 static CRef<CBlast4_request_body> 02198 s_BuildSearchInfoRequest(const string& rid, 02199 const string& name, 02200 const string& value) 02201 { 02202 CRef<CBlast4_get_search_info_request> info_request( new CBlast4_get_search_info_request ); 02203 info_request->SetRequest_id(rid); 02204 info_request->SetInfo().Add(name, value); 02205 CRef<CBlast4_request_body> retval(new CBlast4_request_body); 02206 retval->SetGet_search_info(*info_request); 02207 return retval; 02208 } 02209 02210 string 02211 CRemoteBlast::x_GetStringFromSearchInfoReply(CRef<CBlast4_reply> reply, 02212 const string& name, 02213 const string& value) 02214 { 02215 string retval; 02216 if (reply.Empty() || !reply->CanGetBody()) { 02217 return retval; 02218 } 02219 if (reply->GetBody().IsGet_search_info()) { 02220 const CBlast4_get_search_info_reply &info_reply = reply->GetBody().GetGet_search_info(); 02221 if (info_reply.CanGetRequest_id() && (info_reply.GetRequest_id() == m_RID)) { 02222 if( info_reply.CanGetInfo() ){ 02223 const CBlast4_parameters ¶ms = info_reply.GetInfo(); 02224 const string reply_name = 02225 Blast4SearchInfo_BuildReplyName(name, value); 02226 CRef< CBlast4_parameter > search_param = 02227 params.GetParamByName(reply_name); 02228 if( search_param.NotEmpty() && search_param->GetValue().IsString()) { 02229 retval = search_param->GetValue().GetString(); 02230 } 02231 } // get info 02232 } // request id == m_RID 02233 } // search info reply 02234 return retval; 02235 } 02236 02237 02238 // 02239 // based on a new request 02240 // 02241 string CRemoteBlast::GetTitle(void) 02242 { 02243 // Build the request 02244 CRef<CBlast4_request_body> request_body = 02245 s_BuildSearchInfoRequest(m_RID, kBlast4SearchInfoReqName_Search, 02246 kBlast4SearchInfoReqValue_Title); 02247 CRef<CBlast4_reply> reply = x_SendRequest(request_body); 02248 return x_GetStringFromSearchInfoReply(reply, 02249 kBlast4SearchInfoReqName_Search, 02250 kBlast4SearchInfoReqValue_Title); 02251 02252 } 02253 // Disk Cache version: x_CheckResults 02254 // only difference is that if search finished, 02255 // different approach to call and get results will be orchestrated 02256 // to fist get data from services as-is an deserialize them 02257 // later. This steps will minimize time OM is working. 02258 // 02259 void CRemoteBlast::x_CheckResultsDC(void) 02260 { 02261 LOG_POST(Info << "CRemoteBlast::x_CheckResultsDC"); 02262 if (! m_Errs.empty()) { 02263 m_Pending = false; 02264 } 02265 02266 if (! m_Pending) { 02267 return; 02268 } 02269 02270 CRef<CBlast4_reply> r; 02271 02272 bool try_again = true; 02273 02274 while(try_again) { 02275 try { 02276 // asking for search statistics 02277 r = x_GetSearchStatsOnly(); 02278 m_Pending = s_SearchPending(r); 02279 try_again = false; 02280 } 02281 catch(const CEofException&) { 02282 --m_ErrIgn; 02283 02284 if (m_ErrIgn == 0) { 02285 m_Errs.push_back("No response from server, " 02286 "cannot complete request."); 02287 return; 02288 } 02289 02290 SleepSec(10); 02291 } 02292 } 02293 02294 if (! m_Pending) { 02295 // search finishedi check for errors 02296 x_SearchErrors(r); 02297 02298 if (! m_Errs.empty()) { 02299 return; 02300 } 02301 02302 if( !r->CanGetBody() ) { 02303 m_Errs.push_back("Results were not a get-search-results reply 2"); 02304 return; 02305 } 02306 if( r->CanGetBody() && !r->GetBody().IsGet_search_results()) { 02307 m_Errs.push_back("Results were not a get-search-results reply"); 02308 return; 02309 } 02310 //ATTENTION: fullscale get results call 02311 // search finished, retriev results 02312 r = x_GetSearchResultsHTTP(); 02313 if( r.Empty() ){ 02314 m_Errs.push_back("Results were not a get-search-results reply 3"); 02315 return; 02316 } 02317 if( r->CanGetBody() && !r->GetBody().IsGet_search_results()) { 02318 m_Errs.push_back("Results were not a get-search-results reply 4"); 02319 return; 02320 } 02321 m_Pending = s_SearchPending(r); 02322 m_Reply = r; 02323 } 02324 02325 } 02326 // disk cache support. 02327 // ask for search statistics to check status w/o polling results. 02328 CRef<objects::CBlast4_reply> 02329 CRemoteBlast::x_GetSearchStatsOnly(void) 02330 { 02331 CRef<CBlast4_get_search_results_request> 02332 gsrr(new CBlast4_get_search_results_request); 02333 02334 gsrr->SetRequest_id(m_RID); 02335 // result-types 02336 gsrr->ResetResult_types(); 02337 gsrr->SetResult_types( 16) ; 02338 02339 CRef<CBlast4_request_body> body(new CBlast4_request_body); 02340 body->SetGet_search_results(*gsrr); 02341 02342 return x_SendRequest(body); 02343 } 02344 // 02345 // get search results caching first on a file system. 02346 // TODO: check for errors and disable disk caching 02347 CRef<objects::CBlast4_reply> 02348 CRemoteBlast::x_GetSearchResultsHTTP(void) 02349 { 02350 CRef<objects::CBlast4_reply> one_reply( new CBlast4_reply ); 02351 CStopWatch swatch; 02352 CNcbiEnvironment env; 02353 string BLAST4_CONN_SERVICE_NAME = "blast4"; 02354 if( env.Get("BLAST4_CONN_SERVICE_NAME") != kEmptyStr ) 02355 BLAST4_CONN_SERVICE_NAME = env.Get("BLAST4_CONN_SERVICE_NAME"); 02356 02357 // construct request 02358 CRef<CBlast4_get_search_results_request> gsrr(new CBlast4_get_search_results_request); 02359 gsrr->SetRequest_id( m_RID); 02360 02361 CRef<CBlast4_request_body> body(new CBlast4_request_body); 02362 body->SetGet_search_results(*gsrr); 02363 02364 CRef<CBlast4_request> request( new CBlast4_request ); 02365 request->SetBody(*body ); 02366 // call service 02367 swatch.Start(); 02368 CConn_ServiceStream ios( BLAST4_CONN_SERVICE_NAME , fSERV_HttpPost, 0); 02369 ios << MSerial_AsnBinary << *request; 02370 ios.flush(); 02371 // cache answer to the file 02372 char incoming_buffer[8192]; 02373 int read_max = 8192; 02374 int l_total_bytes=0, n_read; 02375 bool l_cached_ok = true; 02376 02377 auto_ptr<fstream> tmp_stream( CDirEntry::CreateTmpFile() ); 02378 02379 do{ 02380 ios.readsome(incoming_buffer, read_max); 02381 n_read = ios.gcount(); 02382 if( n_read >= 0 ){ 02383 l_total_bytes += n_read; 02384 try{ 02385 tmp_stream->write(incoming_buffer,n_read); 02386 if( tmp_stream->bad() || tmp_stream->fail() ) 02387 { 02388 l_cached_ok = false; 02389 LOG_POST(Error << "CRemoteBlast::x_GetSearchResultsHTTP CAN'T WRITE CACHED DATA: BAD/FAIL STATE" ); 02390 m_disk_cache_error_msg = "bad/fail fstream state on write"; 02391 break; 02392 } 02393 } 02394 catch ( ios_base::failure &err){ 02395 LOG_POST(Error << "CRemoteBlast::x_GetSearchResultsHTTP CAN'T WRITE CACHED DATA: "<<err.what() ); 02396 l_cached_ok = false; 02397 m_disk_cache_error_msg = err.what(); 02398 } 02399 } 02400 } 02401 while( ios); 02402 swatch.Stop(); 02403 02404 if(!l_cached_ok ){ 02405 // Attention: in case of caching error, disable it and re-read w/o caching 02406 LOG_POST(Info << "CRemoteBlast::x_GetSearchResultsHTTP: DISABLE CACHE, RE-READ"); 02407 m_use_disk_cache = false; 02408 m_disk_cache_error_flag = true; 02409 return x_GetSearchResults(); 02410 } 02411 02412 tmp_stream->seekg(0); 02413 // read cached answer 02414 swatch.Restart(); 02415 { 02416 auto_ptr<CObjectIStream> 02417 in_stream( CObjectIStream::Open(eSerial_AsnBinary, *tmp_stream) ); 02418 in_stream->Read(ObjectInfo(*one_reply), CObjectIStream::eNoFileHeader); 02419 02420 } 02421 02422 swatch.Stop(); 02423 02424 return one_reply ; 02425 } 02426 // 02427 // Get search subject and set 02428 // m_SubjectSeqLocs or m_SubjectSequences 02429 // 02430 void CRemoteBlast::x_GetSubjects(void) 02431 { 02432 if( !m_SubjectSequences.empty() && !m_SubjectSeqLocs.empty() ) 02433 return; // already got data 02434 02435 // Build the request 02436 CRef<CBlast4_get_search_info_request> info_request( new CBlast4_get_search_info_request ); 02437 info_request->SetRequest_id( m_RID ); 02438 info_request->SetInfo().Add(kBlast4SearchInfoReqName_Search, 02439 kBlast4SearchInfoReqValue_Subjects); 02440 02441 CRef<CBlast4_request_body> body(new CBlast4_request_body); 02442 body->SetGet_search_info( *info_request ); 02443 02444 CRef<CBlast4_request> request(new CBlast4_request); 02445 request->SetBody(*body); 02446 02447 CRef<CBlast4_reply> reply(new CBlast4_reply); 02448 02449 if (eDebug == m_Verbose) { 02450 NcbiCout << MSerial_AsnText << *request << endl; 02451 } 02452 02453 try { 02454 CStopWatch sw(CStopWatch::eStart); 02455 02456 if (eDebug == m_Verbose) { 02457 NcbiCout << "Starting network transaction (" << sw.Elapsed() << ")" << endl; 02458 } 02459 02460 // Send request. 02461 CBlast4Client().Ask(*request, *reply); 02462 02463 if (eDebug == m_Verbose) { 02464 NcbiCout << "Done network transaction (" << sw.Elapsed() << ")" << endl; 02465 } 02466 } 02467 catch(const CEofException&) { 02468 NCBI_THROW(CRemoteBlastException, eServiceNotAvailable, 02469 "No response from server, cannot complete request."); 02470 } 02471 02472 if (eDebug == m_Verbose) { 02473 NcbiCout << MSerial_AsnText << *reply << endl; 02474 } 02475 02476 // get reply. it will be status and subjects 02477 if (reply->CanGetBody()) { 02478 if (reply->GetBody().IsGet_search_info()) { 02479 const CBlast4_get_search_info_reply &info_reply = reply->GetBody().GetGet_search_info(); 02480 if( info_reply.CanGetRequest_id() && ( info_reply.GetRequest_id() == m_RID ) ){ 02481 if( info_reply.CanGetInfo() ){ 02482 const CBlast4_parameters ¶ms = info_reply.GetInfo(); 02483 string reply_name = 02484 Blast4SearchInfo_BuildReplyName(kBlast4SearchInfoReqName_Search, 02485 kBlast4SearchInfoReqValue_Subjects); 02486 CRef< CBlast4_parameter > search_param = params.GetParamByName (reply_name); 02487 // reply could have string, seq-loc-list or 02488 // bioseq-list, but we don't care about string result for bl2seq 02489 if( search_param.NotEmpty() && search_param->GetValue().IsSeq_loc_list()) 02490 { 02491 m_SubjectSeqLocs = search_param->GetValue().GetSeq_loc_list(); 02492 } 02493 // bioseq-list // SEQUENCE OF Bioseq 02494 else if( search_param.NotEmpty() && search_param->GetValue().IsBioseq_list()) 02495 { 02496 x_SetSubjectSequences( search_param->GetValue().GetBioseq_list() ); 02497 02498 } 02499 else 02500 { 02501 NCBI_THROW(CRemoteBlastException, eIncompleteConfig, 02502 "Obtained database name for remote bl2seq search"); 02503 } 02504 02505 } // get info 02506 } // request id == m_RID 02507 } // search info reply 02508 } // get body 02509 } 02510 02511 unsigned int CRemoteBlast::GetPsiNumberOfIterations(void) 02512 { 02513 unsigned int iter_num = 0; 02514 if(!m_FormatOpts.Empty()) 02515 { 02516 CRef< CBlast4_parameter > param = m_FormatOpts->GetParamByName (B4Param_Web_StepNumber.GetName()); 02517 if( param.NotEmpty()) 02518 { 02519 iter_num = param->GetValue().GetInteger(); 02520 } 02521 } 02522 else if(!m_RID.empty()) 02523 { 02524 iter_num = x_GetPsiIterationsFromServer(); 02525 } 02526 02527 return iter_num; 02528 } 02529 02530 unsigned int CRemoteBlast::x_GetPsiIterationsFromServer() 02531 { 02532 unsigned int retval=0; 02533 02534 CRef<CBlast4_request_body> request_body = 02535 s_BuildSearchInfoRequest(m_RID, kBlast4SearchInfoReqName_Search, 02536 kBlast4SearchInfoReqValue_PsiIterationNum); 02537 CRef<CBlast4_reply> reply = x_SendRequest(request_body); 02538 string num = x_GetStringFromSearchInfoReply(reply, 02539 kBlast4SearchInfoReqName_Search, 02540 kBlast4SearchInfoReqValue_PsiIterationNum); 02541 if ( !num.empty() ) { 02542 try { retval = NStr::StringToUInt(num); } 02543 catch (...) {} // ignore errors and leave as unset 02544 } 02545 return retval; 02546 } 02547 02548 02549 END_SCOPE(blast) 02550 END_NCBI_SCOPE 02551 02552 /* @} */
1.7.5.1
Modified on Wed May 23 12:52:33 2012 by modify_doxy.py rev. 337098