NCBI C++ ToolKit
remote_blast.cpp
Go to the documentation of this file.
00001 /*  $Id: remote_blast.cpp 53103 2012-02-23 16:01:09Z zaretska $ 
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================
00025 *
00026 * Author:  Kevin Bealer
00027 *
00028 * ===========================================================================
00029 */
00030 
00031 /// @file remote_blast.cpp
00032 /// Queueing and Polling code for Remote Blast API.
00033 
00034 #include <ncbi_pch.hpp>
00035 #include <corelib/ncbi_system.hpp>
00036 #include <corelib/ncbitime.hpp>
00037 #include <serial/iterator.hpp>
00038 #include <algo/blast/api/remote_blast.hpp>
00039 #include <algo/blast/api/blast_options_builder.hpp>
00040 #include <algo/blast/api/search_strategy.hpp>
00041 
00042 #include <objects/blast/blastclient.hpp>
00043 #include <objects/blast/blast__.hpp>
00044 #include <objects/blast/names.hpp>
00045 #include <objects/seq/Seq_data.hpp>
00046 #include <objects/seq/Bioseq.hpp>
00047 #include <objects/scoremat/Pssm.hpp>
00048 #include <objects/scoremat/PssmWithParameters.hpp>
00049 #include <objects/seqalign/seqalign__.hpp>
00050 #include <objects/blast/blastclient.hpp>
00051 #include <objmgr/util/seq_loc_util.hpp>
00052 #include "psiblast_aux_priv.hpp"    // For CPsiBlastValidate::Pssm()
00053 #include <util/format_guess.hpp>    // for CFormatGuess
00054 #include <serial/objistrxml.hpp>    // for CObjectIStreamXml
00055 #include <serial/objistrasnb.hpp>    // for CObjectIStreamAsnBinary
00056 #include <serial/objistrasn.hpp>    // for CObjectIStreamAsn
00057 #include <algo/blast/api/objmgr_query_data.hpp>
00058 
00059 #if defined(NCBI_OS_UNIX)
00060 #include <unistd.h>
00061 #endif
00062 
00063 /** @addtogroup AlgoBlast
00064  *
00065  * @{
00066  */
00067 
00068 BEGIN_NCBI_SCOPE
00069 USING_SCOPE(objects);
00070 BEGIN_SCOPE(blast)
00071 
00072 
00073 // Static functions
00074 
00075 
00076 /// Error value type used by Blast4 ASN.1 objects.
00077 typedef list< CRef<objects::CBlast4_error> > TErrorList;
00078 
00079 
00080 /// Determine whether the search is still running.
00081 /// @param reply Reply from get-search-results request.
00082 /// @return True if search needs more time, false if done or failed.
00083 static bool
00084 s_SearchPending(CRef<objects::CBlast4_reply> reply)
00085 {
00086     const list< CRef<objects::CBlast4_error> > & errors = reply->GetErrors();
00087     
00088     TErrorList::const_iterator i;
00089     
00090     for(i = errors.begin(); i != errors.end(); i++) {
00091         if ((*i)->GetCode() == eBlast4_error_code_search_pending) {
00092             return true;
00093         }
00094     }
00095     return false;
00096 }
00097 
00098 
00099 void CRemoteBlast::x_SearchErrors(CRef<objects::CBlast4_reply> reply)
00100 {
00101     const list< CRef<CBlast4_error> > & errors = reply->GetErrors();
00102     
00103     TErrorList::const_iterator i;
00104     
00105     for(i = errors.begin(); i != errors.end(); i++) {
00106         string msg;
00107         
00108         if ((*i)->CanGetMessage() && (! (*i)->GetMessage().empty())) {
00109             msg = ": ";
00110             msg += (*i)->GetMessage();
00111         }
00112         
00113         switch((*i)->GetCode()) {
00114         case eBlast4_error_code_conversion_warning:
00115             m_Warn.push_back(string("conversion_warning") + msg);
00116             break;
00117             
00118         case eBlast4_error_code_internal_error:
00119             m_Errs.push_back(string("internal_error") + msg);
00120             break;
00121             
00122         case eBlast4_error_code_not_implemented:
00123             m_Errs.push_back(string("not_implemented") + msg);
00124             break;
00125             
00126         case eBlast4_error_code_not_allowed:
00127             m_Errs.push_back(string("not_allowed") + msg);
00128             break;
00129             
00130         case eBlast4_error_code_bad_request:
00131             m_Errs.push_back(string("bad_request") + msg);
00132             break;
00133             
00134         case eBlast4_error_code_bad_request_id:
00135             m_Errs.push_back(string("Invalid/unknown RID (bad_request_id)") +
00136                              msg);
00137             break;
00138         }
00139     }
00140 }
00141 
00142 
00143 
00144 // CBlast4Option methods
00145 
00146 void CRemoteBlast::x_CheckConfig(void)
00147 {
00148     // If not configured, throw an exception - the associated string
00149     // will contain a list of the missing pieces.
00150     
00151     if (0 != m_NeedConfig) {
00152         string cfg("Configuration required:");
00153         
00154         if (eProgram & m_NeedConfig) {
00155             cfg += " <program>";
00156         }
00157         
00158         if (eService & m_NeedConfig) {
00159             cfg += " <service>";
00160         }
00161         
00162         if (eQueries & m_NeedConfig) {
00163             cfg += " <queries>";
00164         }
00165         
00166         if (eSubject & m_NeedConfig) {
00167             cfg += " <subject>";
00168         }
00169         
00170         NCBI_THROW(CRemoteBlastException, eIncompleteConfig, cfg);
00171     }
00172 }
00173 
00174 CRef<objects::CBlast4_request>
00175 CRemoteBlast::GetSearchStrategy()
00176 {
00177     CRef<CBlast4_request_body> body(x_GetBlast4SearchRequestBody());
00178     x_CheckConfig();
00179     string errors(GetErrors());
00180     if ( !errors.empty() ) {
00181         NCBI_THROW(CRemoteBlastException, eIncompleteConfig, errors);
00182     }
00183     CRef<CBlast4_request> retval(new CBlast4_request);
00184     if ( !m_ClientId.empty() ) {
00185         retval->SetIdent(m_ClientId);
00186     }
00187     retval->SetBody(*body);
00188     return retval;
00189 }
00190 
00191 CRef<objects::CBlast4_reply>
00192 CRemoteBlast::x_SendRequest(CRef<objects::CBlast4_request_body> body)
00193 {
00194     // If not configured, throw.
00195     x_CheckConfig();
00196     
00197     // Create the request; optionally echo it
00198     
00199     CRef<CBlast4_request> request(new CBlast4_request);
00200     if ( !m_ClientId.empty() ) {
00201         request->SetIdent(m_ClientId);
00202     }
00203     request->SetBody(*body);
00204     
00205     if (eDebug == m_Verbose) {
00206         NcbiCout << MSerial_AsnText << *request << endl;
00207     }
00208     
00209     // submit to server, get reply; optionally echo it
00210     
00211     CRef<CBlast4_reply> reply(new CBlast4_reply);
00212     
00213     try {
00214         CStopWatch sw(CStopWatch::eStart);
00215         
00216         if (eDebug == m_Verbose) {
00217             NcbiCout << "Starting network transaction (" << sw.Elapsed() << ")" << endl;
00218         }
00219         
00220         CBlast4Client().Ask(*request, *reply);
00221         
00222         if (eDebug == m_Verbose) {
00223             NcbiCout << "Done network transaction (" << sw.Elapsed() << ")" << endl;
00224         }
00225     }
00226     catch(const CEofException&) {
00227         NCBI_THROW(CRemoteBlastException, eServiceNotAvailable,
00228                    "No response from server, cannot complete request.");
00229     }
00230     
00231     if (eDebug == m_Verbose) {
00232         NcbiCout << MSerial_AsnText << *reply << endl;
00233     }
00234     
00235     return reply;
00236 }
00237 
00238 CRef<objects::CBlast4_reply>
00239 CRemoteBlast::x_GetSearchResults(void)
00240 {
00241     CRef<CBlast4_get_search_results_request>
00242         gsrr(new CBlast4_get_search_results_request);
00243     
00244     gsrr->SetRequest_id(m_RID);
00245     
00246     CRef<CBlast4_request_body> body(new CBlast4_request_body);
00247     body->SetGet_search_results(*gsrr);
00248     
00249     return x_SendRequest(body);
00250 }
00251 
00252 // Pre:  start, wait, or done
00253 // Post: failed or done
00254 
00255 // Returns: true if done
00256 
00257 bool CRemoteBlast::SubmitSync(int seconds)
00258 {
00259     // eFailed: no work to do, already an error.
00260     // eDone:   already done, just return.
00261     
00262     EImmediacy immed = ePollAsync;
00263     
00264     switch(x_GetState()) {
00265     case eStart:
00266         x_SubmitSearch();
00267         if (! m_Errs.empty()) {
00268             break;
00269         }
00270         immed = ePollImmed;
00271         // fall through
00272         
00273     case eWait:
00274         x_PollUntilDone(immed, seconds);
00275         break;
00276     default:
00277         break;
00278     }
00279     
00280     return (x_GetState() == eDone);
00281 }
00282 
00283 
00284 
00285 // Pre:  start
00286 // Post: failed, wait or done
00287 
00288 // Returns: true if no error so far
00289 
00290 bool CRemoteBlast::Submit(void)
00291 {
00292     switch(x_GetState()) {
00293     case eStart:
00294         x_SubmitSearch();
00295     default: break;
00296     }
00297     
00298     return m_Errs.empty();
00299 }
00300 
00301 //
00302 // The following table summarizes how to determine the status of a given
00303 // RID/search submission:
00304 //
00305 //                           | CheckDone()   |  CheckDone()
00306 //                           | returns true  |  returns false
00307 // ------------------------------------------------------------
00308 // GetErrors() == kEmptyStr  |    DONE       |   PENDING
00309 // ------------------------------------------------------------
00310 // GetErrors() != kEmptyStr  |    FAILED     |   UNKNOWN RID
00311 // ------------------------------------------------------------
00312 //
00313 CRemoteBlast::ESearchStatus
00314 CRemoteBlast::CheckStatus()
00315 {
00316     ESearchStatus retval = eStatus_Unknown;
00317 
00318     bool done = CheckDone();
00319     string errors = GetErrors();
00320 
00321     if (done && errors == kEmptyStr) {
00322         retval = eStatus_Done;
00323     } else if (!done && errors == kEmptyStr) {
00324         retval = eStatus_Pending;
00325     } else if (!done && errors.find("bad_request_id") != NPOS) {
00326         retval = eStatus_Unknown;
00327     } else if (done && errors != kEmptyStr) {
00328         retval = eStatus_Failed;
00329     } 
00330     return retval;
00331 }
00332 
00333 bool CRemoteBlast::x_IsUnknownRID(void)
00334 {
00335     bool retval = false;
00336     if (NStr::Find(GetErrors(), "bad_request_id") != NPOS) {
00337         retval = true;
00338     }
00339     return retval;
00340 }
00341 
00342 // Pre:  start, wait or done
00343 // Post: wait, done, or failed
00344 
00345 // Returns: true if done
00346 
00347 bool CRemoteBlast::CheckDone(void)
00348 {
00349     switch(x_GetState()) {
00350     case eFailed:
00351     case eDone:
00352         break;
00353         
00354     case eStart:
00355         Submit();
00356         break;
00357         
00358     case eWait:
00359         if( m_use_disk_cache ) x_CheckResultsDC(); else x_CheckResults();
00360     }
00361     
00362     int state = x_GetState();
00363     return (state == eDone || (state == eFailed && !x_IsUnknownRID()));
00364 }
00365 
00366 CRemoteBlast::TGSRR * CRemoteBlast::x_GetGSRR(void)
00367 {
00368     TGSRR* rv = NULL;
00369 
00370     if (m_ReadFile)
00371     {
00372         rv = &(m_Archive->SetResults());
00373     } 
00374     else if (SubmitSync() &&
00375         m_Reply.NotEmpty() &&
00376         m_Reply->CanGetBody() &&
00377         m_Reply->GetBody().IsGet_search_results()) {
00378         
00379         rv = & (m_Reply->SetBody().SetGet_search_results());
00380     }
00381 
00382     return rv;
00383 }
00384 
00385 CRef<objects::CSeq_align_set> CRemoteBlast::GetAlignments(void)
00386 {
00387     CRef<CSeq_align_set> rv;
00388     
00389     TGSRR * gsrr = x_GetGSRR();
00390     
00391     if (gsrr && gsrr->CanGetAlignments()) {
00392         rv = & (gsrr->SetAlignments());
00393     }
00394     
00395     return rv;
00396 }
00397 
00398 TSeqAlignVector CRemoteBlast::GetSeqAlignSets()
00399 {
00400     CRef<CSeq_align_set> al = GetAlignments();
00401     
00402     TSeqAlignVector rv;
00403 
00404     CRef<CSeq_align_set> cur_set;
00405     CConstRef<CSeq_id> current_id;
00406     
00407     // this loop groups all matches to one target sequences in one vector element.
00408     TSeqAlignVector temp;
00409 
00410     if (al.NotEmpty())
00411     {
00412            ITERATE(CSeq_align_set::Tdata, it, al->Get()) {
00413            // index 0 = query, index 1 = subject
00414            const int query_index = 0;
00415            CConstRef<CSeq_id> this_id( & (*it)->GetSeq_id(query_index) );
00416        
00417             if (current_id.Empty() || (CSeq_id::e_YES != this_id->Compare(*current_id))) {
00418               if (cur_set.NotEmpty()) {
00419                   temp.push_back(cur_set);
00420               }
00421               cur_set.Reset(new CSeq_align_set);
00422               current_id = this_id;
00423             }
00424             cur_set->Set().push_back(*it);
00425           }
00426     }
00427     
00428     if (cur_set.NotEmpty()) {
00429         temp.push_back(cur_set);
00430     }
00431 
00432     CSearchResultSet::TQueryIdVector query_ids;
00433     x_ExtractQueryIds(query_ids);
00434 
00435     // Fill out the return value, with empty Seq-align-set if not match for a query.
00436     TSeqAlignVector::size_type sap_index = 0;
00437     ITERATE(CSearchResultSet::TQueryIdVector, it,  query_ids) {
00438         const int query_index = 0;
00439         if (sap_index < temp.size())
00440         {
00441              list< CRef< CSeq_align > > sal = temp[sap_index]->Get();
00442              CConstRef<CSeq_id> this_id( & (sal.front()->GetSeq_id(query_index) ));
00443              if (CSeq_id::e_YES == (*it)->Compare(sal.front()->GetSeq_id(query_index) ))
00444              {
00445                   rv.push_back(temp[sap_index]);
00446                   sap_index++;
00447              }
00448              else
00449              {
00450                   cur_set.Reset(new CSeq_align_set);
00451                   rv.push_back(cur_set);
00452              }
00453         }
00454         else
00455         {
00456              cur_set.Reset(new CSeq_align_set);
00457              rv.push_back(cur_set);
00458         }
00459     }
00460     
00461     return rv;
00462 }
00463 
00464 CRef<objects::CBlast4_phi_alignments> CRemoteBlast::GetPhiAlignments(void)
00465 {
00466     CRef<CBlast4_phi_alignments> rv;
00467     
00468     TGSRR * gsrr = x_GetGSRR();
00469     
00470     if (gsrr && gsrr->CanGetPhi_alignments()) {
00471         rv = & (gsrr->SetPhi_alignments());
00472     }
00473     
00474     return rv;
00475 }
00476 
00477 // N.B.: this function assumes that the BLAST 4 server sends the query masked
00478 // locations for each query adjacent to one another in the list of masks (i.e.:
00479 // masks-for-query1-frameA, masks-for-query1-frameB, ...,
00480 // masks-for-query2-frameA, masks-for-query2-frameB, ... etc).
00481 TSeqLocInfoVector
00482 CRemoteBlast::GetMasks(void)
00483 {
00484     TSeqLocInfoVector retval;
00485     retval.resize(GetQueries()->GetNumQueries());
00486 
00487     TGSRR::TMasks network_masks = x_GetMasks();
00488     if (network_masks.empty()) {
00489         return retval;
00490     }
00491 
00492     EBlastProgramType program = NetworkProgram2BlastProgramType(m_Program,
00493                                                                 m_Service);
00494     CConstRef<CSeq_id> previous_seqid;
00495     size_t query_index = 0;
00496 
00497     ITERATE(TGSRR::TMasks, masks_for_frame, network_masks) {
00498 
00499         _ASSERT(masks_for_frame->NotEmpty());
00500 
00501         CConstRef<CSeq_id> current_seqid
00502             ((*masks_for_frame)->GetLocations().front()->GetId());
00503         if (previous_seqid.Empty()) {
00504             previous_seqid = current_seqid;
00505         }
00506 
00507         // determine which query are we setting the masks for...
00508         TMaskedQueryRegions* mqr = NULL;
00509         if (CSeq_id::e_YES == current_seqid->Compare(*previous_seqid)) {
00510             mqr = &retval[query_index];
00511         } else {
00512             mqr = &retval[++query_index];
00513             previous_seqid = current_seqid;
00514         }
00515 
00516         // all the masks for a given query and frame are in a single
00517         // Packed-seqint
00518         _ASSERT((*masks_for_frame)->GetLocations().size() == (size_t) 1);
00519         _ASSERT((*masks_for_frame)->GetLocations().front().NotEmpty());
00520         CRef<CSeq_loc> masks =
00521             (*masks_for_frame)->GetLocations().front();
00522         _ASSERT(masks->IsPacked_int());
00523 
00524         const CPacked_seqint& packed_int = masks->GetPacked_int();
00525         const EBlast4_frame_type frame = (*masks_for_frame)->GetFrame();
00526         ITERATE(CPacked_seqint::Tdata, mask, packed_int.Get()) {
00527             CRef<CSeq_interval> si
00528                 (new CSeq_interval(const_cast<CSeq_id&>((*mask)->GetId()), 
00529                                    (*mask)->GetFrom(), (*mask)->GetTo()));
00530             CRef<CSeqLocInfo> sli
00531                 (new CSeqLocInfo(si, NetworkFrame2FrameNumber(frame, program)));
00532             mqr->push_back(sli);
00533         }
00534     }
00535 
00536     // _ASSERT(query_index == GetQueries()->GetNumQueries() - 1);
00537 
00538     return retval;
00539 }
00540 
00541 CRemoteBlast::TGSRR::TMasks CRemoteBlast::x_GetMasks(void)
00542 {
00543     TGSRR::TMasks rv;
00544     
00545     TGSRR * gsrr = x_GetGSRR();
00546     
00547     if (gsrr && gsrr->CanGetMasks()) {
00548         rv = gsrr->SetMasks();
00549     }
00550 
00551     return rv;
00552 }
00553 
00554 list< CRef<objects::CBlast4_ka_block > > CRemoteBlast::GetKABlocks(void)
00555 { 
00556     list< CRef<CBlast4_ka_block > > rv;
00557         
00558     TGSRR * gsrr = x_GetGSRR();
00559     
00560     if (gsrr && gsrr->CanGetKa_blocks()) {
00561         rv = (gsrr->SetKa_blocks());
00562     }
00563     
00564     return rv;
00565 }
00566 
00567 list< string > CRemoteBlast::GetSearchStats(void)
00568 {
00569     list< string > rv;
00570     
00571     TGSRR * gsrr = x_GetGSRR();
00572     
00573     if (gsrr && gsrr->CanGetSearch_stats()) {
00574         rv = (gsrr->SetSearch_stats());
00575     }
00576     
00577     return rv;
00578 }
00579 
00580 CRef<objects::CPssmWithParameters> CRemoteBlast::GetPSSM(void)
00581 {
00582     CRef<CPssmWithParameters> rv;
00583     
00584     TGSRR * gsrr = x_GetGSRR();
00585     
00586     if (gsrr && gsrr->CanGetPssm()) {
00587         rv = & (gsrr->SetPssm());
00588     }
00589     
00590     return rv;
00591 }
00592 
00593 
00594 // Internal CRemoteBlast methods
00595 
00596 CRemoteBlast::EState CRemoteBlast::x_GetState(void)
00597 {
00598     // CBlast4Option states:
00599     
00600     // 0. start  (no rid, no errors)
00601     // 1. failed (errors)
00602     // 2. wait   (has rid, no errors, still pending)
00603     // 3. done   (has rid, no errors, not pending)
00604     
00605     EState rv = eDone;
00606     
00607     if (! m_Errs.empty()) {
00608         rv = eFailed;
00609     } else if (m_RID.empty()) {
00610         rv = eStart;
00611     } else if (m_Pending) {
00612         rv = eWait;
00613     }
00614     
00615     return rv;
00616 }
00617 
00618 CRef<objects::CBlast4_request_body>
00619 CRemoteBlast::x_GetBlast4SearchRequestBody()
00620 {
00621     CRef<CBlast4_request_body> retval;
00622 
00623     if (m_QSR.Empty()) {
00624         m_Errs.push_back("No request exists and no RID was specified.");
00625         return retval;
00626     }
00627     
00628     x_SetAlgoOpts();
00629     x_QueryMaskingLocationsToNetwork();
00630     
00631     retval.Reset(new CBlast4_request_body);
00632     retval->SetQueue_search(*m_QSR);
00633     return retval;
00634 }
00635 
00636 void CRemoteBlast::x_SubmitSearch(void)
00637 {
00638     CRef<CBlast4_request_body> body(x_GetBlast4SearchRequestBody());
00639     CRef<CBlast4_reply> reply;
00640     
00641     try {
00642         reply = x_SendRequest(body);
00643     }
00644     catch(const CEofException&) {
00645         m_Errs.push_back("No response from server, cannot complete request.");
00646         return;
00647     }
00648     
00649     if (reply->CanGetBody()  &&
00650         reply->GetBody().GetQueue_search().CanGetRequest_id()) {
00651         
00652         m_RID = reply->GetBody().GetQueue_search().GetRequest_id();
00653     }
00654     
00655     x_SearchErrors(reply);
00656     
00657     if (m_Errs.empty()) {
00658         m_Pending = true;
00659     }
00660 }
00661 
00662 void CRemoteBlast::x_CheckResults(void)
00663 {
00664     if (! m_Errs.empty()) {
00665         m_Pending = false;
00666     }
00667     
00668     if (! m_Pending) {
00669         return;
00670     }
00671     
00672     CRef<CBlast4_reply> r;
00673     
00674     bool try_again = true;
00675     
00676     while(try_again) {
00677         try {
00678             r = x_GetSearchResults();
00679             m_Pending = s_SearchPending(r);
00680             try_again = false;
00681         }
00682         catch(const CEofException&) {
00683             --m_ErrIgn;
00684             
00685             if (m_ErrIgn == 0) {
00686                 m_Errs.push_back("No response from server, "
00687                                  "cannot complete request.");
00688                 return;
00689             }
00690             
00691             SleepSec(10);
00692         }
00693     }
00694     
00695     if (! m_Pending) {
00696         x_SearchErrors(r);
00697         
00698         if (! m_Errs.empty()) {
00699             return;
00700         } else if (r->CanGetBody() && r->GetBody().IsGet_search_results()) {
00701             m_Reply = r;
00702         } else {
00703             m_Errs.push_back("Results were not a get-search-results reply");
00704         }
00705     }
00706 }
00707 
00708 // The input here is a hint as to whether the request might be ready.
00709 // If the flag is true, then we are polling immediately after
00710 // submission.  In this case, the results will not be ready, and so we
00711 // skip the first results check to reduce net traffic.  If the flag is
00712 // false, then the user is using the asynchronous interface, and we do
00713 // not know how long it has been since the request was submitted.  In
00714 // this case, we check the results before sleeping.
00715 //
00716 // If this was always set to 'true' then async mode would -always-
00717 // sleep.  This is undesireable in the case where (for example) 100
00718 // requests are batched together - the mandatory sleeps would add to a
00719 // total of 1000 seconds, more than a quarter hour.
00720 //
00721 // If it were always specified as 'false', then synchronous mode would
00722 // shoot off an immediate 'check results' as soon as the "submit"
00723 // returned, which creates unnecessary traffic.
00724 //
00725 // Futher optimizations are no doubt possible.
00726 
00727 void CRemoteBlast::x_PollUntilDone(EImmediacy immed, int timeout)
00728 {
00729     if (eDebug == m_Verbose)
00730         cout << "polling " << 0 << endl;
00731     
00732     // Configuration - internal for now
00733     
00734     double start_sec = 10.0;
00735     double increment = 1.30;
00736     double max_sleep = 300.0;
00737     double max_time  = timeout;
00738     
00739     if (eDebug == m_Verbose)
00740         cout << "polling " << start_sec << "/" << increment << "/" << max_sleep << "/" << max_time << "/" << endl;
00741     
00742     // End config
00743     
00744     double sleep_next = start_sec;
00745     double sleep_totl = 0.0;
00746     
00747     if (eDebug == m_Verbose)
00748         cout << "line " << __LINE__ << " sleep next " << sleep_next << " sleep totl " << sleep_totl << endl;
00749     
00750     if (ePollAsync == immed) {
00751         if( m_use_disk_cache ) x_CheckResultsDC(); else x_CheckResults();
00752     }
00753     
00754     while (m_Pending && (sleep_totl < max_time)) {
00755         if (eDebug == m_Verbose)
00756             cout << " about to sleep " << sleep_next << endl;
00757         
00758         double max_left = max_time - sleep_totl;
00759         
00760         // Don't oversleep
00761         if (sleep_next > max_left) {
00762             sleep_next = max_left;
00763             
00764             // But never sleep less than 2
00765             if (sleep_next < 2.0)
00766                 sleep_next = 2.0;
00767         }
00768         
00769         SleepSec(int(sleep_next));
00770         sleep_totl += sleep_next;
00771         
00772         if (eDebug == m_Verbose)
00773             cout << " done, total = " << sleep_totl << endl;
00774         
00775         if (sleep_next < max_sleep) {
00776             sleep_next *= increment;
00777             if (sleep_next > max_sleep) {
00778                 sleep_next = max_sleep;
00779             }
00780         }
00781         
00782         if (eDebug == m_Verbose)
00783             cout << " next sleep time = " << sleep_next << endl;
00784         
00785         if( m_use_disk_cache ) x_CheckResultsDC(); else x_CheckResults();
00786     }
00787 }
00788 
00789 void CRemoteBlast::x_Init(CNcbiIstream& f)
00790 {
00791       
00792       // m_Archive.Reset(new CBlast4_archive);
00793       CFormatGuess::EFormat fmt_type = ncbi::CFormatGuess().Format(f);
00794       switch (fmt_type) {
00795         case CFormatGuess::eBinaryASN:
00796             m_ObjectStream.reset(new CObjectIStreamAsnBinary(f));
00797             break;
00798 
00799         case CFormatGuess::eTextASN:
00800             m_ObjectStream.reset(new CObjectIStreamAsn(f));
00801             break;
00802 
00803 /* What's up here?
00804         case CFormatGuess::eXml:
00805             m_ObjectStream.reset(new CObjectIStreamXml(f));
00806             break;
00807 */
00808 
00809          default:
00810             NCBI_THROW(CBlastException, eInvalidArgument,
00811                        "BLAST archive must be one of text ASN.1, binary ASN.1 or XML.");
00812       }     
00813       m_ReadFile = true;
00814       m_ObjectType = fmt_type;
00815       m_ErrIgn     = 5;
00816       m_Verbose    = eSilent;
00817       m_DbFilteringAlgorithmId = -1;
00818 }
00819 
00820 void CRemoteBlast::x_Init(CBlastOptionsHandle * opts)
00821 {
00822     string p;
00823     string s;
00824     opts->GetOptions().GetRemoteProgramAndService_Blast3(p, s);
00825     
00826     x_Init(opts, p, s);
00827 }
00828 
00829 void CRemoteBlast::x_Init(CBlastOptionsHandle * opts_handle,
00830                           const string        & program,
00831                           const string        & service)
00832 {
00833     if ((! opts_handle) || program.empty() || service.empty()) {
00834         if (! opts_handle) {
00835             NCBI_THROW(CBlastException, eInvalidArgument,
00836                        "NULL argument specified: options handle");
00837         }
00838         if (program.empty()) {
00839             NCBI_THROW(CBlastException, eInvalidArgument,
00840                        "NULL argument specified: program");
00841         }
00842         NCBI_THROW(CBlastException, eInvalidArgument,
00843                    "NULL argument specified: service");
00844     }
00845     
00846     m_CBOH.Reset( opts_handle );
00847     m_ErrIgn     = 5;
00848     m_Pending    = false;
00849     m_Verbose    = eSilent;
00850     m_NeedConfig = eNeedAll;
00851     m_QueryMaskingLocations.clear();
00852     m_ReadFile = false;
00853     m_DbFilteringAlgorithmId = -1;
00854     
00855     m_QSR.Reset(new CBlast4_queue_search_request);
00856     
00857     m_QSR->SetProgram(m_Program = program);
00858     m_QSR->SetService(m_Service = service);
00859     
00860     m_NeedConfig = ENeedConfig(m_NeedConfig & ~(eProgram | eService));
00861     
00862     if (! (opts_handle && opts_handle->SetOptions().GetBlast4AlgoOpts())) {
00863         // This happens if you do not specify eRemote for the
00864         // CBlastOptions subclass constructor.
00865         
00866         NCBI_THROW(CBlastException, eInvalidArgument,
00867                    "CRemoteBlast: No remote API options.");
00868     }
00869     m_ClientId = kEmptyStr;
00870 }
00871 
00872 void CRemoteBlast::x_Init(const string & RID)
00873 {
00874     if (RID.empty()) {
00875         NCBI_THROW(CBlastException, eInvalidArgument,
00876                    "Empty RID string specified");
00877     }
00878     
00879     m_RID        = RID;
00880     m_ErrIgn     = 5;
00881     m_Pending    = true;
00882     m_Verbose    = eSilent;
00883     m_NeedConfig = eNoConfig;
00884     m_QueryMaskingLocations.clear();
00885     m_ReadFile = false;
00886     m_DbFilteringAlgorithmId = -1;
00887 }
00888 
00889 void CRemoteBlast::x_SetAlgoOpts(void)
00890 {
00891     CBlast4_parameters * algo_opts =
00892         m_CBOH->SetOptions().GetBlast4AlgoOpts();
00893     
00894     m_QSR->SetAlgorithm_options().Set() = *algo_opts;
00895 }
00896 
00897 // the "int" version is not actually used (no program options need it.)
00898 void CRemoteBlast::x_SetOneParam(objects::CBlast4Field & field,
00899                                  const int * x)
00900 {
00901     CRef<CBlast4_value> v(new CBlast4_value);
00902     v->SetInteger(*x);
00903     
00904     CRef<CBlast4_parameter> p(new CBlast4_parameter);
00905     p->SetName(field.GetName());
00906     p->SetValue(*v);
00907     _ASSERT(field.Match(*p));
00908     
00909     m_QSR->SetProgram_options().Set().push_back(p);
00910 }
00911 
00912 void CRemoteBlast::x_SetOneParam(objects::CBlast4Field & field,
00913                                  CRef<objects::CBlast4_mask> mask)
00914 {
00915     CRef<CBlast4_value> v(new CBlast4_value);
00916     v->SetQuery_mask(*mask);
00917         
00918     CRef<CBlast4_parameter> p(new CBlast4_parameter);
00919     // as dictated by internal/blast/interfaces/blast4/params.hpp
00920     p->SetName(field.GetName());
00921     p->SetValue(*v);
00922     _ASSERT(field.Match(*p));
00923     
00924     m_QSR->SetProgram_options().Set().push_back(p);
00925 }
00926 
00927 void CRemoteBlast::x_SetOneParam(objects::CBlast4Field & field,
00928                                  const list<int> * x)
00929 {
00930     CRef<CBlast4_value> v(new CBlast4_value);
00931     v->SetInteger_list() = *x;
00932         
00933     CRef<CBlast4_parameter> p(new CBlast4_parameter);
00934     p->SetName(field.GetName());
00935     p->SetValue(*v);
00936     _ASSERT(field.Match(*p));
00937     
00938     m_QSR->SetProgram_options().Set().push_back(p);
00939 }
00940 
00941 void CRemoteBlast::x_SetOneParam(objects::CBlast4Field & field,
00942                                  const char ** x)
00943 {
00944     CRef<CBlast4_value> v(new CBlast4_value);
00945     v->SetString().assign((x && (*x)) ? (*x) : "");
00946         
00947     CRef<CBlast4_parameter> p(new CBlast4_parameter);
00948     p->SetName(field.GetName());
00949     p->SetValue(*v);
00950     _ASSERT(field.Match(*p));
00951         
00952     m_QSR->SetProgram_options().Set().push_back(p);
00953 }
00954 
00955 void CRemoteBlast::SetQueries(CRef<objects::CBioseq_set> bioseqs)
00956 {
00957     if (bioseqs.Empty()) {
00958         NCBI_THROW(CBlastException, eInvalidArgument,
00959                    "Empty reference for query.");
00960     }
00961     
00962     m_Queries.Reset(new CBlast4_queries);
00963     m_Queries->SetBioseq_set(*bioseqs);
00964     
00965     m_QSR->SetQueries(*m_Queries);
00966     m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eQueries));
00967 }
00968 
00969 void CRemoteBlast::SetQueries(CRef<objects::CBioseq_set> bioseqs,
00970                               const TSeqLocInfoVector& masking_locations)
00971 {
00972     SetQueries(bioseqs);
00973     x_SetMaskingLocationsForQueries(masking_locations);
00974 }
00975 
00976 void CRemoteBlast::SetQueryMasks(const TSeqLocInfoVector& masking_locations)
00977 {
00978     if (!m_QSR->IsSetQueries())
00979     {
00980         NCBI_THROW(CBlastException, eInvalidArgument,
00981                    "Queries must be set before setting the masks.");
00982     }
00983     x_SetMaskingLocationsForQueries(masking_locations);
00984 }
00985 
00986 void CRemoteBlast::SetQueries(CRemoteBlast::TSeqLocList& seqlocs)
00987 {
00988     if (seqlocs.empty()) {
00989         NCBI_THROW(CBlastException, eInvalidArgument,
00990                    "Empty list for query.");
00991     }
00992     
00993     m_Queries.Reset(new CBlast4_queries);
00994     m_Queries->SetSeq_loc_list() = seqlocs;
00995     
00996     m_QSR->SetQueries(*m_Queries);
00997     m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eQueries));
00998 }
00999 
01000 void CRemoteBlast::SetQueries(CRemoteBlast::TSeqLocList& seqlocs,
01001                               const TSeqLocInfoVector& masking_locations)
01002 {
01003     SetQueries(seqlocs);
01004     x_SetMaskingLocationsForQueries(masking_locations);
01005 }
01006 
01007 void 
01008 CRemoteBlast::x_SetMaskingLocationsForQueries(const TSeqLocInfoVector&
01009                                               masking_locations)
01010 {
01011     _ASSERT(m_QSR->CanGetQueries());
01012     if (masking_locations.empty()) {
01013         return;
01014     }
01015 
01016     if (m_QSR->GetQueries().GetNumQueries() != masking_locations.size()) {
01017         CNcbiOstrstream oss;
01018         oss << "Mismatched number of queries (" 
01019              << m_QSR->GetQueries().GetNumQueries() 
01020              << ") and masking locations (" << masking_locations.size() << ")";
01021         NCBI_THROW(CBlastException, eInvalidArgument,
01022                    CNcbiOstrstreamToString(oss));
01023     }
01024 
01025     m_QueryMaskingLocations = const_cast<TSeqLocInfoVector&>(masking_locations);
01026 }
01027 
01028 /** Creates a Blast4-mask which is supposed to contain all masked locations for
01029  * a given query sequence and frame, all of which are in the packed_int
01030  * argument.
01031  */
01032 static CRef<CBlast4_mask> 
01033 s_CreateBlastMask(const CPacked_seqint& packed_int, EBlastProgramType program)
01034 {
01035     CRef<CBlast4_mask> retval(new CBlast4_mask);
01036 
01037     CRef<CSeq_loc> seqloc(new CSeq_loc);
01038     ITERATE(CPacked_seqint::Tdata, masked_region, packed_int.Get()) {
01039         CRef<CSeq_interval> seqint
01040             (new CSeq_interval(const_cast<CSeq_id&>((*masked_region)->GetId()), 
01041                           (*masked_region)->GetFrom(), 
01042                           (*masked_region)->GetTo()));
01043         if ((*masked_region)->CanGetStrand() && 
01044             (*masked_region)->GetStrand() == eNa_strand_minus) {
01045             // skip this as locations on the negative strand are not
01046             // represented in the remote masking locations
01047             continue;   
01048         }
01049         seqloc->SetPacked_int().Set().push_back(seqint);
01050     }
01051     retval->SetLocations().push_back(seqloc);
01052 
01053     /// The frame can only be notset for protein queries or plus1 for
01054     /// nucleotide queries
01055     EBlast4_frame_type frame =
01056         (Blast_QueryIsNucleotide(program) || Blast_QueryIsTranslated(program))
01057         ? eBlast4_frame_type_plus1
01058         : eBlast4_frame_type_notset;
01059     retval->SetFrame(frame);
01060 
01061     return retval;
01062 }
01063 
01064 CBlast4_get_search_results_reply::TMasks
01065 CRemoteBlast::ConvertToRemoteMasks(const TSeqLocInfoVector& masking_locations,
01066                                    EBlastProgramType program,
01067                                    vector<string>* warnings /* = NULL */)
01068 {
01069     CBlast4_get_search_results_reply::TMasks retval;
01070 
01071     ITERATE(TSeqLocInfoVector, query_masks, masking_locations) {
01072         CRef<CPacked_seqint> packed_seqint(new CPacked_seqint);
01073 
01074         if (query_masks->empty()) {
01075             continue;
01076         }
01077 
01078         int current_frame = query_masks->front()->GetFrame();
01079         ITERATE(TMaskedQueryRegions, mask_locs, *query_masks) {
01080               if  (Blast_QueryIsTranslated(program) && current_frame != (*mask_locs)->GetFrame())
01081               {
01082                   if (!packed_seqint.Empty())
01083                   {
01084                      CRef<CBlast4_mask> network_mask = s_CreateBlastMask(*packed_seqint, program);
01085                      network_mask->SetFrame(FrameNumber2NetworkFrame(current_frame, program));
01086                      retval.push_back(network_mask);
01087                   }
01088                   current_frame = (*mask_locs)->GetFrame();
01089                   packed_seqint.Reset(new CPacked_seqint);
01090               }
01091 
01092               packed_seqint->AddInterval((*mask_locs)->GetSeqId(),
01093                              (*mask_locs)->GetInterval().GetFrom(),
01094                              (*mask_locs)->GetInterval().GetTo());
01095         } 
01096 
01097         if (!packed_seqint.Empty()) 
01098         {
01099              CRef<CBlast4_mask> network_mask = s_CreateBlastMask(*packed_seqint, program);
01100              if (Blast_QueryIsTranslated(program))
01101                   network_mask->SetFrame(FrameNumber2NetworkFrame(current_frame, program));
01102              retval.push_back(network_mask);
01103         }
01104         packed_seqint.Reset();
01105     }
01106     return retval;
01107 }
01108 // Puts in each Blast4-mask all the masks that correspond to the same query 
01109 // and the same frame.
01110 void
01111 CRemoteBlast::x_QueryMaskingLocationsToNetwork()
01112 {
01113     if (m_QueryMaskingLocations.empty()) {
01114         return;
01115     }
01116 
01117     m_CBOH->GetOptions().GetRemoteProgramAndService_Blast3(m_Program, 
01118                                                            m_Service);
01119     EBlastProgramType program = NetworkProgram2BlastProgramType(m_Program,
01120                                                                 m_Service);
01121 
01122     const CBlast4_get_search_results_reply::TMasks& network_masks = 
01123         CRemoteBlast::ConvertToRemoteMasks(m_QueryMaskingLocations,
01124                                            program, &m_Warn);
01125     ITERATE(CBlast4_get_search_results_reply::TMasks, itr, network_masks) {
01126         x_SetOneParam(B4Param_LCaseMask, *itr);
01127     }
01128 
01129 }
01130 
01131 void CRemoteBlast::SetQueries(CRef<objects::CPssmWithParameters> pssm)
01132 {
01133     if (pssm.Empty()) {
01134         NCBI_THROW(CBlastException, eInvalidArgument,
01135                    "Empty reference for query pssm.");
01136     }
01137     
01138     CPsiBlastValidate::Pssm(*pssm);
01139     
01140     string psi_program("blastp");
01141     string old_service("plain");
01142     string new_service("psi");
01143     string delta_service("delta_blast");
01144     
01145     if (m_QSR->GetProgram() != psi_program) {
01146         NCBI_THROW(CBlastException, eNotSupported,
01147                    "PSI-Blast is only supported for blastp.");
01148     }
01149     
01150     if (m_QSR->GetService().empty()) {
01151         NCBI_THROW(CBlastException, eInvalidArgument,
01152                    "Internal error: service is not set.");
01153     }
01154     
01155     if ((m_QSR->GetService() != old_service) &&
01156         (m_QSR->GetService() != new_service) &&
01157         (m_QSR->GetService() != delta_service)) {
01158         
01159         // Allowing "psi" allows the matrix to be set, then replaced.
01160         
01161         NCBI_THROW(CBlastException, eInvalidArgument,
01162                    string("PSI-Blast cannot also be ") +
01163                    m_QSR->GetService() + ".");
01164     }
01165     
01166     CRef<CBlast4_queries> queries_p(new CBlast4_queries);
01167     queries_p->SetPssm(*pssm);
01168     
01169     m_QSR->SetQueries(*queries_p);
01170     m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eQueries));
01171 
01172     if(m_QSR->GetService() != delta_service) {
01173         m_QSR->SetService(new_service);
01174     }
01175 }
01176 
01177 string CRemoteBlast::GetErrors(void)
01178 {
01179     if (m_Errs.empty()) {
01180         return string();
01181     }
01182     
01183     string rvalue = m_Errs[0];
01184     
01185     for(unsigned i = 1; i<m_Errs.size(); i++) {
01186         rvalue += "\n";
01187         rvalue += m_Errs[i];
01188     }
01189     
01190     return rvalue;
01191 }
01192 
01193 string CRemoteBlast::GetWarnings(void)
01194 {
01195     if (m_Warn.empty()) {
01196         return string();
01197     }
01198     
01199     string rvalue = m_Warn[0];
01200     
01201     for(unsigned i = 1; i<m_Warn.size(); i++) {
01202         rvalue += "\n";
01203         rvalue += m_Warn[i];
01204     }
01205     
01206     return rvalue;
01207 }
01208 
01209 const vector<string> & CRemoteBlast::GetWarningVector()
01210 {
01211     return m_Warn;
01212 }
01213 
01214 const vector<string> & CRemoteBlast::GetErrorVector()
01215 {
01216     return m_Errs;
01217 }
01218 
01219 CRemoteBlast::CRemoteBlast(CNcbiIstream&  f)
01220 {
01221     x_Init(f);
01222     x_InitDiskCache();
01223 }
01224 
01225 CRemoteBlast::CRemoteBlast(const string & RID)
01226 {
01227     x_Init(RID);
01228     x_InitDiskCache();
01229 }
01230 
01231 CRemoteBlast::CRemoteBlast(CBlastOptionsHandle * algo_opts)
01232 {
01233     x_Init(algo_opts);
01234     x_InitDiskCache();
01235 }
01236 
01237 CRemoteBlast::CRemoteBlast(CRef<IQueryFactory>         queries,
01238                            CRef<CBlastOptionsHandle>   opts_handle,
01239                            const CSearchDatabase     & db)
01240 {
01241     x_Init(opts_handle, db);
01242     x_InitQueries(queries);
01243     x_InitDiskCache();
01244 }
01245 
01246 void
01247 FlattenBioseqSet(const CBioseq_set & bss, list< CRef<CBioseq> > & seqs)
01248 {
01249     if (bss.CanGetSeq_set()) {
01250         ITERATE(CBioseq_set::TSeq_set, iter, bss.GetSeq_set()) {
01251             if (iter->NotEmpty()) {
01252                 const CSeq_entry & entry = **iter;
01253                 
01254                 if (entry.IsSeq()) {
01255                     CBioseq & bs = const_cast<CBioseq &>(entry.GetSeq());
01256                     seqs.push_back(CRef<CBioseq>(& bs));
01257                 } else {
01258                     _ASSERT(entry.IsSet());
01259                     FlattenBioseqSet(entry.GetSet(), seqs);
01260                 }
01261             }
01262         }
01263     }
01264 }
01265 
01266 CRemoteBlast::CRemoteBlast(CRef<IQueryFactory>       queries,
01267                            CRef<CBlastOptionsHandle> opts_handle,
01268                            CRef<IQueryFactory>       subjects)
01269 {
01270     x_Init(&* opts_handle);
01271     x_InitQueries(queries);
01272     SetSubjectSequences(subjects);
01273     x_InitDiskCache();
01274 }
01275 
01276 void CRemoteBlast::x_InitQueries(CRef<IQueryFactory> queries)
01277 {
01278     if (queries.Empty()) {
01279         NCBI_THROW(CBlastException,
01280                    eInvalidArgument,
01281                    "Error: No queries specified");
01282     }
01283     
01284     CRef<IRemoteQueryData> Q(queries->MakeRemoteQueryData());
01285     CRef<CBioseq_set> bss = Q->GetBioseqSet();
01286     IRemoteQueryData::TSeqLocs sll = Q->GetSeqLocs();
01287 
01288     if (bss.Empty() && sll.empty()) {
01289         NCBI_THROW(CBlastException,
01290                    eInvalidArgument,
01291                    "Error: No query data.");
01292     }
01293 
01294     // Check if there are any range restrictions applied and if local IDs are
01295     // being used to determine how to specify the query sequence(s)
01296     
01297     bool has_local_ids = false;
01298     
01299     if ( !sll.empty() ) {
01300         // Only one range restriction can be sent in this protocol
01301         if (sll.front()->IsInt()) {
01302             const int kStart((int)sll.front()->GetStart(eExtreme_Positional));
01303             const int kStop((int)sll.front()->GetStop(eExtreme_Positional));
01304             const int kRangeLength = kStop - kStart + 1;
01305 
01306             _ASSERT(bss->CanGetSeq_set());
01307             _ASSERT( !bss->GetSeq_set().empty() );
01308             _ASSERT(bss->GetSeq_set().front()->IsSeq());
01309             _ASSERT(bss->GetSeq_set().front()->GetSeq().CanGetInst());
01310             const int kFullLength =
01311                 bss->GetSeq_set().front()->GetSeq().GetInst().GetLength();
01312 
01313             if (kFullLength != kRangeLength) {
01314                 x_SetOneParam(B4Param_RequiredStart, &kStart);
01315                 x_SetOneParam(B4Param_RequiredEnd, &kStop);
01316             }
01317         }
01318     
01319         ITERATE(IRemoteQueryData::TSeqLocs, itr, sll) {
01320             if (IsLocalId((*itr)->GetId())) {
01321                 has_local_ids = true;
01322                 break;
01323             }
01324         }
01325     } 
01326 
01327     TSeqLocInfoVector user_specified_masks;
01328     x_ExtractUserSpecifiedMasks(queries, user_specified_masks);
01329     
01330     if (has_local_ids) {
01331         SetQueries(bss, user_specified_masks);
01332     } else {
01333         SetQueries(sll, user_specified_masks);
01334     }
01335 }
01336 
01337 void
01338 CRemoteBlast::x_ExtractUserSpecifiedMasks(CRef<IQueryFactory> query_factory,
01339                                           TSeqLocInfoVector& masks)
01340 {
01341     masks.clear();
01342     CObjMgr_QueryFactory* objmgrqf = NULL;
01343     if ( (objmgrqf = dynamic_cast<CObjMgr_QueryFactory*>(&*query_factory))) {
01344         masks = objmgrqf->ExtractUserSpecifiedMasks();
01345     }
01346 }
01347 
01348 CRemoteBlast::CRemoteBlast(CRef<objects::CPssmWithParameters>   pssm,
01349                            CRef<CBlastOptionsHandle>            opts_handle,
01350                            const CSearchDatabase              & db)
01351 {
01352     if (pssm.Empty()) {
01353         NCBI_THROW(CBlastException,
01354                    eInvalidArgument,
01355                    "Error: No PSSM specified");
01356     }
01357     
01358     x_Init(opts_handle, db);
01359     
01360     SetQueries(pssm);
01361 }
01362 
01363 void CRemoteBlast::x_Init(CRef<CBlastOptionsHandle>   opts_handle,
01364                           const CSearchDatabase     & db)
01365 {
01366     if (opts_handle.Empty()) {
01367         NCBI_THROW(CBlastException,
01368                    eInvalidArgument,
01369                    "Error: No options specified");
01370     }
01371     
01372     if (db.GetDatabaseName().empty()) {
01373         NCBI_THROW(CBlastException,
01374                    eInvalidArgument,
01375                    "Error: No database specified");
01376     }
01377     
01378     x_Init(&* opts_handle);
01379     
01380     SetDatabase(db.GetDatabaseName());
01381     SetEntrezQuery(db.GetEntrezQueryLimitation().c_str());
01382     // Set the GI list restriction
01383     {{
01384         const CSearchDatabase::TGiList& tmplist = db.GetGiListLimitation();
01385         if ( !tmplist.empty() ) {
01386             list<Int4> gilist;
01387             copy(tmplist.begin(), tmplist.end(), back_inserter(gilist));
01388             SetGIList(gilist);
01389         }
01390     }}
01391 
01392     // Set the negative GI list
01393     {{
01394         const CSearchDatabase::TGiList& tmplist = 
01395             db.GetNegativeGiListLimitation();
01396         if ( !tmplist.empty() ) {
01397             list<Int4> gilist;
01398             copy(tmplist.begin(), tmplist.end(), back_inserter(gilist));
01399             SetNegativeGIList(gilist);
01400         }
01401     }}
01402 
01403     // Set the filtering algorithms
01404     SetDbFilteringAlgorithmId(db.GetFilteringAlgorithm());
01405 }
01406 // initialize disk cache support variables
01407 void CRemoteBlast::x_InitDiskCache(void)
01408 {
01409     m_use_disk_cache = false;
01410     m_disk_cache_error_flag = false;
01411     m_disk_cache_error_msg.clear();
01412     CNcbiEnvironment env;
01413     if( env.Get("BLAST4_DISK_CACHE") != kEmptyStr )
01414     {
01415         string l_disk_cache_flag = env.Get("BLAST4_DISK_CACHE");
01416         if( !NStr::CompareNocase(l_disk_cache_flag,"ON") )
01417         {
01418             m_use_disk_cache = true;
01419             LOG_POST(Info << "CRemoteBlast: DISK CACHE IS ON" );
01420         }
01421         else{
01422             LOG_POST(Info << "CRemoteBlast: DISK CACHE IS OFF; KEY: "<<l_disk_cache_flag );
01423         }
01424     }
01425     else{
01426             LOG_POST(Info << "CRemoteBlast: DISK CACHE IS OFF; NO ENVIRONMENT SETTINGS FOUND");
01427     }
01428 }
01429 
01430 CRemoteBlast::~CRemoteBlast()
01431 {
01432 }
01433 
01434 void CRemoteBlast::SetGIList(const list<Int4> & gi_list)
01435 {
01436     if (gi_list.empty()) {
01437         return;
01438     } else {
01439         NCBI_THROW(CBlastException, eNotSupported, 
01440            "Submitting gi lists remotely is currently not supported");
01441     }
01442     x_SetOneParam(B4Param_GiList, & gi_list);
01443     
01444     m_GiList.clear();
01445     copy(gi_list.begin(), gi_list.end(), back_inserter(m_GiList));
01446 }
01447 
01448 void CRemoteBlast::SetDbFilteringAlgorithmId(int algo_id)
01449 {
01450     if (algo_id == -1) 
01451         return;
01452 
01453     x_SetOneParam(B4Param_DbFilteringAlgorithmId, &algo_id);
01454     m_DbFilteringAlgorithmId = algo_id;
01455 }
01456 
01457 void CRemoteBlast::SetNegativeGIList(const list<Int4> & gi_list)
01458 {
01459     if (gi_list.empty()) {
01460         return;
01461     } else {
01462         NCBI_THROW(CBlastException, eNotSupported, 
01463            "Submitting negative gi lists remotely is currently not supported");
01464     }
01465     x_SetOneParam(B4Param_NegativeGiList, & gi_list);
01466     
01467     m_NegativeGiList.clear();
01468     copy(gi_list.begin(), gi_list.end(), back_inserter(m_NegativeGiList));
01469 }
01470 
01471 void CRemoteBlast::x_SetDatabase(const string & x)
01472 {
01473    EBlast4_residue_type rtype(eBlast4_residue_type_unknown);
01474 
01475     if (m_Program == "blastp" ||
01476         m_Program == "blastx" ||
01477         (m_Program == "tblastn" && m_Service == "rpsblast")) {
01478 
01479         rtype = eBlast4_residue_type_protein;
01480     } else {
01481         rtype = eBlast4_residue_type_nucleotide;
01482     }
01483 
01484     m_Dbs.Reset(new CBlast4_database);
01485     m_Dbs->SetName(x);
01486     m_Dbs->SetType(rtype);
01487 
01488     m_SubjectSequences.clear();
01489 }
01490 
01491 void CRemoteBlast::SetDatabase(const string & x)
01492 {
01493     if (x.empty()) {
01494         NCBI_THROW(CBlastException, eInvalidArgument,
01495                    "NULL specified for database.");
01496     }
01497         
01498     CRef<CBlast4_subject> subject_p(new CBlast4_subject);
01499     subject_p->SetDatabase(x);
01500     m_QSR->SetSubject(*subject_p);
01501     m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eSubject));
01502 
01503     x_SetDatabase(x);
01504 }
01505 
01506 void CRemoteBlast::SetSubjectSequences(CRef<IQueryFactory> subjects)
01507 {
01508     CRef<IRemoteQueryData> Q(subjects->MakeRemoteQueryData());
01509     CRef<CBioseq_set> bss = Q->GetBioseqSet();
01510     
01511     if (bss.Empty()) {
01512         NCBI_THROW(CBlastException,
01513                    eInvalidArgument,
01514                    "Error: No query data.");
01515     }
01516     
01517     list< CRef<CBioseq> > seqs;
01518     FlattenBioseqSet(*bss, seqs);
01519     
01520     SetSubjectSequences(seqs);
01521 }
01522 
01523 void 
01524 CRemoteBlast::SetSubjectSequences(const list< CRef< objects::CBioseq > > & subj)
01525 {
01526     CRef<CBlast4_subject> subject_p(new CBlast4_subject);
01527     subject_p->SetSequences() = subj;
01528     
01529     m_QSR->SetSubject(*subject_p);
01530     m_NeedConfig = ENeedConfig(m_NeedConfig & (~ eSubject));
01531 
01532     x_SetSubjectSequences(subj);
01533 }
01534 
01535 void
01536 CRemoteBlast::x_SetSubjectSequences(const list< CRef< objects::CBioseq > > & subj)
01537 {   
01538     m_SubjectSequences = subj;
01539     m_Dbs.Reset();
01540 }
01541 
01542 void CRemoteBlast::SetEntrezQuery(const char * x)
01543 {
01544     if (!x) {
01545         NCBI_THROW(CBlastException, eInvalidArgument,
01546                    "NULL specified for entrez query.");
01547     }
01548     
01549     if (*x) { // Ignore empty strings.
01550         x_SetOneParam(B4Param_EntrezQuery, &x);
01551         m_EntrezQuery.assign(x);
01552     }
01553 }
01554 
01555 bool CRemoteBlast::SubmitSync(void)
01556 {
01557     return SubmitSync( x_DefaultTimeout() );
01558 }
01559 
01560 const string & CRemoteBlast::GetRID(void)
01561 {
01562     return m_RID;
01563 }
01564 
01565 void CRemoteBlast::SetVerbose(EDebugMode verb)
01566 {
01567     m_Verbose = verb;
01568 }
01569 
01570 /// The default timeout is 3.5 hours.
01571 const int CRemoteBlast::x_DefaultTimeout(void)
01572 {
01573     return int(3600*3.5);
01574 }
01575 
01576 static const string 
01577     kNoRIDSpecified("Cannot fetch query info: No RID was specified.");
01578 
01579 static const string 
01580     kNoArchiveFile("Cannot fetch query info: No archive file.");
01581 
01582 void
01583 CRemoteBlast::x_GetRequestInfo()
01584 {
01585     if(m_ReadFile == true){
01586         x_GetRequestInfoFromFile();
01587     }
01588     else{
01589         x_GetRequestInfoFromRID();
01590     }
01591 }
01592 
01593 bool 
01594 CRemoteBlast::LoadFromArchive()
01595 {
01596       if (m_ObjectStream->EndOfData())
01597          return false;
01598 
01599       m_Archive.Reset(new CBlast4_archive);
01600       *m_ObjectStream >> *m_Archive;
01601       x_GetRequestInfoFromFile(); // update info.
01602 
01603       return true;
01604 }
01605 
01606 
01607 void
01608 CRemoteBlast::x_GetRequestInfoFromFile()
01609 {
01610     // Archive file must be present to fetch.
01611     if (!m_Archive || m_Archive.Empty()) {
01612         NCBI_THROW(CRemoteBlastException, eServiceNotAvailable,
01613                    kNoArchiveFile);
01614     }
01615     
01616     if (m_Archive->CanGetRequest())
01617     {
01618         CRef<objects::CBlast4_request> request(&m_Archive->SetRequest());
01619         CImportStrategy strategy(request);
01620         m_Program   = strategy.GetProgram();
01621         m_Service   = strategy.GetService();
01622         m_CreatedBy = strategy.GetCreatedBy();
01623         m_Queries    = strategy.GetQueries();
01624         m_AlgoOpts.Reset( & strategy.GetAlgoOptions() );
01625         m_ProgramOpts.Reset( & strategy.GetProgramOptions() );
01626 
01627         if (strategy.GetSubject()->IsDatabase())
01628             x_SetDatabase(strategy.GetSubject()->GetDatabase());
01629         else
01630             m_SubjectSequences = strategy.GetSubject()->SetSequences();
01631 
01632         if(m_Service == "psi")
01633         {
01634             // Would have errored out in CImportStrategy if we can't get queue search
01635              CBlast4_queue_search_request& qs = request->SetBody().SetQueue_search();
01636              if(qs.CanGetFormat_options())
01637                         m_FormatOpts.Reset(&qs.SetFormat_options());
01638         }
01639         // Ignore return value, want side effect of setting fields.
01640         GetSearchOptions();
01641         return;
01642     }
01643     
01644     NCBI_THROW(CRemoteBlastException, eServiceNotAvailable,
01645                "Could not get information from archive file.");
01646 }
01647 
01648 void
01649 CRemoteBlast::x_GetRequestInfoFromRID()
01650 {
01651     // Must have an RID to do this.
01652     
01653     if (m_RID.empty()) {
01654         NCBI_THROW(CRemoteBlastException, eServiceNotAvailable,
01655                    kNoRIDSpecified);
01656     }
01657     
01658     // First... poll until done.
01659     
01660     x_PollUntilDone(ePollAsync, x_DefaultTimeout());
01661     
01662     if (x_GetState() != eDone) {
01663         NCBI_THROW(CRemoteBlastException, eServiceNotAvailable,
01664                    "Polling terminated, but search is in incomplete state.");
01665     }
01666     
01667     // Build the request
01668     
01669     CRef<CBlast4_request_body> body(new CBlast4_request_body);
01670     CRef<CBlast4_request> request(new CBlast4_request);
01671     if ( !m_ClientId.empty() ) {
01672         request->SetIdent(m_ClientId);
01673     }
01674     
01675     body->SetGet_request_info().SetRequest_id(m_RID);
01676     request->SetBody(*body);
01677     
01678     CRef<CBlast4_reply> reply(new CBlast4_reply);
01679     
01680     if (eDebug == m_Verbose) {
01681         NcbiCout << MSerial_AsnText << *request << endl;
01682     }
01683     
01684     try {
01685         CStopWatch sw(CStopWatch::eStart);
01686         
01687         if (eDebug == m_Verbose) {
01688             NcbiCout << "Starting network transaction (" << sw.Elapsed() << ")" << endl;
01689         }
01690         
01691         // Send request.
01692         CBlast4Client().Ask(*request, *reply);
01693         
01694         if (eDebug == m_Verbose) {
01695             NcbiCout << "Done network transaction (" << sw.Elapsed() << ")" << endl;
01696         }
01697     }
01698     catch(const CEofException&) {
01699         NCBI_THROW(CRemoteBlastException, eServiceNotAvailable,
01700                    "No response from server, cannot complete request.");
01701     }
01702 
01703     if (eDebug == m_Verbose) {
01704         NcbiCout << MSerial_AsnText << *reply << endl;
01705     }
01706     
01707     if (reply->CanGetBody()) {
01708         if (reply->GetBody().IsGet_request_info()) {
01709             CRef<CBlast4_get_request_info_reply> grir
01710                 (& reply->SetBody().SetGet_request_info());
01711             
01712             if (grir->GetDatabase().GetName() != "n/a") {
01713                 m_Dbs.Reset( & grir->SetDatabase() );
01714             } else {
01715                 x_GetSubjects();
01716             }
01717             
01718             m_Program   = grir->GetProgram();
01719             m_Service   = grir->GetService();
01720             m_CreatedBy = grir->GetCreated_by();
01721             
01722             m_Queries    .Reset( & grir->SetQueries() );
01723             m_AlgoOpts   .Reset( & grir->SetAlgorithm_options() );
01724             m_ProgramOpts.Reset( & grir->SetProgram_options() );
01725         if( grir->IsSetFormat_options() )
01726                m_FormatOpts.Reset( & grir->SetFormat_options() );
01727             
01728             return;
01729         }
01730     }
01731     
01732     NCBI_THROW(CRemoteBlastException, eServiceNotAvailable,
01733                "Could not get information from search.");
01734 }
01735 
01736 
01737 CRef<CBlast4_database>
01738 CRemoteBlast::GetDatabases()
01739 {
01740     if (! m_Dbs.Empty()) {
01741         return m_Dbs;
01742     }
01743     
01744     x_GetRequestInfo();
01745     
01746     return m_Dbs;
01747 }
01748 
01749 bool
01750 CRemoteBlast::IsDbSearch()
01751 {
01752     if (m_Dbs.Empty() && m_SubjectSequences.empty() && m_SubjectSeqLocs.empty())
01753        x_GetRequestInfo();
01754 
01755     if (! m_Dbs.Empty()) {
01756        return true;
01757     }
01758     return false;
01759 }
01760 
01761 list< CRef<objects::CBioseq> > 
01762 CRemoteBlast::GetSubjectSequences()
01763 {
01764     if (x_HasRetrievedSubjects()) {
01765         return m_SubjectSequences;
01766     }
01767     
01768     x_GetRequestInfo();
01769     
01770     return m_SubjectSequences;
01771 }
01772 
01773 CBlast4_subject::TSeq_loc_list
01774 CRemoteBlast::GetSubjectSeqLocs()
01775 {
01776     if (x_HasRetrievedSubjects()) {
01777         return m_SubjectSeqLocs;
01778     }
01779     
01780     x_GetRequestInfo();
01781     
01782     return m_SubjectSeqLocs;
01783 }
01784 
01785 string
01786 CRemoteBlast::GetProgram()
01787 {
01788     if (! m_Program.empty()) {
01789         return m_Program;
01790     }
01791     
01792     x_GetRequestInfo();
01793     
01794     return m_Program;
01795 }
01796 
01797 string
01798 CRemoteBlast::GetService()
01799 {
01800     if (! m_Service.empty()) {
01801         return m_Service;
01802     }
01803     
01804     x_GetRequestInfo();
01805     
01806     return m_Service;
01807 }
01808 
01809 string
01810 CRemoteBlast::GetCreatedBy()
01811 {
01812     if (! m_CreatedBy.empty()) {
01813         return m_CreatedBy;
01814     }
01815     
01816     x_GetRequestInfo();
01817     
01818     return m_CreatedBy;
01819 }
01820 
01821 CRef<CBlast4_queries>
01822 CRemoteBlast::GetQueries()
01823 {
01824     if (! m_Queries.Empty()) {
01825         return m_Queries;
01826     }
01827     
01828     x_GetRequestInfo();
01829     
01830     return m_Queries;
01831 }
01832 
01833 EBlastProgramType
01834 NetworkProgram2BlastProgramType(const string& program, const string& service)
01835 {
01836     _ASSERT(!program.empty());
01837     _ASSERT(!service.empty());
01838 
01839     EBlastProgramType retval = eBlastTypeUndefined;
01840     Int2 rv = BlastProgram2Number(program.c_str(), &retval);
01841     _ASSERT(rv == 0);
01842     rv += 0;    // to eliminate compiler warning
01843     _ASSERT(retval != eBlastTypeUndefined);
01844 
01845     if (service == "rpsblast") {
01846 
01847         if (program == "blastp") {
01848             retval = eBlastTypeRpsBlast;
01849         } else if (program == "tblastn" || program == "blastx") {
01850             retval = eBlastTypeRpsTblastn;
01851         } else {
01852             abort();
01853         }
01854 
01855     } 
01856     
01857     if (service == "psi") {
01858         _ASSERT(program == "blastp");
01859         retval = eBlastTypePsiBlast;
01860     }
01861 
01862     return retval;
01863 }
01864 
01865 
01866 EBlast4_frame_type
01867 FrameNumber2NetworkFrame(int frame, EBlastProgramType program)
01868 {
01869     if (Blast_QueryIsTranslated(program)) {
01870         switch (frame) {
01871         case  1: return eBlast4_frame_type_plus1;
01872         case  2: return eBlast4_frame_type_plus2;
01873         case  3: return eBlast4_frame_type_plus3;
01874         case -1: return eBlast4_frame_type_minus1;
01875         case -2: return eBlast4_frame_type_minus2;
01876         case -3: return eBlast4_frame_type_minus3;
01877         default: abort();
01878         }
01879         _TROUBLE;
01880     }
01881     
01882     if (Blast_QueryIsNucleotide(program)) {
01883         _ASSERT(frame == -1 || frame == 1);
01884         // For some reason, the return value here is not set...
01885         return eBlast4_frame_type_notset;
01886     }
01887     
01888     return eBlast4_frame_type_notset;
01889 }
01890 
01891 CSeqLocInfo::ETranslationFrame
01892 NetworkFrame2FrameNumber(objects::EBlast4_frame_type frame, 
01893                          EBlastProgramType program)
01894 {
01895     if (Blast_QueryIsTranslated(program)) {
01896         switch (frame) {
01897         case eBlast4_frame_type_plus1:  return CSeqLocInfo::eFramePlus1;
01898         case eBlast4_frame_type_plus2:  return CSeqLocInfo::eFramePlus2;
01899         case eBlast4_frame_type_plus3:  return CSeqLocInfo::eFramePlus3;
01900         case eBlast4_frame_type_minus1: return CSeqLocInfo::eFrameMinus1;
01901         case eBlast4_frame_type_minus2: return CSeqLocInfo::eFrameMinus2;
01902         case eBlast4_frame_type_minus3: return CSeqLocInfo::eFrameMinus3;
01903         default: abort();
01904         }
01905         _TROUBLE;
01906     }
01907     
01908     // The BLAST formatter expects nucleotide masks to have a 'not-set' strand,
01909     // which implies that they're on the plus strand. If they're set to
01910     // anything else, it won't display them.
01911     //if (Blast_QueryIsNucleotide(program)) {
01912     //    _ASSERT(frame == eBlast4_frame_type_plus1);
01913     //    return CSeqLocInfo::eFramePlus1;
01914     //}
01915     
01916     return CSeqLocInfo::eFrameNotSet;
01917 }
01918 
01919 CRef<CBlastOptionsHandle> CRemoteBlast::GetSearchOptions()
01920 {
01921     if (m_CBOH.Empty()) {
01922         string program_s = GetProgram();
01923         string service_s = GetService();
01924         
01925         CBlastOptionsBuilder bob(program_s, service_s, CBlastOptions::eRemote );
01926         
01927         m_CBOH = bob.GetSearchOptions(m_AlgoOpts, m_ProgramOpts, &m_Task);
01928         
01929         if (bob.HaveEntrezQuery()) {
01930             m_EntrezQuery = bob.GetEntrezQuery();
01931         }
01932         
01933         if (bob.HaveFirstDbSeq()) {
01934             m_FirstDbSeq = bob.GetFirstDbSeq();
01935         }
01936         
01937         if (bob.HaveFinalDbSeq()) {
01938             m_FinalDbSeq = bob.GetFinalDbSeq();
01939         }
01940         
01941         if (bob.HaveGiList()) {
01942             m_GiList = bob.GetGiList();
01943         }
01944 
01945         if (bob.HasDbFilteringAlgorithmId() &&
01946             bob.GetDbFilteringAlgorithmId() != -1) {
01947             m_DbFilteringAlgorithmId = bob.GetDbFilteringAlgorithmId();
01948         }
01949 
01950         if (bob.HaveNegativeGiList()) {
01951             m_NegativeGiList = bob.GetNegativeGiList();
01952         }
01953     }
01954     
01955     return m_CBOH;
01956 }
01957 
01958 /// Extract the query IDs from a CBioseq_set
01959 /// @param bss CBioseq_set object used as source [in]
01960 /// @param query_ids where the query_ids will be added [in|out]
01961 static void s_ExtractQueryIdsFromBioseqSet(const CBioseq_set& bss,
01962                                            CSearchResultSet::TQueryIdVector&
01963                                            query_ids)
01964 {
01965     // sacrifice speed for protection against infinite loops
01966     CTypeConstIterator<objects::CBioseq> itr(ConstBegin(bss, eDetectLoops)); 
01967     for (; itr; ++itr) {
01968         query_ids.push_back(FindBestChoice(itr->GetId(), CSeq_id::BestRank));
01969     }
01970 }
01971 
01972 void 
01973 CRemoteBlast::x_ExtractQueryIds(CSearchResultSet::TQueryIdVector& query_ids)
01974 {
01975     query_ids.clear();
01976     CRef<CBlast4_queries> queries = GetQueries();
01977     query_ids.reserve(queries->GetNumQueries());
01978     _ASSERT(queries);
01979 
01980     if (queries->IsPssm()) {
01981         const CSeq_entry& seq_entry = queries->GetPssm().GetQuery();
01982         if (seq_entry.IsSeq()) {
01983             query_ids.push_back(FindBestChoice(seq_entry.GetSeq().GetId(), 
01984                                                CSeq_id::BestRank));
01985         } else {
01986             _ASSERT(seq_entry.IsSet());
01987             s_ExtractQueryIdsFromBioseqSet(seq_entry.GetSet(), query_ids);
01988         }
01989     } else if (queries->IsSeq_loc_list()) {
01990         query_ids.reserve(queries->GetSeq_loc_list().size());
01991         ITERATE(CBlast4_queries::TSeq_loc_list, i, queries->GetSeq_loc_list()) {
01992             CConstRef<CSeq_id> id((*i)->GetId());
01993             query_ids.push_back(id);
01994         }
01995     } else {
01996         _ASSERT(queries->IsBioseq_set());
01997         s_ExtractQueryIdsFromBioseqSet(queries->GetBioseq_set(), query_ids);
01998     }
01999 }
02000 
02001 /// Submit the search and return the results.
02002 /// @return Search results.
02003 CRef<CSearchResultSet> CRemoteBlast::GetResultSet()
02004 {
02005     CRef<CSearchResultSet> retval;
02006     if (m_ReadFile == false)
02007        SubmitSync();
02008     
02009     TSeqAlignVector alignments = GetSeqAlignSets();
02010     
02011     /* Process errors and warnings */
02012     TSearchMessages search_messages;
02013     {
02014         const vector<string> & W = GetWarningVector();
02015         const vector<string> & E = GetErrorVector();
02016         
02017         TQueryMessages query_messages;
02018         
02019         // Represents the context of the error, not the error id.
02020         int err = kBlastMessageNoContext;
02021         
02022         ITERATE(vector<string>, itw, W) {
02023             CRef<CSearchMessage>
02024                 sm(new CSearchMessage(eBlastSevWarning, err, *itw));
02025             
02026             query_messages.push_back(sm);
02027         }
02028         
02029         ITERATE(vector<string>, ite, E) {
02030             err = kBlastMessageNoContext;
02031             
02032             CRef<CSearchMessage>
02033                 sm(new CSearchMessage(eBlastSevError, err, *ite));
02034             
02035             query_messages.push_back(sm);
02036         }
02037 
02038         // Since there is no way to report per-query messages, all
02039         // warnings and errors are applied to all queries.
02040         search_messages.insert(search_messages.end(), 
02041                                alignments.empty() ? 1 : alignments.size(), 
02042                                query_messages);
02043 
02044         if (eDebug == m_Verbose) {
02045             NcbiCout << "Error/Warning messages: '" 
02046                      << search_messages.ToString() << "'" << endl;
02047         }
02048     }
02049 
02050     CSearchResultSet::TQueryIdVector query_ids;
02051     x_ExtractQueryIds(query_ids);
02052 
02053     if (alignments.empty()) {
02054         // this is required by the CSearchResultSet ctor
02055         alignments.resize(1);    
02056         try { x_ExtractQueryIds(query_ids); } 
02057         catch (const CRemoteBlastException& e) {
02058             if (e.GetMsg() == kNoRIDSpecified) {
02059                 retval.Reset(new CSearchResultSet(alignments, search_messages));
02060                 return retval;
02061             }
02062             throw;
02063         }
02064     }
02065 
02066     /* Build the ancillary data structure */
02067     CSearchResultSet::TAncillaryVector ancill_vector;
02068     {
02069         /* Get the effective search space */
02070         const string kTarget("Effective search space used: ");
02071         list<string> search_stats = GetSearchStats();
02072         Int8 effective_search_space = 0;
02073         NON_CONST_ITERATE(list<string>, itr, search_stats) {
02074             if (NStr::Find(*itr, kTarget) != NPOS) {
02075                 NStr::ReplaceInPlace(*itr, kTarget, kEmptyStr);
02076                 effective_search_space = 
02077                     NStr::StringToInt8(*itr, NStr::fConvErr_NoThrow);
02078                 break;
02079             }
02080         }
02081 
02082         /* Get the Karlin-Altschul parameters */
02083         bool found_gapped = false, found_ungapped = false;
02084         pair<double, double> lambdas, Ks, Hs;
02085         TKarlinAltschulBlocks ka_blocks = GetKABlocks();
02086 
02087         ITERATE(TKarlinAltschulBlocks, itr, ka_blocks) {
02088             if ((*itr)->GetGapped()) {
02089                 lambdas.second = (*itr)->GetLambda();
02090                 Ks.second = (*itr)->GetK();
02091                 Hs.second = (*itr)->GetH();
02092                 found_gapped = true;
02093             } else {
02094                 lambdas.first = (*itr)->GetLambda();
02095                 Ks.first = (*itr)->GetK();
02096                 Hs.first = (*itr)->GetH();
02097                 found_ungapped = true;
02098             }
02099 
02100             if (found_gapped && found_ungapped) {
02101                 break;
02102             }
02103         }
02104 
02105         // N.B.: apparently the BLAST3 protocol doesn't send PSI-BLAST Karlin &
02106         // Altschul parameters, so we don't set the is_psiblast
02107         // CBlastAncillaryData constructor argument
02108         CRef<CBlastAncillaryData> ancillary_data
02109             (new CBlastAncillaryData(lambdas, Ks, Hs, effective_search_space, m_Task == "psiblast"));
02110         ancill_vector.insert(ancill_vector.end(), alignments.size(),
02111                              ancillary_data);
02112     }
02113     
02114     TSeqLocInfoVector masks = GetMasks();
02115     retval.Reset(new CSearchResultSet(query_ids, alignments, search_messages,
02116                                       ancill_vector, &masks));
02117     retval->SetRID(GetRID());
02118     return retval;
02119 }
02120 
02121 CRef<objects::CBlast4_request> 
02122 ExtractBlast4Request(CNcbiIstream& in)
02123 {
02124     // First try to read a Blast4-get-search-strategy-reply...
02125     CRef<CBlast4_get_search_strategy_reply> b4_ss_reply;
02126     bool succeeded = false;
02127     try {
02128         switch (CFormatGuess().Format(in)) {
02129         case CFormatGuess::eBinaryASN:
02130             b4_ss_reply.Reset(new CBlast4_get_search_strategy_reply);
02131             in >> MSerial_AsnBinary >> *b4_ss_reply;
02132             succeeded = true;
02133             break;
02134 
02135         case CFormatGuess::eTextASN:
02136             b4_ss_reply.Reset(new CBlast4_get_search_strategy_reply);
02137             in >> MSerial_AsnText >> *b4_ss_reply;
02138             succeeded = true;
02139             break;
02140 
02141         case CFormatGuess::eXml:
02142             {
02143                 auto_ptr<CObjectIStream> is(
02144                     CObjectIStream::Open(eSerial_Xml, in));
02145                 dynamic_cast<CObjectIStreamXml*>
02146                     (is.get())->SetEnforcedStdXml(true);
02147                 b4_ss_reply.Reset(new CBlast4_get_search_strategy_reply);
02148                 *is >> *b4_ss_reply;
02149                 succeeded = true;
02150             }
02151             break;
02152 
02153         default:
02154             _ASSERT(b4_ss_reply.Empty());
02155         }
02156     } catch (const CException&) {
02157         succeeded = false;
02158     }
02159 
02160     CRef<CBlast4_request> retval;
02161     if (succeeded) {
02162         retval.Reset(&b4_ss_reply->Set());
02163         return retval;
02164     }
02165     b4_ss_reply.Reset();
02166     in.seekg(0);
02167 
02168     // Go for broke and try the Blast4-request...
02169     retval.Reset(new CBlast4_request);
02170     switch (CFormatGuess().Format(in)) {
02171     case CFormatGuess::eBinaryASN:
02172         in >> MSerial_AsnBinary >> *retval;
02173         break;
02174 
02175     case CFormatGuess::eTextASN:
02176         in >> MSerial_AsnText >> *retval;
02177         break;
02178 
02179     case CFormatGuess::eXml:
02180         {
02181             auto_ptr<CObjectIStream> is(
02182                 CObjectIStream::Open(eSerial_Xml, in));
02183             dynamic_cast<CObjectIStreamXml*>
02184                 (is.get())->SetEnforcedStdXml(true);
02185             *is >> *retval;
02186         }
02187         break;
02188 
02189     default:
02190         NCBI_THROW(CSerialException, eInvalidData, 
02191                    "Unrecognized input format ");
02192     }
02193 
02194     return retval;
02195 }
02196 
02197 static CRef<CBlast4_request_body>
02198 s_BuildSearchInfoRequest(const string& rid,
02199                          const string& name,
02200                          const string& value)
02201 {
02202     CRef<CBlast4_get_search_info_request> info_request( new CBlast4_get_search_info_request );
02203     info_request->SetRequest_id(rid);
02204     info_request->SetInfo().Add(name, value);
02205     CRef<CBlast4_request_body> retval(new CBlast4_request_body);
02206     retval->SetGet_search_info(*info_request);
02207     return retval;
02208 }
02209 
02210 string
02211 CRemoteBlast::x_GetStringFromSearchInfoReply(CRef<CBlast4_reply> reply,
02212                                              const string& name,
02213                                              const string& value)
02214 {
02215     string retval;
02216     if (reply.Empty() || !reply->CanGetBody()) {
02217         return retval;
02218     }
02219     if (reply->GetBody().IsGet_search_info()) {
02220         const CBlast4_get_search_info_reply &info_reply = reply->GetBody().GetGet_search_info();
02221         if (info_reply.CanGetRequest_id() && (info_reply.GetRequest_id() == m_RID)) {
02222             if( info_reply.CanGetInfo() ){
02223                 const CBlast4_parameters &params = info_reply.GetInfo();
02224                 const string reply_name =
02225                     Blast4SearchInfo_BuildReplyName(name, value);
02226                 CRef< CBlast4_parameter > search_param =
02227                     params.GetParamByName(reply_name);
02228                 if( search_param.NotEmpty() && search_param->GetValue().IsString()) {
02229                     retval = search_param->GetValue().GetString();
02230                 }
02231             } // get info
02232         } // request id == m_RID
02233     } // search info reply
02234     return retval;
02235 }
02236 
02237 
02238 //
02239 // based on a new request 
02240 //
02241 string CRemoteBlast::GetTitle(void)
02242 {
02243         // Build the request
02244         CRef<CBlast4_request_body> request_body =
02245             s_BuildSearchInfoRequest(m_RID, kBlast4SearchInfoReqName_Search,
02246                                      kBlast4SearchInfoReqValue_Title);
02247         CRef<CBlast4_reply> reply = x_SendRequest(request_body);
02248         return x_GetStringFromSearchInfoReply(reply,
02249                                               kBlast4SearchInfoReqName_Search,
02250                                               kBlast4SearchInfoReqValue_Title);
02251 
02252 }
02253 // Disk Cache version: x_CheckResults
02254 // only difference is that if search finished,
02255 // different approach to call and get results will be orchestrated
02256 // to fist get data from services as-is an deserialize them
02257 // later. This steps will minimize time OM is working.
02258 //
02259 void CRemoteBlast::x_CheckResultsDC(void)
02260 {
02261     LOG_POST(Info << "CRemoteBlast::x_CheckResultsDC");
02262     if (! m_Errs.empty()) {
02263         m_Pending = false;
02264     }
02265     
02266     if (! m_Pending) {
02267         return;
02268     }
02269     
02270     CRef<CBlast4_reply> r;
02271     
02272     bool try_again = true;
02273     
02274     while(try_again) {
02275         try {
02276         // asking for search statistics
02277             r = x_GetSearchStatsOnly();
02278             m_Pending = s_SearchPending(r);
02279             try_again = false;
02280         }
02281         catch(const CEofException&) {
02282             --m_ErrIgn;
02283             
02284             if (m_ErrIgn == 0) {
02285                 m_Errs.push_back("No response from server, "
02286                                  "cannot complete request.");
02287                 return;
02288             }
02289             
02290             SleepSec(10);
02291         }
02292     }
02293     
02294     if (! m_Pending) {
02295     // search finishedi check for errors
02296         x_SearchErrors(r);
02297         
02298         if (! m_Errs.empty()) {
02299             return;
02300         }
02301     
02302     if( !r->CanGetBody() ) {
02303             m_Errs.push_back("Results were not a get-search-results reply 2");
02304         return;
02305     }
02306     if( r->CanGetBody() && !r->GetBody().IsGet_search_results()) {
02307             m_Errs.push_back("Results were not a get-search-results reply");
02308         return;
02309     }
02310     //ATTENTION: fullscale get results call
02311     // search finished, retriev results
02312     r = x_GetSearchResultsHTTP();
02313     if( r.Empty() ){
02314             m_Errs.push_back("Results were not a get-search-results reply 3");
02315         return;
02316     }
02317     if( r->CanGetBody() && !r->GetBody().IsGet_search_results()) {
02318             m_Errs.push_back("Results were not a get-search-results reply 4");
02319         return;
02320     }
02321         m_Pending = s_SearchPending(r);
02322         m_Reply = r;
02323     }
02324 
02325 }
02326 // disk cache support.
02327 // ask for search statistics  to check status w/o polling results.
02328 CRef<objects::CBlast4_reply>
02329 CRemoteBlast::x_GetSearchStatsOnly(void)
02330 {
02331     CRef<CBlast4_get_search_results_request>
02332         gsrr(new CBlast4_get_search_results_request);
02333     
02334     gsrr->SetRequest_id(m_RID);
02335     // result-types
02336     gsrr->ResetResult_types();
02337     gsrr->SetResult_types( 16) ;
02338     
02339     CRef<CBlast4_request_body> body(new CBlast4_request_body);
02340     body->SetGet_search_results(*gsrr);
02341     
02342     return x_SendRequest(body);
02343 }
02344 //
02345 // get search results caching first on a file system.
02346 // TODO: check for errors and disable disk caching
02347 CRef<objects::CBlast4_reply>
02348 CRemoteBlast::x_GetSearchResultsHTTP(void)
02349 {
02350     CRef<objects::CBlast4_reply>   one_reply( new CBlast4_reply );
02351     CStopWatch swatch;
02352     CNcbiEnvironment env;
02353     string BLAST4_CONN_SERVICE_NAME = "blast4";
02354     if( env.Get("BLAST4_CONN_SERVICE_NAME") != kEmptyStr )
02355     BLAST4_CONN_SERVICE_NAME = env.Get("BLAST4_CONN_SERVICE_NAME");
02356 
02357     // construct request
02358     CRef<CBlast4_get_search_results_request> gsrr(new CBlast4_get_search_results_request);
02359     gsrr->SetRequest_id( m_RID);
02360 
02361     CRef<CBlast4_request_body> body(new CBlast4_request_body);
02362     body->SetGet_search_results(*gsrr);
02363 
02364     CRef<CBlast4_request> request( new CBlast4_request );
02365     request->SetBody(*body );
02366     // call service
02367     swatch.Start();
02368     CConn_ServiceStream ios( BLAST4_CONN_SERVICE_NAME , fSERV_HttpPost, 0);
02369     ios << MSerial_AsnBinary << *request;
02370     ios.flush();
02371     // cache answer to the file
02372     char incoming_buffer[8192];
02373     int  read_max = 8192;
02374     int  l_total_bytes=0, n_read;
02375     bool l_cached_ok = true;
02376 
02377     auto_ptr<fstream> tmp_stream( CDirEntry::CreateTmpFile() );
02378 
02379     do{
02380         ios.readsome(incoming_buffer, read_max);
02381         n_read = ios.gcount();
02382         if( n_read >= 0 ){
02383             l_total_bytes += n_read;
02384             try{
02385                 tmp_stream->write(incoming_buffer,n_read);
02386                 if( tmp_stream->bad() || tmp_stream->fail() )
02387                 {
02388                     l_cached_ok = false;
02389                     LOG_POST(Error << "CRemoteBlast::x_GetSearchResultsHTTP CAN'T WRITE CACHED DATA: BAD/FAIL STATE" );
02390                     m_disk_cache_error_msg = "bad/fail fstream state on write";
02391                     break;
02392                 }
02393             }
02394             catch ( ios_base::failure &err){
02395                 LOG_POST(Error << "CRemoteBlast::x_GetSearchResultsHTTP CAN'T WRITE CACHED DATA: "<<err.what() );
02396                 l_cached_ok = false;
02397                 m_disk_cache_error_msg = err.what();    
02398             }
02399         }
02400     }
02401     while( ios);
02402     swatch.Stop();
02403     
02404     if(!l_cached_ok ){
02405         // Attention: in case of caching error, disable it and re-read w/o caching
02406         LOG_POST(Info << "CRemoteBlast::x_GetSearchResultsHTTP: DISABLE CACHE, RE-READ");
02407         m_use_disk_cache = false;
02408         m_disk_cache_error_flag = true;
02409         return x_GetSearchResults();
02410     }
02411 
02412     tmp_stream->seekg(0);
02413     // read cached answer
02414     swatch.Restart();
02415     {
02416         auto_ptr<CObjectIStream> 
02417         in_stream( CObjectIStream::Open(eSerial_AsnBinary,  *tmp_stream) );
02418         in_stream->Read(ObjectInfo(*one_reply), CObjectIStream::eNoFileHeader);
02419 
02420     }
02421     
02422     swatch.Stop();
02423     
02424     return one_reply ;
02425 }
02426 //
02427 // Get search subject and set 
02428 // m_SubjectSeqLocs or m_SubjectSequences 
02429 //
02430 void CRemoteBlast::x_GetSubjects(void)
02431 {
02432     if( !m_SubjectSequences.empty() && !m_SubjectSeqLocs.empty() )
02433         return; // already got data
02434 
02435     // Build the request
02436     CRef<CBlast4_get_search_info_request> info_request( new CBlast4_get_search_info_request );
02437     info_request->SetRequest_id( m_RID );
02438     info_request->SetInfo().Add(kBlast4SearchInfoReqName_Search, 
02439                                 kBlast4SearchInfoReqValue_Subjects);
02440 
02441     CRef<CBlast4_request_body> body(new CBlast4_request_body);
02442     body->SetGet_search_info( *info_request );
02443 
02444     CRef<CBlast4_request> request(new CBlast4_request);
02445     request->SetBody(*body);
02446     
02447     CRef<CBlast4_reply> reply(new CBlast4_reply);
02448     
02449     if (eDebug == m_Verbose) {
02450         NcbiCout << MSerial_AsnText << *request << endl;
02451     }
02452     
02453     try {
02454         CStopWatch sw(CStopWatch::eStart);
02455         
02456         if (eDebug == m_Verbose) {
02457             NcbiCout << "Starting network transaction (" << sw.Elapsed() << ")" << endl;
02458         }
02459         
02460         // Send request.
02461         CBlast4Client().Ask(*request, *reply);
02462         
02463         if (eDebug == m_Verbose) {
02464             NcbiCout << "Done network transaction (" << sw.Elapsed() << ")" << endl;
02465         }
02466     }
02467     catch(const CEofException&) {
02468         NCBI_THROW(CRemoteBlastException, eServiceNotAvailable,
02469                    "No response from server, cannot complete request.");
02470     }
02471 
02472     if (eDebug == m_Verbose) {
02473         NcbiCout << MSerial_AsnText << *reply << endl;
02474     }
02475   
02476     // get reply. it will be status and subjects 
02477     if (reply->CanGetBody()) {
02478         if (reply->GetBody().IsGet_search_info()) {
02479             const CBlast4_get_search_info_reply &info_reply = reply->GetBody().GetGet_search_info();
02480             if( info_reply.CanGetRequest_id() && ( info_reply.GetRequest_id() == m_RID ) ){
02481                 if( info_reply.CanGetInfo() ){
02482                     const CBlast4_parameters &params = info_reply.GetInfo();
02483                     string reply_name =
02484                           Blast4SearchInfo_BuildReplyName(kBlast4SearchInfoReqName_Search,
02485                                                           kBlast4SearchInfoReqValue_Subjects);
02486                     CRef< CBlast4_parameter > search_param = params.GetParamByName (reply_name);
02487                     // reply could have string, seq-loc-list or 
02488                     // bioseq-list, but we don't care about string result for bl2seq
02489                     if( search_param.NotEmpty() && search_param->GetValue().IsSeq_loc_list())
02490                     {
02491                         m_SubjectSeqLocs  = search_param->GetValue().GetSeq_loc_list();
02492                     }
02493                     // bioseq-list  // SEQUENCE OF Bioseq 
02494                     else if( search_param.NotEmpty() && search_param->GetValue().IsBioseq_list())
02495                     {
02496                         x_SetSubjectSequences( search_param->GetValue().GetBioseq_list() );
02497 
02498                     }
02499                     else 
02500                     {
02501                         NCBI_THROW(CRemoteBlastException, eIncompleteConfig,
02502                                    "Obtained database name for remote bl2seq search");
02503                     }
02504 
02505                 } // get info
02506             } // request id == m_RID
02507        } // search info reply
02508    } // get body
02509 }
02510 
02511 unsigned int CRemoteBlast::GetPsiNumberOfIterations(void)
02512 {
02513     unsigned int iter_num = 0;
02514     if(!m_FormatOpts.Empty())
02515     {
02516         CRef< CBlast4_parameter > param = m_FormatOpts->GetParamByName (B4Param_Web_StepNumber.GetName());
02517         if( param.NotEmpty())
02518         {
02519             iter_num  = param->GetValue().GetInteger();
02520         }
02521     }
02522     else if(!m_RID.empty())
02523     {
02524         iter_num = x_GetPsiIterationsFromServer();
02525     }
02526 
02527     return iter_num;
02528 }
02529 
02530 unsigned int CRemoteBlast::x_GetPsiIterationsFromServer()
02531 {
02532     unsigned int retval=0;
02533 
02534      CRef<CBlast4_request_body> request_body =
02535          s_BuildSearchInfoRequest(m_RID, kBlast4SearchInfoReqName_Search,
02536                                   kBlast4SearchInfoReqValue_PsiIterationNum);
02537      CRef<CBlast4_reply> reply = x_SendRequest(request_body);
02538      string num = x_GetStringFromSearchInfoReply(reply,
02539                                                  kBlast4SearchInfoReqName_Search,
02540                                                  kBlast4SearchInfoReqValue_PsiIterationNum);
02541       if ( !num.empty() ) {
02542          try { retval = NStr::StringToUInt(num); }
02543          catch (...) {}  // ignore errors and leave as unset
02544      }
02545      return retval;
02546 }
02547 
02548 
02549 END_SCOPE(blast)
02550 END_NCBI_SCOPE
02551 
02552 /* @} */
Modified on Wed May 23 12:52:33 2012 by modify_doxy.py rev. 337098