src/algo/blast/api/local_db_adapter.cpp

Go to the documentation of this file.
00001 #ifndef SKIP_DOXYGEN_PROCESSING
00002 static char const rcsid[] =
00003     "$Id: local_db_adapter.cpp 170772 2009-09-16 18:00:03Z camacho $";
00004 #endif /* SKIP_DOXYGEN_PROCESSING */
00005 /* ===========================================================================
00006  *
00007  *                            PUBLIC DOMAIN NOTICE
00008  *               National Center for Biotechnology Information
00009  *
00010  *  This software/database is a "United States Government Work" under the
00011  *  terms of the United States Copyright Act.  It was written as part of
00012  *  the author's official duties as a United States Government employee and
00013  *  thus cannot be copyrighted.  This software/database is freely available
00014  *  to the public for use. The National Library of Medicine and the U.S.
00015  *  Government have not placed any restriction on its use or reproduction.
00016  *
00017  *  Although all reasonable efforts have been taken to ensure the accuracy
00018  *  and reliability of the software and data, the NLM and the U.S.
00019  *  Government do not and cannot warrant the performance or results that
00020  *  may be obtained by using this software or data. The NLM and the U.S.
00021  *  Government disclaim all warranties, express or implied, including
00022  *  warranties of performance, merchantability or fitness for any particular
00023  *  purpose.
00024  *
00025  *  Please cite the author in any work or product based on this material.
00026  *
00027  * ===========================================================================
00028  *
00029  * Author:  Christiam Camacho
00030  *
00031  */
00032 
00033 /** @file local_db_adapter.cpp
00034  * Defines class which provides internal BLAST database representations to the
00035  * internal BLAST APIs
00036  */
00037 
00038 #include <ncbi_pch.hpp>
00039 #include <algo/blast/api/local_db_adapter.hpp>
00040 #include <algo/blast/api/objmgr_query_data.hpp> // for CObjMgr_QueryFactory
00041 #include <algo/blast/api/seqsrc_seqdb.hpp>  // for SeqDbBlastSeqSrcInit
00042 #include <algo/blast/api/seqinfosrc_seqdb.hpp>  // for CSeqDbSeqInfoSrc
00043 #include <algo/blast/api/seqinfosrc_seqvec.hpp> // for CSeqVecSeqInfoSrc
00044 #include "seqsrc_query_factory.hpp"  // for QueryFactoryBlastSeqSrcInit
00045 #include "psiblast_aux_priv.hpp"    // for CPsiBlastValidate
00046 #include "seqinfosrc_bioseq.hpp"    // for CBioseqInfoSrc
00047 
00048 /** @addtogroup AlgoBlast
00049  *
00050  * @{
00051  */
00052 
00053 BEGIN_NCBI_SCOPE
00054 BEGIN_SCOPE(blast)
00055 
00056 CLocalDbAdapter::CLocalDbAdapter(const CSearchDatabase& dbinfo)
00057     : m_SeqSrc(0), m_SeqInfoSrc(0), m_DbName(dbinfo.GetDatabaseName()),
00058     m_FilteringAlg(dbinfo.GetFilteringAlgorithm())
00059 {
00060     m_DbInfo.Reset(new CSearchDatabase(dbinfo));
00061 }
00062 
00063 CLocalDbAdapter::CLocalDbAdapter(CRef<CSeqDB> seqdb,
00064                                  int filtering_algorithm)
00065     : m_SeqSrc(0), m_SeqInfoSrc(0), m_SeqDb(seqdb),
00066     m_DbName(seqdb->GetDBNameList()), m_FilteringAlg(filtering_algorithm)
00067 {
00068     if (m_SeqDb.Empty()) {
00069         NCBI_THROW(CBlastException, eInvalidArgument, "NULL CSeqDB");
00070     }
00071 }
00072 
00073 CLocalDbAdapter::CLocalDbAdapter(CRef<IQueryFactory> subject_sequences,
00074                                  CConstRef<CBlastOptionsHandle> opts_handle)
00075     : m_SeqSrc(0), m_SeqInfoSrc(0), m_SubjectFactory(subject_sequences),
00076     m_OptsHandle(opts_handle), m_DbName(kEmptyStr)
00077 {
00078     if (subject_sequences.Empty()) {
00079         NCBI_THROW(CBlastException, eInvalidArgument, 
00080                    "Missing subject sequence data");
00081     }
00082     if (opts_handle.Empty()) {
00083         NCBI_THROW(CBlastException, eInvalidArgument, "Missing options");
00084     }
00085     if (opts_handle->GetOptions().GetProgram() == ePSIBlast) {
00086         CPsiBlastValidate::QueryFactory(subject_sequences, *opts_handle,
00087                                         CPsiBlastValidate::eQFT_Subject);
00088     }
00089 
00090     CObjMgr_QueryFactory* objmgr_qf = NULL;
00091     if ( (objmgr_qf = dynamic_cast<CObjMgr_QueryFactory*>(&*m_SubjectFactory)) )
00092     {
00093         m_Subjects = objmgr_qf->GetTSeqLocVector();
00094         _ASSERT(!m_Subjects.empty());
00095     }
00096 }
00097 
00098 CLocalDbAdapter::CLocalDbAdapter(BlastSeqSrc* seqSrc,
00099                                  CRef<IBlastSeqInfoSrc> seqInfoSrc)
00100     : m_SeqSrc(seqSrc), m_SeqInfoSrc(seqInfoSrc), m_DbName(kEmptyStr)
00101 {
00102 }
00103 
00104 CLocalDbAdapter::~CLocalDbAdapter()
00105 {
00106     if (m_SeqSrc) {
00107         m_SeqSrc = BlastSeqSrcFree(m_SeqSrc);
00108     }
00109 }
00110 
00111 void
00112 CLocalDbAdapter::ResetBlastSeqSrcIteration()
00113 {
00114     if (m_SeqSrc) {
00115         BlastSeqSrcResetChunkIterator(m_SeqSrc);
00116     }
00117 }
00118 
00119 /// Checks if the BlastSeqSrc initialization succeeded
00120 /// @throws CBlastException if BlastSeqSrc initialization failed
00121 static void
00122 s_CheckForBlastSeqSrcErrors(const BlastSeqSrc* seqsrc)
00123 {
00124     if ( !seqsrc ) {
00125         return;
00126     }
00127 
00128     char* error_str = BlastSeqSrcGetInitError(seqsrc);
00129     if (error_str) {
00130         string msg(error_str);
00131         sfree(error_str);
00132         NCBI_THROW(CBlastException, eSeqSrcInit, msg);
00133     }
00134 }
00135 
00136 BlastSeqSrc*
00137 CLocalDbAdapter::MakeSeqSrc()
00138 {
00139     if ( ! m_SeqSrc ) {
00140         if (m_DbInfo.NotEmpty() || m_SeqDb.NotEmpty()) {
00141             if (m_SeqDb.Empty()) {
00142                 m_SeqDb = x_InitSeqDB(m_DbInfo);
00143             }
00144             m_SeqSrc = SeqDbBlastSeqSrcInit(m_SeqDb.GetNonNullPointer(),
00145                                             m_FilteringAlg);
00146         } else if (m_SubjectFactory.NotEmpty() && m_OptsHandle.NotEmpty()) {
00147             const EBlastProgramType program =
00148                                m_OptsHandle->GetOptions().GetProgramType();
00149             if ( !m_Subjects.empty() ) {
00150                 m_SeqSrc = QueryFactoryBlastSeqSrcInit(m_Subjects, program);
00151             } else {
00152                 m_SeqSrc = QueryFactoryBlastSeqSrcInit(m_SubjectFactory,
00153                                                        program);
00154             }
00155             _ASSERT(m_SeqSrc);
00156         } else {
00157             abort();
00158         }
00159         s_CheckForBlastSeqSrcErrors(m_SeqSrc);
00160         _ASSERT(m_SeqSrc);
00161     }
00162     return m_SeqSrc;
00163 }
00164 
00165 CRef<CSeqDB>
00166 CLocalDbAdapter::x_InitSeqDB(CConstRef<CSearchDatabase> dbinfo)
00167 {
00168     _ASSERT(dbinfo.NotEmpty());
00169 
00170     const CSeqDB::ESeqType type = dbinfo->IsProtein()
00171         ? CSeqDB::eProtein
00172         : CSeqDB::eNucleotide;
00173 
00174     CRef<CSeqDBGiList> gi_list;
00175     if ( !dbinfo->GetGiListLimitation().empty() ) {
00176         CSeqDBIdSet idset(dbinfo->GetGiListLimitation(), CSeqDBIdSet::eGi);
00177         gi_list = idset.GetPositiveList();
00178     }
00179 
00180     // FIXME: refactor code in SplitDB/LibEntrezCacheEx.cpp ?
00181     // Also, the results of this entrez query should be intersected with
00182     // gi_list above
00183     if ( !dbinfo->GetEntrezQueryLimitation().empty() ) {
00184         NCBI_THROW(CException, eUnknown, "Unimplemented");
00185     }
00186 
00187     return CRef<CSeqDB>(new CSeqDB(dbinfo->GetDatabaseName(), type, gi_list));
00188 }
00189 
00190 /*** Auxiliary function to initialize CSeqDB with the filtering algorithms used
00191  * for the database
00192  * @param dbhandle CSeqDB instance [in]
00193  * @param filtering_algorithm filtering algorithm ID used for this search
00194  * [in]
00195  * @return CSeqDbSeqInfoSrc initialized accordingly
00196  */
00197 static CRef<CSeqDbSeqInfoSrc>
00198 s_InitCSeqDbSeqInfoSrc(CRef<CSeqDB> dbhandle, 
00199                        int filtering_algorithm)
00200 {
00201     _ASSERT(dbhandle.NotEmpty());
00202     CRef<CSeqDbSeqInfoSrc> retval(new CSeqDbSeqInfoSrc(dbhandle));
00203     retval->SetFilteringAlgorithmId(filtering_algorithm);
00204     return retval;
00205 }
00206 
00207 IBlastSeqInfoSrc*
00208 CLocalDbAdapter::MakeSeqInfoSrc()
00209 {
00210     if ( !m_SeqInfoSrc ) {
00211         if (m_SeqDb.NotEmpty()) {
00212             m_SeqInfoSrc = &*s_InitCSeqDbSeqInfoSrc(m_SeqDb, m_FilteringAlg);
00213         } else if (m_DbInfo.NotEmpty()) {
00214             m_SeqDb = x_InitSeqDB(m_DbInfo);
00215             m_SeqInfoSrc = &*s_InitCSeqDbSeqInfoSrc(m_SeqDb, m_FilteringAlg);
00216         } else if (m_SubjectFactory.NotEmpty() && m_OptsHandle.NotEmpty()) {
00217             EBlastProgramType p(m_OptsHandle->GetOptions().GetProgramType());
00218             if ( !m_Subjects.empty() ) {
00219                 m_SeqInfoSrc = new CSeqVecSeqInfoSrc(m_Subjects);
00220             } else {
00221                 CRef<IRemoteQueryData> subj_data
00222                     (m_SubjectFactory->MakeRemoteQueryData());
00223                 CRef<CBioseq_set> subject_bioseqs(subj_data->GetBioseqSet());
00224                 bool is_prot = Blast_SubjectIsProtein(p) ? true : false;
00225                 m_SeqInfoSrc = new CBioseqSeqInfoSrc(*subject_bioseqs, is_prot);
00226             }
00227         } else {
00228             abort();
00229         }
00230         _ASSERT(m_SeqInfoSrc);
00231     }
00232     return m_SeqInfoSrc;
00233 }
00234 
00235 bool
00236 CLocalDbAdapter::IsProtein() const
00237 {
00238     bool retval = false;
00239     if (m_DbInfo) {
00240         retval = m_DbInfo->IsProtein();
00241     } else if (m_SeqDb) {
00242         retval = (m_SeqDb->GetSequenceType() == CSeqDB::eProtein) ? true :
00243             false;
00244     } else if (m_OptsHandle) {
00245         const EBlastProgramType p = m_OptsHandle->GetOptions().GetProgramType();
00246         retval = Blast_SubjectIsProtein(p) ? true : false;
00247     } else if (m_SeqSrc) {
00248         retval = (bool)BlastSeqSrcGetIsProt(m_SeqSrc);
00249     } else {
00250         // Data type provided in a constructor, but not handled here
00251         abort();
00252     }
00253     return retval;
00254 }
00255 
00256 END_SCOPE(Blast)
00257 END_NCBI_SCOPE
00258 
00259 /* @} */
00260 
00261 

Generated on Sun Dec 6 22:16:56 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:49 2009 by modify_doxy.py rev. 173732