include/algo/blast/api/bl2seq.hpp

Go to the documentation of this file.
00001 /*  $Id: bl2seq.hpp 138123 2008-08-21 19:28:07Z camacho $
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================
00025 *
00026 * Author:  Christiam Camacho
00027 *
00028 */
00029 
00030 /// @file bl2seq.hpp
00031 /// Declares the CBl2Seq (BLAST 2 Sequences) class
00032 
00033 #ifndef ALGO_BLAST_API___BL2SEQ__HPP
00034 #define ALGO_BLAST_API___BL2SEQ__HPP
00035 
00036 #include <algo/blast/api/blast_types.hpp>
00037 #include <algo/blast/api/sseqloc.hpp>
00038 #include <algo/blast/api/blast_aux.hpp>
00039 #include <algo/blast/api/blast_options_handle.hpp>
00040 #include <algo/blast/api/blast_results.hpp>
00041 
00042 /** @addtogroup AlgoBlast
00043  *
00044  * @{
00045  */
00046 
00047 class CBlastFilterTest;  // forward-declared at global scope so that CBl2Seq can name ::CBlastFilterTest as a friend
00048 
00049 BEGIN_NCBI_SCOPE
00050 BEGIN_SCOPE(blast)
00051 
00052 /// Runs the BLAST algorithm between 2 sequences, or between sets of query and subject sequences.
00053 
00054 class  CBl2Seq : public CObject
00055 {
00056 public:
00057 
00058     /// Constructor to compare 2 sequences with default options
00059     CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject, EProgram p);
00060 
00061     /// Constructor to compare query against all subject sequences with 
00062     /// default options
00063     CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects, EProgram p);
00064 
00065     /// Constructor to allow query concatenation with default options
00066     CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects, 
00067             EProgram p);
00068 
00069     /// Constructor to compare 2 sequences with specified options
00070     CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject, 
00071             CBlastOptionsHandle& opts);
00072 
00073     /// Constructor to compare query against all subject sequences with
00074     /// specified options
00075     CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects, 
00076             CBlastOptionsHandle& opts);
00077 
00078     /// Constructor to allow query concatenation with specified options
00079     CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects, 
00080             CBlastOptionsHandle& opts);
00081 
00082     /// Destructor
00083     virtual ~CBl2Seq();
00084     
00085     /// Set the query: resets query data structures and replaces any stored queries with this one.
00086     void SetQuery(const SSeqLoc& query);
00087 
00088     /// Retrieve the first stored query sequence (calls front(), so a query must have been set).
00089     const SSeqLoc& GetQuery() const;
00090 
00091     /// Set a vector of query sequences for a concatenated search.
00092     void SetQueries(const TSeqLocVector& queries);
00093 
00094     /// Retrieve a vector of query sequences.
00095     const TSeqLocVector& GetQueries() const;
00096 
00097     /// Set the subject: resets subject data structures and replaces any stored subjects with this one.
00098     void SetSubject(const SSeqLoc& subject);
00099 
00100     /// Retrieve the first stored subject sequence (calls front(), so a subject must have been set).
00101     const SSeqLoc& GetSubject() const;
00102 
00103     /// Set a vector of subject sequences.
00104     void SetSubjects(const TSeqLocVector& subjects);
00105 
00106     /// Retrieve a vector of subject sequences.
00107     const TSeqLocVector& GetSubjects() const;
00108 
00109     /// Get a modifiable reference to the options handle; also marks query set-up as not done.
00110     CBlastOptionsHandle& SetOptionsHandle();
00111 
00112     /// Retrieve the options handle (read-only).
00113     const CBlastOptionsHandle& GetOptionsHandle() const;
00114 
00115     /// Perform BLAST search
00116     /// Assuming N queries and M subjects, the structure of the returned 
00117     /// vector is as follows, with types indicated in parenthesis:
00118     /// TSeqAlignVector = 
00119     ///     [ {Results for query 1 and subject 1 (Seq-align-set)},
00120     ///       {Results for query 1 and subject 2 (Seq-align-set)}, ...
00121     ///       {Results for query 1 and subject M (Seq-align-set)},
00122     ///       {Results for query 2 and subject 1 (Seq-align-set)},
00123     ///       {Results for query 2 and subject 2 (Seq-align-set)}, ...
00124     ///       {Results for query 2 and subject M (Seq-align-set)},
00125     ///       {Results for query 3 and subject 1 (Seq-align-set)}, ...
00126     ///       {Results for query N and subject M (Seq-align-set)} ]
00127     virtual TSeqAlignVector Run();
00128 
00129     /// Performs the same functionality as Run(), but it returns a different
00130     /// data type
00131     /// @note the number of CSearchResultSet::value_type objects in this
00132     /// function's return value will be (number of queries * number of
00133     /// subjects)
00134     CRef<CSearchResultSet> RunEx();
00135 
00136     /// Runs the search but does not produce seqalign output
00137     /// (useful if the raw search results are needed, rather
00138     /// than a set of complete Seq-aligns)
00139     /// @deprecated Please DO NOT use this method, use Run() or RunEx() instead.
00140     NCBI_DEPRECATED virtual void RunWithoutSeqalignGeneration();
00141 
00142     /// Retrieves the list of HSP results from the engine
00143     /// (to be used after RunWithoutSeqalignGeneration() method)
00144     /// @deprecated Please DO NOT use this method, use Run() or RunEx() 
00145     /// instead, as this is an internal data structure of the BLAST engine
00146     NCBI_DEPRECATED BlastHSPResults* GetResults() const;
00147 
00148     /// Retrieves regions filtered on the query/queries
00149     TSeqLocInfoVector GetFilteredQueryRegions() const;
00150 
00151     /// Retrieves regions filtered on the subject sequence(s)
00152     /// @param retval overwritten with a copy of the stored subject masks [out]
00153     void GetFilteredSubjectRegions(vector<TSeqLocInfoVector>& retval) const;
00154 
00155     /// Retrieves the diagnostics information returned from the engine
00156     BlastDiagnostics* GetDiagnostics() const;
00157 
00158     /// Get the ancillary results for a BLAST search (to be used with the Run()
00159     /// method)
00160     /// @param retval overwritten with a copy of the stored ancillary data [out]
00161     void GetAncillaryResults(CSearchResultSet::TAncillaryVector& retval) const;
00162 
00163     /// Copies the stored error messages/warnings into @p messages.
00164     void GetMessages(TSearchMessages& messages) const;
00165 
00166     /// Set a function callback to be invoked by the CORE of BLAST to allow
00167     /// interrupting a BLAST search in progress.
00168     /// @param fnptr pointer to callback function [in]
00169     /// @param user_data user data to be attached to SBlastProgress structure
00170     /// [in]
00171     /// @return the previously set TInterruptFnPtr (NULL if none was
00172     /// provided before)
00173     TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr fnptr, 
00174                                          void* user_data = NULL);
00175 
00176 protected:
00177     /// Process the queries, do setup, and build the lookup table.
00178     virtual void SetupSearch();
00179 
00180     /// Creates a BlastHSPStream and calls the engine.
00181     virtual void RunFullSearch();
00182 
00183     /// Return a seqalign list for each query/subject pair, even if it is empty.
00184     virtual TSeqAlignVector x_Results2SeqAlign();
00185 
00186     /// Convert the TSeqLocVector to a vector of Seq-ids
00187     /// @param slv TSeqLocVector used as source [in]
00188     /// @param query_ids output of this method [in|out]
00189     static void x_SimplifyTSeqLocVector(const TSeqLocVector& slv,
00190                            vector< CConstRef<objects::CSeq_id> >& query_ids);
00191 
00192     /// Populate the internal m_AncillaryData member
00193     /// @param alignments alignments to use [in]
00194     void x_BuildAncillaryData(const TSeqAlignVector& alignments);
00195 
00196 private:
00197     // Data members received from client code
00198     TSeqLocVector        m_tQueries;         ///< query sequence(s)
00199     TSeqLocVector        m_tSubjects;        ///< sequence(s) to BLAST against
00200     CRef<CBlastOptionsHandle>  m_OptsHandle; ///< Blast options
00201 
00202     /// Common initialization code for all c-tors
00203     void x_Init(const TSeqLocVector& queries, const TSeqLocVector& subjs);
00204 
00205     /// Prohibit copy constructor (declared private; no definition is visible in this header)
00206     CBl2Seq(const CBl2Seq& rhs);
00207     /// Prohibit assignment operator (declared private; no definition is visible in this header)
00208     CBl2Seq& operator=(const CBl2Seq& rhs);
00209 
00210     /************ Internal data structures (mi_ = internal members)***********/
00211     bool                                mi_bQuerySetUpDone;    ///< internal: query processing already done?
00212     CBLAST_SequenceBlk                  mi_clsQueries;         ///< internal: one for all queries
00213     CBlastQueryInfo                     mi_clsQueryInfo;       ///< internal: one for all queries
00214 
00215     BlastSeqSrc*                        mi_pSeqSrc;            ///< internal: Subject sequences source
00216     BlastScoreBlk*                      mi_pScoreBlock;        ///< internal: score block
00217     CLookupTableWrap                    mi_pLookupTable;       ///< internal: one for all queries
00218     BlastSeqLoc*                        mi_pLookupSegments;    ///< internal: regions of queries to scan during lookup table creation
00219 
00220     /// Stores any warnings emitted during query setup
00221     TSearchMessages                     m_Messages;
00222 
00223     /// Results for all queries and subjects together
00224     BlastHSPResults*                    mi_pResults;
00225     /// Return search statistics data
00226     BlastDiagnostics*                   mi_pDiagnostics;
00227 
00228     /// Regions filtered out from the query sequences
00229     BlastMaskLoc* m_ipFilteredRegions;
00230 
00231     /// User-provided interrupt callback
00232     TInterruptFnPtr                     m_fnpInterrupt;
00233     /// Structure to aid in progress monitoring/interruption
00234     CSBlastProgress                     m_ProgressMonitor;
00235     /// Ancillary BLAST data
00236     CSearchResultSet::TAncillaryVector  m_AncillaryData;
00237     /// Subject masks for those which intersect hits
00238     vector<TSeqLocInfoVector>           m_SubjectMasks;
00239 
00240     /// Resets query data structures
00241     void x_ResetQueryDs();
00242     /// Resets subject data structures
00243     void x_ResetSubjectDs();
00244 
00245     friend class ::CBlastFilterTest;
00246 };
00247 
00248 /// Resets query data structures, then makes @p query the only stored query.
00249 inline void CBl2Seq::SetQuery(const SSeqLoc& query)
00250 {
00251     x_ResetQueryDs();
00252     // Build the new list before the old one is released: @p query may refer to the element stored here (e.g. SetQuery(GetQuery())), which a clear() would destroy before it is copied.
00253     TSeqLocVector(1, query).swap(m_tQueries);
00254 }
00255 
00256 inline const SSeqLoc&
00257 CBl2Seq::GetQuery() const
00258 {
00259     return m_tQueries.front();
00260 }
00261 
00262 /// Resets query data structures, then replaces the stored queries with a copy of @p queries.
00263 inline void CBl2Seq::SetQueries(const TSeqLocVector& queries)
00264 {
00265     x_ResetQueryDs();
00266     // Assign directly, without a clear() first: if @p queries is m_tQueries itself (e.g. SetQueries(GetQueries())), clearing would empty the source and every query would be lost.
00267     m_tQueries = queries;
00268 }
00269 
00270 /// Gives read-only access to the stored list of queries.
00271 inline const TSeqLocVector& CBl2Seq::GetQueries() const
00272 {
00273     return this->m_tQueries;
00274 }
00275 
00276 /// Resets subject data structures, then makes @p subject the only stored subject.
00277 inline void CBl2Seq::SetSubject(const SSeqLoc& subject)
00278 {
00279     x_ResetSubjectDs();
00280     // Build the new list before the old one is released: @p subject may refer to the element stored here (e.g. SetSubject(GetSubject())), which a clear() would destroy before it is copied.
00281     TSeqLocVector(1, subject).swap(m_tSubjects);
00282 }
00283 
00284 inline const SSeqLoc&
00285 CBl2Seq::GetSubject() const
00286 {
00287     return m_tSubjects.front();
00288 }
00289 
00290 /// Resets subject data structures, then replaces the stored subjects with a copy of @p subjects.
00291 inline void CBl2Seq::SetSubjects(const TSeqLocVector& subjects)
00292 {
00293     x_ResetSubjectDs();
00294     // Assign directly, without a clear() first: if @p subjects is m_tSubjects itself (e.g. SetSubjects(GetSubjects())), clearing would empty the source and every subject would be lost.
00295     m_tSubjects = subjects;
00296 }
00297 
00298 /// Gives read-only access to the stored list of subjects.
00299 inline const TSeqLocVector& CBl2Seq::GetSubjects() const
00300 {
00301     return this->m_tSubjects;
00302 }
00303 
00304 /// Returns the options handle for modification, after marking query processing as not done.
00305 inline CBlastOptionsHandle& CBl2Seq::SetOptionsHandle()
00306 {
00307     this->mi_bQuerySetUpDone = false;   // cleared before the handle is handed out to the caller
00308     return *(this->m_OptsHandle);
00309 }
00310 
00311 /// Returns the options handle for reading only; no internal state is touched.
00312 inline const CBlastOptionsHandle& CBl2Seq::GetOptionsHandle() const
00313 {
00314     return *(this->m_OptsHandle);
00315 }
00316 
00317 /// Returns the stored diagnostics pointer (mi_pDiagnostics) as is.
00318 inline BlastDiagnostics*
00319 CBl2Seq::GetDiagnostics() const
00320 { return this->mi_pDiagnostics; }
00321 
00322 /// Returns the stored HSP results pointer (mi_pResults) as is; declared NCBI_DEPRECATED in the class.
00323 inline BlastHSPResults*
00324 CBl2Seq::GetResults() const
00325 { return this->mi_pResults; }
00326 
00327 /// Overwrites @p messages with a copy of the messages stored in this object.
00328 inline void CBl2Seq::GetMessages(TSearchMessages& messages) const
00329 {
00330     messages = this->m_Messages;
00331 }
00332 
00333 inline TInterruptFnPtr CBl2Seq::SetInterruptCallback(TInterruptFnPtr fnptr, void* user_data)
00334 {
00335     TInterruptFnPtr previous = m_fnpInterrupt;   // remembered so it can be returned to the caller
00336     m_fnpInterrupt = fnptr;                      // install the new callback
00337     m_ProgressMonitor.Reset(SBlastProgressNew(user_data));   // progress structure built from user_data
00338     return previous;
00339 }
00340 
00341 /// Overwrites @p retval with a copy of the ancillary data stored in this object.
00342 inline void CBl2Seq::GetAncillaryResults(CSearchResultSet::TAncillaryVector& retval) const
00343 {
00344     retval = this->m_AncillaryData;
00345 }
00346 
00347 /// Overwrites @p retval with a copy of the subject masks stored in this object.
00348 inline void CBl2Seq::GetFilteredSubjectRegions(vector<TSeqLocInfoVector>& retval) const
00349 {
00350     retval = this->m_SubjectMasks;
00351 }
00352 
00353 END_SCOPE(blast)
00354 END_NCBI_SCOPE
00355 
00356 /* @} */
00357 
00358 #endif  /* ALGO_BLAST_API___BL2SEQ__HPP */
00359 
00360 

Generated on Wed Dec 9 02:54:29 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:17:25 2009 by modify_doxy.py rev. 173732