include/algo/align/util/align_filter.hpp

Go to the documentation of this file.
00001 #ifndef GPIPE_COMMON___ALIGN_FILTER__HPP
00002 #define GPIPE_COMMON___ALIGN_FILTER__HPP
00003 
00004 /*  $Id: align_filter.hpp 174318 2009-10-27 13:32:47Z dicuccio $
00005  * ===========================================================================
00006  *
00007  *                            PUBLIC DOMAIN NOTICE
00008  *               National Center for Biotechnology Information
00009  *
00010  *  This software/database is a "United States Government Work" under the
00011  *  terms of the United States Copyright Act.  It was written as part of
00012  *  the author's official duties as a United States Government employee and
00013  *  thus cannot be copyrighted.  This software/database is freely available
00014  *  to the public for use. The National Library of Medicine and the U.S.
00015  *  Government have not placed any restriction on its use or reproduction.
00016  *
00017  *  Although all reasonable efforts have been taken to ensure the accuracy
00018  *  and reliability of the software and data, the NLM and the U.S.
00019  *  Government do not and cannot warrant the performance or results that
00020  *  may be obtained by using this software or data. The NLM and the U.S.
00021  *  Government disclaim all warranties, express or implied, including
00022  *  warranties of performance, merchantability or fitness for any particular
00023  *  purpose.
00024  *
00025  *  Please cite the author in any work or product based on this material.
00026  *
00027  * ===========================================================================
00028  *
00029  * Authors:  Mike DiCuccio
00030  *
00031  * File Description:
00032  *
00033  */
00034 
00035 #include <corelib/ncbiobj.hpp>
00036 #include <util/qparse/query_parse.hpp>
00037 
00038 #include <objects/seq/seq_id_handle.hpp>
00039 
00040 #include <set>
00041 
00042 BEGIN_NCBI_SCOPE
00043 BEGIN_SCOPE(objects)
00044     class CSeq_align;
00045     class CSeq_align_set;
00046     class CSeq_annot;
00047     class CScope;
00048 END_SCOPE(objects)
00049 
00050 
00051 class CAlignFilter : public CObject
00052 {
00053 public:
00054     CAlignFilter();
00055     CAlignFilter(const string& filter_string);
00056 
00057     /// Set the query to be used
00058     void SetFilter(const string& filter_string);
00059 
00060     /// CAlignFilter uses a scope internally.  You can set a scope yourself;
00061     /// alternatively, the scope used internally will be a default scope
00062     void SetScope(objects::CScope& scope);
00063     objects::CScope& SetScope();
00064 
00065     /// Remove duplicate alignments when filtering
00066     /// NOTE: this may be expensive for a large number of alignments, as it
00067     /// forces the algorithm to maintain a list of hash keys for each alignment
00068     CAlignFilter& SetRemoveDuplicates(bool b = true);
00069 
00070     /// Add a sequence to a blacklist.
00071     /// Blacklisted sequences are excluded always; if an alignment contains a
00072     /// query or subject that matches a blacklisted alignment, then that
00073     /// alignment will be excluded.
00074     ///
00075     /// NOTE: this is only triggered if the alignments are pairwise!
00076     ///
00077     void AddBlacklistQueryId(const objects::CSeq_id_Handle& idh);
00078     void AddBlacklistSubjectId(const objects::CSeq_id_Handle& idh);
00079 
00080     /// Add a sequence to the white list.
00081     /// If an alignment matches a whitelisted ID as appropriate, it will always
00082     /// be returned.
00083     ///
00084     /// NOTE: this is only triggered if the alignments are pairwise!
00085     ///
00086     void AddWhitelistQueryId(const objects::CSeq_id_Handle& idh);
00087     void AddWhitelistSubjectId(const objects::CSeq_id_Handle& idh);
00088 
00089     /// Match a single alignment
00090     bool Match(const objects::CSeq_align& align);
00091 
00092     /// Filter a set of alignments, iteratively applying Match() to each
00093     /// alignment and emitting all matched alignments in the output set.
00094     void Filter(const list< CRef<objects::CSeq_align> >& aligns_in,
00095                 list< CRef<objects::CSeq_align> >& aligns_out);
00096 
00097     /// Filter a set of alignments, iteratively applying Match() to each
00098     /// alignment and emitting all matched alignments in the output set.
00099     void Filter(const objects::CSeq_align_set& aligns_in,
00100                 objects::CSeq_align_set&       aligns_out);
00101 
00102     /// Filter a set of alignments, iteratively applying Match() to each
00103     /// alignment and emitting all matched alignments in the output seq-annot.
00104     void Filter(const objects::CSeq_annot& aligns_in,
00105                 objects::CSeq_annot&       aligns_out);
00106 
00107 private:
00108     bool x_Match(const CQueryParseTree::TNode& node,
00109                  const objects::CSeq_align& align);
00110 
00111     bool x_IsUnique(const objects::CSeq_align& align);
00112 
00113     double x_GetAlignmentScore(const string& score_name,
00114                                const objects::CSeq_align& align);
00115 
00116     bool x_Query_Op(const CQueryParseTree::TNode& key_node,
00117                     CQueryParseNode::EType type,
00118                     bool is_not,
00119                     const CQueryParseTree::TNode& val_node,
00120                     const objects::CSeq_align& align);
00121 
00122     double x_FuncCall(const CQueryParseTree::TNode& func_node,
00123                       const objects::CSeq_align& align);
00124     double x_TermValue(const CQueryParseTree::TNode& term_node,
00125                        const objects::CSeq_align& align);
00126 
00127     bool x_Query_Range(const CQueryParseTree::TNode& key_node,
00128                        bool is_not,
00129                        const CQueryParseTree::TNode& val1_node,
00130                        const CQueryParseTree::TNode& val2_node,
00131                        const objects::CSeq_align& align);
00132 
00133 private:
00134     bool m_RemoveDuplicates;
00135     string m_Query;
00136     auto_ptr<CQueryParseTree> m_ParseTree;
00137 
00138     CRef<objects::CScope> m_Scope;
00139 
00140     set<objects::CSeq_id_Handle> m_QueryBlacklist;
00141     set<objects::CSeq_id_Handle> m_QueryWhitelist;
00142     set<objects::CSeq_id_Handle> m_SubjectBlacklist;
00143     set<objects::CSeq_id_Handle> m_SubjectWhitelist;
00144 
00145     typedef set<string> TUniqueAligns;
00146     TUniqueAligns m_UniqueAligns;
00147 };
00148 
00149 
00150 
00151 END_NCBI_SCOPE
00152 
00153 
00154 #endif  // GPIPE_COMMON___ALIGN_FILTER__HPP
00155 
00156 

Generated on Sun Dec 6 21:55:31 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:32 2009 by modify_doxy.py rev. 173732