include/objmgr/seq_loc_mapper.hpp

Go to the documentation of this file.
00001 #ifndef SEQ_LOC_MAPPER__HPP
00002 #define SEQ_LOC_MAPPER__HPP
00003 
00004 /*  $Id: seq_loc_mapper.hpp 175292 2009-11-05 15:50:05Z grichenk $
00005 * ===========================================================================
00006 *
00007 *                            PUBLIC DOMAIN NOTICE
00008 *               National Center for Biotechnology Information
00009 *
00010 *  This software/database is a "United States Government Work" under the
00011 *  terms of the United States Copyright Act.  It was written as part of
00012 *  the author's official duties as a United States Government employee and
00013 *  thus cannot be copyrighted.  This software/database is freely available
00014 *  to the public for use. The National Library of Medicine and the U.S.
00015 *  Government have not placed any restriction on its use or reproduction.
00016 *
00017 *  Although all reasonable efforts have been taken to ensure the accuracy
00018 *  and reliability of the software and data, the NLM and the U.S.
00019 *  Government do not and cannot warrant the performance or results that
00020 *  may be obtained by using this software or data. The NLM and the U.S.
00021 *  Government disclaim all warranties, express or implied, including
00022 *  warranties of performance, merchantability or fitness for any particular
00023 *  purpose.
00024 *
00025 *  Please cite the author in any work or product based on this material.
00026 *
00027 * ===========================================================================
00028 *
00029 * Author: Aleksey Grichenko
00030 *
00031 * File Description:
00032 *   Seq-loc mapper
00033 *
00034 */
00035 
00036 #include <corelib/ncbistd.hpp>
00037 #include <corelib/ncbiobj.hpp>
00038 #include <util/range.hpp>
00039 #include <util/rangemap.hpp>
00040 #include <objects/seqloc/Na_strand.hpp>
00041 #include <objects/seqalign/Seq_align.hpp>
00042 #include <objects/seq/seq_id_handle.hpp>
00043 #include <objects/general/Int_fuzz.hpp>
00044 #include <objmgr/impl/heap_scope.hpp>
00045 #include <objects/seq/seq_loc_mapper_base.hpp>
00046 
00047 
00048 BEGIN_NCBI_SCOPE
00049 BEGIN_SCOPE(objects)
00050 
00051 
00052 /** @addtogroup ObjectManagerCore
00053  *
00054  * @{
00055  */
00056 
00057 
00058 class CScope;            // passed by pointer (optional) to the mapper constructors
00059 class CBioseq_Handle;    // top level bioseq argument of the bioseq-based constructors
00060 class CSeqMap;           // sequence map argument of the seq-map based constructors
00061 class CSeqMap_CI;        // seq-map iterator taken by one x_InitializeSeqMap overload
00062 struct SSeqMapSelector;  // selector argument carrying additional restrictions
00063 
00064 
00065 /////////////////////////////////////////////////////////////////////////////
00066 ///
00067 ///  CSeq_loc_Mapper --
00068 ///
00069 ///  Mapping locations and alignments between bioseqs through seq-locs,
00070 ///  features, alignments or between parts of segmented bioseqs.
00071 
00072 
00073 class  CSeq_loc_Mapper : public CSeq_loc_Mapper_Base
00074 {
00075 public:
00076     enum ESeqMapDirection {
00077         eSeqMap_Up,    ///< map from segments to the top level bioseq
00078         eSeqMap_Down   ///< map from a segmented bioseq to segments
00079     };
00080 
00081     /// Mapping through a pre-filled CMappingRanges. Source(s) and
00082     /// destination(s) are considered as having the same width.
00083     /// @param mapping_ranges
00084     ///  CMappingRanges filled with the desired source and destination
00085     ///  ranges. Must be a heap object (will be stored in a CRef<>).
00086     /// @param scope
00087     ///  Optional scope (required only for mapping alignments).
00088     CSeq_loc_Mapper(CMappingRanges* mapping_ranges,
00089                     CScope*         scope = 0);
00090 
00091     /// Mapping through a feature, both location and product must be set.
00092     /// If scope is set, synonyms are resolved for each source ID.
00093     CSeq_loc_Mapper(const CSeq_feat&  map_feat,
00094                     EFeatMapDirection dir,
00095                     CScope*           scope = 0);
00096 
00097     /// Mapping between two seq_locs. If scope is set, synonyms are resolved
00098     /// for each source ID.
00099     CSeq_loc_Mapper(const CSeq_loc&   source,
00100                     const CSeq_loc&   target,
00101                     CScope*           scope = 0);
00102 
00103     /// Mapping through an alignment. Need to specify target ID or
00104     /// target row of the alignment. Any other ID is mapped to the
00105     /// target one. If scope is set, synonyms are resolved for each source ID.
00106     /// Only the first row matching target ID is used, all other rows
00107     /// are considered source.
00108     CSeq_loc_Mapper(const CSeq_align& map_align,
00109                     const CSeq_id&    to_id,
00110                     CScope*           scope = 0,
00111                     TMapOptions       opts = 0);
00112     CSeq_loc_Mapper(const CSeq_align& map_align,
00113                     size_t            to_row,
00114                     CScope*           scope = 0,
00115                     TMapOptions       opts = 0);
00116 
00117     /// Mapping between segments and the top level sequence.
00118     /// @param target_seq
00119     ///  Top level bioseq
00120     /// @param direction
00121     ///  Direction of mapping: up (from segments to master) or down.
00122     CSeq_loc_Mapper(CBioseq_Handle   target_seq,
00123                     ESeqMapDirection direction);
00124 
00125     /// Mapping through a seq-map.
00126     /// @param seq_map
00127     ///  Sequence map defining the mapping
00128     /// @param direction
00129     ///  Direction of mapping: up (from segments to master) or down.
00130     /// @param top_level_id
00131     ///  Explicit destination id when mapping up, may be used with
00132     ///  seq-maps constructed from a seq-loc with multiple ids.
00133     CSeq_loc_Mapper(const CSeqMap&   seq_map,
00134                     ESeqMapDirection direction,
00135                     const CSeq_id*   top_level_id = 0,
00136                     CScope*          scope = 0);
00137 
00138     /// Mapping between segments and the top level sequence limited by depth.
00139     /// @param depth
00140     ///  Mapping depth. Depth of 0 converts synonyms.
00141     /// @param top_level_seq
00142     ///  Top level bioseq
00143     /// @param direction
00144     ///  Direction of mapping: up (from segments to master) or down.
00145     CSeq_loc_Mapper(size_t                depth,
00146                     const CBioseq_Handle& top_level_seq,
00147                     ESeqMapDirection      direction);
00148 
00149     /// Depth-limited mapping through a seq-map.
00150     /// @param depth
00151     ///  Mapping depth. Depth of 0 converts synonyms.
00152     /// @param top_level_seq
00153     ///  Sequence map defining the mapping
00154     /// @param direction
00155     ///  Direction of mapping: up (from segments to master) or down.
00156     /// @param top_level_id
00157     ///  Explicit destination id when mapping up, may be used with
00158     ///  seq-maps constructed from a seq-loc with multiple ids.
00159     CSeq_loc_Mapper(size_t           depth,
00160                     const CSeqMap&   top_level_seq,
00161                     ESeqMapDirection direction,
00162                     const CSeq_id*   top_level_id = 0,
00163                     CScope*          scope = 0);
00164 
00165     /// Mapping between segments and the top level sequence.
00166     /// @param target_seq
00167     ///  Top level bioseq
00168     /// @param direction
00169     ///  Direction of mapping: up (from segments to master) or down.
00170     /// @param selector
00171     ///  Seq-map selector with additional restrictions (range, strand etc.).
00172     ///  Some properties of the selector are always adjusted by the mapper.
00173     CSeq_loc_Mapper(CBioseq_Handle   target_seq,
00174                     ESeqMapDirection direction,
00175                     SSeqMapSelector  selector);
00176 
00177     /// Mapping through a seq-map.
00178     /// @param seq_map
00179     ///  Sequence map defining the mapping
00180     /// @param direction
00181     ///  Direction of mapping: up (from segments to master) or down.
00182     /// @param selector
00183     ///  Seq-map selector with additional restrictions (range, strand etc.).
00184     ///  Some properties of the selector are always adjusted by the mapper.
00185     /// @param top_level_id
00186     ///  Explicit destination id when mapping up, may be used with
00187     ///  seq-maps constructed from a seq-loc with multiple ids.
00188     CSeq_loc_Mapper(const CSeqMap&   seq_map,
00189                     ESeqMapDirection direction,
00190                     SSeqMapSelector  selector,
00191                     const CSeq_id*   top_level_id = 0,
00192                     CScope*          scope = 0);
00193 
00194     ~CSeq_loc_Mapper(void);
00195 
00196     // Collect synonyms for the given seq-id
00197     virtual void CollectSynonyms(const CSeq_id_Handle& id,
00198                                  TSynonyms&            synonyms) const;
00199 
00200 protected:
00201     // Get the molecule type (ESeqType) for the given seq-id handle
00202     virtual ESeqType GetSeqType(const CSeq_id_Handle& idh) const;
00203 
00204     // Get sequence length for the given seq-id
00205     virtual TSeqPos GetSequenceLength(const CSeq_id& id);
00206 
00207     // Create CSeq_align_Mapper, add any necessary arguments
00208     virtual CSeq_align_Mapper_Base*
00209         InitAlignMapper(const CSeq_align& src_align);
00210 
00211 private:
00212     CSeq_loc_Mapper(const CSeq_loc_Mapper&);             // private: no copy-construction from outside the class
00213     CSeq_loc_Mapper& operator=(const CSeq_loc_Mapper&);  // private: no assignment from outside the class
00214     // Initialization helpers; overloads take a seq-map, a seq-map iterator or a bioseq handle, optionally with a depth.
00215     void x_InitializeSeqMap(const CSeqMap&   seq_map,
00216                             const CSeq_id*   top_id,
00217                             ESeqMapDirection direction);
00218     void x_InitializeSeqMap(const CSeqMap&   seq_map,
00219                             size_t           depth,
00220                             const CSeq_id*   top_id,
00221                             ESeqMapDirection direction);
00222     void x_InitializeSeqMap(CSeqMap_CI       seg_it,
00223                             const CSeq_id*   top_id,
00224                             ESeqMapDirection direction);
00225     void x_InitializeBioseq(const CBioseq_Handle& bioseq,
00226                             const CSeq_id*        top_id,
00227                             ESeqMapDirection      direction);
00228     void x_InitializeBioseq(const CBioseq_Handle& bioseq,
00229                             size_t                depth,
00230                             const CSeq_id*        top_id,
00231                             ESeqMapDirection      direction);
00232 
00233 private:
00234     CHeapScope        m_Scope;  // NOTE(review): presumably the scope used for resolving ids -- confirm in the implementation
00235 };
00236 
00237 
00238 /* @} */
00239 
00240 
00241 END_SCOPE(objects)
00242 END_NCBI_SCOPE
00243 
00244 #endif  // SEQ_LOC_MAPPER__HPP
00245 
00246 

Generated on Sun Dec 6 22:12:43 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:47 2009 by modify_doxy.py rev. 173732