include/objmgr/seq_map.hpp

Go to the documentation of this file.
00001 #ifndef OBJECTS_OBJMGR___SEQ_MAP__HPP
00002 #define OBJECTS_OBJMGR___SEQ_MAP__HPP
00003 
00004 /*  $Id: seq_map.hpp 177381 2009-11-30 21:09:04Z ucko $
00005 * ===========================================================================
00006 *
00007 *                            PUBLIC DOMAIN NOTICE
00008 *               National Center for Biotechnology Information
00009 *
00010 *  This software/database is a "United States Government Work" under the
00011 *  terms of the United States Copyright Act.  It was written as part of
00012 *  the author's official duties as a United States Government employee and
00013 *  thus cannot be copyrighted.  This software/database is freely available
00014 *  to the public for use. The National Library of Medicine and the U.S.
00015 *  Government have not placed any restriction on its use or reproduction.
00016 *
00017 *  Although all reasonable efforts have been taken to ensure the accuracy
00018 *  and reliability of the software and data, the NLM and the U.S.
00019 *  Government do not and cannot warrant the performance or results that
00020 *  may be obtained by using this software or data. The NLM and the U.S.
00021 *  Government disclaim all warranties, express or implied, including
00022 *  warranties of performance, merchantability or fitness for any particular
00023 *  purpose.
00024 *
00025 *  Please cite the author in any work or product based on this material.
00026 *
00027 * ===========================================================================
00028 *
00029 * Authors:
00030 *           Aleksey Grichenko
00031 *           Michael Kimelman
00032 *           Andrei Gourianov
00033 *           Eugene Vasilchenko
00034 *
00035 * File Description:
00036 *   CSeqMap -- formal sequence map to describe sequence parts in general,
00037 *   i.e. location and type only, without providing real data
00038 *
00039 */
00040 
00041 #include <objects/seq/seq_id_handle.hpp>
00042 #include <objmgr/objmgr_exception.hpp>
00043 #include <objects/seqloc/Na_strand.hpp>
00044 #include <objects/seq/Seq_inst.hpp>
00045 #include <corelib/ncbimtx.hpp>
00046 #include <vector>
00047 #include <list>
00048 
00049 BEGIN_NCBI_SCOPE
00050 BEGIN_SCOPE(objects)
00051 
00052 
00053 /** @addtogroup ObjectManagerSequenceRep
00054  *
00055  * @{
00056  */
00057 
00058 
00059 class CBioseq;
00060 class CDelta_seq;
00061 class CSeq_loc;
00062 class CSeq_point;
00063 class CSeq_interval;
00064 class CSeq_loc_mix;
00065 class CSeq_loc_equiv;
00066 class CSeq_literal;
00067 class CSeq_data;
00068 class CPacked_seqint;
00069 class CPacked_seqpnt;
00070 class CTSE_Chunk_Info;
      // The classes above are forward-declared only: CSeqMap's member
      // signatures name them, and this header does not need their definitions.
00071 
00072 // Provided for compatibility with old code; new code should just use TSeqPos.
      // Both names are plain aliases of TSeqPos.
00073 typedef TSeqPos TSeqPosition;
00074 typedef TSeqPos TSeqLength;
00075 
      // Further forward declarations; these names appear in CSeqMap's member
      // signatures, data members and friend declarations.
00076 class CScope;
00077 class CBioseq_Handle;
00078 class CBioseq_Info;
00079 class CSeqMap_CI;
00080 class CSeqMap_CI_SegmentInfo;
00081 class CSeqMap_Delta_seqs;
00082 struct SSeqMapSelector;
00083 
00084 
00085 /////////////////////////////////////////////////////////////////////////////
00086 ///
00087 ///  CSeqMap --
00088 ///
00089 ///  Formal sequence map -- to describe sequence parts in general --
00090 ///  location and type only, without providing real data
00091 
00092 class  CSeqMap : public CObject
00093 {
00094 public:
00095     // SeqMap segment type
00096     enum ESegmentType {
00097         eSeqGap,              ///< gap
00098         eSeqData,             ///< real sequence data
00099         eSeqSubMap,           ///< sub seqmap
00100         eSeqRef,              ///< reference to Bioseq
00101         eSeqEnd,              ///< end marker; default type of a CSegment
00102         eSeqChunk             ///< NOTE(review): presumably data still to be loaded from a CTSE_Chunk_Info -- confirm
00103     };
00104 
00105     typedef CSeq_inst::TMol TMol;
00106     typedef CSeqMap_CI const_iterator;
00107     
00108     ~CSeqMap(void);
00109 
          /// Size of the segment vector minus two: the entries at the first
          /// and last end-segment indexes are not counted.
00110     size_t GetSegmentsCount(void) const;
00111 
          /// Sequence length. Returned from the cached m_SeqLength, which is
          /// filled on first use with the position of the last end segment.
00112     TSeqPos GetLength(CScope* scope) const;
          /// Molecule type stored in m_Mol.
00113     TMol GetMol(void) const;
00114 
00115     // new interface
00116     /// STL style methods
00117     const_iterator begin(CScope* scope) const;
00118     const_iterator end(CScope* scope) const;
00119 
00120     /// NCBI style methods
00121     CSeqMap_CI Begin(CScope* scope) const;
00122     CSeqMap_CI End(CScope* scope) const;
00123     /// Find segment containing the position
00124     CSeqMap_CI FindSegment(TSeqPos pos, CScope* scope) const;
00125 
00126     /// Segment type flags
          /// The first seven values are single bits; the remaining ones are
          /// combinations of those bits.
00127     enum EFlags {
00128         fFindData       = (1<<0),
00129         fFindGap        = (1<<1),
00130         fFindLeafRef    = (1<<2),
00131         fFindInnerRef   = (1<<3),
00132         fFindExactLevel = (1<<4),
00133         fIgnoreUnresolved = (1<<5),
00134         fByFeaturePolicy= (1<<6),
00135         fFindRef        = (fFindLeafRef | fFindInnerRef),
00136         fFindAny        = fFindData | fFindGap | fFindRef,
00137         fFindAnyLeaf    = fFindData | fFindGap | fFindLeafRef,
00138         fDefaultFlags   = fFindAnyLeaf
00139     };
          /// Integer type used to pass a bitwise OR of EFlags values.
00140     typedef int TFlags;
00141 
00142     CSeqMap_CI BeginResolved(CScope* scope) const;
00143     CSeqMap_CI BeginResolved(CScope*                scope,
00144                              const SSeqMapSelector& selector) const;
00145     CSeqMap_CI EndResolved(CScope* scope) const;
00146     CSeqMap_CI EndResolved(CScope*                scope,
00147                            const SSeqMapSelector& selector) const;
00148     CSeqMap_CI FindResolved(CScope*                scope,
00149                             TSeqPos                pos,
00150                             const SSeqMapSelector& selector) const;
00151 
00152     /// Iterate segments in the range with specified strand coordinates
          /// Defaults: plus strand, maxResolve of size_t(-1), fDefaultFlags.
00153     CSeqMap_CI ResolvedRangeIterator(CScope* scope,
00154                                      TSeqPos from,
00155                                      TSeqPos length,
00156                                      ENa_strand strand = eNa_strand_plus,
00157                                      size_t maxResolve = size_t(-1),
00158                                      TFlags flags = fDefaultFlags) const;
00159 
00160     bool HasSegmentOfType(ESegmentType type) const;
00161     size_t CountSegmentsOfType(ESegmentType type) const;
00162 
          // NOTE(review): the exact meaning of the result is defined in the
          // implementation file, which is not visible from this header.
00163     bool CanResolveRange(CScope* scope, const SSeqMapSelector& sel) const;
00164     bool CanResolveRange(CScope* scope,
00165                          TSeqPos from,
00166                          TSeqPos length,
00167                          ENa_strand strand = eNa_strand_plus,
00168                          size_t maxResolve = size_t(-1),
00169                          TFlags flags = fDefaultFlags) const;
00170 
00171     // Methods used internally by other OM classes
00172 
00173     static CRef<CSeqMap> CreateSeqMapForBioseq(const CBioseq& seq);
00174     static CRef<CSeqMap> CreateSeqMapForSeq_loc(const CSeq_loc& loc,
00175                                                 CScope* scope);
00176     static CConstRef<CSeqMap> GetSeqMapForSeq_loc(const CSeq_loc& loc,
00177                                                   CScope* scope);
00178     virtual CRef<CSeqMap> CloneFor(const CBioseq& seq) const;
00179 
00180     // copy map for editing
00181     CSeqMap(const CSeqMap& sm);
00182 
00183     void SetRegionInChunk(CTSE_Chunk_Info& chunk, TSeqPos pos, TSeqPos length);
00184     void LoadSeq_data(TSeqPos pos, TSeqPos len, const CSeq_data& data);
00185 
          // Segment editing: each setter takes an iterator identifying the
          // segment plus the new length and type-specific payload.
00186     void SetSegmentGap(const CSeqMap_CI& seg,
00187                        TSeqPos length);
00188     void SetSegmentGap(const CSeqMap_CI& seg,
00189                        TSeqPos length,
00190                        CSeq_data& gap_data);
00191     void SetSegmentData(const CSeqMap_CI& seg,
00192                         TSeqPos length,
00193                         CSeq_data& data);
00194     void SetSegmentRef(const CSeqMap_CI& seg,
00195                        TSeqPos length,
00196                        const CSeq_id_Handle& ref_id,
00197                        TSeqPos ref_pos,
00198                        bool ref_minus_strand);
00199     /// Insert new gap into sequence map.
00200     /// @param seg
00201     ///   Iterator pointing to the place where new gap will be inserted.
00202     ///   Becomes invalid after the call.
00203     /// @return
00204     ///   New iterator pointing to the new segment.
00205     CSeqMap_CI InsertSegmentGap(const CSeqMap_CI& seg,
00206                                 TSeqPos length);
00207     /// Delete segment from sequence map.
00208     /// @param seg
00209     ///   Iterator pointing to the segment to be deleted.
00210     ///   Becomes invalid after the call.
00211     /// @return
00212     ///   New iterator pointing to the next segment.
00213     CSeqMap_CI RemoveSegment(const CSeqMap_CI& seg);
00214 
00215     void SetRepr(CSeq_inst::TRepr repr);
00216     void ResetRepr(void);
00217     void SetMol(CSeq_inst::TMol mol);
00218     void ResetMol(void);
00219 
00220 protected:
00221 
00222     class CSegment;
00223     class SPosLessSegment;
00224 
00225     friend class CSegment;
00226     friend class SPosLessSegment;
00227     friend class CSeqMap_SeqPoss;
00228     friend class CBioseq_Info;
00229 
          /// One entry of the segment vector (m_Segments).
00230     class CSegment
00231     {
00232     public:
              /// By default constructs an eSeqEnd segment with length
              /// kInvalidSeqPos and unknown_len false.
00233         CSegment(ESegmentType seg_type = eSeqEnd,
00234                  TSeqPos length = kInvalidSeqPos,
00235                  bool unknown_len = false);
00236 
00237         // Check if this segment has CSeq_data object (may be gap)
              // True when m_SegType or m_ObjType equals eSeqData.
00238         bool IsSetData(void) const;
00239 
00240         // Relative position of the segment in seqmap
00241         mutable TSeqPos      m_Position;
00242         // Length of the segment (kInvalidSeqPos if unresolved)
00243         mutable TSeqPos      m_Length;
00244         bool                 m_UnknownLength;
00245 
00246         // Segment type
              // Both fields are compared as ESegmentType values in IsSetData().
00247         char                 m_SegType;
00248         char                 m_ObjType;
00249 
00250         // reference info, valid for eSeqData, eSeqSubMap, eSeqRef
00251         bool                 m_RefMinusStrand;
00252         TSeqPos              m_RefPosition;
00253         CConstRef<CObject>   m_RefObject; // CSeq_data, CSeqMap, CSeq_id
00254 
00255         typedef list<TSeqPos>::iterator TList0_I;
00256         TList0_I m_Iterator;
00257     };
00258 
          /// Less-than comparator keyed on a segment's end position
          /// (m_Position + m_Length). The overloads compare a bare position
          /// with a segment, a segment with a bare position, or two segments.
          /// NOTE(review): presumably used for position lookup in
          /// x_FindSegment -- confirm in the implementation file.
00259     class SPosLessSegment
00260     {
00261     public:
00262         bool operator()(TSeqPos pos, const CSegment& seg)
00263             {
00264                 return pos < seg.m_Position + seg.m_Length;
00265             }
00266         bool operator()(const CSegment& seg, TSeqPos pos)
00267             {
00268                 return seg.m_Position + seg.m_Length < pos;
00269             }
00270         bool operator()(const CSegment& seg1, const CSegment& seg2)
00271             {
00272                 return seg1.m_Position + seg1.m_Length < seg2.m_Position + seg2.m_Length;
00273             }
00274     };
00275 
00276     // 'ctors
00277     CSeqMap(CSeqMap* parent, size_t index);
00278     CSeqMap(void);
00279     CSeqMap(const CSeq_loc& ref);
00280     CSeqMap(TSeqPos len); // gap
00281     CSeqMap(const CSeq_inst& inst);
00282 
          // Helpers for appending segments; the x_Add overloads accept the
          // different location / literal / delta object types.
00283     void x_AddEnd(void);
00284     void x_AddSegment(ESegmentType type,
00285                       TSeqPos      len,
00286                       bool         unknown_len = false);
00287     void x_AddSegment(ESegmentType type, TSeqPos len, const CObject* object);
00288     void x_AddSegment(ESegmentType type, const CObject* object,
00289                       TSeqPos refPos, TSeqPos len,
00290                       ENa_strand strand = eNa_strand_plus);
00291     void x_AddGap(TSeqPos len, bool unknown_len);
00292     void x_AddGap(TSeqPos len, bool unknown_len, const CSeq_data& gap_data);
00293     void x_Add(CSeqMap* submap);
00294     void x_Add(const CSeq_data& data, TSeqPos len);
00295     void x_Add(const CPacked_seqint& seq);
00296     void x_Add(const CPacked_seqpnt& seq);
00297     void x_Add(const CSeq_loc_mix& seq);
00298     void x_Add(const CSeq_loc_equiv& seq);
00299     void x_Add(const CSeq_literal& seq);
00300     void x_Add(const CDelta_seq& seq);
00301     void x_Add(const CSeq_loc& seq);
00302     void x_Add(const CSeq_id& seq);
00303     void x_Add(const CSeq_point& seq);
00304     void x_Add(const CSeq_interval& seq);
00305     void x_AddUnloadedSeq_data(TSeqPos len);
00306 
00307 private:
00308     void ResolveAll(void) const;
00309     
00310 private:
00311     // Prohibit copy operator
          // (declared private so code outside the class cannot assign)
00312     CSeqMap& operator= (const CSeqMap&);
00313     
00314 protected:    
00315     // interface for iterators
          // Last end segment sits at index m_Segments.size() - 1, the first
          // end segment at index 0.
00316     size_t x_GetLastEndSegmentIndex(void) const;
00317     size_t x_GetFirstEndSegmentIndex(void) const;
00318 
          // x_GetSegment asserts that index is within m_Segments.
00319     const CSegment& x_GetSegment(size_t index) const;
00320     void x_GetSegmentException(size_t index) const;
00321     CSegment& x_SetSegment(size_t index);
00322 
00323     size_t x_FindSegment(TSeqPos position, CScope* scope) const;
00324     
          // x_GetSegmentLength returns the stored m_Length unless it is
          // kInvalidSeqPos, in which case it calls x_ResolveSegmentLength.
          // x_GetSegmentPosition returns the stored m_Position when
          // index <= m_Resolved, otherwise it calls x_ResolveSegmentPosition.
          // x_GetSegmentEndPosition is the sum of position and length.
00325     TSeqPos x_GetSegmentLength(size_t index, CScope* scope) const;
00326     TSeqPos x_GetSegmentPosition(size_t index, CScope* scope) const;
00327     TSeqPos x_GetSegmentEndPosition(size_t index, CScope* scope) const;
00328     TSeqPos x_ResolveSegmentLength(size_t index, CScope* scope) const;
00329     TSeqPos x_ResolveSegmentPosition(size_t index, CScope* scope) const;
00330 
00331     void x_StartEditing(void);
          // x_IsChanged reports m_Changed. x_UpdateSeq_inst returns false
          // when nothing changed; otherwise it clears m_Changed and returns
          // the result of x_DoUpdateSeq_inst.
00332     bool x_IsChanged(void) const;
00333     void x_SetChanged(size_t index);
00334     bool x_UpdateSeq_inst(CSeq_inst& inst);
00335     virtual bool x_DoUpdateSeq_inst(CSeq_inst& inst);
00336 
00337     CBioseq_Handle x_GetBioseqHandle(const CSegment& seg, CScope* scope) const;
00338 
00339     CConstRef<CSeqMap> x_GetSubSeqMap(const CSegment& seg, CScope* scope,
00340                                       bool resolveExternal = false) const;
00341     virtual const CSeq_data& x_GetSeq_data(const CSegment& seg) const;
00342     virtual const CSeq_id& x_GetRefSeqid(const CSegment& seg) const;
00343     virtual TSeqPos x_GetRefPosition(const CSegment& seg) const;
00344     virtual bool x_GetRefMinusStrand(const CSegment& seg) const;
00345     
00346     void x_LoadObject(const CSegment& seg) const;
00347     CRef<CTSE_Chunk_Info> x_GetChunkToLoad(const CSegment& seg) const;
00348     const CObject* x_GetObject(const CSegment& seg) const;
00349     void x_SetObject(CSegment& seg, const CObject& obj);
00350     void x_SetChunk(CSegment& seg, CTSE_Chunk_Info& chunk);
00351 
00352     virtual void x_SetSeq_data(size_t index, CSeq_data& data);
00353     virtual void x_SetSubSeqMap(size_t index, CSeqMap_Delta_seqs* subMap);
00354 
          // Index-based, virtual counterparts of the public SetSegment*()
          // methods; gap_data defaults to a null pointer.
00355     virtual void x_SetSegmentGap(size_t index,
00356                                  TSeqPos length,
00357                                  CSeq_data* gap_data = 0);
00358     virtual void x_SetSegmentData(size_t index,
00359                                   TSeqPos length,
00360                                   CSeq_data& data);
00361     virtual void x_SetSegmentRef(size_t index,
00362                                  TSeqPos length,
00363                                  const CSeq_id& ref_id,
00364                                  TSeqPos ref_pos,
00365                                  bool ref_minus_strand);
00366 
          // NOTE(review): raw pointer; ownership and lifetime of the
          // CBioseq_Info are not visible from this header -- confirm.
00367     CBioseq_Info*    m_Bioseq;
00368 
00369     typedef vector<CSegment> TSegments;
00370     
00371     // segments in this seqmap
00372     vector<CSegment> m_Segments;
00373     
00374     // index of last resolved segment position
00375     mutable size_t   m_Resolved;
00376     
00377     // representation object of the sequence
00378     CRef<CObject>    m_Delta;
00379 
00380     // Molecule type from seq-inst
00381     TMol    m_Mol;
00382 
00383     // segments' flags
00384     typedef Uint1 THasSegments;
00385     mutable THasSegments m_HasSegments;
00386     // needs to update Seq-inst
00387     typedef bool TChanged;
00388     TChanged m_Changed;
00389 
00390     // Sequence length
          // kInvalidSeqPos until GetLength() computes and caches it.
00391     mutable TSeqPos m_SeqLength;
00392 
00393     // MT-protection
00394     mutable CMutex  m_SeqMap_Mtx;
00395     
00396     friend class CSeqMap_CI;
00397     friend class CSeqMap_CI_SegmentInfo;
00398 };
00399 
00400 
00401 /////////////////////////////////////////////////////////////////////
00402 //  CSeqMap: inline methods
00403 
00404 inline
      /// Report whether this segment carries sequence data: true when either
      /// the segment type or the object type equals eSeqData.
00405 bool CSeqMap::CSegment::IsSetData(void) const
00406 {
00407     const ESegmentType kData = CSeqMap::eSeqData;
00408     return kData == static_cast<ESegmentType>(m_SegType) || kData == static_cast<ESegmentType>(m_ObjType);
00409 }
00410 
00411 
00412 inline
      /// Count of segments held in m_Segments, not counting the two entries
      /// at the first and last end-segment indexes.
00413 size_t CSeqMap::GetSegmentsCount(void) const
00414 {
00415     return m_Segments.size() - size_t(2);
00416 }
00417 
00418 
00419 inline
      /// Index of the final entry of m_Segments (the last end segment).
00420 size_t CSeqMap::x_GetLastEndSegmentIndex(void) const
00421 {
00422     return m_Segments.size() - size_t(1);
00423 }
00424 
00425 
00426 inline
      /// Index of the first end segment; always zero.
00427 size_t CSeqMap::x_GetFirstEndSegmentIndex(void) const
00428 {
00429     return size_t(0);
00430 }
00431 
00432 
00433 inline
      /// Read-only access to the segment stored at @a index.
      /// The index is asserted to lie inside m_Segments.
00434 const CSeqMap::CSegment& CSeqMap::x_GetSegment(size_t index) const
00435 {
00436     _ASSERT(m_Segments.size() > index);
00437     return m_Segments[index];
00438 }
00439 
00440 
00441 inline
      /// Position of the segment at @a index.
      /// Indexes up to and including m_Resolved use the stored m_Position;
      /// anything beyond that is handed to x_ResolveSegmentPosition().
00442 TSeqPos CSeqMap::x_GetSegmentPosition(size_t index, CScope* scope) const
00443 {
00444     return index > m_Resolved
00445         ? x_ResolveSegmentPosition(index, scope)
00446         : m_Segments[index].m_Position;
00447 }
00448 
00449 
00450 inline
      /// Length of the segment at @a index.
      /// A stored length other than kInvalidSeqPos is returned as is;
      /// otherwise the length comes from x_ResolveSegmentLength().
00451 TSeqPos CSeqMap::x_GetSegmentLength(size_t index, CScope* scope) const
00452 {
00453     const TSeqPos stored_len = x_GetSegment(index).m_Length;
00454     if ( stored_len != kInvalidSeqPos ) {
00455         return stored_len;
00456     }
00457     return x_ResolveSegmentLength(index, scope);
00458 }
00459 
00460 
00461 inline
      /// End position of the segment at @a index: its position plus its length.
00462 TSeqPos CSeqMap::x_GetSegmentEndPosition(size_t index, CScope* scope) const
00463 {
00464     return (x_GetSegmentPosition(index, scope)) + (x_GetSegmentLength(index, scope));
00465 }
00466 
00467 
00468 inline
      /// Total sequence length.
      /// A cached m_SeqLength is returned directly; while it still holds
      /// kInvalidSeqPos it is first set to the position of the last end segment.
00469 TSeqPos CSeqMap::GetLength(CScope* scope) const
00470 {
00471     if ( m_SeqLength != kInvalidSeqPos ) {
00472         return m_SeqLength;
00473     }
00474     return m_SeqLength = x_GetSegmentPosition(x_GetLastEndSegmentIndex(), scope);
00475 }
00476 
00477 
00478 inline
      /// Accessor for the molecule type kept in m_Mol.
00479 CSeqMap::TMol CSeqMap::GetMol(void) const
00480 {
00481     return this->m_Mol;
00482 }
00483 
00484 
00485 inline
      /// Accessor for the m_Changed flag.
00486 bool CSeqMap::x_IsChanged(void) const
00487 {
00488     return this->m_Changed;
00489 }
00490 
00491 
00492 inline
      /// Update @a inst when the map has pending changes.
      /// With changes pending, m_Changed is cleared first and the result of
      /// x_DoUpdateSeq_inst() is returned; with none pending the result is false.
00493 bool CSeqMap::x_UpdateSeq_inst(CSeq_inst& inst)
00494 {
00495     if ( x_IsChanged() ) {
00496         m_Changed = false;
00497         return x_DoUpdateSeq_inst(inst);
00498     }
00499     return false;
00500 }
00501 
00502 
00503 /* @} */
00504 
00505 END_SCOPE(objects)
00506 END_NCBI_SCOPE
00507 
00508 #endif  // OBJECTS_OBJMGR___SEQ_MAP__HPP
00509 
00510 

Generated on Sun Dec 6 22:03:40 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:37 2009 by modify_doxy.py rev. 173732