include/objmgr/seq_map_ci.hpp

Go to the documentation of this file.
00001 #ifndef OBJECTS_OBJMGR___SEQ_MAP_CI__HPP
00002 #define OBJECTS_OBJMGR___SEQ_MAP_CI__HPP
00003 
00004 /*  $Id: seq_map_ci.hpp 176124 2009-11-16 15:20:36Z vasilche $
00005 * ===========================================================================
00006 *
00007 *                            PUBLIC DOMAIN NOTICE
00008 *               National Center for Biotechnology Information
00009 *
00010 *  This software/database is a "United States Government Work" under the
00011 *  terms of the United States Copyright Act.  It was written as part of
00012 *  the author's official duties as a United States Government employee and
00013 *  thus cannot be copyrighted.  This software/database is freely available
00014 *  to the public for use. The National Library of Medicine and the U.S.
00015 *  Government have not placed any restriction on its use or reproduction.
00016 *
00017 *  Although all reasonable efforts have been taken to ensure the accuracy
00018 *  and reliability of the software and data, the NLM and the U.S.
00019 *  Government do not and cannot warrant the performance or results that
00020 *  may be obtained by using this software or data. The NLM and the U.S.
00021 *  Government disclaim all warranties, express or implied, including
00022 *  warranties of performance, merchantability or fitness for any particular
00023 *  purpose.
00024 *
00025 *  Please cite the author in any work or product based on this material.
00026 *
00027 * ===========================================================================
00028 *
00029 * Authors:
00030 *           Eugene Vasilchenko
00031 *
00032 * File Description:
00033 *   CSeqMap -- formal sequence map to describe sequence parts in general,
00034 *   i.e. location and type only, without providing real data
00035 *
00036 */
00037 
00038 #include <objmgr/seq_map.hpp>
00039 #include <objmgr/impl/heap_scope.hpp>
00040 #include <objmgr/tse_handle.hpp>
00041 #include <objects/seq/seq_id_handle.hpp>
00042 #include <util/range.hpp>
00043 
00044 BEGIN_NCBI_SCOPE
00045 BEGIN_SCOPE(objects)
00046 
00047 
00048 class CSeq_entry;
00049 class CSeq_entry_Handle;
00050 
00051 
00052 /** @addtogroup ObjectManagerIterators
00053  *
00054  * @{
00055  */
00056 
00057 
00058 class CScope;
00059 class CSeqMap;
00060 class CSeq_entry;
00061 
00062 class  CSeqMap_CI_SegmentInfo  // one entry of CSeqMap_CI's position stack (CSeqMap_CI::m_Stack)
00063 {
00064 public:
00065     CSeqMap_CI_SegmentInfo(void);
00066 
00067     TSeqPos GetRefPosition(void) const;
00068     bool GetRefMinusStrand(void) const;  // segment's m_RefMinusStrand ^ m_MinusStrand
00069 
00070     const CSeqMap& x_GetSeqMap(void) const;  // *m_SeqMap
00071     size_t x_GetIndex(void) const;  // m_Index
00072     const CSeqMap::CSegment& x_GetSegment(void) const;  // segment at x_GetIndex() in x_GetSeqMap()
00073     const CSeqMap::CSegment& x_GetNextSegment(void) const;
00074 
00075     bool InRange(void) const;  // segment starts before m_LevelRangeEnd and ends after m_LevelRangePos
00076     CSeqMap::ESegmentType GetType(void) const;  // segment type, or CSeqMap::eSeqEnd when !InRange()
00077     bool IsSetData(void) const;  // InRange() && segment's IsSetData()
00078     bool x_Move(bool minusStrand, CScope* scope);
00079 
00080     TSeqPos x_GetLevelRealPos(void) const;  // segment's m_Position
00081     TSeqPos x_GetLevelRealEnd(void) const;  // segment's m_Position + m_Length
00082     TSeqPos x_GetLevelPos(void) const;  // max(m_LevelRangePos, x_GetLevelRealPos())
00083     TSeqPos x_GetLevelEnd(void) const;  // min(m_LevelRangeEnd, x_GetLevelRealEnd())
00084     TSeqPos x_GetSkipBefore(void) const;  // m_LevelRangePos - real pos as a signed value, clamped at 0
00085     TSeqPos x_GetSkipAfter(void) const;  // real end - m_LevelRangeEnd as a signed value, clamped at 0
00086     TSeqPos x_CalcLength(void) const;  // x_GetLevelEnd() - x_GetLevelPos()
00087     TSeqPos x_GetTopOffset(void) const;
00088 
00089 private:
00090 
00091     // seqmap
00092     CTSE_Handle        m_TSE;
00093     CConstRef<CSeqMap> m_SeqMap;
00094     // index of segment in seqmap
00095     size_t             m_Index;
00096     // position inside m_SeqMap
00097     // m_LevelRangeEnd >= m_LevelRangePos
00098     TSeqPos            m_LevelRangePos;
00099     TSeqPos            m_LevelRangeEnd;
00100     bool               m_MinusStrand;
00101 
00102     friend class CSeqMap_CI;
00103     friend class CSeqMap;
00104 };
00105 
00106 
00107 /// Selector used in CSeqMap methods returning iterators.
00108 struct  SSeqMapSelector
00109 {
00110     typedef CSeqMap::TFlags TFlags;
00111 
00112     SSeqMapSelector(void);
00113     SSeqMapSelector(TFlags flags, size_t resolve_count = 0);
00114 
00115     /// Find segment containing the position
00116     SSeqMapSelector& SetPosition(TSeqPos pos)
00117         {
00118             m_Position = pos;
00119             return *this;
00120         }
00121 
00122     /// Set range for iterator
00123     SSeqMapSelector& SetRange(TSeqPos start, TSeqPos length)
00124         {
00125             m_Position = start;
00126             m_Length = length;
00127             return *this;
00128         }
00129 
00130     typedef CRange<TSeqPos> TRange;
00131     /// Set range for iterator - CRange<> version
00132     SSeqMapSelector& SetRange(const TRange& range)
00133         {
00134             m_Position = range.GetFrom();
00135             m_Length = range.GetLength();
00136             return *this;
00137         }
00138 
00139     /// Set strand to iterate over
00140     SSeqMapSelector& SetStrand(ENa_strand strand)
00141         {
00142             m_MinusStrand = IsReverse(strand);
00143             return *this;
00144         }
00145 
00146     /// Set max depth of resolving seq-map
00147     SSeqMapSelector& SetResolveCount(size_t res_cnt)
00148         {
00149             m_MaxResolveCount = res_cnt;
00150             return *this;
00151         }
00152     /// Turn linking of used TSEs on or off (stores the flag in m_LinkUsedTSE)
00153     SSeqMapSelector& SetLinkUsedTSE(bool link = true)
00154         {
00155             m_LinkUsedTSE = link;
00156             return *this;
00157         }
00158     SSeqMapSelector& SetLinkUsedTSE(const CTSE_Handle& top_tse)  // enables linking and records top_tse in m_TopTSE
00159         {
00160             m_LinkUsedTSE = true;
00161             m_TopTSE = top_tse;
00162             return *this;
00163         }
00164     SSeqMapSelector& SetLinkUsedTSE(vector<CTSE_Handle>& used_tses)  // enables linking; keeps the address of used_tses, not a copy
00165         {
00166             m_LinkUsedTSE = true;
00167             m_UsedTSEs = &used_tses;
00168             return *this;
00169         }
00170 
00171     /// Limit TSE to resolve references
00172     SSeqMapSelector& SetLimitTSE(const CSeq_entry_Handle& tse);
00173 
00174     /// Select segment type(s)
00175     SSeqMapSelector& SetFlags(TFlags flags)
00176         {
00177             m_Flags = flags;
00178             return *this;
00179         }
00180 
00181     SSeqMapSelector& SetByFeaturePolicy(void)  // ORs CSeqMap::fByFeaturePolicy into the current flags
00182         {
00183             m_Flags |= CSeqMap::fByFeaturePolicy;
00184             return *this;
00185         }
00186     /// Current value of m_MaxResolveCount
00187     size_t GetResolveCount(void) const
00188         {
00189             return m_MaxResolveCount;
00190         }
00191     bool CanResolve(void) const  // true while the resolve count is non-zero
00192         {
00193             return GetResolveCount() > 0;
00194         }
00195     /// Use up one resolve level: asserts CanResolve(), then decrements m_MaxResolveCount
00196     void PushResolve(void)
00197         {
00198             _ASSERT(CanResolve());
00199             --m_MaxResolveCount;
00200         }
00201     void PopResolve(void)  // counterpart of PushResolve(): increments m_MaxResolveCount
00202         {
00203             ++m_MaxResolveCount;
00204             _ASSERT(CanResolve());
00205         }
00206 
00207     void AddUsedTSE(const CTSE_Handle& tse) const;
00208 
00209 private:
00210     friend class CSeqMap;
00211     friend class CSeqMap_CI;
00212 
00213     bool x_HasLimitTSE(void) const  // m_LimitTSE converted to bool
00214         {
00215             return m_LimitTSE;
00216         }
00217     const CTSE_Handle& x_GetLimitTSE(CScope* scope = 0) const;
00218 
00219     // position of segment in whole sequence in residues
00220     TSeqPos             m_Position;
00221     // length of current segment
00222     TSeqPos             m_Length;
00223     // Requested strand
00224     bool                m_MinusStrand;
00225     // Link segment bioseqs to master
00226     bool                m_LinkUsedTSE;
00227     CTSE_Handle         m_TopTSE;
00228     // maximum resolution level
00229     size_t              m_MaxResolveCount;
00230     // limit search to single TSE
00231     CTSE_Handle         m_LimitTSE;
00232     // return all intermediate resolved sequences
00233     TFlags              m_Flags;
00234     // keep all used TSEs which can not be linked
00235     vector<CTSE_Handle>* m_UsedTSEs;
00236 };
00237 
00238 
00239 /// Iterator over CSeqMap
00240 class  CSeqMap_CI
00241 {
00242 public:
00243     typedef SSeqMapSelector::TFlags TFlags;
00244 
00245     CSeqMap_CI(void);
00246     CSeqMap_CI(const CBioseq_Handle&     bioseq,
00247                const SSeqMapSelector&    selector,
00248                TSeqPos                   pos = 0);
00249     CSeqMap_CI(const CBioseq_Handle&     bioseq,
00250                const SSeqMapSelector&    selector,
00251                const CRange<TSeqPos>&    range);
00252     CSeqMap_CI(const CConstRef<CSeqMap>& seqmap,
00253                CScope*                   scope,
00254                const SSeqMapSelector&    selector,
00255                TSeqPos                   pos = 0);
00256     CSeqMap_CI(const CConstRef<CSeqMap>& seqmap,
00257                CScope*                   scope,
00258                const SSeqMapSelector&    selector,
00259                const CRange<TSeqPos>&    range);
00260 
00261     ~CSeqMap_CI(void);
00262 
00263     bool IsInvalid(void) const;  // true when the position stack is empty
00264     bool IsValid(void) const;  // non-empty stack whose first entry is in range and not CSeqMap::eSeqEnd
00265 
00266     DECLARE_OPERATOR_BOOL(IsValid());
00267 
00268     bool operator==(const CSeqMap_CI& seg) const;
00269     bool operator!=(const CSeqMap_CI& seg) const;
00270     bool operator< (const CSeqMap_CI& seg) const;
00271     bool operator> (const CSeqMap_CI& seg) const;
00272     bool operator<=(const CSeqMap_CI& seg) const;
00273     bool operator>=(const CSeqMap_CI& seg) const;
00274 
00275     /// go to next/previous segment, return false if no more segments
00276     /// NOTE(review): this comment used to mention no_resolve_current; the parameter is resolveExternal (default true) - confirm its meaning in the implementation
00277     bool Next(bool resolveExternal = true);
00278     bool Prev(void);
00279 
00280     TFlags GetFlags(void) const;  // flags stored in the iterator's selector
00281     void SetFlags(TFlags flags);
00282 
00283     CSeqMap_CI& operator++(void);  // calls Next() with its default argument
00284     CSeqMap_CI& operator--(void);  // calls Prev()
00285 
00286     /// return position of current segment in sequence
00287     TSeqPos      GetPosition(void) const;
00288     /// return length of current segment
00289     TSeqPos      GetLength(void) const;
00290     /// return true if current segment is a gap of unknown length
00291     bool         IsUnknownLength(void) const;
00292     /// return end position of current segment in sequence (exclusive)
00293     TSeqPos      GetEndPosition(void) const;
00294 
00295     CSeqMap::ESegmentType GetType(void) const;
00296     bool IsSetData(void) const;
00297     /// will allow only regular data segments (whole, plus strand)
00298     const CSeq_data& GetData(void) const;
00299     /// will allow any data segments, user should check for position and strand
00300     const CSeq_data& GetRefData(void) const;
00301 
00302     /// The following functions make sense only
00303     /// when the segment is a reference to another seq.
00304     CSeq_id_Handle GetRefSeqid(void) const;
00305     TSeqPos GetRefPosition(void) const;
00306     TSeqPos GetRefEndPosition(void) const;  // GetRefPosition() + GetLength()
00307     bool GetRefMinusStrand(void) const;
00308 
00309     CScope* GetScope(void) const;  // scope pointer held by m_Scope, possibly null
00310 
00311     const CTSE_Handle& GetUsingTSE(void) const;  // m_TSE of the last entry on the position stack
00312 
00313 private:
00314     friend class CSeqMap;
00315     typedef CSeqMap_CI_SegmentInfo TSegmentInfo;
00316 
00317     CSeqMap_CI(const CSeqMap_CI& base,
00318                const CSeqMap& seqmap, size_t index,
00319                TSeqPos pos);
00320 
00321     const TSegmentInfo& x_GetSegmentInfo(void) const;  // m_Stack.back(); the stack must not be empty
00322     TSegmentInfo& x_GetSegmentInfo(void);
00323 
00324     // Check if the current reference can be resolved in the TSE
00325     // set by selector
00326     bool x_RefTSEMatch(const CSeqMap::CSegment& seg) const;
00327     bool x_CanResolve(const CSeqMap::CSegment& seg) const;
00328 
00329     // valid iterator
00330     const CSeqMap& x_GetSeqMap(void) const;
00331     size_t x_GetIndex(void) const;
00332     const CSeqMap::CSegment& x_GetSegment(void) const;
00333 
00334     TSeqPos x_GetTopOffset(void) const;
00335     void x_Resolve(TSeqPos pos);
00336 
00337     bool x_Found(void) const;
00338 
00339     bool x_Push(TSeqPos offset, bool resolveExternal);
00340     bool x_Push(TSeqPos offset);
00341     void x_Push(const CConstRef<CSeqMap>& seqMap, const CTSE_Handle& tse,
00342                 TSeqPos from, TSeqPos length, bool minusStrand, TSeqPos pos);
00343     bool x_Pop(void);
00344 
00345     bool x_Next(bool resolveExternal);
00346     bool x_Next(void);
00347     bool x_Prev(void);
00348 
00349     bool x_TopNext(void);
00350     bool x_TopPrev(void);
00351 
00352     bool x_SettleNext(TSeqPos end_pos = kInvalidSeqPos);
00353     bool x_SettlePrev(void);
00354 
00355     void x_Select(const CConstRef<CSeqMap>& seqMap,
00356                   const SSeqMapSelector& selector,
00357                   TSeqPos pos,
00358                   TSeqPos end_pos);
00359 
00360     typedef vector<TSegmentInfo> TStack;
00361 
00362     // scope for length resolution
00363     CHeapScope           m_Scope;
00364     // position stack
00365     TStack               m_Stack;
00366     // iterator parameters
00367     SSeqMapSelector      m_Selector;
00368 };
00369 
00370 
00371 /////////////////////////////////////////////////////////////////////
00372 //  CSeqMap_CI_SegmentInfo
00373 
00374 
00375 /// Sequence map this level refers to (dereferences m_SeqMap).
00376 inline const CSeqMap& CSeqMap_CI_SegmentInfo::x_GetSeqMap() const
00377 {
00378     return *m_SeqMap;
00379 }
00380 
00381 
00382 /// Index of the current segment within the level's sequence map.
00383 inline size_t CSeqMap_CI_SegmentInfo::x_GetIndex() const
00384 {
00385     return m_Index;
00386 }
00387 
00388 
00389 /// Segment record located at the current index of the level's sequence map.
00390 inline const CSeqMap::CSegment& CSeqMap_CI_SegmentInfo::x_GetSegment() const
00391 {
00392     return (*m_SeqMap).x_GetSegment(x_GetIndex());
00393 }
00394 
00395 
00396 /// Build an unpositioned level: index and range are kInvalidSeqPos, strand flag is false.
00397 inline CSeqMap_CI_SegmentInfo::CSeqMap_CI_SegmentInfo(void)
00398     : m_Index(kInvalidSeqPos),
00399       m_LevelRangePos(kInvalidSeqPos), m_LevelRangeEnd(kInvalidSeqPos),
00400       m_MinusStrand(false)  // was left uninitialized, yet GetRefMinusStrand() reads it
00401 {}
00402 
00403 
00404 
00405 /// Start of the current segment exactly as stored in the segment record.
00406 inline TSeqPos CSeqMap_CI_SegmentInfo::x_GetLevelRealPos() const
00407 {
00408     return x_GetSegment().m_Position;
00409 }
00410 
00411 
00412 /// End of the current segment: stored start plus stored length.
00413 inline TSeqPos CSeqMap_CI_SegmentInfo::x_GetLevelRealEnd() const
00414 {
00415     const CSeqMap::CSegment& segment = x_GetSegment();
00416     return segment.m_Position + segment.m_Length;
00417 }
00418 
00419 
00420 /// Segment start, raised to the level range start when the range begins later.
00421 inline TSeqPos CSeqMap_CI_SegmentInfo::x_GetLevelPos() const
00422 {
00423     return max(m_LevelRangePos, x_GetLevelRealPos());
00424 }
00425 
00426 
00427 /// Segment end, lowered to the level range end when the range stops earlier.
00428 inline TSeqPos CSeqMap_CI_SegmentInfo::x_GetLevelEnd() const
00429 {
00430     return min(m_LevelRangeEnd, x_GetLevelRealEnd());
00431 }
00432 
00433 
00434 /// How far the level range start lies beyond the segment start; 0 when it does not.
00435 inline TSeqPos CSeqMap_CI_SegmentInfo::x_GetSkipBefore() const
00436 {
00437     const TSignedSeqPos delta = m_LevelRangePos - x_GetLevelRealPos();
00438     // The difference is taken as a signed value on purpose:
00439     // a negative result means there is nothing to skip.
00440     return delta < 0 ? TSignedSeqPos(0) : delta;
00441 }
00442 
00443 
00444 /// How far the segment end lies beyond the level range end; 0 when it does not.
00445 inline TSeqPos CSeqMap_CI_SegmentInfo::x_GetSkipAfter() const
00446 {
00447     const TSignedSeqPos delta = x_GetLevelRealEnd() - m_LevelRangeEnd;
00448     // The difference is taken as a signed value on purpose:
00449     // a negative result means there is nothing to skip.
00450     return delta < 0 ? TSignedSeqPos(0) : delta;
00451 }
00452 
00453 
00454 /// Length of the segment part left after clipping to the level range.
00455 inline TSeqPos CSeqMap_CI_SegmentInfo::x_CalcLength() const
00456 {
00457     return x_GetLevelEnd() - x_GetLevelPos();
00458 }
00459 
00460 
00461 /// Combine the segment's own strand flag with this level's strand flag (exclusive or).
00462 inline bool CSeqMap_CI_SegmentInfo::GetRefMinusStrand() const
00463 {
00464     return x_GetSegment().m_RefMinusStrand ^ m_MinusStrand;
00465 }
00466 
00467 
00468 /// True when the segment starts before the range end and ends after the range start.
00469 inline bool CSeqMap_CI_SegmentInfo::InRange() const
00470 {
00471     const CSeqMap::CSegment& segment = x_GetSegment();
00472     return segment.m_Position < m_LevelRangeEnd &&
00473         m_LevelRangePos < segment.m_Position + segment.m_Length;
00474 }
00475 
00476 
00477 /// Type of the current segment; CSeqMap::eSeqEnd when the segment is out of range.
00478 inline CSeqMap::ESegmentType CSeqMap_CI_SegmentInfo::GetType() const
00479 {
00480     if ( !InRange() ) return CSeqMap::eSeqEnd;
00481     return CSeqMap::ESegmentType(x_GetSegment().m_SegType);
00482 }
00483 
00484 
00485 /// True only for an in-range segment whose record reports IsSetData().
00486 inline bool CSeqMap_CI_SegmentInfo::IsSetData() const
00487 {
00488     return InRange() && x_GetSegment().IsSetData();
00489 }
00490 
00491 
00492 /////////////////////////////////////////////////////////////////////
00493 //  CSeqMap_CI
00494 
00495 
00496 /// Last entry of the position stack (read-only); the stack must not be empty.
00497 inline const CSeqMap_CI::TSegmentInfo& CSeqMap_CI::x_GetSegmentInfo() const
00498 {
00499     return m_Stack.back();
00500 }
00501 
00502 
00503 /// Last entry of the position stack (modifiable); the stack must not be empty.
00504 inline CSeqMap_CI::TSegmentInfo& CSeqMap_CI::x_GetSegmentInfo()
00505 {
00506     return m_Stack.back();
00507 }
00508 
00509 
00510 /// Sequence map of the last entry on the position stack.
00511 inline const CSeqMap& CSeqMap_CI::x_GetSeqMap() const
00512 {
00513     return m_Stack.back().x_GetSeqMap();
00514 }
00515 
00516 
00517 /// Segment index of the last entry on the position stack.
00518 inline size_t CSeqMap_CI::x_GetIndex() const
00519 {
00520     return m_Stack.back().x_GetIndex();
00521 }
00522 
00523 
00524 /// Segment record of the last entry on the position stack.
00525 inline const CSeqMap::CSegment& CSeqMap_CI::x_GetSegment() const
00526 {
00527     return m_Stack.back().x_GetSegment();
00528 }
00529 
00530 
00531 /// Scope pointer obtained from m_Scope.GetScopeOrNull().
00532 inline CScope* CSeqMap_CI::GetScope() const
00533 {
00534     return m_Scope.GetScopeOrNull();
00535 }
00536 
00537 
00538 /// Segment type reported by the last entry on the position stack.
00539 inline CSeqMap::ESegmentType CSeqMap_CI::GetType() const
00540 {
00541     return m_Stack.back().GetType();
00542 }
00543 
00544 
00545 /// IsSetData() of the last entry on the position stack.
00546 inline bool CSeqMap_CI::IsSetData() const
00547 {
00548     return m_Stack.back().IsSetData();
00549 }
00550 
00551 
00552 /// Position kept in the iterator's selector.
00553 inline TSeqPos CSeqMap_CI::GetPosition() const
00554 {
00555     return m_Selector.m_Position;
00556 }
00557 
00558 
00559 /// Length kept in the iterator's selector.
00560 inline TSeqPos CSeqMap_CI::GetLength() const
00561 {
00562     return m_Selector.m_Length;
00563 }
00564 
00565 
00566 /// Selector position plus selector length.
00567 inline TSeqPos CSeqMap_CI::GetEndPosition() const
00568 {
00569     return GetPosition() + GetLength();
00570 }
00571 
00572 
00573 /// True when the position stack holds no entries.
00574 inline bool CSeqMap_CI::IsInvalid() const
00575 {
00576     return m_Stack.empty();
00577 }
00578 
00579 
00580 /// True when the stack is non-empty and its first entry is in range and not eSeqEnd.
00581 inline bool CSeqMap_CI::IsValid() const
00582 {
00583     if ( m_Stack.empty() ) return false;
00584     return m_Stack.front().InRange()  &&  m_Stack.front().GetType() != CSeqMap::eSeqEnd;
00585 }
00586 
00587 
00588 /// GetRefPosition() of the last entry on the position stack.
00589 inline TSeqPos CSeqMap_CI::GetRefPosition() const
00590 {
00591     return m_Stack.back().GetRefPosition();
00592 }
00593 
00594 
00595 /// GetRefMinusStrand() of the last entry on the position stack.
00596 inline bool CSeqMap_CI::GetRefMinusStrand() const
00597 {
00598     return m_Stack.back().GetRefMinusStrand();
00599 }
00600 
00601 
00602 /// Referenced start position plus the current segment length.
00603 inline TSeqPos CSeqMap_CI::GetRefEndPosition() const
00604 {
00605     return GetRefPosition() + GetLength();
00606 }
00607 
00608 
00609 /// Equal when position and stack depth match and, for non-empty stacks, the current index matches.
00610 inline bool CSeqMap_CI::operator==(const CSeqMap_CI& seg) const
00611 {
00612     // An empty stack has no current level: x_GetIndex() would call back() on an empty vector.
00613     return GetPosition() == seg.GetPosition() &&
00614         m_Stack.size() == seg.m_Stack.size() &&
00615         (m_Stack.empty() || x_GetIndex() == seg.x_GetIndex());
00616 }
00617 
00618 
00619 /// Order by position first, then by stack depth, then by current segment index.
00620 inline bool CSeqMap_CI::operator<(const CSeqMap_CI& seg) const
00621 {
00622     if ( GetPosition() != seg.GetPosition() )
00623         return GetPosition() < seg.GetPosition();
00624     if ( m_Stack.size() != seg.m_Stack.size() )
00625         return m_Stack.size() < seg.m_Stack.size();
00626     // Same depth: two empty stacks have no index to compare (back() would be undefined).
00627     return !m_Stack.empty() && x_GetIndex() < seg.x_GetIndex();
00628 }
00629 
00630 
00631 inline
00632 bool CSeqMap_CI::operator>(const CSeqMap_CI& seg) const
00633 {
00634     return
00635         GetPosition() > seg.GetPosition() ||
00636         (GetPosition() == seg.GetPosition() && 
00637          (m_Stack.size() > seg.m_Stack.size() ||
00638           (m_Stack.size() == seg.m_Stack.size() &&
00639            x_GetIndex() > seg.x_GetIndex())));
00640 }
00641 
00642 
00643 /// Negation of operator==.
00644 inline bool CSeqMap_CI::operator!=(const CSeqMap_CI& seg) const
00645 {
00646     return !(*this == seg);
00647 }
00648 
00649 
00650 /// Negation of operator>.
00651 inline bool CSeqMap_CI::operator<=(const CSeqMap_CI& seg) const
00652 {
00653     return !(*this > seg);
00654 }
00655 
00656 
00657 /// Negation of operator<.
00658 inline bool CSeqMap_CI::operator>=(const CSeqMap_CI& seg) const
00659 {
00660     return !(*this < seg);
00661 }
00662 
00663 
00664 /// Pre-increment: advance with Next() and hand back this iterator.
00665 inline CSeqMap_CI& CSeqMap_CI::operator++()
00666 {
00667     Next();  // the bool result of Next() is not used here
00668     return *this;
00669 }
00670 
00671 
00672 /// Pre-decrement: step back with Prev() and hand back this iterator.
00673 inline CSeqMap_CI& CSeqMap_CI::operator--()
00674 {
00675     Prev();  // the bool result of Prev() is not used here
00676     return *this;
00677 }
00678 
00679 
00680 /// Flags kept in the iterator's selector.
00681 inline CSeqMap_CI::TFlags CSeqMap_CI::GetFlags() const
00682 {
00683     return m_Selector.m_Flags;
00684 }
00685 
00686 
00687 /// TSE handle stored in the last entry on the position stack.
00688 inline const CTSE_Handle& CSeqMap_CI::GetUsingTSE() const
00689 {
00690     return m_Stack.back().m_TSE;
00691 }
00692 
00693 
00694 /* @} */
00695 
00696 
00697 END_SCOPE(objects)
00698 END_NCBI_SCOPE
00699 
00700 #endif  // OBJECTS_OBJMGR___SEQ_MAP_CI__HPP
00701 
00702 

Generated on Sun Dec 6 22:12:43 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:47 2009 by modify_doxy.py rev. 173732