00001 #ifndef OBJECTS_OBJMGR___SEQ_MAP__HPP
00002 #define OBJECTS_OBJMGR___SEQ_MAP__HPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041 #include <objects/seq/seq_id_handle.hpp>
00042 #include <objmgr/objmgr_exception.hpp>
00043 #include <objects/seqloc/Na_strand.hpp>
00044 #include <objects/seq/Seq_inst.hpp>
00045 #include <corelib/ncbimtx.hpp>
00046 #include <vector>
00047 #include <list>
00048
00049 BEGIN_NCBI_SCOPE
00050 BEGIN_SCOPE(objects)
00051
00052
00053
00054
00055
00056
00057
00058
00059 class CBioseq;
00060 class CDelta_seq;
00061 class CSeq_loc;
00062 class CSeq_point;
00063 class CSeq_interval;
00064 class CSeq_loc_mix;
00065 class CSeq_loc_equiv;
00066 class CSeq_literal;
00067 class CSeq_data;
00068 class CPacked_seqint;
00069 class CPacked_seqpnt;
00070 class CTSE_Chunk_Info;
00071
00072
00073 typedef TSeqPos TSeqPosition;
00074 typedef TSeqPos TSeqLength;
00075
00076 class CScope;
00077 class CBioseq_Handle;
00078 class CBioseq_Info;
00079 class CSeqMap_CI;
00080 class CSeqMap_CI_SegmentInfo;
00081 class CSeqMap_Delta_seqs;
00082 struct SSeqMapSelector;
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092 class CSeqMap : public CObject
00093 {
00094 public:
00095
00096 enum ESegmentType {
00097 eSeqGap,
00098 eSeqData,
00099 eSeqSubMap,
00100 eSeqRef,
00101 eSeqEnd,
00102 eSeqChunk
00103 };
00104
00105 typedef CSeq_inst::TMol TMol;
00106 typedef CSeqMap_CI const_iterator;
00107
00108 ~CSeqMap(void);
00109
00110 size_t GetSegmentsCount(void) const;
00111
00112 TSeqPos GetLength(CScope* scope) const;
00113 TMol GetMol(void) const;
00114
00115
00116
00117 const_iterator begin(CScope* scope) const;
00118 const_iterator end(CScope* scope) const;
00119
00120
00121 CSeqMap_CI Begin(CScope* scope) const;
00122 CSeqMap_CI End(CScope* scope) const;
00123
00124 CSeqMap_CI FindSegment(TSeqPos pos, CScope* scope) const;
00125
00126
00127 enum EFlags {
00128 fFindData = (1<<0),
00129 fFindGap = (1<<1),
00130 fFindLeafRef = (1<<2),
00131 fFindInnerRef = (1<<3),
00132 fFindExactLevel = (1<<4),
00133 fIgnoreUnresolved = (1<<5),
00134 fByFeaturePolicy= (1<<6),
00135 fFindRef = (fFindLeafRef | fFindInnerRef),
00136 fFindAny = fFindData | fFindGap | fFindRef,
00137 fFindAnyLeaf = fFindData | fFindGap | fFindLeafRef,
00138 fDefaultFlags = fFindAnyLeaf
00139 };
00140 typedef int TFlags;
00141
00142 CSeqMap_CI BeginResolved(CScope* scope) const;
00143 CSeqMap_CI BeginResolved(CScope* scope,
00144 const SSeqMapSelector& selector) const;
00145 CSeqMap_CI EndResolved(CScope* scope) const;
00146 CSeqMap_CI EndResolved(CScope* scope,
00147 const SSeqMapSelector& selector) const;
00148 CSeqMap_CI FindResolved(CScope* scope,
00149 TSeqPos pos,
00150 const SSeqMapSelector& selector) const;
00151
00152
00153 CSeqMap_CI ResolvedRangeIterator(CScope* scope,
00154 TSeqPos from,
00155 TSeqPos length,
00156 ENa_strand strand = eNa_strand_plus,
00157 size_t maxResolve = size_t(-1),
00158 TFlags flags = fDefaultFlags) const;
00159
00160 bool HasSegmentOfType(ESegmentType type) const;
00161 size_t CountSegmentsOfType(ESegmentType type) const;
00162
00163 bool CanResolveRange(CScope* scope, const SSeqMapSelector& sel) const;
00164 bool CanResolveRange(CScope* scope,
00165 TSeqPos from,
00166 TSeqPos length,
00167 ENa_strand strand = eNa_strand_plus,
00168 size_t maxResolve = size_t(-1),
00169 TFlags flags = fDefaultFlags) const;
00170
00171
00172
00173 static CRef<CSeqMap> CreateSeqMapForBioseq(const CBioseq& seq);
00174 static CRef<CSeqMap> CreateSeqMapForSeq_loc(const CSeq_loc& loc,
00175 CScope* scope);
00176 static CConstRef<CSeqMap> GetSeqMapForSeq_loc(const CSeq_loc& loc,
00177 CScope* scope);
00178 virtual CRef<CSeqMap> CloneFor(const CBioseq& seq) const;
00179
00180
00181 CSeqMap(const CSeqMap& sm);
00182
00183 void SetRegionInChunk(CTSE_Chunk_Info& chunk, TSeqPos pos, TSeqPos length);
00184 void LoadSeq_data(TSeqPos pos, TSeqPos len, const CSeq_data& data);
00185
00186 void SetSegmentGap(const CSeqMap_CI& seg,
00187 TSeqPos length);
00188 void SetSegmentGap(const CSeqMap_CI& seg,
00189 TSeqPos length,
00190 CSeq_data& gap_data);
00191 void SetSegmentData(const CSeqMap_CI& seg,
00192 TSeqPos length,
00193 CSeq_data& data);
00194 void SetSegmentRef(const CSeqMap_CI& seg,
00195 TSeqPos length,
00196 const CSeq_id_Handle& ref_id,
00197 TSeqPos ref_pos,
00198 bool ref_minus_strand);
00199
00200
00201
00202
00203
00204
00205 CSeqMap_CI InsertSegmentGap(const CSeqMap_CI& seg,
00206 TSeqPos length);
00207
00208
00209
00210
00211
00212
00213 CSeqMap_CI RemoveSegment(const CSeqMap_CI& seg);
00214
00215 void SetRepr(CSeq_inst::TRepr repr);
00216 void ResetRepr(void);
00217 void SetMol(CSeq_inst::TMol mol);
00218 void ResetMol(void);
00219
00220 protected:
00221
00222 class CSegment;
00223 class SPosLessSegment;
00224
00225 friend class CSegment;
00226 friend class SPosLessSegment;
00227 friend class CSeqMap_SeqPoss;
00228 friend class CBioseq_Info;
00229
00230 class CSegment
00231 {
00232 public:
00233 CSegment(ESegmentType seg_type = eSeqEnd,
00234 TSeqPos length = kInvalidSeqPos,
00235 bool unknown_len = false);
00236
00237
00238 bool IsSetData(void) const;
00239
00240
00241 mutable TSeqPos m_Position;
00242
00243 mutable TSeqPos m_Length;
00244 bool m_UnknownLength;
00245
00246
00247 char m_SegType;
00248 char m_ObjType;
00249
00250
00251 bool m_RefMinusStrand;
00252 TSeqPos m_RefPosition;
00253 CConstRef<CObject> m_RefObject;
00254
00255 typedef list<TSeqPos>::iterator TList0_I;
00256 TList0_I m_Iterator;
00257 };
00258
00259 class SPosLessSegment
00260 {
00261 public:
00262 bool operator()(TSeqPos pos, const CSegment& seg)
00263 {
00264 return pos < seg.m_Position + seg.m_Length;
00265 }
00266 bool operator()(const CSegment& seg, TSeqPos pos)
00267 {
00268 return seg.m_Position + seg.m_Length < pos;
00269 }
00270 bool operator()(const CSegment& seg1, const CSegment& seg2)
00271 {
00272 return seg1.m_Position + seg1.m_Length < seg2.m_Position + seg2.m_Length;
00273 }
00274 };
00275
00276
00277 CSeqMap(CSeqMap* parent, size_t index);
00278 CSeqMap(void);
00279 CSeqMap(const CSeq_loc& ref);
00280 CSeqMap(TSeqPos len);
00281 CSeqMap(const CSeq_inst& inst);
00282
00283 void x_AddEnd(void);
00284 void x_AddSegment(ESegmentType type,
00285 TSeqPos len,
00286 bool unknown_len = false);
00287 void x_AddSegment(ESegmentType type, TSeqPos len, const CObject* object);
00288 void x_AddSegment(ESegmentType type, const CObject* object,
00289 TSeqPos refPos, TSeqPos len,
00290 ENa_strand strand = eNa_strand_plus);
00291 void x_AddGap(TSeqPos len, bool unknown_len);
00292 void x_AddGap(TSeqPos len, bool unknown_len, const CSeq_data& gap_data);
00293 void x_Add(CSeqMap* submap);
00294 void x_Add(const CSeq_data& data, TSeqPos len);
00295 void x_Add(const CPacked_seqint& seq);
00296 void x_Add(const CPacked_seqpnt& seq);
00297 void x_Add(const CSeq_loc_mix& seq);
00298 void x_Add(const CSeq_loc_equiv& seq);
00299 void x_Add(const CSeq_literal& seq);
00300 void x_Add(const CDelta_seq& seq);
00301 void x_Add(const CSeq_loc& seq);
00302 void x_Add(const CSeq_id& seq);
00303 void x_Add(const CSeq_point& seq);
00304 void x_Add(const CSeq_interval& seq);
00305 void x_AddUnloadedSeq_data(TSeqPos len);
00306
00307 private:
00308 void ResolveAll(void) const;
00309
00310 private:
00311
00312 CSeqMap& operator= (const CSeqMap&);
00313
00314 protected:
00315
00316 size_t x_GetLastEndSegmentIndex(void) const;
00317 size_t x_GetFirstEndSegmentIndex(void) const;
00318
00319 const CSegment& x_GetSegment(size_t index) const;
00320 void x_GetSegmentException(size_t index) const;
00321 CSegment& x_SetSegment(size_t index);
00322
00323 size_t x_FindSegment(TSeqPos position, CScope* scope) const;
00324
00325 TSeqPos x_GetSegmentLength(size_t index, CScope* scope) const;
00326 TSeqPos x_GetSegmentPosition(size_t index, CScope* scope) const;
00327 TSeqPos x_GetSegmentEndPosition(size_t index, CScope* scope) const;
00328 TSeqPos x_ResolveSegmentLength(size_t index, CScope* scope) const;
00329 TSeqPos x_ResolveSegmentPosition(size_t index, CScope* scope) const;
00330
00331 void x_StartEditing(void);
00332 bool x_IsChanged(void) const;
00333 void x_SetChanged(size_t index);
00334 bool x_UpdateSeq_inst(CSeq_inst& inst);
00335 virtual bool x_DoUpdateSeq_inst(CSeq_inst& inst);
00336
00337 CBioseq_Handle x_GetBioseqHandle(const CSegment& seg, CScope* scope) const;
00338
00339 CConstRef<CSeqMap> x_GetSubSeqMap(const CSegment& seg, CScope* scope,
00340 bool resolveExternal = false) const;
00341 virtual const CSeq_data& x_GetSeq_data(const CSegment& seg) const;
00342 virtual const CSeq_id& x_GetRefSeqid(const CSegment& seg) const;
00343 virtual TSeqPos x_GetRefPosition(const CSegment& seg) const;
00344 virtual bool x_GetRefMinusStrand(const CSegment& seg) const;
00345
00346 void x_LoadObject(const CSegment& seg) const;
00347 CRef<CTSE_Chunk_Info> x_GetChunkToLoad(const CSegment& seg) const;
00348 const CObject* x_GetObject(const CSegment& seg) const;
00349 void x_SetObject(CSegment& seg, const CObject& obj);
00350 void x_SetChunk(CSegment& seg, CTSE_Chunk_Info& chunk);
00351
00352 virtual void x_SetSeq_data(size_t index, CSeq_data& data);
00353 virtual void x_SetSubSeqMap(size_t index, CSeqMap_Delta_seqs* subMap);
00354
00355 virtual void x_SetSegmentGap(size_t index,
00356 TSeqPos length,
00357 CSeq_data* gap_data = 0);
00358 virtual void x_SetSegmentData(size_t index,
00359 TSeqPos length,
00360 CSeq_data& data);
00361 virtual void x_SetSegmentRef(size_t index,
00362 TSeqPos length,
00363 const CSeq_id& ref_id,
00364 TSeqPos ref_pos,
00365 bool ref_minus_strand);
00366
00367 CBioseq_Info* m_Bioseq;
00368
00369 typedef vector<CSegment> TSegments;
00370
00371
00372 vector<CSegment> m_Segments;
00373
00374
00375 mutable size_t m_Resolved;
00376
00377
00378 CRef<CObject> m_Delta;
00379
00380
00381 TMol m_Mol;
00382
00383
00384 typedef Uint1 THasSegments;
00385 mutable THasSegments m_HasSegments;
00386
00387 typedef bool TChanged;
00388 TChanged m_Changed;
00389
00390
00391 mutable TSeqPos m_SeqLength;
00392
00393
00394 mutable CMutex m_SeqMap_Mtx;
00395
00396 friend class CSeqMap_CI;
00397 friend class CSeqMap_CI_SegmentInfo;
00398 };
00399
00400
00401
00402
00403
00404 inline
00405 bool CSeqMap::CSegment::IsSetData(void) const
00406 {
00407 return static_cast<ESegmentType>(m_SegType) == CSeqMap::eSeqData
00408 || static_cast<ESegmentType>(m_ObjType) == CSeqMap::eSeqData;
00409 }
00410
00411
00412 inline
00413 size_t CSeqMap::GetSegmentsCount(void) const
00414 {
00415 return m_Segments.size() - 2;
00416 }
00417
00418
00419 inline
00420 size_t CSeqMap::x_GetLastEndSegmentIndex(void) const
00421 {
00422 return m_Segments.size() - 1;
00423 }
00424
00425
00426 inline
00427 size_t CSeqMap::x_GetFirstEndSegmentIndex(void) const
00428 {
00429 return 0;
00430 }
00431
00432
00433 inline
00434 const CSeqMap::CSegment& CSeqMap::x_GetSegment(size_t index) const
00435 {
00436 _ASSERT(index < m_Segments.size());
00437 return m_Segments[index];
00438 }
00439
00440
00441 inline
00442 TSeqPos CSeqMap::x_GetSegmentPosition(size_t index, CScope* scope) const
00443 {
00444 if ( index <= m_Resolved )
00445 return m_Segments[index].m_Position;
00446 return x_ResolveSegmentPosition(index, scope);
00447 }
00448
00449
00450 inline
00451 TSeqPos CSeqMap::x_GetSegmentLength(size_t index, CScope* scope) const
00452 {
00453 TSeqPos length = x_GetSegment(index).m_Length;
00454 if ( length == kInvalidSeqPos ) {
00455 length = x_ResolveSegmentLength(index, scope);
00456 }
00457 return length;
00458 }
00459
00460
00461 inline
00462 TSeqPos CSeqMap::x_GetSegmentEndPosition(size_t index, CScope* scope) const
00463 {
00464 return x_GetSegmentPosition(index, scope)+x_GetSegmentLength(index, scope);
00465 }
00466
00467
00468 inline
00469 TSeqPos CSeqMap::GetLength(CScope* scope) const
00470 {
00471 if (m_SeqLength == kInvalidSeqPos) {
00472 m_SeqLength = x_GetSegmentPosition(x_GetLastEndSegmentIndex(), scope);
00473 }
00474 return m_SeqLength;
00475 }
00476
00477
00478 inline
00479 CSeqMap::TMol CSeqMap::GetMol(void) const
00480 {
00481 return m_Mol;
00482 }
00483
00484
00485 inline
00486 bool CSeqMap::x_IsChanged(void) const
00487 {
00488 return m_Changed;
00489 }
00490
00491
00492 inline
00493 bool CSeqMap::x_UpdateSeq_inst(CSeq_inst& inst)
00494 {
00495 if ( !x_IsChanged() ) {
00496 return false;
00497 }
00498 m_Changed = false;
00499 return x_DoUpdateSeq_inst(inst);
00500 }
00501
00502
00503
00504
00505 END_SCOPE(objects)
00506 END_NCBI_SCOPE
00507
00508 #endif // OBJECTS_OBJMGR___SEQ_MAP__HPP
00509
00510