NCBI C++ ToolKit
tse_chunk_info.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJECTS_OBJMGR_IMPL___TSE_CHUNK_INFO__HPP
2 #define OBJECTS_OBJMGR_IMPL___TSE_CHUNK_INFO__HPP
3 
4 /* $Id: tse_chunk_info.hpp 78347 2017-06-12 18:41:22Z vasilche $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Eugene Vasilchenko
30 *
31 * File Description:
32 * Split TSE chunk info
33 *
34 */
35 
36 
37 #include <corelib/ncbiobj.hpp>
38 
39 #include <objmgr/annot_name.hpp>
42 #include <util/mutex_pool.hpp>
43 #include <objmgr/blob_id.hpp>
44 
45 #include <vector>
46 #include <list>
47 #include <map>
48 
51 
52 class CTSE_Info;
53 class CTSE_Split_Info;
54 class CSeq_entry_Info;
55 class CSeq_annot_Info;
56 class CSeq_literal;
57 class CSeq_descr;
58 class CSeq_annot;
59 class CBioseq_Base_Info;
60 class CBioseq_Info;
61 class CBioseq_set_Info;
62 class CDataLoader;
63 class CTSE_SetObjectInfo;
64 class ITSE_Assigner;
66 
67 
69 {
70 public:
71  //////////////////////////////////////////////////////////////////
72  // types used
73  //////////////////////////////////////////////////////////////////
74 
75  // chunk identification
77  typedef int TBlobVersion;
78  typedef int TChunkId;
79 
80  enum {
81  kMain_ChunkId = -1, // not a chunk, but main Seq-entry
82  kMasterWGS_ChunkId = kMax_Int-1, // chunk with master WGS descr
83  kDelayedMain_ChunkId= kMax_Int // main Seq-entry with delayed ext annot
84  };
85 
86  // contents place identification
87  typedef int TBioseq_setId;
89  typedef pair<TBioseqId, TBioseq_setId> TPlace;
90  typedef unsigned TDescTypeMask;
91  typedef pair<TDescTypeMask, TPlace> TDescInfo;
92  typedef vector<TPlace> TPlaces;
93  typedef vector<TDescInfo> TDescInfos;
94  typedef vector<TBioseq_setId> TBioseqPlaces;
95  typedef vector<TBioseqId> TBioseqIds;
96  typedef TBioseqId TAssemblyInfo;
97  typedef vector<TAssemblyInfo> TAssemblyInfos;
98 
99  // annot contents identification
102  typedef pair<TLocationId, TLocationRange> TLocation;
103  typedef vector<TLocation> TLocationSet;
106 
107  // annot contents indexing
109  typedef list<TObjectIndex> TObjectIndexList;
110 
111  // attached data types
112  typedef list< CRef<CSeq_literal> > TSequence;
113  typedef list< CRef<CSeq_align> > TAssembly;
114 
115  //////////////////////////////////////////////////////////////////
116  // constructor & destructor
117  //////////////////////////////////////////////////////////////////
118  CTSE_Chunk_Info(TChunkId id);
119  virtual ~CTSE_Chunk_Info(void);
120 
121  //////////////////////////////////////////////////////////////////
122  // chunk identification getters
123  //////////////////////////////////////////////////////////////////
124  TBlobId GetBlobId(void) const;
125  TBlobVersion GetBlobVersion(void) const;
126  TChunkId GetChunkId(void) const;
127  const CTSE_Split_Info& GetSplitInfo(void) const;
128 
129  //////////////////////////////////////////////////////////////////
130  // loading control
131  //////////////////////////////////////////////////////////////////
132  bool NotLoaded(void) const;
133  bool IsLoaded(void) const;
134  void Load(void) const;
135  CInitGuard* GetLoadInitGuard(void);
136 
137  //////////////////////////////////////////////////////////////////
138  // chunk content identification
139  // should be set before attaching to CTSE_Info
140  //////////////////////////////////////////////////////////////////
141  void x_AddDescInfo(TDescTypeMask type_mask, const TBioseqId& id);
142  void x_AddDescInfo(TDescTypeMask type_mask, TBioseq_setId id);
143  void x_AddDescInfo(const TDescInfo& info);
144 
145  void x_AddAssemblyInfo(const TBioseqId& id);
146 
147  void x_AddAnnotPlace(const TBioseqId& id);
148  void x_AddAnnotPlace(TBioseq_setId id);
149  void x_AddAnnotPlace(const TPlace& place);
150 
151  // The bioseq-set contains some bioseq(s)
152  void x_AddBioseqPlace(TBioseq_setId id);
153  // The chunk contains the whole bioseq and its annotations;
154  // the annotations cannot refer to other bioseqs.
155  void x_AddBioseqId(const TBioseqId& id);
156 
157  void x_AddAnnotType(const CAnnotName& annot_name,
158  const SAnnotTypeSelector& annot_type,
159  const TLocationId& location_id);
160  void x_AddAnnotType(const CAnnotName& annot_name,
161  const SAnnotTypeSelector& annot_type,
162  const TLocationId& location_id,
163  const TLocationRange& location_range);
164  void x_AddAnnotType(const CAnnotName& annot_name,
165  const SAnnotTypeSelector& annot_type,
166  const TLocationSet& location);
167 
168  // The chunk contains features with ids
169  void x_AddFeat_ids(void);
170  typedef int TFeatIdInt;
171  typedef string TFeatIdStr;
172  typedef vector<TFeatIdInt> TFeatIdIntList;
173  typedef list<TFeatIdStr> TFeatIdStrList;
174  struct SFeatIds {
175  TFeatIdIntList m_IntList;
176  TFeatIdStrList m_StrList;
177  };
179 
180  void x_AddFeat_ids(const SAnnotTypeSelector& type,
181  const TFeatIdIntList& ids);
182  void x_AddXref_ids(const SAnnotTypeSelector& type,
183  const TFeatIdIntList& ids);
184  void x_AddFeat_ids(const SAnnotTypeSelector& type,
185  const TFeatIdStrList& ids);
186  void x_AddXref_ids(const SAnnotTypeSelector& type,
187  const TFeatIdStrList& ids);
188 
189  // The chunk contains seq-data. The corresponding bioseq's
190  // data should either be not set or be set to delta with empty literal(s)
191  void x_AddSeq_data(const TLocationSet& location);
192 
193  //////////////////////////////////////////////////////////////////
194  // chunk data loading interface
195  // is called from CDataLoader
196  //////////////////////////////////////////////////////////////////
197 
198  // synchronization
199  operator CInitMutex_Base&(void) // implicit conversion: lets the chunk object be used where a CInitMutex_Base& is expected
200  {
201  return m_LoadLock; // hands out the chunk's own load lock member
202  }
203  void SetLoaded(CObject* obj = 0);
204 
205  // data attachment
206  void x_LoadDescr(const TPlace& place, const CSeq_descr& descr);
207  void x_LoadAnnot(const TPlace& place, const CSeq_annot& annot);
209  void x_LoadBioseq(const TPlace& place, const CBioseq& bioseq);
210  void x_LoadBioseqs(const TPlace& place, const list< CRef<CBioseq> >& bioseqs);
211  void x_LoadSequence(const TPlace& place, TSeqPos pos,
212  const TSequence& seq);
213  void x_LoadAssembly(const TBioseqId& seq_id, const TAssembly& assembly);
214 
215  void x_LoadSeq_entry(CSeq_entry& entry, CTSE_SetObjectInfo* set_info = 0);
216 
217  //////////////////////////////////////////////////////////////////
218  // methods to find out what information is needed to be loaded
219  //////////////////////////////////////////////////////////////////
220  const TDescInfos& GetDescInfos(void) const // read-only access to m_DescInfos; no copy is made
221  {
222  return m_DescInfos;
223  }
224  const TPlaces& GetAnnotPlaces(void) const // read-only access to m_AnnotPlaces; returned by const reference (like GetDescInfos) so the vector is not copied on every call
225  {
226  return m_AnnotPlaces; // reference is valid only while this chunk object is alive
227  }
228  const TBioseqPlaces& GetBioseqPlaces(void) const // read-only access to m_BioseqPlaces; returned by const reference (like GetDescInfos) so the vector is not copied on every call
229  {
230  return m_BioseqPlaces; // reference is valid only while this chunk object is alive
231  }
232  const TBioseqIds& GetBioseqIds(void) const // read-only access to m_BioseqIds; returned by const reference (like GetDescInfos) so the vector is not copied on every call
233  {
234  return m_BioseqIds; // reference is valid only while this chunk object is alive
235  }
236  const TAnnotContents& GetAnnotContents(void) const // read-only access to m_AnnotContents; returned by const reference (like GetDescInfos) so the container is not copied on every call
237  {
238  return m_AnnotContents; // reference is valid only while this chunk object is alive
239  }
240  const TLocationSet& GetSeq_dataInfos(void) const // read-only access to m_Seq_data; no copy is made
241  {
242  return m_Seq_data;
243  }
244  const TAssemblyInfos& GetAssemblyInfos(void) const // read-only access to m_AssemblyInfos; no copy is made
245  {
246  return m_AssemblyInfos;
247  }
248 
249  Uint4 GetLoadBytes() const;
250  double GetLoadSeconds() const;
251  pair<Uint4, double> GetLoadCost() const;
252 
253  void x_SetLoadBytes(Uint4 bytes);
254  void x_SetLoadSeconds(double seconds);
255 
256 protected:
257  //////////////////////////////////////////////////////////////////
258  // interaction with CTSE_Info
259  //////////////////////////////////////////////////////////////////
260 
261  // attach to CTSE_Info
262  void x_SplitAttach(CTSE_Split_Info& split_info);
263  void x_TSEAttach(CTSE_Info& tse, ITSE_Assigner& tse_info);
264  bool x_Attached(void) const;
265 
266  // return true if chunk is loaded
267  bool x_GetRecords(const CSeq_id_Handle& id, bool bioseq) const;
268 
269  // append ids with all Bioseqs Seq-ids from this Split-Info
270  void GetBioseqsIds(TBioseqIds& ids) const;
271 
272  // bioseq lookup
273  bool ContainsBioseq(const CSeq_id_Handle& id) const;
274 
275  // annot index maintenance
276  void x_EnableAnnotIndex(void);
277  void x_DisableAnnotIndexWhenLoaded(void);
278  void x_UpdateAnnotIndex(CTSE_Info& tse);
279  void x_UpdateAnnotIndexContents(CTSE_Info& tse);
280  bool x_ContainsFeatType(CSeqFeatData::E_Choice type) const;
281  bool x_ContainsFeatType(CSeqFeatData::ESubtype subtype) const;
282  bool x_ContainsFeatIds(CSeqFeatData::E_Choice type,
283  EFeatIdType id_type) const;
284  bool x_ContainsFeatIds(CSeqFeatData::ESubtype subtype,
285  EFeatIdType id_type) const;
286 
287  //void x_UnmapAnnotObjects(CTSE_Info& tse);
288  //void x_DropAnnotObjects(CTSE_Info& tse);
289  void x_DropAnnotObjects(void);
290 
291  void x_InitObjectIndexList(void);
292 
293 private:
294  friend class CTSE_Info;
295  friend class CTSE_Split_Info;
296 
297  friend class CTSE_Default_Assigner;
298 
299 
302 
303  CTSE_Split_Info* m_SplitInfo;
304  TChunkId m_ChunkId;
305 
308 
311 
312  TDescInfos m_DescInfos;
313  TPlaces m_AnnotPlaces;
314  TBioseqPlaces m_BioseqPlaces;
315  TBioseqIds m_BioseqIds;
316  TAnnotContents m_AnnotContents;
317  TLocationSet m_Seq_data;
318  TAssemblyInfos m_AssemblyInfos;
319 
320  TFeatIdsMap m_FeatIds;
321  TFeatIdsMap m_XrefIds;
322 
324  TObjectIndexList m_ObjectIndexList;
325 };
326 
327 
328 inline
330 {
331  return m_ChunkId;
332 }
333 
334 
335 inline
337 {
338  return !m_LoadLock;
339 }
340 
341 
342 inline
344 {
345  return m_LoadLock;
346 }
347 
348 
349 inline
351 {
353  return *m_SplitInfo;
354 }
355 
356 
357 inline
359 {
360  return m_LoadBytes;
361 }
362 
363 
364 inline
366 {
367  return m_LoadSeconds;
368 }
369 
370 
373 
374 #endif//OBJECTS_OBJMGR_IMPL___TSE_CHUNK_INFO__HPP
vector< TLocation > TLocationSet
TDescInfos m_DescInfos
vector< TBioseqId > TBioseqIds
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:73
unsigned int Uint4
Alias for unsigned int.
Definition: ncbitype.h:121
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:836
pair< TBioseqId, TBioseq_setId > TPlace
const TAnnotContents GetAnnotContents(void) const
TChunkId GetChunkId(void) const
#define NCBI_XOBJMGR_EXPORT
Definition: ncbi_export.h:1290
const TBioseqPlaces GetBioseqPlaces(void) const
pair< TDescTypeMask, TPlace > TDescInfo
CInitMutex< CObject > m_LoadLock
CTSE_Split_Info * m_SplitInfo
TAnnotContents m_AnnotContents
CSeq_id_Handle TLocationId
CRange< TSeqPos > TLocationRange
.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:54
const CTSE_Split_Info & GetSplitInfo(void) const
TBioseqId TAssemblyInfo
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
map< CAnnotName, TAnnotTypes > TAnnotContents
Uint4 GetLoadBytes() const
unsigned TDescTypeMask
TFeatIdsMap m_FeatIds
bool NotLoaded(void) const
CTSE_Default_Assigner & operator=(const CTSE_Default_Assigner &)
SAnnotObjectsIndex TObjectIndex
Definition: type.c:8
TLocationSet m_Seq_data
TBioseqIds m_BioseqIds
list< CRef< CSeq_align > > TAssembly
const TLocationSet & GetSeq_dataInfos(void) const
TBioseqPlaces m_BioseqPlaces
TObjectIndexList m_ObjectIndexList
Definition: map.hpp:337
static MDB_envinfo info
Definition: mdb_load.c:37
const TBioseqIds GetBioseqIds(void) const
E_Choice
Choice variants.
TFeatIdsMap m_XrefIds
map< SAnnotTypeSelector, SFeatIds > TFeatIdsMap
const TAssemblyInfos & GetAssemblyInfos(void) const
bool IsLoaded(void) const
const TPlaces GetAnnotPlaces(void) const
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx...
TAssemblyInfos m_AssemblyInfos
vector< TFeatIdInt > TFeatIdIntList
vector< TAssemblyInfo > TAssemblyInfos
Definition: Seq_entry.hpp:55
CObject –.
Definition: ncbiobj.hpp:180
map< SAnnotTypeSelector, TLocationSet > TAnnotTypes
vector< TDescInfo > TDescInfos
vector< TPlace > TPlaces
list< TFeatIdStr > TFeatIdStrList
double GetLoadSeconds() const
list< TObjectIndex > TObjectIndexList
#define _ASSERT
const TDescInfos & GetDescInfos(void) const
static const char location[]
Definition: config.c:97
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:70
CBlobIdKey TBlobId
#define NCBI_DEPRECATED
Definition: ncbiconf_msvc.h:71
CSeq_id_Handle TBioseqId
pair< TLocationId, TLocationRange > TLocation
vector< TBioseq_setId > TBioseqPlaces
#define kMax_Int
Definition: ncbi_limits.h:184
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
list< CRef< CSeq_literal > > TSequence
Modified on Wed Aug 16 05:45:30 2017 by modify_doxy.py rev. 533848