NCBI C++ ToolKit
bdbloader.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP
2 #define OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP
3 
4 /* $Id: bdbloader.hpp 90547 2020-06-26 12:58:17Z grichenk $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Christiam Camacho
30 *
31 * ===========================================================================
32 */
33 
34 /** @file bdbloader.hpp
35  * Data loader implementation that uses the blast databases
36  */
37 
38 #include <corelib/ncbistd.hpp>
39 #include <objmgr/data_loader.hpp>
43 
46 
47 // Parameter names used by loader factory
48 
49 const string kCFParam_BlastDb_DbName = "DbName"; // parameter value: string
50 const string kCFParam_BlastDb_DbType = "DbType"; // parameter value: EDbType (e.g. "Protein")
51 
52 
54 {
55 public:
56  /// Describes the type of blast database to use.
     /// eUnknown is the default value of the dbtype arguments declared in this header.
57  enum EDbType {
58  eNucleotide = 0, ///< nucleotide database
59  eProtein = 1, ///< protein database
60  eUnknown = 2 ///< type not specified: protein is attempted first, then nucleotide
61  };
62 
64  {
65  SBlastDbParam(const string& db_name = "nr",
66  EDbType dbtype = eUnknown,
67  bool use_fixed_size_slices = true);
68 
69  SBlastDbParam(CRef<CSeqDB> db_handle,
70  bool use_fixed_size_slices = true);
71 
72  string m_DbName;
76  };
77 
79  static TRegisterLoaderInfo RegisterInObjectManager(
81  const string& dbname = "nr",
82  const EDbType dbtype = eUnknown,
83  bool use_fixed_size_slices = true,
86  static TRegisterLoaderInfo RegisterInObjectManager(
88  CRef<CSeqDB> db_handle,
89  bool use_fixed_size_slices = true,
92  static string GetLoaderNameFromArgs(CConstRef<CSeqDB> db_handle);
93  static string GetLoaderNameFromArgs(const SBlastDbParam& param);
/// Convenience overload: wraps @p dbname and @p dbtype in an SBlastDbParam
/// and forwards to the GetLoaderNameFromArgs(const SBlastDbParam&) overload.
/// @param dbname
///   Passed as the db_name argument of SBlastDbParam (defaults to "nr").
/// @param dbtype
///   Passed as the dbtype argument of SBlastDbParam (defaults to eUnknown).
/// @return
///   The string returned by the SBlastDbParam overload for these parameters.
94  static string GetLoaderNameFromArgs(const string& dbname = "nr",
95  const EDbType dbtype = eUnknown)
96  {
97  return GetLoaderNameFromArgs(SBlastDbParam(dbname, dbtype));
98  }
99 
100  virtual ~CBlastDbDataLoader();
101 
102  virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const;
103 
104 
105  /// Load TSE
106  virtual TTSE_LockSet GetRecords(const CSeq_id_Handle& idh, EChoice choice);
107  /// Load a description or data chunk.
108  virtual void GetChunk(TChunk chunk);
109 
110  virtual TTaxId GetTaxId(const CSeq_id_Handle& idh);
111  virtual void GetTaxIds(const TIds& ids, TLoaded& loaded, TTaxIds& ret);
112  virtual TSeqPos GetSequenceLength(const CSeq_id_Handle& idh);
113  virtual void GetSequenceLengths(const TIds& ids, TLoaded& loaded,
114  TSequenceLengths& ret);
115  virtual CSeq_inst::TMol GetSequenceType(const CSeq_id_Handle& idh);
116  virtual void GetSequenceTypes(const TIds& ids, TLoaded& loaded,
117  TSequenceTypes& ret);
118 
119  /// Gets the blob id for a given sequence.
120  ///
121  /// Given a Seq_id_Handle, this method finds the corresponding top
122  /// level Seq-entry (TSE) and returns a blob corresponding to it.
123  /// The BlobId is initialized with a pointer to that CSeq_entry if
124  /// the sequence is known to this data loader, which will be true
125  /// if GetRecords() was called for this sequence.
126  ///
127  /// @param idh
128  /// Indicates the sequence for which to get a blob id.
129  /// @return
130  /// A TBlobId corresponding to the provided Seq_id_Handle.
131  virtual TBlobId GetBlobId(const CSeq_id_Handle& idh);
132 
133  /// Test method for GetBlobById feature.
134  ///
135  /// The caller will use this method to determine whether this data
136  /// loader allows blobs to be managed by ID.
137  ///
138  /// @return
139  /// Returns true to indicate that GetBlobById() is available.
140  virtual bool CanGetBlobById() const;
141 
142  /// For a given TBlobId, get the TTSE_Lock.
143  ///
144  /// If the provided TBlobId is known to this code, the
145  /// corresponding TTSE_Lock data will be fetched and returned.
146  /// Otherwise, an empty valued TTSE_Lock is returned.
147  ///
148  /// @param blob_id
149  /// Indicates which data to get.
150  /// @return
151  /// The returned data.
152  virtual TTSE_Lock GetBlobById(const TBlobId& blob_id);
153 
154  /// A mapping from sequence identifier to blob ids.
156 
157  /// @note this is added to temporarily comply with the toolkit's stable
158  /// components rule of having backwards compatible APIs
160  static TRegisterLoaderInfo RegisterInObjectManager(
162  const string& dbname,
163  const EDbType dbtype,
164  CObjectManager::EIsDefault is_default,
166  /// @note this is added to temporarily comply with the toolkit's stable
167  /// components rule of having backwards compatible APIs
169  static TRegisterLoaderInfo RegisterInObjectManager(
171  CRef<CSeqDB> db_handle,
174 protected:
175  /// TPlace is a Seq-id or an integer id; this data loader uses the former.
176  typedef int TBioseq_setId;
178  typedef pair<TBioseqId, TBioseq_setId> TPlace;
179 
182 
183  /// Default (no-op) constructor
185  /// Parametrized constructor
186  /// @param loader_name name of this data loader [in]
187  /// @param param parameters to initialize this data loader [in]
188  CBlastDbDataLoader(const string& loader_name, const SBlastDbParam& param);
189 
190  /// Prevent automatic copy constructor generation
192 
193  /// Prevent automatic assignment operator generation
195 
196  /// Gets the OID from m_Ids cache or the BLAST databases
197  int x_GetOid(const CSeq_id_Handle& idh);
198  /// Gets the OID from a TBlobId (see typedef in bdbloader.cpp)
199  int x_GetOid(const TBlobId& blob_id) const;
200 
201  /// Load sequence data from cache or from the database.
202  ///
203  /// This checks the OID cache and loads the sequence data from
204  /// there or, if not found, from the CSeqDB database. When new
205  /// data is built, the sequence is also split into chunks. A
206  /// description of what data is available will be returned in the
207  /// "lock" parameter.
208  ///
209  /// @param idh
210  /// A handle to the sequence identifier.
211  /// @param oid
212  /// Object id in BLAST DB
213  /// @param lock
214  /// Information about the sequence data is returned here.
     /// @param slice_size
     /// NOTE(review): not documented in this header; presumably the size of
     /// the chunks the sequence is split into -- confirm against bdbloader.cpp.
215  void x_LoadData(const CSeq_id_Handle& idh, int oid, CTSE_LoadLock & lock,
216  int slice_size);
217 
218  string m_DBName; ///< Blast database name
219  EDbType m_DBType; ///< Is this database protein or nucleotide?
220  CRef<IBlastDbAdapter> m_BlastDb; ///< The sequence database
221 
222  TIdMap m_Ids; ///< ID to OID translation
223 
224  /// Configuration value specified to the CCachedSequence
226 };
227 
229 
230 
232 
233 extern "C"
234 {
235 
240 
245 
246 } // extern C
247 
248 
250 
251 #endif /* OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP */
void NCBI_EntryPoint_DataLoader_BlastDb(CPluginManager< objects::CDataLoader >::TDriverInfoList &info_list, CPluginManager< objects::CDataLoader >::EEntryPointRequest method)
const string kDataLoader_BlastDb_DriverName
const string kCFParam_BlastDb_DbName
Definition: bdbloader.hpp:49
void NCBI_EntryPoint_xloader_blastdb(CPluginManager< objects::CDataLoader >::TDriverInfoList &info_list, CPluginManager< objects::CDataLoader >::EEntryPointRequest method)
Definition: bdbloader.cpp:571
const string kCFParam_BlastDb_DbType
Definition: bdbloader.hpp:50
Interface definition of IBlastDbAdapter.
limited_size_map< CSeq_id_Handle, int > TIdMap
A mapping from sequence identifier to blob ids.
Definition: bdbloader.hpp:155
CParamLoaderMaker< CBlastDbDataLoader, SBlastDbParam > TMaker
Definition: bdbloader.hpp:180
static string GetLoaderNameFromArgs(const string &dbname="nr", const EDbType dbtype=eUnknown)
Definition: bdbloader.hpp:94
CRef< IBlastDbAdapter > m_BlastDb
The sequence database.
Definition: bdbloader.hpp:220
TIdMap m_Ids
ID to OID translation.
Definition: bdbloader.hpp:222
SRegisterLoaderInfo< CBlastDbDataLoader > TRegisterLoaderInfo
Definition: bdbloader.hpp:78
string m_DBName
Blast database name.
Definition: bdbloader.hpp:218
EDbType m_DBType
Is this database protein or nucleotide?
Definition: bdbloader.hpp:219
int TBioseq_setId
TPlace is a Seq-id or an integer id, this data loader uses the former.
Definition: bdbloader.hpp:176
CSeq_id_Handle TBioseqId
Definition: bdbloader.hpp:177
CBlastDbDataLoader()
Default (no-op) constructor.
Definition: bdbloader.hpp:184
CBlastDbDataLoader & operator=(const CBlastDbDataLoader &)
Prevent automatic assignment operator generation.
EDbType
Describes the type of blast database to use.
Definition: bdbloader.hpp:57
pair< TBioseqId, TBioseq_setId > TPlace
Definition: bdbloader.hpp:178
CBlastDbDataLoader(const CBlastDbDataLoader &)
Prevent automatic copy constructor generation.
bool m_UseFixedSizeSlices
Configuration value specified to the CCachedSequence.
Definition: bdbloader.hpp:225
CConstRef –.
Definition: ncbiobj.hpp:1266
CObjectManager –.
Include a standard set of the NCBI C++ Toolkit most basic headers.
static unsigned char depth[2 *(256+1+29)+1]
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
@ eUnknown
Definition: app_popup.hpp:72
TTaxId GetTaxId(const CBioseq_Handle &handle)
return the tax-id associated with a given sequence.
Definition: sequence.cpp:274
EIsDefault
Flag defining if the data loader is included in the "default" group.
@ kPriority_NotSet
Deprecated: use kPriority_Default instead.
list< SDriverInfo > TDriverInfoList
List of driver information.
EEntryPointRequest
Actions performed by the entry point.
#define NCBI_DEPRECATED
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XLOADER_BLASTDB_EXPORT
Definition: ncbi_export.h:1155
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
Generic map with size limited by some number.
Uint4 GetSequenceType(const CBioseq_Handle &bsh)
Return a (corrected) set of flags identifying the sequence type.
Definition: sequtils.cpp:42
CRef< objects::CObjectManager > om
SRegisterLoaderInfo –.
#define const
Definition: zconf.h:232
Modified on Tue Apr 23 07:37:59 2024 by modify_doxy.py rev. 669887