|
NCBI C++ ToolKit
|
00001 #ifndef LOG___BDB_SPLIT_CURSOR__HPP 00002 #define LOG___BDB_SPLIT_CURSOR__HPP 00003 00004 /* $Id: bdb_split_cursor.hpp 42218 2009-06-15 19:59:58Z ivanovp $ 00005 * =========================================================================== 00006 * 00007 * PUBLIC DOMAIN NOTICE 00008 * National Center for Biotechnology Information 00009 * 00010 * This software/database is a "United States Government Work" under the 00011 * terms of the United States Copyright Act. It was written as part of 00012 * the author's official duties as a United States Government employee and 00013 * thus cannot be copyrighted. This software/database is freely available 00014 * to the public for use. The National Library of Medicine and the U.S. 00015 * Government have not placed any restriction on its use or reproduction. 00016 * 00017 * Although all reasonable efforts have been taken to ensure the accuracy 00018 * and reliability of the software and data, the NLM and the U.S. 00019 * Government do not and cannot warrant the performance or results that 00020 * may be obtained by using this software or data. The NLM and the U.S. 00021 * Government disclaim all warranties, express or implied, including 00022 * warranties of performance, merchantability or fitness for any particular 00023 * purpose. 00024 * 00025 * Please cite the author in any work or product based on this material. 00026 * 00027 * =========================================================================== 00028 * 00029 * Authors: Mike DiCuccio 00030 * 00031 * File Description: 00032 * 00033 */ 00034 00035 #include <corelib/ncbistd.hpp> 00036 #include <corelib/ncbifile.hpp> 00037 #include <db/bdb/bdb_file.hpp> 00038 #include <db/bdb/bdb_cursor.hpp> 00039 #include <db/bdb/bdb_env.hpp> 00040 #include <db/error_codes.hpp> 00041 00042 00043 BEGIN_NCBI_SCOPE 00044 00045 00046 00047 template <typename BDB_SplitStore, typename BDB_Vol = typename BDB_SplitStore::TBlobFile> 00048 class CBDB_SplitCursor 00049 { 00050 public: 00051 struct SVolumeLess : public binary_function<string, string, bool> 00052 { 00053 bool operator() (const string& s1, const string& s2) const 00054 { 00055 string::size_type pos1 = s1.find_last_of("_"); 00056 if (pos1 == string::npos) { 00057 pos1 = 0; 00058 } 00059 string::size_type pos2 = s2.find_last_of("_"); 00060 if (pos2 == string::npos) { 00061 pos2 = 0; 00062 } 00063 00064 CTempString ts1(s1, pos1, s1.size()); 00065 CTempString ts2(s2, pos2, s2.size()); 00066 if (ts1 < ts2) { 00067 return true; 00068 } 00069 if (ts2 < ts1) { 00070 return false; 00071 } 00072 00073 return s1 < s2; 00074 } 00075 }; 00076 typedef BDB_SplitStore TSplitStore; 00077 typedef BDB_Vol TVolume; 00078 00079 CBDB_SplitCursor(TSplitStore& store); 00080 void InitMultiFetch(size_t buffer_size); 00081 EBDB_ErrCode Fetch(); 00082 const void* GetLastMultiFetchData() const; 00083 size_t GetLastMultiFetchDataLen() const; 00084 TVolume& GetSourceVolume(); 00085 NCBI_DEPRECATED Uint4 GetCurrentBlobId() const; 00086 Uint4 GetLastBlobId() const; 00087 00088 private: 00089 CBDB_Env* m_Env; 00090 string m_Path; 00091 string m_StoreName; 00092 size_t m_BufferSize; 00093 00094 vector<string> m_Files; 00095 auto_ptr<TVolume> m_Volume; 00096 auto_ptr<CBDB_FileCursor> m_Cursor; 00097 00098 CStopWatch m_SW; 00099 00100 void x_NextVolume(); 00101 }; 00102 00103 00104 template <typename BDB_SplitStore, typename BDB_Vol> 00105 inline 00106 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::CBDB_SplitCursor(TSplitStore& store) 00107 : m_Env(NULL) 00108 , m_BufferSize(40 * 1024 * 1024) 00109 { 00110 m_SW.Start(); 00111 m_Env = store.GetEnv(); 00112 00113 /// 00114 /// find our relevant files 00115 /// 00116 {{ 00117 vector<string> paths; 00118 vector<string> masks; 00119 00120 string path = store.GetFileName(); 00121 string path_dir; 00122 string path_base; 00123 string path_ext; 00124 CDirEntry::SplitPath(path, &path_dir, &path_base, &path_ext); 00125 path_base += path_ext; 00126 00127 if (CDirEntry::IsAbsolutePath(path_dir)) { 00128 path = path_dir; 00129 } else { 00130 path.erase(); 00131 if (m_Env) { 00132 path = m_Env->GetPath(); 00133 path += "/"; 00134 } 00135 path += path_dir; 00136 path = CDirEntry::CreateAbsolutePath(path); 00137 } 00138 00139 paths.push_back(path); 00140 masks.push_back(path_base + "_*"); 00141 FindFiles(m_Files, 00142 paths.begin(), paths.end(), masks.begin(), masks.end(), 00143 fFF_File); 00144 00145 std::sort(m_Files.begin(), m_Files.end(), SVolumeLess()); 00146 00147 LOG_POST_XX(Db_Bdb_Cursor, 2, Info << 00148 "found " << m_Files.size() << " candidate files"); 00149 }} 00150 } 00151 00152 00153 template <typename BDB_SplitStore, typename BDB_Vol> 00154 inline EBDB_ErrCode 00155 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::Fetch() 00156 { 00157 for (;;) { 00158 if ( !m_Cursor.get() || m_Cursor->Fetch() != eBDB_Ok) { 00159 x_NextVolume(); 00160 if ( !m_Cursor.get() ) { 00161 return eBDB_NotFound; 00162 } 00163 } else { 00164 break; 00165 } 00166 } 00167 00168 return eBDB_Ok; 00169 } 00170 00171 00172 template <typename BDB_SplitStore, typename BDB_Vol> 00173 inline void 00174 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::InitMultiFetch(size_t size) 00175 { 00176 m_BufferSize = size; 00177 } 00178 00179 00180 template <typename BDB_SplitStore, typename BDB_Vol> 00181 inline const void* 00182 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetLastMultiFetchData() const 00183 { 00184 if (m_Cursor.get()) { 00185 return m_Cursor->GetLastMultiFetchData(); 00186 } 00187 return NULL; 00188 } 00189 00190 00191 template <typename BDB_SplitStore, typename BDB_Vol> 00192 inline size_t 00193 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetLastMultiFetchDataLen() const 00194 { 00195 if (m_Cursor.get()) { 00196 return m_Cursor->GetLastMultiFetchDataLen(); 00197 } 00198 return 0; 00199 } 00200 00201 00202 template <typename BDB_SplitStore, typename BDB_Vol> 00203 inline typename CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::TVolume& 00204 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetSourceVolume() 00205 { 00206 if (m_Volume.get()) { 00207 return *m_Volume; 00208 } 00209 NCBI_THROW(CException, eUnknown, "no open volume"); 00210 } 00211 00212 00213 template <typename BDB_SplitStore, typename BDB_Vol> 00214 inline Uint4 00215 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetCurrentBlobId() const 00216 { 00217 return GetLastBlobId(); 00218 } 00219 00220 00221 template <typename BDB_SplitStore, typename BDB_Vol> 00222 inline Uint4 00223 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetLastBlobId() const 00224 { 00225 if (m_Volume.get()) { 00226 return (Uint4)m_Volume->GetUid(); 00227 } 00228 NCBI_THROW(CException, eUnknown, "no open volume"); 00229 } 00230 00231 00232 template <typename BDB_SplitStore, typename BDB_Vol> 00233 inline void 00234 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::x_NextVolume() 00235 { 00236 /// get rid of our existing cursor + volume 00237 m_Cursor.reset(); 00238 m_Volume.reset(); 00239 if ( !m_Files.size() ) { 00240 return; 00241 } 00242 00243 /// open the next file 00244 string path = m_Files.back(); 00245 m_Files.pop_back(); 00246 00247 m_Volume.reset(new TVolume); 00248 m_Volume->SetCacheSize(10 * 1024 * 1024); 00249 if (m_Env) { 00250 m_Volume->SetEnv(*m_Env); 00251 } 00252 00253 LOG_POST_XX(Db_Bdb_Cursor, 1, Info 00254 << "CBDB_SplitCursor::x_NextVolume(): opening: " << path); 00255 m_Volume->Open(path, CBDB_RawFile::eReadOnly); 00256 00257 m_Cursor.reset(new CBDB_FileCursor(*m_Volume)); 00258 m_Cursor->InitMultiFetch(m_BufferSize); 00259 } 00260 00261 00262 END_NCBI_SCOPE 00263 00264 00265 #endif // LOG___BDB_SPLIT_CURSOR__HPP
1.7.5.1
Modified on Wed May 23 13:30:17 2012 by modify_doxy.py rev. 337098