NCBI C++ ToolKit
bdb_split_cursor.hpp
Go to the documentation of this file.
00001 #ifndef LOG___BDB_SPLIT_CURSOR__HPP
00002 #define LOG___BDB_SPLIT_CURSOR__HPP
00003 
00004 /*  $Id: bdb_split_cursor.hpp 42218 2009-06-15 19:59:58Z ivanovp $
00005  * ===========================================================================
00006  *
00007  *                            PUBLIC DOMAIN NOTICE
00008  *               National Center for Biotechnology Information
00009  *
00010  *  This software/database is a "United States Government Work" under the
00011  *  terms of the United States Copyright Act.  It was written as part of
00012  *  the author's official duties as a United States Government employee and
00013  *  thus cannot be copyrighted.  This software/database is freely available
00014  *  to the public for use. The National Library of Medicine and the U.S.
00015  *  Government have not placed any restriction on its use or reproduction.
00016  *
00017  *  Although all reasonable efforts have been taken to ensure the accuracy
00018  *  and reliability of the software and data, the NLM and the U.S.
00019  *  Government do not and cannot warrant the performance or results that
00020  *  may be obtained by using this software or data. The NLM and the U.S.
00021  *  Government disclaim all warranties, express or implied, including
00022  *  warranties of performance, merchantability or fitness for any particular
00023  *  purpose.
00024  *
00025  *  Please cite the author in any work or product based on this material.
00026  *
00027  * ===========================================================================
00028  *
00029  * Authors:  Mike DiCuccio
00030  *
00031  * File Description:
00032  *
00033  */
00034 
00035 #include <corelib/ncbistd.hpp>
00036 #include <corelib/ncbifile.hpp>
00037 #include <db/bdb/bdb_file.hpp>
00038 #include <db/bdb/bdb_cursor.hpp>
00039 #include <db/bdb/bdb_env.hpp>
00040 #include <db/error_codes.hpp>
00041 
00042 
00043 BEGIN_NCBI_SCOPE
00044 
00045 
00046 
00047 template <typename BDB_SplitStore, typename BDB_Vol = typename BDB_SplitStore::TBlobFile>
00048 class CBDB_SplitCursor
00049 {
00050 public:
00051     struct SVolumeLess : public binary_function<string, string, bool>
00052     {
00053         bool operator() (const string& s1, const string& s2) const
00054         {
00055             string::size_type pos1 = s1.find_last_of("_");
00056             if (pos1 == string::npos) {
00057                 pos1 = 0;
00058             }
00059             string::size_type pos2 = s2.find_last_of("_");
00060             if (pos2 == string::npos) {
00061                 pos2 = 0;
00062             }
00063 
00064             CTempString ts1(s1, pos1, s1.size());
00065             CTempString ts2(s2, pos2, s2.size());
00066             if (ts1 < ts2) {
00067                 return true;
00068             }
00069             if (ts2 < ts1) {
00070                 return false;
00071             }
00072 
00073             return s1 < s2;
00074         }
00075     };
00076     typedef BDB_SplitStore TSplitStore;
00077     typedef BDB_Vol        TVolume;
00078 
00079     CBDB_SplitCursor(TSplitStore& store);
00080     void InitMultiFetch(size_t buffer_size);
00081     EBDB_ErrCode Fetch();
00082     const void* GetLastMultiFetchData() const;
00083     size_t      GetLastMultiFetchDataLen() const;
00084     TVolume&    GetSourceVolume();
00085     NCBI_DEPRECATED Uint4       GetCurrentBlobId() const;
00086     Uint4       GetLastBlobId() const;
00087 
00088 private:
00089     CBDB_Env* m_Env;
00090     string m_Path;
00091     string m_StoreName;
00092     size_t m_BufferSize;
00093 
00094     vector<string>            m_Files;
00095     auto_ptr<TVolume>         m_Volume;
00096     auto_ptr<CBDB_FileCursor> m_Cursor;
00097 
00098     CStopWatch m_SW;
00099 
00100     void x_NextVolume();
00101 };
00102 
00103 
00104 template <typename BDB_SplitStore, typename BDB_Vol>
00105 inline
00106 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::CBDB_SplitCursor(TSplitStore& store)
00107     : m_Env(NULL)
00108     , m_BufferSize(40 * 1024 * 1024)
00109 {
00110     m_SW.Start();
00111     m_Env = store.GetEnv();
00112 
00113     ///
00114     /// find our relevant files
00115     ///
00116     {{
00117          vector<string> paths;
00118          vector<string> masks;
00119 
00120          string path = store.GetFileName();
00121          string path_dir;
00122          string path_base;
00123          string path_ext;
00124          CDirEntry::SplitPath(path, &path_dir, &path_base, &path_ext);
00125          path_base += path_ext;
00126 
00127          if (CDirEntry::IsAbsolutePath(path_dir)) {
00128              path = path_dir;
00129          } else {
00130              path.erase();
00131              if (m_Env) {
00132                  path = m_Env->GetPath();
00133                  path += "/";
00134              }
00135              path += path_dir;
00136              path = CDirEntry::CreateAbsolutePath(path);
00137          }
00138 
00139          paths.push_back(path);
00140          masks.push_back(path_base + "_*");
00141          FindFiles(m_Files,
00142                    paths.begin(), paths.end(), masks.begin(), masks.end(),
00143                    fFF_File);
00144 
00145          std::sort(m_Files.begin(), m_Files.end(), SVolumeLess());
00146 
00147          LOG_POST_XX(Db_Bdb_Cursor, 2, Info <<
00148                      "found " << m_Files.size() << " candidate files");
00149      }}
00150 }
00151 
00152 
00153 template <typename BDB_SplitStore, typename BDB_Vol>
00154 inline EBDB_ErrCode
00155 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::Fetch()
00156 {
00157     for (;;) {
00158         if ( !m_Cursor.get()  ||  m_Cursor->Fetch() != eBDB_Ok) {
00159             x_NextVolume();
00160             if ( !m_Cursor.get() ) {
00161                 return eBDB_NotFound;
00162             }
00163         } else {
00164             break;
00165         }
00166     }
00167 
00168     return eBDB_Ok;
00169 }
00170 
00171 
00172 template <typename BDB_SplitStore, typename BDB_Vol>
00173 inline void
00174 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::InitMultiFetch(size_t size)
00175 {
00176     m_BufferSize = size;
00177 }
00178 
00179 
00180 template <typename BDB_SplitStore, typename BDB_Vol>
00181 inline const void*
00182 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetLastMultiFetchData() const
00183 {
00184     if (m_Cursor.get()) {
00185         return m_Cursor->GetLastMultiFetchData();
00186     }
00187     return NULL;
00188 }
00189 
00190 
00191 template <typename BDB_SplitStore, typename BDB_Vol>
00192 inline size_t
00193 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetLastMultiFetchDataLen() const
00194 {
00195     if (m_Cursor.get()) {
00196         return m_Cursor->GetLastMultiFetchDataLen();
00197     }
00198     return 0;
00199 }
00200 
00201 
00202 template <typename BDB_SplitStore, typename BDB_Vol>
00203 inline typename CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::TVolume&
00204 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetSourceVolume()
00205 {
00206     if (m_Volume.get()) {
00207         return *m_Volume;
00208     }
00209     NCBI_THROW(CException, eUnknown, "no open volume");
00210 }
00211 
00212 
00213 template <typename BDB_SplitStore, typename BDB_Vol>
00214 inline Uint4
00215 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetCurrentBlobId() const
00216 {
00217     return GetLastBlobId();
00218 }
00219 
00220 
00221 template <typename BDB_SplitStore, typename BDB_Vol>
00222 inline Uint4
00223 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::GetLastBlobId() const
00224 {
00225     if (m_Volume.get()) {
00226         return (Uint4)m_Volume->GetUid();
00227     }
00228     NCBI_THROW(CException, eUnknown, "no open volume");
00229 }
00230 
00231 
00232 template <typename BDB_SplitStore, typename BDB_Vol>
00233 inline void
00234 CBDB_SplitCursor<BDB_SplitStore, BDB_Vol>::x_NextVolume()
00235 {
00236     /// get rid of our existing cursor + volume
00237     m_Cursor.reset();
00238     m_Volume.reset();
00239     if ( !m_Files.size() ) {
00240         return;
00241     }
00242 
00243     /// open the next file
00244     string path = m_Files.back();
00245     m_Files.pop_back();
00246 
00247     m_Volume.reset(new TVolume);
00248     m_Volume->SetCacheSize(10 * 1024 * 1024);
00249     if (m_Env) {
00250         m_Volume->SetEnv(*m_Env);
00251     }
00252 
00253     LOG_POST_XX(Db_Bdb_Cursor, 1, Info
00254                 << "CBDB_SplitCursor::x_NextVolume(): opening: " << path);
00255     m_Volume->Open(path, CBDB_RawFile::eReadOnly);
00256 
00257     m_Cursor.reset(new CBDB_FileCursor(*m_Volume));
00258     m_Cursor->InitMultiFetch(m_BufferSize);
00259 }
00260 
00261 
00262 END_NCBI_SCOPE
00263 
00264 
00265 #endif  // LOG___BDB_SPLIT_CURSOR__HPP
Modified on Wed May 23 13:30:17 2012 by modify_doxy.py rev. 337098