NCBI C++ ToolKit
seqdbisam.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /*  $Id: seqdbisam.cpp 65402 2014-11-26 18:04:24Z fongah2 $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Author:  Kevin Bealer
00027  *
00028  */
00029 
00030 /// @file seqdbisam.cpp
00031 /// Implementation for the CSeqDBIsam class, which manages an ISAM
00032 /// index of some particular kind of identifiers.
00033 
00034 #ifndef SKIP_DOXYGEN_PROCESSING
00035 static char const rcsid[] = "$Id: seqdbisam.cpp 65402 2014-11-26 18:04:24Z fongah2 $";
00036 #endif /* SKIP_DOXYGEN_PROCESSING */
00037 
00038 #include <ncbi_pch.hpp>
00039 #include <objtools/blast/seqdb_reader/impl/seqdbisam.hpp>
00040 #include <objects/seqloc/Seq_id.hpp>
00041 #include <objects/general/general__.hpp>
00042 #include <corelib/ncbiutil.hpp>
00043 
00044 /// Place these definitions in the ncbi namespace
00045 BEGIN_NCBI_SCOPE
00046 
00047 /// Import this namespace
00048 USING_SCOPE(objects);
00049 
00050 /// Format version of the ISAM files
00051 #define ISAM_VERSION 1
00052 
00053 /// Default page size for numeric indices
00054 #define DEFAULT_NISAM_SIZE 256
00055 
00056 /// Default page size for string indices
00057 #define DEFAULT_SISAM_SIZE 64
00058 
00059 /// Special page size value which indicates a memory-only string index
00060 #define MEMORY_ONLY_PAGE_SIZE 1
00061 
00062 
00063 CSeqDBIsam::EErrorCode
00064 CSeqDBIsam::x_InitSearch(CSeqDBLockHold & locked)
00065 {
00066     if(m_Initialized == true)
00067         return eNoError;
00068     
00069     TIndx info_needed = 10 * sizeof(Int4);
00070     
00071     m_Atlas.Lock(locked);
00072     
00073     bool found_index_file =
00074         m_Atlas.GetFileSize(m_IndexFname, m_IndexFileLength, locked);
00075     
00076     if ((! found_index_file) || (m_IndexFileLength < info_needed)) {
00077         return eWrongFile;
00078     }
00079     
00080     m_Atlas.GetRegion(m_IndexLease, m_IndexFname, 0, info_needed);
00081     
00082     Int4 * FileInfo = (Int4*) m_IndexLease.GetPtr(0);
00083     
00084     // Check for consistence of files and parameters
00085     
00086     Int4 Version = SeqDB_GetStdOrd(& FileInfo[0]);
00087     
00088     if (Version != ISAM_VERSION)
00089         return eBadVersion;
00090     
00091     Int4 IsamType = SeqDB_GetStdOrd(& FileInfo[1]);
00092     
00093     if (IsamType == eNumericLongId && m_Type == eNumeric) {
00094         m_LongId = true;
00095         m_TermSize = 12;
00096         IsamType = eNumeric;
00097     }
00098     
00099     if (IsamType != m_Type)
00100         return eBadType;
00101     
00102     m_NumTerms    = SeqDB_GetStdOrd(& FileInfo[3]);
00103     m_NumSamples  = SeqDB_GetStdOrd(& FileInfo[4]);
00104     m_PageSize    = SeqDB_GetStdOrd(& FileInfo[5]);
00105     m_MaxLineSize = SeqDB_GetStdOrd(& FileInfo[6]);
00106     
00107     if(m_PageSize != MEMORY_ONLY_PAGE_SIZE) { 
00108         // Special case of memory-only index
00109         m_DataFileLength = SeqDB_GetStdOrd(& FileInfo[2]);
00110         
00111         TIndx disk_file_length(0);
00112         bool found_data_file =
00113             m_Atlas.GetFileSize(m_DataFname, disk_file_length, locked);
00114         
00115         if ((! found_data_file) || (m_DataFileLength != disk_file_length)) {
00116             return eWrongFile;
00117         }
00118     }
00119     
00120     // This space reserved for future use
00121     
00122     m_IdxOption = SeqDB_GetStdOrd(& FileInfo[7]);
00123     
00124     m_KeySampleOffset = (9 * sizeof(Int4));
00125     
00126     m_Initialized = true;
00127     
00128     return eNoError;
00129 }
00130 
00131 Int4 CSeqDBIsam::x_GetPageNumElements(Int4   sample_num,
00132                                       Int4 * start)
00133 {
00134     Int4 num_elements(0);
00135     
00136     *start = sample_num * m_PageSize;
00137     
00138     if (sample_num + 1 == m_NumSamples) {
00139         num_elements = m_NumTerms - *start;
00140     } else {
00141         num_elements = m_PageSize;
00142     }
00143     
00144     return num_elements;
00145 }
00146 
00147 CSeqDBIsam::EErrorCode
00148 CSeqDBIsam::x_SearchIndexNumeric(Int8             Number, 
00149                                  int            * Data,
00150                                  Uint4          * Index,
00151                                  Int4           & SampleNum,
00152                                  bool           & done,
00153                                  CSeqDBLockHold & locked)
00154 {
00155     m_Atlas.Lock(locked);
00156     
00157     if(m_Initialized == false) {
00158         EErrorCode error = x_InitSearch(locked);
00159         
00160         if(error != eNoError) {
00161             done = true;
00162             return error;
00163         }
00164     }
00165     
00166     if (x_OutOfBounds(Number, locked)) {
00167         done = true;
00168         return eNotFound;
00169     }
00170     
00171     _ASSERT(m_Type != eNumericNoData);
00172     
00173     // Search the sample file.
00174     
00175     Int4 Start     (0);
00176     Int4 Stop      (m_NumSamples - 1);
00177     
00178     while(Stop >= Start) {
00179         SampleNum = ((Uint4)(Stop + Start)) >> 1;
00180     
00181         TIndx offset_begin = m_KeySampleOffset + (m_TermSize * SampleNum);
00182         TIndx offset_end   = offset_begin + m_TermSize;
00183     
00184         m_Atlas.Lock(locked);
00185         
00186         if (! m_IndexLease.Contains(offset_begin, offset_end)) {
00187             m_Atlas.GetRegion(m_IndexLease,
00188                               m_IndexFname,
00189                               offset_begin,
00190                               offset_end);
00191         }
00192         
00193         const void* keydatap(0);
00194         
00195         Int8 Key(0);
00196         
00197         keydatap = m_IndexLease.GetPtr(offset_begin);
00198         Key = x_GetNumericKey (keydatap);
00199         
00200         // If this is an exact match, return the master term number.
00201         
00202         if (Key == Number) {
00203             if (Data != NULL) {
00204                 *Data = x_GetNumericData(keydatap);
00205             }
00206             
00207             if (Index != NULL)
00208                 *Index = SampleNum * m_PageSize;
00209             
00210             done = true;
00211             return eNoError;
00212         }
00213         
00214         // Otherwise, search for the next sample.
00215         
00216         if ( Number < Key )
00217             Stop = --SampleNum;
00218         else
00219             Start = SampleNum +1;
00220     }
00221     
00222     // If the term is out of range altogether, report not finding it.
00223     
00224     if ( (SampleNum < 0) || (SampleNum >= m_NumSamples)) {
00225         
00226         if (Data != NULL)
00227             *Data = eNotFound;
00228         
00229         if(Index != NULL)
00230             *Index = eNotFound;
00231         
00232         done = true;
00233         return eNotFound;
00234     }
00235     
00236     done = false;
00237     return eNoError;
00238 }
00239 
00240 void
00241 CSeqDBIsam::x_SearchNegativeMulti(int                  vol_start,
00242                                   int                  vol_end,
00243                                   CSeqDBNegativeList & ids,
00244                                   bool                 use_tis,
00245                                   CSeqDBLockHold     & locked)
00246 {
00247     m_Atlas.Lock(locked);
00248     
00249     if(m_Initialized == false) {
00250         EErrorCode error = x_InitSearch(locked);
00251         
00252         if(error != eNoError) {
00253             // Most ordinary errors (missing IDs for example) are
00254             // ignored for "multi" mode searches.  But if a GI list is
00255             // specified, and cannot be interpreted, it is an error.
00256             
00257             NCBI_THROW(CSeqDBException,
00258                        eArgErr,
00259                        "Error: Unable to use ISAM index in batch mode.");
00260         }
00261     }
00262     
00263     m_Atlas.Lock(locked);
00264     
00265     // We can use Parabolic Binary Search for the negative GI list but
00266     // not for the ISAM file data, because in the negative ID list
00267     // case, every line of the ISAM data must be looked at.
00268     
00269     _ASSERT(m_Type != eNumericNoData);
00270     
00271     //......................................................................
00272     //
00273     // Translate the entire Gi List.
00274     //
00275     //......................................................................
00276     
00277     int gilist_size = use_tis ? ids.GetNumTis() : ids.GetNumGis();
00278     
00279     int gilist_index = 0;
00280     
00281     int sample_index(0);
00282     const void * data_page (0);
00283     
00284     while(sample_index < m_NumSamples) {
00285         int start = 0, num_elements = 0;
00286         
00287         x_MapDataPage(sample_index,
00288                       start,
00289                       num_elements,
00290                       & data_page,
00291                       locked);
00292         
00293         for(int i = 0; i < num_elements; i++) {
00294             Int8 isam_key(0);
00295             int isam_data(0);
00296             
00297             // 1. Get the ID+OID from the data page.
00298             
00299             x_GetDataElement(data_page,
00300                              i,
00301                              isam_key,
00302                              isam_data);
00303             
00304             // 2. Look for it in the negative id list.
00305             
00306             bool found = false;
00307             
00308             if (gilist_index < gilist_size) {
00309                 found = x_FindInNegativeList(ids,
00310                                              gilist_index,
00311                                              isam_key,
00312                                              use_tis);
00313             }
00314             
00315             // 3. If not found, add the OID to the negative ID list.
00316             
00317             if (isam_data < vol_end) {
00318                 if (found) {
00319                     // OID is found, but may not be included yet.
00320                     ids.AddVisibleOid(isam_data + vol_start);
00321                 } else {
00322                     // OID is included for iteration.
00323                     ids.AddIncludedOid(isam_data + vol_start);
00324                 }
00325             }
00326         }
00327         
00328         // Move to next data page.  Note that for a negative ID list
00329         // processing, we don't actually fetch any samples, because
00330         // every ID->OID line needs to be examined anyway.
00331         
00332         sample_index ++;
00333     }
00334 }
00335 
00336 
00337 CSeqDBIsam::EErrorCode
00338 CSeqDBIsam::x_SearchDataNumeric(Int8             Number,
00339                                 int            * Data,
00340                                 Uint4          * Index,
00341                                 Int4             SampleNum,
00342                                 CSeqDBLockHold & locked)
00343 {
00344     // Load the appropriate page of numbers into memory.
00345     _ASSERT(m_Type != eNumericNoData);
00346     
00347     Int4 Start(0);
00348     Int4 NumElements = x_GetPageNumElements(SampleNum, & Start);
00349     
00350     Int4 first = Start;
00351     Int4 last  = Start + NumElements - 1;
00352     
00353     const void * KeyDataPage      = NULL;
00354     const void * KeyDataPageStart = NULL;
00355     
00356     TIndx offset_begin = Start * m_TermSize;
00357     TIndx offset_end = offset_begin + m_TermSize * NumElements;
00358     
00359     m_Atlas.Lock(locked);
00360     
00361     if (! m_DataLease.Contains(offset_begin, offset_end)) {
00362         m_Atlas.GetRegion(m_DataLease,
00363                           m_DataFname,
00364                           offset_begin,
00365                           offset_end);
00366     }
00367     
00368     KeyDataPageStart = m_DataLease.GetPtr(offset_begin);
00369     
00370     KeyDataPage = (char *)KeyDataPageStart - Start * m_TermSize;
00371     
00372     bool found   (false);
00373     Int4 current (0);
00374     
00375     // Search the page for the number.
00376     while (first <= last) {
00377         current = (first+last)/2;
00378         
00379         Int8 Key = x_GetNumericKey((char *)KeyDataPage + current * m_TermSize);
00380         
00381         if (Key > Number) {
00382             last = --current;
00383         } else if (Key < Number) {
00384             first = ++current;
00385         } else {
00386             found = true;
00387             break;
00388         }
00389     }
00390     
00391     if (found == false) {
00392         if (Data != NULL)
00393             *Data = eNotFound;
00394         
00395         if(Index != NULL)
00396             *Index = eNotFound;
00397         
00398         return eNotFound;
00399     }
00400     
00401     if (Data != NULL) {
00402         *Data = x_GetNumericData((char *)KeyDataPage + current * m_TermSize);
00403     }
00404     
00405     if(Index != NULL)
00406         *Index = Start + current;
00407     
00408     return eNoError;
00409 }
00410 
00411 
00412 // ------------------------NumericSearch--------------------------
00413 // Purpose:     Main search function of Numeric ISAM
00414 // 
00415 // Parameters:  Key - interer to search
00416 //              Data - returned value (for NIASM with data)
00417 //              Index - internal index in database
00418 // Returns:     ISAM Error Code
00419 // NOTE:        None
00420 // ----------------------------------------------------------------
00421 
00422 CSeqDBIsam::EErrorCode
00423 CSeqDBIsam::x_NumericSearch(Int8             Number, 
00424                             int            * Data,
00425                             Uint4          * Index,
00426                             CSeqDBLockHold & locked)
00427 {
00428     bool done      (false);
00429     Int4 SampleNum (0);
00430     
00431     EErrorCode error =
00432         x_SearchIndexNumeric(Number, Data, Index, SampleNum, done, locked);
00433     
00434     if (! done) {
00435         error = x_SearchDataNumeric(Number, Data, Index, SampleNum, locked);
00436     }
00437     
00438     return error;
00439 }
00440 
00441 int CSeqDBIsam::x_DiffCharLease(const string   & term_in,
00442                                 CSeqDBMemLease & lease,
00443                                 const string   & file_name,
00444                                 TIndx            file_length,
00445                                 Uint4            at_least,
00446                                 TIndx            KeyOffset,
00447                                 bool             ignore_case,
00448                                 CSeqDBLockHold & locked)
00449 {
00450     int result(-1);
00451     
00452     m_Atlas.Lock(locked);
00453     
00454     // Add one to term_end to insure we don't consider "AA" and "AAB"
00455     // as equal.
00456     
00457     TIndx offset_begin = KeyOffset;
00458     TIndx term_end     = KeyOffset + term_in.size() + 1;
00459     TIndx map_end      = term_end + at_least;
00460     
00461     if (map_end > file_length) {
00462         map_end = file_length;
00463         
00464         if (term_end > map_end) {
00465             term_end = map_end;
00466             result = int(file_length - offset_begin);
00467         }
00468     }
00469     
00470     if (! lease.Contains(offset_begin, map_end)) {
00471         m_Atlas.GetRegion( lease,
00472                            file_name,
00473                            offset_begin,
00474                            term_end );
00475     }
00476     
00477     const char * file_data = lease.GetPtr(offset_begin);
00478     
00479     Int4 dc_result =
00480         x_DiffChar(term_in,
00481                    file_data,
00482                    file_data + term_in.size() + 1,
00483                    ignore_case);
00484     
00485     if (dc_result != -1) {
00486         result = dc_result;
00487     }
00488     
00489     return dc_result;
00490 }
00491 
00492 /// Return NUL for nulls or EOL characters
00493 ///
00494 /// This function returns a NUL byte for any of NUL, CR, or NL.  This
00495 /// is done because these characters are used to terminate the
00496 /// variable length records in a string-based ISAM file.
00497 ///
00498 /// @param c
00499 ///   A character
00500 /// @return
00501 ///   NUL or the same character
00502 static inline char
00503 s_SeqDBIsam_NullifyEOLs(char c)
00504 {
00505     if (SEQDB_ISEOL(c)) {
00506         return 0;
00507     } else {
00508         return c;
00509     }
00510 }
00511 
/// Byte (value 2) that ends the key of a string ISAM record when the
/// record also carries data.
const char ISAM_DATA_CHAR = static_cast<char>(2);
00514 
00515 /// Returns true if the character is a terminator for an ISAM key.
00516 static inline bool ENDS_ISAM_KEY(char P)
00517 {
00518     return (P == ISAM_DATA_CHAR) || (s_SeqDBIsam_NullifyEOLs(P) == 0);
00519 }
00520 
00521 Int4 CSeqDBIsam::x_DiffChar(const string & term_in,
00522                             const char   * begin,
00523                             const char   * end,
00524                             bool           ignore_case)
00525 {
00526     int result(-1);
00527     int i(0);
00528     
00529     const char * file_data = begin;
00530     int bytes = int(end - begin);
00531     
00532     for(i = 0; (i < bytes) && i < (int) term_in.size(); i++) {
00533         char ch1 = term_in[i];
00534         char ch2 = file_data[i];
00535         
00536         if (ch1 != ch2) {
00537             ch1 = s_SeqDBIsam_NullifyEOLs(ch1);
00538             ch2 = s_SeqDBIsam_NullifyEOLs(ch2);
00539             
00540             if (ignore_case) {
00541                 ch1 = toupper((unsigned char) ch1);
00542                 ch2 = toupper((unsigned char) ch2);
00543             }
00544             
00545             if (ch1 != ch2) {
00546                 break;
00547             }
00548         }
00549     }
00550     
00551     const char * p = file_data + i;
00552     
00553     while((p < end) && ((*p) == ' ')) {
00554         p++;
00555     }
00556     
00557     if (((p == end) || ENDS_ISAM_KEY(*p)) && (i == (int) term_in.size())) {
00558         result = -1;
00559     } else {
00560         result = i;
00561     }
00562     
00563     return result;
00564 }
00565 
00566 void CSeqDBIsam::x_ExtractPageData(const string   & term_in,
00567                                    TIndx            page_index,
00568                                    const char     * beginp,
00569                                    const char     * endp,
00570                                    vector<TIndx>  & indices_out,
00571                                    vector<string> & keys_out,
00572                                    vector<string> & data_out)
00573 {
00574     // Collect all 'good' data from the page.
00575     
00576     bool ignore_case = true;
00577     
00578     Uint4 TermNum(0);
00579     
00580     const char * indexp(beginp);
00581     bool found_match(false);
00582     
00583     while (indexp < endp) {
00584         Int4 Diff = x_DiffChar(term_in,
00585                                indexp,
00586                                endp,
00587                                ignore_case);
00588         
00589         if (Diff == -1) { // Complete match
00590             found_match = true;
00591             
00592             x_ExtractData(indexp,
00593                           endp,
00594                           keys_out,
00595                           data_out);
00596             
00597             indices_out.push_back(page_index + TermNum);
00598         } else {
00599             // If we found a match, but the current term doesn't
00600             // match, then we are past the set of matching entries.
00601             
00602             if (found_match) {
00603                 break;
00604             }
00605         }
00606         
00607         // Skip remainder of term, and any nulls after it.
00608         
00609         while((indexp < endp) && s_SeqDBIsam_NullifyEOLs(*indexp)) {
00610             indexp++;
00611         }
00612         while((indexp < endp) && (! s_SeqDBIsam_NullifyEOLs(*indexp))) {
00613             indexp++;
00614         }
00615         
00616         TermNum++;
00617     }
00618 }
00619 
00620 void CSeqDBIsam::x_ExtractAllData(const string   & term_in,
00621                                   TIndx            sample_index,
00622                                   vector<TIndx>  & indices_out,
00623                                   vector<string> & keys_out,
00624                                   vector<string> & data_out,
00625                                   CSeqDBLockHold & locked)
00626 {
00627     // The object at sample_index is known to match; we will iterate
00628     // over the surrounding values to see if they match as well.  No
00629     // assumptions about how many keys can match are made here.
00630     
00631     bool ignore_case = true;
00632     
00633     int pre_amt  = 1;
00634     int post_amt = 1;
00635     
00636     bool done_b(false), done_e(false);
00637     
00638     const char * beginp(0);
00639     const char * endp(0);
00640     
00641     TIndx beg_off(0);
00642     TIndx end_off(0);
00643     
00644     while(! (done_b && done_e)) {
00645         if (sample_index < pre_amt) {
00646             beg_off = 0;
00647             done_b = true;
00648         } else {
00649             beg_off = sample_index - pre_amt;
00650         }
00651         
00652         if ((m_NumSamples - sample_index) < post_amt) {
00653             end_off = m_NumSamples;
00654             done_e = true;
00655         } else {
00656             end_off = sample_index + post_amt;
00657         }
00658         
00659         x_LoadPage(beg_off, end_off, & beginp, & endp, locked);
00660         
00661         if (! done_b) {
00662             Int4 diff_begin = x_DiffChar(term_in,
00663                                          beginp,
00664                                          endp,
00665                                          ignore_case);
00666             
00667             if (diff_begin != -1) {
00668                 done_b = true;
00669             } else {
00670                 pre_amt ++;
00671             }
00672         }
00673         
00674         if (! done_e) {
00675             const char * last_term(0);
00676             const char * p(endp-1);
00677             
00678             // Skip over any non-terminating junk at the end
00679             
00680             enum { eEndNulls, eLastTerm } search_stage = eEndNulls;
00681             
00682             while(p > beginp) {
00683                 bool terminal = (0 == s_SeqDBIsam_NullifyEOLs(*p));
00684                 
00685                 if (search_stage == eEndNulls) {
00686                     if (! terminal) { 
00687                         search_stage = eLastTerm;
00688                     }
00689                 } else {
00690                     if (terminal) {
00691                         last_term = p + 1;
00692                         break;
00693                     }
00694                 }
00695                 
00696                 p--;
00697             }
00698             
00699             if (! last_term) {
00700                 last_term = beginp;
00701             }
00702             
00703             Int4 diff_end = x_DiffChar(term_in,
00704                                        last_term,
00705                                        endp,
00706                                        ignore_case);
00707             
00708             if (diff_end != -1) {
00709                 done_e = true;
00710             } else {
00711                 post_amt ++;
00712             }
00713         }
00714     }
00715     
00716     x_ExtractPageData(term_in,
00717                       m_PageSize * beg_off,
00718                       beginp,
00719                       endp,
00720                       indices_out,
00721                       keys_out,
00722                       data_out);
00723 }
00724 
00725 void CSeqDBIsam::x_ExtractData(const char     * key_start,
00726                                const char     * map_end,
00727                                vector<string> & keys_out,
00728                                vector<string> & data_out)
00729 {
00730     const char * data_ptr(0);
00731     const char * p(key_start);
00732     
00733     while(p < map_end) {
00734         switch(s_SeqDBIsam_NullifyEOLs(*p)) {
00735         case 0:
00736             if (data_ptr) {
00737                 keys_out.push_back(string(key_start, data_ptr));
00738                 data_out.push_back(string(data_ptr+1, p));
00739             } else {
00740                 keys_out.push_back(string(key_start, p));
00741                 data_out.push_back("");
00742             }
00743             return;
00744             
00745         case ISAM_DATA_CHAR:
00746             data_ptr = p;
00747             
00748         default:
00749             p++;
00750         }
00751     }
00752 }
00753 
00754 CSeqDBIsam::TIndx
00755 CSeqDBIsam::x_GetIndexKeyOffset(TIndx            sample_offset,
00756                                 Uint4            sample_num,
00757                                 CSeqDBLockHold & locked)
00758 {
00759     TIndx offset_begin = sample_offset + (sample_num * sizeof(Uint4));
00760     TIndx offset_end   = offset_begin + sizeof(Uint4);
00761     
00762     m_Atlas.Lock(locked);
00763     
00764     if (! m_IndexLease.Contains(offset_begin, offset_end)) {
00765         m_Atlas.GetRegion(m_IndexLease,
00766                           m_IndexFname,
00767                           offset_begin,
00768                           offset_end);
00769     }
00770     
00771     Int4 * key_offset_addr = (Int4 *) m_IndexLease.GetPtr(offset_begin);
00772     
00773     return SeqDB_GetStdOrd(key_offset_addr);
00774 }
00775 
00776 void
00777 CSeqDBIsam::x_GetIndexString(TIndx            key_offset,
00778                              int              length,
00779                              string         & str,
00780                              bool             trim_to_null,
00781                              CSeqDBLockHold & locked)
00782 {
00783     TIndx offset_end = key_offset + length;
00784     
00785     m_Atlas.Lock(locked);
00786     
00787     if (! m_IndexLease.Contains(key_offset, offset_end)) {
00788         m_Atlas.GetRegion(m_IndexLease,
00789                           m_IndexFname,
00790                           key_offset,
00791                           offset_end);
00792     }
00793     
00794     const char * key_offset_addr =
00795         (const char *) m_IndexLease.GetPtr(key_offset);
00796     
00797     if (trim_to_null) {
00798         for(int i = 0; i<length; i++) {
00799             if (! key_offset_addr[i]) {
00800                 length = i;
00801                 break;
00802             }
00803         }
00804     }
00805     
00806     str.assign(key_offset_addr, length);
00807 }
00808 
00809 // Given an index, this computes the diff from the input term.  It
00810 // also returns the offset for that sample's key in KeyOffset.
00811 
00812 int CSeqDBIsam::x_DiffSample(const string   & term_in,
00813                              Uint4            SampleNum,
00814                              TIndx          & KeyOffset,
00815                              CSeqDBLockHold & locked)
00816 {
00817     // Meaning:
00818     // a. Compute SampleNum*4
00819     // b. Address this number into SamplePos (indexlease)
00820     // c. Swap this number to compute Key offset.
00821     // d. Add to beginning of file to get key data pointer.
00822     
00823     bool ignore_case(true);
00824     
00825     TIndx SampleOffset(m_KeySampleOffset);
00826     
00827     if(m_PageSize != MEMORY_ONLY_PAGE_SIZE) {
00828         SampleOffset += (m_NumSamples + 1) * sizeof(Uint4);
00829     }
00830     
00831     TIndx offset_begin = SampleOffset + (SampleNum * sizeof(Uint4));
00832     TIndx offset_end   = offset_begin + sizeof(Uint4);
00833     
00834     m_Atlas.Lock(locked);
00835     
00836     if (! m_IndexLease.Contains(offset_begin, offset_end)) {
00837         m_Atlas.GetRegion(m_IndexLease,
00838                           m_IndexFname,
00839                           offset_begin,
00840                           offset_end);
00841     }
00842     
00843     KeyOffset = SeqDB_GetStdOrd((Int4*) m_IndexLease.GetPtr(offset_begin));
00844     
00845     Uint4 max_lines_2 = m_MaxLineSize * 2;
00846     
00847     return x_DiffCharLease(term_in,
00848                            m_IndexLease,
00849                            m_IndexFname,
00850                            m_IndexFileLength,
00851                            max_lines_2,
00852                            KeyOffset,
00853                            ignore_case,
00854                            locked);
00855 }
00856 
00857 void CSeqDBIsam::x_LoadPage(TIndx             SampleNum1,
00858                             TIndx             SampleNum2,
00859                             const char     ** beginp,
00860                             const char     ** endp,
00861                             CSeqDBLockHold &  locked)
00862 {
00863     // Load the appropriate page of terms into memory.
00864     
00865     _ASSERT(SampleNum2 > SampleNum1);
00866     
00867     TIndx begin_offset = m_KeySampleOffset + SampleNum1       * sizeof(Uint4);
00868     TIndx end_offset   = m_KeySampleOffset + (SampleNum2 + 1) * sizeof(Uint4);
00869     
00870     m_Atlas.Lock(locked);
00871     
00872     if (! m_IndexLease.Contains(begin_offset, end_offset)) {
00873         m_Atlas.GetRegion(m_IndexLease, m_IndexFname, begin_offset, end_offset);
00874     }
00875     
00876     Uint4 * key_offsets((Uint4*) m_IndexLease.GetPtr(begin_offset));
00877     
00878     Uint4 key_off1 = SeqDB_GetStdOrd(& key_offsets[0]);
00879     Uint4 key_off2 = SeqDB_GetStdOrd(& key_offsets[SampleNum2 - SampleNum1]);
00880     
00881     if (! m_DataLease.Contains(key_off1, key_off2)) {
00882         m_Atlas.GetRegion(m_DataLease, m_DataFname, key_off1, key_off2);
00883     }
00884     
00885     *beginp = (const char *) m_DataLease.GetPtr(key_off1);
00886     *endp   = (const char *) m_DataLease.GetPtr(key_off2);
00887 }
00888 
00889 
// ------------------------StringSearch--------------------------
// Purpose:     Main search function of string search.
//
// Parameters:  term_in     - string key to search for
//              terms_out   - passed to the extraction helpers;
//                            presumably receives the matching keys
//              values_out  - passed to the extraction helpers; its
//                            size is used here to detect whether
//                            anything was found
//              indices_out - passed to the extraction helpers;
//                            presumably receives the term indices
//              locked      - lock holder forwarded to all helpers
// Returns:     ISAM Error Code: eNoError if data was extracted,
//              eNotFound if not, or the code from x_InitSearch()
//              if initialization failed.
// NOTE:        The "short match" / "follow match" branches below
//              are never taken, since both flags are fixed false.
// --------------------------------------------------------------

CSeqDBIsam::EErrorCode
CSeqDBIsam::x_StringSearch(const string   & term_in,
                           vector<string> & terms_out,
                           vector<string> & values_out,
                           vector<TIndx>  & indices_out,
                           CSeqDBLockHold & locked)
{
    // These are always false; They may relate to the prior find_one /
    // expand_to_many method of getting multiple OIDs.

    bool short_match(false);
    bool follow_match(false);

    // Remember how many values the caller already had, so that the
    // final check can tell whether this call added anything.

    size_t preexisting_data_count = values_out.size();

    // Lazily initialize the search state; propagate any failure.

    if (m_Initialized == false) {
        EErrorCode error = x_InitSearch(locked);

        if(error != eNoError) {
            return error;
        }
    }

    // Cheap rejection: nothing to do if the term lies outside the
    // range reported by x_OutOfBounds().

    if (x_OutOfBounds(term_in, locked)) {
        return eNotFound;
    }

    // We will set this option to avoid more complications
    bool ignore_case = true;

    // search the sample file first

    TIndx Start(0);
    TIndx Stop(m_NumSamples - 1);

    int Length = (int) term_in.size();

    // SampleOffset is only consumed by the (never taken) short_match
    // branch below.

    TIndx SampleOffset(m_KeySampleOffset);

    if(m_PageSize != MEMORY_ONLY_PAGE_SIZE) {
        SampleOffset += (m_NumSamples + 1) * sizeof(Uint4);
    }

    // found_short and short_term are written only inside the branches
    // guarded by short_match / follow_match.

    int found_short(-1);

    string short_term;
    int SampleNum(-1);

    // Binary search over the samples in [Start, Stop].

    while(Stop >= Start) {
        SampleNum = ((Uint4)(Stop + Start)) >> 1;

        TIndx KeyOffset(0);

        // x_DiffSample() compares term_in with this sample and sets
        // KeyOffset; a result of -1 is treated below as an exact match,
        // any other value is used as an index into both strings.

        int diff = x_DiffSample(term_in, SampleNum, KeyOffset, locked);

        // If this is an exact match, return the master term number.

        const char * KeyData = m_IndexLease.GetPtr(KeyOffset);

        // NOTE(review): BytesToEnd is computed and clamped here but is
        // not read again in this function.

        TIndx BytesToEnd = m_IndexFileLength - KeyOffset;

        Uint4 max_lines_2 = m_MaxLineSize * 2;

        if (BytesToEnd > (TIndx) max_lines_2) {
            BytesToEnd = max_lines_2;
        }

        if (diff == -1) {
            x_ExtractAllData(term_in,
                             SampleNum,
                             indices_out,
                             terms_out,
                             values_out,
                             locked);

            return eNoError;
        }

        // If the key is a superset of the sample term, backup until
        // just before the term.
        //
        // (Unreachable at present: short_match is always false.)

        if (short_match && (diff >= Length)) {
            if (SampleNum > 0)
                SampleNum--;

            while(SampleNum > 0) {
                TIndx key_offset =
                    x_GetIndexKeyOffset(SampleOffset,
                                        SampleNum,
                                        locked);

                string prefix;
                x_GetIndexString(key_offset, Length, prefix, false, locked);

                if (ignore_case) {
                    if (NStr::CompareNocase(prefix, term_in) != 0) {
                        break;
                    }
                } else {
                    if (prefix != term_in) {
                        break;
                    }
                }

                SampleNum--;
            }

            found_short = SampleNum + 1;

            TIndx key_offset =
                x_GetIndexKeyOffset(SampleOffset,
                                    SampleNum + 1,
                                    locked);

            string prefix;
            x_GetIndexString(key_offset, max_lines_2, short_term, true, locked);

            break;
        } else {
            // If preceding is desired, note the key.
            //
            // (Unreachable at present: follow_match is always false.)

            if (follow_match) {
                found_short = SampleNum;

                x_GetIndexString(KeyOffset, max_lines_2, short_term, true, locked);
            }
        }

        // Otherwise, search for the next sample.
        //
        // The character at position diff decides the direction.  Note
        // that the "lower half" branch decrements SampleNum itself, so
        // if the loop ends there SampleNum names the sample before the
        // one that was just compared.

        if (ignore_case
            ? tolower((unsigned char) term_in[diff]) < tolower((unsigned char) KeyData[diff])
            : term_in[diff] < KeyData[diff]) {
            Stop = --SampleNum;
        } else {
            Start = SampleNum + 1;
        }
    }


    // If the term is out of range altogether, report not finding it.

    if ( (SampleNum < 0) || (SampleNum >= m_NumSamples)) {
        return eNotFound;
    }

    // Load the appropriate page of terms into memory.

    const char * beginp(0);
    const char * endp(0);

    x_LoadPage(SampleNum, SampleNum + 1, & beginp, & endp, locked);

    // Search the page for the term.

    x_ExtractPageData(term_in,
                      m_PageSize * SampleNum,
                      beginp,
                      endp,
                      indices_out,
                      terms_out,
                      values_out);

    // For now the short and follow logic is not implemented.

    EErrorCode rv(eNoError);

    // No growth in values_out means the page held no match.

    if (preexisting_data_count == values_out.size()) {
        rv = eNotFound;
    }

    return rv;
}
01072 
01073 CSeqDBIsam::CSeqDBIsam(CSeqDBAtlas  & atlas,
01074                        const string & dbname,
01075                        char           prot_nucl,
01076                        char           file_ext_char,
01077                        ESeqDBIdType   ident_type)
01078     : m_Atlas          (atlas),
01079       m_IdentType      (ident_type),
01080       m_IndexLease     (atlas),
01081       m_DataLease      (atlas),
01082       m_Type           (eNumeric),
01083       m_NumTerms       (0),
01084       m_NumSamples     (0),
01085       m_PageSize       (0),
01086       m_MaxLineSize    (0),
01087       m_IdxOption      (0),
01088       m_Initialized    (false),
01089       m_KeySampleOffset(0),
01090       m_TestNonUnique  (true),
01091       m_FileStart      (0),
01092       m_FirstOffset    (0),
01093       m_LastOffset     (0),
01094       m_LongId         (false),
01095       m_TermSize       (8)
01096 {
01097     // These are the types that readdb.c seems to use.
01098     
01099     switch(ident_type) {
01100     case eGiId:
01101     case ePigId:
01102     case eTiId:
01103         m_Type = eNumeric;
01104         break;
01105         
01106     case eStringId:
01107     case eHashId:
01108         m_Type = eString;
01109         break;
01110         
01111     default:
01112         NCBI_THROW(CSeqDBException,
01113                    eArgErr,
01114                    "Error: ident type argument not valid");
01115     }
01116     
01117     x_MakeFilenames(dbname,
01118                     prot_nucl,
01119                     file_ext_char,
01120                     m_IndexFname,
01121                     m_DataFname);
01122     
01123     if (! (CFile(m_IndexFname).Exists() &&
01124            CFile(m_DataFname).Exists()) ) {
01125         
01126         string msg("Error: Could not open input file (");
01127         msg += m_IndexFname + "/" + m_DataFname + ")";
01128         NCBI_THROW(CSeqDBException, eFileErr, msg);
01129     }
01130     
01131     if(m_Type == eNumeric) {
01132         m_PageSize = DEFAULT_NISAM_SIZE;
01133     } else {
01134         m_PageSize = DEFAULT_SISAM_SIZE;
01135     }
01136 }
01137 
01138 void CSeqDBIsam::x_MakeFilenames(const string & dbname,
01139                                  char           prot_nucl,
01140                                  char           file_ext_char,
01141                                  string       & index_name,
01142                                  string       & data_name)
01143 {
01144     if (dbname.empty() ||
01145         (! isalpha((unsigned char) prot_nucl)) ||
01146         (! isalpha((unsigned char) file_ext_char))) {
01147         
01148         NCBI_THROW(CSeqDBException,
01149                    eArgErr,
01150                    "Error: argument not valid");
01151     }
01152     
01153     index_name.reserve(dbname.size() + 4);
01154     data_name.reserve(dbname.size() + 4);
01155     
01156     index_name = dbname;
01157     index_name += '.';
01158     index_name += prot_nucl;
01159     index_name += file_ext_char;
01160     
01161     data_name = index_name;
01162     index_name += 'i';
01163     data_name  += 'd';
01164 }
01165 
01166 bool CSeqDBIsam::IndexExists(const string & dbname,
01167                              char           prot_nucl,
01168                              char           file_ext_char)
01169 {
01170     string iname, dname;
01171     x_MakeFilenames(dbname, prot_nucl, file_ext_char, iname, dname);
01172     
01173     return CFile(iname).Exists() && CFile(dname).Exists();
01174 }
01175 
01176 CSeqDBIsam::~CSeqDBIsam()
01177 {
01178     UnLease();
01179 }
01180 
01181 void CSeqDBIsam::UnLease()
01182 {
01183     if (! m_IndexLease.Empty()) {
01184         m_Atlas.RetRegion(m_IndexLease);
01185     }
01186     if (! m_DataLease.Empty()) {
01187         m_Atlas.RetRegion(m_DataLease);
01188     }
01189 }
01190 
01191 bool CSeqDBIsam::x_IdentToOid(Int8 ident, TOid & oid, CSeqDBLockHold & locked)
01192 {
01193     EErrorCode err =
01194         x_NumericSearch(ident, & oid, 0, locked);
01195     
01196     if (err == eNoError) {
01197         return true;
01198     }
01199     
01200     oid = -1u;  /* NCBI_FAKE_WARNING */
01201     
01202     return false;
01203 }
01204 
01205 void CSeqDBIsam::StringToOids(const string   & acc,
01206                               vector<TOid>   & oids,
01207                               bool             adjusted,
01208                               bool           & version_check,
01209                               CSeqDBLockHold & locked)
01210 {
01211     bool strip_version = version_check;
01212     version_check = false;
01213     
01214     _ASSERT(m_IdentType == eStringId);
01215     
01216     m_Atlas.Lock(locked);
01217     
01218     if(m_Initialized == false) {
01219         if (eNoError != x_InitSearch(locked)) {
01220             return;
01221         }
01222     }
01223     
01224     bool found = false;
01225     
01226     string accession(string("gb|") + acc + "|");
01227     string locus_str(string("gb||") + acc);
01228     
01229     EErrorCode err = eNoError;
01230     
01231     vector<string> keys_out;
01232     vector<string> data_out;
01233     vector<TIndx>  indices_out;
01234     
01235     if (! adjusted) {
01236         if ((err = x_StringSearch(accession,
01237                                   keys_out,
01238                                   data_out,
01239                                   indices_out,
01240                                   locked)) < 0) {
01241             return;
01242         }
01243         
01244         if (err == eNoError) {
01245             found = true;
01246         }
01247         
01248         if ((! found) &&
01249             (err = x_StringSearch(locus_str,
01250                                   keys_out,
01251                                   data_out,
01252                                   indices_out,
01253                                   locked)) < 0) {
01254             return;
01255         }
01256         
01257         if (err != eNotFound) {
01258             found = true;
01259         }
01260     }
01261     
01262     if ((! found) &&
01263         (err = x_StringSearch(acc,
01264                               keys_out,
01265                               data_out,
01266                               indices_out,
01267                               locked)) < 0) {
01268         
01269         return;
01270     }
01271     
01272     if (err != eNotFound) {
01273         found = true;
01274     }
01275     
01276     if ((! found) && strip_version) {
01277         size_t pos = acc.find(".");
01278         
01279         bool is_version = false;
01280         
01281         if (pos != string::npos) {
01282             int ver_len = acc.size() - pos - 1;
01283             
01284             is_version = (ver_len <= 3 && ver_len >= 1);
01285             
01286             for(size_t vp = pos+1; vp < acc.size(); vp++) {
01287                 if (! isdigit(acc[vp])) {
01288                     is_version = false;
01289                     break;
01290                 }
01291             }
01292         }
01293         
01294         if (is_version) {
01295             string nover(acc, 0, pos);
01296             
01297             err = x_StringSearch(nover,
01298                                  keys_out,
01299                                  data_out,
01300                                  indices_out,
01301                                  locked);
01302             
01303             if (data_out.size()) {
01304                 version_check = true;
01305             }
01306             
01307             if (err < 0) {
01308                 return;
01309             }
01310         }
01311     }
01312     
01313     if (err != eNotFound) {
01314         found = true;
01315     }
01316     
01317     if (! found) {
01318         // Use CSeq_id to parse the id string and build a replacement,
01319         // FASTA type string.  This allows some IDs, such as PDBs with
01320         // chains, such as '1qcfA' to be parsed.
01321         
01322         string id;
01323         
01324         try {
01325             CSeq_id seqid(acc, CSeq_id::fParse_RawText | CSeq_id::fParse_AnyLocal);
01326             id = seqid.AsFastaString();
01327         }
01328         catch(CSeqIdException &) {
01329         }
01330         
01331         if (id.size() &&
01332             ((err = x_StringSearch(id,
01333                                    keys_out,
01334                                    data_out,
01335                                    indices_out,
01336                                    locked)) < 0)) {
01337             return;
01338         }
01339     }
01340     
01341     if (err != eNotFound) {
01342         found = true;
01343     }
01344     
01345     if (found) {
01346         ITERATE(vector<string>, iter, data_out) {
01347             oids.push_back(atoi((*iter).c_str()));
01348         }
01349     }
01350 }
01351 
01352 bool CSeqDBIsam::x_SparseStringToOids(const string   &,
01353                                       vector<int>    &,
01354                                       bool,
01355                                       CSeqDBLockHold &)
01356 {
01357     cerr << " this should be derived from readdb_acc2fastaEx().." << endl;
01358     _TROUBLE;
01359     return false;
01360 }
01361 
01362 void CSeqDBIsam::IdsToOids(int              vol_start,
01363                            int              vol_end,
01364                            CSeqDBGiList   & ids,
01365                            CSeqDBLockHold & locked)
01366 {
01367     // The vol_start parameter is needed because translations in the
01368     // GI list should refer to global OIDs, not per-volume OIDs.
01369     
01370     switch (m_IdentType) {
01371     case eGiId:
01372         x_TranslateGiList<int>(vol_start, ids, locked);
01373         break;
01374 
01375     case eTiId:
01376         x_TranslateGiList<Int8>(vol_start, ids, locked);
01377         break;
01378 
01379     case eStringId:
01380         x_TranslateGiList<string>(vol_start, ids, locked);
01381         break;
01382 
01383     default: 
01384         NCBI_THROW(CSeqDBException,
01385                        eArgErr,
01386                        "Error: Wrong type of idlist specified.");
01387     }
01388 }
01389 
01390 void CSeqDBIsam::IdsToOids(int                  vol_start,
01391                            int                  vol_end,
01392                            CSeqDBNegativeList & ids,
01393                            CSeqDBLockHold     & locked)
01394 {
01395     // The vol_start parameter is needed because translations in the
01396     // GI list should refer to global OIDs, not per-volume OIDs.
01397     
01398     _ASSERT(m_IdentType == eGiId || m_IdentType == eTiId);
01399     
01400     m_Atlas.Lock(locked);
01401     ids.InsureOrder();
01402     
01403     if ((m_IdentType == eGiId) && ids.GetNumGis()) {
01404         x_SearchNegativeMulti(vol_start,
01405                               vol_end,
01406                               ids,
01407                               false,
01408                               locked);
01409     }
01410     
01411     if ((m_IdentType == eTiId) && ids.GetNumTis()) {
01412         x_SearchNegativeMulti(vol_start,
01413                               vol_end,
01414                               ids,
01415                               true,
01416                               locked);
01417     }
01418 }
01419 
01420 void CSeqDBIsam::x_FindIndexBounds(CSeqDBLockHold & locked)
01421 {
01422     Int4 Start (0);
01423     Int4 Stop  (m_NumSamples - 1);
01424     
01425     m_Atlas.Lock(locked);
01426     
01427     if (m_Type == eNumeric) {
01428         //
01429         // Get first key from data file
01430         
01431         int num_elements(0);
01432         int start(0);
01433         const void * data_page(0);
01434         
01435         x_MapDataPage(Start,
01436                       start,
01437                       num_elements,
01438                       & data_page,
01439                       locked);
01440         
01441         _ASSERT(num_elements);
01442         
01443         int elem_index = 0;
01444         
01445         Int8 data_gi(0);
01446         int data_oid(-1);
01447         
01448         x_GetDataElement(data_page,
01449                          elem_index,
01450                          data_gi,
01451                          data_oid);
01452         
01453         m_FirstKey.SetNumeric(data_gi);
01454         
01455         
01456         //
01457         // Get last key from data file
01458         
01459         x_MapDataPage(Stop,
01460                       start,
01461                       num_elements,
01462                       & data_page,
01463                       locked);
01464         
01465         _ASSERT(num_elements);
01466         
01467         elem_index = num_elements - 1;
01468         
01469         x_GetDataElement(data_page,
01470                          elem_index,
01471                          data_gi,
01472                          data_oid);
01473         
01474         m_LastKey.SetNumeric(data_gi);
01475     } else {
01476         //
01477         // Load the appropriate page of terms into memory.
01478         
01479         const char * beginp(0);
01480         const char * endp(0);
01481         
01482         //
01483         // Load the first page
01484         
01485         x_LoadPage(Start, Start + 1, & beginp, & endp, locked);
01486         
01487         // Get first term
01488         
01489         vector<string> keys_out;
01490         vector<string> data_out; // not used
01491         
01492         x_ExtractData(beginp,
01493                       endp,
01494                       keys_out,
01495                       data_out);
01496         
01497         x_Lower(keys_out.front());
01498         m_FirstKey.SetString(keys_out.front());
01499         
01500         
01501         //
01502         // Load the last page
01503         
01504         x_LoadPage(Stop, Stop + 1, & beginp, & endp, locked);
01505         
01506         // Advance to last item
01507         
01508         const char * lastp(0);
01509         const char * indexp(beginp);
01510         
01511         while (indexp < endp) {
01512             // Remember our new "last term" value.
01513             
01514             lastp = indexp;
01515             
01516             // Skip remainder of term, and any nulls after it.
01517             
01518             while((indexp < endp) && s_SeqDBIsam_NullifyEOLs(*indexp)) {
01519                 indexp++;
01520             }
01521             while((indexp < endp) && (! s_SeqDBIsam_NullifyEOLs(*indexp))) {
01522                 indexp++;
01523             }
01524         }
01525         
01526         // Get the last key
01527         
01528         _ASSERT(lastp);
01529         
01530         keys_out.clear();
01531         data_out.clear();
01532         
01533         x_ExtractData(lastp,
01534                       endp,
01535                       keys_out,
01536                       data_out);
01537         
01538         x_Lower(keys_out.front());
01539         m_LastKey.SetString(keys_out.front());
01540     }
01541 }
01542 
01543 bool CSeqDBIsam::x_OutOfBounds(Int8 key, CSeqDBLockHold & locked)
01544 {
01545     if (! m_FirstKey.IsSet()) {
01546         x_FindIndexBounds(locked);
01547     }
01548     
01549     if (! (m_FirstKey.IsSet() && m_LastKey.IsSet())) {
01550         return false;
01551     }
01552     
01553     _ASSERT(m_Type == eNumeric);
01554     
01555     if (m_FirstKey.OutsideFirstBound(key)) {
01556         return true;
01557     }
01558     
01559     if (m_LastKey.OutsideLastBound(key)) {
01560         return true;
01561     }
01562     
01563     return false;
01564 }
01565 
01566 bool CSeqDBIsam::x_OutOfBounds(string key, CSeqDBLockHold & locked)
01567 {
01568     if (! m_FirstKey.IsSet()) {
01569         x_FindIndexBounds(locked);
01570     }
01571     
01572     if (! (m_FirstKey.IsSet() && m_LastKey.IsSet())) {
01573         return false;
01574     }
01575     
01576     _ASSERT(m_Type == eString);
01577     
01578     x_Lower(key);
01579     
01580     if (m_FirstKey.OutsideFirstBound(key)) {
01581         return true;
01582     }
01583     
01584     if (m_LastKey.OutsideLastBound(key)) {
01585         return true;
01586     }
01587     
01588     return false;
01589 }
01590 
01591 void CSeqDBIsam::GetIdBounds(Int8           & low_id,
01592                              Int8           & high_id,
01593                              int            & count,
01594                              CSeqDBLockHold & locked)
01595 {
01596     m_Atlas.Lock(locked);
01597     
01598     if(m_Initialized == false) {
01599         EErrorCode error = x_InitSearch(locked);
01600         
01601         if(error != eNoError) {
01602             count = 0;
01603             return;
01604         }
01605     }
01606     
01607     if (! (m_FirstKey.IsSet() && m_LastKey.IsSet())) {
01608         x_FindIndexBounds(locked);
01609     }
01610     
01611     low_id = m_FirstKey.GetNumeric();
01612     high_id = m_LastKey.GetNumeric();
01613     count = m_NumTerms;
01614 }
01615 
01616 void CSeqDBIsam::GetIdBounds(string         & low_id,
01617                              string         & high_id,
01618                              int            & count,
01619                              CSeqDBLockHold & locked)
01620 {
01621     m_Atlas.Lock(locked);
01622     
01623     if(m_Initialized == false) {
01624         EErrorCode error = x_InitSearch(locked);
01625         
01626         if(error != eNoError) {
01627             count = 0;
01628             return;
01629         }
01630     }
01631     
01632     if (! (m_FirstKey.IsSet() && m_LastKey.IsSet())) {
01633         x_FindIndexBounds(locked);
01634     }
01635     
01636     low_id = m_FirstKey.GetString();
01637     high_id = m_LastKey.GetString();
01638     count = m_NumTerms;
01639 }
01640 
01641 void CSeqDBIsam::HashToOids(unsigned         hash,
01642                             vector<TOid>   & oids,
01643                             CSeqDBLockHold & locked)
01644 {
01645     _ASSERT(m_IdentType == eHashId);
01646     
01647     m_Atlas.Lock(locked);
01648     
01649     if(m_Initialized == false) {
01650         if (eNoError != x_InitSearch(locked)) {
01651             return;
01652         }
01653     }
01654     
01655     bool found = false;
01656     
01657     string key(NStr::UIntToString(hash));
01658     
01659     EErrorCode err = eNoError;
01660     
01661     vector<string> keys_out;
01662     vector<string> data_out;
01663     vector<TIndx>  indices_out;
01664     
01665     if ((err = x_StringSearch(key,
01666                               keys_out,
01667                               data_out,
01668                               indices_out,
01669                               locked)) < 0) {
01670         return;
01671     }
01672     
01673     if (err != eNotFound) {
01674         found = true;
01675     }
01676     
01677     if (found) {
01678         ITERATE(vector<string>, iter, data_out) {
01679             oids.push_back(atoi(iter->c_str()));
01680         }
01681     }
01682 }
01683 
01684 END_NCBI_SCOPE
01685 
Modified on Wed Mar 04 13:50:40 2015 by modify_doxy.py rev. 426318