Definition in file seqdbcommon.cpp.
#include <ncbi_pch.hpp>
#include <corelib/metareg.hpp>
#include <corelib/ncbienv.hpp>
#include <corelib/ncbifile.hpp>
#include <objtools/blast/seqdb_reader/seqdbcommon.hpp>
#include <util/sequtil/sequtil.hpp>
#include <util/sequtil/sequtil_convert.hpp>
#include <objects/seq/seq__.hpp>
#include <objects/general/general__.hpp>
#include "seqdbgeneral.hpp"
#include "seqdbatlas.hpp"
#include <algorithm>
Include dependency graph for seqdbcommon.cpp:

Go to the source code of this file.
Classes | |
| class | CSeqDB_FileExistence |
| File existence test interface. More... | |
| class | CSeqDB_AtlasAccessor |
| Check file existence using CSeqDBAtlas. More... | |
| class | CSeqDB_SimpleAccessor |
| Check file existence using CFile. More... | |
| class | CSeqDB_SortOidLessThan |
| Compare SGiOid structs by OID. More... | |
| class | CSeqDB_SortGiLessThan |
| Compare SGiOid structs by GI. More... | |
| class | CSeqDB_SortTiLessThan |
| Compare STiOid structs by TI. More... | |
| class | CSeqDB_SortSeqIdLessThan |
| Compare SSeqIdOid structs by SeqId. More... | |
Functions | |
| CSeqDB_Substring | SeqDB_RemoveDirName (CSeqDB_Substring s) |
| Returns a filename minus greedy path. | |
| CSeqDB_Substring | SeqDB_RemoveFileName (CSeqDB_Substring s) |
| Returns a path minus filename. | |
| CSeqDB_Substring | SeqDB_RemoveExtn (CSeqDB_Substring s) |
| Returns a filename minus its extension. | |
| bool | SeqDB_SplitString (CSeqDB_Substring &buffer, CSeqDB_Substring &front, char delim) |
| Parse a prefix from a substring. | |
| void | SeqDB_CombinePath (const CSeqDB_Substring &one, const CSeqDB_Substring &two, const CSeqDB_Substring *extn, string &outp) |
| Combine a filesystem path and file name. | |
| static bool | s_SeqDB_DBExists (const string &dbname, char dbtype, CSeqDB_FileExistence &access) |
| Test whether an index or alias file exists. | |
| static string | s_GetPathSplitter () |
| Returns the character used to separate path components in the current operating system or platform. | |
| void | SeqDB_ConvertOSPath (string &dbs) |
| Change path delimiters to platform preferred kind in-place. | |
| string | SeqDB_MakeOSPath (const string &dbs) |
| Return path with delimiters changed to platform preferred kind. | |
| static string | s_SeqDB_TryPaths (const string &blast_paths, const string &dbname, char dbtype, bool exact, CSeqDB_FileExistence &access) |
| Search for a file in a provided set of paths. | |
| static string | s_SeqDB_FindBlastDBPath (const string &dbname, char dbtype, string *sp, bool exact, CSeqDB_FileExistence &access, const string path="") |
| string | SeqDB_FindBlastDBPath (const string &dbname, char dbtype, string *sp, bool exact, CSeqDBAtlas &atlas, CSeqDBLockHold &locked) |
| Finds a file in the search path. | |
| string | SeqDB_ResolveDbPath (const string &filename) |
| Resolve a file path using SeqDB's path algorithms. | |
| void | SeqDB_JoinDelim (string &a, const string &b, const string &delim) |
| Join two strings with a delimiter. | |
| template<class TCompare, class TVector> | |
| void | s_InsureOrder (TVector &v) |
| void | SeqDB_ReadBinaryGiList (const string &fname, vector< int > &gis) |
| Read a binary-format GI list from a file. | |
| static bool | s_SeqDB_IsBinaryGiList (const char *fbeginp, const char *fendp, bool &has_long_ids) |
| This function determines whether a file is a valid binary gi file. | |
| void | SeqDB_ReadMemoryGiList (const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SGiOid > &gis, bool *in_order) |
| Read a text or binary GI list from an area of memory. | |
| void | SeqDB_ReadMemoryTiList (const char *fbeginp, const char *fendp, vector< CSeqDBGiList::STiOid > &tis, bool *in_order) |
| Read a text or binary TI list from an area of memory. | |
| bool | SeqDB_IsBinaryGiList (const string &fname) |
| Returns true if the file name passed contains a binary gi list. | |
| void | SeqDB_ReadGiList (const string &fname, vector< CSeqDBGiList::SGiOid > &gis, bool *in_order) |
| Read a text or binary GI list from a file. | |
| void | SeqDB_ReadTiList (const string &fname, vector< CSeqDBGiList::STiOid > &tis, bool *in_order) |
| Read a text or binary TI list from a file. | |
| void | SeqDB_ReadGiList (const string &fname, vector< int > &gis, bool *in_order) |
| Read a text or binary GI list from a file. | |
| void | SeqDB_CombineAndQuote (const vector< string > &dbs, string &dbname) |
| Combine and quote list of database names. | |
| void | SeqDB_SplitQuoted (const string &dbname, vector< CTempString > &dbs) |
| Split a (possibly) quoted list of database names into pieces. | |
| void | SeqDB_SplitQuoted (const string &dbname, vector< CSeqDB_Substring > &dbs) |
| Split a (possibly) quoted list of database names into pieces. | |
| void | SeqDB_FileIntegrityAssert (const string &file, int line, const string &text) |
| Report file corruption by throwing an eFile CSeqDBException. | |
Variables | |
| static char const | rcsid [] = "$Id: seqdbcommon.cpp 168560 2009-08-18 12:28:42Z maning $" |
|
|
Returns the character used to separate path components in the current operating system or platform.
Definition at line 251 of file seqdbcommon.cpp. Referenced by s_SeqDB_TryPaths(). |
|
||||||||||
|
Definition at line 547 of file seqdbcommon.cpp. |
|
||||||||||||||||
|
Test whether an index or alias file exists. The provided filename is combined with both of the extensions appropriate to the database sequence type, and the resulting strings are checked for existence in the file system. The 'access' object defines how to check file existence.
Definition at line 223 of file seqdbcommon.cpp. References CSeqDB_FileExistence::DoesFileExist(). Referenced by s_SeqDB_TryPaths(). |
|
||||||||||||||||||||||||||||
|
Definition at line 360 of file seqdbcommon.cpp. References CSeqDBAtlas::GenerateSearchPath(), and s_SeqDB_TryPaths(). Referenced by SeqDB_FindBlastDBPath(), and SeqDB_ResolveDbPath(). |
|
||||||||||||||||
|
This function determines whether a file is a valid binary gi file.
Definition at line 779 of file seqdbcommon.cpp. References NCBI_THROW. Referenced by SeqDB_IsBinaryGiList(), SeqDB_ReadMemoryGiList(), and SeqDB_ReadMemoryTiList(). |
|
||||||||||||||||||||||||
|
Search for a file in a provided set of paths. This function takes a search path as a ":" delimited set of path names, and searches in those paths for the given database component. The component name may include path components. If the exact flag is set, the path is assumed to contain any required extension; otherwise extensions for index and alias files will be tried. Each element of the search path is tried in sequential order for both index and alias files (if exact is not set), before moving to the next element of the search path. The path returned from this function will not contain a file extension unless the provided filename did (in which case, exact is normally set).
Definition at line 315 of file seqdbcommon.cpp. References CSeqDB_FileExistence::DoesFileExist(), NStr::eMergeDelims, ITERATE, s_GetPathSplitter(), s_SeqDB_DBExists(), SeqDB_CombinePath(), SeqDB_MakeOSPath(), and NStr::Tokenize(). Referenced by s_SeqDB_FindBlastDBPath(). |
|
||||||||||||
|
Combine and quote list of database names. SeqDB permits multiple databases to be opened by a single CSeqDB instance, by passing the database names as a space-delimited list to the CSeqDB constructor. To support paths and filenames with embedded spaces, surround any space-containing names with double quotes ('"'). Filenames not containing spaces may be quoted safely with no effect. (This solution prevents the use of names containing embedded double quotes.) This method combines a list of database names into a string encoded in this way.
Definition at line 1313 of file seqdbcommon.cpp. References size. |
|
||||||||||||||||||||
|
Combine a filesystem path and file name. Combine a provided filesystem path and a file name. This function tries to avoid duplicated delimiters. If either string is empty, the other is returned. Conceptually, the first path might be the current working directory and the second path is a filename. So, if the second path starts with "/", the first path is ignored. Also, care is taken to avoid duplicated delimiters. If the first path ends with the delimiter character, another delimiter will not be added between the strings. The delimiter used will vary from operating system to operating system, and is adjusted accordingly. If a file extension is specified, it will also be appended.
Definition at line 133 of file seqdbcommon.cpp. References CDirEntry::GetPathSeparator(), one, CSeqDB_Substring::Size(), and two. Referenced by CSeqDB_BasePath::CSeqDB_BasePath(), CSeqDB_Path::CSeqDB_Path(), CSeqDB_Path::ReplaceFilename(), and s_SeqDB_TryPaths(). |
|
|
Change path delimiters to platform preferred kind in-place. The path is modified in place. The 'Convert' interface is more efficient for cases where the new path would be assigned to the same string object. Delimiter conversion should be called by SeqDB at least once on any path received from the user, or via filesystem sources such as alias files.
Definition at line 265 of file seqdbcommon.cpp. References CDirEntry::GetPathSeparator(). Referenced by CSeqDB_BasePath::FixDelimiters(), CSeqDB_FileName::FixDelimiters(), CSeqDB_BaseName::FixDelimiters(), and SeqDB_MakeOSPath(). |
|
||||||||||||||||
|
Report file corruption by throwing an eFile CSeqDBException. This function is only called in the case of validation failure, and is used in code paths where the validation failure may be related to file corruption or filesystem problems. File data is considered a user input, so checks for corrupt file are treated as input validation. This means that (1) checks that may be caused by file corruption scenarios are not disabled in debug mode, and (2) an exception (rather than an abort) is used. Note that this function does not check the assert, so it should only be called in case of failure.
Definition at line 1789 of file seqdbcommon.cpp. References CSeqDBException::eFileErr, NStr::IntToString(), and SeqDB_ThrowException(). |
|
||||||||||||||||||||||||||||
|
Finds a file in the search path. This function resolves the full name of a file. It searches for a file of the provided base name and returns the provided name with the full path attached. If the exact_name flag is set, the file is assumed to have any extension it may need, and none is added for searching or stripped from the return value. If exact_name is not set, the file is assumed to end in ".pin", ".nin", ".pal", or ".nal", and if such a file is found, that extension is stripped from the returned string. Furthermore, in the exact_name == false case, only file extensions relevant to the dbtype are considered. Thus, if dbtype is set to 'p' for protein, only ".pin" and ".pal" are checked for; if it is set to nucleotide, only ".nin" and ".nal" are considered. The places where the file may be found are dependent on the search path. The search path consists of the current working directory, the contents of the BLASTDB environment variable, and the BLASTDB member of the BLAST group of settings in the NCBI meta-registry. This registry is an interface to settings found in (for example) a ".ncbirc" file found in the user's home directory (but several paths are usually checked). Finally, if the provided file_name starts with the default path delimiter (which is OS dependent, but for example, "/" on Linux), the path will be taken to be absolute, and the search path will not affect the results.
Definition at line 401 of file seqdbcommon.cpp. References CSeqDBAtlas::GetSearchPath(), and s_SeqDB_FindBlastDBPath(). Referenced by CSeqDBTaxInfo::CSeqDBTaxInfo(), and CSeqDBAliasSets::x_FindBlastDBPath(). |
|
|
Returns true if the file name passed contains a binary gi list.
Definition at line 1157 of file seqdbcommon.cpp. References CMemoryFile::GetPtr(), CMemoryFile::GetSize(), s_SeqDB_IsBinaryGiList(), and SeqDB_MakeOSPath(). Referenced by BOOST_AUTO_TEST_CASE(), and CBlastDBAliasApp::CreateAliasFile(). |
|
||||||||||||||||
|
Join two strings with a delimiter. This function returns whichever of two provided strings is non-empty. If both are non-empty, they are joined with a delimiter placed between them. It is intended for use when combining strings, such as a space delimited list of database volumes. It is probably not suitable for joining file system paths with filenames (use something like SeqDB_CombinePath).
Definition at line 451 of file seqdbcommon.cpp. Referenced by CSeqDB_TitleWalker::AddString(). |
|
|
Return path with delimiters changed to platform preferred kind. The path is modified and returned. The 'Make' interface is more convenient for cases where the input path and output path are different objects. Delimiter conversion should be called by SeqDB at least once on any path received from the user, or via filesystem sources such as alias files.
Definition at line 279 of file seqdbcommon.cpp. References SeqDB_ConvertOSPath(). Referenced by CSeqDB_SimpleAccessor::DoesFileExist(), s_SeqDB_TryPaths(), SeqDB_IsBinaryGiList(), SeqDB_ReadBinaryGiList(), SeqDB_ReadGiList(), and SeqDB_ReadTiList(). |
|
||||||||||||
|
Read a binary-format GI list from a file.
Definition at line 746 of file seqdbcommon.cpp. References CMemoryFile::GetPtr(), CMemoryFile::GetSize(), NCBI_THROW, SeqDB_GetStdOrd(), and SeqDB_MakeOSPath(). |
|
||||||||||||||||
|
Read a text or binary GI list from a file. The GIs in a file are read into the provided vector<int>. If the in_order parameter is not null, the function will test the GIs for orderedness. It will set the bool to which in_order points to true if so, false if not.
Definition at line 1193 of file seqdbcommon.cpp. References ITERATE, and SeqDB_ReadGiList(). |
|
||||||||||||||||
|
Read a text or binary GI list from a file. The GIs in a file are read into the provided SGiOid vector. The GI half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the GIs for orderedness. It will set the bool to which in_order points to true if so, false if not.
Definition at line 1169 of file seqdbcommon.cpp. References CMemoryFile::GetPtr(), CMemoryFile::GetSize(), SeqDB_MakeOSPath(), and SeqDB_ReadMemoryGiList(). |
|
||||||||||||||||||||
|
Read a text or binary GI list from an area of memory. The GIs in a memory region are read into the provided SGiOid vector. The GI half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the GIs for orderedness. It will set the bool to which in_order points to true if so, false if not.
Definition at line 809 of file seqdbcommon.cpp. References _ASSERT, NCBI_THROW, s_SeqDB_IsBinaryGiList(), and SeqDB_GetStdOrd(). Referenced by CSeqDBNodeFileIdList::CSeqDBNodeFileIdList(), and SeqDB_ReadGiList(). |
|
||||||||||||||||||||
|
Read a text or binary TI list from an area of memory. The TIs in a memory region are read into the provided STiOid vector. The TI half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the TIs for orderedness. It will set the bool to which in_order points to true if so, false if not.
Definition at line 969 of file seqdbcommon.cpp. References NCBI_THROW, s_SeqDB_IsBinaryGiList(), and SeqDB_GetStdOrd(). Referenced by CSeqDBNodeFileIdList::CSeqDBNodeFileIdList(), and SeqDB_ReadTiList(). |
|
||||||||||||||||
|
Read a text or binary TI list from a file. The TIs in a file are read into the provided STiOid vector. The TI half of each element of the vector is assigned, but the OID half will be left as -1. If the in_order parameter is not null, the function will test the TIs for orderedness. It will set the bool to which in_order points to true if so, false if not.
Definition at line 1181 of file seqdbcommon.cpp. References CMemoryFile::GetPtr(), CMemoryFile::GetSize(), SeqDB_MakeOSPath(), and SeqDB_ReadMemoryTiList(). |
|
|
Returns a filename minus greedy path. Substring version. This returns the part of a file name after the last path delimiter, or the whole path if no delimiter was found.
Definition at line 52 of file seqdbcommon.cpp. References CSeqDB_Substring::EraseFront(), CSeqDB_Substring::FindLastOf(), and CDirEntry::GetPathSeparator(). Referenced by CSeqDB_Path::FindBaseName(), CSeqDB_BasePath::FindBaseName(), and CSeqDB_Path::FindFileName(). |
|
|
Returns a filename minus its extension. This returns the part of a file name before its trailing extension, or the whole name if no extension was found.
Definition at line 78 of file seqdbcommon.cpp. References CSeqDB_Substring::GetEnd(), CSeqDB_Substring::Resize(), and CSeqDB_Substring::Size(). Referenced by CSeqDB_Path::FindBaseName(), and CSeqDB_Path::FindBasePath(). |
|
|
Returns a path minus filename. Substring version of the above. This returns the part of a file path before the last path delimiter, or the whole path if no delimiter was found.
Definition at line 64 of file seqdbcommon.cpp. References CSeqDB_Substring::Clear(), CSeqDB_Substring::FindLastOf(), CDirEntry::GetPathSeparator(), and CSeqDB_Substring::Resize(). Referenced by CSeqDB_Path::FindDirName(), and CSeqDB_BasePath::FindDirName(). |
|
|
Resolve a file path using SeqDB's path algorithms. This finds a file using the same algorithm used by SeqDB to find blast database filenames. The filename must include the extension if any. Paths which start with '/', '\', or a drive letter (depending on operating system) will be treated as absolute paths. If the file is not found an empty string will be returned.
Definition at line 439 of file seqdbcommon.cpp. References s_SeqDB_FindBlastDBPath(). Referenced by BOOST_AUTO_TEST_CASE(), and s_ReadGiList(). |
|
||||||||||||
|
Split a (possibly) quoted list of database names into pieces.
Definition at line 1357 of file seqdbcommon.cpp. |
|
||||||||||||
|
Split a (possibly) quoted list of database names into pieces. SeqDB permits multiple databases to be opened by a single CSeqDB instance, by passing the database names as a space-delimited list to the CSeqDB constructor. To support paths and filenames with embedded spaces, surround any space-containing names with double quotes ('"'). Filenames not containing spaces may be quoted safely with no effect. (This solution prevents the use of names containing embedded double quotes.) This method splits a string encoded in this way into individual database names. Note that the resulting vector's objects are CTempString "slice" objects, and are only valid while the original (encoded) string is unchanged.
Definition at line 1340 of file seqdbcommon.cpp. References ITERATE, and SeqDB_SplitQuoted(). |
|
||||||||||||||||
|
Parse a prefix from a substring. The `buffer' argument is searched for a character. If found, the region before the delimiter is returned in `front' and the region after the delimiter is returned in `buffer', and true is returned. If not found, neither argument changes and false is returned.
Definition at line 115 of file seqdbcommon.cpp. References buffer, and CSeqDB_Substring::Resize(). |
|
|
Definition at line 34 of file seqdbcommon.cpp. |
1.4.6
Modified on Wed Dec 09 08:18:39 2009 by modify_doxy.py rev. 173732