#ifndef OBJTOOLS_DATA_LOADERS_BLASTDB___BLASTDB_ADAPTER__HPP #define OBJTOOLS_DATA_LOADERS_BLASTDB___BLASTDB_ADAPTER__HPP /* $Id: blastdb_adapter.hpp 90547 2020-06-26 12:58:17Z grichenk $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Christiam Camacho * * =========================================================================== */ /** @file blastdb_adapter.hpp * Interface definition of IBlastDbAdapter. */ #include <corelib/ncbistd.hpp> #include <objtools/blast/seqdb_reader/seqdb.hpp> #include <objects/seq/seq_id_handle.hpp> BEGIN_NCBI_SCOPE BEGIN_SCOPE(objects) /// When fixed size slices are not used, each subsequent slice grows its size /// by this factor #define kSliceGrowthFactor 2 /// The sequence data will sliced into pieces of this size by default enum { /// If sequence is shorter than this size, it will not be split and it will /// be loaded full as soon as its data is requested kFastSequenceLoadSize = 1024, /// If sequence is shorter than this size but greater than /// kFastSequenceLoadSize, it will be "split" into once /// piece and its sequence data will be loaded when the chunks are /// requested, otherwise, if it's larger than this, the sequence data will /// be split into multiple chunks and retrieved on demand. kSequenceSliceSize = 65536, /// Same as above, but used for fetching sequences from remote BLAST /// databases kRmtSequenceSliceSize = kSequenceSliceSize * 2 }; /** Interface that provides a common interface to retrieve sequence data from * local vs. remote BLAST databases. */ class IBlastDbAdapter : public CObject { public: /// Virtual destructor virtual ~IBlastDbAdapter() {} /// Get the molecule type of this object (protein or nucleotide) /// @return The sequence type. virtual CSeqDB::ESeqType GetSequenceType() = 0; /// Get the length of the sequence. /// @param oid An ID for this sequence in this db. /// @return Sequence length (in bases). virtual int GetSeqLength(int oid) = 0; /// Convenience typedef for a list of CSeq_id-s typedef list< CRef<CSeq_id> > TSeqIdList; /// Get the list of Seq-ids for the given OID. virtual list< CRef<CSeq_id> > GetSeqIDs(int oid) = 0; /// Get a CBioseq for the requested oid, but without sequence data. /// /// If target is specified, that defline will be promoted to the /// top of the CBioseq object, if possible /// @note The current implementation of the remote BLAST database interface /// does not implement this promotion; the blast4 service will promote /// whichever Seq-id was used to fetch the OID, which in practice /// should be the same one. /// /// @param oid An ID for this sequence in this db. /// @param target_gi If non-zero, the target GI to filter the header /// information. /// @param target_id if non-NULL, the target ID is used to filter /// the header information. /// @return object corresponding to the sequence, but without /// sequence data. virtual CRef<CBioseq> GetBioseqNoData(int oid, TGi target_gi = ZERO_GI, const CSeq_id * target_id = NULL) = 0; /// Get all or part of the sequence data as a Seq-data object. /// @param oid Identifies which sequence to get. /// @param begin Starting offset of the section to get. /// @param end Ending offset of the section to get. /// @return The sequence data. /// @note if the begin and end arguments are zero, the whole sequence will /// be returned virtual CRef<CSeq_data> GetSequence(int oid, int begin = 0, int end = 0) = 0; /// Find a Seq-id in the database and get an OID if found. /// /// If the Seq-id is found, this method returns true, and the oid argument /// will be populated accordingly. This oid should be used in the other /// methods provided by this interface. /// /// @param id The Seq-id to find. /// @param oid An ID for this sequence (if it was found). /// @return True if the sequence was found in the database. virtual bool SeqidToOid(const CSeq_id & id, int & oid) = 0; /// Retrieve the taxonomy ID for the requested sequence identifier /// @param idh The Seq-id for which to get the taxonomy ID /// @return taxonomy ID if found, otherwise kInvalidSeqPos virtual TTaxId GetTaxId(const CSeq_id_Handle& /*idh*/) { return INVALID_TAX_ID; } }; END_SCOPE(objects) END_NCBI_SCOPE #endif /* OBJTOOLS_DATA_LOADERS_BLASTDB___BLASTDB_ADAPTER__HPP */
0001 0002 0003 0004 0005 0006 0007 0008 0009 0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 0080 0081 0082 0083 0084 0085 0086 0087 0088 0089 0090 0091 0092 0093 0094 0095 0096 0097 0098 0099 0100 0101 0102 0103 0104 0105 0106 0107 0108 0109 0110 0111 0112 0113 0114 0115 0116 0117 0118 0119 0120 0121 0122 0123 0124 0125 0126 0127 0128 0129 0130 0131 0132 0133 0134 0135 0136 0137 0138 0139