#ifndef OBJTOOLS_DATA_LOADERS_BLASTDB___BLASTDB_ADAPTER__HPP
#define OBJTOOLS_DATA_LOADERS_BLASTDB___BLASTDB_ADAPTER__HPP

/*  $Id: blastdb_adapter.hpp 90547 2020-06-26 12:58:17Z grichenk $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 *  ===========================================================================
 *
 *  Author: Christiam Camacho
 *
 * ===========================================================================
 */

/** @file blastdb_adapter.hpp
  * Interface definition of IBlastDbAdapter.
  */

#include <corelib/ncbistd.hpp>
#include <objtools/blast/seqdb_reader/seqdb.hpp>
#include <objects/seq/seq_id_handle.hpp>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)

/// Slice growth factor: when fixed size slices are not used, each subsequent
/// slice grows its size by this factor.
/// @note This is a preprocessor macro, so the name is not confined to the
/// namespace scope in which it is defined.
#define kSliceGrowthFactor 2

/// Default size thresholds that govern how sequence data is sliced into pieces
enum {
    /// Sequences shorter than this size are never split; they are loaded in
    /// full as soon as their data is requested (1024)
    kFastSequenceLoadSize = 1 << 10,
    /// Sequences longer than kFastSequenceLoadSize but shorter than this size
    /// are "split" into a single piece whose sequence data is loaded when the
    /// chunks are requested; sequences larger than this size are split into
    /// multiple chunks and retrieved on demand (65536)
    kSequenceSliceSize    = 1 << 16,
    /// Counterpart of kSequenceSliceSize used for fetching sequences from
    /// remote BLAST databases (twice the local slice size)
    kRmtSequenceSliceSize = 2 * kSequenceSliceSize
};

/** Abstract interface that provides a uniform way to retrieve sequence data
 * from local vs. remote BLAST databases.
 *
 * Sequences are addressed by an integer OID; use SeqidToOid() to translate a
 * Seq-id into the OID expected by the other methods of this interface.
 */
class IBlastDbAdapter : public CObject
{
public:
    /// Virtual destructor
    virtual ~IBlastDbAdapter() {}

    /// Get the molecule type of this object (protein or nucleotide)
    /// @return The sequence type.
    virtual CSeqDB::ESeqType GetSequenceType() = 0;

    /// Get the length of the sequence.
    /// @param oid An ID for this sequence in this db.
    /// @return Sequence length (in bases).
    virtual int GetSeqLength(int oid) = 0;

    /// Convenience typedef for a list of CSeq_id-s (the same type that
    /// GetSeqIDs() returns)
    typedef list< CRef<CSeq_id> > TSeqIdList;

    /// Get the list of Seq-ids for the given OID.
    /// @param oid An ID for this sequence in this db.
    /// @return The Seq-ids associated with the requested OID.
    virtual list< CRef<CSeq_id> > GetSeqIDs(int oid) = 0;
    
    /// Get a CBioseq for the requested oid, but without sequence data.
    ///
    /// If a target (target_gi or target_id) is specified, that defline will
    /// be promoted to the top of the CBioseq object, if possible.
    /// @note The current implementation of the remote BLAST database interface
    /// does not implement this promotion; the blast4 service will promote
    /// whichever Seq-id was used to fetch the OID, which in practice
    /// should be the same one.
    ///
    /// @param oid An ID for this sequence in this db.
    /// @param target_gi If non-zero, the target GI to filter the header
    /// information (defaults to ZERO_GI).
    /// @param target_id If non-NULL, the target ID is used to filter
    /// the header information (defaults to NULL).
    /// @return object corresponding to the sequence, but without
    ///   sequence data.
    virtual CRef<CBioseq> GetBioseqNoData(int oid, TGi target_gi = ZERO_GI, const CSeq_id * target_id = NULL) = 0;
    
    /// Get all or part of the sequence data as a Seq-data object.
    /// @param oid    Identifies which sequence to get.
    /// @param begin  Starting offset of the section to get.
    /// @param end    Ending offset of the section to get.
    /// @return       The sequence data.
    /// @note If the begin and end arguments are both zero (their default
    /// values), the whole sequence will be returned.
    virtual CRef<CSeq_data> 
    GetSequence(int oid, int begin = 0, int end = 0) = 0;
    
    /// Find a Seq-id in the database and get an OID if found.
    ///
    /// If the Seq-id is found, this method returns true, and the oid argument
    /// will be populated accordingly. This oid should be used in the other
    /// methods provided by this interface.
    ///
    /// @param id The Seq-id to find.
    /// @param oid Output argument: an ID for this sequence (if it was found).
    /// @return True if the sequence was found in the database.
    virtual bool SeqidToOid(const CSeq_id & id, int & oid) = 0;
    
    /// Retrieve the taxonomy ID for the requested sequence identifier.
    ///
    /// This default implementation ignores its argument and always returns
    /// INVALID_TAX_ID; being virtual, it can be overridden by subclasses.
    /// @param idh The Seq-id for which to get the taxonomy ID
    /// @return taxonomy ID if found, otherwise INVALID_TAX_ID
    virtual TTaxId GetTaxId(const CSeq_id_Handle& /*idh*/) {
        return INVALID_TAX_ID;
    }
};

END_SCOPE(objects)
END_NCBI_SCOPE

#endif /* OBJTOOLS_DATA_LOADERS_BLASTDB___BLASTDB_ADAPTER__HPP */
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139