NCBI C Toolkit Cross Reference

C/algo/blast/api/twoseq_api.h


  1 /* $Id: twoseq_api.h,v 1.14 2009/05/27 19:29:17 camacho Exp $
  2 ***************************************************************************
  3 *                                                                         *
  4 *                             COPYRIGHT NOTICE                            *
  5 *                                                                         *
  6 * This software/database is categorized as "United States Government      *
  7 * Work" under the terms of the United States Copyright Act.  It was       *
  8 * produced as part of the author's official duties as a Government        *
  9 * employee and thus can not be copyrighted.  This software/database is    *
 10 * freely available to the public for use without a copyright notice.      *
 11 * Restrictions can not be placed on its present or future use.            *
 12 *                                                                         *
 13 * Although all reasonable efforts have been taken to ensure the accuracy  *
 14 * and reliability of the software and data, the National Library of       *
 15 * Medicine (NLM) and the U.S. Government do not and can not warrant the   *
 16 * performance or results that may be obtained by using this software,     *
 17 * data, or derivative works thereof.  The NLM and the U.S. Government     *
 18 * disclaim any and all warranties, expressed or implied, as to the        *
 19 * performance, merchantability or fitness for any particular purpose or   *
 20 * use.                                                                    *
 21 *                                                                         *
 22 * In any work or product derived from this material, proper attribution   *
 23 * of the author(s) as the source of the software or data would be         *
 24 * appreciated.                                                            *
 25 *                                                                         *
 26 * Author: Jason Papadopoulos                                              *
 27 *                                                                         *
 28 ***************************************************************************/
 29 
 30 /** @file twoseq_api.h
 31  * Functions for C toolkit applications to compare two sequences using the
 32  * rewritten BLAST engine.
 33  */ 
 34 
 35 #ifndef _TWOSEQ_API_H_
 36 #define _TWOSEQ_API_H_
 37 
 38 #include <ncbi.h>
 39 #include <objseq.h>
 40 #include <tofasta.h>
 41 #include <sqnutils.h>
 42 #include <algo/blast/api/blast_returns.h>
 43 #include <algo/blast/api/blast_options_api.h>
 44 #include <algo/blast/api/blast_seqalign.h>
 45 
 46 /** @addtogroup CToolkitAlgoBlast
 47  *
 48  * @{
 49  */
 50 
 51 /** Maximal query length (in bases) for which blastn is used by default.
 52  * For queries longer than this cutoff, Mega BLAST or discontiguous Mega BLAST
 53  * becomes the default for fast or sensitive searches, respectively.
 54  */
 55 #define MEGABLAST_CUTOFF 10000
 56 
 57 /**
 58  * The type of blast search to perform. For nucleotide searches,
 59  * the blastn algorithm is used unless the first input sequence
 60  * exceeds MEGABLAST_CUTOFF bases in size. In that case, megablast
 61  * is used instead. If the blast_hint is eBlastHint_Sensitive, discontiguous
 62  * megablast with word size 11 is used (and any user-specified
 63  * word size is ignored).
 64  */
 65 enum blast_type {
 66     eChoose = 100,     /**< Choose type of search by sequences molecule type:
 67                             n-n=blastn, p-p=blastp, n-p=blastx, p-n=tblastn */
 68     eBlastn = 101,     /**< blastn or megablast (determined automatically) */
 69     eBlastp = 102,     /**< blastp search between protein sequences */
 70     eBlastx = 103,     /**< blastx for nucleotide vs protein sequences */
 71     eTblastn = 104,    /**< tblastn for protein vs nucleotide sequences */
 72     eTblastx = 105     /**< tblastx for translated nucleotide sequences */
 73 };
 74 
 75 /**
 76  * Provide a hint on how the search is to be set up. At
 77  * present this only applies to nucleotide searches.
 78  */
 79 enum blast_hint {
 80     eBlastHint_Sensitive = 0,     /**< trade off speed for sensitivity */
 81     eBlastHint_Fast = 1,           /**< trade off sensitivity for speed */
 82     eBlastHint_None = 2           /**< no hint provided; do not attempt to guess what is desired */
 83 };
 84 /** Choice of single-hit or multiple-hit initial seeds for extension. */
 85 typedef enum seed_type {
 86    eDefaultSeedType = 0, /**< BLAST will decide which method to use based on 
 87                             program and other information. */
 88    eOneHit = 1,          /**< Require only one initial hit for extension */
 89    eTwoHits = 2           /**< Require more than one hit within a window 
 90                             for extension */
 91 } seed_type;
 92 
 93 /**
 94   * The main user-visible setup structure for the API. This
 95   * only makes a (small) subset of the complete options available.
 96   */
 97 typedef struct {
 98     enum blast_hint hint;       /**< for nucleotide searches, how should
 99                                      the search be set up? 
100                                      Default = eBlastHint_Sensitive */
101     enum blast_type program;    /**< the BLAST program to use.
102                                      Default = eChoose */
103     char strand;                /**< For nucleotide searches, the strand
104                                      of the first sequence to check: 
105                                      choices are Seq_strand_{plus|minus|both}.
106                                      Default is Seq_strand_both */
107     double cutoff_evalue;       /**< Alignments whose E value is larger than
108                                      this number are discarded. Default 10.0 */
109     char* matrix;               /**< The scoring matrix to use (protein
110                                      searches only). NULL means "BLOSUM62".
111                                      Default is NULL */
112     char* filter_string;        /**< Specifies filtering to apply to the
113                                      first of the two input sequences. 
114                                      NULL or "T" implies DUST/SEG, "F"
115                                      turns off filtering. Default = NULL */
116     Int4 word_size;             /**< The word size to use. 0 chooses the
117                                      default for the specified program
118                                      (i.e. 3 for blastp, 11 for blastn, 
119                                      28 for blastn with large sequences).
120                                      Default = 0 */
121     Boolean gapped_calculation; /**< Perform gapped alignments. Default = TRUE */
122     Boolean use_megablast;      /**< Use megablast for the search. Default = FALSE. */
123     Int4 nucleotide_match;      /**< For nucleotide searches, the reward
124                                      for matching letters (default 1) */
125     Int4 nucleotide_mismatch;   /**< For nucleotide searches, the penalty
126                                      for mismatching letters (default -3) */
127     Int4 gap_open;              /**< Cost of opening a gap. Default=0, invokes 
128                                      default values: 5 for nucleotide; 
129                                      depends on matrix for protein search. */
130     Int4 gap_extend;            /**< Cost of extending a gap. Default=0, 
131                                      invokes default values: 2 for nucleotide; 
132                                      depends on matrix for protein search. */
133     Int4 gap_x_dropoff;         /**< Dropoff value for the gapped extension.
134                                      Default=0, invokes default values. */
135     double db_length;           /**< Database length to use in statistical 
136                                      calculations. 
137                                      Default=0 means "database length" is set
138                                      to the subject sequence length for each
139                                      subject sequence. */
140     Int4 word_threshold;        /**< Threshold for finding neighboring words
141                                      in protein searches. Default=0, which
142                                      invokes default values. */
143     Int4 longest_intron;        /**< Used in uneven sum gap statistics. Only used 
144                                      with tblastn right now.  Default = 0 (turned off) */
145     seed_type init_seed_method; /**< Single-hit or multiple-hit choice of 
146                                      initial seeds for extension. */
147 } BLAST_SummaryOptions;
148 
149 
150 /**
151   * Allocate storage for an API setup structure and set the
152   * default options for it. Release it with BLAST_SummaryOptionsFree().
153   *
154   * @param options pointer to be updated with newly allocated structure [out]
155   * @return 0 for successful allocation, -1 otherwise
156   */
157 Int2 BLAST_SummaryOptionsInit(BLAST_SummaryOptions **options);
158 
159 /**
160   * Free the storage previously allocated for an API setup structure.
161   *
162   * @param options pointer to the structure to be freed [in]
163   * @return always NULL
164   */
165 BLAST_SummaryOptions* BLAST_SummaryOptionsFree(BLAST_SummaryOptions *options);
166 
167 /**
168   * Perform a BLAST search on the two input sequences and return
169   * the list of alignments the search generates.
170   * @param options structure describing how the search will be configured [in]
171   * @param bsp1 the first sequence to be compared. Filtering and selection
172   *             of nucleotide strand apply only to this sequence [in]
173   * @param bsp2 the second sequence to be compared [in]
174   * @param seqalign_out the list of alignments generated by the search.
175   *             If search failed or no alignments were found, set to NULL [out]
176   * @return 0 for a successful search, nonzero if search failed
177   */
178 Int2 BLAST_TwoSequencesSearch(BLAST_SummaryOptions *options,
179                               Bioseq *bsp1, 
180                               Bioseq *bsp2,
181                               SeqAlign **seqalign_out);
182 
183 /** Creates the advanced search options structure from the basic options. 
184  * @param basic_options Basic options for the two sequences search [in]
185  * @param query_seqloc Query Seq-loc, needed to find query length. [in]
186  * @param extra_returns Initialized summary returns structure. [in]
187  * @param search_options Populated advanced options structure [out]
188  * @param program_name Program name [out]
189  * @return status code; presumably 0 on success - TODO confirm against the implementation */
190 Int2 
191 Blast_SearchOptionsFromSummaryOptions(const BLAST_SummaryOptions *basic_options,
192                                       SeqLoc* query_seqloc,
193                                       Blast_SummaryReturn* extra_returns, 
194                                       SBlastOptions* *search_options,
195                                       char* *program_name);
196 
197 /**
198   * Perform a BLAST search between two sets of input sequences and return
199   * the alignments the search generates.
200   * @param options Structure describing how the search will be configured [in]
201   * @param seqloc1 The first list of sequences (queries) to be compared. 
202   *                Filtering is applied only to these sequences [in]
203   * @param seqloc2 The second list of sequences (subjects) to be compared [in]
204   * @param masking_locs locations to be used for masking [in]
205   * @param seqalign_arr Object containing the SeqAligns. [in|out]
206   * @param filter_out Masking locations [out]
207   * @param mask_at_hash set to TRUE if filtering only on lookup table [out]
208   * @param extra_returns Data needed to print the bottom of BLAST report [out]
209   * @return 0 for a successful search, nonzero if search failed
210   */
211 Int2 BLAST_TwoSeqLocSets(const BLAST_SummaryOptions *options,
212                          SeqLoc* seqloc1, SeqLoc* seqloc2,
213                          SeqLoc* masking_locs,
214                          SBlastSeqalignArray* *seqalign_arr,
215                          SeqLoc** filter_out,
216                          Boolean* mask_at_hash,
217                          Blast_SummaryReturn* *extra_returns);
218 
219 /* @} */
220 
221 #endif  /* !_TWOSEQ_API_H_ */
222 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.