|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/algo/blast/api/twoseq_api.h |
source navigation diff markup identifier search freetext search file search |
1 /* $Id: twoseq_api.h,v 1.13 2006/01/13 15:59:27 madden Exp $
2 ***************************************************************************
3 * *
4 * COPYRIGHT NOTICE *
5 * *
6 * This software/database is categorized as "United States Government *
7 * Work" under the terms of the United States Copyright Act. It was *
8 * produced as part of the author's official duties as a Government *
9 * employee and thus can not be copyrighted. This software/database is *
10 * freely available to the public for use without a copyright notice. *
11 * Restrictions can not be placed on its present or future use. *
12 * *
13 * Although all reasonable efforts have been taken to ensure the accuracy *
14 * and reliability of the software and data, the National Library of *
15 * Medicine (NLM) and the U.S. Government do not and can not warrant the *
16 * performance or results that may be obtained by using this software, *
17 * data, or derivative works thereof. The NLM and the U.S. Government *
18 * disclaim any and all warranties, expressed or implied, as to the *
19 * performance, merchantability or fitness for any particular purpose or *
20 * use. *
21 * *
22 * In any work or product derived from this material, proper attribution *
23 * of the author(s) as the source of the software or data would be *
24 * appreciated. *
25 * *
26 * Author: Jason Papadopoulos *
27 * *
28 ***************************************************************************/
29
30 /** @file twoseq_api.h
31 * Functions for C toolkit applications to compare two sequences using the
32 * rewritten BLAST engine.
33 */
34
35 #ifndef _TWOSEQ_API_H_
36 #define _TWOSEQ_API_H_
37
38 #include <ncbi.h>
39 #include <objseq.h>
40 #include <tofasta.h>
41 #include <sqnutils.h>
42 #include <algo/blast/api/blast_returns.h>
43 #include <algo/blast/api/blast_options_api.h>
44 #include <algo/blast/api/blast_seqalign.h>
45
46 /** @addtogroup CToolkitAlgoBlast
47 *
48 * @{
49 */
50
51 /** Maximum query length (in bases) for which blastn is the default algorithm.
52 * For queries longer than this cutoff, Mega BLAST (fast searches) or
53 * discontiguous Mega BLAST (sensitive searches) becomes the default instead.
54 */
55 #define MEGABLAST_CUTOFF 10000
56
57 /**
58 * The type of BLAST search to perform. For nucleotide searches,
59 * the blastn algorithm is used unless the first input sequence
60 * exceeds MEGABLAST_CUTOFF bases in size. In that case, megablast
61 * is used instead. If the blast_hint is eSensitive, discontiguous
62 * megablast with word size 11 is used (and any user-specified
63 * word size is ignored).
64 */
65 enum blast_type {
66 eChoose = 100, /**< Choose the search type from the sequences' molecule types:
67 n-n=blastn, p-p=blastp, n-p=blastx, p-n=tblastn */
68 eBlastn = 101, /**< blastn or megablast (determined automatically) */
69 eBlastp = 102, /**< blastp search between protein sequences */
70 eBlastx = 103, /**< blastx for nucleotide vs protein sequences */
71 eTblastn = 104, /**< tblastn for protein vs nucleotide sequences */
72 eTblastx = 105 /**< tblastx for translated nucleotide sequences */
73 };
74
75 /**
76 * A hint describing how the search should be set up. At
77 * present this applies only to nucleotide searches.
78 */
79 enum blast_hint {
80 eSensitive = 0, /**< trade off speed for sensitivity */
81 eFast = 1, /**< trade off sensitivity for speed */
82 eNone = 2 /**< no hint provided; do not attempt to guess what is desired. */
83 };
84
85 typedef enum seed_type { /* Single-hit or multiple-hit choice of initial seeds for extension. */
86 eDefaultSeedType = 0, /**< BLAST decides which method to use, based on the
87 program and other information. */
88 eOneHit = 1, /**< Require only one initial hit for extension */
89 eTwoHits = 2 /**< Require more than one hit within a window
90 for extension */
91 } seed_type;
92
93 /**
94 * The main user-visible setup structure for the API. It exposes
95 * only a (small) subset of the complete set of BLAST options.
96 */
97 typedef struct {
98 enum blast_hint hint; /**< For nucleotide searches, how should
99 the search be set up?
100 Default = eSensitive */
101 enum blast_type program; /**< The BLAST program to use.
102 Default = eChoose */
103 char strand; /**< For nucleotide searches, the strand
104 of the first sequence to check:
105 choices are Seq_strand_{plus|minus|both}.
106 Default is Seq_strand_both */
107 double cutoff_evalue; /**< Alignments whose E value is larger than
108 this number are discarded. Default = 10.0 */
109 char* matrix; /**< The scoring matrix to use (protein
110 searches only). NULL means "BLOSUM62".
111 Default is NULL */
112 char* filter_string; /**< Specifies filtering to apply to the
113 first of the two input sequences.
114 NULL or "T" implies DUST/SEG, "F"
115 turns off filtering. Default = NULL */
116 Int4 word_size; /**< The word size to use. 0 chooses the
117 default for the specified program
118 (e.g. 3 for blastp, 11 for blastn,
119 28 for blastn with large sequences).
120 Default = 0 */
121 Boolean gapped_calculation; /**< Perform gapped alignments. Default = TRUE */
122 Boolean use_megablast; /**< Use megablast for the search. Default = FALSE. */
123 Int4 nucleotide_match; /**< For nucleotide searches, the reward
124 for matching letters (default 1) */
125 Int4 nucleotide_mismatch; /**< For nucleotide searches, the penalty
126 for mismatching letters (default -3) */
127 Int4 gap_open; /**< Cost of opening a gap. Default=0, invokes
128 default values: 5 for nucleotide;
129 depends on matrix for protein search. */
130 Int4 gap_extend; /**< Cost of extending a gap. Default=0,
131 invokes default values: 2 for nucleotide;
132 depends on matrix for protein search. */
133 Int4 gap_x_dropoff; /**< Dropoff value for the gapped extension.
134 Default=0, invokes default values. */
135 double db_length; /**< Database length to use in statistical
136 calculations.
137 Default=0 means "database length" is set
138 to the subject sequence length for each
139 subject sequence. */
140 Int4 word_threshold; /**< Threshold for finding neighboring words
141 in protein searches. Default=0, which
142 invokes default values. */
143 Int4 longest_intron; /**< Used in uneven sum gap statistics. Only used
144 with tblastn right now. Default = 0 (turned off) */
145 seed_type init_seed_method; /**< Single-hit or multiple-hit choice of
146 initial seeds for extension (see seed_type). */
147 } BLAST_SummaryOptions;
148
149
150 /**
151 * Allocate storage for an API setup structure and set the
152 * default options for it.
153 * Storage for the structure is released by BLAST_SummaryOptionsFree().
154 * @param options pointer to be updated with the newly allocated structure [out]
155 * @return 0 for successful allocation, -1 otherwise
156 */
157 Int2 BLAST_SummaryOptionsInit(BLAST_SummaryOptions **options);
158
159 /**
160 * Free the storage previously allocated for an API setup structure.
161 *
162 * @param options pointer to the structure to be freed [in]
163 * @return always NULL
164 */
165 BLAST_SummaryOptions* BLAST_SummaryOptionsFree(BLAST_SummaryOptions *options);
166
167 /**
168 * Perform a BLAST search on the two input sequences and return
169 * the list of alignments the search generates.
170 * @param options structure describing how the search will be configured [in]
171 * @param bsp1 the first sequence to be compared. Filtering and selection
172 * of nucleotide strand apply only to this sequence [in]
173 * @param bsp2 the second sequence to be compared [in]
174 * @param seqalign_out the list of alignments generated by the search.
175 * Set to NULL if the search failed or no alignments were found [out]
176 * @return 0 for a successful search, nonzero if search failed
177 */
178 Int2 BLAST_TwoSequencesSearch(BLAST_SummaryOptions *options,
179 Bioseq *bsp1,
180 Bioseq *bsp2,
181 SeqAlign **seqalign_out);
182
183 /** Creates the advanced search options structure from the basic options.
184 * @param basic_options Basic options for the two sequences search [in]
185 * @param query_seqloc Query Seq-loc, needed to find query length. [in]
186 * @param extra_returns Initialized summary returns structure. [in]
187 * @param search_options Populated advanced options structure [out]
188 * @param program_name Program name [out]
189 * @return Status code (presumably 0 on success, nonzero on failure - TODO confirm) */
190 Int2
191 Blast_SearchOptionsFromSummaryOptions(const BLAST_SummaryOptions *basic_options,
192 SeqLoc* query_seqloc,
193 Blast_SummaryReturn* extra_returns,
194 SBlastOptions* *search_options,
195 char* *program_name);
196
197 /**
198 * Perform a BLAST search between two sets of input sequences and return
199 * the alignments the search generates through seqalign_arr.
200 * @param options Structure describing how the search will be configured [in]
201 * @param seqloc1 The first list of sequences (queries) to be compared.
202 * Filtering is applied only to these sequences [in]
203 * @param seqloc2 The second list of sequences (subjects) to be compared [in]
204 * @param masking_locs Locations to be used for masking [in]
205 * @param seqalign_arr Object containing the SeqAligns. [in|out]
206 * @param filter_out Masking locations [out]
207 * @param mask_at_hash Set to TRUE if filtering only on lookup table [out]
208 * @param extra_returns Data needed to print the bottom of BLAST report [out]
209 * @return 0 for a successful search, nonzero if search failed
210 */
211 Int2 BLAST_TwoSeqLocSets(const BLAST_SummaryOptions *options,
212 SeqLoc* seqloc1, SeqLoc* seqloc2,
213 SeqLoc* masking_locs,
214 SBlastSeqalignArray* *seqalign_arr,
215 SeqLoc** filter_out,
216 Boolean* mask_at_hash,
217 Blast_SummaryReturn* *extra_returns);
218
219 /** @} */
220
221 #endif /* !_TWOSEQ_API_H_ */
222 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |