|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/algo/blast/api/repeats_filter.c |
source navigation diff markup identifier search freetext search file search |
1 #ifndef SKIP_DOXYGEN_PROCESSING
2 static char const rcsid[] = "$Id: repeats_filter.c,v 1.19 2007/12/19 21:56:48 camacho Exp $";
3 #endif /* SKIP_DOXYGEN_PROCESSING */
4
5 /*
6 * ===========================================================================
7 *
8 * PUBLIC DOMAIN NOTICE
9 * National Center for Biotechnology Information
10 *
11 * This software/database is a "United States Government Work" under the
12 * terms of the United States Copyright Act. It was written as part of
13 * the author's official duties as a United States Government employee and
14 * thus cannot be copyrighted. This software/database is freely available
15 * to the public for use. The National Library of Medicine and the U.S.
16 * Government have not placed any restriction on its use or reproduction.
17 *
18 * Although all reasonable efforts have been taken to ensure the accuracy
19 * and reliability of the software and data, the NLM and the U.S.
20 * Government do not and cannot warrant the performance or results that
21 * may be obtained by using this software or data. The NLM and the U.S.
22 * Government disclaim all warranties, express or implied, including
23 * warranties of performance, merchantability or fitness for any particular
24 * purpose.
25 *
26 * Please cite the author in any work or product based on this material.
27 *
28 * ===========================================================================
29 *
30 * File Name: $RCSfile: repeats_filter.c,v $
31 *
32 * Author: Ilya Dondoshansky
33 *
34 */
35
36 /** @file repeats_filter.c
37 * Repeats filtering API for the new BLAST code
38 */
39
40 /* Prototypes of functions defined below */
41 #include <algo/blast/api/repeats_filter.h>
42 #include <algo/blast/api/blast_api.h>
43 #include <algo/blast/core/blast_filter.h>
44 #include <algo/blast/core/blast_util.h>
45 #include <algo/blast/api/blast_seq.h>
46 #include <algo/blast/api/seqsrc_readdb.h>
47
48 /** @addtogroup CToolkitAlgoBlast
49 *
50 * @{
51 */
52
53 /** Sets options for a repeats search.
54 * @param options Options wrapper structure to modify [in] [out]
55 */
56 static Int2
57 s_SetRepeatsSearchOptions(SBlastOptions* options)
58 {
59 Int2 status = 0;
60
61 if (!options || !options->score_options || !options->word_options ||
62 !options->ext_options)
63 return -1;
64
65 if ((status = SBlastOptionsSetEvalue(options, REPEATS_SEARCH_EVALUE)))
66 return status;
67 if ((status = SBlastOptionsSetWordSize(options, REPEATS_SEARCH_WORD_SIZE)))
68 return status;
69 if ((status =
70 SBlastOptionsSetFilterString(options, REPEATS_SEARCH_FILTER_STRING)))
71 return status;
72
73 options->lookup_options->lut_type = eNaLookupTable;
74 options->score_options->penalty = REPEATS_SEARCH_PENALTY;
75 options->score_options->gap_open = REPEATS_SEARCH_GAP_OPEN;
76 options->score_options->gap_extend = REPEATS_SEARCH_GAP_EXTEND;
77 options->word_options->x_dropoff = REPEATS_SEARCH_XDROP_UNGAPPED;
78 options->ext_options->gap_x_dropoff_final = REPEATS_SEARCH_XDROP_FINAL;
79
80 return status;
81 }
82
83 /** Create a SeqLoc with repeat masking locations, given the results of a
84 * BLAST search against a database of repeats.
85 * @param query_seqloc Query sequence locations [in]
86 * @param results Internal results structure, returned from a BLAST search
87 * against a repeats database [in]
88 * @param mask_seqloc List of ValNode's, one per query, containing mask
89 * locations. [out]
90 */
91 static Int2
92 s_FillMaskLocFromBlastHSPResults(SeqLoc* query_seqloc, BlastHSPResults* results,
93 SeqLoc* *mask_seqloc)
94 {
95 Int4 num_seqs = 0;
96 Int4 query_index;
97 SeqLoc* slp;
98 BlastMaskLoc* mask;
99 const EBlastProgramType kProgram = eBlastTypeBlastn;
100 const Uint4 kNumContexts = BLAST_GetNumberOfContexts(eBlastTypeBlastn);
101
102 if (!query_seqloc || !mask_seqloc)
103 return -1;
104
105 *mask_seqloc = NULL;
106
107 if (!results) {
108 return 0;
109 }
110
111 num_seqs = ValNodeLen(query_seqloc);
112 mask = BlastMaskLocNew(num_seqs*kNumContexts);
113
114 for (query_index = 0, slp = query_seqloc; slp;
115 ++query_index, slp = slp->next) {
116 Int4 query_length, query_start;
117 Int4 hit_index;
118 BlastSeqLoc* loc_list = NULL, *ordered_loc_list = NULL;
119 BlastHitList* hit_list = results->hitlist_array[query_index];
120
121 if (!hit_list) {
122 continue;
123 }
124 query_length = SeqLocLen(slp);
125 query_start = SeqLocStart(slp);
126
127 /* Find all HSP intervals in query */
128 for (hit_index = 0; hit_index < hit_list->hsplist_count; ++hit_index) {
129 Int4 hsp_index;
130 BlastHSPList* hsp_list = hit_list->hsplist_array[hit_index];
131 /* HSP lists cannot be NULL! */
132 ASSERT(hsp_list);
133 for (hsp_index = 0; hsp_index < hsp_list->hspcnt; ++hsp_index) {
134 Int4 left, right;
135 BlastHSP* hsp = hsp_list->hsp_array[hsp_index];
136 /* HSP cannot be NULL! */
137 ASSERT(hsp);
138 if (hsp->query.frame == hsp->subject.frame) {
139 left = hsp->query.offset;
140 right = hsp->query.end - 1;
141 } else {
142 left = query_length - hsp->query.end;
143 right = query_length - hsp->query.offset - 1;
144 }
145 /* Shift the coordinates so they correspond to the full
146 sequence. */
147 left += query_start;
148 right += query_start;
149 BlastSeqLocNew(&loc_list, left, right);
150 }
151 }
152 /* Make the intervals unique */
153 BlastSeqLocCombine(&loc_list, REPEAT_MASK_LINK_VALUE);
154 ordered_loc_list = loc_list;
155 loc_list = NULL;
156
157 mask->seqloc_array[query_index*kNumContexts] = ordered_loc_list;
158 }
159
160 *mask_seqloc = BlastMaskLocToSeqLoc(kProgram, mask, query_seqloc);
161
162 mask = BlastMaskLocFree(mask);
163
164 return 0;
165 }
166
167 Int2
168 Blast_FindRepeatFilterSeqLoc(SeqLoc* query_seqloc,
169 const char* filter_string,
170 SeqLoc* *mask_loc,
171 SBlastMessage **message)
172 {
173 char* repeat_database = NULL;
174 SBlastOptions* options = NULL;
175 Blast_SummaryReturn* sum_returns = NULL;
176 Int2 status = 0;
177 BlastSeqSrc* seq_src = NULL;
178 SeqLoc* filter_loc = NULL; /* Dummy variable, since search will be performed
179 without filtering. */
180 BlastHSPResults* results = NULL;
181 SBlastFilterOptions* filtering_options = NULL;
182
183 ASSERT(message);
184 if (filter_string == NULL)
185 return 0;
186
187 status = BlastFilteringOptionsFromString(eBlastTypeBlastn, filter_string,
188 &filtering_options, NULL);
189
190 if (status)
191 return status;
192
193 /* If repeat filtering not requested, return success. */
194 if (filtering_options == NULL || filtering_options->repeatFilterOptions == NULL)
195 {
196 filtering_options = SBlastFilterOptionsFree(filtering_options);
197 return 0;
198 }
199
200 repeat_database = filtering_options->repeatFilterOptions->database;
201
202 sum_returns = Blast_SummaryReturnNew();
203 status = SBlastOptionsNew("blastn", &options, sum_returns);
204
205 seq_src = ReaddbBlastSeqSrcInit(repeat_database, FALSE, 0, 0);
206
207 if (!seq_src) {
208 SBlastMessageWrite(&sum_returns->error, SEV_ERROR,
209 "Initialization of subject sequences source failed", NULL, FALSE);
210 } else {
211 char* error_str = BlastSeqSrcGetInitError(seq_src);
212 if (error_str)
213 SBlastMessageWrite(&sum_returns->error, SEV_ERROR, error_str, NULL, FALSE);
214 }
215
216 /* If there was an error initializing the sequence source, return without
217 doing the search. */
218 if (sum_returns->error) {
219 *mask_loc = NULL;
220 *message = sum_returns->error;
221 sum_returns->error = NULL;
222 Blast_SummaryReturnFree(sum_returns);
223 filtering_options = SBlastFilterOptionsFree(filtering_options);
224 return -1;
225 }
226
227 s_SetRepeatsSearchOptions(options);
228
229 status =
230 Blast_RunSearch(query_seqloc, (Blast_PsiCheckpointLoc *) NULL,
231 seq_src, (SeqLoc*) NULL, options,
232 (BlastTabularFormatData*) NULL,
233 &results, &filter_loc, sum_returns);
234
235 /* The ReadDBFILE structure will not be destroyed here, because the
236 initialising function used readdb_attach */
237 BlastSeqSrcFree(seq_src);
238
239 Blast_SummaryReturnFree(sum_returns);
240
241 /* filter_loc must be NULL on return from Blast_RunSearch, but call the
242 destruction function anyway - in case changes are made in the future. */
243 filter_loc = Blast_ValNodeMaskListFree(filter_loc);
244
245 options = SBlastOptionsFree(options);
246 filtering_options = SBlastFilterOptionsFree(filtering_options);
247
248 if (!status) {
249 /* Extract the repeat locations from the search results */
250 s_FillMaskLocFromBlastHSPResults(query_seqloc, results, mask_loc);
251 }
252
253 results = Blast_HSPResultsFree(results);
254 return status;
255 }
256
257 /* @} */
258
259 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |