|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/algo/blast/api/seqsrc_multiseq.c |
source navigation diff markup identifier search freetext search file search |
1 /* $Id: seqsrc_multiseq.c,v 1.27 2007/05/16 18:11:11 camacho Exp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * Author: Ilya Dondoshansky
25 * ===========================================================================*/
26
27 /** @file seqsrc_multiseq.c
28 * C implementation of the BlastSeqSrc interface for a list of sequence
29 * locations.
30 */
31
32 #ifndef SKIP_DOXYGEN_PROCESSING
33 static char const rcsid[] = "$Id: seqsrc_multiseq.c,v 1.27 2007/05/16 18:11:11 camacho Exp $";
34 #endif /* SKIP_DOXYGEN_PROCESSING */
35
36 #include <algo/blast/api/seqsrc_multiseq.h>
37 #include <algo/blast/core/blast_seqsrc_impl.h>
38 #include <algo/blast/core/blast_util.h>
39 #include <algo/blast/api/blast_seq.h>
40 #include <sequtil.h>
41
42 /** @addtogroup CToolkitAlgoBlast
43 *
44 * @{
45 */
46
47 /** Encapsulates the arguments needed to initialize multi-sequence source. */
48 typedef struct MultiSeqSrcNewArgs {
49 SeqLoc* seqloc_list; /**< List of sequence locations. */
50 EBlastProgramType program; /**< Type of BLAST program */
51 } MultiSeqSrcNewArgs;
52
53 /** Contains information about all sequences in a set */
54 typedef struct MultiSeqInfo {
55 Boolean is_prot; /**< Are these sequences protein or nucleotide? */
56 BLAST_SequenceBlk** seqblk_array; /**< Array of sequence blocks */
57 Uint4 max_length; /**< Maximal length of the participating sequences */
58 Uint4 avg_length; /**< Average length of the participating sequences */
59 Uint4 num_seqs; /**< Number of sequences. */
60 Boolean contents_allocated; /**< Is seqblk_array allocated or just copied */
61 } MultiSeqInfo;
62
63 /** Initializes the MultiSeqInfo structure, given a list of SeqLoc's and
64 * program.
65 */
66 static MultiSeqInfo*
67 s_MultiSeqInfoNew(const SeqLoc* seqloc_list, EBlastProgramType program)
68 {
69 Uint4 index;
70 Uint4 num_seqs = ValNodeLen((ValNode*)seqloc_list);
71 Uint4 max_length = 0;
72 SeqLoc* seqloc_ptr;
73 MultiSeqInfo* retval = (MultiSeqInfo*) calloc(1, sizeof(MultiSeqInfo));
74
75 retval->num_seqs = num_seqs;
76 retval->is_prot =
77 (program == eBlastTypeBlastp || program == eBlastTypePhiBlastp ||
78 program == eBlastTypePsiBlast || program == eBlastTypeBlastx);
79 retval->seqblk_array = (BLAST_SequenceBlk**)
80 calloc(retval->num_seqs, sizeof(BLAST_SequenceBlk*));
81
82 for (index = 0, seqloc_ptr = (SeqLoc*) seqloc_list;
83 index < num_seqs; ++index, seqloc_ptr = seqloc_ptr->next) {
84 /** @todo FIXME: check the return value and set the call SetInitErrorStr
85 * if appropriate */
86 BLAST_SetUpSubject(program, seqloc_ptr, &retval->seqblk_array[index]);
87 max_length = MAX(max_length, (Uint4)retval->seqblk_array[index]->length);
88 }
89 retval->max_length = max_length;
90 retval->contents_allocated = TRUE;
91
92 return retval;
93 }
94
95 /** Destruct the MultiSeqInfo structure, except the SeqLoc structures, which it
96 * does not own.
97 */
98 static MultiSeqInfo*
99 s_MultiSeqInfoFree(MultiSeqInfo* seq_info)
100 {
101 Uint4 index;
102
103 if (seq_info->contents_allocated) {
104 for (index = 0; index < seq_info->num_seqs; ++index) {
105 BlastSequenceBlkFree(seq_info->seqblk_array[index]);
106 }
107 sfree(seq_info->seqblk_array);
108 }
109 sfree(seq_info);
110 return NULL;
111 }
112
113 /** Retrieves the length of the longest sequence in the BlastSeqSrc.
114 * @param multiseq_handle Pointer to the structure containing sequences [in]
115 * @param ignoreme Unused by this implementation [in]
116 */
117 static Int4
118 s_MultiSeqGetMaxLength(void* multiseq_handle, void* ignoreme)
119 {
120 Int4 retval = 0;
121 Uint4 index;
122 MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
123
124 ASSERT(seq_info);
125
126 if ((retval = seq_info->max_length) > 0)
127 return retval;
128
129 for (index=0; index<seq_info->num_seqs; ++index)
130 retval = MAX(retval, seq_info->seqblk_array[index]->length);
131 seq_info->max_length = retval;
132
133 return retval;
134 }
135
136 /** Retrieves the length of the longest sequence in the BlastSeqSrc.
137 * @param multiseq_handle Pointer to the structure containing sequences [in]
138 * @param ignoreme Unused by this implementation [in]
139 */
140 static Int4
141 s_MultiSeqGetAvgLength(void* multiseq_handle, void* ignoreme)
142 {
143 Int8 total_length = 0;
144 Uint4 avg_length;
145 Uint4 index;
146 MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
147
148 ASSERT(seq_info);
149
150 if ((avg_length = seq_info->avg_length) > 0)
151 return avg_length;
152
153 for (index=0; index<seq_info->num_seqs; ++index)
154 total_length += (Int8) seq_info->seqblk_array[index]->length;
155 avg_length = (Uint4) (total_length / seq_info->num_seqs);
156 seq_info->avg_length = avg_length;
157
158 return avg_length;
159 }
160
161 /** Retrieves the number of sequences in the BlastSeqSrc.
162 * @param multiseq_handle Pointer to the structure containing sequences [in]
163 * @param ignoreme Unused by this implementation [in]
164 */
165 static Int4
166 s_MultiSeqGetNumSeqs(void* multiseq_handle, void* ignoreme)
167 {
168 MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
169
170 ASSERT(seq_info);
171 return seq_info->num_seqs;
172 }
173
174 /** Returns zero in this implementation as not supported without an alias file.
175 * @param multiseq_handle Pointer to the structure containing sequences [in]
176 * @param ignoreme Unused by this implementation [in]
177 */
178 static Int4
179 s_MultiSeqGetNumSeqsStats(void* multiseq_handle, void* ignoreme)
180 {
181 return 0;
182 }
183
184 /** Returns 0 as total length, indicating that this is NOT a database!
185 * @param multiseq_handle Pointer to the structure containing sequences [in]
186 * @param ignoreme Unused by this implementation [in]
187 */
188 static Int8
189 s_MultiSeqGetTotLen(void* multiseq_handle, void* ignoreme)
190 {
191 return 0;
192 }
193
194 /** Returns 0 for statistical total length as this is not supported without an alias file.
195 * @param multiseq_handle Pointer to the structure containing sequences [in]
196 * @param ignoreme Unused by this implementation [in]
197 */
198 static Int8
199 s_MultiSeqGetTotLenStats(void* multiseq_handle, void* ignoreme)
200 {
201 return 0;
202 }
203
/** Name accessor, present only so the BlastSeqSrc interface is complete;
 * this implementation has no name to report.
 * @param multiseq_handle Pointer to the structure containing sequences;
 *                        not examined [in]
 * @param ignoreme Unused by this implementation [in]
 * @return Always NULL
 */
static const char*
s_MultiSeqGetName(void* multiseq_handle, void* ignoreme)
{
    (void) multiseq_handle;
    (void) ignoreme;

    return NULL;
}
213
214 /** Retrieves the date of the BLAST database.
215 * @param multiseq_handle Pointer to the structure containing sequences [in]
216 * @param ignoreme Unused by this implementation [in]
217 */
218 static Boolean
219 s_MultiSeqGetIsProt(void* multiseq_handle, void* ignoreme)
220 {
221 MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
222
223 ASSERT(seq_info);
224
225 return (Boolean) seq_info->is_prot;
226 }
227
228 /** Retrieves the sequence meeting the criteria defined by its second argument.
229 * @param multiseq_handle Pointer to the structure containing sequences [in]
230 * @param args Pointer to BlastSeqSrcGetSeqArg structure [in]
231 * @return return codes defined in blast_seqsrc.h
232 */
233 static Int2
234 s_MultiSeqGetSequence(void* multiseq_handle, void* args)
235 {
236 MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
237 BlastSeqSrcGetSeqArg* seq_args = (BlastSeqSrcGetSeqArg*) args;
238 Int4 index;
239
240 ASSERT(seq_info);
241 ASSERT(args);
242
243 if (seq_info->num_seqs == 0 || !seq_args)
244 return BLAST_SEQSRC_ERROR;
245
246 index = seq_args->oid;
247
248 if (index >= (Int4) seq_info->num_seqs)
249 return BLAST_SEQSRC_EOF;
250
251 BlastSequenceBlkCopy(&seq_args->seq, seq_info->seqblk_array[index]);
252 /* If this is a nucleotide sequence, and it is the traceback stage,
253 we need the uncompressed buffer, stored in the 'sequence_start'
254 pointer. That buffer has a sentinel byte in case of blastn, but
255 no sentinel byte for translated programs. */
256 if (seq_args->encoding == eBlastEncodingNucleotide)
257 seq_args->seq->sequence = seq_args->seq->sequence_start + 1;
258 else if (seq_args->encoding == eBlastEncodingNcbi4na)
259 seq_args->seq->sequence = seq_args->seq->sequence_start;
260
261 seq_args->seq->oid = index;
262 return BLAST_SEQSRC_SUCCESS;
263 }
264
265 /** Deallocates uncompressed sequence buffer, obtained by MultiSeqGetSequence
266 * @param multiseq_handle Pointer to the structure containing sequences [in]
267 * @param args Pointer to BlastSeqSrcGetSeqArg structure [in]
268 */
269 static void
270 s_MultiSeqReleaseSequence(void* multiseq_handle, void* args)
271 {
272 BlastSeqSrcGetSeqArg* seq_args = (BlastSeqSrcGetSeqArg*) args;
273 ASSERT(seq_args);
274 if (seq_args->seq->sequence_start_allocated)
275 sfree(seq_args->seq->sequence_start);
276 }
277
278 /** Retrieve length of a given sequence.
279 * @param multiseq_handle Pointer to the structure containing sequences [in]
280 * @param args Pointer to integer indicating index into the sequences
281 * vector [in]
282 * @return Length of the sequence or BLAST_SEQSRC_ERROR.
283 */
284 static Int4
285 s_MultiSeqGetSeqLen(void* multiseq_handle, void* args)
286 {
287 MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
288 Int4 index;
289
290 ASSERT(seq_info);
291 ASSERT(args);
292
293 index = *((Int4*) args);
294 return seq_info->seqblk_array[index]->length;
295 }
296
297 /** Gets the next sequence index, given a MultiSeqInfo pointer. */
298 static Int2
299 s_MultiSeqGetNextChunk(void* multiseq_handle, BlastSeqSrcIterator* itr)
300 {
301 MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
302
303 ASSERT(itr);
304
305 if (itr->current_pos == UINT4_MAX) {
306 itr->current_pos = 0;
307 }
308
309 if (itr->current_pos >= seq_info->num_seqs)
310 return BLAST_SEQSRC_EOF;
311
312 return BLAST_SEQSRC_SUCCESS;
313 }
314
315 /** Gets the next sequence index, given a BlastSeqSrc pointer. */
316 static Int4
317 s_MultiSeqIteratorNext(void* multiseq_handle, BlastSeqSrcIterator* itr)
318 {
319 Int4 retval = BLAST_SEQSRC_EOF;
320 Int2 status = 0;
321
322 ASSERT(multiseq_handle);
323 ASSERT(itr);
324
325 if ((status = s_MultiSeqGetNextChunk(multiseq_handle, itr))
326 == BLAST_SEQSRC_EOF) {
327 return status;
328 }
329 retval = itr->current_pos++;
330
331 return retval;
332 }
333
/** Resets the internal bookmark iterator. This implementation keeps no such
 * bookmark, so the function deliberately does nothing.
 * @param multiseq_handle Pointer to the structure containing sequences;
 *                        not examined [in]
 */
static void
s_MultiSeqResetChunkIter(void* multiseq_handle)
{
    (void) multiseq_handle;
}
340
341 /** Multi sequence source destructor: frees its internal data structure and the
342 * BlastSeqSrc structure itself.
343 * @param seq_src BlastSeqSrc structure to free [in]
344 * @return NULL
345 */
346 static BlastSeqSrc*
347 s_MultiSeqSrcFree(BlastSeqSrc* seq_src)
348 {
349 MultiSeqInfo* seq_info;
350
351 if (!seq_src)
352 return NULL;
353
354 seq_info = (MultiSeqInfo*)_BlastSeqSrcImpl_GetDataStructure(seq_src);
355
356 seq_info = s_MultiSeqInfoFree(seq_info);
357 return NULL;
358 }
359
360 /** Multi sequence source copier; copies the MultiSeqInfo structure
361 * @param seq_src BlastSeqSrc structure to copy [in]
362 * @return New BlastSeqSrc structure
363 */
364 static BlastSeqSrc*
365 s_MultiSeqSrcCopy(BlastSeqSrc* seq_src)
366 {
367 MultiSeqInfo* seq_info;
368
369 if (!seq_src)
370 return NULL;
371
372 seq_info = (MultiSeqInfo*) BlastMemDup
373 (_BlastSeqSrcImpl_GetDataStructure(seq_src), sizeof(MultiSeqInfo));
374 seq_info->contents_allocated = FALSE;
375
376 _BlastSeqSrcImpl_SetDataStructure(seq_src, (void*) seq_info);
377
378 return seq_src;
379 }
380
381 /** Multi-sequence source constructor
382 * @param retval BlastSeqSrc structure (already allocated) to populate [in]
383 * @param args Pointer to MultiSeqSrcNewArgs structure above [in]
384 * @return Updated bssp structure (with all function pointers initialized)
385 */
386 static BlastSeqSrc*
387 s_MultiSeqSrcNew(BlastSeqSrc* retval, void* args)
388 {
389 MultiSeqSrcNewArgs* seqsrc_args = (MultiSeqSrcNewArgs*) args;
390 MultiSeqInfo* seq_info = NULL;
391
392 ASSERT(retval);
393 ASSERT(seqsrc_args);
394
395 seq_info =
396 s_MultiSeqInfoNew(seqsrc_args->seqloc_list, seqsrc_args->program);
397
398 /* Initialize the BlastSeqSrc structure fields with user-defined function
399 * pointers and seq_info */
400 _BlastSeqSrcImpl_SetDeleteFnPtr(retval, &s_MultiSeqSrcFree);
401 _BlastSeqSrcImpl_SetCopyFnPtr(retval, &s_MultiSeqSrcCopy);
402 _BlastSeqSrcImpl_SetDataStructure(retval, (void*) seq_info);
403 _BlastSeqSrcImpl_SetGetNumSeqs(retval, &s_MultiSeqGetNumSeqs);
404 _BlastSeqSrcImpl_SetGetNumSeqsStats(retval, &s_MultiSeqGetNumSeqsStats);
405 _BlastSeqSrcImpl_SetGetMaxSeqLen(retval, &s_MultiSeqGetMaxLength);
406 _BlastSeqSrcImpl_SetGetAvgSeqLen(retval, &s_MultiSeqGetAvgLength);
407 _BlastSeqSrcImpl_SetGetTotLen(retval, &s_MultiSeqGetTotLen);
408 _BlastSeqSrcImpl_SetGetTotLenStats(retval, &s_MultiSeqGetTotLenStats);
409 _BlastSeqSrcImpl_SetGetName(retval, &s_MultiSeqGetName);
410 _BlastSeqSrcImpl_SetGetIsProt(retval, &s_MultiSeqGetIsProt);
411 _BlastSeqSrcImpl_SetGetSequence(retval, &s_MultiSeqGetSequence);
412 _BlastSeqSrcImpl_SetGetSeqLen(retval, &s_MultiSeqGetSeqLen);
413 _BlastSeqSrcImpl_SetIterNext(retval, &s_MultiSeqIteratorNext);
414 _BlastSeqSrcImpl_SetResetChunkIterator(retval, &s_MultiSeqResetChunkIter);
415 _BlastSeqSrcImpl_SetReleaseSequence(retval, &s_MultiSeqReleaseSequence);
416
417 return retval;
418 }
419
420 BlastSeqSrc*
421 MultiSeqBlastSeqSrcInit(SeqLoc* seqloc_list, EBlastProgramType program)
422 {
423 BlastSeqSrc* seq_src;
424 BlastSeqSrcNewInfo bssn_info;
425 MultiSeqSrcNewArgs* args =
426 (MultiSeqSrcNewArgs*) calloc(1, sizeof(MultiSeqSrcNewArgs));;
427 args->seqloc_list = seqloc_list;
428 args->program = program;
429 bssn_info.constructor = &s_MultiSeqSrcNew;
430 bssn_info.ctor_argument = (void*) args;
431
432 seq_src = BlastSeqSrcNew(&bssn_info);
433 sfree(args);
434
435 return seq_src;
436 }
437
438 /* @} */
439
440 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |