NCBI C Toolkit Cross Reference

C/algo/blast/api/seqsrc_multiseq.c


  1 /*  $Id: seqsrc_multiseq.c,v 1.27 2007/05/16 18:11:11 camacho Exp $
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *               National Center for Biotechnology Information
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government have not placed any restriction on its use or reproduction.
 13 *
 14 *  Although all reasonable efforts have been taken to ensure the accuracy
 15 *  and reliability of the software and data, the NLM and the U.S.
 16 *  Government do not and cannot warrant the performance or results that
 17 *  may be obtained by using this software or data. The NLM and the U.S.
 18 *  Government disclaim all warranties, express or implied, including
 19 *  warranties of performance, merchantability or fitness for any particular
 20 *  purpose.
 21 *
 22 *  Please cite the author in any work or product based on this material.
 23 *
 24 *  Author:  Ilya Dondoshansky
 25 * ===========================================================================*/
 26 
 27 /** @file seqsrc_multiseq.c
 28  * C implementation of the BlastSeqSrc interface for a list of sequence 
 29  * locations.
 30  */
 31 
 32 #ifndef SKIP_DOXYGEN_PROCESSING
 33 static char const rcsid[] = "$Id: seqsrc_multiseq.c,v 1.27 2007/05/16 18:11:11 camacho Exp $";
 34 #endif /* SKIP_DOXYGEN_PROCESSING */
 35 
#include <algo/blast/api/seqsrc_multiseq.h>
#include <algo/blast/core/blast_seqsrc_impl.h>
#include <algo/blast/core/blast_util.h>
#include <algo/blast/api/blast_seq.h>
#include <sequtil.h>
#include <string.h>
 41 
 42 /** @addtogroup CToolkitAlgoBlast
 43  *
 44  * @{
 45  */
 46 
 47 /** Encapsulates the arguments needed to initialize multi-sequence source. */
 48 typedef struct MultiSeqSrcNewArgs {
 49     SeqLoc* seqloc_list; /**< List of sequence locations. */
 50     EBlastProgramType program; /**< Type of BLAST program */
 51 } MultiSeqSrcNewArgs;
 52 
 53 /** Contains information about all sequences in a set */
 54 typedef struct MultiSeqInfo {
 55     Boolean is_prot; /**< Are these sequences protein or nucleotide? */
 56     BLAST_SequenceBlk** seqblk_array; /**< Array of sequence blocks */
 57     Uint4 max_length; /**< Maximal length of the participating sequences */
 58     Uint4 avg_length; /**< Average length of the participating sequences */
 59     Uint4 num_seqs; /**< Number of sequences. */
 60     Boolean contents_allocated; /**< Is seqblk_array allocated or just copied */
 61 } MultiSeqInfo;
 62 
 63 /** Initializes the MultiSeqInfo structure, given a list of SeqLoc's and 
 64  * program.
 65  */
 66 static MultiSeqInfo* 
 67 s_MultiSeqInfoNew(const SeqLoc* seqloc_list, EBlastProgramType program)
 68 {
 69    Uint4 index;
 70    Uint4 num_seqs = ValNodeLen((ValNode*)seqloc_list);
 71    Uint4 max_length = 0;
 72    SeqLoc* seqloc_ptr;
 73    MultiSeqInfo* retval = (MultiSeqInfo*) calloc(1, sizeof(MultiSeqInfo));
 74 
 75    retval->num_seqs = num_seqs;
 76    retval->is_prot = 
 77       (program == eBlastTypeBlastp || program == eBlastTypePhiBlastp ||
 78        program == eBlastTypePsiBlast || program == eBlastTypeBlastx);
 79    retval->seqblk_array = (BLAST_SequenceBlk**) 
 80       calloc(retval->num_seqs, sizeof(BLAST_SequenceBlk*));
 81       
 82    for (index = 0, seqloc_ptr = (SeqLoc*) seqloc_list; 
 83         index < num_seqs; ++index, seqloc_ptr = seqloc_ptr->next) {
 84        /** @todo FIXME: check the return value and set the call SetInitErrorStr
 85         * if appropriate */
 86       BLAST_SetUpSubject(program, seqloc_ptr, &retval->seqblk_array[index]);
 87       max_length = MAX(max_length, (Uint4)retval->seqblk_array[index]->length);
 88    }
 89    retval->max_length = max_length;
 90    retval->contents_allocated = TRUE;
 91 
 92    return retval;
 93 }
 94 
 95 /** Destruct the MultiSeqInfo structure, except the SeqLoc structures, which it
 96  * does not own.
 97  */
 98 static MultiSeqInfo* 
 99 s_MultiSeqInfoFree(MultiSeqInfo* seq_info)
100 {
101    Uint4 index;
102    
103    if (seq_info->contents_allocated) {
104       for (index = 0; index < seq_info->num_seqs; ++index) {
105          BlastSequenceBlkFree(seq_info->seqblk_array[index]);
106       }
107       sfree(seq_info->seqblk_array);
108    }
109    sfree(seq_info);
110    return NULL;
111 }
112   
113 /** Retrieves the length of the longest sequence in the BlastSeqSrc.
114  * @param multiseq_handle Pointer to the structure containing sequences [in]
115  * @param ignoreme Unused by this implementation [in]
116  */
117 static Int4 
118 s_MultiSeqGetMaxLength(void* multiseq_handle, void* ignoreme)
119 {
120     Int4 retval = 0;
121     Uint4 index;
122     MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
123 
124     ASSERT(seq_info);
125     
126     if ((retval = seq_info->max_length) > 0)
127         return retval;
128 
129     for (index=0; index<seq_info->num_seqs; ++index)
130         retval = MAX(retval, seq_info->seqblk_array[index]->length);
131     seq_info->max_length = retval;
132 
133     return retval;
134 }
135 
136 /** Retrieves the length of the longest sequence in the BlastSeqSrc.
137  * @param multiseq_handle Pointer to the structure containing sequences [in]
138  * @param ignoreme Unused by this implementation [in]
139  */
140 static Int4 
141 s_MultiSeqGetAvgLength(void* multiseq_handle, void* ignoreme)
142 {
143     Int8 total_length = 0;
144     Uint4 avg_length;
145     Uint4 index;
146     MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
147 
148     ASSERT(seq_info);
149 
150     if ((avg_length = seq_info->avg_length) > 0)
151         return avg_length;
152 
153     for (index=0; index<seq_info->num_seqs; ++index) 
154         total_length += (Int8) seq_info->seqblk_array[index]->length;
155     avg_length = (Uint4) (total_length / seq_info->num_seqs);
156     seq_info->avg_length = avg_length;
157 
158     return avg_length;
159 }
160 
161 /** Retrieves the number of sequences in the BlastSeqSrc.
162  * @param multiseq_handle Pointer to the structure containing sequences [in]
163  * @param ignoreme Unused by this implementation [in]
164  */
165 static Int4 
166 s_MultiSeqGetNumSeqs(void* multiseq_handle, void* ignoreme)
167 {
168     MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
169 
170     ASSERT(seq_info);
171     return seq_info->num_seqs;
172 }
173 
174 /** Returns zero in this implementation as not supported without an alias file.
175  * @param multiseq_handle Pointer to the structure containing sequences [in]
176  * @param ignoreme Unused by this implementation [in]
177  */
178 static Int4 
179 s_MultiSeqGetNumSeqsStats(void* multiseq_handle, void* ignoreme)
180 {
181     return 0;
182 }
183 
184 /** Returns 0 as total length, indicating that this is NOT a database!
185  * @param multiseq_handle Pointer to the structure containing sequences [in]
186  * @param ignoreme Unused by this implementation [in]
187  */
188 static Int8 
189 s_MultiSeqGetTotLen(void* multiseq_handle, void* ignoreme)
190 {
191     return 0;
192 }
193 
194 /** Returns 0 for statistical total length as this is not supported without an alias file.
195  * @param multiseq_handle Pointer to the structure containing sequences [in]
196  * @param ignoreme Unused by this implementation [in]
197  */
198 static Int8 
199 s_MultiSeqGetTotLenStats(void* multiseq_handle, void* ignoreme)
200 {
201     return 0;
202 }
203 
204 /** Needed for completeness only.
205  * @param multiseq_handle Pointer to the structure containing sequences [in]
206  * @param ignoreme Unused by this implementation [in]
207  */
208 static const char* 
209 s_MultiSeqGetName(void* multiseq_handle, void* ignoreme)
210 {
211     return NULL;
212 }
213 
214 /** Retrieves the date of the BLAST database.
215  * @param multiseq_handle Pointer to the structure containing sequences [in]
216  * @param ignoreme Unused by this implementation [in]
217  */
218 static Boolean 
219 s_MultiSeqGetIsProt(void* multiseq_handle, void* ignoreme)
220 {
221     MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
222 
223     ASSERT(seq_info);
224 
225     return (Boolean) seq_info->is_prot;
226 }
227 
228 /** Retrieves the sequence meeting the criteria defined by its second argument.
229  * @param multiseq_handle Pointer to the structure containing sequences [in]
230  * @param args Pointer to BlastSeqSrcGetSeqArg structure [in]
231  * @return return codes defined in blast_seqsrc.h
232  */
233 static Int2 
234 s_MultiSeqGetSequence(void* multiseq_handle, void* args)
235 {
236     MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
237     BlastSeqSrcGetSeqArg* seq_args = (BlastSeqSrcGetSeqArg*) args;
238     Int4 index;
239 
240     ASSERT(seq_info);
241     ASSERT(args);
242 
243     if (seq_info->num_seqs == 0 || !seq_args)
244         return BLAST_SEQSRC_ERROR;
245 
246     index = seq_args->oid;
247 
248     if (index >= (Int4) seq_info->num_seqs)
249         return BLAST_SEQSRC_EOF;
250 
251     BlastSequenceBlkCopy(&seq_args->seq, seq_info->seqblk_array[index]);
252     /* If this is a nucleotide sequence, and it is the traceback stage, 
253        we need the uncompressed buffer, stored in the 'sequence_start' 
254        pointer. That buffer has a sentinel byte in case of blastn, but
255        no sentinel byte for translated programs. */
256     if (seq_args->encoding == eBlastEncodingNucleotide)
257        seq_args->seq->sequence = seq_args->seq->sequence_start + 1;
258     else if (seq_args->encoding == eBlastEncodingNcbi4na)
259        seq_args->seq->sequence = seq_args->seq->sequence_start;
260 
261     seq_args->seq->oid = index;
262     return BLAST_SEQSRC_SUCCESS;
263 }
264 
265 /** Deallocates uncompressed sequence buffer, obtained by MultiSeqGetSequence
266  * @param multiseq_handle Pointer to the structure containing sequences [in]
267  * @param args Pointer to BlastSeqSrcGetSeqArg structure [in]
268  */
269 static void
270 s_MultiSeqReleaseSequence(void* multiseq_handle, void* args)
271 {
272     BlastSeqSrcGetSeqArg* seq_args = (BlastSeqSrcGetSeqArg*) args;
273     ASSERT(seq_args);
274     if (seq_args->seq->sequence_start_allocated)
275        sfree(seq_args->seq->sequence_start);
276 }
277 
278 /** Retrieve length of a given sequence.
279  * @param multiseq_handle Pointer to the structure containing sequences [in]
280  * @param args Pointer to integer indicating index into the sequences 
281  *             vector [in]
282  * @return Length of the sequence or BLAST_SEQSRC_ERROR.
283  */
284 static Int4 
285 s_MultiSeqGetSeqLen(void* multiseq_handle, void* args)
286 {
287     MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
288     Int4 index;
289 
290     ASSERT(seq_info);
291     ASSERT(args);
292 
293     index = *((Int4*) args);
294     return seq_info->seqblk_array[index]->length;
295 }
296 
297 /** Gets the next sequence index, given a MultiSeqInfo pointer. */
298 static Int2 
299 s_MultiSeqGetNextChunk(void* multiseq_handle, BlastSeqSrcIterator* itr)
300 {
301     MultiSeqInfo* seq_info = (MultiSeqInfo*) multiseq_handle;
302 
303     ASSERT(itr);
304 
305     if (itr->current_pos == UINT4_MAX) {
306         itr->current_pos = 0;
307     }
308 
309     if (itr->current_pos >= seq_info->num_seqs)
310         return BLAST_SEQSRC_EOF;
311 
312     return BLAST_SEQSRC_SUCCESS;
313 }
314 
315 /** Gets the next sequence index, given a BlastSeqSrc pointer. */
316 static Int4 
317 s_MultiSeqIteratorNext(void* multiseq_handle, BlastSeqSrcIterator* itr)
318 {
319     Int4 retval = BLAST_SEQSRC_EOF;
320     Int2 status = 0;
321 
322     ASSERT(multiseq_handle);
323     ASSERT(itr);
324 
325     if ((status = s_MultiSeqGetNextChunk(multiseq_handle, itr))
326         == BLAST_SEQSRC_EOF) {
327         return status;
328     }
329     retval = itr->current_pos++;
330 
331     return retval;
332 }
333 
/** Resets the internal bookmark iterator; there is none in this
 * implementation, so this is a no-op.
 * @param multiseq_handle Pointer to the structure containing sequences
 *                        (unused) [in]
 */
static void
s_MultiSeqResetChunkIter(void* multiseq_handle)
{
    (void) multiseq_handle;
}
340 
341 /** Multi sequence source destructor: frees its internal data structure and the
342  * BlastSeqSrc structure itself.
343  * @param seq_src BlastSeqSrc structure to free [in]
344  * @return NULL
345  */
346 static BlastSeqSrc* 
347 s_MultiSeqSrcFree(BlastSeqSrc* seq_src)
348 {
349    MultiSeqInfo* seq_info;
350 
351     if (!seq_src) 
352         return NULL;
353 
354     seq_info = (MultiSeqInfo*)_BlastSeqSrcImpl_GetDataStructure(seq_src);
355 
356     seq_info = s_MultiSeqInfoFree(seq_info);
357     return NULL;
358 }
359 
360 /** Multi sequence source copier; copies the MultiSeqInfo structure
361  * @param seq_src BlastSeqSrc structure to copy [in]
362  * @return New BlastSeqSrc structure
363  */
364 static BlastSeqSrc* 
365 s_MultiSeqSrcCopy(BlastSeqSrc* seq_src)
366 {
367    MultiSeqInfo* seq_info;
368 
369    if (!seq_src) 
370       return NULL;
371 
372    seq_info = (MultiSeqInfo*) BlastMemDup
373        (_BlastSeqSrcImpl_GetDataStructure(seq_src), sizeof(MultiSeqInfo));
374    seq_info->contents_allocated = FALSE;
375 
376    _BlastSeqSrcImpl_SetDataStructure(seq_src, (void*) seq_info);
377     
378    return seq_src;
379 }
380 
381 /** Multi-sequence source constructor 
382  * @param retval BlastSeqSrc structure (already allocated) to populate [in]
383  * @param args Pointer to MultiSeqSrcNewArgs structure above [in]
384  * @return Updated bssp structure (with all function pointers initialized)
385  */
386 static BlastSeqSrc* 
387 s_MultiSeqSrcNew(BlastSeqSrc* retval, void* args)
388 {
389     MultiSeqSrcNewArgs* seqsrc_args = (MultiSeqSrcNewArgs*) args;
390     MultiSeqInfo* seq_info = NULL;
391 
392     ASSERT(retval);
393     ASSERT(seqsrc_args);
394     
395     seq_info = 
396         s_MultiSeqInfoNew(seqsrc_args->seqloc_list, seqsrc_args->program);
397     
398     /* Initialize the BlastSeqSrc structure fields with user-defined function
399      * pointers and seq_info */
400     _BlastSeqSrcImpl_SetDeleteFnPtr(retval, &s_MultiSeqSrcFree);
401     _BlastSeqSrcImpl_SetCopyFnPtr(retval, &s_MultiSeqSrcCopy);
402     _BlastSeqSrcImpl_SetDataStructure(retval, (void*) seq_info);
403     _BlastSeqSrcImpl_SetGetNumSeqs(retval, &s_MultiSeqGetNumSeqs);
404     _BlastSeqSrcImpl_SetGetNumSeqsStats(retval, &s_MultiSeqGetNumSeqsStats);
405     _BlastSeqSrcImpl_SetGetMaxSeqLen(retval, &s_MultiSeqGetMaxLength);
406     _BlastSeqSrcImpl_SetGetAvgSeqLen(retval, &s_MultiSeqGetAvgLength);
407     _BlastSeqSrcImpl_SetGetTotLen(retval, &s_MultiSeqGetTotLen);
408     _BlastSeqSrcImpl_SetGetTotLenStats(retval, &s_MultiSeqGetTotLenStats);
409     _BlastSeqSrcImpl_SetGetName(retval, &s_MultiSeqGetName);
410     _BlastSeqSrcImpl_SetGetIsProt(retval, &s_MultiSeqGetIsProt);
411     _BlastSeqSrcImpl_SetGetSequence(retval, &s_MultiSeqGetSequence);
412     _BlastSeqSrcImpl_SetGetSeqLen(retval, &s_MultiSeqGetSeqLen);
413     _BlastSeqSrcImpl_SetIterNext(retval, &s_MultiSeqIteratorNext);
414     _BlastSeqSrcImpl_SetResetChunkIterator(retval, &s_MultiSeqResetChunkIter);
415     _BlastSeqSrcImpl_SetReleaseSequence(retval, &s_MultiSeqReleaseSequence);
416     
417     return retval;
418 }
419 
420 BlastSeqSrc*
421 MultiSeqBlastSeqSrcInit(SeqLoc* seqloc_list, EBlastProgramType program)
422 {
423     BlastSeqSrc* seq_src;
424     BlastSeqSrcNewInfo bssn_info;
425     MultiSeqSrcNewArgs* args =
426         (MultiSeqSrcNewArgs*) calloc(1, sizeof(MultiSeqSrcNewArgs));;
427     args->seqloc_list = seqloc_list;
428     args->program = program;
429     bssn_info.constructor = &s_MultiSeqSrcNew;
430     bssn_info.ctor_argument = (void*) args;
431 
432     seq_src = BlastSeqSrcNew(&bssn_info);
433     sfree(args);
434 
435     return seq_src;
436 }
437 
438 /* @} */
439 
440 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.