NCBI C Toolkit Cross Reference

C/algo/blast/api/seqsrc_readdb.c


  1 /*  $Id: seqsrc_readdb.c,v 1.59 2008/01/02 14:00:35 madden Exp $
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *               National Center for Biotechnology Information
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government have not placed any restriction on its use or reproduction.
 13 *
 14 *  Although all reasonable efforts have been taken to ensure the accuracy
 15 *  and reliability of the software and data, the NLM and the U.S.
 16 *  Government do not and cannot warrant the performance or results that
 17 *  may be obtained by using this software or data. The NLM and the U.S.
 18 *  Government disclaim all warranties, express or implied, including
 19 *  warranties of performance, merchantability or fitness for any particular
 20 *  purpose.
 21 *
 22 *  Please cite the author in any work or product based on this material.
 23 *
 24 *  Author:  Christiam Camacho
 25 * ===========================================================================*/
 26 
 27 /** @file seqsrc_readdb.c
 28  * Implementation of the BlastSeqSrc interface using readdb
 29  */
 30 
 31 #ifndef SKIP_DOXYGEN_PROCESSING
 32 static char const rcsid[] = "$Id: seqsrc_readdb.c,v 1.59 2008/01/02 14:00:35 madden Exp $";
 33 #endif /* SKIP_DOXYGEN_PROCESSING */
 34 
 35 #include <algo/blast/api/seqsrc_readdb.h>
 36 #include <algo/blast/core/blast_seqsrc_impl.h>
 37 #include <algo/blast/core/blast_def.h>
 38 #include <algo/blast/core/blast_util.h>
 39 
 40 /** @addtogroup CToolkitAlgoBlast
 41  *
 42  * @{
 43  */
 44 
 45 /** Retrieves the length of the longest sequence in the BlastSeqSrc.
 46  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
 47  * @param ignoreme Unused by this implementation [in]
 48  */
 49 static Int4 
 50 s_ReaddbGetMaxLength(void* readdb_handle, void* ignoreme)
 51 {
 52     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
 53     Int4 retval = 0;
 54 
 55     for (; rdfp; rdfp = rdfp->next)
 56         retval = MAX(retval, readdb_get_maxlen(rdfp));
 57 
 58     return retval;
 59 }
 60 
 61 /** Retrieves the number of sequences in the BlastSeqSrc.
 62  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
 63  * @param ignoreme Unused by this implementation [in]
 64  */
 65 static Int4 
 66 s_ReaddbGetNumSeqs(void* readdb_handle, void* ignoreme)
 67 {
 68    ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
 69    Int4 dbnseqs = 0;
 70    Int8 dblength = 0;
 71    
 72    readdb_get_totals_ex(rdfp, &dblength, &dbnseqs, TRUE);
 73    return dbnseqs;
 74 }
 75 
 76 /** Retrieves a number of sequences in the BlastSeqSrc
 77  * for use in calculating search space (and expect value).
 78  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
 79  * @param ignoreme Unused by this implementation [in]
 80  */
 81 static Int4
 82 s_ReaddbGetNumSeqsStats(void* readdb_handle, void* ignoreme)
 83 {
 84    ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
 85    Int4 dbnseqs = 0;
 86    Int8 dblength = 0;
 87 
 88    readdb_get_stats_numbers(rdfp, &dbnseqs, &dblength);
 89    return dbnseqs;
 90 }
 91 
 92 /** Retrieves the total length of all sequences in the BlastSeqSrc.
 93  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
 94  * @param ignoreme Unused by this implementation [in]
 95  */
 96 static Int8 
 97 s_ReaddbGetTotLen(void* readdb_handle, void* ignoreme)
 98 {
 99     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
100     Int4 dbnseqs = 0;
101     Int8 dblength = 0;
102 
103     readdb_get_totals_ex(rdfp, &dblength, &dbnseqs, TRUE);
104     return dblength;
105 }
106 
107 /** Retrieves the total length of all sequences in the BlastSeqSrc
108  * for use in calculating search space (and expect value).
109  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
110  * @param ignoreme Unused by this implementation [in]
111  */
112 static Int8
113 s_ReaddbGetTotLenStats(void* readdb_handle, void* ignoreme)
114 {
115     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
116     Int4 dbnseqs = 0;
117     Int8 dblength = 0;
118 
119     readdb_get_stats_numbers(rdfp, &dbnseqs, &dblength);
120     return dblength;
121 }
122 
123 /** Retrieves the average length of sequences in the BlastSeqSrc.
124  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
125  * @param ignoreme Unused by this implementation [in]
126  */
127 static Int4 
128 s_ReaddbGetAvgLength(void* readdb_handle, void* ignoreme)
129 {
130    Int8 total_length = s_ReaddbGetTotLen(readdb_handle, ignoreme);
131    Int4 num_seqs = MAX(1, s_ReaddbGetNumSeqs(readdb_handle, ignoreme));
132 
133    return (Int4) (total_length/num_seqs);
134 }
135 
136 /** Retrieves the name of the BLAST database.
137  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
138  * @param ignoreme Unused by this implementation [in]
139  */
140 static const char* 
141 s_ReaddbGetName(void* readdb_handle, void* ignoreme)
142 {
143     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
144 
145     return readdb_get_full_filename(rdfp);
146 }
147 
148 /** Retrieves the date of the BLAST database.
149  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
150  * @param ignoreme Unused by this implementation [in]
151  */
152 static Boolean 
153 s_ReaddbGetIsProt(void* readdb_handle, void* ignoreme)
154 {
155     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
156 
157     return readdb_is_prot(rdfp);
158 }
159 
160 /** Retrieves the sequence meeting the criteria defined by its second argument.
161  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
162  * @param args Pointer to BlastSeqSrcGetSeqArg structure [in]
163  * @return return codes defined in blast_seqsrc.h
164  */
165 static Int2 
166 s_ReaddbGetSequence(void* readdb_handle, void* args)
167 {
168     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
169     BlastSeqSrcGetSeqArg* readdb_args = (BlastSeqSrcGetSeqArg*) args;
170     Int4 oid = -1, len = 0, buflen = 0;
171     Uint1 *buf = NULL;
172     EBlastEncoding encoding = eBlastEncodingError;
173     Boolean has_sentinel_byte;
174     Boolean buffer_allocated;
175 
176     if (!rdfp || !readdb_args)
177         return BLAST_SEQSRC_ERROR;
178 
179     oid = readdb_args->oid;
180     encoding = readdb_args->encoding;
181     has_sentinel_byte = (encoding == eBlastEncodingNucleotide);
182     buffer_allocated = 
183        (encoding == eBlastEncodingNucleotide || encoding == eBlastEncodingNcbi4na);
184 
185     /* free buffers if necessary */
186     if (readdb_args->seq)
187         BlastSequenceBlkClean(readdb_args->seq);
188 
189     /* TODO: this should be cached somewhere */
190     if (oid >= readdb_get_num_entries_total(rdfp))
191         return BLAST_SEQSRC_EOF;
192 
193     if (!buffer_allocated) 
194         len = readdb_get_sequence(rdfp, oid, &buf);
195     else
196         len = readdb_get_sequence_ex(rdfp, oid, &buf, &buflen, has_sentinel_byte);
197        
198     if (len <= 0) {
199         sfree(buf);
200         return BLAST_SEQSRC_ERROR;
201     }
202 
203     BlastSetUp_SeqBlkNew(buf, len, &readdb_args->seq, buffer_allocated);
204     /* If there is no sentinel byte, and buffer is allocated, i.e. this is
205        the traceback stage of a translated search, set "sequence" to the same 
206        position as "sequence_start". */
207     if (buffer_allocated && !has_sentinel_byte)
208        readdb_args->seq->sequence = readdb_args->seq->sequence_start;
209 
210     readdb_args->seq->oid = oid;
211 
212     return BLAST_SEQSRC_SUCCESS;
213 }
214 
215 /** Deallocates uncompressed sequence buffer, obtained by ReaddbGetSequence.
216  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
217  * @param args Pointer to BlastSeqSrcGetSeqArg structure [in]
218  */
219 static void
220 s_ReaddbReleaseSequence(void* readdb_handle, void* args)
221 {
222     BlastSeqSrcGetSeqArg* readdb_args = (BlastSeqSrcGetSeqArg*) args;
223     ASSERT(readdb_args);
224     BlastSequenceBlkClean(readdb_args->seq);
225 }
226 
227 /** Retrieve length of a given database sequence.
228  * @param readdb_handle Pointer to initialized ReadDBFILEPtr structure [in]
229  * @param args Pointer to integer indicating ordinal id [in]
230  * @return Length of the database sequence or BLAST_SEQSRC_ERROR.
231  */
232 static Int4 
233 s_ReaddbGetSeqLen(void* readdb_handle, void* args)
234 {
235     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
236     Int4* oid = (Int4*) args;
237 
238     if (!rdfp || !oid)
239        return BLAST_SEQSRC_ERROR;
240 
241     return readdb_get_sequence_length(rdfp, *oid);
242 }
243 
244 #ifdef KAPPA_PRINT_DIAGNOSTICS
245 
246 static Blast_GiList*
247 s_ReaddbGetGis(void* readdb_handle, void* args)
248 {
249     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
250     Int4* oid = (Int4*) args;
251     Blast_GiList* retval = NULL;
252     Uint4 header = 0;
253     SeqId* sip = NULL;
254 
255     if (!rdfp || !oid)
256        return NULL;
257 
258     retval = Blast_GiListNew();
259 
260     while (readdb_get_header(rdfp, *oid, &header, &sip, NULL)) {
261         int gi = -1;
262         SeqId* best_id = SeqIdFindBest(sip, SEQID_GI);
263         if ( !best_id ) {
264             sip = SeqIdSetFree(sip);
265             continue;
266         }
267         gi = best_id->data.intvalue;
268         Blast_GiList_Append(retval, gi);
269         sip = SeqIdSetFree(sip);
270     }
271 
272     return retval;
273 }
274 
275 #endif /* KAPPA_PRINT_DIAGNOSTICS */
276 
 277 /** Mutex guarding the shared "last oid assigned" bookmark: it is held by
 278  * s_ReaddbGetNextChunk while a search thread claims its next oid chunk.
 279  */
 280 static TNlmMutex ReaddbMutex;
281 
282 
283 /** Retrieve next chunk of ordinal ids from a ReadDBFILE structure, in case
284  * it contains an oidlist.
285  * NB: this function is not MT-safe: ReaddbMutex must be locked/unlocked around
286  * any call to this function.
287  * @param rdfp List of ReadDBFILE structures [in]
288  * @param itr BLAST sequence source iterator [in]
289  * @param last_oid_assigned Last ordinal id processed [in] [out]
290  * @return Status
291  */ 
292 static Int2
293 s_ReadDbGetNextOidListChunk(ReadDBFILEPtr rdfp, BlastSeqSrcIterator* itr,
294                           Uint4* last_oid_assigned)
295      
296 {
297    Int2 status = BLAST_SEQSRC_SUCCESS;
298    OIDListPtr oidlist;
299    Uint4  gi_start, gi_end;
300    Int4* id_list;
301    Uint4 oidindex  = 0;
302 
303    if (!itr || !last_oid_assigned)
304       return BLAST_SEQSRC_ERROR;
305 
306    for ( ; rdfp; rdfp = rdfp->next) {
307       oidlist = rdfp->oidlist;
308    
309       /* If there is no OID list, go to the next readdb structure. */
310       if (!oidlist)
311          continue;
312 
313       gi_start = MAX(*last_oid_assigned, (Uint4)rdfp->start) - rdfp->start;
314       gi_end = (Uint4)oidlist->total + 1;
315       id_list = itr->oid_list;
316 
317       if (gi_start < gi_end) {
318          Uint4 bit_start = gi_start % MASK_WORD_SIZE;
319          Uint4 gi;
320 
321          for(gi = gi_start; (gi < gi_end) && (oidindex < itr->chunk_sz);) {
322             Int4 bit_end = ((gi_end - gi + bit_start) < MASK_WORD_SIZE) ?
323                (gi_end - gi + bit_start) : MASK_WORD_SIZE;
324             Int4 bit;
325             
326             Uint4 mask_index = gi / MASK_WORD_SIZE;
327             Uint4 mask_word  = Nlm_SwapUint4(oidlist->list[mask_index]);
328             
329             if ( mask_word ) {
330                for(bit = bit_start; bit<bit_end && oidindex<itr->chunk_sz; bit++) {
331                   Uint4 bitshift = (MASK_WORD_SIZE-1)-bit;
332                   
333                   if ((mask_word >> bitshift) & 1) {
334                      id_list[ oidindex++ ] = rdfp->start + (gi - bit_start) + bit;
335                   }
336                }
337                gi += bit - bit_start;
338             } else {
339                gi += bit_end - bit_start;
340             }
341             
342             bit_start = 0;
343          }
344 
345          if (oidindex == itr->chunk_sz || (oidindex && !rdfp->next)) {
346             itr->itr_type = eOidList;
347             itr->current_pos = 0;
348             *last_oid_assigned = rdfp->start + gi;
349             itr->chunk_sz = oidindex;
350             break;
351          }
352       } /* End if (gi_start < gi_end) */
353    } /* End loop over ReadDBFILE's */
354    
355    if (!rdfp) 
356       status = BLAST_SEQSRC_EOF;
357 
358    return status;
359 }
360 
361 /** Assigns next chunk of ordinal ids from the database to the sequence source
362  * iterator.
363  * @param readdb_handle Pointer to the ReadDBFILE structure [in]
364  * @param itr Iterator over the sequence source associated with the 
365  *            readdb_handle, requesting the next chunk of the 
366  *            database.  [in] [out]
367  * @return Status.
368  */
369 static Int2 
370 s_ReaddbGetNextChunk(void* readdb_handle, BlastSeqSrcIterator* itr)
371 {
372     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
373     ReadDBFILEPtr rdfp_head = rdfp;
374     unsigned int nseqs = 0;
375     Uint4 current_oid;
376     Int2 status = BLAST_SEQSRC_SUCCESS;
377     Uint4 real_readdb_entries;
378 
379     if (!rdfp || !itr)
380         return BLAST_SEQSRC_ERROR;
381 
382     real_readdb_entries = readdb_get_num_entries_total_real(rdfp);
383     
384     /* Lock the mutex before retrieving the next chunk */
385     NlmMutexLockEx(&ReaddbMutex);
386     ASSERT(rdfp->shared_info);
387 
388     current_oid = rdfp->shared_info->last_oid_assigned;
389 
390     if (current_oid < (unsigned int) rdfp->start)
391        current_oid = (unsigned int) rdfp->start;
392     
393     for ( ; rdfp && !rdfp->oidlist; rdfp = rdfp->next) {
394        if (rdfp->stop > 0) {
395           nseqs = rdfp->stop + 1;
396        } else if (rdfp->aliasnseq) {
397           nseqs = rdfp->aliasnseq;
398        } else {
399           nseqs = rdfp->num_seqs;
400        }
401        
402        if (current_oid < nseqs)
403           break;
404     }
405 
406     if (!rdfp) {
407        status = BLAST_SEQSRC_EOF;
408     } else if (!rdfp->oidlist) {
409        itr->itr_type = eOidRange;
410        itr->current_pos = itr->oid_range[0] = current_oid;
411        itr->oid_range[1] = MIN(current_oid + itr->chunk_sz, nseqs);
412        rdfp_head->shared_info->last_oid_assigned = itr->oid_range[1];
413     } else {
414        status = s_ReadDbGetNextOidListChunk(rdfp, itr, 
415                    &rdfp_head->shared_info->last_oid_assigned);
416     }
417 
418     NlmMutexUnlock(ReaddbMutex);
419 
420     return status;
421 }
422 
423 /** Given an iterator over a BLAST database, returns the next ordinal id 
424  * to search.
425  * @param readdb_handle Pointer to the ReadDBFILE structure [in]
426  * @param itr Iterator over seqsrc. [in]
427  * @return Next ordinal id to search.
428  */
429 static Int4 
430 s_ReaddbIteratorNext(void* readdb_handle, BlastSeqSrcIterator* itr)
431 {
432     Int4 retval = BLAST_SEQSRC_EOF;
433     Int4 status = BLAST_SEQSRC_SUCCESS;
434     Uint4 last_pos = 0;
435 
436     ASSERT(readdb_handle);
437     ASSERT(itr);
438 
439     /* If iterator is uninitialized/invalid, retrieve the next chunk from the
440      * BlastSeqSrc */
441     if (itr->current_pos == UINT4_MAX) {
442         status = s_ReaddbGetNextChunk(readdb_handle, itr);
443         if (status != BLAST_SEQSRC_SUCCESS) {
444             return status;
445         }
446     }
447 
448     if (itr->itr_type == eOidRange) {
449         retval = itr->current_pos;
450         last_pos = itr->oid_range[1];
451     } else if (itr->itr_type == eOidList) {
452         retval = itr->oid_list[itr->current_pos];
453         last_pos = itr->chunk_sz;
454     } else {
455         /* Unsupported/invalid iterator type! */
456         fprintf(stderr, "Invalid iterator type: %d\n", itr->itr_type);
457         retval = BLAST_SEQSRC_ERROR;
458     }
459 
460     ++itr->current_pos;
461     if (itr->current_pos >= last_pos) {
462         itr->current_pos = UINT4_MAX;  /* invalidate internal iteration */
463     }
464 
465     return retval;
466 }
467 
468 /** Reset the ReadDBFilePtr's internal chunk "bookmark"
469  * @param readdb_handle Pointer to the ReadDBFILE structure [in]
470  */
471 static void
472 s_ReaddbResetChunkIterator(void* readdb_handle)
473 {
474     ReadDBFILEPtr rdfp = (ReadDBFILEPtr) readdb_handle;
475     ASSERT(rdfp);
476     rdfp->shared_info->last_oid_assigned = 0;
477     return;
478 }
479 
480 /** Readdb sequence source destructor: frees its internal data structure and the
481  * BlastSeqSrc structure itself.
482  * @param bssp BlastSeqSrc structure to free [in]
483  * @return NULL
484  */
485 static BlastSeqSrc* 
486 s_ReaddbSeqSrcFree(BlastSeqSrc* bssp)
487 {
488     if (!bssp) 
489         return NULL;
490     readdb_destruct((ReadDBFILEPtr)_BlastSeqSrcImpl_GetDataStructure(bssp));
491     return NULL;
492 }
493 
494 /** Readdb sequence source copier: 
495  * creates a new copy of the ReadDBFILE structure by calling readdb_attach.
496  * @param bssp BlastSeqSrc structure to copy [in]
497  * @return New BlastSeqSrc structure
498  */
499 static BlastSeqSrc* 
500 s_ReaddbSeqSrcCopy(BlastSeqSrc* bssp)
501 {
502    ReadDBFILE* rdfp = NULL;
503 
504    if (!bssp) 
505       return NULL;
506 
507    rdfp = readdb_attach((ReadDBFILEPtr)_BlastSeqSrcImpl_GetDataStructure(bssp));
508 
509    _BlastSeqSrcImpl_SetDataStructure(bssp, (void*) rdfp);
510     
511    return bssp;
512 }
513 
514 /** Initializes function pointers and data structure in a new readdb-based 
515  * BlastSeqSrc.
516  * @param retval Allocated BlastSeqSrc structure [in|out]
517  * @param rdfp ReadDBFILE structure to be used as data structure. [in]
518  */
519 static void
520 s_InitNewReaddbSeqSrc(BlastSeqSrc* retval, ReadDBFILE* rdfp)
521 {
522     ASSERT(retval);
523 
524     /* Initialize the BlastSeqSrc structure fields with user-defined function
525      * pointers and rdfp */
526     _BlastSeqSrcImpl_SetDeleteFnPtr(retval, &s_ReaddbSeqSrcFree);
527     _BlastSeqSrcImpl_SetCopyFnPtr(retval, &s_ReaddbSeqSrcCopy);
528     _BlastSeqSrcImpl_SetDataStructure(retval, (void*) rdfp);
529     _BlastSeqSrcImpl_SetGetNumSeqs(retval, &s_ReaddbGetNumSeqs);
530     _BlastSeqSrcImpl_SetGetNumSeqsStats(retval, &s_ReaddbGetNumSeqsStats);
531     _BlastSeqSrcImpl_SetGetMaxSeqLen(retval, &s_ReaddbGetMaxLength);
532     _BlastSeqSrcImpl_SetGetAvgSeqLen(retval, &s_ReaddbGetAvgLength);
533     _BlastSeqSrcImpl_SetGetTotLen(retval, &s_ReaddbGetTotLen);
534     _BlastSeqSrcImpl_SetGetTotLenStats(retval, &s_ReaddbGetTotLenStats);
535     _BlastSeqSrcImpl_SetGetName(retval, &s_ReaddbGetName);
536     _BlastSeqSrcImpl_SetGetIsProt(retval, &s_ReaddbGetIsProt);
537     _BlastSeqSrcImpl_SetGetSequence(retval, &s_ReaddbGetSequence);
538     _BlastSeqSrcImpl_SetGetSeqLen(retval, &s_ReaddbGetSeqLen);
539     _BlastSeqSrcImpl_SetIterNext(retval, &s_ReaddbIteratorNext);
540     _BlastSeqSrcImpl_SetResetChunkIterator(retval, &s_ReaddbResetChunkIterator);
541     _BlastSeqSrcImpl_SetReleaseSequence(retval, &s_ReaddbReleaseSequence);
542 #ifdef KAPPA_PRINT_DIAGNOSTICS
543     _BlastSeqSrcImpl_SetGetGis(retval, &s_ReaddbGetGis);
544 #endif /* KAPPA_PRINT_DIAGNOSTICS */
545 }
546 
547 /** Fills contents in the allocated BlastSeqSrc, given an already created
548  * ReadDBFILE structure. Uses readdb_attach to ensure that input ReadDBFILE
549  * is not freed in BlastSeqSrcFree.
550  * @param retval Allocated BlastSeqSrc to fill [in|out]
551  * @param args Pointer to a ReadDBFILE structure, cast to void to ensure 
552  *             correct signature. [in]
553  * @return Same as retval.
554  */
555 static BlastSeqSrc* 
556 s_ReaddbSeqSrcAttach(BlastSeqSrc* retval, void* args)
557 {
558     ReadDBFILE* rdfp_in = (ReadDBFILE*) args;
559     ReadDBFILE* rdfp = NULL;
560 
561     ASSERT(retval);
562 
563     if (!rdfp_in) {
564         _BlastSeqSrcImpl_SetInitErrorStr(retval, 
565          strdup("Cannot attach BlastSeqSrc to a NULL ReadDBFILE structure"));
566     } else {
567         rdfp = readdb_attach(rdfp_in);
568         if ( !rdfp ) {
569             _BlastSeqSrcImpl_SetInitErrorStr(retval, 
570              strdup("Failed to attach to existing ReadDBFILE structure"));
571         }
572     }
573 
574     s_InitNewReaddbSeqSrc(retval, rdfp);
575 
576     return retval;
577 }
578 
579 /** Encapsulates the arguments needed to initialize a BLAST database using
580  * readdb */
581 typedef struct ReaddbNewArgs {
582     char* dbname;     /**< Database name */
583     Boolean is_protein; /**< Is this database protein? */
584    Int4 first_db_seq; /**< Ordinal id of the first sequence to search */
585    Int4 final_db_seq; /**< Ordinal id of the last sequence to search */
586 } ReaddbNewArgs;
587 
588 /** Readdb sequence source constructor 
589  * @param retval BlastSeqSrc structure (already allocated) to populate [in]
590  * @param args Pointer to ReaddbNewArgs structure above [in]
591  * @return Updated bssp structure (with all function pointers initialized
592  */
593 static BlastSeqSrc* 
594 s_ReaddbSeqSrcNew(BlastSeqSrc* retval, void* args)
595 {
596     ReaddbNewArgs* rargs = (ReaddbNewArgs*) args;
597     ReadDBFILEPtr rdfp = NULL;
598 
599     ASSERT(retval);
600     ASSERT(rargs);
601 
602     if ( !rargs->dbname ) {
603         _BlastSeqSrcImpl_SetInitErrorStr(retval, 
604          strdup("Cannot initialize readdb BlastSeqSrc "
605                "with NULL database name"));
606     } else {
607         /* Initialize the rdfp */
608         if ( !(rdfp = readdb_new(rargs->dbname, rargs->is_protein))) {
609             char buf[1024];
610             snprintf(buf, sizeof(buf), 
611                      "s_ReaddbSeqSrcNew: could not open %s %s database\n",
612                      rargs->dbname, 
613                      (rargs->is_protein ? "protein" : "nucleotide"));
614             _BlastSeqSrcImpl_SetInitErrorStr(retval, strdup(buf));
615         }
616     }
617 
618     s_InitNewReaddbSeqSrc(retval, rdfp);
619 
620     /* Set the range, if it is specified */
621     if (rargs->first_db_seq > 0) {
622        while (rdfp && rdfp->stop < rargs->first_db_seq) {
623           /* Make this rdfp's range empty */
624           rdfp->start = rdfp->stop + 1;
625           rdfp = rdfp->next;
626        }
627        rdfp->start = rargs->first_db_seq;
628     }
629     if (rargs->final_db_seq > 0) {
630        while (rdfp && rdfp->stop < rargs->final_db_seq)
631           rdfp = rdfp->next;
632        /* Set last sequence for this and all subsequent rdfp's to the one
633           in the arguments, making the subsequent rdfp's ranges empty. 
634           Note that final_db_seq in arguments is 1 beyond the last sequence
635           number to search. */
636        for ( ; rdfp; rdfp = rdfp->next)
637           rdfp->stop = rargs->final_db_seq - 1;
638     }
639 
640     return retval;
641 }
642 
643 BlastSeqSrc* 
644 ReaddbBlastSeqSrcInit(const char* dbname, Boolean is_prot, int first_seq, 
645                       int last_seq)
646 {
647     BlastSeqSrcNewInfo bssn_info;
648     BlastSeqSrc* seq_src = NULL;
649     ReaddbNewArgs readdb_args;
650 
651     readdb_args.dbname = (char*) dbname;
652     readdb_args.is_protein = is_prot;
653     readdb_args.first_db_seq = first_seq;
654     readdb_args.final_db_seq = last_seq;
655     bssn_info.constructor = &s_ReaddbSeqSrcNew;
656     bssn_info.ctor_argument = (void*) &readdb_args;
657 
658     seq_src = BlastSeqSrcNew(&bssn_info);
659     return seq_src;
660 }
661 
662 BlastSeqSrc*
663 ReaddbBlastSeqSrcAttach(ReadDBFILE* rdfp)
664 {
665     BlastSeqSrcNewInfo bssn_info;
666     BlastSeqSrc* seq_src = NULL;
667 
668     bssn_info.constructor = &s_ReaddbSeqSrcAttach;
669     bssn_info.ctor_argument = (void*) rdfp;
670 
671     seq_src = BlastSeqSrcNew(&bssn_info);
672     return seq_src;
673 }
674 
675 /* @} */
676 
677 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.