NCBI C Toolkit Cross Reference

C/demo/cdscan.c


  1 /*   cdscan.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  cdscan.c
 27 *
 28 * Author:  Kans, Schuler, Ostell
 29 *
 30 * Version Creation Date:   2/26/95
 31 *
 32 * $Revision: 6.1 $
 33 *
 34 * File Description: 
 35 *       scans through sequence records on the Entrez discs
 36 *       This program is meant to serve as a model for programs that scan
 37 *       all the files on the Entrez CDROM disks. Basically it does some
 38 *       setup in the main routine to find the disks and get a list of files
 39 *       on them for nucleic acid or protein sequences. It then goes through
 40 *       each file and reads each Seq-entry from them. On the Entrez CDROMs
 41 *       the Seq-entries are Huffman compressed, so the Casn calls decompress
 42 *       them for you. Once you have a Seq-entry, you can do whatever you
 43 *       want with it. In this program we give the options of printing as
 44 *       FASTA file or as GenBank or GenPept file. You can modify this part
 45 *       to do whatever you want. The function that receives the SeqEntry is
 46 *       called "ProcessSeqEntry".
 47 *
 48 *   This function is passed one SeqEntry at a time by the CDROM scanning
 49 *     Routines. Depending on the global variables it will call routines to
 50 *     make fasta, genbank format, or to call a custom routine. The default
 51 *     custom routine (CustomRoutine) just prints the SeqIds of the sequences
 52 *     to the outputfile and to the progress monitor.
 53 *
 54 *   At the end of the file are some custom routines written for various
 55 *     purposes that could be substituted or modified for other purposes.
 56 *     To activate, call them instead of "CustomRoutine"
 57 *
 58 *      The first locates all GenBank entries and prints out a short summary
 59 *      of their citations. It is called GenBankPubs()
 60 *
 61 *      The second prints the sequence of all CdRegion features in the entry.
 62 *      It is called SeqEntryToFeat()
 63 *
 64 *
 65 *       Before trying any of this, be sure you have installed the Entrez
 66 *       application itself and ensured that it works. This program uses the
 67 *       the same configuration file and will not run if Entrez has not been
 68 *       properly installed.
 69 *
 70 * Modifications:  
 71 * --------------------------------------------------------------------------
 72 * Date     Name        Description of modification
 73 * -------  ----------  -----------------------------------------------------
 74 *
 75 *
 76 * ==========================================================================
 77 */
 78 
 79 #ifndef _NEW_CdEntrez_
 80 #define _NEW_CdEntrez_
 81 #endif
 82 
 83 #include <ncbi.h>
 84 #include <casn.h>
 85 #include <accentr.h>
 86 #include <cdromlib.h>
 87 #include <seqport.h>
 88 #include <asn2ff.h>
 89 #include <tofasta.h>
 90 
 91 /*****************************************************************************
 92 *
 93 *   structs used by main routines
 94 *
 95 *****************************************************************************/
 96 
 97 typedef struct filelist {
 98   Int2                  cdnum;
 99   CharPtr               fdir;
100   CharPtr               fname;
101   struct filelist PNTR  next;
102 } FileList, PNTR FileListPtr;
103 
104 /*****************************************************************************
105 *
106 *   Function prototypes for routines in this module
107 *
108 *****************************************************************************/
109 static Boolean LIBCALLBACK EnumerateFiles PROTO((int cdnum, const char *fdir,
110                                            const char *fname, long fsize,
111                                            void *opaque_data));
112 
113 static FileListPtr FileListNew PROTO((FileListPtr flp, Int2 cdnum,
114                                 CharPtr fdir, CharPtr fname));
115 
116 static void ProcessFileList PROTO((FileListPtr flp, CharPtr outputfile));
117 
118 static void ProcessFile PROTO((FileListPtr flp, CharPtr root, CharPtr outputfile));
119 
120 static void ProcessSeqEntry PROTO((SeqEntryPtr sep, FILE *fp));
121 
122 static void CustomRoutine PROTO((SeqEntryPtr sep, FILE * fp));
123 
124 static void PrintIdDefLine PROTO((SeqEntryPtr sep, Pointer data,
125                          Int4 index, Int2 indent));
126 
127 /*****************************************************************************
128 *
129 *   Static Data used by the main routines
130 *
131 *****************************************************************************/
132 
133 static Char root [PATH_MAX];
134 static EntrezInfoPtr eip;
135 static EntrezDivInfo *div_info;
136 
137 static Int2 format;    /* 1 = GenBank, 2 = FASTA */
138 static Boolean is_na,  /* TRUE = nucleic acids, FALSE = proteins */
139         is_custom;  /* call custom process instead of std ones */
140 static MonitorPtr pmon = NULL;   /* progress monitor */
141 
142 #define NUMARGS 5
143 
144 Args myargs [NUMARGS] = {
145   {"Scan DNA (1) or Protein (2)", "1", "1", "2", FALSE, 's', ARG_INT, 0.0, 0, NULL},
146   {"Output format: GenBank (1) or FASTA (2)", "1", "1", "2", TRUE, 'f', ARG_INT, 0.0, 0, NULL},
147   {"Call custom process", "F", NULL, NULL, TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
148   {"Show progress monitor", "F", NULL, NULL, TRUE, 'p', ARG_BOOLEAN, 0.0, 0, NULL},
149   {"Output File", "stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}
150 };
151 
152 /*****************************************************************************
153 *
154 *   This is the main program.
155 *     It reads command line or initial dialogue arguments
156 *     It initializes parse trees, entrez
157 *     It makes a list of the relevant files to search
158 *     It then calls ProcessFileList to open each file and process it
159 *
160 *****************************************************************************/
161 Int2 Main (void)
162 {
163   Char         div [8];
164   FileListPtr  flp = NULL;
165   FILE         *fp;
166   Int2         i;
167   Boolean      is_network, did_init=FALSE;
168   FileListPtr  next;
169   CharPtr      outputfile;
170 
171   if (! GetArgs ("CdScan", NUMARGS, myargs))   /* get input args */
172         goto ret;
173 
174                                                                                            /* process input args */
175   if (myargs[0].intvalue == 1)
176         is_na = TRUE;      /* scan nucleic acids */
177   else
178         is_na = FALSE;     /* scan proteins */
179   format = (Int2)(myargs[1].intvalue);
180   is_custom = (Boolean)(myargs[2].intvalue);
181   if (myargs[3].intvalue)   /* show progress */
182   {
183         pmon = MonitorStrNew("CdScan", 40);
184   }
185   outputfile = myargs[4].strvalue;   /* output file name */
186 
187   if (pmon != NULL)
188         MonitorStrValue(pmon, "Reading Parse Trees");
189 
190   if (! SeqEntryLoad() || ! SubmitAsnLoad())   /* read ASN.1 parse trees */
191   {
192         Message(MSG_ERROR, "Can't open parse trees");
193         goto ret;
194   }
195 
196   if (! PrintTemplateSetLoad ("asn2ff.prt"))
197   {
198     Message(MSG_ERROR, "Can't load print templates");
199         goto ret;
200   }
201 
202   if (pmon != NULL)
203         MonitorStrValue(pmon, "Initializing Entrez");
204 
205   if (! EntrezInit ("cdscan", FALSE, &is_network)) /* init Entrez */
206   {
207         Message(MSG_ERROR, "Can't initialize Entrez");
208         goto ret;
209   }
210 
211   did_init = TRUE;
212   if (is_network)
213   {
214       Message (MSG_ERROR, "Network service does not allow scanning");
215         goto ret;
216   }
217 
218   if (pmon != NULL)
219         MonitorStrValue(pmon, "Building File List");
220 
221   eip = EntrezGetInfo ();                        /* set up the file lists */
222   if ((eip == NULL) || (eip->div_info == NULL))
223   {
224         Message(MSG_ERROR, "Can't find Entrez file info");
225         goto ret;
226   }
227 
228 
229   flp = FileListNew (NULL, INT2_MIN, NULL, NULL);
230   if (flp == NULL)
231   {
232         Message(MSG_ERROR, "Can't allocate file list");
233         goto ret;
234   }
235 
236   div_info = eip->div_info;
237   for (i = 0; i < eip->div_count; i++)
238   {
239         StringNCpy (div, div_info [i].tag, sizeof (div) - 1);
240         if (! is_na)
241         {
242         CdEnumFiles (CdDir_rec, TYP_AA, div, EnumerateFiles, &flp);
243       }
244         else
245         {
246         CdEnumFiles (CdDir_rec, TYP_NT, div, EnumerateFiles, &flp);
247         }
248   }
249 
250   fp = FileOpen (outputfile, "w");   /* test that we can open output file */
251   if (fp == NULL)
252   {
253         Message(MSG_ERROR, "Can't open [%s]", outputfile);
254         goto ret;
255   }
256 
257   FileClose (fp);     /* will be reopened for each input file */
258 
259   ProcessFileList (flp, outputfile);              /* process the file list */
260 
261 ret:                                                         /* clean up */
262 
263   if (pmon != NULL)     /* close the progress monitor */
264         MonitorFree(pmon);
265 
266   if (did_init)
267           EntrezFini();         /* close entrez */
268 
269   while (flp != NULL)       /* free file list */
270   {
271     next = flp->next;
272     MemFree (flp->fdir);
273     MemFree (flp->fname);
274     MemFree (flp);
275     flp = next;
276   }
277 
278   return 0;
279 }
280 
281 /*****************************************************************************
282 *
283 *   ProcessSeqEntry (sep, fp)
284 *
285 *
286 *   This function is passed one SeqEntry at a time by the CDROM scanning
287 *     Routines. Depending on the global variables it will call routines to
288 *     make fasta, genbank format, or to call a custom routine. The default
289 *     custom routine (CustomRoutine) just prints the SeqIds of the sequences
290 *     to the outputfile and to the progress monitor.
291 *
292 *   At the end of the file are some custom routines written for various
293 *     purposes that could be substituted or modified for other purposes.
294 *     To activate, call them instead of "CustomRoutine"
295 *
296 *****************************************************************************/
297 static void ProcessSeqEntry (SeqEntryPtr sep, FILE *fp)
298 
299 {
300   Uint1 fmt;
301 
302   if ((sep == NULL) || (fp == NULL))
303         return;
304 
305   if (is_custom)
306         CustomRoutine(sep, fp);    /* this is the one you modify */
307   else if (format == 1)   /* genbank format */
308   {
309         if (is_na)                                /* defined in asn2ff.h */
310                 fmt = GENBANK_FMT;
311         else
312                 fmt = GENPEPT_FMT;
313         SeqEntryToFlat(sep, fp, fmt, RELEASE_MODE);   /* dump like entrez does */
314   }  
315   else if (format == 2)   /* fasta format */
316         SeqEntryToFasta(sep, fp, is_na);   /* defined in tofasta.h */
317 
318   return;
319 }
320 
321 /*****************************************************************************
322 *
323 *   CustomRoutine (sep, fp)
324 *     This is just a little model of a customized routine
325 *     Normally you would replace this with one of your own design
326 *     Some examples follow below. In this routine, it prints the
327 *     SeqId and definition line of each entry it finds using SeqEntryExplore.
328 *
329 *****************************************************************************/
330 static void CustomRoutine (SeqEntryPtr sep, FILE * fp)
331 {
332 
333         SeqEntryExplore(sep, (Pointer)(fp), PrintIdDefLine);
334 }
335 
336 
337 /*****************************************************************************
338 *
339 *   PrintIdDefLine
340 *     SeqEntryExplore callback routine that prints the seqids and definition
341 *       lines.
342 *
343 *****************************************************************************/
344 static void PrintIdDefLine (SeqEntryPtr sep, Pointer data,
345                          Int4 index, Int2 indent)
346 
347 {
348   BioseqPtr     bsp;
349   FILE * fp;
350   Char buf[40];
351   CharPtr title = NULL;
352 
353   if (IS_Bioseq (sep)) {
354         *buf = '\0';
355     bsp = (BioseqPtr) sep->data.ptrvalue;
356     fp = (FILE *) data;
357         title = BioseqGetTitle(bsp);   /* this does not deal with all cases */
358         SeqIdPrint(bsp->id, buf, PRINTID_FASTA_LONG);  /* print SeqId */
359         if (pmon != NULL)
360                 MonitorStrValue(pmon, buf);
361         fprintf(fp, ">%s", buf);
362         if (title != NULL)
363                 fprintf(fp, " %s", title);
364         fprintf(fp, "\n");
365   }
366   return;
367 }
368 
369 
370 /*****************************************************************************
371 *
372 *   Other possible Custom routines follow below.
373 *      The first locates all GenBank entries and prints out a short summary
374 *      of their citations. It is called GenBankPubs
375 *
376 *      The second prints the sequence of all CdRegion features in the entry.
377 *      It is called SeqEntryToFeat()
378 *
379 *****************************************************************************/
380 
381 
382 /*****************************************************************************
383 *
384 *   GenBankPubs(sep, fp)
385 *     This set of routines finds GenBank entries and prints a summary of
386 *     their citations
387 *
388 *****************************************************************************/
389 static void GetPubFromGenBank PROTO((SeqEntryPtr sep, Pointer data,
390                          Int4 index, Int2 indent));
391 
392 static void GenBankPubs (SeqEntryPtr sep, FILE * fp)
393 {
394 
395         SeqEntryExplore(sep, (Pointer)(fp), GetPubFromGenBank);
396 }
397 
398 
399 static void PubWrite(CharPtr accession, ValNodePtr vnp, FILE * fp)
400 {
401         CitArtPtr cap = NULL;
402         CitJourPtr cjp;
403         ValNodePtr tvnp;
404         ImprintPtr ip;
405         Int2 year = 0;
406         CharPtr jta = NULL,
407                 volume,
408                 pages;
409         Char buf[250];
410 
411         switch (vnp->choice)
412         {
413                 case PUB_Equiv:
414                         for (tvnp = (ValNodePtr)(vnp->data.ptrvalue); tvnp != NULL;
415                                 tvnp = tvnp->next)
416                         {
417                                 PubWrite(accession, tvnp, fp);
418                         }
419                         break;
420                 case PUB_Article:
421                         cap = (CitArtPtr)(vnp->data.ptrvalue);
422                         if (cap->from == 1)  /* from a journal */
423                         {
424                                 cjp = (CitJourPtr)(cap->fromptr);
425                                 ip = cjp->imp;
426                                 for (tvnp = cjp->title; tvnp != NULL; tvnp = tvnp->next)
427                                 {
428                                         switch (tvnp->choice)
429                                         {
430                                                 case Cit_title_jta:
431                                                 case Cit_title_iso_jta:
432                                                 case Cit_title_ml_jta:
433                                                         jta = (CharPtr)(tvnp->data.ptrvalue);
434                                                         break;
435                                                 default:
436                                                         break;
437                                         }
438                                         if (jta != NULL) break;
439                                 }
440 
441                                 if (ip->date->data[0] == 1)  /* std date */
442                                         year = (Int2)ip->date->data[1] + 1900;
443                                 else
444                                         year = 0;
445                                 volume = ip->volume;
446                                 if (volume == NULL)
447                                         volume = "(no volume)";
448                                 pages = ip->pages;
449                                 if (pages == NULL)
450                                         pages = "(no pages)";
451                                 if (jta == NULL)
452                                         jta = "(no jta)";
453                                 sprintf(buf, "%s - %s (%d) %s:%s", accession, jta, (int)year,
454                                                 volume, pages);
455                         fprintf(fp, "%s\n", buf);
456                         if (pmon != NULL)
457                                 MonitorStrValue(pmon, buf);
458                         }
459                         break;
460                 default:
461                         break;
462 
463         }
464         return;
465 }
466 
467 static void GetPubFromGenBank (SeqEntryPtr sep, Pointer data,
468                          Int4 index, Int2 indent)
469 
470 {
471   BioseqPtr     bsp;
472   BioseqContextPtr bcp;
473   SeqFeatPtr sfp;
474   ValNodePtr vnp;
475   FILE * fp;
476   CharPtr accession = NULL;
477   ValNode tpub;
478   TextSeqIdPtr tsip;
479   PubdescPtr pdp;
480   Char buf[40];
481 
482   if (! IS_Bioseq(sep))
483         return;
484 
485   fp = (FILE *) data;   /* get the output file pointer */
486         
487   bsp = (BioseqPtr) sep->data.ptrvalue;
488   if (pmon != NULL)
489   {
490         *buf = '\0';
491         SeqIdPrint(bsp->id, buf, PRINTID_FASTA_LONG);
492         MonitorStrValue(pmon, buf);
493   }
494   for (vnp = bsp->id; ((vnp != NULL) && (accession == NULL)); vnp = vnp->next)
495   {
496         switch (vnp->choice)
497         {
498                 case SEQID_GENBANK:
499                 case SEQID_EMBL:
500                 case SEQID_DDBJ:
501                         tsip = (TextSeqIdPtr)(vnp->data.ptrvalue);
502                         if (tsip->accession != NULL)
503                                 accession = tsip->accession;
504                         break;
505                 default:
506                         break;
507         }
508   }
509   
510   if (accession == NULL) return;
511 
512   bcp = BioseqContextNew(bsp);
513   if (bcp == NULL)
514         return;
515 
516   vnp = NULL;
517   tpub.choice = PUB_Equiv;
518   tpub.next = NULL;
519                                  /* get any pub descriptors */
520 
521   while ((vnp = BioseqContextGetSeqDescr(bcp, (Int2)Seq_descr_pub, vnp, NULL)) != NULL)
522   {
523         pdp = (PubdescPtr)(vnp->data.ptrvalue);   /* it's a Pubdesc */
524         tpub.data.ptrvalue = pdp->pub;                    /* make Pub-equiv into a Pub */
525         PubWrite(accession, &tpub, fp);
526         
527   }
528 
529   sfp = NULL;
530   while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
531   {
532         if (sfp->data.choice == 6)   /* a pub feature */
533         {
534                 pdp = (PubdescPtr)(sfp->data.value.ptrvalue);
535                 tpub.data.ptrvalue = pdp->pub;
536                 PubWrite(accession, &tpub, fp);
537         }
538                                                                  /* get any feature citations */
539         if (sfp->cit != NULL)
540         {
541                 for (vnp = sfp->cit->data.ptrvalue; vnp != NULL; vnp = vnp->next)
542                         PubWrite(accession, vnp, fp);
543         }
544         
545   }
546 
547   BioseqContextFree(bcp);
548 
549   return;
550 
551 }
552 
553 
554 /*****************************************************************************
555 *
556 *   SeqEntryToFeat()
557 *     This set of routines finds features (in this case CdRegion) and prints
558 *     the part of the sequence that they cover
559 *
560 *****************************************************************************/
561 
562 
563 #define CHARSPERLINE  50
564 
565 typedef struct expstruct {
566   FILE      *fp;          /* file to write sequence to */
567   AsnIoPtr  aip;
568   Boolean   is_na;        /* target sequence nucleic acid? */
569   Uint1     feat;     /* type of feature to find */
570 } ExpStruct, PNTR ExpStructPtr;
571 
572 static void PrintSequence PROTO((BioseqPtr bsp, SeqFeatPtr sfp,
573                            FILE *fp, Boolean is_na));
574 static void LIBCALLBACK GetSeqFeat PROTO((AsnExpOptStructPtr aeosp));
575 
576 static void SeqEntryToFeat (SeqEntryPtr sep, FILE *fp)
577 
578 {
579   AsnExpOptPtr  aeop;
580   AsnIoPtr      aip;
581   ExpStructPtr  esp;
582 
583   if (sep != NULL && fp != NULL) {
584     esp = MemNew (sizeof (ExpStruct));
585     if (esp != NULL) {
586       aip = AsnIoNullOpen ();
587       if (aip != NULL) {
588         esp->fp = fp;
589         esp->aip = AsnIoNew (ASNIO_TEXT_OUT, fp, NULL, NULL, NULL);
590         esp->is_na = is_na;
591         esp->feat = 3;  /* look for CdRegion SeqFeat */
592         aeop = AsnExpOptNew (aip, "Seq-feat", (Pointer) esp, GetSeqFeat);
593         if (aeop != NULL) {
594           SeqEntryAsnWrite (sep, aip, NULL);
595           fflush (fp);
596           AsnExpOptFree (aip, aeop);
597         }
598         AsnIoClose (aip);
599       }
600       MemFree (esp);
601     }
602   }
603 }
604 
605 
606 static void PrintSequence (BioseqPtr bsp, SeqFeatPtr sfp,
607                            FILE *fp, Boolean is_na)
608 
609 {
610   Char        buffer [255];
611   Uint1       code;
612   Int2        count;
613   Uint1       repr;
614   Uint1       residue;
615   SeqPortPtr  spp;
616   CharPtr     title;
617   CharPtr     tmp;
618 
619   if (bsp != NULL && fp != NULL) {
620     if ((Boolean) ISA_na (bsp->mol) == is_na) {
621       repr = Bioseq_repr (bsp);
622       if (repr == Seq_repr_raw || repr == Seq_repr_const) {
623         title = BioseqGetTitle (bsp);
624         tmp = StringMove (buffer, ">");
625         tmp = SeqIdPrint (bsp->id, tmp, PRINTID_FASTA_LONG);
626         tmp = StringMove (tmp, " ");
627         StringNCpy (tmp, title, 200);
628         fprintf (fp, "%s\n", buffer);
629                 if (pmon != NULL)
630                         MonitorStrValue(pmon, buffer);
631         if (is_na) {
632           code = Seq_code_iupacna;
633         } else {
634           code = Seq_code_iupacaa;
635         }
636         if (sfp != NULL) {
637           spp = SeqPortNewByLoc (sfp->location, code);
638         } else {
639           spp = SeqPortNew (bsp, 0, -1, 0, code);
640         }
641         if (spp != NULL) {
642           count = 0;
643           while ((residue = SeqPortGetResidue (spp)) != SEQPORT_EOF) {
644             if (! IS_residue (residue)) {
645               buffer [count] = '\0';
646               fprintf (fp, "%s\n", buffer);
647               count = 0;
648               switch (residue) {
649                 case SEQPORT_VIRT :
650                   fprintf (fp, "[Gap]\n");
651                   break;
652                 case SEQPORT_EOS :
653                   fprintf (fp, "[EOS]\n");
654                   break;
655                 default :
656                   fprintf (fp, "[Invalid Residue]\n");
657                   break;
658               }
659             } else {
660               buffer [count] = residue;
661               count++;
662               if (count >= CHARSPERLINE) {
663                 buffer [count] = '\0';
664                 fprintf (fp, "%s\n", buffer);
665                 count = 0;
666               }
667             }
668           }
669           if (count != 0) {
670             buffer [count] = '\0';
671             fprintf (fp, "%s\n", buffer);
672           }
673           SeqPortFree (spp);
674         }
675       }
676     }
677   }
678 }
679 
680 static void LIBCALLBACK GetSeqFeat (AsnExpOptStructPtr aeosp)
681 
682 {
683   BioseqPtr     bsp;
684   ExpStructPtr  esp;
685   SeqFeatPtr    sfp;
686 
687   if (aeosp->dvp->intvalue == START_STRUCT) {
688     esp = (ExpStructPtr) aeosp->data;
689     sfp = (SeqFeatPtr) aeosp->the_struct;
690     if (esp != NULL && esp->fp != NULL && sfp != NULL &&
691         sfp->data.choice == esp->feat) {
692       bsp = BioseqFind (SeqLocId (sfp->location));
693       if (bsp != NULL) {
694         PrintSequence (bsp, sfp, esp->fp, esp->is_na);
695       }
696     }
697   }
698 }
699 
700 /*****************************************************************************
701 *
702 *   These are the rest of the utility routines for reading the CDROM.
703 *
704 *****************************************************************************/
705 
706 
707 /*****************************************************************************
708 *
709 *   opens a file and reads SeqEntrys
710 *   calls ProcessSeqEntry to do the actual work on it
711 *
712 *****************************************************************************/
713 static void ProcessFile (FileListPtr flp, CharPtr root, CharPtr outputfile)
714 {
715   CASN_Handle  casnh;
716   FILE         *fp;
717   Char         path [PATH_MAX];
718   SeqEntryPtr  sep;
719   CASN_Type    type;
720   Char buf[40];
721   Int4 ctr = 0;
722 
723   if (flp != NULL) {
724     fp = FileOpen (outputfile, "a");
725     if (fp != NULL) {
726           if (pmon != NULL)
727           {
728                 sprintf(path, "Opening [%s]", flp->fname);
729                 MonitorStrValue(pmon, path);
730           }
731       StringCpy (path, root);
732       FileBuildPath (path, flp->fdir, NULL);
733       FileBuildPath (path, NULL, flp->fname);
734       if ((casnh = CASN_Open (path)) != NULL) {
735         if (! is_na) {
736           type = CASN_Type_aa;
737         } else {
738           type = CASN_Type_nt;
739         }
740         if (CASN_DocType (casnh) == type) {
741           while ((sep = CASN_NextSeqEntry (casnh)) != NULL) {
742             if (pmon != NULL)
743             {
744                 ctr++;
745                 sprintf(buf, "Processing %s Entry %ld", flp->fname,(long)ctr);
746                 MonitorStrValue(pmon, buf);
747             }
748             ProcessSeqEntry (sep, fp);
749             SeqEntryFree (sep);
750           }
751         }
752         CASN_Close (casnh);
753       }
754           else
755                 Message(MSG_ERROR, "Can't open [%s]", path);
756       FileClose (fp);
757     } else {
758       Message (MSG_FATAL, "Unable to reopen output file [%s]", outputfile);
759     }
760   }
761 }
762 
763 
764 /*****************************************************************************
765 *
766 *  Mounts the appropriate cdrom
767 *  Calls ProcessFile to Open and read through the file
768 *
769 *****************************************************************************/
770 static void ProcessFileList (FileListPtr flp, CharPtr outputfile)
771 
772 {
773   Int2         device;
774   FileListPtr  next;
775   Char         root [PATH_MAX];
776 
777   if (flp != NULL) {
778     root [0] = '\0';
779     device = flp->cdnum;
780     flp = flp->next;
781     while (flp != NULL) {
782       next = flp->next;
783       if (device != flp->cdnum) {
784         if (! CdMountEntrezVolume (flp->cdnum, root, sizeof (root))) {
785           Message (MSG_FATAL, "CdMountEntrezVolume failed");
786           root [0] = '\0';
787         }
788       }
789       ProcessFile (flp, root, outputfile);
790       device = flp->cdnum;
791       flp = next;
792     }
793   }
794 }
795 
796 /*****************************************************************************
797 *
798 *   Add a new file list element
799 *
800 *****************************************************************************/
801 static FileListPtr FileListNew (FileListPtr flp, Int2 cdnum,
802                                 CharPtr fdir, CharPtr fname)
803 
804 {
805   FileListPtr  newnode;
806 
807   newnode = (FileListPtr) MemNew (sizeof (FileList));
808   if (newnode != NULL) {
809     if (flp != NULL) {
810       while (flp->next != NULL && flp->next->cdnum <= cdnum) {
811         flp = flp->next;
812       }
813       newnode->next = flp->next;
814       flp->next = newnode;
815     }
816     newnode->cdnum = cdnum;
817     if (fdir != NULL && *fdir != '\0') {
818       newnode->fdir = StringSave (fdir);
819     }
820     if (fname != NULL && *fname != '\0') {
821       newnode->fname = StringSave (fname);
822     }
823   }
824   return newnode;
825 }
826 
827 /*****************************************************************************
828 *
829 *   Get all appropriate files to search
830 *
831 *****************************************************************************/
832 static Boolean LIBCALLBACK EnumerateFiles (int cdnum, const char *fdir,
833                                            const char *fname, long fsize,
834                                            void *opaque_data)
835 
836 {
837   FileListPtr      flp;
838   FileListPtr PNTR head;
839 
840   head = (FileListPtr PNTR) opaque_data;
841   flp = NULL;
842   if (head != NULL) {
843     flp = FileListNew (*head, (Int2) cdnum, (CharPtr) fdir, (CharPtr) fname);
844     if (*head == NULL) {
845       *head = flp;
846     }
847   } else {
848     flp = FileListNew (NULL, (Int2) cdnum, (CharPtr) fdir, (CharPtr) fname);
849   }
850   return TRUE;
851 }
852 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.