NCBI C Toolkit Cross Reference

C/demo/asn2fast.c


  1 /*****************************************************************************
  2 *
  3 *   asn2fast.c
  4 *
  5 *****************************************************************************/
  6 #include <tofasta.h>
  7 #include <subutil.h>
  8 #include <sqnutils.h>
  9 #include <accid1.h>
 10 #include <lsqfetch.h>
 11 
 12 #define NUMARG 18
 13 Args myargs[NUMARG] = {
 14         {"Filename for asn.1 input","stdin",NULL,NULL,TRUE,'a',ARG_FILE_IN,0.0,0,NULL},
 15         {"Input is a Seq-entry","F", NULL ,NULL ,TRUE,'e',ARG_BOOLEAN,0.0,0,NULL},
 16         {"Input asnfile in binary mode","F",NULL,NULL,TRUE,'b',ARG_BOOLEAN,0.0,0,NULL},
 17         {"Output Protein Filename","fasta.aa", NULL,NULL,TRUE,'p',ARG_FILE_OUT,0.0,0,NULL},
 18         {"Output DNA Filename","fasta.na", NULL,NULL,TRUE,'n',ARG_FILE_OUT,0.0,0,NULL},
 19         {"Log errors to file named:",NULL,NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL},
 20         {"Combine segmented or delta sequences","F",NULL,NULL,TRUE,'c',ARG_BOOLEAN,0.0,0,NULL},
 21         {"Produce Protein File","T",NULL,NULL,TRUE,'x',ARG_BOOLEAN,0.0,0,NULL},
 22         {"Produce DNA File","T",NULL,NULL,TRUE,'d',ARG_BOOLEAN,0.0,0,NULL},
 23         {"Limit to GenBank","F",NULL,NULL,TRUE,'g',ARG_BOOLEAN,0.0,0,NULL},
 24         {"Instantiate virtual sequences","F",NULL,NULL,TRUE,'v',ARG_BOOLEAN,0.0,0,NULL},
 25         {"Input is a Seq-submit","F", NULL ,NULL ,TRUE,'s',ARG_BOOLEAN,0.0,0,NULL},
 26         {"Produce output file of Quality Scores (DNA sequences only)","F",NULL,NULL,TRUE,'q',ARG_BOOLEAN,0.0,0,NULL},
 27         {"Output Filename for Quality Scores (DNA sequences only)","scores.ql", NULL,NULL,TRUE,'y',ARG_FILE_OUT,0.0,0,NULL},
 28         {"Far Genomic Contig function for Quality Scores","F",NULL,NULL,TRUE,'f',ARG_BOOLEAN,0.0,0,NULL},
 29         {"Remote fetching", "F", NULL, NULL, FALSE, 'r', ARG_BOOLEAN, 0.0, 0, NULL},
 30         {"Local fetching", "F", NULL, NULL, FALSE, 'k', ARG_BOOLEAN, 0.0, 0, NULL},
 31         {"Print Quality Score Gap as -1, false prints as 0", "F", NULL, NULL, FALSE, 'z', ARG_BOOLEAN, 0.0, 0, NULL},
 32 };
 33 
 34 static void PrintQualProc (CharPtr buf, Uint4 buflen, Pointer userdata)
 35 
 36 {
 37   FILE  *fp;
 38 
 39   fp = (FILE*) userdata;
 40   fprintf (fp, "%s", buf);
 41 }
 42 
 43 static void PrintQualScores (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
 44 
 45 {
 46         BioseqPtr  bsp;
 47         FILE       *fp;
 48 
 49         if (IS_Bioseq (sep)) {
 50                 bsp = (BioseqPtr) sep->data.ptrvalue;
 51 
 52                 /* WARNING: we're assuming here that asn2fast's quality-score
 53                    output is DNA-centric, thus protein bioseqs can be ignored
 54                    in the PrintQualScores callback. --MLC, 5/2000 */
 55 
 56                 if (ISA_aa(bsp->mol))
 57                   return;
 58 
 59                 fp = (FILE*) data;
 60                 if (myargs [17].intvalue) {
 61                   PrintQualityScoresToBuffer (bsp, FALSE, fp, PrintQualProc);
 62                 } else {
 63                   PrintQualityScoresToBuffer (bsp, TRUE, fp, PrintQualProc);
 64                 }
 65         }
 66 }
 67 
 68 static void PrintFarQualScores (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
 69 
 70 {
 71         BioseqPtr  bsp;
 72         FILE       *fp;
 73 
 74         if (IS_Bioseq (sep)) {
 75                 bsp = (BioseqPtr) sep->data.ptrvalue;
 76 
 77                 /* WARNING: we're assuming here that asn2fast's quality-score
 78                    output is DNA-centric, thus protein bioseqs can be ignored
 79                    in the PrintQualScores callback. --MLC, 5/2000 */
 80 
 81                 if (ISA_aa(bsp->mol))
 82                   return;
 83 
 84                 fp = (FILE*) data;
 85                 if (myargs [17].intvalue) {
 86                   PrintQualityScoresForContig (bsp, FALSE, fp);
 87                 } else {
 88                   PrintQualityScoresForContig (bsp, TRUE, fp);
 89                 }
 90         }
 91 }
 92 
 93 
 94 Boolean CheckIsGenBank(SeqEntryPtr sep);
 95 
 96 Int2 Main(void)
 97 {
 98         AsnIoPtr aip;
 99         FILE * aa = NULL, * na = NULL, * ql = NULL;
100         SeqEntryPtr sep;
101         SeqSubmitPtr ssp;
102         AsnTypePtr atp, atp2;
103         AsnModulePtr amp;
104         Uint1 group_segs = 0;
105         Boolean limit_to_genbank,
106                 make_dna,
107                 make_protein,
108                 make_quality,
109                 far_quality,
110                 do_it;
111         
112 
113                                         /* check command line arguments */
114 
115         if ( ! GetArgs("asn2fast",NUMARG, myargs))
116                 return 1;
117 
118                                         /* load the sequence alphabets  */
119                                         /* (and sequence parse trees)   */
120         if (! SeqEntryLoad())
121         {
122                 ErrShow();
123                 return 1;
124         }
125                                     /* get pointer to all loaded ASN.1 modules */
126         amp = AsnAllModPtr();
127         if (amp == NULL)
128         {
129                 ErrShow();
130                 return 1;
131         }
132 
133         if (myargs[11].intvalue) {
134                 if (! SubmitAsnLoad())
135                         Message(MSG_FATAL, "Unable to load parse trees.");
136                 
137                 atp2 = AsnFind("Seq-submit");
138                 if (atp2 == NULL)
139                         Message(MSG_FATAL, "Unable to find Seq-submit");
140                 atp = AsnFind("Seq-submit");
141                 if (atp == NULL)
142                         Message(MSG_FATAL, "Unable to find Seq-submit");
143 
144         } else {
145                 atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
146                 if (atp == NULL)
147                 {
148                         ErrShow();
149                         return 1;
150                 }
151         
152                 atp2 = AsnFind("Bioseq-set.seq-set.E");
153                 if (atp2 == NULL)
154                 {
155                         ErrShow();
156                         return 1;
157                 }
158         }
159 
160         make_protein = (Boolean)(myargs[7].intvalue);
161         make_dna = (Boolean)(myargs[8].intvalue);
162         make_quality = (Boolean)(myargs[12].intvalue);
163         far_quality = (Boolean)(myargs[14].intvalue);
164 
165                                         /* open the ASN.1 input file in the right mode */
166 
167         if ((aip = AsnIoOpen (myargs[0].strvalue, myargs[2].intvalue?"rb":"r"))
168           == NULL)
169         {
170                 ErrShow();
171                 return 1;
172         }
173 
174                                                                 /* open the output file */
175 
176         if ((myargs[3].strvalue != NULL) && (make_protein))
177         {
178                 if ( (aa = FileOpen (myargs[3].strvalue, "w")) == NULL)
179                 {
180                         ErrShow();
181                         return 1;
182                 }
183         }
184 
185         if ((myargs[4].strvalue != NULL) && (make_dna))
186         {
187                 if ( (na = FileOpen (myargs[4].strvalue, "w")) == NULL)
188                 {
189                         ErrShow();
190                         return 1;
191                 }
192         }
193 
194         if ((myargs[13].strvalue != NULL) && (make_quality))
195         {
196                 if ( (ql = FileOpen (myargs[13].strvalue, "w")) == NULL)
197                 {
198                         ErrShow();
199                         return 1;
200                 }
201         }
202 
203                                 /* log errors instead of die */
204     if (myargs[5].strvalue != NULL)
205     {
206         if (! ErrSetLog (myargs[5].strvalue))
207             ErrShow();
208         else
209             ErrSetOpts (ERR_CONTINUE, ERR_LOG_ON);
210    }
211 
212         if (myargs[6].intvalue)  /* combine segmented seqs */
213         {
214                 group_segs = 1;
215                 if (myargs[10].intvalue)
216                         group_segs = 3;       /* and instantiate virtuals */
217         }
218 
219         limit_to_genbank = (Boolean)(myargs[9].intvalue);
220 
221         if (myargs [15].intvalue) {
222                 ID1BioseqFetchEnable ("asn2fast", FALSE);
223         }
224         if (myargs [16].intvalue) {
225                 LocalSeqFetchInit (FALSE);
226         }
227 
228         if ( myargs[1].intvalue)   /* read one Seq-entry */
229         {
230 
231                 sep = SeqEntryAsnRead(aip, NULL);
232                 do_it = TRUE;
233                 if (limit_to_genbank)
234                         do_it = CheckIsGenBank(sep);
235                 if (do_it)
236                 {
237                         if (make_protein)
238                                 SeqEntrysToFasta(sep, aa, FALSE, group_segs);
239                         if (make_dna)
240                                 SeqEntrysToFasta(sep, na, TRUE, group_segs);
241                         if (make_quality) {
242                                 if (far_quality) {
243                                         SeqEntryExplore (sep, (Pointer) ql, PrintFarQualScores);
244                                 } else {
245                                         SeqEntryExplore (sep, (Pointer) ql, PrintQualScores);
246                                 }
247                         }
248                 }
249                 SeqEntryFree(sep);
250         }
251         else if ( myargs[11].intvalue)   /* read Seq-submit's */
252         {
253                 while ((atp = AsnReadId(aip, amp, atp)) != NULL)
254                 {
255                         if (atp == atp2)    /* top level Seq-entry */
256                         {
257                                 ssp = SeqSubmitAsnRead(aip, atp);
258                                 if (ssp->datatype == 1)
259                                 {
260                                         sep = (SeqEntryPtr) ssp->data;
261                                         do_it = TRUE;
262                                         if (limit_to_genbank)
263                                                 do_it = CheckIsGenBank(sep);
264                                         if (do_it)
265                                         {
266                                                 if (make_protein)
267                                                         SeqEntrysToFasta(sep, aa, FALSE, group_segs);
268                                                 if (make_dna)
269                                                         SeqEntrysToFasta(sep, na, TRUE, group_segs);
270                                                 if (make_quality) {
271                                                         if (far_quality) {
272                                                                 SeqEntryExplore (sep, (Pointer) ql, PrintFarQualScores);
273                                                         } else {
274                                                                 SeqEntryExplore (sep, (Pointer) ql, PrintQualScores);
275                                                         }
276                                                 }
277                                         }
278                                 }
279                                 SeqSubmitFree(ssp);
280                         }
281                         else
282                         {
283                                 AsnReadVal(aip, atp, NULL);
284                         }
285                 }
286         }
287         else                      /* read Seq-entry's from a Bioseq-set */
288         {
289                 while ((atp = AsnReadId(aip, amp, atp)) != NULL)
290                 {
291                         if (atp == atp2)    /* top level Seq-entry */
292                         {
293                                 sep = SeqEntryAsnRead(aip, atp);
294                                 do_it = TRUE;
295                                 if (limit_to_genbank)
296                                         do_it = CheckIsGenBank(sep);
297                                 if (do_it)
298                                 {
299                                         if (make_protein)
300                                                 SeqEntrysToFasta(sep, aa, FALSE, group_segs);
301                                         if (make_dna)
302                                                 SeqEntrysToFasta(sep, na, TRUE, group_segs);
303                                         if (make_quality) {
304                                                 if (far_quality) {
305                                                         SeqEntryExplore (sep, (Pointer) ql, PrintFarQualScores);
306                                                 } else {
307                                                         SeqEntryExplore (sep, (Pointer) ql, PrintQualScores);
308                                                 }
309                                         }
310                                 }
311                                 SeqEntryFree(sep);
312                         }
313                         else
314                         {
315                                 AsnReadVal(aip, atp, NULL);
316                         }
317                 }
318         }
319 
320         AsnIoClose(aip);
321         if (make_protein)
322                 FileClose(aa);
323         if (make_dna)
324                 FileClose(na);
325         if (make_quality)
326                 FileClose (ql);
327 
328         if (myargs [16].intvalue) {
329                 LocalSeqFetchDisable ();
330         }
331         if (myargs [15].intvalue) {
332                 ID1BioseqFetchDisable ();
333         }
334 
335         return(0);
336 }
337 
338 void FindGenBank (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent);
339 
340 Boolean CheckIsGenBank(SeqEntryPtr sep)
341 {
342         Boolean retval = FALSE;
343 
344         SeqEntryExplore(sep, (Pointer)(&retval), FindGenBank);
345 
346         return retval;
347 }
348 
349 void FindGenBank (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
350 {
351         BoolPtr ptr;
352         BioseqPtr bsp;
353         ValNodePtr vnp;
354 
355         ptr = (BoolPtr)data;
356         if (*ptr == TRUE)   /* already know */
357                 return;
358 
359         if (IS_Bioseq(sep))
360         {
361 
362            bsp = (BioseqPtr)(sep->data.ptrvalue); 
363                             /* GenBank is a limited view of the world */
364            if ( (ISA_na(bsp->mol)) && ( (bsp->repr == Seq_repr_raw) || (bsp->repr == Seq_repr_delta) ) )
365            {
366                         for (vnp = bsp->id; vnp != NULL; vnp = vnp->next)
367                         {
368                                 switch (vnp->choice)
369                                 {
370                                         case SEQID_GENBANK:
371                                         case SEQID_EMBL:
372                                         case SEQID_DDBJ:
373                                                 *ptr = TRUE;
374                                                 return;
375                                         default:
376                                                 break;
377                                 }
378                         }
379            }
380         }
381 
382         return;
383 }
384 
385 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.