NCBI C Toolkit Cross Reference

C/demo/asn2ff.c


  1  /*****************************************************************************
  2 *
  3 *   asn2ff.c
  4 *       convert an ASN.1 entry to flat file format, using the FFPrintArrayPtrs. 
  5 *
  6 *****************************************************************************/
  7 #include <accentr.h>
  8 #include "asn2ff.h"
  9 #include "asn2ffp.h"
 10 #include "ffprint.h"
 11 #include <subutil.h>
 12 #include <objall.h>
 13 #include <objcode.h>
 14 #include <lsqfetch.h>
 15 #include <explore.h>
 16 
 17 #ifdef ENABLE_ID1
 18 #include <accid1.h>
 19 #endif
 20 
/* Stream that myHook writes toolkit messages to.  Main opens it from the
 * "Output error logfile" argument (myargs[17]) before installing the hook.
 */
FILE *fpl;
 22 
/* Command-line argument table handed to GetArgs() in Main.
 * Main reads the parsed values by numeric index (myargs[N]), so the position
 * of every entry is significant: do not reorder or insert entries without
 * updating those indices.  The bracketed number in each comment below is the
 * entry's index; the letter is its command-line flag.
 */
Args myargs[] = {
        /* [0] -a */
        {"Filename for asn.1 input","stdin",NULL,NULL,TRUE,'a',ARG_FILE_IN,0.0,0,NULL},
        /* [1] -e */
        {"Input is a Seq-entry","F", NULL ,NULL ,TRUE,'e',ARG_BOOLEAN,0.0,0,NULL},
        /* [2] -b */
        {"Input asnfile in binary mode","F",NULL,NULL,TRUE,'b',ARG_BOOLEAN,0.0,0,NULL},
        /* [3] -o */
        {"Output Filename","stdout", NULL,NULL,TRUE,'o',ARG_FILE_OUT,0.0,0,NULL},
        /* [4] -h */
        {"Show Sequence?","T", NULL ,NULL ,TRUE,'h',ARG_BOOLEAN,0.0,0,NULL},
        /* [5] -l : no default; Main only enables error logging when this is set */
        {"Log errors to file named:",NULL,NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL},
        /* [6] -f */
        {"Output Format?: b for GenBank, p for GenPept, e for EMBL, s for PseudoEMBL, x for   GenBankSelect, z for EMBLPEPT",
        "b", NULL,NULL,TRUE,'f',ARG_STRING,0.0,0,NULL},
        /* [7] -m */
        {"Output mode?: r for release, d for dump, s for Sequin, c for Chromoscope, k for dir-sub-debug, l for dir-sub, e for revise, p for partial report","r", NULL ,NULL ,TRUE,'m',ARG_STRING,0.0,0,NULL},
        /* [8] -s */
        {"Input is a Seq-submit","F", NULL ,NULL ,TRUE,'s',ARG_BOOLEAN,0.0,0,NULL},
        /* [9] -g */
        {"Show gi numbers?","F", NULL ,NULL ,TRUE,'g',ARG_BOOLEAN,0.0,0,NULL},
        /* [10] -n */
        {"Non-Strict gene_binding","T", NULL ,NULL ,TRUE,'n',ARG_BOOLEAN,0.0,0,NULL},
        /* [11] -v */
        {"Show error messages","T", NULL ,NULL ,TRUE,'v',ARG_BOOLEAN,0.0,0,NULL},
        /* [12] -t */
        {"Show verbose message text","F", NULL ,NULL ,TRUE,'t',ARG_BOOLEAN,0.0,0,NULL},
        /* [13] -w */
        {"Use HTML output format?","F", NULL,NULL,TRUE,'w',ARG_BOOLEAN,0.0,0,NULL},
        /* [14] -q */
        {"Output is one top bioseq only","F", NULL ,NULL ,TRUE,'q',
        ARG_BOOLEAN,0.0,0,NULL},
        /* [15] -G */
        {"Output is one top bioseq only in genome view","F", NULL ,NULL ,TRUE,'G',
        ARG_BOOLEAN,0.0,0,NULL},
        /* [16] -M */
        {"Output is map bioseqs only ","F", NULL ,NULL ,TRUE,'M',
        ARG_BOOLEAN,0.0,0,NULL},
        /* [17] -r : file that Main opens as fpl */
        {"Output error logfile","stderr", NULL,NULL,TRUE,'r',ARG_FILE_OUT,0.0,0,NULL},
        /* [18] -p */
        {"Show new gene features?","T",NULL,NULL,TRUE,'p',ARG_BOOLEAN,0.0,0,NULL},
        /* [19] -z */
        {"New algorithm for orgnames?",
        "F",NULL,NULL,TRUE,'z',ARG_BOOLEAN,0.0,0,NULL},
        /* [20] -y */
        {"Print help format only?",
        "F",NULL,NULL,TRUE,'y',ARG_BOOLEAN,0.0,0,NULL},
        /* [21] -A : declared ARG_FLOAT; Main reads .floatvalue and subtracts 1 */
        {"From to show a region", "0", NULL, NULL, TRUE, 'A', ARG_FLOAT, 0.0, 0, NULL},
        /* [22] -B : declared ARG_FLOAT; a non-zero value selects region output in Main */
        {"To to show a region", "0", NULL, NULL, TRUE, 'B', ARG_FLOAT, 0.0, 0, NULL},
        /* [23] -k */
        {"Complex sets (phy-set,mut-set, pop-set)?",
        "T",NULL,NULL,TRUE,'k',ARG_BOOLEAN,0.0,0,NULL},
        /* [24] -d */
        {"Use SeqMgr indexing?","F",NULL,NULL,TRUE,'d',ARG_BOOLEAN,0.0,0,NULL},
        /* [25] -V */
        {"Use VERSION?","T",NULL,NULL,TRUE,'V',ARG_BOOLEAN,0.0,0,NULL},
        /* [26] -C */
        {"Show Bankit comments?","F",NULL,NULL,TRUE,'C',ARG_BOOLEAN,0.0,0,NULL},
        /* [27] -R */
        {"For GenBank Release?","F",NULL,NULL,TRUE,'R',ARG_BOOLEAN,0.0,0,NULL},
        /* [28] -L */
        {"New LOCUS line format?","T",NULL,NULL,TRUE,'L',ARG_BOOLEAN,0.0,0,NULL}
        };
 61 
 62 
 63 static MsgAnswer LIBCALLBACK myHook (MsgKey key, ErrSev sev, const char *caption, const char *message)
 64 {
 65         fprintf(fpl, "%s\n", message);
 66         return ANS_OK;
 67 }
 68 
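/* NOTE(review): the local FindNuc below is disabled, yet Main still passes
 * FindNuc to SeqEntryExplore.  Presumably the definition is now supplied by
 * one of the included toolkit headers -- confirm before deleting this copy.
 */
/*static void FindNuc(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr PNTR bp;
    BioseqPtr local_bsp;
 
    bp = (BioseqPtr PNTR) data;
    if (IS_Bioseq(sep))
    {
        local_bsp = (BioseqPtr) sep->data.ptrvalue;
        if (ISA_na(local_bsp->mol))
          *bp = local_bsp;
    }
}
*/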
 83 static void CheckForCookedBioseqs (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
 84 
 85 {
 86   BoolPtr    bp;
 87   BioseqPtr  bsp;
 88 
 89   if (sep == NULL) return;
 90   if (! IS_Bioseq (sep)) return;
 91   bp = (BoolPtr) mydata;
 92   if (bp == NULL) return;
 93   bsp = (BioseqPtr) sep->data.ptrvalue;
 94   if (bsp == NULL) return;
 95   if (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_seg) {
 96     *bp = FALSE;
 97   }
 98 }
 99 
100 static void IndexASeqEntry (SeqEntryPtr sep, Boolean useSeqMgrIndexes)
101 
102 {
103   Boolean  allRawOrSeg = TRUE;
104   Uint2    entityID;
105 
106   if (sep == NULL || (! useSeqMgrIndexes)) return;
107   while (sep != NULL) {
108     SeqEntryExplore (sep, (Pointer) (&allRawOrSeg), CheckForCookedBioseqs);
109     if (allRawOrSeg) {
110       entityID = ObjMgrGetEntityIDForChoice (sep);
111       SeqMgrIndexFeatures (entityID, NULL);
112     }
113     sep = sep->next;
114   }
115 }
116 
117 Int2 Main(void)
118 {
119 
120         AsnIoPtr aip;
121         AsnTypePtr atp = NULL, atp1, atp2;
122         AsnModulePtr amp;
123         Boolean error_msgs=TRUE, show_gi= TRUE, show_seq = TRUE, non_strict=TRUE;
124         SeqEntryPtr the_set;
125         SeqSubmitPtr ssp;
126         StdPrintOptionsPtr Spop = NULL;
127         FILE *fp;
128         Uint1 format, mode;
129         Boolean good = FALSE;
130         Asn2ffJobPtr            ajp;
131         Uint2 entityID;
132         Int4 num, total;
133         SeqLocPtr slp;
134         SeqIntPtr sip;
135         BioseqPtr bsp;
136         Boolean useSeqMgrIndexes;
137         /*
138         ValNode v;
139         LinkStrPtr lsp;
140         */
141 
142         if ( ! GetArgs("asn2ff", sizeof(myargs)/sizeof(Args), myargs))
143                 return 1;
144         ErrSetMessageLevel(SEV_NONE);
145         ErrSetOptFlags(EO_SHOW_CODES);
146         ErrSetOptFlags(EO_XLATE_CODES);
147         if (myargs[12].intvalue)   /* show the verbose error messages? */
148                 ErrSetOptFlags(EO_MSG_MSGTEXT);
149 
150         if (myargs[13].intvalue)   /* use HTML format for output? */
151                 init_www();
152         if (! SeqEntryLoad())
153                 ErrShow();
154         
155         if (myargs[8].intvalue) {
156                 if (! SubmitAsnLoad())
157                         Message(MSG_FATAL, "Unable to load parse trees.");
158                 
159                 atp1 = AsnFind("Seq-submit");
160                 if (atp1 == NULL)
161                         Message(MSG_FATAL, "Unable to find Seq-submit");
162                 atp = AsnFind("Seq-submit");
163                 if (atp == NULL)
164                         Message(MSG_FATAL, "Unable to find Seq-submit");
165 
166         } else {
167                 atp = AsnFind("Bioseq-set"); /* get the initial type pointers */
168                 if (atp == NULL)
169                         ErrShow();
170         
171                 atp2 = AsnFind("Bioseq-set.seq-set.E");
172                 if (atp2 == NULL)
173                         ErrShow();
174         }
175 
176                         /* open the i/o files in the right mode */
177 
178         if ((aip = 
179                 AsnIoOpen (myargs[0].strvalue, myargs[2].intvalue?"rb":"r")) == NULL)
180                 exit (1);
181 
182         if ( (fp = FileOpen (myargs[3].strvalue, "w")) == NULL) {
183                 ErrPostEx(SEV_ERROR,0,0, "Can't open %s", myargs[3].strvalue);
184                 exit (1);
185         }
186         if ( (fpl = FileOpen (myargs[17].strvalue, "w")) == NULL) {
187                 ErrPostEx(SEV_ERROR,0,0, "Can't open %s", myargs[17].strvalue);
188                 exit (1);
189         }
190 
191         SetMessageHook(myHook);
192 
193     if (myargs[5].strvalue != NULL) {                   /* log errors instead of die */
194         if (! ErrSetLog (myargs[5].strvalue))
195             ErrShow();
196         else
197             ErrSetOpts (ERR_TEE, ERR_LOG_ON);
198     }
199 
200         if (! myargs[4].intvalue)   /* show the sequence */
201                 show_seq = FALSE;
202 
203         if (! myargs[9].intvalue)   /* show the gi numbers? */
204                 show_gi = FALSE;
205 
206         if (!myargs[10].intvalue)   /* Non-Strict binding of genes to feats */
207                 non_strict = FALSE;
208 
209         if (! myargs[11].intvalue)   /* Show validator error messages */
210                 error_msgs = FALSE;
211 
212         format = GENBANK_FMT;
213         if (StringICmp(myargs[6].strvalue, "b") == 0) {
214                 format = GENBANK_FMT;
215         } else if (StringICmp(myargs[6].strvalue, "p") == 0) {
216                 format = GENPEPT_FMT;
217         } else if (StringICmp(myargs[6].strvalue, "e") == 0) {
218                 format = EMBL_FMT;
219         } else if (StringICmp(myargs[6].strvalue, "s") == 0) {
220                 format = PSEUDOEMBL_FMT;
221         } else if (StringICmp(myargs[6].strvalue, "x") == 0) {
222                 format = SELECT_FMT;
223         } else if (StringICmp(myargs[6].strvalue, "z") == 0) {
224                 format = EMBLPEPT_FMT;
225         }
226         if (format == GENPEPT_FMT) {
227                 if (!PrintTemplateSetLoad ("asn2ff.prt")) {
228                         ErrPostEx(SEV_WARNING, 1, 1, "PrintTemplateSetLoad failed");
229                 }
230                 if ((Spop = StdPrintOptionsNew(NULL)) != NULL) {
231                         Spop->newline = "~";
232                         Spop->indent = "";
233                 } else {
234                         ErrPostEx (SEV_FATAL, 1, 1, "StdPrintOptionsNew failed");
235                 }
236         }
237 
238         mode = RELEASE_MODE;
239         if (StringICmp(myargs[7].strvalue, "r") == 0)
240                 mode = RELEASE_MODE;
241         if (StringICmp(myargs[7].strvalue, "l") == 0)
242                 mode = DIRSUB_MODE;
243         if (StringICmp(myargs[7].strvalue, "k") == 0)
244                 mode = DIRSUB_DEBUG_MODE;
245         if (StringICmp(myargs[7].strvalue, "e") == 0)
246                 mode = REVISE_MODE;
247         if (StringICmp(myargs[7].strvalue, "d") == 0)
248                 mode = DUMP_MODE;
249         if (StringICmp(myargs[7].strvalue, "s") == 0)
250                 mode = SEQUIN_MODE;
251         if (StringICmp(myargs[7].strvalue, "c") == 0)
252                 mode = CHROMO_MODE;
253         if (StringICmp(myargs[7].strvalue, "p") == 0)
254                 mode = PARTIAL_MODE;
255 
256 #ifdef ENABLE_ENTREZ
257         EntrezBioseqFetchEnable ("asn2ff", FALSE); 
258 #endif
259 #ifdef ENABLE_ID1
260         ID1BioseqFetchEnable ("asn2ff", FALSE); 
261 #endif
262 #ifdef ENABLE_LOCAL
263         BioseqFetchInit(FALSE);
264 #endif
265         ajp = (Asn2ffJobPtr) MemNew(sizeof(Asn2ffJob));
266         ajp->show_gene = myargs[18].intvalue;
267         ajp->show_seq = show_seq;
268         ajp->show_gi = show_gi;
269         ajp->error_msgs = error_msgs;
270         ajp->non_strict = non_strict;
271         ajp->null_str = FALSE;
272         ajp->format = format;
273         ajp->mode = mode;
274         ajp->fp = fp;
275         ajp->Spop = Spop;
276         ajp->gb_style = TRUE;  /* show only non_right_truncated features */
277         
278         if (myargs[25].intvalue) {
279                 ajp->show_version = TRUE;
280         }
281         if (myargs[14].intvalue) {
282                 ajp->gb_style = FALSE;
283                 ajp->only_one = TRUE;
284                 ajp->ignore_top = FALSE;
285         }
286         if (myargs[15].intvalue) {
287                 ajp->ignore_top = TRUE;
288                 ajp->genome_view = TRUE;
289         }
290         if (myargs[16].intvalue) {
291                 ajp->map_view = TRUE;
292         }
293         if (myargs[19].intvalue) {
294                 ajp->orgname = TRUE;
295         }
296         if (myargs[20].intvalue) {
297                 ajp->help = TRUE;
298         }
299         if (myargs[26].intvalue) {
300                 ajp->bankit = TRUE;
301         }
302         if (myargs[27].intvalue) {
303                 ajp->forgbrel = TRUE;
304         }
305         if (myargs[28].intvalue) {
306                 ajp->old_locus_fmt = FALSE;
307         } else {
308                 ajp->old_locus_fmt = TRUE;
309         }
310         useSeqMgrIndexes = (Boolean)(myargs[24].intvalue);
311         /* get pointer to all loaded ASN.1 modules */
312         amp = AsnAllModPtr();
313         if (amp == NULL)
314                 ErrShow();
315 
316         total = 0;
317         num = 0;
318         if (myargs[22].floatvalue) {
319                 if (myargs[1].intvalue) {
320                         the_set = SeqEntryAsnRead(aip, NULL);
321                         bsp = NULL;
322                         SeqEntryExplore(the_set, &bsp, FindNuc);
323                         if (bsp == NULL) {
324                                 ErrPostEx(SEV_WARNING, 1, 1, "Couldn't find valid bioseq\n");
325                                 SeqEntryFree(the_set);
326                                 exit (1);
327                         }
328                         num = 1;                        
329                         slp = ValNodeNew(NULL);
330                         sip = SeqIntNew();
331                         slp->choice = SEQLOC_INT;
332                         slp->data.ptrvalue = sip;
333                         sip->from = 0;
334                         if (myargs[21].floatvalue > 0) {
335                                 sip->from = myargs[21].floatvalue-1;
336                         }
337                         if (myargs[22].floatvalue > bsp->length) {
338                                 sip->to = bsp->length-1;
339                         } else {
340                                 sip->to = myargs[22].floatvalue-1;
341                         }
342                         sip->id = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
343                         IndexASeqEntry (the_set, useSeqMgrIndexes);
344                         SeqLocToFlat(slp, fp, format, ajp->mode);
345                 }
346         } else if (myargs[8].intvalue) {
347                 if (mode == DUMP_MODE) {
348                     aip->scan_for_start = TRUE;  /* scan past any garbage */
349                 }
350                 while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
351                         if (atp == atp1) {      
352                                 ssp = SeqSubmitAsnRead(aip, atp);
353                                 if (ssp->datatype == 1) {
354                                         IndexASeqEntry ((SeqEntryPtr) ssp->data, useSeqMgrIndexes);
355                         if (SeqEntryToFlatAjp (ajp, (SeqEntryPtr) ssp->data, fp, ajp->format, ajp->mode)) {
356                                 num++;
357                         }
358                                 /*      if ((SeqSubmitToFlat(ssp, fp, mode, FALSE, format, 
359                                                         myargs[18].intvalue)) == TRUE)
360                                         {
361                                                 num++;
362                                         }*/
363                                 } 
364                                 SeqSubmitFree(ssp);
365                                 
366                         } else {
367                                 AsnReadVal(aip, atp, NULL);
368                         } 
369                 }
370         } else if (myargs[1].intvalue) {
371                 the_set = SeqEntryAsnRead(aip, NULL);
372                 total++;
373 /*********TEST*******
374         v.choice = SEQID_GI;
375         v.data.intvalue = 455854;
376         SeqEntryToFlatEx (the_set, fp, ajp->format, ajp->mode, &v, 0);
377                         if (mode == PARTIAL_MODE) {
378                         SeqEntryToPartRpt(the_set, stdout);
379         for (lsp=SeqEntryToStrArrayEx(the_set,  ajp->format, 5866992, TRUE); lsp;
380                 lsp=lsp->next) {
381                 printf ("%s", lsp->line);
382         }
383         exit (0);
384  *********TEST*******/
385                 if (myargs[23].intvalue) { /* complex sets */
386                         IndexASeqEntry (the_set, useSeqMgrIndexes);
387                         if (SeqEntryToFlatAjp (ajp, the_set, fp, ajp->format, ajp->mode)) {
388                                 num++;
389                         }
390                 } else {
391                         if ((entityID = ObjMgrGetEntityIDForPointer(the_set)) == 0) {
392                                 ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
393                         }
394                         ajp->entityID = entityID;
395                         ajp->sep = the_set;
396                         IndexASeqEntry (the_set, useSeqMgrIndexes);
397                         if (asn2ff_print(ajp)) {
398                                 num++;
399                         }
400                 }
401                 SeqEntryFree(the_set);
402         } else {
403                 while ((atp = AsnReadId(aip, amp, atp)) != NULL) {
404                         if (atp == atp2) {  /* top level Seq-entry */
405                                 the_set = SeqEntryAsnRead(aip, atp);
406                                 total++;
407                                 if (myargs[23].intvalue) { /* complex sets */
408                                         IndexASeqEntry (the_set, useSeqMgrIndexes);
409                                         if (SeqEntryToFlatAjp (ajp, the_set, fp, ajp->format, ajp->mode))
410                                                 num++;
411                                 } else {
412                                         if ((entityID=ObjMgrGetEntityIDForPointer(the_set)) == 0) {
413                                                 ErrPostStr(SEV_WARNING, 0, 0, "Couldn't get entityID");
414                                         }
415                                         ajp->entityID = entityID;
416                                         ajp->sep = the_set;
417                                         IndexASeqEntry (the_set, useSeqMgrIndexes);
418                                         if (asn2ff_print(ajp))
419                                                 num++;
420                                 }
421                                 SeqEntryFree(the_set);
422                         } else {
423                                 AsnReadVal(aip, atp, NULL);
424                         }
425                 }
426         }
427 
428 #ifdef ENABLE_ENTREZ
429         EntrezBioseqFetchDisable (); 
430 #endif
431 #ifdef ENABLE_LOCAL
432         BioseqFetchDisable();
433 #endif
434         if (num == 0) {
435                 ErrPostStr(SEV_WARNING, 1, 1, "No valid entries found");
436         } else if (num < total) {
437                 ErrPostEx(SEV_WARNING, 1, 1, "[%ld] entries have been processed [total - %ld]\n", num, total);
438         }
439         if (format == GENPEPT_FMT && Spop != NULL) {
440                 Spop = StdPrintOptionsFree(Spop);
441         }
442         MemFree(ajp);
443         AsnIoClose(aip);
444         FileClose(fp);
445         return(0);
446 }
447 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.