NCBI C Toolkit Cross Reference

C/demo/asn2gb.c


  1 /*   asn2gb.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  asn2gb.c
 27 *
 28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans
 29 *
 30 * Version Creation Date:   10/21/98
 31 *
 32 * $Revision: 6.137 $
 33 *
 34 * File Description:  New GenBank flatfile generator application
 35 *
 36 * Modifications:
 37 * --------------------------------------------------------------------------
 38 * ==========================================================================
 39 */
 40 
 41 #include <ncbi.h>
 42 #include <objall.h>
 43 #include <objsset.h>
 44 #include <objsub.h>
 45 #include <objfdef.h>
 46 #include <objgbseq.h>
 47 #include <objtseq.h>
 48 #include <sequtil.h>
 49 #include <sqnutils.h>
 50 #include <explore.h>
 51 #include <gather.h>
 52 #include <toasn3.h>
 53 #include <asn2gnbp.h>
 54 
 55 /* asn2gnbi.h needed to test PUBSEQGetAccnVer in accpubseq.c */
 56 #include <asn2gnbi.h>
 57 
/* Version string of this asn2gb application. */
#define ASN2GB_APP_VER "7.4"

/* Non-static global initialised with the version string above. */
CharPtr ASN2GB_APPLICATION = ASN2GB_APP_VER;
 61 
 62 static void SaveSeqEntry (
 63   SeqEntryPtr sep,
 64   CharPtr filename
 65 )
 66 
 67 {
 68   AsnIoPtr  aop;
 69 
 70   if (sep == NULL) return;
 71   aop = AsnIoOpen (filename, "w");
 72   if (aop != NULL) {
 73     SeqEntryAsnWrite (sep, aop, NULL);
 74   }
 75   AsnIoClose (aop);
 76 }
 77 
 78 static void SaveAsn2gnbk (
 79   SeqEntryPtr sep,
 80   CharPtr filename,
 81   FmtType format,
 82   ModType mode,
 83   StlType style,
 84   FlgType flags,
 85   LckType locks,
 86   CstType custom
 87 )
 88 
 89 {
 90   FILE  *fp;
 91 
 92   if (sep == NULL) return;
 93   fp = FileOpen (filename, "w");
 94   if (fp != NULL) {
 95     SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, NULL, fp);
 96   }
 97   FileClose (fp);
 98 }
 99 
100 static void GetFirstGoodBioseq (
101   BioseqPtr bsp,
102   Pointer userdata
103 )
104 
105 {
106   BioseqPtr PNTR bspp;
107 
108   bspp = (BioseqPtr PNTR) userdata;
109   if (*bspp != NULL) return;
110   *bspp = bsp;
111 }
112 
113 NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
114 
115 static void SaveTinySeqs (
116   BioseqPtr bsp,
117   Pointer userdata
118 )
119 
120 {
121   AsnIoPtr  aip;
122   TSeqPtr   tsp;
123 
124   if (bsp == NULL) return;
125   aip = (AsnIoPtr) userdata;
126 
127   tsp = BioseqToTSeq (bsp);
128   if (tsp == NULL) return;
129 
130   TSeqAsnWrite (tsp, aip, NULL);
131   AsnPrintNewLine (aip);
132   AsnIoFlush (aip);
133 
134   TSeqFree (tsp);
135 }
136 
137 static void SaveTinyStreams (
138   BioseqPtr bsp,
139   Pointer userdata
140 )
141 
142 {
143   AsnIoPtr  aip;
144 
145   if (bsp == NULL) return;
146   aip = (AsnIoPtr) userdata;
147 
148   BioseqAsnWriteAsTSeq (bsp, aip, NULL);
149   AsnPrintNewLine (aip);
150   AsnIoFlush (aip);
151 }
152 
153 #ifdef INTERNAL_NCBI_ASN2GB
/* Name under which the DirSub fetch procedure is registered with ObjMgrProcLoad. */
static CharPtr dirsubfetchproc = "DirSubBioseqFetch";

/* Cached SEQUIN / DIRSUB / FETCHSCRIPT setting; filled in on first use, NULL until then. */
static CharPtr dirsubfetchcmd = NULL;
157 
158 extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
159 extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
160 
161 {
162   Char     cmmd [256];
163   Pointer  dataptr;
164   FILE*    fp;
165   Char     path [PATH_MAX];
166 
167   if (datatype != NULL) {
168     *datatype = 0;
169   }
170   if (entityID != NULL) {
171     *entityID = 0;
172   }
173   if (StringHasNoText (accn)) return NULL;
174 
175   if (dirsubfetchcmd == NULL) {
176     if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
177         dirsubfetchcmd = StringSaveNoNull (cmmd);
178     }
179   }
180   if (dirsubfetchcmd == NULL) return NULL;
181 
182   TmpNam (path);
183 
184 #ifdef OS_UNIX
185   sprintf (cmmd, "csh %s %s > %s", dirsubfetchcmd, accn, path);
186   system (cmmd);
187 #endif
188 #ifdef OS_MSWIN
189   sprintf (cmmd, "%s %s -o %s", dirsubfetchcmd, accn, path);
190   system (cmmd);
191 #endif
192 
193   fp = FileOpen (path, "r");
194   if (fp == NULL) {
195     FileRemove (path);
196     return NULL;
197   }
198   dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
199   FileClose (fp);
200   FileRemove (path);
201   return dataptr;
202 }
203 
204 
205 static Int2 LIBCALLBACK DirSubBioseqFetchFunc (Pointer data)
206 
207 {
208   BioseqPtr         bsp;
209   Char              cmmd [256];
210   Pointer           dataptr;
211   Uint2             datatype;
212   Uint2             entityID;
213   FILE*             fp;
214   OMProcControlPtr  ompcp;
215   ObjMgrProcPtr     ompp;
216   Char              path [PATH_MAX];
217   SeqEntryPtr       sep = NULL;
218   SeqIdPtr          sip;
219   TextSeqIdPtr      tsip;
220 
221   ompcp = (OMProcControlPtr) data;
222   if (ompcp == NULL) return OM_MSG_RET_ERROR;
223   ompp = ompcp->proc;
224   if (ompp == NULL) return OM_MSG_RET_ERROR;
225   sip = (SeqIdPtr) ompcp->input_data;
226   if (sip == NULL) return OM_MSG_RET_ERROR;
227 
228   if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
229   tsip = (TextSeqIdPtr) sip->data.ptrvalue;
230   if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
231 
232   if (dirsubfetchcmd == NULL) {
233     if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
234         dirsubfetchcmd = StringSaveNoNull (cmmd);
235     }
236   }
237   if (dirsubfetchcmd == NULL) return OM_MSG_RET_ERROR;
238 
239   TmpNam (path);
240 
241 #ifdef OS_UNIX
242   sprintf (cmmd, "csh %s %s > %s", dirsubfetchcmd, tsip->accession, path);
243   system (cmmd);
244 #endif
245 #ifdef OS_MSWIN
246   sprintf (cmmd, "%s %s -o %s", dirsubfetchcmd, tsip->accession, path);
247   system (cmmd);
248 #endif
249 
250   fp = FileOpen (path, "r");
251   if (fp == NULL) {
252     FileRemove (path);
253     return OM_MSG_RET_ERROR;
254   }
255   dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
256   FileClose (fp);
257   FileRemove (path);
258 
259   if (dataptr == NULL) return OM_MSG_RET_OK;
260 
261   sep = GetTopSeqEntryForEntityID (entityID);
262   if (sep == NULL) return OM_MSG_RET_ERROR;
263   bsp = BioseqFindInSeqEntry (sip, sep);
264   ompcp->output_data = (Pointer) bsp;
265   ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
266   return OM_MSG_RET_DONE;
267 }
268 
269 static Boolean DirSubFetchEnable (void)
270 
271 {
272   ObjMgrProcLoad (OMPROC_FETCH, dirsubfetchproc, dirsubfetchproc,
273                   OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
274                   DirSubBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
275   return TRUE;
276 }
277 
/* Name under which the Smart fetch procedure is registered with ObjMgrProcLoad. */
static CharPtr smartfetchproc = "SmartBioseqFetch";

/* Cached SEQUIN / SMART / FETCHSCRIPT setting; filled in on first use, NULL until then. */
static CharPtr smartfetchcmd = NULL;
281 
282 extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
283 extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
284 
285 {
286   Char     cmmd [256];
287   Pointer  dataptr;
288   FILE*    fp;
289   Char     path [PATH_MAX];
290 
291   if (datatype != NULL) {
292     *datatype = 0;
293   }
294   if (entityID != NULL) {
295     *entityID = 0;
296   }
297   if (StringHasNoText (accn)) return NULL;
298 
299   if (smartfetchcmd == NULL) {
300     if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
301         smartfetchcmd = StringSaveNoNull (cmmd);
302     }
303   }
304   if (smartfetchcmd == NULL) return NULL;
305 
306   TmpNam (path);
307 
308 #ifdef OS_UNIX
309   sprintf (cmmd, "csh %s %s > %s", smartfetchcmd, accn, path);
310   system (cmmd);
311 #endif
312 #ifdef OS_MSWIN
313   sprintf (cmmd, "%s %s -o %s", smartfetchcmd, accn, path);
314   system (cmmd);
315 #endif
316 
317   fp = FileOpen (path, "r");
318   if (fp == NULL) {
319     FileRemove (path);
320     return NULL;
321   }
322   dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
323   FileClose (fp);
324   FileRemove (path);
325   return dataptr;
326 }
327 
328 
329 static Int2 LIBCALLBACK SmartBioseqFetchFunc (Pointer data)
330 
331 {
332   BioseqPtr         bsp;
333   Char              cmmd [256];
334   Pointer           dataptr;
335   Uint2             datatype;
336   Uint2             entityID;
337   FILE*             fp;
338   OMProcControlPtr  ompcp;
339   ObjMgrProcPtr     ompp;
340   Char              path [PATH_MAX];
341   SeqEntryPtr       sep = NULL;
342   SeqIdPtr          sip;
343   TextSeqIdPtr      tsip;
344 
345   ompcp = (OMProcControlPtr) data;
346   if (ompcp == NULL) return OM_MSG_RET_ERROR;
347   ompp = ompcp->proc;
348   if (ompp == NULL) return OM_MSG_RET_ERROR;
349   sip = (SeqIdPtr) ompcp->input_data;
350   if (sip == NULL) return OM_MSG_RET_ERROR;
351 
352   if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
353   tsip = (TextSeqIdPtr) sip->data.ptrvalue;
354   if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
355 
356   if (smartfetchcmd == NULL) {
357     if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
358         smartfetchcmd = StringSaveNoNull (cmmd);
359     }
360   }
361   if (smartfetchcmd == NULL) return OM_MSG_RET_ERROR;
362 
363   TmpNam (path);
364 
365 #ifdef OS_UNIX
366   sprintf (cmmd, "csh %s %s > %s", smartfetchcmd, tsip->accession, path);
367   system (cmmd);
368 #endif
369 #ifdef OS_MSWIN
370   sprintf (cmmd, "%s %s -o %s", smartfetchcmd, tsip->accession, path);
371   system (cmmd);
372 #endif
373 
374   fp = FileOpen (path, "r");
375   if (fp == NULL) {
376     FileRemove (path);
377     return OM_MSG_RET_ERROR;
378   }
379   dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
380   FileClose (fp);
381   FileRemove (path);
382 
383   if (dataptr == NULL) return OM_MSG_RET_OK;
384 
385   sep = GetTopSeqEntryForEntityID (entityID);
386   if (sep == NULL) return OM_MSG_RET_ERROR;
387   bsp = BioseqFindInSeqEntry (sip, sep);
388   ompcp->output_data = (Pointer) bsp;
389   ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
390   return OM_MSG_RET_DONE;
391 }
392 
393 static Boolean SmartFetchEnable (void)
394 
395 {
396   ObjMgrProcLoad (OMPROC_FETCH, smartfetchproc, smartfetchproc,
397                   OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
398                   SmartBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
399   return TRUE;
400 }
401 
/* Name under which the TPA Smart fetch procedure is registered with ObjMgrProcLoad. */
static CharPtr tpasmartfetchproc = "TPASmartBioseqFetch";

/* Cached SEQUIN / TPASMART / FETCHSCRIPT setting; filled in on first use, NULL until then. */
static CharPtr tpasmartfetchcmd = NULL;
405 
406 extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
407 extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
408 
409 {
410   Char     cmmd [256];
411   Pointer  dataptr;
412   FILE*    fp;
413   Char     path [PATH_MAX];
414 
415   if (datatype != NULL) {
416     *datatype = 0;
417   }
418   if (entityID != NULL) {
419     *entityID = 0;
420   }
421   if (StringHasNoText (accn)) return NULL;
422 
423   if (tpasmartfetchcmd == NULL) {
424     if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
425         tpasmartfetchcmd = StringSaveNoNull (cmmd);
426     }
427   }
428   if (tpasmartfetchcmd == NULL) return NULL;
429 
430   TmpNam (path);
431 
432 #ifdef OS_UNIX
433   sprintf (cmmd, "csh %s %s > %s", tpasmartfetchcmd, accn, path);
434   system (cmmd);
435 #endif
436 #ifdef OS_MSWIN
437   sprintf (cmmd, "%s %s -o %s", tpasmartfetchcmd, accn, path);
438   system (cmmd);
439 #endif
440 
441   fp = FileOpen (path, "r");
442   if (fp == NULL) {
443     FileRemove (path);
444     return NULL;
445   }
446   dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
447   FileClose (fp);
448   FileRemove (path);
449   return dataptr;
450 }
451 
452 
453 static Int2 LIBCALLBACK TPASmartBioseqFetchFunc (Pointer data)
454 
455 {
456   BioseqPtr         bsp;
457   Char              cmmd [256];
458   Pointer           dataptr;
459   Uint2             datatype;
460   Uint2             entityID;
461   FILE*             fp;
462   OMProcControlPtr  ompcp;
463   ObjMgrProcPtr     ompp;
464   Char              path [PATH_MAX];
465   SeqEntryPtr       sep = NULL;
466   SeqIdPtr          sip;
467   TextSeqIdPtr      tsip;
468 
469   ompcp = (OMProcControlPtr) data;
470   if (ompcp == NULL) return OM_MSG_RET_ERROR;
471   ompp = ompcp->proc;
472   if (ompp == NULL) return OM_MSG_RET_ERROR;
473   sip = (SeqIdPtr) ompcp->input_data;
474   if (sip == NULL) return OM_MSG_RET_ERROR;
475 
476   if (sip->choice != SEQID_TPG) return OM_MSG_RET_ERROR;
477   tsip = (TextSeqIdPtr) sip->data.ptrvalue;
478   if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
479 
480   if (tpasmartfetchcmd == NULL) {
481     if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
482         tpasmartfetchcmd = StringSaveNoNull (cmmd);
483     }
484   }
485   if (tpasmartfetchcmd == NULL) return OM_MSG_RET_ERROR;
486 
487   TmpNam (path);
488 
489 #ifdef OS_UNIX
490   sprintf (cmmd, "csh %s %s > %s", tpasmartfetchcmd, tsip->accession, path);
491   system (cmmd);
492 #endif
493 #ifdef OS_MSWIN
494   sprintf (cmmd, "%s %s -o %s", tpasmartfetchcmd, tsip->accession, path);
495   system (cmmd);
496 #endif
497 
498   fp = FileOpen (path, "r");
499   if (fp == NULL) {
500     FileRemove (path);
501     return OM_MSG_RET_ERROR;
502   }
503   dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
504   FileClose (fp);
505   FileRemove (path);
506 
507   if (dataptr == NULL) return OM_MSG_RET_OK;
508 
509   sep = GetTopSeqEntryForEntityID (entityID);
510   if (sep == NULL) return OM_MSG_RET_ERROR;
511   bsp = BioseqFindInSeqEntry (sip, sep);
512   ompcp->output_data = (Pointer) bsp;
513   ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
514   return OM_MSG_RET_DONE;
515 }
516 
517 static Boolean TPASmartFetchEnable (void)
518 
519 {
520   ObjMgrProcLoad (OMPROC_FETCH, tpasmartfetchproc, tpasmartfetchproc,
521                   OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
522                   TPASmartBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
523   return TRUE;
524 }
525 #endif
526 
527 static Int2 HandleSingleRecord (
528   CharPtr inputFile,
529   CharPtr outputFile,
530   FmtType format,
531   FmtType altformat,
532   ModType mode,
533   StlType style,
534   FlgType flags,
535   LckType locks,
536   CstType custom,
537   XtraPtr extra,
538   Int2 type,
539   Boolean binary,
540   Boolean compressed,
541   Int4 from,
542   Int4 to,
543   Uint1 strand,
544   Uint4 itemID,
545   Boolean do_tiny_seq,
546   Boolean do_fasta_stream
547 )
548 
549 {
550   AsnIoPtr      aip;
551   BioseqPtr     bsp;
552   BioseqSetPtr  bssp;
553   Pointer       dataptr = NULL;
554   Uint2         datatype = 0;
555   Uint2         entityID;
556   FILE          *fp;
557   FILE          *ofp = NULL;
558   ObjMgrPtr     omp;
559   SeqEntryPtr   sep;
560   SeqFeatPtr    sfp;
561   SeqInt        sint;
562   SeqLocPtr     slp = NULL;
563   ValNode       vn;
564 
565   if (type == 1) {
566     fp = FileOpen (inputFile, "r");
567     if (fp == NULL) {
568       Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
569       return 1;
570     }
571 
572     dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
573 
574     FileClose (fp);
575 
576     entityID = ObjMgrRegister (datatype, dataptr);
577 
578   } else if (type >= 2 && type <= 5) {
579     aip = AsnIoOpen (inputFile, binary? "rb" : "r");
580     if (aip == NULL) {
581       Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", inputFile);
582       return 1;
583     }
584 
585     SeqMgrHoldIndexing (TRUE);
586     switch (type) {
587       case 2 :
588         dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
589         datatype = OBJ_SEQENTRY;
590         break;
591       case 3 :
592         dataptr = (Pointer) BioseqAsnRead (aip, NULL);
593         datatype = OBJ_BIOSEQ;
594         break;
595       case 4 :
596         dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
597         datatype = OBJ_BIOSEQSET;
598         break;
599       case 5 :
600         dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
601         datatype = OBJ_SEQSUB;
602         break;
603       default :
604         break;
605     }
606     SeqMgrHoldIndexing (FALSE);
607 
608     AsnIoClose (aip);
609 
610     entityID = ObjMgrRegister (datatype, dataptr);
611 
612   } else {
613     Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) type);
614     return 1;
615   }
616 
617   if (dataptr == NULL) {
618     Message (MSG_POSTERR, "Data read failed for input file '%s'", inputFile);
619     return 1;
620   }
621 
622   if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
623         datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
624 
625 /*
626 #ifdef WIN_MAC
627 #if __profile__
628     ProfilerSetStatus (TRUE);
629 #endif
630 #endif
631 */
632 
633     entityID = SeqMgrIndexFeatures (entityID, NULL);
634 
635 /*
636 #ifdef WIN_MAC
637 #if __profile__
638     ProfilerSetStatus (FALSE);
639 #endif
640 #endif
641 */
642 
643     sep = GetTopSeqEntryForEntityID (entityID);
644 
645     if (sep == NULL) {
646       sep = SeqEntryNew ();
647       if (sep != NULL) {
648         if (datatype == OBJ_BIOSEQ) {
649           bsp = (BioseqPtr) dataptr;
650           sep->choice = 1;
651           sep->data.ptrvalue = bsp;
652           SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
653         } else if (datatype == OBJ_BIOSEQSET) {
654           bssp = (BioseqSetPtr) dataptr;
655           sep->choice = 2;
656           sep->data.ptrvalue = bssp;
657           SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
658         } else {
659           sep = SeqEntryFree (sep);
660         }
661       }
662       sep = GetTopSeqEntryForEntityID (entityID);
663     }
664 
665     if (sep != NULL) {
666       if (extra == NULL || extra->gbseq == NULL) {
667         FileRemove (outputFile);
668 #ifdef WIN_MAC
669         FileCreate (outputFile, "TEXT", "ttxt");
670 #endif
671         ofp = FileOpen (outputFile, "w");
672       }
673 
674       if ((from > 0 && to > 0) || strand == Seq_strand_minus) {
675         bsp = NULL;
676         if (format == GENPEPT_FMT) {
677           VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_PROTS, GetFirstGoodBioseq);
678         } else {
679           VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_NUCS, GetFirstGoodBioseq);
680         }
681         if (bsp != NULL) {
682           if (strand == Seq_strand_minus && from == 0 && to == 0) {
683             from = 1;
684             to = bsp->length;
685           }
686           if (from < 0) {
687             from = 1;
688           } else if (from > bsp->length) {
689             from = bsp->length;
690           }
691           if (to < 0) {
692             to = 1;
693           } else if (to > bsp->length) {
694             to = bsp->length;
695           }
696           MemSet ((Pointer) &vn, 0, sizeof (ValNode));
697           MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
698           sint.from = from - 1;
699           sint.to = to - 1;
700           sint.strand = strand;
701           sint.id = SeqIdFindBest (bsp->id, 0);
702           vn.choice = SEQLOC_INT;
703           vn.data.ptrvalue = (Pointer) &sint;
704           slp = &vn;
705         }
706       } else if (itemID > 0) {
707         sfp = SeqMgrGetDesiredFeature (entityID, 0, itemID, 0, NULL, NULL);
708         if (sfp != NULL) {
709           slp = sfp->location;
710         }
711       }
712 
713       if (do_tiny_seq) {
714         aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
715         VisitBioseqsInSep (sep, (Pointer) aip, SaveTinySeqs);
716         AsnIoFree (aip, FALSE);
717       } else if (do_fasta_stream) {
718         aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
719         VisitBioseqsInSep (sep, (Pointer) aip, SaveTinyStreams);
720         AsnIoFree (aip, FALSE);
721       } else {
722         SeqEntryToGnbk (sep, slp, format, mode, style, flags, locks, custom, extra, ofp);
723         if (altformat != 0) {
724           SeqEntryToGnbk (sep, slp, altformat, mode, style, flags, locks, custom, extra, ofp);
725         }
726       }
727       if (ofp != NULL) {
728         FileClose (ofp);
729       }
730     }
731   } else {
732     Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
733   }
734 
735   omp = ObjMgrGet ();
736   ObjMgrReapOne (omp);
737   SeqMgrClearBioseqIndex ();
738   ObjMgrFreeCache (0);
739   FreeSeqIdGiCache ();
740 
741   SeqEntrySetScope (NULL);
742 
743   ObjMgrFree (datatype, dataptr);
744 
745   return 0;
746 }
747 
748 static Int2 HandleCatenatedRecord (
749   CharPtr inputFile,
750   CharPtr outputFile,
751   FmtType format,
752   FmtType altformat,
753   ModType mode,
754   StlType style,
755   FlgType flags,
756   LckType locks,
757   CstType custom,
758   XtraPtr extra,
759   Int2 type,
760   Boolean binary,
761   Boolean compressed,
762   Int4 from,
763   Int4 to,
764   Uint1 strand,
765   Uint4 itemID,
766   Boolean do_tiny_seq,
767   Boolean do_fasta_stream
768 )
769 
770 {
771   AsnIoPtr     aip;
772   BioseqPtr    bsp;
773   Pointer      dataptr = NULL;
774   Uint2        datatype = 0;
775   Uint2        entityID;
776   FILE         *fp;
777   FILE         *ofp = NULL;
778   ObjMgrPtr    omp;
779   SeqEntryPtr  sep;
780   SeqFeatPtr   sfp;
781   SeqInt       sint;
782   SeqLocPtr    slp = NULL;
783   ValNode      vn;
784 
785   fp = FileOpen (inputFile, "r");
786   if (fp == NULL) {
787     Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
788     return 1;
789   }
790 
791   SeqMgrHoldIndexing (TRUE);
792   dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
793   SeqMgrHoldIndexing (FALSE);
794 
795   if (extra == NULL || extra->gbseq == NULL) {
796     FileRemove (outputFile);
797 #ifdef WIN_MAC
798     FileCreate (outputFile, "TEXT", "ttxt");
799 #endif
800     ofp = FileOpen (outputFile, "w");
801   }
802 
803   while (dataptr != NULL) {
804 
805     entityID = ObjMgrRegister (datatype, dataptr);
806     sep = GetTopSeqEntryForEntityID (entityID);
807 
808 
809     if (sep != NULL) {
810       if ((from > 0 && to > 0) || strand == Seq_strand_minus) {
811         bsp = NULL;
812         if (format == GENPEPT_FMT) {
813           VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_PROTS, GetFirstGoodBioseq);
814         } else {
815           VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_NUCS, GetFirstGoodBioseq);
816         }
817         if (bsp != NULL) {
818           if (strand == Seq_strand_minus && from == 0 && to == 0) {
819             from = 1;
820             to = bsp->length;
821           }
822           if (from < 0) {
823             from = 1;
824           } else if (from > bsp->length) {
825             from = bsp->length;
826           }
827           if (to < 0) {
828             to = 1;
829           } else if (to > bsp->length) {
830             to = bsp->length;
831           }
832           MemSet ((Pointer) &vn, 0, sizeof (ValNode));
833           MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
834           sint.from = from - 1;
835           sint.to = to - 1;
836           sint.strand = strand;
837           sint.id = SeqIdFindBest (bsp->id, 0);
838           vn.choice = SEQLOC_INT;
839           vn.data.ptrvalue = (Pointer) &sint;
840           slp = &vn;
841         }
842       } else if (itemID > 0) {
843         sfp = SeqMgrGetDesiredFeature (entityID, 0, itemID, 0, NULL, NULL);
844         if (sfp != NULL) {
845           slp = sfp->location;
846         }
847       }
848 
849       if (do_tiny_seq) {
850         aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
851         VisitBioseqsInSep (sep, (Pointer) aip, SaveTinySeqs);
852         AsnIoFree (aip, FALSE);
853       } else if (do_fasta_stream) {
854         aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
855         VisitBioseqsInSep (sep, (Pointer) aip, SaveTinyStreams);
856         AsnIoFree (aip, FALSE);
857       } else {
858         SeqEntryToGnbk (sep, slp, format, mode, style, flags, locks, custom, extra, ofp);
859         if (altformat != 0) {
860           SeqEntryToGnbk (sep, slp, altformat, mode, style, flags, locks, custom, extra, ofp);
861         }
862       }
863     }
864 
865     ObjMgrFree (datatype, dataptr);
866   
867     omp = ObjMgrGet ();
868     ObjMgrReapOne (omp);
869     SeqMgrClearBioseqIndex ();
870     ObjMgrFreeCache (0);
871     FreeSeqIdGiCache ();
872   
873     SeqEntrySetScope (NULL);
874 
875     SeqMgrHoldIndexing (TRUE);
876     dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
877     SeqMgrHoldIndexing (FALSE);
878   }
879 
880   if (ofp != NULL) {
881     FileClose (ofp);
882   }
883 
884   FileClose (fp);
885 
886   return 0;
887 }
888 
/* Search state shared between SeqEntryHasGi and its LookForGi callback. */
typedef struct hasgidata {
  Int4     gi;     /* numeric id compared against SEQID_GI ids */
  CharPtr  accn;   /* accession compared (case-insensitively) against text ids; may be NULL */
  Boolean  found;  /* set to TRUE by LookForGi on the first match */
} HasGiData, PNTR HasGiPtr;
894 
895 static void LookForGi (
896   SeqEntryPtr sep,
897   Pointer mydata,
898   Int4 index,
899   Int2 indent
900 )
901 
902 {
903   BioseqPtr     bsp;
904   HasGiPtr      hgp;
905   SeqIdPtr      sip;
906   TextSeqIdPtr  tsip;
907 
908   if (sep == NULL) return;
909   if (! IS_Bioseq (sep)) return;
910   bsp = (BioseqPtr) sep->data.ptrvalue;
911   if (bsp == NULL) return;
912   hgp = (HasGiPtr) mydata;
913   if (hgp == NULL) return;
914   for (sip = bsp->id; sip != NULL; sip = sip->next) {
915     switch (sip->choice) {
916       case SEQID_GI :
917         if (sip->data.intvalue == hgp->gi) {
918           hgp->found = TRUE;
919           return;
920         }
921         break;
922       case SEQID_GENBANK :
923       case SEQID_EMBL :
924       case SEQID_PIR :
925       case SEQID_SWISSPROT :
926       case SEQID_OTHER :
927       case SEQID_DDBJ :
928       case SEQID_PRF :
929       case SEQID_TPG :
930       case SEQID_TPE :
931       case SEQID_TPD :
932       case SEQID_GPIPE :
933         tsip = (TextSeqIdPtr) sip->data.ptrvalue;
934         if (tsip != NULL && hgp->accn!= NULL &&
935             StringICmp (tsip->accession, hgp->accn) == 0) {
936           hgp->found = TRUE;
937           return;
938         }
939         break;
940       default :
941         break;
942     }
943   }
944 }
945 
946 static Boolean SeqEntryHasGi (
947   SeqEntryPtr sep,
948   CharPtr accn
949 )
950 
951 {
952   HasGiData  hgd;
953   long int   val;
954 
955   if (sep == NULL || StringHasNoText (accn)) return FALSE;
956   MemSet ((Pointer) &hgd, 0, sizeof (HasGiData));
957   if (sscanf (accn, "%ld", &val) == 1) {
958     hgd.gi = (Int4) val;
959   } else {
960     hgd.accn = accn;
961   }
962   hgd.found = FALSE;
963   SeqEntryExplore (sep, (Pointer) (&hgd), LookForGi);
964   return hgd.found;
965 }
966 
967 static void FreeUnpubAffil (
968   PubdescPtr pdp,
969   Pointer userdata
970 )
971 
972 {
973   AuthListPtr  alp;
974   CitGenPtr    cgp;
975   ValNodePtr   vnp;
976 
977   if (pdp == NULL) return;
978   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
979     if (vnp->choice != PUB_Gen) continue;
980     cgp = (CitGenPtr) vnp->data.ptrvalue;
981     if (cgp == NULL) continue;
982     if (cgp->cit != NULL) {
983       if (StringNICmp (cgp->cit, "submitted", 8) == 0 ||
984                        StringNICmp (cgp->cit, "unpublished", 11) == 0 ||
985                        StringNICmp (cgp->cit, "in press", 8) == 0 ||
986                        StringNICmp (cgp->cit, "to be published", 15) == 0) {
987         cgp->cit = MemFree (cgp->cit);
988         cgp->cit = StringSave ("Unpublished");
989       }
990     }
991     alp = cgp->authors;
992     if (alp == NULL) continue;
993     alp->affil = AffilFree (alp->affil);
994   }
995 }
996 
997 static void LookForRefSeq (
998   BioseqPtr bsp,
999   Pointer userdata
1000 )
1001 
1002 {
1003   BoolPtr   hasRefseqP;
1004   SeqIdPtr  sip;
1005 
1006   hasRefseqP = (BoolPtr) userdata;
1007   if (*hasRefseqP) return;
1008   for (sip = bsp->id; sip != NULL; sip = sip->next) {
1009     if (sip->choice == SEQID_OTHER) {
1010       *hasRefseqP = TRUE;
1011       return;
1012     }
1013   }
1014 }
1015 
/*
 * Text for the "-format" command-line argument, indexed by the integer
 * value of a FmtType (see CompareFlatFiles).  Index 0 is an empty
 * placeholder; the list is NULL-terminated.
 */
static CharPtr fffmt [] = {
  "",
  "genbank",
  "embl",
  "genbank",
  "embl",
  "ftable",
  NULL
};
1025 
/*
 * Text for the "-mode" command-line argument, indexed by the integer
 * value of a ModType (see CompareFlatFiles).  Index 0 is an empty
 * placeholder; the list is NULL-terminated.
 */
static CharPtr ffmod [] = {
  "",
  "release",
  "entrez",
  "gbench",
  "dump",
  NULL
};
1034 
/*
 * Text for the "-style" command-line argument, indexed by the integer
 * value of a StlType (see CompareFlatFiles).  Index 0 is an empty
 * placeholder; the list is NULL-terminated.
 */
static CharPtr ffstl [] = {
  "",
  "normal",
  "segment",
  "master",
  "contig",
  NULL
};
1043 
/*
 * Text for the "-view" command-line argument.  Like fffmt it is indexed by
 * the integer value of a FmtType (see CompareFlatFiles), so the entries
 * run parallel to fffmt.  Index 0 is an empty placeholder; the list is
 * NULL-terminated.
 */
static CharPtr ffvew [] = {
  "",
  "nuc",
  "nuc",
  "prot",
  "prot",
  "nuc",
  NULL
};
1053 
1054 static void ReportDiffs (
1055   CharPtr path1,
1056   CharPtr path2,
1057   CharPtr path3,
1058   FILE* fp,
1059   CharPtr ffdiff,
1060   Boolean useFfdiff
1061 )
1062 
1063 {
1064   Char    buf [256];
1065   Char    cmmd [256];
1066   size_t  ct;
1067   FILE    *fpo;
1068 
1069   if (useFfdiff) {
1070     sprintf (cmmd, "%s -o %s -n %s -d reports", ffdiff, path1, path2);
1071     system (cmmd);
1072 
1073     sprintf (cmmd, "rm %s; rm %s", path1, path2);
1074     system (cmmd);
1075   } else {
1076     sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path1, path1, path1);
1077     system (cmmd);
1078 
1079     sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path2, path2, path2);
1080     system (cmmd);
1081 
1082     sprintf (cmmd, "diff %s.suc %s.suc > %s", path1, path2, path3);
1083     system (cmmd);
1084 
1085     sprintf (cmmd, "cat %s", path3);
1086     fpo = popen (cmmd, "r");
1087     if (fpo != NULL) {
1088       while ((ct = fread (buf, 1, sizeof (buf), fpo)) > 0) {
1089         fwrite (buf, 1, ct, fp);
1090         fflush (fp);
1091       }
1092       pclose (fpo);
1093     }
1094 
1095     sprintf (cmmd, "rm %s.suc; rm %s.suc", path1, path2);
1096     system (cmmd);
1097   }
1098 }
1099 
1100 static void CompareFlatFiles (
1101   CharPtr path1,
1102   CharPtr path2,
1103   CharPtr path3,
1104   SeqEntryPtr sep,
1105   FILE* fp,
1106   FmtType format,
1107   FmtType altformat,
1108   ModType mode,
1109   StlType style,
1110   FlgType flags,
1111   LckType locks,
1112   CstType custom,
1113   XtraPtr extra,
1114   Int2 batch,
1115   CharPtr ffdiff,
1116   CharPtr asn2flat,
1117   Boolean useFfdiff
1118 )
1119 
1120 {
1121 #ifdef OS_UNIX
1122   AsnIoPtr     aip;
1123   Char         arguments [128];
1124   BioseqPtr    bsp;
1125   Char         buf [256];
1126   Char         cmmd [256];
1127   size_t       ct;
1128   int          diff;
1129   FILE         *fpo;
1130   SeqEntryPtr  fsep;
1131 
1132   if (sep == NULL) return;
1133 
1134   if (batch == 1) {
1135 
1136     SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, extra, fp);
1137     if (altformat != 0) {
1138       SeqEntryToGnbk (sep, NULL, altformat, mode, style, flags, locks, custom, extra, fp);
1139     }
1140     return; /* just make report, nothing to diff */
1141 
1142   } else if (batch == 2) {
1143 
1144 #ifdef ASN2GNBK_SUPPRESS_UNPUB_AFFIL
1145     VisitPubdescsInSep (sep, NULL, FreeUnpubAffil);
1146 #endif
1147 
1148     SaveAsn2gnbk (sep, path1, format, SEQUIN_MODE, style, flags, locks, custom);
1149     SaveAsn2gnbk (sep, path2, format, RELEASE_MODE, style, flags, locks, custom);
1150 
1151     ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1152 
1153   } else if (batch == 3) {
1154 
1155 #ifdef ASN2GNBK_SUPPRESS_UNPUB_AFFIL
1156     VisitPubdescsInSep (sep, NULL, FreeUnpubAffil);
1157 #endif
1158 
1159     SaveAsn2gnbk (sep, path1, format, mode, style, flags, locks, custom);
1160     SeriousSeqEntryCleanupBulk (sep);
1161     SaveAsn2gnbk (sep, path2, format, mode, style, flags, locks, custom);
1162 
1163     ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1164 
1165   } else if (batch == 4) {
1166 
1167     aip = AsnIoOpen (path3, "w");
1168     if (aip == NULL) return;
1169 
1170     SeqEntryAsnWrite (sep, aip, NULL);
1171     AsnIoClose (aip);
1172 
1173     fsep = FindNthBioseq (sep, 1);
1174     if (fsep == NULL || fsep->choice != 1) return;
1175     bsp = (BioseqPtr) fsep->data.ptrvalue;
1176     if (bsp == NULL) return;
1177     SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
1178 
1179     arguments [0] = '\0';
1180     sprintf (arguments, "-format %s -mode %s -style %s -view %s -nocleanup",
1181              fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
1182 
1183     sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path1);
1184     system (cmmd);
1185 
1186     arguments [0] = '\0';
1187     sprintf (arguments, "-format %s -mode %s -style %s -view %s",
1188              fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
1189 
1190     sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path2);
1191     system (cmmd);
1192 
1193     ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1194 
1195   } else if (batch == 5) {
1196 
1197     SaveAsn2gnbk (sep, path1, format, mode, style, flags, locks, custom);
1198 
1199     aip = AsnIoOpen (path3, "w");
1200     if (aip == NULL) return;
1201 
1202     SeqEntryAsnWrite (sep, aip, NULL);
1203     AsnIoClose (aip);
1204 
1205     fsep = FindNthBioseq (sep, 1);
1206     if (fsep == NULL || fsep->choice != 1) return;
1207     bsp = (BioseqPtr) fsep->data.ptrvalue;
1208     if (bsp == NULL) return;
1209     SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
1210 
1211     arguments [0] = '\0';
1212     sprintf (arguments, "-format %s -mode %s -style %s -view %s",
1213              fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
1214 
1215     sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path2);
1216     system (cmmd);
1217 
1218     ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1219 
1220   } else if (batch == 6) {
1221 
1222 #ifdef ASN2GNBK_SUPPRESS_UNPUB_AFFIL
1223     VisitPubdescsInSep (sep, NULL, FreeUnpubAffil);
1224 #endif
1225 
1226     SaveAsn2gnbk (sep, path1, format, ENTREZ_MODE, style, (flags | 1), locks, custom);
1227     SaveAsn2gnbk (sep, path2, format, ENTREZ_MODE, style, (flags | 1 | 262144), locks, custom);
1228 
1229     ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1230 
1231   } else if (batch == 7) {
1232 
1233     aip = AsnIoOpen (path3, "w");
1234     if (aip == NULL) return;
1235 
1236     SeqEntryAsnWrite (sep, aip, NULL);
1237     AsnIoClose (aip);
1238 
1239     if (FindNucBioseq (sep) != NULL) {
1240 
1241       sprintf (cmmd, "./oldasn2gb -i %s -o %s -m e -g 1", path3, path1);
1242       system (cmmd);
1243 
1244       sprintf (cmmd, "./newasn2gb -i %s -o %s -m e -g 1", path3, path2);
1245       system (cmmd);
1246 
1247     } else {
1248 
1249       sprintf (cmmd, "./oldasn2gb -f p -i %s -o %s -m e -g 1", path3, path1);
1250       system (cmmd);
1251 
1252       sprintf (cmmd, "./newasn2gb -f p -i %s -o %s -m e -g 1", path3, path2);
1253       system (cmmd);
1254 
1255     }
1256 
1257     sprintf (cmmd, "diff -b %s %s > %s", path1, path2, path3);
1258     diff = system (cmmd);
1259 
1260     if (diff > 0) {
1261       sprintf (cmmd, "cat %s", path3);
1262       fpo = popen (cmmd, "r");
1263       if (fpo != NULL) {
1264         fprintf (fp, "\nasn2gb difference in %s\n", buf);
1265         fflush (fp);
1266         while ((ct = fread (buf, 1, sizeof (buf), fpo)) > 0) {
1267           fwrite (buf, 1, ct, fp);
1268           fflush (fp);
1269         }
1270         pclose (fpo);
1271       }
1272     }
1273   }
1274 
1275 #else
1276 
1277   SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, extra, fp);
1278   if (altformat != 0) {
1279     SeqEntryToGnbk (sep, NULL, altformat, mode, style, flags, locks, custom, extra, fp);
1280   }
1281 #endif
1282 }
1283 
1284 static void CheckOrder (
1285   SeqFeatPtr sfp,
1286   Pointer userdata
1287 )
1288 
1289 {
1290 #ifdef ASN2GNBK_IGNORE_OUT_OF_ORDER
1291   BoolPtr    bp;
1292   BioseqPtr  bsp;
1293 #endif
1294 #ifdef ASN2GNBK_REPAIR_OUT_OF_ORDER
1295   BioseqPtr  bsp;
1296   SeqLocPtr  gslp;
1297   Boolean    hasNulls;
1298   Boolean    noLeft;
1299   Boolean    noRight;
1300 #endif
1301 
1302   /* ignore order of bonds in heterogen features from PDB */
1303 
1304   if (sfp->data.choice == SEQFEAT_HET) return;
1305 
1306 #ifdef ASN2GNBK_IGNORE_OUT_OF_ORDER
1307   bsp = BioseqFindFromSeqLoc (sfp->location);
1308   if (bsp != NULL && SeqLocBadSortOrder (bsp, sfp->location)) {
1309     bp = (BoolPtr) userdata;
1310     *bp = TRUE;
1311   }
1312 #endif
1313 #ifdef ASN2GNBK_REPAIR_OUT_OF_ORDER
1314   bsp = BioseqFindFromSeqLoc (sfp->location);
1315   if (bsp != NULL && SeqLocBadSortOrder (bsp, sfp->location)) {
1316     hasNulls = LocationHasNullsBetween (sfp->location);
1317     gslp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, hasNulls);
1318     if (gslp != NULL) {
1319       CheckSeqLocForPartial (sfp->location, &noLeft, &noRight);
1320       sfp->location = SeqLocFree (sfp->location);
1321       sfp->location = gslp;
1322       if (bsp->repr == Seq_repr_seg) {
1323         gslp = SegLocToParts (bsp, sfp->location);
1324         sfp->location = SeqLocFree (sfp->location);
1325         sfp->location = gslp;
1326       }
1327       FreeAllFuzz (sfp->location);
1328       SetSeqLocPartial (sfp->location, noLeft, noRight);
1329     }
1330   }
1331 #endif
1332 }
1333 
1334 static Int2 HandleMultipleRecords (
1335   CharPtr inputFile,
1336   CharPtr outputFile,
1337   FmtType format,
1338   FmtType altformat,
1339   ModType mode,
1340   StlType style,
1341   FlgType flags,
1342   LckType locks,
1343   CstType custom,
1344   XtraPtr extra,
1345   Int2 type,
1346   Int2 batch,
1347   Boolean binary,
1348   Boolean compressed,
1349   Boolean propOK,
1350   CharPtr ffdiff,
1351   CharPtr asn2flat,
1352   CharPtr accn,
1353   FILE *logfp
1354 )
1355 
1356 {
1357   AsnIoPtr        aip;
1358   AsnModulePtr    amp;
1359   AsnTypePtr      atp, atp_bss, atp_desc, atp_sbp, atp_se, atp_ssp;
1360   Boolean         atp_se_seen = FALSE;
1361   BioseqPtr       bsp;
1362   BioseqSetPtr    bssp;
1363   Char            buf [41];
1364   Char            cmmd [256];
1365   CitSubPtr       csp = NULL;
1366   SeqDescrPtr     descr = NULL;
1367   FILE            *fp;
1368   SeqEntryPtr     fsep;
1369   Boolean         hasgi;
1370   Boolean         hasRefSeq;
1371   Boolean         io_failure = FALSE;
1372   Char            longest [41];
1373   Int4            numrecords = 0;
1374   FILE            *ofp = NULL;
1375   ObjMgrPtr       omp;
1376   Boolean         outOfOrder;
1377   ObjValNode      ovn;
1378   Char            path1 [PATH_MAX];
1379   Char            path2 [PATH_MAX];
1380   Char            path3 [PATH_MAX];
1381   Pubdesc         pd;
1382   SubmitBlockPtr  sbp = NULL;
1383   SeqEntryPtr     sep;
1384   time_t          starttime, stoptime, worsttime;
1385   SeqDescrPtr     subcit = NULL;
1386   FILE            *tfp;
1387   Boolean         useFfdiff;
1388   ValNode         vn;
1389 #ifdef OS_UNIX
1390   CharPtr         gzcatprog;
1391   int             ret;
1392   Boolean         usedPopen = FALSE;
1393 #endif
1394 
1395   if (StringHasNoText (inputFile)) return 1;
1396 
1397 #ifndef OS_UNIX
1398   if (compressed) {
1399     Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines");
1400     return 1;
1401   }
1402 #endif
1403 
1404   amp = AsnAllModPtr ();
1405   if (amp == NULL) {
1406     Message (MSG_POSTERR, "Unable to load AsnAllModPtr");
1407     return 1;
1408   }
1409 
1410   atp_ssp = AsnFind ("Seq-submit");
1411   if (atp_ssp == NULL) {
1412     Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-submit");
1413     return 1;
1414   }
1415 
1416   atp_sbp = AsnFind ("Seq-submit.sub");
1417   if (atp_sbp == NULL) {
1418     Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-submit.sub");
1419     return 1;
1420   }
1421 
1422   atp_bss = AsnFind ("Bioseq-set");
1423   if (atp_bss == NULL) {
1424     Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set");
1425     return 1;
1426   }
1427 
1428   atp_desc = AsnFind ("Bioseq-set.descr");
1429   if (atp_desc == NULL) {
1430     Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.descr");
1431     return 1;
1432   }
1433 
1434   atp_se = AsnFind ("Bioseq-set.seq-set.E");
1435   if (atp_se == NULL) {
1436     Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.seq-set.E");
1437     return 1;
1438   }
1439 
1440 #ifdef OS_UNIX
1441   if (compressed) {
1442     gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
1443     if (gzcatprog != NULL) {
1444       sprintf (cmmd, "%s %s", gzcatprog, inputFile);
1445     } else {
1446       ret = system ("gzcat -h >/dev/null 2>&1");
1447       if (ret == 0) {
1448         sprintf (cmmd, "gzcat %s", inputFile);
1449       } else if (ret == -1) {
1450         Message (MSG_POSTERR, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
1451         return 1;
1452       } else {
1453         ret = system ("zcat -h >/dev/null 2>&1");
1454         if (ret == 0) {
1455           sprintf (cmmd, "zcat %s", inputFile);
1456         } else if (ret == -1) {
1457           Message (MSG_POSTERR, "Unable to fork or exec zcat in ScanBioseqSetRelease");
1458           return 1;
1459         } else {
1460           Message (MSG_POSTERR, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
1461           return 1;
1462         }
1463       }
1464     }
1465     fp = popen (cmmd, /* binary? "rb" : */ "r");
1466     usedPopen = TRUE;
1467   } else {
1468     fp = FileOpen (inputFile, binary? "rb" : "r");
1469   }
1470 #else
1471   fp = FileOpen (inputFile, binary? "rb" : "r");
1472 #endif
1473   if (fp == NULL) {
1474     Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
1475     return 1;
1476   }
1477 
1478   aip = AsnIoNew (binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
1479   if (aip == NULL) {
1480     Message (MSG_POSTERR, "AsnIoNew failed for input file '%s'", inputFile);
1481     return 1;
1482   }
1483 
1484   if ((batch == 1 || batch == 4 || batch == 5 || format != GENBANK_FMT) &&
1485       (extra == NULL || extra->gbseq == NULL)) {
1486     ofp = FileOpen (outputFile, "w");
1487     if (ofp == NULL) {
1488       AsnIoClose (aip);
1489       Message (MSG_POSTERR, "FileOpen failed for output file '%s'", outputFile);
1490       return 1;
1491     }
1492   }
1493 
1494   TmpNam (path1);
1495   tfp = FileOpen (path1, "w");
1496   fprintf (tfp, "\n");
1497   FileClose (tfp);
1498 
1499   TmpNam (path2);
1500   tfp = FileOpen (path2, "w");
1501   fprintf (tfp, "\n");
1502   FileClose (tfp);
1503 
1504   TmpNam (path3);
1505   tfp = FileOpen (path3, "w");
1506   fprintf (tfp, "\n");
1507   FileClose (tfp);
1508 
1509   if (type == 4) {
1510     atp = atp_bss;
1511   } else if (type == 5) {
1512     atp = atp_ssp;
1513   } else {
1514     Message (MSG_POSTERR, "Batch processing type not set properly");
1515     return 1;
1516   }
1517 
1518   longest [0] = '\0';
1519   worsttime = 0;
1520 
1521   while ((! io_failure) && (atp = AsnReadId (aip, amp, atp)) != NULL) {
1522     if (aip->io_failure) {
1523       io_failure = TRUE;
1524       aip->io_failure = FALSE;
1525     }
1526     if (atp == atp_se) {
1527       atp_se_seen = TRUE;
1528 
1529       SeqMgrHoldIndexing (TRUE);
1530       sep = SeqEntryAsnRead (aip, atp);
1531       SeqMgrHoldIndexing (FALSE);
1532 
1533       /* propagate descriptors from the top-level set */
1534 
1535       if (propOK && descr != NULL && sep != NULL && sep->data.ptrvalue != NULL) {
1536         if (sep->choice == 1) {
1537           bsp = (BioseqPtr) sep->data.ptrvalue;
1538           ValNodeLink (&(bsp->descr),
1539                        AsnIoMemCopy ((Pointer) descr,
1540                                      (AsnReadFunc) SeqDescrAsnRead,
1541                                      (AsnWriteFunc) SeqDescrAsnWrite));
1542         } else if (sep->choice == 2) {
1543           bssp = (BioseqSetPtr) sep->data.ptrvalue;
1544           ValNodeLink (&(bssp->descr),
1545                        AsnIoMemCopy ((Pointer) descr,
1546                                      (AsnReadFunc) SeqDescrAsnRead,
1547                                      (AsnWriteFunc) SeqDescrAsnWrite));
1548         }
1549       }
1550 
1551       /* propagate submission citation as descriptor onto each Seq-entry */
1552 
1553       if (subcit != NULL && sep != NULL && sep->data.ptrvalue != NULL) {
1554         if (sep->choice == 1) {
1555           bsp = (BioseqPtr) sep->data.ptrvalue;
1556           ValNodeLink (&(bsp->descr),
1557                        AsnIoMemCopy ((Pointer) subcit,
1558                                      (AsnReadFunc) SeqDescrAsnRead,
1559                                      (AsnWriteFunc) SeqDescrAsnWrite));
1560         } else if (sep->choice == 2) {
1561           bssp = (BioseqSetPtr) sep->data.ptrvalue;
1562           ValNodeLink (&(bssp->descr),
1563                        AsnIoMemCopy ((Pointer) subcit,
1564                                      (AsnReadFunc) SeqDescrAsnRead,
1565                                      (AsnWriteFunc) SeqDescrAsnWrite));
1566         }
1567       }
1568 
1569       fsep = FindNthBioseq (sep, 1);
1570       if (fsep != NULL && fsep->choice == 1) {
1571         bsp = (BioseqPtr) fsep->data.ptrvalue;
1572         if (bsp != NULL) {
1573           SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
1574 #ifdef OS_UNIX
1575           if (batch != 1) {
1576             printf ("%s\n", buf);
1577             fflush (stdout);
1578             if (batch != 4 && batch != 5) {
1579               if (ofp != NULL) {
1580                 fprintf (ofp, "%s\n", buf);
1581                 fflush (ofp);
1582               }
1583             }
1584           }
1585 #endif
1586           if (logfp != NULL) {
1587             fprintf (logfp, "%s\n", buf);
1588             fflush (logfp);
1589           }
1590         }
1591       }
1592 
1593       hasgi = SeqEntryHasGi (sep, accn);
1594       if (hasgi) {
1595         sprintf (buf, "%s.before", accn);
1596         SaveSeqEntry (sep, buf);
1597         sprintf (buf, "%s.gbff.before", accn);
1598         SaveAsn2gnbk (sep, buf, format, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0);
1599         if (ofp != NULL) {
1600           FileClose (ofp);
1601         }
1602         AsnIoClose (aip);
1603         return 0;
1604       }
1605       outOfOrder = FALSE;
1606 #ifdef ASN2GNBK_IGNORE_OUT_OF_ORDER
1607       VisitFeaturesInSep (sep, (Pointer) &outOfOrder, CheckOrder);
1608 #endif
1609 #ifdef ASN2GNBK_REPAIR_OUT_OF_ORDER
1610       VisitFeaturesInSep (sep, (Pointer) &outOfOrder, CheckOrder);
1611 #endif
1612       if ((! outOfOrder) && StringHasNoText (accn)) {
1613         if ((format != GENPEPT_FMT && SeqEntryHasNucs (sep)) ||
1614            (format == GENPEPT_FMT && SeqEntryHasProts (sep))) {
1615 
1616           hasRefSeq = FALSE;
1617           VisitBioseqsInSep (sep, (Pointer) &hasRefSeq, LookForRefSeq);
1618           if (hasRefSeq) {
1619             if (batch != 1 && format == GENBANK_FMT && ofp == NULL &&
1620                 (extra == NULL || extra->gbseq == NULL)) {
1621               ofp = FileOpen (outputFile, "w");
1622               if (ofp == NULL) {
1623                 ofp = stdout;
1624               }
1625             }
1626           }
1627 
1628           starttime = GetSecs ();
1629           useFfdiff = (Boolean) (format == GENBANK_FMT && (! hasRefSeq));
1630           CompareFlatFiles (path1, path2, path3, sep, ofp,
1631                             format, altformat, mode, style, flags, locks,
1632                             custom, extra, batch, ffdiff, asn2flat, useFfdiff);
1633           stoptime = GetSecs ();
1634           if (stoptime - starttime > worsttime) {
1635             worsttime = stoptime - starttime;
1636             StringCpy (longest, buf);
1637           }
1638           numrecords++;
1639         }
1640       }
1641       SeqEntryFree (sep);
1642 
1643       omp = ObjMgrGet ();
1644       ObjMgrReapOne (omp);
1645       SeqMgrClearBioseqIndex ();
1646       ObjMgrFreeCache (0);
1647       FreeSeqIdGiCache ();
1648 
1649       SeqEntrySetScope (NULL);
1650 
1651     } else if (atp == atp_desc && (! atp_se_seen)) {
1652       descr = SeqDescrAsnRead (aip, atp);
1653     } else if (atp == atp_sbp) {
1654       sbp = SubmitBlockAsnRead (aip, atp);
1655       if (sbp != NULL) {
1656         csp = sbp->cit;
1657         if (csp != NULL) {
1658           MemSet ((Pointer) &ovn, 0, sizeof (ObjValNode));
1659           MemSet ((Pointer) &pd, 0, sizeof (Pubdesc));
1660           MemSet ((Pointer) &vn, 0, sizeof (ValNode));
1661           vn.choice = PUB_Sub;
1662           vn.data.ptrvalue = (Pointer) csp;
1663           vn.next = NULL;
1664           pd.pub = &vn;
1665           ovn.vn.choice = Seq_descr_pub;
1666           ovn.vn.data.ptrvalue = (Pointer) &pd;
1667           ovn.vn.next = NULL;
1668           ovn.vn.extended = 1;
1669           subcit = (SeqDescrPtr) &ovn;
1670         }
1671       }
1672     } else {
1673       AsnReadVal (aip, atp, NULL);
1674     }
1675 
1676     if (aip->io_failure) {
1677       io_failure = TRUE;
1678       aip->io_failure = FALSE;
1679     }
1680   }
1681 
1682   if (aip->io_failure) {
1683     io_failure = TRUE;
1684   }
1685 
1686   if (io_failure) {
1687     Message (MSG_POSTERR, "Asn io_failure for input file '%s'", inputFile);
1688   }
1689 
1690   if (ofp != NULL) {
1691     FileClose (ofp);
1692   }
1693 
1694   AsnIoFree (aip, FALSE);
1695 
1696   SeqDescrFree (descr);
1697   SubmitBlockFree (sbp);
1698 
1699 #ifdef OS_UNIX
1700   if (usedPopen) {
1701     pclose (fp);
1702   } else {
1703     FileClose (fp);
1704   }
1705 #else
1706   FileClose (fp);
1707 #endif
1708 
1709   if (logfp != NULL && (! StringHasNoText (longest))) {
1710     fprintf (logfp, "Longest processing time %ld seconds on %s\n",
1711              (long) worsttime, longest);
1712     fprintf (logfp, "Total number of records %ld\n", (long) numrecords);
1713     fflush (logfp);
1714   }
1715 
1716   sprintf (cmmd, "rm %s; rm %s; rm %s", path1, path2, path3);
1717   system (cmmd);
1718 
1719   if (io_failure) return 1;
1720   return 0;
1721 }
1722 
1723 #include <lsqfetch.h>
1724 #include <pmfapi.h>
1725 #ifdef INTERNAL_NCBI_ASN2GB
1726 #include <accpubseq.h>
1727 #endif
1728 
1729 static void ProcessOneSeqEntry (
1730   SeqEntryPtr sep,
1731   CharPtr outputFile,
1732   FmtType format,
1733   FmtType altformat,
1734   ModType mode,
1735   StlType style,
1736   FlgType flags,
1737   LckType locks,
1738   CstType custom,
1739   XtraPtr extra,
1740   Boolean do_tiny_seq,
1741   Boolean do_fasta_stream
1742 )
1743 
1744 
1745 {
1746   AsnIoPtr  aip;
1747   FILE      *ofp = NULL;
1748 
1749   if (sep == NULL) return;
1750 
1751   if (extra == NULL || extra->gbseq == NULL) {
1752     FileRemove (outputFile);
1753 #ifdef WIN_MAC
1754     FileCreate (outputFile, "TEXT", "ttxt");
1755 #endif
1756     ofp = FileOpen (outputFile, "w");
1757   }
1758 
1759   if (do_tiny_seq) {
1760     aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
1761     VisitBioseqsInSep (sep, (Pointer) aip, SaveTinySeqs);
1762     AsnIoFree (aip, FALSE);
1763   } else if (do_fasta_stream) {
1764     aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
1765     VisitBioseqsInSep (sep, (Pointer) aip, SaveTinyStreams);
1766     AsnIoFree (aip, FALSE);
1767   } else {
1768     SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, extra, ofp);
1769     if (altformat != 0) {
1770       SeqEntryToGnbk (sep, NULL, altformat, mode, style, flags, locks, custom, extra, ofp);
1771     }
1772   }
1773   if (ofp != NULL) {
1774     FileClose (ofp);
1775   }
1776 }
1777 
1778 static SeqEntryPtr SeqEntryFromAccnOrGi (
1779   CharPtr str
1780 )
1781 
1782 {
1783   CharPtr      accn;
1784   Boolean      alldigits;
1785   BioseqPtr    bsp;
1786   Char         buf [64];
1787   Char         ch;
1788   Int4         flags = 0;
1789   CharPtr      ptr;
1790   Int2         retcode = 0;
1791   SeqEntryPtr  sep = NULL;
1792   SeqIdPtr     sip;
1793   CharPtr      tmp1 = NULL;
1794   CharPtr      tmp2 = NULL;
1795   Int4         uid = 0;
1796   long int     val;
1797   ValNode      vn;
1798 
1799   if (StringHasNoText (str)) return NULL;
1800   StringNCpy_0 (buf, str, sizeof (buf));
1801   TrimSpacesAroundString (buf);
1802 
1803   accn = buf;
1804   tmp1 = StringChr (accn, ',');
1805   if (tmp1 != NULL) {
1806     *tmp1 = '\0';
1807     tmp1++;
1808     tmp2 = StringChr (tmp1, ',');
1809     if (tmp2 != NULL) {
1810       *tmp2 = '\0';
1811       tmp2++;
1812       if (StringDoesHaveText (tmp2) && sscanf (tmp2, "%ld", &val) == 1) {
1813         flags = (Int4) val;
1814       }
1815     }
1816     if (StringDoesHaveText (tmp1) && sscanf (tmp1, "%ld", &val) == 1) {
1817       retcode = (Int2) val;
1818     }
1819   }
1820 
1821 #ifdef INTERNAL_NCBI_ASN2GB
1822   /* temporary code to test PUBSEQGetAccnVer in accpubseq.c */
1823 
1824   if (*accn == '*') {
1825     Char buf [64];
1826     accn++;
1827     if (sscanf (accn, "%ld", &val) == 1) {
1828       uid = (Int4) val;
1829       if (GetAccnVerFromServer (uid, buf)) {
1830         Message (MSG_POST, "GetAccnVerFromServer returned %s", buf);
1831       } else {
1832         Message (MSG_POST, "GetAccnVerFromServer failed");
1833       }
1834     }
1835     return NULL;
1836   }
1837 #endif
1838 
1839   alldigits = TRUE;
1840   ptr = accn;
1841   ch = *ptr;
1842   while (ch != '\0') {
1843     if (! IS_DIGIT (ch)) {
1844       alldigits = FALSE;
1845     }
1846     ptr++;
1847     ch = *ptr;
1848   }
1849 
1850   if (alldigits) {
1851     if (sscanf (accn, "%ld", &val) == 1) {
1852       uid = (Int4) val;
1853     }
1854   } else {
1855     sip = SeqIdFromAccessionDotVersion (accn);
1856     if (sip != NULL) {
1857       uid = GetGIForSeqId (sip);
1858       SeqIdFree (sip);
1859     }
1860   }
1861 
1862   if (uid > 0) {
1863     sep = PubSeqSynchronousQuery (uid, retcode, flags);
1864     if (sep != NULL) {
1865       MemSet ((Pointer) &vn, 0, sizeof (ValNode));
1866       vn.choice = SEQID_GI;
1867       vn.data.intvalue = uid;
1868       bsp = BioseqFind (&vn);
1869       if (bsp != NULL) {
1870         sep = SeqMgrGetSeqEntryForData ((Pointer) bsp);
1871       }
1872     }
1873   }
1874 
1875   return sep;
1876 }
1877 
1878 static void MarkLocalAnnots (
1879   SeqAnnotPtr sap,
1880   Pointer userdata
1881 )
1882 
1883 {
1884   if (sap == NULL) return;
1885 
1886   if (StringNICmp (sap->name, "Annot:", 6) != 0) {
1887     sap->idx.deleteme = TRUE;
1888   }
1889 }
1890 
1891 static ValNodePtr PubSeqRemoteLock (
1892   SeqIdPtr sip,
1893   Pointer remotedata
1894 )
1895 
1896 {
1897   BioseqPtr    bsp;
1898   SeqAnnotPtr  sap = NULL;
1899   SeqEntryPtr  sep = NULL;
1900   Int4         uid = 0;
1901   ValNodePtr   vnp = NULL;
1902 
1903   if (sip == NULL) return NULL;
1904 
1905   if (sip->choice == SEQID_GI) {
1906     uid = (Int4) sip->data.intvalue;
1907   } else {
1908     uid = GetGIForSeqId (sip);
1909   }
1910 
1911   if (uid > 0) {
1912     sep = PubSeqSynchronousQuery (uid, 1, -1);
1913     if (sep != NULL && IS_Bioseq (sep)) {
1914       bsp = (BioseqPtr) sep->data.ptrvalue;
1915       if (bsp != NULL) {
1916         VisitAnnotsInSep (sep, NULL, MarkLocalAnnots);
1917         DeleteMarkedObjects (0, OBJ_BIOSEQ, (Pointer) bsp);
1918         sap = bsp->annot;
1919         bsp->annot = NULL;
1920       }
1921     }
1922     SeqEntryFree (sep);
1923   }
1924 
1925   if (sap == NULL) return NULL;
1926 
1927   bsp = (BioseqPtr) MemNew (sizeof (Bioseq));
1928   if (bsp == NULL) return NULL;
1929   bsp->annot = sap;
1930 
1931   vnp = ValNodeNew (NULL);
1932   if (vnp == NULL) return NULL;
1933 
1934   vnp->data.ptrvalue = (Pointer) bsp;
1935 
1936   return vnp;
1937 }
1938 
1939 static void PubSeqRemoteFree (
1940   ValNodePtr vnp,
1941   Pointer remotedata
1942 )
1943 
1944 {
1945   ValNodeFreeData (vnp);
1946 }
1947 
1948 /* Args structure contains command-line arguments */
1949 
/* indices into myargs; the order here must match the order of the entries
   in that table */

typedef enum {
  i_argInputFile  = 0,   /* -i  input file name */
  o_argOutputFile,       /* -o  output file name */
  f_argFormat,           /* -f  format */
  m_argMode,             /* -m  mode */
  s_argStyle,            /* -s  style */
  g_argFlags,            /* -g  bit flags */
  h_argLock,             /* -h  lock/lookup flags */
  u_argCustom,           /* -u  custom flags */
  a_argType,             /* -a  ASN.1 type */
  t_argBatch,            /* -t  batch */
  b_argBinary,           /* -b  input file is binary */
  c_argCompressed,       /* -c  batch file is compressed */
  p_argPropagate,        /* -p  propagate top descriptors */
  l_argLogFile,          /* -l  log file */
  r_argRemote,           /* -r  remote fetching */
  A_argAccession,        /* -A  accession to fetch */
  F_argFarFeats,         /* -F  fetch remote annotations */
#ifdef OS_UNIX
  q_argFfDiff,           /* -q  ffdiff executable */
  n_argAsn2Flat,         /* -n  asn2flat executable */
  j_argFrom,             /* -j  SeqLoc from */
  k_argTo,               /* -k  SeqLoc to */
  d_argStrand,           /* -d  SeqLoc minus strand */
  y_argItemID,           /* -y  feature itemID */
#ifdef INTERNAL_NCBI_ASN2GB
  H_argAccessHUP,        /* -H  internal access to HUP */
#endif
#ifdef ENABLE_ARG_X
  x_argAccnToSave,       /* -x  accession to extract */
#endif
#endif
} Arguments;
1983 
1984 Args myargs [] = {
1985   {"Input File Name", "stdin", NULL, NULL,
1986     FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
1987   {"Output File Name", "stdout", NULL, NULL,
1988     FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
1989   {"Format (b GenBank, e EMBL, p GenPept, t Feature Table, x INSDSet)", "b", NULL, NULL,
1990     FALSE, 'f', ARG_STRING, 0.0, 0, NULL},
1991   {"Mode (r Release, e Entrez, s Sequin, d Dump)", "s", NULL, NULL,
1992     FALSE, 'm', ARG_STRING, 0.0, 0, NULL},
1993   {"Style (n Normal, s Segment, m Master, c Contig)", "n", NULL, NULL,
1994     FALSE, 's', ARG_STRING, 0.0, 0, NULL},
1995   {"Bit Flags (1 HTML, 2 XML, 4 ContigFeats, 8 ContigSrcs, 16 FarTransl)", "0", NULL, NULL,
1996     FALSE, 'g', ARG_INT, 0.0, 0, NULL},
1997   {"Lock/Lookup Flags (8 LockProd, 16 LookupComp, 64 LookupProd)", "0", NULL, NULL,
1998     FALSE, 'h', ARG_INT, 0.0, 0, NULL},
1999   {"Custom Flags (4 HideFeats, 1792 HideRefs, 8192 HideSources, 262144 HideTranslation)", "0", NULL, NULL,
2000     FALSE, 'u', ARG_INT, 0.0, 0, NULL},
2001   {"ASN.1 Type\n"
2002    "      Single Record: a Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, q Catenated\n"
2003    "      Release File: t Batch Bioseq-set, u Batch Seq-submit\n", "a", NULL, NULL,
2004     TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
2005   {"Batch\n"
2006    "      1 Report\n"
2007    "      2 Sequin/Release\n"
2008    "      3 asn2gb SSEC/nocleanup\n"
2009    "      4 asn2flat BSEC/nocleanup\n"
2010    "      5 asn2gb/asn2flat\n"
2011    "      6 asn2gb NEW dbxref/OLD dbxref\n"
2012    "      7 oldasn2gb/newasn2gb", "0", "0", "7",
2013     FALSE, 't', ARG_INT, 0.0, 0, NULL},
2014   {"Input File is Binary", "F", NULL, NULL,
2015     TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
2016   {"Batch File is Compressed", "F", NULL, NULL,
2017     TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
2018   {"Propagate Top Descriptors", "F", NULL, NULL,
2019     TRUE, 'p', ARG_BOOLEAN, 0.0, 0, NULL},
2020   {"Log file", NULL, NULL, NULL,
2021     TRUE, 'l', ARG_FILE_OUT, 0.0, 0, NULL},
2022   {"Remote Fetching", "F", NULL, NULL,
2023     TRUE, 'r', ARG_BOOLEAN, 0.0, 0, NULL},
2024   {"Accession to Fetch", NULL, NULL, NULL,
2025     TRUE, 'A', ARG_STRING, 0.0, 0, NULL},
2026   {"Fetch Remote Annotations", "F", NULL, NULL,
2027     TRUE, 'F', ARG_BOOLEAN, 0.0, 0, NULL},
2028 #ifdef OS_UNIX
2029 #ifdef PROC_I80X86
2030   {"Ffdiff Executable", "ffdiff", NULL, NULL,
2031     TRUE, 'q', ARG_FILE_IN, 0.0, 0, NULL},
2032   {"Asn2Flat Executable", "asn2flat", NULL, NULL,
2033     TRUE, 'n', ARG_FILE_IN, 0.0, 0, NULL},
2034 #else
2035   {"Ffdiff Executable", "/netopt/genbank/subtool/bin/ffdiff", NULL, NULL,
2036     TRUE, 'q', ARG_FILE_IN, 0.0, 0, NULL},
2037   {"Asn2Flat Executable", "asn2flat", NULL, NULL,
2038     TRUE, 'n', ARG_FILE_IN, 0.0, 0, NULL},
2039 #endif
2040   {"SeqLoc From", "0", NULL, NULL,
2041     TRUE, 'j', ARG_INT, 0.0, 0, NULL},
2042   {"SeqLoc To", "0", NULL, NULL,
2043     TRUE, 'k', ARG_INT, 0.0, 0, NULL},
2044   {"SeqLoc Minus Strand", "F", NULL, NULL,
2045     TRUE, 'd', ARG_BOOLEAN, 0.0, 0, NULL},
2046   {"Feature itemID", "0", NULL, NULL,
2047     TRUE, 'y', ARG_INT, 0.0, 0, NULL},
2048 #ifdef INTERNAL_NCBI_ASN2GB
2049   {"Internal Access to HUP", "F", NULL, NULL,
2050     TRUE, 'H', ARG_BOOLEAN, 0.0, 0, NULL},
2051 #endif
2052 #ifdef ENABLE_ARG_X
2053   {"Accession to Extract", NULL, NULL, NULL,
2054     TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
2055 #endif
2056 #endif
2057 };
2058 
2059 
2060 #define HTML_XML_ASN_MASK (CREATE_HTML_FLATFILE | CREATE_XML_GBSEQ_FILE | CREATE_ASN_GBSEQ_FILE)
2061 
/*
 * Main -- entry point of the asn2gb application.
 *
 * Steps, in order:
 *   1. configure error reporting and load the ASN.1 object, feature
 *      definition, sequence code and genetic code tables;
 *   2. read the command line through GetArgs into myargs and translate
 *      the option values into format/mode/style/flags/type settings;
 *   3. optionally enable remote fetching, open a log file, and set up
 *      the GBSeq/INSDSeq output stream;
 *   4. hand the input to exactly one of ProcessOneSeqEntry,
 *      HandleMultipleRecords, HandleCatenatedRecord or HandleSingleRecord;
 *   5. close the output stream and log file and disable the fetchers.
 *
 * Returns 1 when a table load, the -b/-c consistency check, the
 * INTERNAL_NCBI_ASN2GB fetch enable or the GBSeq output setup fails;
 * returns 0 when GetArgs reports failure; otherwise returns rsult, which
 * holds the value of the selected Handle* routine (it keeps its initial
 * 0 on the accession-fetch path, where nothing is assigned to it).
 */
2062 Int2 Main (
2063   void
2064 )
2065 
2066 {
       /* accn is only assigned when both OS_UNIX and ENABLE_ARG_X are defined */
2067   CharPtr      accn = NULL;
2068   CharPtr      accntofetch = NULL;
2069   AsnIoPtr     aip = NULL;
2070   FmtType      altformat = (FmtType) 0;
2071   Char         app [64];
2072   CharPtr      asn2flat = NULL;
2073   AsnTypePtr   atp = NULL;
2074   Int2         batch = 0;
2075   Boolean      binary = FALSE;
2076   Boolean      catenated = FALSE;
2077   Boolean      compressed = FALSE;
2078   CstType      custom;
2079   Boolean      do_gbseq = FALSE;
2080   Boolean      do_insdseq = FALSE;
2081   Boolean      do_tiny_seq = FALSE;
2082   Boolean      do_fasta_stream = FALSE;
2083   XtraPtr      extra = NULL;
2084   Boolean      farfeats = FALSE;
2085   CharPtr      ffdiff = NULL;
2086   FlgType      flags;
2087   FmtType      format = GENBANK_FMT;
       /* from, to, strand, itemID, ffdiff and asn2flat are read from the
          command line only under OS_UNIX; elsewhere the initializers stand */
2088   Int4         from = 0;
2089   GBSeq        gbsq;
2090   GBSet        gbst;
2091 #ifdef INTERNAL_NCBI_ASN2GB
2092   Boolean      hup = FALSE;
2093 #endif
2094   Uint4        itemID = 0;
2095   LckType      locks;
2096   CharPtr      logfile = NULL;
2097   FILE         *logfp = NULL;
2098   ModType      mode = SEQUIN_MODE;
2099   Boolean      propOK = FALSE;
2100   Boolean      remote = FALSE;
2101   Int2         rsult = 0;
2102   time_t       runtime, starttime, stoptime;
2103   SeqEntryPtr  sep;
2104   CharPtr      str;
2105   Uint1        strand = Seq_strand_plus;
2106   StlType      style = NORMAL_STYLE;
2107   Int4         to = 0;
2108   Int2         type = 0;
2109   Char         xmlbuf [128];
2110   XtraBlock    xtra;
2111 
2112   /* standard setup */
2113 
2114   ErrSetFatalLevel (SEV_MAX);
2115   ErrClearOptFlags (EO_SHOW_USERSTR);
2116   ErrSetLogfile ("stderr", ELOG_APPEND);
2117   UseLocalAsnloadDataAndErrMsg ();
2118   ErrPathReset ();
2119 
       /* each loader failure posts an error and ends the program with 1 */
2120   if (! AllObjLoad ()) {
2121     Message (MSG_POSTERR, "AllObjLoad failed");
2122     return 1;
2123   }
2124   if (! SubmitAsnLoad ()) {
2125     Message (MSG_POSTERR, "SubmitAsnLoad failed");
2126     return 1;
2127   }
2128   if (! FeatDefSetLoad ()) {
2129     Message (MSG_POSTERR, "FeatDefSetLoad failed");
2130     return 1;
2131   }
2132   if (! SeqCodeSetLoad ()) {
2133     Message (MSG_POSTERR, "SeqCodeSetLoad failed");
2134     return 1;
2135   }
2136   if (! GeneticCodeTableLoad ()) {
2137     Message (MSG_POSTERR, "GeneticCodeTableLoad failed");
2138     return 1;
2139   }
2140 
2141   /* process command line arguments */
2142 
       /* NOTE(review): unbounded sprintf into the 64-byte app buffer; safe
          only while ASN2GB_APPLICATION expands to a short string - confirm */
2143   sprintf (app, "asn2gb %s", ASN2GB_APPLICATION);
       /* a FALSE result from GetArgs ends the program with status 0 */
2144   if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
2145     return 0;
2146   }
2147 
2148   if (myargs [b_argBinary].intvalue) {
2149     binary = TRUE;
2150   } else {
2151     binary = FALSE;
2152   }
2153 
2154   if (myargs [c_argCompressed].intvalue) {
2155     compressed = TRUE;
2156   } else {
2157     compressed = FALSE;
2158   }
2159 
2160   if (myargs [p_argPropagate].intvalue) {
2161     propOK = TRUE;
2162   } else {
2163     propOK = FALSE;
2164   }
2165 
       /* format letter(s): two-letter combinations also set altformat, and
          q/r/x/z switch on GBSeq output (x and z additionally INSDSeq).
          "y" and "Y" are compared case-sensitively because they select
          different outputs (tiny seq vs. FASTA stream); every other
          comparison ignores case.  Unrecognized values give GENBANK_FMT. */
2166   str = myargs [f_argFormat].strvalue;
2167   if (StringICmp (str, "bp") == 0 || StringICmp (str, "pb") == 0) {
2168     format = GENBANK_FMT;
2169     altformat = GENPEPT_FMT;
2170 
2171   } else if (StringICmp (str, "b") == 0) {
2172     format = GENBANK_FMT;
2173   } else if (StringICmp (str, "e") == 0) {
2174     format = EMBL_FMT;
2175   } else if (StringICmp (str, "p") == 0) {
2176     format = GENPEPT_FMT;
2177   } else if (StringICmp (str, "t") == 0) {
2178     format = FTABLE_FMT;
2179   
2180   } else if (StringICmp (str, "q") == 0) {
2181     do_gbseq = TRUE;
2182     format = GENBANK_FMT;
2183   } else if (StringICmp (str, "r") == 0) {
2184     do_gbseq = TRUE;
2185     format = GENPEPT_FMT;
2186 
2187   } else if (StringICmp (str, "xz") == 0 || StringICmp (str, "zx") == 0) {
2188     do_gbseq = TRUE;
2189     do_insdseq = TRUE;
2190     format = GENBANK_FMT;
2191     altformat = GENPEPT_FMT;
2192 
2193   } else if (StringICmp (str, "x") == 0) {
2194     do_gbseq = TRUE;
2195     do_insdseq = TRUE;
2196     format = GENBANK_FMT;
2197   } else if (StringCmp (str, "y") == 0) {
2198     do_tiny_seq = TRUE;
2199     format = GENBANK_FMT;
2200   } else if (StringCmp (str, "Y") == 0) {
2201     do_fasta_stream = TRUE;
2202     format = GENBANK_FMT;
2203   } else if (StringICmp (str, "z") == 0) {
2204     do_gbseq = TRUE;
2205     do_insdseq = TRUE;
2206     format = GENPEPT_FMT;
2207   } else {
2208     format = GENBANK_FMT;
2209   }
2210 
       /* mode letter; anything unrecognized falls back to SEQUIN_MODE */
2211   str = myargs [m_argMode].strvalue;
2212   if (StringICmp (str, "r") == 0) {
2213     mode = RELEASE_MODE;
2214   } else if (StringICmp (str, "e") == 0) {
2215     mode = ENTREZ_MODE;
2216   } else if (StringICmp (str, "s") == 0) {
2217     mode = SEQUIN_MODE;
2218   } else if (StringICmp (str, "d") == 0) {
2219     mode = DUMP_MODE;
2220   } else {
2221     mode = SEQUIN_MODE;
2222   }
2223 
       /* style letter; anything unrecognized falls back to NORMAL_STYLE */
2224   str = myargs [s_argStyle].strvalue;
2225   if (StringICmp (str, "n") == 0) {
2226     style = NORMAL_STYLE;
2227   } else if (StringICmp (str, "s") == 0) {
2228     style = SEGMENT_STYLE;
2229   } else if (StringICmp (str, "m") == 0) {
2230     style = MASTER_STYLE;
2231   } else if (StringICmp (str, "c") == 0) {
2232     style = CONTIG_STYLE;
2233   } else {
2234     style = NORMAL_STYLE;
2235   }
2236 
       /* start from an all-zero XtraBlock; individual fields are filled in
          further down only when the matching option is active */
2237   MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock));
2238 
2239   flags = (FlgType) myargs [g_argFlags].intvalue;
2240 
2241   locks = (LckType) myargs [h_argLock].intvalue;
2242 
2243   custom = (CstType) myargs [u_argCustom].intvalue;
2244 
       /* input type letter -> integer code handed to the Handle* routines.
          "q" keeps code 1 but marks the input as catenated; "t" and "u"
          reuse codes 4 and 5 and additionally turn batch on.  Unrecognized
          values give code 1. */
2245   str = myargs [a_argType].strvalue;
2246   if (StringICmp (str, "a") == 0) {
2247     type = 1;
2248   } else if (StringICmp (str, "e") == 0) {
2249     type = 2;
2250   } else if (StringICmp (str, "b") == 0) {
2251     type = 3;
2252   } else if (StringICmp (str, "s") == 0) {
2253     type = 4;
2254   } else if (StringICmp (str, "m") == 0) {
2255     type = 5;
2256   } else if (StringICmp (str, "q") == 0) {
2257     catenated = TRUE;
2258     type = 1;
2259   } else if (StringICmp (str, "t") == 0) {
2260     batch = 1;
2261     type = 4;
2262   } else if (StringICmp (str, "u") == 0) {
2263     batch = 1;
2264     type = 5;
2265   } else {
2266     type = 1;
2267   }
2268 
       /* a positive batch argument overrides any batch value set above */
2269   if (myargs [t_argBatch].intvalue > 0) {
2270     batch = (Int2) myargs [t_argBatch].intvalue;
2271   }
2272 
       /* binary/compressed input is rejected only when batch is still 0
          and the type code is 1 */
2273   if ((binary || compressed) && batch == 0) {
2274     if (type == 1) {
2275       Message (MSG_FATAL, "-b or -c cannot be used without -t or -a");
2276       return 1;
2277     }
2278   }
2279 
2280   remote = (Boolean) myargs [r_argRemote].intvalue;
2281 
       /* naming an accession to fetch forces remote on */
2282   accntofetch = (CharPtr) myargs [A_argAccession].strvalue;
2283   if (StringDoesHaveText (accntofetch)) {
2284     remote = TRUE;
2285   }
2286   farfeats = myargs [F_argFarFeats].intvalue;
2287 
2288 #ifdef INTERNAL_NCBI_ASN2GB
2289   hup = myargs [H_argAccessHUP].intvalue;
2290 #endif
2291 
       /* enable the fetchers; they are disabled again just before the final
          return at the bottom of Main */
2292   if (remote) {
2293 #ifdef INTERNAL_NCBI_ASN2GB
         /* NOTE(review): the three fetchers enabled for hup have no
            matching disable call anywhere in this function */
2294     if (hup) {
2295       DirSubFetchEnable ();
2296       SmartFetchEnable ();
2297       TPASmartFetchEnable ();
2298     }
2299 
2300     if (! PUBSEQBioseqFetchEnable ("asn2gb", FALSE)) {
2301       Message (MSG_POSTERR, "PUBSEQBioseqFetchEnable failed");
2302       return 1;
2303     }
2304 #else
2305     PubSeqFetchEnable ();
         /* far-feature fetching installs the PubSeq lock/free callbacks */
2306     if (farfeats) {
2307       xtra.remotelock = PubSeqRemoteLock;
2308       xtra.remotefree = PubSeqRemoteFree;
2309     }
2310 #endif
2311     PubMedFetchEnable ();
2312     LocalSeqFetchInit (FALSE);
2313   }
2314 
       /* NOTE(review): the FileOpen result is not checked; a NULL logfp is
          passed on to HandleMultipleRecords and the elapsed-time line below
          is simply skipped */
2315   logfile = (CharPtr) myargs [l_argLogFile].strvalue;
2316   if (! StringHasNoText (logfile)) {
2317     logfp = FileOpen (logfile, "w");
2318   }
2319 
2320 #ifdef OS_UNIX
2321   ffdiff = myargs [q_argFfDiff].strvalue;
2322   asn2flat = myargs [n_argAsn2Flat].strvalue;
2323 
2324   from = myargs [j_argFrom].intvalue;
2325   to = myargs [k_argTo].intvalue;
2326   if (myargs [d_argStrand].intvalue) {
2327     strand = Seq_strand_minus;
2328   } else {
2329     strand = Seq_strand_plus;
2330   }
2331   itemID = myargs [y_argItemID].intvalue;
2332 
2333 #ifdef ENABLE_ARG_X
       /* a non-blank value here later routes the run to HandleMultipleRecords */
2334   if (! StringHasNoText (myargs [x_argAccnToSave].strvalue)) {
2335     accn = myargs [x_argAccnToSave].strvalue;
2336   }
2337 #endif
2338 #endif
2339 
       /* optional XMLPREFIX setting (section SETTINGS of NCBI); a StringSave
          copy of the stack buffer is what gets handed over */
2340   if (GetAppParam ("NCBI", "SETTINGS", "XMLPREFIX", NULL, xmlbuf, sizeof (xmlbuf))) {
2341     AsnSetXMLmodulePrefix (StringSave (xmlbuf));
2342   }
2343 
       /* GBSeq/INSDSeq output: load both specifications, open the output
          stream and open the enclosing set, which is closed after the
          records have been processed.
          NOTE(review): the return 1 paths inside this block leave without
          closing aip or logfp and without disabling the fetchers. */
2344   if (do_gbseq) {
2345     if (! objgbseqAsnLoad ()) {
2346       Message (MSG_POSTERR, "objgbseqAsnLoad failed");
2347       return 1;
2348     }
2349     if (! objinsdseqAsnLoad ()) {
2350       Message (MSG_POSTERR, "objinsdseqAsnLoad failed");
2351       return 1;
2352     }
2353     MemSet ((Pointer) &gbsq, 0, sizeof (GBSeq));
2354     xtra.gbseq = &gbsq;
         /* mode "w" only when CREATE_ASN_GBSEQ_FILE is the sole mask bit
            set, "wx" otherwise - presumably text ASN.1 vs. XML; confirm
            against AsnIoOpen */
2355     if ((flags & HTML_XML_ASN_MASK) == CREATE_ASN_GBSEQ_FILE) {
2356       aip = AsnIoOpen (myargs [o_argOutputFile].strvalue, "w");
2357     } else {
2358       aip = AsnIoOpen (myargs [o_argOutputFile].strvalue, "wx");
2359     }
2360     if (aip == NULL) {
2361       Message (MSG_POSTERR, "AsnIoOpen failed");
2362       return 1;
2363     }
2364     xtra.aip = aip;
         /* the PRODUCE_OLD_GBSEQ flag overrides an INSDSeq format letter */
2365     if ((Boolean) ((flags & PRODUCE_OLD_GBSEQ) != 0)) {
2366       do_insdseq = FALSE;
2367     }
         /* atp types the enclosing set, xtra.atp its elements; the GBSet
            branch also sets PRODUCE_OLD_GBSEQ in flags so that the flag and
            the chosen set type agree */
2368     if (do_insdseq) {
2369       atp = AsnLinkType (NULL, AsnFind ("INSDSet"));
2370       xtra.atp = AsnLinkType (NULL, AsnFind ("INSDSet.E"));
2371     } else {
2372       atp = AsnLinkType (NULL, AsnFind ("GBSet"));
2373       xtra.atp = AsnLinkType (NULL, AsnFind ("GBSet.E"));
2374       flags |= PRODUCE_OLD_GBSEQ;
2375     }
2376     if (atp == NULL || xtra.atp == NULL) {
2377       Message (MSG_POSTERR, "AsnLinkType or AsnFind failed");
2378       return 1;
2379     }
2380     MemSet ((Pointer) &gbst, 0, sizeof (GBSet));
2381     AsnOpenStruct (aip, atp, (Pointer) &gbst);
2382   }
2383 
2384   extra = &xtra;
2385 
2386   starttime = GetSecs ();
2387 
       /* dispatch; the first matching case wins:
          accession given, batch or accn set, catenated, single record */
2388   if (StringDoesHaveText (accntofetch)) {
2389 
         /* remote was forced TRUE above whenever accntofetch has text.
            NOTE(review): a NULL sep is skipped silently and rsult stays 0 */
2390     if (remote) {
2391       sep = SeqEntryFromAccnOrGi (accntofetch);
2392       if (sep != NULL) {
2393         ProcessOneSeqEntry (sep, myargs [o_argOutputFile].strvalue,
2394                             format, altformat, mode, style, flags, locks,
2395                             custom, extra, do_tiny_seq, do_fasta_stream);
2396         SeqEntryFree (sep);
2397       }
2398     }
2399 
2400   } else if (batch != 0 || accn != NULL) {
2401 
2402     rsult = HandleMultipleRecords (myargs [i_argInputFile].strvalue,
2403                                    myargs [o_argOutputFile].strvalue,
2404                                    format, altformat, mode, style, flags, locks,
2405                                    custom, extra, type, batch, binary, compressed,
2406                                    propOK, ffdiff, asn2flat, accn, logfp);
2407   } else if (catenated) {
2408 
2409     rsult = HandleCatenatedRecord (myargs [i_argInputFile].strvalue,
2410                                 myargs [o_argOutputFile].strvalue,
2411                                 format, altformat, mode, style, flags, locks,
2412                                 custom, extra, type, binary, compressed,
2413                                 from, to, strand, itemID, do_tiny_seq, do_fasta_stream);
2414   } else {
2415 
2416     rsult = HandleSingleRecord (myargs [i_argInputFile].strvalue,
2417                                 myargs [o_argOutputFile].strvalue,
2418                                 format, altformat, mode, style, flags, locks,
2419                                 custom, extra, type, binary, compressed,
2420                                 from, to, strand, itemID, do_tiny_seq, do_fasta_stream);
2421   }
2422 
       /* aip is non-NULL only when the GBSeq/INSDSeq stream was opened
          above; close the enclosing set and then the stream */
2423   if (aip != NULL) {
2424     AsnCloseStruct (aip, atp, NULL);
2425     AsnPrintNewLine (aip);
2426     AsnIoClose (aip);
2427   }
2428 
       /* elapsed time covers processing only, not setup or argument parsing */
2429   stoptime = GetSecs ();
2430   runtime = stoptime - starttime;
2431   if (logfp != NULL) {
2432     fprintf (logfp, "Finished in %ld seconds\n", (long) runtime);
2433     FileClose (logfp);
2434   }
2435 
       /* disable the fetchers in the reverse of the order they were enabled */
2436   if (remote) {
2437     LocalSeqFetchDisable ();
2438     PubMedFetchDisable ();
2439 #ifdef INTERNAL_NCBI_ASN2GB
2440     PUBSEQBioseqFetchDisable ();
2441 #else
2442     PubSeqFetchDisable ();
2443 #endif
2444   }
2445 
2446   return rsult;
2447 }
2448 
2449 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.