|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/demo/asn2gb.c |
source navigation diff markup identifier search freetext search file search |
1 /* asn2gb.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2gb.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans
29 *
30 * Version Creation Date: 10/21/98
31 *
32 * $Revision: 6.137 $
33 *
34 * File Description: New GenBank flatfile generator application
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * ==========================================================================
39 */
40
#include <ncbi.h>
#include <objall.h>
#include <objsset.h>
#include <objsub.h>
#include <objfdef.h>
#include <objgbseq.h>
#include <objtseq.h>
#include <sequtil.h>
#include <sqnutils.h>
#include <explore.h>
#include <gather.h>
#include <toasn3.h>
#include <asn2gnbp.h>

/* asn2gnbi.h needed to test PUBSEQGetAccnVer in accpubseq.c */
#include <asn2gnbi.h>

/* strlen, used to bound shell command lines before they are formatted */
#include <string.h>
57
/* Version string of this application. */
#define ASN2GB_APP_VER "7.4"

/* Global (non-static) pointer initialized to the version string above. */
CharPtr ASN2GB_APPLICATION = ASN2GB_APP_VER;
61
62 static void SaveSeqEntry (
63 SeqEntryPtr sep,
64 CharPtr filename
65 )
66
67 {
68 AsnIoPtr aop;
69
70 if (sep == NULL) return;
71 aop = AsnIoOpen (filename, "w");
72 if (aop != NULL) {
73 SeqEntryAsnWrite (sep, aop, NULL);
74 }
75 AsnIoClose (aop);
76 }
77
78 static void SaveAsn2gnbk (
79 SeqEntryPtr sep,
80 CharPtr filename,
81 FmtType format,
82 ModType mode,
83 StlType style,
84 FlgType flags,
85 LckType locks,
86 CstType custom
87 )
88
89 {
90 FILE *fp;
91
92 if (sep == NULL) return;
93 fp = FileOpen (filename, "w");
94 if (fp != NULL) {
95 SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, NULL, fp);
96 }
97 FileClose (fp);
98 }
99
100 static void GetFirstGoodBioseq (
101 BioseqPtr bsp,
102 Pointer userdata
103 )
104
105 {
106 BioseqPtr PNTR bspp;
107
108 bspp = (BioseqPtr PNTR) userdata;
109 if (*bspp != NULL) return;
110 *bspp = bsp;
111 }
112
/* Local declaration of AsnPrintNewLine, which SaveTinySeqs and SaveTinyStreams
   call after writing each record; presumably defined in the ASN.1 I/O
   library -- TODO confirm. */
NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
114
115 static void SaveTinySeqs (
116 BioseqPtr bsp,
117 Pointer userdata
118 )
119
120 {
121 AsnIoPtr aip;
122 TSeqPtr tsp;
123
124 if (bsp == NULL) return;
125 aip = (AsnIoPtr) userdata;
126
127 tsp = BioseqToTSeq (bsp);
128 if (tsp == NULL) return;
129
130 TSeqAsnWrite (tsp, aip, NULL);
131 AsnPrintNewLine (aip);
132 AsnIoFlush (aip);
133
134 TSeqFree (tsp);
135 }
136
137 static void SaveTinyStreams (
138 BioseqPtr bsp,
139 Pointer userdata
140 )
141
142 {
143 AsnIoPtr aip;
144
145 if (bsp == NULL) return;
146 aip = (AsnIoPtr) userdata;
147
148 BioseqAsnWriteAsTSeq (bsp, aip, NULL);
149 AsnPrintNewLine (aip);
150 AsnIoFlush (aip);
151 }
152
153 #ifdef INTERNAL_NCBI_ASN2GB
154 static CharPtr dirsubfetchproc = "DirSubBioseqFetch";
155
156 static CharPtr dirsubfetchcmd = NULL;
157
158 extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
159 extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
160
161 {
162 Char cmmd [256];
163 Pointer dataptr;
164 FILE* fp;
165 Char path [PATH_MAX];
166
167 if (datatype != NULL) {
168 *datatype = 0;
169 }
170 if (entityID != NULL) {
171 *entityID = 0;
172 }
173 if (StringHasNoText (accn)) return NULL;
174
175 if (dirsubfetchcmd == NULL) {
176 if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
177 dirsubfetchcmd = StringSaveNoNull (cmmd);
178 }
179 }
180 if (dirsubfetchcmd == NULL) return NULL;
181
182 TmpNam (path);
183
184 #ifdef OS_UNIX
185 sprintf (cmmd, "csh %s %s > %s", dirsubfetchcmd, accn, path);
186 system (cmmd);
187 #endif
188 #ifdef OS_MSWIN
189 sprintf (cmmd, "%s %s -o %s", dirsubfetchcmd, accn, path);
190 system (cmmd);
191 #endif
192
193 fp = FileOpen (path, "r");
194 if (fp == NULL) {
195 FileRemove (path);
196 return NULL;
197 }
198 dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
199 FileClose (fp);
200 FileRemove (path);
201 return dataptr;
202 }
203
204
205 static Int2 LIBCALLBACK DirSubBioseqFetchFunc (Pointer data)
206
207 {
208 BioseqPtr bsp;
209 Char cmmd [256];
210 Pointer dataptr;
211 Uint2 datatype;
212 Uint2 entityID;
213 FILE* fp;
214 OMProcControlPtr ompcp;
215 ObjMgrProcPtr ompp;
216 Char path [PATH_MAX];
217 SeqEntryPtr sep = NULL;
218 SeqIdPtr sip;
219 TextSeqIdPtr tsip;
220
221 ompcp = (OMProcControlPtr) data;
222 if (ompcp == NULL) return OM_MSG_RET_ERROR;
223 ompp = ompcp->proc;
224 if (ompp == NULL) return OM_MSG_RET_ERROR;
225 sip = (SeqIdPtr) ompcp->input_data;
226 if (sip == NULL) return OM_MSG_RET_ERROR;
227
228 if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
229 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
230 if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
231
232 if (dirsubfetchcmd == NULL) {
233 if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
234 dirsubfetchcmd = StringSaveNoNull (cmmd);
235 }
236 }
237 if (dirsubfetchcmd == NULL) return OM_MSG_RET_ERROR;
238
239 TmpNam (path);
240
241 #ifdef OS_UNIX
242 sprintf (cmmd, "csh %s %s > %s", dirsubfetchcmd, tsip->accession, path);
243 system (cmmd);
244 #endif
245 #ifdef OS_MSWIN
246 sprintf (cmmd, "%s %s -o %s", dirsubfetchcmd, tsip->accession, path);
247 system (cmmd);
248 #endif
249
250 fp = FileOpen (path, "r");
251 if (fp == NULL) {
252 FileRemove (path);
253 return OM_MSG_RET_ERROR;
254 }
255 dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
256 FileClose (fp);
257 FileRemove (path);
258
259 if (dataptr == NULL) return OM_MSG_RET_OK;
260
261 sep = GetTopSeqEntryForEntityID (entityID);
262 if (sep == NULL) return OM_MSG_RET_ERROR;
263 bsp = BioseqFindInSeqEntry (sip, sep);
264 ompcp->output_data = (Pointer) bsp;
265 ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
266 return OM_MSG_RET_DONE;
267 }
268
269 static Boolean DirSubFetchEnable (void)
270
271 {
272 ObjMgrProcLoad (OMPROC_FETCH, dirsubfetchproc, dirsubfetchproc,
273 OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
274 DirSubBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
275 return TRUE;
276 }
277
278 static CharPtr smartfetchproc = "SmartBioseqFetch";
279
280 static CharPtr smartfetchcmd = NULL;
281
282 extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
283 extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
284
285 {
286 Char cmmd [256];
287 Pointer dataptr;
288 FILE* fp;
289 Char path [PATH_MAX];
290
291 if (datatype != NULL) {
292 *datatype = 0;
293 }
294 if (entityID != NULL) {
295 *entityID = 0;
296 }
297 if (StringHasNoText (accn)) return NULL;
298
299 if (smartfetchcmd == NULL) {
300 if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
301 smartfetchcmd = StringSaveNoNull (cmmd);
302 }
303 }
304 if (smartfetchcmd == NULL) return NULL;
305
306 TmpNam (path);
307
308 #ifdef OS_UNIX
309 sprintf (cmmd, "csh %s %s > %s", smartfetchcmd, accn, path);
310 system (cmmd);
311 #endif
312 #ifdef OS_MSWIN
313 sprintf (cmmd, "%s %s -o %s", smartfetchcmd, accn, path);
314 system (cmmd);
315 #endif
316
317 fp = FileOpen (path, "r");
318 if (fp == NULL) {
319 FileRemove (path);
320 return NULL;
321 }
322 dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
323 FileClose (fp);
324 FileRemove (path);
325 return dataptr;
326 }
327
328
329 static Int2 LIBCALLBACK SmartBioseqFetchFunc (Pointer data)
330
331 {
332 BioseqPtr bsp;
333 Char cmmd [256];
334 Pointer dataptr;
335 Uint2 datatype;
336 Uint2 entityID;
337 FILE* fp;
338 OMProcControlPtr ompcp;
339 ObjMgrProcPtr ompp;
340 Char path [PATH_MAX];
341 SeqEntryPtr sep = NULL;
342 SeqIdPtr sip;
343 TextSeqIdPtr tsip;
344
345 ompcp = (OMProcControlPtr) data;
346 if (ompcp == NULL) return OM_MSG_RET_ERROR;
347 ompp = ompcp->proc;
348 if (ompp == NULL) return OM_MSG_RET_ERROR;
349 sip = (SeqIdPtr) ompcp->input_data;
350 if (sip == NULL) return OM_MSG_RET_ERROR;
351
352 if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
353 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
354 if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
355
356 if (smartfetchcmd == NULL) {
357 if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
358 smartfetchcmd = StringSaveNoNull (cmmd);
359 }
360 }
361 if (smartfetchcmd == NULL) return OM_MSG_RET_ERROR;
362
363 TmpNam (path);
364
365 #ifdef OS_UNIX
366 sprintf (cmmd, "csh %s %s > %s", smartfetchcmd, tsip->accession, path);
367 system (cmmd);
368 #endif
369 #ifdef OS_MSWIN
370 sprintf (cmmd, "%s %s -o %s", smartfetchcmd, tsip->accession, path);
371 system (cmmd);
372 #endif
373
374 fp = FileOpen (path, "r");
375 if (fp == NULL) {
376 FileRemove (path);
377 return OM_MSG_RET_ERROR;
378 }
379 dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
380 FileClose (fp);
381 FileRemove (path);
382
383 if (dataptr == NULL) return OM_MSG_RET_OK;
384
385 sep = GetTopSeqEntryForEntityID (entityID);
386 if (sep == NULL) return OM_MSG_RET_ERROR;
387 bsp = BioseqFindInSeqEntry (sip, sep);
388 ompcp->output_data = (Pointer) bsp;
389 ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
390 return OM_MSG_RET_DONE;
391 }
392
393 static Boolean SmartFetchEnable (void)
394
395 {
396 ObjMgrProcLoad (OMPROC_FETCH, smartfetchproc, smartfetchproc,
397 OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
398 SmartBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
399 return TRUE;
400 }
401
402 static CharPtr tpasmartfetchproc = "TPASmartBioseqFetch";
403
404 static CharPtr tpasmartfetchcmd = NULL;
405
406 extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
407 extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
408
409 {
410 Char cmmd [256];
411 Pointer dataptr;
412 FILE* fp;
413 Char path [PATH_MAX];
414
415 if (datatype != NULL) {
416 *datatype = 0;
417 }
418 if (entityID != NULL) {
419 *entityID = 0;
420 }
421 if (StringHasNoText (accn)) return NULL;
422
423 if (tpasmartfetchcmd == NULL) {
424 if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
425 tpasmartfetchcmd = StringSaveNoNull (cmmd);
426 }
427 }
428 if (tpasmartfetchcmd == NULL) return NULL;
429
430 TmpNam (path);
431
432 #ifdef OS_UNIX
433 sprintf (cmmd, "csh %s %s > %s", tpasmartfetchcmd, accn, path);
434 system (cmmd);
435 #endif
436 #ifdef OS_MSWIN
437 sprintf (cmmd, "%s %s -o %s", tpasmartfetchcmd, accn, path);
438 system (cmmd);
439 #endif
440
441 fp = FileOpen (path, "r");
442 if (fp == NULL) {
443 FileRemove (path);
444 return NULL;
445 }
446 dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
447 FileClose (fp);
448 FileRemove (path);
449 return dataptr;
450 }
451
452
453 static Int2 LIBCALLBACK TPASmartBioseqFetchFunc (Pointer data)
454
455 {
456 BioseqPtr bsp;
457 Char cmmd [256];
458 Pointer dataptr;
459 Uint2 datatype;
460 Uint2 entityID;
461 FILE* fp;
462 OMProcControlPtr ompcp;
463 ObjMgrProcPtr ompp;
464 Char path [PATH_MAX];
465 SeqEntryPtr sep = NULL;
466 SeqIdPtr sip;
467 TextSeqIdPtr tsip;
468
469 ompcp = (OMProcControlPtr) data;
470 if (ompcp == NULL) return OM_MSG_RET_ERROR;
471 ompp = ompcp->proc;
472 if (ompp == NULL) return OM_MSG_RET_ERROR;
473 sip = (SeqIdPtr) ompcp->input_data;
474 if (sip == NULL) return OM_MSG_RET_ERROR;
475
476 if (sip->choice != SEQID_TPG) return OM_MSG_RET_ERROR;
477 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
478 if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
479
480 if (tpasmartfetchcmd == NULL) {
481 if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
482 tpasmartfetchcmd = StringSaveNoNull (cmmd);
483 }
484 }
485 if (tpasmartfetchcmd == NULL) return OM_MSG_RET_ERROR;
486
487 TmpNam (path);
488
489 #ifdef OS_UNIX
490 sprintf (cmmd, "csh %s %s > %s", tpasmartfetchcmd, tsip->accession, path);
491 system (cmmd);
492 #endif
493 #ifdef OS_MSWIN
494 sprintf (cmmd, "%s %s -o %s", tpasmartfetchcmd, tsip->accession, path);
495 system (cmmd);
496 #endif
497
498 fp = FileOpen (path, "r");
499 if (fp == NULL) {
500 FileRemove (path);
501 return OM_MSG_RET_ERROR;
502 }
503 dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
504 FileClose (fp);
505 FileRemove (path);
506
507 if (dataptr == NULL) return OM_MSG_RET_OK;
508
509 sep = GetTopSeqEntryForEntityID (entityID);
510 if (sep == NULL) return OM_MSG_RET_ERROR;
511 bsp = BioseqFindInSeqEntry (sip, sep);
512 ompcp->output_data = (Pointer) bsp;
513 ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
514 return OM_MSG_RET_DONE;
515 }
516
517 static Boolean TPASmartFetchEnable (void)
518
519 {
520 ObjMgrProcLoad (OMPROC_FETCH, tpasmartfetchproc, tpasmartfetchproc,
521 OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
522 TPASmartBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
523 return TRUE;
524 }
525 #endif
526
527 static Int2 HandleSingleRecord (
528 CharPtr inputFile,
529 CharPtr outputFile,
530 FmtType format,
531 FmtType altformat,
532 ModType mode,
533 StlType style,
534 FlgType flags,
535 LckType locks,
536 CstType custom,
537 XtraPtr extra,
538 Int2 type,
539 Boolean binary,
540 Boolean compressed,
541 Int4 from,
542 Int4 to,
543 Uint1 strand,
544 Uint4 itemID,
545 Boolean do_tiny_seq,
546 Boolean do_fasta_stream
547 )
548
549 {
550 AsnIoPtr aip;
551 BioseqPtr bsp;
552 BioseqSetPtr bssp;
553 Pointer dataptr = NULL;
554 Uint2 datatype = 0;
555 Uint2 entityID;
556 FILE *fp;
557 FILE *ofp = NULL;
558 ObjMgrPtr omp;
559 SeqEntryPtr sep;
560 SeqFeatPtr sfp;
561 SeqInt sint;
562 SeqLocPtr slp = NULL;
563 ValNode vn;
564
565 if (type == 1) {
566 fp = FileOpen (inputFile, "r");
567 if (fp == NULL) {
568 Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
569 return 1;
570 }
571
572 dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
573
574 FileClose (fp);
575
576 entityID = ObjMgrRegister (datatype, dataptr);
577
578 } else if (type >= 2 && type <= 5) {
579 aip = AsnIoOpen (inputFile, binary? "rb" : "r");
580 if (aip == NULL) {
581 Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", inputFile);
582 return 1;
583 }
584
585 SeqMgrHoldIndexing (TRUE);
586 switch (type) {
587 case 2 :
588 dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
589 datatype = OBJ_SEQENTRY;
590 break;
591 case 3 :
592 dataptr = (Pointer) BioseqAsnRead (aip, NULL);
593 datatype = OBJ_BIOSEQ;
594 break;
595 case 4 :
596 dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
597 datatype = OBJ_BIOSEQSET;
598 break;
599 case 5 :
600 dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
601 datatype = OBJ_SEQSUB;
602 break;
603 default :
604 break;
605 }
606 SeqMgrHoldIndexing (FALSE);
607
608 AsnIoClose (aip);
609
610 entityID = ObjMgrRegister (datatype, dataptr);
611
612 } else {
613 Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) type);
614 return 1;
615 }
616
617 if (dataptr == NULL) {
618 Message (MSG_POSTERR, "Data read failed for input file '%s'", inputFile);
619 return 1;
620 }
621
622 if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
623 datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
624
625 /*
626 #ifdef WIN_MAC
627 #if __profile__
628 ProfilerSetStatus (TRUE);
629 #endif
630 #endif
631 */
632
633 entityID = SeqMgrIndexFeatures (entityID, NULL);
634
635 /*
636 #ifdef WIN_MAC
637 #if __profile__
638 ProfilerSetStatus (FALSE);
639 #endif
640 #endif
641 */
642
643 sep = GetTopSeqEntryForEntityID (entityID);
644
645 if (sep == NULL) {
646 sep = SeqEntryNew ();
647 if (sep != NULL) {
648 if (datatype == OBJ_BIOSEQ) {
649 bsp = (BioseqPtr) dataptr;
650 sep->choice = 1;
651 sep->data.ptrvalue = bsp;
652 SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
653 } else if (datatype == OBJ_BIOSEQSET) {
654 bssp = (BioseqSetPtr) dataptr;
655 sep->choice = 2;
656 sep->data.ptrvalue = bssp;
657 SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
658 } else {
659 sep = SeqEntryFree (sep);
660 }
661 }
662 sep = GetTopSeqEntryForEntityID (entityID);
663 }
664
665 if (sep != NULL) {
666 if (extra == NULL || extra->gbseq == NULL) {
667 FileRemove (outputFile);
668 #ifdef WIN_MAC
669 FileCreate (outputFile, "TEXT", "ttxt");
670 #endif
671 ofp = FileOpen (outputFile, "w");
672 }
673
674 if ((from > 0 && to > 0) || strand == Seq_strand_minus) {
675 bsp = NULL;
676 if (format == GENPEPT_FMT) {
677 VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_PROTS, GetFirstGoodBioseq);
678 } else {
679 VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_NUCS, GetFirstGoodBioseq);
680 }
681 if (bsp != NULL) {
682 if (strand == Seq_strand_minus && from == 0 && to == 0) {
683 from = 1;
684 to = bsp->length;
685 }
686 if (from < 0) {
687 from = 1;
688 } else if (from > bsp->length) {
689 from = bsp->length;
690 }
691 if (to < 0) {
692 to = 1;
693 } else if (to > bsp->length) {
694 to = bsp->length;
695 }
696 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
697 MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
698 sint.from = from - 1;
699 sint.to = to - 1;
700 sint.strand = strand;
701 sint.id = SeqIdFindBest (bsp->id, 0);
702 vn.choice = SEQLOC_INT;
703 vn.data.ptrvalue = (Pointer) &sint;
704 slp = &vn;
705 }
706 } else if (itemID > 0) {
707 sfp = SeqMgrGetDesiredFeature (entityID, 0, itemID, 0, NULL, NULL);
708 if (sfp != NULL) {
709 slp = sfp->location;
710 }
711 }
712
713 if (do_tiny_seq) {
714 aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
715 VisitBioseqsInSep (sep, (Pointer) aip, SaveTinySeqs);
716 AsnIoFree (aip, FALSE);
717 } else if (do_fasta_stream) {
718 aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
719 VisitBioseqsInSep (sep, (Pointer) aip, SaveTinyStreams);
720 AsnIoFree (aip, FALSE);
721 } else {
722 SeqEntryToGnbk (sep, slp, format, mode, style, flags, locks, custom, extra, ofp);
723 if (altformat != 0) {
724 SeqEntryToGnbk (sep, slp, altformat, mode, style, flags, locks, custom, extra, ofp);
725 }
726 }
727 if (ofp != NULL) {
728 FileClose (ofp);
729 }
730 }
731 } else {
732 Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
733 }
734
735 omp = ObjMgrGet ();
736 ObjMgrReapOne (omp);
737 SeqMgrClearBioseqIndex ();
738 ObjMgrFreeCache (0);
739 FreeSeqIdGiCache ();
740
741 SeqEntrySetScope (NULL);
742
743 ObjMgrFree (datatype, dataptr);
744
745 return 0;
746 }
747
748 static Int2 HandleCatenatedRecord (
749 CharPtr inputFile,
750 CharPtr outputFile,
751 FmtType format,
752 FmtType altformat,
753 ModType mode,
754 StlType style,
755 FlgType flags,
756 LckType locks,
757 CstType custom,
758 XtraPtr extra,
759 Int2 type,
760 Boolean binary,
761 Boolean compressed,
762 Int4 from,
763 Int4 to,
764 Uint1 strand,
765 Uint4 itemID,
766 Boolean do_tiny_seq,
767 Boolean do_fasta_stream
768 )
769
770 {
771 AsnIoPtr aip;
772 BioseqPtr bsp;
773 Pointer dataptr = NULL;
774 Uint2 datatype = 0;
775 Uint2 entityID;
776 FILE *fp;
777 FILE *ofp = NULL;
778 ObjMgrPtr omp;
779 SeqEntryPtr sep;
780 SeqFeatPtr sfp;
781 SeqInt sint;
782 SeqLocPtr slp = NULL;
783 ValNode vn;
784
785 fp = FileOpen (inputFile, "r");
786 if (fp == NULL) {
787 Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
788 return 1;
789 }
790
791 SeqMgrHoldIndexing (TRUE);
792 dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
793 SeqMgrHoldIndexing (FALSE);
794
795 if (extra == NULL || extra->gbseq == NULL) {
796 FileRemove (outputFile);
797 #ifdef WIN_MAC
798 FileCreate (outputFile, "TEXT", "ttxt");
799 #endif
800 ofp = FileOpen (outputFile, "w");
801 }
802
803 while (dataptr != NULL) {
804
805 entityID = ObjMgrRegister (datatype, dataptr);
806 sep = GetTopSeqEntryForEntityID (entityID);
807
808
809 if (sep != NULL) {
810 if ((from > 0 && to > 0) || strand == Seq_strand_minus) {
811 bsp = NULL;
812 if (format == GENPEPT_FMT) {
813 VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_PROTS, GetFirstGoodBioseq);
814 } else {
815 VisitSequencesInSep (sep, (Pointer) &bsp, VISIT_NUCS, GetFirstGoodBioseq);
816 }
817 if (bsp != NULL) {
818 if (strand == Seq_strand_minus && from == 0 && to == 0) {
819 from = 1;
820 to = bsp->length;
821 }
822 if (from < 0) {
823 from = 1;
824 } else if (from > bsp->length) {
825 from = bsp->length;
826 }
827 if (to < 0) {
828 to = 1;
829 } else if (to > bsp->length) {
830 to = bsp->length;
831 }
832 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
833 MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
834 sint.from = from - 1;
835 sint.to = to - 1;
836 sint.strand = strand;
837 sint.id = SeqIdFindBest (bsp->id, 0);
838 vn.choice = SEQLOC_INT;
839 vn.data.ptrvalue = (Pointer) &sint;
840 slp = &vn;
841 }
842 } else if (itemID > 0) {
843 sfp = SeqMgrGetDesiredFeature (entityID, 0, itemID, 0, NULL, NULL);
844 if (sfp != NULL) {
845 slp = sfp->location;
846 }
847 }
848
849 if (do_tiny_seq) {
850 aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
851 VisitBioseqsInSep (sep, (Pointer) aip, SaveTinySeqs);
852 AsnIoFree (aip, FALSE);
853 } else if (do_fasta_stream) {
854 aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
855 VisitBioseqsInSep (sep, (Pointer) aip, SaveTinyStreams);
856 AsnIoFree (aip, FALSE);
857 } else {
858 SeqEntryToGnbk (sep, slp, format, mode, style, flags, locks, custom, extra, ofp);
859 if (altformat != 0) {
860 SeqEntryToGnbk (sep, slp, altformat, mode, style, flags, locks, custom, extra, ofp);
861 }
862 }
863 }
864
865 ObjMgrFree (datatype, dataptr);
866
867 omp = ObjMgrGet ();
868 ObjMgrReapOne (omp);
869 SeqMgrClearBioseqIndex ();
870 ObjMgrFreeCache (0);
871 FreeSeqIdGiCache ();
872
873 SeqEntrySetScope (NULL);
874
875 SeqMgrHoldIndexing (TRUE);
876 dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
877 SeqMgrHoldIndexing (FALSE);
878 }
879
880 if (ofp != NULL) {
881 FileClose (ofp);
882 }
883
884 FileClose (fp);
885
886 return 0;
887 }
888
/* Search state shared between SeqEntryHasGi (which fills it in) and the
   LookForGi callback (which reads gi/accn and sets found). */
typedef struct hasgidata {
  Int4     gi;     /* numeric id compared against SEQID_GI values */
  CharPtr  accn;   /* accession compared, ignoring case, against text ids; may be NULL */
  Boolean  found;  /* set to TRUE by LookForGi on the first match */
} HasGiData, PNTR HasGiPtr;
894
895 static void LookForGi (
896 SeqEntryPtr sep,
897 Pointer mydata,
898 Int4 index,
899 Int2 indent
900 )
901
902 {
903 BioseqPtr bsp;
904 HasGiPtr hgp;
905 SeqIdPtr sip;
906 TextSeqIdPtr tsip;
907
908 if (sep == NULL) return;
909 if (! IS_Bioseq (sep)) return;
910 bsp = (BioseqPtr) sep->data.ptrvalue;
911 if (bsp == NULL) return;
912 hgp = (HasGiPtr) mydata;
913 if (hgp == NULL) return;
914 for (sip = bsp->id; sip != NULL; sip = sip->next) {
915 switch (sip->choice) {
916 case SEQID_GI :
917 if (sip->data.intvalue == hgp->gi) {
918 hgp->found = TRUE;
919 return;
920 }
921 break;
922 case SEQID_GENBANK :
923 case SEQID_EMBL :
924 case SEQID_PIR :
925 case SEQID_SWISSPROT :
926 case SEQID_OTHER :
927 case SEQID_DDBJ :
928 case SEQID_PRF :
929 case SEQID_TPG :
930 case SEQID_TPE :
931 case SEQID_TPD :
932 case SEQID_GPIPE :
933 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
934 if (tsip != NULL && hgp->accn!= NULL &&
935 StringICmp (tsip->accession, hgp->accn) == 0) {
936 hgp->found = TRUE;
937 return;
938 }
939 break;
940 default :
941 break;
942 }
943 }
944 }
945
946 static Boolean SeqEntryHasGi (
947 SeqEntryPtr sep,
948 CharPtr accn
949 )
950
951 {
952 HasGiData hgd;
953 long int val;
954
955 if (sep == NULL || StringHasNoText (accn)) return FALSE;
956 MemSet ((Pointer) &hgd, 0, sizeof (HasGiData));
957 if (sscanf (accn, "%ld", &val) == 1) {
958 hgd.gi = (Int4) val;
959 } else {
960 hgd.accn = accn;
961 }
962 hgd.found = FALSE;
963 SeqEntryExplore (sep, (Pointer) (&hgd), LookForGi);
964 return hgd.found;
965 }
966
967 static void FreeUnpubAffil (
968 PubdescPtr pdp,
969 Pointer userdata
970 )
971
972 {
973 AuthListPtr alp;
974 CitGenPtr cgp;
975 ValNodePtr vnp;
976
977 if (pdp == NULL) return;
978 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
979 if (vnp->choice != PUB_Gen) continue;
980 cgp = (CitGenPtr) vnp->data.ptrvalue;
981 if (cgp == NULL) continue;
982 if (cgp->cit != NULL) {
983 if (StringNICmp (cgp->cit, "submitted", 8) == 0 ||
984 StringNICmp (cgp->cit, "unpublished", 11) == 0 ||
985 StringNICmp (cgp->cit, "in press", 8) == 0 ||
986 StringNICmp (cgp->cit, "to be published", 15) == 0) {
987 cgp->cit = MemFree (cgp->cit);
988 cgp->cit = StringSave ("Unpublished");
989 }
990 }
991 alp = cgp->authors;
992 if (alp == NULL) continue;
993 alp->affil = AffilFree (alp->affil);
994 }
995 }
996
997 static void LookForRefSeq (
998 BioseqPtr bsp,
999 Pointer userdata
1000 )
1001
1002 {
1003 BoolPtr hasRefseqP;
1004 SeqIdPtr sip;
1005
1006 hasRefseqP = (BoolPtr) userdata;
1007 if (*hasRefseqP) return;
1008 for (sip = bsp->id; sip != NULL; sip = sip->next) {
1009 if (sip->choice == SEQID_OTHER) {
1010 *hasRefseqP = TRUE;
1011 return;
1012 }
1013 }
1014 }
1015
/* Text for the "-format" argument built in CompareFlatFiles; indexed by
   (int) format.  Entry 0 is empty and the list ends with NULL. */
static CharPtr fffmt [] = {
  "",
  "genbank",
  "embl",
  "genbank",
  "embl",
  "ftable",
  NULL
};

/* Text for the "-mode" argument built in CompareFlatFiles; indexed by
   (int) mode.  Entry 0 is empty and the list ends with NULL. */
static CharPtr ffmod [] = {
  "",
  "release",
  "entrez",
  "gbench",
  "dump",
  NULL
};

/* Text for the "-style" argument built in CompareFlatFiles; indexed by
   (int) style.  Entry 0 is empty and the list ends with NULL. */
static CharPtr ffstl [] = {
  "",
  "normal",
  "segment",
  "master",
  "contig",
  NULL
};

/* Text for the "-view" argument built in CompareFlatFiles; indexed by
   (int) format, in parallel with fffmt.  Entry 0 is empty and the list
   ends with NULL. */
static CharPtr ffvew [] = {
  "",
  "nuc",
  "nuc",
  "prot",
  "prot",
  "nuc",
  NULL
};
1053
1054 static void ReportDiffs (
1055 CharPtr path1,
1056 CharPtr path2,
1057 CharPtr path3,
1058 FILE* fp,
1059 CharPtr ffdiff,
1060 Boolean useFfdiff
1061 )
1062
1063 {
1064 Char buf [256];
1065 Char cmmd [256];
1066 size_t ct;
1067 FILE *fpo;
1068
1069 if (useFfdiff) {
1070 sprintf (cmmd, "%s -o %s -n %s -d reports", ffdiff, path1, path2);
1071 system (cmmd);
1072
1073 sprintf (cmmd, "rm %s; rm %s", path1, path2);
1074 system (cmmd);
1075 } else {
1076 sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path1, path1, path1);
1077 system (cmmd);
1078
1079 sprintf (cmmd, "sort %s | uniq -c > %s.suc; rm %s", path2, path2, path2);
1080 system (cmmd);
1081
1082 sprintf (cmmd, "diff %s.suc %s.suc > %s", path1, path2, path3);
1083 system (cmmd);
1084
1085 sprintf (cmmd, "cat %s", path3);
1086 fpo = popen (cmmd, "r");
1087 if (fpo != NULL) {
1088 while ((ct = fread (buf, 1, sizeof (buf), fpo)) > 0) {
1089 fwrite (buf, 1, ct, fp);
1090 fflush (fp);
1091 }
1092 pclose (fpo);
1093 }
1094
1095 sprintf (cmmd, "rm %s.suc; rm %s.suc", path1, path2);
1096 system (cmmd);
1097 }
1098 }
1099
1100 static void CompareFlatFiles (
1101 CharPtr path1,
1102 CharPtr path2,
1103 CharPtr path3,
1104 SeqEntryPtr sep,
1105 FILE* fp,
1106 FmtType format,
1107 FmtType altformat,
1108 ModType mode,
1109 StlType style,
1110 FlgType flags,
1111 LckType locks,
1112 CstType custom,
1113 XtraPtr extra,
1114 Int2 batch,
1115 CharPtr ffdiff,
1116 CharPtr asn2flat,
1117 Boolean useFfdiff
1118 )
1119
1120 {
1121 #ifdef OS_UNIX
1122 AsnIoPtr aip;
1123 Char arguments [128];
1124 BioseqPtr bsp;
1125 Char buf [256];
1126 Char cmmd [256];
1127 size_t ct;
1128 int diff;
1129 FILE *fpo;
1130 SeqEntryPtr fsep;
1131
1132 if (sep == NULL) return;
1133
1134 if (batch == 1) {
1135
1136 SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, extra, fp);
1137 if (altformat != 0) {
1138 SeqEntryToGnbk (sep, NULL, altformat, mode, style, flags, locks, custom, extra, fp);
1139 }
1140 return; /* just make report, nothing to diff */
1141
1142 } else if (batch == 2) {
1143
1144 #ifdef ASN2GNBK_SUPPRESS_UNPUB_AFFIL
1145 VisitPubdescsInSep (sep, NULL, FreeUnpubAffil);
1146 #endif
1147
1148 SaveAsn2gnbk (sep, path1, format, SEQUIN_MODE, style, flags, locks, custom);
1149 SaveAsn2gnbk (sep, path2, format, RELEASE_MODE, style, flags, locks, custom);
1150
1151 ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1152
1153 } else if (batch == 3) {
1154
1155 #ifdef ASN2GNBK_SUPPRESS_UNPUB_AFFIL
1156 VisitPubdescsInSep (sep, NULL, FreeUnpubAffil);
1157 #endif
1158
1159 SaveAsn2gnbk (sep, path1, format, mode, style, flags, locks, custom);
1160 SeriousSeqEntryCleanupBulk (sep);
1161 SaveAsn2gnbk (sep, path2, format, mode, style, flags, locks, custom);
1162
1163 ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1164
1165 } else if (batch == 4) {
1166
1167 aip = AsnIoOpen (path3, "w");
1168 if (aip == NULL) return;
1169
1170 SeqEntryAsnWrite (sep, aip, NULL);
1171 AsnIoClose (aip);
1172
1173 fsep = FindNthBioseq (sep, 1);
1174 if (fsep == NULL || fsep->choice != 1) return;
1175 bsp = (BioseqPtr) fsep->data.ptrvalue;
1176 if (bsp == NULL) return;
1177 SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
1178
1179 arguments [0] = '\0';
1180 sprintf (arguments, "-format %s -mode %s -style %s -view %s -nocleanup",
1181 fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
1182
1183 sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path1);
1184 system (cmmd);
1185
1186 arguments [0] = '\0';
1187 sprintf (arguments, "-format %s -mode %s -style %s -view %s",
1188 fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
1189
1190 sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path2);
1191 system (cmmd);
1192
1193 ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1194
1195 } else if (batch == 5) {
1196
1197 SaveAsn2gnbk (sep, path1, format, mode, style, flags, locks, custom);
1198
1199 aip = AsnIoOpen (path3, "w");
1200 if (aip == NULL) return;
1201
1202 SeqEntryAsnWrite (sep, aip, NULL);
1203 AsnIoClose (aip);
1204
1205 fsep = FindNthBioseq (sep, 1);
1206 if (fsep == NULL || fsep->choice != 1) return;
1207 bsp = (BioseqPtr) fsep->data.ptrvalue;
1208 if (bsp == NULL) return;
1209 SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
1210
1211 arguments [0] = '\0';
1212 sprintf (arguments, "-format %s -mode %s -style %s -view %s",
1213 fffmt [(int) format], ffmod [(int) mode], ffstl [(int) style], ffvew [(int) format]);
1214
1215 sprintf (cmmd, "%s %s -i %s -o %s", asn2flat, arguments, path3, path2);
1216 system (cmmd);
1217
1218 ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1219
1220 } else if (batch == 6) {
1221
1222 #ifdef ASN2GNBK_SUPPRESS_UNPUB_AFFIL
1223 VisitPubdescsInSep (sep, NULL, FreeUnpubAffil);
1224 #endif
1225
1226 SaveAsn2gnbk (sep, path1, format, ENTREZ_MODE, style, (flags | 1), locks, custom);
1227 SaveAsn2gnbk (sep, path2, format, ENTREZ_MODE, style, (flags | 1 | 262144), locks, custom);
1228
1229 ReportDiffs (path1, path2, path3, fp, ffdiff, useFfdiff);
1230
1231 } else if (batch == 7) {
1232
1233 aip = AsnIoOpen (path3, "w");
1234 if (aip == NULL) return;
1235
1236 SeqEntryAsnWrite (sep, aip, NULL);
1237 AsnIoClose (aip);
1238
1239 if (FindNucBioseq (sep) != NULL) {
1240
1241 sprintf (cmmd, "./oldasn2gb -i %s -o %s -m e -g 1", path3, path1);
1242 system (cmmd);
1243
1244 sprintf (cmmd, "./newasn2gb -i %s -o %s -m e -g 1", path3, path2);
1245 system (cmmd);
1246
1247 } else {
1248
1249 sprintf (cmmd, "./oldasn2gb -f p -i %s -o %s -m e -g 1", path3, path1);
1250 system (cmmd);
1251
1252 sprintf (cmmd, "./newasn2gb -f p -i %s -o %s -m e -g 1", path3, path2);
1253 system (cmmd);
1254
1255 }
1256
1257 sprintf (cmmd, "diff -b %s %s > %s", path1, path2, path3);
1258 diff = system (cmmd);
1259
1260 if (diff > 0) {
1261 sprintf (cmmd, "cat %s", path3);
1262 fpo = popen (cmmd, "r");
1263 if (fpo != NULL) {
1264 fprintf (fp, "\nasn2gb difference in %s\n", buf);
1265 fflush (fp);
1266 while ((ct = fread (buf, 1, sizeof (buf), fpo)) > 0) {
1267 fwrite (buf, 1, ct, fp);
1268 fflush (fp);
1269 }
1270 pclose (fpo);
1271 }
1272 }
1273 }
1274
1275 #else
1276
1277 SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, extra, fp);
1278 if (altformat != 0) {
1279 SeqEntryToGnbk (sep, NULL, altformat, mode, style, flags, locks, custom, extra, fp);
1280 }
1281 #endif
1282 }
1283
1284 static void CheckOrder (
1285 SeqFeatPtr sfp,
1286 Pointer userdata
1287 )
1288
1289 {
1290 #ifdef ASN2GNBK_IGNORE_OUT_OF_ORDER
1291 BoolPtr bp;
1292 BioseqPtr bsp;
1293 #endif
1294 #ifdef ASN2GNBK_REPAIR_OUT_OF_ORDER
1295 BioseqPtr bsp;
1296 SeqLocPtr gslp;
1297 Boolean hasNulls;
1298 Boolean noLeft;
1299 Boolean noRight;
1300 #endif
1301
1302 /* ignore order of bonds in heterogen features from PDB */
1303
1304 if (sfp->data.choice == SEQFEAT_HET) return;
1305
1306 #ifdef ASN2GNBK_IGNORE_OUT_OF_ORDER
1307 bsp = BioseqFindFromSeqLoc (sfp->location);
1308 if (bsp != NULL && SeqLocBadSortOrder (bsp, sfp->location)) {
1309 bp = (BoolPtr) userdata;
1310 *bp = TRUE;
1311 }
1312 #endif
1313 #ifdef ASN2GNBK_REPAIR_OUT_OF_ORDER
1314 bsp = BioseqFindFromSeqLoc (sfp->location);
1315 if (bsp != NULL && SeqLocBadSortOrder (bsp, sfp->location)) {
1316 hasNulls = LocationHasNullsBetween (sfp->location);
1317 gslp = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, hasNulls);
1318 if (gslp != NULL) {
1319 CheckSeqLocForPartial (sfp->location, &noLeft, &noRight);
1320 sfp->location = SeqLocFree (sfp->location);
1321 sfp->location = gslp;
1322 if (bsp->repr == Seq_repr_seg) {
1323 gslp = SegLocToParts (bsp, sfp->location);
1324 sfp->location = SeqLocFree (sfp->location);
1325 sfp->location = gslp;
1326 }
1327 FreeAllFuzz (sfp->location);
1328 SetSeqLocPartial (sfp->location, noLeft, noRight);
1329 }
1330 }
1331 #endif
1332 }
1333
1334 static Int2 HandleMultipleRecords (
1335 CharPtr inputFile,
1336 CharPtr outputFile,
1337 FmtType format,
1338 FmtType altformat,
1339 ModType mode,
1340 StlType style,
1341 FlgType flags,
1342 LckType locks,
1343 CstType custom,
1344 XtraPtr extra,
1345 Int2 type,
1346 Int2 batch,
1347 Boolean binary,
1348 Boolean compressed,
1349 Boolean propOK,
1350 CharPtr ffdiff,
1351 CharPtr asn2flat,
1352 CharPtr accn,
1353 FILE *logfp
1354 )
1355
1356 {
1357 AsnIoPtr aip;
1358 AsnModulePtr amp;
1359 AsnTypePtr atp, atp_bss, atp_desc, atp_sbp, atp_se, atp_ssp;
1360 Boolean atp_se_seen = FALSE;
1361 BioseqPtr bsp;
1362 BioseqSetPtr bssp;
1363 Char buf [41];
1364 Char cmmd [256];
1365 CitSubPtr csp = NULL;
1366 SeqDescrPtr descr = NULL;
1367 FILE *fp;
1368 SeqEntryPtr fsep;
1369 Boolean hasgi;
1370 Boolean hasRefSeq;
1371 Boolean io_failure = FALSE;
1372 Char longest [41];
1373 Int4 numrecords = 0;
1374 FILE *ofp = NULL;
1375 ObjMgrPtr omp;
1376 Boolean outOfOrder;
1377 ObjValNode ovn;
1378 Char path1 [PATH_MAX];
1379 Char path2 [PATH_MAX];
1380 Char path3 [PATH_MAX];
1381 Pubdesc pd;
1382 SubmitBlockPtr sbp = NULL;
1383 SeqEntryPtr sep;
1384 time_t starttime, stoptime, worsttime;
1385 SeqDescrPtr subcit = NULL;
1386 FILE *tfp;
1387 Boolean useFfdiff;
1388 ValNode vn;
1389 #ifdef OS_UNIX
1390 CharPtr gzcatprog;
1391 int ret;
1392 Boolean usedPopen = FALSE;
1393 #endif
1394
1395 if (StringHasNoText (inputFile)) return 1;
1396
1397 #ifndef OS_UNIX
1398 if (compressed) {
1399 Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines");
1400 return 1;
1401 }
1402 #endif
1403
1404 amp = AsnAllModPtr ();
1405 if (amp == NULL) {
1406 Message (MSG_POSTERR, "Unable to load AsnAllModPtr");
1407 return 1;
1408 }
1409
1410 atp_ssp = AsnFind ("Seq-submit");
1411 if (atp_ssp == NULL) {
1412 Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-submit");
1413 return 1;
1414 }
1415
1416 atp_sbp = AsnFind ("Seq-submit.sub");
1417 if (atp_sbp == NULL) {
1418 Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-submit.sub");
1419 return 1;
1420 }
1421
1422 atp_bss = AsnFind ("Bioseq-set");
1423 if (atp_bss == NULL) {
1424 Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set");
1425 return 1;
1426 }
1427
1428 atp_desc = AsnFind ("Bioseq-set.descr");
1429 if (atp_desc == NULL) {
1430 Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.descr");
1431 return 1;
1432 }
1433
1434 atp_se = AsnFind ("Bioseq-set.seq-set.E");
1435 if (atp_se == NULL) {
1436 Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.seq-set.E");
1437 return 1;
1438 }
1439
1440 #ifdef OS_UNIX
1441 if (compressed) {
1442 gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
1443 if (gzcatprog != NULL) {
1444 sprintf (cmmd, "%s %s", gzcatprog, inputFile);
1445 } else {
1446 ret = system ("gzcat -h >/dev/null 2>&1");
1447 if (ret == 0) {
1448 sprintf (cmmd, "gzcat %s", inputFile);
1449 } else if (ret == -1) {
1450 Message (MSG_POSTERR, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
1451 return 1;
1452 } else {
1453 ret = system ("zcat -h >/dev/null 2>&1");
1454 if (ret == 0) {
1455 sprintf (cmmd, "zcat %s", inputFile);
1456 } else if (ret == -1) {
1457 Message (MSG_POSTERR, "Unable to fork or exec zcat in ScanBioseqSetRelease");
1458 return 1;
1459 } else {
1460 Message (MSG_POSTERR, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
1461 return 1;
1462 }
1463 }
1464 }
1465 fp = popen (cmmd, /* binary? "rb" : */ "r");
1466 usedPopen = TRUE;
1467 } else {
1468 fp = FileOpen (inputFile, binary? "rb" : "r");
1469 }
1470 #else
1471 fp = FileOpen (inputFile, binary? "rb" : "r");
1472 #endif
1473 if (fp == NULL) {
1474 Message (MSG_POSTERR, "FileOpen failed for input file '%s'", inputFile);
1475 return 1;
1476 }
1477
1478 aip = AsnIoNew (binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
1479 if (aip == NULL) {
1480 Message (MSG_POSTERR, "AsnIoNew failed for input file '%s'", inputFile);
1481 return 1;
1482 }
1483
1484 if ((batch == 1 || batch == 4 || batch == 5 || format != GENBANK_FMT) &&
1485 (extra == NULL || extra->gbseq == NULL)) {
1486 ofp = FileOpen (outputFile, "w");
1487 if (ofp == NULL) {
1488 AsnIoClose (aip);
1489 Message (MSG_POSTERR, "FileOpen failed for output file '%s'", outputFile);
1490 return 1;
1491 }
1492 }
1493
1494 TmpNam (path1);
1495 tfp = FileOpen (path1, "w");
1496 fprintf (tfp, "\n");
1497 FileClose (tfp);
1498
1499 TmpNam (path2);
1500 tfp = FileOpen (path2, "w");
1501 fprintf (tfp, "\n");
1502 FileClose (tfp);
1503
1504 TmpNam (path3);
1505 tfp = FileOpen (path3, "w");
1506 fprintf (tfp, "\n");
1507 FileClose (tfp);
1508
1509 if (type == 4) {
1510 atp = atp_bss;
1511 } else if (type == 5) {
1512 atp = atp_ssp;
1513 } else {
1514 Message (MSG_POSTERR, "Batch processing type not set properly");
1515 return 1;
1516 }
1517
1518 longest [0] = '\0';
1519 worsttime = 0;
1520
1521 while ((! io_failure) && (atp = AsnReadId (aip, amp, atp)) != NULL) {
1522 if (aip->io_failure) {
1523 io_failure = TRUE;
1524 aip->io_failure = FALSE;
1525 }
1526 if (atp == atp_se) {
1527 atp_se_seen = TRUE;
1528
1529 SeqMgrHoldIndexing (TRUE);
1530 sep = SeqEntryAsnRead (aip, atp);
1531 SeqMgrHoldIndexing (FALSE);
1532
1533 /* propagate descriptors from the top-level set */
1534
1535 if (propOK && descr != NULL && sep != NULL && sep->data.ptrvalue != NULL) {
1536 if (sep->choice == 1) {
1537 bsp = (BioseqPtr) sep->data.ptrvalue;
1538 ValNodeLink (&(bsp->descr),
1539 AsnIoMemCopy ((Pointer) descr,
1540 (AsnReadFunc) SeqDescrAsnRead,
1541 (AsnWriteFunc) SeqDescrAsnWrite));
1542 } else if (sep->choice == 2) {
1543 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1544 ValNodeLink (&(bssp->descr),
1545 AsnIoMemCopy ((Pointer) descr,
1546 (AsnReadFunc) SeqDescrAsnRead,
1547 (AsnWriteFunc) SeqDescrAsnWrite));
1548 }
1549 }
1550
1551 /* propagate submission citation as descriptor onto each Seq-entry */
1552
1553 if (subcit != NULL && sep != NULL && sep->data.ptrvalue != NULL) {
1554 if (sep->choice == 1) {
1555 bsp = (BioseqPtr) sep->data.ptrvalue;
1556 ValNodeLink (&(bsp->descr),
1557 AsnIoMemCopy ((Pointer) subcit,
1558 (AsnReadFunc) SeqDescrAsnRead,
1559 (AsnWriteFunc) SeqDescrAsnWrite));
1560 } else if (sep->choice == 2) {
1561 bssp = (BioseqSetPtr) sep->data.ptrvalue;
1562 ValNodeLink (&(bssp->descr),
1563 AsnIoMemCopy ((Pointer) subcit,
1564 (AsnReadFunc) SeqDescrAsnRead,
1565 (AsnWriteFunc) SeqDescrAsnWrite));
1566 }
1567 }
1568
1569 fsep = FindNthBioseq (sep, 1);
1570 if (fsep != NULL && fsep->choice == 1) {
1571 bsp = (BioseqPtr) fsep->data.ptrvalue;
1572 if (bsp != NULL) {
1573 SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
1574 #ifdef OS_UNIX
1575 if (batch != 1) {
1576 printf ("%s\n", buf);
1577 fflush (stdout);
1578 if (batch != 4 && batch != 5) {
1579 if (ofp != NULL) {
1580 fprintf (ofp, "%s\n", buf);
1581 fflush (ofp);
1582 }
1583 }
1584 }
1585 #endif
1586 if (logfp != NULL) {
1587 fprintf (logfp, "%s\n", buf);
1588 fflush (logfp);
1589 }
1590 }
1591 }
1592
1593 hasgi = SeqEntryHasGi (sep, accn);
1594 if (hasgi) {
1595 sprintf (buf, "%s.before", accn);
1596 SaveSeqEntry (sep, buf);
1597 sprintf (buf, "%s.gbff.before", accn);
1598 SaveAsn2gnbk (sep, buf, format, SEQUIN_MODE, NORMAL_STYLE, 0, 0, 0);
1599 if (ofp != NULL) {
1600 FileClose (ofp);
1601 }
1602 AsnIoClose (aip);
1603 return 0;
1604 }
1605 outOfOrder = FALSE;
1606 #ifdef ASN2GNBK_IGNORE_OUT_OF_ORDER
1607 VisitFeaturesInSep (sep, (Pointer) &outOfOrder, CheckOrder);
1608 #endif
1609 #ifdef ASN2GNBK_REPAIR_OUT_OF_ORDER
1610 VisitFeaturesInSep (sep, (Pointer) &outOfOrder, CheckOrder);
1611 #endif
1612 if ((! outOfOrder) && StringHasNoText (accn)) {
1613 if ((format != GENPEPT_FMT && SeqEntryHasNucs (sep)) ||
1614 (format == GENPEPT_FMT && SeqEntryHasProts (sep))) {
1615
1616 hasRefSeq = FALSE;
1617 VisitBioseqsInSep (sep, (Pointer) &hasRefSeq, LookForRefSeq);
1618 if (hasRefSeq) {
1619 if (batch != 1 && format == GENBANK_FMT && ofp == NULL &&
1620 (extra == NULL || extra->gbseq == NULL)) {
1621 ofp = FileOpen (outputFile, "w");
1622 if (ofp == NULL) {
1623 ofp = stdout;
1624 }
1625 }
1626 }
1627
1628 starttime = GetSecs ();
1629 useFfdiff = (Boolean) (format == GENBANK_FMT && (! hasRefSeq));
1630 CompareFlatFiles (path1, path2, path3, sep, ofp,
1631 format, altformat, mode, style, flags, locks,
1632 custom, extra, batch, ffdiff, asn2flat, useFfdiff);
1633 stoptime = GetSecs ();
1634 if (stoptime - starttime > worsttime) {
1635 worsttime = stoptime - starttime;
1636 StringCpy (longest, buf);
1637 }
1638 numrecords++;
1639 }
1640 }
1641 SeqEntryFree (sep);
1642
1643 omp = ObjMgrGet ();
1644 ObjMgrReapOne (omp);
1645 SeqMgrClearBioseqIndex ();
1646 ObjMgrFreeCache (0);
1647 FreeSeqIdGiCache ();
1648
1649 SeqEntrySetScope (NULL);
1650
1651 } else if (atp == atp_desc && (! atp_se_seen)) {
1652 descr = SeqDescrAsnRead (aip, atp);
1653 } else if (atp == atp_sbp) {
1654 sbp = SubmitBlockAsnRead (aip, atp);
1655 if (sbp != NULL) {
1656 csp = sbp->cit;
1657 if (csp != NULL) {
1658 MemSet ((Pointer) &ovn, 0, sizeof (ObjValNode));
1659 MemSet ((Pointer) &pd, 0, sizeof (Pubdesc));
1660 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
1661 vn.choice = PUB_Sub;
1662 vn.data.ptrvalue = (Pointer) csp;
1663 vn.next = NULL;
1664 pd.pub = &vn;
1665 ovn.vn.choice = Seq_descr_pub;
1666 ovn.vn.data.ptrvalue = (Pointer) &pd;
1667 ovn.vn.next = NULL;
1668 ovn.vn.extended = 1;
1669 subcit = (SeqDescrPtr) &ovn;
1670 }
1671 }
1672 } else {
1673 AsnReadVal (aip, atp, NULL);
1674 }
1675
1676 if (aip->io_failure) {
1677 io_failure = TRUE;
1678 aip->io_failure = FALSE;
1679 }
1680 }
1681
1682 if (aip->io_failure) {
1683 io_failure = TRUE;
1684 }
1685
1686 if (io_failure) {
1687 Message (MSG_POSTERR, "Asn io_failure for input file '%s'", inputFile);
1688 }
1689
1690 if (ofp != NULL) {
1691 FileClose (ofp);
1692 }
1693
1694 AsnIoFree (aip, FALSE);
1695
1696 SeqDescrFree (descr);
1697 SubmitBlockFree (sbp);
1698
1699 #ifdef OS_UNIX
1700 if (usedPopen) {
1701 pclose (fp);
1702 } else {
1703 FileClose (fp);
1704 }
1705 #else
1706 FileClose (fp);
1707 #endif
1708
1709 if (logfp != NULL && (! StringHasNoText (longest))) {
1710 fprintf (logfp, "Longest processing time %ld seconds on %s\n",
1711 (long) worsttime, longest);
1712 fprintf (logfp, "Total number of records %ld\n", (long) numrecords);
1713 fflush (logfp);
1714 }
1715
1716 sprintf (cmmd, "rm %s; rm %s; rm %s", path1, path2, path3);
1717 system (cmmd);
1718
1719 if (io_failure) return 1;
1720 return 0;
1721 }
1722
1723 #include <lsqfetch.h>
1724 #include <pmfapi.h>
1725 #ifdef INTERNAL_NCBI_ASN2GB
1726 #include <accpubseq.h>
1727 #endif
1728
1729 static void ProcessOneSeqEntry (
1730 SeqEntryPtr sep,
1731 CharPtr outputFile,
1732 FmtType format,
1733 FmtType altformat,
1734 ModType mode,
1735 StlType style,
1736 FlgType flags,
1737 LckType locks,
1738 CstType custom,
1739 XtraPtr extra,
1740 Boolean do_tiny_seq,
1741 Boolean do_fasta_stream
1742 )
1743
1744
1745 {
1746 AsnIoPtr aip;
1747 FILE *ofp = NULL;
1748
1749 if (sep == NULL) return;
1750
1751 if (extra == NULL || extra->gbseq == NULL) {
1752 FileRemove (outputFile);
1753 #ifdef WIN_MAC
1754 FileCreate (outputFile, "TEXT", "ttxt");
1755 #endif
1756 ofp = FileOpen (outputFile, "w");
1757 }
1758
1759 if (do_tiny_seq) {
1760 aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
1761 VisitBioseqsInSep (sep, (Pointer) aip, SaveTinySeqs);
1762 AsnIoFree (aip, FALSE);
1763 } else if (do_fasta_stream) {
1764 aip = AsnIoNew (ASNIO_TEXT_OUT | ASNIO_XML, ofp, NULL, NULL, NULL);
1765 VisitBioseqsInSep (sep, (Pointer) aip, SaveTinyStreams);
1766 AsnIoFree (aip, FALSE);
1767 } else {
1768 SeqEntryToGnbk (sep, NULL, format, mode, style, flags, locks, custom, extra, ofp);
1769 if (altformat != 0) {
1770 SeqEntryToGnbk (sep, NULL, altformat, mode, style, flags, locks, custom, extra, ofp);
1771 }
1772 }
1773 if (ofp != NULL) {
1774 FileClose (ofp);
1775 }
1776 }
1777
1778 static SeqEntryPtr SeqEntryFromAccnOrGi (
1779 CharPtr str
1780 )
1781
1782 {
1783 CharPtr accn;
1784 Boolean alldigits;
1785 BioseqPtr bsp;
1786 Char buf [64];
1787 Char ch;
1788 Int4 flags = 0;
1789 CharPtr ptr;
1790 Int2 retcode = 0;
1791 SeqEntryPtr sep = NULL;
1792 SeqIdPtr sip;
1793 CharPtr tmp1 = NULL;
1794 CharPtr tmp2 = NULL;
1795 Int4 uid = 0;
1796 long int val;
1797 ValNode vn;
1798
1799 if (StringHasNoText (str)) return NULL;
1800 StringNCpy_0 (buf, str, sizeof (buf));
1801 TrimSpacesAroundString (buf);
1802
1803 accn = buf;
1804 tmp1 = StringChr (accn, ',');
1805 if (tmp1 != NULL) {
1806 *tmp1 = '\0';
1807 tmp1++;
1808 tmp2 = StringChr (tmp1, ',');
1809 if (tmp2 != NULL) {
1810 *tmp2 = '\0';
1811 tmp2++;
1812 if (StringDoesHaveText (tmp2) && sscanf (tmp2, "%ld", &val) == 1) {
1813 flags = (Int4) val;
1814 }
1815 }
1816 if (StringDoesHaveText (tmp1) && sscanf (tmp1, "%ld", &val) == 1) {
1817 retcode = (Int2) val;
1818 }
1819 }
1820
1821 #ifdef INTERNAL_NCBI_ASN2GB
1822 /* temporary code to test PUBSEQGetAccnVer in accpubseq.c */
1823
1824 if (*accn == '*') {
1825 Char buf [64];
1826 accn++;
1827 if (sscanf (accn, "%ld", &val) == 1) {
1828 uid = (Int4) val;
1829 if (GetAccnVerFromServer (uid, buf)) {
1830 Message (MSG_POST, "GetAccnVerFromServer returned %s", buf);
1831 } else {
1832 Message (MSG_POST, "GetAccnVerFromServer failed");
1833 }
1834 }
1835 return NULL;
1836 }
1837 #endif
1838
1839 alldigits = TRUE;
1840 ptr = accn;
1841 ch = *ptr;
1842 while (ch != '\0') {
1843 if (! IS_DIGIT (ch)) {
1844 alldigits = FALSE;
1845 }
1846 ptr++;
1847 ch = *ptr;
1848 }
1849
1850 if (alldigits) {
1851 if (sscanf (accn, "%ld", &val) == 1) {
1852 uid = (Int4) val;
1853 }
1854 } else {
1855 sip = SeqIdFromAccessionDotVersion (accn);
1856 if (sip != NULL) {
1857 uid = GetGIForSeqId (sip);
1858 SeqIdFree (sip);
1859 }
1860 }
1861
1862 if (uid > 0) {
1863 sep = PubSeqSynchronousQuery (uid, retcode, flags);
1864 if (sep != NULL) {
1865 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
1866 vn.choice = SEQID_GI;
1867 vn.data.intvalue = uid;
1868 bsp = BioseqFind (&vn);
1869 if (bsp != NULL) {
1870 sep = SeqMgrGetSeqEntryForData ((Pointer) bsp);
1871 }
1872 }
1873 }
1874
1875 return sep;
1876 }
1877
1878 static void MarkLocalAnnots (
1879 SeqAnnotPtr sap,
1880 Pointer userdata
1881 )
1882
1883 {
1884 if (sap == NULL) return;
1885
1886 if (StringNICmp (sap->name, "Annot:", 6) != 0) {
1887 sap->idx.deleteme = TRUE;
1888 }
1889 }
1890
1891 static ValNodePtr PubSeqRemoteLock (
1892 SeqIdPtr sip,
1893 Pointer remotedata
1894 )
1895
1896 {
1897 BioseqPtr bsp;
1898 SeqAnnotPtr sap = NULL;
1899 SeqEntryPtr sep = NULL;
1900 Int4 uid = 0;
1901 ValNodePtr vnp = NULL;
1902
1903 if (sip == NULL) return NULL;
1904
1905 if (sip->choice == SEQID_GI) {
1906 uid = (Int4) sip->data.intvalue;
1907 } else {
1908 uid = GetGIForSeqId (sip);
1909 }
1910
1911 if (uid > 0) {
1912 sep = PubSeqSynchronousQuery (uid, 1, -1);
1913 if (sep != NULL && IS_Bioseq (sep)) {
1914 bsp = (BioseqPtr) sep->data.ptrvalue;
1915 if (bsp != NULL) {
1916 VisitAnnotsInSep (sep, NULL, MarkLocalAnnots);
1917 DeleteMarkedObjects (0, OBJ_BIOSEQ, (Pointer) bsp);
1918 sap = bsp->annot;
1919 bsp->annot = NULL;
1920 }
1921 }
1922 SeqEntryFree (sep);
1923 }
1924
1925 if (sap == NULL) return NULL;
1926
1927 bsp = (BioseqPtr) MemNew (sizeof (Bioseq));
1928 if (bsp == NULL) return NULL;
1929 bsp->annot = sap;
1930
1931 vnp = ValNodeNew (NULL);
1932 if (vnp == NULL) return NULL;
1933
1934 vnp->data.ptrvalue = (Pointer) bsp;
1935
1936 return vnp;
1937 }
1938
1939 static void PubSeqRemoteFree (
1940 ValNodePtr vnp,
1941 Pointer remotedata
1942 )
1943
1944 {
1945 ValNodeFreeData (vnp);
1946 }
1947
1948 /* Args structure contains command-line arguments */
1949
/*
 * Indices into the myargs command-line table.  Each name starts with the
 * option letter of the matching table entry, and the enumerators must
 * stay in the same order as the entries of myargs.
 */
typedef enum {
  i_argInputFile = 0,   /* -i  input file name */
  o_argOutputFile,      /* -o  output file name */
  f_argFormat,          /* -f  output format */
  m_argMode,            /* -m  mode */
  s_argStyle,           /* -s  style */
  g_argFlags,           /* -g  bit flags */
  h_argLock,            /* -h  lock/lookup flags */
  u_argCustom,          /* -u  custom flags */
  a_argType,            /* -a  ASN.1 type */
  t_argBatch,           /* -t  batch variant */
  b_argBinary,          /* -b  input file is binary */
  c_argCompressed,      /* -c  batch file is compressed */
  p_argPropagate,       /* -p  propagate top descriptors */
  l_argLogFile,         /* -l  log file */
  r_argRemote,          /* -r  remote fetching */
  A_argAccession,       /* -A  accession to fetch */
  F_argFarFeats,        /* -F  fetch remote annotations */
#ifdef OS_UNIX
  q_argFfDiff,          /* -q  ffdiff executable */
  n_argAsn2Flat,        /* -n  asn2flat executable */
  j_argFrom,            /* -j  SeqLoc from */
  k_argTo,              /* -k  SeqLoc to */
  d_argStrand,          /* -d  SeqLoc minus strand */
  y_argItemID,          /* -y  feature itemID */
#ifdef INTERNAL_NCBI_ASN2GB
  H_argAccessHUP,       /* -H  internal access to HUP */
#endif
#ifdef ENABLE_ARG_X
  x_argAccnToSave,      /* -x  accession to extract */
#endif
#endif
} Arguments;
1983
1984 Args myargs [] = {
1985 {"Input File Name", "stdin", NULL, NULL,
1986 FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
1987 {"Output File Name", "stdout", NULL, NULL,
1988 FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
1989 {"Format (b GenBank, e EMBL, p GenPept, t Feature Table, x INSDSet)", "b", NULL, NULL,
1990 FALSE, 'f', ARG_STRING, 0.0, 0, NULL},
1991 {"Mode (r Release, e Entrez, s Sequin, d Dump)", "s", NULL, NULL,
1992 FALSE, 'm', ARG_STRING, 0.0, 0, NULL},
1993 {"Style (n Normal, s Segment, m Master, c Contig)", "n", NULL, NULL,
1994 FALSE, 's', ARG_STRING, 0.0, 0, NULL},
1995 {"Bit Flags (1 HTML, 2 XML, 4 ContigFeats, 8 ContigSrcs, 16 FarTransl)", "0", NULL, NULL,
1996 FALSE, 'g', ARG_INT, 0.0, 0, NULL},
1997 {"Lock/Lookup Flags (8 LockProd, 16 LookupComp, 64 LookupProd)", "0", NULL, NULL,
1998 FALSE, 'h', ARG_INT, 0.0, 0, NULL},
1999 {"Custom Flags (4 HideFeats, 1792 HideRefs, 8192 HideSources, 262144 HideTranslation)", "0", NULL, NULL,
2000 FALSE, 'u', ARG_INT, 0.0, 0, NULL},
2001 {"ASN.1 Type\n"
2002 " Single Record: a Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, q Catenated\n"
2003 " Release File: t Batch Bioseq-set, u Batch Seq-submit\n", "a", NULL, NULL,
2004 TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
2005 {"Batch\n"
2006 " 1 Report\n"
2007 " 2 Sequin/Release\n"
2008 " 3 asn2gb SSEC/nocleanup\n"
2009 " 4 asn2flat BSEC/nocleanup\n"
2010 " 5 asn2gb/asn2flat\n"
2011 " 6 asn2gb NEW dbxref/OLD dbxref\n"
2012 " 7 oldasn2gb/newasn2gb", "0", "0", "7",
2013 FALSE, 't', ARG_INT, 0.0, 0, NULL},
2014 {"Input File is Binary", "F", NULL, NULL,
2015 TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
2016 {"Batch File is Compressed", "F", NULL, NULL,
2017 TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
2018 {"Propagate Top Descriptors", "F", NULL, NULL,
2019 TRUE, 'p', ARG_BOOLEAN, 0.0, 0, NULL},
2020 {"Log file", NULL, NULL, NULL,
2021 TRUE, 'l', ARG_FILE_OUT, 0.0, 0, NULL},
2022 {"Remote Fetching", "F", NULL, NULL,
2023 TRUE, 'r', ARG_BOOLEAN, 0.0, 0, NULL},
2024 {"Accession to Fetch", NULL, NULL, NULL,
2025 TRUE, 'A', ARG_STRING, 0.0, 0, NULL},
2026 {"Fetch Remote Annotations", "F", NULL, NULL,
2027 TRUE, 'F', ARG_BOOLEAN, 0.0, 0, NULL},
2028 #ifdef OS_UNIX
2029 #ifdef PROC_I80X86
2030 {"Ffdiff Executable", "ffdiff", NULL, NULL,
2031 TRUE, 'q', ARG_FILE_IN, 0.0, 0, NULL},
2032 {"Asn2Flat Executable", "asn2flat", NULL, NULL,
2033 TRUE, 'n', ARG_FILE_IN, 0.0, 0, NULL},
2034 #else
2035 {"Ffdiff Executable", "/netopt/genbank/subtool/bin/ffdiff", NULL, NULL,
2036 TRUE, 'q', ARG_FILE_IN, 0.0, 0, NULL},
2037 {"Asn2Flat Executable", "asn2flat", NULL, NULL,
2038 TRUE, 'n', ARG_FILE_IN, 0.0, 0, NULL},
2039 #endif
2040 {"SeqLoc From", "0", NULL, NULL,
2041 TRUE, 'j', ARG_INT, 0.0, 0, NULL},
2042 {"SeqLoc To", "0", NULL, NULL,
2043 TRUE, 'k', ARG_INT, 0.0, 0, NULL},
2044 {"SeqLoc Minus Strand", "F", NULL, NULL,
2045 TRUE, 'd', ARG_BOOLEAN, 0.0, 0, NULL},
2046 {"Feature itemID", "0", NULL, NULL,
2047 TRUE, 'y', ARG_INT, 0.0, 0, NULL},
2048 #ifdef INTERNAL_NCBI_ASN2GB
2049 {"Internal Access to HUP", "F", NULL, NULL,
2050 TRUE, 'H', ARG_BOOLEAN, 0.0, 0, NULL},
2051 #endif
2052 #ifdef ENABLE_ARG_X
2053 {"Accession to Extract", NULL, NULL, NULL,
2054 TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
2055 #endif
2056 #endif
2057 };
2058
2059
2060 #define HTML_XML_ASN_MASK (CREATE_HTML_FLATFILE | CREATE_XML_GBSEQ_FILE | CREATE_ASN_GBSEQ_FILE)
2061
2062 Int2 Main (
2063 void
2064 )
2065
2066 {
2067 CharPtr accn = NULL;
2068 CharPtr accntofetch = NULL;
2069 AsnIoPtr aip = NULL;
2070 FmtType altformat = (FmtType) 0;
2071 Char app [64];
2072 CharPtr asn2flat = NULL;
2073 AsnTypePtr atp = NULL;
2074 Int2 batch = 0;
2075 Boolean binary = FALSE;
2076 Boolean catenated = FALSE;
2077 Boolean compressed = FALSE;
2078 CstType custom;
2079 Boolean do_gbseq = FALSE;
2080 Boolean do_insdseq = FALSE;
2081 Boolean do_tiny_seq = FALSE;
2082 Boolean do_fasta_stream = FALSE;
2083 XtraPtr extra = NULL;
2084 Boolean farfeats = FALSE;
2085 CharPtr ffdiff = NULL;
2086 FlgType flags;
2087 FmtType format = GENBANK_FMT;
2088 Int4 from = 0;
2089 GBSeq gbsq;
2090 GBSet gbst;
2091 #ifdef INTERNAL_NCBI_ASN2GB
2092 Boolean hup = FALSE;
2093 #endif
2094 Uint4 itemID = 0;
2095 LckType locks;
2096 CharPtr logfile = NULL;
2097 FILE *logfp = NULL;
2098 ModType mode = SEQUIN_MODE;
2099 Boolean propOK = FALSE;
2100 Boolean remote = FALSE;
2101 Int2 rsult = 0;
2102 time_t runtime, starttime, stoptime;
2103 SeqEntryPtr sep;
2104 CharPtr str;
2105 Uint1 strand = Seq_strand_plus;
2106 StlType style = NORMAL_STYLE;
2107 Int4 to = 0;
2108 Int2 type = 0;
2109 Char xmlbuf [128];
2110 XtraBlock xtra;
2111
2112 /* standard setup */
2113
2114 ErrSetFatalLevel (SEV_MAX);
2115 ErrClearOptFlags (EO_SHOW_USERSTR);
2116 ErrSetLogfile ("stderr", ELOG_APPEND);
2117 UseLocalAsnloadDataAndErrMsg ();
2118 ErrPathReset ();
2119
2120 if (! AllObjLoad ()) {
2121 Message (MSG_POSTERR, "AllObjLoad failed");
2122 return 1;
2123 }
2124 if (! SubmitAsnLoad ()) {
2125 Message (MSG_POSTERR, "SubmitAsnLoad failed");
2126 return 1;
2127 }
2128 if (! FeatDefSetLoad ()) {
2129 Message (MSG_POSTERR, "FeatDefSetLoad failed");
2130 return 1;
2131 }
2132 if (! SeqCodeSetLoad ()) {
2133 Message (MSG_POSTERR, "SeqCodeSetLoad failed");
2134 return 1;
2135 }
2136 if (! GeneticCodeTableLoad ()) {
2137 Message (MSG_POSTERR, "GeneticCodeTableLoad failed");
2138 return 1;
2139 }
2140
2141 /* process command line arguments */
2142
2143 sprintf (app, "asn2gb %s", ASN2GB_APPLICATION);
2144 if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
2145 return 0;
2146 }
2147
2148 if (myargs [b_argBinary].intvalue) {
2149 binary = TRUE;
2150 } else {
2151 binary = FALSE;
2152 }
2153
2154 if (myargs [c_argCompressed].intvalue) {
2155 compressed = TRUE;
2156 } else {
2157 compressed = FALSE;
2158 }
2159
2160 if (myargs [p_argPropagate].intvalue) {
2161 propOK = TRUE;
2162 } else {
2163 propOK = FALSE;
2164 }
2165
2166 str = myargs [f_argFormat].strvalue;
2167 if (StringICmp (str, "bp") == 0 || StringICmp (str, "pb") == 0) {
2168 format = GENBANK_FMT;
2169 altformat = GENPEPT_FMT;
2170
2171 } else if (StringICmp (str, "b") == 0) {
2172 format = GENBANK_FMT;
2173 } else if (StringICmp (str, "e") == 0) {
2174 format = EMBL_FMT;
2175 } else if (StringICmp (str, "p") == 0) {
2176 format = GENPEPT_FMT;
2177 } else if (StringICmp (str, "t") == 0) {
2178 format = FTABLE_FMT;
2179
2180 } else if (StringICmp (str, "q") == 0) {
2181 do_gbseq = TRUE;
2182 format = GENBANK_FMT;
2183 } else if (StringICmp (str, "r") == 0) {
2184 do_gbseq = TRUE;
2185 format = GENPEPT_FMT;
2186
2187 } else if (StringICmp (str, "xz") == 0 || StringICmp (str, "zx") == 0) {
2188 do_gbseq = TRUE;
2189 do_insdseq = TRUE;
2190 format = GENBANK_FMT;
2191 altformat = GENPEPT_FMT;
2192
2193 } else if (StringICmp (str, "x") == 0) {
2194 do_gbseq = TRUE;
2195 do_insdseq = TRUE;
2196 format = GENBANK_FMT;
2197 } else if (StringCmp (str, "y") == 0) {
2198 do_tiny_seq = TRUE;
2199 format = GENBANK_FMT;
2200 } else if (StringCmp (str, "Y") == 0) {
2201 do_fasta_stream = TRUE;
2202 format = GENBANK_FMT;
2203 } else if (StringICmp (str, "z") == 0) {
2204 do_gbseq = TRUE;
2205 do_insdseq = TRUE;
2206 format = GENPEPT_FMT;
2207 } else {
2208 format = GENBANK_FMT;
2209 }
2210
2211 str = myargs [m_argMode].strvalue;
2212 if (StringICmp (str, "r") == 0) {
2213 mode = RELEASE_MODE;
2214 } else if (StringICmp (str, "e") == 0) {
2215 mode = ENTREZ_MODE;
2216 } else if (StringICmp (str, "s") == 0) {
2217 mode = SEQUIN_MODE;
2218 } else if (StringICmp (str, "d") == 0) {
2219 mode = DUMP_MODE;
2220 } else {
2221 mode = SEQUIN_MODE;
2222 }
2223
2224 str = myargs [s_argStyle].strvalue;
2225 if (StringICmp (str, "n") == 0) {
2226 style = NORMAL_STYLE;
2227 } else if (StringICmp (str, "s") == 0) {
2228 style = SEGMENT_STYLE;
2229 } else if (StringICmp (str, "m") == 0) {
2230 style = MASTER_STYLE;
2231 } else if (StringICmp (str, "c") == 0) {
2232 style = CONTIG_STYLE;
2233 } else {
2234 style = NORMAL_STYLE;
2235 }
2236
2237 MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock));
2238
2239 flags = (FlgType) myargs [g_argFlags].intvalue;
2240
2241 locks = (LckType) myargs [h_argLock].intvalue;
2242
2243 custom = (CstType) myargs [u_argCustom].intvalue;
2244
2245 str = myargs [a_argType].strvalue;
2246 if (StringICmp (str, "a") == 0) {
2247 type = 1;
2248 } else if (StringICmp (str, "e") == 0) {
2249 type = 2;
2250 } else if (StringICmp (str, "b") == 0) {
2251 type = 3;
2252 } else if (StringICmp (str, "s") == 0) {
2253 type = 4;
2254 } else if (StringICmp (str, "m") == 0) {
2255 type = 5;
2256 } else if (StringICmp (str, "q") == 0) {
2257 catenated = TRUE;
2258 type = 1;
2259 } else if (StringICmp (str, "t") == 0) {
2260 batch = 1;
2261 type = 4;
2262 } else if (StringICmp (str, "u") == 0) {
2263 batch = 1;
2264 type = 5;
2265 } else {
2266 type = 1;
2267 }
2268
2269 if (myargs [t_argBatch].intvalue > 0) {
2270 batch = (Int2) myargs [t_argBatch].intvalue;
2271 }
2272
2273 if ((binary || compressed) && batch == 0) {
2274 if (type == 1) {
2275 Message (MSG_FATAL, "-b or -c cannot be used without -t or -a");
2276 return 1;
2277 }
2278 }
2279
2280 remote = (Boolean) myargs [r_argRemote].intvalue;
2281
2282 accntofetch = (CharPtr) myargs [A_argAccession].strvalue;
2283 if (StringDoesHaveText (accntofetch)) {
2284 remote = TRUE;
2285 }
2286 farfeats = myargs [F_argFarFeats].intvalue;
2287
2288 #ifdef INTERNAL_NCBI_ASN2GB
2289 hup = myargs [H_argAccessHUP].intvalue;
2290 #endif
2291
2292 if (remote) {
2293 #ifdef INTERNAL_NCBI_ASN2GB
2294 if (hup) {
2295 DirSubFetchEnable ();
2296 SmartFetchEnable ();
2297 TPASmartFetchEnable ();
2298 }
2299
2300 if (! PUBSEQBioseqFetchEnable ("asn2gb", FALSE)) {
2301 Message (MSG_POSTERR, "PUBSEQBioseqFetchEnable failed");
2302 return 1;
2303 }
2304 #else
2305 PubSeqFetchEnable ();
2306 if (farfeats) {
2307 xtra.remotelock = PubSeqRemoteLock;
2308 xtra.remotefree = PubSeqRemoteFree;
2309 }
2310 #endif
2311 PubMedFetchEnable ();
2312 LocalSeqFetchInit (FALSE);
2313 }
2314
2315 logfile = (CharPtr) myargs [l_argLogFile].strvalue;
2316 if (! StringHasNoText (logfile)) {
2317 logfp = FileOpen (logfile, "w");
2318 }
2319
2320 #ifdef OS_UNIX
2321 ffdiff = myargs [q_argFfDiff].strvalue;
2322 asn2flat = myargs [n_argAsn2Flat].strvalue;
2323
2324 from = myargs [j_argFrom].intvalue;
2325 to = myargs [k_argTo].intvalue;
2326 if (myargs [d_argStrand].intvalue) {
2327 strand = Seq_strand_minus;
2328 } else {
2329 strand = Seq_strand_plus;
2330 }
2331 itemID = myargs [y_argItemID].intvalue;
2332
2333 #ifdef ENABLE_ARG_X
2334 if (! StringHasNoText (myargs [x_argAccnToSave].strvalue)) {
2335 accn = myargs [x_argAccnToSave].strvalue;
2336 }
2337 #endif
2338 #endif
2339
2340 if (GetAppParam ("NCBI", "SETTINGS", "XMLPREFIX", NULL, xmlbuf, sizeof (xmlbuf))) {
2341 AsnSetXMLmodulePrefix (StringSave (xmlbuf));
2342 }
2343
2344 if (do_gbseq) {
2345 if (! objgbseqAsnLoad ()) {
2346 Message (MSG_POSTERR, "objgbseqAsnLoad failed");
2347 return 1;
2348 }
2349 if (! objinsdseqAsnLoad ()) {
2350 Message (MSG_POSTERR, "objinsdseqAsnLoad failed");
2351 return 1;
2352 }
2353 MemSet ((Pointer) &gbsq, 0, sizeof (GBSeq));
2354 xtra.gbseq = &gbsq;
2355 if ((flags & HTML_XML_ASN_MASK) == CREATE_ASN_GBSEQ_FILE) {
2356 aip = AsnIoOpen (myargs [o_argOutputFile].strvalue, "w");
2357 } else {
2358 aip = AsnIoOpen (myargs [o_argOutputFile].strvalue, "wx");
2359 }
2360 if (aip == NULL) {
2361 Message (MSG_POSTERR, "AsnIoOpen failed");
2362 return 1;
2363 }
2364 xtra.aip = aip;
2365 if ((Boolean) ((flags & PRODUCE_OLD_GBSEQ) != 0)) {
2366 do_insdseq = FALSE;
2367 }
2368 if (do_insdseq) {
2369 atp = AsnLinkType (NULL, AsnFind ("INSDSet"));
2370 xtra.atp = AsnLinkType (NULL, AsnFind ("INSDSet.E"));
2371 } else {
2372 atp = AsnLinkType (NULL, AsnFind ("GBSet"));
2373 xtra.atp = AsnLinkType (NULL, AsnFind ("GBSet.E"));
2374 flags |= PRODUCE_OLD_GBSEQ;
2375 }
2376 if (atp == NULL || xtra.atp == NULL) {
2377 Message (MSG_POSTERR, "AsnLinkType or AsnFind failed");
2378 return 1;
2379 }
2380 MemSet ((Pointer) &gbst, 0, sizeof (GBSet));
2381 AsnOpenStruct (aip, atp, (Pointer) &gbst);
2382 }
2383
2384 extra = &xtra;
2385
2386 starttime = GetSecs ();
2387
2388 if (StringDoesHaveText (accntofetch)) {
2389
2390 if (remote) {
2391 sep = SeqEntryFromAccnOrGi (accntofetch);
2392 if (sep != NULL) {
2393 ProcessOneSeqEntry (sep, myargs [o_argOutputFile].strvalue,
2394 format, altformat, mode, style, flags, locks,
2395 custom, extra, do_tiny_seq, do_fasta_stream);
2396 SeqEntryFree (sep);
2397 }
2398 }
2399
2400 } else if (batch != 0 || accn != NULL) {
2401
2402 rsult = HandleMultipleRecords (myargs [i_argInputFile].strvalue,
2403 myargs [o_argOutputFile].strvalue,
2404 format, altformat, mode, style, flags, locks,
2405 custom, extra, type, batch, binary, compressed,
2406 propOK, ffdiff, asn2flat, accn, logfp);
2407 } else if (catenated) {
2408
2409 rsult = HandleCatenatedRecord (myargs [i_argInputFile].strvalue,
2410 myargs [o_argOutputFile].strvalue,
2411 format, altformat, mode, style, flags, locks,
2412 custom, extra, type, binary, compressed,
2413 from, to, strand, itemID, do_tiny_seq, do_fasta_stream);
2414 } else {
2415
2416 rsult = HandleSingleRecord (myargs [i_argInputFile].strvalue,
2417 myargs [o_argOutputFile].strvalue,
2418 format, altformat, mode, style, flags, locks,
2419 custom, extra, type, binary, compressed,
2420 from, to, strand, itemID, do_tiny_seq, do_fasta_stream);
2421 }
2422
2423 if (aip != NULL) {
2424 AsnCloseStruct (aip, atp, NULL);
2425 AsnPrintNewLine (aip);
2426 AsnIoClose (aip);
2427 }
2428
2429 stoptime = GetSecs ();
2430 runtime = stoptime - starttime;
2431 if (logfp != NULL) {
2432 fprintf (logfp, "Finished in %ld seconds\n", (long) runtime);
2433 FileClose (logfp);
2434 }
2435
2436 if (remote) {
2437 LocalSeqFetchDisable ();
2438 PubMedFetchDisable ();
2439 #ifdef INTERNAL_NCBI_ASN2GB
2440 PUBSEQBioseqFetchDisable ();
2441 #else
2442 PubSeqFetchDisable ();
2443 #endif
2444 }
2445
2446 return rsult;
2447 }
2448
2449 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |