|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/demo/asn2fsa.c |
source navigation diff markup identifier search freetext search file search |
1 /* asn2fsa.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2fsa.c
27 *
28 * Author: Jonathan Kans
29 *
30 * Version Creation Date: 3/4/04
31 *
32 * $Revision: 1.54 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44
45 #include <ncbi.h>
46 #include <objall.h>
47 #include <objsset.h>
48 #include <objsub.h>
49 #include <objfdef.h>
50 #include <seqport.h>
51 #include <sequtil.h>
52 #include <sqnutils.h>
53 #include <subutil.h>
54 #include <tofasta.h>
55 #include <gather.h>
56 #include <explore.h>
57 #include <lsqfetch.h>
58 #include <readdb.h>
59 #include <pmfapi.h>
60 #ifdef INTERNAL_NCBI_ASN2FSA
61 #include <accpubseq.h>
62 #endif
63
/* program version string; Main formats it into the "asn2fsa %s" name given to GetArgs */
64 #define ASN2FSA_APP_VER "3.6"
65
66 CharPtr ASN2FSA_APPLICATION = ASN2FSA_APP_VER;
67
/* queue of integer uids still to be fetched, and the mutex taken around every
   add/remove on it (see AddUidToQueue and RemoveUidFromQueue) */
68 static ValNodePtr requested_uid_list = NULL;
69 static TNlmMutex requested_uid_mutex = NULL;
70
/* list of Bioseqs returned by BioseqLockById, and the mutex taken by
   AddBspToList and ExtractBspList when they touch it */
71 static ValNodePtr locked_bsp_list = NULL;
72 static TNlmMutex locked_bsp_mutex = NULL;
73
74 static void AddUidToQueue (
75 SeqIdPtr sip
76 )
77
78 {
79 ValNodePtr last = NULL, vnp;
80 Int4 ret;
81 Int4 uid;
82
83 if (sip == NULL || sip->choice != SEQID_GI) return;
84 uid = (Int4) sip->data.intvalue;
85 if (uid < 1) return;
86
87 ret = NlmMutexLockEx (&requested_uid_mutex);
88 if (ret) {
89 ErrPostEx (SEV_FATAL, 0, 0, "AddUidToQueue mutex failed [%ld]", (long) ret);
90 return;
91 }
92
93 /* check against uids already in queue */
94
95 last = NULL;
96 for (vnp = requested_uid_list; vnp != NULL; vnp = vnp->next) {
97 last = vnp;
98 if ((Int4) vnp->data.intvalue == uid) break;
99 }
100
101 /* add uid to queue */
102
103 if (vnp == NULL) {
104 if (last != NULL) {
105 vnp = ValNodeAddInt (&last, 0, uid);
106 last = vnp;
107 } else {
108 requested_uid_list = ValNodeAddInt (NULL, 0, uid);
109 last = requested_uid_list;
110 }
111 }
112
113 NlmMutexUnlock (requested_uid_mutex);
114 }
115
116 static Int4 RemoveUidFromQueue (
117 void
118 )
119
120 {
121 Int4 ret, uid = 0;
122 ValNodePtr vnp;
123
124 ret = NlmMutexLockEx (&requested_uid_mutex);
125 if (ret) {
126 ErrPostEx (SEV_FATAL, 0, 0, "RemoveUidFromQueue mutex failed [%ld]", (long) ret);
127 return 0;
128 }
129
130 /* extract next requested uid from queue */
131
132 if (requested_uid_list != NULL) {
133 vnp = requested_uid_list;
134 requested_uid_list = vnp->next;
135 vnp->next = NULL;
136 uid = (Int4) vnp->data.intvalue;
137 ValNodeFree (vnp);
138 }
139
140 NlmMutexUnlock (requested_uid_mutex);
141
142 return uid;
143 }
144
145 static void QueueFarSegments (SeqLocPtr slp)
146
147 {
148 BioseqPtr bsp;
149 SeqLocPtr loc;
150 SeqIdPtr sip;
151 ValNodePtr vnp;
152
153 if (slp == NULL) return;
154
155 sip = SeqLocId (slp);
156 if (sip == NULL) {
157 loc = SeqLocFindNext (slp, NULL);
158 if (loc != NULL) {
159 sip = SeqLocId (loc);
160 }
161 }
162 if (sip == NULL) return;
163
164 /* if packaged in record, no need to fetch it */
165
166 if (BioseqFindCore (sip) != NULL) return;
167
168 /* check against currently locked records */
169
170 for (vnp = locked_bsp_list; vnp != NULL; vnp = vnp->next) {
171 bsp = (BioseqPtr) vnp->data.ptrvalue;
172 if (bsp == NULL) continue;
173 if (SeqIdIn (sip, bsp->id)) return;
174 }
175
176 AddUidToQueue (sip);
177 }
178
179 static void QueueFarBioseqs (BioseqPtr bsp, Pointer userdata)
180
181 {
182 DeltaSeqPtr dsp;
183 SeqLocPtr slp = NULL;
184 ValNode vn;
185
186 if (bsp == NULL) return;
187
188 if (bsp->repr == Seq_repr_seg) {
189 vn.choice = SEQLOC_MIX;
190 vn.extended = 0;
191 vn.data.ptrvalue = bsp->seq_ext;
192 vn.next = NULL;
193 while ((slp = SeqLocFindNext (&vn, slp)) != NULL) {
194 if (slp != NULL && slp->choice != SEQLOC_NULL) {
195 QueueFarSegments (slp);
196 }
197 }
198 } else if (bsp->repr == Seq_repr_delta) {
199 for (dsp = (DeltaSeqPtr) (bsp->seq_ext); dsp != NULL; dsp = dsp->next) {
200 if (dsp->choice == 1) {
201 slp = (SeqLocPtr) dsp->data.ptrvalue;
202 if (slp != NULL && slp->choice != SEQLOC_NULL) {
203 QueueFarSegments (slp);
204 }
205 }
206 }
207 }
208 }
209
210 static void AddBspToList (
211 BioseqPtr bsp
212 )
213
214 {
215 Int4 ret;
216 ValNodePtr vnp;
217
218 if (bsp == NULL) return;
219
220 ret = NlmMutexLockEx (&locked_bsp_mutex);
221 if (ret) {
222 ErrPostEx (SEV_FATAL, 0, 0, "AddBspToList mutex failed [%ld]", (long) ret);
223 return;
224 }
225
226 vnp = ValNodeAddPointer (&locked_bsp_list, 0, (Pointer) bsp);
227
228 NlmMutexUnlock (locked_bsp_mutex);
229 }
230
231 static ValNodePtr ExtractBspList (
232 void
233 )
234
235 {
236 Int4 ret;
237 ValNodePtr vnp;
238
239 ret = NlmMutexLockEx (&locked_bsp_mutex);
240 if (ret) {
241 ErrPostEx (SEV_FATAL, 0, 0, "ExtractBspList mutex failed [%ld]", (long) ret);
242 return NULL;
243 }
244
245 vnp = locked_bsp_list;
246 locked_bsp_list = NULL;
247
248 NlmMutexUnlock (locked_bsp_mutex);
249
250 return vnp;
251 }
252
/* FastaFlagData bundles the processing options and output streams that are
   handed to every record-processing function in this file */
253 typedef struct fastaflags {
254 Boolean master_style; /* passed through as the last argument of SeqEntryFastaStream */
255 Boolean expand_gaps; /* with use_dashes, selects which gap flag is OR-ed into the stream flags */
256 Boolean use_dashes; /* see expand_gaps */
257 Boolean far_genomic_qual; /* PrintQualScores uses PrintQualityScoresForContig when set */
258 Boolean qual_gap_is_zero; /* forwarded to the quality score printing functions */
259 Boolean automatic; /* ProcessOneRecord dispatches to ProcessAutomaticRecord when set */
260 Boolean batch; /* otherwise ProcessOneRecord dispatches to ProcessMultipleRecord when set */
261 Boolean binary; /* input ASN.1 is opened and read in binary rather than text mode */
262 Boolean compressed; /* batch input is read through gzcat/zcat via popen (UNIX only) */
263 Boolean lock; /* call DoLockFarComponents on each entry before writing output */
264 Boolean useThreads; /* DoLockFarComponents uses the threaded path when threads are available */
265 Boolean usePUBSEQ; /* worker threads call PUBSEQInit/PUBSEQFini (INTERNAL_NCBI_ASN2FSA builds) */
266 Boolean useBLAST; /* worker threads enable ReadDB fetching on blastdbname */
267 CharPtr blastdbname; /* database name given to ReadDBBioseqFetchEnable */
268 Int2 type; /* input reader selector: 1 ReadAsnFastaOrFlatFile, 2 Seq-entry, 3 Bioseq, 4 Bioseq-set, 5 Seq-submit */
269 Int2 linelen; /* line length argument passed to the FASTA streaming functions */
270 Boolean failed; /* set TRUE when a FASTA streaming call returns a negative value */
271 FILE *nt; /* SeqEntryFastaStream output; presumably nucleotide FASTA - confirm against Main */
272 FILE *aa; /* SeqEntryFastaStream output; presumably protein FASTA - confirm against Main */
273 FILE *ql; /* quality score output written by PrintQualScores */
274 FILE *fr; /* far component FASTA written by CacheFarComponents */
275 FILE *logfp; /* batch mode log: first Bioseq id per record, longest time, record count */
276 } FastaFlagData, PNTR FastaFlagPtr;
277
278 static VoidPtr DoAsyncLookup (
279 VoidPtr arg
280 )
281
282 {
283 BioseqPtr bsp;
284 FastaFlagPtr ffp;
285 Int4 uid;
286 ValNode vn;
287
288 ffp = (FastaFlagPtr) arg;
289 if (ffp == NULL) return NULL;
290
291 #ifdef INTERNAL_NCBI_ASN2FSA
292 if (ffp->usePUBSEQ) {
293 PUBSEQInit ();
294 }
295 #endif
296 if (ffp->useBLAST) {
297 ReadDBBioseqFetchEnable ("asn2fsa", ffp->blastdbname, TRUE, FALSE);
298 }
299
300 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
301
302 uid = RemoveUidFromQueue ();
303 while (uid > 0) {
304
305 vn.choice = SEQID_GI;
306 vn.data.intvalue = uid;
307 vn.next = NULL;
308
309 bsp = BioseqLockById (&vn);
310 if (bsp != NULL) {
311 AddBspToList (bsp);
312 }
313
314 uid = RemoveUidFromQueue ();
315 }
316
317 if (ffp->useBLAST) {
318 ReadDBBioseqFetchDisable ();
319 }
320 #ifdef INTERNAL_NCBI_ASN2FSA
321 if (ffp->usePUBSEQ) {
322 PUBSEQFini ();
323 }
324 #endif
325
326 return NULL;
327 }
328
329 #define NUM_ASYNC_LOOKUP_THREADS 5
330
331 static void ProcessAsyncLookups (
332 FastaFlagPtr ffp
333 )
334
335 {
336 Int2 i;
337 VoidPtr status;
338 TNlmThread thds [NUM_ASYNC_LOOKUP_THREADS];
339
340 /* spawn several threads for individual BioseqLockById requests */
341
342 for (i = 0; i < NUM_ASYNC_LOOKUP_THREADS; i++) {
343 thds [i] = NlmThreadCreate (DoAsyncLookup, (Pointer) ffp);
344 }
345
346 /* wait for all fetching threads to complete */
347
348 for (i = 0; i < NUM_ASYNC_LOOKUP_THREADS; i++) {
349 NlmThreadJoin (thds [i], &status);
350 }
351 }
352
353 static ValNodePtr AsyncLockFarComponents (
354 SeqEntryPtr sep,
355 FastaFlagPtr ffp
356 )
357
358 {
359 BioseqPtr bsp;
360 ValNodePtr bsplist = NULL, sublist, vnp;
361 SeqEntryPtr oldsep;
362
363 if (sep == NULL || ffp == NULL) return NULL;
364 oldsep = SeqEntrySetScope (sep);
365
366 /* add far uids to queue */
367
368 VisitBioseqsInSep (sep, NULL, QueueFarBioseqs);
369
370 /* fetching from uid list using several threads */
371
372 ProcessAsyncLookups (ffp);
373
374 sublist = ExtractBspList ();
375
376 /* take list, look for seg or delta, recurse */
377
378 while (sublist != NULL) {
379 for (vnp = sublist; vnp != NULL; vnp = vnp->next) {
380 bsp = (BioseqPtr) vnp->data.ptrvalue;
381 if (bsp == NULL) continue;
382 QueueFarBioseqs (bsp, NULL);
383 }
384
385 ValNodeLink (&bsplist, sublist);
386 sublist = NULL;
387
388 ProcessAsyncLookups (ffp);
389
390 sublist = ExtractBspList ();
391 }
392
393 SeqEntrySetScope (oldsep);
394 return bsplist;
395 }
396
397 static ValNodePtr DoLockFarComponents (
398 SeqEntryPtr sep,
399 FastaFlagPtr ffp
400 )
401
402 {
403 ValNodePtr rsult;
404 time_t start_time, stop_time;
405
406 /*
407 if (NlmThreadsAvailable () && ffp->useThreads) {
408 return AsyncLockFarComponents (sep);
409 }
410
411 return LockFarComponents (sep);
412 */
413
414 start_time = GetSecs ();
415
416 if (NlmThreadsAvailable () && ffp->useThreads) {
417 rsult = AsyncLockFarComponents (sep, ffp);
418 } else if (ffp->useThreads) {
419 Message (MSG_POST, "Threads not available in this executable");
420 rsult = LockFarComponents (sep);
421 } else {
422 rsult = LockFarComponents (sep);
423 }
424
425 stop_time = GetSecs ();
426
427 return rsult;
428 }
429
430 static Boolean DeltaLitOnly (
431 BioseqPtr bsp
432 )
433
434 {
435 ValNodePtr vnp;
436
437 if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE;
438 for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
439 if (vnp->choice == 1) return FALSE;
440 }
441 return TRUE;
442 }
443
444 static Boolean SegHasParts (
445 BioseqPtr bsp
446 )
447
448 {
449 BioseqSetPtr bssp;
450 SeqEntryPtr sep;
451
452 if (bsp == NULL || bsp->repr != Seq_repr_seg) return FALSE;
453 sep = bsp->seqentry;
454 if (sep == NULL) return FALSE;
455 sep = sep->next;
456 if (sep == NULL || (! IS_Bioseq_set (sep))) return FALSE;
457 bssp = (BioseqSetPtr) sep->data.ptrvalue;
458 if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts) return TRUE;
459 return FALSE;
460 }
461
462 static void CacheFarComponents (
463 FastaFlagPtr ffp,
464 ValNodePtr bsplist
465 )
466
467 {
468 BioseqPtr bsp;
469 Uint2 entityID;
470 ValNodePtr vnp;
471
472 if (ffp == NULL || ffp->fr == NULL || bsplist == NULL) return;
473
474 for (vnp = bsplist; vnp != NULL; vnp = vnp->next) {
475 bsp = (BioseqPtr) vnp->data.ptrvalue;
476 if (bsp == NULL) continue;
477
478 /* cache raw and constructed, near segmented, and delta literal */
479
480 switch (bsp->repr) {
481 case Seq_repr_raw :
482 case Seq_repr_const :
483 if (BioseqFastaStream (bsp, ffp->fr, 0, ffp->linelen, 0, 0, TRUE) < 0) {
484 ffp->failed = TRUE;
485 }
486 break;
487 case Seq_repr_seg :
488 entityID = ObjMgrGetEntityIDForPointer (bsp);
489 AssignIDsInEntity (entityID, 0, NULL);
490 if (SegHasParts (bsp)) {
491 if (BioseqFastaStream (bsp, ffp->fr, 0, ffp->linelen, 0, 0, TRUE) < 0) {
492 ffp->failed = TRUE;
493 }
494 }
495 break;
496 case Seq_repr_delta :
497 if (DeltaLitOnly (bsp)) {
498 if (BioseqFastaStream (bsp, ffp->fr, 0, ffp->linelen, 0, 0, TRUE) < 0) {
499 ffp->failed = TRUE;
500 }
501 }
502 break;
503 default :
504 break;
505 }
506 }
507 }
508
509 static void PrintQualProc (
510 CharPtr buf,
511 Uint4 buflen,
512 Pointer userdata
513 )
514
515 {
516 FILE *fp;
517
518 fp = (FILE*) userdata;
519 fprintf (fp, "%s", buf);
520 }
521
522 static void PrintQualScores (
523 BioseqPtr bsp,
524 Pointer userdata
525 )
526
527 {
528 FastaFlagPtr ffp;
529
530 ffp = (FastaFlagPtr) userdata;
531 if (bsp == NULL || ffp == NULL) return;
532 if (! ISA_na (bsp->mol)) return;
533
534 if (ffp->far_genomic_qual) {
535 PrintQualityScoresForContig (bsp, ffp->qual_gap_is_zero, ffp->ql);
536 } else {
537 PrintQualityScoresToBuffer (bsp, ffp->qual_gap_is_zero, ffp->ql, PrintQualProc);
538 }
539 }
540
541 static void ProcessSingleRecord (
542 CharPtr directory,
543 CharPtr base,
544 CharPtr suffix,
545 FastaFlagPtr ffp
546 )
547
548 {
549 AsnIoPtr aip;
550 BioseqPtr bsp;
551 ValNodePtr bsplist;
552 BioseqSetPtr bssp;
553 Pointer dataptr = NULL;
554 Uint2 datatype, entityID = 0;
555 Char file [FILENAME_MAX], path [PATH_MAX];
556 StreamFlgType flags = STREAM_CORRECT_INVAL;
557 FILE *fp;
558 ObjMgrPtr omp;
559 SeqEntryPtr sep;
560
561 if (ffp == NULL) return;
562
563 if (base == NULL) {
564 base = "";
565 }
566 if (suffix == NULL) {
567 suffix = "";
568 }
569 StringNCpy_0 (path, directory, sizeof (path));
570 sprintf (file, "%s%s", base, suffix);
571 FileBuildPath (path, NULL, file);
572
573 if (StringHasNoText (path)) return;
574
575 if (ffp->type == 1) {
576 fp = FileOpen (path, "r");
577 if (fp == NULL) {
578 Message (MSG_POSTERR, "Failed to open '%s'", path);
579 return;
580 }
581
582 dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
583
584 FileClose (fp);
585
586 entityID = ObjMgrRegister (datatype, dataptr);
587
588 } else if (ffp->type >= 2 && ffp->type <= 5) {
589 aip = AsnIoOpen (path, ffp->binary? "rb" : "r");
590 if (aip == NULL) {
591 Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", path);
592 return;
593 }
594
595 SeqMgrHoldIndexing (TRUE);
596 switch (ffp->type) {
597 case 2 :
598 dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
599 datatype = OBJ_SEQENTRY;
600 break;
601 case 3 :
602 dataptr = (Pointer) BioseqAsnRead (aip, NULL);
603 datatype = OBJ_BIOSEQ;
604 break;
605 case 4 :
606 dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
607 datatype = OBJ_BIOSEQSET;
608 break;
609 case 5 :
610 dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
611 datatype = OBJ_SEQSUB;
612 break;
613 default :
614 break;
615 }
616 SeqMgrHoldIndexing (FALSE);
617
618 AsnIoClose (aip);
619
620 entityID = ObjMgrRegister (datatype, dataptr);
621
622 } else {
623 Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) ffp->type);
624 return;
625 }
626
627 if (entityID < 1 || dataptr == NULL) {
628 Message (MSG_POSTERR, "Data read failed for input file '%s'", path);
629 return;
630 }
631
632 if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
633 datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
634
635 sep = GetTopSeqEntryForEntityID (entityID);
636
637 if (sep == NULL) {
638 sep = SeqEntryNew ();
639 if (sep != NULL) {
640 if (datatype == OBJ_BIOSEQ) {
641 bsp = (BioseqPtr) dataptr;
642 sep->choice = 1;
643 sep->data.ptrvalue = bsp;
644 SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
645 } else if (datatype == OBJ_BIOSEQSET) {
646 bssp = (BioseqSetPtr) dataptr;
647 sep->choice = 2;
648 sep->data.ptrvalue = bssp;
649 SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
650 } else {
651 sep = SeqEntryFree (sep);
652 }
653 }
654 sep = GetTopSeqEntryForEntityID (entityID);
655 }
656
657 if (sep != NULL) {
658 if (ffp->expand_gaps && ffp->use_dashes) {
659 flags |= EXPAND_GAPS_TO_DASHES;
660 } else if (ffp->expand_gaps) {
661 flags |= STREAM_EXPAND_GAPS;
662 } else if (ffp->use_dashes) {
663 flags |= GAP_TO_SINGLE_DASH;
664 }
665
666 bsplist = NULL;
667 if (ffp->lock) {
668 bsplist = DoLockFarComponents (sep, ffp);
669 if (bsplist != NULL && ffp->fr != NULL) {
670 CacheFarComponents (ffp, bsplist);
671 }
672 }
673
674 if (ffp->nt != NULL) {
675 if (SeqEntryFastaStream (sep, ffp->nt, flags, ffp->linelen, 0, 0,
676 TRUE, FALSE, ffp->master_style) < 0) {
677 ffp->failed = TRUE;
678 }
679 }
680 if (ffp->aa != NULL) {
681 if (SeqEntryFastaStream (sep, ffp->aa, flags, ffp->linelen, 0, 0,
682 FALSE, TRUE, ffp->master_style) < 0) {
683 ffp->failed = TRUE;
684 }
685 }
686 if (ffp->ql != NULL) {
687 VisitBioseqsInSep (sep, (Pointer) ffp, PrintQualScores);
688 }
689
690 bsplist = UnlockFarComponents (bsplist);
691
692 }
693 } else {
694 Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
695 }
696
697 ObjMgrFree (datatype, dataptr);
698
699 omp = ObjMgrGet ();
700 ObjMgrReapOne (omp);
701 SeqMgrClearBioseqIndex ();
702 ObjMgrFreeCache (0);
703 FreeSeqIdGiCache ();
704
705 SeqEntrySetScope (NULL);
706 }
707
708 static void ProcessMultipleRecord (
709 CharPtr directory,
710 CharPtr base,
711 CharPtr suffix,
712 FastaFlagPtr ffp
713 )
714
715 {
716 AsnIoPtr aip;
717 AsnModulePtr amp;
718 AsnTypePtr atp, atp_bss, atp_desc, atp_se;
719 BioseqPtr bsp;
720 ValNodePtr bsplist;
721 Char buf [64], file [FILENAME_MAX], path [PATH_MAX], longest [64];
722 StreamFlgType flags = STREAM_CORRECT_INVAL;
723 FILE *fp;
724 Int4 numrecords = 0;
725 SeqEntryPtr fsep, sep;
726 ObjMgrPtr omp;
727 time_t starttime, stoptime, worsttime;
728 #ifdef OS_UNIX
729 Char cmmd [256];
730 CharPtr gzcatprog;
731 int ret;
732 Boolean usedPopen = FALSE;
733 #endif
734
735 if (ffp == NULL) return;
736
737 if (base == NULL) {
738 base = "";
739 }
740 if (suffix == NULL) {
741 suffix = "";
742 }
743 StringNCpy_0 (path, directory, sizeof (path));
744 sprintf (file, "%s%s", base, suffix);
745 FileBuildPath (path, NULL, file);
746
747 if (StringHasNoText (path)) return;
748
749 #ifndef OS_UNIX
750 if (ffp->compressed) {
751 Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines");
752 return;
753 }
754 #endif
755
756 amp = AsnAllModPtr ();
757 if (amp == NULL) {
758 Message (MSG_POSTERR, "Unable to load AsnAllModPtr");
759 return;
760 }
761
762 atp_bss = AsnFind ("Bioseq-set");
763 if (atp_bss == NULL) {
764 Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set");
765 return;
766 }
767
768 atp_desc = AsnFind ("Bioseq-set.descr");
769 if (atp_desc == NULL) {
770 Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.descr");
771 return;
772 }
773
774 atp_se = AsnFind ("Bioseq-set.seq-set.E");
775 if (atp_se == NULL) {
776 Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.seq-set.E");
777 return;
778 }
779
780 #ifdef OS_UNIX
781 if (ffp->compressed) {
782 gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
783 if (gzcatprog != NULL) {
784 sprintf (cmmd, "%s %s", gzcatprog, path);
785 } else {
786 ret = system ("gzcat -h >/dev/null 2>&1");
787 if (ret == 0) {
788 sprintf (cmmd, "gzcat %s", path);
789 } else if (ret == -1) {
790 Message (MSG_POSTERR, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
791 return;
792 } else {
793 ret = system ("zcat -h >/dev/null 2>&1");
794 if (ret == 0) {
795 sprintf (cmmd, "zcat %s", path);
796 } else if (ret == -1) {
797 Message (MSG_POSTERR, "Unable to fork or exec zcat in ScanBioseqSetRelease");
798 return;
799 } else {
800 Message (MSG_POSTERR, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
801 return;
802 }
803 }
804 }
805 fp = popen (cmmd, /* ffp->binary? "rb" : */ "r");
806 usedPopen = TRUE;
807 } else {
808 fp = FileOpen (path, ffp->binary? "rb" : "r");
809 }
810 #else
811 fp = FileOpen (path, ffp->binary? "rb" : "r");
812 #endif
813 if (fp == NULL) {
814 Message (MSG_POSTERR, "FileOpen failed for input file '%s'", path);
815 return;
816 }
817
818 aip = AsnIoNew (ffp->binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
819 if (aip == NULL) {
820 Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", path);
821 return;
822 }
823
824 atp = atp_bss;
825
826 if (ffp->expand_gaps && ffp->use_dashes) {
827 flags |= EXPAND_GAPS_TO_DASHES;
828 } else if (ffp->expand_gaps) {
829 flags |= STREAM_EXPAND_GAPS;
830 } else if (ffp->use_dashes) {
831 flags |= GAP_TO_SINGLE_DASH;
832 }
833
834 longest [0] = '\0';
835 worsttime = 0;
836
837 while ((atp = AsnReadId (aip, amp, atp)) != NULL) {
838 if (atp == atp_se) {
839
840 SeqMgrHoldIndexing (TRUE);
841 sep = SeqEntryAsnRead (aip, atp);
842 SeqMgrHoldIndexing (FALSE);
843
844 starttime = GetSecs ();
845 buf [0] = '\0';
846
847 if (ffp->logfp != NULL) {
848 fsep = FindNthBioseq (sep, 1);
849 if (fsep != NULL && fsep->choice == 1) {
850 bsp = (BioseqPtr) fsep->data.ptrvalue;
851 if (bsp != NULL) {
852 SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
853 fprintf (ffp->logfp, "%s\n", buf);
854 fflush (ffp->logfp);
855 }
856 }
857 }
858
859 bsplist = NULL;
860 if (ffp->lock) {
861 bsplist = DoLockFarComponents (sep, ffp);
862 if (bsplist != NULL && ffp->fr != NULL) {
863 CacheFarComponents (ffp, bsplist);
864 }
865 }
866
867 if (ffp->nt != NULL) {
868 SeqEntryFastaStream (sep, ffp->nt, flags, ffp->linelen, 0, 0, TRUE, FALSE, ffp->master_style);
869 }
870 if (ffp->aa != NULL) {
871 SeqEntryFastaStream (sep, ffp->aa, flags, ffp->linelen, 0, 0, FALSE, TRUE, ffp->master_style);
872 }
873 if (ffp->ql != NULL) {
874 VisitBioseqsInSep (sep, (Pointer) ffp, PrintQualScores);
875 }
876
877 bsplist = UnlockFarComponents (bsplist);
878
879 stoptime = GetSecs ();
880 if (stoptime - starttime > worsttime && StringDoesHaveText (buf)) {
881 worsttime = stoptime - starttime;
882 StringCpy (longest, buf);
883 }
884 numrecords++;
885
886 SeqEntryFree (sep);
887 omp = ObjMgrGet ();
888 ObjMgrReapOne (omp);
889 SeqMgrClearBioseqIndex ();
890 ObjMgrFreeCache (0);
891 FreeSeqIdGiCache ();
892
893 SeqEntrySetScope (NULL);
894 } else {
895 AsnReadVal (aip, atp, NULL);
896 }
897 }
898
899 AsnIoFree (aip, FALSE);
900
901 #ifdef OS_UNIX
902 if (usedPopen) {
903 pclose (fp);
904 } else {
905 FileClose (fp);
906 }
907 #else
908 FileClose (fp);
909 #endif
910
911 if (ffp->logfp != NULL && (! StringHasNoText (longest))) {
912 fprintf (ffp->logfp, "Longest processing time %ld seconds on %s\n",
913 (long) worsttime, longest);
914 fprintf (ffp->logfp, "Total number of records %ld\n", (long) numrecords);
915 fflush (ffp->logfp);
916 }
917 }
918
919 static void FastaWrapper (
920 SeqEntryPtr sep,
921 Pointer userdata
922 )
923
924 {
925 ValNodePtr bsplist;
926 FastaFlagPtr ffp;
927 StreamFlgType flags = STREAM_CORRECT_INVAL;
928
929 if (sep == NULL) return;
930 ffp = (FastaFlagPtr) userdata;
931 if (ffp == NULL) return;
932
933
934 if (ffp->expand_gaps && ffp->use_dashes) {
935 flags |= EXPAND_GAPS_TO_DASHES;
936 } else if (ffp->expand_gaps) {
937 flags |= STREAM_EXPAND_GAPS;
938 } else if (ffp->use_dashes) {
939 flags |= GAP_TO_SINGLE_DASH;
940 }
941
942 bsplist = NULL;
943 if (ffp->lock) {
944 bsplist = DoLockFarComponents (sep, ffp);
945 if (bsplist != NULL && ffp->fr != NULL) {
946 CacheFarComponents (ffp, bsplist);
947 }
948 }
949
950 if (ffp->nt != NULL) {
951 if (SeqEntryFastaStream (sep, ffp->nt, flags, ffp->linelen, 0, 0,
952 TRUE, FALSE, ffp->master_style) < 0) {
953 ffp->failed = TRUE;
954 }
955 }
956 if (ffp->aa != NULL) {
957 if (SeqEntryFastaStream (sep, ffp->aa, flags, ffp->linelen, 0, 0,
958 FALSE, TRUE, ffp->master_style) < 0) {
959 ffp->failed = TRUE;
960 }
961 }
962 if (ffp->ql != NULL) {
963 VisitBioseqsInSep (sep, (Pointer) ffp, PrintQualScores);
964 }
965
966 bsplist = UnlockFarComponents (bsplist);
967 }
968
969 static void ProcessAutomaticRecord (
970 CharPtr directory,
971 CharPtr base,
972 CharPtr suffix,
973 FastaFlagPtr ffp
974 )
975
976 {
977 Char file [FILENAME_MAX], path [PATH_MAX];
978
979 if (ffp == NULL) return;
980
981 if (base == NULL) {
982 base = "";
983 }
984 if (suffix == NULL) {
985 suffix = "";
986 }
987 StringNCpy_0 (path, directory, sizeof (path));
988 sprintf (file, "%s%s", base, suffix);
989 FileBuildPath (path, NULL, file);
990
991 if (StringHasNoText (path)) return;
992
993 ReadSequenceAsnFile (path, ffp->binary, ffp->compressed, (Pointer) ffp, FastaWrapper);
994 }
995
996 static void ProcessOneRecord (
997 CharPtr directory,
998 CharPtr base,
999 CharPtr suffix,
1000 FastaFlagPtr ffp
1001 )
1002
1003 {
1004 if (ffp == NULL) return;
1005
1006 if (ffp->automatic) {
1007 ProcessAutomaticRecord (directory, base, suffix, ffp);
1008 } else if (ffp->batch) {
1009 ProcessMultipleRecord (directory, base, suffix, ffp);
1010 } else {
1011 ProcessSingleRecord (directory, base, suffix, ffp);
1012 }
1013 }
1014
1015 static void ProcessOneSeqEntry (
1016 SeqEntryPtr sep,
1017 FastaFlagPtr ffp
1018 )
1019
1020
1021 {
1022 ValNodePtr bsplist;
1023 StreamFlgType flags = STREAM_CORRECT_INVAL;
1024
1025 if (sep == NULL || ffp == NULL) return;
1026
1027 if (ffp->expand_gaps && ffp->use_dashes) {
1028 flags |= EXPAND_GAPS_TO_DASHES;
1029 } else if (ffp->expand_gaps) {
1030 flags |= STREAM_EXPAND_GAPS;
1031 } else if (ffp->use_dashes) {
1032 flags |= GAP_TO_SINGLE_DASH;
1033 }
1034
1035 bsplist = NULL;
1036 if (ffp->lock) {
1037 bsplist = DoLockFarComponents (sep, ffp);
1038 if (bsplist != NULL && ffp->fr != NULL) {
1039 CacheFarComponents (ffp, bsplist);
1040 }
1041 }
1042
1043 if (ffp->nt != NULL) {
1044 if (SeqEntryFastaStream (sep, ffp->nt, flags, ffp->linelen, 0, 0,
1045 TRUE, FALSE, ffp->master_style) < 0) {
1046 ffp->failed = TRUE;
1047 }
1048 }
1049 if (ffp->aa != NULL) {
1050 if (SeqEntryFastaStream (sep, ffp->aa, flags, ffp->linelen, 0, 0,
1051 FALSE, TRUE, ffp->master_style) < 0) {
1052 ffp->failed = TRUE;
1053 }
1054 }
1055 if (ffp->ql != NULL) {
1056 VisitBioseqsInSep (sep, (Pointer) ffp, PrintQualScores);
1057 }
1058
1059 bsplist = UnlockFarComponents (bsplist);
1060 }
1061
1062 static void FileRecurse (
1063 CharPtr directory,
1064 CharPtr subdir,
1065 CharPtr suffix,
1066 Boolean dorecurse,
1067 FastaFlagPtr ffp
1068 )
1069
1070 {
1071 Char path [PATH_MAX];
1072 CharPtr ptr, str;
1073 ValNodePtr head, vnp;
1074
1075 /* get list of all files in source directory */
1076
1077 head = DirCatalog (directory);
1078
1079 for (vnp = head; vnp != NULL; vnp = vnp->next) {
1080 if (vnp->choice == 0) {
1081 if (StringHasNoText (subdir) || StringStr (directory, subdir) != NULL) {
1082 str = (CharPtr) vnp->data.ptrvalue;
1083 if (! StringHasNoText (str)) {
1084
1085 /* does filename have desired substring? */
1086
1087 ptr = StringStr (str, suffix);
1088 if (ptr != NULL) {
1089 *ptr = '\0';
1090
1091 /* process file that has desired suffix (usually .ent) */
1092
1093 ProcessOneRecord (directory, str, suffix, ffp);
1094 }
1095 }
1096 }
1097 } else if (vnp->choice == 1 && dorecurse) {
1098
1099 /* recurse into subdirectory */
1100
1101 StringNCpy_0 (path, directory, sizeof (path));
1102 str = (CharPtr) vnp->data.ptrvalue;
1103 FileBuildPath (path, str, NULL);
1104
1105 FileRecurse (path, str, suffix, dorecurse, ffp);
1106 }
1107 }
1108
1109 /* clean up file list */
1110
1111 ValNodeFreeData (head);
1112 }
1113
1114 static SeqEntryPtr SeqEntryFromAccnOrGi (
1115 CharPtr accn
1116 )
1117
1118 {
1119 Boolean alldigits;
1120 BioseqPtr bsp;
1121 Char ch;
1122 CharPtr ptr;
1123 SeqEntryPtr sep = NULL;
1124 SeqIdPtr sip;
1125 Int4 uid = 0;
1126 long int val;
1127 ValNode vn;
1128
1129 if (StringHasNoText (accn)) return NULL;
1130
1131 TrimSpacesAroundString (accn);
1132
1133 alldigits = TRUE;
1134 ptr = accn;
1135 ch = *ptr;
1136 while (ch != '\0') {
1137 if (! IS_DIGIT (ch)) {
1138 alldigits = FALSE;
1139 }
1140 ptr++;
1141 ch = *ptr;
1142 }
1143
1144 if (alldigits) {
1145 if (sscanf (accn, "%ld", &val) == 1) {
1146 uid = (Int4) val;
1147 }
1148 } else {
1149 sip = SeqIdFromAccessionDotVersion (accn);
1150 if (sip != NULL) {
1151 uid = GetGIForSeqId (sip);
1152 SeqIdFree (sip);
1153 }
1154 }
1155
1156 if (uid > 0) {
1157 sep = PubSeqSynchronousQuery (uid, 0, -1);
1158 if (sep != NULL) {
1159 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
1160 vn.choice = SEQID_GI;
1161 vn.data.intvalue = uid;
1162 bsp = BioseqFind (&vn);
1163 if (bsp != NULL) {
1164 sep = SeqMgrGetSeqEntryForData ((Pointer) bsp);
1165 }
1166 }
1167 }
1168
1169 return sep;
1170 }
1171
1172 /* Args structure contains command-line arguments */
1173
/* indices into the myargs table, one per command-line option; the letter
   before the underscore is the option character of the matching entry, and
   the values must stay in step with the order of entries in myargs */
1174 #define p_argInputPath 0
1175 #define i_argInputFile 1
1176 #define o_argNtOutFile 2
1177 #define v_argAaOutFile 3
1178 #define q_argQlOutFile 4
1179 #define x_argSuffix 5
1180 #define u_argRecurse 6
1181 #define m_argMaster 7
1182 #define g_argExpandGaps 8
1183 #define D_argUseDashes 9
1184 #define s_argGenomicQual 10
1185 #define z_argZeroQualGap 11
1186 #define a_argType 12
1187 #define b_argBinary 13
1188 #define c_argCompressed 14
1189 #define r_argRemote 15
1190 #define f_argFastaIdx 16
1191 #define d_argBlastDB 17
1192 #define k_argLocalFetch 18
1193 #define l_argLockFar 19
1194 #define h_argFarOutFile 20
1195 #define e_argLineLength 21
1196 #define T_argThreads 22
1197 #define L_argLogFile 23
1198 #define A_argAccession 24
1199
/* command-line option table handed to GetArgs in Main; Main reads the parsed
   values back by position (for example myargs [p_argInputPath].strvalue), so
   the order of entries here must match the *_arg* index macros.  Each entry
   begins with the prompt text and a default value string, and carries the
   option character and ARG_ type; the remaining fields follow the toolkit's
   Args layout - NOTE(review): confirm exact field meanings against the Args
   declaration */
1200 Args myargs [] = {
1201 {"Path to ASN.1 Files", NULL, NULL, NULL,
1202 TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
1203 {"Single Input File", "stdin", NULL, NULL,
1204 TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
1205 {"Nucleotide Output File Name", NULL, NULL, NULL,
1206 TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
1207 {"Protein Output File Name", NULL, NULL, NULL,
1208 TRUE, 'v', ARG_FILE_OUT, 0.0, 0, NULL},
1209 {"Quality Score Output File Name", NULL, NULL, NULL,
1210 TRUE, 'q', ARG_FILE_OUT, 0.0, 0, NULL},
1211 {"File Selection Substring", ".ent", NULL, NULL,
1212 TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
1213 {"Recurse", "F", NULL, NULL,
1214 TRUE, 'u', ARG_BOOLEAN, 0.0, 0, NULL},
1215 {"Master Style for Near Segmented Sequences", "F", NULL, NULL,
1216 TRUE, 'm', ARG_BOOLEAN, 0.0, 0, NULL},
1217 {"Expand Delta Gaps into Ns", "F", NULL, NULL,
1218 TRUE, 'g', ARG_BOOLEAN, 0.0, 0, NULL},
1219 {"Use Dash for Gap", "F", NULL, NULL,
1220 TRUE, 'D', ARG_BOOLEAN, 0.0, 0, NULL},
1221 {"Far Genomic Contig for Quality Scores", "F", NULL, NULL,
1222 TRUE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
1223 {"Print Quality Score Gap as -1", "F", NULL, NULL,
1224 TRUE, 'z', ARG_BOOLEAN, 0.0, 0, NULL},
1225 {"ASN.1 Type (a Automatic, z Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, t Batch Processing)", "a", NULL, NULL,
1226 TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
1227 {"Bioseq-set is Binary", "F", NULL, NULL,
1228 TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
1229 {"Bioseq-set is Compressed", "F", NULL, NULL,
1230 TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
1231 {"Remote Fetching from ID", "F", NULL, NULL,
1232 TRUE, 'r', ARG_BOOLEAN, 0.0, 0, NULL},
1233 {"Path to Indexed FASTA Data", NULL, NULL, NULL,
1234 TRUE, 'f', ARG_STRING, 0.0, 0, NULL},
1235 {"Path to ReadDB Database", NULL, NULL, NULL,
1236 TRUE, 'd', ARG_STRING, 0.0, 0, NULL},
1237 {"Local Fetching", "F", NULL, NULL,
1238 TRUE, 'k', ARG_BOOLEAN, 0.0, 0, NULL},
1239 {"Lock Components in Advance", "F", NULL, NULL,
1240 TRUE, 'l', ARG_BOOLEAN, 0.0, 0, NULL},
1241 {"Far Component Cache Output File Name", NULL, NULL, NULL,
1242 TRUE, 'h', ARG_FILE_OUT, 0.0, 0, NULL},
1243 {"Line Length", "70", "10", "120",
1244 TRUE, 'e', ARG_INT, 0.0, 0, NULL},
1245 {"Use Threads", "F", NULL, NULL,
1246 TRUE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
1247 {"Log File", NULL, NULL, NULL,
1248 TRUE, 'L', ARG_FILE_OUT, 0.0, 0, NULL},
1249 {"Accession to Fetch", NULL, NULL, NULL,
1250 TRUE, 'A', ARG_STRING, 0.0, 0, NULL},
1251 };
1252
1253 Int2 Main (void)
1254
1255 {
1256 Char app [64], sfx [32];
1257 CharPtr accn, base, blastdb, directory, fastaidx, ntout,
1258 aaout, qlout, frout, logfile, ptr, str, suffix;
1259 Boolean automatic, batch, binary, blast, compressed, dorecurse,
1260 expandgaps, fargenomicqual, fasta, local, lock,
1261 masterstyle, qualgapzero, remote, usedashes,
1262 usethreads;
1263 FastaFlagData ffd;
1264 Int2 linelen, type = 0;
1265 time_t run_time, start_time, stop_time;
1266 SeqEntryPtr sep;
1267
1268 /* standard setup */
1269
1270 ErrSetFatalLevel (SEV_MAX);
1271 ErrClearOptFlags (EO_SHOW_USERSTR);
1272 ErrSetLogfile ("stderr", ELOG_APPEND);
1273 UseLocalAsnloadDataAndErrMsg ();
1274 ErrPathReset ();
1275
1276 if (! AllObjLoad ()) {
1277 Message (MSG_FATAL, "AllObjLoad failed");
1278 return 1;
1279 }
1280 if (! SubmitAsnLoad ()) {
1281 Message (MSG_FATAL, "SubmitAsnLoad failed");
1282 return 1;
1283 }
1284 if (! FeatDefSetLoad ()) {
1285 Message (MSG_FATAL, "FeatDefSetLoad failed");
1286 return 1;
1287 }
1288 if (! SeqCodeSetLoad ()) {
1289 Message (MSG_FATAL, "SeqCodeSetLoad failed");
1290 return 1;
1291 }
1292 if (! GeneticCodeTableLoad ()) {
1293 Message (MSG_FATAL, "GeneticCodeTableLoad failed");
1294 return 1;
1295 }
1296
1297 /* process command line arguments */
1298
1299 sprintf (app, "asn2fsa %s", ASN2FSA_APPLICATION);
1300 if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
1301 return 0;
1302 }
1303
1304 /* additional setup modifications */
1305
1306 MemSet ((Pointer) &ffd, 0, sizeof (FastaFlagData));
1307
1308 directory = (CharPtr) myargs [p_argInputPath].strvalue;
1309 suffix = (CharPtr) myargs [x_argSuffix].strvalue;
1310 base = (CharPtr) myargs [i_argInputFile].strvalue;
1311 accn = (CharPtr) myargs [A_argAccession].strvalue;
1312 dorecurse = (Boolean) myargs [u_argRecurse].intvalue;
1313 remote = (Boolean ) myargs [r_argRemote].intvalue;
1314 fastaidx = (CharPtr) myargs [f_argFastaIdx].strvalue;
1315 fasta = (Boolean) StringDoesHaveText (fastaidx);
1316 blastdb = (CharPtr) myargs [d_argBlastDB].strvalue;
1317 blast = (Boolean) StringDoesHaveText (blastdb);
1318 local = (Boolean) myargs [k_argLocalFetch].intvalue;
1319 lock = (Boolean) myargs [l_argLockFar].intvalue;
1320 linelen = (Int2) myargs [e_argLineLength].intvalue;
1321 usethreads = (Boolean) myargs [T_argThreads].intvalue;
1322
1323 expandgaps = (Boolean) myargs [g_argExpandGaps].intvalue;
1324 usedashes = (Boolean) myargs [D_argUseDashes].intvalue;
1325 masterstyle = (Boolean) myargs [m_argMaster].intvalue;
1326 fargenomicqual = (Boolean) myargs [s_argGenomicQual].intvalue;
1327 qualgapzero = (Boolean) myargs [z_argZeroQualGap].intvalue;
1328 automatic = FALSE;
1329 batch = FALSE;
1330 binary = (Boolean) myargs [b_argBinary].intvalue;
1331 compressed = (Boolean) myargs [c_argCompressed].intvalue;
1332
1333 str = myargs [a_argType].strvalue;
1334 if (StringICmp (str, "a") == 0) {
1335 type = 1;
1336 automatic = TRUE;
1337 } else if (StringICmp (str, "z") == 0) {
1338 type = 1;
1339 } else if (StringICmp (str, "e") == 0) {
1340 type = 2;
1341 } else if (StringICmp (str, "b") == 0) {
1342 type = 3;
1343 } else if (StringICmp (str, "s") == 0) {
1344 type = 4;
1345 } else if (StringICmp (str, "m") == 0) {
1346 type = 5;
1347 } else if (StringICmp (str, "t") == 0) {
1348 type = 1;
1349 batch = TRUE;
1350 } else {
1351 type = 1;
1352 }
1353
1354 if ((binary || compressed) && (! batch)) {
1355 if (type == 1) {
1356 Message (MSG_FATAL, "-b or -c cannot be used without -t or -a");
1357 return 1;
1358 }
1359 }
1360
1361 if (StringHasNoText (directory) && StringHasNoText (base)) {
1362 Message (MSG_FATAL, "Input path or input file must be specified");
1363 return 1;
1364 }
1365
1366 ntout = (CharPtr) myargs [o_argNtOutFile].strvalue;
1367 aaout = (CharPtr) myargs [v_argAaOutFile].strvalue;
1368 qlout = (CharPtr) myargs [q_argQlOutFile].strvalue;
1369 frout = (CharPtr) myargs [h_argFarOutFile].strvalue;
1370
1371 logfile = (CharPtr) myargs [L_argLogFile].strvalue;
1372
1373 /* default to stdout for nucleotide output if nothing specified */
1374
1375 if (StringHasNoText (ntout) &&
1376 StringHasNoText (aaout) &&
1377 StringHasNoText (qlout)) {
1378 ntout = "stdout";
1379 }
1380
1381 start_time = GetSecs ();
1382
1383 /* populate parameter structure */
1384
1385 ffd.expand_gaps = expandgaps;
1386 ffd.use_dashes = usedashes;
1387 ffd.master_style = masterstyle;
1388 ffd.far_genomic_qual = fargenomicqual;
1389 ffd.qual_gap_is_zero = (Boolean) (! qualgapzero);
1390 ffd.automatic = automatic;
1391 ffd.batch = batch;
1392 ffd.binary = binary;
1393 ffd.compressed = compressed;
1394 ffd.lock = lock;
1395 ffd.useThreads = usethreads;
1396 ffd.type = type;
1397 ffd.linelen = linelen;
1398 ffd.failed = FALSE;
1399 ffd.nt = NULL;
1400 ffd.aa = NULL;
1401 ffd.ql = NULL;
1402 ffd.fr = NULL;
1403 ffd.logfp = NULL;
1404
1405 if (! StringHasNoText (ntout)) {
1406 ffd.nt = FileOpen (ntout, "w");
1407 if (ffd.nt == NULL) {
1408 Message (MSG_FATAL, "Unable to open nucleotide output file");
1409 return 1;
1410 }
1411 }
1412
1413 if (! StringHasNoText (aaout)) {
1414 ffd.aa = FileOpen (aaout, "w");
1415 if (ffd.aa == NULL) {
1416 Message (MSG_FATAL, "Unable to open protein output file");
1417 return 1;
1418 }
1419 }
1420
1421 if (! StringHasNoText (qlout)) {
1422 ffd.ql = FileOpen (qlout, "w");
1423 if (ffd.ql == NULL) {
1424 Message (MSG_FATAL, "Unable to open quality score output file");
1425 return 1;
1426 }
1427 }
1428
1429 if (! StringHasNoText (frout)) {
1430 ffd.fr = FileOpen (frout, "w");
1431 if (ffd.fr == NULL) {
1432 Message (MSG_FATAL, "Unable to open far component cache output file");
1433 return 1;
1434 }
1435 ffd.lock = TRUE;
1436 }
1437
1438 if (! StringHasNoText (logfile)) {
1439 ffd.logfp = FileOpen (logfile, "w");
1440 if (ffd.logfp == NULL) {
1441 Message (MSG_FATAL, "Unable to open log file");
1442 return 1;
1443 }
1444 }
1445
1446 /* register fetch functions */
1447
1448 if (remote) {
1449 #ifdef INTERNAL_NCBI_ASN2FSA
1450 if (! PUBSEQBioseqFetchEnable ("asn2fsa", FALSE)) {
1451 Message (MSG_POSTERR, "PUBSEQBioseqFetchEnable failed");
1452 return 1;
1453 }
1454 ffd.usePUBSEQ = TRUE;
1455 ffd.useThreads = FALSE;
1456 #else
1457 PubSeqFetchEnable ();
1458 #endif
1459 }
1460
1461 if (blast) {
1462 ptr = StringRChr (blastdb, DIRDELIMCHR);
1463 if (ptr != NULL) {
1464 *ptr = '\0';
1465 ptr++;
1466 TransientSetAppParam ("NCBI", "BLAST", "BLASTDB", blastdb);
1467 if (StringDoesHaveText (ptr)) {
1468 ReadDBBioseqFetchEnable ("asn2fsa", ptr, TRUE, FALSE);
1469 ffd.blastdbname = ptr;
1470 ffd.useBLAST = TRUE;
1471 } else {
1472 ReadDBBioseqFetchEnable ("asn2fsa", "nr", TRUE, FALSE);
1473 ffd.blastdbname = "nr";
1474 ffd.useBLAST = TRUE;
1475 }
1476 } else {
1477 ReadDBBioseqFetchEnable ("asn2fsa", blastdb, TRUE, FALSE);
1478 ffd.blastdbname = blastdb;
1479 ffd.useBLAST = TRUE;
1480 }
1481 }
1482
1483 if (fasta) {
1484 AltIndexedFastaLibFetchEnable (fastaidx);
1485 }
1486
1487 if (local) {
1488 LocalSeqFetchInit (FALSE);
1489 }
1490
1491 /* recurse through all files within source directory or subdirectories */
1492
1493 if (StringDoesHaveText (accn)) {
1494
1495 if (remote) {
1496 sep = SeqEntryFromAccnOrGi (accn);
1497 if (sep != NULL) {
1498 ProcessOneSeqEntry (sep, &ffd);
1499 SeqEntryFree (sep);
1500 }
1501 }
1502
1503 } else if (StringDoesHaveText (directory)) {
1504
1505 FileRecurse (directory, NULL, suffix, dorecurse, &ffd);
1506
1507 } else if (StringDoesHaveText (base)) {
1508
1509 ptr = StringRChr (base, '.');
1510 sfx[0] = '\0';
1511 if (ptr != NULL) {
1512 StringNCpy_0 (sfx, ptr, sizeof (sfx));
1513 *ptr = '\0';
1514 }
1515 ProcessOneRecord (directory, base, sfx, &ffd);
1516 }
1517
1518 if (ffd.nt != NULL) {
1519 FileClose (ffd.nt);
1520 }
1521 if (ffd.aa != NULL) {
1522 FileClose (ffd.aa);
1523 }
1524 if (ffd.ql != NULL) {
1525 FileClose (ffd.ql);
1526 }
1527 if (ffd.fr != NULL) {
1528 FileClose (ffd.fr);
1529 CreateFastaIndex (frout);
1530 }
1531
1532 stop_time = GetSecs ();
1533 run_time = stop_time - start_time;
1534
1535 if (ffd.logfp != NULL) {
1536 fprintf (ffd.logfp, "Finished in %ld seconds\n", (long) run_time);
1537 FileClose (ffd.logfp);
1538 }
1539
1540 /* close fetch functions */
1541
1542 if (local) {
1543 LocalSeqFetchDisable ();
1544 }
1545
1546 if (fasta) {
1547 AltIndexedFastaLibFetchDisable ();
1548 }
1549
1550 if (blast) {
1551 ReadDBBioseqFetchDisable ();
1552 }
1553
1554 if (remote) {
1555 #ifdef INTERNAL_NCBI_ASN2FSA
1556 PUBSEQBioseqFetchDisable ();
1557 #else
1558 PubSeqFetchDisable ();
1559 #endif
1560 }
1561
1562 if (ffd.failed) {
1563 return 1;
1564 }
1565
1566 return 0;
1567 }
1568
1569 |
This page was automatically generated by the LXR engine. Visit the LXR main site for more information.