NCBI C Toolkit Cross Reference

C/demo/asn2fsa.c


  1 /*   asn2fsa.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  asn2fsa.c
 27 *
 28 * Author:  Jonathan Kans
 29 *
 30 * Version Creation Date:   3/4/04
 31 *
 32 * $Revision: 1.54 $
 33 *
 34 * File Description:
 35 *
 36 * Modifications:  
 37 * --------------------------------------------------------------------------
 38 * Date     Name        Description of modification
 39 * -------  ----------  -----------------------------------------------------
 40 *
 41 *
 42 * ==========================================================================
 43 */
 44 
 45 #include <ncbi.h>
 46 #include <objall.h>
 47 #include <objsset.h>
 48 #include <objsub.h>
 49 #include <objfdef.h>
 50 #include <seqport.h>
 51 #include <sequtil.h>
 52 #include <sqnutils.h>
 53 #include <subutil.h>
 54 #include <tofasta.h>
 55 #include <gather.h>
 56 #include <explore.h>
 57 #include <lsqfetch.h>
 58 #include <readdb.h>
 59 #include <pmfapi.h>
 60 #ifdef INTERNAL_NCBI_ASN2FSA
 61 #include <accpubseq.h>
 62 #endif
 63 
/* version string of this program, exported through ASN2FSA_APPLICATION */
 64 #define ASN2FSA_APP_VER "3.6"
 65 
 66 CharPtr ASN2FSA_APPLICATION = ASN2FSA_APP_VER;
 67 
/* queue of GI uids waiting to be fetched; AddUidToQueue and */
/* RemoveUidFromQueue modify it while holding requested_uid_mutex */
 68 static ValNodePtr  requested_uid_list = NULL;
 69 static TNlmMutex   requested_uid_mutex = NULL;
 70 
/* Bioseqs obtained with BioseqLockById by the lookup threads; AddBspToList */
/* and ExtractBspList modify it while holding locked_bsp_mutex */
 71 static ValNodePtr  locked_bsp_list = NULL;
 72 static TNlmMutex   locked_bsp_mutex = NULL;
 73 
 74 static void AddUidToQueue (
 75   SeqIdPtr sip
 76 )
 77 
 78 {
 79   ValNodePtr  last = NULL, vnp;
 80   Int4        ret;
 81   Int4        uid;
 82 
 83   if (sip == NULL || sip->choice != SEQID_GI) return;
 84   uid = (Int4) sip->data.intvalue;
 85   if (uid < 1) return;
 86 
 87   ret = NlmMutexLockEx (&requested_uid_mutex);
 88   if (ret) {
 89     ErrPostEx (SEV_FATAL, 0, 0, "AddUidToQueue mutex failed [%ld]", (long) ret);
 90     return;
 91   }
 92 
 93   /* check against uids already in queue */
 94 
 95   last = NULL;
 96   for (vnp = requested_uid_list; vnp != NULL; vnp = vnp->next) {
 97     last = vnp;
 98     if ((Int4) vnp->data.intvalue == uid) break;
 99   }
100 
101   /* add uid to queue */
102 
103   if (vnp == NULL) {
104     if (last != NULL) {
105       vnp = ValNodeAddInt (&last, 0, uid);
106       last = vnp;
107     } else {
108       requested_uid_list = ValNodeAddInt (NULL, 0, uid);
109       last = requested_uid_list;
110     }
111   }
112 
113   NlmMutexUnlock (requested_uid_mutex);
114 }
115 
116 static Int4 RemoveUidFromQueue (
117   void
118 )
119 
120 {
121   Int4        ret, uid = 0;
122   ValNodePtr  vnp;
123 
124   ret = NlmMutexLockEx (&requested_uid_mutex);
125   if (ret) {
126     ErrPostEx (SEV_FATAL, 0, 0, "RemoveUidFromQueue mutex failed [%ld]", (long) ret);
127     return 0;
128   }
129 
130   /* extract next requested uid from queue */
131 
132   if (requested_uid_list != NULL) {
133     vnp = requested_uid_list;
134     requested_uid_list = vnp->next;
135     vnp->next = NULL;
136     uid = (Int4) vnp->data.intvalue;
137     ValNodeFree (vnp);
138   }
139 
140   NlmMutexUnlock (requested_uid_mutex);
141 
142   return uid;
143 }
144 
145 static void QueueFarSegments (SeqLocPtr slp)
146 
147 {
148   BioseqPtr   bsp;
149   SeqLocPtr   loc;
150   SeqIdPtr    sip;
151   ValNodePtr  vnp;
152 
153   if (slp == NULL) return;
154 
155   sip = SeqLocId (slp);
156   if (sip == NULL) {
157     loc = SeqLocFindNext (slp, NULL);
158     if (loc != NULL) {
159       sip = SeqLocId (loc);
160     }
161   }
162   if (sip == NULL) return;
163 
164   /* if packaged in record, no need to fetch it */
165 
166   if (BioseqFindCore (sip) != NULL) return;
167 
168   /* check against currently locked records */
169 
170   for (vnp = locked_bsp_list; vnp != NULL; vnp = vnp->next) {
171     bsp = (BioseqPtr) vnp->data.ptrvalue;
172     if (bsp == NULL) continue;
173     if (SeqIdIn (sip, bsp->id)) return;
174   }
175 
176   AddUidToQueue (sip);
177 }
178 
179 static void QueueFarBioseqs (BioseqPtr bsp, Pointer userdata)
180 
181 {
182   DeltaSeqPtr  dsp;
183   SeqLocPtr    slp = NULL;
184   ValNode      vn;
185 
186   if (bsp == NULL) return;
187 
188   if (bsp->repr == Seq_repr_seg) {
189     vn.choice = SEQLOC_MIX;
190     vn.extended = 0;
191     vn.data.ptrvalue = bsp->seq_ext;
192     vn.next = NULL;
193     while ((slp = SeqLocFindNext (&vn, slp)) != NULL) {
194       if (slp != NULL && slp->choice != SEQLOC_NULL) {
195         QueueFarSegments (slp);
196       }
197     }
198   } else if (bsp->repr == Seq_repr_delta) {
199     for (dsp = (DeltaSeqPtr) (bsp->seq_ext); dsp != NULL; dsp = dsp->next) {
200       if (dsp->choice == 1) {
201         slp = (SeqLocPtr) dsp->data.ptrvalue;
202         if (slp != NULL && slp->choice != SEQLOC_NULL) {
203           QueueFarSegments (slp);
204         }
205       }
206     }
207   }
208 }
209 
210 static void AddBspToList (
211   BioseqPtr bsp
212 )
213 
214 {
215   Int4        ret;
216   ValNodePtr  vnp;
217 
218   if (bsp == NULL) return;
219 
220   ret = NlmMutexLockEx (&locked_bsp_mutex);
221   if (ret) {
222     ErrPostEx (SEV_FATAL, 0, 0, "AddBspToList mutex failed [%ld]", (long) ret);
223     return;
224   }
225 
226   vnp = ValNodeAddPointer (&locked_bsp_list, 0, (Pointer) bsp);
227 
228   NlmMutexUnlock (locked_bsp_mutex);
229 }
230 
231 static ValNodePtr ExtractBspList (
232   void
233 )
234 
235 {
236   Int4        ret;
237   ValNodePtr  vnp;
238 
239   ret = NlmMutexLockEx (&locked_bsp_mutex);
240   if (ret) {
241     ErrPostEx (SEV_FATAL, 0, 0, "ExtractBspList mutex failed [%ld]", (long) ret);
242     return NULL;
243   }
244 
245   vnp = locked_bsp_list;
246   locked_bsp_list = NULL;
247 
248   NlmMutexUnlock (locked_bsp_mutex);
249 
250   return vnp;
251 }
252 
/*
 * FastaFlagData gathers the options and open output streams that the
 * record-processing functions in this file consult while converting a
 * record.  The notes on each field describe how the code visible in this
 * file uses it.
 */
253 typedef struct fastaflags {
254   Boolean  master_style;      /* passed as the last argument of SeqEntryFastaStream */
255   Boolean  expand_gaps;       /* selects STREAM_EXPAND_GAPS, or EXPAND_GAPS_TO_DASHES together with use_dashes */
256   Boolean  use_dashes;        /* on its own selects GAP_TO_SINGLE_DASH */
257   Boolean  far_genomic_qual;  /* PrintQualScores calls PrintQualityScoresForContig when set */
258   Boolean  qual_gap_is_zero;  /* forwarded to both quality score printing functions */
259   Boolean  automatic;         /* ProcessOneRecord dispatches to ProcessAutomaticRecord */
260   Boolean  batch;             /* ProcessOneRecord dispatches to ProcessMultipleRecord */
261   Boolean  binary;            /* input is opened and read in binary ASN.1 mode */
262   Boolean  compressed;        /* input is piped through an uncompress program (OS_UNIX builds only) */
263   Boolean  lock;              /* call DoLockFarComponents before streaming */
264   Boolean  useThreads;        /* request AsyncLockFarComponents when threads are available */
265   Boolean  usePUBSEQ;         /* lookup threads call PUBSEQInit/PUBSEQFini (INTERNAL_NCBI_ASN2FSA builds) */
266   Boolean  useBLAST;          /* lookup threads enable ReadDB Bioseq fetching */
267   CharPtr  blastdbname;       /* database name given to ReadDBBioseqFetchEnable */
268   Int2     type;              /* input format: 1 read via ReadAsnFastaOrFlatFile, 2 Seq-entry, 3 Bioseq, 4 Bioseq-set, 5 Seq-submit */
269   Int2     linelen;           /* forwarded to BioseqFastaStream and SeqEntryFastaStream */
270   Boolean  failed;            /* set to TRUE when a FASTA stream call returns a negative value */
271   FILE     *nt;               /* output of the SeqEntryFastaStream call made with (TRUE, FALSE) - presumably nucleotide FASTA */
272   FILE     *aa;               /* output of the SeqEntryFastaStream call made with (FALSE, TRUE) - presumably protein FASTA */
273   FILE     *ql;               /* output used by PrintQualScores */
274   FILE     *fr;               /* output used by CacheFarComponents for locked far components */
275   FILE     *logfp;            /* log of record ids and timing written by ProcessMultipleRecord */
276 } FastaFlagData, PNTR FastaFlagPtr;
277 
278 static VoidPtr DoAsyncLookup (
279   VoidPtr arg
280 )
281 
282 {
283   BioseqPtr     bsp;
284   FastaFlagPtr  ffp;
285   Int4          uid;
286   ValNode       vn;
287 
288   ffp = (FastaFlagPtr) arg;
289   if (ffp == NULL) return NULL;
290 
291 #ifdef INTERNAL_NCBI_ASN2FSA
292   if (ffp->usePUBSEQ) {
293     PUBSEQInit ();
294   }
295 #endif
296   if (ffp->useBLAST) {
297     ReadDBBioseqFetchEnable ("asn2fsa", ffp->blastdbname, TRUE, FALSE);
298   }
299 
300   MemSet ((Pointer) &vn, 0, sizeof (ValNode));
301 
302   uid = RemoveUidFromQueue ();
303   while (uid > 0) {
304 
305     vn.choice = SEQID_GI;
306     vn.data.intvalue = uid;
307     vn.next = NULL;
308 
309     bsp = BioseqLockById (&vn);
310     if (bsp != NULL) {
311       AddBspToList (bsp);
312     }
313 
314     uid = RemoveUidFromQueue ();
315   }
316 
317   if (ffp->useBLAST) {
318     ReadDBBioseqFetchDisable ();
319   }
320 #ifdef INTERNAL_NCBI_ASN2FSA
321   if (ffp->usePUBSEQ) {
322     PUBSEQFini ();
323   }
324 #endif
325 
326   return NULL;
327 }
328 
329 #define NUM_ASYNC_LOOKUP_THREADS 5
330 
331 static void ProcessAsyncLookups (
332   FastaFlagPtr ffp
333 )
334 
335 {
336   Int2        i;
337   VoidPtr     status;
338   TNlmThread  thds [NUM_ASYNC_LOOKUP_THREADS];
339 
340   /* spawn several threads for individual BioseqLockById requests */
341 
342   for (i = 0; i < NUM_ASYNC_LOOKUP_THREADS; i++) {
343     thds [i] = NlmThreadCreate (DoAsyncLookup, (Pointer) ffp);
344   }
345 
346   /* wait for all fetching threads to complete */
347 
348   for (i = 0; i < NUM_ASYNC_LOOKUP_THREADS; i++) {
349     NlmThreadJoin (thds [i], &status);
350   }
351 }
352 
353 static ValNodePtr AsyncLockFarComponents (
354   SeqEntryPtr sep,
355   FastaFlagPtr ffp
356 )
357 
358 {
359   BioseqPtr    bsp;
360   ValNodePtr   bsplist = NULL, sublist, vnp;
361   SeqEntryPtr  oldsep;
362 
363   if (sep == NULL || ffp == NULL) return NULL;
364   oldsep = SeqEntrySetScope (sep);
365 
366   /* add far uids to queue */
367 
368   VisitBioseqsInSep (sep, NULL, QueueFarBioseqs);
369 
370   /* fetching from uid list using several threads */
371 
372   ProcessAsyncLookups (ffp);
373 
374   sublist = ExtractBspList ();
375 
376   /* take list, look for seg or delta, recurse */
377 
378   while (sublist != NULL) {
379     for (vnp = sublist; vnp != NULL; vnp = vnp->next) {
380       bsp = (BioseqPtr) vnp->data.ptrvalue;
381       if (bsp == NULL) continue;
382       QueueFarBioseqs (bsp, NULL);
383     }
384 
385     ValNodeLink (&bsplist, sublist);
386     sublist = NULL;
387 
388     ProcessAsyncLookups (ffp);
389 
390     sublist = ExtractBspList ();
391   }
392 
393   SeqEntrySetScope (oldsep);
394   return bsplist;
395 }
396 
397 static ValNodePtr DoLockFarComponents (
398   SeqEntryPtr sep,
399   FastaFlagPtr ffp
400 )
401 
402 {
403   ValNodePtr  rsult;
404   time_t      start_time, stop_time;
405 
406   /*
407   if (NlmThreadsAvailable () && ffp->useThreads) {
408     return AsyncLockFarComponents (sep);
409   }
410 
411   return LockFarComponents (sep);
412   */
413 
414   start_time = GetSecs ();
415 
416   if (NlmThreadsAvailable () && ffp->useThreads) {
417     rsult = AsyncLockFarComponents (sep, ffp);
418   } else if (ffp->useThreads) {
419     Message (MSG_POST, "Threads not available in this executable");
420     rsult = LockFarComponents (sep);
421   } else {
422     rsult = LockFarComponents (sep);
423   }
424 
425   stop_time = GetSecs ();
426 
427   return rsult;
428 }
429 
430 static Boolean DeltaLitOnly (
431   BioseqPtr bsp
432 )
433 
434 {
435   ValNodePtr  vnp;
436 
437   if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE;
438   for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
439     if (vnp->choice == 1) return FALSE;
440   }
441   return TRUE;
442 }
443 
444 static Boolean SegHasParts (
445   BioseqPtr bsp
446 )
447 
448 {
449   BioseqSetPtr  bssp;
450   SeqEntryPtr   sep;
451 
452   if (bsp == NULL || bsp->repr != Seq_repr_seg) return FALSE;
453   sep = bsp->seqentry;
454   if (sep == NULL) return FALSE;
455   sep = sep->next;
456   if (sep == NULL || (! IS_Bioseq_set (sep))) return FALSE;
457   bssp = (BioseqSetPtr) sep->data.ptrvalue;
458   if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts) return TRUE;
459   return FALSE;
460 }
461 
462 static void CacheFarComponents (
463   FastaFlagPtr ffp,
464   ValNodePtr bsplist
465 )
466 
467 {
468   BioseqPtr   bsp;
469   Uint2       entityID;
470   ValNodePtr  vnp;
471 
472   if (ffp == NULL || ffp->fr == NULL || bsplist == NULL) return;
473 
474   for (vnp = bsplist; vnp != NULL; vnp = vnp->next) {
475     bsp = (BioseqPtr) vnp->data.ptrvalue;
476     if (bsp == NULL) continue;
477 
478     /* cache raw and constructed, near segmented, and delta literal */
479 
480     switch (bsp->repr) {
481         case Seq_repr_raw :
482         case Seq_repr_const :
483           if (BioseqFastaStream (bsp, ffp->fr, 0, ffp->linelen, 0, 0, TRUE) < 0) {
484             ffp->failed = TRUE;
485           }
486           break;
487         case Seq_repr_seg :
488           entityID = ObjMgrGetEntityIDForPointer (bsp);
489           AssignIDsInEntity (entityID, 0, NULL);
490           if (SegHasParts (bsp)) {
491             if (BioseqFastaStream (bsp, ffp->fr, 0, ffp->linelen, 0, 0, TRUE) < 0) {
492               ffp->failed = TRUE;
493             }
494           }
495           break;
496         case Seq_repr_delta :
497           if (DeltaLitOnly (bsp)) {
498             if (BioseqFastaStream (bsp, ffp->fr, 0, ffp->linelen, 0, 0, TRUE) < 0) {
499               ffp->failed = TRUE;
500             }
501           }
502           break;
503         default :
504           break;
505     }
506   }
507 }
508 
509 static void PrintQualProc (
510   CharPtr buf,
511   Uint4 buflen,
512   Pointer userdata
513 )
514 
515 {
516   FILE  *fp;
517 
518   fp = (FILE*) userdata;
519   fprintf (fp, "%s", buf);
520 }
521 
522 static void PrintQualScores (
523   BioseqPtr bsp,
524   Pointer userdata
525 )
526 
527 {
528   FastaFlagPtr  ffp;
529 
530   ffp = (FastaFlagPtr) userdata;
531   if (bsp == NULL || ffp == NULL) return;
532   if (! ISA_na (bsp->mol)) return;
533 
534   if (ffp->far_genomic_qual) {
535     PrintQualityScoresForContig (bsp, ffp->qual_gap_is_zero, ffp->ql);
536   } else {
537     PrintQualityScoresToBuffer (bsp, ffp->qual_gap_is_zero, ffp->ql, PrintQualProc);
538   }
539 }
540 
541 static void ProcessSingleRecord (
542   CharPtr directory,
543   CharPtr base,
544   CharPtr suffix,
545   FastaFlagPtr ffp
546 )
547 
548 {
549   AsnIoPtr       aip;
550   BioseqPtr      bsp;
551   ValNodePtr     bsplist;
552   BioseqSetPtr   bssp;
553   Pointer        dataptr = NULL;
554   Uint2          datatype, entityID = 0;
555   Char           file [FILENAME_MAX], path [PATH_MAX];
556   StreamFlgType  flags = STREAM_CORRECT_INVAL;
557   FILE           *fp;
558   ObjMgrPtr      omp;
559   SeqEntryPtr    sep;
560 
561   if (ffp == NULL) return;
562 
563   if (base == NULL) {
564     base = "";
565   }
566   if (suffix == NULL) {
567     suffix = "";
568   }
569   StringNCpy_0 (path, directory, sizeof (path));
570   sprintf (file, "%s%s", base, suffix);
571   FileBuildPath (path, NULL, file);
572 
573   if (StringHasNoText (path)) return;
574 
575   if (ffp->type == 1) {
576     fp = FileOpen (path, "r");
577     if (fp == NULL) {
578       Message (MSG_POSTERR, "Failed to open '%s'", path);
579       return;
580     }
581 
582     dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE);
583 
584     FileClose (fp);
585 
586     entityID = ObjMgrRegister (datatype, dataptr);
587 
588   } else if (ffp->type >= 2 && ffp->type <= 5) {
589     aip = AsnIoOpen (path, ffp->binary? "rb" : "r");
590     if (aip == NULL) {
591       Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", path);
592       return;
593     }
594 
595     SeqMgrHoldIndexing (TRUE);
596     switch (ffp->type) {
597       case 2 :
598         dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
599         datatype = OBJ_SEQENTRY;
600         break;
601       case 3 :
602         dataptr = (Pointer) BioseqAsnRead (aip, NULL);
603         datatype = OBJ_BIOSEQ;
604         break;
605       case 4 :
606         dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
607         datatype = OBJ_BIOSEQSET;
608         break;
609       case 5 :
610         dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
611         datatype = OBJ_SEQSUB;
612         break;
613       default :
614         break;
615     }
616     SeqMgrHoldIndexing (FALSE);
617 
618     AsnIoClose (aip);
619 
620     entityID = ObjMgrRegister (datatype, dataptr);
621 
622   } else {
623     Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) ffp->type);
624     return;
625   }
626 
627   if (entityID < 1 || dataptr == NULL) {
628     Message (MSG_POSTERR, "Data read failed for input file '%s'", path);
629     return;
630   }
631 
632   if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
633         datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
634 
635     sep = GetTopSeqEntryForEntityID (entityID);
636 
637     if (sep == NULL) {
638       sep = SeqEntryNew ();
639       if (sep != NULL) {
640         if (datatype == OBJ_BIOSEQ) {
641           bsp = (BioseqPtr) dataptr;
642           sep->choice = 1;
643           sep->data.ptrvalue = bsp;
644           SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
645         } else if (datatype == OBJ_BIOSEQSET) {
646           bssp = (BioseqSetPtr) dataptr;
647           sep->choice = 2;
648           sep->data.ptrvalue = bssp;
649           SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
650         } else {
651           sep = SeqEntryFree (sep);
652         }
653       }
654       sep = GetTopSeqEntryForEntityID (entityID);
655     }
656 
657     if (sep != NULL) {
658       if (ffp->expand_gaps && ffp->use_dashes) {
659         flags |= EXPAND_GAPS_TO_DASHES;
660       } else if (ffp->expand_gaps) {
661         flags |= STREAM_EXPAND_GAPS;
662       } else if (ffp->use_dashes) {
663         flags |= GAP_TO_SINGLE_DASH;
664       }
665 
666       bsplist = NULL;
667       if (ffp->lock) {
668         bsplist = DoLockFarComponents (sep, ffp);
669         if (bsplist != NULL && ffp->fr != NULL) {
670           CacheFarComponents (ffp, bsplist);
671         }
672       }
673 
674       if (ffp->nt != NULL) {
675         if (SeqEntryFastaStream (sep, ffp->nt, flags, ffp->linelen, 0, 0,
676                                  TRUE, FALSE, ffp->master_style) < 0) {
677           ffp->failed = TRUE;
678         }
679       }
680       if (ffp->aa != NULL) {
681         if (SeqEntryFastaStream (sep, ffp->aa, flags, ffp->linelen, 0, 0,
682                                  FALSE, TRUE, ffp->master_style) < 0) {
683           ffp->failed = TRUE;
684         }
685       }
686       if (ffp->ql != NULL) {
687         VisitBioseqsInSep (sep, (Pointer) ffp, PrintQualScores);
688       }
689 
690       bsplist = UnlockFarComponents (bsplist);
691 
692     }
693   } else {
694     Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
695   }
696 
697   ObjMgrFree (datatype, dataptr);
698 
699   omp = ObjMgrGet ();
700   ObjMgrReapOne (omp);
701   SeqMgrClearBioseqIndex ();
702   ObjMgrFreeCache (0);
703   FreeSeqIdGiCache ();
704 
705   SeqEntrySetScope (NULL);
706 }
707 
708 static void ProcessMultipleRecord (
709   CharPtr directory,
710   CharPtr base,
711   CharPtr suffix,
712   FastaFlagPtr ffp
713 )
714 
715 {
716   AsnIoPtr       aip;
717   AsnModulePtr   amp;
718   AsnTypePtr     atp, atp_bss, atp_desc, atp_se;
719   BioseqPtr      bsp;
720   ValNodePtr     bsplist;
721   Char           buf [64], file [FILENAME_MAX], path [PATH_MAX], longest [64];
722   StreamFlgType  flags = STREAM_CORRECT_INVAL;
723   FILE           *fp;
724   Int4           numrecords = 0;
725   SeqEntryPtr    fsep, sep;
726   ObjMgrPtr      omp;
727   time_t         starttime, stoptime, worsttime;
728 #ifdef OS_UNIX
729   Char           cmmd [256];
730   CharPtr        gzcatprog;
731   int            ret;
732   Boolean        usedPopen = FALSE;
733 #endif
734 
735   if (ffp == NULL) return;
736 
737   if (base == NULL) {
738     base = "";
739   }
740   if (suffix == NULL) {
741     suffix = "";
742   }
743   StringNCpy_0 (path, directory, sizeof (path));
744   sprintf (file, "%s%s", base, suffix);
745   FileBuildPath (path, NULL, file);
746 
747   if (StringHasNoText (path)) return;
748 
749 #ifndef OS_UNIX
750   if (ffp->compressed) {
751     Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines");
752     return;
753   }
754 #endif
755 
756   amp = AsnAllModPtr ();
757   if (amp == NULL) {
758     Message (MSG_POSTERR, "Unable to load AsnAllModPtr");
759     return;
760   }
761 
762   atp_bss = AsnFind ("Bioseq-set");
763   if (atp_bss == NULL) {
764     Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set");
765     return;
766   }
767 
768   atp_desc = AsnFind ("Bioseq-set.descr");
769   if (atp_desc == NULL) {
770     Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.descr");
771     return;
772   }
773 
774   atp_se = AsnFind ("Bioseq-set.seq-set.E");
775   if (atp_se == NULL) {
776     Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.seq-set.E");
777     return;
778   }
779 
780 #ifdef OS_UNIX
781   if (ffp->compressed) {
782     gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
783     if (gzcatprog != NULL) {
784       sprintf (cmmd, "%s %s", gzcatprog, path);
785     } else {
786       ret = system ("gzcat -h >/dev/null 2>&1");
787       if (ret == 0) {
788         sprintf (cmmd, "gzcat %s", path);
789       } else if (ret == -1) {
790         Message (MSG_POSTERR, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
791         return;
792       } else {
793         ret = system ("zcat -h >/dev/null 2>&1");
794         if (ret == 0) {
795           sprintf (cmmd, "zcat %s", path);
796         } else if (ret == -1) {
797           Message (MSG_POSTERR, "Unable to fork or exec zcat in ScanBioseqSetRelease");
798           return;
799         } else {
800           Message (MSG_POSTERR, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
801           return;
802         }
803       }
804     }
805     fp = popen (cmmd, /* ffp->binary? "rb" : */ "r");
806     usedPopen = TRUE;
807   } else {
808     fp = FileOpen (path, ffp->binary? "rb" : "r");
809   }
810 #else
811   fp = FileOpen (path, ffp->binary? "rb" : "r");
812 #endif
813   if (fp == NULL) {
814     Message (MSG_POSTERR, "FileOpen failed for input file '%s'", path);
815     return;
816   }
817 
818   aip = AsnIoNew (ffp->binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
819   if (aip == NULL) {
820     Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", path);
821     return;
822   }
823 
824   atp = atp_bss;
825 
826   if (ffp->expand_gaps && ffp->use_dashes) {
827     flags |= EXPAND_GAPS_TO_DASHES;
828   } else if (ffp->expand_gaps) {
829     flags |= STREAM_EXPAND_GAPS;
830   } else if (ffp->use_dashes) {
831     flags |= GAP_TO_SINGLE_DASH;
832   }
833 
834   longest [0] = '\0';
835   worsttime = 0;
836 
837   while ((atp = AsnReadId (aip, amp, atp)) != NULL) {
838     if (atp == atp_se) {
839 
840       SeqMgrHoldIndexing (TRUE);
841       sep = SeqEntryAsnRead (aip, atp);
842       SeqMgrHoldIndexing (FALSE);
843 
844       starttime = GetSecs ();
845       buf [0] = '\0';
846 
847       if (ffp->logfp != NULL) {
848         fsep = FindNthBioseq (sep, 1);
849         if (fsep != NULL && fsep->choice == 1) {
850           bsp = (BioseqPtr) fsep->data.ptrvalue;
851           if (bsp != NULL) {
852             SeqIdWrite (bsp->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
853             fprintf (ffp->logfp, "%s\n", buf);
854             fflush (ffp->logfp);
855           }
856         }
857       }
858 
859       bsplist = NULL;
860       if (ffp->lock) {
861         bsplist = DoLockFarComponents (sep, ffp);
862         if (bsplist != NULL && ffp->fr != NULL) {
863           CacheFarComponents (ffp, bsplist);
864         }
865       }
866 
867       if (ffp->nt != NULL) {
868         SeqEntryFastaStream (sep, ffp->nt, flags, ffp->linelen, 0, 0, TRUE, FALSE, ffp->master_style);
869       }
870       if (ffp->aa != NULL) {
871         SeqEntryFastaStream (sep, ffp->aa, flags, ffp->linelen, 0, 0, FALSE, TRUE, ffp->master_style);
872       }
873       if (ffp->ql != NULL) {
874         VisitBioseqsInSep (sep, (Pointer) ffp, PrintQualScores);
875       }
876 
877       bsplist = UnlockFarComponents (bsplist);
878 
879       stoptime = GetSecs ();
880       if (stoptime - starttime > worsttime && StringDoesHaveText (buf)) {
881         worsttime = stoptime - starttime;
882         StringCpy (longest, buf);
883       }
884       numrecords++;
885 
886       SeqEntryFree (sep);
887       omp = ObjMgrGet ();
888       ObjMgrReapOne (omp);
889       SeqMgrClearBioseqIndex ();
890       ObjMgrFreeCache (0);
891       FreeSeqIdGiCache ();
892 
893       SeqEntrySetScope (NULL);
894     } else {
895       AsnReadVal (aip, atp, NULL);
896     }
897   }
898 
899   AsnIoFree (aip, FALSE);
900 
901 #ifdef OS_UNIX
902   if (usedPopen) {
903     pclose (fp);
904   } else {
905     FileClose (fp);
906   }
907 #else
908   FileClose (fp);
909 #endif
910 
911   if (ffp->logfp != NULL && (! StringHasNoText (longest))) {
912     fprintf (ffp->logfp, "Longest processing time %ld seconds on %s\n",
913              (long) worsttime, longest);
914     fprintf (ffp->logfp, "Total number of records %ld\n", (long) numrecords);
915     fflush (ffp->logfp);
916   }
917 }
918 
919 static void FastaWrapper (
920   SeqEntryPtr sep,
921   Pointer userdata
922 )
923 
924 {
925   ValNodePtr     bsplist;
926   FastaFlagPtr   ffp;
927   StreamFlgType  flags = STREAM_CORRECT_INVAL;
928 
929   if (sep == NULL) return;
930   ffp = (FastaFlagPtr) userdata;
931   if (ffp == NULL) return;
932 
933 
934   if (ffp->expand_gaps && ffp->use_dashes) {
935     flags |= EXPAND_GAPS_TO_DASHES;
936   } else if (ffp->expand_gaps) {
937     flags |= STREAM_EXPAND_GAPS;
938   } else if (ffp->use_dashes) {
939     flags |= GAP_TO_SINGLE_DASH;
940   }
941 
942   bsplist = NULL;
943   if (ffp->lock) {
944     bsplist = DoLockFarComponents (sep, ffp);
945     if (bsplist != NULL && ffp->fr != NULL) {
946       CacheFarComponents (ffp, bsplist);
947     }
948   }
949 
950   if (ffp->nt != NULL) {
951     if (SeqEntryFastaStream (sep, ffp->nt, flags, ffp->linelen, 0, 0,
952                              TRUE, FALSE, ffp->master_style) < 0) {
953       ffp->failed = TRUE;
954     }
955   }
956   if (ffp->aa != NULL) {
957     if (SeqEntryFastaStream (sep, ffp->aa, flags, ffp->linelen, 0, 0,
958                              FALSE, TRUE, ffp->master_style) < 0) {
959       ffp->failed = TRUE;
960     }
961   }
962   if (ffp->ql != NULL) {
963     VisitBioseqsInSep (sep, (Pointer) ffp, PrintQualScores);
964   }
965 
966   bsplist = UnlockFarComponents (bsplist);
967 }
968 
969 static void ProcessAutomaticRecord (
970   CharPtr directory,
971   CharPtr base,
972   CharPtr suffix,
973   FastaFlagPtr ffp
974 )
975 
976 {
977   Char  file [FILENAME_MAX], path [PATH_MAX];
978 
979   if (ffp == NULL) return;
980 
981   if (base == NULL) {
982     base = "";
983   }
984   if (suffix == NULL) {
985     suffix = "";
986   }
987   StringNCpy_0 (path, directory, sizeof (path));
988   sprintf (file, "%s%s", base, suffix);
989   FileBuildPath (path, NULL, file);
990 
991   if (StringHasNoText (path)) return;
992 
993   ReadSequenceAsnFile (path, ffp->binary, ffp->compressed, (Pointer) ffp, FastaWrapper);
994 }
995 
996 static void ProcessOneRecord (
997   CharPtr directory,
998   CharPtr base,
999   CharPtr suffix,
1000   FastaFlagPtr ffp
1001 )
1002 
1003 {
1004   if (ffp == NULL) return;
1005 
1006   if (ffp->automatic) {
1007     ProcessAutomaticRecord (directory, base, suffix, ffp);
1008   } else if (ffp->batch) {
1009     ProcessMultipleRecord (directory, base, suffix, ffp);
1010   } else {
1011     ProcessSingleRecord (directory, base, suffix, ffp);
1012   }
1013 }
1014 
1015 static void ProcessOneSeqEntry (
1016   SeqEntryPtr sep,
1017   FastaFlagPtr ffp
1018 )
1019 
1020 
1021 {
1022   ValNodePtr     bsplist;
1023   StreamFlgType  flags = STREAM_CORRECT_INVAL;
1024 
1025   if (sep == NULL || ffp == NULL) return;
1026 
1027   if (ffp->expand_gaps && ffp->use_dashes) {
1028     flags |= EXPAND_GAPS_TO_DASHES;
1029   } else if (ffp->expand_gaps) {
1030     flags |= STREAM_EXPAND_GAPS;
1031   } else if (ffp->use_dashes) {
1032     flags |= GAP_TO_SINGLE_DASH;
1033   }
1034 
1035   bsplist = NULL;
1036   if (ffp->lock) {
1037     bsplist = DoLockFarComponents (sep, ffp);
1038     if (bsplist != NULL && ffp->fr != NULL) {
1039       CacheFarComponents (ffp, bsplist);
1040     }
1041   }
1042 
1043   if (ffp->nt != NULL) {
1044     if (SeqEntryFastaStream (sep, ffp->nt, flags, ffp->linelen, 0, 0,
1045                              TRUE, FALSE, ffp->master_style) < 0) {
1046       ffp->failed = TRUE;
1047     }
1048   }
1049   if (ffp->aa != NULL) {
1050     if (SeqEntryFastaStream (sep, ffp->aa, flags, ffp->linelen, 0, 0,
1051                              FALSE, TRUE, ffp->master_style) < 0) {
1052       ffp->failed = TRUE;
1053     }
1054   }
1055   if (ffp->ql != NULL) {
1056     VisitBioseqsInSep (sep, (Pointer) ffp, PrintQualScores);
1057   }
1058 
1059   bsplist = UnlockFarComponents (bsplist);
1060 }
1061 
1062 static void FileRecurse (
1063   CharPtr directory,
1064   CharPtr subdir,
1065   CharPtr suffix,
1066   Boolean dorecurse,
1067   FastaFlagPtr ffp
1068 )
1069 
1070 {
1071   Char        path [PATH_MAX];
1072   CharPtr     ptr, str;
1073   ValNodePtr  head, vnp;
1074 
1075   /* get list of all files in source directory */
1076 
1077   head = DirCatalog (directory);
1078 
1079   for (vnp = head; vnp != NULL; vnp = vnp->next) {
1080     if (vnp->choice == 0) {
1081       if (StringHasNoText (subdir) || StringStr (directory, subdir) != NULL) {
1082         str = (CharPtr) vnp->data.ptrvalue;
1083         if (! StringHasNoText (str)) {
1084 
1085           /* does filename have desired substring? */
1086 
1087           ptr = StringStr (str, suffix);
1088           if (ptr != NULL) {
1089             *ptr = '\0';
1090 
1091             /* process file that has desired suffix (usually .ent) */
1092 
1093             ProcessOneRecord (directory, str, suffix, ffp);
1094           }
1095         }
1096       }
1097     } else if (vnp->choice == 1 && dorecurse) {
1098 
1099       /* recurse into subdirectory */
1100 
1101       StringNCpy_0 (path, directory, sizeof (path));
1102       str = (CharPtr) vnp->data.ptrvalue;
1103       FileBuildPath (path, str, NULL);
1104 
1105       FileRecurse (path, str, suffix, dorecurse, ffp);
1106     }
1107   }
1108 
1109   /* clean up file list */
1110 
1111   ValNodeFreeData (head);
1112 }
1113 
1114 static SeqEntryPtr SeqEntryFromAccnOrGi (
1115   CharPtr accn
1116 )
1117 
1118 {
1119   Boolean      alldigits;
1120   BioseqPtr    bsp;
1121   Char         ch;
1122   CharPtr      ptr;
1123   SeqEntryPtr  sep = NULL;
1124   SeqIdPtr     sip;
1125   Int4         uid = 0;
1126   long int     val;
1127   ValNode      vn;
1128 
1129   if (StringHasNoText (accn)) return NULL;
1130 
1131   TrimSpacesAroundString (accn);
1132 
1133   alldigits = TRUE;
1134   ptr = accn;
1135   ch = *ptr;
1136   while (ch != '\0') {
1137     if (! IS_DIGIT (ch)) {
1138       alldigits = FALSE;
1139     }
1140     ptr++;
1141     ch = *ptr;
1142   }
1143 
1144   if (alldigits) {
1145     if (sscanf (accn, "%ld", &val) == 1) {
1146       uid = (Int4) val;
1147     }
1148   } else {
1149     sip = SeqIdFromAccessionDotVersion (accn);
1150     if (sip != NULL) {
1151       uid = GetGIForSeqId (sip);
1152       SeqIdFree (sip);
1153     }
1154   }
1155 
1156   if (uid > 0) {
1157     sep = PubSeqSynchronousQuery (uid, 0, -1);
1158     if (sep != NULL) {
1159       MemSet ((Pointer) &vn, 0, sizeof (ValNode));
1160       vn.choice = SEQID_GI;
1161       vn.data.intvalue = uid;
1162       bsp = BioseqFind (&vn);
1163       if (bsp != NULL) {
1164         sep = SeqMgrGetSeqEntryForData ((Pointer) bsp);
1165       }
1166     }
1167   }
1168 
1169   return sep;
1170 }
1171 
1172 /* Args structure contains command-line arguments */
1173 
/* position of each command-line option within the myargs table */

enum {
  p_argInputPath   =  0,
  i_argInputFile   =  1,
  o_argNtOutFile   =  2,
  v_argAaOutFile   =  3,
  q_argQlOutFile   =  4,
  x_argSuffix      =  5,
  u_argRecurse     =  6,
  m_argMaster      =  7,
  g_argExpandGaps  =  8,
  D_argUseDashes   =  9,
  s_argGenomicQual = 10,
  z_argZeroQualGap = 11,
  a_argType        = 12,
  b_argBinary      = 13,
  c_argCompressed  = 14,
  r_argRemote      = 15,
  f_argFastaIdx    = 16,
  d_argBlastDB     = 17,
  k_argLocalFetch  = 18,
  l_argLockFar     = 19,
  h_argFarOutFile  = 20,
  e_argLineLength  = 21,
  T_argThreads     = 22,
  L_argLogFile     = 23,
  A_argAccession   = 24
};
1199 
1200 Args myargs [] = {
1201   {"Path to ASN.1 Files", NULL, NULL, NULL,
1202     TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
1203   {"Single Input File", "stdin", NULL, NULL,
1204     TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
1205   {"Nucleotide Output File Name", NULL, NULL, NULL,
1206     TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
1207   {"Protein Output File Name", NULL, NULL, NULL,
1208     TRUE, 'v', ARG_FILE_OUT, 0.0, 0, NULL},
1209   {"Quality Score Output File Name", NULL, NULL, NULL,
1210     TRUE, 'q', ARG_FILE_OUT, 0.0, 0, NULL},
1211   {"File Selection Substring", ".ent", NULL, NULL,
1212     TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
1213   {"Recurse", "F", NULL, NULL,
1214     TRUE, 'u', ARG_BOOLEAN, 0.0, 0, NULL},
1215   {"Master Style for Near Segmented Sequences", "F", NULL, NULL,
1216     TRUE, 'm', ARG_BOOLEAN, 0.0, 0, NULL},
1217   {"Expand Delta Gaps into Ns", "F", NULL, NULL,
1218     TRUE, 'g', ARG_BOOLEAN, 0.0, 0, NULL},
1219   {"Use Dash for Gap", "F", NULL, NULL,
1220     TRUE, 'D', ARG_BOOLEAN, 0.0, 0, NULL},
1221   {"Far Genomic Contig for Quality Scores", "F", NULL, NULL,
1222     TRUE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
1223   {"Print Quality Score Gap as -1", "F", NULL, NULL,
1224     TRUE, 'z', ARG_BOOLEAN, 0.0, 0, NULL},
1225   {"ASN.1 Type (a Automatic, z Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, t Batch Processing)", "a", NULL, NULL,
1226     TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
1227   {"Bioseq-set is Binary", "F", NULL, NULL,
1228     TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
1229   {"Bioseq-set is Compressed", "F", NULL, NULL,
1230     TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
1231   {"Remote Fetching from ID", "F", NULL, NULL,
1232     TRUE, 'r', ARG_BOOLEAN, 0.0, 0, NULL},
1233   {"Path to Indexed FASTA Data", NULL, NULL, NULL,
1234     TRUE, 'f', ARG_STRING, 0.0, 0, NULL},
1235   {"Path to ReadDB Database", NULL, NULL, NULL,
1236     TRUE, 'd', ARG_STRING, 0.0, 0, NULL},
1237   {"Local Fetching", "F", NULL, NULL,
1238     TRUE, 'k', ARG_BOOLEAN, 0.0, 0, NULL},
1239   {"Lock Components in Advance", "F", NULL, NULL,
1240     TRUE, 'l', ARG_BOOLEAN, 0.0, 0, NULL},
1241   {"Far Component Cache Output File Name", NULL, NULL, NULL,
1242     TRUE, 'h', ARG_FILE_OUT, 0.0, 0, NULL},
1243   {"Line Length", "70", "10", "120",
1244     TRUE, 'e', ARG_INT, 0.0, 0, NULL},
1245   {"Use Threads", "F", NULL, NULL,
1246     TRUE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
1247   {"Log File", NULL, NULL, NULL,
1248     TRUE, 'L', ARG_FILE_OUT, 0.0, 0, NULL},
1249   {"Accession to Fetch", NULL, NULL, NULL,
1250     TRUE, 'A', ARG_STRING, 0.0, 0, NULL},
1251 };
1252 
1253 Int2 Main (void)
1254 
1255 {
1256   Char           app [64], sfx [32];
1257   CharPtr        accn, base, blastdb, directory, fastaidx, ntout,
1258                  aaout, qlout, frout, logfile, ptr, str, suffix;
1259   Boolean        automatic, batch, binary, blast, compressed, dorecurse,
1260                  expandgaps, fargenomicqual, fasta, local, lock,
1261                  masterstyle, qualgapzero, remote, usedashes,
1262                  usethreads;
1263   FastaFlagData  ffd;
1264   Int2           linelen, type = 0;
1265   time_t         run_time, start_time, stop_time;
1266   SeqEntryPtr    sep;
1267 
1268   /* standard setup */
1269 
1270   ErrSetFatalLevel (SEV_MAX);
1271   ErrClearOptFlags (EO_SHOW_USERSTR);
1272   ErrSetLogfile ("stderr", ELOG_APPEND);
1273   UseLocalAsnloadDataAndErrMsg ();
1274   ErrPathReset ();
1275 
1276   if (! AllObjLoad ()) {
1277     Message (MSG_FATAL, "AllObjLoad failed");
1278     return 1;
1279   }
1280   if (! SubmitAsnLoad ()) {
1281     Message (MSG_FATAL, "SubmitAsnLoad failed");
1282     return 1;
1283   }
1284   if (! FeatDefSetLoad ()) {
1285     Message (MSG_FATAL, "FeatDefSetLoad failed");
1286     return 1;
1287   }
1288   if (! SeqCodeSetLoad ()) {
1289     Message (MSG_FATAL, "SeqCodeSetLoad failed");
1290     return 1;
1291   }
1292   if (! GeneticCodeTableLoad ()) {
1293     Message (MSG_FATAL, "GeneticCodeTableLoad failed");
1294     return 1;
1295   }
1296 
1297   /* process command line arguments */
1298 
1299   sprintf (app, "asn2fsa %s", ASN2FSA_APPLICATION);
1300   if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
1301     return 0;
1302   }
1303 
1304   /* additional setup modifications */
1305 
1306   MemSet ((Pointer) &ffd, 0, sizeof (FastaFlagData));
1307 
1308   directory = (CharPtr) myargs [p_argInputPath].strvalue;
1309   suffix = (CharPtr) myargs [x_argSuffix].strvalue;
1310   base = (CharPtr) myargs [i_argInputFile].strvalue;
1311   accn = (CharPtr) myargs [A_argAccession].strvalue;
1312   dorecurse = (Boolean) myargs [u_argRecurse].intvalue;
1313   remote = (Boolean ) myargs [r_argRemote].intvalue;
1314   fastaidx = (CharPtr) myargs [f_argFastaIdx].strvalue;
1315   fasta = (Boolean) StringDoesHaveText (fastaidx);
1316   blastdb = (CharPtr) myargs [d_argBlastDB].strvalue;
1317   blast = (Boolean) StringDoesHaveText (blastdb);
1318   local = (Boolean) myargs [k_argLocalFetch].intvalue;
1319   lock = (Boolean) myargs [l_argLockFar].intvalue;
1320   linelen = (Int2) myargs [e_argLineLength].intvalue;
1321   usethreads = (Boolean) myargs [T_argThreads].intvalue;
1322 
1323   expandgaps = (Boolean) myargs [g_argExpandGaps].intvalue;
1324   usedashes = (Boolean) myargs [D_argUseDashes].intvalue;
1325   masterstyle = (Boolean) myargs [m_argMaster].intvalue;
1326   fargenomicqual = (Boolean) myargs [s_argGenomicQual].intvalue;
1327   qualgapzero = (Boolean) myargs [z_argZeroQualGap].intvalue;
1328   automatic = FALSE;
1329   batch = FALSE;
1330   binary = (Boolean) myargs [b_argBinary].intvalue;
1331   compressed = (Boolean) myargs [c_argCompressed].intvalue;
1332 
1333   str = myargs [a_argType].strvalue;
1334   if (StringICmp (str, "a") == 0) {
1335     type = 1;
1336     automatic = TRUE;
1337   } else if (StringICmp (str, "z") == 0) {
1338     type = 1;
1339   } else if (StringICmp (str, "e") == 0) {
1340     type = 2;
1341   } else if (StringICmp (str, "b") == 0) {
1342     type = 3;
1343   } else if (StringICmp (str, "s") == 0) {
1344     type = 4;
1345   } else if (StringICmp (str, "m") == 0) {
1346     type = 5;
1347   } else if (StringICmp (str, "t") == 0) {
1348     type = 1;
1349     batch = TRUE;
1350   } else {
1351     type = 1;
1352   }
1353 
1354   if ((binary || compressed) && (! batch)) {
1355     if (type == 1) {
1356       Message (MSG_FATAL, "-b or -c cannot be used without -t or -a");
1357       return 1;
1358     }
1359   }
1360 
1361   if (StringHasNoText (directory) && StringHasNoText (base)) {
1362     Message (MSG_FATAL, "Input path or input file must be specified");
1363     return 1;
1364   }
1365 
1366   ntout = (CharPtr) myargs [o_argNtOutFile].strvalue;
1367   aaout = (CharPtr) myargs [v_argAaOutFile].strvalue;
1368   qlout = (CharPtr) myargs [q_argQlOutFile].strvalue;
1369   frout = (CharPtr) myargs [h_argFarOutFile].strvalue;
1370 
1371   logfile = (CharPtr) myargs [L_argLogFile].strvalue;
1372 
1373   /* default to stdout for nucleotide output if nothing specified */
1374 
1375   if (StringHasNoText (ntout) &&
1376       StringHasNoText (aaout) &&
1377       StringHasNoText (qlout)) {
1378     ntout = "stdout";
1379   }
1380 
1381   start_time = GetSecs ();
1382 
1383   /* populate parameter structure */
1384 
1385   ffd.expand_gaps = expandgaps;
1386   ffd.use_dashes = usedashes;
1387   ffd.master_style = masterstyle;
1388   ffd.far_genomic_qual = fargenomicqual;
1389   ffd.qual_gap_is_zero = (Boolean) (! qualgapzero);
1390   ffd.automatic = automatic;
1391   ffd.batch = batch;
1392   ffd.binary = binary;
1393   ffd.compressed = compressed;
1394   ffd.lock = lock;
1395   ffd.useThreads = usethreads;
1396   ffd.type = type;
1397   ffd.linelen = linelen;
1398   ffd.failed = FALSE;
1399   ffd.nt = NULL;
1400   ffd.aa = NULL;
1401   ffd.ql = NULL;
1402   ffd.fr = NULL;
1403   ffd.logfp = NULL;
1404 
1405   if (! StringHasNoText (ntout)) {
1406     ffd.nt = FileOpen (ntout, "w");
1407     if (ffd.nt == NULL) {
1408       Message (MSG_FATAL, "Unable to open nucleotide output file");
1409       return 1;
1410     }
1411   }
1412 
1413   if (! StringHasNoText (aaout)) {
1414     ffd.aa = FileOpen (aaout, "w");
1415     if (ffd.aa == NULL) {
1416       Message (MSG_FATAL, "Unable to open protein output file");
1417       return 1;
1418     }
1419   }
1420 
1421   if (! StringHasNoText (qlout)) {
1422     ffd.ql = FileOpen (qlout, "w");
1423     if (ffd.ql == NULL) {
1424       Message (MSG_FATAL, "Unable to open quality score output file");
1425       return 1;
1426     }
1427   }
1428 
1429   if (! StringHasNoText (frout)) {
1430     ffd.fr = FileOpen (frout, "w");
1431     if (ffd.fr == NULL) {
1432       Message (MSG_FATAL, "Unable to open far component cache output file");
1433       return 1;
1434     }
1435     ffd.lock = TRUE;
1436   }
1437 
1438   if (! StringHasNoText (logfile)) {
1439     ffd.logfp = FileOpen (logfile, "w");
1440     if (ffd.logfp == NULL) {
1441       Message (MSG_FATAL, "Unable to open log file");
1442       return 1;
1443     }
1444   }
1445 
1446   /* register fetch functions */
1447 
1448   if (remote) {
1449 #ifdef INTERNAL_NCBI_ASN2FSA
1450     if (! PUBSEQBioseqFetchEnable ("asn2fsa", FALSE)) {
1451       Message (MSG_POSTERR, "PUBSEQBioseqFetchEnable failed");
1452       return 1;
1453     }
1454     ffd.usePUBSEQ = TRUE;
1455     ffd.useThreads = FALSE;
1456 #else
1457     PubSeqFetchEnable ();
1458 #endif
1459   }
1460 
1461   if (blast) {
1462     ptr = StringRChr (blastdb, DIRDELIMCHR);
1463     if (ptr != NULL) {
1464       *ptr = '\0';
1465       ptr++;
1466       TransientSetAppParam ("NCBI", "BLAST", "BLASTDB", blastdb);
1467       if (StringDoesHaveText (ptr)) {
1468         ReadDBBioseqFetchEnable ("asn2fsa", ptr, TRUE, FALSE);
1469         ffd.blastdbname = ptr;
1470         ffd.useBLAST = TRUE;
1471       } else {
1472         ReadDBBioseqFetchEnable ("asn2fsa", "nr", TRUE, FALSE);
1473         ffd.blastdbname = "nr";
1474         ffd.useBLAST = TRUE;
1475       }
1476     } else {
1477       ReadDBBioseqFetchEnable ("asn2fsa", blastdb, TRUE, FALSE);
1478       ffd.blastdbname = blastdb;
1479       ffd.useBLAST = TRUE;
1480     }
1481   }
1482 
1483   if (fasta) {
1484     AltIndexedFastaLibFetchEnable (fastaidx);
1485   }
1486 
1487   if (local) {
1488     LocalSeqFetchInit (FALSE);
1489   }
1490 
1491   /* recurse through all files within source directory or subdirectories */
1492 
1493   if (StringDoesHaveText (accn)) {
1494 
1495     if (remote) {
1496       sep = SeqEntryFromAccnOrGi (accn);
1497       if (sep != NULL) {
1498         ProcessOneSeqEntry (sep, &ffd);
1499         SeqEntryFree (sep);
1500       }
1501     }
1502 
1503   } else if (StringDoesHaveText (directory)) {
1504 
1505     FileRecurse (directory, NULL, suffix, dorecurse, &ffd);
1506 
1507   } else if (StringDoesHaveText (base)) {
1508 
1509     ptr = StringRChr (base, '.');
1510     sfx[0] = '\0';
1511     if (ptr != NULL) {
1512       StringNCpy_0 (sfx, ptr, sizeof (sfx));
1513       *ptr = '\0';
1514     }
1515     ProcessOneRecord (directory, base, sfx, &ffd);
1516   }
1517 
1518   if (ffd.nt != NULL) {
1519     FileClose (ffd.nt);
1520   }
1521   if (ffd.aa != NULL) {
1522     FileClose (ffd.aa);
1523   }
1524   if (ffd.ql != NULL) {
1525     FileClose (ffd.ql);
1526   }
1527   if (ffd.fr != NULL) {
1528     FileClose (ffd.fr);
1529     CreateFastaIndex (frout);
1530   }
1531 
1532   stop_time = GetSecs ();
1533   run_time = stop_time - start_time;
1534 
1535   if (ffd.logfp != NULL) {
1536     fprintf (ffd.logfp, "Finished in %ld seconds\n", (long) run_time);
1537     FileClose (ffd.logfp);
1538   }
1539 
1540   /* close fetch functions */
1541 
1542   if (local) {
1543     LocalSeqFetchDisable ();
1544   }
1545 
1546   if (fasta) {
1547     AltIndexedFastaLibFetchDisable ();
1548   }
1549 
1550   if (blast) {
1551     ReadDBBioseqFetchDisable ();
1552   }
1553 
1554   if (remote) {
1555 #ifdef INTERNAL_NCBI_ASN2FSA
1556     PUBSEQBioseqFetchDisable ();
1557 #else
1558     PubSeqFetchDisable ();
1559 #endif
1560   }
1561 
1562   if (ffd.failed) {
1563     return 1;
1564   }
1565 
1566   return 0;
1567 }
1568 
1569 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.