NCBI C Toolkit Cross Reference

C/demo/asndisc.c


  1 /*   asndisc.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  asndisc.c
 27 *
 28 * Author:  Jonathan Kans, adapted from asnval.c by Colleen Bollin
 29 *
 30 * Version Creation Date:   1/23/07
 31 *
 32 * $Revision: 1.26 $
 33 *
 34 * File Description:
 35 *
 36 * Modifications:  
 37 * --------------------------------------------------------------------------
 38 * Date     Name        Description of modification
 39 * -------  ----------  -----------------------------------------------------
 40 *
 41 *
 42 * ==========================================================================
 43 */
 44 
 45 #include <ncbi.h>
 46 #include <objall.h>
 47 #include <objsset.h>
 48 #include <objsub.h>
 49 #include <objfdef.h>
 50 #include <seqport.h>
 51 #include <sequtil.h>
 52 #include <sqnutils.h>
 53 #include <subutil.h>
 54 #include <gather.h>
 55 #include <explore.h>
 56 #include <lsqfetch.h>
 57 #include <valid.h>
 58 #include <pmfapi.h>
 59 #ifdef INTERNAL_NCBI_ASNDISC
 60 #include <accpubseq.h>
 61 #include <tax3api.h>
 62 #endif
 63 
/* version string of this application */
#define ASNDISC_APP_VER "1.2"

/* global, externally visible copy of the application version string */
CharPtr ASNDISC_APPLICATION = ASNDISC_APP_VER;
 67 
/*
 * DRFlagData
 *
 * Option flags and working state shared by the record-processing functions
 * in this file.  A pointer to this structure is what ProcessOneRecord
 * receives as its userdata argument.
 */
typedef struct drflags {
  Boolean  farFetchCDSproducts;   /* passed through to AdvcLockFarComponents */
  Boolean  batch;                 /* TRUE: ProcessMultipleRecord, FALSE: ProcessSingleRecord */
  Boolean  binary;                /* open ASN.1 input in binary rather than text mode */
  Boolean  compressed;            /* batch input is read through a gzcat/zcat pipe */
  Boolean  lock;                  /* call DoLockFarComponents on every Seq-entry read */
  Boolean  useThreads;            /* request threaded locking of far components */
  Boolean  usePUBSEQ;             /* not referenced in this part of the file */
  Int2     type;                  /* input format: 1 = any (ReadAsnFastaOrFlatFile), 2 = Seq-entry,
                                     3 = Bioseq, 4 = Bioseq-set, 5 = Seq-submit */
  Int4     maxcount;              /* batch reading stops once this many Seq-entries were read */
  CharPtr  outpath;               /* not referenced in this part of the file */
  CharPtr  output_suffix;         /* suffix for per-file reports; ".dr" is used when blank */
  CharPtr  output_dir;            /* directory for per-file reports; blank means next to the input */
  FILE     *outfp;                /* NULL: write one report per input file;
                                     otherwise entries are collated into global_report */
  Int4     numrecords;            /* not referenced in this part of the file */
  ValNodePtr            sep_list; /* Seq-entries read from the current input file */
  ValNodePtr            bsplist;  /* list returned by AdvcLockFarComponents, released by UnlockFarComponents */

  GlobalDiscrepReportPtr global_report; /* supplies test_config and output_config for reports */
} DRFlagData, PNTR DRFlagPtr;
 88 
/*
 * Taxonomy check handed to CollectDiscrepancies.  Only the internal NCBI
 * build supplies a real function; all other builds pass NULL.
 */
#ifdef INTERNAL_NCBI_ASNDISC
const PerformDiscrepancyTest taxlookup = CheckTaxNamesAgainstTaxDatabase;
#else
const PerformDiscrepancyTest taxlookup = NULL;
#endif
 94 
 95 #ifdef INTERNAL_NCBI_ASNDISC
 96 static CharPtr dirsubfetchproc = "DirSubBioseqFetch";
 97 
 98 static CharPtr dirsubfetchcmd = NULL;
 99 
100 extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
101 extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
102 
103 {
104   Char     cmmd [256];
105   Pointer  dataptr;
106   FILE*    fp;
107   Char     path [PATH_MAX];
108 
109   if (datatype != NULL) {
110     *datatype = 0;
111   }
112   if (entityID != NULL) {
113     *entityID = 0;
114   }
115   if (StringHasNoText (accn)) return NULL;
116 
117   if (dirsubfetchcmd == NULL) {
118     if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
119         dirsubfetchcmd = StringSaveNoNull (cmmd);
120     }
121   }
122   if (dirsubfetchcmd == NULL) return NULL;
123 
124   TmpNam (path);
125 
126 #ifdef OS_UNIX
127   sprintf (cmmd, "csh %s %s > %s", dirsubfetchcmd, accn, path);
128   system (cmmd);
129 #endif
130 #ifdef OS_MSWIN
131   sprintf (cmmd, "%s %s -o %s", dirsubfetchcmd, accn, path);
132   system (cmmd);
133 #endif
134 
135   fp = FileOpen (path, "r");
136   if (fp == NULL) {
137     FileRemove (path);
138     return NULL;
139   }
140   dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
141   FileClose (fp);
142   FileRemove (path);
143   return dataptr;
144 }
145 
146 
147 static Int2 LIBCALLBACK DirSubBioseqFetchFunc (Pointer data)
148 
149 {
150   BioseqPtr         bsp;
151   Char              cmmd [256];
152   Pointer           dataptr;
153   Uint2             datatype;
154   Uint2             entityID;
155   FILE*             fp;
156   OMProcControlPtr  ompcp;
157   ObjMgrProcPtr     ompp;
158   Char              path [PATH_MAX];
159   SeqEntryPtr       sep = NULL;
160   SeqIdPtr          sip;
161   TextSeqIdPtr      tsip;
162 
163   ompcp = (OMProcControlPtr) data;
164   if (ompcp == NULL) return OM_MSG_RET_ERROR;
165   ompp = ompcp->proc;
166   if (ompp == NULL) return OM_MSG_RET_ERROR;
167   sip = (SeqIdPtr) ompcp->input_data;
168   if (sip == NULL) return OM_MSG_RET_ERROR;
169 
170   if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
171   tsip = (TextSeqIdPtr) sip->data.ptrvalue;
172   if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
173 
174   if (dirsubfetchcmd == NULL) {
175     if (GetAppParam ("SEQUIN", "DIRSUB", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
176         dirsubfetchcmd = StringSaveNoNull (cmmd);
177     }
178   }
179   if (dirsubfetchcmd == NULL) return OM_MSG_RET_ERROR;
180 
181   TmpNam (path);
182 
183 #ifdef OS_UNIX
184   sprintf (cmmd, "csh %s %s > %s", dirsubfetchcmd, tsip->accession, path);
185   system (cmmd);
186 #endif
187 #ifdef OS_MSWIN
188   sprintf (cmmd, "%s %s -o %s", dirsubfetchcmd, tsip->accession, path);
189   system (cmmd);
190 #endif
191 
192   fp = FileOpen (path, "r");
193   if (fp == NULL) {
194     FileRemove (path);
195     return OM_MSG_RET_ERROR;
196   }
197   dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
198   FileClose (fp);
199   FileRemove (path);
200 
201   if (dataptr == NULL) return OM_MSG_RET_OK;
202 
203   sep = GetTopSeqEntryForEntityID (entityID);
204   if (sep == NULL) return OM_MSG_RET_ERROR;
205   bsp = BioseqFindInSeqEntry (sip, sep);
206   ompcp->output_data = (Pointer) bsp;
207   ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
208   return OM_MSG_RET_DONE;
209 }
210 
211 static Boolean DirSubFetchEnable (void)
212 
213 {
214   ObjMgrProcLoad (OMPROC_FETCH, dirsubfetchproc, dirsubfetchproc,
215                   OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
216                   DirSubBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
217   return TRUE;
218 }
219 
220 static CharPtr smartfetchproc = "SmartBioseqFetch";
221 
222 static CharPtr smartfetchcmd = NULL;
223 
224 extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
225 extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
226 
227 {
228   Char     cmmd [256];
229   Pointer  dataptr;
230   FILE*    fp;
231   Char     path [PATH_MAX];
232 
233   if (datatype != NULL) {
234     *datatype = 0;
235   }
236   if (entityID != NULL) {
237     *entityID = 0;
238   }
239   if (StringHasNoText (accn)) return NULL;
240 
241   if (smartfetchcmd == NULL) {
242     if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
243         smartfetchcmd = StringSaveNoNull (cmmd);
244     }
245   }
246   if (smartfetchcmd == NULL) return NULL;
247 
248   TmpNam (path);
249 
250 #ifdef OS_UNIX
251   sprintf (cmmd, "csh %s %s > %s", smartfetchcmd, accn, path);
252   system (cmmd);
253 #endif
254 #ifdef OS_MSWIN
255   sprintf (cmmd, "%s %s -o %s", smartfetchcmd, accn, path);
256   system (cmmd);
257 #endif
258 
259   fp = FileOpen (path, "r");
260   if (fp == NULL) {
261     FileRemove (path);
262     return NULL;
263   }
264   dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
265   FileClose (fp);
266   FileRemove (path);
267   return dataptr;
268 }
269 
270 
271 static Int2 LIBCALLBACK SmartBioseqFetchFunc (Pointer data)
272 
273 {
274   BioseqPtr         bsp;
275   Char              cmmd [256];
276   Pointer           dataptr;
277   Uint2             datatype;
278   Uint2             entityID;
279   FILE*             fp;
280   OMProcControlPtr  ompcp;
281   ObjMgrProcPtr     ompp;
282   Char              path [PATH_MAX];
283   SeqEntryPtr       sep = NULL;
284   SeqIdPtr          sip;
285   TextSeqIdPtr      tsip;
286 
287   ompcp = (OMProcControlPtr) data;
288   if (ompcp == NULL) return OM_MSG_RET_ERROR;
289   ompp = ompcp->proc;
290   if (ompp == NULL) return OM_MSG_RET_ERROR;
291   sip = (SeqIdPtr) ompcp->input_data;
292   if (sip == NULL) return OM_MSG_RET_ERROR;
293 
294   if (sip->choice != SEQID_GENBANK) return OM_MSG_RET_ERROR;
295   tsip = (TextSeqIdPtr) sip->data.ptrvalue;
296   if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
297 
298   if (smartfetchcmd == NULL) {
299     if (GetAppParam ("SEQUIN", "SMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
300         smartfetchcmd = StringSaveNoNull (cmmd);
301     }
302   }
303   if (smartfetchcmd == NULL) return OM_MSG_RET_ERROR;
304 
305   TmpNam (path);
306 
307 #ifdef OS_UNIX
308   sprintf (cmmd, "csh %s %s > %s", smartfetchcmd, tsip->accession, path);
309   system (cmmd);
310 #endif
311 #ifdef OS_MSWIN
312   sprintf (cmmd, "%s %s -o %s", smartfetchcmd, tsip->accession, path);
313   system (cmmd);
314 #endif
315 
316   fp = FileOpen (path, "r");
317   if (fp == NULL) {
318     FileRemove (path);
319     return OM_MSG_RET_ERROR;
320   }
321   dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
322   FileClose (fp);
323   FileRemove (path);
324 
325   if (dataptr == NULL) return OM_MSG_RET_OK;
326 
327   sep = GetTopSeqEntryForEntityID (entityID);
328   if (sep == NULL) return OM_MSG_RET_ERROR;
329   bsp = BioseqFindInSeqEntry (sip, sep);
330   ompcp->output_data = (Pointer) bsp;
331   ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
332   return OM_MSG_RET_DONE;
333 }
334 
335 static Boolean SmartFetchEnable (void)
336 
337 {
338   ObjMgrProcLoad (OMPROC_FETCH, smartfetchproc, smartfetchproc,
339                   OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
340                   SmartBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
341   return TRUE;
342 }
343 
344 static CharPtr tpasmartfetchproc = "TPASmartBioseqFetch";
345 
346 static CharPtr tpasmartfetchcmd = NULL;
347 
348 extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
349 extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)
350 
351 {
352   Char     cmmd [256];
353   Pointer  dataptr;
354   FILE*    fp;
355   Char     path [PATH_MAX];
356 
357   if (datatype != NULL) {
358     *datatype = 0;
359   }
360   if (entityID != NULL) {
361     *entityID = 0;
362   }
363   if (StringHasNoText (accn)) return NULL;
364 
365   if (tpasmartfetchcmd == NULL) {
366     if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
367         tpasmartfetchcmd = StringSaveNoNull (cmmd);
368     }
369   }
370   if (tpasmartfetchcmd == NULL) return NULL;
371 
372   TmpNam (path);
373 
374 #ifdef OS_UNIX
375   sprintf (cmmd, "csh %s %s > %s", tpasmartfetchcmd, accn, path);
376   system (cmmd);
377 #endif
378 #ifdef OS_MSWIN
379   sprintf (cmmd, "%s %s -o %s", tpasmartfetchcmd, accn, path);
380   system (cmmd);
381 #endif
382 
383   fp = FileOpen (path, "r");
384   if (fp == NULL) {
385     FileRemove (path);
386     return NULL;
387   }
388   dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
389   FileClose (fp);
390   FileRemove (path);
391   return dataptr;
392 }
393 
394 
395 static Int2 LIBCALLBACK TPASmartBioseqFetchFunc (Pointer data)
396 
397 {
398   BioseqPtr         bsp;
399   Char              cmmd [256];
400   Pointer           dataptr;
401   Uint2             datatype;
402   Uint2             entityID;
403   FILE*             fp;
404   OMProcControlPtr  ompcp;
405   ObjMgrProcPtr     ompp;
406   Char              path [PATH_MAX];
407   SeqEntryPtr       sep = NULL;
408   SeqIdPtr          sip;
409   TextSeqIdPtr      tsip;
410 
411   ompcp = (OMProcControlPtr) data;
412   if (ompcp == NULL) return OM_MSG_RET_ERROR;
413   ompp = ompcp->proc;
414   if (ompp == NULL) return OM_MSG_RET_ERROR;
415   sip = (SeqIdPtr) ompcp->input_data;
416   if (sip == NULL) return OM_MSG_RET_ERROR;
417 
418   if (sip->choice != SEQID_TPG) return OM_MSG_RET_ERROR;
419   tsip = (TextSeqIdPtr) sip->data.ptrvalue;
420   if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;
421 
422   if (tpasmartfetchcmd == NULL) {
423     if (GetAppParam ("SEQUIN", "TPASMART", "FETCHSCRIPT", NULL, cmmd, sizeof (cmmd))) {
424         tpasmartfetchcmd = StringSaveNoNull (cmmd);
425     }
426   }
427   if (tpasmartfetchcmd == NULL) return OM_MSG_RET_ERROR;
428 
429   TmpNam (path);
430 
431 #ifdef OS_UNIX
432   sprintf (cmmd, "csh %s %s > %s", tpasmartfetchcmd, tsip->accession, path);
433   system (cmmd);
434 #endif
435 #ifdef OS_MSWIN
436   sprintf (cmmd, "%s %s -o %s", tpasmartfetchcmd, tsip->accession, path);
437   system (cmmd);
438 #endif
439 
440   fp = FileOpen (path, "r");
441   if (fp == NULL) {
442     FileRemove (path);
443     return OM_MSG_RET_ERROR;
444   }
445   dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
446   FileClose (fp);
447   FileRemove (path);
448 
449   if (dataptr == NULL) return OM_MSG_RET_OK;
450 
451   sep = GetTopSeqEntryForEntityID (entityID);
452   if (sep == NULL) return OM_MSG_RET_ERROR;
453   bsp = BioseqFindInSeqEntry (sip, sep);
454   ompcp->output_data = (Pointer) bsp;
455   ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
456   return OM_MSG_RET_DONE;
457 }
458 
459 static Boolean TPASmartFetchEnable (void)
460 
461 {
462   ObjMgrProcLoad (OMPROC_FETCH, tpasmartfetchproc, tpasmartfetchproc,
463                   OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
464                   TPASmartBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
465   return TRUE;
466 }
467 #endif
468 
469 static ValNodePtr DoLockFarComponents (
470   SeqEntryPtr sep,
471   DRFlagPtr drfp
472 )
473 
474 {
475   ValNodePtr  rsult;
476 
477 #ifdef INTERNAL_NCBI_ASNDISC
478   if (drfp->useThreads) {
479     Message (MSG_POST, "Threads will not be used in this executable");
480     drfp->useThreads = FALSE;;
481   }
482 #endif
483 
484   if (NlmThreadsAvailable () && drfp->useThreads) {
485     rsult = AdvcLockFarComponents (sep, TRUE, drfp->farFetchCDSproducts, drfp->farFetchCDSproducts, NULL, TRUE);
486   } else if (drfp->useThreads) {
487     Message (MSG_POST, "Threads not available in this executable");
488     rsult = AdvcLockFarComponents (sep, TRUE, drfp->farFetchCDSproducts, drfp->farFetchCDSproducts, NULL, FALSE);
489   } else {
490     rsult = AdvcLockFarComponents (sep, TRUE, drfp->farFetchCDSproducts, drfp->farFetchCDSproducts, NULL, FALSE);
491   }
492 
493   return rsult;
494 }
495 
496 
497 static void ReleaseDiscrepancyReportSeqEntries (DRFlagPtr drfp)
498 {
499   ValNodePtr vnp;
500   SeqEntryPtr sep;
501   ObjMgrPtr   omp;
502 
503   if (drfp == NULL) {
504     return;
505   }
506 
507   for (vnp = drfp->sep_list; vnp != NULL; vnp = vnp->next) {
508     sep = vnp->data.ptrvalue;
509     SeqEntryFree (sep);
510     omp = ObjMgrGet ();
511     ObjMgrReapOne (omp);
512   }
513   SeqMgrClearBioseqIndex ();
514   ObjMgrFreeCache (0);
515   FreeSeqIdGiCache ();
516   SeqEntrySetScope (NULL);
517   drfp->sep_list = ValNodeFree (drfp->sep_list);
518   
519   drfp->bsplist = UnlockFarComponents (drfp->bsplist);
520 }
521 
522 
523 static void ProcessSeqEntryList (DRFlagPtr drfp, CharPtr filename)
524 {
525   ValNodePtr  discrepancy_list;
526   FILE        *ofp = NULL;
527   Char        path [PATH_MAX];
528   CharPtr     ptr;
529 
530   if (drfp == NULL || drfp->sep_list == NULL) return;
531 
532   if (StringDoesHaveText (drfp->output_dir)) {
533     if (StringLen (drfp->output_dir) > PATH_MAX) {
534       Message (MSG_ERROR, "Unable to generate output file - path name is too long");
535       return;
536     }
537     StringCpy (path, drfp->output_dir);
538 #ifdef OS_WINNT
539     ptr = StringRChr (filename, '\\');
540     if (path[StringLen(path) - 1] != '\\') {
541       StringCat (path, "\\");
542     }
543 #else
544     ptr = StringRChr (filename, '/');
545     if (path[StringLen(path) - 1] != '/') {
546       StringCat (path, "/");
547     }
548 #endif;
549     if (ptr == NULL) {
550       StringNCat (path, filename, PATH_MAX - StringLen(path) - 1);
551     } else {
552       StringNCat (path, ptr + 1, PATH_MAX - StringLen(path) - 1);
553     }
554   } else {
555     StringNCpy_0 (path, filename, sizeof (path));
556   }
557   ptr = StringRChr (path, '.');
558   if (ptr != NULL) {
559     *ptr = '\0';
560   }
561   if (StringDoesHaveText (drfp->output_suffix)) {
562     StringNCat (path, drfp->output_suffix, PATH_MAX - StringLen(path) - 1);
563     path[PATH_MAX - 1] = 0;
564   } else {
565     StringCat (path, ".dr");
566   }
567   ofp = FileOpen (path, "w");
568 
569   discrepancy_list = CollectDiscrepancies (drfp->global_report->test_config, drfp->sep_list, taxlookup);
570   WriteAsnDiscReport (discrepancy_list, ofp, drfp->global_report->output_config, TRUE);
571   discrepancy_list = FreeClickableList (discrepancy_list);
572 
573   FileClose (ofp);
574 }
575 
576 
577 static void ProcessSingleRecord (
578   CharPtr filename,
579   DRFlagPtr drfp
580 )
581 
582 {
583   AsnIoPtr       aip;
584   BioseqPtr      bsp;
585   ValNodePtr     bsplist_next = NULL;
586   BioseqSetPtr   bssp;
587   Char           path [PATH_MAX];
588   Pointer        dataptr = NULL;
589   Uint2          datatype, entityID = 0;
590   FILE           *fp;
591   SeqEntryPtr    sep;
592 
593   if (StringHasNoText (filename)) return;
594   if (drfp == NULL) return;
595 
596   if (drfp->type == 1) {
597     fp = FileOpen (filename, "r");
598     if (fp == NULL) {
599       Message (MSG_POSTERR, "Failed to open '%s'", path);
600       return;
601     }
602 
603     dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);
604 
605     FileClose (fp);
606 
607     entityID = ObjMgrRegister (datatype, dataptr);
608 
609   } else if (drfp->type >= 2 && drfp->type <= 5) {
610     aip = AsnIoOpen (filename, drfp->binary? "rb" : "r");
611     if (aip == NULL) {
612       Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", filename);
613       return;
614     }
615 
616     SeqMgrHoldIndexing (TRUE);
617     switch (drfp->type) {
618       case 2 :
619         dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
620         datatype = OBJ_SEQENTRY;
621         break;
622       case 3 :
623         dataptr = (Pointer) BioseqAsnRead (aip, NULL);
624         datatype = OBJ_BIOSEQ;
625         break;
626       case 4 :
627         dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
628         datatype = OBJ_BIOSEQSET;
629         break;
630       case 5 :
631         dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
632         datatype = OBJ_SEQSUB;
633         break;
634       default :
635         break;
636     }
637     SeqMgrHoldIndexing (FALSE);
638 
639     AsnIoClose (aip);
640 
641     entityID = ObjMgrRegister (datatype, dataptr);
642 
643   } else {
644     Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) drfp->type);
645     return;
646   }
647 
648   if (entityID < 1 || dataptr == NULL) {
649     Message (MSG_POSTERR, "Data read failed for input file '%s'", filename);
650     return;
651   }
652 
653   if (SeqMgrFeaturesAreIndexed(entityID) == 0) {
654     SeqMgrIndexFeatures (entityID, NULL);
655   }
656 
657   if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
658         datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
659 
660     sep = GetTopSeqEntryForEntityID (entityID);
661 
662     if (sep == NULL) {
663       sep = SeqEntryNew ();
664       if (sep != NULL) {
665         if (datatype == OBJ_BIOSEQ) {
666           bsp = (BioseqPtr) dataptr;
667           sep->choice = 1;
668           sep->data.ptrvalue = bsp;
669           SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
670         } else if (datatype == OBJ_BIOSEQSET) {
671           bssp = (BioseqSetPtr) dataptr;
672           sep->choice = 2;
673           sep->data.ptrvalue = bssp;
674           SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
675         } else {
676           sep = SeqEntryFree (sep);
677         }
678       }
679       sep = GetTopSeqEntryForEntityID (entityID);
680     }
681 
682     if (sep != NULL) {
683       ValNodeAddPointer (&(drfp->sep_list), 0, sep);
684 
685       if (drfp->lock) {
686         bsplist_next = DoLockFarComponents (sep, drfp);
687         ValNodeLink (&(drfp->bsplist), bsplist_next);
688       }
689     }
690   } else {
691     Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
692   }
693 
694   SeqEntrySetScope (NULL);
695 }
696 
697 static void ProcessMultipleRecord (
698   CharPtr filename,
699   DRFlagPtr drfp
700 )
701 
702 {
703   AsnIoPtr        aip;
704   AsnModulePtr    amp;
705   AsnTypePtr      atp, atp_bss, atp_desc, atp_sbp, atp_se, atp_ssp;
706   ValNodePtr      bsplist_next;
707   Int2            maxcount = 0;
708   CitSubPtr       csp = NULL;
709   FILE            *fp, *ofp = NULL;
710   Int4            numrecords = 0;
711   SeqEntryPtr     sep;
712   ObjValNode      ovn;
713   Pubdesc         pd;
714   SubmitBlockPtr  sbp = NULL;
715   SeqDescrPtr     subcit = NULL;
716   ValNode         vn;
717 #ifdef OS_UNIX
718   Char            cmmd [256];
719   Boolean         detailed_report = FALSE;
720   CharPtr         gzcatprog;
721   Boolean         memory_usage = FALSE;
722   int             ret;
723   Boolean         usedPopen = FALSE;
724 #endif
725 
726   if (StringHasNoText (filename)) return;
727   if (drfp == NULL) return;
728 
729 #ifndef OS_UNIX
730   if (drfp->compressed) {
731     Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines");
732     return;
733   }
734 #endif
735 
736   amp = AsnAllModPtr ();
737   if (amp == NULL) {
738     Message (MSG_POSTERR, "Unable to load AsnAllModPtr");
739     return;
740   }
741 
742   atp_ssp = AsnFind ("Seq-submit");
743   if (atp_ssp == NULL) {
744     Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-submit");
745     return;
746   }
747 
748   atp_sbp = AsnFind ("Seq-submit.sub");
749   if (atp_sbp == NULL) {
750     Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-submit.sub");
751     return;
752   }
753 
754   atp_bss = AsnFind ("Bioseq-set");
755   if (atp_bss == NULL) {
756     Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set");
757     return;
758   }
759 
760   atp_desc = AsnFind ("Bioseq-set.descr");
761   if (atp_desc == NULL) {
762     Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.descr");
763     return;
764   }
765 
766   atp_se = AsnFind ("Bioseq-set.seq-set.E");
767   if (atp_se == NULL) {
768     Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.seq-set.E");
769     return;
770   }
771 
772 #ifdef OS_UNIX
773   if (getenv ("ASNVAL_LOG_OBJMGR_REPORT") != NULL) {
774     detailed_report = TRUE;
775   }
776   if (getenv ("ASNVAL_LOG_MEMORY_REPORT") != NULL) {
777     memory_usage = TRUE;
778   }
779 
780   if (drfp->compressed) {
781     gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
782     if (gzcatprog != NULL) {
783       sprintf (cmmd, "%s %s", gzcatprog, filename);
784     } else {
785       ret = system ("gzcat -h >/dev/null 2>&1");
786       if (ret == 0) {
787         sprintf (cmmd, "gzcat %s", filename);
788       } else if (ret == -1) {
789         Message (MSG_POSTERR, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
790         return;
791       } else {
792         ret = system ("zcat -h >/dev/null 2>&1");
793         if (ret == 0) {
794           sprintf (cmmd, "zcat %s", filename);
795         } else if (ret == -1) {
796           Message (MSG_POSTERR, "Unable to fork or exec zcat in ScanBioseqSetRelease");
797           return;
798         } else {
799           Message (MSG_POSTERR, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
800           return;
801         }
802       }
803     }
804     fp = popen (cmmd, /* drfp->binary? "rb" : */ "r");
805     usedPopen = TRUE;
806   } else {
807     fp = FileOpen (filename, drfp->binary? "rb" : "r");
808   }
809 #else
810   fp = FileOpen (filename, drfp->binary? "rb" : "r");
811 #endif
812   if (fp == NULL) {
813     Message (MSG_POSTERR, "FileOpen failed for input file '%s'", filename);
814     return;
815   }
816 
817   aip = AsnIoNew (drfp->binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
818   if (aip == NULL) {
819     Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", filename);
820     return;
821   }
822 
823   if (drfp->type == 4) {
824     atp = atp_bss;
825   } else if (drfp->type == 5) {
826     atp = atp_ssp;
827   } else {
828     Message (MSG_ERROR, "Batch processing type not set properly");
829     return;
830   }
831 
832   while ((atp = AsnReadId (aip, amp, atp)) != NULL && maxcount < drfp->maxcount) {
833     if (atp == atp_se) {
834 
835       SeqMgrHoldIndexing (TRUE);
836       sep = SeqEntryAsnRead (aip, atp);
837       SeqMgrHoldIndexing (FALSE);
838 
839       ValNodeAddPointer (&(drfp->sep_list), 0, sep);
840 
841       if (drfp->lock) {
842         bsplist_next = DoLockFarComponents (sep, drfp);
843         ValNodeLink (&(drfp->bsplist), bsplist_next);
844       }
845 
846       numrecords++;
847       maxcount++;
848     } else if (atp == atp_sbp) {
849       sbp = SubmitBlockAsnRead (aip, atp);
850       if (sbp != NULL) {
851         csp = sbp->cit;
852         if (csp != NULL) {
853           MemSet ((Pointer) &ovn, 0, sizeof (ObjValNode));
854           MemSet ((Pointer) &pd, 0, sizeof (Pubdesc));
855           MemSet ((Pointer) &vn, 0, sizeof (ValNode));
856           vn.choice = PUB_Sub;
857           vn.data.ptrvalue = (Pointer) csp;
858           vn.next = NULL;
859           pd.pub = &vn;
860           ovn.vn.choice = Seq_descr_pub;
861           ovn.vn.data.ptrvalue = (Pointer) &pd;
862           ovn.vn.next = NULL;
863           ovn.vn.extended = 1;
864           subcit = (SeqDescrPtr) &ovn;
865         }
866       }
867     } else {
868       AsnReadVal (aip, atp, NULL);
869     }
870   }
871 
872 
873 
874   AsnIoFree (aip, FALSE);
875 
876 #ifdef OS_UNIX
877   if (usedPopen) {
878     pclose (fp);
879   } else {
880     FileClose (fp);
881   }
882 #else
883   FileClose (fp);
884 #endif
885 
886 }
887 
888 
889 static void ProcessSeqEntryListWithCollation (GlobalDiscrepReportPtr g, ValNodePtr sep_list, CharPtr filename)
890 {
891   ValNodePtr  vnp;
892   SeqEntryPtr sep;
893 
894   if (g == NULL || sep_list == NULL) return;
895 
896   for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
897     sep = vnp->data.ptrvalue;
898     AddSeqEntryToGlobalDiscrepReport (sep, g, filename);
899   }
900 
901 }
902 
903 
904 static void ProcessOneRecord (CharPtr filename, Pointer userdata)
905 {
906   DRFlagPtr  drfp;
907 
908   drfp = (DRFlagPtr) userdata;
909   if (drfp == NULL) return;
910 
911   if (drfp->batch) {
912     ProcessMultipleRecord (filename, drfp);
913   } else {
914     ProcessSingleRecord (filename, drfp);
915   }
916 
917   if (drfp->outfp == NULL) {
918     ProcessSeqEntryList (drfp, filename);
919   } else {
920     ProcessSeqEntryListWithCollation (drfp->global_report, drfp->sep_list, filename);
921   }
922   ReleaseDiscrepancyReportSeqEntries (drfp);
923 }
924 
925 
/* Args structure contains command-line arguments */

/*
 * Positions of the command-line arguments within myargs.  Each enumerator
 * starts with the option letter of the myargs entry at that position, so
 * the order here must stay in step with the order of the array.
 */
typedef enum {
  p_argInputPath = 0,
  i_argInputFile,
  o_argOutputFile,
  x_argSuffix,
  u_argRecurse,
  f_argUseFT,
  e_argEnableTests,
  d_argDisableTests,
  s_argOutputSuffix,
  r_argOutputDir,
  Z_argRemoteCDS,
  a_argType,
  b_argBinary,
  c_argCompressed,
  R_argRemote,
  k_argLocalFetch,
  I_argAsnIdx,
  l_argLockFar,
  T_argThreads,
  X_argExpandCategories,
  S_argSummaryReport,
  B_argBigSequenceReport,
  C_argMaxCount
} DRFlagNum;
953 
/*
 * Command-line argument table.  The entries are listed in the same order
 * as the DRFlagNum enumerators; each entry gives a prompt, a default
 * value and the option letter, among other fields.
 * NOTE(review): the -e, -d and -X prompts carry a hard-coded list of test
 * names, while GetTestNameList builds such a list at run time - confirm
 * which one the program actually shows.
 */
Args myargs [] = {
  {"Path to ASN.1 Files", NULL, NULL, NULL,
    TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
  {"Single Input File", "stdin", NULL, NULL,
    TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
  {"Single Output File", NULL, NULL, NULL,
    TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
  {"File Selection Substring", ".sqn", NULL, NULL,
    TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
  {"Recurse", "F", NULL, NULL,
    TRUE, 'u', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Use Feature Table Output Format", "F", NULL, NULL,
    FALSE, 'f', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Enable Tests (comma-delimited list of test names)\n\tMISSING_GENES\n\tEXTRA_GENES\n\tMISSING_LOCUS_TAGS\n\tDUPLICATE_LOCUS_TAGS\n\tBAD_LOCUS_TAG_FORMAT\n"
   "\tINCONSISTENT_LOCUS_TAG_PREFIX\n\tNON_GENE_LOCUS_TAG\n\tMISSING_PROTEIN_ID\n\tINCONSISTENT_PROTEIN_ID\n"
   "\tFEATURE_LOCATION_CONFLICT\n\tGENE_PRODUCT_CONFLICT\n\tDUPLICATE_GENE_LOCUS\n\tEC_NUMBER_NOTE\n\tPSEUDO_MISMATCH\n"
   "\tJOINED_FEATURES\n\tOVERLAPPING_GENES\n\tOVERLAPPING_CDS\n\tSHORT_CONTIG\n\tINCONSISTENT_BIOSOURCE\n\tSUSPECT_PRODUCT_NAMES\n"
   "\tINCONSISTENT_SOURCE_DEFLINE\n\tPARTIAL_CDS_COMPLETE_SEQUENCE\n\tEC_NUMBER_ON_UNKNOWN_PROTEIN\n\tTAX_LOOKUP_MISSING\n"
   "\tTAX_LOOKUP_MISMATCH\n\tSHORT_SEQUENCES\n\tSUSPECT_PHRASES\n", "", NULL, NULL,
    TRUE, 'e', ARG_STRING, 0.0, 0, NULL},
  {"Disable Tests (comma-delimited list of test names)\n\tMISSING_GENES\n\tEXTRA_GENES\n\tMISSING_LOCUS_TAGS\n\tDUPLICATE_LOCUS_TAGS\n\tBAD_LOCUS_TAG_FORMAT\n"
   "\tINCONSISTENT_LOCUS_TAG_PREFIX\n\tNON_GENE_LOCUS_TAG\n\tMISSING_PROTEIN_ID\n\tINCONSISTENT_PROTEIN_ID\n"
   "\tFEATURE_LOCATION_CONFLICT\n\tGENE_PRODUCT_CONFLICT\n\tDUPLICATE_GENE_LOCUS\n\tEC_NUMBER_NOTE\n\tPSEUDO_MISMATCH\n"
   "\tJOINED_FEATURES\n\tOVERLAPPING_GENES\n\tOVERLAPPING_CDS\n\tSHORT_CONTIG\n\tINCONSISTENT_BIOSOURCE\n\tSUSPECT_PRODUCT_NAMES\n"
   "\tINCONSISTENT_SOURCE_DEFLINE\n\tPARTIAL_CDS_COMPLETE_SEQUENCE\n\tEC_NUMBER_ON_UNKNOWN_PROTEIN\n\tTAX_LOOKUP_MISSING\n"
   "\tTAX_LOOKUP_MISMATCH\n\tSHORT_SEQUENCES\n\tSUSPECT_PHRASES\n", "", NULL, NULL,
    TRUE, 'd', ARG_STRING, 0.0, 0, NULL},
  {"Output File Suffix", ".dr", NULL, NULL,
    TRUE, 's', ARG_STRING, 0.0, 0, NULL},
  {"Output Directory", NULL, NULL, NULL,
    TRUE, 'r', ARG_STRING, 0.0, 0, NULL},
  {"Remote CDS Product Fetch", "F", NULL, NULL,
    TRUE, 'Z', ARG_BOOLEAN, 0.0, 0, NULL},
  {"ASN.1 Type (a Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, t Batch Bioseq-set, u Batch Seq-submit)", "a", NULL, NULL,
    TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
  {"Batch File is Binary", "F", NULL, NULL,
    TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Batch File is Compressed", "F", NULL, NULL,
    TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Remote Fetching from ID", "F", NULL, NULL,
    TRUE, 'R', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Local Fetching", "F", NULL, NULL,
    TRUE, 'k', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Path to Indexed Binary ASN.1 Data", NULL, NULL, NULL,
    TRUE, 'I', ARG_STRING, 0.0, 0, NULL},
  {"Lock Components in Advance", "F", NULL, NULL,
    TRUE, 'l', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Use Threads", "F", NULL, NULL,
    TRUE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Expand Report Categories (comma-delimited list of test names or ALL)\n\tALL\n\tMISSING_GENES\n\tEXTRA_GENES\n\tMISSING_LOCUS_TAGS\n\tDUPLICATE_LOCUS_TAGS\n\tBAD_LOCUS_TAG_FORMAT\n"
   "\tINCONSISTENT_LOCUS_TAG_PREFIX\n\tNON_GENE_LOCUS_TAG\n\tMISSING_PROTEIN_ID\n\tINCONSISTENT_PROTEIN_ID\n"
   "\tFEATURE_LOCATION_CONFLICT\n\tGENE_PRODUCT_CONFLICT\n\tDUPLICATE_GENE_LOCUS\n\tEC_NUMBER_NOTE\n\tPSEUDO_MISMATCH\n"
   "\tJOINED_FEATURES\n\tOVERLAPPING_GENES\n\tOVERLAPPING_CDS\n\tSHORT_CONTIG\n\tINCONSISTENT_BIOSOURCE\n\tSUSPECT_PRODUCT_NAMES\n"
   "\tINCONSISTENT_SOURCE_DEFLINE\n\tPARTIAL_CDS_COMPLETE_SEQUENCE\n\tEC_NUMBER_ON_UNKNOWN_PROTEIN\n\tTAX_LOOKUP_MISSING\n"
   "\tTAX_LOOKUP_MISMATCH\n\tSHORT_SEQUENCES\n\tSUSPECT_PHRASES\n", "", NULL, NULL,
    TRUE, 'X', ARG_STRING, 0.0, 0, NULL},
  {"Summary Report", "F", NULL, NULL,
    TRUE, 'S', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Big Sequence Report", "F", NULL, NULL,
  TRUE, 'B', ARG_BOOLEAN, 0.0, 0, NULL},
  {"Max Count", "0", NULL, NULL,
    TRUE, 'C', ARG_INT, 0.0, 0, NULL},
};
1017 
1018 
1019 static CharPtr GetTestNameList (CharPtr intro)
1020 {
1021   Int4 i, len;
1022   CharPtr text;
1023 
1024   len = StringLen (intro) + 1;
1025 
1026   for (i = 0; i < MAX_DISC_TYPE; i++)
1027   {
1028     len += StringLen (GetDiscrepancyTestSettingName (i)) + 2;
1029   }
1030 
1031   text = (CharPtr) MemNew (sizeof (Char) * len);
1032   StringCat (text, intro);
1033   for (i = 0; i < MAX_DISC_TYPE; i++) {
1034     StringCat (text, "\t");
1035     StringCat (text, GetDiscrepancyTestSettingName (i));
1036     StringCat (text, "\n");
1037   }
1038   return text;
1039 }
1040 
1041 
1042 Int2 Main (void)
1043 
1044 {
1045   Char         app [64];
1046   CharPtr      asnidx, directory, infile, outfile, str, suffix, output_dir;
1047   CharPtr      enabled_list, disabled_list, err_msg;
1048   Boolean      batch, binary, compressed, dorecurse,
1049                indexed, local, lock, remote, usethreads;
1050   Int2         type = 0;
1051   DRFlagData   dfd;
1052   Boolean      big_sequence_report;
1053 
1054   /* standard setup */
1055 
1056   ErrSetFatalLevel (SEV_MAX);
1057   ErrSetMessageLevel (SEV_MAX);
1058   ErrClearOptFlags (EO_SHOW_USERSTR);
1059   ErrSetLogfile ("stderr", ELOG_APPEND);
1060   ErrSetOpts (ERR_IGNORE, ERR_LOG_ON);
1061 
1062   UseLocalAsnloadDataAndErrMsg ();
1063   ErrPathReset ();
1064 
1065   if (! AllObjLoad ()) {
1066     Message (MSG_FATAL, "AllObjLoad failed");
1067     return 1;
1068   }
1069   if (! SubmitAsnLoad ()) {
1070     Message (MSG_FATAL, "SubmitAsnLoad failed");
1071     return 1;
1072   }
1073   if (! FeatDefSetLoad ()) {
1074     Message (MSG_FATAL, "FeatDefSetLoad failed");
1075     return 1;
1076   }
1077   if (! SeqCodeSetLoad ()) {
1078     Message (MSG_FATAL, "SeqCodeSetLoad failed");
1079     return 1;
1080   }
1081   if (! GeneticCodeTableLoad ()) {
1082     Message (MSG_FATAL, "GeneticCodeTableLoad failed");
1083     return 1;
1084   }
1085 
1086   /* set up help descriptions for enable and disable */
1087   myargs[e_argEnableTests].prompt = GetTestNameList("Enable Tests (comma-delimited list of test names)\n");
1088   myargs[d_argDisableTests].prompt = GetTestNameList("Disable Tests (comma-delimited list of test names)\n");
1089   myargs[X_argExpandCategories].prompt = GetTestNameList("Expand Report Categories (comma-delimited list of test names or ALL)\n");
1090   /* process command line arguments */
1091 
1092   sprintf (app, "asndisc %s", ASNDISC_APPLICATION);
1093   if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
1094     return 0;
1095   }
1096 
1097   /* additional setup modifications */
1098   MemSet (&dfd, 0, sizeof (DRFlagData));
1099 
1100   directory = (CharPtr) myargs [p_argInputPath].strvalue;
1101   suffix = (CharPtr) myargs [x_argSuffix].strvalue;
1102   dfd.output_suffix = (CharPtr) myargs [s_argOutputSuffix].strvalue;
1103   infile = (CharPtr) myargs [i_argInputFile].strvalue;
1104   outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
1105   output_dir = (CharPtr) myargs [r_argOutputDir].strvalue;
1106   if (StringDoesHaveText (outfile) && StringDoesHaveText (output_dir)) {
1107     Message (MSG_FATAL, "-o and -q are incompatible: specify the output file name with the full path.");
1108     return 1;
1109   }
1110   if (StringDoesHaveText (output_dir)) {
1111     dfd.output_dir = output_dir;
1112     if (! CreateDir (output_dir)) {
1113       Message (MSG_FATAL, "Unable to create output directory %s", output_dir);
1114     }
1115   }
1116 
1117   dorecurse = (Boolean) myargs [u_argRecurse].intvalue;
1118   remote = (Boolean ) myargs [R_argRemote].intvalue;
1119   local = (Boolean) myargs [k_argLocalFetch].intvalue;
1120 
1121   asnidx = (CharPtr) myargs [I_argAsnIdx].strvalue;
1122   indexed = (Boolean) StringDoesHaveText (asnidx);
1123   lock = (Boolean) myargs [l_argLockFar].intvalue;
1124   usethreads = (Boolean) myargs [T_argThreads].intvalue;
1125   dfd.farFetchCDSproducts = (Boolean) myargs [Z_argRemoteCDS].intvalue;
1126 
1127   /* set up Discrepancy Report Configuration */
1128   dfd.global_report = GlobalDiscrepReportNew ();
1129   dfd.global_report->test_config = DiscrepancyConfigNew();
1130   DisableTRNATests (dfd.global_report->test_config);
1131 
1132   ExpandDiscrepancyReportTestsFromString ((CharPtr) myargs [X_argExpandCategories].strvalue, TRUE, dfd.global_report->output_config);
1133   dfd.global_report->output_config->summary_report = (Boolean) myargs [S_argSummaryReport].intvalue;
1134 
1135   big_sequence_report = (Boolean) myargs [B_argBigSequenceReport].intvalue;
1136 
1137   enabled_list = (CharPtr) myargs [e_argEnableTests].strvalue;
1138   disabled_list = (CharPtr) myargs [d_argDisableTests].strvalue;
1139 
1140 
1141 #ifdef INTERNAL_NCBI_ASNDISC
1142   dfd.global_report->taxlookup = CheckTaxNamesAgainstTaxDatabase;
1143 #endif
1144   
1145   err_msg = NULL;
1146   if (StringDoesHaveText (enabled_list) && StringDoesHaveText (disabled_list)) {
1147     err_msg = StringSave ("Cannot specify both -e and -d.  Choose -e to enable only a few tests and disable the rest, choose -d to disable only a few tests and enable the rest.");
1148   } else if (StringDoesHaveText (disabled_list)) {
1149     if (big_sequence_report) {
1150       ConfigureForBigSequence (dfd.global_report->test_config);
1151     } else {
1152       ConfigureForGenomes (dfd.global_report->test_config);
1153     }
1154 
1155     /* now disable tests from string */
1156     err_msg = SetDiscrepancyReportTestsFromString (disabled_list, FALSE, dfd.global_report->test_config);
1157   } else if (StringDoesHaveText (enabled_list)) {
1158     if (big_sequence_report) {
1159       ConfigureForBigSequence (dfd.global_report->test_config);
1160     } else {
1161       ConfigureForGenomes (dfd.global_report->test_config);
1162     }
1163     /* now enable tests from string */
1164     err_msg = SetDiscrepancyReportTestsFromString (enabled_list, TRUE, dfd.global_report->test_config);
1165   } else {
1166     if (big_sequence_report) {
1167       ConfigureForBigSequence (dfd.global_report->test_config);
1168     } else {
1169       ConfigureForGenomes (dfd.global_report->test_config);
1170     }
1171   }
1172   if (err_msg != NULL) {
1173     Message (MSG_FATAL, err_msg);
1174     err_msg = MemFree (err_msg);
1175     return 1;
1176   }
1177 
1178   if ((Boolean) myargs[f_argUseFT].intvalue) {
1179     dfd.global_report->test_config->use_feature_table_format = TRUE;
1180     dfd.global_report->output_config->use_feature_table_format = TRUE;
1181   }
1182 
1183   dfd.maxcount = (Int4) myargs [C_argMaxCount].intvalue;
1184   if (dfd.maxcount < 1) {
1185     dfd.maxcount = INT4_MAX;
1186   }
1187 
1188   batch = FALSE;
1189   binary = (Boolean) myargs [b_argBinary].intvalue;
1190   compressed = (Boolean) myargs [c_argCompressed].intvalue;
1191 
1192   str = myargs [a_argType].strvalue;
1193   if (StringICmp (str, "a") == 0) {
1194     type = 1;
1195   } else if (StringICmp (str, "e") == 0) {
1196     type = 2;
1197   } else if (StringICmp (str, "b") == 0) {
1198     type = 3;
1199   } else if (StringICmp (str, "s") == 0) {
1200     type = 4;
1201   } else if (StringICmp (str, "m") == 0) {
1202     type = 5;
1203   } else if (StringICmp (str, "t") == 0) {
1204     type = 4;
1205     batch = TRUE;
1206   } else if (StringICmp (str, "u") == 0) {
1207     type = 5;
1208     batch = TRUE;
1209   } else {
1210     type = 1;
1211   }
1212 
1213   if ((binary || compressed) && (! batch)) {
1214     if (type == 1) {
1215       Message (MSG_FATAL, "-b or -c cannot be used without -t or -a");
1216       return 1;
1217     }
1218   }
1219 
1220   if (StringHasNoText (directory) && StringHasNoText (infile)) {
1221     Message (MSG_FATAL, "Input path or input file must be specified");
1222     return 1;
1223   }
1224 
1225   /* populate parameter structure */
1226 
1227   dfd.batch = batch;
1228   dfd.binary = binary;
1229   dfd.compressed = compressed;
1230   dfd.lock = lock;
1231   dfd.useThreads = usethreads;
1232   dfd.type = type;
1233   dfd.numrecords = 0;
1234 
1235   if (! StringHasNoText (outfile)) {
1236     dfd.outpath = outfile;
1237     dfd.outfp = FileOpen (outfile, "w");
1238     if (dfd.outfp == NULL) {
1239       Message (MSG_FATAL, "Unable to open single output file");
1240       return 1;
1241     }
1242   }
1243 
1244   /* register fetch functions */
1245 
1246   if (remote) {
1247 #ifdef INTERNAL_NCBI_ASNDISC
1248 
1249     if (! PUBSEQBioseqFetchEnable ("asnval", FALSE)) {
1250       Message (MSG_POSTERR, "PUBSEQBioseqFetchEnable failed");
1251       return 1;
1252     }
1253     dfd.usePUBSEQ = TRUE;
1254     dfd.useThreads = FALSE;
1255 #else
1256     PubSeqFetchEnable ();
1257 #endif
1258   }
1259 
1260   if (local) {
1261     LocalSeqFetchInit (FALSE);
1262   }
1263 
1264   if (indexed) {
1265     AsnIndexedLibFetchEnable (asnidx, TRUE);
1266   }
1267 
1268   if (StringDoesHaveText (directory)) {
1269     DirExplore (directory, NULL, suffix, dorecurse, ProcessOneRecord, (Pointer) &dfd);
1270 
1271   } else if (StringDoesHaveText (infile)) {
1272 
1273     ProcessOneRecord (infile, (Pointer) &dfd);
1274   }
1275   if (dfd.outfp != NULL) {
1276     WriteGlobalDiscrepancyReport (dfd.global_report, dfd.outfp);
1277     FileClose (dfd.outfp);
1278     dfd.outfp = NULL;
1279   }
1280 
1281   dfd.global_report = GlobalDiscrepReportFree (dfd.global_report);
1282 
1283   /* close fetch functions */
1284 
1285   if (indexed) {
1286     AsnIndexedLibFetchDisable ();
1287   }
1288 
1289   if (local) {
1290     LocalSeqFetchDisable ();
1291   }
1292 
1293   if (remote) {
1294 #ifdef INTERNAL_NCBI_ASNDISC
1295     PUBSEQBioseqFetchDisable ();
1296 #else
1297     PubSeqFetchDisable ();
1298 #endif
1299     SeqMgrSetPreCache (NULL);
1300     SeqMgrSetSeqIdSetFunc (NULL);
1301   }
1302 
1303   TransTableFreeAll ();
1304 
1305   ECNumberFSAFreeAll ();
1306 
1307   return 0;
1308 }
1309 
1310 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.