|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/demo/asndisc.c |
source navigation diff markup identifier search freetext search file search |
1 /* asndisc.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asndisc.c
27 *
28 * Author: Jonathan Kans, adapted from asnval.c by Colleen Bollin
29 *
30 * Version Creation Date: 1/23/07
31 *
32 * $Revision: 1.26 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44
45 #include <ncbi.h>
46 #include <objall.h>
47 #include <objsset.h>
48 #include <objsub.h>
49 #include <objfdef.h>
50 #include <seqport.h>
51 #include <sequtil.h>
52 #include <sqnutils.h>
53 #include <subutil.h>
54 #include <gather.h>
55 #include <explore.h>
56 #include <lsqfetch.h>
57 #include <valid.h>
58 #include <pmfapi.h>
59 #ifdef INTERNAL_NCBI_ASNDISC
60 #include <accpubseq.h>
61 #include <tax3api.h>
62 #endif
63
/* Version string of this program; Main formats it as "asndisc <version>" for GetArgs. */
64 #define ASNDISC_APP_VER "1.2"
65
/* Global, non-static copy of the version string. */
66 CharPtr ASNDISC_APPLICATION = ASNDISC_APP_VER;
67
/*
 * Run-time state shared by the record-processing functions in this file.
 * Main zero-fills one instance and hands it to the processing callbacks.
 */
68 typedef struct drflags {
69 Boolean farFetchCDSproducts; /* forwarded to AdvcLockFarComponents when locking far components */
70 Boolean batch; /* TRUE: ProcessMultipleRecord, FALSE: ProcessSingleRecord */
71 Boolean binary; /* open ASN.1 input in binary rather than text mode */
72 Boolean compressed; /* batch input is piped through gzcat/zcat (UNIX only) */
73 Boolean lock; /* lock far components of every Seq-entry that is read */
74 Boolean useThreads; /* request threaded locking in DoLockFarComponents */
75 Boolean usePUBSEQ; /* not referenced in the visible part of this file */
76 Int2 type; /* input type: 1 any, 2 Seq-entry, 3 Bioseq, 4 Bioseq-set, 5 Seq-submit */
77 Int4 maxcount; /* batch mode stops once this many Seq-entries were read */
78 CharPtr outpath; /* not referenced in the visible part of this file */
79 CharPtr output_suffix; /* suffix for per-file report names; ".dr" when empty */
80 CharPtr output_dir; /* optional directory for per-file reports */
81 FILE *outfp; /* when non-NULL, results are collated into global_report instead */
82 Int4 numrecords; /* not referenced in the visible part of this file */
83 ValNodePtr sep_list; /* Seq-entries read from the current input file */
84 ValNodePtr bsplist; /* accumulated results of DoLockFarComponents, released by UnlockFarComponents */
85
86 GlobalDiscrepReportPtr global_report; /* holds the test_config and output_config used for reporting */
87 } DRFlagData, PNTR DRFlagPtr;
88
/*
 * Taxonomy test passed to CollectDiscrepancies by ProcessSeqEntryList.
 * Only builds with INTERNAL_NCBI_ASNDISC supply a real function; all
 * other builds pass NULL.
 */
89 #ifdef INTERNAL_NCBI_ASNDISC
90 const PerformDiscrepancyTest taxlookup = CheckTaxNamesAgainstTaxDatabase;
91 #else
92 const PerformDiscrepancyTest taxlookup = NULL;
93 #endif
94
#ifdef INTERNAL_NCBI_ASNDISC

/*
 * Script-based sequence fetching (INTERNAL_NCBI_ASNDISC builds only).
 *
 * Three sources -- DIRSUB, SMART and TPASMART -- are each served by an
 * external script whose name is read from the FETCHSCRIPT key of the
 * matching section of the SEQUIN settings.  The script is run with the
 * accession as its argument, its output is captured in a temporary file,
 * and that file is parsed with ReadAsnFastaOrFlatFile.
 *
 * The three sources used to be six nearly identical function bodies; they
 * now share the helpers below.  All previously visible names are kept.
 */

static CharPtr dirsubfetchproc = "DirSubBioseqFetch";
static CharPtr dirsubfetchcmd = NULL;

static CharPtr smartfetchproc = "SmartBioseqFetch";
static CharPtr smartfetchcmd = NULL;

static CharPtr tpasmartfetchproc = "TPASmartBioseqFetch";
static CharPtr tpasmartfetchcmd = NULL;

/* Room for a script name (at most 255 characters), a temporary path and an accession. */
#define FETCH_CMMD_MAX (PATH_MAX + 512)

/*
 * Returns the fetch script configured for a settings section, or NULL when
 * none is configured.  The value is looked up once and cached in *cachedcmd.
 */
static CharPtr GetFetchScript (CharPtr section, CharPtr PNTR cachedcmd)

{
  Char  buf [256];

  if (*cachedcmd == NULL) {
    if (GetAppParam ("SEQUIN", section, "FETCHSCRIPT", NULL, buf, sizeof (buf))) {
      *cachedcmd = StringSaveNoNull (buf);
    }
  }
  return *cachedcmd;
}

/*
 * The accession is pasted into a shell command line, and it can originate
 * from the Seq-ids of an input file.  Accept only letters, digits, '_',
 * '.' and '-', and refuse a leading '-', so that it can neither smuggle
 * shell metacharacters nor be taken for an option by the script.
 */
static Boolean AccessionIsShellSafe (CharPtr accn)

{
  Char     ch;
  CharPtr  ptr;

  if (accn == NULL || *accn == '\0' || *accn == '-') return FALSE;

  for (ptr = accn; *ptr != '\0'; ptr++) {
    ch = *ptr;
    if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
        (ch >= '0' && ch <= '9') || ch == '_' || ch == '.' || ch == '-') {
      continue;
    }
    return FALSE;
  }
  return TRUE;
}

/*
 * Runs the fetch script for one accession and parses what it wrote.
 *
 * script    fetch script to run
 * accn      accession handed to the script
 * dataptr   receives the result of ReadAsnFastaOrFlatFile (may be NULL)
 * datatype  passed through to ReadAsnFastaOrFlatFile
 * entityID  passed through to ReadAsnFastaOrFlatFile
 *
 * Returns FALSE when the accession is rejected, the command line would not
 * fit the buffer, or the output file cannot be opened; TRUE otherwise, even
 * if nothing could be parsed.  The temporary file is always removed.
 */
static Boolean RunFetchScript (
  CharPtr script,
  CharPtr accn,
  Pointer PNTR dataptr,
  Uint2Ptr datatype,
  Uint2Ptr entityID
)

{
  Char  cmmd [FETCH_CMMD_MAX];
  FILE  *fp;
  Char  path [PATH_MAX];

  *dataptr = NULL;
  if (! AccessionIsShellSafe (accn)) return FALSE;

  TmpNam (path);

  /* 16 covers the fixed text of either command format plus the terminator */
  if (StringLen (script) + StringLen (accn) + StringLen (path) + 16 > sizeof (cmmd)) {
    FileRemove (path);
    return FALSE;
  }

#ifdef OS_UNIX
  sprintf (cmmd, "csh %s %s > %s", script, accn, path);
  system (cmmd);
#endif
#ifdef OS_MSWIN
  sprintf (cmmd, "%s %s -o %s", script, accn, path);
  system (cmmd);
#endif

  fp = FileOpen (path, "r");
  if (fp == NULL) {
    FileRemove (path);
    return FALSE;
  }
  *dataptr = ReadAsnFastaOrFlatFile (fp, datatype, entityID, FALSE, FALSE, TRUE, FALSE);
  FileClose (fp);
  FileRemove (path);
  return TRUE;
}

/*
 * Shared body of the ReadFrom... functions: clears the output parameters
 * that were supplied, then fetches the accession through the script of the
 * given settings section.  Returns the parsed data, or NULL on any failure.
 */
static Pointer ReadWithFetchScript (
  CharPtr accn,
  Uint2Ptr datatype,
  Uint2Ptr entityID,
  CharPtr section,
  CharPtr PNTR cachedcmd
)

{
  Pointer  dataptr = NULL;
  CharPtr  script;

  if (datatype != NULL) {
    *datatype = 0;
  }
  if (entityID != NULL) {
    *entityID = 0;
  }
  if (StringHasNoText (accn)) return NULL;

  script = GetFetchScript (section, cachedcmd);
  if (script == NULL) return NULL;

  if (! RunFetchScript (script, accn, &dataptr, datatype, entityID)) return NULL;
  return dataptr;
}

/*
 * Shared body of the object manager fetch callbacks.
 *
 * data       the OMProcControlPtr supplied by the object manager
 * idchoice   the only Seq-id choice this source serves
 * section    settings section naming the fetch script
 * cachedcmd  cache slot for that script name
 *
 * Returns OM_MSG_RET_ERROR when the request cannot be served,
 * OM_MSG_RET_OK when the script output held nothing parseable, and
 * OM_MSG_RET_DONE after output_data and output_entityID have been filled in.
 */
static Int2 FetchBioseqWithScript (
  Pointer data,
  Int2 idchoice,
  CharPtr section,
  CharPtr PNTR cachedcmd
)

{
  BioseqPtr         bsp;
  Pointer           dataptr = NULL;
  Uint2             datatype = 0;
  Uint2             entityID = 0;
  OMProcControlPtr  ompcp;
  CharPtr           script;
  SeqEntryPtr       sep;
  SeqIdPtr          sip;
  TextSeqIdPtr      tsip;

  ompcp = (OMProcControlPtr) data;
  if (ompcp == NULL) return OM_MSG_RET_ERROR;
  if (ompcp->proc == NULL) return OM_MSG_RET_ERROR;
  sip = (SeqIdPtr) ompcp->input_data;
  if (sip == NULL) return OM_MSG_RET_ERROR;

  if (sip->choice != idchoice) return OM_MSG_RET_ERROR;
  tsip = (TextSeqIdPtr) sip->data.ptrvalue;
  if (tsip == NULL || StringHasNoText (tsip->accession)) return OM_MSG_RET_ERROR;

  script = GetFetchScript (section, cachedcmd);
  if (script == NULL) return OM_MSG_RET_ERROR;

  if (! RunFetchScript (script, tsip->accession, &dataptr, &datatype, &entityID)) {
    return OM_MSG_RET_ERROR;
  }
  if (dataptr == NULL) return OM_MSG_RET_OK;

  sep = GetTopSeqEntryForEntityID (entityID);
  if (sep == NULL) return OM_MSG_RET_ERROR;
  bsp = BioseqFindInSeqEntry (sip, sep);
  ompcp->output_data = (Pointer) bsp;
  ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
  return OM_MSG_RET_DONE;
}

/* Reads one accession through the DIRSUB fetch script; NULL on failure. */
extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)

{
  return ReadWithFetchScript (accn, datatype, entityID, "DIRSUB", &dirsubfetchcmd);
}

/* Fetch callback serving SEQID_GENBANK ids through the DIRSUB script. */
static Int2 LIBCALLBACK DirSubBioseqFetchFunc (Pointer data)

{
  return FetchBioseqWithScript (data, SEQID_GENBANK, "DIRSUB", &dirsubfetchcmd);
}

/* Registers the DIRSUB fetch callback with the object manager. */
static Boolean DirSubFetchEnable (void)

{
  ObjMgrProcLoad (OMPROC_FETCH, dirsubfetchproc, dirsubfetchproc,
                  OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
                  DirSubBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
  return TRUE;
}

/* Reads one accession through the SMART fetch script; NULL on failure. */
extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)

{
  return ReadWithFetchScript (accn, datatype, entityID, "SMART", &smartfetchcmd);
}

/* Fetch callback serving SEQID_GENBANK ids through the SMART script. */
static Int2 LIBCALLBACK SmartBioseqFetchFunc (Pointer data)

{
  return FetchBioseqWithScript (data, SEQID_GENBANK, "SMART", &smartfetchcmd);
}

/* Registers the SMART fetch callback with the object manager. */
static Boolean SmartFetchEnable (void)

{
  ObjMgrProcLoad (OMPROC_FETCH, smartfetchproc, smartfetchproc,
                  OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
                  SmartBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
  return TRUE;
}

/* Reads one accession through the TPASMART fetch script; NULL on failure. */
extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID)

{
  return ReadWithFetchScript (accn, datatype, entityID, "TPASMART", &tpasmartfetchcmd);
}

/* Fetch callback serving SEQID_TPG ids through the TPASMART script. */
static Int2 LIBCALLBACK TPASmartBioseqFetchFunc (Pointer data)

{
  return FetchBioseqWithScript (data, SEQID_TPG, "TPASMART", &tpasmartfetchcmd);
}

/* Registers the TPASMART fetch callback with the object manager. */
static Boolean TPASmartFetchEnable (void)

{
  ObjMgrProcLoad (OMPROC_FETCH, tpasmartfetchproc, tpasmartfetchproc,
                  OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
                  TPASmartBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
  return TRUE;
}
#endif
468
469 static ValNodePtr DoLockFarComponents (
470 SeqEntryPtr sep,
471 DRFlagPtr drfp
472 )
473
474 {
475 ValNodePtr rsult;
476
477 #ifdef INTERNAL_NCBI_ASNDISC
478 if (drfp->useThreads) {
479 Message (MSG_POST, "Threads will not be used in this executable");
480 drfp->useThreads = FALSE;;
481 }
482 #endif
483
484 if (NlmThreadsAvailable () && drfp->useThreads) {
485 rsult = AdvcLockFarComponents (sep, TRUE, drfp->farFetchCDSproducts, drfp->farFetchCDSproducts, NULL, TRUE);
486 } else if (drfp->useThreads) {
487 Message (MSG_POST, "Threads not available in this executable");
488 rsult = AdvcLockFarComponents (sep, TRUE, drfp->farFetchCDSproducts, drfp->farFetchCDSproducts, NULL, FALSE);
489 } else {
490 rsult = AdvcLockFarComponents (sep, TRUE, drfp->farFetchCDSproducts, drfp->farFetchCDSproducts, NULL, FALSE);
491 }
492
493 return rsult;
494 }
495
496
497 static void ReleaseDiscrepancyReportSeqEntries (DRFlagPtr drfp)
498 {
499 ValNodePtr vnp;
500 SeqEntryPtr sep;
501 ObjMgrPtr omp;
502
503 if (drfp == NULL) {
504 return;
505 }
506
507 for (vnp = drfp->sep_list; vnp != NULL; vnp = vnp->next) {
508 sep = vnp->data.ptrvalue;
509 SeqEntryFree (sep);
510 omp = ObjMgrGet ();
511 ObjMgrReapOne (omp);
512 }
513 SeqMgrClearBioseqIndex ();
514 ObjMgrFreeCache (0);
515 FreeSeqIdGiCache ();
516 SeqEntrySetScope (NULL);
517 drfp->sep_list = ValNodeFree (drfp->sep_list);
518
519 drfp->bsplist = UnlockFarComponents (drfp->bsplist);
520 }
521
522
523 static void ProcessSeqEntryList (DRFlagPtr drfp, CharPtr filename)
524 {
525 ValNodePtr discrepancy_list;
526 FILE *ofp = NULL;
527 Char path [PATH_MAX];
528 CharPtr ptr;
529
530 if (drfp == NULL || drfp->sep_list == NULL) return;
531
532 if (StringDoesHaveText (drfp->output_dir)) {
533 if (StringLen (drfp->output_dir) > PATH_MAX) {
534 Message (MSG_ERROR, "Unable to generate output file - path name is too long");
535 return;
536 }
537 StringCpy (path, drfp->output_dir);
538 #ifdef OS_WINNT
539 ptr = StringRChr (filename, '\\');
540 if (path[StringLen(path) - 1] != '\\') {
541 StringCat (path, "\\");
542 }
543 #else
544 ptr = StringRChr (filename, '/');
545 if (path[StringLen(path) - 1] != '/') {
546 StringCat (path, "/");
547 }
548 #endif;
549 if (ptr == NULL) {
550 StringNCat (path, filename, PATH_MAX - StringLen(path) - 1);
551 } else {
552 StringNCat (path, ptr + 1, PATH_MAX - StringLen(path) - 1);
553 }
554 } else {
555 StringNCpy_0 (path, filename, sizeof (path));
556 }
557 ptr = StringRChr (path, '.');
558 if (ptr != NULL) {
559 *ptr = '\0';
560 }
561 if (StringDoesHaveText (drfp->output_suffix)) {
562 StringNCat (path, drfp->output_suffix, PATH_MAX - StringLen(path) - 1);
563 path[PATH_MAX - 1] = 0;
564 } else {
565 StringCat (path, ".dr");
566 }
567 ofp = FileOpen (path, "w");
568
569 discrepancy_list = CollectDiscrepancies (drfp->global_report->test_config, drfp->sep_list, taxlookup);
570 WriteAsnDiscReport (discrepancy_list, ofp, drfp->global_report->output_config, TRUE);
571 discrepancy_list = FreeClickableList (discrepancy_list);
572
573 FileClose (ofp);
574 }
575
576
577 static void ProcessSingleRecord (
578 CharPtr filename,
579 DRFlagPtr drfp
580 )
581
582 {
583 AsnIoPtr aip;
584 BioseqPtr bsp;
585 ValNodePtr bsplist_next = NULL;
586 BioseqSetPtr bssp;
587 Char path [PATH_MAX];
588 Pointer dataptr = NULL;
589 Uint2 datatype, entityID = 0;
590 FILE *fp;
591 SeqEntryPtr sep;
592
593 if (StringHasNoText (filename)) return;
594 if (drfp == NULL) return;
595
596 if (drfp->type == 1) {
597 fp = FileOpen (filename, "r");
598 if (fp == NULL) {
599 Message (MSG_POSTERR, "Failed to open '%s'", path);
600 return;
601 }
602
603 dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, FALSE, FALSE);
604
605 FileClose (fp);
606
607 entityID = ObjMgrRegister (datatype, dataptr);
608
609 } else if (drfp->type >= 2 && drfp->type <= 5) {
610 aip = AsnIoOpen (filename, drfp->binary? "rb" : "r");
611 if (aip == NULL) {
612 Message (MSG_POSTERR, "AsnIoOpen failed for input file '%s'", filename);
613 return;
614 }
615
616 SeqMgrHoldIndexing (TRUE);
617 switch (drfp->type) {
618 case 2 :
619 dataptr = (Pointer) SeqEntryAsnRead (aip, NULL);
620 datatype = OBJ_SEQENTRY;
621 break;
622 case 3 :
623 dataptr = (Pointer) BioseqAsnRead (aip, NULL);
624 datatype = OBJ_BIOSEQ;
625 break;
626 case 4 :
627 dataptr = (Pointer) BioseqSetAsnRead (aip, NULL);
628 datatype = OBJ_BIOSEQSET;
629 break;
630 case 5 :
631 dataptr = (Pointer) SeqSubmitAsnRead (aip, NULL);
632 datatype = OBJ_SEQSUB;
633 break;
634 default :
635 break;
636 }
637 SeqMgrHoldIndexing (FALSE);
638
639 AsnIoClose (aip);
640
641 entityID = ObjMgrRegister (datatype, dataptr);
642
643 } else {
644 Message (MSG_POSTERR, "Input format type '%d' unrecognized", (int) drfp->type);
645 return;
646 }
647
648 if (entityID < 1 || dataptr == NULL) {
649 Message (MSG_POSTERR, "Data read failed for input file '%s'", filename);
650 return;
651 }
652
653 if (SeqMgrFeaturesAreIndexed(entityID) == 0) {
654 SeqMgrIndexFeatures (entityID, NULL);
655 }
656
657 if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
658 datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
659
660 sep = GetTopSeqEntryForEntityID (entityID);
661
662 if (sep == NULL) {
663 sep = SeqEntryNew ();
664 if (sep != NULL) {
665 if (datatype == OBJ_BIOSEQ) {
666 bsp = (BioseqPtr) dataptr;
667 sep->choice = 1;
668 sep->data.ptrvalue = bsp;
669 SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
670 } else if (datatype == OBJ_BIOSEQSET) {
671 bssp = (BioseqSetPtr) dataptr;
672 sep->choice = 2;
673 sep->data.ptrvalue = bssp;
674 SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
675 } else {
676 sep = SeqEntryFree (sep);
677 }
678 }
679 sep = GetTopSeqEntryForEntityID (entityID);
680 }
681
682 if (sep != NULL) {
683 ValNodeAddPointer (&(drfp->sep_list), 0, sep);
684
685 if (drfp->lock) {
686 bsplist_next = DoLockFarComponents (sep, drfp);
687 ValNodeLink (&(drfp->bsplist), bsplist_next);
688 }
689 }
690 } else {
691 Message (MSG_POSTERR, "Datatype %d not recognized", (int) datatype);
692 }
693
694 SeqEntrySetScope (NULL);
695 }
696
697 static void ProcessMultipleRecord (
698 CharPtr filename,
699 DRFlagPtr drfp
700 )
701
702 {
703 AsnIoPtr aip;
704 AsnModulePtr amp;
705 AsnTypePtr atp, atp_bss, atp_desc, atp_sbp, atp_se, atp_ssp;
706 ValNodePtr bsplist_next;
707 Int2 maxcount = 0;
708 CitSubPtr csp = NULL;
709 FILE *fp, *ofp = NULL;
710 Int4 numrecords = 0;
711 SeqEntryPtr sep;
712 ObjValNode ovn;
713 Pubdesc pd;
714 SubmitBlockPtr sbp = NULL;
715 SeqDescrPtr subcit = NULL;
716 ValNode vn;
717 #ifdef OS_UNIX
718 Char cmmd [256];
719 Boolean detailed_report = FALSE;
720 CharPtr gzcatprog;
721 Boolean memory_usage = FALSE;
722 int ret;
723 Boolean usedPopen = FALSE;
724 #endif
725
726 if (StringHasNoText (filename)) return;
727 if (drfp == NULL) return;
728
729 #ifndef OS_UNIX
730 if (drfp->compressed) {
731 Message (MSG_POSTERR, "Can only decompress on-the-fly on UNIX machines");
732 return;
733 }
734 #endif
735
736 amp = AsnAllModPtr ();
737 if (amp == NULL) {
738 Message (MSG_POSTERR, "Unable to load AsnAllModPtr");
739 return;
740 }
741
742 atp_ssp = AsnFind ("Seq-submit");
743 if (atp_ssp == NULL) {
744 Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-submit");
745 return;
746 }
747
748 atp_sbp = AsnFind ("Seq-submit.sub");
749 if (atp_sbp == NULL) {
750 Message (MSG_POSTERR, "Unable to find ASN.1 type Seq-submit.sub");
751 return;
752 }
753
754 atp_bss = AsnFind ("Bioseq-set");
755 if (atp_bss == NULL) {
756 Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set");
757 return;
758 }
759
760 atp_desc = AsnFind ("Bioseq-set.descr");
761 if (atp_desc == NULL) {
762 Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.descr");
763 return;
764 }
765
766 atp_se = AsnFind ("Bioseq-set.seq-set.E");
767 if (atp_se == NULL) {
768 Message (MSG_POSTERR, "Unable to find ASN.1 type Bioseq-set.seq-set.E");
769 return;
770 }
771
772 #ifdef OS_UNIX
773 if (getenv ("ASNVAL_LOG_OBJMGR_REPORT") != NULL) {
774 detailed_report = TRUE;
775 }
776 if (getenv ("ASNVAL_LOG_MEMORY_REPORT") != NULL) {
777 memory_usage = TRUE;
778 }
779
780 if (drfp->compressed) {
781 gzcatprog = getenv ("NCBI_UNCOMPRESS_BINARY");
782 if (gzcatprog != NULL) {
783 sprintf (cmmd, "%s %s", gzcatprog, filename);
784 } else {
785 ret = system ("gzcat -h >/dev/null 2>&1");
786 if (ret == 0) {
787 sprintf (cmmd, "gzcat %s", filename);
788 } else if (ret == -1) {
789 Message (MSG_POSTERR, "Unable to fork or exec gzcat in ScanBioseqSetRelease");
790 return;
791 } else {
792 ret = system ("zcat -h >/dev/null 2>&1");
793 if (ret == 0) {
794 sprintf (cmmd, "zcat %s", filename);
795 } else if (ret == -1) {
796 Message (MSG_POSTERR, "Unable to fork or exec zcat in ScanBioseqSetRelease");
797 return;
798 } else {
799 Message (MSG_POSTERR, "Unable to find zcat or gzcat in ScanBioseqSetRelease - please edit your PATH environment variable");
800 return;
801 }
802 }
803 }
804 fp = popen (cmmd, /* drfp->binary? "rb" : */ "r");
805 usedPopen = TRUE;
806 } else {
807 fp = FileOpen (filename, drfp->binary? "rb" : "r");
808 }
809 #else
810 fp = FileOpen (filename, drfp->binary? "rb" : "r");
811 #endif
812 if (fp == NULL) {
813 Message (MSG_POSTERR, "FileOpen failed for input file '%s'", filename);
814 return;
815 }
816
817 aip = AsnIoNew (drfp->binary? ASNIO_BIN_IN : ASNIO_TEXT_IN, fp, NULL, NULL, NULL);
818 if (aip == NULL) {
819 Message (MSG_ERROR, "AsnIoNew failed for input file '%s'", filename);
820 return;
821 }
822
823 if (drfp->type == 4) {
824 atp = atp_bss;
825 } else if (drfp->type == 5) {
826 atp = atp_ssp;
827 } else {
828 Message (MSG_ERROR, "Batch processing type not set properly");
829 return;
830 }
831
832 while ((atp = AsnReadId (aip, amp, atp)) != NULL && maxcount < drfp->maxcount) {
833 if (atp == atp_se) {
834
835 SeqMgrHoldIndexing (TRUE);
836 sep = SeqEntryAsnRead (aip, atp);
837 SeqMgrHoldIndexing (FALSE);
838
839 ValNodeAddPointer (&(drfp->sep_list), 0, sep);
840
841 if (drfp->lock) {
842 bsplist_next = DoLockFarComponents (sep, drfp);
843 ValNodeLink (&(drfp->bsplist), bsplist_next);
844 }
845
846 numrecords++;
847 maxcount++;
848 } else if (atp == atp_sbp) {
849 sbp = SubmitBlockAsnRead (aip, atp);
850 if (sbp != NULL) {
851 csp = sbp->cit;
852 if (csp != NULL) {
853 MemSet ((Pointer) &ovn, 0, sizeof (ObjValNode));
854 MemSet ((Pointer) &pd, 0, sizeof (Pubdesc));
855 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
856 vn.choice = PUB_Sub;
857 vn.data.ptrvalue = (Pointer) csp;
858 vn.next = NULL;
859 pd.pub = &vn;
860 ovn.vn.choice = Seq_descr_pub;
861 ovn.vn.data.ptrvalue = (Pointer) &pd;
862 ovn.vn.next = NULL;
863 ovn.vn.extended = 1;
864 subcit = (SeqDescrPtr) &ovn;
865 }
866 }
867 } else {
868 AsnReadVal (aip, atp, NULL);
869 }
870 }
871
872
873
874 AsnIoFree (aip, FALSE);
875
876 #ifdef OS_UNIX
877 if (usedPopen) {
878 pclose (fp);
879 } else {
880 FileClose (fp);
881 }
882 #else
883 FileClose (fp);
884 #endif
885
886 }
887
888
889 static void ProcessSeqEntryListWithCollation (GlobalDiscrepReportPtr g, ValNodePtr sep_list, CharPtr filename)
890 {
891 ValNodePtr vnp;
892 SeqEntryPtr sep;
893
894 if (g == NULL || sep_list == NULL) return;
895
896 for (vnp = sep_list; vnp != NULL; vnp = vnp->next) {
897 sep = vnp->data.ptrvalue;
898 AddSeqEntryToGlobalDiscrepReport (sep, g, filename);
899 }
900
901 }
902
903
904 static void ProcessOneRecord (CharPtr filename, Pointer userdata)
905 {
906 DRFlagPtr drfp;
907
908 drfp = (DRFlagPtr) userdata;
909 if (drfp == NULL) return;
910
911 if (drfp->batch) {
912 ProcessMultipleRecord (filename, drfp);
913 } else {
914 ProcessSingleRecord (filename, drfp);
915 }
916
917 if (drfp->outfp == NULL) {
918 ProcessSeqEntryList (drfp, filename);
919 } else {
920 ProcessSeqEntryListWithCollation (drfp->global_report, drfp->sep_list, filename);
921 }
922 ReleaseDiscrepancyReportSeqEntries (drfp);
923 }
924
925
926 /* Args structure contains command-line arguments */
927
/*
 * Indexes into myargs.  The enumerators are listed in the same order as the
 * entries of that table, so the two must be edited together.  The trailing
 * comments give the command-line letter of the matching myargs entry.
 */
928 typedef enum {
929 p_argInputPath = 0, /* -p */
930 i_argInputFile, /* -i */
931 o_argOutputFile, /* -o */
932 x_argSuffix, /* -x */
933 u_argRecurse, /* -u */
934 f_argUseFT, /* -f */
935 e_argEnableTests, /* -e */
936 d_argDisableTests, /* -d */
937 s_argOutputSuffix, /* -s */
938 r_argOutputDir, /* -r */
939 Z_argRemoteCDS, /* -Z */
940 a_argType, /* -a */
941 b_argBinary, /* -b */
942 c_argCompressed, /* -c */
943 R_argRemote, /* -R */
944 k_argLocalFetch, /* -k */
945 I_argAsnIdx, /* -I */
946 l_argLockFar, /* -l */
947 T_argThreads, /* -T */
948 X_argExpandCategories, /* -X */
949 S_argSummaryReport, /* -S */
950 B_argBigSequenceReport, /* -B */
951 C_argMaxCount /* -C */
952 } DRFlagNum;
953
/*
 * Command-line argument table handed to GetArgs.  Entries appear in the
 * order of the DRFlagNum enumerators, which Main uses to index this array.
 * Each entry starts with its prompt text and default value; the quoted
 * character is the command-line letter.
 *
 * The long test-name prompts of the -e, -d and -X entries are placeholders:
 * Main replaces them with text built by GetTestNameList before calling
 * GetArgs.
 */
954 Args myargs [] = {
955 {"Path to ASN.1 Files", NULL, NULL, NULL,
956 TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
957 {"Single Input File", "stdin", NULL, NULL,
958 TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
959 {"Single Output File", NULL, NULL, NULL,
960 TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
961 {"File Selection Substring", ".sqn", NULL, NULL,
962 TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
963 {"Recurse", "F", NULL, NULL,
964 TRUE, 'u', ARG_BOOLEAN, 0.0, 0, NULL},
965 {"Use Feature Table Output Format", "F", NULL, NULL,
966 FALSE, 'f', ARG_BOOLEAN, 0.0, 0, NULL},
967 {"Enable Tests (comma-delimited list of test names)\n\tMISSING_GENES\n\tEXTRA_GENES\n\tMISSING_LOCUS_TAGS\n\tDUPLICATE_LOCUS_TAGS\n\tBAD_LOCUS_TAG_FORMAT\n"
968 "\tINCONSISTENT_LOCUS_TAG_PREFIX\n\tNON_GENE_LOCUS_TAG\n\tMISSING_PROTEIN_ID\n\tINCONSISTENT_PROTEIN_ID\n"
969 "\tFEATURE_LOCATION_CONFLICT\n\tGENE_PRODUCT_CONFLICT\n\tDUPLICATE_GENE_LOCUS\n\tEC_NUMBER_NOTE\n\tPSEUDO_MISMATCH\n"
970 "\tJOINED_FEATURES\n\tOVERLAPPING_GENES\n\tOVERLAPPING_CDS\n\tSHORT_CONTIG\n\tINCONSISTENT_BIOSOURCE\n\tSUSPECT_PRODUCT_NAMES\n"
971 "\tINCONSISTENT_SOURCE_DEFLINE\n\tPARTIAL_CDS_COMPLETE_SEQUENCE\n\tEC_NUMBER_ON_UNKNOWN_PROTEIN\n\tTAX_LOOKUP_MISSING\n"
972 "\tTAX_LOOKUP_MISMATCH\n\tSHORT_SEQUENCES\n\tSUSPECT_PHRASES\n", "", NULL, NULL,
973 TRUE, 'e', ARG_STRING, 0.0, 0, NULL},
974 {"Disable Tests (comma-delimited list of test names)\n\tMISSING_GENES\n\tEXTRA_GENES\n\tMISSING_LOCUS_TAGS\n\tDUPLICATE_LOCUS_TAGS\n\tBAD_LOCUS_TAG_FORMAT\n"
975 "\tINCONSISTENT_LOCUS_TAG_PREFIX\n\tNON_GENE_LOCUS_TAG\n\tMISSING_PROTEIN_ID\n\tINCONSISTENT_PROTEIN_ID\n"
976 "\tFEATURE_LOCATION_CONFLICT\n\tGENE_PRODUCT_CONFLICT\n\tDUPLICATE_GENE_LOCUS\n\tEC_NUMBER_NOTE\n\tPSEUDO_MISMATCH\n"
977 "\tJOINED_FEATURES\n\tOVERLAPPING_GENES\n\tOVERLAPPING_CDS\n\tSHORT_CONTIG\n\tINCONSISTENT_BIOSOURCE\n\tSUSPECT_PRODUCT_NAMES\n"
978 "\tINCONSISTENT_SOURCE_DEFLINE\n\tPARTIAL_CDS_COMPLETE_SEQUENCE\n\tEC_NUMBER_ON_UNKNOWN_PROTEIN\n\tTAX_LOOKUP_MISSING\n"
979 "\tTAX_LOOKUP_MISMATCH\n\tSHORT_SEQUENCES\n\tSUSPECT_PHRASES\n", "", NULL, NULL,
980 TRUE, 'd', ARG_STRING, 0.0, 0, NULL},
981 {"Output File Suffix", ".dr", NULL, NULL,
982 TRUE, 's', ARG_STRING, 0.0, 0, NULL},
983 {"Output Directory", NULL, NULL, NULL,
984 TRUE, 'r', ARG_STRING, 0.0, 0, NULL},
985 {"Remote CDS Product Fetch", "F", NULL, NULL,
986 TRUE, 'Z', ARG_BOOLEAN, 0.0, 0, NULL},
987 {"ASN.1 Type (a Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, t Batch Bioseq-set, u Batch Seq-submit)", "a", NULL, NULL,
988 TRUE, 'a', ARG_STRING, 0.0, 0, NULL},
989 {"Batch File is Binary", "F", NULL, NULL,
990 TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
991 {"Batch File is Compressed", "F", NULL, NULL,
992 TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
993 {"Remote Fetching from ID", "F", NULL, NULL,
994 TRUE, 'R', ARG_BOOLEAN, 0.0, 0, NULL},
995 {"Local Fetching", "F", NULL, NULL,
996 TRUE, 'k', ARG_BOOLEAN, 0.0, 0, NULL},
997 {"Path to Indexed Binary ASN.1 Data", NULL, NULL, NULL,
998 TRUE, 'I', ARG_STRING, 0.0, 0, NULL},
999 {"Lock Components in Advance", "F", NULL, NULL,
1000 TRUE, 'l', ARG_BOOLEAN, 0.0, 0, NULL},
1001 {"Use Threads", "F", NULL, NULL,
1002 TRUE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
1003 {"Expand Report Categories (comma-delimited list of test names or ALL)\n\tALL\n\tMISSING_GENES\n\tEXTRA_GENES\n\tMISSING_LOCUS_TAGS\n\tDUPLICATE_LOCUS_TAGS\n\tBAD_LOCUS_TAG_FORMAT\n"
1004 "\tINCONSISTENT_LOCUS_TAG_PREFIX\n\tNON_GENE_LOCUS_TAG\n\tMISSING_PROTEIN_ID\n\tINCONSISTENT_PROTEIN_ID\n"
1005 "\tFEATURE_LOCATION_CONFLICT\n\tGENE_PRODUCT_CONFLICT\n\tDUPLICATE_GENE_LOCUS\n\tEC_NUMBER_NOTE\n\tPSEUDO_MISMATCH\n"
1006 "\tJOINED_FEATURES\n\tOVERLAPPING_GENES\n\tOVERLAPPING_CDS\n\tSHORT_CONTIG\n\tINCONSISTENT_BIOSOURCE\n\tSUSPECT_PRODUCT_NAMES\n"
1007 "\tINCONSISTENT_SOURCE_DEFLINE\n\tPARTIAL_CDS_COMPLETE_SEQUENCE\n\tEC_NUMBER_ON_UNKNOWN_PROTEIN\n\tTAX_LOOKUP_MISSING\n"
1008 "\tTAX_LOOKUP_MISMATCH\n\tSHORT_SEQUENCES\n\tSUSPECT_PHRASES\n", "", NULL, NULL,
1009 TRUE, 'X', ARG_STRING, 0.0, 0, NULL},
1010 {"Summary Report", "F", NULL, NULL,
1011 TRUE, 'S', ARG_BOOLEAN, 0.0, 0, NULL},
1012 {"Big Sequence Report", "F", NULL, NULL,
1013 TRUE, 'B', ARG_BOOLEAN, 0.0, 0, NULL},
1014 {"Max Count", "0", NULL, NULL,
1015 TRUE, 'C', ARG_INT, 0.0, 0, NULL},
1016 };
1017
1018
1019 static CharPtr GetTestNameList (CharPtr intro)
1020 {
1021 Int4 i, len;
1022 CharPtr text;
1023
1024 len = StringLen (intro) + 1;
1025
1026 for (i = 0; i < MAX_DISC_TYPE; i++)
1027 {
1028 len += StringLen (GetDiscrepancyTestSettingName (i)) + 2;
1029 }
1030
1031 text = (CharPtr) MemNew (sizeof (Char) * len);
1032 StringCat (text, intro);
1033 for (i = 0; i < MAX_DISC_TYPE; i++) {
1034 StringCat (text, "\t");
1035 StringCat (text, GetDiscrepancyTestSettingName (i));
1036 StringCat (text, "\n");
1037 }
1038 return text;
1039 }
1040
1041
1042 Int2 Main (void)
1043
1044 {
1045 Char app [64];
1046 CharPtr asnidx, directory, infile, outfile, str, suffix, output_dir;
1047 CharPtr enabled_list, disabled_list, err_msg;
1048 Boolean batch, binary, compressed, dorecurse,
1049 indexed, local, lock, remote, usethreads;
1050 Int2 type = 0;
1051 DRFlagData dfd;
1052 Boolean big_sequence_report;
1053
1054 /* standard setup */
1055
1056 ErrSetFatalLevel (SEV_MAX);
1057 ErrSetMessageLevel (SEV_MAX);
1058 ErrClearOptFlags (EO_SHOW_USERSTR);
1059 ErrSetLogfile ("stderr", ELOG_APPEND);
1060 ErrSetOpts (ERR_IGNORE, ERR_LOG_ON);
1061
1062 UseLocalAsnloadDataAndErrMsg ();
1063 ErrPathReset ();
1064
1065 if (! AllObjLoad ()) {
1066 Message (MSG_FATAL, "AllObjLoad failed");
1067 return 1;
1068 }
1069 if (! SubmitAsnLoad ()) {
1070 Message (MSG_FATAL, "SubmitAsnLoad failed");
1071 return 1;
1072 }
1073 if (! FeatDefSetLoad ()) {
1074 Message (MSG_FATAL, "FeatDefSetLoad failed");
1075 return 1;
1076 }
1077 if (! SeqCodeSetLoad ()) {
1078 Message (MSG_FATAL, "SeqCodeSetLoad failed");
1079 return 1;
1080 }
1081 if (! GeneticCodeTableLoad ()) {
1082 Message (MSG_FATAL, "GeneticCodeTableLoad failed");
1083 return 1;
1084 }
1085
1086 /* set up help descriptions for enable and disable */
1087 myargs[e_argEnableTests].prompt = GetTestNameList("Enable Tests (comma-delimited list of test names)\n");
1088 myargs[d_argDisableTests].prompt = GetTestNameList("Disable Tests (comma-delimited list of test names)\n");
1089 myargs[X_argExpandCategories].prompt = GetTestNameList("Expand Report Categories (comma-delimited list of test names or ALL)\n");
1090 /* process command line arguments */
1091
1092 sprintf (app, "asndisc %s", ASNDISC_APPLICATION);
1093 if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
1094 return 0;
1095 }
1096
1097 /* additional setup modifications */
1098 MemSet (&dfd, 0, sizeof (DRFlagData));
1099
1100 directory = (CharPtr) myargs [p_argInputPath].strvalue;
1101 suffix = (CharPtr) myargs [x_argSuffix].strvalue;
1102 dfd.output_suffix = (CharPtr) myargs [s_argOutputSuffix].strvalue;
1103 infile = (CharPtr) myargs [i_argInputFile].strvalue;
1104 outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
1105 output_dir = (CharPtr) myargs [r_argOutputDir].strvalue;
1106 if (StringDoesHaveText (outfile) && StringDoesHaveText (output_dir)) {
1107 Message (MSG_FATAL, "-o and -q are incompatible: specify the output file name with the full path.");
1108 return 1;
1109 }
1110 if (StringDoesHaveText (output_dir)) {
1111 dfd.output_dir = output_dir;
1112 if (! CreateDir (output_dir)) {
1113 Message (MSG_FATAL, "Unable to create output directory %s", output_dir);
1114 }
1115 }
1116
1117 dorecurse = (Boolean) myargs [u_argRecurse].intvalue;
1118 remote = (Boolean ) myargs [R_argRemote].intvalue;
1119 local = (Boolean) myargs [k_argLocalFetch].intvalue;
1120
1121 asnidx = (CharPtr) myargs [I_argAsnIdx].strvalue;
1122 indexed = (Boolean) StringDoesHaveText (asnidx);
1123 lock = (Boolean) myargs [l_argLockFar].intvalue;
1124 usethreads = (Boolean) myargs [T_argThreads].intvalue;
1125 dfd.farFetchCDSproducts = (Boolean) myargs [Z_argRemoteCDS].intvalue;
1126
1127 /* set up Discrepancy Report Configuration */
1128 dfd.global_report = GlobalDiscrepReportNew ();
1129 dfd.global_report->test_config = DiscrepancyConfigNew();
1130 DisableTRNATests (dfd.global_report->test_config);
1131
1132 ExpandDiscrepancyReportTestsFromString ((CharPtr) myargs [X_argExpandCategories].strvalue, TRUE, dfd.global_report->output_config);
1133 dfd.global_report->output_config->summary_report = (Boolean) myargs [S_argSummaryReport].intvalue;
1134
1135 big_sequence_report = (Boolean) myargs [B_argBigSequenceReport].intvalue;
1136
1137 enabled_list = (CharPtr) myargs [e_argEnableTests].strvalue;
1138 disabled_list = (CharPtr) myargs [d_argDisableTests].strvalue;
1139
1140
1141 #ifdef INTERNAL_NCBI_ASNDISC
1142 dfd.global_report->taxlookup = CheckTaxNamesAgainstTaxDatabase;
1143 #endif
1144
1145 err_msg = NULL;
1146 if (StringDoesHaveText (enabled_list) && StringDoesHaveText (disabled_list)) {
1147 err_msg = StringSave ("Cannot specify both -e and -d. Choose -e to enable only a few tests and disable the rest, choose -d to disable only a few tests and enable the rest.");
1148 } else if (StringDoesHaveText (disabled_list)) {
1149 if (big_sequence_report) {
1150 ConfigureForBigSequence (dfd.global_report->test_config);
1151 } else {
1152 ConfigureForGenomes (dfd.global_report->test_config);
1153 }
1154
1155 /* now disable tests from string */
1156 err_msg = SetDiscrepancyReportTestsFromString (disabled_list, FALSE, dfd.global_report->test_config);
1157 } else if (StringDoesHaveText (enabled_list)) {
1158 if (big_sequence_report) {
1159 ConfigureForBigSequence (dfd.global_report->test_config);
1160 } else {
1161 ConfigureForGenomes (dfd.global_report->test_config);
1162 }
1163 /* now enable tests from string */
1164 err_msg = SetDiscrepancyReportTestsFromString (enabled_list, TRUE, dfd.global_report->test_config);
1165 } else {
1166 if (big_sequence_report) {
1167 ConfigureForBigSequence (dfd.global_report->test_config);
1168 } else {
1169 ConfigureForGenomes (dfd.global_report->test_config);
1170 }
1171 }
1172 if (err_msg != NULL) {
1173 Message (MSG_FATAL, err_msg);
1174 err_msg = MemFree (err_msg);
1175 return 1;
1176 }
1177
1178 if ((Boolean) myargs[f_argUseFT].intvalue) {
1179 dfd.global_report->test_config->use_feature_table_format = TRUE;
1180 dfd.global_report->output_config->use_feature_table_format = TRUE;
1181 }
1182
1183 dfd.maxcount = (Int4) myargs [C_argMaxCount].intvalue;
1184 if (dfd.maxcount < 1) {
1185 dfd.maxcount = INT4_MAX;
1186 }
1187
1188 batch = FALSE;
1189 binary = (Boolean) myargs [b_argBinary].intvalue;
1190 compressed = (Boolean) myargs [c_argCompressed].intvalue;
1191
1192 str = myargs [a_argType].strvalue;
1193 if (StringICmp (str, "a") == 0) {
1194 type = 1;
1195 } else if (StringICmp (str, "e") == 0) {
1196 type = 2;
1197 } else if (StringICmp (str, "b") == 0) {
1198 type = 3;
1199 } else if (StringICmp (str, "s") == 0) {
1200 type = 4;
1201 } else if (StringICmp (str, "m") == 0) {
1202 type = 5;
1203 } else if (StringICmp (str, "t") == 0) {
1204 type = 4;
1205 batch = TRUE;
1206 } else if (StringICmp (str, "u") == 0) {
1207 type = 5;
1208 batch = TRUE;
1209 } else {
1210 type = 1;
1211 }
1212
1213 if ((binary || compressed) && (! batch)) {
1214 if (type == 1) {
1215 Message (MSG_FATAL, "-b or -c cannot be used without -t or -a");
1216 return 1;
1217 }
1218 }
1219
1220 if (StringHasNoText (directory) && StringHasNoText (infile)) {
1221 Message (MSG_FATAL, "Input path or input file must be specified");
1222 return 1;
1223 }
1224
1225 /* populate parameter structure */
1226
1227 dfd.batch = batch;
1228 dfd.binary = binary;
1229 dfd.compressed = compressed;
1230 dfd.lock = lock;
1231 dfd.useThreads = usethreads;
1232 dfd.type = type;
1233 dfd.numrecords = 0;
1234
1235 if (! StringHasNoText (outfile)) {
1236 dfd.outpath = outfile;
1237 dfd.outfp = FileOpen (outfile, "w");
1238 if (dfd.outfp == NULL) {
1239 Message (MSG_FATAL, "Unable to open single output file");
1240 return 1;
1241 }
1242 }
1243
1244 /* register fetch functions */
1245
1246 if (remote) {
1247 #ifdef INTERNAL_NCBI_ASNDISC
1248
1249 if (! PUBSEQBioseqFetchEnable ("asnval", FALSE)) {
1250 Message (MSG_POSTERR, "PUBSEQBioseqFetchEnable failed");
1251 return 1;
1252 }
1253 dfd.usePUBSEQ = TRUE;
1254 dfd.useThreads = FALSE;
1255 #else
1256 PubSeqFetchEnable ();
1257 #endif
1258 }
1259
1260 if (local) {
1261 LocalSeqFetchInit (FALSE);
1262 }
1263
1264 if (indexed) {
1265 AsnIndexedLibFetchEnable (asnidx, TRUE);
1266 }
1267
1268 if (StringDoesHaveText (directory)) {
1269 DirExplore (directory, NULL, suffix, dorecurse, ProcessOneRecord, (Pointer) &dfd);
1270
1271 } else if (StringDoesHaveText (infile)) {
1272
1273 ProcessOneRecord (infile, (Pointer) &dfd);
1274 }
1275 if (dfd.outfp != NULL) {
1276 WriteGlobalDiscrepancyReport (dfd.global_report, dfd.outfp);
1277 FileClose (dfd.outfp);
1278 dfd.outfp = NULL;
1279 }
1280
1281 dfd.global_report = GlobalDiscrepReportFree (dfd.global_report);
1282
1283 /* close fetch functions */
1284
1285 if (indexed) {
1286 AsnIndexedLibFetchDisable ();
1287 }
1288
1289 if (local) {
1290 LocalSeqFetchDisable ();
1291 }
1292
1293 if (remote) {
1294 #ifdef INTERNAL_NCBI_ASNDISC
1295 PUBSEQBioseqFetchDisable ();
1296 #else
1297 PubSeqFetchDisable ();
1298 #endif
1299 SeqMgrSetPreCache (NULL);
1300 SeqMgrSetSeqIdSetFunc (NULL);
1301 }
1302
1303 TransTableFreeAll ();
1304
1305 ECNumberFSAFreeAll ();
1306
1307 return 0;
1308 }
1309
1310 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |