|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/demo/asnmacro.c |
source navigation diff markup identifier search freetext search file search |
1 /* asnmacro.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asnmacro.c
27 *
28 * Author: Colleen Bollin
29 *
30 * Version Creation Date: 4/12/07
31 *
32 * $Revision: 1.5 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44
45 #include <ncbi.h>
46 #include <objall.h>
47 #include <objsset.h>
48 #include <objsub.h>
49 #include <objfdef.h>
50 #include <sequtil.h>
51 #include <gather.h>
52 #include <sqnutils.h>
53 #include <explore.h>
54 #include <actutils.h>
55 #include <algo/blast/api/twoseq_api.h>
56 #define NLM_GENERATED_CODE_PROTO
57 #include <asnmacro.h>
58 #include <objmacro.h>
59 #include <macroapi.h>
60
/* Version string of this tool. */
#define ASNMACRO_APP_VER "1.0"

/*
 * Global (non-static) pointer to the version string; Main() formats it
 * after "asnmacro " to build the application name passed to GetArgs.
 */
CharPtr ASNMACRO_APPLICATION = ASNMACRO_APP_VER;
64
65 /* for alignments */
66 static SeqAlignPtr LIBCALLBACK GetSeqAlign (BioseqPtr bsp1, BioseqPtr bsp2)
67 {
68 BLAST_SummaryOptions *options = NULL;
69 SeqAlignPtr salp = NULL;
70
71 if (bsp1 == NULL || bsp2 == NULL) return NULL;
72
73 BLAST_SummaryOptionsInit(&options);
74 if (bsp1->length > 10000 || bsp2->length > 10000)
75 {
76 options->filter_string = StringSave ("m L");
77 options->word_size = 20;
78 options->cutoff_evalue = act_get_eval (60);
79 options->hint = eBlastHint_None;
80 }
81 else
82 {
83 options->filter_string = StringSave ("m F");
84 }
85 if (ISA_na (bsp1->mol))
86 {
87 options->program = eBlastn;
88 }
89 else
90 {
91 options->program = eBlastp;
92 }
93
94 BLAST_TwoSequencesSearch(options, bsp1, bsp2, &salp);
95 BLAST_SummaryOptionsFree(options);
96 return salp;
97 }
98
99 static SeqAlignPtr LIBCALLBACK GetSeqAlignPiece (SeqLocPtr slp1, SeqLocPtr slp2)
100 {
101 BLAST_SummaryOptions *options = NULL;
102 SBlastSeqalignArray * seqalign_arr=NULL;
103 SeqAlignPtr salp = NULL;
104 BioseqPtr bsp;
105
106 if (slp1 == NULL || slp2 == NULL) return NULL;
107
108
109 bsp = BioseqFindFromSeqLoc (slp1);
110 if (bsp == NULL)
111 {
112 return NULL;
113 }
114
115 BLAST_SummaryOptionsInit(&options);
116
117 if (ISA_na (bsp->mol))
118 {
119 options->program = eBlastn;
120 }
121 else
122 {
123 options->program = eBlastp;
124 }
125
126 options->gapped_calculation = TRUE;
127 options->cutoff_evalue = 10;
128 options->gap_x_dropoff = 100;
129 options->gap_open = 4;
130 options->gap_extend = 1;
131 options->nucleotide_mismatch = -1;
132 options->word_size = 7;
133
134 BLAST_TwoSeqLocSets(options, slp1, slp2, NULL, &seqalign_arr, NULL, NULL, NULL);
135
136 if (seqalign_arr != NULL)
137 {
138 salp = seqalign_arr->array[0];
139 seqalign_arr->array[0] = NULL;
140 seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
141 }
142
143 BLAST_SummaryOptionsFree(options);
144 return salp;
145 }
146
147 static SeqAlignPtr GlobalAlign2Seq (BioseqPtr bsp1, BioseqPtr bsp2, BoolPtr revcomp)
148 {
149 return Sqn_GlobalAlign2SeqEx (bsp1, bsp2, revcomp, GetSeqAlign, GetSeqAlignPiece, TRUE);
150 }
151
152
153
154 typedef struct outputstream {
155 CharPtr results_dir;
156 CharPtr base;
157 CharPtr suffix;
158 CharPtr outfile;
159 CharPtr outsuffix;
160 AsnIoPtr aip;
161 Boolean is_binary;
162 } OutputStreamData, PNTR OutputStreamPtr;
163
164 typedef struct inputstream {
165 CharPtr directory;
166 CharPtr base;
167 CharPtr suffix;
168 Boolean is_binary;
169 Boolean is_seqentry;
170 } InputStreamData, PNTR InputStreamPtr;
171
172 typedef struct asnstream {
173 AsnModulePtr amp;
174 AsnTypePtr atp_se;
175 AsnTypePtr atp_bss;
176 AsnTypePtr atp_bss_se;
177 } AsnStreamData, PNTR AsnStreamPtr;
178
179 static FILE* OpenOneFile (
180 CharPtr directory,
181 CharPtr base,
182 CharPtr suffix
183 )
184
185 {
186 Char file [FILENAME_MAX], path [PATH_MAX];
187
188 if (base == NULL) {
189 base = "";
190 }
191 if (suffix == NULL) {
192 suffix = "";
193 }
194
195 StringNCpy_0 (path, directory, sizeof (path));
196 sprintf (file, "%s%s", base, suffix);
197 FileBuildPath (path, NULL, file);
198
199 return FileOpen (path, "r");
200 }
201
202 static AsnIoPtr AsnIoFromInputStream (
203 InputStreamPtr isp
204 )
205
206 {
207 AsnIoPtr aip;
208 Char file [FILENAME_MAX], path [PATH_MAX];
209 CharPtr read_flag;
210
211 if (isp == NULL) return NULL;
212
213 if (isp->is_binary) {
214 read_flag = "rb";
215 } else {
216 read_flag = "r";
217 }
218
219 if (isp->base == NULL) {
220 aip = AsnIoOpen ("stdin", read_flag);
221 } else {
222 StringNCpy_0 (path, isp->directory, sizeof (path));
223 sprintf (file, "%s%s", isp->base, isp->suffix);
224 FileBuildPath (path, NULL, file);
225 aip = AsnIoOpen (path, read_flag);
226 }
227 return aip;
228 }
229
230
231 static AsnIoPtr AsnIoFromOutputStream (OutputStreamPtr osp)
232 {
233 AsnIoPtr aip;
234 Char file [FILENAME_MAX], path [PATH_MAX];
235 CharPtr write_flag;
236
237 if (osp == NULL) return NULL;
238 if (osp->aip == NULL) {
239 write_flag = osp->is_binary ? "wb" : "w";
240 if (StringDoesHaveText (osp->outfile)) {
241 StringNCpy_0 (path, osp->outfile, sizeof (path));
242 } else {
243 if (osp->base == NULL) {
244 aip = AsnIoOpen ("stdout", write_flag);
245 } else {
246 if (osp->outsuffix == NULL) {
247 osp->outsuffix = "";
248 }
249 StringNCpy_0 (path, osp->results_dir, sizeof (path));
250 sprintf (file, "%s%s%s", osp->base, osp->suffix, osp->outsuffix);
251 FileBuildPath (path, NULL, file);
252 aip = AsnIoOpen (path, write_flag);
253 if (aip == NULL) {
254 Message (MSG_POSTERR, "Unable to write to %s.", path);
255 }
256 }
257 }
258 } else {
259 aip = osp->aip;
260 }
261 return aip;
262 }
263
264 static void WriteOneFile (
265 OutputStreamPtr osp,
266 SeqEntryPtr sep
267 )
268
269 {
270 AsnIoPtr aip;
271
272 aip = AsnIoFromOutputStream (osp);
273 if (aip != NULL) {
274 SeqEntryAsnWrite (sep, aip, NULL);
275 AsnIoFlush (aip);
276 }
277 if (aip != osp->aip) {
278 AsnIoClose (aip);
279 }
280 }
281
282
283 static Uint2 ProcessOneAsn (
284 FILE* fp,
285 CharPtr path,
286 ValNodePtr macro
287 )
288
289 {
290 Pointer dataptr;
291 Uint2 datatype, entityID = 0;
292 SeqEntryPtr sep;
293 Int4 num_fields = 0, num_features = 0;
294
295 if (fp == NULL) return 0;
296
297 dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, TRUE, FALSE, TRUE, FALSE);
298 if (dataptr == NULL) {
299 Message (MSG_POSTERR, "Unable to read data from %s.", path);
300 return 0;
301 }
302
303 SeqMgrIndexFeatures (entityID, NULL);
304 sep = GetTopSeqEntryForEntityID (entityID);
305 ApplyMacroToSeqEntry (sep, macro, &num_fields, &num_features);
306 Message (MSG_POST, "For file %s, macro script affected %d fields and created %d features", path, num_fields, num_features);
307
308 return entityID;
309 }
310
311 /* return -1 if failure, 0 if success */
312 static Int4 ProcessOneRecord (
313 CharPtr directory,
314 OutputStreamPtr osp,
315 ValNodePtr macro
316 )
317
318 {
319 Uint2 entityID;
320 FILE *fp;
321 SeqEntryPtr sep;
322
323 if (osp == NULL) return -1;
324 fp = OpenOneFile (directory, osp->base, osp->suffix);
325 if (fp == NULL) return -1;
326
327 entityID = ProcessOneAsn (fp, osp->base == NULL ? "input stream" : osp->base, macro);
328
329 FileClose (fp);
330
331 if (entityID == 0) return -1;
332
333 /* finish processing */
334
335 sep = GetTopSeqEntryForEntityID (entityID);
336 if (sep != NULL) {
337 WriteOneFile (osp, sep);
338 }
339
340 ObjMgrFreeByEntityID (entityID);
341 return 0;
342 }
343
344 static Int4 ProcessStream (InputStreamPtr isp, OutputStreamPtr osp, AsnStreamPtr asp, ValNodePtr macro)
345 {
346 AsnTypePtr atp, atp_srch;
347 AsnIoPtr asn_in, asn_out;
348 Int4 rval = 0;
349 SeqEntryPtr sep;
350 Uint2 entityID;
351 DataVal av;
352 Int4 num_fields = 0, num_features = 0;
353 Int4 tmp_fields, tmp_features;
354
355 if (isp == NULL || osp == NULL || asp == NULL) return 1;
356
357 asn_in = AsnIoFromInputStream (isp);
358 asn_out = AsnIoFromOutputStream (osp);
359
360 if (isp->is_seqentry) {
361 atp = asp->atp_se;
362 atp_srch = asp->atp_se;
363 }
364 else {
365 atp = asp->atp_bss;
366 atp_srch = asp->atp_bss_se;
367 }
368
369 while ((atp = AsnReadId(asn_in, asp->amp, atp)) != NULL && rval == 0) {
370 if (atp != atp_srch) {
371 AsnReadVal(asn_in, atp, &av);
372 AsnWrite(asn_out, atp, &av);
373 AsnKillValue(atp, &av);
374 continue;
375 }
376 if ((sep = SeqEntryAsnRead(asn_in, atp)) == NULL) {
377 Message (MSG_POSTERR, "SeqEntryAsnRead failure");
378 rval = 1;
379 }
380 if (rval == 0) {
381 entityID = ObjMgrRegister (OBJ_SEQENTRY, sep);
382 tmp_fields = 0;
383 tmp_features = 0;
384 ApplyMacroToSeqEntry (sep, macro, &tmp_fields, &tmp_features);
385 num_fields += tmp_fields;
386 num_features += tmp_features;
387 DeleteMarkedObjects (entityID, 0, NULL);
388 RenormalizeNucProtSets (sep, TRUE);
389 if (! SeqEntryAsnWrite(sep, asn_out, atp)) {
390 Message (MSG_POSTERR, "SeqEntryAsnWrite failure");
391 rval = 1;
392 }
393 AsnIoFlush(asn_out);
394 ObjMgrFreeByEntityID (entityID);
395 }
396 } /* Endwhile, AsnReadId */
397
398 AsnIoClose(asn_in);
399 if (asn_out != osp->aip) {
400 AsnIoClose(asn_out);
401 }
402 Message (MSG_POST, "Macro script affected %d fields and created %d features", num_fields, num_features);
403 return rval;
404 }
405
406 /* return -1 on failure, 0 on success */
407 static Int4 FileRecurse (
408 CharPtr directory,
409 InputStreamPtr isp,
410 OutputStreamPtr osp,
411 AsnStreamPtr asp,
412 ValNodePtr macro
413 )
414
415 {
416 Char path [PATH_MAX];
417 CharPtr ptr;
418 CharPtr str;
419 ValNodePtr head, vnp;
420 CharPtr orig_dir, orig_base;
421 Int4 rval = 0;
422
423 /* get list of all files in source directory */
424
425 head = DirCatalog (directory);
426
427 for (vnp = head; vnp != NULL; vnp = vnp->next) {
428 if (vnp->choice == 0) {
429 str = (CharPtr) vnp->data.ptrvalue;
430 if (StringDoesHaveText (str)) {
431
432 /* does filename have desired substring? */
433
434 ptr = StringStr (str, osp->suffix);
435
436 if (ptr != NULL) {
437
438 /* make sure detected suffix is really at end of filename */
439
440 if (StringCmp (ptr, osp->suffix) == 0) {
441 *ptr = '\0';
442
443 /* process file that has desired suffix (usually .fsa) */
444 osp->base = str;
445 orig_dir = isp->directory;
446 isp->directory = directory;
447 orig_base = isp->base;
448 isp->base = str;
449 if (isp->is_binary) {
450 rval |= ProcessStream (isp, osp, asp, macro);
451 } else {
452 rval |= ProcessOneRecord (directory, osp, macro);
453 }
454 isp->directory = orig_dir;
455 isp->base = orig_base;
456 osp->base = NULL;
457 }
458 }
459 }
460 } else if (vnp->choice == 1) {
461
462 /* recurse into subdirectory */
463
464 StringNCpy_0 (path, directory, sizeof (path));
465 str = (CharPtr) vnp->data.ptrvalue;
466 FileBuildPath (path, str, NULL);
467 rval |= FileRecurse (path, isp, osp, asp, macro);
468 }
469 }
470
471 /* clean up file list */
472
473 ValNodeFreeData (head);
474 return rval;
475 }
476
477 static Boolean SetUpAsnStreamData (AsnStreamPtr asp)
478
479 {
480 if (asp == NULL) return FALSE;
481
482 if (! SeqSetAsnLoad()) {
483 Message (MSG_POSTERR, "Unable to load SeqSet parse tree");
484 return FALSE;
485 }
486 asp->amp = AsnAllModPtr();
487 if (asp->amp == NULL) {
488 Message (MSG_POSTERR, "Unable to obtain ASN.1 module pointer");
489 return FALSE;
490 }
491
492 /* Get pointers to ASN.1 types that must be dealt with in asn_in */
493
494 if ( (asp->atp_bss = AsnFind("Bioseq-set")) == NULL) {
495 Message (MSG_POSTERR, "could not find type Bioseq-set");
496 return FALSE;
497 }
498 if ( (asp->atp_bss_se = AsnFind("Bioseq-set.seq-set.E")) == NULL) {
499 Message (MSG_POSTERR, "AsnFind failure: Bioseq-set.seq-set.E");
500 return FALSE;
501 }
502 if ( (asp->atp_se = AsnFind("Seq-entry")) == NULL) {
503 Message (MSG_POSTERR, "AsnFind failure: Seq-entry");
504 return FALSE;
505 }
506 return TRUE;
507 }
508
509
510 static ValNodePtr ReadMacroFile (CharPtr macro_file)
511 {
512
513 AsnIoPtr aip;
514 ValNodePtr action_list;
515
516 aip = AsnIoOpen (macro_file, "r");
517 if (aip == NULL) {
518 Message (MSG_POSTERR, "Unable to open %s", macro_file);
519 return NULL;
520 }
521 action_list = MacroActionListAsnRead (aip, NULL);
522 if (action_list == NULL) {
523 Message (MSG_POSTERR, "Unable to read action list from %s.", macro_file);
524 }
525 AsnIoClose (aip);
526 return action_list;
527 }
528
529
530 /* Args structure contains command-line arguments */
531
532 #define p_argInputPath 0
533 #define r_argOutputPath 1
534 #define i_argInputFile 2
535 #define o_argOutputFile 3
536 #define x_argSuffix 4
537 #define s_argOutSuffix 5
538 #define b_argInputBinary 6
539 #define e_argInputSeqEntry 7
540 #define d_argOutputBinary 8
541 #define m_argMacroFile 9
542
543 Args myargs [] = {
544 {"Path to Files", NULL, NULL, NULL,
545 TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
546 {"Path for Results", NULL, NULL, NULL,
547 TRUE, 'r', ARG_STRING, 0.0, 0, NULL},
548 {"Single Input File", NULL, NULL, NULL,
549 TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
550 {"Single Output File", NULL, NULL, NULL,
551 TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
552 {"Suffix", ".sqn", NULL, NULL,
553 TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
554 {"Suffix for stripped files", "", NULL, NULL,
555 TRUE, 's', ARG_STRING, 0.0, 0, NULL},
556 {"Input is binary", "F", NULL, NULL,
557 TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
558 {"Input is Seq-entry", "F", NULL, NULL,
559 TRUE, 'e', ARG_BOOLEAN, 0.0, 0, NULL},
560 {"Output is binary", "F", NULL, NULL,
561 TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
562 {"Macro file", "NULL", NULL, NULL,
563 TRUE, 'm', ARG_FILE_IN, 0.0, 0, NULL}
564 };
565
566 Int2 Main(void)
567 {
568 Char app [64];
569 CharPtr directory;
570 CharPtr ptr;
571 Char sfx [32];
572 OutputStreamData osd;
573 InputStreamData isd;
574 AsnStreamData asd;
575 Int4 rval = 0;
576 CharPtr macro_file;
577 ValNodePtr action_list;
578
579 /* standard setup */
580
581 ErrSetFatalLevel (SEV_MAX);
582 ErrClearOptFlags (EO_SHOW_USERSTR);
583 UseLocalAsnloadDataAndErrMsg ();
584 ErrPathReset ();
585
586 /* finish resolving internal connections in ASN.1 parse tables */
587
588 if (! AllObjLoad ()) {
589 Message (MSG_FATAL, "AllObjLoad failed");
590 return 1;
591 }
592 if (! SubmitAsnLoad ()) {
593 Message (MSG_FATAL, "SubmitAsnLoad failed");
594 return 1;
595 }
596 if (! FeatDefSetLoad ()) {
597 Message (MSG_FATAL, "FeatDefSetLoad failed");
598 return 1;
599 }
600 if (! SeqCodeSetLoad ()) {
601 Message (MSG_FATAL, "SeqCodeSetLoad failed");
602 return 1;
603 }
604 if (! GeneticCodeTableLoad ()) {
605 Message (MSG_FATAL, "GeneticCodeTableLoad failed");
606 return 1;
607 }
608
609 SetUpAsnStreamData (&asd);
610
611 /* initialize OuputStreamData */
612 MemSet (&osd, 0, sizeof (osd));
613
614 /* initialize InputStreamData */
615 MemSet (&isd, 0, sizeof (isd));
616
617 /* process command line arguments */
618
619 sprintf (app, "asnmacro %s", ASNMACRO_APPLICATION);
620 if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
621 return 0;
622 }
623
624 macro_file = (CharPtr) myargs [m_argMacroFile].strvalue;
625 action_list = ReadMacroFile (macro_file);
626
627 directory = (CharPtr) myargs [p_argInputPath].strvalue;
628 osd.results_dir = (CharPtr) myargs [r_argOutputPath].strvalue;
629 if (StringHasNoText (osd.results_dir)) {
630 osd.results_dir = NULL;
631 }
632 osd.suffix = (CharPtr) myargs [x_argSuffix].strvalue;
633 osd.outsuffix = (CharPtr) myargs [s_argOutSuffix].strvalue;
634 osd.base = (CharPtr) myargs [i_argInputFile].strvalue;
635 osd.outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
636 if (StringHasNoText (osd.outfile)) {
637 osd.outfile = NULL;
638 }
639 osd.is_binary = (Boolean) myargs [d_argOutputBinary].intvalue;
640
641 if (osd.base == "stdin") {
642 osd.base = NULL;
643 }
644
645 /* if we don't have an output directory or an output file, and the user hasn't provided an
646 * output suffix, add a default.
647 */
648 if (osd.results_dir == NULL && osd.outfile == NULL && StringHasNoText (osd.outsuffix)) {
649 osd.outsuffix = ".processed";
650 }
651
652 isd.is_binary = (Boolean) myargs [b_argInputBinary].intvalue;
653 isd.is_seqentry = (Boolean) myargs [e_argInputSeqEntry].intvalue;
654 isd.directory = directory;
655 isd.base = osd.base;
656 isd.suffix = osd.suffix;
657
658 if (StringDoesHaveText (osd.outfile)) {
659 osd.aip = AsnIoOpen (osd.outfile, "w");
660 if (osd.aip == NULL) {
661 Message (MSG_FATAL, "Unable to open output file");
662 return 1;
663 }
664 } else {
665 if (StringHasNoText (osd.results_dir)) {
666 osd.results_dir = directory;
667 }
668 /* if we're putting the results in a separate directory, strip the directory name from the output base */
669 if (!StringHasNoText (osd.results_dir) && !StringHasNoText (osd.base)) {
670 #ifdef OS_MSWIN
671 ptr = StringRChr (osd.base, '\\');
672 #else
673 ptr = StringRChr (osd.base, '/');
674 #endif
675 if (ptr != NULL) {
676 osd.base = ptr + 1;
677 }
678 }
679 }
680
681
682 if (StringHasNoText(directory) && StringHasNoText(osd.base)) {
683 rval = ProcessStream (&isd, &osd, &asd, action_list);
684 } else if (StringDoesHaveText (osd.base)) {
685 ptr = StringRChr (osd.base, '.');
686 sfx[0] = '\0';
687 if (ptr != NULL) {
688 StringNCpy_0 (sfx, ptr, sizeof (sfx));
689 *ptr = '\0';
690 }
691 osd.suffix = sfx;
692 isd.suffix = sfx;
693 if (isd.is_binary) {
694 rval = ProcessStream (&isd, &osd, &asd, action_list);
695 } else {
696 rval = ProcessOneRecord (directory, &osd, action_list);
697 }
698 } else {
699
700 rval = FileRecurse (directory, &isd, &osd, &asd, action_list);
701 }
702
703 if (osd.aip != NULL) {
704 AsnIoFlush (osd.aip);
705 AsnIoClose (osd.aip);
706 }
707 return rval;
708 }
709 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |