|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/demo/asn2idx.c |
source navigation diff markup identifier search freetext search file search |
1 /* asn2idx.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2idx.c
27 *
28 * Author: Jonathan Kans
29 *
30 * Version Creation Date: 8/2/04
31 *
32 * $Revision: 1.5 $
33 *
34 * File Description:
35 *
36 * Generates accession/file offset indices for Bioseq-set release files
37 *
38 * Modifications:
39 * --------------------------------------------------------------------------
40 * Date Name Description of modification
41 * ------- ---------- -----------------------------------------------------
42 *
43 *
44 * ==========================================================================
45 */
46
47 #include <ncbi.h>
48 #include <objall.h>
49 #include <objsset.h>
50 #include <objsub.h>
51 #include <objfdef.h>
52 #include <sqnutils.h>
53 #include <lsqfetch.h>
54
55 static Boolean NotInFilter (
56 CharPtr filename,
57 ValNodePtr exclude
58 )
59
60 {
61 CharPtr str;
62 ValNodePtr vnp;
63
64 for (vnp = exclude; vnp != NULL; vnp = vnp->next) {
65 str = (CharPtr) vnp->data.ptrvalue;
66 if (StringHasNoText (str)) continue;
67 if (StringStr (filename, str) != NULL) return FALSE;
68 }
69 return TRUE;
70 }
71
72 static void FileRecurse (
73 CharPtr directory,
74 CharPtr results,
75 CharPtr subdir,
76 CharPtr subfile,
77 ValNodePtr exclude,
78 Boolean binary,
79 Boolean dorecurse
80 )
81
82 {
83 Char path [PATH_MAX];
84 CharPtr str;
85 ValNodePtr head, vnp;
86
87 /* get list of all files in source directory */
88
89 head = DirCatalog (directory);
90
91 for (vnp = head; vnp != NULL; vnp = vnp->next) {
92 if (vnp->choice == 0) {
93 if (StringHasNoText (subdir) || StringStr (directory, subdir) != NULL) {
94 str = (CharPtr) vnp->data.ptrvalue;
95 if (! StringHasNoText (str)) {
96
97 /* does filename have desired substring? */
98
99 if (StringHasNoText (subfile) || StringStr (str, subfile) != NULL) {
100
101 if (NotInFilter (str, exclude)) {
102
103 /* process file that has desired suffix (usually .aso) */
104
105 StringNCpy_0 (path, directory, sizeof (path));
106 FileBuildPath (path, NULL, str);
107
108 CreateAsnIndex (path, results, binary);
109 }
110 }
111 }
112 }
113 } else if (vnp->choice == 1 && dorecurse) {
114 StringNCpy_0 (path, directory, sizeof (path));
115 str = (CharPtr) vnp->data.ptrvalue;
116 FileBuildPath (path, str, NULL);
117 FileRecurse (path, results, subdir, subfile, exclude, binary, dorecurse);
118 }
119 }
120
121 /* clean up file list */
122
123 ValNodeFreeData (head);
124 }
125
126 static ValNodePtr ParseStringByCommas (
127 CharPtr str
128 )
129
130 {
131 Char ch;
132 ValNodePtr head = NULL;
133 CharPtr last, ptr, tmp;
134
135 if (StringHasNoText (str)) return NULL;
136
137 tmp = StringSave (str);
138 last = tmp;
139 ptr = last;
140 ch = *ptr;
141 while (ch != '\0') {
142 if (ch == ',') {
143 *ptr = '\0';
144 if (! StringHasNoText (last)) {
145 TrimSpacesAroundString (last);
146 ValNodeCopyStr (&head, 0, last);
147 }
148 ptr++;
149 last = ptr;
150 ch = *ptr;
151 } else {
152 ptr++;
153 ch = *ptr;
154 }
155 }
156 if (! StringHasNoText (last)) {
157 TrimSpacesAroundString (last);
158 ValNodeCopyStr (&head, 0, last);
159 }
160 MemFree (tmp);
161
162 return head;
163 }
164
165 /* Args structure contains optional command-line arguments */
166
167 #define p_argInputPath 0
168 #define r_argOutputPath 1
169 #define d_argSubDirName 2
170 #define x_argFileSelect 3
171 #define f_argFilter 4
172 #define b_argBinary 5
173 #define t_argRecurse 6
174
175 Args myargs [] = {
176 {"Path to Files", NULL, NULL, NULL,
177 FALSE, 'p', ARG_STRING, 0.0, 0, NULL},
178 {"Path for Results", NULL, NULL, NULL,
179 TRUE, 'r', ARG_STRING, 0.0, 0, NULL},
180 {"Required Subdirectory", NULL, NULL, NULL,
181 TRUE, 'd', ARG_STRING, 0.0, 0, NULL},
182 {"File Selection Substring", ".aso", NULL, NULL,
183 TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
184 {"Filter", "gbcon,gbest,gbgss,gbhtg,gbsts", NULL, NULL,
185 FALSE, 'f', ARG_STRING, 0.0, 0, NULL},
186 {"Bioseq-sets are Binary", "F", NULL, NULL,
187 TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
188 {"Recurse", "F", NULL, NULL,
189 TRUE, 't', ARG_BOOLEAN, 0.0, 0, NULL},
190 };
191
192 Int2 Main (void)
193
194 {
195 Boolean binary, dorecurse;
196 CharPtr directory, filter, results, subdir, subfile;
197 ValNodePtr exclude = NULL;
198
199 /* standard setup */
200
201 ErrSetFatalLevel (SEV_MAX);
202 ErrClearOptFlags (EO_SHOW_USERSTR);
203 UseLocalAsnloadDataAndErrMsg ();
204 ErrPathReset ();
205
206 if (! AllObjLoad ()) {
207 Message (MSG_FATAL, "AllObjLoad failed");
208 return 1;
209 }
210 if (! SubmitAsnLoad ()) {
211 Message (MSG_FATAL, "SubmitAsnLoad failed");
212 return 1;
213 }
214 if (! FeatDefSetLoad ()) {
215 Message (MSG_FATAL, "FeatDefSetLoad failed");
216 return 1;
217 }
218 if (! SeqCodeSetLoad ()) {
219 Message (MSG_FATAL, "SeqCodeSetLoad failed");
220 return 1;
221 }
222 if (! GeneticCodeTableLoad ()) {
223 Message (MSG_FATAL, "GeneticCodeTableLoad failed");
224 return 1;
225 }
226
227 /* process command line arguments */
228
229 if (! GetArgs ("asn2idx", sizeof (myargs) / sizeof (Args), myargs)) {
230 return 0;
231 }
232
233 /* additional setup modifications */
234
235 directory = (CharPtr) myargs [p_argInputPath].strvalue;
236 results = (CharPtr) myargs [r_argOutputPath].strvalue;
237 if (StringHasNoText (results)) {
238 results = NULL;
239 }
240 subdir = (CharPtr) myargs [d_argSubDirName].strvalue;
241 subfile = (CharPtr) myargs [x_argFileSelect].strvalue;
242 filter = (CharPtr) myargs [f_argFilter].strvalue;
243 binary = (Boolean) myargs [b_argBinary].intvalue;
244 dorecurse = (Boolean) myargs [t_argRecurse].intvalue;
245
246 if (StringHasNoText (directory)) {
247 Message (MSG_FATAL, "Required path to files not specified");
248 return 1;
249 }
250
251 exclude = ParseStringByCommas (filter);
252
253 /* index recursively within specified directory */
254
255 FileRecurse (directory, results, subdir, subfile, exclude, binary, dorecurse);
256
257 /* now create master index combining individual indices */
258
259 CreateMasterAsnIndex (directory, results);
260
261 ValNodeFreeData (exclude);
262
263 return 0;
264 }
265
266 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |