NCBI C Toolkit Cross Reference

C/demo/asn2idx.c


  1 /*   asn2idx.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material.
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  asn2idx.c
 27 *
 28 * Author:  Jonathan Kans
 29 *
 30 * Version Creation Date:   8/2/04
 31 *
 32 * $Revision: 1.5 $
 33 *
 34 * File Description:
 35 *
 36 *   Generates accession/file offset indices for Bioseq-set release files
 37 *
 38 * Modifications:  
 39 * --------------------------------------------------------------------------
 40 * Date     Name        Description of modification
 41 * -------  ----------  -----------------------------------------------------
 42 *
 43 *
 44 * ==========================================================================
 45 */
 46 
 47 #include <ncbi.h>
 48 #include <objall.h>
 49 #include <objsset.h>
 50 #include <objsub.h>
 51 #include <objfdef.h>
 52 #include <sqnutils.h>
 53 #include <lsqfetch.h>
 54 
 55 static Boolean NotInFilter (
 56   CharPtr filename,
 57   ValNodePtr exclude
 58 )
 59 
 60 {
 61   CharPtr     str;
 62   ValNodePtr  vnp;
 63 
 64   for (vnp = exclude; vnp != NULL; vnp = vnp->next) {
 65     str = (CharPtr) vnp->data.ptrvalue;
 66     if (StringHasNoText (str)) continue;
 67     if (StringStr (filename, str) != NULL) return FALSE;
 68   }
 69   return TRUE;
 70 }
 71 
 72 static void FileRecurse (
 73   CharPtr directory,
 74   CharPtr results,
 75   CharPtr subdir,
 76   CharPtr subfile,
 77   ValNodePtr exclude,
 78   Boolean binary,
 79   Boolean dorecurse
 80 )
 81 
 82 {
 83   Char        path [PATH_MAX];
 84   CharPtr     str;
 85   ValNodePtr  head, vnp;
 86 
 87   /* get list of all files in source directory */
 88 
 89   head = DirCatalog (directory);
 90 
 91   for (vnp = head; vnp != NULL; vnp = vnp->next) {
 92     if (vnp->choice == 0) {
 93       if (StringHasNoText (subdir) || StringStr (directory, subdir) != NULL) {
 94         str = (CharPtr) vnp->data.ptrvalue;
 95         if (! StringHasNoText (str)) {
 96 
 97           /* does filename have desired substring? */
 98 
 99           if (StringHasNoText (subfile) || StringStr (str, subfile) != NULL) {
100 
101             if (NotInFilter (str, exclude)) {
102 
103               /* process file that has desired suffix (usually .aso) */
104 
105               StringNCpy_0 (path, directory, sizeof (path));
106               FileBuildPath (path, NULL, str);
107 
108               CreateAsnIndex (path, results, binary);
109             }
110           }
111         }
112       }
113     } else if (vnp->choice == 1 && dorecurse) {
114       StringNCpy_0 (path, directory, sizeof (path));
115       str = (CharPtr) vnp->data.ptrvalue;
116       FileBuildPath (path, str, NULL);
117       FileRecurse (path, results, subdir, subfile, exclude, binary, dorecurse);
118     }
119   }
120 
121   /* clean up file list */
122 
123   ValNodeFreeData (head);
124 }
125 
126 static ValNodePtr ParseStringByCommas (
127   CharPtr str
128 )
129 
130 {
131   Char        ch;
132   ValNodePtr  head = NULL;
133   CharPtr     last, ptr, tmp;
134 
135   if (StringHasNoText (str)) return NULL;
136 
137   tmp = StringSave (str);
138   last = tmp;
139   ptr = last;
140   ch = *ptr;
141   while (ch != '\0') {
142     if (ch == ',') {
143       *ptr = '\0';
144       if (! StringHasNoText (last)) {
145         TrimSpacesAroundString (last);
146         ValNodeCopyStr (&head, 0, last);
147       }
148       ptr++;
149       last = ptr;
150       ch = *ptr;
151     } else {
152       ptr++;
153       ch = *ptr;
154     }
155   }
156   if (! StringHasNoText (last)) {
157     TrimSpacesAroundString (last);
158     ValNodeCopyStr (&head, 0, last);
159   }
160   MemFree (tmp);
161 
162   return head;
163 }
164 
165 /* Args structure contains optional command-line arguments */
166 
167 #define p_argInputPath    0
168 #define r_argOutputPath   1
169 #define d_argSubDirName   2
170 #define x_argFileSelect   3
171 #define f_argFilter       4
172 #define b_argBinary       5
173 #define t_argRecurse      6
174 
175 Args myargs [] = {
176   {"Path to Files", NULL, NULL, NULL,
177     FALSE, 'p', ARG_STRING, 0.0, 0, NULL},
178   {"Path for Results", NULL, NULL, NULL,
179     TRUE, 'r', ARG_STRING, 0.0, 0, NULL},
180   {"Required Subdirectory", NULL, NULL, NULL,
181     TRUE, 'd', ARG_STRING, 0.0, 0, NULL},
182   {"File Selection Substring", ".aso", NULL, NULL,
183     TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
184   {"Filter", "gbcon,gbest,gbgss,gbhtg,gbsts", NULL, NULL,
185     FALSE, 'f', ARG_STRING, 0.0, 0, NULL},
186   {"Bioseq-sets are Binary", "F", NULL, NULL,
187     TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
188   {"Recurse", "F", NULL, NULL,
189     TRUE, 't', ARG_BOOLEAN, 0.0, 0, NULL},
190 };
191 
192 Int2 Main (void)
193 
194 {
195   Boolean     binary, dorecurse;
196   CharPtr     directory, filter, results, subdir, subfile;
197   ValNodePtr  exclude = NULL;
198 
199   /* standard setup */
200 
201   ErrSetFatalLevel (SEV_MAX);
202   ErrClearOptFlags (EO_SHOW_USERSTR);
203   UseLocalAsnloadDataAndErrMsg ();
204   ErrPathReset ();
205 
206   if (! AllObjLoad ()) {
207     Message (MSG_FATAL, "AllObjLoad failed");
208     return 1;
209   }
210   if (! SubmitAsnLoad ()) {
211     Message (MSG_FATAL, "SubmitAsnLoad failed");
212     return 1;
213   }
214   if (! FeatDefSetLoad ()) {
215     Message (MSG_FATAL, "FeatDefSetLoad failed");
216     return 1;
217   }
218   if (! SeqCodeSetLoad ()) {
219     Message (MSG_FATAL, "SeqCodeSetLoad failed");
220     return 1;
221   }
222   if (! GeneticCodeTableLoad ()) {
223     Message (MSG_FATAL, "GeneticCodeTableLoad failed");
224     return 1;
225   }
226 
227   /* process command line arguments */
228 
229   if (! GetArgs ("asn2idx", sizeof (myargs) / sizeof (Args), myargs)) {
230     return 0;
231   }
232 
233   /* additional setup modifications */
234 
235   directory = (CharPtr) myargs [p_argInputPath].strvalue;
236   results = (CharPtr) myargs [r_argOutputPath].strvalue;
237   if (StringHasNoText (results)) {
238     results = NULL;
239   }
240   subdir = (CharPtr) myargs [d_argSubDirName].strvalue;
241   subfile = (CharPtr) myargs [x_argFileSelect].strvalue;
242   filter = (CharPtr) myargs [f_argFilter].strvalue;
243   binary = (Boolean) myargs [b_argBinary].intvalue;
244   dorecurse = (Boolean) myargs [t_argRecurse].intvalue;
245 
246   if (StringHasNoText (directory)) {
247     Message (MSG_FATAL, "Required path to files not specified");
248     return 1;
249   }
250 
251   exclude = ParseStringByCommas (filter);
252 
253   /* index recursively within specified directory */
254 
255   FileRecurse (directory, results, subdir, subfile, exclude, binary, dorecurse);
256 
257   /* now create master index combining individual indices */
258 
259   CreateMasterAsnIndex (directory, results);
260 
261   ValNodeFreeData (exclude);
262 
263   return 0;
264 }
265 
266 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.