NCBI C Toolkit Cross Reference

C/biostruc/mmdbFF.c


  1 /* $Id: mmdbFF.c,v 6.4 2000/06/20 20:47:03 lewisg Exp $
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  mmdbFF.c - file based retrieval interface
 27 * 
 28 * Modifications:  
 29 * --------------------------------------------------------------------------
 30 * Date     Name        Description of modification
 31 * -------  ----------  -----------------------------------------------------
 32 *
 33 * $Log: mmdbFF.c,v $
 34 * Revision 6.4  2000/06/20 20:47:03  lewisg
 35 * use gzip
 36 *
 37 * Revision 6.3  1999/09/08 18:43:54  zimmerma
 38 * Modified HashPDBCode with calls to "toupper" to ensure case-insensitivity
 39 *
 40 * Revision 6.2  1999/06/22 22:35:02  kimelman
 41 * function names were changed to original names
 42 * so we have now 3 version of access files again
 43 *
 44 * Revision 6.1  1999/05/11 21:31:07  kimelman
 45 * file based retrival funciton extracted here from mmdblocl.c
 46 * (function names slightly changed)
 47 *
 48 *
 49 */
 50 
 51 /* LoadMMDBIdx reads the local MMDB index "mmdb.idx" into memory */
 52 /* index format is
 53 3\n
 54 1 1ABC\n
 55 3 3INS\n
 56 234567 4GWA\n
 57 eof
 58 */
 59 
 60 #include <ncbi.h>
 61 #include <mmdbapi1.h>
 62 #include <mmdbdata.h>
 63 #include <mmdblocl.h>
 64 #include <assert.h>
 65 
/* Parallel arrays filled by LoadMMDBIdx(): entry i of pI4Mmdbidx is the MMDB
   id stored alongside the hashed PDB code in entry i of pU4Pdbidx.           */
static Int4Ptr pI4Mmdbidx = NULL;
static Uint4Ptr pU4Pdbidx = NULL;
/* Entry count read from the first line of the index file.                    */
static long iMMDBSize = 0;
/* "Index" and "Database" settings fetched by MMDBInit() through
   GetAppParam(); file paths are formed as database followed by a file name.  */
static Char indexname[PATH_MAX];
static Char database[PATH_MAX];
/* Not referenced by any code visible in this file.                           */
static Int4Ptr pI4position = NULL;
/* "Gunzip" setting fetched by MMDBInit(); emptied there when the named file
   cannot be opened, which MMDBBiostrucGet() treats as "no gunzip available". */
static Char gunzip[PATH_MAX];
 73 
 74 
 75 /* These functions are only required when working with flat files:
 76  * - Load/Remove the mmdb.idx file for cross referencing MMDBids with PDBids
 77  * - Define the hash function for PDB strings                                   */
 78 
 79 static Uint4 HashPDBCode(CharPtr pcPdb) {
 80   Uint4 iPdbhash;
 81   Uint4 c1, c2, c3, c4;
 82 
 83   if (!pcPdb) return 0;
 84   if (StringLen(pcPdb) != 4) return 0;
 85   c1 =  toupper(pcPdb[3]);
 86   c2 =  toupper(pcPdb[2]);
 87   c3 =  toupper(pcPdb[1]);
 88   c4 =  toupper(pcPdb[0]);
 89   iPdbhash =  c1 + (c2 << 8) + (c3 << 16) + (c4 << 24);
 90   
 91   return iPdbhash;
 92 }
 93 
 94 /* The index file is loaded as a list of (long) mmdbids correlated with a 
 95    list of unsigned long HASHED PDB codes                                       */
 96 
 97 static Boolean LoadMMDBIdx(CharPtr db,  CharPtr idx) {
 98   FILE *f;
 99   Char fullpath [PATH_MAX];
100   CharPtr ptr;
101   Char pcBuf[100];
102   CharPtr pcTest;
103   Char  pcMmdb[20];
104   Char  pcPdb[20];
105   Uint4Ptr     pU4Pdb;
106   Int4Ptr      pI4Mmdb;
107   long count = 0;
108   long mmdbid = 0;
109   int status;
110   char *msg;
111 
112 #define ERROR(err_msg) { msg = err_msg; goto errexit; }
113 
114   StringCpy(fullpath, db);
115   StringCat(fullpath, idx);
116   if ((f = FileOpen (fullpath, "r")) == NULL) 
117     ERROR("MMDBInit: Couldn't load MMDB index.");
118    
119   fscanf(f, "%ld", &iMMDBSize);
120   if ((iMMDBSize == 0) || (iMMDBSize > 100000))
121     ERROR ("Internal - LoadMMDBIdx() Failure 2;");
122 
123   pI4Mmdbidx = (Int4Ptr) MemNew((size_t) ((iMMDBSize)*sizeof(Int4)));
124   pU4Pdbidx = (Uint4Ptr) MemNew((size_t) ((iMMDBSize)*sizeof(Uint4)));
125 
126   if ((!pI4Mmdbidx) || (!pU4Pdbidx))
127     ERROR ("Internal - LoadMMDBIdx() Out of Memory;");
128    
129   pI4Mmdb = pI4Mmdbidx;
130   pU4Pdb = pU4Pdbidx;
131    
132   while ((status = fscanf(f, "%ld%s",  &mmdbid,  pcPdb)) != EOF) {
133 
134     StrUpper(pcPdb);
135     if ((mmdbid == 0) ||  (StringLen(pcPdb) > 4)) {
136       MMDBFini();
137       ERROR ("Internal - LoadMMDBIdx() Bad Index Values");
138     }
139     
140     *pI4Mmdb = mmdbid;
141     *pU4Pdb =  HashPDBCode(pcPdb);
142     pI4Mmdb++;
143     pU4Pdb++;
144     count++;
145     if (count > (iMMDBSize)) {
146       MMDBFini();
147       ERROR ("Internal - LoadMMDBIdx() Index count is wrong at top of file;");
148     }
149   }
150   FileClose(f);
151   return TRUE;
152 
153 #undef ERROR
154 errexit:
155   ErrPostEx(SEV_FATAL,0,0, msg);
156   return FALSE;
157 }
158 
159 static void CloseMMDBIdx() {
160   if (pI4Mmdbidx) MemFree(pI4Mmdbidx);
161   if (pU4Pdbidx) MemFree(pU4Pdbidx);
162   return;
163 }
164 
165 /* Given a Char PDBid, hash-encode it and retrieve the corresponding long MMDBid */
166 
167 DocUid MMDBEvalPDB (CharPtr str) 
168 {
169   register Uint4Ptr pU4Pdb = NULL;
170   register Int4Ptr pI4Mmdb = NULL;
171   register Uint4   iHash = 0;
172   long i = 0;
173   
174   ASSERT ((pI4Mmdbidx != NULL) && (pU4Pdbidx != NULL));
175   pI4Mmdb = pI4Mmdbidx;
176   pU4Pdb = pU4Pdbidx;
177   iHash = HashPDBCode(str);
178 
179   while (i++ < iMMDBSize && iHash != *pU4Pdb) {
180     pU4Pdb++;
181     pI4Mmdb++;
182   } 
183   if (iHash == *pU4Pdb)
184     return (DocUid) *pI4Mmdb;
185   return (DocUid) 0;
186 }
187 
188 /*****************************FF_BiostrucGet******************************/
189 BiostrucPtr MMDBBiostrucGet (DocUid uid, Int4 mdlLvl, Int4 maxModels)
190 {
191   Char path[PATH_MAX], compath[PATH_MAX];
192   AsnIoPtr    aip = NULL;
193   FILE *pipe;
194   BiostrucPtr pbs = NULL;
195 
196   sprintf(path, "%s%ld.val", database, (long) uid);
197   if (FileLength(path) >0)
198     aip = AsnIoOpen(path, "rb");
199   else
200     {
201       if (gunzip[0] == 0) {
202         ErrPostEx(SEV_FATAL,0,0, "MMDBBiostrucGet_files failed: gunzip path missing.");
203         return NULL;
204       }
205       sprintf(path, "%s%ld.val.gz", database, (long) uid);
206       if (FileLength(path) <=0)
207         return NULL;
208       
209       sprintf(compath,"%s -c -d %s ", gunzip, path);
210       pipe=popen(compath,"rb");
211       if (pipe == 0)
212         {
213           ErrPostEx(SEV_FATAL,0,0, "MMDBBiostrucGet failed: Can't find gunzip in path");
214           return NULL;
215         }
216       aip = AsnIoNew(ASNIO_BIN_IN, pipe , NULL, NULL, NULL);
217     }
218   if (!aip)
219     return NULL;
220 
221   pbs = BiostrucAsnGet(aip, NULL,  mdlLvl,  maxModels);
222   AsnIoClose (aip);
223   return pbs;
224 }
225 
226 Boolean MMDBInit (void) 
227 {
228   char *msg;
229   char ndxPath[PATH_MAX];
230   FILE *f;
231   
232 #define ERROR(err_msg) { msg = err_msg; goto errexit; }
233 
234   GetAppParam("mmdb", "MMDB", "Index"   , "", indexname, sizeof(indexname));
235   GetAppParam("mmdb", "MMDB", "Database", "", database, sizeof(database));
236   if (!(database[0] && indexname[0]))
237     return FALSE ;
238   
239   /* let's check if index file really there */
240     
241   assert(sizeof(ndxPath) > strlen(database) + strlen(indexname));
242   sprintf(ndxPath,"%s%s",database,indexname);
243   
244   if ((f = fopen(ndxPath,"r")) == NULL)
245     ERROR("MMDBInit failed: incorrect mmdb.idx path.");
246   fclose(f);
247   if(FALSE==LoadMMDBIdx(database,  indexname))
248     return FALSE;
249   GetAppParam("mmdb", "MMDBSRV", "Gunzip", "", gunzip, sizeof(gunzip));
250   if(gunzip[0])
251     {
252       f = fopen(gunzip,"r");
253       if(f)
254         fclose(f);
255       else
256         gunzip[0]=0;
257     }
258   return TRUE;
259   
260 #undef ERROR
261 
262 errexit:
263   ErrPostEx(SEV_ERROR,0,0, msg);
264   return FALSE;
265 }
266 
267 void LIBCALL
268 MMDBFini (void)
269 {
270   CloseMMDBIdx();
271 }
272 
273 CharPtr LIBCALL
274 MMDB_configuration(void)
275 {
276   return "Version:\t$Id: mmdbFF.c,v 6.4 2000/06/20 20:47:03 lewisg Exp $\nConfiguration: Flat Files" ;
277 }
278 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.