|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/biostruc/mmdbFF.c |
source navigation diff markup identifier search freetext search file search |
1 /* $Id: mmdbFF.c,v 6.4 2000/06/20 20:47:03 lewisg Exp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: mmdbFF.c - file based retrieval interface
27 *
28 * Modifications:
29 * --------------------------------------------------------------------------
30 * Date Name Description of modification
31 * ------- ---------- -----------------------------------------------------
32 *
33 * $Log: mmdbFF.c,v $
34 * Revision 6.4 2000/06/20 20:47:03 lewisg
35 * use gzip
36 *
37 * Revision 6.3 1999/09/08 18:43:54 zimmerma
38 * Modified HashPDBCode with calls to "toupper" to ensure case-insensitivity
39 *
40 * Revision 6.2 1999/06/22 22:35:02 kimelman
41 * function names were changed to original names
42 * so we have now 3 version of access files again
43 *
44 * Revision 6.1 1999/05/11 21:31:07 kimelman
45 * file based retrieval function extracted here from mmdblocl.c
46 * (function names slightly changed)
47 *
48 *
49 */
50
51 /* LoadMMDBIdx reads the local MMDB index "mmdb.idx" into memory */
52 /* index format is
53 3\n
54 1 1ABC\n
55 3 3INS\n
56 234567 4GWA\n
57 eof
58 */
59
60 #include <ncbi.h>
61 #include <mmdbapi1.h>
62 #include <mmdbdata.h>
63 #include <mmdblocl.h>
64 #include <assert.h>
65
66 static Int4Ptr pI4Mmdbidx = NULL;
67 static Uint4Ptr pU4Pdbidx = NULL;
68 static long iMMDBSize = 0;
69 static Char indexname[PATH_MAX];
70 static Char database[PATH_MAX];
71 static Int4Ptr pI4position = NULL;
72 static Char gunzip[PATH_MAX];
73
74
75 /* These functions are only required when working with flat files:
76 * - Load/Remove the mmdb.idx file for cross referencing MMDBids with PDBids
77 * - Define the hash function for PDB strings */
78
79 static Uint4 HashPDBCode(CharPtr pcPdb) {
80 Uint4 iPdbhash;
81 Uint4 c1, c2, c3, c4;
82
83 if (!pcPdb) return 0;
84 if (StringLen(pcPdb) != 4) return 0;
85 c1 = toupper(pcPdb[3]);
86 c2 = toupper(pcPdb[2]);
87 c3 = toupper(pcPdb[1]);
88 c4 = toupper(pcPdb[0]);
89 iPdbhash = c1 + (c2 << 8) + (c3 << 16) + (c4 << 24);
90
91 return iPdbhash;
92 }
93
94 /* The index file is loaded as a list of (long) mmdbids correlated with a
95 list of unsigned long HASHED PDB codes */
96
97 static Boolean LoadMMDBIdx(CharPtr db, CharPtr idx) {
98 FILE *f;
99 Char fullpath [PATH_MAX];
100 CharPtr ptr;
101 Char pcBuf[100];
102 CharPtr pcTest;
103 Char pcMmdb[20];
104 Char pcPdb[20];
105 Uint4Ptr pU4Pdb;
106 Int4Ptr pI4Mmdb;
107 long count = 0;
108 long mmdbid = 0;
109 int status;
110 char *msg;
111
112 #define ERROR(err_msg) { msg = err_msg; goto errexit; }
113
114 StringCpy(fullpath, db);
115 StringCat(fullpath, idx);
116 if ((f = FileOpen (fullpath, "r")) == NULL)
117 ERROR("MMDBInit: Couldn't load MMDB index.");
118
119 fscanf(f, "%ld", &iMMDBSize);
120 if ((iMMDBSize == 0) || (iMMDBSize > 100000))
121 ERROR ("Internal - LoadMMDBIdx() Failure 2;");
122
123 pI4Mmdbidx = (Int4Ptr) MemNew((size_t) ((iMMDBSize)*sizeof(Int4)));
124 pU4Pdbidx = (Uint4Ptr) MemNew((size_t) ((iMMDBSize)*sizeof(Uint4)));
125
126 if ((!pI4Mmdbidx) || (!pU4Pdbidx))
127 ERROR ("Internal - LoadMMDBIdx() Out of Memory;");
128
129 pI4Mmdb = pI4Mmdbidx;
130 pU4Pdb = pU4Pdbidx;
131
132 while ((status = fscanf(f, "%ld%s", &mmdbid, pcPdb)) != EOF) {
133
134 StrUpper(pcPdb);
135 if ((mmdbid == 0) || (StringLen(pcPdb) > 4)) {
136 MMDBFini();
137 ERROR ("Internal - LoadMMDBIdx() Bad Index Values");
138 }
139
140 *pI4Mmdb = mmdbid;
141 *pU4Pdb = HashPDBCode(pcPdb);
142 pI4Mmdb++;
143 pU4Pdb++;
144 count++;
145 if (count > (iMMDBSize)) {
146 MMDBFini();
147 ERROR ("Internal - LoadMMDBIdx() Index count is wrong at top of file;");
148 }
149 }
150 FileClose(f);
151 return TRUE;
152
153 #undef ERROR
154 errexit:
155 ErrPostEx(SEV_FATAL,0,0, msg);
156 return FALSE;
157 }
158
159 static void CloseMMDBIdx() {
160 if (pI4Mmdbidx) MemFree(pI4Mmdbidx);
161 if (pU4Pdbidx) MemFree(pU4Pdbidx);
162 return;
163 }
164
165 /* Given a Char PDBid, hash-encode it and retrieve the corresponding long MMDBid */
166
167 DocUid MMDBEvalPDB (CharPtr str)
168 {
169 register Uint4Ptr pU4Pdb = NULL;
170 register Int4Ptr pI4Mmdb = NULL;
171 register Uint4 iHash = 0;
172 long i = 0;
173
174 ASSERT ((pI4Mmdbidx != NULL) && (pU4Pdbidx != NULL));
175 pI4Mmdb = pI4Mmdbidx;
176 pU4Pdb = pU4Pdbidx;
177 iHash = HashPDBCode(str);
178
179 while (i++ < iMMDBSize && iHash != *pU4Pdb) {
180 pU4Pdb++;
181 pI4Mmdb++;
182 }
183 if (iHash == *pU4Pdb)
184 return (DocUid) *pI4Mmdb;
185 return (DocUid) 0;
186 }
187
188 /*****************************FF_BiostrucGet******************************/
189 BiostrucPtr MMDBBiostrucGet (DocUid uid, Int4 mdlLvl, Int4 maxModels)
190 {
191 Char path[PATH_MAX], compath[PATH_MAX];
192 AsnIoPtr aip = NULL;
193 FILE *pipe;
194 BiostrucPtr pbs = NULL;
195
196 sprintf(path, "%s%ld.val", database, (long) uid);
197 if (FileLength(path) >0)
198 aip = AsnIoOpen(path, "rb");
199 else
200 {
201 if (gunzip[0] == 0) {
202 ErrPostEx(SEV_FATAL,0,0, "MMDBBiostrucGet_files failed: gunzip path missing.");
203 return NULL;
204 }
205 sprintf(path, "%s%ld.val.gz", database, (long) uid);
206 if (FileLength(path) <=0)
207 return NULL;
208
209 sprintf(compath,"%s -c -d %s ", gunzip, path);
210 pipe=popen(compath,"rb");
211 if (pipe == 0)
212 {
213 ErrPostEx(SEV_FATAL,0,0, "MMDBBiostrucGet failed: Can't find gunzip in path");
214 return NULL;
215 }
216 aip = AsnIoNew(ASNIO_BIN_IN, pipe , NULL, NULL, NULL);
217 }
218 if (!aip)
219 return NULL;
220
221 pbs = BiostrucAsnGet(aip, NULL, mdlLvl, maxModels);
222 AsnIoClose (aip);
223 return pbs;
224 }
225
226 Boolean MMDBInit (void)
227 {
228 char *msg;
229 char ndxPath[PATH_MAX];
230 FILE *f;
231
232 #define ERROR(err_msg) { msg = err_msg; goto errexit; }
233
234 GetAppParam("mmdb", "MMDB", "Index" , "", indexname, sizeof(indexname));
235 GetAppParam("mmdb", "MMDB", "Database", "", database, sizeof(database));
236 if (!(database[0] && indexname[0]))
237 return FALSE ;
238
239 /* let's check if index file really there */
240
241 assert(sizeof(ndxPath) > strlen(database) + strlen(indexname));
242 sprintf(ndxPath,"%s%s",database,indexname);
243
244 if ((f = fopen(ndxPath,"r")) == NULL)
245 ERROR("MMDBInit failed: incorrect mmdb.idx path.");
246 fclose(f);
247 if(FALSE==LoadMMDBIdx(database, indexname))
248 return FALSE;
249 GetAppParam("mmdb", "MMDBSRV", "Gunzip", "", gunzip, sizeof(gunzip));
250 if(gunzip[0])
251 {
252 f = fopen(gunzip,"r");
253 if(f)
254 fclose(f);
255 else
256 gunzip[0]=0;
257 }
258 return TRUE;
259
260 #undef ERROR
261
262 errexit:
263 ErrPostEx(SEV_ERROR,0,0, msg);
264 return FALSE;
265 }
266
267 void LIBCALL
268 MMDBFini (void)
269 {
270 CloseMMDBIdx();
271 }
272
273 CharPtr LIBCALL
274 MMDB_configuration(void)
275 {
276 return "Version:\t$Id: mmdbFF.c,v 6.4 2000/06/20 20:47:03 lewisg Exp $\nConfiguration: Flat Files" ;
277 }
278 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |