NCBI C Toolkit Cross Reference

C/cdromlib/casn.c


  1 /* casn.c
  2  * ===========================================================================
  3  *
  4  *                            PUBLIC DOMAIN NOTICE                          
  5  *               National Center for Biotechnology Information
  6  *                                                                          
  7  *  This software/database is a "United States Government Work" under the   
  8  *  terms of the United States Copyright Act.  It was written as part of    
  9  *  the author's official duties as a United States Government employee and 
 10  *  thus cannot be copyrighted.  This software/database is freely available 
 11  *  to the public for use. The National Library of Medicine and the U.S.    
 12  *  Government have not placed any restriction on its use or reproduction.  
 13  *                                                                          
 14  *  Although all reasonable efforts have been taken to ensure the accuracy  
 15  *  and reliability of the software and data, the NLM and the U.S.          
 16  *  Government do not and cannot warrant the performance or results that    
 17  *  may be obtained by using this software or data. The NLM and the U.S.    
 18  *  Government disclaim all warranties, express or implied, including       
 19  *  warranties of performance, merchantability or fitness for any particular
 20  *  purpose.                                                                
 21  *                                                                          
 22  *  Please cite the author in any work or product based on this material.   
 23  *
 24  * ===========================================================================
 25  *
 26  * RCS $Id: casn.c,v 6.1 2001/04/27 18:00:30 juran Exp $
 27  *
 28  * Author:  Greg Schuler
 29  *
 30  * Version Creation Date: 9/23/92
 31  *
 32  * File Description:  functions to decompress a compressed ASN.1 (CASN) file.
 33  *
 34  * Modifications:  
 35  * --------------------------------------------------------------------------
 36  * Date     Name        Description of modification
 37  * -------  ----------  -----------------------------------------------------
 38  * 04-21-93 Schuler     CASN_ReadBuff declared as LIBCALLBACK
 39  * 06-28-93 Schuler     New function:  CASN_Seek().
 40  * 06-17-94 Schuler     Modified to support new file format that is to debut 
 41  *                      in Entrez release 13.0
 42  * 07-20-94 Schuler     Fixed bug in CASN_Open (incorrect doc_type)
 43  * 09-07-94 Schuler     Changed implementation of rd_string (one byte length)
 44  * 10-05-94 Schuler     Added CASN_NextBiostruc
 45  *
 46  * 05-19-95 Schuler     Added rcs Log directive for automatic insertion of
 47  *                      modification comments.
 48  *
 49  * Revision $Log: casn.c,v $
 50  * Revision Revision 6.1  2001/04/27 18:00:30  juran
 51  * Revision Warnings.
 52  * Revision
 53  * Revision Revision 6.0  1997/08/25 18:12:41  madden
 54  * Revision Revision changed to 6.0
 55  * Revision
 56  * Revision Revision 5.2  1997/06/26 21:55:21  vakatov
 57  * Revision [PC] DLL'd "ncbicdr.lib", "ncbiacc.lib", "ncbinacc.lib" and "ncbicacc.lib"
 58  * Revision
 59  * Revision Revision 5.1  1997/05/29 18:17:11  savchuk
 60  * Revision CASN_NextSeqEntry() function is now seeking to the end of compressed ASN
 61  * Revision
 62  * Revision 5.0  1996/05/28  13:55:34  ostell
 63  * Set to revision 5.0
 64  *
 65  * Revision 4.0  1995/07/26  13:50:32  ostell
 66  * force revision to 4.0
 67  *
 68  * Revision 2.11  1995/06/23  16:02:43  kans
 69  * support for accmmdbs.c stub to resolve symbols without MMDB link
 70  *
 71  * Revision 2.10  1995/06/23  13:22:25  kans
 72  * Biostruc_CD_supported symbol needed for local MMDB access
 73  *
 74  * Revision 2.9  1995/05/16  14:36:20  schuler
 75  * Automatic comment insertion enabled
 76  *
 77  *
 78  * ==========================================================================
 79  */
 80 
 81 #define REVISION_STR "$Revision: 6.1 $"
 82 
 83 
 84 #include <asn.h>
 85 #include <casn.h>
 86 
 87 struct casn_ioblock
 88 {
 89         short      rel_major;
 90         short      rel_minor;
 91         int        magic;
 92         int        format;
 93         int        compr;
 94         long       bytes;
 95         CASN_Type  doc_type;
 96         long       doc_count;
 97         long       uid_min;
 98         long       uid_max;
 99         int        huff_count;
100         short     *huff_left;
101         short     *huff_right;
102         unsigned   byte;
103         unsigned   mask;
104         FILE      *fd;
105         AsnIo     *aio;
106         AsnModule *amp;
107         AsnType   *atp;
108 };
109 
110 
/* On-disk header: newest format version this reader accepts, and the
 * magic number expected in the first two bytes of the file. */
#define CURRENT_FILEFORMAT      2
#define MAGIC_FILEFORMAT        4541

/* Tag stored in casn_ioblock.magic by CASN_New; Handle_IsValid() tests
 * for a non-NULL handle carrying that tag. */
#define MAGIC_IOBLOCK           3958
#define Handle_IsValid(x)  ((x) && ((x)->magic == MAGIC_IOBLOCK))

/* Values of the one-byte "decompression protocol identifier" that starts
 * each record (read in CASN_ReadBuff). */
enum CASN_Compr
{
        CASN_ComprNone,
        CASN_ComprHuff
};

/* ASN.1 type names used for files older than CURRENT_FILEFORMAT, indexed
 * by the document type stored in the file header. */
static char * _asn_type[] =
{
        "",
        "Medline-entry",
        "Seq-entry"
};

/* Format string shared by every "bad file format" error report. */
static char * file_emsg = "Unrecognized compressed file format [%s]\n";
121 
122 static int compr_none_read (CASN_Handle handle, char *buff, int count);
123 static int compr_huff_read (CASN_Handle handle, char *buff, int count);
124 
125 
126 static Int2 LIBCALLBACK CASN_ReadBuff(Pointer param, CharPtr buffer, Uint2 count);
127 
128 static char * rd_string (FILE *fd);
/* Reads `bytes` bytes from fd and assembles them most-significant first. */
static unsigned long rd_integer (FILE *fd, int bytes);

/*
 * Typed wrappers around rd_integer().  The names describe the type of the
 * result; the widths are those of the file format (2 bytes for the
 * SHORT/INT variants, 4 bytes for the LONG variants).
 *
 * The stream is taken from the macro argument.  Earlier versions ignored
 * the argument and silently used whatever variable named `fd` happened to
 * be in scope at the point of use.  Each expansion is fully parenthesized
 * so that it behaves as a single expression.
 */
#define RD_SHORT(f)   ((short)rd_integer((f),2))
#define RD_USHORT(f)  ((unsigned short)rd_integer((f),2))
#define RD_INT(f)     ((int)rd_integer((f),2))
#define RD_UINT(f)    ((unsigned int)rd_integer((f),2))
#define RD_LONG(f)    ((long)rd_integer((f),4))
#define RD_ULONG(f)   (rd_integer((f),4))
136 
137 
138 
139 /* --------------- High-Level Functions --------------- */
140 
141 NLM_EXTERN CASN_Handle LIBCALL CASN_Open (char *fname)
142 {
143         int     i, j;
144         CASN_Handle handle;
145         FILE *fd =NULL;
146         int     doc_type;
147         long    l1, l2, l3;
148         int     huff_count;
149         short rel_major =0, rel_minor =0;
150 
151         if (!(fd = FileOpen(fname,"rb")))
152         {
153                 ErrPostEx(SEV_ERROR,CASN_ErrFileOpen,0,"Unable to open file %s\n",fname);
154                 return NULL;
155         }
156 
157         /* check to see that the file is recognizable */
158         i = RD_SHORT(fd);
159         j = RD_SHORT(fd);
160         if (i != MAGIC_FILEFORMAT  ||  j > CURRENT_FILEFORMAT)
161         {
162                 FileClose(fd);
163                 ErrPostEx(SEV_ERROR,CASN_ErrFileFormat,0,file_emsg,fname);
164                 return NULL;
165         }
166         if (j == CURRENT_FILEFORMAT)
167         {
168                 rel_major = RD_SHORT(fd);
169                 rel_minor = RD_SHORT(fd);
170         }
171 
172         l1 = RD_LONG(fd);
173         l2 = RD_LONG(fd);
174         l3 = RD_LONG(fd);
175 
176         doc_type = RD_SHORT(fd);
177         if (j<CURRENT_FILEFORMAT)
178                 doc_type = -doc_type;
179         huff_count = RD_SHORT(fd);
180 
181         if (!(handle = CASN_New((CASN_Type)doc_type,huff_count)))
182         {
183                 FileClose(fd);
184                 return NULL;
185         }
186         handle->format = j;
187         handle->huff_count = huff_count;
188         for (i=0; i<huff_count; ++i)
189                 handle->huff_left[i] = RD_SHORT(fd);
190         for (i=0; i<huff_count; ++i)
191                 handle->huff_right[i] = RD_SHORT(fd);
192 
193         if (!(handle->aio = AsnIoNew(ASNIO_BIN_IN,fd,handle,CASN_ReadBuff,NULL)))
194         {
195                 FileClose(fd);
196                 CASN_Free(handle);
197                 return NULL;
198         }
199         handle->aio->fname = StrSave(fname);
200         handle->fd = fd;
201         handle->amp = AsnAllModPtr();
202         handle->doc_count = l1;
203         handle->uid_min = l2;
204         handle->uid_max = l3;
205         handle->rel_major = rel_major;
206         handle->rel_minor = rel_minor;
207         
208         if (handle->format == CURRENT_FILEFORMAT)
209         {
210                 char *asntype;
211                 /* skip over some things */
212                 MemFree((void*)rd_string(fd));
213                 rd_integer(fd,2);
214                 MemFree((void*)rd_string(fd));
215                 MemFree((void*)rd_string(fd));
216                 asntype = rd_string(fd);
217                 handle->atp = AsnTypeFind(handle->amp,asntype);
218                 MemFree((void*)asntype);
219         }
220         else
221         {
222                 handle->atp = AsnTypeFind(handle->amp,_asn_type[-doc_type]);
223         }
224         return handle;
225 }
226 
227 NLM_EXTERN void LIBCALL CASN_Close (CASN_Handle handle)
228 {
229         ASSERT(Handle_IsValid(handle));
230         AsnIoClose(handle->aio);
231         CASN_Free(handle);
232 }
233 
234 NLM_EXTERN AsnIo* LIBCALL CASN_GetAsnIoPtr (CASN_Handle handle)
235 {
236         ASSERT(Handle_IsValid(handle));
237         return handle->aio;
238 }
239 
240 NLM_EXTERN CASN_Type LIBCALL CASN_DocType (CASN_Handle handle)
241 {
242         ASSERT(Handle_IsValid(handle));
243         return handle->doc_type;
244 }
245 
246 NLM_EXTERN long LIBCALL CASN_DocCount (CASN_Handle handle)
247 {
248         ASSERT(Handle_IsValid(handle));
249         return handle->doc_count;
250 }
251 
252 NLM_EXTERN MedlineEntry* LIBCALL CASN_NextMedlineEntry (CASN_Handle handle)
253 {
254         AsnTypePtr atp;
255 
256         ASSERT(Handle_IsValid(handle));
257         atp = AsnReadId(handle->aio,handle->amp,handle->atp);
258         return atp ? MedlineEntryAsnRead(handle->aio,atp) : NULL;
259 }
260 
261 
262 NLM_EXTERN SeqEntry* LIBCALL CASN_NextSeqEntry (CASN_Handle handle)
263 {
264         AsnTypePtr atp;
265 
266         ASSERT(Handle_IsValid(handle));
267         if ((atp = AsnReadId(handle->aio, handle->amp, handle->atp))) {
268           SeqEntryPtr sep = SeqEntryAsnRead(handle->aio, atp);
269           while(handle->compr != -1) {
270             char buf[4];
271             compr_huff_read(handle, buf, 1);
272           }
273           return sep;
274         }
275         return NULL;
276 }
277 
278 
#ifdef Biostruc_supported
/*
 * CASN_NextBiostruc -- read the next Biostruc from the stream.
 *
 * Returns NULL immediately when BiostrucAvail() reports false, and also
 * when AsnReadId() finds no further type identifier; otherwise returns the
 * result of BiostrucAsnRead().
 */
NLM_EXTERN Biostruc* LIBCALL CASN_NextBiostruc (CASN_Handle handle)
{
        AsnTypePtr id;

        if (! BiostrucAvail ())
                return NULL;

        ASSERT(Handle_IsValid(handle));

        id = AsnReadId(handle->aio,handle->amp,handle->atp);
        if (id == NULL)
                return NULL;

        return BiostrucAsnRead(handle->aio,id);
}
#endif
290 
291 NLM_EXTERN int LIBCALL CASN_Seek (CASN_Handle handle, long offset, int origin)
292 {
293         ASSERT(Handle_IsValid(handle));
294         handle->compr = -1;          /* to reset the Huffman state */
295         AsnIoReset(handle->aio);     /* to reset the ASN state */
296         return fseek(handle->fd,offset,origin);
297 }
298 
299 
300 /* --------------- Low-Level Functions --------------- */
301 
302 NLM_EXTERN CASN_Handle  LIBCALL CASN_New (CASN_Type doc_type, int huff_count)
303 {
304         CASN_Handle handle;
305         short *left;
306         short *right;
307 
308         if (!(handle = (CASN_Handle) MemNew(sizeof(struct casn_ioblock))))
309                 return NULL;
310         if (!(left = (short*) MemNew(huff_count*sizeof(short))))
311                 return NULL;
312         if (!(right = (short*) MemNew(huff_count*sizeof(short))))
313                 return NULL;
314 
315         handle->magic = MAGIC_IOBLOCK;
316         handle->doc_type = doc_type;
317         handle->compr = -1;
318         handle->huff_left = left;
319         handle->huff_right = right;
320         return handle;
321 }
322 
323 
324 NLM_EXTERN void LIBCALL CASN_Free (CASN_Handle handle)
325 {
326         ASSERT(Handle_IsValid(handle));
327         MemFree(handle->huff_left);
328         MemFree(handle->huff_right);
329         MemFree(handle);
330 }
331 
332 
333 static Int2 LIBCALLBACK CASN_ReadBuff(Pointer param, CharPtr buff, Uint2 count)
334 {
335         CASN_Handle handle = (CASN_Handle) param;
336         Int2 retval = 0;
337 
338         ASSERT(Handle_IsValid(handle));
339 
340         while (! retval)   /* has to allow for 0 bytes from compressed read */
341         {
342                 if (handle->compr < 0)
343                 {
344                         Int2 c;
345         
346                         /* read the "decompression protocol identifier" */
347                         if ((c = fgetc(handle->fd)) == EOF)
348                                 return 0;
349         
350                         if (c == CASN_ComprNone)
351                         {
352                                 handle->bytes = rd_integer(handle->fd,3);
353                         }
354                         else if (c == CASN_ComprHuff)
355                         {
356                                 if (handle->format ==2)
357                                         rd_integer(handle->fd,3);   /* justskip over it for now */
358                                 handle->byte = 0;
359                                 handle->mask = 0;
360                         }
361                         else
362                         {
363                                 ErrPostEx(SEV_ERROR,CASN_ErrFileFormat,0,file_emsg,"ReadBuff");
364                                 return 0;
365                         }
366                         handle->compr = c;
367                 }
368         
369                 switch(handle->compr)
370                 {
371                         case CASN_ComprNone:
372                                 return compr_none_read(handle,buff,count);
373         
374                         case CASN_ComprHuff:
375                                 if ((retval = compr_huff_read(handle,buff,count)) !=0)
376                                         return retval;
377                                 break;
378         
379                         default:
380                                 ErrPostEx(SEV_ERROR,CASN_ErrFileFormat,0,file_emsg,"ReadBuff");
381                                 return 0;
382                 }
383         }
384 
385         return 0;
386 }
387 
388 
389 static int compr_none_read (CASN_Handle handle, char *buff, int count)
390 {
391         size_t bytes;
392         
393         ASSERT(Handle_IsValid(handle));
394         bytes = (size_t) MIN(handle->bytes,(Int4)count);
395         bytes = FileRead(buff,1,bytes,handle->fd);
396         handle->bytes -= bytes;
397         if (handle->bytes <= 0)
398         {
399                 /* reset for stream read of next entry */
400                 handle->compr = -1;
401         }
402         return (int)bytes;
403 }
404 
405 
406 static int compr_huff_read (CASN_Handle handle, char *buff, int count)
407 {
408         register unsigned mask, byte;
409         FILE *fd;
410         char *p = buff;
411         int i, cnt = 0;
412         int c;
413         int k;
414 
415         ASSERT(Handle_IsValid(handle));
416 
417         fd = handle->fd;
418         mask = handle->mask;
419         byte = handle->byte;
420 
421         while (cnt < count)
422         {
423                 for (i=0; i>=0; )
424                 {
425                         if (mask == 0)
426                         {
427                                 if ((c = fgetc(fd)) == EOF)
428                                 {
429                                         /* should never reach this point */
430                                         ErrPostEx(SEV_INFO,0,0,
431                                                 "Unexpected EOF reading Huffman-compressed ASN.1");
432                                         i = handle->huff_count - 257;
433                                         break;
434                                 }
435                                 else
436                                 {
437                                         byte = (Uint2) c;
438                                         mask = 0x80;
439                                 }
440                         }
441 
442                         if (byte & mask)
443                                 i = handle->huff_left[i];
444                         else
445                                 i = handle->huff_right[i];
446 
447                         mask >>= 1;
448                 }
449 
450                 if ((k = i + 257) == handle->huff_count)
451                 {
452                         handle->compr = -1; /* reset for next record */
453                         break;
454                 }
455 
456                 *p++ = (char) k;
457                 cnt++;
458         }
459 
460         handle->mask = mask;
461         handle->byte = byte;
462         return cnt;
463 }
464 
/*
 * rd_integer -- read an unsigned integer stored most-significant byte
 * first.
 *
 *   fd      stream to read from
 *   bytes   number of bytes that make up the value
 *
 * Returns the assembled value.  If the stream ends early, the value built
 * from the bytes read so far is returned (0 if none were read).
 */
static unsigned long rd_integer (FILE *fd, int bytes)
{
        unsigned long acc = 0;
        int remaining;

        for (remaining = bytes; remaining > 0; --remaining)
        {
                int ch = fgetc(fd);

                if (ch == EOF)
                        break;
                acc = (acc << 8) | (unsigned long) ch;
        }
        return acc;
}
478 
479 static char * rd_string (FILE *fd)
480 {
481         size_t len = (size_t) fgetc(fd);
482         if (len > 0)
483         {
484                 char *str = MemGet(len+1,MGET_ERRPOST);
485                 if (fread((void*)str,1,len,fd) != len)
486                 {
487                         ErrPostEx(SEV_ERROR,CASN_ErrFileFormat,0,"File format error");
488                         MemFree((void*)str);
489                         return NULL;
490                 }
491                 *(str+len) = '\0';
492                 return str;
493         }
494         return NULL;
495 }
496 
497 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.