NCBI C Toolkit Cross Reference

C/cdromlib/cdromlib.c


  1 /* 
  2  * ===========================================================================
  3  *
  4  *                             COPYRIGHT NOTICE
  5  *               National Center for Biotechnology Information
  6  *
  7  *  This software/database is a "United States Government Work" under the
  8  *  terms of the United States Copyright Act.  It was written as part of
  9  *  the author's official duties as a Government employee and thus cannot
 10  *  be copyrighted.  This software/database is freely available to the
 11  *  public for use without a copyright notice.  Restrictions cannot be
 12  *  placed on its present or future use.
 13  *
 14  *  Although all reasonable efforts have been taken to ensure the accuracy
 15  *  and reliability of the software and data, the National Library of
 16  *  Medicine (NLM) and the U. S. Government do not and cannot warrant the
 17  *  performance or results that may be obtained by using this software or
 18  *  data.  The NLM and the U. S. Government disclaim all warranties as to
 19  *  performance, merchantability or fitness for any particular purpose.
 20  *
 21  *  Please see that the author is suitably cited in any work or product
 22  *  based on this material.
 23  *
 24  * ===========================================================================
 25  *
 26  * RCS $Id: cdromlib.c,v 6.1 2004/04/01 13:43:05 lavr Exp $
 27  *
 28  * Authors: Greg Schuler, Jim Ostell, Jonathan Kans, Jonathan Epstein
 29  * 
 30  * Original Creation Date:   9-91
 31  *
 32  * File Description: 
 33  *  Functions in this file are the I/O primitives needed to retrieve data
 34  *  from the Entrez CD-ROMs, releases 12.0 and earlier.  For releases 13.0
 35  *  and later, use the functions in cdnewlib.c
 36  *
 37  *
 38  * Modifications:
 39  * --------------------------------------------------------------------------
 40  * Date     Name        Description of modification
 41  * -------  ----------  -----------------------------------------------------
 42  * 06-29-94 Schuler     AsnRead/AsnWrite functions moved to objentr.c
 43  * 07-11-94 Schuler     Removed #include <cdrom.h> (no longer needed!)
 44  * 07-13-94 Schuler     Moved CdTermFree to cdentrez.c
 45  * 08-04-94 Kans        Fixed bug resulting in reading too many term pages
 46  * 11-16-94 Schuler     Typecasts for picky compilers
 47  *
 48  * 05-19-95 Schuler     Added rcs Log directive for automatic insertion of
 49  *                      modification comments.
 50  *
 51  * Revision $Log: cdromlib.c,v $
 52  * Revision Revision 6.1  2004/04/01 13:43:05  lavr
 53  * Revision Spell "occurred", "occurrence", and "occurring"
 54  * Revision
 55  * Revision Revision 6.0  1997/08/25 18:13:10  madden
 56  * Revision Revision changed to 6.0
 57  * Revision
 58  * Revision Revision 5.0  1996/05/28 13:55:34  ostell
 59  * Revision Set to revision 5.0
 60  * Revision
 61  * Revision 4.0  1995/07/26  13:50:32  ostell
 62  * force revision to 4.0
 63  *
 64  * Revision 2.48  1995/05/16  14:36:20  schuler
 65  * Automatic comment insertion enabled
 66  *
 67  *
 68  * ==========================================================================
 69  */
 70 
 71 #define REVISION_STR "$Revision: 6.1 $"
 72 
 73 #include <cdromlib.h>
 74 
 75 #ifdef _OLD_CdEntrez_
 76 
 77 static char * _this_module = "CdEntrez";
 78 #undef  THIS_MODULE
 79 #define THIS_MODULE _this_module
 80 static char * _this_file = __FILE__;
 81 #undef  THIS_FILE
 82 #define THIS_FILE _this_file
 83 
 84 /*  =========================================================================
 85  *      CONSTANTS & MACROS
 86  */
 87 
 88 #define CURRENT_FORMAT_VERSION 0
 89 
 90 #define BLKSIZE   ((size_t)vi->field_bucket_size)  /* sizeof term index block on cdrom */
 91 
 92 #define PREF_ML     TYP_ML
 93 #define PREF_AA     TYP_AA
 94 #define PREF_NT     TYP_NT
 95 #define PREF_MED    (NTYPE+0)
 96 #define PREF_SEQ    (NTYPE+1)
 97 
 98 #define SUF_ML      TYP_ML
 99 #define SUF_AA      TYP_AA
100 #define SUF_NT      TYP_NT
101 #define SUF_WORD    (NTYPE+FLD_WORD)
102 #define SUF_MESH    (NTYPE+FLD_MESH)
103 #define SUF_KYWD    (NTYPE+FLD_KYWD)
104 #define SUF_AUTH    (NTYPE+FLD_AUTH)
105 #define SUF_JOUR    (NTYPE+FLD_JOUR)
106 #define SUF_ORGN    (NTYPE+FLD_ORGN)
107 #define SUF_ACCN    (NTYPE+FLD_ACCN)
108 #define SUF_GENE    (NTYPE+FLD_GENE)
109 #define SUF_PROT    (NTYPE+FLD_PROT)
110 #define SUF_ECNO    (NTYPE+FLD_ECNO)
111 #define SUF_HIER    (NTYPE+FLD_ORGN_HIER)
112 #define SUF_DATE    (NTYPE+FLD_DATE)
113 #define SUF_ASN     (NTYPE+NFLD)
114 #define SUF_REC     (NTYPE+NFLD+1)
115 #define SUF_UID     (NTYPE+NFLD+2)
116 
117 #define EXT_DAT     0
118 #define EXT_IDX     1
119 #define EXT_LST     2
120 #define EXT_PST     3
121 #define EXT_LNK     4
122 
123 #define COMPR_NONE      0
124 #define COMPR_HUFFMAN   1
125 #define COMPR_LZW1      2
126 /* etc...*/
127 #define COMPR_DONT_KNOW 0xFF
128 
129 
130 #define HUFFMAN_SENTINEL 256
131 
132 
133 typedef struct {
134     DocUid  uid;                  /* MEDLINE UI or seq-id of Bioseq       */
135     DocType type;                  /* document type code (ml/nt/aa)        */
136     Int4  entry_offset ,        /* offset into entry file(s)            */
137         sum_offset ,            /* offset into summary file    */
138         link_offset ;            /* offset into link file       */
139 } UidIdx,  PNTR UidIdxPtr;
140 
141 typedef struct decompinfo {
142     AsnIoPtr aip;
143     FILE *fp;
144     Uint1 compr;        /* compression protocol */
145     unsigned int mask;          /* used internally for Huffman */
146     unsigned int byte;          /* used internally for Huffman */
147     Uint4 bytes_left;   /* count of remaining bytes for uncompressed protocol */
148     struct decompinfo PNTR next;
149 } DecompInfo, PNTR DecompInfoPtr;
150     
151 
152 /*  =========================================================================
153  *      VARIABLES
154  */
155 
156 static DecompInfoPtr DecompInfoList = NULL;
157 static Int4 numinits;
158 static CharPtr buffer;
159 
160 static CharPtr sPath [NDIR];
161 
162 static char  *sSdir[] = { "", "data", "sequence", "medline", "terms",
163                           "index", "links", "" };
164 static char  *sPref[] = { "ml", "aa", "nt", "med", "seq" };
165 static char  *sSuff[] = {  "word", "mesh", "kywd", "auth", "jour", "orgn",
166                            "accn", "gene", "prot", "ecno", "hier", "date",
167                            "fkey", "prop", "subs", "mloc",
168                            "ml", "aa", "nt",
169                            "asn", "rec", "uid" };
170 static char  *sExtn[] = { "dat", "idx", "lst", "pst", "lnk" };
171 
172 static Boolean bAppendVer = FALSE;
173 static Boolean upperCaseIt = FALSE;
174 
175 static EntrezInfoPtr vi = NULL;
176 static Int4Ptr type_bucket_index[NTYPE];  /* from the .idx files */
177 
178 static size_t detInfoCharCount;
179 static CharPtr CdDetailedBuf = NULL;
180 static Boolean countOnly;
181 
182                     /* for saving the last term.idx file used */
183 
184 static DocType term_idx_type = -1;   
185 static DocField term_idx_field = -1;
186 static Int2 term_idx_count = 0;
187 static CharPtr PNTR term_idx_str;
188 static FILE *IdxFilePtr[NTYPE+2];
189 static Boolean HoldIdxOpen = FALSE;
190 
191 
192 #ifdef IS_BIG_ENDIAN
193 /* no swapping needed:  define do-nothing macros */
194 #define SwapInt2(X)  (X)
195 #define SwapInt4(X)  (X)
196 #else
197 /* give prototypes for byte swapping functions */
198 static Int2 NEAR  SwapInt2  PROTO((Int2));
199 static Int4 NEAR  SwapInt4  PROTO((Int4));
200 #endif
201 
202 
203 /*****************************************************************************
204 *
205 *   Private Function Prototypes
206 *
207 *****************************************************************************/
208 static Boolean NEAR CdInitialize PROTO((CharPtr,CharPtr,CharPtr,Int2Ptr));
209 static Boolean NEAR CdSetPath    PROTO((Int2,CharPtr));
210 static Boolean NEAR SaveCdMediaContext PROTO((CharPtr media_name));
211 static void    NEAR ExtraInitWork PROTO((void));
212 static Boolean NEAR ValidateType PROTO((DocType type));
213 static Boolean NEAR ValidateField PROTO((DocType type, DocField field));
214 static Boolean NEAR ValidateUid PROTO((DocType type, DocUid uid));
215 static CharPtr NEAR MakePath PROTO((Int2 nSdir,Int2 nPref,Int2 nSuf, Int2 nExtn));
216 static Boolean NEAR LoadUidIndex PROTO((DocType type));
217 static Int2    NEAR LoadTrmIndex PROTO((DocType type, DocField field));
218 static void    NEAR FreeTrmIndex PROTO((void));
219 /**** not used in reading cdrom ******
220 static Int4 NEAR MergeSegOffset PROTO((Int2 seg, Int4 offset));
221 *************************************/
222 static Boolean NEAR SplitSegOffset PROTO((Int4 value, Int2Ptr segptr, Int4Ptr offsetptr));
223 static FILE * NEAR CdDocFil PROTO((DocType type, DocUid uid, UidIdxPtr idx));
224 
225 static Boolean SwapOutCd PROTO((VoidPtr med));
226 static Boolean SwapInCd PROTO((VoidPtr med));
227 static void NEAR ForceCdFini PROTO((void));
228 static Boolean CdInitMedia PROTO((VoidPtr med));
229 static Boolean CdFmtInfo PROTO((VoidPtr medName));
230 
231 static CdTermPtr NEAR CdTrmLocate PROTO((CharPtr term, Int2 page));
232 static UidIdxPtr NEAR UidIdxGet PROTO((DocType type, DocUid uid, UidIdxPtr idx));
233 static void NEAR linksort PROTO((Int4Ptr uids, Int4Ptr wts, Int4 n));
234 static DecompInfoPtr NEAR DecompInit PROTO((FILE *fp));
235 static Boolean NEAR DecompFini PROTO((AsnIoPtr aip, DecompInfoPtr dip));
236 static void NEAR DecompInfoFree PROTO((DecompInfoPtr dcp));
237 static Int2 LIBCALLBACK DecompReadFunc PROTO((Pointer p, CharPtr buff, Uint2 count));
238 static Int2 HuffmanRead PROTO((DecompInfoPtr dcp, CharPtr buff, Uint2 count));
239 static Boolean NEAR IsOKMagic PROTO((Uint4 magic, CharPtr volume_label));
240 static CdTermPtr  CdTermRead PROTO((Int2 type, Int2 field, CharPtr ptr, CharPtr bufr, Int2 page));
241 
242 /*****************************************************************************
243 *
244 *   General purpose public functions
245 *
246 *****************************************************************************/
247 
 248 /*****************************************************************************
 249 *
 250 *   Term-page buffer state, configuration flags, error messages, term cache
 251 *
 252 *****************************************************************************/
253 static CharPtr trmbuf;   /* for term pages */
254 static DocType trmtype;  /* type of last term used in trmbuf */
255 static DocField trmfield;  /* field of last term used in trmbuf */
256 static Int2 trmpage,       /* page # of first page in trmbuf */
257                         trmpages;                       /* number of pages in memory */
258 static size_t trmpagesrequest;     /* how bytes to read (5 * BLKSIZE) */
259 
260 static Boolean oldStyleCfgFile;
261 
262 
263 static Int2 nCdVer;
264 static char *sCdError [] = {
265     "",
266     "Memory allocation error",
267     "File create error",
268 #ifdef WIN_MSWIN
269         "File open error on %Fs",
270 #else
271     "File open error on %s",
272 #endif
273     "File seek error",
274     "File read error",
275     "File write error",
276     "Bad database type code [%d]",
277     "Bad field code [%d]",
278     "No terms for type/field [%d/%d]",
279     "Bad uid number [%ld]",
280     "Bad directory number [%d]",
281     "Cannot read new data format",
282     "Index files out of date",
283     "Data decompression error",
284     "Programmer error"
285 };
286 
287 static CdTermPtr cdtrmcache [10]; /* cache of most recent CdTrmFind results */
288 
289 /*****************************************************************************
290 *
291 *   CdInit()
292 *     uses environment variables to configure initialization
293 *
294 *****************************************************************************/
295 
296 Boolean  CdInit (void)
297 
298 {
299     char media[64];
300 
301         ConfigInit();
302 
303     if (nCdVer) {
304         numinits++;
305         return TRUE;   /* already setup */
306     }
307 
308         oldStyleCfgFile = FALSE;
309 
310     GetAppParam ("ncbi", "NCBI", "MEDIA", "", media, sizeof media);
311 
312         /* This is a work-around to provide backwards compatibility for old       */
313         /* config files which do not specify MEDIA                                */
314         if (media[0] == '\0')
315         {
316                 StrCpy(media, "NCBI");
317                 SetSoleMedia();
318                 oldStyleCfgFile = TRUE;
319         }
320 
321     return (ParseMedia(CdInitMedia, MEDIUM_CD | MEDIUM_DISK) != 0);
322 }
323 
324 
325 static Boolean CdInitMedia(VoidPtr med)
326 
327 {
328     char CdRootPath[PATH_MAX];
329     char sVol[32];
330         char datvalpath[PATH_MAX];
331         CharPtr mediaName = (CharPtr) med;
332 
333     GetAppParam ("ncbi", mediaName, "ROOT", "", CdRootPath, sizeof CdRootPath);
334 
335     vi = NULL;
336 
337     bAppendVer = FALSE;
338     upperCaseIt = FALSE;
339 
340     /* "VAL" overrides "ROOT" for purposes of finding first copy of .val */
341     if (GetAppParam ("ncbi", mediaName, "VAL", CdRootPath, datvalpath, sizeof datvalpath))
342         CdSetPath (DIR_VAL, CdRootPath);
343 
344     FileBuildPath(datvalpath, NULL, NULL);
345     if (! CdInitialize (CdRootPath, sVol, datvalpath, &nCdVer)) {
346         return  FALSE;
347     }
348 
349     trmpagesrequest =(size_t)(5 * BLKSIZE);   /* number of termpages to request */
350 
351     if (GetAppParam ("ncbi", mediaName, "IDX", "", CdRootPath, sizeof CdRootPath))
352         CdSetPath (DIR_IDX, CdRootPath);
353 
354     /* work-around to find alternate index files when using old-style    */
355     /* configuration file                                                */
356     if (oldStyleCfgFile)
357     {
358         if (StrICmp(sVol, "SeqData") == 0 &&
359             GetAppParam ("ncbi", mediaName, "SEQIDX", "", CdRootPath,
360             sizeof CdRootPath))
361         {
362             CdSetPath (DIR_IDX, CdRootPath);
363         }
364         if (StrICmp(sVol, "MedData") == 0 &&
365             GetAppParam ("ncbi", mediaName, "MEDIDX", "", CdRootPath,
366             sizeof CdRootPath))
367         {
368             CdSetPath (DIR_IDX, CdRootPath);
369         }
370     }
371 
372     if (GetAppParam ("ncbi", mediaName, "LNK", "", CdRootPath, sizeof CdRootPath))
373         CdSetPath (DIR_LNK, CdRootPath);
374     if (GetAppParam ("ncbi", mediaName, "MED", "", CdRootPath, sizeof CdRootPath))
375         CdSetPath (DIR_MED, CdRootPath);
376     if (GetAppParam ("ncbi", mediaName, "SEQ", "", CdRootPath, sizeof CdRootPath))
377         CdSetPath (DIR_SEQ, CdRootPath);
378     if (GetAppParam ("ncbi", mediaName, "TRM", "", CdRootPath, sizeof CdRootPath))
379         CdSetPath (DIR_TRM, CdRootPath);
380     SaveCdMediaContext(mediaName);
381 
382     return TRUE;
383 }
384 
385 
386 static Boolean NEAR SaveCdMediaContext(CharPtr media_name)
387 
388 {
389         MediaPtr media;
390         CdMediaInfoPtr cdm;
391         int i;
392         char ejectable[10];
393         char buffer[100];
394 
395         media = PreInitMedia(media_name);
396 
397         if (media == NULL)
398                 return FALSE;
399 
400         if (media->inited_partial || (media->media_type != MEDIUM_CD &&
401                 media->media_type != MEDIUM_DISK))
402                 return TRUE;
403 
404         media->swapOutMedia = SwapOutCd;
405         media->swapInMedia = SwapInCd;
406     GetAppParam ("ncbi", media_name, "EJECTABLE", "0", ejectable, sizeof ejectable);
407 
408         cdm = (CdMediaInfoPtr) MemNew(sizeof(CdMediaInfo));
409         cdm->ejectable = atoi(ejectable);
410         cdm->device_name = NULL;
411         cdm->raw_device_name = NULL;
412         cdm->mount_point = NULL;
413         cdm->mount_cmd = NULL;
414 
415     if (GetAppParam ("ncbi", media_name, "DEVICE_NAME", "", buffer, sizeof buffer))
416         {
417         cdm->device_name = StringSave(buffer);
418         }
419     if (GetAppParam ("ncbi", media_name, "RAW_DEVICE_NAME", "", buffer, sizeof buffer))
420         {
421         cdm->raw_device_name = StringSave(buffer);
422         }
423     if (GetAppParam ("ncbi", media_name, "MOUNT_POINT", "", buffer, sizeof buffer))
424         {
425         cdm->mount_point = StringSave(buffer);
426         }
427     if (GetAppParam ("ncbi", media_name, "MOUNT_CMD", "", buffer, sizeof buffer))
428         {
429         cdm->mount_cmd = StringSave(buffer);
430         }
431         cdm->hold_idx_open = FALSE;
432     if (GetAppParam ("ncbi", media_name, "HOLD_IDX_OPEN", "", buffer, sizeof buffer))
433         {
434         cdm->hold_idx_open = StringICmp(buffer, "TRUE") == 0;
435         }
436 
437         media->media_info = (VoidPtr) cdm;
438 
439         for (i = 0; i < NDIR; i++)
440         {
441                 cdm->sPath[i] = sPath[i];
442                 sPath[i] = NULL;
443         }
444 
445         media->entrez_info = vi;
446         cdm->bAppendVer = bAppendVer;
447         cdm->upperCaseIt = upperCaseIt;
448 
449         media->inited_partial = TRUE;
450         
451         return TRUE;
452 }
453 
454 
455 static Boolean SwapOutCd(VoidPtr curm)
456 {
457         int i;
458         MediaPtr CurMedia = (MediaPtr) curm;
459         CdMediaInfoPtr cmip;
460 
461         if (CurMedia != NULL)
462         {
463                 cmip = (CdMediaInfoPtr) CurMedia->media_info;
464                 CurMedia->entrez_info = vi;
465                 vi = NULL; /* avoid freeing it */
466 
467                 for (i = 0; i < NDIR; i++)
468                 { /* copy and avoid freeing */
469                         cmip->sPath[i] = sPath[i];
470                         sPath[i] = NULL;
471                 }
472 
473                 ForceCdFini();
474         }
475 
476         return TRUE;
477 }
478 
479 
480 static Boolean SwapInCd(VoidPtr med)
481 {
482         MediaPtr newMedia = (MediaPtr) med;
483         int i;
484         CdMediaInfoPtr cmip;
485 
486         cmip = (CdMediaInfoPtr) newMedia->media_info;
487 
488         for (i = 0; i < NDIR; i++)
489         { /* load up sPath */
490                 sPath[i] = cmip->sPath[i];
491         }
492         vi = newMedia->entrez_info;
493 
494         bAppendVer = cmip->bAppendVer;
495         upperCaseIt = cmip->upperCaseIt;
496     HoldIdxOpen = cmip->hold_idx_open;
497 
498         ExtraInitWork();
499 
500         return TRUE;
501 }
502 
503 
504 static void NEAR ExtraInitWork()
505 
506 {
507     size_t bufsize;
508         int i;
509 
510     /* initialize cached CdTermPtr array */
511     for (i = 0; i < 10; i++) {
512       cdtrmcache [i] = NULL;
513     }
514 
515     term_idx_type = -1;
516     term_idx_field = -1;
517 
518     if (buffer == NULL) {
519         bufsize = (size_t) MAX (MAX ((size_t) vi->type_bucket_size, (size_t) vi->field_bucket_size), sizeof (Int4) * 512);
520         buffer = (CharPtr) MemNew(bufsize);
521     }
522 }
523 
524 
525 static void NEAR ForceCdFini(void)
526 
527 {
528         Int4 sav_numinits = numinits;
529         CharPtr savDetailedBuf;
530 
531         ConfigInit(); /* simulate Init() to balance Fini() */
532     savDetailedBuf = CdDetailedBuf;
533         CdDetailedBuf = NULL; /* avoid freeing in Fini() */
534         numinits = 1;
535         CdFini();
536         numinits = sav_numinits;
537     CdDetailedBuf = savDetailedBuf;
538 }
539 
540 
541 /*****************************************************************************
542 *
543 *   CdFini()
544 *      closes cdromlib session
545 *   
546 *****************************************************************************/
547 Boolean  CdFini (void)
548 
549 {
550         Int2 i;
551         CdTermPtr trmptr;
552 
553         ConfigFini();
554         numinits--;
555         if (numinits)          /* haven't fixed all initializations yet */
556                 return TRUE;
557 
558     /* free cached CdTermPtr array */
559     for (i = 0; i < 10; i++) {
560       trmptr = cdtrmcache [i];
561       if (trmptr != NULL) {
562         if (trmptr->term != NULL) {
563           MemFree (trmptr->term);
564         }
565         MemFree (trmptr);
566       }
567       cdtrmcache [i] = NULL;
568     }
569 
570     buffer = (CharPtr) MemFree(buffer);
571         FreeTrmIndex();
572         for (i = 0; i < NDIR; i++)
573                 sPath[i] = (CharPtr) MemFree(sPath[i]);
574         for (i = 0; i < NTYPE; i++)
575                 if (i != TYP_NT)
576                         type_bucket_index[i] = (Int4Ptr) MemFree(type_bucket_index[i]);
577                 else
578                         type_bucket_index[i] = NULL;  /* NT and AA use same index */
579         vi = EntrezInfoFree(vi);
580         trmbuf = (CharPtr) MemFree(trmbuf);
581         trmpages = 0;
582         nCdVer = 0;
583         bAppendVer = FALSE;
584         upperCaseIt = FALSE;
585 
586     for (i = 0; i < NTYPE+2; i++)
587         {
588         if (IdxFilePtr[i] != NULL)
589                 {
590                         FileClose(IdxFilePtr[i]);
591                         IdxFilePtr[i] = NULL;
592                 }
593         }
594 
595     CdDetailedBuf = (CharPtr) MemFree(CdDetailedBuf);
596 
597         return TRUE;
598 }
599 
600 
601 /*  =========================================================================
602  *      PUBLIC FUNCTION BODIES
603  */
604 
605 
606 /*  -------------------- CdInitialize() --------------------------------
607  *  CdInitialize -- Initializes the library
608  *
609  *  Parameters:    sCdRoot:   CD-ROM root path
610  *                 sVolume:   pointer to volume name buffer (VOLUME_MAX)
611  *                 ver:       pointer to version number buffer
612  *
613  *  Return value:  TRUE:      Success.
614  *                 FALSE:     Failure;  refer to error code.
615  *
616  *  Notes:  1. The file cdromdat.val must be in the specified root path.
617  *          2. Default paths strings for various subdirectories are 
618  *             created by this function below the specified root path.
619  *             Use CdSetPath() to override the defaults.
620  */
621 
622 static Boolean NEAR CdInitialize (CharPtr sCdRoot, CharPtr sVolume, CharPtr datvalpath, Int2Ptr ver)
623 
624 {
625     Int2   i;
626     AsnIoPtr aip;
627     Char drctry [16];
628     CharPtr p;
629     size_t bufsize;
630 
631 
632     *sVolume = '\0';
633     *ver = 0;
634 
635         numinits++;     /* count the number of initialization calls */
636 
637     if (vi != NULL) {          /* already initialized ! */
638         StringCpy (sVolume, vi->volume_label);
639         *ver = vi->version;
640         return  TRUE;
641     }
642 
643     /* initialize cached CdTermPtr array */
644     for (i = 0; i < 10; i++) {
645       cdtrmcache [i] = NULL;
646     }
647 
648     term_idx_type = -1;
649     term_idx_field = -1;
650     
651     for (i = 0; i < NTYPE+2; i++)
652         IdxFilePtr[i] = NULL;
653 
654     /* initialize storage for path names */
655     for (i = 0; i < NDIR; i++)
656         if (sPath[i] == NULL)
657             sPath[i] = (CharPtr) MemNew(PATH_MAX + 1);
658 
659     /* initialize root path string variable */
660     StringNCpy (sPath[DIR_ROOT], sCdRoot, PATH_MAX);
661     FileBuildPath(sPath[DIR_ROOT], NULL, NULL);
662 
663     /* read the CDROMLIB.INF file */
664     if ((aip = EntrezInfoOpen (datvalpath)) == NULL)
665         return FALSE;
666 
667     /* set default paths for subdirectories */
668     for (i=1; i<NDIR; i++) {
669         StringCpy (sPath[i], sPath[DIR_ROOT]);
670         StringCpy (drctry, sSdir[i]);
671         if (upperCaseIt) {
672           p = drctry;
673           while (*p != '\0') {
674             *p = TO_UPPER (*p);
675             p++;
676           }
677         }
678         FileBuildPath(sPath[i], drctry, NULL);
679     }
680 
681     vi = EntrezInfoAsnRead(aip, NULL);
682     AsnIoClose(aip);
683     if (vi == NULL)
684         return FALSE;
685 
686     /* check for incompatible format */
687     if (vi->format != CURRENT_FORMAT_VERSION) {
688         ErrPostEx(SEV_ERROR, ERR_CD_BADFORMAT, 0, sCdError[ERR_CD_BADFORMAT]);
689         return FALSE;
690     }  
691 
692     if (buffer == NULL) {
693         bufsize = (size_t) MAX (MAX ((size_t) vi->type_bucket_size, (size_t) vi->field_bucket_size), sizeof (Int4) * 512);
694         buffer = (CharPtr) MemNew(bufsize);
695     }
696 
697     StringCpy (sVolume, vi->volume_label);
698     *ver = vi->version;
699     return  TRUE;
700 }
701 
702 /*****************************************************************************
703 *
704 *   CdGetInfo()
705 *       Gets Entrez info pointer
706 *
707 *****************************************************************************/
708 EntrezInfoPtr CdGetInfo (void)
709 
710 {
711         return vi;
712 }
713 
714 
715 /*****************************************************************************
716 *
717 *   CdFmtInfo()
718 *       Formats CD-ROM specific "detailed info" and either stores the number
719 *       of characters required to format the text, or concatentates the
720 *       formatted string to a global string
721 *
722 *****************************************************************************/
723 static Boolean CdFmtInfo(VoidPtr medName)
724 {
725   char buf[256];
726   MediaPtr media;
727   CharPtr mediaName = (CharPtr) medName;
728   CdMediaInfoPtr cdm;
729 
730 
731   if ((media = PreInitMedia(mediaName)) == NULL || media->invalid ||
732           (cdm = (CdMediaInfoPtr) media->media_info) == NULL)
733   {
734     return FALSE;
735   }
736 
737   if (media->media_type == MEDIUM_CD)
738     StrCpy(buf, "\n  CD-ROM image from ");
739   else
740     StrCpy(buf, "\n  Hard disk image from ");
741   if (cdm->sPath[DIR_ROOT] == NULL)
742   {
743         StrCat(buf, "<location unknown>");
744   } else {
745     StrCat(buf, cdm->sPath[DIR_ROOT]);
746   }
747   if (media->entrez_info != NULL && media->entrez_info->volume_label != NULL)
748   {
749     StrCat(buf, "\n    Volume label is ");
750     StrCat(buf, media->entrez_info->volume_label);
751   }
752   if (media->formal_name == NULL)
753   {
754     StrCat(buf, "\n    [ this medium has no formal name ]");
755   }
756   else {
757     StrCat(buf, "\n    Formal name is ");
758     StrCat(buf, media->formal_name);
759   }
760   StrCat(buf, "\n");
761 
762   if (countOnly)
763   {
764         detInfoCharCount += StringLen(buf);
765   } else {
766         StrCat(CdDetailedBuf, buf);
767   }
768 
769   /* always return FALSE, so that ParseMedia() will refrain from setting */
770   /* validity flags                                                      */
771   return FALSE;
772 }
773          
774   
775 /*****************************************************************************
776 *
777 *   CdDetailedInfo()
778 *       Gets formatted text information about the current status, or returns
779 *       NULL; the text (if any) is stored in a statically allocated buffer
780 *
781 *****************************************************************************/
782 
783 CharPtr CdDetailedInfo (void)
784 
785 {
786   if (numinits == 0) /* not yet initialized */
787   {
788         if (CdDetailedBuf == NULL)
789         {
790           CdDetailedBuf = StringSave("CD-ROM and HARD DISK access information is not currently available\n");
791         }
792         return CdDetailedBuf;
793   }
794   detInfoCharCount = 0;
795   countOnly = TRUE;
796   ParseMedia(CdFmtInfo, MEDIUM_CD | MEDIUM_DISK);
797   countOnly = FALSE;
798   if (detInfoCharCount == 0)
799     return NULL;
800   if (CdDetailedBuf != NULL)
801   {
802     CdDetailedBuf = (CharPtr) MemFree(CdDetailedBuf);
803   }
804   CdDetailedBuf = (CharPtr) MemNew(detInfoCharCount + 200);
805   StrCpy(CdDetailedBuf, "CD-ROM and HARD DISK ACCESS\n");
806   if (CurMediaType() == MEDIUM_CD || CurMediaType() == MEDIUM_DISK)
807   {
808     StrCat(CdDetailedBuf, "  Currently active medium is ");
809     StrCat(CdDetailedBuf, (GetCurMedia())->formal_name);
810     StrCat(CdDetailedBuf, "\n");
811   }
812   ParseMedia(CdFmtInfo, MEDIUM_CD | MEDIUM_DISK);
813   return CdDetailedBuf;
814 }
815 
816 /*  -------------------- CdSetPath() ---------------------------------
817  */
818 static Boolean NEAR CdSetPath (Int2 dir, CharPtr path)
819 
820 {
821     int  k = 0;
822 
823         if (path != NULL)
824                 k = StringLen(path);
825 
826     if ((dir<2) || (dir>=NDIR) || (k==0)) {
827                 ErrPostEx(SEV_ERROR, ERR_CD_BADDIR, 0, sCdError[ERR_CD_BADDIR], dir);
828         return FALSE;
829     }
830     StringCpy (sPath[dir], path);
831     FileBuildPath(sPath[dir], NULL, NULL);
832     return TRUE;
833 }
834 
835 /*****************************************************************************
836 *
837 *   UidIdxGet(type, uid, idx)
838 *
839 *****************************************************************************/
840 static UidIdxPtr NEAR UidIdxGet (DocType type, DocUid uid, UidIdxPtr idx)
841 
842 {
843         Int4Ptr ip;
844         Int2 i, j, l, r;
845     FILE * fp;
846     CharPtr path;
847         struct idxrec {
848                 DocUid uid;
849                 Int4 entry_offset,
850                         link_offset;
851         } PNTR idxptr;
852 
853         if (! ValidateUid(type, uid))
854                 return NULL;
855 
856         if (type == TYP_SEQ)    /* AA, NT, SEQ all the same */
857                 type = TYP_AA;
858                 
859     if (type_bucket_index[type] == NULL)
860         {
861         if (! LoadUidIndex(type))
862                         return NULL;
863         }
864 
865         ip = type_bucket_index[type];
866         r = vi->types[type].num_bucket - 1;
867         l = 0;
868         j = 0;
869         while ((l <= r) && (! ((ip[j] <= uid) && (ip[j+1] > uid))))
870         {
871                 j = (l + r) / 2;
872                 if (uid > ip[j])
873                         l = j + 1;
874                 else
875                         r = j - 1;
876         }
877                 
878         if (type != TYP_ML)
879                 type = TYP_SEQ;
880 
881         if ((fp = IdxFilePtr[type]) == NULL)
882         {
883                 path = MakePath (DIR_IDX, type, SUF_UID, EXT_LST);
884                 if ((fp = FileOpen(path, "rb")) == NULL)
885                 {
886                         ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
887                         return NULL;
888                 }
889         }
890 
891         fseek(fp, (long)j * (long)vi->type_bucket_size, SEEK_SET);
892         
893         j = FileRead(buffer, 1, vi->type_bucket_size, fp);
894 
895         if (HoldIdxOpen)
896         {
897                 IdxFilePtr[type] = fp;
898         } else {
899             FileClose(fp);
900         }
901 
902         if (j == 0)
903         {
904                 ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
905                 return NULL;
906         }
907 
908         idxptr = (struct idxrec PNTR) buffer;
909         j = vi->type_bucket_size / sizeof(struct idxrec);
910         for (i = 0; i < j; i++, idxptr++)
911         {
912                 if (uid == SwapInt4(idxptr->uid))
913                 {
914                     if (idx == NULL)
915                     idx = (UidIdxPtr) MemNew(sizeof(UidIdx));
916                     else
917                         MemFill(idx, '\0', sizeof(UidIdx));
918 
919                         idx->type = type;
920                     idx->uid = uid;
921                         idx->entry_offset = SwapInt4(idxptr->entry_offset);
922                         idx->sum_offset = 0;
923                         idx->link_offset = SwapInt4(idxptr->link_offset);
924                         if (type == TYP_SEQ)
925                         {
926                                 if (idx->entry_offset & 0x80000000)
927                                         idx->type = TYP_AA;
928                                 else
929                                         idx->type = TYP_NT;
930                         }
931                         return idx;
932                 }
933         }
934 
935         return NULL;
936 }
937 
938 /*  -------------------- CdTrmPageCt() --------------------------------
939  *  CdTrmPageCt -- returns the number of term pages for a type/field pair.
940  *
941  *  Parameters:     type:     database code.
942  *                  field:    field code.
943  *
944  *  Return value:   non-zero:   Success;  page count.
945  *                  zero:       Failure;  refer to error code.
946  */
947 
948 Int2    CdTrmPageCt (DocType type, DocField field)
949 
950 {
951     if (!ValidateType (type))  return  0;
952     if (!ValidateField (type, field))  return 0;
953     return (Int2)  vi->types[type].fields[field].num_bucket;
954 }
955 
956 
957 /*  -------------------- CdTrmLookup() --------------------------------
958  *  CdTrmLookup -- returns the first page that COULD contain a term.
959  *
960  *  Parameters:     type:     database code.
961  *                  field:    field code.
962  *                  term:   term (or term fragment) to lookup.
963  *
964  *  Return value:   non-negative:   Success;  page number. (zero-based)
965  *                  negative:       Failure;  refer to error code.
966  */
967 
968 Int2    CdTrmLookup (DocType type, DocField field, CharPtr term)
969 
970 {
971     int  i;
972 
973     if (!LoadTrmIndex (type, field))
974                 return(-1);
975 
976     for (i=0; i< term_idx_count; i++) {
977         if (MeshStringICmp (term_idx_str[i], term) >= 0) 
978             return  MAX (0,i-2);
979     }
980     return  MAX (0,term_idx_count-2);
981 }
982 
983  
984 /*  -------------------- CdTrmPages() ---------------------------------
985  *  CdTrmPages -- fetches a range of term pages from the CD-ROM.
986  *
987  *  Parameters:     type:     database code.
988  *                  field:    field code.
 *                  pg:     page number of first page to read.
 *                  (the number of bytes read is fixed by trmpagesrequest,
 *                  and the data is placed in the static buffer trmbuf.)
992  *
993  *  Return value:   non-zero:   Success;  number of pages read.
994  *                  zero:       Failure;  refer to error code.
995  *
996  *  Notes:  The term pages contain a series of variable-length term records,
997  *      each of which is an ASCII string with the following structure:
998  *
999  *      <term>\t<c1>\t<c2>\t<offset>\n
1000  *
1001  *      term:       term
1002  *      c1:         count of 'special' occurrences.
1003  *      c2:         count of total occurrences.  ** NOTE **
1004  *      offset:     offset in postings file of list of document numbers.
1005  *      \t:         tab character  (?).
1006  *      \n:         newline character  ('\x0A').
1007  *
1008  *      A term record may cross a page boundary.
1009  */
1010 
1011 Int2    CdTrmPages (DocType type, DocField field, Int2 pg)
1012 
1013 {
1014     CharPtr path, buff;
1015     FILE   *fd;
1016     Int4    offset;
1017     size_t    bytes;
1018 
1019         if ((type == trmtype) && (field == trmfield) && (pg == trmpage) && (trmpages))
1020                 return trmpages;
1021 
1022         if (trmbuf == NULL)
1023                 trmbuf = (CharPtr) MemNew(trmpagesrequest + 2); /* allow terminating 00 */
1024         buff = trmbuf;   /* use local static buffer */
1025     /* need to fill buffer with NULL's */
1026     MemFill(buff, 0, trmpagesrequest + 2);
1027         trmpages = 0;    /* no pages loaded */
1028 
1029     if (!ValidateType (type))  return 0;
1030     if (!ValidateField (type, field))  return 0;
1031 
1032     path = MakePath (DIR_TRM, type, field, EXT_LST);
1033     if ((fd = FileOpen(path, "rb")) ==NULL)  {
1034                 ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1035         return  0;
1036     }
1037     offset = (long) pg * BLKSIZE;
1038     fseek (fd, offset, SEEK_SET);
1039         bytes = FileRead(buff, 1, trmpagesrequest, fd);
1040     FileClose (fd);
1041 
1042         if (bytes == trmpagesrequest)   /* got the extra page */
1043                 bytes -= BLKSIZE;
1044         trmpages = (Int2)(bytes/BLKSIZE);
1045         if (bytes % BLKSIZE)   /* got a partial last page */
1046                 trmpages++;
1047         trmtype = type;
1048         trmfield = field;
1049         trmpage = pg;
1050                                   /* may have to switch \n for \r */
1051     return  trmpages;
1052 }
1053 
1054 
1055 /*  -------------------- CdTrmUidsFil () --------------------------------
 *  CdTrmUidsFil -- retrieves a list of uids for a term and writes them to a file.
 *
 *  Parameters:     type:         database code.
 *                  field:        field code.
 *                  offset:     offset into postings file.
 *                  count:      number of uids.
 *                  filename:   name of file to receive the results.
 *                  append:     if TRUE, append to the file; otherwise overwrite it.
1063  *
1064  *  Return value:   non-zero:   Success; number of documents (same as count).
1065  *                  zero:       Failure;  refer to error code.
1066  *
1067  *  Notes:   the offset value is obtained by:  
1068  *      1)  looking up a term (using CdTrmLookup()).
1069  *      2)  loading term pages (using CdTrmPages()).
1070  *      3)  finding the term in the loaded pages.
1071  */
1072 
1073 Int4    CdTrmUidsFil (DocType type, DocField field, Int4 offset, Int4 count, CharPtr filename, Boolean append)
1074 
1075 {
1076     Int4  i;
1077     FILE *fd1;
1078     FILE *fd2;
1079     Char mode [4];
1080     CharPtr path;
1081     Int4Ptr ptr;
1082     size_t cnt;
1083     Int4 cntr;
1084 
1085     if (!ValidateType (type))   return  0;
1086     if (!ValidateField (type, field))  return  0;
1087 
1088     path = MakePath (DIR_TRM, type, field, EXT_PST);
1089     if ((fd1 = FileOpen(path, "rb")) == NULL)
1090         {
1091                 ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1092         return 0;
1093     }
1094 
1095     if (append) {
1096       StringCpy (mode, "ab");
1097     } else {
1098       StringCpy (mode, "wb");
1099     }
1100     if ((fd2 = FileOpen(filename, mode)) == NULL)
1101         {
1102         FileClose (fd1);
1103                 ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], filename);
1104         return  0;
1105     }
1106 
1107     fseek (fd1, offset, SEEK_SET);
1108 
1109     cntr = count;
1110     cnt = (size_t) MIN (cntr, (Int4)(BLKSIZE / sizeof(Int4)));
1111         ptr = (Int4Ptr) buffer;
1112     while (cnt > 0)
1113         {
1114         FileRead (buffer, sizeof (Int4), cnt, fd1);
1115         for (i = 0; i < (Int4) cnt; i++)
1116                         ptr[i] = SwapInt4(ptr[i]);
1117         if (! FileWrite (buffer, sizeof(Int4), cnt, fd2))
1118                 {
1119                         ErrPostEx(SEV_ERROR, ERR_CD_FILEWRITE, 0, sCdError[ERR_CD_FILEWRITE]);
1120             break;
1121         }
1122         cntr -= cnt;
1123             cnt = (size_t) MIN (cntr, (Int4)(BLKSIZE / sizeof(Int4)));
1124     }
1125 
1126     FileClose (fd1);
1127     FileClose (fd2);
1128         if (cntr)    /* didn't finish */
1129                 return 0;
1130         else
1131                 return count;
1132 }
1133 
1134 /*  -------------------- CdTrmUidsMem () --------------------------------
1135  *  CdTrmUidsMem -- retrieves a list of uids for a term.
1136  *
1137  *  Parameters:     type:         database code.
1138  *                  field:        field code.
1139  *                  offset:     offset into postings file.
1140  *                  count:      number of uids.
1141  *                  mem:       storage to receive the results.
1142  *
1143  *  Return value:   non-zero:   Success; number of documents (same as count).
1144  *                  zero:       Failure;  refer to error code.
1145  *
1146  *  Notes:   the offset value is obtained by:  
1147  *      1)  looking up a term (using CdTrmLookup()).
1148  *      2)  loading term pages (using CdTrmPages()).
1149  *      3)  finding the term in the loaded pages.
1150  */
1151 
1152 Int4    CdTrmUidsMem (DocType type, DocField field, Int4 offset, Int4 count, DocUidPtr mem)
1153 
1154 {
1155     Int4  i;
1156     FILE *fd1;
1157     CharPtr path;
1158     size_t cnt;
1159 
1160     if (!ValidateField (type, field))
1161                 return  0;
1162         if (mem == NULL)
1163                 return 0;
1164 
1165     path = MakePath (DIR_TRM, type, field, EXT_PST);
1166     if ((fd1 = FileOpen(path, "rb")) == NULL)
1167         {
1168                 ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1169         return 0;
1170     }
1171 
1172     fseek (fd1, offset, SEEK_SET);
1173 
1174     cnt = FileRead ((VoidPtr)mem, sizeof (Int4), (size_t) count, fd1);
1175     for (i = 0; i < (Int4) cnt; i++)
1176                 mem[i] = SwapInt4(mem[i]);
1177 
1178     FileClose (fd1);
1179         return (Int4) cnt;
1180 }
1181 
1182 /*  -------------------- CdDocAsnOpen() -----------------------------------
1183  *  CdDocAsnOpen -- returns an active AsnIoPtr for a document.
1184  *
1185  *  Parameters:     type:        class code (ML/AA/NT).
1186 *                               uid:         unique identifier
1187  *
1188  *  Return value:   non-null:   Success;  active asnioptr
1189  *                  null:       Failure;  refer to error code.
1190  *
1191  *  For TYP_ML, the value is a Medline-entry
1192 *   For TYP_AA or TYP_NT it is a Bioseq-set.
1193  */
1194 
1195 AsnIoPtr  CdDocAsnOpen (DocType type, DocUid uid)
1196 
1197 {
1198     FILE * fd2;
1199     AsnIoPtr aip;
1200     DecompInfoPtr decomp;
1201 
1202     fd2 = CdDocFil (type, uid, NULL);
1203     if (fd2 == NULL)
1204         return NULL;
1205 
1206         if (vi->no_compression)
1207         { /* no compression on this data source */
1208                 aip = AsnIoNew(ASNIO_BIN_IN, fd2, NULL, NULL, NULL);
1209         }
1210         else { /* use alternate read function for compressed data sources */
1211         decomp = DecompInit(fd2);
1212         aip = AsnIoNew(ASNIO_BIN_IN, fd2, decomp, DecompReadFunc, NULL);
1213         if (aip == NULL)
1214                         DecompFini(NULL, decomp);
1215         decomp->aip = aip;
1216         }
1217 
1218     return aip;
1219 }
1220 
1221 /*****************************************************************************
1222 *
1223 *   CdDocAsnClose(aip)
1224 *       closes an aip opened by CdDocAsnOpen
1225 *
1226 *****************************************************************************/
1227 AsnIoPtr  CdDocAsnClose (AsnIoPtr aip)
1228 
1229 {
1230         if (!vi->no_compression)
1231         {
1232         DecompFini(aip, NULL);
1233         }
1234         
1235     AsnIoClose(aip);
1236 
1237     return NULL;
1238 }
1239 
1240 /*  =========================================================================
1241  *      PRIVATE FUNCTION BODIES
1242  */
1243 
1244 static Boolean NEAR  ValidateUid (DocType type, DocUid uid)
1245 
1246 {
1247         EntrezTypeDataPtr tdp;
1248         DocType tmp;
1249 
1250         if (! ValidateType(type))
1251                 return FALSE;
1252 
1253         tmp = type;
1254         if (tmp == TYP_SEQ)
1255                 tmp = TYP_AA;
1256 
1257         tdp = &vi->types[tmp];
1258         if ((uid >= tdp->minuid) && (uid <= tdp->maxuid))
1259                 return TRUE;
1260 
1261         if (type == TYP_SEQ)
1262         {
1263                 tdp = &vi->types[TYP_NT];
1264                 if ((uid >= tdp->minuid) && (uid <= tdp->maxuid))
1265                         return TRUE;
1266         }
1267 
1268         return FALSE;
1269 }
1270 
1271 static Boolean NEAR  ValidateType (DocType type)
1272 
1273 {
1274     if (((type < 0) || (type >= NTYPE)) && (type != TYP_SEQ)) {
1275                 ErrPostEx(SEV_ERROR, ERR_CD_BADTYPE, 0, sCdError[ERR_CD_BADTYPE], type);
1276         return  FALSE;
1277     }
1278     return  TRUE;
1279 }
1280 
1281 static Boolean NEAR  ValidateField (DocType type, DocField field)
1282 
1283 {
1284     if (type<0 || type>=NTYPE) {
1285                 ErrPostEx(SEV_ERROR, ERR_CD_BADTYPE, 0, sCdError[ERR_CD_BADTYPE], type);
1286         return  FALSE;
1287     }
1288     if (field<0 || field>=NFLD) {
1289                 ErrPostEx(SEV_ERROR, ERR_CD_BADFIELD, 0, sCdError[ERR_CD_BADFIELD], field);
1290         return  FALSE;
1291     }
1292     if (vi->types[type].fields[field].num_bucket == 0) {
1293                 ErrPostEx(SEV_ERROR, ERR_CD_NOTERMS, 0, sCdError[ERR_CD_NOTERMS], type, field);
1294         return  FALSE;
1295     }
1296     return  TRUE;
1297 }
1298 
1299 static CharPtr NEAR  MakePath (Int2 nSdir, Int2 nPref, Int2 nSuff, Int2 nExtn)
1300 
1301 {
1302     Char ltemp[8], filename[60];
1303     Char   c;
1304     CharPtr p;
1305 
1306     StringCpy (buffer, sPath[nSdir]);
1307 
1308     StringCpy (filename, sPref[nPref]);
1309     StringCat (filename, sSuff[nSuff]);
1310     StringCat (filename, ".");
1311         if (nExtn <0) {
1312         c = (char) -nExtn;
1313         ltemp[0] = (char) ('0' + (c/100));
1314         ltemp[1] = (char) ('0' + ((c%100)/10));
1315         ltemp[2] = (char) ('0' + (c%10));
1316         ltemp[3] = '\0';
1317         StringCat (filename, ltemp);
1318     }
1319     else
1320         StringCat (filename, sExtn[nExtn]);
1321 
1322     if (bAppendVer) 
1323         StringCat (filename, ";1");
1324 
1325         if (upperCaseIt) {
1326       p = filename;
1327           while (*p != '\0') {
1328             *p = TO_UPPER (*p);
1329             p++;
1330           }
1331         }
1332 
1333     FileBuildPath(buffer, NULL, filename);
1334     return  buffer;
1335 }
1336 
1337 static Boolean NEAR  LoadUidIndex (DocType type)
1338 
1339 {
1340     Int2  i;
1341     size_t n;
1342     Int4Ptr p;
1343     CharPtr path;
1344     FILE   *fd;
1345     Int4 header [3];
1346     Int4 version;
1347     Int4 issue;
1348 
1349     if (!ValidateType (type))
1350                 return FALSE;
1351 
1352         if ((type == TYP_SEQ) || (type == TYP_NT))
1353                 type = TYP_AA;
1354 
1355     n = (size_t) vi->types[type].num_bucket + 1; 
1356     p = type_bucket_index[type];
1357     if (p != NULL)
1358         return TRUE;
1359 
1360         p = (Int4Ptr) MemNew(sizeof(Int4) * n);
1361         p[n-1] = INT4_MAX;            /* put sentinel at end */
1362         n--;         
1363 
1364         if (type == TYP_AA)
1365         {
1366                 type = TYP_SEQ;
1367         }
1368 
1369     path = MakePath (DIR_IDX, type, SUF_UID, EXT_IDX);
1370     if ((fd = FileOpen (path, "rb")) ==NULL)  {
1371                 MemFree (p);
1372                 ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1373         return FALSE;
1374     }
1375     if (vi->version != 0 || vi->issue != 0) { /* for compatibility with pre-release 6 data */
1376       if (FileRead ((CharPtr)header, sizeof(Int4), 3, fd) != 3) {
1377         FileClose (fd);
1378         MemFree (p);
1379         ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
1380         return FALSE;
1381       }
1382           if (! IsOKMagic((Uint4) SwapInt4(header[1]), vi->volume_label))
1383           {
1384         ErrPostEx(SEV_ERROR,  ERR_CD_BADINDEX, 0, sCdError[ERR_CD_BADINDEX]);
1385                 return FALSE;
1386       }
1387       header [2] = SwapInt4 (header [2]);
1388       version = (Int4) vi->version;
1389       issue = (Int4) vi->issue;
1390       if (header [2] != ((version << 16) | issue)) {
1391         ErrPostEx(SEV_ERROR,  ERR_CD_BADINDEX, 0, sCdError[ERR_CD_BADINDEX]);
1392                 return FALSE;
1393       }
1394     }
1395     if (FileRead ((CharPtr)p, sizeof(Int4), n, fd) !=n) {
1396         FileClose (fd);
1397                 MemFree (p);
1398                 ErrPostEx(SEV_ERROR, ERR_CD_FILEREAD, 0, sCdError[ERR_CD_FILEREAD]);
1399         return FALSE;
1400     }
1401     FileClose (fd);
1402 
1403         if (type == TYP_SEQ)
1404         {
1405                 type_bucket_index[TYP_NT] = p;
1406                 type_bucket_index[TYP_AA] = p;
1407         } else {
1408                 type_bucket_index[type] = p;
1409         }
1410     for (i=0; i< (Int2) n; i++, p++) 
1411         *p = SwapInt4 (*p);
1412     return TRUE;
1413 }
1414 
1415 static Int2 NEAR  LoadTrmIndex (DocType type, DocField field)
1416 
1417 {
1418     Int2   i, k, c, buckets;
1419     CharPtr path, p;
1420     Int4  bytes;
1421     FILE   *fd;
1422 
1423     if (!ValidateType(type))  return 0;
1424     if (!ValidateField(type, field))  return  0;
1425 
1426     if (type == term_idx_type && field == term_idx_field)
1427         return  term_idx_count;
1428 
1429     path = MakePath (DIR_TRM, type, field, EXT_IDX);
1430     if ((fd = FileOpen (path, "r")) ==NULL)  {
1431                 ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1432         return  0;
1433     }
1434 
1435     if (term_idx_count > 0)  FreeTrmIndex();
1436 
1437     buckets = (Int2)vi->types[type].fields[field].num_bucket;
1438     bytes = (buckets + 1) * sizeof(CharPtr);
1439     if ((term_idx_str = (CharPtr PNTR) MemNew((size_t)bytes)) ==NULL) {
1440         FileClose (fd);
1441                 ErrPostEx(SEV_ERROR, ERR_CD_MEMORY, 0, sCdError[ERR_CD_MEMORY]);
1442         return(0);
1443     }
1444 
1445     for (i=0,c=0; c!=EOF; ) {
1446         for (p=buffer, k=0; k<128; k++) {
1447             c = fgetc(fd);
1448             if (c == EOF)  break;
1449             if (c == '\n' || c == '\r') {
1450                 *p = '\0';
1451                 break;
1452             }
1453             *p++ = (char) TO_LOWER(c);
1454         }
1455         while (c != '\n' && c != '\r' && c != EOF) {
1456             c = fgetc(fd);
1457         }
1458         *p = '\0';
1459         if (c != EOF && i < buckets) {
1460             if ((term_idx_str[i] = StringSave(buffer)) ==NULL)  {
1461                 FileClose(fd);
1462                 term_idx_count = i;
1463                 FreeTrmIndex();
1464                                 ErrPostEx(SEV_ERROR, ERR_CD_MEMORY, 0, sCdError[ERR_CD_MEMORY]);
1465                 return  0;
1466             }
1467                         i++;
1468         }
1469     }
1470     FileClose (fd);
1471     term_idx_count = i;
1472     term_idx_type = type;
1473     term_idx_field = field;
1474     return  term_idx_count;
1475 }
1476 
1477 static void NEAR  FreeTrmIndex (void)
1478 
1479 {
1480     int  i;
1481 
1482     for (i=0; i<term_idx_count; i++)
1483         {
1484                 MemFree(term_idx_str[i]);
1485         }
1486     term_idx_str = (CharPtr PNTR) MemFree(term_idx_str);
1487     term_idx_count = 0;
1488     term_idx_type = -1;
1489     term_idx_field = -1;
1490 }
1491 
1492 extern AsnIoPtr   EntrezInfoOpen (CharPtr dirname)
1493 
1494 {
1495     CharPtr p, buf, endpath;
1496     AsnIoPtr aip = NULL;
1497     FILE * fp;
1498         
1499 
1500         buf = (CharPtr) MemNew(PATH_MAX);
1501     p = StringMove(buf, dirname);
1502     endpath = buf + StringLen (buf);
1503     p = StringMove(p , "cdromdat.val;1");  
1504     p -= 2;           /* point to the semi-colon */
1505     *p = '\0';        /* wipe-out the semi-colon */
1506     if ((fp = FileOpen(buf, "rb")) == NULL)
1507     {
1508         *p = ';';     /* put back the semi-colon */
1509         if ((fp = FileOpen(buf, "rb")) != NULL)
1510             bAppendVer = TRUE;
1511     }
1512 
1513     if (fp == NULL) {
1514         StringCat (buf, ";1");
1515         p = endpath;
1516         while (*p != '\0') {
1517           *p = TO_UPPER (*p);
1518           p++;
1519         }
1520         upperCaseIt = TRUE;
1521         p -= 2;           /* point to the semi-colon */
1522         *p = '\0';        /* wipe-out the semi-colon */
1523         if ((fp = FileOpen (buf, "rb")) == NULL) {
1524           *p = ';';     /* put back the semi-colon */
1525           if ((fp = FileOpen(buf, "rb")) != NULL)
1526             bAppendVer = TRUE;
1527         }
1528     }
1529 
1530     if (fp != NULL)
1531         aip = AsnIoNew(ASNIO_BIN_IN, fp, NULL, NULL, NULL);
1532         else
1533                 ErrPostEx(SEV_WARNING, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], (CharPtr) "cdromdat.val");
1534         MemFree(buf);
1535     return aip;
1536 }
1537 
1538 
1539 #ifdef IS_LITTLE_ENDIAN
1540 
1541 static Int2 NEAR  SwapInt2 (Int2 k)
1542 
1543 {
1544     Uint2  j, l;
1545         Int2 m;
1546 
1547         l = (Uint2)k;
1548     j  = ((l & (Uint2)0xFF00) >> 8);
1549     j |= ((l & (Uint2)0x00FF) << 8);
1550         m = (Int2)j;
1551     return  m;
1552 }
1553 
1554 static Int4 NEAR  SwapInt4 (Int4 k)
1555 
1556 {
1557     Uint4  j, l;
1558         Int4 m;
1559 
1560         l = (Uint4)k;
1561     j  = ((l & (Uint4)0xFF000000) >> 24);
1562     j |= ((l & (Uint4)0x00FF0000) >> 8);
1563     j |= ((l & (Uint4)0x0000FF00) << 8);
1564     j |= ((l & (Uint4)0x000000FF) << 24);
1565         m = (Int4)j;
1566     return  m;
1567 }
1568 
1569 #endif
1570 
1571 /****** not used in reading cdrom **********************
1572 static Int4 NEAR MergeSegOffset (Int2 seg, Int4 offset)
1573 
1574 {
1575         Int4 value;
1576 
1577         value = (seg - 1) << 25;
1578         value += offset;
1579         return value;
1580 }
1581 ******************************************************/
1582 /***
1583 bit 31 = if 1, is a protein, else is not
1584 bits 30-25 = segment (file number)
1585 bits 24-0  = offset into file up to 32 mbytes big
1586 ****************/
1587 static Boolean NEAR SplitSegOffset (Int4 value, Int2Ptr segptr, Int4Ptr offsetptr)
1588 
1589 {
1590         *segptr = (Int2)(((value >> 25) & 0x0000003F) + 1);
1591         *offsetptr = value & 0x01FFFFFF;
1592         return TRUE;
1593 }
1594 
1595 /*****************************************************************************
1596 *
*   FILE * CdDocFil (type, uid, idx)
*       opens a binary asn file, seeks to doc, returns a FILE *
*       (if idx is NULL the index record is looked up with UidIdxGet)
1599 *
1600 *****************************************************************************/
1601 static FILE * NEAR CdDocFil (DocType type, DocUid uid, UidIdxPtr idx)
1602 
1603 {
1604     Int4 offset;
1605         Int2 seg, dir, db;
1606     CharPtr path;
1607     FILE   *fd2;
1608     UidIdx ui;
1609 
1610         if (idx == NULL)
1611         {
1612                 idx = UidIdxGet(type, uid, &ui);
1613                 if (idx == NULL)
1614                         return NULL;
1615                 if ((type == TYP_AA || type == TYP_NT) && idx->type != type)
1616                         return NULL;
1617         }
1618 
1619         SplitSegOffset(idx->entry_offset, &seg, &offset);
1620 
1621     dir = (idx->type==TYP_ML) ? DIR_MED : DIR_SEQ;
1622     db = (idx->type==TYP_ML) ? PREF_MED : PREF_SEQ;
1623 
1624     path = MakePath (dir, db, SUF_ASN, (Int2) (-seg));
1625     if ((fd2=FileOpen (path, "rb")) == NULL)
1626         {
1627                 ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1628         return  NULL;
1629     }
1630     fseek (fd2, offset, SEEK_SET);
1631     return fd2;
1632 }
1633 
1634 
1635 /*****************************************************************************
1636 *
1637 *   CdTrmFind(type, field, term)
1638 *      returns a pointer to a CdTerm structure
1639 *
1640 *****************************************************************************/
1641 
1642 static CdTermPtr  CdTrmDup (CdTermPtr trmptr)
1643 
1644 {
1645         CdTermPtr ctp;
1646 
1647         ctp = NULL;
1648         if (trmptr != NULL) {
1649                 ctp = (CdTermPtr) MemNew (sizeof (CdTerm));
1650                 if (ctp != NULL) {
1651                         ctp->type = trmptr->type;
1652                         ctp->field = trmptr->field;
1653                         ctp->term = StringSave (trmptr->term);
1654                         ctp->special_count = trmptr->special_count;
1655                         ctp->total_count = trmptr->total_count;
1656                         ctp->offset = trmptr->offset;
1657                         ctp->page = trmptr->page;
1658                         ctp->next = NULL;
1659                 }
1660         }
1661         return ctp;
1662 }
1663 
1664 static CdTermPtr  CdTrmCache (CdTermPtr trmptr)
1665 
1666 {
1667         CdTermPtr ctp;
1668         Int2 i;
1669 
1670         ctp = cdtrmcache [9];
1671         if (ctp != NULL) {
1672                 if (ctp->term != NULL) {
1673                         MemFree (ctp->term);
1674                 }
1675                 MemFree (ctp);
1676                 cdtrmcache [9] = NULL;
1677         }
1678         for (i = 9; i > 0; i--) {
1679                 cdtrmcache [i] = cdtrmcache [i - 1];
1680         }
1681         ctp = CdTrmDup (trmptr);
1682         cdtrmcache [0] = ctp;
1683         return trmptr;
1684 }
1685 
1686 CdTermPtr  CdTrmFind (DocType type, DocField field, CharPtr term)
1687 
1688 {
1689     Int2 i;
1690         Int2 termpage;
1691         CdTermPtr ctp = NULL;
1692         CdTermPtr trmptr;
1693 
1694     for (i = 0; i < 10; i++) {
1695       trmptr = cdtrmcache [i];
1696       if (trmptr != NULL && trmptr->type == type && trmptr->field == field &&
1697           StringICmp (trmptr->term, term) == 0) {
1698             return CdTrmDup (trmptr);
1699       }
1700     }
1701         termpage = CdTrmLookup(type,field,term);
1702         if (termpage < 0)
1703                 return NULL;
1704 
1705                                 /** could it already be cached? ***/
1706         if ((trmtype == type) && (trmfield == field) && (trmpages > 0))
1707         {
1708                 if ((termpage <= (trmpage + trmpages - 1)) &&
1709                         ((termpage + 3) >= trmpage))     /* overlapping range */
1710                 {
1711                         ctp = CdTrmLocate(term, termpage);
1712                         if (ctp != NULL)        /* found it */
1713                                 return CdTrmCache (ctp);
1714                         if (termpage == trmpage)   /* not possible to find it */
1715                                 return NULL;
1716                 }
1717         }
1718 
1719                                 /** Load term pages from disk ***/
1720 
1721         termpage = CdTrmPages(type, field, termpage);
1722         if (termpage == 0)
1723                 return NULL;
1724 
1725         ctp = CdTrmLocate(term, termpage);
1726         return CdTrmCache(ctp);
1727 }
1728 
1729 /*****************************************************************************
1730 *
1731 *   CdTrmLocate(term, page)
1732 *       locates a term in a term list already in cache
1733 *
1734 *****************************************************************************/
1735 static CdTermPtr NEAR CdTrmLocate (CharPtr term, Int2 page)
1736 
1737 {
1738         Int2 size, ctr, cmpval;
1739         CharPtr ret;
1740 
1741     size = trmpages * BLKSIZE;     /* bytes in term cache */
1742     ctr = 0;
1743     ret = trmbuf;
1744         size--;    /* have to have at least one space for test below */
1745     while (ctr < size)
1746     {
1747             while (*ret != '\n' && *ret != '\r')
1748                 {
1749           ret++;
1750               ctr++;
1751                   if (ctr >= size)
1752                         return NULL;
1753         }
1754             ret++;
1755         ctr++;
1756 
1757                 cmpval = MeshStringICmp(ret, term);
1758                 if (! cmpval)     /* found it */
1759                         return CdTermRead(trmtype, trmfield, ret, trmbuf, page);
1760                 else if (cmpval > 0)   /* gone past */
1761                         return NULL;
1762     }
1763         return NULL;
1764 }
1765 
1766 /*****************************************************************************
1767 *
1768 *   CdTermRead(type, field, ptr, bufr, page)
1769 *       creates and returns a CdTermPtr from a CdTermPage
1770 *       ptr should point at the start of a record (the term)
1771 *
1772 *****************************************************************************/
1773 static CdTermPtr  CdTermRead (Int2 type, Int2 field, CharPtr ptr, CharPtr bufr, Int2 page)
1774 
1775 {
1776         CdTermPtr trmptr;
1777         CharPtr tmp, tmp2;
1778         Char localbuf[10];
1779         Int4 vals[3];
1780         Int2 i;
1781 
1782         if (ptr == NULL)
1783                 return NULL;
1784         if (*ptr == '\0')
1785                 return NULL;
1786         trmptr = (CdTermPtr) MemNew(sizeof(CdTerm));
1787         trmptr->type = type;
1788         trmptr->field = field;
1789         tmp = ptr;
1790         tmp2 = tmp;
1791         while (*tmp2 != '\t')
1792                 tmp2++;
1793         *tmp2 = '\0';
1794         trmptr->term = StringSave(tmp);
1795         *tmp2 = '\t';
1796         tmp2++;
1797         for (i = 0; i < 3; i++)
1798         {
1799                 tmp = &localbuf[0];
1800                 while (*tmp2 >= ' ')
1801                 {
1802                         *tmp = *tmp2;
1803                         tmp++; tmp2++;
1804                 }
1805                 *tmp = '\0';
1806                 vals[i] = atol(localbuf);
1807                 tmp2++;
1808         }
1809         trmptr->special_count = vals[0];
1810         trmptr->total_count = vals[1];
1811         trmptr->offset = vals[2];
1812         trmptr->page = page + (Int2) (((size_t) (ptr - bufr - 1)) / (size_t) BLKSIZE);
1813         return trmptr;
1814 }
1815 
1816 /*****************************************************************************
1817 *
1818 *   CdTermScan(type, field, page, numpage, proc)
1819 *       returns terms found to proc until
1820 *       1) no more pages
1821 *       2) numpage pages have been read
1822 *       3) proc returns FALSE
1823 *   returns number of complete pages read
1824 *   if numpage=0, scans until EOF or proc returns FALSE
1825 *
1826 *****************************************************************************/
1827 Int2  CdTermScan (DocType type, DocField field, Int2 page, Int2 numpage, CdTermProc proc)
1828 
1829 {
1830         Boolean    goOn;
1831         CharPtr    ptr;
1832         Int2       pages, size, pagectr, startpage;
1833         CdTermPtr  trmptr;
1834 
1835         startpage = page;
1836         pagectr = 0;
1837         if (proc == NULL)
1838                 return pagectr;
1839 
1840         goOn = TRUE;
1841     while (goOn)
1842         {
1843                 startpage = page;
1844                 pages = CdTrmPages (type, field, page);
1845                 if (pages == 0)
1846                         return pagectr;
1847                 ptr = trmbuf;
1848                 size = pages * BLKSIZE;     /* bytes available */
1849                 pages = BLKSIZE;      /* bytes per page */
1850                 while ((size > 0) && (goOn))
1851                 {
1852                         while (*ptr != '\n' && *ptr != '\r' && *ptr != '\0')
1853                         {
1854                                 size--;
1855                                 pages--;
1856                                 ptr++;
1857                         }
1858                         if (*ptr == '\0')
1859                                 return (Int2) (pagectr + 1);   /* last page */
1860                         size--;
1861                         pages--;
1862                         ptr++;
1863                         if (size > 0)
1864                         {
1865                                 trmptr = CdTermRead(type, field, ptr, trmbuf, startpage);
1866                                 if (trmptr != NULL) {
1867                                         goOn = proc (trmptr);
1868                                 }
1869                         }
1870                         while (*ptr != '\n' && *ptr != '\r' && *ptr != '\0')
1871                         {
1872                                 size--;
1873                                 pages--;
1874                                 ptr++;
1875                         }
1876                         if (pages < 0)   /* crossed a page boundary */
1877                         {
1878                                 pages = BLKSIZE + pages;
1879                                 numpage--;
1880                                 pagectr++;
1881                                 page++;
1882                                 if (! numpage)
1883                                         goOn = FALSE;
1884                         }
1885                 }
1886         }
1887         return pagectr;
1888 }
1889 
1890 /*****************************************************************************
1891 *
1892 *   CdLinkUidGet(type, link_to_type, numuid, uids, max)
1893 *       returns count of input uids processed
1894 *       returns -1 on error
1895 *       if neighbors (type == link_to_type)
1896 *               sums weights for same uids
1897 *       if (more than max uids, frees uids and weights, but leaves num set)
1898 *
1899 *****************************************************************************/
1900 Int2  CdLinkUidGet (LinkSetPtr PNTR result, DocType type, DocType link_to_type, Int2 numuid, Int4Ptr uids, Boolean mark_missing, Int4 maxlink)
1901 
1902 {
1903         UidIdxPtr query;
1904     UidIdx local;
1905         DocType querytype;
1906         LinkSetPtr lsp = NULL;
1907         Int2 counts[NTYPE];
1908         FILE * fp;
1909         Int4 offset;
1910         CharPtr path;
1911         Uint1Ptr ptr1;
1912         Int2 numfound = 0;
1913         Int4 j, l, r, k;
1914         Boolean first = TRUE;
1915         Boolean sorted;
1916         Int4 cursize = 0, finalsize, finalcount = 0, count, i;
1917         Int4Ptr newuids = NULL,
1918                         newwts = NULL,
1919                         finaluids = NULL,
1920                         finalwts = NULL,
1921                         tmp;
1922 
1923         *result = NULL;
1924         
1925         if (! ValidateType(link_to_type))
1926                 return -1;
1927 
1928         for (i = 0, query = NULL; i < numuid && query == NULL; i++)
1929         {
1930                 query = UidIdxGet(type, uids[i], &local);
1931                 if ((mark_missing) && (query == NULL))
1932                         uids[i] *= -1;
1933                 if (query != NULL)
1934                         j = i;
1935         }
1936         if ((i == numuid) && (query == NULL)) { /* none found */
1937                 lsp = (LinkSetPtr) MemNew(sizeof(LinkSet));
1938                 lsp->uids = NULL;
1939                 lsp->weights = NULL;
1940                 *result = lsp;
1941                 return 0;
1942         }
1943 
1944         querytype = query->type;   /* record, to allow for TYP_SEQ */
1945         if (link_to_type == TYP_SEQ)
1946         {
1947                 if (type != TYP_SEQ)
1948                         return -1;           /* can't do it */
1949                 else
1950                         link_to_type = querytype;   /* neighbors */
1951         }
1952 
1953         path = MakePath (DIR_LNK, query->type, SUF_REC, EXT_LNK);
1954     if ((fp = FileOpen(path, "rb")) == NULL)
1955         {
1956                 ErrPostEx(SEV_ERROR, ERR_CD_FILEOPEN, 0, sCdError[ERR_CD_FILEOPEN], path);
1957         return -1;
1958         }
1959 
1960         if (numuid > 1)
1961         {
1962                 if (numuid > 320) {
1963                         finalsize = 16000;
1964                 } else {
1965                         finalsize = MIN((numuid * 50), 16000);     /* make a guess */
1966                 }
1967                 finaluids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));  /* make a guess */
1968                 if (link_to_type == querytype)
1969                         finalwts = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));
1970         }
1971 
1972         for (; j < numuid; j++)
1973         {
1974                 
1975                 if (! first)
1976                 {
1977                     if ((query = UidIdxGet(type, uids[j], &local)) == NULL)
1978                         {
1979                                 if (mark_missing)
1980                                         uids[j] *= -1;
1981                                 continue; /* must examine remaining UIDs */
1982                         }
1983                 }
1984                 else
1985                         first = FALSE;
1986 
1987                 numfound++;         /* count how many uids we process */
1988 
1989                                     /* read the link counts for all types */
1990 
1991             fseek (fp, query->link_offset, SEEK_SET);
1992                 FileRead((CharPtr)&counts[0], sizeof(Int2), NTYPE, fp);
1993                 for (i = 0; i < NTYPE; i++)
1994                         counts[i] = SwapInt2(counts[i]);
1995 
1996                 offset = 0;
1997                 for (i = 0; i < link_to_type; i++)
1998                 {
1999                         offset += counts[i] * sizeof(DocUid);
2000                         if (i == query->type) {  /* has weights */
2001                                 offset += counts[i] * sizeof(Uint1);
2002                         }
2003                 }
2004                 if (offset)                                                /* skip preceeding link types */
2005                         fseek(fp, offset, SEEK_CUR);
2006 
2007                 count = (Int4)counts[link_to_type];
2008 
2009                 if (count > cursize)
2010                 {
2011                         MemFree(newuids);
2012                         newuids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (count + 1)));
2013                         if (querytype == link_to_type)
2014                         {
2015                                 MemFree(newwts);
2016                                 newwts = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (count + 1)));
2017                         }
2018                         cursize = count;
2019                 }
2020                 FileRead((CharPtr)newuids, sizeof(DocUid), (size_t)count, fp);
2021                 for (i = 0; i < count; i++)
2022                         newuids[i] = SwapInt4(newuids[i]);
2023                 if (link_to_type == querytype)    /* get the weights */
2024                 {
2025                         ptr1 = (Uint1Ptr) newwts;
2026                         FileRead((CharPtr)ptr1, sizeof(Uint1), (size_t)count, fp);
2027                         for (i = count - 1; i >= 0; i--) {
2028                                 newwts[i] = (Int4) (ptr1[i]);
2029                         }
2030                 }
2031                 if (numuid > 1)           /* merging lists */
2032                 {
2033                         if ((finalcount + count) > finalsize)
2034                         {
2035                                 finalsize += count;
2036                                 if (finalsize > 16000)
2037                                 {
2038                                         MemFree(newuids);
2039                                         MemFree(newwts);
2040                                         MemFree(finaluids);
2041                                         MemFree(finalwts);
2042                                         ErrPostEx(SEV_WARNING, ERR_CD_MEMORY, 0, sCdError[ERR_CD_MEMORY]);
2043                                         return -1;
2044                                 }
2045                                 tmp = finaluids;
2046                                 finaluids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));
2047                                 MemCopy(finaluids, tmp, (size_t)(finalcount * sizeof(Int4)));
2048                                 MemFree(tmp);
2049                                 if (querytype == link_to_type)
2050                                 {
2051                                     tmp = finalwts;
2052                                         finalwts = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * finalsize));
2053                                         MemCopy(finalwts, tmp, (size_t)(finalcount * sizeof(Int4)));
2054                                         MemFree(tmp);
2055                                 }
2056                         }
2057                         for (i = 0; i < count; i++)
2058                         {
2059                                 l = 0;               /* binary search */
2060                                 r = (finalcount - 1);
2061                                 k = 0;
2062                                 while ((l <= r) && (finaluids[k] != newuids[i]))
2063                                 {
2064                                         k = (l + r)/ 2;
2065                                         if (newuids[i] < finaluids[k])
2066                                                 r = k - 1;
2067                                         else
2068                                                 l = k + 1;
2069                                 }
2070                                 if (finaluids[k] == newuids[i])   /* merge */
2071                                 {
2072                                         if (querytype == link_to_type)
2073                                                 finalwts[k] += newwts[i];
2074                                 }
2075                                 else
2076                                 {
2077                                         if (finalcount)
2078                                         {
2079                                                 if (finaluids[k] < newuids[i])
2080                                                         k++;
2081                                                 l = (finalcount - k);
2082                                                 r = l;
2083                                                 tmp = &finaluids[finalcount];
2084                                                 while (r)
2085                                                 {
2086                                                         *tmp = *(tmp-1);
2087                                                         tmp--; r--;
2088                                                 }
2089                                                 if (querytype == link_to_type)
2090                                                 {
2091                                                         r = l;
2092                                                         tmp = &finalwts[finalcount];
2093                                                         while (r)
2094                                                         {
2095                                                                 *tmp = *(tmp-1);
2096                                                                 tmp--; r--;
2097                                                         }
2098                                                 }
2099                                         }
2100                                         finaluids[k] = newuids[i];
2101                                         if (querytype == link_to_type)
2102                                                 finalwts[k] = newwts[i];
2103                                         finalcount++;
2104                                 }
2105                         }
2106                 }
2107         }
2108 
2109         FileClose(fp);
2110 
2111         lsp = (LinkSetPtr) MemNew(sizeof(LinkSet));
2112         if (maxlink <= 0)
2113                 maxlink = 16000;    /* default */
2114 
2115         if (numuid == 1)
2116         {
2117                 lsp->num = count;
2118                 if (lsp->num <= maxlink)
2119                 {
2120                         lsp->uids = newuids;
2121                         lsp->weights = newwts;
2122                 }
2123                 else
2124                 {
2125                         MemFree(newuids);
2126                         MemFree(newwts);
2127                 }
2128         }
2129         else
2130         {
2131                 MemFree(newuids);
2132                 MemFree(newwts);
2133                 lsp->num = finalcount;
2134                 if (lsp->num <= maxlink)
2135                 {
2136                         lsp->uids = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (finalcount + 1)));
2137                         MemCopy(lsp->uids, finaluids, (size_t)(finalcount * sizeof(Int4)));
2138                         MemFree(finaluids);
2139                         if (querytype == link_to_type)
2140                         {
2141                                 lsp->weights = (Int4Ptr) MemNew((size_t)(sizeof(Int4) * (finalcount + 1)));
2142                                 MemCopy(lsp->weights, finalwts, (size_t)(finalcount * sizeof(Int4)));
2143                                 MemFree(finalwts);
2144                                 linksort(lsp->uids, lsp->weights, lsp->num);
2145                         }
2146                 }
2147                 else
2148                 {
2149                         MemFree(finaluids);
2150                         MemFree(finalwts);
2151                 }
2152         }
2153 
2154         if (lsp->num > 1 && querytype != link_to_type && link_to_type == TYP_ML)
2155         {
2156                 /* try to sort MEDLINE uids in descending order */
2157 
2158                 for (sorted = TRUE, k = 1; k < lsp->num; k++)
2159                 {
2160                         if (lsp->uids[k-1] < lsp->uids[k])
2161                         {
2162                                 sorted = FALSE;
2163                                 break;
2164                         }
2165                 }
2166 
2167                 if (! sorted)
2168                 {   /* assume that the existing order is reversed */
2169                     for (k = (lsp->num / 2) - 1; k >= 0; k--)
2170                     {
2171                         j = lsp->uids[k];
2172                         lsp->uids[k] = lsp->uids[lsp->num - 1 - k];
2173                         lsp->uids[lsp->num - 1 - k] = j;
2174                     }
2175 
2176                         /* now check that it's sorted */
2177                 for (sorted = TRUE, k = 1; k < lsp->num; k++)
2178                 {
2179                         if (lsp->uids[k-1] < lsp->uids[k])
2180                         {
2181                                 sorted = FALSE;
2182                                 break;
2183                         }
2184                 }
2185 
2186                         if (! sorted)
2187                         { /* as a last resort, sort them using quicksort */
2188                                 /* dummy array */
2189                                 finaluids = (Int4Ptr) MemDup(lsp->uids, (size_t) (sizeof(Int4) * lsp->num));
2190                                 linksort(finaluids, lsp->uids, lsp->num);
2191                                 MemFree(finaluids);
2192                         }
2193                 }
2194         }
2195 
2196         *result = lsp;
2197         return numfound;
2198 }
2199 
2200 /*****************************************************************************
2201 *
2202 *   linksort(uids, wts, n)
2203 *       quicksort into descending wts order
2204 *
2205 *****************************************************************************/
2206 static void NEAR linksort (Int4Ptr uids, Int4Ptr wts, Int4 n)
2207 
2208 {
2209    Int4 tp, tp2;
2210    Int4 l, r, i, j, m, scnt;
2211    Int4 pstack[100];
2212    Int4Ptr p;
2213 
2214    if (n < 2)
2215        return;
2216 
2217    scnt = 2;
2218    l = 0; r = n - 1; p = pstack + 2;
2219 
2220    do
2221    {
2222        if ((r - l) > 15)
2223        {
2224            i = l; j = r;
2225                                      /* median of three */
2226 
2227            m = ((j - i) / 2) + i;    /* get middle element */
2228            /* partitioning operation */
2229            do
2230            {
2231                while((j > i) && (wts[j] <= wts[i]))
2232                    j--;
2233                if(j != i)
2234                {
2235                    tp = wts[j]; wts[j] = wts[i]; wts[i] = tp;
2236                    tp = uids[j]; uids[j] = uids[i]; uids[i] = tp;
2237                    while((i < j) && (wts[i] >= wts[j]))
2238                        i++;
2239                    if(i != j)
2240                       {tp = wts[j]; wts[j] = wts[i]; wts[i] = tp;
2241                        tp = uids[j]; uids[j] = uids[i]; uids[i] = tp;}
2242                }
2243            }while(i != j); /* end do */
2244 
2245            /* recursion elimination */
2246            if(i)
2247            {
2248                if((i - l) > (r - i))  /* put long segment on "stack" */
2249                    {*p = l; p++; *p = i - 1; p++; l = i + 1;}
2250                else
2251                    {*p = i + 1; p++; *p = r; p++; r = i - 1;}
2252                scnt += 2;
2253                if (scnt >= 100)
2254                                 {
2255                                         ErrPostEx(SEV_ERROR, ERR_CD_MEMORY, 0, "linksort > 100");
2256                                         return;
2257                                 }
2258            }
2259            else
2260            {
2261                l = i + 1;
2262            }
2263        }
2264                      /* if done with this segment, "pop" next */
2265        else
2266        {
2267             p--; r = *p; p--; l = *p; scnt -= 2;
2268        }
2269    }
2270    while (p > pstack);      /* end do */
2271 
2272 
2273    /* do the final insertion sort */
2274 
2275    for(i = 1; i < n; i++)
2276    {
2277        tp = wts[i]; tp2 = uids[i]; j = i; m = j - 1;
2278        while ((j > 0) && (wts[m] < tp))
2279            {wts[j] = wts[m]; uids[j] = uids[m]; j--; m--;}
2280        wts[j] = tp;
2281            uids[j] = tp2;
2282    }
2283    return;
2284 }
2285 
2286 /*****************************************************************************
2287 *
2288 *   DecompReadFunc:
2289 *       substituted read function for compressed data sources (for Sequence
2290 *       and Medline data).
2291 *
2292 *****************************************************************************/
2293 static Int2 LIBCALLBACK DecompReadFunc (Pointer p, CharPtr buff, Uint2 count)
2294 {
2295         DecompInfoPtr dcp = (DecompInfoPtr) p;
2296         Uint1 loc_buff[3];
2297         int bytes_to_request;
2298         int bytes_read;
2299 
2300         if (dcp->compr == COMPR_DONT_KNOW)
2301         {
2302                 int c;
2303 
2304                 /* read the "decompression protocol identifier" */
2305                 if ((c = fgetc(dcp->fp)) == EOF)  
2306                         return 0;
2307                 dcp->compr = (Uint1) c;
2308 
2309                 if (dcp->compr == COMPR_NONE)
2310                 {
2311                         /* for no decompression, we still have 4 bytes of overhead;     */
2312                         /* 1 byte for the protocol identifier, and 3 bytes for a length */
2313                         /* field of what follows                                        */
2314                         if (FileRead((CharPtr) loc_buff,1,3,dcp->fp) != 3)
2315                         {
2316                                 ErrPostEx(SEV_ERROR, ERR_CD_BADDECOMP, 0,
2317                                         "No length field detected for uncompressed data");
2318                                 return 0;
2319                         }
2320 
2321                         /* interpret the 3-byte length in a machine-independant order;  */
2322                         /* BIG ENDIAN (first byte is most significant)                  */
2323                         dcp->bytes_left = (((int) loc_buff[0]) * 256 + loc_buff[1]) * 256 +
2324                                                   loc_buff[2];
2325                 }
2326         }
2327 
2328         switch (dcp->compr)
2329         {
2330                 case COMPR_NONE :
2331                         /* based on knowledge of how many bytes are in this compressed  */
2332                         /* ASN.1 object, return only as many bytes as the caller really */
2333                         /* needs                                                        */
2334                         bytes_to_request = (int) MIN((Uint4) count, dcp->bytes_left);
2335                         bytes_read = FileRead(buff,1,bytes_to_request,dcp->fp);
2336                         dcp->bytes_left -= bytes_read;
2337                         if (dcp->bytes_left <= 0)
2338                         {
2339                                 /* reset for stream read of next entry */
2340                                 dcp->compr = COMPR_DONT_KNOW;
2341                         }
2342                         return bytes_read;
2343                 
2344                 case COMPR_HUFFMAN :
2345                         return HuffmanRead(dcp,buff,count);
2346                 
2347                 /* others ?? */
2348 
2349                 default:
2350                         ErrPostEx(SEV_ERROR, ERR_CD_BADDECOMP, 0,
2351                                     "Invalid decompression code detected <%d>", dcp->compr);
2352                         return 0;
2353         }
2354 }
2355 
2356 /*****************************************************************************
2357 *
2358 *   HuffmanRead:
2359 *       read Huffman compressed data
2360 *
2361 *****************************************************************************/
2362 static Int2 HuffmanRead (DecompInfoPtr dcp, CharPtr buff, Uint2 count)
2363 {
2364         register unsigned int mask = dcp->mask;
2365         register unsigned int byte = dcp->byte;
2366         CharPtr p = buff;
2367         int i, cnt = 0;
2368         int c;
2369         int k;
2370         FILE *fd1 = dcp->fp;
2371 
2372 
2373         while (cnt < (int) count)
2374         {
2375                 for (i=0; i>=0; )
2376                 {
2377                         if (mask == 0)
2378                         {
2379                                 if ((c = fgetc(fd1)) == EOF)
2380                                 {
2381                                         /* should never reach this point */
2382                                         i = HUFFMAN_SENTINEL - 257;
2383                                         break;
2384                                 }
2385                                 else
2386                                 {
2387                                         byte = (unsigned int) c;
2388                                         mask = 0x80;
2389                                 }
2390                         }
2391 
2392                         if (byte & mask)
2393                                 i = vi->huff_left[i];
2394                         else
2395                                 i = vi->huff_right[i];
2396 
2397                         mask >>= 1;
2398                 }
2399 
2400                 if ((k = i + 257) == HUFFMAN_SENTINEL)
2401                 {
2402                         mask = 0; /* to skip remaining bits in current byte */
2403                         dcp->compr = COMPR_DONT_KNOW; /* reset for next record */
2404                         break;
2405                 }
2406 
2407                 *p++ = (char) k;
2408                 cnt++;
2409         }
2410 
2411         dcp->mask = mask;
2412         dcp->byte = byte;
2413         return cnt;
2414 }
2415 
2416 /*****************************************************************************
2417 *
2418 *   DecompInit:
2419 *       Create a data structure to be used in decompression; the data structures
2420 *       are stored in a linked list. While no mutual exclusion is provided on
2421 *       list access, each decompression is independent ... therefore, many
2422 *       compressed ASN.1 data streams may be open and used simultaneously
2423 *
2424 *****************************************************************************/
2425 static DecompInfoPtr NEAR DecompInit (FILE *fp)
2426 {
2427         DecompInfoPtr dcp;
2428 
2429         dcp = (DecompInfoPtr) MemNew(sizeof(DecompInfo));
2430 
2431         if (dcp == NULL)
2432                 return NULL;
2433 
2434         dcp->fp = fp;
2435         dcp->compr = COMPR_DONT_KNOW;
2436         dcp->mask = 0;
2437         dcp->bytes_left = 0;
2438 
2439         /* insert it */
2440         dcp->next = DecompInfoList;
2441         DecompInfoList = dcp;
2442 
2443         return dcp;
2444 }
2445 
2446 /*****************************************************************************
2447 *
2448 *   DecompFini:
2449 *       Find and destroy the specified decompression data structure. The
2450 *       data structures, in addition to having an address known to its user,
2451 *       also contains a copy of the AsnIoPtr for that data stream. This
2452 *       enables the Fini() operation to be performed using either the address
2453 *       of this structure as a key, or the address of the AsnIoPtr as a key.
2454 *
2455 *****************************************************************************/
2456 static Boolean NEAR DecompFini (AsnIoPtr aip, DecompInfoPtr dcp)
2457 {
2458         DecompInfoPtr dtrail;
2459         DecompInfoPtr temp;
2460 
2461         if (DecompInfoList == NULL)
2462                 return FALSE; /* not found */
2463         
2464         /* check for first element in list */
2465         if ((DecompInfoList == dcp && dcp != NULL) ||
2466             (DecompInfoList->aip == aip && aip != NULL))
2467         { /* unlink and delete */
2468                 temp = DecompInfoList->next;
2469                 DecompInfoFree(DecompInfoList);
2470                 DecompInfoList = temp;
2471                 return TRUE;
2472         }
2473 
2474         if (DecompInfoList->next == NULL)
2475         { /* single-element list, and it's not the first element in list */
2476                 return FALSE;
2477         }
2478 
2479         for (dtrail = DecompInfoList; dtrail->next != NULL;
2480              dtrail = dtrail->next)
2481         { /* search remainder of list */
2482                 if ((dtrail->next == dcp && dcp != NULL) ||
2483                     (dtrail->next->aip == aip && aip != NULL))
2484                 {
2485                         temp = dtrail->next->next;
2486                         DecompInfoFree(dtrail->next);
2487                         dtrail->next = temp;
2488                         return TRUE;
2489                 }
2490         }
2491 
2492         return FALSE;
2493 }
2494 
2495 
2496 /*****************************************************************************
2497 *
2498 *   DecompInfoFree:
2499 *       Free a decompression data structure
2500 *****************************************************************************/
2501 static void NEAR DecompInfoFree(DecompInfoPtr dcp)
2502 {
2503         MemFree(dcp);
2504 }
2505 
2506 
2507 /*****************************************************************************
2508 *
2509 *   IsOKMagic:
2510 *       Validate the magic number for a file
2511 *****************************************************************************/
2512 static Boolean NEAR IsOKMagic(Uint4 magic, CharPtr volume_label)
2513 {
2514         /* check for a match with the "base" magic number; supported for        */
2515         /* backwards compatability                                              */
2516         if (magic == CD_MAGIC_BASE)
2517                 return TRUE;
2518 
2519         /* now check if the magic number equals the "base" plus the checksum of */
2520     /* the volume-label (so as to be able to distinguish between index      */
2521         /* files associated with different CDs)                                 */
2522         while (*volume_label)
2523         {
2524                 magic -= (int) (*volume_label++);
2525         }
2526         return (magic == CD_MAGIC_BASE);
2527 }
2528 
2529 
2530 #endif
2531 
2532 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.