NCBI C Toolkit Cross Reference

C/cdromlib/cdentrez.c


  1 /*   cdentrez.c                                                          
  2  * ===========================================================================
  3  *
  4  *                            PUBLIC DOMAIN NOTICE                          
  5  *               National Center for Biotechnology Information
  6  *                                                                          
  7  *  This software/database is a "United States Government Work" under the   
  8  *  terms of the United States Copyright Act.  It was written as part of    
  9  *  the author's official duties as a United States Government employee and 
 10  *  thus cannot be copyrighted.  This software/database is freely available 
 11  *  to the public for use. The National Library of Medicine and the U.S.    
 12  *  Government have not placed any restriction on its use or reproduction.  
 13  *                                                                          
 14  *  Although all reasonable efforts have been taken to ensure the accuracy  
 15  *  and reliability of the software and data, the NLM and the U.S.          
 16  *  Government do not and cannot warrant the performance or results that    
 17  *  may be obtained by using this software or data. The NLM and the U.S.    
 18  *  Government disclaim all warranties, express or implied, including       
 19  *  warranties of performance, merchantability or fitness for any particular
 20  *  purpose.                                                                
 21  *                                                                          
 22  *  Please cite the author in any work or product based on this material.   
 23  *
 24  * ===========================================================================
 25  *
 26  * RCS $Id: cdentrez.c,v 6.2 1999/03/11 23:20:07 kans Exp $
 27  *
 28  * Author:  Ostell, Kans
 29  *
 30  * Version Creation Date:   10/15/91
 31  *
 32  * File Description: 
 33  *      entrez index access library for Entrez CDROM
 34  *
 35  * Modifications:  
 36  * --------------------------------------------------------------------------
 37  * Date     Name        Description of modification
 38  * -------  ----------  -----------------------------------------------------
 39  * 07-07-94 Schuler     Added CdEntrezGetInfo function
 40  * 07-12-94 Schuler     Added #ifdef _NEW_CdEntrez_/_OLD_CdEntrez
 41  * 07-13-94 Schuler     Moved CdTermFree here from cdromlib.c
 42  * 09-22-94 Schuler     CdEntrezFini: set _nouveau to FALSE
 43  * 11-22-94 Schuler     Cleaned up some integer size problems
 44  *
 45  * 05-19-95 Schuler     Added rcs Log directive for automatic insertion of
 46  *                      modification comments.
 47  *
 48  * Revision $Log: cdentrez.c,v $
 49  * Revision Revision 6.2  1999/03/11 23:20:07  kans
 50  * Revision sprintf cast
 51  * Revision
 52  * Revision Revision 6.1  1998/08/24 18:42:15  kans
 53  * Revision fixed -v -fd warnings
 54  * Revision
 55  * Revision Revision 6.0  1997/08/25 18:12:52  madden
 56  * Revision Revision changed to 6.0
 57  * Revision
 58  * Revision Revision 5.5  1997/06/26 21:55:31  vakatov
 59  * Revision [PC] DLL'd "ncbicdr.lib", "ncbiacc.lib", "ncbinacc.lib" and "ncbicacc.lib"
 60  * Revision
 61  * Revision Revision 5.4  1997/03/07 17:16:10  epstein
 62  * Revision always choose the highest GI in EntrezFindSeqId
 63  * Revision
 64  * Revision 5.3  1997/01/14  21:26:07  epstein
 65  * plug memory leak when performing Entrez set-difference operations
 66  *
 67  * Revision 5.2  1996/11/22  18:02:19  epstein
 68  * change algorithm for looking up PDB accessions
 69  *
 70  * Revision 5.1  1996/08/14  19:56:41  epstein
 71  * add APIs for fetching pieces of biostruc annots (mostly written by Chris Hogue)
 72  *
 73  * Revision 5.0  1996/05/28  13:55:34  ostell
 74  * Set to revision 5.0
 75  *
 76  * Revision 4.18  1996/05/14  21:01:12  epstein
 77  * use SQID index to and docsum to convert back-and-forth between GIs and SeqIds, per Jim Ostell
 78  *
 79  * Revision 4.17  1996/04/01  21:02:31  epstein
 80  * remove dead CdEntrezBiostrucAnnotSetGet() code
 81  *
 82  * Revision 4.16  1996/04/01  20:59:38  epstein
 83  * Schuler/Epstein changes for cleaner CdEntrezBiostrucAnnotSetGet retrieval
 84  *
 85  * Revision 4.15  1996/03/29  18:52:12  epstein
 86  * add support for structure alignments (includes kludge for now)
 87  *
 88  * Revision 4.14  1995/10/23  21:39:56  epstein
 89  * another tweak for PC 16-bit addressing
 90  *
 91  * Revision 4.13  1995/10/23  14:17:52  epstein
 92  * fix 16/32-bit portability problems
 93  *
 94  * Revision 4.12  1995/10/03  14:12:40  epstein
 95  * repair term-checking logic to avoid erroneous caching
 96  *
 97  * Revision 4.11  1995/10/02  15:25:56  epstein
 98  * correct range-checking logic due to semantics of StringXCmp()
 99  *
100  * Revision 4.10  1995/10/02  12:51:23  epstein
101  * fix endpoints for range scanning
102  *
103  * Revision 4.9  1995/10/02  12:49:44  epstein
104  * add memory-based performance enhancements
105  *
106  * Revision 4.8  1995/10/02  02:35:33  epstein
107  * add range-checking
108  *
109  * Revision 4.7  1995/09/19  13:27:51  epstein
110  * add truncation limit
111  *
112  * Revision 4.6  1995/08/30  20:54:53  epstein
113  * search TYP_CH database if retcode is -1
114  *
115  * Revision 4.5  1995/08/28  23:20:47  kans
116  * includes new mmdbapi headers
117  *
118  * Revision 4.4  1995/08/28  17:44:01  epstein
119  * add code so that when retcode is -1, we perform less validation on the retrieve Seq-entry
120  *
121  * Revision 4.3  1995/08/24  20:44:10  epstein
122  * add more stuff for genomes
123  *
124  * Revision 4.2  1995/08/18  17:41:17  epstein
125  * fix (?) parsing of PDB accession per Brandon's observation
126  *
127  * Revision 4.1  1995/08/11  20:26:18  epstein
128  * add max-models support for biostrucs
129  *
130  * Revision 4.0  1995/07/26  13:50:32  ostell
131  * force revision to 4.0
132  *
133  * Revision 2.62  1995/07/19  22:07:00  kans
134  * added (probably superfluous) casts to some sprintf calls
135  *
136  * Revision 2.61  1995/06/29  15:57:03  epstein
137  * added Complexity argument when fetching structures
138  *
139  * Revision 2.60  95/06/27  11:54:35  kans
140  * replaced _OLD_CDEntrez_ with _OLD_CdEntrez_
141  * 
142  * Revision 2.59  1995/06/23  16:02:43  kans
143  * support for accmmdbs.c stub to resolve symbols without MMDB link
144  *
145  * Revision 2.58  1995/06/23  13:22:25  kans
146  * Biostruc_CD_supported symbol needed for local MMDB access
147  *
148  * Revision 2.57  1995/05/16  14:36:20  schuler
149  * Automatic comment insertion enabled
150  *
151  *
152  * ==========================================================================
153  */
154 
155 #define REVISION_STR  "$Revision: 6.2 $"
156 
157 static char * _this_module = "CdEntrez";
158 #define THIS_MODULE _this_module
159 static char * _this_file = __FILE__;
160 #define THIS_FILE _this_file
161 
162 #include <accentr.h>
163 #include <cdentrez.h>
164 #include <sequtil.h>
165 #include <objall.h>
166 
/* Cursor state for a list of DocUids held in a ByteStore.  Used through the
   NewPost / ReadItem / WriteItem / FlushItems helpers declared below; their
   bodies are outside this excerpt, so the field notes are hedged. */
167 typedef struct posting {
168   ByteStorePtr  uids;      /* backing ByteStore of UIDs */
169   DocUidPtr     buffer;    /* presumably an in-memory window over uids -- TODO confirm */
170   Int4          bufsize;   /* presumably the capacity of buffer -- TODO confirm units */
171   Int4          index;     /* presumably the current position within buffer -- TODO confirm */
172   Int4          count;     /* presumably the number of valid items in buffer -- TODO confirm */
173 } Posting, PNTR PostingPtr;
174 
/* Scan limit; its use is outside this excerpt. */
175 #define SCAN_MAX 200
176 
177 /* the 1023 below is not a typo; it's needed to avoid overflowing 16-bit
178    addressing on PCs */
179 #define DEF_CDENTREZ_MEMUSAGE (64 * 1023L)
/* The four limits below expand to expressions on the file-static cdMemUsage,
   so their values follow whatever CdEntrezInit stores in that variable. */
180 #define MAX_CDENTREZ_UID_LIST_SIZE (cdMemUsage)
181 #define CDENTREZ_TERM_MAX (cdMemUsage / 4)
182 #define MAX_CDENTREZ_BYTESTORE (cdMemUsage / 4)
183 #define MAX_CDENTREZ_SMALL_LIST (cdMemUsage / 2)
184 
/* Per-term record kept while scanning terms.  The code that fills it is
   outside this excerpt, so the field notes are hedged. */
185 typedef struct scanData {
186   Int4          specialCount;   /* presumably the count of "special" postings -- TODO confirm */
187   Int4          totalCount;     /* presumably the count of all postings -- TODO confirm */
188   Int4          offset;         /* presumably a posting-file offset -- TODO confirm */
189   ByteStorePtr  specialPtr;     /* ByteStore for the special postings */
190   ByteStorePtr  remainderPtr;   /* ByteStore for the remaining postings */
191 } ScanData, PNTR ScanPtr;
192 
/* File-scope working state.  Only cdMemUsage, eset, cachedExpr and cachedBsp
   are used by code visible in this part of the file; the remaining variables
   are used further down and are left undocumented here. */
193 static Int2          db;
194 static Int2          fld;
195 
196 static DocUidPtr     uidPtr;
197 
198 static Int2          searchTermLen;
/* Memory budget behind the MAX_CDENTREZ_* macros.  CdEntrezInit overwrites
   it from the "CdEntrezMemUsage" application property, or with
   DEF_CDENTREZ_MEMUSAGE, and caps it at MAXALLOC. */
199 static Int4          cdMemUsage = 32768;
200 
201 static ByteStorePtr  specialPost;
202 static ByteStorePtr  remainPost;
203 
204 static Char          selection [256];
205 static Char          wildcard [256];
206 static Char          topOfRange [256];
207 static Boolean       rangeScanning = FALSE;
208 
209 static ScanPtr       scanPtr;
210 static Int4          scanCount;
211 static Boolean       scanOk;
212 static CdTermProc    userScanProc;
213 
/* Head of the CdTerm list.  CdEntrezInit allocates the first node and stores
   the temporary posting-file name in its term field; CdEntrezFini frees the
   whole list. */
214 static CdTermPtr     eset;
215 
/* Single-entry cache maintained by CdEntTLEvalX: a private copy of the last
   expression it evaluated and a duplicate of that evaluation's result. */
216 static ValNodePtr    cachedExpr = NULL;
217 static ByteStorePtr  cachedBsp = NULL;
218 
219 static void NEAR NextNode PROTO((void));
220 static ByteStorePtr NEAR Factor PROTO((void));
221 static ByteStorePtr NEAR Term PROTO((void));        
222 static ByteStorePtr NEAR Diff PROTO((void));
223 static ByteStorePtr NEAR Expression PROTO((void));
224 static CdTermPtr NEAR FindTermNode PROTO((CharPtr term, DocType type, DocField field, CharPtr highRange));
225 static ValNodePtr CdTLExprFree PROTO((ValNodePtr elst));
226 
227 static PostingPtr NEAR NewPost PROTO((ByteStorePtr lst, Int4 defsize));
228 static PostingPtr NEAR FreePost PROTO((PostingPtr pst));
229 static Int4 NEAR PostLength PROTO((PostingPtr pst));
230 static void NEAR RewindPost PROTO((PostingPtr pst));
231 static DocUid NEAR ReadItem PROTO((PostingPtr pst));
232 static void NEAR WriteItem PROTO((PostingPtr pst, DocUid value));
233 static void NEAR FlushItems PROTO((PostingPtr pst));
234 static void NEAR SavePostingList PROTO((FILE *f, ByteStorePtr bsp));
235 
236 static Boolean NEAR CdEntrezMergeTerm PROTO((DocType type, DocField field, CharPtr term, Int4Ptr spcl, Int4Ptr totl, CdTermProc userProc));
237 static void NEAR SingleSpaces PROTO((CharPtr str));
238 static void NEAR TermTruncate PROTO((CharPtr str));
239 static void NEAR QuickSortSmall PROTO((DocUidPtr uids, Int4 l, Int4 r));
240 static Int4 NEAR CompressSmall PROTO((DocUidPtr uids, Int4 count));
241 static Int4 NEAR UniqueSmall PROTO((DocUidPtr uids, Int4 count));
242 static ByteStorePtr NEAR MergeSmallLists PROTO((ByteStorePtr bsp, ByteStorePtr small));
243 static Boolean NEAR MergeSeveralLists PROTO((Int4 i, Int4 count));
244 static Boolean NEAR MergeSeveralOrderedLists PROTO((Int4 i, Int4 count));
245 static Boolean NEAR MergeUnorderedLists PROTO((Int4 i, Int4 count));
246 static Boolean NEAR ProcessScanResults PROTO((void));
247 static Boolean  WildCardProc PROTO((CdTermPtr trmp));
248 static Boolean  ScanOnlyProc PROTO((CdTermPtr trmp));
249 static Boolean  ScanAndFreeProc PROTO((CdTermPtr trmp));
250 
251 /**** Moved from cdentrez.h ********************/
252 
253 static CdTermPtr NEAR CdEntrezCreateTerm PROTO((CharPtr term, DocType type, DocField field, ByteStorePtr special, ByteStorePtr remainder, CharPtr highRange));
254 static ByteStorePtr NEAR LoadPostingList PROTO((FILE *f, Int4 special, Int4 total));
255 static ByteStorePtr NEAR FreePostingList PROTO((ByteStorePtr lst));
256 static ByteStorePtr NEAR MergePostingLists PROTO((ByteStorePtr lst1, ByteStorePtr lst2));
257 static ByteStorePtr NEAR IntersectPostingLists PROTO((ByteStorePtr lst1, ByteStorePtr lst2));
258 static ByteStorePtr NEAR DifferencePostingLists PROTO((ByteStorePtr lst1, ByteStorePtr lst2));
259 
260 static ValNodePtr currNode;
261 static Uint1 currChoice;
262 
263 /************************* moved from old cdml.c ****************************/
264 static AsnTypePtr  MEDLINE_ENTRY = NULL;
265 static AsnTypePtr  MEDLINE_ENTRY_cit = NULL;
266 static AsnTypePtr  MEDLINE_ENTRY_abstract = NULL;
267 static AsnTypePtr  TITLE_E_trans = NULL;
268 static AsnTypePtr  AUTH_LIST_names_ml_E = NULL;
269 static AsnTypePtr  AUTH_LIST_names_str_E = NULL;
270 static AsnTypePtr  DATE_STD_year = NULL;
271 static AsnTypePtr  DATE_str = NULL;
272 static AsnTypePtr  TITLE_E_name = NULL;
273 static AsnTypePtr  MEDLINE_ENTRY_mesh = NULL;
274 static AsnTypePtr  MEDLINE_ENTRY_substance = NULL;
275 static AsnTypePtr  MEDLINE_ENTRY_xref = NULL;
276 static AsnTypePtr  MEDLINE_ENTRY_idnum = NULL;
277 static AsnTypePtr  MEDLINE_ENTRY_gene = NULL;
278 
279 static DocSumPtr NEAR MedSumAsnRead PROTO((AsnIoPtr aip, DocUid uid));
280 static void NEAR StripAuthor PROTO((CharPtr author));
281 static void NEAR FindAsnType PROTO((AsnTypePtr PNTR atp, AsnModulePtr amp, CharPtr str));
282 
283 static DocSumPtr NEAR CdEntMlSumGet PROTO((Int4 uid));
284 
285 /************************* moved from old cdseq.c ****************************/
286 static AsnTypePtr  SEQ_ENTRY = NULL;
287 static AsnTypePtr  SEQ_ENTRY_seq = NULL;
288 static AsnTypePtr  SEQ_ENTRY_set = NULL;
289 static AsnTypePtr  TEXTSEQ_ID_name = NULL;
290 static AsnTypePtr  TEXTSEQ_ID_accession = NULL;
291 static AsnTypePtr  SEQ_DESCR_E_title = NULL;
292 static AsnTypePtr  GIIMPORT_ID_id = NULL;
293 static AsnTypePtr  BIOSEQ_inst = NULL;
294 static AsnTypePtr  SEQ_INST_mol = NULL;
295 static AsnTypePtr  SEQ_INST_repr = NULL;
296 static AsnTypePtr  SEQ_ID_gibbsq = NULL;
297 static AsnTypePtr  SEQ_ID_gibbmt = NULL;
298 static AsnTypePtr  SEQ_ID_genbank = NULL;
299 static AsnTypePtr  SEQ_ID_gi = NULL;
300 static AsnTypePtr  SEQ_ID_embl = NULL;
301 static AsnTypePtr  SEQ_ID_ddbj = NULL;
302 static AsnTypePtr  SEQ_ID_pir = NULL;
303 static AsnTypePtr  SEQ_ID_swissprot = NULL;
304 static AsnTypePtr  PDB_BLOCK_compound_E = NULL;
305 static AsnTypePtr  PDB_SEQ_ID_MOL = NULL;
306 static AsnTypePtr  BIOSEQ_id = NULL;
307 static AsnTypePtr  BIOSEQ_id_E = NULL;
308 static AsnTypePtr  CIT_PAT_title = NULL;
309 
310 static DocSumPtr NEAR CdEntSeqSumGet PROTO((Int4 uid, DocType type));
311 
312 extern int _nouveau;
313 
314 /*****************************************************************************
315 *
316 *   CdEntrezInit ()
317 *       Creates linked list of CdTerm nodes, creates temporary file for
318 *       postings lists, saves file name in first node.  When creating new
319 *       nodes, posting file is appended to temporary file, node offset then
320 *       points to temporary file location of posting information.
321 *
322 *****************************************************************************/
323 
324 NLM_EXTERN Boolean  CdEntrezInit (Boolean no_warnings)
325 
326 {
327   FILE      *fp;
328   Char      str [PATH_MAX];
329   Boolean inited = FALSE;
330   CharPtr   prop;
331   
332 #ifdef Biostruc_supported
333         objmmdb1AsnLoad ();
334         objmmdb2AsnLoad ();
335         objmmdb3AsnLoad ();
336 #endif
337 #ifdef _NEW_CdEntrez_
338         _nouveau = GetAppParamBoolean("ncbi","CdEntrez","NewStyle",TRUE);
339         if (_nouveau)
340         {       
341                 if (cd3_CdInit())
342                         inited = TRUE;
343                 else
344                         ErrLogPrintf("cd3_CdInit() failed\n");
345         }
346 #endif
347         /* In the dual OLD/NEW case, go on to try CdInit if cd3_CdInit failed */
348 #ifdef _OLD_CdEntrez_
349         if (!inited)
350         {
351                 if (CdInit())
352                 {
353                         inited = TRUE;
354                         _nouveau = FALSE;
355                 }
356         }
357 #endif
358         if (!inited)
359                 return FALSE;
360                 
361         eset = MemNew (sizeof (CdTerm));
362         if (eset == NULL)
363                 return FALSE;
364         eset->type = 255;   /* set to not used */
365     TmpNam (str);
366     eset->term = StringSave (str);
367 #ifdef WIN_MAC
368     FileCreate (str, "????", "NCBI");
369 #endif
370     fp = FileOpen (str, "wb");
371         if (fp == NULL) {
372                 ErrPostEx (SEV_ERROR, ERR_CD_FILEOPEN, 0, "Unable to open temporary file %s", str);
373                 return FALSE;
374         }
375     FileClose (fp);
376   if ((prop = (CharPtr) GetAppProperty("CdEntrezMemUsage")) != NULL)
377   {
378       long tmplong;
379 
380       sscanf(prop, "%ld", &tmplong);
381 
382       cdMemUsage = tmplong;
383   } else {
384       cdMemUsage = DEF_CDENTREZ_MEMUSAGE;
385   }
386       cdMemUsage = MIN(cdMemUsage, MAXALLOC);
387         return TRUE;
388 }
389 
390 /*****************************************************************************
391 *
392 *   CdEntrezFini ()
393 *       Frees linked list of CdTerm nodes and removes temporary posting file.
394 *
395 *****************************************************************************/
396 
397 NLM_EXTERN void  CdEntrezFini (void)
398 
399 {
400   CdTermPtr nxt;
401   Char      temp [PATH_MAX];
402 
403   if (eset != NULL) {
404     if (eset->term != NULL) {
405       StringCpy (temp, eset->term);
406       FileRemove (temp);
407     }
408     while (eset != NULL) {
409       nxt = eset->next;
410       CdTermFree (eset);
411       eset = nxt;
412     }
413   }
414   eset = NULL;
415 
416   cachedExpr = CdTLExprFree(cachedExpr);
417   cachedBsp = BSFree(cachedBsp);
418   
419 #ifdef _NEW_CdEntrez_
420         if (_nouveau)
421                 cd3_CdFini();
422 #endif
423 #ifdef _OLD_CdEntrez_
424         if (!_nouveau)
425                 CdFini();
426 #endif
427         _nouveau = FALSE;
428 }
429 
430 
431 /*****************************************************************************
432 *
433 *   CdEntrezGetInfo ()
434 *
435 *****************************************************************************/
436 
437 NLM_EXTERN EntrezInfo* CdEntrezGetInfo (void)
438 {
439         EntrezInfo *info = NULL;
440         
441 #ifdef _NEW_CdEntrez_
442         if (_nouveau)
443                 info = cd3_CdGetInfo();
444 #endif
445 
446 #ifdef _OLD_CdEntrez_
447         if (!_nouveau)
448                 info = CdGetInfo();
449 #endif
450         
451         return info;
452 }
453 
454 /*****************************************************************************
455 *
456 *   CdEntrezDetailedInfo ()
457 *
458 *****************************************************************************/
459 
460 NLM_EXTERN char* CdEntrezDetailedInfo (void)
461 {
462         char *info = NULL;
463         
464 #ifdef _NEW_CdEntrez_
465         if (_nouveau)
466                 info = cd3_CdDetailedInfo();
467 #endif
468 
469 #ifdef _OLD_CdEntrez_
470         if (!_nouveau)
471                 info = CdDetailedInfo();
472 #endif
473         
474         return info;
475 }
476 
477 
478 /*****************************************************************************
479 *
480 *   CdEntGetMaxLinks()
481 *       returns max links in link set allowed by system
482 *
483 *****************************************************************************/
484 NLM_EXTERN Int4 CdEntGetMaxLinks (void)
485 
486 {
487         return (Int4)(INT_MAX / sizeof(DocUid));
488 }
489 
490 /*****************************************************************************
491 *
492 *   CdEntrezCreateNamedUidList(term, type, field, num, uids)
493 *       Creates a term node in the entrez set structure if one does not
494 *       yet exist, and loads the posting file from the uid parameter.
495 *
496 *****************************************************************************/
497 NLM_EXTERN void  CdEntrezCreateNamedUidList (CharPtr term, DocType type, DocField field, Int4 num, DocUidPtr uids)
498 
499 {
500   Int4          count;
501   ByteStorePtr  post;
502   Char          str [256];
503 
504   if (term != NULL && uids != NULL && num > 0 && num <= 16383) {
505     StringNCpy (str, term, sizeof (str) - 1);
506     post = BSNew (0);
507     if (post != NULL) {
508       count = (Int4) num;
509       QuickSortSmall (uids, 0, (Int4) (count - 1));
510       count = CompressSmall (uids, count);
511       count = UniqueSmall (uids, count);
512       BSWrite (post, uids, (Int4) (count * sizeof (DocUid)));
513       CdEntrezCreateTerm (str, type, field, NULL, post, NULL);
514       BSFree (post);
515     }
516   }
517 }
518 
519 /*****************************************************************************
520 *
521 *   CdEntrezCreateNamedUidListX(term, type, field, bsp)
522 *       Creates a term node in the entrez set structure if one does not
523 *       yet exist, and loads the posting file from the UIDs held in bsp.
524 *
525 *****************************************************************************/
526 NLM_EXTERN void  CdEntrezCreateNamedUidListX (CharPtr term, DocType type, DocField field, ByteStorePtr bsp)
527 
528 {
529   Int4          actual;
530   Int4          count;
531   ByteStorePtr  post;
532   ByteStorePtr  small;
533   Char          str [256];
534   DocUidPtr     uids;
535 
536   if (term != NULL && bsp != NULL) {
537     StringNCpy (str, term, sizeof (str) - 1);
538     post = BSNew (0);
539     if (post != NULL) {
540       uids = MemNew (4096 * sizeof (DocUid));
541       BSSeek (bsp, 0L, 0);
542       actual = BSRead (bsp, uids, (Int4) (4096 * sizeof (DocUid)));
543       while (actual > 0) {
544         count = (Int4) actual;
545         QuickSortSmall (uids, 0, (Int4) (count - 1));
546         count = CompressSmall (uids, count);
547         count = UniqueSmall (uids, count);
548         if (count > 0) {
549           small = BSNew (0L);
550           if (small != NULL) {
551             BSWrite (small, uids, count * sizeof (DocUid));
552             post = MergePostingLists (post, small);
553           }
554         }
555         actual = BSRead (bsp, uids, (Int4) (4096 * sizeof (DocUid)));
556       }
557       CdEntrezCreateTerm (str, type, field, NULL, post, NULL);
558       MemFree (uids);
559       BSFree (post);
560     }
561   }
562 }
563 
564 /*****************************************************************************
565 *
566 *   CdEntTLNew (type)
567 *       Creates linked list of asn nodes for constructing boolean query on
568 *       terms.  First node points to the EntrezSetNew-created structure that
569 *       maps terms to posting lists.  Remaining nodes contain symbols for AND,
570 *       OR, LEFT PARENTHESIS, RIGHT PARENTHESIS, or a SPECIAL or TOTAL term
571 *       specification.  The term specification nodes point to a CdTerm node
572 *       within the entrez set structure.
573 *
574 *****************************************************************************/
575 
576 NLM_EXTERN ValNodePtr  CdEntTLNew (DocType type)
577 
578 {
579   ValNodePtr anp;
580 
581   anp = NULL;
582   if (eset != NULL) {
583     anp = ValNodeNew (NULL);
584     if (anp != NULL) {
585       anp->choice = NULLSYM;
586       anp->data.ptrvalue = (Pointer) eset;
587           eset->type = type;
588     }
589   }
590   return anp;
591 }
592 
593 /*****************************************************************************
594 *
595 *   CdEntTLAddTerm (elst, term, type, field, special, highRange)
596 *       Adds a term node to a boolean algebraic term query.
597 *
598 *****************************************************************************/
599 
600 NLM_EXTERN ValNodePtr  CdEntTLAddTerm (ValNodePtr elst, CharPtr term, DocType type, DocField field, Boolean special, CharPtr highRange)
601 
602 {
603   ValNodePtr anp;
604   CdTermPtr  trmp;
605 
606   anp = NULL;
607   if (eset != NULL && elst != NULL) {
608         if (type != eset->type)   /* mixed databases */
609                 return NULL;
610     anp = ValNodeNew (elst);
611     if (anp != NULL) {
612       if (special) {
613         anp->choice = SPECIALTERM;
614       } else {
615         anp->choice = TOTALTERM;
616       }
617       trmp = FindTermNode (term, type, field, highRange);
618       anp->data.ptrvalue = (Pointer) trmp;
619     }
620   }
621   return anp;
622 }
623 
624 /*****************************************************************************
625 *
626 *   CdEntTLFree (elst)
627 *       Frees a boolean algebraic term query list.
628 *
629 *****************************************************************************/
630 
631 NLM_EXTERN ValNodePtr  CdEntTLFree (ValNodePtr elst)
632 
633 {
634   if (elst != NULL) {
635     ValNodeFree (elst);
636         eset->type = 255;   /* set to nothing */
637   }
638   return NULL;
639 }
640 
641 /*****************************************************************************
642 *
643 *   CdTLExprFree(elst)
644 *
645 *   Free the CdEntrez-style expression, including all of its subordinate terms
646 ****************************************************************************/
647 static ValNodePtr CdTLExprFree(ValNodePtr elst)
648 {
649   ValNodePtr np;
650   CdTermPtr tp;
651 
652   for (np = elst; np != NULL; np = np->next) {
653     switch (np->choice) {
654     case SPECIALTERM:
655     case TOTALTERM:
656       if ((tp = np->data.ptrvalue) != NULL) {
657         MemFree (tp->term);
658         MemFree (tp->highRange);
659         MemFree (tp);
660       }
661       break;
662     default:
663       break;
664     }
665   }
666 
667   ValNodeFree(elst);
668 
669   return NULL;
670 }
671 
672 
673 /*****************************************************************************
674 *
675 *   CdDupExpr(elst)
676 *
677 *   Duplicate the input CdEntrez-style expression
678 ****************************************************************************/
679 static ValNodePtr CdDupExpr(ValNodePtr elst)
680 {
681   ValNodePtr dup = NULL;
682   ValNodePtr trailing = NULL;
683   ValNodePtr np;
684   CdTermPtr tp1, tp2;
685 
686   for (; elst != NULL; elst = elst->next) {
687     np = ValNodeNew(NULL);
688     if (dup == NULL)
689       dup = np;
690     if (trailing != NULL)
691       trailing->next = np;
692     trailing = np;
693     np->choice = elst->choice;
694     switch (elst->choice) {
695     case SPECIALTERM:
696     case TOTALTERM:
697       tp2 = elst->data.ptrvalue;
698       if (tp2 != NULL)
699       {
700         tp1 = MemNew(sizeof(*tp1));
701         np->data.ptrvalue = tp1;
702         tp1->type = tp2->type;
703         tp1->field = tp2->field;
704         tp1->term = StringSave(tp2->term);
705         tp1->highRange = StringSave(tp2->highRange);
706       }
707       break;
708     default:
709       break;
710     }
711   }
712 
713   return dup;
714 }
715 
716 static Boolean
717 EqualTerms (CharPtr x, CharPtr y)
718 {
719   if (x == NULL && y == NULL)
720     return TRUE;
721   if (x == NULL || y == NULL)
722     return FALSE;
723   return (StringICmp(x,y) == 0);
724 }
725   
726 
727 /*****************************************************************************
728 *
729 *   CdTLExprEqual (elst1, elst2)
730 *
731 *   Determine whether two CdEntrez-style boolean expressions are equal
732 ****************************************************************************/
733 
734 static Boolean
735 CdTLExprEqual (ValNodePtr elst1, ValNodePtr elst2)
736 {
737   Boolean equal = TRUE;
738   CdTermPtr c1, c2;
739 
740   for (; elst1 != NULL && elst2 != NULL && equal; elst1 = elst1->next,
741        elst2 = elst2->next) {
742     if (elst1->choice == elst2->choice) {
743       switch (elst1->choice) {
744         case SPECIALTERM:
745         case TOTALTERM:
746           c1 = elst1->data.ptrvalue;
747           c2 = elst2->data.ptrvalue;
748           equal = c1 != NULL && c2 != NULL && c1->type == c2->type &&
749                   c1->field == c2->field && EqualTerms(c1->term, c2->term) &&
750                   EqualTerms(c1->highRange, c2->highRange);
751           break;
752         default:
753           break;
754       }
755     } else {
756       equal = FALSE;
757     }
758   }
759 
760   return elst1 == NULL && elst2 == NULL && equal;
761 }
762 
763 
764 /*****************************************************************************
765 *
766 *   CdEntTLEvalCount (elst)
767 *       Evaluates a boolean algebraic term query list, returning the
768 *       count of resulting UIDs.
769 *
770 *****************************************************************************/
771 
772 NLM_EXTERN Int4 CdEntTLEvalCount (ValNodePtr elst)
773 {
774   ByteStorePtr bsp;
775   Int4         len;
776 
777   len = 0;
778   bsp = CdEntTLEvalX(elst);
779   if (bsp != NULL) {
780     len = BSLen(bsp) / sizeof(DocUid);
781     BSFree (bsp);
782   }
783   return len;
784 }
785 
786 
787 /*****************************************************************************
788 *
789 *   CdEntTLEvalX (elst)
790 *       Evaluates a boolean algebraic term query list, returning a pointer to
791 *       a ByteStore containing the resultant unique identifiers.  The number
792 *       of UIDs is calculated as BSLen (bsp) / sizeof (DocUid).
793 *
794 *****************************************************************************/
795 
796 NLM_EXTERN ByteStorePtr  CdEntTLEvalX (ValNodePtr elst)
797 
798 {
799   ByteStorePtr bsp;
800 
801   bsp = NULL;
802   if (eset != NULL && elst != NULL) {
803     if (cachedExpr != NULL && CdTLExprEqual(elst, cachedExpr)) {
804       BSSeek(cachedBsp, 0L, SEEK_SET);
805       bsp = BSDup (cachedBsp);
806     } else {
807       cachedExpr = CdTLExprFree(cachedExpr);
808       cachedExpr = CdDupExpr(elst);
809       cachedBsp = BSFree(cachedBsp);
810 
811       currNode = elst;
812       currChoice = NULLSYM;
813       NextNode ();
814       if (eset->term != NULL && currNode != NULL) {
815         bsp = Expression ();
816         BSSeek(bsp, 0L, SEEK_SET);
817         cachedBsp = BSDup(bsp);
818       }
819 
820     }
821   }
822   return bsp;
823 }
824 
825 /*****************************************************************************
826 *
827 *   CdEntTLEval (elst)
828 *       Evaluates a boolean algebraic term query list, returning a pointer to
829 *       a LinkSet containing the resultant unique identifiers.
830 *
831 *****************************************************************************/
832 
833 NLM_EXTERN LinkSetPtr  CdEntTLEval (ValNodePtr elst)
834 
835 {
836   ByteStorePtr bsp;
837   LinkSetPtr lsp = NULL;
838   Int4 numlinks;
839 
840   bsp = CdEntTLEvalX (elst);
841   if (bsp != NULL)
842         {
843                 numlinks = BSLen(bsp) / sizeof(DocUid);
844                 lsp = LinkSetNew();
845                 lsp->num = numlinks;
846                 if (numlinks <= CdEntGetMaxLinks())
847                 {
848                         lsp->uids = MemNew((size_t)(numlinks * sizeof(DocUid)));
849                         BSSeek (bsp, 0L, 0);
850                         BSRead(bsp, lsp->uids, (numlinks * sizeof(DocUid)));
851                 }
852                 BSFree(bsp);
853         }
854   return lsp;
855 }
856 
857 /*****************************************************************************
858 *
859 *   DocSumPtr CdDocSum(type, uid)
860 *
861 *****************************************************************************/
862 NLM_EXTERN DocSumPtr  CdDocSum (DocType type, DocUid uid)
863 
864 {
865         DocSum *sum = NULL;
866         
867 #ifdef _NEW_CdEntrez_
868         if (_nouveau)
869         {
870                 sum = CdGetDocSum(type,uid);
871         }
872 #endif
873 
874 #ifdef _OLD_CdEntrez_
875         if (!_nouveau)
876         {
877                 if (type == TYP_ML)
878                         sum = CdEntMlSumGet(uid);
879                 else
880                         sum = CdEntSeqSumGet(uid, type);
881         }
882 #endif
883 
884         return sum;
885 }
886 
887 
#ifdef _NEW_CdEntrez_
/*
 * Fetches document summaries for numuid UIDs of the given type.  Summaries
 * that were found are packed at the front of result in input order; the
 * return value is how many were stored.  When a lookup fails, the next free
 * slot is left holding NULL until a later success overwrites it.
 */
NLM_EXTERN int  CdDocSumListGet PROTO((DocSum **result, int numuid, DocType type, const DocUid *uids))
{
  int idx;
  int found = 0;

  ASSERT(result != NULL);
  ASSERT(uids != NULL);

  for (idx = 0; idx < numuid; ++idx)
  {
    result[found] = CdGetDocSum(type, uids[idx]);
    if (result[found] != NULL)
    {
      found++;
    }
  }
  return found;
}
#endif
909 
910 
911 
912 /*****************************************************************************
913 *
914 *   CdLinkUidList(type, link_to_type, numuid, uids)
915 *       returns count of input uids processed
916 *       returns -1 on error
917 *       if neighbors (type == link_to_type)
918 *               sums weights for same uids
919 *       if (more than EntrezUserMaxLinks() uids, frees uids and weights,
920 *           but leaves num set)
921 *
922 *****************************************************************************/
923 NLM_EXTERN Int2  CdLinkUidList (LinkSetPtr PNTR result, DocType type, DocType link_to_type, Int2 numuid, Int4Ptr uids, Boolean mark_missing)      
924 {
925         Int4 max_links = CdEntGetMaxLinks();
926         Int4 count;
927         
928 #ifdef _NEW_CdEntrez_
929         if (_nouveau)
930                 count = cd3_CdLinkUidGet(result,type,link_to_type,numuid,uids,mark_missing,max_links);
931 #endif
932 
933 #ifdef _OLD_CdEntrez_
934         if (!_nouveau)
935                 count = CdLinkUidGet(result,type,link_to_type,numuid,uids,mark_missing,max_links);
936 #endif
937 
938         return count;
939 }
940 
941 /*****************************************************************************
942 *
943 *   CdUidLinks()
944 *       retrieves links to other uids
945 *
946 *****************************************************************************/
947 NLM_EXTERN LinkSetPtr  CdUidLinks (DocType type, DocUid uid, DocType link_to_type)
948 {
949         LinkSetPtr lsp = NULL;
950         DocUid u = uid;
951 
952 #ifdef _NEW_CdEntrez_
953         if (_nouveau)
954           cd3_CdLinkUidGet(&lsp,type,link_to_type,1,&u,FALSE,CdEntGetMaxLinks());
955 #endif
956 #ifdef _OLD_CdEntrez_
957         if (!_nouveau)
958           CdLinkUidGet(&lsp,type,link_to_type,1,&u,FALSE,CdEntGetMaxLinks());
959 #endif
960 
961         return lsp;
962 }
963 
964 static Boolean  TermListPageScanProc PROTO((CdTermPtr trmptr));
965 static Boolean  TermListTermScanProc PROTO((CdTermPtr trmptr));
966 static TermListProc trmproc;
967 static Int4 trmcount;
968 static Int4 trmmax;
969 static Boolean trmfound;
970 static Char trmfirst [80];
971 static Int4 the_first_page;
972 
973 /*****************************************************************************
974 *
975 *   CdTermListByPage (type, field, page, numpage, proc)
976 *       Gets terms starting at page, for numpage, by calling proc
977 *       returns number of complete pages read
978 *
979 *****************************************************************************/
980 NLM_EXTERN Int2  CdTermListByPage (DocType type, DocField field, Int2 page, Int2 numpage, TermListProc proc)
981 
982 {
983         trmproc = proc;
984         if (trmproc != NULL) {
985 #ifdef _NEW_CdEntrez_
986         if (_nouveau)
987                 return cd3_CdTermScan(type, field, page, numpage, TermListPageScanProc);
988 #endif
989 #ifdef _OLD_CdEntrez_
990         if (!_nouveau)
991                 return CdTermScan(type, field, page, numpage, TermListPageScanProc);
992 #endif
993         } else {
994                 return 0;
995         }
996 
997         return 0;
998 }
999 
1000 /*****************************************************************************
1001 *
1002 *   CdTermListByTerm (type, field, term, numterms, proc, first_page)
1003 *       Gets Terms starting with at term
1004 *       returns number of complete pages read
1005 *       sets first_page to first page read
1006 *
1007 *****************************************************************************/
1008 NLM_EXTERN Int2  CdTermListByTerm (DocType type, DocField field, CharPtr term, Int2 numterms, TermListProc proc, Int2Ptr first_page)
1009 
1010 {
1011         Int4  first;
1012         Int4  rsult;
1013 
1014         rsult = 0;
1015 #ifdef _NEW_CdEntrez_
1016         if (_nouveau)
1017                 first = cd3_CdTrmLookup(type, field, term);
1018 #endif
1019 #ifdef _OLD_CdEntrez_
1020         if (!_nouveau)
1021                 first = CdTrmLookup(type, field, term);
1022 #endif
1023         the_first_page = first;
1024         trmproc = proc;
1025         trmcount = 0;
1026         if (numterms > 0) {
1027                 trmmax = numterms;
1028         } else {
1029                 trmmax = INT2_MAX;
1030         }
1031         trmfound = FALSE;
1032         StringNCpy (trmfirst, term, sizeof (trmfirst) - 1);
1033         if (trmproc != NULL) {
1034 #ifdef _NEW_CdEntrez_
1035                 if (_nouveau)
1036                         rsult = cd3_CdTermScan(type,field,first,0,TermListTermScanProc);
1037 #endif
1038 #ifdef _OLD_CdEntrez_
1039                 if (!_nouveau)
1040                         rsult = CdTermScan(type,field,first,0,TermListTermScanProc);
1041 #endif
1042         }
1043         if (first_page != NULL) {
1044           *first_page = the_first_page;
1045         }
1046         return rsult;
1047 }
1048 
1049 /*****************************************************************************
1050 *
1051 *   TermListPageScanProc(trmptr)
1052 *       Callback for CdTermListByPage
1053 *
1054 *****************************************************************************/
1055 static Boolean  TermListPageScanProc(CdTermPtr trmptr)
1056 {
1057   Boolean ret = trmproc(trmptr->term,
1058                         trmptr->special_count, trmptr->total_count);
1059   MemFree(trmptr);
1060   return ret;
1061 }
1062 
1063 /*****************************************************************************
1064 *
1065 *   TermListTermScanProc(trmptr)
1066 *       Callback for CdTermListByTerm
1067 *
1068 *****************************************************************************/
1069 static Boolean  TermListTermScanProc(CdTermPtr trmptr)
1070 {
1071   Boolean ret = TRUE;
1072   if (! trmfound) {
1073     if (MeshStringICmp (trmptr->term, trmfirst) >= 0) {
1074       trmfound = TRUE;
1075       the_first_page = trmptr->page;
1076     }
1077   }
1078   if (trmfound) {
1079     ret = trmproc(trmptr->term, trmptr->special_count, trmptr->total_count);
1080     trmcount++;
1081   } else {
1082     MemFree (trmptr->term);
1083   }
1084   MemFree(trmptr);
1085   return (ret && trmcount < trmmax);
1086 }
1087 
1088 /*****************************************************************************
1089 *
1090 *   CdEntrezFindTerm(type, field, term, spec, total)
1091 *       returns count of special and total for a term
1092 *       if term ends with  "...", does a truncated merge of the term
1093 *       if term contains '*' or '?', does a wild card merge
1094 *
1095 *****************************************************************************/
1096 NLM_EXTERN Boolean  CdEntrezFindTerm (DocType type, DocField field, CharPtr term, Int4Ptr spcl, Int4Ptr totl)
1097 
1098 {
1099         CharPtr tmp;
1100         CdTermPtr ctp;
1101 
1102         tmp = term;
1103         while (*tmp != '\0')
1104                 tmp++;
1105         tmp -= 3;
1106         if ((*tmp == '.') && (*(tmp+1) == '.') && (*(tmp+2) == '.')) {
1107                 return CdEntrezMergeTerm (type, field, term, spcl, totl, NULL);
1108         } else if (StringChr (term, '*') != NULL || StringChr (term, '?') != NULL) {
1109                 return CdEntrezMergeTerm (type, field, term, spcl, totl, WildCardProc);
1110         } else {
1111 #ifdef _NEW_CdEntrez_
1112                 if (_nouveau)
1113                         ctp = cd3_CdTrmFind(type,field,term);
1114 #endif
1115 #ifdef _OLD_CdEntrez_
1116                 if (!_nouveau)
1117                         ctp = CdTrmFind(type,field,term);
1118 #endif
1119                 if (ctp == NULL)
1120                         return FALSE;
1121                 *spcl = ctp->special_count;
1122                 *totl = ctp->total_count;
1123                 CdTermFree(ctp);
1124                 return TRUE;
1125         }
1126 }
1127 
1128 
1129 /*****************************************************************************
1130 *
1131 *   CdTermFree(trmp)
1132 *      frees a CdTerm structure
1133 *
1134 *****************************************************************************/
1135 
1136 NLM_EXTERN CdTermPtr  CdTermFree (CdTermPtr trmp)
1137 
1138 {
1139         if (trmp == NULL)
1140                 return NULL;
1141         if (trmp->term != NULL)
1142                 MemFree (trmp->term);
1143         if (trmp->highRange != NULL)
1144                 MemFree (trmp->highRange);
1145         return (CdTermPtr) MemFree(trmp);
1146 }
1147 
1148 
1149 
1150 
1151 /*****************************************************************************
1152 *
1153 *   Below are static functions local to this module
1154 *   ===============================================
1155 *
1156 *****************************************************************************/
1157 
1158 /*****************************************************************************
1159 *
1160 *   Functions to manipulate Boolean lists
1161 *
1162 *****************************************************************************/
1163 
1164 /*****************************************************************************
1165 *
1166 *   NextNode ()
1167 *       Advances to the next node in a term query list.
1168 *
1169 *****************************************************************************/
1170 
1171 static void NEAR NextNode (void)
1172 
1173 {
1174   if (currNode != NULL) {
1175     currNode = currNode->next;
1176     if (currNode != NULL) {
1177       currChoice = currNode->choice;
1178     } else {
1179       currChoice = NULLSYM;
1180     }
1181   } else {
1182     currChoice = NULLSYM;
1183   }
1184 }
1185 
1186 /*****************************************************************************
1187 *
1188 *   Factor ()
1189 *       Processes individual term nodes or parenthetical expressions in a
1190 *       term query list.
1191 *
1192 *****************************************************************************/
1193 
1194 static ByteStorePtr NEAR Factor (void)
1195 
1196 {
1197   ByteStorePtr bsp;
1198   FILE         *fp;
1199   CdTermPtr    trmp;
1200 
1201   bsp = NULL;
1202   if (currChoice == LPAREN) {
1203     NextNode ();
1204     bsp = Expression ();
1205     if (currChoice != RPAREN) {
1206       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "Expected right parenthesis");
1207     } else {
1208       NextNode ();
1209     }
1210   } else if (currChoice == SPECIALTERM || currChoice == TOTALTERM) {
1211     if (currNode != NULL) {
1212       trmp = currNode->data.ptrvalue;
1213       if (trmp != NULL) {
1214         fp = FileOpen (eset->term, "rb");
1215         if (fp != NULL) {
1216           fseek (fp, trmp->offset, SEEK_SET);
1217           if (currChoice == SPECIALTERM) {
1218             bsp = LoadPostingList (fp, trmp->special_count, trmp->special_count);
1219           } else if (currChoice == TOTALTERM) {
1220             bsp = LoadPostingList (fp, trmp->special_count, trmp->total_count);
1221           }
1222           FileClose (fp);
1223         }
1224       }
1225     }
1226     NextNode ();
1227   } else {
1228     NextNode ();
1229   }
1230   return bsp;
1231 }
1232 
1233 /*****************************************************************************
1234 *
1235 *   Term ()
1236 *       Processes strings of ANDed term nodes in a term query list.
1237 *
1238 *****************************************************************************/
1239 
1240 static ByteStorePtr NEAR Term (void)
1241 
1242 {
1243   ByteStorePtr bsp;
1244   ByteStorePtr fct;
1245 
1246   bsp = Factor ();
1247   while (currChoice == ANDSYMBL) {
1248     NextNode ();
1249     fct = Factor ();
1250     bsp = IntersectPostingLists (bsp, fct);
1251   }
1252   return bsp;
1253 }
1254 
1255 /*****************************************************************************
1256 *
1257 *   Diff ()
1258 *       Processes strings of ORed term nodes in a term query list.
1259 *
1260 *****************************************************************************/
1261 
1262 static ByteStorePtr NEAR Diff (void)
1263 
1264 {
1265   ByteStorePtr bsp;
1266   ByteStorePtr trm;
1267 
1268   bsp = Term ();
1269   while (currChoice == ORSYMBL) {
1270     NextNode ();
1271     trm = Term ();
1272     bsp = MergePostingLists (bsp, trm);
1273   }
1274   return bsp;
1275 }
1276 
1277 
1278 /*****************************************************************************
1279 *
1280 *   Expression ()
1281 *       Processes strings of BUTNOTed term nodes in a term query list.
1282 *
1283 *****************************************************************************/
1284 
1285 static ByteStorePtr NEAR Expression (void)
1286 
1287 {
1288   ByteStorePtr bsp;
1289   ByteStorePtr trm;
1290 
1291   bsp = Diff ();
1292   while (currChoice == BUTNOTSYMBL) {
1293     NextNode ();
1294     trm = Diff ();
1295     bsp = DifferencePostingLists (bsp, trm);
1296   }
1297   return bsp;
1298 }
1299 
1300 
1301 /*****************************************************************************
1302 *
1303 *   Low level functions to manipulate postings lists.
1304 *
1305 *****************************************************************************/
1306 
1307 static PostingPtr NEAR NewPost (ByteStorePtr lst, Int4 defsize)
1308 
1309 {
1310   PostingPtr  pst;
1311 
1312   pst = NULL;
1313   if (lst != NULL) {
1314     pst = MemNew (sizeof (Posting));
1315     if (pst != NULL) {
1316       pst->uids = lst;
1317       pst->buffer = NULL;
1318       if (defsize == 0) {
1319         pst->bufsize = (Int4) MIN (16384L, BSLen (lst));
1320       } else {
1321         pst->bufsize = (Int4) MIN (16384L, defsize);
1322       }
1323       pst->count = 0;
1324       pst->index = 0;
1325     }
1326   }
1327   return pst;
1328 }
1329 
1330 static PostingPtr NEAR FreePost (PostingPtr pst)
1331 
1332 {
1333   if (pst != NULL) {
1334     if (pst->uids != NULL) {
1335       BSFree (pst->uids);
1336     }
1337     if (pst->buffer != NULL) {
1338       MemFree (pst->buffer);
1339     }
1340     MemFree (pst);
1341   }
1342   return NULL;
1343 }
1344 
1345 static Int4 NEAR PostLength (PostingPtr pst)
1346 
1347 {
1348   Int4  k;
1349 
1350   k = 0;
1351   if (pst != NULL) {
1352     k = (Int4) (BSLen (pst->uids) / (Int4) sizeof (DocUid));
1353   }
1354   return k;
1355 }
1356 
1357 static void NEAR RewindPost (PostingPtr pst)
1358 
1359 {
1360   if (pst != NULL) {
1361     if (pst->uids != NULL) {
1362       BSSeek (pst->uids, 0L, 0);
1363     }
1364     pst->count = 0;
1365     pst->index = 0;
1366   }
1367 }
1368 
1369 static DocUid NEAR ReadItem (PostingPtr pst)
1370 
1371 {
1372   DocUid  rsult;
1373 
1374   rsult = INT4_MAX;
1375   if (pst != NULL && pst->uids != NULL) {
1376     if (pst->buffer == NULL) {
1377       pst->buffer = MemNew ((size_t) pst->bufsize);
1378       pst->count = 0;
1379       pst->index = 0;
1380     }
1381     if (pst->count <= 0) {
1382       pst->count = (Int4) BSRead (pst->uids, pst->buffer, pst->bufsize);
1383       pst->index = 0;
1384     }
1385     if (pst->count > 0) {
1386       rsult = pst->buffer [pst->index];
1387       (pst->index)++;
1388       (pst->count) -= sizeof (DocUid);
1389     }
1390   }
1391   return rsult;
1392 }
1393 
1394 static void NEAR WriteItem (PostingPtr pst, DocUid value)
1395 
1396 {
1397   if (pst != NULL && pst->uids != NULL) {
1398     if (pst->buffer == NULL) {
1399       pst->buffer = MemNew ((size_t) pst->bufsize);
1400       pst->count = 0;
1401       pst->index = 0;
1402     }
1403     pst->buffer [pst->index] = value;
1404     (pst->index)++;
1405     (pst->count) += sizeof (DocUid);
1406     if (pst->count >= pst->bufsize) {
1407       BSWrite (pst->uids, pst->buffer, pst->count);
1408       pst->count = 0;
1409       pst->index = 0;
1410     }
1411   }
1412 }
1413 
1414 static void NEAR FlushItems (PostingPtr pst)
1415 
1416 {
1417   if (pst != NULL && pst->uids != NULL && pst->buffer != NULL) {
1418     BSWrite (pst->uids, pst->buffer, pst->count);
1419     if (pst->buffer != NULL) {
1420       pst->buffer = MemFree (pst->buffer);
1421     }
1422     pst->count = 0;
1423     pst->index = 0;
1424   }
1425 }
1426 
1427 static ByteStorePtr NEAR MergePostingLists (ByteStorePtr lst1, ByteStorePtr lst2)
1428 
1429 {
1430   PostingPtr    buf1;
1431   PostingPtr    buf2;
1432   PostingPtr    buf3;
1433   Int4          k;
1434   Int4          k1;
1435   Int4          k2;
1436   DocUid        pstar;
1437   DocUid        qstar;
1438   ByteStorePtr  rsult;
1439 
1440   ProgMon ("MergePostingLists");
1441   rsult = NULL;
1442   if (lst1 != NULL && lst2 != NULL) {
1443     buf1 = NewPost (lst1, 0);
1444     buf2 = NewPost (lst2, 0);
1445     k1 = PostLength (buf1);
1446     k2 = PostLength (buf2);
1447     k = k1 + k2;
1448     rsult = BSNew (k * sizeof (DocUid));
1449     buf3 = NewPost (rsult, k * (Int4) sizeof (DocUid));
1450     if (rsult != NULL && buf1 != NULL && buf2 != NULL && buf3 != NULL) {
1451       RewindPost (buf1);
1452       RewindPost (buf2);
1453       pstar = ReadItem (buf1);
1454       qstar = ReadItem (buf2);
1455       while (k > 0) {
1456         if (pstar < qstar) {
1457           WriteItem (buf3, pstar);
1458           k--;
1459           pstar = ReadItem (buf1);
1460         } else if (qstar < pstar) {
1461           WriteItem (buf3, qstar);
1462           k--;
1463           qstar = ReadItem (buf2);
1464         } else {
1465           WriteItem (buf3, pstar);
1466           k -= 2;
1467           pstar = ReadItem (buf1);
1468           qstar = ReadItem (buf2);
1469         }
1470       }
1471       FlushItems (buf3);
1472     } else {
1473       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to merge");
1474     }
1475     if (buf1 != NULL) {
1476       FreePost (buf1);
1477     }
1478     if (buf2 != NULL) {
1479       FreePost (buf2);
1480     }
1481     if (buf3 != NULL) {
1482       buf3->uids = NULL;
1483       FreePost (buf3);
1484     }
1485   } else if (lst1 != NULL) {
1486     rsult = lst1;
1487   } else if (lst2 != NULL) {
1488     rsult = lst2;
1489   }
1490   return rsult;
1491 }
1492 
1493 static ByteStorePtr NEAR IntersectPostingLists (ByteStorePtr lst1, ByteStorePtr lst2)
1494 
1495 {
1496   PostingPtr    buf1;
1497   PostingPtr    buf2;
1498   PostingPtr    buf3;
1499   Int4          k;
1500   Int4          k1;
1501   Int4          k2;
1502   DocUid        pstar;
1503   DocUid        qstar;
1504   ByteStorePtr  rsult;
1505 
1506   ProgMon ("UnionPostingLists");
1507   rsult = NULL;
1508   if (lst1 != NULL && lst2 != NULL) {
1509     buf1 = NewPost (lst1, 0);
1510     buf2 = NewPost (lst2, 0);
1511     k1 = PostLength (buf1);
1512     k2 = PostLength (buf2);
1513     k = MIN (k1, k2);
1514     rsult = BSNew (k * sizeof (DocUid));
1515     buf3 = NewPost (rsult, k * (Int4) sizeof (DocUid));
1516     if (rsult != NULL && buf1 != NULL && buf2 != NULL && buf3 != NULL) {
1517       RewindPost (buf1);
1518       RewindPost (buf2);
1519       pstar = ReadItem (buf1);
1520       qstar = ReadItem (buf2);
1521       while (k1 > 0 && k2 > 0) {
1522         if (pstar < qstar) {
1523           k1--;
1524           pstar = ReadItem (buf1);
1525         } else if (qstar < pstar) {
1526           k2--;
1527           qstar = ReadItem (buf2);
1528         } else {
1529           WriteItem (buf3, pstar);
1530           k1--;
1531           k2--;
1532           pstar = ReadItem (buf1);
1533           qstar = ReadItem (buf2);
1534         }
1535       }
1536       FlushItems (buf3);
1537     } else {
1538       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to intersect");
1539     }
1540     if (buf1 != NULL) {
1541       FreePost (buf1);
1542     }
1543     if (buf2 != NULL) {
1544       FreePost (buf2);
1545     }
1546     if (buf3 != NULL) {
1547       buf3->uids = NULL;
1548       FreePost (buf3);
1549     }
1550   }
1551   return rsult;
1552 }
1553 
1554 static ByteStorePtr NEAR DifferencePostingLists (ByteStorePtr lst1, ByteStorePtr lst2)
1555 
1556 {
1557   PostingPtr    buf1;
1558   PostingPtr    buf2;
1559   PostingPtr    buf3;
1560   Int4          k;
1561   Int4          k1;
1562   Int4          k2;
1563   DocUid        pstar;
1564   DocUid        qstar;
1565   ByteStorePtr  rsult;
1566 
1567   ProgMon ("DiffPostingLists");
1568   rsult = NULL;
1569   if (lst1 != NULL && lst2 != NULL) {
1570     buf1 = NewPost (lst1, 0);
1571     buf2 = NewPost (lst2, 0);
1572     k1 = PostLength (buf1);
1573     k2 = PostLength (buf2);
1574     k = k1 + k2;
1575     rsult = BSNew (k * sizeof (DocUid));
1576     buf3 = NewPost (rsult, k * (Int4) sizeof (DocUid));
1577     if (rsult != NULL && buf1 != NULL && buf2 != NULL && buf3 != NULL) {
1578       RewindPost (buf1);
1579       RewindPost (buf2);
1580       pstar = ReadItem (buf1);
1581       qstar = ReadItem (buf2);
1582       while (k > 0) {
1583         if (pstar < qstar) {
1584           WriteItem (buf3, pstar);
1585           k--;
1586           pstar = ReadItem (buf1);
1587         } else if (qstar < pstar) {
1588           k--;
1589           qstar = ReadItem (buf2);
1590         } else {
1591           k -= 2;
1592           pstar = ReadItem (buf1);
1593           qstar = ReadItem (buf2);
1594         }
1595       }
1596       FlushItems (buf3);
1597     } else {
1598       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to difference");
1599     }
1600     if (buf1 != NULL) {
1601       FreePost (buf1);
1602     }
1603     if (buf2 != NULL) {
1604       FreePost (buf2);
1605     }
1606     if (buf3 != NULL) {
1607       buf3->uids = NULL;
1608       FreePost (buf3);
1609     }
1610   } else if (lst1 != NULL) {
1611     rsult = lst1;
1612   }
1613   return rsult;
1614 }
1615 
1616 static ByteStorePtr NEAR FreePostingList (ByteStorePtr lst)
1617 
1618 {
1619   if (lst != NULL) {
1620     BSFree (lst);
1621   }
1622   return NULL;
1623 }
1624 
1625 static ByteStorePtr NEAR LoadPostingList (FILE *f, Int4 special, Int4 total)
1626 
1627 {
1628   VoidPtr       bufr;
1629   Int4          cnt;
1630   Int4          cntr;
1631   Int4          k1;
1632   Int4          k2;
1633   ByteStorePtr  lst1;
1634   ByteStorePtr  lst2;
1635   ByteStorePtr  rsult;
1636 
1637   rsult = NULL;
1638   if (f != NULL && special >= 0 && total >= 0) {
1639     bufr = MemNew (MAX_CDENTREZ_BYTESTORE * sizeof (DocUid));
1640     if (bufr != NULL) {
1641       k1 = special;
1642       k2 = total - special;
1643       lst1 = BSNew (k1 * sizeof (DocUid));
1644       if (lst1 != NULL) {
1645         cntr = k1;
1646         cnt = MIN (k1, (long) MAX_CDENTREZ_BYTESTORE);
1647         while (cnt > 0) {
1648           FileRead (bufr, sizeof (DocUid), (size_t) cnt, f);
1649           BSWrite (lst1, bufr, cnt * sizeof (DocUid));
1650           cntr -= cnt;
1651           cnt = MIN (cntr, (long) MAX_CDENTREZ_BYTESTORE);
1652         }
1653       } else {
1654         ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to load");
1655       }
1656       lst2 = BSNew (k2 * sizeof (DocUid));
1657       if (lst2 != NULL) {
1658         cntr = k2;
1659         cnt = MIN (k2, (long) MAX_CDENTREZ_BYTESTORE);
1660         while (cnt > 0) {
1661           FileRead (bufr, sizeof (DocUid), (size_t) cnt, f);
1662           BSWrite (lst2, bufr, cnt * sizeof (DocUid));
1663           cntr -= cnt;
1664           cnt = MIN (cntr, (long) MAX_CDENTREZ_BYTESTORE);
1665         }
1666       } else {
1667         ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "List is too large to load");
1668       }
1669       rsult = MergePostingLists (lst1, lst2);
1670     }
1671     MemFree (bufr);
1672   }
1673   return rsult;
1674 }
1675 
1676 /*****************************************************************************
1677 *
1678 *   CdEntrezCreateTerm (term, type, field, special, remainder, highRange)
1679 *       Creates a CdTerm node in the entrez set structure if one does not yet
1680 *       exist, and loads the posting file from two ByteStorePtr posting lists.
1681 *
1682 *****************************************************************************/
1683 
1684 static void NEAR SavePostingList (FILE *f, ByteStorePtr bsp)
1685 
1686 {
1687   VoidPtr  bufr;
1688   Int4     cnt;
1689   Int4     cntr;
1690 
1691   if (f != NULL && bsp != NULL) {
1692     bufr = MemNew (MAX_CDENTREZ_BYTESTORE * sizeof (DocUid));
1693     if (bufr != NULL) {
1694       cntr = (BSLen (bsp) / (Int4) sizeof (DocUid));
1695       cnt = MIN (cntr, (long) MAX_CDENTREZ_BYTESTORE);
1696       BSSeek (bsp, 0L, 0);
1697       while (cnt > 0) {
1698         BSRead (bsp, bufr, cnt * sizeof (DocUid));
1699         FileWrite (bufr, sizeof (DocUid), (size_t) cnt, f);
1700         cntr -= cnt;
1701         cnt = MIN (cntr, (long) MAX_CDENTREZ_BYTESTORE);
1702       }
1703     }
1704     MemFree (bufr);
1705   }
1706 }
1707 
1708 static CdTermPtr NEAR CdEntrezCreateTerm (CharPtr term, DocType type, DocField field, ByteStorePtr special, ByteStorePtr remainder, CharPtr highRange)
1709 
1710 {
1711   FILE      *fp;
1712   Boolean   goOn;
1713   CdTermPtr last;
1714   Int4      remainderCount;
1715   Int4      specialCount;
1716   CdTermPtr trmp;
1717 
1718   trmp = NULL;
1719   if (eset != NULL && term != NULL) {
1720     trmp = eset->next;
1721     last = eset;
1722     goOn = TRUE;
1723     while (trmp != NULL && goOn) {
1724       if (trmp->type == type && trmp->field == field &&
1725           EqualTerms (trmp->term, term) &&
1726           EqualTerms (trmp->highRange, highRange)) {
1727         goOn = FALSE;
1728       } else {
1729         last = trmp;
1730         trmp = trmp->next;
1731       }
1732     }
1733     if (goOn) {
1734       trmp = MemNew (sizeof (CdTerm));
1735       if (trmp != NULL) {
1736         specialCount = 0;
1737         remainderCount = 0;
1738         if (special != NULL) {
1739           specialCount = (BSLen (special) / (Int4) sizeof (DocUid));
1740         }
1741         if (remainder != NULL) {
1742           remainderCount = (BSLen (remainder) / (Int4) sizeof (DocUid));
1743         }
1744         trmp->type = type;
1745         trmp->field = field;
1746         trmp->term = StringSave (term);
1747         trmp->special_count = specialCount;
1748         trmp->total_count = specialCount + remainderCount;
1749         trmp->highRange = StringSave(highRange);
1750         trmp->next = NULL;
1751         last->next = trmp;
1752         fp = FileOpen (eset->term, "ab");
1753         if (fp != NULL) {
1754           fseek (fp, 0, SEEK_END);
1755           trmp->offset = ftell (fp);
1756           SavePostingList (fp, special);
1757           SavePostingList (fp, remainder);
1758           FileClose (fp);
1759         } else {
1760           trmp->offset = 0;
1761         }
1762       }
1763     }
1764   }
1765   return trmp;
1766 }
1767 
1768 /*****************************************************************************
1769 *
1770 *   FindTermNode (term, type, field, highRange)
1771 *       Returns a pointer to a CdTerm node in the entrez set structure,
1772 *       creating the node and loading the posting file, if necessary.  The
1773 *       value of the offset field becomes the offset into the temporary file.
1774 *
1775 *****************************************************************************/
1776 
1777 static CdTermPtr NEAR FindTermNode (CharPtr term, DocType type, DocField field, CharPtr highRange)
1778 
1779 {
1780   FILE      *fp;
1781   Boolean   goOn;
1782   CdTermPtr last;
1783   Int4      offset;
1784   Int4      remain;
1785   Int4      special;
1786   CharPtr   tmp;
1787   Int4      total;
1788   CdTermPtr trmp;
1789 
1790   trmp = NULL;
1791   if (eset != NULL && term != NULL) {
1792     trmp = eset->next;
1793     last = eset;
1794     goOn = TRUE;
1795     while (trmp != NULL && goOn) {
1796       if (trmp->type == type && trmp->field == field &&
1797           EqualTerms (trmp->term, term) &&
1798           EqualTerms (trmp->highRange, highRange)) {
1799         goOn = FALSE;
1800       } else {
1801         last = trmp;
1802         trmp = trmp->next;
1803       }
1804     }
1805     if (goOn) {
1806       tmp = term;
1807       while (*tmp != '\0')
1808         tmp++;
1809       tmp -= 3;
1810       rangeScanning = FALSE;
1811       if (highRange != NULL) {
1812           rangeScanning = TRUE;
1813           StrNCpy(topOfRange, highRange, sizeof(topOfRange));
1814           CdEntrezMergeTerm (type, field, term, NULL, NULL, NULL);
1815       } else {
1816         if ((*tmp == '.') && (*(tmp+1) == '.') && (*(tmp+2) == '.')) {
1817           CdEntrezMergeTerm (type, field, term, NULL, NULL, NULL);
1818         } else if (StringChr (term, '*') != NULL || StringChr (term, '?') != NULL) {
1819           CdEntrezMergeTerm (type, field, term, NULL, NULL, WildCardProc);
1820         }
1821       }
1822       trmp = eset->next;
1823       last = eset;
1824       goOn = TRUE;
1825       while (trmp != NULL && goOn) {
1826         if (trmp->type == type && trmp->field == field &&
1827             EqualTerms (trmp->term, term) &&
1828             EqualTerms (trmp->highRange, highRange)) {
1829           goOn = FALSE;
1830         } else {
1831           last = trmp;
1832           trmp = trmp->next;
1833         }
1834       }
1835     }
1836     if (goOn) {
1837 #ifdef _NEW_CdEntrez_
1838                 if (_nouveau)
1839                         trmp = cd3_CdTrmFind(type,field,term);
1840 #endif
1841 #ifdef _OLD_CdEntrez_
1842                 if (!_nouveau)
1843                         trmp = CdTrmFind(type,field,term);
1844 #endif
1845       if (trmp != NULL) {
1846         if (field != FLD_ORGN) {
1847           last->next = trmp;
1848           fp = FileOpen (eset->term, "rb");
1849           if (fp != NULL) {
1850             fseek (fp, 0, SEEK_END);
1851             offset = ftell (fp);
1852             FileClose (fp);
1853           } else {
1854             offset = 0;
1855           }
1856 #ifdef _NEW_CdEntrez_
1857           if (_nouveau)
1858             cd3_CdTrmUidsFil (type, field, trmp->offset, trmp->total_count, eset->term, TRUE);
1859 #endif
1860 #ifdef _OLD_CdEntrez_
1861           if (!_nouveau)
1862             CdTrmUidsFil (type, field, trmp->offset, trmp->total_count, eset->term, TRUE);
1863 #endif
1864           trmp->offset = offset;
1865         } else {
1866           db = type;
1867           fld = field;
1868           uidPtr = MemNew ((size_t) MAX_CDENTREZ_UID_LIST_SIZE);
1869           if (uidPtr != NULL) {
1870             scanPtr = MemNew (SCAN_MAX * sizeof (ScanData));
1871             if (scanPtr != NULL) {
1872               scanOk = TRUE;
1873               scanCount = 0;
1874               specialPost = NULL;
1875               remainPost = NULL;
1876               ScanOnlyProc (trmp);
1877               if (scanCount > 0) {
1878                 ProcessScanResults ();
1879               }
1880               if (specialPost != NULL && remainPost != NULL) {
1881                 remainPost = DifferencePostingLists (remainPost, specialPost);
1882               }
1883               if (specialPost == NULL) {
1884                 specialPost = BSNew (0);
1885               }
1886               if (remainPost == NULL) {
1887                 remainPost = BSNew (0);
1888               }
1889               special = BSLen (specialPost) / sizeof (DocUid);
1890               remain = BSLen (remainPost) / sizeof (DocUid);
1891               total = special + remain;
1892               scanPtr = MemFree (scanPtr);
1893             }
1894             uidPtr = MemFree (uidPtr);
1895             if (scanOk && total > 0) {
1896               trmp = CdTermFree (trmp);
1897               trmp = CdEntrezCreateTerm (term, db, fld, specialPost, remainPost, highRange);
1898             }
1899             specialPost = BSFree (specialPost);
1900             remainPost = BSFree (remainPost);
1901           }
1902         }
1903       }
1904     }
1905   }
1906   return trmp;
1907 }
1908 
1909 /*****************************************************************************
1910 *
1911 *   CdEntrezPreloadMerge (term, type, field, spcl, totl)
1912 *       Creates a CdTerm node in the entrez set structure if one does not yet
1913 *       exist, and loads the posting file by merging multiple postings files.
1914 *
1915 *****************************************************************************/
1916 
1917 static void NEAR SingleSpaces (CharPtr str)
1918 
1919 {
1920   Char  ch;
1921   Int2  i;
1922   Int2  j;
1923   Int2  k;
1924 
1925   i = 0;
1926   j = 0;
1927   k = 0;
1928   ch = str [i];
1929   while (ch != '\0') {
1930     if (ch == ' ') {
1931       if (k == 0) {
1932         str [j] = ch;
1933         j++;
1934       }
1935       k++;
1936       i++;
1937     } else {
1938       k = 0;
1939       str [j] = ch;
1940       i++;
1941       j++;
1942     }
1943     ch = str [i];
1944   }
1945   str [j] = '\0';
1946 }
1947 
1948 static void NEAR TermTruncate (CharPtr str)
1949 
1950 {
1951   if (str != NULL && str [0] != '\0') {
1952     SingleSpaces (str);
1953     if (searchTermLen < (Int2) StringLen (str)) {
1954       str [searchTermLen] = '\0';
1955     }
1956   }
1957 }
1958 
1959 static int LIBCALLBACK HeapCompare (VoidPtr ptr1, VoidPtr ptr2)
1960 
1961 {
1962   DocUidPtr  uid1;
1963   DocUidPtr  uid2;
1964 
1965   if (ptr1 != NULL && ptr2 != NULL) {
1966     uid1 = (DocUidPtr) ptr1;
1967     uid2 = (DocUidPtr) ptr2;
1968     if (*uid1 > *uid2) {
1969       return 1;
1970     } else if (*uid1 < *uid2) {
1971       return -1;
1972     } else {
1973       return 0;
1974     }
1975   } else {
1976     return 0;
1977   }
1978 }
1979 
1980 static void NEAR QuickSortSmall (DocUidPtr uids, Int4 l, Int4 r)
1981 
1982 {
1983   HeapSort (uids + l, (size_t) (r - l + 1), sizeof (DocUid), HeapCompare);
1984 }
1985 
1986 /*
1987 static Boolean NEAR AlreadyInOrder (DocUidPtr uids, Int4 l, Int4 r)
1988 
1989 {
1990   DocUid   last;
1991   Boolean  rsult;
1992 
1993   rsult = TRUE;
1994   if (l < r) {
1995     last = 0;
1996     while (l <= r) {
1997       if (uids [l] < last) {
1998         rsult = FALSE;
1999       }
2000       last = uids [l];
2001       l++;
2002     }
2003   }
2004   return rsult;
2005 }
2006 
2007 static void NEAR QuickSortSmall (DocUidPtr uids, Int4 l, Int4 r)
2008 
2009 {
2010   DocUid  a;
2011   DocUid  b;
2012   DocUid  c;
2013   Int4    i;
2014   Int4    j;
2015   DocUid  temp;
2016   DocUid  x;
2017 
2018   if (AlreadyInOrder (uids, l, r)) {
2019     return;
2020   }
2021   i = l;
2022   j = r;
2023   a = uids [l];
2024   b = uids [(l + r) / 2];
2025   c = uids [r];
2026   if (a > b) {
2027     if (c > a) {
2028       x = a;
2029     } else if (c < b) {
2030       x = b;
2031     } else {
2032       x = c;
2033     }
2034   } else {
2035     if (c < a) {
2036       x = a;
2037     } else if (c > b) {
2038       x = b;
2039     } else {
2040       x = c;
2041     }
2042   }
2043   do {
2044     while (uids [i] < x) {
2045       i++;
2046     }
2047     while (x < uids [j]) {
2048       j--;
2049     }
2050     if (i <= j) {
2051       temp = uids [i];
2052       uids [i] = uids [j];
2053       uids [j] = temp;
2054       i++;
2055       j--;
2056     }
2057   } while (i <= j);
2058   if (i - l < r - j) {
2059     if (l < j) {
2060       QuickSortSmall (uids, l, j);
2061     }
2062     if (i < r) {
2063       QuickSortSmall (uids, i, r);
2064     }
2065   } else {
2066     if (i < r) {
2067       QuickSortSmall (uids, i, r);
2068     }
2069     if (l < j) {
2070       QuickSortSmall (uids, l, j);
2071     }
2072   }
2073 }
2074 */
2075 
2076 static Int4 NEAR CompressSmall (DocUidPtr uids, Int4 count)
2077 
2078 {
2079   Int4  i;
2080   Int4  j;
2081 
2082   i = 0;
2083   j = 0;
2084   while (i < count) {
2085     if (uids [i] > 0) {
2086       uids [j] = uids [i];
2087       i++;
2088       j++;
2089     } else {
2090       i++;
2091     }
2092   }
2093   i = j;
2094   while (j < count) {
2095     uids [j] = 0;
2096     j++;
2097   }
2098   return i;
2099 }
2100 
2101 static Int4 NEAR UniqueSmall (DocUidPtr uids, Int4 count)
2102 
2103 {
2104   Int4    i;
2105   Int4    j;
2106   DocUid  last;
2107 
2108   i = 0;
2109   if (count <= 1) {
2110     i = count;
2111   } else {
2112     i = 0;
2113     j = 0;
2114     last = 0;
2115     while (i < count) {
2116       if (uids [i] != last) {
2117         uids [j] = uids [i];
2118         last = uids [i];
2119         i++;
2120         j++;
2121       } else {
2122         i++;
2123       }
2124     }
2125     i = j;
2126     while (j < count) {
2127       uids [j] = 0;
2128       j++;
2129     }
2130   }
2131   return i;
2132 }
2133 
2134 static ByteStorePtr NEAR MergeSmallLists (ByteStorePtr bsp, ByteStorePtr small)
2135 
2136 {
2137   Int4       count;
2138   Int4       len;
2139   DocUidPtr  uids;
2140 
2141   if (small != NULL) {
2142     len = BSLen (small) / (Int4) sizeof (DocUid);
2143     if (len <= (long) MAX_CDENTREZ_SMALL_LIST && len > 0) {
2144       count = (Int4) len;
2145       uids = MemNew ((size_t) count * sizeof (DocUid));
2146       if (uids != NULL) {
2147         BSMerge (small, (VoidPtr) uids);
2148         small = BSFree (small);
2149         QuickSortSmall (uids, 0, (Int4) (count - 1));
2150         count = CompressSmall (uids, count);
2151         count = UniqueSmall (uids, count);
2152         if (count > 0) {
2153           small = BSNew (0L);
2154           BSWrite (small, uids, count * sizeof (DocUid));
2155         }
2156         uids = MemFree (uids);
2157         if (small != NULL) {
2158           bsp = MergePostingLists (bsp, small);
2159         }
2160       } else {
2161         ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "MergeSmallLists memory failure");
2162       }
2163     } else if (len > (long) MAX_CDENTREZ_SMALL_LIST) {
2164       ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "MergeSmallLists > %d", MAX_CDENTREZ_SMALL_LIST);
2165     }
2166   }
2167   return bsp;
2168 }
2169 
2170 static Boolean NEAR MergeUnorderedLists (Int4 i, Int4 count)
2171 
2172 {
2173   BytePtr       bptr;
2174   Int4          finish;
2175   Boolean       goOn;
2176   Int4          j;
2177   Int4          len;
2178   Int4          max;
2179   DocUidPtr     mptr;
2180   Int4          number;
2181   Int4          offset;
2182   ByteStorePtr  remainLarge;
2183   ByteStorePtr  remainSmall;
2184   Int4          smallCount;
2185   Int4          start;
2186   Int4          total;
2187 
2188   goOn = TRUE;
2189   j = i + count - 1;
2190   max = scanPtr [j].offset + scanPtr [j].totalCount *
2191         (Int4) sizeof (DocUid) - scanPtr [i].offset;
2192   if (max <= MAX_CDENTREZ_UID_LIST_SIZE) {
2193     offset = scanPtr [i].offset;
2194     len = (Int4) (max / (Int4) sizeof (DocUid));
2195 #ifdef _NEW_CdEntrez_
2196     if (_nouveau)
2197       cd3_CdTrmUidsMem (db, fld, offset, (Int4) len, uidPtr);
2198 #endif
2199 #ifdef _OLD_CdEntrez_
2200     if (!_nouveau)
2201       CdTrmUidsMem (db, fld, offset, (Int4) len, uidPtr);
2202 #endif
2203     remainSmall = NULL;
2204     smallCount = 0;
2205     for (j = i; j < i + count; j++) {
2206       scanPtr [j].offset -= offset;
2207       total = scanPtr [j].totalCount;
2208       bptr = ((BytePtr) uidPtr) + scanPtr [j].offset;
2209       mptr = (DocUidPtr) bptr;
2210       if (smallCount + total > MAX_CDENTREZ_SMALL_LIST) {
2211         if (remainSmall != NULL) {
2212           remainPost = MergeSmallLists (remainPost, remainSmall);
2213           remainSmall = NULL;
2214         }
2215         smallCount = 0;
2216       }
2217       if (total > 100) {
2218         start = 0;
2219         number = 0;
2220         while (start < total) {
2221           finish = start + 1;
2222           while (finish < total && mptr [finish - 1] < mptr [finish]) {
2223             finish++;
2224           }
2225           number = finish - start;
2226           if (number > 100) {
2227             remainLarge = BSNew (number * sizeof (DocUid));
2228             BSWrite (remainLarge, (mptr + start), number * sizeof (DocUid));
2229             remainPost = MergePostingLists (remainPost, remainLarge);
2230           } else {
2231             smallCount += number;
2232             if (number > 0) {
2233               if (remainSmall == NULL) {
2234                 remainSmall = BSNew (0L);
2235               }
2236               BSWrite (remainSmall, (mptr + start), number * sizeof (DocUid));
2237             }
2238             if (smallCount > MAX_CDENTREZ_SMALL_LIST) {
2239               if (remainSmall != NULL) {
2240                 remainPost = MergeSmallLists (remainPost, remainSmall);
2241                 remainSmall = NULL;
2242               }
2243               smallCount = 0;
2244             }
2245           }
2246           start = finish;
2247         }
2248       } else {
2249         smallCount += total;
2250         if (total > 0) {
2251           if (remainSmall == NULL) {
2252             remainSmall = BSNew (0L);
2253           }
2254           BSWrite (remainSmall, mptr, total * sizeof (DocUid));
2255         }
2256       }
2257     }
2258     if (remainSmall != NULL) {
2259       remainPost = MergeSmallLists (remainPost, remainSmall);
2260       remainSmall = NULL;
2261     }
2262   } else {
2263     ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "Cannot merge > 32 K element");
2264     scanOk = FALSE;
2265     goOn = FALSE;
2266   }
2267   return goOn;
2268 }
2269 
2270 static Boolean NEAR MergeSeveralOrderedLists (Int4 i, Int4 count)
2271 
2272 {
2273   BytePtr       bptr;
2274   Boolean       goOn;
2275   Int4          j;
2276   Int4          len;
2277   Int4          max;
2278   DocUidPtr     mptr;
2279   Int4          offset;
2280   Int4          remainder;
2281   ByteStorePtr  remainLarge;
2282   ByteStorePtr  remainSmall;
2283   Int4          smallCount;
2284   Int4          special;
2285   ByteStorePtr  specialLarge;
2286   ByteStorePtr  specialSmall;
2287   Int4          total;
2288 
2289   goOn = TRUE;
2290   j = i + count - 1;
2291   max = scanPtr [j].offset + scanPtr [j].totalCount *
2292         (Int4) sizeof (DocUid) - scanPtr [i].offset;
2293   if (max <= MAX_CDENTREZ_UID_LIST_SIZE) {
2294     offset = scanPtr [i].offset;
2295     len = (Int4) (max / (Int4) sizeof (DocUid));
2296 #ifdef _NEW_CdEntrez_
2297     if (_nouveau)
2298       cd3_CdTrmUidsMem (db, fld, offset, (Int4) len, uidPtr);
2299 #endif
2300 #ifdef _OLD_CdEntrez_
2301     if (!_nouveau)
2302       CdTrmUidsMem (db, fld, offset, (Int4) len, uidPtr);
2303 #endif
2304     specialSmall = NULL;
2305     remainSmall = NULL;
2306     smallCount = 0;
2307     for (j = i; j < i + count; j++) {
2308       scanPtr [j].offset -= offset;
2309       special = scanPtr [j].specialCount;
2310       total = scanPtr [j].totalCount;
2311       remainder = total - special;
2312       bptr = ((BytePtr) uidPtr) + scanPtr [j].offset;
2313       mptr = (DocUidPtr) bptr;
2314       if (smallCount + total > MAX_CDENTREZ_SMALL_LIST) {
2315         if (specialSmall != NULL) {
2316           specialPost = MergeSmallLists (specialPost, specialSmall);
2317           specialSmall = NULL;
2318         }
2319         if (remainSmall != NULL) {
2320           remainPost = MergeSmallLists (remainPost, remainSmall);
2321           remainSmall = NULL;
2322         }
2323         smallCount = 0;
2324       }
2325       if (total > 100) {
2326         specialLarge = BSNew (special * sizeof (DocUid));
2327         BSWrite (specialLarge, mptr, special * sizeof (DocUid));
2328         specialPost = MergePostingLists (specialPost, specialLarge);
2329         remainLarge = BSNew (remainder * sizeof (DocUid));
2330         BSWrite (remainLarge, (mptr + special),
2331                  remainder * sizeof (DocUid));
2332         remainPost = MergePostingLists (remainPost, remainLarge);
2333       } else {
2334         smallCount += total;
2335         if (special > 0) {
2336           if (specialSmall == NULL) {
2337             specialSmall = BSNew (0L);
2338           }
2339           BSWrite (specialSmall, mptr, special * sizeof (DocUid));
2340         }
2341         if (remainder > 0) {
2342           if (remainSmall == NULL) {
2343             remainSmall = BSNew (0L);
2344           }
2345           BSWrite (remainSmall, (mptr + special), remainder * sizeof (DocUid));
2346         }
2347       }
2348     }
2349     if (specialSmall != NULL) {
2350       specialPost = MergeSmallLists (specialPost, specialSmall);
2351       specialSmall = NULL;
2352     }
2353     if (remainSmall != NULL) {
2354       remainPost = MergeSmallLists (remainPost, remainSmall);
2355       remainSmall = NULL;
2356     }
2357   } else {
2358     ErrPostEx (SEV_ERROR, ERR_CD_BOOL, 0, "Cannot merge > %ld element", (long) MAX_CDENTREZ_UID_LIST_SIZE);
2359     scanOk = FALSE;
2360     goOn = FALSE;
2361   }
2362   return goOn;
2363 }
2364 
2365 static Boolean NEAR MergeSeveralLists (Int4 i, Int4 count)
2366 
2367 {
2368   if (fld != FLD_ORGN) {
2369     return MergeSeveralOrderedLists (i, count);
2370   } else {
2371     return MergeUnorderedLists (i, count);
2372   }
2373 }
2374 
2375 static Boolean NEAR ProcessScanResults (void)
2376 
2377 {
2378   Boolean  goOn;
2379   Int4     i;
2380   Int4     j;
2381   Int4     max;
2382 
2383   ProgMon ("ProcessScanResults");
2384   goOn = TRUE;
2385   i = 0;
2386   j = 0;
2387   max = 0;
2388   while (j < scanCount) {
2389     if (scanPtr [j].offset < scanPtr [i].offset) {
2390       goOn = MergeSeveralLists (i, (Int4) (j - i));
2391       max = 0;
2392       i = j;
2393     } else {
2394       max = scanPtr [j].offset + scanPtr [j].totalCount *
2395             (Int4) sizeof (DocUid) - scanPtr [i].offset;
2396       if (max >= MAX_CDENTREZ_UID_LIST_SIZE) {
2397         if (j == i) {
2398           goOn = MergeSeveralLists (i, 1);
2399           j++;
2400           i = j;
2401           max = 0;
2402         } else {
2403           goOn = MergeSeveralLists (i, (Int4) (j - i));
2404           i = j;
2405           max = 0;
2406         }
2407       } else {
2408         j++;
2409       }
2410     }
2411   }
2412   if (max > 0) {
2413     goOn = MergeSeveralLists (i, (Int4) (j - i));
2414   }
2415   scanCount = 0;
2416   return goOn;
2417 }
2418 
2419 static Boolean  ScanOnlyProc (CdTermPtr trmp)
2420 
2421 {
2422   Int4     count;
2423   Boolean  goOn;
2424 
2425   goOn = TRUE;
2426   if (scanCount >= SCAN_MAX) {
2427     goOn = ProcessScanResults ();
2428   }
2429   if (scanCount < SCAN_MAX) {
2430     if (trmp->total_count >= CDENTREZ_TERM_MAX) {
2431       while (trmp->special_count > 0) {
2432         if (scanCount >= SCAN_MAX) {
2433           goOn = ProcessScanResults ();
2434         }
2435         count = MIN (trmp->special_count, (long) CDENTREZ_TERM_MAX);
2436         scanPtr [scanCount].specialCount = count;
2437         scanPtr [scanCount].totalCount = count;
2438         scanPtr [scanCount].offset = trmp->offset;
2439         scanPtr [scanCount].specialPtr = NULL;
2440         scanPtr [scanCount].remainderPtr = NULL;
2441         scanCount++;
2442         trmp->special_count -= count;
2443         trmp->total_count -= count;
2444         trmp->offset += count * sizeof (DocUid);
2445       }
2446       while (trmp->total_count > 0) {
2447         if (scanCount >= SCAN_MAX) {
2448           goOn = ProcessScanResults ();
2449         }
2450         count = MIN (trmp->total_count, (long) CDENTREZ_TERM_MAX);
2451         scanPtr [scanCount].specialCount = 0;
2452         scanPtr [scanCount].totalCount = count;
2453         scanPtr [scanCount].offset = trmp->offset;
2454         scanPtr [scanCount].specialPtr = NULL;
2455         scanPtr [scanCount].remainderPtr = NULL;
2456         scanCount++;
2457         trmp->total_count -= count;
2458         trmp->offset += count * sizeof (DocUid);
2459       }
2460     } else {
2461       if (scanCount >= SCAN_MAX) {
2462         goOn = ProcessScanResults ();
2463       }
2464       scanPtr [scanCount].specialCount = trmp->special_count;
2465       scanPtr [scanCount].totalCount = trmp->total_count;
2466       scanPtr [scanCount].offset = trmp->offset;
2467       scanPtr [scanCount].specialPtr = NULL;
2468       scanPtr [scanCount].remainderPtr = NULL;
2469       scanCount++;
2470     }
2471   }
2472   return goOn;
2473 }
2474 
2475 static Boolean  WildCardProc (CdTermPtr trmp)
2476 
2477 {
2478   Int4     diff;
2479   Boolean  goOn;
2480   CharPtr  src;
2481   CharPtr  tgt;
2482 
2483   goOn = FALSE;
2484   src = selection;
2485   tgt = trmp->term;
2486   diff = 0;
2487   while (*src != '\0' && *tgt != '\0' && diff == 0) {
2488     if (*src != '?') {
2489       diff = TO_UPPER (*src) - TO_UPPER (*tgt);
2490     }
2491     if (diff == 0) {
2492       src++;
2493       tgt++;
2494     }
2495   }
2496   if (diff != 0) {
2497     if (*src == '*') {
2498       goOn = TRUE;
2499     }
2500   } else if (*src == '*') {
2501     goOn = TRUE;
2502   } else if (*src == '\0' && *tgt == '\0') {
2503     goOn = TRUE;
2504   } else {
2505     goOn = FALSE;
2506   }
2507   return goOn;
2508 }
2509 
2510 static Boolean  ScanAndFreeProc (CdTermPtr trmp)
2511 
2512 {
2513   Int4     compare;
2514   Boolean  goOn;
2515   Char     str [256];
2516 
2517   goOn = TRUE;
2518   if (trmp != NULL && trmp->term != NULL) {
2519     if (rangeScanning) {
2520       compare = MeshStringICmp (trmp->term, selection);
2521       if (compare >= 0) {
2522         if (topOfRange[0] == '\0')
2523           compare = -1;
2524         else
2525           compare = MeshStringICmp (trmp->term, topOfRange);
2526         if (compare > 0)
2527           goOn = FALSE;
2528         else
2529           goOn = ScanOnlyProc (trmp);
2530       }
2531     } else {
2532       StringNCpy (str, trmp->term, sizeof (str));
2533       TermTruncate (str);
2534       if (userScanProc != NULL) {
2535         compare = MeshStringICmp (str, wildcard);
2536       } else {
2537         compare = MeshStringICmp (str, selection);
2538       }
2539       if (compare > 0) {
2540         str [searchTermLen] = '\0';
2541         if (userScanProc != NULL) {
2542           compare = MeshStringICmp (str, wildcard);
2543         } else {
2544           compare = MeshStringICmp (str, selection);
2545         }
2546         if (compare > 0) {
2547           goOn = FALSE;
2548         }
2549       } else if (compare == 0) {
2550         if (userScanProc != NULL) {
2551           if (userScanProc (trmp)) {
2552             goOn = ScanOnlyProc (trmp);
2553           }
2554         } else {
2555           goOn = ScanOnlyProc (trmp);
2556         }
2557       }
2558     }
2559   }
2560   trmp = CdTermFree (trmp);
2561   return goOn;
2562 }
2563 
2564 static Boolean NEAR CdEntrezMergeTerm (DocType type, DocField field, CharPtr term,
2565                                         Int4Ptr spcl, Int4Ptr totl, CdTermProc userProc)
2566 
2567 {
2568   Char  ch;
2569   Int4  remain;
2570   Int4  special;
2571   Char  str [256];
2572   Int4  total;
2573   Int4  termpage;
2574   CharPtr tmp;
2575   Int4  limit = 0;
2576   CharPtr prop;
2577   Boolean retval = FALSE;
2578 
2579   if (spcl != NULL) {
2580     *spcl = 0;
2581   }
2582   if (totl != NULL) {
2583     *totl = 0;
2584   }
2585   db = type;
2586   fld = field;
2587   userScanProc = userProc;
2588   StringNCpy (str, term, sizeof (str));
2589   tmp = str;
2590   while (*tmp != '\0') {
2591     tmp++;
2592   }
2593   tmp -= 3;
2594   if ((*tmp == '.') && (*(tmp+1) == '.') && (*(tmp+2) == '.')) {
2595     *tmp = '\0';
2596   }
2597   SingleSpaces (str);
2598   if (userProc != NULL) {
2599     searchTermLen = 0;
2600     ch = str [searchTermLen];
2601     while (ch != '\0' && ch != '*' && ch != '?') {
2602       searchTermLen++;
2603       ch = str [searchTermLen];
2604     }
2605   } else {
2606     searchTermLen = (Int4) StringLen (str);
2607   }
2608   if ((prop = (CharPtr) GetAppProperty("CdEntrezTruncLimit")) != NULL)
2609   {
2610     limit = atoi(prop);
2611   }
2612   if (searchTermLen > limit || str [0] == '?' || str [0] == '*' ||
2613       rangeScanning) {
2614     scanOk = TRUE;
2615     uidPtr = MemNew ((size_t) MAX_CDENTREZ_UID_LIST_SIZE);
2616     if (uidPtr != NULL) {
2617       scanPtr = MemNew (SCAN_MAX * sizeof (ScanData));
2618       if (scanPtr != NULL) {
2619         scanCount = 0;
2620         specialPost = NULL;
2621         remainPost = NULL;
2622         StringNCpy (selection, str, sizeof (selection));
2623         StringNCpy (wildcard, str, sizeof (wildcard));
2624         wildcard [searchTermLen] = '\0';
2625 #ifdef _NEW_CdEntrez_
2626                 if (_nouveau)
2627           termpage = cd3_CdTrmLookup (db, fld, wildcard);
2628 #endif
2629 #ifdef _OLD_CdEntrez_
2630                 if (!_nouveau)
2631           termpage = CdTrmLookup (db, fld, wildcard);
2632 #endif
2633         if (fld == FLD_MESH) {
2634           ch = str [0];
2635           str [0] = TO_UPPER (ch);
2636         }
2637         if (termpage >= 0) {
2638 #ifdef _NEW_CdEntrez_
2639                         if (_nouveau)
2640                                 cd3_CdTermScan (db, fld, termpage, (Int4)0, ScanAndFreeProc);
2641 #endif
2642 #ifdef _OLD_CdEntrez_
2643                         if (!_nouveau)
2644                                 CdTermScan (db, fld, termpage, (Int4)0, ScanAndFreeProc);
2645 #endif
2646         }
2647         if (scanCount > 0) {
2648           ProcessScanResults ();
2649         }
2650         if (specialPost != NULL && remainPost != NULL) {
2651           remainPost = DifferencePostingLists (remainPost, specialPost);
2652         }
2653         if (specialPost == NULL) {
2654           specialPost = BSNew (0);
2655         }
2656         if (remainPost == NULL) {
2657           remainPost = BSNew (0);
2658         }
2659         special = BSLen (specialPost) / sizeof (DocUid);
2660         remain = BSLen (remainPost) / sizeof (DocUid);
2661         total = special + remain;
2662         scanPtr = MemFree (scanPtr);
2663       }
2664       uidPtr = MemFree (uidPtr);
2665       if (scanOk && total > 0) {
2666                 retval = TRUE;
2667                 if (userProc == NULL && !rangeScanning) {
2668                         StringCat (str, "...");
2669                 }
2670         CdEntrezCreateTerm (str, db, fld, specialPost, remainPost, rangeScanning ? topOfRange : NULL);
2671         if (spcl != NULL) {
2672           *spcl = special;
2673         }
2674         if (totl != NULL) {
2675           *totl = total;
2676         }
2677       }
2678       specialPost = BSFree (specialPost);
2679       remainPost = BSFree (remainPost);
2680     }
2681   }
2682   return retval;
2683 }
2684 
2685 /*****************************************************************************
2686 *
2687 *   CdEntMedlineEntryListGet (result, numuid, uids, mark_missing)
2688 *       returns a count of entries read
2689 *       if (mark_missing) ids which could not be located are made negative
2690 *
2691 *****************************************************************************/
2692 NLM_EXTERN Int2  CdEntMedlineEntryListGet (MedlineEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Boolean mark_missing)
2693 
2694 {
2695         MedlineEntryPtr mep;
2696         Int2 count = 0, ctr;
2697         AsnIoPtr aip;
2698         DocType db = TYP_ML;
2699 
2700         if (! MedlineAsnLoad())
2701                 return 0;
2702 
2703         for (ctr = 0; ctr < numuid; ctr++)
2704         {
2705                 mep = NULL;
2706 
2707 #ifdef _NEW_CdEntrez_
2708                 if (_nouveau)
2709                         aip = cd3_CdDocAsnOpen(db, uids[ctr]);
2710 #endif
2711 #ifdef _OLD_CdEntrez_
2712                 if (!_nouveau)
2713                         aip = CdDocAsnOpen(db, uids[ctr]);
2714 #endif
2715                 if (aip != NULL)
2716                 {
2717                         mep = MedlineEntryAsnRead(aip, NULL);
2718 #ifdef _NEW_CdEntrez_
2719                         if (_nouveau)
2720                                 cd3_CdDocAsnClose(aip);
2721 #endif
2722 #ifdef _OLD_CdEntrez_
2723                         if (!_nouveau)
2724                                 CdDocAsnClose(aip);
2725 #endif
2726                 }
2727                 if (mep == NULL)    /* didn't get it */
2728                 {
2729                         if (mark_missing)
2730                                 uids[ctr] *= -1;
2731                 }
2732                 else
2733                 {
2734                         count++;
2735                         result[ctr] = mep;
2736                 }
2737         }
2738         
2739         return count;
2740 }
2741 
2742 /*****************************************************************************
2743 *
2744 *   CdEntSeqEntryListGet (result, numuid, uids, retcode, mark_missing)
2745 *       returns a count of entries read
2746 *       if (mark_missing) ids which could not be located are made negative
2747 *       retcode is defined in objsset.h 
2748 *
2749 *****************************************************************************/
2750 static AsnIo* CdSeqAsnOpen (DocType *type, DocUid uid, Boolean isGenome);
2751 
2752 NLM_EXTERN Int2  CdEntSeqEntryListGet (SeqEntryPtr PNTR result, Int2 numuid, Int4Ptr uids, Int2 retcode, Boolean mark_missing)
2753 {
2754         SeqEntryPtr sep;
2755         Int2 count = 0, ctr;
2756         AsnIoPtr aip;
2757         DocType db = TYP_SEQ;
2758         ValNode an;
2759 
2760         if (! SeqSetAsnLoad())
2761                 return 0;
2762 
2763     an.data.intvalue = 0;
2764     an.choice = SEQID_GI;
2765 
2766         for (ctr = 0; ctr < numuid; ctr++)
2767         {
2768                 sep = NULL;
2769                 aip = CdSeqAsnOpen(&db, uids[ctr], retcode == -1);
2770                 if (aip != NULL)
2771                 {
2772                         an.data.intvalue = uids[ctr];
2773                         if (retcode == -1)
2774                                 sep = SeqEntryAsnRead(aip, NULL);
2775                         else
2776                                 sep = SeqEntryAsnGet(aip, NULL, &an, retcode);
2777 #ifdef _NEW_CdEntrez_
2778                         if (_nouveau)
2779                                 cd3_CdDocAsnClose(aip);
2780 #endif
2781 #ifdef _OLD_CdEntrez_
2782                         if (!_nouveau)
2783                                 CdDocAsnClose(aip);
2784 #endif
2785                 }
2786                 if (sep == NULL)    /* didn't get it */
2787                 {
2788                         if (mark_missing)
2789                                 uids[ctr] *= -1;
2790                 }
2791                 else
2792                 {
2793                         count++;
2794                         result[ctr] = sep;
2795                 }
2796         }
2797         
2798         return count;
2799 }
2800 
2801 
2802 static AsnIo* CdSeqAsnOpen (DocType *type, DocUid uid, Boolean isGenome)
2803 {
2804         AsnIo *aio = NULL;
2805         
2806 #ifdef _NEW_CdEntrez_
2807         if (_nouveau)
2808         {
2809                 if (isGenome) {
2810                         if ((aio = cd3_CdDocAsnOpen(TYP_CH,uid)) != NULL)
2811                                 *type = TYP_CH;
2812                 } else {
2813                         if (*type != TYP_SEQ)
2814                         {
2815                                 aio = cd3_CdDocAsnOpen(*type,uid);
2816                         }
2817                         else
2818                         {
2819                                 if ((aio = cd3_CdDocAsnOpen(TYP_AA,uid)) != NULL)
2820                                         *type = TYP_AA;
2821                                 else if ((aio = cd3_CdDocAsnOpen(TYP_NT,uid)) != NULL)
2822                                                 *type = TYP_NT;
2823                         }
2824                 }
2825         }
2826 #endif
2827 
2828 #ifdef _OLD_CdEntrez_
2829         if (!_nouveau)
2830                 aio = CdDocAsnOpen(*type,uid);
2831 #endif
2832 
2833         return aio;
2834 }
2835 
2836 /*****************************************************************************
2837 *
2838 *   CdEntMlSumListGet (result, numuid, uids)
2839 *       returns a count of entries read
2840 *       head of linked list is in result
2841 *
2842 *****************************************************************************/
2843 
2844 NLM_EXTERN Int2  CdEntMlSumListGet (DocSumPtr PNTR result, Int2 numuid, Int4Ptr uids)          /* Gi numbers */
2845 {
2846         Int2 count = 0;
2847 
2848 #ifdef _NEW_CdEntrez_
2849         if (_nouveau)
2850                 count = CdDocSumListGet(result,numuid,TYP_ML,uids);
2851 #endif
2852 
2853 #ifdef _OLD_CdEntrez_
2854         if (!_nouveau)
2855         {
2856                 Int2 ctr;
2857                 DocType db = TYP_ML;
2858                 AsnIoPtr aip;
2859         
2860                 for (ctr = 0; ctr < numuid; ctr++)
2861                 {
2862                         result[ctr] = NULL;
2863                         aip = CdDocAsnOpen (db, uids[ctr]);
2864                     if (aip != NULL)
2865                         {
2866                                 result[ctr] = MedSumAsnRead(aip, uids[ctr]);
2867                                 CdDocAsnClose(aip);
2868                                 if (result[ctr] != NULL)
2869                                         count++;
2870                         }
2871                 }
2872         }
2873 #endif
2874         
2875         return count;
2876 }
2877 
2878 
/*****************************************************************************
*
*   CdEntMlSumGet(uid)
*       get one MlSummary; thin wrapper around CdEntMlSumListGet
*       returns NULL if the summary could not be read
*       only compiled when _OLD_CdEntrez_ is defined
*
*****************************************************************************/
#ifdef _OLD_CdEntrez_

static DocSumPtr NEAR CdEntMlSumGet (Int4 uid)
{
        DocSumPtr summary;

        summary = NULL;
        CdEntMlSumListGet(&summary, 1, &uid);
        return summary;
}

#endif
2896 
2897 /*****************************************************************************
2898 *
2899 *   void StripAuthor(author)
2900 *
2901 *****************************************************************************/
2902 static void NEAR StripAuthor (CharPtr author)
2903 
2904 {
2905   CharPtr  p1, p2;
2906 
2907   p1 = author;
2908   while ((p1 = StringChr (p1, ' ')) != NULL) {
2909     for (p2 = p1 + 1; *p2 != '\0'; p2++) {
2910       if (*p2 == ' ') break;
2911       if (IS_ALPHA (*p2) && IS_LOWER (*p2)) break;
2912     }
2913     if (*p2 == '\0' || *p2 == ' ') {
2914       *p1 = '\0';
2915       return;
2916     }
2917     p1++;
2918   }
2919 }
2920 
2921 /*****************************************************************************
2922 *
2923 *   MedSumAsnRead(aip, uid)
2924 *
2925 *****************************************************************************/
2926 static void NEAR FindAsnType (AsnTypePtr PNTR atp, AsnModulePtr amp, CharPtr str)
2927 
2928 {
2929   if (atp != NULL && (*atp) == NULL) {
2930     *atp = AsnTypeFind (amp, str);
2931   }
2932 }
2933 
2934 
2935 static DocSumPtr NEAR MedSumAsnRead (AsnIoPtr aip, DocUid uid)
2936 
2937 {
2938   DataVal       av;
2939   AsnModulePtr  amp;
2940   AsnTypePtr    atp;
2941   Boolean       citFound;
2942   DocSumPtr     dsp;
2943   Boolean       goOn;
2944   Int2          i;
2945   CharPtr       ptr;
2946   Char          caption [50];
2947   Char          author [40];
2948   Char          year [10];
2949 
2950   if ((aip == NULL) || (! AllObjLoad ()))
2951     return NULL;
2952 
2953           amp = AsnAllModPtr ();
2954 
2955           FindAsnType (&MEDLINE_ENTRY, amp, "Medline-entry");
2956           FindAsnType (&MEDLINE_ENTRY_cit, amp, "Medline-entry.cit");
2957           FindAsnType (&MEDLINE_ENTRY_abstract, amp, "Medline-entry.abstract");
2958           FindAsnType (&TITLE_E_trans, amp, "Title.E.trans");
2959           FindAsnType (&AUTH_LIST_names_ml_E, amp, "Auth-list.names.ml.E");
2960           FindAsnType (&AUTH_LIST_names_str_E, amp, "Auth-list.names.str.E");
2961           FindAsnType (&DATE_STD_year, amp, "Date-std.year");
2962           FindAsnType (&DATE_str, amp, "Date.str");
2963           FindAsnType (&TITLE_E_name, amp, "Title.E.name");
2964           FindAsnType (&MEDLINE_ENTRY_mesh, amp, "Medline-entry.mesh");
2965           FindAsnType (&MEDLINE_ENTRY_substance, amp, "Medline-entry.substance");
2966           FindAsnType (&MEDLINE_ENTRY_xref, amp, "Medline-entry.xref");
2967           FindAsnType (&MEDLINE_ENTRY_idnum, amp, "Medline-entry.idnum");
2968           FindAsnType (&MEDLINE_ENTRY_gene, amp, "Medline-entry.gene");
2969 
2970   atp = AsnReadId (aip, amp, MEDLINE_ENTRY);
2971   AsnReadVal (aip, atp, &av);
2972 
2973   dsp = MemNew (sizeof (DocSum));
2974   if (dsp != NULL) {
2975     dsp->no_abstract = TRUE;
2976     dsp->translated_title = FALSE;
2977     dsp->no_authors = TRUE;
2978     author [0] = '\0';
2979     year [0] = '\0';
2980     citFound = FALSE;
2981     goOn = TRUE;
2982     while (goOn) {
2983       atp = AsnReadId (aip, amp, atp);
2984       if (atp == MEDLINE_ENTRY) {
2985         AsnReadVal (aip, atp, NULL);
2986         goOn = FALSE;
2987       } else if (atp == MEDLINE_ENTRY_cit) {
2988         AsnReadVal (aip, atp, NULL);
2989         citFound = TRUE;
2990       } else if (atp == MEDLINE_ENTRY_abstract) {
2991         AsnReadVal (aip, atp, NULL);
2992         dsp->no_abstract = FALSE;
2993         goOn = FALSE;
2994       } else if (atp == TITLE_E_trans) {
2995         AsnReadVal (aip, atp, &av);
2996         dsp->translated_title = TRUE;
2997         if (dsp->title != NULL) {
2998           dsp->title = MemFree (dsp->title);
2999         }
3000         dsp->title = MemNew ((size_t) StringLen ((CharPtr) av.ptrvalue) + 3);
3001         ptr = dsp->title;
3002         *ptr = '[';
3003         ptr++;
3004         ptr = StringMove (ptr, (CharPtr) av.ptrvalue);
3005         *ptr = ']';
3006         ptr++;
3007         *ptr = '\0';
3008         AsnKillValue (atp, &av);
3009       } else if (atp == AUTH_LIST_names_ml_E) {
3010         AsnReadVal (aip, atp, &av);
3011         dsp->no_authors = FALSE;
3012         if (author [0] == '\0') {
3013           StringNCpy (author, (CharPtr) av.ptrvalue, sizeof (author));
3014         }
3015         AsnKillValue (atp, &av);
3016       } else if (atp == AUTH_LIST_names_str_E) {
3017         AsnReadVal (aip, atp, &av);
3018         dsp->no_authors = FALSE;
3019         if (author [0] == '\0') {
3020           StringNCpy (author, (CharPtr) av.ptrvalue, sizeof (author));
3021         }
3022         AsnKillValue (atp, &av);
3023       } else if (atp == DATE_STD_year) {
3024         AsnReadVal (aip, atp, &av);
3025         if (citFound) {
3026           sprintf (year, "%ld", (long) av.intvalue);
3027         }
3028       } else if (atp == DATE_str) {
3029         AsnReadVal (aip, atp, &av);
3030         if (citFound) {
3031           i = 0;
3032           ptr = av.ptrvalue;
3033           while (ptr [i] != '\0' && ptr [i] != ' ' && i < sizeof (year) - 1) {
3034             year [i] = ptr [i];
3035             i++;
3036           }
3037           year [i] = '\0';
3038         }
3039         AsnKillValue (atp, &av);
3040       } else if (atp == TITLE_E_name) {
3041         AsnReadVal (aip, atp, &av);
3042         if (dsp->title == NULL) {
3043           dsp->title = StringSave ((CharPtr) av.ptrvalue);
3044         }
3045         AsnKillValue (atp, &av);
3046       } else if (atp == MEDLINE_ENTRY_mesh || atp == MEDLINE_ENTRY_substance ||
3047                  atp == MEDLINE_ENTRY_xref || atp == MEDLINE_ENTRY_idnum ||
3048                  atp == MEDLINE_ENTRY_gene) {
3049         AsnReadVal (aip, atp, NULL);
3050         goOn = FALSE;
3051       } else {
3052         AsnReadVal (aip, atp, NULL);
3053       }
3054     }
3055     if (dsp->no_authors) {
3056       sprintf (caption, "[%ld], %s", (long) uid, year);
3057     } else if (author [0] != '\0') {
3058       StripAuthor (author);
3059       author [12] = '.';
3060       author [12] = '\0';
3061       sprintf (caption, "%s, %s", author, year);
3062     } else {
3063       sprintf (caption, "[%ld], %s", (long) uid, year);
3064     }
3065     dsp->caption = StringSave (caption);
3066     dsp->uid = uid;
3067   }
3068   AsnIoReset (aip);
3069   return dsp;
3070 }
3071 
3072 
3073 /*****************************************************************************
3074 *
3075 *   CdSeqIdForGI(Int4 gi)
3076 *
3077 *****************************************************************************/
3078 NLM_EXTERN SeqIdPtr CdSeqIdForGI (Int4 gi)
3079 {
3080 #ifdef _NEW_CdEntrez_
3081         DocSum* dsp;
3082         SeqIdPtr sip = NULL, tmp, next;
3083 
3084         dsp = cd3_CdGetDocSum (TYP_NT, gi);   /* nucleic acid? */
3085         if (dsp == NULL)
3086                 dsp = cd3_CdGetDocSum (TYP_AA, gi);  /* protein? */
3087         if (dsp != NULL)
3088         {
3089                 tmp = SeqIdParse(dsp->extra);
3090                 DocSumFree(dsp);
3091 
3092                 while (tmp != NULL)
3093                 {
3094                         next = tmp->next;
3095                         tmp->next = NULL;
3096                         if (tmp->choice == SEQID_GI)
3097                                 SeqIdFree(tmp);
3098                         else
3099                                 sip = tmp;
3100                         tmp = next;
3101                 }
3102         }
3103         return sip;
3104 
3105 #else
3106 
3107         SeqIdPtr sip = NULL, ids, curr, best;
3108         AsnIoPtr aip;
3109         AsnModulePtr amp;
3110         AsnTypePtr atp;
3111         Boolean gotit;
3112         DocType db = TYP_SEQ;
3113         GiimPtr gip;
3114 
3115         static Uint1 pick_order[20] = {
3116         83, /* 0 = not set */
3117         65, /* 1 = local Object-id */
3118         65,  /* 2 = gibbsq */
3119         65,  /* 3 = gibbmt */
3120         70, /* 4 = giim Giimport-id */
3121         60, /* 5 = genbank */
3122         60, /* 6 = embl */
3123         60, /* 7 = pir */
3124         60, /* 8 = swissprot */
3125         65,  /* 9 = patent */
3126         65, /* 10 = other TextSeqId */
3127         65, /* 11 = general Dbtag */
3128         90,  /* 12 = gi */
3129         60, /* 13 = ddbj */
3130         60, /* 14 = prf */
3131         60,  /* 15 = pdb */
3132         0,      /* extras for new ids */
3133         0,
3134         0,
3135         0
3136     };
3137 
3138         if (! AllObjLoad()) return sip;
3139         amp = AsnAllModPtr();
3140         FindAsnType (&SEQ_ENTRY, amp, "Seq-entry");
3141     FindAsnType (&BIOSEQ_id, amp, "Bioseq.id");
3142     FindAsnType (&BIOSEQ_id_E, amp, "Bioseq.id.E");
3143 
3144         aip = CdSeqAsnOpen (&db, gi, FALSE);
3145         if (aip == NULL) return sip;
3146 
3147         atp = SEQ_ENTRY;
3148         while ((atp = AsnReadId(aip, amp, atp)) != NULL)
3149         {
3150                 if (atp == BIOSEQ_id)
3151                 {
3152                         gotit = FALSE;
3153                     ids = SeqIdSetAsnRead(aip, atp, BIOSEQ_id_E);
3154                         for (curr = ids; curr != NULL; curr = curr->next)
3155                         {
3156                                 if (curr->choice == SEQID_GIIM)
3157                                 {
3158                                         gip = (GiimPtr)(curr->data.ptrvalue);
3159                                         if (gip->id == gi)
3160                                         {
3161                                                 gotit = TRUE;
3162                                                 break;
3163                                         }
3164                                 }
3165                                 else if (curr->choice == SEQID_GI)
3166                                 {
3167                                         if (curr->data.intvalue == gi)
3168                                         {
3169                                                 gotit = TRUE;
3170                                                 break;
3171                                         }
3172                                 }
3173                         }
3174                         if (gotit)
3175                         {
3176                                 best = SeqIdSelect(ids, pick_order, 20);
3177                                 sip = ValNodeExtract(&ids, (Int2)(best->choice));
3178                         }
3179                         SeqIdSetFree(ids);
3180                         if (gotit)
3181                                 break;
3182 
3183                 }
3184                 else
3185                         AsnReadVal(aip, atp, NULL);
3186                 if (! AsnGetLevel(aip))       /* finished reading a Seq-entry */
3187                         break;                    /* failed */
3188         }
3189         
3190 #ifdef _NEW_CdEntrez_
3191         if (_nouveau)
3192           cd3_CdDocAsnClose(aip);
3193 #endif
3194 #ifdef _OLD_CdEntrez_
3195         if (!_nouveau)
3196           CdDocAsnClose(aip);
3197 #endif
3198 
3199         return sip;
3200 #endif
3201 }
3202 
3203 
3204 
3205 /*****************************************************************************
3206 *
3207 *   CdEntSeqSumListGet (result, numuid, db, uids)
3208 *       returns a count of entries read
3209 *       head of linked list is in result
3210 *
3211 *****************************************************************************/
3212 
3213 NLM_EXTERN Int2  CdEntSeqSumListGet (DocSumPtr PNTR result, Int2 numuid, DocType db, Int4Ptr uids)          /* Gi numbers */
3214 {
3215         Int2 count = 0;
3216         
3217 #ifdef _NEW_CdEntrez_
3218         if (_nouveau)
3219         {
3220                 ASSERT(db != TYP_SEQ);
3221                 count = CdDocSumListGet(result,numuid,db,uids);
3222         }
3223 #endif
3224         
3225 #ifdef _OLD_CdEntrez_
3226         if (!_nouveau)
3227         {
3228                 Int2 ctr;
3229                 AsnIoPtr aip;
3230         
3231                 for (ctr = 0; ctr < numuid; ctr++)
3232                 {
3233                         result[ctr] = NULL;
3234                         aip = CdDocAsnOpen (db, uids[ctr]);
3235                     if (aip != NULL)
3236                         {
3237                                 result[ctr] = CdSeqSumAsnRead(aip, uids[ctr]);
3238                                 CdDocAsnClose(aip);
3239                                 if (result[ctr] != NULL)
3240                                         count++;
3241                         }
3242                 }
3243         }
3244 #endif
3245 
3246         return count;
3247 }
3248 
3249 /*****************************************************************************
3250 *
3251 *   CdEntSeqSumGet(uid, type)
3252 *       get one SeqSummary
3253 *
3254 *****************************************************************************/
3255 #ifdef _OLD_CdEntrez_
3256 
3257 static DocSumPtr NEAR CdEntSeqSumGet (Int4 uid, DocType type)
3258 {
3259         DocSumPtr dsp = NULL;
3260 
3261         CdEntSeqSumListGet(&dsp, 1, type, &uid);
3262         return dsp;
3263 }
3264 
3265 #endif
3266 
3267 NLM_EXTERN DocSumPtr CdSeqSumAsnRead (AsnIoPtr aip, DocUid uid)
3268 
3269 {
3270   DataVal       av;
3271   AsnModulePtr  amp;
3272   AsnTypePtr    atp;
3273   DocSumPtr     dsp;
3274   Boolean       goOn;
3275   Char          caption [50];
3276   Char          author [40];
3277   Char          year [10];
3278   Char          locus [40];
3279   Char          cds [10];
3280   CharPtr       chptr;
3281   Int2          proteins;
3282   CharPtr       recentTitle;
3283   Boolean       backbone;
3284   Boolean       genBank;
3285   Boolean       embl;
3286   Boolean       ddbj;
3287   Boolean       pir;
3288   Boolean       swissprot;
3289   Boolean       isaNA;
3290   Boolean       isaAA;
3291   Boolean       isaSEG;
3292   Boolean               in_id;
3293   Int2          level;
3294 
3295   if ((aip == NULL) || (! AllObjLoad ()))
3296     return NULL;
3297 
3298   amp = AsnAllModPtr ();
3299 
3300   FindAsnType (&SEQ_ENTRY, amp, "Seq-entry");
3301   FindAsnType (&SEQ_ENTRY_seq, amp, "Seq-entry.seq");
3302   FindAsnType (&SEQ_ENTRY_set, amp, "Seq-entry.set");
3303   FindAsnType (&TEXTSEQ_ID_name, amp, "Textseq-id.name");
3304   FindAsnType (&TEXTSEQ_ID_accession, amp, "Textseq-id.accession");
3305   FindAsnType (&AUTH_LIST_names_str_E, amp, "Auth-list.names.str.E");
3306   FindAsnType (&DATE_STD_year, amp, "Date-std.year");
3307   FindAsnType (&DATE_str, amp, "Date.str");
3308   FindAsnType (&SEQ_DESCR_E_title, amp, "Seq-descr.E.title");
3309   FindAsnType (&GIIMPORT_ID_id, amp, "Giimport-id.id");
3310   FindAsnType (&BIOSEQ_inst, amp, "Bioseq.inst");
3311   FindAsnType (&SEQ_INST_mol, amp, "Seq-inst.mol");
3312   FindAsnType (&SEQ_INST_repr, amp, "Seq-inst.repr");
3313   FindAsnType (&SEQ_ID_gibbsq, amp, "Seq-id.gibbsq");
3314   FindAsnType (&SEQ_ID_gibbmt, amp, "Seq-id.gibbmt");
3315   FindAsnType (&SEQ_ID_genbank, amp, "Seq-id.genbank");
3316   FindAsnType (&SEQ_ID_gi, amp, "Seq-id.gi");
3317   FindAsnType (&SEQ_ID_embl, amp, "Seq-id.embl");
3318   FindAsnType (&SEQ_ID_ddbj, amp, "Seq-id.ddbj");
3319   FindAsnType (&SEQ_ID_pir, amp, "Seq-id.pir");
3320   FindAsnType (&SEQ_ID_swissprot, amp, "Seq-id.swissprot");
3321   FindAsnType (&PDB_BLOCK_compound_E, amp, "PDB-block.compound.E");
3322   FindAsnType (&PDB_SEQ_ID_MOL, amp, "PDB-seq-id.mol");
3323   FindAsnType (&BIOSEQ_id, amp, "Bioseq.id");
3324   FindAsnType (&CIT_PAT_title, amp, "Cit-pat.title");
3325 
3326   atp = AsnReadId (aip, amp, SEQ_ENTRY);
3327   AsnReadVal (aip, atp, &av);
3328 
3329   atp = AsnReadId (aip, amp, atp);
3330   AsnReadVal (aip, atp, &av);
3331 
3332   dsp = MemNew (sizeof (DocSum));
3333   if (dsp != NULL) {
3334     dsp->no_abstract = TRUE;
3335     dsp->translated_title = FALSE;
3336     dsp->no_authors = TRUE;
3337     author [0] = '\0';
3338     year [0] = '\0';
3339     locus [0] = '\0';
3340     cds [0] = '\0';
3341     proteins = 1;
3342     recentTitle = NULL;
3343     backbone = FALSE;
3344     genBank = FALSE;
3345     embl = FALSE;
3346         ddbj = FALSE;
3347     pir = FALSE;
3348     swissprot = FALSE;
3349     isaNA = FALSE;
3350     isaAA = FALSE;
3351     isaSEG = FALSE;
3352         in_id = FALSE;
3353     goOn = TRUE;
3354     level = AsnGetLevel (aip);
3355     while (goOn) {
3356       atp = AsnReadId (aip, amp, atp);
3357       if (atp == SEQ_ENTRY_seq || atp == SEQ_ENTRY_set) {
3358         AsnReadVal (aip, atp, NULL);
3359         if (AsnGetLevel (aip) <= level) {
3360           goOn = FALSE;
3361             }
3362       } else if (atp == BIOSEQ_id) {
3363         AsnReadVal (aip, atp, &av);
3364                 if (in_id) {
3365                         in_id = FALSE;
3366                 } else {
3367                         in_id = TRUE;
3368         }
3369       } else if (in_id && ((atp == TEXTSEQ_ID_name) ||
3370                         (atp == PDB_SEQ_ID_MOL))) {
3371         AsnReadVal (aip, atp, &av);
3372         if (locus [0] == '\0') {
3373           StringNCpy (locus, (CharPtr) av.ptrvalue, sizeof (locus));
3374         }
3375         AsnKillValue (atp, &av);
3376       } else if (in_id && (atp == TEXTSEQ_ID_accession)) {
3377         AsnReadVal (aip, atp, &av);
3378         if (locus [0] == '\0') {
3379           StringNCpy (locus, (CharPtr) av.ptrvalue, sizeof (locus));
3380         }
3381         AsnKillValue (atp, &av);
3382       } else if (atp == AUTH_LIST_names_str_E) {
3383         AsnReadVal (aip, atp, &av);
3384         if (author [0] == '\0') {
3385           StringNCpy (author, (CharPtr) av.ptrvalue, sizeof (author));
3386         }
3387         AsnKillValue (atp, &av);
3388       } else if (atp == DATE_STD_year) {
3389         AsnReadVal (aip, atp, &av);
3390         sprintf (year, "%ld", (long) av.intvalue);
3391       } else if (atp == DATE_str) {
3392         AsnReadVal (aip, atp, &av);
3393         StringNCpy (year, (CharPtr) av.ptrvalue, sizeof (year));
3394         AsnKillValue (atp, &av);
3395       } else if ((atp == SEQ_DESCR_E_title) ||
3396                 (atp == PDB_BLOCK_compound_E) || (atp == CIT_PAT_title)) {
3397         AsnReadVal (aip, atp, &av);
3398                 if (*((CharPtr)av.ptrvalue) != '\0')
3399                 {
3400                 if (recentTitle != NULL) {
3401               recentTitle = MemFree (recentTitle);
3402                 }
3403 
3404                 if (dsp->uid == uid && dsp->title == NULL &&
3405                                 atp != CIT_PAT_title) {
3406               dsp->title = (CharPtr)av.ptrvalue;
3407                 }
3408                         else
3409                                 recentTitle = (CharPtr)av.ptrvalue;
3410                 }
3411                 else
3412                 AsnKillValue (atp, &av);
3413       } else if (atp == GIIMPORT_ID_id || atp == SEQ_ID_gi) {
3414         AsnReadVal (aip, atp, &av);
3415         if (av.intvalue == uid) {
3416           dsp->uid = uid;
3417         }
3418       } else if (atp == SEQ_INST_mol) {
3419         AsnReadVal (aip, atp, &av);
3420         if ((! isaNA) && (! isaAA) && dsp->uid == uid) {
3421           isaNA = (Boolean) ISA_na (av.intvalue);
3422           isaAA = (Boolean) ISA_aa (av.intvalue);
3423           if (isaAA && cds [0] == '\0') {
3424             sprintf (cds, " cds%d", (int) proteins);
3425           }
3426         }
3427         if (ISA_aa (av.intvalue)) {
3428           proteins++;
3429         }
3430       } else if (atp == SEQ_INST_repr) {
3431         AsnReadVal (aip, atp, &av);
3432         if (av.intvalue == Seq_repr_seg) {
3433           isaSEG = TRUE;
3434         }
3435       } else if (atp == BIOSEQ_inst) {
3436         AsnReadVal (aip, atp, NULL);
3437         if (dsp->uid == uid && dsp->title == NULL) {
3438           dsp->title = recentTitle;
3439           recentTitle = NULL;
3440         }
3441       } else if (atp == SEQ_ID_gibbsq || atp == SEQ_ID_gibbmt) {
3442         AsnReadVal (aip, atp, NULL);
3443         backbone = TRUE;
3444       } else if (atp == SEQ_ID_genbank) {
3445         AsnReadVal (aip, atp, NULL);
3446                 if (in_id)
3447                 genBank = TRUE;
3448       } else if (atp == SEQ_ID_embl) {
3449         AsnReadVal (aip, atp, NULL);
3450                 if (in_id)
3451                 embl = TRUE;
3452       } else if (atp == SEQ_ID_ddbj) {
3453         AsnReadVal (aip, atp, NULL);
3454                 if (in_id)
3455                 ddbj = TRUE;
3456       } else if (atp == SEQ_ID_pir) {
3457         AsnReadVal (aip, atp, NULL);
3458                 if (in_id)
3459                 pir = TRUE;
3460       } else if (atp == SEQ_ID_swissprot) {
3461         AsnReadVal (aip, atp, NULL);
3462                 if (in_id)
3463                 swissprot = TRUE;
3464       } else {
3465         AsnReadVal (aip, atp, NULL);
3466       }
3467       if (dsp->title != NULL && dsp->uid == uid) {
3468         if (backbone) {
3469           if (author [0] != '\0' && year [0] != '\0') {
3470             goOn = FALSE;
3471           }
3472         } else if (genBank || embl || ddbj) {
3473           if (locus [0] != '\0') {
3474             if (isaAA && cds [0] != '\0') {
3475               goOn = FALSE;
3476             } else if (isaNA) {
3477               goOn = FALSE;
3478             }
3479           }
3480         } else if (pir) {
3481           if (locus [0] != '\0') {
3482             goOn = FALSE;
3483           }
3484         } else if (swissprot) {
3485           if (locus [0] != '\0') {
3486             goOn = FALSE;
3487           }
3488         } else if (embl) {
3489         }
3490       }
3491     }
3492     if (backbone) {
3493       chptr = StringChr (author, ',');
3494       if (chptr != NULL) {
3495         *chptr = '\0';
3496       }
3497       chptr = StringChr (year, ' ');
3498       if (chptr != NULL) {
3499         *chptr = '\0';
3500       }
3501       author [12] = '.';
3502       author [12] = '\0';
3503       sprintf (caption, "%s, %s", author, year);
3504       dsp->caption = StringSave (caption);
3505     } else if (genBank || embl || ddbj) {
3506       if (isaAA) {
3507         sprintf (caption, "%s%s", locus, cds);
3508       } else if (isaSEG) {
3509         sprintf (caption, "%s segs", locus);
3510       } else {
3511         sprintf (caption, "%s", locus);
3512       }
3513       dsp->caption = StringSave (caption);
3514     } else {
3515       sprintf (caption, "%s", locus);
3516       dsp->caption = StringSave (caption);
3517     }
3518     dsp->uid = uid;
3519     if (recentTitle != NULL) {
3520       recentTitle = MemFree (recentTitle);
3521     }
3522   }
3523   AsnIoReset (aip);
3524   return dsp;
3525 }
3526 
3527 /*****************************************************************************
3528 *
3529 *   CdEntrezFindSeqId(sip)
3530 *       given a Seq-id, get the uid.
3531 *       returns 0 on failure
3532 *
3533 *****************************************************************************/
3534 NLM_EXTERN Int4 CdEntrezFindSeqId (SeqIdPtr sip)
3535 {
3536     Int4 uid = 0;
3537     DocType db = -1;
3538     TextSeqIdPtr tsip;
3539     PDBSeqIdPtr psip;
3540     PatentSeqIdPtr patsip;
3541     CharPtr locus = NULL;
3542     Char localbuf[40];
3543     ValNodePtr lst;
3544     LinkSetPtr lsp;
3545     Boolean check_both, done;
3546     EntrezInfoPtr eip;
3547     Int4 index;
3548 
3549     if ((eip = CdEntrezGetInfo()) != NULL && eip->field_count > FLD_SQID &&
3550         eip->types[TYP_NT].fields[FLD_SQID].num_terms > 0)
3551     {
3552         done = FALSE;
3553         check_both = TRUE;
3554         db = TYP_NT;
3555         SeqIdWrite(sip, localbuf, PRINTID_FASTA_LONG, sizeof(localbuf));
3556         while (! done)     /* might need to check 2 types */
3557         {
3558             lst = CdEntTLNew(db);
3559             if (lst == NULL) return uid;
3560             CdEntTLAddTerm(lst, localbuf, db, FLD_SQID, TRUE, NULL);
3561             lsp = CdEntTLEval(lst);
3562             CdEntTLFree(lst);
3563             if (lsp != NULL)
3564             {
3565                 for (index = 0; index < lsp->num; index++)
3566                 { /* choose the highest one */
3567                     if (lsp->uids[index] > uid)
3568                         uid = lsp->uids[index];
3569                 }
3570                 LinkSetFree(lsp);
3571             }
3572             if ((! check_both) || (uid > 0))
3573                 done = TRUE;
3574             else
3575             {
3576                 if (db == TYP_AA)
3577                     db = TYP_NT;
3578                 else
3579                     db = TYP_AA;
3580                 check_both = FALSE;
3581             }
3582         }
3583     }
3584 
3585     if (uid > 0)
3586     {
3587         return uid;
3588     }
3589 
3590     check_both = FALSE;
3591     switch (sip->choice)
3592     {
3593         case SEQID_NOT_SET:           /* not set */
3594         case SEQID_LOCAL:           /* local */
3595             break;
3596         case SEQID_GIBBSQ:           /* gibbsq */
3597         case SEQID_GIBBMT:           /* gibbmt */
3598             sprintf(localbuf, "B%ld", (long)(sip->data.intvalue));
3599             locus = (CharPtr)localbuf;
3600             db = TYP_AA;   /* guess it's a protein */
3601             check_both = TRUE;
3602             break;             /* not on cdrom */
3603         case SEQID_GIIM:           /* giim */
3604             uid = ((GiimPtr)sip->data.ptrvalue)->id;
3605             break;
3606         case SEQID_GI:
3607             uid = sip->data.intvalue;
3608             break;
3609         case SEQID_GENBANK:             /* genbank */
3610         case SEQID_EMBL:                /* embl */
3611         case SEQID_DDBJ:
3612             db = TYP_NT;   /* guess it's a nucleic acid */
3613             check_both = TRUE;
3614         case SEQID_PIR:             /* pir */
3615         case SEQID_SWISSPROT:
3616         case SEQID_PRF:
3617             if (db < 0)
3618                 db = TYP_AA;
3619             tsip = (TextSeqIdPtr)sip->data.ptrvalue;
3620             if (tsip->accession != NULL)
3621                 locus = tsip->accession;
3622             else
3623                 locus = tsip->name;
3624             break;
3625         case SEQID_PDB:
3626             psip = (PDBSeqIdPtr)(sip->data.ptrvalue);
3627             if (psip->chain == '\0' || psip->chain == ' ')
3628                 StrCpy (localbuf, psip->mol);
3629             else
3630                 sprintf(localbuf, "%s-%c", psip->mol, (Char)psip->chain);
3631             locus = localbuf;
3632             db = TYP_AA;   /* guess protein */
3633             check_both = TRUE;
3634             break;
3635         case SEQID_PATENT:
3636             patsip = (PatentSeqIdPtr)(sip->data.ptrvalue);
3637             sprintf(localbuf, "%s%s %d", patsip->cit->country, patsip->cit->number,
3638                 (int)patsip->seqid);
3639             locus = localbuf;
3640             db = TYP_AA;   /* guess protein */
3641             check_both = TRUE;
3642             break;
3643         default:
3644             break;
3645     }
3646 
3647     if ((! uid) && (locus != NULL))   /* got a term to find */
3648     {
3649         done = FALSE;
3650         while (! done)     /* might need to check 2 types */
3651         {
3652             lst = CdEntTLNew(db);
3653             if (lst == NULL) return uid;
3654             CdEntTLAddTerm(lst, locus, db, FLD_ACCN, TRUE, NULL);
3655             lsp = CdEntTLEval(lst);
3656             CdEntTLFree(lst);
3657             if (lsp != NULL)
3658             {
3659                 for (index = 0; index < lsp->num; index++)
3660                 { /* choose the highest one */
3661                     if (lsp->uids[index] > uid)
3662                         uid = lsp->uids[index];
3663                 }
3664                 LinkSetFree(lsp);
3665             }
3666             if ((! check_both) || (uid > 0))
3667                 done = TRUE;
3668             else
3669             {
3670                 if (db == TYP_AA)
3671                     db = TYP_NT;
3672                 else
3673                     db = TYP_AA;
3674                 check_both = FALSE;
3675             }
3676         }
3677     }
3678 
3679     return uid;
3680 }
3681 
3682 #ifdef Biostruc_supported
3683 NLM_EXTERN BiostrucPtr CdEntrezBiostrucGet (DocUid uid, Int4 mdlLvl, Int4 maxModels)
3684 {
3685         Biostruc *struc = NULL;
3686         AsnIo *stream = NULL;
3687 
3688         if (! BiostrucAvail ()) return NULL;
3689         stream = cd3_CdDocAsnOpen(TYP_ST,uid);
3690         if (stream != NULL)
3691         {
3692                 struc = BiostrucAsnGet(stream,NULL, mdlLvl, maxModels);
3693                 cd3_CdDocAsnClose(stream);
3694         }
3695         return struc;
3696 }
3697 
3698 
3699 #ifdef OS_UNIX
3700 
3701 NLM_EXTERN BiostrucAnnotSetPtr CdEntrezBiostrucAnnotSetGet (DocUid uid)
3702 {
3703         BiostrucAnnotSetPtr retval = NULL;
3704         AsnIoPtr  aip;
3705         FILE *pipe;
3706         char command[PATH_MAX+5];
3707         char fname[PATH_MAX];
3708         
3709         if (CdMountEntrezVolume(1,fname,PATH_MAX-32))
3710         {
3711                 sprintf(strchr(fname,0), "/vast/%ld.bas.Z", (long) uid);
3712                 if (FileLength(fname) <= 0)
3713                 {
3714                         return NULL;
3715                 }
3716                 sprintf(command,"zcat %s", fname);
3717                 if ((pipe=popen(command,"r")) ==NULL)
3718                 {
3719                         ErrPostEx(SEV_ERROR,0,0,"Unable to open pipe [%s]",command);
3720                         return NULL;
3721                 }
3722                 aip = AsnIoNew(ASNIO_TEXT_IN, pipe, NULL, NULL, NULL);
3723                 if (aip != NULL)
3724                 {
3725                         retval = BiostrucAnnotSetAsnRead(aip, NULL);
3726                 }
3727                 AsnIoFree(aip,FALSE);
3728                 pclose(pipe);
3729         }
3730         return retval;
3731 }
3732 
3733 #else
3734 
3735 NLM_EXTERN BiostrucAnnotSetPtr CdEntrezBiostrucAnnotSetGet (DocUid uid)
3736 {
3737         return NULL;
3738 }
3739 
3740 #endif
3741 
3742 
3743 NLM_EXTERN BiostrucAnnotSetPtr LIBCALL CdEntrezBiostrucAnnotSetGetByFid (DocUid mmdbid, Int4 feature_id, Int4 feature_set_id)
3744 {
3745     BiostrucAnnotSetPtr basp = CdEntrezBiostrucAnnotSetGet (mmdbid);
3746     BiostrucAnnotSetPtr basp2 = NULL;
3747     BiostrucFeatureSetPtr pbsfs = NULL;
3748     BiostrucFeaturePtr pbsf = NULL;
3749 
3750     if (basp == NULL)
3751         return NULL;
3752  
3753     pbsfs = basp->features;
3754     while (pbsfs)
3755      {
3756        if (pbsfs->id == feature_set_id)
3757         {
3758           pbsf =  pbsfs->features;
3759           while(pbsf)
3760             {
3761               if (pbsf->id == feature_id)
3762                 {  /* found it */
3763                      basp2 = BiostrucAnnotSetNew();
3764                      basp2->id = basp->id;
3765                      basp2->descr = basp->descr; 
3766                      basp->descr = NULL;  /* unlink the descr from basp object */
3767                      basp2->features = BiostrucFeatureSetNew();
3768                      basp2->features->id = pbsfs->id;
3769                      basp2->features->descr = pbsfs->descr;
3770                      pbsfs->descr = NULL; /* unlink the feature-set descr from basp  object */
3771                      basp2->features->features = BiostrucFeatureNew();
3772                      basp2->features->features->id = pbsf->id;
3773                      basp2->features->features->name = StringSave(pbsf->name);
3774                      basp2->features->features->type = pbsf->type;
3775                      basp2->features->features->Property_property = pbsf->Property_property;
3776                      pbsf->Property_property = NULL; /* unlink the property from basp  object */
3777                      basp2->features->features->Location_location = pbsf->Location_location;
3778                      pbsf->Location_location = NULL; /* unlink the location from basp  object */ 
3779                      BiostrucAnnotSetFree(basp);
3780                      return basp2;
3781                 }
3782                pbsf = pbsf->next;
3783             }
3784         }
3785        pbsfs = pbsfs->next;
3786      }
3787    
3788     BiostrucAnnotSetFree(basp);
3789     return basp2;
3790 }
3791 
3792 
3793 NLM_EXTERN LinkSetPtr LIBCALL CdEntrezBiostrucFeatIds(DocUid mmdbid, Int2 feature_type, Int4 feature_set_id)
3794 {
3795     BiostrucAnnotSetPtr basp = CdEntrezBiostrucAnnotSetGet (mmdbid);
3796     LinkSetPtr retval = NULL;
3797     Int4Ptr ids = NULL;
3798     Int4Ptr scores = NULL;
3799     Int4 count = 0;
3800     BiostrucFeatureSetPtr pbsfs = NULL;
3801     BiostrucFeaturePtr pbsf = NULL;
3802     ChemGraphAlignmentPtr  pcga = NULL;
3803     
3804     
3805     if (basp == NULL)
3806         return NULL;
3807     
3808     /* count the number of features of type feature_type */
3809     pbsfs = basp->features;
3810     while (pbsfs)
3811      {
3812        if (pbsfs->id == feature_set_id)
3813         {
3814           pbsf =  pbsfs->features;
3815           while(pbsf)
3816             {
3817               if (pbsf->type == feature_type)
3818                 { 
3819                    count++;
3820                 }
3821                pbsf = pbsf->next;
3822             }
3823         }
3824        pbsfs = pbsfs->next;
3825      }
3826     
3827      /* allocate vectors for ids, scores iff alignment data */
3828    
3829     ids = (Int4Ptr) MemNew(sizeof(Int4) * count);
3830     if (feature_type == 200) /* NCBI alignments */
3831       scores = (Int4Ptr) MemNew(sizeof(Int4) * count);
3832     
3833     count = 0;   
3834     /* collect the feature-id's and scores  */
3835     pbsfs = basp->features;
3836     while (pbsfs)
3837      {
3838        if (pbsfs->id == feature_set_id)
3839         {
3840           pbsf =  pbsfs->features;
3841           while(pbsf)
3842             {
3843               if (pbsf->type == feature_type)
3844                 {
3845                   
3846                    ids[count] = pbsf->id;
3847                    if (feature_type == 200) /* alignment type id */
3848                     {
3849                      pcga = (ChemGraphAlignmentPtr) pbsf->Location_location->data.ptrvalue;
3850                      scores[count] = pcga->aligndata->vast_mlogp;  /* an Int4 already */
3851                     }
3852                    count++;
3853                 }
3854                pbsf = pbsf->next;
3855             }  /* while feature */
3856           retval = LinkSetNew();
3857           retval->num = count;
3858           retval->uids = ids;
3859           retval->weights = scores;
3860           MemFree(basp);
3861           return retval;
3862         }  /* if feature_set_id */
3863        pbsfs = pbsfs->next;
3864      }  /* while feature_set */
3865   MemFree(basp);
3866   return NULL;
3867 }
3868 #endif /* Biostruc_supported */
3869 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.