NCBI C Toolkit Cross Reference

C/api/asn2ff6.c


  1 /*   asn2ff6.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  asn2ff6.c
 27 *
 28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov
 29 *
 30 * Version Creation Date:   7/15/95
 31 *
 32 * $Revision: 6.69 $
 33 *
 34 * File Description: 
 35 *
 36 * Modifications:  
 37 * --------------------------------------------------------------------------
 38 * $Log: asn2ff6.c,v $
 39 * Revision 6.69  2006/07/13 17:06:38  bollin
 40 * use Uint4 instead of Uint2 for itemID values
 41 * removed unused variables
 42 * resolved compiler warnings
 43 *
 44 * Revision 6.68  2002/02/15 18:30:55  kans
 45 * no longer change snoRNA to misc_RNA
 46 *
 47 * Revision 6.67  2001/12/28 21:37:10  kans
 48 * allow sfp->product to be SEQLOC_EQUIV
 49 *
 50 * Revision 6.66  2001/12/21 20:21:06  cavanaug
  51 * old_locus_fmt now controls generation of *old* LOCUS line format
 52 *
 53 * Revision 6.65  2001/12/05 18:13:53  cavanaug
 54 * Changes for new LOCUS line format
 55 *
 56 * Revision 6.64  2001/08/21 17:33:33  kans
 57 * snoRNA can show /product
 58 *
 59 * Revision 6.63  2001/08/07 15:51:08  kans
 60 * use NUM_SEQID, added third party annotation seqids
 61 *
 62 * Revision 6.62  2001/07/18 14:50:13  kans
 63 * gather features with gsc.useSeqMgrIndexes if genpept, raw, indexing requested, and IndexedGetDescrForDiv to speed up finding division
 64 *
 65 * Revision 6.61  2001/07/03 20:01:41  kans
 66 * AddGBQual ASN2GNBK_STRIP_NOTE_PERIODS trim trailing tilde first
 67 *
 68 * Revision 6.60  2001/07/03 00:05:51  kans
 69 * TrimSpacesAndJunkFromEnds on genbankblock->source if ASN2GNBK_STRIP_NOTE_PERIODS
 70 *
 71 * Revision 6.59  2001/06/26 23:43:35  kans
 72 * moved second period check to inside last period check
 73 *
 74 * Revision 6.58  2001/06/26 23:36:06  kans
 75 * in AddGBQual if ASN2GNBK_STRIP_NOTE_PERIODS, trim one or two periods at end
 76 *
 77 * Revision 6.57  2001/06/13 14:41:58  yaschenk
 78 * changing increment of 10 to 1024 in EnlargeSortList()
 79 *
 80 * Revision 6.56  2001/06/04 21:30:52  kans
 81 * TrimSpacesAndSemicolons trims leading semicolons as well as leading spaces
 82 *
 83 * Revision 6.55  2001/06/01 18:46:26  tatiana
 84 * NG_ added to ValidateAccession
 85 *
 86 * Revision 6.54  2001/05/31 23:45:48  kans
 87 * if ASN2GNBK_STRIP_NOTE_PERIODS and IsEllipsis, do not strip period
 88 *
 89 * Revision 6.53  2001/05/29 23:27:47  kans
 90 * added support for snoRNA - flatfile prints as misc_RNA for now
 91 *
 92 * Revision 6.52  2001/04/16 16:51:42  tatiana
 93 * GetDivision(): CON division never use for aa
 94 *
 95 * Revision 6.51  2001/04/06 12:47:43  beloslyu
 96 * missing flatloc declaration was added
 97 *
 98 * Revision 6.50  2001/04/05 21:41:26  tatiana
 99 * REGION added in GetLocusPartsAwp()
100 *
101 * Revision 6.49  2001/04/04 22:05:16  kans
102 * In GB_PrintPubs under ASN2GNBK_STRIP_NOTE_PERIODS clean up comma/space/semicolon (TF)
103 *
104 * Revision 6.48  2001/04/04 21:46:56  kans
105 * TrimSpacesAndJunkFromEnds if ASN2GNBK_STRIP_NOTE_PERIODS (TF)
106 *
107 * Revision 6.47  2001/04/02 21:25:19  kans
108 * AddGBQual under ASN2GNBK_STRIP_NOTE_PERIODS also removes ; ; substrings
109 *
110 * Revision 6.46  2001/03/26 17:36:06  kans
111 * added NULL for endogenous-virus to genome prefix array
112 *
113 * Revision 6.45  2001/02/16 16:52:22  tatiana
114 * special case locus for NT_ records
115 *
116 * Revision 6.44  2001/01/26 19:21:48  kans
117 * extrachromosomal into source note, removed macronuclear, extrachrom, plasmid from organism line
118 *
119 * Revision 6.43  2001/01/19 21:51:04  kans
120 * finally got ASN2GNBK_STRIP_NOTE_PERIODS logic right
121 *
122 * Revision 6.42  2001/01/19 18:45:28  kans
123 * another attempt to use ASN2GNBK_STRIP_NOTE_PERIODS to remove extraneous asn2ff/asn2gnbk diffs
124 *
125 * Revision 6.41  2001/01/08 18:36:40  kans
126 * removed ASN2GNBK_STRIP_NOTE_PERIODS - this was not the right place
127 *
128 * Revision 6.40  2001/01/06 22:09:42  kans
129 * added ASN2GNBK_STRIP_NOTE_PERIODS to try to eliminate trivial note discrepancies
130 *
131 * Revision 6.39  2000/11/29 20:46:11  tatiana
132 * HTC division added for MI_TECH_htc
133 *
134 * Revision 6.38  2000/10/24 20:28:44  tatiana
135 * ValidateAccession accepts XP, XM
136 *
137 * Revision 6.37  2000/09/20 21:26:19  tatiana
 138 * all organelles added to ORGANISM line
139 *
140 * Revision 6.36  2000/09/11 18:52:59  tatiana
141 * PUBMED linetype is legal in release mode
142 *
143 * Revision 6.35  2000/08/25 16:16:46  kans
144 * ValidateLocus initializes num_of_digits even if > 1000 segments
145 *
146 * Revision 6.34  2000/08/01 21:09:39  tatiana
 147 * ValidateVersion is called in forgbrel option only
148 *
149 * Revision 6.33  2000/06/29 12:23:30  kans
150 * GenPept on Seq_repr_virtual shown only if is_www || ajp->mode != RELEASE_MODE, earlier kludge of ignoring get_www was probably too broad
151 *
152 * Revision 6.32  2000/06/28 19:31:22  kans
153 * in SeqToAwp always set is_www to TRUE, so virtual sequences show up on non-web applications
154 *
155 * Revision 6.31  2000/06/23 15:42:34  tatiana
156 * removed virion and proviral from ORGANISM line
157 *
158 * Revision 6.30  2000/06/21 15:04:57  tatiana
159 * space added to Virion
160 *
161 * Revision 6.29  2000/06/12 20:49:04  tatiana
 162 * new organelles added to ORGANISM field
163 *
164 * Revision 6.28  2000/06/05 17:51:53  tatiana
165 * increase size of feature arrays to Int4
166 *
167 * Revision 6.27  2000/02/09 19:34:39  kans
168 * added forgbrel flag to Asn2ffJobPtr, currently used to suppress PUBMED line, which was not formally announced in release notes
169 *
170 * Revision 6.26  2000/01/28 17:56:48  kans
171 * show_gi always FALSE to suppress NID and PID, added support for PUBMED line in GenBank format
172 *
173 * Revision 6.25  2000/01/18 17:09:24  tatiana
174 * NP added to ValidateAccession
175 *
176 * Revision 6.24  1999/10/06 20:20:24  bazhin
177 * Removed memory leaks in GeneStructContentFree() and GetPubsAwp()
178 * functions.
179 *
180 * Revision 6.23  1999/09/23 18:09:33  tatiana
181 * ValidateAccession modified for N*_ accession
182 *
183 * Revision 6.22  1999/09/15 18:17:12  tatiana
184 * GRAPHIK_FMT corrected
185 *
186 * Revision 6.18  1999/04/02 19:33:55  tatiana
187 * MI_TECH_htgs_0 added in BioseqGetGBDivCode()
188 *
189 * Revision 6.17  1999/04/01 20:44:12  kans
190 * Int2 lengths to Int4 to allow CountGapsInDeltaSeq with buffer > 32K
191 *
192 * Revision 6.16  1999/03/31 01:09:23  tatiana
193 * ValidateAccession accepts 3+5
194 *
195 * Revision 6.15  1999/03/30 21:00:45  tatiana
196 *  ValidateOtherAccession() added
197 *
198 * Revision 6.14  1999/03/22 23:22:32  tatiana
199 * accession.version modifications
200 *
201 * Revision 6.13  1999/01/12 16:57:55  kans
202 * SeqToAwp checks for null ep before dereferencing
203 *
204 * Revision 6.12  1998/11/24 20:15:03  kans
205 * seqid other has better priority than local so refgene id is used preferentially
206 *
207 * Revision 6.11  1998/10/30 01:12:00  kans
208 * GetPubsAwp GatherEntity filters out OBJ_SEQALIGN - this was being hit many times on big records, and there is no need for asn2ff to see alignments
209 *
210 * Revision 6.10  1998/09/24 17:46:00  kans
211 * fixed GetDBXrefFromGene problem (TT)
212 *
213 * Revision 6.9  1998/06/15 14:59:49  tatiana
214 * UNIX compiler warnings fixed
215 *
216 * Revision 6.8  1998/05/11 21:58:33  tatiana
217 * some functions moved from asn2ff1.c
218 *
219 * Revision 6.7  1998/05/05 19:53:50  tatiana
 220 * SEQFEAT_RSITE suppressed in GetNAFeatKey()
221 *
222 * Revision 6.6  1998/04/30 21:49:10  tatiana
223 * *** empty log message ***
224 *
225 * Revision 6.5  1998/02/10 17:01:14  tatiana
226 * AddGBQualEx() added
227 *
228 * Revision 6.4  1998/01/13 21:35:20  tatiana
229 *  AsnIoHash moved to asnio.c file
230 *
231 * Revision 6.3  1998/01/13 21:14:50  tatiana
 232 * static AsnIoHash changed to AsnIoHash to avoid function name collision
233 *
234 * Revision 6.2  1997/12/15 15:53:29  tatiana
235 * features processing has been changed
236 *
237 * Revision 6.1  1997/09/16 15:41:49  kans
238 * added SEQFEAT_SITE case to GetNAFeatKey (TT)
239 *
240 * Revision 5.25  1997/07/28 19:03:59  vakatov
241 * [WIN32,MSVC++]  Restored lost "NCBIOBJ.LIB" pro-DLL modifications
242 *
243  * Revision 5.24  1997/07/28 14:26:11  vakatov
244  * BioseqGetGBDivCode() proto in-sync with its header-located declaration
245  *
246  * Revision 5.23  1997/07/24 23:57:41  tatiana
247  * fixed sfp_order
248  *
249  * Revision 5.22  1997/07/24 15:59:06  tatiana
250  * aaaaaaa bug fixed in Getscblknum
251  *
252  * Revision 5.21  1997/07/16 21:18:42  tatiana
253  *  added sorting by feat type in CompareSfpForHeap()
254  *
255  * Revision 5.20  1997/06/19 18:37:17  vakatov
256  * [WIN32,MSVC++]  Adopted for the "NCBIOBJ.LIB" DLL'ization
257  *
258  * Revision 5.19  1997/05/21 14:43:27  tatiana
259  * fix empty /product in GetNAFeatKey
260  *
261  * Revision 5.17  1997/01/13  22:33:04  tatiana
262  * added CompareGeneName()
263  *
264  * Revision 5.16  1996/12/17  22:47:56  tatiana
265  * added StoreFeatFree()
266  *
267  * Revision 5.15  1996/10/25  22:12:10  tatiana
268  * doesn't add empty ("") val if qual is translation
269  *
270  * Revision 5.14  1996/10/02  15:14:38  tatiana
271  * a bug fixed
272  *
273  * Revision 5.13  1996/10/01  22:42:09  tatiana
274  * fixed duplicated notes in NoteToCharPtrStack
275  *
276  * Revision 5.12  1996/09/09  13:36:02  kans
277  * moved BioseqGetGBDivCode from toasn.[ch] to asn2ff.h/asn2ff6.c
278  *
279  * Revision 5.11  1996/09/03  19:52:49  tatiana
280  * extra_loc added
281  *
282  * Revision 5.10  1996/08/28  21:40:35  tatiana
283  * don't copy new location from gather
284  *
285  * Revision 5.9  1996/08/16  20:34:45  tatiana
286  * GetNAFeatKey() changed
287  *
288  * Revision 5.7  1996/08/09  21:08:57  tatiana
289  * a bug fixed in GetNAFeatKey
290  *
291  * Revision 5.6  1996/07/30  16:35:05  tatiana
292  * Boolean new added to GetNaFeatKey()
293  *
294  * Revision 5.5  1996/07/19  21:38:15  tatiana
295  * ERR_GI_No_GI_Number changed from     ErrPostEx to    ErrPostStr
296  *
297  * Revision 5.3  1996/07/02  18:11:18  tatiana
298  * calculate hash in StoreFeat
299  *
300  * Revision 5.2  1996/06/14  18:05:03  tatiana
301  * GetNAFeatKey change
302  *
303  * Revision 5.1  1996/06/11  15:26:36  tatiana
304  * GetGINumber is modified to get also embl NI
305  *
306  * Revision 4.17  1996/05/16  21:00:52  tatiana
 307  * RemoveRedundantFeats added
308  *
309  * Revision 4.16  1996/04/29  18:51:42  tatiana
310  * whole_book format added
311  *
312  * Revision 4.15  1996/04/15  14:36:23  tatiana
313  * memory leaks cleaning
314  *
315  * Revision 4.13  1996/02/28  04:53:06  ostell
 316  * changes to support segmented master sequences
317  *
318  * Revision 4.12  1996/02/15  15:54:51  tatiana
319  * minor clean ups
320  *
321  * Revision 4.11  1996/01/29  22:39:10  tatiana
322  * error posting MODULE
323  *
324  * Revision 4.10  1995/12/20  22:41:56  tatiana
325  * removed redundant functions
326  *
327  * Revision 4.9  1995/12/12  20:21:05  tatiana
328  * CitSub validation fixed
329  *
330  * Revision 4.8  1995/12/10  22:19:31  tatiana
331  * Imprint in CitSub became optional
332  *
333  * Revision 4.7  1995/11/17  21:28:35  kans
334  * asn2ff now uses gather (Tatiana)
335  *
336  * Revision 4.2  1995/08/04  15:26:42  tatiana
337  * bug fixed in GetPubDate (check for Null pointer).
338  *
339  * Revision 4.1  1995/08/01  14:53:08  tatiana
340  * change SeqIdPrint to SeqIdWrite
341  *
342  * Revision 1.57  1995/07/17  19:33:20  kans
343  * parameters combined into Asn2ffJobPtr structure
344 * ==========================================================================
345 */
346 
347 #include <asn2ff6.h>
348 #include <asn2ffp.h>
349 #include <a2ferrdf.h>
350 #include <asn2ffg.h>
351 #include <utilpub.h>
352 #include <ffprint.h>
353 #include <explore.h>
354 #include <sqnutils.h>
355 
356 #define BUF_EXT_LENGTH 4
357 
358 /*---------- order for other id FASTA_LONG (copied from SeqIdWrite) ------- */
359 
360 static Uint1 fasta_order[NUM_SEQID] = {  
361 33, /* 0 = not set */
362 20, /* 1 = local Object-id */
363 15,  /* 2 = gibbsq */
364 16,  /* 3 = gibbmt */
365 30, /* 4 = giim Giimport-id */
366 10, /* 5 = genbank */
367 10, /* 6 = embl */
368 10, /* 7 = pir */
369 10, /* 8 = swissprot */
370 15,  /* 9 = patent */
371 10, /* 10 = other TextSeqId */
372 20, /* 11 = general Dbtag */
373 32,  /* 12 = gi */
374 10, /* 13 = ddbj */
375 10, /* 14 = prf */
376 12, /* 15 = pdb */
377 10,  /* 16 = tpg */
378 10,  /* 17 = tpe */
379 10   /* 18 = tpd */
380 };
381 
382 
383 static Uint1 sfp_order[21] = {0, 
384 2, /* SEQFEAT_GENE */
385 5, /* SEQFEAT_ORG */
386 3, /* SEQFEAT_CDREGION */
387 5, /* SEQFEAT_PROT */
388 1, /* SEQFEAT_RNA */
389 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5};
390 
391 SeqFeatPtr MakeSyntheticSeqFeat PROTO ((void));
392 ValNodePtr LookForPubsOnFeat PROTO ((SeqFeatPtr sfp, ValNodePtr PubOnFeat));
393 
394 Boolean asn2ff_flags[13];
395 
396 NLM_EXTERN GeneStructPtr GeneStructNew (void)
397 {
398         GeneStructPtr gsp;
399         
400         gsp = (GeneStructPtr) MemNew(sizeof(GeneStruct));
401         gsp->gene = NULL;
402         gsp->product = NULL;
403         gsp->standard_name = NULL;
404         gsp->map = (CharPtr PNTR) MemNew(sizeof(CharPtr));
405         gsp->map_index = 0;
406         gsp->map_size = 1;
407         gsp->ECNum = NULL;
408         gsp->activity = NULL;
409         gsp->grp = NULL;
410 
411         return gsp;
412 }
413 
414 NLM_EXTERN NoteStructPtr NoteStructNew (NoteStructPtr nsp)
415 {
416         nsp = (NoteStructPtr) MemNew(sizeof(NoteStruct));
417 
418         nsp->note = (CharPtr PNTR) MemNew(5*sizeof(CharPtr));
419         nsp->note_annot = (CharPtr PNTR) MemNew(5*sizeof(CharPtr));
420         nsp->note_alloc = (Uint1 PNTR) MemNew(5*sizeof(Uint1));
421         nsp->note_index = 0;
422         nsp->note_size = 5;
423 
424         return nsp;
425 }
426 
427 static void GeneStructContentFree(GeneStructPtr gsp)
428 {
429         ValNodePtr v, vnext;
430         
431         if (gsp->gene) {
432                 if (gsp->gene->data.ptrvalue != NULL) {
433                         MemFree(gsp->gene->data.ptrvalue);
434                 }
435                 gsp->gene = ValNodeFree(gsp->gene);
436         }
437         for (v = gsp->product; v; v = vnext) {
438                 vnext = v->next;
439                 if (v->data.ptrvalue != NULL) {
440                         MemFree(v->data.ptrvalue);
441                 }
442                 MemFree(v);
443         }
444         for (v = gsp->standard_name; v != NULL; v = vnext) {
445                 vnext = v->next;
446                 if (v->data.ptrvalue != NULL) {
447                         MemFree(v->data.ptrvalue);
448                 }
449                 MemFree(v);
450         }
451         for (v = gsp->ECNum; v; v = vnext) {
452                 vnext = v->next;
453                 if (v->data.ptrvalue != NULL) {
454                         MemFree(v->data.ptrvalue);
455                 }
456                 MemFree(v);
457         }
458         for (v = gsp->activity; v; v = vnext) {
459                 vnext = v->next;
460                 if (v->data.ptrvalue != NULL) {
461                         MemFree(v->data.ptrvalue);
462                 }
463                 MemFree(v);
464         }
465         if (gsp->grp) {
466                 GeneRefFree(gsp->grp);
467         }
468         return;
469 }
470 
471 NLM_EXTERN void GeneStructFree (GeneStructPtr gsp)
472 {
473         if (gsp == NULL)
474                 return;
475         gsp->map = MemFree(gsp->map);
476         GeneStructContentFree(gsp);
477         MemFree(gsp);
478 }
479 
480 NLM_EXTERN void NoteStructFree (NoteStructPtr nsp)
481 {
482         Int2 index;
483 
484         if (nsp == NULL) {
485         return;
486         }
487         for (index=0; index < nsp->note_index; index++) {
488                         if (nsp->note_alloc[index] == ASN2FLAT_ALLOC)
489                                 nsp->note[index] = MemFree(nsp->note[index]);
490         }
491         nsp->note = MemFree(nsp->note);
492         nsp->note_annot = MemFree(nsp->note_annot);
493         nsp->note_alloc = MemFree(nsp->note_alloc);
494         MemFree(nsp);
495 }
496 
497 NLM_EXTERN void NoteStructReset (NoteStructPtr nsp)
498 {
499         Int2 index;
500 
501         if (nsp == NULL) {
502                 return;
503         }
504         for (index=0; index<nsp->note_index; index++)
505         {
506                 if (nsp->note_alloc[index] == ASN2FLAT_ALLOC)
507                         nsp->note[index] = MemFree(nsp->note[index]);
508                 nsp->note[index] = NULL;
509                 nsp->note_annot[index] = NULL;
510         }
511         nsp->note_index = 0;
512 }
513 
514 
515 NLM_EXTERN void ListFree (SeqFeatPtr PNTR PNTR List, Int4 range)
516 {
517         Int4 index;
518 
519         for (index=0; index < range; index++)
520                 MemFree(List[index]);
521 
522         MemFree(List);
523 }
524 
525 /***********************************************************************
526 *SeqFeatPtr MakeSyntheticSeqFeat(void) 
527 *
528 *       This function allocates a "synthetic" SeqFeatPtr, which is
529 *       used to print the SeqFeats out.  To print out SeqFeats, they
530 *       are copied to this "synthetic" sfp, which is an ImpFeat, 
531 *       adjusted, validated, and then a function prints out this ImpFeat.
532 *************************************************************************/
533 
534 NLM_EXTERN SeqFeatPtr MakeSyntheticSeqFeat(void) 
535 {
536         ImpFeatPtr ifp;
537         SeqFeatPtr sfp_out;
538 
539         sfp_out = SeqFeatNew();
540         if (sfp_out)
541         {
542                 sfp_out->data.choice = SEQFEAT_IMP;
543                 sfp_out->qual = NULL;
544                 ifp = sfp_out->data.value.ptrvalue = ImpFeatNew();
545         /*      ifp->key = (CharPtr) MemNew(20*sizeof(Char)); */
546 /* key may be more than 20 char one day and cause segmentation fault */
547                 ifp->key = NULL;
548                 ifp->loc = NULL;
549                 sfp_out->comment = NULL;
550                 sfp_out->location = NULL;
551                 sfp_out->product = NULL;
552         }
553 
554         return sfp_out;
555 }
556 
557 NLM_EXTERN void CpNoteToCharPtrStack (NoteStructPtr nsp, CharPtr annot, CharPtr string)
558 {
559         NoteToCharPtrStack(nsp, annot, string, ASN2FLAT_NOT_ALLOC);
560         return;
561 }
562 
563 NLM_EXTERN void SaveNoteToCharPtrStack (NoteStructPtr nsp, CharPtr annot, CharPtr string)
564 {
565         NoteToCharPtrStack(nsp, annot, string, ASN2FLAT_ALLOC);
566         return;
567 }
568 
569 
570 NLM_EXTERN void NoteToCharPtrStack (NoteStructPtr nsp, CharPtr annot, CharPtr string, Uint1 alloc)
571 {
572         Int2 index, note_size;
573 
574         if (nsp)
575         {
576                 note_size = nsp->note_size;
577                 index = nsp->note_index;
578         }
579         else
580                 return;
581 /*** add check for duplicated notes 9-27-96 ***/
582 /*      if (string[StringLen(string)-1] == '.')
583                 string[StringLen(string)-1] = '\0';
584         for (i = 0; i < note_size; i++) {
585                 if (nsp->note[i] && StringStr(nsp->note[i], string) != NULL) {
586                         return;
587                 }
588         }
589 */
590         if (index == note_size)
591                 EnlargeCharPtrStack(nsp, 5);
592 
593         nsp->note_annot[index] = annot;
594 
595         if (alloc == ASN2FLAT_NOT_ALLOC)
596         {
597                 nsp->note_alloc[index] = ASN2FLAT_NOT_ALLOC;
598                 nsp->note[index] = string;
599         }
600         else if (alloc == ASN2FLAT_ALLOC)
601         {
602                 nsp->note_alloc[index] = ASN2FLAT_ALLOC;
603                 nsp->note[index] = StringSave(string);
604         }
605 
606         nsp->note_index++;
607 
608         return;
609 }
610 
611 NLM_EXTERN void EnlargeCharPtrStack (NoteStructPtr nsp, Int2 enlarge)
612 {
613         CharPtr PNTR newstr;
614         CharPtr PNTR new_annot;
615         Int2 index;
616         Uint1 PNTR new_alloc;
617 
618         newstr = (CharPtr PNTR) MemNew((size_t) 
619                 ((enlarge+(nsp->note_size))*sizeof(CharPtr)));
620         new_annot = (CharPtr PNTR) MemNew((size_t) 
621                 ((enlarge+(nsp->note_size))*sizeof(CharPtr)));
622         new_alloc = (Uint1 PNTR) MemNew((size_t) 
623                 ((enlarge+(nsp->note_size))*sizeof(Uint1)));
624 
625         for (index=0; index<(nsp->note_size); index++) {
626                 newstr[index] = nsp->note[index];
627                 new_annot[index] = nsp->note_annot[index];
628                 new_alloc[index] = nsp->note_alloc[index];
629         }
630         nsp->note_size += enlarge;
631         nsp->note = MemFree(nsp->note);
632         nsp->note_annot = MemFree(nsp->note_annot);
633         nsp->note_alloc = MemFree(nsp->note_alloc);
634         nsp->note = newstr;
635         nsp->note_annot = new_annot;
636         nsp->note_alloc = new_alloc;
637 }
638 
639 NLM_EXTERN SortStructPtr EnlargeSortList(SortStructPtr List, Int4 size)
640 {
641         SortStructPtr NewList;
642 
643         if (size % 1024 == 0) {
644                 NewList = (SortStructPtr) MemNew((size+1024)*sizeof(SortStruct));
645                 if (size > 0) {
646                         MemCopy(NewList, List, (size * sizeof(SortStruct)));
647                         MemFree(List);
648                 }
649                 return NewList;
650         }
651         return List;
652 
653 }       /* EnlargeSortList */
654 
655 NLM_EXTERN int LIBCALLBACK CompareSfpForHeap (VoidPtr vp1, VoidPtr vp2)
656 {
657 
658         SortStructPtr sp1 = vp1;
659         SortStructPtr sp2 = vp2;
660         BioseqPtr bsp;
661         SeqFeatPtr sfp1, sfp2;
662         Int2 status = 0;
663 
664         bsp = sp1->bsp;
665         sfp1 = sp1->sfp;
666         sfp2 = sp2->sfp;
667         if (sfp1 == NULL || sfp2 == NULL) {
668                 return status;
669         }
670 
671         status = SeqLocOrder(sfp1->location, sfp2->location, bsp);
672 
673         if (ABS(status) >= 2 && sp1->seg_bsp) {
674                 status = SeqLocOrder(sfp1->location, sfp2->location, sp1->seg_bsp);
675         }
676         if (status == 0 && sfp1->data.choice < 6 && sfp2->data.choice < 6) {
677                 status = sfp_order[sfp1->data.choice] - sfp_order[sfp2->data.choice];
678         }
679         return status;
680 }
681 NLM_EXTERN int LIBCALLBACK CompareGeneName (VoidPtr vp1, VoidPtr vp2)
682 {
683 
684         SortStructPtr sp1 = vp1;
685         SortStructPtr sp2 = vp2;
686         SeqFeatPtr sfp1, sfp2;
687         GeneRefPtr gr1, gr2;
688         Int2 status = 0;
689 
690         sfp1 = sp1->sfp;
691         sfp2 = sp2->sfp;
692         if (sfp1 == NULL || sfp2 == NULL) {
693                 return status;
694         }
695         if (sfp1->data.choice != SEQFEAT_GENE)
696                 return status;
697         if (sfp2->data.choice != SEQFEAT_GENE)
698                 return status;
699         gr1 = (GeneRefPtr) sfp1->data.value.ptrvalue;   
700         gr2 = (GeneRefPtr) sfp2->data.value.ptrvalue;
701         if (gr1 == NULL || gr2 == NULL)
702                 return status;
703         status = StringCmp(gr1->locus, gr2->locus);
704 
705         return status;
706 }
707 
708 /**************************************************************************
709 *       This function returns the gi number
710 *       If no gi number is found, -1 is returned  and a warning is
711 *       issued.
712 *       06-10-96
713 *       This fubction is changed to void. It will find NCBI gi and embl ni
714 *       and fill up gbp structure
715 **************************************************************************/
716 
717 NLM_EXTERN void GetGINumber(GBEntryPtr gbp)
718 {
719         Boolean         found_gi;
720         ValNodePtr      vnp;
721         Int4            gi = -1;
722         CharPtr         ni = NULL;
723     DbtagPtr    dbtag;
724     ObjectIdPtr oid;
725 
726         found_gi = FALSE;
727         if (gbp == NULL)
728                 return;
729         if (gbp->bsp == NULL)
730                 return;
731         for (vnp=gbp->bsp->id; vnp; vnp=vnp->next) {
732                 if (vnp->choice == SEQID_GI) {
733                         gi = vnp->data.intvalue;
734                         if (gi != 0) {
735                                 found_gi = TRUE;
736                                 break;
737                         } else {
738                                 if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
739                                         ErrPostEx(SEV_WARNING, ERR_GI_No_GI_Number, 
740                                                                         "Zero gi number: %d", gi);
741                                 }
742                         }
743                 } else if (vnp->choice == SEQID_GENERAL) {
744                         dbtag = vnp->data.ptrvalue;
745                         if (StringCmp(dbtag->db, "NID") == 0) {
746                                 oid = dbtag->tag;
747                                 if (oid->str) {
748                                         ni = StringSave(oid->str);
749                                 }
750                         }
751                 }
752         }
753         if (! found_gi) {
754                 if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
755                         ErrPostStr(SEV_WARNING, ERR_GI_No_GI_Number, "");
756                 }
757                 gi = -1;
758         }
759         gbp->gi = gi;
760         gbp->ni = ni;
761         return;
762 }
763 
764 /***********************************************************************
765 *
766 *       GetGIs gets the GI's.
767 *
768 ************************************************************************/
769 NLM_EXTERN void GetGIs (Asn2ffJobPtr ajp)
770 {
771         GBEntryPtr gbp;
772                 
773         for (gbp = ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
774                 GetGINumber(gbp);
775         }
776         return;
777 }
778 
779 NLM_EXTERN SeqIdPtr GetProductSeqId(ValNodePtr product)
780 {
781         SeqIdPtr sip=NULL;
782         SeqIntPtr seq_int;
783         SeqLocPtr slp;
784 
785         if (product)
786         {
787                 if (product->choice == SEQLOC_WHOLE)
788                 {
789                         sip = (SeqIdPtr) product->data.ptrvalue;
790                 } 
791                 else if (product->choice == SEQLOC_INT)
792                 {
793                         seq_int = (SeqIntPtr) product->data.ptrvalue;
794                         sip = seq_int->id;
795                 }
796                 else if (product->choice == SEQLOC_EQUIV)
797                 {
798                         for (slp = (SeqLocPtr) product->data.ptrvalue; slp != NULL; slp = slp->next) {
799                                 sip = GetProductSeqId (slp);
800                                 if (sip != NULL) return sip;
801                         }
802                 }
803         }
804         return sip;
805 }
806 
807 /*****************************************************************************
808 *check_range
809 *
810 *       This function is called by the gbparse functions of Karl Sirotkin 
811 *       and determines if the length of a BioSeqPtr is sensible.
812 *       Pointer data is not used !! Tatiana !!
813 *       Tom Madden
814 *****************************************************************************/
815 
816 NLM_EXTERN Int4 check_range(Pointer data, SeqIdPtr seq_id)
817 
818 {
819         BioseqPtr bsp;
820 
821         bsp = BioseqFind(seq_id);
822         if (bsp)
823                 return bsp->length;
824         else
825                 return 0;
826 }       /* check_range */
827 
828 /****************************************************************************
829 *do_loc_errors
830 *
831 *       This function is called both by the gbparse functions of Karl Sirotkin
832 *       and by asn2ff.  If called by gbparse, error messages are stored in 
833 *       buffers and a flag is set; if called by asn2ff, the error messages
834 *       are retrieved and the flag reset.
835 *       
836 ***************************************************************************/
837 
838 NLM_EXTERN void do_loc_errors(CharPtr front, CharPtr details)
839 {
840         ErrPostEx(SEV_INFO, ERR_FEATURE_Bad_location, "%s: %s\n", front, details);
841 }
842 
843 /***************************************************************************
844 *do_no_loc_errors
845 *
846 *       Is used when no error messages are wanted.
847 ****************************************************************************/
848 
849 NLM_EXTERN void do_no_loc_errors(CharPtr front, CharPtr details)
850 {
851         return;
852 }
853 
854 /***************************************************************************
855 *Boolean GBQualPresent(CharPtr ptr, GBQualPtr gbqual)
856 *
857 *This function check that a qual, that is to be added to the list of qual
858 *isn't already present.
859 ***************************************************************************/
860 NLM_EXTERN Boolean GBQualPresent(CharPtr ptr, GBQualPtr gbqual)
861 
862 {
863         Boolean present=FALSE;
864         GBQualPtr qual;
865 
866         for (qual=gbqual; qual; qual=qual->next)
867                 if (StringCmp(ptr, qual->qual) == 0)
868                 {
869                         present = TRUE;
870                         break;
871                 }
872 
873         return present;
874 }       /* GBQualPresent */
875 
876 /**********************************************************************
877 *Boolean GetNAFeatKey(CharPtr buffer, SeqFeatPtr sfp)
878 *
879 *       This function places the sfp "key" in buffer and returns TRUE
880 *       if successful, it returns FALSE if not successful.
881 *       This function only works for nucleic acid sequences, as the
882 *       keys (for corresponding numbers) are different for peptides.
883 ***********************************************************************/
884 
885 NLM_EXTERN Boolean GetNAFeatKey(Boolean is_new, CharPtr PNTR buffer, SeqFeatPtr sfp, SeqFeatPtr sfp_out)
886 {
887 
888         Boolean retval=TRUE;
889         ImpFeatPtr ifp;
890         RnaRefPtr rrp;
891         CharPtr str = NULL;
892         Int2 index;
893 
894 
895         switch (sfp->data.choice)
896         {
897         case SEQFEAT_GENE:      /* gene becomes misc_feat for purposes of CheckNAFeat */
898                 if (is_new) {
899                         *buffer = StringSave("gene");
900                 } else {
901                         *buffer = StringSave("misc_feature");
902                 }
903                 break;
904         case SEQFEAT_CDREGION:
905                 *buffer = StringSave("CDS");
906                 break;
907         case SEQFEAT_RNA:
908                 rrp = sfp->data.value.ptrvalue;
909                 /* the following code was taken (almost) directly from Karl
910                 Sirotkin's code.                                        */
911                 switch ( rrp -> type){ 
912                         case 1:
913                                 *buffer =StringSave("precursor_RNA");
914                                 break;
915                         case 2:
916                                 *buffer = StringSave("mRNA");
917                                 break;
918                         case 3:
919                                 *buffer = StringSave("tRNA");
920                                 break;
921                         case 4:
922                                 *buffer = StringSave("rRNA");
923                                 break;
924                         case 5:
925                                 *buffer = StringSave("snRNA");
926                                 break;
927                         case 6:
928                                 *buffer = StringSave("scRNA");
929                                 break;
930                         case 7:
931                                 *buffer = StringSave("snoRNA"); /* snoRNA */
932                                 break;
933                         case 255:
934                                 *buffer = StringSave("misc_RNA");
935                                 break;
936                 }
937                 switch ( rrp -> type){ 
938                         case 2:
939                         case 4:
940                         case 5:
941                         case 6:
942                         case 7:
943                         case 255:
944                                 if (rrp ->ext.choice == 1 && sfp_out) {
945                                          str = rrp->ext.value.ptrvalue;
946                                          if (str != NULL && *str != '\0') {
947                                                 index = GBFeatKeyNameValid(buffer, FALSE);
948                                                 if (GBQualValidToAdd(index, "product")) {
949                                                         sfp_out->qual = AddGBQual(sfp_out->qual, 
950                                                                         "product", str);
951                                                 }
952                                         }
953                                 }
954                                 break;
955                 }
956                 break;
957         case SEQFEAT_IMP:
958                 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
959                 *buffer = StringSave(ifp->key); 
960                 break;
961         case SEQFEAT_SEQ:
962         case SEQFEAT_SITE:
963         case SEQFEAT_REGION:
964         case SEQFEAT_COMMENT:
965                 *buffer = StringSave("misc_feature");
966                 break;
967         case SEQFEAT_BIOSRC:
968                 *buffer = StringSave("source");
969                 break;
970         case SEQFEAT_RSITE:
971         default:
972                 retval = FALSE; 
973                 break;
974         }
975 
976         return retval;
977 }       /* GetNAFeatKey */
978 
979 /**************************************************************************
980 *SeqIdPtr CheckXrefFeat (BioseqPtr bsp, SeqFeatPtr sfp)
981 *
982 *       First the location of the xref is checked to see if it overlaps 
983 *       the sequence.  If this feature has a xref that is NOT of type 
984 *       genbank, embl, or ddbj, it is put out as a misc_feat.  If it's 
985 *       one of genbank, embl, or ddbj, it has been put out as a second 
986 *       accession.  If the feature should be put out as a misc_feat, then
987 *       the SeqIdPtr (xid) is returned, otherwise NULL.
988 **************************************************************************/
989 
990 NLM_EXTERN SeqIdPtr CheckXrefFeat (BioseqPtr bsp, SeqFeatPtr sfp)
991 {
992 
993         SeqIdPtr xid=NULL;
994         SeqIntPtr si;
995         SeqLocPtr xref;
996         ValNodePtr location;
997                         
998         location = ValNodeNew(NULL);
999         si = SeqIntNew();
1000         location->choice = SEQLOC_INT;
1001         location->data.ptrvalue = si;
1002         si->from = 0;
1003         si->to = bsp->length - 1;
1004         si->id = bsp->id;       /* Don't delete id!! */
1005         if (SeqLocCompare(sfp->location, location) != 0)
1006         {
1007                 xref = (SeqLocPtr) sfp->data.value.ptrvalue;
1008                 xid = (SeqIdPtr) xref->data.ptrvalue;
1009                 if (xid->choice != 5 && xid->choice != 6 && xid->choice != 13)
1010                         ;
1011                 else
1012                         xid = NULL;
1013         }
1014         si->id = NULL;
1015         SeqIntFree(si);
1016         ValNodeFree(location);
1017         return xid;
1018 }
1019 
1020 NLM_EXTERN Int4 GetGINumFromSip (SeqIdPtr sip)
1021 {
1022         Int4 gi = -1;
1023         ValNodePtr vnp;
1024 
1025         for (vnp=sip; vnp; vnp=vnp->next)
1026                 if (vnp->choice == SEQID_GI)
1027                         gi = vnp->data.intvalue;
1028 
1029         return gi;
1030 }
1031 
1032 /*****************************************************************************
1033 *FlatRefBest
1034 *
1035 *       returns ValNodePtr to best (for FlatFile production) pub in a equiv set
1036 *****************************************************************************/
1037 NLM_EXTERN ValNodePtr FlatRefBest(ValNodePtr equiv, Boolean error_msgs, Boolean anything)
1038 {
1039         ValNodePtr the_pub, retval = NULL, newpub;
1040         CitBookPtr cb;
1041         CitSubPtr cs;
1042         CitGenPtr cg;   
1043         CitArtPtr ca;
1044         MedlineEntryPtr ml;
1045         CitJourPtr jp;
1046         ImprintPtr ip;
1047         Boolean good_one;
1048         Int1 bad_one= 0;
1049         CharPtr str_ret;
1050 
1051         if (equiv->choice == PUB_Equiv) {
1052                 newpub = equiv->data.ptrvalue;
1053         } else {
1054                 newpub = equiv;
1055         }
1056         for (the_pub = newpub, good_one = FALSE; the_pub && ! good_one
1057                         ; the_pub = the_pub -> next) {
1058 
1059                 switch ( the_pub -> choice) {
1060 
1061       case PUB_Sub:
1062          cs = (CitSubPtr) the_pub -> data.ptrvalue;
1063          if (cs) {
1064                         if ( cs -> imp){
1065                                 ip = cs -> imp;
1066                                 if ( ip -> date) {
1067                                         retval = the_pub;
1068                                         good_one = TRUE; /* good for submitted */
1069                                 }
1070                         } else if (cs->date) {
1071                                         retval = the_pub;
1072                                         good_one = TRUE; /* good for submitted */
1073                         }
1074                 }
1075          break;
1076                 case PUB_Man:
1077                 case PUB_Book:
1078                         cb = (CitBookPtr) the_pub -> data.ptrvalue;
1079                         if ( cb -> imp) {
1080                                 ip = cb -> imp;
1081                                 if ( ip -> date) {
1082                                         retval = the_pub;
1083                                         good_one = TRUE; /* good for thesis or book */
1084                                 }
1085                         }
1086                 break;
1087                 case PUB_Patent:
1088                         retval = the_pub;
1089                         good_one = TRUE; /* might exclude later...*/
1090                 break;
1091                 case PUB_Article:
1092                 case PUB_Medline:
1093                         if ( the_pub -> choice == PUB_Medline) {
1094                                 ml = (MedlineEntryPtr) the_pub -> data.ptrvalue;
1095                                 ca = (CitArtPtr) ml -> cit;
1096 
1097                         } else {
1098                                 ca = (CitArtPtr) the_pub -> data.ptrvalue;
1099                         }
1100                 if (ca -> fromptr) {
1101                         if (ca -> from ==1) {
1102                                 jp = (CitJourPtr) ca -> fromptr;
1103                                 if ( jp -> imp) {
1104                                         ip = jp -> imp;
1105                                         if ( ip -> date) {
1106                                                 retval = the_pub;
1107                                                 good_one = TRUE; /* good as it gets */
1108                                         }
1109                                 }
1110                         } else {
1111                                 CitBookPtr book = (CitBookPtr) ca -> fromptr;
1112                                         if ( book -> imp) {
1113                                                 ip = book -> imp;
1114                                                 if ( ip -> date) {
1115                                                         retval = the_pub;
1116                                                         good_one = TRUE; /* good for book */
1117                                                 }
1118                                         }
1119                                 
1120                         }
1121                 }
1122                         break;
1123                 case PUB_Gen: 
1124                         cg = (CitGenPtr) the_pub -> data.ptrvalue;
1125                         if (cg -> cit) {
1126                                 str_ret = NULL;
1127                                 str_ret = StrStr(cg -> cit ,"Journal=\"");
1128                                 if ((str_ret) || (cg->title) || (cg->journal) || (cg->date)) {
1129                                         retval = the_pub;  /*unless something better */
1130                                 } else {
1131                                         if (StringNICmp("unpublished", cg->cit, 11) == 0)
1132                                                 retval = the_pub;
1133                                         else if (StringNICmp("to be published", cg->cit, 15) == 0)
1134                                                 retval = the_pub;
1135                                         else if (StringNICmp("in press", cg->cit, 8) == 0)
1136                                                 retval = the_pub;
1137                                         else if (StringNICmp("submitted", cg->cit, 8) == 0)
1138                                                 retval = the_pub;
1139                                 }
1140                         } else if (cg -> journal) {
1141                                 retval = the_pub;  /*unless something better */
1142                         }
1143 
1144                         break;
1145                 case PUB_Proc:
1146                         bad_one = the_pub -> choice;
1147                         break;
1148                 }
1149         }
1150 
1151         if (! retval && anything) {
1152            for (the_pub = newpub; the_pub; the_pub = the_pub -> next) {
1153                 if (the_pub->choice == PUB_Muid)
1154                         retval = the_pub;
1155            }
1156            if (! retval) /* Take anything left over now and hope for the best */
1157                 retval = newpub;
1158         }
1159 
1160         if ( ! retval && bad_one != 0) {
1161                 if (error_msgs == TRUE)
1162                         ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference,
1163                         "FlatRefBest: Unimplemented pub type = %d", bad_one);
1164         }
1165         
1166         return retval;
1167 }       /* FlatRefBest */
1168 
1169 NLM_EXTERN Int4 StoreFeatTemp(SortStruct PNTR List, SeqFeatPtr sfp,
1170 Int4 currentsize, BioseqPtr bsp, BioseqPtr seg, Uint2 entityID, Uint4 itemID, Uint2 itemtype,SeqLocPtr slp, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt,
1171 Boolean temp)
1172 {
1173         SeqLocPtr PNTR slpp = NULL;
1174         
1175         List[currentsize].entityID = entityID;
1176         List[currentsize].itemID = itemID;
1177         List[currentsize].itemtype = itemtype;
1178         List[currentsize].sfp = sfp;
1179         List[currentsize].bsp = bsp;
1180         List[currentsize].seg_bsp = seg;
1181         List[currentsize].dup = FALSE;
1182         List[currentsize].hash = AsnIoHash(sfp, 
1183                                                 (AsnWriteFunc) SeqFeatAsnWrite);
1184         List[currentsize].slp = slp;
1185         if (extra_loc_cnt > 0) {
1186                 slpp = MemNew(extra_loc_cnt*(sizeof(SeqLocPtr)));
1187                 MemCpy(slpp, extra_loc, extra_loc_cnt*(sizeof(SeqLocPtr)));
1188         } 
1189         List[currentsize].extra_loc = slpp;
1190         List[currentsize].extra_loc_cnt = extra_loc_cnt;
1191         List[currentsize].tempload = temp;
1192         List[currentsize].gsp = NULL;
1193         List[currentsize].nsp = NoteStructNew(List[currentsize].nsp);
1194 
1195         currentsize++;
1196 
1197         return currentsize;
1198 }
1199 
1200 NLM_EXTERN Int4 StoreFeat(SortStruct PNTR List, SeqFeatPtr sfp, Int4 currentsize, 
1201 BioseqPtr bsp, BioseqPtr seg, Uint2 entityID, Uint4 itemID, Uint2 itemtype,
1202 SeqLocPtr slp, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt)
1203 {
1204         return StoreFeatFree(List, sfp, currentsize, bsp, seg, entityID, itemID, 
1205                                         itemtype,slp, extra_loc, extra_loc_cnt, FALSE);
1206 }
1207 
1208 NLM_EXTERN Int4 StoreFeatFree(SortStruct PNTR List, SeqFeatPtr sfp, Int4 currentsize, 
1209 BioseqPtr bsp, BioseqPtr seg, Uint2 entityID, Uint4 itemID, Uint2 itemtype,
1210 SeqLocPtr slp, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt, Boolean feat_free)
1211 {
1212         SeqLocPtr PNTR slpp = NULL;
1213         
1214         List[currentsize].entityID = entityID;
1215         List[currentsize].itemID = itemID;
1216         List[currentsize].itemtype = itemtype;
1217         List[currentsize].sfp = sfp;
1218         List[currentsize].bsp = bsp;
1219         List[currentsize].seg_bsp = seg;
1220         List[currentsize].dup = FALSE;
1221         List[currentsize].hash = AsnIoHash(sfp, 
1222                                                 (AsnWriteFunc) SeqFeatAsnWrite);
1223         List[currentsize].slp = slp;
1224         if (extra_loc_cnt > 0) {
1225                 slpp = MemNew(extra_loc_cnt*(sizeof(SeqLocPtr)));
1226                 MemCpy(slpp, extra_loc, extra_loc_cnt*(sizeof(SeqLocPtr)));
1227         } 
1228         List[currentsize].extra_loc = slpp;
1229         List[currentsize].extra_loc_cnt = extra_loc_cnt;
1230         List[currentsize].feat_free = feat_free;
1231         List[currentsize].gsp = NULL;
1232         List[currentsize].nsp = NoteStructNew(List[currentsize].nsp);
1233 
1234         currentsize++;
1235 
1236         return currentsize;
1237 }
1238 /****************************************************************************
1239 *CharPtr Cat2Strings (CharPtr string1, CharPtr string2, CharPtr separator, Int2 num)
1240 *
1241 * Concatenates two strings (string1 and string2) and separates them by a
1242 * "separator".  If num>0, takes num spaces off the end of string1 on
1243 * concatenation; if num<0 takes all spaces off the end of the complete 
1244 * string.
1245 *****************************************************************************/
1246 NLM_EXTERN CharPtr Cat2Strings (CharPtr string1, CharPtr string2, CharPtr separator, Int2 num)
1247 
1248 {
1249         Boolean no_space=FALSE;
1250         Int4 length1=0, length2=0, length_sep=0, length_total;
1251         CharPtr newstring=NULL;
1252 
1253         if (num < 0)
1254         {
1255                 num=0;
1256                 no_space=TRUE;
1257         }
1258 
1259         if (string1 != NULL)
1260                 length1 = StringLen(string1);
1261         if (string2 != NULL)
1262                 length2 = StringLen(string2);
1263         if (separator != NULL)
1264                 length_sep = StringLen(separator);
1265 
1266         length_total = length1+length2+length_sep-num+1;
1267 
1268         newstring = (CharPtr) MemNew(length_total*sizeof(Char));
1269 
1270         if (string1 != NULL)    
1271                 newstring = StringCat(newstring, string1);
1272         if ((length1-num) >= 0)
1273                 newstring[length1-num] = '\0';
1274         if (no_space && length1 > 0)
1275                 while (length1 > 0 && newstring[length1-1] == ' ')
1276                 {
1277                         newstring[length1-1] = '\0';
1278                         length1--;
1279                 }
1280         if (separator != NULL)  
1281                 newstring = StringCat(newstring, separator);
1282         if (string2 != NULL)    
1283                 newstring = StringCat(newstring, string2);
1284 
1285 
1286         return newstring;
1287 }
1288 
1289 NLM_EXTERN GBQualPtr AddGBQualEx (CharPtr PNTR key, GBQualPtr gbqual, CharPtr qual, CharPtr val)
1290 {
1291         Int2 index;
1292         
1293         index = GBFeatKeyNameValid(key, FALSE);
1294         if (GBQualValidToAdd(index,qual)) {
1295                 return AddGBQual(gbqual, qual, val);
1296         }
1297         return gbqual;
1298 }
1299 
/************************************************************************
*AddGBQual
*
*       This function makes a new GBQual and adds a "val" and
*       a "qual" to it.
*       Doesn't add the qual if it's already there /tatiana/
*       Doesn't add an empty ("") val if qual is translation
***********************************************************************/
1308 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
1309 static Boolean IsEllipsis (
1310   CharPtr str
1311 )
1312 
1313 {
1314   size_t   len;
1315   CharPtr  ptr;
1316 
1317   if (StringHasNoText (str)) return FALSE;
1318   len = StringLen (str);
1319   if (len < 3) return FALSE;
1320   ptr = str + len - 3;
1321   return (Boolean) (ptr [0] == '.' && ptr [1] == '.' && ptr [2] == '.');
1322 }
1323 #endif
1324 
1325 NLM_EXTERN GBQualPtr AddGBQual (GBQualPtr gbqual, CharPtr qual, CharPtr val)
1326 {
1327         GBQualPtr curq, note = NULL;
1328 
1329         if (StringCmp(qual, "translation") == 0) {
1330                 if (val == NULL) 
1331                         return gbqual;
1332                 if (*val == '\0')
1333                         return gbqual;
1334         }
1335         if (gbqual) {
1336                 if (CheckForQual(gbqual, qual, val) == 1) {
1337                         return gbqual;
1338                 }
1339                 for (curq=gbqual; curq->next != NULL; curq=curq->next)
1340                         continue;
1341                 curq->next = GBQualNew();
1342                 curq = curq->next;
1343                 if (val)
1344                         curq->val = StringSave(val);
1345                 curq->qual = StringSave(qual);
1346                 note = curq;
1347         } else {
1348                 gbqual = GBQualNew();
1349                 gbqual->next = NULL;
1350                 if (val)
1351                         gbqual->val = StringSave(val);
1352                 gbqual->qual = StringSave(qual);
1353                 note = gbqual;
1354         }
1355 
1356 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
1357         if (note != NULL && StringICmp (qual, "note") == 0) {
1358                 size_t len;
1359                 CharPtr p, q;
1360                 len = StringLen (note->val);
1361                 if (len > 0 && note->val [len - 1] == '~') {
1362                         note->val [len - 1] = '\0';
1363                 }
1364                 if (! IsEllipsis (note->val)) {
1365                         len = StringLen (note->val);
1366                         if (len > 0 && note->val [len - 1] == '.') {
1367                                 note->val [len - 1] = '\0';
1368                                 if (len > 1 && note->val [len - 2] == '.') {
1369                                         note->val [len - 2] = '\0';
1370                                 }
1371                         }
1372                 }
1373                 TrimSpacesAndJunkFromEnds (note->val,TRUE);
1374                 TrimSpacesAndSemicolons (note->val);
1375                 p = note->val;
1376                 q = note->val;
1377                 while (*p) {
1378                   if (*p == ';' && p [1] == ' ' && p [2] == ';') {
1379                     p += 2;
1380                   } else {
1381                     *q = *p;
1382                     p++;
1383                     q++;
1384                   }
1385                 }
1386                 *q = '\0';
1387         }
1388 #endif
1389 
1390         return gbqual;
1391 }
1392 
1393 /****************************************************************************
1394 *       Int2 CheckForQual(GBQualPtr gbqual, CharPtr string_q, CharPtr string_v)
1395 *
1396 *       Compares string (a potential gbqual->val) against all gbquals.
1397 *       If a match is found, "1" is returned; if not "0".
1398 ****************************************************************************/
1399 
1400 NLM_EXTERN Int2 CheckForQual (GBQualPtr gbqual, CharPtr string_q, CharPtr string_v)
1401 {
1402         GBQualPtr curq;
1403 
1404         for (curq=gbqual; curq; curq=curq->next) {
1405                 if (StringCmp(string_q, curq->qual) == 0) {
1406                         if (curq->val == NULL) {
1407                                 curq->val = StringSave(string_v);
1408                                 return 1;
1409                         } 
1410                         if (StringCmp(string_v, curq->val) == 0) {
1411                                 return 1;
1412                         }
1413                 }
1414         }
1415         return 0;
1416 }
1417 
1418 
1419 /****************************************************************************
1420 *
1421 *       MakeAnAccession is for last ditch efforts to get an accession
1422 *       after all the normal things have failed.
1423 *
1424 ****************************************************************************/
1425 
1426 NLM_EXTERN CharPtr MakeAnAccession (CharPtr new_buf, SeqIdPtr seq_id, Int2 buflen)
1427 {
1428         SeqIdPtr new_id;
1429 
1430         new_id = SeqIdFindBest(seq_id, SEQID_GENBANK);
1431         SeqIdWrite(new_id, new_buf, PRINTID_TEXTID_ACCESSION, buflen);
1432         return new_buf;
1433 
1434 }
1435 
1436 NLM_EXTERN CharPtr GetGBSourceLine (GBBlockPtr gb)
1437 {
1438         CharPtr source = NULL;
1439 
1440         if(gb && gb->source)
1441                 source = StringSave(gb->source);
1442 
1443 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
1444         if (source != NULL) {
1445                 TrimSpacesAndJunkFromEnds (source,TRUE);
1446         }
1447 #endif
1448         return source;
1449 }
1450 
1451 NLM_EXTERN CharPtr FlatOrganelle(Asn2ffJobPtr ajp, GBEntryPtr gbp)
1452 {
1453         CharPtr retval = NULL;
1454         ValNodePtr man, vnp=NULL;
1455         static char * organelle_names [] = {
1456                  "Mitochondrion " ,
1457     "Chloroplast " ,
1458     "Kinetoplast ",
1459     "Cyanelle "};
1460         BioSourcePtr biosp=NULL;
1461 /*      
1462         static CharPtr genome[] = {
1463         NULL, NULL, "Chloroplast ", "Chromoplast ", "Kinetoplast ", "Mitochondrion ", "Plastid ", "Macronuclear ", "Extrachrom ", "Plasmid ", NULL, NULL, "Cyanelle ", "Proviral ", "Virion ", "Nucleomorph ", "Apicoplast ", "Leucoplast ", "Proplastid "};
1464 */      
1465         static CharPtr genome[] = {
1466         NULL, NULL, "Chloroplast ", "Chromoplast ", "Kinetoplast ", "Mitochondrion ", "Plastid ", NULL, NULL, NULL, NULL, NULL, "Cyanelle ", NULL, NULL, "Nucleomorph ", "Apicoplast ", "Leucoplast ", "Proplastid ", NULL};
1467         
1468 /* try new first */
1469         if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_source)) != NULL) 
1470         {
1471                 biosp = vnp->data.ptrvalue;
1472         /*      if (biosp->genome < 6 || biosp->genome > 12)*/
1473                         retval = StringSave(genome[biosp->genome]);
1474         }
1475 /* old next */
1476         if (biosp == NULL) {
1477                 if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_modif)) != NULL) 
1478                 {
1479                         for (man = (ValNodePtr) vnp-> data.ptrvalue; man; man = man -> next)
1480                         {
1481                                 switch (man -> data.intvalue){
1482                                         case 4: case 5: case 6: case 7:
1483                                         if (! retval )
1484                                                 retval = StringSave(organelle_names
1485                                                                 [man->data.intvalue-4]);
1486                                                 break;
1487                                         default:
1488                                                 break;
1489                                         }
1490                         }
1491                 }
1492         }
1493         return retval;
1494 }
1495 
1496 NLM_EXTERN Int4 GetNumOfSeqBlks (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1497 {
1498         Int4 length, num_of_seqblks;
1499 
1500         length = BioseqGetLen(gbp->bsp);
1501                 if (ajp->slp) {
1502                         length = SeqLocLen(ajp->slp);
1503                 }
1504         num_of_seqblks = ROUNDUP(length, SEQ_BLK_SIZE)/SEQ_BLK_SIZE;
1505 
1506         return num_of_seqblks;
1507 }
1508 
1509 
1510 /*************************************************************************
1511 *       New asn.1 spec - division is in Orgname.div
1512 *       check MolInfo.tech 
1513 *       check GBBlock for PAT or SYN
1514 *       get division from Orgname.div (in BioSource)
1515 *       09-05-96
1516 *************************************************************************/
1517 
1518 static void IndexedGetDescrForDiv (BioseqPtr bsp, DivStructPtr PNTR dspp)
1519 
1520 {
1521         SeqMgrDescContext context;
1522         ValNodePtr tmp;
1523         DivStructPtr    dsp;
1524         BioSourcePtr bsr;
1525         MolInfoPtr mol;
1526         CharPtr gb_div=NULL;
1527         GBBlockPtr gb;
1528 
1529         dsp = *dspp;
1530         tmp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &context);
1531                         if (tmp != NULL) {
1532                                 if (tmp->data.ptrvalue != NULL) {
1533                                         mol = (MolInfoPtr) tmp->data.ptrvalue;
1534                                         if (mol->tech != 0) {
1535                                                 if (dsp->tech == 0) {
1536                                                         dsp->tech = mol->tech;
1537                                                 } else if (mol->tech != dsp->tech) {
1538                                                         dsp->was_err = TRUE;
1539                                                         if (dsp->err_post) {
1540                                                                 ErrPostEx(SEV_WARNING, 0, 0, 
1541                                                                 "Different Molinfo in one entry: %d|%d", 
1542                                                                         mol->tech, dsp->tech);
1543                                                         }
1544                                                         dsp->tech = mol->tech;
1545                                                 }
1546                                                 dsp->techID = context.itemID;
1547                                                 dsp->techtype = OBJ_SEQDESC;
1548                                                 *dspp = dsp;
1549                                         }
1550                                 }
1551                         }
1552 
1553         tmp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
1554                         while (tmp != NULL && dsp->orgdiv == NULL) {
1555                                 bsr = (BioSourcePtr) tmp->data.ptrvalue;
1556                                 if (bsr && bsr->org) {
1557                                         if (bsr->org->orgname && bsr->org->orgname->div) {
1558                                                 gb_div = bsr->org->orgname->div;
1559                                                 if (dsp->orgdiv == NULL) {
1560                                                         dsp->orgdiv = gb_div;
1561                                                 } else if (StringCmp(gb_div, dsp->orgdiv) != 0) {
1562                                                         dsp->was_err = TRUE;
1563                                                         if (dsp->err_post) {
1564                                                                 ErrPostEx(SEV_WARNING, 0, 0, 
1565                                                         "Different Taxonomy divisions in one entry: %s|%s", 
1566                                                                         gb_div, dsp->orgdiv);
1567                                                         }
1568                                                         dsp->orgdiv = gb_div;
1569                                                 }
1570                                                 dsp->biosrc = bsr;
1571                                                 dsp->orgID = context.itemID;
1572                                                 dsp->orgtype = OBJ_SEQDESC;
1573                                                 *dspp = dsp;
1574                                         }
1575                                 }
1576                                 tmp = SeqMgrGetNextDescriptor (bsp, tmp, Seq_descr_source, &context);
1577                         }
1578 
1579         tmp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &context);
1580                         if (tmp != NULL) {
1581                                 gb = (GBBlockPtr) tmp->data.ptrvalue;
1582                                 if (gb->div) {
1583                                         gb_div = gb->div;
1584                                         if (dsp->gbdiv == NULL) {
1585                                                 dsp->gbdiv = gb_div;
1586                                         } else if (StringCmp(gb_div, dsp->gbdiv) != 0) {
1587                                                 dsp->was_err = TRUE;
1588                                                 if (dsp->err_post) {
1589                                                         ErrPostEx(SEV_WARNING, 0, 0, 
1590                                                         "Different GBBlock divisions in one entry: %s|%s", 
1591                                                                 gb_div, dsp->gbdiv);
1592                                                 }
1593                                                 dsp->gbdiv = gb_div;
1594                                         }
1595                                         dsp->gbID = context.itemID;
1596                                         dsp->gbtype = OBJ_SEQDESC;
1597                                         *dspp = dsp;
1598                                 }
1599                         }
1600 }
1601 
1602 static Boolean GetDescrForDiv (GatherContextPtr gcp)
1603 {
1604 /* find only one (closest to the target!) vnp with given choice */
1605         ValNodePtr      tmp;
1606         DivStructPtr    PNTR dspp;
1607         DivStructPtr    dsp;
1608         BioSourcePtr bsr;
1609         MolInfoPtr mol;
1610         CharPtr gb_div=NULL;
1611         GBBlockPtr gb;
1612         
1613         dspp = gcp->userdata;
1614         dsp = *dspp;
1615         switch (gcp->thistype)
1616         {
1617                 case OBJ_SEQDESC:
1618                         tmp = (ValNodePtr) (gcp->thisitem);
1619                         if (tmp->choice == Seq_descr_molinfo) {
1620                                 if (tmp->data.ptrvalue != NULL) {
1621                                         mol = (MolInfoPtr) tmp->data.ptrvalue;
1622                                         if (mol->tech != 0) {
1623                                                 if (dsp->tech == 0) {
1624                                                         dsp->tech = mol->tech;
1625                                                 } else if (mol->tech != dsp->tech) {
1626                                                         dsp->was_err = TRUE;
1627                                                         if (dsp->err_post) {
1628                                                                 ErrPostEx(SEV_WARNING, 0, 0, 
1629                                                                 "Different Molinfo in one entry: %d|%d", 
1630                                                                         mol->tech, dsp->tech);
1631                                                         }
1632                                                         dsp->tech = mol->tech;
1633                                                 }
1634                                                 dsp->techID = gcp->itemID;
1635                                                 dsp->techtype = gcp->thistype;
1636                                                 *dspp = dsp;
1637                                         }
1638                                 }
1639                         } else if (tmp->choice == Seq_descr_source) {
1640                                 bsr = (BioSourcePtr) tmp->data.ptrvalue;
1641                                 if (bsr && bsr->org) {
1642                                         if (bsr->org->orgname && bsr->org->orgname->div) {
1643                                                 gb_div = bsr->org->orgname->div;
1644                                                 if (dsp->orgdiv == NULL) {
1645                                                         dsp->orgdiv = gb_div;
1646                                                 } else if (StringCmp(gb_div, dsp->orgdiv) != 0) {
1647                                                         dsp->was_err = TRUE;
1648                                                         if (dsp->err_post) {
1649                                                                 ErrPostEx(SEV_WARNING, 0, 0, 
1650                                                         "Different Taxonomy divisions in one entry: %s|%s", 
1651                                                                         gb_div, dsp->orgdiv);
1652                                                         }
1653                                                         dsp->orgdiv = gb_div;
1654                                                 }
1655                                                 dsp->biosrc = bsr;
1656                                                 dsp->orgID = gcp->itemID;
1657                                                 dsp->orgtype = gcp->thistype;
1658                                                 *dspp = dsp;
1659                                         }
1660                                 }
1661                         } else if (tmp->choice == Seq_descr_genbank) {
1662                                 gb = (GBBlockPtr) tmp->data.ptrvalue;
1663                                 if (gb->div) {
1664                                         gb_div = gb->div;
1665                                         if (dsp->gbdiv == NULL) {
1666                                                 dsp->gbdiv = gb_div;
1667                                         } else if (StringCmp(gb_div, dsp->gbdiv) != 0) {
1668                                                 dsp->was_err = TRUE;
1669                                                 if (dsp->err_post) {
1670                                                         ErrPostEx(SEV_WARNING, 0, 0, 
1671                                                         "Different GBBlock divisions in one entry: %s|%s", 
1672                                                                 gb_div, dsp->gbdiv);
1673                                                 }
1674                                                 dsp->gbdiv = gb_div;
1675                                         }
1676                                         dsp->gbID = gcp->itemID;
1677                                         dsp->gbtype = gcp->thistype;
1678                                         *dspp = dsp;
1679                                 }
1680                         }                       
1681                         break;
1682                 default:
1683                         break;
1684         }
1685         return TRUE;
1686 }
1687 
1688 /**************************************************************************
1689 *       0 - nothing found
1690 *       1 - return division code OK
1691 *       2 - return division code but errors were found
1692 **************************************************************************/
1693 static Int2 BioseqGetGBDivCodeEx (BioseqPtr bsp, CharPtr buf, Int2 buflen, Boolean err_post, Boolean useFeatureIndexing)
1694 {
1695         GatherScope gsc;
1696         SeqLocPtr slp = NULL;
1697         Uint2 bspID;
1698         DivStructPtr dsp;
1699         BioSourcePtr bsr = NULL;
1700         Int2 tech, /*UNUSED*/diff, retval = 0;
1701         CharPtr orgdiv, gbdiv;
1702         SeqIdPtr sip;
1703 
1704         if (buf == NULL)
1705                 return 0;
1706         *buf = '\0';
1707 /* check for Patent SeqId  */
1708         for (sip = bsp->id; sip; sip=sip->next) {
1709                 if (sip->choice == SEQID_PATENT) {
1710                         diff = LabelCopy(buf, "PAT", buflen);
1711                         return 1;
1712                 }
1713         }
1714         bspID = ObjMgrGetEntityIDForPointer(bsp);
1715         dsp = MemNew(sizeof(DivStruct));
1716         dsp->err_post = err_post;
1717         dsp->entityID = bspID;
1718         dsp->tech = 0;
1719         dsp->gbdiv = NULL;
1720         dsp->orgdiv = NULL;
1721         dsp->biosrc = NULL;
1722         dsp->was_err = FALSE;
1723         MemSet ((Pointer) (&gsc), 0, sizeof (GatherScope));
1724         MemSet ((Pointer) (gsc.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
1725         gsc.ignore[OBJ_SEQDESC] = FALSE;
1726         slp = ValNodeNew(NULL);
1727         slp->choice = SEQLOC_WHOLE;
1728         slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
1729         gsc.target = slp;
1730 
1731         if (useFeatureIndexing) {
1732                 IndexedGetDescrForDiv (bsp, &dsp);
1733         } else {
1734                 GatherEntity(bspID, &dsp, GetDescrForDiv, &gsc);
1735         }
1736         
1737         SeqLocFree(slp);
1738         orgdiv = dsp->orgdiv;
1739         gbdiv = dsp->gbdiv;
1740         tech = dsp->tech;
1741         bsr = dsp->biosrc;
1742         if (dsp->was_err) {
1743                 retval = 2;
1744         } else {
1745                 retval = 1;
1746         }
1747         MemFree(dsp);
1748         switch (tech) {
1749                 case MI_TECH_est: 
1750                         diff = LabelCopy(buf, "EST", buflen);
1751                 break;
1752                 case MI_TECH_sts:  /* Sequence Tagged Site */
1753                         diff = LabelCopy(buf, "STS", buflen);
1754                 break;
1755                 case MI_TECH_survey:
1756                         diff = LabelCopy(buf, "GSS", buflen);
1757                 break;
1758                 case MI_TECH_htc:
1759                         diff = LabelCopy(buf, "HTC", buflen);
1760                 break;
1761                 case MI_TECH_htgs_0:
1762                 case MI_TECH_htgs_1:
1763                 case MI_TECH_htgs_2:
1764                         diff = LabelCopy(buf, "HTG", buflen);
1765                 break;
1766                 default:
1767                 break;
1768         }
1769         if (*buf != '\0') {
1770                 return retval;
1771         }
1772 /*  new slot for synthetic sequences */ 
1773         if (bsr && bsr->origin == 5) {
1774                 diff = LabelCopy(buf, "SYN", buflen);
1775                 return retval;
1776         }
1777 /***** division in GBBlock becomes obsolete  ********/  
1778         if (gbdiv != NULL) {
1779                 if (StringCmp(gbdiv, "PAT") == 0 || 
1780                                         StringCmp(gbdiv, "SYN") == 0 || orgdiv == NULL) {
1781                         diff = LabelCopy(buf, gbdiv, buflen);
1782                 return retval;
1783                 }
1784         }
1785         
1786 /**********/    
1787         if (orgdiv != NULL) {
1788                 diff = LabelCopy(buf, orgdiv, buflen);
1789                 return retval;
1790         }
1791         return 0;
1792 }
1793 
1794 NLM_EXTERN Int2 BioseqGetGBDivCode(BioseqPtr bsp, CharPtr buf, Int2 buflen, Boolean err_post)
1795 
1796 {
1797         return BioseqGetGBDivCodeEx (bsp, buf, buflen, err_post, FALSE);
1798 }
1799 
1800 
/*============================================================================*\
 * Function:
 *      StrStripSpaces
 *
 * Purpose:
 *      Compresses blanks in a string, in place:
 *        - a run of spaces and/or tabs is reduced to its first character
 *          (so a run that starts with a tab stays a single tab);
 *        - blanks following '(' are removed;
 *        - blanks immediately preceding ')' or ',' are removed.
 *      An opening parenthesis itself is never removed, so "( )" becomes
 *      "()" and not ")".
 *
 * Parameters:
 *      str - NUL-terminated string to edit in place; NULL is ignored.
 *
\*----------------------------------------------------------------------------*/
static void StrStripSpaces(CharPtr str)
{
        CharPtr new_str;

        if (str == NULL) {
                return;
        }

        new_str = str;
        while (*str != '\0') {
                *new_str++ = *str;
                if (*str == ' ' || *str == '\t' || *str == '(') {
                        /* skip the blanks that follow the character just copied */
                        for (str++; *str == ' ' || *str == '\t'; str++) ;
                        if (*str == ')' || *str == ',') {
                                /* Drop the blank just copied, but never the
                                   '(' itself (new_str was advanced above, so
                                   new_str[-1] is always valid here). */
                                if (new_str[-1] != '(') {
                                        new_str--;
                                }
                        }
                } else {
                        str++;
                }
        }
        *new_str = '\0';
}
1834 
1835 static CharPtr GetFlatRetract(ValNodePtr pub)
1836 {
1837         CitArtPtr cit;
1838         CitJourPtr jour = NULL;
1839         CitRetractPtr ret = NULL;
1840         CharPtr buffer;
1841         Int2 len;
1842         
1843         if (pub == NULL)
1844                 return NULL;
1845         if (pub->choice != PUB_Article)
1846                 return NULL;
1847         cit = pub->data.ptrvalue;
1848         if (cit->from == 1) {
1849                 jour = cit->fromptr;
1850                 if (jour && jour->imp) {
1851                         ret = jour->imp->retract;
1852                         if (ret && ret->type == 3) { /* other types can be added later */
1853                                 len = StringLen(ret->exp) + 11;
1854                                 buffer = (CharPtr) MemNew(len*sizeof(Char));
1855                                 sprintf(buffer, "Erratum:[%s]", ret->exp);
1856                                 return buffer;
1857                         }
1858                 }
1859         }
1860         return NULL;
1861 }
1862 
1863 static CharPtr GetSubmitDescr(ValNodePtr pub)
1864 {
1865         CitSubPtr cs;
1866         
1867         if (pub == NULL) {
1868                 return NULL;
1869         }
1870         if (pub->choice != PUB_Sub) {
1871                 return NULL;
1872         }
1873         cs = (CitSubPtr) pub->data.ptrvalue;
1874         if (cs->descr == NULL) {
1875                 return NULL;
1876         }
1877         return (StringSave(cs->descr));
1878 }
1879 
1880 static Int4 GetMuid(ValNodePtr equiv)
1881 {
1882         Int4 muid=0;
1883         ValNodePtr newpub, the_pub;
1884         MedlineEntryPtr ml;
1885         
1886         if (equiv->choice == PUB_Equiv)
1887                 newpub = equiv->data.ptrvalue;
1888         else
1889                 newpub = equiv;
1890 
1891         for (the_pub = newpub; the_pub; the_pub = the_pub -> next) {
1892                 if (the_pub->choice == PUB_Muid) {
1893                         muid = the_pub->data.intvalue;
1894                         break;
1895                 }
1896                 if (the_pub->choice == PUB_Medline) {
1897                         ml = (MedlineEntryPtr) the_pub -> data.ptrvalue;
1898                         muid = ml->uid;
1899                 }
1900         }
1901 
1902         return muid;
1903 
1904 }       /* GetMuid */
1905 
1906 static Int4 GetPmid(ValNodePtr equiv)
1907 {
1908         Int4 pmid=0;
1909         ValNodePtr newpub, the_pub;
1910         MedlineEntryPtr ml;
1911         
1912         if (equiv->choice == PUB_Equiv)
1913                 newpub = equiv->data.ptrvalue;
1914         else
1915                 newpub = equiv;
1916 
1917         for (the_pub = newpub; the_pub; the_pub = the_pub -> next) {
1918                 if (the_pub->choice == PUB_PMid) {
1919                         pmid = the_pub->data.intvalue;
1920                         break;
1921                 }
1922                 if (the_pub->choice == PUB_Medline) {
1923                         ml = (MedlineEntryPtr) the_pub -> data.ptrvalue;
1924                         pmid = ml->pmid;
1925                 }
1926         }
1927 
1928         return pmid;
1929 
1930 }       /* GetPmid */
1931 
1932 /***************************************************************************
1933 * SeqLocPtr GetBaseRangeForCitation (SeqLocPtr loc, SeqLocPtr slp, Int4Ptr start, Int4Ptr stop)
1934 *
1935 *
1936 *       This function finds the start and stop Int4 values for a location.
1937 *       If this is a cmplex location (e.g., SEQLOC_MIX), then the
1938 *       function is called several times, with the returned slp used
1939 *       as an argument on the next round.
1940 *       
1941 *       The first call should be with slp set to NULL.
1942 *
1943 ****************************************************************************/
1944 
1945 static SeqLocPtr GetBaseRangeForCitation (SeqLocPtr loc, SeqLocPtr slp, Int4Ptr start, Int4Ptr stop)
1946 {
1947         Int4 tmp_start, tmp_stop, tmp_range;
1948         
1949         *start = 0;
1950         *stop = 0;
1951 
1952         switch (loc->choice)
1953         {
1954                 case SEQLOC_BOND:   
1955                 case SEQLOC_FEAT:   
1956                 case SEQLOC_NULL:    
1957                 case SEQLOC_EMPTY:  
1958                         slp = NULL;
1959                         break;
1960                 case SEQLOC_WHOLE:
1961                 case SEQLOC_INT:
1962                         if ((tmp_start = SeqLocStart(loc)) >= 0  &&
1963                                         (tmp_stop = SeqLocStop(loc)) >= 0)
1964                         {
1965                                 tmp_range = tmp_stop - tmp_start;
1966                                 if (tmp_range >= 0)
1967                                 { /* +1 for Genbank format. */
1968                                         *start = tmp_start+1;
1969                                         *stop = tmp_stop+1;
1970                                 }
1971                         }
1972                         slp = NULL;
1973                         break;
1974                 case SEQLOC_MIX:
1975                 case SEQLOC_EQUIV:
1976                 case SEQLOC_PACKED_INT:
1977                         if (slp == NULL)
1978                                 slp = loc->data.ptrvalue;
1979                         if (slp != NULL)
1980                         {
1981                                 if ((tmp_start = SeqLocStart(slp)) >= 0  &&
1982                                                 (tmp_stop = SeqLocStop(slp)) >= 0)
1983                                 {
1984                                         tmp_range = tmp_stop - tmp_start;
1985                                         if (tmp_range >= 0)
1986                                         { /* +1 for Genbank format. */
1987                                                 *start = tmp_start+1;
1988                                                 *stop = tmp_stop+1;
1989                                         }
1990                                 }
1991                                 slp = slp->next;
1992                         }
1993                         break;
1994                 case SEQLOC_PACKED_PNT: 
1995                 case SEQLOC_PNT:
1996                         slp = NULL;
1997                         break;
1998                 default:
1999                         slp = NULL;
2000                         break;
2001         }
2002         return slp;
2003 }
2004 
2005 /*************************************************************************
2006 *GB_PrintPubs
2007 *
2008 *       "GB_PrintPubs" to dump pubs in Flat File (i.e., Genbank) format.
2009 *
2010 **************************************************************************/
2011 
2012 void GB_PrintPubs (Asn2ffJobPtr ajp, GBEntryPtr gbp, PubStructPtr psp)
2013 
2014 {
2015 
2016         BioseqPtr bsp=gbp->bsp;
2017         Boolean first_time, ignore_this=FALSE, submit=FALSE, tag;
2018         Char buffer[150];
2019         CharPtr authors=NULL,title=NULL,journal=NULL,string_start, string, retract;
2020         CharPtr descr = NULL;
2021         Int2 i;
2022         Int4 gibbsq, muid, pmid, pat_seqid=0, start=0, stop=0;
2023         PubdescPtr pdp;
2024         SeqFeatPtr sfp;
2025         SeqLocPtr loc, slp;
2026         ValNodePtr pub;
2027 
2028         if (ASN2FF_SHOW_ALL_PUBS) {
2029                 pub = FlatRefBest(psp->pub, ajp->error_msgs, TRUE);
2030         } else {
2031                 pub = FlatRefBest(psp->pub, ajp->error_msgs, FALSE);
2032         }
2033         if (pub == NULL)
2034         {
2035                 if (ajp->error_msgs == TRUE)
2036                         PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
2037                 return;
2038         }
2039         ignore_this = FlatIgnoreThisPatentPub(bsp, pub, &pat_seqid);
2040         if (ajp->format != GENPEPT_FMT)
2041         {
2042                 if (ignore_this == TRUE)
2043                 {
2044                         if (ajp->error_msgs == TRUE)
2045                                 PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
2046                         return;
2047                 }
2048         }
2049 
2050         ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
2051         ff_AddString("REFERENCE");
2052         TabToColumn(13);
2053         ff_AddInteger("%ld", (long) psp->number);
2054         if (psp->start == 1) {
2055                 TabToColumn(16);
2056                 if (psp->descr != NULL) {
2057                         if (psp->descr->reftype != 0) {
2058                                 ff_AddString("(sites)");
2059                         } else {
2060                                 if (ajp->format != GENPEPT_FMT) {
2061                                         ff_AddString("(bases ");
2062                                 } else {
2063                                         ff_AddString("(residues ");
2064                                 }
2065                                 if (ajp->slp) {
2066                                         ff_AddInteger("%ld", (long) (SeqLocStart(ajp->slp) + 1));
2067                                         ff_AddString(" to "); 
2068                                         ff_AddInteger("%ld", (long) (SeqLocStop(ajp->slp) + 1));
2069                                 } else {
2070                                         ff_AddString("1 to ");
2071                                         ff_AddInteger("%ld", (long) bsp->length);
2072                                 }
2073                                 ff_AddChar(')');
2074                         }
2075                 }
2076         }
2077         else if (psp->start == 2) {
2078                 TabToColumn(16);
2079                 if (ajp->format != GENPEPT_FMT)
2080                         ff_AddString("(bases ");
2081                 else
2082                         ff_AddString("(residues ");
2083                 for (i=0; i<psp->citcount; i++) {
2084                         sfp = psp->citfeat[i];
2085                         loc = (SeqLocPtr) sfp->location;
2086                         slp = GetBaseRangeForCitation (loc, NULL, &start, &stop);
2087                         if (start != 0 || stop != 0) {
2088                                 ff_AddInteger("%ld", (long) start);
2089                                 ff_AddString(" to ");
2090                                 ff_AddInteger("%ld", (long) stop);
2091                                 if (slp != NULL || i+1 != psp->citcount)
2092                                         ff_AddString("; ");
2093                         }
2094                         while (slp != NULL) {
2095                                 slp = GetBaseRangeForCitation (loc, slp, &start, &stop);
2096                                 if (start != 0 || stop != 0) {
2097                                         ff_AddInteger("%ld", (long) start);
2098                                         ff_AddString(" to ");
2099                                         ff_AddInteger("%ld", (long) stop);
2100                                         if (slp != NULL || i+1 != psp->citcount)
2101                                                 ff_AddString("; ");
2102                                 }
2103                         }
2104                 }
2105                 ff_AddChar(')');
2106         } else if (psp->start == 3) {
2107                 TabToColumn(16);
2108                 ff_AddString("(sites)");
2109         } else {
2110                 if (ajp->error_msgs == TRUE)
2111                         ErrPostEx(SEV_WARNING, CTX_NCBI2GB, 1, 
2112                          "Incorrect start value (%d) in PubStruct\n", psp->start);
2113         }
2114         ff_EndPrint();
2115 
2116         authors = FlatAuthor(ajp, pub);
2117         ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2118         ff_AddString("AUTHORS");
2119         TabToColumn(13);
2120 
2121         if (authors && *authors != NULLB) {
2122                 ff_AddString(authors);
2123         } else {
2124                 ff_AddChar('.');
2125         }
2126         ff_EndPrint();
2127 
2128         title = FlatPubTitle(pub);
2129         if (title ) {
2130                 if ( *title  != NULLB) {
2131                         ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2132                         ff_AddString("TITLE");
2133                         TabToColumn(13);
2134                         StrStripSpaces(title);
2135                         ff_AddString(title);
2136                         ff_EndPrint();
2137                 }
2138         }
2139 
2140         journal = FlatJournal(ajp, gbp, pub, pat_seqid, &submit, FALSE);
2141         ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2142         ff_AddString("JOURNAL");
2143         TabToColumn(13);
2144         if (journal ) {
2145 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
2146                 CharPtr p, q;
2147                 p = journal;
2148                 q = journal;
2149                 while (*p) {
2150                   if (*p == ',' && p [1] == ' ' && p [2] == ';') {
2151                     p += 2;
2152                   } else {
2153                     *q = *p;
2154                     p++;
2155                     q++;
2156                   }
2157                 }
2158                 *q = '\0';
2159 #endif
2160                 StrStripSpaces(journal);
2161                 ff_AddString(journal);
2162         } else {
2163                 ff_AddString("Unpublished");
2164         }
2165         ff_EndPrint();
2166 
2167         muid = GetMuid(psp->pub);
2168         if (muid > 0) {
2169                 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2170                 ff_AddString("MEDLINE");
2171                 TabToColumn(13);
2172                 www_muid(muid);
2173                 ff_EndPrint();
2174         }
2175         pmid = GetPmid (psp->pub);
2176         if (pmid > 0) {
2177                 ff_StartPrint(3, 12, ASN2FF_GB_MAX, NULL);
2178                 ff_AddString("PUBMED");
2179                 TabToColumn(13);
2180                 www_muid(pmid);
2181                 ff_EndPrint();
2182         }
2183 
2184         tag = FALSE;
2185         pdp = psp->descr;
2186         if (pdp != NULL && pdp->comment != NULL) {
2187                 if (StringCmp(pdp->comment, "full automatic") != 0 &&
2188                   StringCmp(pdp->comment, "full staff_review") != 0 &&
2189                    StringCmp(pdp->comment, "full staff_entry") != 0 &&
2190                     StringCmp(pdp->comment, "simple staff_review") != 0 &&
2191                       StringCmp(pdp->comment, "simple staff_entry") != 0 &&
2192                        StringCmp(pdp->comment, "simple automatic") != 0 &&
2193                         StringCmp(pdp->comment, "unannotated automatic") != 0 &&
2194                          StringCmp(pdp->comment, "unannotated staff_review") != 0 &&
2195                           StringCmp(pdp->comment, "unannotated staff_entry") != 0)
2196                 {
2197                         ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2198                         ff_AddString("REMARK  ");
2199                         TabToColumn(13);
2200                         ff_AddStringWithTildes(pdp->comment);
2201                         tag = TRUE;
2202                 }
2203         }
2204         string = &buffer[0];
2205         gibbsq = GetGibbsqStatement(gbp, string);
2206         if (gibbsq > 0) {
2207                 if (tag != TRUE) {
2208                         ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2209                         ff_AddString("REMARK");
2210                         TabToColumn(13);
2211                 } else {
2212                         NewContLine();
2213                 }
2214                 ff_AddStringWithTildes(string);
2215                 tag = TRUE;
2216         }
2217         string = GetGibbsqComment(gbp);
2218         if (string) {
2219                 string_start = string;
2220                 if (tag != TRUE) {
2221                         ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2222                         ff_AddString("REMARK");
2223                         TabToColumn(13);
2224                 } else {
2225                         NewContLine();
2226                 }
2227                 first_time = TRUE;
2228 /* Can't this be rewritten to use ff_AddString????  That would be faster! */
2229                 while (*string != '\0') {
2230                         if (*string == '~') {
2231                                 if (first_time == FALSE)
2232                                         NewContLine();
2233                                 else
2234                                         first_time = FALSE;
2235                         } else if (*string == '\"') {
2236                                 *string = '\'';
2237                                 ff_AddChar(*string);
2238                         } else {
2239                                 ff_AddChar(*string);
2240                         }
2241                         string++;
2242                 }
2243                 string_start = MemFree(string_start);
2244                 tag=TRUE;
2245         }
2246         retract = GetFlatRetract(pub);
2247         if (retract) {
2248                 if (tag != TRUE) {
2249                         ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2250                         ff_AddString("REMARK");
2251                         TabToColumn(13);
2252                 } else {
2253                         NewContLine();
2254                 }
2255                 ff_AddStringWithTildes(retract);
2256                 tag = TRUE;
2257                 MemFree(retract);
2258         }
2259         descr = GetSubmitDescr(pub);
2260         if (descr) {
2261                 if (tag != TRUE) {
2262                         ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2263                         ff_AddString("REMARK");
2264                         TabToColumn(13);
2265                 } else {
2266                         NewContLine();
2267                 }
2268                 ff_AddStringWithTildes(descr);
2269                 tag = TRUE;
2270         }
2271         if (tag == TRUE)
2272                 ff_EndPrint();
2273 
2274         if (authors)
2275                 MemFree(authors);
2276 
2277         MemFree(descr);
2278         MemFree(title);
2279         MemFree(journal);
2280 }       /* GB_PrintPubs */
2281 
2282 /*************************************************************************
2283 *GR_PrintPubs
2284 *
2285 *       "GR_PrintPubs" to dump pubs in Flat File (i.e., Genbank) format.
2286 *
2287 **************************************************************************/
2288 
2289 void GR_PrintPubs (Asn2ffJobPtr ajp, GBEntryPtr gbp, PubStructPtr psp)
2290 
2291 {
2292 
2293         BioseqPtr bsp=gbp->bsp;
2294         Boolean ignore_this=FALSE, submit=FALSE;
2295         CharPtr authors=NULL,title=NULL,journal=NULL;
2296         CharPtr descr = NULL;
2297         Int4 muid, pmid, pat_seqid=0, start=0, stop=0;
2298         ValNodePtr pub;
2299 
2300         if (ASN2FF_SHOW_ALL_PUBS) {
2301                 pub = FlatRefBest(psp->pub, ajp->error_msgs, TRUE);
2302         } else {
2303                 pub = FlatRefBest(psp->pub, ajp->error_msgs, FALSE);
2304         }
2305         if (pub == NULL)
2306         {
2307                 if (ajp->error_msgs == TRUE)
2308                         PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
2309                 return;
2310         }
2311         ignore_this = FlatIgnoreThisPatentPub(bsp, pub, &pat_seqid);
2312         if (ajp->format != GENPEPT_FMT)
2313         {
2314                 if (ignore_this == TRUE)
2315                 {
2316                         if (ajp->error_msgs == TRUE)
2317                                 PostARefErrMessage (ajp, bsp, psp, NULL, -1, NULL);
2318                         return;
2319                 }
2320         }
2321 
2322         ff_StartPrint(0, 12, ASN2FF_GB_MAX, NULL);
2323         ff_AddString("<BR><BR>");
2324         title = FlatPubTitle(pub);
2325         if (title ) {
2326                 if ( *title  != NULLB) {
2327                         StrStripSpaces(title);
2328                         ff_AddString("<B>");
2329                         ff_AddString(title);
2330                         ff_AddString("</B>");
2331                         ff_EndPrint();
2332                 }
2333         }
2334         authors = FlatAuthor(ajp, pub);
2335 
2336         if (authors && *authors != NULLB) {
2337                 ff_AddString("<BR>");
2338                 ff_AddString(authors);
2339         } else {
2340                 ff_AddChar('.');
2341         }
2342         ff_EndPrint();
2343 
2344 
2345         journal = FlatJournal(ajp, gbp, pub, pat_seqid, &submit, FALSE);
2346         ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2347         ff_AddString("<BR>");
2348         if (journal ) {
2349                 StrStripSpaces(journal);
2350                 ff_AddString(journal);
2351         } else {
2352                 ff_AddString("Unpublished");
2353         }
2354         ff_EndPrint();
2355 
2356         muid = GetMuid(psp->pub);
2357         if (muid > 0) {
2358                 ff_StartPrint(2, 12, ASN2FF_GB_MAX, NULL);
2359                 ff_AddString("<BR>");
2360                 TabToColumn(13);
2361                 www_muid(muid);
2362                 ff_EndPrint();
2363         }
2364         pmid = GetPmid (psp->pub); /* not sure what GR format should be generating */
2365         /*
2366         if (pmid > 0) {
2367                 ff_StartPrint(3, 12, ASN2FF_GB_MAX, NULL);
2368                 ff_AddString("<BR>");
2369                 TabToColumn(13);
2370                 www_muid(pmid);
2371                 ff_EndPrint();
2372         }
2373         */
2374 
2375 
2376         if (authors)
2377                 MemFree(authors);
2378 
2379         MemFree(title);
2380         MemFree(journal);
2381         
2382 }       /* GR_PrintPubs */
2383 
2384 /*************************************************************************
2385 *EMBL_PrintPubs
2386 *
2387 *       "EMBL_PrintPubs" to dump pubs in FlatFile (EMBL) format.
2388 *
2389 **************************************************************************/
2390 
2391 void EMBL_PrintPubs (Asn2ffJobPtr ajp, GBEntryPtr gbp, PubStructPtr psp)
2392 
2393 {
2394 
2395         BioseqPtr bsp=gbp->bsp;
2396         Boolean ignore_this=FALSE, submit=FALSE;
2397         CharPtr authors=NULL, title=NULL, journal=NULL, new_journal;
2398         Int2 i;
2399         Int4 pat_seqid=0;       
2400         Int4 start=0, stop=0, tmp_range, range;
2401         PubdescPtr descr=psp->descr;
2402         SeqFeatPtr sfp;
2403         SeqLocPtr loc, slp;
2404         ValNodePtr pub;
2405         Int4 muid;
2406         Char s[15];
2407 
2408         pub = FlatRefBest(psp->pub, ajp->error_msgs, FALSE);
2409         if (pub == NULL)
2410         {
2411                 if (ajp->error_msgs == TRUE)
2412                         ErrPostStr(SEV_WARNING, ERR_REFERENCE_Illegalreference, "FFDumpPubs: Invalid Pub found.");
2413                 return;
2414         }
2415         ignore_this = FlatIgnoreThisPatentPub(bsp, pub, &pat_seqid);
2416         if (ignore_this == TRUE && ASN2FF_IGNORE_PATENT_PUBS != FALSE)
2417         {
2418                 if (ajp->error_msgs == TRUE)
2419                         ErrPostStr(SEV_WARNING, ERR_REFERENCE_Illegalreference, "FFDumpPubs: Invalid Patent Pub");
2420                 return;
2421         }
2422 
2423         PrintXX();
2424 
2425         ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RN");
2426         ff_AddChar('[');
2427         ff_AddInteger("%ld", (long) psp->number);
2428         ff_AddChar(']');
2429         ff_EndPrint();
2430         if (psp->start == 1)
2431         {
2432                 ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RP");
2433                 ff_AddString("1-");
2434                 ff_AddInteger("%ld", (long) bsp->length);
2435                 ff_EndPrint();
2436         }
2437         else if (psp->start == 2)
2438         {
2439                 range = 0;
2440                 ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RP");
2441                 for (i=0; i<psp->citcount; i++)
2442                 {
2443                         sfp = psp->citfeat[i];
2444                         loc = (SeqLocPtr) sfp->location;
2445                         slp = GetBaseRangeForCitation (loc, NULL, &start, &stop);
2446                         if (start != 0 || stop != 0)
2447                         { /* Why do I need the tmp_range test??? */
2448                                 tmp_range = stop - start;
2449                                 if (tmp_range >= range)
2450                                 {
2451                                         range = tmp_range;
2452                                         ff_AddInteger("%ld", (long) start);
2453                                         ff_AddChar('-');
2454                                         ff_AddInteger("%ld", (long) stop);
2455                                         if (slp != NULL || i+1 != psp->citcount)
2456                                                 ff_AddString(", ");
2457                                 }
2458                         }
2459                         while (slp != NULL)
2460                         {
2461                                 slp = GetBaseRangeForCitation (loc, slp, &start, &stop);
2462                                 if (start != 0 || stop != 0)
2463                                 {
2464                                         ff_AddInteger("%ld", (long) start);
2465                                         ff_AddChar('-');
2466                                         ff_AddInteger("%ld", (long) stop);
2467                                         if (slp != NULL || i+1 != psp->citcount)
2468                                                 ff_AddString(", ");
2469                                 }
2470                         }
2471                 }
2472                 ff_EndPrint();
2473         }
2474         else if (psp->start == 3 && ajp->pseudo == TRUE) 
2475         { /* "sites" only for pseudo-embl.  */
2476                 ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RP");
2477                 ff_AddString("(sites)");
2478                 ff_EndPrint();
2479         }
2480 
2481         journal = FlatJournal(ajp, gbp, pub, pat_seqid, &submit, FALSE);
2482 
2483         if (descr && descr->comment)
2484         {
2485                 ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RC");
2486                 ff_AddString(descr->comment);
2487                 ff_EndPrint();
2488         }
2489         authors = FlatAuthor(ajp, pub);
2490         ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RA");
2491         if (authors)
2492                 ff_AddString(authors);
2493         ff_AddChar(';');
2494         ff_EndPrint();
2495         
2496         ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RT");
2497         if (! submit)
2498         {
2499                 title = FlatPubTitle(pub);
2500                 if (title ){
2501                         if ( *title )
2502                         {
2503                                 ff_AddChar('\"');
2504                                 StrStripSpaces(title); 
2505                                 ff_AddString(title);
2506                                 ff_AddChar('\"');
2507                         }
2508                 }
2509         }
2510         ff_AddChar(';');
2511         ff_EndPrint();
2512 
2513         ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RL");
2514         if (journal)
2515         {
2516                 new_journal = CheckEndPunctuation(journal, '.');
2517                 StrStripSpaces(new_journal);
2518                 ff_AddString(new_journal);
2519                 new_journal = MemFree(new_journal);
2520         }
2521         ff_EndPrint();
2522         
2523         muid = GetMuid(psp->pub);
2524         if (muid != 0) {
2525                 sprintf(s, "%ld.", (long) muid);
2526                 s[StringLen(s)] = '\0';
2527                 ff_StartPrint(5, 5, ASN2FF_EMBL_MAX, "RX");
2528                 ff_AddString("MEDLINE; ");
2529                 ff_AddString(s);
2530                 ff_EndPrint();
2531         }
2532         if (authors)
2533                 MemFree(authors);
2534         MemFree(title);
2535         MemFree(journal);
2536 }       /* EMBL_PrintPubs */
2537 
2538 /***************************************************************************
2539 *CharPtr CheckLocusLength (Boolean error_msgs, CharPtr locus, Int2 locus_max, Int2 total_segs)
2540 *
2541 *       Calculate the length of the locus; if it's too long, take characters
2542 *       off the front.  If it's part of a segmented set and the locus ends 
2543 *       in a number, add an "S".  If it appears to be an NCBI locus of the
2544 *       form HSU00001, then take two letters off the front.
2545 ***************************************************************************/ 
2546 static CharPtr CheckLocusLength (Boolean error_msgs, CharPtr locus, Int2 locus_max, Int2 total_segs)
2547 
2548 {
2549         Boolean cut_two=FALSE;
2550         CharPtr buffer;
2551         Int2 length, surplus;
2552 
2553         length = StringLen(locus);
2554         buffer = MemNew((length+2)*sizeof(Char));
2555         buffer = StringCpy(buffer, locus);
2556         
2557         if (total_segs > 0 && IS_DIGIT(locus[length-1]) != 0)
2558                 if (locus[length-1] != '0' || IS_DIGIT(locus[length-2]) != 0)
2559                 {
2560                         length++;
2561                         buffer[length-1] = 'S';
2562                         buffer[length] = '\0';
2563                 }
2564 
2565         surplus = length - locus_max;
2566 
2567         if (surplus > 0)
2568         {
2569                 if (surplus <= 2)
2570                 {       /* Check if this is of the form HSU00001S */
2571                         if (IS_ALPHA(buffer[0]) != 0 &&
2572                                 IS_ALPHA(buffer[1]) != 0 &&
2573                                 IS_ALPHA(buffer[2]) != 0 &&
2574                                 IS_DIGIT(buffer[3]) != 0 &&
2575                                 IS_DIGIT(buffer[4]) != 0 &&
2576                                 IS_DIGIT(buffer[5]) != 0 &&
2577                                 IS_DIGIT(buffer[6]) != 0 &&
2578                                 IS_DIGIT(buffer[7]) != 0 &&
2579                                 buffer[8] == 'S' &&
2580                                 buffer[9] == '\0')
2581                                         cut_two = TRUE;
2582                 }
2583         
2584                 if (cut_two == TRUE)
2585                         locus = StringCpy(locus, buffer+2);
2586                 else
2587                         locus = StringCpy(locus, buffer+surplus);
2588                 if (error_msgs == TRUE)
2589                 {
2590                         flat2asn_delete_locus_user_string();
2591                         flat2asn_install_locus_user_string(buffer);
2592                         ErrPostStr(SEV_INFO, ERR_LOCUS_ChangedLocusName, 
2593                                 "Locusname length is more than 16, locusname is truncated");
2594                 }
2595         }
2596 
2597         buffer = MemFree(buffer);
2598 
2599         return locus;
2600 }
2601 
2602 NLM_EXTERN Int4 GetPubsAwp (Asn2ffJobPtr ajp, GBEntryPtr gbp)
2603 {
2604         GatherScope gs;
2605         BioseqPtr bsp = NULL;
2606         ValNodePtr vnp, v;
2607         SeqLocPtr slp = NULL;
2608         SeqIdPtr isip;
2609         Int4 status, i;
2610         Char buffer[31];
2611         
2612         bsp = gbp->bsp;
2613         if (bsp == NULL) {
2614                 return 0;
2615         }
2616         isip = bsp->id;
2617         vnp = NULL;
2618         MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
2619         gs.get_feats_location = TRUE;
2620         if (ajp->genome_view == TRUE) {
2621                 gs.seglevels = 0;
2622         } else if (ajp->only_one) { 
2623                 gs.seglevels = 2;
2624         } else {
2625                 gs.seglevels = 1;
2626         }
2627 /*      MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
2628         gs.ignore[OBJ_SEQDESC] = FALSE;
2629         gs.ignore[OBJ_SEQANNOT] = FALSE;
2630         gs.ignore[OBJ_SEQFEAT] = FALSE;
2631         gs.ignore[OBJ_SEQSUB] = FALSE;
2632         gs.ignore[OBJ_SEQSUB_CIT] = FALSE;*/
2633 
2634         MemSet ((Pointer) (gs.ignore), (int)(FALSE), (size_t) (OBJ_MAX * sizeof(Boolean)));
2635         gs.ignore[OBJ_SEQALIGN] = TRUE; /* this was being hit many times on big records */
2636 
2637         if (ajp->slp == NULL) {
2638                 slp = ValNodeNew(NULL);
2639                 slp->choice = SEQLOC_WHOLE;
2640                 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
2641                 gs.target = slp;
2642         } else {
2643                 gs.target = ajp->slp;
2644         }
2645         GatherEntity(ajp->entityID, &vnp, get_pubs, &gs);
2646         if (slp)
2647                 SeqLocFree(slp);
2648         if ((status = CheckPubs(ajp, bsp, &vnp)) <= 0) {
2649                 if (ajp->error_msgs == TRUE) {
2650                         MakeAnAccession(buffer, isip, 30);
2651                         flat2asn_delete_locus_user_string();
2652                         flat2asn_install_locus_user_string(buffer);
2653                         flat2asn_delete_accession_user_string();
2654                         flat2asn_install_accession_user_string(buffer);
2655                         ErrPostStr(SEV_ERROR, ERR_REFERENCE_NoValidRefs, 
2656                         "No refs found that would result in legal flatfile format");
2657                 }
2658                 /* found something. */
2659                 if (status < 0) {
2660                         ValNodeFree(vnp);
2661                         vnp = NULL;
2662                 }
2663         }
2664         gbp->Pub = OrganizePubList(vnp); 
2665         for (v = gbp->Pub, i=0; v != NULL; v= v->next, i++);
2666         
2667         return i;
2668 }
2669 /*************************************************************************
2670 *       Check for EMBL format first
2671 *       Call   to find div for Genbank records 
2672 *       Allocate a buffer for division  
2673 *       09-05-96
2674 *************************************************************************/
2675 static CharPtr GetDivision(Asn2ffJobPtr ajp, GBEntryPtr gbp)
2676 {
2677         ValNodePtr vnp;
2678         MolInfoPtr mol = NULL;
2679         EMBLBlockPtr eb=NULL;
2680         BioseqPtr bsp = gbp->bsp;
2681         Int2 buflen=4;
2682         CharPtr buffer;
2683         static CharPtr embl_divs [] = {
2684         "FUN","INV","MAM","ORG","PHG","PLN","PRI","PRO","ROD","SYN","UNA","VRL",
2685         "VRT","PAT","EST","STS", "HUM", "HTC"
2686         };
2687 
2688         buffer = MemNew(buflen);
2689         buffer[0] = '\0';
2690         if (ajp->format == EMBL_FMT || ajp->format == PSEUDOEMBL_FMT ||
2691                                         ajp->format == EMBLPEPT_FMT) {
2692                 if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_embl)) != NULL) {
2693                         eb = (EMBLBlockPtr) vnp->data.ptrvalue;
2694                 }
2695                 if (eb ) {
2696                         if (eb->div == 255) {
2697 /* kludge for HUM division */
2698                                 if ((vnp=GatherDescrByChoice(ajp, gbp, Seq_descr_molinfo)) != NULL) {
2699                                         gbp->descr = MemFree(gbp->descr);       
2700                                         mol = (MolInfoPtr) vnp->data.ptrvalue;
2701                                 }
2702                                 if (mol) {
2703                                         if (mol->tech == MI_TECH_survey) {
2704                                                 StringNCpy_0(buffer, "GSS", buflen);
2705                                                 return buffer;
2706                                         } else if (mol->tech == MI_TECH_htc) {
2707                                                 StringNCpy_0(buffer, "HTC", buflen);
2708                                                 return buffer;
2709                                         } else if (mol->tech == MI_TECH_htgs_1
2710                                                          || mol->tech == MI_TECH_htgs_2) {
2711                                                 StringNCpy_0(buffer, "HTG", buflen);
2712                                                 return buffer;
2713                                         }
2714                                 } else {
2715                                         StringNCpy_0(buffer, embl_divs[16], buflen);  /*HUM */
2716                                         return buffer;
2717                                 }
2718                         } else {
2719                                 StringNCpy_0(buffer, embl_divs[eb->div], buflen);
2720                                 return buffer;
2721                         }
2722                 }
2723                 BioseqGetGBDivCodeEx (bsp, buffer, buflen, FALSE, ajp->useSeqMgrIndexes);
2724                 if (buffer[0] == NULLB) {
2725                         StringNCpy_0(buffer, "   ", buflen);
2726                 }
2727                 return buffer;
2728         }
2729         BioseqGetGBDivCodeEx (bsp, buffer, buflen, FALSE, ajp->useSeqMgrIndexes);
2730         if (buffer[0] == NULLB) {
2731                 StringNCpy_0(buffer, "   ", buflen);
2732         }
2733         if (gbp->bsp && gbp->bsp->mol == Seq_mol_aa) {
2734                 return buffer;
2735         }
2736         if (ajp->genome_view) {
2737                 StringNCpy_0(buffer, "CON", buflen);
2738         }
2739         return buffer;
2740 }
2741 
2742 /***************************************************************************
2743 *
2744 *       UseGIforLocus to get the GI number for the locus and accession numbers.
2745 *       and to get division using Gather
2746 *
2747 ***************************************************************************/
2748 
2749 NLM_EXTERN void UseGIforLocus (Asn2ffJobPtr ajp)
2750 {
2751         CharPtr buffer;
2752         GBEntryPtr gbp;
2753         
2754         for (gbp=ajp->asn2ffwep->gbp; gbp; gbp=gbp->next) {
2755                 if (ajp->show_gi) {     
2756                         sprintf(gbp->accession, "%ld", (long) (gbp->gi));
2757                         sprintf(gbp->locus, "%-10ld", (long) (gbp->gi));
2758                 } else {
2759                         MemSet((VoidPtr) gbp->accession, ' ', 10);
2760                         MemSet((VoidPtr) gbp->locus, ' ', 10);
2761                 }
2762                 buffer = GetDivision(ajp, gbp);
2763                 if (buffer[0] != NULLB) {
2764                         StringCpy(gbp->div, "   ");
2765                 } else {
2766                         StringNCpy_0(gbp->div, buffer, 4);
2767                 }
2768                 MemFree(buffer);
2769         }
2770 }
2771 
2772 /*****************************************************************************
2773 *
2774 *       ValidateLocus takes a locus name and assures that the format is 
2775 *       proper. if segmented set adds the segment number at the end and 
2776 *       returns new locus
2777 *****************************************************************************/
2778 CharPtr ValidateLocus(Asn2ffJobPtr ajp, BioseqPtr bsp, CharPtr base_locus, Int2 total_segs, Int2 num_seg, CharPtr new_buf, CharPtr buf_locus, CharPtr orig_buf)
2779 
2780 {
2781         Boolean collision=FALSE;
2782         static Boolean order_init=FALSE;
2783         Char buf_ext[BUF_EXT_LENGTH], buffer[30];
2784         DbtagPtr db;
2785         int dex;
2786         Int2 /*UNUSED*/base_locus_max, buf_index, exp, length, num_of_digits;
2787         ObjectIdPtr ob;
2788         SeqIdPtr best_id, id;
2789         static Uint1 rel_order[NUM_SEQID];
2790 
2791         if (! order_init)
2792         {
2793                 for (dex=0; dex<18; dex++)
2794                         rel_order[dex] = 255;
2795                 rel_order[SEQID_GENERAL ] = 14;
2796         }
2797         order_init = TRUE;
2798 
2799         if (ASN2FF_AVOID_LOCUS_COLL || ASN2FF_REPORT_LOCUS_COLL)
2800         {       /* Check for LOCUS collisions with Karl's algorithm */
2801                 id = bsp->id;
2802                 best_id = SeqIdSelect( id, rel_order,NUM_SEQID);
2803                 if (best_id != NULL) {
2804                         if (best_id -> choice == SEQID_GENERAL){ /* always! */
2805 
2806                             db = (DbtagPtr) best_id -> data.ptrvalue;
2807                             if (StringCmp(db -> db, LOCUS_COLLISION_DB_NAME) == 0){
2808                                 ob = db -> tag;
2809                                     if ( ob != NULL)
2810                                     {
2811                                         if (ASN2FF_REPORT_LOCUS_COLL)
2812                                         {
2813                                             MakeAnAccession(buffer, id, 30);
2814                                             flat2asn_delete_locus_user_string();
2815                                             flat2asn_install_locus_user_string(buffer);
2816                                             flat2asn_delete_accession_user_string();
2817                                             flat2asn_install_accession_user_string(buffer);
2818                                              ErrPostStr(SEV_WARNING, ERR_LOCUS_LocusNameCollision, "");
2819                                         }
2820                                         if (ASN2FF_AVOID_LOCUS_COLL)
2821                                         {
2822                                             collision=TRUE;
2823                                             StringNCpy_0(new_buf, ob -> str, MAX_LOCUS_NAME_LEN+1);
2824                                         }
2825                                     }
2826                                 }
2827                         }
2828                 }
2829         }
2830 
2831         if (! collision)
2832         {
2833                 if (total_segs == 0)
2834                 {       /* Not a segmented set. */
2835                         if ((length=StringLen(buf_locus)) <= 0)
2836                                 new_buf = StringCpy(new_buf, orig_buf);
2837                         else
2838                                 new_buf = StringCpy(new_buf, buf_locus);
2839                                  
2840                         new_buf = CheckLocusLength (ajp->error_msgs, new_buf, MAX_LOCUS_NAME_LEN, 0);
2841                 }
2842                 else
2843                 {
2844                         if (total_segs < 10)
2845                                 num_of_digits = 1;
2846                         else if (total_segs < 100)
2847                                 num_of_digits = 2;
2848                         else if (total_segs < 1000)
2849                                 num_of_digits = 3;
2850                         else 
2851                         {
2852                                 num_of_digits = 4;
2853                                 ErrPostStr(SEV_INFO, ERR_SEGMENT_MoreThan1000Segs, "");
2854                         }
2855                         if (num_seg < 10)
2856                                 exp = 1;
2857                         else if (num_seg < 100)
2858                                 exp = 2;
2859                         else if (num_seg < 1000)
2860                                 exp = 3;
2861                         base_locus_max = MAX_LOCUS_NAME_LEN - num_of_digits;
2862                         length = StringLen(base_locus);
2863                         StringCpy(new_buf, base_locus);
2864                         MemSet((VoidPtr) buf_ext, '\0', BUF_EXT_LENGTH);        
2865                         MemSet((VoidPtr) buf_ext, '0', num_of_digits);  
2866                         sprintf(buf_ext+num_of_digits-exp, "%ld", (long) num_seg);
2867                         buf_index = 0;
2868                         while (buf_ext[buf_index] != '\0')
2869                         {
2870                              new_buf[length+buf_index] = buf_ext[buf_index];
2871                              buf_index++;
2872                          }
2873                          new_buf[length+buf_index] = '\0';
2874                 }
2875         }
2876         
2877         return new_buf;
2878 }       /* ValidateLocus */
2879 
2880 /***************************************************************************
2881 *       example: NM_000756
2882 ***************************************************************************/
2883 static Int2 ValidateOtherAccession(CharPtr new_buf, CharPtr orig_buf)
2884 {
2885         Int2 count;
2886         Boolean FirstLetter=FALSE, FiveNum = FALSE;
2887 
2888         if (orig_buf == NULL || orig_buf[0] == '\0') {
2889                 return -3;
2890         }
2891         if (StringLen(orig_buf) >= 10) {
2892                 return -4;
2893         }
2894         if (orig_buf[0] != 'N') {
2895                 return -1;
2896         }
2897         if (orig_buf[2] != '_') {
2898                 return -1;
2899         }
2900         for (count=3; count < 8; count++) {
2901                 if(! IS_DIGIT(orig_buf[count]))
2902                         break;
2903         }
2904         if (count == 8 && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' ')) {
2905                 StringCpy(new_buf, orig_buf);
2906                 return 0;
2907 
2908         } else {
2909                 return -1;
2910         }
2911 }
2912 
2913 
2914 /****************************************************************************
2915 *
2916 *       ValidateAccession takes an accession number and makes sure it is
2917 *       in the proper format (starts with a capital letter that is followed
2918 *       by five numbers).
2919 *
2920 *       Return values are:
2921 *        0: no problem
2922 *       -1: Accession did not start with a letter (or two letters)
2923 *       -2: Accession did not contain five numbers (or six numbers after 2 letters)
2924 *       -3: the original Accession number to be validated was NULL
2925 *       -4: the original Accession number is too long (>10)
2926 *
2927 ****************************************************************************/
2928 Int2 ValidateAccession(CharPtr new_buf, CharPtr orig_buf)
2929 {
2930         Int2 count, start_count, stop_count;
2931         Boolean FirstLetter=FALSE, FiveNum = FALSE;
2932 
2933         if (orig_buf == NULL || orig_buf[0] == '\0') {
2934                 return -3;
2935         }
2936         if (StringLen(orig_buf) >= 10) {
2937                 return -4;
2938         }
2939         if (orig_buf[0] < 'A' || orig_buf[0] > 'Z') {
2940                 return -1;
2941         } else {
2942                 FirstLetter = TRUE;
2943         }
2944         for (count=1; count < 5; count++) {
2945                 if(! IS_DIGIT(orig_buf[count]))
2946                         break;
2947         }
2948         if (count == 5 && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' '))
2949                 FiveNum = TRUE;
2950 
2951         if (FirstLetter == TRUE) {
2952                 if (FiveNum == TRUE) {           /* 1 + 5 accession*/
2953                         StringCpy(new_buf, orig_buf);
2954                         return 0;
2955                 } else if (IS_ALPHA(orig_buf[1])) {      /* 2 + 6 accession */
2956                         if (orig_buf[1] < 'A' || orig_buf[1] > 'Z') {
2957                                 return -1;
2958                         }
2959                         start_count = 2;
2960                         stop_count = 7;
2961                         if (orig_buf[0] == 'N' || orig_buf[0] == 'X') {
2962                                 if ((orig_buf[1] == 'M' || orig_buf[1] == 'C' 
2963                                                 || orig_buf[1] == 'T'  || orig_buf[1] == 'P' 
2964                                                                                                          || orig_buf[1] == 'G') 
2965                                                                                                 &&  orig_buf[2] == '_') {
2966                                                 start_count = 3;
2967                                                 stop_count = 8;
2968                                 }
2969                         }
2970                         for (count=start_count; count < stop_count; count++) {
2971                                 if(! IS_DIGIT(orig_buf[count]))
2972                                         break;
2973                         }                       
2974                         if (count == stop_count && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' ')) {
2975                                 StringCpy(new_buf, orig_buf);
2976                                 return 0;
2977                         } else if (IS_ALPHA(orig_buf[2])) {      /* 3 + 5 accession */
2978                                 if (orig_buf[0] =='A' || orig_buf[0] == 'B' || orig_buf[0] == 'C') {
2979                                         for (count=3; count < 7; count++) {
2980                                                 if(! IS_DIGIT(orig_buf[count]))
2981                                                         break;
2982                                         }                       
2983                                         if (count == 7 && (orig_buf[count+1] == '\0' || orig_buf[count+1] == ' ')) {
2984                                                 StringCpy(new_buf, orig_buf);
2985                                                 return 0;
2986                                         } else {
2987                                                 return -2;
2988                                         }
2989                                 } else {
2990                                         return -2;
2991                                 }
2992                         } else {
2993                                 return -2;
2994                         }
2995                 } else {
2996                         return -2;
2997                 }
2998         } else {
2999                 return -1;
3000         }
3001 }
3002 
3003 /**************************************************************************
3004 *MakeBaseAccession
3005 *
3006 *       GetBaseAccession takes a BioseqPtr bsp and returns an 
3007 *       accession if 1.) the set is segmented, and 2.) there is
3008 *       an accession at a higher level.  Otherwise NULL is returned.
3009 *       The user should deallocate the CharPtr.
3010 **************************************************************************/
3011 
3012 CharPtr MakeBaseAccession (BioseqPtr bsp)
3013 
3014 {
3015         Char buffer[MAX_ACCESSION_LEN+1];
3016         CharPtr buf_acc=buffer;
3017         Int2 status = -1;
3018         SeqIdPtr sip, isip;
3019         TextSeqIdPtr tsip;
3020 
3021 
3022         if (bsp == NULL)
3023                 return NULL;
3024         isip = bsp->id;
3025         sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3026         if (sip && (sip->choice == SEQID_GENBANK || 
3027                 sip->choice == SEQID_EMBL || 
3028                 sip->choice == SEQID_PIR || 
3029                 sip->choice == SEQID_SWISSPROT || 
3030                 sip->choice == SEQID_DDBJ || 
3031                 sip->choice == SEQID_PRF ||
3032                 sip->choice == SEQID_OTHER ||
3033                 sip->choice == SEQID_TPG ||
3034                 sip->choice == SEQID_TPE ||
3035                 sip->choice == SEQID_TPD))
3036         {
3037                 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3038                 switch (sip->choice) {
3039                         case SEQID_GENBANK:
3040                         case SEQID_EMBL:
3041                         case SEQID_DDBJ:
3042                         case SEQID_TPG:
3043                         case SEQID_TPE:
3044                         case SEQID_TPD:
3045                         case SEQID_PIR:
3046                         case SEQID_SWISSPROT:
3047                                 status = ValidateAccession(buf_acc, tsip->accession);
3048                 }
3049         }
3050         if (status < 0)
3051                 return NULL;
3052                 
3053         return (StringSave(buf_acc));
3054 }
3055 
3056 /***************************************************************************
3057 *
3058 *       MakeBaseLocus takes a Asn2ffJobPtr and a CharPtr (base_locus)
3059 *       and returns a CharPtr which is the new base_locus.  Checking is 
3060 *       done to assure suitability of the new base locus name (i.e., 
3061 *       no more than 15 characters for less than 10 segments and no more
3062 *       than 14 characters for 10 or more segments).
3063 *
3064 ***************************************************************************/
3065 
3066 CharPtr MakeBaseLocusAwp (Asn2ffJobPtr ajp, CharPtr base_locus)
3067 
3068 {
3069         BioseqPtr bsp, bbsp = NULL;
3070         Int2  index, length, base_locus_max, name_len, num_of_digits, num_seg;
3071         SeqIdPtr sip, bsip=NULL, isip=NULL;
3072         TextSeqIdPtr tsip = NULL, btsip=NULL;
3073         ObjectIdPtr obj;
3074         Char buffer[21], temp_buf[21];
3075         CharPtr localbuf=buffer, name, ptr=temp_buf;
3076         CharPtr tmp = "SEG_";
3077         Asn2ffWEPtr awp;
3078         GBEntryPtr      gbp;
3079         
3080         base_locus[0] = '\0'; 
3081         awp = ajp->asn2ffwep;
3082         num_seg = awp->total_seg;
3083         if (num_seg < 10)
3084                 num_of_digits = 1;
3085         else if (num_seg < 100)
3086                 num_of_digits = 2;
3087         else if (num_seg < 1000)
3088                 num_of_digits = 3;
3089         else 
3090         {
3091                 ErrPostStr(SEV_INFO, ERR_SEGMENT_MoreThan1000Segs, "");
3092         }
3093         base_locus_max = MAX_LOCUS_NAME_LEN - num_of_digits;
3094 /* look for base locus in segmented bioseq */
3095         awp = ajp->asn2ffwep;
3096         bbsp = awp->seg; /* segmented Bioseq in segmented set */
3097         if (bbsp) {
3098                 bsip = SeqIdSelect(bbsp->id, fasta_order, NUM_SEQID);
3099         }
3100         if (bsip && (bsip->choice == SEQID_GENBANK || 
3101                                 bsip->choice == SEQID_EMBL ||
3102                                 bsip->choice == SEQID_DDBJ ||
3103                                 bsip->choice == SEQID_SWISSPROT ||
3104                                 bsip->choice == SEQID_PIR || 
3105                                 bsip->choice == SEQID_OTHER || 
3106                                 bsip->choice == SEQID_TPG || 
3107                                 bsip->choice == SEQID_TPE || 
3108                                 bsip->choice == SEQID_TPD)) {
3109                 btsip = (TextSeqIdPtr) bsip->data.ptrvalue;
3110         }
3111         if (btsip && StringLen(btsip->name) > 0) {
3112                 localbuf = StringCpy(localbuf, btsip->name);
3113                 if (StringNCmp(localbuf, tmp, 4) == 0) {
3114                 /* check if name starts with "SEG_", remove if it does. */
3115                     StringCpy(ptr, localbuf+4);
3116                     length = StringLen(ptr);
3117                     ptr[length] = '\0';
3118                     if (ptr[length-1] == '1') {
3119                        bsp = awp->gbp->bsp;
3120                        isip = bsp->id;
3121                        sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3122                        if (sip &&
3123                         (name=((TextSeqIdPtr)sip->data.ptrvalue)->name) != NULL) {
3124                           name_len = StringLen(name);
3125                           if (name_len == length) {
3126                              if (name[length-1] == ptr[length-1])
3127                                for (index=2; index >= num_of_digits; index++) {
3128                                /* The following is *really* '0'! */
3129                                   if (ptr[length-index] == '0') {
3130                                      if (ptr[length-index] == name[length-index]) {
3131                                         StringNCpy(base_locus, ptr, length-index);
3132                                         base_locus[length-index] = '\0';
3133                                      } else {
3134                                         StringNCpy(base_locus, ptr, length-index+1);
3135                                         base_locus[length-index+1] = '\0';
3136                                      }
3137                                   } else {
3138                                      StringNCpy(base_locus, ptr, length-index+1);
3139                                      base_locus[length-index+1] = '\0';
3140                                      break;
3141                                   }
3142                                }
3143                             }
3144                          }
3145                     } 
3146                     /* If nothing else worked, use base locus anyway. */
3147                     if (base_locus[0] == '\0')
3148                         StringCpy(base_locus, ptr);
3149                 }
3150                 if (base_locus[0] == '\0')
3151                         StringCpy(base_locus, btsip->name);
3152 
3153                 /*check for length, truncate if necessary.      */
3154                 base_locus = CheckLocusLength (ajp->error_msgs, base_locus, base_locus_max, num_seg);
3155                 return base_locus;
3156         }
3157 
3158 /* Look for at least one sensible locus in all segments. */
3159         for (gbp = awp->gbp; gbp; gbp=gbp->next) {
3160                 bsp = gbp->bsp;
3161                 isip = bsp->id;
3162                 sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3163                 if (sip && (sip->choice == SEQID_GENBANK || 
3164                                         sip->choice == SEQID_EMBL ||
3165                                         sip->choice == SEQID_DDBJ ||
3166                                         sip->choice == SEQID_SWISSPROT ||
3167                                         sip->choice == SEQID_OTHER || 
3168                                         sip->choice == SEQID_PIR || 
3169                                         sip->choice == SEQID_TPG || 
3170                                         sip->choice == SEQID_TPE || 
3171                                         sip->choice == SEQID_TPD)) {
3172                         tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3173                 }
3174                 if (tsip && tsip->name && StringLen(tsip->name) > 0) {
3175                         base_locus = StringCpy(base_locus, tsip->name);
3176                         length = StringLen(base_locus);
3177                         base_locus[length-num_of_digits] = '\0';
3178                         base_locus = CheckLocusLength (ajp->error_msgs,
3179                                                                         base_locus, base_locus_max, num_seg);
3180                         return base_locus;
3181                 }
3182         }
3183 
3184 /* No option left but to take the first locus name.*/
3185         bsp = awp->gbp->bsp;
3186         isip = bsp->id;
3187         sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3188         if (sip && sip->choice == SEQID_LOCAL) {
3189                 obj = (ObjectIdPtr) sip->data.ptrvalue;
3190                 if ( obj->str == NULL) {
3191                         sprintf(base_locus, "%ld", (long)(obj->id));
3192                 } else {
3193                         base_locus = StringCpy(base_locus, obj->str);
3194                 }
3195         } else if (sip && (sip->choice == SEQID_GENBANK || 
3196                                 sip->choice == SEQID_EMBL ||
3197                                 sip->choice == SEQID_SWISSPROT ||
3198                                 sip->choice == SEQID_DDBJ ||
3199                                 sip->choice == SEQID_PRF ||
3200                                 sip->choice == SEQID_PDB ||
3201                                 sip->choice == SEQID_OTHER ||
3202                                 sip->choice == SEQID_PIR || 
3203                                 sip->choice == SEQID_TPG || 
3204                                 sip->choice == SEQID_TPE || 
3205                                 sip->choice == SEQID_TPD)) {
3206                 tsip = (TextSeqIdPtr)sip->data.ptrvalue;
3207                 base_locus = StringCpy(base_locus, tsip->name);
3208         }
3209         base_locus = CheckLocusLength (ajp->error_msgs, base_locus, base_locus_max, num_seg);
3210         return base_locus;
3211         
3212 }       /* MakeBaseLocusAwp */  
3213 
3214 static Boolean ValidateVersion(SeqIdPtr sid, Asn2ffJobPtr ajp)
3215 {
3216         TextSeqIdPtr tsip;
3217                 
3218         if (ajp->forgbrel == FALSE)
3219                 return TRUE;
3220         switch (sid->choice) {
3221         case SEQID_GENBANK:
3222         case SEQID_EMBL:
3223         case SEQID_DDBJ:
3224         case SEQID_OTHER:
3225         case SEQID_TPG:
3226         case SEQID_TPE:
3227         case SEQID_TPD:
3228                 tsip = (TextSeqIdPtr) sid->data.ptrvalue;
3229                 if (tsip->version == 0 || tsip->version == INT2_MIN) {
3230                         return FALSE;
3231                 }
3232         }
3233         return TRUE;
3234 }
3235 
3236 NLM_EXTERN void GetLocusPartsAwp (Asn2ffJobPtr ajp)
3237 {
3238         BioseqPtr bsp=NULL;
3239         Asn2ffWEPtr awp;
3240         SeqIdPtr sip, isip;
3241         Int2 num_seg=0, total_segs=0;
3242         TextSeqIdPtr tsip;
3243         Char buf_a[MAX_ACCESSION_LEN+1], buf_l[MAX_ACCESSION_LEN+1],
3244                  base_l[MAX_ACCESSION_LEN+1];
3245         CharPtr buffer, buf_acc=buf_a, buf_locus=buf_l, base_locus=base_l, base_a;
3246         GBEntryPtr gbp;
3247         CharPtr loc;
3248         Int2 acc_len;
3249 
3250         awp = ajp->asn2ffwep;
3251         if (ajp->slp) {
3252                 for (gbp = awp->gbp; gbp; gbp = gbp->next) {
3253                         buffer = GetDivision(ajp, gbp);
3254                         if (buffer[0] != NULLB) {
3255                                 StringNCpy_0(gbp->div, buffer, 4);
3256                                 MemFree(buffer);
3257                         }
3258                         if ((bsp = BioseqFindFromSeqLoc(ajp->slp)) != NULL) {
3259                                 CharPtr flatloc;
3260 
3261                                 isip = SeqIdSelect(gbp->bsp->id, fasta_order, NUM_SEQID);
3262                                 if (isip == NULL)
3263                                         isip = gbp->bsp->id;
3264                                 SeqIdWrite(isip, 
3265                                         buf_acc, PRINTID_TEXTID_ACCESSION, MAX_ACCESSION_LEN);
3266 
3267                                 if (ajp->old_locus_fmt == TRUE)
3268                                   sprintf(gbp->locus, "%-10s", buf_acc);
3269                                 else
3270                                   sprintf(gbp->locus, "%-16s", buf_acc);
3271                                 
3272                                 flatloc =  FlatLoc(bsp, ajp->slp);
3273                                 sprintf(gbp->accession, "%s REGION: %s", buf_acc, flatloc);
3274                                 flatloc = MemFree(flatloc);
3275                                 if (ajp->show_version) {
3276                                         SeqIdWrite(isip,
3277                                         buf_acc, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3278                                         StringNCpy_0(gbp->version,
3279                                                                  buf_acc, MAX_ACCESSION_LEN+1);
3280                                 }
3281                         } else {
3282                                 loc = SeqLocPrint(ajp->slp);
3283                                 StringNCpy_0(gbp->locus,  loc, MAX_LOCUS_NAME_LEN+1); 
3284                                 acc_len = MIN(StringLen(loc), 60);
3285                                 StringNCpy_0(gbp->accession, loc, acc_len+1);
3286                                 MemFree(loc);
3287                         }
3288                 }
3289                 return; 
3290         }
3291         if (ajp->only_one) {
3292                 for (gbp = awp->gbp; gbp; gbp = gbp->next) {
3293                         if (gbp->bsp == NULL) {
3294                                 continue;
3295                         }
3296                         bsp = gbp->bsp;
3297                         GetGINumber(gbp);
3298                         buffer = GetDivision(ajp, gbp);
3299                         if (buffer[0] != NULLB) {
3300                                 StringNCpy_0(gbp->div, buffer, 4);
3301                                 MemFree(buffer);
3302                         }
3303                         isip = SeqIdSelect(gbp->bsp->id, fasta_order, NUM_SEQID);
3304                         if (isip == NULL)
3305                                 isip = gbp->bsp->id;
3306                         SeqIdWrite(isip, buf_acc, 
3307                                         PRINTID_TEXTID_ACCESSION, MAX_ACCESSION_LEN+1);
3308                         StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3309 
3310                         if (ajp->old_locus_fmt == TRUE)
3311                           sprintf(gbp->locus, "%-10s", buf_acc); 
3312                         else
3313                           sprintf(gbp->locus, "%-16s", buf_acc); 
3314 
3315                         if (ajp->show_version) {
3316                                 SeqIdWrite(isip, buf_acc, 
3317                                         PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3318                                 StringNCpy_0(gbp->version, buf_acc, MAX_ACCESSION_LEN+1);
3319                         }
3320                 }
3321                 return;
3322         }
3323         total_segs = awp->total_seg; 
3324         base_a = MakeBaseAccession(awp->seg);
3325         base_locus = MakeBaseLocusAwp(ajp, base_locus);
3326         StringNCpy_0(ajp->asn2ffwep->base_name, base_locus, 11);
3327         
3328         for (gbp = awp->gbp; gbp != NULL; gbp = gbp->next) {
3329                 if (gbp->bsp == NULL) {
3330                         continue;
3331                 }
3332                 bsp = gbp->bsp;
3333                 if ((isip = gbp->bsp->id) == NULL) {
3334                         continue;
3335                 } 
3336                 buffer = GetDivision(ajp, gbp);
3337                 if (buffer[0] != NULLB) {
3338                         StringNCpy_0(gbp->div, buffer, 4);
3339                         MemFree(buffer);
3340                 }
3341                 num_seg = gbp->num_seg; 
3342                 sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3343                 if (sip == NULL) {
3344                         sip = isip;
3345                 }
3346                 switch (sip->choice) {
3347                     case SEQID_GENBANK:
3348                 case SEQID_EMBL:
3349                 case SEQID_DDBJ:
3350                 case SEQID_OTHER:
3351                         case SEQID_TPG:
3352                         case SEQID_TPE:
3353                         case SEQID_TPD:
3354                                 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3355                                 if ((ValidateAccession(buf_acc, tsip->accession)) < 0) {
3356                                         if (base_a != NULL) {
3357                                                 StringNCpy_0(buf_acc, base_a, MAX_ACCESSION_LEN+1);
3358                                         } else {
3359                                                 buf_acc = MakeAnAccession(buf_acc, isip, 
3360                                                                                                         MAX_ACCESSION_LEN+1);
3361                                         }
3362                                 }
3363                                 buf_locus = ValidateLocus(ajp, bsp, base_locus, 
3364                                         total_segs, num_seg, buf_locus, tsip->name, buf_acc); 
3365                                 StringNCpy_0(gbp->accession, 
3366                                         buf_acc, MAX_ACCESSION_LEN+1);
3367                                 if (sip->choice == SEQID_OTHER 
3368                                                 && StringNCmp(tsip->accession, "NT_", 3) == 0) {
3369                                         if (ajp->old_locus_fmt == TRUE)
3370                                           sprintf(gbp->locus, "%-10s", buf_acc);
3371                                         else
3372                                           sprintf(gbp->locus, "%-16s", buf_acc);
3373                                 } else {
3374                                         if (ajp->old_locus_fmt == TRUE)
3375                                           sprintf(gbp->locus, "%-10s", buf_locus);
3376                                         else
3377                                           sprintf(gbp->locus, "%-16s", buf_locus);
3378                                 }
3379                                 num_seg--;
3380                         if (ajp->show_version) {
3381                                 if (ValidateVersion(sip, ajp) == FALSE) {
3382                                         gbp->bsp = NULL;
3383                                         ErrPostEx(SEV_ERROR, ERR_ACCESSION_No_VERSION_Number, "%s", gbp->accession);
3384                                         continue;
3385                                 }
3386                                 SeqIdWrite(sip, buf_acc, 
3387                                         PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+6);
3388                                 StringNCpy_0(gbp->version, buf_acc, MAX_ACCESSION_LEN+6);
3389                         }
3390                                 break;
3391                     case SEQID_LOCAL:
3392                                 if ((((ObjectIdPtr)sip->data.ptrvalue)->str) == NULL) {
3393                                         buf_acc[0] = 'X';
3394                                         sprintf(buf_acc+1, "%ld", 
3395                                                 (long)((ObjectIdPtr)sip->data.ptrvalue)->id);
3396                                 } else {
3397                                         StringNCpy_0(buf_acc,
3398                                  ((ObjectIdPtr)sip->data.ptrvalue)->str, MAX_ACCESSION_LEN+1);
3399                                 }
3400                                 buf_locus = ValidateLocus(ajp, bsp, base_locus, 
3401                                 total_segs, num_seg,buf_locus,  buf_acc, buf_acc); 
3402                                 StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3403 
3404                                 if (ajp->old_locus_fmt == TRUE)
3405                                   sprintf(gbp->locus, "%-10s", buf_locus); 
3406                                 else
3407                                   sprintf(gbp->locus, "%-16s", buf_locus); 
3408 
3409                                 num_seg--;
3410                                 break;
3411                    case SEQID_GI:
3412                         sprintf(buf_acc, "%ld", (long) (sip->data.intvalue));
3413                         buf_locus = ValidateLocus(ajp, bsp, base_locus, 
3414                                         total_segs, num_seg, buf_locus, buf_acc, buf_acc); 
3415                         StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3416 
3417                         if (ajp->old_locus_fmt == TRUE)
3418                           sprintf(gbp->locus, "%-10s", buf_locus); 
3419                         else
3420                           sprintf(gbp->locus, "%-16s", buf_locus); 
3421 
3422                         num_seg--;
3423                         break;
3424 
3425                    case SEQID_PIR:
3426                    case SEQID_SWISSPROT:
3427                         tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3428                         if ((ValidateAccession(buf_acc, tsip->accession)) < 0) {
3429                                 if (base_a != NULL) {
3430                                         StringNCpy_0(buf_acc, base_a, MAX_ACCESSION_LEN+1);
3431                                 } else {
3432                                         buf_acc = MakeAnAccession(buf_acc, 
3433                                                 isip, MAX_ACCESSION_LEN);
3434                                 }
3435                         }
3436                         if (ajp->show_version) {
3437                                 SeqIdWrite(sip, buf_acc, 
3438                                         PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+6);
3439                                 StringNCpy_0(gbp->version, buf_acc, MAX_ACCESSION_LEN+6);
3440                         }
3441                         buf_locus = ValidateLocus(ajp, bsp, base_locus, 
3442                                 total_segs, num_seg, buf_locus, tsip->name, buf_acc); 
3443                         StringNCpy_0(gbp->accession, buf_acc, MAX_ACCESSION_LEN+1);
3444                         if (sip->choice == SEQID_OTHER 
3445                                         && StringNCmp(tsip->accession, "NT_", 3) == 0) {
3446                                 if (ajp->old_locus_fmt == TRUE)
3447                                   sprintf(gbp->locus, "%-10s", buf_acc);
3448                                 else
3449                                   sprintf(gbp->locus, "%-16s", buf_acc);
3450                         } else {
3451                                 if (ajp->old_locus_fmt == TRUE)
3452                                   sprintf(gbp->locus, "%-10s", buf_locus);
3453                                 else
3454                                   sprintf(gbp->locus, "%-16s", buf_locus);
3455                         }
3456                         num_seg--;
3457                         
3458                         break;
3459                    default:
3460                         buf_acc = MakeAnAccession(buf_acc, isip, MAX_ACCESSION_LEN+1);
3461                         buf_locus = ValidateLocus(ajp, bsp, base_locus, 
3462                                 total_segs, num_seg, buf_locus, buf_acc, buf_acc); 
3463                         StringNCpy_0(gbp->accession, buf_acc,
3464                                                                                                          MAX_ACCESSION_LEN+1);
3465                         if (ajp->old_locus_fmt == TRUE)
3466                           sprintf(gbp->locus, "%-10s", buf_locus); 
3467                         else
3468                           sprintf(gbp->locus, "%-16s", buf_locus); 
3469 
3470                         num_seg--;
3471                         break;
3472                 }
3473         }
3474         if (base_a != NULL)
3475                 base_a = MemFree(base_a);
3476         
3477 }
3478 /**************************************************************************
3479 *       Looks in the descriptor  and feature->xref for any extra-accessions.
3480 **************************************************************************/
3481 
3482 NLM_EXTERN void AddExtraAccessions(Asn2ffJobPtr ajp, GBEntryPtr gbp)
3483 
3484 {
3485         BioseqPtr bsp;
3486         Char buffer[10];
3487         CharPtr ptr=buffer, ac;
3488         EMBLBlockPtr eb;
3489         GBBlockPtr gb;
3490         Int2 index, status;
3491         SeqFeatPtr sfp;
3492         SeqIdPtr xid;
3493         SeqIntPtr si;
3494         SeqLocPtr xref;
3495         TextSeqIdPtr text;
3496         ValNodePtr extra_access=NULL, location=NULL, vnp;
3497         SortStructPtr p;
3498         Boolean /*UNUSED*/ncbi = FALSE;
3499 
3500         if (gbp == NULL) {
3501                 return;
3502         }
3503         if ((bsp = gbp->bsp) == NULL) {
3504                 return;
3505         }
3506         ac = gbp->accession;
3507         if (ac && *ac == 'U') {
3508                 ncbi = TRUE;
3509         }
3510         for (vnp = bsp->descr; vnp; vnp=vnp->next) {
3511                 if (vnp->choice == Seq_descr_genbank) {
3512                         break;
3513                 }
3514         }
3515         if (vnp != NULL) {
3516                 gb = (GBBlockPtr) vnp->data.ptrvalue;
3517                 extra_access = gb->extra_accessions;
3518                 if (extra_access != NULL) {
3519                         for (vnp=extra_access; vnp != NULL; vnp=vnp->next) {
3520                                 status = ValidateAccession(ptr, vnp->data.ptrvalue);
3521                                 if (status == 0) {
3522                                         if (ajp->format == EMBL_FMT || ajp->format ==
3523                                                  PSEUDOEMBL_FMT || ajp->format == EMBLPEPT_FMT) {
3524                                                 ff_AddChar(';');
3525                                         } else {
3526                                                 ff_AddChar(' ');
3527                                         }
3528                                 /*      www_extra_acc(ptr, ncbi); */
3529                                         ff_AddString( ptr);
3530                                 }
3531                         }
3532                 }
3533         }
3534         for (vnp = bsp->descr; vnp; vnp=vnp->next) {
3535                 if (vnp->choice == Seq_descr_embl) {
3536                         break;
3537                 }
3538         }
3539 
3540         if (vnp != NULL) {
3541                 eb = (EMBLBlockPtr) vnp->data.ptrvalue;
3542                 extra_access = eb->extra_acc;
3543                 if (extra_access != NULL) {
3544                         for (vnp=extra_access; vnp != NULL; vnp=vnp->next) {
3545                                 status = ValidateAccession(ptr, vnp->data.ptrvalue);
3546                                 if (status == 0) {
3547                                         if (ajp->format == EMBL_FMT || ajp->format ==
3548                                                  PSEUDOEMBL_FMT || ajp->format == EMBLPEPT_FMT) {
3549                                                 ff_AddChar(';');
3550                                         } else {
3551                                                 ff_AddChar(' ');
3552                                         }
3553                                 /*      www_extra_acc(ptr, ncbi); */
3554                                         ff_AddString( ptr);
3555                                 }
3556                         }
3557                 }
3558         }
3559         if (gbp->feat) {
3560                 p = gbp->feat->Xreflist;
3561                 for (index=0; index < gbp->feat->sfpXrefsize; index++, p++) {
3562                         if (location == NULL) {
3563                                 location = ValNodeNew(NULL);
3564                                 si = SeqIntNew();
3565                                 location->choice = SEQLOC_INT;
3566                                 location->data.ptrvalue = si;
3567                         }
3568                         si->from = 0;
3569                         bsp = gbp->bsp;
3570                         si->to = bsp->length - 1;
3571                         si->id = bsp->id;       /* Don't delete id!! */
3572                         if ((sfp = p->sfp) == NULL) {
3573                                 GatherItemWithLock(p->entityID,
3574                                         p->itemID, p->itemtype, &sfp, find_item);
3575                         }
3576                         if (sfp == NULL) {
3577                                 continue;
3578                         }
3579                         if (SeqLocCompare(sfp->location, location) != 0) {
3580                                 xref = (SeqLocPtr) sfp->data.value.ptrvalue;
3581                                 xid = (SeqIdPtr) xref->data.ptrvalue;
3582                                 if (xid->choice == 5 || xid->choice == 6 ||
3583                                         xid->choice == 13) {
3584                                         text = (TextSeqIdPtr) xid->data.ptrvalue;
3585                                         status = ValidateAccession(ptr, text->accession);
3586                                         if (status == 0) {
3587                                                 if (ajp->format == EMBL_FMT || ajp->format ==
3588                                                          PSEUDOEMBL_FMT || ajp->format == EMBLPEPT_FMT) {
3589                                                         ff_AddChar(';');
3590                                                 } else {
3591                                                         ff_AddChar(' ');
3592                                                 }
3593                                         /*      www_extra_acc(ptr, ncbi); */
3594                                                 ff_AddString( ptr);
3595                                         }
3596                                 }
3597                         }
3598                 }
3599         }
3600 
3601         if (location) {
3602                 si->id = NULL;
3603                 SeqIntFree(si);
3604                 ValNodeFree(location);
3605         }
3606 
3607         return;
3608 }static Boolean CompareToAwpList (BioseqPtr bsp, Asn2ffWEPtr    awp)
3609 
3610 {
3611         GBEntryPtr gbp;
3612 
3613         if (bsp == NULL) {
3614                 return FALSE;
3615         }
3616         for (gbp = awp->gbp; gbp != NULL; gbp = gbp->next) {
3617                 if (bsp == gbp->bsp) {
3618                         return TRUE;
3619                 }
3620         }
3621         return FALSE;   
3622 }
3623 
3624 static GBEntryPtr GBEntryNew(void)
3625 {
3626         GBEntryPtr gbp;
3627         
3628         gbp = (GBEntryPtr) MemNew(sizeof(GBEntry));
3629         gbp->feat = NULL;
3630         gbp->descr = NULL;
3631         gbp->source_info = NULL;
3632         gbp->comm = NULL;
3633         gbp->map = FALSE;
3634         
3635         return gbp; 
3636 }
3637 
3638 static GBEntryPtr tie_next_gbp(GBEntryPtr head, GBEntryPtr next)
3639 /*  ties next node to the end of the chain */
3640 {
3641         GBEntryPtr v;
3642 
3643         if (head == NULL) {
3644                 return next;
3645         }
3646         for (v = head; v->next != NULL; v = v->next) {
3647                 v = v;
3648         }
3649         v->next = next;
3650         return head;
3651 }
3652 
3653 static GBEntryPtr CreateGBEntry(Asn2ffWEPtr awp, BioseqPtr bsp, 
3654 Int2 eID, Int2 iID, Int2 itype)
3655 {
3656         GBEntryPtr      gbep;
3657 
3658         gbep = GBEntryNew();
3659         gbep->bsp = bsp;
3660         gbep->length = bsp->length;
3661         gbep->entityID = eID;
3662         gbep->itemID = iID;
3663         gbep->itemtype = itype;
3664         awp->gbp = tie_next_gbp(awp->gbp, gbep);
3665         
3666         return gbep;
3667 }
3668 
3669 /************************************************************************
3670 *       SeqToAwp()
3671 *               gather callback to create a list of GenBank entries
3672 *************************************************************************/
3673 
3674 NLM_EXTERN Boolean SeqToAwp (GatherContextPtr gcp)
3675 
3676 {
3677         BioseqPtr bsp;
3678         SeqEntryPtr ep;
3679         BioseqSetPtr bssp;
3680         SeqLocPtr slp;
3681         Asn2ffWEPtr     awp;
3682         Asn2ffJobPtr ajp;
3683         GBEntryPtr      gbep;
3684         SeqIdPtr isip, sip;
3685         Uint1 format;
3686         Boolean is_www = get_www();
3687 
3688         ajp = (Asn2ffJobPtr) gcp->userdata;
3689         awp = ajp->asn2ffwep;
3690         format = ajp->format;
3691         switch (gcp->thistype)
3692         {
3693                 case OBJ_BIOSEQ:
3694                         bsp = gcp->thisitem;
3695                         if (bsp->repr == Seq_repr_seg) {
3696                                 if (ajp->genome_view || ajp->only_one) {
3697                                         gbep = CreateGBEntry(awp, bsp, gcp->entityID, 
3698                                                 gcp->itemID, gcp->thistype);
3699                                                 if (ajp->only_one && !ajp->map_view) {
3700                                                         return FALSE;
3701                                                 }
3702                                 }
3703                                 if (ISA_na(bsp->mol) && (format == GENBANK_FMT ||
3704                                         format == EMBL_FMT || format == PSEUDOEMBL_FMT
3705                                                 || format == GRAPHIK_FMT)) {
3706                                         awp->seg = bsp;
3707                                 } else if (ISA_aa(bsp->mol) && 
3708                                         (format == GENPEPT_FMT || format == EMBLPEPT_FMT
3709                                                 || format == GRAPHIK_FMT)) {
3710                                         awp->seg = bsp;
3711                                 }
3712                         }
3713                         if (ASN2FF_LOOK_FOR_SEQ == FALSE) {
3714                                 if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT
3715                                         || (ISA_aa(bsp->mol) && format == GRAPHIK_FMT)) {
3716                                         if (ISA_aa(bsp->mol) && (bsp->repr == Seq_repr_raw 
3717                         || bsp->repr == Seq_repr_const || bsp->repr == Seq_repr_delta 
3718                         ||      ((is_www || ajp->mode != RELEASE_MODE) && bsp->repr == Seq_repr_virtual))) {
3719                                                 gbep = CreateGBEntry(awp, bsp, gcp->entityID, 
3720                                                         gcp->itemID, gcp->thistype);
3721                                                 ++awp->total_seg;
3722                                                 gbep->num_seg = awp->total_seg;
3723                                         }
3724                                 } else {
3725                                         if (ISA_na(bsp->mol) && (bsp->repr == Seq_repr_raw 
3726                                 || bsp->repr == Seq_repr_const|| bsp->repr == Seq_repr_delta
3727                                 ||      (is_www && bsp->repr == Seq_repr_virtual))) {
3728                                                 if (ASN2FF_LOCAL_ID == FALSE) {
3729                                                         sip = SeqIdSelect(bsp->id, fasta_order, NUM_SEQID);
3730                                                         if (sip && sip->choice != SEQID_LOCAL) {
3731                                                                 gbep = CreateGBEntry(awp, bsp, gcp->entityID, 
3732                                                                         gcp->itemID, gcp->thistype);
3733                                                                 ++awp->total_seg;
3734                                                                 gbep->num_seg = awp->total_seg;
3735                                                         }
3736                                                 } else {
3737                                                         gbep = CreateGBEntry(awp, bsp, gcp->entityID, 
3738                                                                 gcp->itemID, gcp->thistype);
3739                                                         ++awp->total_seg;
3740                                                         gbep->num_seg = awp->total_seg;
3741                                                 }
3742                                         } else if (ISA_na(bsp->mol) && bsp->repr == Seq_repr_map &&
3743                                                         ajp->map_view) {
3744                                                         gbep = CreateGBEntry(awp, bsp, gcp->entityID, 
3745                                                                 gcp->itemID, gcp->thistype);
3746                                                         gbep->map = TRUE;
3747                                         }
3748                                 }
3749                         } else {
3750                                 if (bsp->seq_ext_type == 1) {
3751                                         slp = bsp->seq_ext;
3752                                         while (slp) {
3753                                                 bsp = BioseqFind(SeqLocId(slp));
3754                                                 if (bsp->repr == Seq_repr_raw || 
3755                                                         bsp->repr == Seq_repr_const 
3756                                                         || bsp->repr == Seq_repr_delta 
3757                                                         || (is_www && bsp->repr == Seq_repr_virtual)) {
3758                                                         if (CompareToAwpList(bsp, awp) == FALSE) {
3759                                                                 if (ASN2FF_LOCAL_ID == FALSE) {
3760                                                                         isip = bsp->id;
3761                                                                         sip = SeqIdSelect(isip, 
3762                                                                                 fasta_order, NUM_SEQID);
3763                                                                         if (sip && sip->choice != SEQID_LOCAL) {
3764                                                                                 gbep = CreateGBEntry(awp, bsp, 
3765                                                                                         gcp->entityID, gcp->itemID, 
3766                                                                                                 gcp->thistype);
3767                                                                                 ++awp->total_seg;
3768                                                                                 gbep->num_seg = awp->total_seg;
3769                                                                         } else if (sip->choice == SEQID_LOCAL && 
3770                                                                                 (format == GENPEPT_FMT || 
3771                                                                                                 format == EMBLPEPT_FMT)) {
3772                                                                                 gbep = CreateGBEntry(awp, bsp, 
3773                                                                                         gcp->entityID, gcp->itemID, 
3774                                                                                                         gcp->thistype);
3775                                                                                 ++awp->total_seg;
3776                                                                                 gbep->num_seg = awp->total_seg;
3777                                                                         } else {
3778                                                                                 gbep = CreateGBEntry(awp, bsp, 
3779                                                                                         gcp->entityID, gcp->itemID, 
3780                                                                                                 gcp->thistype);
3781                                                                                 ++awp->total_seg;
3782                                                                                 gbep->num_seg = awp->total_seg;
3783                                                                 }
3784                                                                 }
3785                                                         }
3786                                                 }
3787                                                 slp = slp->next;
3788                                         }
3789                                 } else if (ISA_na(bsp->mol) && (bsp->repr == Seq_repr_raw || 
3790                                                 bsp->repr == Seq_repr_const 
3791                                                         || bsp->repr == Seq_repr_delta 
3792                                                         || (is_www && bsp->repr == Seq_repr_virtual))) {
3793                                                 if (CompareToAwpList(bsp, awp) == FALSE) {
3794                                                 if (ASN2FF_LOCAL_ID == FALSE) {
3795                                                         isip = bsp->id;
3796                                                         sip = SeqIdSelect(isip, fasta_order, NUM_SEQID);
3797                                                         if (sip && sip->choice != SEQID_LOCAL) {
3798                                                                 gbep = CreateGBEntry(awp, bsp, gcp->entityID, 
3799                                                                                         gcp->itemID, gcp->thistype);
3800                                                                 ++awp->total_seg;
3801                                                                 gbep->num_seg = awp->total_seg;
3802                                                         } else if (sip->choice == SEQID_LOCAL && 
3803                                                                         (format == GENPEPT_FMT || 
3804                                                                                                 format == EMBLPEPT_FMT)) {
3805                                                                 gbep = CreateGBEntry(awp, bsp, gcp->entityID, 
3806                                                                                         gcp->itemID, gcp->thistype);
3807                                                                 ++awp->total_seg;
3808                                                                 gbep->num_seg = awp->total_seg;
3809                                                         } else {
3810                                                                 gbep = CreateGBEntry(awp, bsp, gcp->entityID, 
3811                                                                                         gcp->itemID, gcp->thistype);
3812                                                                 ++awp->total_seg;
3813                                                                 gbep->num_seg = awp->total_seg;
3814                                                         }
3815                                                 }
3816                                         }
3817                                 }
3818                         }
3819                         break;
3820                 case OBJ_BIOSEQSET:
3821                         bssp = (BioseqSetPtr) gcp->thisitem;
3822                         if (bssp->_class == 4) {/*parts*/
3823                                 ep = bssp->seq_set;
3824                                 if (ep != NULL) {
3825                                         bsp = ep->data.ptrvalue;
3826                                         if (ISA_na(bsp->mol) && (format == GENBANK_FMT ||
3827                                                 format == EMBL_FMT || format == PSEUDOEMBL_FMT)) {
3828                                                 awp->parts = bssp;
3829                                         } else if (ISA_aa(bsp->mol) && 
3830                                                 (format == GENPEPT_FMT || format == EMBLPEPT_FMT)) {
3831                                                 awp->parts = bssp;
3832                                         }
3833                                 }
3834                         }
3835                         break;
3836                 default:
3837                         break;
3838                         
3839         }
3840         return TRUE;
3841 
3842 }
3843 
3844 
3845 
3846 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.