NCBI C Toolkit Cross Reference

C/api/asn2gnb2.c


  1 /*   asn2gnb2.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  asn2gnb2.c
 27 *
 28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
 29 *          Mati Shomrat
 30 *
 31 * Version Creation Date:   10/21/98
 32 *
 33 * $Revision: 1.131 $
 34 *
 35 * File Description:  New GenBank flatfile generator - work in progress
 36 *
 37 * Modifications:
 38 * --------------------------------------------------------------------------
 39 * ==========================================================================
 40 */
 41 
 42 #include <ncbi.h>
 43 #include <objall.h>
 44 #include <objsset.h>
 45 #include <objsub.h>
 46 #include <objfdef.h>
 47 #include <objpubme.h>
 48 #include <seqport.h>
 49 #include <sequtil.h>
 50 #include <sqnutils.h>
 51 #include <subutil.h>
 52 #include <tofasta.h>
 53 #include <explore.h>
 54 #include <gbfeat.h>
 55 #include <gbftdef.h>
 56 #include <edutil.h>
 57 #include <alignmgr2.h>
 58 #include <asn2gnbi.h>
 59 
 60 #ifdef WIN_MAC
 61 #if __profile__
 62 #include <Profiler.h>
 63 #endif
 64 #endif
 65 
 66 static CharPtr link_projid = "http://www.ncbi.nlm.nih.gov/sites/entrez?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=";
 67 
 68 static CharPtr link_wgs = "http://www.ncbi.nlm.nih.gov/sites/entrez?";
 69 
 70 static CharPtr link_sp = "http://www.uniprot.org/uniprot/";
 71 
 72 /*
 73 static CharPtr link_featn = "http://www.ncbi.nlm.nih.gov/nuccore/";
 74 static CharPtr link_featp = "http://www.ncbi.nlm.nih.gov/protein/";
 75 */
 76 
 77 static CharPtr link_seqn = "http://www.ncbi.nlm.nih.gov/nuccore/";
 78 static CharPtr link_seqp = "http://www.ncbi.nlm.nih.gov/protein/";
 79 
 80 /*
 81 static CharPtr link_omim = "http://www.ncbi.nlm.nih.gov/entrez/dispomim.cgi?id=";
 82 */
 83 
 84 
 85 
 86 /* ********************************************************************** */
 87 
 88 /* add functions allocate specific blocks, populate with paragraph print info */
 89 
 90 static CharPtr strd [4] = {
 91   "   ", "ss-", "ds-", "ms-"
 92 };
 93 
 94 static CharPtr gnbk_mol [16] = {
 95   "    ", "DNA ", "RNA ", "mRNA", "rRNA", "tRNA", "snRNA", "scRNA",
 96   " AA ", "DNA ", "DNA ", "cRNA ", "snoRNA", "RNA ", "RNA ", "tmRNA "
 97 };
 98 
 99 /* EMBL_FMT in RELEASE_MODE or ENTREZ_MODE, otherwise use gnbk_mol */
100 
101 static CharPtr embl_mol [16] = {
102   "xxx", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA", "RNA",
103   "AA ", "DNA", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA"
104 };
105 
106 static CharPtr embl_divs [18] = {
107   "FUN", "INV", "MAM", "ORG", "PHG", "PLN", "PRI", "PRO", "ROD"
108   "SYN", "UNA", "VRL", "VRT", "PAT", "EST", "STS", "HUM", "HTC"
109 };
110 
111 static Uint1 imolToMoltype [16] = {
112   0, 1, 2, 5, 4, 3, 6, 7, 9, 1, 1, 2, 8, 2, 10, 11
113 };
114 
115 static CharPtr gbseq_strd [4] = {
116   NULL, "single", "double", "mixed"
117 };
118 
119 static CharPtr gbseq_mol [16] = {
120   "?", "DNA", "RNA", "mRNA", "rRNA", "tRNA", "snRNA", "scRNA",
121   "AA", "DNA", "DNA", "cRNA", "snoRNA", "RNA", "ncRNA", "tmRNA "
122 };
123 
124 static CharPtr gbseq_top [3] = {
125   NULL, "linear", "circular"
126 };
127 
128 static DatePtr GetBestDate (
129   DatePtr a,
130   DatePtr b
131 )
132 
133 {
134   Int2  status;
135 
136   if (a == NULL) return b;
137   if (b == NULL) return a;
138 
139   status = DateMatch (a, b, FALSE);
140   if (status == 1) return a;
141 
142   return b;
143 }
144 
145 /*--------------------------------------------------------*/
146 /*                                                        */
147 /*  s_IsSeperatorNeeded()                                 */
148 /*                                                        */
149 /*--------------------------------------------------------*/
150 
151 static Boolean s_IsSeperatorNeeded(CharPtr baseString, Int4 baseLength, Int2 suffixLength)
152 {
153   Char lastChar;
154   Char nextToLastChar;
155 
156   lastChar = baseString[baseLength - 1];
157   nextToLastChar = baseString[baseLength - 2];
158 
159   /* This first check put here to emulate what may be a  */
160   /* bug in the original code (in CheckLocusLength() )   */
161   /* which adds an 'S' segment seperator only if it      */
162   /* DOES make the string longer than the max.           */
163 
164   if (baseLength + suffixLength < 16)
165     return FALSE;
166 
167   /* If the last character is not a digit */
168   /* then don't use a seperator.          */
169 
170   if (!IS_DIGIT(lastChar))
171     return FALSE;
172 
173   /* If the last two characters are a non-digit   */
174   /* followed by a '0', then don't use seperator. */
175 
176   if ((lastChar == '0') && (!IS_DIGIT(nextToLastChar)))
177     return FALSE;
178 
179   /* If we made it to here, use a seperator */
180 
181   return TRUE;
182 }
183 
184 /*--------------------------------------------------------*/
185 /*                                                        */
186 /*  s_LocusAddSuffix() -                                  */
187 /*                                                        */
188 /*--------------------------------------------------------*/
189 
190 static Boolean s_LocusAddSuffix (CharPtr locus, Asn2gbWorkPtr awp)
191 {
192   size_t  buflen;
193   Char    ch;
194   Char    segCountStr[6];
195   Int2    segCountStrLen;
196   Char    segSuffix[5];
197 
198   buflen = StringLen (locus);
199 
200   /* If there's one or less segments, */
201   /* no suffix is needed.             */
202 
203   if (awp->numsegs <= 1)
204     return FALSE;
205 
206   /* If the basestring has one or less */
207   /* characters, no suffix is needed.  */
208 
209   if (buflen <=1)
210     return FALSE;
211 
212   /* Add the suffix */
213 
214   ch = locus[buflen-1];
215   sprintf(segCountStr,"%d",awp->numsegs);
216   segCountStrLen = StringLen(segCountStr);
217   segSuffix[0] = '\0';
218 
219   if (s_IsSeperatorNeeded(locus,buflen,segCountStrLen) == TRUE)
220     sprintf(segSuffix,"S%0*d",segCountStrLen,awp->seg);
221   else
222     sprintf(segSuffix,"%0*d",segCountStrLen,awp->seg);
223   StringCat(locus,segSuffix);
224 
225   /* Return successfully */
226 
227   return TRUE;
228 }
229 
230 /*--------------------------------------------------------*/
231 /*                                                        */
232 /*  s_LocusAdjustLength() -                               */
233 /*                                                        */
234 /*--------------------------------------------------------*/
235 
236 static Boolean s_LocusAdjustLength(CharPtr locus, Int2 maxLength)
237 {
238   Int2     trimCount;
239   Int2     buflen;
240   CharPtr  buftmp;
241 
242   buflen = StringLen (locus);
243   if (buflen <= maxLength) return FALSE;
244 
245   buftmp = MemNew(maxLength + 1);
246 
247   /* If the sequence id is an NCBI locus of the */
248   /* form HSU00001, then make sure that if      */
249   /* there is trimming the HS gets trimmed off  */
250   /* as a unit, never just the 'H'.             */
251 
252   trimCount = buflen - maxLength;
253   if (trimCount == 1)
254     if (IS_ALPHA(locus[0]) != 0 &&
255         IS_ALPHA(locus[1]) != 0 &&
256         IS_ALPHA(locus[2]) != 0 &&
257         IS_DIGIT(locus[3]) != 0 &&
258         IS_DIGIT(locus[4]) != 0 &&
259         IS_DIGIT(locus[5]) != 0 &&
260         IS_DIGIT(locus[6]) != 0 &&
261         IS_DIGIT(locus[7]) != 0 &&
262         locus[8] == 'S' &&
263         locus[9] == '\0')
264       trimCount++;
265 
266   /* Left truncate the sequence id */
267 
268   StringCpy(buftmp, &locus[trimCount]);
269   StringCpy(locus, buftmp);
270 
271   MemFree(buftmp);
272   return TRUE;
273 }
274 
275 /*--------------------------------------------------------*/
276 /*                                                        */
277 /*  AddLocusBlock() -                                     */
278 /*                                                        */
279 /*--------------------------------------------------------*/
280 
281 static DatePtr GetBestDateForBsp (
282   BioseqPtr bsp
283 )
284 
285 {
286   DatePtr            best_date = NULL;
287   SeqMgrDescContext  dcontext;
288   DatePtr            dp;
289   EMBLBlockPtr       ebp;
290   GBBlockPtr         gbp;
291   PdbBlockPtr        pdp;
292   PdbRepPtr          prp;
293   SeqDescrPtr        sdp;
294   SPBlockPtr         spp;
295 
296   if (bsp == NULL) return NULL;
297 
298   dp = NULL;
299   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
300   if (sdp != NULL) {
301     dp = (DatePtr) sdp->data.ptrvalue;
302     best_date = GetBestDate (dp, best_date);
303   }
304 
305   /* !!! temporarily also look at genbank block entry date !!! */
306 
307   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
308   if (sdp != NULL) {
309     gbp = (GBBlockPtr) sdp->data.ptrvalue;
310     if (gbp != NULL) {
311       dp = gbp->entry_date;
312       best_date = GetBestDate (dp, best_date);
313     }
314   }
315 
316   /* more complicated code for dates from various objects goes here */
317 
318   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_embl, &dcontext);
319   if (sdp != NULL) {
320     ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
321     if (ebp != NULL) {
322       dp = ebp->creation_date;
323       best_date = GetBestDate (dp, best_date);
324       dp = ebp->update_date;
325       best_date = GetBestDate (dp, best_date);
326     }
327   }
328 
329   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_sp, &dcontext);
330   if (sdp != NULL) {
331     spp = (SPBlockPtr) sdp->data.ptrvalue;
332     if (spp != NULL) {
333       dp = spp->created;
334       if (dp != NULL && dp->data [0] == 1) {
335         best_date = GetBestDate (dp, best_date);
336       }
337       dp = spp->sequpd;
338       if (dp != NULL && dp->data [0] == 1) {
339         best_date = GetBestDate (dp, best_date);
340       }
341       dp = spp->annotupd;
342       if (dp != NULL && dp->data [0] == 1) {
343         best_date = GetBestDate (dp, best_date);
344       }
345     }
346   }
347 
348   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pdb, &dcontext);
349   if (sdp != NULL) {
350     pdp = (PdbBlockPtr) sdp->data.ptrvalue;
351     if (pdp != NULL) {
352       dp = pdp->deposition;
353       if (dp != NULL && dp->data [0] == 1) {
354         best_date = GetBestDate (dp, best_date);
355       }
356       prp = pdp->replace;
357       if (prp != NULL) {
358         dp = prp->date;
359         if (dp != NULL && dp->data[0] == 1) {
360           best_date = GetBestDate (dp, best_date);
361         }
362       }
363     }
364   }
365 
366   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
367   if (sdp != NULL) {
368     dp = (DatePtr) sdp->data.ptrvalue;
369     if (dp != NULL) {
370       best_date = GetBestDate (dp, best_date);
371     }
372   }
373 
374   return best_date;
375 }
376 
377 static Boolean LocusHasBadChars (
378   CharPtr locus
379 )
380 
381 {
382   Char     ch;
383   CharPtr  ptr;
384 
385   ptr = locus;
386   ch = *ptr;
387   while (ch != '\0') {
388     if (! IS_ALPHANUM (ch)) {
389       return TRUE;
390     }
391     ptr++;
392     ch = *ptr;
393   }
394   return FALSE;
395 }
396 
397 static void LookupAccnForNavLink (
398   Int4 gi,
399   CharPtr seqid,
400   size_t len,
401   CharPtr dfault
402 )
403 
404 {
405   SeqIdPtr  sip;
406 
407   if (seqid == NULL) return;
408   *seqid = '\0';
409   if (gi > 0) {
410     if (GetAccnVerFromServer (gi, seqid)) return;
411     sip = GetSeqIdForGI (gi);
412     if (sip != NULL) {
413       if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, len) != NULL) {
414         SeqIdFree (sip);
415         return;
416       }
417       SeqIdFree (sip);
418     }
419   }
420   if (dfault == NULL) return;
421   StringCpy (seqid, dfault);
422 }
423 
424 NLM_EXTERN void AddLocusBlock (
425   Asn2gbWorkPtr awp,
426   Boolean willshowwgs,
427   Boolean willshowcage,
428   Boolean willshowgenome,
429   Boolean willshowcontig,
430   Boolean willshowsequence
431 )
432 
433 {
434   size_t             acclen;
435   IntAsn2gbJobPtr    ajp;
436   Asn2gbSectPtr      asp;
437   BaseBlockPtr       bbp;
438   DatePtr            best_date = NULL;
439   BioSourcePtr       biop;
440   Int2               bmol = 0;
441   BioseqPtr          bsp;
442   Char               buf [1024];
443   Boolean            cagemaster = FALSE;
444   SeqFeatPtr         cds;
445   Int4               currGi;
446   Char               dataclass [10];
447   Char               date [40];
448   SeqMgrDescContext  dcontext;
449   Char               div [10];
450   BioseqPtr          dna;
451   DatePtr            dp;
452   CharPtr            ebmol;
453   EMBLBlockPtr       ebp;
454   Char               embldiv [10];
455   SeqMgrFeatContext  fcontext;
456   StringItemPtr      ffstring;
457   GBBlockPtr         gbp;
458   Char               gene [32];
459   Boolean            genome_view;
460   GBSeqPtr           gbseq;
461   ValNodePtr         gilistpos;
462   SeqIdPtr           gpp = NULL;
463   Boolean            has_next_pref_ul = FALSE;
464   Boolean            hasComment;
465   Char               id [41];
466   Int2               imol = 0;
467   IndxPtr            index;
468   Int2               istrand;
469   Boolean            is_nm = FALSE;
470   Boolean            is_np = FALSE;
471   Boolean            is_nz = FALSE;
472   Boolean            is_env_sample = FALSE;
473   Boolean            is_transgenic = FALSE;
474   Boolean            is_tpa = FALSE;
475   Char               len [32];
476   Int4               length;
477   size_t             loclen;
478   Char               locus [41];
479   MolInfoPtr         mip;
480   Char               mol [64];
481   Int4               nextGi;
482   BioseqPtr          nm = NULL;
483   BioseqPtr          nuc;
484   ObjectIdPtr        oip;
485   OrgNamePtr         onp;
486   Uint1              origin;
487   OrgRefPtr          orp;
488   BioseqPtr          parent;
489   Int4               prevGi;
490   SeqDescrPtr        sdp;
491   Char               sect [128];
492   Char               seg [32];
493   Char               seqid [128];
494   SeqFeatPtr         sfp;
495   SeqHistPtr         hist;
496   SeqIdPtr           sip;
497   SubSourcePtr       ssp;
498   CharPtr            str;
499   CharPtr            suffix = NULL;
500   Uint1              tech;
501   Uint1              topology;
502   TextSeqIdPtr       tsip;
503   UserObjectPtr      uop;
504   Char               ver [16];
505   Int2               version;
506   ValNodePtr         vnp;
507   Boolean            wgsmaster = FALSE;
508   Int2               moltype, strandedness, topol;
509   /*
510   Int4               gi = 0;
511   Char               gi_buf [16];
512   Boolean            is_aa;
513   CharPtr            prefix = NULL;
514   */
515 
516   if (awp == NULL) return;
517   ajp = awp->ajp;
518   if (ajp == NULL) return;
519   bsp = awp->bsp;
520   if (bsp == NULL) return;
521   asp = awp->asp;
522   if (asp == NULL) return;
523 
524   bbp = Asn2gbAddBlock (awp, LOCUS_BLOCK, sizeof (BaseBlock));
525   if (bbp == NULL) return;
526 
527   ffstring = FFGetString(ajp);
528   if ( ffstring == NULL ) return;
529 
530   mol [0] = '\0';
531   len [0] = '\0';
532   div [0] = '\0';
533   embldiv [0] = '\0';
534   dataclass [0] = '\0';
535   date [0] = '\0';
536   gene [0] = '\0';
537 
538   genome_view = FALSE;
539   if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
540     genome_view = TRUE;
541     
542   }
543   if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
544     genome_view = TRUE;
545   }
546 
547   /* locus id */
548 
549   sip = NULL;
550   version = 0;
551   for (sip = bsp->id; sip != NULL; sip = sip->next) {
552     if (sip->choice == SEQID_OTHER) {
553       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
554       if (tsip != NULL) {
555         version = tsip->version;
556         if (StringNCmp (tsip->accession, "NM_", 3) == 0 ||
557             StringNCmp (tsip->accession, "NR_", 3) == 0 ||
558             StringNCmp (tsip->accession, "XM_", 3) == 0 ||
559             StringNCmp (tsip->accession, "XR_", 3) == 0) {
560           is_nm = TRUE;
561           nm = bsp;
562         } else if (StringNCmp (tsip->accession, "NP_", 3) == 0  ||
563                    StringNCmp (tsip->accession, "XP_", 3) == 0) {
564           is_np = TRUE;
565         } else if (StringNCmp (tsip->accession, "NZ_", 3) == 0) {
566           is_nz = TRUE;
567         }
568       }
569       break;
570     }
571     if (sip->choice == SEQID_GENBANK ||
572         sip->choice == SEQID_EMBL ||
573         sip->choice == SEQID_DDBJ) {
574       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
575       if (tsip != NULL) {
576         version = tsip->version;
577       }
578       break;
579     }
580     if (sip->choice == SEQID_TPG ||
581         sip->choice == SEQID_TPE ||
582         sip->choice == SEQID_TPD) {
583       is_tpa = TRUE;
584       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
585       if (tsip != NULL) {
586         version = tsip->version;
587       }
588       break;
589     }
590     if (sip->choice == SEQID_PIR ||
591         sip->choice == SEQID_SWISSPROT ||
592         sip->choice == SEQID_PRF ||
593         sip->choice == SEQID_PDB) break;
594     if (sip->choice == SEQID_GPIPE) {
595       gpp = sip;
596     }
597   }
598   if (sip == NULL) {
599     sip = gpp;
600   }
601   if (sip == NULL) {
602     sip = SeqIdFindBest (bsp->id, SEQID_GENBANK);
603   }
604   sprintf (ver, "%d", (int) version);
605 
606   if (genome_view) {
607     SeqIdWrite (sip, locus, PRINTID_TEXTID_ACCESSION, sizeof (locus) - 1);
608   } else {
609     SeqIdWrite (sip, locus, PRINTID_TEXTID_LOCUS, sizeof (locus) - 1);
610     if (LocusHasBadChars (locus)) {
611       SeqIdWrite (sip, locus, PRINTID_TEXTID_ACCESSION, sizeof (locus) - 1);
612     }
613   }
614 
615   if (is_np) {
616     sfp = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
617     if (sfp != NULL && fcontext.bsp != NULL) {
618       nm = fcontext.bsp;
619       for (sip = nm->id; sip != NULL; sip = sip->next) {
620         if (sip->choice == SEQID_OTHER) {
621           tsip = (TextSeqIdPtr) sip->data.ptrvalue;
622           if (tsip != NULL) {
623             if (StringNCmp (tsip->accession, "NM_", 3) == 0 ||
624                 StringNCmp (tsip->accession, "XM_", 3) == 0) {
625               is_nm = TRUE;
626             }
627           }
628         }
629       }
630       if (! is_nm) {
631         nm = NULL;
632       }
633     }
634   }
635   if (nm != NULL) {
636     /*
637     sfp = SeqMgrGetNextFeature (nm, NULL, SEQFEAT_GENE, 0, &fcontext);
638     if (sfp != NULL) {
639       StringNCpy_0 (gene, fcontext.label, sizeof (gene));
640       if (SeqMgrGetNextFeature (nm, sfp, SEQFEAT_GENE, 0, &fcontext) != NULL) {
641         gene [0] = '\0';
642       }
643       if (StringLen (gene) > 15) {
644         gene [0] = '\0';
645       }
646     }
647     */
648   }
649 
650   /* more complicated code to get parent locus, if segmented, goes here */
651 
652   if (awp->slp != NULL) {
653     length = SeqLocLen (awp->slp);
654   } else {
655     length = bsp->length;
656   }
657 
658   mip = NULL;
659   tech = MI_TECH_standard;
660   origin = 0;
661   bmol = bsp->mol;
662   if (bmol > Seq_mol_aa) {
663     bmol = 0;
664   }
665   istrand = bsp->strand;
666   if (istrand > 3) {
667     istrand = 0;
668   }
669 
670   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
671   if (sdp != NULL) {
672     bbp->entityID = dcontext.entityID;
673     bbp->itemID = dcontext.itemID;
674     bbp->itemtype = OBJ_SEQDESC;
675 
676     mip = (MolInfoPtr) sdp->data.ptrvalue;
677     if (mip != NULL) {
678       if (mip->biomol <= MOLECULE_TYPE_TMRNA) {
679         imol = (Int2) mip->biomol;
680       }
681       tech = mip->tech;
682 
683       if (tech == MI_TECH_wgs && bsp->repr == Seq_repr_virtual) {
684 
685         /* check for WGS master record */
686 
687         for (sip = bsp->id; sip != NULL; sip = sip->next) {
688           switch (sip->choice) {
689             case SEQID_GENBANK :
690             case SEQID_EMBL :
691             case SEQID_DDBJ :
692               tsip = (TextSeqIdPtr) sip->data.ptrvalue;
693               if (tsip != NULL && tsip->accession != NULL) {
694                 acclen = StringLen (tsip->accession);
695                 if (acclen == 12) {
696                   if (StringCmp (tsip->accession + 6, "000000") == 0) {
697                     wgsmaster = TRUE;
698                   }
699                 } else if (acclen == 13) {
700                   if (StringCmp (tsip->accession + 6, "0000000") == 0) {
701                     wgsmaster = TRUE;
702                   }
703                 }
704               }
705               break;
706             case SEQID_OTHER :
707               tsip = (TextSeqIdPtr) sip->data.ptrvalue;
708               if (tsip != NULL && tsip->accession != NULL) {
709                 if (StringLen (tsip->accession) == 15) {
710                   if (StringCmp (tsip->accession + 9, "000000") == 0) {
711                     wgsmaster = TRUE;
712                   }
713                 }
714               }
715               break;
716             default :
717               break;
718           }
719         }
720       }
721 
722       if (tech == MI_TECH_other && willshowcage && bsp->repr == Seq_repr_virtual) {
723 
724         /* check for TAG master record */
725 
726         for (sip = bsp->id; sip != NULL; sip = sip->next) {
727           switch (sip->choice) {
728             case SEQID_GENBANK :
729             case SEQID_EMBL :
730             case SEQID_DDBJ :
731               tsip = (TextSeqIdPtr) sip->data.ptrvalue;
732               if (tsip != NULL && tsip->accession != NULL) {
733                 acclen = StringLen (tsip->accession);
734                 if (acclen == 12) {
735                   if (StringCmp (tsip->accession + 5, "0000000") == 0) {
736                     cagemaster = TRUE;
737                   }
738                 }
739               }
740               break;
741             default :
742               break;
743           }
744         }
745       }
746     }
747   }
748 
749   /* check inst.mol if mol-type is not-set or genomic */
750 
751   if (imol <= MOLECULE_TYPE_GENOMIC) {
752     if (bmol == Seq_mol_aa) {
753       imol = MOLECULE_TYPE_PEPTIDE;
754     } else if (bmol == Seq_mol_na) {
755       imol = 0;
756     } else if (bmol == Seq_mol_rna) {
757       imol = 2;
758     } else {
759       imol = 1;
760     }
761   } else if (imol == MOLECULE_TYPE_OTHER_GENETIC_MATERIAL) {
762     if (bmol == Seq_mol_rna) {
763       imol = 2;
764     }
765   }
766 
767   /* if ds-DNA don't show ds */
768 
769   if (bmol == Seq_mol_dna && istrand == 2) {
770     istrand = 0;
771   }
772 
773   /* ss=any RNA don't show ss */
774 
775   if ((bmol > Seq_mol_rna ||
776       (imol >= MOLECULE_TYPE_MRNA && imol <= MOLECULE_TYPE_PEPTIDE) ||
777       (imol >= MOLECULE_TYPE_CRNA && imol <= MOLECULE_TYPE_TMRNA)) &&
778       istrand == 1) {
779     istrand = 0;
780   }
781 
782   topology = bsp->topology;
783   if (awp->slp != NULL) {
784     topology = TOPOLOGY_LINEAR;
785   }
786 
787   /* length, topology, and molecule type */
788 
789   if (awp->format == GENBANK_FMT) {
790 
791     if (awp->newLocusLine) {
792 
793       if (wgsmaster && (! is_nz)) {
794         sprintf (len, "%ld rc", (long) length);
795       } else if (cagemaster) {
796         sprintf (len, "%ld rc", (long) length);
797       } else {
798         sprintf (len, "%ld bp", (long) length);
799       }
800       sprintf (mol, "%s%-4s", strd [istrand], gnbk_mol [imol]);
801 
802     } else {
803 
804       if (topology == TOPOLOGY_CIRCULAR) {
805         sprintf (len, "%7ld bp", (long) length);
806         sprintf (mol, "%s%-4s  circular", strd [istrand], gnbk_mol [imol]);
807       } else {
808         sprintf (len, "%7ld bp", (long) length);
809         sprintf (mol, "%s%-4s          ", strd [istrand], gnbk_mol [imol]);
810       }
811     }
812 
813   } else if (awp->format == GENPEPT_FMT) {
814 
815     if (awp->newLocusLine) {
816       sprintf (len, "%ld aa", (long) length);
817     } else {
818       sprintf (len, "%7ld aa", (long) length);
819     }
820 
821   } else if (awp->format == EMBL_FMT) {
822 
823     if (imol < MOLECULE_TYPE_PEPTIDE) {
824       if (ajp->flags.useEmblMolType) {
825         ebmol = embl_mol [imol];
826       } else {
827         ebmol = gnbk_mol [imol];
828       }
829 
830       if (topology == TOPOLOGY_CIRCULAR) {
831         sprintf (mol, "circular %s", ebmol);
832         sprintf (len, "%ld BP.", (long) length);
833       } else {
834         sprintf (mol, "%s", ebmol);
835         sprintf (len, "%ld BP.", (long) length);
836       }
837     }
838   }
839 
840   /* division */
841 
842   biop = NULL;
843   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
844   if (sdp != NULL) {
845     biop = (BioSourcePtr) sdp->data.ptrvalue;
846   } else {
847     sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
848     if (sfp != NULL) {
849       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
850     } else if (ISA_aa (bsp->mol)) {
851 
852       /* if protein with no sources, get sources applicable to DNA location of CDS */
853 
854       cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
855       if (cds != NULL) {
856         sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
857         if (sfp != NULL) {
858           biop = (BioSourcePtr) sfp->data.value.ptrvalue;
859         } else {
860           dna = BioseqFindFromSeqLoc (cds->location);
861           if (dna != NULL) {
862             sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
863             if (sdp != NULL) {
864               biop = (BioSourcePtr) sdp->data.ptrvalue;
865             }
866           }
867         }
868       }
869     }
870   }
871   if (biop != NULL) {
872     origin = biop->origin;
873     orp = biop->org;
874     if (orp != NULL) {
875       onp = orp->orgname;
876       if (onp != NULL) {
877         StringNCpy_0 (div, onp->div, sizeof (div));
878         StringNCpy_0 (embldiv, onp->div, sizeof (embldiv));
879       }
880     }
881     for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
882       if (ssp->subtype == SUBSRC_transgenic) {
883         is_transgenic = TRUE;
884       } else if (ssp->subtype == SUBSRC_environmental_sample) {
885         is_env_sample = TRUE;
886       }
887     }
888   }
889 
890   StringCpy (dataclass, "STD");
891   if (is_tpa) {
892     StringCpy (dataclass, "TPA");
893   }
894 
895   switch (tech) {
896     case MI_TECH_est :
897       StringCpy (div, "EST");
898       StringCpy (dataclass, "EST");
899       break;
900     case MI_TECH_sts :
901       StringCpy (div, "STS");
902       StringCpy (dataclass, "STS");
903       break;
904     case MI_TECH_survey :
905       StringCpy (div, "GSS");
906       StringCpy (dataclass, "GSS");
907       break;
908     case MI_TECH_htgs_0 :
909     case MI_TECH_htgs_1 :
910     case MI_TECH_htgs_2 :
911       StringCpy (div, "HTG");
912       StringCpy (dataclass, "HTG");
913       break;
914     case MI_TECH_htc :
915       StringCpy (div, "HTC");
916       StringCpy (dataclass, "HTC");
917       break;
918     case MI_TECH_tsa :
919       StringCpy (div, "TSA");
920       StringCpy (dataclass, "TSA");
921       break;
922     default :
923       break;
924   }
925 
926   if (origin == ORG_MUT ||
927       origin == ORG_ARTIFICIAL ||
928       origin == ORG_SYNTHETIC ||
929       is_transgenic) {
930     StringCpy (div, "SYN");
931     StringCpy (embldiv, "SYN");
932   } else if (is_env_sample) {
933     if (tech == MI_TECH_unknown ||
934         tech == MI_TECH_standard ||
935         tech == MI_TECH_other ||
936         tech == MI_TECH_htgs_3) {
937       StringCpy (div, "ENV");
938       StringCpy (embldiv, "ENV");
939     }
940   }
941 
942   if (is_transgenic && tech == MI_TECH_survey) {
943     StringCpy (div, "GSS");
944     StringCpy (dataclass, "GSS");
945   }
946 
947   sip = SeqIdFindBest (bsp->id, SEQID_PATENT);
948   if (sip != NULL && sip->choice == SEQID_PATENT) {
949     StringCpy (div, "PAT");
950     StringCpy (dataclass, "PAT");
951   }
952 
953   /* if protein is encoded by a patent nucleotide, use PAT division */
954 
955   if (ISA_aa (bsp->mol)) {
956     cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
957     if (cds != NULL) {
958       nuc = BioseqFindFromSeqLoc (cds->location);
959       if (nuc != NULL) {
960         for (sip = nuc->id; sip != NULL; sip = sip->next) {
961           if (sip->choice == SEQID_PATENT) {
962             StringCpy (div, "PAT");
963             StringCpy (dataclass, "PAT");
964           }
965         }
966       }
967     }
968   }
969 
970   /* more complicated code for division, if necessary, goes here */
971 
972   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
973   while (sdp != NULL) {
974     gbp = (GBBlockPtr) sdp->data.ptrvalue;
975     if (gbp != NULL) {
976       if (StringHasNoText (div) && gbp->div != NULL) {
977         StringCpy (div, gbp->div);
978         StringCpy (embldiv, gbp->div);
979       } else if (StringCmp(gbp->div, "PAT") == 0) {
980         StringCpy (div, gbp->div);
981         StringCpy (dataclass, gbp->div);
982       } else if (StringCmp(gbp->div, "SYN") == 0 ) {
983         StringCpy (div, gbp->div);
984         StringCpy (embldiv, gbp->div);
985       }
986     }
987     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &dcontext);
988   }
989 
990   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
991 
992     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_embl, &dcontext);
993     if (sdp != NULL) {
994       ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
995       if (ebp != NULL) {
996         if (ebp->div == 255) {
997           if (mip == NULL) {
998             StringCpy (div, "HUM");
999             StringCpy (embldiv, "HUM");
1000           }
1001         } else if (ebp->div < 18)  {
1002           StringCpy (div, embl_divs [ebp->div]);
1003           StringCpy (embldiv, embl_divs [ebp->div]);
1004         }
1005       }
1006     }
1007 
1008     if (StringHasNoText (div)) {
1009       StringCpy (div, "UNA");
1010       StringCpy (embldiv, "UNA");
1011     }
1012   }
1013 
1014   /* empty division field if unable to find anything */
1015 
1016   if (StringHasNoText (div)) {
1017     StringCpy (div, "   ");
1018   }
1019   if (StringHasNoText (embldiv)) {
1020     StringCpy (embldiv, "   ");
1021   }
1022 
1023   /* contig style (old genome_view flag) forces CON division */
1024 
1025   if (awp->contig) {
1026     StringCpy (div, "CON");
1027     StringCpy (dataclass, "CON");
1028   }
1029 
1030   if (genome_view) {
1031     StringCpy (div, "CON");
1032     StringCpy (dataclass, "CON");
1033   }
1034 
1035   if (StringCmp (dataclass, "CON") == 0) {
1036     if (DeltaLitOnly (bsp)) {
1037       if (SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext) != NULL) {
1038         StringCpy (dataclass, "ANN");
1039       }
1040     }
1041   }
1042 
1043   /* date */
1044 
1045   best_date = GetBestDateForBsp (bsp);
1046 
1047   if (best_date == NULL) {
1048 
1049     /* if bsp is product of CDS or mRNA feature, get date from sfp->location bsp */
1050 
1051     sfp = NULL;
1052     if (ISA_na (bsp->mol)) {
1053       sfp = SeqMgrGetRNAgivenProduct (bsp, NULL);
1054     } else if (ISA_aa (bsp->mol)) {
1055       sfp = SeqMgrGetCDSgivenProduct (bsp, NULL);
1056     }
1057     if (sfp != NULL) {
1058       parent = BioseqFindFromSeqLoc (sfp->location);
1059       if (parent != NULL) {
1060         best_date = GetBestDateForBsp (parent);
1061       }
1062     }
1063   }
1064 
1065   /* convert best date */
1066 
1067   if (best_date != NULL) {
1068     DateToFF (date, best_date, FALSE);
1069   }
1070   if (StringHasNoText (date)) {
1071     StringCpy (date, "01-JAN-1900");
1072   }
1073 
1074   if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
1075 
1076     /* Create the proper locus name */
1077 
1078     parent = awp->parent;
1079     if (parent->repr == Seq_repr_seg) {
1080 
1081       if (! StringHasNoText (awp->basename)) {
1082         StringCpy (locus, awp->basename);
1083         s_LocusAddSuffix (locus, awp);
1084       }
1085     }
1086 
1087     /* Print the "LOCUS_NEW" line, if requested */
1088 
1089     if (awp->newLocusLine) {
1090 
1091       FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
1092       parent = awp->parent;
1093 
1094       if (parent->repr == Seq_repr_seg)
1095         s_LocusAdjustLength (locus,16);
1096 
1097       if (is_nm && (! StringHasNoText (gene))) {
1098         FFAddOneString (ffstring, gene, FALSE, FALSE, TILDE_IGNORE);
1099       } else {
1100         FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1101       }
1102       FFAddNChar(ffstring, ' ', 43 - StringLen(len)- ffstring->curr->pos, FALSE);
1103       FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1104       FFAddNChar(ffstring, ' ', 44 - ffstring->curr->pos, FALSE);
1105       FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1106       FFAddNChar(ffstring, ' ', 55 - ffstring->curr->pos, FALSE);
1107       if (topology == TOPOLOGY_CIRCULAR) {
1108         FFAddOneString (ffstring, "circular", FALSE, FALSE, TILDE_IGNORE);
1109       } else {
1110         FFAddOneString (ffstring, "linear  ", FALSE, FALSE, TILDE_IGNORE);
1111       }
1112       FFAddNChar(ffstring, ' ', 64 - ffstring->curr->pos, FALSE);
1113       FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1114       FFAddNChar(ffstring, ' ', 68 - ffstring->curr->pos, FALSE);
1115       FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
1116     }
1117 
1118     /* Else print the "LOCUS" line */
1119 
1120     else {
1121 
1122       FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
1123 
1124       if (parent->repr == Seq_repr_seg)
1125         s_LocusAdjustLength (locus,16);
1126 
1127       FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1128       FFAddNChar(ffstring, ' ', 32 - StringLen(len) - ffstring->curr->pos, FALSE);
1129       FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1130       FFAddNChar(ffstring, ' ', 33 - ffstring->curr->pos, FALSE);
1131       FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1132       FFAddNChar(ffstring, ' ', 52 - ffstring->curr->pos, FALSE);
1133       FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1134       FFAddNChar(ffstring, ' ', 62 - ffstring->curr->pos, FALSE);
1135       FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
1136     }
1137 
1138   } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1139 
1140     if (awp->newLocusLine) {
1141 
1142       str = GetMolTypeQual (bsp);
1143       if (str == NULL) {
1144         switch (bsp->mol) {
1145           case Seq_mol_dna :
1146             str = "unassigned DNA";
1147             break;
1148           case Seq_mol_rna :
1149             str = "unassigned RNA";
1150             break;
1151           case Seq_mol_aa :
1152             break;
1153           default :
1154             str = "unassigned DNA";
1155             break;
1156         }
1157       }
1158       if (StringCmp (str, "viral cRNA") == 0) {
1159         str = "other RNA";
1160       }
1161       if (StringICmp (str, "ncRNA") == 0) {
1162         str = "RNA";
1163       }
1164       StringNCpy_0 (mol, str, sizeof (mol));
1165 
1166       FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
1167 
1168       FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1169       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1170       FFAddOneString (ffstring, "SV ", FALSE, FALSE, TILDE_IGNORE);
1171       FFAddOneString (ffstring, ver, FALSE, FALSE, TILDE_IGNORE);
1172       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1173       if (topology == TOPOLOGY_CIRCULAR) {
1174         FFAddOneString (ffstring, "circular", FALSE, FALSE, TILDE_IGNORE);
1175       } else {
1176         FFAddOneString (ffstring, "linear", FALSE, FALSE, TILDE_IGNORE);
1177       }
1178       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1179       FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1180       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1181       FFAddOneString (ffstring, dataclass, FALSE, FALSE, TILDE_IGNORE);
1182       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1183       FFAddOneString (ffstring, embldiv, FALSE, FALSE, TILDE_IGNORE);
1184       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1185       FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1186 
1187     } else {
1188 
1189       FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
1190 
1191       FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1192       loclen = StringLen(locus);
1193       if (14 - 5 - loclen > 0) {
1194         FFAddNChar(ffstring, ' ', 14 - 5 - loclen, FALSE);
1195       }
1196       if (awp->hup) {
1197         FFAddOneString (ffstring, " confidential; ", FALSE, FALSE, TILDE_IGNORE);
1198       } else {
1199         FFAddOneString (ffstring, " standard; ", FALSE, FALSE, TILDE_IGNORE);
1200       }
1201       FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1202       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1203 
1204       /* conditional code to make div "UNA" goes here */
1205 
1206       FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1207       FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1208       FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1209     }
1210   }
1211 
1212   /* optionally populate indexes for NCBI internal database */
1213 
1214   if (ajp->index) {
1215     index = &asp->index;
1216   } else {
1217     index = NULL;
1218   }
1219 
1220   if (index != NULL) {
1221     Char  tmp [20];
1222     index->locus = StringSave (locus);
1223     index->div = StringSave (div);
1224     sprintf (tmp, "%ld", (long) length);
1225     index->base_cnt = StringSave (tmp);
1226   }
1227 
1228   /* optionally populate gbseq for XML-ized GenBank format */
1229 
1230   if (ajp->gbseq) {
1231     gbseq = &asp->gbseq;
1232   } else {
1233     gbseq = NULL;
1234   }
1235 
1236   if (gbseq != NULL) {
1237     gbseq->locus = StringSave (locus);
1238     gbseq->length = length;
1239     gbseq->division = StringSave (div);
1240 
1241     gbseq->moltype = StringSave (gbseq_mol [imol]);
1242 
1243     strandedness = (Int2) bsp->strand;
1244     if (strandedness < 0 || strandedness > 3) {
1245       strandedness = 0;
1246     }
1247     if (strandedness == 0) {
1248       moltype = (Int2) imolToMoltype [imol];
1249       if (moltype < 0 || moltype > 11) {
1250         moltype = 0;
1251       }
1252       if (moltype == 1) {
1253         strandedness = 2; /* default to double strand for DNA */
1254       } else if ((moltype >= 2 && moltype <= 8) || moltype >= 10 && moltype <= 11) {
1255         strandedness = 1; /* default to single strand for RNA */
1256       }
1257     }
1258     gbseq->strandedness = StringSaveNoNull (gbseq_strd [strandedness]);
1259 
1260     topol = (Int2) bsp->topology;
1261     if (topol < 0 || topol > 2) {
1262       topol = 0;
1263     }
1264     if (topol == 0) {
1265       topol = 1; /* default to linear if not set */
1266     }
1267     gbseq->topology = StringSaveNoNull (gbseq_top [topol]);
1268 
1269     for (sip = bsp->id; sip != NULL; sip = sip->next) {
1270       SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id));
1271       ValNodeCopyStr (&gbseq->other_seqids, 0, id);
1272     }
1273 
1274     date [0] = '\0';
1275     dp = NULL;
1276     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
1277     if (sdp != NULL) {
1278       dp = (DatePtr) sdp->data.ptrvalue;
1279     }
1280     if (dp != NULL) {
1281       DateToFF (date, dp, FALSE);
1282       if (StringDoesHaveText (date)) {
1283         gbseq->create_date = StringSave (date);
1284       }
1285     }
1286     /*
1287     if (StringHasNoText (date)) {
1288       StringCpy (date, "01-JAN-1900");
1289     }
1290     gbseq->create_date = StringSave (date);
1291     */
1292 
1293     date [0] = '\0';
1294     dp = NULL;
1295     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
1296     if (sdp != NULL) {
1297       dp = (DatePtr) sdp->data.ptrvalue;
1298     }
1299     if (dp != NULL) {
1300       DateToFF (date, dp, FALSE);
1301     }
1302     if (StringHasNoText (date)) {
1303       StringCpy (date, "01-JAN-1900");
1304     }
1305     gbseq->update_date = StringSave (date);
1306   }
1307 
1308   suffix = FFEndPrint(ajp, ffstring, awp->format, 12, 0, 5, 0, "ID");
1309   FFRecycleString(ajp, ffstring);
1310 
1311   bbp->string = suffix;
1312 
1313   /*
1314   if (awp->contig && (! awp->showconfeats) && awp->smartconfeats && GetWWW (ajp) &&
1315       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
1316     is_aa = ISA_aa (bsp->mol);
1317     gi = 0;
1318     for (sip = bsp->id; sip != NULL; sip = sip->next) {
1319       if (sip->choice == SEQID_GI) {
1320         gi = (Int4) sip->data.intvalue;
1321       }
1322     }
1323     if (gi > 0) {
1324       ffstring = FFGetString(ajp);
1325 
1326       sprintf(gi_buf, "%ld", (long) gi);
1327       FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1328       if (is_aa) {
1329         FF_Add_NCBI_Base_URL(ffstring, link_featp);
1330       } else {
1331         FF_Add_NCBI_Base_URL(ffstring, link_featn);
1332       }
1333       FFAddOneString(ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE);
1334       if ( is_aa ) {
1335         FFAddOneString(ffstring, "?report=gpwithparts", FALSE, FALSE, TILDE_IGNORE);
1336       } else {
1337         FFAddOneString(ffstring, "?report=gbwithparts", FALSE, FALSE, TILDE_IGNORE);
1338       }
1339       FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1340       if (bsp->length > 1000000) {
1341         FFAddOneString(ffstring, "Click here to see all features and the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
1342       } else {
1343         FFAddOneString(ffstring, "Click here to see the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
1344       }
1345       FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1346 
1347       prefix = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "??");
1348 
1349       FFRecycleString(ajp, ffstring);
1350 
1351       if (awp->afp != NULL) {
1352         DoQuickLinkFormat (awp->afp, prefix);
1353       }
1354       MemFree (prefix);
1355     }
1356   }
1357   */
1358 
1359   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
1360       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
1361 
1362     sprintf (buf, "<a name=\"locus_%ld\"></a>", (long) awp->currGi);
1363     DoQuickLinkFormat (awp->afp, buf);
1364 
1365     buf [0] = '\0';
1366     hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext) != NULL);
1367     if (! hasComment) {
1368       hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_region, &dcontext) != NULL);
1369     }
1370     if (! hasComment) {
1371       hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_maploc, &dcontext) != NULL);
1372     }
1373     if (! hasComment) {
1374       sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
1375       while (sdp != NULL) {
1376         uop = (UserObjectPtr) sdp->data.ptrvalue;
1377         if (uop != NULL) {
1378           oip = uop->type;
1379           if (oip != NULL) {
1380             if (StringCmp (oip->str, "RefGeneTracking") == 0) {
1381               hasComment = TRUE;
1382             } else if (StringCmp (oip->str, "GenomeBuild") == 0) {
1383               hasComment = TRUE;
1384             } else if (StringCmp (oip->str, "ENCODE") == 0) {
1385               hasComment = TRUE;
1386             }
1387           }
1388         }
1389         sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
1390       }
1391     }
1392     if (! hasComment) {
1393       hist = bsp->hist;
1394       if (hist != NULL) {
1395         if (hist->replaced_by_ids != NULL && hist->replaced_by_date != NULL) {
1396           hasComment = TRUE;
1397         } else if (hist->replace_ids != NULL && hist->replace_date != NULL) {
1398           hasComment = TRUE;
1399         }
1400       }
1401     }
1402 
1403     buf [0] = '\0';
1404     StringCpy (buf, "<div class=\"localnav\"><ul class=\"locals\">");
1405 
1406     if (hasComment) {
1407       sprintf (sect, "<li><a href=\"#comment_%ld\" title=\"Jump to the comment section of this record\">Comment</a></li>", (long) awp->currGi);
1408       StringCat (buf, sect);
1409     }
1410     sprintf (sect, "<li><a href=\"#feature_%ld\" title=\"Jump to the feature table of this record\">Features</a></li>", (long) awp->currGi);
1411     StringCat (buf, sect);
1412     if (willshowwgs) {
1413       sprintf (sect, "<li><a href=\"#wgs_%ld\" title=\"Jump to WGS section of this record\">WGS</a></li>", (long) awp->currGi);
1414       StringCat (buf, sect);
1415     }
1416     if (willshowgenome) {
1417       sprintf (sect, "<li><a href=\"#genome_%ld\" title=\"Jump to the genome section of this record\">Genome</a></li>", (long) awp->currGi);
1418       StringCat (buf, sect);
1419     }
1420     if (willshowcontig) {
1421       sprintf (sect, "<li><a href=\"#contig_%ld\" title=\"Jump to the contig section of this record\">Contig</a></li>", (long) awp->currGi);
1422       StringCat (buf, sect);
1423     }
1424     if (willshowsequence) {
1425       sprintf (sect, "<li><a href=\"#sequence_%ld\" title=\"Jump to the sequence of this record\">Sequence</a></li>", (long) awp->currGi);
1426       StringCat (buf, sect);
1427     }
1428 
1429     StringCat (buf, "</ul>");
1430 
1431     prevGi = 0;
1432     currGi = 0;
1433     nextGi = 0;
1434     gilistpos = awp->gilistpos;
1435     if (gilistpos == NULL) {
1436       gilistpos = ajp->gihead;
1437     }
1438     do {
1439       vnp = gilistpos;
1440       if (vnp != NULL) {
1441         prevGi = vnp->data.intvalue;
1442         vnp = vnp->next;
1443         gilistpos = vnp;
1444         if (vnp != NULL) {
1445           currGi = vnp->data.intvalue;
1446           vnp = vnp->next;
1447           if (vnp != NULL) {
1448             nextGi = vnp->data.intvalue;
1449           }
1450         }
1451       }
1452     } while (gilistpos != NULL && currGi != awp->currGi);
1453 
1454     has_next_pref_ul = FALSE;
1455 
1456     if (currGi == awp->currGi && nextGi > 0 && awp->sectionCount < awp->sectionMax) {
1457       if (! has_next_pref_ul) {
1458         StringCat (buf, "<ul class=\"nextprevlinks\">");
1459         has_next_pref_ul = TRUE;
1460       }
1461       LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
1462       if (awp->seg + 1 > 0 && awp->numsegs > 0 && awp->seg + 1 <= awp->numsegs) {
1463         sprintf (seg, " (segment %d of %ld)", (int) (awp->seg + 1), (long) awp->numsegs);
1464         StringCat (seqid, seg);
1465       }
1466       sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) nextGi, seqid);
1467       StringCat (buf, sect);
1468     } else if (awp->nextGi > 0) {
1469       if (! has_next_pref_ul) {
1470         StringCat (buf, "<ul class=\"nextprevlinks\">");
1471         has_next_pref_ul = TRUE;
1472       }
1473       LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
1474       sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) awp->nextGi, seqid);
1475       StringCat (buf, sect);
1476     }
1477     if (currGi == awp->currGi && prevGi > 0 && awp->sectionCount > 1) {
1478       if (! has_next_pref_ul) {
1479         StringCat (buf, "<ul class=\"nextprevlinks\">");
1480         has_next_pref_ul = TRUE;
1481       }
1482       LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
1483       if (awp->seg - 1 > 0 && awp->numsegs > 0 && awp->seg - 1 <= awp->numsegs) {
1484         sprintf (seg, " (segment %d of %ld)", (int) (awp->seg - 1), (long) awp->numsegs);
1485         StringCat (seqid, seg);
1486       }
1487       sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) prevGi, seqid);
1488       StringCat (buf, sect);
1489     } else if (awp->prevGi > 0) {
1490       if (! has_next_pref_ul) {
1491         StringCat (buf, "<ul class=\"nextprevlinks\">");
1492         has_next_pref_ul = TRUE;
1493       }
1494       LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
1495       sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) awp->prevGi, seqid);
1496       StringCat (buf, sect);
1497     }
1498     if (has_next_pref_ul) {
1499       StringCat (buf, "</ul>");
1500     }
1501     StringCat (buf, "</div>\n");
1502     StringCat (buf, "<pre class=\"genbank\">");
1503     DoQuickLinkFormat (awp->afp, buf);
1504   } else if (GetWWW (ajp)) {
1505     buf [0] = '\0';
1506     StringCat (buf, "<pre>");
1507     DoQuickLinkFormat (awp->afp, buf);
1508   }
1509 
1510   if (awp->afp != NULL) {
1511     DoImmediateFormat (awp->afp, bbp);
1512   }
1513 }
1514 
1515 NLM_EXTERN void AddDeflineBlock (
1516   Asn2gbWorkPtr awp
1517 )
1518 
1519 {
1520   IntAsn2gbJobPtr    ajp;
1521   Asn2gbSectPtr      asp;
1522   BaseBlockPtr       bbp;
1523   BioseqPtr          bsp;
1524   Char               buf [4096]; 
1525   GBSeqPtr           gbseq;
1526   ItemInfo           ii;
1527   StringItemPtr      ffstring;
1528 
1529   if (awp == NULL) return;
1530   ajp = awp->ajp;
1531   if (ajp == NULL) return;
1532   bsp = awp->bsp;
1533   if (bsp == NULL) return;
1534   asp = awp->asp;
1535   if (asp == NULL) return;
1536 
1537   bbp = Asn2gbAddBlock (awp, DEFLINE_BLOCK, sizeof (BaseBlock));
1538   if (bbp == NULL) return;
1539 
1540   ffstring = FFGetString(ajp);
1541   if ( ffstring == NULL ) return;
1542 
1543   MemSet ((Pointer) (&ii), 0, sizeof (ItemInfo));
1544   MemSet ((Pointer) buf, 0, sizeof (buf));
1545 
1546   /* create default defline */
1547 
1548   if (NewCreateDefLineBuf (&ii, bsp, buf, sizeof (buf), FALSE, FALSE)) {
1549     bbp->entityID = ii.entityID;
1550     bbp->itemID = ii.itemID;
1551     bbp->itemtype = ii.itemtype;
1552 
1553     FFStartPrint (ffstring, awp->format, 0, 12, "DEFINITION", 12, 5, 5, "DE", TRUE);
1554 
1555     if (StringHasNoText (buf)) {
1556       FFAddOneChar (ffstring, '.', FALSE);
1557     } else {
1558       FFAddOneString (ffstring, buf, TRUE, TRUE, TILDE_IGNORE);
1559     }
1560 
1561     bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "DE");
1562   }
1563 
1564   /* optionally populate gbseq for XML-ized GenBank format */
1565 
1566   if (ajp->gbseq) {
1567     gbseq = &asp->gbseq;
1568   } else {
1569     gbseq = NULL;
1570   }
1571 
1572   if (gbseq != NULL) {
1573     gbseq->definition = StringSave (buf);
1574   }
1575 
1576   FFRecycleString(ajp, ffstring);
1577 
1578   /*
1579   if (bbp->itemtype == 0) {
1580     bbp->entityID = bsp->idx.entityID;
1581     bbp->itemtype = bsp->idx.itemtype;
1582     bbp->itemID = bsp->idx.itemID;
1583   }
1584   */
1585 
1586   if (awp->afp != NULL) {
1587     DoImmediateFormat (awp->afp, bbp);
1588   }
1589 }
1590 
1591 static void FF_www_accession (
1592   IntAsn2gbJobPtr ajp,
1593   StringItemPtr ffstring,
1594   CharPtr cstring,
1595   Boolean is_na
1596 )
1597 {
1598   if (cstring == NULL || ffstring == NULL) return;
1599 
1600   if ( GetWWW(ajp) ) {
1601     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1602     if (is_na) {
1603       FF_Add_NCBI_Base_URL (ffstring, link_seqn);
1604     } else {
1605       FF_Add_NCBI_Base_URL (ffstring, link_seqp);
1606     }
1607     FFAddTextToString(ffstring, /* "val=" */ NULL, cstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1608     FFAddOneString(ffstring, cstring, FALSE, FALSE, TILDE_IGNORE);
1609     FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1610   } else {
1611     FFAddOneString(ffstring, cstring, FALSE, FALSE, TILDE_IGNORE);
1612   }
1613   return;
1614 }
1615 
1616 /* Check if acc directly follows prev */
1617 static Boolean IsSuccessor(CharPtr acc, CharPtr prev)
1618 {
1619   CharPtr accp, prevp;
1620   Int4 acc_num, prev_num;
1621 
1622   if (acc == NULL  ||  prev == NULL) return FALSE;
1623 
1624   if (StringLen(acc) != StringLen(prev)) return FALSE;
1625 
1626   accp = acc;
1627   prevp = prev;
1628   while (accp != '\0'  &&  prevp != '\0') {
1629     if (*accp != *prevp) return FALSE;
1630     if (IS_DIGIT(*accp)) {
1631       acc_num = (Int4)atol(accp);
1632       prev_num = (Int4)atol(prevp);
1633       return (acc_num == prev_num + 1);
1634     }
1635     ++accp;
1636     ++prevp;
1637   }
1638   return FALSE;
1639 }
1640 
1641 
1642 static Boolean IsProjectAccn(CharPtr acc)
1643 {
1644     Int2 letters;
1645     Char digits[3];
1646     CharPtr ptr;
1647 
1648     if (acc == NULL) {
1649         return FALSE;
1650     }
1651     digits[0] = '\0';
1652 
1653     for (ptr = acc, letters = 0; ptr != '\0'  &&  IS_ALPHA(*ptr); ++ptr, ++letters) continue;
1654     if (letters != 4  ||  StringLen(ptr) < 2) {
1655         return FALSE;
1656     }
1657     digits[0] = *ptr++;
1658     digits[1] = *ptr++;
1659     digits[2] = '\0';
1660     if (atoi(digits) < 1) {
1661         return FALSE;
1662     }
1663     while (*ptr != '\0') {
1664         if (*ptr != '0') {
1665             return FALSE;
1666         }
1667         ++ptr;
1668     }
1669     return TRUE;
1670 }
1671 
1672 
1673 static ValNodePtr GetSecondaryAccessions(ValNodePtr extra_access)
1674 {
1675 #define EXTRA_ACCESSION_CUTOFF 20
1676 #define BIN_ACCESSION_CUTOFF   5
1677 
1678   Int4 extra_acc_num = 0;
1679   ValNodePtr  bins, bin, vnp, result = NULL, temp, prj;
1680   CharPtr first, last, curr, prev = NULL;
1681   Char  range[40];
1682 
1683   extra_acc_num = ValNodeLen(extra_access);
1684   if (extra_acc_num < EXTRA_ACCESSION_CUTOFF) {
1685     for (vnp = extra_access; vnp != NULL; vnp = vnp->next) {
1686       ValNodeCopyStr(&result, 0, (CharPtr)vnp->data.ptrvalue);
1687     }
1688     return result;
1689   }
1690 
1691   /* sort the accessions into bins of successive accessions */
1692   bin = bins = NULL;
1693   for (vnp = extra_access; vnp != NULL; vnp = vnp->next) {
1694     curr = (CharPtr) vnp->data.ptrvalue;
1695     if (ValidateAccn (curr) != 0) {
1696       continue;
1697     }
1698     if (IsProjectAccn(curr)) {
1699         prj = ValNodeNew(NULL);
1700         ValNodeAddStr ((ValNodePtr PNTR) &(prj->data.ptrvalue), 0, curr);
1701         prj->next = bins;
1702         bins = prj;
1703         continue;
1704     }
1705     if (!IsSuccessor(curr, prev)) {
1706       bin = ValNodeAdd(&bins);
1707     }
1708     if (bin != NULL) {
1709       temp = (ValNodePtr)bin->data.ptrvalue;
1710       ValNodeAddStr(&temp, 0, curr);
1711       bin->data.ptrvalue = temp;
1712     }
1713 
1714     prev = curr;
1715   }
1716 
1717   for (bin = bins; bin != NULL; bin = bin->next) {
1718     vnp = (ValNodePtr)bin->data.ptrvalue;
1719     if (ValNodeLen(vnp) > BIN_ACCESSION_CUTOFF) {
1720       first = last = NULL;
1721       for ( ; vnp != NULL; vnp = vnp->next) {
1722         last = (CharPtr)vnp->data.ptrvalue;
1723         if (first == NULL) {
1724           first = last;
1725         }
1726       }
1727       range[0] = '\0';
1728       StringCat(range, first);
1729       StringCat(range, "-");
1730       StringCat(range, last);
1731       ValNodeCopyStr(&result, 0, range);
1732     } else {
1733       for ( ; vnp != NULL; vnp = vnp->next) {
1734         ValNodeCopyStr(&result, 0, (CharPtr)vnp->data.ptrvalue);
1735       }
1736     }
1737     bin->data.ptrvalue = ValNodeFree((ValNodePtr)bin->data.ptrvalue);
1738   }
1739 
1740   bins = ValNodeFreeData(bins);
1741   return result;
1742 }
1743 
1744 
1745 /* !!! this definitely needs more work to support all classes, use proper SeqId !!! */
1746 
1747 NLM_EXTERN void AddAccessionBlock (
1748   Asn2gbWorkPtr awp
1749 )
1750 
1751 {
1752   size_t             acclen;
1753   SeqIdPtr           accn = NULL;
1754   IntAsn2gbJobPtr    ajp;
1755   Asn2gbSectPtr      asp;
1756   BaseBlockPtr       bbp;
1757   BioseqPtr          bsp;
1758   Char               buf [41];
1759   SeqMgrDescContext  dcontext;
1760   EMBLBlockPtr       ebp;
1761   ValNodePtr         extra_access;
1762   CharPtr            flatloc;
1763   GBBlockPtr         gbp;
1764   SeqIdPtr           gi = NULL;
1765   GBSeqPtr           gbseq;
1766   SeqIdPtr           gnl = NULL;
1767   SeqIdPtr           gpp = NULL;
1768   IndxPtr            index;
1769   Boolean            is_na;
1770   SeqIdPtr           lcl = NULL;
1771   size_t             len = 0;
1772   MolInfoPtr         mip;
1773   SeqDescrPtr        sdp;
1774   ValNodePtr         secondary_acc;
1775   CharPtr            separator = " ";
1776   SeqIdPtr           sip;
1777   TextSeqIdPtr       tsip;
1778   ValNodePtr         vnp;
1779   CharPtr            wgsaccn = NULL;
1780   CharPtr            xtra;
1781   StringItemPtr      ffstring;
1782 
1783   if (awp == NULL) return;
1784   ajp = awp->ajp;
1785   if (ajp == NULL) return;
1786   bsp = awp->bsp;
1787   if (bsp == NULL) return;
1788   asp = awp->asp;
1789   if (asp == NULL) return;
1790   
1791   ffstring = FFGetString(ajp);
1792   if ( ffstring == NULL ) return;
1793 
1794   is_na = ISA_na (bsp->mol);
1795 
1796   for (sip = bsp->id; sip != NULL; sip = sip->next) {
1797     switch (sip->choice) {
1798       case SEQID_GI :
1799         gi = sip;
1800         break;
1801       case SEQID_GENBANK :
1802       case SEQID_EMBL :
1803       case SEQID_DDBJ :
1804         accn = sip;
1805         tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1806         if (tsip != NULL) {
1807           acclen = StringLen (tsip->accession);
1808           if (acclen == 12) {
1809             wgsaccn = tsip->accession;
1810             len = 12;
1811           } else if (acclen == 13) {
1812             wgsaccn = tsip->accession;
1813             len = 13;
1814           }
1815         }
1816         break;
1817       case SEQID_OTHER :
1818         accn = sip;
1819         tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1820         if (tsip != NULL) {
1821           if (StringLen (tsip->accession) == 15) {
1822             wgsaccn = tsip->accession;
1823             len = 15;
1824           }
1825         }
1826         break;
1827       case SEQID_PIR :
1828       case SEQID_SWISSPROT :
1829       case SEQID_PRF :
1830       case SEQID_PDB :
1831         accn = sip;
1832         break;
1833       case SEQID_TPG :
1834       case SEQID_TPE :
1835       case SEQID_TPD :
1836         accn = sip;
1837         break;
1838       case SEQID_GPIPE :
1839         /* should not override better accession */
1840         gpp = sip;
1841         break;
1842       case SEQID_GENERAL :
1843         /* should not override better accession */
1844         gnl = sip;
1845         break;
1846       case SEQID_LOCAL :
1847         lcl = sip;
1848         break;
1849       default :
1850         break;
1851     }
1852   }
1853 
1854   sip = NULL;
1855   if (accn == NULL) {
1856     accn = gpp;
1857     gpp = NULL;
1858   }
1859   if (accn != NULL) {
1860     sip = accn;
1861   } else if (gnl != NULL) {
1862     sip = gnl;
1863   } else if (lcl != NULL) {
1864     sip = lcl;
1865   } else if (gi != NULL) {
1866     sip = gi;
1867   }
1868 
1869   if (sip == NULL) return;
1870 
1871   SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf));
1872 
1873   bbp = Asn2gbAddBlock (awp, ACCESSION_BLOCK, sizeof (BaseBlock));
1874   if (bbp == NULL) return;
1875 
1876   bbp->entityID = awp->entityID;
1877 
1878   if (accn == NULL) {
1879 
1880     /* if no accession, do not show local or general in ACCESSION */
1881 
1882     if (ajp->mode == ENTREZ_MODE || ajp->mode == SEQUIN_MODE) {
1883       buf [0] = '\0';
1884     }
1885   }
1886 
1887   FFStartPrint (ffstring, awp->format, 0, 12, "ACCESSION", 12, 5, 5, "AC", TRUE);
1888 
1889   if (awp->hup && accn != NULL) {
1890     FFAddOneString (ffstring, ";", FALSE, FALSE, TILDE_TO_SPACES);
1891 
1892   } else if (ajp->ajp.slp != NULL) {
1893     FF_www_accession (ajp, ffstring, buf, is_na);
1894     flatloc =  FFFlatLoc (ajp, bsp, ajp->ajp.slp, ajp->masterStyle);
1895     FFAddTextToString (ffstring, " REGION: ", flatloc, NULL, FALSE, FALSE, TILDE_TO_SPACES);
1896     MemFree (flatloc);
1897   } else {
1898     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
1899     if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1900         FFAddOneChar(ffstring, ';', FALSE);
1901     }
1902   }
1903 
1904   /* optionally populate indexes for NCBI internal database */
1905 
1906   if (ajp->index) {
1907     index = &asp->index;
1908   } else {
1909     index = NULL;
1910   }
1911 
1912   if (index != NULL) {
1913     index->accession = StringSave (buf);
1914   }
1915 
1916   /* optionally populate gbseq for XML-ized GenBank format */
1917 
1918   if (ajp->gbseq) {
1919     gbseq = &asp->gbseq;
1920   } else {
1921     gbseq = NULL;
1922   }
1923 
1924   if (gbseq != NULL) {
1925     gbseq->primary_accession = StringSave (buf);
1926   }
1927 
1928   if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
1929     separator = " ";
1930   } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1931     separator = " ";
1932   }
1933 
1934   if (gpp != NULL) {
1935     SeqIdWrite (gpp, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf));
1936     FFAddTextToString(ffstring, separator, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
1937     if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1938       FFAddOneChar(ffstring, ';', FALSE);
1939     }
1940   }
1941 
1942   if (ajp->ajp.slp == NULL) {
1943     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
1944     if (sdp != NULL && wgsaccn != NULL) {
1945       mip = (MolInfoPtr) sdp->data.ptrvalue;
1946       if (mip != NULL && mip->tech == MI_TECH_wgs) {
1947         StringNCpy_0 (buf, wgsaccn, sizeof (buf));
1948         acclen = StringLen (buf);
1949         if (acclen == 12 && StringCmp (buf + len - 6, "000000") != 0) {
1950           StringCpy (buf + len - 6, "000000");
1951         } else if (acclen == 13 && StringCmp (buf + len - 7, "0000000") != 0) {
1952           StringCpy (buf + len - 7, "0000000");
1953         } else if (acclen == 15 && StringCmp (buf + len - 8, "00000000") != 0) {
1954           StringCpy (buf + len - 8, "00000000");
1955         } else {
1956           buf [0] = '\0';
1957         }
1958         if (! StringHasNoText (buf)) {
1959           if ( GetWWW(ajp) ) {
1960             FFAddTextToString(ffstring, separator, "<a href=\"", NULL, FALSE, FALSE, TILDE_IGNORE);
1961             FF_Add_NCBI_Base_URL (ffstring, link_wgs);
1962             FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", buf, "\">", FALSE, FALSE, TILDE_IGNORE);
1963             FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
1964             FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
1965           } else {
1966             FFAddTextToString(ffstring, separator, buf, NULL, FALSE, FALSE, TILDE_TO_SPACES);
1967           }
1968           if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1969             FFAddOneChar(ffstring, ';', FALSE);
1970           }
1971         }
1972       }
1973     }
1974 
1975     sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
1976     while (sdp != NULL) {
1977 
1978       extra_access = NULL;
1979 
1980       switch (dcontext.seqdesctype) {
1981         case Seq_descr_genbank :
1982           gbp = (GBBlockPtr) sdp->data.ptrvalue;
1983           if (gbp != NULL) {
1984             extra_access = gbp->extra_accessions;
1985           }
1986           break;
1987         case Seq_descr_embl :
1988           ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
1989           if (ebp != NULL) {
1990             extra_access = ebp->extra_acc;
1991           }
1992           break;
1993         default :
1994           break;
1995       }
1996 
1997       if (extra_access != NULL) {
1998         bbp->entityID = dcontext.entityID;
1999         bbp->itemID = dcontext.itemID;
2000         bbp->itemtype = OBJ_SEQDESC;
2001 
2002         
2003         secondary_acc = GetSecondaryAccessions(extra_access);
2004         for (vnp = secondary_acc; vnp != NULL; vnp = vnp->next) {
2005           xtra = (CharPtr)vnp->data.ptrvalue;
2006           FFAddTextToString(ffstring, separator, xtra, NULL, FALSE, FALSE, TILDE_IGNORE);
2007           if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2008             FFAddOneChar(ffstring, ';', FALSE);
2009           }
2010 
2011           /* optionally populate indexes for NCBI internal database */
2012 
2013           if (index != NULL) {
2014             ValNodeCopyStrToHead (&(index->secondaries), 0, xtra);
2015           }
2016 
2017           /* optionally populate gbseq for XML-ized GenBank format */
2018 
2019           if (gbseq != NULL) {
2020               ValNodeCopyStr (&(gbseq->secondary_accessions), 0, xtra);
2021           }
2022         }
2023         ValNodeFreeData(secondary_acc);
2024       }
2025       
2026       sdp = SeqMgrGetNextDescriptor (bsp, sdp, 0, &dcontext);
2027     }
2028   }
2029 
2030   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "AC");
2031   FFRecycleString(ajp, ffstring);
2032 
2033   if (bbp->itemtype == 0) {
2034     bbp->entityID = bsp->idx.entityID;
2035     bbp->itemtype = bsp->idx.itemtype;
2036     bbp->itemID = bsp->idx.itemID;
2037   }
2038   
2039   if (awp->afp != NULL) {
2040     DoImmediateFormat (awp->afp, bbp);
2041   }
2042 }
2043 
2044 NLM_EXTERN void AddVersionBlock (
2045   Asn2gbWorkPtr awp
2046 )
2047 
2048 {
2049   SeqIdPtr         accn = NULL;
2050   IntAsn2gbJobPtr  ajp;
2051   Asn2gbSectPtr    asp;
2052   BaseBlockPtr     bbp;
2053   BioseqPtr        bsp;
2054   Char             buf [41];
2055   Uint1            format = PRINTID_TEXTID_ACC_VER;
2056   GBSeqPtr         gbseq;
2057   Int4             gi = -1;
2058   SeqIdPtr         gpp = NULL;
2059   IndxPtr          index;
2060   CharPtr          ptr;
2061   SeqIdPtr         sip;
2062   Char             tmp [41];
2063   Char             version [64];
2064   StringItemPtr    ffstring;
2065 
2066   if (awp == NULL) return;
2067   ajp = awp->ajp;
2068   if (ajp == NULL) return;
2069   bsp = awp->bsp;
2070   if (bsp == NULL) return;
2071   asp = awp->asp;
2072   if (asp == NULL) return;
2073 
2074   ffstring = FFGetString(ajp);
2075   if ( ffstring == NULL ) return;
2076 
2077   for (sip = bsp->id; sip != NULL; sip = sip->next) {
2078     switch (sip->choice) {
2079       case SEQID_GI :
2080         gi = sip->data.intvalue;
2081         break;
2082       case SEQID_GENBANK :
2083       case SEQID_EMBL :
2084       case SEQID_DDBJ :
2085       case SEQID_OTHER :
2086         accn = sip;
2087         break;
2088       case SEQID_PIR :
2089       case SEQID_SWISSPROT :
2090       case SEQID_PRF :
2091       case SEQID_PDB :
2092         accn = sip;
2093         break;
2094       case SEQID_TPG :
2095       case SEQID_TPE :
2096       case SEQID_TPD :
2097         accn = sip;
2098         break;
2099       case SEQID_GPIPE :
2100         /* should not override better accession */
2101         gpp = sip;
2102         break;
2103       default :
2104         break;
2105     }
2106   }
2107 
2108   if (accn == NULL) {
2109     accn = gpp;
2110     /*
2111     format = PRINTID_TEXTID_ACC_ONLY;
2112     */
2113   }
2114 
2115   /* if (gi < 1 && accn == NULL) return; */
2116 
2117   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2118     if ( accn == NULL ) return;
2119     if (awp->newLocusLine) return;
2120   }
2121 
2122   bbp = Asn2gbAddBlock (awp, VERSION_BLOCK, sizeof (BaseBlock));
2123   if (bbp == NULL) return;
2124 
2125   bbp->entityID = awp->entityID;
2126 
2127   /* no longer displaying NID */
2128 
2129   /*
2130   if (gi > 0) {
2131     sprintf (version, "g%ld", (long) gi);
2132 
2133     gb_StartPrint (awp->format, needInitBuff, 0, 12, "NID", 13, 5, 5, "NI", TRUE);
2134     needInitBuff = FALSE;
2135 
2136     gb_AddString (NULL, version, NULL, FALSE, FALSE, TILDE_TO_SPACES);
2137 
2138     ff_EndPrint();
2139     needEndPrint = FALSE;
2140   }
2141   */
2142 
2143   version [0] = '\0';
2144 
2145   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2146     SeqIdWrite (accn, version, format, sizeof (version) - 1);
2147 
2148     FFStartPrint (ffstring, awp->format, 0, 12, "VERSION", 12, 5, 5, "SV", TRUE);
2149 
2150     FFAddOneString (ffstring, version, FALSE, FALSE, TILDE_TO_SPACES);
2151 
2152     FFAddOneChar(ffstring, '\n', FALSE);
2153 
2154     bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "SV");
2155     FFRecycleString(ajp, ffstring);
2156 
2157     if (awp->afp != NULL) {
2158       DoImmediateFormat (awp->afp, bbp);
2159     }
2160 
2161     return;
2162   }
2163 
2164   if (accn != NULL) {
2165 
2166     buf [0] = '\0';
2167     SeqIdWrite (accn, buf, format, sizeof (buf) - 1);
2168 
2169     if (gi > 0) {
2170       sprintf (version, "%s  GI:%ld", buf, (long) gi);
2171     } else {
2172       sprintf (version, "%s", buf);
2173     }
2174 
2175     FFStartPrint (ffstring, awp->format, 0, 12, "VERSION", 12, 5, 5, "SV", TRUE);
2176 
2177     FFAddTextToString (ffstring, NULL, version, "\n", FALSE, FALSE, TILDE_TO_SPACES);
2178     /* optionally populate indexes for NCBI internal database */
2179 
2180     if (ajp->index) {
2181       index = &asp->index;
2182     } else {
2183       index = NULL;
2184     }
2185 
2186     if (index != NULL) {
2187       ptr = StringChr (buf, '.');
2188       if (ptr != NULL) {
2189         ptr++;
2190         index->version = StringSave (ptr);
2191       }
2192       if (gi > 0) {
2193         sprintf (tmp, "%ld", (long) gi);
2194         index->gi = StringSave (tmp);
2195       }
2196     }
2197 
2198     /* optionally populate gbseq for XML-ized GenBank format */
2199 
2200     if (ajp->gbseq) {
2201       gbseq = &asp->gbseq;
2202     } else {
2203       gbseq = NULL;
2204     }
2205 
2206     if (gbseq != NULL) {
2207       ptr = StringChr (buf, '.');
2208       if (ptr != NULL) {
2209         gbseq->accession_version = StringSave (buf);
2210       }
2211     }
2212 
2213   } else if (gi > 0) {
2214 
2215     FFStartPrint (ffstring, awp->format, 0, 0, "VERSION", 12, 5, 5, "SV", TRUE);
2216 
2217     sprintf (version, "  GI:%ld", (long) gi);
2218 
2219     FFAddTextToString (ffstring, NULL, version, "\n", FALSE, FALSE, TILDE_TO_SPACES);
2220 
2221   } else {
2222 
2223     FFStartPrint (ffstring, awp->format, 0, 0, "VERSION", 0, 5, 5, "SV", TRUE);
2224     FFAddOneChar(ffstring, '\n', FALSE);
2225   }
2226 
2227   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "SV");
2228   FFRecycleString(ajp, ffstring);
2229 
2230   if (bbp->itemtype == 0) {
2231     bbp->itemtype = bsp->idx.itemtype;
2232     bbp->itemID = bsp->idx.itemID;
2233   }
2234 
2235   if (awp->afp != NULL) {
2236     DoImmediateFormat (awp->afp, bbp);
2237   }
2238 }
2239 
2240 static void FF_asn2gb_www_projID (
2241   StringItemPtr ffstring,
2242   CharPtr projID
2243 )
2244 
2245 {
2246   FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2247   FF_Add_NCBI_Base_URL (ffstring, link_projid);
2248   FFAddOneString (ffstring, projID, FALSE, FALSE, TILDE_IGNORE);
2249   FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2250   FFAddOneString (ffstring, projID, FALSE, FALSE, TILDE_IGNORE);
2251   FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2252 }
2253 
2254 static CharPtr GetDBLinkString (
2255   UserObjectPtr uop
2256 )
2257 
2258 {
2259   Char          buf1 [256];
2260   Char          buf2 [256];
2261   Char          buf3 [256];
2262   CharPtr PNTR  cpp;
2263   Int4          i;
2264   Int4Ptr       ip;
2265   size_t        len;
2266   ObjectIdPtr   oip;
2267   CharPtr       prefix;
2268   CharPtr       str;
2269   Char          tmp [32];
2270   UserFieldPtr  ufp;
2271   Int4          val;
2272 
2273   if (uop == NULL) return NULL;
2274 
2275   buf1 [0] = '\0';
2276   buf2 [0] = '\0';
2277   buf3 [0] = '\0';
2278 
2279   for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2280     oip = ufp->label;
2281     if (oip == NULL || oip->str == NULL) continue;
2282     if (StringICmp (oip->str, "Trace Assembly Archive") == 0 && ufp->choice == 8) {
2283       ip = (Int4Ptr) ufp->data.ptrvalue;
2284       if (ufp->num > 0 && ip != NULL) {
2285         val = ip [0];
2286         if (val > 0) {
2287           sprintf (buf1, "Trace Assembly Archive:%ld", (long) val);
2288           for (i = 1; i < ufp->num; i++) {
2289             val = ip [i];
2290             if (val > 0) {
2291               sprintf (tmp, ",%ld", (long) val);
2292               StringCat (buf1, tmp);
2293             }
2294           }
2295         }
2296       }
2297     }
2298     if (StringICmp (oip->str, "Bio Sample") == 0 && ufp->choice == 7) {
2299       cpp = (CharPtr PNTR) ufp->data.ptrvalue;
2300       if (ufp->num > 0 && cpp != NULL) {
2301         str = cpp [0];
2302         if (StringDoesHaveText (str)) {
2303           sprintf (buf2, "Bio Sample:%s", str);
2304           for (i = 1; i < ufp->num; i++) {
2305             str = cpp [i];
2306             if (StringDoesHaveText (str)) {
2307               sprintf (tmp, ",%s", str);
2308               StringCat (buf2, tmp);
2309             }
2310           }
2311         }
2312       }
2313     }
2314     if (StringICmp (oip->str, "ProbeDB") == 0 && ufp->choice == 7) {
2315       cpp = (CharPtr PNTR) ufp->data.ptrvalue;
2316       if (ufp->num > 0 && cpp != NULL) {
2317         str = cpp [0];
2318         if (StringDoesHaveText (str)) {
2319           sprintf (buf3, "ProbeDB:%s", str);
2320           for (i = 1; i < ufp->num; i++) {
2321             str = cpp [i];
2322             if (StringDoesHaveText (str)) {
2323               sprintf (tmp, ",%s", str);
2324               StringCat (buf3, tmp);
2325             }
2326           }
2327         }
2328       }
2329     }
2330   }
2331 
2332   if (StringHasNoText (buf1) && StringHasNoText (buf2) && StringHasNoText (buf3)) return NULL;
2333 
2334   len = StringLen (buf1) + StringLen (buf2) + StringLen (buf3);
2335   str = (CharPtr) MemNew (sizeof (Char) * (len + 2));
2336   if (str == NULL) return NULL;
2337 
2338   prefix = "";
2339 
2340   if (StringDoesHaveText (buf1)) {
2341     StringCat (str, buf1);
2342     prefix = "\n";
2343   }
2344 
2345   if (StringDoesHaveText (buf2)) {
2346     StringCat (str, prefix);
2347     StringCat (str, buf2);
2348     prefix = "\n";
2349   }
2350 
2351   if (StringDoesHaveText (buf3)) {
2352     StringCat (str, prefix);
2353     StringCat (str, buf3);
2354     prefix = "\n";
2355   }
2356 
2357   return str;
2358 }
2359 
2360 NLM_EXTERN void AddDblinkBlock (
2361   Asn2gbWorkPtr awp
2362 )
2363 
2364 {
2365   IntAsn2gbJobPtr    ajp;
2366   Asn2gbSectPtr      asp;
2367   BaseBlockPtr       bbp;
2368   BioseqPtr          bsp;
2369   Char               buf [32];
2370   UserFieldPtr       curr;
2371   Uint4              dbitemID = 0;
2372   UserObjectPtr      dbuop = NULL;
2373   SeqMgrDescContext  dcontext;
2374   Boolean            first = TRUE;
2375   StringItemPtr      ffstring;
2376   GBSeqPtr           gbseq;
2377   Uint4              gpitemID = 0;
2378   UserObjectPtr      gpuop = NULL;
2379   ValNodePtr         head = NULL;
2380   ObjectIdPtr        oip;
2381   Int4               parentID;
2382   CharPtr            prefix;
2383   Int4               projectID;
2384   SeqDescrPtr        sdp;
2385   CharPtr            str;
2386   UserObjectPtr      uop;
2387   Int4               val;
2388 
2389   if (awp == NULL) return;
2390   ajp = awp->ajp;
2391   if (ajp == NULL) return;
2392   bsp = awp->bsp;
2393   if (bsp == NULL) return;
2394   asp = awp->asp;
2395   if (asp == NULL) return;
2396 
2397   if (ISA_na (bsp->mol) && awp->format != GENBANK_FMT) return;
2398   if (ISA_aa (bsp->mol) && awp->format != GENPEPT_FMT) return;
2399 
2400   /*
2401   if (! ISA_na (bsp->mol)) return;
2402   if (awp->format != GENBANK_FMT) return;
2403   */
2404 
2405   if (ajp->gbseq) {
2406     gbseq = &asp->gbseq;
2407   } else {
2408     gbseq = NULL;
2409   }
2410 
2411   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
2412   while (sdp != NULL) {
2413     uop = (UserObjectPtr) sdp->data.ptrvalue;
2414     if (uop != NULL) {
2415       oip = uop->type;
2416       if (oip != NULL && StringICmp (oip->str, "GenomeProjectsDB") == 0) {
2417         gpuop = uop;
2418         gpitemID = dcontext.itemID;
2419       }
2420       if (oip != NULL && StringICmp (oip->str, "DBLink") == 0) {
2421         dbuop = uop;
2422         dbitemID = dcontext.itemID;
2423       }
2424     }
2425     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
2426   }
2427   if (gpuop == NULL && dbuop == NULL) return;
2428 
2429   ffstring = FFGetString (ajp);
2430   if ( ffstring == NULL ) return;
2431 
2432   if (gpuop != NULL) {
2433     bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
2434     if (bbp == NULL) return;
2435 
2436     bbp->entityID = awp->entityID;
2437     bbp->itemID = gpitemID;
2438     bbp->itemtype = OBJ_SEQDESC;
2439 
2440     if (first) {
2441       FFStartPrint (ffstring, awp->format, 0, 12, "DBLINK", 12, 5, 5, "XX", TRUE);
2442     } else {
2443       FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "XX", TRUE);
2444     }
2445     first = FALSE;
2446 
2447     prefix = "Project:";
2448     projectID = 0;
2449     parentID = 0;
2450     for (curr = gpuop->data; curr != NULL; curr = curr->next) {
2451       oip = curr->label;
2452       if (oip == NULL) continue;
2453       if (StringICmp (oip->str, "ProjectID") == 0) {
2454         if (curr->choice == 2) {
2455           val = (Int4) curr->data.intvalue;
2456           if (projectID > 0) {
2457             sprintf (buf, "%ld", (long) projectID);
2458             FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2459             if (GetWWW (ajp)) {
2460               FF_asn2gb_www_projID (ffstring, buf);
2461             } else {
2462               FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
2463             }
2464             /*
2465             FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
2466             */
2467             if (gbseq != NULL) {
2468               if (head == NULL) {
2469                 sprintf (buf, "%ld", (long) projectID);
2470               } else {
2471                 sprintf (buf, ", %ld", (long) projectID);
2472               }
2473               ValNodeCopyStr (&head, 0, buf);
2474             }
2475             prefix = ",";
2476             parentID = 0;
2477           }
2478           projectID = val;
2479         }
2480       } else if (StringICmp (oip->str, "ParentID") == 0) {
2481         if (curr->choice == 2) {
2482           val = (Int4) curr->data.intvalue;
2483           parentID = val;
2484         }
2485       }
2486     }
2487     if (projectID > 0) {
2488       sprintf (buf, "%ld", (long) projectID);
2489       FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2490       if (GetWWW (ajp)) {
2491         FF_asn2gb_www_projID (ffstring, buf);
2492       } else {
2493         FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
2494       }
2495       /*
2496       FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
2497       */
2498       if (gbseq != NULL) {
2499         if (head == NULL) {
2500           sprintf (buf, "%ld", (long) projectID);
2501         } else {
2502           sprintf (buf, ", %ld", (long) projectID);
2503         }
2504         ValNodeCopyStr (&head, 0, buf);
2505       }
2506     }
2507 
2508     bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
2509     FFRecycleString (ajp, ffstring);
2510     ffstring = FFGetString (ajp);
2511 
2512     if (awp->afp != NULL) {
2513       DoImmediateFormat (awp->afp, bbp);
2514     }
2515   }
2516 
2517   if (dbuop != NULL) {
2518     str = GetDBLinkString (dbuop);
2519     if (StringDoesHaveText (str)) {
2520       bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
2521       if (bbp == NULL) return;
2522 
2523       bbp->entityID = awp->entityID;
2524       bbp->itemID = dbitemID;
2525       bbp->itemtype = OBJ_SEQDESC;
2526 
2527       if (first) {
2528         FFStartPrint (ffstring, awp->format, 0, 12, "DBLINK", 12, 5, 5, "XX", TRUE);
2529       } else {
2530         FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "XX", TRUE);
2531       }
2532       first = FALSE;
2533 
2534       FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
2535 
2536       bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
2537       MemFree (str);
2538 
2539       if (awp->afp != NULL) {
2540         DoImmediateFormat (awp->afp, bbp);
2541       }
2542     }
2543   }
2544 
2545   FFRecycleString (ajp, ffstring);
2546 
2547   if (gbseq != NULL) {
2548     if (head != NULL) {
2549       gbseq->project = MergeFFValNodeStrs (head);
2550       ValNodeFreeData (head);
2551     }
2552   }
2553 }
2554 
2555 /* only displaying PID in GenPept format */
2556 
2557 /*
2558 static void AddPidBlock (Asn2gbWorkPtr awp)
2559 
2560 {
2561   IntAsn2gbJobPtr  ajp;
2562   BaseBlockPtr  bbp;
2563   BioseqPtr     bsp;
2564   Int4          gi = -1;
2565   SeqIdPtr      sip;
2566   Char          version [64];
2567   StringItemPtr ffstring;
2568 
2569   if (awp == NULL) return;
2570   ajp = awp->ajp;
2571   if (ajp == NULL) return;
2572   bsp = awp->bsp;
2573   if (bsp == NULL) return;
2574 
2575   for (sip = bsp->id; sip != NULL; sip = sip->next) {
2576     switch (sip->choice) {
2577       case SEQID_GI :
2578         gi = sip->data.intvalue;
2579         break;
2580       default :
2581         break;
2582     }
2583   }
2584 
2585   if (gi < 1) return;
2586 
2587   bbp = Asn2gbAddBlock (awp, PID_BLOCK, sizeof (BaseBlock));
2588   if (bbp == NULL) return;
2589 
2590   ffstring = FFGetString(ajp);
2591   if ( ffstring == NULL ) return;
2592 
2593   FFStartPrint (ffstring, awp->format, 0, 12, "PID", 12, 5, 5, NULL, TRUE);
2594 
2595   sprintf (version, "g%ld", (long) gi);
2596   FFAddOneString (ffstring, version, FALSE, FALSE, TILDE_TO_SPACES);
2597 
2598   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, NULL);
2599   FFRecycleString(ajp, ffstring);
2600 }
2601 */
2602 
2603 static Uint1 dbsource_fasta_order [NUM_SEQID] = {
2604   33, /* 0 = not set */
2605   20, /* 1 = local Object-id */
2606   15, /* 2 = gibbsq */
2607   16, /* 3 = gibbmt */
2608   30, /* 4 = giim Giimport-id */
2609   10, /* 5 = genbank */
2610   10, /* 6 = embl */
2611   10, /* 7 = pir */
2612   10, /* 8 = swissprot */
2613   15, /* 9 = patent */
2614   18, /* 10 = other TextSeqId */
2615   20, /* 11 = general Dbtag */
2616   31, /* 12 = gi */
2617   10, /* 13 = ddbj */
2618   10, /* 14 = prf */
2619   12, /* 15 = pdb */
2620   10, /* 16 = tpg */
2621   10, /* 17 = tpe */
2622   10, /* 18 = tpd */
2623   10, /* 19 = gpp */
2624   10  /* 20 = nat */
2625 };
2626 
2627 static void AddToUniqueSipList (
2628   ValNodePtr PNTR list,
2629   SeqIdPtr sip
2630 )
2631 
2632 {
2633   ValNodePtr  vnp;
2634 
2635   if (list == NULL || sip == NULL) return;
2636   for (vnp = *list; vnp != NULL; vnp = vnp->next) {
2637     if (SeqIdMatch (sip, (SeqIdPtr) vnp->data.ptrvalue)) return;
2638   }
2639   ValNodeAddPointer (list, 0, (Pointer) sip);
2640 }
2641 
2642 static Boolean WriteDbsourceID (
2643   SeqIdPtr sip,
2644   CharPtr str,
2645   BoolPtr is_na_p
2646 )
2647 
2648 {
2649   Boolean       check_na = FALSE;
2650   DbtagPtr      db;
2651   CharPtr       dt;
2652   Int4          gi;
2653   ObjectIdPtr   oip;
2654   CharPtr       pfx;
2655   PDBSeqIdPtr   psip = NULL;
2656   CharPtr       prefix;
2657   Boolean       rsult = FALSE;
2658   CharPtr       sfx;
2659   CharPtr       suffix;
2660   Char          tmp [32];
2661   TextSeqIdPtr  tsip = NULL;
2662 
2663   if (is_na_p != NULL) {
2664     *is_na_p = FALSE;
2665   }
2666   if (sip == NULL || str == NULL) return FALSE;
2667   *str = '\0';
2668   switch (sip->choice) {
2669     case SEQID_LOCAL :
2670       oip = (ObjectIdPtr) sip->data.ptrvalue;
2671       if (oip == NULL) return FALSE;
2672       if (! StringHasNoText (oip->str)) {
2673         StringCat (str, oip->str);
2674         return TRUE;
2675       } else if (oip->id > 0) {
2676         sprintf (tmp, "%ld", (long) oip->id);
2677         StringCat (str, tmp);
2678         return TRUE;
2679       }
2680       return FALSE;
2681     case SEQID_GI :
2682       gi = (Int4) sip->data.intvalue;
2683       if (gi == 0) return FALSE;
2684       sprintf (tmp, "gi: %ld", (long) gi);
2685       StringCat (str, tmp);
2686       return TRUE;
2687     case SEQID_GENERAL :
2688       db = (DbtagPtr) sip->data.ptrvalue;
2689       if (db == NULL) return FALSE;
2690       /* !!! still need to implement this !!! */
2691       return FALSE;
2692     case SEQID_GENBANK :
2693     case SEQID_EMBL :
2694     case SEQID_DDBJ :
2695     case SEQID_OTHER :
2696     case SEQID_TPG :
2697     case SEQID_TPE :
2698     case SEQID_TPD :
2699     case SEQID_GPIPE :
2700       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2701       if (tsip == NULL) return FALSE;
2702       check_na = TRUE;
2703       break;
2704     case SEQID_PIR :
2705     case SEQID_SWISSPROT :
2706     case SEQID_PRF :
2707       tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2708       if (tsip == NULL) return FALSE;
2709       break;
2710     case SEQID_PDB :
2711       psip = (PDBSeqIdPtr) sip->data.ptrvalue;
2712       if (psip == NULL) return FALSE;
2713       break;
2714     default :
2715       break;
2716   }
2717   prefix = " ";
2718   suffix = NULL;
2719   switch (sip->choice) {
2720     case SEQID_EMBL :
2721       StringCat (str, "embl ");
2722       suffix = ",";
2723       break;
2724     case SEQID_OTHER :
2725       StringCat (str, "REFSEQ: ");
2726       break;
2727     case SEQID_SWISSPROT :
2728       StringCat (str, "UniProtKB: ");
2729       suffix = ",";
2730       break;
2731     case SEQID_PIR :
2732       StringCat (str, "UniProtKB: ");
2733       break;
2734     case SEQID_PRF :
2735       StringCat (str, "prf: ");
2736       break;
2737     case SEQID_PDB :
2738       StringCat (str, "pdb: ");
2739       suffix = ",";
2740       break;
2741     default :
2742       break;
2743   }
2744   pfx = NULL;
2745   sfx = NULL;
2746   if (tsip != NULL) {
2747     if (! StringHasNoText (tsip->name)) {
2748       StringCat (str, sfx);
2749       StringCat (str, pfx);
2750       StringCat (str, "locus ");
2751       StringCat (str, tsip->name);
2752       sfx = suffix;
2753       pfx = prefix;
2754       rsult = TRUE;
2755     }
2756     if (! StringHasNoText (tsip->accession)) {
2757       StringCat (str, sfx);
2758       StringCat (str, pfx);
2759       StringCat (str, "accession ");
2760       StringCat (str, tsip->accession);
2761       sfx = suffix;
2762       pfx = prefix;
2763       rsult = TRUE;
2764       if (check_na && is_na_p != NULL) {
2765         *is_na_p = IS_ntdb_accession (tsip->accession);
2766       }
2767     }
2768     if (tsip->version > 0 && sip->choice != SEQID_SWISSPROT) {
2769       sprintf (tmp, ".%d", (int) tsip->version);
2770       StringCat (str, tmp);
2771       sfx = suffix;
2772       pfx = prefix;
2773     }
2774     if (! StringHasNoText (tsip->release) && sip->choice != SEQID_SWISSPROT) {
2775       StringCat (str, pfx);
2776       StringCat (str, "release ");
2777       StringCat (str, tsip->release);
2778       sfx = suffix;
2779       pfx = prefix;
2780     }
2781     if (sip->choice == SEQID_SWISSPROT || sip->choice == SEQID_PIR || sip->choice == SEQID_PRF) {
2782       StringCat (str, ";");
2783     }
2784     return rsult;
2785   }
2786   if (psip != NULL) {
2787     if (! StringHasNoText (psip->mol)) {
2788       StringCat (str, "molecule ");
2789       StringCat (str, psip->mol);
2790       sfx = suffix;
2791       pfx = prefix;
2792       rsult = TRUE;
2793     }
2794     if (psip->chain > 0) {
2795       StringCat (str, sfx);
2796       StringCat (str, pfx);
2797       sprintf (tmp, "chain %d", (int) psip->chain);
2798       StringCat (str, tmp);
2799       sfx = suffix;
2800       pfx = prefix;
2801       rsult = TRUE;
2802     }
2803     if (psip->rel != NULL) {
2804       StringCat (str, sfx);
2805       StringCat (str, pfx);
2806       StringCat (str, "release ");
2807       dt = asn2gb_PrintDate (psip->rel);
2808       StringCat (str, dt);
2809       MemFree (dt);
2810       sfx = suffix;
2811       pfx = prefix;
2812       rsult = TRUE;
2813     }
2814     StringCat (str, ";");
2815     return rsult;
2816   }
2817   return rsult;
2818 }
2819 
2820 
2821 static void AddSPBlock (
2822   IntAsn2gbJobPtr ajp,
2823   StringItemPtr ffstring,
2824   BioseqPtr bsp
2825 )
2826 
2827 {
2828   CharPtr            acc;
2829   Char               buf [64];
2830   DbtagPtr           db;
2831   SeqMgrDescContext  dcontext;
2832   Boolean            first;
2833   Int4               gi;
2834   Boolean            has_link;
2835   Char               id [42];
2836   ObjectIdPtr        oip;
2837   CharPtr            ptr;
2838   SeqDescrPtr        sdp;
2839   SeqIdPtr           sid;
2840   SeqIdPtr           sif;
2841   SeqIdPtr           sip;
2842   SPBlockPtr         spb;
2843   CharPtr            string;
2844   ValNodePtr         vnp;
2845   CharPtr            str;
2846   Char               numbuf[40];
2847 
2848   if (bsp == NULL) return;
2849   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_sp, &dcontext);
2850   if (sdp == NULL) return;
2851   spb = (SPBlockPtr) sdp->data.ptrvalue;
2852   if (spb == NULL) return;
2853 
2854   if (spb->_class == 1) {
2855     FFAddOneString (ffstring, "class: standard.", FALSE, FALSE, TILDE_IGNORE);
2856     FFAddNewLine(ffstring);
2857   } else if (spb->_class == 2) {
2858     FFAddOneString (ffstring, "class: preliminary.", FALSE, FALSE, TILDE_IGNORE);
2859     FFAddNewLine(ffstring);
2860   }
2861 
2862   if (spb->extra_acc) {
2863     FFAddOneString (ffstring, "extra accessions:", FALSE, FALSE, TILDE_IGNORE);
2864     for (vnp = spb->extra_acc; vnp != NULL; vnp = vnp->next) {
2865       FFAddOneString (ffstring, (CharPtr) vnp->data.ptrvalue, FALSE, FALSE, TILDE_IGNORE);
2866       if (vnp->next != NULL) {
2867         FFAddOneChar (ffstring, ',', FALSE );
2868       }
2869     }
2870     FFAddNewLine(ffstring);
2871   }
2872 
2873   if (spb->imeth) {
2874     FFAddOneString (ffstring, "seq starts with Met", FALSE, FALSE, TILDE_IGNORE);
2875   }
2876 
2877   if (spb->plasnm != NULL) {
2878     FFAddOneString (ffstring, "plasmid:", FALSE, FALSE, TILDE_IGNORE);
2879     for (vnp = spb->plasnm; vnp != NULL; vnp = vnp->next) {
2880       FFAddOneString (ffstring, (CharPtr) vnp->data.ptrvalue, FALSE, FALSE, TILDE_IGNORE);
2881       FFAddOneChar (ffstring, ',', FALSE );
2882     }
2883   }
2884 
2885   if (spb->created) {
2886     string = PrintDate (spb->created);
2887     FFAddOneString (ffstring, "created: ", FALSE, FALSE, TILDE_IGNORE);
2888     FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
2889 
2890     MemFree (string);
2891   }
2892 
2893   if (spb->sequpd) {
2894     string = PrintDate (spb->sequpd);
2895     FFAddOneString (ffstring, "sequence updated: ", FALSE, FALSE, TILDE_IGNORE);
2896     FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
2897     MemFree (string);
2898   }
2899 
2900   if (spb->annotupd) {
2901     string = PrintDate (spb->annotupd);
2902     FFAddOneString (ffstring, "annotation updated: ", FALSE, FALSE, TILDE_IGNORE);
2903     FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
2904     MemFree (string);
2905   }
2906 
2907   if (spb->seqref) {
2908     FFAddOneString (ffstring, "xrefs: ", FALSE, FALSE, TILDE_IGNORE);
2909     first = TRUE;
2910     for (sid = spb->seqref; sid != NULL; sid = sid->next) {
2911       acc = NULL;
2912       has_link = FALSE;
2913       if (first == FALSE) {
2914         FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
2915       }
2916       first = FALSE;
2917       sip = sid;
2918       sif = NULL;
2919       id [0] = '\0';
2920       if (sip->choice == SEQID_GI) {
2921         gi = sid->data.intvalue;
2922         if (! GetAccnVerFromServer (gi, id)) {
2923           sif = GetSeqIdForGI (gi);
2924           if (sif != NULL) {
2925             sip = sif;
2926           }
2927         }
2928       }
2929       if (id [0] == '\0') {
2930         SeqIdWrite (sip, id, PRINTID_TEXTID_ACC_VER, sizeof (id) - 1);
2931       }
2932       if (sid->choice == SEQID_GI) {
2933         has_link = TRUE;
2934       }
2935       if (StringDoesHaveText (id)) {
2936         acc = id;
2937       }
2938       if (acc != NULL) {
2939         if ( GetWWW(ajp) && has_link ) {
2940           sprintf(numbuf, "%ld", (long) sid->data.intvalue);
2941           FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2942           StringNCpy_0 (buf, acc, sizeof (buf));
2943           ptr = StringChr (buf, '.');
2944           if (ptr != NULL) {
2945             *ptr = '\0';
2946           }
2947           if (IS_ntdb_accession (buf)) {
2948             FF_Add_NCBI_Base_URL (ffstring, link_seqn);
2949           } else {
2950             FF_Add_NCBI_Base_URL (ffstring, link_seqp);
2951           }
2952           FFAddTextToString(ffstring, /* "val=" */ NULL, numbuf, "\">", FALSE, FALSE, TILDE_IGNORE);
2953           FFAddOneString(ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
2954           FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2955         } else {
2956           FFAddOneString(ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
2957         }
2958       }
2959       if (sif != NULL) {
2960         SeqIdFree (sif);
2961       }
2962     }
2963   }
2964 
2965   first = TRUE;
2966   for (vnp = spb->dbref; vnp != NULL; vnp = vnp->next) {
2967     db = (DbtagPtr) vnp->data.ptrvalue;
2968     if (db == NULL) continue;
2969     oip = db->tag;
2970     if (oip == NULL) continue;
2971     has_link = FALSE;
2972     if (first) {
2973       FFAddNewLine(ffstring);
2974       FFAddOneString (ffstring, "xrefs (non-sequence databases): ", FALSE, FALSE, TILDE_IGNORE);
2975       first = FALSE;
2976     } else {
2977       FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
2978     }
2979 
2980     str = NULL;
2981     if ( oip->str != NULL ) {
2982       str = oip->str;
2983       if (StringNCmp (str, "GO:", 3) == 0) {
2984         str += 3;
2985       } else if (StringNCmp (str, "MGI:", 4) == 0) {
2986         str += 4;
2987       } else if (StringNCmp (str, "HGNC:", 5) == 0) {
2988         str += 5;
2989       }
2990     } else if ( oip->id > 0 ) {
2991       sprintf (numbuf, "%d", oip->id);
2992       str = numbuf;
2993     }
2994 
2995     FF_www_db_xref (ajp, ffstring, db->db, str, bsp);
2996 
2997     /*
2998     if (StringCmp (db->db, "MGD") == 0 || StringCmp (db->db, "MGI") == 0) {
2999       FFAddOneString (ffstring, "MGI", FALSE, FALSE, TILDE_IGNORE);
3000     } else {
3001       FFAddOneString (ffstring, db->db, FALSE, FALSE, TILDE_IGNORE);
3002     }
3003     if (StringCmp (db->db, "MIM") == 0) {
3004       has_link = TRUE;
3005     }
3006 
3007     str = NULL;
3008     if ( oip->str != NULL ) {
3009       str = oip->str;
3010       if (StringNCmp (str, "GO:", 3) == 0) {
3011         str += 3;
3012       } else if (StringNCmp (str, "MGI:", 4) == 0) {
3013         str += 4;
3014       } else if (StringNCmp (str, "HGNC:", 5) == 0) {
3015         str += 5;
3016       }
3017     } else if ( oip->id > 0 ) {
3018       sprintf(numbuf, "%d", oip->id);
3019       str = numbuf;
3020     }
3021 
3022     if ( !StringHasNoText(str) ) {
3023       FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
3024       if ( GetWWW(ajp) && has_link) {
3025         FFAddOneChar (ffstring, ' ', FALSE);
3026         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3027         FF_Add_NCBI_Base_URL (ffstring, link_omim);
3028         FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3029         FFAddTextToString(ffstring, "\">", str, "</a>", FALSE, FALSE, TILDE_IGNORE);
3030       } else {
3031         FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3032       }
3033     }
3034     */
3035   }
3036 }
3037 
3038 static void AddPIRBlock (
3039   IntAsn2gbJobPtr ajp,
3040   StringItemPtr ffstring,
3041   BioseqPtr bsp
3042 )
3043 
3044 {
3045   CharPtr            acc;
3046   SeqMgrDescContext  dcontext;
3047   Boolean            first;
3048   Char               id [41];
3049   CharPtr            prefix = NULL;
3050   SeqDescrPtr        sdp;
3051   SeqIdPtr           sid;
3052   PirBlockPtr        pbp;
3053 
3054   if (bsp == NULL) return;
3055   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pir, &dcontext);
3056   if (sdp == NULL) return;
3057   pbp = (PirBlockPtr) sdp->data.ptrvalue;
3058   if (pbp == NULL) return;
3059 
3060   if (pbp->host != NULL) {
3061     FFAddTextToString (ffstring, "host:", pbp->host, "\n", FALSE, TRUE, TILDE_IGNORE);
3062     prefix = ";";
3063   }
3064 
3065   if (pbp->source != NULL) {
3066     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3067     FFAddNewLine(ffstring);
3068     FFAddTextToString(ffstring, "source: ", pbp->source, "\n", FALSE, TRUE, TILDE_IGNORE);
3069     prefix = ";";
3070   }
3071 
3072   if (pbp->summary != NULL) {
3073     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3074     FFAddNewLine(ffstring);
3075     FFAddTextToString(ffstring, "summary: ", pbp->summary, "\n", FALSE, TRUE, TILDE_IGNORE);
3076     prefix = ";";
3077   }
3078 
3079   if (pbp->genetic != NULL) {
3080     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3081     FFAddNewLine(ffstring);
3082     FFAddTextToString(ffstring, "genetic: ", pbp->genetic, "\n", FALSE, TRUE, TILDE_IGNORE);
3083     prefix = ";";
3084   }
3085 
3086   if (pbp->includes != NULL) {
3087     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3088     FFAddNewLine(ffstring);
3089     FFAddTextToString(ffstring, "includes: ", pbp->includes, "\n", FALSE, TRUE, TILDE_IGNORE);
3090     prefix = ";";
3091   }
3092 
3093   if (pbp->placement != NULL) {
3094     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3095     FFAddNewLine(ffstring);
3096     FFAddTextToString(ffstring, "placement: ", pbp->placement, "\n", FALSE, TRUE, TILDE_IGNORE);
3097     prefix = ";";
3098   }
3099 
3100   if (pbp->superfamily != NULL) {
3101     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3102     FFAddNewLine(ffstring);
3103     FFAddTextToString(ffstring, "superfamily: ", pbp->superfamily, "\n", FALSE, TRUE, TILDE_IGNORE);
3104     prefix = ";";
3105   }
3106 
3107   if (pbp->cross_reference != NULL) {
3108     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3109     FFAddNewLine(ffstring);
3110     FFAddTextToString(ffstring, "xref: ", pbp->cross_reference, "\n", FALSE, TRUE, TILDE_IGNORE);
3111     prefix = ";";
3112   }
3113 
3114   if (pbp->date != NULL) {
3115     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3116     FFAddNewLine(ffstring);
3117     FFAddTextToString (ffstring, "PIR dates: ", pbp->date, "\n", FALSE, TRUE, TILDE_IGNORE);
3118     prefix = ";";
3119   }
3120 
3121   if (pbp->had_punct) {
3122     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3123     FFAddNewLine(ffstring);
3124     FFAddOneString (ffstring, "punctuation in sequence", FALSE, FALSE, TILDE_IGNORE);
3125     prefix = ";";
3126   }
3127 
3128   if (pbp->seqref) {
3129     FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3130     FFAddNewLine(ffstring);
3131     FFAddOneString (ffstring, "xrefs: ", FALSE, FALSE, TILDE_IGNORE);
3132     first = TRUE;
3133     for (sid = pbp->seqref; sid != NULL; sid = sid->next) {
3134       acc = NULL;
3135       if (first == FALSE) {
3136         FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
3137       }
3138       first = FALSE;
3139       SeqIdWrite (sid, id, PRINTID_TEXTID_ACC_VER, sizeof (id) - 1);
3140       acc = id;
3141       if (acc != NULL) {
3142         switch (sid->choice) {
3143           case SEQID_GENBANK:
3144             FFAddOneString (ffstring, "genbank ", FALSE, FALSE, TILDE_IGNORE);
3145             break; 
3146           case SEQID_EMBL:
3147             FFAddOneString (ffstring, "embl ", FALSE, FALSE, TILDE_IGNORE);
3148             break; 
3149           case SEQID_PIR:
3150             FFAddOneString (ffstring, "UniProtKB ", FALSE, FALSE, TILDE_IGNORE);
3151             break; 
3152           case SEQID_SWISSPROT:
3153             FFAddOneString (ffstring, "UniProtKB ", FALSE, FALSE, TILDE_IGNORE);
3154             break; 
3155           case SEQID_DDBJ:
3156             FFAddOneString (ffstring, "ddbj ", FALSE, FALSE, TILDE_IGNORE);
3157             break; 
3158           case SEQID_PRF:
3159             FFAddOneString (ffstring, "prf ", FALSE, FALSE, TILDE_IGNORE);
3160             break; 
3161           case SEQID_GI:
3162             FFAddOneString (ffstring, "gi: ", FALSE, FALSE, TILDE_IGNORE);
3163             break; 
3164           default:
3165             acc = NULL;
3166             break; 
3167         }
3168       }
3169       if (acc != NULL) {
3170         FFAddOneString (ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
3171       }
3172     }
3173   }
3174   FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
3175 }
3176 
3177 static void AddPRFBlock (
3178   IntAsn2gbJobPtr ajp,
3179   StringItemPtr ffstring,
3180   BioseqPtr bsp
3181 )
3182 
3183 {
3184   SeqMgrDescContext  dcontext;
3185   PrfExtSrcPtr       extra;
3186   CharPtr            prefix = NULL;
3187   SeqDescrPtr        sdp;
3188   PrfBlockPtr        prf;
3189 
3190   if (bsp == NULL) return;
3191   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_prf, &dcontext);
3192   if (sdp == NULL) return;
3193   prf = (PrfBlockPtr) sdp->data.ptrvalue;
3194   if (prf == NULL) return;
3195   if ( ffstring == NULL ) return;
3196 
3197   extra = prf->extra_src;
3198   if (extra != NULL) {
3199 
3200     if (extra->host != NULL) {
3201       FFAddTextToString(ffstring, "host:", extra->host, NULL, FALSE, TRUE, TILDE_IGNORE);
3202       prefix = ";\n";
3203     }
3204 
3205     if (extra->part != NULL) {
3206       FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3207       FFAddTextToString(ffstring, "part: ", extra->part, NULL, FALSE, TRUE, TILDE_IGNORE);
3208       prefix = ";\n";
3209     }
3210     if (extra->state != NULL) {
3211       FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3212       FFAddTextToString(ffstring, "state: ", extra->state, NULL, FALSE, TRUE, TILDE_IGNORE);
3213       prefix = ";\n";
3214     }
3215     if (extra->strain != NULL) {
3216       FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3217       FFAddTextToString(ffstring, "strain: ", extra->strain, NULL, FALSE, TRUE, TILDE_IGNORE);
3218       prefix = ";\n";
3219     }
3220     if (extra->taxon != NULL) {
3221       FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3222       FFAddTextToString(ffstring, "taxonomy: ", extra->taxon, NULL, FALSE, TRUE, TILDE_IGNORE);
3223       prefix = ";\n";
3224     }
3225 
3226     FFAddOneChar(ffstring, '.', FALSE);
3227   }
3228 }
3229 
3230 static void AddPDBBlock (
3231   IntAsn2gbJobPtr ajp,
3232   StringItemPtr ffstring,
3233   BioseqPtr bsp
3234 )
3235 
3236 {
3237   SeqMgrDescContext  dcontext;
3238   CharPtr            dt;
3239   CharPtr            prefix = NULL;
3240   SeqDescrPtr        sdp;
3241   PdbBlockPtr        pdb;
3242   PdbRepPtr          replace;
3243   CharPtr            str;
3244   ValNodePtr         vnp;
3245 
3246   if (bsp == NULL) return;
3247   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pdb, &dcontext);
3248   if (sdp == NULL) return;
3249   pdb = (PdbBlockPtr) sdp->data.ptrvalue;
3250   if (pdb == NULL) return;
3251 
3252   if (pdb->deposition != NULL) {
3253     dt = asn2gb_PrintDate (pdb->deposition);
3254     FFAddTextToString (ffstring, "deposition: ", dt, NULL, FALSE, TRUE, TILDE_IGNORE);
3255     MemFree (dt);
3256     prefix = ";";
3257   }
3258   if (pdb->pdbclass != NULL) {
3259     FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3260     FFAddNewLine(ffstring);
3261     FFAddTextToString(ffstring, "class: ", pdb->pdbclass, NULL, FALSE, TRUE, TILDE_IGNORE);
3262     prefix = ";";
3263   }
3264   if (pdb->source != NULL) {
3265     FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3266     FFAddNewLine(ffstring);
3267     FFAddOneString(ffstring, "source: ", FALSE, TRUE, TILDE_IGNORE);
3268     prefix = NULL;
3269     for (vnp = pdb->source; vnp != NULL; vnp = vnp->next) {
3270       str = (CharPtr) vnp->data.ptrvalue;
3271       if (StringHasNoText (str)) continue;
3272       FFAddTextToString (ffstring, prefix, str, NULL, FALSE, TRUE, TILDE_IGNORE);
3273       prefix = ", ";
3274     }
3275     prefix = ";";
3276   }
3277   if (pdb->exp_method != NULL) {
3278     FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3279     FFAddNewLine(ffstring);
3280     FFAddTextToString(ffstring, "Exp. method: ", pdb->exp_method, NULL, FALSE, TRUE, TILDE_IGNORE);
3281     prefix = ";";
3282   }
3283   replace = pdb->replace;
3284   if (replace != NULL) {
3285     if (replace->ids != NULL) {
3286       FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3287       FFAddNewLine(ffstring);
3288       FFAddOneString(ffstring, "ids replaced: ", FALSE, TRUE, TILDE_IGNORE);
3289 
3290       prefix = NULL;
3291       for (vnp = replace->ids; vnp != NULL; vnp = vnp->next) {
3292         str = (CharPtr) vnp->data.ptrvalue;
3293         if (StringHasNoText (str)) continue;
3294         FFAddTextToString (ffstring, prefix, str, NULL, FALSE, TRUE, TILDE_IGNORE);
3295         prefix = ", ";
3296       }
3297       prefix = ";";
3298     }
3299     if (replace->date != NULL) {
3300       FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3301       FFAddNewLine(ffstring);
3302 
3303       dt = asn2gb_PrintDate (replace->date);
3304       FFAddTextToString(ffstring, "replacement date: ", dt, NULL, FALSE, TRUE, TILDE_IGNORE);
3305       MemFree (dt);
3306       prefix = ";";
3307     }
3308   }
3309 
3310   FFAddOneChar(ffstring, '.', FALSE);
3311 }
3312 
3313 static CharPtr TxtSave (CharPtr text, size_t len)
3314 
3315 {
3316    CharPtr str = NULL;
3317 
3318    if ((text == NULL) || (len == 0))
3319       return str;
3320 
3321    str = MemNew((size_t)(len + 1));
3322    MemCopy(str, text, (size_t)len);
3323 
3324    return (str);
3325 }
3326 
3327 static Boolean FF_www_dbsource (
3328   IntAsn2gbJobPtr ajp,
3329   StringItemPtr ffstring,
3330   CharPtr str,
3331   Boolean first,
3332   SeqIdPtr sip,
3333   Boolean is_na
3334 )
3335 
3336 {
3337   CharPtr  temp, end, text, loc, link = NULL;
3338   Uint1    choice;
3339   Int2     j;
3340   Int4     gi = 0;
3341   Char     gibuf [32];
3342 
3343   if (sip == NULL) return FALSE;
3344   choice = sip->choice;
3345 
3346   if( GetWWW(ajp) ) {
3347     if (choice == SEQID_PIR) {
3348       link = link_seqp;
3349     } else if (choice == SEQID_SWISSPROT) {
3350       link = link_sp;
3351     } else if (choice == SEQID_PDB || choice == SEQID_PRF) {
3352       link = link_seqp;
3353     } else if (choice == SEQID_EMBL || choice == SEQID_GENBANK || 
3354         choice == SEQID_DDBJ || choice == SEQID_GIBBSQ || 
3355         choice == SEQID_GIBBMT || choice == SEQID_GI || 
3356         choice == SEQID_GIIM || choice == SEQID_OTHER ||
3357         choice == SEQID_TPG || choice == SEQID_TPE || choice == SEQID_TPD ||
3358         choice == SEQID_GPIPE)  {
3359       if (is_na) {
3360         link = link_seqn;
3361       } else {
3362         link = link_seqp;
3363       }
3364     } else {
3365       AddStringWithTildes(ffstring, str);
3366       return TRUE;
3367     }
3368   
3369     if ((text = StringStr(str, "accession")) != NULL) {
3370       end = text + 9;
3371       j = 9;
3372       while (*end == ' ') {
3373         ++end;
3374         j++;
3375       }
3376       if (first == FALSE) {
3377         FFAddOneString(ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
3378       }
3379       loc = TxtSave (str, end-str - j);
3380       FFAddOneString(ffstring, loc, FALSE, FALSE, TILDE_IGNORE);
3381       MemFree (loc);
3382       for (; text != end; ++text ) {
3383         FFAddOneChar(ffstring, *text, FALSE);
3384       }
3385 
3386       temp = text;
3387       end += StringLen(text) - 1;
3388       if ( *end != ';' ) {
3389         ++end;
3390       }
3391 
3392       if (choice == SEQID_SWISSPROT) {
3393         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3394         FF_Add_NCBI_Base_URL (ffstring, link);
3395         for (text = temp; text != end; ++text ) {
3396           FFAddOneChar (ffstring, *text, FALSE);
3397         }
3398       } else {
3399         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3400         FF_Add_NCBI_Base_URL (ffstring, link);
3401         gi = GetGIForSeqId (sip);
3402         if (gi > 0) {
3403           sprintf (gibuf, "%ld", (long) gi);
3404           FFAddOneString (ffstring, gibuf, FALSE, FALSE, TILDE_IGNORE);
3405         } else {
3406           for (text = temp; text != end; ++text ) {
3407             FFAddOneChar(ffstring, *text, FALSE);
3408           }
3409         }
3410       }
3411       FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3412 
3413       for (text = temp; text != end; ++text ) {
3414         FFAddOneChar(ffstring, *text, FALSE);
3415       }
3416       FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3417       if ( *end == ';' ) {
3418         FFAddOneChar(ffstring, ';', FALSE);
3419       }
3420     } else {
3421       if (first == FALSE) {
3422         FFAddOneString(ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
3423       }
3424       FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3425     }
3426   } else {
3427     AddStringWithTildes(ffstring, str);
3428   }
3429   return TRUE;
3430 }
3431 
3432 NLM_EXTERN void AddDbsourceBlock (
3433   Asn2gbWorkPtr awp
3434 )
3435 
3436 {
3437   IntAsn2gbJobPtr  ajp;
3438   Asn2gbSectPtr    asp;
3439   BaseBlockPtr     bbp;
3440   BioseqPtr        bsp;
3441   Char             buf [256];
3442   SeqFeatPtr       cds;
3443   DbtagPtr         db;
3444   GBSeqPtr         gbseq;
3445   SeqIdPtr         id;
3446   Boolean          is_na;
3447   ValNodePtr       list = NULL;
3448   BioseqPtr        nuc;
3449   SeqEntryPtr      sep;
3450   SeqIdPtr         sip;
3451   SeqLocPtr        slp;
3452   CharPtr          str;
3453   Boolean          unknown = TRUE;
3454   ValNodePtr       vnp;
3455   StringItemPtr    ffstring;
3456 
3457   if (awp == NULL) return;
3458   ajp = awp->ajp;
3459   if (ajp == NULL) return;
3460   asp = awp->asp;
3461   if (asp == NULL) return;
3462   bsp = awp->bsp;
3463   if (bsp == NULL) return;
3464 
3465   bbp = Asn2gbAddBlock (awp, DBSOURCE_BLOCK, sizeof (BaseBlock));
3466   if (bbp == NULL) return;
3467 
3468   bbp->entityID = awp->entityID;
3469 
3470   ffstring = FFGetString(ajp);
3471   if ( ffstring == NULL ) return;
3472 
3473   FFStartPrint (ffstring, awp->format, 0, 12, "DBSOURCE", 12, 5, 5, NULL, TRUE);
3474 
3475   sip = SeqIdSelect (bsp->id, dbsource_fasta_order, NUM_SEQID);
3476 
3477   if (sip != NULL) {
3478 
3479     switch (sip->choice) {
3480       case SEQID_PIR :
3481       case SEQID_SWISSPROT :
3482       case SEQID_PRF :
3483       case SEQID_PDB :
3484         if (WriteDbsourceID (sip, buf, &is_na)) {
3485           FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
3486           FFAddNewLine(ffstring);
3487           unknown = FALSE;
3488         }
3489         break;
3490       case SEQID_GENERAL :
3491         db = sip->data.ptrvalue;
3492         if (db == NULL) {
3493           break;
3494         }
3495         if (StringNCmp (db->db, "PIDe", 4) != 0 &&
3496             StringNCmp (db->db, "PIDd", 4) != 0 &&
3497             StringNCmp (db->db, "PID", 3) != 0) {
3498           break;
3499         }
3500         /* if (ChoicePID) found, continue on to next set of cases */
3501       case SEQID_EMBL :
3502       case SEQID_GENBANK :
3503       case SEQID_DDBJ :
3504       case SEQID_GIBBSQ :
3505       case SEQID_GIBBMT :
3506       case SEQID_OTHER :
3507       case SEQID_TPG :
3508       case SEQID_TPE :
3509       case SEQID_TPD :
3510       case SEQID_GPIPE :
3511       case SEQID_GI :
3512       case SEQID_GIIM :
3513         cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
3514         if (cds == NULL) {
3515           /* now may also be protein product of mature peptide feature */
3516           cds = SeqMgrGetPROTgivenProduct (bsp, NULL);
3517         }
3518         if (cds != NULL) {
3519           nuc = BioseqFindFromSeqLoc (cds->location);
3520           if (nuc != NULL) {
3521             slp = SeqLocFindNext (cds->location, NULL);
3522             while (slp != NULL) {
3523               sip = SeqLocId (slp);
3524               AddToUniqueSipList (&list, sip);
3525               slp = SeqLocFindNext (cds->location, slp);
3526             }
3527             for (vnp = list; vnp != NULL; vnp = vnp->next) {
3528               id = (SeqIdPtr) vnp->data.ptrvalue;
3529               nuc = BioseqFindCore (id);
3530               sip = NULL;
3531               if (nuc != NULL) {
3532                 sip = SeqIdSelect (nuc->id, dbsource_fasta_order, NUM_SEQID);
3533               } else if (id != NULL && id->choice == SEQID_GI) {
3534                 sip = GetSeqIdForGI (id->data.intvalue);
3535               }
3536               if (sip == NULL) {
3537                 sip = id;
3538               }
3539               if (sip != NULL) {
3540                 if (WriteDbsourceID (sip, buf, &is_na)) {
3541                   FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
3542                   FFAddNewLine(ffstring);
3543                   unknown = FALSE;
3544                 }
3545               }
3546             }
3547             ValNodeFree (list);
3548           } else {
3549             sep = GetTopSeqEntryForEntityID (awp->entityID);
3550             if (sep != NULL && IS_Bioseq (sep)) {
3551               /* special case for coded_by CDS packed on retcode 1 protein */
3552               id = SeqLocId (cds->location);
3553               if (id != NULL && id->choice == SEQID_GI) {
3554                 sip = GetSeqIdForGI (id->data.intvalue);
3555                 if (sip == NULL) {
3556                   sip = id;
3557                 }
3558               }
3559               if (WriteDbsourceID (sip, buf, &is_na)) {
3560                 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
3561                 FFAddNewLine(ffstring);
3562                 unknown = FALSE;
3563               }
3564             }
3565           }
3566         } else {
3567           if (WriteDbsourceID (sip, buf, &is_na)) {
3568             FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
3569             FFAddNewLine(ffstring);
3570             unknown = FALSE;
3571           }
3572         }
3573         break;
3574       default :
3575         break;
3576     }
3577 
3578     if (sip != NULL) {
3579       switch (sip->choice) {
3580         case SEQID_PIR :
3581           AddPIRBlock (ajp, ffstring, bsp);
3582           break;
3583         case SEQID_SWISSPROT :
3584           AddSPBlock (ajp, ffstring, bsp);
3585           break;
3586         case SEQID_PRF :
3587           AddPRFBlock (ajp, ffstring, bsp);
3588           break;
3589         case SEQID_PDB :
3590           AddPDBBlock (ajp, ffstring, bsp);
3591           break;
3592         default :
3593           break;
3594       }
3595     }
3596   }
3597 
3598   if (unknown) {
3599     FFAddOneString (ffstring, "UNKNOWN", FALSE, FALSE, TILDE_TO_SPACES);
3600   }
3601 
3602   str = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, NULL);
3603 
3604   /* optionally populate gbseq for XML-ized GenBank format */
3605 
3606   if (ajp->gbseq) {
3607     gbseq = &asp->gbseq;
3608   } else {
3609     gbseq = NULL;
3610   }
3611 
3612   if (gbseq != NULL) {
3613     if (StringNCmp (str, "DBSOURCE    ", 12) == 0) {
3614       gbseq->source_db = StringSave (str + 12);
3615     } else {
3616       gbseq->source_db = StringSave (str);
3617     }
3618     CleanQualValue (gbseq->source_db);
3619     Asn2gnbkCompressSpaces (gbseq->source_db);
3620   }
3621 
3622   bbp->string = str;
3623   FFRecycleString(ajp, ffstring);
3624 
3625   if (awp->afp != NULL) {
3626     DoImmediateFormat (awp->afp, bbp);
3627   }
3628 }
3629 
3630 NLM_EXTERN void AddDateBlock (
3631   Asn2gbWorkPtr awp
3632 )
3633 
3634 {
3635   IntAsn2gbJobPtr    ajp;
3636   BaseBlockPtr       bbp;
3637   BioseqPtr          bsp;
3638   Char               date [40];
3639   SeqMgrDescContext  dcontext;
3640   DatePtr            dp;
3641   SeqDescrPtr        sdp;
3642   StringItemPtr      ffstring;
3643 
3644   if (awp == NULL) return;
3645   ajp = awp->ajp;
3646   if (ajp == NULL) return;
3647   bsp = awp->bsp;
3648   if (bsp == NULL) return;
3649 
3650   ffstring = FFGetString(ajp);
3651   if ( ffstring == NULL ) return;
3652 
3653   bbp = Asn2gbAddBlock (awp, DATE_BLOCK, sizeof (BaseBlock));
3654   if (bbp == NULL) return;
3655 
3656   date [0] = '\0';
3657 
3658   dp = NULL;
3659   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
3660   if (sdp != NULL) {
3661     dp = (DatePtr) sdp->data.ptrvalue;
3662   }
3663   if (dp != NULL) {
3664     DateToFF (date, dp, FALSE);
3665   }
3666   if (StringHasNoText (date)) {
3667     StringCpy (date, "01-JAN-1900");
3668   }
3669 
3670   FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 5, "DT", TRUE);
3671   FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
3672 
3673   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 0, 5, 5, "DT");
3674   FFRecycleString(ajp, ffstring);
3675 
3676   bbp = Asn2gbAddBlock (awp, DATE_BLOCK, sizeof (BaseBlock));
3677   if (bbp == NULL) return;
3678 
3679   ffstring = FFGetString(ajp);
3680 
3681   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
3682   if (sdp != NULL) {
3683     dp = (DatePtr) sdp->data.ptrvalue;
3684   }
3685   if (dp != NULL) {
3686     DateToFF (date, dp, FALSE);
3687   }
3688 
3689   FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 5, "DT", FALSE);
3690   FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
3691 
3692   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 0, 5, 5, "DT");
3693   FFRecycleString(ajp, ffstring);
3694 
3695   if (awp->afp != NULL) {
3696     DoImmediateFormat (awp->afp, bbp);
3697   }
3698 }
3699 
3700 
3701 #define TOTAL_ESTKW 11
3702 #define TOTAL_STSKW 5
3703 #define TOTAL_GSSKW 2
3704 
3705 static CharPtr EST_kw_array[ TOTAL_ESTKW] = {
3706   "EST", "EST PROTO((expressed sequence tag)", "expressed sequence tag",
3707   "EST (expressed sequence tag)", "EST(expressed sequence tag)",
3708   "partial cDNA sequence", "transcribed sequence fragment", "TSR",
3709   "putatively transcribed partial sequence", "UK putts"
3710 };
3711 
3712 static CharPtr GSS_kw_array [TOTAL_GSSKW] = {
3713   "GSS", "trapped exon"
3714 };
3715 static CharPtr STS_kw_array[TOTAL_STSKW] = {
3716   "STS", "STS(sequence tagged site)", "STS (sequence tagged site)",
3717   "STS sequence", "sequence tagged site"
3718 };
3719 
3720 static Int2 MatchArrayString (
3721   CharPtr array_string [],
3722   Int2 totalstr,
3723   CharPtr text
3724 )
3725 
3726 {
3727   Int2 i;
3728 
3729   for (i = 0; i < totalstr && text != NULL; i++) {
3730     if (StringCmp (array_string [i], text) == 0) {
3731       return (i);
3732     }
3733   }
3734 
3735   return (-1);
3736 }
3737 
3738 static Boolean CheckSpecialKeyword (
3739   Boolean is_est,
3740   Boolean is_sts,
3741   Boolean is_gss,
3742   CharPtr kwd
3743 )
3744 
3745 {
3746   if (kwd == NULL) return FALSE;
3747 
3748   if (is_est) {
3749     if (MatchArrayString (STS_kw_array, TOTAL_STSKW, kwd) != -1) return FALSE;
3750     if (MatchArrayString (GSS_kw_array, TOTAL_GSSKW, kwd) != -1) return FALSE;
3751   }
3752 
3753   if (is_sts) {
3754     if (MatchArrayString (EST_kw_array, TOTAL_ESTKW, kwd) != -1) return FALSE;
3755     if (MatchArrayString (GSS_kw_array, TOTAL_GSSKW, kwd) != -1) return FALSE;
3756   }
3757 
3758   if (is_gss) {
3759     if (MatchArrayString (STS_kw_array, TOTAL_STSKW, kwd) != -1) return FALSE;
3760     if (MatchArrayString (EST_kw_array, TOTAL_ESTKW, kwd) != -1) return FALSE;
3761   }
3762 
3763   return TRUE;
3764 }
3765 
3766 static Boolean KeywordAlreadyInList (
3767   ValNodePtr head,
3768   CharPtr kwd
3769 )
3770 
3771 {
3772   ValNodePtr  vnp;
3773 
3774   for (vnp = head; vnp != NULL; vnp = vnp->next) {
3775     if (StringICmp ((CharPtr) vnp->data.ptrvalue, kwd) == 0) return TRUE;
3776   }
3777 
3778   return FALSE;
3779 }
3780 
3781 typedef struct finstatdata {
3782   CharPtr  inuserobj;
3783   CharPtr  inkeyword;
3784 } FinStatData, PNTR FinStatPtr;
3785 
3786 static FinStatData finStatKywds [] = {
3787   {"Standard-Draft",              "STANDARD_DRAFT"},
3788   {"High-quality-draft",          "HIGH_QUALITY_DRAFT"},
3789   {"Improved-high-quality-draft", "IMPROVED_HIGH_QUALITY_DRAFT"},
3790   {"Annotation-grade",            "ANNOTATION_GRADE"},
3791   {"Non-contiguous-finished",     "NON_CONTIGUOUS_FINISHED"},
3792   {"Finished",                    "FINISHED"},
3793   {NULL, NULL}
3794 };
3795 
3796 static CharPtr GetFinishingStatus (
3797   CharPtr str
3798 )
3799 
3800 {
3801   Char     buf [64];
3802   Char     ch;
3803   Int2     i;
3804   CharPtr  ptr;
3805 
3806   if (StringHasNoText (str)) return NULL;
3807 
3808   StringNCpy_0 (buf, str, sizeof (buf));
3809   ptr = buf;
3810   ch = *ptr;
3811   while (ch != '\0') {
3812     if (ch == ' ') {
3813       *ptr = '-';
3814     }
3815     ptr++;
3816     ch = *ptr;
3817   }
3818 
3819   for (i = 0; finStatKywds [i].inuserobj != NULL; i++) {
3820     if (StringICmp (buf, finStatKywds [i].inuserobj) == 0) {
3821       return finStatKywds [i].inkeyword;
3822     }
3823   }
3824 
3825   return NULL;
3826 }
3827 
3828 NLM_EXTERN void AddKeywordsBlock (
3829   Asn2gbWorkPtr awp
3830 )
3831 
3832 {
3833   Boolean            add_encode = FALSE;
3834   IntAsn2gbJobPtr    ajp;
3835   Asn2gbSectPtr      asp;
3836   BaseBlockPtr       bbp;
3837   BioseqPtr          bsp;
3838   BioSourcePtr       biop;
3839   UserFieldPtr       curr;
3840   SeqMgrDescContext  dcontext;
3841   EMBLBlockPtr       ebp;
3842   CharPtr            field;
3843   CharPtr            finishing_status = NULL;
3844   GBBlockPtr         gbp;
3845   GBSeqPtr           gbseq;
3846   ValNodePtr         head = NULL;
3847   IndxPtr            index;
3848   Boolean            is_est = FALSE;
3849   Boolean            is_gss = FALSE;
3850   Boolean            is_sts = FALSE;
3851   Boolean            is_env_sample = FALSE;
3852   Boolean            is_genome_assembly = FALSE;
3853   ValNodePtr         keywords;
3854   CharPtr            kwd;
3855   MolInfoPtr         mip;
3856   ObjectIdPtr        oip;
3857   PirBlockPtr        pir;
3858   PrfBlockPtr        prf;
3859   SeqDescrPtr        sdp;
3860   SeqIdPtr           sip;
3861   SPBlockPtr         sp;
3862   SubSourcePtr       ssp;
3863   CharPtr            str;
3864   UserObjectPtr      uop;
3865   ValNodePtr         vnp;
3866   StringItemPtr      ffstring;
3867 
3868   if (awp == NULL) return;
3869   ajp = awp->ajp;
3870   if (ajp == NULL) return;
3871   bsp = awp->bsp;
3872   if (bsp == NULL) return;
3873   asp = awp->asp;
3874   if (asp == NULL) return;
3875 
3876   bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, KEYWORDS_BLOCK, sizeof (BaseBlock));
3877   if (bbp == NULL) return;
3878 
3879   ffstring = FFGetString(ajp);
3880   if ( ffstring == NULL ) return;
3881 
3882   biop = NULL;
3883   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
3884   if (sdp != NULL) {
3885     biop = (BioSourcePtr) sdp->data.ptrvalue;
3886   }
3887   if (biop != NULL) {
3888     for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
3889       if (ssp->subtype == SUBSRC_environmental_sample) {
3890         is_env_sample = TRUE;
3891       }
3892     }
3893   }
3894 
3895   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
3896   if (sdp != NULL) {
3897     bbp->entityID = dcontext.entityID;
3898     bbp->itemID = dcontext.itemID;
3899     bbp->itemtype = OBJ_SEQDESC;
3900 
3901     mip = (MolInfoPtr) sdp->data.ptrvalue;
3902     if (mip != NULL) {
3903       switch (mip->tech) {
3904         case MI_TECH_htgs_1 :
3905           if (head != NULL) {
3906             ValNodeCopyStr (&head, 0, "; ");
3907           }
3908           ValNodeCopyStr (&head, 0, "HTG");
3909           ValNodeCopyStr (&head, 0, "; ");
3910           ValNodeCopyStr (&head, 0, "HTGS_PHASE1");
3911           break;
3912         case MI_TECH_htgs_2 :
3913           if (head != NULL) {
3914             ValNodeCopyStr (&head, 0, "; ");
3915           }
3916           ValNodeCopyStr (&head, 0, "HTG");
3917           ValNodeCopyStr (&head, 0, "; ");
3918           ValNodeCopyStr (&head, 0, "HTGS_PHASE2");
3919           break;
3920         case MI_TECH_htgs_3 :
3921           if (head != NULL) {
3922             ValNodeCopyStr (&head, 0, "; ");
3923           }
3924           ValNodeCopyStr (&head, 0, "HTG");
3925           break;
3926         case MI_TECH_est :
3927           if (head != NULL) {
3928             ValNodeCopyStr (&head, 0, "; ");
3929           }
3930           is_est = TRUE;
3931           ValNodeCopyStr (&head, 0, "EST");
3932           if (is_env_sample) {
3933             if (head != NULL) {
3934               ValNodeCopyStr (&head, 0, "; ");
3935             }
3936             ValNodeCopyStr (&head, 0, "ENV");
3937           }
3938           break;
3939         case MI_TECH_sts :
3940           if (head != NULL) {
3941             ValNodeCopyStr (&head, 0, "; ");
3942           }
3943           is_sts = TRUE;
3944           ValNodeCopyStr (&head, 0, "STS");
3945           break;
3946         case MI_TECH_survey :
3947           if (head != NULL) {
3948             ValNodeCopyStr (&head, 0, "; ");
3949           }
3950           is_gss = TRUE;
3951           ValNodeCopyStr (&head, 0, "GSS");
3952           if (is_env_sample) {
3953             if (head != NULL) {
3954               ValNodeCopyStr (&head, 0, "; ");
3955             }
3956             ValNodeCopyStr (&head, 0, "ENV");
3957           }
3958           break;
3959         case MI_TECH_fli_cdna :
3960           if (head != NULL) {
3961             ValNodeCopyStr (&head, 0, "; ");
3962           }
3963           ValNodeCopyStr (&head, 0, "FLI_CDNA");
3964           break;
3965         case MI_TECH_htgs_0 :
3966           if (head != NULL) {
3967             ValNodeCopyStr (&head, 0, "; ");
3968           }
3969           ValNodeCopyStr (&head, 0, "HTG");
3970           ValNodeCopyStr (&head, 0, "; ");
3971           ValNodeCopyStr (&head, 0, "HTGS_PHASE0");
3972           break;
3973         case MI_TECH_htc :
3974           if (head != NULL) {
3975             ValNodeCopyStr (&head, 0, "; ");
3976           }
3977           ValNodeCopyStr (&head, 0, "HTC");
3978           break;
3979         case MI_TECH_wgs :
3980           if (head != NULL) {
3981             ValNodeCopyStr (&head, 0, "; ");
3982           }
3983           ValNodeCopyStr (&head, 0, "WGS");
3984           break;
3985         /*
3986         case MI_TECH_barcode :
3987           if (head != NULL) {
3988             ValNodeCopyStr (&head, 0, "; ");
3989           }
3990           ValNodeCopyStr (&head, 0, "BARCODE");
3991           break;
3992         */
3993         case MI_TECH_tsa :
3994           if (head != NULL) {
3995             ValNodeCopyStr (&head, 0, "; ");
3996           }
3997           ValNodeCopyStr (&head, 0, "TSA");
3998           ValNodeCopyStr (&head, 0, "; ");
3999           ValNodeCopyStr (&head, 0, "Transcriptome Shotgun Assembly");
4000           break;
4001         case MI_TECH_unknown :
4002         case MI_TECH_standard :
4003         case MI_TECH_other :
4004           if (is_env_sample) {
4005             if (head != NULL) {
4006               ValNodeCopyStr (&head, 0, "; ");
4007             }
4008             ValNodeCopyStr (&head, 0, "ENV");
4009           }
4010           break;
4011         default :
4012           break;
4013       }
4014     }
4015   }
4016 
4017   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
4018   while (sdp != NULL) {
4019     uop = (UserObjectPtr) sdp->data.ptrvalue;
4020     if (uop != NULL) {
4021       oip = uop->type;
4022       if (oip != NULL && StringICmp (oip->str, "ENCODE") == 0) {
4023         add_encode = TRUE;
4024       } else if (oip != NULL && StringICmp (oip->str, "StructuredComment") == 0) {
4025         for (curr = uop->data; curr != NULL; curr = curr->next) {
4026           if (curr->choice != 1) continue;
4027           oip = curr->label;
4028           if (oip == NULL) continue;
4029           field = oip->str;
4030           if (StringHasNoText (field)) continue;
4031           if (StringCmp (field, "StructuredCommentPrefix") == 0) {
4032             if (StringCmp ((CharPtr) curr->data.ptrvalue, "##Genome-Assembly-Data-START##") == 0) {
4033               is_genome_assembly = TRUE;
4034             }
4035           }
4036           if (StringCmp (field, "Current Finishing Status") == 0) {
4037             finishing_status = GetFinishingStatus ((CharPtr) curr->data.ptrvalue);
4038           }
4039         }
4040       }
4041     }
4042     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
4043   }
4044   if (add_encode) {
4045     if (head != NULL) {
4046       ValNodeCopyStr (&head, 0, "; ");
4047     }
4048     ValNodeCopyStr (&head, 0, "ENCODE");
4049   }
4050   if (is_genome_assembly && StringDoesHaveText (finishing_status)) {
4051     if (head != NULL) {
4052       ValNodeCopyStr (&head, 0, "; ");
4053     }
4054     ValNodeCopyStr (&head, 0, finishing_status);
4055   }
4056 
4057   for (sip = bsp->id; sip != NULL; sip = sip->next) {
4058     if (sip->choice == SEQID_TPG || sip->choice == SEQID_TPE || sip->choice == SEQID_TPD) {
4059       if (head != NULL) {
4060         ValNodeCopyStr (&head, 0, "; ");
4061       }
4062       ValNodeCopyStr (&head, 0, "Third Party Annotation");
4063       ValNodeCopyStr (&head, 0, "; ");
4064       ValNodeCopyStr (&head, 0, "TPA");
4065     }
4066   }
4067 
4068   sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
4069   while (sdp != NULL) {
4070 
4071     keywords = NULL;
4072 
4073     switch (dcontext.seqdesctype) {
4074       case Seq_descr_genbank :
4075         gbp = (GBBlockPtr) sdp->data.ptrvalue;
4076         if (gbp != NULL) {
4077           keywords = gbp->keywords;
4078         }
4079         break;
4080       case Seq_descr_embl :
4081         ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
4082         if (ebp != NULL) {
4083           keywords = ebp->keywords;
4084         }
4085         break;
4086       case Seq_descr_pir :
4087         pir = (PirBlockPtr) sdp->data.ptrvalue;
4088         if (pir != NULL) {
4089           keywords = pir->keywords;
4090         }
4091         break;
4092       case Seq_descr_prf :
4093         prf = (PrfBlockPtr) sdp->data.ptrvalue;
4094         if (prf != NULL) {
4095           keywords = prf->keywords;
4096         }
4097         break;
4098       case Seq_descr_sp :
4099         sp = (SPBlockPtr) sdp->data.ptrvalue;
4100         if (sp != NULL) {
4101           keywords = sp->keywords;
4102         }
4103         break;
4104       default :
4105         break;
4106     }
4107 
4108     if (keywords != NULL) {
4109       bbp->entityID = dcontext.entityID;
4110       bbp->itemID = dcontext.itemID;
4111       bbp->itemtype = OBJ_SEQDESC;
4112     }
4113 
4114     for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
4115       kwd = (CharPtr) vnp->data.ptrvalue;
4116       if (CheckSpecialKeyword (is_est, is_sts, is_gss, kwd)) {
4117         if (! KeywordAlreadyInList (head, kwd)) {
4118           if (head != NULL) {
4119             ValNodeCopyStr (&head, 0, "; ");
4120           }
4121           ValNodeCopyStr (&head, 0, kwd);
4122         }
4123       }
4124     }
4125 
4126     sdp = SeqMgrGetNextDescriptor (bsp, sdp, 0, &dcontext);
4127   }
4128 
4129   FFStartPrint( ffstring, awp->format, 0, 12, "KEYWORDS", 12, 5, 5, "KW", TRUE);
4130   str = MergeFFValNodeStrs (head);
4131   
4132   /* if no keywords were found, period will still be added by this call */
4133   if ( str != NULL ) {
4134     FFAddOneString (ffstring, str, TRUE, FALSE, TILDE_TO_SPACES);
4135   } else {
4136     FFAddOneChar(ffstring, '.', FALSE);
4137   }
4138 
4139   MemFree (str);
4140 
4141   /* optionally populate indexes for NCBI internal database */
4142 
4143   if (ajp->index) {
4144     index = &asp->index;
4145   } else {
4146     index = NULL;
4147   }
4148 
4149   if (index != NULL) {
4150     for (vnp = head; vnp != NULL; vnp = vnp->next) {
4151       kwd = (CharPtr) vnp->data.ptrvalue;
4152       if (StringCmp (kwd, "; ") == 0) continue;
4153       ValNodeCopyStrToHead (&(index->keywords), 0, kwd);
4154     }
4155   }
4156 
4157   /* optionally populate gbseq for XML-ized GenBank format */
4158 
4159   if (ajp->gbseq) {
4160     gbseq = &asp->gbseq;
4161   } else {
4162     gbseq = NULL;
4163   }
4164 
4165   if (gbseq != NULL) {
4166     for (vnp = head; vnp != NULL; vnp = vnp->next) {
4167       kwd = (CharPtr) vnp->data.ptrvalue;
4168       if (StringCmp (kwd, "; ") == 0) continue;
4169       ValNodeCopyStr (&(gbseq->keywords), 0, kwd);
4170     }
4171   }
4172 
4173   ValNodeFreeData (head);
4174 
4175   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "KW");
4176 
4177   FFRecycleString(ajp, ffstring);
4178 
4179   if (awp->afp != NULL) {
4180     DoImmediateFormat (awp->afp, bbp);
4181   }
4182 }
4183 
4184 NLM_EXTERN void AddSegmentBlock (
4185   Asn2gbWorkPtr awp,
4186   Boolean onePartOfSeg,
4187   Boolean is_na
4188 )
4189 
4190 {
4191   Char             acc [41];
4192   IntAsn2gbJobPtr  ajp;
4193   Asn2gbSectPtr    asp;
4194   BaseBlockPtr     bbp;
4195   Char             buf [32];
4196   GBSeqPtr         gbseq;
4197   StringItemPtr    ffstring;
4198 
4199   if (awp == NULL) return;
4200   ajp = awp->ajp;
4201   if (ajp == NULL) return;
4202   asp = awp->asp;
4203   if (asp == NULL) return;
4204 
4205   if (awp->seg < 1 || awp->numsegs < 1) return;
4206 
4207   bbp = Asn2gbAddBlock (awp, SEGMENT_BLOCK, sizeof (BaseBlock));
4208   if (bbp == NULL) return;
4209 
4210   ffstring = FFGetString(ajp);
4211   if ( ffstring == NULL ) return;
4212 
4213 
4214   FFStartPrint (ffstring, awp->format, 0, 12, "SEGMENT", 12, 5, 5, "XX", FALSE);
4215 
4216   if ( GetWWW(ajp) && awp->parent != NULL && onePartOfSeg) {
4217     sprintf (buf, "%d of ", (int) awp->seg);
4218     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4219     SeqIdWrite (awp->parent->id, acc, PRINTID_TEXTID_ACC_VER, sizeof (acc) - 1);
4220 
4221     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4222     if (is_na) {
4223       FF_Add_NCBI_Base_URL (ffstring, link_seqn);
4224     } else {
4225       FF_Add_NCBI_Base_URL (ffstring, link_seqp);
4226     }
4227     FFAddTextToString(ffstring, /* "val=" */ NULL, acc, "\">", FALSE, FALSE, TILDE_IGNORE);
4228 
4229     sprintf (buf, "%ld", (long) awp->numsegs);
4230     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4231     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4232   } else {
4233     sprintf (buf, "%d of %ld", (int) awp->seg, (long) awp->numsegs);
4234     FFAddOneString (ffstring, buf, FALSE, TRUE, TILDE_TO_SPACES);
4235   }
4236 
4237   /* optionally populate gbseq for XML-ized GenBank format */
4238 
4239   if (ajp->gbseq) {
4240     gbseq = &asp->gbseq;
4241   } else {
4242     gbseq = NULL;
4243   }
4244 
4245   if (gbseq != NULL) {
4246     sprintf (buf, "%d of %ld", (int) awp->seg, (long) awp->numsegs);
4247     gbseq->segment = StringSave (buf);
4248   }
4249 
4250   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
4251   FFRecycleString(ajp, ffstring);
4252 
4253   if (awp->afp != NULL) {
4254     DoImmediateFormat (awp->afp, bbp);
4255   }
4256 }
4257 
4258 NLM_EXTERN void AddSourceBlock (
4259   Asn2gbWorkPtr awp
4260 )
4261 
4262 {
4263   IntAsn2gbJobPtr    ajp;
4264   BaseBlockPtr       bbp;
4265   BioseqPtr          bsp;
4266   SeqFeatPtr         cds;
4267   SeqMgrDescContext  dcontext;
4268   BioseqPtr          dna;
4269   SeqMgrFeatContext  fcontext;
4270   GBBlockPtr         gbp;
4271   SeqDescrPtr        sdp;
4272   SeqFeatPtr         sfp;
4273 
4274   if (awp == NULL) return;
4275   ajp = awp->ajp;
4276   if (ajp == NULL) return;
4277   bsp = awp->bsp;
4278   if (bsp == NULL) return;
4279 
4280   bbp = Asn2gbAddBlock (awp, SOURCE_BLOCK, sizeof (BaseBlock));
4281   if (bbp == NULL) return;
4282 
4283   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
4284   if (sdp != NULL && (! ajp->newSourceOrg)) {
4285     gbp = (GBBlockPtr) sdp->data.ptrvalue;
4286     if (gbp != NULL && (! StringHasNoText (gbp->source))) {
4287       bbp->entityID = dcontext.entityID;
4288       bbp->itemID = dcontext.itemID;
4289       bbp->itemtype = OBJ_SEQDESC;
4290 
4291       if (awp->afp != NULL) {
4292         DoImmediateFormat (awp->afp, bbp);
4293       }
4294 
4295       return;
4296     }
4297   }
4298 
4299   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
4300   if (sdp != NULL) {
4301     bbp->entityID = dcontext.entityID;
4302     bbp->itemID = dcontext.itemID;
4303     bbp->itemtype = OBJ_SEQDESC;
4304   } else {
4305     sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
4306     if (sfp != NULL) {
4307       bbp->entityID = fcontext.entityID;
4308       bbp->itemID = fcontext.itemID;
4309       bbp->itemtype = OBJ_SEQFEAT;
4310     } else if (ISA_aa (bsp->mol)) {
4311 
4312       /* if protein with no sources, get sources applicable to DNA location of CDS */
4313 
4314       cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
4315       if (cds != NULL) {
4316         sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
4317         if (sfp != NULL) {
4318           bbp->entityID = fcontext.entityID;
4319           bbp->itemID = fcontext.itemID;
4320           bbp->itemtype = OBJ_SEQFEAT;
4321         } else {
4322           dna = BioseqFindFromSeqLoc (cds->location);
4323           if (dna != NULL) {
4324             sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
4325             if (sdp != NULL) {
4326               bbp->entityID = dcontext.entityID;
4327               bbp->itemID = dcontext.itemID;
4328               bbp->itemtype = OBJ_SEQDESC;
4329             }
4330           }
4331         }
4332       }
4333     }
4334   }
4335 
4336   if (awp->afp != NULL) {
4337     DoImmediateFormat (awp->afp, bbp);
4338   }
4339 }
4340 
4341 NLM_EXTERN void AddOrganismBlock (
4342   Asn2gbWorkPtr awp
4343 )
4344 
4345 {
4346   BaseBlockPtr       bbp;
4347   BioseqPtr          bsp;
4348   SeqFeatPtr         cds;
4349   SeqMgrDescContext  dcontext;
4350   BioseqPtr          dna;
4351   SeqMgrFeatContext  fcontext;
4352   SeqDescrPtr        sdp;
4353   SeqFeatPtr         sfp;
4354 
4355   if (awp == NULL) return;
4356   bsp = awp->bsp;
4357   if (bsp == NULL) return;
4358 
4359   bbp = Asn2gbAddBlock (awp, ORGANISM_BLOCK, sizeof (BaseBlock));
4360   if (bbp == NULL) return;
4361 
4362   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
4363   if (sdp != NULL) {
4364     bbp->entityID = dcontext.entityID;
4365     bbp->itemID = dcontext.itemID;
4366     bbp->itemtype = OBJ_SEQDESC;
4367   } else {
4368     sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
4369     if (sfp != NULL) {
4370       bbp->entityID = fcontext.entityID;
4371       bbp->itemID = fcontext.itemID;
4372       bbp->itemtype = OBJ_SEQFEAT;
4373     } else if (ISA_aa (bsp->mol)) {
4374 
4375       /* if protein with no sources, get sources applicable to DNA location of CDS */
4376 
4377       cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
4378       if (cds != NULL) {
4379         sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
4380         if (sfp != NULL) {
4381           bbp->entityID = fcontext.entityID;
4382           bbp->itemID = fcontext.itemID;
4383           bbp->itemtype = OBJ_SEQFEAT;
4384         } else {
4385           dna = BioseqFindFromSeqLoc (cds->location);
4386           if (dna != NULL) {
4387             sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
4388             if (sdp != NULL) {
4389               bbp->entityID = dcontext.entityID;
4390               bbp->itemID = dcontext.itemID;
4391               bbp->itemtype = OBJ_SEQDESC;
4392             }
4393           }
4394         }
4395       }
4396     }
4397   }
4398 
4399   if (awp->afp != NULL) {
4400     DoImmediateFormat (awp->afp, bbp);
4401   }
4402 }
4403 
4404 static RefBlockPtr AddPub (
4405   Asn2gbWorkPtr awp,
4406   ValNodePtr PNTR head,
4407   PubdescPtr pdp
4408 )
4409 
4410 {
4411   Char            buf [521]; /* increased for consortium in citsub */
4412   CitArtPtr       cap;
4413   CitBookPtr      cbp;
4414   CitGenPtr       cgp;
4415   CitJourPtr      cjp;
4416   CitPatPtr       cpp;
4417   CitSubPtr       csp;
4418   DatePtr         dp = NULL;
4419   Boolean         justuids = TRUE;
4420   ImprintPtr      imp = NULL;
4421   IntRefBlockPtr  irp;
4422   RefBlockPtr     rbp;
4423   ValNodePtr      vnp;
4424   ArticleIdPtr    aip;
4425 
4426   if (awp == NULL || head == NULL || pdp == NULL) return NULL;
4427 
4428   if (awp->hideGeneRIFs) {
4429     if (StringISearch (pdp->comment, "GeneRIF") != NULL) return NULL;
4430   } else if (awp->onlyGeneRIFs) {
4431     if (StringISearch (pdp->comment, "GeneRIF") == NULL) return NULL;
4432   } else if (awp->onlyReviewPubs) {
4433     if (StringISearch (pdp->comment, "Review Article") == NULL) return NULL;
4434   }
4435 
4436   rbp = (RefBlockPtr) MemNew (sizeof (IntRefBlock));
4437   if (rbp == NULL) return NULL;
4438   rbp->blocktype = REFERENCE_BLOCK;
4439   rbp->section = awp->currsection;
4440 
4441   rbp->serial = INT2_MAX;
4442 
4443   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4444     switch (vnp->choice) {
4445       case PUB_Gen :
4446         /* may be unpublished, or may be serial number of swiss-prot reference */
4447         cgp = (CitGenPtr) vnp->data.ptrvalue;
4448         if (cgp != NULL) {
4449           if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
4450             rbp->category = REF_CAT_UNP;
4451             if (dp == NULL) {
4452               dp = cgp->date;
4453             }
4454             if (cgp->serial_number > 0) {
4455               rbp->serial = cgp->serial_number;
4456             }
4457             if (cgp->cit != NULL) {
4458               if (StringNICmp ("unpublished", cgp->cit, 11) != 0 &&
4459                   StringNICmp ("submitted", cgp->cit, 8) != 0 &&
4460                   StringNICmp ("to be published", cgp->cit, 15) != 0 &&
4461                   StringNICmp ("in press", cgp->cit, 8) != 0 &&
4462                   StringStr (cgp->cit, "Journal") == NULL) {
4463                 if (cgp->serial_number == 0) {
4464                   MemFree (rbp);
4465                   return NULL;
4466                 }
4467               }
4468             } else if (cgp->journal == NULL || cgp->date == NULL) {
4469               if (cgp->serial_number == 0) {
4470                 MemFree (rbp);
4471                 return NULL;
4472               }
4473             }
4474           }
4475         }
4476         break;
4477       case PUB_Sub :
4478         rbp->category = REF_CAT_SUB;
4479         csp = (CitSubPtr) vnp->data.ptrvalue;
4480         if (csp != NULL) {
4481           imp = csp->imp;
4482           if (imp != NULL) {
4483             if (dp == NULL) {
4484               dp = imp->date;
4485             }
4486           }
4487           if (csp->date != NULL) {
4488             if (dp == NULL) {
4489               dp = csp->date;
4490             }
4491           }
4492         }
4493         break;
4494       case PUB_Article:
4495         cap = (CitArtPtr) vnp->data.ptrvalue;
4496         if (cap != NULL) {
4497           switch (cap->from) {
4498             case 1:
4499               cjp = (CitJourPtr) cap->fromptr;
4500               if (cjp != NULL) {
4501                 imp = (ImprintPtr) cjp->imp;
4502                 if (imp != NULL) {
4503                   if (dp == NULL) {
4504                     dp = imp->date;
4505                   }
4506                 }
4507               }
4508               break;
4509             case 2:
4510               cbp = (CitBookPtr) cap->fromptr;
4511               if (cbp != NULL) {
4512                 imp = (ImprintPtr) cbp->imp;
4513                 if (imp != NULL) {
4514                   if (dp == NULL) {
4515                     dp = imp->date;
4516                   }
4517                 }
4518               }
4519               break;
4520             case 3:
4521               cbp = (CitBookPtr) cap->fromptr;
4522               if (cbp != NULL) {
4523                 imp = (ImprintPtr) cbp->imp;
4524                 if (imp != NULL) {
4525                   if (dp == NULL) {
4526                     dp = imp->date;
4527                   }
4528                 }
4529               }
4530               break;
4531             default:
4532               break;
4533           }
4534           /*  look for PMID and MUID in the Cit-art article ids set */
4535           if (cap->ids != NULL) {
4536             for (aip = cap->ids; aip != NULL; aip = aip->next) {
4537               if (aip->choice == ARTICLEID_PUBMED && rbp->pmid == 0) {
4538                 rbp->pmid = aip->data.intvalue;
4539                 rbp->category = REF_CAT_PUB;
4540               } else if (aip->choice == ARTICLEID_MEDLINE && rbp->muid == 0) {
4541                 rbp->muid = aip->data.intvalue;
4542                 rbp->category = REF_CAT_PUB;
4543               }
4544             }
4545           }
4546         }
4547         break;
4548       case PUB_Book:
4549         cbp = (CitBookPtr) vnp->data.ptrvalue;
4550         if (cbp != NULL) {
4551           imp = (ImprintPtr) cbp->imp;
4552           if (imp != NULL) {
4553             if (dp == NULL) {
4554               dp = imp->date;
4555             }
4556           }
4557         }
4558         break;
4559       case PUB_Proc:
4560         cbp = (CitBookPtr) vnp->data.ptrvalue;
4561         if (cbp != NULL) {
4562           imp = (ImprintPtr) cbp->imp;
4563           if (imp != NULL) {
4564             if (dp == NULL) {
4565               dp = imp->date;
4566             }
4567           }
4568         }
4569         break;
4570       case PUB_Patent :
4571         rbp->category = REF_CAT_PUB;
4572         cpp = (CitPatPtr) vnp->data.ptrvalue;
4573         if (cpp != NULL) {
4574           if (cpp->date_issue != NULL) {
4575             if (dp == NULL) {
4576               dp = (DatePtr) cpp->date_issue;
4577             }
4578           } else if (cpp->app_date != NULL) {
4579             if (dp == NULL) {
4580               dp = (DatePtr) cpp->app_date;
4581             }
4582           }
4583         }
4584         break;
4585       case PUB_Man:
4586         cbp = (CitBookPtr) vnp->data.ptrvalue;
4587         if (cbp != NULL) {
4588           imp = (ImprintPtr) cbp->imp;
4589           if (imp != NULL) {
4590             if (dp == NULL) {
4591               dp = imp->date;
4592             }
4593           }
4594         }
4595         break;
4596       case PUB_Muid :
4597         if (rbp->muid == 0) {
4598           rbp->muid = vnp->data.intvalue;
4599           rbp->category = REF_CAT_PUB;
4600         }
4601         break;
4602       case PUB_PMid :
4603         if (rbp->pmid == 0) {
4604           rbp->pmid = vnp->data.intvalue;
4605           rbp->category = REF_CAT_PUB;
4606         }
4607         break;
4608       default :
4609         break;
4610     }
4611     if (vnp->choice != PUB_Muid && vnp->choice != PUB_PMid) {
4612       justuids = FALSE;
4613     }
4614   }
4615 
4616   /* check for submitted vs. in-press */
4617 
4618   if (imp != NULL) {
4619     rbp->category = REF_CAT_PUB;
4620     switch (imp->prepub) {
4621       case 1 :
4622         rbp->category = REF_CAT_UNP;
4623         break;
4624       case 2 :
4625         rbp->category = REF_CAT_PUB;
4626         break;
4627       default :
4628         break;
4629     }
4630   }
4631 
4632   /* check for sites reftype */
4633 
4634   if (pdp->reftype != 0) {
4635     rbp->sites = pdp->reftype;
4636   }
4637 
4638   if (rbp->muid == 0 && rbp->pmid == 0) {
4639     vnp = pdp->pub;
4640 
4641     /* skip over just serial number */
4642 
4643     if (vnp != NULL && vnp->choice == PUB_Gen && vnp->next != NULL) {
4644       cgp = (CitGenPtr) vnp->data.ptrvalue;
4645       if (cgp != NULL) {
4646         if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
4647           if (cgp->cit == NULL && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number) {
4648             vnp = vnp->next;
4649           }
4650         }
4651       }
4652     }
4653 
4654     if (PubLabelUnique (vnp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT, TRUE) > 0) {
4655       rbp->uniquestr = StringSaveNoNull (buf);
4656     }
4657   }
4658 
4659   irp = (IntRefBlockPtr) rbp;
4660   irp->date = DateDup (dp);
4661   irp->justuids = justuids;
4662   /* if (justuids) { */
4663     irp->fig = StringSaveNoNull (pdp->fig);
4664     irp->maploc = StringSaveNoNull (pdp->maploc);
4665     irp->poly_a = pdp->poly_a;
4666   /* } */
4667 
4668   /* if not rejected by now, link in */
4669 
4670   ValNodeAddPointer (head, 0, rbp);
4671 
4672   return rbp;
4673 }
4674 
4675 static int LIBCALLBACK SortReferences (
4676   VoidPtr ptr1,
4677   VoidPtr ptr2,
4678   Boolean serialFirst,
4679   Boolean isRefSeq
4680 )
4681 
4682 {
4683   int             compare;
4684   IntRefBlockPtr  irp1;
4685   IntRefBlockPtr  irp2;
4686   RefBlockPtr     rbp1;
4687   RefBlockPtr     rbp2;
4688   Int2            status;
4689   RefBlockPtr     temp;
4690   ValNodePtr      vnp1;
4691   ValNodePtr      vnp2;
4692 
4693   if (ptr1 == NULL || ptr2 == NULL) return 0;
4694   vnp1 = *((ValNodePtr PNTR) ptr1);
4695   vnp2 = *((ValNodePtr PNTR) ptr2);
4696   if (vnp1 == NULL || vnp2 == NULL) return 0;
4697   rbp1 = (RefBlockPtr) vnp1->data.ptrvalue;
4698   rbp2 = (RefBlockPtr) vnp2->data.ptrvalue;
4699   if (rbp1 == NULL || rbp2 == NULL) return 0;
4700 
4701   if (serialFirst) {
4702     if (rbp1->serial > rbp2->serial) {
4703       return 1;
4704     } else if (rbp1->serial < rbp2->serial) {
4705       return -1;
4706     }
4707   }
4708 
4709   /* usual first sort by published, unpublished, and cit-subs */
4710 
4711   if (rbp1->category > rbp2->category) {
4712     return 1;
4713   } else if (rbp1->category < rbp2->category) {
4714     return -1;
4715   }
4716 
4717   /* for RefSeq, newer publications first, so temporarily swap pointers */
4718 
4719   if (isRefSeq) {
4720     temp = rbp1;
4721     rbp1 = rbp2;
4722     rbp2 = temp;
4723   }
4724 
4725   /* within class, sort by date, older publications first (except RefSeq) */
4726 
4727   irp1 = (IntRefBlockPtr) rbp1;
4728   irp2 = (IntRefBlockPtr) rbp2;
4729 
4730   if ( irp1->date != 0  &&  irp2->date == 0 ) {
4731       return 1;
4732   } else if ( irp1->date == 0  &&  irp2->date != 0 ) {
4733       return -1;
4734   }
4735 
4736   status = DateMatch (irp1->date, irp2->date, TRUE);
4737   if (status == 1 || status == -1) return status;
4738 
4739   /* if dates (e.g., years) match, try to distinguish by uids */
4740 
4741   if (rbp1->pmid != 0 && rbp2->pmid != 0) {
4742     if (rbp1->pmid > rbp2->pmid) {
4743       return 1;
4744     } else if (rbp1->pmid < rbp2->pmid) {
4745       return -1;
4746     }
4747   }
4748 
4749   if (rbp1->muid != 0 && rbp2->muid != 0) {
4750     if (rbp1->muid > rbp2->muid) {
4751       return 1;
4752     } else if (rbp1->muid < rbp2->muid) {
4753       return -1;
4754     }
4755   }
4756 
4757   /* restore sort order after date and pmid/muid matching */
4758 
4759   if (isRefSeq) {
4760     temp = rbp1;
4761     rbp1 = rbp2;
4762     rbp2 = temp;
4763 
4764     irp1 = (IntRefBlockPtr) rbp1;
4765     irp2 = (IntRefBlockPtr) rbp2;
4766   }
4767 
4768   /* if same uid, one with just uids goes last to be excised but remembered */
4769 
4770   if ((rbp1->pmid != 0 && rbp2->pmid != 0) || (rbp1->muid != 0 && rbp2->muid != 0)) {
4771     if (irp1->justuids && (! irp2->justuids)) {
4772       return 1;
4773     } else if ((! irp1->justuids) && irp2->justuids) {
4774       return -1;
4775     }
4776   }
4777 
4778   /* put sites after pubs that refer to all or a range of bases */
4779 
4780   if (rbp1->sites > rbp2->sites) {
4781     return 1;
4782   } else if (rbp2->sites > rbp1->sites) {
4783     return -1;
4784   }
4785 
4786   /* for publication features, sort in explore index order */
4787 
4788   if (irp1->index > irp2->index) {
4789     return 1;
4790   } else if (irp1->index < irp2->index) {
4791     return -1;
4792   }
4793 
4794   /* next use author string */
4795 
4796   if (irp1->authstr != NULL && irp2->authstr != NULL) {
4797     compare = StringICmp (irp1->authstr, irp2->authstr);
4798     if (compare > 0) {
4799       return 1;
4800     } else if (compare < 0) {
4801       return -1;
4802     }
4803   }
4804 
4805   /* use unique label string to determine sort order */
4806 
4807   if (rbp1->uniquestr != NULL && rbp2->uniquestr != NULL) {
4808     compare = StringICmp (rbp1->uniquestr, rbp2->uniquestr);
4809     if (compare > 0) {
4810       return 1;
4811     } else if (compare < 0) {
4812       return -1;
4813     }
4814   }
4815 
4816   /* last resort for equivalent publication descriptors, sort in itemID order */
4817 
4818   if (rbp1->itemtype == OBJ_SEQDESC && rbp2->itemtype == OBJ_SEQDESC) {
4819     if (rbp1->itemID > rbp2->itemID) {
4820       return 1;
4821     } else if (rbp1->itemID < rbp2->itemID) {
4822       return -1;
4823     }
4824   }
4825 
4826   if (rbp1->itemtype == OBJ_ANNOTDESC && rbp2->itemtype == OBJ_ANNOTDESC) {
4827     if (rbp1->itemID > rbp2->itemID) {
4828       return 1;
4829     } else if (rbp1->itemID < rbp2->itemID) {
4830       return -1;
4831     }
4832   }
4833 
4834   if (! serialFirst) {
4835     if (rbp1->serial > rbp2->serial) {
4836       return 1;
4837     } else if (rbp1->serial < rbp2->serial) {
4838       return -1;
4839     }
4840   }
4841 
4842   return 0;
4843 }
4844 
4845 static int LIBCALLBACK SortReferencesA (
4846   VoidPtr ptr1,
4847   VoidPtr ptr2
4848 )
4849 
4850 {
4851   return SortReferences (ptr1, ptr2, FALSE, FALSE);
4852 }
4853 
4854 static int LIBCALLBACK SortReferencesB (
4855   VoidPtr ptr1,
4856   VoidPtr ptr2
4857 )
4858 
4859 {
4860   return SortReferences (ptr1, ptr2, TRUE, FALSE);
4861 }
4862 
4863 static int LIBCALLBACK SortReferencesAR (
4864   VoidPtr ptr1,
4865   VoidPtr ptr2
4866 )
4867 
4868 {
4869   return SortReferences (ptr1, ptr2, FALSE, TRUE);
4870 }
4871 
4872 static int LIBCALLBACK SortReferencesBR (
4873   VoidPtr ptr1,
4874   VoidPtr ptr2
4875 )
4876 
4877 {
4878   return SortReferences (ptr1, ptr2, TRUE, TRUE);
4879 }
4880 
4881 static CharPtr GetAuthorsPlusConsortium (
4882   FmtType format,
4883   AuthListPtr alp
4884 )
4885 
4886 {
4887   CharPtr  consortium;
4888   CharPtr  str;
4889   CharPtr  tmp;
4890 
4891   consortium = NULL;
4892   str = GetAuthorsString (format, alp, &consortium, NULL, NULL);
4893   if (str == NULL) return consortium;
4894   if (consortium == NULL) return str;
4895   tmp = MemNew (StringLen (str) + StringLen (consortium) + 5);
4896   if (tmp == NULL) return NULL;
4897   StringCpy (tmp, str);
4898   StringCat (tmp, "; ");
4899   StringCat (tmp, consortium);
4900   MemFree (str);
4901   MemFree (consortium);
4902   return tmp;
4903 }
4904 
4905 static Boolean HasNoPmidOrMuid (
4906   PubdescPtr pdp
4907 )
4908 
4909 {
4910   ValNodePtr  vnp;
4911 
4912   if (pdp == NULL) return TRUE;
4913   for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4914     if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return FALSE;
4915   }
4916   return TRUE;
4917 }
4918 
4919 typedef struct cdspubs {
4920   Asn2gbWorkPtr  awp;
4921   BioseqPtr      target;
4922   ValNodePtr     vnp;
4923 } CdsPubs, PNTR CdsPubsPtr;
4924 
4925 static Boolean LIBCALLBACK GetRefsOnCDS (
4926   SeqFeatPtr sfp,
4927   SeqMgrFeatContextPtr context
4928 )
4929 
4930 {
4931   AuthListPtr     alp;
4932   Asn2gbWorkPtr   awp;
4933   CdsPubsPtr      cpp;
4934   IntRefBlockPtr  irp;
4935   Boolean         okay;
4936   PubdescPtr      pdp;
4937   RefBlockPtr     rbp;
4938   BioseqPtr       target;
4939 
4940   if (sfp == NULL || context == NULL) return TRUE;
4941   cpp = (CdsPubsPtr) context->userdata;
4942   awp = cpp->awp;
4943   if (awp == NULL) return TRUE;
4944   target = cpp->target;
4945 
4946   okay = TRUE;
4947   pdp = (PubdescPtr) sfp->data.value.ptrvalue;
4948   if (awp->format == FTABLE_FMT) {
4949     if (HasNoPmidOrMuid (pdp)) {
4950       okay = FALSE;
4951     }
4952   }
4953 
4954   if (okay) {
4955     rbp = AddPub (awp, &(awp->pubhead), pdp);
4956     if (rbp != NULL) {
4957 
4958       rbp->entityID = context->entityID;
4959       rbp->itemID = context->itemID;
4960       rbp->itemtype = OBJ_SEQFEAT;
4961 
4962       irp = (IntRefBlockPtr) rbp;
4963       irp->loc = SeqLocMerge (cpp->target, cpp->vnp, NULL, FALSE, TRUE, FALSE);
4964       if (target != NULL) {
4965         irp->left = 0;
4966         irp->right = target->length - 1;
4967       }
4968       alp = GetAuthListPtr (pdp, NULL);
4969       if (alp != NULL) {
4970         irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
4971       }
4972       irp->index = 0;
4973     }
4974   }
4975 
4976   return TRUE;
4977 }
4978 
4979 static void GetRefsOnBioseq (
4980   Asn2gbWorkPtr awp,
4981   BioseqPtr target,
4982   BioseqPtr bsp,
4983   Int4 from,
4984   Int4 to,
4985   SeqLocPtr cdsloc
4986 )
4987 
4988 {
4989   SeqMgrAndContext   acontext;
4990   AnnotDescPtr       adp;
4991   IntAsn2gbJobPtr    ajp;
4992   AuthListPtr        alp;
4993   CdsPubs            cp;
4994   SeqMgrDescContext  dcontext;
4995   SeqMgrFeatContext  fcontext;
4996   Int2               i;
4997   Int2               idx;
4998   IntRefBlockPtr     irp;
4999   Int4Ptr            ivals;
5000   Int4               left;
5001   SeqLocPtr          newloc;
5002   Int2               numivals;
5003   Boolean            okay;
5004   PubdescPtr         pdp;
5005   RefBlockPtr        rbp;
5006   Int4               right;
5007   SeqDescrPtr        sdp;
5008   SeqFeatPtr         sfp;
5009   SeqInt             sint;
5010   SeqIdPtr           sip;
5011   Boolean            split;
5012   Int4               start;
5013   Int4               stop;
5014   Uint1              strand;
5015   Boolean            takeIt;
5016   ValNode            vn;
5017   ValNodePtr         vnp;
5018 
5019   if (awp == NULL || target == NULL || bsp == NULL) return;
5020   ajp = awp->ajp;
5021   if (ajp == NULL) return;
5022 
5023   /* full length loc for descriptors */
5024 
5025   sint.from = 0;
5026   if (ajp->ajp.slp != NULL) {
5027     from = SeqLocStart (ajp->ajp.slp); /* other features use awp->slp for from and to */
5028   }
5029   if (ajp->ajp.slp != NULL) {
5030     sint.to = SeqLocLen (ajp->ajp.slp) - 1;
5031     to = SeqLocStop (ajp->ajp.slp); /* other features use awp->slp for from and to */
5032   } else {
5033     sint.to = bsp->length - 1;
5034   }
5035   sint.strand = Seq_strand_plus;
5036   sint.id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
5037   sint.if_from = NULL;
5038   sint.if_to = NULL;
5039 
5040   vn.choice = SEQLOC_INT;
5041   vn.data.ptrvalue = (Pointer) &sint;
5042   vn.next = NULL;
5043 
5044   sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_pub, &dcontext);
5045   while (sdp != NULL) {
5046 
5047     /* check if descriptor on part already added on segmented bioseq */
5048 
5049     okay = TRUE;
5050     for (vnp = awp->pubhead; vnp != NULL && okay; vnp = vnp->next) {
5051       rbp = (RefBlockPtr) vnp->data.ptrvalue;
5052       if (rbp != NULL) {
5053         if (rbp->entityID == dcontext.entityID &&
5054             rbp->itemID == dcontext.itemID &&
5055             rbp->itemtype == OBJ_SEQDESC) {
5056           okay = FALSE;
5057         }
5058       }
5059     }
5060     if (awp->format == FTABLE_FMT) {
5061       pdp = (PubdescPtr) sdp->data.ptrvalue;
5062       if (HasNoPmidOrMuid (pdp)) {
5063         okay = FALSE;
5064       }
5065     }
5066 
5067     if (okay) {
5068       pdp = (PubdescPtr) sdp->data.ptrvalue;
5069       rbp = AddPub (awp, &(awp->pubhead), pdp);
5070       if (rbp != NULL) {
5071 
5072         rbp->entityID = dcontext.entityID;
5073         rbp->itemID = dcontext.itemID;
5074         rbp->itemtype = OBJ_SEQDESC;
5075 
5076         irp = (IntRefBlockPtr) rbp;
5077         irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
5078         irp->left = 0;
5079         irp->right = target->length - 1;
5080         alp = GetAuthListPtr (pdp, NULL);
5081         if (alp != NULL) {
5082           irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5083         }
5084         irp->index = 0;
5085       }
5086     }
5087     sdp = SeqMgrGetNextDescriptor (target, sdp, Seq_descr_pub, &dcontext);
5088   }
5089 
5090   /* if protein with no pubs, get pubs applicable to DNA location of CDS */
5091 
5092   if (cdsloc != NULL) {
5093     cp.awp = awp;
5094     cp.target = target;
5095     cp.vnp = &vn;
5096     SeqMgrGetAllOverlappingFeatures (cdsloc, FEATDEF_PUB, NULL, 0, LOCATION_SUBSET, (Pointer) &cp, GetRefsOnCDS);
5097   }
5098 
5099   /* also get publications from AnnotDesc on SeqAnnot */
5100 
5101   adp = SeqMgrGetNextAnnotDesc (target, NULL, Annot_descr_pub, &acontext);
5102   while (adp != NULL) {
5103 
5104     okay = TRUE;
5105 
5106     if (okay) {
5107       pdp = (PubdescPtr) adp->data.ptrvalue;
5108       rbp = AddPub (awp, &(awp->pubhead), pdp);
5109       if (rbp != NULL) {
5110 
5111         rbp->entityID = acontext.entityID;
5112         rbp->itemID = acontext.itemID;
5113         rbp->itemtype = OBJ_ANNOTDESC;
5114 
5115         irp = (IntRefBlockPtr) rbp;
5116         irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
5117         irp->left = 0;
5118         irp->right = target->length - 1;
5119         alp = GetAuthListPtr (pdp, NULL);
5120         if (alp != NULL) {
5121           irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5122         }
5123         irp->index = 0;
5124       }
5125     }
5126     adp = SeqMgrGetNextAnnotDesc (target, adp, Annot_descr_pub, &acontext);
5127   }
5128 
5129   SeqIdFree (sint.id);
5130 
5131   /* features are indexed on parent if segmented */
5132 
5133   bsp = awp->parent;
5134 
5135   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
5136   while (sfp != NULL) {
5137     ivals = fcontext.ivals;
5138     numivals = fcontext.numivals;
5139     if (ivals != NULL && numivals > 0) {
5140 
5141       /*
5142       idx = (numivals - 1) * 2;
5143       start = ivals [idx];
5144       stop = ivals [idx + 1];
5145       */
5146 
5147       takeIt = FALSE;
5148       for (i = 0, idx = 0; i < numivals; i++, idx += 2) {
5149         start = ivals [idx];
5150         stop = ivals [idx + 1];
5151         if ((start <= from && stop > from) ||
5152             (start < to && stop >= to) ||
5153             (start >= from && stop <= to)) {
5154           takeIt = TRUE;
5155         }
5156       }
5157       if (awp->format == FTABLE_FMT) {
5158         pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5159         if (HasNoPmidOrMuid (pdp)) {
5160           takeIt = FALSE;
5161         }
5162       }
5163 
5164       if (takeIt /* stop >= from && stop <= to */) {
5165 
5166         /*
5167         start = ivals [0] + 1;
5168         stop = ivals [idx + 1] + 1;
5169         */
5170         pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5171         rbp = AddPub (awp, &(awp->pubhead), pdp);
5172         if (rbp != NULL) {
5173 
5174           rbp->entityID = fcontext.entityID;
5175           rbp->itemID = fcontext.itemID;
5176           rbp->itemtype = OBJ_SEQFEAT;
5177 
5178           irp = (IntRefBlockPtr) rbp;
5179           irp->loc = SeqLocMerge (target, sfp->location, NULL, FALSE, TRUE, FALSE);
5180           irp->left = fcontext.left;
5181           irp->right = fcontext.right;
5182           if (ajp->ajp.slp != NULL) {
5183             sip = SeqIdParse ("lcl|dummy");
5184             left = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_LEFT_END);
5185             right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END);
5186             strand = SeqLocStrand (ajp->ajp.slp);
5187             split = FALSE;
5188             newloc = SeqLocReMapEx (sip, ajp->ajp.slp, irp->loc, 0, FALSE, ajp->masterStyle);
5189             /*
5190             newloc = SeqLocCopyRegion (sip, irp->loc, bsp, left, right, strand, &split);
5191             */
5192             SeqIdFree (sip);
5193             if (newloc != NULL) {
5194               A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
5195               irp->loc = SeqLocFree (irp->loc);
5196               irp->loc = newloc;
5197             }
5198           }
5199           alp = GetAuthListPtr (pdp, NULL);
5200           if (alp != NULL) {
5201             irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5202           }
5203           irp->index = fcontext.index;
5204         }
5205       }
5206     }
5207 
5208     sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_PUB, 0, &fcontext);
5209   }
5210 }
5211 
5212 static Boolean LIBCALLBACK GetRefsOnSeg (
5213   SeqLocPtr slp,
5214   SeqMgrSegmentContextPtr context
5215 )
5216 
5217 {
5218   Asn2gbWorkPtr  awp;
5219   BioseqPtr      bsp;
5220   Int4           from;
5221   SeqLocPtr      loc;
5222   SeqEntryPtr    oldscope;
5223   SeqEntryPtr    sep;
5224   SeqIdPtr       sip;
5225   Int4           to;
5226 
5227   if (slp == NULL || context == NULL) return FALSE;
5228   awp = (Asn2gbWorkPtr) context->userdata;
5229 
5230   from = context->cumOffset;
5231   to = from + context->to - context->from;
5232 
5233   sip = SeqLocId (slp);
5234   if (sip == NULL) {
5235     loc = SeqLocFindNext (slp, NULL);
5236     if (loc != NULL) {
5237       sip = SeqLocId (loc);
5238     }
5239   }
5240   if (sip == NULL) return TRUE;
5241 
5242   /* reference descriptors only on parts within entity */
5243 
5244   sep = GetTopSeqEntryForEntityID (awp->entityID);
5245   oldscope = SeqEntrySetScope (sep);
5246   bsp = BioseqFind (sip);
5247   SeqEntrySetScope (oldscope);
5248 
5249   if (bsp != NULL) {
5250     GetRefsOnBioseq (awp, awp->refs, bsp, from, to, NULL);
5251     return TRUE;
5252   }
5253 
5254   /* if we ever want to fetch remote references, code goes here */
5255 
5256   return TRUE;
5257 }
5258 
5259 NLM_EXTERN Boolean AddReferenceBlock (
5260   Asn2gbWorkPtr awp,
5261   Boolean isRefSeq
5262 )
5263 
5264 {
5265   IntAsn2gbJobPtr    ajp;
5266   AuthListPtr        alp;
5267   Asn2gbSectPtr      asp;
5268   BioseqPtr          bsp;
5269   SeqFeatPtr         cds;
5270   Boolean            combine;
5271   SeqMgrFeatContext  context;
5272   CitSubPtr          csp;
5273   BioseqPtr          dna;
5274   Boolean            excise;
5275   Int2               firstserial;
5276   ValNodePtr         head = NULL;
5277   Int2               i = 0;
5278   IntRefBlockPtr     irp;
5279   Boolean            is_aa;
5280   Boolean            is_ddbj = FALSE;
5281   Boolean            is_embl = FALSE;
5282   Boolean            is_patent = FALSE;
5283   Int2               j;
5284   IntRefBlockPtr     lastirp;
5285   RefBlockPtr        lastrbp;
5286   ValNodePtr         next;
5287   Int2               numReferences;
5288   ValNodePtr         PNTR prev;
5289   RefBlockPtr        rbp;
5290   RefBlockPtr        PNTR referenceArray;
5291   BioseqPtr          refs;
5292   SubmitBlockPtr     sbp;
5293   SeqIdPtr           sip;
5294   SeqLocPtr          slp;
5295   BioseqPtr          target;
5296   ValNodePtr         vnp;
5297 
5298   if (awp == NULL) return FALSE;
5299   ajp = awp->ajp;
5300   if (ajp == NULL) return FALSE;
5301   asp = awp->asp;
5302   if (asp == NULL) return FALSE;
5303   bsp = awp->bsp;
5304   refs = awp->refs;
5305   if (bsp == NULL || refs == NULL) return FALSE;
5306 
5307   /* collect publications on bioseq */
5308 
5309   awp->pubhead = NULL;
5310   GetRefsOnBioseq (awp, bsp, refs, awp->from, awp->to, NULL);
5311   target = bsp;
5312 
5313   for (sip = bsp->id; sip != NULL; sip = sip->next) {
5314     if (sip->choice == SEQID_EMBL) {
5315       is_embl = TRUE;
5316     } else if (sip->choice == SEQID_DDBJ) {
5317       is_ddbj = TRUE;
5318     } else if (sip->choice == SEQID_PATENT) {
5319       is_patent = TRUE;
5320     }
5321   }
5322 
5323   is_aa = (Boolean) ISA_aa (bsp->mol);
5324 
5325   if (bsp->repr == Seq_repr_seg) {
5326 
5327     /* collect publication descriptors on local parts */
5328 
5329     SeqMgrExploreSegments (bsp, (Pointer) awp, GetRefsOnSeg);
5330     target = awp->refs;
5331   }
5332 
5333   if (awp->pubhead == NULL && ISA_aa (bsp->mol)) {
5334 
5335     /* if protein with no pubs, get pubs applicable to DNA location of CDS */
5336 
5337     cds = SeqMgrGetCDSgivenProduct (bsp, &context);
5338     if (cds != NULL) {
5339       dna = BioseqFindFromSeqLoc (cds->location);
5340       if (dna != NULL) {
5341         GetRefsOnBioseq (awp, dna, dna, context.left, context.right, cds->location);
5342         target = dna;
5343       }
5344     }
5345   }
5346 
5347   head = awp->pubhead;
5348   awp->pubhead = NULL;
5349 
5350   if (head == NULL && awp->ssp == NULL) return FALSE;
5351 
5352   /* sort by pub/unpub/sites/sub, then date, finally existing serial */
5353 
5354   if (isRefSeq) {
5355     head = SortValNode (head, SortReferencesAR);
5356   } else {
5357     head = SortValNode (head, SortReferencesA);
5358   }
5359 
5360   if (awp->ssp != NULL && (! awp->onlyGeneRIFs) && (! awp->onlyReviewPubs)) {
5361 
5362     /* add seq-submit citation */
5363 
5364     rbp = (RefBlockPtr) MemNew (sizeof (IntRefBlock));
5365     if (rbp != NULL) {
5366       irp = (IntRefBlockPtr) rbp;
5367 
5368       rbp->blocktype = REFERENCE_BLOCK;
5369       rbp->section = awp->currsection;
5370       rbp->serial = INT2_MAX;
5371       rbp->category = REF_CAT_SUB;
5372 
5373       rbp->entityID = ajp->ajp.entityID;
5374       rbp->itemID = 1;
5375       rbp->itemtype = OBJ_SEQSUB_CIT;
5376 
5377       sbp = awp->ssp->sub;
5378       if (sbp != NULL) {
5379         csp = sbp->cit;
5380         if (csp != NULL) {
5381           alp = GetAuthListPtr (NULL, csp);
5382           if (alp != NULL) {
5383             irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5384           }
5385           if (csp->date != NULL) {
5386             irp->date = DateDup (csp->date);
5387           }
5388         }
5389       }
5390 
5391       if (awp->citSubsFirst) {
5392 
5393         /* for DDBJ, add seq-submit citation to beginning of list */
5394 
5395         vnp = ValNodeNew (NULL);
5396         if (vnp != NULL) {
5397           vnp->choice = 0;
5398           vnp->data.ptrvalue = (VoidPtr) rbp;
5399           vnp->next = head;
5400           head = vnp;
5401         }
5402 
5403       } else {
5404 
5405         /* for GENBANK and EMBL add seq-submit citation to end of list */
5406 
5407         ValNodeAddPointer (&head, 0, rbp);
5408       }
5409     }
5410   }
5411 
5412   /* unique references, excise duplicates from list */
5413 
5414   prev = &(head);
5415   vnp = head;
5416   lastrbp = NULL;
5417   while (vnp != NULL) {
5418     excise = FALSE;
5419     combine = TRUE;
5420     next = vnp->next;
5421     rbp = (RefBlockPtr) vnp->data.ptrvalue;
5422     if (lastrbp != NULL) {
5423       lastirp = (IntRefBlockPtr) lastrbp;
5424       if (rbp != NULL) {
5425         irp = (IntRefBlockPtr) rbp;
5426         if (lastrbp->pmid != 0 && rbp->pmid != 0) {
5427           if (lastrbp->pmid == rbp->pmid) {
5428             if (lastirp->right + 1 >= irp->left) {
5429               excise = TRUE;
5430             }
5431           }
5432         } else if (lastrbp->muid != 0 && rbp->muid != 0) {
5433           if (lastrbp->muid == rbp->muid) {
5434             if (lastirp->right + 1 >= irp->left) {
5435               excise = TRUE;
5436             }
5437           }
5438         } else if (lastrbp->uniquestr != NULL && rbp->uniquestr != NULL) {
5439           if (StringICmp (lastrbp->uniquestr, rbp->uniquestr) == 0) {
5440             if (SeqLocCompare (irp->loc, lastirp->loc) == SLC_A_EQ_B) {
5441               if (StringICmp (irp->authstr, lastirp->authstr) == 0) {
5442 
5443                 /* L76496.1 - removing duplicate submission pubs */
5444                 if (lastirp->right + 1 >= irp->left) {
5445                   excise = TRUE;
5446                 }
5447               }
5448             }
5449           }
5450         }
5451         if (excise && lastrbp->sites == 0 && rbp->sites > 0) {
5452           /* real range trumps sites */
5453           combine = FALSE;
5454         }
5455       }
5456     }
5457     if (rbp != NULL) {
5458       irp = (IntRefBlockPtr) rbp;
5459       if (irp->justuids) {
5460         if (isRefSeq && is_aa) {
5461           /* if allowing justuid in protein RefSeq, try to look up dynamically */
5462           excise = TRUE; /* Back to old behavior, do not fetch */
5463         } else {
5464           /* do not allow justuids reference to appear by itself - S79174.1 */
5465           excise = TRUE;
5466           /* justuids should still combine, even if no authors - S67070.1 */
5467         }
5468       } else if (is_embl && is_patent) {
5469         /* EMBL patent records do not need author or title - A29528.1 */
5470       } else if (StringHasNoText (irp->authstr)) {
5471         /* do not allow no author reference to appear by itself - U07000.1 */
5472         excise = TRUE;
5473         combine = FALSE;
5474       } else if (isRefSeq && is_aa && rbp->category == REF_CAT_SUB) {
5475         /* GenPept RefSeq suppresses cit-subs */
5476         excise = TRUE;
5477         combine = FALSE;
5478       }
5479     }
5480     if (awp->mode == DUMP_MODE) {
5481       excise = FALSE;
5482     }
5483     /* do not hide duplicate EMBL and DDBJ publications */
5484     if (is_embl || is_ddbj) {
5485       excise = FALSE;
5486       combine = TRUE;
5487     }
5488     /* does not fuse equivalent publication features for local, general, refseq, and 2+6 genbank ids */
5489     if (excise && awp->sourcePubFuse) {
5490       *prev = vnp->next;
5491       vnp->next = NULL;
5492 
5493       /* combine locations of duplicate references */
5494 
5495       irp = (IntRefBlockPtr) rbp;
5496       lastirp = (IntRefBlockPtr) lastrbp;
5497       if (combine) {
5498         if (lastirp != NULL) {
5499           slp = SeqLocMerge (target, lastirp->loc, irp->loc, FALSE, TRUE, FALSE);
5500           lastirp->loc = SeqLocFree (lastirp->loc);
5501           lastirp->loc = slp;
5502         }
5503         if (irp != NULL && lastirp != NULL) {
5504           if ((rbp->muid == lastrbp->muid && rbp->muid != 0) ||
5505               (rbp->pmid == lastrbp->pmid && rbp->pmid != 0)) {
5506             if (lastirp->fig == NULL) {
5507               lastirp->fig = StringSaveNoNull (irp->fig);
5508             }
5509             if (lastirp->maploc == NULL) {
5510               lastirp->maploc = StringSaveNoNull (irp->maploc);
5511             }
5512             lastirp->poly_a = irp->poly_a;
5513           }
5514         }
5515       }
5516 
5517       /* and remove duplicate reference */
5518 
5519       MemFree (rbp->uniquestr);
5520       DateFree (irp->date);
5521       SeqLocFree (irp->loc);
5522       MemFree (irp->authstr);
5523       MemFree (irp->fig);
5524       MemFree (irp->maploc);
5525       MemFree (rbp);
5526       ValNodeFree (vnp);
5527 
5528     } else {
5529 
5530       prev = &(vnp->next);
5531       lastrbp = rbp;
5532     }
5533     vnp = next;
5534   }
5535 
5536   /* resort by existing serial, then pub/unpub/sites/sub, then date */
5537 
5538   if (isRefSeq) {
5539     head = SortValNode (head, SortReferencesBR);
5540   } else {
5541     head = SortValNode (head, SortReferencesB);
5542   }
5543 
5544   if (head == NULL) return FALSE;
5545 
5546   /* if taking newest publications, free remainder */
5547 
5548   if (awp->newestPubs) {
5549     for (vnp = head, i = 1; vnp != NULL && i < 5; vnp = vnp->next, i++) continue;
5550     if (vnp != NULL) {
5551       next = vnp->next;
5552       vnp->next = NULL;
5553       for (vnp = next; vnp != NULL; vnp = vnp->next) {
5554         rbp = (RefBlockPtr) vnp->data.ptrvalue;
5555         MemFree (rbp->uniquestr);
5556         irp = (IntRefBlockPtr) rbp;
5557         DateFree (irp->date);
5558         SeqLocFree (irp->loc);
5559         MemFree (irp->authstr);
5560         MemFree (irp->fig);
5561         MemFree (irp->maploc);
5562         MemFree (rbp);
5563       }
5564     }
5565 
5566   /* if taking oldest publications, free remainder */
5567 
5568   } else if (awp->oldestPubs) {
5569     for (vnp = head, j = 0; vnp != NULL; vnp = vnp->next, j++) continue;
5570     if (j > 5) {
5571       for (vnp = head, i = 0; vnp != NULL && i < j - 6; vnp = vnp->next, i++) continue;
5572       if (vnp != NULL) {
5573         next = vnp->next;
5574         vnp->next = NULL;
5575         for (vnp = head; vnp != NULL; vnp = vnp->next) {
5576           rbp = (RefBlockPtr) vnp->data.ptrvalue;
5577           MemFree (rbp->uniquestr);
5578           irp = (IntRefBlockPtr) rbp;
5579           DateFree (irp->date);
5580           SeqLocFree (irp->loc);
5581           MemFree (irp->authstr);
5582           MemFree (irp->fig);
5583           MemFree (irp->maploc);
5584           MemFree (rbp);
5585         }
5586         head = next;
5587       }
5588     }
5589   }
5590 
5591   /* assign serial numbers */
5592 
5593   firstserial = 1;
5594 
5595   /* first find highest one assigned by EMBL/SWISS-PROT */
5596 
5597   for (vnp = head; vnp != NULL; vnp = vnp->next) {
5598     rbp = (RefBlockPtr) vnp->data.ptrvalue;
5599     if (rbp == NULL) continue;
5600     if (rbp->serial > 0 && rbp->serial < INT2_MAX) {
5601       firstserial = rbp->serial + 1;
5602     }
5603   }
5604 
5605   /* then give increasing serial numbers to unassigned publications */
5606 
5607   for (vnp = head; vnp != NULL; vnp = vnp->next) {
5608     rbp = (RefBlockPtr) vnp->data.ptrvalue;
5609     if (rbp == NULL) continue;
5610     if (rbp->serial > 0 && rbp->serial < INT2_MAX) continue;
5611     rbp->serial = firstserial;
5612     firstserial++;
5613   }
5614 
5615   /* allocate reference array for this section */
5616 
5617   numReferences = ValNodeLen (head);
5618   asp->numReferences = numReferences;
5619 
5620   if (numReferences > 0) {
5621     referenceArray = (RefBlockPtr PNTR) MemNew (sizeof (RefBlockPtr) * (numReferences + 1));
5622     asp->referenceArray = referenceArray;
5623 
5624     if (referenceArray != NULL) {
5625 
5626       /* fill in reference array */
5627 
5628       for (vnp = head, i = 0; vnp != NULL && i < numReferences; vnp = vnp->next, i++) {
5629         referenceArray [i] = (RefBlockPtr) vnp->data.ptrvalue;
5630       }
5631     }
5632   }
5633 
5634   /* finally link into blocks for current section */
5635 
5636   ValNodeLink (&(awp->lastblock), head);
5637   vnp = awp->lastblock;
5638   if (vnp == NULL) return FALSE;
5639   while (vnp->next != NULL) {
5640     vnp = vnp->next;
5641   }
5642 
5643   awp->lastblock = vnp;
5644   if (awp->blockList == NULL) {
5645     awp->blockList = vnp;
5646   }
5647 
5648   if (awp->afp != NULL) {
5649     for (vnp = head; vnp != NULL; vnp = vnp->next) {
5650       rbp = (RefBlockPtr) vnp->data.ptrvalue;
5651       if (rbp == NULL) continue;
5652       DoImmediateFormat (awp->afp, (BaseBlockPtr) rbp);
5653     }
5654   }
5655 
5656   return TRUE;
5657 }
5658 
5659 NLM_EXTERN void AddRefStatsBlock (
5660   Asn2gbWorkPtr awp
5661 )
5662 
5663 {
5664   IntAsn2gbJobPtr  ajp;
5665   BaseBlockPtr     bbp;
5666   BioseqPtr        bsp;
5667   StringItemPtr    ffstring;
5668 
5669   if (awp == NULL) return;
5670   ajp = awp->ajp;
5671   if ( ajp == NULL ) return;
5672   bsp = awp->bsp;
5673   if (bsp == NULL) return;
5674 
5675   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
5676 
5677   bbp = Asn2gbAddBlock (awp, REF_STATS_BLOCK, sizeof (BaseBlock));
5678   if (bbp != NULL) {
5679     ffstring = FFGetString (ajp);
5680     if (ffstring != NULL) {
5681       FFStartPrint (ffstring, awp->format, 0, 12, "REFSTATS", 12, 0, 0, NULL, FALSE);
5682     
5683       FFAddOneString (ffstring, "placeholder", FALSE, FALSE, TILDE_TO_SPACES);
5684   
5685       bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
5686       FFRecycleString(ajp, ffstring);
5687     }
5688 
5689     if (awp->afp != NULL) {
5690       DoImmediateFormat (awp->afp, bbp);
5691     }
5692   }
5693 }
5694 
5695 NLM_EXTERN void AddWGSBlock (
5696   Asn2gbWorkPtr awp
5697 )
5698 
5699 {
5700   IntAsn2gbJobPtr    ajp;
5701   BaseBlockPtr       bbp;
5702   BioseqPtr          bsp;
5703   Char               buf [80];
5704   SeqMgrDescContext  dcontext;
5705   CharPtr            first;
5706   CharPtr            last;
5707   ObjectIdPtr        oip;
5708   SeqDescrPtr        sdp;
5709   UserFieldPtr       ufp;
5710   UserObjectPtr      uop;
5711   Int2               wgstype;
5712   StringItemPtr      ffstring;
5713 
5714   if (awp == NULL) return;
5715   ajp = awp->ajp;
5716   if ( ajp == NULL ) return;
5717   bsp = awp->bsp;
5718   if (bsp == NULL) return;
5719 
5720   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
5721 
5722   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
5723       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
5724     sprintf (buf, "<a name=\"wgs_%ld\"></a>", (long) awp->currGi);
5725     DoQuickLinkFormat (awp->afp, buf);
5726   }
5727 
5728   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
5729   while (sdp != NULL) {
5730     uop = (UserObjectPtr) sdp->data.ptrvalue;
5731     if (uop != NULL) {
5732       oip = uop->type;
5733       first = NULL;
5734       last = NULL;
5735       wgstype = 0;
5736       if (oip != NULL) {
5737         if (StringICmp (oip->str, "WGSProjects") == 0) {
5738           wgstype = 1;
5739         } else if (StringICmp (oip->str, "WGS-Scaffold-List") == 0) {
5740           wgstype = 2;
5741         } else if (StringICmp (oip->str, "WGS-Contig-List") == 0) {
5742           wgstype = 3;
5743         }
5744         if (wgstype != 0) {
5745           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
5746             oip = ufp->label;
5747             if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
5748             if (StringICmp (oip->str, "WGS_accession_first") == 0) {
5749               first = (CharPtr) ufp->data.ptrvalue;
5750             } else if (StringICmp (oip->str, "WGS_accession_last") == 0) {
5751               last = (CharPtr) ufp->data.ptrvalue;
5752             } else if (StringICmp (oip->str, "Accession_first") == 0) {
5753               first = (CharPtr) ufp->data.ptrvalue;
5754             } else if (StringICmp (oip->str, "Accession_last") == 0) {
5755               last = (CharPtr) ufp->data.ptrvalue;
5756             }
5757           }
5758           if (first != NULL && last != NULL) {
5759             bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
5760             if (bbp != NULL) {
5761               ffstring = FFGetString (ajp);
5762               if (ffstring != NULL) {
5763                 if (wgstype == 1) {
5764                   FFStartPrint (ffstring, awp->format, 0, 12, "WGS", 12, 0, 0, NULL, FALSE);
5765                 } else if (wgstype == 2) {
5766                   FFStartPrint (ffstring, awp->format, 0, 12, "WGS_SCAFLD", 12, 0, 0, NULL, FALSE);
5767                 } else if (wgstype == 3) {
5768                   FFStartPrint (ffstring, awp->format, 0, 12, "WGS_CONTIG", 12, 0, 0, NULL, FALSE);
5769                 }
5770               
5771                 if ( GetWWW(ajp) ) {
5772                   if (StringCmp (first, last) != 0) {
5773                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
5774                     FF_Add_NCBI_Base_URL (ffstring, link_wgs);
5775                     FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", first, NULL, FALSE, FALSE, TILDE_IGNORE);
5776                     FFAddTextToString(ffstring, ":", last, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
5777                     sprintf (buf, "%s-%s", first, last);
5778                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5779                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
5780                   } else {
5781                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
5782                     FF_Add_NCBI_Base_URL (ffstring, link_seqn);
5783                     FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
5784                     sprintf (buf, "%s", first);
5785                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5786                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
5787                   }
5788                 } else {
5789                   if (StringCmp (first, last) != 0) {
5790                     sprintf (buf, "%s-%s", first, last);
5791                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5792                   } else {
5793                     sprintf (buf, "%s", first);
5794                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5795                   }
5796                 }
5797 
5798                 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
5799                 FFRecycleString(ajp, ffstring);
5800               }
5801 
5802               bbp->entityID = dcontext.entityID;
5803               bbp->itemtype = OBJ_SEQDESC;
5804               bbp->itemID = dcontext.itemID;
5805               if (awp->afp != NULL) {
5806                 DoImmediateFormat (awp->afp, bbp);
5807               }
5808             }
5809           }
5810         }
5811       }
5812     }
5813     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
5814   }
5815 }
5816 
5817 NLM_EXTERN void AddCAGEBlock (
5818   Asn2gbWorkPtr awp
5819 )
5820 
5821 {
5822   IntAsn2gbJobPtr    ajp;
5823   BaseBlockPtr       bbp;
5824   BioseqPtr          bsp;
5825   Char               buf [80];
5826   Int2               cagetype;
5827   SeqMgrDescContext  dcontext;
5828   CharPtr            first;
5829   CharPtr            last;
5830   ObjectIdPtr        oip;
5831   SeqDescrPtr        sdp;
5832   UserFieldPtr       ufp;
5833   UserObjectPtr      uop;
5834   StringItemPtr      ffstring;
5835 
5836   if (awp == NULL) return;
5837   ajp = awp->ajp;
5838   if ( ajp == NULL ) return;
5839   bsp = awp->bsp;
5840   if (bsp == NULL) return;
5841 
5842   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
5843 
5844   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
5845       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
5846     sprintf (buf, "<a name=\"wgs_%ld\"></a>", (long) awp->currGi);
5847     DoQuickLinkFormat (awp->afp, buf);
5848   }
5849 
5850   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
5851   while (sdp != NULL) {
5852     uop = (UserObjectPtr) sdp->data.ptrvalue;
5853     if (uop != NULL) {
5854       oip = uop->type;
5855       first = NULL;
5856       last = NULL;
5857       cagetype = 0;
5858       if (oip != NULL) {
5859         if (StringICmp (oip->str, "CAGE-Tag-List") == 0) {
5860           cagetype = 1;
5861         }
5862         if (cagetype != 0) {
5863           for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
5864             oip = ufp->label;
5865             if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
5866             if (StringICmp (oip->str, "CAGE_accession_first") == 0) {
5867               first = (CharPtr) ufp->data.ptrvalue;
5868             } else if (StringICmp (oip->str, "CAGE_accession_last") == 0) {
5869               last = (CharPtr) ufp->data.ptrvalue;
5870             }
5871           }
5872           if (first != NULL && last != NULL) {
5873             bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
5874             if (bbp != NULL) {
5875               ffstring = FFGetString (ajp);
5876               if (ffstring != NULL) {
5877                 if (cagetype == 1) {
5878                   FFStartPrint (ffstring, awp->format, 0, 12, "TAG", 12, 0, 0, NULL, FALSE);
5879                 }
5880               
5881                 if ( GetWWW(ajp) ) {
5882                   if (StringCmp (first, last) != 0) {
5883                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
5884                     FF_Add_NCBI_Base_URL (ffstring, link_wgs);
5885                     FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", first, NULL, FALSE, FALSE, TILDE_IGNORE);
5886                     FFAddTextToString(ffstring, ":", last, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
5887                     sprintf (buf, "%s-%s", first, last);
5888                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5889                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
5890                   } else {
5891                     FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
5892                     FF_Add_NCBI_Base_URL (ffstring, link_seqn);
5893                     FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
5894                     sprintf (buf, "%s", first);
5895                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5896                     FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
5897                   }
5898                 } else {
5899                   if (StringCmp (first, last) != 0) {
5900                     sprintf (buf, "%s-%s", first, last);
5901                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5902                   } else {
5903                     sprintf (buf, "%s", first);
5904                     FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5905                   }
5906                 }
5907 
5908                 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
5909                 FFRecycleString(ajp, ffstring);
5910               }
5911 
5912               bbp->entityID = dcontext.entityID;
5913               bbp->itemtype = OBJ_SEQDESC;
5914               bbp->itemID = dcontext.itemID;
5915               if (awp->afp != NULL) {
5916                 DoImmediateFormat (awp->afp, bbp);
5917               }
5918             }
5919           }
5920         }
5921       }
5922     }
5923     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
5924   }
5925 }
5926 
5927 NLM_EXTERN void AddGenomeBlock (
5928   Asn2gbWorkPtr awp
5929 )
5930 
5931 {
5932   CharPtr            accn;
5933   IntAsn2gbJobPtr    ajp;
5934   BaseBlockPtr       bbp;
5935   BioseqPtr          bsp;
5936   Char               buf [128];
5937   SeqMgrDescContext  dcontext;
5938   Boolean            first = TRUE;
5939   CharPtr            moltype;
5940   ObjectIdPtr        oip;
5941   SeqDescrPtr        sdp;
5942   UserFieldPtr       ufp;
5943   UserObjectPtr      uop;
5944   UserFieldPtr       urf;
5945   StringItemPtr      ffstring;
5946 
5947   if (awp == NULL) return;
5948   ajp = awp->ajp;
5949   if ( ajp == NULL ) return;
5950   bsp = awp->bsp;
5951   if (bsp == NULL) return;
5952 
5953   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
5954 
5955   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
5956       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
5957     sprintf (buf, "<a name=\"genome_%ld\"></a>", (long) awp->currGi);
5958     DoQuickLinkFormat (awp->afp, buf);
5959   }
5960 
5961   bbp = Asn2gbAddBlock (awp, GENOME_BLOCK, sizeof (BaseBlock));
5962   if (bbp == NULL) return;
5963 
5964   ffstring = FFGetString(ajp);
5965   if ( ffstring == NULL ) return;
5966 
5967   FFStartPrint (ffstring, awp->format, 0, 12, "GENOME", 12, 0, 0, NULL, FALSE);
5968 
5969   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
5970   while (sdp != NULL) {
5971     uop = (UserObjectPtr) sdp->data.ptrvalue;
5972     if (uop != NULL) {
5973       oip = uop->type;
5974       if (oip != NULL && StringICmp (oip->str, "GenomeProject") == 0) {
5975         for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
5976           oip = ufp->label;
5977           if (oip == NULL || oip->str == NULL || ufp->choice != 11) continue;
5978           if (StringICmp (oip->str, "Chromosome") != 0) continue;
5979           accn = NULL;
5980           moltype = NULL;
5981           for (urf = (UserFieldPtr) ufp->data.ptrvalue; urf != NULL; urf = urf->next) {
5982             oip = urf->label;
5983             if (oip == NULL || oip->str == NULL || urf->choice != 1) continue;
5984             if (StringICmp (oip->str, "accession") == 0) {
5985               accn = (CharPtr) urf->data.ptrvalue;
5986             } else if (StringICmp (oip->str, "Moltype") == 0) {
5987               moltype = (CharPtr) urf->data.ptrvalue;
5988             }
5989           }
5990           if (! StringHasNoText (accn)) {
5991             if (! first) {
5992               FFAddNewLine(ffstring);
5993             }
5994             first = FALSE;
5995             FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
5996             if (! StringHasNoText (moltype)) {
5997               FFAddTextToString (ffstring, " (", moltype, ")", FALSE, FALSE, TILDE_TO_SPACES);
5998             }
5999           }
6000         }
6001       }
6002     }
6003     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
6004   }
6005 
6006   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
6007   FFRecycleString(ajp, ffstring);
6008 
6009   if (awp->afp != NULL) {
6010     DoImmediateFormat (awp->afp, bbp);
6011   }
6012 }
6013 
6014 NLM_EXTERN void AddBasecountBlock (
6015   Asn2gbWorkPtr awp
6016 )
6017 
6018 {
6019   IntAsn2gbJobPtr  ajp;
6020   BaseBlockPtr     bbp;
6021   BioseqPtr        bsp;
6022  
6023   if (awp == NULL) return;
6024   ajp = awp->ajp;
6025   if (ajp == NULL) return;
6026   bsp = awp->bsp;
6027   if (bsp == NULL) return;
6028 
6029   bbp = Asn2gbAddBlock (awp, BASECOUNT_BLOCK, sizeof (BaseBlock));
6030   if (bbp == NULL) return;
6031 
6032   bbp->entityID = awp->entityID;
6033   bbp->itemtype = bsp->idx.itemtype;
6034   bbp->itemID = bsp->idx.itemID;
6035 
6036   if (awp->afp != NULL) {
6037     DoImmediateFormat (awp->afp, bbp);
6038   }
6039 }
6040 
6041 NLM_EXTERN void AddOriginBlock (
6042   Asn2gbWorkPtr awp
6043 )
6044 
6045 {
6046   IntAsn2gbJobPtr    ajp;
6047   BaseBlockPtr       bbp;
6048   BioseqPtr          bsp;
6049   Char               buf [67];
6050   SeqMgrDescContext  dcontext;
6051   GBBlockPtr         gbp;
6052   SeqDescrPtr        sdp;
6053   StringItemPtr      ffstring;
6054 
6055   if (awp == NULL) return;
6056   ajp = awp->ajp;
6057   if (ajp == NULL) return;
6058   bsp = awp->bsp;
6059   if (bsp == NULL) return;
6060 
6061   if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6062   
6063   ffstring = FFGetString(ajp);
6064   if ( ffstring == NULL ) return;
6065 
6066   bbp = Asn2gbAddBlock (awp, ORIGIN_BLOCK, sizeof (BaseBlock));
6067   if (bbp == NULL) return;
6068 
6069   bbp->entityID = awp->entityID;
6070 
6071   if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
6072 
6073     buf [0] = '\0';
6074 
6075     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
6076     if (sdp != NULL) {
6077       gbp = (GBBlockPtr) sdp->data.ptrvalue;
6078       if (gbp != NULL && (! StringHasNoText (gbp->origin))) {
6079         StringNCpy_0 (buf, gbp->origin, sizeof (buf));
6080         bbp->entityID = dcontext.entityID;
6081         bbp->itemID = dcontext.itemID;
6082         bbp->itemtype = OBJ_SEQDESC;
6083       }
6084     }
6085 
6086     FFStartPrint (ffstring, awp->format, 0, 12, "ORIGIN", 12, 0, 0, NULL, FALSE);
6087 
6088     if (! StringHasNoText (buf)) {
6089       FFAddOneString (ffstring, buf, TRUE, FALSE, TILDE_TO_SPACES);
6090     }
6091   }
6092 
6093   bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 12, 0, 0, NULL);
6094   FFRecycleString(ajp, ffstring);
6095 
6096   if (awp->afp != NULL) {
6097     DoImmediateFormat (awp->afp, bbp);
6098   }
6099 }
6100 
6101 #define BASES_PER_BLOCK 1200
6102 
6103 NLM_EXTERN void AddSequenceBlock (
6104   Asn2gbWorkPtr awp
6105 )
6106 
6107 {
6108   IntAsn2gbJobPtr  ajp;
6109   BioseqPtr        bsp;
6110   Char             buf [128];
6111   Int4             extend;
6112   Int4             len;
6113   SeqBlockPtr      sbp;
6114   Int4             start;
6115   Int4             stop;
6116 
6117   if (awp == NULL) return;
6118   ajp = awp->ajp;
6119   if (ajp == NULL) return;
6120   bsp = awp->bsp;
6121   if (bsp == NULL) return;
6122 
6123   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6124       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6125     sprintf (buf, "<a name=\"sequence_%ld\"></a>", (long) awp->currGi);
6126     DoQuickLinkFormat (awp->afp, buf);
6127   }
6128 
6129   if (awp->slp != NULL) {
6130     len = SeqLocLen (awp->slp);
6131   } else {
6132     len = bsp->length;
6133   }
6134 
6135   /* if generating GBSeq XML, populate single sequence block */
6136 
6137   if (ajp->gbseq) {
6138     sbp = (SeqBlockPtr) Asn2gbAddBlock (awp, SEQUENCE_BLOCK, sizeof (SeqBlock));
6139     if (sbp == NULL) return;
6140 
6141     sbp->entityID = bsp->idx.entityID;
6142     sbp->itemID = bsp->idx.itemID;
6143     sbp->itemtype = OBJ_BIOSEQ;
6144 
6145     sbp->start = 0;
6146     sbp->stop = len;
6147 
6148     if (awp->afp != NULL) {
6149       DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
6150     }
6151 
6152     return;
6153   }
6154 
6155   /* otherwise populate individual sequence blocks for given range */
6156 
6157   for (start = 0; start < len; start += BASES_PER_BLOCK) {
6158     sbp = (SeqBlockPtr) Asn2gbAddBlock (awp, SEQUENCE_BLOCK, sizeof (SeqBlock));
6159     if (sbp == NULL) continue;
6160 
6161     sbp->entityID = bsp->idx.entityID;
6162     sbp->itemID = bsp->idx.itemID;
6163     sbp->itemtype = OBJ_BIOSEQ;
6164 
6165     stop = start + BASES_PER_BLOCK;
6166     if (stop >= len) {
6167       stop = len;
6168     }
6169     extend = start + BASES_PER_BLOCK + 60;
6170     if (extend >= len) {
6171       extend = len;
6172     }
6173 
6174     sbp->start = start;
6175     sbp->stop = stop;
6176     sbp->extend = extend;
6177 
6178     if (awp->afp != NULL) {
6179       DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
6180     }
6181   }
6182 }
6183 
6184 NLM_EXTERN void AddContigBlock (
6185   Asn2gbWorkPtr awp
6186 )
6187 
6188 {
6189   IntAsn2gbJobPtr  ajp;
6190   BaseBlockPtr     bbp;
6191   Char             buf [128];
6192 
6193   if (awp == NULL) return;
6194   ajp = awp->ajp;
6195   if ( ajp == NULL ) return;
6196 
6197   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6198       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6199     sprintf (buf, "<a name=\"contig_%ld\"></a>", (long) awp->currGi);
6200     DoQuickLinkFormat (awp->afp, buf);
6201   }
6202   bbp = Asn2gbAddBlock (awp, CONTIG_BLOCK, sizeof (BaseBlock));
6203 
6204   if (awp->afp != NULL) {
6205     DoImmediateFormat (awp->afp, bbp);
6206   }
6207 }
6208 
6209 NLM_EXTERN void AddSlashBlock (
6210   Asn2gbWorkPtr awp
6211 )
6212 
6213 {
6214   IntAsn2gbJobPtr  ajp;
6215   BaseBlockPtr     bbp;
6216   Char             buf [128];
6217   CharPtr          str;
6218 
6219   if (awp == NULL) return;
6220   ajp = awp->ajp;
6221   if (ajp == NULL) return;
6222 
6223   /*
6224   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6225       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6226     sprintf (buf, "<a name=\"slash_%ld\"></a>", (long) awp->currGi);
6227     DoQuickLinkFormat (awp->afp, buf);
6228   }
6229   */
6230 
6231   bbp = Asn2gbAddBlock (awp, SLASH_BLOCK, sizeof (BaseBlock));
6232   if (bbp == NULL) return;
6233 
6234   bbp->entityID = awp->entityID;
6235 
6236   if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6237       (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6238     sprintf (buf, "//</pre>\n<a name=\"slash_%ld\"></a>", (long) awp->currGi);
6239     str = StringSave (buf);
6240   } else if (GetWWW (ajp)) {
6241     sprintf (buf, "//</pre>\n");
6242     str = StringSave (buf);
6243   } else {
6244     str = MemNew(sizeof(Char) * 4);
6245     StringNCpy(str, "//\n", 4);
6246   }
6247 
6248   bbp->string = str;
6249 
6250   if (awp->afp != NULL) {
6251     DoImmediateFormat (awp->afp, bbp);
6252   }
6253 }
6254 
6255 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.