NCBI C Toolkit Cross Reference

C/api/asn2gnb6.c


  1 /*   asn2gnb6.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  asn2gnb6.c
 27 *
 28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
 29 *          Mati Shomrat
 30 *
 31 * Version Creation Date:   10/21/98
 32 *
 33 * $Revision: 1.211 $
 34 *
 35 * File Description:  New GenBank flatfile generator - work in progress
 36 *
 37 * Modifications:
 38 * --------------------------------------------------------------------------
 39 * ==========================================================================
 40 */
 41 
 42 #include <ncbi.h>
 43 #include <objall.h>
 44 #include <objsset.h>
 45 #include <objsub.h>
 46 #include <objfdef.h>
 47 #include <objpubme.h>
 48 #include <seqport.h>
 49 #include <sequtil.h>
 50 #include <sqnutils.h>
 51 #include <subutil.h>
 52 #include <tofasta.h>
 53 #include <explore.h>
 54 #include <gbfeat.h>
 55 #include <gbftdef.h>
 56 #include <edutil.h>
 57 #include <alignmgr2.h>
 58 #include <asn2gnbi.h>
 59 
 60 #ifdef WIN_MAC
 61 #if __profile__
 62 #include <Profiler.h>
 63 #endif
 64 #endif
 65 
 66 static CharPtr link_tax = "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";
 67 
 68 static CharPtr link_featn = "http://www.ncbi.nlm.nih.gov/nuccore/";
 69 static CharPtr link_featp = "http://www.ncbi.nlm.nih.gov/protein/";
 70 
 71 static CharPtr link_seqn = "http://www.ncbi.nlm.nih.gov/nuccore/";
 72 static CharPtr link_seqp = "http://www.ncbi.nlm.nih.gov/protein/";
 73 
 74 static CharPtr link_lat_lon = "http://www.ncbi.nlm.nih.gov/projects/Sequin/latlonview.html?";
 75 
 76 
 77 
 78 
 79 /* ordering arrays for qualifiers and note components */
 80 
 81 static SourceType source_qual_order [] = {
 82   SCQUAL_organism,
 83 
 84   SCQUAL_organelle,
 85 
 86   SCQUAL_mol_type,
 87 
 88   SCQUAL_strain,
 89   SCQUAL_sub_strain,
 90   SCQUAL_variety,
 91   SCQUAL_serotype,
 92   SCQUAL_serovar,
 93   SCQUAL_cultivar,
 94   SCQUAL_isolate,
 95   SCQUAL_isolation_source,
 96   SCQUAL_spec_or_nat_host,
 97   SCQUAL_sub_species,
 98 
 99   SCQUAL_specimen_voucher,
100   SCQUAL_culture_collection,
101   SCQUAL_bio_material,
102 
103   SCQUAL_db_xref,
104   SCQUAL_org_xref,
105 
106   SCQUAL_chromosome,
107 
108   SCQUAL_segment,
109 
110   SCQUAL_map,
111   SCQUAL_clone,
112   SCQUAL_sub_clone,
113   SCQUAL_haplotype,
114   SCQUAL_haplogroup,
115   SCQUAL_sex,
116   SCQUAL_mating_type,
117   SCQUAL_cell_line,
118   SCQUAL_cell_type,
119   SCQUAL_tissue_type,
120   SCQUAL_clone_lib,
121   SCQUAL_dev_stage,
122   SCQUAL_ecotype,
123   SCQUAL_frequency,
124 
125   SCQUAL_germline,
126   SCQUAL_rearranged,
127   SCQUAL_transgenic,
128   SCQUAL_environmental_sample,
129 
130   SCQUAL_lab_host,
131   SCQUAL_pop_variant,
132   SCQUAL_tissue_lib,
133 
134   SCQUAL_plasmid_name,
135   SCQUAL_transposon_name,
136   SCQUAL_ins_seq_name,
137 
138   SCQUAL_country,
139 
140   SCQUAL_focus,
141 
142   SCQUAL_lat_lon,
143   SCQUAL_collection_date,
144   SCQUAL_collected_by,
145   SCQUAL_identified_by,
146   /*
147   SCQUAL_fwd_primer_seq,
148   SCQUAL_rev_primer_seq,
149   SCQUAL_fwd_primer_name,
150   SCQUAL_rev_primer_name,
151   */
152   SCQUAL_PCR_primers,
153   SCQUAL_PCR_reaction,
154 
155   SCQUAL_note,
156 
157   SCQUAL_sequenced_mol,
158   SCQUAL_label,
159   SCQUAL_usedin,
160   SCQUAL_citation,
161   (SourceType) 0
162 };
163 
164 static SourceType source_desc_note_order [] = {
165   SCQUAL_seqfeat_note,
166   SCQUAL_orgmod_note,
167   SCQUAL_subsource_note,
168 
169   SCQUAL_metagenomic,
170 
171   SCQUAL_linkage_group,
172 
173   SCQUAL_type,
174   SCQUAL_sub_type,
175   SCQUAL_serogroup,
176   SCQUAL_pathovar,
177   SCQUAL_chemovar,
178   SCQUAL_biovar,
179   SCQUAL_biotype,
180   SCQUAL_group,
181   SCQUAL_sub_group,
182   SCQUAL_common,
183   SCQUAL_acronym,
184   SCQUAL_dosage,
185 
186   SCQUAL_authority,
187   SCQUAL_forma,
188   SCQUAL_forma_specialis,
189   SCQUAL_synonym,
190   SCQUAL_anamorph,
191   SCQUAL_teleomorph,
192   SCQUAL_breed,
193 
194   SCQUAL_metagenome_source,
195   SCQUAL_metagenome_note,
196 
197   SCQUAL_genotype,
198   SCQUAL_plastid_name,
199 
200   SCQUAL_endogenous_virus_name,
201 
202   SCQUAL_common_name,
203 
204   SCQUAL_PCR_primer_note,
205   SCQUAL_PCR_reaction,
206 
207   SCQUAL_zero_orgmod,
208   SCQUAL_one_orgmod,
209   SCQUAL_zero_subsrc,
210 
211   /* SCQUAL_old_lineage, */
212 
213   /* SCQUAL_old_name, */
214   (SourceType) 0
215 };
216 
217 static SourceType source_feat_note_order [] = {
218   SCQUAL_unstructured,
219 
220   SCQUAL_metagenomic,
221 
222   SCQUAL_linkage_group,
223   SCQUAL_mating_type,
224 
225   SCQUAL_type,
226   SCQUAL_sub_type,
227   SCQUAL_serogroup,
228   SCQUAL_pathovar,
229   SCQUAL_chemovar,
230   SCQUAL_biovar,
231   SCQUAL_biotype,
232   SCQUAL_group,
233   SCQUAL_sub_group,
234   SCQUAL_common,
235   SCQUAL_acronym,
236   SCQUAL_dosage,
237 
238   SCQUAL_authority,
239   SCQUAL_forma,
240   SCQUAL_forma_specialis,
241   SCQUAL_synonym,
242   SCQUAL_anamorph,
243   SCQUAL_teleomorph,
244   SCQUAL_breed,
245   
246   SCQUAL_metagenome_source,
247   SCQUAL_metagenome_note,
248 
249   SCQUAL_genotype,
250   SCQUAL_plastid_name,
251 
252   SCQUAL_endogenous_virus_name,
253 
254   SCQUAL_seqfeat_note,
255   SCQUAL_orgmod_note,
256   SCQUAL_subsource_note,
257 
258   SCQUAL_common_name,
259 
260   SCQUAL_PCR_primer_note,
261   SCQUAL_PCR_reaction,
262 
263   SCQUAL_zero_orgmod,
264   SCQUAL_one_orgmod,
265   SCQUAL_zero_subsrc,
266 
267   /* SCQUAL_old_lineage, */
268 
269   /* SCQUAL_old_name, */
270   (SourceType) 0
271 };
272 
273 NLM_EXTERN SourceQual asn2gnbk_source_quals [ASN2GNBK_TOTAL_SOURCE] = {
274   { "",                         Qual_class_ignore     },
275   { "acronym",                  Qual_class_orgmod     },
276   { "anamorph",                 Qual_class_orgmod     },
277   { "authority",                Qual_class_orgmod     },
278   { "biotype",                  Qual_class_orgmod     },
279   { "biovar",                   Qual_class_orgmod     },
280   { "bio_material",             Qual_class_voucher    },
281   { "breed",                    Qual_class_orgmod     },
282   { "cell_line",                Qual_class_subsource  },
283   { "cell_type",                Qual_class_subsource  },
284   { "chemovar",                 Qual_class_orgmod     },
285   { "chromosome",               Qual_class_subsource  },
286   { "citation",                 Qual_class_pubset     },
287   { "clone",                    Qual_class_subsource  },
288   { "clone_lib",                Qual_class_subsource  },
289   { "collected_by",             Qual_class_subsource  },
290   { "collection_date",          Qual_class_subsource  },
291   { "common",                   Qual_class_orgmod     },
292   { "common",                   Qual_class_string     },
293   { "country",                  Qual_class_subsource  },
294   { "cultivar",                 Qual_class_orgmod     },
295   { "culture_collection",       Qual_class_voucher    },
296   { "db_xref",                  Qual_class_db_xref    },
297   { "db_xref",                  Qual_class_db_xref    },
298   { "dev_stage",                Qual_class_subsource  },
299   { "dosage",                   Qual_class_orgmod     },
300   { "ecotype",                  Qual_class_orgmod     },
301   { "endogenous_virus",         Qual_class_subsource  },
302   { "environmental_sample",     Qual_class_subsource  },
303   { "extrachromosomal",         Qual_class_boolean    },
304   { "focus",                    Qual_class_boolean    },
305   { "forma",                    Qual_class_orgmod     },
306   { "forma_specialis",          Qual_class_orgmod     },
307   { "frequency",                Qual_class_subsource  },
308   { "fwd_primer_name",          Qual_class_subsource  },
309   { "fwd_primer_seq",           Qual_class_subsource  },
310   { "gb_acronym",               Qual_class_orgmod     },
311   { "gb_anamorph",              Qual_class_orgmod     },
312   { "gb_synonym",               Qual_class_orgmod     },
313   { "genotype",                 Qual_class_subsource  },
314   { "germline",                 Qual_class_subsource  },
315   { "group",                    Qual_class_orgmod     },
316   { "haplogroup",               Qual_class_subsource  },
317   { "haplotype",                Qual_class_subsource  },
318   { "identified_by",            Qual_class_subsource  },
319   { "insertion_seq",            Qual_class_subsource  },
320   { "isolate",                  Qual_class_orgmod     },
321   { "isolation_source",         Qual_class_subsource  },
322   { "lab_host",                 Qual_class_subsource  },
323   { "label",                    Qual_class_label      },
324   { "lat_lon",                  Qual_class_lat_lon    },
325   { "linkage_group",            Qual_class_subsource  },
326   { "macronuclear",             Qual_class_boolean    },
327   { "map",                      Qual_class_subsource  },
328   { "mating_type",              Qual_class_subsource  },
329   { "derived from metagenome",  Qual_class_orgmod     },
330   { "metagenome_source",        Qual_class_orgmod     },
331   { "metagenomic",              Qual_class_subsource  },
332   { "mol_type",                 Qual_class_string     },
333   { "note",                     Qual_class_note       },
334   { "old_lineage",              Qual_class_orgmod     },
335   { "old_name",                 Qual_class_orgmod     },
336   { "organism",                 Qual_class_string     },
337   { "organelle",                Qual_class_organelle  },
338   { "orgmod_note",              Qual_class_orgmod     },
339   { "pathovar",                 Qual_class_orgmod     },
340   { "PCR_primers",              Qual_class_pcr        },
341   { "PCR_primers",              Qual_class_pcr        },
342   { "PCR_primers",              Qual_class_pcr_react  },
343   { "plasmid",                  Qual_class_subsource  },
344   { "plastid",                  Qual_class_subsource  },
345   { "pop_variant",              Qual_class_subsource  },
346   { "rearranged",               Qual_class_subsource  },
347   { "rev_primer_name",          Qual_class_subsource  },
348   { "rev_primer_seq",           Qual_class_subsource  },
349   { "segment",                  Qual_class_subsource  },
350   { "seqfeat_note",             Qual_class_string     },
351   { "sequenced_mol",            Qual_class_quote      },
352   { "serogroup",                Qual_class_orgmod     },
353   { "serotype",                 Qual_class_orgmod     },
354   { "serovar",                  Qual_class_orgmod     },
355   { "sex",                      Qual_class_subsource  },
356   { "host",                     Qual_class_orgmod     },
357   { "specimen_voucher",         Qual_class_voucher    },
358   { "strain",                   Qual_class_orgmod     },
359   { "sub_clone",                Qual_class_subsource  },
360   { "subgroup",                 Qual_class_orgmod     },
361   { "sub_species",              Qual_class_orgmod     },
362   { "sub_strain",               Qual_class_orgmod     },
363   { "subtype",                  Qual_class_orgmod     },
364   { "subsource_note",           Qual_class_subsource  },
365   { "synonym",                  Qual_class_orgmod     },
366   { "teleomorph",               Qual_class_orgmod     },
367   { "tissue_lib",               Qual_class_subsource  },
368   { "tissue_type",              Qual_class_subsource  },
369   { "transgenic",               Qual_class_subsource  },
370   { "transposon",               Qual_class_subsource  },
371   { "type",                     Qual_class_orgmod     },
372   { "unstructured",             Qual_class_valnode    },
373   { "usedin",                   Qual_class_quote      },
374   { "variety",                  Qual_class_orgmod     },
375   { "?",                        Qual_class_orgmod     },
376   { "?",                        Qual_class_orgmod     },
377   { "?",                        Qual_class_subsource  }
378 };
379 
380 NLM_EXTERN SourceType subSourceToSourceIdx [42] = {
381   SCQUAL_zero_subsrc,
382   SCQUAL_chromosome,
383   SCQUAL_map,
384   SCQUAL_clone,
385   SCQUAL_sub_clone,
386   SCQUAL_haplotype,
387   SCQUAL_genotype,
388   SCQUAL_sex,
389   SCQUAL_cell_line,
390   SCQUAL_cell_type,
391   SCQUAL_tissue_type,
392   SCQUAL_clone_lib,
393   SCQUAL_dev_stage,
394   SCQUAL_frequency,
395   SCQUAL_germline,
396   SCQUAL_rearranged,
397   SCQUAL_lab_host,
398   SCQUAL_pop_variant,
399   SCQUAL_tissue_lib,
400   SCQUAL_plasmid_name,
401   SCQUAL_transposon_name,
402   SCQUAL_ins_seq_name,
403   SCQUAL_plastid_name,
404   SCQUAL_country,
405   SCQUAL_segment,
406   SCQUAL_endogenous_virus_name,
407   SCQUAL_transgenic,
408   SCQUAL_environmental_sample,
409   SCQUAL_isolation_source,
410   SCQUAL_lat_lon,
411   SCQUAL_collection_date,
412   SCQUAL_collected_by,
413   SCQUAL_identified_by,
414   SCQUAL_fwd_primer_seq,
415   SCQUAL_rev_primer_seq,
416   SCQUAL_fwd_primer_name,
417   SCQUAL_rev_primer_name,
418   SCQUAL_metagenomic,
419   SCQUAL_mating_type,
420   SCQUAL_linkage_group,
421   SCQUAL_haplogroup,
422   SCQUAL_subsource_note
423 };
424 
425 /* ********************************************************************** */
426 
427 /* ********************************************************************** */
428 
429 /* format functions allocate printable string for given paragraph */
430 
431 /* superset of http://www.ncbi.nlm.nih.gov/collab/db_xref.html and RefSeq db_xrefs */
432 
433 NLM_EXTERN CharPtr legalDbXrefs [] = {
434   "AceView/WormGenes",
435   "AFTOL",
436   "AntWeb",
437   "APHIDBASE",
438   "ApiDB",
439   "ApiDB_CryptoDB",
440   "ApiDB_PlasmoDB",
441   "ApiDB_ToxoDB",
442   "ASAP",
443   "ATCC",
444   "ATCC(in host)",
445   "ATCC(dna)",
446   "Axeldb",
447   "BDGP_EST",
448   "BDGP_INS",
449   "BEETLEBASE",
450   "BOLD",
451   "CDD",
452   "CK",
453   "COG",
454   "dbClone",
455   "dbCloneLib",
456   "dbEST",
457   "dbProbe",
458   "dbSNP",
459   "dbSTS",
460   "dictyBase",
461   "EcoGene",
462   "ENSEMBL",
463   "ERIC",
464   "ESTLIB",
465   "FANTOM_DB",
466   "FLYBASE",
467   "GABI",
468   "GDB",
469   "GeneDB",
470   "GeneID",
471   "GO",
472   "GOA",
473   "Greengenes",
474   "GRIN",
475   "H-InvDB",
476   "HGNC",
477   "HMP",
478   "HOMD",
479   "HSSP",
480   "IMGT/GENE-DB",
481   "IMGT/HLA",
482   "IMGT/LIGM",
483   "InterimID",
484   "InterPro",
485   "IRD",
486   "ISD",
487   "ISFinder",
488   "JCM",
489   "JGIDB",
490   "LocusID",
491   "MaizeGDB",
492   "MGI",
493   "MIM",
494   "MycoBank",
495   "NBRC",
496   "NextDB",
497   "niaEST",
498   "NMPDR",
499   "NRESTdb",
500   "Osa1",
501   "Pathema",
502   "PBmice",
503   "PDB",
504   "PFAM",
505   "PGN",
506   "PIR",
507   "PSEUDO",
508   "PseudoCap",
509   "RAP-DB",
510   "RATMAP",
511   "RFAM",
512   "RGD",
513   "RiceGenes",
514   "RZPD",
515   "SEED",
516   "SGD",
517   "SGN",
518   "SoyBase",
519   "SubtiList",
520   "taxon",
521   "TIGRFAM",
522   "UniGene",
523   "UNILIB",
524   "UniProtKB/Swiss-Prot",
525   "UniProtKB/TrEMBL",
526   "UniSTS",
527   "UNITE",
528   "VBASE2",
529   "VectorBase",
530   "WorfDB",
531   "WormBase",
532   "Xenbase",
533   "ZFIN",
534   NULL
535 };
536 
537 NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
538   "AFTOL",
539   "AntWeb",
540   "ATCC",
541   "ATCC(dna)",
542   "ATCC(in host)",
543   "BOLD",
544   "FANTOM_DB",
545   "FLYBASE",
546   "GRIN",
547   "HMP",
548   "HOMD",
549   "IMGT/HLA",
550   "IMGT/LIGM",
551   "JCM",
552   "MGI",
553   "MycoBank",
554   "NBRC",
555   "RZPD",
556   "taxon",
557   "UNILIB",
558   "UNITE",
559   NULL
560 };
561 
562 NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = {
563   "CCDS",
564   "CGNC",
565   "CloneID",
566   "ECOCYC",
567   "HPRD",
568   "LRG",
569   "miRBase",
570   "PBR",
571   "REBASE",
572   "SK-FST",
573   "TAIR",
574   "VBRC",
575   NULL
576 };
577 
578 static Boolean IsDbxrefInList (
579   CharPtr name,
580   CharPtr PNTR list,
581   size_t num,
582   BoolPtr badcapP,
583   CharPtr PNTR goodcapP
584 )
585 
586 {
587   Int2  L, R, mid;
588 
589   L = 0;
590   R = num;
591 
592   while (L < R) {
593     mid = (L + R) / 2;
594     if (StringICmp (list [mid], name) < 0) {
595       L = mid + 1;
596     } else {
597       R = mid;
598     }
599   }
600 
601   if (StringICmp (list [R], name) == 0) {
602     if (StringCmp (list [R], name) != 0) {
603       if (badcapP != NULL) {
604         *badcapP = TRUE;
605       }
606       if (goodcapP != NULL) {
607         *goodcapP = list [R];
608       }
609     }
610     return TRUE;
611   }
612 
613   return FALSE;
614 }
615 
616 NLM_EXTERN Boolean DbxrefIsValid (
617   CharPtr name,
618   BoolPtr is_refseq_P,
619   BoolPtr is_source_P,
620   BoolPtr is_badcap_P,
621   CharPtr PNTR goodcapP
622 )
623 
624 {
625   if (is_refseq_P != NULL) {
626     *is_refseq_P = FALSE;
627   }
628   if (is_source_P != NULL) {
629     *is_source_P = FALSE;
630   }
631   if (is_badcap_P != NULL) {
632     *is_badcap_P = FALSE;
633   }
634   if (goodcapP != NULL) {
635     *goodcapP = NULL;
636   }
637 
638   if (StringHasNoText (name)) return FALSE;
639 
640   if (IsDbxrefInList (name, legalRefSeqDbXrefs,
641                       sizeof (legalRefSeqDbXrefs) / sizeof (legalRefSeqDbXrefs [0]) - 1,
642                       is_badcap_P, goodcapP)) {
643     if (is_refseq_P != NULL) {
644       *is_refseq_P = TRUE;
645     }
646     return TRUE;
647   }
648 
649   if (IsDbxrefInList (name, legalSrcDbXrefs,
650                       sizeof (legalSrcDbXrefs) / sizeof (legalSrcDbXrefs [0]) - 1,
651                       is_badcap_P, goodcapP)) {
652     if (is_source_P != NULL) {
653       *is_source_P = TRUE;
654     }
655     return TRUE;
656   }
657 
658   if (IsDbxrefInList (name, legalDbXrefs,
659                       sizeof (legalDbXrefs) / sizeof (legalDbXrefs [0]) - 1,
660                       is_badcap_P, goodcapP)) {
661     return TRUE;
662   }
663 
664   return FALSE;
665 }
666 
667 
668 /* These functions are for testing dbxrefs */
669 
670 static ValNodePtr MakeDbxrefList (void)
671 {
672   ValNodePtr dbxref_list = NULL;
673   Int4 i;
674   DbtagPtr dbtag;
675 
676   for (i = 0; legalDbXrefs [i] != NULL; i++) {
677     dbtag = DbtagNew ();
678     dbtag->db = StringSave (legalDbXrefs [i]);
679     dbtag->tag = ObjectIdNew ();
680     dbtag->tag->id = 42;
681     ValNodeAddPointer (&dbxref_list, 0, dbtag);
682   }
683 
684   /* legalSrcDbXrefs is contained within legalDbXrefs */
685 
686   for (i = 0; legalRefSeqDbXrefs [i] != NULL; i++) {
687     dbtag = DbtagNew ();
688     dbtag->db = StringSave (legalRefSeqDbXrefs [i]);
689     dbtag->tag = ObjectIdNew ();
690     dbtag->tag->id = 42;
691     ValNodeAddPointer (&dbxref_list, 0, dbtag);
692   }
693 
694   return dbxref_list;
695 }
696 
697 static void AddDbxrefsToBioSource (BioSourcePtr biop)
698 {
699   if (biop == NULL) return;
700   if (biop->org == NULL)
701   {
702     biop->org = OrgRefNew();
703   }
704 
705   ValNodeLink (&(biop->org->db), MakeDbxrefList());
706 }
707 
708 static void AddDbxrefsToSeqFeat (SeqFeatPtr sfp)
709 {
710   if (sfp == NULL) return;
711   ValNodeLink (&(sfp->dbxref), MakeDbxrefList());
712 }
713 
714 NLM_EXTERN void AddAllDbxrefsToBioseq (BioseqPtr bsp)
715 {
716   SeqDescrPtr sdp;
717   SeqFeatPtr  sfp;
718   SeqMgrDescContext dcontext;
719   SeqMgrFeatContext fcontext;
720 
721   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
722   if (sdp != NULL) {
723     AddDbxrefsToBioSource (sdp->data.ptrvalue);
724   }
725 
726   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
727   if (sfp != NULL) {
728     AddDbxrefsToBioSource (sfp->data.value.ptrvalue);
729     AddDbxrefsToSeqFeat (sfp);
730   }
731 
732   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
733   if (sfp != NULL) {
734     AddDbxrefsToSeqFeat (sfp);
735   }
736 
737   sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &fcontext);
738   if (sfp != NULL) {
739     AddDbxrefsToSeqFeat (sfp);
740   }
741 }
742 
743 
744 
745 static CharPtr organellePrefix [] = {
746   NULL,
747   NULL,
748   "Chloroplast ",
749   "Chromoplast ",
750   "Kinetoplast ",
751   "Mitochondrion ",
752   "Plastid ",
753   NULL,
754   NULL,
755   NULL,
756   NULL,
757   NULL,
758   "Cyanelle ",
759   NULL,
760   NULL,
761   "Nucleomorph ",
762   "Apicoplast ",
763   "Leucoplast ",
764   "Proplastid ",
765   NULL,
766   "Hydrogenosome ",
767   NULL,
768   "Chromatophore "
769 };
770 
771 static CharPtr newOrganellePrefix [] = {
772   NULL,
773   NULL,
774   "chloroplast ",
775   "chromoplast ",
776   "kinetoplast ",
777   "mitochondrion ",
778   "plastid ",
779   NULL,
780   NULL,
781   NULL,
782   NULL,
783   NULL,
784   "cyanelle ",
785   NULL,
786   NULL,
787   "nucleomorph ",
788   "apicoplast ",
789   "leucoplast ",
790   "proplastid ",
791   NULL,
792   "hydrogenosome ",
793   NULL,
794   "chromatophore "
795 };
796 
797 NLM_EXTERN CharPtr FormatSourceBlock (
798   Asn2gbFormatPtr afp,
799   BaseBlockPtr bbp
800 )
801 
802 {
803   CharPtr            acr = NULL;
804   Boolean            addPeriod = TRUE;
805   IntAsn2gbJobPtr    ajp;
806   CharPtr            ana = NULL;
807   Asn2gbSectPtr      asp;
808   BioSourcePtr       biop = NULL;
809   CharPtr            com = NULL;
810   CharPtr            common = NULL;
811   SeqMgrDescContext  dcontext;
812   SeqMgrFeatContext  fcontext;
813   CharPtr            gbacr = NULL;
814   CharPtr            gbana = NULL;
815   GBBlockPtr         gbp = NULL;
816   GBSeqPtr           gbseq;
817   CharPtr            gbsyn = NULL;
818   Uint1              genome;
819   CharPtr            met = NULL;
820   ValNodePtr         mod = NULL;
821   Int2               numacr = 0;
822   Int2               numana = 0;
823   Int2               numcom = 0;
824   Int2               numgbacr = 0;
825   Int2               numgbana = 0;
826   Int2               numgbsyn = 0;
827   Int2               nummet = 0;
828   Int2               numsyn = 0;
829   OrgModPtr          omp = NULL;
830   OrgNamePtr         onp;
831   CharPtr            organelle = NULL;
832   OrgRefPtr          orp;
833   CharPtr            prefix = " (";
834   SeqDescrPtr        sdp;
835   CharPtr            second = NULL;
836   SeqFeatPtr         sfp;
837   CharPtr            str;
838   CharPtr            syn = NULL;
839   CharPtr            taxname = NULL;
840   StringItemPtr      ffstring, temp;
841 
842   if (afp == NULL || bbp == NULL) return NULL;
843   ajp = afp->ajp;
844   if (ajp == NULL) return NULL;
845   asp = afp->asp;
846   if (asp == NULL) return NULL;
847 
848   if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
849 
850   ffstring = FFGetString(ajp);
851   if ( ffstring == NULL ) return NULL;
852 
853   if (bbp->itemtype == OBJ_SEQDESC) {
854     sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
855     if (sdp != NULL) {
856       if (dcontext.seqdesctype == Seq_descr_source) {
857         biop = (BioSourcePtr) sdp->data.ptrvalue;
858       } else if (dcontext.seqdesctype == Seq_descr_genbank) {
859         gbp = (GBBlockPtr) sdp->data.ptrvalue;
860       }
861     }
862   } else if (bbp->itemtype == OBJ_SEQFEAT) {
863     sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
864     if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
865       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
866     }
867   }
868   if (gbp != NULL) {
869     common = gbp->source;
870   }
871 
872   if (biop != NULL) {
873     genome = biop->genome;
874     if (genome <= 22) {
875       if (ajp->newSourceOrg && (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT)) {
876         organelle = newOrganellePrefix [genome];
877       } else {
878         organelle = organellePrefix [genome];
879       }
880     }
881     orp = biop->org;
882     if (orp != NULL) {
883       taxname = orp->taxname;
884       common = orp->common;
885       mod = orp->mod;
886       onp = orp->orgname;
887       if (onp != NULL) {
888 
889         if (ajp->newSourceOrg) {
890           for (omp = onp->mod; omp != NULL; omp = omp->next) {
891             switch (omp->subtype) {
892               case ORGMOD_common :
893                 com = omp->subname;
894                 numcom++;
895                 break;
896               case ORGMOD_acronym :
897                 acr = omp->subname;
898                 numacr++;
899                 break;
900               case ORGMOD_synonym :
901                 syn = omp->subname;
902                 numsyn++;
903                 break;
904               case ORGMOD_anamorph :
905                 ana = omp->subname;
906                 numana++;
907                 break;
908               case ORGMOD_gb_acronym :
909                 gbacr = omp->subname;
910                 numgbacr++;
911                 break;
912               case ORGMOD_gb_anamorph :
913                 gbana = omp->subname;
914                 numgbana++;
915                 break;
916               case ORGMOD_gb_synonym :
917                 gbsyn = omp->subname;
918                 numgbsyn++;
919                 break;
920               case ORGMOD_metagenome_source :
921                 met = omp->subname;
922                 nummet++;
923                 break;
924               default :
925                 break;
926             }
927           }
928 
929           if (numacr > 1) {
930              acr = NULL;
931           }
932           if (numana > 1) {
933              ana = NULL;
934           }
935           if (numcom > 1) {
936              com = NULL;
937           }
938           if (nummet > 1) {
939              met = NULL;
940           }
941           if (numsyn > 1) {
942              syn = NULL;
943           }
944           if (numgbacr > 1) {
945              gbacr = NULL;
946           }
947           if (numgbana > 1) {
948              gbana = NULL;
949           }
950           if (numgbsyn > 1) {
951              gbsyn = NULL;
952           }
953 
954           if (StringHasNoText (second)) {
955             second = met;
956           }
957           if (StringHasNoText (second)) {
958             second = syn;
959           }
960            if (StringHasNoText (second)) {
961              second = acr;
962           }
963           if (StringHasNoText (second)) {
964             if (StringDoesHaveText (ana)) {
965               second = ana;
966               prefix = " (anamorph: ";
967             }
968           }
969           if (StringHasNoText (second)) {
970             second = com;
971           }
972 
973           if (StringHasNoText (second)) {
974             second = gbsyn;
975           }
976           if (StringHasNoText (second)) {
977             second = gbacr;
978           }
979           if (StringHasNoText (second)) {
980             if (StringDoesHaveText (gbana)) {
981               second = gbana;
982               prefix = " (anamorph: ";
983             }
984           }
985         }
986       }
987       if (StringHasNoText (second)) {
988         second = common;
989       }
990     }
991   }
992 
993   /* If the organelle prefix is already on the */
994   /* name, don't add it.                       */
995 
996   if (StringNCmp (organelle, taxname, StringLen (organelle)) == 0)
997     organelle = "";
998 
999   if (StringHasNoText (common)) {
1000     common = taxname;
1001   }
1002   if (StringHasNoText (common)) {
1003     common = "Unknown.";
1004   }
1005   if (StringHasNoText (taxname)) {
1006     taxname = "Unknown.";
1007   }
1008 
1009   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
1010     
1011     temp = FFGetString(ajp);
1012 
1013     if (ajp->newSourceOrg) {
1014 
1015       if (! StringHasNoText (organelle)) {
1016         FFAddTextToString(temp, NULL, organelle, NULL, FALSE, FALSE, TILDE_IGNORE);
1017       }
1018       FFAddTextToString(temp, NULL, taxname, NULL, FALSE, FALSE, TILDE_IGNORE);
1019       if (! StringHasNoText (second)) {
1020         FFAddTextToString(temp, prefix, second, ")", FALSE, FALSE, TILDE_IGNORE);
1021       }
1022       addPeriod = FALSE;
1023 
1024     } else {
1025       FFAddTextToString(temp, NULL, common, NULL, FALSE, FALSE, TILDE_IGNORE);
1026       while (mod != NULL) {
1027         str = (CharPtr) mod->data.ptrvalue;
1028         if (! StringHasNoText (str)) {
1029           FFAddTextToString(temp, " ", str, NULL, FALSE, FALSE, TILDE_IGNORE);
1030         }
1031         mod = mod->next;
1032       }
1033     }
1034 
1035     str = FFToCharPtr(temp);
1036     if (StringCmp (str, ".") == 0) {
1037       str = MemFree (str);
1038     }
1039     FFRecycleString(ajp, temp);
1040     /* optionally populate gbseq for XML-ized GenBank format */
1041 
1042     if (ajp->gbseq) {
1043       gbseq = &asp->gbseq;
1044     } else {
1045       gbseq = NULL;
1046     }
1047 
1048     if (gbseq != NULL) {
1049       gbseq->source = StringSave (str);
1050     }
1051 
1052     
1053     FFStartPrint(ffstring, afp->format, 0, 12, "SOURCE", 12, 5, 5, "OS", TRUE);
1054     if (str != NULL) {
1055       FFAddTextToString(ffstring, NULL, str, NULL, addPeriod, FALSE, TILDE_TO_SPACES);
1056     } else {
1057       FFAddOneChar(ffstring, '.', FALSE);
1058     }
1059     
1060     MemFree (str);
1061 
1062   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
1063 
1064     FFStartPrint(ffstring, afp->format, 0, 12, "SOURCE", 12, 5, 5, "OS", TRUE);
1065     FFAddTextToString(ffstring, NULL, taxname, NULL, FALSE, FALSE, TILDE_TO_SPACES);
1066     if ( StringICmp(taxname, common) != 0 ) {
1067         FFAddTextToString(ffstring, " (", common, ")", FALSE, FALSE, TILDE_TO_SPACES);
1068     }
1069   }
1070   
1071   str = FFEndPrint(ajp, ffstring, afp->format, 12, 12, 0, 5, "OS");
1072   FFRecycleString(ajp, ffstring);
1073   return str;
1074 }
1075 
1076 NLM_EXTERN CharPtr FormatOrganismBlock (
1077   Asn2gbFormatPtr afp,
1078   BaseBlockPtr bbp
1079 )
1080 
1081 {
1082   IntAsn2gbJobPtr    ajp;
1083   Asn2gbSectPtr      asp;
1084   BioSourcePtr       biop = NULL;
1085   Char               ch;
1086   CharPtr            common = NULL;
1087   DbtagPtr           dbt;
1088   SeqMgrDescContext  dcontext;
1089   SeqMgrFeatContext  fcontext;
1090   GBSeqPtr           gbseq;
1091   Uint1              genome;
1092   CharPtr            lineage = NULL;
1093   ObjectIdPtr        oip;
1094   OrgModPtr          omp;
1095   OrgNamePtr         onp;
1096   CharPtr            organelle = NULL;
1097   OrgRefPtr          orp;
1098   SeqDescrPtr        sdp;
1099   SeqFeatPtr         sfp;
1100   CharPtr            str;
1101   Int4               taxid = -1;
1102   CharPtr            taxname = NULL;
1103   CharPtr            tmp;
1104   CharPtr            ptr;
1105   ValNodePtr         vnp;
1106   StringItemPtr      ffstring, temp;
1107   Char               buf [16];
1108 
1109   if (afp == NULL || bbp == NULL) return NULL;
1110   ajp = afp->ajp;
1111   if (ajp == NULL) return NULL;
1112   asp = afp->asp;
1113   if (asp == NULL) return NULL;
1114 
1115 
1116   if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
1117 
1118   if (bbp->itemtype == OBJ_SEQDESC) {
1119     sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
1120     if (sdp != NULL && dcontext.seqdesctype == Seq_descr_source) {
1121       biop = (BioSourcePtr) sdp->data.ptrvalue;
1122     }
1123   } else if (bbp->itemtype == OBJ_SEQFEAT) {
1124     sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
1125     if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
1126       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
1127     }
1128   }
1129   if (biop != NULL) {
1130     genome = biop->genome;
1131     if (genome <= 22) {
1132       organelle = organellePrefix [genome];
1133     }
1134     orp = biop->org;
1135     if (orp != NULL) {
1136       taxname = orp->taxname;
1137       common = orp->common;
1138       onp = orp->orgname;
1139       if (onp != NULL) {
1140         lineage = onp->lineage;
1141         if (StringHasNoText (lineage)) {
1142           for (omp = onp->mod; omp != NULL; omp = omp->next) {
1143             if (omp->subtype == ORGMOD_old_lineage) {
1144               lineage = omp->subname;
1145             }
1146           }
1147         }
1148       }
1149       for (vnp = orp->db; vnp != NULL; vnp = vnp->next) {
1150         dbt = (DbtagPtr) vnp->data.ptrvalue;
1151         if (dbt == NULL) continue;
1152         if (StringCmp (dbt->db, "taxon") == 0) {
1153           oip = dbt->tag;
1154           if (oip != NULL) {
1155             taxid = oip->id;
1156           }
1157         }
1158       }
1159     }
1160   }
1161 
1162   /* If the organelle prefix is already on the */
1163   /* name, don't add it.                       */
1164 
1165   if (StringNCmp (organelle, taxname, StringLen (organelle)) == 0)
1166     organelle = "";
1167 
1168   if (StringHasNoText (common)) {
1169     common = taxname;
1170   }
1171   if (StringHasNoText (common)) {
1172     common = "Unknown.";
1173   }
1174   if (StringHasNoText (taxname)) {
1175     taxname = "Unknown.";
1176   }
1177   if (StringHasNoText (lineage)) {
1178     lineage = "Unclassified.";
1179   }
1180 
1181   ffstring = FFGetString(ajp);
1182   temp = FFGetString(ajp);
1183   if ( ffstring == NULL || temp == NULL ) return NULL;
1184 
1185   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
1186     
1187     FFStartPrint(temp, afp->format, 2, 12, "ORGANISM", 12, 5, 5, "OC", FALSE);
1188     if (! ajp->newSourceOrg) {
1189       FFAddOneString(temp, organelle, FALSE, FALSE, TILDE_IGNORE);
1190     }
1191     if (StringNICmp (taxname, "Unknown", 7) != 0) {
1192       if ( GetWWW(ajp) ) { 
1193         if (taxid != -1) {
1194           FFAddOneString(temp, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1195           FF_Add_NCBI_Base_URL (temp, link_tax);
1196           FFAddOneString(temp, "id=", FALSE, FALSE, TILDE_IGNORE);
1197           sprintf (buf, "%ld", (long) taxid);
1198           FFAddOneString(temp, buf, FALSE, FALSE, TILDE_IGNORE);
1199           FFAddOneString(temp, "\">", FALSE, FALSE, TILDE_IGNORE);
1200         } else {
1201           FFAddOneString(temp, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1202           FF_Add_NCBI_Base_URL (temp, link_tax);
1203           FFAddOneString(temp, "name=", FALSE, FALSE, TILDE_IGNORE);
1204           tmp = StringSave (taxname);
1205           if (tmp != NULL) {
1206             ptr = tmp;
1207             ch = *ptr;
1208             while (ch != '\0') {
1209               if (IS_WHITESP (ch)) {
1210                 *ptr = '+';
1211               }
1212               ptr++;
1213               ch = *ptr;
1214             }
1215             FFAddOneString(temp, tmp, FALSE, FALSE, TILDE_IGNORE);
1216             MemFree (tmp);
1217           }
1218           FFAddOneString(temp, "\">", FALSE, FALSE, TILDE_IGNORE);
1219         }
1220         FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1221         FFAddOneString(temp, "</a>", FALSE, FALSE, TILDE_IGNORE);
1222       } else {
1223         FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1224       }
1225     } else {
1226       FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1227     }
1228     FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
1229     FFRecycleString(ajp, temp);
1230 
1231     temp = FFGetString(ajp);
1232     FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
1233     FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1234     FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
1235     FFRecycleString(ajp, temp);
1236     /* optionally populate gbseq for XML-ized GenBank format */
1237 
1238     if (ajp->gbseq) {
1239       gbseq = &asp->gbseq;
1240     } else {
1241       gbseq = NULL;
1242     }
1243 
1244     if (gbseq != NULL) {
1245       temp = FFGetString(ajp);
1246       if (! ajp->newSourceOrg) {
1247         FFAddOneString(temp, organelle, FALSE, FALSE, TILDE_IGNORE);
1248       }
1249       FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1250       gbseq->organism = FFToCharPtr(temp);
1251       gbseq->taxonomy = StringSave (lineage);
1252       FFRecycleString(ajp, temp);
1253     }
1254 
1255   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
1256     FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
1257     FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1258     FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OC");
1259     FFRecycleString(ajp, temp);
1260     if ( !StringHasNoText(organelle) ) {
1261       temp = FFGetString(ajp);
1262       if ( temp != NULL ) {
1263         FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OG", FALSE);
1264         FFAddTextToString(temp, NULL, organelle, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1265         FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OG");
1266         FFRecycleString(ajp, temp);
1267       }
1268     }
1269   }
1270   
1271   str = FFToCharPtr(ffstring);
1272   FFRecycleString(ajp, ffstring);
1273   return str;
1274 }
1275 
1276 /* A tilde is not an EOL if it is found in a string of the form:    */
1277 /* /~alpahnumdot/ where alphanumdot is either alpha numeric or '.' */
1278 /*                                                                 */
1279 /* str points to the tilde in question.                            */
1280 static Boolean IsTildeEOL(CharPtr str) {
1281   CharPtr ptr;
1282 
1283   if ( *(str - 1) != '/' ) return TRUE;
1284 
1285   ++str;
1286 
1287   
1288   for ( ptr = str; 
1289     IS_ALPHANUM(*ptr) || *ptr == '_' || *ptr == '-' || *ptr == '.';
1290     ++ptr) continue;
1291 
1292   return *ptr == '/' ? FALSE : TRUE;
1293 }
1294 
1295 /* returns a pointer to the first character past the url */
1296 static CharPtr FindUrlEnding(CharPtr str) {
1297   CharPtr ptr;
1298 
1299   for ( ptr = str;
1300         !IS_WHITESP(*ptr) && *ptr != '\0' && *ptr != '(' && *ptr != '\"';
1301         ++ptr  ) {
1302     if ( *ptr == '~' ) {
1303       if ( IsTildeEOL(ptr) ) break;
1304     }
1305   }
1306 
1307   --ptr;
1308 
1309   /* back up over any trailing periods, commas, or parentheses */
1310   while ( (*ptr == '.') || (*ptr == ',') || (*ptr == ')') ) {
1311     --ptr;
1312   }
1313 
1314   ++ptr;
1315 
1316   return ptr;
1317 }
1318 
1319 static Boolean CommentHasSuspiciousHtml (
1320   IntAsn2gbJobPtr ajp,
1321   CharPtr searchString
1322 )
1323 
1324 {
1325   Char        ch;
1326   CharPtr     ptr;
1327   Int4        state;
1328   ValNodePtr  matches;
1329 
1330   if (StringHasNoText (searchString)) return FALSE;
1331 
1332   state = 0;
1333   ptr = searchString;
1334   ch = *ptr;
1335 
1336   while (ch != '\0') {
1337     matches = NULL;
1338     ch = TO_LOWER (ch);
1339     state = TextFsaNext (ajp->bad_html_fsa, state, ch, &matches);
1340     if (matches != NULL) {
1341       return TRUE;
1342     }
1343     ptr++;
1344     ch = *ptr;
1345   }
1346 
1347   return FALSE;
1348 }
1349 
1350 NLM_EXTERN void AddCommentWithURLlinks (
1351   IntAsn2gbJobPtr ajp,
1352   StringItemPtr ffstring,
1353   CharPtr prefix,
1354   CharPtr str,
1355   CharPtr suffix
1356 )
1357 
1358 {
1359   Char     ch;
1360   CharPtr  ptr;
1361 
1362   if (GetWWW (ajp) && CommentHasSuspiciousHtml (ajp, str)) {
1363     if (prefix != NULL) {
1364       FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1365     }
1366     AddCommentStringWithTildes (ffstring, str);
1367     if (suffix != NULL) {
1368       FFAddOneString(ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1369     }
1370     return;
1371   }
1372 
1373   while (! StringHasNoText (str)) {
1374     ptr = StringStr (str, "http://");
1375     if (ptr == NULL) {
1376       ptr = StringStr (str, "https://");
1377     }
1378     if (ptr == NULL) {
1379       if (prefix != NULL) {
1380         FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1381       }
1382       AddCommentStringWithTildes (ffstring, str);
1383       if (suffix != NULL) {
1384         FFAddOneString(ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1385       }
1386       return;
1387     }
1388 
1389     *ptr = '\0';
1390     AddCommentStringWithTildes (ffstring, str); 
1391     *ptr = 'h';
1392 
1393     str = ptr;
1394     ptr = FindUrlEnding(str);
1395 
1396 
1397     ch = *ptr;
1398     *ptr = '\0';
1399     if ( GetWWW(ajp) ) {
1400       FFAddTextToString(ffstring, "<a href=\"", str, "\">", FALSE, FALSE, TILDE_IGNORE);
1401       FFAddTextToString(ffstring, NULL, str, "</a>", FALSE, FALSE, TILDE_IGNORE);
1402     } else {
1403       FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
1404     }
1405 
1406     *ptr = ch;
1407     str = ptr;
1408   }
1409 }
1410 
1411 static void CatenateCommentInGbseq (
1412   GBSeqPtr gbseq,
1413   CharPtr str,
1414   Boolean compress
1415 )
1416 
1417 {
1418   Char     ch;
1419   CharPtr  tmp;
1420 
1421   if (gbseq == NULL || StringHasNoText (str)) return;
1422 
1423   if (StringNCmp (str, "COMMENT     ", 12) == 0) {
1424     str += 12;
1425   }
1426   if (gbseq->comment == NULL) {
1427     gbseq->comment = StringSave (str);
1428   } else {
1429     tmp = (CharPtr) MemNew (StringLen (gbseq->comment) + StringLen (str) + 4);
1430     StringCpy (tmp, gbseq->comment);
1431     StringCat (tmp, "; ");
1432     StringCat (tmp, str);
1433     gbseq->comment = MemFree (gbseq->comment);
1434     gbseq->comment = tmp;
1435   }
1436 
1437   tmp = gbseq->comment;
1438   if (tmp == NULL) return;
1439   ch = *tmp;
1440   while (ch != '\0') {
1441     if (ch == '\n' || ch == '\r' || ch == '\t') {
1442       *tmp = ' ';
1443     }
1444     tmp++;
1445     ch = *tmp;
1446   }
1447   TrimSpacesAroundString (gbseq->comment);
1448   if (compress) {
1449     Asn2gnbkCompressSpaces (gbseq->comment);
1450   }
1451 }
1452 
1453 
1454 NLM_EXTERN CharPtr FormatCommentBlock (
1455   Asn2gbFormatPtr afp,
1456   BaseBlockPtr bbp
1457 )
1458 
1459 {
1460   Boolean            add_period;
1461   IntAsn2gbJobPtr    ajp;
1462   Asn2gbSectPtr      asp;
1463   CommentBlockPtr    cbp;
1464   CharPtr            db;
1465   DbtagPtr           dbt;
1466   SeqMgrDescContext  dcontext;
1467   SeqMgrFeatContext  fcontext;
1468   GBSeqPtr           gbseq;
1469   size_t             len;
1470   ObjectIdPtr        oip;
1471   CharPtr            prefix;
1472   SeqDescrPtr        sdp;
1473   SeqFeatPtr         sfp;
1474   Char               sfx [32];
1475   CharPtr            str;
1476   CharPtr            suffix;
1477   CharPtr            title;
1478   StringItemPtr      ffstring;
1479 
1480   if (afp == NULL || bbp == NULL) return NULL;
1481   ajp = afp->ajp;
1482   if (ajp == NULL) return NULL;
1483   asp = afp->asp;
1484   if (asp == NULL) return NULL;
1485 
1486   cbp = (CommentBlockPtr) bbp;
1487 
1488   /* optionally populate gbseq for XML-ized GenBank format */
1489 
1490   if (ajp->gbseq) {
1491     gbseq = &asp->gbseq;
1492   } else {
1493     gbseq = NULL;
1494   }
1495 
1496   /* some comments are allocated (along with possible first COMMENT label) */
1497 
1498   if (! StringHasNoText (bbp->string)) {
1499     str = StringSave (bbp->string);
1500     CatenateCommentInGbseq (gbseq, str, TRUE);
1501     return str;
1502   }
1503 
1504   title = NULL;
1505   prefix = NULL;
1506   suffix = NULL;
1507   add_period = FALSE;
1508   sfx [0] = '\0';
1509 
1510   if (bbp->itemtype == OBJ_SEQDESC) {
1511 
1512     /* usually should reference comment, maploc, or region descriptor IDs */
1513 
1514     sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
1515     if (sdp != NULL) {
1516 
1517       if (dcontext.seqdesctype == Seq_descr_comment) {
1518 
1519         title = (CharPtr) sdp->data.ptrvalue;
1520 
1521       } else if (dcontext.seqdesctype == Seq_descr_maploc) {
1522 
1523         dbt = (DbtagPtr) sdp->data.ptrvalue;
1524         if (dbt != NULL) {
1525           db = dbt->db;
1526           oip = dbt->tag;
1527           if (oip != NULL) {
1528             if (oip->str != NULL) {
1529 
1530               title = oip->str;
1531               prefix = ("Map location: ");
1532 
1533             } else if (db != NULL && oip->id != 0) {
1534 
1535               title = db;
1536               prefix = ("Map location: (Database ");
1537               sprintf (sfx, "; id # %ld).", (long) oip->id);
1538               suffix = sfx;
1539 
1540             }
1541           }
1542         }
1543 
1544       } else if (dcontext.seqdesctype == Seq_descr_region) {
1545 
1546         title = (CharPtr) sdp->data.ptrvalue;
1547         prefix = "Region: ";
1548 
1549       } else if (dcontext.seqdesctype == Seq_descr_name) {
1550 
1551         title = (CharPtr) sdp->data.ptrvalue;
1552         prefix = "Name: ";
1553 
1554       }
1555     }
1556 
1557   } else if (bbp->itemtype == OBJ_SEQFEAT) {
1558 
1559     /* also have to deal with comment feature across entire sequence */
1560 
1561     sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
1562     if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_COMMENT) {
1563 
1564       title = sfp->comment;
1565     }
1566   }
1567 
1568   if (title == NULL) return NULL;
1569 
1570   ffstring = FFGetString(ajp);
1571   if ( ffstring == NULL ) return NULL;
1572 
1573   if (cbp->first) {
1574     FFStartPrint (ffstring, afp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
1575   } else {
1576     FFStartPrint (ffstring, afp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
1577   }
1578 
1579   str = StringSave (title);
1580   TrimSpacesAndJunkFromEnds (str, TRUE);
1581   if (! IsEllipsis (str)) {
1582     s_RemovePeriodFromEnd (str);
1583     len = StringLen (str);
1584     if (len > 0 && str [len - 1] != '.') {
1585       add_period = TRUE;
1586     }
1587   }
1588   AddCommentWithURLlinks(ajp, ffstring, prefix, str, suffix);
1589   /*
1590   if ( GetWWW(ajp) && prefix == NULL && suffix == NULL) {
1591     
1592     AddCommentWithURLlinks (ffstring, str);
1593   } else {
1594     FFAddTextToString (ffstring, prefix, str, suffix, FALSE, TRUE, TILDE_OLD_EXPAND);
1595   }
1596   */
1597   if (add_period) {
1598     FFAddOneChar (ffstring, '.',FALSE);
1599   }
1600   MemFree (str);
1601 
1602   str = FFEndPrint(ajp, ffstring, afp->format, 12, 12, 5, 5, "CC");
1603 
1604   /*
1605   CatenateCommentInGbseq (gbseq, str);
1606   */
1607   CatenateCommentInGbseq (gbseq, title, FALSE);
1608 
1609   FFRecycleString(ajp, ffstring);
1610   return str;
1611 }
1612 
1613 /* format features section */
1614 
1615 static Boolean is_real_id (
1616   SeqIdPtr sip,
1617   SeqIdPtr this_sip
1618 )
1619 
1620 {
1621   BioseqPtr  bsp;
1622 
1623   if (sip == NULL || this_sip == NULL) return FALSE;
1624 
1625   if (! SeqIdIn (sip, this_sip)) {
1626     bsp = BioseqFind (sip);
1627     if (bsp == NULL) return TRUE;  /* ??? */
1628     if (bsp->repr == Seq_repr_virtual) return FALSE;
1629   }
1630 
1631   return TRUE;
1632 }
1633 
1634 static Boolean FlatVirtLoc (
1635   BioseqPtr bsp,
1636   SeqLocPtr location
1637 )
1638 
1639 {
1640   SeqIntPtr  sintp;
1641   SeqIdPtr   sip;
1642   SeqPntPtr  spp;
1643 
1644   if (bsp == NULL || location == NULL) return FALSE;
1645 
1646   switch (location->choice) {
1647     case SEQLOC_WHOLE :
1648       sip = (SeqIdPtr) location->data.ptrvalue;
1649       if (sip == NULL) return TRUE;
1650       if (! is_real_id (sip, bsp->id)) return TRUE;
1651       break;
1652     case SEQLOC_INT :
1653       sintp = (SeqIntPtr) location->data.ptrvalue;
1654       if (sintp == NULL) return TRUE;
1655       sip = sintp->id;
1656       if (sip == NULL) return TRUE;
1657       if (! is_real_id (sip, bsp->id)) return TRUE;
1658       break;
1659     case SEQLOC_PNT :
1660       spp = (SeqPntPtr) location->data.ptrvalue;
1661       if (spp == NULL) return TRUE;
1662       sip = spp->id;
1663       if (sip == NULL) return TRUE;
1664       if (! is_real_id (sip, bsp->id)) return TRUE;
1665       break;
1666     default :
1667       break;
1668   }
1669 
1670   return FALSE;
1671 }
1672 
/* Seq-id ranking table that FlatLocSeqId hands to SeqIdSelect.           */
/* NOTE(review): presumably filled in once, with order_initialized        */
/* recording that - neither is assigned in the functions shown here, so   */
/* confirm against the code that sets them up.                            */
static Uint1    id_order [NUM_SEQID];
static Boolean  order_initialized = FALSE;

/* Text put in front of a position for a limit-type fuzz; FlatLocPoint    */
/* and FlatLocCaret index it with fuzz->a, using entry 0 (no marker) for  */
/* any value above 4.                                                     */
static CharPtr lim_str [5] = { "", ">","<", ">", "<" };
1677 
1678 NLM_EXTERN Boolean GetAccnVerFromServer (Int4 gi, CharPtr buf)
1679 
1680 {
1681   AccnVerLookupFunc  func;
1682   SeqMgrPtr          smp;
1683   CharPtr            str;
1684 
1685   if (buf == NULL) return FALSE;
1686   *buf = '\0';
1687   smp = SeqMgrWriteLock ();
1688   if (smp == NULL) return FALSE;
1689   func = smp->accn_ver_lookup_func;
1690   SeqMgrUnlock ();
1691   if (func == NULL) return FALSE;
1692   str = (*func) (gi);
1693   if (str == NULL) return FALSE;
1694   if (StringLen (str) < 40) {
1695     StringCpy (buf, str);
1696   }
1697   MemFree (str);
1698   return TRUE;
1699 }
1700 
1701 
1702 /******************************************************************************/
/*                             FFFlatLoc functions                            */
1704 /******************************************************************************/
1705 
1706 static Boolean FF_FlatNullAhead (
1707   BioseqPtr bsp,
1708   ValNodePtr location
1709 )
1710 
1711 {
1712   SeqLocPtr  next;
1713 
1714   if (bsp == NULL || location == NULL) return FALSE;
1715 
1716   next = location->next;
1717   if (next == NULL) return TRUE;
1718   if (next->choice == SEQLOC_NULL) return TRUE;
1719   if (FlatVirtLoc (bsp, next)) return TRUE;
1720 
1721   return FALSE;
1722 }
1723 
1724 
1725 
1726 static void FlatLocSeqId (
1727   IntAsn2gbJobPtr ajp,
1728   StringItemPtr ffstring,
1729   SeqIdPtr sip
1730 )
1731 
1732 {
1733   BioseqPtr    bsp;
1734   Char         buf [40];
1735   ObjectIdPtr  oip;
1736   SeqIdPtr     use_id = NULL;
1737   Boolean      was_lock = FALSE;
1738 
1739   if (ffstring == NULL || sip == NULL) return;
1740 
1741   buf [0] = '\0';
1742   bsp = BioseqFind (sip);
1743   if (bsp != NULL) {
1744     use_id = SeqIdSelect (bsp->id, id_order, NUM_SEQID);
1745   } else if (sip->choice == SEQID_GI) {
1746     if (GetAccnVerFromServer (sip->data.intvalue, buf)) {
1747       FFAddTextToString(ffstring, NULL, buf, ":", FALSE, FALSE, TILDE_IGNORE);
1748       /*AddValNodeString (head, NULL, buf, ":");*/
1749       return;
1750     }
1751     use_id = GetSeqIdForGI (sip->data.intvalue);
1752   }
1753   if (use_id == NULL && bsp == NULL) {
1754     bsp = BioseqLockById (sip);
1755     was_lock = TRUE;
1756     if (bsp != NULL) {
1757       use_id = SeqIdSelect (bsp->id, id_order, NUM_SEQID);
1758     }
1759   }
1760   if (use_id != NULL) {
1761     SeqIdWrite (use_id, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
1762     if (use_id->choice == SEQID_GI) {
1763       ajp->relModeError = TRUE;
1764     }
1765   } else if (sip->choice == SEQID_GI) {
1766     SeqIdWrite (sip, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
1767     ajp->relModeError = TRUE;
1768   } else {
1769     SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
1770     if (sip->choice == SEQID_GI) {
1771       ajp->relModeError = TRUE;
1772     }
1773   }
1774   if (was_lock) {
1775     BioseqUnlock (bsp);
1776   }
1777   if (StringHasNoText (buf)) {
1778     StringCpy (buf, "?00000");
1779     ajp->relModeError = TRUE;
1780     if (use_id != NULL && use_id->choice == SEQID_LOCAL) {
1781       oip = (ObjectIdPtr) use_id->data.ptrvalue;
1782       if (oip != NULL && (! StringHasNoText (oip->str))) {
1783         StringNCpy_0 (buf, oip->str, 13);
1784       }
1785     }
1786   }
1787   FFAddTextToString(ffstring, NULL, buf, ":", FALSE, FALSE, TILDE_IGNORE);
1788 }
1789 
1790 
1791 
1792 static void FlatLocCaret (
1793   IntAsn2gbJobPtr ajp,
1794   StringItemPtr ffstring,
1795   SeqIdPtr sip,
1796   SeqIdPtr this_sip,
1797   Int4 point,
1798   IntFuzzPtr fuzz
1799 )
1800 
1801 {
1802   Char   buf [128];
1803   Uint1  index;
1804 
1805   if (ffstring == NULL) return;
1806 
1807   if (sip != NULL && (! SeqIdIn (sip, this_sip))) {
1808     FlatLocSeqId (ajp, ffstring, sip);
1809   }
1810 
1811   buf [0] = '\0';
1812   point++; /* orginal FlatLocHalfCaret was called with point + 1 */
1813 
1814   if (fuzz != NULL) {
1815     switch (fuzz->choice) {
1816       case 1 :
1817         sprintf (buf, "(%ld.%ld)..(%ld.%ld)",
1818                  (long) (point - fuzz->a),
1819                  (long) point,
1820                  (long) point,
1821                  (long) (point + fuzz->a));
1822         break;
1823       case 2 :
1824         sprintf (buf, "%ld^%ld",
1825                  (long) (1 + fuzz->b),
1826                  (long) (1 + fuzz->a));
1827         break;
1828       case 3 :
1829         sprintf (buf, "%ld^%ld",
1830                  (long) (point - point * ((double) fuzz->a / 1000.0)),
1831                  (long) (point + point * ((double) fuzz->a / 1000.0)));
1832         break;
1833       case 4 :
1834         if (fuzz->a == 3) { /* space to right */
1835           sprintf (buf, "%ld^%ld", (long) (point), (long) (point + 1));
1836         } else if (fuzz->a == 4 && point > 1) { /* space to left */
1837           sprintf (buf, "%ld^%ld", (long) (point - 1), (long) point);
1838         } else {
1839           index = (Uint1) fuzz->a;
1840           if (index > 4) {
1841             index = 0;
1842           }
1843           sprintf (buf, "%s%ld", lim_str [index], (long) point);
1844         }
1845         break;
1846       default :
1847         sprintf (buf, "%ld", (long) point);
1848         break;
1849     }
1850   } else {
1851     sprintf (buf, "%ld", (long) point);
1852   }
1853 
1854   FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
1855 }
1856 
1857 
1858 static void FlatLocPoint (
1859   IntAsn2gbJobPtr ajp,
1860   StringItemPtr ffstring,
1861   SeqIdPtr sip,
1862   SeqIdPtr this_sip,
1863   Int4 point,
1864   IntFuzzPtr fuzz
1865 )
1866 
1867 {
1868   Char   buf [128];
1869   Uint1  index;
1870 
1871   if (ffstring == NULL) return;
1872 
1873   if (sip != NULL && (! SeqIdIn (sip, this_sip))) {
1874     FlatLocSeqId (ajp, ffstring, sip);
1875   }
1876 
1877   buf [0] = '\0';
1878   point++;
1879 
1880   if (fuzz != NULL) {
1881     switch (fuzz->choice) {
1882       case 1 :
1883         sprintf (buf, "(%ld.%ld)",
1884                  (long) (point - fuzz->a),
1885                  (long) (point + fuzz->a));
1886         break;
1887       case 2 :
1888         sprintf (buf, "(%ld.%ld)",
1889                  (long) (1 + fuzz->b),
1890                  (long) (1 + fuzz->a));
1891         break;
1892       case 3 :
1893         sprintf (buf, "(%ld.%ld)",
1894                  (long) (point - point * ((double) fuzz->a / 1000.0)),
1895                  (long) (point + point * ((double) fuzz->a / 1000.0)));
1896         break;
1897       case 4 :
1898         index = (Uint1) fuzz->a;
1899         if (index > 4) {
1900           index = 0;
1901         }
1902         sprintf (buf, "%s%ld", lim_str [index], (long) point);
1903         break;
1904       default :
1905         sprintf (buf, "%ld", (long) point);
1906         break;
1907     }
1908   } else {
1909     sprintf (buf, "%ld", (long) point);
1910   }
1911 
1912   FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
1913 }
1914 
1915 
1916 static void FlatLocElement (
1917   IntAsn2gbJobPtr ajp,
1918   StringItemPtr ffstring,
1919   BioseqPtr bsp,
1920   SeqLocPtr location
1921 )
1922 
1923 {
1924   Boolean     minus_strand = FALSE;
1925   SeqBondPtr  sbp;
1926   SeqIntPtr   sintp;
1927   SeqIdPtr    sip;
1928   SeqPntPtr   spp;
1929   BioseqPtr   wholebsp;
1930 
1931   if (ffstring == NULL || bsp == NULL || location == NULL) return;
1932 
1933   switch (location->choice) {
1934     case SEQLOC_WHOLE :
1935       sip = (SeqIdPtr) location->data.ptrvalue;
1936       if (sip == NULL) return;
1937       wholebsp = BioseqFind (sip);
1938       if (wholebsp == NULL) return;
1939       if (is_real_id (sip, bsp->id)) {
1940         FlatLocPoint (ajp, ffstring, sip, bsp->id, 0, NULL);
1941         if (bsp->length > 0) {
1942           FFAddOneString(ffstring, "..", FALSE, FALSE, TILDE_IGNORE);
1943           FlatLocPoint (ajp, ffstring, NULL, bsp->id, bsp->length - 1, NULL);
1944         }
1945       }
1946       break;
1947     case SEQLOC_INT :
1948       sintp = (SeqIntPtr) location->data.ptrvalue;
1949       if (sintp == NULL) return;
1950       sip = sintp->id;
1951       if (sip == NULL) return;
1952       if (is_real_id (sip, bsp->id)) {
1953         minus_strand = (Boolean) (sintp->strand == Seq_strand_minus);
1954         if (minus_strand) {
1955           FFAddOneString(ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
1956         }
1957         FlatLocPoint (ajp, ffstring, sip, bsp->id, sintp->from, sintp->if_from);
1958         if (sintp->to > 0 &&
1959             (sintp->to != sintp->from ||
1960              sintp->if_from != NULL ||
1961              sintp->if_to != NULL)) {
1962           FFAddOneString(ffstring, "..", FALSE, FALSE, TILDE_IGNORE);
1963           FlatLocPoint (ajp, ffstring, NULL, bsp->id, sintp->to, sintp->if_to);
1964         }
1965         if (minus_strand) {
1966           FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
1967         }
1968       }
1969       break;
1970     case SEQLOC_PNT :
1971       spp = (SeqPntPtr) location->data.ptrvalue;
1972       if (spp == NULL) return;
1973       sip = spp->id;
1974       if (sip == NULL) return;
1975       if (is_real_id (sip, bsp->id)) {
1976         minus_strand = (Boolean) (spp->strand == Seq_strand_minus);
1977         if (minus_strand) {
1978           FFAddOneString(ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
1979         }
1980         if (spp->fuzz != NULL) {
1981           FlatLocCaret (ajp, ffstring, sip, bsp->id, spp->point, spp->fuzz);
1982         } else {
1983           FlatLocPoint (ajp, ffstring, sip, bsp->id, spp->point, NULL);
1984         }
1985         if (minus_strand) {
1986           FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
1987         }
1988       }
1989       break;
1990     case SEQLOC_BOND :
1991       sbp = (SeqBondPtr) location->data.ptrvalue;
1992       if (sbp == NULL) return;
1993       spp = sbp->a;
1994       if (spp == NULL) return;
1995       sip = spp->id;
1996       if (sip == NULL) return;
1997       FFAddOneString(ffstring, "bond(", FALSE, FALSE, TILDE_IGNORE);
1998       FlatLocPoint (ajp, ffstring, sip, bsp->id, spp->point, spp->fuzz);
1999       spp = sbp->b;
2000       if (spp != NULL) {
2001         FFAddOneString(ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
2002         FlatLocPoint (ajp, ffstring, NULL, bsp->id, spp->point, spp->fuzz);
2003       }
2004       FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2005       break;
2006     default :
2007       /* unexpected internal complex type or unimplemented SEQLOC_FEAT */
2008       return;
2009   }
2010 }
2011 
2012 
2013 
2014 static void FF_FlatPackedPoint (
2015   IntAsn2gbJobPtr ajp,
2016   StringItemPtr ffstring,
2017   PackSeqPntPtr pspp,
2018   BioseqPtr bsp
2019 )
2020 
2021 {
2022   Uint1  dex;
2023 
2024   if (ffstring == NULL || pspp == NULL || bsp == NULL) return;
2025 
2026   for (dex = 0; dex < pspp->used; dex++) {
2027     FlatLocPoint (ajp, ffstring, pspp->id, bsp->id, pspp->pnts [dex], pspp->fuzz);
2028   }
2029 }
2030 
2031 
/* Forward declaration: FF_GroupFlatLoc, defined next, calls FF_DoFlatLoc */
/* for nested packed-int, mix and equiv locations.                        */
static void FF_DoFlatLoc (
  IntAsn2gbJobPtr ajp,
  StringItemPtr ffstring,
  BioseqPtr bsp,
  SeqLocPtr location,
  Boolean ok_to_complement
);
2039 
2040 static void FF_GroupFlatLoc (
2041   IntAsn2gbJobPtr ajp,
2042   StringItemPtr ffstring,
2043   BioseqPtr bsp,
2044   SeqLocPtr location,
2045   CharPtr prefix,
2046   Boolean is_flat_order
2047 )
2048 
2049 {
2050   Boolean        found_non_virt = FALSE;
2051   SeqIdPtr       hold_next;
2052   Int2           parens = 1;
2053   PackSeqPntPtr  pspp;
2054   SeqLocPtr      slp;
2055   Boolean        special_mode = FALSE; /* join in order */
2056 
2057   if (ffstring == NULL || bsp == NULL || location == NULL) return;
2058 
2059   /* prefix will have the first parenthesis */
2060 
2061   FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2062 
2063   for (slp = (SeqLocPtr) location->data.ptrvalue; slp != NULL; slp = slp->next) {
2064 
2065     if (slp->choice == SEQLOC_NULL || FlatVirtLoc (bsp, slp)) {
2066       if (slp != location && slp->next != NULL) {
2067         if (special_mode) {
2068           special_mode = FALSE;
2069           FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2070           parens--;
2071         }
2072       }
2073       continue;
2074     }
2075 
2076     if (found_non_virt && slp->choice != SEQLOC_EMPTY && slp->choice != SEQLOC_NULL) {
2077       FFAddOneString(ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
2078     }
2079 
2080     switch (slp->choice) {
2081       case SEQLOC_WHOLE :
2082       case SEQLOC_PNT :
2083       case SEQLOC_BOND :
2084       case SEQLOC_FEAT :
2085         found_non_virt = TRUE;
2086         if (FlatVirtLoc (bsp, slp)) {
2087           if (slp != location && slp->next != NULL) {
2088             if (special_mode) {
2089               special_mode = FALSE;
2090               FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2091               parens--;
2092             }
2093           }
2094         } else {
2095           FlatLocElement (ajp, ffstring, bsp, slp);
2096         }
2097         break;
2098       case SEQLOC_INT :
2099         found_non_virt = TRUE;
2100         if (is_flat_order && (! FF_FlatNullAhead (bsp, slp))) {
2101           special_mode = TRUE;
2102           FFAddOneString(ffstring, "join(", FALSE, FALSE, TILDE_IGNORE);
2103           parens++;
2104         }
2105         FlatLocElement (ajp, ffstring, bsp, slp);
2106         break;
2107       case SEQLOC_PACKED_PNT :
2108         found_non_virt = TRUE;
2109         pspp = (PackSeqPntPtr) slp->data.ptrvalue;
2110         if (pspp != NULL) {
2111           FF_FlatPackedPoint (ajp, ffstring, pspp, bsp);
2112         }
2113         break;
2114       case SEQLOC_PACKED_INT :
2115       case SEQLOC_MIX :
2116       case SEQLOC_EQUIV :
2117         found_non_virt = TRUE;
2118         hold_next = slp->next;
2119         slp->next = NULL;
2120         FF_DoFlatLoc (ajp, ffstring, bsp, slp, FALSE);
2121         slp->next = hold_next;
2122         break;
2123       default :
2124         break;
2125     }
2126 
2127   }
2128 
2129   while (parens > 0) {
2130     FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2131     parens--;
2132   }
2133 }
2134 
2135 
2136 
2137 
2138 static void FF_DoFlatLoc (
2139   IntAsn2gbJobPtr ajp,
2140   StringItemPtr ffstring,
2141   BioseqPtr bsp,
2142   SeqLocPtr location,
2143   Boolean ok_to_complement
2144 )
2145 
2146 {
2147   Boolean        found_null;
2148   SeqLocPtr      next_loc;
2149   PackSeqPntPtr  pspp;
2150   SeqLocPtr      slp;
2151 
2152   if (ffstring == NULL || bsp == NULL || location == NULL) return;
2153 
2154   /* deal with complement of entire location */
2155 
2156   if (ok_to_complement && SeqLocStrand (location) == Seq_strand_minus) {
2157     slp = AsnIoMemCopy ((Pointer) location,
2158                         (AsnReadFunc) SeqLocAsnRead,
2159                         (AsnWriteFunc) SeqLocAsnWrite);
2160     if (slp != NULL) {
2161       SeqLocRevCmp (slp);
2162       FFAddOneString(ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
2163       FF_DoFlatLoc (ajp, ffstring, bsp, slp, FALSE);
2164       FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2165     }
2166     SeqLocFree (slp);
2167     return;
2168   }
2169 
2170   /* handle each location component */
2171 
2172   for (slp = location; slp != NULL; slp = slp->next) {
2173 
2174     if (slp->choice == SEQLOC_NULL || FlatVirtLoc (bsp, slp)) continue;
2175 
2176     /* print comma between components */
2177 
2178     if (slp != location) {
2179       FFAddOneString(ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
2180     }
2181 
2182     switch (slp->choice) {
2183       case SEQLOC_MIX :
2184       case SEQLOC_PACKED_INT :
2185         found_null = FALSE;
2186         for (next_loc = (SeqLocPtr) slp->data.ptrvalue;
2187          next_loc != NULL;
2188          next_loc = next_loc->next) {
2189           if (next_loc->choice == SEQLOC_NULL ||
2190               FlatVirtLoc (bsp, next_loc) /* ||
2191               LocationHasNullsBetween (slp) */ )
2192             found_null = TRUE;
2193         }
2194         if (found_null) {
2195           FF_GroupFlatLoc (ajp, ffstring, bsp, slp, "order(", TRUE);
2196         } else {
2197           FF_GroupFlatLoc (ajp, ffstring, bsp, slp, "join(", FALSE);
2198         }
2199         break;
2200       case SEQLOC_EQUIV :
2201         FF_GroupFlatLoc (ajp, ffstring, bsp, slp, "one-of(", FALSE);
2202         break;
2203       case SEQLOC_PACKED_PNT :
2204         pspp = (PackSeqPntPtr) slp->data.ptrvalue;
2205         if (pspp != NULL) {
2206           FF_FlatPackedPoint (ajp, ffstring, pspp, bsp);
2207         }
2208         break;
2209       default :
2210         FlatLocElement (ajp, ffstring, bsp, slp);
2211         break;
2212     }
2213 
2214   }
2215 }
2216 
2217 
2218 
2219 
2220 NLM_EXTERN CharPtr FFFlatLoc (
2221   IntAsn2gbJobPtr ajp,
2222   BioseqPtr bsp,
2223   SeqLocPtr location,
2224   Boolean masterStyle
2225 )
2226 
2227 {
2228   Boolean     hasNulls;
2229   IntFuzzPtr  fuzz = NULL;
2230   SeqLocPtr   loc;
2231   Boolean     noLeft;
2232   Boolean     noRight;
2233   Uint1       num = 1;
2234   SeqPntPtr   spp;
2235   CharPtr     str;
2236   SeqLocPtr   tmp;
2237   StringItemPtr ffstring = NULL;
2238 
2239   if (bsp == NULL || location == NULL) return NULL;
2240 
2241   ffstring = FFGetString(ajp);
2242 
2243   if (! order_initialized) {
2244     id_order [SEQID_GENBANK] = num++;
2245     id_order [SEQID_EMBL] = num++;
2246     id_order [SEQID_DDBJ] = num++;
2247     id_order [SEQID_OTHER] = num++;
2248     id_order [SEQID_TPG] = num++;
2249     id_order [SEQID_TPE] = num++;
2250     id_order [SEQID_TPD] = num++;
2251     id_order [SEQID_GPIPE] = num++;
2252     id_order [SEQID_GIBBSQ] = num++;
2253     id_order [SEQID_GIBBMT] = num++;
2254     id_order [SEQID_PRF] = num++;
2255     id_order [SEQID_PDB] = num++;
2256     id_order [SEQID_PIR] = num++;
2257     id_order [SEQID_SWISSPROT] = num++;
2258     id_order [SEQID_PATENT] = num++;
2259     id_order [SEQID_GI] = num++;;
2260     id_order [SEQID_GENERAL] = num++;
2261     id_order [SEQID_LOCAL] = num++;
2262     id_order [SEQID_GIIM] = num++;
2263     order_initialized = TRUE;
2264   }
2265 
2266   if (masterStyle) {
2267 
2268     /* map location from parts to segmented bioseq */
2269 
2270     if (location->choice == SEQLOC_PNT) {
2271       spp = (SeqPntPtr) location->data.ptrvalue;
2272       if (spp != NULL) {
2273         fuzz = spp->fuzz;
2274       }
2275     }
2276 
2277     CheckSeqLocForPartial (location, &noLeft, &noRight);
2278     hasNulls = LocationHasNullsBetween (location);
2279     loc = SeqLocMergeExEx (bsp, location, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE);
2280     if (loc == NULL) {
2281       tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
2282       loc = SeqLocMergeExEx (bsp, tmp, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE);
2283       SeqLocFree (tmp);
2284     }
2285     if (loc == NULL) {
2286       return StringSave ("?");
2287     }
2288     FreeAllFuzz (loc);
2289     SetSeqLocPartial (loc, noLeft, noRight);
2290 
2291     if (loc->choice == SEQLOC_PNT && fuzz != NULL) {
2292       spp = (SeqPntPtr) loc->data.ptrvalue;
2293       if (spp != NULL && spp->fuzz == NULL) {
2294         spp->fuzz = AsnIoMemCopy ((Pointer) fuzz,
2295                                   (AsnReadFunc) IntFuzzAsnRead,
2296                                   (AsnWriteFunc) IntFuzzAsnWrite);
2297       }
2298     }
2299 
2300     FF_DoFlatLoc (ajp, ffstring, bsp, loc, TRUE);
2301 
2302     SeqLocFree (loc);
2303 
2304   } else {
2305     FF_DoFlatLoc (ajp, ffstring, bsp, location, TRUE);
2306   }
2307 
2308   str = FFToCharPtr(ffstring);
2309   FFRecycleString(ajp, ffstring);
2310   return str;
2311 }
2312 
2313 
2314 
2315 
2316 static void PromoteSeqId (SeqIdPtr sip, Pointer userdata)
2317 
2318 {
2319   SeqIdPtr  bestid, newid, oldid;
2320 
2321   bestid = (SeqIdPtr) userdata;
2322 
2323   newid = SeqIdDup (bestid);
2324   if (newid == NULL) return;
2325 
2326   oldid = ValNodeNew (NULL);
2327   if (oldid == NULL) return;
2328 
2329   MemCopy (oldid, sip, sizeof (ValNode));
2330   oldid->next = NULL;
2331 
2332   sip->choice = newid->choice;
2333   sip->data.ptrvalue = newid->data.ptrvalue;
2334 
2335   SeqIdFree (oldid);
2336   ValNodeFree (newid);
2337 
2338   SeqIdStripLocus (sip);
2339 }
2340 
2341 NLM_EXTERN SeqLocPtr SeqLocReMapEx (
2342   SeqIdPtr newid,
2343   SeqLocPtr seq_loc,
2344   SeqLocPtr location,
2345   Int4 offset,
2346   Boolean rev,
2347   Boolean masterStyle
2348 )
2349 
2350 {
2351   BioseqPtr    bsp;
2352   Boolean      hasNulls;
2353   IntFuzzPtr   fuzz = NULL;
2354   SeqLocPtr    loc;
2355   Boolean      noLeft;
2356   Boolean      noRight;
2357   SeqEntryPtr  scope;
2358   SeqIdPtr     sip;
2359   SeqLocPtr    slp = NULL;
2360   SeqPntPtr    spp;
2361   SeqLocPtr    tmp;
2362 
2363   if (newid == NULL || seq_loc == NULL || location == NULL) return NULL;
2364 
2365   if (masterStyle) {
2366 
2367     sip = SeqLocId (seq_loc);
2368     if (sip == NULL) return NULL;
2369     bsp = BioseqFind (sip);
2370     if (bsp == NULL) {
2371       scope = SeqEntrySetScope (NULL);
2372       bsp = BioseqFind (sip);
2373       SeqEntrySetScope (scope);
2374     }
2375     if (bsp == NULL) return NULL;
2376     sip = SeqIdFindBest (bsp->id, 0);
2377 
2378     /* map location from parts to segmented bioseq */
2379 
2380     if (location->choice == SEQLOC_PNT) {
2381       spp = (SeqPntPtr) location->data.ptrvalue;
2382       if (spp != NULL) {
2383         fuzz = spp->fuzz;
2384       }
2385     }
2386 
2387     CheckSeqLocForPartial (location, &noLeft, &noRight);
2388     hasNulls = LocationHasNullsBetween (location);
2389     loc = SeqLocMergeExEx (bsp, location, NULL, FALSE, TRUE, TRUE, hasNulls, FALSE, FALSE);
2390     if (loc == NULL) {
2391       tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
2392       loc = SeqLocMergeExEx (bsp, tmp, NULL, FALSE, TRUE, TRUE, hasNulls, FALSE, FALSE);
2393       SeqLocFree (tmp);
2394     }
2395     if (loc == NULL) {
2396       return NULL;
2397     }
2398     FreeAllFuzz (loc);
2399     SetSeqLocPartial (loc, noLeft, noRight);
2400 
2401     if (loc->choice == SEQLOC_PNT && fuzz != NULL) {
2402       spp = (SeqPntPtr) loc->data.ptrvalue;
2403       if (spp != NULL && spp->fuzz == NULL) {
2404         spp->fuzz = AsnIoMemCopy ((Pointer) fuzz,
2405                                   (AsnReadFunc) IntFuzzAsnRead,
2406                                   (AsnWriteFunc) IntFuzzAsnWrite);
2407       }
2408     }
2409 
2410     scope = SeqEntrySetScope (NULL);
2411     slp = SeqLocReMap (newid, seq_loc, loc, offset, rev);
2412     SeqEntrySetScope (scope);
2413 
2414     SeqLocFree (loc);
2415 
2416     VisitSeqIdsInSeqLoc (slp, (Pointer) sip, PromoteSeqId);
2417   } else {
2418 
2419     scope = SeqEntrySetScope (NULL);
2420     slp = SeqLocReMap (newid, seq_loc, location, offset, rev);
2421     SeqEntrySetScope (scope);
2422   }
2423 
2424   return slp;
2425 }
2426 
2427 
2428 /******************************************************************************/
2429 /*                            End FFFlatLoc functions.                          */
2430 /******************************************************************************/
2431 
2432 
2433 
2434 static void SubSourceToQualArray (
2435   SubSourcePtr ssp,
2436   QualValPtr qvp
2437 )
2438 
2439 {
2440   SourceType  idx;
2441   Uint1       subtype;
2442 
2443   if (ssp == NULL || qvp == NULL) return;
2444 
2445   while (ssp != NULL) {
2446     subtype = ssp->subtype;
2447     if (subtype == 255) {
2448       subtype = 41;
2449     }
2450     if (subtype < 42) {
2451       idx = subSourceToSourceIdx [subtype];
2452       if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
2453         if (qvp [idx].ssp == NULL) {
2454           qvp [idx].ssp = ssp;
2455         }
2456       }
2457     }
2458     ssp = ssp->next;
2459   }
2460 }
2461 
2462 NLM_EXTERN SourceType orgModToSourceIdx [41] = {
2463   SCQUAL_zero_orgmod,
2464   SCQUAL_one_orgmod,
2465   SCQUAL_strain,
2466   SCQUAL_sub_strain,
2467   SCQUAL_type,
2468   SCQUAL_sub_type,
2469   SCQUAL_variety,
2470   SCQUAL_serotype,
2471   SCQUAL_serogroup,
2472   SCQUAL_serovar,
2473   SCQUAL_cultivar,
2474   SCQUAL_pathovar,
2475   SCQUAL_chemovar,
2476   SCQUAL_biovar,
2477   SCQUAL_biotype,
2478   SCQUAL_group,
2479   SCQUAL_sub_group,
2480   SCQUAL_isolate,
2481   SCQUAL_common,
2482   SCQUAL_acronym,
2483   SCQUAL_dosage,
2484   SCQUAL_spec_or_nat_host,
2485   SCQUAL_sub_species,
2486   SCQUAL_specimen_voucher,
2487   SCQUAL_authority,
2488   SCQUAL_forma,
2489   SCQUAL_forma_specialis,
2490   SCQUAL_ecotype,
2491   SCQUAL_synonym,
2492   SCQUAL_anamorph,
2493   SCQUAL_teleomorph,
2494   SCQUAL_breed,
2495   SCQUAL_gb_acronym,
2496   SCQUAL_gb_anamorph,
2497   SCQUAL_gb_synonym,
2498   SCQUAL_culture_collection,
2499   SCQUAL_bio_material,
2500   SCQUAL_metagenome_source,
2501   SCQUAL_old_lineage,
2502   SCQUAL_old_name,
2503   SCQUAL_orgmod_note
2504 };
2505 
2506 static void OrgModToQualArray (
2507   OrgModPtr omp,
2508   QualValPtr qvp
2509 )
2510 
2511 {
2512   SourceType  idx;
2513   Uint1       subtype;
2514 
2515   if (omp == NULL || qvp == NULL) return;
2516 
2517   while (omp != NULL) {
2518     subtype = omp->subtype;
2519     if (subtype == 253) {
2520       subtype = 38;
2521     } else if (subtype == 254) {
2522       subtype = 39;
2523     } else if (subtype == 255) {
2524       subtype = 40;
2525     }
2526     if (subtype < 41) {
2527       idx = orgModToSourceIdx [subtype];
2528       if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
2529         if (qvp [idx].omp == NULL) {
2530           qvp [idx].omp = omp;
2531         }
2532       }
2533     }
2534     omp = omp->next;
2535   }
2536 }
2537 
2538 static CharPtr organelleQual [] = {
2539   NULL,
2540   NULL,
2541   "/organelle=\"plastid:chloroplast\"",
2542   "/organelle=\"plastid:chromoplast\"",
2543   "/organelle=\"mitochondrion:kinetoplast\"",
2544   "/organelle=\"mitochondrion\"",
2545   "/organelle=\"plastid\"",
2546   "/macronuclear",
2547   NULL,
2548   "/plasmid=\"\"",
2549   "/transposon=\"\"",
2550   "/insertion_seq=\"\"",
2551   "/organelle=\"plastid:cyanelle\"",
2552   "/proviral",
2553   NULL,
2554   "/organelle=\"nucleomorph\"",
2555   "/organelle=\"plastid:apicoplast\"",
2556   "/organelle=\"plastid:leucoplast\"",
2557   "/organelle=\"plastid:proplastid\"",
2558   NULL,
2559   "/organelle=\"hydrogenosome\"",
2560   NULL,
2561   "/organelle=\"chromatophore\""
2562 };
2563 
2564 NLM_EXTERN Boolean StringIsJustQuotes (
2565   CharPtr str
2566 )
2567 
2568 {
2569   Nlm_Uchar  ch;    /* to use 8bit characters in multibyte languages */
2570 
2571   if (str != NULL) {
2572     ch = *str;
2573     while (ch != '\0') {
2574       if (ch > ' ' && ch != '"' && ch != '\'') {
2575         return FALSE;
2576       }
2577       str++;
2578       ch = *str;
2579     }
2580   }
2581   return TRUE;
2582 }
2583 
2584 static CharPtr RemoveAllSpaces (
2585   CharPtr str
2586 )
2587 
2588 {
2589   Char     ch;
2590   CharPtr  dst;
2591   CharPtr  ptr;
2592 
2593   if (str == NULL || str [0] == '\0') return NULL;
2594 
2595   dst = str;
2596   ptr = str;
2597   ch = *ptr;
2598   while (ch != '\0') {
2599     if (ch != ' ') {
2600       *dst = ch;
2601       dst++;
2602     }
2603     ptr++;
2604     ch = *ptr;
2605   }
2606   *dst = '\0';
2607 
2608   return str;
2609 }
2610 
2611 NLM_EXTERN void AddFeatureToGbseq (
2612   GBSeqPtr gbseq,
2613   GBFeaturePtr gbfeat,
2614   CharPtr str,
2615   SeqFeatPtr sfp
2616 )
2617 
2618 {
2619   Char            ch;
2620   CharPtr         copy;
2621   GBQualifierPtr  gbqual;
2622   GBQualifierPtr  last = NULL;
2623   CharPtr         ptr;
2624   CharPtr         qual;
2625   CharPtr         tmp;
2626   CharPtr         val;
2627 
2628   if (gbseq == NULL || gbfeat == NULL || StringHasNoText (str)) return;
2629 
2630   copy = StringSave (str);
2631 
2632   /* link in reverse order, to be reversed in slash block */
2633 
2634   gbfeat->next = gbseq->feature_table;
2635   gbseq->feature_table = gbfeat;
2636 
2637   /* now parse qualifiers */
2638 
2639   ptr = StringStr (copy, "                     /");
2640   while (ptr != NULL) {
2641     qual = ptr + 22;
2642     val = qual;
2643     ch = *val;
2644     while (ch != '=' && ch != '\n' && ch != '\0') {
2645       val++;
2646       ch = *val;
2647     }
2648     /*
2649     val = StringChr (qual, '=');
2650     if (val == NULL) {
2651       val = StringChr (qual, '\n');
2652     }
2653     */
2654     if (ch != '\0' /* val != NULL */) {
2655       *val = '\0';
2656       val++;
2657       if (ch == '=') {
2658         tmp = val;
2659         if (*val == '"') {
2660           val++;
2661           tmp = val;
2662           ch = *tmp;
2663           while (ch != '"' && ch != '\0') {
2664             tmp++;
2665             ch = *tmp;
2666           }
2667         }
2668         ptr = StringStr (tmp, "\n                     /");
2669         if (ptr != NULL) {
2670           *ptr = '\0';
2671           ptr++;
2672         }
2673       } else {
2674         ptr = StringStr (val, "                     /");
2675         val = NULL;
2676       }
2677       gbqual = GBQualifierNew ();
2678       if (gbqual != NULL) {
2679         gbqual->name = StringSave (qual);
2680         if (! StringHasNoText (val)) {
2681           gbqual->value = StringSave (val);
2682           CleanQualValue (gbqual->value);
2683           Asn2gnbkCompressSpaces (gbqual->value);
2684           if (sfp != NULL) {
2685             if (sfp->data.choice == SEQFEAT_CDREGION &&
2686                 StringICmp (qual, "translation") == 0) {
2687               RemoveAllSpaces (gbqual->value);
2688             } else if (sfp->data.choice == SEQFEAT_RNA &&
2689                        StringICmp (qual, "transcription") == 0) {
2690               RemoveAllSpaces (gbqual->value);
2691             } else if (sfp->data.choice == SEQFEAT_PROT &&
2692                        StringICmp (qual, "peptide") == 0) {
2693               RemoveAllSpaces (gbqual->value);
2694             }
2695           }
2696         }
2697       }
2698     } else {
2699       gbqual = GBQualifierNew ();
2700       if (gbqual != NULL) {
2701         gbqual->name = StringSave (qual);
2702       }
2703     }
2704     if (gbfeat->quals == NULL) {
2705       gbfeat->quals = gbqual;
2706     } else if (last != NULL) {
2707       last->next = gbqual;
2708     }
2709     last = gbqual;
2710   }
2711 
2712   MemFree (copy);
2713 }
2714 
2715 NLM_EXTERN CharPtr GetMolTypeQual (
2716   BioseqPtr bsp
2717 )
2718 
2719 {
2720   SeqMgrDescContext  dcontext;
2721   MolInfoPtr         mip;
2722   SeqDescrPtr        sdp;
2723 
2724   if (bsp == NULL) return NULL;
2725 
2726   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
2727   if (sdp == NULL) return NULL;
2728   mip = (MolInfoPtr) sdp->data.ptrvalue;
2729   if (mip == NULL) return NULL;
2730 
2731   switch (mip->biomol) {
2732     case 0 :
2733       switch (bsp->mol) {
2734         case Seq_mol_dna :
2735           return "unassigned DNA";
2736         case Seq_mol_rna :
2737           return "unassigned RNA";
2738         case Seq_mol_na :
2739           break;
2740         default :
2741           break;
2742       }
2743       break;
2744     case MOLECULE_TYPE_GENOMIC :
2745       switch (bsp->mol) {
2746         case Seq_mol_dna :
2747           return "genomic DNA";
2748         case Seq_mol_rna :
2749           return "genomic RNA";
2750         case Seq_mol_na :
2751           break;
2752         default :
2753           break;
2754       }
2755       break;
2756     case MOLECULE_TYPE_PRE_MRNA :
2757       return "transcribed RNA";
2758     case MOLECULE_TYPE_MRNA :
2759       return "mRNA";
2760     case MOLECULE_TYPE_RRNA :
2761       return "rRNA";
2762     case MOLECULE_TYPE_TRNA :
2763       return "tRNA";
2764     case MOLECULE_TYPE_SNRNA :
2765       return "transcribed RNA";
2766     case MOLECULE_TYPE_SCRNA :
2767       return "transcribed RNA";
2768     case MOLECULE_TYPE_PEPTIDE :
2769       break;
2770     case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL :
2771       switch (bsp->mol) {
2772         case Seq_mol_dna :
2773           return "other DNA";
2774         case Seq_mol_rna :
2775           return "other RNA";
2776         case Seq_mol_na :
2777           break;
2778         default :
2779           break;
2780       }
2781       break;
2782     case MOLECULE_TYPE_GENOMIC_MRNA_MIX :
2783       break;
2784     case MOLECULE_TYPE_CRNA :
2785       return "viral cRNA";
2786       break;
2787     case MOLECULE_TYPE_SNORNA :
2788       return "transcribed RNA";
2789       break;
2790     case MOLECULE_TYPE_TRANSCRIBED_RNA :
2791       return "transcribed RNA";
2792       break;
2793     case MOLECULE_TYPE_NCRNA :
2794       return "transcribed RNA";
2795       break;
2796     case MOLECULE_TYPE_TMRNA :
2797       return "transcribed RNA";
2798       break;
2799     case 255 :
2800       switch (bsp->mol) {
2801         case Seq_mol_dna :
2802           return "other DNA";
2803         case Seq_mol_rna :
2804           return "other RNA";
2805         case Seq_mol_na :
2806           break;
2807         default :
2808           break;
2809       }
2810       break;
2811     default :
2812       break;
2813   }
2814 
2815   return NULL;
2816 }
2817 
2818 static ValNodePtr ParsePCRPrimerString (
2819   QualValPtr qvp
2820 )
2821 
2822 {
2823   CharPtr       fwd_primer_seq = NULL;
2824   CharPtr       rev_primer_seq = NULL;
2825   CharPtr       fwd_primer_name = NULL;
2826   CharPtr       rev_primer_name = NULL;
2827   SubSourcePtr  ssp;
2828 
2829   if (qvp == NULL) return NULL;
2830 
2831   ssp = qvp [SCQUAL_fwd_primer_seq].ssp;
2832   if (ssp != NULL) {
2833     fwd_primer_seq = ssp->name;
2834   }
2835   ssp = qvp [SCQUAL_rev_primer_seq].ssp;
2836   if (ssp != NULL) {
2837     rev_primer_seq = ssp->name;
2838   }
2839   ssp = qvp [SCQUAL_fwd_primer_name].ssp;
2840   if (ssp != NULL) {
2841     fwd_primer_name = ssp->name;
2842   }
2843   ssp = qvp [SCQUAL_rev_primer_name].ssp;
2844   if (ssp != NULL) {
2845     rev_primer_name = ssp->name;
2846   }
2847 
2848   return ParsePCRStrings (fwd_primer_seq, rev_primer_seq, fwd_primer_name, rev_primer_name);
2849 }
2850 
2851 static ValNodePtr ParseColonString (
2852   CharPtr strs,
2853   Boolean multiple
2854 )
2855 
2856 {
2857   ValNodePtr  head = NULL;
2858   size_t      len;
2859   CharPtr     ptr, str, tmp;
2860 
2861   if (StringHasNoText (strs)) return NULL;
2862 
2863   tmp = StringSave (strs);
2864   str = tmp;
2865   len = StringLen (str);
2866   if (len > 1 && StringChr (str, ':') != NULL /* && multiple */) {
2867     while (StringDoesHaveText (str)) {
2868       ptr = StringChr (str, ':');
2869       if (ptr != NULL) {
2870         *ptr = '\0';
2871         ptr++;
2872       }
2873       TrimSpacesAroundString (str);
2874       ValNodeCopyStr (&head, 0, str);
2875       str = ptr;
2876     }
2877   } else {
2878     ValNodeCopyStr (&head, 0, str);
2879   }
2880 
2881   MemFree (tmp);
2882   return head;
2883 }
2884 
2885 static void PrintHalfPrimer (
2886   ValNodePtr PNTR headp,
2887   CharPtr name,
2888   CharPtr seq,
2889   CharPtr nm_label,
2890   CharPtr sq_label,
2891   CharPtr prefix,
2892   Boolean name_only_ok,
2893   Boolean multiple
2894 )
2895 
2896 {
2897   ValNodePtr  name_list, seq_list, name_vnp, seq_vnp;
2898   CharPtr     str;
2899 
2900   name_list = ParseColonString (name, multiple);
2901   seq_list = ParseColonString (seq, multiple);
2902 
2903   name_vnp = name_list;
2904   seq_vnp = seq_list;
2905   if (seq_vnp != NULL) {
2906     while (seq_vnp != NULL) {
2907       if (name_vnp != NULL) {
2908         str = (CharPtr) name_vnp->data.ptrvalue;
2909         if (StringDoesHaveText (str)) {
2910           ValNodeCopyStr (headp, 0, prefix);
2911           ValNodeCopyStr (headp, 0, nm_label);
2912           ValNodeCopyStr (headp, 0, str);
2913           prefix = ", ";
2914         }
2915         name_vnp = name_vnp->next;
2916       }
2917       str = (CharPtr) seq_vnp->data.ptrvalue;
2918       if (StringDoesHaveText (str)) {
2919         ValNodeCopyStr (headp, 0, prefix);
2920         ValNodeCopyStr (headp, 0, sq_label);
2921         ValNodeCopyStr (headp, 0, str);
2922         prefix = ", ";
2923       }
2924       seq_vnp = seq_vnp->next;
2925     }
2926   } else if (name_only_ok) {
2927     while (name_vnp != NULL) {
2928       str = (CharPtr) name_vnp->data.ptrvalue;
2929       if (StringDoesHaveText (str)) {
2930         ValNodeCopyStr (headp, 0, prefix);
2931         ValNodeCopyStr (headp, 0, nm_label);
2932         ValNodeCopyStr (headp, 0, str);
2933         prefix = ", ";
2934       }
2935       name_vnp = name_vnp->next;
2936     }
2937   }
2938 
2939   ValNodeFreeData (name_list);
2940   ValNodeFreeData (seq_list);
2941 }
2942 
2943 static CharPtr NextPCRPrimerString (
2944   PcrSetPtr psp,
2945   Boolean isInNote,
2946   Boolean multiple
2947 )
2948 
2949 {
2950   ValNodePtr  head = NULL, vnp;
2951   CharPtr     prefix = NULL;
2952   CharPtr     str;
2953 
2954   if (psp == NULL) return NULL;
2955 
2956   if (StringHasNoText (psp->fwd_seq) || StringHasNoText (psp->rev_seq)) {
2957     if (isInNote) {
2958       /*
2959       if (StringDoesHaveText (psp->fwd_name)) {
2960         ValNodeCopyStr (&head, 0, prefix);
2961         ValNodeCopyStr (&head, 0, "fwd_name: ");
2962         ValNodeCopyStr (&head, 0, psp->fwd_name);
2963         prefix = ", ";
2964       }
2965 
2966       if (StringDoesHaveText (psp->fwd_seq)) {
2967         ValNodeCopyStr (&head, 0, prefix);
2968         ValNodeCopyStr (&head, 0, "fwd_seq: ");
2969         ValNodeCopyStr (&head, 0, psp->fwd_seq);
2970         prefix = ", ";
2971       }
2972 
2973       if (StringDoesHaveText (psp->rev_name)) {
2974         ValNodeCopyStr (&head, 0, prefix);
2975         ValNodeCopyStr (&head, 0, "rev_name: ");
2976         ValNodeCopyStr (&head, 0, psp->rev_name);
2977         prefix = ", ";
2978       }
2979 
2980       if (StringDoesHaveText (psp->rev_seq)) {
2981         ValNodeCopyStr (&head, 0, prefix);
2982         ValNodeCopyStr (&head, 0, "rev_seq: ");
2983         ValNodeCopyStr (&head, 0, psp->rev_seq);
2984         prefix = ", ";
2985       }
2986       */
2987       PrintHalfPrimer (&head, psp->fwd_name, psp->fwd_seq, "fwd_name: ", "fwd_seq: ", NULL, TRUE, multiple);
2988       if (head != NULL) {
2989         prefix = ", ";
2990       }
2991       PrintHalfPrimer (&head, psp->rev_name, psp->rev_seq, "rev_name: ", "rev_seq: ", prefix, TRUE, multiple);
2992     } else {
2993       return StringSave ("");
2994     }
2995   } else {
2996     if (isInNote) return StringSave ("");
2997 
2998     PrintHalfPrimer (&head, psp->fwd_name, psp->fwd_seq, "fwd_name: ", "fwd_seq: ", NULL, FALSE, multiple);
2999     PrintHalfPrimer (&head, psp->rev_name, psp->rev_seq, "rev_name: ", "rev_seq: ", ", ", FALSE, multiple);
3000   }
3001 
3002   if (head != NULL && isInNote) {
3003     vnp = ValNodeCopyStr (NULL, 0, "PCR_primers=");
3004     if (vnp != NULL) {
3005       vnp->next = head;
3006       head = vnp;
3007     }
3008   }
3009 
3010   str = MergeFFValNodeStrs (head);
3011   ValNodeFreeData (head);
3012   return str;
3013 }
3014 
3015 static void PrintHalfReaction (
3016   ValNodePtr PNTR headp,
3017   PCRPrimerPtr primers,
3018   CharPtr nm_label,
3019   CharPtr sq_label,
3020   CharPtr prefix,
3021   Boolean name_only_ok,
3022   Boolean multiple
3023 )
3024 
3025 {
3026   PCRPrimerPtr  ppp;
3027 
3028   for (ppp = primers; ppp != NULL; ppp = ppp->next) {
3029     if (StringDoesHaveText (ppp->seq)) {
3030       if (StringDoesHaveText (ppp->name)) {
3031         ValNodeCopyStr (headp, 0, prefix);
3032         ValNodeCopyStr (headp, 0, nm_label);
3033         ValNodeCopyStr (headp, 0, ppp->name);
3034         prefix = ", ";
3035       }
3036       ValNodeCopyStr (headp, 0, prefix);
3037       ValNodeCopyStr (headp, 0, sq_label);
3038       ValNodeCopyStr (headp, 0, ppp->seq);
3039       prefix = ", ";
3040     } else if (name_only_ok) {
3041       if (StringDoesHaveText (ppp->name)) {
3042         ValNodeCopyStr (headp, 0, prefix);
3043         ValNodeCopyStr (headp, 0, nm_label);
3044         ValNodeCopyStr (headp, 0, ppp->name);
3045         prefix = ", ";
3046       }
3047     }
3048   }
3049 }
3050 
3051 static CharPtr NextPCRReaction (
3052   PCRReactionPtr prp,
3053   Boolean isInNote,
3054   Boolean multiple
3055 )
3056 
3057 {
3058   Boolean       has_fwd_seq = FALSE, has_rev_seq = FALSE;
3059   ValNodePtr    head = NULL, vnp;
3060   PCRPrimerPtr  ppp;
3061   CharPtr       prefix = NULL, str;
3062 
3063   if (prp == NULL) return NULL;
3064 
3065   for (ppp = prp->forward; ppp != NULL; ppp = ppp->next) {
3066     if (StringDoesHaveText (ppp->seq)) {
3067       has_fwd_seq = TRUE;
3068     }
3069   }
3070 
3071   for (ppp = prp->reverse; ppp != NULL; ppp = ppp->next) {
3072     if (StringDoesHaveText (ppp->seq)) {
3073       has_rev_seq = TRUE;
3074     }
3075   }
3076 
3077   if (has_fwd_seq && has_rev_seq) {
3078     if (isInNote) {
3079       return StringSave ("");
3080     } else {
3081       PrintHalfReaction (&head, prp->forward, "fwd_name: ", "fwd_seq: ", NULL, FALSE, multiple);
3082       PrintHalfReaction (&head, prp->reverse, "rev_name: ", "rev_seq: ", ", ", FALSE, multiple);
3083     }
3084   } else {
3085     if (isInNote) {
3086       PrintHalfReaction (&head, prp->forward, "fwd_name: ", "fwd_seq: ", NULL, TRUE, multiple);
3087       if (head != NULL) {
3088         prefix = ", ";
3089       }
3090       PrintHalfReaction (&head, prp->reverse, "rev_name: ", "rev_seq: ", prefix, TRUE, multiple);
3091     } else {
3092       return StringSave ("");
3093     }
3094   }
3095 
3096   if (head != NULL && isInNote) {
3097     vnp = ValNodeCopyStr (NULL, 0, "PCR_primers=");
3098     if (vnp != NULL) {
3099       vnp->next = head;
3100       head = vnp;
3101     }
3102   }
3103 
3104   str = MergeFFValNodeStrs (head);
3105   ValNodeFreeData (head);
3106   return str;
3107 }
3108 
3109 /* specimen_voucher, culture_collection, bio_material hyperlinks */
3110 
/* base URLs, used as the links column of the voucher tables */

#define s_atcc_base  "http://www.atcc.org/SearchCatalogs/linkin?id="
#define s_bcrc_base  "http://strain.bcrc.firdi.org.tw/BSAS/controller?event=SEARCH&bcrc_no="
#define s_ccmp_base  "http://ccmp.bigelow.org/SD/display.php?strain=CCMP"
#define s_ccug_base  "http://www.ccug.se/default.cfm?page=search_record.cfm&db=mc&s_tests=1&ccugno="
#define s_dsmz_base  "http://www.dsmz.de/microorganisms/search_no.php?q="
#define s_fsu_base   "http://www.prz.uni-jena.de/data.php?fsu="
#define s_icmp_base  "http://nzfungi.landcareresearch.co.nz/icmp/results_cultures.asp?ID=&icmpVAR="
#define s_ku_base    "http://collections.nhm.ku.edu/"
#define s_pcc_base   "http://www.pasteur.fr/recherche/banques/PCC/docs/pcc"
#define s_pcmb_base  "http://www2.bishopmuseum.org/HBS/PCMB/results3.asp?searchterm3="
#define s_pdd_base   "http://nzfungi.landcareresearch.co.nz/html/data_collections_details.asp?CID="
#define s_tgrc_base  "http://tgrc.ucdavis.edu/Data/Acc/AccDetail.aspx?AccessionNum="
#define s_uam_base   "http://arctos.database.museum/guid/"

/* strings used as the prefix column */

#define s_colon_pfx  ":"

#define s_kui_pfx    "KU_Fish/detail.jsp?record="
#define s_kuit_pfx   "KU_Tissue/detail.jsp?record="

/* strings used as the suffix column */

#define s_bcrc_sfx   "&type_id=6&keyword=;;"
#define s_pcc_sfx    ".htm"

/* institution names, used as the mouseover column */

#define s_atcc_inst  "American Type Culture Collection"
#define s_bcrc_inst  "Bioresource Collection and Research Center"
#define s_ccmp_inst  "Provasoli-Guillard National Center for Culture of Marine Phytoplankton"
#define s_ccug_inst  "Culture Collection, University of Goteborg, Department of Clinical Bacteriology"
#define s_crcm_inst  "Charles R. Conner Museum, Washington State University"
#define s_dgr_inst   "Division of Genomic Resources, University of New Mexico"
#define s_dsmz_inst  "German Resource Center for Biological Material"
#define s_fsu_inst   "Fungal Reference Center, University of Jena"
#define s_icmp_inst  "International Collection of Microorganisms from Plants"
#define s_ku_inst    "University of Kansas, Museum of Natural History"
#define s_kwp_inst   "Kenelm W. Philip Collection, University of Alaska Museum of the North"
#define s_nzfh_inst  "New Zealand Fungal Herbarium"
#define s_msb_inst   "Museum of Southwestern Biology, University of New Mexico"
#define s_mvz_inst   "Museum of Vertebrate Zoology, University of California"
#define s_nbsb_inst  "National Biomonitoring Specimen Bank, U.S. Geological Survey"
#define s_pcc_inst   "Pasteur Culture Collection of Cyanobacteria"
#define s_pcmb_inst  "Pacific Center for Molecular Biodiversity"
#define s_psu_inst   "Portland State University"
#define s_tgrc_inst  "Tomato Genetics Resource Center, University of California"
#define s_uam_inst   "University of Alaska Museum of the North"
#define s_wmnu_inst  "Western New Mexico University Museum"
3154 
3155 typedef struct vouch {
3156   CharPtr  sites;
3157   CharPtr  links;
3158   Boolean  prepend_institute;
3159   CharPtr  prefix;
3160   CharPtr  suffix;
3161   CharPtr  mouseover;
3162 } VouchData, PNTR VouchDataPtr;
3163 
3164 static VouchData Nlm_spec_vouchers [] = {
3165  { "ATCC",        s_atcc_base, FALSE, NULL,         NULL,       s_atcc_inst  },
3166  { "BCRC",        s_bcrc_base, FALSE, NULL,         s_bcrc_sfx, s_bcrc_inst  },
3167  { "CCMP",        s_ccmp_base, FALSE, NULL,         NULL,       s_ccmp_inst  },
3168  { "CCUG",        s_ccug_base, FALSE, NULL,         NULL,       s_ccug_inst  },
3169  { "CRCM:Bird",   s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_crcm_inst  },
3170  { "DGR:Bird",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_dgr_inst   },
3171  { "DGR:Ento",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_dgr_inst   },
3172  { "DGR:Fish",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_dgr_inst   },
3173  { "DGR:Herp",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_dgr_inst   },
3174  { "DGR:Mamm",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_dgr_inst   },
3175  { "DSM",         s_dsmz_base, FALSE, NULL,         NULL,       s_dsmz_inst  },
3176  { "FSU<DEU>",    s_fsu_base,  FALSE, NULL,         NULL,       s_fsu_inst   },
3177  { "ICMP",        s_icmp_base, FALSE, NULL,         NULL,       s_icmp_inst  },
3178  { "KU:I",        s_ku_base,   FALSE, s_kui_pfx,    NULL,       s_ku_inst    },
3179  { "KU:IT",       s_ku_base,   FALSE, s_kuit_pfx,   NULL,       s_ku_inst    },
3180  { "KWP:Ento",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_kwp_inst   },
3181  { "MSB:Bird",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_msb_inst   },
3182  { "MSB:Mamm",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_msb_inst   },
3183  { "MSB:Para",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_msb_inst   },
3184  { "MVZ:Bird",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_mvz_inst   },
3185  { "MVZ:Egg",     s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_mvz_inst   },
3186  { "MVZ:Herp",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_mvz_inst   },
3187  { "MVZ:Hild",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_mvz_inst   },
3188  { "MVZ:Img",     s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_mvz_inst   },
3189  { "MVZ:Mamm",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_mvz_inst   },
3190  { "MVZ:Page",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_mvz_inst   },
3191  { "MVZObs:Herp", s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_mvz_inst   },
3192  { "NBSB:Bird",   s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_nbsb_inst  },
3193  { "PCC",         s_pcc_base,  FALSE, NULL,         s_pcc_sfx,  s_pcc_inst   },
3194  { "PCMB",        s_pcmb_base, FALSE, NULL,         NULL,       s_pcmb_inst  },
3195  { "PDD",         s_pdd_base,  FALSE, NULL,         NULL,       s_nzfh_inst  },
3196  { "TGRC",        s_tgrc_base, FALSE, NULL,         NULL,       s_tgrc_inst  },
3197  { "PSU:Mamm",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_psu_inst   },
3198  { "UAM:Bird",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3199  { "UAM:Bryo",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3200  { "UAM:Crus",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3201  { "UAM:Ento",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3202  { "UAM:Fish",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3203  { "UAM:Herb",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3204  { "UAM:Herp",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3205  { "UAM:Mamm",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3206  { "UAM:Moll",    s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3207  { "UAM:Paleo",   s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3208  { "UAMObs:Mamm", s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_uam_inst   },
3209  { "WNMU:Bird",   s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_wmnu_inst  },
3210  { "WNMU:Fish",   s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_wmnu_inst  },
3211  { "WNMU:Mamm",   s_uam_base,  TRUE,  s_colon_pfx,  NULL,       s_wmnu_inst  },
3212  { NULL,          NULL,        FALSE, NULL,         NULL,       NULL         }
3213 };
3214 
3215 static Int2 VoucherNameIsValid (
3216   CharPtr name
3217 )
3218 
3219 {
3220   Int2     L, R, mid;
3221   CharPtr  ptr;
3222   Char     str [256];
3223 
3224   if (StringHasNoText (name)) return -1;
3225   StringNCpy_0 (str, name, sizeof (str));
3226   ptr = StringChr (str, ' ');
3227   if (ptr != NULL) {
3228     *ptr = '\0';
3229   }
3230 
3231   L = 0;
3232   R = sizeof (Nlm_spec_vouchers) / sizeof (Nlm_spec_vouchers [0]) - 1; /* -1 because now NULL terminated */
3233 
3234   while (L < R) {
3235     mid = (L + R) / 2;
3236     if (StringICmp (Nlm_spec_vouchers [mid].sites, str) < 0) {
3237       L = mid + 1;
3238     } else {
3239       R = mid;
3240     }
3241   }
3242 
3243   /* case sensitive comparison at end enforces strictness */
3244 
3245   if (StringCmp (Nlm_spec_vouchers [R].sites, str) == 0) {
3246     return R;
3247   }
3248 
3249   return -1;
3250 }
3251 
3252 /* works on subname copy that it can change */
3253 
3254 static Boolean ParseSecVoucher (
3255   CharPtr subname,
3256   CharPtr PNTR inst,
3257   CharPtr PNTR id
3258 )
3259 
3260 {
3261   CharPtr  ptr;
3262   CharPtr  tmp;
3263 
3264   if (StringHasNoText (subname)) return FALSE;
3265   if (StringLen (subname) < 5) return FALSE;
3266   TrimSpacesAroundString (subname);
3267 
3268   ptr = StringChr (subname, ':');
3269   if (ptr == NULL) return FALSE;
3270 
3271   *inst = subname;
3272 
3273   tmp = StringChr (ptr + 1, ':');
3274   if (tmp != NULL) {
3275     *tmp = '\0';
3276     tmp++;
3277     TrimSpacesAroundString (tmp);
3278     *id = tmp;
3279   } else {
3280     *ptr = '\0';
3281     ptr++;
3282     TrimSpacesAroundString (ptr);
3283     *id = ptr;
3284   }
3285 
3286   if (StringHasNoText (*inst) || StringHasNoText (*id)) return FALSE;
3287 
3288   return TRUE;
3289 }
3290 
3291 static void Do_www_specimen_voucher (
3292   StringItemPtr ffstring,
3293   CharPtr inst,
3294   CharPtr id,
3295   VouchDataPtr vdp
3296 )
3297 
3298 {
3299   if ( ffstring == NULL || inst == NULL || id == NULL || vdp == NULL || vdp->links == NULL ) return;
3300 
3301   FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
3302   FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
3303   FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3304   FFAddOneString (ffstring, vdp->links, FALSE, FALSE, TILDE_IGNORE);
3305   if (vdp->prepend_institute) {
3306     FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
3307   }
3308   if (vdp->prefix != NULL) {
3309     FFAddOneString (ffstring, vdp->prefix, FALSE, FALSE, TILDE_IGNORE);
3310   }
3311   FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_IGNORE);
3312   if (vdp->suffix != NULL) {
3313     FFAddOneString (ffstring, vdp->suffix, FALSE, FALSE, TILDE_IGNORE);
3314   }
3315   FFAddOneString(ffstring, "\"", FALSE, FALSE, TILDE_IGNORE);
3316   if (vdp->mouseover != NULL) {
3317     FFAddTextToString (ffstring, " title=\"", vdp->mouseover, "\"",
3318                        FALSE, FALSE, TILDE_IGNORE);
3319   }
3320   FFAddTextToString (ffstring, ">", id, "</a>", FALSE, FALSE, TILDE_IGNORE);
3321 }
3322 
3323 NLM_EXTERN void FF_www_specimen_voucher (
3324   IntAsn2gbJobPtr ajp,
3325   StringItemPtr ffstring,
3326   CharPtr subname
3327 )
3328 
3329 {
3330   Char          buf [512];
3331   CharPtr       inst = NULL, id = NULL;
3332   Int2          R;
3333   VouchDataPtr  vdp;
3334 
3335   if ( ffstring == NULL || subname == NULL ) return;
3336   if (! GetWWW (ajp)) { /* not in www mode */
3337     FFAddTextToString(ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3338     return;
3339   }
3340   StringNCpy_0 (buf, subname, sizeof (buf));
3341   if (! ParseSecVoucher (buf, &inst, &id)) {
3342     FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3343     return;
3344   }
3345   R = VoucherNameIsValid (inst);
3346   if (R < 0) {
3347     FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3348     return;
3349   }
3350   vdp = &(Nlm_spec_vouchers [R]);
3351   if (vdp == NULL || vdp->links == NULL) {
3352     FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3353     return;
3354   }
3355   Do_www_specimen_voucher (ffstring, inst, id, vdp);
3356 }
3357 
3358 static void Do_www_lat_lon (
3359   StringItemPtr ffstring,
3360   CharPtr lat_lon
3361 )
3362 
3363 {
3364   Char     buf [128];
3365   Char     ch;
3366   CharPtr  ew = "";
3367   Int2     i;
3368   CharPtr  ns = "";
3369   CharPtr  ptr;
3370   Char     tmp [128];
3371   CharPtr  tokens [6];
3372 
3373   if ( ffstring == NULL || lat_lon == NULL ) return;
3374 
3375   MemSet ((Pointer) tokens, 0, sizeof (tokens));
3376 
3377   StringNCpy_0 (buf, lat_lon, sizeof (buf));
3378 
3379   i = 0;
3380   ptr = buf;
3381   ch = *ptr;
3382   tokens [i] = ptr;
3383   while (ch != '\0' && i < 5) {
3384     if (ch == ' ') {
3385       *ptr = '\0';
3386       ptr++;
3387       ch = *ptr;
3388       while (ch == ' ') {
3389         ptr++;
3390         ch = *ptr;
3391       }
3392       i++;
3393       tokens [i] = ptr;
3394     } else {
3395       ptr++;
3396       ch = *ptr;
3397     }
3398   }
3399 
3400   ptr = tokens [1];
3401   if (ptr != NULL && *ptr == 'S') {
3402     ns = "-";
3403   }
3404   ptr = tokens [3];
3405   if (ptr != NULL && *ptr == 'W') {
3406     ew = "-";
3407   }
3408 
3409   if (tokens [0] == NULL) {
3410     tokens [0] = "?";
3411   }
3412   if (tokens [2] == NULL) {
3413     tokens [2] = "?";
3414   }
3415 
3416   FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3417   FF_Add_NCBI_Base_URL (ffstring, link_lat_lon);
3418   sprintf (tmp, "lat=%s%s&lon=%s%s", ns, tokens [0], ew, tokens [2]);
3419   FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
3420   FFAddTextToString (ffstring, "\">", lat_lon, "</a>", FALSE, FALSE, TILDE_IGNORE);
3421 }
3422 
3423 static void FF_www_lat_lon (
3424   IntAsn2gbJobPtr ajp,
3425   StringItemPtr ffstring,
3426   CharPtr lat_lon
3427 )
3428 
3429 {
3430   Boolean  format_ok = FALSE;
3431   FloatHi  lat = 0.0;
3432   FloatHi  lon = 0.0;
3433   Boolean  lat_in_range = FALSE;
3434   Boolean  lon_in_range = FALSE;
3435 
3436   if ( ffstring == NULL || lat_lon == NULL ) return;
3437   if (! GetWWW (ajp)) { /* not in www mode */
3438     FFAddTextToString(ffstring, NULL, lat_lon, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3439     return;
3440   }
3441   if (StringDoesHaveText (lat_lon)) {
3442     IsCorrectLatLonFormat (lat_lon, &format_ok, &lat_in_range, &lon_in_range);
3443     if (format_ok && lat_in_range && lon_in_range) {
3444       if (ParseLatLon (lat_lon, &lat, &lon)) {
3445         Do_www_lat_lon (ffstring, lat_lon);
3446         return;
3447       }
3448     }
3449   }
3450 
3451   /* if any of above tests failed, default print */
3452   FFAddTextToString (ffstring, NULL, lat_lon, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3453 }
3454 
3455 NLM_EXTERN CharPtr FormatSourceFeatBlock (
3456   Asn2gbFormatPtr afp,
3457   BaseBlockPtr bbp
3458 )
3459 
3460 {
3461   Boolean            add_period;
3462   IntAsn2gbJobPtr    ajp;
3463   Asn2gbSectPtr      asp;
3464   BioSourcePtr       biop = NULL;
3465   BioseqPtr          bsp;
3466   BioseqSetPtr       bssp;
3467   Char               buf [80];
3468   CharPtr            common = NULL;
3469   DbtagPtr           dbt;
3470   SeqMgrDescContext  dcontext;
3471   SeqMgrFeatContext  fcontext;
3472   GBFeaturePtr       gbfeat = NULL;
3473   GBSeqPtr           gbseq;
3474   Int2               i;
3475   Uint1              idx;
3476   IntSrcBlockPtr     isp;
3477   Boolean            is_desc = TRUE;
3478   Boolean            is_gps = FALSE;
3479   Boolean            is_other = FALSE;
3480   Boolean            is_est_or_gss = FALSE;
3481   Boolean            is_bc;
3482   Boolean            is_rf;
3483   Boolean            is_sc;
3484   Int2               j;
3485   Uint1              jdx;
3486   Uint1              lastomptype;
3487   Uint1              lastssptype;
3488   SeqLocPtr          location = NULL;
3489   MolInfoPtr         mip;
3490   CharPtr            notestr;
3491   SourceType PNTR    notetbl = NULL;
3492   Boolean            okay;
3493   ObjectIdPtr        oip;
3494   OrgModPtr          omp;
3495   OrgNamePtr         onp = NULL;
3496   OrgRefPtr          orp = NULL;
3497   Boolean            partial5;
3498   Boolean            partial3;
3499   CharPtr            prefix;
3500   PCRReactionPtr     prp;
3501   ValNodePtr         pset;
3502   PcrSetPtr          psp;
3503   SourceType PNTR    qualtbl = NULL;
3504   QualValPtr         qvp;
3505   SeqDescrPtr        sdp;
3506   SeqEntryPtr        sep;
3507   SeqFeatPtr         sfp = NULL;
3508   SeqIdPtr           sip;
3509   SubSourcePtr       ssp;
3510   CharPtr            str;
3511   BioseqPtr          target;
3512   CharPtr            taxname = NULL;
3513   ValNodePtr         vnp;
3514   StringItemPtr      ffstring, unique;
3515 
3516   if (afp == NULL || bbp == NULL) return NULL;
3517   ajp = afp->ajp;
3518   if (ajp == NULL) return NULL;
3519   asp = afp->asp;
3520   if (asp == NULL) return NULL;
3521   target = asp->target;
3522   bsp = asp->bsp;
3523   if (target == NULL || bsp == NULL) return NULL;
3524   qvp = afp->qvp;
3525   if (qvp == NULL) return NULL;
3526 
3527   if (ajp->gbseq) {
3528     gbseq = &asp->gbseq;
3529   } else {
3530     gbseq = NULL;
3531   }
3532 
3533   /* five-column feature table uses special code for formatting */
3534 
3535   if (ajp->format == FTABLE_FMT) {
3536     str = FormatFtableSourceFeatBlock (bbp, target);
3537     return str;
3538   }
3539 
3540   /* otherwise do regular flatfile formatting */
3541 
3542   if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
3543 
3544   isp = (IntSrcBlockPtr) bbp;
3545 
3546   /* could be descriptor or feature */
3547 
3548   if (bbp->itemtype == OBJ_SEQDESC) {
3549     sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
3550     if (sdp != NULL && dcontext.seqdesctype == Seq_descr_source) {
3551       biop = (BioSourcePtr) sdp->data.ptrvalue;
3552     }
3553   } else if (bbp->itemtype == OBJ_SEQFEAT) {
3554     sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
3555     if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
3556       biop = (BioSourcePtr) sfp->data.value.ptrvalue;
3557     }
3558     is_desc = FALSE;
3559   }
3560 
3561   if (biop == NULL) return NULL;
3562 
3563   unique = FFGetString(ajp);
3564   if ( unique == NULL ) return NULL;
3565 
3566   ffstring = FFGetString(ajp);
3567   if ( ffstring == NULL ) return NULL;
3568 
3569   FFStartPrint (ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", FALSE);
3570   FFAddOneString (ffstring, "source", FALSE, FALSE, TILDE_IGNORE);
3571   FFAddNChar(ffstring, ' ', 21 - 5 - StringLen("source"), FALSE);
3572 
3573   if (gbseq != NULL) {
3574     gbfeat = GBFeatureNew ();
3575     if (gbfeat != NULL) {
3576       gbfeat->key = StringSave ("source");
3577     }
3578   }
3579 
3580   location = isp->loc;
3581 
3582   str = FFFlatLoc (ajp, bsp, location, ajp->masterStyle);
3583   if ( GetWWW(ajp) ) {
3584     FF_www_featloc (ffstring, str);
3585   } else {
3586     FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3587   }
3588   FFAddOneChar(ffstring, '\n', FALSE);
3589 
3590   if (gbseq != NULL) {
3591     if (gbfeat != NULL) {
3592       if (StringDoesHaveText (str)) {
3593         gbfeat->location = StringSave (str);
3594       } else {
3595         gbfeat->location = StringSave ("");
3596       }
3597       if (StringDoesHaveText (str)) {
3598         if (StringStr (str, "join") != NULL) {
3599           gbfeat->operator__ = StringSave ("join");
3600         } else if (StringStr (str, "order") != NULL) {
3601           gbfeat->operator__ = StringSave ("order");
3602         }
3603       }
3604       CheckSeqLocForPartial (location, &partial5, &partial3);
3605       gbfeat->partial5 = partial5;
3606       gbfeat->partial3 = partial3;
3607       if (ajp->masterStyle) {
3608         AddIntervalsToGbfeat (gbfeat, location, bsp);
3609       } else {
3610         AddIntervalsToGbfeat (gbfeat, location, NULL);
3611       }
3612     }
3613   }
3614 
3615   MemFree (str);
3616 
3617   orp = biop->org;
3618   if (orp != NULL) {
3619     taxname = orp->taxname;
3620     /* common = orp->common; */
3621   }
3622   if (StringHasNoText (taxname)) {
3623     if (ajp->flags.needOrganismQual) {
3624       taxname = "unknown";
3625       if (orp != NULL) {
3626         common = orp->common;
3627       }
3628 #ifdef ASN2GNBK_PRINT_UNKNOWN_ORG
3629     } else {
3630       taxname = "unknown";
3631       common = orp->common;
3632 #endif
3633     }
3634   }
3635 
3636   sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
3637   if (sep != NULL && IS_Bioseq_set (sep)) {
3638     bssp = (BioseqSetPtr) sep->data.ptrvalue;
3639     if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
3640       is_gps = TRUE;
3641     }
3642   }
3643 
3644   if (bsp != NULL) {
3645     for (sip = bsp->id; sip != NULL; sip = sip->next) {
3646       if (sip->choice == SEQID_OTHER) {
3647         is_other = TRUE;
3648       }
3649     }
3650   }
3651  
3652   if (ajp->refseqConventions) {
3653     is_other = TRUE;
3654   }
3655 
3656   /* populate qualifier table from biosource fields */
3657 
3658   qvp [SCQUAL_organism].str = taxname;
3659   qvp [SCQUAL_common_name].str = common;
3660 
3661   if (biop->is_focus) {
3662     qvp [SCQUAL_focus].ble = TRUE;
3663   }
3664 
3665   str = GetMolTypeQual (bsp);
3666   /*
3667   if (StringICmp (str, "ncRNA") == 0) {
3668     str = "other RNA";
3669   }
3670   */
3671   if (str == NULL) {
3672     switch (bsp->mol) {
3673       case Seq_mol_dna :
3674         str = "unassigned DNA";
3675         break;
3676       case Seq_mol_rna :
3677         str = "unassigned RNA";
3678         break;
3679       case Seq_mol_aa :
3680         break;
3681       default :
3682         str = "unassigned DNA";
3683         break;
3684     }
3685   }
3686   qvp [SCQUAL_mol_type].str = str;
3687 
3688   SubSourceToQualArray (biop->subtype, qvp);
3689 
3690   if (orp != NULL) {
3691     onp = orp->orgname;
3692     if (onp != NULL) {
3693       OrgModToQualArray (onp->mod, qvp);
3694     }
3695 
3696     if (! is_desc) {
3697       qvp [SCQUAL_unstructured].vnp = orp->mod;
3698     }
3699     qvp [SCQUAL_db_xref].vnp = orp->db;
3700   }
3701 
3702   if (sfp != NULL) {
3703     qvp [SCQUAL_org_xref].vnp = sfp->dbxref;
3704   }
3705 
3706   /* organelle currently prints /mitochondrion, /virion, etc. */
3707 
3708   qvp [SCQUAL_organelle].num = biop->genome;
3709 
3710   /* some qualifiers are flags in genome and names in subsource, print once with name */
3711 
3712   if (qvp [SCQUAL_ins_seq_name].ssp != NULL &&
3713       qvp [SCQUAL_organelle].num == GENOME_insertion_seq) {
3714     qvp [SCQUAL_organelle].num = 0;
3715   }
3716   if (qvp [SCQUAL_plasmid_name].ssp != NULL &&
3717       qvp [SCQUAL_organelle].num == GENOME_plasmid) {
3718     qvp [SCQUAL_organelle].num = 0;
3719   }
3720   /* AF095904.1
3721   if (qvp [SCQUAL_plastid_name].ssp != NULL &&
3722       qvp [SCQUAL_organelle].num == GENOME_plastid) {
3723     qvp [SCQUAL_organelle].num = 0;
3724   }
3725   */
3726   if (qvp [SCQUAL_transposon_name].ssp != NULL &&
3727       qvp [SCQUAL_organelle].num == GENOME_transposon) {
3728     qvp [SCQUAL_organelle].num = 0;
3729   }
3730 
3731   if (sfp != NULL) {
3732     qvp [SCQUAL_seqfeat_note].str = sfp->comment;
3733   }
3734 
3735   if (qvp [SCQUAL_fwd_primer_name].ssp != NULL ||
3736       qvp [SCQUAL_fwd_primer_seq].ssp != NULL ||
3737       qvp [SCQUAL_rev_primer_name].ssp != NULL ||
3738       qvp [SCQUAL_rev_primer_seq].ssp != NULL) {
3739     qvp [SCQUAL_PCR_primers].ble = TRUE;
3740     qvp [SCQUAL_PCR_primer_note].ble = TRUE;
3741   }
3742 
3743   if (biop->pcr_primers != NULL) {
3744     qvp [SCQUAL_PCR_reaction].prp = biop->pcr_primers;
3745   }
3746 
3747   if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
3748     /* leave metagenome_source as a separate qualifier */
3749   } else {
3750     /* move metagenome_source to note */
3751     qvp [SCQUAL_metagenome_note].omp = qvp [SCQUAL_metagenome_source].omp;
3752     qvp [SCQUAL_metagenome_source].omp = NULL;
3753   }
3754 
3755 #if 0
3756   if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
3757     /* leave mating_type as a separate qualifier */
3758   } else if (qvp [SCQUAL_sex].ssp == NULL &&  qvp [SCQUAL_mating_type].ssp != NULL) {
3759     /* move mating_type to sex if available */
3760     qvp [SCQUAL_sex].ssp = qvp [SCQUAL_mating_type].ssp;
3761     qvp [SCQUAL_mating_type].ssp = NULL;
3762   }
3763 #endif
3764 
3765   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
3766   if (sdp != NULL) {
3767     mip = (MolInfoPtr) sdp->data.ptrvalue;
3768     if (mip != NULL) {
3769       if (mip->tech == MI_TECH_est || mip->tech == MI_TECH_survey) {
3770         is_est_or_gss = TRUE;
3771       }
3772     }
3773   }
3774 
3775   /* now print qualifiers from table */
3776 
3777   qualtbl = source_qual_order;
3778   if (is_desc) {
3779     notetbl = source_desc_note_order;
3780   } else {
3781     notetbl = source_feat_note_order;
3782   }
3783 
3784   for (i = 0, idx = qualtbl [i]; idx != 0; i++, idx = qualtbl [i]) {
3785 
3786     lastomptype = 0;
3787     lastssptype = 0;
3788     switch (asn2gnbk_source_quals [idx].qualclass) {
3789 
3790       case Qual_class_ignore :
3791         break;
3792 
3793       case Qual_class_string :
3794         if (! StringHasNoText (qvp [idx].str)) {
3795           FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=", 
3796                             FALSE, FALSE, TILDE_IGNORE);
3797           FFAddTextToString(ffstring, "\"", qvp [idx].str, "\"", 
3798                             FALSE, FALSE, TILDE_TO_SPACES);
3799           FFAddOneChar(ffstring, '\n', FALSE);
3800         }
3801         break;
3802 
3803       case Qual_class_boolean :
3804         if (qvp [idx].ble) {
3805           FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "\n",
3806                             FALSE, TRUE, TILDE_IGNORE);
3807         }
3808         break;
3809 
3810       case Qual_class_organelle :
3811         j = (Int2) qvp [idx].num;
3812         if (j < sizeof (organelleQual) / sizeof (CharPtr)) {
3813           if (organelleQual [j] != NULL) {
3814             FFAddTextToString(ffstring, NULL, organelleQual[j], "\n",
3815                               FALSE, FALSE, TILDE_IGNORE);
3816           }
3817         }
3818         break;
3819 
3820       case Qual_class_orgmod :
3821         omp = qvp [idx].omp;
3822         if (lastomptype == 0 && omp != NULL) {
3823           lastomptype = omp->subtype;
3824         }
3825         while (omp != NULL && omp->subtype == lastomptype) {
3826           if (StringIsJustQuotes (omp->subname)) {
3827             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
3828                               FALSE, TRUE, TILDE_IGNORE);
3829           } else if (! StringHasNoText (omp->subname)) {
3830             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
3831                               FALSE, TRUE, TILDE_IGNORE);
3832             FFAddTextToString(ffstring, "\"", omp->subname, "\"\n",
3833                               FALSE, TRUE, TILDE_TO_SPACES);
3834           }
3835           omp = omp->next;
3836         }
3837         break;
3838 
3839       case Qual_class_voucher :
3840         omp = qvp [idx].omp;
3841         if (lastomptype == 0 && omp != NULL) {
3842           lastomptype = omp->subtype;
3843         }
3844         while (omp != NULL && omp->subtype == lastomptype) {
3845           if (StringIsJustQuotes (omp->subname)) {
3846             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
3847                               FALSE, TRUE, TILDE_IGNORE);
3848           } else if (! StringHasNoText (omp->subname)) {
3849             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"",
3850                               FALSE, TRUE, TILDE_IGNORE);
3851             FF_www_specimen_voucher(ajp, ffstring, omp->subname);
3852             FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
3853           }
3854           omp = omp->next;
3855         }
3856         break;
3857 
3858       case Qual_class_lat_lon :
3859         omp = qvp [idx].omp;
3860         if (lastomptype == 0 && omp != NULL) {
3861           lastomptype = omp->subtype;
3862         }
3863         while (omp != NULL && omp->subtype == lastomptype) {
3864           if (StringIsJustQuotes (omp->subname)) {
3865             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
3866                               FALSE, TRUE, TILDE_IGNORE);
3867           } else if (! StringHasNoText (omp->subname)) {
3868             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"",
3869                               FALSE, TRUE, TILDE_IGNORE);
3870             FF_www_lat_lon(ajp, ffstring, omp->subname);
3871             FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
3872           }
3873           omp = omp->next;
3874         }
3875         break;
3876 
3877       case Qual_class_subsource :
3878         ssp = qvp [idx].ssp;
3879         if (lastssptype == 0 && ssp != NULL) {
3880           lastssptype = ssp->subtype;
3881         }
3882         while (ssp != NULL && ssp->subtype == lastssptype) {
3883           if (ssp->subtype == SUBSRC_germline ||
3884               ssp->subtype == SUBSRC_rearranged ||
3885               ssp->subtype == SUBSRC_transgenic ||
3886               ssp->subtype == SUBSRC_environmental_sample ||
3887               ssp->subtype == SUBSRC_metagenomic) {
3888             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "\n",
3889                               FALSE, TRUE, TILDE_TO_SPACES);
3890           } else if (StringIsJustQuotes (ssp->name)) {
3891             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
3892                               FALSE, TRUE, TILDE_IGNORE);
3893           } else if (! StringHasNoText (ssp->name)) {
3894             FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
3895                               FALSE, TRUE, TILDE_IGNORE);
3896             FFAddTextToString(ffstring, "\"", ssp->name, "\"\n",
3897                               FALSE, TRUE, TILDE_TO_SPACES);
3898           }
3899           ssp = ssp->next;
3900         }
3901         break;
3902 
3903       case Qual_class_pcr :
3904         if (qvp [idx].ble) {
3905           lastssptype = 0;
3906           pset = ParsePCRPrimerString (qvp);
3907           for (vnp = pset; vnp != NULL; vnp = vnp->next) {
3908             psp = (PcrSetPtr) vnp->data.ptrvalue;
3909             if (psp == NULL) continue;
3910             str = NextPCRPrimerString (psp, FALSE, (Boolean) (pset->next != NULL));
3911             if (str == NULL) continue;
3912             if (! StringHasNoText (str)) {
3913               FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
3914                                 FALSE, TRUE, TILDE_IGNORE);
3915               FFAddTextToString(ffstring, "\"", str, "\"\n",
3916                                 FALSE, TRUE, TILDE_TO_SPACES);
3917             }
3918             MemFree (str);
3919           }
3920           FreePCRSet (pset);
3921         }
3922         break;
3923 
3924       case Qual_class_pcr_react :
3925         prp = qvp [idx].prp;
3926         while (prp != NULL) {
3927           str = NextPCRReaction (prp, FALSE, (Boolean) (prp->next != NULL));
3928           if (StringDoesHaveText (str)) {
3929             FFAddTextToString (ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
3930                                FALSE, TRUE, TILDE_IGNORE);
3931             FFAddTextToString (ffstring, "\"", str, "\"\n",
3932                                FALSE, TRUE, TILDE_TO_SPACES);
3933           }
3934           MemFree (str);
3935           prp = prp->next;
3936         }
3937         break;
3938 
3939       case Qual_class_pubset :
3940         break;
3941 
3942       case Qual_class_quote :
3943         break;
3944 
3945       case Qual_class_noquote :
3946         break;
3947 
3948       case Qual_class_label :
3949         break;
3950 
3951       case Qual_class_db_xref :
3952         for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
3953           buf [0] = '\0';
3954           dbt = (DbtagPtr) vnp->data.ptrvalue;
3955           if (dbt != NULL && (! StringHasNoText (dbt->db))) {
3956             oip = dbt->tag;
3957             if (oip != NULL) {
3958 
3959               okay = TRUE;
3960               if (ajp->flags.dropBadDbxref) {
3961                 /* if RELEASE_MODE, drop unknown dbtag */
3962 
3963                 okay = FALSE;
3964                 if (DbxrefIsValid (dbt->db, &is_rf, &is_sc, &is_bc, NULL)) {
3965                   if (is_bc) {
3966                     /* case counts, so suppress if bad case */
3967                   } else if (is_rf && (is_other || is_gps)) {
3968                     /* allow refseq dbxrefs in source feature */
3969                     okay = TRUE;
3970                   } else if (is_sc) {
3971                     /* expect it to be in legalSrcDbXrefs list */
3972                     okay = TRUE;
3973                   } else if (is_est_or_gss) {
3974                     /* EST and GSS records only have source feature, so allow anything */
3975                     okay = TRUE;
3976                   } else {
3977                     /* suppress regular dbxrefs, also warn in validator */
3978                   }
3979                 }
3980 
3981                 /*
3982                 okay = FALSE;
3983                 for (j = 0; legalDbXrefs [j] != NULL; j++) {
3984                   if (StringCmp (dbt->db, legalDbXrefs [j]) == 0) {
3985                     okay = TRUE;
3986                   }
3987                 }
3988                 */
3989               }
3990 
3991               if (okay) {
3992                 if (! StringHasNoText (oip->str)) {
3993                   if (StringLen (dbt->db) + StringLen (oip->str) < 80) {
3994                     sprintf (buf, "%s", oip->str);
3995                   }
3996                 } else {
3997                   sprintf (buf, "%ld", (long) oip->id);
3998                 }
3999               }
4000             }
4001           }
4002           if (StringDoesHaveText (buf) && dbt != NULL) {
4003             FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
4004             FF_www_db_xref(ajp, ffstring, dbt->db, buf, bsp);
4005             FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4006           }
4007         }
4008         break;
4009 
4010       case Qual_class_illegal :
4011         break;
4012 
4013       case Qual_class_note :
4014         if (! ajp->flags.srcQualsToNote) {
4015 
4016           /* in sequin_mode and dump_mode, all orgmods and subsources show up as separate /qualifiers */
4017 
4018           for (j = 0, jdx = notetbl [j]; jdx != 0; j++, jdx = notetbl [j]) {
4019 
4020             lastomptype = 0;
4021             lastssptype = 0;
4022             switch (asn2gnbk_source_quals [jdx].qualclass) {
4023 
4024               case Qual_class_orgmod :
4025                 if (jdx == SCQUAL_orgmod_note) break;
4026                 omp = qvp [jdx].omp;
4027                 if (lastomptype == 0 && omp != NULL) {
4028                   lastomptype = omp->subtype;
4029                 }
4030                 while (omp != NULL && omp->subtype == lastomptype) {
4031                   if (StringIsJustQuotes (omp->subname)) {
4032                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
4033                               FALSE, TRUE, TILDE_IGNORE);
4034                   } else if (! StringHasNoText (omp->subname)) {
4035                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=",
4036                                       FALSE, TRUE, TILDE_IGNORE);
4037                     FFAddTextToString(ffstring, "\"", omp->subname, "\"\n",
4038                                       FALSE, TRUE, TILDE_TO_SPACES);
4039                   }
4040                   omp = omp->next;
4041                 }
4042                 break;
4043 
4044               case Qual_class_voucher :
4045                 if (jdx == SCQUAL_orgmod_note) break;
4046                 omp = qvp [jdx].omp;
4047                 if (lastomptype == 0 && omp != NULL) {
4048                   lastomptype = omp->subtype;
4049                 }
4050                 while (omp != NULL && omp->subtype == lastomptype) {
4051                   if (StringIsJustQuotes (omp->subname)) {
4052                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
4053                               FALSE, TRUE, TILDE_IGNORE);
4054                   } else if (! StringHasNoText (omp->subname)) {
4055                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"",
4056                                       FALSE, TRUE, TILDE_IGNORE);
4057                     FF_www_specimen_voucher(ajp, ffstring, omp->subname);
4058                     FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4059                   }
4060                   omp = omp->next;
4061                 }
4062                 break;
4063 
4064               case Qual_class_subsource :
4065                 if (jdx == SCQUAL_subsource_note) break;
4066                 ssp = qvp [jdx].ssp;
4067                 if (lastssptype == 0 && ssp != NULL) {
4068                   lastssptype = ssp->subtype;
4069                 }
4070                 while (ssp != NULL && ssp->subtype == lastssptype) {
4071                   if (ssp->subtype == SUBSRC_germline ||
4072                       ssp->subtype == SUBSRC_rearranged ||
4073                       ssp->subtype == SUBSRC_transgenic ||
4074                       ssp->subtype == SUBSRC_environmental_sample ||
4075                       ssp->subtype == SUBSRC_metagenomic) {
4076                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "\n",
4077                                       FALSE, TRUE, TILDE_TO_SPACES);
4078                   } else if (StringIsJustQuotes (ssp->name)) {
4079                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
4080                                       FALSE, TRUE, TILDE_IGNORE);
4081 
4082                   } else if (! StringHasNoText (ssp->name)) {
4083                     FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=",
4084                                       FALSE, TRUE, TILDE_IGNORE);
4085                     FFAddTextToString(ffstring, "\"", ssp->name, "\"\n",
4086                                       FALSE, TRUE, TILDE_TO_SPACES);
4087                   }
4088                   ssp = ssp->next;
4089                 }
4090                 break;
4091 
4092               default :
4093                 break;
4094             }
4095           }
4096         }
4097 
4098         notestr = NULL;
4099         prefix = "";
4100         add_period = FALSE;
4101 
4102         if (biop->genome == 8) {
4103           FFAddTextToString(unique, "", "extrachromosomal", NULL, FALSE, FALSE, TILDE_IGNORE); 
4104           prefix = "\n";
4105         }
4106 
4107         for (j = 0, jdx = notetbl [j]; jdx != 0; j++, jdx = notetbl [j]) {
4108 
4109           lastomptype = 0;
4110           lastssptype = 0;
4111           switch (asn2gnbk_source_quals [jdx].qualclass) {
4112 
4113             case Qual_class_string :
4114               if (! StringHasNoText (qvp [jdx].str)) {
4115                 FFAddString_NoRedund (unique, prefix, qvp [jdx].str, NULL, FALSE);
4116                 add_period = FALSE;
4117                 prefix = "\n";
4118               }
4119               break;
4120 
4121             case Qual_class_orgmod :
4122             case Qual_class_voucher :
4123               if ((! ajp->flags.srcQualsToNote) && jdx != SCQUAL_orgmod_note) break;
4124               omp = qvp [jdx].omp;
4125               if (lastomptype == 0 && omp != NULL) {
4126                 lastomptype = omp->subtype;
4127               }
4128               while (omp != NULL && omp->subtype == lastomptype) {
4129                 if (! StringHasNoText (omp->subname)) {
4130                   if (jdx == SCQUAL_orgmod_note) {
4131                     sprintf (buf, "%s", prefix);
4132                   } else {
4133                     sprintf (buf, "%s%s: ", prefix, asn2gnbk_source_quals [jdx].name);
4134                   }
4135 
4136                   str = StringSave (omp->subname);
4137                   add_period = s_RemovePeriodFromEnd (str);
4138                   if (jdx == SCQUAL_orgmod_note) {
4139                     FFAddString_NoRedund (unique, buf, str, NULL, FALSE);
4140                   } else {
4141                     FFAddTextToString(unique, buf, str, NULL, FALSE, FALSE, TILDE_IGNORE);
4142                   }
4143                   MemFree (str);
4144 
4145                   if (jdx == SCQUAL_orgmod_note) {
4146                     if (add_period) {
4147                       prefix = ".\n";
4148                     } else {
4149                       prefix = "\n";
4150                     }
4151                   } else {
4152                     prefix = "; ";
4153                   }
4154                 }
4155                 omp = omp->next;
4156               }
4157               break;
4158 
4159             case Qual_class_subsource :
4160               if ((! ajp->flags.srcQualsToNote) && jdx != SCQUAL_subsource_note) break;
4161               ssp = qvp [jdx].ssp;
4162               if (lastssptype == 0 && ssp != NULL) {
4163                 lastssptype = ssp->subtype;
4164               }
4165               while (ssp != NULL && ssp->subtype == lastssptype) {
4166                 if (ssp->subtype == SUBSRC_germline ||
4167                     ssp->subtype == SUBSRC_rearranged ||
4168                     ssp->subtype == SUBSRC_transgenic ||
4169                     ssp->subtype == SUBSRC_environmental_sample ||
4170                     ssp->subtype == SUBSRC_metagenomic) {
4171                   FFAddTextToString (unique, prefix, asn2gnbk_source_quals [jdx].name, NULL, FALSE, FALSE, TILDE_IGNORE);
4172                   prefix = "; ";
4173                 } else if (! StringHasNoText (ssp->name)) {
4174                   if (jdx == SCQUAL_subsource_note) {
4175                     sprintf (buf, "%s", prefix);
4176                   } else {
4177                     sprintf (buf, "%s%s: ", prefix, asn2gnbk_source_quals [jdx].name);
4178                   }
4179 
4180                   str = StringSave (ssp->name);
4181                   add_period = s_RemovePeriodFromEnd (str);
4182                   if (jdx == SCQUAL_subsource_note) {
4183                     FFAddString_NoRedund (unique, buf, str, NULL, FALSE);
4184                   } else {
4185                     FFAddTextToString(unique, buf, str, NULL, FALSE, FALSE, TILDE_IGNORE);
4186                   }
4187                   MemFree (str);
4188 
4189                   if (jdx == SCQUAL_subsource_note) {
4190                     if (add_period) {
4191                       prefix = ".\n";
4192                     } else {
4193                       prefix = "\n";
4194                     }
4195                   } else {
4196                     prefix = "; ";
4197                  }
4198                 }
4199                 ssp = ssp->next;
4200               }
4201               break;
4202 
4203             case Qual_class_pcr :
4204               if (qvp [jdx].ble) {
4205                 lastssptype = 0;
4206                 pset = ParsePCRPrimerString (qvp);
4207                 for (vnp = pset; vnp != NULL; vnp = vnp->next) {
4208                   psp = (PcrSetPtr) vnp->data.ptrvalue;
4209                   if (psp == NULL) continue;
4210                   str = NextPCRPrimerString (psp, TRUE, (Boolean) (pset->next != NULL));
4211                   if (str == NULL) continue;
4212                   if (! StringHasNoText (str)) {
4213                     FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
4214                     add_period = FALSE;
4215                     prefix = "; ";
4216                   }
4217                   MemFree (str);
4218                 }
4219                 FreePCRSet (pset);
4220               }
4221               break;
4222 
4223             case Qual_class_pcr_react :
4224               prp = qvp [jdx].prp;
4225               while (prp != NULL) {
4226                 str = NextPCRReaction (prp, TRUE, (Boolean) (prp->next != NULL));
4227                 if (StringDoesHaveText (str)) {
4228                   FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
4229                   add_period = FALSE;
4230                   prefix = "; ";
4231                 }
4232                 MemFree (str);
4233                 prp = prp->next;
4234               }
4235               break;
4236 
4237             case Qual_class_valnode :
4238               for (vnp = qvp [jdx].vnp; vnp != NULL; vnp = vnp->next) {
4239                 str = (CharPtr) vnp->data.ptrvalue;
4240                 if (! StringHasNoText (str)) {
4241                   FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
4242                   add_period = FALSE;
4243                   prefix = "; ";
4244                 }
4245               }
4246               break;
4247 
4248             default :
4249               break;
4250           }
4251         }
4252         if ( !FFEmpty(unique) ) {
4253           notestr = FFToCharPtr(unique);
4254         
4255           if (add_period) {
4256             s_AddPeriodToEnd (notestr);
4257           }
4258 
4259 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
4260           if (! IsEllipsis (notestr))
4261             s_RemovePeriodFromEnd (notestr);
4262 #endif
4263 
4264           FFAddOneString (ffstring, "/note=\"", FALSE, FALSE, TILDE_IGNORE);
4265           if (is_desc) {
4266             /* AB055064.1 said TILDE_IGNORE on descriptors, but now changing policy */
4267             FFAddOneString (ffstring, notestr, FALSE, TRUE, /* TILDE_IGNORE */ /* TILDE_EXPAND */ TILDE_SEMICOLON);
4268           } else {
4269             /* ASZ93724.1 said TILDE_EXPAND on features, but record does not exist */
4270             FFAddOneString (ffstring, notestr, FALSE, TRUE, /* TILDE_EXPAND */ TILDE_SEMICOLON);
4271           }
4272           FFAddOneString (ffstring, "\"", FALSE, FALSE, TILDE_IGNORE);
4273 
4274           MemFree (notestr);
4275         }
4276         break;
4277       default :
4278         break;
4279     }
4280   }
4281 
4282   /* and then deal with the various note types separately (not in order table) */
4283 
4284   str = FFEndPrint(ajp, ffstring, afp->format, 21, 21, 5, 21, "FT"); 
4285 
4286   /* optionally populate gbseq for XML-ized GenBank format */
4287 
4288   if (gbseq != NULL) {
4289     if (gbfeat != NULL) {
4290       AddFeatureToGbseq (gbseq, gbfeat, str, NULL);
4291     }
4292   }
4293 
4294   FFRecycleString(ajp, unique);
4295   FFRecycleString(ajp, ffstring);
4296   return str;
4297 }
4298 
4299 static void LIBCALLBACK CountBasesByStream (
4300   CharPtr sequence,
4301   Pointer userdata
4302 )
4303 
4304 {
4305   Int4Ptr  base_count;
4306   Char     ch;
4307   CharPtr  ptr;
4308 
4309   base_count = (Int4Ptr) userdata;
4310 
4311   ptr = sequence;
4312   ch = *ptr;
4313   while (ch != '\0') {
4314     ch = TO_UPPER (ch);
4315     switch (ch) {
4316       case 'A' :
4317         (base_count [0])++;
4318         break;
4319       case 'C' :
4320         (base_count [1])++;
4321         break;
4322       case 'G' :
4323         (base_count [2])++;
4324         break;
4325       case 'T' :
4326         (base_count [3])++;
4327         break;
4328       default :
4329         (base_count [4])++;
4330         break;
4331     }
4332     ptr++;
4333     ch = *ptr;
4334   }
4335 }
4336 
4337 NLM_EXTERN CharPtr FormatBasecountBlock (
4338   Asn2gbFormatPtr afp,
4339   BaseBlockPtr bbp
4340 )
4341 
4342 {
4343   IntAsn2gbJobPtr  ajp;
4344   Asn2gbSectPtr    asp;
4345   Int4             base_count [5];
4346   BioseqPtr        bsp;
4347   Char             buf [80];
4348   Int2             i;
4349   Int4             len;
4350   StringItemPtr    ffstring;
4351   CharPtr          str;
4352 
4353   if (afp == NULL || bbp == NULL) return NULL;
4354   ajp = afp->ajp;
4355   if (ajp == NULL) return NULL;
4356 
4357   asp = afp->asp;
4358   if (asp == NULL) return NULL;
4359   bsp = (asp->bsp);
4360   if (bsp == NULL) return NULL;
4361 
4362   /* after first formatting, result is cached into bbp->string */
4363 
4364   if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
4365 
4366   for (i = 0; i < 5; i++) {
4367     base_count [i] = 0;
4368   }
4369 
4370   if (ajp->ajp.slp != NULL) {
4371     len = SeqLocLen (ajp->ajp.slp);
4372     SeqPortStreamLoc (ajp->ajp.slp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
4373   } else {
4374     len = bsp->length;
4375     SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
4376   }
4377 
4378   if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
4379 
4380     if (base_count [4] == 0) {
4381       sprintf (buf, "%7ld a%7ld c%7ld g%7ld t",
4382                (long) base_count [0], (long) base_count [1],
4383                (long) base_count [2], (long) base_count [3]);
4384     } else {
4385       sprintf (buf, "%7ld a%7ld c%7ld g%7ld t%7ld others",
4386                (long) base_count [0], (long) base_count [1],
4387                (long) base_count [2], (long) base_count [3],
4388                (long) base_count [4]);
4389     }
4390 
4391   } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
4392 
4393     sprintf (buf, "Sequence %ld BP; %ld A; %ld C; %ld G; %ld T; %ld other;",
4394              (long) len,
4395              (long) base_count [0], (long) base_count [1],
4396              (long) base_count [2], (long) base_count [3],
4397              (long) base_count [4]);
4398   }
4399 
4400   ffstring = FFGetString(ajp);
4401   if ( ffstring == NULL ) return NULL;
4402 
4403   if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
4404     FFAddOneString(ffstring, "XX\n", FALSE, FALSE, TILDE_IGNORE);
4405   }
4406   FFStartPrint (ffstring, afp->format, 0, 0, "BASE COUNT", 12, 5, 5, "SQ", FALSE);
4407   FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
4408   str = FFEndPrint(ajp, ffstring, afp->format, 12, 0, 5, 5, "SQ");
4409   FFRecycleString(ajp, ffstring);
4410 
4411   return str;
4412 }
4413 
4414 static void PrintSeqLine (
4415   StringItemPtr ffstring,
4416   FmtType format,
4417   CharPtr buf,
4418   Int4 start,
4419   Int4 stop
4420 )
4421 
4422 {
4423   size_t  len;
4424   Char    pos [16];
4425   Int4    pad;
4426 
4427   len = StringLen (buf);
4428   if (len > 0 && buf [len - 1] == ' ') {
4429     buf [len - 1] = '\0';
4430   }
4431 
4432   if (format == GENBANK_FMT || format == GENPEPT_FMT) {
4433 
4434     sprintf (pos, "%9ld", (long) (start + 1));
4435     FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES);
4436     FFAddOneChar(ffstring, ' ', FALSE);
4437     FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
4438     FFAddOneChar(ffstring, '\n', FALSE);
4439   } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
4440 
4441     sprintf (pos, "%8ld", (long) (stop));
4442     FFAddNChar(ffstring, ' ', 5, FALSE);
4443     FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
4444     pad = 72 - 5 - StringLen(buf);
4445     FFAddNChar(ffstring, ' ', pad, FALSE);
4446     FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES);
4447     FFAddOneChar(ffstring, '\n', FALSE);
4448   }
4449 }
4450 
4451 static CharPtr CompressNonBases (CharPtr str)
4452 
4453 {
4454   Char     ch;
4455   CharPtr  dst;
4456   CharPtr  ptr;
4457 
4458   if (str == NULL || str [0] == '\0') return NULL;
4459 
4460   dst = str;
4461   ptr = str;
4462   ch = *ptr;
4463   while (ch != '\0') {
4464     if (IS_ALPHA (ch)) {
4465       *dst = ch;
4466       dst++;
4467     }
4468     ptr++;
4469     ch = *ptr;
4470   }
4471   *dst = '\0';
4472 
4473   return str;
4474 }
4475 
  /*
   * Per-Seq-id-type ranking table, indexed by the Seq-id choice value
   * shown in each entry's comment.  PrintGenome passes it to SeqIdSelect
   * to choose which identifier to print for a location.
   * NOTE(review): presumably lower values are preferred and 255 keeps the
   * gi from being chosen -- confirm against SeqIdSelect.
   */
  static Uint1 fasta_order [NUM_SEQID] = {
    33, /* 0 = not set */
    20, /* 1 = local Object-id */
    15, /* 2 = gibbsq */
    16, /* 3 = gibbmt */
    30, /* 4 = giim Giimport-id */
    10, /* 5 = genbank */
    10, /* 6 = embl */
    10, /* 7 = pir */
    10, /* 8 = swissprot */
    15, /* 9 = patent */
    20, /* 10 = other TextSeqId */
    20, /* 11 = general Dbtag */
    255, /* 12 = gi */
    10, /* 13 = ddbj */
    10, /* 14 = prf */
    12, /* 15 = pdb */
    10, /* 16 = tpg */
    10, /* 17 = tpe */
    10, /* 18 = tpd */
    10, /* 19 = gpp */
    10  /* 20 = nat */
  };
4499 
4500 static void PrintGenome (
4501   IntAsn2gbJobPtr ajp,
4502   StringItemPtr ffstring,
4503   SeqLocPtr slp_head, 
4504   CharPtr prefix, 
4505   Boolean segWithParts,
4506   Boolean is_na
4507 )
4508 {
4509   Char         buf[40], gibuf [32], vbuf [80];
4510   Boolean      first = TRUE;
4511   SeqIdPtr     freeid = NULL, sid = NULL, newid = NULL;
4512   SeqLocPtr    slp = NULL;
4513   Int4         from = 0, to = 0, start = 0, stop = 0, gi = 0;
4514   BioseqPtr    bsp = NULL;
4515   Int2         p1 = 0, p2 = 0;
4516 
4517   buf [0] = '\0';
4518   gibuf [0] = '\0';
4519   vbuf [0] = '\0';
4520   for (slp = slp_head; slp; slp = slp->next) {
4521     from = to = 0;
4522     sid = SeqLocId (slp);
4523     if (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE) {
4524       start = from = SeqLocStart (slp);
4525       stop = to = SeqLocStop (slp);
4526     } else if (slp->choice == SEQLOC_NULL){
4527       sprintf (vbuf, ",%s", "gap()");
4528       FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4529       continue;
4530     } else {
4531       continue;
4532     }
4533     if (sid == NULL) {
4534       continue;
4535     }
4536     newid = NULL;
4537     freeid = NULL;
4538     buf [0] = '\0';
4539     gi = 0;
4540     if (sid->choice == SEQID_GI) {
4541       gi = sid->data.intvalue;
4542       if (GetAccnVerFromServer (gi, buf)) {
4543         /* no need to call GetSeqIdForGI */
4544       } else {
4545         newid = GetSeqIdForGI (gi);
4546         if (newid != NULL) {
4547           freeid = newid;
4548         }
4549         if (newid != NULL && segWithParts) {
4550           if (newid->choice == SEQID_GIBBSQ ||
4551               newid->choice == SEQID_GIBBMT ||
4552               newid->choice == SEQID_GIIM) {
4553             bsp = BioseqFind (newid);
4554             if (bsp != NULL && bsp->repr == Seq_repr_virtual) {
4555               if (bsp->length > 0) {
4556                 sprintf (vbuf, ",gap(%ld)", (long) bsp->length);
4557               } else {
4558                 sprintf (vbuf, ",%s", "gap()");
4559               }
4560               FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4561               continue;
4562             }
4563           }
4564         }
4565       }
4566     } else if (sid->choice == SEQID_GENERAL) {
4567       newid = sid;
4568     } else {
4569       newid = sid;
4570       gi = GetGIForSeqId (sid);
4571     }
4572     if (prefix != NULL) {
4573       FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
4574     }
4575     if (first) {
4576       first = FALSE;
4577     } else {
4578       FFAddOneChar (ffstring, ',', FALSE);
4579       /*ff_AddChar(',');*/
4580     }
4581     if (! StringHasNoText (buf)) {
4582       /* filled in by GetAccnVerFromServer */
4583     } else if (newid != NULL) {
4584       SeqIdWrite (SeqIdSelect (newid, fasta_order, NUM_SEQID),
4585                  buf, PRINTID_TEXTID_ACC_VER, sizeof(buf) -1 );
4586     } else if (sid->choice == SEQID_GI) {
4587       SeqIdWrite (sid, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
4588     }
4589 
4590     if (SeqLocStrand (slp) == Seq_strand_minus) {
4591       FFAddOneString (ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
4592     }
4593     if ( GetWWW (ajp) && gi > 0) {
4594       if (newid == NULL) {
4595         newid = sid;
4596       }
4597       if (newid->choice != SEQID_GENERAL) {
4598         FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4599         if (is_na) {
4600           FF_Add_NCBI_Base_URL (ffstring, link_seqn);
4601         } else {
4602           FF_Add_NCBI_Base_URL (ffstring, link_seqp);
4603         }
4604         sprintf (gibuf, "%ld", (long) gi);
4605         FFAddTextToString (ffstring, /* "val=" */ NULL, gibuf, "\">", FALSE, FALSE, TILDE_IGNORE);
4606         FFAddTextToString (ffstring, NULL, buf, "</a>", FALSE, FALSE, TILDE_IGNORE);
4607       }
4608     } else {
4609       FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4610     }
4611 
4612     if (SeqLocStrand (slp) == Seq_strand_minus) {
4613       sprintf (vbuf,":%ld..%ld)", (long) start+1, (long) stop+1);
4614     } else {
4615       sprintf (vbuf,":%ld..%ld", (long) start+1, (long) stop+1);
4616     }
4617     FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4618     p1 += StringLen (vbuf);
4619     p2 += StringLen (vbuf);
4620     if (freeid != NULL) {
4621       freeid = SeqIdFree (freeid);
4622     }
4623   }
4624 }
4625 
4626 NLM_EXTERN CharPtr FormatContigBlock (
4627   Asn2gbFormatPtr afp,
4628   BaseBlockPtr bbp
4629 )
4630 
4631 {
4632   IntAsn2gbJobPtr  ajp;
4633   Asn2gbSectPtr    asp;
4634   BioseqPtr        bsp;
4635   DeltaSeqPtr      dsp;
4636   IntFuzzPtr       fuzz;
4637   GBSeqPtr         gbseq;
4638   Boolean          is_na;
4639   SeqLitPtr        litp;
4640   CharPtr          prefix = NULL;
4641   Boolean          segWithParts = FALSE;
4642   SeqLocPtr        slp_head = NULL;
4643   CharPtr          str;
4644   Char             tmp [16];
4645   Boolean          unknown;
4646   Char             vbuf [32];
4647   StringItemPtr    ffstring;
4648 /*  CharPtr          label;*/
4649 
4650   if (afp == NULL || bbp == NULL) return NULL;
4651   ajp = afp->ajp;
4652   if (ajp == NULL) return NULL;
4653   asp = afp->asp;
4654   if (asp == NULL) return NULL;
4655   bsp = (asp->bsp);
4656   if (bsp == NULL) return NULL;
4657 
4658   ffstring = FFGetString (ajp);
4659   if ( ffstring == NULL ) return NULL;
4660 
4661   is_na = ISA_na (bsp->mol);
4662 
4663   FFStartPrint (ffstring, afp->format, 0, 0, "CONTIG", 12, 5, 5, "CO", FALSE);
4664   /*
4665   if ( GetWWW(ajp) ) {
4666     label = "CONTIG   ";
4667   } else {
4668     label = "CONTIG";
4669   }
4670   
4671   FFAddOneString(ffstring, label,  FALSE, FALSE, TILDE_IGNORE);  
4672   FFAddNChar(ffstring, ' ', 12 - StringLen(label), FALSE);
4673   */
4674 
4675   FFAddOneString (ffstring, "join(", FALSE, FALSE, TILDE_IGNORE);
4676 
4677   if (bsp->seq_ext_type == 1) {
4678 
4679     if (bsp->repr == Seq_repr_seg && SegHasParts (bsp)) {
4680       segWithParts = TRUE;
4681     }
4682 
4683     slp_head = (SeqLocPtr) bsp->seq_ext;
4684     PrintGenome (ajp, ffstring, slp_head, prefix, segWithParts, is_na);
4685 
4686   } else if (bsp->seq_ext_type == 4) {
4687 
4688     for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp != NULL; dsp=dsp->next) {
4689       if (dsp->choice == 1) {
4690 
4691         slp_head = (SeqLocPtr) dsp->data.ptrvalue;
4692         PrintGenome (ajp, ffstring, slp_head, prefix, FALSE, is_na);
4693 
4694       } else {
4695 
4696         litp = (SeqLitPtr) dsp->data.ptrvalue;
4697         if (litp != NULL) {
4698           if (litp->seq_data != NULL && litp->seq_data_type != Seq_code_gap) {
4699             if (litp->length == 0) {
4700               sprintf (vbuf, "gap(%ld)", (long) litp->length);
4701               FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4702             } else {
4703               /* don't know what to do here */
4704             }
4705           } else {
4706             unknown = FALSE;
4707             fuzz = litp->fuzz;
4708             if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
4709               unknown = TRUE;
4710             }
4711             if (unknown && litp->length > 0) {
4712               sprintf (tmp, "unk%ld", (long) litp->length);
4713             } else {
4714               sprintf (tmp, "%ld", (long) litp->length);
4715             }
4716             if (prefix != NULL) {
4717               sprintf (vbuf, "%sgap(%s)", prefix, tmp);
4718             } else {
4719               sprintf (vbuf, "gap(%s)", tmp);
4720             }
4721             FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4722           }
4723         }
4724       }
4725 
4726       prefix = ",";
4727     }
4728   }
4729 
4730   FFAddOneChar (ffstring, ')', FALSE);
4731 
4732   str = FFEndPrint (ajp, ffstring, afp->format, 12, 12, 5, 5, "CO");
4733   FFRecycleString (ajp, ffstring);
4734 
4735   /* optionally populate gbseq for XML-ized GenBank format */
4736 
4737   if (ajp->gbseq) {
4738     gbseq = &asp->gbseq;
4739   } else {
4740     gbseq = NULL;
4741   }
4742 
4743   if (gbseq != NULL) {
4744     if (StringLen (str) > 12) {
4745       gbseq->contig = StringSave (str + 12);
4746     } else {
4747       gbseq->contig = StringSave (str);
4748     }
4749 
4750     CleanQualValue (gbseq->contig);
4751     Asn2gnbkCompressSpaces (gbseq->contig);
4752     StripAllSpaces (gbseq->contig);
4753   }
4754 
4755   return str;
4756 }
4757 
4758 static void LIBCALLBACK SaveGBSeqSequence (
4759   CharPtr sequence,
4760   Pointer userdata
4761 )
4762 
4763 {
4764   CharPtr       tmp;
4765   CharPtr PNTR  tmpp;
4766 
4767   tmpp = (CharPtr PNTR) userdata;
4768   tmp = *tmpp;
4769 
4770   tmp = StringMove (tmp, sequence);
4771 
4772   *tmpp = tmp;
4773 }
4774 
4775 static Boolean InGapBlock (
4776   IntAsn2gbJobPtr ajp
4777 )
4778 
4779 {
4780   return (Boolean) (ajp->seqGapCurrLen > 0);
4781 }
4782 
4783 static Boolean LineIsAllGaps (
4784   CharPtr ptr
4785 )
4786 
4787 {
4788   Char  ch;
4789   Int2  j;
4790 
4791   for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
4792     if (ch != '-') return FALSE;
4793   }
4794   if (j == 60) return TRUE;
4795   return FALSE;
4796 }
4797 
4798 static Int2 GapAtStart (
4799   CharPtr ptr
4800 )
4801 
4802 {
4803   Char  ch;
4804   Int2  j;
4805 
4806   for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
4807     if (ch != '-') return j;
4808   }
4809   return 0;
4810 }
4811 
4812 static void FixGapAtStart (
4813   CharPtr ptr,
4814   Char pad
4815 )
4816 
4817 {
4818   Char  ch;
4819   Int2  j;
4820 
4821   for (ch = *ptr, j = 0; ch == '-' && j < 60; ptr++, ch = *ptr, j++) {
4822     *ptr = pad;
4823   }
4824 }
4825 
4826 static Int2 GapAtEnd (
4827   CharPtr ptr
4828 )
4829 
4830 {
4831   Char  ch;
4832   Int2  j;
4833   Int2  k;
4834 
4835   for (ch = *ptr, j = 0, k = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
4836     if (ch == '-') {
4837       k++;
4838     } else {
4839       k = 0;
4840     }
4841   }
4842   return k;
4843 }
4844 
4845 static void FixGapAtEnd (
4846   CharPtr ptr,
4847   Char pad
4848 )
4849 
4850 {
4851   Char  ch;
4852   Int2  j;
4853 
4854   j = StringLen (ptr) - GapAtEnd (ptr);
4855   ptr += j;
4856   for (ch = *ptr; ch == '-' && j < 60; ptr++, ch = *ptr, j++) {
4857     *ptr = pad;
4858   }
4859 }
4860 
4861 static void FixRemainingGaps (
4862   CharPtr ptr,
4863   Char pad
4864 )
4865 
4866 {
4867   Char  ch;
4868   Int2  j;
4869 
4870   for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
4871     if (ch == '-') {
4872       *ptr = pad;
4873     }
4874   }
4875 }
4876 
4877 static void ExpandSeqLine (
4878   CharPtr buf
4879 )
4880 
4881 {
4882   Char     ch;
4883   Int2     blk, count, lin;
4884   CharPtr  ptr;
4885   Char     seq [80];
4886 
4887   StringCpy (seq, buf);
4888 
4889   count = 0;
4890   blk = 0;
4891   lin = 0;
4892 
4893   ptr = seq;
4894   ch = *ptr;
4895 
4896   while (ch != '\0') {
4897     buf [count] = ch;
4898     count++;
4899     ptr++;
4900     ch = *ptr;
4901 
4902     blk++;
4903     lin++;
4904     if (blk >= 10 && lin < 60) {
4905 
4906       buf [count] = ' ';
4907       count++;
4908       blk = 0;
4909 
4910     }
4911   }
4912 
4913   buf [count] = '\0';
4914 }
4915 
4916 static Int2 ProcessGapSpecialFormat (
4917   Asn2gbFormatPtr afp,
4918   IntAsn2gbJobPtr ajp,
4919   BioseqPtr bsp,
4920   StringItemPtr ffstring,
4921   CharPtr buf,
4922   CharPtr nextchars
4923 )
4924 
4925 {
4926   Char      fmt_buf [64];
4927   Char      gapbuf [80];
4928   Int4      gi;
4929   Char      gi_buf [16];
4930   Boolean   is_na;
4931   Char      pad;
4932   SeqIdPtr  sip;
4933   Int2      startgapgap = 0, endgap = 0;
4934 
4935   is_na = ISA_na (bsp->mol);
4936   if (is_na) {
4937     pad = 'n';
4938   } else {
4939     pad = 'x';
4940   }
4941 
4942   if (LineIsAllGaps (buf)) {
4943     ajp->seqGapCurrLen += StringLen (buf);
4944     *buf = '\0';
4945     return 0;
4946   }
4947 
4948   startgapgap = GapAtStart (buf);
4949   if (InGapBlock (ajp)) {
4950     ajp->seqGapCurrLen += startgapgap;
4951     if (is_na) {
4952       sprintf (gapbuf, "          [gap %ld bp]", (long) ajp->seqGapCurrLen);
4953     } else {
4954       sprintf (gapbuf, "          [gap %ld aa]", (long) ajp->seqGapCurrLen);
4955     }
4956     FFAddOneString (ffstring, gapbuf, FALSE, FALSE, TILDE_TO_SPACES);
4957     if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && afp != NULL &&
4958       (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
4959       gi = 0;
4960       for (sip = bsp->id; sip != NULL; sip = sip->next) {
4961         if (sip->choice == SEQID_GI) {
4962           gi = (Int4) sip->data.intvalue;
4963         }
4964       }
4965       if (gi > 0) {
4966         sprintf(gi_buf, "%ld", (long) gi);
4967         sprintf(fmt_buf, "?fmt_mask=%ld", (long) EXPANDED_GAP_DISPLAY);
4968         if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
4969           StringCat (fmt_buf, "&report=gbwithparts");
4970         }
4971         FFAddOneString (ffstring, "    <a href=\"", FALSE, FALSE, TILDE_IGNORE);
4972         if (is_na) {
4973           FF_Add_NCBI_Base_URL (ffstring, link_featn);
4974         } else {
4975           FF_Add_NCBI_Base_URL (ffstring, link_featp);
4976         }
4977         FFAddOneString (ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE);
4978         FFAddOneString (ffstring, fmt_buf, FALSE, FALSE, TILDE_IGNORE);
4979         FFAddOneString (ffstring, "\">Expand Ns", FALSE, FALSE, TILDE_IGNORE);
4980         FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4981       }
4982     }
4983     FFAddOneChar (ffstring, '\n', FALSE);
4984     ajp->seqGapCurrLen = 0;
4985     FixGapAtStart (buf, ' ');
4986   } else if (startgapgap > 0) {
4987     FixGapAtStart (buf, pad);
4988   }
4989 
4990   endgap = GapAtEnd (buf);
4991   if (LineIsAllGaps (nextchars)) {
4992     FixGapAtEnd (buf, ' ');
4993     ajp->seqGapCurrLen += endgap;
4994   } else if (endgap > 0) {
4995     /*
4996     FixGapAtEnd (buf, pad);
4997     */
4998     FixGapAtEnd (buf, ' ');
4999     ajp->seqGapCurrLen += endgap;
5000   }
5001 
5002   FixRemainingGaps (buf, pad);
5003 
5004   return startgapgap;
5005 }
5006 
5007 /*
5008 static void ChangeOandJtoX (CharPtr str)
5009 
5010 {
5011   Char  ch;
5012 
5013   if (str == NULL) return;
5014   ch = *str;
5015   while (ch != '\0') {
5016     if (ch == 'O' || ch == 'J') {
5017       *str = 'X';
5018     } else if (ch == 'o' || ch == 'j') {
5019       *str = 'x';
5020     }
5021     str++;
5022     ch = *str;
5023   }
5024 }
5025 */
5026 
5027 NLM_EXTERN CharPtr FormatSequenceBlock (
5028   Asn2gbFormatPtr afp,
5029   BaseBlockPtr bbp
5030 )
5031 
5032 {
5033   IntAsn2gbJobPtr   ajp;
5034   Asn2gbSectPtr     asp;
5035   Int2              blk;
5036   BioseqPtr         bsp;
5037   Bioseq            bsq;
5038   Char              buf [80];
5039   Char              ch;
5040   Int2              count;
5041   Int4              extend;
5042   StreamFlgType     flags = STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL;
5043   GBSeqPtr          gbseq;
5044   IntAsn2gbSectPtr  iasp;
5045   Int2              lin;
5046   SeqLocPtr         loc;
5047   Int4              num;
5048   CharPtr           ptr;
5049   Int4              remaining;
5050   SeqBlockPtr       sbp;
5051   SeqLoc            sl;
5052   SeqLocPtr         slp;
5053   Int4              start;
5054   Int2              startgapgap;
5055   Int4              stop;
5056   CharPtr           str = NULL;
5057   CharPtr           tmp;
5058   StringItemPtr     ffstring;
5059 
5060   if (afp == NULL || bbp == NULL) return NULL;
5061   sbp = (SeqBlockPtr) bbp;
5062   ajp = afp->ajp;
5063   if (ajp == NULL) return NULL;
5064   asp = afp->asp;
5065   if (asp == NULL) return NULL;
5066   iasp = (IntAsn2gbSectPtr) asp;
5067   bsp = (asp->bsp);
5068   if (bsp == NULL) return NULL;
5069 
5070   /* if GBSeq XML, use SeqPortStream on single block */
5071 
5072   if (ajp->gbseq) {
5073     gbseq = &asp->gbseq;
5074 
5075     if (ajp->ajp.slp != NULL) {
5076       slp = ajp->ajp.slp;
5077       str = MemNew (sizeof (Char) * (SeqLocLen (slp) + 10));
5078     } else {
5079       str = MemNew (sizeof (Char) * (bsp->length + 10));
5080     }
5081     if (str == NULL) return NULL;
5082 
5083     tmp = str;
5084     if (ajp->ajp.slp != NULL) {
5085       slp = ajp->ajp.slp;
5086       SeqPortStreamLoc (slp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &tmp, SaveGBSeqSequence);
5087     } else {
5088       SeqPortStream (bsp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &tmp, SaveGBSeqSequence);
5089     }
5090     /*
5091     if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
5092       if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
5093         ChangeOandJtoX (str);
5094       }
5095     }
5096     */
5097     gbseq->sequence = StringSave (str);
5098 
5099     tmp = gbseq->sequence;
5100     if (tmp == NULL) return NULL;
5101     ch = *tmp;
5102     while (ch != '\0') {
5103       if (ch == '\n' || ch == '\r' || ch == '\t') {
5104         *tmp = ' ';
5105       } else if (IS_UPPER (ch)) {
5106         /* collab decision to present target sequence in lower case */
5107         *tmp = TO_LOWER (ch);
5108       }
5109       tmp++;
5110       ch = *tmp;
5111     }
5112     TrimSpacesAroundString (gbseq->sequence);
5113     CompressNonBases (gbseq->sequence);
5114 
5115     return str;
5116   }
5117 
5118   /* replace SeqPort with improved SeqPortStream */
5119 
5120   if (sbp->bases == NULL) {
5121     if (ajp->specialGapFormat) {
5122       flags = EXPAND_GAPS_TO_DASHES | STREAM_CORRECT_INVAL;
5123     }
5124 
5125     start = sbp->start;
5126     stop = sbp->stop;
5127     extend = sbp->extend;
5128 
5129     if (stop > start) {
5130 
5131       str = MemNew (sizeof (Char) * (extend - start + 3));
5132       if (str != NULL) {
5133         if (ajp->ajp.slp != NULL) {
5134           slp = ajp->ajp.slp;
5135           MemSet ((Pointer) &bsq, 0, sizeof (Bioseq));
5136           MemSet ((Pointer) &sl, 0, sizeof (SeqLoc));
5137           bsq.repr = Seq_repr_seg;
5138           bsq.mol = bsp->mol;
5139           bsq.seq_ext_type = 1;
5140           bsq.length = SeqLocLen (slp);
5141           bsq.seq_ext = &sl;
5142           if (slp->choice == SEQLOC_MIX || slp->choice == SEQLOC_PACKED_INT) {
5143             loc = (SeqLocPtr) slp->data.ptrvalue;
5144             if (loc != NULL) {
5145               sl.choice = loc->choice;
5146               sl.data.ptrvalue = (Pointer) loc->data.ptrvalue;
5147               sl.next = loc->next;
5148             }
5149           } else {
5150             sl.choice = slp->choice;
5151             sl.data.ptrvalue = (Pointer) slp->data.ptrvalue;
5152             sl.next = NULL;
5153           }
5154           SeqPortStreamInt (&bsq, start, extend - 1, Seq_strand_plus, flags, (Pointer) str, NULL);
5155         } else {
5156           num = SeqPortStreamInt (bsp, start, extend - 1, Seq_strand_plus, flags, (Pointer) str, NULL);
5157           if (num < 1) {
5158             /* flag possible inconsistency between bsp->length and actual sequence data length */
5159             ajp->relModeError = TRUE;
5160             return NULL;
5161           }
5162         }
5163         /*
5164         if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
5165           if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
5166             ChangeOandJtoX (str);
5167           }
5168         }
5169         */
5170         sbp->bases = str;
5171       }
5172     }
5173   }
5174 
5175   if (sbp->bases == NULL) return NULL;
5176 
5177   /* format subsequence cached with SeqPortStream */
5178 
5179   ffstring = FFGetString (ajp);
5180 
5181   start = sbp->start;
5182   stop = sbp->stop;
5183   remaining = stop - start;
5184 
5185   count = 0;
5186   blk = 0;
5187   lin = 0;
5188 
5189   ptr = sbp->bases;
5190   ch = *ptr;
5191 
5192   while (ch != '\0' && remaining > 0) {
5193     buf [count] = (Char) (TO_LOWER (ch));
5194     count++;
5195     remaining--;
5196     ptr++;
5197     ch = *ptr;
5198 
5199     blk++;
5200     lin++;
5201     if (lin >= 60) {
5202 
5203       buf [count] = '\0';
5204       startgapgap = 0;
5205       if (ajp->specialGapFormat) {
5206         startgapgap = ProcessGapSpecialFormat (afp, ajp, bsp, ffstring, buf, ptr);
5207       }
5208       if (StringDoesHaveText (buf)) {
5209         ExpandSeqLine (buf);
5210         PrintSeqLine (ffstring, afp->format, buf, start + startgapgap, start + lin);
5211       }
5212       count = 0;
5213       blk = 0;
5214       lin = 0;
5215       start += 60;
5216     }
5217   }
5218 
5219   buf [count] = '\0';
5220   if (count > 0) {
5221     startgapgap = 0;
5222     if (ajp->specialGapFormat) {
5223       startgapgap = ProcessGapSpecialFormat (afp, ajp, bsp, ffstring, buf, ptr);
5224     }
5225     if (StringDoesHaveText (buf)) {
5226       ExpandSeqLine (buf);
5227       PrintSeqLine (ffstring, afp->format, buf, start + startgapgap, start + lin);
5228     }
5229   }
5230 
5231   str = FFToCharPtr(ffstring);
5232 
5233   FFRecycleString (ajp, ffstring);
5234   return str;
5235 }
5236 
5237 /*
5238 static CharPtr insd_strd [4] = {
5239   NULL, "single", "double", "mixed"
5240 };
5241 
5242 static CharPtr insd_mol [10] = {
5243   "?", "DNA", "RNA", "tRNA", "rRNA", "mRNA", "uRNA", "snRNA", "snoRNA", "AA"
5244 };
5245 
5246 static CharPtr insd_top [3] = {
5247   NULL, "linear", "circular"
5248 };
5249 */
5250 
5251 NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
5252 
5253 NLM_EXTERN CharPtr FormatSlashBlock (
5254   Asn2gbFormatPtr afp,
5255   BaseBlockPtr bbp
5256 )
5257 
5258 {
5259   IntAsn2gbJobPtr  ajp;
5260   Asn2gbSectPtr    asp;
5261   GBFeaturePtr     currf, headf, nextf;
5262   GBReferencePtr   currr, headr, nextr;
5263   GBSeqPtr         gbseq, gbtmp;
5264   IndxPtr          index;
5265   INSDSeq          is;
5266   /*
5267   Int2              moltype, strandedness, topology;
5268   */
5269 
5270   if (afp == NULL || bbp == NULL) return NULL;
5271   ajp = afp->ajp;
5272   if (ajp == NULL) return NULL;
5273   asp = afp->asp;
5274   if (asp == NULL) return NULL;
5275 
5276   /* sort and unique indexes */
5277 
5278   index = ajp->index;
5279 
5280   if (index != NULL) {
5281 
5282     MemCopy (index, &asp->index, sizeof (IndxBlock));
5283     MemSet (&asp->index, 0, sizeof (IndxBlock));
5284 
5285     index->authors = ValNodeSort (index->authors, SortVnpByString);
5286     index->authors = UniqueValNode (index->authors);
5287 
5288     index->genes = ValNodeSort (index->genes, SortVnpByString);
5289     index->genes = UniqueValNode (index->genes);
5290 
5291     index->journals = ValNodeSort (index->journals, SortVnpByString);
5292     index->journals = UniqueValNode (index->journals);
5293 
5294     index->keywords = ValNodeSort (index->keywords, SortVnpByString);
5295     index->keywords = UniqueValNode (index->keywords);
5296 
5297     index->secondaries = ValNodeSort (index->secondaries, SortVnpByString);
5298     index->secondaries = UniqueValNode (index->secondaries);
5299   }
5300 
5301   /* adjust XML-ized GenBank format */
5302 
5303   gbseq = ajp->gbseq;
5304 
5305   if (gbseq != NULL) {
5306 
5307     MemCopy (gbseq, &asp->gbseq, sizeof (GBSeq));
5308     MemSet (&asp->gbseq, 0, sizeof (GBSeq));
5309 
5310     /* reverse order of references */
5311 
5312     headr = NULL;
5313     for (currr = gbseq->references; currr != NULL; currr = nextr) {
5314       nextr = currr->next;
5315       currr->next = headr;
5316       headr = currr;
5317     }
5318     gbseq->references = headr;
5319 
5320     /* reverse order of features */
5321 
5322     headf = NULL;
5323     for (currf = gbseq->feature_table; currf != NULL; currf = nextf) {
5324       nextf = currf->next;
5325       currf->next = headf;
5326       headf = currf;
5327     }
5328     gbseq->feature_table = headf;
5329   }
5330 
5331   /* if generating GBSeq XML/ASN, write at each slash block */
5332 
5333   if (gbseq != NULL && afp->aip != NULL) {
5334     if (ajp->produceInsdSeq) {
5335       MemSet ((Pointer) &is, 0, sizeof (INSDSeq));
5336       is.next = (INSDSeqPtr) gbseq->next;
5337       is.OBbits__ = gbseq->OBbits__;
5338       is.locus = gbseq->locus;
5339       is.length = gbseq->length;
5340       is.strandedness = gbseq->strandedness;
5341       is.moltype = gbseq->moltype;
5342       is.topology = gbseq->topology;
5343       /*
5344       strandedness = (Int2) gbseq->strandedness;
5345       if (strandedness < 0 || strandedness > 3) {
5346         strandedness = 0;
5347       }
5348       is.strandedness = StringSave (insd_strd [strandedness]);
5349       moltype = (Int2) gbseq->moltype;
5350       if (moltype < 0 || moltype > 9) {
5351         moltype = 0;
5352       }
5353       is.moltype = StringSave (insd_mol [moltype]);
5354       topology = (Int2) gbseq->topology;
5355       if (topology < 0 || topology > 2) {
5356         topology = 0;
5357       }
5358       is.topology = StringSave (insd_top [topology]);
5359       */
5360       is.division = gbseq->division;
5361       is.update_date = gbseq->update_date;
5362       is.create_date = gbseq->create_date;
5363       is.update_release = gbseq->update_release;
5364       is.create_release = gbseq->create_release;
5365       is.definition = gbseq->definition;
5366       is.primary_accession = gbseq->primary_accession;
5367       is.entry_version = gbseq->entry_version;
5368       is.accession_version = gbseq->accession_version;
5369       is.other_seqids = gbseq->other_seqids;
5370       is.secondary_accessions = gbseq->secondary_accessions;
5371       is.project = gbseq->project;
5372       is.keywords = gbseq->keywords;
5373       is.segment = gbseq->segment;
5374       is.source = gbseq->source;
5375       is.organism = gbseq->organism;
5376       is.taxonomy = gbseq->taxonomy;
5377       is.references = (INSDReferencePtr) gbseq->references;
5378       is.comment = gbseq->comment;
5379       is.primary = gbseq->primary;
5380       is.source_db = gbseq->source_db;
5381       is.database_reference = gbseq->database_reference;
5382       is.feature_table = (INSDFeaturePtr) gbseq->feature_table;
5383       is.sequence = gbseq->sequence;
5384       is.contig = gbseq->contig;
5385       INSDSeqAsnWrite (&is, afp->aip, afp->atp);
5386     } else {
5387       GBSeqAsnWrite (gbseq, afp->aip, afp->atp);
5388     }
5389     if (afp->atp == NULL) {
5390       AsnPrintNewLine (afp->aip);
5391     }
5392     AsnIoFlush (afp->aip);
5393 
5394     /* clean up gbseq fields */
5395 
5396     gbtmp = GBSeqNew ();
5397     MemCopy (gbtmp, gbseq, sizeof (GBSeq));
5398     MemSet (gbseq, 0, sizeof (GBSeq));
5399     GBSeqFree (gbtmp);
5400   }
5401 
5402   /* slash always has string pre-allocated by add slash block function */
5403 
5404   return StringSaveNoNull (bbp->string);
5405 }
5406 
5407 
5408 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.