|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/asn2gnb6.c |
source navigation diff markup identifier search freetext search file search |
1 /* asn2gnb6.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2gnb6.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 * Mati Shomrat
30 *
31 * Version Creation Date: 10/21/98
32 *
33 * $Revision: 1.211 $
34 *
35 * File Description: New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gbfeat.h>
55 #include <gbftdef.h>
56 #include <edutil.h>
57 #include <alignmgr2.h>
58 #include <asn2gnbi.h>
59
60 #ifdef WIN_MAC
61 #if __profile__
62 #include <Profiler.h>
63 #endif
64 #endif
65
66 static CharPtr link_tax = "http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?";
67
68 static CharPtr link_featn = "http://www.ncbi.nlm.nih.gov/nuccore/";
69 static CharPtr link_featp = "http://www.ncbi.nlm.nih.gov/protein/";
70
71 static CharPtr link_seqn = "http://www.ncbi.nlm.nih.gov/nuccore/";
72 static CharPtr link_seqp = "http://www.ncbi.nlm.nih.gov/protein/";
73
74 static CharPtr link_lat_lon = "http://www.ncbi.nlm.nih.gov/projects/Sequin/latlonview.html?";
75
76
77
78
79 /* ordering arrays for qualifiers and note components */
80
81 static SourceType source_qual_order [] = {
82 SCQUAL_organism,
83
84 SCQUAL_organelle,
85
86 SCQUAL_mol_type,
87
88 SCQUAL_strain,
89 SCQUAL_sub_strain,
90 SCQUAL_variety,
91 SCQUAL_serotype,
92 SCQUAL_serovar,
93 SCQUAL_cultivar,
94 SCQUAL_isolate,
95 SCQUAL_isolation_source,
96 SCQUAL_spec_or_nat_host,
97 SCQUAL_sub_species,
98
99 SCQUAL_specimen_voucher,
100 SCQUAL_culture_collection,
101 SCQUAL_bio_material,
102
103 SCQUAL_db_xref,
104 SCQUAL_org_xref,
105
106 SCQUAL_chromosome,
107
108 SCQUAL_segment,
109
110 SCQUAL_map,
111 SCQUAL_clone,
112 SCQUAL_sub_clone,
113 SCQUAL_haplotype,
114 SCQUAL_haplogroup,
115 SCQUAL_sex,
116 SCQUAL_mating_type,
117 SCQUAL_cell_line,
118 SCQUAL_cell_type,
119 SCQUAL_tissue_type,
120 SCQUAL_clone_lib,
121 SCQUAL_dev_stage,
122 SCQUAL_ecotype,
123 SCQUAL_frequency,
124
125 SCQUAL_germline,
126 SCQUAL_rearranged,
127 SCQUAL_transgenic,
128 SCQUAL_environmental_sample,
129
130 SCQUAL_lab_host,
131 SCQUAL_pop_variant,
132 SCQUAL_tissue_lib,
133
134 SCQUAL_plasmid_name,
135 SCQUAL_transposon_name,
136 SCQUAL_ins_seq_name,
137
138 SCQUAL_country,
139
140 SCQUAL_focus,
141
142 SCQUAL_lat_lon,
143 SCQUAL_collection_date,
144 SCQUAL_collected_by,
145 SCQUAL_identified_by,
146 /*
147 SCQUAL_fwd_primer_seq,
148 SCQUAL_rev_primer_seq,
149 SCQUAL_fwd_primer_name,
150 SCQUAL_rev_primer_name,
151 */
152 SCQUAL_PCR_primers,
153 SCQUAL_PCR_reaction,
154
155 SCQUAL_note,
156
157 SCQUAL_sequenced_mol,
158 SCQUAL_label,
159 SCQUAL_usedin,
160 SCQUAL_citation,
161 (SourceType) 0
162 };
163
164 static SourceType source_desc_note_order [] = {
165 SCQUAL_seqfeat_note,
166 SCQUAL_orgmod_note,
167 SCQUAL_subsource_note,
168
169 SCQUAL_metagenomic,
170
171 SCQUAL_linkage_group,
172
173 SCQUAL_type,
174 SCQUAL_sub_type,
175 SCQUAL_serogroup,
176 SCQUAL_pathovar,
177 SCQUAL_chemovar,
178 SCQUAL_biovar,
179 SCQUAL_biotype,
180 SCQUAL_group,
181 SCQUAL_sub_group,
182 SCQUAL_common,
183 SCQUAL_acronym,
184 SCQUAL_dosage,
185
186 SCQUAL_authority,
187 SCQUAL_forma,
188 SCQUAL_forma_specialis,
189 SCQUAL_synonym,
190 SCQUAL_anamorph,
191 SCQUAL_teleomorph,
192 SCQUAL_breed,
193
194 SCQUAL_metagenome_source,
195 SCQUAL_metagenome_note,
196
197 SCQUAL_genotype,
198 SCQUAL_plastid_name,
199
200 SCQUAL_endogenous_virus_name,
201
202 SCQUAL_common_name,
203
204 SCQUAL_PCR_primer_note,
205 SCQUAL_PCR_reaction,
206
207 SCQUAL_zero_orgmod,
208 SCQUAL_one_orgmod,
209 SCQUAL_zero_subsrc,
210
211 /* SCQUAL_old_lineage, */
212
213 /* SCQUAL_old_name, */
214 (SourceType) 0
215 };
216
217 static SourceType source_feat_note_order [] = {
218 SCQUAL_unstructured,
219
220 SCQUAL_metagenomic,
221
222 SCQUAL_linkage_group,
223 SCQUAL_mating_type,
224
225 SCQUAL_type,
226 SCQUAL_sub_type,
227 SCQUAL_serogroup,
228 SCQUAL_pathovar,
229 SCQUAL_chemovar,
230 SCQUAL_biovar,
231 SCQUAL_biotype,
232 SCQUAL_group,
233 SCQUAL_sub_group,
234 SCQUAL_common,
235 SCQUAL_acronym,
236 SCQUAL_dosage,
237
238 SCQUAL_authority,
239 SCQUAL_forma,
240 SCQUAL_forma_specialis,
241 SCQUAL_synonym,
242 SCQUAL_anamorph,
243 SCQUAL_teleomorph,
244 SCQUAL_breed,
245
246 SCQUAL_metagenome_source,
247 SCQUAL_metagenome_note,
248
249 SCQUAL_genotype,
250 SCQUAL_plastid_name,
251
252 SCQUAL_endogenous_virus_name,
253
254 SCQUAL_seqfeat_note,
255 SCQUAL_orgmod_note,
256 SCQUAL_subsource_note,
257
258 SCQUAL_common_name,
259
260 SCQUAL_PCR_primer_note,
261 SCQUAL_PCR_reaction,
262
263 SCQUAL_zero_orgmod,
264 SCQUAL_one_orgmod,
265 SCQUAL_zero_subsrc,
266
267 /* SCQUAL_old_lineage, */
268
269 /* SCQUAL_old_name, */
270 (SourceType) 0
271 };
272
273 NLM_EXTERN SourceQual asn2gnbk_source_quals [ASN2GNBK_TOTAL_SOURCE] = {
274 { "", Qual_class_ignore },
275 { "acronym", Qual_class_orgmod },
276 { "anamorph", Qual_class_orgmod },
277 { "authority", Qual_class_orgmod },
278 { "biotype", Qual_class_orgmod },
279 { "biovar", Qual_class_orgmod },
280 { "bio_material", Qual_class_voucher },
281 { "breed", Qual_class_orgmod },
282 { "cell_line", Qual_class_subsource },
283 { "cell_type", Qual_class_subsource },
284 { "chemovar", Qual_class_orgmod },
285 { "chromosome", Qual_class_subsource },
286 { "citation", Qual_class_pubset },
287 { "clone", Qual_class_subsource },
288 { "clone_lib", Qual_class_subsource },
289 { "collected_by", Qual_class_subsource },
290 { "collection_date", Qual_class_subsource },
291 { "common", Qual_class_orgmod },
292 { "common", Qual_class_string },
293 { "country", Qual_class_subsource },
294 { "cultivar", Qual_class_orgmod },
295 { "culture_collection", Qual_class_voucher },
296 { "db_xref", Qual_class_db_xref },
297 { "db_xref", Qual_class_db_xref },
298 { "dev_stage", Qual_class_subsource },
299 { "dosage", Qual_class_orgmod },
300 { "ecotype", Qual_class_orgmod },
301 { "endogenous_virus", Qual_class_subsource },
302 { "environmental_sample", Qual_class_subsource },
303 { "extrachromosomal", Qual_class_boolean },
304 { "focus", Qual_class_boolean },
305 { "forma", Qual_class_orgmod },
306 { "forma_specialis", Qual_class_orgmod },
307 { "frequency", Qual_class_subsource },
308 { "fwd_primer_name", Qual_class_subsource },
309 { "fwd_primer_seq", Qual_class_subsource },
310 { "gb_acronym", Qual_class_orgmod },
311 { "gb_anamorph", Qual_class_orgmod },
312 { "gb_synonym", Qual_class_orgmod },
313 { "genotype", Qual_class_subsource },
314 { "germline", Qual_class_subsource },
315 { "group", Qual_class_orgmod },
316 { "haplogroup", Qual_class_subsource },
317 { "haplotype", Qual_class_subsource },
318 { "identified_by", Qual_class_subsource },
319 { "insertion_seq", Qual_class_subsource },
320 { "isolate", Qual_class_orgmod },
321 { "isolation_source", Qual_class_subsource },
322 { "lab_host", Qual_class_subsource },
323 { "label", Qual_class_label },
324 { "lat_lon", Qual_class_lat_lon },
325 { "linkage_group", Qual_class_subsource },
326 { "macronuclear", Qual_class_boolean },
327 { "map", Qual_class_subsource },
328 { "mating_type", Qual_class_subsource },
329 { "derived from metagenome", Qual_class_orgmod },
330 { "metagenome_source", Qual_class_orgmod },
331 { "metagenomic", Qual_class_subsource },
332 { "mol_type", Qual_class_string },
333 { "note", Qual_class_note },
334 { "old_lineage", Qual_class_orgmod },
335 { "old_name", Qual_class_orgmod },
336 { "organism", Qual_class_string },
337 { "organelle", Qual_class_organelle },
338 { "orgmod_note", Qual_class_orgmod },
339 { "pathovar", Qual_class_orgmod },
340 { "PCR_primers", Qual_class_pcr },
341 { "PCR_primers", Qual_class_pcr },
342 { "PCR_primers", Qual_class_pcr_react },
343 { "plasmid", Qual_class_subsource },
344 { "plastid", Qual_class_subsource },
345 { "pop_variant", Qual_class_subsource },
346 { "rearranged", Qual_class_subsource },
347 { "rev_primer_name", Qual_class_subsource },
348 { "rev_primer_seq", Qual_class_subsource },
349 { "segment", Qual_class_subsource },
350 { "seqfeat_note", Qual_class_string },
351 { "sequenced_mol", Qual_class_quote },
352 { "serogroup", Qual_class_orgmod },
353 { "serotype", Qual_class_orgmod },
354 { "serovar", Qual_class_orgmod },
355 { "sex", Qual_class_subsource },
356 { "host", Qual_class_orgmod },
357 { "specimen_voucher", Qual_class_voucher },
358 { "strain", Qual_class_orgmod },
359 { "sub_clone", Qual_class_subsource },
360 { "subgroup", Qual_class_orgmod },
361 { "sub_species", Qual_class_orgmod },
362 { "sub_strain", Qual_class_orgmod },
363 { "subtype", Qual_class_orgmod },
364 { "subsource_note", Qual_class_subsource },
365 { "synonym", Qual_class_orgmod },
366 { "teleomorph", Qual_class_orgmod },
367 { "tissue_lib", Qual_class_subsource },
368 { "tissue_type", Qual_class_subsource },
369 { "transgenic", Qual_class_subsource },
370 { "transposon", Qual_class_subsource },
371 { "type", Qual_class_orgmod },
372 { "unstructured", Qual_class_valnode },
373 { "usedin", Qual_class_quote },
374 { "variety", Qual_class_orgmod },
375 { "?", Qual_class_orgmod },
376 { "?", Qual_class_orgmod },
377 { "?", Qual_class_subsource }
378 };
379
380 NLM_EXTERN SourceType subSourceToSourceIdx [42] = {
381 SCQUAL_zero_subsrc,
382 SCQUAL_chromosome,
383 SCQUAL_map,
384 SCQUAL_clone,
385 SCQUAL_sub_clone,
386 SCQUAL_haplotype,
387 SCQUAL_genotype,
388 SCQUAL_sex,
389 SCQUAL_cell_line,
390 SCQUAL_cell_type,
391 SCQUAL_tissue_type,
392 SCQUAL_clone_lib,
393 SCQUAL_dev_stage,
394 SCQUAL_frequency,
395 SCQUAL_germline,
396 SCQUAL_rearranged,
397 SCQUAL_lab_host,
398 SCQUAL_pop_variant,
399 SCQUAL_tissue_lib,
400 SCQUAL_plasmid_name,
401 SCQUAL_transposon_name,
402 SCQUAL_ins_seq_name,
403 SCQUAL_plastid_name,
404 SCQUAL_country,
405 SCQUAL_segment,
406 SCQUAL_endogenous_virus_name,
407 SCQUAL_transgenic,
408 SCQUAL_environmental_sample,
409 SCQUAL_isolation_source,
410 SCQUAL_lat_lon,
411 SCQUAL_collection_date,
412 SCQUAL_collected_by,
413 SCQUAL_identified_by,
414 SCQUAL_fwd_primer_seq,
415 SCQUAL_rev_primer_seq,
416 SCQUAL_fwd_primer_name,
417 SCQUAL_rev_primer_name,
418 SCQUAL_metagenomic,
419 SCQUAL_mating_type,
420 SCQUAL_linkage_group,
421 SCQUAL_haplogroup,
422 SCQUAL_subsource_note
423 };
424
425 /* ********************************************************************** */
426
427 /* ********************************************************************** */
428
429 /* format functions allocate printable string for given paragraph */
430
431 /* superset of http://www.ncbi.nlm.nih.gov/collab/db_xref.html and RefSeq db_xrefs */
432
433 NLM_EXTERN CharPtr legalDbXrefs [] = {
434 "AceView/WormGenes",
435 "AFTOL",
436 "AntWeb",
437 "APHIDBASE",
438 "ApiDB",
439 "ApiDB_CryptoDB",
440 "ApiDB_PlasmoDB",
441 "ApiDB_ToxoDB",
442 "ASAP",
443 "ATCC",
444 "ATCC(in host)",
445 "ATCC(dna)",
446 "Axeldb",
447 "BDGP_EST",
448 "BDGP_INS",
449 "BEETLEBASE",
450 "BOLD",
451 "CDD",
452 "CK",
453 "COG",
454 "dbClone",
455 "dbCloneLib",
456 "dbEST",
457 "dbProbe",
458 "dbSNP",
459 "dbSTS",
460 "dictyBase",
461 "EcoGene",
462 "ENSEMBL",
463 "ERIC",
464 "ESTLIB",
465 "FANTOM_DB",
466 "FLYBASE",
467 "GABI",
468 "GDB",
469 "GeneDB",
470 "GeneID",
471 "GO",
472 "GOA",
473 "Greengenes",
474 "GRIN",
475 "H-InvDB",
476 "HGNC",
477 "HMP",
478 "HOMD",
479 "HSSP",
480 "IMGT/GENE-DB",
481 "IMGT/HLA",
482 "IMGT/LIGM",
483 "InterimID",
484 "InterPro",
485 "IRD",
486 "ISD",
487 "ISFinder",
488 "JCM",
489 "JGIDB",
490 "LocusID",
491 "MaizeGDB",
492 "MGI",
493 "MIM",
494 "MycoBank",
495 "NBRC",
496 "NextDB",
497 "niaEST",
498 "NMPDR",
499 "NRESTdb",
500 "Osa1",
501 "Pathema",
502 "PBmice",
503 "PDB",
504 "PFAM",
505 "PGN",
506 "PIR",
507 "PSEUDO",
508 "PseudoCap",
509 "RAP-DB",
510 "RATMAP",
511 "RFAM",
512 "RGD",
513 "RiceGenes",
514 "RZPD",
515 "SEED",
516 "SGD",
517 "SGN",
518 "SoyBase",
519 "SubtiList",
520 "taxon",
521 "TIGRFAM",
522 "UniGene",
523 "UNILIB",
524 "UniProtKB/Swiss-Prot",
525 "UniProtKB/TrEMBL",
526 "UniSTS",
527 "UNITE",
528 "VBASE2",
529 "VectorBase",
530 "WorfDB",
531 "WormBase",
532 "Xenbase",
533 "ZFIN",
534 NULL
535 };
536
537 NLM_EXTERN CharPtr legalSrcDbXrefs [] = {
538 "AFTOL",
539 "AntWeb",
540 "ATCC",
541 "ATCC(dna)",
542 "ATCC(in host)",
543 "BOLD",
544 "FANTOM_DB",
545 "FLYBASE",
546 "GRIN",
547 "HMP",
548 "HOMD",
549 "IMGT/HLA",
550 "IMGT/LIGM",
551 "JCM",
552 "MGI",
553 "MycoBank",
554 "NBRC",
555 "RZPD",
556 "taxon",
557 "UNILIB",
558 "UNITE",
559 NULL
560 };
561
562 NLM_EXTERN CharPtr legalRefSeqDbXrefs [] = {
563 "CCDS",
564 "CGNC",
565 "CloneID",
566 "ECOCYC",
567 "HPRD",
568 "LRG",
569 "miRBase",
570 "PBR",
571 "REBASE",
572 "SK-FST",
573 "TAIR",
574 "VBRC",
575 NULL
576 };
577
578 static Boolean IsDbxrefInList (
579 CharPtr name,
580 CharPtr PNTR list,
581 size_t num,
582 BoolPtr badcapP,
583 CharPtr PNTR goodcapP
584 )
585
586 {
587 Int2 L, R, mid;
588
589 L = 0;
590 R = num;
591
592 while (L < R) {
593 mid = (L + R) / 2;
594 if (StringICmp (list [mid], name) < 0) {
595 L = mid + 1;
596 } else {
597 R = mid;
598 }
599 }
600
601 if (StringICmp (list [R], name) == 0) {
602 if (StringCmp (list [R], name) != 0) {
603 if (badcapP != NULL) {
604 *badcapP = TRUE;
605 }
606 if (goodcapP != NULL) {
607 *goodcapP = list [R];
608 }
609 }
610 return TRUE;
611 }
612
613 return FALSE;
614 }
615
616 NLM_EXTERN Boolean DbxrefIsValid (
617 CharPtr name,
618 BoolPtr is_refseq_P,
619 BoolPtr is_source_P,
620 BoolPtr is_badcap_P,
621 CharPtr PNTR goodcapP
622 )
623
624 {
625 if (is_refseq_P != NULL) {
626 *is_refseq_P = FALSE;
627 }
628 if (is_source_P != NULL) {
629 *is_source_P = FALSE;
630 }
631 if (is_badcap_P != NULL) {
632 *is_badcap_P = FALSE;
633 }
634 if (goodcapP != NULL) {
635 *goodcapP = NULL;
636 }
637
638 if (StringHasNoText (name)) return FALSE;
639
640 if (IsDbxrefInList (name, legalRefSeqDbXrefs,
641 sizeof (legalRefSeqDbXrefs) / sizeof (legalRefSeqDbXrefs [0]) - 1,
642 is_badcap_P, goodcapP)) {
643 if (is_refseq_P != NULL) {
644 *is_refseq_P = TRUE;
645 }
646 return TRUE;
647 }
648
649 if (IsDbxrefInList (name, legalSrcDbXrefs,
650 sizeof (legalSrcDbXrefs) / sizeof (legalSrcDbXrefs [0]) - 1,
651 is_badcap_P, goodcapP)) {
652 if (is_source_P != NULL) {
653 *is_source_P = TRUE;
654 }
655 return TRUE;
656 }
657
658 if (IsDbxrefInList (name, legalDbXrefs,
659 sizeof (legalDbXrefs) / sizeof (legalDbXrefs [0]) - 1,
660 is_badcap_P, goodcapP)) {
661 return TRUE;
662 }
663
664 return FALSE;
665 }
666
667
668 /* These functions are for testing dbxrefs */
669
670 static ValNodePtr MakeDbxrefList (void)
671 {
672 ValNodePtr dbxref_list = NULL;
673 Int4 i;
674 DbtagPtr dbtag;
675
676 for (i = 0; legalDbXrefs [i] != NULL; i++) {
677 dbtag = DbtagNew ();
678 dbtag->db = StringSave (legalDbXrefs [i]);
679 dbtag->tag = ObjectIdNew ();
680 dbtag->tag->id = 42;
681 ValNodeAddPointer (&dbxref_list, 0, dbtag);
682 }
683
684 /* legalSrcDbXrefs is contained within legalDbXrefs */
685
686 for (i = 0; legalRefSeqDbXrefs [i] != NULL; i++) {
687 dbtag = DbtagNew ();
688 dbtag->db = StringSave (legalRefSeqDbXrefs [i]);
689 dbtag->tag = ObjectIdNew ();
690 dbtag->tag->id = 42;
691 ValNodeAddPointer (&dbxref_list, 0, dbtag);
692 }
693
694 return dbxref_list;
695 }
696
697 static void AddDbxrefsToBioSource (BioSourcePtr biop)
698 {
699 if (biop == NULL) return;
700 if (biop->org == NULL)
701 {
702 biop->org = OrgRefNew();
703 }
704
705 ValNodeLink (&(biop->org->db), MakeDbxrefList());
706 }
707
708 static void AddDbxrefsToSeqFeat (SeqFeatPtr sfp)
709 {
710 if (sfp == NULL) return;
711 ValNodeLink (&(sfp->dbxref), MakeDbxrefList());
712 }
713
714 NLM_EXTERN void AddAllDbxrefsToBioseq (BioseqPtr bsp)
715 {
716 SeqDescrPtr sdp;
717 SeqFeatPtr sfp;
718 SeqMgrDescContext dcontext;
719 SeqMgrFeatContext fcontext;
720
721 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
722 if (sdp != NULL) {
723 AddDbxrefsToBioSource (sdp->data.ptrvalue);
724 }
725
726 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
727 if (sfp != NULL) {
728 AddDbxrefsToBioSource (sfp->data.value.ptrvalue);
729 AddDbxrefsToSeqFeat (sfp);
730 }
731
732 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, 0, &fcontext);
733 if (sfp != NULL) {
734 AddDbxrefsToSeqFeat (sfp);
735 }
736
737 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, 0, &fcontext);
738 if (sfp != NULL) {
739 AddDbxrefsToSeqFeat (sfp);
740 }
741 }
742
743
744
745 static CharPtr organellePrefix [] = {
746 NULL,
747 NULL,
748 "Chloroplast ",
749 "Chromoplast ",
750 "Kinetoplast ",
751 "Mitochondrion ",
752 "Plastid ",
753 NULL,
754 NULL,
755 NULL,
756 NULL,
757 NULL,
758 "Cyanelle ",
759 NULL,
760 NULL,
761 "Nucleomorph ",
762 "Apicoplast ",
763 "Leucoplast ",
764 "Proplastid ",
765 NULL,
766 "Hydrogenosome ",
767 NULL,
768 "Chromatophore "
769 };
770
771 static CharPtr newOrganellePrefix [] = {
772 NULL,
773 NULL,
774 "chloroplast ",
775 "chromoplast ",
776 "kinetoplast ",
777 "mitochondrion ",
778 "plastid ",
779 NULL,
780 NULL,
781 NULL,
782 NULL,
783 NULL,
784 "cyanelle ",
785 NULL,
786 NULL,
787 "nucleomorph ",
788 "apicoplast ",
789 "leucoplast ",
790 "proplastid ",
791 NULL,
792 "hydrogenosome ",
793 NULL,
794 "chromatophore "
795 };
796
797 NLM_EXTERN CharPtr FormatSourceBlock (
798 Asn2gbFormatPtr afp,
799 BaseBlockPtr bbp
800 )
801
802 {
803 CharPtr acr = NULL;
804 Boolean addPeriod = TRUE;
805 IntAsn2gbJobPtr ajp;
806 CharPtr ana = NULL;
807 Asn2gbSectPtr asp;
808 BioSourcePtr biop = NULL;
809 CharPtr com = NULL;
810 CharPtr common = NULL;
811 SeqMgrDescContext dcontext;
812 SeqMgrFeatContext fcontext;
813 CharPtr gbacr = NULL;
814 CharPtr gbana = NULL;
815 GBBlockPtr gbp = NULL;
816 GBSeqPtr gbseq;
817 CharPtr gbsyn = NULL;
818 Uint1 genome;
819 CharPtr met = NULL;
820 ValNodePtr mod = NULL;
821 Int2 numacr = 0;
822 Int2 numana = 0;
823 Int2 numcom = 0;
824 Int2 numgbacr = 0;
825 Int2 numgbana = 0;
826 Int2 numgbsyn = 0;
827 Int2 nummet = 0;
828 Int2 numsyn = 0;
829 OrgModPtr omp = NULL;
830 OrgNamePtr onp;
831 CharPtr organelle = NULL;
832 OrgRefPtr orp;
833 CharPtr prefix = " (";
834 SeqDescrPtr sdp;
835 CharPtr second = NULL;
836 SeqFeatPtr sfp;
837 CharPtr str;
838 CharPtr syn = NULL;
839 CharPtr taxname = NULL;
840 StringItemPtr ffstring, temp;
841
842 if (afp == NULL || bbp == NULL) return NULL;
843 ajp = afp->ajp;
844 if (ajp == NULL) return NULL;
845 asp = afp->asp;
846 if (asp == NULL) return NULL;
847
848 if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
849
850 ffstring = FFGetString(ajp);
851 if ( ffstring == NULL ) return NULL;
852
853 if (bbp->itemtype == OBJ_SEQDESC) {
854 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
855 if (sdp != NULL) {
856 if (dcontext.seqdesctype == Seq_descr_source) {
857 biop = (BioSourcePtr) sdp->data.ptrvalue;
858 } else if (dcontext.seqdesctype == Seq_descr_genbank) {
859 gbp = (GBBlockPtr) sdp->data.ptrvalue;
860 }
861 }
862 } else if (bbp->itemtype == OBJ_SEQFEAT) {
863 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
864 if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
865 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
866 }
867 }
868 if (gbp != NULL) {
869 common = gbp->source;
870 }
871
872 if (biop != NULL) {
873 genome = biop->genome;
874 if (genome <= 22) {
875 if (ajp->newSourceOrg && (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT)) {
876 organelle = newOrganellePrefix [genome];
877 } else {
878 organelle = organellePrefix [genome];
879 }
880 }
881 orp = biop->org;
882 if (orp != NULL) {
883 taxname = orp->taxname;
884 common = orp->common;
885 mod = orp->mod;
886 onp = orp->orgname;
887 if (onp != NULL) {
888
889 if (ajp->newSourceOrg) {
890 for (omp = onp->mod; omp != NULL; omp = omp->next) {
891 switch (omp->subtype) {
892 case ORGMOD_common :
893 com = omp->subname;
894 numcom++;
895 break;
896 case ORGMOD_acronym :
897 acr = omp->subname;
898 numacr++;
899 break;
900 case ORGMOD_synonym :
901 syn = omp->subname;
902 numsyn++;
903 break;
904 case ORGMOD_anamorph :
905 ana = omp->subname;
906 numana++;
907 break;
908 case ORGMOD_gb_acronym :
909 gbacr = omp->subname;
910 numgbacr++;
911 break;
912 case ORGMOD_gb_anamorph :
913 gbana = omp->subname;
914 numgbana++;
915 break;
916 case ORGMOD_gb_synonym :
917 gbsyn = omp->subname;
918 numgbsyn++;
919 break;
920 case ORGMOD_metagenome_source :
921 met = omp->subname;
922 nummet++;
923 break;
924 default :
925 break;
926 }
927 }
928
929 if (numacr > 1) {
930 acr = NULL;
931 }
932 if (numana > 1) {
933 ana = NULL;
934 }
935 if (numcom > 1) {
936 com = NULL;
937 }
938 if (nummet > 1) {
939 met = NULL;
940 }
941 if (numsyn > 1) {
942 syn = NULL;
943 }
944 if (numgbacr > 1) {
945 gbacr = NULL;
946 }
947 if (numgbana > 1) {
948 gbana = NULL;
949 }
950 if (numgbsyn > 1) {
951 gbsyn = NULL;
952 }
953
954 if (StringHasNoText (second)) {
955 second = met;
956 }
957 if (StringHasNoText (second)) {
958 second = syn;
959 }
960 if (StringHasNoText (second)) {
961 second = acr;
962 }
963 if (StringHasNoText (second)) {
964 if (StringDoesHaveText (ana)) {
965 second = ana;
966 prefix = " (anamorph: ";
967 }
968 }
969 if (StringHasNoText (second)) {
970 second = com;
971 }
972
973 if (StringHasNoText (second)) {
974 second = gbsyn;
975 }
976 if (StringHasNoText (second)) {
977 second = gbacr;
978 }
979 if (StringHasNoText (second)) {
980 if (StringDoesHaveText (gbana)) {
981 second = gbana;
982 prefix = " (anamorph: ";
983 }
984 }
985 }
986 }
987 if (StringHasNoText (second)) {
988 second = common;
989 }
990 }
991 }
992
993 /* If the organelle prefix is already on the */
994 /* name, don't add it. */
995
996 if (StringNCmp (organelle, taxname, StringLen (organelle)) == 0)
997 organelle = "";
998
999 if (StringHasNoText (common)) {
1000 common = taxname;
1001 }
1002 if (StringHasNoText (common)) {
1003 common = "Unknown.";
1004 }
1005 if (StringHasNoText (taxname)) {
1006 taxname = "Unknown.";
1007 }
1008
1009 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
1010
1011 temp = FFGetString(ajp);
1012
1013 if (ajp->newSourceOrg) {
1014
1015 if (! StringHasNoText (organelle)) {
1016 FFAddTextToString(temp, NULL, organelle, NULL, FALSE, FALSE, TILDE_IGNORE);
1017 }
1018 FFAddTextToString(temp, NULL, taxname, NULL, FALSE, FALSE, TILDE_IGNORE);
1019 if (! StringHasNoText (second)) {
1020 FFAddTextToString(temp, prefix, second, ")", FALSE, FALSE, TILDE_IGNORE);
1021 }
1022 addPeriod = FALSE;
1023
1024 } else {
1025 FFAddTextToString(temp, NULL, common, NULL, FALSE, FALSE, TILDE_IGNORE);
1026 while (mod != NULL) {
1027 str = (CharPtr) mod->data.ptrvalue;
1028 if (! StringHasNoText (str)) {
1029 FFAddTextToString(temp, " ", str, NULL, FALSE, FALSE, TILDE_IGNORE);
1030 }
1031 mod = mod->next;
1032 }
1033 }
1034
1035 str = FFToCharPtr(temp);
1036 if (StringCmp (str, ".") == 0) {
1037 str = MemFree (str);
1038 }
1039 FFRecycleString(ajp, temp);
1040 /* optionally populate gbseq for XML-ized GenBank format */
1041
1042 if (ajp->gbseq) {
1043 gbseq = &asp->gbseq;
1044 } else {
1045 gbseq = NULL;
1046 }
1047
1048 if (gbseq != NULL) {
1049 gbseq->source = StringSave (str);
1050 }
1051
1052
1053 FFStartPrint(ffstring, afp->format, 0, 12, "SOURCE", 12, 5, 5, "OS", TRUE);
1054 if (str != NULL) {
1055 FFAddTextToString(ffstring, NULL, str, NULL, addPeriod, FALSE, TILDE_TO_SPACES);
1056 } else {
1057 FFAddOneChar(ffstring, '.', FALSE);
1058 }
1059
1060 MemFree (str);
1061
1062 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
1063
1064 FFStartPrint(ffstring, afp->format, 0, 12, "SOURCE", 12, 5, 5, "OS", TRUE);
1065 FFAddTextToString(ffstring, NULL, taxname, NULL, FALSE, FALSE, TILDE_TO_SPACES);
1066 if ( StringICmp(taxname, common) != 0 ) {
1067 FFAddTextToString(ffstring, " (", common, ")", FALSE, FALSE, TILDE_TO_SPACES);
1068 }
1069 }
1070
1071 str = FFEndPrint(ajp, ffstring, afp->format, 12, 12, 0, 5, "OS");
1072 FFRecycleString(ajp, ffstring);
1073 return str;
1074 }
1075
1076 NLM_EXTERN CharPtr FormatOrganismBlock (
1077 Asn2gbFormatPtr afp,
1078 BaseBlockPtr bbp
1079 )
1080
1081 {
1082 IntAsn2gbJobPtr ajp;
1083 Asn2gbSectPtr asp;
1084 BioSourcePtr biop = NULL;
1085 Char ch;
1086 CharPtr common = NULL;
1087 DbtagPtr dbt;
1088 SeqMgrDescContext dcontext;
1089 SeqMgrFeatContext fcontext;
1090 GBSeqPtr gbseq;
1091 Uint1 genome;
1092 CharPtr lineage = NULL;
1093 ObjectIdPtr oip;
1094 OrgModPtr omp;
1095 OrgNamePtr onp;
1096 CharPtr organelle = NULL;
1097 OrgRefPtr orp;
1098 SeqDescrPtr sdp;
1099 SeqFeatPtr sfp;
1100 CharPtr str;
1101 Int4 taxid = -1;
1102 CharPtr taxname = NULL;
1103 CharPtr tmp;
1104 CharPtr ptr;
1105 ValNodePtr vnp;
1106 StringItemPtr ffstring, temp;
1107 Char buf [16];
1108
1109 if (afp == NULL || bbp == NULL) return NULL;
1110 ajp = afp->ajp;
1111 if (ajp == NULL) return NULL;
1112 asp = afp->asp;
1113 if (asp == NULL) return NULL;
1114
1115
1116 if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
1117
1118 if (bbp->itemtype == OBJ_SEQDESC) {
1119 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
1120 if (sdp != NULL && dcontext.seqdesctype == Seq_descr_source) {
1121 biop = (BioSourcePtr) sdp->data.ptrvalue;
1122 }
1123 } else if (bbp->itemtype == OBJ_SEQFEAT) {
1124 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
1125 if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
1126 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
1127 }
1128 }
1129 if (biop != NULL) {
1130 genome = biop->genome;
1131 if (genome <= 22) {
1132 organelle = organellePrefix [genome];
1133 }
1134 orp = biop->org;
1135 if (orp != NULL) {
1136 taxname = orp->taxname;
1137 common = orp->common;
1138 onp = orp->orgname;
1139 if (onp != NULL) {
1140 lineage = onp->lineage;
1141 if (StringHasNoText (lineage)) {
1142 for (omp = onp->mod; omp != NULL; omp = omp->next) {
1143 if (omp->subtype == ORGMOD_old_lineage) {
1144 lineage = omp->subname;
1145 }
1146 }
1147 }
1148 }
1149 for (vnp = orp->db; vnp != NULL; vnp = vnp->next) {
1150 dbt = (DbtagPtr) vnp->data.ptrvalue;
1151 if (dbt == NULL) continue;
1152 if (StringCmp (dbt->db, "taxon") == 0) {
1153 oip = dbt->tag;
1154 if (oip != NULL) {
1155 taxid = oip->id;
1156 }
1157 }
1158 }
1159 }
1160 }
1161
1162 /* If the organelle prefix is already on the */
1163 /* name, don't add it. */
1164
1165 if (StringNCmp (organelle, taxname, StringLen (organelle)) == 0)
1166 organelle = "";
1167
1168 if (StringHasNoText (common)) {
1169 common = taxname;
1170 }
1171 if (StringHasNoText (common)) {
1172 common = "Unknown.";
1173 }
1174 if (StringHasNoText (taxname)) {
1175 taxname = "Unknown.";
1176 }
1177 if (StringHasNoText (lineage)) {
1178 lineage = "Unclassified.";
1179 }
1180
1181 ffstring = FFGetString(ajp);
1182 temp = FFGetString(ajp);
1183 if ( ffstring == NULL || temp == NULL ) return NULL;
1184
1185 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
1186
1187 FFStartPrint(temp, afp->format, 2, 12, "ORGANISM", 12, 5, 5, "OC", FALSE);
1188 if (! ajp->newSourceOrg) {
1189 FFAddOneString(temp, organelle, FALSE, FALSE, TILDE_IGNORE);
1190 }
1191 if (StringNICmp (taxname, "Unknown", 7) != 0) {
1192 if ( GetWWW(ajp) ) {
1193 if (taxid != -1) {
1194 FFAddOneString(temp, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1195 FF_Add_NCBI_Base_URL (temp, link_tax);
1196 FFAddOneString(temp, "id=", FALSE, FALSE, TILDE_IGNORE);
1197 sprintf (buf, "%ld", (long) taxid);
1198 FFAddOneString(temp, buf, FALSE, FALSE, TILDE_IGNORE);
1199 FFAddOneString(temp, "\">", FALSE, FALSE, TILDE_IGNORE);
1200 } else {
1201 FFAddOneString(temp, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1202 FF_Add_NCBI_Base_URL (temp, link_tax);
1203 FFAddOneString(temp, "name=", FALSE, FALSE, TILDE_IGNORE);
1204 tmp = StringSave (taxname);
1205 if (tmp != NULL) {
1206 ptr = tmp;
1207 ch = *ptr;
1208 while (ch != '\0') {
1209 if (IS_WHITESP (ch)) {
1210 *ptr = '+';
1211 }
1212 ptr++;
1213 ch = *ptr;
1214 }
1215 FFAddOneString(temp, tmp, FALSE, FALSE, TILDE_IGNORE);
1216 MemFree (tmp);
1217 }
1218 FFAddOneString(temp, "\">", FALSE, FALSE, TILDE_IGNORE);
1219 }
1220 FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1221 FFAddOneString(temp, "</a>", FALSE, FALSE, TILDE_IGNORE);
1222 } else {
1223 FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1224 }
1225 } else {
1226 FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1227 }
1228 FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
1229 FFRecycleString(ajp, temp);
1230
1231 temp = FFGetString(ajp);
1232 FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
1233 FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1234 FFLineWrap(ffstring, temp, 12, 12, ASN2FF_GB_MAX, NULL);
1235 FFRecycleString(ajp, temp);
1236 /* optionally populate gbseq for XML-ized GenBank format */
1237
1238 if (ajp->gbseq) {
1239 gbseq = &asp->gbseq;
1240 } else {
1241 gbseq = NULL;
1242 }
1243
1244 if (gbseq != NULL) {
1245 temp = FFGetString(ajp);
1246 if (! ajp->newSourceOrg) {
1247 FFAddOneString(temp, organelle, FALSE, FALSE, TILDE_IGNORE);
1248 }
1249 FFAddOneString(temp, taxname, FALSE, FALSE, TILDE_IGNORE);
1250 gbseq->organism = FFToCharPtr(temp);
1251 gbseq->taxonomy = StringSave (lineage);
1252 FFRecycleString(ajp, temp);
1253 }
1254
1255 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
1256 FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OC", FALSE);
1257 FFAddTextToString(temp, NULL, lineage, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1258 FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OC");
1259 FFRecycleString(ajp, temp);
1260 if ( !StringHasNoText(organelle) ) {
1261 temp = FFGetString(ajp);
1262 if ( temp != NULL ) {
1263 FFStartPrint(temp, afp->format, 12, 12, NULL, 0, 5, 5, "OG", FALSE);
1264 FFAddTextToString(temp, NULL, organelle, NULL, TRUE, FALSE, TILDE_TO_SPACES);
1265 FFLineWrap(ffstring, temp, 5, 5, ASN2FF_EMBL_MAX, "OG");
1266 FFRecycleString(ajp, temp);
1267 }
1268 }
1269 }
1270
1271 str = FFToCharPtr(ffstring);
1272 FFRecycleString(ajp, ffstring);
1273 return str;
1274 }
1275
1276 /* A tilde is not an EOL if it is found in a string of the form: */
1277 /* /~alpahnumdot/ where alphanumdot is either alpha numeric or '.' */
1278 /* */
1279 /* str points to the tilde in question. */
1280 static Boolean IsTildeEOL(CharPtr str) {
1281 CharPtr ptr;
1282
1283 if ( *(str - 1) != '/' ) return TRUE;
1284
1285 ++str;
1286
1287
1288 for ( ptr = str;
1289 IS_ALPHANUM(*ptr) || *ptr == '_' || *ptr == '-' || *ptr == '.';
1290 ++ptr) continue;
1291
1292 return *ptr == '/' ? FALSE : TRUE;
1293 }
1294
1295 /* returns a pointer to the first character past the url */
1296 static CharPtr FindUrlEnding(CharPtr str) {
1297 CharPtr ptr;
1298
1299 for ( ptr = str;
1300 !IS_WHITESP(*ptr) && *ptr != '\0' && *ptr != '(' && *ptr != '\"';
1301 ++ptr ) {
1302 if ( *ptr == '~' ) {
1303 if ( IsTildeEOL(ptr) ) break;
1304 }
1305 }
1306
1307 --ptr;
1308
1309 /* back up over any trailing periods, commas, or parentheses */
1310 while ( (*ptr == '.') || (*ptr == ',') || (*ptr == ')') ) {
1311 --ptr;
1312 }
1313
1314 ++ptr;
1315
1316 return ptr;
1317 }
1318
1319 static Boolean CommentHasSuspiciousHtml (
1320 IntAsn2gbJobPtr ajp,
1321 CharPtr searchString
1322 )
1323
1324 {
1325 Char ch;
1326 CharPtr ptr;
1327 Int4 state;
1328 ValNodePtr matches;
1329
1330 if (StringHasNoText (searchString)) return FALSE;
1331
1332 state = 0;
1333 ptr = searchString;
1334 ch = *ptr;
1335
1336 while (ch != '\0') {
1337 matches = NULL;
1338 ch = TO_LOWER (ch);
1339 state = TextFsaNext (ajp->bad_html_fsa, state, ch, &matches);
1340 if (matches != NULL) {
1341 return TRUE;
1342 }
1343 ptr++;
1344 ch = *ptr;
1345 }
1346
1347 return FALSE;
1348 }
1349
1350 NLM_EXTERN void AddCommentWithURLlinks (
1351 IntAsn2gbJobPtr ajp,
1352 StringItemPtr ffstring,
1353 CharPtr prefix,
1354 CharPtr str,
1355 CharPtr suffix
1356 )
1357
1358 {
1359 Char ch;
1360 CharPtr ptr;
1361
1362 if (GetWWW (ajp) && CommentHasSuspiciousHtml (ajp, str)) {
1363 if (prefix != NULL) {
1364 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1365 }
1366 AddCommentStringWithTildes (ffstring, str);
1367 if (suffix != NULL) {
1368 FFAddOneString(ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1369 }
1370 return;
1371 }
1372
1373 while (! StringHasNoText (str)) {
1374 ptr = StringStr (str, "http://");
1375 if (ptr == NULL) {
1376 ptr = StringStr (str, "https://");
1377 }
1378 if (ptr == NULL) {
1379 if (prefix != NULL) {
1380 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1381 }
1382 AddCommentStringWithTildes (ffstring, str);
1383 if (suffix != NULL) {
1384 FFAddOneString(ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1385 }
1386 return;
1387 }
1388
1389 *ptr = '\0';
1390 AddCommentStringWithTildes (ffstring, str);
1391 *ptr = 'h';
1392
1393 str = ptr;
1394 ptr = FindUrlEnding(str);
1395
1396
1397 ch = *ptr;
1398 *ptr = '\0';
1399 if ( GetWWW(ajp) ) {
1400 FFAddTextToString(ffstring, "<a href=\"", str, "\">", FALSE, FALSE, TILDE_IGNORE);
1401 FFAddTextToString(ffstring, NULL, str, "</a>", FALSE, FALSE, TILDE_IGNORE);
1402 } else {
1403 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
1404 }
1405
1406 *ptr = ch;
1407 str = ptr;
1408 }
1409 }
1410
1411 static void CatenateCommentInGbseq (
1412 GBSeqPtr gbseq,
1413 CharPtr str,
1414 Boolean compress
1415 )
1416
1417 {
1418 Char ch;
1419 CharPtr tmp;
1420
1421 if (gbseq == NULL || StringHasNoText (str)) return;
1422
1423 if (StringNCmp (str, "COMMENT ", 12) == 0) {
1424 str += 12;
1425 }
1426 if (gbseq->comment == NULL) {
1427 gbseq->comment = StringSave (str);
1428 } else {
1429 tmp = (CharPtr) MemNew (StringLen (gbseq->comment) + StringLen (str) + 4);
1430 StringCpy (tmp, gbseq->comment);
1431 StringCat (tmp, "; ");
1432 StringCat (tmp, str);
1433 gbseq->comment = MemFree (gbseq->comment);
1434 gbseq->comment = tmp;
1435 }
1436
1437 tmp = gbseq->comment;
1438 if (tmp == NULL) return;
1439 ch = *tmp;
1440 while (ch != '\0') {
1441 if (ch == '\n' || ch == '\r' || ch == '\t') {
1442 *tmp = ' ';
1443 }
1444 tmp++;
1445 ch = *tmp;
1446 }
1447 TrimSpacesAroundString (gbseq->comment);
1448 if (compress) {
1449 Asn2gnbkCompressSpaces (gbseq->comment);
1450 }
1451 }
1452
1453
1454 NLM_EXTERN CharPtr FormatCommentBlock (
1455 Asn2gbFormatPtr afp,
1456 BaseBlockPtr bbp
1457 )
1458
1459 {
1460 Boolean add_period;
1461 IntAsn2gbJobPtr ajp;
1462 Asn2gbSectPtr asp;
1463 CommentBlockPtr cbp;
1464 CharPtr db;
1465 DbtagPtr dbt;
1466 SeqMgrDescContext dcontext;
1467 SeqMgrFeatContext fcontext;
1468 GBSeqPtr gbseq;
1469 size_t len;
1470 ObjectIdPtr oip;
1471 CharPtr prefix;
1472 SeqDescrPtr sdp;
1473 SeqFeatPtr sfp;
1474 Char sfx [32];
1475 CharPtr str;
1476 CharPtr suffix;
1477 CharPtr title;
1478 StringItemPtr ffstring;
1479
1480 if (afp == NULL || bbp == NULL) return NULL;
1481 ajp = afp->ajp;
1482 if (ajp == NULL) return NULL;
1483 asp = afp->asp;
1484 if (asp == NULL) return NULL;
1485
1486 cbp = (CommentBlockPtr) bbp;
1487
1488 /* optionally populate gbseq for XML-ized GenBank format */
1489
1490 if (ajp->gbseq) {
1491 gbseq = &asp->gbseq;
1492 } else {
1493 gbseq = NULL;
1494 }
1495
1496 /* some comments are allocated (along with possible first COMMENT label) */
1497
1498 if (! StringHasNoText (bbp->string)) {
1499 str = StringSave (bbp->string);
1500 CatenateCommentInGbseq (gbseq, str, TRUE);
1501 return str;
1502 }
1503
1504 title = NULL;
1505 prefix = NULL;
1506 suffix = NULL;
1507 add_period = FALSE;
1508 sfx [0] = '\0';
1509
1510 if (bbp->itemtype == OBJ_SEQDESC) {
1511
1512 /* usually should reference comment, maploc, or region descriptor IDs */
1513
1514 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
1515 if (sdp != NULL) {
1516
1517 if (dcontext.seqdesctype == Seq_descr_comment) {
1518
1519 title = (CharPtr) sdp->data.ptrvalue;
1520
1521 } else if (dcontext.seqdesctype == Seq_descr_maploc) {
1522
1523 dbt = (DbtagPtr) sdp->data.ptrvalue;
1524 if (dbt != NULL) {
1525 db = dbt->db;
1526 oip = dbt->tag;
1527 if (oip != NULL) {
1528 if (oip->str != NULL) {
1529
1530 title = oip->str;
1531 prefix = ("Map location: ");
1532
1533 } else if (db != NULL && oip->id != 0) {
1534
1535 title = db;
1536 prefix = ("Map location: (Database ");
1537 sprintf (sfx, "; id # %ld).", (long) oip->id);
1538 suffix = sfx;
1539
1540 }
1541 }
1542 }
1543
1544 } else if (dcontext.seqdesctype == Seq_descr_region) {
1545
1546 title = (CharPtr) sdp->data.ptrvalue;
1547 prefix = "Region: ";
1548
1549 } else if (dcontext.seqdesctype == Seq_descr_name) {
1550
1551 title = (CharPtr) sdp->data.ptrvalue;
1552 prefix = "Name: ";
1553
1554 }
1555 }
1556
1557 } else if (bbp->itemtype == OBJ_SEQFEAT) {
1558
1559 /* also have to deal with comment feature across entire sequence */
1560
1561 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
1562 if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_COMMENT) {
1563
1564 title = sfp->comment;
1565 }
1566 }
1567
1568 if (title == NULL) return NULL;
1569
1570 ffstring = FFGetString(ajp);
1571 if ( ffstring == NULL ) return NULL;
1572
1573 if (cbp->first) {
1574 FFStartPrint (ffstring, afp->format, 0, 12, "COMMENT", 12, 5, 5, "CC", TRUE);
1575 } else {
1576 FFStartPrint (ffstring, afp->format, 0, 12, NULL, 12, 5, 5, "CC", FALSE);
1577 }
1578
1579 str = StringSave (title);
1580 TrimSpacesAndJunkFromEnds (str, TRUE);
1581 if (! IsEllipsis (str)) {
1582 s_RemovePeriodFromEnd (str);
1583 len = StringLen (str);
1584 if (len > 0 && str [len - 1] != '.') {
1585 add_period = TRUE;
1586 }
1587 }
1588 AddCommentWithURLlinks(ajp, ffstring, prefix, str, suffix);
1589 /*
1590 if ( GetWWW(ajp) && prefix == NULL && suffix == NULL) {
1591
1592 AddCommentWithURLlinks (ffstring, str);
1593 } else {
1594 FFAddTextToString (ffstring, prefix, str, suffix, FALSE, TRUE, TILDE_OLD_EXPAND);
1595 }
1596 */
1597 if (add_period) {
1598 FFAddOneChar (ffstring, '.',FALSE);
1599 }
1600 MemFree (str);
1601
1602 str = FFEndPrint(ajp, ffstring, afp->format, 12, 12, 5, 5, "CC");
1603
1604 /*
1605 CatenateCommentInGbseq (gbseq, str);
1606 */
1607 CatenateCommentInGbseq (gbseq, title, FALSE);
1608
1609 FFRecycleString(ajp, ffstring);
1610 return str;
1611 }
1612
1613 /* format features section */
1614
1615 static Boolean is_real_id (
1616 SeqIdPtr sip,
1617 SeqIdPtr this_sip
1618 )
1619
1620 {
1621 BioseqPtr bsp;
1622
1623 if (sip == NULL || this_sip == NULL) return FALSE;
1624
1625 if (! SeqIdIn (sip, this_sip)) {
1626 bsp = BioseqFind (sip);
1627 if (bsp == NULL) return TRUE; /* ??? */
1628 if (bsp->repr == Seq_repr_virtual) return FALSE;
1629 }
1630
1631 return TRUE;
1632 }
1633
1634 static Boolean FlatVirtLoc (
1635 BioseqPtr bsp,
1636 SeqLocPtr location
1637 )
1638
1639 {
1640 SeqIntPtr sintp;
1641 SeqIdPtr sip;
1642 SeqPntPtr spp;
1643
1644 if (bsp == NULL || location == NULL) return FALSE;
1645
1646 switch (location->choice) {
1647 case SEQLOC_WHOLE :
1648 sip = (SeqIdPtr) location->data.ptrvalue;
1649 if (sip == NULL) return TRUE;
1650 if (! is_real_id (sip, bsp->id)) return TRUE;
1651 break;
1652 case SEQLOC_INT :
1653 sintp = (SeqIntPtr) location->data.ptrvalue;
1654 if (sintp == NULL) return TRUE;
1655 sip = sintp->id;
1656 if (sip == NULL) return TRUE;
1657 if (! is_real_id (sip, bsp->id)) return TRUE;
1658 break;
1659 case SEQLOC_PNT :
1660 spp = (SeqPntPtr) location->data.ptrvalue;
1661 if (spp == NULL) return TRUE;
1662 sip = spp->id;
1663 if (sip == NULL) return TRUE;
1664 if (! is_real_id (sip, bsp->id)) return TRUE;
1665 break;
1666 default :
1667 break;
1668 }
1669
1670 return FALSE;
1671 }
1672
1673 static Uint1 id_order [NUM_SEQID];
1674 static Boolean order_initialized = FALSE;
1675
1676 static CharPtr lim_str [5] = { "", ">","<", ">", "<" };
1677
1678 NLM_EXTERN Boolean GetAccnVerFromServer (Int4 gi, CharPtr buf)
1679
1680 {
1681 AccnVerLookupFunc func;
1682 SeqMgrPtr smp;
1683 CharPtr str;
1684
1685 if (buf == NULL) return FALSE;
1686 *buf = '\0';
1687 smp = SeqMgrWriteLock ();
1688 if (smp == NULL) return FALSE;
1689 func = smp->accn_ver_lookup_func;
1690 SeqMgrUnlock ();
1691 if (func == NULL) return FALSE;
1692 str = (*func) (gi);
1693 if (str == NULL) return FALSE;
1694 if (StringLen (str) < 40) {
1695 StringCpy (buf, str);
1696 }
1697 MemFree (str);
1698 return TRUE;
1699 }
1700
1701
1702 /******************************************************************************/
1703 /* FFFlatLoc functions . */
1704 /******************************************************************************/
1705
1706 static Boolean FF_FlatNullAhead (
1707 BioseqPtr bsp,
1708 ValNodePtr location
1709 )
1710
1711 {
1712 SeqLocPtr next;
1713
1714 if (bsp == NULL || location == NULL) return FALSE;
1715
1716 next = location->next;
1717 if (next == NULL) return TRUE;
1718 if (next->choice == SEQLOC_NULL) return TRUE;
1719 if (FlatVirtLoc (bsp, next)) return TRUE;
1720
1721 return FALSE;
1722 }
1723
1724
1725
1726 static void FlatLocSeqId (
1727 IntAsn2gbJobPtr ajp,
1728 StringItemPtr ffstring,
1729 SeqIdPtr sip
1730 )
1731
1732 {
1733 BioseqPtr bsp;
1734 Char buf [40];
1735 ObjectIdPtr oip;
1736 SeqIdPtr use_id = NULL;
1737 Boolean was_lock = FALSE;
1738
1739 if (ffstring == NULL || sip == NULL) return;
1740
1741 buf [0] = '\0';
1742 bsp = BioseqFind (sip);
1743 if (bsp != NULL) {
1744 use_id = SeqIdSelect (bsp->id, id_order, NUM_SEQID);
1745 } else if (sip->choice == SEQID_GI) {
1746 if (GetAccnVerFromServer (sip->data.intvalue, buf)) {
1747 FFAddTextToString(ffstring, NULL, buf, ":", FALSE, FALSE, TILDE_IGNORE);
1748 /*AddValNodeString (head, NULL, buf, ":");*/
1749 return;
1750 }
1751 use_id = GetSeqIdForGI (sip->data.intvalue);
1752 }
1753 if (use_id == NULL && bsp == NULL) {
1754 bsp = BioseqLockById (sip);
1755 was_lock = TRUE;
1756 if (bsp != NULL) {
1757 use_id = SeqIdSelect (bsp->id, id_order, NUM_SEQID);
1758 }
1759 }
1760 if (use_id != NULL) {
1761 SeqIdWrite (use_id, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
1762 if (use_id->choice == SEQID_GI) {
1763 ajp->relModeError = TRUE;
1764 }
1765 } else if (sip->choice == SEQID_GI) {
1766 SeqIdWrite (sip, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
1767 ajp->relModeError = TRUE;
1768 } else {
1769 SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
1770 if (sip->choice == SEQID_GI) {
1771 ajp->relModeError = TRUE;
1772 }
1773 }
1774 if (was_lock) {
1775 BioseqUnlock (bsp);
1776 }
1777 if (StringHasNoText (buf)) {
1778 StringCpy (buf, "?00000");
1779 ajp->relModeError = TRUE;
1780 if (use_id != NULL && use_id->choice == SEQID_LOCAL) {
1781 oip = (ObjectIdPtr) use_id->data.ptrvalue;
1782 if (oip != NULL && (! StringHasNoText (oip->str))) {
1783 StringNCpy_0 (buf, oip->str, 13);
1784 }
1785 }
1786 }
1787 FFAddTextToString(ffstring, NULL, buf, ":", FALSE, FALSE, TILDE_IGNORE);
1788 }
1789
1790
1791
1792 static void FlatLocCaret (
1793 IntAsn2gbJobPtr ajp,
1794 StringItemPtr ffstring,
1795 SeqIdPtr sip,
1796 SeqIdPtr this_sip,
1797 Int4 point,
1798 IntFuzzPtr fuzz
1799 )
1800
1801 {
1802 Char buf [128];
1803 Uint1 index;
1804
1805 if (ffstring == NULL) return;
1806
1807 if (sip != NULL && (! SeqIdIn (sip, this_sip))) {
1808 FlatLocSeqId (ajp, ffstring, sip);
1809 }
1810
1811 buf [0] = '\0';
1812 point++; /* orginal FlatLocHalfCaret was called with point + 1 */
1813
1814 if (fuzz != NULL) {
1815 switch (fuzz->choice) {
1816 case 1 :
1817 sprintf (buf, "(%ld.%ld)..(%ld.%ld)",
1818 (long) (point - fuzz->a),
1819 (long) point,
1820 (long) point,
1821 (long) (point + fuzz->a));
1822 break;
1823 case 2 :
1824 sprintf (buf, "%ld^%ld",
1825 (long) (1 + fuzz->b),
1826 (long) (1 + fuzz->a));
1827 break;
1828 case 3 :
1829 sprintf (buf, "%ld^%ld",
1830 (long) (point - point * ((double) fuzz->a / 1000.0)),
1831 (long) (point + point * ((double) fuzz->a / 1000.0)));
1832 break;
1833 case 4 :
1834 if (fuzz->a == 3) { /* space to right */
1835 sprintf (buf, "%ld^%ld", (long) (point), (long) (point + 1));
1836 } else if (fuzz->a == 4 && point > 1) { /* space to left */
1837 sprintf (buf, "%ld^%ld", (long) (point - 1), (long) point);
1838 } else {
1839 index = (Uint1) fuzz->a;
1840 if (index > 4) {
1841 index = 0;
1842 }
1843 sprintf (buf, "%s%ld", lim_str [index], (long) point);
1844 }
1845 break;
1846 default :
1847 sprintf (buf, "%ld", (long) point);
1848 break;
1849 }
1850 } else {
1851 sprintf (buf, "%ld", (long) point);
1852 }
1853
1854 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
1855 }
1856
1857
1858 static void FlatLocPoint (
1859 IntAsn2gbJobPtr ajp,
1860 StringItemPtr ffstring,
1861 SeqIdPtr sip,
1862 SeqIdPtr this_sip,
1863 Int4 point,
1864 IntFuzzPtr fuzz
1865 )
1866
1867 {
1868 Char buf [128];
1869 Uint1 index;
1870
1871 if (ffstring == NULL) return;
1872
1873 if (sip != NULL && (! SeqIdIn (sip, this_sip))) {
1874 FlatLocSeqId (ajp, ffstring, sip);
1875 }
1876
1877 buf [0] = '\0';
1878 point++;
1879
1880 if (fuzz != NULL) {
1881 switch (fuzz->choice) {
1882 case 1 :
1883 sprintf (buf, "(%ld.%ld)",
1884 (long) (point - fuzz->a),
1885 (long) (point + fuzz->a));
1886 break;
1887 case 2 :
1888 sprintf (buf, "(%ld.%ld)",
1889 (long) (1 + fuzz->b),
1890 (long) (1 + fuzz->a));
1891 break;
1892 case 3 :
1893 sprintf (buf, "(%ld.%ld)",
1894 (long) (point - point * ((double) fuzz->a / 1000.0)),
1895 (long) (point + point * ((double) fuzz->a / 1000.0)));
1896 break;
1897 case 4 :
1898 index = (Uint1) fuzz->a;
1899 if (index > 4) {
1900 index = 0;
1901 }
1902 sprintf (buf, "%s%ld", lim_str [index], (long) point);
1903 break;
1904 default :
1905 sprintf (buf, "%ld", (long) point);
1906 break;
1907 }
1908 } else {
1909 sprintf (buf, "%ld", (long) point);
1910 }
1911
1912 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
1913 }
1914
1915
1916 static void FlatLocElement (
1917 IntAsn2gbJobPtr ajp,
1918 StringItemPtr ffstring,
1919 BioseqPtr bsp,
1920 SeqLocPtr location
1921 )
1922
1923 {
1924 Boolean minus_strand = FALSE;
1925 SeqBondPtr sbp;
1926 SeqIntPtr sintp;
1927 SeqIdPtr sip;
1928 SeqPntPtr spp;
1929 BioseqPtr wholebsp;
1930
1931 if (ffstring == NULL || bsp == NULL || location == NULL) return;
1932
1933 switch (location->choice) {
1934 case SEQLOC_WHOLE :
1935 sip = (SeqIdPtr) location->data.ptrvalue;
1936 if (sip == NULL) return;
1937 wholebsp = BioseqFind (sip);
1938 if (wholebsp == NULL) return;
1939 if (is_real_id (sip, bsp->id)) {
1940 FlatLocPoint (ajp, ffstring, sip, bsp->id, 0, NULL);
1941 if (bsp->length > 0) {
1942 FFAddOneString(ffstring, "..", FALSE, FALSE, TILDE_IGNORE);
1943 FlatLocPoint (ajp, ffstring, NULL, bsp->id, bsp->length - 1, NULL);
1944 }
1945 }
1946 break;
1947 case SEQLOC_INT :
1948 sintp = (SeqIntPtr) location->data.ptrvalue;
1949 if (sintp == NULL) return;
1950 sip = sintp->id;
1951 if (sip == NULL) return;
1952 if (is_real_id (sip, bsp->id)) {
1953 minus_strand = (Boolean) (sintp->strand == Seq_strand_minus);
1954 if (minus_strand) {
1955 FFAddOneString(ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
1956 }
1957 FlatLocPoint (ajp, ffstring, sip, bsp->id, sintp->from, sintp->if_from);
1958 if (sintp->to > 0 &&
1959 (sintp->to != sintp->from ||
1960 sintp->if_from != NULL ||
1961 sintp->if_to != NULL)) {
1962 FFAddOneString(ffstring, "..", FALSE, FALSE, TILDE_IGNORE);
1963 FlatLocPoint (ajp, ffstring, NULL, bsp->id, sintp->to, sintp->if_to);
1964 }
1965 if (minus_strand) {
1966 FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
1967 }
1968 }
1969 break;
1970 case SEQLOC_PNT :
1971 spp = (SeqPntPtr) location->data.ptrvalue;
1972 if (spp == NULL) return;
1973 sip = spp->id;
1974 if (sip == NULL) return;
1975 if (is_real_id (sip, bsp->id)) {
1976 minus_strand = (Boolean) (spp->strand == Seq_strand_minus);
1977 if (minus_strand) {
1978 FFAddOneString(ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
1979 }
1980 if (spp->fuzz != NULL) {
1981 FlatLocCaret (ajp, ffstring, sip, bsp->id, spp->point, spp->fuzz);
1982 } else {
1983 FlatLocPoint (ajp, ffstring, sip, bsp->id, spp->point, NULL);
1984 }
1985 if (minus_strand) {
1986 FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
1987 }
1988 }
1989 break;
1990 case SEQLOC_BOND :
1991 sbp = (SeqBondPtr) location->data.ptrvalue;
1992 if (sbp == NULL) return;
1993 spp = sbp->a;
1994 if (spp == NULL) return;
1995 sip = spp->id;
1996 if (sip == NULL) return;
1997 FFAddOneString(ffstring, "bond(", FALSE, FALSE, TILDE_IGNORE);
1998 FlatLocPoint (ajp, ffstring, sip, bsp->id, spp->point, spp->fuzz);
1999 spp = sbp->b;
2000 if (spp != NULL) {
2001 FFAddOneString(ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
2002 FlatLocPoint (ajp, ffstring, NULL, bsp->id, spp->point, spp->fuzz);
2003 }
2004 FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2005 break;
2006 default :
2007 /* unexpected internal complex type or unimplemented SEQLOC_FEAT */
2008 return;
2009 }
2010 }
2011
2012
2013
2014 static void FF_FlatPackedPoint (
2015 IntAsn2gbJobPtr ajp,
2016 StringItemPtr ffstring,
2017 PackSeqPntPtr pspp,
2018 BioseqPtr bsp
2019 )
2020
2021 {
2022 Uint1 dex;
2023
2024 if (ffstring == NULL || pspp == NULL || bsp == NULL) return;
2025
2026 for (dex = 0; dex < pspp->used; dex++) {
2027 FlatLocPoint (ajp, ffstring, pspp->id, bsp->id, pspp->pnts [dex], pspp->fuzz);
2028 }
2029 }
2030
2031
2032 static void FF_DoFlatLoc (
2033 IntAsn2gbJobPtr ajp,
2034 StringItemPtr ffstring,
2035 BioseqPtr bsp,
2036 SeqLocPtr location,
2037 Boolean ok_to_complement
2038 );
2039
2040 static void FF_GroupFlatLoc (
2041 IntAsn2gbJobPtr ajp,
2042 StringItemPtr ffstring,
2043 BioseqPtr bsp,
2044 SeqLocPtr location,
2045 CharPtr prefix,
2046 Boolean is_flat_order
2047 )
2048
2049 {
2050 Boolean found_non_virt = FALSE;
2051 SeqIdPtr hold_next;
2052 Int2 parens = 1;
2053 PackSeqPntPtr pspp;
2054 SeqLocPtr slp;
2055 Boolean special_mode = FALSE; /* join in order */
2056
2057 if (ffstring == NULL || bsp == NULL || location == NULL) return;
2058
2059 /* prefix will have the first parenthesis */
2060
2061 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2062
2063 for (slp = (SeqLocPtr) location->data.ptrvalue; slp != NULL; slp = slp->next) {
2064
2065 if (slp->choice == SEQLOC_NULL || FlatVirtLoc (bsp, slp)) {
2066 if (slp != location && slp->next != NULL) {
2067 if (special_mode) {
2068 special_mode = FALSE;
2069 FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2070 parens--;
2071 }
2072 }
2073 continue;
2074 }
2075
2076 if (found_non_virt && slp->choice != SEQLOC_EMPTY && slp->choice != SEQLOC_NULL) {
2077 FFAddOneString(ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
2078 }
2079
2080 switch (slp->choice) {
2081 case SEQLOC_WHOLE :
2082 case SEQLOC_PNT :
2083 case SEQLOC_BOND :
2084 case SEQLOC_FEAT :
2085 found_non_virt = TRUE;
2086 if (FlatVirtLoc (bsp, slp)) {
2087 if (slp != location && slp->next != NULL) {
2088 if (special_mode) {
2089 special_mode = FALSE;
2090 FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2091 parens--;
2092 }
2093 }
2094 } else {
2095 FlatLocElement (ajp, ffstring, bsp, slp);
2096 }
2097 break;
2098 case SEQLOC_INT :
2099 found_non_virt = TRUE;
2100 if (is_flat_order && (! FF_FlatNullAhead (bsp, slp))) {
2101 special_mode = TRUE;
2102 FFAddOneString(ffstring, "join(", FALSE, FALSE, TILDE_IGNORE);
2103 parens++;
2104 }
2105 FlatLocElement (ajp, ffstring, bsp, slp);
2106 break;
2107 case SEQLOC_PACKED_PNT :
2108 found_non_virt = TRUE;
2109 pspp = (PackSeqPntPtr) slp->data.ptrvalue;
2110 if (pspp != NULL) {
2111 FF_FlatPackedPoint (ajp, ffstring, pspp, bsp);
2112 }
2113 break;
2114 case SEQLOC_PACKED_INT :
2115 case SEQLOC_MIX :
2116 case SEQLOC_EQUIV :
2117 found_non_virt = TRUE;
2118 hold_next = slp->next;
2119 slp->next = NULL;
2120 FF_DoFlatLoc (ajp, ffstring, bsp, slp, FALSE);
2121 slp->next = hold_next;
2122 break;
2123 default :
2124 break;
2125 }
2126
2127 }
2128
2129 while (parens > 0) {
2130 FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2131 parens--;
2132 }
2133 }
2134
2135
2136
2137
2138 static void FF_DoFlatLoc (
2139 IntAsn2gbJobPtr ajp,
2140 StringItemPtr ffstring,
2141 BioseqPtr bsp,
2142 SeqLocPtr location,
2143 Boolean ok_to_complement
2144 )
2145
2146 {
2147 Boolean found_null;
2148 SeqLocPtr next_loc;
2149 PackSeqPntPtr pspp;
2150 SeqLocPtr slp;
2151
2152 if (ffstring == NULL || bsp == NULL || location == NULL) return;
2153
2154 /* deal with complement of entire location */
2155
2156 if (ok_to_complement && SeqLocStrand (location) == Seq_strand_minus) {
2157 slp = AsnIoMemCopy ((Pointer) location,
2158 (AsnReadFunc) SeqLocAsnRead,
2159 (AsnWriteFunc) SeqLocAsnWrite);
2160 if (slp != NULL) {
2161 SeqLocRevCmp (slp);
2162 FFAddOneString(ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
2163 FF_DoFlatLoc (ajp, ffstring, bsp, slp, FALSE);
2164 FFAddOneString(ffstring, ")", FALSE, FALSE, TILDE_IGNORE);
2165 }
2166 SeqLocFree (slp);
2167 return;
2168 }
2169
2170 /* handle each location component */
2171
2172 for (slp = location; slp != NULL; slp = slp->next) {
2173
2174 if (slp->choice == SEQLOC_NULL || FlatVirtLoc (bsp, slp)) continue;
2175
2176 /* print comma between components */
2177
2178 if (slp != location) {
2179 FFAddOneString(ffstring, ",", FALSE, FALSE, TILDE_IGNORE);
2180 }
2181
2182 switch (slp->choice) {
2183 case SEQLOC_MIX :
2184 case SEQLOC_PACKED_INT :
2185 found_null = FALSE;
2186 for (next_loc = (SeqLocPtr) slp->data.ptrvalue;
2187 next_loc != NULL;
2188 next_loc = next_loc->next) {
2189 if (next_loc->choice == SEQLOC_NULL ||
2190 FlatVirtLoc (bsp, next_loc) /* ||
2191 LocationHasNullsBetween (slp) */ )
2192 found_null = TRUE;
2193 }
2194 if (found_null) {
2195 FF_GroupFlatLoc (ajp, ffstring, bsp, slp, "order(", TRUE);
2196 } else {
2197 FF_GroupFlatLoc (ajp, ffstring, bsp, slp, "join(", FALSE);
2198 }
2199 break;
2200 case SEQLOC_EQUIV :
2201 FF_GroupFlatLoc (ajp, ffstring, bsp, slp, "one-of(", FALSE);
2202 break;
2203 case SEQLOC_PACKED_PNT :
2204 pspp = (PackSeqPntPtr) slp->data.ptrvalue;
2205 if (pspp != NULL) {
2206 FF_FlatPackedPoint (ajp, ffstring, pspp, bsp);
2207 }
2208 break;
2209 default :
2210 FlatLocElement (ajp, ffstring, bsp, slp);
2211 break;
2212 }
2213
2214 }
2215 }
2216
2217
2218
2219
2220 NLM_EXTERN CharPtr FFFlatLoc (
2221 IntAsn2gbJobPtr ajp,
2222 BioseqPtr bsp,
2223 SeqLocPtr location,
2224 Boolean masterStyle
2225 )
2226
2227 {
2228 Boolean hasNulls;
2229 IntFuzzPtr fuzz = NULL;
2230 SeqLocPtr loc;
2231 Boolean noLeft;
2232 Boolean noRight;
2233 Uint1 num = 1;
2234 SeqPntPtr spp;
2235 CharPtr str;
2236 SeqLocPtr tmp;
2237 StringItemPtr ffstring = NULL;
2238
2239 if (bsp == NULL || location == NULL) return NULL;
2240
2241 ffstring = FFGetString(ajp);
2242
2243 if (! order_initialized) {
2244 id_order [SEQID_GENBANK] = num++;
2245 id_order [SEQID_EMBL] = num++;
2246 id_order [SEQID_DDBJ] = num++;
2247 id_order [SEQID_OTHER] = num++;
2248 id_order [SEQID_TPG] = num++;
2249 id_order [SEQID_TPE] = num++;
2250 id_order [SEQID_TPD] = num++;
2251 id_order [SEQID_GPIPE] = num++;
2252 id_order [SEQID_GIBBSQ] = num++;
2253 id_order [SEQID_GIBBMT] = num++;
2254 id_order [SEQID_PRF] = num++;
2255 id_order [SEQID_PDB] = num++;
2256 id_order [SEQID_PIR] = num++;
2257 id_order [SEQID_SWISSPROT] = num++;
2258 id_order [SEQID_PATENT] = num++;
2259 id_order [SEQID_GI] = num++;;
2260 id_order [SEQID_GENERAL] = num++;
2261 id_order [SEQID_LOCAL] = num++;
2262 id_order [SEQID_GIIM] = num++;
2263 order_initialized = TRUE;
2264 }
2265
2266 if (masterStyle) {
2267
2268 /* map location from parts to segmented bioseq */
2269
2270 if (location->choice == SEQLOC_PNT) {
2271 spp = (SeqPntPtr) location->data.ptrvalue;
2272 if (spp != NULL) {
2273 fuzz = spp->fuzz;
2274 }
2275 }
2276
2277 CheckSeqLocForPartial (location, &noLeft, &noRight);
2278 hasNulls = LocationHasNullsBetween (location);
2279 loc = SeqLocMergeExEx (bsp, location, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE);
2280 if (loc == NULL) {
2281 tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
2282 loc = SeqLocMergeExEx (bsp, tmp, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE);
2283 SeqLocFree (tmp);
2284 }
2285 if (loc == NULL) {
2286 return StringSave ("?");
2287 }
2288 FreeAllFuzz (loc);
2289 SetSeqLocPartial (loc, noLeft, noRight);
2290
2291 if (loc->choice == SEQLOC_PNT && fuzz != NULL) {
2292 spp = (SeqPntPtr) loc->data.ptrvalue;
2293 if (spp != NULL && spp->fuzz == NULL) {
2294 spp->fuzz = AsnIoMemCopy ((Pointer) fuzz,
2295 (AsnReadFunc) IntFuzzAsnRead,
2296 (AsnWriteFunc) IntFuzzAsnWrite);
2297 }
2298 }
2299
2300 FF_DoFlatLoc (ajp, ffstring, bsp, loc, TRUE);
2301
2302 SeqLocFree (loc);
2303
2304 } else {
2305 FF_DoFlatLoc (ajp, ffstring, bsp, location, TRUE);
2306 }
2307
2308 str = FFToCharPtr(ffstring);
2309 FFRecycleString(ajp, ffstring);
2310 return str;
2311 }
2312
2313
2314
2315
2316 static void PromoteSeqId (SeqIdPtr sip, Pointer userdata)
2317
2318 {
2319 SeqIdPtr bestid, newid, oldid;
2320
2321 bestid = (SeqIdPtr) userdata;
2322
2323 newid = SeqIdDup (bestid);
2324 if (newid == NULL) return;
2325
2326 oldid = ValNodeNew (NULL);
2327 if (oldid == NULL) return;
2328
2329 MemCopy (oldid, sip, sizeof (ValNode));
2330 oldid->next = NULL;
2331
2332 sip->choice = newid->choice;
2333 sip->data.ptrvalue = newid->data.ptrvalue;
2334
2335 SeqIdFree (oldid);
2336 ValNodeFree (newid);
2337
2338 SeqIdStripLocus (sip);
2339 }
2340
2341 NLM_EXTERN SeqLocPtr SeqLocReMapEx (
2342 SeqIdPtr newid,
2343 SeqLocPtr seq_loc,
2344 SeqLocPtr location,
2345 Int4 offset,
2346 Boolean rev,
2347 Boolean masterStyle
2348 )
2349
2350 {
2351 BioseqPtr bsp;
2352 Boolean hasNulls;
2353 IntFuzzPtr fuzz = NULL;
2354 SeqLocPtr loc;
2355 Boolean noLeft;
2356 Boolean noRight;
2357 SeqEntryPtr scope;
2358 SeqIdPtr sip;
2359 SeqLocPtr slp = NULL;
2360 SeqPntPtr spp;
2361 SeqLocPtr tmp;
2362
2363 if (newid == NULL || seq_loc == NULL || location == NULL) return NULL;
2364
2365 if (masterStyle) {
2366
2367 sip = SeqLocId (seq_loc);
2368 if (sip == NULL) return NULL;
2369 bsp = BioseqFind (sip);
2370 if (bsp == NULL) {
2371 scope = SeqEntrySetScope (NULL);
2372 bsp = BioseqFind (sip);
2373 SeqEntrySetScope (scope);
2374 }
2375 if (bsp == NULL) return NULL;
2376 sip = SeqIdFindBest (bsp->id, 0);
2377
2378 /* map location from parts to segmented bioseq */
2379
2380 if (location->choice == SEQLOC_PNT) {
2381 spp = (SeqPntPtr) location->data.ptrvalue;
2382 if (spp != NULL) {
2383 fuzz = spp->fuzz;
2384 }
2385 }
2386
2387 CheckSeqLocForPartial (location, &noLeft, &noRight);
2388 hasNulls = LocationHasNullsBetween (location);
2389 loc = SeqLocMergeExEx (bsp, location, NULL, FALSE, TRUE, TRUE, hasNulls, FALSE, FALSE);
2390 if (loc == NULL) {
2391 tmp = TrimLocInSegment (bsp, location, &noLeft, &noRight);
2392 loc = SeqLocMergeExEx (bsp, tmp, NULL, FALSE, TRUE, TRUE, hasNulls, FALSE, FALSE);
2393 SeqLocFree (tmp);
2394 }
2395 if (loc == NULL) {
2396 return NULL;
2397 }
2398 FreeAllFuzz (loc);
2399 SetSeqLocPartial (loc, noLeft, noRight);
2400
2401 if (loc->choice == SEQLOC_PNT && fuzz != NULL) {
2402 spp = (SeqPntPtr) loc->data.ptrvalue;
2403 if (spp != NULL && spp->fuzz == NULL) {
2404 spp->fuzz = AsnIoMemCopy ((Pointer) fuzz,
2405 (AsnReadFunc) IntFuzzAsnRead,
2406 (AsnWriteFunc) IntFuzzAsnWrite);
2407 }
2408 }
2409
2410 scope = SeqEntrySetScope (NULL);
2411 slp = SeqLocReMap (newid, seq_loc, loc, offset, rev);
2412 SeqEntrySetScope (scope);
2413
2414 SeqLocFree (loc);
2415
2416 VisitSeqIdsInSeqLoc (slp, (Pointer) sip, PromoteSeqId);
2417 } else {
2418
2419 scope = SeqEntrySetScope (NULL);
2420 slp = SeqLocReMap (newid, seq_loc, location, offset, rev);
2421 SeqEntrySetScope (scope);
2422 }
2423
2424 return slp;
2425 }
2426
2427
2428 /******************************************************************************/
2429 /* End FFFlatLoc functions. */
2430 /******************************************************************************/
2431
2432
2433
2434 static void SubSourceToQualArray (
2435 SubSourcePtr ssp,
2436 QualValPtr qvp
2437 )
2438
2439 {
2440 SourceType idx;
2441 Uint1 subtype;
2442
2443 if (ssp == NULL || qvp == NULL) return;
2444
2445 while (ssp != NULL) {
2446 subtype = ssp->subtype;
2447 if (subtype == 255) {
2448 subtype = 41;
2449 }
2450 if (subtype < 42) {
2451 idx = subSourceToSourceIdx [subtype];
2452 if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
2453 if (qvp [idx].ssp == NULL) {
2454 qvp [idx].ssp = ssp;
2455 }
2456 }
2457 }
2458 ssp = ssp->next;
2459 }
2460 }
2461
2462 NLM_EXTERN SourceType orgModToSourceIdx [41] = {
2463 SCQUAL_zero_orgmod,
2464 SCQUAL_one_orgmod,
2465 SCQUAL_strain,
2466 SCQUAL_sub_strain,
2467 SCQUAL_type,
2468 SCQUAL_sub_type,
2469 SCQUAL_variety,
2470 SCQUAL_serotype,
2471 SCQUAL_serogroup,
2472 SCQUAL_serovar,
2473 SCQUAL_cultivar,
2474 SCQUAL_pathovar,
2475 SCQUAL_chemovar,
2476 SCQUAL_biovar,
2477 SCQUAL_biotype,
2478 SCQUAL_group,
2479 SCQUAL_sub_group,
2480 SCQUAL_isolate,
2481 SCQUAL_common,
2482 SCQUAL_acronym,
2483 SCQUAL_dosage,
2484 SCQUAL_spec_or_nat_host,
2485 SCQUAL_sub_species,
2486 SCQUAL_specimen_voucher,
2487 SCQUAL_authority,
2488 SCQUAL_forma,
2489 SCQUAL_forma_specialis,
2490 SCQUAL_ecotype,
2491 SCQUAL_synonym,
2492 SCQUAL_anamorph,
2493 SCQUAL_teleomorph,
2494 SCQUAL_breed,
2495 SCQUAL_gb_acronym,
2496 SCQUAL_gb_anamorph,
2497 SCQUAL_gb_synonym,
2498 SCQUAL_culture_collection,
2499 SCQUAL_bio_material,
2500 SCQUAL_metagenome_source,
2501 SCQUAL_old_lineage,
2502 SCQUAL_old_name,
2503 SCQUAL_orgmod_note
2504 };
2505
2506 static void OrgModToQualArray (
2507 OrgModPtr omp,
2508 QualValPtr qvp
2509 )
2510
2511 {
2512 SourceType idx;
2513 Uint1 subtype;
2514
2515 if (omp == NULL || qvp == NULL) return;
2516
2517 while (omp != NULL) {
2518 subtype = omp->subtype;
2519 if (subtype == 253) {
2520 subtype = 38;
2521 } else if (subtype == 254) {
2522 subtype = 39;
2523 } else if (subtype == 255) {
2524 subtype = 40;
2525 }
2526 if (subtype < 41) {
2527 idx = orgModToSourceIdx [subtype];
2528 if (idx > 0 && idx < ASN2GNBK_TOTAL_SOURCE) {
2529 if (qvp [idx].omp == NULL) {
2530 qvp [idx].omp = omp;
2531 }
2532 }
2533 }
2534 omp = omp->next;
2535 }
2536 }
2537
2538 static CharPtr organelleQual [] = {
2539 NULL,
2540 NULL,
2541 "/organelle=\"plastid:chloroplast\"",
2542 "/organelle=\"plastid:chromoplast\"",
2543 "/organelle=\"mitochondrion:kinetoplast\"",
2544 "/organelle=\"mitochondrion\"",
2545 "/organelle=\"plastid\"",
2546 "/macronuclear",
2547 NULL,
2548 "/plasmid=\"\"",
2549 "/transposon=\"\"",
2550 "/insertion_seq=\"\"",
2551 "/organelle=\"plastid:cyanelle\"",
2552 "/proviral",
2553 NULL,
2554 "/organelle=\"nucleomorph\"",
2555 "/organelle=\"plastid:apicoplast\"",
2556 "/organelle=\"plastid:leucoplast\"",
2557 "/organelle=\"plastid:proplastid\"",
2558 NULL,
2559 "/organelle=\"hydrogenosome\"",
2560 NULL,
2561 "/organelle=\"chromatophore\""
2562 };
2563
2564 NLM_EXTERN Boolean StringIsJustQuotes (
2565 CharPtr str
2566 )
2567
2568 {
2569 Nlm_Uchar ch; /* to use 8bit characters in multibyte languages */
2570
2571 if (str != NULL) {
2572 ch = *str;
2573 while (ch != '\0') {
2574 if (ch > ' ' && ch != '"' && ch != '\'') {
2575 return FALSE;
2576 }
2577 str++;
2578 ch = *str;
2579 }
2580 }
2581 return TRUE;
2582 }
2583
2584 static CharPtr RemoveAllSpaces (
2585 CharPtr str
2586 )
2587
2588 {
2589 Char ch;
2590 CharPtr dst;
2591 CharPtr ptr;
2592
2593 if (str == NULL || str [0] == '\0') return NULL;
2594
2595 dst = str;
2596 ptr = str;
2597 ch = *ptr;
2598 while (ch != '\0') {
2599 if (ch != ' ') {
2600 *dst = ch;
2601 dst++;
2602 }
2603 ptr++;
2604 ch = *ptr;
2605 }
2606 *dst = '\0';
2607
2608 return str;
2609 }
2610
2611 NLM_EXTERN void AddFeatureToGbseq (
2612 GBSeqPtr gbseq,
2613 GBFeaturePtr gbfeat,
2614 CharPtr str,
2615 SeqFeatPtr sfp
2616 )
2617
2618 {
2619 Char ch;
2620 CharPtr copy;
2621 GBQualifierPtr gbqual;
2622 GBQualifierPtr last = NULL;
2623 CharPtr ptr;
2624 CharPtr qual;
2625 CharPtr tmp;
2626 CharPtr val;
2627
2628 if (gbseq == NULL || gbfeat == NULL || StringHasNoText (str)) return;
2629
2630 copy = StringSave (str);
2631
2632 /* link in reverse order, to be reversed in slash block */
2633
2634 gbfeat->next = gbseq->feature_table;
2635 gbseq->feature_table = gbfeat;
2636
2637 /* now parse qualifiers */
2638
2639 ptr = StringStr (copy, " /");
2640 while (ptr != NULL) {
2641 qual = ptr + 22;
2642 val = qual;
2643 ch = *val;
2644 while (ch != '=' && ch != '\n' && ch != '\0') {
2645 val++;
2646 ch = *val;
2647 }
2648 /*
2649 val = StringChr (qual, '=');
2650 if (val == NULL) {
2651 val = StringChr (qual, '\n');
2652 }
2653 */
2654 if (ch != '\0' /* val != NULL */) {
2655 *val = '\0';
2656 val++;
2657 if (ch == '=') {
2658 tmp = val;
2659 if (*val == '"') {
2660 val++;
2661 tmp = val;
2662 ch = *tmp;
2663 while (ch != '"' && ch != '\0') {
2664 tmp++;
2665 ch = *tmp;
2666 }
2667 }
2668 ptr = StringStr (tmp, "\n /");
2669 if (ptr != NULL) {
2670 *ptr = '\0';
2671 ptr++;
2672 }
2673 } else {
2674 ptr = StringStr (val, " /");
2675 val = NULL;
2676 }
2677 gbqual = GBQualifierNew ();
2678 if (gbqual != NULL) {
2679 gbqual->name = StringSave (qual);
2680 if (! StringHasNoText (val)) {
2681 gbqual->value = StringSave (val);
2682 CleanQualValue (gbqual->value);
2683 Asn2gnbkCompressSpaces (gbqual->value);
2684 if (sfp != NULL) {
2685 if (sfp->data.choice == SEQFEAT_CDREGION &&
2686 StringICmp (qual, "translation") == 0) {
2687 RemoveAllSpaces (gbqual->value);
2688 } else if (sfp->data.choice == SEQFEAT_RNA &&
2689 StringICmp (qual, "transcription") == 0) {
2690 RemoveAllSpaces (gbqual->value);
2691 } else if (sfp->data.choice == SEQFEAT_PROT &&
2692 StringICmp (qual, "peptide") == 0) {
2693 RemoveAllSpaces (gbqual->value);
2694 }
2695 }
2696 }
2697 }
2698 } else {
2699 gbqual = GBQualifierNew ();
2700 if (gbqual != NULL) {
2701 gbqual->name = StringSave (qual);
2702 }
2703 }
2704 if (gbfeat->quals == NULL) {
2705 gbfeat->quals = gbqual;
2706 } else if (last != NULL) {
2707 last->next = gbqual;
2708 }
2709 last = gbqual;
2710 }
2711
2712 MemFree (copy);
2713 }
2714
2715 NLM_EXTERN CharPtr GetMolTypeQual (
2716 BioseqPtr bsp
2717 )
2718
2719 {
2720 SeqMgrDescContext dcontext;
2721 MolInfoPtr mip;
2722 SeqDescrPtr sdp;
2723
2724 if (bsp == NULL) return NULL;
2725
2726 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
2727 if (sdp == NULL) return NULL;
2728 mip = (MolInfoPtr) sdp->data.ptrvalue;
2729 if (mip == NULL) return NULL;
2730
2731 switch (mip->biomol) {
2732 case 0 :
2733 switch (bsp->mol) {
2734 case Seq_mol_dna :
2735 return "unassigned DNA";
2736 case Seq_mol_rna :
2737 return "unassigned RNA";
2738 case Seq_mol_na :
2739 break;
2740 default :
2741 break;
2742 }
2743 break;
2744 case MOLECULE_TYPE_GENOMIC :
2745 switch (bsp->mol) {
2746 case Seq_mol_dna :
2747 return "genomic DNA";
2748 case Seq_mol_rna :
2749 return "genomic RNA";
2750 case Seq_mol_na :
2751 break;
2752 default :
2753 break;
2754 }
2755 break;
2756 case MOLECULE_TYPE_PRE_MRNA :
2757 return "transcribed RNA";
2758 case MOLECULE_TYPE_MRNA :
2759 return "mRNA";
2760 case MOLECULE_TYPE_RRNA :
2761 return "rRNA";
2762 case MOLECULE_TYPE_TRNA :
2763 return "tRNA";
2764 case MOLECULE_TYPE_SNRNA :
2765 return "transcribed RNA";
2766 case MOLECULE_TYPE_SCRNA :
2767 return "transcribed RNA";
2768 case MOLECULE_TYPE_PEPTIDE :
2769 break;
2770 case MOLECULE_TYPE_OTHER_GENETIC_MATERIAL :
2771 switch (bsp->mol) {
2772 case Seq_mol_dna :
2773 return "other DNA";
2774 case Seq_mol_rna :
2775 return "other RNA";
2776 case Seq_mol_na :
2777 break;
2778 default :
2779 break;
2780 }
2781 break;
2782 case MOLECULE_TYPE_GENOMIC_MRNA_MIX :
2783 break;
2784 case MOLECULE_TYPE_CRNA :
2785 return "viral cRNA";
2786 break;
2787 case MOLECULE_TYPE_SNORNA :
2788 return "transcribed RNA";
2789 break;
2790 case MOLECULE_TYPE_TRANSCRIBED_RNA :
2791 return "transcribed RNA";
2792 break;
2793 case MOLECULE_TYPE_NCRNA :
2794 return "transcribed RNA";
2795 break;
2796 case MOLECULE_TYPE_TMRNA :
2797 return "transcribed RNA";
2798 break;
2799 case 255 :
2800 switch (bsp->mol) {
2801 case Seq_mol_dna :
2802 return "other DNA";
2803 case Seq_mol_rna :
2804 return "other RNA";
2805 case Seq_mol_na :
2806 break;
2807 default :
2808 break;
2809 }
2810 break;
2811 default :
2812 break;
2813 }
2814
2815 return NULL;
2816 }
2817
2818 static ValNodePtr ParsePCRPrimerString (
2819 QualValPtr qvp
2820 )
2821
2822 {
2823 CharPtr fwd_primer_seq = NULL;
2824 CharPtr rev_primer_seq = NULL;
2825 CharPtr fwd_primer_name = NULL;
2826 CharPtr rev_primer_name = NULL;
2827 SubSourcePtr ssp;
2828
2829 if (qvp == NULL) return NULL;
2830
2831 ssp = qvp [SCQUAL_fwd_primer_seq].ssp;
2832 if (ssp != NULL) {
2833 fwd_primer_seq = ssp->name;
2834 }
2835 ssp = qvp [SCQUAL_rev_primer_seq].ssp;
2836 if (ssp != NULL) {
2837 rev_primer_seq = ssp->name;
2838 }
2839 ssp = qvp [SCQUAL_fwd_primer_name].ssp;
2840 if (ssp != NULL) {
2841 fwd_primer_name = ssp->name;
2842 }
2843 ssp = qvp [SCQUAL_rev_primer_name].ssp;
2844 if (ssp != NULL) {
2845 rev_primer_name = ssp->name;
2846 }
2847
2848 return ParsePCRStrings (fwd_primer_seq, rev_primer_seq, fwd_primer_name, rev_primer_name);
2849 }
2850
2851 static ValNodePtr ParseColonString (
2852 CharPtr strs,
2853 Boolean multiple
2854 )
2855
2856 {
2857 ValNodePtr head = NULL;
2858 size_t len;
2859 CharPtr ptr, str, tmp;
2860
2861 if (StringHasNoText (strs)) return NULL;
2862
2863 tmp = StringSave (strs);
2864 str = tmp;
2865 len = StringLen (str);
2866 if (len > 1 && StringChr (str, ':') != NULL /* && multiple */) {
2867 while (StringDoesHaveText (str)) {
2868 ptr = StringChr (str, ':');
2869 if (ptr != NULL) {
2870 *ptr = '\0';
2871 ptr++;
2872 }
2873 TrimSpacesAroundString (str);
2874 ValNodeCopyStr (&head, 0, str);
2875 str = ptr;
2876 }
2877 } else {
2878 ValNodeCopyStr (&head, 0, str);
2879 }
2880
2881 MemFree (tmp);
2882 return head;
2883 }
2884
2885 static void PrintHalfPrimer (
2886 ValNodePtr PNTR headp,
2887 CharPtr name,
2888 CharPtr seq,
2889 CharPtr nm_label,
2890 CharPtr sq_label,
2891 CharPtr prefix,
2892 Boolean name_only_ok,
2893 Boolean multiple
2894 )
2895
2896 {
2897 ValNodePtr name_list, seq_list, name_vnp, seq_vnp;
2898 CharPtr str;
2899
2900 name_list = ParseColonString (name, multiple);
2901 seq_list = ParseColonString (seq, multiple);
2902
2903 name_vnp = name_list;
2904 seq_vnp = seq_list;
2905 if (seq_vnp != NULL) {
2906 while (seq_vnp != NULL) {
2907 if (name_vnp != NULL) {
2908 str = (CharPtr) name_vnp->data.ptrvalue;
2909 if (StringDoesHaveText (str)) {
2910 ValNodeCopyStr (headp, 0, prefix);
2911 ValNodeCopyStr (headp, 0, nm_label);
2912 ValNodeCopyStr (headp, 0, str);
2913 prefix = ", ";
2914 }
2915 name_vnp = name_vnp->next;
2916 }
2917 str = (CharPtr) seq_vnp->data.ptrvalue;
2918 if (StringDoesHaveText (str)) {
2919 ValNodeCopyStr (headp, 0, prefix);
2920 ValNodeCopyStr (headp, 0, sq_label);
2921 ValNodeCopyStr (headp, 0, str);
2922 prefix = ", ";
2923 }
2924 seq_vnp = seq_vnp->next;
2925 }
2926 } else if (name_only_ok) {
2927 while (name_vnp != NULL) {
2928 str = (CharPtr) name_vnp->data.ptrvalue;
2929 if (StringDoesHaveText (str)) {
2930 ValNodeCopyStr (headp, 0, prefix);
2931 ValNodeCopyStr (headp, 0, nm_label);
2932 ValNodeCopyStr (headp, 0, str);
2933 prefix = ", ";
2934 }
2935 name_vnp = name_vnp->next;
2936 }
2937 }
2938
2939 ValNodeFreeData (name_list);
2940 ValNodeFreeData (seq_list);
2941 }
2942
2943 static CharPtr NextPCRPrimerString (
2944 PcrSetPtr psp,
2945 Boolean isInNote,
2946 Boolean multiple
2947 )
2948
2949 {
2950 ValNodePtr head = NULL, vnp;
2951 CharPtr prefix = NULL;
2952 CharPtr str;
2953
2954 if (psp == NULL) return NULL;
2955
2956 if (StringHasNoText (psp->fwd_seq) || StringHasNoText (psp->rev_seq)) {
2957 if (isInNote) {
2958 /*
2959 if (StringDoesHaveText (psp->fwd_name)) {
2960 ValNodeCopyStr (&head, 0, prefix);
2961 ValNodeCopyStr (&head, 0, "fwd_name: ");
2962 ValNodeCopyStr (&head, 0, psp->fwd_name);
2963 prefix = ", ";
2964 }
2965
2966 if (StringDoesHaveText (psp->fwd_seq)) {
2967 ValNodeCopyStr (&head, 0, prefix);
2968 ValNodeCopyStr (&head, 0, "fwd_seq: ");
2969 ValNodeCopyStr (&head, 0, psp->fwd_seq);
2970 prefix = ", ";
2971 }
2972
2973 if (StringDoesHaveText (psp->rev_name)) {
2974 ValNodeCopyStr (&head, 0, prefix);
2975 ValNodeCopyStr (&head, 0, "rev_name: ");
2976 ValNodeCopyStr (&head, 0, psp->rev_name);
2977 prefix = ", ";
2978 }
2979
2980 if (StringDoesHaveText (psp->rev_seq)) {
2981 ValNodeCopyStr (&head, 0, prefix);
2982 ValNodeCopyStr (&head, 0, "rev_seq: ");
2983 ValNodeCopyStr (&head, 0, psp->rev_seq);
2984 prefix = ", ";
2985 }
2986 */
2987 PrintHalfPrimer (&head, psp->fwd_name, psp->fwd_seq, "fwd_name: ", "fwd_seq: ", NULL, TRUE, multiple);
2988 if (head != NULL) {
2989 prefix = ", ";
2990 }
2991 PrintHalfPrimer (&head, psp->rev_name, psp->rev_seq, "rev_name: ", "rev_seq: ", prefix, TRUE, multiple);
2992 } else {
2993 return StringSave ("");
2994 }
2995 } else {
2996 if (isInNote) return StringSave ("");
2997
2998 PrintHalfPrimer (&head, psp->fwd_name, psp->fwd_seq, "fwd_name: ", "fwd_seq: ", NULL, FALSE, multiple);
2999 PrintHalfPrimer (&head, psp->rev_name, psp->rev_seq, "rev_name: ", "rev_seq: ", ", ", FALSE, multiple);
3000 }
3001
3002 if (head != NULL && isInNote) {
3003 vnp = ValNodeCopyStr (NULL, 0, "PCR_primers=");
3004 if (vnp != NULL) {
3005 vnp->next = head;
3006 head = vnp;
3007 }
3008 }
3009
3010 str = MergeFFValNodeStrs (head);
3011 ValNodeFreeData (head);
3012 return str;
3013 }
3014
3015 static void PrintHalfReaction (
3016 ValNodePtr PNTR headp,
3017 PCRPrimerPtr primers,
3018 CharPtr nm_label,
3019 CharPtr sq_label,
3020 CharPtr prefix,
3021 Boolean name_only_ok,
3022 Boolean multiple
3023 )
3024
3025 {
3026 PCRPrimerPtr ppp;
3027
3028 for (ppp = primers; ppp != NULL; ppp = ppp->next) {
3029 if (StringDoesHaveText (ppp->seq)) {
3030 if (StringDoesHaveText (ppp->name)) {
3031 ValNodeCopyStr (headp, 0, prefix);
3032 ValNodeCopyStr (headp, 0, nm_label);
3033 ValNodeCopyStr (headp, 0, ppp->name);
3034 prefix = ", ";
3035 }
3036 ValNodeCopyStr (headp, 0, prefix);
3037 ValNodeCopyStr (headp, 0, sq_label);
3038 ValNodeCopyStr (headp, 0, ppp->seq);
3039 prefix = ", ";
3040 } else if (name_only_ok) {
3041 if (StringDoesHaveText (ppp->name)) {
3042 ValNodeCopyStr (headp, 0, prefix);
3043 ValNodeCopyStr (headp, 0, nm_label);
3044 ValNodeCopyStr (headp, 0, ppp->name);
3045 prefix = ", ";
3046 }
3047 }
3048 }
3049 }
3050
3051 static CharPtr NextPCRReaction (
3052 PCRReactionPtr prp,
3053 Boolean isInNote,
3054 Boolean multiple
3055 )
3056
3057 {
3058 Boolean has_fwd_seq = FALSE, has_rev_seq = FALSE;
3059 ValNodePtr head = NULL, vnp;
3060 PCRPrimerPtr ppp;
3061 CharPtr prefix = NULL, str;
3062
3063 if (prp == NULL) return NULL;
3064
3065 for (ppp = prp->forward; ppp != NULL; ppp = ppp->next) {
3066 if (StringDoesHaveText (ppp->seq)) {
3067 has_fwd_seq = TRUE;
3068 }
3069 }
3070
3071 for (ppp = prp->reverse; ppp != NULL; ppp = ppp->next) {
3072 if (StringDoesHaveText (ppp->seq)) {
3073 has_rev_seq = TRUE;
3074 }
3075 }
3076
3077 if (has_fwd_seq && has_rev_seq) {
3078 if (isInNote) {
3079 return StringSave ("");
3080 } else {
3081 PrintHalfReaction (&head, prp->forward, "fwd_name: ", "fwd_seq: ", NULL, FALSE, multiple);
3082 PrintHalfReaction (&head, prp->reverse, "rev_name: ", "rev_seq: ", ", ", FALSE, multiple);
3083 }
3084 } else {
3085 if (isInNote) {
3086 PrintHalfReaction (&head, prp->forward, "fwd_name: ", "fwd_seq: ", NULL, TRUE, multiple);
3087 if (head != NULL) {
3088 prefix = ", ";
3089 }
3090 PrintHalfReaction (&head, prp->reverse, "rev_name: ", "rev_seq: ", prefix, TRUE, multiple);
3091 } else {
3092 return StringSave ("");
3093 }
3094 }
3095
3096 if (head != NULL && isInNote) {
3097 vnp = ValNodeCopyStr (NULL, 0, "PCR_primers=");
3098 if (vnp != NULL) {
3099 vnp->next = head;
3100 head = vnp;
3101 }
3102 }
3103
3104 str = MergeFFValNodeStrs (head);
3105 ValNodeFreeData (head);
3106 return str;
3107 }
3108
3109 /* specimen_voucher, culture_collection, bio_material hyperlinks */
3110
/* link bases: written right after href=" when a voucher link is built */

#define s_atcc_base  "http://www.atcc.org/SearchCatalogs/linkin?id="
#define s_bcrc_base  "http://strain.bcrc.firdi.org.tw/BSAS/controller?event=SEARCH&bcrc_no="
#define s_ccmp_base  "http://ccmp.bigelow.org/SD/display.php?strain=CCMP"
#define s_ccug_base  "http://www.ccug.se/default.cfm?page=search_record.cfm&db=mc&s_tests=1&ccugno="
#define s_dsmz_base  "http://www.dsmz.de/microorganisms/search_no.php?q="
#define s_fsu_base   "http://www.prz.uni-jena.de/data.php?fsu="
#define s_icmp_base  "http://nzfungi.landcareresearch.co.nz/icmp/results_cultures.asp?ID=&icmpVAR="
#define s_ku_base    "http://collections.nhm.ku.edu/"
#define s_pcc_base   "http://www.pasteur.fr/recherche/banques/PCC/docs/pcc"
#define s_pcmb_base  "http://www2.bishopmuseum.org/HBS/PCMB/results3.asp?searchterm3="
#define s_pdd_base   "http://nzfungi.landcareresearch.co.nz/html/data_collections_details.asp?CID="
#define s_tgrc_base  "http://tgrc.ucdavis.edu/Data/Acc/AccDetail.aspx?AccessionNum="
#define s_uam_base   "http://arctos.database.museum/guid/"

/* prefixes: written between the link base (plus optional institute) and the id */

#define s_colon_pfx  ":"
#define s_kui_pfx    "KU_Fish/detail.jsp?record="
#define s_kuit_pfx   "KU_Tissue/detail.jsp?record="

/* suffixes: written after the id */

#define s_bcrc_sfx   "&type_id=6&keyword=;;"
#define s_pcc_sfx    ".htm"

/* institution names: used as the title text of the link */

#define s_atcc_inst  "American Type Culture Collection"
#define s_bcrc_inst  "Bioresource Collection and Research Center"
#define s_ccmp_inst  "Provasoli-Guillard National Center for Culture of Marine Phytoplankton"
#define s_ccug_inst  "Culture Collection, University of Goteborg, Department of Clinical Bacteriology"
#define s_crcm_inst  "Charles R. Conner Museum, Washington State University"
#define s_dgr_inst   "Division of Genomic Resources, University of New Mexico"
#define s_dsmz_inst  "German Resource Center for Biological Material"
#define s_fsu_inst   "Fungal Reference Center, University of Jena"
#define s_icmp_inst  "International Collection of Microorganisms from Plants"
#define s_ku_inst    "University of Kansas, Museum of Natural History"
#define s_kwp_inst   "Kenelm W. Philip Collection, University of Alaska Museum of the North"
#define s_msb_inst   "Museum of Southwestern Biology, University of New Mexico"
#define s_mvz_inst   "Museum of Vertebrate Zoology, University of California"
#define s_nbsb_inst  "National Biomonitoring Specimen Bank, U.S. Geological Survey"
#define s_nzfh_inst  "New Zealand Fungal Herbarium"
#define s_pcc_inst   "Pasteur Culture Collection of Cyanobacteria"
#define s_pcmb_inst  "Pacific Center for Molecular Biodiversity"
#define s_psu_inst   "Portland State University"
#define s_tgrc_inst  "Tomato Genetics Resource Center, University of California"
#define s_uam_inst   "University of Alaska Museum of the North"
#define s_wmnu_inst  "Western New Mexico University Museum"
3154
3155 typedef struct vouch {
3156 CharPtr sites;
3157 CharPtr links;
3158 Boolean prepend_institute;
3159 CharPtr prefix;
3160 CharPtr suffix;
3161 CharPtr mouseover;
3162 } VouchData, PNTR VouchDataPtr;
3163
3164 static VouchData Nlm_spec_vouchers [] = {
3165 { "ATCC", s_atcc_base, FALSE, NULL, NULL, s_atcc_inst },
3166 { "BCRC", s_bcrc_base, FALSE, NULL, s_bcrc_sfx, s_bcrc_inst },
3167 { "CCMP", s_ccmp_base, FALSE, NULL, NULL, s_ccmp_inst },
3168 { "CCUG", s_ccug_base, FALSE, NULL, NULL, s_ccug_inst },
3169 { "CRCM:Bird", s_uam_base, TRUE, s_colon_pfx, NULL, s_crcm_inst },
3170 { "DGR:Bird", s_uam_base, TRUE, s_colon_pfx, NULL, s_dgr_inst },
3171 { "DGR:Ento", s_uam_base, TRUE, s_colon_pfx, NULL, s_dgr_inst },
3172 { "DGR:Fish", s_uam_base, TRUE, s_colon_pfx, NULL, s_dgr_inst },
3173 { "DGR:Herp", s_uam_base, TRUE, s_colon_pfx, NULL, s_dgr_inst },
3174 { "DGR:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL, s_dgr_inst },
3175 { "DSM", s_dsmz_base, FALSE, NULL, NULL, s_dsmz_inst },
3176 { "FSU<DEU>", s_fsu_base, FALSE, NULL, NULL, s_fsu_inst },
3177 { "ICMP", s_icmp_base, FALSE, NULL, NULL, s_icmp_inst },
3178 { "KU:I", s_ku_base, FALSE, s_kui_pfx, NULL, s_ku_inst },
3179 { "KU:IT", s_ku_base, FALSE, s_kuit_pfx, NULL, s_ku_inst },
3180 { "KWP:Ento", s_uam_base, TRUE, s_colon_pfx, NULL, s_kwp_inst },
3181 { "MSB:Bird", s_uam_base, TRUE, s_colon_pfx, NULL, s_msb_inst },
3182 { "MSB:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL, s_msb_inst },
3183 { "MSB:Para", s_uam_base, TRUE, s_colon_pfx, NULL, s_msb_inst },
3184 { "MVZ:Bird", s_uam_base, TRUE, s_colon_pfx, NULL, s_mvz_inst },
3185 { "MVZ:Egg", s_uam_base, TRUE, s_colon_pfx, NULL, s_mvz_inst },
3186 { "MVZ:Herp", s_uam_base, TRUE, s_colon_pfx, NULL, s_mvz_inst },
3187 { "MVZ:Hild", s_uam_base, TRUE, s_colon_pfx, NULL, s_mvz_inst },
3188 { "MVZ:Img", s_uam_base, TRUE, s_colon_pfx, NULL, s_mvz_inst },
3189 { "MVZ:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL, s_mvz_inst },
3190 { "MVZ:Page", s_uam_base, TRUE, s_colon_pfx, NULL, s_mvz_inst },
3191 { "MVZObs:Herp", s_uam_base, TRUE, s_colon_pfx, NULL, s_mvz_inst },
3192 { "NBSB:Bird", s_uam_base, TRUE, s_colon_pfx, NULL, s_nbsb_inst },
3193 { "PCC", s_pcc_base, FALSE, NULL, s_pcc_sfx, s_pcc_inst },
3194 { "PCMB", s_pcmb_base, FALSE, NULL, NULL, s_pcmb_inst },
3195 { "PDD", s_pdd_base, FALSE, NULL, NULL, s_nzfh_inst },
3196 { "TGRC", s_tgrc_base, FALSE, NULL, NULL, s_tgrc_inst },
3197 { "PSU:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL, s_psu_inst },
3198 { "UAM:Bird", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3199 { "UAM:Bryo", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3200 { "UAM:Crus", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3201 { "UAM:Ento", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3202 { "UAM:Fish", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3203 { "UAM:Herb", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3204 { "UAM:Herp", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3205 { "UAM:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3206 { "UAM:Moll", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3207 { "UAM:Paleo", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3208 { "UAMObs:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL, s_uam_inst },
3209 { "WNMU:Bird", s_uam_base, TRUE, s_colon_pfx, NULL, s_wmnu_inst },
3210 { "WNMU:Fish", s_uam_base, TRUE, s_colon_pfx, NULL, s_wmnu_inst },
3211 { "WNMU:Mamm", s_uam_base, TRUE, s_colon_pfx, NULL, s_wmnu_inst },
3212 { NULL, NULL, FALSE, NULL, NULL, NULL }
3213 };
3214
3215 static Int2 VoucherNameIsValid (
3216 CharPtr name
3217 )
3218
3219 {
3220 Int2 L, R, mid;
3221 CharPtr ptr;
3222 Char str [256];
3223
3224 if (StringHasNoText (name)) return -1;
3225 StringNCpy_0 (str, name, sizeof (str));
3226 ptr = StringChr (str, ' ');
3227 if (ptr != NULL) {
3228 *ptr = '\0';
3229 }
3230
3231 L = 0;
3232 R = sizeof (Nlm_spec_vouchers) / sizeof (Nlm_spec_vouchers [0]) - 1; /* -1 because now NULL terminated */
3233
3234 while (L < R) {
3235 mid = (L + R) / 2;
3236 if (StringICmp (Nlm_spec_vouchers [mid].sites, str) < 0) {
3237 L = mid + 1;
3238 } else {
3239 R = mid;
3240 }
3241 }
3242
3243 /* case sensitive comparison at end enforces strictness */
3244
3245 if (StringCmp (Nlm_spec_vouchers [R].sites, str) == 0) {
3246 return R;
3247 }
3248
3249 return -1;
3250 }
3251
3252 /* works on subname copy that it can change */
3253
3254 static Boolean ParseSecVoucher (
3255 CharPtr subname,
3256 CharPtr PNTR inst,
3257 CharPtr PNTR id
3258 )
3259
3260 {
3261 CharPtr ptr;
3262 CharPtr tmp;
3263
3264 if (StringHasNoText (subname)) return FALSE;
3265 if (StringLen (subname) < 5) return FALSE;
3266 TrimSpacesAroundString (subname);
3267
3268 ptr = StringChr (subname, ':');
3269 if (ptr == NULL) return FALSE;
3270
3271 *inst = subname;
3272
3273 tmp = StringChr (ptr + 1, ':');
3274 if (tmp != NULL) {
3275 *tmp = '\0';
3276 tmp++;
3277 TrimSpacesAroundString (tmp);
3278 *id = tmp;
3279 } else {
3280 *ptr = '\0';
3281 ptr++;
3282 TrimSpacesAroundString (ptr);
3283 *id = ptr;
3284 }
3285
3286 if (StringHasNoText (*inst) || StringHasNoText (*id)) return FALSE;
3287
3288 return TRUE;
3289 }
3290
3291 static void Do_www_specimen_voucher (
3292 StringItemPtr ffstring,
3293 CharPtr inst,
3294 CharPtr id,
3295 VouchDataPtr vdp
3296 )
3297
3298 {
3299 if ( ffstring == NULL || inst == NULL || id == NULL || vdp == NULL || vdp->links == NULL ) return;
3300
3301 FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
3302 FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
3303 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3304 FFAddOneString (ffstring, vdp->links, FALSE, FALSE, TILDE_IGNORE);
3305 if (vdp->prepend_institute) {
3306 FFAddOneString (ffstring, inst, FALSE, FALSE, TILDE_IGNORE);
3307 }
3308 if (vdp->prefix != NULL) {
3309 FFAddOneString (ffstring, vdp->prefix, FALSE, FALSE, TILDE_IGNORE);
3310 }
3311 FFAddOneString (ffstring, id, FALSE, FALSE, TILDE_IGNORE);
3312 if (vdp->suffix != NULL) {
3313 FFAddOneString (ffstring, vdp->suffix, FALSE, FALSE, TILDE_IGNORE);
3314 }
3315 FFAddOneString(ffstring, "\"", FALSE, FALSE, TILDE_IGNORE);
3316 if (vdp->mouseover != NULL) {
3317 FFAddTextToString (ffstring, " title=\"", vdp->mouseover, "\"",
3318 FALSE, FALSE, TILDE_IGNORE);
3319 }
3320 FFAddTextToString (ffstring, ">", id, "</a>", FALSE, FALSE, TILDE_IGNORE);
3321 }
3322
3323 NLM_EXTERN void FF_www_specimen_voucher (
3324 IntAsn2gbJobPtr ajp,
3325 StringItemPtr ffstring,
3326 CharPtr subname
3327 )
3328
3329 {
3330 Char buf [512];
3331 CharPtr inst = NULL, id = NULL;
3332 Int2 R;
3333 VouchDataPtr vdp;
3334
3335 if ( ffstring == NULL || subname == NULL ) return;
3336 if (! GetWWW (ajp)) { /* not in www mode */
3337 FFAddTextToString(ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3338 return;
3339 }
3340 StringNCpy_0 (buf, subname, sizeof (buf));
3341 if (! ParseSecVoucher (buf, &inst, &id)) {
3342 FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3343 return;
3344 }
3345 R = VoucherNameIsValid (inst);
3346 if (R < 0) {
3347 FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3348 return;
3349 }
3350 vdp = &(Nlm_spec_vouchers [R]);
3351 if (vdp == NULL || vdp->links == NULL) {
3352 FFAddTextToString (ffstring, NULL, subname, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3353 return;
3354 }
3355 Do_www_specimen_voucher (ffstring, inst, id, vdp);
3356 }
3357
3358 static void Do_www_lat_lon (
3359 StringItemPtr ffstring,
3360 CharPtr lat_lon
3361 )
3362
3363 {
3364 Char buf [128];
3365 Char ch;
3366 CharPtr ew = "";
3367 Int2 i;
3368 CharPtr ns = "";
3369 CharPtr ptr;
3370 Char tmp [128];
3371 CharPtr tokens [6];
3372
3373 if ( ffstring == NULL || lat_lon == NULL ) return;
3374
3375 MemSet ((Pointer) tokens, 0, sizeof (tokens));
3376
3377 StringNCpy_0 (buf, lat_lon, sizeof (buf));
3378
3379 i = 0;
3380 ptr = buf;
3381 ch = *ptr;
3382 tokens [i] = ptr;
3383 while (ch != '\0' && i < 5) {
3384 if (ch == ' ') {
3385 *ptr = '\0';
3386 ptr++;
3387 ch = *ptr;
3388 while (ch == ' ') {
3389 ptr++;
3390 ch = *ptr;
3391 }
3392 i++;
3393 tokens [i] = ptr;
3394 } else {
3395 ptr++;
3396 ch = *ptr;
3397 }
3398 }
3399
3400 ptr = tokens [1];
3401 if (ptr != NULL && *ptr == 'S') {
3402 ns = "-";
3403 }
3404 ptr = tokens [3];
3405 if (ptr != NULL && *ptr == 'W') {
3406 ew = "-";
3407 }
3408
3409 if (tokens [0] == NULL) {
3410 tokens [0] = "?";
3411 }
3412 if (tokens [2] == NULL) {
3413 tokens [2] = "?";
3414 }
3415
3416 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3417 FF_Add_NCBI_Base_URL (ffstring, link_lat_lon);
3418 sprintf (tmp, "lat=%s%s&lon=%s%s", ns, tokens [0], ew, tokens [2]);
3419 FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
3420 FFAddTextToString (ffstring, "\">", lat_lon, "</a>", FALSE, FALSE, TILDE_IGNORE);
3421 }
3422
3423 static void FF_www_lat_lon (
3424 IntAsn2gbJobPtr ajp,
3425 StringItemPtr ffstring,
3426 CharPtr lat_lon
3427 )
3428
3429 {
3430 Boolean format_ok = FALSE;
3431 FloatHi lat = 0.0;
3432 FloatHi lon = 0.0;
3433 Boolean lat_in_range = FALSE;
3434 Boolean lon_in_range = FALSE;
3435
3436 if ( ffstring == NULL || lat_lon == NULL ) return;
3437 if (! GetWWW (ajp)) { /* not in www mode */
3438 FFAddTextToString(ffstring, NULL, lat_lon, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3439 return;
3440 }
3441 if (StringDoesHaveText (lat_lon)) {
3442 IsCorrectLatLonFormat (lat_lon, &format_ok, &lat_in_range, &lon_in_range);
3443 if (format_ok && lat_in_range && lon_in_range) {
3444 if (ParseLatLon (lat_lon, &lat, &lon)) {
3445 Do_www_lat_lon (ffstring, lat_lon);
3446 return;
3447 }
3448 }
3449 }
3450
3451 /* if any of above tests failed, default print */
3452 FFAddTextToString (ffstring, NULL, lat_lon, NULL, FALSE, TRUE, TILDE_TO_SPACES);
3453 }
3454
3455 NLM_EXTERN CharPtr FormatSourceFeatBlock (
3456 Asn2gbFormatPtr afp,
3457 BaseBlockPtr bbp
3458 )
3459
3460 {
3461 Boolean add_period;
3462 IntAsn2gbJobPtr ajp;
3463 Asn2gbSectPtr asp;
3464 BioSourcePtr biop = NULL;
3465 BioseqPtr bsp;
3466 BioseqSetPtr bssp;
3467 Char buf [80];
3468 CharPtr common = NULL;
3469 DbtagPtr dbt;
3470 SeqMgrDescContext dcontext;
3471 SeqMgrFeatContext fcontext;
3472 GBFeaturePtr gbfeat = NULL;
3473 GBSeqPtr gbseq;
3474 Int2 i;
3475 Uint1 idx;
3476 IntSrcBlockPtr isp;
3477 Boolean is_desc = TRUE;
3478 Boolean is_gps = FALSE;
3479 Boolean is_other = FALSE;
3480 Boolean is_est_or_gss = FALSE;
3481 Boolean is_bc;
3482 Boolean is_rf;
3483 Boolean is_sc;
3484 Int2 j;
3485 Uint1 jdx;
3486 Uint1 lastomptype;
3487 Uint1 lastssptype;
3488 SeqLocPtr location = NULL;
3489 MolInfoPtr mip;
3490 CharPtr notestr;
3491 SourceType PNTR notetbl = NULL;
3492 Boolean okay;
3493 ObjectIdPtr oip;
3494 OrgModPtr omp;
3495 OrgNamePtr onp = NULL;
3496 OrgRefPtr orp = NULL;
3497 Boolean partial5;
3498 Boolean partial3;
3499 CharPtr prefix;
3500 PCRReactionPtr prp;
3501 ValNodePtr pset;
3502 PcrSetPtr psp;
3503 SourceType PNTR qualtbl = NULL;
3504 QualValPtr qvp;
3505 SeqDescrPtr sdp;
3506 SeqEntryPtr sep;
3507 SeqFeatPtr sfp = NULL;
3508 SeqIdPtr sip;
3509 SubSourcePtr ssp;
3510 CharPtr str;
3511 BioseqPtr target;
3512 CharPtr taxname = NULL;
3513 ValNodePtr vnp;
3514 StringItemPtr ffstring, unique;
3515
3516 if (afp == NULL || bbp == NULL) return NULL;
3517 ajp = afp->ajp;
3518 if (ajp == NULL) return NULL;
3519 asp = afp->asp;
3520 if (asp == NULL) return NULL;
3521 target = asp->target;
3522 bsp = asp->bsp;
3523 if (target == NULL || bsp == NULL) return NULL;
3524 qvp = afp->qvp;
3525 if (qvp == NULL) return NULL;
3526
3527 if (ajp->gbseq) {
3528 gbseq = &asp->gbseq;
3529 } else {
3530 gbseq = NULL;
3531 }
3532
3533 /* five-column feature table uses special code for formatting */
3534
3535 if (ajp->format == FTABLE_FMT) {
3536 str = FormatFtableSourceFeatBlock (bbp, target);
3537 return str;
3538 }
3539
3540 /* otherwise do regular flatfile formatting */
3541
3542 if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
3543
3544 isp = (IntSrcBlockPtr) bbp;
3545
3546 /* could be descriptor or feature */
3547
3548 if (bbp->itemtype == OBJ_SEQDESC) {
3549 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID, 0, NULL, &dcontext);
3550 if (sdp != NULL && dcontext.seqdesctype == Seq_descr_source) {
3551 biop = (BioSourcePtr) sdp->data.ptrvalue;
3552 }
3553 } else if (bbp->itemtype == OBJ_SEQFEAT) {
3554 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
3555 if (sfp != NULL && fcontext.seqfeattype == SEQFEAT_BIOSRC) {
3556 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
3557 }
3558 is_desc = FALSE;
3559 }
3560
3561 if (biop == NULL) return NULL;
3562
3563 unique = FFGetString(ajp);
3564 if ( unique == NULL ) return NULL;
3565
3566 ffstring = FFGetString(ajp);
3567 if ( ffstring == NULL ) return NULL;
3568
3569 FFStartPrint (ffstring, afp->format, 5, 21, NULL, 0, 5, 21, "FT", FALSE);
3570 FFAddOneString (ffstring, "source", FALSE, FALSE, TILDE_IGNORE);
3571 FFAddNChar(ffstring, ' ', 21 - 5 - StringLen("source"), FALSE);
3572
3573 if (gbseq != NULL) {
3574 gbfeat = GBFeatureNew ();
3575 if (gbfeat != NULL) {
3576 gbfeat->key = StringSave ("source");
3577 }
3578 }
3579
3580 location = isp->loc;
3581
3582 str = FFFlatLoc (ajp, bsp, location, ajp->masterStyle);
3583 if ( GetWWW(ajp) ) {
3584 FF_www_featloc (ffstring, str);
3585 } else {
3586 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3587 }
3588 FFAddOneChar(ffstring, '\n', FALSE);
3589
3590 if (gbseq != NULL) {
3591 if (gbfeat != NULL) {
3592 if (StringDoesHaveText (str)) {
3593 gbfeat->location = StringSave (str);
3594 } else {
3595 gbfeat->location = StringSave ("");
3596 }
3597 if (StringDoesHaveText (str)) {
3598 if (StringStr (str, "join") != NULL) {
3599 gbfeat->operator__ = StringSave ("join");
3600 } else if (StringStr (str, "order") != NULL) {
3601 gbfeat->operator__ = StringSave ("order");
3602 }
3603 }
3604 CheckSeqLocForPartial (location, &partial5, &partial3);
3605 gbfeat->partial5 = partial5;
3606 gbfeat->partial3 = partial3;
3607 if (ajp->masterStyle) {
3608 AddIntervalsToGbfeat (gbfeat, location, bsp);
3609 } else {
3610 AddIntervalsToGbfeat (gbfeat, location, NULL);
3611 }
3612 }
3613 }
3614
3615 MemFree (str);
3616
3617 orp = biop->org;
3618 if (orp != NULL) {
3619 taxname = orp->taxname;
3620 /* common = orp->common; */
3621 }
3622 if (StringHasNoText (taxname)) {
3623 if (ajp->flags.needOrganismQual) {
3624 taxname = "unknown";
3625 if (orp != NULL) {
3626 common = orp->common;
3627 }
3628 #ifdef ASN2GNBK_PRINT_UNKNOWN_ORG
3629 } else {
3630 taxname = "unknown";
3631 common = orp->common;
3632 #endif
3633 }
3634 }
3635
3636 sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
3637 if (sep != NULL && IS_Bioseq_set (sep)) {
3638 bssp = (BioseqSetPtr) sep->data.ptrvalue;
3639 if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
3640 is_gps = TRUE;
3641 }
3642 }
3643
3644 if (bsp != NULL) {
3645 for (sip = bsp->id; sip != NULL; sip = sip->next) {
3646 if (sip->choice == SEQID_OTHER) {
3647 is_other = TRUE;
3648 }
3649 }
3650 }
3651
3652 if (ajp->refseqConventions) {
3653 is_other = TRUE;
3654 }
3655
3656 /* populate qualifier table from biosource fields */
3657
3658 qvp [SCQUAL_organism].str = taxname;
3659 qvp [SCQUAL_common_name].str = common;
3660
3661 if (biop->is_focus) {
3662 qvp [SCQUAL_focus].ble = TRUE;
3663 }
3664
3665 str = GetMolTypeQual (bsp);
3666 /*
3667 if (StringICmp (str, "ncRNA") == 0) {
3668 str = "other RNA";
3669 }
3670 */
3671 if (str == NULL) {
3672 switch (bsp->mol) {
3673 case Seq_mol_dna :
3674 str = "unassigned DNA";
3675 break;
3676 case Seq_mol_rna :
3677 str = "unassigned RNA";
3678 break;
3679 case Seq_mol_aa :
3680 break;
3681 default :
3682 str = "unassigned DNA";
3683 break;
3684 }
3685 }
3686 qvp [SCQUAL_mol_type].str = str;
3687
3688 SubSourceToQualArray (biop->subtype, qvp);
3689
3690 if (orp != NULL) {
3691 onp = orp->orgname;
3692 if (onp != NULL) {
3693 OrgModToQualArray (onp->mod, qvp);
3694 }
3695
3696 if (! is_desc) {
3697 qvp [SCQUAL_unstructured].vnp = orp->mod;
3698 }
3699 qvp [SCQUAL_db_xref].vnp = orp->db;
3700 }
3701
3702 if (sfp != NULL) {
3703 qvp [SCQUAL_org_xref].vnp = sfp->dbxref;
3704 }
3705
3706 /* organelle currently prints /mitochondrion, /virion, etc. */
3707
3708 qvp [SCQUAL_organelle].num = biop->genome;
3709
3710 /* some qualifiers are flags in genome and names in subsource, print once with name */
3711
3712 if (qvp [SCQUAL_ins_seq_name].ssp != NULL &&
3713 qvp [SCQUAL_organelle].num == GENOME_insertion_seq) {
3714 qvp [SCQUAL_organelle].num = 0;
3715 }
3716 if (qvp [SCQUAL_plasmid_name].ssp != NULL &&
3717 qvp [SCQUAL_organelle].num == GENOME_plasmid) {
3718 qvp [SCQUAL_organelle].num = 0;
3719 }
3720 /* AF095904.1
3721 if (qvp [SCQUAL_plastid_name].ssp != NULL &&
3722 qvp [SCQUAL_organelle].num == GENOME_plastid) {
3723 qvp [SCQUAL_organelle].num = 0;
3724 }
3725 */
3726 if (qvp [SCQUAL_transposon_name].ssp != NULL &&
3727 qvp [SCQUAL_organelle].num == GENOME_transposon) {
3728 qvp [SCQUAL_organelle].num = 0;
3729 }
3730
3731 if (sfp != NULL) {
3732 qvp [SCQUAL_seqfeat_note].str = sfp->comment;
3733 }
3734
3735 if (qvp [SCQUAL_fwd_primer_name].ssp != NULL ||
3736 qvp [SCQUAL_fwd_primer_seq].ssp != NULL ||
3737 qvp [SCQUAL_rev_primer_name].ssp != NULL ||
3738 qvp [SCQUAL_rev_primer_seq].ssp != NULL) {
3739 qvp [SCQUAL_PCR_primers].ble = TRUE;
3740 qvp [SCQUAL_PCR_primer_note].ble = TRUE;
3741 }
3742
3743 if (biop->pcr_primers != NULL) {
3744 qvp [SCQUAL_PCR_reaction].prp = biop->pcr_primers;
3745 }
3746
3747 if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
3748 /* leave metagenome_source as a separate qualifier */
3749 } else {
3750 /* move metagenome_source to note */
3751 qvp [SCQUAL_metagenome_note].omp = qvp [SCQUAL_metagenome_source].omp;
3752 qvp [SCQUAL_metagenome_source].omp = NULL;
3753 }
3754
3755 #if 0
3756 if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
3757 /* leave mating_type as a separate qualifier */
3758 } else if (qvp [SCQUAL_sex].ssp == NULL && qvp [SCQUAL_mating_type].ssp != NULL) {
3759 /* move mating_type to sex if available */
3760 qvp [SCQUAL_sex].ssp = qvp [SCQUAL_mating_type].ssp;
3761 qvp [SCQUAL_mating_type].ssp = NULL;
3762 }
3763 #endif
3764
3765 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
3766 if (sdp != NULL) {
3767 mip = (MolInfoPtr) sdp->data.ptrvalue;
3768 if (mip != NULL) {
3769 if (mip->tech == MI_TECH_est || mip->tech == MI_TECH_survey) {
3770 is_est_or_gss = TRUE;
3771 }
3772 }
3773 }
3774
3775 /* now print qualifiers from table */
3776
3777 qualtbl = source_qual_order;
3778 if (is_desc) {
3779 notetbl = source_desc_note_order;
3780 } else {
3781 notetbl = source_feat_note_order;
3782 }
3783
3784 for (i = 0, idx = qualtbl [i]; idx != 0; i++, idx = qualtbl [i]) {
3785
3786 lastomptype = 0;
3787 lastssptype = 0;
3788 switch (asn2gnbk_source_quals [idx].qualclass) {
3789
3790 case Qual_class_ignore :
3791 break;
3792
3793 case Qual_class_string :
3794 if (! StringHasNoText (qvp [idx].str)) {
3795 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
3796 FALSE, FALSE, TILDE_IGNORE);
3797 FFAddTextToString(ffstring, "\"", qvp [idx].str, "\"",
3798 FALSE, FALSE, TILDE_TO_SPACES);
3799 FFAddOneChar(ffstring, '\n', FALSE);
3800 }
3801 break;
3802
3803 case Qual_class_boolean :
3804 if (qvp [idx].ble) {
3805 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "\n",
3806 FALSE, TRUE, TILDE_IGNORE);
3807 }
3808 break;
3809
3810 case Qual_class_organelle :
3811 j = (Int2) qvp [idx].num;
3812 if (j < sizeof (organelleQual) / sizeof (CharPtr)) {
3813 if (organelleQual [j] != NULL) {
3814 FFAddTextToString(ffstring, NULL, organelleQual[j], "\n",
3815 FALSE, FALSE, TILDE_IGNORE);
3816 }
3817 }
3818 break;
3819
3820 case Qual_class_orgmod :
3821 omp = qvp [idx].omp;
3822 if (lastomptype == 0 && omp != NULL) {
3823 lastomptype = omp->subtype;
3824 }
3825 while (omp != NULL && omp->subtype == lastomptype) {
3826 if (StringIsJustQuotes (omp->subname)) {
3827 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
3828 FALSE, TRUE, TILDE_IGNORE);
3829 } else if (! StringHasNoText (omp->subname)) {
3830 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
3831 FALSE, TRUE, TILDE_IGNORE);
3832 FFAddTextToString(ffstring, "\"", omp->subname, "\"\n",
3833 FALSE, TRUE, TILDE_TO_SPACES);
3834 }
3835 omp = omp->next;
3836 }
3837 break;
3838
3839 case Qual_class_voucher :
3840 omp = qvp [idx].omp;
3841 if (lastomptype == 0 && omp != NULL) {
3842 lastomptype = omp->subtype;
3843 }
3844 while (omp != NULL && omp->subtype == lastomptype) {
3845 if (StringIsJustQuotes (omp->subname)) {
3846 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
3847 FALSE, TRUE, TILDE_IGNORE);
3848 } else if (! StringHasNoText (omp->subname)) {
3849 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"",
3850 FALSE, TRUE, TILDE_IGNORE);
3851 FF_www_specimen_voucher(ajp, ffstring, omp->subname);
3852 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
3853 }
3854 omp = omp->next;
3855 }
3856 break;
3857
3858 case Qual_class_lat_lon :
3859 omp = qvp [idx].omp;
3860 if (lastomptype == 0 && omp != NULL) {
3861 lastomptype = omp->subtype;
3862 }
3863 while (omp != NULL && omp->subtype == lastomptype) {
3864 if (StringIsJustQuotes (omp->subname)) {
3865 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
3866 FALSE, TRUE, TILDE_IGNORE);
3867 } else if (! StringHasNoText (omp->subname)) {
3868 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"",
3869 FALSE, TRUE, TILDE_IGNORE);
3870 FF_www_lat_lon(ajp, ffstring, omp->subname);
3871 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
3872 }
3873 omp = omp->next;
3874 }
3875 break;
3876
3877 case Qual_class_subsource :
3878 ssp = qvp [idx].ssp;
3879 if (lastssptype == 0 && ssp != NULL) {
3880 lastssptype = ssp->subtype;
3881 }
3882 while (ssp != NULL && ssp->subtype == lastssptype) {
3883 if (ssp->subtype == SUBSRC_germline ||
3884 ssp->subtype == SUBSRC_rearranged ||
3885 ssp->subtype == SUBSRC_transgenic ||
3886 ssp->subtype == SUBSRC_environmental_sample ||
3887 ssp->subtype == SUBSRC_metagenomic) {
3888 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "\n",
3889 FALSE, TRUE, TILDE_TO_SPACES);
3890 } else if (StringIsJustQuotes (ssp->name)) {
3891 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=\"\"\n",
3892 FALSE, TRUE, TILDE_IGNORE);
3893 } else if (! StringHasNoText (ssp->name)) {
3894 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
3895 FALSE, TRUE, TILDE_IGNORE);
3896 FFAddTextToString(ffstring, "\"", ssp->name, "\"\n",
3897 FALSE, TRUE, TILDE_TO_SPACES);
3898 }
3899 ssp = ssp->next;
3900 }
3901 break;
3902
3903 case Qual_class_pcr :
3904 if (qvp [idx].ble) {
3905 lastssptype = 0;
3906 pset = ParsePCRPrimerString (qvp);
3907 for (vnp = pset; vnp != NULL; vnp = vnp->next) {
3908 psp = (PcrSetPtr) vnp->data.ptrvalue;
3909 if (psp == NULL) continue;
3910 str = NextPCRPrimerString (psp, FALSE, (Boolean) (pset->next != NULL));
3911 if (str == NULL) continue;
3912 if (! StringHasNoText (str)) {
3913 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
3914 FALSE, TRUE, TILDE_IGNORE);
3915 FFAddTextToString(ffstring, "\"", str, "\"\n",
3916 FALSE, TRUE, TILDE_TO_SPACES);
3917 }
3918 MemFree (str);
3919 }
3920 FreePCRSet (pset);
3921 }
3922 break;
3923
3924 case Qual_class_pcr_react :
3925 prp = qvp [idx].prp;
3926 while (prp != NULL) {
3927 str = NextPCRReaction (prp, FALSE, (Boolean) (prp->next != NULL));
3928 if (StringDoesHaveText (str)) {
3929 FFAddTextToString (ffstring, "/", asn2gnbk_source_quals [idx].name, "=",
3930 FALSE, TRUE, TILDE_IGNORE);
3931 FFAddTextToString (ffstring, "\"", str, "\"\n",
3932 FALSE, TRUE, TILDE_TO_SPACES);
3933 }
3934 MemFree (str);
3935 prp = prp->next;
3936 }
3937 break;
3938
3939 case Qual_class_pubset :
3940 break;
3941
3942 case Qual_class_quote :
3943 break;
3944
3945 case Qual_class_noquote :
3946 break;
3947
3948 case Qual_class_label :
3949 break;
3950
3951 case Qual_class_db_xref :
3952 for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
3953 buf [0] = '\0';
3954 dbt = (DbtagPtr) vnp->data.ptrvalue;
3955 if (dbt != NULL && (! StringHasNoText (dbt->db))) {
3956 oip = dbt->tag;
3957 if (oip != NULL) {
3958
3959 okay = TRUE;
3960 if (ajp->flags.dropBadDbxref) {
3961 /* if RELEASE_MODE, drop unknown dbtag */
3962
3963 okay = FALSE;
3964 if (DbxrefIsValid (dbt->db, &is_rf, &is_sc, &is_bc, NULL)) {
3965 if (is_bc) {
3966 /* case counts, so suppress if bad case */
3967 } else if (is_rf && (is_other || is_gps)) {
3968 /* allow refseq dbxrefs in source feature */
3969 okay = TRUE;
3970 } else if (is_sc) {
3971 /* expect it to be in legalSrcDbXrefs list */
3972 okay = TRUE;
3973 } else if (is_est_or_gss) {
3974 /* EST and GSS records only have source feature, so allow anything */
3975 okay = TRUE;
3976 } else {
3977 /* suppress regular dbxrefs, also warn in validator */
3978 }
3979 }
3980
3981 /*
3982 okay = FALSE;
3983 for (j = 0; legalDbXrefs [j] != NULL; j++) {
3984 if (StringCmp (dbt->db, legalDbXrefs [j]) == 0) {
3985 okay = TRUE;
3986 }
3987 }
3988 */
3989 }
3990
3991 if (okay) {
3992 if (! StringHasNoText (oip->str)) {
3993 if (StringLen (dbt->db) + StringLen (oip->str) < 80) {
3994 sprintf (buf, "%s", oip->str);
3995 }
3996 } else {
3997 sprintf (buf, "%ld", (long) oip->id);
3998 }
3999 }
4000 }
4001 }
4002 if (StringDoesHaveText (buf) && dbt != NULL) {
4003 FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
4004 FF_www_db_xref(ajp, ffstring, dbt->db, buf, bsp);
4005 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4006 }
4007 }
4008 break;
4009
4010 case Qual_class_illegal :
4011 break;
4012
4013 case Qual_class_note :
4014 if (! ajp->flags.srcQualsToNote) {
4015
4016 /* in sequin_mode and dump_mode, all orgmods and subsources show up as separate /qualifiers */
4017
4018 for (j = 0, jdx = notetbl [j]; jdx != 0; j++, jdx = notetbl [j]) {
4019
4020 lastomptype = 0;
4021 lastssptype = 0;
4022 switch (asn2gnbk_source_quals [jdx].qualclass) {
4023
4024 case Qual_class_orgmod :
4025 if (jdx == SCQUAL_orgmod_note) break;
4026 omp = qvp [jdx].omp;
4027 if (lastomptype == 0 && omp != NULL) {
4028 lastomptype = omp->subtype;
4029 }
4030 while (omp != NULL && omp->subtype == lastomptype) {
4031 if (StringIsJustQuotes (omp->subname)) {
4032 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
4033 FALSE, TRUE, TILDE_IGNORE);
4034 } else if (! StringHasNoText (omp->subname)) {
4035 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=",
4036 FALSE, TRUE, TILDE_IGNORE);
4037 FFAddTextToString(ffstring, "\"", omp->subname, "\"\n",
4038 FALSE, TRUE, TILDE_TO_SPACES);
4039 }
4040 omp = omp->next;
4041 }
4042 break;
4043
4044 case Qual_class_voucher :
4045 if (jdx == SCQUAL_orgmod_note) break;
4046 omp = qvp [jdx].omp;
4047 if (lastomptype == 0 && omp != NULL) {
4048 lastomptype = omp->subtype;
4049 }
4050 while (omp != NULL && omp->subtype == lastomptype) {
4051 if (StringIsJustQuotes (omp->subname)) {
4052 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
4053 FALSE, TRUE, TILDE_IGNORE);
4054 } else if (! StringHasNoText (omp->subname)) {
4055 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"",
4056 FALSE, TRUE, TILDE_IGNORE);
4057 FF_www_specimen_voucher(ajp, ffstring, omp->subname);
4058 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4059 }
4060 omp = omp->next;
4061 }
4062 break;
4063
4064 case Qual_class_subsource :
4065 if (jdx == SCQUAL_subsource_note) break;
4066 ssp = qvp [jdx].ssp;
4067 if (lastssptype == 0 && ssp != NULL) {
4068 lastssptype = ssp->subtype;
4069 }
4070 while (ssp != NULL && ssp->subtype == lastssptype) {
4071 if (ssp->subtype == SUBSRC_germline ||
4072 ssp->subtype == SUBSRC_rearranged ||
4073 ssp->subtype == SUBSRC_transgenic ||
4074 ssp->subtype == SUBSRC_environmental_sample ||
4075 ssp->subtype == SUBSRC_metagenomic) {
4076 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "\n",
4077 FALSE, TRUE, TILDE_TO_SPACES);
4078 } else if (StringIsJustQuotes (ssp->name)) {
4079 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=\"\"\n",
4080 FALSE, TRUE, TILDE_IGNORE);
4081
4082 } else if (! StringHasNoText (ssp->name)) {
4083 FFAddTextToString(ffstring, "/", asn2gnbk_source_quals [jdx].name, "=",
4084 FALSE, TRUE, TILDE_IGNORE);
4085 FFAddTextToString(ffstring, "\"", ssp->name, "\"\n",
4086 FALSE, TRUE, TILDE_TO_SPACES);
4087 }
4088 ssp = ssp->next;
4089 }
4090 break;
4091
4092 default :
4093 break;
4094 }
4095 }
4096 }
4097
4098 notestr = NULL;
4099 prefix = "";
4100 add_period = FALSE;
4101
4102 if (biop->genome == 8) {
4103 FFAddTextToString(unique, "", "extrachromosomal", NULL, FALSE, FALSE, TILDE_IGNORE);
4104 prefix = "\n";
4105 }
4106
4107 for (j = 0, jdx = notetbl [j]; jdx != 0; j++, jdx = notetbl [j]) {
4108
4109 lastomptype = 0;
4110 lastssptype = 0;
4111 switch (asn2gnbk_source_quals [jdx].qualclass) {
4112
4113 case Qual_class_string :
4114 if (! StringHasNoText (qvp [jdx].str)) {
4115 FFAddString_NoRedund (unique, prefix, qvp [jdx].str, NULL, FALSE);
4116 add_period = FALSE;
4117 prefix = "\n";
4118 }
4119 break;
4120
4121 case Qual_class_orgmod :
4122 case Qual_class_voucher :
4123 if ((! ajp->flags.srcQualsToNote) && jdx != SCQUAL_orgmod_note) break;
4124 omp = qvp [jdx].omp;
4125 if (lastomptype == 0 && omp != NULL) {
4126 lastomptype = omp->subtype;
4127 }
4128 while (omp != NULL && omp->subtype == lastomptype) {
4129 if (! StringHasNoText (omp->subname)) {
4130 if (jdx == SCQUAL_orgmod_note) {
4131 sprintf (buf, "%s", prefix);
4132 } else {
4133 sprintf (buf, "%s%s: ", prefix, asn2gnbk_source_quals [jdx].name);
4134 }
4135
4136 str = StringSave (omp->subname);
4137 add_period = s_RemovePeriodFromEnd (str);
4138 if (jdx == SCQUAL_orgmod_note) {
4139 FFAddString_NoRedund (unique, buf, str, NULL, FALSE);
4140 } else {
4141 FFAddTextToString(unique, buf, str, NULL, FALSE, FALSE, TILDE_IGNORE);
4142 }
4143 MemFree (str);
4144
4145 if (jdx == SCQUAL_orgmod_note) {
4146 if (add_period) {
4147 prefix = ".\n";
4148 } else {
4149 prefix = "\n";
4150 }
4151 } else {
4152 prefix = "; ";
4153 }
4154 }
4155 omp = omp->next;
4156 }
4157 break;
4158
4159 case Qual_class_subsource :
4160 if ((! ajp->flags.srcQualsToNote) && jdx != SCQUAL_subsource_note) break;
4161 ssp = qvp [jdx].ssp;
4162 if (lastssptype == 0 && ssp != NULL) {
4163 lastssptype = ssp->subtype;
4164 }
4165 while (ssp != NULL && ssp->subtype == lastssptype) {
4166 if (ssp->subtype == SUBSRC_germline ||
4167 ssp->subtype == SUBSRC_rearranged ||
4168 ssp->subtype == SUBSRC_transgenic ||
4169 ssp->subtype == SUBSRC_environmental_sample ||
4170 ssp->subtype == SUBSRC_metagenomic) {
4171 FFAddTextToString (unique, prefix, asn2gnbk_source_quals [jdx].name, NULL, FALSE, FALSE, TILDE_IGNORE);
4172 prefix = "; ";
4173 } else if (! StringHasNoText (ssp->name)) {
4174 if (jdx == SCQUAL_subsource_note) {
4175 sprintf (buf, "%s", prefix);
4176 } else {
4177 sprintf (buf, "%s%s: ", prefix, asn2gnbk_source_quals [jdx].name);
4178 }
4179
4180 str = StringSave (ssp->name);
4181 add_period = s_RemovePeriodFromEnd (str);
4182 if (jdx == SCQUAL_subsource_note) {
4183 FFAddString_NoRedund (unique, buf, str, NULL, FALSE);
4184 } else {
4185 FFAddTextToString(unique, buf, str, NULL, FALSE, FALSE, TILDE_IGNORE);
4186 }
4187 MemFree (str);
4188
4189 if (jdx == SCQUAL_subsource_note) {
4190 if (add_period) {
4191 prefix = ".\n";
4192 } else {
4193 prefix = "\n";
4194 }
4195 } else {
4196 prefix = "; ";
4197 }
4198 }
4199 ssp = ssp->next;
4200 }
4201 break;
4202
4203 case Qual_class_pcr :
4204 if (qvp [jdx].ble) {
4205 lastssptype = 0;
4206 pset = ParsePCRPrimerString (qvp);
4207 for (vnp = pset; vnp != NULL; vnp = vnp->next) {
4208 psp = (PcrSetPtr) vnp->data.ptrvalue;
4209 if (psp == NULL) continue;
4210 str = NextPCRPrimerString (psp, TRUE, (Boolean) (pset->next != NULL));
4211 if (str == NULL) continue;
4212 if (! StringHasNoText (str)) {
4213 FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
4214 add_period = FALSE;
4215 prefix = "; ";
4216 }
4217 MemFree (str);
4218 }
4219 FreePCRSet (pset);
4220 }
4221 break;
4222
4223 case Qual_class_pcr_react :
4224 prp = qvp [jdx].prp;
4225 while (prp != NULL) {
4226 str = NextPCRReaction (prp, TRUE, (Boolean) (prp->next != NULL));
4227 if (StringDoesHaveText (str)) {
4228 FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
4229 add_period = FALSE;
4230 prefix = "; ";
4231 }
4232 MemFree (str);
4233 prp = prp->next;
4234 }
4235 break;
4236
4237 case Qual_class_valnode :
4238 for (vnp = qvp [jdx].vnp; vnp != NULL; vnp = vnp->next) {
4239 str = (CharPtr) vnp->data.ptrvalue;
4240 if (! StringHasNoText (str)) {
4241 FFAddString_NoRedund (unique, prefix, str, NULL, FALSE);
4242 add_period = FALSE;
4243 prefix = "; ";
4244 }
4245 }
4246 break;
4247
4248 default :
4249 break;
4250 }
4251 }
4252 if ( !FFEmpty(unique) ) {
4253 notestr = FFToCharPtr(unique);
4254
4255 if (add_period) {
4256 s_AddPeriodToEnd (notestr);
4257 }
4258
4259 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
4260 if (! IsEllipsis (notestr))
4261 s_RemovePeriodFromEnd (notestr);
4262 #endif
4263
4264 FFAddOneString (ffstring, "/note=\"", FALSE, FALSE, TILDE_IGNORE);
4265 if (is_desc) {
4266 /* AB055064.1 said TILDE_IGNORE on descriptors, but now changing policy */
4267 FFAddOneString (ffstring, notestr, FALSE, TRUE, /* TILDE_IGNORE */ /* TILDE_EXPAND */ TILDE_SEMICOLON);
4268 } else {
4269 /* ASZ93724.1 said TILDE_EXPAND on features, but record does not exist */
4270 FFAddOneString (ffstring, notestr, FALSE, TRUE, /* TILDE_EXPAND */ TILDE_SEMICOLON);
4271 }
4272 FFAddOneString (ffstring, "\"", FALSE, FALSE, TILDE_IGNORE);
4273
4274 MemFree (notestr);
4275 }
4276 break;
4277 default :
4278 break;
4279 }
4280 }
4281
4282 /* and then deal with the various note types separately (not in order table) */
4283
4284 str = FFEndPrint(ajp, ffstring, afp->format, 21, 21, 5, 21, "FT");
4285
4286 /* optionally populate gbseq for XML-ized GenBank format */
4287
4288 if (gbseq != NULL) {
4289 if (gbfeat != NULL) {
4290 AddFeatureToGbseq (gbseq, gbfeat, str, NULL);
4291 }
4292 }
4293
4294 FFRecycleString(ajp, unique);
4295 FFRecycleString(ajp, ffstring);
4296 return str;
4297 }
4298
4299 static void LIBCALLBACK CountBasesByStream (
4300 CharPtr sequence,
4301 Pointer userdata
4302 )
4303
4304 {
4305 Int4Ptr base_count;
4306 Char ch;
4307 CharPtr ptr;
4308
4309 base_count = (Int4Ptr) userdata;
4310
4311 ptr = sequence;
4312 ch = *ptr;
4313 while (ch != '\0') {
4314 ch = TO_UPPER (ch);
4315 switch (ch) {
4316 case 'A' :
4317 (base_count [0])++;
4318 break;
4319 case 'C' :
4320 (base_count [1])++;
4321 break;
4322 case 'G' :
4323 (base_count [2])++;
4324 break;
4325 case 'T' :
4326 (base_count [3])++;
4327 break;
4328 default :
4329 (base_count [4])++;
4330 break;
4331 }
4332 ptr++;
4333 ch = *ptr;
4334 }
4335 }
4336
4337 NLM_EXTERN CharPtr FormatBasecountBlock (
4338 Asn2gbFormatPtr afp,
4339 BaseBlockPtr bbp
4340 )
4341
4342 {
4343 IntAsn2gbJobPtr ajp;
4344 Asn2gbSectPtr asp;
4345 Int4 base_count [5];
4346 BioseqPtr bsp;
4347 Char buf [80];
4348 Int2 i;
4349 Int4 len;
4350 StringItemPtr ffstring;
4351 CharPtr str;
4352
4353 if (afp == NULL || bbp == NULL) return NULL;
4354 ajp = afp->ajp;
4355 if (ajp == NULL) return NULL;
4356
4357 asp = afp->asp;
4358 if (asp == NULL) return NULL;
4359 bsp = (asp->bsp);
4360 if (bsp == NULL) return NULL;
4361
4362 /* after first formatting, result is cached into bbp->string */
4363
4364 if (! StringHasNoText (bbp->string)) return StringSave (bbp->string);
4365
4366 for (i = 0; i < 5; i++) {
4367 base_count [i] = 0;
4368 }
4369
4370 if (ajp->ajp.slp != NULL) {
4371 len = SeqLocLen (ajp->ajp.slp);
4372 SeqPortStreamLoc (ajp->ajp.slp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
4373 } else {
4374 len = bsp->length;
4375 SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) base_count, CountBasesByStream);
4376 }
4377
4378 if (afp->format == GENBANK_FMT || afp->format == GENPEPT_FMT) {
4379
4380 if (base_count [4] == 0) {
4381 sprintf (buf, "%7ld a%7ld c%7ld g%7ld t",
4382 (long) base_count [0], (long) base_count [1],
4383 (long) base_count [2], (long) base_count [3]);
4384 } else {
4385 sprintf (buf, "%7ld a%7ld c%7ld g%7ld t%7ld others",
4386 (long) base_count [0], (long) base_count [1],
4387 (long) base_count [2], (long) base_count [3],
4388 (long) base_count [4]);
4389 }
4390
4391 } else if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
4392
4393 sprintf (buf, "Sequence %ld BP; %ld A; %ld C; %ld G; %ld T; %ld other;",
4394 (long) len,
4395 (long) base_count [0], (long) base_count [1],
4396 (long) base_count [2], (long) base_count [3],
4397 (long) base_count [4]);
4398 }
4399
4400 ffstring = FFGetString(ajp);
4401 if ( ffstring == NULL ) return NULL;
4402
4403 if (afp->format == EMBL_FMT || afp->format == EMBLPEPT_FMT) {
4404 FFAddOneString(ffstring, "XX\n", FALSE, FALSE, TILDE_IGNORE);
4405 }
4406 FFStartPrint (ffstring, afp->format, 0, 0, "BASE COUNT", 12, 5, 5, "SQ", FALSE);
4407 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
4408 str = FFEndPrint(ajp, ffstring, afp->format, 12, 0, 5, 5, "SQ");
4409 FFRecycleString(ajp, ffstring);
4410
4411 return str;
4412 }
4413
4414 static void PrintSeqLine (
4415 StringItemPtr ffstring,
4416 FmtType format,
4417 CharPtr buf,
4418 Int4 start,
4419 Int4 stop
4420 )
4421
4422 {
4423 size_t len;
4424 Char pos [16];
4425 Int4 pad;
4426
4427 len = StringLen (buf);
4428 if (len > 0 && buf [len - 1] == ' ') {
4429 buf [len - 1] = '\0';
4430 }
4431
4432 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
4433
4434 sprintf (pos, "%9ld", (long) (start + 1));
4435 FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES);
4436 FFAddOneChar(ffstring, ' ', FALSE);
4437 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
4438 FFAddOneChar(ffstring, '\n', FALSE);
4439 } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
4440
4441 sprintf (pos, "%8ld", (long) (stop));
4442 FFAddNChar(ffstring, ' ', 5, FALSE);
4443 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
4444 pad = 72 - 5 - StringLen(buf);
4445 FFAddNChar(ffstring, ' ', pad, FALSE);
4446 FFAddOneString(ffstring, pos, FALSE, FALSE, TILDE_TO_SPACES);
4447 FFAddOneChar(ffstring, '\n', FALSE);
4448 }
4449 }
4450
4451 static CharPtr CompressNonBases (CharPtr str)
4452
4453 {
4454 Char ch;
4455 CharPtr dst;
4456 CharPtr ptr;
4457
4458 if (str == NULL || str [0] == '\0') return NULL;
4459
4460 dst = str;
4461 ptr = str;
4462 ch = *ptr;
4463 while (ch != '\0') {
4464 if (IS_ALPHA (ch)) {
4465 *dst = ch;
4466 dst++;
4467 }
4468 ptr++;
4469 ch = *ptr;
4470 }
4471 *dst = '\0';
4472
4473 return str;
4474 }
4475
/*
 * Ranking table with one entry per Seq-id choice number (the index is
 * given in the comment on each line).  PrintGenome passes it, together
 * with NUM_SEQID, to SeqIdSelect to pick which identifier of a chain
 * gets written out.
 *
 * NOTE(review): the values look like priorities where a smaller number is
 * preferred and 255 (gi) effectively excludes that type - confirm against
 * the SeqIdSelect documentation before changing any entry.
 */
static Uint1 fasta_order [NUM_SEQID] = {
  33, /* 0 = not set */
  20, /* 1 = local Object-id */
  15, /* 2 = gibbsq */
  16, /* 3 = gibbmt */
  30, /* 4 = giim Giimport-id */
  10, /* 5 = genbank */
  10, /* 6 = embl */
  10, /* 7 = pir */
  10, /* 8 = swissprot */
  15, /* 9 = patent */
  20, /* 10 = other TextSeqId */
  20, /* 11 = general Dbtag */
  255, /* 12 = gi */
  10, /* 13 = ddbj */
  10, /* 14 = prf */
  12, /* 15 = pdb */
  10, /* 16 = tpg */
  10, /* 17 = tpe */
  10, /* 18 = tpd */
  10, /* 19 = gpp */
  10 /* 20 = nat */
};
4499
4500 static void PrintGenome (
4501 IntAsn2gbJobPtr ajp,
4502 StringItemPtr ffstring,
4503 SeqLocPtr slp_head,
4504 CharPtr prefix,
4505 Boolean segWithParts,
4506 Boolean is_na
4507 )
4508 {
4509 Char buf[40], gibuf [32], vbuf [80];
4510 Boolean first = TRUE;
4511 SeqIdPtr freeid = NULL, sid = NULL, newid = NULL;
4512 SeqLocPtr slp = NULL;
4513 Int4 from = 0, to = 0, start = 0, stop = 0, gi = 0;
4514 BioseqPtr bsp = NULL;
4515 Int2 p1 = 0, p2 = 0;
4516
4517 buf [0] = '\0';
4518 gibuf [0] = '\0';
4519 vbuf [0] = '\0';
4520 for (slp = slp_head; slp; slp = slp->next) {
4521 from = to = 0;
4522 sid = SeqLocId (slp);
4523 if (slp->choice == SEQLOC_INT || slp->choice == SEQLOC_WHOLE) {
4524 start = from = SeqLocStart (slp);
4525 stop = to = SeqLocStop (slp);
4526 } else if (slp->choice == SEQLOC_NULL){
4527 sprintf (vbuf, ",%s", "gap()");
4528 FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4529 continue;
4530 } else {
4531 continue;
4532 }
4533 if (sid == NULL) {
4534 continue;
4535 }
4536 newid = NULL;
4537 freeid = NULL;
4538 buf [0] = '\0';
4539 gi = 0;
4540 if (sid->choice == SEQID_GI) {
4541 gi = sid->data.intvalue;
4542 if (GetAccnVerFromServer (gi, buf)) {
4543 /* no need to call GetSeqIdForGI */
4544 } else {
4545 newid = GetSeqIdForGI (gi);
4546 if (newid != NULL) {
4547 freeid = newid;
4548 }
4549 if (newid != NULL && segWithParts) {
4550 if (newid->choice == SEQID_GIBBSQ ||
4551 newid->choice == SEQID_GIBBMT ||
4552 newid->choice == SEQID_GIIM) {
4553 bsp = BioseqFind (newid);
4554 if (bsp != NULL && bsp->repr == Seq_repr_virtual) {
4555 if (bsp->length > 0) {
4556 sprintf (vbuf, ",gap(%ld)", (long) bsp->length);
4557 } else {
4558 sprintf (vbuf, ",%s", "gap()");
4559 }
4560 FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4561 continue;
4562 }
4563 }
4564 }
4565 }
4566 } else if (sid->choice == SEQID_GENERAL) {
4567 newid = sid;
4568 } else {
4569 newid = sid;
4570 gi = GetGIForSeqId (sid);
4571 }
4572 if (prefix != NULL) {
4573 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
4574 }
4575 if (first) {
4576 first = FALSE;
4577 } else {
4578 FFAddOneChar (ffstring, ',', FALSE);
4579 /*ff_AddChar(',');*/
4580 }
4581 if (! StringHasNoText (buf)) {
4582 /* filled in by GetAccnVerFromServer */
4583 } else if (newid != NULL) {
4584 SeqIdWrite (SeqIdSelect (newid, fasta_order, NUM_SEQID),
4585 buf, PRINTID_TEXTID_ACC_VER, sizeof(buf) -1 );
4586 } else if (sid->choice == SEQID_GI) {
4587 SeqIdWrite (sid, buf, PRINTID_FASTA_LONG, sizeof (buf) - 1);
4588 }
4589
4590 if (SeqLocStrand (slp) == Seq_strand_minus) {
4591 FFAddOneString (ffstring, "complement(", FALSE, FALSE, TILDE_IGNORE);
4592 }
4593 if ( GetWWW (ajp) && gi > 0) {
4594 if (newid == NULL) {
4595 newid = sid;
4596 }
4597 if (newid->choice != SEQID_GENERAL) {
4598 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4599 if (is_na) {
4600 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
4601 } else {
4602 FF_Add_NCBI_Base_URL (ffstring, link_seqp);
4603 }
4604 sprintf (gibuf, "%ld", (long) gi);
4605 FFAddTextToString (ffstring, /* "val=" */ NULL, gibuf, "\">", FALSE, FALSE, TILDE_IGNORE);
4606 FFAddTextToString (ffstring, NULL, buf, "</a>", FALSE, FALSE, TILDE_IGNORE);
4607 }
4608 } else {
4609 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4610 }
4611
4612 if (SeqLocStrand (slp) == Seq_strand_minus) {
4613 sprintf (vbuf,":%ld..%ld)", (long) start+1, (long) stop+1);
4614 } else {
4615 sprintf (vbuf,":%ld..%ld", (long) start+1, (long) stop+1);
4616 }
4617 FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4618 p1 += StringLen (vbuf);
4619 p2 += StringLen (vbuf);
4620 if (freeid != NULL) {
4621 freeid = SeqIdFree (freeid);
4622 }
4623 }
4624 }
4625
4626 NLM_EXTERN CharPtr FormatContigBlock (
4627 Asn2gbFormatPtr afp,
4628 BaseBlockPtr bbp
4629 )
4630
4631 {
4632 IntAsn2gbJobPtr ajp;
4633 Asn2gbSectPtr asp;
4634 BioseqPtr bsp;
4635 DeltaSeqPtr dsp;
4636 IntFuzzPtr fuzz;
4637 GBSeqPtr gbseq;
4638 Boolean is_na;
4639 SeqLitPtr litp;
4640 CharPtr prefix = NULL;
4641 Boolean segWithParts = FALSE;
4642 SeqLocPtr slp_head = NULL;
4643 CharPtr str;
4644 Char tmp [16];
4645 Boolean unknown;
4646 Char vbuf [32];
4647 StringItemPtr ffstring;
4648 /* CharPtr label;*/
4649
4650 if (afp == NULL || bbp == NULL) return NULL;
4651 ajp = afp->ajp;
4652 if (ajp == NULL) return NULL;
4653 asp = afp->asp;
4654 if (asp == NULL) return NULL;
4655 bsp = (asp->bsp);
4656 if (bsp == NULL) return NULL;
4657
4658 ffstring = FFGetString (ajp);
4659 if ( ffstring == NULL ) return NULL;
4660
4661 is_na = ISA_na (bsp->mol);
4662
4663 FFStartPrint (ffstring, afp->format, 0, 0, "CONTIG", 12, 5, 5, "CO", FALSE);
4664 /*
4665 if ( GetWWW(ajp) ) {
4666 label = "CONTIG ";
4667 } else {
4668 label = "CONTIG";
4669 }
4670
4671 FFAddOneString(ffstring, label, FALSE, FALSE, TILDE_IGNORE);
4672 FFAddNChar(ffstring, ' ', 12 - StringLen(label), FALSE);
4673 */
4674
4675 FFAddOneString (ffstring, "join(", FALSE, FALSE, TILDE_IGNORE);
4676
4677 if (bsp->seq_ext_type == 1) {
4678
4679 if (bsp->repr == Seq_repr_seg && SegHasParts (bsp)) {
4680 segWithParts = TRUE;
4681 }
4682
4683 slp_head = (SeqLocPtr) bsp->seq_ext;
4684 PrintGenome (ajp, ffstring, slp_head, prefix, segWithParts, is_na);
4685
4686 } else if (bsp->seq_ext_type == 4) {
4687
4688 for (dsp = (DeltaSeqPtr) bsp->seq_ext; dsp != NULL; dsp=dsp->next) {
4689 if (dsp->choice == 1) {
4690
4691 slp_head = (SeqLocPtr) dsp->data.ptrvalue;
4692 PrintGenome (ajp, ffstring, slp_head, prefix, FALSE, is_na);
4693
4694 } else {
4695
4696 litp = (SeqLitPtr) dsp->data.ptrvalue;
4697 if (litp != NULL) {
4698 if (litp->seq_data != NULL && litp->seq_data_type != Seq_code_gap) {
4699 if (litp->length == 0) {
4700 sprintf (vbuf, "gap(%ld)", (long) litp->length);
4701 FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4702 } else {
4703 /* don't know what to do here */
4704 }
4705 } else {
4706 unknown = FALSE;
4707 fuzz = litp->fuzz;
4708 if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
4709 unknown = TRUE;
4710 }
4711 if (unknown && litp->length > 0) {
4712 sprintf (tmp, "unk%ld", (long) litp->length);
4713 } else {
4714 sprintf (tmp, "%ld", (long) litp->length);
4715 }
4716 if (prefix != NULL) {
4717 sprintf (vbuf, "%sgap(%s)", prefix, tmp);
4718 } else {
4719 sprintf (vbuf, "gap(%s)", tmp);
4720 }
4721 FFAddOneString (ffstring, vbuf, FALSE, FALSE, TILDE_IGNORE);
4722 }
4723 }
4724 }
4725
4726 prefix = ",";
4727 }
4728 }
4729
4730 FFAddOneChar (ffstring, ')', FALSE);
4731
4732 str = FFEndPrint (ajp, ffstring, afp->format, 12, 12, 5, 5, "CO");
4733 FFRecycleString (ajp, ffstring);
4734
4735 /* optionally populate gbseq for XML-ized GenBank format */
4736
4737 if (ajp->gbseq) {
4738 gbseq = &asp->gbseq;
4739 } else {
4740 gbseq = NULL;
4741 }
4742
4743 if (gbseq != NULL) {
4744 if (StringLen (str) > 12) {
4745 gbseq->contig = StringSave (str + 12);
4746 } else {
4747 gbseq->contig = StringSave (str);
4748 }
4749
4750 CleanQualValue (gbseq->contig);
4751 Asn2gnbkCompressSpaces (gbseq->contig);
4752 StripAllSpaces (gbseq->contig);
4753 }
4754
4755 return str;
4756 }
4757
4758 static void LIBCALLBACK SaveGBSeqSequence (
4759 CharPtr sequence,
4760 Pointer userdata
4761 )
4762
4763 {
4764 CharPtr tmp;
4765 CharPtr PNTR tmpp;
4766
4767 tmpp = (CharPtr PNTR) userdata;
4768 tmp = *tmpp;
4769
4770 tmp = StringMove (tmp, sequence);
4771
4772 *tmpp = tmp;
4773 }
4774
4775 static Boolean InGapBlock (
4776 IntAsn2gbJobPtr ajp
4777 )
4778
4779 {
4780 return (Boolean) (ajp->seqGapCurrLen > 0);
4781 }
4782
4783 static Boolean LineIsAllGaps (
4784 CharPtr ptr
4785 )
4786
4787 {
4788 Char ch;
4789 Int2 j;
4790
4791 for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
4792 if (ch != '-') return FALSE;
4793 }
4794 if (j == 60) return TRUE;
4795 return FALSE;
4796 }
4797
4798 static Int2 GapAtStart (
4799 CharPtr ptr
4800 )
4801
4802 {
4803 Char ch;
4804 Int2 j;
4805
4806 for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
4807 if (ch != '-') return j;
4808 }
4809 return 0;
4810 }
4811
4812 static void FixGapAtStart (
4813 CharPtr ptr,
4814 Char pad
4815 )
4816
4817 {
4818 Char ch;
4819 Int2 j;
4820
4821 for (ch = *ptr, j = 0; ch == '-' && j < 60; ptr++, ch = *ptr, j++) {
4822 *ptr = pad;
4823 }
4824 }
4825
4826 static Int2 GapAtEnd (
4827 CharPtr ptr
4828 )
4829
4830 {
4831 Char ch;
4832 Int2 j;
4833 Int2 k;
4834
4835 for (ch = *ptr, j = 0, k = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
4836 if (ch == '-') {
4837 k++;
4838 } else {
4839 k = 0;
4840 }
4841 }
4842 return k;
4843 }
4844
4845 static void FixGapAtEnd (
4846 CharPtr ptr,
4847 Char pad
4848 )
4849
4850 {
4851 Char ch;
4852 Int2 j;
4853
4854 j = StringLen (ptr) - GapAtEnd (ptr);
4855 ptr += j;
4856 for (ch = *ptr; ch == '-' && j < 60; ptr++, ch = *ptr, j++) {
4857 *ptr = pad;
4858 }
4859 }
4860
4861 static void FixRemainingGaps (
4862 CharPtr ptr,
4863 Char pad
4864 )
4865
4866 {
4867 Char ch;
4868 Int2 j;
4869
4870 for (ch = *ptr, j = 0; ch != '\0' && j < 60; ptr++, ch = *ptr, j++) {
4871 if (ch == '-') {
4872 *ptr = pad;
4873 }
4874 }
4875 }
4876
4877 static void ExpandSeqLine (
4878 CharPtr buf
4879 )
4880
4881 {
4882 Char ch;
4883 Int2 blk, count, lin;
4884 CharPtr ptr;
4885 Char seq [80];
4886
4887 StringCpy (seq, buf);
4888
4889 count = 0;
4890 blk = 0;
4891 lin = 0;
4892
4893 ptr = seq;
4894 ch = *ptr;
4895
4896 while (ch != '\0') {
4897 buf [count] = ch;
4898 count++;
4899 ptr++;
4900 ch = *ptr;
4901
4902 blk++;
4903 lin++;
4904 if (blk >= 10 && lin < 60) {
4905
4906 buf [count] = ' ';
4907 count++;
4908 blk = 0;
4909
4910 }
4911 }
4912
4913 buf [count] = '\0';
4914 }
4915
4916 static Int2 ProcessGapSpecialFormat (
4917 Asn2gbFormatPtr afp,
4918 IntAsn2gbJobPtr ajp,
4919 BioseqPtr bsp,
4920 StringItemPtr ffstring,
4921 CharPtr buf,
4922 CharPtr nextchars
4923 )
4924
4925 {
4926 Char fmt_buf [64];
4927 Char gapbuf [80];
4928 Int4 gi;
4929 Char gi_buf [16];
4930 Boolean is_na;
4931 Char pad;
4932 SeqIdPtr sip;
4933 Int2 startgapgap = 0, endgap = 0;
4934
4935 is_na = ISA_na (bsp->mol);
4936 if (is_na) {
4937 pad = 'n';
4938 } else {
4939 pad = 'x';
4940 }
4941
4942 if (LineIsAllGaps (buf)) {
4943 ajp->seqGapCurrLen += StringLen (buf);
4944 *buf = '\0';
4945 return 0;
4946 }
4947
4948 startgapgap = GapAtStart (buf);
4949 if (InGapBlock (ajp)) {
4950 ajp->seqGapCurrLen += startgapgap;
4951 if (is_na) {
4952 sprintf (gapbuf, " [gap %ld bp]", (long) ajp->seqGapCurrLen);
4953 } else {
4954 sprintf (gapbuf, " [gap %ld aa]", (long) ajp->seqGapCurrLen);
4955 }
4956 FFAddOneString (ffstring, gapbuf, FALSE, FALSE, TILDE_TO_SPACES);
4957 if (GetWWW (ajp) && ajp->mode == ENTREZ_MODE && afp != NULL &&
4958 (ajp->format == GENBANK_FMT || ajp->format == GENPEPT_FMT)) {
4959 gi = 0;
4960 for (sip = bsp->id; sip != NULL; sip = sip->next) {
4961 if (sip->choice == SEQID_GI) {
4962 gi = (Int4) sip->data.intvalue;
4963 }
4964 }
4965 if (gi > 0) {
4966 sprintf(gi_buf, "%ld", (long) gi);
4967 sprintf(fmt_buf, "?fmt_mask=%ld", (long) EXPANDED_GAP_DISPLAY);
4968 if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
4969 StringCat (fmt_buf, "&report=gbwithparts");
4970 }
4971 FFAddOneString (ffstring, " <a href=\"", FALSE, FALSE, TILDE_IGNORE);
4972 if (is_na) {
4973 FF_Add_NCBI_Base_URL (ffstring, link_featn);
4974 } else {
4975 FF_Add_NCBI_Base_URL (ffstring, link_featp);
4976 }
4977 FFAddOneString (ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE);
4978 FFAddOneString (ffstring, fmt_buf, FALSE, FALSE, TILDE_IGNORE);
4979 FFAddOneString (ffstring, "\">Expand Ns", FALSE, FALSE, TILDE_IGNORE);
4980 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4981 }
4982 }
4983 FFAddOneChar (ffstring, '\n', FALSE);
4984 ajp->seqGapCurrLen = 0;
4985 FixGapAtStart (buf, ' ');
4986 } else if (startgapgap > 0) {
4987 FixGapAtStart (buf, pad);
4988 }
4989
4990 endgap = GapAtEnd (buf);
4991 if (LineIsAllGaps (nextchars)) {
4992 FixGapAtEnd (buf, ' ');
4993 ajp->seqGapCurrLen += endgap;
4994 } else if (endgap > 0) {
4995 /*
4996 FixGapAtEnd (buf, pad);
4997 */
4998 FixGapAtEnd (buf, ' ');
4999 ajp->seqGapCurrLen += endgap;
5000 }
5001
5002 FixRemainingGaps (buf, pad);
5003
5004 return startgapgap;
5005 }
5006
5007 /*
5008 static void ChangeOandJtoX (CharPtr str)
5009
5010 {
5011 Char ch;
5012
5013 if (str == NULL) return;
5014 ch = *str;
5015 while (ch != '\0') {
5016 if (ch == 'O' || ch == 'J') {
5017 *str = 'X';
5018 } else if (ch == 'o' || ch == 'j') {
5019 *str = 'x';
5020 }
5021 str++;
5022 ch = *str;
5023 }
5024 }
5025 */
5026
5027 NLM_EXTERN CharPtr FormatSequenceBlock (
5028 Asn2gbFormatPtr afp,
5029 BaseBlockPtr bbp
5030 )
5031
5032 {
5033 IntAsn2gbJobPtr ajp;
5034 Asn2gbSectPtr asp;
5035 Int2 blk;
5036 BioseqPtr bsp;
5037 Bioseq bsq;
5038 Char buf [80];
5039 Char ch;
5040 Int2 count;
5041 Int4 extend;
5042 StreamFlgType flags = STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL;
5043 GBSeqPtr gbseq;
5044 IntAsn2gbSectPtr iasp;
5045 Int2 lin;
5046 SeqLocPtr loc;
5047 Int4 num;
5048 CharPtr ptr;
5049 Int4 remaining;
5050 SeqBlockPtr sbp;
5051 SeqLoc sl;
5052 SeqLocPtr slp;
5053 Int4 start;
5054 Int2 startgapgap;
5055 Int4 stop;
5056 CharPtr str = NULL;
5057 CharPtr tmp;
5058 StringItemPtr ffstring;
5059
5060 if (afp == NULL || bbp == NULL) return NULL;
5061 sbp = (SeqBlockPtr) bbp;
5062 ajp = afp->ajp;
5063 if (ajp == NULL) return NULL;
5064 asp = afp->asp;
5065 if (asp == NULL) return NULL;
5066 iasp = (IntAsn2gbSectPtr) asp;
5067 bsp = (asp->bsp);
5068 if (bsp == NULL) return NULL;
5069
5070 /* if GBSeq XML, use SeqPortStream on single block */
5071
5072 if (ajp->gbseq) {
5073 gbseq = &asp->gbseq;
5074
5075 if (ajp->ajp.slp != NULL) {
5076 slp = ajp->ajp.slp;
5077 str = MemNew (sizeof (Char) * (SeqLocLen (slp) + 10));
5078 } else {
5079 str = MemNew (sizeof (Char) * (bsp->length + 10));
5080 }
5081 if (str == NULL) return NULL;
5082
5083 tmp = str;
5084 if (ajp->ajp.slp != NULL) {
5085 slp = ajp->ajp.slp;
5086 SeqPortStreamLoc (slp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &tmp, SaveGBSeqSequence);
5087 } else {
5088 SeqPortStream (bsp, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &tmp, SaveGBSeqSequence);
5089 }
5090 /*
5091 if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
5092 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
5093 ChangeOandJtoX (str);
5094 }
5095 }
5096 */
5097 gbseq->sequence = StringSave (str);
5098
5099 tmp = gbseq->sequence;
5100 if (tmp == NULL) return NULL;
5101 ch = *tmp;
5102 while (ch != '\0') {
5103 if (ch == '\n' || ch == '\r' || ch == '\t') {
5104 *tmp = ' ';
5105 } else if (IS_UPPER (ch)) {
5106 /* collab decision to present target sequence in lower case */
5107 *tmp = TO_LOWER (ch);
5108 }
5109 tmp++;
5110 ch = *tmp;
5111 }
5112 TrimSpacesAroundString (gbseq->sequence);
5113 CompressNonBases (gbseq->sequence);
5114
5115 return str;
5116 }
5117
5118 /* replace SeqPort with improved SeqPortStream */
5119
5120 if (sbp->bases == NULL) {
5121 if (ajp->specialGapFormat) {
5122 flags = EXPAND_GAPS_TO_DASHES | STREAM_CORRECT_INVAL;
5123 }
5124
5125 start = sbp->start;
5126 stop = sbp->stop;
5127 extend = sbp->extend;
5128
5129 if (stop > start) {
5130
5131 str = MemNew (sizeof (Char) * (extend - start + 3));
5132 if (str != NULL) {
5133 if (ajp->ajp.slp != NULL) {
5134 slp = ajp->ajp.slp;
5135 MemSet ((Pointer) &bsq, 0, sizeof (Bioseq));
5136 MemSet ((Pointer) &sl, 0, sizeof (SeqLoc));
5137 bsq.repr = Seq_repr_seg;
5138 bsq.mol = bsp->mol;
5139 bsq.seq_ext_type = 1;
5140 bsq.length = SeqLocLen (slp);
5141 bsq.seq_ext = &sl;
5142 if (slp->choice == SEQLOC_MIX || slp->choice == SEQLOC_PACKED_INT) {
5143 loc = (SeqLocPtr) slp->data.ptrvalue;
5144 if (loc != NULL) {
5145 sl.choice = loc->choice;
5146 sl.data.ptrvalue = (Pointer) loc->data.ptrvalue;
5147 sl.next = loc->next;
5148 }
5149 } else {
5150 sl.choice = slp->choice;
5151 sl.data.ptrvalue = (Pointer) slp->data.ptrvalue;
5152 sl.next = NULL;
5153 }
5154 SeqPortStreamInt (&bsq, start, extend - 1, Seq_strand_plus, flags, (Pointer) str, NULL);
5155 } else {
5156 num = SeqPortStreamInt (bsp, start, extend - 1, Seq_strand_plus, flags, (Pointer) str, NULL);
5157 if (num < 1) {
5158 /* flag possible inconsistency between bsp->length and actual sequence data length */
5159 ajp->relModeError = TRUE;
5160 return NULL;
5161 }
5162 }
5163 /*
5164 if (ISA_aa (bsp->mol) && StringDoesHaveText (str)) {
5165 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
5166 ChangeOandJtoX (str);
5167 }
5168 }
5169 */
5170 sbp->bases = str;
5171 }
5172 }
5173 }
5174
5175 if (sbp->bases == NULL) return NULL;
5176
5177 /* format subsequence cached with SeqPortStream */
5178
5179 ffstring = FFGetString (ajp);
5180
5181 start = sbp->start;
5182 stop = sbp->stop;
5183 remaining = stop - start;
5184
5185 count = 0;
5186 blk = 0;
5187 lin = 0;
5188
5189 ptr = sbp->bases;
5190 ch = *ptr;
5191
5192 while (ch != '\0' && remaining > 0) {
5193 buf [count] = (Char) (TO_LOWER (ch));
5194 count++;
5195 remaining--;
5196 ptr++;
5197 ch = *ptr;
5198
5199 blk++;
5200 lin++;
5201 if (lin >= 60) {
5202
5203 buf [count] = '\0';
5204 startgapgap = 0;
5205 if (ajp->specialGapFormat) {
5206 startgapgap = ProcessGapSpecialFormat (afp, ajp, bsp, ffstring, buf, ptr);
5207 }
5208 if (StringDoesHaveText (buf)) {
5209 ExpandSeqLine (buf);
5210 PrintSeqLine (ffstring, afp->format, buf, start + startgapgap, start + lin);
5211 }
5212 count = 0;
5213 blk = 0;
5214 lin = 0;
5215 start += 60;
5216 }
5217 }
5218
5219 buf [count] = '\0';
5220 if (count > 0) {
5221 startgapgap = 0;
5222 if (ajp->specialGapFormat) {
5223 startgapgap = ProcessGapSpecialFormat (afp, ajp, bsp, ffstring, buf, ptr);
5224 }
5225 if (StringDoesHaveText (buf)) {
5226 ExpandSeqLine (buf);
5227 PrintSeqLine (ffstring, afp->format, buf, start + startgapgap, start + lin);
5228 }
5229 }
5230
5231 str = FFToCharPtr(ffstring);
5232
5233 FFRecycleString (ajp, ffstring);
5234 return str;
5235 }
5236
5237 /*
5238 static CharPtr insd_strd [4] = {
5239 NULL, "single", "double", "mixed"
5240 };
5241
5242 static CharPtr insd_mol [10] = {
5243 "?", "DNA", "RNA", "tRNA", "rRNA", "mRNA", "uRNA", "snRNA", "snoRNA", "AA"
5244 };
5245
5246 static CharPtr insd_top [3] = {
5247 NULL, "linear", "circular"
5248 };
5249 */
5250
5251 NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
5252
5253 NLM_EXTERN CharPtr FormatSlashBlock (
5254 Asn2gbFormatPtr afp,
5255 BaseBlockPtr bbp
5256 )
5257
5258 {
5259 IntAsn2gbJobPtr ajp;
5260 Asn2gbSectPtr asp;
5261 GBFeaturePtr currf, headf, nextf;
5262 GBReferencePtr currr, headr, nextr;
5263 GBSeqPtr gbseq, gbtmp;
5264 IndxPtr index;
5265 INSDSeq is;
5266 /*
5267 Int2 moltype, strandedness, topology;
5268 */
5269
5270 if (afp == NULL || bbp == NULL) return NULL;
5271 ajp = afp->ajp;
5272 if (ajp == NULL) return NULL;
5273 asp = afp->asp;
5274 if (asp == NULL) return NULL;
5275
5276 /* sort and unique indexes */
5277
5278 index = ajp->index;
5279
5280 if (index != NULL) {
5281
5282 MemCopy (index, &asp->index, sizeof (IndxBlock));
5283 MemSet (&asp->index, 0, sizeof (IndxBlock));
5284
5285 index->authors = ValNodeSort (index->authors, SortVnpByString);
5286 index->authors = UniqueValNode (index->authors);
5287
5288 index->genes = ValNodeSort (index->genes, SortVnpByString);
5289 index->genes = UniqueValNode (index->genes);
5290
5291 index->journals = ValNodeSort (index->journals, SortVnpByString);
5292 index->journals = UniqueValNode (index->journals);
5293
5294 index->keywords = ValNodeSort (index->keywords, SortVnpByString);
5295 index->keywords = UniqueValNode (index->keywords);
5296
5297 index->secondaries = ValNodeSort (index->secondaries, SortVnpByString);
5298 index->secondaries = UniqueValNode (index->secondaries);
5299 }
5300
5301 /* adjust XML-ized GenBank format */
5302
5303 gbseq = ajp->gbseq;
5304
5305 if (gbseq != NULL) {
5306
5307 MemCopy (gbseq, &asp->gbseq, sizeof (GBSeq));
5308 MemSet (&asp->gbseq, 0, sizeof (GBSeq));
5309
5310 /* reverse order of references */
5311
5312 headr = NULL;
5313 for (currr = gbseq->references; currr != NULL; currr = nextr) {
5314 nextr = currr->next;
5315 currr->next = headr;
5316 headr = currr;
5317 }
5318 gbseq->references = headr;
5319
5320 /* reverse order of features */
5321
5322 headf = NULL;
5323 for (currf = gbseq->feature_table; currf != NULL; currf = nextf) {
5324 nextf = currf->next;
5325 currf->next = headf;
5326 headf = currf;
5327 }
5328 gbseq->feature_table = headf;
5329 }
5330
5331 /* if generating GBSeq XML/ASN, write at each slash block */
5332
5333 if (gbseq != NULL && afp->aip != NULL) {
5334 if (ajp->produceInsdSeq) {
5335 MemSet ((Pointer) &is, 0, sizeof (INSDSeq));
5336 is.next = (INSDSeqPtr) gbseq->next;
5337 is.OBbits__ = gbseq->OBbits__;
5338 is.locus = gbseq->locus;
5339 is.length = gbseq->length;
5340 is.strandedness = gbseq->strandedness;
5341 is.moltype = gbseq->moltype;
5342 is.topology = gbseq->topology;
5343 /*
5344 strandedness = (Int2) gbseq->strandedness;
5345 if (strandedness < 0 || strandedness > 3) {
5346 strandedness = 0;
5347 }
5348 is.strandedness = StringSave (insd_strd [strandedness]);
5349 moltype = (Int2) gbseq->moltype;
5350 if (moltype < 0 || moltype > 9) {
5351 moltype = 0;
5352 }
5353 is.moltype = StringSave (insd_mol [moltype]);
5354 topology = (Int2) gbseq->topology;
5355 if (topology < 0 || topology > 2) {
5356 topology = 0;
5357 }
5358 is.topology = StringSave (insd_top [topology]);
5359 */
5360 is.division = gbseq->division;
5361 is.update_date = gbseq->update_date;
5362 is.create_date = gbseq->create_date;
5363 is.update_release = gbseq->update_release;
5364 is.create_release = gbseq->create_release;
5365 is.definition = gbseq->definition;
5366 is.primary_accession = gbseq->primary_accession;
5367 is.entry_version = gbseq->entry_version;
5368 is.accession_version = gbseq->accession_version;
5369 is.other_seqids = gbseq->other_seqids;
5370 is.secondary_accessions = gbseq->secondary_accessions;
5371 is.project = gbseq->project;
5372 is.keywords = gbseq->keywords;
5373 is.segment = gbseq->segment;
5374 is.source = gbseq->source;
5375 is.organism = gbseq->organism;
5376 is.taxonomy = gbseq->taxonomy;
5377 is.references = (INSDReferencePtr) gbseq->references;
5378 is.comment = gbseq->comment;
5379 is.primary = gbseq->primary;
5380 is.source_db = gbseq->source_db;
5381 is.database_reference = gbseq->database_reference;
5382 is.feature_table = (INSDFeaturePtr) gbseq->feature_table;
5383 is.sequence = gbseq->sequence;
5384 is.contig = gbseq->contig;
5385 INSDSeqAsnWrite (&is, afp->aip, afp->atp);
5386 } else {
5387 GBSeqAsnWrite (gbseq, afp->aip, afp->atp);
5388 }
5389 if (afp->atp == NULL) {
5390 AsnPrintNewLine (afp->aip);
5391 }
5392 AsnIoFlush (afp->aip);
5393
5394 /* clean up gbseq fields */
5395
5396 gbtmp = GBSeqNew ();
5397 MemCopy (gbtmp, gbseq, sizeof (GBSeq));
5398 MemSet (gbseq, 0, sizeof (GBSeq));
5399 GBSeqFree (gbtmp);
5400 }
5401
5402 /* slash always has string pre-allocated by add slash block function */
5403
5404 return StringSaveNoNull (bbp->string);
5405 }
5406
5407
5408 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |