|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/asn2gnb4.c |
source navigation diff markup identifier search freetext search file search |
1 /* asn2gnb4.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2gnb4.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 * Mati Shomrat
30 *
31 * Version Creation Date: 10/21/98
32 *
33 * $Revision: 1.208 $
34 *
35 * File Description: New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gbfeat.h>
55 #include <gbftdef.h>
56 #include <edutil.h>
57 #include <alignmgr2.h>
58 #include <asn2gnbi.h>
59
60 #ifdef WIN_MAC
61 #if __profile__
62 #include <Profiler.h>
63 #endif
64 #endif
65
66 static CharPtr link_muid = "http://www.ncbi.nlm.nih.gov/pubmed/";
67
68 static CharPtr link_go = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&depth=1&query=GO:";
69
70 static CharPtr link_go_ref = "http://www.geneontology.org/cgi-bin/references.cgi#GO_REF:";
71
72 static CharPtr link_code = "http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?";
73
74 static CharPtr link_featn = "http://www.ncbi.nlm.nih.gov/nuccore/";
75 static CharPtr link_featp = "http://www.ncbi.nlm.nih.gov/protein/";
76
77 static CharPtr link_seqn = "http://www.ncbi.nlm.nih.gov/nuccore/";
78 static CharPtr link_seqp = "http://www.ncbi.nlm.nih.gov/protein/";
79
80 /*
81 static CharPtr ec_link = "http://www.expasy.org/cgi-bin/nicezyme.pl?";
82
83 static CharPtr ec_ambig = "http://www.chem.qmw.ac.uk/iubmb/enzyme/";
84 */
85
86 static CharPtr ec_link = "http://www.expasy.org/enzyme/";
87
88 /* ordering arrays for qualifiers and note components */
89
90 static FtQualType feat_qual_order [] = {
91 FTQUAL_partial,
92 FTQUAL_gene,
93
94 FTQUAL_locus_tag,
95 FTQUAL_old_locus_tag,
96
97 FTQUAL_gene_syn_refseq,
98 FTQUAL_gene_syn,
99
100 FTQUAL_gene_allele,
101
102 FTQUAL_operon,
103
104 FTQUAL_ncRNA_class,
105 FTQUAL_ncRNA_other,
106
107 FTQUAL_product,
108
109 FTQUAL_prot_EC_number,
110 FTQUAL_prot_activity,
111
112 FTQUAL_standard_name,
113 FTQUAL_coded_by,
114 FTQUAL_derived_from,
115
116 FTQUAL_prot_name,
117 FTQUAL_region_name,
118 FTQUAL_bond_type,
119 FTQUAL_site_type,
120 FTQUAL_sec_str_type,
121 FTQUAL_heterogen,
122
123 FTQUAL_tag_peptide,
124 FTQUAL_tag_peptide_str,
125
126 FTQUAL_evidence,
127 FTQUAL_experiment,
128 FTQUAL_experiment_string,
129 FTQUAL_inference,
130 FTQUAL_inference_string,
131 FTQUAL_inference_good,
132 FTQUAL_exception,
133 FTQUAL_ribosomal_slippage,
134 FTQUAL_trans_splicing,
135
136 FTQUAL_note,
137 FTQUAL_citation,
138
139 FTQUAL_number,
140
141 FTQUAL_pseudo,
142 FTQUAL_selenocysteine,
143 FTQUAL_pyrrolysine,
144
145 FTQUAL_codon_start,
146
147 FTQUAL_anticodon,
148 FTQUAL_trna_codons,
149 FTQUAL_bound_moiety,
150 FTQUAL_clone,
151 FTQUAL_compare,
152 FTQUAL_direction,
153 FTQUAL_function,
154 FTQUAL_frequency,
155 FTQUAL_EC_number,
156 FTQUAL_gene_map,
157 FTQUAL_gene_cyt_map,
158 FTQUAL_gene_gen_map,
159 FTQUAL_gene_rad_map,
160 FTQUAL_estimated_length,
161 FTQUAL_allele,
162 FTQUAL_map,
163 FTQUAL_mod_base,
164 FTQUAL_PCR_conditions,
165 FTQUAL_phenotype,
166 FTQUAL_rpt_family,
167 FTQUAL_rpt_type,
168 FTQUAL_rpt_unit,
169 FTQUAL_rpt_unit_range,
170 FTQUAL_rpt_unit_seq,
171 FTQUAL_satellite,
172 FTQUAL_mobile_element,
173 FTQUAL_usedin,
174
175 FTQUAL_illegal_qual,
176
177 FTQUAL_replace,
178
179 FTQUAL_transl_except,
180 FTQUAL_transl_table,
181 FTQUAL_codon,
182 FTQUAL_organism,
183 FTQUAL_label,
184 FTQUAL_cds_product,
185 FTQUAL_extra_products,
186 FTQUAL_UniProtKB_evidence,
187 FTQUAL_protein_id,
188 FTQUAL_transcript_id,
189 FTQUAL_db_xref,
190 FTQUAL_gene_xref,
191 FTQUAL_mol_wt,
192 FTQUAL_translation,
193 FTQUAL_transcription,
194 FTQUAL_peptide,
195 (FtQualType) 0
196 };
197
198 /*
199 prot_names after seqfeat_note - gi|4210642|emb|AJ005084.1|HBVAJ5084
200 prot_conflict after prot_desc - gi|61183|emb|V01135.1|PIVM02
201 figure after prot_desc - gi|400553|gb|S64006.1|
202 seqfeat_note after prot_desc - gi|431713|gb|L20354.1|STVPATPOLB
203 but prot_desc after seqfeat_note - AF252556.1
204 prot_names after figure - gi|234022|gb|S56149.1|S56149
205 seqfeat_note after prot_conflict after figure - gi|234046|gb|S51392.1|S51392
206 prot_method after prot_comment (descriptor) after prot_note after prot_desc
207 region after seqfeat_note - gi|6554164|gb|AF043644.3|AF043644
208 prot_desc after prot_names - gi|6581069|gb|AF202541.1|AF202541 - cannot do !!!
209 gene_syn after gene_desc - gi|3386543|gb|AF079528.1|AF079528
210 pseudo after note - gi|6598562|gb|AC006419.3|AC006419
211 */
212
213 static FtQualType feat_note_order [] = {
214 FTQUAL_transcript_id_note, /* !!! remove October 15, 2003 !!! */
215 FTQUAL_gene_desc,
216 FTQUAL_trna_codons_note,
217 FTQUAL_encodes,
218 FTQUAL_prot_desc,
219 FTQUAL_prot_note,
220 FTQUAL_prot_comment,
221 FTQUAL_prot_method,
222 FTQUAL_ncRNA_note,
223 FTQUAL_figure,
224 FTQUAL_maploc,
225 FTQUAL_prot_conflict,
226 FTQUAL_prot_missing,
227 FTQUAL_seqfeat_note,
228 FTQUAL_seqannot_note,
229 FTQUAL_region,
230 FTQUAL_selenocysteine_note,
231 FTQUAL_pyrrolysine_note,
232 FTQUAL_prot_names,
233 FTQUAL_bond,
234 FTQUAL_site,
235 /*
236 FTQUAL_rrna_its,
237 */
238 FTQUAL_xtra_prod_quals,
239 FTQUAL_inference_bad,
240 FTQUAL_modelev,
241 FTQUAL_cdd_definition,
242 /* GO terms appear as own qualifiers in RefSeq records, Sequin or Dump mode */
243 FTQUAL_go_component,
244 FTQUAL_go_function,
245 FTQUAL_go_process,
246 /* RefSeq-specific qualifiers have same display policy as GO terms */
247 FTQUAL_nomenclature,
248 FTQUAL_gene_nomen,
249 FTQUAL_exception_note,
250 (FtQualType) 0
251 };
252
253 typedef struct featurqual {
254 CharPtr name;
255 QualType qualclass;
256 } FeaturQual, PNTR FeaturQualPtr;
257
258 static FeaturQual asn2gnbk_featur_quals [ASN2GNBK_TOTAL_FEATUR] = {
259 { "", Qual_class_ignore },
260 { "allele", Qual_class_quote },
261 { "anticodon", Qual_class_anti_codon },
262 { "bond", Qual_class_bond },
263 { "bond_type", Qual_class_bond },
264 { "bound_moiety", Qual_class_quote },
265 { "cdd_definition", Qual_class_string },
266 { "product", Qual_class_string },
267 { "citation", Qual_class_pubset },
268 { "clone", Qual_class_quote },
269 { "coded_by", Qual_class_seq_loc },
270 { "compare", Qual_class_compare },
271 { "codon", Qual_class_codon },
272 { "codon_start", Qual_class_int },
273 { "cons_splice", Qual_class_consplice },
274 { "db_xref", Qual_class_db_xref },
275 { "derived_from", Qual_class_seq_loc },
276 { "direction", Qual_class_L_R_B },
277 { "EC_number", Qual_class_EC_quote },
278 { "encodes", Qual_class_encodes },
279 { "estimated_length", Qual_class_number },
280 { "evidence", Qual_class_evidence },
281 { "exception", Qual_class_exception },
282 { "exception_note", Qual_class_exception },
283 { "experiment", Qual_class_quote },
284 { "experiment", Qual_class_string },
285 { "product", Qual_class_valnode },
286 { "figure", Qual_class_string },
287 { "frequency", Qual_class_quote },
288 { "function", Qual_class_quote },
289 { "gene", Qual_class_sgml },
290 { "gene_desc", Qual_class_string },
291 { "allele", Qual_class_string },
292 { "map", Qual_class_string },
293 { "cyt_map", Qual_class_map },
294 { "gen_map", Qual_class_map },
295 { "rad_map", Qual_class_map },
296 { "gene_synonym", Qual_class_sep_gene_syn },
297 { "gene_synonym", Qual_class_gene_syn },
298 { "gene_note", Qual_class_string },
299 { "db_xref", Qual_class_db_xref },
300 { "GO_component", Qual_class_go },
301 { "GO_function", Qual_class_go },
302 { "GO_process", Qual_class_go },
303 { "heterogen", Qual_class_string },
304 { "illegal", Qual_class_illegal },
305 { "inference", Qual_class_quote },
306 { "inference", Qual_class_string },
307 { "inference", Qual_class_valnode },
308 { "inference", Qual_class_valnode },
309 { "insertion_seq", Qual_class_quote },
310 { "label", Qual_class_label },
311 { "locus_tag", Qual_class_locus_tag },
312 { "map", Qual_class_quote },
313 { "maploc", Qual_class_string },
314 { "mobile_element", Qual_class_mobile_element },
315 { "mod_base", Qual_class_noquote },
316 { "model_evidence", Qual_class_model_ev },
317 { "calculated_mol_wt", Qual_class_mol_wt },
318 { "ncRNA_class", Qual_class_quote },
319 { "ncRNA_note", Qual_class_string },
320 { "ncRNA_class", Qual_class_string },
321 { "nomenclature", Qual_class_nomenclature },
322 { "nomenclature", Qual_class_gene_nomen },
323 { "note", Qual_class_note },
324 { "number", Qual_class_number },
325 { "old_locus_tag", Qual_class_paren },
326 { "operon", Qual_class_quote },
327 { "organism", Qual_class_quote },
328 { "partial", Qual_class_boolean },
329 { "PCR_conditions", Qual_class_quote },
330 { "peptide", Qual_class_peptide },
331 { "phenotype", Qual_class_quote },
332 { "product", Qual_class_product },
333 { "product", Qual_class_quote },
334 { "function", Qual_class_valnode },
335 { "prot_comment", Qual_class_string },
336 { "EC_number", Qual_class_EC_valnode },
337 { "prot_note", Qual_class_string },
338 { "prot_method", Qual_class_method },
339 { "prot_conflict", Qual_class_string },
340 { "prot_desc", Qual_class_string },
341 { "prot_missing", Qual_class_string },
342 { "name", Qual_class_tilde },
343 { "prot_names", Qual_class_protnames },
344 { "protein_id", Qual_class_prt_id },
345 { "pseudo", Qual_class_boolean },
346 { "pyrrolysine", Qual_class_boolean },
347 { "pyrrolysine", Qual_class_string },
348 { "region", Qual_class_region },
349 { "region_name", Qual_class_string },
350 { "replace", Qual_class_replace },
351 { "ribosomal_slippage", Qual_class_boolean },
352 { "rpt_family", Qual_class_quote },
353 { "rpt_type", Qual_class_rpt },
354 { "rpt_unit", Qual_class_rpt_unit },
355 { "rpt_unit_range", Qual_class_rpt_unit },
356 { "rpt_unit_seq", Qual_class_rpt_unit },
357 { "rrna_its", Qual_class_its },
358 { "satellite", Qual_class_quote },
359 { "sec_str_type", Qual_class_sec_str },
360 { "selenocysteine", Qual_class_boolean },
361 { "selenocysteine", Qual_class_string },
362 { "seqannot_note", Qual_class_string },
363 { "seqfeat_note", Qual_class_string },
364 { "site", Qual_class_site },
365 { "site_type", Qual_class_site },
366 { "standard_name", Qual_class_quote },
367 { "tag_peptide", Qual_class_noquote },
368 { "tag_peptide", Qual_class_tag_peptide },
369 { "transcription", Qual_class_transcription },
370 { "transcript_id", Qual_class_nuc_id },
371 { "tscpt_id_note", Qual_class_nuc_id },
372 { "transl_except", Qual_class_code_break },
373 { "transl_table", Qual_class_int },
374 { "translation", Qual_class_translation },
375 { "transposon", Qual_class_quote },
376 { "trans_splicing", Qual_class_boolean },
377 { "trna_aa", Qual_class_ignore },
378 { "codon_recognized", Qual_class_trna_codons },
379 { "trna_codons", Qual_class_trna_codons },
380 { "UniProtKB_evidence", Qual_class_quote },
381 { "usedin", Qual_class_usedin },
382 { "xtra_products", Qual_class_xtraprds }
383 };
384
385
386 typedef struct qualfeatur {
387 CharPtr name;
388 FtQualType featurclass;
389 } QualFeatur, PNTR QualFeaturPtr;
390
391 #define NUM_GB_QUALS 41
392
393 static QualFeatur qualToFeature [NUM_GB_QUALS] = {
394 { "allele", FTQUAL_allele },
395 { "bound_moiety", FTQUAL_bound_moiety },
396 { "clone", FTQUAL_clone },
397 { "codon", FTQUAL_codon },
398 { "compare", FTQUAL_compare },
399 { "cons_splice", FTQUAL_cons_splice },
400 { "cyt_map", FTQUAL_gene_cyt_map },
401 { "direction", FTQUAL_direction },
402 { "EC_number", FTQUAL_EC_number },
403 { "estimated_length", FTQUAL_estimated_length },
404 { "experiment", FTQUAL_experiment },
405 { "frequency", FTQUAL_frequency },
406 { "function", FTQUAL_function },
407 { "gen_map", FTQUAL_gene_gen_map },
408 { "inference", FTQUAL_inference },
409 { "insertion_seq", FTQUAL_insertion_seq },
410 { "label", FTQUAL_label },
411 { "map", FTQUAL_map },
412 { "mobile_element", FTQUAL_mobile_element },
413 { "mod_base", FTQUAL_mod_base },
414 { "ncRNA_class", FTQUAL_ncRNA_class },
415 { "number", FTQUAL_number },
416 { "old_locus_tag", FTQUAL_old_locus_tag },
417 { "operon", FTQUAL_operon },
418 { "organism", FTQUAL_organism },
419 { "PCR_conditions", FTQUAL_PCR_conditions },
420 { "phenotype", FTQUAL_phenotype },
421 { "product", FTQUAL_product_quals },
422 { "rad_map", FTQUAL_gene_rad_map },
423 { "replace", FTQUAL_replace },
424 { "rpt_family", FTQUAL_rpt_family },
425 { "rpt_type", FTQUAL_rpt_type },
426 { "rpt_unit", FTQUAL_rpt_unit },
427 { "rpt_unit_range", FTQUAL_rpt_unit_range },
428 { "rpt_unit_seq", FTQUAL_rpt_unit_seq },
429 { "satellite", FTQUAL_satellite },
430 { "standard_name", FTQUAL_standard_name },
431 { "tag_peptide", FTQUAL_tag_peptide },
432 { "transposon", FTQUAL_transposon },
433 { "UniProtKB_evidence", FTQUAL_UniProtKB_evidence },
434 { "usedin", FTQUAL_usedin }
435 };
436
437 static Int2 GbqualToFeaturIndex (
438 CharPtr qualname
439 )
440
441 {
442 Int2 L, R, mid;
443
444 if (qualname == NULL || *qualname == '\0') return 0;
445
446 L = 0;
447 R = NUM_GB_QUALS - 1;
448
449 while (L < R) {
450 mid = (L + R) / 2;
451 if (StringICmp (qualToFeature [mid].name, qualname) < 0) {
452 L = mid + 1;
453 } else {
454 R = mid;
455 }
456 }
457
458 if (StringICmp (qualToFeature [R].name, qualname) == 0) {
459 return qualToFeature [R].featurclass;
460 }
461
462 return 0;
463 }
464
465 #define NUM_ILLEGAL_QUALS 14
466
467 static FeaturQual illegalGbqualList [NUM_ILLEGAL_QUALS] = {
468 { "anticodon", Qual_class_noquote },
469 { "citation", Qual_class_noquote },
470 { "codon_start", Qual_class_noquote },
471 { "db_xref", Qual_class_quote },
472 { "evidence", Qual_class_noquote },
473 { "exception", Qual_class_quote },
474 { "gene", Qual_class_quote },
475 { "note", Qual_class_quote },
476 { "protein_id", Qual_class_quote },
477 { "pseudo", Qual_class_noquote },
478 { "transcript_id", Qual_class_quote },
479 { "transl_except", Qual_class_noquote },
480 { "transl_table", Qual_class_noquote },
481 { "translation", Qual_class_quote }
482 };
483
484 static Int2 IllegalGbqualToClass (
485 CharPtr qualname
486 )
487
488 {
489 Int2 L, R, mid;
490
491 if (qualname == NULL || *qualname == '\0') return 0;
492
493 L = 0;
494 R = NUM_ILLEGAL_QUALS - 1;
495
496 while (L < R) {
497 mid = (L + R) / 2;
498 if (StringICmp (illegalGbqualList [mid].name, qualname) < 0) {
499 L = mid + 1;
500 } else {
501 R = mid;
502 }
503 }
504
505 if (StringICmp (illegalGbqualList [R].name, qualname) == 0) {
506 return illegalGbqualList [R].qualclass;
507 }
508
509 return 0;
510 }
511
512 static CharPtr trnaList [] = {
513 "tRNA-Gap",
514 "tRNA-Ala",
515 "tRNA-Asx",
516 "tRNA-Cys",
517 "tRNA-Asp",
518 "tRNA-Glu",
519 "tRNA-Phe",
520 "tRNA-Gly",
521 "tRNA-His",
522 "tRNA-Ile",
523 "tRNA-Xle",
524 "tRNA-Lys",
525 "tRNA-Leu",
526 "tRNA-Met",
527 "tRNA-Asn",
528 "tRNA-Pyl",
529 "tRNA-Pro",
530 "tRNA-Gln",
531 "tRNA-Arg",
532 "tRNA-Ser",
533 "tRNA-Thr",
534 "tRNA-Sec",
535 "tRNA-Val",
536 "tRNA-Trp",
537 "tRNA-OTHER",
538 "tRNA-Tyr",
539 "tRNA-Glx",
540 "tRNA-TERM",
541 NULL
542 };
543
544 static CharPtr evidenceText [] = {
545 NULL, "experimental", "not_experimental"
546 };
547
548 NLM_EXTERN CharPtr secStrText [] = {
549 NULL, "helix", "sheet", "turn"
550 };
551
552 static CharPtr oops = "?";
553
554 static CharPtr SeqCodeNameGet (
555 SeqCodeTablePtr table,
556 Uint1 residue
557 )
558
559 {
560 Uint1 index;
561
562 if (table != NULL) {
563 index = residue - table->start_at;
564 if ( /*index >= 0 && */ index < table->num) {
565 return (table->names) [index];
566 }
567 }
568
569 return oops;
570 }
571
572 NLM_EXTERN CharPtr Get3LetterSymbol (
573 IntAsn2gbJobPtr ajp,
574 Uint1 seq_code,
575 SeqCodeTablePtr table,
576 Uint1 residue
577 )
578
579 {
580 Uint1 code = Seq_code_ncbieaa;
581 Int2 index;
582 Uint1 new_residue;
583 CharPtr ptr;
584 CharPtr retval = NULL;
585 SeqMapTablePtr smtp;
586 SeqCodeTablePtr table_3aa;
587
588 if (residue == 42) { /* stop codon in NCBIeaa */
589 retval = "TERM";
590 return retval;
591 }
592
593 if (ajp != NULL && ajp->flags.iupacaaOnly) {
594 code = Seq_code_iupacaa;
595 } else {
596 code = Seq_code_ncbieaa;
597 }
598
599 if (seq_code != code) {
600 /* if code and seq_code are identical, then smtp is NULL?? */
601 smtp = SeqMapTableFind (code, seq_code);
602 new_residue = SeqMapTableConvert (smtp, residue);
603 } else {
604 new_residue = residue;
605 }
606
607 /* The following looks for non-symbols (255) and "Undetermined" (88) */
608 if ((int) new_residue == 255 || (int) new_residue == 88) {
609 retval = "OTHER";
610 return retval;
611 } else {
612 if (table == NULL) {
613 table = SeqCodeTableFind (Seq_code_ncbieaa);
614 if (table == NULL) {
615 retval = "OTHER";
616 return retval;
617 }
618 }
619 ptr = SeqCodeNameGet (table, residue);
620 table_3aa = SeqCodeTableFind (Seq_code_iupacaa3);
621 if (ptr != NULL && table_3aa != NULL) {
622 for (index=0; index < (int) table_3aa->num; index++) {
623 if (StringCmp(ptr, (table_3aa->names) [index]) == 0) {
624 retval = (table_3aa->symbols) [index];
625 return retval;
626 }
627 }
628 }
629 }
630
631 retval = "OTHER";
632 return retval;
633 }
634
635 static Boolean MatchCit (
636 ValNodePtr ppr,
637 RefBlockPtr rbp
638 )
639
640 {
641 Char buf [121];
642 size_t len;
643 Int4 uid;
644 ValNodePtr vnp;
645
646 if (ppr == NULL || rbp == NULL) return FALSE;
647 switch (ppr->choice) {
648 case PUB_Muid :
649 uid = ppr->data.intvalue;
650 if (rbp->muid == uid) return TRUE;
651 break;
652 case PUB_PMid :
653 uid = ppr->data.intvalue;
654 if (rbp->pmid == uid) return TRUE;
655 break;
656 case PUB_Equiv :
657 for (vnp = (ValNodePtr) ppr->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
658 if (MatchCit (vnp, rbp)) return TRUE;
659 }
660 break;
661 default :
662 PubLabelUnique (ppr, buf, sizeof (buf) - 1, OM_LABEL_CONTENT, TRUE);
663 len = StringLen (buf);
664 if (len > 0 && buf [len - 1] == '>') {
665 buf [len - 1] = '\0';
666 len--;
667 }
668 len = MIN (len, StringLen (rbp->uniquestr));
669 if (StringNICmp (rbp->uniquestr, buf, len) == 0) return TRUE;
670 break;
671 }
672 return FALSE;
673 }
674
675 NLM_EXTERN Int2 MatchRef (
676 ValNodePtr ppr,
677 RefBlockPtr PNTR rbpp,
678 Int2 numReferences
679 )
680
681 {
682 Int2 j;
683 RefBlockPtr rbp;
684
685 if (ppr == NULL || rbpp == NULL) return 0;
686
687 for (j = 0; j < numReferences; j++) {
688 rbp = rbpp [j];
689 if (rbp == NULL) continue;
690 if (MatchCit (ppr, rbp)) return j + 1;
691 }
692 return 0;
693 }
694
695 /* taken from asn2ff4.c */
696
697 static Boolean LookForFuzz (SeqLocPtr head)
698 {
699 Boolean retval=FALSE;
700 IntFuzzPtr ifp;
701 PackSeqPntPtr pspp;
702 SeqIntPtr sip;
703 SeqLocPtr slp;
704 SeqPntPtr spp;
705
706 if (head == NULL)
707 return retval;
708
709 slp=NULL;
710 while ((slp = SeqLocFindNext(head, slp)) != NULL)
711 {
712 switch (slp->choice)
713 {
714 case SEQLOC_INT:
715 sip = (SeqIntPtr)(slp->data.ptrvalue);
716 ifp = sip->if_from;
717 if (ifp != NULL)
718 {
719 if (ifp->choice == 4)
720 {
721 if (ifp->a != 0)
722 retval=TRUE;
723 }
724 else
725 retval = TRUE;
726 }
727 ifp = sip->if_to;
728 if (ifp != NULL)
729 {
730 if (ifp->choice == 4)
731 {
732 if (ifp->a != 0)
733 retval=TRUE;
734 }
735 else
736 retval = TRUE;
737 }
738 break;
739 case SEQLOC_PNT:
740 spp = (SeqPntPtr)(slp->data.ptrvalue);
741 ifp = spp->fuzz;
742 if (ifp != NULL)
743 {
744 if (ifp->choice == 4)
745 {
746 if (ifp->a != 0)
747 retval=TRUE;
748 }
749 else
750 retval = TRUE;
751 }
752 break;
753 case SEQLOC_PACKED_PNT:
754 pspp = (PackSeqPntPtr)(slp->data.ptrvalue);
755 ifp = pspp->fuzz;
756 if (ifp != NULL)
757 {
758 if (ifp->choice == 4)
759 {
760 if (ifp->a != 0)
761 retval=TRUE;
762 }
763 else
764 retval = TRUE;
765 }
766 break;
767 default:
768 break;
769 }
770 if (retval == TRUE)
771 break;
772 }
773 return retval;
774 }
775
776 NLM_EXTERN CharPtr bondList [] = {
777 NULL,
778 "disulfide",
779 "thiolester",
780 "xlink",
781 "thioether",
782 "other"
783 };
784
785 NLM_EXTERN CharPtr siteList [] = {
786 NULL,
787 "active",
788 "binding",
789 "cleavage",
790 "inhibit",
791 "modified",
792 "glycosylation",
793 "myristoylation",
794 "mutagenized",
795 "metal-binding",
796 "phosphorylation",
797 "acetylation",
798 "amidation",
799 "methylation",
800 "hydroxylation",
801 "sulfatation",
802 "oxidative-deamination",
803 "pyrrolidone-carboxylic-acid",
804 "gamma-carboxyglutamic-acid",
805 "blocked",
806 "lipid-binding",
807 "np-binding",
808 "DNA binding",
809 "signal-peptide",
810 "transit-peptide",
811 "transmembrane-region",
812 "nitrosylation",
813 "other"
814 };
815
816 static CharPtr siteFFList [] = {
817 NULL,
818 "active",
819 "binding",
820 "cleavage",
821 "inhibition",
822 "modified",
823 "glycosylation",
824 "myristoylation",
825 "mutagenized",
826 "metal-binding",
827 "phosphorylation",
828 "acetylation",
829 "amidation",
830 "methylation",
831 "hydroxylation",
832 "sulfatation",
833 "oxidative-deamination",
834 "pyrrolidone-carboxylic-acid",
835 "gamma-carboxyglutamic-acid",
836 "blocked",
837 "lipid-binding",
838 "np-binding",
839 "DNA binding",
840 "signal peptide",
841 "transit peptide",
842 "transmembrane region",
843 "nitrosylation",
844 "other"
845 };
846
847 static CharPtr conflict_msg =
848 "Protein sequence is in conflict with the conceptual translation";
849
850 /*
851 static CharPtr no_protein_msg =
852 "Coding region translates with internal stops";
853 */
854
855 /**/
856 /* s_DisplayQVP () -- Displays the strings in a QVP structure. */
857 /* This is a debugging function only. */
858 /**/
859
#ifdef DISPLAY_STRINGS
/*
 * s_DisplayQVP - debugging aid: print one line to stderr for each entry
 * of notetbl (a 0-terminated list of indexes into qvp and
 * asn2gnbk_featur_quals), showing the position, the qualifier name and
 * the string held in qvp, or "NULL" when the slot is NULL or holds one
 * of the values 0x1000000, 0x1 or 0xb.
 *
 * NOTE(review): the (int) casts on a pointer discard the upper bits
 * where pointers are wider than int - confirm before relying on this
 * outside a debugging session.
 */
static void s_DisplayQVP(QualValPtr qvp, Uint1Ptr notetbl)
{
  Int2  pos;
  Int2  qual;

  fprintf (stderr, "\n");

  for (pos = 0; (qual = notetbl [pos]) != 0; pos++) {
    if (((int) qvp [qual].str == 0x1000000) ||
        ((int) qvp [qual].str == 0x1) ||
        ((int) qvp [qual].str == 0xb) ||
        (qvp [qual].str == NULL)) {
      fprintf (stderr, "%d\t%-25s %s\n", pos, asn2gnbk_featur_quals [qual].name,
               "NULL");
    } else {
      fprintf (stderr, "%d\t%-25s %s\n", pos, asn2gnbk_featur_quals [qual].name,
               qvp [qual].str);
    }
  }
}
#endif
881
882 /*
883 static Boolean NotInGeneSyn (
884 CharPtr str,
885 ValNodePtr gene_syn)
886
887 {
888 CharPtr syn;
889 ValNodePtr vnp;
890
891 for (vnp = gene_syn; vnp != NULL; vnp = vnp->next) {
892 syn = (CharPtr) vnp->data.ptrvalue;
893 if (! StringHasNoText (syn)) {
894 if (StringICmp (str, syn) == 0) return FALSE;
895 }
896 }
897 return TRUE;
898 }
899 */
900
901 typedef struct valqualstruc {
902 Uint2 featdef;
903 FtQualType ftqual;
904 } ValQual, PNTR ValQualPtr;
905
906 /*
907 WARNING - This list MUST be kept sorted in FEATDEF order as the primary
908 key, and within a FEATDEF group sorted by FTQUAL as the secondary key
909 */
910
911 static ValQual legalGbqualList [] = {
912
913 { FEATDEF_GENE , FTQUAL_allele },
914 { FEATDEF_GENE , FTQUAL_function },
915 { FEATDEF_GENE , FTQUAL_label },
916 { FEATDEF_GENE , FTQUAL_map },
917 { FEATDEF_GENE , FTQUAL_old_locus_tag },
918 { FEATDEF_GENE , FTQUAL_operon },
919 { FEATDEF_GENE , FTQUAL_phenotype },
920 { FEATDEF_GENE , FTQUAL_product },
921 { FEATDEF_GENE , FTQUAL_standard_name },
922
923 { FEATDEF_CDS , FTQUAL_allele },
924 { FEATDEF_CDS , FTQUAL_codon },
925 { FEATDEF_CDS , FTQUAL_label },
926 { FEATDEF_CDS , FTQUAL_map },
927 { FEATDEF_CDS , FTQUAL_number },
928 { FEATDEF_CDS , FTQUAL_old_locus_tag },
929 { FEATDEF_CDS , FTQUAL_operon },
930 { FEATDEF_CDS , FTQUAL_standard_name },
931
932 { FEATDEF_PROT , FTQUAL_product },
933 { FEATDEF_PROT , FTQUAL_UniProtKB_evidence },
934
935 { FEATDEF_preRNA , FTQUAL_allele },
936 { FEATDEF_preRNA , FTQUAL_function },
937 { FEATDEF_preRNA , FTQUAL_label },
938 { FEATDEF_preRNA , FTQUAL_map },
939 { FEATDEF_preRNA , FTQUAL_old_locus_tag },
940 { FEATDEF_preRNA , FTQUAL_operon },
941 { FEATDEF_preRNA , FTQUAL_product },
942 { FEATDEF_preRNA , FTQUAL_standard_name },
943
944 { FEATDEF_mRNA , FTQUAL_allele },
945 { FEATDEF_mRNA , FTQUAL_function },
946 { FEATDEF_mRNA , FTQUAL_label },
947 { FEATDEF_mRNA , FTQUAL_map },
948 { FEATDEF_mRNA , FTQUAL_old_locus_tag },
949 { FEATDEF_mRNA , FTQUAL_operon },
950 { FEATDEF_mRNA , FTQUAL_product },
951 { FEATDEF_mRNA , FTQUAL_standard_name },
952
953 { FEATDEF_tRNA , FTQUAL_allele },
954 { FEATDEF_tRNA , FTQUAL_function },
955 { FEATDEF_tRNA , FTQUAL_label },
956 { FEATDEF_tRNA , FTQUAL_map },
957 { FEATDEF_tRNA , FTQUAL_old_locus_tag },
958 { FEATDEF_tRNA , FTQUAL_product },
959 { FEATDEF_tRNA , FTQUAL_standard_name },
960
961 { FEATDEF_rRNA , FTQUAL_allele },
962 { FEATDEF_rRNA , FTQUAL_function },
963 { FEATDEF_rRNA , FTQUAL_label },
964 { FEATDEF_rRNA , FTQUAL_map },
965 { FEATDEF_rRNA , FTQUAL_old_locus_tag },
966 { FEATDEF_rRNA , FTQUAL_operon },
967 { FEATDEF_rRNA , FTQUAL_product },
968 { FEATDEF_rRNA , FTQUAL_standard_name },
969
970 { FEATDEF_snRNA , FTQUAL_allele },
971 { FEATDEF_snRNA , FTQUAL_function },
972 { FEATDEF_snRNA , FTQUAL_label },
973 { FEATDEF_snRNA , FTQUAL_map },
974 { FEATDEF_snRNA , FTQUAL_old_locus_tag },
975 { FEATDEF_snRNA , FTQUAL_product },
976 { FEATDEF_snRNA , FTQUAL_standard_name },
977
978 { FEATDEF_scRNA , FTQUAL_allele },
979 { FEATDEF_scRNA , FTQUAL_function },
980 { FEATDEF_scRNA , FTQUAL_label },
981 { FEATDEF_scRNA , FTQUAL_map },
982 { FEATDEF_scRNA , FTQUAL_old_locus_tag },
983 { FEATDEF_scRNA , FTQUAL_product },
984 { FEATDEF_scRNA , FTQUAL_standard_name },
985
986 { FEATDEF_otherRNA , FTQUAL_allele },
987 { FEATDEF_otherRNA , FTQUAL_function },
988 { FEATDEF_otherRNA , FTQUAL_label },
989 { FEATDEF_otherRNA , FTQUAL_map },
990 { FEATDEF_otherRNA , FTQUAL_old_locus_tag },
991 { FEATDEF_otherRNA , FTQUAL_operon },
992 { FEATDEF_otherRNA , FTQUAL_product },
993 { FEATDEF_otherRNA , FTQUAL_standard_name },
994
995 { FEATDEF_attenuator , FTQUAL_allele },
996 { FEATDEF_attenuator , FTQUAL_label },
997 { FEATDEF_attenuator , FTQUAL_map },
998 { FEATDEF_attenuator , FTQUAL_old_locus_tag },
999 { FEATDEF_attenuator , FTQUAL_operon },
1000 { FEATDEF_attenuator , FTQUAL_phenotype },
1001
1002 { FEATDEF_C_region , FTQUAL_allele },
1003 { FEATDEF_C_region , FTQUAL_label },
1004 { FEATDEF_C_region , FTQUAL_map },
1005 { FEATDEF_C_region , FTQUAL_old_locus_tag },
1006 { FEATDEF_C_region , FTQUAL_product },
1007 { FEATDEF_C_region , FTQUAL_standard_name },
1008
1009 { FEATDEF_CAAT_signal , FTQUAL_allele },
1010 { FEATDEF_CAAT_signal , FTQUAL_label },
1011 { FEATDEF_CAAT_signal , FTQUAL_map },
1012 { FEATDEF_CAAT_signal , FTQUAL_old_locus_tag },
1013
1014 { FEATDEF_Imp_CDS , FTQUAL_codon },
1015 { FEATDEF_Imp_CDS , FTQUAL_EC_number },
1016 { FEATDEF_Imp_CDS , FTQUAL_function },
1017 { FEATDEF_Imp_CDS , FTQUAL_label },
1018 { FEATDEF_Imp_CDS , FTQUAL_map },
1019 { FEATDEF_Imp_CDS , FTQUAL_number },
1020 { FEATDEF_Imp_CDS , FTQUAL_old_locus_tag },
1021 { FEATDEF_Imp_CDS , FTQUAL_operon },
1022 { FEATDEF_Imp_CDS , FTQUAL_product },
1023 { FEATDEF_Imp_CDS , FTQUAL_standard_name },
1024
1025 { FEATDEF_conflict , FTQUAL_allele },
1026 { FEATDEF_conflict , FTQUAL_compare },
1027 { FEATDEF_conflict , FTQUAL_label },
1028 { FEATDEF_conflict , FTQUAL_map },
1029 { FEATDEF_conflict , FTQUAL_old_locus_tag },
1030 { FEATDEF_conflict , FTQUAL_replace },
1031
1032 { FEATDEF_D_loop , FTQUAL_allele },
1033 { FEATDEF_D_loop , FTQUAL_label },
1034 { FEATDEF_D_loop , FTQUAL_map },
1035 { FEATDEF_D_loop , FTQUAL_old_locus_tag },
1036
1037 { FEATDEF_D_segment , FTQUAL_allele },
1038 { FEATDEF_D_segment , FTQUAL_label },
1039 { FEATDEF_D_segment , FTQUAL_map },
1040 { FEATDEF_D_segment , FTQUAL_old_locus_tag },
1041 { FEATDEF_D_segment , FTQUAL_product },
1042 { FEATDEF_D_segment , FTQUAL_standard_name },
1043
1044 { FEATDEF_enhancer , FTQUAL_allele },
1045 { FEATDEF_enhancer , FTQUAL_label },
1046 { FEATDEF_enhancer , FTQUAL_map },
1047 { FEATDEF_enhancer , FTQUAL_old_locus_tag },
1048 { FEATDEF_enhancer , FTQUAL_standard_name },
1049
1050 { FEATDEF_exon , FTQUAL_allele },
1051 { FEATDEF_exon , FTQUAL_EC_number },
1052 { FEATDEF_exon , FTQUAL_function },
1053 { FEATDEF_exon , FTQUAL_label },
1054 { FEATDEF_exon , FTQUAL_map },
1055 { FEATDEF_exon , FTQUAL_number },
1056 { FEATDEF_exon , FTQUAL_old_locus_tag },
1057 { FEATDEF_exon , FTQUAL_product },
1058 { FEATDEF_exon , FTQUAL_standard_name },
1059
1060 { FEATDEF_GC_signal , FTQUAL_allele },
1061 { FEATDEF_GC_signal , FTQUAL_label },
1062 { FEATDEF_GC_signal , FTQUAL_map },
1063 { FEATDEF_GC_signal , FTQUAL_old_locus_tag },
1064
1065 { FEATDEF_iDNA , FTQUAL_allele },
1066 { FEATDEF_iDNA , FTQUAL_function },
1067 { FEATDEF_iDNA , FTQUAL_label },
1068 { FEATDEF_iDNA , FTQUAL_map },
1069 { FEATDEF_iDNA , FTQUAL_number },
1070 { FEATDEF_iDNA , FTQUAL_old_locus_tag },
1071 { FEATDEF_iDNA , FTQUAL_standard_name },
1072
1073 { FEATDEF_intron , FTQUAL_allele },
1074 { FEATDEF_intron , FTQUAL_cons_splice },
1075 { FEATDEF_intron , FTQUAL_function },
1076 { FEATDEF_intron , FTQUAL_label },
1077 { FEATDEF_intron , FTQUAL_map },
1078 { FEATDEF_intron , FTQUAL_number },
1079 { FEATDEF_intron , FTQUAL_old_locus_tag },
1080 { FEATDEF_intron , FTQUAL_standard_name },
1081
1082 { FEATDEF_J_segment , FTQUAL_allele },
1083 { FEATDEF_J_segment , FTQUAL_label },
1084 { FEATDEF_J_segment , FTQUAL_map },
1085 { FEATDEF_J_segment , FTQUAL_old_locus_tag },
1086 { FEATDEF_J_segment , FTQUAL_product },
1087 { FEATDEF_J_segment , FTQUAL_standard_name },
1088
1089 { FEATDEF_LTR , FTQUAL_allele },
1090 { FEATDEF_LTR , FTQUAL_function },
1091 { FEATDEF_LTR , FTQUAL_label },
1092 { FEATDEF_LTR , FTQUAL_map },
1093 { FEATDEF_LTR , FTQUAL_old_locus_tag },
1094 { FEATDEF_LTR , FTQUAL_standard_name },
1095
1096 { FEATDEF_mat_peptide , FTQUAL_allele },
1097 { FEATDEF_mat_peptide , FTQUAL_EC_number },
1098 { FEATDEF_mat_peptide , FTQUAL_function },
1099 { FEATDEF_mat_peptide , FTQUAL_label },
1100 { FEATDEF_mat_peptide , FTQUAL_map },
1101 { FEATDEF_mat_peptide , FTQUAL_old_locus_tag },
1102 { FEATDEF_mat_peptide , FTQUAL_product },
1103 { FEATDEF_mat_peptide , FTQUAL_standard_name },
1104
1105 { FEATDEF_misc_binding , FTQUAL_allele },
1106 { FEATDEF_misc_binding , FTQUAL_bound_moiety },
1107 { FEATDEF_misc_binding , FTQUAL_function },
1108 { FEATDEF_misc_binding , FTQUAL_label },
1109 { FEATDEF_misc_binding , FTQUAL_map },
1110 { FEATDEF_misc_binding , FTQUAL_old_locus_tag },
1111
1112 { FEATDEF_misc_difference , FTQUAL_allele },
1113 { FEATDEF_misc_difference , FTQUAL_clone },
1114 { FEATDEF_misc_difference , FTQUAL_compare },
1115 { FEATDEF_misc_difference , FTQUAL_label },
1116 { FEATDEF_misc_difference , FTQUAL_map },
1117 { FEATDEF_misc_difference , FTQUAL_old_locus_tag },
1118 { FEATDEF_misc_difference , FTQUAL_phenotype },
1119 { FEATDEF_misc_difference , FTQUAL_replace },
1120 { FEATDEF_misc_difference , FTQUAL_standard_name },
1121
1122 { FEATDEF_misc_feature , FTQUAL_allele },
1123 { FEATDEF_misc_feature , FTQUAL_function },
1124 { FEATDEF_misc_feature , FTQUAL_label },
1125 { FEATDEF_misc_feature , FTQUAL_map },
1126 { FEATDEF_misc_feature , FTQUAL_number },
1127 { FEATDEF_misc_feature , FTQUAL_old_locus_tag },
1128 { FEATDEF_misc_feature , FTQUAL_phenotype },
1129 { FEATDEF_misc_feature , FTQUAL_product },
1130 { FEATDEF_misc_feature , FTQUAL_standard_name },
1131
1132 { FEATDEF_misc_recomb , FTQUAL_allele },
1133 { FEATDEF_misc_recomb , FTQUAL_label },
1134 { FEATDEF_misc_recomb , FTQUAL_map },
1135 { FEATDEF_misc_recomb , FTQUAL_old_locus_tag },
1136 { FEATDEF_misc_recomb , FTQUAL_standard_name },
1137
1138 { FEATDEF_misc_signal , FTQUAL_allele },
1139 { FEATDEF_misc_signal , FTQUAL_function },
1140 { FEATDEF_misc_signal , FTQUAL_label },
1141 { FEATDEF_misc_signal , FTQUAL_map },
1142 { FEATDEF_misc_signal , FTQUAL_old_locus_tag },
1143 { FEATDEF_misc_signal , FTQUAL_operon },
1144 { FEATDEF_misc_signal , FTQUAL_phenotype },
1145 { FEATDEF_misc_signal , FTQUAL_standard_name },
1146
1147 { FEATDEF_misc_structure , FTQUAL_allele },
1148 { FEATDEF_misc_structure , FTQUAL_function },
1149 { FEATDEF_misc_structure , FTQUAL_label },
1150 { FEATDEF_misc_structure , FTQUAL_map },
1151 { FEATDEF_misc_structure , FTQUAL_old_locus_tag },
1152 { FEATDEF_misc_structure , FTQUAL_standard_name },
1153
1154 { FEATDEF_modified_base , FTQUAL_allele },
1155 { FEATDEF_modified_base , FTQUAL_frequency },
1156 { FEATDEF_modified_base , FTQUAL_label },
1157 { FEATDEF_modified_base , FTQUAL_map },
1158 { FEATDEF_modified_base , FTQUAL_mod_base },
1159 { FEATDEF_modified_base , FTQUAL_old_locus_tag },
1160
1161 { FEATDEF_N_region , FTQUAL_allele },
1162 { FEATDEF_N_region , FTQUAL_label },
1163 { FEATDEF_N_region , FTQUAL_map },
1164 { FEATDEF_N_region , FTQUAL_old_locus_tag },
1165 { FEATDEF_N_region , FTQUAL_product },
1166 { FEATDEF_N_region , FTQUAL_standard_name },
1167
1168 { FEATDEF_old_sequence , FTQUAL_allele },
1169 { FEATDEF_old_sequence , FTQUAL_compare },
1170 { FEATDEF_old_sequence , FTQUAL_label },
1171 { FEATDEF_old_sequence , FTQUAL_map },
1172 { FEATDEF_old_sequence , FTQUAL_old_locus_tag },
1173 { FEATDEF_old_sequence , FTQUAL_replace },
1174
1175 { FEATDEF_polyA_signal , FTQUAL_allele },
1176 { FEATDEF_polyA_signal , FTQUAL_label },
1177 { FEATDEF_polyA_signal , FTQUAL_map },
1178 { FEATDEF_polyA_signal , FTQUAL_old_locus_tag },
1179
1180 { FEATDEF_polyA_site , FTQUAL_allele },
1181 { FEATDEF_polyA_site , FTQUAL_label },
1182 { FEATDEF_polyA_site , FTQUAL_map },
1183 { FEATDEF_polyA_site , FTQUAL_old_locus_tag },
1184
1185 { FEATDEF_prim_transcript , FTQUAL_allele },
1186 { FEATDEF_prim_transcript , FTQUAL_function },
1187 { FEATDEF_prim_transcript , FTQUAL_label },
1188 { FEATDEF_prim_transcript , FTQUAL_map },
1189 { FEATDEF_prim_transcript , FTQUAL_old_locus_tag },
1190 { FEATDEF_prim_transcript , FTQUAL_operon },
1191 { FEATDEF_prim_transcript , FTQUAL_standard_name },
1192
1193 { FEATDEF_primer_bind , FTQUAL_allele },
1194 { FEATDEF_primer_bind , FTQUAL_label },
1195 { FEATDEF_primer_bind , FTQUAL_map },
1196 { FEATDEF_primer_bind , FTQUAL_old_locus_tag },
1197 { FEATDEF_primer_bind , FTQUAL_PCR_conditions },
1198 { FEATDEF_primer_bind , FTQUAL_standard_name },
1199
1200 { FEATDEF_promoter , FTQUAL_allele },
1201 { FEATDEF_promoter , FTQUAL_function },
1202 { FEATDEF_promoter , FTQUAL_label },
1203 { FEATDEF_promoter , FTQUAL_map },
1204 { FEATDEF_promoter , FTQUAL_old_locus_tag },
1205 { FEATDEF_promoter , FTQUAL_operon },
1206 { FEATDEF_promoter , FTQUAL_phenotype },
1207 { FEATDEF_promoter , FTQUAL_standard_name },
1208
1209 { FEATDEF_protein_bind , FTQUAL_allele },
1210 { FEATDEF_protein_bind , FTQUAL_bound_moiety },
1211 { FEATDEF_protein_bind , FTQUAL_function },
1212 { FEATDEF_protein_bind , FTQUAL_label },
1213 { FEATDEF_protein_bind , FTQUAL_map },
1214 { FEATDEF_protein_bind , FTQUAL_old_locus_tag },
1215 { FEATDEF_protein_bind , FTQUAL_operon },
1216 { FEATDEF_protein_bind , FTQUAL_standard_name },
1217
1218 { FEATDEF_RBS , FTQUAL_allele },
1219 { FEATDEF_RBS , FTQUAL_label },
1220 { FEATDEF_RBS , FTQUAL_map },
1221 { FEATDEF_RBS , FTQUAL_old_locus_tag },
1222 { FEATDEF_RBS , FTQUAL_standard_name },
1223
1224 { FEATDEF_repeat_region , FTQUAL_allele },
1225 { FEATDEF_repeat_region , FTQUAL_function },
1226 { FEATDEF_repeat_region , FTQUAL_label },
1227 { FEATDEF_repeat_region , FTQUAL_map },
1228 { FEATDEF_repeat_region , FTQUAL_mobile_element },
1229 { FEATDEF_repeat_region , FTQUAL_old_locus_tag },
1230 { FEATDEF_repeat_region , FTQUAL_rpt_family },
1231 { FEATDEF_repeat_region , FTQUAL_rpt_type },
1232 { FEATDEF_repeat_region , FTQUAL_rpt_unit },
1233 { FEATDEF_repeat_region , FTQUAL_rpt_unit_range },
1234 { FEATDEF_repeat_region , FTQUAL_rpt_unit_seq },
1235 { FEATDEF_repeat_region , FTQUAL_satellite },
1236 { FEATDEF_repeat_region , FTQUAL_standard_name },
1237
1238 { FEATDEF_repeat_unit , FTQUAL_allele },
1239 { FEATDEF_repeat_unit , FTQUAL_function },
1240 { FEATDEF_repeat_unit , FTQUAL_label },
1241 { FEATDEF_repeat_unit , FTQUAL_map },
1242 { FEATDEF_repeat_unit , FTQUAL_old_locus_tag },
1243 { FEATDEF_repeat_unit , FTQUAL_rpt_family },
1244 { FEATDEF_repeat_unit , FTQUAL_rpt_type },
1245 { FEATDEF_repeat_unit , FTQUAL_rpt_unit },
1246 { FEATDEF_repeat_unit , FTQUAL_rpt_unit_range },
1247 { FEATDEF_repeat_unit , FTQUAL_rpt_unit_seq },
1248
1249 { FEATDEF_rep_origin , FTQUAL_allele },
1250 { FEATDEF_rep_origin , FTQUAL_direction },
1251 { FEATDEF_rep_origin , FTQUAL_label },
1252 { FEATDEF_rep_origin , FTQUAL_map },
1253 { FEATDEF_rep_origin , FTQUAL_old_locus_tag },
1254 { FEATDEF_rep_origin , FTQUAL_standard_name },
1255
1256 { FEATDEF_S_region , FTQUAL_allele },
1257 { FEATDEF_S_region , FTQUAL_label },
1258 { FEATDEF_S_region , FTQUAL_map },
1259 { FEATDEF_S_region , FTQUAL_old_locus_tag },
1260 { FEATDEF_S_region , FTQUAL_product },
1261 { FEATDEF_S_region , FTQUAL_standard_name },
1262
1263 { FEATDEF_satellite , FTQUAL_allele },
1264 { FEATDEF_satellite , FTQUAL_label },
1265 { FEATDEF_satellite , FTQUAL_map },
1266 { FEATDEF_satellite , FTQUAL_old_locus_tag },
1267 { FEATDEF_satellite , FTQUAL_rpt_family },
1268 { FEATDEF_satellite , FTQUAL_rpt_type },
1269 { FEATDEF_satellite , FTQUAL_rpt_unit },
1270 { FEATDEF_satellite , FTQUAL_rpt_unit_range },
1271 { FEATDEF_satellite , FTQUAL_rpt_unit_seq },
1272 { FEATDEF_satellite , FTQUAL_standard_name },
1273
1274 { FEATDEF_sig_peptide , FTQUAL_allele },
1275 { FEATDEF_sig_peptide , FTQUAL_function },
1276 { FEATDEF_sig_peptide , FTQUAL_label },
1277 { FEATDEF_sig_peptide , FTQUAL_map },
1278 { FEATDEF_sig_peptide , FTQUAL_old_locus_tag },
1279 { FEATDEF_sig_peptide , FTQUAL_product },
1280 { FEATDEF_sig_peptide , FTQUAL_standard_name },
1281
1282 { FEATDEF_stem_loop , FTQUAL_allele },
1283 { FEATDEF_stem_loop , FTQUAL_function },
1284 { FEATDEF_stem_loop , FTQUAL_label },
1285 { FEATDEF_stem_loop , FTQUAL_map },
1286 { FEATDEF_stem_loop , FTQUAL_old_locus_tag },
1287 { FEATDEF_stem_loop , FTQUAL_operon },
1288 { FEATDEF_stem_loop , FTQUAL_standard_name },
1289
1290 { FEATDEF_STS , FTQUAL_allele },
1291 { FEATDEF_STS , FTQUAL_label },
1292 { FEATDEF_STS , FTQUAL_map },
1293 { FEATDEF_STS , FTQUAL_old_locus_tag },
1294 { FEATDEF_STS , FTQUAL_standard_name },
1295
1296 { FEATDEF_TATA_signal , FTQUAL_allele },
1297 { FEATDEF_TATA_signal , FTQUAL_label },
1298 { FEATDEF_TATA_signal , FTQUAL_map },
1299 { FEATDEF_TATA_signal , FTQUAL_old_locus_tag },
1300
1301 { FEATDEF_terminator , FTQUAL_allele },
1302 { FEATDEF_terminator , FTQUAL_label },
1303 { FEATDEF_terminator , FTQUAL_map },
1304 { FEATDEF_terminator , FTQUAL_old_locus_tag },
1305 { FEATDEF_terminator , FTQUAL_operon },
1306 { FEATDEF_terminator , FTQUAL_standard_name },
1307
1308 { FEATDEF_transit_peptide , FTQUAL_allele },
1309 { FEATDEF_transit_peptide , FTQUAL_function },
1310 { FEATDEF_transit_peptide , FTQUAL_label },
1311 { FEATDEF_transit_peptide , FTQUAL_map },
1312 { FEATDEF_transit_peptide , FTQUAL_old_locus_tag },
1313 { FEATDEF_transit_peptide , FTQUAL_product },
1314 { FEATDEF_transit_peptide , FTQUAL_standard_name },
1315
1316 { FEATDEF_unsure , FTQUAL_allele },
1317 { FEATDEF_unsure , FTQUAL_compare },
1318 { FEATDEF_unsure , FTQUAL_label },
1319 { FEATDEF_unsure , FTQUAL_map },
1320 { FEATDEF_unsure , FTQUAL_old_locus_tag },
1321 { FEATDEF_unsure , FTQUAL_replace },
1322
1323 { FEATDEF_V_region , FTQUAL_allele },
1324 { FEATDEF_V_region , FTQUAL_label },
1325 { FEATDEF_V_region , FTQUAL_map },
1326 { FEATDEF_V_region , FTQUAL_old_locus_tag },
1327 { FEATDEF_V_region , FTQUAL_product },
1328 { FEATDEF_V_region , FTQUAL_standard_name },
1329
1330 { FEATDEF_V_segment , FTQUAL_allele },
1331 { FEATDEF_V_segment , FTQUAL_label },
1332 { FEATDEF_V_segment , FTQUAL_map },
1333 { FEATDEF_V_segment , FTQUAL_old_locus_tag },
1334 { FEATDEF_V_segment , FTQUAL_product },
1335 { FEATDEF_V_segment , FTQUAL_standard_name },
1336
1337 { FEATDEF_variation , FTQUAL_allele },
1338 { FEATDEF_variation , FTQUAL_compare },
1339 { FEATDEF_variation , FTQUAL_frequency },
1340 { FEATDEF_variation , FTQUAL_label },
1341 { FEATDEF_variation , FTQUAL_map },
1342 { FEATDEF_variation , FTQUAL_old_locus_tag },
1343 { FEATDEF_variation , FTQUAL_phenotype },
1344 { FEATDEF_variation , FTQUAL_product },
1345 { FEATDEF_variation , FTQUAL_replace },
1346 { FEATDEF_variation , FTQUAL_standard_name },
1347
1348 { FEATDEF_3clip , FTQUAL_allele },
1349 { FEATDEF_3clip , FTQUAL_function },
1350 { FEATDEF_3clip , FTQUAL_label },
1351 { FEATDEF_3clip , FTQUAL_map },
1352 { FEATDEF_3clip , FTQUAL_old_locus_tag },
1353 { FEATDEF_3clip , FTQUAL_standard_name },
1354
1355 { FEATDEF_3UTR , FTQUAL_allele },
1356 { FEATDEF_3UTR , FTQUAL_function },
1357 { FEATDEF_3UTR , FTQUAL_label },
1358 { FEATDEF_3UTR , FTQUAL_map },
1359 { FEATDEF_3UTR , FTQUAL_old_locus_tag },
1360 { FEATDEF_3UTR , FTQUAL_standard_name },
1361
1362 { FEATDEF_5clip , FTQUAL_allele },
1363 { FEATDEF_5clip , FTQUAL_function },
1364 { FEATDEF_5clip , FTQUAL_label },
1365 { FEATDEF_5clip , FTQUAL_map },
1366 { FEATDEF_5clip , FTQUAL_old_locus_tag },
1367 { FEATDEF_5clip , FTQUAL_standard_name },
1368
1369 { FEATDEF_5UTR , FTQUAL_allele },
1370 { FEATDEF_5UTR , FTQUAL_function },
1371 { FEATDEF_5UTR , FTQUAL_label },
1372 { FEATDEF_5UTR , FTQUAL_map },
1373 { FEATDEF_5UTR , FTQUAL_old_locus_tag },
1374 { FEATDEF_5UTR , FTQUAL_standard_name },
1375
1376 { FEATDEF_10_signal , FTQUAL_allele },
1377 { FEATDEF_10_signal , FTQUAL_label },
1378 { FEATDEF_10_signal , FTQUAL_map },
1379 { FEATDEF_10_signal , FTQUAL_old_locus_tag },
1380 { FEATDEF_10_signal , FTQUAL_operon },
1381 { FEATDEF_10_signal , FTQUAL_standard_name },
1382
1383 { FEATDEF_35_signal , FTQUAL_allele },
1384 { FEATDEF_35_signal , FTQUAL_label },
1385 { FEATDEF_35_signal , FTQUAL_map },
1386 { FEATDEF_35_signal , FTQUAL_old_locus_tag },
1387 { FEATDEF_35_signal , FTQUAL_operon },
1388 { FEATDEF_35_signal , FTQUAL_standard_name },
1389
1390 { FEATDEF_REGION , FTQUAL_function },
1391 { FEATDEF_REGION , FTQUAL_label },
1392 { FEATDEF_REGION , FTQUAL_map },
1393 { FEATDEF_REGION , FTQUAL_number },
1394 { FEATDEF_REGION , FTQUAL_old_locus_tag },
1395 { FEATDEF_REGION , FTQUAL_phenotype },
1396 { FEATDEF_REGION , FTQUAL_product },
1397 { FEATDEF_REGION , FTQUAL_standard_name },
1398
1399 { FEATDEF_preprotein , FTQUAL_allele },
1400 { FEATDEF_preprotein , FTQUAL_label },
1401 { FEATDEF_preprotein , FTQUAL_map },
1402 { FEATDEF_preprotein , FTQUAL_old_locus_tag },
1403 { FEATDEF_preprotein , FTQUAL_product },
1404 { FEATDEF_preprotein , FTQUAL_standard_name },
1405
1406 { FEATDEF_mat_peptide_aa , FTQUAL_allele },
1407 { FEATDEF_mat_peptide_aa , FTQUAL_label },
1408 { FEATDEF_mat_peptide_aa , FTQUAL_map },
1409 { FEATDEF_mat_peptide_aa , FTQUAL_old_locus_tag },
1410 { FEATDEF_mat_peptide_aa , FTQUAL_product },
1411 { FEATDEF_mat_peptide_aa , FTQUAL_standard_name },
1412
1413 { FEATDEF_sig_peptide_aa , FTQUAL_allele },
1414 { FEATDEF_sig_peptide_aa , FTQUAL_label },
1415 { FEATDEF_sig_peptide_aa , FTQUAL_map },
1416 { FEATDEF_sig_peptide_aa , FTQUAL_old_locus_tag },
1417 { FEATDEF_sig_peptide_aa , FTQUAL_product },
1418 { FEATDEF_sig_peptide_aa , FTQUAL_standard_name },
1419
1420 { FEATDEF_transit_peptide_aa , FTQUAL_allele },
1421 { FEATDEF_transit_peptide_aa , FTQUAL_label },
1422 { FEATDEF_transit_peptide_aa , FTQUAL_map },
1423 { FEATDEF_transit_peptide_aa , FTQUAL_old_locus_tag },
1424 { FEATDEF_transit_peptide_aa , FTQUAL_product },
1425 { FEATDEF_transit_peptide_aa , FTQUAL_standard_name },
1426
1427 { FEATDEF_snoRNA , FTQUAL_allele },
1428 { FEATDEF_snoRNA , FTQUAL_function },
1429 { FEATDEF_snoRNA , FTQUAL_label },
1430 { FEATDEF_snoRNA , FTQUAL_map },
1431 { FEATDEF_snoRNA , FTQUAL_old_locus_tag },
1432 { FEATDEF_snoRNA , FTQUAL_product },
1433 { FEATDEF_snoRNA , FTQUAL_standard_name },
1434
1435 { FEATDEF_gap , FTQUAL_estimated_length },
1436 { FEATDEF_gap , FTQUAL_map },
1437
1438 { FEATDEF_operon , FTQUAL_allele },
1439 { FEATDEF_operon , FTQUAL_function },
1440 { FEATDEF_operon , FTQUAL_label },
1441 { FEATDEF_operon , FTQUAL_map },
1442 { FEATDEF_operon , FTQUAL_operon },
1443 { FEATDEF_operon , FTQUAL_phenotype },
1444 { FEATDEF_operon , FTQUAL_standard_name },
1445
1446 { FEATDEF_oriT , FTQUAL_allele },
1447 { FEATDEF_oriT , FTQUAL_direction },
1448 { FEATDEF_oriT , FTQUAL_label },
1449 { FEATDEF_oriT , FTQUAL_map },
1450 { FEATDEF_oriT , FTQUAL_old_locus_tag },
1451 { FEATDEF_oriT , FTQUAL_rpt_type },
1452 { FEATDEF_oriT , FTQUAL_rpt_unit },
1453 { FEATDEF_oriT , FTQUAL_rpt_unit_range },
1454 { FEATDEF_oriT , FTQUAL_rpt_unit_seq },
1455 { FEATDEF_oriT , FTQUAL_standard_name },
1456
1457 { FEATDEF_ncRNA , FTQUAL_allele },
1458 { FEATDEF_ncRNA , FTQUAL_function },
1459 { FEATDEF_ncRNA , FTQUAL_label },
1460 { FEATDEF_ncRNA , FTQUAL_map },
1461 { FEATDEF_ncRNA , FTQUAL_ncRNA_class },
1462 { FEATDEF_ncRNA , FTQUAL_old_locus_tag },
1463 { FEATDEF_ncRNA , FTQUAL_operon },
1464 { FEATDEF_ncRNA , FTQUAL_product },
1465 { FEATDEF_ncRNA , FTQUAL_standard_name },
1466
1467 { FEATDEF_tmRNA , FTQUAL_allele },
1468 { FEATDEF_tmRNA , FTQUAL_function },
1469 { FEATDEF_tmRNA , FTQUAL_label },
1470 { FEATDEF_tmRNA , FTQUAL_map },
1471 { FEATDEF_tmRNA , FTQUAL_old_locus_tag },
1472 { FEATDEF_tmRNA , FTQUAL_operon },
1473 { FEATDEF_tmRNA , FTQUAL_product },
1474 { FEATDEF_tmRNA , FTQUAL_standard_name },
1475 { FEATDEF_tmRNA , FTQUAL_tag_peptide }
1476 };
1477
1478 /* comparison of ValQual's -- first compare featdef then ftqual */
1479
1480 /* macro did not work properly on linux machine, so using function instead */
1481 /* #define COMPARE_VALQUAL(av,aq,bv,bq) ( ((av)-(bv)) ? ((av)-(bv)) : ((aq)-(bq)) ) */
1482
1483 static Int2 CompareValQual (Uint2 av, FtQualType aq, Uint2 bv, FtQualType bq)
1484
1485 {
1486 if (av == bv) return (aq - bq);
1487 return (av - bv);
1488 }
1489
1490 /* Returns TRUE if {featureKey, qualKey} exists in legalGbqualList */
1491
1492 static Boolean AllowedValQual (Uint2 featureKey, FtQualType qualKey, Boolean forGbRelease)
1493
1494 {
1495 Int2 L, R, mid;
1496
1497 if (qualKey == FTQUAL_experiment || qualKey == FTQUAL_inference) return TRUE;
1498
1499 L = 0;
1500 R = sizeof (legalGbqualList) / sizeof (ValQual) - 1;
1501 while (L < R) {
1502 mid = (L + R) / 2;
1503 if (CompareValQual (legalGbqualList [mid].featdef,
1504 legalGbqualList [mid].ftqual,
1505 featureKey, qualKey) < 0)
1506 L = mid + 1;
1507 else
1508 R = mid;
1509 }
1510 if (CompareValQual (legalGbqualList [R].featdef,
1511 legalGbqualList [R].ftqual, featureKey, qualKey) == 0) {
1512 return TRUE;
1513 }
1514
1515 return FALSE;
1516 }
1517
1518
1519 static CharPtr validRptString [] = {
1520 "tandem", "inverted", "flanking", "terminal", "direct", "dispersed", "other", NULL
1521 };
1522
1523 static CharPtr validLRBString [] = {
1524 "LEFT", "RIGHT", "BOTH", NULL
1525 };
1526
1527 static CharPtr validConsSpliceString [] = {
1528 "(5'site:YES, 3'site:YES)",
1529 "(5'site:YES, 3'site:NO)",
1530 "(5'site:YES, 3'site:ABSENT)",
1531 "(5'site:NO, 3'site:YES)",
1532 "(5'site:NO, 3'site:NO)",
1533 "(5'site:NO, 3'site:ABSENT)",
1534 "(5'site:ABSENT, 3'site:YES)",
1535 "(5'site:ABSENT, 3'site:NO)",
1536 "(5'site:ABSENT, 3'site:ABSENT)",
1537 NULL
1538 };
1539
1540 static Boolean StringInStringList (CharPtr testString, CharPtr PNTR stringList) {
1541 Int2 i;
1542 i = 0;
1543 while (stringList [i] != NULL) {
1544 if (StringICmp (testString, stringList [i]) == 0)
1545 return 1;
1546 i++;
1547 }
1548 return 0;
1549 }
1550
1551 static CharPtr validMobileElementString [] = {
1552 "transposon",
1553 "retrotransposon",
1554 "integron",
1555 "insertion sequence",
1556 "non-LTR retrotransposon",
1557 "SINE",
1558 "MITE",
1559 "LINE",
1560 "other",
1561 NULL
1562 };
1563
1564 static Boolean ValidateMobileElement (CharPtr testString)
1565
1566 {
1567 Boolean found;
1568 Int2 i;
1569 size_t len;
1570 CharPtr ptr, str;
1571
1572 found = FALSE;
1573 str = NULL;
1574 for (i = 0; validMobileElementString [i] != NULL; i++) {
1575 ptr = validMobileElementString [i];
1576 len = StringLen (ptr);
1577 if (StringNICmp (testString, ptr, len) == 0) {
1578 found = TRUE;
1579 str = testString + len;
1580 break;
1581 }
1582 }
1583 if (found) {
1584 if (StringDoesHaveText (str) && (str [0] != ':' || str [1] == '\0')) {
1585 return FALSE;
1586 } else if (StringNICmp (testString, "other", 5) == 0) {
1587 if (str [0] != ':' || str [1] == '\0') {
1588 return FALSE;
1589 }
1590 }
1591 }
1592 return found;
1593 }
1594
1595 /*
1596 Functions now public and prototyped in sequtil.h
1597 Return values are:
1598 0: no problem - Accession is in proper format
1599 -1: Accession did not start with a letter (or two letters)
1600 -2: Accession did not contain five numbers (or six numbers after 2 letters)
1601 -3: the original Accession number to be validated was NULL
1602 -4: the original Accession number is too long (>16)
1603 -5: missing version number (required by ValidateAccnDotVer)
1604 -6: bad version number (required by ValidateAccnDotVer)
1605 */
1606
1607 static Int2 ValidateAccnInternal (
1608 CharPtr accession,
1609 CharPtr PNTR strptr
1610 )
1611
1612 {
1613 Char ch;
1614 Int2 numAlpha = 0;
1615 Int2 numDigits = 0;
1616 Int2 numUndersc = 0;
1617 CharPtr str;
1618
1619 if (accession == NULL || accession [0] == '\0') return -3;
1620
1621 if (StringLen (accession) >= 16) return -4;
1622
1623 if (accession [0] < 'A' || accession [0] > 'Z') return -1;
1624
1625 str = accession;
1626 if (StringNCmp (str, "NZ_", 3) == 0) {
1627 str += 3;
1628 }
1629 ch = *str;
1630 while (IS_ALPHA (ch)) {
1631 numAlpha++;
1632 str++;
1633 ch = *str;
1634 }
1635 while (ch == '_') {
1636 numUndersc++;
1637 str++;
1638 ch = *str;
1639 }
1640 while (IS_DIGIT (ch)) {
1641 numDigits++;
1642 str++;
1643 ch = *str;
1644 }
1645 if (ch != '\0' && ch != ' ' && ch != '.') return -2;
1646
1647 if (numUndersc > 1) return -2;
1648
1649 if (strptr != NULL) {
1650 /* pass back current position for version check */
1651 *strptr = str;
1652 }
1653
1654 if (numUndersc == 0) {
1655 if (numAlpha == 1 && numDigits == 5) return 0;
1656 if (numAlpha == 2 && numDigits == 6) return 0;
1657 if (numAlpha == 3 && numDigits == 5) return 0;
1658 if (numAlpha == 4 && numDigits == 8) return 0;
1659 if (numAlpha == 4 && numDigits == 9) return 0;
1660 if (numAlpha == 5 && numDigits == 7) return 0;
1661 } else if (numUndersc == 1) {
1662 if (numAlpha != 2 || (numDigits != 6 && numDigits != 8 && numDigits != 9)) return -2;
1663 if (accession [0] == 'N' || accession [0] == 'X' || accession [0] == 'Z') {
1664 if (accession [1] == 'M' ||
1665 accession [1] == 'C' ||
1666 accession [1] == 'T' ||
1667 accession [1] == 'P' ||
1668 accession [1] == 'G' ||
1669 accession [1] == 'R' ||
1670 accession [1] == 'S' ||
1671 accession [1] == 'W' ||
1672 accession [1] == 'Z') {
1673 return 0;
1674 }
1675 }
1676 if (accession [0] == 'A' || accession [0] == 'Y') {
1677 if (accession [1] == 'P') return 0;
1678 }
1679 }
1680
1681 return -2;
1682 }
1683
1684 NLM_EXTERN Int2 ValidateAccn (
1685 CharPtr accession
1686 )
1687
1688 {
1689 return ValidateAccnInternal (accession, NULL);
1690 }
1691
1692 NLM_EXTERN Int2 ValidateAccnDotVer (
1693 CharPtr accession
1694 )
1695
1696 {
1697 Char ch;
1698 Int2 numVersion = 0;
1699 Int2 rsult;
1700 CharPtr str = NULL;
1701
1702 rsult = ValidateAccnInternal (accession, &str);
1703 if (rsult != 0) return rsult;
1704
1705 if (str == NULL) return -5;
1706 ch = *str;
1707 if (ch != '.') return -5;
1708 str++;
1709 ch = *str;
1710 while (IS_DIGIT (ch)) {
1711 numVersion++;
1712 str++;
1713 ch = *str;
1714 }
1715 if (numVersion < 1) return -5;
1716 if (ch != '\0' && ch != ' ') return -6;
1717
1718 return 0;
1719 }
1720
1721 NLM_EXTERN Int2 ValidateSeqID (
1722 SeqIdPtr sip
1723 )
1724
1725 {
1726 Char buf [41];
1727
1728 if (sip == NULL) return -3;
1729 SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_VER, sizeof (buf) - 1);
1730 return ValidateAccn (buf);
1731 }
1732
1733 static Boolean ValidateCompareQual (CharPtr accession, Boolean is_ged)
1734
1735 {
1736 if (ValidateAccnDotVer (accession) != 0) return FALSE;
1737 if (StringChr (accession, '_') == NULL) return TRUE;
1738 if (is_ged) return FALSE;
1739 return TRUE;
1740 }
1741
1742 static CharPtr mrnaevtext1 = "Derived by automated computational analysis";
1743 static CharPtr mrnaevtext2 = "using gene prediction method:";
1744 static CharPtr mrnaevtext3 = "Supporting evidence includes similarity to:";
1745
1746 static void GetStrFormRNAEvidence (
1747 UserObjectPtr uop,
1748 Pointer userdata
1749 )
1750
1751 {
1752 Int2 ce = 0, cm = 0, cp = 0, ne = 0, nm = 0, np = 0;
1753 Boolean has_counts = FALSE;
1754 size_t len;
1755 CharPtr method = NULL, prefix = NULL;
1756 ObjectIdPtr oip;
1757 CharPtr str = NULL;
1758 CharPtr PNTR strp;
1759 Char tmp [20];
1760 UserFieldPtr u, ufp, uu;
1761
1762 if (uop == NULL) return;
1763 oip = uop->type;
1764 if (oip == NULL) return;
1765 if (StringCmp (oip->str, "ModelEvidence") != 0) return;
1766 strp = (CharPtr PNTR) userdata;
1767
1768 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
1769 oip = ufp->label;
1770 if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
1771 if (StringCmp (oip->str, "Method") == 0) {
1772 method = StringSaveNoNull ((CharPtr) ufp->data.ptrvalue);
1773 } else if (StringCmp (oip->str, "mRNA") == 0) {
1774 for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
1775 if (u->data.ptrvalue == NULL) continue;
1776 for (uu = (UserFieldPtr) u->data.ptrvalue; uu != NULL; uu = uu->next) {
1777 oip = uu->label;
1778 if (oip == NULL) continue;
1779 if (StringCmp (oip->str, "accession") == 0) {
1780 nm++;
1781 }
1782 }
1783 }
1784 } else if (StringCmp (oip->str, "EST") == 0) {
1785 for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
1786 if (u->data.ptrvalue == NULL) continue;
1787 for (uu = (UserFieldPtr) u->data.ptrvalue; uu != NULL; uu = uu->next) {
1788 oip = uu->label;
1789 if (oip == NULL) continue;
1790 if (StringCmp (oip->str, "accession") == 0) {
1791 ne++;
1792 }
1793 }
1794 }
1795 } else if (StringCmp (oip->str, "Protein") == 0) {
1796 for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
1797 if (u->data.ptrvalue == NULL) continue;
1798 for (uu = (UserFieldPtr) u->data.ptrvalue; uu != NULL; uu = uu->next) {
1799 oip = uu->label;
1800 if (oip == NULL) continue;
1801 if (StringCmp (oip->str, "accession") == 0) {
1802 np++;
1803 }
1804 }
1805 }
1806 } else if (StringCmp (oip->str, "Counts") == 0) {
1807 has_counts = TRUE;
1808 for (u = (UserFieldPtr) ufp->data.ptrvalue; u != NULL; u = u->next) {
1809 if (u->data.ptrvalue == NULL) continue;
1810 if (u->choice != 2) continue;
1811 oip = u->label;
1812 if (oip == NULL) continue;
1813 if (StringCmp (oip->str, "mRNA") == 0) {
1814 cm = (Int2) u->data.intvalue;
1815 } else if (StringCmp (oip->str, "EST") == 0) {
1816 ce = (Int2) u->data.intvalue;
1817 } else if (StringCmp (oip->str, "Protein") == 0) {
1818 cp = (Int2) u->data.intvalue;
1819 }
1820 }
1821 }
1822 }
1823
1824 if (has_counts) {
1825 nm = cm;
1826 ne = ce;
1827 np = cp;
1828 }
1829
1830 len = StringLen (mrnaevtext1) + StringLen (mrnaevtext2) + StringLen (mrnaevtext3) + StringLen (method) + 80;
1831 str = (CharPtr) MemNew (len);
1832 if (str == NULL) return;
1833
1834 if (method != NULL) {
1835 sprintf (str, "%s %s %s.", mrnaevtext1, mrnaevtext2, method);
1836 } else {
1837 sprintf (str, "%s.", mrnaevtext1);
1838 }
1839 if (nm > 0 || ne > 0 || np > 0) {
1840 StringCat (str, " ");
1841 StringCat (str, mrnaevtext3);
1842 }
1843 prefix = " ";
1844 if (nm > 0) {
1845 StringCat (str, prefix);
1846 if (nm > 1) {
1847 sprintf (tmp, "%d mRNAs", (int) nm);
1848 } else {
1849 sprintf (tmp, "%d mRNA", (int) nm);
1850 }
1851 StringCat (str, tmp);
1852 prefix = ", ";
1853 }
1854 if (ne > 0) {
1855 StringCat (str, prefix);
1856 if (ne > 1) {
1857 sprintf (tmp, "%d ESTs", (int) ne);
1858 } else {
1859 sprintf (tmp, "%d EST", (int) ne);
1860 }
1861 StringCat (str, tmp);
1862 prefix = ", ";
1863 }
1864 if (np > 0) {
1865 StringCat (str, prefix);
1866 if (np > 1) {
1867 sprintf (tmp, "%d Proteins", (int) np);
1868 } else {
1869 sprintf (tmp, "%d Protein", (int) np);
1870 }
1871 StringCat (str, tmp);
1872 prefix = ", ";
1873 }
1874
1875 *strp = str;
1876 }
1877
1878 static Boolean ValidateRptUnit (
1879 CharPtr buf
1880 )
1881
1882 {
1883 #if 0
1884 CharPtr str;
1885 Char tmp [255];
1886
1887 StringNCpy_0 (tmp, buf, sizeof (tmp));
1888 TrimSpacesAroundString (tmp);
1889
1890 str = tmp;
1891 /* first check for sequence letters with optional semicolons */
1892 while (IS_ALPHA (*str) || *str == ';') str++;
1893 if (*str == '\0') return TRUE;
1894 /* next check for letters, digits, commas, parentheses, dashes, and underscores */
1895 str = tmp;
1896 while (IS_ALPHANUM (*str) || *str == '(' || *str == ')' || *str == ',' || *str == ';' || *str == '-' || *str == '_') str++;
1897 if (*str == '\0') return TRUE;
1898 /* now check for officially legal styles */
1899 str = tmp;
1900 while (IS_ALPHANUM (*str)) str++;
1901 if (*str != '\0') { /* wasn't pure alphanumeric; now check for xxx..yyy */
1902 str = buf;
1903 while (IS_DIGIT (*str)) str++; /* xxx */
1904 if (*str == '\0' /* must be something after the xxx */
1905 || StringLen (str) < 3 /* need at least 2 '.'s and a digit*/
1906 || str[0] != '.' || str[1] != '.') return FALSE;
1907 str+=2;
1908 while (IS_DIGIT (*str)) str++;
1909 if (*str != '\0') return FALSE; /* mustn't be anything after the yyy */
1910 }
1911 #endif
1912 return TRUE;
1913 }
1914
1915
1916 NLM_EXTERN CharPtr goFieldType [] = {
1917 "", "text string", "go id", "pubmed id", "go ref", "evidence", NULL
1918 };
1919
1920 typedef struct gostruc {
1921 CharPtr term;
1922 CharPtr goid;
1923 CharPtr evidence;
1924 Int4 pmid;
1925 CharPtr goref;
1926 } GoStruc, PNTR GoStrucPtr;
1927
1928 static int LIBCALLBACK SortVnpByGsp (VoidPtr ptr1, VoidPtr ptr2)
1929
1930 {
1931 int compare;
1932 GoStrucPtr gsp1, gsp2;
1933 ValNodePtr vnp1, vnp2;
1934
1935 if (ptr1 == NULL || ptr2 == NULL) return 0;
1936 vnp1 = *((ValNodePtr PNTR) ptr1);
1937 vnp2 = *((ValNodePtr PNTR) ptr2);
1938 if (vnp1 == NULL || vnp2 == NULL) return 0;
1939 gsp1 = (GoStrucPtr) vnp1->data.ptrvalue;
1940 gsp2 = (GoStrucPtr) vnp2->data.ptrvalue;
1941 if (gsp1 == NULL || gsp2 == NULL) return 0;
1942
1943 compare = StringICmp (gsp1->term, gsp2->term);
1944 if (compare > 0) {
1945 return 1;
1946 } else if (compare < 0) {
1947 return -1;
1948 }
1949
1950 if (gsp1->pmid == 0) return 1;
1951 if (gsp2->pmid == 0) return -1;
1952 if (gsp1->pmid > gsp2->pmid) {
1953 return 1;
1954 } else if (gsp1->pmid < gsp2->pmid) {
1955 return -1;
1956 }
1957
1958 return 0;
1959 }
1960
1961 static CharPtr GetCombinedGOtext (
1962 UserFieldPtr entryhead,
1963 IntAsn2gbJobPtr ajp
1964 )
1965
1966 {
1967 UserFieldPtr entry, topufp, ufp;
1968 CharPtr evidence, goid, goref, last = NULL,
1969 str, textstr, prefix;
1970 StringItemPtr ffstring;
1971 Char gid [32], tmp [32];
1972 GoStrucPtr gsp;
1973 ValNodePtr head = NULL, vnp;
1974 Boolean is_www;
1975 Int2 j;
1976 ObjectIdPtr oip;
1977 Int4 pmid;
1978
1979 if (entryhead == NULL || ajp == NULL) return NULL;
1980 is_www = GetWWW (ajp);
1981
1982 for (entry = entryhead; entry != NULL; entry = entry->next) {
1983 if (entry == NULL || entry->choice != 11) break;
1984 topufp = (UserFieldPtr) entry->data.ptrvalue;
1985 if (topufp == NULL) continue;
1986
1987 textstr = NULL;
1988 evidence = NULL;
1989 goid = NULL;
1990 goref = NULL;
1991 pmid = 0;
1992 for (ufp = topufp; ufp != NULL; ufp = ufp->next) {
1993 oip = ufp->label;
1994 if (oip == NULL) continue;
1995 for (j = 0; goFieldType [j] != NULL; j++) {
1996 if (StringICmp (oip->str, goFieldType [j]) == 0) break;
1997 }
1998 if (goFieldType [j] == NULL) continue;
1999 switch (j) {
2000 case 1 :
2001 if (ufp->choice == 1) {
2002 textstr = (CharPtr) ufp->data.ptrvalue;
2003 }
2004 break;
2005 case 2 :
2006 if (ufp->choice == 1) {
2007 goid = (CharPtr) ufp->data.ptrvalue;
2008 } else if (ufp->choice == 2) {
2009 sprintf (gid, "%ld", (long) (Int4) ufp->data.intvalue);
2010 goid = (CharPtr) gid;
2011 }
2012 break;
2013 case 3 :
2014 if (ufp->choice == 2) {
2015 pmid = (Int4) ufp->data.intvalue;
2016 }
2017 break;
2018 case 4 :
2019 if (ufp->choice == 1) {
2020 goref = (CharPtr) ufp->data.ptrvalue;
2021 }
2022 break;
2023 case 5 :
2024 if (ufp->choice == 1) {
2025 evidence = (CharPtr) ufp->data.ptrvalue;
2026 }
2027 break;
2028 default :
2029 break;
2030 }
2031 }
2032
2033 if (StringDoesHaveText (textstr)) {
2034 gsp = (GoStrucPtr) MemNew (sizeof (GoStruc));
2035 if (gsp != NULL) {
2036 gsp->term = StringSave (textstr);
2037 gsp->goid = StringSave (goid);
2038 gsp->evidence = StringSave (evidence);
2039 gsp->pmid = pmid;
2040 gsp->goref = StringSave (goref);
2041 ValNodeAddPointer (&head, 0, (Pointer) gsp);
2042 }
2043 }
2044 }
2045
2046 if (head == NULL) return NULL;
2047 head = ValNodeSort (head, SortVnpByGsp);
2048
2049 if (is_www) {
2050 ffstring = FFGetString (ajp);
2051 if (ffstring != NULL) {
2052
2053 last = NULL;
2054 prefix = NULL;
2055 for (vnp = head; vnp != NULL; vnp = vnp->next) {
2056 gsp = (GoStrucPtr) vnp->data.ptrvalue;
2057 if (gsp == NULL) continue;
2058 if (StringICmp (gsp->term, last) != 0) {
2059 if (prefix != NULL) {
2060 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2061 }
2062 if (StringDoesHaveText (gsp->goid)) {
2063 FFAddOneString (ffstring, "GO:", FALSE, TRUE, TILDE_IGNORE);
2064 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2065 FF_Add_NCBI_Base_URL (ffstring, link_go);
2066 FFAddOneString (ffstring, gsp->goid, FALSE, FALSE, TILDE_IGNORE);
2067 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2068 FFAddOneString (ffstring, gsp->goid, FALSE, TRUE, TILDE_IGNORE);
2069 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2070 }
2071 if (StringDoesHaveText (gsp->term)) {
2072 FFAddOneString (ffstring, " - ", FALSE, TRUE, TILDE_IGNORE);
2073 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2074 FF_Add_NCBI_Base_URL (ffstring, link_go);
2075 FFAddOneString (ffstring, gsp->goid, FALSE, FALSE, TILDE_IGNORE);
2076 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2077 FFAddOneString (ffstring, gsp->term, FALSE, TRUE, TILDE_IGNORE);
2078 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2079 }
2080 }
2081 if (StringDoesHaveText (gsp->evidence)) {
2082 FFAddOneString (ffstring, " [Evidence ", FALSE, TRUE, TILDE_IGNORE);
2083 FFAddOneString (ffstring, gsp->evidence, FALSE, TRUE, TILDE_IGNORE);
2084 FFAddOneString (ffstring, "]", FALSE, TRUE, TILDE_IGNORE);
2085 }
2086 if (gsp->pmid > 0) {
2087 sprintf (tmp, "%ld", (long) gsp->pmid);
2088 FFAddOneString (ffstring, " [PMID <a href=\"", FALSE, FALSE, TILDE_IGNORE);
2089 FF_Add_NCBI_Base_URL (ffstring, link_muid);
2090 FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
2091 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2092 FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
2093 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2094 FFAddOneString (ffstring, "]", FALSE, TRUE, TILDE_IGNORE);
2095 } else if (StringDoesHaveText (gsp->goref)) {
2096 FFAddOneString (ffstring, " [GO Ref <a href=\"", FALSE, FALSE, TILDE_IGNORE);
2097 FF_Add_NCBI_Base_URL (ffstring, link_go_ref);
2098 FFAddOneString (ffstring, gsp->goref, FALSE, FALSE, TILDE_IGNORE);
2099 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2100 FFAddOneString (ffstring, gsp->goref, FALSE, FALSE, TILDE_IGNORE);
2101 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2102 FFAddOneString (ffstring, "]", FALSE, TRUE, TILDE_IGNORE);
2103 }
2104 prefix = "; ";
2105 last = gsp->term;
2106 }
2107
2108 str = FFToCharPtr (ffstring);
2109 TrimSpacesAroundString (str);
2110
2111 FFRecycleString (ajp, ffstring);
2112
2113 for (vnp = head; vnp != NULL; vnp = vnp->next) {
2114 gsp = (GoStrucPtr) vnp->data.ptrvalue;
2115 if (gsp == NULL) continue;
2116 gsp->term = MemFree (gsp->term);
2117 gsp->goid = MemFree (gsp->goid);
2118 gsp->goref = MemFree (gsp->goref);
2119 gsp->evidence = MemFree (gsp->evidence);
2120 }
2121 ValNodeFreeData (head);
2122
2123 return str;
2124 }
2125 }
2126
2127 /* not is_www */
2128
2129 ffstring = FFGetString (ajp);
2130 if (ffstring != NULL) {
2131 last = NULL;
2132 prefix = NULL;
2133 for (vnp = head; vnp != NULL; vnp = vnp->next) {
2134 gsp = (GoStrucPtr) vnp->data.ptrvalue;
2135 if (gsp == NULL) continue;
2136 if (StringICmp (gsp->term, last) != 0) {
2137 if (prefix != NULL) {
2138 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2139 }
2140 if (StringDoesHaveText (gsp->goid)) {
2141 FFAddOneString (ffstring, "GO:", FALSE, TRUE, TILDE_IGNORE);
2142 FFAddOneString (ffstring, gsp->goid, FALSE, TRUE, TILDE_IGNORE);
2143 }
2144 if (StringDoesHaveText (gsp->term)) {
2145 FFAddOneString (ffstring, " - ", FALSE, TRUE, TILDE_IGNORE);
2146 FFAddOneString (ffstring, gsp->term, FALSE, TRUE, TILDE_IGNORE);
2147 }
2148 }
2149 if (StringDoesHaveText (gsp->evidence)) {
2150 FFAddOneString (ffstring, " [Evidence ", FALSE, TRUE, TILDE_IGNORE);
2151 FFAddOneString (ffstring, gsp->evidence, FALSE, TRUE, TILDE_IGNORE);
2152 FFAddOneString (ffstring, "]", FALSE, TRUE, TILDE_IGNORE);
2153 }
2154 if (gsp->pmid > 0) {
2155 sprintf (tmp, "%ld", (long) gsp->pmid);
2156 FFAddOneString (ffstring, " [PMID ", FALSE, FALSE, TILDE_IGNORE);
2157 FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
2158 FFAddOneString (ffstring, "]", FALSE, TRUE, TILDE_IGNORE);
2159 } else if (StringDoesHaveText (gsp->goref)) {
2160 FFAddOneString (ffstring, " [GO Ref", FALSE, FALSE, TILDE_IGNORE);
2161 FFAddOneString (ffstring, gsp->goref, FALSE, FALSE, TILDE_IGNORE);
2162 FFAddOneString (ffstring, "]", FALSE, TRUE, TILDE_IGNORE);
2163 }
2164 prefix = "; ";
2165 last = gsp->term;
2166 }
2167 }
2168
2169 str = FFToCharPtr (ffstring);
2170 TrimSpacesAroundString (str);
2171
2172 FFRecycleString (ajp, ffstring);
2173
2174 for (vnp = head; vnp != NULL; vnp = vnp->next) {
2175 gsp = (GoStrucPtr) vnp->data.ptrvalue;
2176 if (gsp == NULL) continue;
2177 gsp->term = MemFree (gsp->term);
2178 gsp->goid = MemFree (gsp->goid);
2179 gsp->goref = MemFree (gsp->goref);
2180 gsp->evidence = MemFree (gsp->evidence);
2181 }
2182 ValNodeFreeData (head);
2183
2184 return str;
2185 }
2186
2187 static CharPtr GetGOtext (
2188 UserFieldPtr topufp,
2189 IntAsn2gbJobPtr ajp,
2190 Boolean abbreviate
2191 )
2192
2193 {
2194 CharPtr evidence = NULL;
2195 StringItemPtr ffstring;
2196 Char gid [32];
2197 CharPtr goid = NULL;
2198 CharPtr goref = NULL;
2199 Boolean is_www;
2200 Int2 j;
2201 ObjectIdPtr oip;
2202 Int4 pmid = 0;
2203 CharPtr str;
2204 CharPtr textstr = NULL;
2205 Char tmp [32];
2206 UserFieldPtr ufp;
2207
2208 if (topufp == NULL || ajp == NULL) return NULL;
2209 is_www = GetWWW (ajp);
2210
2211 for (ufp = topufp; ufp != NULL; ufp = ufp->next) {
2212 oip = ufp->label;
2213 if (oip == NULL) continue;
2214 for (j = 0; goFieldType [j] != NULL; j++) {
2215 if (StringICmp (oip->str, goFieldType [j]) == 0) break;
2216 }
2217 if (goFieldType [j] == NULL) continue;
2218 switch (j) {
2219 case 1 :
2220 if (ufp->choice == 1) {
2221 textstr = (CharPtr) ufp->data.ptrvalue;
2222 }
2223 break;
2224 case 2 :
2225 if (ufp->choice == 1) {
2226 goid = (CharPtr) ufp->data.ptrvalue;
2227 } else if (ufp->choice == 2) {
2228 sprintf (gid, "%ld", (long) (Int4) ufp->data.intvalue);
2229 goid = (CharPtr) gid;
2230 }
2231 break;
2232 case 3 :
2233 if (ufp->choice == 2) {
2234 pmid = (Int4) ufp->data.intvalue;
2235 }
2236 break;
2237 case 4 :
2238 if (ufp->choice == 1) {
2239 goref = (CharPtr) ufp->data.ptrvalue;
2240 }
2241 break;
2242 case 5 :
2243 if (ufp->choice == 1) {
2244 evidence = (CharPtr) ufp->data.ptrvalue;
2245 }
2246 break;
2247 default :
2248 break;
2249 }
2250 }
2251 /* if (StringHasNoText (textstr)) return NULL; */
2252
2253 if (is_www) {
2254 ffstring = FFGetString (ajp);
2255 if (ffstring != NULL) {
2256 if (StringDoesHaveText (goid)) {
2257 FFAddOneString (ffstring, "GO:", FALSE, TRUE, TILDE_IGNORE);
2258 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2259 FF_Add_NCBI_Base_URL (ffstring, link_go);
2260 FFAddOneString (ffstring, goid, FALSE, FALSE, TILDE_IGNORE);
2261 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2262 FFAddOneString (ffstring, goid, FALSE, TRUE, TILDE_IGNORE);
2263 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2264 }
2265
2266 if (StringDoesHaveText (textstr)) {
2267 FFAddOneString (ffstring, " - ", FALSE, TRUE, TILDE_IGNORE);
2268 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2269 FF_Add_NCBI_Base_URL (ffstring, link_go);
2270 FFAddOneString (ffstring, goid, FALSE, FALSE, TILDE_IGNORE);
2271 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2272 FFAddOneString (ffstring, textstr, FALSE, TRUE, TILDE_IGNORE);
2273 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2274 }
2275
2276 if (StringDoesHaveText (evidence)) {
2277 FFAddOneString (ffstring, " [Evidence ", FALSE, TRUE, TILDE_IGNORE);
2278 FFAddOneString (ffstring, evidence, FALSE, TRUE, TILDE_IGNORE);
2279 FFAddOneString (ffstring, "]", FALSE, TRUE, TILDE_IGNORE);
2280 }
2281
2282 if (pmid != 0) {
2283 sprintf (tmp, "%ld", (long) pmid);
2284 FFAddOneString (ffstring, " [PMID ", FALSE, TRUE, TILDE_IGNORE);
2285 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2286 FF_Add_NCBI_Base_URL (ffstring, link_muid);
2287 FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
2288 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2289 FFAddOneString (ffstring, tmp, FALSE, FALSE, TILDE_IGNORE);
2290 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2291 FFAddOneString (ffstring, "]", FALSE, TRUE, TILDE_IGNORE);
2292 } else if (StringDoesHaveText (goref)) {
2293 FFAddOneString (ffstring, " [GO Ref ", FALSE, TRUE, TILDE_IGNORE);
2294 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2295 FF_Add_NCBI_Base_URL (ffstring, link_go_ref);
2296 FFAddOneString (ffstring, goref, FALSE, FALSE, TILDE_IGNORE);
2297 FFAddOneString (ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2298 FFAddOneString (ffstring, goref, FALSE, FALSE, TILDE_IGNORE);
2299 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2300 FFAddOneString (ffstring, "]", FALSE, TRUE, TILDE_IGNORE);
2301 }
2302
2303 str = FFToCharPtr (ffstring);
2304 TrimSpacesAroundString (str);
2305
2306 FFRecycleString (ajp, ffstring);
2307
2308 return str;
2309 }
2310 }
2311
2312 /* not is_www */
2313
2314 str = (CharPtr) MemNew (StringLen (goid) + StringLen (textstr) +
2315 StringLen (evidence) + StringLen (goref) + 100);
2316 if (str == NULL) return NULL;
2317
2318 if (StringDoesHaveText (goid)) {
2319 StringCat (str, "GO:");
2320 StringCat (str, goid);
2321 }
2322
2323 if (StringDoesHaveText (textstr)) {
2324 StringCat (str, " - ");
2325 StringCat (str, textstr);
2326 }
2327
2328 if (StringDoesHaveText (evidence)) {
2329 StringCat (str, " [Evidence ");
2330 StringCat (str, evidence);
2331 StringCat (str, "]");
2332 }
2333
2334 if (pmid != 0) {
2335 sprintf (tmp, "%ld", (long) pmid);
2336 StringCat (str, " [PMID ");
2337 StringCat (str, tmp);
2338 StringCat (str, "]");
2339 } else if (StringDoesHaveText (goref)) {
2340 StringCat (str, " [GO Ref ");
2341 StringCat (str, goref);
2342 StringCat (str, "]");
2343 }
2344
2345 TrimSpacesAroundString (str);
2346
2347 return str;
2348 }
2349
2350 static void GetNomenclatureText (
2351 UserObjectPtr uop,
2352 Pointer userdata
2353 )
2354
2355 {
2356 CharPtr ds = NULL, me = NULL, nm = NULL, sy = NULL;
2357 size_t len;
2358 ObjectIdPtr oip;
2359 CharPtr str = NULL;
2360 CharPtr PNTR strp;
2361 UserFieldPtr ufp;
2362
2363 if (uop == NULL) return;
2364 oip = uop->type;
2365 if (oip == NULL) return;
2366 if (StringCmp (oip->str, "OfficialNomenclature") != 0) return;
2367 strp = (CharPtr PNTR) userdata;
2368
2369 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2370 oip = ufp->label;
2371 if (oip == NULL || oip->str == NULL) continue;
2372 if (StringICmp (oip->str, "Symbol") == 0) {
2373 if (ufp->choice == 1) {
2374 str = (CharPtr) ufp->data.ptrvalue;
2375 if (StringDoesHaveText (str)) {
2376 sy = str;
2377 }
2378 }
2379 } else if (StringICmp (oip->str, "Name") == 0) {
2380 if (ufp->choice == 1) {
2381 str = (CharPtr) ufp->data.ptrvalue;
2382 if (StringDoesHaveText (str)) {
2383 nm = str;
2384 }
2385 }
2386 } else if (StringICmp (oip->str, "DataSource") == 0) {
2387 if (ufp->choice == 1) {
2388 str = (CharPtr) ufp->data.ptrvalue;
2389 if (StringDoesHaveText (str)) {
2390 ds = str;
2391 }
2392 }
2393 } else if (StringICmp (oip->str, "Status") == 0) {
2394 if (ufp->choice == 1) {
2395 str = (CharPtr) ufp->data.ptrvalue;
2396 if (StringDoesHaveText (str)) {
2397 me = str;
2398 }
2399 }
2400 }
2401 }
2402 if (me == NULL) {
2403 me = "Unclassified";
2404 }
2405
2406 if (StringHasNoText (sy)) return;
2407
2408 len = StringLen (ds) + StringLen (me) + StringLen (nm) + StringLen (sy) + 80;
2409 str = (CharPtr) MemNew (len);
2410 if (str == NULL) return;
2411
2412 StringCpy (str, me);
2413 StringCat (str, " Symbol: ");
2414 StringCat (str, sy);
2415
2416 if (StringDoesHaveText (nm)) {
2417 StringCat (str, " | Name: ");
2418 StringCat (str, nm);
2419 }
2420
2421 if (StringDoesHaveText (ds)) {
2422 StringCat (str, " | Provided by: ");
2423 StringCat (str, ds);
2424 }
2425
2426 *strp = str;
2427 }
2428
2429 static CharPtr GetNomenclature (
2430 GeneNomenclaturePtr gnp
2431 )
2432
2433 {
2434 Char buf [32];
2435 CharPtr db = NULL, ds = NULL, me = NULL, nm = NULL, sy = NULL, str = NULL;
2436 DbtagPtr dbt;
2437 size_t len;
2438 ObjectIdPtr oip;
2439
2440 if (gnp == NULL) return NULL;
2441
2442 if (StringDoesHaveText (gnp->symbol)) {
2443 sy = gnp->symbol;
2444 }
2445 if (StringHasNoText (sy)) return NULL;
2446
2447 if (gnp->status == 1) {
2448 me = "Official";
2449 } else if (gnp->status == 2) {
2450 me = "Interim";
2451 }
2452 if (me == NULL) {
2453 me = "Unclassified";
2454 }
2455
2456 if (StringDoesHaveText (gnp->name)) {
2457 nm = gnp->name;
2458 }
2459
2460 dbt = gnp->source;
2461 if (dbt != NULL) {
2462 if (StringDoesHaveText (dbt->db)) {
2463 db = dbt->db;
2464 }
2465 oip = dbt->tag;
2466 if (oip != NULL) {
2467 if (StringDoesHaveText (oip->str)) {
2468 ds = oip->str;
2469 } else {
2470 sprintf (buf, "%ld", (long) oip->id);
2471 ds = buf;
2472 }
2473 }
2474 }
2475
2476 len = StringLen (db) + StringLen (ds) + StringLen (me) + StringLen (nm) + StringLen (sy) + 80;
2477 str = (CharPtr) MemNew (sizeof (Char) * len);
2478 if (str == NULL) return NULL;
2479
2480 StringCpy (str, me);
2481 StringCat (str, " Symbol: ");
2482 StringCat (str, sy);
2483
2484 if (StringDoesHaveText (nm)) {
2485 StringCat (str, " | Name: ");
2486 StringCat (str, nm);
2487 }
2488
2489 if (StringDoesHaveText (db) && StringDoesHaveText (ds)) {
2490 StringCat (str, " | Provided by: ");
2491 StringCat (str, db);
2492 StringCat (str, ":");
2493 StringCat (str, ds);
2494 }
2495
2496 return str;
2497 }
2498
2499 static Boolean DbxrefAlreadyInGeneXref (
2500 DbtagPtr dbt,
2501 ValNodePtr dbxref
2502 )
2503
2504 {
2505 DbtagPtr gdbt;
2506 ValNodePtr vnp;
2507
2508 if (dbt == NULL) return FALSE;
2509
2510 for (vnp = dbxref; vnp != NULL; vnp = vnp->next) {
2511 gdbt = (DbtagPtr) vnp->data.ptrvalue;
2512 if (gdbt == NULL) continue;
2513 if (DbtagMatch (dbt, gdbt)) return TRUE;
2514 }
2515
2516 return FALSE;
2517 }
2518
2519 static void FF_www_nuc_or_prot_id (
2520 IntAsn2gbJobPtr ajp,
2521 StringItemPtr ffstring,
2522 CharPtr seqid,
2523 Int4 gi,
2524 Boolean is_na
2525 )
2526 {
2527 Char buf [32];
2528
2529 if ( GetWWW(ajp) ) {
2530 FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2531 if (is_na) {
2532 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
2533 } else {
2534 FF_Add_NCBI_Base_URL (ffstring, link_seqp);
2535 }
2536 if (gi > 0) {
2537 sprintf (buf, "%ld", (long) gi);
2538 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
2539 } else {
2540 FFAddOneString(ffstring, seqid, FALSE, FALSE, TILDE_IGNORE);
2541 }
2542 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2543 FFAddOneString(ffstring, seqid, FALSE, FALSE, TILDE_IGNORE);
2544 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2545 } else {
2546 FFAddOneString(ffstring, seqid, FALSE, FALSE, TILDE_IGNORE);
2547 }
2548 }
2549
2550 static void FF_www_gcode (
2551 IntAsn2gbJobPtr ajp,
2552 StringItemPtr ffstring,
2553 CharPtr gcode
2554 )
2555 {
2556
2557 if ( GetWWW(ajp) ) {
2558 FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2559 FF_Add_NCBI_Base_URL (ffstring, link_code);
2560 FFAddOneString(ffstring, "mode=c#SG", FALSE, FALSE, TILDE_IGNORE);
2561 FFAddOneString(ffstring, gcode, FALSE, FALSE, TILDE_IGNORE);
2562 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2563 FFAddOneString(ffstring, gcode, FALSE, FALSE, TILDE_IGNORE);
2564 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2565 } else {
2566 FFAddOneString(ffstring, gcode, FALSE, FALSE, TILDE_IGNORE);
2567 }
2568 }
2569
2570 static void FF_AddECnumber (
2571 IntAsn2gbJobPtr ajp,
2572 StringItemPtr ffstring,
2573 CharPtr str
2574 )
2575 {
2576 if (StringHasNoText (str)) return;
2577 if ( GetWWW(ajp) ) {
2578 /*
2579 if (StringChr (str, '-') != NULL) {
2580 FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2581 FFAddOneString(ffstring, ec_ambig, FALSE, FALSE, TILDE_IGNORE);
2582 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2583 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
2584 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2585 } else {
2586 FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2587 FFAddOneString(ffstring, ec_link, FALSE, FALSE, TILDE_IGNORE);
2588 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
2589 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2590 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
2591 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2592 }
2593 */
2594 FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2595 FFAddOneString(ffstring, ec_link, FALSE, FALSE, TILDE_IGNORE);
2596 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
2597 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2598 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
2599 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2600 } else {
2601 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
2602 }
2603 }
2604
2605
/* FormatFeatureBlockQuals should not be called directly,
   except from FormatFeatureBlock.  It performs no input
   validation (perhaps it should?). */
2609
2610 static void LIBCALLBACK SaveGBSeqTranslation (
2611 CharPtr sequence,
2612 Pointer userdata
2613 )
2614
2615 {
2616 CharPtr tmp;
2617 CharPtr PNTR tmpp;
2618
2619 tmpp = (CharPtr PNTR) userdata;
2620 tmp = *tmpp;
2621
2622 tmp = StringMove (tmp, sequence);
2623
2624 *tmpp = tmp;
2625 }
2626
2627 static int LIBCALLBACK SortVnpByInt (VoidPtr ptr1, VoidPtr ptr2)
2628
2629 {
2630 ValNodePtr vnp1;
2631 ValNodePtr vnp2;
2632
2633 if (ptr1 == NULL || ptr2 == NULL) return 0;
2634 vnp1 = *((ValNodePtr PNTR) ptr1);
2635 vnp2 = *((ValNodePtr PNTR) ptr2);
2636 if (vnp1 == NULL || vnp2 == NULL) return 0;
2637
2638 if (vnp1->data.intvalue > vnp2->data.intvalue) {
2639 return 1;
2640 } else if (vnp1->data.intvalue < vnp2->data.intvalue) {
2641 return -1;
2642 }
2643
2644 return 0;
2645 }
2646
2647 static FloatHi MolWtForProtFeat (
2648 BioseqPtr bsp,
2649 SeqFeatPtr sfp,
2650 IntPrtBlockPtr ipp
2651 )
2652
2653 {
2654 size_t len;
2655 FloatHi mol_wt = 0.0;
2656 ProtRefPtr prp;
2657 CharPtr str;
2658
2659 if (bsp == NULL || sfp == NULL || ipp == NULL) return 0.0;
2660 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
2661 if (prp == NULL) return 0.0;
2662
2663 if (prp->processed >= 2) {
2664 return MolWtForLoc (sfp->location);
2665 }
2666
2667 if (! ipp->is_whole_loc) {
2668 return MolWtForLoc (sfp->location);
2669 }
2670
2671 if (ipp->suppress_mol_wt) return 0.0;
2672
2673 if (ipp->sig_pept_trim_len > 0) {
2674 str = GetSequenceByFeature (sfp);
2675 if (str == NULL) return 0.0;
2676 len = StringLen (str);
2677 if (len > ipp->sig_pept_trim_len) {
2678 mol_wt = MolWtForStr (str + ipp->sig_pept_trim_len);
2679 } else {
2680 mol_wt = MolWtForStr (str);
2681 }
2682 MemFree (str);
2683 return mol_wt;
2684 }
2685
2686 if (ipp->trim_initial_met) {
2687 str = GetSequenceByFeature (sfp);
2688 if (str == NULL) return 0.0;
2689 if (StringLen (str) > 1 && *str == 'M') {
2690 mol_wt = MolWtForStr (str + 1);
2691 } else {
2692 mol_wt = MolWtForStr (str);
2693 }
2694 MemFree (str);
2695 return mol_wt;
2696 }
2697
2698 return MolWtForLoc (sfp->location);
2699 }
2700
2701 /*
2702 static void ChangeOandJtoX (CharPtr str)
2703
2704 {
2705 Char ch;
2706
2707 if (str == NULL) return;
2708 ch = *str;
2709 while (ch != '\0') {
2710 if (ch == 'O' || ch == 'J') {
2711 *str = 'X';
2712 } else if (ch == 'o' || ch == 'j') {
2713 *str = 'x';
2714 }
2715 str++;
2716 ch = *str;
2717 }
2718 }
2719 */
2720
2721 static Boolean ECNumberFormatOkay (
2722 CharPtr str,
2723 Boolean forGbRelease
2724 )
2725
2726 {
2727 Char ch;
2728 Boolean is_ambig;
2729 Int2 numdashes;
2730 Int2 numdigits;
2731 Int2 numperiods;
2732 CharPtr ptr;
2733
2734 if (StringHasNoText (str)) return FALSE;
2735
2736 is_ambig = FALSE;
2737 numperiods = 0;
2738 numdigits = 0;
2739 numdashes = 0;
2740
2741 ptr = str;
2742 ch = *ptr;
2743 while (ch != '\0') {
2744 if (IS_DIGIT (ch)) {
2745 numdigits++;
2746 if (is_ambig) return FALSE;
2747 ptr++;
2748 ch = *ptr;
2749 } else if (ch == '-') {
2750 numdashes++;
2751 is_ambig = TRUE;
2752 ptr++;
2753 ch = *ptr;
2754 } else if (ch == 'n') {
2755 numdashes++;
2756 is_ambig = TRUE;
2757 ptr++;
2758 ch = *ptr;
2759 } else if (ch == '.') {
2760 numperiods++;
2761 if (numdigits > 0 && numdashes > 0) return FALSE;
2762 if (numdigits == 0 && numdashes == 0) return FALSE;
2763 if (numdashes > 1) return FALSE;
2764 numdigits = 0;
2765 numdashes = 0;
2766 ptr++;
2767 ch = *ptr;
2768 } else {
2769 ptr++;
2770 ch = *ptr;
2771 }
2772 }
2773
2774 if (numperiods == 3) {
2775 if (numdigits > 0 && numdashes > 0) return FALSE;
2776 if (numdigits > 0 || numdashes == 1) return TRUE;
2777 }
2778
2779 return FALSE;
2780 }
2781
2782 static Boolean OnlyOneRealGeneral (SeqIdPtr sip)
2783
2784 {
2785 DbtagPtr dbt;
2786 Int2 numGenerals = 0;
2787
2788 while (sip != NULL) {
2789 if (sip->choice != SEQID_GENERAL) return FALSE;
2790 dbt = (DbtagPtr) sip->data.ptrvalue;
2791 if (dbt == NULL) return FALSE;
2792 if (!IsSkippableDbtag(dbt) &&
2793 StringICmp (dbt->db, "SMART") != 0) {
2794 numGenerals++;
2795 }
2796 sip = sip->next;
2797 }
2798 if (numGenerals == 1) return TRUE;
2799 return FALSE;
2800 }
2801
2802 static void FormatFeatureBlockQuals (
2803 StringItemPtr ffstring,
2804 IntAsn2gbJobPtr ajp,
2805 Asn2gbSectPtr asp,
2806 BioseqPtr bsp,
2807 Uint1 featdeftype,
2808 ValNodePtr gene_syn,
2809 CharPtr lasttype,
2810 SeqLocPtr location,
2811 BioseqPtr prod,
2812 CharPtr protein_pid_g,
2813 QualValPtr qvp,
2814 Int4 left,
2815 Int4 right,
2816 Uint1 strand,
2817 SeqFeatPtr sfp,
2818 BioseqPtr target,
2819 IntFeatBlockPtr ifp,
2820 Boolean is_other,
2821 Boolean is_journalscan,
2822 Boolean is_gps,
2823 Boolean is_ged
2824 )
2825
2826 {
2827 Boolean add_period;
2828 /*
2829 CharPtr ascii;
2830 Int2 ascii_len;
2831 */
2832 Boolean at_end = FALSE;
2833 ByteStorePtr bs;
2834 Char buf [80];
2835 Choice cbaa;
2836 CodeBreakPtr cbp;
2837 Char ch;
2838 Uint1 choice;
2839 ValNodePtr citlist;
2840 Int4 gi;
2841 Boolean hadProtDesc = FALSE;
2842 DbtagPtr dbt;
2843 UserFieldPtr entry;
2844 Int4 exp_ev;
2845 GBQualPtr gbq;
2846 GeneNomenclaturePtr gnp;
2847 Int2 i;
2848 FtQualType idx;
2849 IntPrtBlockPtr ipp;
2850 Boolean isTRNA;
2851 Boolean is_bc;
2852 Boolean is_rf;
2853 Boolean is_sc;
2854 Int2 j;
2855 FtQualType jdx;
2856 Int4 len;
2857 Boolean link_is_na;
2858 FloatHi molwt;
2859 SeqLocPtr newloc;
2860 CharPtr notestr;
2861 Char numbuf [32];
2862 Int2 numcodons;
2863 Int2 numsyns;
2864 ObjectIdPtr oip;
2865 Boolean okay = FALSE;
2866 Boolean only_digits;
2867 BioseqPtr pbsp;
2868 Int4 pmid;
2869 Char pmidbuf [32];
2870 ValNodePtr pmidlist;
2871 ValNodePtr ppr;
2872 CharPtr prefix;
2873 CharPtr protein_seq = NULL;
2874 size_t prtlen;
2875 CharPtr ptr;
2876 RefBlockPtr rbp;
2877 CharPtr region;
2878 Uint1 residue;
2879 SeqCodeTablePtr sctp;
2880 Int4 sec_str;
2881 Uint1 seqcode;
2882 Char seqid [50];
2883 SeqIntPtr sintp;
2884 SeqIdPtr sip;
2885 SeqLocPtr slp;
2886 Boolean split;
2887 CharPtr start;
2888 CharPtr str;
2889 Boolean suppress_period;
2890 CharPtr tmp;
2891 tRNAPtr trna;
2892 UserFieldPtr ufp;
2893 UserObjectPtr uop;
2894 ValNodePtr vnp;
2895 StringItemPtr unique;
2896 Boolean indexerVersion;
2897
2898 unique = FFGetString(ajp);
2899 if ( unique == NULL ) return;
2900
2901 indexerVersion = (Boolean) (GetAppProperty ("InternalNcbiSequin") != NULL);
2902
2903 for (i = 0, idx = feat_qual_order [i]; idx != (FtQualType) 0; i++, idx = feat_qual_order [i]) {
2904
2905 link_is_na = FALSE;
2906
2907 lasttype = NULL;
2908 switch (asn2gnbk_featur_quals [idx].qualclass) {
2909
2910 case Qual_class_ignore :
2911 break;
2912
2913 case Qual_class_string :
2914 if (! StringHasNoText (qvp [idx].str)) {
2915 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
2916 FALSE, TRUE, TILDE_TO_SPACES);
2917 FFAddTextToString(ffstring, "\"", qvp [idx].str, "\"",
2918 FALSE, TRUE, TILDE_TO_SPACES);
2919 FFAddOneChar(ffstring, '\n', FALSE);
2920 }
2921 break;
2922
2923 case Qual_class_locus_tag :
2924 if (! StringHasNoText (qvp [idx].str)) {
2925 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
2926 FALSE, TRUE, TILDE_TO_SPACES);
2927 FFAddTextToString(ffstring, "\"", qvp [idx].str, "\"",
2928 FALSE, TRUE, TILDE_TO_SPACES);
2929 FFAddOneChar(ffstring, '\n', FALSE);
2930 }
2931 break;
2932
2933 case Qual_class_tilde :
2934 if (! StringHasNoText (qvp [idx].str)) {
2935 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
2936 FALSE, TRUE, TILDE_EXPAND);
2937 FFAddTextToString(ffstring, "\"", qvp [idx].str, "\"",
2938 FALSE, TRUE, TILDE_EXPAND);
2939 FFAddOneChar(ffstring, '\n', FALSE);
2940 }
2941 break;
2942
2943 case Qual_class_exception :
2944 if (! StringHasNoText (qvp [idx].str)) {
2945 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
2946 FALSE, TRUE, TILDE_TO_SPACES);
2947 FFAddTextToString(ffstring, "\"", qvp [idx].str, "\"",
2948 FALSE, TRUE, TILDE_TO_SPACES);
2949 FFAddOneChar(ffstring, '\n', FALSE);
2950 }
2951 break;
2952
2953 case Qual_class_product :
2954 if (StringHasNoText (qvp [idx].str) ||
2955 (ajp->flags.dropIllegalQuals &&
2956 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
2957 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
2958 FALSE, TRUE, TILDE_TO_SPACES);
2959 FFAddTextToString(ffstring, "\"", qvp [idx].str, "\"",
2960 FALSE, TRUE, TILDE_TO_SPACES);
2961 FFAddOneChar(ffstring, '\n', FALSE);
2962 break;
2963
2964 case Qual_class_sgml :
2965 if (! StringHasNoText (qvp [idx].str)) {
2966 /*
2967 if (is_journalscan) {
2968 ascii_len = Sgml2AsciiLen (qvp [idx].str);
2969 start = ascii = MemNew ((size_t) (10 + ascii_len));
2970 if (start != NULL) {
2971 ascii = Sgml2Ascii (qvp [idx].str, ascii, ascii_len + 1);
2972
2973 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
2974 FALSE, TRUE, TILDE_TO_SPACES);
2975 FFAddTextToString(ffstring, "\"", start, "\"",
2976 FALSE, TRUE, TILDE_TO_SPACES);
2977 FFAddOneChar(ffstring, '\n', FALSE);
2978
2979 MemFree (start);
2980 }
2981 } else {
2982 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
2983 FALSE, TRUE, TILDE_TO_SPACES);
2984 FFAddTextToString(ffstring, "\"", qvp[idx].str, "\"",
2985 FALSE, TRUE, TILDE_TO_SPACES);
2986 FFAddOneChar(ffstring, '\n', FALSE);
2987 }
2988 */
2989 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
2990 FALSE, TRUE, TILDE_TO_SPACES);
2991 FFAddTextToString(ffstring, "\"", qvp[idx].str, "\"",
2992 FALSE, TRUE, TILDE_TO_SPACES);
2993 FFAddOneChar(ffstring, '\n', FALSE);
2994 }
2995 break;
2996
2997 case Qual_class_boolean :
2998 if (qvp [idx].ble) {
2999 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "\n",
3000 FALSE, TRUE, TILDE_IGNORE);
3001 }
3002 break;
3003
3004 case Qual_class_int :
3005 if (qvp [idx].num > 0) {
3006 if (idx == FTQUAL_transl_table) {
3007 sprintf (numbuf, "%ld", (long) qvp [idx].num);
3008 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
3009 FALSE, TRUE, TILDE_IGNORE);
3010 FF_www_gcode (ajp, ffstring, numbuf);
3011 } else {
3012 sprintf (numbuf, "%ld", (long) qvp [idx].num);
3013 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
3014 FALSE, TRUE, TILDE_IGNORE);
3015 FFAddTextToString(ffstring, NULL, numbuf, NULL,
3016 FALSE, TRUE, TILDE_IGNORE);
3017 }
3018 FFAddOneChar(ffstring, '\n', FALSE);
3019 }
3020 break;
3021
3022 case Qual_class_evidence :
3023 exp_ev = qvp [idx].num;
3024 if (exp_ev > 0 && exp_ev <= 2) {
3025 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
3026 FALSE, TRUE, TILDE_IGNORE);
3027 FFAddOneString(ffstring, evidenceText [exp_ev], FALSE, TRUE, TILDE_IGNORE);
3028 FFAddOneChar(ffstring, '\n', FALSE);
3029 }
3030 break;
3031
3032 case Qual_class_valnode :
3033 for (vnp = qvp[idx].vnp; vnp != NULL; vnp = vnp->next) {
3034 str = (CharPtr) vnp->data.ptrvalue;
3035 if (str != NULL) {
3036 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
3037 FALSE, TRUE, TILDE_TO_SPACES);
3038 FFAddTextToString(ffstring, "\"", str, "\"",
3039 FALSE, TRUE, TILDE_TO_SPACES);
3040 FFAddOneChar(ffstring, '\n', FALSE);
3041 }
3042 }
3043 break;
3044
3045 case Qual_class_sep_gene_syn :
3046 for (vnp = qvp[idx].vnp; vnp != NULL; vnp = vnp->next) {
3047 str = (CharPtr) vnp->data.ptrvalue;
3048 if (StringHasNoText (str)) continue;
3049 FFAddTextToString (ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
3050 FALSE, TRUE, TILDE_TO_SPACES);
3051 FFAddTextToString (ffstring, "\"", str, "\"",
3052 FALSE, TRUE, TILDE_TO_SPACES);
3053 FFAddOneChar (ffstring, '\n', FALSE);
3054 }
3055 break;
3056
3057 case Qual_class_gene_syn :
3058 numsyns = 0;
3059 for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
3060 str = (CharPtr) vnp->data.ptrvalue;
3061 if (! StringHasNoText (str)) {
3062 numsyns++;
3063 }
3064 }
3065 if (numsyns > 0) {
3066 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
3067 FALSE, TRUE, TILDE_TO_SPACES);
3068 prefix = NULL;
3069 for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
3070 str = (CharPtr) vnp->data.ptrvalue;
3071 if (! StringHasNoText (str)) {
3072 FFAddTextToString (ffstring, prefix, str, NULL, FALSE, FALSE, TILDE_IGNORE);
3073 prefix = "; ";
3074 }
3075 }
3076 FFAddOneChar(ffstring, '\"', FALSE);
3077 FFAddOneChar(ffstring, '\n', FALSE);
3078 }
3079 break;
3080
3081 case Qual_class_map :
3082 gbq = qvp [idx].gbq;
3083 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3084 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3085 if (lasttype == NULL) {
3086 lasttype = gbq->qual;
3087 }
3088 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3089 if (! StringHasNoText (gbq->val)) {
3090 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
3091 FALSE, TRUE, TILDE_IGNORE);
3092 if (!StringIsJustQuotes (gbq->val)) {
3093 FFAddOneString(ffstring, gbq->val, FALSE, TRUE, TILDE_IGNORE);
3094 }
3095 FFAddOneChar(ffstring, '\"', FALSE);
3096 FFAddOneChar(ffstring, '\n', FALSE);
3097 }
3098 gbq = gbq->next;
3099 }
3100 break;
3101
3102 case Qual_class_EC_valnode :
3103 for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
3104 str = (CharPtr) vnp->data.ptrvalue;
3105 okay = TRUE;
3106
3107 if (str == NULL) continue;
3108
3109 if (ajp->flags.dropIllegalQuals) {
3110 tmp = str;
3111 while (*tmp != '\0' && *tmp == '\"')
3112 tmp++;
3113 for (; *tmp != '\0' && *tmp != '\"'; tmp++) {
3114 if (!IS_DIGIT(*tmp) && *tmp != '.' && *tmp != '-') {
3115 okay = FALSE;
3116 }
3117 }
3118 }
3119 if (!okay) continue;
3120
3121 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
3122 FALSE, TRUE, TILDE_TO_SPACES);
3123 FFAddOneChar(ffstring, '\"', FALSE);
3124 FF_AddECnumber(ajp, ffstring, str);
3125 FFAddOneChar(ffstring, '\"', FALSE);
3126 FFAddOneChar(ffstring, '\n', FALSE);
3127 }
3128 break;
3129
3130 case Qual_class_EC_quote :
3131 gbq = qvp [idx].gbq;
3132 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3133 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3134 if (lasttype == NULL) {
3135 lasttype = gbq->qual;
3136 }
3137 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3138 okay = TRUE;
3139 if (gbq->val == NULL) {
3140 okay = FALSE;
3141 }
3142
3143 if (ajp->flags.dropIllegalQuals && okay) {
3144 if (! ECNumberFormatOkay (gbq->val, ajp->flags.forGbRelease)) {
3145 okay = FALSE;
3146 }
3147 }
3148
3149 if (StringHasNoText (gbq->val)) {
3150 okay = FALSE;
3151 }
3152
3153 if (okay) {
3154 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
3155 FALSE, TRUE, TILDE_TO_SPACES);
3156 FFAddOneChar(ffstring, '\"', FALSE);
3157 if (!StringIsJustQuotes (gbq->val)) {
3158 FF_AddECnumber (ajp, ffstring, gbq->val);
3159 }
3160 FFAddOneChar(ffstring, '\"', FALSE);
3161 FFAddOneChar(ffstring, '\n', FALSE);
3162 }
3163 gbq = gbq->next;
3164 }
3165 break;
3166
3167 case Qual_class_quote :
3168 gbq = qvp [idx].gbq;
3169 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3170 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3171 if (lasttype == NULL) {
3172 lasttype = gbq->qual;
3173 }
3174 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3175 if (! StringHasNoText (gbq->val)) {
3176 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
3177 FALSE, TRUE, TILDE_IGNORE);
3178 if (!StringIsJustQuotes (gbq->val)) {
3179 FFAddOneString(ffstring, gbq->val, FALSE, TRUE, TILDE_IGNORE);
3180 }
3181 FFAddOneChar(ffstring, '\"', FALSE);
3182 FFAddOneChar(ffstring, '\n', FALSE);
3183 }
3184 gbq = gbq->next;
3185 }
3186 break;
3187
3188 case Qual_class_noquote :
3189 gbq = qvp [idx].gbq;
3190 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3191 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3192 if (lasttype == NULL) {
3193 lasttype = gbq->qual;
3194 }
3195 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3196 if (! StringHasNoText (gbq->val)) {
3197 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
3198 FALSE, TRUE, TILDE_IGNORE);
3199 FFAddOneString(ffstring, gbq->val, FALSE, TRUE, TILDE_TO_SPACES);
3200 FFAddOneChar(ffstring, '\n', FALSE);
3201 }
3202 gbq = gbq->next;
3203 }
3204 break;
3205
3206 case Qual_class_label :
3207 gbq = qvp [idx].gbq;
3208 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3209 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3210 if (lasttype == NULL) {
3211 lasttype = gbq->qual;
3212 }
3213 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3214 if (! StringHasNoText (gbq->val)) {
3215 if (ajp->flags.checkQualSyntax) { /* single token, not just numeric */
3216 str = gbq->val;
3217 ch = *str;
3218 only_digits = TRUE;
3219 while (ch != '\0') {
3220 if (IS_WHITESP (ch)) break; /* only single token allowed */
3221 if (! IS_DIGIT (ch)) {
3222 only_digits = FALSE;
3223 }
3224 str++;
3225 ch = *str;
3226 }
3227 if (only_digits) break; /* must not be just numeric */
3228 }
3229 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3230 FALSE, TRUE, TILDE_IGNORE);
3231 FFAddOneString(ffstring, gbq->val, FALSE, TRUE, TILDE_TO_SPACES);
3232 FFAddOneChar(ffstring, '\n', FALSE);
3233 }
3234 gbq = gbq->next;
3235 }
3236 break;
3237
3238 case Qual_class_mobile_element :
3239 gbq = qvp [idx].gbq;
3240 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3241 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3242 if (lasttype == NULL) {
3243 lasttype = gbq->qual;
3244 }
3245 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3246 if (! StringHasNoText (gbq->val)) {
3247 str = gbq->val;
3248 if ((! ajp->flags.checkQualSyntax) || (ValidateMobileElement (str))) {
3249
3250 /* mobile_element enabled as of 12/15/2006
3251 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
3252 if (StringNICmp (str, "transposon", 10) == 0) {
3253 str += 10;
3254 if (*str == ':') {
3255 str++;
3256 }
3257 FFAddTextToString(ffstring, "/", "transposon", "=\"",
3258 FALSE, TRUE, TILDE_IGNORE);
3259 if (!StringIsJustQuotes (str)) {
3260 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_IGNORE);
3261 }
3262 FFAddOneChar(ffstring, '\"', FALSE);
3263 FFAddOneChar(ffstring, '\n', FALSE);
3264 } else if (StringNICmp (str, "insertion sequence", 18) == 0) {
3265 str += 18;
3266 if (*str == ':') {
3267 str++;
3268 }
3269 FFAddTextToString(ffstring, "/", "insertion_seq", "=\"",
3270 FALSE, TRUE, TILDE_IGNORE);
3271 if (!StringIsJustQuotes (str)) {
3272 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_IGNORE);
3273 }
3274 FFAddOneChar(ffstring, '\"', FALSE);
3275 FFAddOneChar(ffstring, '\n', FALSE);
3276 } else {
3277 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
3278 FALSE, TRUE, TILDE_IGNORE);
3279 if (!StringIsJustQuotes (str)) {
3280 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_IGNORE);
3281 }
3282 FFAddOneChar(ffstring, '\"', FALSE);
3283 FFAddOneChar(ffstring, '\n', FALSE);
3284 }
3285 } else {
3286 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
3287 FALSE, TRUE, TILDE_IGNORE);
3288 if (!StringIsJustQuotes (str)) {
3289 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_IGNORE);
3290 }
3291 FFAddOneChar(ffstring, '\"', FALSE);
3292 FFAddOneChar(ffstring, '\n', FALSE);
3293 }
3294 */
3295
3296 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
3297 FALSE, TRUE, TILDE_IGNORE);
3298 if (!StringIsJustQuotes (str)) {
3299 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_IGNORE);
3300 }
3301 FFAddOneChar(ffstring, '\"', FALSE);
3302 FFAddOneChar(ffstring, '\n', FALSE);
3303 }
3304 }
3305 gbq = gbq->next;
3306 }
3307 break;
3308
3309 case Qual_class_number :
3310 gbq = qvp [idx].gbq;
3311 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3312 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3313
3314 if (ajp->flags.checkQualSyntax) {
3315 str = gbq->val;
3316
3317 if ( StringHasNoText (str) )
3318 break;
3319 while (!IS_WHITESP (*str) && *str != '\0')
3320 str++;
3321 if (! StringHasNoText (str) )
3322 break;
3323 }
3324
3325 if (lasttype == NULL) {
3326 lasttype = gbq->qual;
3327 }
3328 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3329 if (! StringHasNoText (gbq->val)) {
3330 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3331 FALSE, TRUE, TILDE_IGNORE);
3332 FFAddOneString(ffstring, gbq->val, FALSE, TRUE, TILDE_TO_SPACES);
3333 FFAddOneChar(ffstring, '\n', FALSE);
3334 }
3335 gbq = gbq->next;
3336 }
3337 break;
3338
3339 case Qual_class_usedin :
3340 gbq = qvp [idx].gbq;
3341 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3342 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3343 if (lasttype == NULL) {
3344 lasttype = gbq->qual;
3345 }
3346 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3347 if (! StringHasNoText (gbq->val)) {
3348 tmp = StringSave (gbq->val);
3349 str = tmp;
3350 len = StringLen (str);
3351 if (len > 1 && *str == '(' && str [len - 1] == ')' &&
3352 StringChr (str, ',') != NULL) {
3353 str++;
3354 while (! StringHasNoText (str)) {
3355 ptr = StringChr (str, ',');
3356 if (ptr == NULL) {
3357 ptr = StringChr (str, ')');
3358 }
3359 if (ptr != NULL) {
3360 *ptr = '\0';
3361 ptr++;
3362 }
3363 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3364 FALSE, TRUE, TILDE_IGNORE);
3365 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3366 FFAddOneChar(ffstring, '\n', FALSE);
3367 str = ptr;
3368 }
3369 } else {
3370 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3371 FALSE, TRUE, TILDE_IGNORE);
3372 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3373 FFAddOneChar(ffstring, '\n', FALSE);
3374 }
3375 MemFree (tmp);
3376 }
3377 gbq = gbq->next;
3378 }
3379 break;
3380
3381 case Qual_class_paren :
3382 gbq = qvp [idx].gbq;
3383 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3384 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3385 if (lasttype == NULL) {
3386 lasttype = gbq->qual;
3387 }
3388 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3389 if (! StringHasNoText (gbq->val)) {
3390 tmp = StringSave (gbq->val);
3391 str = tmp;
3392 len = StringLen (str);
3393 if (len > 1 && *str == '(' && str [len - 1] == ')' &&
3394 StringChr (str, ',') != NULL) {
3395 str++;
3396 while (! StringHasNoText (str)) {
3397 ptr = StringChr (str, ',');
3398 if (ptr == NULL) {
3399 ptr = StringChr (str, ')');
3400 }
3401 if (ptr != NULL) {
3402 *ptr = '\0';
3403 ptr++;
3404 }
3405 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=\"",
3406 FALSE, TRUE, TILDE_IGNORE);
3407 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3408 FFAddOneChar(ffstring, '\"', FALSE);
3409 FFAddOneChar(ffstring, '\n', FALSE);
3410 str = ptr;
3411 }
3412 } else {
3413 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=\"",
3414 FALSE, TRUE, TILDE_IGNORE);
3415 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3416 FFAddOneChar(ffstring, '\"', FALSE);
3417 FFAddOneChar(ffstring, '\n', FALSE);
3418 }
3419 MemFree (tmp);
3420 }
3421 gbq = gbq->next;
3422 }
3423 break;
3424
3425 case Qual_class_rpt :
3426 gbq = qvp [idx].gbq;
3427 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3428 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3429
3430 if (lasttype == NULL) {
3431 lasttype = gbq->qual;
3432 }
3433 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3434 if (! StringHasNoText (gbq->val)) {
3435 tmp = StringSave (gbq->val);
3436 str = tmp;
3437 len = StringLen (str);
3438 if (len > 1 && *str == '(' && str [len - 1] == ')' &&
3439 StringChr (str, ',') != NULL) {
3440 str++;
3441 while (! StringHasNoText (str)) {
3442 ptr = StringChr (str, ',');
3443 if (ptr == NULL) {
3444 ptr = StringChr (str, ')');
3445 }
3446 if (ptr != NULL) {
3447 *ptr = '\0';
3448 ptr++;
3449 }
3450 if ((! ajp->flags.checkQualSyntax) || (StringInStringList (str, validRptString))) {
3451 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3452 FALSE, TRUE, TILDE_IGNORE);
3453 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3454 FFAddOneChar(ffstring, '\n', FALSE);
3455 }
3456 str = ptr;
3457 }
3458 } else {
3459 if ((! ajp->flags.checkQualSyntax) || (StringInStringList (str, validRptString))) {
3460 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3461 FALSE, TRUE, TILDE_IGNORE);
3462 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3463 FFAddOneChar(ffstring, '\n', FALSE);
3464 }
3465 }
3466 MemFree (tmp);
3467 }
3468 gbq = gbq->next;
3469 }
3470 break;
3471
3472 case Qual_class_rpt_unit :
3473 gbq = qvp [idx].gbq;
3474 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3475 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3476 if (lasttype == NULL) {
3477 lasttype = gbq->qual;
3478 }
3479
3480 /* in release_mode, must be of the form 123..4567 or a single-token label,
3481 or (technically illegal but common) letters and semicolons - NO LONGER CHECKED */
3482
3483 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3484 if (! StringHasNoText (gbq->val)) {
3485 tmp = StringSave (gbq->val);
3486 str = tmp;
3487 len = StringLen (str);
3488 #if 0
3489 if (len > 1 && *str == '(' && str [len - 1] == ')' /* &&
3490 StringChr (str + 1, '(') == NULL /* && StringChr (str, ',') != NULL */) {
3491 str++;
3492 while (! StringHasNoText (str)) {
3493 ptr = StringChr (str, ',');
3494 if (ptr == NULL) {
3495 ptr = StringRChr (str, ')');
3496 }
3497 if (ptr != NULL) {
3498 *ptr = '\0';
3499 ptr++;
3500 }
3501 if ((! ajp->flags.checkQualSyntax) || (ValidateRptUnit (str))) {
3502 TrimSpacesAroundString (str);
3503 if (idx == FTQUAL_rpt_unit_range) {
3504 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3505 FALSE, TRUE, TILDE_IGNORE);
3506 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3507 FFAddOneChar(ffstring, '\n', FALSE);
3508 } else {
3509 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=\"",
3510 FALSE, TRUE, TILDE_IGNORE);
3511 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3512 FFAddOneChar(ffstring, '\"', FALSE);
3513 FFAddOneChar(ffstring, '\n', FALSE);
3514 }
3515 }
3516 str = ptr;
3517 }
3518 } else {
3519 #endif
3520 if ((! ajp->flags.checkQualSyntax) || (ValidateRptUnit (str))) {
3521 TrimSpacesAroundString (str);
3522 if (idx == FTQUAL_rpt_unit_range) {
3523 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3524 FALSE, TRUE, TILDE_IGNORE);
3525 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3526 FFAddOneChar(ffstring, '\n', FALSE);
3527 } else {
3528 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=\"",
3529 FALSE, TRUE, TILDE_IGNORE);
3530 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3531 FFAddOneChar(ffstring, '\"', FALSE);
3532 FFAddOneChar(ffstring, '\n', FALSE);
3533 }
3534 }
3535 #if 0
3536 }
3537 #endif
3538 MemFree (tmp);
3539 }
3540 gbq = gbq->next;
3541 }
3542 break;
3543
3544 case Qual_class_compare :
3545 gbq = qvp [idx].gbq;
3546 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3547 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3548
3549 if (lasttype == NULL) {
3550 lasttype = gbq->qual;
3551 }
3552 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3553 if (! StringHasNoText (gbq->val)) {
3554 tmp = StringSave (gbq->val);
3555 str = tmp;
3556 len = StringLen (str);
3557 if (len > 1 && *str == '(' && str [len - 1] == ')' &&
3558 StringChr (str, ',') != NULL) {
3559 str++;
3560 while (! StringHasNoText (str)) {
3561 ptr = StringChr (str, ',');
3562 if (ptr == NULL) {
3563 ptr = StringChr (str, ')');
3564 }
3565 if (ptr != NULL) {
3566 *ptr = '\0';
3567 ptr++;
3568 }
3569 if ((! ajp->flags.checkQualSyntax) || ValidateCompareQual (str, is_ged)) {
3570 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3571 FALSE, TRUE, TILDE_IGNORE);
3572 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3573 FFAddOneChar(ffstring, '\n', FALSE);
3574 }
3575 str = ptr;
3576 }
3577 } else {
3578 if ((! ajp->flags.checkQualSyntax) || ValidateCompareQual (str, is_ged)) {
3579 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3580 FALSE, TRUE, TILDE_IGNORE);
3581 FFAddOneString(ffstring, str, FALSE, TRUE, TILDE_TO_SPACES);
3582 FFAddOneChar(ffstring, '\n', FALSE);
3583 }
3584 }
3585 MemFree (tmp);
3586 }
3587 gbq = gbq->next;
3588 }
3589 break;
3590
3591 case Qual_class_replace :
3592 gbq = qvp [idx].gbq;
3593 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3594 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3595 if (lasttype == NULL) {
3596 lasttype = gbq->qual;
3597 }
3598 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3599 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3600 FALSE, TRUE, TILDE_IGNORE);
3601 FFAddOneChar(ffstring, '\"', FALSE);
3602 if (!StringHasNoText (gbq->val)) {
3603 FFAddOneString(ffstring, gbq->val, FALSE, TRUE, TILDE_TO_SPACES);
3604 }
3605 FFAddOneChar(ffstring, '\"', FALSE);
3606 FFAddOneChar(ffstring, '\n', FALSE);
3607 gbq = gbq->next;
3608 }
3609 break;
3610
3611 case Qual_class_consplice :
3612 gbq = qvp [idx].gbq;
3613 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3614 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3615
3616 if (ajp->flags.checkQualSyntax && (! StringInStringList (gbq->val, validConsSpliceString)) ) {
3617 break;
3618 }
3619
3620 if (lasttype == NULL) {
3621 lasttype = gbq->qual;
3622 }
3623 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3624 if (! StringHasNoText (gbq->val)) {
3625 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3626 FALSE, TRUE, TILDE_IGNORE);
3627 FFAddOneString(ffstring, gbq->val, FALSE, TRUE, TILDE_TO_SPACES);
3628 FFAddOneChar(ffstring, '\n', FALSE);
3629 }
3630 gbq = gbq->next;
3631 }
3632 break;
3633
3634 case Qual_class_site :
3635 if (! StringHasNoText (qvp [idx].str)) {
3636 str = qvp [idx].str;
3637 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3638 FALSE, TRUE, TILDE_IGNORE);
3639 FFAddTextToString(ffstring, "\"", str, "\"", FALSE, TRUE, TILDE_TO_SPACES);
3640 FFAddOneChar(ffstring, '\n', FALSE);
3641 }
3642 break;
3643
3644 case Qual_class_bond :
3645 if (! StringHasNoText (qvp [idx].str)) {
3646 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3647 FALSE, TRUE, TILDE_IGNORE);
3648 FFAddTextToString(ffstring, "\"", qvp[idx].str, "\"", FALSE, TRUE, TILDE_TO_SPACES);
3649 FFAddOneChar(ffstring, '\n', FALSE);
3650 }
3651 break;
3652
3653 case Qual_class_L_R_B :
3654 gbq = qvp [idx].gbq;
3655 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3656 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3657
3658 if (ajp->flags.checkQualSyntax && (! StringInStringList (gbq->val, validLRBString)) ) {
3659 break;
3660 }
3661
3662 if (lasttype == NULL) {
3663 lasttype = gbq->qual;
3664 }
3665 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3666 if (! StringHasNoText (gbq->val)) {
3667 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3668 FALSE, TRUE, TILDE_IGNORE);
3669 FFAddOneString(ffstring, gbq->val, FALSE, TRUE, TILDE_TO_SPACES);
3670 FFAddOneChar(ffstring, '\n', FALSE);
3671 }
3672 gbq = gbq->next;
3673 }
3674 break;
3675
3676 case Qual_class_sec_str :
3677 sec_str = qvp [idx].num;
3678 if (sec_str > 0 && sec_str <= 3) {
3679 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3680 FALSE, TRUE, TILDE_IGNORE);
3681 FFAddTextToString(ffstring, "\"", secStrText[sec_str], "\"",
3682 FALSE, FALSE, TILDE_IGNORE);
3683 FFAddOneChar(ffstring, '\n', FALSE);
3684 }
3685 break;
3686
3687 case Qual_class_seq_loc :
3688 slp = qvp [idx].slp;
3689 if (slp != NULL) {
3690 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3691 FALSE, TRUE, TILDE_IGNORE);
3692 str = FFFlatLoc (ajp, target, slp, /* ajp->masterStyle */ FALSE);
3693 FFAddTextToString(ffstring, "\"", str, "\"",
3694 FALSE, TRUE, TILDE_TO_SPACES);
3695 FFAddOneChar(ffstring, '\n', FALSE);
3696 MemFree (str);
3697 }
3698 break;
3699
3700 case Qual_class_code_break :
3701 cbp = qvp [idx].cbp;
3702 seqcode = 0;
3703 sctp = NULL;
3704 while (cbp != NULL) {
3705 cbaa = cbp->aa;
3706 switch (cbaa.choice) {
3707 case 1 :
3708 seqcode = Seq_code_ncbieaa;
3709 break;
3710 case 2 :
3711 seqcode = Seq_code_ncbi8aa;
3712 break;
3713 case 3 :
3714 seqcode = Seq_code_ncbistdaa;
3715 break;
3716 default :
3717 break;
3718 }
3719 if (seqcode != 0) {
3720 sctp = SeqCodeTableFind (seqcode);
3721 if (sctp != NULL) {
3722 slp = NULL;
3723 while ((slp = SeqLocFindNext (cbp->loc, slp)) != NULL) {
3724 str = NULL;
3725 if (ajp->ajp.slp != NULL) {
3726 sip = SeqIdParse ("lcl|dummy");
3727 split = FALSE;
3728 newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle);
3729
3730 SeqIdFree (sip);
3731 if (newloc != NULL) {
3732 A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
3733 str = FFFlatLoc (ajp, target, newloc, ajp->masterStyle);
3734 SeqLocFree (newloc);
3735 }
3736 } else {
3737 str = FFFlatLoc (ajp, target, slp, ajp->masterStyle);
3738 }
3739 if (str != NULL) {
3740 residue = cbaa.value.intvalue;
3741 ptr = Get3LetterSymbol (ajp, seqcode, sctp, residue);
3742 /* O and J no longer quarantined */
3743 /*
3744 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
3745 if (StringICmp (ptr, "Pyl") == 0 || StringICmp (ptr, "Xle") == 0) {
3746 ptr = "OTHER";
3747 }
3748 }
3749 */
3750 if (ptr == NULL) {
3751 ptr = "OTHER";
3752 }
3753 FFAddOneString(ffstring, "/transl_except=", FALSE, FALSE, TILDE_IGNORE);
3754 FFAddTextToString(ffstring, "(pos:", str, ",", FALSE, FALSE, TILDE_IGNORE);
3755 FFAddTextToString(ffstring, "aa:", ptr, ")", FALSE, FALSE, TILDE_IGNORE);
3756 FFAddOneChar(ffstring, '\n', FALSE);
3757 }
3758 MemFree (str);
3759 }
3760 }
3761 }
3762 cbp = cbp->next;
3763 }
3764 break;
3765
3766 case Qual_class_anti_codon :
3767 slp = qvp [FTQUAL_anticodon].slp;
3768 newloc = NULL;
3769 if (slp != NULL && ajp->ajp.slp != NULL) {
3770 sip = SeqIdParse ("lcl|dummy");
3771 split = FALSE;
3772 newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle);
3773 /*
3774 newloc = SeqLocCopyRegion (sip, slp, bsp, left, right, strand, &split);
3775 */
3776 SeqIdFree (sip);
3777 slp = newloc;
3778 if (newloc != NULL) {
3779 A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
3780 }
3781 }
3782 str = qvp [FTQUAL_trna_aa].str;
3783 if (slp != NULL && StringDoesHaveText (str)) {
3784 if (ajp->mode == RELEASE_MODE) { /* !!! quarantined pending collab approval !!! */
3785 if (slp->choice == SEQLOC_INT) {
3786 sintp = (SeqIntPtr) slp->data.ptrvalue;
3787 if (sintp != NULL) {
3788 sprintf(numbuf, "%ld", (long) sintp->from + 1);
3789 FFAddTextToString (ffstring, "/anticodon=(pos:", numbuf, "..",
3790 FALSE, FALSE, TILDE_IGNORE);
3791 sprintf (numbuf, "%ld", (long) sintp->to + 1);
3792 FFAddTextToString (ffstring, NULL, numbuf, ",",
3793 FALSE, FALSE, TILDE_IGNORE);
3794 FFAddTextToString (ffstring, "aa:", str, ")",
3795 FALSE, FALSE, TILDE_IGNORE);
3796 FFAddOneChar (ffstring, '\n', FALSE);
3797 }
3798 }
3799 } else {
3800 tmp = FFFlatLoc (ajp, target, slp, ajp->masterStyle);
3801 if (tmp != NULL) {
3802 FFAddTextToString (ffstring, "/anticodon=(pos:", tmp, ",",
3803 FALSE, FALSE, TILDE_IGNORE);
3804 FFAddTextToString(ffstring, "aa:", str, ")",
3805 FALSE, FALSE, TILDE_IGNORE);
3806 FFAddOneChar(ffstring, '\n', FALSE);
3807 }
3808 MemFree (tmp);
3809 }
3810 }
3811 if (newloc != NULL) {
3812 SeqLocFree (newloc);
3813 }
3814 break;
3815
3816 case Qual_class_trna_codons :
3817 trna = qvp [idx].trp;
3818 if (trna) {
3819 numcodons = ComposeCodonsRecognizedString (trna, numbuf, sizeof (numbuf));
3820 if (numcodons < 1 || StringHasNoText (numbuf)) {
3821 } else {
3822 FFAddTextToString(ffstring, "/", "codon_recognized", "=\"",
3823 FALSE, TRUE, TILDE_IGNORE);
3824 FFAddOneString(ffstring, numbuf, FALSE, TRUE, TILDE_TO_SPACES);
3825 FFAddOneChar(ffstring, '\"', FALSE);
3826 FFAddOneChar(ffstring, '\n', FALSE);
3827 }
3828 }
3829 break;
3830
3831 case Qual_class_codon :
3832 gbq = qvp [idx].gbq;
3833 if (gbq == NULL || (ajp->flags.dropIllegalQuals &&
3834 (! AllowedValQual (featdeftype, idx, ajp->flags.forGbRelease)))) break;
3835 if (lasttype == NULL) {
3836 lasttype = gbq->qual;
3837 }
3838 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
3839 if (! StringHasNoText (gbq->val)) {
3840 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
3841 FALSE, FALSE, TILDE_IGNORE);
3842 FFAddOneString(ffstring, gbq->val, FALSE, FALSE, TILDE_TO_SPACES);
3843 FFAddOneChar(ffstring, '\n', FALSE);
3844 }
3845 gbq = gbq->next;
3846 }
3847 break;
3848
3849 case Qual_class_pubset :
3850 vnp = qvp [idx].vnp;
3851 if (vnp != NULL && asp != NULL && asp->referenceArray != NULL) {
3852 citlist = NULL;
3853 pmidlist = NULL;
3854 for (ppr = vnp->data.ptrvalue; ppr != NULL; ppr = ppr->next) {
3855 j = MatchRef (ppr, asp->referenceArray, asp->numReferences);
3856 if (j > 0) {
3857 ValNodeAddInt (&citlist, 0, (Int4) j);
3858 } else if (is_other && ppr->choice == PUB_PMid && ajp->mode != RELEASE_MODE) {
3859 pmid = ppr->data.intvalue;
3860 ValNodeAddInt (&pmidlist, 0, (Int4) pmid);
3861 }
3862 }
3863 citlist = ValNodeSort (citlist, SortVnpByInt);
3864 pmidlist = ValNodeSort (pmidlist, SortVnpByInt);
3865 for (vnp = citlist; vnp != NULL; vnp = vnp->next) {
3866 j = (Int2) vnp->data.intvalue;
3867 if (j > 0) {
3868 sprintf (numbuf, "%d", (int) j);
3869 FFAddOneString(ffstring, "/citation=[", FALSE, TRUE, TILDE_TO_SPACES);
3870 pmid = 0;
3871 if (j <= asp->numReferences) {
3872 rbp = asp->referenceArray [j - 1];
3873 if (rbp != NULL) {
3874 pmid = rbp->pmid;
3875 }
3876 }
3877 if (pmid > 0 && GetWWW (ajp)) {
3878 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3879 FF_Add_NCBI_Base_URL (ffstring, link_muid);
3880 sprintf (pmidbuf, "%ld", (long) pmid);
3881 FFAddTextToString(ffstring, NULL, pmidbuf, "\">", FALSE, FALSE, TILDE_IGNORE);
3882 FFAddOneString(ffstring, numbuf, FALSE, FALSE, TILDE_IGNORE);
3883 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3884 } else {
3885 FFAddOneString(ffstring, numbuf, FALSE, FALSE, TILDE_IGNORE);
3886 }
3887 FFAddOneString(ffstring, "]", FALSE, FALSE, TILDE_IGNORE);
3888 FFAddOneChar(ffstring, '\n', FALSE);
3889 }
3890 }
3891 for (vnp = pmidlist; vnp != NULL; vnp = vnp->next) {
3892 pmid = (Int4) vnp->data.intvalue;
3893 if (pmid > 0) {
3894 sprintf (pmidbuf, "%ld", (long) pmid);
3895 FFAddOneString(ffstring, "/citation=[PUBMED ", FALSE, TRUE, TILDE_TO_SPACES);
3896 if (GetWWW (ajp)) {
3897
3898 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3899 FF_Add_NCBI_Base_URL (ffstring, link_muid);
3900 FFAddTextToString(ffstring, NULL, pmidbuf, "\">", FALSE, FALSE, TILDE_IGNORE);
3901 FFAddOneString(ffstring, pmidbuf, FALSE, FALSE, TILDE_IGNORE);
3902 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3903 } else {
3904 FFAddOneString(ffstring, pmidbuf, FALSE, FALSE, TILDE_IGNORE);
3905 }
3906 FFAddOneString(ffstring, "]", FALSE, FALSE, TILDE_IGNORE);
3907 /*
3908 FFAddTextToString(ffstring, "/citation=[PUBMED ", pmidbuf, "]",
3909 FALSE, TRUE, TILDE_TO_SPACES);
3910 */
3911 FFAddOneChar(ffstring, '\n', FALSE);
3912 }
3913 }
3914 citlist = ValNodeFree (citlist);
3915 pmidlist = ValNodeFree (pmidlist);
3916 }
3917 break;
3918
3919 case Qual_class_db_xref :
3920 for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
3921 buf [0] = '\0';
3922 dbt = (DbtagPtr) vnp->data.ptrvalue;
3923 if (dbt != NULL && (! StringHasNoText (dbt->db))) {
3924 oip = dbt->tag;
3925 if (oip != NULL) {
3926
3927 okay = TRUE;
3928 if (ajp->flags.dropBadDbxref) {
3929 /* if RELEASE_MODE, drop unknown dbtag */
3930
3931 okay = FALSE;
3932 if (DbxrefIsValid (dbt->db, &is_rf, &is_sc, &is_bc, NULL)) {
3933 if (is_bc) {
3934 /* case counts, so suppress if bad case */
3935 } else if (is_rf) {
3936 if (is_gps || is_other) {
3937 okay = TRUE;
3938 }
3939 } else if (is_sc) {
3940 /* show, but warn in validator */
3941 okay = TRUE;
3942 } else {
3943 okay = TRUE;
3944 }
3945 }
3946
3947 /*okay = FALSE;
3948 for (j = 0; legalDbXrefs [j] != NULL; j++) {
3949 if (StringCmp (dbt->db, legalDbXrefs [j]) == 0) {
3950 okay = TRUE;
3951 }
3952 }
3953 if (! okay) {
3954 if (is_gps || is_other) {
3955 for (j = 0; legalRefSeqDbXrefs [j] != NULL; j++) {
3956 if (StringCmp (dbt->db, legalRefSeqDbXrefs [j]) == 0) {
3957 okay = TRUE;
3958 }
3959 }
3960 }
3961 }
3962 */
3963 }
3964
3965 if (StringICmp (dbt->db, "taxon") == 0 ||
3966 StringCmp (dbt->db, "PID") == 0 ||
3967 StringCmp (dbt->db, "GI") == 0) {
3968 okay = FALSE;
3969 }
3970 if (okay && idx == FTQUAL_db_xref && qvp [FTQUAL_gene_xref].vnp != NULL) {
3971 if (DbxrefAlreadyInGeneXref (dbt, qvp [FTQUAL_gene_xref].vnp)) {
3972 okay = FALSE;
3973 }
3974 }
3975
3976 if (okay) {
3977 if (! StringHasNoText (oip->str)) {
3978 if (StringLen (oip->str) < 80) {
3979 sprintf (buf, "%s", oip->str);
3980 }
3981 } else {
3982 sprintf (buf, "%ld", (long) oip->id);
3983 }
3984 }
3985 }
3986 }
3987 if (! StringHasNoText (buf)) {
3988 if (StringICmp (buf, protein_pid_g) != 0) {
3989 /* already sorted and uniqued by BasicSeqEntryCleanup, per feature */
3990 if (dbt != NULL) {
3991 if (StringICmp (dbt->db, "LocusID") == 0 || StringICmp (dbt->db, "InterimID") == 0) {
3992 if (FFStringSearch (ffstring, dbt->db, 0) >= 0) {
3993 okay = FALSE;
3994 }
3995 }
3996 }
3997 if (okay) {
3998 FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
3999 FF_www_db_xref(ajp, ffstring, dbt->db, buf, bsp);
4000 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4001 }
4002 }
4003 }
4004 }
4005 break;
4006
4007 case Qual_class_nuc_id :
4008 link_is_na = TRUE;
4009 /* fall through */
4010 case Qual_class_prt_id :
4011 sip = qvp [idx].sip;
4012 if (sip != NULL) {
4013 /* should always be found above for protein_id or transcript_id
4014 prod = BioseqFind (sip);
4015 */
4016 if (prod != NULL) {
4017 gi = 0;
4018 for (sip = prod->id; sip != NULL; sip = sip->next) {
4019 if (sip->choice == SEQID_GI) {
4020 gi = sip->data.intvalue;
4021 }
4022 }
4023 choice = 0;
4024 for (sip = prod->id; sip != NULL; sip = sip->next) {
4025 if (sip->choice == SEQID_GENBANK ||
4026 sip->choice == SEQID_EMBL ||
4027 sip->choice == SEQID_DDBJ ||
4028 sip->choice == SEQID_OTHER ||
4029 sip->choice == SEQID_TPG ||
4030 sip->choice == SEQID_TPE ||
4031 sip->choice == SEQID_TPD ||
4032 sip->choice == SEQID_GPIPE) {
4033 choice = sip->choice;
4034 if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
4035 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
4036 FALSE, FALSE, TILDE_IGNORE);
4037 FF_www_nuc_or_prot_id (ajp, ffstring, seqid, gi, link_is_na);
4038 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4039 }
4040 } else if (sip->choice == SEQID_GI) {
4041 if (choice == 0) {
4042 sprintf (seqid, "%ld", (long) sip->data.intvalue);
4043 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
4044 FALSE, FALSE, TILDE_IGNORE);
4045 FF_www_nuc_or_prot_id (ajp, ffstring, seqid, gi, link_is_na);
4046 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4047 }
4048 sprintf (seqid, "%ld", (long) sip->data.intvalue);
4049 FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
4050 FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
4051 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4052 } else if (sip->choice == SEQID_GENERAL) {
4053 dbt = (DbtagPtr) sip->data.ptrvalue;
4054 if (dbt != NULL && StringCmp (dbt->db, "PID") == 0) {
4055 /*
4056 oip = dbt->tag;
4057 if (oip != NULL) {
4058 if (! StringHasNoText (oip->str)) {
4059 sprintf (seqid, "PID:%s", oip->str);
4060 NewContLine ();
4061 gb_AddString ("/db_xref=\"", seqid, "\"", FALSE, TRUE, TILDE_TO_SPACES);
4062 }
4063 }
4064 */
4065 } else if (dbt != NULL) {
4066 pbsp = BioseqFind (sip);
4067 if (pbsp != NULL && pbsp->id != NULL && /* pbsp->id->next == NULL && */ OnlyOneRealGeneral (pbsp->id)) {
4068 dbt = (DbtagPtr) sip->data.ptrvalue;
4069 if (dbt != NULL &&
4070 !IsSkippableDbtag(dbt)) {
4071 if (SeqIdWrite (sip, seqid, PRINTID_REPORT, sizeof (seqid)) != NULL) {
4072 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
4073 FALSE, FALSE, TILDE_IGNORE);
4074 FF_www_nuc_or_prot_id (ajp, ffstring, seqid, gi, link_is_na);
4075 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4076 }
4077 }
4078 }
4079 }
4080 }
4081 }
4082 } else {
4083 if (sip->choice == SEQID_GI) {
4084 gi = sip->data.intvalue;
4085 if (GetAccnVerFromServer (gi, seqid)) {
4086 #ifdef OS_UNIX
4087 if (getenv ("ASN2GB_PSF_DEBUG") != NULL) {
4088 printf ("GetAccnVerFromServer returned %s\n", seqid);
4089 }
4090 #endif
4091 if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
4092 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
4093 FALSE, FALSE, TILDE_IGNORE);
4094 FF_www_nuc_or_prot_id (ajp, ffstring, seqid, gi, link_is_na);
4095 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4096 } else {
4097 ajp->relModeError = TRUE;
4098 }
4099 } else {
4100 sip = GetSeqIdForGI (gi);
4101 if (sip != NULL && SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
4102 #ifdef OS_UNIX
4103 if (getenv ("ASN2GB_PSF_DEBUG") != NULL) {
4104 printf ("GetSeqIdForGI returned %s\n", seqid);
4105 }
4106 #endif
4107 if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
4108 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
4109 FALSE, FALSE, TILDE_IGNORE);
4110 FF_www_nuc_or_prot_id (ajp, ffstring, seqid, gi, link_is_na);
4111 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4112 } else {
4113 ajp->relModeError = TRUE;
4114 }
4115 } else if (! ajp->flags.dropIllegalQuals) {
4116 sprintf (seqid, "%ld", (long) gi);
4117 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
4118 FALSE, FALSE, TILDE_IGNORE);
4119 FF_www_nuc_or_prot_id (ajp, ffstring, seqid, gi, link_is_na);
4120 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4121 } else {
4122 ajp->relModeError = TRUE;
4123 }
4124 }
4125
4126 sprintf (seqid, "%ld", (long) gi);
4127 FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
4128 FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
4129 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4130 } else if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
4131 gi = GetGIForSeqId (sip);
4132 if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
4133 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=\"",
4134 FALSE, FALSE, TILDE_IGNORE);
4135 FF_www_nuc_or_prot_id (ajp, ffstring, seqid, gi, link_is_na);
4136 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4137 } else {
4138 ajp->relModeError = TRUE;
4139 }
4140
4141 if (gi > 0) {
4142 sprintf (seqid, "%ld", (long) gi);
4143 FFAddOneString(ffstring, "/db_xref=\"", FALSE, FALSE, TILDE_IGNORE);
4144 FF_www_db_xref(ajp, ffstring, "GI", seqid, bsp);
4145 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4146 }
4147 }
4148 }
4149 }
4150 break;
4151
4152 case Qual_class_mol_wt :
4153 if (qvp [idx].ble) {
4154 if (ifp != NULL && ifp->isPrt) {
4155 ipp = (IntPrtBlockPtr) ifp;
4156 molwt = MolWtForProtFeat (bsp, sfp, ipp);
4157 if (molwt > 0.01) {
4158 sprintf (buf, "%ld", (long) (molwt + 0.5));
4159 TrimSpacesAroundString (buf);
4160 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[idx].name, "=",
4161 FALSE, FALSE, TILDE_IGNORE);
4162 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
4163 FFAddOneChar(ffstring, '\n', FALSE);
4164 }
4165 }
4166 }
4167 break;
4168
4169 case Qual_class_translation :
4170 if (qvp [idx].ble && (! ajp->hideTranslation)) {
4171 if ((prod == NULL && ajp->transIfNoProd) || ajp->alwaysTranslCds) {
4172 bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE);
4173 if (bs != NULL) {
4174 str = BSMerge (bs, NULL);
4175 bs = BSFree (bs);
4176 if (str != NULL) {
4177 ptr = str;
4178 ch = *ptr;
4179 while (ch != '\0') {
4180 *ptr = TO_UPPER (ch);
4181 ptr++;
4182 ch = *ptr;
4183 }
4184 prtlen = StringLen (str);
4185 if (prtlen > 1) {
4186 if (str [prtlen - 1] == '*') {
4187 str [prtlen - 1] = '\0';
4188 }
4189 }
4190 if (! StringHasNoText (str)) {
4191 /*
4192 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
4193 ChangeOandJtoX (str);
4194 }
4195 */
4196 FFAddTextToString(ffstring, "/translation=\"", str, "\"",
4197 FALSE, TRUE, TILDE_TO_SPACES);
4198 FFAddOneChar(ffstring, '\n', FALSE);
4199 }
4200 MemFree (str);
4201 }
4202 } else {
4203 ajp->relModeError = TRUE;
4204 }
4205 } else if (prod != NULL) {
4206 len = SeqLocLen (sfp->product);
4207 if (len > 0) {
4208 if (SeqLocStart (location) == 0 || (bsp != NULL && SeqLocStop (location) == bsp->length - 1)) {
4209 at_end = TRUE;
4210 }
4211 str = (CharPtr) MemNew ((size_t) (len + 1) * sizeof (Char));
4212 protein_seq = str;
4213 /*
4214 if (ajp->flags.iupacaaOnly) {
4215 code = Seq_code_iupacaa;
4216 } else {
4217 code = Seq_code_ncbieaa;
4218 }
4219 */
4220 SeqPortStreamLoc (sfp->product, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) &protein_seq, SaveGBSeqTranslation);
4221 if (! StringHasNoText (str)) {
4222 /*
4223 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
4224 ChangeOandJtoX (str);
4225 }
4226 */
4227 FFAddTextToString(ffstring, "/translation=\"", str, "\"",
4228 FALSE, TRUE, TILDE_TO_SPACES);
4229 FFAddOneChar(ffstring, '\n', FALSE);
4230 }
4231 MemFree (str);
4232 } else {
4233 ajp->relModeError = TRUE;
4234 }
4235 }
4236 }
4237 break;
4238
4239 case Qual_class_transcription :
4240 if (qvp [idx].ble && ajp->showTranscript) {
4241 if ((prod == NULL && ajp->transIfNoProd) || ajp->alwaysTranslCds) {
4242 str = GetSequenceByFeature (sfp);
4243 if (str != NULL) {
4244 ptr = str;
4245 ch = *ptr;
4246 while (ch != '\0') {
4247 *ptr = TO_UPPER (ch);
4248 ptr++;
4249 ch = *ptr;
4250 }
4251 if (! StringHasNoText (str)) {
4252 FFAddTextToString(ffstring, "/transcription=\"", str, "\"",
4253 FALSE, TRUE, TILDE_TO_SPACES);
4254 FFAddOneChar(ffstring, '\n', FALSE);
4255 }
4256 MemFree (str);
4257 }
4258 } else if (prod != NULL) {
4259 len = SeqLocLen (sfp->product);
4260 if (len > 0) {
4261 str = (CharPtr) MemNew ((size_t) (len + 2) * sizeof (Char));
4262 SeqPortStreamLoc (sfp->product, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) str, NULL);
4263 if (! StringHasNoText (str)) {
4264 FFAddTextToString(ffstring, "/transcription=\"", str, "\"",
4265 FALSE, TRUE, TILDE_TO_SPACES);
4266 FFAddOneChar(ffstring, '\n', FALSE);
4267 }
4268 MemFree (str);
4269 }
4270 } else {
4271 str = GetSequenceByFeature (sfp);
4272 if (str != NULL) {
4273 ptr = str;
4274 ch = *ptr;
4275 while (ch != '\0') {
4276 *ptr = TO_UPPER (ch);
4277 ptr++;
4278 ch = *ptr;
4279 }
4280 if (! StringHasNoText (str)) {
4281 FFAddTextToString(ffstring, "/transcription=\"", str, "\"",
4282 FALSE, TRUE, TILDE_TO_SPACES);
4283 FFAddOneChar(ffstring, '\n', FALSE);
4284 }
4285 MemFree (str);
4286 }
4287 }
4288 }
4289 break;
4290
4291 case Qual_class_peptide :
4292 if (qvp [idx].ble) {
4293 if (ajp->showPeptide) {
4294 str = GetSequenceByFeature (sfp);
4295 if (str != NULL) {
4296 ptr = str;
4297 ch = *ptr;
4298 while (ch != '\0') {
4299 *ptr = TO_UPPER (ch);
4300 ptr++;
4301 ch = *ptr;
4302 }
4303 if (! StringHasNoText (str)) {
4304 FFAddTextToString(ffstring, "/peptide=\"", str, "\"",
4305 FALSE, TRUE, TILDE_TO_SPACES);
4306 FFAddOneChar(ffstring, '\n', FALSE);
4307 }
4308 MemFree (str);
4309 }
4310 }
4311 }
4312 break;
4313
4314 case Qual_class_tag_peptide :
4315 if (! StringHasNoText (qvp [idx].str)) {
4316 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals [idx].name, "=",
4317 FALSE, TRUE, TILDE_TO_SPACES);
4318 FFAddTextToString(ffstring, NULL, qvp [idx].str, NULL,
4319 FALSE, TRUE, TILDE_TO_SPACES);
4320 FFAddOneChar(ffstring, '\n', FALSE);
4321 }
4322 break;
4323
4324 case Qual_class_illegal :
4325 for (vnp = qvp [idx].vnp; vnp != NULL; vnp = vnp->next) {
4326 str = (CharPtr) vnp->data.ptrvalue;
4327 if (str != NULL) {
4328 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_TO_SPACES);
4329 FFAddNewLine(ffstring);
4330 }
4331 }
4332 break;
4333
4334 case Qual_class_note :
4335 if (! ajp->flags.goQualsToNote) {
4336
4337 /* in GenBank sequin_mode and dump_mode, and in RefSeq, GO terms show up as separate /qualifiers */
4338
4339 for (j = 0, jdx = feat_note_order [j]; jdx != 0; j++, jdx = feat_note_order [j]) {
4340
4341 link_is_na = FALSE;
4342
4343 switch (asn2gnbk_featur_quals [jdx].qualclass) {
4344
4345 case Qual_class_go :
4346 if (qvp [jdx].ufp != NULL) {
4347 if (ajp->mode == ENTREZ_MODE) {
4348 str = GetCombinedGOtext (qvp [jdx].ufp, ajp);
4349 if (StringDoesHaveText (str)) {
4350 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[jdx].name, "=",
4351 FALSE, TRUE, TILDE_IGNORE);
4352 FFAddTextToString(ffstring, "\"", str, "\"",
4353 FALSE, FALSE, TILDE_IGNORE);
4354 FFAddOneChar(ffstring, '\n', FALSE);
4355 }
4356 MemFree (str);
4357 } else {
4358 for (entry = qvp [jdx].ufp; entry != NULL; entry = entry->next) {
4359 if (entry == NULL || entry->choice != 11) break;
4360 ufp = (UserFieldPtr) entry->data.ptrvalue;
4361 str = GetGOtext (ufp, ajp, (Boolean) (ajp->mode == ENTREZ_MODE));
4362 if (! StringHasNoText (str)) {
4363 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[jdx].name, "=",
4364 FALSE, TRUE, TILDE_IGNORE);
4365 FFAddTextToString(ffstring, "\"", str, "\"",
4366 FALSE, FALSE, TILDE_IGNORE);
4367 FFAddOneChar(ffstring, '\n', FALSE);
4368 }
4369 MemFree (str);
4370 }
4371 }
4372 }
4373 break;
4374
4375 default :
4376 break;
4377 }
4378 }
4379 }
4380
4381 if (! ajp->flags.refSeqQualsToNote) {
4382
4383 /* in entrez_mode, sequin_mode and dump_mode in RefSeq, RefSeq-specific qualifiers show up as separate /qualifiers */
4384
4385 for (j = 0, jdx = feat_note_order [j]; jdx != 0; j++, jdx = feat_note_order [j]) {
4386 switch (asn2gnbk_featur_quals [jdx].qualclass) {
4387
4388 case Qual_class_nomenclature :
4389 uop = qvp [jdx].uop;
4390 if (uop != NULL) {
4391 str = NULL;
4392 VisitUserObjectsInUop (sfp->ext, (Pointer) &str, GetNomenclatureText);
4393 if (! StringHasNoText (str)) {
4394 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[jdx].name, "=",
4395 FALSE, TRUE, TILDE_IGNORE);
4396 FFAddTextToString(ffstring, "\"", str, "\"",
4397 FALSE, FALSE, TILDE_IGNORE);
4398 FFAddOneChar(ffstring, '\n', FALSE);
4399 prefix = "; ";
4400 add_period = FALSE;
4401 }
4402 MemFree (str);
4403 }
4404 break;
4405
4406 case Qual_class_gene_nomen :
4407 gnp = qvp [jdx].gnp;
4408 if (gnp != NULL) {
4409 str = GetNomenclature (gnp);
4410 if (! StringHasNoText (str)) {
4411 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[jdx].name, "=",
4412 FALSE, TRUE, TILDE_IGNORE);
4413 FFAddTextToString(ffstring, "\"", str, "\"",
4414 FALSE, FALSE, TILDE_IGNORE);
4415 FFAddOneChar(ffstring, '\n', FALSE);
4416 prefix = "; ";
4417 add_period = FALSE;
4418 }
4419 MemFree (str);
4420 }
4421 break;
4422
4423 default :
4424 break;
4425 }
4426 }
4427 }
4428
4429 /*head = NULL;*/
4430 notestr = NULL;
4431 prefix = NULL;
4432 add_period = FALSE;
4433 suppress_period = FALSE;
4434 lasttype = NULL;
4435 isTRNA = FALSE;
4436
4437
4438 #ifdef DISPLAY_STRINGS
4439 s_DisplayQVP (qvp, feat_note_order);
4440 #endif
4441 for (j = 0, jdx = feat_note_order [j]; jdx != 0; j++, jdx = feat_note_order [j]) {
4442 switch (asn2gnbk_featur_quals [jdx].qualclass) {
4443
4444 case Qual_class_string :
4445 if (! StringHasNoText (qvp [jdx].str)) {
4446 if (jdx == FTQUAL_figure) {
4447 if (!IsEllipsis (qvp [jdx].str))
4448 s_RemovePeriodFromEnd (qvp [jdx].str);
4449 sprintf (buf, "This sequence comes from %s", qvp [jdx].str);
4450 FFAddString_NoRedund (unique, prefix, buf, NULL, TRUE);
4451 add_period = FALSE;
4452 } else if (jdx == FTQUAL_maploc) {
4453 if (!IsEllipsis (qvp [jdx].str))
4454 s_RemovePeriodFromEnd (qvp [jdx].str);
4455 sprintf (buf, "Map location %s", qvp [jdx].str);
4456 FFAddString_NoRedund (unique, prefix, buf, NULL, TRUE);
4457 add_period = FALSE;
4458 } else if (jdx == FTQUAL_seqannot_note) {
4459 str = StringSave (qvp [jdx].str);
4460 TrimSpacesAndJunkFromEnds (str, TRUE);
4461 if (! IsEllipsis (str))
4462 add_period = s_RemovePeriodFromEnd (str);
4463 /* NOTE -- The following function call cleans up some strings
4464 (i.e., U34661 & U31565) but should be commented back
4465 in only if the problem can't be fixed upstream of here
4466
4467 s_StringCleanup(str);
4468
4469 */
4470 FFAddString_NoRedund (unique, prefix, str, NULL, TRUE);
4471 MemFree (str);
4472 if (hadProtDesc) {
4473 suppress_period = TRUE;
4474 }
4475 } else if (jdx == FTQUAL_seqfeat_note) {
4476 str = StringSave (qvp [jdx].str);
4477 if (indexerVersion) {
4478 TrimSpacesAroundString (str);
4479 } else {
4480 TrimSpacesAndJunkFromEnds (str, TRUE);
4481 }
4482 if (! IsEllipsis (str))
4483 add_period = s_RemovePeriodFromEnd (str);
4484 /* NOTE -- The following function call cleans up some strings
4485 (i.e., U34661 & U31565) but should be commented back
4486 in only if the problem can't be fixed upstream of here
4487
4488 s_StringCleanup(str);
4489
4490 */
4491 FFAddString_NoRedund (unique, prefix, str, NULL, TRUE);
4492 MemFree (str);
4493 if (hadProtDesc) {
4494 suppress_period = TRUE;
4495 }
4496 } else if (jdx == FTQUAL_prot_note) {
4497 str = StringSave (qvp [jdx].str);
4498 TrimSpacesAndJunkFromEnds (str, TRUE);
4499 if (! IsEllipsis (str))
4500 s_RemovePeriodFromEnd (str);
4501 FFAddString_NoRedund (unique, prefix, str, NULL, TRUE);
4502 MemFree (str);
4503 add_period = FALSE;
4504 } else if (jdx == FTQUAL_prot_desc) {
4505 str = StringSave (qvp [jdx].str);
4506 TrimSpacesAndJunkFromEnds (str, TRUE);
4507 if (! IsEllipsis (str))
4508 add_period = s_RemovePeriodFromEnd (str);
4509 FFAddString_NoRedund (unique, prefix, str, NULL, TRUE);
4510 MemFree (str);
4511 hadProtDesc = TRUE; /* gi|347886|gb|M96268.1|ECOUBIA */
4512 } else {
4513 if (! IsEllipsis (qvp [jdx].str)) {
4514 s_RemovePeriodFromEnd (qvp [jdx].str);
4515 }
4516 FFAddString_NoRedund (unique, prefix, qvp [jdx].str, NULL, TRUE);
4517 add_period = FALSE;
4518 }
4519 prefix = "; ";
4520 }
4521 break;
4522
4523 case Qual_class_exception :
4524 if (! StringHasNoText (qvp [jdx].str)) {
4525 if (! IsEllipsis (qvp [jdx].str)) {
4526 s_RemovePeriodFromEnd (qvp [jdx].str);
4527 }
4528 if (StringCmp (prefix, "; ") == 0) {
4529 prefix = "~";
4530 }
4531 FFAddString_NoRedund (unique, prefix, qvp [jdx].str, NULL, TRUE);
4532 add_period = FALSE;
4533 prefix = "; ";
4534 }
4535 break;
4536
4537 case Qual_class_encodes :
4538 if (! StringHasNoText (qvp [jdx].str)) {
4539 if (! IsEllipsis (qvp [jdx].str)) {
4540 s_RemovePeriodFromEnd (qvp [jdx].str);
4541 }
4542 FFAddTextToString (unique, prefix, "encodes ", NULL, FALSE, FALSE, TILDE_IGNORE);
4543 FFAddString_NoRedund (unique, NULL, qvp [jdx].str, NULL, TRUE);
4544 prefix = "; ";
4545 add_period = FALSE;
4546 }
4547 break;
4548
4549 case Qual_class_locus_tag :
4550 if (! StringHasNoText (qvp [jdx].str)) {
4551 if (! IsEllipsis (qvp [jdx].str)) {
4552 s_RemovePeriodFromEnd (qvp [jdx].str);
4553 }
4554 FFAddTextToString (unique, prefix, "locus_tag: ", NULL, FALSE, FALSE, TILDE_IGNORE);
4555 FFAddString_NoRedund (unique, NULL, qvp [jdx].str, NULL, TRUE);
4556 prefix = "; ";
4557 add_period = FALSE;
4558 }
4559 break;
4560
4561 case Qual_class_go :
4562 if (ajp->flags.goQualsToNote && qvp [jdx].ufp != NULL) {
4563 for (entry = qvp [jdx].ufp; entry != NULL; entry = entry->next) {
4564 if (entry == NULL || entry->choice != 11) break;
4565 ufp = (UserFieldPtr) entry->data.ptrvalue;
4566 str = GetGOtext (ufp, ajp, (Boolean) (ajp->mode == ENTREZ_MODE));
4567 if (! StringHasNoText (str)) {
4568 if (StringCmp (prefix, "; ") == 0) {
4569 prefix = ";\n";
4570 }
4571 FFAddTextToString (unique, prefix, asn2gnbk_featur_quals[jdx].name, ": ", FALSE, FALSE, TILDE_IGNORE);
4572 FFAddTextToString(unique, NULL, str, NULL, FALSE, FALSE, TILDE_IGNORE);
4573 }
4574 MemFree (str);
4575 prefix = "; ";
4576 add_period = FALSE;
4577 }
4578 }
4579 break;
4580
4581 case Qual_class_nomenclature :
4582 if (ajp->flags.refSeqQualsToNote) {
4583 uop = qvp [jdx].uop;
4584 if (uop != NULL) {
4585 str = NULL;
4586 VisitUserObjectsInUop (sfp->ext, (Pointer) &str, GetNomenclatureText);
4587 if (! StringHasNoText (str)) {
4588 if (StringCmp (prefix, "; ") == 0) {
4589 prefix = ";\n";
4590 }
4591 FFAddTextToString (unique, prefix, asn2gnbk_featur_quals[jdx].name, ": ", FALSE, FALSE, TILDE_IGNORE);
4592 FFAddTextToString(unique, NULL, str, NULL, FALSE, TRUE, TILDE_IGNORE);
4593 prefix = "; ";
4594 add_period = FALSE;
4595 }
4596 MemFree (str);
4597 prefix = "; ";
4598 add_period = FALSE;
4599 }
4600 }
4601 break;
4602
4603 case Qual_class_gene_nomen :
4604 if (ajp->flags.refSeqQualsToNote) {
4605 gnp = qvp [jdx].gnp;
4606 if (gnp != NULL) {
4607 str = GetNomenclature (gnp);
4608 if (! StringHasNoText (str)) {
4609 FFAddTextToString(ffstring, "/", asn2gnbk_featur_quals[jdx].name, "=",
4610 FALSE, TRUE, TILDE_IGNORE);
4611 FFAddTextToString(ffstring, "\"", str, "\"",
4612 FALSE, FALSE, TILDE_IGNORE);
4613 FFAddOneChar(ffstring, '\n', FALSE);
4614 prefix = "; ";
4615 add_period = FALSE;
4616 }
4617 MemFree (str);
4618 }
4619 }
4620 break;
4621
4622 case Qual_class_method :
4623 if (! StringHasNoText (qvp [jdx].str)) {
4624 if ( FFEmpty(unique) ) {
4625 prefix = "Method: ";
4626 } else {
4627 prefix = "; Method: ";
4628 }
4629 FFAddString_NoRedund (unique, prefix, qvp [jdx].str, NULL, TRUE);
4630 prefix = "; ";
4631 add_period = FALSE;
4632 }
4633 break;
4634
4635 case Qual_class_valnode :
4636 for (vnp = qvp [jdx].vnp; vnp != NULL; vnp = vnp->next) {
4637 str = (CharPtr) vnp->data.ptrvalue;
4638 if (! StringHasNoText (str)) {
4639 FFAddString_NoRedund (unique, prefix, str, NULL, TRUE);
4640 prefix = "; ";
4641 add_period = FALSE;
4642 }
4643 }
4644 break;
4645
4646 /*
4647 case Qual_class_gene_syn :
4648 numsyns = 0;
4649 for (vnp = qvp [jdx].vnp; vnp != NULL; vnp = vnp->next) {
4650 str = (CharPtr) vnp->data.ptrvalue;
4651 if (! StringHasNoText (str)) {
4652 numsyns++;
4653 }
4654 }
4655 if (numsyns > 0) {
4656 if (numsyns > 1) {
4657 FFAddTextToString (unique, prefix, "synonyms: ", NULL, FALSE, FALSE, TILDE_IGNORE);
4658 } else {
4659 FFAddTextToString (unique, prefix, "synonym: ", NULL, FALSE, FALSE, TILDE_IGNORE);
4660 }
4661 prefix = NULL;
4662 for (vnp = qvp [jdx].vnp; vnp != NULL; vnp = vnp->next) {
4663 str = (CharPtr) vnp->data.ptrvalue;
4664 if (! StringHasNoText (str)) {
4665 FFAddTextToString (unique, prefix, str, NULL, FALSE, TRUE, TILDE_IGNORE);
4666 prefix = ", ";
4667 }
4668 }
4669 prefix = "; ";
4670 add_period = FALSE;
4671 }
4672 break;
4673 */
4674
4675 case Qual_class_region :
4676 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
4677 FFAddTextToString(unique, prefix, qvp [jdx].str, NULL, FALSE, TRUE, TILDE_IGNORE);
4678 #else
4679 region = NULL;
4680 if (! StringHasNoText (qvp [jdx].str)) {
4681 if ( FFEmpty(unique) ) {
4682 prefix = "Region: ";
4683 } else {
4684 prefix = "; Region: ";
4685 }
4686 region = MemNew(StringLen(prefix) + StringLen(qvp [jdx].str) + 1);
4687 if ( region != NULL ) {
4688 sprintf(region, "%s%s", prefix, (qvp [jdx].str));
4689 FFAddString_NoRedund(unique, NULL, region, NULL, TRUE);
4690 region = MemFree(region);
4691 } else {
4692 FFAddTextToString(unique, prefix, qvp [jdx].str, NULL, FALSE, TRUE, TILDE_IGNORE);
4693 }
4694 prefix = "; ";
4695 add_period = FALSE;
4696 }
4697 #endif
4698 break;
4699
4700 case Qual_class_site :
4701 if (! StringHasNoText (qvp [jdx].str)) {
4702 str = qvp [jdx].str;
4703 if (StringCmp (str, "signal peptide") == 0 ||
4704 StringCmp (str, "transit peptide") == 0 ||
4705 StringCmp (str, "transmembrane region") == 0) {
4706 FFAddString_NoRedund (unique, prefix, str, NULL, TRUE);
4707 } else {
4708 FFAddString_NoRedund (unique, prefix, str, " site", TRUE);
4709 }
4710 add_period = FALSE;
4711 prefix = "\n";
4712 }
4713 break;
4714
4715 case Qual_class_bond :
4716 if (! StringHasNoText (qvp [jdx].str)) {
4717 FFAddString_NoRedund (unique, prefix, qvp [jdx].str, " bond", TRUE);
4718 add_period = FALSE;
4719 prefix = "\n";
4720 }
4721 break;
4722
4723 case Qual_class_protnames :
4724 /* process gene sgml for check against subsequent protein names */
4725 start = NULL;
4726 if (! StringHasNoText (qvp [FTQUAL_gene].str)) {
4727 /*
4728 if (is_journalscan) {
4729 ascii_len = Sgml2AsciiLen (qvp [FTQUAL_gene].str);
4730 start = ascii = MemNew ((size_t) (10 + ascii_len));
4731 if (start != NULL) {
4732 ascii = Sgml2Ascii (qvp [FTQUAL_gene].str, ascii, ascii_len + 1);
4733 }
4734 } else {
4735 start = StringSaveNoNull (qvp [FTQUAL_gene].str);
4736 }
4737 */
4738 start = StringSaveNoNull (qvp [FTQUAL_gene].str);
4739 }
4740 for (vnp = qvp [jdx].vnp; vnp != NULL; vnp = vnp->next) {
4741 str = (CharPtr) vnp->data.ptrvalue;
4742 if (! StringHasNoText (str)) {
4743 /* case sensitive - gi|4973426|gb|AF148501.1|AF148501 */
4744 /* check with and without sgml conversion */
4745 if (StringCmp (start, str) != 0 &&
4746 StringCmp (qvp [FTQUAL_gene].str, str) != 0) {
4747 if (! StringStr (qvp [FTQUAL_prot_desc].str, str)) {
4748 /* if (NotInGeneSyn (str, gene_syn)) { */
4749 FFAddString_NoRedund (unique, prefix, str, NULL, TRUE);
4750 prefix = "; ";
4751 add_period = FALSE;
4752 /* } */
4753 }
4754 }
4755 }
4756 }
4757 MemFree (start);
4758 break;
4759
4760 case Qual_class_xtraprds :
4761 gbq = qvp [jdx].gbq;
4762 if (lasttype == NULL && gbq != NULL) {
4763 lasttype = gbq->qual;
4764 }
4765 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
4766 if (! StringHasNoText (gbq->val)) {
4767 if (StringCmp(gbq->val,qvp[FTQUAL_gene].str) != 0 &&
4768 StringCmp(gbq->val,qvp[FTQUAL_product].str) != 0) {
4769 if (!isTRNA || !StringStr (gbq->val, "RNA")) {
4770 FFAddString_NoRedund (unique, prefix, gbq->val, NULL, TRUE);
4771 prefix = "; ";
4772 add_period = FALSE;
4773 }
4774 }
4775 }
4776 gbq = gbq->next;
4777 }
4778 break;
4779
4780 case Qual_class_its :
4781 str = qvp [jdx].str;
4782 if (! StringHasNoText (str)) {
4783 if (sfp->comment == NULL || StringStr (sfp->comment, str) == NULL) {
4784 FFAddString_NoRedund (unique, prefix, str, NULL, TRUE);
4785 prefix = "; ";
4786 add_period = FALSE;
4787 }
4788 }
4789 break;
4790
4791 case Qual_class_trna_codons :
4792 trna = qvp [jdx].trp;
4793 if (trna) {
4794 numcodons = ComposeCodonsRecognizedString (trna, numbuf, sizeof (numbuf));
4795 if (numcodons < 1 || StringHasNoText (numbuf)) {
4796 } else if (numcodons == 1) {
4797 isTRNA = TRUE;
4798 sprintf (buf, "codon recognized: %s", numbuf);
4799 if (StringStr (qvp [FTQUAL_seqfeat_note].str, buf) == NULL) {
4800 FFAddString_NoRedund (unique, prefix, "codon recognized: ", numbuf, TRUE);
4801 prefix = "; ";
4802 }
4803 } else {
4804 isTRNA = TRUE;
4805 FFAddString_NoRedund (unique, prefix, "codons recognized: ", numbuf, TRUE);
4806 prefix = "; ";
4807 add_period = FALSE;
4808 }
4809 }
4810 break;
4811
4812 case Qual_class_model_ev :
4813 uop = qvp [jdx].uop;
4814 if (uop != NULL) {
4815 str = NULL;
4816 VisitUserObjectsInUop (sfp->ext, (Pointer) &str, GetStrFormRNAEvidence);
4817 if (! StringHasNoText (str)) {
4818 FFAddString_NoRedund (unique, prefix, str, NULL, TRUE);
4819 prefix = "; ";
4820 add_period = FALSE;
4821 }
4822 }
4823 break;
4824
4825 case Qual_class_nuc_id :
4826 link_is_na = TRUE;
4827 /* fall through */
4828 case Qual_class_prt_id :
4829 sip = qvp [jdx].sip;
4830 if (sip != NULL) {
4831 /* should always be found above for protein_id or transcript_id
4832 prod = BioseqFind (sip);
4833 */
4834 if (prod != NULL) {
4835 choice = 0;
4836 for (sip = prod->id; sip != NULL; sip = sip->next) {
4837 if (sip->choice == SEQID_GENBANK ||
4838 sip->choice == SEQID_EMBL ||
4839 sip->choice == SEQID_DDBJ ||
4840 sip->choice == SEQID_OTHER ||
4841 sip->choice == SEQID_TPG ||
4842 sip->choice == SEQID_TPE ||
4843 sip->choice == SEQID_TPD ||
4844 sip->choice == SEQID_GPIPE) {
4845 choice = sip->choice;
4846 if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
4847 FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
4848 FALSE, TRUE, TILDE_IGNORE);
4849 prefix = "; ";
4850 }
4851 } else if (sip->choice == SEQID_GI) {
4852 if (choice == 0) {
4853 sprintf (seqid, "%ld", (long) sip->data.intvalue);
4854 FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
4855 FALSE, TRUE, TILDE_IGNORE);
4856 prefix = "; ";
4857 }
4858 }
4859 }
4860 } else {
4861 if (sip->choice == SEQID_GI) {
4862 gi = sip->data.intvalue;
4863 if (GetAccnVerFromServer (gi, seqid)) {
4864 if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
4865 FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
4866 FALSE, TRUE, TILDE_IGNORE);
4867 prefix = "; ";
4868 }
4869 } else {
4870 sip = GetSeqIdForGI (gi);
4871 if (sip != NULL && SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
4872 if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
4873 FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
4874 FALSE, TRUE, TILDE_IGNORE);
4875 prefix = "; ";
4876 }
4877 } else if (! ajp->flags.dropIllegalQuals) {
4878 sprintf (seqid, "%ld", (long) gi);
4879 FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
4880 FALSE, TRUE, TILDE_IGNORE);
4881 prefix = "; ";
4882 }
4883 }
4884 } else if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, sizeof (seqid)) != NULL) {
4885 if ((! ajp->flags.dropIllegalQuals) || ValidateAccn (seqid) == 0) {
4886 FFAddTextToString(unique, prefix, "transcript found in: ", seqid,
4887 FALSE, TRUE, TILDE_IGNORE);
4888 prefix = "; ";
4889 }
4890 }
4891 }
4892 add_period = FALSE;
4893 }
4894 break;
4895 default :
4896 break;
4897 }
4898 }
4899
4900 if ( !FFEmpty(unique) ) {
4901 notestr = FFToCharPtr(unique);
4902 TrimSpacesAroundString (notestr);
4903 if (add_period) {
4904 if (! suppress_period) {
4905 s_AddPeriodToEnd (notestr);
4906 }
4907 }
4908
4909 #ifdef ASN2GNBK_STRIP_NOTE_PERIODS
4910 if (! IsEllipsis (notestr))
4911 s_RemovePeriodFromEnd (notestr);
4912 #endif
4913
4914 FFAddOneString(ffstring, "/note=\"", FALSE, FALSE, TILDE_IGNORE);
4915 FFAddOneString(ffstring, notestr, FALSE, FALSE, TILDE_SEMICOLON);
4916 FFAddOneString(ffstring, "\"\n", FALSE, FALSE, TILDE_IGNORE);
4917
4918 MemFree (notestr);
4919 /*ValNodeFreeData (head);*/
4920 }
4921 break;
4922
4923 default:
4924 break;
4925
4926 }
4927 }
4928 FFRecycleString(ajp, unique);
4929 }
4930
4931
4932 static void FF_asn2gb_www_featkey (
4933 StringItemPtr ffstring,
4934 CharPtr key,
4935 SeqFeatPtr sfp,
4936 Int4 from,
4937 Int4 to,
4938 Uint1 strand,
4939 Uint4 itemID
4940 )
4941
4942 {
4943 BioseqPtr bsp;
4944 Char buf [16];
4945 Int4 featID = 0;
4946 Int4 ffrom = 0;
4947 Int4 fto = 0;
4948 Int4 gi = 0;
4949 Char gi_buf[16];
4950 Boolean is_aa = FALSE;
4951 ObjectIdPtr oip;
4952 CharPtr prefix = "?";
4953 SeqIntPtr sintp;
4954 SeqIdPtr sip;
4955 SeqLocPtr slp;
4956
4957 if (sfp == NULL) return;
4958 slp = sfp->location;
4959 bsp = BioseqFindFromSeqLoc (slp);
4960 if (bsp != NULL) {
4961 is_aa = ISA_aa (bsp->mol);
4962 for (sip = bsp->id; sip != NULL; sip = sip->next) {
4963 if (sip->choice == SEQID_GI) {
4964 gi = (Int4) sip->data.intvalue;
4965 }
4966 }
4967 } else {
4968 if (sfp->id.choice == 3) {
4969 oip = (ObjectIdPtr) sfp->id.value.ptrvalue;
4970 if (oip != NULL && oip->str == NULL) {
4971 featID = oip->id;
4972 }
4973 }
4974 sip = SeqLocId (slp);
4975 if (sip != NULL && sip->choice == SEQID_GI) {
4976 gi = (Int4) sip->data.intvalue;
4977 }
4978 }
4979 if (slp->choice == SEQLOC_INT) {
4980 sintp = (SeqIntPtr) slp->data.ptrvalue;
4981 if (sintp != NULL) {
4982 ffrom = sintp->from + 1;
4983 fto = sintp->to + 1;
4984 sip = sintp->id;
4985 if (sip->choice == SEQID_GI) {
4986 gi = (Int4) sip->data.intvalue;
4987 }
4988 }
4989 }
4990
4991 sprintf (gi_buf, "%ld", (long)gi);
4992
4993 if (gi > 0) {
4994 FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4995 if (is_aa) {
4996 FF_Add_NCBI_Base_URL (ffstring, link_featp);
4997 } else {
4998 FF_Add_NCBI_Base_URL (ffstring, link_featn);
4999 }
5000 /* FFAddOneString(ffstring, "val=", FALSE, FALSE, TILDE_IGNORE); */
5001 FFAddOneString(ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE);
5002 if (featID > 0) {
5003 sprintf (buf, "%ld", (long) featID);
5004 FFAddOneString(ffstring, "?featID=", FALSE, FALSE, TILDE_IGNORE);
5005 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
5006 prefix = "&";
5007 } else if (ffrom > 0 && fto > 0) {
5008 sprintf (buf, "%ld", (long) ffrom);
5009 FFAddOneString(ffstring, "?from=", FALSE, FALSE, TILDE_IGNORE);
5010 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
5011 sprintf (buf, "%ld", (long) fto);
5012 FFAddOneString(ffstring, "&to=", FALSE, FALSE, TILDE_IGNORE);
5013 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
5014 prefix = "&";
5015 } else if (itemID > 0) {
5016 sprintf (buf, "%ld", (long) itemID);
5017 FFAddOneString(ffstring, "?itemid=", FALSE, FALSE, TILDE_IGNORE);
5018 FFAddOneString(ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
5019 prefix = "&";
5020 }
5021 if ( is_aa ) {
5022 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
5023 FFAddOneString(ffstring, "report=gpwithparts", FALSE, FALSE, TILDE_IGNORE);
5024 } else {
5025 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
5026 FFAddOneString(ffstring, "report=gbwithparts", FALSE, FALSE, TILDE_IGNORE);
5027 }
5028 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
5029 }
5030
5031 FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
5032
5033 if (gi > 0) {
5034 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
5035 }
5036 }
5037
5038
5039 NLM_EXTERN SeqIdPtr SeqLocIdForProduct (
5040 SeqLocPtr product
5041 )
5042
5043 {
5044 SeqIdPtr sip;
5045 SeqLocPtr slp;
5046
5047 /* in case product is a SEQLOC_EQUIV */
5048
5049 if (product == NULL) return NULL;
5050 sip = SeqLocId (product);
5051 if (sip != NULL) return sip;
5052 slp = SeqLocFindNext (product, NULL);
5053 while (slp != NULL) {
5054 sip = SeqLocId (slp);
5055 if (sip != NULL) return sip;
5056 slp = SeqLocFindNext (product, slp);
5057 }
5058 return NULL;
5059 }
5060
5061 NLM_EXTERN CharPtr goQualType [] = {
5062 "", "Process", "Component", "Function", NULL
5063 };
5064
5065 static void RecordGoFieldsInQVP (
5066 UserFieldPtr ufp,
5067 Pointer userdata
5068 )
5069
5070 {
5071 UserFieldPtr entry;
5072 Int2 i;
5073 ObjectIdPtr oip;
5074 QualValPtr qvp;
5075
5076 qvp = (QualValPtr) userdata;
5077
5078 if (ufp == NULL || ufp->choice != 11) return;
5079 oip = ufp->label;
5080 if (oip == NULL) return;
5081 for (i = 0; goQualType [i] != NULL; i++) {
5082 if (StringICmp (oip->str, goQualType [i]) == 0) break;
5083 }
5084 if (goQualType [i] == NULL) return;
5085
5086 entry = ufp->data.ptrvalue;
5087 if (entry == NULL || entry->choice != 11) return;
5088
5089 /* ufp = (UserFieldPtr) entry->data.ptrvalue; */
5090 switch (i) {
5091 case 1 :
5092 qvp [FTQUAL_go_process].ufp = entry;
5093 break;
5094 case 2 :
5095 qvp [FTQUAL_go_component].ufp = entry;
5096 break;
5097 case 3 :
5098 qvp [FTQUAL_go_function].ufp = entry;
5099 break;
5100 default :
5101 break;
5102 }
5103 }
5104
5105 static void RecordUserObjectsInQVP (
5106 UserObjectPtr uop,
5107 Pointer userdata
5108 )
5109
5110 {
5111 ObjectIdPtr oip;
5112 QualValPtr qvp;
5113
5114 if (uop == NULL || userdata == NULL) return;
5115 qvp = (QualValPtr) userdata;
5116 oip = uop->type;
5117 if (oip == NULL) return;
5118 if (StringCmp (oip->str, "ModelEvidence") == 0) {
5119 qvp [FTQUAL_modelev].uop = uop;
5120 } else if (StringCmp (oip->str, "GeneOntology") == 0) {
5121 VisitUserFieldsInUop (uop, (Pointer) qvp, RecordGoFieldsInQVP);
5122 } else if (StringCmp (oip->str, "OfficialNomenclature") == 0) {
5123 qvp [FTQUAL_nomenclature].uop = uop;
5124 }
5125 }
5126
5127 NLM_EXTERN void AddIntervalsToGbfeat (
5128 GBFeaturePtr gbfeat,
5129 SeqLocPtr location,
5130 BioseqPtr target
5131 )
5132
5133 {
5134 Char accn [41];
5135 SeqLocPtr copy = NULL;
5136 Int4 from;
5137 IntFuzzPtr fuzz;
5138 GBIntervalPtr gbint;
5139 Int4 gi;
5140 Boolean interbp;
5141 Boolean iscomp;
5142 GBIntervalPtr last = NULL;
5143 Int4 point;
5144 SeqIntPtr sint;
5145 SeqIdPtr sip;
5146 SeqLocPtr slp;
5147 SeqPntPtr spp;
5148 Int4 to;
5149 Int4 swap;
5150
5151 if (gbfeat == NULL || location == NULL) return;
5152 if (target != NULL) {
5153 copy = SeqLocMerge (target, location, NULL, FALSE, TRUE, FALSE);
5154 location = copy;
5155 }
5156
5157 slp = SeqLocFindNext (location, NULL);
5158 while (slp != NULL) {
5159 from = 0;
5160 to = 0;
5161 point = 0;
5162 iscomp = FALSE;
5163 interbp = FALSE;
5164 sip = NULL;
5165 switch (slp->choice) {
5166 case SEQLOC_WHOLE :
5167 sip = (SeqIdPtr) slp->data.ptrvalue;
5168 if (sip != NULL) {
5169 from = 1;
5170 to = SeqLocLen (slp);
5171 if (to < 0) {
5172 sip = NULL;
5173 }
5174 }
5175 break;
5176 case SEQLOC_INT :
5177 sint = (SeqIntPtr) slp->data.ptrvalue;
5178 if (sint != NULL) {
5179 from = sint->from + 1;
5180 to = sint->to + 1;
5181 sip = sint->id;
5182 if (sint->strand == Seq_strand_minus && from < to) {
5183 swap = from;
5184 from = to;
5185 to = swap;
5186 }
5187 if (sint->strand == Seq_strand_minus) {
5188 iscomp = TRUE;
5189 }
5190 }
5191 break;
5192 case SEQLOC_PNT :
5193 spp = (SeqPntPtr) slp->data.ptrvalue;
5194 if (spp != NULL) {
5195 point = spp->point + 1;
5196 sip = spp->id;
5197 if (spp->strand == Seq_strand_minus) {
5198 iscomp = TRUE;
5199 }
5200 fuzz = spp->fuzz;
5201 if (fuzz != NULL) {
5202 if (fuzz->choice == 4) {
5203 if (fuzz->a == 3) { /* space to right */
5204 from = point;
5205 to = point + 1;
5206 point = 0;
5207 interbp = TRUE;
5208 } else if (fuzz->a == 4 && point > 1) { /* space to left */
5209 from = point - 1;
5210 to = point;
5211 point = 0;
5212 interbp = TRUE;
5213 }
5214 }
5215 }
5216 }
5217 break;
5218 default :
5219 break;
5220 }
5221 if (sip != NULL) {
5222 accn [0] = '\0';
5223 if (sip->choice == SEQID_GI) {
5224 gi = sip->data.intvalue;
5225 if (! GetAccnVerFromServer (gi, accn)) {
5226 accn [0] = '\0';
5227 }
5228 if (StringHasNoText (accn)) {
5229 sip = GetSeqIdForGI (gi);
5230 SeqIdWrite (sip, accn, PRINTID_TEXTID_ACC_VER, sizeof (accn));
5231 SeqIdFree (sip);
5232 }
5233 } else {
5234 SeqIdWrite (sip, accn, PRINTID_TEXTID_ACC_VER, sizeof (accn));
5235 }
5236 if (! StringHasNoText (accn)) {
5237 gbint = GBIntervalNew ();
5238 if (gbint != NULL) {
5239 gbint->from = from;
5240 gbint->to = to;
5241 gbint->point = point;
5242 gbint->iscomp = iscomp;
5243 gbint->interbp = interbp;
5244 gbint->accession = StringSave (accn);
5245 if (gbfeat->intervals == NULL) {
5246 gbfeat->intervals = gbint;
5247 } else if (last != NULL) {
5248 last->next = gbint;
5249 }
5250 last = gbint;
5251 }
5252 }
5253 }
5254 slp = SeqLocFindNext (location, slp);
5255 }
5256
5257 SeqLocFree (copy);
5258 }
5259
5260 static CharPtr validExceptionString [] = {
5261 "RNA editing",
5262 "reasons given in citation",
5263 "rearrangement required for product",
5264 "annotated by transcript or proteomic data",
5265 NULL
5266 };
5267
5268 static CharPtr validRefSeqExceptionString [] = {
5269 "alternative processing",
5270 "artificial frameshift",
5271 "nonconsensus splice site",
5272 "modified codon recognition",
5273 "alternative start codon",
5274 "dicistronic gene",
5275 "unclassified transcription discrepancy",
5276 "unclassified translation discrepancy",
5277 "mismatches in transcription",
5278 "mismatches in translation",
5279 "adjusted for low-quality genome",
5280 "transcribed product replaced",
5281 "translated product replaced",
5282 "transcribed pseudogene",
5283 "heterogeneous population sequenced",
5284 "low-quality sequence region",
5285 "unextendable partial coding region",
5286 NULL
5287 };
5288
5289 /* ribosomal slippage and trans-splicing now are separate qualifiers */
5290
5291 static void ParseException (
5292 CharPtr original,
5293 CharPtr PNTR exception_string,
5294 CharPtr PNTR exception_note,
5295 Boolean isRefSeq,
5296 Boolean isRelaxed,
5297 Uint1 subtype,
5298 BoolPtr riboSlipP,
5299 BoolPtr transSpliceP
5300 )
5301
5302 {
5303 ValNodePtr excpt = NULL, note = NULL, vnp;
5304 Boolean first, found;
5305 Int2 i;
5306 size_t len;
5307 CharPtr ptr, str, tmp;
5308
5309 *exception_string = NULL;
5310 *exception_note = NULL;
5311 *riboSlipP = FALSE;
5312 *transSpliceP = FALSE;
5313
5314 if (StringHasNoText (original)) return;
5315
5316 str = StringSave (original);
5317 if (str == NULL) return;
5318
5319 tmp = str;
5320 while (! StringHasNoText (tmp)) {
5321 ptr = StringChr (tmp, ',');
5322 if (ptr != NULL) {
5323 *ptr = '\0';
5324 ptr++;
5325 }
5326 TrimSpacesAroundString (tmp);
5327 if (! StringHasNoText (tmp)) {
5328 found = FALSE;
5329 for (i = 0; validExceptionString [i] != NULL; i++) {
5330 if (StringICmp (tmp, validExceptionString [i]) == 0) {
5331 if (isRefSeq || isRelaxed || subtype == FEATDEF_CDS) {
5332 ValNodeCopyStr (&excpt, 0, tmp);
5333 } else {
5334 ValNodeCopyStr (¬e, 0, tmp);
5335 }
5336 found = TRUE;
5337 break;
5338 }
5339 }
5340 if (! found) {
5341 for (i = 0; validRefSeqExceptionString [i] != NULL; i++) {
5342 if (StringICmp (tmp, validRefSeqExceptionString [i]) == 0) {
5343 if (isRefSeq || isRelaxed) {
5344 ValNodeCopyStr (&excpt, 0, tmp);
5345 } else {
5346 ValNodeCopyStr (¬e, 0, tmp);
5347 }
5348 found = TRUE;
5349 break;
5350 }
5351 }
5352 }
5353 if (! found) {
5354 if (StringICmp (tmp, "ribosomal slippage") == 0) {
5355 if (subtype == FEATDEF_CDS) {
5356 *riboSlipP = TRUE;
5357 } else {
5358 ValNodeCopyStr (¬e, 0, tmp);
5359 }
5360 found = TRUE;
5361 } else if (StringICmp (tmp, "trans-splicing") == 0) {
5362 if (subtype == FEATDEF_GENE ||
5363 subtype == FEATDEF_CDS ||
5364 subtype == FEATDEF_mRNA ||
5365 subtype == FEATDEF_tRNA ||
5366 subtype == FEATDEF_preRNA ||
5367 subtype == FEATDEF_otherRNA ||
5368 subtype == FEATDEF_3clip ||
5369 subtype == FEATDEF_3UTR ||
5370 subtype == FEATDEF_5clip ||
5371 subtype == FEATDEF_5UTR) {
5372 *transSpliceP = TRUE;
5373 } else {
5374 ValNodeCopyStr (¬e, 0, tmp);
5375 }
5376 found = TRUE;
5377 }
5378 }
5379 if (! found) {
5380 if (isRelaxed) {
5381 ValNodeCopyStr (&excpt, 0, tmp);
5382 } else {
5383 ValNodeCopyStr (¬e, 0, tmp);
5384 }
5385 }
5386 }
5387 tmp = ptr;
5388 }
5389
5390 if (excpt != NULL) {
5391 for (vnp = excpt, len = 0; vnp != NULL; vnp = vnp->next) {
5392 tmp = (CharPtr) vnp->data.ptrvalue;
5393 len += StringLen (tmp) + 3;
5394 }
5395 ptr = (CharPtr) MemNew (len + 2);
5396 if (ptr != NULL) {
5397 for (vnp = excpt, first = TRUE; vnp != NULL; vnp = vnp->next) {
5398 if (! first) {
5399 StringCat (ptr, ", ");
5400 }
5401 tmp = (CharPtr) vnp->data.ptrvalue;
5402 StringCat (ptr, tmp);
5403 first = FALSE;
5404 }
5405 }
5406 *exception_string = ptr;
5407 }
5408
5409 if (note != NULL) {
5410 for (vnp = note, len = 0; vnp != NULL; vnp = vnp->next) {
5411 tmp = (CharPtr) vnp->data.ptrvalue;
5412 len += StringLen (tmp) + 3;
5413 }
5414 ptr = (CharPtr) MemNew (len + 2);
5415 if (ptr != NULL) {
5416 for (vnp = note, first = TRUE; vnp != NULL; vnp = vnp->next) {
5417 if (! first) {
5418 StringCat (ptr, ", ");
5419 }
5420 tmp = (CharPtr) vnp->data.ptrvalue;
5421 StringCat (ptr, tmp);
5422 first = FALSE;
5423 }
5424 }
5425 *exception_note = ptr;
5426 }
5427
5428 ValNodeFreeData (excpt);
5429 ValNodeFreeData (note);
5430 MemFree (str);
5431 }
5432
5433 static CharPtr legalInferencePrefixes [] = {
5434 "",
5435 "similar to sequence",
5436 "similar to AA sequence",
5437 "similar to DNA sequence",
5438 "similar to RNA sequence",
5439 "similar to RNA sequence, mRNA",
5440 "similar to RNA sequence, EST",
5441 "similar to RNA sequence, other RNA",
5442 "profile",
5443 "nucleotide motif",
5444 "protein motif",
5445 "ab initio prediction",
5446 "alignment",
5447 NULL
5448 };
5449
5450 static void ParseInference (
5451 GBQualPtr quals,
5452 ValNodePtr PNTR good_inferenceP,
5453 ValNodePtr PNTR bad_inferenceP
5454 )
5455
5456 {
5457 Int2 best, j;
5458 ValNodePtr good = NULL, bad = NULL;
5459 GBQualPtr gbq;
5460 size_t len;
5461
5462 *good_inferenceP = NULL;
5463 *bad_inferenceP = NULL;
5464
5465 if (quals == NULL) return;
5466
5467 for (gbq = quals; gbq != NULL; gbq = gbq->next) {
5468 if (StringICmp (gbq->qual, "inference") != 0) continue;
5469 if (StringHasNoText (gbq->val)) continue;
5470 best = -1;
5471 for (j = 0; legalInferencePrefixes [j] != NULL; j++) {
5472 len = StringLen (legalInferencePrefixes [j]);
5473 if (StringNICmp (gbq->val, legalInferencePrefixes [j], len) != 0) continue;
5474 best = j;
5475 }
5476 if (best >= 0 && legalInferencePrefixes [best] != NULL) {
5477 ValNodeCopyStr (&good, 0, gbq->val);
5478 } else {
5479 ValNodeCopyStr (&bad, 0, gbq->val);
5480 }
5481 }
5482
5483 *good_inferenceP = good;
5484 *bad_inferenceP = bad;
5485 }
5486
5487 typedef struct geneprot {
5488 SeqFeatPtr gene;
5489 SeqFeatPtr cds;
5490 Boolean failed;
5491 } GeneProtData, PNTR GeneProtPtr;
5492
5493 static void CheckGeneOnIsolatedProtein (
5494 SeqFeatPtr sfp,
5495 Pointer userdata
5496 )
5497
5498 {
5499 GeneProtPtr gpp;
5500
5501 if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
5502 gpp = (GeneProtPtr) userdata;
5503 if (gpp == NULL) return;
5504
5505 if (SeqLocAinB (gpp->cds->location, sfp->location) < 0) return;
5506 if (gpp->gene != NULL) {
5507 gpp->failed = TRUE;
5508 } else {
5509 gpp->gene = sfp;
5510 }
5511 }
5512
5513 static SeqFeatPtr FindGeneOnIsolatedProtein (
5514 SeqEntryPtr sep,
5515 SeqFeatPtr cds
5516 )
5517
5518 {
5519 GeneProtData gpd;
5520
5521 if (sep == NULL || cds == NULL) return NULL;
5522
5523 MemSet ((Pointer) &gpd, 0, sizeof (GeneProtData));
5524 gpd.cds = cds;
5525 VisitFeaturesInSep (sep, (Pointer) &gpd, CheckGeneOnIsolatedProtein);
5526
5527 if (gpd.failed) return NULL;
5528
5529 return gpd.gene;
5530 }
5531
5532 static SeqFeatPtr GetOverlappingGeneInEntity (
5533 Uint2 entityID,
5534 SeqMgrFeatContextPtr fcontext,
5535 SeqMgrFeatContextPtr gcontext,
5536 SeqLocPtr locforgene,
5537 IntAsn2gbJobPtr ajp
5538 )
5539
5540 {
5541 SeqFeatPtr gene = NULL;
5542 SeqEntryPtr sep, oldscope;
5543 SeqInt sint;
5544 SeqIntPtr sintp;
5545 SeqPntPtr spp;
5546 SeqPnt spt;
5547 ValNode vn;
5548
5549 sep = GetTopSeqEntryForEntityID (entityID);
5550 oldscope = SeqEntrySetScope (sep);
5551 if (fcontext->featdeftype == FEATDEF_variation && locforgene != NULL) {
5552 /* first check same strand for variation */
5553 gene = SeqMgrGetOverlappingGene (locforgene, gcontext);
5554 if (gene == NULL) {
5555 /* special case variation feature - copy location but set strand both */
5556 if (locforgene->choice == SEQLOC_INT && locforgene->data.ptrvalue != NULL) {
5557 sintp = (SeqIntPtr) locforgene->data.ptrvalue;
5558 MemSet ((Pointer) &sint, 0, sizeof (SeqInt));
5559 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
5560 sint.from = sintp->from;
5561 sint.to = sintp->to;
5562 sint.id = sintp->id;
5563 sint.if_from = sintp->if_from;
5564 sint.if_to = sintp->if_to;
5565 sint.strand = Seq_strand_both;
5566 vn.choice = SEQLOC_INT;
5567 vn.data.ptrvalue = (Pointer) &sint;
5568 gene = SeqMgrGetOverlappingGene (&vn, gcontext);
5569
5570 } else if (locforgene->choice == SEQLOC_PNT && locforgene->data.ptrvalue != NULL) {
5571 spp = (SeqPntPtr) locforgene->data.ptrvalue;
5572 MemSet ((Pointer) &spt, 0, sizeof (SeqPnt));
5573 MemSet ((Pointer) &vn, 0, sizeof (ValNode));
5574 spt.point = spp->point;
5575 spt.id = spp->id;
5576 spt.fuzz = spp->fuzz;
5577 spt.strand = Seq_strand_both;
5578 vn.choice = SEQLOC_PNT;
5579 vn.data.ptrvalue = (Pointer) &spt;
5580 gene = SeqMgrGetOverlappingGene (&vn, gcontext);
5581
5582 /*
5583 } else {
5584 gene = SeqMgrGetOverlappingGene (locforgene, gcontext);
5585 */
5586 }
5587 }
5588 } else {
5589 if (fcontext->bad_order || fcontext->mixed_strand) {
5590 gene = SeqMgrGetOverlappingFeature (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext);
5591 } else if (ajp->multiIntervalGenes) {
5592 gene = SeqMgrGetOverlappingFeature (locforgene, FEATDEF_GENE, NULL, 0, NULL, LOCATION_SUBSET, gcontext);
5593 if (gene == NULL) {
5594 gene = SeqMgrGetOverlappingGene (locforgene, gcontext);
5595 }
5596 } else {
5597 gene = SeqMgrGetOverlappingGene (locforgene, gcontext);
5598 }
5599 }
5600 SeqEntrySetScope (oldscope);
5601 return gene;
5602 }
5603
5604 static Boolean LocStrandsMatch (SeqLocPtr loc1, SeqLocPtr loc2)
5605
5606 {
5607 Uint1 featstrand;
5608 Uint1 locstrand;
5609
5610 if (loc1 == NULL || loc2 == NULL) return FALSE;
5611 featstrand = SeqLocStrand (loc1);
5612 locstrand = SeqLocStrand (loc2);
5613 if (featstrand == locstrand) return TRUE;
5614 if (locstrand == Seq_strand_unknown && featstrand != Seq_strand_minus) return TRUE;
5615 if (featstrand == Seq_strand_unknown && locstrand != Seq_strand_minus) return TRUE;
5616 if (featstrand == Seq_strand_both && locstrand != Seq_strand_minus) return TRUE;
5617 if (locstrand == Seq_strand_both) return TRUE;
5618 return FALSE;
5619 }
5620
5621 /*
5622 static CharPtr SeqLoc2Str (
5623 SeqLocPtr slp
5624 )
5625
5626 {
5627 AsnIoBSPtr aibp;
5628 ByteStorePtr bs;
5629 Char ch;
5630 CharPtr ptr;
5631 CharPtr str;
5632
5633 if (slp == NULL) return NULL;
5634
5635 bs = BSNew (1000);
5636 if (bs == NULL) return NULL;
5637 aibp = AsnIoBSOpen ("w", bs);
5638 if (aibp == NULL) return NULL;
5639
5640 SeqLocAsnWrite (slp, aibp->aip, NULL);
5641
5642 AsnIoFlush (aibp->aip);
5643 AsnIoBSClose (aibp);
5644
5645 str = BSMerge (bs, NULL);
5646 BSFree (bs);
5647
5648 if (str == NULL) return NULL;
5649
5650 ptr = str;
5651 ch = *ptr;
5652 while (ch != '\0') {
5653 if (ch == '\n' || ch == '\r' || ch == '\t') {
5654 *ptr = ' ';
5655 }
5656 ptr++;
5657 ch = *ptr;
5658 }
5659
5660 TrimSpacesAndSemicolons (str);
5661 Asn2gnbkCompressSpaces (str);
5662
5663 return str;
5664 }
5665 */
5666
5667 static CharPtr FormatFeatureBlockEx (
5668 IntAsn2gbJobPtr ajp,
5669 Asn2gbSectPtr asp,
5670 BioseqPtr bsp,
5671 BioseqPtr target,
5672 SeqFeatPtr sfp,
5673 SeqMgrFeatContextPtr fcontext,
5674 QualValPtr qvp,
5675 FmtType format,
5676 IntFeatBlockPtr ifp,
5677 Boolean isProt,
5678 Boolean doKey
5679 )
5680
5681 {
5682 Uint1 aa;
5683 AnnotDescrPtr adp;
5684 Boolean annotDescCommentToComment;
5685 ValNodePtr bad_inference = NULL;
5686 Int2 bondidx;
5687 BioseqPtr bspx = NULL;
5688 BioseqSetPtr bssp;
5689 Choice cbaa;
5690 CodeBreakPtr cbp;
5691 BioseqPtr cdna;
5692 SeqFeatPtr cds = NULL;
5693 Char ch;
5694 Uint1 code = Seq_code_ncbieaa;
5695 CdRegionPtr crp;
5696 SeqMgrDescContext dcontext;
5697 Boolean encode_prefix = FALSE;
5698 CharPtr exception_note = NULL;
5699 CharPtr exception_string = NULL;
5700 Uint1 featdeftype;
5701 Uint1 from;
5702 GBQualPtr gbq;
5703 GBFeaturePtr gbfeat = NULL;
5704 GBSeqPtr gbseq;
5705 SeqMgrFeatContext gcontext;
5706 ValNodePtr gcp;
5707 SeqFeatPtr gene = NULL;
5708 ValNodePtr gene_syn = NULL;
5709 ValNodePtr good_inference = NULL;
5710 GeneRefPtr grp = NULL;
5711 IntCdsBlockPtr icp;
5712 Uint2 idx;
5713 ValNodePtr illegal = NULL;
5714 ImpFeatPtr imp = NULL;
5715 IndxPtr index;
5716 Boolean is_ed = FALSE;
5717 Boolean is_ged = FALSE;
5718 Boolean is_gps = FALSE;
5719 Boolean is_journalscan = FALSE;
5720 Boolean is_other = FALSE;
5721 Boolean is_misc_rna = FALSE;
5722 Uint4 itemID;
5723 CharPtr its_prod = NULL;
5724 CharPtr key = NULL;
5725 CharPtr lasttype = NULL;
5726 Int4 left = -1;
5727 SeqLocPtr loc = NULL;
5728 SeqLocPtr location = NULL;
5729 SeqLocPtr locforgene = NULL;
5730 SeqLocPtr locformatpep = NULL;
5731 SeqMgrFeatContext mcontext;
5732 MolInfoPtr mip;
5733 SeqFeatPtr mrna;
5734 SeqLocPtr newloc;
5735 Boolean noLeft;
5736 Boolean noRight;
5737 SeqMgrFeatContext ocontext;
5738 ObjectIdPtr oip;
5739 SeqEntryPtr oldscope;
5740 SeqFeatPtr operon = NULL;
5741 Uint2 partial;
5742 SeqMgrFeatContext pcontext;
5743 BioseqPtr prd;
5744 CharPtr precursor_comment = NULL;
5745 BioseqPtr prod = NULL;
5746 SeqFeatPtr prot;
5747 Boolean protein = FALSE;
5748 Char protein_pid_g [32];
5749 ProtRefPtr prp;
5750 ProtRefPtr prpxref;
5751 Boolean pseudo = FALSE;
5752 CharPtr ptr;
5753 Uint2 pEID;
5754 Int2 qualclass;
5755 Uint1 residue;
5756 RNAGenPtr rgp;
5757 Boolean riboSlippage = FALSE;
5758 Int4 right = -1;
5759 RNAQualSetPtr rqsp;
5760 RnaRefPtr rrp;
5761 SeqAnnotPtr sap;
5762 SeqCodeTablePtr sctp;
5763 SeqDescrPtr sdp;
5764 SeqEntryPtr sep;
5765 Uint1 seqcode;
5766 Uint1 seqfeattype;
5767 SeqIdPtr sip;
5768 Int2 siteidx;
5769 SeqMapTablePtr smtp;
5770 Boolean split;
5771 CharPtr str;
5772 Uint1 strand = Seq_strand_unknown;
5773 Boolean suppressed = FALSE;
5774 CharPtr tmp;
5775 Boolean transSplice = FALSE;
5776 tRNAPtr trna;
5777 UserFieldPtr ufp;
5778 BioseqPtr unlockme = NULL;
5779 UserObjectPtr uop;
5780 ValNodePtr vnp;
5781 StringItemPtr ffstring;
5782 /*
5783 CharPtr firstloc = NULL;
5784 CharPtr secondloc = NULL;
5785 CharPtr thirdloc = NULL;
5786 */
5787
5788 if (ajp == NULL || fcontext == NULL || qvp == NULL || ifp == NULL) return NULL;
5789
5790 ffstring = FFGetString(ajp);
5791 if ( ffstring == NULL ) return NULL;
5792
5793 if (ajp->index && asp != NULL) {
5794 index = &asp->index;
5795 } else {
5796 index = NULL;
5797 }
5798
5799 if (ajp->gbseq && asp != NULL) {
5800 gbseq = &asp->gbseq;
5801 } else {
5802 gbseq = NULL;
5803 }
5804
5805 protein_pid_g [0] = '\0';
5806
5807 itemID = fcontext->itemID;
5808
5809 featdeftype = fcontext->featdeftype;
5810
5811 if (featdeftype < FEATDEF_GENE || featdeftype >= FEATDEF_MAX) {
5812 featdeftype = FEATDEF_BAD;
5813 }
5814 if (featdeftype == 0) {
5815 featdeftype = sfp->idx.subtype;
5816 }
5817
5818 seqfeattype = fcontext->seqfeattype;
5819 if (seqfeattype == 0) {
5820 seqfeattype = sfp->data.choice;
5821 }
5822
5823
5824 if (doKey) {
5825 /* may need to map location between aa and dna */
5826
5827 if (ifp->mapToNuc) {
5828
5829 /* map mat_peptide, etc., to nucleotide coordinates */
5830
5831 sip = SeqLocId (sfp->location);
5832 prd = BioseqFind (sip);
5833 cds = SeqMgrGetCDSgivenProduct (prd, NULL);
5834 CheckSeqLocForPartial (sfp->location, &noLeft, &noRight);
5835 location = aaFeatLoc_to_dnaFeatLoc (cds, sfp->location);
5836 SetSeqLocPartial (location, noLeft, noRight);
5837 /*
5838 locforgene = location;
5839 */
5840 if (cds != NULL) {
5841 grp = SeqMgrGetGeneXref (cds); /* mat_peptide first obeys any CDS gene xref */
5842 locformatpep = location; /* mat_peptide next gets exact match for /gene */
5843 locforgene = cds->location; /* mat_peptide last gets parent CDS /gene */
5844 }
5845 loc = location;
5846
5847 } else if (ifp->mapToProt) {
5848
5849 /* map CDS to protein product coordinates */
5850
5851 sip = SeqLocIdForProduct (sfp->product);
5852 prd = BioseqFind (sip);
5853 cds = SeqMgrGetCDSgivenProduct (prd, NULL);
5854 location = dnaLoc_to_aaLoc (cds, sfp->location, TRUE, NULL, FALSE);
5855 SetSeqLocPartial (location, FALSE, FALSE);
5856 locforgene = sfp->location;
5857 loc = location;
5858
5859 } else if (ifp->mapToGen) {
5860
5861 /* map CDS from cDNA to genomic Bioseq */
5862
5863 cdna = BioseqFindFromSeqLoc (sfp->location);
5864 mrna = SeqMgrGetRNAgivenProduct (cdna, &mcontext);
5865 CheckSeqLocForPartial (sfp->location, &noLeft, &noRight);
5866 location = productLoc_to_locationLoc (mrna, sfp->location);
5867 SetSeqLocPartial (location, noLeft, noRight);
5868 locforgene = location;
5869 loc = location;
5870
5871 } else if (ifp->mapToMrna) {
5872
5873 /* map gene from genomic to cDNA Bioseq */
5874
5875 sep = SeqMgrGetSeqEntryForData (bsp);
5876 location = CreateWholeInterval (sep);
5877 SetSeqLocPartial (location, FALSE, FALSE);
5878 locforgene = location;
5879 loc = location;
5880
5881 } else if (ifp->mapToPep) {
5882
5883 /* map protein processing from precursor to subpeptide Bioseq */
5884
5885 sep = SeqMgrGetSeqEntryForData (bsp);
5886 location = CreateWholeInterval (sep);
5887 SetSeqLocPartial (location, FALSE, FALSE);
5888 locforgene = location;
5889 loc = location;
5890
5891 } else {
5892
5893 /* no aa-dna or dna-aa mapping, just use location */
5894
5895 location = sfp->location;
5896 locforgene = sfp->location;
5897 }
5898 if (location == NULL) return NULL;
5899
5900 sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
5901 if (sep != NULL && IS_Bioseq_set (sep)) {
5902 bssp = (BioseqSetPtr) sep->data.ptrvalue;
5903 if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
5904 is_gps = TRUE;
5905 }
5906 }
5907
5908 if (bsp != NULL) {
5909 for (sip = bsp->id; sip != NULL; sip = sip->next) {
5910 switch (sip->choice) {
5911 case SEQID_OTHER :
5912 is_other = TRUE;
5913 break;
5914 case SEQID_GIBBSQ :
5915 case SEQID_GIBBMT :
5916 case SEQID_GIIM :
5917 is_journalscan = TRUE;
5918 break;
5919 case SEQID_GENBANK :
5920 is_ged = TRUE;
5921 break;
5922 case SEQID_EMBL :
5923 is_ged = TRUE;
5924 is_ed = TRUE;
5925 break;
5926 case SEQID_DDBJ :
5927 is_ged = TRUE;
5928 is_ed = TRUE;
5929 break;
5930 case SEQID_TPG :
5931 is_ged = TRUE;
5932 break;
5933 case SEQID_TPE :
5934 case SEQID_TPD :
5935 is_ged = TRUE;
5936 is_ed = TRUE;
5937 break;
5938 default :
5939 break;
5940 }
5941 }
5942 }
5943
5944 if (ajp->refseqConventions) {
5945 is_other = TRUE;
5946 }
5947
5948 key = FindKeyFromFeatDefType (featdeftype, TRUE);
5949
5950 if (format == GENPEPT_FMT && isProt) {
5951 if (featdeftype == FEATDEF_REGION) {
5952 key = "Region";
5953 } else if (featdeftype == FEATDEF_BOND) {
5954 key = "Bond";
5955 } else if (featdeftype == FEATDEF_SITE) {
5956 key = "Site";
5957 }
5958 if (ifp->mapToPep) {
5959 if (featdeftype >= FEATDEF_preprotein && featdeftype <= FEATDEF_transit_peptide_aa) {
5960 key = "Precursor";
5961 itemID = 0;
5962 }
5963 }
5964 }
5965 if (! isProt) {
5966 if (featdeftype == FEATDEF_preprotein) {
5967 if (! is_other) {
5968 key = "misc_feature";
5969 encode_prefix = TRUE;
5970 }
5971 }
5972 }
5973 if (featdeftype == FEATDEF_CLONEREF) {
5974 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
5975 key = "misc_feature";
5976 }
5977 }
5978
5979 /* deal with unmappable impfeats */
5980
5981 if (featdeftype == FEATDEF_BAD && seqfeattype == SEQFEAT_IMP) {
5982 imp = (ImpFeatPtr) sfp->data.value.ptrvalue;
5983 if (imp != NULL) {
5984 key = imp->key;
5985 }
5986 }
5987
5988 /* prior to BSEC conversion, map for release and web, allow old feature to be seen in Sequin */
5989 if (featdeftype == FEATDEF_repeat_unit && (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE)) {
5990 key = "repeat_region";
5991 }
5992
5993 FFStartPrint(ffstring, format, 5, 21, NULL, 0, 5, 21, "FT", /* ifp->firstfeat */ FALSE);
5994 if (ajp->ajp.slp != NULL) {
5995 FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
5996 } else if ( GetWWW(ajp) && StringICmp (key, "gap") != 0 && bsp != NULL /* && SeqMgrGetParentOfPart (bsp, NULL) == NULL */ ) {
5997 FF_asn2gb_www_featkey (ffstring, key, sfp, fcontext->left + 1, fcontext->right + 1,
5998 fcontext->strand, itemID);
5999 } else {
6000 FFAddOneString(ffstring, key, FALSE, FALSE, TILDE_IGNORE);
6001 }
6002 FFAddNChar(ffstring, ' ', 21 - 5 - StringLen(key), FALSE);
6003
6004 if (gbseq != NULL) {
6005 gbfeat = GBFeatureNew ();
6006 if (gbfeat != NULL) {
6007 gbfeat->key = StringSave (key);
6008 }
6009 }
6010
6011 if (imp == NULL || StringHasNoText (imp->loc)) {
6012
6013
6014 if (ajp->ajp.slp != NULL) {
6015 sip = SeqIdParse ("lcl|dummy");
6016 left = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_LEFT_END);
6017 right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END);
6018 strand = SeqLocStrand (ajp->ajp.slp);
6019 split = FALSE;
6020 newloc = SeqLocReMapEx (sip, ajp->ajp.slp, location, 0, FALSE, ajp->masterStyle);
6021 /*
6022 newloc = SeqLocCopyRegion (sip, location, bsp, left, right, strand, &split);
6023 */
6024 SeqIdFree (sip);
6025 if (newloc == NULL) return NULL;
6026 /*
6027 firstloc = SeqLoc2Str (newloc);
6028 */
6029 A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
6030 /*
6031 secondloc = SeqLoc2Str (newloc);
6032 */
6033 str = FFFlatLoc (ajp, target, newloc, ajp->masterStyle);
6034 SeqLocFree (newloc);
6035 /*
6036 thirdloc = SeqLoc2Str (ajp->ajp.slp);
6037 if (StringCmp (str, "?") != 0) {
6038 firstloc = MemFree (firstloc);
6039 secondloc = MemFree (secondloc);
6040 thirdloc = MemFree (thirdloc);
6041 }
6042 */
6043 } else {
6044 str = FFFlatLoc (ajp, target, location, ajp->masterStyle);
6045 /*
6046 if (StringCmp (str, "?") == 0) {
6047 firstloc = SeqLoc2Str (location);
6048 SeqIdWrite (target->id, buf, PRINTID_FASTA_LONG, sizeof (buf));
6049 secondloc = StringSave (buf);
6050 thirdloc = NULL;
6051 }
6052 */
6053 }
6054 } else {
6055 str = StringSave (imp->loc);
6056 }
6057 if ( GetWWW(ajp) ) {
6058 FF_www_featloc (ffstring, str);
6059 } else {
6060 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
6061 }
6062
6063 if (gbseq != NULL) {
6064 if (gbfeat != NULL) {
6065 gbfeat->location = StringSave (str);
6066 if (StringDoesHaveText (str)) {
6067 if (StringStr (str, "join") != NULL) {
6068 gbfeat->operator__ = StringSave ("join");
6069 } else if (StringStr (str, "order") != NULL) {
6070 gbfeat->operator__ = StringSave ("order");
6071 }
6072 }
6073 gbfeat->partial5 = fcontext->partialL;
6074 gbfeat->partial3 = fcontext->partialR;
6075 if (ajp->masterStyle) {
6076 AddIntervalsToGbfeat (gbfeat, location, target);
6077 } else {
6078 AddIntervalsToGbfeat (gbfeat, location, NULL);
6079 }
6080 }
6081 }
6082
6083 MemFree (str);
6084
6085 } else {
6086
6087 location = sfp->location;
6088 locforgene = sfp->location;
6089 }
6090
6091 /* populate qualifier table from feature fields */
6092
6093 /*
6094 if (sfp->partial == TRUE)
6095 sfp->partial = FlatAnnotPartial(sfp, use_product);
6096 */
6097
6098 if (sfp->partial) {
6099 partial = SeqLocPartialCheck (location);
6100 if (partial == SLP_COMPLETE /* || partial > SLP_OTHER */ ) {
6101 qvp [FTQUAL_partial].ble = TRUE;
6102 }
6103 if (LookForFuzz (location)) {
6104 qvp [FTQUAL_partial].ble = FALSE;
6105 }
6106 if (imp != NULL) {
6107 if (StringChr (imp->loc, '<') != NULL || StringChr (imp->loc, '>') != NULL) {
6108 qvp [FTQUAL_partial].ble = FALSE;
6109 }
6110 }
6111
6112 /* hide unclassified /partial in RELEASE_MODE and ENTREZ_MODE */
6113
6114 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
6115 qvp [FTQUAL_partial].ble = FALSE;
6116 }
6117 /*
6118 if (ajp->flags.checkQualSyntax) {
6119 switch (featdeftype) {
6120 case FEATDEF_conflict:
6121 case FEATDEF_mutation:
6122 case FEATDEF_N_region:
6123 case FEATDEF_polyA_site:
6124 qvp [FTQUAL_partial].ble = FALSE;
6125 break;
6126 default:
6127 break;
6128 }
6129 }
6130 */
6131 }
6132 if (ifp->mapToProt) {
6133 qvp [FTQUAL_partial].ble = FALSE;
6134 }
6135
6136 if (sfp->pseudo) {
6137 pseudo = TRUE;
6138 }
6139
6140 if (seqfeattype == SEQFEAT_GENE) {
6141 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
6142 if (grp != NULL) {
6143 if (! StringHasNoText (grp->locus)) {
6144 qvp [FTQUAL_gene].str = grp->locus;
6145 qvp [FTQUAL_locus_tag].str = grp->locus_tag;
6146 qvp [FTQUAL_gene_desc].str = grp->desc;
6147 qvp [FTQUAL_gene_syn].vnp = grp->syn;
6148 } else if (grp->locus_tag != NULL) {
6149 qvp [FTQUAL_locus_tag].str = grp->locus_tag;
6150 qvp [FTQUAL_gene_desc].str = grp->desc;
6151 qvp [FTQUAL_gene_syn].vnp = grp->syn;
6152 } else if (! StringHasNoText (grp->desc)) {
6153 qvp [FTQUAL_gene].str = grp->desc;
6154 qvp [FTQUAL_gene_syn].vnp = grp->syn;
6155 } else if (grp->syn != NULL) {
6156 vnp = grp->syn;
6157 qvp [FTQUAL_gene].str = (CharPtr) vnp->data.ptrvalue;
6158 vnp = vnp->next;
6159 qvp [FTQUAL_gene_syn].vnp = vnp;
6160 }
6161 qvp [FTQUAL_gene_map].str = grp->maploc;
6162 qvp [FTQUAL_gene_allele].str = grp->allele;
6163 qvp [FTQUAL_gene_xref].vnp = grp->db;
6164 if (grp->pseudo) {
6165 pseudo = TRUE;
6166 }
6167 qvp [FTQUAL_gene_nomen].gnp = grp->formal_name;
6168 }
6169 if (! ajp->flags.separateGeneSyns) {
6170 qvp [FTQUAL_gene_syn_refseq].vnp = qvp [FTQUAL_gene_syn].vnp;
6171 qvp [FTQUAL_gene_syn].vnp = NULL;
6172 }
6173 operon = SeqMgrGetOverlappingOperon (locforgene, &ocontext);
6174 if (operon != NULL) {
6175 for (gbq = operon->qual; gbq != NULL; gbq = gbq->next) {
6176 if (StringCmp (gbq->qual, "operon") == 0) {
6177 qvp [FTQUAL_operon].gbq = gbq;
6178 }
6179 }
6180 if (operon->pseudo) {
6181 pseudo = TRUE;
6182 }
6183 }
6184
6185 } else if (featdeftype != FEATDEF_operon && featdeftype != FEATDEF_gap) {
6186
6187 /* if mat_peptide, grp is already be set based on parent CDS, otherwise check current feature */
6188
6189 if (grp == NULL) {
6190 grp = SeqMgrGetGeneXref (sfp);
6191 }
6192
6193 /* if gene xref, then find referenced gene, take everything as if it overlapped */
6194
6195 if (grp != NULL) {
6196 if (SeqMgrGeneIsSuppressed (grp)) {
6197 suppressed = TRUE;
6198 } else {
6199 if (grp->pseudo) {
6200 pseudo = TRUE;
6201 }
6202 bspx = BioseqFindFromSeqLoc (sfp->location);
6203 if (bspx != NULL) {
6204 if (StringDoesHaveText (grp->locus_tag)) {
6205 gene = SeqMgrGetGeneByLocusTag (bspx, grp->locus_tag, &gcontext);
6206 } else if (StringDoesHaveText (grp->locus)) {
6207 gene = SeqMgrGetFeatureByLabel (bspx, grp->locus, SEQFEAT_GENE, 0, &gcontext);
6208 }
6209 if (gene != NULL) {
6210 grp = (GeneRefPtr) gene->data.value.ptrvalue;
6211 if (gene->pseudo) {
6212 pseudo = TRUE;
6213 }
6214 if (grp != NULL && grp->db != NULL) {
6215 qvp [FTQUAL_gene_xref].vnp = grp->db;
6216 } else {
6217 qvp [FTQUAL_gene_xref].vnp = gene->dbxref;
6218 }
6219 }
6220 }
6221 }
6222 }
6223
6224 if (! suppressed) {
6225
6226 /* first look for gene that exactly matches mat_peptide DNA projection */
6227
6228 if (gene == NULL && grp == NULL && locformatpep != NULL) {
6229 gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locformatpep, ajp);
6230 if (gene == NULL && ajp->ajp.entityID != sfp->idx.entityID) {
6231 gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locformatpep, ajp);
6232 }
6233
6234 if (gene != NULL) {
6235 if (SeqLocCompare (gene->location, locformatpep) == SLC_A_EQ_B &&
6236 LocStrandsMatch (gene->location, locformatpep)) {
6237 qvp [FTQUAL_gene_note].str = gene->comment;
6238
6239 grp = (GeneRefPtr) gene->data.value.ptrvalue;
6240 if (gene->pseudo) {
6241 pseudo = TRUE;
6242 }
6243 if (grp != NULL && grp->db != NULL) {
6244 qvp [FTQUAL_gene_xref].vnp = grp->db;
6245 } else {
6246 qvp [FTQUAL_gene_xref].vnp = gene->dbxref;
6247 }
6248 } else {
6249 gene = NULL;
6250 }
6251 }
6252 }
6253
6254 /* otherwise, if not suppressed and no gene xref, get gene by overlap */
6255
6256 if (gene == NULL && grp == NULL) {
6257 if (featdeftype != FEATDEF_primer_bind) {
6258 gene = GetOverlappingGeneInEntity (ajp->ajp.entityID, fcontext, &gcontext, locforgene, ajp);
6259 if (gene == NULL && ajp->ajp.entityID != sfp->idx.entityID) {
6260 gene = GetOverlappingGeneInEntity (sfp->idx.entityID, fcontext, &gcontext, locforgene, ajp);
6261 }
6262 }
6263
6264 /* special case to get gene by overlap for coded_by cds on isolated protein bioseq */
6265 if (ifp->mapToProt && seqfeattype == SEQFEAT_CDREGION) {
6266 sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
6267 if (sep != NULL && IS_Bioseq (sep)) {
6268 bspx = (BioseqPtr) sep->data.ptrvalue;
6269 if (bspx != NULL && ISA_aa (bspx->mol)) {
6270 gene = FindGeneOnIsolatedProtein (sep, sfp);
6271 }
6272 }
6273 }
6274
6275 if (gene != NULL) {
6276 qvp [FTQUAL_gene_note].str = gene->comment;
6277
6278 grp = (GeneRefPtr) gene->data.value.ptrvalue;
6279 if (gene->pseudo) {
6280 pseudo = TRUE;
6281 }
6282 if (grp != NULL && grp->db != NULL) {
6283 qvp [FTQUAL_gene_xref].vnp = grp->db;
6284 } else {
6285 qvp [FTQUAL_gene_xref].vnp = gene->dbxref;
6286 }
6287 }
6288 }
6289
6290 if (grp != NULL && grp->pseudo) {
6291 pseudo = TRUE;
6292 }
6293
6294 if (grp != NULL && (featdeftype != FEATDEF_repeat_region || is_ed || gene == NULL)) {
6295 if (! StringHasNoText (grp->locus)) {
6296 qvp [FTQUAL_gene].str = grp->locus;
6297 qvp [FTQUAL_locus_tag].str = grp->locus_tag;
6298 qvp [FTQUAL_gene_syn].vnp = grp->syn;
6299 gene_syn = grp->syn;
6300 } else if (! StringHasNoText (grp->locus_tag)) {
6301 qvp [FTQUAL_locus_tag].str = grp->locus_tag;
6302 qvp [FTQUAL_gene_syn].vnp = grp->syn;
6303 gene_syn = grp->syn;
6304 } else if (! StringHasNoText (grp->desc)) {
6305 qvp [FTQUAL_gene].str = grp->desc;
6306 qvp [FTQUAL_gene_syn].vnp = grp->syn;
6307 gene_syn = grp->syn;
6308 } else if (grp->syn != NULL) {
6309 vnp = grp->syn;
6310 qvp [FTQUAL_gene].str = (CharPtr) vnp->data.ptrvalue;
6311 vnp = vnp->next;
6312 qvp [FTQUAL_gene_syn].vnp = vnp;
6313 gene_syn = vnp;
6314 }
6315 }
6316 if (! ajp->flags.separateGeneSyns) {
6317 qvp [FTQUAL_gene_syn_refseq].vnp = qvp [FTQUAL_gene_syn].vnp;
6318 qvp [FTQUAL_gene_syn].vnp = NULL;
6319 }
6320 if (grp != NULL &&
6321 featdeftype != FEATDEF_variation &&
6322 (featdeftype != FEATDEF_repeat_region || is_ed)) {
6323 qvp [FTQUAL_gene_allele].str = grp->allele; /* now propagating /allele */
6324 }
6325
6326 if (gene != NULL && (featdeftype != FEATDEF_repeat_region || is_ed)) {
6327 /* now propagate old_locus_tag to almost any underlying feature */
6328 for (gbq = gene->qual; gbq != NULL; gbq = gbq->next) {
6329 if (StringHasNoText (gbq->val)) continue;
6330 idx = GbqualToFeaturIndex (gbq->qual);
6331 if (idx == FTQUAL_old_locus_tag) {
6332 qvp [FTQUAL_old_locus_tag].gbq = gbq;
6333 break; /* record first old_locus_tag gbqual to display all */
6334 }
6335 }
6336 }
6337 if (seqfeattype != SEQFEAT_CDREGION && seqfeattype != SEQFEAT_RNA) {
6338 qvp [FTQUAL_gene_xref].vnp = NULL;
6339 }
6340
6341 if (featdeftype != FEATDEF_operon) {
6342 operon = SeqMgrGetOverlappingOperon (locforgene, &ocontext);
6343 if (operon != NULL) {
6344 for (gbq = operon->qual; gbq != NULL; gbq = gbq->next) {
6345 if (StringCmp (gbq->qual, "operon") == 0) {
6346 qvp [FTQUAL_operon].gbq = gbq;
6347 }
6348 }
6349 if (operon->pseudo) {
6350 pseudo = TRUE;
6351 }
6352 }
6353 }
6354 }
6355
6356 /* specific fields set here */
6357
6358 switch (seqfeattype) {
6359 case SEQFEAT_CDREGION :
6360 if (! ifp->mapToProt) {
6361 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
6362 if (crp != NULL) {
6363
6364 qvp [FTQUAL_codon_start].num = crp->frame;
6365 if (qvp [FTQUAL_codon_start].num == 0) {
6366 qvp [FTQUAL_codon_start].num = 1;
6367 }
6368 qvp [FTQUAL_transl_except].cbp = crp->code_break;
6369 for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
6370 seqcode = 0;
6371 sctp = NULL;
6372 cbaa = cbp->aa;
6373 switch (cbaa.choice) {
6374 case 1 :
6375 seqcode = Seq_code_ncbieaa;
6376 break;
6377 case 2 :
6378 seqcode = Seq_code_ncbi8aa;
6379 break;
6380 case 3 :
6381 seqcode = Seq_code_ncbistdaa;
6382 break;
6383 default :
6384 break;
6385 }
6386 if (seqcode != 0) {
6387 sctp = SeqCodeTableFind (seqcode);
6388 if (sctp != NULL) {
6389 residue = cbaa.value.intvalue;
6390 if (residue != 42) {
6391 if (seqcode != Seq_code_ncbieaa) {
6392 smtp = SeqMapTableFind (seqcode, Seq_code_ncbieaa);
6393 residue = SeqMapTableConvert (smtp, residue);
6394 }
6395 /*
6396 if (residue == 'U') {
6397 if (ajp->flags.selenocysteineToNote) {
6398 qvp [FTQUAL_selenocysteine_note].str = "selenocysteine";
6399 } else {
6400 qvp [FTQUAL_selenocysteine].ble = TRUE;
6401 }
6402 } else if (residue == 'O') {
6403 if (ajp->flags.pyrrolysineToNote) {
6404 qvp [FTQUAL_pyrrolysine_note].str = "pyrrolysine";
6405 } else {
6406 qvp [FTQUAL_pyrrolysine].ble = TRUE;
6407 }
6408 }
6409 */
6410 }
6411 }
6412 }
6413 }
6414
6415 gcp = crp->genetic_code;
6416 if (gcp != NULL) {
6417 for (vnp = gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
6418 if (vnp->choice == 2 && vnp->data.intvalue != 0) {
6419 qvp [FTQUAL_transl_table].num = vnp->data.intvalue;
6420 }
6421 }
6422
6423 /* suppress table 1, but always show it in GBSeqXML */
6424
6425 if (qvp [FTQUAL_transl_table].num == 1 && ajp->gbseq == NULL) {
6426 qvp [FTQUAL_transl_table].num = 0;
6427 }
6428 }
6429
6430 if (sfp->product != NULL && SeqLocLen (sfp->product) != 0) {
6431 protein = TRUE;
6432 }
6433 if (crp->conflict && (protein || (! sfp->excpt))) {
6434 if (protein) {
6435 qvp [FTQUAL_prot_conflict].str = conflict_msg;
6436 } else {
6437 /*
6438 qvp [FTQUAL_prot_missing].str = no_protein_msg;
6439 */
6440 }
6441 }
6442 }
6443
6444 sip = SeqLocIdForProduct (sfp->product);
6445 qvp [FTQUAL_protein_id].sip = sip;
6446
6447 sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
6448
6449 if (! ajp->alwaysTranslCds) {
6450
6451 /* by default only show /translation if product bioseq is within entity */
6452
6453 oldscope = SeqEntrySetScope (sep);
6454 prod = BioseqFind (sip);
6455 SeqEntrySetScope (oldscope);
6456
6457 if (prod == NULL && ajp->showFarTransl) {
6458
6459 /* but flag can override and force far /translation */
6460
6461 prod = BioseqLockById (sip);
6462 unlockme = prod;
6463 }
6464 }
6465
6466 prp = NULL;
6467
6468 if (prod != NULL) {
6469 for (sip = prod->id; sip != NULL; sip = sip->next) {
6470 if (sip->choice == SEQID_GI) {
6471 sprintf (protein_pid_g, "PID:g%ld", (long) sip->data.intvalue);
6472 }
6473 }
6474 sdp = SeqMgrGetNextDescriptor (prod, NULL, Seq_descr_comment, &dcontext);
6475 if (sdp != NULL && dcontext.level == 0) {
6476 if (! StringHasNoText ((CharPtr) sdp->data.ptrvalue)) {
6477 qvp [FTQUAL_prot_comment].str = (CharPtr) sdp->data.ptrvalue;
6478 }
6479 }
6480 sdp = SeqMgrGetNextDescriptor (prod, NULL, Seq_descr_molinfo, &dcontext);
6481 if (sdp != NULL && dcontext.level == 0) {
6482 mip = (MolInfoPtr) sdp->data.ptrvalue;
6483 if (mip != NULL && mip->tech > 1 &&
6484 mip->tech != MI_TECH_concept_trans &&
6485 mip->tech != MI_TECH_concept_trans_a) {
6486 str = StringForSeqTech (mip->tech);
6487 if (! StringHasNoText (str)) {
6488 qvp [FTQUAL_prot_method].str = str;
6489 }
6490 }
6491 }
6492 pEID = ObjMgrGetEntityIDForPointer (prod);
6493 if (pEID != 0 && pEID != ajp->ajp.entityID &&
6494 SeqMgrFeaturesAreIndexed (pEID) == 0) {
6495 /* index far record so SeqMgrGetBestProteinFeature can work */
6496 SeqMgrIndexFeatures (pEID, NULL);
6497 }
6498 prot = SeqMgrGetBestProteinFeature (prod, &pcontext);
6499 if (prot != NULL) {
6500 prp = (ProtRefPtr) prot->data.value.ptrvalue;
6501 if (prp != NULL && prp->processed < 2) {
6502 qvp [FTQUAL_prot_note].str = prot->comment;
6503 }
6504 }
6505 }
6506
6507 /* protein xref overrides names, but should not prevent /protein_id, etc. */
6508
6509 prpxref = SeqMgrGetProtXref (sfp);
6510 if (prpxref != NULL) {
6511 prp = prpxref;
6512 }
6513 if (prp != NULL) {
6514 vnp = prp->name;
6515 if (vnp != NULL && (! StringHasNoText ((CharPtr) vnp->data.ptrvalue))) {
6516 qvp [FTQUAL_cds_product].str = (CharPtr) vnp->data.ptrvalue;
6517 vnp = vnp->next;
6518 if (ajp->flags.extraProductsToNote) {
6519 qvp [FTQUAL_prot_names].vnp = vnp;
6520 } else {
6521 qvp [FTQUAL_extra_products].vnp = vnp;
6522 }
6523 }
6524 qvp [FTQUAL_prot_desc].str = prp->desc;
6525 qvp [FTQUAL_prot_activity].vnp = prp->activity;
6526 qvp [FTQUAL_prot_EC_number].vnp = prp->ec;
6527 }
6528
6529 if (! pseudo) {
6530 if (prod != NULL || ajp->transIfNoProd || ajp->alwaysTranslCds) {
6531 if (doKey) {
6532 if (! ajp->hideTranslation) {
6533 qvp [FTQUAL_translation].ble = TRUE;
6534 }
6535 }
6536 }
6537 }
6538
6539 if (ifp->isCDS) {
6540 icp = (IntCdsBlockPtr) ifp;
6541 qvp [FTQUAL_figure].str = icp->fig;
6542 qvp [FTQUAL_maploc].str = icp->maploc;
6543 }
6544 } else {
6545 qvp [FTQUAL_coded_by].slp = sfp->location;
6546
6547 /* show /evidence on coded_by CDS */
6548
6549 qvp [FTQUAL_evidence].num = sfp->exp_ev;
6550
6551 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
6552 if (crp != NULL) {
6553 if (crp->frame > 1) {
6554 qvp [FTQUAL_codon_start].num = crp->frame;
6555 }
6556 gcp = crp->genetic_code;
6557 if (gcp != NULL) {
6558 for (vnp = gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
6559 if (vnp->choice == 2 && vnp->data.intvalue != 0) {
6560 qvp [FTQUAL_transl_table].num = vnp->data.intvalue;
6561 }
6562 }
6563
6564 /* suppress table 1 */
6565
6566 if (qvp [FTQUAL_transl_table].num == 1) {
6567 qvp [FTQUAL_transl_table].num = 0;
6568 }
6569 }
6570 for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
6571 seqcode = 0;
6572 sctp = NULL;
6573 cbaa = cbp->aa;
6574 switch (cbaa.choice) {
6575 case 1 :
6576 seqcode = Seq_code_ncbieaa;
6577 break;
6578 case 2 :
6579 seqcode = Seq_code_ncbi8aa;
6580 break;
6581 case 3 :
6582 seqcode = Seq_code_ncbistdaa;
6583 break;
6584 default :
6585 break;
6586 }
6587 if (seqcode != 0) {
6588 sctp = SeqCodeTableFind (seqcode);
6589 if (sctp != NULL) {
6590 residue = cbaa.value.intvalue;
6591 if (residue != 42) {
6592 if (seqcode != Seq_code_ncbieaa) {
6593 smtp = SeqMapTableFind (seqcode, Seq_code_ncbieaa);
6594 residue = SeqMapTableConvert (smtp, residue);
6595 }
6596 /*
6597 if (residue == 'U') {
6598 if (ajp->flags.selenocysteineToNote) {
6599 qvp [FTQUAL_selenocysteine_note].str = "selenocysteine";
6600 } else {
6601 qvp [FTQUAL_selenocysteine].ble = TRUE;
6602 }
6603 } else if (residue == 'O') {
6604 if (ajp->flags.pyrrolysineToNote) {
6605 qvp [FTQUAL_pyrrolysine_note].str = "pyrrolysine";
6606 } else {
6607 qvp [FTQUAL_pyrrolysine].ble = TRUE;
6608 }
6609 }
6610 */
6611 }
6612 }
6613 }
6614 }
6615 }
6616 }
6617 break;
6618 case SEQFEAT_PROT :
6619 if (! ifp->mapToPep) {
6620 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
6621 if (prp != NULL) {
6622 vnp = prp->name;
6623 if (vnp != NULL && (! StringHasNoText ((CharPtr) vnp->data.ptrvalue))) {
6624 qvp [FTQUAL_product].str = (CharPtr) vnp->data.ptrvalue;
6625 vnp = vnp->next;
6626 qvp [FTQUAL_prot_names].vnp = vnp;
6627 }
6628 if (format != GENPEPT_FMT) {
6629 qvp [FTQUAL_prot_desc].str = prp->desc;
6630 } else {
6631 qvp [FTQUAL_prot_name].str = prp->desc;
6632 }
6633 if (format != GENPEPT_FMT || prp->processed != 2) {
6634 qvp [FTQUAL_prot_activity].vnp = prp->activity;
6635 }
6636 qvp [FTQUAL_prot_EC_number].vnp = prp->ec;
6637 }
6638 sip = SeqLocIdForProduct (sfp->product);
6639 if (sip != NULL) {
6640 /* for RefSeq records or GenBank not release_mode */
6641 if (is_other || (! ajp->flags.forGbRelease)) {
6642 qvp [FTQUAL_protein_id].sip = sip;
6643 }
6644 prod = BioseqFind (sip);
6645 }
6646 } else {
6647 qvp [FTQUAL_derived_from].slp = sfp->location;
6648 sip = SeqLocIdForProduct (sfp->product);
6649 if (sip != NULL) {
6650 prod = BioseqFind (sip);
6651 if (prod != NULL) {
6652 prot = SeqMgrGetBestProteinFeature (prod, NULL);
6653 if (prot != NULL) {
6654 precursor_comment = prot->comment;
6655 }
6656 }
6657 }
6658 }
6659 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
6660 if (prp != NULL) {
6661 if (! pseudo) {
6662 if (ajp->showPeptide) {
6663 if (prp->processed == 2 || prp->processed == 3 || prp->processed == 4) {
6664 qvp [FTQUAL_peptide].ble = TRUE;
6665 }
6666 }
6667 if (format == GENPEPT_FMT && isProt && is_other) {
6668 /* enable calculated_mol_wt qualifier for RefSeq proteins */
6669 qvp [FTQUAL_mol_wt].ble = TRUE;
6670 }
6671 }
6672 if (prp->processed == 3 || prp->processed == 4) {
6673 if (! is_other) {
6674 /* Only RefSeq allows product on signal or transit peptide */
6675 qvp [FTQUAL_product].str = NULL;
6676 }
6677 }
6678 if (prp->processed == 1 && encode_prefix && (! is_other)) {
6679 qvp [FTQUAL_encodes].str = qvp [FTQUAL_product].str;
6680 qvp [FTQUAL_product].str = NULL;
6681 }
6682 }
6683 break;
6684 case SEQFEAT_RNA :
6685 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
6686 if (rrp != NULL) {
6687 if (rrp->pseudo) {
6688 pseudo = TRUE;
6689 }
6690 sip = SeqLocIdForProduct (sfp->product);
6691 if (sip != NULL) {
6692 /* for RefSeq records or GenBank not release_mode or entrez_mode */
6693 if (is_other || (ajp->mode == SEQUIN_MODE || ajp->mode == DUMP_MODE)) {
6694 qvp [FTQUAL_transcript_id].sip = sip;
6695 } else {
6696 /* otherwise now goes in note */
6697 qvp [FTQUAL_transcript_id_note].sip = sip; /* !!! remove October 15, 2003 !!! */
6698 }
6699
6700 if (! ajp->alwaysTranslCds) {
6701
6702 /* by default only show /transcription if product bioseq is within entity */
6703
6704 oldscope = SeqEntrySetScope (sep);
6705 prod = BioseqFind (sip);
6706 SeqEntrySetScope (oldscope);
6707
6708 if (prod == NULL && ajp->showFarTransl) {
6709
6710 /* but flag can override and force far /transcription */
6711
6712 prod = BioseqLockById (sip);
6713 unlockme = prod;
6714 }
6715 }
6716 }
6717 if (rrp->type == 2) {
6718 if (! pseudo) {
6719 if (ajp->showTranscript) {
6720 qvp [FTQUAL_transcription].ble = TRUE;
6721 }
6722 }
6723 }
6724 if (rrp->type == 3) {
6725 if (rrp->ext.choice == 1) {
6726 /* amino acid could not be parsed into structured form */
6727 if (! ajp->flags.dropIllegalQuals) {
6728 str = (CharPtr) rrp->ext.value.ptrvalue;
6729 qvp [FTQUAL_product].str = str;
6730 } else {
6731 qvp [FTQUAL_product].str = "tRNA-OTHER";
6732 }
6733 } else if (rrp->ext.choice == 2) {
6734 trna = (tRNAPtr) rrp->ext.value.ptrvalue;
6735 if (trna != NULL) {
6736 aa = 0;
6737 if (trna->aatype == 2) {
6738 aa = trna->aa;
6739 } else {
6740 from = 0;
6741 switch (trna->aatype) {
6742 case 0 :
6743 from = 0;
6744 break;
6745 case 1 :
6746 from = Seq_code_iupacaa;
6747 break;
6748 case 2 :
6749 from = Seq_code_ncbieaa;
6750 break;
6751 case 3 :
6752 from = Seq_code_ncbi8aa;
6753 break;
6754 case 4 :
6755 from = Seq_code_ncbistdaa;
6756 break;
6757 default:
6758 break;
6759 }
6760 if (ajp->flags.iupacaaOnly) {
6761 code = Seq_code_iupacaa;
6762 } else {
6763 code = Seq_code_ncbieaa;
6764 }
6765 smtp = SeqMapTableFind (code, from);
6766 if (smtp != NULL) {
6767 aa = SeqMapTableConvert (smtp, trna->aa);
6768 if (aa == 255 && from == Seq_code_iupacaa) {
6769 if (trna->aa == 'U') {
6770 aa = 'U';
6771 } else if (trna->aa == 'O') {
6772 aa = 'O';
6773 }
6774 }
6775 }
6776 }
6777 if (ajp->flags.iupacaaOnly) {
6778 smtp = SeqMapTableFind (Seq_code_iupacaa, Seq_code_ncbieaa);
6779 if (smtp != NULL) {
6780 aa = SeqMapTableConvert (smtp, trna->aa);
6781 } else {
6782 aa = 'X';
6783 }
6784 }
6785 if (aa > 0 && aa != 255) {
6786 /*
6787 if (aa == 'U') {
6788 if (ajp->flags.selenocysteineToNote) {
6789 qvp [FTQUAL_selenocysteine_note].str = "selenocysteine";
6790 } else {
6791 qvp [FTQUAL_selenocysteine].ble = TRUE;
6792 }
6793 } else if (aa == 'O') {
6794 if (ajp->flags.pyrrolysineToNote) {
6795 qvp [FTQUAL_pyrrolysine_note].str = "pyrrolysine";
6796 } else {
6797 qvp [FTQUAL_pyrrolysine].ble = TRUE;
6798 }
6799 }
6800 */
6801 if (ajp->mode == RELEASE_MODE || ajp->mode == ENTREZ_MODE) {
6802 /* O and J no longer quarantined */
6803 /*
6804 if (aa == 79 || aa == 74) {
6805 aa = 88;
6806 }
6807 */
6808 }
6809 /* - no gaps now that O and J are added
6810 if (aa <= 74) {
6811 shift = 0;
6812 } else if (aa > 79) {
6813 shift = 2;
6814 } else {
6815 shift = 1;
6816 }
6817 */
6818 if (aa != '*') {
6819 idx = aa - (64 /* + shift */);
6820 } else {
6821 idx = 25;
6822 }
6823 if (idx > 0 && idx < 28) {
6824 str = trnaList [idx];
6825 qvp [FTQUAL_product].str = str;
6826 if (StringNICmp (str, "tRNA-", 5) == 0) {
6827 qvp [FTQUAL_trna_aa].str = str + 5;
6828 }
6829 }
6830 }
6831 qvp [FTQUAL_anticodon].slp = trna->anticodon;
6832 if (ajp->flags.codonRecognizedToNote) {
6833 qvp [FTQUAL_trna_codons_note].trp = trna;
6834 } else {
6835 qvp [FTQUAL_trna_codons].trp = trna;
6836 }
6837 }
6838 }
6839 } else {
6840 if (rrp->ext.choice == 1) {
6841 if (rrp->type == 255) {
6842 str = (CharPtr) rrp->ext.value.ptrvalue;
6843 if (StringCmp (str, "ncRNA") == 0 ||
6844 StringCmp (str, "tmRNA") == 0) {
6845 /* pick up product from gbqual */
6846 } else if (StringICmp (str, "misc_RNA") == 0) {
6847 is_misc_rna = TRUE;
6848 /* pick up product from gbqual */
6849 } else {
6850 str = (CharPtr) rrp->ext.value.ptrvalue;
6851 qvp [FTQUAL_product].str = str;
6852 }
6853 } else {
6854 str = (CharPtr) rrp->ext.value.ptrvalue;
6855 qvp [FTQUAL_product].str = str;
6856 }
6857 }
6858 }
6859 if (rrp->type == 10) {
6860 is_misc_rna = TRUE;
6861 }
6862 if (rrp->ext.choice == 3) {
6863 rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
6864 if (rgp != NULL) {
6865 if (StringDoesHaveText (rgp->product)) {
6866 qvp [FTQUAL_product].str = rgp->product;
6867 }
6868 if (StringDoesHaveText (rgp->_class)) {
6869 if (IsStringInNcRNAClassList (rgp->_class)) {
6870 qvp [FTQUAL_ncRNA_other].str = rgp->_class;
6871 } else {
6872 qvp [FTQUAL_ncRNA_other].str = "other";
6873 qvp [FTQUAL_ncRNA_note].str = rgp->_class;
6874 }
6875 }
6876 for (rqsp = rgp->quals; rqsp != NULL; rqsp = rqsp->next) {
6877 if (StringICmp (rqsp->qual, "tag_peptide") == 0) {
6878 if (StringDoesHaveText (rqsp->val)) {
6879 qvp [FTQUAL_tag_peptide_str].str = rqsp->val;
6880 }
6881 }
6882 }
6883 }
6884 }
6885 }
6886 if (rrp != NULL && rrp->ext.choice == 3) {
6887 rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
6888 if (rgp != NULL) {
6889 if (StringDoesHaveText (rgp->product)) {
6890 str = rgp->product;
6891 if (StringNICmp (str, "internal transcribed spacer ", 28) == 0) {
6892 str += 28;
6893 if (IS_DIGIT (*str) && str [1] == '\0') {
6894 its_prod = str;
6895 }
6896 }
6897 }
6898 }
6899 }
6900 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
6901 if (StringCmp (gbq->qual, "product") != 0) continue;
6902 if (StringDoesHaveText (gbq->val)) {
6903 str = gbq->val;
6904 if (StringNICmp (str, "internal transcribed spacer ", 28) == 0) {
6905 str += 28;
6906 if (IS_DIGIT (*str) && str [1] == '\0') {
6907 its_prod = str;
6908 }
6909 }
6910 }
6911 }
6912 if (is_misc_rna && StringDoesHaveText (its_prod)) {
6913 if (StringCmp (its_prod, "1") == 0) {
6914 qvp [FTQUAL_rrna_its].str = "ITS1";
6915 } else if (StringCmp (its_prod, "2") == 0) {
6916 qvp [FTQUAL_rrna_its].str = "ITS2";
6917 } else if (StringCmp (its_prod, "3") == 0) {
6918 qvp [FTQUAL_rrna_its].str = "ITS3";
6919 }
6920 }
6921 break;
6922 case SEQFEAT_REGION :
6923 if (format == GENPEPT_FMT && featdeftype == FEATDEF_REGION && isProt) {
6924 qvp [FTQUAL_region_name].str = (CharPtr) sfp->data.value.ptrvalue;
6925 } else {
6926 qvp [FTQUAL_region].str = (CharPtr) sfp->data.value.ptrvalue;
6927 }
6928 if (sfp->ext != NULL) {
6929 uop = FindUopByTag (sfp->ext, "cddScoreData");
6930 if (uop != NULL) {
6931 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
6932 if (ufp->choice != 1) continue;
6933 oip = ufp->label;
6934 if (oip == NULL) continue;
6935 if (StringICmp (oip->str, "definition") == 0) {
6936 str = (CharPtr) ufp->data.ptrvalue;
6937 if (StringDoesHaveText (str)) {
6938 qvp [FTQUAL_cdd_definition].str = str;
6939 }
6940 }
6941 }
6942 }
6943 }
6944 break;
6945 case SEQFEAT_COMMENT :
6946 break;
6947 case SEQFEAT_BOND :
6948 bondidx = (Int2) sfp->data.value.intvalue;
6949 if (bondidx == 255) {
6950 bondidx = 5;
6951 }
6952 if (bondidx > 0 && bondidx < 6) {
6953 if (format == GENPEPT_FMT && isProt) {
6954 qvp [FTQUAL_bond_type].str = bondList [bondidx];
6955 } else {
6956 qvp [FTQUAL_bond].str = bondList [bondidx];
6957 }
6958 }
6959 break;
6960 case SEQFEAT_SITE :
6961 siteidx = (Int2) sfp->data.value.intvalue;
6962 if (siteidx == 255) {
6963 siteidx = 27;
6964 }
6965 if (siteidx > 0 && siteidx < 28) {
6966 if (format == GENPEPT_FMT && isProt) {
6967 qvp [FTQUAL_site_type].str = siteFFList [siteidx];
6968 } else {
6969 qvp [FTQUAL_site].str = siteFFList [siteidx];
6970 }
6971 }
6972 break;
6973 case SEQFEAT_PSEC_STR :
6974 qvp [FTQUAL_sec_str_type].num = sfp->data.value.intvalue;
6975 break;
6976 case SEQFEAT_HET :
6977 qvp [FTQUAL_heterogen].str = (CharPtr) sfp->data.value.ptrvalue;
6978 break;
6979 default :
6980 break;
6981 }
6982 }
6983
6984 /* common fields set here */
6985
6986 if (ajp->mode == DUMP_MODE && qvp [FTQUAL_gene_syn_refseq].vnp != NULL) {
6987 qvp [FTQUAL_gene_syn].vnp = qvp [FTQUAL_gene_syn_refseq].vnp;
6988 qvp [FTQUAL_gene_syn_refseq].vnp = NULL;
6989 }
6990
6991 VisitUserObjectsInUop (sfp->ext, (Pointer) qvp, RecordUserObjectsInQVP);
6992
6993 if (ajp->hideGoTerms) {
6994 qvp [FTQUAL_go_process].ufp = NULL;
6995 qvp [FTQUAL_go_component].ufp = NULL;
6996 qvp [FTQUAL_go_function].ufp = NULL;
6997 }
6998
6999 if (featdeftype == FEATDEF_repeat_region) {
7000 pseudo = FALSE;
7001 }
7002
7003 qvp [FTQUAL_pseudo].ble = pseudo;
7004
7005 qvp [FTQUAL_seqfeat_note].str = sfp->comment;
7006
7007 sap = fcontext->sap;
7008 if (sap != NULL) {
7009 annotDescCommentToComment = FALSE;
7010 for (adp = sap->desc; adp != NULL; adp = adp->next) {
7011 if (adp->choice == Annot_descr_comment) {
7012 if (StringDoesHaveText ((CharPtr) adp->data.ptrvalue)) {
7013 qvp [FTQUAL_seqannot_note].str = (CharPtr) adp->data.ptrvalue;
7014 }
7015 } else if (adp->choice == Annot_descr_user) {
7016 uop = (UserObjectPtr) adp->data.ptrvalue;
7017 if (uop == NULL) continue;
7018 oip = uop->type;
7019 if (oip == NULL) continue;
7020 if (StringCmp (oip->str, "AnnotDescCommentPolicy") == 0) {
7021 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
7022 oip = ufp->label;
7023 if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
7024 if (StringCmp (oip->str, "Policy") == 0) {
7025 if (StringICmp ((CharPtr) ufp->data.ptrvalue, "ShowInComment") == 0) {
7026 annotDescCommentToComment = TRUE;
7027 }
7028 }
7029 }
7030 }
7031 }
7032 }
7033 if (annotDescCommentToComment) {
7034 qvp [FTQUAL_seqannot_note].str = NULL;
7035 }
7036 }
7037
7038 /* if RELEASE_MODE, check list of features that can have /pseudo */
7039
7040 if (ajp->flags.dropIllegalQuals && pseudo &&
7041 (seqfeattype == SEQFEAT_RNA || seqfeattype == SEQFEAT_IMP) ) {
7042 switch (featdeftype) {
7043
7044 case FEATDEF_allele:
7045 case FEATDEF_attenuator:
7046 case FEATDEF_CAAT_signal:
7047 case FEATDEF_conflict:
7048 case FEATDEF_D_loop:
7049 case FEATDEF_enhancer:
7050 case FEATDEF_GC_signal:
7051 case FEATDEF_iDNA:
7052 case FEATDEF_LTR:
7053 case FEATDEF_misc_binding:
7054 case FEATDEF_misc_difference:
7055 case FEATDEF_misc_recomb:
7056 case FEATDEF_misc_signal:
7057 case FEATDEF_misc_structure:
7058 case FEATDEF_modified_base:
7059 case FEATDEF_mutation:
7060 case FEATDEF_old_sequence:
7061 case FEATDEF_polyA_signal:
7062 case FEATDEF_polyA_site:
7063 case FEATDEF_precursor_RNA:
7064 case FEATDEF_prim_transcript:
7065 case FEATDEF_primer_bind:
7066 case FEATDEF_protein_bind:
7067 case FEATDEF_RBS:
7068 case FEATDEF_repeat_region:
7069 case FEATDEF_repeat_unit:
7070 case FEATDEF_rep_origin:
7071 case FEATDEF_satellite:
7072 case FEATDEF_stem_loop:
7073 case FEATDEF_STS:
7074 case FEATDEF_TATA_signal:
7075 case FEATDEF_terminator:
7076 case FEATDEF_unsure:
7077 case FEATDEF_variation:
7078 case FEATDEF_3clip:
7079 case FEATDEF_3UTR:
7080 case FEATDEF_5clip:
7081 case FEATDEF_5UTR:
7082 case FEATDEF_10_signal:
7083 case FEATDEF_35_signal:
7084 qvp [FTQUAL_pseudo].ble = FALSE;
7085 break;
7086 default:
7087 break;
7088 }
7089 }
7090
7091 /*
7092 if (format != GENPEPT_FMT) {
7093 qvp [FTQUAL_evidence].num = sfp->exp_ev;
7094 }
7095 */
7096 qvp [FTQUAL_evidence].num = sfp->exp_ev;
7097
7098 if (sfp->excpt && StringDoesHaveText (sfp->except_text)) {
7099 ParseException (sfp->except_text,
7100 &exception_string,
7101 &exception_note,
7102 is_other,
7103 (Boolean) (! ajp->flags.dropIllegalQuals),
7104 sfp->idx.subtype,
7105 &riboSlippage,
7106 &transSplice);
7107
7108 qvp [FTQUAL_exception].str = exception_string;
7109 qvp [FTQUAL_exception_note].str = exception_note;
7110 qvp [FTQUAL_ribosomal_slippage].ble = riboSlippage;
7111 qvp [FTQUAL_trans_splicing].ble = transSplice;
7112
7113 /*
7114 if (StringHasNoText (qvp [FTQUAL_exception].str)) {
7115 qvp [FTQUAL_exception].str = NULL;
7116 }
7117 if (StringHasNoText (qvp [FTQUAL_exception_note].str)) {
7118 qvp [FTQUAL_exception_note].str = NULL;
7119 }
7120 */
7121 }
7122
7123 qvp [FTQUAL_db_xref].vnp = sfp->dbxref;
7124 qvp [FTQUAL_citation].vnp = sfp->cit;
7125
7126 /* /product same as sfp->comment will suppress /note */
7127
7128 if (! StringHasNoText (qvp [FTQUAL_product].str) &&
7129 StringICmp (sfp->comment, qvp [FTQUAL_product].str) == 0) {
7130 qvp [FTQUAL_seqfeat_note].str = NULL;
7131 }
7132 /* case sensitive AJ011317.1 */
7133 if (! StringHasNoText (qvp [FTQUAL_cds_product].str) &&
7134 StringCmp (sfp->comment, qvp [FTQUAL_cds_product].str) == 0) {
7135 qvp [FTQUAL_seqfeat_note].str = NULL;
7136 }
7137
7138 /* /gene same as sfp->comment will suppress /note */
7139 /* case sensitive -gi|6572973|gb|AF195052.1|AF195052 */
7140
7141 if (! StringHasNoText (qvp [FTQUAL_gene].str) &&
7142 StringCmp (sfp->comment, qvp [FTQUAL_gene].str) == 0) {
7143 qvp [FTQUAL_seqfeat_note].str = NULL;
7144 }
7145
7146 /* gene /note same as sfp->comment will suppress /note - U92435.1 says do not do this */
7147
7148 /*
7149 if (! StringHasNoText (qvp [FTQUAL_gene_note].str) &&
7150 StringICmp (sfp->comment, qvp [FTQUAL_gene_note].str) == 0) {
7151 qvp [FTQUAL_seqfeat_note].str = NULL;
7152 }
7153 */
7154
7155 /* if site sfp->comment contains site name, suppress site in /note */
7156
7157 if (! StringHasNoText (qvp [FTQUAL_site].str) &&
7158 StringStr (sfp->comment, qvp [FTQUAL_site].str) != NULL) {
7159 qvp [FTQUAL_site].str = NULL;
7160 }
7161
7162 /* /EC_number same as sfp->comment will suppress /note */
7163
7164 for (vnp = qvp [FTQUAL_prot_EC_number].vnp; vnp != NULL; vnp = vnp->next) {
7165 str = (CharPtr) vnp->data.ptrvalue;
7166 if ((! StringHasNoText (str)) &&
7167 StringICmp (sfp->comment, str) == 0) {
7168 qvp [FTQUAL_seqfeat_note].str = NULL;
7169 }
7170 }
7171
7172 /* mat_peptide precursor same as sfp->comment will suppress /note in GenPept */
7173
7174 if (precursor_comment != NULL && StringCmp (precursor_comment, sfp->comment) == 0) {
7175 qvp [FTQUAL_seqfeat_note].str = NULL;
7176 }
7177
7178
7179 /* now go through gbqual list */
7180
7181 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
7182 idx = GbqualToFeaturIndex (gbq->qual);
7183 if (idx > 0 && idx < ASN2GNBK_TOTAL_FEATUR) {
7184 if (qvp [idx].gbq == NULL) {
7185 if (idx == FTQUAL_product_quals) {
7186 if (qvp [FTQUAL_product].str == NULL) {
7187 qvp [FTQUAL_product].str = gbq->val;
7188 } else if (qvp [FTQUAL_xtra_prod_quals].gbq == NULL) {
7189 /* chain will include remaining product gbquals */
7190 qvp [FTQUAL_xtra_prod_quals].gbq = gbq;
7191 }
7192 } else {
7193 qvp [idx].gbq = gbq;
7194 }
7195 }
7196
7197 } else if (idx == 0) {
7198
7199 qualclass = IllegalGbqualToClass (gbq->qual);
7200 if (qualclass == 0) {
7201 qualclass = Qual_class_quote;
7202 }
7203 tmp = StringSave (gbq->val);
7204 if (tmp != NULL) {
7205 str = MemNew (sizeof (Char) * (StringLen (gbq->qual) + StringLen (tmp) + 10));
7206 if (str != NULL) {
7207 if (qualclass == Qual_class_quote) {
7208 if (StringIsJustQuotes (tmp)) {
7209 sprintf (str, "/%s", gbq->qual);
7210 } else {
7211 ptr = tmp;
7212 ch = *ptr;
7213 while (ch != '\0') {
7214 if (ch == '"') {
7215 *ptr = '\'';
7216 }
7217 ptr++;
7218 ch = *ptr;
7219 }
7220 sprintf (str, "/%s=\"%s\"", gbq->qual, tmp);
7221 }
7222 ValNodeCopyStr (&illegal, 0, str);
7223 } else if (qualclass == Qual_class_noquote || qualclass == Qual_class_label) {
7224 if (StringIsJustQuotes (tmp)) {
7225 sprintf (str, "/%s", gbq->qual);
7226 } else {
7227 sprintf (str, "/%s=%s", gbq->qual, tmp);
7228 }
7229 ValNodeCopyStr (&illegal, 0, str);
7230 }
7231 MemFree (str);
7232 }
7233 MemFree (tmp);
7234 }
7235 }
7236 }
7237
7238 /* experiment and inference qualifiers supercede evidence qualifier */
7239
7240 if (qvp [FTQUAL_evidence].num > 0) {
7241 if (qvp [FTQUAL_experiment].gbq != NULL || qvp [FTQUAL_inference].gbq != NULL) {
7242 qvp [FTQUAL_evidence].num = 0;
7243 } else if (qvp [FTQUAL_evidence].num == 1) {
7244 qvp [FTQUAL_experiment_string].str = "experimental evidence, no additional details recorded";
7245 qvp [FTQUAL_evidence].num = 0;
7246 } else if (qvp [FTQUAL_evidence].num == 2) {
7247 qvp [FTQUAL_inference_string].str = "non-experimental evidence, no additional details recorded";
7248 qvp [FTQUAL_evidence].num = 0;
7249 }
7250 }
7251
7252 if (qvp [FTQUAL_ncRNA_class].gbq != NULL) {
7253 gbq = qvp [FTQUAL_ncRNA_class].gbq;
7254 if (StringDoesHaveText (gbq->val)) {
7255 if (! IsStringInNcRNAClassList (gbq->val)) {
7256 qvp [FTQUAL_ncRNA_other].str = "other";
7257 qvp [FTQUAL_ncRNA_note].str = gbq->val;
7258 qvp [FTQUAL_ncRNA_class].gbq = NULL;
7259 }
7260 }
7261 }
7262
7263 if (ajp->mode != DUMP_MODE) {
7264 ParseInference (qvp [FTQUAL_inference].gbq, &good_inference, &bad_inference);
7265 qvp [FTQUAL_inference_good].vnp = good_inference;
7266 qvp [FTQUAL_inference_bad].vnp = bad_inference;
7267 qvp [FTQUAL_inference].gbq = NULL;
7268 }
7269
7270 /* optionally suppress evidence, inference and experiment qualifiers */
7271
7272 if (ajp->hideEvidence) {
7273 qvp [FTQUAL_inference_good].vnp = NULL;
7274 qvp [FTQUAL_inference_bad].vnp = NULL;
7275 qvp [FTQUAL_inference].gbq = NULL;
7276 qvp [FTQUAL_experiment].gbq = NULL;
7277 qvp [FTQUAL_evidence].num = 0;
7278 qvp [FTQUAL_experiment_string].gbq = NULL;
7279 qvp [FTQUAL_inference_string].gbq = NULL;
7280 }
7281
7282 /* special handling for cyt_map, gen_map, rad_map */
7283
7284 if (ajp->flags.hideSpecificGeneMaps) {
7285 if (qvp [FTQUAL_gene_map].str == NULL) {
7286 gbq = qvp [FTQUAL_gene_cyt_map].gbq;
7287 if (gbq != NULL) {
7288 qvp [FTQUAL_gene_map].str = gbq->val;
7289 } else {
7290 gbq = qvp [FTQUAL_gene_gen_map].gbq;
7291 if (gbq != NULL) {
7292 qvp [FTQUAL_gene_map].str = gbq->val;
7293 } else {
7294 gbq = qvp [FTQUAL_gene_rad_map].gbq;
7295 if (gbq != NULL) {
7296 qvp [FTQUAL_gene_map].str = gbq->val;
7297 }
7298 }
7299 }
7300 }
7301 qvp [FTQUAL_gene_cyt_map].gbq = NULL;
7302 qvp [FTQUAL_gene_gen_map].gbq = NULL;
7303 qvp [FTQUAL_gene_rad_map].gbq = NULL;
7304 }
7305
7306 /* illegal qualifiers are copied and formatted in valnode chain */
7307
7308 if (! ajp->flags.dropIllegalQuals) {
7309 qvp [FTQUAL_illegal_qual].vnp = illegal;
7310 }
7311
7312 /* remove protein description that equals the gene name, case sensitive */
7313
7314 if (StringCmp (qvp [FTQUAL_gene].str, qvp [FTQUAL_prot_desc].str) == 0) {
7315 qvp [FTQUAL_prot_desc].str = NULL;
7316 }
7317
7318 /* remove protein description that equals the cds product, case sensitive */
7319
7320 if (StringCmp (qvp [FTQUAL_cds_product].str, qvp [FTQUAL_prot_desc].str) == 0) {
7321 qvp [FTQUAL_prot_desc].str = NULL;
7322 }
7323
7324 /* remove comment contained in prot_desc - gi|4530123|gb|AF071539.1|AF071539 */
7325
7326 if (StringStr (qvp [FTQUAL_prot_desc].str, qvp [FTQUAL_seqfeat_note].str) != NULL) {
7327 qvp [FTQUAL_seqfeat_note].str = NULL;
7328 }
7329
7330 /* remove protein description that equals the standard name */
7331
7332 if (qvp [FTQUAL_standard_name].gbq != NULL && qvp [FTQUAL_prot_desc].str != NULL) {
7333 gbq = qvp [FTQUAL_standard_name].gbq;
7334 lasttype = gbq->qual;
7335 while (gbq != NULL && StringICmp (gbq->qual, lasttype) == 0) {
7336 if (StringICmp (gbq->val, qvp [FTQUAL_prot_desc].str) == 0) {
7337 qvp [FTQUAL_prot_desc].str = NULL;
7338 }
7339 gbq = gbq->next;
7340 }
7341 }
7342
7343 /* remove protein description that equals a gene synonym - case insensitive AF109216.1 */
7344
7345 for (vnp = gene_syn; vnp != NULL; vnp = vnp->next) {
7346 str = (CharPtr) vnp->data.ptrvalue;
7347 if ((! StringHasNoText (str)) &&
7348 StringCmp (str, qvp [FTQUAL_prot_desc].str) == 0) {
7349 /* NC_001823 leave in prot_desc if no cds_product */
7350 if (qvp [FTQUAL_cds_product].str != NULL) {
7351 qvp [FTQUAL_prot_desc].str = NULL;
7352 }
7353 }
7354 }
7355
7356 /* remove comment that equals a gene synonym */
7357
7358 if (format != GENPEPT_FMT && (! ifp->mapToProt)) {
7359 for (vnp = gene_syn; vnp != NULL; vnp = vnp->next) {
7360 str = (CharPtr) vnp->data.ptrvalue;
7361 if ((! StringHasNoText (str)) &&
7362 StringICmp (str, qvp [FTQUAL_seqfeat_note].str) == 0) {
7363 qvp [FTQUAL_seqfeat_note].str = NULL;
7364 }
7365 }
7366 }
7367
7368 /* remove protein comment descriptor that equals the protein note */
7369
7370 if (StringCmp (qvp [FTQUAL_prot_note].str, qvp [FTQUAL_prot_comment].str) == 0) {
7371 qvp [FTQUAL_prot_comment].str = NULL;
7372 }
7373
7374 /* suppress cds comment if a subset of protein note - AF002218.1 */
7375
7376 if (StringStr (qvp [FTQUAL_prot_note].str, qvp [FTQUAL_seqfeat_note].str) != NULL) {
7377 qvp [FTQUAL_seqfeat_note].str = NULL;
7378 }
7379
7380 /* suppress selenocysteine note if already in comment */
7381
7382 if (StringStr (sfp->comment, "selenocysteine") != NULL) {
7383 qvp [FTQUAL_selenocysteine_note].str = NULL;
7384 }
7385
7386 /* suppress pyrrolysine note if already in comment */
7387
7388 if (StringStr (sfp->comment, "pyrrolysine") != NULL) {
7389 qvp [FTQUAL_pyrrolysine_note].str = NULL;
7390 }
7391
7392 /* if /allele inherited from gene, suppress allele gbqual on feature */
7393
7394 if (qvp [FTQUAL_gene_allele].str != NULL) {
7395 qvp [FTQUAL_allele].gbq = NULL;
7396 }
7397
7398 /* now print qualifiers from table */
7399
7400 #ifdef DISPLAY_STRINGS
7401 s_DisplayQVP(qvp, feat_note_order);
7402 #endif
7403
7404 /* Strip duplicate notes */
7405
7406 if ((StringCmp(qvp[FTQUAL_product].str,
7407 qvp[FTQUAL_seqfeat_note].str) == 0)) {
7408 qvp[FTQUAL_seqfeat_note].str = NULL;
7409 }
7410
7411 if ((qvp[FTQUAL_standard_name].gbq != NULL) &&
7412 (qvp[FTQUAL_standard_name].gbq->val != NULL)) {
7413 if ((StringCmp(qvp[FTQUAL_seqfeat_note].str,
7414 qvp[FTQUAL_standard_name].gbq->val) == 0)) {
7415 qvp[FTQUAL_seqfeat_note].str = NULL;
7416 }
7417 }
7418
7419 /* Display strings for debugging purposes */
7420
7421 #ifdef DISPLAY_STRINGS
7422 s_DisplayQVP(qvp, feat_qual_order);
7423 #endif
7424
7425 /*
7426 qvp[FTQUAL_loc_debug_str1].str = firstloc;
7427 qvp[FTQUAL_loc_debug_str2].str = secondloc;
7428 qvp[FTQUAL_loc_debug_str3].str = thirdloc;
7429 */
7430
7431 /* optionally populate indexes for NCBI internal database */
7432
7433 if (index != NULL) {
7434 if (! StringHasNoText (qvp [FTQUAL_gene].str)) {
7435 ValNodeCopyStrToHead (&(index->genes), 0, qvp [FTQUAL_gene].str);
7436 }
7437 }
7438
7439 if (doKey) {
7440 FFAddOneChar(ffstring, '\n', FALSE);
7441 }
7442
7443 /* Build the flat file */
7444 FormatFeatureBlockQuals (ffstring, ajp, asp, bsp, featdeftype, gene_syn,
7445 lasttype, location, prod,
7446 protein_pid_g, qvp,
7447 left, right, strand,
7448 sfp, target, ifp, is_other,
7449 is_journalscan, is_gps, is_ged);
7450
7451 /* ??? and then deal with the various note types separately (not in order table) ??? */
7452
7453 /* free aa-dna or dna-aa mapped location */
7454
7455 SeqLocFree (loc);
7456
7457 /*
7458 MemFree (firstloc);
7459 MemFree (secondloc);
7460 MemFree (thirdloc);
7461 */
7462
7463 ValNodeFreeData (illegal);
7464 MemFree (exception_string);
7465 MemFree (exception_note);
7466 ValNodeFreeData (good_inference);
7467 ValNodeFreeData (bad_inference);
7468
7469 BioseqUnlock (unlockme);
7470
7471 str = FFEndPrint (ajp, ffstring, format, 21, 21, 21, 21, "FT");
7472
7473 /* optionally populate gbseq for XML-ized GenBank format */
7474
7475 if (gbseq != NULL) {
7476 if (gbfeat != NULL) {
7477 AddFeatureToGbseq (gbseq, gbfeat, str, sfp);
7478 }
7479 }
7480
7481 FFRecycleString(ajp, ffstring);
7482 return str;
7483 }
7484
7485 NLM_EXTERN CharPtr FormatFeatureBlock (
7486 Asn2gbFormatPtr afp,
7487 BaseBlockPtr bbp
7488 )
7489
7490 {
7491 IntAsn2gbJobPtr ajp;
7492 Asn2gbSectPtr asp;
7493 BioseqPtr bsp;
7494 SeqMgrFeatContext fcontext;
7495 FmtType format;
7496 ValNodePtr head;
7497 QualValPtr qvp;
7498 SeqFeatPtr sfp;
7499 CharPtr str;
7500 BioseqPtr target;
7501
7502 if (afp == NULL || bbp == NULL) return NULL;
7503 asp = afp->asp;
7504 if (asp == NULL) return NULL;
7505 target = asp->target;
7506 bsp = asp->bsp;
7507 if (target == NULL || bsp == NULL) return NULL;
7508 ajp = afp->ajp;
7509 if (ajp == NULL) return NULL;
7510 qvp = afp->qvp;
7511 if (qvp == NULL) return NULL;
7512 format = afp->format;
7513
7514 /* all features in this list are known to be valid for the designated mode */
7515
7516 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
7517 if (sfp == NULL) return NULL;
7518
7519 /* five-column feature table uses special code for formatting */
7520
7521 if (ajp->format == FTABLE_FMT) {
7522 head = NULL;
7523 PrintFtableLocAndQuals (ajp, &head, target, sfp, &fcontext);
7524 str = MergeFFValNodeStrs (head);
7525 ValNodeFreeData (head);
7526 return str;
7527 }
7528
7529 /* otherwise do regular flatfile formatting */
7530
7531 return FormatFeatureBlockEx (ajp, asp, bsp, target, sfp, &fcontext, qvp,
7532 format, (IntFeatBlockPtr) bbp, ISA_aa (bsp->mol), TRUE);
7533 }
7534
7535 const CharPtr feature_table_header_format = ">Feature %s\n";
7536
7537 NLM_EXTERN CharPtr FormatFeatHeaderBlock (
7538 Asn2gbFormatPtr afp,
7539 BaseBlockPtr bbp
7540 )
7541
7542 {
7543 IntAsn2gbJobPtr ajp;
7544 Asn2gbSectPtr asp;
7545 BioseqPtr bsp;
7546 Char ch;
7547 Boolean has_space;
7548 Char id [128];
7549 ObjectIdPtr oip;
7550 CharPtr ptr;
7551 SeqIdPtr sip;
7552 SeqIdPtr sip2;
7553 CharPtr str = NULL;
7554 BioseqPtr target;
7555 CharPtr tmp = NULL;
7556
7557 if (afp == NULL || bbp == NULL) return NULL;
7558 ajp = afp->ajp;
7559 if (ajp == NULL) return NULL;
7560 asp = afp->asp;
7561 if (asp == NULL) return NULL;
7562 target = asp->target;
7563 bsp = asp->bsp;
7564 if (target == NULL || bsp == NULL) return NULL;
7565
7566 /* five-column feature table uses special code for formatting */
7567
7568 if (ajp->format == FTABLE_FMT) {
7569 sip = SeqIdFindBest (target->id, 0);
7570 if (sip == NULL) return NULL;
7571 id [0] = '\0';
7572
7573 if (sip->choice == SEQID_GI) {
7574 sip2 = GetSeqIdForGI (sip->data.intvalue);
7575 if (sip2 != NULL) {
7576 sip = sip2;
7577 }
7578 }
7579 if (sip->choice == SEQID_LOCAL) {
7580 oip = (ObjectIdPtr) sip->data.ptrvalue;
7581 if (oip != NULL && StringDoesHaveText (oip->str)) {
7582 has_space = FALSE;
7583 ptr = oip->str;
7584 ch = *ptr;
7585 while (ch != '\0') {
7586 if (IS_WHITESP (ch)) {
7587 has_space = TRUE;
7588 }
7589 ptr++;
7590 ch = *ptr;
7591 }
7592 if (has_space) {
7593 sprintf (id, "lcl|%c%s%c", (char) '"', oip->str, (char) '"');
7594 }
7595 }
7596 }
7597
7598 if (id [0] == '\0') {
7599 SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
7600 }
7601 if (! StringHasNoText (id)) {
7602 tmp = (CharPtr) MemNew ((StringLen(feature_table_header_format) + StringLen(id)) * sizeof(Char));
7603 sprintf (tmp, ">Feature %s\n", id);
7604 str = tmp;
7605 }
7606 return str;
7607 }
7608
7609 /* otherwise do regular flatfile formatting */
7610
7611 return StringSaveNoNull (bbp->string);
7612 }
7613
7614
7615 /* stand alone function to produce qualifiers in genbank style */
7616
7617 static void StripLeadingSpaces (
7618 CharPtr str
7619 )
7620
7621 {
7622 Uchar ch;
7623 CharPtr dst;
7624 CharPtr ptr;
7625
7626
7627 if (str == NULL || str [0] == '\0') return;
7628
7629 dst = str;
7630 ptr = str;
7631 ch = *ptr;
7632 while (ch != '\0') {
7633 while (ch == ' ') {
7634 ptr++;
7635 ch = *ptr;
7636 }
7637 while (ch != '\n' && ch != '\r') {
7638 *dst = ch;
7639 dst++;
7640 ptr++;
7641 ch = *ptr;
7642 }
7643 *dst = ch;
7644 dst++;
7645 ptr++;
7646 ch = *ptr;
7647 }
7648 *dst = '\0';
7649 }
7650
7651 NLM_EXTERN void DoImmediateRemoteFeatureFormat (
7652 Asn2gbFormatPtr afp,
7653 BaseBlockPtr bbp,
7654 SeqFeatPtr sfp
7655 )
7656
7657 {
7658 IntAsn2gbJobPtr ajp;
7659 Asn2gbSectPtr asp;
7660 BlockType blocktype;
7661 BioseqPtr bsp;
7662 SeqMgrFeatContext fcontext;
7663 size_t max;
7664 SeqEntryPtr oldscope;
7665 QualValPtr qvp = NULL;
7666 SeqEntryPtr sep;
7667 SeqLocPtr slp;
7668 CharPtr str = NULL;
7669 BioseqPtr target;
7670
7671 if (afp == NULL || bbp == NULL || sfp == NULL) return;
7672 asp = afp->asp;
7673 if (asp == NULL) return;
7674 target = asp->target;
7675 bsp = asp->bsp;
7676 if (target == NULL || bsp == NULL) return;
7677 ajp = afp->ajp;
7678 if (ajp == NULL) return;
7679
7680 blocktype = bbp->blocktype;
7681 if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return;
7682
7683 max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
7684 qvp = MemNew (sizeof (QualVal) * (max + 5));
7685 if (qvp == NULL) return;
7686
7687 MemSet ((Pointer) &fcontext, 0, sizeof (SeqMgrFeatContext));
7688 fcontext.itemID = 0;
7689 fcontext.featdeftype = sfp->idx.subtype;
7690 fcontext.seqfeattype = sfp->data.choice;
7691 slp = sfp->location;
7692 fcontext.left = GetOffsetInBioseq (slp, bsp, SEQLOC_LEFT_END);
7693 fcontext.right = GetOffsetInBioseq (slp, bsp, SEQLOC_RIGHT_END);
7694 fcontext.strand = SeqLocStrand (slp);
7695
7696 sep = GetTopSeqEntryForEntityID (bbp->entityID);
7697
7698 oldscope = SeqEntrySetScope (sep);
7699
7700 if (ajp->format != FTABLE_FMT) {
7701 str = FormatFeatureBlockEx (ajp, asp, bsp, target, sfp, &fcontext, qvp,
7702 ajp->format, (IntFeatBlockPtr) bbp, ISA_aa (bsp->mol), TRUE);
7703 }
7704
7705 SeqEntrySetScope (oldscope);
7706
7707 if (str != NULL) {
7708 if (afp->fp != NULL) {
7709 fprintf (afp->fp, "%s", str);
7710 }
7711 if (afp->ffwrite != NULL) {
7712 afp->ffwrite (str, afp->userdata, blocktype, sfp->idx.entityID, OBJ_SEQFEAT, sfp->idx.itemID);
7713 }
7714 } else {
7715 if (afp->fp != NULL) {
7716 fprintf (afp->fp, "?\n");
7717 }
7718 if (afp->ffwrite != NULL) {
7719 afp->ffwrite ("?\n", afp->userdata, blocktype, sfp->idx.entityID, OBJ_SEQFEAT, sfp->idx.itemID);
7720 }
7721 }
7722
7723 MemFree (str);
7724 MemFree (qvp
7725 );
7726 }
7727
7728 NLM_EXTERN CharPtr FormatFeatureQuals (
7729 SeqFeatPtr sfp
7730 )
7731
7732 {
7733 IntAsn2gbJob ajb;
7734 IntAsn2gbJobPtr ajp;
7735 BioseqPtr bsp;
7736 SeqMgrFeatContext fcontext;
7737 IntCdsBlock ifb;
7738 IntFeatBlockPtr ifp;
7739 size_t max;
7740 QualValPtr qvp;
7741 CharPtr str;
7742
7743 if (sfp == NULL) return NULL;
7744 bsp = BioseqFindFromSeqLoc (sfp->location);
7745 if (bsp == NULL) return NULL;
7746
7747 if (SeqMgrGetDesiredFeature (0, bsp, 0, 0, sfp, &fcontext) != sfp) return NULL;
7748
7749 MemSet ((Pointer) &ajb, 0, sizeof (IntAsn2gbJob));
7750 ajp = &ajb;
7751 MemSet ((Pointer) &ifb, 0, sizeof (IntCdsBlock));
7752 ifp = (IntFeatBlockPtr) &ifb;
7753
7754 max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
7755 qvp = MemNew (sizeof (QualVal) * (max + 5));
7756 if (qvp == NULL) return NULL;
7757
7758 str = FormatFeatureBlockEx (ajp, NULL, NULL, NULL, sfp, &fcontext, qvp,
7759 GENBANK_FMT, ifp, FALSE, FALSE);
7760
7761 MemFree (qvp);
7762 StripLeadingSpaces (str);
7763 return str;
7764 }
7765
7766 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |