|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/asn2ff3.c |
source navigation diff markup identifier search freetext search file search |
1 /* asn2ff3.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2ff3.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov
29 *
30 * Version Creation Date: 7/15/95
31 *
32 *
33 * File Description:
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * $Log: asn2ff3.c,v $
38 * Revision 6.119 2006/07/13 17:06:38 bollin
39 * use Uint4 instead of Uint2 for itemID values
40 * removed unused variables
41 * resolved compiler warnings
42 *
43 * Revision 6.118 2003/07/22 16:18:27 kans
44 * added ZFIN as legal db_xref
45 *
46 * Revision 6.117 2003/06/10 18:44:10 kans
47 * added GeneDB to list of legal db_xrefs
48 *
49 * Revision 6.116 2003/05/29 20:25:19 kans
50 * added Interpro to list of legal dbxrefs
51 *
52 * Revision 6.115 2002/11/30 20:18:27 kans
53 * added GOA to list of legal db_xrefs
54 *
55 * Revision 6.114 2002/11/27 22:25:17 kans
56 * added AceView/WormGenes, NextDB, and WorfDB to legal db_xrefs
57 *
58 * Revision 6.113 2002/07/12 17:34:35 kans
59 * WormBase is now legal dbxref for all records, not just RefSeq
60 *
61 * Revision 6.112 2002/06/21 15:31:11 kans
62 * added GABI db_xref
63 *
64 * Revision 6.111 2002/06/18 20:59:59 kans
65 * added ISFinder as legal db_xref with hotlink
66 *
67 * Revision 6.110 2002/05/06 22:15:12 kans
68 * added IFO and JCM db_xrefs
69 *
70 * Revision 6.109 2002/02/27 13:47:11 kans
71 * fixed model evidence printing
72 *
73 * Revision 6.108 2002/02/20 21:59:04 tatiana
74 * IMGT/LIGM dbxref added
75 *
76 * Revision 6.107 2002/01/31 22:31:31 tatiana
77 * allow trascript_id in NC records
78 *
79 * Revision 6.106 2002/01/18 19:53:24 kans
80 * if RefSeq, allow WormBase dbxref
81 *
82 * Revision 6.105 2001/12/28 21:37:10 kans
83 * allow sfp->product to be SEQLOC_EQUIV
84 *
85 * Revision 6.104 2001/11/29 18:29:38 kans
86 * added FANTOM_DB to list of legal db_xrefs, incremented DBNUM
87 *
88 * Revision 6.103 2001/11/12 19:32:38 kans
89 * updated mRNAEvidenceComment
90 *
91 * Revision 6.102 2001/10/25 12:45:45 kans
92 * Get3LetterSymbol was using table->num instead of table_3aa->num
93 *
94 * Revision 6.101 2001/10/15 17:08:44 kans
95 * updated legal db_xref list to collaboration + RefSeq
96 *
97 * Revision 6.100 2001/10/15 13:57:22 kans
98 * added BDGP_INS and SoyBase as legal db_xrefs
99 *
100 * Revision 6.99 2001/10/02 17:39:50 yaschenk
101 * Removing memory leaks
102 *
103 * Revision 6.98 2001/09/06 20:31:24 yaschenk
104 * removing memory leak - seqid returned by GetSeqIdForGI() needs to be freed
105 *
106 * Revision 6.97 2001/09/05 23:37:42 tatiana
107 * ribosomal slippage added to /note
108 *
109 * Revision 6.96 2001/09/05 23:32:39 tatiana
110 * supressed comparison of note to gene->synonym
111 *
112 * Revision 6.95 2001/08/22 22:35:07 kans
113 * added ProductIsLocal for /translation
114 *
115 * Revision 6.94 2001/08/07 16:49:41 kans
116 * use NUM_SEQID, added third party annotation SeqIDs to one more place
117 *
118 * Revision 6.93 2001/08/03 20:36:16 kans
119 * implemented ASN2GNBK_PRINT_UNKNOWN_ORG test to suppress unwanted mode diffs for asn2gnbk QA
120 *
121 * Revision 6.92 2001/07/12 17:12:49 kans
122 * biop->genome range checks in AddBioSourceToGBQual to prevent crashes
123 *
124 * Revision 6.91 2001/07/08 21:18:50 kans
125 * if ssp->subtype is 0, use ? as tag in note
126 *
127 * Revision 6.90 2001/06/26 19:50:07 kans
128 * call AddPID with is_NC as an option for showing /protein_id with the gi
129 *
130 * Revision 6.89 2001/06/25 22:22:17 kans
131 * ProteinFromCdRegion and GetProductFromCDS only if sfp->product and ! ajp->genome_view, should eliminate unwanted fetches to get far delta components
132 *
133 * Revision 6.88 2001/05/31 17:42:18 kans
134 * NC and NG RefSeq records allow remote fetching for /protein_id and /transcript_id, show gi if fetching not enabled
135 *
136 * Revision 6.87 2001/03/17 00:51:30 tatiana
137 * GeneID added to dbxref array
138 *
139 * Revision 6.86 2001/02/13 23:31:58 kans
140 * allow trans splicing exception, do not change sfp_in->excpt
141 *
142 * Revision 6.85 2001/01/30 16:25:54 kans
143 * precursor_RNA now has /product as legal qualifier
144 *
145 * Revision 6.84 2001/01/26 19:26:36 kans
146 * added niaEST, increased DBNUM
147 *
148 * Revision 6.83 2001/01/26 19:21:45 kans
149 * extrachromosomal into source note, removed macronuclear, extrachrom, plasmid from organism line
150 *
151 * Revision 6.82 2001/01/18 23:57:01 kans
152 * add GO (gene ontology) to list of legal dbxrefs
153 *
154 * Revision 6.81 2001/01/02 19:56:48 kans
155 * Get3LetterSymbol protects against empty string
156 *
157 * Revision 6.80 2000/12/07 19:03:53 tatiana
158 * transcript_id shows for NT only
159 *
160 * Revision 6.79 2000/12/06 22:00:46 tatiana
161 * ifdef removed
162 *
163 * Revision 6.78 2000/12/06 20:56:24 tatiana
164 * AceView link added
165 *
166 * Revision 6.76 2000/12/04 22:23:47 tatiana
167 * contig comments added
168 *
169 * Revision 6.75 2000/11/22 16:48:18 tatiana
170 * remove debugging error printing
171 *
172 * Revision 6.74 2000/11/10 00:37:13 tatiana
173 * changes in AddPID
174 *
175 * Revision 6.73 2000/10/25 15:57:57 kans
176 * sfp_in->excpt set to FALSE, not NULL, UNIX compiler does not know the difference, but Mac and PC compilers do
177 *
178 * Revision 6.72 2000/10/24 20:35:35 tatiana
179 * CDS without protein sequence is accepted for not forgbrel mode
180 *
181 * Revision 6.70 2000/10/19 18:52:32 kans
182 * added another NULL entry to organelleQual for endogenous virus to suppress as organelle qualifier
183 *
184 * Revision 6.69 2000/10/16 19:10:17 kans
185 * added UniSTS and InterimID to legal dbxrefs
186 *
187 * Revision 6.68 2000/10/10 15:06:02 kans
188 * added SUBSRC_endogenous_virus_name
189 *
190 * Revision 6.67 2000/08/28 22:17:18 kans
191 * added CDD to list of legal dbxrefs
192 *
193 * Revision 6.66 2000/07/14 20:24:26 kans
194 * added RGD as dbxref with web link
195 *
196 * Revision 6.65 2000/07/12 22:45:15 kans
197 * added ORGMOD_old_lineage
198 *
199 * Revision 6.64 2000/06/20 17:31:34 kans
200 * added authority through breed as orgmod.subtypes
201 *
202 * Revision 6.63 2000/06/15 16:45:40 kans
203 * added segment to biosource note print
204 *
205 * Revision 6.62 2000/06/05 17:52:11 tatiana
206 * increase size of feature arrays to Int4
207 *
208 * Revision 6.61 2000/05/15 15:52:50 bazhin
209 * Removed memory leak in "PrintSourceFeat()".
210 *
211 * Revision 6.60 2000/03/30 20:37:29 kans
212 * added tilde to newline code in PrintImpFeatEx (thanks to Sergei B)
213 *
214 * Revision 6.59 2000/03/01 19:09:53 tatiana
215 * for SYN records with multiple source features there is no subtraction
216 *
217 * Revision 6.58 2000/02/17 21:59:18 kans
218 * /organelle not under ajp->forgbrel for this release now
219 *
220 * Revision 6.57 2000/02/15 22:53:56 kans
221 * added dbSNP and RATMAP as legal dbxrefs, put /organelle under ajp->forgrel control
222 *
223 * Revision 6.56 2000/02/09 01:12:51 tatiana
224 * remove space in organelle qualifier
225 *
226 * Revision 6.55 2000/01/21 20:48:45 kans
227 * changes to merge several source qualifiers under new organelle qualifier
228 *
229 * Revision 6.54 2000/01/11 22:49:37 tatiana
230 * protein accession is not required in DUMP_MODE
231 *
232 * Revision 6.53 2000/01/03 23:16:17 kans
233 * CDS note components from GetProtRefComment are separated by semicolons - to be consistent with upcoming asn2gnbk style
234 *
235 * Revision 6.52 1999/10/18 20:13:34 kans
236 * removed erroneous cast in sprintf
237 *
238 * Revision 6.51 1999/10/06 22:18:29 kans
239 * calls ComposeCodonsRecognizedString
240 *
241 * Revision 6.50 1999/10/06 20:23:48 bazhin
242 * Removed memory leaks.
243 *
244 * Revision 6.49 1999/08/03 20:48:23 tatiana
245 * UMR error fixed in PrintImpFeat
246 *
247 * Revision 6.47 1999/04/26 18:53:00 tatiana
248 * added pseuod from sfp in ConvertToNAImpFeat()
249 *
250 * Revision 6.46 1999/04/06 22:37:45 tatiana
251 * protein_id hot link added
252 *
253 * Revision 6.45 1999/04/06 15:00:16 tatiana
254 * www_featkey is not called for slp view
255 *
256 * Revision 6.44 1999/03/30 22:23:33 kans
257 * pseudo can be on grp or sfp
258 *
259 * Revision 6.43 1999/03/30 19:18:19 tatiana
260 * changes for SEQID_OTHER
261 *
262 * Revision 6.42 1999/03/22 23:09:26 tatiana
263 * AddPID() changes
264 *
265 * Revision 6.41 1998/10/19 15:57:35 tatiana
266 * UniGene added to dbtag array
267 *
268 * Revision 6.40 1998/09/24 17:45:57 kans
269 * fixed GetDBXrefFromGene problem (TT)
270 *
271 * Revision 6.39 1998/09/01 19:25:21 kans
272 * context parameter in get best protein, get cds/rna given product
273 *
274 * Revision 6.38 1998/08/19 18:40:38 tatiana
275 * RiceGenes added to dbtag array
276 *
277 * Revision 6.37 1998/07/21 15:14:50 kans
278 * GetProtRefComments modified for indexes because continue statement avoided get next feature, got stuck
279 *
280 * Revision 6.36 1998/07/15 22:07:19 kans
281 * implemented sequence manager indexes for non-segmented nucleotides
282 *
283 * Revision 6.35 1998/07/13 14:52:24 tatiana
284 * subtypes added to source feature /note
285 *
286 * Revision 6.34 1998/06/15 14:57:22 tatiana
287 * UNIX compiler warnings and extra HTML characters in notes fixed
288 *
289 * Revision 6.33 1998/05/20 20:05:40 tatiana
290 * SEQFEAT_REGION added to get_prot_feats()
291 *
292 * Revision 6.32 1998/05/18 14:41:53 tatiana
293 * GI added to dbtag array
294 *
295 * Revision 6.31 1998/05/08 21:56:56 tatiana
296 * added new PARTIAL_MODE
297 *
298 * Revision 6.30 1998/04/30 21:42:36 tatiana
299 * *** empty log message ***
300 *
301 * Revision 6.29 1998/04/27 18:31:51 tatiana
302 * added /focus in PrintSourceFeat()
303 *
304 * Revision 6.28 1998/04/24 15:10:08 tatiana
305 * GetProtRefComment() fixed: only main Prot-Ref feature adds comment to CDS
306 *
307 * Revision 6.27 1998/04/15 21:38:32 kans
308 * rearrange dbtag array so PID set is at start, allow unknown database on all but release_mode (Tatiana)
309 *
310 * Revision 6.24 1998/04/06 14:59:08 tatiana
311 * PutTranslationLast has been moved
312 *
313 * Revision 6.23 1998/04/03 22:38:36 tatiana
314 * selenocysteine added tp /note in ComposeCodeBreakQuals()
315 *
316 * Revision 6.22 1998/04/02 21:42:53 tatiana
317 * ignore old_name in OrgMod
318 *
319 * Revision 6.21 1998/04/02 17:21:23 tatiana
320 * a bug fixed in AddBioSourceToGBQual()
321 *
322 * Revision 6.20 1998/03/30 20:38:56 tatiana
323 * nat_host changed to specific_host
324 *
325 * Revision 6.19 1998/03/27 23:01:54 tatiana
326 * AddBioSourceToGBQual: added all OrgMod.subtypes as /notes on the source feature
327 *
328 * Revision 6.18 1998/03/24 19:47:45 tatiana
329 * added check for sfp->except_text
330 *
331 * Revision 6.17 1998/03/04 18:38:48 tatiana
332 * illegal feature will be dropped in ConvertToAAImpFeat
333 *
334 * Revision 6.16 1998/02/19 21:28:52 tatiana
335 * dbtags array updated
336 *
337 * Revision 6.15 1998/01/26 21:16:16 tatiana
338 * biovar and country added to source feature /note
339 *
340 * Revision 6.14 1998/01/20 22:45:11 tatiana
341 * show both product and descr in Genpept
342 *
343 * Revision 6.13 1998/01/13 16:27:38 tatiana
344 * fixed a bug in dbtag check in PrintSourceFeat
345 *
346 * Revision 6.12 1997/12/23 21:57:16 tatiana
347 * focus and specimen_voucher
348 *
349 * Revision 6.11 1997/12/15 15:48:33 tatiana
350 * features processing has been changed
351 *
352 * Revision 6.10 1997/12/02 18:15:02 tatiana
353 * fix use of printf
354 *
355 * Revision 6.9 1997/10/23 16:57:42 tatiana
356 * *** empty log message ***
357 *
358 * Revision 6.6 1997/09/16 15:48:07 kans
359 * removed automatically generated diff lines
360 *
361 * Revision 6.5 1997/09/16 15:42:52 kans
362 * show non-gbff source qualifiers in note with labels (TT)
363 *
364 * Revision 6.4 1997/09/12 20:20:18 tatiana
365 * fixed typo
366 *
367 * Revision 6.3 1997/09/12 20:03:53 tatiana
368 * added source feature in genome_view
369 *
370 * Revision 6.2 1997/09/04 01:16:48 kans
371 * fixed typo
372 *
373 * Revision 6.1 1997/09/03 21:49:37 tatiana
374 * GatherItemWithLock() added for ProtRef features
375 *
376 * Revision 6.0 1997/08/25 18:04:51 madden
377 * Revision changed to 6.0
378 *
379 * Revision 5.59 1997/08/21 19:03:17 tatiana
380 * map, syn, description eliminated from features other than gene
381 *
382 * Revision 5.58 1997/08/05 20:09:08 kans
383 * added check for po->sfp null in PrintSourceFeat
384 *
385 * Revision 5.57 1997/07/29 14:55:51 kans
386 * make sure features on protein are SEQFEAT_PROT
387 *
388 * Revision 5.56 1997/07/16 21:08:28 tatiana
389 * Use gene synonym for /gene qualifier
390 *
391 * Revision 5.55 1997/06/19 18:37:02 vakatov
392 * [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
393 *
394 * Revision 5.54 1997/06/12 16:56:37 kans
395 * fixed typo that resulted in lost note (TT)
396 *
397 * Revision 5.53 1997/06/10 15:27:12 tatiana
398 * fix a typo in COnvertToNa... that leaded to the lost /note
399 *
400 * Revision 5.47 1997/03/14 21:21:33 tatiana
401 * exp_evidence fix
402 *
403 * Revision 5.46 1997/03/05 22:12:33 tatiana
404 * print 'pseudo' in /note for orphan genes
405 *
406 * Revision 5.45 1997/03/04 23:45:14 tatiana
407 * check for 'pseudo' gene added in ConvertToNAImpFeat()
408 *
409 * Revision 5.44 1997/02/25 23:47:21 tatiana
410 * new error message added for dropped feature
411 *
412 * Revision 5.42 1997/01/29 15:49:11 tatiana
413 * fix the entityID in GatherProductGeneInfo()
414 *
415 * Revision 5.40 1997/01/15 17:23:38 tatiana
416 * a bug fixed (purify reported) in PrintNAFeatByNumber()
417 *
418 * Revision 5.39 1997/01/07 23:27:13 tatiana
419 * check for NULLs added in CompareTranslation
420 *
421 * Revision 5.38 1997/01/07 22:32:41 tatiana
422 * added SEQFEAT_SITE to get_prot_feats callback
423 *
424 * Revision 5.37 1997/01/02 22:49:55 tatiana
425 * gather SEQFEAT_BOND
426 *
427 * Revision 5.36 1996/12/10 17:45:41 tatiana
428 * a bug fixed in ComposeNoteFromNoteStruct()
429 *
430 * Revision 5.35 1996/12/09 19:12:33 tatiana
431 * SPTREMBL added to legal db_xref database names
432 *
433 * Revision 5.34 1996/12/04 16:52:16 tatiana
434 * a typo fixed in Add_dbxref
435 *
436 * Revision 5.33 1996/12/03 15:49:57 tatiana
437 * 'CK' added to array of legal databases in db_xref
438 *
439 * Revision 5.32 1996/10/30 16:52:36 tatiana
440 * SeqIdFindBest added in PrintSourceFeat
441 *
442 * Revision 5.31 1996/10/25 22:11:19 tatiana
443 * NoteCmp changed
444 *
445 * Revision 5.30 1996/10/24 20:40:12 tatiana
446 * a bug fixed in AddDBXref()
447 *
448 * Revision 5.29 1996/10/18 21:37:22 tatiana
449 * a bug fixed in NoteCmp
450 *
451 * Revision 5.28 1996/10/09 15:15:00 tatiana
452 * Take the main protein ONLY (not sig_peptide mat_peptide)
453 * to make CDS comments
454 *
455 * Revision 5.27 1996/09/25 18:05:45 tatiana
456 * SEQFEAT_COMMENT becomes misc_feature
457 *
458 * Revision 5.26 1996/09/18 20:41:26 kans
459 * changed uninitialized variable names to correct names, removed unused
460 * variable
461 *
462 * Revision 5.25 1996/09/18 20:21:27 tatiana
463 * NoteCmp added to ComposeNoteFromNoteStruct to check for identical notes
464 *
465 * Revision 5.24 1996/09/17 14:59:04 tatiana
466 * virion and transl_except added
467 *
468 * Revision 5.23 1996/09/12 17:52:28 tatiana
469 * a bug fixed in PrintSourceFeat
470 *
471 * Revision 5.22 1996/09/06 14:58:00 tatiana
472 * clean sfp_out at the end of PrintSourceFeat and PrintNAFeatByNumber
473 *
474 * Revision 5.21 1996/09/04 13:40:17 tatiana
475 * a bug fixed in GetDotTRNA
476 *
477 * Revision 5.19 1996/09/03 19:51:30 tatiana
478 * extra_loc added
479 *
480 * Revision 5.18 1996/08/16 20:32:23 tatiana
481 * for ifp->key StringSave is used not StringCpy
482 *
483 * Revision 5.17 1996/08/12 16:36:40 tatiana
484 * ErrPostEx changed to ErrPostStr
485 *
486 * Revision 5.16 1996/08/06 20:30:46 kans
487 * SeqIdFindBest called to handle local IDs and genbank IDs coexisting
488 *
489 * Revision 5.15 1996/08/02 21:41:23 tatiana
490 * turned off metho conceptual transl by author
491 *
492 * Revision 5.14 1996/07/30 17:28:07 kans
493 * ParFlat_... arrays now external in header file
494 *
495 * Revision 5.13 1996/07/30 16:34:09 tatiana
496 * minor change in PrintSourcefeat
497 *
498 * Revision 5.12 1996/07/29 19:46:14 tatiana
499 * GBQual_names changed to use a structureGBQual_names changed to use a structure
500 *
501 * Revision 5.11 1996/07/23 22:33:40 tatiana
502 * prot feats in genpept (piptides)
503 *
504 * Revision 5.10 1996/07/22 22:07:21 tatiana
505 * a bug fixed in DoTRNAQual
506 *
507 * Revision 5.9 1996/07/15 18:07:10 tatiana
508 * minor changes in PrintSourceFeat to show 'unknown' in debug mode
509 *
510 * Revision 5.8 1996/07/12 20:38:22 tatiana
511 * concept_transl_a supressed
512 *
513 * Revision 5.7 1996/07/12 20:11:49 tatiana
514 * DotRNAQuals() changed
515 *
516 * Revision 5.6 1996/07/11 14:58:27 tatiana
517 * product in tRNA
518 *
519 * Revision 5.5 1996/07/09 16:31:34 tatiana
520 * a bug fixed in PrintNAFeatByNumber
521 *
522 * Revision 5.4 1996/07/02 18:09:17 tatiana
523 * don't print duplicated features (PrintNAFeatByNumber)
524 *
525 * Revision 5.3 1996/06/14 18:03:56 tatiana
526 * GetNAFeatKey change
527 *
528 * Revision 5.2 1996/06/11 15:35:04 tatiana
529 * make GetGeneticCode static and get_prot_feats non-static
530 *
531 * Revision 5.1 1996/05/31 18:01:24 tatiana
532 * check for /pseudo in CdRegion added
533 *
534 * Revision 4.35 1996/05/21 21:02:03 tatiana
535 * a bug fixed in location[] size in PrintSourceFeat()
536 *
537 * Revision 4.34 1996/05/16 20:58:09 tatiana
538 * GetCdregionGeneXrefInfo changed to Boolean
539 *
540 * Revision 4.33 1996/04/25 14:55:33 kans
541 * protect against biosource subsource subtype of 255 (other) or bad values
542 *
543 * Revision 4.32 1996/04/15 14:36:49 tatiana
544 * memory leaks cleaning
545 *
546 * Revision 4.31 1996/04/08 21:53:56 tatiana
547 * change in www_featloc
548 *
549 * Revision 4.30 1996/04/05 17:43:36 ostell
550 * added quickie patch for overrun of buf[30] when called by
551 * www_featloc()
552 *
553 * Revision 4.29 1996/03/25 15:20:19 tatiana
554 * add html symbols
555 *
556 * Revision 4.28 1996/03/19 23:58:27 tatiana
557 * print activity in CDS
558 *
559 * Revision 4.27 1996/03/12 21:36:32 tatiana
560 * 'serotype' added to orgmod_subtype array
561 *
562 * Revision 4.26 1996/02/28 04:53:06 ostell
563 * changes to support segmented master seeuquences
564 *
565 * Revision 4.25 1996/02/26 00:46:18 ostell
566 * removed unused local variables and integer size mismatch fusses
567 *
568 * Revision 4.24 1996/02/18 21:16:48 tatiana
569 * memory leaks cleaned up
570 *
571 * Revision 4.23 1996/02/16 16:22:32 tatiana
572 * a bug fixed in ConvertToNAImpFeat
573 *
574 * Revision 4.22 1996/02/15 15:52:18 tatiana
575 * Gather for temp loaded items and sortin features within entity addded
576 *
577 * Revision 4.21 1996/01/29 22:34:42 tatiana
578 * mainly PID changes
579 *
580 * Revision 4.20 1995/12/20 22:38:02 tatiana
581 * gene xrefs to db_xref
582 *
583 * Revision 4.19 1995/12/15 02:47:01 ostell
584 * added protection so that GatherProductGeneInfo does not gather if protein
585 * bioseq not already in memory
586 *
587 * Revision 4.18 1995/12/13 16:31:36 tatiana
588 * anticodon added to new tRNA slot
589 *
590 * Revision 4.17 1995/12/04 23:01:16 tatiana
591 * take starin from OrgRef.mod in PrintSourceFeat()
592 *
593 * Revision 4.16 1995/11/28 15:19:46 tatiana
594 * GetPID fixed
595 *
596 * Revision 4.15 1995/11/22 18:59:42 tatiana
597 * a bug fixed in orphan genes printing
598 *
599 * Revision 4.14 1995/11/17 21:49:19 tatiana
600 * hot link to genetic code added
601 *
602 * Revision 4.13 1995/11/17 21:28:35 kans
603 * asn2ff now uses gather (Tatiana)
604 *
605 * Revision 4.4 1995/08/18 22:18:31 tatiana
606 * a bug fix
607 *
608 * Revision 4.1 1995/08/01 14:51:29 tatiana
609 * change SeqIdPrint to SeqIdWrite
610 *
611 * Revision 1.65 1995/07/17 19:33:20 kans
612 * parameters combined into Asn2ffJobPtr structure
613 *
614 * Revision 1.61 1995/06/19 21:40:02 kans
615 * Tatiana's first major reorganization, moving printing, adding HTML
616 *
617 * Revision 1.60 1995/05/19 21:25:06 kans
618 * no longer fetches CDS protein product causing Entrez disc swap
619 *
620 * Revision 1.59 1995/05/15 21:46:05 ostell
621 * added Log line
622 *
623 *
624 **************************************/
625 #include <asn2ffp.h>
626 #include <a2ferrdf.h>
627 #include <a2ferr.h>
628 #include <utilpub.h>
629 #include <ffprint.h>
630 #include <parsegb.h>
631 #include <sequtil.h>
632 #include <edutil.h>
633 #include <gather.h>
634 #include <explore.h>
635 #include <sqnutils.h>
636
637 #define METHOD_concept_transl_a 6
638
639 NLM_EXTERN CharPtr mRNAEvidenceComment PROTO ((UserObjectPtr obj, Boolean add));
640 NLM_EXTERN Int2 ConvertToNAImpFeat PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfp_out, SortStructPtr p));
641 NLM_EXTERN Int2 ConvertToAAImpFeat PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfp_out, SortStructPtr p));
642 NLM_EXTERN Int2 ValidateAAImpFeat PROTO ((SeqFeatPtr sfp, Boolean use_product));
643 NLM_EXTERN Int2 ValidateNAImpFeat PROTO ((SeqFeatPtr sfp));
644 NLM_EXTERN void AddProteinQuals PROTO ((SeqFeatPtr sfp, SeqFeatPtr sfp_out, NoteStructPtr nsp));
645 static void GetGeneticCode PROTO ((CharPtr ptr, SeqFeatPtr sfp));
646 NLM_EXTERN void ComposeGBQuals PROTO((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, GBEntryPtr gbp, SortStructPtr p, Boolean note_pseudo));
647 NLM_EXTERN CharPtr ComposeNoteFromNoteStruct PROTO ((NoteStructPtr nsp, GeneStructPtr gsp));
648 NLM_EXTERN void AddPID PROTO ((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG));
649 NLM_EXTERN void Add_trid PROTO ((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out));
650 NLM_EXTERN Int2 MakeGBSelectNote PROTO ((CharPtr ptr, SeqFeatPtr sfp));
651 static void GetProtRefComment PROTO ((SeqFeatPtr sfp, BioseqPtr bsp, Asn2ffJobPtr ajp, OrganizeProtPtr opp, NoteStructPtr nsp, Uint1 method));
652 NLM_EXTERN Int2 MiscFeatOrphanGenes PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp, Int2 index));
653 Int2 CheckForQual PROTO ((GBQualPtr gbqual, CharPtr string_q, CharPtr string_v));
654 NLM_EXTERN GBQualPtr AddModifsToGBQual PROTO ((GBEntryPtr gbp, GBQualPtr gbqual));
655 NLM_EXTERN GBQualPtr AddOrgRefModToGBQual PROTO ((OrgRefPtr orp, GBQualPtr gbqual));
656 NLM_EXTERN Int2 CheckForEqualSign PROTO ((CharPtr qual));
657 NLM_EXTERN CharPtr GetProductFromCDS PROTO ((ValNodePtr product, ValNodePtr location, Int4 length));
658 NLM_EXTERN void PrepareSourceFeatQuals PROTO ((SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modif));
659 static Int2 CheckForExtraChars PROTO ((CharPtr note));
660 NLM_EXTERN GBQualPtr AddBioSourceToGBQual PROTO((Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release));
661 NLM_EXTERN Boolean delete_qual PROTO((GBQualPtr PNTR qlist, CharPtr qual));
662
663 typedef struct {
664 CharPtr name;
665 Uint1 num;
666 } ORGMOD;
667
668 #define num_subtype 25
669 CharPtr subtype[num_subtype] = {
670 "chromosome", "map", "clone", "sub_clone", "haplotype", "genotype", "sex",
671 "cell_line", "cell_type", "tissue_type", "clone_lib", "dev_stage",
672 "frequency", "germline", "rearranged", "lab_host", "pop_variant",
673 "tissue_lib", "plasmid", "transposon", "insertion_seq", "plastid", "country",
674 "segment", "endogenous_virus"};
675
676 #define num_genome 15
677 static CharPtr genome[num_genome] = {"unknown", "genomic", "chloroplast", "chromoplast", "kinetoplast", "mitochondrion", "plastid", "macronuclear",
678 "extrachrom", "plasmid", "transposon", "insertion_seq", "cyanelle", "proviral", "virion"};
679
680 /*______________________________________________________________________
681 **
** This code is currently unused.
** It is kept for reference and disabled with "#if 0" rather than deleted.
** -- Dmitri Lukyanov
685 */
686 #if 0
687
688 #define num_biomol 7
689 static CharPtr biomol[num_biomol] = {"genomic", "RNA", "mRNA", "rRNA",
690 "tRNA", "snRNA", "scRNA"};
691
692 #endif
693 /*______________________________________________________________________
694 */
695
696 ORGMOD orgmod_subtype[34] = {
697 { "strain", 2 }, {"sub_strain", 3}, {"type", 4}, {"subtype", 5},
698 {"variety", 6}, {"serotype",7}, {"serogroup",8}, {"serovar", 9},
699 {"cultivar", 10}, {"pathovar", 11}, {"chemovar", 12}, {"biovar", 13},
700 {"biotype", 14}, {"group", 15}, {"subgroup", 16}, {"isolate", 17},
701 {"common", 18}, {"acronym", 19}, {"dosage", 20}, {"nat_host", 21},
702 {"sub_species", 22}, {"specimen_voucher", 23}, {"authority", 24},
703 {"forma", 25}, {"forma_specialis", 26}, {"ecotype", 27},
704 {"synonym", 28}, {"anamorph", 29}, {"teleomorph", 30}, {"breed", 31},
705 {"old_lineage", 253}, {"old_name", 254}, {"note", 255}, { NULL, 0 }
706 };
707
708 /*
709 CharPtr dbtag[DBNUM] = {
710 "PIDe", "PIDd", "PIDg", "PID", "FLYBASE",
711 "GDB", "MIM", "SGD", "SWISS-PROT", "CK",
712 "SPTREMBL", "ATCC", "ATCC (inhost)", "ATCC (dna)", "taxon",
713 "BDGP_EST", "dbEST", "dbSTS", "MGD", "PIR",
714 "GI", "RiceGenes", "UniGene", "LocusID", "dbSNP",
715 "RATMAP", "RGD", "CDD", "UniSTS", "InterimID", "COG", "GO", "niaEST",
716 "GeneID", "BDGP_INS", "SoyBase",
717 };
718 */
719
720 CharPtr dbtag[DBNUM] = {
721 "PIDe", "PIDd", "PIDg", "PID",
722 "AceView/WormGenes",
723 "ATCC",
724 "ATCC(in host)",
725 "ATCC(dna)",
726 "BDGP_EST",
727 "BDGP_INS",
728 "CDD",
729 "CK",
730 "COG",
731 "dbEST",
732 "dbSNP",
733 "dbSTS",
734 "ENSEMBL",
735 "ESTLIB",
736 "FANTOM_DB",
737 "FLYBASE",
738 "GABI",
739 "GDB",
740 "GeneDB",
741 "GeneID",
742 "GI",
743 "GO",
744 "GOA",
745 "IFO",
746 "IMGT/LIGM",
747 "IMGT/HLA",
748 "InterimID",
749 "Interpro",
750 "ISFinder",
751 "JCM",
752 "LocusID",
753 "MaizeDB",
754 "MGD",
755 "MGI",
756 "MIM",
757 "NextDB",
758 "niaEST",
759 "PIR",
760 "PSEUDO",
761 "RATMAP",
762 "RiceGenes",
763 "REMTREMBL",
764 "RGD",
765 "RZPD",
766 "SGD",
767 "SoyBase",
768 "SPTREMBL",
769 "SWISS-PROT",
770 "taxon",
771 "UniGene",
772 "UniSTS",
773 "WorfDB",
774 "WormBase",
775 "ZFIN",
776 };
777
778
/*************************************************************************
*	Add_dbxref (defined below, after the IsRefSeq helper):
*	sfp_out: synthetic SeqFeatPtr of type ImpFeat for use in printing.
*	Add_dbxref copies the dbxref entries of a feature onto sfp_out as
*	"db_xref" qualifiers.
*************************************************************************/
783 static Boolean IsRefSeq (BioseqPtr bsp)
784 {
785 SeqIdPtr sip;
786
787 if (bsp == NULL)
788 return FALSE;
789 for (sip = bsp->id; sip != NULL; sip = sip->next) {
790 if (sip->choice == SEQID_OTHER)
791 return TRUE;
792 }
793 return FALSE;
794 }
795
796 static void Add_dbxref (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, SeqFeatPtr sfp, BioseqPtr bsp)
797 {
798 Int4 id = -1;
799 Int2 i;
800 ValNodePtr vnp;
801 DbtagPtr db = NULL;
802 CharPtr val;
803
804 if (sfp == NULL || sfp->dbxref == NULL) {
805 return;
806 }
807 for (vnp=sfp->dbxref; vnp; vnp=vnp->next) {
808 id = -1;
809 db = vnp->data.ptrvalue;
810 if (db && db->db) {
811 for (i =0; i < DBNUM; i++) {
812 if (StringCmp(db->db, dbtag[i]) == 0) {
813 id = i;
814 break;
815 }
816 }
817 if (id == -1 && StringCmp (db->db, "WormBase") == 0 && IsRefSeq (bsp)) {
818 id = 18; /* show it even if not RefSeq record */
819 }
820 if (ajp->mode == RELEASE_MODE && id == -1) {
821 continue; /* drop unknown dbtag */
822 }
823 }
824 if (sfp->data.choice == SEQFEAT_CDREGION) {
825 /*
826 if (sfp->product != NULL && id > 4) {
827 continue;
828 }
829 */
830 } else {
831 if (id == -1 && ajp->mode != RELEASE_MODE) {
832 } else
833 if (id < 4) {
834 continue; /* PID is illegal on non-CDS features */
835 }
836 }
837 if (db == NULL) {
838 return;
839 }
840 if (db->tag && db->tag->str) {
841 val = MemNew(StringLen(db->db)+StringLen(db->tag->str)+2);
842 sprintf(val, "%s:%s", db->db, db->tag->str);
843 } else if (db->tag) {
844 val = MemNew(StringLen(db->db)+16);
845 if (StringNCmp(db->db, "PIDe", 4) == 0) {
846 sprintf(val, "PID:e%ld", (long) db->tag->id);
847 } else if (StringNCmp(db->db, "PIDd", 4) == 0) {
848 sprintf(val, "PID:d%ld", (long) db->tag->id);
849 } else if (StringNCmp(db->db, "PIDg", 4) == 0) {
850 sprintf(val, "PID:g%ld", (long) db->tag->id);
851 } else {
852 sprintf(val, "%s:%ld", db->db, (long) db->tag->id);
853 }
854 }
855 if (val[0] != '\0') {
856 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
857 MemFree(val);
858 }
859 }
860 return;
861 } /* Add_dbxref */
862
863 static Boolean CheckSeqIdChoice(SeqIdPtr sip)
864 {
865 Uint1 ch;
866 SeqIdPtr si;
867
868 for (si = sip; si; si=si->next) {
869 ch = si->choice;
870 if (ch == SEQID_GI || ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ ||
871 ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
872 return TRUE;
873 }
874 }
875 return FALSE;
876 }
877
878 static SeqIdPtr GetSeqIdChoice(SeqIdPtr sip)
879 {
880 Uint1 ch;
881 SeqIdPtr si;
882
883 for (si = sip; si; si=si->next) {
884 ch = si->choice;
885 if (ch == SEQID_GI || ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ ||
886 ch == SEQID_OTHER || ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
887 return si;
888 }
889 }
890 return NULL;
891 }
892
893 static Boolean CheckSeqIdAccVer(SeqIdPtr sip)
894 {
895 Uint1 ch;
896 SeqIdPtr si;
897 TextSeqIdPtr tsip;
898
899 for (si = sip; si; si=si->next) {
900 ch = si->choice;
901 if (ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ || ch == SEQID_OTHER ||
902 ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
903 tsip = si->data.ptrvalue;
904 if (tsip->accession != NULL && tsip->version >= 1) {
905 return TRUE;
906 }
907 }
908 }
909 return FALSE;
910 }
911
912 static void GetNonGeneQuals (Int2 mode, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, NoteStructPtr nsp)
913 {
914 GBQualPtr gbqp;
915 Boolean evidence_present;
916 Int2 i;
917
918 for (gbqp=sfp_in->qual; gbqp; gbqp=gbqp->next) {
919 if (StringCmp(gbqp->qual, "gene") == 0) {
920 ;
921 } else if (StringCmp(gbqp->qual, "product") == 0) {
922 ;
923 } else if (StringCmp(gbqp->qual, "standard_name") == 0) {
924 ;
925 } else if (StringCmp(gbqp->qual, "map") == 0) {
926 ;
927 } else if (StringCmp(gbqp->qual, "EC_number") == 0) {
928 ;
929 } else if (StringCmp(gbqp->qual, "anticodon") == 0) {
930 ; /* This is done by DotRNAQuals */
931 } else if (StringCmp(gbqp->qual, "note") == 0) {
932 CpNoteToCharPtrStack(nsp, NULL, gbqp->val);
933 } else if (StringCmp(gbqp->qual, "transl_table") == 0) {
934 sfp_out->qual =
935 AddGBQual(sfp_out->qual, gbqp->qual, gbqp->val);
936 /* This is captured by GetGeneticCode */
937 } else if (StringCmp(gbqp->qual, "db_xref") == 0) {
938 for (i =0; i < DBNUM; i++) {
939 if (StringNCmp(gbqp->val, dbtag[i], StringLen(dbtag[i])) == 0) {
940 break;
941 }
942 }
943 if (mode == RELEASE_MODE && i == DBNUM) {
944 continue; /* drop unknown dbtag */
945 }
946 sfp_out->qual =
947 AddGBQual(sfp_out->qual, gbqp->qual, gbqp->val);
948 } else {
949 sfp_out->qual =
950 AddGBQual(sfp_out->qual, gbqp->qual, gbqp->val);
951 }
952 }
953
954 evidence_present = GBQualPresent("evidence", sfp_out->qual);
955 if (sfp_out->exp_ev) {
956 if (evidence_present == FALSE) {
957 if (sfp_out->exp_ev == 1)
958 sfp_out->qual =
959 AddGBQual(sfp_out->qual, "evidence", "experimental");
960 if (sfp_out->exp_ev == 2)
961 sfp_out->qual =
962 AddGBQual(sfp_out->qual, "evidence", "not_experimental");
963 } else {
964 for (gbqp=sfp_out->qual; gbqp; gbqp=gbqp->next)
965 if (StringCmp(gbqp->qual, "evidence") == 0) {
966 gbqp->val = MemFree(gbqp->val);
967 if (sfp_out->exp_ev == 1)
968 gbqp->val = StringSave("experimental");
969 if (sfp_out->exp_ev == 2)
970 gbqp->val = StringSave("not_experimental");
971 break;
972 }
973 }
974 } else if (evidence_present == TRUE) {
975 for (gbqp=sfp_out->qual; gbqp; gbqp=gbqp->next)
976 if (StringCmp(gbqp->qual, "evidence") == 0) {
977 if (StringCmp(gbqp->val, "EXPERIMENTAL") == 0) {
978 StringCpy(gbqp->val, "experimental");
979 } else if (StringCmp(gbqp->val, "NOT_EXPERIMENTAL") == 0) {
980 StringCpy(gbqp->val, "not_experimental");
981 }
982 break;
983 }
984 }
985 return;
986 } /* GetNonGeneQuals */
987
988 /*****************************************************************************
989 *LookForPartialImpFeat
990 *
991 * This function first looks for the sfp->qual of type "partial".
992 * If found the qual is deleted and the variable "partial" is
993 * set equal to TRUE. If "partial" is TRUE or if sfp->partial
994 * is TRUE, FlatAnnotPartial is called (modified version of Karl Sirotkin's
995 * program) to see if sfp->partial should really be TRUE.
996 * WARNING: sfp should be an ImpFeatPtr
997 *
998 * written by Tom Madden (12/7/93)
999 *****************************************************************************/
1000 static void LookForPartialImpFeat(SeqFeatPtr sfp, Boolean use_product)
1001
1002 {
1003 Boolean partial=FALSE;
1004 GBQualPtr curq, gbqual, lastq=NULL, tmpqual;
1005
1006 gbqual = sfp->qual;
1007
1008 while (gbqual && (StringCmp(gbqual->qual, "partial")==0))
1009 {
1010 partial = TRUE;
1011 tmpqual = gbqual->next;
1012 gbqual->next = NULL;
1013 gbqual = GBQualFree(gbqual);
1014 gbqual = tmpqual;
1015 }
1016
1017 if (gbqual)
1018 {
1019 for (lastq=gbqual, curq=gbqual->next; curq; curq=curq->next)
1020 {
1021 if (StringCmp(curq->qual, "partial") == 0)
1022 {
1023 partial = TRUE;
1024 lastq->next = curq->next;
1025 curq->next = NULL;
1026 curq = GBQualFree(curq);
1027 curq = lastq;
1028 }
1029 else
1030 lastq = curq;
1031 }
1032 }
1033
1034 sfp->qual = gbqual;
1035
1036 if (partial == TRUE || sfp->partial == TRUE)
1037 sfp->partial = FlatAnnotPartial(sfp, use_product);
1038 } /* LookForPartialImpFeat */
1039
1040 static CharPtr SeqCodeNameGet (SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
1041 {
1042 int index=residue - table -> start_at;
1043 static CharPtr oops = "?";
1044
1045 if (index >= 0 && index < (int) table -> num){
1046 return (table -> names) [index];
1047 }else {
1048 if (error_msgs == TRUE)
1049 ErrPostEx(SEV_WARNING, CTX_NCBI2GB, 1,
1050 "asn2ff: %c(%d) > max in SeqCode table=%d",
1051 (char) residue, (int) residue, (int) table -> num);
1052 return oops;
1053 }
1054 }
1055
1056 /***************************************************************************
1057 *CharPtr Get3LetterSymbol (Uint1 seq_code, SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
1058 *
1059 * if (ASN2FF_IUPACAA_ONLY == TRUE) then
1060 * Check if the residue is legal in iupacaa; if not, return 'X', if so,
1061 * return the three letter code from iupacaa3.
1062 *
1063 * if (ASN2FF_IUPACAA_ONLY != TRUE) then
1064 * Then do a translation, if necessary, then get th three letter code
1065 * from iupacaa3.
1066 *
1067 ***************************************************************************/
1068
1069 static CharPtr Get3LetterSymbol (Uint1 seq_code, SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
1070 {
1071 static CharPtr bad_symbol= "OTHER";
1072 CharPtr ptr, retval=NULL;
1073 Int2 index;
1074 SeqCodeTablePtr table_3aa;
1075 SeqMapTablePtr smtp;
1076 Uint1 code, new_residue;
1077
1078 if (residue == 42) { /* stop codon in NCBIeaa */
1079 retval = "TERM";
1080 return retval;
1081 }
1082 if (ASN2FF_IUPACAA_ONLY == TRUE)
1083 code = Seq_code_iupacaa;
1084 else
1085 code = Seq_code_ncbieaa;
1086
1087 if (code != seq_code)
1088 {/* if code and seq_code are identical, then smtp is NULL?? */
1089 smtp = SeqMapTableFind(seq_code, code);
1090 new_residue = SeqMapTableConvert(smtp, residue);
1091 }
1092 else
1093 new_residue = residue;
1094
1095 /* The following looks for non-symbols (255) and "Undetermined" (88) */
1096 if ((int) new_residue == 255 || (int) new_residue == 88)
1097 retval = bad_symbol;
1098 else
1099 {
1100 ptr = SeqCodeNameGet(table, residue, error_msgs);
1101
1102 table_3aa=SeqCodeTableFind (Seq_code_iupacaa3);
1103 if (ptr != NULL && *ptr != '\0' && table_3aa != NULL)
1104 {
1105 for (index=0; index < (int) table_3aa->num; index++)
1106 {
1107 if (StringCmp(ptr, (table_3aa->names) [index]) == 0)
1108 {
1109 retval = (table_3aa->symbols) [index];
1110 break;
1111 }
1112 }
1113 }
1114 }
1115
1116 return retval;
1117
1118 } /* Get3LetterSymbol */
1119
1120 static CharPtr GetNameFromOrgName(OrgNamePtr orgname)
1121 {
1122 BinomialOrgNamePtr bi;
1123 CharPtr name = NULL, virus, newname;
1124 Int2 len=0;
1125 Boolean first;
1126 OrgNamePtr org;
1127
1128 switch(orgname->choice)
1129 {
1130 case 1: /*binomial*/
1131 bi = (BinomialOrgNamePtr) orgname->data;
1132 len = StringLen(bi->genus);
1133 if (bi->species) {
1134 len += StringLen(bi->species);
1135 }
1136 name = MemNew(len + 2);
1137 StringCpy(name, bi->genus);
1138 if (bi->species) {
1139 name = StringCat(name, " ");
1140 name = StringCat(name, bi->species);
1141 } else {
1142 name = StringCat(name, " sp.");
1143 }
1144 break;
1145 case 2: /*virus*/
1146 virus = (CharPtr) orgname->data;
1147 name = MemNew(StringLen(virus));
1148 StringCpy(name, virus);
1149 break;
1150 case 3: /*hybrid*/
1151 first = TRUE;
1152 for (org = (OrgNamePtr) orgname->data; org; org=org->next) {
1153 newname = GetNameFromOrgName(org);
1154 len += StringLen(newname) + 3;
1155 }
1156 name = MemNew(len + 1);
1157 for (org = (OrgNamePtr) orgname->data; org; org=org->next) {
1158 newname = GetNameFromOrgName(org);
1159 if (first == TRUE) {
1160 name = StringCat(name, newname);
1161 first = FALSE;
1162 } else {
1163 name = StringCat(name, " x ");
1164 name = StringCat(name, newname);
1165 }
1166 }
1167 break;
1168 case 4: /*namedhybrid*/
1169 bi = (BinomialOrgNamePtr) orgname->data;
1170 len = StringLen(bi->genus);
1171 if (bi->species) {
1172 len += StringLen(bi->species);
1173 }
1174 name = MemNew(len + 4);
1175 StringCpy(name, bi->genus);
1176 if (bi->species) {
1177 name = StringCat(name, " x ");
1178 name = StringCat(name, bi->species);
1179 }
1180 break;
1181 case 5: /*partial*/
1182 /* not implemented yet */
1183 ErrPostStr(SEV_WARNING, 0, 0, "Partial name in OrgName.name");
1184 break;
1185 default:
1186 break;
1187 }
1188 return name;
1189 }
1190
1191 NLM_EXTERN void PrintSourceFeat(Asn2ffJobPtr ajp, GBEntryPtr gbp)
1192
1193 {
1194 BioseqPtr bsp;
1195 Char location[40];
1196 ImpFeatPtr ifp;
1197 Int2 status = -1, /* mol = -1, -- UNUSED */ i, bsize=0;
1198 NoteStructPtr nsp = NULL;
1199 OrgRefPtr orp=NULL;
1200 SeqFeatPtr sfp_in, sfp_out=NULL, sfp;
1201 SeqIntPtr sip;
1202 SeqLocPtr slp, keep_loc;
1203 ValNodePtr vnp=NULL;
1204 BioSourcePtr biosp = NULL;
1205 OrgModPtr omp;
1206 SortStructPtr pss, ps=NULL, bs = NULL, po=NULL;
1207 DescrStructPtr ds;
1208 CharPtr name;
1209
1210 if (gbp == NULL) {
1211 return;
1212 }
1213 if (gbp->feat) {
1214 nsp=gbp->feat->source_notes;
1215 po = gbp->feat->Orglist;
1216 ps = gbp->feat->Sourcelist;
1217 bs = gbp->feat->Biosrclist;
1218 bsize = gbp->feat->biosrcsize;
1219 }
1220 ds = gbp->source_info;
1221 bsp = gbp->bsp;
1222 if (ajp->slp) {
1223 return;
1224 }
1225 sprintf(location, "1..%ld", (long) (bsp->length));
1226 sfp_out = ajp->sfp_out;
1227 ifp = sfp_out->data.value.ptrvalue;
1228 ifp->key = StringSave("source");
1229 if (ajp->slp) {
1230 slp = AsnIoMemCopy(ajp->slp,
1231 (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
1232 } else {
1233 slp = (SeqLocPtr) ValNodeNew(NULL);
1234 slp->choice = SEQLOC_INT;
1235 sip = SeqIntNew();
1236 sip->from = 0;
1237 sip->to = (bsp->length)-1;
1238 sip->id = SeqIdDup(SeqIdFindBest (bsp->id, 0));
1239 slp->data.ptrvalue = sip;
1240 }
1241 sfp_out->location = slp;
1242 if (ds != NULL) {
1243 vnp = ds->vnp;
1244 keep_loc = AsnIoMemCopy(slp,
1245 (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
1246 if (vnp && vnp->choice == Seq_descr_source) {
1247 biosp = vnp->data.ptrvalue;
1248 if (biosp->is_focus == TRUE) {
1249 sfp_out->qual = AddGBQual(sfp_out->qual,
1250 "focus", NULL);
1251 if (StringNCmp(gbp->div, "SYN", 3) != 0) {
1252 for (pss=bs, i= 0; pss && i < bsize; i++, pss++) {
1253 if (pss->sfp == NULL)
1254 continue;
1255 sfp_out->location =
1256 SeqLocSubtract(sfp_out->location,
1257 pss->sfp->location);
1258 }
1259 }
1260 }
1261 }
1262 if (sfp_out->location == NULL) {
1263 sfp_out->location = keep_loc;
1264 }
1265 else
1266 SeqLocFree(keep_loc);
1267 }
1268 flat2asn_install_feature_user_string("source", ifp->loc);
1269 if (gbp->feat && gbp->feat->sfpSourcesize != 0) {
1270 if ((sfp_in = ps->sfp) == NULL) {
1271 GatherItemWithLock(ps->entityID, ps->itemID, ps->itemtype,
1272 &sfp_in, find_item);
1273 }
1274 if (sfp_out->qual != NULL)
1275 sfp_out->qual = GBQualFree(sfp_out->qual);
1276 NoteStructReset(nsp);
1277 PrepareSourceFeatQuals(sfp_in, sfp_out, gbp, FALSE);
1278 Add_dbxref(ajp, sfp_out, sfp_in, bsp);
1279 status = ValidateNAImpFeat(sfp_out);
1280 if (status < 0) {
1281 /* source feat is probably missing organism name, add
1282 and try again. Don't delete old quals! */
1283 if (ds != NULL) {
1284 vnp = ds->vnp;
1285 if (vnp->choice == Seq_descr_source) {
1286 biosp = vnp->data.ptrvalue;
1287 orp = (OrgRefPtr) biosp->org;
1288 } else if (vnp->choice == Seq_descr_org) {
1289 orp = (OrgRefPtr) vnp->data.ptrvalue;
1290 }
1291 } else if (gbp->feat && gbp->feat->sfpOrgsize != 0) {
1292 if ((sfp = po->sfp) == NULL) {
1293 GatherItemWithLock(po->entityID, po->itemID, po->itemtype,
1294 &sfp, find_item);
1295 }
1296 if (sfp != NULL) {
1297 orp = (OrgRefPtr) sfp->data.value.ptrvalue;
1298 }
1299 }
1300 if (orp) {
1301 if (ajp->orgname && orp->orgname) {
1302 name = GetNameFromOrgName(orp->orgname);
1303 sfp_out->qual = AddGBQual(sfp_out->qual,
1304 "organism", name);
1305 MemFree(name);
1306 } else if (orp->taxname) {
1307 sfp_out->qual = AddGBQual(sfp_out->qual,
1308 "organism", orp->taxname);
1309 if (orp->common && sfp_in->comment != NULL)
1310 CpNoteToCharPtrStack(nsp, NULL, orp->common);
1311 } else if (orp->common) {
1312 if (StrStr(orp->common, "virus") ||
1313 StrStr(orp->common, "Virus") ||
1314 StrStr(orp->common, "phage") ||
1315 StrStr(orp->common, "Phage") ||
1316 StrStr(orp->common, "viroid") ||
1317 StrStr(orp->common, "Viroid")) {
1318 sfp_out->qual = AddGBQual(sfp_out->qual,
1319 "organism", orp->common);
1320 }
1321 }
1322 }
1323 status = ValidateNAImpFeat(sfp_out);
1324 }
1325 }
1326 if (status < 0) {
1327 if (ds != NULL) {
1328 if ((vnp = ds->vnp) != NULL) {
1329 if (vnp->choice == Seq_descr_source) {
1330 biosp = vnp->data.ptrvalue;
1331 orp = (OrgRefPtr) biosp->org;
1332 } else if (vnp->choice == Seq_descr_org) {
1333 orp = (OrgRefPtr) vnp->data.ptrvalue;
1334 }
1335 }
1336 } else if (gbp->feat && gbp->feat->sfpOrgsize != 0 && po->sfp != NULL) {
1337 orp = (OrgRefPtr) (po->sfp)->data.value.ptrvalue;
1338 } else {
1339 orp = NULL;
1340 }
1341 if (orp) {
1342 if (nsp) {
1343 NoteStructReset(nsp);
1344 }
1345 if (sfp_out->qual != NULL)
1346 sfp_out->qual = GBQualFree(sfp_out->qual);
1347 if (ajp->orgname && orp->orgname) {
1348 name = GetNameFromOrgName(orp->orgname);
1349 sfp_out->qual = AddGBQual(sfp_out->qual,
1350 "organism", name);
1351 MemFree(name);
1352 } else if (orp->taxname) {
1353 sfp_out->qual = AddGBQual(sfp_out->qual,
1354 "organism", orp->taxname);
1355 } else if (orp->common) {
1356 if (StrStr(orp->common, "virus") ||
1357 StrStr(orp->common, "Virus") ||
1358 StrStr(orp->common, "phage") ||
1359 StrStr(orp->common, "Phage") ||
1360 StrStr(orp->common, "viroid") ||
1361 StrStr(orp->common, "Viroid")) {
1362 sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
1363 orp->common);
1364 }
1365 }
1366 if (orp->orgname && orp->orgname->mod) {
1367 omp = orp->orgname->mod;
1368 if (omp->subtype == 0 && omp->subname != NULL) {
1369 CpNoteToCharPtrStack(nsp, NULL, omp->subname);
1370 }
1371 }
1372 sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual, TRUE);
1373 sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
1374 }
1375 if ((vnp=BioseqGetSeqDescr(gbp->bsp, Seq_descr_molinfo, NULL)) != NULL){
1376 /*
1377 mfp = vnp->data.ptrvalue;
1378 if (mfp) {
1379 mol = mfp->biomol;
1380 }
1381 -- NO EFFECT */
1382 }
1383 PrepareSourceFeatQuals(NULL, sfp_out, gbp, TRUE);
1384 status = ValidateNAImpFeat(sfp_out);
1385 }
1386 /* ----------Organism not found -------------*/
1387 if (status < 0) {
1388 if (sfp_out->qual)
1389 sfp_out->qual = GBQualFree(sfp_out->qual);
1390 sfp_out->qual = AddGBQual(sfp_out->qual, "organism", "unknown");
1391 NoteStructReset(nsp);
1392 if (orp && orp->common)
1393 CpNoteToCharPtrStack(nsp, NULL, orp->common);
1394 /*try new first */
1395 if (biosp) {
1396 sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual, TRUE);
1397 if (orp)
1398 sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
1399 }
1400 /* try old then */
1401 sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
1402 PrepareSourceFeatQuals(NULL, sfp_out, gbp, TRUE);
1403 status = ValidateNAImpFeat(sfp_out);
1404 }
1405 flat2asn_delete_feature_user_string();
1406
1407 if (status >= 0 || ASN2FF_VALIDATE_FEATURES == FALSE) {
1408 PrintImpFeat(ajp, gbp->bsp, sfp_out);
1409 }
1410 sfp_out->comment = NULL;
1411 sfp_out->location = SeqLocFree(sfp_out->location);
1412 sfp_out->location = NULL;
1413 sfp_out->product = NULL;
1414 sfp_out->exp_ev = FALSE;
1415 sfp_out->partial = FALSE;
1416 sfp_out->excpt = FALSE;
1417 ifp = sfp_out->data.value.ptrvalue;
1418 if (ifp->key) {
1419 ifp->key = MemFree(ifp->key);
1420 }
1421 if (ifp->loc) {
1422 ifp->loc = MemFree(ifp->loc);
1423 }
1424 if (sfp_out->qual)
1425 sfp_out->qual = GBQualFree(sfp_out->qual);
1426 return;
1427 } /* PrintSourceFeat */
1428
1429 /*****************************************************************************
1430 *
1431 * Add the quals of the form "/transl_except=(pos: ,aa: )" to the
1432 * SeqFeatPtr sfp_out.
1433 *
1434 *****************************************************************************/
1435
1436 static void ComposeCodeBreakQuals (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt, NoteStructPtr nsp)
1437
1438 {
1439 CdRegionPtr crp;
1440 CharPtr buffer, ptr, pos;
1441 Choice aa;
1442 CodeBreakPtr cbp;
1443 SeqCodeTablePtr table;
1444 SeqLocPtr slp;
1445 Uint1 seq_code=0, the_residue;
1446 Int2 i, buflen;
1447
1448 if ((sfp_in == NULL) || (sfp_in->data.choice != 3)) {
1449 return;
1450 }
1451
1452 crp = (CdRegionPtr) sfp_in->data.value.ptrvalue;
1453
1454 if (crp->code_break != NULL) {
1455 cbp = crp->code_break;
1456 while (cbp != NULL) {
1457 aa = cbp->aa;
1458 switch (aa.choice) {
1459 case 1:
1460 seq_code = 8;
1461 break;
1462 case 2:
1463 seq_code = 7;
1464 break;
1465 case 3:
1466 seq_code = 11;
1467 break;
1468 }
1469 table = NULL;
1470 if (seq_code != 0)
1471 table=SeqCodeTableFind (seq_code);
1472 if (table == NULL) {
1473 continue;
1474 }
1475 if (extra_loc_cnt > 0) { /* was converted to new coordinates*/
1476 for (i=0; i < extra_loc_cnt; i++) {
1477 if (extra_loc[i] == NULL) {
1478 continue;
1479 }
1480 slp = extra_loc[i];
1481 pos = FlatLoc(bsp, slp);
1482 if (pos) {
1483 the_residue = (Uint1) cbp->aa.value.intvalue;
1484 if (the_residue == 'U') {
1485 CpNoteToCharPtrStack(nsp, NULL, "selenocysteine");
1486 }
1487 ptr = Get3LetterSymbol(seq_code, table,
1488 the_residue, ajp->error_msgs);
1489 buflen = StringLen(pos) + StringLen(ptr) + 11;
1490 buffer = MemNew(buflen);
1491 sprintf(buffer, "(pos:%s,aa:%s)", pos, ptr);
1492 sfp_out->qual = AddGBQual(sfp_out->qual,
1493 "transl_except", buffer);
1494 MemFree(buffer);
1495 MemFree(pos);
1496 } else if (ajp->error_msgs) {
1497 ErrPostEx(SEV_WARNING, ERR_FEATURE_CodeBreakLoc,
1498 "Invalid Code-break.location: %s", pos);
1499 }
1500 }
1501 } else {
1502 slp = NULL;
1503 while ((slp = SeqLocFindNext(cbp->loc, slp)) != NULL) {
1504 pos = FlatLoc(bsp, slp);
1505 if (pos) {
1506 the_residue = (Uint1) cbp->aa.value.intvalue;
1507 if (the_residue == 'U') {
1508 CpNoteToCharPtrStack(nsp, NULL, "selenocysteine");
1509 }
1510 ptr = Get3LetterSymbol(seq_code, table,
1511 the_residue, ajp->error_msgs);
1512 buflen = StringLen(pos) + StringLen(ptr) + 11;
1513 buffer = MemNew(buflen);
1514 sprintf(buffer, "(pos:%s,aa:%s)", pos, ptr);
1515 sfp_out->qual = AddGBQual(sfp_out->qual,
1516 "transl_except", buffer);
1517 MemFree(buffer);
1518 MemFree(pos);
1519 } else if (ajp->error_msgs) {
1520 ErrPostEx(SEV_WARNING, ERR_FEATURE_CodeBreakLoc,
1521 "Invalid Code-break.location: %s", pos);
1522 }
1523 }
1524 }
1525 cbp = cbp->next;
1526 }
1527 }
1528
1529 return;
1530
1531 } /* ComposeCodeBreakQuals */
1532
1533 /***********************************************************************
1534 *void GetGeneticCode(CharPtr ptr, SeqFeatPtr sfp)
1535 *
1536 * returns ONLY non-standard (i.e., id not 0 or 1)
1537 * genetic codes.
1538 ***********************************************************************/
1539
1540 static void GetGeneticCode(CharPtr ptr, SeqFeatPtr sfp)
1541
1542 {
1543 Boolean code_is_one=FALSE;
1544 CdRegionPtr cdr;
1545 GBQualPtr qual;
1546 ValNodePtr gcp, var;
1547
1548 cdr = sfp->data.value.ptrvalue;
1549 gcp = cdr->genetic_code;
1550
1551 if (gcp != NULL)
1552 {
1553 for (var=gcp->data.ptrvalue; var != NULL; var=var->next)
1554 {
1555 if (var->choice == 2)
1556 {
1557 if (var->data.intvalue != 0 )
1558 {
1559 if (var->data.intvalue == 1)
1560 code_is_one = TRUE;
1561 else
1562 sprintf(ptr, "%ld", (long) (var->data.intvalue));
1563 }
1564 break;
1565 }
1566 }
1567 if (*ptr != '\0')
1568 {
1569 for (qual=sfp->qual; qual; qual=qual->next)
1570 {
1571 if (StringCmp("transl_table", qual->qual) == 0 &&
1572 StringCmp(ptr, qual->val) != 0)
1573 {
1574 ErrPostStr(SEV_WARNING,
1575 ERR_FEATURE_GcodeAndTTableClash, "");
1576 break;
1577 }
1578 }
1579 }
1580 else if (code_is_one == TRUE)
1581 {
1582 for (qual=sfp->qual; qual; qual=qual->next)
1583 {
1584 if (StringCmp("transl_table", qual->qual) == 0 &&
1585 StringCmp("1", qual->val) != 0)
1586 {
1587 ErrPostStr(SEV_WARNING,
1588 ERR_FEATURE_GcodeAndTTableClash, "");
1589 break;
1590 }
1591 }
1592 }
1593 }
1594 else
1595 {
1596 for (qual=sfp->qual; qual; qual=qual->next)
1597 if (StringCmp("transl_table", qual->qual) == 0)
1598 {
1599 StringCpy(ptr, qual->val);
1600 break;
1601 }
1602 }
1603
1604 return;
1605 } /* GetGeneticCode */
1606
1607 static SeqFeatPtr cleanup_sfp(SeqFeatPtr sfp_out)
1608 {
1609 ImpFeatPtr ifp;
1610
1611 if (sfp_out == NULL) {
1612 return NULL;
1613 }
1614 sfp_out->comment = NULL;
1615 sfp_out->location = NULL;
1616 sfp_out->product = NULL;
1617 sfp_out->exp_ev = FALSE;
1618 sfp_out->partial = FALSE;
1619 sfp_out->excpt = FALSE;
1620 ifp = sfp_out->data.value.ptrvalue;
1621 if (ifp->key) {
1622 ifp->key = MemFree(ifp->key);
1623 }
1624 if (ifp->loc) {
1625 ifp->loc = MemFree(ifp->loc);
1626 }
1627 if (sfp_out->qual)
1628 sfp_out->qual = GBQualFree(sfp_out->qual);
1629
1630 return sfp_out;
1631 }
1632
1633 static GBQualPtr remove_qual(GBQualPtr head, GBQualPtr x)
1634 {
1635 GBQualPtr v, p;
1636
1637 if (head == NULL) {
1638 return NULL;
1639 }
1640 if (x == head) {
1641 head = x->next;
1642 x->next = NULL;
1643 return head;
1644 }
1645 for (v = head; v != NULL && v != x; v = v->next) {
1646 p = v;
1647 }
1648 if (v != NULL) {
1649 p->next = x->next;
1650 x->next = NULL;
1651 }
1652 return head;
1653 }
1654
1655 static void PutGeneFirst(SeqFeatPtr sfp)
1656
1657 {
1658 Boolean still_looking=TRUE;
1659 GBQualPtr gbqual, qual, qual_temp=NULL, qual_gene=NULL;
1660 ImpFeatPtr ifp=NULL;
1661
1662 if ((sfp == NULL) || (sfp->data.choice != 8))
1663 return;
1664 if (sfp->qual == NULL)
1665 return;
1666
1667 ifp = sfp->data.value.ptrvalue;
1668 if (StringCmp(ifp->key, "gene") == 0)
1669 {
1670 gbqual = sfp->qual;
1671 for (qual=gbqual; qual; qual=qual->next) {
1672 if (StringCmp("gene", qual->qual) == 0) {
1673 qual_gene = qual;
1674 break;
1675 }
1676 }
1677 if (qual_gene == NULL) {
1678 return;
1679 }
1680 gbqual = remove_qual(gbqual, qual_gene);
1681 qual_gene->next = gbqual;
1682 sfp->qual = qual_gene;
1683 }
1684 return;
1685 } /* PutGeneFirst */
1686
1687 static void PutTranslationLast(SeqFeatPtr sfp)
1688
1689 {
1690 Boolean still_looking=TRUE;
1691 GBQualPtr gbqual, qual, qual_temp=NULL, qual_last;
1692 ImpFeatPtr ifp=NULL;
1693
1694 if ((sfp == NULL) || (sfp->data.choice != 8))
1695 return;
1696 if (sfp->qual == NULL)
1697 return;
1698
1699 ifp = sfp->data.value.ptrvalue;
1700 if (StringCmp(ifp->key, "CDS") == 0)
1701 {
1702 gbqual = sfp->qual;
1703 qual_last = NULL;
1704 for (qual=gbqual; qual->next; qual=qual->next)
1705 { /* We need to go to the end of the linked list */
1706 if (still_looking == TRUE &&
1707 StringCmp("translation", qual->qual) == 0)
1708 {
1709 still_looking = FALSE;
1710 if (qual->next != NULL)
1711 { /* if it's not the last qual anyway */
1712 if (qual_last == NULL) /*first*/
1713 gbqual = qual->next;
1714 else
1715 qual_last->next = qual->next;
1716 qual_temp = qual;
1717 qual=qual->next;
1718 qual_temp->next = NULL;
1719 }
1720 }
1721 qual_last = qual;
1722 if (qual->next == NULL)
1723 break;
1724 }
1725 qual->next = qual_temp;
1726 sfp->qual = gbqual;
1727 }
1728 return;
1729 } /* PutTranslationLast */
1730
1731 static CharPtr mrnaevtext1 = "Derived by automated computational analysis";
1732 static CharPtr mrnaevtext2 = "using gene prediction method:";
1733 static CharPtr mrnaevtext3 = "Supporting evidence includes similarity to:";
1734
1735 NLM_EXTERN CharPtr mRNAEvidenceComment(UserObjectPtr uop, Boolean add)
1736 {
1737 ObjectIdPtr oip;
1738 UserFieldPtr ufp, u, uu;
1739 CharPtr method = NULL, ptr, ne_name;
1740 static Char temp[20];
1741 Int2 ptrlen=0, np=0, nd=0, nm=0, ne=0;
1742 Boolean is_evidence = FALSE;
1743 Int4 Locus_id = 0;
1744
1745 if (uop == NULL) return NULL;
1746 if ((oip = uop->type) == NULL) return NULL;
1747 if (StringCmp(oip->str, "ModelEvidence") != 0) return NULL;
1748 for (ufp=uop->data; ufp; ufp=ufp->next) {
1749 oip = ufp->label;
1750 if (StringCmp(oip->str, "Method") == 0) {
1751 if (ufp->data.ptrvalue) {
1752 method = StringSave((CharPtr) ufp->data.ptrvalue);
1753 }
1754 }
1755 if (StringCmp(oip->str, "mRNA")==0) {
1756 is_evidence = TRUE;
1757 for (u = (UserFieldPtr) ufp->data.ptrvalue;u; u=u->next) {
1758 for (uu = (UserFieldPtr) u->data.ptrvalue; uu; uu=uu->next) {
1759 oip = uu->label;
1760 if (StringCmp(oip->str, "accession") == 0) {
1761 nm++;
1762 }
1763 }
1764 }
1765 }
1766 if (StringCmp(oip->str, "EST")==0) {
1767 is_evidence = TRUE;
1768 for (u = (UserFieldPtr) ufp->data.ptrvalue;u; u=u->next) {
1769 for (uu = (UserFieldPtr) u->data.ptrvalue;uu; uu=uu->next) {
1770 oip = uu->label;
1771 if (StringCmp(oip->str, "count") == 0) {
1772 ne = uu->data.intvalue;
1773 }
1774 if (StringCmp(oip->str, "organism") == 0) {
1775 ne_name = StringSave(( CharPtr) uu->data.ptrvalue);
1776 }
1777 }
1778 }
1779 }
1780 }
1781 ptrlen = StringLen (mrnaevtext1) + StringLen (mrnaevtext2) + StringLen (mrnaevtext3) + StringLen (method) + 25;
1782 if (np > 0) {
1783 ptrlen += StringLen("proteins") + 5;
1784 }
1785 if (nd > 0) {
1786 ptrlen += StringLen("domains") + 5;
1787 }
1788 if (nm > 0) {
1789 ptrlen += StringLen("mRNAs") + 5;
1790 }
1791 if (ne > 0) {
1792 ptrlen += StringLen("ESTs") + StringLen(ne_name) + 10;
1793 }
1794 ptr = (CharPtr) MemNew(ptrlen) + 1;
1795 if (add) {
1796 if (method != NULL) {
1797 sprintf (ptr, "%s %s %s.", mrnaevtext1, mrnaevtext2, method);
1798 } else {
1799 sprintf (ptr, "%s.", mrnaevtext1);
1800 }
1801 }
1802 if (is_evidence) {
1803 if (add) StringCat(ptr, " ");
1804 StringCat(ptr, "Supporting evidence includes similarity to:");
1805 }
1806 if (np > 0) {
1807 sprintf(temp, " %d proteins", np);
1808 StringCat(ptr, temp);
1809 }
1810 if (nd > 0) {
1811 if (np > 0)
1812 StringCat(ptr, ",");
1813 sprintf(temp, " %d domains", np);
1814 StringCat(ptr, temp);
1815 }
1816 if (nm > 0) {
1817 if (np > 0 || nd > 0)
1818 StringCat(ptr, ",");
1819 if (nm > 1) {
1820 sprintf(temp, " %d mRNAs", nm);
1821 } else {
1822 sprintf(temp, " %d mRNA", nm);
1823 }
1824 StringCat(ptr, temp);
1825 }
1826 if (ne > 0) {
1827 if ( np > 0 || nm > 0 || nd > 0)
1828 StringCat(ptr, ",");
1829 sprintf(temp, " %d %s ESTs", ne, ne_name);
1830 StringCat(ptr, temp);
1831 }
1832 return ptr;
1833 }
1834
1835 static CharPtr mRNAFeatEvidenceComment(SeqFeatPtr sfp_in)
1836 {
1837 RnaRefPtr rfp;
1838 UserObjectPtr uop, obj;
1839 ObjectIdPtr oip;
1840 UserFieldPtr uf;
1841
1842 rfp = (RnaRefPtr) sfp_in->data.value.ptrvalue;
1843 if (rfp->type != 2) { /* mRNA */
1844 return NULL;
1845 }
1846 if ((uop = sfp_in->ext) == NULL)
1847 return NULL;
1848 if ((oip = uop->type) == NULL) return NULL;
1849 if (StringCmp(oip->str, "CombinedFeatureUserObjects") != 0) return NULL;
1850 for (uf=uop->data; uf; uf=uf->next) {
1851 obj = (UserObjectPtr) uf->data.ptrvalue;
1852 return( mRNAEvidenceComment(obj, TRUE));
1853 }
1854 return NULL;
1855 }
1856
1857 NLM_EXTERN void PrintNAFeatByNumber (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1858 {
1859
1860 Boolean loc_ok;
1861 Char genetic_code[3];
1862 CharPtr ptr=NULL, sptr;
1863 ImpFeatPtr ifp;
1864 SeqFeatPtr sfp_in, sfp_out=NULL;
1865 Int4 status, total_feats, feat_index;
1866 SortStructPtr p;
1867
1868 if (gbp == NULL || gbp->feat == NULL) {
1869 return;
1870 }
1871 feat_index = ajp->pap_index;
1872 total_feats=gbp->feat->sfpListsize;
1873 if (total_feats == 0) {
1874 return;
1875 }
1876 sfp_out=ajp->sfp_out;
1877 if (sfp_out->qual)
1878 sfp_out->qual = GBQualFree(sfp_out->qual);
1879 ifp = sfp_out->data.value.ptrvalue;
1880 if (ifp->loc)
1881 ifp->loc = MemFree(ifp->loc);
1882 if (feat_index < total_feats) {
1883 p = gbp->feat->List + feat_index;
1884 if (p == NULL)
1885 return;
1886 if (p->tempload == TRUE) {
1887 GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
1888 &sfp_in, find_item);
1889 } else {
1890 sfp_in = p->sfp;
1891 }
1892 if (sfp_in == NULL) {
1893 return;
1894 }
1895 if (ajp->mode == PARTIAL_MODE &&
1896 sfp_in->data.choice != SEQFEAT_CDREGION) {
1897 sfp_out = cleanup_sfp(sfp_out);
1898 return;
1899 }
1900 status = ConvertToNAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1901 if (status < 1) {
1902 sfp_out = cleanup_sfp(sfp_out);
1903 return;
1904 }
1905 if (p->slp != NULL) {
1906 sfp_out->location = p->slp;
1907 }
1908 ifp = sfp_out->data.value.ptrvalue;
1909 flat2asn_install_feature_user_string(ifp->key, NULL);
1910 loc_ok=CheckAndGetNAFeatLoc(gbp->bsp, &ptr, sfp_out, TRUE);
1911 if (loc_ok == TRUE || ASN2FF_VALIDATE_FEATURES == FALSE) {
1912 ifp->loc = ptr;
1913 } else {
1914 flat2asn_delete_feature_user_string();
1915 flat2asn_install_feature_user_string(ifp->key, ptr);
1916 MemFree(ptr);
1917 if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
1918 ErrPostEx(SEV_WARNING, ERR_FEATURE_Dropped, "Unparsable location");
1919 }
1920 sfp_out = cleanup_sfp(sfp_out);
1921 flat2asn_delete_feature_user_string();
1922 return;
1923 }
1924 flat2asn_delete_feature_user_string();
1925 flat2asn_install_feature_user_string(ifp->key, ptr);
1926 if (p->dup == TRUE) {
1927 if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
1928 ErrPostEx(SEV_WARNING, ERR_FEATURE_Duplicated,
1929 "Duplicated feature dropped");
1930 }
1931 sfp_out = cleanup_sfp(sfp_out);
1932 flat2asn_delete_feature_user_string();
1933 return;
1934 }
1935 if (sfp_in->data.choice == SEQFEAT_CDREGION) {
1936 ComposeCodeBreakQuals(ajp, gbp->bsp, sfp_in, sfp_out,
1937 p->extra_loc, p->extra_loc_cnt, p->nsp);
1938 genetic_code[0]='\0';
1939 if (ASN2FF_TRANSL_TABLE == TRUE) {
1940 GetGeneticCode(genetic_code, sfp_in);
1941 if (genetic_code[0] != '\0') {
1942 sfp_out->qual = AddGBQual(sfp_out->qual,
1943 "transl_table", genetic_code);
1944 }
1945 }
1946 }
1947 if (sfp_in->data.choice == SEQFEAT_GENE) {
1948 if (ajp->show_gene == FALSE) {
1949 sfp_out = cleanup_sfp(sfp_out);
1950 flat2asn_delete_feature_user_string();
1951 return;
1952 }
1953 }
1954 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, p->nsp);
1955 LookForPartialImpFeat(sfp_out, FALSE);
1956 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1957 status = ValidateNAImpFeat(sfp_out);
1958 if (sfp_in->data.choice == SEQFEAT_CDREGION) {
1959 PutTranslationLast(sfp_out);
1960 } else if (sfp_in->data.choice == SEQFEAT_GENE) {
1961 PutGeneFirst(sfp_out);
1962 } else if (sfp_in->data.choice == SEQFEAT_RNA) {
1963 if ((sptr = mRNAFeatEvidenceComment(sfp_in)) != NULL) {
1964 sfp_out->qual =
1965 AddGBQual(sfp_out->qual, "note", sptr);
1966 }
1967 }
1968 if (status >= 0 || ASN2FF_VALIDATE_FEATURES == FALSE) {
1969 PrintImpFeatEx(ajp, gbp->bsp, sfp_out, gbp->gi, p->entityID, p->itemID);
1970 }
1971 flat2asn_delete_feature_user_string();
1972 }
1973 sfp_out = cleanup_sfp(sfp_out);
1974 return;
1975 } /* PrintNAFeatByNumber */
1976
1977 /***************************************************************************
1978 *PrintAAFeatByNumber
1979 *
1980 * This function prints out the genpept SeqFeats.
1981 *
1982 **************************************************************************/
1983
1984 NLM_EXTERN void PrintAAFeatByNumber (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1985 {
1986 CharPtr ptr=NULL;
1987 Char genetic_code[3];
1988 ImpFeatPtr ifp;
1989 Int2 status;
1990 Int4 feat_index, total_feats;
1991 NoteStructPtr nsp;
1992 SeqFeatPtr sfp_in, sfp_out=NULL;
1993 SortStructPtr p;
1994
1995 if (gbp == NULL || gbp->feat == NULL) {
1996 return;
1997 }
1998 feat_index = ajp->pap_index;
1999 total_feats=gbp->feat->sfpListsize;
2000 if (total_feats == 0) {
2001 return;
2002 }
2003 sfp_out=ajp->sfp_out;
2004 if (sfp_out->qual) {
2005 sfp_out->qual = GBQualFree(sfp_out->qual);
2006 }
2007 ifp = sfp_out->data.value.ptrvalue;
2008 if (ifp->loc) {
2009 ifp->loc = MemFree(ifp->loc);
2010 }
2011 if (feat_index < total_feats) {
2012 p = gbp->feat->List + feat_index;
2013 if (p == NULL || p->dup == TRUE) {
2014 return;
2015 }
2016 if ((sfp_in = p->sfp) == NULL) {
2017 GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
2018 &sfp_in, find_item);
2019 }
2020 if (sfp_in == NULL) {
2021 return;
2022 }
2023 nsp = p->nsp;
2024 switch (sfp_in->data.choice) {
2025 /* Note: the functions that CheckAndGetFeatLoc use for
2026 checking fails on protein locations sometimes. */
2027 case SEQFEAT_CDREGION:
2028 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
2029 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
2030 if (status < 0)
2031 break;
2032 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
2033 GetAAFeatLoc(gbp->bsp, &ptr, sfp_in, TRUE);
2034 ifp->loc = ptr;
2035 ptr = FlatLoc(gbp->bsp, sfp_in->location);
2036 sfp_out->qual =
2037 AddGBQual(sfp_out->qual, "coded_by", ptr);
2038 ptr = MemFree(ptr);
2039 genetic_code[0]='\0';
2040 if (ASN2FF_TRANSL_TABLE == TRUE) {
2041 GetGeneticCode(genetic_code, sfp_in);
2042 if (genetic_code[0] != '\0')
2043 sfp_out->qual =
2044 AddGBQual(sfp_out->qual, "transl_table", genetic_code);
2045 }
2046 status = ValidateAAImpFeat(sfp_out, TRUE);
2047 if (status >= 0)
2048 PrintImpFeat(ajp, gbp->bsp, sfp_out);
2049 break;
2050 case SEQFEAT_PROT:
2051 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
2052 AddProteinQuals(sfp_in, sfp_out, nsp);
2053 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
2054 if (status < 0)
2055 break;
2056 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
2057 GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
2058 ifp->loc = ptr;
2059 status = ValidateAAImpFeat(sfp_out, FALSE);
2060 if (status >= 0)
2061 PrintImpFeat(ajp, gbp->bsp, sfp_out);
2062 break;
2063 case SEQFEAT_SEQ:
2064 case SEQFEAT_IMP:
2065 case SEQFEAT_REGION:
2066 case SEQFEAT_COMMENT:
2067 case SEQFEAT_BOND:
2068 case SEQFEAT_SITE:
2069 case SEQFEAT_PSEC_STR:
2070 case SEQFEAT_NON_STD_RESIDUE:
2071 case SEQFEAT_HET:
2072 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
2073 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
2074 if (status < 0)
2075 break;
2076 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
2077 GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
2078 ifp->loc = ptr;
2079 status = ValidateAAImpFeat(sfp_out, FALSE);
2080 if (status >= 0)
2081 PrintImpFeat(ajp, gbp->bsp, sfp_out);
2082 break;
2083 case SEQFEAT_GENE:
2084 if (ajp->show_gene == FALSE) {
2085 break;
2086 }
2087 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
2088 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
2089 if (status < 0)
2090 break;
2091 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
2092 GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
2093 ifp->loc = ptr;
2094 status = ValidateAAImpFeat(sfp_out, FALSE);
2095 if (status >= 0)
2096 PrintImpFeat(ajp, gbp->bsp, sfp_out);
2097 break;
2098 default:
2099 break;
2100 }
2101 }
2102 sfp_out = cleanup_sfp(sfp_out);
2103 } /* PrintAAFeatByNumber */
2104
2105
2106 /************************************************************************
2107 *GetProductFromCDS(ValNodePtr product, ValNodePtr location, Int4 length)
2108 *
2109 * Gets the CDS product, using SeqPortNewByLoc
2110 * The bsp is that of the protein, and comes from the location. The bsp
2111 * is found in the calling program anyway, as it's used to get
2112 * the EC_NUM.
2113 * The protein sequence comes back in allocated memory. The user
2114 * is responsible for deallocating that.
2115 *
2116 * A check is made (BioseqFind()) that the protein Bioseq is in memory.
2117 * This guarantees that a fetch is NOT made if it is not memory, to accomodate
2118 * the splitting of DNA and protein in Entrez. In this case, it's just
2119 * translated.
2120 *
2121 *************************************************************************/
2122
2123 NLM_EXTERN CharPtr GetProductFromCDS(ValNodePtr product, ValNodePtr location, Int4 bsp_length)
2124
2125 {
2126 Boolean at_end=FALSE;
2127 CharPtr protein_seq=NULL, start_ptr=NULL;
2128 Int4 length;
2129 SeqPortPtr spp;
2130 Uint1 residue, code;
2131 BioseqPtr bsp;
2132 SeqIdPtr sip;
2133
2134 if (ASN2FF_IUPACAA_ONLY == TRUE)
2135 code = Seq_code_iupacaa;
2136 else
2137 code = Seq_code_ncbieaa;
2138
2139 if (product) {
2140 sip = SeqLocId(product);
2141 bsp = BioseqFindCore(sip);
2142 if (bsp != NULL) /* Bioseq is (or has been) in memory */ {
2143 length = SeqLocLen(product);
2144 if (length > 0) {
2145 if (SeqLocStart(location) == 0 ||
2146 SeqLocStop(location) == bsp_length-1)
2147 at_end = TRUE;
2148 start_ptr = protein_seq =
2149 (CharPtr) MemNew((size_t) (length*sizeof(CharPtr)));
2150 spp = SeqPortNewByLoc(product, code);
2151 spp->do_virtual = TRUE;
2152 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
2153 if ( !IS_residue(residue) && residue != INVALID_RESIDUE )
2154 continue;
2155 if (residue == INVALID_RESIDUE)
2156 residue = (Uint1) 'X';
2157 *protein_seq = residue;
2158 protein_seq++;
2159 }
2160 SeqPortFree(spp);
2161 if (at_end) {
2162 if (StringLen(start_ptr) < GENPEPT_MIN)
2163 start_ptr = MemFree(start_ptr);
2164 }
2165 }
2166 }
2167 }
2168 return start_ptr;
2169 }
2170
2171 /**************************************************************************
2172 *CharPtr GettRNAaa (tRNAPtr trna, Boolean error_messages)
2173 *
2174 * Return a pointer containing the amino acid type.
2175 **************************************************************************/
2176
2177 static CharPtr GettRNAaa (tRNAPtr trna, Boolean error_msgs)
2178
2179 {
2180 CharPtr ptr=NULL;
2181 SeqCodeTablePtr table;
2182 Uint1 seq_code;
2183 /*
2184 The choice values used in the tRNA structure do NOT corresond to
2185 the choice(==ENUMs) of Seq-code_type, and the latter are used
2186 by all the utility functions, so we map them...
2187 */
2188 if ( trna && trna -> aatype) {
2189 switch (trna -> aatype) {
2190 case 1:
2191 seq_code = 2;
2192 break;
2193 case 2:
2194 seq_code = 8;
2195 break;
2196 case 3:
2197 seq_code = 7;
2198 break;
2199 case 4:
2200 seq_code = 11;
2201 break;
2202 }
2203
2204 if ((table=SeqCodeTableFind (seq_code)) != NULL)
2205 ptr = Get3LetterSymbol(seq_code, table, trna->aa, error_msgs);
2206 }
2207
2208 return ptr;
2209 } /* GettRNAaa */
2210
2211 /*************************************************************************
2212 *ComposetRNANote (Asn2ffJobPtr ajp, NoteStructPtr nsp, tRNAPtr trna, )
2213 *
2214 * Add info from Trna-ext to Note stack in the GeneStructPtr.
2215 **************************************************************************/
2216
2217 static void ComposetRNANote(Asn2ffJobPtr ajp, NoteStructPtr nsp, tRNAPtr trna)
2218 {
2219 /*
2220 Char buffer[25];
2221 CharPtr ptr = &(buffer[0]);
2222 Int2 index;
2223 Uint1 codon[4];
2224
2225 if (! trna)
2226 return;
2227
2228 if ((trna->codon)[0] != 255)
2229 {
2230 codon[3] = '\0';
2231 for (index=0; index<6; index++)
2232 {
2233 if ((trna->codon)[index] == 255)
2234 break;
2235 if (CodonForIndex((trna->codon)[index], Seq_code_iupacna, codon))
2236 {
2237 StringCpy(ptr, (CharPtr) codon);
2238 ptr += 3;
2239 }
2240 else
2241 {
2242 *ptr = '?'; ptr++;
2243 }
2244 if (index<5 && (trna->codon)[index+1] != 255)
2245 {
2246 *ptr = ','; ptr++;
2247 *ptr = ' '; ptr++;
2248 }
2249 }
2250 if ((trna->codon)[1] == 255)
2251 {
2252 ptr = &buffer[0];
2253 SaveNoteToCharPtrStack(nsp, "codon recognized:", ptr);
2254 }
2255 else
2256 {
2257 ptr = &buffer[0];
2258 SaveNoteToCharPtrStack(nsp, "codons recognized:", ptr);
2259 }
2260 }
2261 return;
2262 */
2263
2264 Char buffer [25];
2265 Int2 num;
2266
2267 num = ComposeCodonsRecognizedString (trna, buffer, sizeof (buffer));
2268 if (num < 1 || StringHasNoText (buffer)) return;
2269 if (num == 1) {
2270 SaveNoteToCharPtrStack(nsp, "codon recognized:", buffer);
2271 } else {
2272 SaveNoteToCharPtrStack(nsp, "codons recognized:", buffer);
2273 }
2274
2275 } /* ComposetRNANote */
2276
2277
2278 /************************************************************************
2279 * Make the anticodon qualifier and (possible) note to the tRNA
2280 * with the following paradigm:
2281 * 0.) First look at the new anticodon slot on tRNAPtr
2282 * if not found do the rest:
2283 ** 1.) Look at SeqFeat.ext for a UserObject using the fct. QualLocWrite,
2284 * if result is not NULL, use this location in anticodon qualifier;
2285 *
2286 * 2.) Look for an anticodon qualifier, use if no QualLocWrite's
2287 * result was not zero;
2288 * 3.) Make note if neither 1.) or 2.) was true, or there are
2289 * multiple codons.
2290 *****************************
2291 * NEW ALGORITHM 07-15-96
2292 *****************************
2293 * 1) aa present?
2294 * print /product = tRNA-aa
2295 * 2) codon recognized present?
2296 * print /note="codon recognized: codon"
2297 * 3) anticodon and aa present?
2298 * print /anticodon=...
2299 *************************************************************************/
2300 static void DotRNAQuals (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, NoteStructPtr nsp, SeqLocPtr PNTR extra_loc,
2301 Int2 extra_loc_cnt)
2302 {
2303 Boolean found_anticodon=FALSE /*, found_qual=FALSE -- UNUSED */;
2304 Char buffer[40];
2305 CharPtr aa_ptr, newptr=NULL, ptr = &(buffer[0]), tmp=NULL;
2306 GBQualPtr curq;
2307 RnaRefPtr rrp;
2308 tRNAPtr trna=NULL;
2309 SeqLocPtr slp=NULL;
2310 SeqIntPtr sip;
2311
2312 if (sfp_in == NULL) {
2313 return;
2314 }
2315 if (sfp_in->data.choice != SEQFEAT_RNA) {
2316 return;
2317 }
2318 rrp = sfp_in->data.value.ptrvalue;
2319 /* Look for anticodon struct */
2320 if (rrp->ext.choice == 2) {
2321 newptr = MemNew(50*sizeof(Char));
2322 trna = rrp->ext.value.ptrvalue;
2323 if ((aa_ptr = GettRNAaa(trna, ajp->error_msgs)) != NULL) {
2324 if (GBQualPresent("product", sfp_out->qual) == FALSE) {
2325 sprintf(newptr, "tRNA-%s", aa_ptr);
2326 sfp_out->qual = AddGBQual(sfp_out->qual, "product", newptr);
2327 }
2328 }
2329 if (trna && (slp = trna->anticodon) != NULL && aa_ptr) {
2330 if (extra_loc_cnt > 0) {
2331 slp = extra_loc[0];
2332 }
2333 if (slp && slp->choice == SEQLOC_INT) {
2334 sip = slp->data.ptrvalue;
2335 sprintf(ptr, "%ld..%ld", (long) sip->from+1, (long) sip->to+1);
2336 sprintf(newptr, "(pos:%s,aa:%s)", ptr, aa_ptr);
2337 sfp_out->qual = AddGBQual(sfp_out->qual, "anticodon", newptr);
2338 found_anticodon=TRUE;
2339 }
2340 }
2341 }
2342 if (! found_anticodon) {
2343 if (sfp_in->ext) { /* Look for UserObject */
2344 tmp = QualLocWrite(sfp_in->ext, ptr);
2345 if (tmp) {
2346 newptr = MemNew(50*sizeof(Char));
2347 rrp = sfp_in->data.value.ptrvalue;
2348 trna = rrp->ext.value.ptrvalue;
2349 aa_ptr = GettRNAaa(trna, ajp->error_msgs);
2350 if (aa_ptr) {
2351 sprintf(newptr, "(pos:%s,aa:%s)", ptr, aa_ptr);
2352 sfp_out->qual =
2353 AddGBQual(sfp_out->qual, "anticodon", newptr);
2354 found_anticodon=TRUE;
2355 }
2356 }
2357 }
2358 }
2359 if (! found_anticodon) {
2360 /* Look for anticodon qual if no UserObject found */
2361 for (curq=sfp_in->qual; curq; curq=curq->next)
2362 if (StringCmp("anticodon", curq->qual) == 0) {
2363 sfp_out->qual =
2364 AddGBQual(sfp_out->qual, "anticodon", curq->val);
2365 /* found_qual=TRUE; -- NO EFFECT */
2366 break;
2367 }
2368 }
2369
2370 /* make note "codon recognized*/
2371 ComposetRNANote(ajp, nsp, trna);
2372 MemFree(newptr);
2373
2374 } /* DotRNAQuals */
2375
2376 /**************************************************************************
2377 *ConvertToAAImpFeat
2378 *
2379 * This code copies a SeqFeat into an ImpFeat format for use in
2380 * producing GenBank format. Two SeqFeatPtr's should be passed
2381 * in as arguments (sfp_in, sfp_out). On the first call, of a
2382 * number of calls, sfp_out should be NULL so that memory for
2383 * ImpFeat can be allocated. On subsequent calls, sfp_out->data.choice
2384 * should be "8" (for ImpFeats).
2385 *
2386 * Written by Tom Madden
2387 *
2388 **************************************************************************/
2389
2390 NLM_EXTERN Int2 ConvertToAAImpFeat (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfpp_out, SortStructPtr p)
2391 {
2392 BioseqPtr bsp=NULL;
2393 Char printbuf[41], temp[65];
2394 CharPtr ptr;
2395 ImpFeatPtr ifp, ifp_in;
2396 Int2 retval=1;
2397 NoteStructPtr nsp;
2398 GeneStructPtr gsp;
2399 ProtRefPtr prot;
2400 SeqFeatPtr sfp_out;
2401 SeqIdPtr sip=NULL, xid;
2402 ValNodePtr vnp, vnp1;
2403
2404 sfp_out = *sfpp_out;
2405
2406 if (sfp_out->data.choice != SEQFEAT_IMP)
2407 return -1;
2408
2409 ifp = (ImpFeatPtr) sfp_out->data.value.ptrvalue;
2410
2411 sfp_out->partial = sfp_in->partial;
2412 sfp_out->comment = sfp_in->comment;
2413 sfp_out->exp_ev = sfp_in->exp_ev;
2414 sfp_out->location = sfp_in->location;
2415
2416 nsp = p->nsp;
2417 gsp = p->gsp;
2418 if (sfp_out->comment) {
2419 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_out->comment);
2420 }
2421
2422 switch (sfp_in->data.choice) {
2423 case SEQFEAT_CDREGION:
2424 ifp->key = StringSave("CDS");
2425 break;
2426 case SEQFEAT_PROT:
2427 prot = sfp_in->data.value.ptrvalue;
2428 if (prot->processed == 0 || prot->processed == 1) {
2429 GetProtRefInfo(ajp->format, gsp, nsp, prot);
2430 ifp->key = StringSave("Protein");
2431 } else if (prot->processed == 2) {
2432 ifp->key = StringSave("mat_peptide");
2433 } else if (prot->processed == 3) {
2434 ifp->key = StringSave("sig_peptide");
2435 } else if (prot->processed == 4) {
2436 ifp->key = StringSave("transit_peptide");
2437 }
2438 if (sfp_in->location) {
2439 sip = SeqLocId(sfp_in->location);
2440 if (sip)
2441 bsp = BioseqFind(sip);
2442 if (bsp) {
2443 vnp = bsp->descr;
2444 for (vnp = bsp->descr; vnp; vnp = vnp->next) {
2445 if (vnp->choice != Seq_descr_modif) {
2446 continue;
2447 }
2448 for (vnp1 = vnp->data.ptrvalue; vnp1; vnp1=vnp1->next) {
2449 if (vnp1->data.intvalue == 1) {
2450 sfp_out->partial = TRUE;
2451 break;
2452 }
2453 }
2454 }
2455 }
2456 }
2457 break;
2458 case SEQFEAT_SEQ:
2459 ifp->key = StringSave("misc_feature");
2460 if ((xid=CheckXrefFeat(gbp->bsp, sfp_in)) != NULL)
2461 {
2462 SeqIdWrite(xid, printbuf, PRINTID_FASTA_SHORT, 40);
2463 ptr = &(temp[0]);
2464 sprintf(ptr, "Cross-reference: %s", printbuf);
2465 SaveNoteToCharPtrStack(nsp, NULL, ptr);
2466 }
2467 else
2468 retval = 0;
2469 break;
2470 case SEQFEAT_IMP:
2471 ifp_in = (ImpFeatPtr) sfp_in->data.value.ptrvalue;
2472 ifp->key = StringSave(ifp_in->key);
2473 break;
2474 case SEQFEAT_REGION:
2475 sfp_out->qual =
2476 AddGBQual(sfp_out->qual, "region_name", sfp_in->data.value.ptrvalue);
2477 ifp->key = StringSave("Region");
2478 break;
2479 case SEQFEAT_COMMENT:
2480 ifp->key = StringSave("misc_feature");
2481 break;
2482 case SEQFEAT_BOND:
2483 ptr = AsnEnumStr("SeqFeatData.bond",
2484 (Int2) (sfp_in->data.value.intvalue));
2485 sfp_out->qual = AddGBQual(sfp_in->qual, "bond_type", ptr);
2486 ifp->key = StringSave("Bond");
2487 break;
2488 case SEQFEAT_SITE:
2489 ptr = AsnEnumStr("SeqFeatData.site",
2490 (Int2) (sfp_in->data.value.intvalue));
2491 sfp_out->qual = AddGBQual(sfp_out->qual, "site_type", ptr);
2492 ifp->key = StringSave("Site");
2493 break;
2494 case SEQFEAT_PSEC_STR:
2495 ptr = AsnEnumStr("SeqFeatData.psec-str",
2496 (Int2) (sfp_in->data.value.intvalue));
2497 sfp_out->qual = AddGBQual(sfp_out->qual, "sec_str_type", ptr);
2498 ifp->key = StringSave("SecStr");
2499 break;
2500 case SEQFEAT_NON_STD_RESIDUE:
2501 sfp_out->qual =
2502 AddGBQual(sfp_out->qual, "non-std-residue",
2503 sfp_in->data.value.ptrvalue);
2504 ifp->key = StringSave("NonStdResidue");
2505 break;
2506 case SEQFEAT_HET:
2507 sfp_out->qual =
2508 AddGBQual(sfp_out->qual, "heterogen", sfp_in->data.value.ptrvalue);
2509 ifp->key = StringSave("Het");
2510 break;
2511 default:
2512 if (ajp->error_msgs == TRUE)
2513 ErrPostStr(SEV_WARNING, ERR_FEATURE_UnknownFeatureKey,
2514 "Unimplemented type of feat in ConvertToAAImpFeat");
2515 retval = 1;
2516 break;
2517 }
2518
2519 return retval;
2520
2521 } /* ConvertToAAImpFeat */
2522
2523 /*****************************************************************************
2524 * CompareTranslation:
2525 * -- if bsp != translation's value return FALSE
2526 *****************************************************************************/
2527 static Boolean CompareTranslation(ByteStorePtr bsp, CharPtr qval)
2528 {
2529 CharPtr ptr;
2530 Int2 residue, residue1, residue2;
2531 Int4 len, blen;
2532 Boolean done;
2533
2534 if (qval == NULL || bsp == NULL) {
2535 return FALSE; /* no comparison */
2536 }
2537 len = StringLen(qval);
2538 BSSeek(bsp, 0, SEEK_SET);
2539
2540 blen = BSLen(bsp);
2541 done = FALSE;
2542 while ((! done) && (len)) {
2543 residue1 = qval[(len-1)];
2544 if (residue1 == 'X') /* remove terminal X */
2545 len--;
2546 else
2547 done = TRUE;
2548 }
2549 done = FALSE;
2550 while ((! done) && (blen)) {
2551 BSSeek(bsp, (blen-1), SEEK_SET);
2552 residue2 = BSGetByte(bsp);
2553 if (residue2 == 'X')
2554 blen--;
2555 else
2556 done = TRUE;
2557 }
2558 BSSeek(bsp, 0, SEEK_SET);
2559 if (blen != len) {
2560 return FALSE;
2561 } else {
2562 for (ptr = qval; *ptr != '\0' &&
2563 (residue = BSGetByte(bsp)) != EOF; ptr++) {
2564
2565 if (residue != *ptr) {
2566 return FALSE;
2567 }
2568
2569 } /* for */
2570
2571 } /* compare two sequences */
2572 return TRUE;
2573 } /* check */
2574
2575 static void GatherProductGeneInfo (Asn2ffJobPtr ajp, SeqFeatPtr sfp_in, GBEntryPtr gbp, SortStructPtr gp, Uint1 method)
2576 {
2577 BioseqPtr p_bsp;
2578 GatherScope gs;
2579 GeneStructPtr gsp;
2580 NoteStructPtr nsp;
2581 Int2 index;
2582 Int4 length, longest_length=0;
2583 ProtRefPtr prot=NULL;
2584 SeqFeatPtr sfp=NULL;
2585 SeqIdPtr sip;
2586 ValNodePtr product=NULL;
2587 OrganizeProtPtr opp;
2588 SortStructPtr p;
2589 Uint2 entityID;
2590
2591 if (sfp_in->product)
2592 product = sfp_in->product;
2593 else
2594 return;
2595 if (gp == NULL)
2596 return;
2597 gsp = gp->gsp;
2598 nsp = gp->nsp;
2599 sip = SeqLocId(product);
2600 p_bsp = BioseqFindCore(sip);
2601 if (p_bsp == NULL) /* Bioseq is (or has been) in memory */
2602 return;
2603 if (ajp->useSeqMgrIndexes) {
2604 sfp = SeqMgrGetBestProteinFeature (p_bsp, NULL);
2605 if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) {
2606 prot = (ProtRefPtr) sfp->data.value.ptrvalue;
2607 if (prot != NULL) {
2608 GetProtRefInfo(ajp->format, gsp, nsp, prot);
2609 GetProtRefComment(sfp_in, p_bsp, ajp, NULL, nsp, method);
2610 return;
2611 }
2612 }
2613 }
2614 entityID = ObjMgrGetEntityIDForPointer(p_bsp);
2615 opp = (OrganizeProtPtr) MemNew(sizeof(OrganizeProt));
2616 opp->size = 0;
2617 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
2618 MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
2619 gs.ignore[OBJ_SEQANNOT] = FALSE;
2620 gs.ignore[OBJ_SEQFEAT] = FALSE;
2621 gs.get_feats_location = TRUE;
2622 gs.target = product;
2623 gs.seglevels = 1;
2624 GatherEntity(entityID, opp, get_prot_feats, &gs);
2625 if (opp->size > 0) {
2626 prot = NULL;
2627 p = opp->list;
2628 for (index=0; index < opp->size; index++, p++) {
2629 if ((sfp = p->sfp) == NULL) {
2630 GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
2631 &sfp, find_item);
2632 }
2633 if (sfp == NULL) {
2634 continue;
2635 }
2636 if (sfp->data.choice != SEQFEAT_PROT) {
2637 continue;
2638 }
2639 if ((length=SeqLocLen(sfp->location)) == -1)
2640 continue;
2641 if (length > longest_length) {
2642 prot = sfp->data.value.ptrvalue;
2643 longest_length = length;
2644 }
2645 }
2646 GetProtRefInfo(ajp->format, gsp, nsp, prot);
2647 }
2648 GetProtRefComment(sfp_in, p_bsp, ajp, opp, nsp, method);
2649 p = opp->list;
2650 for (index=0; index < opp->size; index++, p++) {
2651 if (p && p->gsp)
2652 GeneStructFree(p->gsp);
2653 if (p && p->nsp)
2654 NoteStructFree(p->nsp);
2655 }
2656 MemFree(opp->list);
2657 MemFree(opp);
2658
2659 return;
2660 }
2661
2662 /**************************************************************************
2663 *ConvertToNAImpFeat
2664 *
2665 * This code copies a SeqFeat into an ImpFeat format for use in
2666 * producing GenBank format. Two SeqFeatPtr's should be passed
2667 * in as arguments (sfp_in, sfp_out).
2668 * return status:
2669 * 1: conversion successful
2670 * 0: no conversion, also no error (data in ASN.1 is lost or put out
2671 * otherwise
2672 * -1 an error
2673 **************************************************************************/
2674
2675 static Boolean ProductIsLocal (Uint2 entityID, SeqLocPtr product)
2676
2677 {
2678 BioseqPtr bsp;
2679 SeqEntryPtr sep, oldscope;
2680 SeqIdPtr sip = NULL;
2681 SeqLocPtr slp;
2682
2683 slp = SeqLocFindNext (product, NULL);
2684 while (slp != NULL && sip == NULL) {
2685 sip = SeqLocId (slp);
2686 slp = SeqLocFindNext (product, slp);
2687 }
2688 if (sip == NULL) return FALSE;
2689 sep = GetTopSeqEntryForEntityID (entityID);
2690 if (sep == NULL) return FALSE;
2691 oldscope = SeqEntrySetScope (sep);
2692 bsp = BioseqFind (sip);
2693 SeqEntrySetScope (oldscope);
2694 if (bsp != NULL) return TRUE;
2695 return FALSE;
2696 }
2697
2698 NLM_EXTERN Int2 ConvertToNAImpFeat (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfpp_out, SortStructPtr gp)
2699 {
2700 BioseqPtr bsp=gbp->bsp, pbsp=NULL;
2701 Boolean found_key, non_pseudo = FALSE;
2702 CdRegionPtr cdr;
2703 Char buffer[2], printbuf[41], temp[65];
2704 CharPtr buf_ptr = &(buffer[0]), protein_seq=NULL, ptr = &(temp[0]);
2705 NoteStructPtr nsp;
2706 ImpFeatPtr ifp, ifp_in;
2707 Int2 retval=1;
2708 Int4 length=0;
2709 SeqFeatPtr sfp_out;
2710 SeqIdPtr xid;
2711 ValNodePtr product;
2712 ValNodePtr mod, syn;
2713 BioSourcePtr biosp;
2714 OrgRefPtr orp;
2715 RnaRefPtr rrp;
2716 ByteStorePtr byte_sp;
2717 /* Int4 len_cds, len_prot; -- UNUSED */
2718 Uint1 method = 0;
2719 GeneRefPtr grp;
2720 Boolean was_gene = FALSE;
2721 CharPtr key=NULL, tmp;
2722 GeneStructPtr gsp;
2723 CharPtr except_msg="No explanation supplied", loc;
2724
2725 sfp_out = *sfpp_out;
2726
2727 if (sfp_out->data.choice != SEQFEAT_IMP)
2728 return -1;
2729
2730 ifp = (ImpFeatPtr) sfp_out->data.value.ptrvalue;
2731
2732 sfp_out->partial = sfp_in->partial;
2733 sfp_out->comment = sfp_in->comment;
2734 sfp_out->exp_ev = sfp_in->exp_ev;
2735 sfp_out->location = sfp_in->location;
2736 sfp_out->product = sfp_in->product;
2737 sfp_out->pseudo = sfp_in->pseudo;
2738
2739 found_key = GetNAFeatKey(ajp->show_gene, &(key), sfp_in, sfp_out);
2740 if (!found_key)
2741 return -1;
2742 ifp->key = key;
2743 nsp = gp->nsp;
2744 gsp=gp->gsp;
2745 switch (sfp_in->data.choice)
2746 {
2747 case SEQFEAT_BIOSRC:
2748 biosp = sfp_in->data.value.ptrvalue;
2749 orp = (OrgRefPtr) biosp->org;
2750 if (orp) {
2751 if (orp->taxname) {
2752 sfp_out->qual = AddGBQual(sfp_out->qual,
2753 "organism", orp->taxname);
2754 } else if (orp->common) {
2755 if (StrStr(orp->common, "virus") ||
2756 StrStr(orp->common, "Virus") ||
2757 StrStr(orp->common, "phage") ||
2758 StrStr(orp->common, "Phage") ||
2759 StrStr(orp->common, "viroid") ||
2760 StrStr(orp->common, "Viroid"))
2761 {
2762 sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
2763 orp->common);
2764 }
2765 }
2766 /* added from OrgRef.mod 03.20.96 */
2767 for (mod = orp->mod; mod; mod = mod->next) {
2768 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) mod->data.ptrvalue);
2769 }
2770 } else {
2771 sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
2772 "unknown");
2773 }
2774 sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual,
2775 TRUE);
2776 break;
2777 case SEQFEAT_CDREGION:
2778 product = sfp_in->product;
2779 if (ajp->mode == RELEASE_MODE) {
2780 if (GBQualPresent("pseudo", sfp_in->qual) == FALSE &&
2781 gsp->pseudo == FALSE && sfp_in->pseudo == FALSE) {
2782 non_pseudo = TRUE;
2783 }
2784 if (non_pseudo) {
2785 if (product == NULL) {
2786 if (ajp->error_msgs == TRUE) {
2787 loc = SeqLocPrint(sfp_in->location);
2788 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2789 "Dropping CDS due to missing product: %s", loc);
2790 MemFree(loc);
2791 }
2792 return -1;
2793 }
2794 if (ajp->forgbrel && CheckSeqIdChoice(SeqLocId(product)) == FALSE) {
2795 if (ajp->error_msgs == TRUE) {
2796 loc = SeqLocPrint(sfp_in->location);
2797 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2798 "Dropping CDS due to missing EMBL/DDBJ/GB protein accession: %s", loc);
2799 MemFree(loc);
2800 }
2801 return -1;
2802 }
2803 if (ajp->forgbrel && (pbsp = BioseqFindCore(SeqLocId(product))) == NULL) {
2804 if (ajp->error_msgs == TRUE) {
2805 loc = SeqLocPrint(sfp_in->location);
2806 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2807 "Dropping CDS due to missing protein: %s", loc);
2808 MemFree(loc);
2809 }
2810 return -1;
2811 }
2812 if (pbsp != NULL) {
2813 if (ajp->forgbrel && CheckSeqIdChoice(pbsp->id) == FALSE) {
2814 if (ajp->error_msgs == TRUE) {
2815 loc = SeqLocPrint(sfp_in->location);
2816 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2817 "Dropping CDS due to missing EMBL/DDBJ/GB protein accession: %s", loc);
2818 MemFree(loc);
2819 }
2820 return -1;
2821 }
2822 if (ajp->show_version == TRUE) {
2823 if (CheckSeqIdAccVer(pbsp->id) == FALSE) {
2824 if (ajp->error_msgs == TRUE) {
2825 loc = SeqLocPrint(sfp_in->location);
2826 ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped,
2827 "Dropping CDS due to missing protein accession.version: %s", loc);
2828 MemFree(loc);
2829 }
2830 return -1;
2831 }
2832 }
2833 }
2834 }
2835 }
2836 cdr = (CdRegionPtr) sfp_in->data.value.ptrvalue;
2837 if ((GBQualPresent("codon_start", sfp_in->qual)) == FALSE)
2838 { /* Above checks if codon_start is already present. */
2839 if (cdr->frame)
2840 sprintf(buf_ptr, "%ld", (long) (cdr->frame));
2841 else
2842 sprintf(buf_ptr, "1");
2843 sfp_out->qual = AddGBQual(sfp_out->qual, "codon_start", buf_ptr);
2844 }
2845 if (product && (! ajp->genome_view) && (ProductIsLocal (ajp->entityID, product))) {
2846 byte_sp = ProteinFromCdRegion(sfp_in, FALSE);
2847
2848 if (product) {
2849 length = bsp->length;
2850 protein_seq = GetProductFromCDS(product, sfp_in->location, length);
2851 /* check conflict flag and fix it */
2852 if (cdr->conflict == TRUE) {
2853 if (CompareTranslation(byte_sp, protein_seq)) {
2854 cdr->conflict = FALSE;
2855 } else {
2856 method = METHOD_concept_transl_a;
2857 }
2858 }
2859 if (protein_seq) {
2860 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2861 gsp->pseudo == FALSE && sfp_in->pseudo == FALSE) {
2862 sfp_out->qual = AddGBQual(sfp_out->qual,
2863 "translation", protein_seq);
2864 }
2865 MemFree(protein_seq);
2866 }
2867 }
2868 BSFree(byte_sp);
2869 }
2870 if (sfp_in->pseudo) {
2871 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2872 }
2873 if (sfp_in->excpt) {
2874 if (StringCmp("ribosomal slippage", sfp_in->except_text) == 0 ||
2875 StringCmp("ribosome slippage", sfp_in->except_text) == 0) {
2876 sfp_out->qual = AddGBQual(sfp_out->qual,
2877 "note", sfp_in->except_text);
2878 sfp_out->excpt = FALSE;
2879 } else if (StringCmp("trans splicing", sfp_in->except_text) == 0 ||
2880 StringCmp("trans-splicing", sfp_in->except_text) == 0) {
2881 sfp_out->excpt = FALSE;
2882 } else if (sfp_in->except_text) {
2883 sfp_out->qual = AddGBQual(sfp_out->qual,
2884 "exception", sfp_in->except_text);
2885 } else if (GBQualPresent("exception", sfp_in->qual) == TRUE) {
2886 sfp_out->qual = AddGBQual(sfp_out->qual,
2887 "exception", sfp_in->qual->val);
2888 } else if (sfp_out->comment != NULL) {
2889 sfp_out->qual = AddGBQual(sfp_out->qual,
2890 "exception", sfp_in->comment);
2891 sfp_out->comment = NULL;
2892 } else {
2893 sfp_out->qual = AddGBQual(sfp_out->qual,
2894 "exception", except_msg);
2895 }
2896 } else {
2897 if (GBQualPresent("exception", sfp_in->qual) == TRUE) {
2898 sfp_out->qual = AddGBQual(sfp_out->qual,
2899 "exception", sfp_in->qual->val);
2900 }
2901 }
2902
2903 GatherProductGeneInfo(ajp, sfp_in, gbp, gp, method);
2904
2905 /******************************************************************************
2906 - asn2ff shouldn't generate a de-novo /translation for any
2907 cdregion that lacks a product, regardless of mode or -V setting 2/15/99
2908 ******************************************************************************
2909 if (protein_seq == NULL && ajp->mode != RELEASE_MODE) {
2910 protein_seq = BSMerge(byte_sp, NULL);
2911 if ( protein_seq && protein_seq[0] != '-') {
2912 len_prot = StringLen(protein_seq);
2913 SeqLocLen(sfp_in->location) - (cdr->frame - 1);
2914 if (len_prot >= 6) {
2915 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2916 gsp->pseudo == FALSE) {
2917 sfp_out->qual =
2918 AddGBQual(sfp_out->qual,
2919 "translation", protein_seq);
2920 }
2921 }
2922 }
2923 MemFree(protein_seq);
2924 }
2925 BSFree(byte_sp);
2926 */
2927 break;
2928 case SEQFEAT_RNA:
2929 rrp = sfp_in->data.value.ptrvalue;
2930 /* the following code was taken (almost) directly
2931 from Karl Sirotkin's code. */
2932 switch ( rrp -> type){ /* order of case n: matches tests in
2933 is_RNA_type() of genasn.c in
2934 GenBankConversion directory */
2935 case 2:
2936 break;
2937 case 255:
2938 break;
2939 case 3:
2940 if (rrp->ext.choice == 1) {
2941 if ((GBQualPresent("product", sfp_in->qual)) == FALSE) {
2942 sfp_out->qual = AddGBQual(sfp_out->qual,
2943 "product", (CharPtr) rrp->ext.value.ptrvalue);
2944 }
2945 } else if (rrp->ext.choice == 0 ||
2946 rrp->ext.choice == 2) {
2947 DotRNAQuals(ajp, gbp, sfp_in, sfp_out,
2948 gp->nsp, gp->extra_loc, gp->extra_loc_cnt);
2949 }
2950 break;
2951 case 4:
2952 break;
2953 case 1:
2954 if (rrp->ext.choice == 1) {
2955 if ((GBQualPresent("product", sfp_in->qual)) == FALSE) {
2956 sfp_out->qual = AddGBQual(sfp_out->qual,
2957 "product", (CharPtr) rrp->ext.value.ptrvalue);
2958 }
2959 } else if (rrp->ext.choice == 0 ||
2960 rrp->ext.choice == 2) {
2961 DotRNAQuals(ajp, gbp, sfp_in, sfp_out,
2962 gp->nsp, gp->extra_loc, gp->extra_loc_cnt);
2963 }
2964 break;
2965 case 5:
2966 break;
2967 case 6:
2968 break;
2969 }
2970
2971 if (rrp && rrp->pseudo == TRUE) {
2972 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE)
2973 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2974 }
2975
2976
2977 break;
2978 case SEQFEAT_SEQ:
2979 if ((xid=CheckXrefFeat(bsp, sfp_in)) != NULL) {
2980 ptr = &(temp[0]);
2981 SeqIdWrite(xid, printbuf, PRINTID_FASTA_SHORT, 40);
2982 sprintf(ptr, "Cross-reference: %s", printbuf);
2983 SaveNoteToCharPtrStack(nsp, NULL, ptr);
2984 }
2985 else
2986 retval = 0;
2987 break;
2988 case SEQFEAT_IMP:
2989 ifp_in = (ImpFeatPtr) sfp_in->data.value.ptrvalue;
2990 if (ifp_in->loc != NULL)
2991 ifp->loc = ifp_in->loc;
2992 if (StringCmp(ifp_in->key, "CDS") == 0) {
2993 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2994 ajp->error_msgs == TRUE)
2995 ErrPostStr(SEV_INFO, ERR_FEATURE_non_pseudo,
2996 "ConvertToNAImpFeat: Non-pseudo ImpFeat CDS found");
2997 if ((GBQualPresent("translation", sfp_in->qual)) == TRUE &&
2998 ajp->mode == RELEASE_MODE) {
2999 if (ajp->error_msgs == TRUE) {
3000 ErrPostStr(SEV_ERROR, ERR_FEATURE_Dropped,
3001 "ImpFeat CDS with /translation found");
3002 }
3003 retval = -1;
3004 }
3005 }
3006 break;
3007 case SEQFEAT_REGION:
3008 tmp = MemNew(StringLen(sfp_in->data.value.ptrvalue) + 9);
3009 sprintf(tmp, "Region: %s", (CharPtr ) sfp_in->data.value.ptrvalue);
3010 sfp_out->qual = AddGBQual(sfp_out->qual, "note", tmp);
3011 tmp = MemFree(tmp);
3012 break;
3013 case SEQFEAT_SITE:
3014 AddSiteNoteQual(sfp_in, sfp_out);
3015 break;
3016 case SEQFEAT_RSITE:
3017 break;
3018 case SEQFEAT_COMMENT:
3019 if(ifp->key != NULL)
3020 MemFree(ifp->key);
3021 ifp->key = StringSave("misc_feature");
3022 break;
3023 case SEQFEAT_GENE:
3024 grp = (GeneRefPtr) sfp_in->data.value.ptrvalue;
3025 if (grp == NULL)
3026 break;
3027 syn=grp->syn;
3028 if (grp->locus ) {
3029 sfp_out->qual = AddGBQual(sfp_out->qual, "gene", grp->locus);
3030 was_gene = TRUE;
3031 } else if (syn != NULL) {
3032 sfp_out->qual = AddGBQual(sfp_out->qual, "gene",
3033 syn->data.ptrvalue);
3034 syn=syn->next;
3035 was_gene = TRUE;
3036 }
3037 if (grp->desc ) {
3038 if (was_gene) {
3039 CpNoteToCharPtrStack(nsp, NULL, grp->desc);
3040 } else {
3041 /* s = MemNew(StringLen(grp->desc) + 15);
3042 sprintf(s, "Description: %s", grp->desc);
3043 sfp_out->qual = AddGBQual(sfp_out->qual, "gene", s);*/
3044 sfp_out->qual = AddGBQual(sfp_out->qual, "gene", grp->desc);
3045 }
3046 }
3047 if (grp->allele ) {
3048 if ((GBQualPresent("allele", sfp_in->qual)) == FALSE)
3049 sfp_out->qual = AddGBQual(sfp_out->qual, "allele", grp->allele);
3050 }
3051 if (grp->maploc ) {
3052 if ((GBQualPresent("map", sfp_in->qual)) == FALSE)
3053 sfp_out->qual = AddGBQual(sfp_out->qual, "map", grp->maploc);
3054 }
3055 for (; syn; syn=syn->next) {
3056 CpNoteToCharPtrStack(nsp, NULL, syn->data.ptrvalue);
3057 }
3058 if (grp->pseudo == TRUE || sfp_in->pseudo) {
3059 if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE)
3060 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
3061 }
3062 GetDBXrefFromGene(grp, sfp_out);
3063 break;
3064 default:
3065 if (ajp->error_msgs == TRUE)
3066 ErrPostStr(SEV_WARNING, ERR_FEATURE_UnknownFeatureKey,
3067 "Unimplemented type of gbqual in ConvertToNAImpFeat");
3068 retval = 0;
3069 break;
3070 }
3071 if (gsp->grp) {
3072 GetDBXrefFromGene(gsp->grp, sfp_out);
3073 }
3074 if (sfp_in->pseudo) {
3075 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
3076 }
3077 if (sfp_out->comment) {
3078 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_out->comment);
3079 sfp_out->comment = NULL;
3080 }
3081 return retval;
3082
3083 } /* ConvertToNAImpFeat */
3084
3085 /*****************************************************************************
3086 *ValidateNAImpFeat
3087 *
3088 * This code validates an ImpFeat using some functions from
3089 * the flat2asn parser.
3090 *
3091 * If a feat is bad and can't be corrected, -1 is returned.
3092 *
3093 *****************************************************************************/
3094
3095 NLM_EXTERN Int2 ValidateNAImpFeat (SeqFeatPtr sfp)
3096
3097 {
3098 CharPtr key;
3099 ImpFeatPtr ifp;
3100 Int2 index, retval=0, status=0;
3101
3102 if (sfp->data.choice != SEQFEAT_IMP) {
3103 return -1;
3104 } else {
3105
3106 ifp = sfp->data.value.ptrvalue;
3107 key = StringSave(ifp->key);
3108 index = GBFeatKeyNameValid(&key, ASN2FF_SHOW_ERROR_MSG);
3109 if (StringCmp(key, ifp->key) != 0) {
3110 ifp->key = key;
3111 } else {
3112 MemFree(key);
3113 }
3114
3115 if (index == -1) {
3116 retval = -2;
3117 } else {
3118 status = GBFeatKeyQualValid(sfp->cit, index, &sfp->qual,
3119 ASN2FF_SHOW_ERROR_MSG, ASN2FF_VALIDATE_FEATURES);
3120 #ifdef ASN2GNBK_PRINT_UNKNOWN_ORG
3121 if (index == 46 && status == GB_FEAT_ERR_NONE) {
3122 status = GBFeatKeyQualValid(sfp->cit, index, &sfp->qual,
3123 ASN2FF_SHOW_ERROR_MSG, TRUE);
3124 }
3125 #endif
3126 if (status == GB_FEAT_ERR_NONE) {
3127 retval = 1;
3128 } else if (status == GB_FEAT_ERR_REPAIRABLE) {
3129 retval = 0;
3130 } else if (status == GB_FEAT_ERR_DROP) {
3131 retval = -1;
3132 }
3133 }
3134
3135 }
3136
3137 return retval;
3138 } /* ValidateNAImpFeat */
3139
3140 /*****************************************************************************
3141 *ValidateAAImpFeat
3142 *
3143 * This code will validate an ImpFeat using some functions from
3144 * the flat2asn parser. Right now it just checks to see that the
3145 * sfp is an ImpFeat and checks for a partial qualifier.
3146 *
3147 * If a feat is bad and can't be corrected, -1 is returned.
3148 *
3149 *****************************************************************************/
3150
3151 NLM_EXTERN Int2 ValidateAAImpFeat (SeqFeatPtr sfp, Boolean use_product)
3152
3153 {
3154
3155 if (sfp->data.choice != SEQFEAT_IMP)
3156 return -1;
3157
3158 LookForPartialImpFeat(sfp, use_product);
3159
3160 return 0;
3161 } /* ValidateAAImpFeat */
3162
3163
3164 /*****************************************************************************
3165 *void PrepareSourceFeatQuals(SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modifs)
3166 *
3167 *Normally called from PrintSourceFeat, collects all notes etc. together.
3168 *Note: sfp_out may already have quals when it comes here, they should not
3169 *be deleted!
3170 * add_modifs: allows the addition of modifs to be specified, don't add
3171 * modifs if the source feature is a ImpFeat.
3172 *
3173 *For many cases there is no sfp_in, so that must be checked for.
3174 *****************************************************************************/
3175
3176 NLM_EXTERN void PrepareSourceFeatQuals(SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modif)
3177
3178 {
3179 CharPtr note=NULL;
3180 GBQualPtr qual1;
3181 NoteStructPtr nsp=NULL;
3182
3183 if (gbp->feat) {
3184 nsp=gbp->feat->source_notes;
3185 }
3186 if (sfp_in) {
3187 for (qual1=sfp_in->qual; qual1; qual1=qual1->next) {
3188 if (StringCmp(qual1->qual, "note") == 0)
3189 CpNoteToCharPtrStack(nsp, NULL, qual1->val);
3190 else
3191 sfp_out->qual =
3192 AddGBQual(sfp_out->qual, qual1->qual, qual1->val);
3193 }
3194 }
3195 /* not used in new style */
3196 if (add_modif == TRUE)
3197 sfp_out->qual = AddModifsToGBQual(gbp, sfp_out->qual);
3198 /*--------------------- tatiana */
3199 if (sfp_in && sfp_in->comment) {
3200 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_in->comment);
3201 }
3202
3203 if (nsp && nsp->note[0]) {
3204 note = ComposeNoteFromNoteStruct(nsp, NULL);
3205 if (note) {
3206 sfp_out->qual = AddGBQual(sfp_out->qual, "note", note);
3207 note = MemFree(note);
3208 }
3209 }
3210 if (sfp_in && sfp_in->cit) {
3211 if (ASN2FF_SHOW_ERROR_MSG)
3212 ErrPostStr(SEV_WARNING, 0, 0,
3213 "Unwanted /citation on 'source' feature will be dropped");
3214 }
3215
3216 return;
3217 }
3218
3219
3220 /*************************************************************************
3221 *AddProteinQuals
3222 *
3223 *************************************************************************/
3224
3225 NLM_EXTERN void AddProteinQuals (SeqFeatPtr sfp, SeqFeatPtr sfp_out, NoteStructPtr nsp)
3226
3227 {
3228 ProtRefPtr prp=sfp->data.value.ptrvalue;
3229 ValNodePtr vnp;
3230
3231 if (prp->name != NULL) {
3232 for (vnp=prp->name; vnp; vnp=vnp->next)
3233 if (GBQualPresent("product", sfp_out->qual) == FALSE)
3234 sfp_out->qual =
3235 AddGBQual(sfp_out->qual, "product", vnp->data.ptrvalue);
3236 else
3237 CpNoteToCharPtrStack(nsp, NULL, vnp->data.ptrvalue);
3238 }
3239 if (prp->desc) {
3240 sfp_out->qual =
3241 AddGBQual(sfp_out->qual, "name", prp->desc);
3242 }
3243
3244 for (vnp=prp->ec; vnp; vnp=vnp->next)
3245 if ((CheckForQual(sfp_out->qual, "EC_number", vnp->data.ptrvalue)) == 0)
3246 sfp_out->qual =
3247 AddGBQual(sfp_out->qual, "EC_number", vnp->data.ptrvalue);
3248
3249 return;
3250 }
3251
3252 /*______________________________________________________________________
3253 **
3254 ** This code is not currently used.
3255 ** I do not remove this piece of code, just comment it out.
3256 ** -- Dmitri Lukyanov
3257 */
#if 0

/* Unlinks node x from the list that starts at head, releases it with
 * GBQualFree and returns the (possibly changed) head.  The list comes back
 * untouched when x is not one of its nodes.  Currently compiled out.
 */
static GBQualPtr RemoveQual(GBQualPtr head, GBQualPtr x)
{
	GBQualPtr *link;

	/* walk the "next" links themselves so the head needs no special case */
	for (link = &head; *link != NULL; link = &(*link)->next) {
		if (*link == x) {
			*link = x->next;
			x->next = NULL;
			GBQualFree(x);
			break;
		}
	}
	return head;
}

#endif
3285 /*______________________________________________________________________
3286 */
3287
3288 static void Add_gene_id (GeneStructPtr gsp, SeqFeatPtr sfp_out)
3289 {
3290 ImpFeatPtr ifp;
3291 GeneRefPtr grp;
3292 ValNodePtr vnp;
3293 Char val[40];
3294
3295 if ((grp = gsp->grp) == NULL)
3296 return;
3297 ifp = sfp_out->data.value.ptrvalue;
3298 if (StringCmp(ifp->key, "CDS") != 0) {
3299 return;
3300 }
3301 if ((vnp = grp->syn) == NULL) /* no synonyms */
3302 return;
3303 sprintf(val, "GeneID:%s", vnp->data.ptrvalue);
3304 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3305 }
3306
3307 /****************************************************************************
3308 * Composes the GBQuals for sfp_out using the information in the
3309 * GeneStructPtr (gsp), and then the quals already on sfp_out.
3310 *
3311 * use only info from GeneStruct throw away the quals gene and map if they
3312 * different /tatiana 07.11.95/
3313 * do not add /map to the features other than 'gene' /08-29-97/
3314 * sfp_out: SEQFEAT_IMP
3315 * map /citation added by Tatiana
3316 **************************************************************************/
3317 NLM_EXTERN void ComposeGBQuals (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, GBEntryPtr gbp, SortStructPtr p, Boolean note_pseudo)
3318 {
3319 Char temp[65];
3320 Char buffer[10];
3321 CharPtr ascii, start, note=NULL, ptr=NULL, tmp;
3322 GBQualPtr gbqp=NULL, qual1, qnext;
3323 GeneStructPtr gsp;
3324 Int2 int_index, status;
3325 NoteStructPtr nsp;
3326 PubStructPtr psp;
3327 SeqFeatPtr sfp = NULL;
3328 Int2 ascii_len, l;
3329 ValNodePtr vnp, vnp1;
3330 ValNodePtr pub, pubq, pubset;
3331 ImpFeatPtr ifp;
3332 BioseqPtr bsp;
3333 Boolean is_contig = FALSE, is_NC = FALSE, is_NG = FALSE;
3334 SeqIdPtr sid;
3335 TextSeqIdPtr tsip;
3336
3337 if (gbp == NULL || gbp->feat == NULL || p == NULL) {
3338 return;
3339 }
3340 bsp = gbp->bsp;
3341 for (sid=bsp->id; sid; sid=sid->next) {
3342 if (sid->choice == SEQID_OTHER) {
3343 tsip = (TextSeqIdPtr) sid->data.ptrvalue;
3344 if (StringNCmp(tsip->accession, "NT", 2) == 0) {
3345 is_contig = TRUE;
3346 }
3347 if (StringNCmp(tsip->accession, "NC", 2) == 0
3348 || StringNCmp(tsip->accession, "NP", 2) == 0) {
3349 is_NC = TRUE;
3350 }
3351 if (StringNCmp(tsip->accession, "NG", 2) == 0) {
3352 is_NG = TRUE;
3353 }
3354 }
3355 }
3356 gsp=p->gsp;
3357 nsp = p->nsp;
3358 if ((sfp=p->sfp) == NULL) {
3359 GatherItemWithLock(p->entityID, p->itemID, p->itemtype,
3360 &sfp, find_item);
3361 }
3362 if (gsp) {
3363 if (gsp->gene) {
3364 /* delete_qual(&(sfp_out->qual), "gene"); */
3365 for (vnp=gsp->gene; vnp; vnp=vnp->next)
3366 {
3367 ascii_len = Sgml2AsciiLen(vnp->data.ptrvalue);
3368 start = ascii = MemNew((size_t) (10+ascii_len));
3369 ascii = Sgml2Ascii(vnp->data.ptrvalue, ascii, ascii_len+1);
3370 if ((GBQualPresent("gene", gbqp)) == FALSE) {
3371 if ((GBQualPresent("gene", sfp_out->qual)) == FALSE) {
3372 gbqp=AddGBQual(gbqp, "gene", start);
3373 }
3374 }
3375 start = MemFree(start);
3376 }
3377 }
3378 if (gsp->product) {
3379 for (vnp=gsp->product; vnp; vnp=vnp->next)
3380 {
3381 if (GBQualPresent("product", gbqp) == FALSE &&
3382 GBQualPresent("product", sfp_out->qual) == FALSE)
3383 sfp_out->qual = AddGBQual(sfp_out->qual, "product",
3384 vnp->data.ptrvalue);
3385 else
3386 CpNoteToCharPtrStack(nsp, NULL, vnp->data.ptrvalue);
3387 }
3388 }
3389 if (gsp->standard_name) {
3390 for (vnp=gsp->standard_name; vnp; vnp=vnp->next)
3391 {
3392 if ((CheckForQual(sfp_out->qual, "standard_name",
3393 vnp->data.ptrvalue)) == 0) {
3394 gbqp=AddGBQual(gbqp, "standard_name", vnp->data.ptrvalue);
3395 }
3396 }
3397 }
3398 if (ajp->show_gene == TRUE) {
3399 ifp = sfp_out->data.value.ptrvalue;
3400 if (StringCmp(ifp->key, "gene") == 0) {
3401 if (gsp->map[0]) {
3402 gbqp = AddGBQual(gbqp, "map", gsp->map[0]);
3403 }
3404 }
3405 } else {
3406 if (gsp->map[0]) {
3407 gbqp = AddGBQual(gbqp, "map", gsp->map[0]);
3408 }
3409 }
3410 for (vnp=gsp->ECNum; vnp; vnp=vnp->next) {
3411 if ((CheckForQual(sfp_out->qual, "EC_number",
3412 vnp->data.ptrvalue)) == 0) {
3413 gbqp=AddGBQual(gbqp, "EC_number", vnp->data.ptrvalue);
3414 }
3415 }
3416 for (vnp=gsp->activity; vnp; vnp=vnp->next) {
3417 if ((CheckForQual(sfp_out->qual, "function",
3418 vnp->data.ptrvalue)) == 0) {
3419 gbqp=AddGBQual(gbqp, "function", vnp->data.ptrvalue);
3420 }
3421 }
3422 if (gsp->pseudo == TRUE) {
3423 if (note_pseudo == TRUE) {
3424 CpNoteToCharPtrStack(nsp, NULL, "pseudogene");
3425 } else if (GBQualPresent("pseudo", gbqp) == FALSE &&
3426 GBQualPresent("pseudo", sfp_out->qual) == FALSE) {
3427 gbqp = AddGBQual(gbqp, "pseudo", NULL);
3428 }
3429 }
3430 }
3431 /* Add Experimental note */
3432 if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION)
3433 {
3434 ptr = &(temp[0]);
3435 status = MakeGBSelectNote(ptr, sfp);
3436 if (status > 0)
3437 SaveNoteToCharPtrStack(nsp, NULL, ptr);
3438 ptr=NULL;
3439 /* gene synonym appears as db-xref
3440 if (is_NC) {
3441 Add_gene_id(gsp, sfp_out);
3442 }
3443 */
3444 }
3445 if (nsp && nsp->note[0])
3446 {
3447 note = ComposeNoteFromNoteStruct(nsp, gsp);
3448 if (note)
3449 {
3450 gbqp = AddGBQual(gbqp, "note", note);
3451 note = MemFree(note);
3452 }
3453 }
3454 if (ajp->mode != DIRSUB_MODE) {
3455 AddPID(ajp, sfp_out, (Boolean) (is_contig || is_NG || is_NC));
3456 }
3457 if (is_contig || is_NG || is_NC) {
3458 if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA) {
3459 Add_trid(ajp, sfp_out);
3460 }
3461 }
3462 Add_dbxref(ajp, sfp_out, sfp, bsp);
3463 vnp = gbp->Pub;
3464 if (sfp && sfp->cit) {
3465 buffer[0] = '\0';
3466 pubset = sfp->cit;
3467 for (pubq = pubset->data.ptrvalue; pubq; pubq = pubq->next) {
3468 if (pubq->choice == PUB_Equiv) {
3469 pub = pubq->data.ptrvalue;
3470 for (; pub != NULL; pub = pub->next) {
3471 for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
3472 psp = vnp1->data.ptrvalue;
3473 if (PubLabelMatch(psp->pub, pub) == 0) {
3474 sprintf(buffer, "[%ld]", (long) (psp->number));
3475 gbqp = AddGBQual(gbqp, "citation", buffer);
3476 break;
3477 }
3478 }
3479 }
3480 } else {
3481 pub = pubq;
3482 for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
3483 psp = vnp1->data.ptrvalue;
3484 if (PubLabelMatch(psp->pub, pub) == 0) {
3485 sprintf(buffer, "[%ld]", (long) (psp->number));
3486 gbqp = AddGBQual(gbqp, "citation", buffer);
3487 break;
3488 }
3489 }
3490 }
3491 }
3492 /************** old algorithm for pub matching ****************/
3493 if (buffer[0] == '\0') {
3494 for (vnp1=vnp; vnp1; vnp1=vnp1->next)
3495 {
3496 psp = vnp1->data.ptrvalue;
3497 for (int_index=0; int_index<psp->pubcount; int_index++)
3498 if (sfp == psp->pubfeat[int_index])
3499 {
3500 sprintf(buffer, "[%ld]", (long) (psp->number));
3501 gbqp = AddGBQual(gbqp, "citation", buffer);
3502 }
3503 }
3504 }
3505 }
3506 if (gbqp) /* any gene or note related quals added above? */
3507 {
3508 for (qual1=gbqp; qual1->next; qual1=qual1->next)
3509 ;
3510 qual1->next = sfp_out->qual;
3511 sfp_out->qual = gbqp;
3512 }
3513 /* check for the qual gdb_xref */
3514 for (qual1 = sfp_out->qual; qual1; qual1 = qnext) {
3515 qnext = qual1->next;
3516 if (StringCmp(qual1->qual, "gdb_xref") == 0) {
3517 qual1->qual = StringSave("db_xref");
3518 l = StringLen(qual1->val);
3519 tmp = MemNew(l + 5);
3520 sprintf(tmp, "GDB:%s", qual1->val);
3521 qual1->val = StringSave(tmp);
3522 MemFree(tmp);
3523 }
3524 if (ajp->show_gene == FALSE) {
3525 /* change qual 'replace' to the old style location operator */
3526 /* changed December 1996 release 100.0 */
3527 /* if (StringCmp(qual1->qual, "replace") == 0) {
3528 ifp = sfp_out->data.value.ptrvalue;
3529 loc = FlatLoc(gbp->bsp, sfp->location);
3530 l = StringLen(qual1->val) + StringLen(loc);
3531 tmp = MemNew(l + 15);
3532 sprintf(tmp, "replace(%s,\"%s\")", loc, qual1->val);
3533 MemFree(loc);
3534 ifp->loc = tmp;
3535 sfp_out->qual = RemoveQual(sfp_out->qual, qual1);
3536 }
3537 */
3538 }
3539 }
3540 return;
3541 } /* ComposeGBQuals */
3542
3543 static CharPtr tmp_save(CharPtr str)
3544 /* deletes spaces from the begining and the end and returns Nlm_StringSave */ {
3545 CharPtr s, ss;
3546
3547 if (str == NULL) {
3548 return NULL;
3549 }
3550 for (; isspace(*str) || *str == ','; str++) continue;
3551 for (s = str; *s != '\0'; s++) {
3552 if (*s == '\n') {
3553 for (ss = s+1; isspace(*ss); ss++) continue;
3554 *s = ' ';
3555 strcpy(s+1, ss);
3556 }
3557 }
3558 for (s=str+StringLen(str)-1; s >= str && (*s == ' ' || *s == ';' ||
3559 *s == ',' || *s == '.' || *s == '\"' || *s == '\t'); s--) {
3560 *s = '\0';
3561 }
3562
3563 if (*str == '\0') {
3564 return NULL;
3565 } else {
3566 return Nlm_StringSave(str);
3567 }
3568 }
3569 static Int2 NoteCmp(CharPtr n1, CharPtr n2)
3570 {
3571 CharPtr s1, s2;
3572 Int2 ret = 1;
3573
3574 if (n1 == NULL || n2 == NULL)
3575 return ret;
3576 s1 = tmp_save(n1);
3577 s2 = tmp_save(n2);
3578 if (StringStr(s1, s2) != NULL)
3579 ret = 0; /*duplicated */
3580 MemFree(s1);
3581 MemFree(s2);
3582
3583 return ret;
3584 }
3585
3586 /****************************************************************************
3587 * CharPtr ComposeNoteFromNoteStruct (NoteStructPtr nsp, GeneStrunctPtr gsp)
3588 *
3589 * This function composes a "/note" for a SeqFeatPtr from the information
3590 * in the GeneStructPtr (gsp).
3591 * The first "for" loop initializes the first CharPtr and a check
3592 * is done that the information in gsp->note is *not* redundant. If
3593 * it is not, first gsp->note_annot is copied onto a CharPtr (this
3594 * field contains words describing the origin of the info in note, i.e.,
3595 * "Description"); then the actual note is copied onto the CharPtr.
3596 * The second "for" loop does the same checking as the first and the
3597 * concatenation of more "note" strings is performed.
3598 *
3599 *n.b.: the caller is responsible for deallocating the final returned "note".
3600 ***************************************************************************/
3601 NLM_EXTERN CharPtr ComposeNoteFromNoteStruct (NoteStructPtr nsp, GeneStructPtr gsp)
3602
3603 {
3604 Boolean status;
3605 CharPtr note1=NULL, note2, note3;
3606 Int2 index, index1, index2, len;
3607
3608 for (index=0; index<nsp->note_index; index++) {
3609 if (gsp) {
3610 if (CompareStringWithGsp(gsp, nsp->note[index]) != 0) {
3611 if (nsp->note_annot[index])
3612 note1 = Cat2Strings(nsp->note_annot[index], nsp->note[index], " ", 0);
3613 else
3614 note1 = StringSave(nsp->note[index]);
3615 len = CheckForExtraChars(note1);
3616 if (len == 0)
3617 note1 = MemFree(note1);
3618 else
3619 break;
3620 }
3621 } else {
3622 if (nsp->note_annot[index])
3623 note1 = Cat2Strings(nsp->note_annot[index], nsp->note[index], " ", 0);
3624 else
3625 note1 = StringSave(nsp->note[index]);
3626 len = CheckForExtraChars(note1);
3627 if (len == 0)
3628 note1 = MemFree(note1);
3629 else
3630 break;
3631 }
3632 }
3633 index++;
3634
3635 for (index1=index; index1<nsp->note_index; index1++)
3636 {
3637 status = TRUE;
3638 note2 = nsp->note[index1];
3639 if (gsp && CompareStringWithGsp(gsp, note2) == 0)
3640 continue;
3641
3642 for (index2=0; index2<index1; index2++) {
3643 if (gsp)
3644 if (GeneStringCmp(note2, nsp->note[index2]) == 0)
3645 status = FALSE;
3646 }
3647 if (status == TRUE) {
3648 if (nsp->note_annot[index1])
3649 note2 = Cat2Strings(nsp->note_annot[index1], nsp->note[index1], " ", 0);
3650 else /* rewrite to not always allocate note2 if no annot?????*/
3651 note2 = StringSave(nsp->note[index1]);
3652 len = CheckForExtraChars(note1);
3653 if (NoteCmp(note1, note2) == 0) {
3654 len = 0;
3655 }
3656 if (len > 0) {
3657 if (note1[len-1] == '.') {
3658 note3 = Cat2Strings(note1, note2, " ", -1);
3659 } else {
3660 note3 = Cat2Strings(note1, note2, "; ", -1);
3661 }
3662 note1 = MemFree(note1);
3663 note2 = MemFree(note2);
3664 note1 = note3;
3665 note3 = NULL;
3666 } else {
3667 if (note2) {
3668 note2 = MemFree(note2);
3669 }
3670 }
3671 }
3672 }
3673
3674 return note1;
3675 } /* ComposeNoteFromNoteStruct */
3676
3677 /*************************************************************************
3678 *static Int2 CheckForExtraChars(CharPtr note)
3679 *
3680 * Check for spaces or semi-colons on the ends of notes.
3681 ************************************************************************/
3682
3683 static Int2 CheckForExtraChars(CharPtr note)
3684
3685 {
3686 Int2 len=0;
3687
3688 if (note != NULL)
3689 {
3690 len = StringLen(note);
3691 while (len > 0)
3692 {
3693 if (note[len-1] == ' ' || note[len-1] == ';')
3694 note[len-1] = '\0';
3695 else
3696 break;
3697 len--;
3698 }
3699 }
3700
3701 return len;
3702
3703 } /* CheckForExtraChars */
3704
3705 NLM_EXTERN void Add_trid (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out)
3706
3707 {
3708 ImpFeatPtr ifp;
3709 Int4 gi = -1;
3710 SeqIdPtr sip, newid=NULL;
3711 ValNodePtr product;
3712 Char buf[MAX_ACCESSION_LEN+5];
3713
3714 ifp = sfp_out->data.value.ptrvalue;
3715 if (StringCmp(ifp->key, "mRNA") != 0) {
3716 return;
3717 }
3718 product = sfp_out->product;
3719 if (product == NULL) {
3720 return;
3721 }
3722 sip = GetProductSeqId(product);
3723 if (sip == NULL) return;
3724 if (sip->choice == SEQID_GI) {
3725 if ((newid = GetSeqIdForGI(sip->data.intvalue)) != NULL) {
3726 SeqIdWrite(newid, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3727 } else {
3728 sprintf(buf, "%ld", sip->data.intvalue);
3729 }
3730 } else {
3731 SeqIdWrite(sip, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3732 }
3733 sfp_out->qual = AddGBQual(sfp_out->qual, "transcript_id", buf);
3734 }
3735
3736 /*************************************************************************
3737 * sfp_out: synthetic SeqFeatPtr of type ImpFeat for use in printing.
3738 *
3739 * This function puts the GI number on a SeqFeatPtr /db_xref of type CDS.
3740 * Checking is first done to see if this sfp is indeed a CDS, then
3741 * the PID number is gotten from the product SeqId
3742 *****************************************************************************/
3743
3744 NLM_EXTERN void AddPID (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG)
3745
3746 {
3747 ImpFeatPtr ifp;
3748 Int4 gi = -1;
3749 SeqIdPtr sip, new_id=NULL;
3750 ValNodePtr product, vnp;
3751 BioseqPtr p_bsp = NULL;
3752 DbtagPtr db;
3753 Char val[20];
3754 Char buf[MAX_ACCESSION_LEN+1];
3755
3756 ifp = sfp_out->data.value.ptrvalue;
3757 if (StringCmp(ifp->key, "CDS") != 0) {
3758 return;
3759 }
3760 product = sfp_out->product;
3761 if (product == NULL) {
3762 return;
3763 }
3764 sip = GetProductSeqId(product);
3765 if (sip) { /* Get protein bsp */
3766 if (sip->choice == SEQID_GI && is_NTorNG) {
3767 if ((new_id = GetSeqIdForGI(sip->data.intvalue)) != NULL) {
3768 SeqIdWrite(new_id, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3769 SeqIdFree(new_id); /*** need to free it !!! (EY) ***/
3770 } else {
3771 sprintf(buf, "%ld", sip->data.intvalue);
3772 }
3773 sfp_out->qual = AddGBQual(sfp_out->qual, "protein_id", buf);
3774 } else if ((p_bsp = BioseqFind(sip)) != NULL) {
3775 new_id = GetSeqIdChoice(p_bsp->id);
3776 if (ajp->forgbrel && new_id == NULL) {
3777 ErrPostStr(SEV_ERROR, ERR_ACCESSION_NoAccessNum, "");
3778 } else if (new_id) {
3779 SeqIdWrite(new_id, buf, PRINTID_TEXTID_ACC_VER,
3780 MAX_ACCESSION_LEN+1);
3781 sfp_out->qual = AddGBQual(sfp_out->qual, "protein_id", buf);
3782 }
3783 }
3784 }
3785 if (p_bsp == NULL) {
3786 gi = GetGINumFromSip(sip);
3787 if (gi != -1) {
3788 if (ajp->show_gi) {
3789 val[0] = '\0';
3790 sprintf(val, "PID:g%ld", (long) gi);
3791 if (val[0] != '\0') {
3792 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3793 }
3794 }
3795 if (ajp->show_version) {
3796 val[0] = '\0';
3797 sprintf(val, "GI:%ld", (long) gi);
3798 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3799 }
3800 }
3801 return;
3802 }
3803 for (vnp=p_bsp->id; vnp; vnp=vnp->next) {
3804 if (vnp->choice == SEQID_GENERAL) {
3805 db = vnp->data.ptrvalue;
3806 if (db == NULL) {
3807 continue;
3808 }
3809 val[0] = '\0';
3810 if (StringNCmp(db->db, "PIDe", 4) == 0) {
3811 sprintf(val, "PID:e%ld", (long) db->tag->id);
3812 gi = db->tag->id;
3813 } else if (StringNCmp(db->db, "PIDd", 4) == 0) {
3814 sprintf(val, "PID:d%ld", (long) db->tag->id);
3815 gi = db->tag->id;
3816 } else if (StringNCmp(db->db, "PID", 3) == 0) {
3817 if (db->tag && db->tag->str) {
3818 sprintf(val, "%s:%s", db->db, db->tag->str);
3819 gi = atoi((db->tag->str)+1);
3820 }
3821 }
3822 if (ajp->show_gi && val[0] != '\0') {
3823 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3824 }
3825 /*if (ajp->show_version) {
3826 val[0] = '\0';
3827 sprintf(val, "GI:%ld", (long) gi);
3828 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3829 }*/
3830 }
3831 if (vnp->choice == SEQID_GI) {
3832 if (ajp->show_gi) {
3833 val[0] = '\0';
3834 sprintf(val, "PID:g%ld", (long) vnp->data.intvalue);
3835 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3836 }
3837 if (ajp->show_version) {
3838 val[0] = '\0';
3839 sprintf(val, "GI:%ld", (long) vnp->data.intvalue);
3840 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3841 }
3842 }
3843 }
3844 return;
3845 } /* AddPID */
3846
3847 /***************************************************************************
3848 *Int2 MakeGBSelectNote (CharPtr ptr, SeqFeatPtr sfp)
3849 *
3850 *Adds note to CDS GenBankSelect
3851 ***************************************************************************/
3852 NLM_EXTERN Int2 MakeGBSelectNote (CharPtr ptr, SeqFeatPtr sfp)
3853
3854 {
3855 Boolean found_select=FALSE, found_match=FALSE;
3856 CharPtr acc=NULL;
3857 Int2 number = -1;
3858 ObjectIdPtr oip=NULL, type;
3859 UserFieldPtr ufp;
3860 UserObjectPtr uop=NULL;
3861
3862 if (sfp && (uop=sfp->ext) != NULL)
3863 {
3864 if (uop->_class && (type=uop->type) != NULL)
3865 {
3866 if (StringCmp(uop->_class, "GB-Select") == 0)
3867 found_select = TRUE;
3868 if (type->str)
3869 if (StringCmp(type->str, "SPmatch") == 0)
3870 found_match = TRUE;
3871 if (found_match && found_select)
3872 {
3873 for (ufp=uop->data; ufp; ufp=ufp->next)
3874 {
3875 oip = ufp->label;
3876 if (oip->id == 2)
3877 {
3878 if (ufp->choice == 1)
3879 acc = ufp->data.ptrvalue;
3880 }
3881 else if (oip->id == 3)
3882 {
3883 if (ufp->choice == 2)
3884 {
3885 number = (Int2) (ufp->data.intvalue);
3886 }
3887 }
3888
3889 }
3890 if (number == 1)
3891 sprintf(ptr,
3892 "Identical to Swiss-Prot Accession Number %s", acc);
3893 else if (number == 2 || number == 3)
3894 sprintf(ptr,
3895 "Similar to Swiss-Prot Accession Number %s", acc);
3896 }
3897 }
3898 }
3899 return number;
3900 }
3901
3902 NLM_EXTERN Boolean get_prot_feats (GatherContextPtr gcp)
3903 {
3904 BioseqPtr bsp;
3905 OrganizeProtPtr opp;
3906 SeqFeatPtr sfp;
3907 Boolean temp = FALSE;
3908
3909 opp = gcp->userdata;
3910
3911 switch (gcp->thistype)
3912 {
3913 case OBJ_SEQFEAT:
3914 sfp = (SeqFeatPtr) (gcp->thisitem);
3915 if (sfp->data.choice == SEQFEAT_PROT ||
3916 sfp->data.choice == SEQFEAT_REGION ||
3917 sfp->data.choice == SEQFEAT_BOND ||
3918 sfp->data.choice == SEQFEAT_SITE) {
3919 bsp = BioseqFindCore(SeqLocId(sfp->location));
3920 if (gcp->tempload == TRUE) {
3921 temp = TRUE;
3922 }
3923 opp->list = EnlargeSortList(opp->list, opp->size);
3924 opp->size = StoreFeatTemp(opp->list, sfp, opp->size, bsp, NULL,
3925 gcp->entityID, gcp->itemID, gcp->thistype,
3926 gcp->new_loc, NULL, 0, temp);
3927 }
3928 break;
3929 default:
3930 break;
3931 }
3932 return TRUE;
3933 }
3934
3935 /********************************************************************
3936 * Int2 CompareStringWithGsp (GeneStructPtr gsp, CharPtr string)
3937 *
3938 * gsp: GeneStructPtr containing the gene information,
3939 * gene->synonym in is store in gsp->gene with choice 1 (GetGeneRefInfo)
3940 * it is not compared to note string
3941 *
3942 * string: a CharPtr with (possibly) relevant gene information
3943 * (i.e., gene name, allele, product etc.).
3944 *
3945 * A comparison is made between string and the information already
3946 * stored in the gsp. Following the convention for StringCmp,
3947 * "0" is returned if a match is found, otherwise "1" is returned.
3948 * At present (2/7/94) GeneStringCmp is a #define for StringCmp.
3949 ************************************************************************/
3950
3951 NLM_EXTERN Int2 CompareStringWithGsp (GeneStructPtr gsp, CharPtr string)
3952
3953 {
3954 CharPtr ascii, start;
3955 Int2 ascii_len;
3956 ValNodePtr vnp;
3957
3958 for (vnp=gsp->gene; vnp; vnp=vnp->next)
3959 {
3960 if (vnp->choice == 1) {
3961 continue;
3962 }
3963 ascii_len = Sgml2AsciiLen(vnp->data.ptrvalue);
3964 start = ascii = MemNew((size_t) (10+ascii_len));
3965 ascii = Sgml2Ascii(vnp->data.ptrvalue, ascii, ascii_len+1);
3966 if (GeneStringCmp(start, string) == 0)
3967 {
3968 start = MemFree(start);
3969 return 0;
3970 }
3971 start = MemFree(start);
3972 }
3973 vnp=gsp->product;
3974 if (vnp != NULL)
3975 {
3976 if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3977 return 0;
3978 }
3979 for (vnp=gsp->standard_name; vnp; vnp=vnp->next)
3980 {
3981 if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3982 return 0;
3983 }
3984 if (gsp->map[0] && GeneStringCmp(gsp->map[0], string) == 0)
3985 return 0;
3986 if (gsp->ECNum)
3987 for (vnp=gsp->ECNum; vnp; vnp=vnp->next)
3988 {
3989 if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3990 return 0;
3991 }
3992
3993 return 1;
3994 } /* CompareStringWithGsp */
3995
3996 NLM_EXTERN void GetDBXrefFromGene (GeneRefPtr grp, SeqFeatPtr sfp)
3997
3998 {
3999 CharPtr dbase;
4000 DbtagPtr dbtp;
4001 ValNodePtr tmp;
4002 Char buffer[50];
4003
4004 if (grp == NULL) {
4005 return;
4006 }
4007 for (tmp = grp->db; tmp != NULL; tmp=tmp->next) {
4008 dbtp = tmp->data.ptrvalue;
4009 if (dbtp && dbtp->db && dbtp->tag) {
4010 dbase = MemNew(StringLen(dbtp->db) + 3);
4011 sprintf(dbase, "%s:", dbtp->db);
4012 if (dbtp->tag->str) {
4013 sprintf(buffer, "%s%s", dbase, dbtp->tag->str);
4014 sfp->qual = AddGBQual(sfp->qual, "db_xref", buffer);
4015 } else if (dbtp->tag->id) {
4016 sprintf(buffer, "%s%ld", dbase, (long) dbtp->tag->id);
4017 sfp->qual = AddGBQual(sfp->qual, "db_xref", buffer);
4018 }
4019 MemFree(dbase);
4020 }
4021 }
4022
4023 return;
4024 }
4025
4026 /****************************************************************************
4027 * void GetProtRefInfo (GeneStructPtr gsp, NoteStructPtr nsp, ProtRefPtr prp)
4028 *
4029 * gsp: GeneStructPtr containing gene information
4030 * prp: ProtRefPtr from a sfp of type protein or a sfp xref.
4031 *
4032 * If fields are empty on the gsp, and the relevant information
4033 * is given by the prp, that field is filled on the gsp
4034 ****************************************************************************/
4035 NLM_EXTERN void GetProtRefInfo (Uint1 format, GeneStructPtr gsp, NoteStructPtr nsp, ProtRefPtr prp)
4036 {
4037 ValNodePtr tmp, vnp;
4038
4039 if (prp == NULL) {
4040 return;
4041 }
4042 for (vnp=prp->name; vnp; vnp=vnp->next) {
4043 tmp = ValNodeNew(NULL);
4044 tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
4045 gsp->product = tie_next(gsp->product, tmp);
4046 }
4047 for (vnp=prp->ec; vnp; vnp=vnp->next) {
4048 tmp = ValNodeNew(NULL);
4049 tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
4050 gsp->ECNum = tie_next(gsp->ECNum, tmp);
4051 }
4052 for (vnp=prp->activity; vnp; vnp=vnp->next) {
4053 tmp = ValNodeNew(NULL);
4054 tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
4055 gsp->activity = tie_next(gsp->activity, tmp);
4056 }
4057 if (format != GENPEPT_FMT) {
4058 if (prp->desc) {
4059 SaveNoteToCharPtrStack(nsp, NULL, prp->desc);
4060 }
4061 }
4062 return;
4063 }
4064
4065 /****************************************************************************
4066 *
4067 * sfp: SeqFeatPtr for CDS
4068 * nsp: NoteStructPtr
4069 *
4070 * Used to get comments from the Protein for use in a CDS /note.
4071 *
4072 * Take the main protein ONLY (not sig_peptide mat_peptide)
4073 *
4074 * Will find the Protein Pubs, as they are needed and (presumably) haven't
4075 * been found yet, so as to save "upfront" time when the formatter is
4076 * running in Entrez.
4077 ****************************************************************************/
4078 static void GetProtRefComment (SeqFeatPtr sfp, BioseqPtr bsp, Asn2ffJobPtr ajp, OrganizeProtPtr opp, NoteStructPtr nsp, Uint1 method)
4079 {
4080 Boolean first_done=FALSE, protein=FALSE;
4081 CharPtr ptr = NULL, string=NULL, string1=NULL, newstring=NULL, temp, s;
4082 CharPtr conflict_msg_no_protein="Coding region translates with internal stops";
4083 /* CharPtr except_msg_no_protein="Coding region translates with internal stops for reasons explained in citation. "; -- except_msg_no_protein UNUSED */
4084 CharPtr conflict_msg="Protein sequence is in conflict with the conceptual translation";
4085 /* CharPtr except_msg="Protein sequence differs from the conceptual translation for reasons explained in citation. "; -- except_msg UNUSED */
4086 CdRegionPtr cdr=NULL;
4087 Int2 total=0, i;
4088 PubdescPtr pdp;
4089 PubStructPtr psp;
4090 SeqFeatPtr sfp_local=NULL;
4091 ValNodePtr descr, vnp, vnp1, vnp1next, product;
4092 MolInfoPtr mfp;
4093 GatherScope gs;
4094 SeqLocPtr slp;
4095 ProtRefPtr prot_local;
4096 SeqMgrFeatContext fcontext;
4097 SeqMgrDescContext dcontext;
4098 GatherContext gc;
4099 SeqFeatPtr psfp;
4100 ValNodePtr psdp;
4101 ObjMgrDataPtr omdp;
4102 SeqSubmitPtr ssp;
4103 SubmitBlockPtr sbp;
4104 CharPtr prefix = "";
4105
4106 if (ajp->useSeqMgrIndexes) {
4107 sfp_local = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PROT, 0, &fcontext);
4108 while (sfp_local != NULL) {
4109 prot_local = sfp_local->data.value.ptrvalue;
4110 if (prot_local->processed <= 1) {
4111 if (first_done) {
4112 if (StringLen(sfp_local->comment)) {
4113 string1 = CheckEndPunctuation(sfp_local->comment, '\0');
4114 if (StringCmp(string, string1) != 0) {
4115 newstring = Cat2Strings(string, string1, "; ", 0);
4116 string = MemFree(string);
4117 string = newstring;
4118 }
4119 string1 = MemFree(string1);
4120 }
4121 } else {
4122 if (StringLen(sfp_local->comment)) {
4123 string = CheckEndPunctuation(sfp_local->comment, '\0');
4124 first_done = TRUE;
4125 }
4126 }
4127 }
4128 sfp_local = SeqMgrGetNextFeature (bsp, sfp_local, SEQFEAT_PROT, 0, &fcontext);
4129 }
4130 } else if (opp != NULL) {
4131 for (i = 0; i < opp->size; i++) {
4132 if ((sfp_local = opp->list[i].sfp) == NULL) {
4133 continue;
4134 }
4135 if (sfp_local->data.choice != SEQFEAT_PROT) {
4136 continue;
4137 }
4138 prot_local = sfp_local->data.value.ptrvalue;
4139 if (prot_local->processed > 1) {
4140 continue;
4141 }
4142 if (first_done) {
4143 if (StringLen(sfp_local->comment)) {
4144 string1 = CheckEndPunctuation(sfp_local->comment, '\0');
4145 if (StringCmp(string, string1) != 0) {
4146 newstring = Cat2Strings(string, string1, "; ", 0);
4147 string = MemFree(string);
4148 string = newstring;
4149 }
4150 string1 = MemFree(string1);
4151 }
4152 } else {
4153 if (StringLen(sfp_local->comment)) {
4154 string = CheckEndPunctuation(sfp_local->comment, '\0');
4155 first_done = TRUE;
4156 }
4157 }
4158 }
4159 }
4160
4161 if (bsp && (descr=bsp->descr) != NULL) {
4162 for (vnp=descr; vnp; vnp=vnp->next) {
4163 if (vnp->choice == Seq_descr_comment) {
4164 if (first_done) {
4165 if (StringLen(vnp->data.ptrvalue)) {
4166 string1 = CheckEndPunctuation(vnp->data.ptrvalue, '\0');
4167 if (StringCmp(string, string1) != 0) {
4168 newstring = Cat2Strings(string, string1, "; ", 0);
4169 string = MemFree(string);
4170 string = newstring;
4171 }
4172 string1 = MemFree(string1);
4173 }
4174 } else {
4175 if (StringLen(vnp->data.ptrvalue)) {
4176 string = CheckEndPunctuation(vnp->data.ptrvalue, '\0');
4177 first_done = TRUE;
4178 }
4179 }
4180 } else if (vnp->choice == Seq_descr_molinfo) {
4181 mfp = vnp->data.ptrvalue;
4182 if (mfp && mfp->tech > 1 && mfp->tech != 8) {
4183 if (mfp->tech == MI_TECH_concept_trans_a) {
4184 /* s = StringForSeqMethod(method); */
4185 s = NULL;
4186 } else {
4187 s = StringForSeqTech(mfp->tech);
4188 }
4189 if (s!= NULL && *s != '\0') {
4190 ptr = MemNew(StringLen(s) + 10);
4191 sprintf(ptr, "Method: %s", s);
4192 }
4193 if (first_done) {
4194 newstring = Cat2Strings(string, ptr, "; ", 0);
4195 string = MemFree(string);
4196 string = newstring;
4197 } else {
4198 string = StringSave(ptr);
4199 first_done = TRUE;
4200 }
4201 MemFree(ptr);
4202 }
4203 } else if (vnp->choice == Seq_descr_method) {
4204 if (vnp->data.intvalue > 1) {
4205 if (method == METHOD_concept_transl_a) {
4206 /* s = StringForSeqMethod(method);*/
4207 s = NULL;
4208 } else {
4209 s = StringForSeqMethod((Uint1)(vnp->data.intvalue));
4210 }
4211 if (s!= NULL && *s != '\0') {
4212 ptr = MemNew(StringLen(s) + 10);
4213 sprintf(ptr, "Method: %s", s);
4214 }
4215
4216 if (first_done) {
4217 newstring = Cat2Strings(string, ptr, "; ", 0);
4218 string = MemFree(string);
4219 string = newstring;
4220 } else {
4221 string = StringSave(ptr);
4222 first_done = TRUE;
4223 }
4224 MemFree(ptr);
4225 }
4226 }
4227 }
4228 }
4229 /* gather pubs on protein bioseq do not do checking or sorting*/
4230 vnp = NULL;
4231 if (ajp->useSeqMgrIndexes) {
4232 /* finess calls to get_pubs */
4233 MemSet ((Pointer) (&gc), 0, sizeof (GatherContext));
4234 gc.userdata = (Pointer) (&vnp);
4235 gc.entityID = ajp->entityID;
4236 psdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
4237 while (psdp != NULL) {
4238 gc.thistype = OBJ_SEQDESC;
4239 gc.itemID = dcontext.itemID;
4240 gc.thisitem = (Pointer) psdp;
4241 omdp = dcontext.omdp;
4242 if (omdp != NULL) {
4243 gc.parenttype = omdp->datatype;
4244 gc.parentitem = omdp->dataptr;
4245 } else {
4246 gc.parenttype = 0;
4247 gc.parentitem = NULL;
4248 }
4249 get_pubs (&gc);
4250 psdp = SeqMgrGetNextDescriptor (bsp, psdp, Seq_descr_pub, &dcontext);
4251 }
4252 psfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
4253 while (psfp != NULL) {
4254 gc.thistype = OBJ_SEQFEAT;
4255 gc.itemID = dcontext.itemID;
4256 gc.thisitem = (Pointer) psfp;
4257 get_pubs (&gc);
4258 psfp = SeqMgrGetNextFeature (bsp, psfp, SEQFEAT_PUB, 0, &fcontext);
4259 }
4260 omdp = ObjMgrGetData (ajp->entityID);
4261 if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
4262 ssp = (SeqSubmitPtr) omdp->dataptr;
4263 if (ssp != NULL) {
4264 sbp = ssp->sub;
4265 if (sbp != NULL) {
4266 gc.thistype = OBJ_SUBMIT_BLOCK;
4267 gc.itemID = 1;
4268 gc.thisitem = (Pointer) sbp;
4269 get_pubs (&gc);
4270 }
4271 }
4272 }
4273 /* also submit block */
4274 } else {
4275 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
4276 /* MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
4277 gs.ignore[OBJ_SEQENTRY] = FALSE;
4278 gs.ignore[OBJ_BIOSEQ] = FALSE;
4279 gs.ignore[OBJ_SEQDESC] = FALSE;*/
4280 gs.ignore[OBJ_SEQSUB] = TRUE;
4281 gs.ignore[OBJ_SEQSUB_CIT] = TRUE;
4282 slp = ValNodeNew(NULL);
4283 slp->choice = SEQLOC_WHOLE;
4284 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
4285 gs.target = slp;
4286 gs.seglevels = 4;
4287
4288 GatherEntity(ajp->entityID, &vnp, get_pubs, &gs);
4289 if (slp)
4290 SeqLocFree(slp);
4291 }
4292 /* if ((status = CheckPubs(ajp, bsp, &vnp)) < 0) {
4293 ValNodeFree(vnp);
4294 vnp = NULL;
4295 }
4296 vnp = OrganizePubList(vnp); */
4297 for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
4298 psp = vnp1->data.ptrvalue;
4299 if ((pdp=psp->descr) != NULL) {
4300 if (pdp->fig) {
4301 total += 32;
4302 total += StringLen(pdp->fig);
4303 }
4304 if (pdp->maploc) {
4305 total += 22;
4306 total += StringLen(pdp->maploc);
4307 }
4308 }
4309 }
4310
4311 if (sfp) {
4312 cdr = (CdRegionPtr) sfp->data.value.ptrvalue;
4313 product = sfp->product;
4314 if (product && SeqLocLen(product))
4315 protein = TRUE;
4316 if (sfp->excpt)
4317 total += 112;
4318 if (cdr && cdr->conflict && (protein || ! sfp->excpt))
4319 total += 112;
4320 }
4321
4322 string1 = (CharPtr) MemNew(total*sizeof(Char));
4323
4324 for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
4325 psp = vnp1->data.ptrvalue;
4326 if ((pdp=psp->descr) != NULL) {
4327 if (pdp->fig) {
4328
4329 temp = CheckEndPunctuation(pdp->fig, '\0');
4330 total = StringLen(string1);
4331
4332 sprintf(string1+total, "This sequence comes from %s", temp);
4333 prefix = "; ";
4334 temp = MemFree(temp);
4335 }
4336 if (pdp->maploc) {
4337 total = StringLen(string1);
4338 sprintf(string1+total, "%sMap location %s", prefix, pdp->maploc);
4339 prefix = "; ";
4340 }
4341 }
4342 }
4343
4344 if (sfp) {
4345 if (cdr && cdr->conflict && (protein || ! sfp->excpt)) {
4346 total = StringLen(string1);
4347 sprintf(string1+total, "%s%s", prefix,
4348 protein?conflict_msg:conflict_msg_no_protein);
4349 }
4350 }
4351 if (string && string1) {
4352 newstring = Cat2Strings(string, string1, "; ", 0);
4353 string = MemFree(string);
4354 string1 = MemFree(string1);
4355 } else if (string) {
4356 newstring = string;
4357 } else if (string1) {
4358 newstring = string1;
4359 }
4360
4361 if (newstring) {
4362 SaveNoteToCharPtrStack(nsp, NULL, newstring);
4363 newstring = MemFree(newstring);
4364 }
4365 for (vnp1=vnp; vnp1; vnp1=vnp1next) {
4366 vnp1next = vnp1->next;
4367 psp = vnp1->data.ptrvalue;
4368 FreePubStruct(psp);
4369 MemFree(vnp1);
4370 }
4371 return;
4372 } /* GetProtRefComment */
4373
4374 NLM_EXTERN GBQualPtr AddModifsToGBQual (GBEntryPtr gbp, GBQualPtr gbqual)
4375 {
4376 CharPtr ptr;
4377 ValNodePtr descr, man;
4378
4379 descr=BioseqGetSeqDescr(gbp->bsp, Seq_descr_modif, NULL);
4380 if (descr) {
4381 for (man = (ValNodePtr) descr-> data.ptrvalue; man != NULL; man = man -> next){
4382 switch (man -> data.intvalue){
4383 case 3: case 14:
4384 ptr = AsnEnumStr("GIBB-mod",
4385 (Int2) man->data.intvalue);
4386 if (GBQualPresent(ptr, gbqual) == FALSE)
4387 gbqual = AddGBQual(gbqual, ptr, " ");
4388 break;
4389 case 4:
4390 if (GBQualPresent("mitochondrion", gbqual) == FALSE)
4391 gbqual = AddGBQual(gbqual, "mitochondrion", NULL);
4392 break;
4393 case 15:
4394 if (GBQualPresent("insertion_seq", gbqual) == FALSE)
4395 gbqual = AddGBQual(gbqual, "insertion_seq", " ");
4396 break;
4397 case 5: case 6: case 7: case 18: case 19:
4398 ptr = AsnEnumStr("GIBB-mod",
4399 (Int2) man->data.intvalue);
4400 if (GBQualPresent(ptr, gbqual) == FALSE)
4401 gbqual = AddGBQual(gbqual, ptr, NULL);
4402 break;
4403 default:
4404 break;
4405 }
4406 }
4407 }
4408 return gbqual;
4409 } /* AddModifsToGBQual */
4410
4411 /*************************************************************************
4412 *GBQualPtr AddOrgRefModToGBQual (OrgRefPtr orp, GBQualPtr gbqual);
4413 *
4414 *Add the OrgRef.mod to a source feat. Note: a few of the quals added
4415 *may be illegal for a source feature, but the validator will catch them
4416 *in the end.
4417 ***************************************************************************/
4418 NLM_EXTERN GBQualPtr AddOrgRefModToGBQual (OrgRefPtr orp, GBQualPtr gbqual)
4419
4420 {
4421 CharPtr mod, ptr, temp_ptr;
4422 Char temp[ASN2FF_STD_BUF]; /* ASN2FF_STD_BUF (now 35) is longer than
4423 any qual. */
4424 Int2 index;
4425 ValNodePtr vnp;
4426
4427 if (orp && orp->mod)
4428 {
4429 for (vnp=orp->mod; vnp; vnp=vnp->next)
4430 {
4431 mod = vnp->data.ptrvalue;
4432 if (StringNCmp(mod, "citation", 8) == 0)
4433 continue;
4434 index=0;
4435 for (ptr=mod; *ptr != '\0'; ptr++)
4436 {
4437 index++;
4438 if (*ptr == ' ' || *ptr == '=')
4439 {
4440 ptr++;
4441 index--;
4442 break;
4443 }
4444 }
4445 if (index > ASN2FF_STD_BUF-1)
4446 continue;
4447
4448 temp_ptr = &(temp[0]);
4449 StringNCpy(temp_ptr, mod, index);
4450 temp[index] = '\0';
4451 if ((GBQualNameValid(temp_ptr)) == -1)
4452 continue;
4453 if (ptr)
4454 gbqual = AddGBQual(gbqual, temp_ptr, ptr);
4455 else
4456 gbqual = AddGBQual(gbqual, temp_ptr, NULL);
4457 }
4458 }
4459 return gbqual;
4460 } /* AddOrgRefModToGBQual */
4461
4462 /*************************************************************************
4463 *GBQualPtr AddBioSourceToGBQual (Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release);
4464 *
4465 *Add the OrgMod.subtypes and SubSource.subtypes to a source feat.
4466 *Add BioSource.genome to a source feat.
4467 *Note: a few of the quals added may be illegal for a source feature,
4468 *but the validator will catch them in the end.
4469 ***************************************************************************/
4470
/* Values for the "organelle" qualifier, indexed by BioSource.genome.
 * AddBioSourceToGBQual consults this table only for 1 < genome < 20; a
 * NULL entry means no "organelle" qualifier is written for that value
 * and the genome[] name table is tried instead.  The table has 20
 * entries, indices 0 to 19.
 */
4471 static CharPtr organelleQual [] = {
4472 NULL, /* [0] */
4473 NULL, /* [1] */
4474 "plastid:chloroplast", /* [2] */
4475 "plastid:chromoplast", /* [3] */
4476 "mitochondrion:kinetoplast", /* [4] */
4477 "mitochondrion", /* [5] */
4478 "plastid", /* [6] */
4479 NULL, /* [7] */
4480 NULL, /* [8] handled as note "extrachromosomal" by the caller */
4481 NULL, /* [9] */
4482 NULL, /* [10] */
4483 NULL, /* [11] */
4484 "plastid:cyanelle", /* [12] */
4485 NULL, /* [13] */
4486 NULL, /* [14] */
4487 "nucleomorph", /* [15] */
4488 "plastid:apicoplast", /* [16] */
4489 "plastid:leucoplast", /* [17] */
4490 "plastid:proplastid", /* [18] */
4491 NULL /* [19] */
4492 };
4493
4494 NLM_EXTERN GBQualPtr AddBioSourceToGBQual (Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release)
4495 {
4496 CharPtr qual, val = NULL;
4497 OrgModPtr omp;
4498 OrgNamePtr onp;
4499 SubSourcePtr ssp;
4500 Int2 i;
4501 Int4 id = -1;
4502 DbtagPtr db = NULL;
4503 OrgRefPtr org;
4504 ValNodePtr vnp;
4505 CharPtr s;
4506
4507 if (biosp == NULL)
4508 return gbqual;
4509 if (biosp->genome) {
4510 i = biosp->genome;
4511 if (i > 1 && i < 20) {
4512 val = organelleQual [i];
4513 if (val != NULL) {
4514 gbqual = AddGBQual (gbqual, "organelle", val);
4515 } else if (i < num_genome) {
4516 qual = genome[i];
4517 if (qual && (GBQualNameValid(qual)) != -1) {
4518 if (i == 8) { /*extrachrom*/
4519 gbqual = AddGBQual(gbqual, "note", "extrachromosomal");
4520 } else {
4521 gbqual = AddGBQual(gbqual, qual, val);
4522 }
4523 } else if (qual && i == 8) {
4524 gbqual = AddGBQual(gbqual, "note", "extrachromosomal");
4525 }
4526 }
4527 }
4528 }
4529 org = (OrgRefPtr) biosp->org;
4530 if (org) {
4531 if ((onp = (OrgNamePtr) org->orgname) != NULL) {
4532 for (omp=onp->mod; omp != NULL; omp=omp->next) {
4533 for (i=0; orgmod_subtype[i].name != NULL; i++) {
4534 if (omp->subtype == orgmod_subtype[i].num)
4535 break;
4536 }
4537 if (orgmod_subtype[i].name == NULL) {
4538 continue;
4539 }
4540 if (orgmod_subtype[i].num == 253) { /* old_lineage */
4541 continue;
4542 }
4543 if (orgmod_subtype[i].num == 254) { /* old_name */
4544 continue;
4545 }
4546 qual = orgmod_subtype[i].name;
4547 if (orgmod_subtype[i].num == 21) { /* nat_hos */
4548 qual = "specific_host";
4549 }
4550 if ((val = omp->subname) == NULL)
4551 val = "";
4552 if ((GBQualNameValid(qual)) != -1) {
4553 gbqual = AddGBQual(gbqual, qual, val);
4554 } else {
4555 s = MemNew(StringLen(val) +
4556 StringLen(qual) + 3);
4557 sprintf(s, "%s: %s", qual, val);
4558 CpNoteToCharPtrStack(nsp, NULL, s);
4559 }
4560 }
4561 }
4562 /* add db_xref */
4563 val = NULL;
4564 for (vnp=org->db; vnp; vnp=vnp->next) {
4565 id = -1;
4566 db = (DbtagPtr) vnp->data.ptrvalue;
4567 if (db && db->db) {
4568 for (i =0; i < DBNUM; i++) {
4569 if (StringCmp(db->db, dbtag[i]) == 0) {
4570 id = i;
4571 break;
4572 }
4573 }
4574 if (id == -1) {
4575 continue; /* unknown dbtag */
4576 }
4577 }
4578 if (db->tag && db->tag->str) {
4579 val = MemNew(StringLen(db->db)+StringLen(db->tag->str)+2);
4580 sprintf(val, "%s:%s", db->db, db->tag->str);
4581 } else if (db->tag) {
4582 val = MemNew(StringLen(db->db)+16);
4583 sprintf(val, "%s:%ld", db->db, (long) db->tag->id);
4584 }
4585 if (val[0] != '\0') {
4586 gbqual = AddGBQual(gbqual, "db_xref", val);
4587 MemFree(val);
4588 }
4589 }
4590 }
4591 for (ssp = biosp->subtype; ssp != NULL; ssp=ssp->next) {
4592 qual = NULL;
4593 if (ssp->subtype == 255) {
4594 qual = "note";
4595 } else if (ssp->subtype > num_subtype) {
4596 qual = NULL;
4597 } else if (ssp->subtype > 0) {
4598 qual = subtype[ssp->subtype - 1];
4599 } else {
4600 qual = "?";
4601 }
4602 val = ssp->name;
4603 if (ssp->subtype != 14 && ssp->subtype != 15) {
4604 if (val == NULL)
4605 val = "";
4606 }
4607 if ((GBQualNameValid(qual)) == -1) {
4608 if (qual == NULL) {
4609 qual = "?";
4610 }
4611 s = MemNew(StringLen(val) + StringLen(qual) + 3);
4612 sprintf(s, "%s: %s", qual, val);
4613 CpNoteToCharPtrStack(nsp, NULL, s);
4614 } else {
4615 gbqual = AddGBQual(gbqual, qual, val);
4616 }
4617 }
4618 if (biosp->is_focus == TRUE) {
4619 gbqual = AddGBQual(gbqual, "focus", NULL);
4620 }
4621 return gbqual;
4622 } /* AddBioSourceToGBQual */
4623
4624 /****************************************************************************
4625 *PrintImpFeatEx
4626 *
4627 * This code prints out an ImpFeat in GenBank and HTML format.
4628 *
4629 ****************************************************************************/
4630 NLM_EXTERN Int2 PrintImpFeatEx (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp, Int4 gi, Int2 entityID, Uint4 itemID)
4631 {
4632 CharPtr flatloc_ptr, key, loc;
4633 GBQualPtr gbqp;
4634 ImpFeatPtr ifp;
4635 Uint1 class_qual, format=ajp->format;
4636 Int2 class_equal, gbqual_index;
4637 static CharPtr buf = NULL;
4638 Uint2 retval;
4639 ValNodePtr seqid;
4640 CharPtr p, q;
4641
4642 if (sfp == NULL)
4643 return -1;
4644 if (sfp->data.choice != SEQFEAT_IMP)
4645 return -1;
4646 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4647 key = ifp->key;
4648 loc = ifp->loc;
4649
4650 for (seqid = ajp->id_print; seqid; seqid=seqid->next) {
4651 if (seqid->choice == SEQID_GI) {
4652 }
4653 }
4654 if (format == EMBL_FMT || format == PSEUDOEMBL_FMT ||
4655 format == EMBLPEPT_FMT)
4656 ff_StartPrint(5, 21, ASN2FF_EMBL_MAX, "FT");
4657 else
4658 ff_StartPrint(5, 21, ASN2FF_GB_MAX, NULL);
4659
4660 if (ajp->slp) {
4661 ff_AddString(key);
4662 } else {
4663 www_featkey(key, gi, entityID, itemID);
4664 }
4665 TabToColumn(22);
4666 if (loc == NULL) {
4667 flatloc_ptr = FlatLoc(bsp, sfp->location);
4668 if (get_www()) {
4669 buf = www_featloc(flatloc_ptr);
4670 ff_AddString(buf);
4671 MemFree(buf);
4672 } else {
4673 ff_AddString(flatloc_ptr);
4674 }
4675 MemFree(flatloc_ptr);
4676 } else {
4677 if (get_www()) {
4678 buf = www_featloc(loc);
4679 ff_AddString(buf);
4680 MemFree(buf);
4681 } else {
4682 ff_AddString(loc);
4683 }
4684 }
4685 if (sfp->partial == TRUE) {
4686 retval = SeqLocPartialCheck(sfp->location);
4687 if (retval == SLP_COMPLETE || retval > SLP_OTHER) {
4688 NewContLine();
4689 ff_AddString("/partial");
4690 }
4691 }
4692 for (gbqp=sfp->qual; gbqp; gbqp=gbqp->next) {
4693 gbqual_index = GBQualNameValid(gbqp->qual);
4694 if (gbqual_index != -1) {
4695 NewContLine();
4696 ff_AddChar( '/');
4697 ff_AddString(gbqp->qual);
4698 class_qual = ParFlat_GBQual_names[gbqual_index].gbclass;
4699 if (class_qual == Class_none) {
4700 class_equal=CheckForEqualSign(gbqp->qual);
4701 if (class_equal == 1)
4702 continue;
4703 }
4704 ff_AddChar('=');
4705 if (class_qual == Class_text &&
4706 StringCmp(gbqp->val, "\"\"") == 0) {
4707 ff_AddString(gbqp->val);
4708 continue;
4709 }
4710 if (get_www() && (class_qual == Class_text
4711 || class_qual == Class_note)) {
4712 buf = www_featloc(gbqp->val);
4713 } else {
4714 buf = StringSave(gbqp->val);
4715 }
4716 if (class_qual == Class_text || class_qual == Class_none
4717 || class_qual == Class_ecnum || class_qual == Class_note)
4718 ff_AddString("\"");
4719 if (class_qual == Class_note) {
4720 /* start of process tildes */
4721 if (StringCmp (gbqp->qual, "note") == 0) {
4722 for (p = buf, q = buf; *p != '\0'; *q++ = *p++) {
4723 if (*p != '~')
4724 continue;
4725 if (p [1] != '~')
4726 *p = '\n';
4727 else
4728 p++;
4729 }
4730 *q = '\0';
4731 }
4732 /* end of process tildes */
4733 www_note_gi(buf);
4734 } else if (class_qual != Class_none) {
4735 if (StringCmp(gbqp->qual, "transl_table") == 0) {
4736 www_gcode(buf);
4737 } else if (StringCmp(gbqp->qual, "db_xref") == 0) {
4738 www_db_xref(buf);
4739 } else if (StringCmp(gbqp->qual, "protein_id") == 0 ||
4740 StringCmp(gbqp->qual, "transcript_id") == 0) {
4741 www_protein_id(buf);
4742 } else {
4743 ff_AddString(buf);
4744 }
4745 }
4746 if (class_qual == Class_text || class_qual == Class_none
4747 || class_qual == Class_ecnum || class_qual == Class_note)
4748 ff_AddString("\"");
4749 if (buf) {
4750 MemFree(buf);
4751 }
4752 } else if (format == GENPEPT_FMT) {
4753 if (StringCmp(gbqp->qual, "site_type") == 0) {
4754 NewContLine();
4755 ff_AddChar('/');
4756 ff_AddString(gbqp->qual);
4757 ff_AddChar('=');
4758 ff_AddString("\"");
4759 ff_AddString(gbqp->val);
4760 ff_AddString("\"");
4761 } else if (StringCmp(gbqp->qual, "bond_type") == 0) {
4762 NewContLine();
4763 ff_AddChar('/');
4764 ff_AddString(gbqp->qual);
4765 ff_AddChar('=');
4766 ff_AddString("\"");
4767 ff_AddString(gbqp->val);
4768 ff_AddString("\"");
4769 } else if (StringCmp(gbqp->qual, "region_name") == 0) {
4770 NewContLine();
4771 ff_AddChar('/');
4772 ff_AddString(gbqp->qual);
4773 ff_AddChar('=');
4774 ff_AddString("\"");
4775 ff_AddString(gbqp->val);
4776 ff_AddString("\"");
4777 } else if (StringCmp(gbqp->qual, "sec_str_type") == 0) {
4778 NewContLine();
4779 ff_AddChar('/');
4780 ff_AddString(gbqp->qual);
4781 ff_AddChar('=');
4782 ff_AddString("\"");
4783 ff_AddString(gbqp->val);
4784 ff_AddString("\"");
4785 } else if (StringCmp(gbqp->qual, "non-std-residue") == 0) {
4786 NewContLine();
4787 ff_AddChar('/');
4788 ff_AddString(gbqp->qual);
4789 ff_AddChar('=');
4790 ff_AddString("\"");
4791 ff_AddString(gbqp->val);
4792 ff_AddString("\"");
4793 } else if (StringCmp(gbqp->qual, "heterogen") == 0) {
4794 NewContLine();
4795 ff_AddChar('/');
4796 ff_AddString(gbqp->qual);
4797 ff_AddChar('=');
4798 ff_AddString("\"");
4799 ff_AddString(gbqp->val);
4800 ff_AddString("\"");
4801 } else if (StringCmp(gbqp->qual, "name") == 0) {
4802 NewContLine();
4803 ff_AddChar('/');
4804 ff_AddString(gbqp->qual);
4805 ff_AddChar('=');
4806 ff_AddString("\"");
4807 ff_AddString(gbqp->val);
4808 ff_AddString("\"");
4809 } else if (StringCmp(gbqp->qual, "coded_by") == 0) {
4810 NewContLine();
4811 ff_AddChar('/');
4812 ff_AddString(gbqp->qual);
4813 ff_AddChar('=');
4814 ff_AddString("\"");
4815 ff_AddString(gbqp->val);
4816 ff_AddString("\"");
4817 }
4818 } else if (ASN2FF_VALIDATE_FEATURES == FALSE) {
4819 NewContLine();
4820 ff_AddChar('/');
4821 ff_AddString(gbqp->qual);
4822 if (gbqp->val != NULL && StringLen(gbqp->val) != 0) {
4823 ff_AddChar('=');
4824 ff_AddString("\"");
4825 ff_AddString(gbqp->val);
4826 ff_AddString("\"");
4827 }
4828 }
4829 }
4830
4831 ff_EndPrint();
4832
4833 return 1;
4834 } /*PrintImpFeatEx */
4835
4836 static GBQualPtr extract_qual(GBQualPtr PNTR head, GBQualPtr x)
4837 {
4838 GBQualPtr v, p;
4839
4840 if (*head == NULL) {
4841 return NULL;
4842 }
4843 if (x == *head) {
4844 *head = x->next;
4845 x->next = NULL;
4846 return x;
4847 }
4848 for (v = *head; v != NULL && v != x; v = v->next) {
4849 p = v;
4850 }
4851 if (v == NULL) {
4852 return NULL;
4853 }
4854 p->next = x->next;
4855 x->next = NULL;
4856 return x;
4857 }
4858 static GBQualPtr tie_next_qual(GBQualPtr head, GBQualPtr next)
4859 {
4860 GBQualPtr v;
4861
4862 if (head == NULL) {
4863 return next;
4864 }
4865 for (v = head; v->next != NULL; v = v->next) {
4866 v = v;
4867 }
4868 v->next = next;
4869 return head;
4870 }
4871
4872 /****************************************************************************
4873 *PrintImpFeat
4874 *
4875 * This code prints out an ImpFeat in GenBank and HTML format.
4876 *
4877 ****************************************************************************/
4878 NLM_EXTERN Int2 PrintImpFeat (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp)
4879 {
4880 CharPtr flatloc_ptr, key, loc;
4881 GBQualPtr gbqp;
4882 ImpFeatPtr ifp;
4883 Uint1 class_qual, format=ajp->format;
4884 Int2 class_equal, gbqual_index;
4885 static CharPtr buf = NULL;
4886 Uint2 retval;
4887 Boolean first=TRUE;
4888 GBQualPtr tmp, gbqpnext, head=NULL;
4889
4890 if (sfp == NULL)
4891 return -1;
4892 if (sfp->data.choice != SEQFEAT_IMP)
4893 return -1;
4894 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4895 key = ifp->key;
4896 loc = ifp->loc;
4897
4898 if (format == EMBL_FMT || format == PSEUDOEMBL_FMT ||
4899 format == EMBLPEPT_FMT)
4900 ff_StartPrint(5, 21, ASN2FF_EMBL_MAX, "FT");
4901 else
4902 ff_StartPrint(5, 21, ASN2FF_GB_MAX, NULL);
4903 ff_AddString(key);
4904 TabToColumn(22);
4905 if (loc == NULL) {
4906 flatloc_ptr = FlatLoc(bsp, sfp->location);
4907 if (get_www()) {
4908 buf = www_featloc(flatloc_ptr);
4909 ff_AddString(buf);
4910 MemFree(buf);
4911 } else {
4912 ff_AddString(flatloc_ptr);
4913 }
4914 MemFree(flatloc_ptr);
4915 } else {
4916 if (get_www()) {
4917 buf = www_featloc(loc);
4918 ff_AddString(buf);
4919 MemFree(buf);
4920 } else {
4921 ff_AddString(loc);
4922 }
4923 }
4924 if (sfp->partial == TRUE) {
4925 retval = SeqLocPartialCheck(sfp->location);
4926 if (retval == SLP_COMPLETE || retval > SLP_OTHER) {
4927 NewContLine();
4928 ff_AddString("/partial");
4929 }
4930 }
4931 /* put all /note last */
4932 for (gbqp=sfp->qual; gbqp; gbqp=gbqpnext) {
4933 gbqpnext=gbqp->next;
4934 if (StringCmp(gbqp->qual, "note") == 0) {
4935 tmp = extract_qual(&(sfp->qual), gbqp);
4936 head = tie_next_qual(head, tmp);
4937 }
4938 }
4939 if (head) {
4940 sfp->qual = tie_next_qual(sfp->qual, head);
4941 }
4942 for (gbqp=sfp->qual; gbqp; gbqp=gbqp->next) {
4943 gbqual_index = GBQualNameValid(gbqp->qual);
4944 if (gbqual_index != -1) {
4945 NewContLine();
4946 if (first) {
4947 ff_AddChar( '/');
4948 ff_AddString(gbqp->qual);
4949 }
4950 class_qual = ParFlat_GBQual_names[gbqual_index].gbclass;
4951 if (class_qual == Class_none) {
4952 class_equal=CheckForEqualSign(gbqp->qual);
4953 if (class_equal == 1)
4954 continue;
4955 }
4956 if (first) {
4957 ff_AddChar('=');
4958 }
4959 if (class_qual == Class_text &&
4960 StringCmp(gbqp->val, "\"\"") == 0) {
4961 /* an empty string is considered legal */
4962 ff_AddString(gbqp->val);
4963 continue;
4964 }
4965 if (get_www() && (class_qual == Class_text
4966 || class_qual == Class_note)) {
4967 buf = www_featloc(gbqp->val);
4968 } else {
4969 buf = StringSave(gbqp->val);
4970 }
4971 if (class_qual == Class_text || class_qual == Class_none
4972 || class_qual == Class_ecnum)
4973 ff_AddString("\"");
4974 if (first && class_qual == Class_note)
4975 ff_AddString("\"");
4976 if (class_qual == Class_note) {
4977 www_note_gi(buf);
4978 } else if (class_qual != Class_none) {
4979 if (StringCmp(gbqp->qual, "transl_table") == 0) {
4980 www_gcode(buf);
4981 } else if (StringCmp(gbqp->qual, "db_xref") == 0) {
4982 www_db_xref(buf);
4983 } else {
4984 ff_AddString(buf);
4985 }
4986 }
4987 if (class_qual == Class_text || class_qual == Class_none
4988 || class_qual == Class_ecnum)
4989 ff_AddString("\"");
4990 if (gbqp->next == NULL && class_qual == Class_note)
4991 ff_AddString("\"");
4992 if (buf) {
4993 MemFree(buf);
4994 }
4995 if (class_qual == Class_note) {
4996 if (first == TRUE)
4997 first = FALSE;
4998 }
4999 } else if (format == GENPEPT_FMT) {
5000 if (StringCmp(gbqp->qual, "site_type") == 0) {
5001 NewContLine();
5002 ff_AddChar('/');
5003 ff_AddString(gbqp->qual);
5004 ff_AddChar('=');
5005 ff_AddString("\"");
5006 ff_AddString(gbqp->val);
5007 ff_AddString("\"");
5008 } else if (StringCmp(gbqp->qual, "bond_type") == 0) {
5009 NewContLine();
5010 ff_AddChar('/');
5011 ff_AddString(gbqp->qual);
5012 ff_AddChar('=');
5013 ff_AddString("\"");
5014 ff_AddString(gbqp->val);
5015 ff_AddString("\"");
5016 } else if (StringCmp(gbqp->qual, "region_name") == 0) {
5017 NewContLine();
5018 ff_AddChar('/');
5019 ff_AddString(gbqp->qual);
5020 ff_AddChar('=');
5021 ff_AddString("\"");
5022 ff_AddString(gbqp->val);
5023 ff_AddString("\"");
5024 } else if (StringCmp(gbqp->qual, "sec_str_type") == 0) {
5025 NewContLine();
5026 ff_AddChar('/');
5027 ff_AddString(gbqp->qual);
5028 ff_AddChar('=');
5029 ff_AddString("\"");
5030 ff_AddString(gbqp->val);
5031 ff_AddString("\"");
5032 } else if (StringCmp(gbqp->qual, "non-std-residue") == 0) {
5033 NewContLine();
5034 ff_AddChar('/');
5035 ff_AddString(gbqp->qual);
5036 ff_AddChar('=');
5037 ff_AddString("\"");
5038 ff_AddString(gbqp->val);
5039 ff_AddString("\"");
5040 } else if (StringCmp(gbqp->qual, "heterogen") == 0) {
5041 NewContLine();
5042 ff_AddChar('/');
5043 ff_AddString(gbqp->qual);
5044 ff_AddChar('=');
5045 ff_AddString("\"");
5046 ff_AddString(gbqp->val);
5047 ff_AddString("\"");
5048 } else if (StringCmp(gbqp->qual, "name") == 0) {
5049 NewContLine();
5050 ff_AddChar('/');
5051 ff_AddString(gbqp->qual);
5052 ff_AddChar('=');
5053 ff_AddString("\"");
5054 ff_AddString(gbqp->val);
5055 ff_AddString("\"");
5056 } else if (StringCmp(gbqp->qual, "coded_by") == 0) {
5057 NewContLine();
5058 ff_AddChar('/');
5059 ff_AddString(gbqp->qual);
5060 ff_AddChar('=');
5061 ff_AddString("\"");
5062 ff_AddString(gbqp->val);
5063 ff_AddString("\"");
5064 }
5065 } else if (ASN2FF_VALIDATE_FEATURES == FALSE) {
5066 NewContLine();
5067 ff_AddChar('/');
5068 ff_AddString(gbqp->qual);
5069 if (gbqp->val != NULL && StringLen(gbqp->val) != 0) {
5070 ff_AddChar('=');
5071 ff_AddString("\"");
5072 ff_AddString(gbqp->val);
5073 ff_AddString("\"");
5074 }
5075 }
5076 }
5077
5078 ff_EndPrint();
5079
5080 return 1;
5081 } /*PrintImpFeat */
5082
5083 #define NOEQUALTOTAL 13
5084 NLM_EXTERN Int2 CheckForEqualSign(CharPtr qual)
5085 /* this have to be changed. Tatiana 02.28.95 */
5086 {
5087 Int2 i;
5088 static CharPtr NoEqualSign[NOEQUALTOTAL] = {
5089 "chloroplast",
5090 "chromoplast",
5091 "cyanelle",
5092 "germline",
5093 "kinetoplast",
5094 "macronuclear",
5095 "mitochondrion",
5096 "partial",
5097 "proviral",
5098 "pseudo",
5099 "rearranged",
5100 "virion",
5101 "focus"
5102 };
5103
5104 if (qual == NULL)
5105 return -1;
5106
5107 for (i=0; i < NOEQUALTOTAL; i++)
5108 if (StringICmp(qual, NoEqualSign[i]) == 0)
5109 return 1;
5110
5111 return 0;
5112
5113 }
5114
5115 /*-------------------------- delete_qual() ----------------------------*/
5116 /*************************************************************************
5117 * delete_qual:
5118 * -- return TRUE if found the "qual" in the "qlist", also remove
5119 * the "qual" from list
5120 * 7-8-93
5121 **************************************************************************/
5122 NLM_EXTERN Boolean delete_qual(GBQualPtr PNTR qlist, CharPtr qual)
5123 {
5124 GBQualPtr curq, preq;
5125
5126 for (preq = NULL, curq = *qlist; curq != NULL; curq = curq->next) {
5127 if (StringCmp(curq->qual, qual) == 0) {
5128 if (preq == NULL)
5129 preq = *qlist = curq->next;
5130 else
5131 preq->next = curq->next;
5132
5133 curq->next = NULL;
5134 GBQualFree(curq);
5135 curq = NULL;
5136
5137 return (TRUE);
5138 }
5139
5140 preq = curq;
5141 }
5142
5143 return (FALSE);
5144
5145 }
5146 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |