NCBI C Toolkit Cross Reference

C/api/asn2ff3.c


  1 /*   asn2ff3.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  asn2ff3.c
 27 *
 28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov
 29 *
 30 * Version Creation Date:   7/15/95
 31 *
 32 *
 33 * File Description: 
 34 *
 35 * Modifications:  
 36 * --------------------------------------------------------------------------
 37 * $Log: asn2ff3.c,v $
 38 * Revision 6.119  2006/07/13 17:06:38  bollin
 39 * use Uint4 instead of Uint2 for itemID values
 40 * removed unused variables
 41 * resolved compiler warnings
 42 *
 43 * Revision 6.118  2003/07/22 16:18:27  kans
 44 * added ZFIN as legal db_xref
 45 *
 46 * Revision 6.117  2003/06/10 18:44:10  kans
 47 * added GeneDB to list of legal db_xrefs
 48 *
 49 * Revision 6.116  2003/05/29 20:25:19  kans
 50 * added Interpro to list of legal dbxrefs
 51 *
 52 * Revision 6.115  2002/11/30 20:18:27  kans
 53 * added GOA to list of legal db_xrefs
 54 *
 55 * Revision 6.114  2002/11/27 22:25:17  kans
 56 * added AceView/WormGenes, NextDB, and WorfDB to legal db_xrefs
 57 *
 58 * Revision 6.113  2002/07/12 17:34:35  kans
 59 * WormBase is now legal dbxref for all records, not just RefSeq
 60 *
 61 * Revision 6.112  2002/06/21 15:31:11  kans
 62 * added GABI db_xref
 63 *
 64 * Revision 6.111  2002/06/18 20:59:59  kans
 65 * added ISFinder as legal db_xref with hotlink
 66 *
 67 * Revision 6.110  2002/05/06 22:15:12  kans
 68 * added IFO and JCM db_xrefs
 69 *
 70 * Revision 6.109  2002/02/27 13:47:11  kans
 71 * fixed model evidence printing
 72 *
 73 * Revision 6.108  2002/02/20 21:59:04  tatiana
 74 * IMGT/LIGM dbxref added
 75 *
 76 * Revision 6.107  2002/01/31 22:31:31  tatiana
 77 * allow transcript_id in NC records
 78 *
 79 * Revision 6.106  2002/01/18 19:53:24  kans
 80 * if RefSeq, allow WormBase dbxref
 81 *
 82 * Revision 6.105  2001/12/28 21:37:10  kans
 83 * allow sfp->product to be SEQLOC_EQUIV
 84 *
 85 * Revision 6.104  2001/11/29 18:29:38  kans
 86 * added FANTOM_DB to list of legal db_xrefs, incremented DBNUM
 87 *
 88 * Revision 6.103  2001/11/12 19:32:38  kans
 89 * updated mRNAEvidenceComment
 90 *
 91 * Revision 6.102  2001/10/25 12:45:45  kans
 92 * Get3LetterSymbol was using table->num instead of table_3aa->num
 93 *
 94 * Revision 6.101  2001/10/15 17:08:44  kans
 95 * updated legal db_xref list to collaboration + RefSeq
 96 *
 97 * Revision 6.100  2001/10/15 13:57:22  kans
 98 * added BDGP_INS and SoyBase as legal db_xrefs
 99 *
100 * Revision 6.99  2001/10/02 17:39:50  yaschenk
101 * Removing memory leaks
102 *
103 * Revision 6.98  2001/09/06 20:31:24  yaschenk
104 * removing memory leak - seqid returned by GetSeqIdForGI() needs to be freed
105 *
106 * Revision 6.97  2001/09/05 23:37:42  tatiana
107 * ribosomal slippage added to /note
108 *
109 * Revision 6.96  2001/09/05 23:32:39  tatiana
110 * supressed comparison of note to gene->synonym
111 *
112 * Revision 6.95  2001/08/22 22:35:07  kans
113 * added ProductIsLocal for /translation
114 *
115 * Revision 6.94  2001/08/07 16:49:41  kans
116 * use NUM_SEQID, added third party annotation SeqIDs to one more place
117 *
118 * Revision 6.93  2001/08/03 20:36:16  kans
119 * implemented ASN2GNBK_PRINT_UNKNOWN_ORG test to suppress unwanted mode diffs for asn2gnbk QA
120 *
121 * Revision 6.92  2001/07/12 17:12:49  kans
122 * biop->genome range checks in AddBioSourceToGBQual to prevent crashes
123 *
124 * Revision 6.91  2001/07/08 21:18:50  kans
125 * if ssp->subtype is 0, use ? as tag in note
126 *
127 * Revision 6.90  2001/06/26 19:50:07  kans
128 * call AddPID with is_NC as an option for showing /protein_id with the gi
129 *
130 * Revision 6.89  2001/06/25 22:22:17  kans
131 * ProteinFromCdRegion and GetProductFromCDS only if sfp->product and ! ajp->genome_view, should eliminate unwanted fetches to get far delta components
132 *
133 * Revision 6.88  2001/05/31 17:42:18  kans
134 * NC and NG RefSeq records allow remote fetching for /protein_id and /transcript_id, show gi if fetching not enabled
135 *
136 * Revision 6.87  2001/03/17 00:51:30  tatiana
137 * GeneID added to dbxref array
138 *
139 * Revision 6.86  2001/02/13 23:31:58  kans
140 * allow trans splicing exception, do not change sfp_in->excpt
141 *
142 * Revision 6.85  2001/01/30 16:25:54  kans
143 * precursor_RNA now has /product as legal qualifier
144 *
145 * Revision 6.84  2001/01/26 19:26:36  kans
146 * added niaEST, increased DBNUM
147 *
148 * Revision 6.83  2001/01/26 19:21:45  kans
149 * extrachromosomal into source note, removed macronuclear, extrachrom, plasmid from organism line
150 *
151 * Revision 6.82  2001/01/18 23:57:01  kans
152 * add GO (gene ontology) to list of legal dbxrefs
153 *
154 * Revision 6.81  2001/01/02 19:56:48  kans
155 * Get3LetterSymbol protects against empty string
156 *
157 * Revision 6.80  2000/12/07 19:03:53  tatiana
158 * transcript_id shows for NT only
159 *
160 * Revision 6.79  2000/12/06 22:00:46  tatiana
161 * ifdef removed
162 *
163 * Revision 6.78  2000/12/06 20:56:24  tatiana
164 * AceView link added
165 *
166 * Revision 6.76  2000/12/04 22:23:47  tatiana
167 * contig comments added
168 *
169 * Revision 6.75  2000/11/22 16:48:18  tatiana
170 * remove debugging error printing
171 *
172 * Revision 6.74  2000/11/10 00:37:13  tatiana
173 * changes in AddPID
174 *
175 * Revision 6.73  2000/10/25 15:57:57  kans
176 * sfp_in->excpt set to FALSE, not NULL, UNIX compiler does not know the difference, but Mac and PC compilers do
177 *
178 * Revision 6.72  2000/10/24 20:35:35  tatiana
179 * CDS without protein sequence is accepted for  not forgbrel mode
180 *
181 * Revision 6.70  2000/10/19 18:52:32  kans
182 * added another NULL entry to organelleQual for endogenous virus to suppress as organelle qualifier
183 *
184 * Revision 6.69  2000/10/16 19:10:17  kans
185 * added UniSTS and InterimID to legal dbxrefs
186 *
187 * Revision 6.68  2000/10/10 15:06:02  kans
188 * added SUBSRC_endogenous_virus_name
189 *
190 * Revision 6.67  2000/08/28 22:17:18  kans
191 * added CDD to list of legal dbxrefs
192 *
193 * Revision 6.66  2000/07/14 20:24:26  kans
194 * added RGD as dbxref with web link
195 *
196 * Revision 6.65  2000/07/12 22:45:15  kans
197 * added ORGMOD_old_lineage
198 *
199 * Revision 6.64  2000/06/20 17:31:34  kans
200 * added authority through breed as orgmod.subtypes
201 *
202 * Revision 6.63  2000/06/15 16:45:40  kans
203 * added segment to biosource note print
204 *
205 * Revision 6.62  2000/06/05 17:52:11  tatiana
206 * increase size of feature arrays to Int4
207 *
208 * Revision 6.61  2000/05/15 15:52:50  bazhin
209 * Removed memory leak in "PrintSourceFeat()".
210 *
211 * Revision 6.60  2000/03/30 20:37:29  kans
212 * added tilde to newline code in PrintImpFeatEx (thanks to Sergei B)
213 *
214 * Revision 6.59  2000/03/01 19:09:53  tatiana
215 * for SYN records with multiple source features there is no subtraction
216 *
217 * Revision 6.58  2000/02/17 21:59:18  kans
218 * /organelle not under ajp->forgbrel for this release now
219 *
220 * Revision 6.57  2000/02/15 22:53:56  kans
221 * added dbSNP and RATMAP as legal dbxrefs, put /organelle under ajp->forgrel control
222 *
223 * Revision 6.56  2000/02/09 01:12:51  tatiana
224 * remove space in organelle qualifier
225 *
226 * Revision 6.55  2000/01/21 20:48:45  kans
227 * changes to merge several source qualifiers under new organelle qualifier
228 *
229 * Revision 6.54  2000/01/11 22:49:37  tatiana
230 * protein accession is not required in DUMP_MODE
231 *
232 * Revision 6.53  2000/01/03 23:16:17  kans
233 * CDS note components from GetProtRefComment are separated by semicolons - to be consistent with upcoming asn2gnbk style
234 *
235 * Revision 6.52  1999/10/18 20:13:34  kans
236 * removed erroneous cast in sprintf
237 *
238 * Revision 6.51  1999/10/06 22:18:29  kans
239 * calls ComposeCodonsRecognizedString
240 *
241 * Revision 6.50  1999/10/06 20:23:48  bazhin
242 * Removed memory leaks.
243 *
244 * Revision 6.49  1999/08/03 20:48:23  tatiana
245 * UMR error fixed in PrintImpFeat
246 *
247 * Revision 6.47  1999/04/26 18:53:00  tatiana
248 *  added pseudo from sfp in ConvertToNAImpFeat()
249 *
250 * Revision 6.46  1999/04/06 22:37:45  tatiana
251 * protein_id hot link added
252 *
253 * Revision 6.45  1999/04/06 15:00:16  tatiana
254 * www_featkey is not called for slp view
255 *
256 * Revision 6.44  1999/03/30 22:23:33  kans
257 * pseudo can be on grp or sfp
258 *
259 * Revision 6.43  1999/03/30 19:18:19  tatiana
260 * changes for SEQID_OTHER
261 *
262 * Revision 6.42  1999/03/22 23:09:26  tatiana
263 * AddPID() changes
264 *
265 * Revision 6.41  1998/10/19 15:57:35  tatiana
266 * UniGene added to dbtag array
267 *
268 * Revision 6.40  1998/09/24 17:45:57  kans
269 * fixed GetDBXrefFromGene problem (TT)
270 *
271 * Revision 6.39  1998/09/01 19:25:21  kans
272 * context parameter in get best protein, get cds/rna given product
273 *
274 * Revision 6.38  1998/08/19 18:40:38  tatiana
275 * RiceGenes added to dbtag array
276 *
277 * Revision 6.37  1998/07/21 15:14:50  kans
278 * GetProtRefComments modified for indexes because continue statement avoided get next feature, got stuck
279 *
280 * Revision 6.36  1998/07/15 22:07:19  kans
281 * implemented sequence manager indexes for non-segmented nucleotides
282 *
283 * Revision 6.35  1998/07/13 14:52:24  tatiana
284 * subtypes added to source feature /note
285 *
286 * Revision 6.34  1998/06/15 14:57:22  tatiana
287 * UNIX compiler warnings  and extra HTML characters in notes fixed
288 *
289 * Revision 6.33  1998/05/20 20:05:40  tatiana
290 * SEQFEAT_REGION added to get_prot_feats()
291 *
292 * Revision 6.32  1998/05/18 14:41:53  tatiana
293 * GI added to dbtag array
294 *
295 * Revision 6.31  1998/05/08 21:56:56  tatiana
296 * added new PARTIAL_MODE
297 *
298 * Revision 6.30  1998/04/30 21:42:36  tatiana
299 * *** empty log message ***
300 *
301 * Revision 6.29  1998/04/27 18:31:51  tatiana
302 * added /focus in PrintSourceFeat()
303 *
304 * Revision 6.28  1998/04/24 15:10:08  tatiana
305 * GetProtRefComment() fixed: only main Prot-Ref feature adds comment to CDS
306 *
307 * Revision 6.27  1998/04/15 21:38:32  kans
308 * rearrange dbtag array so PID set is at start, allow unknown database on all but release_mode (Tatiana)
309 *
310 * Revision 6.24  1998/04/06 14:59:08  tatiana
311 * PutTranslationLast has been moved
312 *
313 * Revision 6.23  1998/04/03 22:38:36  tatiana
314 * selenocysteine added tp /note in ComposeCodeBreakQuals()
315 *
316 * Revision 6.22  1998/04/02 21:42:53  tatiana
317 * ignore old_name in OrgMod
318 *
319 * Revision 6.21  1998/04/02 17:21:23  tatiana
320 * a bug fixed in AddBioSourceToGBQual()
321 *
322 * Revision 6.20  1998/03/30 20:38:56  tatiana
323 * nat_host changed to specific_host
324 *
325 * Revision 6.19  1998/03/27 23:01:54  tatiana
326 * AddBioSourceToGBQual: added all OrgMod.subtypes as /notes on the source feature
327 *
328 * Revision 6.18  1998/03/24 19:47:45  tatiana
329 * added check for sfp->except_text
330 *
331 * Revision 6.17  1998/03/04 18:38:48  tatiana
332 *  illegal feature will be dropped in ConvertToAAImpFeat
333 *
334 * Revision 6.16  1998/02/19 21:28:52  tatiana
335 * dbtags array updated
336 *
337 * Revision 6.15  1998/01/26 21:16:16  tatiana
338 * biovar and country added to source feature /note
339 *
340 * Revision 6.14  1998/01/20 22:45:11  tatiana
341 * show both product and descr in Genpept
342 *
343 * Revision 6.13  1998/01/13 16:27:38  tatiana
344 * fixed a bug in dbtag check in PrintSourceFeat
345 *
346 * Revision 6.12  1997/12/23 21:57:16  tatiana
347 * focus and specimen_voucher
348 *
349 * Revision 6.11  1997/12/15 15:48:33  tatiana
350 * features processing has been changed
351 *
352 * Revision 6.10  1997/12/02 18:15:02  tatiana
353 * fix use of printf
354 *
355 * Revision 6.9  1997/10/23 16:57:42  tatiana
356 * *** empty log message ***
357 *
358 * Revision 6.6  1997/09/16 15:48:07  kans
359 * removed automatically generated diff lines
360 *
361 * Revision 6.5  1997/09/16 15:42:52  kans
362 * show non-gbff source qualifiers in note with labels (TT)
363 *
364 * Revision 6.4  1997/09/12 20:20:18  tatiana
365 * fixed typo
366 *
367 * Revision 6.3  1997/09/12 20:03:53  tatiana
368 * added source feature in genome_view
369 *
370 * Revision 6.2  1997/09/04 01:16:48  kans
371 * fixed typo
372 *
373 * Revision 6.1  1997/09/03 21:49:37  tatiana
374 * GatherItemWithLock() added for ProtRef features
375 *
376 * Revision 6.0  1997/08/25 18:04:51  madden
377 * Revision changed to 6.0
378 *
379 * Revision 5.59  1997/08/21 19:03:17  tatiana
380 * map, syn, description eliminated from features other than gene
381 *
382 * Revision 5.58  1997/08/05 20:09:08  kans
383 * added check for po->sfp null in PrintSourceFeat
384 *
385 * Revision 5.57  1997/07/29 14:55:51  kans
386 * make sure features on protein are SEQFEAT_PROT
387 *
388 * Revision 5.56  1997/07/16 21:08:28  tatiana
389 * Use gene synonym for /gene qualifier
390 *
391 * Revision 5.55  1997/06/19 18:37:02  vakatov
392 * [WIN32,MSVC++]  Adopted for the "NCBIOBJ.LIB" DLL'ization
393 *
394 * Revision 5.54  1997/06/12 16:56:37  kans
395 * fixed typo that resulted in lost note (TT)
396 *
397 * Revision 5.53  1997/06/10 15:27:12  tatiana
398 * fix a typo in ConvertToNa... that led to the lost /note
399 *
400  * Revision 5.47  1997/03/14  21:21:33  tatiana
401  * exp_evidence fix
402  *
403  * Revision 5.46  1997/03/05  22:12:33  tatiana
404  * print 'pseudo' in /note for orphan genes
405  *
406  * Revision 5.45  1997/03/04  23:45:14  tatiana
407  * check for 'pseudo' gene added in ConvertToNAImpFeat()
408  *
409  * Revision 5.44  1997/02/25  23:47:21  tatiana
410  * new error message added for dropped feature
411  *
412  * Revision 5.42  1997/01/29  15:49:11  tatiana
413  * fix the entityID in GatherProductGeneInfo()
414  *
415  * Revision 5.40  1997/01/15  17:23:38  tatiana
416  * a bug fixed (purify reported) in PrintNAFeatByNumber()
417  *
418  * Revision 5.39  1997/01/07  23:27:13  tatiana
419  * check for NULLs added in CompareTranslation
420  *
421  * Revision 5.38  1997/01/07  22:32:41  tatiana
422  * added SEQFEAT_SITE to get_prot_feats callback
423  *
424  * Revision 5.37  1997/01/02  22:49:55  tatiana
425  * gather SEQFEAT_BOND
426  *
427  * Revision 5.36  1996/12/10  17:45:41  tatiana
428  * a bug fixed in ComposeNoteFromNoteStruct()
429  *
430  * Revision 5.35  1996/12/09  19:12:33  tatiana
431  * SPTREMBL added to legal db_xref database names
432  *
433  * Revision 5.34  1996/12/04  16:52:16  tatiana
434  * a typo fixed in Add_dbxref
435  *
436  * Revision 5.33  1996/12/03  15:49:57  tatiana
437  * 'CK' added to array of legal databases in db_xref
438  *
439  * Revision 5.32  1996/10/30  16:52:36  tatiana
440  * SeqIdFindBest added in PrintSourceFeat
441  *
442  * Revision 5.31  1996/10/25  22:11:19  tatiana
443  * NoteCmp changed
444  *
445  * Revision 5.30  1996/10/24  20:40:12  tatiana
446  * a bug fixed in AddDBXref()
447  *
448  * Revision 5.29  1996/10/18  21:37:22  tatiana
449  * a bug fixed in NoteCmp
450  *
451  * Revision 5.28  1996/10/09  15:15:00  tatiana
452  * Take the main protein ONLY (not sig_peptide mat_peptide)
453  * to make CDS comments
454  *
455  * Revision 5.27  1996/09/25  18:05:45  tatiana
456  * SEQFEAT_COMMENT becomes misc_feature
457  *
458  * Revision 5.26  1996/09/18  20:41:26  kans
459  * changed uninitialized variable names to correct names, removed unused
460  * variable
461  *
462  * Revision 5.25  1996/09/18  20:21:27  tatiana
463  * NoteCmp added to ComposeNoteFromNoteStruct to check for identical notes
464  *
465  * Revision 5.24  1996/09/17  14:59:04  tatiana
466  * virion and transl_except added
467  *
468  * Revision 5.23  1996/09/12  17:52:28  tatiana
469  * a bug fixed in PrintSourceFeat
470  *
471  * Revision 5.22  1996/09/06  14:58:00  tatiana
472  * clean sfp_out at the end of PrintSourceFeat and PrintNAFeatByNumber
473  *
474  * Revision 5.21  1996/09/04  13:40:17  tatiana
475  * a  bug fixed in GetDotTRNA
476  *
477  * Revision 5.19  1996/09/03  19:51:30  tatiana
478  * extra_loc added
479  *
480  * Revision 5.18  1996/08/16  20:32:23  tatiana
481  * for ifp->key StringSave is used not StringCpy
482  *
483  * Revision 5.17  1996/08/12  16:36:40  tatiana
484  * ErrPostEx changed to ErrPostStr
485  *
486  * Revision 5.16  1996/08/06  20:30:46  kans
487  * SeqIdFindBest called to handle local IDs and genbank IDs coexisting
488  *
489  * Revision 5.15  1996/08/02  21:41:23  tatiana
490  * turned off metho conceptual transl by author
491  *
492  * Revision 5.14  1996/07/30  17:28:07  kans
493  * ParFlat_... arrays now external in header file
494  *
495  * Revision 5.13  1996/07/30  16:34:09  tatiana
496  * minor change in PrintSourcefeat
497  *
498  * Revision 5.12  1996/07/29  19:46:14  tatiana
499  * GBQual_names changed to use a structure
500  *
501  * Revision 5.11  1996/07/23  22:33:40  tatiana
502  * prot feats in genpept (piptides)
503  *
504  * Revision 5.10  1996/07/22  22:07:21  tatiana
505  * a bug fixed in DoTRNAQual
506  *
507  * Revision 5.9  1996/07/15  18:07:10  tatiana
508  * minor changes in PrintSourceFeat to show 'unknown' in debug mode
509  *
510  * Revision 5.8  1996/07/12  20:38:22  tatiana
511  * concept_transl_a supressed
512  *
513  * Revision 5.7  1996/07/12  20:11:49  tatiana
514  * DotRNAQuals() changed
515  *
516  * Revision 5.6  1996/07/11  14:58:27  tatiana
517  * product in tRNA
518  *
519  * Revision 5.5  1996/07/09  16:31:34  tatiana
520  * a bug fixed in PrintNAFeatByNumber
521  *
522  * Revision 5.4  1996/07/02  18:09:17  tatiana
523  * don't print duplicated features (PrintNAFeatByNumber)
524  *
525  * Revision 5.3  1996/06/14  18:03:56  tatiana
526  * GetNAFeatKey change
527  *
528  * Revision 5.2  1996/06/11  15:35:04  tatiana
529  * make GetGeneticCode static and get_prot_feats non-static
530  *
531  * Revision 5.1  1996/05/31  18:01:24  tatiana
532  * check for /pseudo in CdRegion added
533  *
534  * Revision 4.35  1996/05/21  21:02:03  tatiana
535  * a bug fixed in location[] size in PrintSourceFeat()
536  *
537  * Revision 4.34  1996/05/16  20:58:09  tatiana
538  * GetCdregionGeneXrefInfo changed to Boolean
539  *
540  * Revision 4.33  1996/04/25  14:55:33  kans
541  * protect against biosource subsource subtype of 255 (other) or bad values
542  *
543  * Revision 4.32  1996/04/15  14:36:49  tatiana
544  * memory leaks cleaning
545  *
546  * Revision 4.31  1996/04/08  21:53:56  tatiana
547  * change in www_featloc
548  *
549  * Revision 4.30  1996/04/05  17:43:36  ostell
550  * added quickie patch for overrun of buf[30] when called by
551  * www_featloc()
552  *
553  * Revision 4.29  1996/03/25  15:20:19  tatiana
554  * add html symbols
555  *
556  * Revision 4.28  1996/03/19  23:58:27  tatiana
557  * print activity in CDS
558  *
559  * Revision 4.27  1996/03/12  21:36:32  tatiana
560  * 'serotype' added to orgmod_subtype array
561  *
562  * Revision 4.26  1996/02/28  04:53:06  ostell
563  * changes to support segmented master sequences
564  *
565  * Revision 4.25  1996/02/26  00:46:18  ostell
566  * removed unused local variables and integer size mismatch fusses
567  *
568  * Revision 4.24  1996/02/18  21:16:48  tatiana
569  * memory leaks cleaned up
570  *
571  * Revision 4.23  1996/02/16  16:22:32  tatiana
572  * a bug fixed in ConvertToNAImpFeat
573  *
574  * Revision 4.22  1996/02/15  15:52:18  tatiana
575  * Gather for temp loaded items and sorting features within entity added
576  *
577  * Revision 4.21  1996/01/29  22:34:42  tatiana
578  * mainly PID changes
579  *
580  * Revision 4.20  1995/12/20  22:38:02  tatiana
581  * gene xrefs to db_xref
582  *
583  * Revision 4.19  1995/12/15  02:47:01  ostell
584  * added protection so that GatherProductGeneInfo does not gather if protein
585  * bioseq not already in memory
586  *
587  * Revision 4.18  1995/12/13  16:31:36  tatiana
588  * anticodon added to new tRNA slot
589  *
590  * Revision 4.17  1995/12/04  23:01:16  tatiana
591  * take starin from OrgRef.mod in PrintSourceFeat()
592  *
593  * Revision 4.16  1995/11/28  15:19:46  tatiana
594  * GetPID fixed
595  *
596  * Revision 4.15  1995/11/22  18:59:42  tatiana
597  * a bug fixed in orphan genes printing
598  *
599  * Revision 4.14  1995/11/17  21:49:19  tatiana
600  * hot link to genetic code added
601  *
602  * Revision 4.13  1995/11/17  21:28:35  kans
603  * asn2ff now uses gather (Tatiana)
604  *
605  * Revision 4.4  1995/08/18  22:18:31  tatiana
606  * a bug fix
607  *
608  * Revision 4.1  1995/08/01  14:51:29  tatiana
609  * change SeqIdPrint to SeqIdWrite
610  *
611  * Revision 1.65  1995/07/17  19:33:20  kans
612  * parameters combined into Asn2ffJobPtr structure
613  *
614  * Revision 1.61  1995/06/19  21:40:02  kans
615  * Tatiana's first major reorganization, moving printing, adding HTML
616  *
617  * Revision 1.60  1995/05/19  21:25:06  kans
618  * no longer fetches CDS protein product causing Entrez disc swap
619  *
620  * Revision 1.59  1995/05/15  21:46:05  ostell
621  * added Log line
622  *
623 *
624 **************************************/
625 #include <asn2ffp.h>
626 #include <a2ferrdf.h>
627 #include <a2ferr.h>
628 #include <utilpub.h>
629 #include <ffprint.h>
630 #include <parsegb.h>
631 #include <sequtil.h>
632 #include <edutil.h>
633 #include <gather.h>
634 #include <explore.h>
635 #include <sqnutils.h>
636 
637 #define METHOD_concept_transl_a 6
/* Numeric code named after the "concept_transl_a" method; the code that uses it is
   outside this section.  NOTE(review): presumably a protein `method` value -- confirm. */
638 
/* Forward declarations (wrapped in the PROTO macro) for routines whose definitions are
   not visible in this section.  GetGeneticCode, GetProtRefComment and CheckForExtraChars
   are declared static (file-local); CheckForQual is declared without NLM_EXTERN; all
   others are declared NLM_EXTERN. */
639 NLM_EXTERN CharPtr mRNAEvidenceComment PROTO ((UserObjectPtr obj, Boolean add));
640 NLM_EXTERN Int2 ConvertToNAImpFeat PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfp_out, SortStructPtr p));
641 NLM_EXTERN Int2 ConvertToAAImpFeat PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfp_out, SortStructPtr p));
642 NLM_EXTERN Int2 ValidateAAImpFeat PROTO ((SeqFeatPtr sfp, Boolean use_product));
643 NLM_EXTERN Int2 ValidateNAImpFeat PROTO ((SeqFeatPtr sfp));
644 NLM_EXTERN void AddProteinQuals PROTO ((SeqFeatPtr sfp, SeqFeatPtr sfp_out, NoteStructPtr nsp));
645 static void GetGeneticCode PROTO ((CharPtr ptr, SeqFeatPtr sfp));
646 NLM_EXTERN void ComposeGBQuals PROTO((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, GBEntryPtr gbp, SortStructPtr p, Boolean note_pseudo));
647 NLM_EXTERN CharPtr ComposeNoteFromNoteStruct PROTO ((NoteStructPtr nsp, GeneStructPtr gsp));
648 NLM_EXTERN void AddPID PROTO ((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG));
649 NLM_EXTERN void Add_trid PROTO ((Asn2ffJobPtr ajp, SeqFeatPtr sfp_out));
650 NLM_EXTERN Int2 MakeGBSelectNote PROTO ((CharPtr ptr, SeqFeatPtr sfp));
651 static void GetProtRefComment PROTO ((SeqFeatPtr sfp, BioseqPtr bsp, Asn2ffJobPtr ajp, OrganizeProtPtr opp, NoteStructPtr nsp, Uint1 method));
652 NLM_EXTERN Int2 MiscFeatOrphanGenes PROTO ((Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp, Int2 index));
653 Int2 CheckForQual PROTO ((GBQualPtr gbqual, CharPtr string_q, CharPtr string_v));
654 NLM_EXTERN GBQualPtr AddModifsToGBQual PROTO ((GBEntryPtr gbp, GBQualPtr gbqual));
655 NLM_EXTERN GBQualPtr AddOrgRefModToGBQual PROTO ((OrgRefPtr orp, GBQualPtr gbqual));
656 NLM_EXTERN Int2 CheckForEqualSign PROTO ((CharPtr qual));
657 NLM_EXTERN CharPtr GetProductFromCDS PROTO ((ValNodePtr product, ValNodePtr location, Int4 length));
658 NLM_EXTERN void PrepareSourceFeatQuals PROTO ((SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modif));
659 static Int2 CheckForExtraChars PROTO ((CharPtr note));
660 NLM_EXTERN GBQualPtr AddBioSourceToGBQual PROTO((Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release));
661 NLM_EXTERN Boolean delete_qual PROTO((GBQualPtr PNTR qlist, CharPtr qual));
662 
663 typedef struct {
/* ORGMOD: one row of a name/number lookup table (see orgmod_subtype, which is an
   array of these terminated by a { NULL, 0 } row). */
664         CharPtr name;
665         Uint1   num;
666 } ORGMOD;
667 
668 #define num_subtype 25
/* subtype: table of num_subtype (25) qualifier-name strings; it has external linkage
   (not static).  NOTE(review): presumably indexed by a SubSource subtype value minus
   one -- the code that reads this table is not visible here, confirm before reordering. */
669 CharPtr subtype[num_subtype] = {
670 "chromosome", "map", "clone", "sub_clone", "haplotype", "genotype", "sex",
671 "cell_line", "cell_type", "tissue_type", "clone_lib", "dev_stage", 
672 "frequency", "germline", "rearranged", "lab_host", "pop_variant",
673 "tissue_lib", "plasmid", "transposon", "insertion_seq", "plastid", "country",
674 "segment", "endogenous_virus"};
675 
676 #define num_genome 15
/* genome: file-local table of num_genome (15) genome/organelle names, starting with
   "unknown" at index 0.  NOTE(review): presumably indexed directly by a BioSource genome
   value -- the reader of this table is outside this section, confirm before reordering. */
677 static CharPtr genome[num_genome] = {"unknown", "genomic", "chloroplast", "chromoplast", "kinetoplast", "mitochondrion", "plastid", "macronuclear",
678 "extrachrom", "plasmid", "transposon", "insertion_seq", "cyanelle", "proviral", "virion"};
679 
680 /*______________________________________________________________________
681 **
682 **      This code is not currently used.
683 **      I do not remove this piece of code, just comment it out.
684 **      -- Dmitri Lukyanov
685 */
686 #if 0
687 
688 #define num_biomol 7
689 static CharPtr biomol[num_biomol] = {"genomic", "RNA", "mRNA", "rRNA",
690 "tRNA", "snRNA", "scRNA"};
691 
692 #endif
693 /*______________________________________________________________________
694 */
695 
696 ORGMOD orgmod_subtype[34] = {
/* Name/number pairs for organism modifiers: 33 named rows followed by a { NULL, 0 }
   terminator, which together fill the 34 declared slots.  The numbers run 2..31 and
   then jump to 253, 254 and 255 for "old_lineage", "old_name" and "note". */
697         { "strain", 2 }, {"sub_strain", 3}, {"type", 4}, {"subtype", 5},
698         {"variety", 6}, {"serotype",7}, {"serogroup",8}, {"serovar", 9}, 
699         {"cultivar", 10}, {"pathovar", 11}, {"chemovar", 12}, {"biovar", 13},
700         {"biotype", 14}, {"group", 15}, {"subgroup", 16}, {"isolate", 17},
701         {"common", 18}, {"acronym", 19}, {"dosage", 20}, {"nat_host", 21},
702         {"sub_species", 22}, {"specimen_voucher", 23}, {"authority", 24},
703         {"forma", 25}, {"forma_specialis", 26}, {"ecotype", 27},
704         {"synonym", 28}, {"anamorph", 29}, {"teleomorph", 30}, {"breed", 31},
705         {"old_lineage", 253}, {"old_name", 254}, {"note", 255}, { NULL, 0 }
706 };
707 
708 /*
709 CharPtr dbtag[DBNUM] = {
710   "PIDe", "PIDd", "PIDg", "PID", "FLYBASE",
711   "GDB", "MIM", "SGD", "SWISS-PROT", "CK",
712   "SPTREMBL", "ATCC", "ATCC (inhost)", "ATCC (dna)", "taxon",
713   "BDGP_EST", "dbEST", "dbSTS", "MGD", "PIR",
714   "GI", "RiceGenes", "UniGene", "LocusID", "dbSNP",
715   "RATMAP", "RGD", "CDD", "UniSTS", "InterimID", "COG", "GO", "niaEST",
716   "GeneID", "BDGP_INS", "SoyBase",
717   };
718 */
719 
720 CharPtr dbtag[DBNUM] = {
/* Database names recognised for db_xref.  Add_dbxref compares each Dbtag's db string
   against these entries with StringCmp and uses the matching index:
     - the four PID variants must stay at indexes 0-3, because Add_dbxref treats a
       matched index below 4 as a PID tag and drops it on non-CDS features;
     - the remaining entries only need to be present, their exact index is not tested.
   DBNUM is defined outside this section; there are 58 initializers here, so DBNUM
   must be at least 58 (any extra slots would be NULL). */
721   "PIDe", "PIDd", "PIDg", "PID",
722   "AceView/WormGenes",
723   "ATCC",
724   "ATCC(in host)",
725   "ATCC(dna)",
726   "BDGP_EST",
727   "BDGP_INS",
728   "CDD",
729   "CK",
730   "COG",
731   "dbEST",
732   "dbSNP",
733   "dbSTS",
734   "ENSEMBL",
735   "ESTLIB",
736   "FANTOM_DB",
737   "FLYBASE",
738   "GABI",
739   "GDB",
740   "GeneDB",
741   "GeneID",
742   "GI",
743   "GO",
744   "GOA",
745   "IFO",
746   "IMGT/LIGM",
747   "IMGT/HLA",
748   "InterimID",
749   "Interpro",
750   "ISFinder",
751   "JCM",
752   "LocusID",
753   "MaizeDB",
754   "MGD",
755   "MGI",
756   "MIM",
757   "NextDB",
758   "niaEST",
759   "PIR",
760   "PSEUDO",
761   "RATMAP",
762   "RiceGenes",
763   "REMTREMBL",
764   "RGD",
765   "RZPD",
766   "SGD",
767   "SoyBase",
768   "SPTREMBL",
769   "SWISS-PROT",
770   "taxon",
771   "UniGene",
772   "UniSTS",
773   "WorfDB",
774   "WormBase",
775   "ZFIN",
776   };
777 
778 
779 /*************************************************************************
780 *       Add_dbxref (defined below, after the IsRefSeq helper):
781 *       sfp_out is the synthetic SeqFeatPtr of type ImpFeat used for printing; Add_dbxref puts the dbxref qualifiers on it and is applied to every SeqFeatPtr.
782 *************************************************************************/
783 static Boolean IsRefSeq (BioseqPtr bsp)
784 {
785   SeqIdPtr        sip;
786 
787   if (bsp == NULL)
788     return FALSE;
789   for (sip = bsp->id; sip != NULL; sip = sip->next) {
790     if (sip->choice == SEQID_OTHER)
791       return TRUE;
792   }
793   return FALSE;
794 }
795 
796 static void Add_dbxref (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, SeqFeatPtr sfp, BioseqPtr bsp)
797 {
798         Int4 id = -1;
799         Int2 i;
800         ValNodePtr vnp;
801         DbtagPtr db = NULL;
802         CharPtr val;
803 
804         if (sfp == NULL || sfp->dbxref == NULL) {
805                 return;
806         }
807         for (vnp=sfp->dbxref; vnp; vnp=vnp->next) {
808                 id = -1;
809                 db = vnp->data.ptrvalue;
810                 if (db && db->db) {
811                         for (i =0; i < DBNUM; i++) {
812                                 if (StringCmp(db->db, dbtag[i]) == 0) {
813                                         id = i;
814                                         break;
815                                 }
816                         }
817                         if (id == -1 && StringCmp (db->db, "WormBase") == 0 && IsRefSeq (bsp)) {
818                                 id = 18; /* show it even if not RefSeq record */
819                         }
820                         if (ajp->mode == RELEASE_MODE && id == -1) {
821                                 continue;  /* drop unknown dbtag */
822                         }
823                 }
824                 if (sfp->data.choice == SEQFEAT_CDREGION) {
825                         /*
826                         if (sfp->product != NULL && id > 4) {
827                                 continue;
828                         }
829                         */
830                 } else {
831                         if (id == -1 && ajp->mode != RELEASE_MODE) {
832                         } else
833                         if (id < 4) {
834                                 continue;  /* PID is illegal on non-CDS features */
835                         }
836                 }
837                 if (db == NULL) {
838                         return;
839                 }
840                 if (db->tag && db->tag->str) {
841                         val = MemNew(StringLen(db->db)+StringLen(db->tag->str)+2);
842                         sprintf(val, "%s:%s", db->db, db->tag->str);
843                 } else if (db->tag) {
844                         val = MemNew(StringLen(db->db)+16);
845                         if (StringNCmp(db->db, "PIDe", 4) == 0) {
846                                         sprintf(val, "PID:e%ld", (long) db->tag->id);
847                         } else if (StringNCmp(db->db, "PIDd", 4) == 0) {
848                                         sprintf(val, "PID:d%ld", (long) db->tag->id);
849                         } else if (StringNCmp(db->db, "PIDg", 4) == 0) {
850                                         sprintf(val, "PID:g%ld", (long) db->tag->id);
851                         } else {
852                                 sprintf(val, "%s:%ld", db->db, (long) db->tag->id);
853                         }
854                 }
855                 if (val[0] != '\0') {
856                         sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
857                         MemFree(val);
858                 }
859         }
860         return;
861 }       /* Add_dbxref */
862 
863 static Boolean CheckSeqIdChoice(SeqIdPtr sip)
864 {
865         Uint1 ch;
866         SeqIdPtr si;
867         
868         for (si = sip; si; si=si->next) {
869                 ch = si->choice;
870                 if (ch == SEQID_GI || ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ ||
871                         ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
872                         return TRUE;
873                 }
874         }
875         return FALSE;
876 }
877 
878 static SeqIdPtr GetSeqIdChoice(SeqIdPtr sip)
879 {
880         Uint1 ch;
881         SeqIdPtr si;
882         
883         for (si = sip; si; si=si->next) {
884                 ch = si->choice;
885                 if (ch == SEQID_GI || ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ ||
886                         ch == SEQID_OTHER || ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
887                         return si;
888                 }
889         }
890         return NULL;
891 }
892 
893 static Boolean CheckSeqIdAccVer(SeqIdPtr sip)
894 {
895         Uint1 ch;
896         SeqIdPtr si;
897         TextSeqIdPtr tsip;
898         
899         for (si = sip; si; si=si->next) {
900                 ch = si->choice;
901                 if (ch == SEQID_GENBANK || ch == SEQID_EMBL || ch == SEQID_DDBJ || ch == SEQID_OTHER ||
902                         ch == SEQID_TPG || ch == SEQID_TPE || ch == SEQID_TPD) {
903                         tsip = si->data.ptrvalue;
904                         if (tsip->accession != NULL && tsip->version >= 1) {
905                                 return TRUE;
906                         }
907                 }
908         }
909         return FALSE;
910 }
911 
912 static void GetNonGeneQuals (Int2 mode, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, NoteStructPtr nsp)
913 {
914         GBQualPtr gbqp;
915         Boolean evidence_present;
916         Int2 i;
917         
918         for (gbqp=sfp_in->qual; gbqp; gbqp=gbqp->next) {
919                 if (StringCmp(gbqp->qual, "gene") == 0) {
920                         ;
921                 } else if (StringCmp(gbqp->qual, "product") == 0) {
922                         ;
923                 } else if (StringCmp(gbqp->qual, "standard_name") == 0) {
924                         ;
925                 } else if (StringCmp(gbqp->qual, "map") == 0) {
926                         ;
927                 } else if (StringCmp(gbqp->qual, "EC_number") == 0) {
928                         ;
929                 } else if (StringCmp(gbqp->qual, "anticodon") == 0) {
930                         ;       /* This is done by DotRNAQuals */
931                 } else if (StringCmp(gbqp->qual, "note") == 0) {
932                         CpNoteToCharPtrStack(nsp, NULL, gbqp->val);
933                 } else if (StringCmp(gbqp->qual, "transl_table") == 0) {
934                         sfp_out->qual = 
935                                 AddGBQual(sfp_out->qual, gbqp->qual, gbqp->val);
936                         /* This is captured by GetGeneticCode */
937                 } else if (StringCmp(gbqp->qual, "db_xref") == 0) {
938                         for (i =0; i < DBNUM; i++) {
939                                 if (StringNCmp(gbqp->val, dbtag[i], StringLen(dbtag[i])) == 0) {
940                                         break;
941                                 }
942                         }
943                         if (mode == RELEASE_MODE && i == DBNUM) {
944                                 continue;  /* drop unknown dbtag */
945                         }                       
946                         sfp_out->qual = 
947                                 AddGBQual(sfp_out->qual, gbqp->qual, gbqp->val);
948                 } else {
949                         sfp_out->qual = 
950                                 AddGBQual(sfp_out->qual, gbqp->qual, gbqp->val);
951                 }
952         }
953 
954         evidence_present = GBQualPresent("evidence", sfp_out->qual);
955         if (sfp_out->exp_ev) {
956                 if (evidence_present == FALSE) {
957                         if (sfp_out->exp_ev == 1)
958                                 sfp_out->qual = 
959                                 AddGBQual(sfp_out->qual, "evidence", "experimental");
960                         if (sfp_out->exp_ev == 2)
961                                 sfp_out->qual = 
962                                 AddGBQual(sfp_out->qual, "evidence", "not_experimental");
963                 } else {
964                         for (gbqp=sfp_out->qual; gbqp; gbqp=gbqp->next)
965                                 if (StringCmp(gbqp->qual, "evidence") == 0) {
966                                         gbqp->val = MemFree(gbqp->val);
967                                         if (sfp_out->exp_ev == 1)
968                                                 gbqp->val = StringSave("experimental");
969                                         if (sfp_out->exp_ev == 2)
970                                                 gbqp->val = StringSave("not_experimental");
971                                         break;
972                                 }
973                 }
974         } else if (evidence_present == TRUE) {
975                 for (gbqp=sfp_out->qual; gbqp; gbqp=gbqp->next)
976                         if (StringCmp(gbqp->qual, "evidence") == 0) {
977                                 if (StringCmp(gbqp->val, "EXPERIMENTAL") == 0) {
978                                         StringCpy(gbqp->val, "experimental");
979                                 } else if (StringCmp(gbqp->val, "NOT_EXPERIMENTAL") == 0) {
980                                         StringCpy(gbqp->val, "not_experimental");
981                                 }
982                                 break;
983                         }
984         }
985         return;
986 }       /* GetNonGeneQuals */
987 
988 /*****************************************************************************
989 *LookForPartialImpFeat
990 *
991 *       This function first looks for the sfp->qual of type "partial".
992 *       If found the qual is deleted and the variable "partial" is
993 *       set equal to TRUE.  If "partial" is TRUE or if sfp->partial
994 *       is TRUE, FlatAnnotPartial is called (modified version of Karl Sirotkin's
995 *       program) to see if sfp->partial should really be TRUE.
996 *       WARNING: sfp should be an ImpFeatPtr
997 *
998 *       written by Tom Madden (12/7/93)
999 *****************************************************************************/
1000 static void LookForPartialImpFeat(SeqFeatPtr sfp, Boolean use_product)
1001 
1002 {
1003         Boolean partial=FALSE;
1004         GBQualPtr curq, gbqual, lastq=NULL, tmpqual;
1005 
1006         gbqual = sfp->qual;
1007 
1008         while (gbqual && (StringCmp(gbqual->qual, "partial")==0))
1009         {
1010                 partial = TRUE;
1011                 tmpqual = gbqual->next;
1012                 gbqual->next = NULL;
1013                 gbqual = GBQualFree(gbqual);
1014                 gbqual = tmpqual;
1015         }
1016 
1017         if (gbqual)
1018         {
1019                 for (lastq=gbqual, curq=gbqual->next; curq; curq=curq->next)
1020                 {
1021                         if (StringCmp(curq->qual, "partial") == 0)
1022                         {
1023                                 partial = TRUE;
1024                                 lastq->next = curq->next;
1025                                 curq->next = NULL;
1026                                 curq = GBQualFree(curq);
1027                                 curq = lastq;
1028                         }
1029                         else
1030                                 lastq = curq;
1031                 }
1032         }
1033 
1034         sfp->qual = gbqual;
1035 
1036         if (partial == TRUE || sfp->partial == TRUE)
1037                 sfp->partial = FlatAnnotPartial(sfp, use_product);
1038 }       /* LookForPartialImpFeat */
1039 
1040 static CharPtr SeqCodeNameGet (SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
1041 {
1042         int index=residue - table -> start_at;
1043         static CharPtr oops = "?";
1044 
1045         if (index >= 0 && index < (int) table -> num){
1046                 return (table -> names) [index];
1047         }else {
1048                 if (error_msgs == TRUE) 
1049                         ErrPostEx(SEV_WARNING, CTX_NCBI2GB, 1,
1050                         "asn2ff: %c(%d) > max in SeqCode table=%d",
1051                         (char) residue, (int) residue, (int) table -> num);
1052                 return oops;
1053         }
1054 }
1055 
1056 /***************************************************************************
1057 *CharPtr Get3LetterSymbol (Uint1 seq_code, SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
1058 *
1059 *       if (ASN2FF_IUPACAA_ONLY == TRUE) then
1060 *       Check if the residue is legal in iupacaa; if not, return 'X', if so,
1061 *       return the three letter code from iupacaa3.
1062 *
1063 *       if (ASN2FF_IUPACAA_ONLY != TRUE) then
1064 *       Then do a translation, if necessary, then get th three letter code
1065 *       from iupacaa3.
1066 *
1067 ***************************************************************************/
1068 
1069 static CharPtr Get3LetterSymbol (Uint1 seq_code, SeqCodeTablePtr table, Uint1 residue, Boolean error_msgs)
1070 {
1071         static CharPtr bad_symbol= "OTHER";
1072         CharPtr ptr, retval=NULL;
1073         Int2 index;
1074         SeqCodeTablePtr table_3aa;
1075         SeqMapTablePtr smtp;
1076         Uint1 code, new_residue;
1077 
1078         if (residue == 42) {  /* stop codon in NCBIeaa */
1079                 retval = "TERM";
1080                 return retval;
1081         }
1082         if (ASN2FF_IUPACAA_ONLY == TRUE)
1083                 code = Seq_code_iupacaa;
1084         else
1085                 code = Seq_code_ncbieaa;
1086 
1087         if (code != seq_code)
1088         {/* if code and seq_code are identical, then smtp is NULL?? */
1089                 smtp = SeqMapTableFind(seq_code, code);
1090                 new_residue = SeqMapTableConvert(smtp, residue);
1091         }
1092         else
1093                 new_residue = residue;
1094 
1095 /* The following looks for non-symbols (255) and "Undetermined" (88) */
1096         if ((int) new_residue == 255 || (int) new_residue == 88)
1097                 retval = bad_symbol;
1098         else
1099         {
1100                 ptr = SeqCodeNameGet(table, residue, error_msgs);
1101                 
1102                 table_3aa=SeqCodeTableFind (Seq_code_iupacaa3);
1103                 if (ptr != NULL && *ptr != '\0' && table_3aa != NULL)
1104                 {
1105                         for (index=0; index < (int) table_3aa->num; index++)
1106                         {
1107                                 if (StringCmp(ptr, (table_3aa->names) [index]) == 0)
1108                                 {
1109                                         retval = (table_3aa->symbols) [index];
1110                                         break;
1111                                 }
1112                         }
1113                 }
1114         }
1115         
1116         return retval;
1117 
1118 }       /* Get3LetterSymbol */
1119 
1120 static CharPtr GetNameFromOrgName(OrgNamePtr orgname)
1121 {
1122         BinomialOrgNamePtr bi;
1123         CharPtr name = NULL, virus, newname;
1124         Int2 len=0;
1125         Boolean first;
1126         OrgNamePtr org;
1127         
1128         switch(orgname->choice)
1129         {
1130                 case 1:                 /*binomial*/
1131                         bi = (BinomialOrgNamePtr) orgname->data;
1132                         len = StringLen(bi->genus);
1133                         if (bi->species) {
1134                                 len += StringLen(bi->species);
1135                         }
1136                         name = MemNew(len + 2);
1137                         StringCpy(name, bi->genus);
1138                         if (bi->species) {
1139                                 name = StringCat(name, " ");
1140                                 name = StringCat(name, bi->species);
1141                         } else {
1142                                 name = StringCat(name, " sp.");
1143                         }
1144                 break;
1145                 case 2:                 /*virus*/
1146                         virus = (CharPtr) orgname->data;
1147                         name = MemNew(StringLen(virus));
1148                         StringCpy(name, virus);
1149                 break;
1150                 case 3:                 /*hybrid*/
1151                         first = TRUE;
1152                         for (org = (OrgNamePtr) orgname->data; org; org=org->next) {
1153                                 newname = GetNameFromOrgName(org);
1154                                 len += StringLen(newname) + 3;
1155                         }
1156                         name = MemNew(len + 1);
1157                         for (org = (OrgNamePtr) orgname->data; org; org=org->next) {
1158                                 newname = GetNameFromOrgName(org);
1159                                 if (first == TRUE) {
1160                                         name = StringCat(name, newname);
1161                                         first = FALSE;
1162                                 } else {
1163                                         name = StringCat(name, " x ");
1164                                         name = StringCat(name, newname);
1165                                 }
1166                         }                       
1167                 break;
1168                 case 4:                 /*namedhybrid*/
1169                         bi = (BinomialOrgNamePtr) orgname->data;
1170                         len = StringLen(bi->genus);
1171                         if (bi->species) {
1172                                 len += StringLen(bi->species);
1173                         }
1174                         name = MemNew(len + 4);
1175                         StringCpy(name, bi->genus);
1176                         if (bi->species) {
1177                                 name = StringCat(name, " x ");
1178                                 name = StringCat(name, bi->species);
1179                         }
1180                 break;
1181                 case 5:                 /*partial*/
1182         /* not implemented yet */
1183                         ErrPostStr(SEV_WARNING, 0, 0, "Partial name in OrgName.name");
1184                 break;
1185                 default:
1186                 break;
1187         }
1188         return name;
1189 }
1190 
1191 NLM_EXTERN void PrintSourceFeat(Asn2ffJobPtr ajp, GBEntryPtr gbp)
1192 
1193 {
1194         BioseqPtr bsp;
1195         Char location[40];
1196         ImpFeatPtr ifp;
1197         Int2  status = -1, /* mol = -1, -- UNUSED */ i, bsize=0;
1198         NoteStructPtr nsp = NULL;
1199         OrgRefPtr orp=NULL;
1200         SeqFeatPtr sfp_in, sfp_out=NULL, sfp;
1201         SeqIntPtr sip;
1202         SeqLocPtr slp, keep_loc;
1203         ValNodePtr vnp=NULL;
1204         BioSourcePtr biosp = NULL;
1205         OrgModPtr omp;
1206         SortStructPtr pss, ps=NULL, bs = NULL, po=NULL;
1207         DescrStructPtr ds;
1208         CharPtr name;
1209         
1210         if (gbp == NULL) {
1211                 return;
1212         }
1213         if (gbp->feat) {
1214                 nsp=gbp->feat->source_notes;
1215                 po = gbp->feat->Orglist;
1216                 ps = gbp->feat->Sourcelist;
1217                 bs = gbp->feat->Biosrclist;
1218                 bsize = gbp->feat->biosrcsize;
1219         }
1220         ds = gbp->source_info;
1221         bsp = gbp->bsp;
1222         if (ajp->slp) {
1223                 return;
1224         }
1225         sprintf(location, "1..%ld", (long) (bsp->length));
1226     sfp_out = ajp->sfp_out;
1227         ifp = sfp_out->data.value.ptrvalue;
1228         ifp->key = StringSave("source");
1229         if (ajp->slp) {
1230                 slp = AsnIoMemCopy(ajp->slp,
1231                                         (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
1232         } else {
1233                 slp = (SeqLocPtr) ValNodeNew(NULL);
1234                 slp->choice = SEQLOC_INT;
1235                 sip = SeqIntNew();
1236                 sip->from = 0;
1237                 sip->to = (bsp->length)-1;
1238                 sip->id = SeqIdDup(SeqIdFindBest (bsp->id, 0));
1239                 slp->data.ptrvalue = sip;
1240         }
1241         sfp_out->location = slp;
1242         if (ds != NULL) {
1243                 vnp = ds->vnp;
1244                 keep_loc = AsnIoMemCopy(slp,
1245                                         (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);                                    
1246                 if (vnp && vnp->choice == Seq_descr_source) {
1247                         biosp = vnp->data.ptrvalue;
1248                         if (biosp->is_focus == TRUE) {
1249                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
1250                                                                                 "focus", NULL);
1251                                 if (StringNCmp(gbp->div, "SYN", 3) != 0) {
1252                                         for (pss=bs, i= 0; pss && i < bsize; i++, pss++) {
1253                                                 if (pss->sfp == NULL)
1254                                                         continue;
1255                                                 sfp_out->location = 
1256                                                         SeqLocSubtract(sfp_out->location,
1257                                                                                                  pss->sfp->location);
1258                                         }
1259                                 }
1260                         }
1261                 }
1262                 if (sfp_out->location == NULL) {
1263                         sfp_out->location = keep_loc;
1264                 }
1265                 else
1266                         SeqLocFree(keep_loc);
1267         }
1268         flat2asn_install_feature_user_string("source", ifp->loc);
1269         if (gbp->feat && gbp->feat->sfpSourcesize != 0) {
1270                 if ((sfp_in = ps->sfp) == NULL) {
1271                         GatherItemWithLock(ps->entityID, ps->itemID, ps->itemtype, 
1272                                                                         &sfp_in, find_item);
1273                 }
1274                 if (sfp_out->qual != NULL)
1275                         sfp_out->qual = GBQualFree(sfp_out->qual);
1276                 NoteStructReset(nsp);
1277                 PrepareSourceFeatQuals(sfp_in, sfp_out, gbp, FALSE);
1278                 Add_dbxref(ajp, sfp_out, sfp_in, bsp); 
1279                 status = ValidateNAImpFeat(sfp_out);
1280                 if (status < 0) { 
1281 /* source feat is probably missing organism name, add
1282                 and try again.  Don't delete old quals! */
1283                         if (ds != NULL) {
1284                                 vnp = ds->vnp;
1285                                 if (vnp->choice == Seq_descr_source) {
1286                                         biosp = vnp->data.ptrvalue;
1287                                         orp = (OrgRefPtr) biosp->org;
1288                                 } else if (vnp->choice == Seq_descr_org) {
1289                                         orp = (OrgRefPtr) vnp->data.ptrvalue;
1290                                 }
1291                         } else if (gbp->feat && gbp->feat->sfpOrgsize != 0) {
1292                                 if ((sfp = po->sfp) == NULL) {
1293                                         GatherItemWithLock(po->entityID, po->itemID, po->itemtype, 
1294                                                                         &sfp, find_item);
1295                                 }
1296                                 if (sfp != NULL) {
1297                                         orp = (OrgRefPtr) sfp->data.value.ptrvalue;
1298                                 }
1299                         }
1300                         if (orp) {
1301                                 if (ajp->orgname && orp->orgname) {
1302                                         name = GetNameFromOrgName(orp->orgname);
1303                                         sfp_out->qual = AddGBQual(sfp_out->qual, 
1304                                                                                 "organism", name);
1305                                         MemFree(name);
1306                                 } else if (orp->taxname) {
1307                                         sfp_out->qual = AddGBQual(sfp_out->qual, 
1308                                                                                 "organism", orp->taxname);
1309                                         if (orp->common && sfp_in->comment != NULL)
1310                                                 CpNoteToCharPtrStack(nsp, NULL, orp->common);
1311                                 } else if (orp->common) {
1312                                         if (StrStr(orp->common, "virus") ||
1313                                             StrStr(orp->common, "Virus") ||
1314                                             StrStr(orp->common, "phage") ||
1315                                             StrStr(orp->common, "Phage") ||
1316                                             StrStr(orp->common, "viroid") ||
1317                                             StrStr(orp->common, "Viroid")) {
1318                                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
1319                                                                                         "organism", orp->common);
1320                                         }
1321                                 }
1322                         }
1323                         status = ValidateNAImpFeat(sfp_out);
1324                 }
1325         }
1326         if (status < 0) {
1327                 if (ds != NULL) {
1328                         if ((vnp = ds->vnp) != NULL) {
1329                                 if (vnp->choice == Seq_descr_source) {
1330                                         biosp = vnp->data.ptrvalue;
1331                                         orp = (OrgRefPtr) biosp->org;
1332                                 } else if (vnp->choice == Seq_descr_org) {
1333                                         orp = (OrgRefPtr) vnp->data.ptrvalue;
1334                                 }
1335                         }
1336                 } else if (gbp->feat && gbp->feat->sfpOrgsize != 0 && po->sfp != NULL) {
1337                                 orp = (OrgRefPtr) (po->sfp)->data.value.ptrvalue;
1338                 } else {
1339                         orp = NULL;
1340                 }
1341                 if (orp) {
1342                         if (nsp) {
1343                                 NoteStructReset(nsp);
1344                         }
1345                         if (sfp_out->qual != NULL)
1346                                 sfp_out->qual = GBQualFree(sfp_out->qual);
1347                         if (ajp->orgname && orp->orgname) {
1348                                 name = GetNameFromOrgName(orp->orgname);
1349                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
1350                                                                         "organism", name);
1351                                 MemFree(name);
1352                         } else if (orp->taxname) {
1353                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
1354                                         "organism", orp->taxname);
1355                         } else if (orp->common) {
1356                                 if (StrStr(orp->common, "virus") ||
1357                                     StrStr(orp->common, "Virus") ||
1358                                     StrStr(orp->common, "phage") ||
1359                                     StrStr(orp->common, "Phage") ||
1360                                     StrStr(orp->common, "viroid") ||
1361                                     StrStr(orp->common, "Viroid")) {
1362                                         sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
1363                                                                                                                          orp->common);
1364                                 }
1365                         }
1366                         if (orp->orgname && orp->orgname->mod) {
1367                                 omp = orp->orgname->mod;
1368                                 if (omp->subtype == 0 && omp->subname != NULL) {
1369                                         CpNoteToCharPtrStack(nsp, NULL, omp->subname);
1370                                 }
1371                         }
1372                         sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual, TRUE);
1373                         sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
1374                 }
1375                 if ((vnp=BioseqGetSeqDescr(gbp->bsp, Seq_descr_molinfo, NULL)) != NULL){
1376                         /*
1377                         mfp = vnp->data.ptrvalue;
1378                         if (mfp) {
1379                                 mol = mfp->biomol;
1380                         }
1381                         -- NO EFFECT */
1382                 }
1383                 PrepareSourceFeatQuals(NULL, sfp_out, gbp, TRUE);
1384                 status = ValidateNAImpFeat(sfp_out);
1385         }
1386 /* ----------Organism not found -------------*/
1387         if (status < 0) {
1388                 if (sfp_out->qual)
1389                         sfp_out->qual = GBQualFree(sfp_out->qual);
1390                 sfp_out->qual = AddGBQual(sfp_out->qual, "organism", "unknown");
1391                 NoteStructReset(nsp);
1392                 if (orp && orp->common)
1393                         CpNoteToCharPtrStack(nsp, NULL, orp->common);
1394 /*try new first */
1395                 if (biosp) {
1396                         sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual, TRUE);
1397                         if (orp)
1398                                 sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
1399                 }
1400 /* try old  then */
1401                 sfp_out->qual = AddOrgRefModToGBQual(orp, sfp_out->qual);
1402                 PrepareSourceFeatQuals(NULL, sfp_out, gbp, TRUE);
1403                 status = ValidateNAImpFeat(sfp_out);
1404         }       
1405         flat2asn_delete_feature_user_string();
1406 
1407         if (status >= 0 || ASN2FF_VALIDATE_FEATURES == FALSE) {
1408                 PrintImpFeat(ajp, gbp->bsp, sfp_out);
1409         }
1410         sfp_out->comment = NULL;
1411         sfp_out->location = SeqLocFree(sfp_out->location);
1412         sfp_out->location = NULL;
1413         sfp_out->product = NULL;
1414         sfp_out->exp_ev = FALSE;
1415         sfp_out->partial = FALSE;
1416         sfp_out->excpt = FALSE;
1417         ifp = sfp_out->data.value.ptrvalue;
1418         if (ifp->key) {
1419                 ifp->key = MemFree(ifp->key);
1420         }
1421         if (ifp->loc) {
1422                 ifp->loc = MemFree(ifp->loc);
1423         }
1424         if (sfp_out->qual)
1425                 sfp_out->qual = GBQualFree(sfp_out->qual);
1426         return;
1427 }       /* PrintSourceFeat */
1428 
1429 /*****************************************************************************
1430 *
1431 *       Add the quals of the form "/transl_except=(pos: ,aa: )" to the
1432 *       SeqFeatPtr sfp_out.  
1433 *       
1434 *****************************************************************************/  
1435 
1436 static void ComposeCodeBreakQuals (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, SeqLocPtr PNTR extra_loc, Int2 extra_loc_cnt, NoteStructPtr nsp)
1437 
1438 {
1439         CdRegionPtr crp;
1440         CharPtr buffer, ptr, pos;
1441         Choice aa;
1442         CodeBreakPtr cbp;
1443         SeqCodeTablePtr table;
1444         SeqLocPtr slp;
1445         Uint1 seq_code=0, the_residue;
1446         Int2 i, buflen;
1447 
1448         if ((sfp_in == NULL) || (sfp_in->data.choice != 3)) {
1449                 return;
1450         }
1451 
1452         crp = (CdRegionPtr) sfp_in->data.value.ptrvalue;
1453 
1454         if (crp->code_break != NULL) {
1455                 cbp = crp->code_break;
1456                 while (cbp != NULL) {
1457                         aa = cbp->aa;
1458                         switch (aa.choice) {
1459                                 case 1:
1460                                         seq_code = 8;
1461                                         break;
1462                                 case 2:
1463                                         seq_code = 7;
1464                                         break;
1465                                 case 3:
1466                                         seq_code = 11;
1467                                         break;
1468                         }
1469                         table = NULL;
1470                         if (seq_code != 0)
1471                                 table=SeqCodeTableFind (seq_code);
1472                         if (table == NULL) {
1473                                 continue;
1474                         }
1475                         if (extra_loc_cnt > 0) {  /* was converted to new coordinates*/
1476                                 for (i=0; i < extra_loc_cnt; i++) {
1477                                         if (extra_loc[i] == NULL) {
1478                                                 continue;
1479                                         }
1480                                         slp = extra_loc[i];
1481                                         pos = FlatLoc(bsp, slp);
1482                                         if (pos) {
1483                                                 the_residue = (Uint1) cbp->aa.value.intvalue;
1484                                                 if (the_residue == 'U') {
1485                                         CpNoteToCharPtrStack(nsp, NULL, "selenocysteine");
1486                                                 }
1487                                                 ptr = Get3LetterSymbol(seq_code, table, 
1488                                                                             the_residue, ajp->error_msgs);
1489                                                 buflen = StringLen(pos) + StringLen(ptr) + 11;
1490                                                 buffer = MemNew(buflen);
1491                                                 sprintf(buffer, "(pos:%s,aa:%s)", pos, ptr);
1492                                         sfp_out->qual = AddGBQual(sfp_out->qual, 
1493                                                                                     "transl_except", buffer); 
1494                                                 MemFree(buffer);
1495                                                 MemFree(pos);
1496                                 } else if (ajp->error_msgs) {
1497                                         ErrPostEx(SEV_WARNING, ERR_FEATURE_CodeBreakLoc, 
1498                                                                 "Invalid Code-break.location: %s", pos);
1499                                 }
1500                                 }    
1501                         } else {
1502                                 slp = NULL;
1503                                 while ((slp = SeqLocFindNext(cbp->loc, slp)) != NULL) {
1504                                         pos = FlatLoc(bsp, slp);
1505                                 if (pos) {
1506                                         the_residue = (Uint1) cbp->aa.value.intvalue;
1507                                                 if (the_residue == 'U') {
1508                                         CpNoteToCharPtrStack(nsp, NULL, "selenocysteine");
1509                                                 }
1510                                         ptr = Get3LetterSymbol(seq_code, table, 
1511                                                                             the_residue, ajp->error_msgs);
1512                                                 buflen = StringLen(pos) + StringLen(ptr) + 11;
1513                                                 buffer = MemNew(buflen);
1514                                         sprintf(buffer, "(pos:%s,aa:%s)", pos, ptr);
1515                                         sfp_out->qual = AddGBQual(sfp_out->qual, 
1516                                                                                     "transl_except", buffer);
1517                                                 MemFree(buffer);
1518                                                 MemFree(pos);
1519                                 } else if (ajp->error_msgs) {
1520                                         ErrPostEx(SEV_WARNING, ERR_FEATURE_CodeBreakLoc, 
1521                                                                 "Invalid Code-break.location: %s", pos);
1522                                         }
1523                             }
1524                         }
1525                         cbp = cbp->next;
1526                 }
1527         }
1528 
1529         return;
1530 
1531 }       /* ComposeCodeBreakQuals */
1532 
1533 /***********************************************************************
1534 *void GetGeneticCode(CharPtr ptr, SeqFeatPtr sfp)
1535 *
1536 *       returns ONLY non-standard (i.e., id not 0 or 1)
1537 *       genetic codes.
1538 ***********************************************************************/
1539 
1540 static void GetGeneticCode(CharPtr ptr, SeqFeatPtr sfp)
1541 
1542 {
1543         Boolean code_is_one=FALSE;
1544         CdRegionPtr cdr;
1545         GBQualPtr qual;
1546         ValNodePtr gcp, var;
1547 
1548         cdr = sfp->data.value.ptrvalue;
1549         gcp = cdr->genetic_code;
1550 
1551         if (gcp != NULL)
1552         {
1553                 for (var=gcp->data.ptrvalue; var != NULL; var=var->next)
1554                 {
1555                         if (var->choice == 2)
1556                         {
1557                                 if (var->data.intvalue != 0 )
1558                                 {
1559                                         if (var->data.intvalue == 1)
1560                                                 code_is_one = TRUE;
1561                                         else
1562                                                 sprintf(ptr, "%ld", (long) (var->data.intvalue));
1563                                 }
1564                                 break;  
1565                         }
1566                 }
1567                 if (*ptr != '\0')
1568                 {
1569                         for (qual=sfp->qual; qual; qual=qual->next)
1570                         {
1571                                 if (StringCmp("transl_table", qual->qual) == 0 &&
1572                                           StringCmp(ptr, qual->val) != 0)
1573                                 {
1574                                           ErrPostStr(SEV_WARNING, 
1575                                                 ERR_FEATURE_GcodeAndTTableClash, "");
1576                                           break;
1577                                 }
1578                         }
1579                 }       
1580                 else if (code_is_one == TRUE)
1581                 {
1582                         for (qual=sfp->qual; qual; qual=qual->next)
1583                         {
1584                                 if (StringCmp("transl_table", qual->qual) == 0 &&
1585                                           StringCmp("1", qual->val) != 0)
1586                                 {
1587                                           ErrPostStr(SEV_WARNING, 
1588                                                 ERR_FEATURE_GcodeAndTTableClash, "");
1589                                           break;
1590                                 }
1591                         }
1592                 }
1593         }
1594         else
1595         {
1596                 for (qual=sfp->qual; qual; qual=qual->next)
1597                         if (StringCmp("transl_table", qual->qual) == 0)
1598                         {
1599                                 StringCpy(ptr, qual->val);
1600                                 break;
1601                         }
1602         }
1603 
1604         return;
1605 }       /* GetGeneticCode */
1606 
1607 static SeqFeatPtr cleanup_sfp(SeqFeatPtr sfp_out)
1608 {
1609         ImpFeatPtr ifp;
1610         
1611         if (sfp_out == NULL) {
1612                 return NULL;
1613         }
1614         sfp_out->comment = NULL;
1615         sfp_out->location = NULL;
1616         sfp_out->product = NULL;
1617         sfp_out->exp_ev = FALSE;
1618         sfp_out->partial = FALSE;
1619         sfp_out->excpt = FALSE;
1620         ifp = sfp_out->data.value.ptrvalue;
1621         if (ifp->key) {
1622                 ifp->key = MemFree(ifp->key);
1623         }
1624         if (ifp->loc) {
1625                 ifp->loc = MemFree(ifp->loc);
1626         }
1627         if (sfp_out->qual)
1628                 sfp_out->qual = GBQualFree(sfp_out->qual);
1629                 
1630         return sfp_out;
1631 }
1632 
1633 static GBQualPtr remove_qual(GBQualPtr head, GBQualPtr x)
1634 {
1635         GBQualPtr       v, p;
1636         
1637         if (head == NULL) {
1638                 return NULL;
1639         }
1640         if (x == head) {
1641                 head = x->next;
1642                 x->next = NULL;
1643                 return head;
1644         }
1645         for (v = head; v != NULL && v != x; v = v->next) {
1646                 p = v;
1647         }
1648         if (v != NULL) {
1649                 p->next = x->next;
1650                 x->next = NULL;
1651         }
1652         return head;
1653 }
1654 
1655 static void PutGeneFirst(SeqFeatPtr sfp)
1656 
1657 {
1658         Boolean still_looking=TRUE;
1659         GBQualPtr gbqual, qual, qual_temp=NULL, qual_gene=NULL;
1660         ImpFeatPtr ifp=NULL;
1661 
1662         if ((sfp == NULL) || (sfp->data.choice != 8))
1663                 return;
1664         if (sfp->qual == NULL)
1665                 return;
1666 
1667         ifp = sfp->data.value.ptrvalue;
1668         if (StringCmp(ifp->key, "gene") == 0)
1669         {
1670                 gbqual = sfp->qual;
1671                 for (qual=gbqual; qual; qual=qual->next) {
1672                         if (StringCmp("gene", qual->qual) == 0) {
1673                                 qual_gene = qual;
1674                                 break;
1675                         }
1676                 }
1677                 if (qual_gene == NULL) {
1678                         return;
1679                 }
1680                 gbqual = remove_qual(gbqual, qual_gene);
1681                 qual_gene->next = gbqual;
1682                 sfp->qual = qual_gene;
1683         }
1684         return;
1685 }       /* PutGeneFirst */
1686 
1687 static void PutTranslationLast(SeqFeatPtr sfp)
1688 
1689 {
1690         Boolean still_looking=TRUE;
1691         GBQualPtr gbqual, qual, qual_temp=NULL, qual_last;
1692         ImpFeatPtr ifp=NULL;
1693 
1694         if ((sfp == NULL) || (sfp->data.choice != 8))
1695                 return;
1696         if (sfp->qual == NULL)
1697                 return;
1698 
1699         ifp = sfp->data.value.ptrvalue;
1700         if (StringCmp(ifp->key, "CDS") == 0)
1701         {
1702                 gbqual = sfp->qual;
1703                 qual_last = NULL;
1704                 for (qual=gbqual; qual->next; qual=qual->next)
1705                 { /* We need to go to the end of the linked list */
1706                         if (still_looking == TRUE &&
1707                                 StringCmp("translation", qual->qual) == 0)
1708                         {
1709                                 still_looking = FALSE;
1710                                 if (qual->next != NULL)
1711                                 { /* if it's not the last qual anyway */
1712                                         if (qual_last == NULL) /*first*/
1713                                                 gbqual = qual->next;
1714                                         else
1715                                                 qual_last->next = qual->next;
1716                                         qual_temp = qual;
1717                                         qual=qual->next;
1718                                         qual_temp->next = NULL;
1719                                 }
1720                         }
1721                         qual_last = qual;
1722                         if (qual->next == NULL)
1723                                 break;
1724                 }
1725                 qual->next = qual_temp;
1726                 sfp->qual = gbqual;
1727         }
1728         return;
1729 }       /* PutTranslationLast */
1730 
1731 static CharPtr mrnaevtext1 = "Derived by automated computational analysis";
1732 static CharPtr mrnaevtext2 = "using gene prediction method:";
1733 static CharPtr mrnaevtext3 = "Supporting evidence includes similarity to:";
1734 
1735 NLM_EXTERN CharPtr mRNAEvidenceComment(UserObjectPtr uop, Boolean add)
1736 {
1737     ObjectIdPtr         oip;
1738         UserFieldPtr    ufp, u, uu;
1739         CharPtr                 method = NULL, ptr, ne_name;
1740         static Char             temp[20];
1741         Int2                    ptrlen=0, np=0, nd=0, nm=0, ne=0;
1742         Boolean                 is_evidence = FALSE;
1743         Int4                    Locus_id = 0;
1744 
1745         if (uop == NULL) return NULL;
1746         if ((oip = uop->type) == NULL) return NULL;
1747         if (StringCmp(oip->str, "ModelEvidence") != 0) return NULL;
1748         for (ufp=uop->data; ufp; ufp=ufp->next) {
1749                 oip = ufp->label;
1750                 if (StringCmp(oip->str, "Method") == 0) {
1751                         if (ufp->data.ptrvalue) {
1752                                 method = StringSave((CharPtr) ufp->data.ptrvalue);
1753                         }
1754                 }
1755                 if (StringCmp(oip->str, "mRNA")==0) {
1756                         is_evidence = TRUE;
1757                         for (u = (UserFieldPtr) ufp->data.ptrvalue;u; u=u->next) {
1758                                 for (uu = (UserFieldPtr) u->data.ptrvalue; uu; uu=uu->next) {
1759                                 oip = uu->label;
1760                                 if (StringCmp(oip->str, "accession") == 0) {
1761                                         nm++;
1762                                 }
1763                                 }
1764                         }
1765                 }
1766                 if (StringCmp(oip->str, "EST")==0) {
1767                         is_evidence = TRUE;
1768                         for (u = (UserFieldPtr) ufp->data.ptrvalue;u; u=u->next) {
1769                                 for (uu = (UserFieldPtr) u->data.ptrvalue;uu; uu=uu->next) {
1770                                         oip = uu->label;
1771                                         if (StringCmp(oip->str, "count") == 0) {
1772                                                 ne = uu->data.intvalue;
1773                                         }
1774                                         if (StringCmp(oip->str, "organism") == 0) {
1775                                                 ne_name = StringSave(( CharPtr) uu->data.ptrvalue);
1776                                         }
1777                                 }
1778                         }
1779                 }
1780         }
1781         ptrlen = StringLen (mrnaevtext1) + StringLen (mrnaevtext2) + StringLen (mrnaevtext3) + StringLen (method) + 25;
1782         if (np > 0) {
1783                 ptrlen += StringLen("proteins") + 5;
1784         }
1785         if (nd > 0) {
1786                 ptrlen += StringLen("domains") + 5;
1787         }
1788         if (nm > 0) {
1789                 ptrlen += StringLen("mRNAs") + 5;
1790         }
1791         if (ne > 0) {
1792                 ptrlen += StringLen("ESTs") + StringLen(ne_name) + 10;
1793         }
1794         ptr = (CharPtr) MemNew(ptrlen) + 1;
1795         if (add) {
1796                 if (method != NULL) {
1797                         sprintf (ptr, "%s %s %s.", mrnaevtext1, mrnaevtext2, method);
1798                 } else {
1799                         sprintf (ptr, "%s.", mrnaevtext1);
1800                 }
1801         }
1802         if (is_evidence) {
1803                 if (add)  StringCat(ptr, " ");
1804          StringCat(ptr, "Supporting evidence includes similarity to:");
1805         } 
1806         if (np > 0) {
1807          sprintf(temp, " %d proteins", np);
1808          StringCat(ptr, temp);
1809         }
1810         if (nd > 0) {
1811                 if (np > 0)
1812                         StringCat(ptr, ",");
1813          sprintf(temp, " %d domains", np);
1814          StringCat(ptr, temp);
1815         }
1816         if (nm > 0) {
1817                 if (np > 0 || nd > 0)
1818                 StringCat(ptr, ",");
1819          if (nm > 1) {
1820                  sprintf(temp, " %d mRNAs", nm);
1821          } else {
1822                  sprintf(temp, " %d mRNA", nm);
1823          }
1824          StringCat(ptr, temp);
1825         }
1826         if (ne > 0) {
1827         if ( np > 0 || nm > 0 || nd > 0)
1828                 StringCat(ptr, ",");
1829          sprintf(temp, " %d %s ESTs", ne, ne_name);
1830          StringCat(ptr, temp);
1831         }
1832         return ptr;
1833 }
1834 
1835 static CharPtr mRNAFeatEvidenceComment(SeqFeatPtr sfp_in)
1836 {
1837         RnaRefPtr               rfp;
1838         UserObjectPtr   uop, obj;
1839     ObjectIdPtr         oip;
1840         UserFieldPtr    uf;
1841         
1842         rfp = (RnaRefPtr) sfp_in->data.value.ptrvalue;
1843         if (rfp->type != 2) { /* mRNA */
1844                 return NULL;
1845         }
1846         if ((uop = sfp_in->ext) == NULL)
1847                 return NULL;
1848         if ((oip = uop->type) == NULL) return NULL;
1849         if (StringCmp(oip->str, "CombinedFeatureUserObjects") != 0) return NULL;
1850         for (uf=uop->data; uf; uf=uf->next) {
1851                 obj = (UserObjectPtr) uf->data.ptrvalue;
1852                 return( mRNAEvidenceComment(obj, TRUE));
1853         }
1854         return NULL;
1855 }
1856 
1857 NLM_EXTERN void PrintNAFeatByNumber (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1858 {
1859         
1860         Boolean loc_ok;
1861         Char genetic_code[3];
1862         CharPtr ptr=NULL, sptr;
1863         ImpFeatPtr ifp;
1864         SeqFeatPtr sfp_in, sfp_out=NULL;
1865         Int4 status, total_feats, feat_index;
1866         SortStructPtr p;
1867 
1868         if (gbp == NULL || gbp->feat == NULL) {
1869                 return;
1870         }
1871         feat_index = ajp->pap_index;
1872         total_feats=gbp->feat->sfpListsize;
1873         if (total_feats == 0) {
1874                 return;
1875         }
1876         sfp_out=ajp->sfp_out;
1877         if (sfp_out->qual)
1878                 sfp_out->qual = GBQualFree(sfp_out->qual);
1879         ifp = sfp_out->data.value.ptrvalue;
1880         if (ifp->loc)
1881                 ifp->loc = MemFree(ifp->loc);
1882         if (feat_index < total_feats) {
1883                 p = gbp->feat->List + feat_index;
1884                 if (p == NULL)
1885                         return;
1886                 if (p->tempload == TRUE) {
1887                         GatherItemWithLock(p->entityID, p->itemID, p->itemtype, 
1888                                                                         &sfp_in, find_item);
1889                 } else {
1890                         sfp_in = p->sfp;
1891                 }
1892                 if (sfp_in == NULL) {
1893                         return;
1894                 }
1895                 if (ajp->mode == PARTIAL_MODE &&
1896                                         sfp_in->data.choice != SEQFEAT_CDREGION) {
1897                         sfp_out = cleanup_sfp(sfp_out);
1898                         return;
1899                 }
1900                 status = ConvertToNAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
1901                 if (status < 1) {
1902                         sfp_out = cleanup_sfp(sfp_out);
1903                         return;
1904                 }
1905                 if (p->slp != NULL) {
1906                         sfp_out->location = p->slp;
1907                 }
1908                 ifp = sfp_out->data.value.ptrvalue;
1909                 flat2asn_install_feature_user_string(ifp->key, NULL);
1910                 loc_ok=CheckAndGetNAFeatLoc(gbp->bsp, &ptr, sfp_out, TRUE);
1911                 if (loc_ok == TRUE || ASN2FF_VALIDATE_FEATURES == FALSE) {
1912                         ifp->loc = ptr;
1913                 } else {
1914                         flat2asn_delete_feature_user_string();
1915                         flat2asn_install_feature_user_string(ifp->key, ptr);
1916                         MemFree(ptr);
1917                         if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
1918                                 ErrPostEx(SEV_WARNING, ERR_FEATURE_Dropped, "Unparsable location");
1919                         }
1920                         sfp_out = cleanup_sfp(sfp_out);
1921                         flat2asn_delete_feature_user_string();
1922                         return;
1923                 }
1924                 flat2asn_delete_feature_user_string();
1925                 flat2asn_install_feature_user_string(ifp->key, ptr);
1926                 if (p->dup == TRUE) {
1927                         if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
1928                                 ErrPostEx(SEV_WARNING, ERR_FEATURE_Duplicated, 
1929                                         "Duplicated feature dropped");
1930                         }
1931                         sfp_out = cleanup_sfp(sfp_out);
1932                         flat2asn_delete_feature_user_string();
1933                         return;
1934                 }
1935                 if (sfp_in->data.choice == SEQFEAT_CDREGION) {
1936                         ComposeCodeBreakQuals(ajp, gbp->bsp, sfp_in, sfp_out, 
1937                                                         p->extra_loc, p->extra_loc_cnt, p->nsp);
1938                         genetic_code[0]='\0';
1939                         if (ASN2FF_TRANSL_TABLE == TRUE) {
1940                                 GetGeneticCode(genetic_code, sfp_in);
1941                                 if (genetic_code[0] != '\0') {
1942                                         sfp_out->qual = AddGBQual(sfp_out->qual, 
1943                                                         "transl_table", genetic_code);
1944                                 }
1945                         }
1946                 }
1947                 if (sfp_in->data.choice == SEQFEAT_GENE) {
1948                         if (ajp->show_gene == FALSE) {
1949                                 sfp_out = cleanup_sfp(sfp_out);
1950                                 flat2asn_delete_feature_user_string();
1951                                 return;
1952                         }
1953                 }
1954                 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, p->nsp);
1955                 LookForPartialImpFeat(sfp_out, FALSE);
1956                 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
1957                 status = ValidateNAImpFeat(sfp_out);
1958                 if (sfp_in->data.choice == SEQFEAT_CDREGION) {
1959                         PutTranslationLast(sfp_out);
1960                 } else if (sfp_in->data.choice == SEQFEAT_GENE) {
1961                         PutGeneFirst(sfp_out);
1962                 } else if (sfp_in->data.choice == SEQFEAT_RNA) {
1963                         if ((sptr = mRNAFeatEvidenceComment(sfp_in)) != NULL) {
1964                                 sfp_out->qual =
1965                                      AddGBQual(sfp_out->qual, "note", sptr);
1966                         }
1967                 }
1968                 if (status >= 0 || ASN2FF_VALIDATE_FEATURES == FALSE) {
1969                         PrintImpFeatEx(ajp, gbp->bsp, sfp_out, gbp->gi, p->entityID, p->itemID);
1970                 }
1971                 flat2asn_delete_feature_user_string();
1972         }
1973         sfp_out = cleanup_sfp(sfp_out);
1974         return;
1975 }       /* PrintNAFeatByNumber */
1976 
1977 /***************************************************************************
1978 *PrintAAFeatByNumber
1979 *
1980 *       This function prints out the genpept SeqFeats.
1981 *
1982 **************************************************************************/
1983 
1984 NLM_EXTERN void PrintAAFeatByNumber (Asn2ffJobPtr ajp, GBEntryPtr gbp)
1985 {
1986         CharPtr ptr=NULL;
1987         Char genetic_code[3];
1988         ImpFeatPtr ifp;
1989         Int2 status;
1990         Int4 feat_index, total_feats;
1991         NoteStructPtr nsp;
1992         SeqFeatPtr sfp_in, sfp_out=NULL;
1993         SortStructPtr p;
1994 
1995         if (gbp == NULL || gbp->feat == NULL) {
1996                 return;
1997         }
1998         feat_index = ajp->pap_index;
1999         total_feats=gbp->feat->sfpListsize;
2000         if (total_feats == 0) {
2001                 return;
2002         }
2003         sfp_out=ajp->sfp_out;
2004         if (sfp_out->qual) {
2005                 sfp_out->qual = GBQualFree(sfp_out->qual);
2006         }
2007         ifp = sfp_out->data.value.ptrvalue;
2008         if (ifp->loc) {
2009                 ifp->loc = MemFree(ifp->loc);
2010         }
2011         if (feat_index < total_feats) {
2012                 p = gbp->feat->List + feat_index;
2013                 if (p == NULL || p->dup == TRUE) {
2014                         return;
2015                 }
2016                 if ((sfp_in = p->sfp) == NULL) {
2017                         GatherItemWithLock(p->entityID, p->itemID, p->itemtype, 
2018                                                                         &sfp_in, find_item);
2019                 }
2020                 if (sfp_in == NULL) {
2021                         return;
2022                 }
2023                 nsp = p->nsp;
2024                 switch (sfp_in->data.choice) {
2025 /* Note: the functions that CheckAndGetFeatLoc use for
2026                 checking fails on protein locations sometimes. */
2027                         case SEQFEAT_CDREGION:
2028                                 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
2029                                 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
2030                                 if (status < 0)
2031                                         break;
2032                                 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
2033                                 GetAAFeatLoc(gbp->bsp, &ptr, sfp_in, TRUE);
2034                                 ifp->loc = ptr;
2035                                 ptr = FlatLoc(gbp->bsp, sfp_in->location);
2036                                 sfp_out->qual =
2037                                      AddGBQual(sfp_out->qual, "coded_by", ptr);
2038                                 ptr = MemFree(ptr);
2039                                 genetic_code[0]='\0';
2040                                 if (ASN2FF_TRANSL_TABLE == TRUE) {
2041                                         GetGeneticCode(genetic_code, sfp_in);
2042                                         if (genetic_code[0] != '\0')
2043                                                 sfp_out->qual =
2044                                                         AddGBQual(sfp_out->qual, "transl_table", genetic_code);
2045                                 }
2046                                 status = ValidateAAImpFeat(sfp_out, TRUE);
2047                                 if (status >= 0)
2048                                         PrintImpFeat(ajp, gbp->bsp, sfp_out);
2049                                 break;
2050                         case SEQFEAT_PROT:
2051                                 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
2052                                 AddProteinQuals(sfp_in, sfp_out, nsp);
2053                                 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
2054                                 if (status < 0)
2055                                         break;
2056                                 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
2057                                 GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
2058                                 ifp->loc = ptr;
2059                                 status = ValidateAAImpFeat(sfp_out, FALSE);
2060                                 if (status >= 0)
2061                                         PrintImpFeat(ajp, gbp->bsp, sfp_out);
2062                                 break;
2063                         case SEQFEAT_SEQ:
2064                         case SEQFEAT_IMP:
2065                         case SEQFEAT_REGION:
2066                         case SEQFEAT_COMMENT:
2067                         case SEQFEAT_BOND:
2068                         case SEQFEAT_SITE:
2069                         case SEQFEAT_PSEC_STR:
2070                         case SEQFEAT_NON_STD_RESIDUE:
2071                         case SEQFEAT_HET:
2072                                 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
2073                                 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
2074                                 if (status < 0)
2075                                         break;
2076                                 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
2077                                 GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
2078                                 ifp->loc = ptr;
2079                                 status = ValidateAAImpFeat(sfp_out, FALSE);
2080                                 if (status >= 0)
2081                                         PrintImpFeat(ajp, gbp->bsp, sfp_out);
2082                                 break;
2083                         case SEQFEAT_GENE:
2084                                 if (ajp->show_gene == FALSE) {
2085                                         break;
2086                                 }
2087                                 GetNonGeneQuals(ajp->mode, sfp_in, sfp_out, nsp);
2088                                 status = ConvertToAAImpFeat(ajp, gbp, sfp_in, &sfp_out, p);
2089                                 if (status < 0)
2090                                         break;
2091                                 ComposeGBQuals(ajp, sfp_out, gbp, p, FALSE);
2092                                 GetAAFeatLoc(gbp->bsp, &ptr, sfp_out, FALSE);
2093                                 ifp->loc = ptr;
2094                                 status = ValidateAAImpFeat(sfp_out, FALSE);
2095                                 if (status >= 0)
2096                                         PrintImpFeat(ajp, gbp->bsp, sfp_out);
2097                                 break;
2098                         default:
2099                                 break;
2100                 }
2101         }
2102         sfp_out = cleanup_sfp(sfp_out);
2103 }       /* PrintAAFeatByNumber */
2104 
2105 
2106 /************************************************************************
2107 *GetProductFromCDS(ValNodePtr product, ValNodePtr location, Int4 length)
2108 *
2109 *       Gets the CDS product, using SeqPortNewByLoc
2110 *       The bsp is that of the protein, and comes from the location.  The bsp
2111 *       is found in the calling program anyway, as it's used to get
2112 *       the EC_NUM.
2113 *       The protein sequence comes back in allocated memory.  The user
2114 *       is responsible for deallocating that.
2115 *
2116 *  A check is made (BioseqFind()) that the protein Bioseq is in memory.
2117 *  This guarantees that a fetch is NOT made if it is not memory, to accomodate
2118 *  the splitting of DNA and protein in Entrez. In this case, it's just
2119 *  translated.
2120 *
2121 *************************************************************************/
2122 
2123 NLM_EXTERN CharPtr GetProductFromCDS(ValNodePtr product, ValNodePtr location, Int4 bsp_length)
2124 
2125 {
2126         Boolean at_end=FALSE;
2127         CharPtr protein_seq=NULL, start_ptr=NULL;
2128         Int4 length;
2129         SeqPortPtr spp;
2130         Uint1 residue, code;
2131         BioseqPtr bsp;
2132         SeqIdPtr sip;
2133 
2134         if (ASN2FF_IUPACAA_ONLY == TRUE)
2135                 code = Seq_code_iupacaa;
2136         else
2137                 code = Seq_code_ncbieaa;
2138 
2139         if (product) {
2140                 sip = SeqLocId(product);
2141                 bsp = BioseqFindCore(sip);
2142                 if (bsp != NULL)    /* Bioseq is (or has been) in memory */ {
2143                         length = SeqLocLen(product);
2144                         if (length > 0) {
2145                                 if (SeqLocStart(location) == 0 ||
2146                                         SeqLocStop(location) == bsp_length-1)
2147                                         at_end = TRUE;
2148                                 start_ptr = protein_seq = 
2149                                         (CharPtr) MemNew((size_t) (length*sizeof(CharPtr)));
2150                                 spp = SeqPortNewByLoc(product, code);
2151                                 spp->do_virtual = TRUE;
2152                                 while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF) {
2153                                         if ( !IS_residue(residue) && residue != INVALID_RESIDUE )
2154                                                 continue;
2155                                         if (residue == INVALID_RESIDUE)
2156                                                 residue = (Uint1) 'X';
2157                                         *protein_seq = residue;
2158                                         protein_seq++;
2159                                 }
2160                                 SeqPortFree(spp);
2161                                 if (at_end) {
2162                                         if (StringLen(start_ptr) < GENPEPT_MIN)
2163                                                 start_ptr = MemFree(start_ptr);
2164                                 }
2165                         }
2166                 }
2167         }
2168         return start_ptr;
2169 }
2170 
2171 /**************************************************************************
2172 *CharPtr GettRNAaa (tRNAPtr trna, Boolean error_messages)
2173 *
2174 *       Return a pointer containing the amino acid type.
2175 **************************************************************************/
2176 
2177 static CharPtr GettRNAaa (tRNAPtr trna, Boolean error_msgs)
2178 
2179 {
2180         CharPtr ptr=NULL;
2181         SeqCodeTablePtr table;
2182         Uint1 seq_code;
2183 /*
2184   The choice values used in the tRNA structure do NOT corresond to
2185   the choice(==ENUMs) of Seq-code_type, and the latter are used
2186   by all the utility functions, so we map them...
2187 */
2188         if ( trna && trna -> aatype) {
2189                 switch (trna -> aatype) {
2190                         case 1:
2191                                 seq_code = 2;
2192                                 break;
2193                         case 2:
2194                                 seq_code = 8;
2195                                 break;
2196                         case 3:
2197                                 seq_code = 7;
2198                                 break;
2199                         case 4:
2200                                 seq_code = 11;
2201                                 break;
2202                 }
2203 
2204                 if ((table=SeqCodeTableFind (seq_code)) != NULL)
2205                   ptr = Get3LetterSymbol(seq_code, table, trna->aa, error_msgs);
2206         }
2207         
2208         return ptr;
2209 }       /* GettRNAaa */
2210 
2211 /*************************************************************************
2212 *ComposetRNANote (Asn2ffJobPtr ajp, NoteStructPtr nsp, tRNAPtr trna, )
2213 *
2214 *       Add info from Trna-ext to Note stack in the GeneStructPtr.
2215 **************************************************************************/
2216 
2217 static void ComposetRNANote(Asn2ffJobPtr ajp, NoteStructPtr nsp, tRNAPtr trna)
2218 {
2219         /*
2220         Char buffer[25];
2221         CharPtr ptr = &(buffer[0]);
2222         Int2 index;
2223         Uint1 codon[4];
2224 
2225         if (! trna) 
2226                 return;
2227 
2228         if ((trna->codon)[0] != 255)
2229         {
2230                 codon[3] = '\0';
2231                 for (index=0; index<6; index++)
2232                 {
2233                         if ((trna->codon)[index] == 255)
2234                                 break;
2235                         if (CodonForIndex((trna->codon)[index], Seq_code_iupacna, codon))
2236                         {
2237                                 StringCpy(ptr, (CharPtr) codon);
2238                                 ptr += 3;
2239                         }
2240                         else
2241                         {
2242                                 *ptr = '?';     ptr++;
2243                         }
2244                         if (index<5 && (trna->codon)[index+1] != 255)
2245                         {
2246                                 *ptr = ',';     ptr++;
2247                                 *ptr = ' ';     ptr++;
2248                         }
2249                 }
2250                 if ((trna->codon)[1] == 255)
2251                 {
2252                         ptr = &buffer[0];
2253                         SaveNoteToCharPtrStack(nsp, "codon recognized:", ptr);
2254                 }
2255                 else
2256                 {
2257                         ptr = &buffer[0];
2258                         SaveNoteToCharPtrStack(nsp, "codons recognized:", ptr);
2259                 }
2260         }       
2261         return;
2262         */
2263 
2264         Char  buffer [25];
2265         Int2  num;
2266 
2267         num = ComposeCodonsRecognizedString (trna, buffer, sizeof (buffer));
2268         if (num < 1 || StringHasNoText (buffer)) return;
2269         if (num == 1) {
2270                         SaveNoteToCharPtrStack(nsp, "codon recognized:", buffer);
2271         } else {
2272                         SaveNoteToCharPtrStack(nsp, "codons recognized:", buffer);
2273         }
2274 
2275 }       /* ComposetRNANote */
2276 
2277 
2278 /************************************************************************
2279 *       Make the anticodon qualifier and (possible) note to the tRNA
2280 *       with the following paradigm:
2281 *       0.) First look at the new anticodon slot on tRNAPtr
2282 *       if not found do the rest:
2283 **      1.) Look at SeqFeat.ext for a UserObject using the fct. QualLocWrite,
2284 *       if result is not NULL, use this location in anticodon qualifier;
2285 *
2286 *       2.) Look for an anticodon qualifier, use if no QualLocWrite's 
2287 *       result was not zero;
2288 *       3.) Make note if neither 1.) or 2.) was true, or there are
2289 *       multiple codons.
2290 *****************************
2291 *       NEW ALGORITHM 07-15-96
2292 *****************************
2293 *       1) aa present?
2294 *             print /product = tRNA-aa
2295 *       2) codon recognized present?
2296 *             print /note="codon recognized: codon"
2297 *       3) anticodon and aa present?
2298 *             print /anticodon=...
2299 *************************************************************************/
2300 static void DotRNAQuals (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, NoteStructPtr nsp, SeqLocPtr PNTR extra_loc, 
2301 Int2 extra_loc_cnt)
2302 {
2303         Boolean found_anticodon=FALSE /*, found_qual=FALSE -- UNUSED */;
2304         Char buffer[40];
2305         CharPtr aa_ptr, newptr=NULL, ptr = &(buffer[0]), tmp=NULL;
2306         GBQualPtr curq;
2307         RnaRefPtr rrp;
2308         tRNAPtr trna=NULL;
2309         SeqLocPtr slp=NULL;
2310         SeqIntPtr sip;
2311 
2312         if (sfp_in == NULL) {
2313                 return;
2314         }
2315         if (sfp_in->data.choice != SEQFEAT_RNA) {
2316                 return;
2317         }
2318         rrp = sfp_in->data.value.ptrvalue;
2319 /* Look for anticodon struct */
2320         if (rrp->ext.choice == 2) {
2321                 newptr = MemNew(50*sizeof(Char));
2322                 trna = rrp->ext.value.ptrvalue;
2323                 if ((aa_ptr = GettRNAaa(trna, ajp->error_msgs)) != NULL) {
2324                         if (GBQualPresent("product", sfp_out->qual) == FALSE) {
2325                                 sprintf(newptr, "tRNA-%s", aa_ptr);
2326                                 sfp_out->qual = AddGBQual(sfp_out->qual, "product", newptr);
2327                         }
2328                 }
2329                 if (trna && (slp = trna->anticodon) != NULL && aa_ptr) {
2330                         if (extra_loc_cnt > 0) {
2331                                 slp = extra_loc[0];
2332                         }
2333                         if (slp && slp->choice == SEQLOC_INT) {
2334                                 sip = slp->data.ptrvalue;
2335                                 sprintf(ptr, "%ld..%ld", (long) sip->from+1, (long) sip->to+1);
2336                                 sprintf(newptr, "(pos:%s,aa:%s)", ptr, aa_ptr);
2337                                 sfp_out->qual = AddGBQual(sfp_out->qual, "anticodon", newptr); 
2338                                 found_anticodon=TRUE;
2339                         }
2340                 }
2341         }
2342         if (! found_anticodon) {
2343                 if (sfp_in->ext) {      /* Look for UserObject */
2344                         tmp = QualLocWrite(sfp_in->ext, ptr);
2345                         if (tmp) {
2346                                 newptr = MemNew(50*sizeof(Char));
2347                                 rrp = sfp_in->data.value.ptrvalue;
2348                                 trna = rrp->ext.value.ptrvalue;
2349                                 aa_ptr = GettRNAaa(trna, ajp->error_msgs);
2350                                 if (aa_ptr) {
2351                                         sprintf(newptr, "(pos:%s,aa:%s)", ptr, aa_ptr);
2352                                         sfp_out->qual = 
2353                                                         AddGBQual(sfp_out->qual, "anticodon", newptr); 
2354                                         found_anticodon=TRUE;
2355                                 }
2356                         }
2357                 }
2358         }
2359         if (! found_anticodon) {
2360                 /* Look for anticodon qual if no UserObject found */
2361                 for (curq=sfp_in->qual; curq; curq=curq->next)
2362                         if (StringCmp("anticodon", curq->qual) == 0) {
2363                             sfp_out->qual = 
2364                                AddGBQual(sfp_out->qual, "anticodon", curq->val); 
2365                             /* found_qual=TRUE; -- NO EFFECT */
2366                             break;
2367                         }
2368         }
2369 
2370 /* make note "codon recognized*/
2371         ComposetRNANote(ajp, nsp, trna);
2372         MemFree(newptr);
2373         
2374 }       /* DotRNAQuals */
2375 
2376 /**************************************************************************
2377 *ConvertToAAImpFeat
2378 *
2379 *       This code copies a SeqFeat into an ImpFeat format for use in
2380 *       producing GenBank format.  Two SeqFeatPtr's should be passed
2381 *       in as arguments (sfp_in, sfp_out).  On the first call, of a
2382 *       number of calls, sfp_out should be NULL so that memory for 
2383 *       ImpFeat can be allocated.  On subsequent calls, sfp_out->data.choice
2384 *       should be "8" (for ImpFeats).
2385 *
2386 *       Written by Tom Madden 
2387 *
2388 **************************************************************************/
2389 
2390 NLM_EXTERN Int2 ConvertToAAImpFeat (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfpp_out, SortStructPtr p)
2391 {
2392         BioseqPtr bsp=NULL;
2393         Char printbuf[41], temp[65];
2394         CharPtr ptr;
2395         ImpFeatPtr ifp, ifp_in;
2396         Int2 retval=1;
2397         NoteStructPtr nsp;
2398         GeneStructPtr gsp;
2399         ProtRefPtr prot;
2400         SeqFeatPtr sfp_out;
2401         SeqIdPtr sip=NULL, xid;
2402         ValNodePtr vnp, vnp1;
2403 
2404         sfp_out = *sfpp_out;
2405 
2406         if (sfp_out->data.choice != SEQFEAT_IMP)
2407                 return -1;
2408 
2409         ifp = (ImpFeatPtr) sfp_out->data.value.ptrvalue;
2410 
2411         sfp_out->partial = sfp_in->partial;
2412         sfp_out->comment = sfp_in->comment;
2413         sfp_out->exp_ev = sfp_in->exp_ev;
2414         sfp_out->location = sfp_in->location;
2415 
2416         nsp = p->nsp;
2417         gsp = p->gsp;
2418         if (sfp_out->comment) {
2419                 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_out->comment);
2420         }
2421 
2422         switch (sfp_in->data.choice) {
2423         case SEQFEAT_CDREGION:
2424                 ifp->key = StringSave("CDS");
2425                 break;
2426         case SEQFEAT_PROT:
2427                 prot = sfp_in->data.value.ptrvalue;
2428                 if (prot->processed == 0 || prot->processed == 1) {
2429                         GetProtRefInfo(ajp->format, gsp, nsp, prot);
2430                         ifp->key = StringSave("Protein");
2431                 } else if (prot->processed == 2) {
2432                         ifp->key = StringSave("mat_peptide");
2433                 } else if (prot->processed == 3) {
2434                         ifp->key = StringSave("sig_peptide");
2435                 } else if (prot->processed == 4) {
2436                         ifp->key = StringSave("transit_peptide");
2437                 }
2438                 if (sfp_in->location) {
2439                         sip = SeqLocId(sfp_in->location);
2440                         if (sip)
2441                                 bsp = BioseqFind(sip);
2442                         if (bsp) {
2443                                 vnp = bsp->descr;
2444                                 for (vnp = bsp->descr; vnp; vnp = vnp->next) {
2445                                         if (vnp->choice != Seq_descr_modif) {
2446                                                 continue;
2447                                         }
2448                                         for (vnp1 = vnp->data.ptrvalue; vnp1; vnp1=vnp1->next) {
2449                                                 if (vnp1->data.intvalue == 1) {
2450                                                         sfp_out->partial = TRUE;
2451                                                         break;
2452                                             }
2453                                         }
2454                                 }
2455                         }
2456                 }
2457                 break;
2458         case SEQFEAT_SEQ:       
2459                 ifp->key = StringSave("misc_feature");
2460                 if ((xid=CheckXrefFeat(gbp->bsp, sfp_in)) != NULL)
2461                 {
2462                         SeqIdWrite(xid, printbuf, PRINTID_FASTA_SHORT, 40);
2463                         ptr = &(temp[0]);
2464                         sprintf(ptr, "Cross-reference: %s", printbuf);
2465                         SaveNoteToCharPtrStack(nsp, NULL, ptr);
2466                 }
2467                 else
2468                         retval = 0;
2469                 break;
2470         case SEQFEAT_IMP:
2471                 ifp_in = (ImpFeatPtr) sfp_in->data.value.ptrvalue;
2472                 ifp->key = StringSave(ifp_in->key);
2473                 break;
2474         case SEQFEAT_REGION:
2475                 sfp_out->qual = 
2476                 AddGBQual(sfp_out->qual, "region_name", sfp_in->data.value.ptrvalue);
2477                 ifp->key = StringSave("Region");
2478                 break;
2479         case SEQFEAT_COMMENT:
2480                 ifp->key = StringSave("misc_feature");
2481                 break;
2482         case SEQFEAT_BOND:
2483                 ptr = AsnEnumStr("SeqFeatData.bond", 
2484                                                         (Int2) (sfp_in->data.value.intvalue));
2485                 sfp_out->qual = AddGBQual(sfp_in->qual, "bond_type", ptr);
2486                 ifp->key = StringSave("Bond");
2487                 break;
2488         case SEQFEAT_SITE:
2489                 ptr = AsnEnumStr("SeqFeatData.site", 
2490                                                         (Int2) (sfp_in->data.value.intvalue));
2491                 sfp_out->qual = AddGBQual(sfp_out->qual, "site_type", ptr);
2492                 ifp->key = StringSave("Site");
2493                 break;
2494         case SEQFEAT_PSEC_STR:
2495                 ptr = AsnEnumStr("SeqFeatData.psec-str", 
2496                                                                 (Int2) (sfp_in->data.value.intvalue));
2497                 sfp_out->qual = AddGBQual(sfp_out->qual, "sec_str_type", ptr);
2498                 ifp->key = StringSave("SecStr");
2499                 break;
2500         case SEQFEAT_NON_STD_RESIDUE:
2501                 sfp_out->qual = 
2502                         AddGBQual(sfp_out->qual, "non-std-residue", 
2503                                                                                         sfp_in->data.value.ptrvalue);
2504                 ifp->key = StringSave("NonStdResidue");
2505                 break;
2506         case SEQFEAT_HET:
2507                 sfp_out->qual = 
2508                      AddGBQual(sfp_out->qual, "heterogen", sfp_in->data.value.ptrvalue);
2509                 ifp->key = StringSave("Het");
2510                 break;
2511         default:
2512                 if (ajp->error_msgs == TRUE)
2513                         ErrPostStr(SEV_WARNING, ERR_FEATURE_UnknownFeatureKey, 
2514                                 "Unimplemented type of feat in ConvertToAAImpFeat");
2515                 retval = 1;
2516                 break;
2517         }
2518 
2519         return retval;
2520 
2521 }       /* ConvertToAAImpFeat */
2522 
2523 /*****************************************************************************
2524 *  CompareTranslation:
2525 *  -- if bsp != translation's value return FALSE
2526 *****************************************************************************/
2527 static Boolean CompareTranslation(ByteStorePtr bsp, CharPtr qval)
2528 {
2529         CharPtr                         ptr;
2530         Int2                     residue, residue1, residue2;
2531         Int4                     len, blen;
2532         Boolean          done;
2533 
2534         if (qval == NULL || bsp == NULL) {
2535                 return FALSE;  /* no comparison */
2536         }
2537         len = StringLen(qval);
2538         BSSeek(bsp, 0, SEEK_SET);
2539 
2540         blen = BSLen(bsp);
2541         done = FALSE;
2542         while ((! done) && (len)) {
2543                   residue1 = qval[(len-1)];
2544                   if (residue1 == 'X')  /* remove terminal X */
2545                                 len--;
2546                   else
2547                                 done = TRUE;
2548          }
2549          done = FALSE;
2550          while ((! done) && (blen)) {
2551                   BSSeek(bsp, (blen-1), SEEK_SET);
2552                   residue2 = BSGetByte(bsp);
2553                   if (residue2 == 'X')
2554                                 blen--;
2555                   else
2556                                 done = TRUE;
2557          }
2558                 BSSeek(bsp, 0, SEEK_SET);
2559                 if (blen != len) {
2560                         return FALSE;
2561                 } else {
2562                         for (ptr = qval; *ptr != '\0' && 
2563                                                                 (residue = BSGetByte(bsp)) != EOF; ptr++) {
2564 
2565                                  if (residue != *ptr) {
2566                                         return FALSE;
2567                                  }
2568 
2569                          } /* for */
2570 
2571                  } /* compare two sequences */
2572                         return TRUE;
2573 } /* check */
2574 
2575 static void  GatherProductGeneInfo (Asn2ffJobPtr ajp, SeqFeatPtr sfp_in, GBEntryPtr gbp, SortStructPtr gp, Uint1 method)
2576 {
2577         BioseqPtr p_bsp;
2578         GatherScope gs;
2579         GeneStructPtr gsp;
2580         NoteStructPtr nsp;
2581         Int2 index;
2582         Int4 length, longest_length=0;
2583         ProtRefPtr prot=NULL;
2584         SeqFeatPtr sfp=NULL;
2585         SeqIdPtr sip;
2586         ValNodePtr product=NULL;
2587         OrganizeProtPtr opp;
2588         SortStructPtr p;
2589         Uint2 entityID;
2590         
2591         if (sfp_in->product)
2592                 product = sfp_in->product;
2593         else 
2594                 return;
2595         if (gp == NULL)
2596                 return;
2597         gsp = gp->gsp;
2598         nsp = gp->nsp;
2599         sip = SeqLocId(product);
2600         p_bsp = BioseqFindCore(sip);
2601         if (p_bsp == NULL)    /* Bioseq is (or has been) in memory */
2602                 return;
2603         if (ajp->useSeqMgrIndexes) {
2604                 sfp = SeqMgrGetBestProteinFeature (p_bsp, NULL);
2605                 if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT) {
2606                         prot = (ProtRefPtr) sfp->data.value.ptrvalue;
2607                         if (prot != NULL) {
2608                                 GetProtRefInfo(ajp->format, gsp, nsp, prot);
2609                                 GetProtRefComment(sfp_in, p_bsp, ajp, NULL, nsp, method);
2610                                 return;
2611                         }
2612                 }
2613         }
2614         entityID = ObjMgrGetEntityIDForPointer(p_bsp);
2615         opp = (OrganizeProtPtr) MemNew(sizeof(OrganizeProt));
2616         opp->size = 0;
2617           MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
2618         MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
2619         gs.ignore[OBJ_SEQANNOT] = FALSE;
2620         gs.ignore[OBJ_SEQFEAT] = FALSE;
2621         gs.get_feats_location = TRUE;
2622         gs.target = product;
2623         gs.seglevels = 1;
2624         GatherEntity(entityID, opp, get_prot_feats, &gs);
2625         if (opp->size > 0)      {
2626                 prot = NULL;
2627                 p = opp->list;
2628                 for (index=0; index < opp->size; index++, p++) {
2629                         if ((sfp = p->sfp) == NULL) {
2630                                 GatherItemWithLock(p->entityID, p->itemID, p->itemtype, 
2631                                                                 &sfp, find_item);
2632                         }
2633                         if (sfp == NULL) {
2634                                 continue;
2635                         }
2636                         if (sfp->data.choice != SEQFEAT_PROT) {
2637                                 continue;
2638                         }
2639                         if ((length=SeqLocLen(sfp->location)) == -1)
2640                                 continue;
2641                         if (length > longest_length) {
2642                                 prot = sfp->data.value.ptrvalue;
2643                                 longest_length = length;
2644                         }
2645                 }
2646                 GetProtRefInfo(ajp->format, gsp, nsp, prot);
2647         }
2648         GetProtRefComment(sfp_in, p_bsp, ajp, opp, nsp, method);
2649         p = opp->list;
2650         for (index=0; index < opp->size; index++, p++) {
2651                 if (p && p->gsp)
2652                         GeneStructFree(p->gsp);
2653                 if (p && p->nsp)
2654                         NoteStructFree(p->nsp);
2655         }
2656         MemFree(opp->list);
2657         MemFree(opp);
2658         
2659         return;
2660 }
2661 
2662 /**************************************************************************
2663 *ConvertToNAImpFeat
2664 *
2665 *       This code copies a SeqFeat into an ImpFeat format for use in
2666 *       producing GenBank format.  Two SeqFeatPtr's should be passed
2667 *       in as arguments (sfp_in, sfp_out).  
2668 *       return status:
2669 *               1: conversion successful
2670 *               0: no conversion, also no error (data in ASN.1 is lost or put out
2671 *                       otherwise)
2672 *               -1: an error
2673 **************************************************************************/
2674 
2675 static Boolean ProductIsLocal (Uint2 entityID, SeqLocPtr product)
2676 
2677 {
2678   BioseqPtr    bsp;
2679   SeqEntryPtr  sep, oldscope;
2680   SeqIdPtr     sip = NULL;
2681   SeqLocPtr    slp;
2682 
2683   slp = SeqLocFindNext (product, NULL);
2684   while (slp != NULL && sip == NULL) {
2685     sip = SeqLocId (slp);
2686     slp = SeqLocFindNext (product, slp);
2687   }
2688   if (sip == NULL) return FALSE;
2689   sep = GetTopSeqEntryForEntityID (entityID);
2690   if (sep == NULL) return FALSE;
2691   oldscope = SeqEntrySetScope (sep);
2692   bsp = BioseqFind (sip);
2693   SeqEntrySetScope (oldscope);
2694   if (bsp != NULL) return TRUE;
2695   return FALSE;
2696 }
2697 
2698 NLM_EXTERN Int2 ConvertToNAImpFeat (Asn2ffJobPtr ajp, GBEntryPtr gbp, SeqFeatPtr sfp_in, SeqFeatPtr PNTR sfpp_out, SortStructPtr gp)
2699 {
2700         BioseqPtr bsp=gbp->bsp, pbsp=NULL;
2701         Boolean found_key, non_pseudo = FALSE;
2702         CdRegionPtr cdr;
2703         Char buffer[2], printbuf[41], temp[65];
2704         CharPtr buf_ptr = &(buffer[0]), protein_seq=NULL, ptr = &(temp[0]);
2705         NoteStructPtr nsp;
2706         ImpFeatPtr ifp, ifp_in;
2707         Int2 retval=1;
2708         Int4 length=0;
2709         SeqFeatPtr sfp_out;
2710         SeqIdPtr xid;
2711         ValNodePtr product;
2712         ValNodePtr mod, syn;
2713         BioSourcePtr biosp;
2714         OrgRefPtr orp;
2715         RnaRefPtr rrp;
2716         ByteStorePtr byte_sp;
2717         /* Int4 len_cds, len_prot; -- UNUSED */
2718         Uint1 method = 0;
2719         GeneRefPtr grp;
2720         Boolean was_gene = FALSE;
2721         CharPtr key=NULL, tmp;
2722         GeneStructPtr gsp;
2723         CharPtr except_msg="No explanation supplied", loc;
2724         
2725         sfp_out = *sfpp_out;
2726 
2727         if (sfp_out->data.choice != SEQFEAT_IMP)
2728                 return -1;
2729 
2730         ifp = (ImpFeatPtr) sfp_out->data.value.ptrvalue;
2731 
2732         sfp_out->partial = sfp_in->partial;
2733         sfp_out->comment = sfp_in->comment;
2734         sfp_out->exp_ev = sfp_in->exp_ev;
2735         sfp_out->location = sfp_in->location;
2736         sfp_out->product = sfp_in->product;
2737         sfp_out->pseudo = sfp_in->pseudo;
2738         
2739         found_key = GetNAFeatKey(ajp->show_gene, &(key), sfp_in, sfp_out);
2740         if (!found_key)
2741                 return -1;
2742         ifp->key = key;
2743         nsp = gp->nsp;
2744         gsp=gp->gsp;
2745         switch (sfp_in->data.choice)
2746         {
2747         case SEQFEAT_BIOSRC:
2748                 biosp = sfp_in->data.value.ptrvalue;
2749                 orp = (OrgRefPtr) biosp->org;   
2750                 if (orp) {
2751                         if (orp->taxname) {
2752                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2753                                                                         "organism", orp->taxname);
2754                         } else if (orp->common) {
2755                                 if (StrStr(orp->common, "virus") ||
2756                                     StrStr(orp->common, "Virus") ||
2757                                     StrStr(orp->common, "phage") ||
2758                                     StrStr(orp->common, "Phage") ||
2759                                     StrStr(orp->common, "viroid") ||
2760                                     StrStr(orp->common, "Viroid"))
2761                                 {
2762                                         sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
2763                                                                                                                          orp->common);
2764                                 }
2765                         }
2766 /* added from OrgRef.mod 03.20.96 */
2767                         for (mod = orp->mod; mod; mod = mod->next) {
2768                                 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) mod->data.ptrvalue);
2769                         }
2770                 } else {
2771                         sfp_out->qual = AddGBQual(sfp_out->qual, "organism",
2772                                                                                                                          "unknown");
2773                 }
2774                 sfp_out->qual = AddBioSourceToGBQual(ajp, nsp, biosp, sfp_out->qual,
2775                         TRUE);
2776                 break;  
2777         case SEQFEAT_CDREGION:
2778                 product = sfp_in->product;
2779                 if (ajp->mode == RELEASE_MODE) {
2780                         if (GBQualPresent("pseudo", sfp_in->qual) == FALSE &&
2781                                                 gsp->pseudo == FALSE && sfp_in->pseudo == FALSE) {
2782                                 non_pseudo = TRUE;
2783                         }
2784                   if (non_pseudo) {
2785                         if (product == NULL) {
2786                                 if (ajp->error_msgs == TRUE) {
2787                                         loc = SeqLocPrint(sfp_in->location);
2788                                         ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped, 
2789                                                 "Dropping CDS due to missing product: %s", loc);
2790                                         MemFree(loc);
2791                                 }
2792                                 return -1;
2793                         }
2794                         if (ajp->forgbrel && CheckSeqIdChoice(SeqLocId(product)) == FALSE) {
2795                                 if (ajp->error_msgs == TRUE) {
2796                                         loc = SeqLocPrint(sfp_in->location);
2797                                         ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped, 
2798                                                 "Dropping CDS due to missing EMBL/DDBJ/GB protein accession: %s", loc);
2799                                         MemFree(loc);
2800                                 }
2801                                 return -1;
2802                         }
2803                         if (ajp->forgbrel && (pbsp = BioseqFindCore(SeqLocId(product))) == NULL) {
2804                                 if (ajp->error_msgs == TRUE) {
2805                                         loc = SeqLocPrint(sfp_in->location);
2806                                         ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped, 
2807                                                 "Dropping CDS due to missing protein: %s", loc);
2808                                         MemFree(loc);
2809                                 }
2810                                 return -1;
2811                         }
2812                         if (pbsp != NULL) {
2813                                 if (ajp->forgbrel && CheckSeqIdChoice(pbsp->id) == FALSE) {
2814                                                 if (ajp->error_msgs == TRUE) {
2815                                                         loc = SeqLocPrint(sfp_in->location);
2816                                                         ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped, 
2817                                                 "Dropping CDS due to missing EMBL/DDBJ/GB protein accession: %s", loc);
2818                                                         MemFree(loc);
2819                                                 }
2820                                                 return -1;
2821                                 }
2822                                 if (ajp->show_version == TRUE) {
2823                                         if (CheckSeqIdAccVer(pbsp->id) == FALSE) {
2824                                                 if (ajp->error_msgs == TRUE) {
2825                                                         loc = SeqLocPrint(sfp_in->location);
2826                                                         ErrPostEx(SEV_ERROR, ERR_FEATURE_Dropped, 
2827                                                                 "Dropping CDS due to missing protein accession.version: %s", loc);
2828                                                         MemFree(loc);
2829                                                 }
2830                                                 return -1;
2831                                         }
2832                                 }
2833                         }
2834                   }
2835                 }
2836                 cdr = (CdRegionPtr) sfp_in->data.value.ptrvalue;
2837                 if ((GBQualPresent("codon_start", sfp_in->qual)) == FALSE)
2838                 { /* Above checks if codon_start is already present. */
2839                         if (cdr->frame)
2840                                 sprintf(buf_ptr, "%ld", (long) (cdr->frame)); 
2841                         else 
2842                                 sprintf(buf_ptr, "1"); 
2843                         sfp_out->qual = AddGBQual(sfp_out->qual, "codon_start", buf_ptr);
2844                 }
2845                 if (product && (! ajp->genome_view) && (ProductIsLocal (ajp->entityID, product))) {
2846                         byte_sp = ProteinFromCdRegion(sfp_in, FALSE);
2847 
2848                         if (product) {
2849                                 length = bsp->length;
2850                                 protein_seq = GetProductFromCDS(product, sfp_in->location, length);
2851 /* check conflict flag and fix it */
2852                                 if (cdr->conflict == TRUE) {
2853                                         if (CompareTranslation(byte_sp, protein_seq)) {
2854                                                 cdr->conflict = FALSE;
2855                                         } else {
2856                                                 method = METHOD_concept_transl_a;
2857                                         }
2858                                 }
2859                                 if (protein_seq) {
2860                                         if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2861                                                  gsp->pseudo == FALSE && sfp_in->pseudo == FALSE) {
2862                                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2863                                                                                 "translation", protein_seq);
2864                                         }
2865                                         MemFree(protein_seq);
2866                                 }
2867                         }
2868                         BSFree(byte_sp);
2869                 }
2870                 if (sfp_in->pseudo) {
2871                         sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2872                 }
2873                 if (sfp_in->excpt) {
2874                         if (StringCmp("ribosomal slippage", sfp_in->except_text) == 0 ||
2875                                 StringCmp("ribosome slippage", sfp_in->except_text) == 0) {
2876                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2877                                                                         "note", sfp_in->except_text);
2878                                 sfp_out->excpt = FALSE;
2879                         } else if (StringCmp("trans splicing", sfp_in->except_text) == 0 ||
2880                                                 StringCmp("trans-splicing", sfp_in->except_text) == 0) {
2881                                 sfp_out->excpt = FALSE;
2882                         } else if (sfp_in->except_text) {
2883                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2884                                                                         "exception", sfp_in->except_text);
2885                         } else if (GBQualPresent("exception", sfp_in->qual) == TRUE) {
2886                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2887                                                                         "exception", sfp_in->qual->val);
2888                         } else if (sfp_out->comment != NULL) {
2889                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2890                                                                         "exception", sfp_in->comment);
2891                                 sfp_out->comment = NULL;                        
2892                         } else {
2893                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2894                                                                         "exception", except_msg);
2895                         }
2896                 } else {
2897                         if (GBQualPresent("exception", sfp_in->qual) == TRUE) {
2898                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2899                                                                         "exception", sfp_in->qual->val);
2900                         }
2901                 }
2902                 
2903                 GatherProductGeneInfo(ajp, sfp_in, gbp, gp, method);
2904 
2905 /******************************************************************************
2906 - asn2ff shouldn't generate a de-novo /translation for any
2907   cdregion that lacks a product, regardless of mode or -V setting  2/15/99
2908 ******************************************************************************
2909                 if (protein_seq == NULL && ajp->mode != RELEASE_MODE) {
2910                         protein_seq = BSMerge(byte_sp, NULL);
2911                         if ( protein_seq && protein_seq[0] != '-') {
2912                                 len_prot = StringLen(protein_seq);
2913                                 SeqLocLen(sfp_in->location) - (cdr->frame - 1);
2914                                 if (len_prot >= 6) {
2915                                         if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2916                                                  gsp->pseudo == FALSE) {
2917                                                 sfp_out->qual = 
2918                                                         AddGBQual(sfp_out->qual, 
2919                                                                 "translation", protein_seq);
2920                                         }
2921                                 }
2922                         }
2923                         MemFree(protein_seq);
2924                 }
2925                 BSFree(byte_sp);
2926 */
2927                 break;
2928         case SEQFEAT_RNA:
2929                 rrp = sfp_in->data.value.ptrvalue;
2930                 /* the following code was taken (almost) directly
2931                         from Karl Sirotkin's code.                                      */
2932                 switch ( rrp -> type){ /* order of case n: matches tests in
2933                                 is_RNA_type() of genasn.c in
2934                                 GenBankConversion directory */
2935                         case 2:
2936                                 break;
2937                         case 255:
2938                                 break;
2939                         case 3:
2940                                 if (rrp->ext.choice == 1) {
2941                                         if ((GBQualPresent("product", sfp_in->qual)) == FALSE) {
2942                                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2943                                                                 "product", (CharPtr) rrp->ext.value.ptrvalue);
2944                                         }
2945                                 } else if (rrp->ext.choice == 0 ||
2946                                         rrp->ext.choice == 2) {
2947                                         DotRNAQuals(ajp, gbp, sfp_in, sfp_out,
2948                                                         gp->nsp, gp->extra_loc, gp->extra_loc_cnt);
2949                                 }
2950                                 break;
2951                         case 4:
2952                                 break;
2953                         case 1:
2954                                 if (rrp->ext.choice == 1) {
2955                                         if ((GBQualPresent("product", sfp_in->qual)) == FALSE) {
2956                                                 sfp_out->qual = AddGBQual(sfp_out->qual, 
2957                                                                 "product", (CharPtr) rrp->ext.value.ptrvalue);
2958                                         }
2959                                 } else if (rrp->ext.choice == 0 ||
2960                                         rrp->ext.choice == 2) {
2961                                         DotRNAQuals(ajp, gbp, sfp_in, sfp_out,
2962                                                         gp->nsp, gp->extra_loc, gp->extra_loc_cnt);
2963                                 }
2964                                 break;
2965                         case 5:
2966                                 break;
2967                         case 6:
2968                                 break;
2969                 }
2970 
2971                 if (rrp && rrp->pseudo == TRUE) {
2972                         if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE)
2973                                 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
2974                 }
2975 
2976                         
2977                 break;
2978         case SEQFEAT_SEQ:       
2979                 if ((xid=CheckXrefFeat(bsp, sfp_in)) != NULL) {
2980                         ptr = &(temp[0]);
2981                         SeqIdWrite(xid, printbuf, PRINTID_FASTA_SHORT, 40);
2982                         sprintf(ptr, "Cross-reference: %s", printbuf);
2983                         SaveNoteToCharPtrStack(nsp, NULL, ptr);
2984                 }
2985                 else
2986                         retval = 0;
2987                 break;
2988         case SEQFEAT_IMP:
2989                 ifp_in = (ImpFeatPtr) sfp_in->data.value.ptrvalue;
2990                 if (ifp_in->loc != NULL)
2991                         ifp->loc = ifp_in->loc;
2992                 if (StringCmp(ifp_in->key, "CDS") == 0) {
2993                         if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE &&
2994                                 ajp->error_msgs == TRUE)
2995                                 ErrPostStr(SEV_INFO, ERR_FEATURE_non_pseudo, 
2996                                     "ConvertToNAImpFeat: Non-pseudo ImpFeat CDS found");
2997                         if ((GBQualPresent("translation", sfp_in->qual)) == TRUE &&
2998                                                                                                 ajp->mode == RELEASE_MODE) {
2999                                 if (ajp->error_msgs == TRUE) {
3000                                         ErrPostStr(SEV_ERROR, ERR_FEATURE_Dropped, 
3001                                     "ImpFeat CDS with /translation found");
3002                                 }
3003                                 retval = -1;
3004                         }
3005                 }
3006                 break;
3007         case SEQFEAT_REGION:
3008                 tmp = MemNew(StringLen(sfp_in->data.value.ptrvalue) + 9);
3009                 sprintf(tmp, "Region: %s", (CharPtr ) sfp_in->data.value.ptrvalue);
3010                 sfp_out->qual = AddGBQual(sfp_out->qual, "note", tmp);
3011                 tmp = MemFree(tmp);
3012                 break;
3013         case SEQFEAT_SITE:
3014                 AddSiteNoteQual(sfp_in, sfp_out);
3015                 break;
3016         case SEQFEAT_RSITE:
3017                 break;
3018         case SEQFEAT_COMMENT:
3019                 if(ifp->key != NULL)
3020                         MemFree(ifp->key);
3021                 ifp->key = StringSave("misc_feature");
3022                 break;
3023         case SEQFEAT_GENE:
3024                 grp = (GeneRefPtr) sfp_in->data.value.ptrvalue;
3025                 if (grp == NULL)
3026                         break;
3027                 syn=grp->syn;
3028                 if (grp->locus ) {
3029                         sfp_out->qual = AddGBQual(sfp_out->qual, "gene", grp->locus);
3030                         was_gene = TRUE;
3031                 } else if (syn != NULL) {
3032                         sfp_out->qual = AddGBQual(sfp_out->qual, "gene", 
3033                                                                                                 syn->data.ptrvalue);
3034                         syn=syn->next;
3035                         was_gene = TRUE;
3036                 }
3037                 if (grp->desc ) {
3038                         if (was_gene) {
3039                                 CpNoteToCharPtrStack(nsp, NULL, grp->desc);
3040                         } else {
3041                         /*      s = MemNew(StringLen(grp->desc) + 15);
3042                                 sprintf(s, "Description: %s", grp->desc);
3043                                 sfp_out->qual = AddGBQual(sfp_out->qual, "gene", s);*/
3044                                 sfp_out->qual = AddGBQual(sfp_out->qual, "gene", grp->desc);
3045                         }
3046                 }
3047                 if (grp->allele ) {
3048                         if ((GBQualPresent("allele", sfp_in->qual)) == FALSE)
3049                                 sfp_out->qual = AddGBQual(sfp_out->qual, "allele", grp->allele);
3050                 }
3051                 if (grp->maploc ) {
3052                         if ((GBQualPresent("map", sfp_in->qual)) == FALSE)
3053                                 sfp_out->qual = AddGBQual(sfp_out->qual, "map", grp->maploc);
3054                 }
3055                 for (; syn; syn=syn->next) {
3056                         CpNoteToCharPtrStack(nsp, NULL, syn->data.ptrvalue);
3057                 }
3058                 if (grp->pseudo == TRUE || sfp_in->pseudo) {
3059                         if ((GBQualPresent("pseudo", sfp_in->qual)) == FALSE)
3060                                 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
3061                 }
3062                 GetDBXrefFromGene(grp, sfp_out);
3063                 break;
3064         default:
3065                 if (ajp->error_msgs == TRUE)
3066                         ErrPostStr(SEV_WARNING, ERR_FEATURE_UnknownFeatureKey, 
3067                                 "Unimplemented type of gbqual in ConvertToNAImpFeat");
3068                 retval = 0;
3069                 break;
3070         }
3071         if (gsp->grp) {
3072                 GetDBXrefFromGene(gsp->grp, sfp_out);
3073         }
3074         if (sfp_in->pseudo) {
3075                 sfp_out->qual = AddGBQual(sfp_out->qual, "pseudo", NULL);
3076         }
3077         if (sfp_out->comment) {
3078                 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_out->comment);
3079                 sfp_out->comment = NULL;
3080         }
3081         return retval;
3082 
3083 }       /* ConvertToNAImpFeat */
3084 
3085 /*****************************************************************************
3086 *ValidateNAImpFeat
3087 *
3088 *       This code validates an ImpFeat using some functions from
3089 *       the flat2asn parser.
3090 *
3091 *       If a feat is bad and can't be corrected, -1 is returned.
3092 *
3093 *****************************************************************************/
3094 
3095 NLM_EXTERN Int2 ValidateNAImpFeat (SeqFeatPtr sfp)
3096 
3097 {
3098         CharPtr key;
3099         ImpFeatPtr ifp;
3100         Int2 index, retval=0, status=0;
3101 
3102         if (sfp->data.choice != SEQFEAT_IMP) {
3103                 return -1;
3104         } else {
3105 
3106                 ifp = sfp->data.value.ptrvalue;
3107                 key = StringSave(ifp->key);
3108                 index = GBFeatKeyNameValid(&key, ASN2FF_SHOW_ERROR_MSG);
3109                 if (StringCmp(key, ifp->key) != 0) {
3110                         ifp->key = key;
3111                 } else {
3112                         MemFree(key);
3113                 }
3114         
3115                 if (index == -1) {
3116                         retval = -2;
3117                 } else {
3118                         status = GBFeatKeyQualValid(sfp->cit, index, &sfp->qual, 
3119                                                 ASN2FF_SHOW_ERROR_MSG, ASN2FF_VALIDATE_FEATURES);
3120 #ifdef ASN2GNBK_PRINT_UNKNOWN_ORG
3121                         if (index == 46 && status == GB_FEAT_ERR_NONE) {
3122                                 status = GBFeatKeyQualValid(sfp->cit, index, &sfp->qual, 
3123                                                         ASN2FF_SHOW_ERROR_MSG, TRUE);
3124                         }
3125 #endif
3126                         if (status == GB_FEAT_ERR_NONE) {
3127                                 retval = 1;
3128                         } else if (status == GB_FEAT_ERR_REPAIRABLE) {
3129                                 retval = 0;
3130                         } else if (status == GB_FEAT_ERR_DROP) {
3131                                 retval = -1;
3132                         }
3133                 }
3134 
3135         }
3136 
3137         return retval;
3138 }       /* ValidateNAImpFeat */
3139 
3140 /*****************************************************************************
3141 *ValidateAAImpFeat
3142 *
3143 *       This code will validate an ImpFeat using some functions from
3144 *       the flat2asn parser.  Right now it just checks to see that the
3145 *       sfp is an ImpFeat and checks for a partial qualifier.
3146 *
3147 *       If a feat is bad and can't be corrected, -1 is returned.
3148 *
3149 *****************************************************************************/
3150 
3151 NLM_EXTERN Int2 ValidateAAImpFeat (SeqFeatPtr sfp, Boolean use_product)
3152 
3153 {
3154 
3155         if (sfp->data.choice != SEQFEAT_IMP)
3156                 return -1;
3157 
3158         LookForPartialImpFeat(sfp, use_product);
3159 
3160         return 0;
3161 }       /* ValidateAAImpFeat */
3162 
3163 
3164 /*****************************************************************************
3165 *void PrepareSourceFeatQuals(SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modifs)
3166 *
3167 *Normally called from PrintSourceFeat, collects all notes etc. together.
3168 *Note: sfp_out may already have quals when it comes here, they should not
3169 *be deleted!
3170 *       add_modifs: allows the addition of modifs to be specified, don't add
3171 *               modifs if the source feature is a ImpFeat.
3172 *
3173 *For many cases there is no sfp_in, so that must be checked for.
3174 *****************************************************************************/
3175 
3176 NLM_EXTERN void PrepareSourceFeatQuals(SeqFeatPtr sfp_in, SeqFeatPtr sfp_out, GBEntryPtr gbp, Boolean add_modif)
3177 
3178 {
3179         CharPtr note=NULL;
3180         GBQualPtr qual1;
3181         NoteStructPtr nsp=NULL;
3182 
3183         if (gbp->feat) {
3184                 nsp=gbp->feat->source_notes;
3185         }
3186         if (sfp_in) {
3187                 for (qual1=sfp_in->qual; qual1; qual1=qual1->next) {
3188                         if (StringCmp(qual1->qual, "note") == 0)
3189                                 CpNoteToCharPtrStack(nsp, NULL, qual1->val);
3190                         else    
3191                                 sfp_out->qual = 
3192                                         AddGBQual(sfp_out->qual, qual1->qual, qual1->val);
3193                 }
3194         }
3195 /* not used in new style  */
3196         if (add_modif == TRUE)
3197                 sfp_out->qual = AddModifsToGBQual(gbp, sfp_out->qual);
3198 /*---------------------    tatiana */
3199         if (sfp_in && sfp_in->comment) {
3200                 CpNoteToCharPtrStack(nsp, NULL, (CharPtr) sfp_in->comment);
3201         }
3202 
3203         if (nsp && nsp->note[0]) {
3204                 note = ComposeNoteFromNoteStruct(nsp, NULL);
3205                 if (note) {
3206                         sfp_out->qual = AddGBQual(sfp_out->qual, "note", note);
3207                         note = MemFree(note);
3208                 }
3209         }
3210         if (sfp_in && sfp_in->cit) {
3211                 if (ASN2FF_SHOW_ERROR_MSG)
3212                         ErrPostStr(SEV_WARNING, 0, 0, 
3213                                 "Unwanted /citation on 'source' feature will be dropped");
3214         }
3215 
3216         return;
3217 }
3218 
3219 
3220 /*************************************************************************
3221 *AddProteinQuals
3222 *
3223 *************************************************************************/
3224 
3225 NLM_EXTERN void AddProteinQuals (SeqFeatPtr sfp, SeqFeatPtr sfp_out, NoteStructPtr nsp)
3226 
3227 {
3228         ProtRefPtr prp=sfp->data.value.ptrvalue;
3229         ValNodePtr vnp;
3230 
3231         if (prp->name != NULL) {
3232                 for (vnp=prp->name; vnp; vnp=vnp->next)
3233                         if (GBQualPresent("product", sfp_out->qual) == FALSE)
3234                                 sfp_out->qual = 
3235                                      AddGBQual(sfp_out->qual, "product", vnp->data.ptrvalue);
3236                         else 
3237                                 CpNoteToCharPtrStack(nsp, NULL, vnp->data.ptrvalue);
3238         }
3239         if (prp->desc) {
3240                 sfp_out->qual = 
3241                      AddGBQual(sfp_out->qual, "name", prp->desc);
3242         }
3243 
3244         for (vnp=prp->ec; vnp; vnp=vnp->next)
3245                 if ((CheckForQual(sfp_out->qual, "EC_number", vnp->data.ptrvalue)) == 0)
3246                         sfp_out->qual = 
3247                                 AddGBQual(sfp_out->qual, "EC_number", vnp->data.ptrvalue);
3248 
3249         return;
3250 }
3251 
3252 /*______________________________________________________________________
3253 **
3254 **      This code is not currently used.
3255 **      I do not remove this piece of code, just comment it out.
3256 **      -- Dmitri Lukyanov
3257 */
#if 0

/* Unlinks the node x from the list starting at head and releases it with
   GBQualFree.  Returns the (possibly new) head of the list.  The list is
   returned unchanged when x is not one of its nodes. */
static GBQualPtr RemoveQual(GBQualPtr head, GBQualPtr x)
{
        GBQualPtr *link;

        /* link always addresses the pointer that leads to the current
           node, so the head and interior nodes are unlinked the same way */
        for (link = &head; *link != NULL; link = &(*link)->next) {
                if (*link == x) {
                        *link = x->next;
                        x->next = NULL;         /* free this node only */
                        GBQualFree(x);
                        break;
                }
        }
        return head;
}

#endif
3285 /*______________________________________________________________________
3286 */
3287 
3288 static void Add_gene_id (GeneStructPtr gsp, SeqFeatPtr sfp_out)
3289 {
3290         ImpFeatPtr ifp;
3291         GeneRefPtr grp;
3292         ValNodePtr vnp;
3293         Char val[40];
3294         
3295         if ((grp = gsp->grp) == NULL)
3296                 return;
3297         ifp = sfp_out->data.value.ptrvalue;
3298         if (StringCmp(ifp->key, "CDS") != 0) {
3299                 return;
3300         }
3301         if ((vnp = grp->syn) == NULL)  /* no synonyms */
3302                 return;
3303         sprintf(val, "GeneID:%s", vnp->data.ptrvalue);
3304         sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3305 }
3306 
3307 /****************************************************************************
3308 *       Composes the GBQuals for sfp_out using the information in the
3309 *       GeneStructPtr (gsp), and then the quals already on sfp_out.
3310 *
3311 *       Use only info from the GeneStruct; throw away the quals gene and map
3312 *       if they are different /tatiana  07.11.95/
3313 *       do not add /map to the features other than 'gene' /08-29-97/
3314 *       sfp_out: SEQFEAT_IMP
3315 *       map /citation added by Tatiana
3316 **************************************************************************/
3317 NLM_EXTERN void ComposeGBQuals (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, GBEntryPtr gbp, SortStructPtr p, Boolean note_pseudo)
3318 {
3319         Char temp[65];
3320         Char buffer[10];
3321         CharPtr ascii, start, note=NULL, ptr=NULL, tmp;
3322         GBQualPtr gbqp=NULL, qual1, qnext;
3323         GeneStructPtr gsp;
3324         Int2 int_index, status;
3325         NoteStructPtr nsp;
3326         PubStructPtr psp;
3327         SeqFeatPtr sfp = NULL;
3328         Int2 ascii_len, l;
3329         ValNodePtr vnp, vnp1;
3330         ValNodePtr pub, pubq, pubset;
3331         ImpFeatPtr ifp;
3332         BioseqPtr bsp;
3333         Boolean is_contig = FALSE, is_NC = FALSE, is_NG = FALSE;
3334         SeqIdPtr sid;
3335         TextSeqIdPtr tsip;
3336 
3337         if (gbp == NULL || gbp->feat == NULL || p == NULL) {
3338                 return;
3339         }
3340         bsp = gbp->bsp;
3341         for (sid=bsp->id; sid; sid=sid->next) {
3342                 if (sid->choice == SEQID_OTHER) {
3343                         tsip = (TextSeqIdPtr) sid->data.ptrvalue;
3344                         if (StringNCmp(tsip->accession, "NT", 2) == 0) {
3345                                 is_contig = TRUE;
3346                         }
3347                         if (StringNCmp(tsip->accession, "NC", 2) == 0 
3348                                         || StringNCmp(tsip->accession, "NP", 2) == 0) {
3349                                 is_NC = TRUE;
3350                         }
3351                         if (StringNCmp(tsip->accession, "NG", 2) == 0) {
3352                                 is_NG = TRUE;
3353                         }
3354                 }
3355         }
3356         gsp=p->gsp;
3357         nsp = p->nsp;
3358         if ((sfp=p->sfp) == NULL) {
3359                 GatherItemWithLock(p->entityID, p->itemID, p->itemtype, 
3360                                                                 &sfp, find_item);
3361         }
3362         if (gsp) {
3363                 if (gsp->gene) {
3364                 /*      delete_qual(&(sfp_out->qual), "gene"); */
3365                         for (vnp=gsp->gene; vnp; vnp=vnp->next)
3366                         {
3367                                 ascii_len = Sgml2AsciiLen(vnp->data.ptrvalue);
3368                                 start = ascii = MemNew((size_t) (10+ascii_len));
3369                                 ascii = Sgml2Ascii(vnp->data.ptrvalue, ascii, ascii_len+1);
3370                                 if ((GBQualPresent("gene", gbqp)) == FALSE) {
3371                                         if ((GBQualPresent("gene", sfp_out->qual)) == FALSE) {
3372                                                 gbqp=AddGBQual(gbqp, "gene", start);
3373                                         } 
3374                                 }
3375                                 start = MemFree(start);
3376                         }
3377                 }
3378                 if (gsp->product) {
3379                         for (vnp=gsp->product; vnp; vnp=vnp->next)
3380                         {
3381                                 if (GBQualPresent("product", gbqp) == FALSE &&
3382                                         GBQualPresent("product", sfp_out->qual) == FALSE)
3383                                                 sfp_out->qual = AddGBQual(sfp_out->qual, "product",
3384                                                                                                                  vnp->data.ptrvalue);
3385                                 else 
3386                                         CpNoteToCharPtrStack(nsp, NULL, vnp->data.ptrvalue);
3387                         }
3388                 }
3389                 if (gsp->standard_name) {
3390                         for (vnp=gsp->standard_name; vnp; vnp=vnp->next)
3391                         {
3392                                 if ((CheckForQual(sfp_out->qual, "standard_name",
3393                                                                                          vnp->data.ptrvalue)) == 0) {
3394                                         gbqp=AddGBQual(gbqp, "standard_name", vnp->data.ptrvalue);
3395                                 }
3396                         }
3397                 }
3398                 if (ajp->show_gene == TRUE) {
3399                         ifp = sfp_out->data.value.ptrvalue;
3400                         if (StringCmp(ifp->key, "gene") == 0) {
3401                                 if (gsp->map[0]) {
3402                                         gbqp = AddGBQual(gbqp, "map", gsp->map[0]);
3403                                 }
3404                         }
3405                 } else {
3406                         if (gsp->map[0]) {
3407                                 gbqp = AddGBQual(gbqp, "map", gsp->map[0]);
3408                         }
3409                 }
3410                 for (vnp=gsp->ECNum; vnp; vnp=vnp->next) {
3411                         if ((CheckForQual(sfp_out->qual, "EC_number",
3412                                                                         vnp->data.ptrvalue)) == 0) {
3413                                 gbqp=AddGBQual(gbqp, "EC_number", vnp->data.ptrvalue);
3414                         }
3415                 }
3416                 for (vnp=gsp->activity; vnp; vnp=vnp->next) {
3417                         if ((CheckForQual(sfp_out->qual, "function",
3418                                                                         vnp->data.ptrvalue)) == 0) {
3419                                 gbqp=AddGBQual(gbqp, "function", vnp->data.ptrvalue);
3420                         }
3421                 }
3422                 if (gsp->pseudo == TRUE) {
3423                         if (note_pseudo == TRUE) {
3424                                         CpNoteToCharPtrStack(nsp, NULL, "pseudogene");
3425                         } else if (GBQualPresent("pseudo", gbqp) == FALSE &&
3426                                         GBQualPresent("pseudo", sfp_out->qual) == FALSE) {
3427                                 gbqp = AddGBQual(gbqp, "pseudo", NULL);
3428                         }
3429                 }
3430         }
3431 /* Add Experimental note */
3432         if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION)
3433         {
3434                 ptr = &(temp[0]);
3435                 status = MakeGBSelectNote(ptr, sfp);
3436                 if (status > 0)
3437                         SaveNoteToCharPtrStack(nsp, NULL, ptr);
3438                 ptr=NULL;
3439 /* gene synonym appears as db-xref
3440                 if (is_NC) {
3441                         Add_gene_id(gsp, sfp_out); 
3442                 }
3443 */
3444         }
3445         if (nsp && nsp->note[0])
3446         {
3447                 note = ComposeNoteFromNoteStruct(nsp, gsp);
3448                 if (note)
3449                 {
3450                         gbqp = AddGBQual(gbqp, "note", note);
3451                         note = MemFree(note);
3452                 }
3453         }
3454         if (ajp->mode != DIRSUB_MODE) {
3455                 AddPID(ajp, sfp_out, (Boolean) (is_contig || is_NG || is_NC));
3456         }
3457         if (is_contig || is_NG || is_NC) {
3458                 if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA) {
3459                         Add_trid(ajp, sfp_out); 
3460                 }
3461         }
3462         Add_dbxref(ajp, sfp_out, sfp, bsp); 
3463         vnp = gbp->Pub;
3464         if (sfp && sfp->cit) {
3465                 buffer[0] = '\0';
3466                 pubset = sfp->cit;
3467                 for (pubq = pubset->data.ptrvalue; pubq; pubq = pubq->next) {
3468                         if (pubq->choice == PUB_Equiv) {
3469                                 pub = pubq->data.ptrvalue;
3470                                 for (; pub != NULL; pub = pub->next) {
3471                                         for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
3472                                                 psp = vnp1->data.ptrvalue;
3473                                                 if (PubLabelMatch(psp->pub, pub) == 0) {
3474                                                         sprintf(buffer, "[%ld]", (long) (psp->number));
3475                                                          gbqp = AddGBQual(gbqp, "citation", buffer);
3476                                                         break;
3477                                                 }
3478                                         }
3479                                 }
3480                         } else {
3481                                 pub = pubq;
3482                                 for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
3483                                         psp = vnp1->data.ptrvalue;
3484                                         if (PubLabelMatch(psp->pub, pub) == 0) {
3485                                                 sprintf(buffer, "[%ld]", (long) (psp->number));
3486                                                  gbqp = AddGBQual(gbqp, "citation", buffer);
3487                                                 break;
3488                                         }
3489                                 }
3490                         }
3491                 }
3492 /************** old algorithm for pub matching ****************/
3493                 if (buffer[0] == '\0') {
3494                         for (vnp1=vnp; vnp1; vnp1=vnp1->next)
3495                         {
3496                                 psp = vnp1->data.ptrvalue;
3497                                 for (int_index=0; int_index<psp->pubcount; int_index++)
3498                                         if (sfp == psp->pubfeat[int_index])
3499                                         {
3500                                                 sprintf(buffer, "[%ld]", (long) (psp->number));
3501                                                 gbqp = AddGBQual(gbqp, "citation", buffer);
3502                                         }
3503                         }
3504                 }
3505         }
3506         if (gbqp)       /* any gene or note related quals added above? */
3507         {
3508                 for (qual1=gbqp; qual1->next; qual1=qual1->next)
3509                         ;
3510                 qual1->next = sfp_out->qual;
3511                 sfp_out->qual = gbqp;
3512         }
3513 /* check for the qual gdb_xref */
3514         for (qual1 = sfp_out->qual; qual1; qual1 = qnext) {
3515                 qnext = qual1->next;
3516                 if (StringCmp(qual1->qual, "gdb_xref") == 0) {
3517                         qual1->qual = StringSave("db_xref");
3518                         l = StringLen(qual1->val);
3519                         tmp = MemNew(l + 5);
3520                         sprintf(tmp, "GDB:%s", qual1->val);
3521                         qual1->val = StringSave(tmp);
3522                         MemFree(tmp);
3523                 }
3524                 if (ajp->show_gene == FALSE) {
3525 /* change qual 'replace' to the old style location operator */
3526 /* changed December 1996 release 100.0 */
3527                 /*      if (StringCmp(qual1->qual, "replace") == 0) {
3528                                 ifp = sfp_out->data.value.ptrvalue;
3529                                 loc = FlatLoc(gbp->bsp, sfp->location);
3530                                 l = StringLen(qual1->val) + StringLen(loc);
3531                                 tmp = MemNew(l + 15);
3532                                 sprintf(tmp, "replace(%s,\"%s\")", loc, qual1->val);
3533                                 MemFree(loc);
3534                                 ifp->loc = tmp;
3535                                 sfp_out->qual = RemoveQual(sfp_out->qual, qual1);
3536                         }
3537                                 */
3538                 }
3539         }
3540         return;
3541 }       /* ComposeGBQuals */
3542 
3543 static CharPtr  tmp_save(CharPtr str) 
3544 /* deletes spaces from the begining and the end and returns Nlm_StringSave */                      {
3545         CharPtr s, ss;
3546 
3547         if (str == NULL) {
3548                 return NULL;
3549         }
3550         for (; isspace(*str) || *str == ','; str++) continue;
3551         for (s = str; *s != '\0'; s++) {
3552                 if (*s == '\n') {
3553                         for (ss = s+1; isspace(*ss); ss++) continue;
3554                         *s = ' ';
3555                         strcpy(s+1, ss);
3556                 }
3557         }
3558         for (s=str+StringLen(str)-1; s >= str && (*s == ' ' || *s == ';' ||
3559                  *s == ',' || *s == '.' || *s == '\"' || *s == '\t'); s--) {
3560                 *s = '\0';
3561         }  
3562 
3563         if (*str == '\0') { 
3564             return NULL;
3565         } else {
3566             return Nlm_StringSave(str);
3567         }
3568 }
3569 static Int2 NoteCmp(CharPtr n1, CharPtr n2)
3570 {
3571         CharPtr s1, s2;
3572         Int2 ret = 1;
3573         
3574         if (n1 == NULL || n2 == NULL)
3575                 return ret;
3576         s1 = tmp_save(n1);
3577         s2 = tmp_save(n2);
3578         if (StringStr(s1, s2) != NULL) 
3579                 ret = 0;  /*duplicated */
3580         MemFree(s1);
3581         MemFree(s2);
3582         
3583         return ret;
3584 }
3585 
3586 /****************************************************************************
3587 * CharPtr ComposeNoteFromNoteStruct (NoteStructPtr nsp, GeneStrunctPtr gsp)
3588 *
3589 *       This function composes a "/note" for a SeqFeatPtr from the information
3590 *       in the GeneStructPtr (gsp).
3591 *       The first "for" loop initializes the first CharPtr and a check 
3592 *       is done that the information in gsp->note is *not* redundant.  If 
3593 *       it is not, first gsp->note_annot is copied onto a CharPtr (this 
3594 *       field contains words describing the origin of the info in note, i.e., 
3595 *       "Description"); then the actual note is copied onto the CharPtr.  
3596 *       The second "for" loop does the same checking as the first and the
3597 *       concatenation of more "note" strings is performed.
3598 *
3599 *n.b.: the caller is responsible for deallocating the final returned "note".
3600 ***************************************************************************/
3601 NLM_EXTERN CharPtr ComposeNoteFromNoteStruct (NoteStructPtr nsp, GeneStructPtr gsp)
3602 
3603 {
3604         Boolean status;
3605         CharPtr note1=NULL, note2, note3;
3606         Int2 index, index1, index2, len;
3607 
3608         for (index=0; index<nsp->note_index; index++) {
3609                 if (gsp) {
3610                         if (CompareStringWithGsp(gsp, nsp->note[index]) != 0) {
3611                                 if (nsp->note_annot[index])
3612                                         note1 = Cat2Strings(nsp->note_annot[index], nsp->note[index], " ", 0);
3613                                 else
3614                                         note1 = StringSave(nsp->note[index]);
3615                                 len = CheckForExtraChars(note1);
3616                                 if (len == 0)
3617                                         note1 = MemFree(note1);
3618                                 else
3619                                         break;
3620                         }
3621                 } else {
3622                         if (nsp->note_annot[index])
3623                                 note1 = Cat2Strings(nsp->note_annot[index], nsp->note[index], " ", 0);
3624                         else
3625                                 note1 = StringSave(nsp->note[index]);
3626                         len = CheckForExtraChars(note1);
3627                         if (len == 0)
3628                                 note1 = MemFree(note1);
3629                         else
3630                                 break;
3631                 }
3632         }
3633         index++;
3634 
3635         for (index1=index; index1<nsp->note_index; index1++)
3636         {
3637                 status = TRUE;
3638                 note2 = nsp->note[index1];
3639                 if (gsp && CompareStringWithGsp(gsp, note2) == 0)
3640                         continue;
3641 
3642                 for (index2=0; index2<index1; index2++) {
3643                         if (gsp)
3644                                 if (GeneStringCmp(note2, nsp->note[index2]) == 0)
3645                                         status = FALSE;
3646                 }
3647                 if (status == TRUE) {
3648                         if (nsp->note_annot[index1])
3649                                 note2 = Cat2Strings(nsp->note_annot[index1], nsp->note[index1], " ", 0);
3650                         else /* rewrite to not always allocate note2 if no annot?????*/
3651                                 note2 = StringSave(nsp->note[index1]);
3652                         len = CheckForExtraChars(note1);
3653                         if (NoteCmp(note1, note2) == 0) {
3654                                 len = 0;
3655                         }
3656                         if (len > 0) {
3657                                 if (note1[len-1] == '.') {
3658                                         note3 = Cat2Strings(note1, note2, "  ", -1);
3659                                 } else {
3660                                         note3 = Cat2Strings(note1, note2, "; ", -1);
3661                                 }
3662                                 note1 = MemFree(note1);
3663                                 note2 = MemFree(note2);
3664                                 note1 = note3;
3665                                 note3 = NULL;
3666                         } else {
3667                                 if (note2) {
3668                                         note2 = MemFree(note2);
3669                                 }
3670                         }
3671                 }
3672         }
3673 
3674         return note1;
3675 }       /* ComposeNoteFromNoteStruct */
3676 
3677 /*************************************************************************
3678 *static Int2 CheckForExtraChars(CharPtr note)
3679 *
3680 *       Check for spaces or semi-colons on the ends of notes.
3681 ************************************************************************/
3682 
3683 static Int2 CheckForExtraChars(CharPtr note)
3684 
3685 {
3686         Int2 len=0;
3687 
3688         if (note != NULL)
3689         {
3690                 len = StringLen(note);
3691                 while (len > 0)
3692                 {
3693                         if (note[len-1] == ' ' || note[len-1] == ';')
3694                                 note[len-1] = '\0';
3695                         else
3696                                 break;
3697                         len--;
3698                 }
3699         }
3700 
3701         return len;
3702 
3703 }       /* CheckForExtraChars */
3704 
3705 NLM_EXTERN void Add_trid (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out)
3706 
3707 {
3708         ImpFeatPtr ifp;
3709         Int4 gi = -1;
3710         SeqIdPtr sip, newid=NULL;
3711         ValNodePtr product;
3712         Char buf[MAX_ACCESSION_LEN+5];
3713         
3714         ifp = sfp_out->data.value.ptrvalue;
3715         if (StringCmp(ifp->key, "mRNA") != 0) {
3716                 return;
3717         }
3718         product = sfp_out->product; 
3719         if (product == NULL) {
3720                 return;
3721         }
3722         sip = GetProductSeqId(product);
3723         if (sip == NULL) return;
3724         if (sip->choice == SEQID_GI) {
3725                 if ((newid = GetSeqIdForGI(sip->data.intvalue)) != NULL) {
3726                         SeqIdWrite(newid, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3727                 } else {
3728                         sprintf(buf, "%ld", sip->data.intvalue);
3729                 }
3730         } else {        
3731                 SeqIdWrite(sip, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3732         }
3733         sfp_out->qual = AddGBQual(sfp_out->qual, "transcript_id", buf);
3734 }
3735 
3736 /*************************************************************************
3737 *       sfp_out: synthetic SeqFeatPtr of type ImpFeat for use in printing.
3738 *
3739 *       This function puts the GI number on a SeqFeatPtr /db_xref of type CDS.
3740 *       Checking is first done to see if this sfp is indeed a CDS, then
3741 *       the PID number is gotten from the product SeqId
3742 *****************************************************************************/
3743 
3744 NLM_EXTERN void AddPID (Asn2ffJobPtr ajp, SeqFeatPtr sfp_out, Boolean is_NTorNG)
3745 
3746 {
3747         ImpFeatPtr ifp;
3748         Int4 gi = -1;
3749         SeqIdPtr sip, new_id=NULL;
3750         ValNodePtr product, vnp;
3751         BioseqPtr p_bsp = NULL;
3752         DbtagPtr db;
3753         Char val[20];
3754         Char buf[MAX_ACCESSION_LEN+1];
3755         
3756         ifp = sfp_out->data.value.ptrvalue;
3757         if (StringCmp(ifp->key, "CDS") != 0) {
3758                 return;
3759         }
3760         product = sfp_out->product; 
3761         if (product == NULL) {
3762                 return;
3763         }
3764         sip = GetProductSeqId(product);
3765         if (sip) {      /* Get protein bsp      */
3766                 if (sip->choice == SEQID_GI && is_NTorNG) {
3767                         if ((new_id = GetSeqIdForGI(sip->data.intvalue)) != NULL) {
3768                                 SeqIdWrite(new_id, buf, PRINTID_TEXTID_ACC_VER, MAX_ACCESSION_LEN+1);
3769                                 SeqIdFree(new_id); /*** need to free it !!! (EY) ***/
3770                         } else {
3771                                 sprintf(buf, "%ld", sip->data.intvalue);
3772                         }
3773                         sfp_out->qual = AddGBQual(sfp_out->qual, "protein_id", buf);
3774                 } else if ((p_bsp = BioseqFind(sip)) != NULL) {
3775                         new_id = GetSeqIdChoice(p_bsp->id);
3776                         if (ajp->forgbrel && new_id == NULL) {
3777                                 ErrPostStr(SEV_ERROR, ERR_ACCESSION_NoAccessNum, "");
3778                         } else if (new_id) {
3779                                 SeqIdWrite(new_id, buf, PRINTID_TEXTID_ACC_VER,
3780                                                                                                                 MAX_ACCESSION_LEN+1);
3781                                 sfp_out->qual = AddGBQual(sfp_out->qual, "protein_id", buf);
3782                         }
3783                 }
3784         }
3785         if (p_bsp == NULL) {
3786                 gi = GetGINumFromSip(sip);
3787                 if (gi != -1) {
3788                         if (ajp->show_gi) {
3789                                 val[0] = '\0';
3790                                 sprintf(val, "PID:g%ld", (long) gi);
3791                                 if (val[0] != '\0') {
3792                                         sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3793                                 }
3794                         }
3795                         if (ajp->show_version) {
3796                                 val[0] = '\0';
3797                                 sprintf(val, "GI:%ld", (long) gi);
3798                                 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3799                         }
3800                 }
3801                 return;
3802         }
3803         for (vnp=p_bsp->id; vnp; vnp=vnp->next) {
3804                 if (vnp->choice == SEQID_GENERAL) {
3805                         db = vnp->data.ptrvalue;
3806                         if (db == NULL) {
3807                                 continue;
3808                         }
3809                         val[0] = '\0';
3810                         if (StringNCmp(db->db, "PIDe", 4) == 0) {
3811                                         sprintf(val, "PID:e%ld", (long) db->tag->id);
3812                                         gi = db->tag->id;
3813                         } else if (StringNCmp(db->db, "PIDd", 4) == 0) {
3814                                         sprintf(val, "PID:d%ld", (long) db->tag->id);
3815                                         gi = db->tag->id;
3816                         } else if (StringNCmp(db->db, "PID", 3) == 0) {
3817                                 if (db->tag && db->tag->str) {
3818                                         sprintf(val, "%s:%s", db->db, db->tag->str);
3819                                         gi = atoi((db->tag->str)+1);
3820                                 }
3821                         }
3822                         if (ajp->show_gi && val[0] != '\0') {
3823                                 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3824                         }
3825                         /*if (ajp->show_version) {
3826                                 val[0] = '\0';
3827                                 sprintf(val, "GI:%ld", (long) gi);
3828                                 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3829                         }*/
3830                 }
3831                 if (vnp->choice == SEQID_GI) {
3832                         if (ajp->show_gi) {
3833                                 val[0] = '\0';
3834                                 sprintf(val, "PID:g%ld", (long) vnp->data.intvalue);
3835                                 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3836                         }
3837                         if (ajp->show_version) {
3838                                 val[0] = '\0';
3839                                 sprintf(val, "GI:%ld", (long) vnp->data.intvalue);
3840                                 sfp_out->qual = AddGBQual(sfp_out->qual, "db_xref", val);
3841                         }
3842                 }
3843         }
3844         return;
3845 }       /* AddPID */
3846 
3847 /***************************************************************************
3848 *Int2 MakeGBSelectNote (CharPtr ptr, SeqFeatPtr sfp)
3849 *
3850 *Adds note to CDS GenBankSelect 
3851 ***************************************************************************/
3852 NLM_EXTERN Int2 MakeGBSelectNote (CharPtr ptr, SeqFeatPtr sfp)
3853 
3854 {
3855         Boolean found_select=FALSE, found_match=FALSE;
3856         CharPtr acc=NULL;
3857         Int2 number = -1;
3858         ObjectIdPtr oip=NULL, type;
3859         UserFieldPtr ufp;
3860         UserObjectPtr uop=NULL;
3861 
3862         if (sfp && (uop=sfp->ext) != NULL)
3863         {
3864                 if (uop->_class && (type=uop->type) != NULL)
3865                 {
3866                         if (StringCmp(uop->_class, "GB-Select") == 0)
3867                                 found_select = TRUE;
3868                         if (type->str) 
3869                                 if (StringCmp(type->str, "SPmatch") == 0)
3870                                         found_match = TRUE;
3871                         if (found_match && found_select)
3872                         {
3873                                 for (ufp=uop->data; ufp; ufp=ufp->next)
3874                                 {
3875                                         oip = ufp->label;
3876                                         if (oip->id == 2)
3877                                         {
3878                                                 if (ufp->choice == 1)
3879                                                         acc = ufp->data.ptrvalue;
3880                                         }
3881                                         else if (oip->id == 3)
3882                                         {
3883                                                 if (ufp->choice == 2)
3884                                                 {
3885                                                         number = (Int2) (ufp->data.intvalue);
3886                                                 }
3887                                         }
3888                                                 
3889                                 }
3890                                 if (number == 1)
3891                                         sprintf(ptr, 
3892                                                 "Identical to Swiss-Prot Accession Number %s", acc);
3893                                 else if (number == 2 || number == 3)
3894                                         sprintf(ptr, 
3895                                                 "Similar to Swiss-Prot Accession Number %s", acc);
3896                         }
3897                 }
3898         }
3899         return number;
3900 }
3901 
3902 NLM_EXTERN Boolean get_prot_feats (GatherContextPtr gcp)
3903 {
3904         BioseqPtr       bsp;
3905         OrganizeProtPtr opp;
3906         SeqFeatPtr sfp;
3907         Boolean temp = FALSE;
3908 
3909         opp = gcp->userdata;
3910 
3911         switch (gcp->thistype)
3912         {
3913                 case OBJ_SEQFEAT:
3914                         sfp = (SeqFeatPtr) (gcp->thisitem);
3915                         if (sfp->data.choice == SEQFEAT_PROT || 
3916                                         sfp->data.choice == SEQFEAT_REGION || 
3917                                         sfp->data.choice == SEQFEAT_BOND || 
3918                                                 sfp->data.choice == SEQFEAT_SITE) {
3919                                 bsp = BioseqFindCore(SeqLocId(sfp->location));
3920                         if (gcp->tempload == TRUE) {
3921                                 temp = TRUE;
3922                         }
3923                         opp->list = EnlargeSortList(opp->list, opp->size);
3924                                 opp->size = StoreFeatTemp(opp->list, sfp, opp->size, bsp, NULL,
3925                                         gcp->entityID, gcp->itemID, gcp->thistype,
3926                                                 gcp->new_loc, NULL, 0, temp);
3927                         }
3928                         break;
3929                 default:
3930                         break;
3931         }
3932         return TRUE;
3933 }
3934 
3935 /********************************************************************
3936 *       Int2 CompareStringWithGsp (GeneStructPtr gsp, CharPtr string)
3937 *
3938 *       gsp: GeneStructPtr containing the gene information,
3939 *       gene->synonym in is store in gsp->gene with choice 1 (GetGeneRefInfo)
3940 *       it is not compared to note string 
3941 *
3942 *       string: a CharPtr with (possibly) relevant gene information
3943 *               (i.e., gene name, allele, product etc.).
3944 *
3945 *       A comparison is made between string and the information already
3946 *       stored in the gsp.  Following the convention for StringCmp,
3947 *       "0" is returned if a match is found, otherwise "1" is returned.
3948 *       At present (2/7/94) GeneStringCmp is a #define for StringCmp.
3949 ************************************************************************/
3950 
3951 NLM_EXTERN Int2 CompareStringWithGsp (GeneStructPtr gsp, CharPtr string)
3952 
3953 {
3954         CharPtr ascii, start;
3955         Int2 ascii_len;
3956         ValNodePtr vnp; 
3957 
3958         for (vnp=gsp->gene; vnp; vnp=vnp->next)
3959         {
3960                 if (vnp->choice == 1) {
3961                         continue;
3962                 }
3963                 ascii_len = Sgml2AsciiLen(vnp->data.ptrvalue);
3964                 start = ascii = MemNew((size_t) (10+ascii_len));
3965                 ascii = Sgml2Ascii(vnp->data.ptrvalue, ascii, ascii_len+1);
3966                 if (GeneStringCmp(start, string) == 0)
3967                 {
3968                         start = MemFree(start);
3969                         return 0;
3970                 }
3971                 start = MemFree(start);
3972         }
3973         vnp=gsp->product;
3974         if (vnp != NULL)
3975         {
3976                 if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3977                         return 0;
3978         }
3979         for (vnp=gsp->standard_name; vnp; vnp=vnp->next)
3980         {
3981                 if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3982                         return 0;
3983         }
3984         if (gsp->map[0] && GeneStringCmp(gsp->map[0], string) == 0)
3985                 return 0;
3986         if (gsp->ECNum)
3987         for (vnp=gsp->ECNum; vnp; vnp=vnp->next)
3988         {
3989                 if (GeneStringCmp(vnp->data.ptrvalue, string) == 0)
3990                         return 0;
3991         }
3992 
3993         return 1;
3994 }       /* CompareStringWithGsp */
3995 
3996 NLM_EXTERN void GetDBXrefFromGene (GeneRefPtr grp, SeqFeatPtr sfp)
3997 
3998 {
3999         CharPtr dbase;
4000         DbtagPtr dbtp;
4001         ValNodePtr tmp;
4002         Char buffer[50];
4003         
4004         if (grp == NULL) {
4005                 return;
4006         }
4007         for (tmp = grp->db; tmp != NULL; tmp=tmp->next) {
4008             dbtp = tmp->data.ptrvalue;
4009             if (dbtp && dbtp->db && dbtp->tag) {
4010                         dbase = MemNew(StringLen(dbtp->db) + 3);
4011                         sprintf(dbase, "%s:", dbtp->db);
4012                         if (dbtp->tag->str) {
4013                                 sprintf(buffer, "%s%s", dbase, dbtp->tag->str);
4014                                 sfp->qual = AddGBQual(sfp->qual, "db_xref", buffer);
4015                         } else if (dbtp->tag->id) {
4016                                 sprintf(buffer, "%s%ld", dbase, (long) dbtp->tag->id);
4017                                 sfp->qual = AddGBQual(sfp->qual, "db_xref", buffer);
4018                         }
4019                         MemFree(dbase);
4020             }
4021         }
4022 
4023         return;
4024 }
4025 
4026 /****************************************************************************
4027 *       void GetProtRefInfo (GeneStructPtr gsp, NoteStructPtr nsp, ProtRefPtr prp)
4028 *
4029 *       gsp: GeneStructPtr containing gene information
4030 *       prp: ProtRefPtr from a sfp of type protein or a sfp xref.
4031 *
4032 *       If fields are empty on the gsp, and the relevant information
4033 *       is given by the prp, that field is filled on the gsp
4034 ****************************************************************************/
4035 NLM_EXTERN void GetProtRefInfo (Uint1 format, GeneStructPtr gsp, NoteStructPtr nsp, ProtRefPtr prp)
4036 {
4037         ValNodePtr tmp, vnp;
4038 
4039         if (prp == NULL) {
4040                 return;
4041         }
4042         for (vnp=prp->name; vnp; vnp=vnp->next) { 
4043                 tmp = ValNodeNew(NULL);
4044                 tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
4045                 gsp->product = tie_next(gsp->product, tmp);
4046         }
4047         for (vnp=prp->ec; vnp; vnp=vnp->next) {
4048                 tmp = ValNodeNew(NULL);
4049                 tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
4050                 gsp->ECNum = tie_next(gsp->ECNum, tmp);
4051         }
4052         for (vnp=prp->activity; vnp; vnp=vnp->next) {
4053                 tmp = ValNodeNew(NULL);
4054                 tmp->data.ptrvalue = StringSave(vnp->data.ptrvalue);
4055                 gsp->activity = tie_next(gsp->activity, tmp);
4056         }
4057         if (format != GENPEPT_FMT) {
4058                 if (prp->desc) {
4059                         SaveNoteToCharPtrStack(nsp, NULL, prp->desc);
4060                 }
4061         }
4062         return;
4063 }
4064 
4065 /****************************************************************************
4066 *
4067 *       sfp: SeqFeatPtr for CDS
4068 *       nsp: NoteStructPtr 
4069 *
4070 * Used to get comments from the Protein for use in a CDS /note.
4071 *
4072 * Take the main protein ONLY (not sig_peptide mat_peptide)
4073 *
4074 * Will find the Protein Pubs, as they are needed and (presumably) haven't
4075 * been found yet, so as to save "upfront" time when the formatter is 
4076 * running in Entrez.
4077 ****************************************************************************/
4078 static void GetProtRefComment (SeqFeatPtr sfp, BioseqPtr bsp, Asn2ffJobPtr ajp, OrganizeProtPtr opp, NoteStructPtr nsp, Uint1 method)
4079 {
4080         Boolean first_done=FALSE, protein=FALSE;
4081         CharPtr ptr = NULL, string=NULL, string1=NULL, newstring=NULL, temp, s;
4082         CharPtr conflict_msg_no_protein="Coding region translates with internal stops";
4083 /*      CharPtr except_msg_no_protein="Coding region translates with internal stops for reasons explained in citation. "; -- except_msg_no_protein UNUSED */
4084         CharPtr conflict_msg="Protein sequence is in conflict with the conceptual translation";
4085 /*      CharPtr except_msg="Protein sequence differs from the conceptual translation for reasons explained in citation. "; -- except_msg UNUSED */
4086         CdRegionPtr cdr=NULL;
4087         Int2 total=0, i;
4088         PubdescPtr pdp;
4089         PubStructPtr psp;
4090         SeqFeatPtr sfp_local=NULL;
4091         ValNodePtr descr, vnp, vnp1, vnp1next, product;
4092         MolInfoPtr mfp;
4093         GatherScope gs;
4094         SeqLocPtr slp;
4095         ProtRefPtr prot_local;
4096         SeqMgrFeatContext fcontext;
4097         SeqMgrDescContext dcontext;
4098         GatherContext gc;
4099         SeqFeatPtr psfp;
4100         ValNodePtr psdp;
4101         ObjMgrDataPtr omdp;
4102         SeqSubmitPtr ssp;
4103         SubmitBlockPtr sbp;
4104         CharPtr prefix = "";
4105 
4106         if (ajp->useSeqMgrIndexes) {
4107                 sfp_local = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PROT, 0, &fcontext);
4108                 while (sfp_local != NULL) {
4109                         prot_local = sfp_local->data.value.ptrvalue;
4110                         if (prot_local->processed <= 1) {
4111                                 if (first_done) {
4112                                         if (StringLen(sfp_local->comment)) {
4113                                                 string1 = CheckEndPunctuation(sfp_local->comment, '\0');
4114                                                 if (StringCmp(string, string1) != 0) {
4115                                                         newstring = Cat2Strings(string, string1, "; ", 0);
4116                                                         string = MemFree(string);
4117                                                         string = newstring;
4118                                                 }
4119                                                 string1 = MemFree(string1);
4120                                         }
4121                                 } else {
4122                                         if (StringLen(sfp_local->comment)) {
4123                                                 string = CheckEndPunctuation(sfp_local->comment, '\0');
4124                                                 first_done = TRUE;
4125                                         }
4126                                 }
4127                         }
4128                         sfp_local = SeqMgrGetNextFeature (bsp, sfp_local, SEQFEAT_PROT, 0, &fcontext);
4129                 }
4130         } else if (opp != NULL) {
4131                 for (i = 0; i < opp->size; i++) {
4132                         if ((sfp_local = opp->list[i].sfp) == NULL) {
4133                                 continue;
4134                         }
4135                         if (sfp_local->data.choice != SEQFEAT_PROT) {
4136                                 continue;
4137                         }
4138                         prot_local = sfp_local->data.value.ptrvalue;
4139                         if (prot_local->processed > 1) {
4140                                 continue;
4141                         }
4142                         if (first_done) {
4143                                 if (StringLen(sfp_local->comment)) {
4144                                         string1 = CheckEndPunctuation(sfp_local->comment, '\0');
4145                                         if (StringCmp(string, string1) != 0) {
4146                                                 newstring = Cat2Strings(string, string1, "; ", 0);
4147                                                 string = MemFree(string);
4148                                                 string = newstring;
4149                                         }
4150                                         string1 = MemFree(string1);
4151                                 }
4152                         } else {
4153                                 if (StringLen(sfp_local->comment)) {
4154                                         string = CheckEndPunctuation(sfp_local->comment, '\0');
4155                                         first_done = TRUE;
4156                                 }
4157                         }
4158                 }
4159         }
4160 
4161         if (bsp && (descr=bsp->descr) != NULL) {
4162                 for (vnp=descr; vnp; vnp=vnp->next) {
4163                         if (vnp->choice == Seq_descr_comment) {
4164                                 if (first_done) {
4165                                         if (StringLen(vnp->data.ptrvalue)) {
4166                                                 string1 = CheckEndPunctuation(vnp->data.ptrvalue, '\0');
4167                                                 if (StringCmp(string, string1) != 0) {
4168                                                         newstring = Cat2Strings(string, string1, "; ", 0);
4169                                                         string = MemFree(string);
4170                                                         string = newstring;
4171                                                 }
4172                                                 string1 = MemFree(string1);
4173                                         }
4174                                 } else {
4175                                         if (StringLen(vnp->data.ptrvalue)) {
4176                                                 string = CheckEndPunctuation(vnp->data.ptrvalue, '\0');
4177                                                 first_done = TRUE;
4178                                         }
4179                                 }
4180                         } else if (vnp->choice == Seq_descr_molinfo) {
4181                                 mfp = vnp->data.ptrvalue;
4182                                 if (mfp && mfp->tech > 1 && mfp->tech != 8) {
4183                                         if (mfp->tech == MI_TECH_concept_trans_a) {
4184                                 /*              s = StringForSeqMethod(method); */
4185                                                 s = NULL;
4186                                         } else {
4187                                                 s = StringForSeqTech(mfp->tech);
4188                                         }
4189                                         if (s!= NULL && *s != '\0') {
4190                                                 ptr = MemNew(StringLen(s) + 10);
4191                                                 sprintf(ptr, "Method: %s", s); 
4192                                         }
4193                                         if (first_done) {
4194                                                 newstring = Cat2Strings(string, ptr, "; ", 0);
4195                                                 string = MemFree(string);
4196                                                 string = newstring;
4197                                         } else {
4198                                                 string = StringSave(ptr);
4199                                                 first_done = TRUE;
4200                                         }
4201                                         MemFree(ptr);
4202                                 }
4203                         } else if (vnp->choice == Seq_descr_method) {
4204                                 if (vnp->data.intvalue > 1) {
4205                                         if (method == METHOD_concept_transl_a) {
4206                                         /*      s = StringForSeqMethod(method);*/
4207                                                 s = NULL;
4208                                         } else {
4209                                                 s = StringForSeqMethod((Uint1)(vnp->data.intvalue));
4210                                         }
4211                                         if (s!= NULL && *s != '\0') {
4212                                                 ptr = MemNew(StringLen(s) + 10);
4213                                                 sprintf(ptr, "Method: %s", s); 
4214                                         }
4215                 
4216                                         if (first_done) {
4217                                                 newstring = Cat2Strings(string, ptr, "; ", 0);
4218                                                 string = MemFree(string);
4219                                                 string = newstring;
4220                                         } else {
4221                                                 string = StringSave(ptr);
4222                                                 first_done = TRUE;
4223                                         }
4224                                         MemFree(ptr);
4225                                 }
4226                         }
4227                 }
4228         }
4229 /* gather pubs on protein bioseq do not do checking or sorting*/
4230         vnp = NULL;
4231         if (ajp->useSeqMgrIndexes) {
4232                 /* finess calls to get_pubs */
4233                 MemSet ((Pointer) (&gc), 0, sizeof (GatherContext));
4234                 gc.userdata = (Pointer) (&vnp);
4235                 gc.entityID = ajp->entityID;
4236                 psdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pub, &dcontext);
4237                 while (psdp != NULL) {
4238                         gc.thistype = OBJ_SEQDESC;
4239                         gc.itemID = dcontext.itemID;
4240                         gc.thisitem = (Pointer) psdp;
4241                         omdp = dcontext.omdp;
4242                         if (omdp != NULL) {
4243                                 gc.parenttype = omdp->datatype;
4244                                 gc.parentitem = omdp->dataptr;
4245                         } else {
4246                                 gc.parenttype = 0;
4247                                 gc.parentitem = NULL;
4248                         }
4249                         get_pubs (&gc);
4250                         psdp = SeqMgrGetNextDescriptor (bsp, psdp, Seq_descr_pub, &dcontext);
4251                 }
4252                 psfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
4253                 while (psfp != NULL) {
4254                         gc.thistype = OBJ_SEQFEAT;
4255                         gc.itemID = dcontext.itemID;
4256                         gc.thisitem = (Pointer) psfp;
4257                         get_pubs (&gc);
4258                         psfp = SeqMgrGetNextFeature (bsp, psfp, SEQFEAT_PUB, 0, &fcontext);
4259                 }
4260                 omdp = ObjMgrGetData (ajp->entityID);
4261                 if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
4262                         ssp = (SeqSubmitPtr) omdp->dataptr;
4263                         if (ssp != NULL) {
4264                                 sbp = ssp->sub;
4265                                 if (sbp != NULL) {
4266                                         gc.thistype = OBJ_SUBMIT_BLOCK;
4267                                         gc.itemID = 1;
4268                                         gc.thisitem = (Pointer) sbp;
4269                                         get_pubs (&gc);
4270                                 }
4271                         }
4272                 }
4273                 /* also submit block */
4274         } else {
4275                 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
4276 /*      MemSet ((Pointer) (gs.ignore), (int)(TRUE), (size_t) (OBJ_MAX * sizeof(Boolean)));
4277                 gs.ignore[OBJ_SEQENTRY] = FALSE;
4278                 gs.ignore[OBJ_BIOSEQ] = FALSE;
4279                 gs.ignore[OBJ_SEQDESC] = FALSE;*/
4280                 gs.ignore[OBJ_SEQSUB] = TRUE;
4281                 gs.ignore[OBJ_SEQSUB_CIT] = TRUE;
4282                 slp = ValNodeNew(NULL);
4283                 slp->choice = SEQLOC_WHOLE;
4284                 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
4285                 gs.target = slp;
4286                 gs.seglevels = 4;
4287 
4288                 GatherEntity(ajp->entityID, &vnp, get_pubs, &gs);
4289                 if (slp)
4290                         SeqLocFree(slp);
4291         }
4292 /*      if ((status = CheckPubs(ajp, bsp, &vnp)) < 0) {
4293                         ValNodeFree(vnp);
4294                         vnp = NULL;
4295         }
4296         vnp = OrganizePubList(vnp); */
4297         for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
4298                 psp = vnp1->data.ptrvalue;
4299                 if ((pdp=psp->descr) != NULL) {
4300                         if (pdp->fig) {
4301                                 total += 32;
4302                                 total += StringLen(pdp->fig);
4303                         }
4304                         if (pdp->maploc) {
4305                                 total += 22;
4306                                 total += StringLen(pdp->maploc);
4307                         }
4308                 }
4309         }
4310 
4311         if (sfp) {
4312                 cdr = (CdRegionPtr) sfp->data.value.ptrvalue;
4313                 product = sfp->product;
4314                 if (product && SeqLocLen(product)) 
4315                         protein = TRUE;
4316                 if (sfp->excpt)
4317                         total += 112;
4318                 if (cdr && cdr->conflict && (protein || ! sfp->excpt))
4319                         total += 112;
4320         }
4321 
4322         string1 = (CharPtr) MemNew(total*sizeof(Char));
4323 
4324         for (vnp1=vnp; vnp1; vnp1=vnp1->next) {
4325                 psp = vnp1->data.ptrvalue;
4326                 if ((pdp=psp->descr) != NULL) {
4327                         if (pdp->fig) {
4328                         
4329                                 temp = CheckEndPunctuation(pdp->fig, '\0');
4330                                 total = StringLen(string1);
4331 
4332                                 sprintf(string1+total, "This sequence comes from %s", temp);
4333                                 prefix = "; ";
4334                                 temp = MemFree(temp);
4335                         }
4336                         if (pdp->maploc) {
4337                                 total = StringLen(string1);
4338                                 sprintf(string1+total, "%sMap location %s", prefix, pdp->maploc);
4339                                 prefix = "; ";
4340                         }
4341                 }
4342         }
4343 
4344         if (sfp) {
4345                 if (cdr && cdr->conflict && (protein || ! sfp->excpt)) {
4346                         total = StringLen(string1);
4347                         sprintf(string1+total, "%s%s", prefix,
4348                                         protein?conflict_msg:conflict_msg_no_protein);
4349                 }
4350         }
4351         if (string && string1) {
4352                 newstring = Cat2Strings(string, string1, "; ", 0);
4353                 string = MemFree(string);
4354                 string1 = MemFree(string1);
4355         } else if (string) {
4356                 newstring = string;
4357         } else if (string1) {
4358                 newstring = string1;
4359         }
4360 
4361         if (newstring) {
4362                 SaveNoteToCharPtrStack(nsp, NULL, newstring);
4363                 newstring = MemFree(newstring);
4364         }
4365         for (vnp1=vnp; vnp1; vnp1=vnp1next) {
4366                 vnp1next = vnp1->next;
4367                 psp = vnp1->data.ptrvalue;
4368                 FreePubStruct(psp);
4369                 MemFree(vnp1);
4370         }
4371         return;
4372 }       /* GetProtRefComment */
4373 
4374 NLM_EXTERN GBQualPtr AddModifsToGBQual (GBEntryPtr gbp, GBQualPtr gbqual)
4375 {
4376         CharPtr ptr;
4377         ValNodePtr descr, man;
4378 
4379         descr=BioseqGetSeqDescr(gbp->bsp, Seq_descr_modif, NULL); 
4380         if (descr) {
4381                 for (man = (ValNodePtr) descr-> data.ptrvalue; man != NULL; man = man -> next){
4382                         switch (man -> data.intvalue){
4383                         case 3: case 14: 
4384                                 ptr = AsnEnumStr("GIBB-mod", 
4385                                    (Int2) man->data.intvalue);
4386                                 if (GBQualPresent(ptr, gbqual) == FALSE)
4387                                         gbqual = AddGBQual(gbqual, ptr, " ");
4388                                 break;
4389                         case 4:
4390                                 if (GBQualPresent("mitochondrion", gbqual) == FALSE)
4391                                         gbqual = AddGBQual(gbqual, "mitochondrion", NULL);
4392                                 break;
4393                         case 15:
4394                                 if (GBQualPresent("insertion_seq", gbqual) == FALSE)
4395                                         gbqual = AddGBQual(gbqual, "insertion_seq", " ");
4396                                 break;
4397                         case 5: case 6: case 7: case 18: case 19:
4398                                 ptr = AsnEnumStr("GIBB-mod", 
4399                                    (Int2) man->data.intvalue);
4400                                 if (GBQualPresent(ptr, gbqual) == FALSE)
4401                                         gbqual = AddGBQual(gbqual, ptr, NULL);
4402                                 break;
4403                         default:
4404                                 break;
4405                         }
4406                 }
4407         }
4408         return gbqual;
4409 }       /* AddModifsToGBQual */
4410 
4411 /*************************************************************************
4412 *GBQualPtr AddOrgRefModToGBQual (OrgRefPtr orp, GBQualPtr gbqual);
4413 *
4414 *Add the OrgRef.mod to a source feat.  Note: a few of the quals added
4415 *may be illegal for a source feature, but the validator will catch them 
4416 *in the end.
4417 ***************************************************************************/
4418 NLM_EXTERN GBQualPtr AddOrgRefModToGBQual (OrgRefPtr orp, GBQualPtr gbqual)
4419 
4420 {
4421         CharPtr mod, ptr, temp_ptr;
4422         Char temp[ASN2FF_STD_BUF]; /* ASN2FF_STD_BUF (now 35) is longer than 
4423 any qual. */
4424         Int2 index;
4425         ValNodePtr vnp;
4426 
4427         if (orp && orp->mod)
4428         {
4429                 for (vnp=orp->mod; vnp; vnp=vnp->next)
4430                 {
4431                         mod = vnp->data.ptrvalue;
4432                         if (StringNCmp(mod, "citation", 8) == 0)
4433                                 continue;
4434                         index=0;
4435                         for (ptr=mod; *ptr != '\0'; ptr++)
4436                         {
4437                                 index++;
4438                                 if (*ptr == ' ' || *ptr == '=')
4439                                 {
4440                                         ptr++;
4441                                         index--;
4442                                         break;
4443                                 }
4444                         }
4445                         if (index > ASN2FF_STD_BUF-1)
4446                                 continue;
4447 
4448                         temp_ptr = &(temp[0]);
4449                         StringNCpy(temp_ptr, mod, index);
4450                         temp[index] = '\0';
4451                         if ((GBQualNameValid(temp_ptr)) == -1)
4452                                 continue;
4453                         if (ptr)
4454                                 gbqual = AddGBQual(gbqual, temp_ptr, ptr);
4455                         else
4456                                 gbqual = AddGBQual(gbqual, temp_ptr, NULL);
4457                 }
4458         }
4459         return gbqual;
4460 }       /* AddOrgRefModToGBQual */
4461 
4462 /*************************************************************************
4463 *GBQualPtr AddBioSourceToGBQual (Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release);
4464 *
4465 *Add the OrgMod.subtypes and SubSource.subtypes to a source feat.
4466 *Add BioSource.genome to a source feat.  
4467 *Note: a few of the quals added may be illegal for a source feature, 
4468 *but the validator will catch them in the end.
4469 ***************************************************************************/
4470 
4471 static CharPtr organelleQual [] = {
4472   NULL,
4473   NULL,
4474   "plastid:chloroplast",
4475   "plastid:chromoplast",
4476   "mitochondrion:kinetoplast",
4477   "mitochondrion",
4478   "plastid",
4479   NULL,
4480   NULL,
4481   NULL, 
4482   NULL,
4483   NULL,
4484   "plastid:cyanelle",
4485   NULL,
4486   NULL,
4487   "nucleomorph",
4488   "plastid:apicoplast",
4489   "plastid:leucoplast",
4490   "plastid:proplastid",
4491   NULL
4492 };
4493 
4494 NLM_EXTERN GBQualPtr AddBioSourceToGBQual (Asn2ffJobPtr ajp, NoteStructPtr nsp, BioSourcePtr biosp, GBQualPtr gbqual, Boolean new_release)
4495 {
4496         CharPtr qual, val = NULL;
4497         OrgModPtr omp;
4498         OrgNamePtr onp;
4499         SubSourcePtr ssp;
4500         Int2 i;
4501         Int4 id = -1;
4502         DbtagPtr db = NULL;
4503         OrgRefPtr org;
4504         ValNodePtr vnp;
4505         CharPtr s;
4506 
4507         if (biosp == NULL)
4508                 return gbqual;
4509         if (biosp->genome) {
4510                 i = biosp->genome;
4511                 if (i > 1 && i < 20) {
4512                         val = organelleQual [i];
4513                         if (val != NULL) {
4514                                 gbqual = AddGBQual (gbqual, "organelle", val);
4515                         } else if (i < num_genome) {
4516                                 qual = genome[i];
4517                                 if (qual && (GBQualNameValid(qual)) != -1) {
4518                                         if (i == 8) {  /*extrachrom*/
4519                                                 gbqual = AddGBQual(gbqual, "note", "extrachromosomal");
4520                                         } else {
4521                                                 gbqual = AddGBQual(gbqual, qual, val);
4522                                         }
4523                                 } else if (qual && i == 8) {
4524                                         gbqual = AddGBQual(gbqual, "note", "extrachromosomal");
4525                                 }
4526                         }
4527                 }
4528         }
4529         org = (OrgRefPtr) biosp->org;
4530         if (org) {
4531                 if ((onp = (OrgNamePtr) org->orgname) != NULL) {
4532                         for (omp=onp->mod; omp != NULL; omp=omp->next) {
4533                                 for (i=0; orgmod_subtype[i].name != NULL; i++) {
4534                                         if (omp->subtype == orgmod_subtype[i].num)
4535                                                 break;
4536                                 }
4537                                 if (orgmod_subtype[i].name == NULL) {
4538                                         continue;
4539                                 }
4540                                 if (orgmod_subtype[i].num == 253) { /* old_lineage */
4541                                         continue;
4542                                 }
4543                                 if (orgmod_subtype[i].num == 254) { /* old_name */
4544                                         continue;
4545                                 }
4546                                 qual = orgmod_subtype[i].name;
4547                                 if (orgmod_subtype[i].num == 21) {   /* nat_hos */
4548                                         qual = "specific_host";
4549                                 }
4550                                 if ((val = omp->subname) == NULL)
4551                                         val = "";
4552                                 if ((GBQualNameValid(qual)) != -1) {
4553                                         gbqual = AddGBQual(gbqual, qual, val);
4554                                 } else {
4555                                         s = MemNew(StringLen(val) + 
4556                                                         StringLen(qual) + 3);
4557                                         sprintf(s, "%s: %s", qual, val);
4558                                         CpNoteToCharPtrStack(nsp, NULL, s);
4559                                 }
4560                         }
4561                 }
4562 /* add db_xref */
4563                 val = NULL;
4564                 for (vnp=org->db; vnp; vnp=vnp->next) {
4565                         id = -1;
4566                         db = (DbtagPtr) vnp->data.ptrvalue;
4567                         if (db && db->db) {
4568                                 for (i =0; i < DBNUM; i++) {
4569                                         if (StringCmp(db->db, dbtag[i]) == 0) {
4570                                                 id = i;
4571                                                 break;
4572                                         }
4573                                 }
4574                                 if (id == -1) {
4575                                         continue;  /* unknown dbtag */
4576                                 }
4577                         }
4578                         if (db->tag && db->tag->str) {
4579                                 val = MemNew(StringLen(db->db)+StringLen(db->tag->str)+2);
4580                                 sprintf(val, "%s:%s", db->db, db->tag->str);
4581                         } else if (db->tag) {
4582                                 val = MemNew(StringLen(db->db)+16);
4583                                 sprintf(val, "%s:%ld", db->db, (long) db->tag->id);
4584                         }
4585                         if (val[0] != '\0') {
4586                                 gbqual = AddGBQual(gbqual, "db_xref", val);
4587                                 MemFree(val);
4588                         }
4589                 }
4590         }
4591         for (ssp = biosp->subtype; ssp != NULL; ssp=ssp->next) {
4592                 qual = NULL;
4593                 if (ssp->subtype == 255) {
4594                         qual = "note";
4595                 } else if (ssp->subtype > num_subtype) {
4596                         qual = NULL;
4597                 } else if (ssp->subtype > 0) {
4598                         qual = subtype[ssp->subtype - 1];
4599                 } else {
4600                         qual = "?";
4601                 }
4602                 val = ssp->name;
4603                 if (ssp->subtype != 14 && ssp->subtype != 15) {
4604                         if (val == NULL)
4605                                 val = "";
4606                         }
4607                 if ((GBQualNameValid(qual)) == -1) {
4608                         if (qual == NULL) {
4609                                 qual = "?";
4610                         }
4611                         s = MemNew(StringLen(val) + StringLen(qual) + 3);
4612                         sprintf(s, "%s: %s", qual, val);
4613                         CpNoteToCharPtrStack(nsp, NULL, s);
4614                 } else {
4615                         gbqual = AddGBQual(gbqual, qual, val);
4616                 }
4617         }
4618         if (biosp->is_focus == TRUE) {
4619                 gbqual = AddGBQual(gbqual, "focus", NULL);
4620         }
4621         return gbqual;
4622 }       /* AddBioSourceToGBQual */
4623 
4624 /****************************************************************************
4625 *PrintImpFeatEx
4626 *
4627 *       This code prints out an ImpFeat in GenBank and HTML format.
4628 *
4629 ****************************************************************************/
4630 NLM_EXTERN Int2 PrintImpFeatEx (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp, Int4 gi, Int2 entityID, Uint4 itemID)
4631 {
4632         CharPtr flatloc_ptr, key, loc;
4633         GBQualPtr gbqp;
4634         ImpFeatPtr ifp;
4635         Uint1 class_qual, format=ajp->format;
4636         Int2 class_equal, gbqual_index;
4637         static CharPtr buf = NULL;
4638         Uint2 retval;
4639         ValNodePtr seqid;
4640         CharPtr p, q;
4641 
4642         if (sfp == NULL)
4643                 return -1;
4644         if (sfp->data.choice != SEQFEAT_IMP)
4645                 return -1;
4646         ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4647         key = ifp->key;
4648         loc = ifp->loc;
4649 
4650         for (seqid = ajp->id_print; seqid; seqid=seqid->next) {
4651                 if (seqid->choice == SEQID_GI) {
4652                 }
4653         }
4654         if (format == EMBL_FMT || format == PSEUDOEMBL_FMT || 
4655                                                 format == EMBLPEPT_FMT)
4656                 ff_StartPrint(5, 21, ASN2FF_EMBL_MAX, "FT");
4657         else
4658                 ff_StartPrint(5, 21, ASN2FF_GB_MAX, NULL);
4659 
4660         if (ajp->slp) {
4661                 ff_AddString(key);
4662         } else {
4663                 www_featkey(key, gi, entityID, itemID);
4664         }
4665         TabToColumn(22);
4666         if (loc == NULL) {
4667                 flatloc_ptr = FlatLoc(bsp, sfp->location);
4668                 if (get_www()) {
4669                         buf = www_featloc(flatloc_ptr);
4670                         ff_AddString(buf);
4671                         MemFree(buf);
4672                 } else {
4673                         ff_AddString(flatloc_ptr);
4674                 }
4675                 MemFree(flatloc_ptr);
4676         } else {
4677                 if (get_www()) {
4678                         buf = www_featloc(loc);
4679                         ff_AddString(buf);
4680                         MemFree(buf);
4681                 } else {
4682                         ff_AddString(loc);
4683                 }
4684         }
4685         if (sfp->partial == TRUE) {
4686                 retval = SeqLocPartialCheck(sfp->location);
4687                 if (retval == SLP_COMPLETE || retval > SLP_OTHER) {
4688                         NewContLine();
4689                         ff_AddString("/partial");
4690                 }
4691         }
4692         for (gbqp=sfp->qual; gbqp; gbqp=gbqp->next) {
4693                 gbqual_index = GBQualNameValid(gbqp->qual);
4694                 if (gbqual_index != -1) {
4695                         NewContLine();
4696                         ff_AddChar( '/');
4697                         ff_AddString(gbqp->qual);
4698                         class_qual = ParFlat_GBQual_names[gbqual_index].gbclass;
4699                         if (class_qual == Class_none) {
4700                                 class_equal=CheckForEqualSign(gbqp->qual);
4701                                 if (class_equal == 1)
4702                                         continue;
4703                         }
4704                         ff_AddChar('=');
4705                         if (class_qual == Class_text && 
4706                                 StringCmp(gbqp->val, "\"\"") == 0) { 
4707                                 ff_AddString(gbqp->val);
4708                                 continue;
4709                         }
4710                         if (get_www() && (class_qual == Class_text
4711                                                         || class_qual == Class_note)) {
4712                                 buf = www_featloc(gbqp->val);
4713                         } else {
4714                                 buf = StringSave(gbqp->val);
4715                         }
4716                         if (class_qual == Class_text || class_qual == Class_none
4717                                 || class_qual == Class_ecnum || class_qual == Class_note)
4718                                 ff_AddString("\"");
4719                         if (class_qual == Class_note) {
4720                                 /* start of process tildes */
4721                                 if (StringCmp (gbqp->qual, "note") == 0) {
4722                                         for (p = buf, q = buf; *p != '\0'; *q++ = *p++) {
4723                                                 if (*p != '~')
4724                                                         continue;
4725                                                 if (p [1] != '~')
4726                                                         *p = '\n';
4727                                                 else
4728                                                         p++;
4729                                         }
4730                                         *q = '\0';
4731                                 }
4732                                 /* end of process tildes */
4733                                 www_note_gi(buf);
4734                         } else if (class_qual != Class_none) {
4735                                 if (StringCmp(gbqp->qual, "transl_table") == 0) {
4736                                         www_gcode(buf);
4737                                 } else if (StringCmp(gbqp->qual, "db_xref") == 0) {
4738                                         www_db_xref(buf);
4739                                 } else if (StringCmp(gbqp->qual, "protein_id") == 0 ||
4740                                         StringCmp(gbqp->qual, "transcript_id") == 0) {
4741                                         www_protein_id(buf);
4742                                 } else {
4743                                         ff_AddString(buf);
4744                                 }
4745                         }
4746                         if (class_qual == Class_text || class_qual == Class_none
4747                                 || class_qual == Class_ecnum || class_qual == Class_note)
4748                                 ff_AddString("\"");
4749                         if (buf) {
4750                                 MemFree(buf);
4751                         }
4752                 } else if (format == GENPEPT_FMT) {
4753                         if (StringCmp(gbqp->qual, "site_type") == 0) {
4754                                 NewContLine();
4755                                 ff_AddChar('/');
4756                                 ff_AddString(gbqp->qual);
4757                                 ff_AddChar('=');
4758                                 ff_AddString("\"");
4759                                 ff_AddString(gbqp->val);
4760                                 ff_AddString("\"");
4761                         } else if (StringCmp(gbqp->qual, "bond_type") == 0) {
4762                                 NewContLine();
4763                                 ff_AddChar('/');
4764                                 ff_AddString(gbqp->qual);
4765                                 ff_AddChar('=');
4766                                 ff_AddString("\"");
4767                                 ff_AddString(gbqp->val);
4768                                 ff_AddString("\"");
4769                         } else if (StringCmp(gbqp->qual, "region_name") == 0) {
4770                                 NewContLine();
4771                                 ff_AddChar('/');
4772                                 ff_AddString(gbqp->qual);
4773                                 ff_AddChar('=');
4774                                 ff_AddString("\"");
4775                                 ff_AddString(gbqp->val);
4776                                 ff_AddString("\"");
4777                         } else if (StringCmp(gbqp->qual, "sec_str_type") == 0) {
4778                                 NewContLine();
4779                                 ff_AddChar('/');
4780                                 ff_AddString(gbqp->qual);
4781                                 ff_AddChar('=');
4782                                 ff_AddString("\"");
4783                                 ff_AddString(gbqp->val);
4784                                 ff_AddString("\"");
4785                         } else if (StringCmp(gbqp->qual, "non-std-residue") == 0) {
4786                                 NewContLine();
4787                                 ff_AddChar('/');
4788                                 ff_AddString(gbqp->qual);
4789                                 ff_AddChar('=');
4790                                 ff_AddString("\"");
4791                                 ff_AddString(gbqp->val);
4792                                 ff_AddString("\"");
4793                         } else if (StringCmp(gbqp->qual, "heterogen") == 0) {
4794                                 NewContLine();
4795                                 ff_AddChar('/');
4796                                 ff_AddString(gbqp->qual);
4797                                 ff_AddChar('=');
4798                                 ff_AddString("\"");
4799                                 ff_AddString(gbqp->val);
4800                                 ff_AddString("\"");
4801                         } else if (StringCmp(gbqp->qual, "name") == 0) {
4802                                 NewContLine();
4803                                 ff_AddChar('/');
4804                                 ff_AddString(gbqp->qual);
4805                                 ff_AddChar('=');
4806                                 ff_AddString("\"");
4807                                 ff_AddString(gbqp->val);
4808                                 ff_AddString("\"");
4809                         } else if (StringCmp(gbqp->qual, "coded_by") == 0) {
4810                                 NewContLine();
4811                                 ff_AddChar('/');
4812                                 ff_AddString(gbqp->qual);
4813                                 ff_AddChar('=');
4814                                 ff_AddString("\"");
4815                                 ff_AddString(gbqp->val);
4816                                 ff_AddString("\"");
4817                         }
4818                 } else if (ASN2FF_VALIDATE_FEATURES == FALSE) {
4819                         NewContLine();
4820                         ff_AddChar('/');
4821                         ff_AddString(gbqp->qual);
4822                         if (gbqp->val != NULL && StringLen(gbqp->val) != 0) {
4823                                 ff_AddChar('=');
4824                                 ff_AddString("\"");
4825                                 ff_AddString(gbqp->val);
4826                                 ff_AddString("\"");
4827                         }
4828                 }
4829         }
4830 
4831         ff_EndPrint();
4832 
4833         return 1;
4834 } /*PrintImpFeatEx */
4835                 
4836 static GBQualPtr extract_qual(GBQualPtr PNTR head, GBQualPtr x)
4837 {
4838         GBQualPtr       v, p;
4839         
4840         if (*head == NULL) {
4841                 return NULL;
4842         }
4843         if (x == *head) {
4844                 *head = x->next;
4845                 x->next = NULL;
4846                 return x;
4847         }
4848         for (v = *head; v != NULL && v != x; v = v->next) {
4849                 p = v;
4850         }
4851         if (v == NULL) {
4852                 return NULL;
4853         }
4854         p->next = x->next;
4855         x->next = NULL;
4856         return x;
4857 }
4858 static GBQualPtr tie_next_qual(GBQualPtr head, GBQualPtr next)
4859 {
4860         GBQualPtr v;
4861 
4862         if (head == NULL) {
4863                 return next;
4864         }
4865         for (v = head; v->next != NULL; v = v->next) {
4866                 v = v;
4867         }
4868         v->next = next;
4869         return head;
4870 }
4871 
4872 /****************************************************************************
4873 *PrintImpFeat
4874 *
4875 *       This code prints out an ImpFeat in GenBank and HTML format.
4876 *
4877 ****************************************************************************/
4878 NLM_EXTERN Int2 PrintImpFeat (Asn2ffJobPtr ajp, BioseqPtr bsp, SeqFeatPtr sfp)
4879 {
4880         CharPtr flatloc_ptr, key, loc;
4881         GBQualPtr gbqp;
4882         ImpFeatPtr ifp;
4883         Uint1 class_qual, format=ajp->format;
4884         Int2 class_equal, gbqual_index;
4885         static CharPtr buf = NULL;
4886         Uint2 retval;
4887         Boolean first=TRUE;
4888         GBQualPtr tmp, gbqpnext, head=NULL;
4889 
4890         if (sfp == NULL)
4891                 return -1;
4892         if (sfp->data.choice != SEQFEAT_IMP)
4893                 return -1;
4894         ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
4895         key = ifp->key;
4896         loc = ifp->loc;
4897 
4898         if (format == EMBL_FMT || format == PSEUDOEMBL_FMT || 
4899                                                 format == EMBLPEPT_FMT)
4900                 ff_StartPrint(5, 21, ASN2FF_EMBL_MAX, "FT");
4901         else
4902                 ff_StartPrint(5, 21, ASN2FF_GB_MAX, NULL);
4903         ff_AddString(key);
4904         TabToColumn(22);
4905         if (loc == NULL) {
4906                 flatloc_ptr = FlatLoc(bsp, sfp->location);
4907                 if (get_www()) {
4908                         buf = www_featloc(flatloc_ptr);
4909                         ff_AddString(buf);
4910                         MemFree(buf);
4911                 } else {
4912                         ff_AddString(flatloc_ptr);
4913                 }
4914                 MemFree(flatloc_ptr);
4915         } else {
4916                 if (get_www()) {
4917                         buf = www_featloc(loc);
4918                         ff_AddString(buf);
4919                         MemFree(buf);
4920                 } else {
4921                         ff_AddString(loc);
4922                 }
4923         }
4924         if (sfp->partial == TRUE) {
4925                 retval = SeqLocPartialCheck(sfp->location);
4926                 if (retval == SLP_COMPLETE || retval > SLP_OTHER) {
4927                         NewContLine();
4928                         ff_AddString("/partial");
4929                 }
4930         }
4931 /* put all /note last */
4932         for (gbqp=sfp->qual; gbqp; gbqp=gbqpnext) {
4933                 gbqpnext=gbqp->next;
4934                 if (StringCmp(gbqp->qual, "note") == 0) {
4935                         tmp = extract_qual(&(sfp->qual), gbqp);
4936                         head = tie_next_qual(head, tmp);
4937                 }
4938         }
4939         if (head) {
4940                 sfp->qual = tie_next_qual(sfp->qual, head);
4941         }
4942         for (gbqp=sfp->qual; gbqp; gbqp=gbqp->next) {
4943                 gbqual_index = GBQualNameValid(gbqp->qual);
4944                 if (gbqual_index != -1) {
4945                         NewContLine();
4946                         if (first) {
4947                                 ff_AddChar( '/');
4948                                 ff_AddString(gbqp->qual);
4949                         }
4950                         class_qual = ParFlat_GBQual_names[gbqual_index].gbclass;
4951                         if (class_qual == Class_none) {
4952                                 class_equal=CheckForEqualSign(gbqp->qual);
4953                                 if (class_equal == 1)
4954                                         continue;
4955                         }
4956                         if (first) {
4957                                 ff_AddChar('=');
4958                         }
4959                         if (class_qual == Class_text && 
4960                                 StringCmp(gbqp->val, "\"\"") == 0) { 
4961                         /* an empty string is considered legal */ 
4962                                 ff_AddString(gbqp->val);
4963                                 continue;
4964                         }
4965                         if (get_www() && (class_qual == Class_text
4966                                                         || class_qual == Class_note)) {
4967                                 buf = www_featloc(gbqp->val);
4968                         } else {
4969                                 buf = StringSave(gbqp->val);
4970                         }
4971                         if (class_qual == Class_text || class_qual == Class_none
4972                                 || class_qual == Class_ecnum)
4973                                 ff_AddString("\"");
4974                         if (first && class_qual == Class_note)
4975                                         ff_AddString("\"");                                     
4976                         if (class_qual == Class_note) {
4977                                 www_note_gi(buf);
4978                         } else if (class_qual != Class_none) {
4979                                 if (StringCmp(gbqp->qual, "transl_table") == 0) {
4980                                         www_gcode(buf);
4981                                 } else if (StringCmp(gbqp->qual, "db_xref") == 0) {
4982                                         www_db_xref(buf);
4983                                 } else {
4984                                         ff_AddString(buf);
4985                                 }
4986                         }
4987                         if (class_qual == Class_text || class_qual == Class_none
4988                                 || class_qual == Class_ecnum)
4989                                 ff_AddString("\"");
4990                         if (gbqp->next == NULL && class_qual == Class_note)
4991                                 ff_AddString("\"");
4992                         if (buf) {
4993                                 MemFree(buf);
4994                         }
4995                         if (class_qual == Class_note) {
4996                                 if (first == TRUE)
4997                                         first = FALSE;
4998                         }
4999                 } else if (format == GENPEPT_FMT) {
5000                         if (StringCmp(gbqp->qual, "site_type") == 0) {
5001                                 NewContLine();
5002                                 ff_AddChar('/');
5003                                 ff_AddString(gbqp->qual);
5004                                 ff_AddChar('=');
5005                                 ff_AddString("\"");
5006                                 ff_AddString(gbqp->val);
5007                                 ff_AddString("\"");
5008                         } else if (StringCmp(gbqp->qual, "bond_type") == 0) {
5009                                 NewContLine();
5010                                 ff_AddChar('/');
5011                                 ff_AddString(gbqp->qual);
5012                                 ff_AddChar('=');
5013                                 ff_AddString("\"");
5014                                 ff_AddString(gbqp->val);
5015                                 ff_AddString("\"");
5016                         } else if (StringCmp(gbqp->qual, "region_name") == 0) {
5017                                 NewContLine();
5018                                 ff_AddChar('/');
5019                                 ff_AddString(gbqp->qual);
5020                                 ff_AddChar('=');
5021                                 ff_AddString("\"");
5022                                 ff_AddString(gbqp->val);
5023                                 ff_AddString("\"");
5024                         } else if (StringCmp(gbqp->qual, "sec_str_type") == 0) {
5025                                 NewContLine();
5026                                 ff_AddChar('/');
5027                                 ff_AddString(gbqp->qual);
5028                                 ff_AddChar('=');
5029                                 ff_AddString("\"");
5030                                 ff_AddString(gbqp->val);
5031                                 ff_AddString("\"");
5032                         } else if (StringCmp(gbqp->qual, "non-std-residue") == 0) {
5033                                 NewContLine();
5034                                 ff_AddChar('/');
5035                                 ff_AddString(gbqp->qual);
5036                                 ff_AddChar('=');
5037                                 ff_AddString("\"");
5038                                 ff_AddString(gbqp->val);
5039                                 ff_AddString("\"");
5040                         } else if (StringCmp(gbqp->qual, "heterogen") == 0) {
5041                                 NewContLine();
5042                                 ff_AddChar('/');
5043                                 ff_AddString(gbqp->qual);
5044                                 ff_AddChar('=');
5045                                 ff_AddString("\"");
5046                                 ff_AddString(gbqp->val);
5047                                 ff_AddString("\"");
5048                         } else if (StringCmp(gbqp->qual, "name") == 0) {
5049                                 NewContLine();
5050                                 ff_AddChar('/');
5051                                 ff_AddString(gbqp->qual);
5052                                 ff_AddChar('=');
5053                                 ff_AddString("\"");
5054                                 ff_AddString(gbqp->val);
5055                                 ff_AddString("\"");
5056                         } else if (StringCmp(gbqp->qual, "coded_by") == 0) {
5057                                 NewContLine();
5058                                 ff_AddChar('/');
5059                                 ff_AddString(gbqp->qual);
5060                                 ff_AddChar('=');
5061                                 ff_AddString("\"");
5062                                 ff_AddString(gbqp->val);
5063                                 ff_AddString("\"");
5064                         }
5065                 } else if (ASN2FF_VALIDATE_FEATURES == FALSE) {
5066                         NewContLine();
5067                         ff_AddChar('/');
5068                         ff_AddString(gbqp->qual);
5069                         if (gbqp->val != NULL && StringLen(gbqp->val) != 0) {
5070                                 ff_AddChar('=');
5071                                 ff_AddString("\"");
5072                                 ff_AddString(gbqp->val);
5073                                 ff_AddString("\"");
5074                         }
5075                 }
5076         }
5077 
5078         ff_EndPrint();
5079 
5080         return 1;
5081 } /*PrintImpFeat */
5082                 
5083 #define NOEQUALTOTAL 13
5084 NLM_EXTERN Int2 CheckForEqualSign(CharPtr qual)
5085                                 /* this have to be changed. Tatiana 02.28.95 */
5086 {
5087         Int2 i;
5088         static CharPtr NoEqualSign[NOEQUALTOTAL] = {
5089         "chloroplast",
5090         "chromoplast",
5091         "cyanelle", 
5092         "germline",
5093         "kinetoplast",
5094         "macronuclear", 
5095         "mitochondrion",
5096         "partial",
5097         "proviral",
5098         "pseudo",
5099         "rearranged",
5100         "virion",
5101         "focus"
5102         };
5103 
5104         if (qual == NULL)
5105                 return -1;
5106 
5107         for (i=0; i < NOEQUALTOTAL; i++)
5108                 if (StringICmp(qual, NoEqualSign[i]) == 0)
5109                         return 1;
5110 
5111         return 0;
5112 
5113 }
5114 
5115 /*-------------------------- delete_qual() ----------------------------*/
5116 /*************************************************************************
5117 *   delete_qual:
5118 *   -- return TRUE if found the "qual" in the "qlist", also remove
5119 *      the "qual" from list
5120 *                                                                7-8-93
5121 **************************************************************************/
5122 NLM_EXTERN Boolean delete_qual(GBQualPtr PNTR qlist, CharPtr qual)
5123 {
5124    GBQualPtr curq, preq;
5125 
5126    for (preq = NULL, curq = *qlist; curq != NULL; curq = curq->next) {
5127        if (StringCmp(curq->qual, qual) == 0) {
5128           if (preq == NULL)
5129              preq = *qlist = curq->next;
5130           else
5131              preq->next = curq->next;
5132 
5133           curq->next = NULL;
5134           GBQualFree(curq);
5135           curq = NULL;
5136 
5137           return (TRUE);
5138        }
5139 
5140        preq = curq;
5141    }
5142 
5143    return (FALSE);
5144 
5145 }
5146 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.