NCBI C Toolkit Cross Reference

C/api/asn2ff4.c


  1 /*   asn2ff4.c
  2 * ===========================================================================
  3 *
  4 *                            PUBLIC DOMAIN NOTICE
  5 *            National Center for Biotechnology Information (NCBI)
  6 *
  7 *  This software/database is a "United States Government Work" under the
  8 *  terms of the United States Copyright Act.  It was written as part of
  9 *  the author's official duties as a United States Government employee and
 10 *  thus cannot be copyrighted.  This software/database is freely available
 11 *  to the public for use. The National Library of Medicine and the U.S.
 12 *  Government do not place any restriction on its use or reproduction.
 13 *  We would, however, appreciate having the NCBI and the author cited in
 14 *  any work or product based on this material
 15 *
 16 *  Although all reasonable efforts have been taken to ensure the accuracy
 17 *  and reliability of the software and data, the NLM and the U.S.
 18 *  Government do not and cannot warrant the performance or results that
 19 *  may be obtained by using this software or data. The NLM and the U.S.
 20 *  Government disclaim all warranties, express or implied, including
 21 *  warranties of performance, merchantability or fitness for any particular
 22 *  purpose.
 23 *
 24 * ===========================================================================
 25 *
 26 * File Name:  asn2ff4.c
 27 *
 28 * Author:  Karl Sirotkin, Tom Madden, Tatiana Tatusov
 29 *
 30 * Version Creation Date:   7/15/95
 31 *
 32 * $Revision: 6.51 $
 33 *
 34 * File Description: 
 35 *
 36 * Modifications:  
 37 * --------------------------------------------------------------------------
 38 * Date     Name        Description of modification
 39 * -------  ----------  -----------------------------------------------------
 40 *
 41 *
 42 * ==========================================================================
 43 */
 44 
 45 
 46 /*************************************
 47 *
 48 =======
 49 * $Log: asn2ff4.c,v $
 50 * Revision 6.51  2001/10/02 16:13:15  yaschenk
 51 * GetSeqIdForGI returns SeqIdDup() - needs freeing
 52 *
 53 * Revision 6.50  2001/09/06 19:15:19  yaschenk
 54 * removing memory leak - AsnIoMemCopy is done twice
 55 *
 56 * Revision 6.49  2001/09/05 23:31:34  tatiana
 57 *  synonym is added to Genestruct with choice 1
 58 *
 59 * Revision 6.48  2001/08/07 16:49:41  kans
 60 * use NUM_SEQID, added third party annotation SeqIDs to one more place
 61 *
 62 * Revision 6.47  2001/08/07 15:51:08  kans
 63 * use NUM_SEQID, added third party annotation seqids
 64 *
 65 * Revision 6.46  2001/07/18 14:50:13  kans
 66 * gather features with gsc.useSeqMgrIndexes if genpept, raw, indexing requested, and IndexedGetDescrForDiv to speed up finding division
 67 *
 68 * Revision 6.45  2001/06/26 20:41:16  kans
 69 * FlatLocPoint as last resort prints gi|#####
 70 *
 71 * Revision 6.44  2001/06/25 19:18:13  kans
 72 * get_feats SEQFEAT_CDREGION finds core without changing scope, if using indexes it indexes the entity if necessary, never goes to old gather code
 73 *
 74 * Revision 6.43  2001/04/12 22:48:52  yaschenk
 75 * removing excessive ObjMgr calls
 76 *
 77 * Revision 6.42  2001/04/05 21:44:51  tatiana
 78 * additional synonym in GeneRefInfoToGsp()
 79 *
 80 * Revision 6.41  2001/02/01 23:06:32  tatiana
 81 * check for NULL added in MatchNAGeneToFeat
 82 *
 83 * Revision 6.40  2001/01/19 17:32:23  yaschenk
 84 * Removed BioseqLockById when only GetSeqIdForGI is needed
 85 *
 86 * Revision 6.39  2000/12/05 22:24:34  tatiana
 87 * bug fixed in FeatMatch
 88 *
 89 * Revision 6.38  2000/11/21 20:52:44  tatiana
 90 * fixes in CreateImpFeatFromProt
 91 *
 92 * Revision 6.37  2000/11/02 01:53:07  tatiana
 93 * static CompXref() added in FeatMatch
 94 *
 95 * Revision 6.36  2000/06/05 17:52:18  tatiana
 96 * increase size of feature arrays to Int4
 97 *
 98 * Revision 6.35  2000/04/13 14:17:32  ostell
 99 * fixed support for lim->tr. FlatLocHalfCaret always assumed lim->tl
100 *
101 * Revision 6.34  2000/04/03 23:28:19  tatiana
102 * added showSeqLoc for web feature view
103 *
104 * Revision 6.33  2000/01/21 17:17:52  kans
105 * MatchAAGeneToFeat now calls SeqMgrGetOverlappingGene on CDS first, avoids multiple targeted gathers, just like MatchNAGeneToFeat has done since feature indexing was first implemented
106 *
107 * Revision 6.32  1999/12/22 22:08:19  tatiana
108 * strand check fixed
109 *
110 * Revision 6.31  1999/12/13 19:51:47  tatiana
111 * Seq_strand_unknown added to strand check
112 *
113 * Revision 6.30  1999/12/09 14:33:07  tatiana
114 * check the strand for mapping gene
115 *
116 * Revision 6.29  1999/11/05 14:55:53  tatiana
117 * check for embl_feat added to get_feats in Gather
118 *
119 * Revision 6.28  1999/10/07 15:17:20  bazhin
120 * Bug fixed.
121 *
122 * Revision 6.27  1999/10/06 20:21:50  bazhin
123 * Removed memory leak in get_feats() function.
124 *
125 * Revision 6.26  1999/06/04 21:03:52  tatiana
126 * a bug fixed in MatchAAGeneToFeat()
127 *
128 * Revision 6.25  1999/04/29 22:49:20  tatiana
129 * added REFSEQ dbxrefs in GenPept format
130 *
131 * Revision 6.24  1999/03/30 22:23:33  kans
132 * pseudo can be on grp or sfp
133 *
134 * Revision 6.23  1999/03/30 19:47:40  tatiana
135 * use non-strict binding for REFSEQ
136 *
137 * Revision 6.22  1999/03/25 00:26:38  kans
138 * restored first sort in SortOrganizeFeat
139 *
140 * Revision 5.31  1997/06/19 18:37:07  vakatov
141 * [WIN32,MSVC++]  Adopted for the "NCBIOBJ.LIB" DLL'ization
142 *
143 * Revision 5.30  1997/04/25 15:35:39  tatiana
144 * EMBL_PREFNUM added
145 *
146  * Revision 5.29  1997/03/13  17:58:27  tatiana
147  * *** empty log message ***
148  *
149  * Revision 5.28  1997/02/27  16:20:23  kans
150  * check for sfp != NULL in UniqueGeneName
151  *
152  * Revision 5.27  1997/01/27  19:14:39  tatiana
153  * *** empty log message ***
154  *
155  * Revision 5.25  1997/01/13  23:27:10  tatiana
156  * added check for NULL in UniqueGeneNames()
157  *
158  * Revision 5.24  1997/01/13  22:32:48  tatiana
159  * *** empty log message ***
160  *
161  * Revision 5.23  1997/01/13  21:44:50  tatiana
162  * a bug fixed in CreateImpFeatFromProt()
163  *
164  * Revision 5.22  1997/01/06  19:55:22  tatiana
165  * convert site and bond features to dna misc features
166  *
167  * Revision 5.21  1997/01/02  22:50:19  tatiana
168  * *** empty log message ***
169  *
170  * Revision 5.20  1996/12/17  22:49:02  tatiana
171  * StoreFeat() changed to StoreFeatFree for converted peptide feats
172  *
173  * Revision 5.19  1996/10/22  17:48:33  tatiana
174  * check for right-truncated genes added in get_feats callback
175  *
176  * Revision 5.17  1996/09/27  22:07:05  tatiana
177  * no gene binding to repeat_region feature
178  *
179  * Revision 5.16  1996/09/25  18:06:23  tatiana
180  * SEQFEAT_COMMENT is stored in a generic feature list
181  *
182  * Revision 5.15  1996/09/17  14:59:40  tatiana
183  * error msg for identical feats added
184  *
185  * Revision 5.14  1996/09/13  17:01:31  kans
186  * feature field is now excpt, not except, and removed extraneous line
187  *
188  * Revision 5.13  1996/09/13  16:48:54  tatiana
189  * except and exp_ev added in CreateImpFeatFromProt
190  *
191  * Revision 5.11  1996/09/13  16:26:55  kans
192  * get_feats can NULL out sfp, so should test before dereferencing
193  *
194  * Revision 5.10  1996/09/12  17:52:53  tatiana
195 * less peptide feats were missing some qualifiers
196  *
197  * Revision 5.9  1996/09/03  19:52:00  tatiana
198  * extra_loc added in StoreFeat
199  *
200  * Revision 5.8  1996/08/06  20:30:46  kans
201  * SeqIdFindBest called to handle local IDs and genbank IDs coexisting
202  *
203  * Revision 5.7  1996/07/30  19:20:44  tatiana
204  * Don't bind gene to gene in MatchNAToGene()
205  *
206  * Revision 5.6  1996/07/30  16:37:16  tatiana
207  * a bug fixed in UniqueFeat()
208  *
209  * Revision 5.5  1996/07/23  22:34:11  tatiana
210 * prot feats in genpept (peptides)
211  *
212  * Revision 5.4  1996/07/16  15:45:24  tatiana
213  * *** empty log message ***
214  *
215  * Revision 5.3  1996/07/02  18:10:50  tatiana
216  * calculate hash in StoreFeat
217  *
218  * Revision 5.2  1996/06/11  17:05:59  tatiana
219  * *** empty log message ***
220  *
221  * Revision 5.1  1996/06/11  15:44:00  tatiana
222  * Support Prot-ref feature mapping
223  *
224  * Revision 4.12  1996/05/16  20:59:50  tatiana
225 * RemoveRedundantFeats added
226  *
227  * Revision 4.11  1996/03/25  15:21:24  tatiana
228  * *** empty log message ***
229  *
230  * Revision 4.10  1996/03/08  15:03:19  tatiana
231 * don't bind gene to biosource feature
232  * a bug fixed in FlatLocPoint
233  *
234  * Revision 4.9  1996/03/04  17:11:20  ostell
235  * added support for ignore_top features
236  *
237  * Revision 4.8  1996/02/28  04:53:06  ostell
238 * changes to support segmented master sequences
239  *
240  * Revision 4.7  1996/02/15  15:53:43  tatiana
241  * Gather for temp loaded items added
242  *
243  * Revision 4.6  1996/01/29  22:35:36  tatiana
244  * *** empty log message ***
245  *
246  * Revision 4.5  1995/12/20  22:40:55  tatiana
247  * GetDBXrefFromGene() added
248  *
249  * Revision 4.4  1995/11/22  19:14:03  tatiana
250  * a bug fixed for GenPept
251  *
252  * Revision 4.3  1995/11/22  19:01:07  tatiana
253  * a bug fixed in orphan genes printing
254  *
255  * Revision 4.2  1995/11/17  21:28:35  kans
256  * asn2ff now uses gather (Tatiana)
257  *
258  * Revision 4.1  1995/08/01  14:52:03  tatiana
259  * change SeqIdPrint to SeqIdWrite.
260  *
261  * Revision 1.15  1995/07/17  19:33:20  kans
262  * parameters combined into Asn2ffJobPtr structure
263  *
264  * Revision 1.14  1995/06/19  21:40:02  kans
265  * Tatiana's first major reorganization, moving printing, adding HTML
266  *
267  * Revision 1.13  1995/05/15  21:46:05  ostell
268  * added Log line
269  *
270 *
271 **************************************/
272 
273 #include <asn2ffp.h>
274 #include <a2ferrdf.h>
275 #include <gather.h>
276 #include <asn2ff6.h>
277 #include <explore.h>
278 
279 #define CTX_2GB_LOCATION_TROUBLE 0
280 #define CTX_2GB_NOT_IMPLEMENTED 1
281 
282 /******************** Function Prototypes *********************************/
283 
284 NLM_EXTERN CharPtr FlatLocHalf PROTO ((CharPtr buf, Int4 base, IntFuzzPtr fuzz));
285 NLM_EXTERN CharPtr FlatLocHalfCaret PROTO ((CharPtr buf, Int4 base, IntFuzzPtr fuzz));
286 NLM_EXTERN Boolean FlatLocPoint PROTO ((SeqIdPtr pointIdPtr, SeqIdPtr this_sidp, CharPtr piecebuf, Int4 point, IntFuzzPtr pointfuzzPtr));
287 NLM_EXTERN Boolean FlatLocCaret PROTO ((SeqIdPtr pointIdPtr, SeqIdPtr this_sidp, CharPtr piecebuf, Int4 point, IntFuzzPtr pointfuzzPtr));
288 NLM_EXTERN Boolean FlatVirtLoc PROTO ((BioseqPtr bsp, ValNodePtr location));
289 NLM_EXTERN Boolean FlatLocElement PROTO ((BioseqPtr bsp, ValNodePtr location, CharPtr buf));
290 NLM_EXTERN CharPtr complement_FlatLoc PROTO ((SeqIdPtr this_sidp, Boolean PNTR is_okPt, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location));
291 NLM_EXTERN CharPtr FlatSmartStringMove PROTO ((CharPtr total_buf, Int4Ptr lengthPt, CharPtr temp, CharPtr string));
292 NLM_EXTERN Boolean FlatNullAhead PROTO ((BioseqPtr bsp, ValNodePtr location));
293 NLM_EXTERN CharPtr FlatPackedPoint PROTO ((CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, PackSeqPntPtr pspp, SeqIdPtr this_sidp));
294 NLM_EXTERN CharPtr do_FlatLoc PROTO ((Boolean PNTR is_okPt, Boolean ok_to_complement, SeqIdPtr this_sidp, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location));
295 NLM_EXTERN CharPtr group_FlatLoc PROTO ((SeqIdPtr this_sidp, Boolean PNTR is_okPt, int which, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location));
296 NLM_EXTERN Boolean is_real_id PROTO ((SeqIdPtr pointIdPtr, SeqIdPtr this_sidp));
297 NLM_EXTERN void Bond PROTO ((SeqBondPtr bondp, SeqIdPtr this_sidp, CharPtr buf));
298 NLM_EXTERN Boolean LookForFuzz PROTO ((SeqLocPtr head));
299 /*************************************************************************/
300 
301 /*--- the number of characters per location element is less
302       than 130.  The maximum would be for a bond with both accessions
303       and both with (n.m) locations.
304 ----*/
305 #define MAX_CHAR_LOCATION 135
306 
307 static CharPtr lim_str [5] = {"", ">","<", ">", "<"};
308 
309 /*----------- FlatLocHalf ()------*/
310 NLM_EXTERN CharPtr FlatLocHalf 
311 (CharPtr buf, Int4 base, IntFuzzPtr fuzz)
312 {
313         char localbuf [30];
314         Uint1 index;
315 
316 /*------
317 typedef struct intfuzz {
318    Uint1 choice;        1=p-m, 2=range, 3=pct, 4=lim 
319    Int4 a, b;           a=p-m,max,pct,orlim, b=min 
320 } IntFuzz, PNTR IntFuzzPtr;
321 
322 Int-fuzz ::= CHOICE {
323     p-m INTEGER ,                    -- plus or minus fixed amount
324     range SEQUENCE {                 -- max to min
325         max INTEGER ,
326         min INTEGER } ,
327     pct INTEGER ,                    -- % plus or minus (x10) 0-1000
328     lim ENUMERATED {                 -- some limit value
329         unk (0) ,                    -- unknown
330         gt (1) ,                     -- greater than
331         lt (2) ,                     -- less than
332         tr (3) ,                     -- space to right of position
333         tl (4) ,                     -- space to left of position
334         other (255) } }              -- something else
335 -------*/
336 
337         localbuf[0] = '\0';
338         buf[0] = '\0';
339 
340         if (fuzz){
341                 /* Fuzz_found = TRUE; */
342                 switch (fuzz -> choice){
343                         case 1:
344                                 sprintf(localbuf,"(%ld.%ld)", (long) (base - fuzz -> a),
345                                         (long) (base + fuzz -> a));
346                                 break;
347                         case 2:
348                                 sprintf(localbuf,"(%ld.%ld)", (long) (1+fuzz -> b),
349                                         (long) (1+fuzz -> a));
350                                 break;
351                         case 3:
352                                 sprintf(localbuf,"(%ld.%ld)", 
353                                         (long) (base - base* ((double) fuzz -> a/1000.0 )),
354                                         (long) (base +base*( (double) fuzz -> a/1000.0 )));
355                                 break;
356                         case 4:
357                                 index = (Uint1) fuzz -> a;
358                                 if (index > 4) index = 0;
359                                 sprintf(localbuf,"%s%ld", lim_str[index], (long) base); 
360                                 break;
361                         default:
362                         sprintf(localbuf,"%ld", (long) base);
363                 }
364         } else {
365                 sprintf(localbuf,"%ld", (long) base);
366         }
367 
368         StringMove(buf, localbuf);
369 
370         return buf;
371 }
372 
373 /*----------- FlatLocHalfCaret ()------*/
374 
375 NLM_EXTERN CharPtr FlatLocHalfCaret
376 (CharPtr buf, Int4 base, IntFuzzPtr fuzz)
377 {
378         char localbuf [30];
379         Uint1 index;
380 
381         localbuf[0] = '\0';
382         buf[0] = '\0';
383 
384         if (fuzz){
385                 /* Fuzz_found = TRUE; */
386                 switch (fuzz -> choice){
387                         case 1:
388                                 sprintf(localbuf,"(%ld.%ld)..(%ld.%ld)", 
389                                         (long) (base - fuzz -> a), (long) base, (long) base,
390                                                                                                         (long) (base + fuzz -> a));
391                                 break;
392                         case 2:
393                                 sprintf(localbuf,"%ld^%ld", (long) (1+fuzz -> b),
394                                         (long) (1+fuzz -> a));
395                                 break;
396                         case 3:
397                                 sprintf(localbuf,"%ld^%ld", 
398                                         (long) (base - base* ((double) fuzz -> a/1000.0 )),
399                                         (long) (base +base*( (double) fuzz -> a/1000.0 )));
400                                 break;
401                         case 4:
402                                 if (fuzz->a == 3) /* space to right */
403                                 {
404                                         sprintf(localbuf, "%ld^%ld", (long)(base), (long)(base+1));
405                                 }
406                                 else if ((fuzz->a == 4) && (base > 1))   /* space to left */
407                                 {
408                                         sprintf(localbuf, "%ld^%ld", (long)(base-1), (long)(base));
409                                 }
410                                 else{
411                                         index = (Uint1) fuzz -> a;
412                                         if (index > 4) index = 0;
413                                         sprintf(localbuf,"%s%ld", 
414                                                 lim_str[index], (long) base); 
415                                 }
416                                 break;
417                         default:
418                                 sprintf(localbuf,"%ld", (long) base);
419                                 break;
420                 }
421         }else{
422                 sprintf(localbuf,"%ld", (long) base);
423         }
424 
425         StringMove(buf, localbuf);
426 
427         return buf;
428 }
429 
430 NLM_EXTERN Boolean FlatLocPoint (SeqIdPtr pointIdPtr, SeqIdPtr this_sidp, CharPtr piecebuf, Int4 point, IntFuzzPtr pointfuzzPtr)
431 /* FLATLOC_CONTEXT_LOC is removed 08.31.95 */
432 {
433         SeqIdPtr use_id,free_seqid=NULL;
434         Char buf_space[MAX_CHAR_LOCATION +1], halfbuf_space[MAX_CHAR_LOCATION +1];
435         CharPtr buf, halfbuf, temp;
436         static Boolean order_initialized = FALSE;
437         static Uint1 order[NUM_SEQID];
438         ObjectIdPtr ob;
439         
440 if ( ! order_initialized){
441         int dex;
442         for (dex=0; dex < NUM_SEQID; dex ++)
443                 order[dex] = 255;
444         order_initialized = TRUE;
445                 order[SEQID_GENBANK ] = 1;
446                 order[SEQID_EMBL ] = 2;
447                 order[SEQID_DDBJ ] = 3;
448                 order[SEQID_LOCAL ] =4;
449                 order[SEQID_OTHER ] =5;
450                 order[SEQID_TPG ] = 6;
451                 order[SEQID_TPE ] = 7;
452                 order[SEQID_TPD ] = 8;
453                 order[SEQID_GIBBSQ ] =9;
454                 order[SEQID_GIBBMT ] =10;
455                 order[SEQID_PRF ] =11;
456                 order[SEQID_PDB ] =12;
457                 order[SEQID_PIR ] =13;
458                 order[SEQID_SWISSPROT ] =14;
459                 order[SEQID_PATENT ] =15;
460                 order[SEQID_GI ] =16;
461                 order[SEQID_GENERAL ] =17;
462                 order[SEQID_GIIM ] =18;
463 }
464 
465         buf = buf_space;
466         halfbuf = halfbuf_space;
467         piecebuf[0] = '\0';
468         buf[0] = '\0';
469         temp = buf;
470 
471         if (pointIdPtr) {
472                 if ( ! SeqIdIn ( pointIdPtr, this_sidp)){
473                         if (pointIdPtr->choice == SEQID_GI) {
474                                 free_seqid = use_id = GetSeqIdForGI(pointIdPtr->data.intvalue); /** returns SeqIdDup **/
475                         } else {
476                                 use_id = pointIdPtr;
477                         }
478                         
479                         SeqIdWrite( use_id, buf, PRINTID_TEXTID_ACC_VER, MAX_CHAR_LOCATION);
480                         if(*buf == '\0') {
481                                 SeqIdWrite(use_id, buf,PRINTID_FASTA_LONG, MAX_CHAR_LOCATION);
482                         }
483                         if (*buf == '\0' && use_id == NULL && pointIdPtr->choice == SEQID_GI) {
484                                 SeqIdWrite (pointIdPtr, buf, PRINTID_FASTA_LONG, MAX_CHAR_LOCATION);
485                         }
486                         if (*buf == '\0') {
487                                 StringCpy(buf,"?00000");
488                                 if (use_id && use_id -> choice == SEQID_LOCAL){
489                                         ob = (ObjectIdPtr) use_id -> data.ptrvalue;
490                                         if (ob ->str) {
491                                                 if (*ob -> str) {
492                                                         StringNCpy(buf, ob ->str, 12);
493                                                 }
494                                         }
495                                 }
496                         }
497                         temp = StringMove (temp, buf);
498                         temp = StringMove(temp,":");
499                 }
500         }
501         FlatLocHalf(halfbuf, point+1, pointfuzzPtr);
502         temp = StringMove(temp, halfbuf);
503         StringMove(piecebuf, buf);
504         if(free_seqid) SeqIdFree(free_seqid);
505         return TRUE;
506 }
507 
508 NLM_EXTERN Boolean FlatLocCaret 
509 (SeqIdPtr pointIdPtr, SeqIdPtr this_sidp, CharPtr piecebuf, Int4 point, IntFuzzPtr pointfuzzPtr)
510 {
511         BioseqPtr bs;
512         Char buf_space[MAX_CHAR_LOCATION +1], halfbuf_space[MAX_CHAR_LOCATION +1];
513         CharPtr buf, halfbuf, temp;
514         SeqIdPtr use_id;
515         static Boolean order_initialized = FALSE;
516         static Uint1 order[NUM_SEQID];
517         
518 if ( ! order_initialized){
519         int dex;
520         for (dex=0; dex < NUM_SEQID; dex ++)
521                 order[dex] = 255;
522         order_initialized = TRUE;
523                 order[SEQID_GENBANK ] = 1;
524                 order[SEQID_EMBL ] = 2;
525                 order[SEQID_DDBJ ] = 3;
526                 order[SEQID_LOCAL ] =4;
527                 order[SEQID_OTHER ] =5;
528                 order[SEQID_TPG ] = 6;
529                 order[SEQID_TPE ] = 7;
530                 order[SEQID_TPD ] = 8;
531                 order[SEQID_GIBBSQ ] =9;
532                 order[SEQID_GIBBMT ] =10;
533                 order[SEQID_PRF ] =11;
534                 order[SEQID_PDB ] =12;
535                 order[SEQID_PIR ] =13;
536                 order[SEQID_SWISSPROT ] =14;
537                 order[SEQID_PATENT ] =15;
538                 order[SEQID_GI ] =16;
539                 order[SEQID_GENERAL ] =17;
540                 order[SEQID_GIIM ] =18;
541 }
542 
543         buf = &(buf_space[0]);
544         halfbuf = &(halfbuf_space[0]);
545         piecebuf[0] = '\0';
546         buf[0] = '\0';
547         temp = buf;
548 
549                 if (pointIdPtr)
550                 if ( ! SeqIdIn ( pointIdPtr, this_sidp)){
551                         use_id = pointIdPtr;
552                         bs = BioseqFind(use_id);
553                         if ( bs ){
554                                 use_id = SeqIdSelect ( bs -> id, order,NUM_SEQID);
555                         }
556                         SeqIdWrite( use_id, buf, PRINTID_TEXTID_ACC_VER, MAX_CHAR_LOCATION);
557                         temp = StringMove (temp, buf);
558                         temp = StringMove(temp,":");
559                 }
560                 FlatLocHalfCaret(halfbuf, point+1, pointfuzzPtr);
561     temp = StringMove(temp, halfbuf);
562                 StringMove(piecebuf, buf);
563 
564         return TRUE;
565 }
566 
567 NLM_EXTERN Boolean FlatVirtLoc(BioseqPtr bsp, ValNodePtr location)
568 {
569         Boolean retval = FALSE;
570         SeqIntPtr sintp;
571         BioseqPtr this_bsp=NULL;
572         SeqIdPtr this_sidp=NULL, sidp = NULL;
573         SeqPntPtr spp;
574 
575         this_bsp = bsp;
576         this_sidp = this_bsp -> id;
577 
578 
579         switch ( location -> choice){
580                 case SEQLOC_MIX:
581                 case SEQLOC_EQUIV:
582                 case SEQLOC_PACKED_INT:
583                 case  SEQLOC_PACKED_PNT:
584                 case  SEQLOC_NULL:
585                 break;
586                 case  SEQLOC_EMPTY:
587                         break;
588                 case  SEQLOC_WHOLE:
589                               sidp = (SeqIdPtr) location -> data.ptrvalue;
590                         if (! sidp){
591                                 if (ASN2FF_SHOW_ERROR_MSG == TRUE)
592                                 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
593                                                         "FlatLocElement: whole location without ID:");
594                 retval = TRUE;
595                 break;
596                         }
597 /*--- no break on purpose ---*/
598                 case  SEQLOC_INT:
599                         if ( location -> choice == SEQLOC_INT){
600                                 sintp = (SeqIntPtr) location -> data.ptrvalue;
601                                 sidp = sintp-> id;
602                         }
603                 
604                         if ( ! is_real_id(sidp, this_sidp)){
605                                 retval = TRUE;
606                         }
607 
608                         break;
609                 case  SEQLOC_PNT:
610                         spp = (SeqPntPtr) ( location -> data.ptrvalue);
611                         if ( ! is_real_id(spp-> id, this_sidp)){
612                                 retval = TRUE;
613                         }
614                         break;
615                 case SEQLOC_BOND:
616                         break;
617                 case SEQLOC_FEAT:
618                 /*if ( !  (Flat_Be_quiet&1))*/
619                 if (ASN2FF_SHOW_ERROR_MSG == TRUE)
620                                 ErrPostEx(SEV_INFO, CTX_NCBI2GB, CTX_2GB_NOT_IMPLEMENTED,
621                                         "FlatVirtLoc:SEQLOC_FEAT not implemented");
622                         break;
623         }
624 
625 
626         return retval;
627 }
628 
629 NLM_EXTERN Boolean FlatLocElement (BioseqPtr bsp, ValNodePtr location, CharPtr buf)
630 {
631         Char localbuf_space[MAX_CHAR_LOCATION +1], piecebuf_space[MAX_CHAR_LOCATION +1];
632         CharPtr localbuf , piecebuf ;
633         CharPtr temp ;
634         SeqIntPtr sintp;
635         SeqPntPtr spp;
636         SeqIdPtr this_sidp=NULL, sidp=NULL;
637         SeqIntPtr whole_intPtr =NULL;
638         Boolean retval=TRUE;
639         Boolean whole_trouble;
640 
641         localbuf = &(localbuf_space[0]);
642         piecebuf = &(piecebuf_space[0]);
643         temp = localbuf;
644 
645         this_sidp = bsp->id;
646         sidp = this_sidp;
647 
648         localbuf[0] = '\0';
649         buf[0] = '\0';
650 
651         switch ( location -> choice){
652                 case SEQLOC_MIX:
653                 case SEQLOC_EQUIV:
654                 case SEQLOC_PACKED_INT:
655                 case  SEQLOC_PACKED_PNT:
656                 case  SEQLOC_NULL:
657                         if (ASN2FF_SHOW_ERROR_MSG == TRUE)
658                                 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
659                                 "Unexpected internal complex type");
660                         retval = FALSE;
661                 break;
662                 case  SEQLOC_EMPTY:
663                         break;
664                 case  SEQLOC_WHOLE:
665                 whole_trouble=TRUE;
666                 sidp = (SeqIdPtr) location -> data.ptrvalue;
667                 if (sidp){
668             bsp = BioseqFind(sidp);
669                         if (bsp) {
670                                 sintp = whole_intPtr = MemNew( sizeof(SeqInt) );
671                                 whole_intPtr -> id = sidp;
672                                 whole_intPtr -> from = 0;
673                                 whole_intPtr -> to = -1;
674                                 if ( bsp -> length > 0)
675                                 {
676                                         whole_intPtr -> to = bsp -> length -1 ;
677                                         whole_trouble=FALSE;
678                                 }
679                         }
680                 }
681                 if (whole_trouble) {
682                         if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
683                                 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
684         "FlatLocElement: whole location without being able to look up limits: %s",
685                                                 sidp?SeqIdWrite (sidp, localbuf, PRINTID_FASTA_LONG, 
686                                                                                                 MAX_CHAR_LOCATION):"No Id");
687                         }
688                         retval = FALSE;
689                         break;
690                 } 
691                 /*
692                 else if (whole_trouble) {
693                                 SeqIdWrite (sidp, localbuf, 
694                                         PRINTID_FASTA_LONG, MAX_CHAR_LOCATION);
695                         break;
696                 }
697                 */
698 /*--- no break on purpose ---*/
699                 case  SEQLOC_INT:
700                         if ( location -> choice == SEQLOC_INT){
701                                 sintp = (SeqIntPtr) location -> data.ptrvalue;
702                         }
703                 
704                         if (is_real_id(sintp-> id, this_sidp)){
705                                 if (sintp -> strand == 2) /* minus strand */
706                                         temp = StringMove(temp, "complement("); /* ) vi match */
707                                 FlatLocPoint (sintp->id, this_sidp, piecebuf, 
708                                                 sintp -> from, sintp -> if_from );
709                                 temp = StringMove(temp, piecebuf);
710                                 if ( sintp -> to >0 && (sintp -> to != sintp -> from 
711                                                 || sintp -> if_from ||  sintp -> if_to )){
712                                         temp = StringMove(temp,"..");
713                                         FlatLocPoint(NULL, this_sidp, piecebuf, 
714                                                 sintp -> to, sintp -> if_to);
715                                         temp = StringMove(temp, piecebuf);
716                                 }
717                                 if (sintp -> strand == 2) /* minus strand */
718                                         /* ( vi match */ temp = StringMove(temp, ")");
719                         }else{
720 #ifdef VIRTUALS_NOT_TREATED_AS_NULLS
721                                 StringCpy(localbuf,"No id");
722                                 if (sintp -> id){
723                                         SeqIdWrite (sintp -> id, localbuf, 
724                                                 PRINTID_FASTA_LONG, MAX_CHAR_LOCATION);
725                                         if (ASN2FF_SHOW_ERROR_MSG == TRUE)
726                                                 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
727                                         "FlatLocElement: interval without being able to use id: %s", 
728                                                 localbuf);
729                                 }
730                                 retval = FALSE;
731 #endif
732                         }
733 
734                         break;
735                 case  SEQLOC_PNT:
736                         spp = (SeqPntPtr) ( location -> data.ptrvalue);
737                         if (is_real_id(spp-> id, this_sidp)){
738                                 if (spp -> strand == 2) /* minus strand */
739                                         temp = StringMove(temp, "complement("); /* ) vi match */
740                                 if ( spp -> fuzz){
741 /*--------
742  *  points with fuzz treated as if always come from '^': 
743  *  not best, perhaps, but pretty close
744  *-------*/
745                                         FlatLocCaret (spp -> id, this_sidp, piecebuf, 
746                                                         spp -> point, spp -> fuzz );
747                                 } else {
748                                         FlatLocPoint(spp -> id, this_sidp, piecebuf, 
749                                                         spp -> point, spp -> fuzz );
750                                 }
751                                 temp = StringMove(temp, piecebuf);
752                                 if (spp -> strand == 2) /* minus strand */
753                                         /* ( vi match */ temp = StringMove(temp, ")");
754                         } else {
755 #ifdef VIRTUALS_NOT_TREATED_AS_NULLS
756 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
757 "FlatLocElement: point without being able to use id: %s", sidp?SeqIdWrite (sidp, localbuf, PRINTID_FASTA_LONG):"No Id", MAX_CHAR_LOCATION);
758                                 retval = FALSE;
759 #endif
760                         }
761                         break;
762                 case SEQLOC_BOND:
763                 /*
764                 bondp = (SeqBondPtr) location -> data.ptrvalue;
765                 spp = bondp -> a;
766                 FlatLocPoint(spp -> id, this_sidp, piecebuf, 
767                                 spp -> point, spp -> fuzz );
768                 temp = StringMove(temp, piecebuf);
769                 temp = StringMove(temp,",");
770                 spp = bondp -> b;
771                 FlatLocPoint(NULL, this_sidp, piecebuf, spp -> point, spp -> fuzz );
772                 temp = StringMove(temp, piecebuf);
773                 */
774                 Bond((SeqBondPtr)location->data.ptrvalue, this_sidp,  localbuf);
775                         break;
776                 case SEQLOC_FEAT:
777         /*      if ( !  (Flat_Be_quiet&1)) */
778                 if (ASN2FF_SHOW_ERROR_MSG == TRUE)
779                         ErrPostEx(SEV_INFO, CTX_NCBI2GB, CTX_2GB_NOT_IMPLEMENTED,
780                                 "FlatLocElement:SEQLOC_FEAT not implemented");
781                         break;
782         }
783 
784         if (whole_intPtr)
785                 MemFree(whole_intPtr);  /* NOT object free ! ! ! */
786         
787         if (! retval){
788                 *buf = '\0';
789         }else{
790                 StringMove(buf, localbuf);
791         }
792 
793         return retval;
794 }
795 
796 /****************************************************************************
797 *Bond
798 *
799 *       This function takes a SeqBondPtr and a CharPtr, in buf, and returns a
800 *       string, in buf.  If both ends of the bond exist, the output
801 *       is bond(a, b); if only one end exists, the output is bond(a).
802 *
803 *       Tom Madden
804 *
805 **************************************************************************/
806 
807 NLM_EXTERN void Bond(SeqBondPtr bondp, SeqIdPtr this_sidp, CharPtr buf)
808 
809 {
810         Char piecebuf1[MAX_CHAR_LOCATION+1];
811         Char piecebuf2[MAX_CHAR_LOCATION+1];
812         SeqPntPtr spp;
813 
814         spp = bondp -> a;
815         FlatLocPoint(spp -> id, this_sidp, &(piecebuf1[0]), 
816                 spp -> point, spp -> fuzz );
817         if (bondp->b)
818         {
819                 spp = bondp -> b;
820                 FlatLocPoint(NULL, this_sidp, &(piecebuf2[0]), 
821                         spp -> point, spp -> fuzz );
822                 sprintf(buf, "bond(%s,%s)", piecebuf1, piecebuf2);
823         }
824         else
825         {
826                 sprintf(buf, "bond(%s)", piecebuf1);
827 
828         }
829 }
830 
831 /*----------- FlatLoc  ()------*/
832 
833 NLM_EXTERN CharPtr FlatLoc (BioseqPtr bsp, ValNodePtr location)
834 {
835         CharPtr retval = NULL;
836         Int4 max_length, len_used;
837         Boolean is_ok = TRUE;
838         SeqIdPtr this_sidp;
839 
840         if (location){
841                 this_sidp = bsp -> id;
842                 max_length = 0;
843                 do_FlatLoc(& is_ok, TRUE, this_sidp, NULL, NULL, 
844                         & max_length, bsp, location);
845                 if (is_ok){
846                         retval = (CharPtr) MemNew((size_t) (max_length + 10));
847                         len_used = 0;
848                         do_FlatLoc(&is_ok, TRUE, this_sidp,  retval, retval, 
849                                         &len_used, bsp, location);
850                 }
851         }
852 
853         return retval;
854 }
855 
856 
857 
858 /*-------------complement_FlatLoc ()---------------*/
859 
860 NLM_EXTERN CharPtr
861 complement_FlatLoc (SeqIdPtr this_sidp, Boolean PNTR is_okPt, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location)
862 {
863         SeqLocRevCmp(location);
864 
865         temp = FlatSmartStringMove(total_buf, lengthPt, temp,"complement(");
866         temp = do_FlatLoc (is_okPt, FALSE, this_sidp, total_buf, 
867                 temp, lengthPt, bsp, location);
868         temp = FlatSmartStringMove(total_buf, lengthPt, temp,")");
869 
870         SeqLocRevCmp(location);
871 
872         return temp;
873 }
874 /*-------- FlatSmartStringMove()-------*/
875 
876 NLM_EXTERN CharPtr 
877 FlatSmartStringMove(CharPtr total_buf, Int4Ptr lengthPt, CharPtr temp, CharPtr string)
878 {
879         CharPtr retval = temp;
880 
881         if ( lengthPt){
882                 (*lengthPt) += StringLen(string);
883         }
884         if (total_buf){
885                 retval = StringMove(temp,string);
886         }
887 
888         return retval;
889 }
890 #define FLAT_EQUIV 0
891 #define FLAT_JOIN 1
892 #define FLAT_ORDER 2
893 static CharPtr group_names []= {
894 "one-of","join","order"};
895 
896 /*--------- FlatNullAhead()---------*/
897 
898 NLM_EXTERN Boolean
899 FlatNullAhead(BioseqPtr bsp, ValNodePtr location)
900 {
901         Boolean retval = FALSE;
902         SeqLocPtr next;
903         if (location){
904                 next = location -> next;
905                 if (next){
906                         if ( next -> choice == SEQLOC_NULL)
907                                 retval = TRUE;
908                         if (FlatVirtLoc(bsp, next))
909                                 retval = TRUE;
910                 }else{
911                         retval = TRUE;  /* last one always true */
912                 }
913         }
914 
915         return retval;
916 }
917 
918                                                                 
919 /*---------- FlatPackedPoint() -----------*/
920 
921 NLM_EXTERN CharPtr
922 FlatPackedPoint (CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, PackSeqPntPtr pspp, SeqIdPtr this_sidp)
923 {
924         Char buf_space[MAX_CHAR_LOCATION +1];
925         CharPtr buf;
926         int dex;
927 
928         buf = &(buf_space[0]);
929         for (dex=0; dex < (int) pspp -> used; dex ++){
930                 FlatLocPoint(pspp -> id, this_sidp, buf, (pspp->pnts)[dex], pspp->fuzz);
931                 temp = FlatSmartStringMove(total_buf, lengthPt, temp,buf);
932         }
933 
934         return temp;
935 }
936 /*--------- group_FlatLoc ()-----------*/
937 
938 NLM_EXTERN CharPtr group_FlatLoc (SeqIdPtr this_sidp, Boolean PNTR is_okPt, int which, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location)
939 {
940         Char buf_space[MAX_CHAR_LOCATION +1];
941         CharPtr buf;
942         SeqLocPtr this_loc;
943         Boolean special_mode = FALSE; /* join in order */
944         int parens = 1;
945         Boolean found_non_virt = FALSE;
946 
947         buf = &(buf_space[0]);
948         
949          temp = FlatSmartStringMove(total_buf, lengthPt,
950                 temp,group_names[which]);
951          temp = FlatSmartStringMove(total_buf, lengthPt,
952                 temp,"(");
953         
954         for (this_loc = (SeqLocPtr) location -> data.ptrvalue;
955                         this_loc && *is_okPt; this_loc = this_loc -> next){
956                 if ( FlatVirtLoc(bsp, this_loc)) {
957                         if ( this_loc != location && this_loc -> next){
958                                 if (special_mode ){
959                                         special_mode = FALSE;
960                                         temp = FlatSmartStringMove(total_buf, lengthPt,
961                                                 temp,")");
962                                         parens --;
963                                 }
964                         }
965                         continue;
966                 }
967                 if ( found_non_virt &&
968                                 this_loc -> choice !=  SEQLOC_EMPTY &&
969                                 this_loc -> choice !=  SEQLOC_NULL ){
970                         temp = FlatSmartStringMove(total_buf, lengthPt, temp,",");
971                 }
972                 switch ( this_loc -> choice ){
973                 case SEQLOC_NULL : 
974                         if ( this_loc != location && this_loc -> next){
975                                 if (special_mode ){
976                                         special_mode = FALSE;
977                                         temp = FlatSmartStringMove(total_buf, lengthPt, temp,")");
978                                         parens --;
979                                 }
980                         }
981                         break;
982                 case SEQLOC_EMPTY :
983                         break;
984                 case SEQLOC_WHOLE : 
985                 case SEQLOC_PNT : 
986                 case SEQLOC_BOND :
987                 case SEQLOC_FEAT :
988                  found_non_virt = TRUE;
989                         if (FlatVirtLoc(bsp, this_loc)){
990                                 if ( this_loc != location && this_loc -> next){
991                                         if (special_mode ){
992                                                 special_mode = FALSE;
993                                                 temp = FlatSmartStringMove(total_buf, lengthPt,
994                                                         temp,"),");
995                                                 parens --;
996                                         }
997                                 }
998                         }else{
999                                 if( FlatLocElement(bsp, this_loc, buf)){
1000                                         temp = FlatSmartStringMove(total_buf, lengthPt, 
1001                                                 temp,buf);
1002                                 }else{
1003                                         temp = NULL;
1004                                         * is_okPt = FALSE;
1005                                 }
1006                         }
1007                         break;
1008                 case SEQLOC_INT :
1009                  found_non_virt = TRUE;
1010                         if ( which == FLAT_ORDER 
1011                                         && ! FlatNullAhead(bsp, this_loc)){
1012                                 special_mode = TRUE;
1013                                  temp = FlatSmartStringMove(total_buf, lengthPt,
1014                                         temp,group_names[FLAT_JOIN]);
1015                                  temp = FlatSmartStringMove(total_buf, lengthPt,
1016                                         temp,"(");
1017                                 parens ++;
1018                         }
1019                 
1020                         if( FlatLocElement(bsp, this_loc, buf)){
1021                                 temp = FlatSmartStringMove(total_buf, lengthPt, 
1022                                         temp,buf);
1023                         }else{
1024                                 temp = NULL;
1025                                 * is_okPt = FALSE;
1026                         }
1027                         break;
1028 
1029                 case SEQLOC_PACKED_PNT :
1030                  found_non_virt = TRUE;
1031                                 temp = FlatPackedPoint(total_buf, temp, lengthPt,
1032                                         (PackSeqPntPtr)this_loc->data.ptrvalue, this_sidp);
1033                         break;
1034                 case SEQLOC_PACKED_INT :
1035                 case SEQLOC_MIX :
1036                 case SEQLOC_EQUIV :
1037                 {
1038                         ValNodePtr hold_next = this_loc -> next;
1039                          found_non_virt = TRUE;
1040                         this_loc -> next = NULL;
1041                         temp = do_FlatLoc(is_okPt, FALSE, this_sidp, total_buf, temp, lengthPt, bsp, this_loc);
1042                         this_loc-> next = hold_next;
1043                 }
1044                         break;
1045                 }
1046         }
1047 
1048         if (* is_okPt){
1049                 while ( parens > 0){
1050                  temp = FlatSmartStringMove(total_buf, lengthPt,
1051                         temp,")");
1052                         parens --;
1053                 }
1054         }
1055 
1056         return temp;
1057 }
1058 
1059 NLM_EXTERN CharPtr 
1060 do_FlatLoc (Boolean PNTR is_okPt, Boolean ok_to_complement, SeqIdPtr this_sidp, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location)
1061 {
1062         Char buf_space[MAX_CHAR_LOCATION +1];
1063         CharPtr buf;
1064         SeqLocPtr slp;
1065         SeqLocPtr next_loc = NULL;
1066 
1067         buf = &(buf_space[0]);
1068 
1069         if (location == NULL) {
1070                 return NULL;
1071         }
1072         if ( ok_to_complement && 
1073                         SeqLocStrand(location) == Seq_strand_minus ){
1074                 temp = complement_FlatLoc (this_sidp, is_okPt, total_buf, 
1075                         temp, lengthPt, bsp, location);
1076                 return temp;
1077         }
1078         for ( slp = location; slp && *is_okPt; slp = slp -> next){
1079                 if ( slp -> choice == SEQLOC_NULL ||
1080                                 FlatVirtLoc(bsp, slp))
1081                         continue;
1082                 if ( slp != location){
1083                          temp = FlatSmartStringMove(total_buf, lengthPt,
1084                                 temp,",");
1085                 }
1086                 switch( slp -> choice){
1087                         case  SEQLOC_NULL:
1088                                 break;
1089                         case SEQLOC_MIX:
1090                         case SEQLOC_PACKED_INT:
1091                                 {
1092                                         Boolean found_null = FALSE;
1093                                         for (next_loc = (SeqLocPtr) slp -> 
1094                                                         data.ptrvalue; next_loc;
1095                                                         next_loc = next_loc -> next){
1096                                                 if ( next_loc -> choice == SEQLOC_NULL
1097                                                                 || FlatVirtLoc( bsp, next_loc)){
1098                                                         found_null = TRUE;
1099                                                         temp=group_FlatLoc(this_sidp, is_okPt,
1100                                                                 FLAT_ORDER, total_buf, temp, lengthPt, 
1101                                                                 bsp, slp);
1102                                                         break;
1103                                                 }
1104                                         }
1105                                         if ( ! found_null){
1106                                                 temp=group_FlatLoc(this_sidp, is_okPt,
1107                                                         FLAT_JOIN, total_buf, temp, lengthPt,
1108                                                         bsp, slp);
1109                                         }
1110                                 }
1111                                 break;
1112                         case SEQLOC_EQUIV:
1113                                 temp=group_FlatLoc(this_sidp, is_okPt,
1114                                         FLAT_EQUIV, total_buf, temp,
1115                                         lengthPt, bsp, slp);
1116                                 break;
1117                         case  SEQLOC_PACKED_PNT:
1118                         temp = FlatPackedPoint(total_buf, temp, lengthPt,
1119                                 (PackSeqPntPtr)slp->data.ptrvalue, this_sidp);
1120                         break;
1121                         default:
1122                         if ( ! FlatVirtLoc( bsp, slp))
1123                         if( FlatLocElement(bsp, slp, buf)){
1124                                 temp = FlatSmartStringMove(total_buf, lengthPt, 
1125                                         temp,buf);
1126                         }else{
1127                                 temp = NULL;
1128                                 *is_okPt = FALSE;
1129                         }
1130                 }
1131         }
1132         return temp;
1133 }
1134 
1135 NLM_EXTERN Boolean is_real_id(SeqIdPtr pointIdPtr, SeqIdPtr this_sidp)
1136 {
1137         Boolean retval = TRUE;
1138         BioseqPtr bs;
1139         SeqIdPtr use_id;
1140 
1141                 if ( ! SeqIdIn ( pointIdPtr, this_sidp)){
1142                         use_id = pointIdPtr;
1143                         bs = BioseqFind(use_id);
1144                         if ( bs ){
1145                         if (bs -> repr == Seq_repr_virtual)
1146                                 retval = FALSE;
1147                         }
1148                 }
1149         return retval;
1150 }
1151 
1152 NLM_EXTERN Boolean FlatAnnotPartial (SeqFeatPtr sfp, Boolean use_product)
1153 {
1154         Boolean fuzz=FALSE, retval = TRUE;
1155         CharPtr str;
1156         ImpFeatPtr imp;
1157 
1158         if ( sfp -> data.choice == 8) /*  Imp-feat    */
1159         {
1160                 imp = (ImpFeatPtr) (sfp -> data.value.ptrvalue);
1161                 if (imp ->loc != NULL)
1162                 for( str = imp -> loc; *str; str ++){
1163                         if ( *str == '<' || *str == '>'){
1164                                 retval = FALSE;
1165                                 break;
1166                         }else if (*str == 'r' && *(str +1) == 'e'){
1167                                 if (StringNCmp ("replace",str, (size_t) 7) == 0){
1168                                         retval = FALSE;
1169                                         break;
1170                                 }
1171                         }
1172                 }
1173                 if (retval == TRUE) /* Look for fuzz in the ASN.1 location */
1174                 {
1175                         if (use_product)
1176                                 fuzz = LookForFuzz (sfp->product);
1177                         else
1178                                 fuzz = LookForFuzz (sfp->location);
1179                         if (fuzz == TRUE)
1180                                 retval = FALSE;
1181                 }
1182         }
1183 
1184         return retval;
1185 }
1186 
1187 /************************************************************************
1188 *Boolean LookForFuzz (SeqLocPtr slp)
1189 *
1190 *       Look for fuzz in the location.  For use in FlatAnnotPartial.
1191 ************************************************************************/
1192 
1193 NLM_EXTERN Boolean LookForFuzz (SeqLocPtr head)
1194 {
1195         Boolean retval=FALSE;
1196         IntFuzzPtr ifp;
1197         PackSeqPntPtr pspp;
1198         SeqIntPtr sip;
1199         SeqLocPtr slp;
1200         SeqPntPtr spp;
1201 
1202         if (head == NULL)
1203                 return retval;
1204 
1205         slp=NULL;
1206         while ((slp = SeqLocFindNext(head, slp)) != NULL)
1207         {
1208                 switch (slp->choice)
1209                 {
1210                         case SEQLOC_INT:
1211                                 sip = (SeqIntPtr)(slp->data.ptrvalue);
1212                                 ifp = sip->if_from;
1213                                 if (ifp != NULL)
1214                                 {
1215                                         if (ifp->choice == 4)
1216                                         {
1217                                                 if (ifp->a != 0)
1218                                                         retval=TRUE;
1219                                         }
1220                                         else
1221                                                 retval = TRUE;  
1222                                 }
1223                                 ifp = sip->if_to;
1224                                 if (ifp != NULL)
1225                                 {
1226                                         if (ifp->choice == 4)
1227                                         {
1228                                                 if (ifp->a != 0)
1229                                                         retval=TRUE;
1230                                         }
1231                                         else
1232                                                 retval = TRUE;  
1233                                 }
1234                                 break;
1235                         case SEQLOC_PNT:
1236                                 spp = (SeqPntPtr)(slp->data.ptrvalue);
1237                                 ifp = spp->fuzz;
1238                                 if (ifp != NULL)
1239                                 {
1240                                         if (ifp->choice == 4)
1241                                         {
1242                                                 if (ifp->a != 0)
1243                                                         retval=TRUE;
1244                                         }
1245                                         else
1246                                                 retval = TRUE;  
1247                                 }
1248                                 break;
1249                         case SEQLOC_PACKED_PNT:
1250                                 pspp = (PackSeqPntPtr)(slp->data.ptrvalue);
1251                                 ifp = pspp->fuzz;
1252                                 if (ifp != NULL)
1253                                 {
1254                                         if (ifp->choice == 4)
1255                                         {
1256                                                 if (ifp->a != 0)
1257                                                         retval=TRUE;
1258                                         }
1259                                         else
1260                                                 retval = TRUE;  
1261                                 }
1262                                 break;
1263                         default:
1264                                 break;
1265                 }
1266                 if (retval == TRUE)
1267                         break;
1268         }
1269         return retval;
1270 } /* LookForFuzz */
1271 
1272 
1273 
1274 /*****************************************************************************
1275 *                                                                                       modified by Tatiana 12.08.97
1276 *       collecting and soritng the features features 
1277 *
1278 ******************************************************************************/
1279 static OrganizeFeatPtr CreateOrganizeFeat(void)
1280 {
1281         OrganizeFeatPtr ofp;
1282         
1283                 ofp = (OrganizeFeatPtr) MemNew(sizeof(OrganizeFeat));
1284                 MemSet ((Pointer) ofp, 0, sizeof (OrganizeFeat));
1285                 
1286                 return ofp;
1287 }
1288 
1289 static void UniqueGeneName(Boolean error_msgs, OrganizeFeatPtr ofp)
1290 {
1291         SortStructPtr   newp, p;
1292         SeqFeatPtr              sfp;
1293         GeneRefPtr              grp;
1294         CharPtr                 gene;
1295         CharPtr                 s, ss;
1296         SeqLocPtr               slp;
1297         Int4                    index, size;
1298         
1299         if (ofp == NULL)
1300                 return;
1301         if ((size = ofp->sfpGenesize) == 0)
1302                 return;
1303         newp = (SortStructPtr) MemNew((size)*sizeof(SortStruct));
1304         newp = MemCopy(newp, ofp->Genelist, (size * sizeof(SortStruct)));
1305         HeapSort((VoidPtr) (newp), 
1306                         (size_t) (size), sizeof(SortStruct), CompareGeneName);
1307         p = newp;
1308         if ((sfp = p->sfp) == NULL) {
1309                 MemFree (newp);
1310                 return;
1311         }
1312         slp = sfp->location;
1313         grp = (GeneRefPtr) sfp->data.value.ptrvalue;
1314         gene = grp->locus;
1315         for (index = 1, p++; index < size; index++, p++) {
1316                 sfp = p->sfp;
1317                 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
1318                 if (gene == NULL) {
1319                         gene = grp->locus;
1320                         slp = sfp->location;
1321                         continue;
1322                 }
1323                 if (error_msgs) {
1324                         if (grp->locus == NULL) {
1325                                 ErrPostEx(SEV_WARNING,ERR_FEATURE_NULLGeneLocus, "No gene locus in %s:", SeqLocPrint(sfp->location));
1326                                 continue;
1327                         }
1328                         if (StringCmp(gene, grp->locus) == 0) {
1329                                 s = SeqLocPrint(slp);
1330                                 ss = SeqLocPrint(sfp->location);
1331                                 ErrPostEx(SEV_WARNING, ERR_FEATURE_IdenticalGeneName, "Identical gene locus name [%s] in %s and %s", gene, s,  ss);
1332                                 MemFree(s);
1333                                 MemFree(ss);
1334                         }
1335                 }
1336                 gene = grp->locus;
1337                 slp = sfp->location;
1338         }
1339         MemFree (newp);
1340         return;
1341 }
1342 
1343 /*****************************************************************************
1344 *       Compare two ImpFeats by name and location
1345 *       returns 1 for matching features otherwise returns 0
1346 ******************************************************************************/
1347 static CmpImpFeat (ImpFeatPtr f1, ImpFeatPtr f2)
1348 {
1349     if (f1 == NULL && f2)
1350                 return 0;
1351 
1352     if (f2 == NULL && f1)
1353                 return 0;
1354                 
1355     if (StringCmp(f1->key, f2->key) != 0)
1356                 return 0;
1357 
1358     if (StringCmp(f1->loc, f2->loc) != 0)
1359                 return 0;
1360 
1361     return 1;
1362 }
1363 
1364 static Int2 CompXref (ValNodePtr x1, ValNodePtr x2)
1365 {
1366         DbtagPtr db1 = NULL, db2;
1367         CharPtr s1=NULL, s2=NULL;
1368         
1369     if (x1 == NULL && x2)
1370                 return 0;
1371     if (x2 == NULL && x1)
1372                 return 0;
1373         db1 = x1->data.ptrvalue;
1374         db2 = x2->data.ptrvalue;
1375         if (StringCmp(db1->db, db2->db) != 0) {
1376                 return 0;
1377         }
1378         if (db1->tag && db1->tag->str) {
1379                 s1 = db1->tag->str;
1380         }
1381         if (db2->tag && db2->tag->str) {
1382                 s2 = db2->tag->str;
1383         }
1384     if (s1 == NULL && s2)
1385                 return 0;
1386     if (s2 == NULL && s1)
1387                 return 0;
1388         if (s1 && s2) {
1389                 if (StringCmp(s1, s2) == 0) {
1390                         return 1;
1391                 } else {
1392                         return 0;
1393                 }
1394         } else {
1395                 if (db1->tag->id == db2->tag->id) {
1396                         return 1;
1397                 }
1398         }
1399         return 0;
1400 }
1401 /*****************************************************************************
1402 *       compare features by location and choice 
1403 *
1404 ******************************************************************************/
1405 static Int2 FeatMatch (SeqFeatPtr f1, SeqFeatPtr f2)
1406 {
1407         Int2 retval = 0;
1408         
1409     if (f1 == NULL && f2)
1410                 return 0;
1411     if (f2 == NULL && f1)
1412                 return 0;
1413         if ((SeqLocCompare(f1->location, f2->location)) != SLC_A_EQ_B)
1414                 return 0;
1415         if (f1->data.choice != f2->data.choice)
1416                 return 0;
1417         switch (f1->data.choice)
1418         {
1419                 case SEQFEAT_IMP:
1420                         retval = 
1421                         CmpImpFeat(f1->data.value.ptrvalue, f2->data.value.ptrvalue);
1422                         break;
1423                 case SEQFEAT_REGION:
1424                         if (f1->dbxref != NULL || f2->dbxref != NULL) { 
1425                                 retval = CompXref(f1->dbxref, f2->dbxref);
1426                         }
1427                         break;
1428                 default:
1429                         break;
1430         }
1431         return retval;
1432 }
1433 
1434 /*****************************************************************************
1435 *       mark (with boolean 'dup') identical features in the List
1436 *
1437 ******************************************************************************/
1438 static void UniqueFeat(SortStructPtr List, Int4 size)
1439 {
1440         Int4                    i, j, jj, ii;
1441         Int4                    start;
1442         SortStructPtr   p, pp;
1443         
1444         for (i = 0, p = List; i < size; i = ii) {
1445                 ii = i + 1;
1446                 if (p->sfp == NULL) {
1447                         continue;
1448                 }
1449                 start = SeqLocStart(p->sfp->location);
1450                 for (pp = p+1; ii < size; ii++, pp++) {
1451                         if (pp->sfp == NULL) {
1452                                 continue;
1453                         }
1454                         if (start != SeqLocStart(pp->sfp->location)) {
1455                                 break;
1456                         }
1457                 }
1458                 for (j = i; j < ii; j++, p++) {
1459                         for (jj = j+1, pp = p+1; jj < ii; jj++, pp++) {
1460                                 if (p->hash == pp->hash) {
1461                                         if (FeatMatch(p->sfp, pp->sfp) == 1) {
1462                                                 pp->dup = TRUE;
1463                                         }
1464                                 }
1465                                 
1466                         }
1467                 }
1468         }
1469         return;
1470 }
1471 
1472 /*****************************************************************************
1473 *       collect landmarks from map (Gather is not used) 
1474 *
1475 ******************************************************************************/
1476 NLM_EXTERN Int2 GetMapFeats(Asn2ffJobPtr ajp, GBEntryPtr gbp)
1477 {
1478         BioseqPtr               bsp;
1479         SeqFeatPtr              sfp;
1480         OrganizeFeatPtr ofp;
1481 
1482                 if (gbp == NULL || gbp->bsp == NULL) {
1483                         return 0;
1484                 }
1485                 bsp = gbp->bsp;
1486                 BioseqLock(bsp);
1487                 ofp = CreateOrganizeFeat();
1488                 ofp->bsp = bsp;
1489                 ofp->useSeqMgrIndexes = ajp->useSeqMgrIndexes;
1490                 
1491                 if (bsp->seq_ext_type == 3) {   /* map-ext */
1492                         for (sfp = (SeqFeatPtr) bsp->seq_ext; sfp; sfp=sfp->next) {
1493                                 switch (sfp->data.choice) {
1494                                         case SEQFEAT_GENE: 
1495                                         ofp->List = EnlargeSortList(ofp->List,
1496                                                                                                          ofp->sfpListsize);
1497                                                 ofp->sfpListsize = StoreFeat(ofp->List, sfp, 
1498                                                 ofp->sfpListsize, bsp, NULL, 0, 0, 0, NULL, NULL, 0);
1499                                         break;
1500                                         default:
1501                                         ofp->List = EnlargeSortList(ofp->List, 
1502                                                                                                         ofp->sfpListsize);
1503                                                 ofp->sfpListsize = StoreFeat(ofp->List, sfp, 
1504                                                 ofp->sfpListsize, bsp, NULL, 0, 0, 0, NULL, NULL, 0);
1505                                         break;
1506                                 }
1507                         }
1508                         
1509                 }
1510                 SortOrganizeFeat(ofp);
1511                 gbp->feat = ofp;
1512                 if (ofp == NULL) {
1513                         return 0;
1514                 }
1515                 return (gbp->feat->sfpListsize);
1516 }
1517 
1518 /*****************************************************************************
1519 *       function and activity for proteins
1520 *
1521 ******************************************************************************/
1522 static SeqFeatPtr AddProtRefInfo(SeqFeatPtr sfp, ProtRefPtr prot)
1523 {
1524         ValNodePtr vnp;
1525         
1526         for (vnp=prot->name; vnp; vnp=vnp->next) {
1527                 if (GBQualPresent("product", sfp->qual) == FALSE) {
1528                         sfp->qual = AddGBQual(sfp->qual, "product", vnp->data.ptrvalue);
1529                 } else {
1530                         sfp->qual = AddGBQual(sfp->qual, "note", vnp->data.ptrvalue);
1531                 }
1532         }
1533         if (prot->desc) {
1534                 sfp->qual = AddGBQual(sfp->qual, "note", prot->desc);
1535         }
1536         for (vnp=prot->ec; vnp; vnp=vnp->next) {
1537                 sfp->qual=AddGBQual(sfp->qual, "EC_number", vnp->data.ptrvalue);
1538         }
1539         for (vnp=prot->activity; vnp; vnp=vnp->next) {
1540                 sfp->qual=AddGBQual(sfp->qual, "function", vnp->data.ptrvalue);
1541         }
1542         
1543         return sfp;
1544 }
1545 
1546 /*****************************************************************************
1547 *       add site description notes
1548 *
1549 ******************************************************************************/
1550 NLM_EXTERN void AddSiteNoteQual(SeqFeatPtr sfp_in, SeqFeatPtr sfp)
1551 {
1552         if (sfp_in == NULL) {
1553                 return;
1554         }
1555         if (sfp_in->data.choice != SEQFEAT_SITE) {
1556                 return;
1557         }
1558         switch (sfp_in->data.value.intvalue) {
1559                 case 1:
1560                         sfp->qual = AddGBQual(sfp->qual, "note", "active site");
1561                         break;
1562                 case 2:
1563                         sfp->qual = AddGBQual(sfp->qual, "note", "binding site");
1564                         break;
1565                 case 3:
1566                         sfp->qual = AddGBQual(sfp->qual, "note", "cleavage site");
1567                         break;
1568                 case 4:
1569                         sfp->qual = AddGBQual(sfp->qual, "note", "inhibit site");
1570                         break;
1571                 case 5:
1572                         sfp->qual = AddGBQual(sfp->qual, "note", "modified site");
1573                         break;
1574                 case 6:
1575                         sfp->qual = AddGBQual(sfp->qual, "note", "glycosylation site");
1576                         break;
1577                 case 7:
1578                         sfp->qual = AddGBQual(sfp->qual, "note", "myristoylation site");
1579                         break;
1580                 case 8:
1581                         sfp->qual = AddGBQual(sfp->qual, "note", "mutagenized site");
1582                         break;
1583                 case 9:
1584                         sfp->qual = AddGBQual(sfp->qual, "note", "metal-binding site");
1585                         break;
1586                 case 10:
1587                         sfp->qual = AddGBQual(sfp->qual, "note", "phosphorylation site");
1588                         break;
1589                 case 11:
1590                         sfp->qual = AddGBQual(sfp->qual, "note", "acetylation site");
1591                         break;
1592                 case 12:
1593                         sfp->qual = AddGBQual(sfp->qual, "note", "amidation site");
1594                         break;
1595                 case 13:
1596                         sfp->qual = AddGBQual(sfp->qual, "note", "methylation site");
1597                         break;
1598                 case 14:
1599                         sfp->qual = AddGBQual(sfp->qual, "note", "hydroxylation site");
1600                         break;
1601                 case 15:
1602                                 sfp->qual = AddGBQual(sfp->qual, "note", "sulfatation site");
1603                         break;
1604                 case 16:
1605                         sfp->qual = AddGBQual(sfp->qual, "note", 
1606                         "oxidative-deamination site");
1607                         break;
1608                 case 17:
1609                         sfp->qual = AddGBQual(sfp->qual, "note",        
1610                         "pyrrolidone-carboxylic-acid site");
1611                         break;
1612                 case 18:
1613                         sfp->qual = AddGBQual(sfp->qual, "note", 
1614                                 "gamma-carboxyglutamic-acid site");
1615                         break;
1616                 case 19:
1617                         sfp->qual = AddGBQual(sfp->qual, "note", "blocked site");
1618                         break;
1619                 case 20:
1620                         sfp->qual = AddGBQual(sfp->qual, "note", "lipid-binding site");
1621                         break;
1622                 case 21:
1623                         sfp->qual = AddGBQual(sfp->qual, "note", "np-binding site");
1624                         break;
1625                 case 22:
1626                                 sfp->qual = AddGBQual(sfp->qual, "note", "DNA binding site");
1627                         break;
1628                 case 23:
1629                         sfp->qual = AddGBQual(sfp->qual, "note", "signal-peptide site");
1630                         break;
1631                 case 24:
1632                         sfp->qual = AddGBQual(sfp->qual, "note", 
1633                                 "transit-peptide site");
1634                         break;
1635                 case 25:
1636                         sfp->qual = AddGBQual(sfp->qual, "note", 
1637                                                 "transmembrane-region site");
1638                         break;
1639                 default:
1640                         sfp->qual = AddGBQual(sfp->qual, "note", "unclassified site");
1641                         break;
1642         }
1643 }
1644 
1645 /*****************************************************************************
1646 *       protein features are shown as misc_feats on nucleotide records
1647 *
1648 ******************************************************************************/
1649 static SeqFeatPtr CreateImpFeatFromProt(Uint1 format, SeqFeatPtr psfp, SeqFeatPtr cds, SeqLocPtr new_loc)
1650 {
1651         SeqFeatPtr              sfp;
1652         ImpFeatPtr              ifp;
1653         ProtRefPtr              prot;
1654         GBQualPtr               q;
1655         Uint2                   retval;
1656         Char                    buf[2];
1657         CdRegionPtr     cdr;
1658         CharPtr                 tmp;
1659         GeneRefPtr              grp;
1660         SeqFeatXrefPtr  xrp;
1661         
1662         if (psfp->data.choice == SEQFEAT_PSEC_STR) {
1663                 return NULL;
1664         }
1665         sfp = SeqFeatNew();
1666         ifp = ImpFeatNew();
1667         sfp->data.choice = SEQFEAT_IMP;
1668         sfp->data.value.ptrvalue = ifp;
1669         ifp->key = StringSave("misc_feature");
1670         if (psfp->data.choice == SEQFEAT_PROT) { 
1671                 prot = psfp->data.value.ptrvalue;
1672                 if (prot->processed == 0 || prot->processed == 1) {
1673                         SeqFeatFree(sfp);
1674                         return NULL;
1675                 }
1676                 if (prot->processed == 2) {
1677                         MemFree(ifp->key);
1678                         ifp->key = StringSave("mat_peptide");
1679                 } else if (prot->processed == 3) {
1680                         MemFree(ifp->key);
1681                         ifp->key = StringSave("sig_peptide");
1682                 } else if (prot->processed == 4) {
1683                         MemFree(ifp->key);
1684                         ifp->key = StringSave("transit_peptide");
1685                 }
1686                 sfp = AddProtRefInfo(sfp, prot);
1687         } else if (psfp->data.choice == SEQFEAT_BOND) {
1688                 if (psfp->data.value.intvalue == 1) {
1689                         sfp->qual = AddGBQual(sfp->qual, "note", "disulfide bond");
1690                 } else if (psfp->data.value.intvalue == 2) {
1691                         sfp->qual = AddGBQual(sfp->qual, "note", "thiolester bond");
1692                 } else if (psfp->data.value.intvalue == 3) {
1693                         sfp->qual = AddGBQual(sfp->qual, "note", "xlink bond");
1694                 } else if (psfp->data.value.intvalue == 4) {
1695                         sfp->qual = AddGBQual(sfp->qual, "note", "thioether bond");
1696                 } else {
1697                         sfp->qual = AddGBQual(sfp->qual, "note", "bond");
1698                 }
1699         } else if (psfp->data.choice == SEQFEAT_SITE) {
1700                         AddSiteNoteQual(psfp, sfp);
1701         } else if (psfp->data.choice == SEQFEAT_REGION) {
1702                 tmp = MemNew(StringLen(psfp->data.value.ptrvalue) + 9);
1703                 sprintf(tmp, "Region: %s", (CharPtr) psfp->data.value.ptrvalue);
1704                 sfp->qual = AddGBQual(sfp->qual, "note", tmp);
1705                 tmp = MemFree(tmp);
1706         } else {
1707                 SeqFeatFree(sfp);
1708                 return NULL;
1709         }
1710         sfp->excpt = psfp->excpt;
1711         for (xrp=psfp->xref; xrp; xrp=xrp->next) {
1712                 if (xrp->data.choice == SEQFEAT_GENE) {
1713                         grp = (GeneRefPtr) xrp->data.value.ptrvalue;
1714                         sfp->xref = AsnIoMemCopy(xrp, 
1715                                 (AsnReadFunc) SeqFeatXrefAsnRead, 
1716                                         (AsnWriteFunc) SeqFeatXrefAsnWrite);
1717                         break;
1718                 }
1719         }
1720         if ((sfp->partial = psfp->partial) == TRUE) {
1721                 cdr = (CdRegionPtr) cds->data.value.ptrvalue;
1722                 if (cdr->frame) {
1723                         sprintf(buf, "%d", cdr->frame); 
1724                 } else {
1725                         sprintf(buf, "1"); 
1726                 }
1727                 sfp->qual = AddGBQualEx(&(ifp->key), sfp->qual, "codon_start", buf);
1728         }
1729         sfp->exp_ev = psfp->exp_ev;
1730         if (psfp->comment)
1731                 sfp->comment = StringSave(psfp->comment);
1732         if (psfp->title)
1733                 sfp->comment = StringSave(psfp->title);
1734         for (q=psfp->qual; q; q=q->next) {
1735                 sfp->qual = AddGBQual(sfp->qual, q->qual, q->val);
1736         }
1737         if (format == GENPEPT_FMT || format == EMBLPEPT_FMT) {
1738                 sfp->location = AsnIoMemCopy(psfp->location, 
1739                         (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
1740                 return sfp;
1741         }
1742         if (new_loc) {
1743                 sfp->location = aaFeatLoc_to_dnaFeatLoc(cds, new_loc);
1744         } else {
1745                 sfp->location = aaFeatLoc_to_dnaFeatLoc(cds, psfp->location);
1746         }
1747         if (sfp->location == NULL) {
1748                 SeqFeatFree(sfp);
1749                 return NULL;
1750         }
1751         if (sfp->partial == FALSE) {
1752                 retval = SeqLocPartialCheck(sfp->location);
1753                 if (retval > SLP_COMPLETE && retval < SLP_NOSTART) {
1754                         sfp->partial = TRUE;
1755                 }
1756         }
1757         return sfp;
1758 }
1759 static Boolean CheckNewSfpLoc(SeqFeatPtr sfp, BioseqPtr bsp)
1760 {
1761         SeqLocPtr slp, slp1 = NULL;
1762         
1763         while ((slp1=SeqLocFindNext(sfp->location, slp1)) != NULL) {
1764                 slp = slp1;
1765         }
1766         if (SeqIdForSameBioseq(SeqLocId(slp), bsp->id)) {
1767                 if (SeqLocLen(slp) <= BioseqGetLen(bsp)) {
1768                         return TRUE;
1769                 }
1770         }
1771         return FALSE;
1772 }
1773 
1774 /*****************************************************************************
1775 *       gather callback for collecting gene features
1776 *
1777 ******************************************************************************/
1778 static Boolean get_genes (GatherContextPtr gcp)
1779 {
1780         BioseqPtr               bsp = NULL;
1781         OrganizeFeatPtr ofp;
1782         SeqFeatPtr              sfp;
1783         Boolean                 temp=FALSE;
1784         
1785         ofp = gcp->userdata;
1786         if (gcp->thistype != OBJ_SEQFEAT) {
1787                 return TRUE;
1788         }
1789         sfp = (SeqFeatPtr) (gcp->thisitem);
1790         bsp = ofp->bsp;
1791         if (sfp->data.choice != SEQFEAT_GENE) {
1792                 return TRUE;
1793         }
1794         if ((gcp->tempload == TRUE) && (! gcp->hold)) {
1795                 temp = TRUE;
1796         }
1797         ofp->Genelist = EnlargeSortList(ofp->Genelist,ofp->sfpGenesize);
1798         ofp->sfpGenesize = StoreFeatTemp(ofp->Genelist, sfp, 
1799         ofp->sfpGenesize, bsp, ofp->seg_bsp, gcp->entityID, 
1800                                         gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1801         return TRUE;
1802 }
1803 
1804 /*****************************************************************************
1805 *       gather callback for collecting all features
1806 *
1807 ******************************************************************************/
1808 static Boolean get_feats (GatherContextPtr gcp)
1809 {
1810         BioseqPtr               bsp = NULL;
1811         OrganizeFeatPtr ofp;
1812         SeqFeatPtr              sfp;
1813         ImpFeatPtr      ifp;
1814         SeqIdPtr                xid;
1815         GatherRange             gr;
1816         Boolean                 r_trunc;
1817         OrganizeProtPtr opp;
1818         BioseqPtr               p_bsp;
1819         SeqEntryPtr     sep;
1820         GatherScope     gs;
1821         SeqFeatPtr              new_sfp, psfp;
1822         Int4                    index;
1823         Boolean                 temp = FALSE;
1824         Uint2           entityID;
1825         SeqMgrFeatContext fcontext;
1826         
1827         ofp = gcp->userdata;
1828         if (gcp->thistype != OBJ_SEQFEAT) {
1829                 return TRUE;
1830         }
1831         sfp = (SeqFeatPtr) (gcp->thisitem);
1832 /* do sorting within EntityId */ 
1833         if (gcp->entityID != ofp->oldID) {
1834                 ofp->lock_bsp = BioseqFindCore(SeqLocId(sfp->location));
1835                 BioseqLock(ofp->lock_bsp);
1836                 SortOrganizeFeat(ofp);
1837                 ofp->oldID = gcp->entityID;
1838         }
1839         gr = gcp->extremes;
1840         r_trunc = gr.r_trunc;
1841         if (sfp->data.choice != SEQFEAT_CDREGION && 
1842                                         sfp->data.choice != SEQFEAT_GENE) {
1843                 if ((!ofp->embl_feat && ASN2FF_SHOW_GB_STYLE 
1844                                         && !(ofp->showSeqLoc)) && r_trunc) {
1845                         return TRUE;
1846                 }
1847         }
1848         bsp = ofp->bsp;
1849         if ((gcp->tempload == TRUE) && (! gcp->hold)) {
1850                 temp = TRUE;
1851         }
1852         switch (sfp->data.choice) {
1853                 case SEQFEAT_GENE: 
1854                     ofp->Genelist = EnlargeSortList(ofp->Genelist,ofp->sfpGenesize);
1855                         ofp->sfpGenesize = StoreFeatTemp(ofp->Genelist, sfp, 
1856                                 ofp->sfpGenesize, bsp, ofp->seg_bsp, gcp->entityID, 
1857                                         gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1858                         if (ofp->show_gene) {
1859                                 if (r_trunc && !ofp->embl_feat && ASN2FF_SHOW_GB_STYLE
1860                                                                         && !(ofp->showSeqLoc)) {
1861                                         break;
1862                                 }
1863                             ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1864                                 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp, 
1865                                         ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID, 
1866                                         gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1867                         }
1868                 break;
1869                 case SEQFEAT_BIOSRC: /* save in both lists */
1870                     ofp->Biosrclist = 
1871                     EnlargeSortList(ofp->Biosrclist, ofp->biosrcsize);
1872                         ofp->biosrcsize = StoreFeatTemp(ofp->Biosrclist, sfp, 
1873                                 ofp->biosrcsize, bsp, ofp->seg_bsp, gcp->entityID, 
1874                                         gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1875                         ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1876                             ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp, 
1877                                     ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID, 
1878                                     gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1879                 break;
1880                 case SEQFEAT_ORG: 
1881                     ofp->Orglist = 
1882                     EnlargeSortList(ofp->Orglist, ofp->sfpOrgsize);
1883                         ofp->sfpOrgsize = StoreFeatTemp(ofp->Orglist, sfp, 
1884                                 ofp->sfpOrgsize, bsp, ofp->seg_bsp, gcp->entityID, 
1885                                         gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1886                 break;
1887         case SEQFEAT_PUB: /* Pubs are already captured by "StorePubInfo". */
1888                 break;
1889         case SEQFEAT_IMP: /* This case must be before the generic case */
1890                 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
1891                 if (StringCmp(ifp->key, "source") == 0) {
1892 /* Capture only the first source feat that covers the
1893                 entire entry, the others go among the generic features.*/
1894                         if (ofp->sfpSourcesize == 0) {
1895                                 if (bsp->length != -1 &&
1896                                         bsp->length == SeqLocLen(sfp->location)) {
1897                                        ofp->Sourcelist = 
1898                                                    EnlargeSortList(ofp->Sourcelist, 
1899                                                                                                    ofp->sfpSourcesize);
1900                                         ofp->sfpSourcesize =
1901                                                 StoreFeatTemp(ofp->Sourcelist, sfp, ofp->sfpSourcesize, 
1902                                                 bsp, ofp->seg_bsp,gcp->entityID, gcp->itemID, 
1903                                                 gcp->thistype, gcp->new_loc, NULL, 0, temp);
1904                                         break;
1905                                 }
1906                         }
1907                 } else if (StringCmp(ifp->key, "Site-ref") == 0 && sfp->cit == NULL) {
1908 /* if "Site-ref" has a pub, put out info as a pub */
1909                         ofp->Siteslist =
1910                            EnlargeSortList(ofp->Siteslist, ofp->sfpSitesize);
1911                         ofp->sfpSitesize = 
1912                         StoreFeatTemp(ofp->Siteslist, sfp, ofp->sfpSitesize, bsp, 
1913                                 ofp->seg_bsp, gcp->entityID, gcp->itemID, 
1914                                         gcp->thistype, gcp->new_loc, NULL, 0, temp);
1915                         break;
1916                 } else if (StringCmp(ifp->key, "Site-ref") == 0 && sfp->cit != NULL) {
1917 /* Check to see if this was already put out as a pub*/
1918                         break;
1919                 } else {
1920 /* If none of the above is true, execute generic. */
1921                         ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1922                         ofp->sfpListsize = 
1923                         StoreFeatTemp(ofp->List, sfp, ofp->sfpListsize, bsp, 
1924                                 ofp->seg_bsp, gcp->entityID, gcp->itemID, 
1925                                         gcp->thistype, gcp->new_loc, NULL, 0, temp);
1926                 }
1927                 break;
1928 /* Look to see if an Xref goes out as a SeqFeat or as a 2nd accession */
1929         case SEQFEAT_SEQ:
1930                 xid=CheckXrefFeat(bsp, sfp);
1931                 if (xid == NULL) {
1932                       ofp->Xreflist = EnlargeSortList(ofp->Xreflist,    
1933                                                                                       ofp->sfpXrefsize);
1934                         ofp->sfpXrefsize = StoreFeatTemp(ofp->Xreflist, sfp,
1935                                 ofp->sfpXrefsize, bsp, ofp->seg_bsp, gcp->entityID, 
1936                                 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1937                         break;
1938 
1939                 }
1940                 break;
1941         case SEQFEAT_COMMENT:
1942 /* The following assures a valid comment */
1943                 if (sfp->comment == NULL || StringLen(sfp->comment) == 0)
1944                                 break;
1945                 if (bsp && bsp->length != -1 &&
1946                         bsp->length == SeqLocLen(sfp->location)) {
1947 /** will go to COMMENT field **/
1948                       ofp->Commlist = 
1949                       EnlargeSortList(ofp->Commlist, ofp->sfpCommsize);
1950                         ofp->sfpCommsize =
1951                         StoreFeatTemp(ofp->Commlist, sfp, ofp->sfpCommsize, 
1952                                 bsp, ofp->seg_bsp, gcp->entityID, gcp->itemID, 
1953                                         gcp->thistype, gcp->new_loc, NULL, 0, temp);
1954                 } else {
1955 /** will go to misc_feature **/
1956                         ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1957                 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp, 
1958                         ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID, 
1959                                 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1960                 }
1961                 break;
1962         case SEQFEAT_CDREGION:
1963                 if (r_trunc != TRUE || ofp->embl_feat || ofp->showSeqLoc
1964                                                                                  || !ASN2FF_SHOW_GB_STYLE) {
1965                         ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1966                 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp, 
1967                                  ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID, 
1968                                 gcp->itemID, gcp->thistype, gcp->new_loc, gcp->extra_loc, 
1969                                         gcp->extra_loc_cnt, temp);
1970                 }
1971 /* Look for Prot-ref features, create ImpFeats */
1972                 if (sfp != NULL && sfp->product != NULL && ofp->format != GENPEPT_FMT) {
1973                         p_bsp = BioseqFindCore(SeqLocId(sfp->product));
1974                         if (p_bsp != NULL)    /*Bioseq is (or has been) in memory */
1975                         {
1976                                 if (ofp->useSeqMgrIndexes) {
1977                                         entityID = ObjMgrGetEntityIDForPointer (p_bsp);
1978                                         if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
1979                                                 SeqMgrIndexFeatures (entityID, NULL);
1980                                         }
1981                                         psfp = SeqMgrGetBestProteinFeature (p_bsp, NULL);
1982                                         if (psfp != NULL) {
1983                                                 psfp = SeqMgrGetNextFeature (p_bsp, NULL, 0, 0, &fcontext);
1984                                                 while (psfp != NULL) {
1985                                                         new_sfp = CreateImpFeatFromProt(ofp->format, psfp, sfp, NULL);
1986                                                         if (new_sfp != NULL) {
1987                                                                 if (CheckNewSfpLoc(new_sfp, bsp)) {
1988                                                                 ofp->List = EnlargeSortList(ofp->List, 
1989                                                                                                                     ofp->sfpListsize);
1990                                                                         ofp->sfpListsize = 
1991                                                                                 StoreFeatFree(ofp->List,new_sfp, 
1992                                                                                 ofp->sfpListsize, bsp, ofp->seg_bsp, 
1993                                                                                 fcontext.entityID, fcontext.itemID, 
1994                                                                                 OBJ_SEQFEAT, NULL, NULL, 0, TRUE);
1995                                                                 }
1996 /* if opp->list[index].slp !=NULL I shoud use it*/
1997                                                         }
1998                                                         psfp = SeqMgrGetNextFeature (p_bsp, psfp, 0, 0, &fcontext);
1999                                                 }
2000                                         }
2001                                         return TRUE;
2002                                 }
2003                                 opp = (OrganizeProtPtr) MemNew(sizeof(OrganizeProt));
2004                                 opp->size = 0;
2005                                 sep = SeqEntryNew();
2006                                 sep->choice = 1;
2007                                 sep->data.ptrvalue = p_bsp;
2008                                 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
2009                                 gs.get_feats_location = TRUE;
2010                                 gs.target = sfp->product;
2011                                 GatherSeqEntry(sep, opp, get_prot_feats, &gs);
2012                                 for (index=0; index < opp->size; index++) {
2013                                         if ((psfp = opp->list[index].sfp) == NULL) {
2014                                                 continue;
2015                                         }
2016                                         new_sfp = CreateImpFeatFromProt(ofp->format, psfp, sfp, 
2017                                                                                                 opp->list[index].slp);
2018                                         if (new_sfp != NULL) {
2019                                                 if (CheckNewSfpLoc(new_sfp, bsp)) {
2020                                                 ofp->List = EnlargeSortList(ofp->List, 
2021                                                                                                     ofp->sfpListsize);
2022                                                         ofp->sfpListsize = 
2023                                                                 StoreFeatFree(ofp->List,new_sfp, 
2024                                                                 ofp->sfpListsize, bsp, ofp->seg_bsp, 
2025                                                                 opp->list[index].entityID, 
2026                                                                 opp->list[index].itemID, 
2027                                                                 opp->list[index].itemtype, 
2028                                                                 NULL, NULL, 0, TRUE);
2029                                                 }
2030 /* if opp->list[index].slp !=NULL I shoud use it*/
2031                                         }
2032                                 }
2033                                 if(opp->list != NULL && opp->list->nsp != NULL)
2034                                         NoteStructFree(opp->list->nsp);
2035                                 MemFree(opp->list);
2036                                 MemFree(opp);
2037                                 MemFree(sep);
2038                         }
2039                 }
2040                 break;
2041         case SEQFEAT_RNA:
2042                 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
2043                 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp, 
2044                         ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID, 
2045                                 gcp->itemID, gcp->thistype, gcp->new_loc, gcp->extra_loc, 
2046                                         gcp->extra_loc_cnt, temp);
2047                 break;
2048         case SEQFEAT_RSITE:
2049 /* do not gather this type*/
2050                 break;
2051         default:
2052 /* If none of the above is true, execute generic. */
2053                 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
2054             ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp, 
2055                         ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID, 
2056                                 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
2057                 break;
2058         }
2059         return TRUE;
2060 }
2061 
2062 static Boolean is_embl(GBEntryPtr gbp)
2063 {
2064         CharPtr prefix = EMBL_AC;
2065         static CharPtr  embl_accpref[EMBL_PREFNUM] = {"AJ", "AL", "AM", "AN", "AX"};
2066         Boolean retval = FALSE;
2067         Int2 i;
2068         
2069         if (gbp == NULL || gbp->accession == NULL)
2070                 return FALSE;
2071         if (IS_DIGIT(gbp->accession[1]) && 
2072                 StringChr(prefix, gbp->accession[0]) != NULL) {
2073                 retval = TRUE;
2074         }  else {
2075         for (i = 0; i < EMBL_PREFNUM; i++) {
2076                         if (StringNCmp(gbp->accession, embl_accpref[i], 2) == 0) {
2077                                 retval = TRUE;
2078                 }
2079             }
2080     }
2081         return retval;
2082 }
2083 
2084 /****************************************************************************
2085 *       void GetGeneRefInfo (GeneStructPtr gsp, NoteStructPtr nsp, GeneRefPtr grp)
2086 *
2087 *       gsp: GeneStructPtr containing gene information
2088 *       grp: GeneRefPtr from a sfp of type gene or a sfp xref.
2089 *
2090 *       If fields are empty on the gsp, and the relevant information
2091 *       is given by the grp, that field is filled on the gsp
2092 ****************************************************************************/
2093 
2094 static void GeneRefInfoToGsp (GeneStructPtr gsp, GeneRefPtr grp, SeqFeatPtr sfp)
2095 
2096 {
2097         ValNodePtr syn, vsyn = NULL;
2098         
2099         if (grp == NULL) {
2100                 return;
2101         }
2102         syn=grp->syn;
2103         if (grp->locus != NULL) {
2104                 if (gsp->gene != NULL && 
2105                         StringCmp(gsp->gene->data.ptrvalue, grp->locus) != 0) {
2106                         if (syn != NULL) {
2107                                 vsyn = ValNodeCopyStr(&(vsyn), 1, syn->data.ptrvalue);
2108                                 gsp->gene->next=vsyn;
2109                         }
2110                         return;
2111                 }
2112                 if (gsp->gene == NULL) {
2113                         gsp->gene = ValNodeCopyStr(&(gsp->gene), 0, grp->locus);
2114                 }
2115         } else if (grp->desc != NULL) {
2116                 gsp->gene = ValNodeCopyStr(&(gsp->gene), 0, grp->desc);
2117         }
2118         if (syn != NULL) {
2119                         vsyn = ValNodeCopyStr(&(vsyn), 1, syn->data.ptrvalue);
2120                 if (gsp->gene == NULL) {
2121                         gsp->gene = vsyn;
2122                 } else {
2123                         gsp->gene->next=vsyn;
2124                 }
2125         }
2126         if (gsp->map[0] == NULL && grp->maploc)
2127                 gsp->map[0] = grp->maploc;
2128         if (grp->pseudo) {
2129                 gsp->pseudo = TRUE;
2130         } else if (sfp != NULL && sfp->pseudo) {
2131                 gsp->pseudo = TRUE;
2132         } else {
2133                 gsp->pseudo = FALSE;
2134         }
2135 
2136         gsp->grp = AsnIoMemCopy((GeneRefPtr)grp, (AsnReadFunc) GeneRefAsnRead, (AsnWriteFunc) GeneRefAsnWrite);
2137         return;
2138 }
2139 
2140 /*****************************************************************************
2141 *       gather genes for particular CDS (for GenPept an d EmblPept
2142 *
2143 ******************************************************************************/
2144 static OrganizeFeatPtr GetGeneListForCds(Uint2 entityID, BioseqPtr bsp)
2145 {
2146     OrganizeFeatPtr     ofp=NULL;
2147     GatherScope         gsc;
2148     SeqLocPtr           slp;
2149     
2150     ofp = CreateOrganizeFeat();
2151     MemSet ((Pointer) (&gsc), 0, sizeof (GatherScope));
2152     MemSet ((Pointer) (gsc.ignore), (int)(TRUE), 
2153             (size_t) (OBJ_MAX * sizeof(Boolean)));
2154     gsc.ignore[OBJ_SEQANNOT] = FALSE;
2155     gsc.ignore[OBJ_SEQFEAT] = FALSE;
2156     gsc.get_feats_location = TRUE;
2157     gsc.seglevels = 1;
2158     if (bsp != NULL) {
2159         slp = ValNodeNew(NULL);
2160         slp->choice = SEQLOC_WHOLE;
2161         slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
2162         gsc.target = slp;
2163     } else {
2164         gsc.target = NULL;
2165     }
2166     ofp->bsp = bsp;
2167     ofp->seg_bsp = NULL;
2168     
2169     GatherEntity(entityID, ofp, get_genes, &gsc);
2170     
2171     if((slp = gsc.target) != NULL) {
2172         SeqIdFree(slp->data.ptrvalue);
2173         ValNodeFree(slp);
2174     }
2175     
2176     return ofp;
2177 }
2178 
2179 /***************************************************************************
2180 *       This function assigns genes to the given sfp by comparing locations.
2181 *       of sfp with the location from the list of genes using SeqLocAinB
2182 *       (sfp have to be in the gene).  
2183 *       If SeqLocAinB returns "0", there is an exact match and this gene
2184 *       is judged "the best" match to the sfp.`
2185 *       If SeqLocAinB returns the diff > 0, slp is contained within 
2186 *       gene_loc and the difference between the two features is measured. 
2187 *       The lowest difference gives the "best_gene"
2188 *       and that information is stored in the GeneStructPtr (gsp) by
2189 *       GeneRefInfoToGsp.  
2190 ***************************************************************************/    
2191                                                                 
2192 NLM_EXTERN void MatchNAGeneToFeat (Boolean non_strict, OrganizeFeatPtr ofp, SortStructPtr p)
2193 {
2194 
2195         Boolean                 bind_to_feat=FALSE;
2196         NoteStructPtr   nsp; /* UNUSED */
2197         GeneStructPtr   gsp;
2198         GeneRefPtr              grp=NULL;
2199         ImpFeatPtr              ifp;
2200         Int4                    best_gene = -1, index;
2201         Int4                    diff_lowest, diff_current;
2202         SeqFeatPtr              gene = NULL, best_gene_feat = NULL, sfp;
2203         Uint1                   sg, sf;
2204         
2205         if (p == NULL)
2206                 return;
2207         if ((sfp = p->sfp) == NULL)
2208                 return;
2209         if (sfp->data.choice == SEQFEAT_BIOSRC || sfp->data.choice == SEQFEAT_GENE)     
2210                 return;
2211         if (sfp->data.choice == SEQFEAT_IMP)    {
2212                 ifp = sfp->data.value.ptrvalue;
2213                 if (ifp && StringCmp(ifp->key, "repeat_region") == 0) {
2214                         return;
2215                 }
2216         }
2217         if (non_strict == FALSE) { /* binding is limited to RNA and CDS */
2218                 if (sfp->data.choice == SEQFEAT_CDREGION || 
2219                                                         sfp->data.choice == SEQFEAT_RNA) {
2220                         bind_to_feat = TRUE;    
2221                 } else if (sfp->data.choice == SEQFEAT_IMP) {
2222                         ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
2223                         if (StringCmp(ifp->key, "CDS") == 0)
2224                                 bind_to_feat = TRUE;    
2225                 }
2226         } else {
2227                 bind_to_feat = TRUE;    
2228         }
2229         if (bind_to_feat == FALSE)
2230                 return;
2231 
2232         best_gene_feat = SeqMgrGetOverlappingGene (sfp->location, NULL);
2233         if (best_gene_feat != NULL) {
2234                 grp = best_gene_feat->data.value.ptrvalue;
2235                 if (grp != NULL) {
2236                         gsp = p->gsp;
2237                 /*      GetDBXrefFromGene(grp, sfp);*/
2238         /********               gsp->grp = AsnIoMemCopy(grp, 
2239         (AsnReadFunc) GeneRefAsnRead, (AsnWriteFunc) GeneRefAsnWrite); ****/ /*** it is redone in GeneRefInfoToGsp (EY) */
2240                         GeneRefInfoToGsp(gsp, grp, best_gene_feat);  /*copy GeRefInfo to GeneStruct */
2241                         GetGeneQuals(sfp, gsp); /* copy quals info to GenStruct */
2242                         return;
2243                 }
2244         }
2245         nsp = p->nsp;
2246         gsp = p->gsp;
2247         diff_lowest = -1;
2248         p = ofp->Genelist;
2249         for (index=0; index < ofp->sfpGenesize; index++, p++) {
2250                 if ((gene = p->sfp) == NULL) {
2251                         continue;
2252                 }
2253                 sg = SeqLocStrand(gene->location);
2254                 sf = SeqLocStrand(sfp->location);
2255                 if (sf == sg ||
2256                         (sg == Seq_strand_unknown && sf != Seq_strand_minus) ||
2257                                 (sf == Seq_strand_unknown && sg != Seq_strand_minus)) {
2258                         diff_current = SeqLocAinB(sfp->location, gene->location);
2259                 } else {
2260                         continue;
2261                 }
2262                 if (! diff_current)   /* perfect match */ {
2263                         best_gene = index;
2264                         best_gene_feat = gene;
2265                         break;
2266                 } else if (diff_current > 0) {
2267                         if ((diff_lowest == -1) || (diff_current < diff_lowest)) {
2268                                 diff_lowest = diff_current;
2269                                 best_gene = index;
2270                                 best_gene_feat = gene;
2271                         }
2272                 }
2273 
2274         }
2275         if (best_gene == -1) {  /*no gene found that completely contains CDS*/
2276                 return;
2277         } else {
2278                 if (best_gene_feat != NULL) {
2279                         grp = best_gene_feat->data.value.ptrvalue;
2280                 }
2281         }
2282         /*      GetDBXrefFromGene(grp, sfp);*/
2283                         gsp->grp = AsnIoMemCopy(grp, 
2284                         (AsnReadFunc) GeneRefAsnRead, (AsnWriteFunc) GeneRefAsnWrite);
2285         GeneRefInfoToGsp(gsp, grp, best_gene_feat);  /*copy GeRefInfo to GeneStruct */
2286         GetGeneQuals(sfp, gsp); /* copy quals info to GenStruct */
2287         
2288         return;
2289 }
2290 
2291 static Boolean CheckCdregionGeneXref (SortStructPtr p, Uint1 format)
2292 {
2293         SeqFeatPtr              sfp;
2294         GeneRefPtr              grp;
2295         GeneStructPtr   gsp;
2296         NoteStructPtr   nsp;
2297         ProtRefPtr              prp;
2298         SeqFeatXrefPtr  xrp;
2299         Boolean retval = FALSE;
2300 
2301         if (p == NULL) {
2302                 return retval;
2303         }
2304         if ((sfp = p->sfp) == NULL)
2305                 return retval;
2306         gsp = p->gsp;
2307         nsp = p->nsp;
2308         for (xrp=sfp->xref; xrp; xrp=xrp->next) {
2309                 if (xrp->data.choice == SEQFEAT_GENE) {
2310                         retval = TRUE;
2311                         grp = (GeneRefPtr) xrp->data.value.ptrvalue;
2312                         GeneRefInfoToGsp(gsp, grp, sfp);
2313                 } else if (xrp->data.choice == SEQFEAT_PROT) {
2314                         prp = (ProtRefPtr) xrp->data.value.ptrvalue;
2315                         GetProtRefInfo(format, gsp, nsp, prp);
2316                 }
2317         }
2318 
2319         return retval;
2320 }
2321 
2322 
2323 NLM_EXTERN Boolean GetGeneQuals(SeqFeatPtr sfp_in, GeneStructPtr gsp)
2324 {
2325         Boolean                 has_gene = FALSE;
2326         GBQualPtr               qual1;
2327 
2328         for (qual1=sfp_in->qual; qual1; qual1=qual1->next) {
2329                 if (StringCmp(qual1->qual, "gene") == 0) {
2330                         has_gene = TRUE;
2331                         if (gsp->gene == NULL) {
2332                                 gsp->gene = ValNodeCopyStr(&(gsp->gene), 0, qual1->val);
2333                         }
2334                 } else if (StringCmp(qual1->qual, "product") == 0) {
2335                         if (gsp->product)
2336                                 ValNodeCopyStr(&(gsp->product), 0, qual1->val); 
2337                         else
2338                                 gsp->product = ValNodeCopyStr(&(gsp->product), 0, qual1->val);
2339                 } else if (StringCmp(qual1->qual, "standard_name") == 0) {
2340                         if (gsp->standard_name)
2341                                 ValNodeCopyStr(&(gsp->standard_name), 0, qual1->val); 
2342                         else
2343                                 gsp->standard_name = ValNodeCopyStr(&(gsp->standard_name), 0, qual1->val);
2344                 } else if (gsp->map[0] == NULL 
2345                         && StringCmp(qual1->qual, "map") == 0) {
2346                         gsp->map[0] = qual1->val;
2347                 } else if (StringCmp(qual1->qual, "EC_number") == 0) {
2348                         if (gsp->ECNum)
2349                                 ValNodeCopyStr(&(gsp->ECNum), 0, qual1->val); 
2350                         else
2351                                 gsp->ECNum = ValNodeCopyStr(&(gsp->ECNum), 0, qual1->val);
2352                 }
2353         }
2354 
2355         return has_gene;
2356 }
2357 
2358 /*****************************************************************************
2359 *       see MatchNAGeneToFeat (above) for details
2360 *
2361 ******************************************************************************/
2362 NLM_EXTERN void MatchAAGeneToFeat (OrganizeFeatPtr ofp, SortStructPtr p)
2363 {
2364         GeneRefPtr                      grp = NULL;
2365         Int4                            best_gene = -1, index;
2366         SeqFeatPtr                      gene = NULL, best_gene_feat = NULL, sfp;
2367         NoteStructPtr           nsp; /* UNUSED */
2368         GeneStructPtr           gsp;
2369         BioseqPtr                       bsp;
2370         Int4                            diff_lowest, diff_current;
2371         OrganizeFeatPtr         gofp;
2372         
2373         if (p == NULL)
2374                 return;
2375         if ((sfp = p->sfp) == NULL)
2376                 return;
2377         if (SeqLocLen(sfp->location) == -1)
2378                 return;  /*SeqLocLen failed on CDS location */
2379 
2380 /* Only look on the CDS!! for genpept.          */
2381         if (sfp->data.choice != SEQFEAT_CDREGION) {
2382                 return;
2383         }
2384         bsp = BioseqFind(SeqLocId(sfp->location));
2385 
2386         best_gene_feat = SeqMgrGetOverlappingGene (sfp->location, NULL);
2387         if (best_gene_feat != NULL) {
2388                 grp = best_gene_feat->data.value.ptrvalue;
2389                 gsp = p->gsp;
2390                 GeneRefInfoToGsp(gsp, grp, best_gene_feat);  /*copy GeRefInfo to GeneStruct */
2391                 if (bsp && bsp->id->choice == SEQID_OTHER) {
2392                         GetDBXrefFromGene(grp, sfp);
2393                 }
2394                 return;
2395         }
2396 
2397         gofp = GetGeneListForCds(p->entityID, bsp);
2398         if (gofp == NULL) {
2399                 return;
2400         }
2401         nsp = p->nsp;
2402         gsp = p->gsp;
2403         if (bsp == NULL && gofp->sfpGenesize == 1) {
2404                 gene = gofp->Genelist->sfp;
2405                 if (gene != NULL) {
2406                         grp = gene->data.value.ptrvalue;
2407                         GeneRefInfoToGsp(gsp, grp, gene);  /*copy GeRefInfo to GeneStruct */
2408                 }
2409                 MemFree(gofp->Genelist);
2410                 MemFree(gofp);
2411                 return; /* first and best gene */
2412         }
2413         diff_lowest = -1;
2414         for (p=gofp->Genelist, index=0; index < gofp->sfpGenesize; p++, index++) {
2415                 if ((gene = p->sfp) == NULL) {
2416                                 continue;       
2417                 }
2418                 diff_current = SeqLocAinB(sfp->location, gene->location);
2419                 if (! diff_current)   /* perfect match */ {
2420                         best_gene = index;
2421                         best_gene_feat = gene;
2422                         break;
2423                 } else if (diff_current > 0) {
2424                         if ((diff_lowest == -1) || (diff_current < diff_lowest)) {
2425                                 diff_lowest = diff_current;
2426                                 best_gene = index;
2427                                 best_gene_feat = gene;
2428                         }
2429                 }
2430         }
2431         if (best_gene == -1) {
2432                 return;
2433         }
2434         if (best_gene_feat != NULL) {
2435                 grp = best_gene_feat->data.value.ptrvalue;
2436                 GeneRefInfoToGsp(gsp, grp, best_gene_feat);  /*copy GeRefInfo to GeneStruct */
2437                 if (bsp && bsp->id->choice == SEQID_OTHER) {
2438                         GetDBXrefFromGene(grp, sfp);
2439                 }
2440         }
2441         MemFree(gofp->Genelist);
2442         MemFree(gofp);
2443         return;
2444 }
2445 
2446 /*****************************************************************************
2447 *       do sorting and gene mapping within one entity (that should be locked)
2448 *
2449 ******************************************************************************/
2450 NLM_EXTERN void SortOrganizeFeat(OrganizeFeatPtr ofp)
2451 {
2452         SortStructPtr   p;
2453         GeneStructPtr   gsp;
2454         Int4                    index;
2455         SeqFeatPtr              sfp;
2456         
2457         if (ofp == NULL)
2458                 return;
2459                 if (ofp->sfpListsize > 0 && ofp->sortListsize < ofp->sfpListsize) {
2460                         HeapSort((VoidPtr) (ofp->List + ofp->sortListsize), 
2461                                 (size_t) (ofp->sfpListsize - ofp->sortListsize), 
2462                                         sizeof(SortStruct), CompareSfpForHeap);
2463                         UniqueFeat(ofp->List + ofp->sortListsize,
2464                                                 ofp->sfpListsize - ofp->sortListsize);
2465                 }
2466         if (ofp->sfpCommsize > 0 && ofp->sortCommsize < ofp->sfpCommsize)
2467                 HeapSort((VoidPtr) (ofp->Commlist + ofp->sortCommsize), 
2468                         (size_t) (ofp->sfpCommsize - ofp->sortCommsize), 
2469                                 sizeof(SortStruct), CompareSfpForHeap);
2470         if (ofp->sfpGenesize > 0 && ofp->sortGenesize < ofp->sfpGenesize)
2471                 HeapSort((VoidPtr) (ofp->Genelist + ofp->sortGenesize), 
2472                         (size_t) (ofp->sfpGenesize - ofp->sortGenesize), 
2473                                 sizeof(SortStruct), CompareSfpForHeap);
2474         if (ofp->sfpOrgsize > 0 && ofp->sortOrgsize < ofp->sfpOrgsize)
2475                 HeapSort((VoidPtr) (ofp->Orglist + ofp->sortOrgsize), 
2476                         (size_t) (ofp->sfpOrgsize - ofp->sortOrgsize), 
2477                                         sizeof(SortStruct), CompareSfpForHeap);
2478         if (ofp->sfpSitesize > 0 && ofp->sortSitesize < ofp->sfpSitesize)
2479                 HeapSort((VoidPtr) (ofp->Siteslist + ofp->sortSitesize), 
2480                         (size_t) (ofp->sfpSitesize - ofp->sortSitesize), 
2481                                 sizeof(SortStruct), CompareSfpForHeap);
2482         if (ofp->sfpSourcesize > 0 && ofp->sortSourcesize < ofp->sfpSourcesize)
2483                 HeapSort((VoidPtr) (ofp->Sourcelist + ofp->sortSourcesize), 
2484                         (size_t) (ofp->sfpSourcesize - ofp->sortSourcesize), 
2485                                 sizeof(SortStruct),
2486                                         CompareSfpForHeap);
2487         ofp->sortListsize = ofp->sfpListsize;
2488         ofp->sortCommsize = ofp->sfpCommsize;
2489         ofp->sortGenesize = ofp->sfpGenesize;
2490         ofp->sortOrgsize = ofp->sfpOrgsize;
2491         ofp->sortSitesize = ofp->sfpSitesize;
2492         ofp->sortSourcesize = ofp->sfpSourcesize;
2493 
2494         p = ofp->List;
2495         for (index=0; index < ofp->sfpListsize; index++, p++) {
2496                 sfp = p->sfp;
2497                 if (p == NULL)
2498                         continue;
2499                 gsp = GeneStructNew();
2500                 p->gsp = gsp;
2501                 if (CheckCdregionGeneXref(p, ofp->format) == FALSE) {
2502                         if (ofp->format == EMBLPEPT_FMT || ofp->format == GENPEPT_FMT) {
2503                                 MatchAAGeneToFeat(ofp, p);
2504                         } else {
2505                                 MatchNAGeneToFeat(ofp->non_strict, ofp, p);
2506                         }
2507                 }
2508                 GetGeneQuals(sfp, gsp);
2509         }
2510 /*      if (ofp->sfpListsize > 0) {
2511                 BioseqUnlock(ofp->lock_bsp);
2512         }
2513 */
2514         BioseqUnlock(ofp->lock_bsp);
2515         return;
2516 }
2517 
2518 /*
2519 static Boolean is_mRNA_set(SeqEntryPtr sep, BioseqPtr bsp)
2520 {
2521         BioseqSetPtr    bssp;
2522         ValNodePtr              vnp;
2523         MolInfoPtr              mip = NULL;
2524         
2525         if (sep == NULL) {
2526                 return FALSE;
2527         }
2528         if (IS_Bioseq(sep)) {
2529                 return FALSE;
2530         }
2531         bssp = (BioseqSetPtr) sep->data.ptrvalue;
2532         if (bssp->_class != BioseqseqSet_class_gen_prod_set) {
2533                 return FALSE;
2534         }
2535         for (vnp = bsp->descr; vnp; vnp=vnp->next) {
2536                 if (vnp->choice == Seq_descr_molinfo) {
2537                         mip = (MolInfoPtr) vnp->data.ptrvalue;
2538                         break;
2539                 }
2540         }
2541         if (mip == NULL) {
2542                 return FALSE;
2543         }
2544         if (mip->biomol != 3) {
2545                 return FALSE;
2546         }
2547         return TRUE;
2548 }
2549 */
2550         
2551 /*****************************************************************************
2552 *       Gather all features in one GBEntry and process within
2553 *       each entity (in callback)
2554 *
2555 ******************************************************************************/
2556 NLM_EXTERN void OrganizeSeqFeat(Asn2ffJobPtr ajp, GBEntryPtr gbp)
2557 {
2558         BioseqPtr               bsp;
2559         GatherScope     gsc;
2560         ValNodePtr              slp = NULL;
2561         OrganizeFeatPtr ofp;
2562         SeqFeatPtr              mrna, gene = NULL, newg;
2563         GeneRefPtr              grp;
2564         SeqMgrFeatContext fcontext;
2565         SeqIdPtr                sip;
2566 
2567         ofp = CreateOrganizeFeat();
2568         ofp->lock_bsp = NULL;
2569         if ((bsp = gbp->bsp) == NULL)
2570                 return;
2571         ofp->embl_feat = is_embl(gbp);
2572         MemSet ((Pointer) (&gsc), 0, sizeof (GatherScope));
2573         MemSet ((Pointer) (gsc.ignore), (int)(TRUE),
2574                         (size_t) (OBJ_MAX * sizeof(Boolean)));
2575         gsc.ignore[OBJ_SEQANNOT] = FALSE;
2576         gsc.ignore[OBJ_SEQFEAT] = FALSE;
2577         gsc.get_feats_location = TRUE;
2578         if (ajp->ignore_top)
2579                 gsc.ignore_top = TRUE;
2580                 gsc.seglevels = 1;
2581         if (ajp->format == GENPEPT_FMT) {
2582                 gsc.get_feats_product = TRUE;
2583         }
2584         gsc.seglevels = 0;
2585         if (ajp->slp != NULL) {
2586                 gsc.target = ajp->slp;
2587                 gsc.convert_loc = TRUE;
2588                 gsc.newid = bsp->id;
2589         } else {
2590                 slp = ValNodeNew(NULL);
2591                 slp->choice = SEQLOC_WHOLE;
2592                 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
2593                 gsc.target = slp;
2594                 if (ajp->only_one) {
2595                         gsc.convert_loc = TRUE;
2596                         gsc.newid = bsp->id;
2597                 }
2598         }
2599         ofp->bsp = bsp;
2600         ofp->showSeqLoc = (ajp->slp) ? TRUE : FALSE;
2601         ofp->useSeqMgrIndexes = ajp->useSeqMgrIndexes;
2602         ofp->seg_bsp = ajp->asn2ffwep->seg;
2603         ofp->format = ajp->format;
2604         ofp->non_strict = ajp->non_strict;
2605         for (sip=bsp->id; sip; sip=sip->next) { /* non_strict binding for REFSEQ*/
2606                 if (sip->choice == SEQID_OTHER) {
2607                         ofp->non_strict = FALSE;
2608                 }
2609         }
2610         ofp->show_gene = ajp->show_gene;
2611         if (ajp->format == GENPEPT_FMT && bsp->repr == Seq_repr_raw && ajp->useSeqMgrIndexes) {
2612                 gsc.useSeqMgrIndexes = TRUE;
2613         }
2614         GatherEntity(ajp->entityID, ofp, get_feats, &gsc);
2615         if (slp) {
2616                 SeqLocFree(slp);
2617         }
2618         ofp->lock_bsp = ofp->bsp;
2619         BioseqLock(ofp->lock_bsp);
2620 
2621         if (/* is_mRNA_set(ajp->sep, bsp) && */ ajp->useSeqMgrIndexes) {
2622                 mrna = SeqMgrGetRNAgivenProduct(bsp, NULL);
2623                 if (mrna) {
2624                         gene = SeqMgrGetOverlappingGene(mrna->location, &fcontext);
2625                 }
2626                 if (gene) {
2627                         grp = (GeneRefPtr) gene->data.value.ptrvalue;
2628                         newg = SeqFeatNew();
2629                         newg->data.choice = SEQFEAT_GENE;
2630                         newg->location = SeqLocIntNew(0, bsp->length-1, 
2631                                 SeqLocStrand(gene->location), SeqIdDup(bsp->id));
2632                         newg->data.value.ptrvalue = AsnIoMemCopy(grp, 
2633                         (AsnReadFunc) GeneRefAsnRead, (AsnWriteFunc) GeneRefAsnWrite);
2634                         ofp->Genelist = EnlargeSortList(ofp->Genelist,ofp->sfpGenesize);
2635                         ofp->sfpGenesize = StoreFeat(ofp->Genelist, newg, 
2636                         ofp->sfpGenesize, bsp, ofp->seg_bsp, fcontext.entityID, fcontext.itemID, OBJ_SEQFEAT, NULL, NULL, 0);
2637                         ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
2638                         ofp->sfpListsize = StoreFeat(ofp->List, newg, 
2639                         ofp->sfpListsize, bsp, ofp->seg_bsp, fcontext.entityID, fcontext.itemID, OBJ_SEQFEAT, NULL, NULL, 0);
2640                 }
2641         }
2642         
2643         SortOrganizeFeat(ofp);  /* for the last entity */
2644         UniqueGeneName(ajp->error_msgs, ofp);
2645         ofp->source_notes = NoteStructNew(ofp->source_notes);
2646         gbp->feat = ofp;
2647 
2648         return;
2649 }
2650 
2651 /*****************************************************************************
2652 *       Gather all features
2653 *
2654 ******************************************************************************/
2655 NLM_EXTERN void GetSeqFeat(Asn2ffJobPtr ajp)
2656 {
2657         GBEntryPtr              gbp;
2658 
2659         for (gbp = ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2660                 if (gbp->locus) {
2661                         flat2asn_delete_locus_user_string();
2662                         flat2asn_install_locus_user_string(gbp->locus);
2663                 }
2664                 if (gbp->accession) {
2665                         flat2asn_delete_accession_user_string();
2666                         flat2asn_install_accession_user_string(gbp->accession);
2667                 }
2668                 OrganizeSeqFeat(ajp, gbp);
2669         }
2670 
2671 }
2672 
2673 
2674 
2675 

source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.