|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/asn2ff4.c |
source navigation diff markup identifier search freetext search file search |
1 /* asn2ff4.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2ff4.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov
29 *
30 * Version Creation Date: 7/15/95
31 *
32 * $Revision: 6.51 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date Name Description of modification
39 * ------- ---------- -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44
45
46 /*************************************
47 *
48 =======
49 * $Log: asn2ff4.c,v $
50 * Revision 6.51 2001/10/02 16:13:15 yaschenk
51 * GetSeqIdForGI returns SeqIdDup() - needs freeing
52 *
53 * Revision 6.50 2001/09/06 19:15:19 yaschenk
54 * removing memory leak - AsnIoMemCopy is done twice
55 *
56 * Revision 6.49 2001/09/05 23:31:34 tatiana
57 * synonym is added to Genestruct with choice 1
58 *
59 * Revision 6.48 2001/08/07 16:49:41 kans
60 * use NUM_SEQID, added third party annotation SeqIDs to one more place
61 *
62 * Revision 6.47 2001/08/07 15:51:08 kans
63 * use NUM_SEQID, added third party annotation seqids
64 *
65 * Revision 6.46 2001/07/18 14:50:13 kans
66 * gather features with gsc.useSeqMgrIndexes if genpept, raw, indexing requested, and IndexedGetDescrForDiv to speed up finding division
67 *
68 * Revision 6.45 2001/06/26 20:41:16 kans
69 * FlatLocPoint as last resort prints gi|#####
70 *
71 * Revision 6.44 2001/06/25 19:18:13 kans
72 * get_feats SEQFEAT_CDREGION finds core without changing scope, if using indexes it indexes the entity if necessary, never goes to old gather code
73 *
74 * Revision 6.43 2001/04/12 22:48:52 yaschenk
75 * removing excessive ObjMgr calls
76 *
77 * Revision 6.42 2001/04/05 21:44:51 tatiana
78 * additional synonym in GeneRefInfoToGsp()
79 *
80 * Revision 6.41 2001/02/01 23:06:32 tatiana
81 * check for NULL added in MatchNAGeneToFeat
82 *
83 * Revision 6.40 2001/01/19 17:32:23 yaschenk
84 * Removed BioseqLockById when only GetSeqIdForGI is needed
85 *
86 * Revision 6.39 2000/12/05 22:24:34 tatiana
87 * bug fixed in FeatMatch
88 *
89 * Revision 6.38 2000/11/21 20:52:44 tatiana
90 * fixes in CreateImpFeatFromProt
91 *
92 * Revision 6.37 2000/11/02 01:53:07 tatiana
93 * static CompXref() added in FeatMatch
94 *
95 * Revision 6.36 2000/06/05 17:52:18 tatiana
96 * increase size of feature arrays to Int4
97 *
98 * Revision 6.35 2000/04/13 14:17:32 ostell
99 * fixed support for lim->tr. FlatLocHalfCaret always assumed lim->tl
100 *
101 * Revision 6.34 2000/04/03 23:28:19 tatiana
102 * added showSeqLoc for web feature view
103 *
104 * Revision 6.33 2000/01/21 17:17:52 kans
105 * MatchAAGeneToFeat now calls SeqMgrGetOverlappingGene on CDS first, avoids multiple targeted gathers, just like MatchNAGeneToFeat has done since feature indexing was first implemented
106 *
107 * Revision 6.32 1999/12/22 22:08:19 tatiana
108 * strand check fixed
109 *
110 * Revision 6.31 1999/12/13 19:51:47 tatiana
111 * Seq_strand_unknown added to strand check
112 *
113 * Revision 6.30 1999/12/09 14:33:07 tatiana
114 * check the strand for mapping gene
115 *
116 * Revision 6.29 1999/11/05 14:55:53 tatiana
117 * check for embl_feat added to get_feats in Gather
118 *
119 * Revision 6.28 1999/10/07 15:17:20 bazhin
120 * Bug fixed.
121 *
122 * Revision 6.27 1999/10/06 20:21:50 bazhin
123 * Removed memory leak in get_feats() function.
124 *
125 * Revision 6.26 1999/06/04 21:03:52 tatiana
126 * a bug fixed in MatchAAGeneToFeat()
127 *
128 * Revision 6.25 1999/04/29 22:49:20 tatiana
129 * added REFSEQ dbxrefs in GenPept format
130 *
131 * Revision 6.24 1999/03/30 22:23:33 kans
132 * pseudo can be on grp or sfp
133 *
134 * Revision 6.23 1999/03/30 19:47:40 tatiana
135 * use non-strict binding for REFSEQ
136 *
137 * Revision 6.22 1999/03/25 00:26:38 kans
138 * restored first sort in SortOrganizeFeat
139 *
140 * Revision 5.31 1997/06/19 18:37:07 vakatov
141 * [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
142 *
143 * Revision 5.30 1997/04/25 15:35:39 tatiana
144 * EMBL_PREFNUM added
145 *
146 * Revision 5.29 1997/03/13 17:58:27 tatiana
147 * *** empty log message ***
148 *
149 * Revision 5.28 1997/02/27 16:20:23 kans
150 * check for sfp != NULL in UniqueGeneName
151 *
152 * Revision 5.27 1997/01/27 19:14:39 tatiana
153 * *** empty log message ***
154 *
155 * Revision 5.25 1997/01/13 23:27:10 tatiana
156 * added check for NULL in UniqueGeneNames()
157 *
158 * Revision 5.24 1997/01/13 22:32:48 tatiana
159 * *** empty log message ***
160 *
161 * Revision 5.23 1997/01/13 21:44:50 tatiana
162 * a bug fixed in CreateImpFeatFromProt()
163 *
164 * Revision 5.22 1997/01/06 19:55:22 tatiana
165 * convert site and bond features to dna misc features
166 *
167 * Revision 5.21 1997/01/02 22:50:19 tatiana
168 * *** empty log message ***
169 *
170 * Revision 5.20 1996/12/17 22:49:02 tatiana
171 * StoreFeat() changed to StoreFeatFree for converted peptide feats
172 *
173 * Revision 5.19 1996/10/22 17:48:33 tatiana
174 * check for right-truncated genes added in get_feats callback
175 *
176 * Revision 5.17 1996/09/27 22:07:05 tatiana
177 * no gene binding to repeat_region feature
178 *
179 * Revision 5.16 1996/09/25 18:06:23 tatiana
180 * SEQFEAT_COMMENT is stored in a generic feature list
181 *
182 * Revision 5.15 1996/09/17 14:59:40 tatiana
183 * error msg for identical feats added
184 *
185 * Revision 5.14 1996/09/13 17:01:31 kans
186 * feature field is now excpt, not except, and removed extraneous line
187 *
188 * Revision 5.13 1996/09/13 16:48:54 tatiana
189 * except and exp_ev added in CreateImpFeatFromProt
190 *
191 * Revision 5.11 1996/09/13 16:26:55 kans
192 * get_feats can NULL out sfp, so should test before dereferencing
193 *
194 * Revision 5.10 1996/09/12 17:52:53 tatiana
195 * less peptide fets were missing some qualifiers
196 *
197 * Revision 5.9 1996/09/03 19:52:00 tatiana
198 * extra_loc added in StoreFeat
199 *
200 * Revision 5.8 1996/08/06 20:30:46 kans
201 * SeqIdFindBest called to handle local IDs and genbank IDs coexisting
202 *
203 * Revision 5.7 1996/07/30 19:20:44 tatiana
204 * Don't bind gene to gene in MatchNAToGene()
205 *
206 * Revision 5.6 1996/07/30 16:37:16 tatiana
207 * a bug fixed in UniqueFeat()
208 *
209 * Revision 5.5 1996/07/23 22:34:11 tatiana
210 * prot feats in genpept (peptides)
211 *
212 * Revision 5.4 1996/07/16 15:45:24 tatiana
213 * *** empty log message ***
214 *
215 * Revision 5.3 1996/07/02 18:10:50 tatiana
216 * calculate hash in StoreFeat
217 *
218 * Revision 5.2 1996/06/11 17:05:59 tatiana
219 * *** empty log message ***
220 *
221 * Revision 5.1 1996/06/11 15:44:00 tatiana
222 * Support Prot-ref feature mapping
223 *
224 * Revision 4.12 1996/05/16 20:59:50 tatiana
225 * RemoveRedundantFeats added
226 *
227 * Revision 4.11 1996/03/25 15:21:24 tatiana
228 * *** empty log message ***
229 *
230 * Revision 4.10 1996/03/08 15:03:19 tatiana
231 * don't bind gene to biosource feature
232 * a bug fixed in FlatLocPoint
233 *
234 * Revision 4.9 1996/03/04 17:11:20 ostell
235 * added support for ignore_top features
236 *
237 * Revision 4.8 1996/02/28 04:53:06 ostell
238 * changes to support segmented master sequences
239 *
240 * Revision 4.7 1996/02/15 15:53:43 tatiana
241 * Gather for temp loaded items added
242 *
243 * Revision 4.6 1996/01/29 22:35:36 tatiana
244 * *** empty log message ***
245 *
246 * Revision 4.5 1995/12/20 22:40:55 tatiana
247 * GetDBXrefFromGene() added
248 *
249 * Revision 4.4 1995/11/22 19:14:03 tatiana
250 * a bug fixed for GenPept
251 *
252 * Revision 4.3 1995/11/22 19:01:07 tatiana
253 * a bug fixed in orphan genes printing
254 *
255 * Revision 4.2 1995/11/17 21:28:35 kans
256 * asn2ff now uses gather (Tatiana)
257 *
258 * Revision 4.1 1995/08/01 14:52:03 tatiana
259 * change SeqIdPrint to SeqIdWrite.
260 *
261 * Revision 1.15 1995/07/17 19:33:20 kans
262 * parameters combined into Asn2ffJobPtr structure
263 *
264 * Revision 1.14 1995/06/19 21:40:02 kans
265 * Tatiana's first major reorganization, moving printing, adding HTML
266 *
267 * Revision 1.13 1995/05/15 21:46:05 ostell
268 * added Log line
269 *
270 *
271 **************************************/
272
273 #include <asn2ffp.h>
274 #include <a2ferrdf.h>
275 #include <gather.h>
276 #include <asn2ff6.h>
277 #include <explore.h>
278
/*
 * Error sub-codes handed to ErrPostEx() together with CTX_NCBI2GB by the
 * location formatting routines in this file.
 */
#define CTX_2GB_LOCATION_TROUBLE   0   /* a location could not be rendered  */
#define CTX_2GB_NOT_IMPLEMENTED    1   /* location type is not handled here */
281
282 /******************** Function Prototypes *********************************/
283
284 NLM_EXTERN CharPtr FlatLocHalf PROTO ((CharPtr buf, Int4 base, IntFuzzPtr fuzz));
285 NLM_EXTERN CharPtr FlatLocHalfCaret PROTO ((CharPtr buf, Int4 base, IntFuzzPtr fuzz));
286 NLM_EXTERN Boolean FlatLocPoint PROTO ((SeqIdPtr pointIdPtr, SeqIdPtr this_sidp, CharPtr piecebuf, Int4 point, IntFuzzPtr pointfuzzPtr));
287 NLM_EXTERN Boolean FlatLocCaret PROTO ((SeqIdPtr pointIdPtr, SeqIdPtr this_sidp, CharPtr piecebuf, Int4 point, IntFuzzPtr pointfuzzPtr));
288 NLM_EXTERN Boolean FlatVirtLoc PROTO ((BioseqPtr bsp, ValNodePtr location));
289 NLM_EXTERN Boolean FlatLocElement PROTO ((BioseqPtr bsp, ValNodePtr location, CharPtr buf));
290 NLM_EXTERN CharPtr complement_FlatLoc PROTO ((SeqIdPtr this_sidp, Boolean PNTR is_okPt, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location));
291 NLM_EXTERN CharPtr FlatSmartStringMove PROTO ((CharPtr total_buf, Int4Ptr lengthPt, CharPtr temp, CharPtr string));
292 NLM_EXTERN Boolean FlatNullAhead PROTO ((BioseqPtr bsp, ValNodePtr location));
293 NLM_EXTERN CharPtr FlatPackedPoint PROTO ((CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, PackSeqPntPtr pspp, SeqIdPtr this_sidp));
294 NLM_EXTERN CharPtr do_FlatLoc PROTO ((Boolean PNTR is_okPt, Boolean ok_to_complement, SeqIdPtr this_sidp, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location));
295 NLM_EXTERN CharPtr group_FlatLoc PROTO ((SeqIdPtr this_sidp, Boolean PNTR is_okPt, int which, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location));
296 NLM_EXTERN Boolean is_real_id PROTO ((SeqIdPtr pointIdPtr, SeqIdPtr this_sidp));
297 NLM_EXTERN void Bond PROTO ((SeqBondPtr bondp, SeqIdPtr this_sidp, CharPtr buf));
298 NLM_EXTERN Boolean LookForFuzz PROTO ((SeqLocPtr head));
299 /*************************************************************************/
300
301 /*--- the number of characters per location element is less
302 than 130. The maximum would be for a bond with both accessions
303 and both with (n.m) locations.
304 ----*/
305 #define MAX_CHAR_LOCATION 135
306
307 static CharPtr lim_str [5] = {"", ">","<", ">", "<"};
308
309 /*----------- FlatLocHalf ()------*/
310 NLM_EXTERN CharPtr FlatLocHalf
311 (CharPtr buf, Int4 base, IntFuzzPtr fuzz)
312 {
313 char localbuf [30];
314 Uint1 index;
315
316 /*------
317 typedef struct intfuzz {
318 Uint1 choice; 1=p-m, 2=range, 3=pct, 4=lim
319 Int4 a, b; a=p-m,max,pct,orlim, b=min
320 } IntFuzz, PNTR IntFuzzPtr;
321
322 Int-fuzz ::= CHOICE {
323 p-m INTEGER , -- plus or minus fixed amount
324 range SEQUENCE { -- max to min
325 max INTEGER ,
326 min INTEGER } ,
327 pct INTEGER , -- % plus or minus (x10) 0-1000
328 lim ENUMERATED { -- some limit value
329 unk (0) , -- unknown
330 gt (1) , -- greater than
331 lt (2) , -- less than
332 tr (3) , -- space to right of position
333 tl (4) , -- space to left of position
334 other (255) } } -- something else
335 -------*/
336
337 localbuf[0] = '\0';
338 buf[0] = '\0';
339
340 if (fuzz){
341 /* Fuzz_found = TRUE; */
342 switch (fuzz -> choice){
343 case 1:
344 sprintf(localbuf,"(%ld.%ld)", (long) (base - fuzz -> a),
345 (long) (base + fuzz -> a));
346 break;
347 case 2:
348 sprintf(localbuf,"(%ld.%ld)", (long) (1+fuzz -> b),
349 (long) (1+fuzz -> a));
350 break;
351 case 3:
352 sprintf(localbuf,"(%ld.%ld)",
353 (long) (base - base* ((double) fuzz -> a/1000.0 )),
354 (long) (base +base*( (double) fuzz -> a/1000.0 )));
355 break;
356 case 4:
357 index = (Uint1) fuzz -> a;
358 if (index > 4) index = 0;
359 sprintf(localbuf,"%s%ld", lim_str[index], (long) base);
360 break;
361 default:
362 sprintf(localbuf,"%ld", (long) base);
363 }
364 } else {
365 sprintf(localbuf,"%ld", (long) base);
366 }
367
368 StringMove(buf, localbuf);
369
370 return buf;
371 }
372
373 /*----------- FlatLocHalfCaret ()------*/
374
375 NLM_EXTERN CharPtr FlatLocHalfCaret
376 (CharPtr buf, Int4 base, IntFuzzPtr fuzz)
377 {
378 char localbuf [30];
379 Uint1 index;
380
381 localbuf[0] = '\0';
382 buf[0] = '\0';
383
384 if (fuzz){
385 /* Fuzz_found = TRUE; */
386 switch (fuzz -> choice){
387 case 1:
388 sprintf(localbuf,"(%ld.%ld)..(%ld.%ld)",
389 (long) (base - fuzz -> a), (long) base, (long) base,
390 (long) (base + fuzz -> a));
391 break;
392 case 2:
393 sprintf(localbuf,"%ld^%ld", (long) (1+fuzz -> b),
394 (long) (1+fuzz -> a));
395 break;
396 case 3:
397 sprintf(localbuf,"%ld^%ld",
398 (long) (base - base* ((double) fuzz -> a/1000.0 )),
399 (long) (base +base*( (double) fuzz -> a/1000.0 )));
400 break;
401 case 4:
402 if (fuzz->a == 3) /* space to right */
403 {
404 sprintf(localbuf, "%ld^%ld", (long)(base), (long)(base+1));
405 }
406 else if ((fuzz->a == 4) && (base > 1)) /* space to left */
407 {
408 sprintf(localbuf, "%ld^%ld", (long)(base-1), (long)(base));
409 }
410 else{
411 index = (Uint1) fuzz -> a;
412 if (index > 4) index = 0;
413 sprintf(localbuf,"%s%ld",
414 lim_str[index], (long) base);
415 }
416 break;
417 default:
418 sprintf(localbuf,"%ld", (long) base);
419 break;
420 }
421 }else{
422 sprintf(localbuf,"%ld", (long) base);
423 }
424
425 StringMove(buf, localbuf);
426
427 return buf;
428 }
429
430 NLM_EXTERN Boolean FlatLocPoint (SeqIdPtr pointIdPtr, SeqIdPtr this_sidp, CharPtr piecebuf, Int4 point, IntFuzzPtr pointfuzzPtr)
431 /* FLATLOC_CONTEXT_LOC is removed 08.31.95 */
432 {
433 SeqIdPtr use_id,free_seqid=NULL;
434 Char buf_space[MAX_CHAR_LOCATION +1], halfbuf_space[MAX_CHAR_LOCATION +1];
435 CharPtr buf, halfbuf, temp;
436 static Boolean order_initialized = FALSE;
437 static Uint1 order[NUM_SEQID];
438 ObjectIdPtr ob;
439
440 if ( ! order_initialized){
441 int dex;
442 for (dex=0; dex < NUM_SEQID; dex ++)
443 order[dex] = 255;
444 order_initialized = TRUE;
445 order[SEQID_GENBANK ] = 1;
446 order[SEQID_EMBL ] = 2;
447 order[SEQID_DDBJ ] = 3;
448 order[SEQID_LOCAL ] =4;
449 order[SEQID_OTHER ] =5;
450 order[SEQID_TPG ] = 6;
451 order[SEQID_TPE ] = 7;
452 order[SEQID_TPD ] = 8;
453 order[SEQID_GIBBSQ ] =9;
454 order[SEQID_GIBBMT ] =10;
455 order[SEQID_PRF ] =11;
456 order[SEQID_PDB ] =12;
457 order[SEQID_PIR ] =13;
458 order[SEQID_SWISSPROT ] =14;
459 order[SEQID_PATENT ] =15;
460 order[SEQID_GI ] =16;
461 order[SEQID_GENERAL ] =17;
462 order[SEQID_GIIM ] =18;
463 }
464
465 buf = buf_space;
466 halfbuf = halfbuf_space;
467 piecebuf[0] = '\0';
468 buf[0] = '\0';
469 temp = buf;
470
471 if (pointIdPtr) {
472 if ( ! SeqIdIn ( pointIdPtr, this_sidp)){
473 if (pointIdPtr->choice == SEQID_GI) {
474 free_seqid = use_id = GetSeqIdForGI(pointIdPtr->data.intvalue); /** returns SeqIdDup **/
475 } else {
476 use_id = pointIdPtr;
477 }
478
479 SeqIdWrite( use_id, buf, PRINTID_TEXTID_ACC_VER, MAX_CHAR_LOCATION);
480 if(*buf == '\0') {
481 SeqIdWrite(use_id, buf,PRINTID_FASTA_LONG, MAX_CHAR_LOCATION);
482 }
483 if (*buf == '\0' && use_id == NULL && pointIdPtr->choice == SEQID_GI) {
484 SeqIdWrite (pointIdPtr, buf, PRINTID_FASTA_LONG, MAX_CHAR_LOCATION);
485 }
486 if (*buf == '\0') {
487 StringCpy(buf,"?00000");
488 if (use_id && use_id -> choice == SEQID_LOCAL){
489 ob = (ObjectIdPtr) use_id -> data.ptrvalue;
490 if (ob ->str) {
491 if (*ob -> str) {
492 StringNCpy(buf, ob ->str, 12);
493 }
494 }
495 }
496 }
497 temp = StringMove (temp, buf);
498 temp = StringMove(temp,":");
499 }
500 }
501 FlatLocHalf(halfbuf, point+1, pointfuzzPtr);
502 temp = StringMove(temp, halfbuf);
503 StringMove(piecebuf, buf);
504 if(free_seqid) SeqIdFree(free_seqid);
505 return TRUE;
506 }
507
508 NLM_EXTERN Boolean FlatLocCaret
509 (SeqIdPtr pointIdPtr, SeqIdPtr this_sidp, CharPtr piecebuf, Int4 point, IntFuzzPtr pointfuzzPtr)
510 {
511 BioseqPtr bs;
512 Char buf_space[MAX_CHAR_LOCATION +1], halfbuf_space[MAX_CHAR_LOCATION +1];
513 CharPtr buf, halfbuf, temp;
514 SeqIdPtr use_id;
515 static Boolean order_initialized = FALSE;
516 static Uint1 order[NUM_SEQID];
517
518 if ( ! order_initialized){
519 int dex;
520 for (dex=0; dex < NUM_SEQID; dex ++)
521 order[dex] = 255;
522 order_initialized = TRUE;
523 order[SEQID_GENBANK ] = 1;
524 order[SEQID_EMBL ] = 2;
525 order[SEQID_DDBJ ] = 3;
526 order[SEQID_LOCAL ] =4;
527 order[SEQID_OTHER ] =5;
528 order[SEQID_TPG ] = 6;
529 order[SEQID_TPE ] = 7;
530 order[SEQID_TPD ] = 8;
531 order[SEQID_GIBBSQ ] =9;
532 order[SEQID_GIBBMT ] =10;
533 order[SEQID_PRF ] =11;
534 order[SEQID_PDB ] =12;
535 order[SEQID_PIR ] =13;
536 order[SEQID_SWISSPROT ] =14;
537 order[SEQID_PATENT ] =15;
538 order[SEQID_GI ] =16;
539 order[SEQID_GENERAL ] =17;
540 order[SEQID_GIIM ] =18;
541 }
542
543 buf = &(buf_space[0]);
544 halfbuf = &(halfbuf_space[0]);
545 piecebuf[0] = '\0';
546 buf[0] = '\0';
547 temp = buf;
548
549 if (pointIdPtr)
550 if ( ! SeqIdIn ( pointIdPtr, this_sidp)){
551 use_id = pointIdPtr;
552 bs = BioseqFind(use_id);
553 if ( bs ){
554 use_id = SeqIdSelect ( bs -> id, order,NUM_SEQID);
555 }
556 SeqIdWrite( use_id, buf, PRINTID_TEXTID_ACC_VER, MAX_CHAR_LOCATION);
557 temp = StringMove (temp, buf);
558 temp = StringMove(temp,":");
559 }
560 FlatLocHalfCaret(halfbuf, point+1, pointfuzzPtr);
561 temp = StringMove(temp, halfbuf);
562 StringMove(piecebuf, buf);
563
564 return TRUE;
565 }
566
567 NLM_EXTERN Boolean FlatVirtLoc(BioseqPtr bsp, ValNodePtr location)
568 {
569 Boolean retval = FALSE;
570 SeqIntPtr sintp;
571 BioseqPtr this_bsp=NULL;
572 SeqIdPtr this_sidp=NULL, sidp = NULL;
573 SeqPntPtr spp;
574
575 this_bsp = bsp;
576 this_sidp = this_bsp -> id;
577
578
579 switch ( location -> choice){
580 case SEQLOC_MIX:
581 case SEQLOC_EQUIV:
582 case SEQLOC_PACKED_INT:
583 case SEQLOC_PACKED_PNT:
584 case SEQLOC_NULL:
585 break;
586 case SEQLOC_EMPTY:
587 break;
588 case SEQLOC_WHOLE:
589 sidp = (SeqIdPtr) location -> data.ptrvalue;
590 if (! sidp){
591 if (ASN2FF_SHOW_ERROR_MSG == TRUE)
592 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
593 "FlatLocElement: whole location without ID:");
594 retval = TRUE;
595 break;
596 }
597 /*--- no break on purpose ---*/
598 case SEQLOC_INT:
599 if ( location -> choice == SEQLOC_INT){
600 sintp = (SeqIntPtr) location -> data.ptrvalue;
601 sidp = sintp-> id;
602 }
603
604 if ( ! is_real_id(sidp, this_sidp)){
605 retval = TRUE;
606 }
607
608 break;
609 case SEQLOC_PNT:
610 spp = (SeqPntPtr) ( location -> data.ptrvalue);
611 if ( ! is_real_id(spp-> id, this_sidp)){
612 retval = TRUE;
613 }
614 break;
615 case SEQLOC_BOND:
616 break;
617 case SEQLOC_FEAT:
618 /*if ( ! (Flat_Be_quiet&1))*/
619 if (ASN2FF_SHOW_ERROR_MSG == TRUE)
620 ErrPostEx(SEV_INFO, CTX_NCBI2GB, CTX_2GB_NOT_IMPLEMENTED,
621 "FlatVirtLoc:SEQLOC_FEAT not implemented");
622 break;
623 }
624
625
626 return retval;
627 }
628
629 NLM_EXTERN Boolean FlatLocElement (BioseqPtr bsp, ValNodePtr location, CharPtr buf)
630 {
631 Char localbuf_space[MAX_CHAR_LOCATION +1], piecebuf_space[MAX_CHAR_LOCATION +1];
632 CharPtr localbuf , piecebuf ;
633 CharPtr temp ;
634 SeqIntPtr sintp;
635 SeqPntPtr spp;
636 SeqIdPtr this_sidp=NULL, sidp=NULL;
637 SeqIntPtr whole_intPtr =NULL;
638 Boolean retval=TRUE;
639 Boolean whole_trouble;
640
641 localbuf = &(localbuf_space[0]);
642 piecebuf = &(piecebuf_space[0]);
643 temp = localbuf;
644
645 this_sidp = bsp->id;
646 sidp = this_sidp;
647
648 localbuf[0] = '\0';
649 buf[0] = '\0';
650
651 switch ( location -> choice){
652 case SEQLOC_MIX:
653 case SEQLOC_EQUIV:
654 case SEQLOC_PACKED_INT:
655 case SEQLOC_PACKED_PNT:
656 case SEQLOC_NULL:
657 if (ASN2FF_SHOW_ERROR_MSG == TRUE)
658 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
659 "Unexpected internal complex type");
660 retval = FALSE;
661 break;
662 case SEQLOC_EMPTY:
663 break;
664 case SEQLOC_WHOLE:
665 whole_trouble=TRUE;
666 sidp = (SeqIdPtr) location -> data.ptrvalue;
667 if (sidp){
668 bsp = BioseqFind(sidp);
669 if (bsp) {
670 sintp = whole_intPtr = MemNew( sizeof(SeqInt) );
671 whole_intPtr -> id = sidp;
672 whole_intPtr -> from = 0;
673 whole_intPtr -> to = -1;
674 if ( bsp -> length > 0)
675 {
676 whole_intPtr -> to = bsp -> length -1 ;
677 whole_trouble=FALSE;
678 }
679 }
680 }
681 if (whole_trouble) {
682 if (ASN2FF_SHOW_ERROR_MSG == TRUE) {
683 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
684 "FlatLocElement: whole location without being able to look up limits: %s",
685 sidp?SeqIdWrite (sidp, localbuf, PRINTID_FASTA_LONG,
686 MAX_CHAR_LOCATION):"No Id");
687 }
688 retval = FALSE;
689 break;
690 }
691 /*
692 else if (whole_trouble) {
693 SeqIdWrite (sidp, localbuf,
694 PRINTID_FASTA_LONG, MAX_CHAR_LOCATION);
695 break;
696 }
697 */
698 /*--- no break on purpose ---*/
699 case SEQLOC_INT:
700 if ( location -> choice == SEQLOC_INT){
701 sintp = (SeqIntPtr) location -> data.ptrvalue;
702 }
703
704 if (is_real_id(sintp-> id, this_sidp)){
705 if (sintp -> strand == 2) /* minus strand */
706 temp = StringMove(temp, "complement("); /* ) vi match */
707 FlatLocPoint (sintp->id, this_sidp, piecebuf,
708 sintp -> from, sintp -> if_from );
709 temp = StringMove(temp, piecebuf);
710 if ( sintp -> to >0 && (sintp -> to != sintp -> from
711 || sintp -> if_from || sintp -> if_to )){
712 temp = StringMove(temp,"..");
713 FlatLocPoint(NULL, this_sidp, piecebuf,
714 sintp -> to, sintp -> if_to);
715 temp = StringMove(temp, piecebuf);
716 }
717 if (sintp -> strand == 2) /* minus strand */
718 /* ( vi match */ temp = StringMove(temp, ")");
719 }else{
720 #ifdef VIRTUALS_NOT_TREATED_AS_NULLS
721 StringCpy(localbuf,"No id");
722 if (sintp -> id){
723 SeqIdWrite (sintp -> id, localbuf,
724 PRINTID_FASTA_LONG, MAX_CHAR_LOCATION);
725 if (ASN2FF_SHOW_ERROR_MSG == TRUE)
726 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
727 "FlatLocElement: interval without being able to use id: %s",
728 localbuf);
729 }
730 retval = FALSE;
731 #endif
732 }
733
734 break;
735 case SEQLOC_PNT:
736 spp = (SeqPntPtr) ( location -> data.ptrvalue);
737 if (is_real_id(spp-> id, this_sidp)){
738 if (spp -> strand == 2) /* minus strand */
739 temp = StringMove(temp, "complement("); /* ) vi match */
740 if ( spp -> fuzz){
741 /*--------
742 * points with fuzz treated as if always come from '^':
743 * not best, perhaps, but pretty close
744 *-------*/
745 FlatLocCaret (spp -> id, this_sidp, piecebuf,
746 spp -> point, spp -> fuzz );
747 } else {
748 FlatLocPoint(spp -> id, this_sidp, piecebuf,
749 spp -> point, spp -> fuzz );
750 }
751 temp = StringMove(temp, piecebuf);
752 if (spp -> strand == 2) /* minus strand */
753 /* ( vi match */ temp = StringMove(temp, ")");
754 } else {
755 #ifdef VIRTUALS_NOT_TREATED_AS_NULLS
756 ErrPostEx(SEV_INFO, CTX_NCBI2GB,CTX_2GB_LOCATION_TROUBLE,
757 "FlatLocElement: point without being able to use id: %s", sidp?SeqIdWrite (sidp, localbuf, PRINTID_FASTA_LONG):"No Id", MAX_CHAR_LOCATION);
758 retval = FALSE;
759 #endif
760 }
761 break;
762 case SEQLOC_BOND:
763 /*
764 bondp = (SeqBondPtr) location -> data.ptrvalue;
765 spp = bondp -> a;
766 FlatLocPoint(spp -> id, this_sidp, piecebuf,
767 spp -> point, spp -> fuzz );
768 temp = StringMove(temp, piecebuf);
769 temp = StringMove(temp,",");
770 spp = bondp -> b;
771 FlatLocPoint(NULL, this_sidp, piecebuf, spp -> point, spp -> fuzz );
772 temp = StringMove(temp, piecebuf);
773 */
774 Bond((SeqBondPtr)location->data.ptrvalue, this_sidp, localbuf);
775 break;
776 case SEQLOC_FEAT:
777 /* if ( ! (Flat_Be_quiet&1)) */
778 if (ASN2FF_SHOW_ERROR_MSG == TRUE)
779 ErrPostEx(SEV_INFO, CTX_NCBI2GB, CTX_2GB_NOT_IMPLEMENTED,
780 "FlatLocElement:SEQLOC_FEAT not implemented");
781 break;
782 }
783
784 if (whole_intPtr)
785 MemFree(whole_intPtr); /* NOT object free ! ! ! */
786
787 if (! retval){
788 *buf = '\0';
789 }else{
790 StringMove(buf, localbuf);
791 }
792
793 return retval;
794 }
795
796 /****************************************************************************
797 *Bond
798 *
799 * This function takes a SeqBondPtr and a CharPtr, in buf, and returns a
800 * string, in buf. If both ends of the bond exist, the output
801 * is bond(a, b); if only one end exists, the output is bond(a).
802 *
803 * Tom Madden
804 *
805 **************************************************************************/
806
807 NLM_EXTERN void Bond(SeqBondPtr bondp, SeqIdPtr this_sidp, CharPtr buf)
808
809 {
810 Char piecebuf1[MAX_CHAR_LOCATION+1];
811 Char piecebuf2[MAX_CHAR_LOCATION+1];
812 SeqPntPtr spp;
813
814 spp = bondp -> a;
815 FlatLocPoint(spp -> id, this_sidp, &(piecebuf1[0]),
816 spp -> point, spp -> fuzz );
817 if (bondp->b)
818 {
819 spp = bondp -> b;
820 FlatLocPoint(NULL, this_sidp, &(piecebuf2[0]),
821 spp -> point, spp -> fuzz );
822 sprintf(buf, "bond(%s,%s)", piecebuf1, piecebuf2);
823 }
824 else
825 {
826 sprintf(buf, "bond(%s)", piecebuf1);
827
828 }
829 }
830
831 /*----------- FlatLoc ()------*/
832
833 NLM_EXTERN CharPtr FlatLoc (BioseqPtr bsp, ValNodePtr location)
834 {
835 CharPtr retval = NULL;
836 Int4 max_length, len_used;
837 Boolean is_ok = TRUE;
838 SeqIdPtr this_sidp;
839
840 if (location){
841 this_sidp = bsp -> id;
842 max_length = 0;
843 do_FlatLoc(& is_ok, TRUE, this_sidp, NULL, NULL,
844 & max_length, bsp, location);
845 if (is_ok){
846 retval = (CharPtr) MemNew((size_t) (max_length + 10));
847 len_used = 0;
848 do_FlatLoc(&is_ok, TRUE, this_sidp, retval, retval,
849 &len_used, bsp, location);
850 }
851 }
852
853 return retval;
854 }
855
856
857
858 /*-------------complement_FlatLoc ()---------------*/
859
860 NLM_EXTERN CharPtr
861 complement_FlatLoc (SeqIdPtr this_sidp, Boolean PNTR is_okPt, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location)
862 {
863 SeqLocRevCmp(location);
864
865 temp = FlatSmartStringMove(total_buf, lengthPt, temp,"complement(");
866 temp = do_FlatLoc (is_okPt, FALSE, this_sidp, total_buf,
867 temp, lengthPt, bsp, location);
868 temp = FlatSmartStringMove(total_buf, lengthPt, temp,")");
869
870 SeqLocRevCmp(location);
871
872 return temp;
873 }
874 /*-------- FlatSmartStringMove()-------*/
875
876 NLM_EXTERN CharPtr
877 FlatSmartStringMove(CharPtr total_buf, Int4Ptr lengthPt, CharPtr temp, CharPtr string)
878 {
879 CharPtr retval = temp;
880
881 if ( lengthPt){
882 (*lengthPt) += StringLen(string);
883 }
884 if (total_buf){
885 retval = StringMove(temp,string);
886 }
887
888 return retval;
889 }
890 #define FLAT_EQUIV 0
891 #define FLAT_JOIN 1
892 #define FLAT_ORDER 2
893 static CharPtr group_names []= {
894 "one-of","join","order"};
895
896 /*--------- FlatNullAhead()---------*/
897
898 NLM_EXTERN Boolean
899 FlatNullAhead(BioseqPtr bsp, ValNodePtr location)
900 {
901 Boolean retval = FALSE;
902 SeqLocPtr next;
903 if (location){
904 next = location -> next;
905 if (next){
906 if ( next -> choice == SEQLOC_NULL)
907 retval = TRUE;
908 if (FlatVirtLoc(bsp, next))
909 retval = TRUE;
910 }else{
911 retval = TRUE; /* last one always true */
912 }
913 }
914
915 return retval;
916 }
917
918
919 /*---------- FlatPackedPoint() -----------*/
920
921 NLM_EXTERN CharPtr
922 FlatPackedPoint (CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, PackSeqPntPtr pspp, SeqIdPtr this_sidp)
923 {
924 Char buf_space[MAX_CHAR_LOCATION +1];
925 CharPtr buf;
926 int dex;
927
928 buf = &(buf_space[0]);
929 for (dex=0; dex < (int) pspp -> used; dex ++){
930 FlatLocPoint(pspp -> id, this_sidp, buf, (pspp->pnts)[dex], pspp->fuzz);
931 temp = FlatSmartStringMove(total_buf, lengthPt, temp,buf);
932 }
933
934 return temp;
935 }
936 /*--------- group_FlatLoc ()-----------*/
937
938 NLM_EXTERN CharPtr group_FlatLoc (SeqIdPtr this_sidp, Boolean PNTR is_okPt, int which, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location)
939 {
940 Char buf_space[MAX_CHAR_LOCATION +1];
941 CharPtr buf;
942 SeqLocPtr this_loc;
943 Boolean special_mode = FALSE; /* join in order */
944 int parens = 1;
945 Boolean found_non_virt = FALSE;
946
947 buf = &(buf_space[0]);
948
949 temp = FlatSmartStringMove(total_buf, lengthPt,
950 temp,group_names[which]);
951 temp = FlatSmartStringMove(total_buf, lengthPt,
952 temp,"(");
953
954 for (this_loc = (SeqLocPtr) location -> data.ptrvalue;
955 this_loc && *is_okPt; this_loc = this_loc -> next){
956 if ( FlatVirtLoc(bsp, this_loc)) {
957 if ( this_loc != location && this_loc -> next){
958 if (special_mode ){
959 special_mode = FALSE;
960 temp = FlatSmartStringMove(total_buf, lengthPt,
961 temp,")");
962 parens --;
963 }
964 }
965 continue;
966 }
967 if ( found_non_virt &&
968 this_loc -> choice != SEQLOC_EMPTY &&
969 this_loc -> choice != SEQLOC_NULL ){
970 temp = FlatSmartStringMove(total_buf, lengthPt, temp,",");
971 }
972 switch ( this_loc -> choice ){
973 case SEQLOC_NULL :
974 if ( this_loc != location && this_loc -> next){
975 if (special_mode ){
976 special_mode = FALSE;
977 temp = FlatSmartStringMove(total_buf, lengthPt, temp,")");
978 parens --;
979 }
980 }
981 break;
982 case SEQLOC_EMPTY :
983 break;
984 case SEQLOC_WHOLE :
985 case SEQLOC_PNT :
986 case SEQLOC_BOND :
987 case SEQLOC_FEAT :
988 found_non_virt = TRUE;
989 if (FlatVirtLoc(bsp, this_loc)){
990 if ( this_loc != location && this_loc -> next){
991 if (special_mode ){
992 special_mode = FALSE;
993 temp = FlatSmartStringMove(total_buf, lengthPt,
994 temp,"),");
995 parens --;
996 }
997 }
998 }else{
999 if( FlatLocElement(bsp, this_loc, buf)){
1000 temp = FlatSmartStringMove(total_buf, lengthPt,
1001 temp,buf);
1002 }else{
1003 temp = NULL;
1004 * is_okPt = FALSE;
1005 }
1006 }
1007 break;
1008 case SEQLOC_INT :
1009 found_non_virt = TRUE;
1010 if ( which == FLAT_ORDER
1011 && ! FlatNullAhead(bsp, this_loc)){
1012 special_mode = TRUE;
1013 temp = FlatSmartStringMove(total_buf, lengthPt,
1014 temp,group_names[FLAT_JOIN]);
1015 temp = FlatSmartStringMove(total_buf, lengthPt,
1016 temp,"(");
1017 parens ++;
1018 }
1019
1020 if( FlatLocElement(bsp, this_loc, buf)){
1021 temp = FlatSmartStringMove(total_buf, lengthPt,
1022 temp,buf);
1023 }else{
1024 temp = NULL;
1025 * is_okPt = FALSE;
1026 }
1027 break;
1028
1029 case SEQLOC_PACKED_PNT :
1030 found_non_virt = TRUE;
1031 temp = FlatPackedPoint(total_buf, temp, lengthPt,
1032 (PackSeqPntPtr)this_loc->data.ptrvalue, this_sidp);
1033 break;
1034 case SEQLOC_PACKED_INT :
1035 case SEQLOC_MIX :
1036 case SEQLOC_EQUIV :
1037 {
1038 ValNodePtr hold_next = this_loc -> next;
1039 found_non_virt = TRUE;
1040 this_loc -> next = NULL;
1041 temp = do_FlatLoc(is_okPt, FALSE, this_sidp, total_buf, temp, lengthPt, bsp, this_loc);
1042 this_loc-> next = hold_next;
1043 }
1044 break;
1045 }
1046 }
1047
1048 if (* is_okPt){
1049 while ( parens > 0){
1050 temp = FlatSmartStringMove(total_buf, lengthPt,
1051 temp,")");
1052 parens --;
1053 }
1054 }
1055
1056 return temp;
1057 }
1058
1059 NLM_EXTERN CharPtr
1060 do_FlatLoc (Boolean PNTR is_okPt, Boolean ok_to_complement, SeqIdPtr this_sidp, CharPtr total_buf, CharPtr temp, Int4Ptr lengthPt, BioseqPtr bsp, ValNodePtr location)
1061 {
1062 Char buf_space[MAX_CHAR_LOCATION +1];
1063 CharPtr buf;
1064 SeqLocPtr slp;
1065 SeqLocPtr next_loc = NULL;
1066
1067 buf = &(buf_space[0]);
1068
1069 if (location == NULL) {
1070 return NULL;
1071 }
1072 if ( ok_to_complement &&
1073 SeqLocStrand(location) == Seq_strand_minus ){
1074 temp = complement_FlatLoc (this_sidp, is_okPt, total_buf,
1075 temp, lengthPt, bsp, location);
1076 return temp;
1077 }
1078 for ( slp = location; slp && *is_okPt; slp = slp -> next){
1079 if ( slp -> choice == SEQLOC_NULL ||
1080 FlatVirtLoc(bsp, slp))
1081 continue;
1082 if ( slp != location){
1083 temp = FlatSmartStringMove(total_buf, lengthPt,
1084 temp,",");
1085 }
1086 switch( slp -> choice){
1087 case SEQLOC_NULL:
1088 break;
1089 case SEQLOC_MIX:
1090 case SEQLOC_PACKED_INT:
1091 {
1092 Boolean found_null = FALSE;
1093 for (next_loc = (SeqLocPtr) slp ->
1094 data.ptrvalue; next_loc;
1095 next_loc = next_loc -> next){
1096 if ( next_loc -> choice == SEQLOC_NULL
1097 || FlatVirtLoc( bsp, next_loc)){
1098 found_null = TRUE;
1099 temp=group_FlatLoc(this_sidp, is_okPt,
1100 FLAT_ORDER, total_buf, temp, lengthPt,
1101 bsp, slp);
1102 break;
1103 }
1104 }
1105 if ( ! found_null){
1106 temp=group_FlatLoc(this_sidp, is_okPt,
1107 FLAT_JOIN, total_buf, temp, lengthPt,
1108 bsp, slp);
1109 }
1110 }
1111 break;
1112 case SEQLOC_EQUIV:
1113 temp=group_FlatLoc(this_sidp, is_okPt,
1114 FLAT_EQUIV, total_buf, temp,
1115 lengthPt, bsp, slp);
1116 break;
1117 case SEQLOC_PACKED_PNT:
1118 temp = FlatPackedPoint(total_buf, temp, lengthPt,
1119 (PackSeqPntPtr)slp->data.ptrvalue, this_sidp);
1120 break;
1121 default:
1122 if ( ! FlatVirtLoc( bsp, slp))
1123 if( FlatLocElement(bsp, slp, buf)){
1124 temp = FlatSmartStringMove(total_buf, lengthPt,
1125 temp,buf);
1126 }else{
1127 temp = NULL;
1128 *is_okPt = FALSE;
1129 }
1130 }
1131 }
1132 return temp;
1133 }
1134
1135 NLM_EXTERN Boolean is_real_id(SeqIdPtr pointIdPtr, SeqIdPtr this_sidp)
1136 {
1137 Boolean retval = TRUE;
1138 BioseqPtr bs;
1139 SeqIdPtr use_id;
1140
1141 if ( ! SeqIdIn ( pointIdPtr, this_sidp)){
1142 use_id = pointIdPtr;
1143 bs = BioseqFind(use_id);
1144 if ( bs ){
1145 if (bs -> repr == Seq_repr_virtual)
1146 retval = FALSE;
1147 }
1148 }
1149 return retval;
1150 }
1151
1152 NLM_EXTERN Boolean FlatAnnotPartial (SeqFeatPtr sfp, Boolean use_product)
1153 {
1154 Boolean fuzz=FALSE, retval = TRUE;
1155 CharPtr str;
1156 ImpFeatPtr imp;
1157
1158 if ( sfp -> data.choice == 8) /* Imp-feat */
1159 {
1160 imp = (ImpFeatPtr) (sfp -> data.value.ptrvalue);
1161 if (imp ->loc != NULL)
1162 for( str = imp -> loc; *str; str ++){
1163 if ( *str == '<' || *str == '>'){
1164 retval = FALSE;
1165 break;
1166 }else if (*str == 'r' && *(str +1) == 'e'){
1167 if (StringNCmp ("replace",str, (size_t) 7) == 0){
1168 retval = FALSE;
1169 break;
1170 }
1171 }
1172 }
1173 if (retval == TRUE) /* Look for fuzz in the ASN.1 location */
1174 {
1175 if (use_product)
1176 fuzz = LookForFuzz (sfp->product);
1177 else
1178 fuzz = LookForFuzz (sfp->location);
1179 if (fuzz == TRUE)
1180 retval = FALSE;
1181 }
1182 }
1183
1184 return retval;
1185 }
1186
1187 /************************************************************************
1188 *Boolean LookForFuzz (SeqLocPtr slp)
1189 *
1190 * Look for fuzz in the location. For use in FlatAnnotPartial.
1191 ************************************************************************/
1192
1193 NLM_EXTERN Boolean LookForFuzz (SeqLocPtr head)
1194 {
1195 Boolean retval=FALSE;
1196 IntFuzzPtr ifp;
1197 PackSeqPntPtr pspp;
1198 SeqIntPtr sip;
1199 SeqLocPtr slp;
1200 SeqPntPtr spp;
1201
1202 if (head == NULL)
1203 return retval;
1204
1205 slp=NULL;
1206 while ((slp = SeqLocFindNext(head, slp)) != NULL)
1207 {
1208 switch (slp->choice)
1209 {
1210 case SEQLOC_INT:
1211 sip = (SeqIntPtr)(slp->data.ptrvalue);
1212 ifp = sip->if_from;
1213 if (ifp != NULL)
1214 {
1215 if (ifp->choice == 4)
1216 {
1217 if (ifp->a != 0)
1218 retval=TRUE;
1219 }
1220 else
1221 retval = TRUE;
1222 }
1223 ifp = sip->if_to;
1224 if (ifp != NULL)
1225 {
1226 if (ifp->choice == 4)
1227 {
1228 if (ifp->a != 0)
1229 retval=TRUE;
1230 }
1231 else
1232 retval = TRUE;
1233 }
1234 break;
1235 case SEQLOC_PNT:
1236 spp = (SeqPntPtr)(slp->data.ptrvalue);
1237 ifp = spp->fuzz;
1238 if (ifp != NULL)
1239 {
1240 if (ifp->choice == 4)
1241 {
1242 if (ifp->a != 0)
1243 retval=TRUE;
1244 }
1245 else
1246 retval = TRUE;
1247 }
1248 break;
1249 case SEQLOC_PACKED_PNT:
1250 pspp = (PackSeqPntPtr)(slp->data.ptrvalue);
1251 ifp = pspp->fuzz;
1252 if (ifp != NULL)
1253 {
1254 if (ifp->choice == 4)
1255 {
1256 if (ifp->a != 0)
1257 retval=TRUE;
1258 }
1259 else
1260 retval = TRUE;
1261 }
1262 break;
1263 default:
1264 break;
1265 }
1266 if (retval == TRUE)
1267 break;
1268 }
1269 return retval;
1270 } /* LookForFuzz */
1271
1272
1273
1274 /*****************************************************************************
1275 * modified by Tatiana 12.08.97
1276 * collecting and soritng the features features
1277 *
1278 ******************************************************************************/
1279 static OrganizeFeatPtr CreateOrganizeFeat(void)
1280 {
1281 OrganizeFeatPtr ofp;
1282
1283 ofp = (OrganizeFeatPtr) MemNew(sizeof(OrganizeFeat));
1284 MemSet ((Pointer) ofp, 0, sizeof (OrganizeFeat));
1285
1286 return ofp;
1287 }
1288
1289 static void UniqueGeneName(Boolean error_msgs, OrganizeFeatPtr ofp)
1290 {
1291 SortStructPtr newp, p;
1292 SeqFeatPtr sfp;
1293 GeneRefPtr grp;
1294 CharPtr gene;
1295 CharPtr s, ss;
1296 SeqLocPtr slp;
1297 Int4 index, size;
1298
1299 if (ofp == NULL)
1300 return;
1301 if ((size = ofp->sfpGenesize) == 0)
1302 return;
1303 newp = (SortStructPtr) MemNew((size)*sizeof(SortStruct));
1304 newp = MemCopy(newp, ofp->Genelist, (size * sizeof(SortStruct)));
1305 HeapSort((VoidPtr) (newp),
1306 (size_t) (size), sizeof(SortStruct), CompareGeneName);
1307 p = newp;
1308 if ((sfp = p->sfp) == NULL) {
1309 MemFree (newp);
1310 return;
1311 }
1312 slp = sfp->location;
1313 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
1314 gene = grp->locus;
1315 for (index = 1, p++; index < size; index++, p++) {
1316 sfp = p->sfp;
1317 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
1318 if (gene == NULL) {
1319 gene = grp->locus;
1320 slp = sfp->location;
1321 continue;
1322 }
1323 if (error_msgs) {
1324 if (grp->locus == NULL) {
1325 ErrPostEx(SEV_WARNING,ERR_FEATURE_NULLGeneLocus, "No gene locus in %s:", SeqLocPrint(sfp->location));
1326 continue;
1327 }
1328 if (StringCmp(gene, grp->locus) == 0) {
1329 s = SeqLocPrint(slp);
1330 ss = SeqLocPrint(sfp->location);
1331 ErrPostEx(SEV_WARNING, ERR_FEATURE_IdenticalGeneName, "Identical gene locus name [%s] in %s and %s", gene, s, ss);
1332 MemFree(s);
1333 MemFree(ss);
1334 }
1335 }
1336 gene = grp->locus;
1337 slp = sfp->location;
1338 }
1339 MemFree (newp);
1340 return;
1341 }
1342
1343 /*****************************************************************************
1344 * Compare two ImpFeats by name and location
1345 * returns 1 for matching features otherwise returns 0
1346 ******************************************************************************/
1347 static CmpImpFeat (ImpFeatPtr f1, ImpFeatPtr f2)
1348 {
1349 if (f1 == NULL && f2)
1350 return 0;
1351
1352 if (f2 == NULL && f1)
1353 return 0;
1354
1355 if (StringCmp(f1->key, f2->key) != 0)
1356 return 0;
1357
1358 if (StringCmp(f1->loc, f2->loc) != 0)
1359 return 0;
1360
1361 return 1;
1362 }
1363
1364 static Int2 CompXref (ValNodePtr x1, ValNodePtr x2)
1365 {
1366 DbtagPtr db1 = NULL, db2;
1367 CharPtr s1=NULL, s2=NULL;
1368
1369 if (x1 == NULL && x2)
1370 return 0;
1371 if (x2 == NULL && x1)
1372 return 0;
1373 db1 = x1->data.ptrvalue;
1374 db2 = x2->data.ptrvalue;
1375 if (StringCmp(db1->db, db2->db) != 0) {
1376 return 0;
1377 }
1378 if (db1->tag && db1->tag->str) {
1379 s1 = db1->tag->str;
1380 }
1381 if (db2->tag && db2->tag->str) {
1382 s2 = db2->tag->str;
1383 }
1384 if (s1 == NULL && s2)
1385 return 0;
1386 if (s2 == NULL && s1)
1387 return 0;
1388 if (s1 && s2) {
1389 if (StringCmp(s1, s2) == 0) {
1390 return 1;
1391 } else {
1392 return 0;
1393 }
1394 } else {
1395 if (db1->tag->id == db2->tag->id) {
1396 return 1;
1397 }
1398 }
1399 return 0;
1400 }
1401 /*****************************************************************************
1402 * compare features by location and choice
1403 *
1404 ******************************************************************************/
1405 static Int2 FeatMatch (SeqFeatPtr f1, SeqFeatPtr f2)
1406 {
1407 Int2 retval = 0;
1408
1409 if (f1 == NULL && f2)
1410 return 0;
1411 if (f2 == NULL && f1)
1412 return 0;
1413 if ((SeqLocCompare(f1->location, f2->location)) != SLC_A_EQ_B)
1414 return 0;
1415 if (f1->data.choice != f2->data.choice)
1416 return 0;
1417 switch (f1->data.choice)
1418 {
1419 case SEQFEAT_IMP:
1420 retval =
1421 CmpImpFeat(f1->data.value.ptrvalue, f2->data.value.ptrvalue);
1422 break;
1423 case SEQFEAT_REGION:
1424 if (f1->dbxref != NULL || f2->dbxref != NULL) {
1425 retval = CompXref(f1->dbxref, f2->dbxref);
1426 }
1427 break;
1428 default:
1429 break;
1430 }
1431 return retval;
1432 }
1433
1434 /*****************************************************************************
1435 * mark (with boolean 'dup') identical features in the List
1436 *
1437 ******************************************************************************/
1438 static void UniqueFeat(SortStructPtr List, Int4 size)
1439 {
1440 Int4 i, j, jj, ii;
1441 Int4 start;
1442 SortStructPtr p, pp;
1443
1444 for (i = 0, p = List; i < size; i = ii) {
1445 ii = i + 1;
1446 if (p->sfp == NULL) {
1447 continue;
1448 }
1449 start = SeqLocStart(p->sfp->location);
1450 for (pp = p+1; ii < size; ii++, pp++) {
1451 if (pp->sfp == NULL) {
1452 continue;
1453 }
1454 if (start != SeqLocStart(pp->sfp->location)) {
1455 break;
1456 }
1457 }
1458 for (j = i; j < ii; j++, p++) {
1459 for (jj = j+1, pp = p+1; jj < ii; jj++, pp++) {
1460 if (p->hash == pp->hash) {
1461 if (FeatMatch(p->sfp, pp->sfp) == 1) {
1462 pp->dup = TRUE;
1463 }
1464 }
1465
1466 }
1467 }
1468 }
1469 return;
1470 }
1471
1472 /*****************************************************************************
1473 * collect landmarks from map (Gather is not used)
1474 *
1475 ******************************************************************************/
1476 NLM_EXTERN Int2 GetMapFeats(Asn2ffJobPtr ajp, GBEntryPtr gbp)
1477 {
1478 BioseqPtr bsp;
1479 SeqFeatPtr sfp;
1480 OrganizeFeatPtr ofp;
1481
1482 if (gbp == NULL || gbp->bsp == NULL) {
1483 return 0;
1484 }
1485 bsp = gbp->bsp;
1486 BioseqLock(bsp);
1487 ofp = CreateOrganizeFeat();
1488 ofp->bsp = bsp;
1489 ofp->useSeqMgrIndexes = ajp->useSeqMgrIndexes;
1490
1491 if (bsp->seq_ext_type == 3) { /* map-ext */
1492 for (sfp = (SeqFeatPtr) bsp->seq_ext; sfp; sfp=sfp->next) {
1493 switch (sfp->data.choice) {
1494 case SEQFEAT_GENE:
1495 ofp->List = EnlargeSortList(ofp->List,
1496 ofp->sfpListsize);
1497 ofp->sfpListsize = StoreFeat(ofp->List, sfp,
1498 ofp->sfpListsize, bsp, NULL, 0, 0, 0, NULL, NULL, 0);
1499 break;
1500 default:
1501 ofp->List = EnlargeSortList(ofp->List,
1502 ofp->sfpListsize);
1503 ofp->sfpListsize = StoreFeat(ofp->List, sfp,
1504 ofp->sfpListsize, bsp, NULL, 0, 0, 0, NULL, NULL, 0);
1505 break;
1506 }
1507 }
1508
1509 }
1510 SortOrganizeFeat(ofp);
1511 gbp->feat = ofp;
1512 if (ofp == NULL) {
1513 return 0;
1514 }
1515 return (gbp->feat->sfpListsize);
1516 }
1517
1518 /*****************************************************************************
1519 * function and activity for proteins
1520 *
1521 ******************************************************************************/
1522 static SeqFeatPtr AddProtRefInfo(SeqFeatPtr sfp, ProtRefPtr prot)
1523 {
1524 ValNodePtr vnp;
1525
1526 for (vnp=prot->name; vnp; vnp=vnp->next) {
1527 if (GBQualPresent("product", sfp->qual) == FALSE) {
1528 sfp->qual = AddGBQual(sfp->qual, "product", vnp->data.ptrvalue);
1529 } else {
1530 sfp->qual = AddGBQual(sfp->qual, "note", vnp->data.ptrvalue);
1531 }
1532 }
1533 if (prot->desc) {
1534 sfp->qual = AddGBQual(sfp->qual, "note", prot->desc);
1535 }
1536 for (vnp=prot->ec; vnp; vnp=vnp->next) {
1537 sfp->qual=AddGBQual(sfp->qual, "EC_number", vnp->data.ptrvalue);
1538 }
1539 for (vnp=prot->activity; vnp; vnp=vnp->next) {
1540 sfp->qual=AddGBQual(sfp->qual, "function", vnp->data.ptrvalue);
1541 }
1542
1543 return sfp;
1544 }
1545
1546 /*****************************************************************************
1547 * add site description notes
1548 *
1549 ******************************************************************************/
1550 NLM_EXTERN void AddSiteNoteQual(SeqFeatPtr sfp_in, SeqFeatPtr sfp)
1551 {
1552 if (sfp_in == NULL) {
1553 return;
1554 }
1555 if (sfp_in->data.choice != SEQFEAT_SITE) {
1556 return;
1557 }
1558 switch (sfp_in->data.value.intvalue) {
1559 case 1:
1560 sfp->qual = AddGBQual(sfp->qual, "note", "active site");
1561 break;
1562 case 2:
1563 sfp->qual = AddGBQual(sfp->qual, "note", "binding site");
1564 break;
1565 case 3:
1566 sfp->qual = AddGBQual(sfp->qual, "note", "cleavage site");
1567 break;
1568 case 4:
1569 sfp->qual = AddGBQual(sfp->qual, "note", "inhibit site");
1570 break;
1571 case 5:
1572 sfp->qual = AddGBQual(sfp->qual, "note", "modified site");
1573 break;
1574 case 6:
1575 sfp->qual = AddGBQual(sfp->qual, "note", "glycosylation site");
1576 break;
1577 case 7:
1578 sfp->qual = AddGBQual(sfp->qual, "note", "myristoylation site");
1579 break;
1580 case 8:
1581 sfp->qual = AddGBQual(sfp->qual, "note", "mutagenized site");
1582 break;
1583 case 9:
1584 sfp->qual = AddGBQual(sfp->qual, "note", "metal-binding site");
1585 break;
1586 case 10:
1587 sfp->qual = AddGBQual(sfp->qual, "note", "phosphorylation site");
1588 break;
1589 case 11:
1590 sfp->qual = AddGBQual(sfp->qual, "note", "acetylation site");
1591 break;
1592 case 12:
1593 sfp->qual = AddGBQual(sfp->qual, "note", "amidation site");
1594 break;
1595 case 13:
1596 sfp->qual = AddGBQual(sfp->qual, "note", "methylation site");
1597 break;
1598 case 14:
1599 sfp->qual = AddGBQual(sfp->qual, "note", "hydroxylation site");
1600 break;
1601 case 15:
1602 sfp->qual = AddGBQual(sfp->qual, "note", "sulfatation site");
1603 break;
1604 case 16:
1605 sfp->qual = AddGBQual(sfp->qual, "note",
1606 "oxidative-deamination site");
1607 break;
1608 case 17:
1609 sfp->qual = AddGBQual(sfp->qual, "note",
1610 "pyrrolidone-carboxylic-acid site");
1611 break;
1612 case 18:
1613 sfp->qual = AddGBQual(sfp->qual, "note",
1614 "gamma-carboxyglutamic-acid site");
1615 break;
1616 case 19:
1617 sfp->qual = AddGBQual(sfp->qual, "note", "blocked site");
1618 break;
1619 case 20:
1620 sfp->qual = AddGBQual(sfp->qual, "note", "lipid-binding site");
1621 break;
1622 case 21:
1623 sfp->qual = AddGBQual(sfp->qual, "note", "np-binding site");
1624 break;
1625 case 22:
1626 sfp->qual = AddGBQual(sfp->qual, "note", "DNA binding site");
1627 break;
1628 case 23:
1629 sfp->qual = AddGBQual(sfp->qual, "note", "signal-peptide site");
1630 break;
1631 case 24:
1632 sfp->qual = AddGBQual(sfp->qual, "note",
1633 "transit-peptide site");
1634 break;
1635 case 25:
1636 sfp->qual = AddGBQual(sfp->qual, "note",
1637 "transmembrane-region site");
1638 break;
1639 default:
1640 sfp->qual = AddGBQual(sfp->qual, "note", "unclassified site");
1641 break;
1642 }
1643 }
1644
1645 /*****************************************************************************
1646 * protein features are shown as misc_feats on nucleotide records
1647 *
1648 ******************************************************************************/
1649 static SeqFeatPtr CreateImpFeatFromProt(Uint1 format, SeqFeatPtr psfp, SeqFeatPtr cds, SeqLocPtr new_loc)
1650 {
1651 SeqFeatPtr sfp;
1652 ImpFeatPtr ifp;
1653 ProtRefPtr prot;
1654 GBQualPtr q;
1655 Uint2 retval;
1656 Char buf[2];
1657 CdRegionPtr cdr;
1658 CharPtr tmp;
1659 GeneRefPtr grp;
1660 SeqFeatXrefPtr xrp;
1661
1662 if (psfp->data.choice == SEQFEAT_PSEC_STR) {
1663 return NULL;
1664 }
1665 sfp = SeqFeatNew();
1666 ifp = ImpFeatNew();
1667 sfp->data.choice = SEQFEAT_IMP;
1668 sfp->data.value.ptrvalue = ifp;
1669 ifp->key = StringSave("misc_feature");
1670 if (psfp->data.choice == SEQFEAT_PROT) {
1671 prot = psfp->data.value.ptrvalue;
1672 if (prot->processed == 0 || prot->processed == 1) {
1673 SeqFeatFree(sfp);
1674 return NULL;
1675 }
1676 if (prot->processed == 2) {
1677 MemFree(ifp->key);
1678 ifp->key = StringSave("mat_peptide");
1679 } else if (prot->processed == 3) {
1680 MemFree(ifp->key);
1681 ifp->key = StringSave("sig_peptide");
1682 } else if (prot->processed == 4) {
1683 MemFree(ifp->key);
1684 ifp->key = StringSave("transit_peptide");
1685 }
1686 sfp = AddProtRefInfo(sfp, prot);
1687 } else if (psfp->data.choice == SEQFEAT_BOND) {
1688 if (psfp->data.value.intvalue == 1) {
1689 sfp->qual = AddGBQual(sfp->qual, "note", "disulfide bond");
1690 } else if (psfp->data.value.intvalue == 2) {
1691 sfp->qual = AddGBQual(sfp->qual, "note", "thiolester bond");
1692 } else if (psfp->data.value.intvalue == 3) {
1693 sfp->qual = AddGBQual(sfp->qual, "note", "xlink bond");
1694 } else if (psfp->data.value.intvalue == 4) {
1695 sfp->qual = AddGBQual(sfp->qual, "note", "thioether bond");
1696 } else {
1697 sfp->qual = AddGBQual(sfp->qual, "note", "bond");
1698 }
1699 } else if (psfp->data.choice == SEQFEAT_SITE) {
1700 AddSiteNoteQual(psfp, sfp);
1701 } else if (psfp->data.choice == SEQFEAT_REGION) {
1702 tmp = MemNew(StringLen(psfp->data.value.ptrvalue) + 9);
1703 sprintf(tmp, "Region: %s", (CharPtr) psfp->data.value.ptrvalue);
1704 sfp->qual = AddGBQual(sfp->qual, "note", tmp);
1705 tmp = MemFree(tmp);
1706 } else {
1707 SeqFeatFree(sfp);
1708 return NULL;
1709 }
1710 sfp->excpt = psfp->excpt;
1711 for (xrp=psfp->xref; xrp; xrp=xrp->next) {
1712 if (xrp->data.choice == SEQFEAT_GENE) {
1713 grp = (GeneRefPtr) xrp->data.value.ptrvalue;
1714 sfp->xref = AsnIoMemCopy(xrp,
1715 (AsnReadFunc) SeqFeatXrefAsnRead,
1716 (AsnWriteFunc) SeqFeatXrefAsnWrite);
1717 break;
1718 }
1719 }
1720 if ((sfp->partial = psfp->partial) == TRUE) {
1721 cdr = (CdRegionPtr) cds->data.value.ptrvalue;
1722 if (cdr->frame) {
1723 sprintf(buf, "%d", cdr->frame);
1724 } else {
1725 sprintf(buf, "1");
1726 }
1727 sfp->qual = AddGBQualEx(&(ifp->key), sfp->qual, "codon_start", buf);
1728 }
1729 sfp->exp_ev = psfp->exp_ev;
1730 if (psfp->comment)
1731 sfp->comment = StringSave(psfp->comment);
1732 if (psfp->title)
1733 sfp->comment = StringSave(psfp->title);
1734 for (q=psfp->qual; q; q=q->next) {
1735 sfp->qual = AddGBQual(sfp->qual, q->qual, q->val);
1736 }
1737 if (format == GENPEPT_FMT || format == EMBLPEPT_FMT) {
1738 sfp->location = AsnIoMemCopy(psfp->location,
1739 (AsnReadFunc) SeqLocAsnRead, (AsnWriteFunc) SeqLocAsnWrite);
1740 return sfp;
1741 }
1742 if (new_loc) {
1743 sfp->location = aaFeatLoc_to_dnaFeatLoc(cds, new_loc);
1744 } else {
1745 sfp->location = aaFeatLoc_to_dnaFeatLoc(cds, psfp->location);
1746 }
1747 if (sfp->location == NULL) {
1748 SeqFeatFree(sfp);
1749 return NULL;
1750 }
1751 if (sfp->partial == FALSE) {
1752 retval = SeqLocPartialCheck(sfp->location);
1753 if (retval > SLP_COMPLETE && retval < SLP_NOSTART) {
1754 sfp->partial = TRUE;
1755 }
1756 }
1757 return sfp;
1758 }
1759 static Boolean CheckNewSfpLoc(SeqFeatPtr sfp, BioseqPtr bsp)
1760 {
1761 SeqLocPtr slp, slp1 = NULL;
1762
1763 while ((slp1=SeqLocFindNext(sfp->location, slp1)) != NULL) {
1764 slp = slp1;
1765 }
1766 if (SeqIdForSameBioseq(SeqLocId(slp), bsp->id)) {
1767 if (SeqLocLen(slp) <= BioseqGetLen(bsp)) {
1768 return TRUE;
1769 }
1770 }
1771 return FALSE;
1772 }
1773
1774 /*****************************************************************************
1775 * gather callback for collecting gene features
1776 *
1777 ******************************************************************************/
1778 static Boolean get_genes (GatherContextPtr gcp)
1779 {
1780 BioseqPtr bsp = NULL;
1781 OrganizeFeatPtr ofp;
1782 SeqFeatPtr sfp;
1783 Boolean temp=FALSE;
1784
1785 ofp = gcp->userdata;
1786 if (gcp->thistype != OBJ_SEQFEAT) {
1787 return TRUE;
1788 }
1789 sfp = (SeqFeatPtr) (gcp->thisitem);
1790 bsp = ofp->bsp;
1791 if (sfp->data.choice != SEQFEAT_GENE) {
1792 return TRUE;
1793 }
1794 if ((gcp->tempload == TRUE) && (! gcp->hold)) {
1795 temp = TRUE;
1796 }
1797 ofp->Genelist = EnlargeSortList(ofp->Genelist,ofp->sfpGenesize);
1798 ofp->sfpGenesize = StoreFeatTemp(ofp->Genelist, sfp,
1799 ofp->sfpGenesize, bsp, ofp->seg_bsp, gcp->entityID,
1800 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1801 return TRUE;
1802 }
1803
1804 /*****************************************************************************
1805 * gather callback for collecting all features
1806 *
1807 ******************************************************************************/
1808 static Boolean get_feats (GatherContextPtr gcp)
1809 {
1810 BioseqPtr bsp = NULL;
1811 OrganizeFeatPtr ofp;
1812 SeqFeatPtr sfp;
1813 ImpFeatPtr ifp;
1814 SeqIdPtr xid;
1815 GatherRange gr;
1816 Boolean r_trunc;
1817 OrganizeProtPtr opp;
1818 BioseqPtr p_bsp;
1819 SeqEntryPtr sep;
1820 GatherScope gs;
1821 SeqFeatPtr new_sfp, psfp;
1822 Int4 index;
1823 Boolean temp = FALSE;
1824 Uint2 entityID;
1825 SeqMgrFeatContext fcontext;
1826
1827 ofp = gcp->userdata;
1828 if (gcp->thistype != OBJ_SEQFEAT) {
1829 return TRUE;
1830 }
1831 sfp = (SeqFeatPtr) (gcp->thisitem);
1832 /* do sorting within EntityId */
1833 if (gcp->entityID != ofp->oldID) {
1834 ofp->lock_bsp = BioseqFindCore(SeqLocId(sfp->location));
1835 BioseqLock(ofp->lock_bsp);
1836 SortOrganizeFeat(ofp);
1837 ofp->oldID = gcp->entityID;
1838 }
1839 gr = gcp->extremes;
1840 r_trunc = gr.r_trunc;
1841 if (sfp->data.choice != SEQFEAT_CDREGION &&
1842 sfp->data.choice != SEQFEAT_GENE) {
1843 if ((!ofp->embl_feat && ASN2FF_SHOW_GB_STYLE
1844 && !(ofp->showSeqLoc)) && r_trunc) {
1845 return TRUE;
1846 }
1847 }
1848 bsp = ofp->bsp;
1849 if ((gcp->tempload == TRUE) && (! gcp->hold)) {
1850 temp = TRUE;
1851 }
1852 switch (sfp->data.choice) {
1853 case SEQFEAT_GENE:
1854 ofp->Genelist = EnlargeSortList(ofp->Genelist,ofp->sfpGenesize);
1855 ofp->sfpGenesize = StoreFeatTemp(ofp->Genelist, sfp,
1856 ofp->sfpGenesize, bsp, ofp->seg_bsp, gcp->entityID,
1857 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1858 if (ofp->show_gene) {
1859 if (r_trunc && !ofp->embl_feat && ASN2FF_SHOW_GB_STYLE
1860 && !(ofp->showSeqLoc)) {
1861 break;
1862 }
1863 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1864 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp,
1865 ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID,
1866 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1867 }
1868 break;
1869 case SEQFEAT_BIOSRC: /* save in both lists */
1870 ofp->Biosrclist =
1871 EnlargeSortList(ofp->Biosrclist, ofp->biosrcsize);
1872 ofp->biosrcsize = StoreFeatTemp(ofp->Biosrclist, sfp,
1873 ofp->biosrcsize, bsp, ofp->seg_bsp, gcp->entityID,
1874 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1875 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1876 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp,
1877 ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID,
1878 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1879 break;
1880 case SEQFEAT_ORG:
1881 ofp->Orglist =
1882 EnlargeSortList(ofp->Orglist, ofp->sfpOrgsize);
1883 ofp->sfpOrgsize = StoreFeatTemp(ofp->Orglist, sfp,
1884 ofp->sfpOrgsize, bsp, ofp->seg_bsp, gcp->entityID,
1885 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1886 break;
1887 case SEQFEAT_PUB: /* Pubs are already captured by "StorePubInfo". */
1888 break;
1889 case SEQFEAT_IMP: /* This case must be before the generic case */
1890 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
1891 if (StringCmp(ifp->key, "source") == 0) {
1892 /* Capture only the first source feat that covers the
1893 entire entry, the others go among the generic features.*/
1894 if (ofp->sfpSourcesize == 0) {
1895 if (bsp->length != -1 &&
1896 bsp->length == SeqLocLen(sfp->location)) {
1897 ofp->Sourcelist =
1898 EnlargeSortList(ofp->Sourcelist,
1899 ofp->sfpSourcesize);
1900 ofp->sfpSourcesize =
1901 StoreFeatTemp(ofp->Sourcelist, sfp, ofp->sfpSourcesize,
1902 bsp, ofp->seg_bsp,gcp->entityID, gcp->itemID,
1903 gcp->thistype, gcp->new_loc, NULL, 0, temp);
1904 break;
1905 }
1906 }
1907 } else if (StringCmp(ifp->key, "Site-ref") == 0 && sfp->cit == NULL) {
1908 /* if "Site-ref" has a pub, put out info as a pub */
1909 ofp->Siteslist =
1910 EnlargeSortList(ofp->Siteslist, ofp->sfpSitesize);
1911 ofp->sfpSitesize =
1912 StoreFeatTemp(ofp->Siteslist, sfp, ofp->sfpSitesize, bsp,
1913 ofp->seg_bsp, gcp->entityID, gcp->itemID,
1914 gcp->thistype, gcp->new_loc, NULL, 0, temp);
1915 break;
1916 } else if (StringCmp(ifp->key, "Site-ref") == 0 && sfp->cit != NULL) {
1917 /* Check to see if this was already put out as a pub*/
1918 break;
1919 } else {
1920 /* If none of the above is true, execute generic. */
1921 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1922 ofp->sfpListsize =
1923 StoreFeatTemp(ofp->List, sfp, ofp->sfpListsize, bsp,
1924 ofp->seg_bsp, gcp->entityID, gcp->itemID,
1925 gcp->thistype, gcp->new_loc, NULL, 0, temp);
1926 }
1927 break;
1928 /* Look to see if an Xref goes out as a SeqFeat or as a 2nd accession */
1929 case SEQFEAT_SEQ:
1930 xid=CheckXrefFeat(bsp, sfp);
1931 if (xid == NULL) {
1932 ofp->Xreflist = EnlargeSortList(ofp->Xreflist,
1933 ofp->sfpXrefsize);
1934 ofp->sfpXrefsize = StoreFeatTemp(ofp->Xreflist, sfp,
1935 ofp->sfpXrefsize, bsp, ofp->seg_bsp, gcp->entityID,
1936 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1937 break;
1938
1939 }
1940 break;
1941 case SEQFEAT_COMMENT:
1942 /* The following assures a valid comment */
1943 if (sfp->comment == NULL || StringLen(sfp->comment) == 0)
1944 break;
1945 if (bsp && bsp->length != -1 &&
1946 bsp->length == SeqLocLen(sfp->location)) {
1947 /** will go to COMMENT field **/
1948 ofp->Commlist =
1949 EnlargeSortList(ofp->Commlist, ofp->sfpCommsize);
1950 ofp->sfpCommsize =
1951 StoreFeatTemp(ofp->Commlist, sfp, ofp->sfpCommsize,
1952 bsp, ofp->seg_bsp, gcp->entityID, gcp->itemID,
1953 gcp->thistype, gcp->new_loc, NULL, 0, temp);
1954 } else {
1955 /** will go to misc_feature **/
1956 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1957 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp,
1958 ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID,
1959 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
1960 }
1961 break;
1962 case SEQFEAT_CDREGION:
1963 if (r_trunc != TRUE || ofp->embl_feat || ofp->showSeqLoc
1964 || !ASN2FF_SHOW_GB_STYLE) {
1965 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
1966 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp,
1967 ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID,
1968 gcp->itemID, gcp->thistype, gcp->new_loc, gcp->extra_loc,
1969 gcp->extra_loc_cnt, temp);
1970 }
1971 /* Look for Prot-ref features, create ImpFeats */
1972 if (sfp != NULL && sfp->product != NULL && ofp->format != GENPEPT_FMT) {
1973 p_bsp = BioseqFindCore(SeqLocId(sfp->product));
1974 if (p_bsp != NULL) /*Bioseq is (or has been) in memory */
1975 {
1976 if (ofp->useSeqMgrIndexes) {
1977 entityID = ObjMgrGetEntityIDForPointer (p_bsp);
1978 if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
1979 SeqMgrIndexFeatures (entityID, NULL);
1980 }
1981 psfp = SeqMgrGetBestProteinFeature (p_bsp, NULL);
1982 if (psfp != NULL) {
1983 psfp = SeqMgrGetNextFeature (p_bsp, NULL, 0, 0, &fcontext);
1984 while (psfp != NULL) {
1985 new_sfp = CreateImpFeatFromProt(ofp->format, psfp, sfp, NULL);
1986 if (new_sfp != NULL) {
1987 if (CheckNewSfpLoc(new_sfp, bsp)) {
1988 ofp->List = EnlargeSortList(ofp->List,
1989 ofp->sfpListsize);
1990 ofp->sfpListsize =
1991 StoreFeatFree(ofp->List,new_sfp,
1992 ofp->sfpListsize, bsp, ofp->seg_bsp,
1993 fcontext.entityID, fcontext.itemID,
1994 OBJ_SEQFEAT, NULL, NULL, 0, TRUE);
1995 }
1996 /* if opp->list[index].slp !=NULL I shoud use it*/
1997 }
1998 psfp = SeqMgrGetNextFeature (p_bsp, psfp, 0, 0, &fcontext);
1999 }
2000 }
2001 return TRUE;
2002 }
2003 opp = (OrganizeProtPtr) MemNew(sizeof(OrganizeProt));
2004 opp->size = 0;
2005 sep = SeqEntryNew();
2006 sep->choice = 1;
2007 sep->data.ptrvalue = p_bsp;
2008 MemSet ((Pointer) (&gs), 0, sizeof (GatherScope));
2009 gs.get_feats_location = TRUE;
2010 gs.target = sfp->product;
2011 GatherSeqEntry(sep, opp, get_prot_feats, &gs);
2012 for (index=0; index < opp->size; index++) {
2013 if ((psfp = opp->list[index].sfp) == NULL) {
2014 continue;
2015 }
2016 new_sfp = CreateImpFeatFromProt(ofp->format, psfp, sfp,
2017 opp->list[index].slp);
2018 if (new_sfp != NULL) {
2019 if (CheckNewSfpLoc(new_sfp, bsp)) {
2020 ofp->List = EnlargeSortList(ofp->List,
2021 ofp->sfpListsize);
2022 ofp->sfpListsize =
2023 StoreFeatFree(ofp->List,new_sfp,
2024 ofp->sfpListsize, bsp, ofp->seg_bsp,
2025 opp->list[index].entityID,
2026 opp->list[index].itemID,
2027 opp->list[index].itemtype,
2028 NULL, NULL, 0, TRUE);
2029 }
2030 /* if opp->list[index].slp !=NULL I shoud use it*/
2031 }
2032 }
2033 if(opp->list != NULL && opp->list->nsp != NULL)
2034 NoteStructFree(opp->list->nsp);
2035 MemFree(opp->list);
2036 MemFree(opp);
2037 MemFree(sep);
2038 }
2039 }
2040 break;
2041 case SEQFEAT_RNA:
2042 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
2043 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp,
2044 ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID,
2045 gcp->itemID, gcp->thistype, gcp->new_loc, gcp->extra_loc,
2046 gcp->extra_loc_cnt, temp);
2047 break;
2048 case SEQFEAT_RSITE:
2049 /* do not gather this type*/
2050 break;
2051 default:
2052 /* If none of the above is true, execute generic. */
2053 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
2054 ofp->sfpListsize = StoreFeatTemp(ofp->List, sfp,
2055 ofp->sfpListsize, bsp, ofp->seg_bsp, gcp->entityID,
2056 gcp->itemID, gcp->thistype, gcp->new_loc, NULL, 0, temp);
2057 break;
2058 }
2059 return TRUE;
2060 }
2061
2062 static Boolean is_embl(GBEntryPtr gbp)
2063 {
2064 CharPtr prefix = EMBL_AC;
2065 static CharPtr embl_accpref[EMBL_PREFNUM] = {"AJ", "AL", "AM", "AN", "AX"};
2066 Boolean retval = FALSE;
2067 Int2 i;
2068
2069 if (gbp == NULL || gbp->accession == NULL)
2070 return FALSE;
2071 if (IS_DIGIT(gbp->accession[1]) &&
2072 StringChr(prefix, gbp->accession[0]) != NULL) {
2073 retval = TRUE;
2074 } else {
2075 for (i = 0; i < EMBL_PREFNUM; i++) {
2076 if (StringNCmp(gbp->accession, embl_accpref[i], 2) == 0) {
2077 retval = TRUE;
2078 }
2079 }
2080 }
2081 return retval;
2082 }
2083
2084 /****************************************************************************
2085 * void GetGeneRefInfo (GeneStructPtr gsp, NoteStructPtr nsp, GeneRefPtr grp)
2086 *
2087 * gsp: GeneStructPtr containing gene information
2088 * grp: GeneRefPtr from a sfp of type gene or a sfp xref.
2089 *
2090 * If fields are empty on the gsp, and the relevant information
2091 * is given by the grp, that field is filled on the gsp
2092 ****************************************************************************/
2093
2094 static void GeneRefInfoToGsp (GeneStructPtr gsp, GeneRefPtr grp, SeqFeatPtr sfp)
2095
2096 {
2097 ValNodePtr syn, vsyn = NULL;
2098
2099 if (grp == NULL) {
2100 return;
2101 }
2102 syn=grp->syn;
2103 if (grp->locus != NULL) {
2104 if (gsp->gene != NULL &&
2105 StringCmp(gsp->gene->data.ptrvalue, grp->locus) != 0) {
2106 if (syn != NULL) {
2107 vsyn = ValNodeCopyStr(&(vsyn), 1, syn->data.ptrvalue);
2108 gsp->gene->next=vsyn;
2109 }
2110 return;
2111 }
2112 if (gsp->gene == NULL) {
2113 gsp->gene = ValNodeCopyStr(&(gsp->gene), 0, grp->locus);
2114 }
2115 } else if (grp->desc != NULL) {
2116 gsp->gene = ValNodeCopyStr(&(gsp->gene), 0, grp->desc);
2117 }
2118 if (syn != NULL) {
2119 vsyn = ValNodeCopyStr(&(vsyn), 1, syn->data.ptrvalue);
2120 if (gsp->gene == NULL) {
2121 gsp->gene = vsyn;
2122 } else {
2123 gsp->gene->next=vsyn;
2124 }
2125 }
2126 if (gsp->map[0] == NULL && grp->maploc)
2127 gsp->map[0] = grp->maploc;
2128 if (grp->pseudo) {
2129 gsp->pseudo = TRUE;
2130 } else if (sfp != NULL && sfp->pseudo) {
2131 gsp->pseudo = TRUE;
2132 } else {
2133 gsp->pseudo = FALSE;
2134 }
2135
2136 gsp->grp = AsnIoMemCopy((GeneRefPtr)grp, (AsnReadFunc) GeneRefAsnRead, (AsnWriteFunc) GeneRefAsnWrite);
2137 return;
2138 }
2139
2140 /*****************************************************************************
2141 * gather genes for particular CDS (for GenPept an d EmblPept
2142 *
2143 ******************************************************************************/
2144 static OrganizeFeatPtr GetGeneListForCds(Uint2 entityID, BioseqPtr bsp)
2145 {
2146 OrganizeFeatPtr ofp=NULL;
2147 GatherScope gsc;
2148 SeqLocPtr slp;
2149
2150 ofp = CreateOrganizeFeat();
2151 MemSet ((Pointer) (&gsc), 0, sizeof (GatherScope));
2152 MemSet ((Pointer) (gsc.ignore), (int)(TRUE),
2153 (size_t) (OBJ_MAX * sizeof(Boolean)));
2154 gsc.ignore[OBJ_SEQANNOT] = FALSE;
2155 gsc.ignore[OBJ_SEQFEAT] = FALSE;
2156 gsc.get_feats_location = TRUE;
2157 gsc.seglevels = 1;
2158 if (bsp != NULL) {
2159 slp = ValNodeNew(NULL);
2160 slp->choice = SEQLOC_WHOLE;
2161 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
2162 gsc.target = slp;
2163 } else {
2164 gsc.target = NULL;
2165 }
2166 ofp->bsp = bsp;
2167 ofp->seg_bsp = NULL;
2168
2169 GatherEntity(entityID, ofp, get_genes, &gsc);
2170
2171 if((slp = gsc.target) != NULL) {
2172 SeqIdFree(slp->data.ptrvalue);
2173 ValNodeFree(slp);
2174 }
2175
2176 return ofp;
2177 }
2178
2179 /***************************************************************************
2180 * This function assigns genes to the given sfp by comparing locations.
2181 * of sfp with the location from the list of genes using SeqLocAinB
2182 * (sfp have to be in the gene).
2183 * If SeqLocAinB returns "0", there is an exact match and this gene
2184 * is judged "the best" match to the sfp.`
2185 * If SeqLocAinB returns the diff > 0, slp is contained within
2186 * gene_loc and the difference between the two features is measured.
2187 * The lowest difference gives the "best_gene"
2188 * and that information is stored in the GeneStructPtr (gsp) by
2189 * GeneRefInfoToGsp.
2190 ***************************************************************************/
2191
2192 NLM_EXTERN void MatchNAGeneToFeat (Boolean non_strict, OrganizeFeatPtr ofp, SortStructPtr p)
2193 {
2194
2195 Boolean bind_to_feat=FALSE;
2196 NoteStructPtr nsp; /* UNUSED */
2197 GeneStructPtr gsp;
2198 GeneRefPtr grp=NULL;
2199 ImpFeatPtr ifp;
2200 Int4 best_gene = -1, index;
2201 Int4 diff_lowest, diff_current;
2202 SeqFeatPtr gene = NULL, best_gene_feat = NULL, sfp;
2203 Uint1 sg, sf;
2204
2205 if (p == NULL)
2206 return;
2207 if ((sfp = p->sfp) == NULL)
2208 return;
2209 if (sfp->data.choice == SEQFEAT_BIOSRC || sfp->data.choice == SEQFEAT_GENE)
2210 return;
2211 if (sfp->data.choice == SEQFEAT_IMP) {
2212 ifp = sfp->data.value.ptrvalue;
2213 if (ifp && StringCmp(ifp->key, "repeat_region") == 0) {
2214 return;
2215 }
2216 }
2217 if (non_strict == FALSE) { /* binding is limited to RNA and CDS */
2218 if (sfp->data.choice == SEQFEAT_CDREGION ||
2219 sfp->data.choice == SEQFEAT_RNA) {
2220 bind_to_feat = TRUE;
2221 } else if (sfp->data.choice == SEQFEAT_IMP) {
2222 ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
2223 if (StringCmp(ifp->key, "CDS") == 0)
2224 bind_to_feat = TRUE;
2225 }
2226 } else {
2227 bind_to_feat = TRUE;
2228 }
2229 if (bind_to_feat == FALSE)
2230 return;
2231
2232 best_gene_feat = SeqMgrGetOverlappingGene (sfp->location, NULL);
2233 if (best_gene_feat != NULL) {
2234 grp = best_gene_feat->data.value.ptrvalue;
2235 if (grp != NULL) {
2236 gsp = p->gsp;
2237 /* GetDBXrefFromGene(grp, sfp);*/
2238 /******** gsp->grp = AsnIoMemCopy(grp,
2239 (AsnReadFunc) GeneRefAsnRead, (AsnWriteFunc) GeneRefAsnWrite); ****/ /*** it is redone in GeneRefInfoToGsp (EY) */
2240 GeneRefInfoToGsp(gsp, grp, best_gene_feat); /*copy GeRefInfo to GeneStruct */
2241 GetGeneQuals(sfp, gsp); /* copy quals info to GenStruct */
2242 return;
2243 }
2244 }
2245 nsp = p->nsp;
2246 gsp = p->gsp;
2247 diff_lowest = -1;
2248 p = ofp->Genelist;
2249 for (index=0; index < ofp->sfpGenesize; index++, p++) {
2250 if ((gene = p->sfp) == NULL) {
2251 continue;
2252 }
2253 sg = SeqLocStrand(gene->location);
2254 sf = SeqLocStrand(sfp->location);
2255 if (sf == sg ||
2256 (sg == Seq_strand_unknown && sf != Seq_strand_minus) ||
2257 (sf == Seq_strand_unknown && sg != Seq_strand_minus)) {
2258 diff_current = SeqLocAinB(sfp->location, gene->location);
2259 } else {
2260 continue;
2261 }
2262 if (! diff_current) /* perfect match */ {
2263 best_gene = index;
2264 best_gene_feat = gene;
2265 break;
2266 } else if (diff_current > 0) {
2267 if ((diff_lowest == -1) || (diff_current < diff_lowest)) {
2268 diff_lowest = diff_current;
2269 best_gene = index;
2270 best_gene_feat = gene;
2271 }
2272 }
2273
2274 }
2275 if (best_gene == -1) { /*no gene found that completely contains CDS*/
2276 return;
2277 } else {
2278 if (best_gene_feat != NULL) {
2279 grp = best_gene_feat->data.value.ptrvalue;
2280 }
2281 }
2282 /* GetDBXrefFromGene(grp, sfp);*/
2283 gsp->grp = AsnIoMemCopy(grp,
2284 (AsnReadFunc) GeneRefAsnRead, (AsnWriteFunc) GeneRefAsnWrite);
2285 GeneRefInfoToGsp(gsp, grp, best_gene_feat); /*copy GeRefInfo to GeneStruct */
2286 GetGeneQuals(sfp, gsp); /* copy quals info to GenStruct */
2287
2288 return;
2289 }
2290
2291 static Boolean CheckCdregionGeneXref (SortStructPtr p, Uint1 format)
2292 {
2293 SeqFeatPtr sfp;
2294 GeneRefPtr grp;
2295 GeneStructPtr gsp;
2296 NoteStructPtr nsp;
2297 ProtRefPtr prp;
2298 SeqFeatXrefPtr xrp;
2299 Boolean retval = FALSE;
2300
2301 if (p == NULL) {
2302 return retval;
2303 }
2304 if ((sfp = p->sfp) == NULL)
2305 return retval;
2306 gsp = p->gsp;
2307 nsp = p->nsp;
2308 for (xrp=sfp->xref; xrp; xrp=xrp->next) {
2309 if (xrp->data.choice == SEQFEAT_GENE) {
2310 retval = TRUE;
2311 grp = (GeneRefPtr) xrp->data.value.ptrvalue;
2312 GeneRefInfoToGsp(gsp, grp, sfp);
2313 } else if (xrp->data.choice == SEQFEAT_PROT) {
2314 prp = (ProtRefPtr) xrp->data.value.ptrvalue;
2315 GetProtRefInfo(format, gsp, nsp, prp);
2316 }
2317 }
2318
2319 return retval;
2320 }
2321
2322
2323 NLM_EXTERN Boolean GetGeneQuals(SeqFeatPtr sfp_in, GeneStructPtr gsp)
2324 {
2325 Boolean has_gene = FALSE;
2326 GBQualPtr qual1;
2327
2328 for (qual1=sfp_in->qual; qual1; qual1=qual1->next) {
2329 if (StringCmp(qual1->qual, "gene") == 0) {
2330 has_gene = TRUE;
2331 if (gsp->gene == NULL) {
2332 gsp->gene = ValNodeCopyStr(&(gsp->gene), 0, qual1->val);
2333 }
2334 } else if (StringCmp(qual1->qual, "product") == 0) {
2335 if (gsp->product)
2336 ValNodeCopyStr(&(gsp->product), 0, qual1->val);
2337 else
2338 gsp->product = ValNodeCopyStr(&(gsp->product), 0, qual1->val);
2339 } else if (StringCmp(qual1->qual, "standard_name") == 0) {
2340 if (gsp->standard_name)
2341 ValNodeCopyStr(&(gsp->standard_name), 0, qual1->val);
2342 else
2343 gsp->standard_name = ValNodeCopyStr(&(gsp->standard_name), 0, qual1->val);
2344 } else if (gsp->map[0] == NULL
2345 && StringCmp(qual1->qual, "map") == 0) {
2346 gsp->map[0] = qual1->val;
2347 } else if (StringCmp(qual1->qual, "EC_number") == 0) {
2348 if (gsp->ECNum)
2349 ValNodeCopyStr(&(gsp->ECNum), 0, qual1->val);
2350 else
2351 gsp->ECNum = ValNodeCopyStr(&(gsp->ECNum), 0, qual1->val);
2352 }
2353 }
2354
2355 return has_gene;
2356 }
2357
2358 /*****************************************************************************
2359 * see MatchNAGeneToFeat (above) for details
2360 *
2361 ******************************************************************************/
2362 NLM_EXTERN void MatchAAGeneToFeat (OrganizeFeatPtr ofp, SortStructPtr p)
2363 {
2364 GeneRefPtr grp = NULL;
2365 Int4 best_gene = -1, index;
2366 SeqFeatPtr gene = NULL, best_gene_feat = NULL, sfp;
2367 NoteStructPtr nsp; /* UNUSED */
2368 GeneStructPtr gsp;
2369 BioseqPtr bsp;
2370 Int4 diff_lowest, diff_current;
2371 OrganizeFeatPtr gofp;
2372
2373 if (p == NULL)
2374 return;
2375 if ((sfp = p->sfp) == NULL)
2376 return;
2377 if (SeqLocLen(sfp->location) == -1)
2378 return; /*SeqLocLen failed on CDS location */
2379
2380 /* Only look on the CDS!! for genpept. */
2381 if (sfp->data.choice != SEQFEAT_CDREGION) {
2382 return;
2383 }
2384 bsp = BioseqFind(SeqLocId(sfp->location));
2385
2386 best_gene_feat = SeqMgrGetOverlappingGene (sfp->location, NULL);
2387 if (best_gene_feat != NULL) {
2388 grp = best_gene_feat->data.value.ptrvalue;
2389 gsp = p->gsp;
2390 GeneRefInfoToGsp(gsp, grp, best_gene_feat); /*copy GeRefInfo to GeneStruct */
2391 if (bsp && bsp->id->choice == SEQID_OTHER) {
2392 GetDBXrefFromGene(grp, sfp);
2393 }
2394 return;
2395 }
2396
2397 gofp = GetGeneListForCds(p->entityID, bsp);
2398 if (gofp == NULL) {
2399 return;
2400 }
2401 nsp = p->nsp;
2402 gsp = p->gsp;
2403 if (bsp == NULL && gofp->sfpGenesize == 1) {
2404 gene = gofp->Genelist->sfp;
2405 if (gene != NULL) {
2406 grp = gene->data.value.ptrvalue;
2407 GeneRefInfoToGsp(gsp, grp, gene); /*copy GeRefInfo to GeneStruct */
2408 }
2409 MemFree(gofp->Genelist);
2410 MemFree(gofp);
2411 return; /* first and best gene */
2412 }
2413 diff_lowest = -1;
2414 for (p=gofp->Genelist, index=0; index < gofp->sfpGenesize; p++, index++) {
2415 if ((gene = p->sfp) == NULL) {
2416 continue;
2417 }
2418 diff_current = SeqLocAinB(sfp->location, gene->location);
2419 if (! diff_current) /* perfect match */ {
2420 best_gene = index;
2421 best_gene_feat = gene;
2422 break;
2423 } else if (diff_current > 0) {
2424 if ((diff_lowest == -1) || (diff_current < diff_lowest)) {
2425 diff_lowest = diff_current;
2426 best_gene = index;
2427 best_gene_feat = gene;
2428 }
2429 }
2430 }
2431 if (best_gene == -1) {
2432 return;
2433 }
2434 if (best_gene_feat != NULL) {
2435 grp = best_gene_feat->data.value.ptrvalue;
2436 GeneRefInfoToGsp(gsp, grp, best_gene_feat); /*copy GeRefInfo to GeneStruct */
2437 if (bsp && bsp->id->choice == SEQID_OTHER) {
2438 GetDBXrefFromGene(grp, sfp);
2439 }
2440 }
2441 MemFree(gofp->Genelist);
2442 MemFree(gofp);
2443 return;
2444 }
2445
2446 /*****************************************************************************
2447 * do sorting and gene mapping within one entity (that should be locked)
2448 *
2449 ******************************************************************************/
2450 NLM_EXTERN void SortOrganizeFeat(OrganizeFeatPtr ofp)
2451 {
2452 SortStructPtr p;
2453 GeneStructPtr gsp;
2454 Int4 index;
2455 SeqFeatPtr sfp;
2456
2457 if (ofp == NULL)
2458 return;
2459 if (ofp->sfpListsize > 0 && ofp->sortListsize < ofp->sfpListsize) {
2460 HeapSort((VoidPtr) (ofp->List + ofp->sortListsize),
2461 (size_t) (ofp->sfpListsize - ofp->sortListsize),
2462 sizeof(SortStruct), CompareSfpForHeap);
2463 UniqueFeat(ofp->List + ofp->sortListsize,
2464 ofp->sfpListsize - ofp->sortListsize);
2465 }
2466 if (ofp->sfpCommsize > 0 && ofp->sortCommsize < ofp->sfpCommsize)
2467 HeapSort((VoidPtr) (ofp->Commlist + ofp->sortCommsize),
2468 (size_t) (ofp->sfpCommsize - ofp->sortCommsize),
2469 sizeof(SortStruct), CompareSfpForHeap);
2470 if (ofp->sfpGenesize > 0 && ofp->sortGenesize < ofp->sfpGenesize)
2471 HeapSort((VoidPtr) (ofp->Genelist + ofp->sortGenesize),
2472 (size_t) (ofp->sfpGenesize - ofp->sortGenesize),
2473 sizeof(SortStruct), CompareSfpForHeap);
2474 if (ofp->sfpOrgsize > 0 && ofp->sortOrgsize < ofp->sfpOrgsize)
2475 HeapSort((VoidPtr) (ofp->Orglist + ofp->sortOrgsize),
2476 (size_t) (ofp->sfpOrgsize - ofp->sortOrgsize),
2477 sizeof(SortStruct), CompareSfpForHeap);
2478 if (ofp->sfpSitesize > 0 && ofp->sortSitesize < ofp->sfpSitesize)
2479 HeapSort((VoidPtr) (ofp->Siteslist + ofp->sortSitesize),
2480 (size_t) (ofp->sfpSitesize - ofp->sortSitesize),
2481 sizeof(SortStruct), CompareSfpForHeap);
2482 if (ofp->sfpSourcesize > 0 && ofp->sortSourcesize < ofp->sfpSourcesize)
2483 HeapSort((VoidPtr) (ofp->Sourcelist + ofp->sortSourcesize),
2484 (size_t) (ofp->sfpSourcesize - ofp->sortSourcesize),
2485 sizeof(SortStruct),
2486 CompareSfpForHeap);
2487 ofp->sortListsize = ofp->sfpListsize;
2488 ofp->sortCommsize = ofp->sfpCommsize;
2489 ofp->sortGenesize = ofp->sfpGenesize;
2490 ofp->sortOrgsize = ofp->sfpOrgsize;
2491 ofp->sortSitesize = ofp->sfpSitesize;
2492 ofp->sortSourcesize = ofp->sfpSourcesize;
2493
2494 p = ofp->List;
2495 for (index=0; index < ofp->sfpListsize; index++, p++) {
2496 sfp = p->sfp;
2497 if (p == NULL)
2498 continue;
2499 gsp = GeneStructNew();
2500 p->gsp = gsp;
2501 if (CheckCdregionGeneXref(p, ofp->format) == FALSE) {
2502 if (ofp->format == EMBLPEPT_FMT || ofp->format == GENPEPT_FMT) {
2503 MatchAAGeneToFeat(ofp, p);
2504 } else {
2505 MatchNAGeneToFeat(ofp->non_strict, ofp, p);
2506 }
2507 }
2508 GetGeneQuals(sfp, gsp);
2509 }
2510 /* if (ofp->sfpListsize > 0) {
2511 BioseqUnlock(ofp->lock_bsp);
2512 }
2513 */
2514 BioseqUnlock(ofp->lock_bsp);
2515 return;
2516 }
2517
2518 /*
2519 static Boolean is_mRNA_set(SeqEntryPtr sep, BioseqPtr bsp)
2520 {
2521 BioseqSetPtr bssp;
2522 ValNodePtr vnp;
2523 MolInfoPtr mip = NULL;
2524
2525 if (sep == NULL) {
2526 return FALSE;
2527 }
2528 if (IS_Bioseq(sep)) {
2529 return FALSE;
2530 }
2531 bssp = (BioseqSetPtr) sep->data.ptrvalue;
2532 if (bssp->_class != BioseqseqSet_class_gen_prod_set) {
2533 return FALSE;
2534 }
2535 for (vnp = bsp->descr; vnp; vnp=vnp->next) {
2536 if (vnp->choice == Seq_descr_molinfo) {
2537 mip = (MolInfoPtr) vnp->data.ptrvalue;
2538 break;
2539 }
2540 }
2541 if (mip == NULL) {
2542 return FALSE;
2543 }
2544 if (mip->biomol != 3) {
2545 return FALSE;
2546 }
2547 return TRUE;
2548 }
2549 */
2550
2551 /*****************************************************************************
2552 * Gather all features in one GBEntry and process within
2553 * each entity (in callback)
2554 *
2555 ******************************************************************************/
2556 NLM_EXTERN void OrganizeSeqFeat(Asn2ffJobPtr ajp, GBEntryPtr gbp)
2557 {
2558 BioseqPtr bsp;
2559 GatherScope gsc;
2560 ValNodePtr slp = NULL;
2561 OrganizeFeatPtr ofp;
2562 SeqFeatPtr mrna, gene = NULL, newg;
2563 GeneRefPtr grp;
2564 SeqMgrFeatContext fcontext;
2565 SeqIdPtr sip;
2566
2567 ofp = CreateOrganizeFeat();
2568 ofp->lock_bsp = NULL;
2569 if ((bsp = gbp->bsp) == NULL)
2570 return;
2571 ofp->embl_feat = is_embl(gbp);
2572 MemSet ((Pointer) (&gsc), 0, sizeof (GatherScope));
2573 MemSet ((Pointer) (gsc.ignore), (int)(TRUE),
2574 (size_t) (OBJ_MAX * sizeof(Boolean)));
2575 gsc.ignore[OBJ_SEQANNOT] = FALSE;
2576 gsc.ignore[OBJ_SEQFEAT] = FALSE;
2577 gsc.get_feats_location = TRUE;
2578 if (ajp->ignore_top)
2579 gsc.ignore_top = TRUE;
2580 gsc.seglevels = 1;
2581 if (ajp->format == GENPEPT_FMT) {
2582 gsc.get_feats_product = TRUE;
2583 }
2584 gsc.seglevels = 0;
2585 if (ajp->slp != NULL) {
2586 gsc.target = ajp->slp;
2587 gsc.convert_loc = TRUE;
2588 gsc.newid = bsp->id;
2589 } else {
2590 slp = ValNodeNew(NULL);
2591 slp->choice = SEQLOC_WHOLE;
2592 slp->data.ptrvalue = (SeqIdPtr) SeqIdDup (SeqIdFindBest (bsp->id, 0));
2593 gsc.target = slp;
2594 if (ajp->only_one) {
2595 gsc.convert_loc = TRUE;
2596 gsc.newid = bsp->id;
2597 }
2598 }
2599 ofp->bsp = bsp;
2600 ofp->showSeqLoc = (ajp->slp) ? TRUE : FALSE;
2601 ofp->useSeqMgrIndexes = ajp->useSeqMgrIndexes;
2602 ofp->seg_bsp = ajp->asn2ffwep->seg;
2603 ofp->format = ajp->format;
2604 ofp->non_strict = ajp->non_strict;
2605 for (sip=bsp->id; sip; sip=sip->next) { /* non_strict binding for REFSEQ*/
2606 if (sip->choice == SEQID_OTHER) {
2607 ofp->non_strict = FALSE;
2608 }
2609 }
2610 ofp->show_gene = ajp->show_gene;
2611 if (ajp->format == GENPEPT_FMT && bsp->repr == Seq_repr_raw && ajp->useSeqMgrIndexes) {
2612 gsc.useSeqMgrIndexes = TRUE;
2613 }
2614 GatherEntity(ajp->entityID, ofp, get_feats, &gsc);
2615 if (slp) {
2616 SeqLocFree(slp);
2617 }
2618 ofp->lock_bsp = ofp->bsp;
2619 BioseqLock(ofp->lock_bsp);
2620
2621 if (/* is_mRNA_set(ajp->sep, bsp) && */ ajp->useSeqMgrIndexes) {
2622 mrna = SeqMgrGetRNAgivenProduct(bsp, NULL);
2623 if (mrna) {
2624 gene = SeqMgrGetOverlappingGene(mrna->location, &fcontext);
2625 }
2626 if (gene) {
2627 grp = (GeneRefPtr) gene->data.value.ptrvalue;
2628 newg = SeqFeatNew();
2629 newg->data.choice = SEQFEAT_GENE;
2630 newg->location = SeqLocIntNew(0, bsp->length-1,
2631 SeqLocStrand(gene->location), SeqIdDup(bsp->id));
2632 newg->data.value.ptrvalue = AsnIoMemCopy(grp,
2633 (AsnReadFunc) GeneRefAsnRead, (AsnWriteFunc) GeneRefAsnWrite);
2634 ofp->Genelist = EnlargeSortList(ofp->Genelist,ofp->sfpGenesize);
2635 ofp->sfpGenesize = StoreFeat(ofp->Genelist, newg,
2636 ofp->sfpGenesize, bsp, ofp->seg_bsp, fcontext.entityID, fcontext.itemID, OBJ_SEQFEAT, NULL, NULL, 0);
2637 ofp->List = EnlargeSortList(ofp->List, ofp->sfpListsize);
2638 ofp->sfpListsize = StoreFeat(ofp->List, newg,
2639 ofp->sfpListsize, bsp, ofp->seg_bsp, fcontext.entityID, fcontext.itemID, OBJ_SEQFEAT, NULL, NULL, 0);
2640 }
2641 }
2642
2643 SortOrganizeFeat(ofp); /* for the last entity */
2644 UniqueGeneName(ajp->error_msgs, ofp);
2645 ofp->source_notes = NoteStructNew(ofp->source_notes);
2646 gbp->feat = ofp;
2647
2648 return;
2649 }
2650
2651 /*****************************************************************************
2652 * Gather all features
2653 *
2654 ******************************************************************************/
2655 NLM_EXTERN void GetSeqFeat(Asn2ffJobPtr ajp)
2656 {
2657 GBEntryPtr gbp;
2658
2659 for (gbp = ajp->asn2ffwep->gbp; gbp; gbp = gbp->next) {
2660 if (gbp->locus) {
2661 flat2asn_delete_locus_user_string();
2662 flat2asn_install_locus_user_string(gbp->locus);
2663 }
2664 if (gbp->accession) {
2665 flat2asn_delete_accession_user_string();
2666 flat2asn_install_accession_user_string(gbp->accession);
2667 }
2668 OrganizeSeqFeat(ajp, gbp);
2669 }
2670
2671 }
2672
2673
2674
2675 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |