|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/asn2gnb1.c |
source navigation diff markup identifier search freetext search file search |
1 /* asn2gnb1.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2gnb1.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 * Mati Shomrat
30 *
31 *
32 * Version Creation Date: 10/21/98
33 *
34 * $Revision: 1.185 $
35 *
36 * File Description: New GenBank flatfile generator - work in progress
37 *
38 * Modifications:
39 * --------------------------------------------------------------------------
40 * ==========================================================================
41 */
42
43 #include <ncbi.h>
44 #include <objall.h>
45 #include <objsset.h>
46 #include <objsub.h>
47 #include <objfdef.h>
48 #include <objpubme.h>
49 #include <seqport.h>
50 #include <sequtil.h>
51 #include <sqnutils.h>
52 #include <subutil.h>
53 #include <tofasta.h>
54 #include <explore.h>
55 #include <gbfeat.h>
56 #include <gbftdef.h>
57 #include <edutil.h>
58 #include <alignmgr2.h>
59 #include <asn2gnbi.h>
60
61 #ifdef WIN_MAC
62 #if __profile__
63 #include <Profiler.h>
64 #endif
65 #endif
66
67 static Boolean FFIsStartOfLinkEx (StringItemPtr iter, Int4 pos, Int4Ptr lenP);
68
69 /* utility functions */
70
71 NLM_EXTERN ValNodePtr ValNodeCopyStrToHead (ValNodePtr PNTR head, Int2 choice, CharPtr str)
72
73 {
74 ValNodePtr newnode;
75
76 if (head == NULL || str == NULL) return NULL;
77
78 newnode = ValNodeNew (NULL);
79 if (newnode == NULL) return NULL;
80
81 newnode->choice = (Uint1) choice;
82 newnode->data.ptrvalue = StringSave (str);
83
84 newnode->next = *head;
85 *head = newnode;
86
87 return newnode;
88 }
89
90 /* the val node strings mechanism will be replaced by a more efficient method later */
91
92 NLM_EXTERN CharPtr MergeFFValNodeStrs (
93 ValNodePtr list
94 )
95
96 {
97 size_t len;
98 CharPtr ptr;
99 CharPtr str;
100 CharPtr tmp;
101 ValNodePtr vnp;
102
103
104 if (list == NULL) return NULL;
105
106 for (vnp = list, len = 0; vnp != NULL; vnp = vnp->next) {
107 str = (CharPtr) vnp->data.ptrvalue;
108 len += StringLen (str);
109 }
110 if (len == 0) return NULL;
111
112 ptr = MemNew (sizeof (Char) * (len + 2));
113 if (ptr == NULL) return NULL;
114
115 for (vnp = list, tmp = ptr; vnp != NULL; vnp = vnp->next) {
116 str = (CharPtr) vnp->data.ptrvalue;
117 tmp = StringMove (tmp, str);
118 }
119
120 return ptr;
121 }
122
123
124 NLM_EXTERN void AddValNodeString (
125 ValNodePtr PNTR head,
126 CharPtr prefix,
127 CharPtr string,
128 CharPtr suffix
129 )
130
131 {
132 Char buf [256];
133 CharPtr freeme = NULL;
134 size_t len;
135 CharPtr newstr;
136 CharPtr strptr;
137
138 len = StringLen (prefix) + StringLen (string) + StringLen (suffix);
139 if (len == 0) return;
140
141 if (len < sizeof (buf)) {
142
143 /* if new string fits in stack buffer, no need to allocate */
144
145 MemSet ((Pointer) buf, 0, sizeof (buf));
146 newstr = buf;
147
148 } else {
149
150 /* new string bigger than stack buffer, so allocate sufficient string */
151
152 newstr = (CharPtr) MemNew (sizeof (Char) * (len + 2));
153 if (newstr == NULL) return;
154
155 /* allocated string will be freed at end of function */
156
157 freeme = newstr;
158 }
159
160 strptr = newstr;
161
162 if (prefix != NULL) {
163 strptr = StringMove (strptr, prefix);
164 }
165
166 if (string != NULL) {
167 strptr = StringMove (strptr, string);
168 }
169
170 if (suffix != NULL) {
171 strptr = StringMove (strptr, suffix);
172 }
173
174 /* currently just makes a valnode list, to be enhanced later */
175
176 ValNodeCopyStr (head, 0, newstr);
177
178 /* if large string was allocated, free it now */
179
180 if (freeme != NULL) {
181 MemFree (freeme);
182 }
183 }
184
185
186 NLM_EXTERN void FFAddString_NoRedund (
187 StringItemPtr unique,
188 CharPtr prefix,
189 CharPtr string,
190 CharPtr suffix,
191 Boolean convertQuotes
192 )
193 {
194 CharPtr str = string;
195 Int4 foundPos = 0;
196 Boolean wholeWord = FALSE;
197
198 if ( StringHasNoText(prefix) &&
199 StringHasNoText(string) &&
200 StringHasNoText(suffix) ) return;
201
202 if (StringNICmp (string, "tRNA-", 5) == 0) {
203 str = string+5;
204 }
205
206 while ( foundPos >= 0 && !wholeWord ) {
207 foundPos = FFStringSearch(unique, str, foundPos);
208 if ( foundPos >= 0 ) {
209 wholeWord = IsWholeWordSubstr(unique, foundPos, str);
210 foundPos += StringLen(str);
211 }
212 }
213
214 if ( foundPos < 0 || !wholeWord ) {
215 FFAddTextToString(unique, prefix, string, suffix, FALSE, convertQuotes, TILDE_IGNORE);
216 }
217 }
218
219
220
221 /* s_AddPeriodToEnd () -- Adds a '.' to the end of a given string if */
222 /* there is not already one there. */
223 /* */
224 /* Note that this adds one character to the */
225 /* length of the string, leading to a */
226 /* memory overrun if space was not previously */
227 /* allocated for this. */
228
229 NLM_EXTERN void s_AddPeriodToEnd (CharPtr someString)
230 {
231 Int4 len;
232
233 if (StringHasNoText (someString)) return;
234 len = StringLen (someString);
235 if (len < 1) return;
236 if (someString[len-1] != '.')
237 {
238 someString[len] = '.';
239 someString[len+1] = '\0';
240 }
241 }
242
243 /* s_RemovePeriodFromEnd () -- If the last character in a given */
244 /* string is a '.', removes it. */
245
246 NLM_EXTERN Boolean s_RemovePeriodFromEnd (CharPtr someString)
247 {
248 Int4 len;
249
250 if (StringHasNoText (someString)) return FALSE;
251 len = StringLen (someString);
252 if (len < 1) return FALSE;
253 if (someString[len-1] == '.') {
254 someString[len-1] = '\0';
255 return TRUE;
256 }
257 return FALSE;
258 }
259
260 /**/
261 /* isEllipsis () - Determines if a string ends in an ellipses */
262 /**/
263
264 NLM_EXTERN Boolean IsEllipsis (
265 CharPtr str
266 )
267
268 {
269 size_t len;
270 CharPtr ptr;
271
272 if (StringHasNoText (str)) return FALSE;
273 len = StringLen (str);
274 if (len < 3) return FALSE;
275 ptr = str + len - 3;
276 return (Boolean) (ptr [0] == '.' && ptr [1] == '.' && ptr [2] == '.');
277 }
278
279 NLM_EXTERN void A2GBSeqLocReplaceID (
280 SeqLocPtr newloc,
281 SeqLocPtr ajpslp
282 )
283
284 {
285 BioseqPtr bsp;
286 SeqIdPtr sip;
287
288 bsp = BioseqFindFromSeqLoc (ajpslp);
289 if (bsp == NULL) return;
290 sip = SeqIdFindBest (bsp->id, 0);
291 SeqLocReplaceID (newloc, sip);
292 }
293
294 NLM_EXTERN CharPtr asn2gb_PrintDate (
295 DatePtr dp
296 )
297
298 {
299 Char buf [30];
300 size_t len;
301
302 if (dp == NULL) return NULL;
303
304 if (DatePrint (dp, buf)) {
305 if (StringICmp (buf, "Not given") != 0) {
306 len = StringLen (buf);
307 if (len > 0) {
308 if (buf [len - 1] == '\n') {
309 if (buf [len - 2] == '.') {
310 buf [len - 2] = '\0';
311 } else {
312 buf [len - 1] = '\0';
313 }
314 }
315 }
316 return StringSave (buf);
317 }
318 }
319
320 return NULL;
321 }
322
323 static CharPtr month_names [] = {
324 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
325 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
326 "??"
327 };
328
329 NLM_EXTERN CharPtr DateToFF (
330 CharPtr buf,
331 DatePtr dp,
332 Boolean citSub
333 )
334
335 {
336 Int2 day;
337 Int2 month;
338 Int2 year;
339
340 if (buf != NULL) {
341 *buf = '\0';
342 }
343 if (dp == NULL) return NULL;
344
345 if (dp->data [0] == 0) {
346
347 StringCpy (buf, dp->str);
348
349 } else if (dp->data [0] == 1) {
350
351 year = 1900 + (Int2) dp->data [1];
352 month = (Int2) dp->data [2];
353 day = (Int2) dp->data [3];
354
355 if (citSub) {
356 if (month < 1 || month > 12) {
357 month = 13;
358 }
359 if (day < 1 || day > 31) {
360 day = 0;
361 }
362 } else {
363 if (month < 1 || month > 12) {
364 month = 1;
365 }
366 if (day < 1 || day > 31) {
367 day = 1;
368 }
369 }
370
371 if (day < 1) {
372 sprintf (buf, "\?\?-%s-%ld",
373 month_names [month-1], (long) year);
374 } else if (day < 10) {
375 sprintf (buf, "0%ld-%s-%ld",
376 (long) day, month_names [month-1], (long) year);
377 } else {
378 sprintf(buf, "%ld-%s-%ld",
379 (long) day, month_names [month-1], (long) year);
380 }
381 }
382
383 return buf;
384 }
385
386
387 NLM_EXTERN StringItemPtr FFGetString (IntAsn2gbJobPtr ajp)
388
389 {
390 StringItemPtr sip;
391
392 if (ajp == NULL) return NULL;
393 if (ajp->pool != NULL) {
394 sip = ajp->pool;
395 ajp->pool = sip->next;
396 sip->next = NULL;
397 MemSet ((Pointer) sip, 0, sizeof (StringItem));
398 } else {
399 sip = (StringItemPtr) MemNew (sizeof (StringItem));
400 if (sip == NULL) return NULL;
401 }
402 sip->curr = sip;
403 sip->iajp = ajp;
404 sip->pos = 0;
405 return sip;
406 }
407
408 NLM_EXTERN void FFRecycleString (IntAsn2gbJobPtr ajp, StringItemPtr ffstring)
409
410 {
411 StringItemPtr nxt;
412
413 if (ajp == NULL || ffstring == NULL) return;
414 if ( ffstring->pos == -1 ) return;
415
416 nxt = ffstring;
417 nxt->pos = -1;
418 while (nxt->next != NULL) {
419 nxt->pos = -1;
420 nxt = nxt->next;
421 }
422 nxt->next = ajp->pool;
423 ajp->pool = ffstring;
424
425 ffstring->curr = NULL;
426 }
427
428 NLM_EXTERN void FFAddOneChar (
429 StringItemPtr sip,
430 Char ch,
431 Boolean convertQuotes
432 )
433 {
434 StringItemPtr current = sip->curr;
435
436 if ( current->pos == STRING_BUF_LEN ) {
437 current->next = FFGetString(sip->iajp);
438 current = current->next;
439 current->pos = 0;
440 sip->curr = current;
441 }
442
443 if ( convertQuotes && ch == '\"' ) {
444 ch = '\'';
445 }
446 current->buf[current->pos] = ch;
447 current->pos++;
448 }
449
450 NLM_EXTERN void FFAddNewLine(StringItemPtr ffstring) {
451 FFAddOneChar(ffstring, '\n', FALSE);
452 }
453
454 NLM_EXTERN void FFAddNChar (
455 StringItemPtr sip,
456 Char ch,
457 Int4 n,
458 Boolean convertQuotes
459 )
460 {
461 Int4 i;
462
463 for ( i = 0; i < n; ++i ) {
464 FFAddOneChar(sip, ch, convertQuotes);
465 }
466 }
467
468
469 NLM_EXTERN void FFExpandTildes (StringItemPtr sip, CharPtr PNTR cpp) {
470 Char replace = **cpp;
471
472 if ( **cpp == '~' ) {
473 if ( *((*cpp) + 1) == '~' ) { /* "~~" -> '~' */
474 replace = '~';
475 (*cpp)++;
476 } else {
477 replace = '\n';
478 }
479 }
480
481 FFAddOneChar(sip, replace, FALSE);
482 }
483
484
485 NLM_EXTERN void FFSemicolonSeparateTildes (StringItemPtr sip, CharPtr PNTR cpp)
486
487 {
488 Char replace = **cpp;
489
490 if ( **cpp == '~' ) {
491 if ( *((*cpp) + 1) == '~' ) { /* "~~" -> '~' */
492 replace = '~';
493 (*cpp)++;
494 } else {
495 FFAddOneChar(sip, ';', FALSE);
496 replace = '\n';
497 }
498 }
499
500 FFAddOneChar(sip, replace, FALSE);
501 }
502
503
504 NLM_EXTERN void FFReplaceTildesWithSpaces (StringItemPtr ffstring, CharPtr PNTR cpp) {
505 Char replace = **cpp, lookahead;
506 CharPtr cptr = *cpp;
507
508 if ( *cptr == '`' ) {
509 FFAddOneChar(ffstring, replace, FALSE);
510 return;
511 }
512
513 replace = ' ';
514 lookahead = *(cptr + 1);
515
516 if ( IS_DIGIT(lookahead) ) {
517 replace = '~';
518 }
519 else {
520 if ( (lookahead == ' ') || (lookahead == '(') ) {
521 if ( IS_DIGIT(*(cptr + 2)) ) {
522 replace = '~';
523 }
524 }
525 }
526
527 FFAddOneChar(ffstring, replace, FALSE);
528 }
529
530 NLM_EXTERN void FFOldExpand (StringItemPtr sip, CharPtr PNTR cpp) {
531 /* "~" -> "\n", "~~" or "~~ ~~" -> "\n\n" */
532 CharPtr cp = *cpp;
533 Char current = *cp;
534 Char next = *(cp + 1);
535
536 /* handle "'~" */
537 if ( current == '`' ) {
538 if ( next != '~' ) {
539 FFAddOneChar(sip, current, FALSE);
540 } else {
541 FFAddOneChar(sip, '~', FALSE);
542 (*cpp)++;
543 }
544 return;
545 }
546
547 /* handle "~", "~~" or "~~ ~~" */
548 FFAddOneChar(sip, '\n', FALSE);
549 if ( next == '~' ) {
550 FFAddOneChar(sip, '\n', FALSE);
551 cp++;
552 *cpp = cp;
553 cp++;
554 if ( *cp == ' ' ) {
555 cp++;
556 if ( *cp == '~' ) {
557 cp++;
558 if ( *cp == '~' ) { /* saw "~~ ~~" */
559 *cpp = cp;
560 }
561 }
562 }
563 }
564 }
565
566 NLM_EXTERN void AddCommentStringWithTildes (StringItemPtr ffstring, CharPtr string)
567 {
568 /* One "~" is a new line, "~~" or "~~ ~~" means 2 returns */
569
570 /* Int2 i; */
571
572 while (*string != '\0') {
573 if (*string == '`' && *(string+1) == '~') {
574 FFAddOneChar(ffstring, '~', FALSE);
575 string += 2;
576 } else if (*string == '~') {
577 FFAddOneChar(ffstring, '\n', FALSE);
578 string++;
579 if (*string == '~') {
580 /*
581 for (i = 0; i < 12; i++) {
582 FFAddOneChar(ffstring, ' ', FALSE);
583 }
584 */
585 FFAddOneChar(ffstring, '\n', FALSE);
586 string++;
587 if (*string == ' ' && *(string+1) == '~' && *(string+2) == '~') {
588 string += 3;
589 }
590 }
591 } else if (*string == '\"') {
592 *string = '\'';
593 FFAddOneChar(ffstring, *string, FALSE);
594 string++;
595 } else {
596 FFAddOneChar(ffstring, *string, FALSE);
597 string++;
598 }
599 }
600 } /* AddCommentStringWithTildes */
601
602
603 NLM_EXTERN void AddStringWithTildes (StringItemPtr ffstring, CharPtr string)
604 {
605 /* One "~" is a new line, "~~" or "~~ ~~" means 2 returns */
606
607 while (*string != '\0') {
608 if (*string == '`' && *(string+1) == '~') {
609 FFAddOneChar(ffstring, '~', FALSE);
610 string += 2;
611 } else if (*string == '~') {
612 FFAddOneChar(ffstring, '\n', FALSE);
613 string++;
614 if (*string == '~') {
615 FFAddOneChar(ffstring, '\n', FALSE);
616 string++;
617 if (*string == ' ' && *(string+1) == '~' && *(string+2) == '~') {
618 string += 3;
619 }
620 }
621 } else if (*string == '\"') {
622 *string = '\'';
623 FFAddOneChar(ffstring, *string, FALSE);
624 string++;
625 } else {
626 FFAddOneChar(ffstring, *string, FALSE);
627 string++;
628 }
629 }
630 } /* AddStringWithTildes */
631
632
633 NLM_EXTERN void FFProcessTildes (StringItemPtr sip, CharPtr PNTR cpp, Int2 tildeAction) {
634
635 switch (tildeAction) {
636
637 case TILDE_EXPAND :
638 FFExpandTildes(sip, cpp);
639 break;
640
641 case TILDE_SEMICOLON :
642 FFSemicolonSeparateTildes(sip, cpp);
643 break;
644
645 case TILDE_OLD_EXPAND :
646 FFOldExpand(sip, cpp);
647 break;
648
649 case TILDE_TO_SPACES :
650 FFReplaceTildesWithSpaces (sip, cpp);
651 break;
652
653 case TILDE_IGNORE:
654 default:
655 FFAddOneChar(sip, **cpp, FALSE);
656 break;
657 }
658 }
659
660 NLM_EXTERN void FFAddPeriod (StringItemPtr sip) {
661 Int4 i;
662 Char ch = '\0';
663 StringItemPtr riter = sip->curr, prev;
664 IntAsn2gbJobPtr ajp;
665
666 if ( sip == NULL ) return;
667 ajp = (IntAsn2gbJobPtr)sip->iajp;
668 if ( ajp == NULL ) return;
669
670 for ( i = riter->pos - 1; i >= 0; --i ) {
671 ch = riter->buf[i];
672
673 if ( (ch == ' ') || (ch == '\t') || (ch == '~') || (ch == '.') || (ch == '\n') || (ch == '\r')) {
674 riter->pos--;
675
676 if ( i < 0 && riter != sip ) {
677 for ( prev = sip; prev->next != NULL; prev = prev->next ) {
678 if ( prev->next == riter ) {
679 i = prev->pos - 1;
680 FFRecycleString(ajp, riter);
681 riter = prev;
682 riter->next = NULL;
683 sip->curr = riter;
684 break;
685 }
686 }
687 }
688
689 } else {
690 break;
691 }
692 }
693
694 if (ch != '.') {
695 FFAddOneChar(sip, '.', FALSE);
696 }
697 }
698
699 NLM_EXTERN void FFAddOneString (
700 StringItemPtr sip,
701 CharPtr string,
702 Boolean addPeriod,
703 Boolean convertQuotes,
704 Int2 tildeAction
705 )
706 {
707 CharPtr strp = string;
708 Char ch;
709 Char prevchar = '\0';
710
711 if ( string == NULL ) return;
712
713 ch = *strp;
714 while ( ch != '\0' ) {
715 if ( (ch == '`') || (ch == '~') ) {
716 if (tildeAction == TILDE_SEMICOLON && prevchar == ';') {
717 FFProcessTildes(sip, &strp, TILDE_EXPAND);
718 } else if (tildeAction == TILDE_SEMICOLON && prevchar == ' ') {
719 FFProcessTildes(sip, &strp, TILDE_EXPAND);
720 } else {
721 FFProcessTildes(sip, &strp, tildeAction);
722 }
723 } else {
724 FFAddOneChar(sip, ch, convertQuotes);
725 }
726 prevchar = ch;
727 strp++;
728 ch = *strp;
729 }
730
731 if ( addPeriod ) {
732 FFAddPeriod(sip);
733 }
734 }
735
736 NLM_EXTERN void FFCatenateSubString (
737 StringItemPtr dest,
738 StringItemPtr start_sip, Int4 start_pos,
739 StringItemPtr end_sip, Int4 end_pos,
740 Uint4 line_max
741 )
742 {
743 Int4 max_i, min_i, i, len = 0;
744 StringItemPtr current;
745 Boolean in_url = FALSE, found_start = FALSE;
746 IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)dest->iajp;
747 Uint4 char_count = 0;
748
749 for ( current = start_sip, i = start_pos;
750 current != NULL;
751 current = current->next ) {
752 if ( current == start_sip ) {
753 min_i = start_pos;
754 } else {
755 min_i = 0;
756 }
757
758 if ( current == end_sip ) {
759 max_i = end_pos;
760 } else {
761 max_i = current->pos;
762 }
763
764 for ( i = min_i; i < max_i; ++i ) {
765
766 /* -----------------------------------------------------------------------
767 * HTML specific processing:
768 * ---------------------------------------------------------------------*/
769 if ( GetWWW(ajp) ) {
770 if ( ! in_url ) {
771 if ( current->buf[i] == '<' ) {
772 /* Watch out! */
773 if (FFIsStartOfLinkEx (current, i, &len)) {
774 FFAddOneChar(dest, '<', FALSE);
775 in_url = TRUE;
776 found_start = TRUE;
777 continue;
778 } else {
779 FFAddOneString(dest, "<", FALSE, FALSE, TILDE_IGNORE);
780 ++char_count;
781 continue;
782 }
783 }
784 if (char_count == line_max) {
785 break;
786 }
787
788 if ( current->buf[i] == '>' ) {
789 /* Obviously *not* a tag terminator */
790 FFAddOneString(dest, ">", FALSE, FALSE, TILDE_IGNORE);
791 ++char_count;
792 continue;
793 }
794
795 /* Common garden variety of character */
796 FFAddOneChar(dest, current->buf[i], FALSE);
797 ++char_count;
798
799 if (found_start && len > 0) {
800 len--;
801 if (len == 0) {
802 FFAddOneChar(dest, '"', FALSE);
803 found_start = FALSE;
804 }
805 }
806 }
807
808 else /* in_url */ {
809 if ( current->buf[i] == '&' ) {
810 /* encode ampersand for XHMLT */
811 FFAddOneString(dest, "&", FALSE, FALSE, TILDE_IGNORE);
812 continue;
813 }
814 if ( current->buf[i] == '>' ) {
815 FFAddOneChar(dest, '>', FALSE);
816 in_url = FALSE;
817 found_start = FALSE;
818 continue;
819 }
820
821 /* nothing inside a link needs any cooking. And neither does it
822 count against the page width limit. */
823 FFAddOneChar(dest, current->buf[i], FALSE);
824 }
825 }
826
827 /* ---------------------------------------------------------------------
828 * TEXT mode processing:
829 * --------------------------------------------------------------------*/
830 else {
831 FFAddOneChar(dest, current->buf[i], FALSE);
832 if (++char_count == line_max) {
833 break;
834 }
835 }
836
837 }
838 if ( current == end_sip || char_count == line_max ) break;
839 }
840 }
841
842 NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip) {
843 Int4 size = 0;
844 StringItemPtr iter;
845 CharPtr result, temp;
846
847 for ( iter = sip; iter != NULL; iter = iter->next ) {
848 size += iter->pos;
849 }
850
851 result = (CharPtr)MemNew(size + 2);
852 temp = result;
853
854 for ( iter = sip; iter != NULL; iter = iter->next ) {
855 MemCpy( temp, iter->buf, iter->pos );
856 temp += iter->pos;
857 }
858
859 *temp = '\0';
860
861 return result;
862 }
863
864
865
866 /* word wrap functions */
867
868 static CharPtr url_anchor_strings [] = {
869 "</A>",
870 "<A HREF=/",
871 "<A HREF=\"/",
872 "<A HREF=FTP://",
873 "<A HREF=MAILTO:",
874 "<A HREF=HTTP://",
875 "<A HREF=HTTPS://",
876 "<A HREF=\"HTTP://",
877 "<A HREF=\"HTTPS://",
878 NULL
879 };
880
881 static TextFsaPtr GetUrlAnchorFSA (void)
882
883 {
884 return (TextFsaPtr) GetAppProperty ("Asn2gbUrlAnchorFSA");
885 }
886
887 static TextFsaPtr InitUrlAnchorFSA (void)
888
889 {
890 TextFsaPtr fsa;
891 Int2 q;
892
893 fsa = GetUrlAnchorFSA ();
894 if (fsa != NULL) return fsa;
895
896 fsa = TextFsaNew ();
897 if (fsa == NULL) return NULL;
898
899 for (q = 0; url_anchor_strings [q] != NULL; q++) {
900 TextFsaAdd (fsa, url_anchor_strings [q]);
901 }
902
903 SetAppProperty ("Asn2gbUrlAnchorFSA", (Pointer) fsa);
904
905 return fsa;
906 }
907
908 static void FreeUrlAnchorFSA (void)
909
910 {
911 TextFsaPtr fsa;
912
913 fsa = GetUrlAnchorFSA ();
914 if (fsa == NULL) return;
915
916 SetAppProperty ("Asn2gbUrlAnchorFSA", NULL);
917 TextFsaFree (fsa);
918 }
919
920 NLM_EXTERN void FFSkipLink (StringItemPtr PNTR iterp, Int4Ptr ip) {
921 StringItemPtr iter = *iterp;
922 Int4 i = *ip;
923
924 while ( (iter != NULL) && (iter->buf[i] != '>') ) {
925 ++i;
926
927 if ( i == iter->pos ) {
928 iter = iter->next;
929 i = 0;
930 }
931 }
932 ++i;
933 if ( iter != NULL && i == iter->pos && iter->next != NULL ) {
934 iter = iter->next;
935 i = 0;
936 }
937
938 *iterp = iter;
939 *ip = i;
940 }
941
942 static Boolean FFIsStartOfLinkEx (StringItemPtr iter, Int4 pos, Int4Ptr lenP)
943
944 {
945 Char ch;
946 TextFsaPtr fsa;
947 Int4 i;
948 ValNodePtr matches;
949 Int4 max_url_len;
950 Int4 state = 0;
951
952 if ( iter == NULL || pos >= iter->pos ) return FALSE;
953 if ( iter->buf [pos] != '<' ) return FALSE;
954
955 fsa = GetUrlAnchorFSA ();
956 if (fsa == NULL) return FALSE;
957
958 if (! TextFsaGetStats (fsa, NULL, NULL, &max_url_len)) return FALSE;
959
960 for (i = 0; i < max_url_len; i++) {
961 ch = iter->buf [pos];
962 ch = TO_UPPER (ch);
963 state = TextFsaNext (fsa, state, ch, &matches);
964 if (matches != NULL) {
965 if (lenP != NULL) {
966 *lenP = i + 1;
967 }
968 return TRUE;
969 }
970
971 pos++;
972 if (pos >= iter->pos) {
973 iter = iter->next;
974 pos = 0;
975 if (iter == NULL) return FALSE;
976 }
977 }
978
979 return FALSE;
980 }
981
982 NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos)
983
984 {
985 return FFIsStartOfLinkEx (iter, pos, NULL);
986 }
987
988 /*
989 NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos) {
990 static CharPtr start_link = "<A HREF=";
991 static CharPtr end_link = "</A>";
992 Int4 start_len = StringLen(start_link);
993 Int4 end_len = StringLen(end_link);
994 Char temp[10];
995 Int4 i;
996
997 if ( iter == NULL || pos >= iter->pos ) return FALSE;
998 if ( iter->buf[pos] != '<' ) return FALSE;
999
1000 MemSet(temp, 0, sizeof(temp));
1001 for ( i = 0; i < start_len && iter != NULL; ++i ) {
1002 if ( pos + i < iter->pos ) {
1003 temp[i] = iter->buf[pos+i];
1004 if ( i == end_len - 1 ) {
1005 if ( StringNICmp(temp, end_link, end_len) == 0 ) {
1006 return TRUE;
1007 }
1008 }
1009 } else {
1010 iter = iter->next;
1011 pos = -i;
1012 --i;
1013 }
1014 }
1015
1016 if ( i == start_len ) {
1017 if ( StringNICmp(temp, start_link, start_len) == 0 ) {
1018 return TRUE;
1019 }
1020 }
1021
1022 return FALSE;
1023 }
1024 */
1025
1026
1027 NLM_EXTERN void FFSavePosition(StringItemPtr ffstring, StringItemPtr PNTR bufptr, Int4 PNTR posptr) {
1028 *bufptr = ffstring->curr;
1029 *posptr = ffstring->curr->pos;
1030 }
1031
1032
1033 NLM_EXTERN void FFTrim (
1034 StringItemPtr ffstring,
1035 StringItemPtr line_start,
1036 Int4 line_pos,
1037 Int4 line_prefix_len
1038 )
1039 {
1040 StringItemPtr riter, iter;
1041 Int4 i;
1042 IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)ffstring->iajp;
1043
1044 for ( i = 0; i < line_prefix_len; ++i ) {
1045 ++line_pos;
1046 if ( line_pos == STRING_BUF_LEN ) {
1047 line_pos = 0;
1048 line_start= line_start->next;
1049 }
1050 }
1051
1052 riter = ffstring->curr;
1053 while ( riter != NULL ) {
1054 for ( i = riter->pos - 1;
1055 /* (i >= 0) && !(riter == line_start && i <= line_pos); */
1056 (i >= 0) && ((riter != line_start) || (i >= line_pos));
1057 --i ) {
1058 if ( !IS_WHITESP(riter->buf[i]) || (riter->buf[i] == '\n') ) {
1059 break;
1060 }
1061 }
1062 if ( i < 0 ) {
1063 i = STRING_BUF_LEN - 1;
1064 for ( iter = ffstring; iter != NULL; iter = iter->next ) {
1065 if ( iter->next == riter ) {
1066 break;
1067 }
1068 }
1069 if ( iter == NULL ){
1070 ffstring->pos = 0;
1071 break;
1072 } else {
1073
1074 riter = iter;
1075 ffstring->curr = riter;
1076 }
1077 } else {
1078 riter->pos = i + 1;
1079 FFRecycleString(ajp, riter->next);
1080 riter->next = NULL;
1081 break;
1082 }
1083 }
1084 }
1085
1086 NLM_EXTERN int FFNextChar(
1087 StringItemPtr start_sip,
1088 Int4 start_pos
1089 )
1090 {
1091 if (start_pos < start_sip->pos-1) {
1092 return start_sip->buf[start_pos+1];
1093 }
1094 else if (start_sip->next != NULL) {
1095 return (start_sip->next->buf)[0];
1096 }
1097 else {
1098 return 0;
1099 }
1100 }
1101
1102 NLM_EXTERN void FFAdvanceChar(
1103 StringItemPtr* start_sip,
1104 Int4* start_pos
1105 )
1106 {
1107 if (*start_pos < (*start_sip)->pos-1) {
1108 ++(*start_pos);
1109 }
1110 else {
1111 (*start_sip) = (*start_sip)->next;
1112 *start_pos = 0;
1113 }
1114 }
1115
1116 /* A line is wrapped when the visble text in th eline exceeds the line size. */
1117 /* Visible text is text that is not an HTML hyper-link. */
1118 /* A line may be broken in one of the following characters: */
1119 /* space, comma and dash */
1120 /* the oredr of search is first spaces, then commas and then dashes. */
1121 /* We nee to take into account the possiblity that a 'new-line' character */
1122 /* already exists in the line, in such case we break at the 'new-line' */
1123 /* spaces, dashes and new-lines will be broken at that character wheras for */
1124 /* commas we break at the character following the comma. */
1125
1126 NLM_EXTERN void FFCalculateLineBreak (
1127 StringItemPtr PNTR break_sip, Int4 PNTR break_pos,
1128 Int4 init_indent, Int4 visible
1129 )
1130 {
1131 StringItemPtr iter, prev;
1132 Int4 i,
1133 done = FALSE,
1134 copied = 0,
1135 start = *break_pos,
1136 pos = 0;
1137 Char ch;
1138 Boolean found_comma = FALSE, found_dash = FALSE, found_lb = FALSE;
1139 /* each candidate is a pair of buffer and position withingh this buffer */
1140 StringItemPtr candidate_sip_space = NULL,
1141 candidate_sip_comma = NULL,
1142 candidate_sip_dash = NULL;
1143 Int4 candidate_int_space = -1,
1144 candidate_int_comma = -1,
1145 candidate_int_dash = -1;
1146
1147
1148 iter = *break_sip;
1149 prev = iter;
1150
1151 i = start;
1152
1153 /* skip the first 'init_indent' characters of the line */
1154 while ( iter != NULL && !done ) {
1155 for ( i = start; i < iter->pos && init_indent > 0; ++i ) {
1156 if ( iter->buf[i] == '\n' ) {
1157 candidate_sip_space = iter;
1158 candidate_int_space = i;
1159 done = TRUE;
1160 break;
1161 }
1162 if ( FFIsStartOfLink(iter, i) ) {
1163 FFSkipLink(&iter, &i);
1164 --i;
1165 continue;
1166 }
1167
1168 --init_indent;
1169 ++copied;
1170 }
1171 if ( init_indent > 0 ) {
1172 start = 0;
1173 iter = iter->next;
1174 } else {
1175 break;
1176 }
1177 }
1178 start = i;
1179
1180 while ( iter != NULL && !done ) {
1181 for ( i = start; iter != NULL && i < iter->pos; ++i ) {
1182 if ( found_comma ) {
1183 candidate_sip_comma = iter;
1184 candidate_int_comma = i;
1185 found_comma = FALSE;
1186 }
1187 if ( found_dash ) {
1188 candidate_sip_dash = iter;
1189 candidate_int_dash = i;
1190 found_dash= FALSE;
1191 }
1192
1193 ch = iter->buf[i];
1194 if ( ch == '\n' ) {
1195 candidate_sip_space = iter;
1196 candidate_int_space = i;
1197 found_lb = TRUE;
1198 done = TRUE;
1199 break;
1200 } else if ( ch == ' ' ) {
1201 candidate_sip_space = iter;
1202 candidate_int_space = i;
1203 } else if ( ch == ',' ) {
1204 found_comma = TRUE;
1205 } else if ( ch == '-' ) {
1206 found_dash = TRUE;
1207 /*candidate_sip_dash = iter;
1208 candidate_int_dash = i;*/
1209 }
1210
1211 if ( FFIsStartOfLink(iter, i) ) {
1212 FFSkipLink(&iter, &i);
1213 --i;
1214 continue;
1215 }
1216
1217 ++copied;
1218 if ( copied >= visible ) {
1219 if ( (candidate_sip_space == NULL) && (candidate_int_space == -1) &&
1220 (candidate_sip_comma == NULL) && (candidate_int_comma == -1) &&
1221 (candidate_sip_dash == NULL) && (candidate_int_dash == -1) ) {
1222 candidate_sip_space = iter;
1223 candidate_int_space = i;
1224 }
1225 done = TRUE;
1226 break;
1227 }
1228 }
1229 start = 0;
1230 if ( iter != NULL && !done ) {
1231 prev = iter;
1232 pos = prev->pos;
1233 iter = iter->next;
1234 }
1235 }
1236
1237 /* the order in which we examine the various candidate breaks is important */
1238 if ( iter == NULL && !done) { /* reached the end */
1239 *break_sip = prev;
1240 *break_pos = pos;
1241 } else {
1242 if( candidate_sip_space != NULL ) {
1243 *break_sip = candidate_sip_space;
1244 *break_pos = candidate_int_space;
1245 } else if( candidate_sip_comma != NULL ) {
1246 *break_sip = candidate_sip_comma;
1247 *break_pos = candidate_int_comma;
1248 } else if( candidate_sip_dash != NULL ) {
1249 *break_sip = candidate_sip_dash;
1250 *break_pos = candidate_int_dash;
1251 }
1252 if (! found_lb) {
1253 while (FFNextChar(*break_sip, *break_pos) == ' ') {
1254 FFAdvanceChar(break_sip, break_pos);
1255 }
1256 if (FFNextChar(*break_sip, *break_pos) == '\n') {
1257 FFAdvanceChar(break_sip, break_pos);
1258 }
1259 }
1260 }
1261 }
1262
1263 /*
1264 * Scans the given buffer froma given scan position, for the next occurrence of
1265 * the indicated character. The search breaks when the character is found, or the
1266 * supplied break position is reached.
1267 * On exit, the scan position will either be on the character found, or at the
1268 * given break position.
1269 *
1270 * *p_line_sip: in: points to the buffer where scan should start
1271 * out: points to the buffer where the scan ended
1272 * *p_line_pos: in: points to the position in *p_line_sip where the scan should
1273 * start
1274 * out: points to the position in *p_line_sip where the scan ended.
1275 * break_sip: points to buffer where the scan should stop
1276 * break_pos: position in *break_sip where the scan should stop
1277 * c: the character we are looking for
1278 */
1279 NLM_EXTERN Boolean FFFindSingleChar(
1280 StringItemPtr* p_line_sip,
1281 Int4* p_line_pos,
1282 StringItemPtr break_sip,
1283 Int4 break_pos,
1284 char c )
1285 {
1286 while( *p_line_pos >= (*p_line_sip)->pos) {
1287 *p_line_pos -= (*p_line_sip)->pos;
1288 (*p_line_sip) = (*p_line_sip)->next;
1289 if ( *p_line_sip == NULL ) {
1290 return FALSE;
1291 }
1292 }
1293 while (*p_line_sip != break_sip){
1294 while (*p_line_pos <(*p_line_sip)->pos) {
1295 if ((*p_line_sip)->buf[ *p_line_pos ] == c)
1296 return TRUE;
1297 else
1298 ++(*p_line_pos);
1299 }
1300 *p_line_pos = 0;
1301 *p_line_sip = (*p_line_sip)->next;
1302 }
1303 while (*p_line_pos < break_pos){
1304 if ( (*p_line_sip)->buf[ *p_line_pos ] == c )
1305 return TRUE;
1306 else
1307 ++(*p_line_pos);
1308 }
1309 return FALSE;
1310 }
1311
1312 /*
1313 * Returns the number of bytes remaining in the buffer chain, starting from the
1314 * given buffer and a read mark inside it.
1315 *
1316 * sip: points to the buffer where the string starts,
1317 * cur_pos: read mark in the buffer
1318 */
1319 NLM_EXTERN Int4 FFRemainingLength(
1320 StringItemPtr sip,
1321 Int4 cur_pos )
1322 {
1323 return FFLength(sip)-cur_pos;
1324 }
1325
1326 /*
1327 * Scans the given line for the next opening tag of an HTML hyperlink. Ajusts
1328 * the line position to immediately after the opening tag (if such a tag is
1329 * found) or the the end of the line (if no such tag is found).
1330 * If a character buffer is supplied, this function will copy any opening tag
1331 * it finds into that buffer.
1332 *
1333 * *p_line_sip: in: points to the string buffer where the scan should start
1334 * out: points to the string buffer where the scan ended
1335 * *p_line_pos: in: position in **p_start_sip where the scan should start
1336 * out: position in **p_start_sip where the scan ended
1337 * break_sip: buffer that contain the line break
1338 * break_pos: position in break_sip that represents the line break
1339 * buf_open_link: character buffer to hold a copy of the opening link found
1340 * (or =0 if this information is not required).
1341 */
1342 NLM_EXTERN Boolean FFExtractNextOpenLink(
1343 StringItemPtr* p_line_sip,
1344 Int4* p_line_pos,
1345 StringItemPtr break_sip,
1346 Int4 break_pos,
1347 char* buf_open_link )
1348 {
1349 int i;
1350
1351 const char* buf_markup_open = "<A HREF";
1352 const int markup_size = strlen(buf_markup_open);
1353
1354 while ((*p_line_sip != break_sip) || (*p_line_pos < break_pos)) {
1355
1356 if (FFFindSingleChar(p_line_sip, p_line_pos, break_sip, break_pos, '<' )) {
1357
1358 if (FFRemainingLength(*p_line_sip, *p_line_pos) < markup_size) {
1359 *p_line_sip = break_sip;
1360 *p_line_pos = break_pos;
1361 return FALSE;
1362 }
1363 for ( i=0; i < markup_size; ++i ) {
1364 if (buf_markup_open[i] != toupper( FFCharAt( *p_line_sip, (*p_line_pos)+i )))
1365 break;
1366 }
1367 if ( i == markup_size ) {
1368 if (buf_open_link != 0) {
1369
1370 char next;
1371
1372 for (i=0; '>' != (next = FFCharAt( *p_line_sip, *p_line_pos )); ++(*p_line_pos)) {
1373
1374 if (next == '&') {
1375
1376 MemCopy( buf_open_link+i, "&", strlen( "&" ) );
1377
1378 i += strlen("&");
1379
1380 }
1381
1382 else {
1383
1384 buf_open_link[i++] = next;
1385
1386 }
1387
1388 }
1389
1390 buf_open_link[i++] = '>';
1391
1392 buf_open_link[i] = 0;
1393
1394 } else {
1395 *p_line_pos += markup_size;
1396 }
1397
1398 return TRUE;
1399 } else {
1400 ++(*p_line_pos);
1401 }
1402 }
1403 }
1404 return FALSE;
1405 }
1406
1407 /*
1408 * Scans the given line for the next closing tag of an HTML hyperlink. Ajusts
1409 * the line position to immediately after the closing tag (if such a tag is
1410 * found) or the the end of the line (if no such tag is found).
1411 *
1412 * *p_line_sip: in: points to the string buffer where the scan should start
1413 * out: points to the string buffer where the scan ended
1414 * *p_line_pos: in: position in **p_start_sip where the scan should start
1415 * out: position in **p_start_sip where the scan ended
1416 * break_sip: buffer that contain the line break
1417 * break_pos: position in break_sip that represents the line break
1418 */
1419 NLM_EXTERN Boolean FFExtractNextCloseLink(
1420 StringItemPtr* p_line_sip,
1421 Int4* p_line_pos,
1422 StringItemPtr break_sip,
1423 Int4 break_pos )
1424 {
1425 int i;
1426
1427 const char* buf_close_link = "</A>";
1428 const int markup_close_size = strlen(buf_close_link);
1429
1430 while ((*p_line_sip != break_sip) || (*p_line_pos < break_pos)) {
1431
1432 if (FFFindSingleChar(p_line_sip, p_line_pos, break_sip, break_pos, '<' )) {
1433 if (FFRemainingLength(*p_line_sip, *p_line_pos) < markup_close_size) {
1434 *p_line_sip = break_sip;
1435 *p_line_pos = break_pos;
1436 return FALSE;
1437 }
1438 for ( i=0; i < markup_close_size; ++i ) {
1439 if (buf_close_link[i] != toupper(FFCharAt( *p_line_sip, (*p_line_pos)+i)))
1440 break;
1441 }
1442 if (i == markup_close_size) {
1443 (*p_line_pos) += markup_close_size;
1444 return TRUE;
1445 } else {
1446 ++(*p_line_pos);
1447 }
1448 }
1449 }
1450 return FALSE;
1451 }
1452
1453 /*
1454 * Checks a given line whether its end falls between the opening and the closing
1455 * tag of an HTML link.
1456 *
1457 * start_sip: string buffer where the given line starts,
1458 * start_pos: position in start_sip where the given line starts,
1459 * break_sip: string buffer where the given line ends,
1460 * break_pos: position in break_pos where the given line ends,
1461 * buf_link_open: optional buffer where the open tag of the split link will be
1462 * written to. Leave =0 if you don't need this.
1463 */
1464 NLM_EXTERN Boolean FFLineBreakSplitsHtmlLink(
1465 StringItemPtr start_sip,
1466 Int4 start_pos,
1467 StringItemPtr break_sip,
1468 Int4 break_pos,
1469 char* buf_link_open,
1470 Int4* html_open_link_counter )
1471 {
1472 StringItemPtr cur_iter=0;
1473 int cur_pos=0;
1474
1475 if ( ! GetWWW((IntAsn2gbJobPtr)start_sip->iajp) )
1476 return FALSE;
1477
1478 cur_iter = start_sip;
1479 cur_pos = start_pos;
1480
1481 while ((cur_iter != break_sip) || (cur_pos < break_pos)) {
1482 switch(*html_open_link_counter) {
1483 case 0:
1484 if (FFExtractNextOpenLink(&cur_iter, &cur_pos, break_sip, break_pos, buf_link_open ))
1485 ++(*html_open_link_counter);
1486 break;
1487 case 1:
1488 if (FFExtractNextCloseLink(&cur_iter, &cur_pos, break_sip, break_pos ))
1489 --(*html_open_link_counter);
1490 break;
1491 default:
1492 break;
1493 }
1494 }
1495 return (*html_open_link_counter);
1496 } /*FFLineBreakSplitsHtmlLink*/
1497
1498 NLM_EXTERN void FFLineWrap (
1499 StringItemPtr dest,
1500 StringItemPtr src,
1501 Int4 init_indent,
1502 Int4 cont_indent,
1503 Int4 line_max,
1504 CharPtr eb_line_prefix
1505 )
1506 {
1507 /* line break candidate is a pair <StringItemPtr, position> */
1508 StringItemPtr break_sip = src;
1509 Int4 break_pos = 0;
1510 StringItemPtr line_start = NULL;
1511 Int4 line_pos = 0;
1512 Int4 i, line_prefix_len = 0;
1513 StringItemPtr iter;
1514 Boolean cont = FALSE;
1515
1516 /* Note:
1517 The value of the next two variables needs to persist between consecutive
1518 invocations of FFLineBreakSplitsHtmlLink().
1519 */
1520 Int4 html_open_link_counter = 0;
1521 char buf_split_link_open[ 1024 ];
1522
1523 Boolean linebreak_splits_link = FALSE;
1524 const char* buf_split_link_close = "</a>";
1525
1526 MemSet( (void*)buf_split_link_open, 0, sizeof(buf_split_link_open) );
1527 FFSavePosition(dest, &line_start, &line_pos);
1528
1529 for ( iter = src; iter != NULL; iter = iter->next ) {
1530 for ( i = 0; i < iter->pos; ) {
1531
1532
1533 break_pos = i;
1534 break_sip = iter;
1535
1536 FFCalculateLineBreak(
1537 &break_sip, &break_pos, init_indent, line_max - line_prefix_len + 1);
1538 linebreak_splits_link =
1539 FFLineBreakSplitsHtmlLink(iter, i, break_sip, break_pos,
1540 buf_split_link_open, &html_open_link_counter );
1541 FFCatenateSubString(dest, iter, i, break_sip, break_pos, line_max);
1542 if (0 && eb_line_prefix) {
1543 /* don't quit at the indent width but trim all the way down to the EMBL line code */
1544 FFTrim(dest, line_start, line_pos, strlen(eb_line_prefix));
1545 } else {
1546 FFTrim(dest, line_start, line_pos, cont_indent);
1547 }
1548 if ( linebreak_splits_link ) {
1549 FFAddOneString( dest,
1550 (char*)buf_split_link_close, FALSE, FALSE, TILDE_IGNORE );
1551 }
1552 FFAddOneChar(dest, '\n', FALSE);
1553
1554 FFSavePosition(dest, &line_start, &line_pos);
1555
1556 /* for EMBL 'XX' lines */
1557 if (eb_line_prefix != NULL) {
1558 cont = FALSE;
1559 if (break_pos > 1) {
1560 if (break_sip->buf[break_pos-1] == 'X' && break_sip->buf[break_pos-2] == 'X') {
1561 if ((break_pos == 2) || (break_sip->buf[break_pos-3] == '\n')) {
1562 ++break_pos;
1563 cont = TRUE;
1564 }
1565 }
1566 } else if (break_pos == 1) {
1567 if (break_sip->buf[0] == 'X' && iter->buf[iter->pos-1] == 'X') {
1568 if ((iter->pos > 1) && iter->buf[iter->pos-2] == '\n') {
1569 ++break_pos;
1570 cont = TRUE;
1571 }
1572 }
1573 }
1574 }
1575
1576 i = break_pos;
1577 iter = break_sip;
1578
1579 if (cont) continue;
1580
1581 if ( IS_WHITESP(iter->buf[i]) ) {
1582 i++;
1583 }
1584 if ( iter != src->curr || i < iter->pos ) {
1585 if ( eb_line_prefix != NULL ) {
1586 FFAddOneString(dest, eb_line_prefix, FALSE, FALSE, TILDE_IGNORE);
1587 }
1588 FFAddNChar(dest, ' ', cont_indent - StringLen(eb_line_prefix), FALSE);
1589 if ( linebreak_splits_link ) {
1590 FFAddOneString( dest, buf_split_link_open, FALSE, FALSE, TILDE_IGNORE );
1591 }
1592 init_indent = 0;
1593 line_prefix_len = cont_indent;
1594 /*FFSkipGarbage(&iter, &i);*/
1595 }
1596 }
1597 }
1598 }
1599
1600 /* === */
1601
1602 NLM_EXTERN void FFStartPrint (
1603 StringItemPtr sip,
1604 FmtType format,
1605 Int4 gb_init_indent,
1606 Int4 gb_cont_indent,
1607 CharPtr gb_label,
1608 Int4 gb_tab_to,
1609 Int4 eb_init_indent,
1610 Int4 eb_cont_indent,
1611 CharPtr eb_line_prefix,
1612 Boolean eb_print_xx
1613 )
1614
1615 {
1616 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1617 FFAddNChar(sip, ' ', gb_init_indent, FALSE);
1618 FFAddOneString(sip, gb_label, FALSE, FALSE, TILDE_IGNORE);
1619 FFAddNChar(sip, ' ', gb_tab_to - gb_init_indent - StringLen(gb_label), FALSE);
1620 } else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1621 if ( eb_print_xx ) {
1622 FFAddOneString(sip, "XX\n", FALSE, FALSE, TILDE_IGNORE);
1623 }
1624 FFAddOneString(sip, eb_line_prefix, FALSE, FALSE, TILDE_IGNORE);
1625 FFAddNChar(sip, ' ', eb_init_indent - StringLen(eb_line_prefix), FALSE);
1626 }
1627 }
1628
1629 NLM_EXTERN void FFAddTextToString (
1630 StringItemPtr ffstring,
1631 CharPtr prefix,
1632 CharPtr string,
1633 CharPtr suffix,
1634 Boolean addPeriod,
1635 Boolean convertQuotes,
1636 Int2 tildeAction
1637 )
1638
1639 {
1640 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1641 FFAddOneString (ffstring, string, FALSE, convertQuotes, tildeAction);
1642 FFAddOneString (ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1643
1644 if ( addPeriod ) {
1645 FFAddPeriod(ffstring);
1646 }
1647 }
1648
1649 NLM_EXTERN CharPtr FFEndPrint (
1650 IntAsn2gbJobPtr ajp,
1651 StringItemPtr ffstring,
1652 FmtType format,
1653 Int2 gb_init_indent,
1654 Int2 gb_cont_indent,
1655 Int2 eb_init_indent,
1656 Int2 eb_cont_indent,
1657 CharPtr eb_line_prefix
1658 )
1659 {
1660 StringItemPtr temp = FFGetString(ajp);
1661 CharPtr result;
1662
1663 if ( (ffstring == NULL) || (ajp == NULL) ) return NULL;
1664
1665 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1666 FFLineWrap(temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX, NULL);
1667 } else {
1668 FFLineWrap(temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX, eb_line_prefix);
1669 }
1670 result = FFToCharPtr(temp);
1671 FFRecycleString(ajp, temp);
1672 return result;
1673 }
1674
1675 NLM_EXTERN Uint4 FFLength(StringItemPtr ffstring) {
1676 Uint4 len = 0;
1677 StringItemPtr current;
1678
1679 for ( current = ffstring; current != NULL; current = current->next ) {
1680 len += current->pos;
1681 }
1682
1683 return len;
1684 }
1685
1686
1687 NLM_EXTERN Char FFCharAt(StringItemPtr ffstring, Uint4 pos) {
1688 Uint4 inbufpos = pos % STRING_BUF_LEN;
1689 Uint4 count = 0;
1690 StringItemPtr current = NULL;
1691
1692 inbufpos = pos % STRING_BUF_LEN;
1693
1694 for ( current = ffstring; current != NULL; current = current->next ) {
1695 count += current->pos;
1696 if ( count > pos ) break;
1697 }
1698
1699 if ( current != NULL && inbufpos <= pos ) {
1700 return current->buf[inbufpos];
1701 }
1702
1703 return '\0';
1704 }
1705
1706
1707 NLM_EXTERN Char FFFindChar (
1708 StringItemPtr ffstring, /* StringItem to search in */
1709 StringItemPtr start_buf, /* the position of the last char searched for (buffer) */
1710 Uint4 start_pos, /* the position of the last char searched for (pos) */
1711 Uint4 old_pos, /* the global position searched for */
1712 Uint4 new_pos /* new search position */
1713 )
1714 {
1715 Uint4 delta;
1716 Uint4 count;
1717 StringItemPtr current = NULL;
1718
1719 Char result = '\0';
1720
1721 if ( new_pos == old_pos ) {
1722 result = start_buf->buf[start_pos];
1723 }
1724
1725 if ( new_pos > old_pos ) {
1726 delta = new_pos - old_pos;
1727 current = start_buf;
1728 count = current->pos - start_pos - 1;
1729 current = current->next;
1730
1731 while ( delta > count && current != NULL ) {
1732 current = current->next;
1733 count += current->pos;
1734 }
1735
1736 if ( current != NULL ) {
1737 result = current->buf[new_pos % STRING_BUF_LEN];
1738 }
1739
1740 } else /* new_pos < old_pos */ {
1741 delta = old_pos - new_pos;
1742 if ( old_pos % STRING_BUF_LEN >= delta ) {
1743 result = start_buf->buf[new_pos % STRING_BUF_LEN];
1744 } else {
1745 result = FFCharAt(ffstring, new_pos);
1746 }
1747 }
1748
1749 return result;
1750 }
1751
1752 NLM_EXTERN Boolean FFEmpty(StringItemPtr ffstring) {
1753 if ( ffstring != NULL && ffstring->pos != 0 ) {
1754 return FALSE;
1755 }
1756 return TRUE;
1757 }
1758
1759 /*
1760 * Compute the right-most position in the pattern at which character a occurs,
1761 * for each character a in the alphabet (assumed ASCII-ISO 8859-1)
1762 *
1763 * The result is returned in the supplied vector.
1764 */
1765 static void ComputeLastOccurrence(const CharPtr pattern, Uint4 last_occurrence[])
1766 {
1767 Uint4 i;
1768 Uint4 pat_len;
1769
1770 /* Initilalize vector */
1771 for ( i = 0; i < 256; ++i ) {
1772 last_occurrence[i] = 0;
1773 }
1774
1775 /* compute right-most occurrence */
1776 pat_len = StringLen(pattern);
1777 for ( i = 0; i < pat_len; ++i ) {
1778 last_occurrence[(Uint1)pattern[i]] = i;
1779 }
1780 }
1781
1782 static void ComputePrefix(const CharPtr pattern, Uint4 longest_prefix[])
1783 {
1784 Uint4 pat_len = StringLen(pattern);
1785 Uint4 k, q;
1786
1787 longest_prefix[0] = 0;
1788
1789 k = 0;
1790 for ( q = 1; q < pat_len; ++q ) {
1791 while ( k > 0 && pattern[k] != pattern[q] ) {
1792 k = longest_prefix[k - 1];
1793 }
1794 if ( pattern[k] == pattern[q] ) {
1795 ++k;
1796 }
1797 longest_prefix[q] = k;
1798 }
1799 }
1800
1801
1802 static void ComputeGoodSuffix(const CharPtr pattern, Uint4 good_suffix[])
1803 {
1804 Uint4 pat_len = StringLen(pattern);
1805 Uint4Ptr longest_prefix, reverse_longest_prefix;
1806 CharPtr reverse_pattern;
1807 Uint4 i, j;
1808
1809 /* allocate memory */
1810 longest_prefix = MemNew(pat_len * sizeof(Uint4));
1811 reverse_longest_prefix = MemNew(pat_len * sizeof(Uint4));
1812 reverse_pattern = MemNew((pat_len + 1) * sizeof(Char));
1813
1814 if ( longest_prefix == NULL ||
1815 reverse_longest_prefix == NULL ||
1816 reverse_pattern == NULL ) {
1817 MemFree(longest_prefix);
1818 MemFree(reverse_longest_prefix);
1819 MemFree(reverse_pattern);
1820 return;
1821 }
1822
1823 /* compute reverse pattern */
1824 for ( i = 0; i < pat_len; ++i ) {
1825 reverse_pattern[pat_len - i] = pattern[i];
1826 }
1827
1828 ComputePrefix(pattern, longest_prefix);
1829 ComputePrefix(reverse_pattern, reverse_longest_prefix);
1830
1831 for ( j = 0; j < pat_len; ++j) {
1832 good_suffix[j] = pat_len - longest_prefix[pat_len-1];
1833 }
1834
1835 for ( i = 0; i < pat_len; ++i ) {
1836 j = pat_len - reverse_longest_prefix[i] - 1;
1837 if ( good_suffix[j] > i - reverse_longest_prefix[i] + 1) {
1838 good_suffix[j] = i - reverse_longest_prefix[i] + 1;
1839 }
1840 }
1841
1842 MemFree(longest_prefix);
1843 MemFree(reverse_longest_prefix);
1844 MemFree(reverse_pattern);
1845 }
1846
1847
1848 /*
1849 * searches for a pattern in a StringItem.
1850 * Using the Boyer-Moore algorithm for the search.
1851 */
1852 NLM_EXTERN Int4 FFStringSearch (
1853 StringItemPtr text,
1854 const CharPtr pattern,
1855 Uint4 position )
1856 {
1857 Uint4 text_len = FFLength(text);
1858 Uint4 pat_len = StringLen(pattern);
1859 Uint4 last_occurrence[256];
1860 Uint4Ptr good_suffix;
1861 Uint4 shift;
1862 Int4 j;
1863
1864 if ( pat_len == 0 ) return 0;
1865 if ( text_len == 0 || pat_len > text_len - position ) return -1;
1866
1867 good_suffix = (Uint4Ptr)MemNew(pat_len * sizeof(Int4));
1868 if ( good_suffix == NULL ) return -1;
1869
1870 ComputeLastOccurrence(pattern, last_occurrence);
1871 ComputeGoodSuffix(pattern, good_suffix);
1872
1873 shift = position;
1874 while ( shift <= text_len - pat_len ) {
1875 j = pat_len - 1;
1876 while( j >= 0 && pattern[j] == FFCharAt(text,shift + j) ) {
1877 --j;
1878 }
1879 if ( j == -1 ) {
1880 MemFree (good_suffix);
1881 return shift;
1882 } else {
1883 shift += MAX( (Int4)good_suffix[(int) j],
1884 (Int4)(j - last_occurrence[(int) FFCharAt(text,shift + j)]));
1885 }
1886 }
1887 MemFree (good_suffix);
1888
1889 return -1;
1890 }
1891
1892
1893 /* */
1894 /* IsWholeWordSubstr () -- Determines if a substring that is */
1895 /* contained in another string is a whole */
1896 /* word or phrase -- i.e. is it both */
1897 /* preceded and followed by white space. */
1898 /* */
1899
1900 NLM_EXTERN Boolean IsWholeWordSubstr (
1901 StringItemPtr searchStr,
1902 Uint4 foundPos,
1903 CharPtr subStr
1904 )
1905 {
1906 Boolean left, right;
1907 Char ch;
1908
1909
1910 /* check on the left only if there is a character there */
1911 if (foundPos > 0) {
1912 ch = FFCharAt(searchStr, foundPos - 1);
1913 left = IS_WHITESP(ch) || ispunct(ch);
1914 } else {
1915 left = TRUE;
1916 }
1917
1918 foundPos += StringLen(subStr);
1919 if ( foundPos == FFLength(searchStr) ) {
1920 right = TRUE;
1921 } else {
1922 ch = FFCharAt(searchStr, foundPos);
1923 right = IS_WHITESP(ch) || ispunct(ch);
1924 }
1925
1926 return left; /* see comment above */
1927 /* return left && right; this is how it should be!*/
1928 }
1929
1930
1931 /* functions to record sections or blocks in linked lists */
1932
1933 NLM_EXTERN BaseBlockPtr Asn2gbAddBlock (
1934 Asn2gbWorkPtr awp,
1935 BlockType blocktype,
1936 size_t size
1937 )
1938
1939 {
1940 BaseBlockPtr bbp;
1941 ValNodePtr vnp;
1942
1943 if (awp == NULL || size < 1) return NULL;
1944
1945 bbp = (BaseBlockPtr) MemNew (size);
1946 if (bbp == NULL) return NULL;
1947 bbp->blocktype = blocktype;
1948 bbp->section = awp->currsection;
1949
1950 vnp = ValNodeAddPointer (&(awp->lastblock), 0, bbp);
1951 if (vnp == NULL) return bbp;
1952
1953 awp->lastblock = vnp;
1954 if (awp->blockList == NULL) {
1955 awp->blockList = vnp;
1956 }
1957
1958 return bbp;
1959 }
1960
1961
1962 /*--------------------------------------------------------*/
1963 /* */
1964 /* s_LocusGetBaseName() - */
1965 /* */
1966 /*--------------------------------------------------------*/
1967
1968 static Boolean s_LocusGetBaseName (BioseqPtr parent, BioseqPtr segment, CharPtr baseName)
1969 {
1970 Char parentName[SEQID_MAX_LEN];
1971 Char segName[SEQID_MAX_LEN];
1972 SeqIdPtr sip;
1973 TextSeqIdPtr tsip;
1974 Char prefix[5];
1975 Char bufTmp[SEQID_MAX_LEN];
1976 Int2 deleteChars;
1977 Int2 newLength;
1978 Int2 i;
1979 Uint2 segNameLen;
1980
1981 /* Get the parent Sequence ID */
1982
1983 parentName [0] = '\0';
1984 sip = NULL;
1985 for (sip = parent->id; sip != NULL; sip = sip->next) {
1986 if (sip->choice == SEQID_GENBANK ||
1987 sip->choice == SEQID_EMBL ||
1988 sip->choice == SEQID_DDBJ) break;
1989 if (sip->choice == SEQID_TPG ||
1990 sip->choice == SEQID_TPE ||
1991 sip->choice == SEQID_TPD) break;
1992 }
1993
1994 if (sip != NULL) {
1995 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1996 if (tsip != NULL && (! StringHasNoText (tsip->name))) {
1997 StringNCpy_0 (parentName, tsip->name, sizeof (parentName));
1998 }
1999 }
2000
2001 if (StringHasNoText (parentName)) {
2002 StringNCpy_0 (parentName, baseName, sizeof (parentName));
2003 }
2004
2005 /* Get segment id */
2006
2007 segName [0] = '\0';
2008 segNameLen = 0;
2009 sip = NULL;
2010 for (sip = segment->id; sip != NULL; sip = sip->next) {
2011 if (sip->choice == SEQID_GENBANK ||
2012 sip->choice == SEQID_EMBL ||
2013 sip->choice == SEQID_DDBJ) break;
2014 if (sip->choice == SEQID_TPG ||
2015 sip->choice == SEQID_TPE ||
2016 sip->choice == SEQID_TPD) break;
2017 }
2018
2019 if (sip != NULL) {
2020 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2021 if (tsip != NULL && (! StringHasNoText (tsip->name))) {
2022 StringNCpy_0 (segName, tsip->name, sizeof (segName));
2023 segNameLen = StringLen(segName);
2024 }
2025 }
2026
2027 /* If there's no "SEG_" prefix, then */
2028 /* just use the parent ID. */
2029
2030 StringNCpy_0 (prefix,parentName,sizeof (prefix));
2031 prefix[4] = '\0';
2032 if (StringCmp(prefix,"SEG_") != 0)
2033 {
2034 StringCpy(baseName,parentName);
2035 return FALSE;
2036 }
2037
2038 /* Otherwise, eliminate the "SEG_" ... */
2039
2040 StringCpy(bufTmp, &parentName[4]);
2041 StringCpy(parentName,bufTmp);
2042
2043 /* ... And calculate a base name */
2044
2045 if (segNameLen > 0 &&
2046 (segName[segNameLen-1] == '1') &&
2047 (StringLen(parentName) == segNameLen) &&
2048 (parentName[segNameLen-1] == segName[segNameLen-1]))
2049 {
2050 deleteChars = 1;
2051 for (i = segNameLen-2; i >= 0; i--)
2052 if (parentName[i] == '0')
2053 deleteChars++;
2054 else
2055 break;
2056 newLength = segNameLen - deleteChars;
2057 StringNCpy (parentName,segName,newLength); /* not StringNCpy_0 */
2058 parentName[newLength] = '\0';
2059 }
2060
2061 /* Return the base name in the basename parameter */
2062
2063 StringCpy(baseName,parentName);
2064 return TRUE;
2065 }
2066
2067 /* ********************************************************************** */
2068
2069 static Uint1 fasta_order [NUM_SEQID] = {
2070 33, /* 0 = not set */
2071 20, /* 1 = local Object-id */
2072 15, /* 2 = gibbsq */
2073 16, /* 3 = gibbmt */
2074 30, /* 4 = giim Giimport-id */
2075 10, /* 5 = genbank */
2076 10, /* 6 = embl */
2077 10, /* 7 = pir */
2078 10, /* 8 = swissprot */
2079 15, /* 9 = patent */
2080 20, /* 10 = other TextSeqId */
2081 20, /* 11 = general Dbtag */
2082 255, /* 12 = gi */
2083 10, /* 13 = ddbj */
2084 10, /* 14 = prf */
2085 12, /* 15 = pdb */
2086 10, /* 16 = tpg */
2087 10, /* 17 = tpe */
2088 10, /* 18 = tpd */
2089 10, /* 19 = gpp */
2090 10 /* 20 = nat */
2091 };
2092
2093 /* DoOneSection builds a single report for one bioseq or segment */
2094
2095 static Asn2gbSectPtr Asn2gbAddSection (
2096 Asn2gbWorkPtr awp
2097 )
2098
2099 {
2100 Asn2gbSectPtr asp;
2101 ValNodePtr vnp;
2102
2103 if (awp == NULL) return NULL;
2104
2105 asp = (Asn2gbSectPtr) MemNew (sizeof (IntAsn2gbSect));
2106 if (asp == NULL) return NULL;
2107
2108 vnp = ValNodeAddPointer (&(awp->lastsection), 0, asp);
2109 if (vnp == NULL) return asp;
2110
2111 awp->lastsection = vnp;
2112 if (awp->sectionList == NULL) {
2113 awp->sectionList = vnp;
2114 }
2115
2116 return asp;
2117 }
2118
2119 NLM_EXTERN Boolean DeltaLitOnly (
2120 BioseqPtr bsp
2121 )
2122
2123 {
2124 ValNodePtr vnp;
2125
2126 if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE;
2127 for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
2128 if (vnp->choice == 1) return FALSE;
2129 }
2130 return TRUE;
2131 }
2132
2133 NLM_EXTERN Boolean SegHasParts (
2134 BioseqPtr bsp
2135 )
2136
2137 {
2138 BioseqSetPtr bssp;
2139 SeqEntryPtr sep;
2140
2141 if (bsp == NULL || bsp->repr != Seq_repr_seg) return FALSE;
2142 sep = bsp->seqentry;
2143 if (sep == NULL) return FALSE;
2144 sep = sep->next;
2145 if (sep == NULL || (! IS_Bioseq_set (sep))) return FALSE;
2146 bssp = (BioseqSetPtr) sep->data.ptrvalue;
2147 if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts) return TRUE;
2148 return FALSE;
2149 }
2150
2151 NLM_EXTERN void DoOneSection (
2152 BioseqPtr target,
2153 BioseqPtr parent,
2154 BioseqPtr bsp,
2155 BioseqPtr refs,
2156 SeqLocPtr slp,
2157 Uint2 seg,
2158 Int4 from,
2159 Int4 to,
2160 Boolean contig,
2161 Boolean onePartOfSeg,
2162 Asn2gbWorkPtr awp
2163 )
2164
2165 {
2166 size_t acclen;
2167 Asn2gbFormatPtr afp;
2168 IntAsn2gbJobPtr ajp;
2169 Asn2gbSectPtr asp;
2170 SeqMgrBioseqContext bcontext;
2171 BlockMask bkmask;
2172 BaseBlockPtr PNTR blockArray;
2173 Boolean cagemaster = FALSE;
2174 SeqMgrDescContext dcontext;
2175 Boolean hasRefs;
2176 Int4 i;
2177 IntAsn2gbSectPtr iasp;
2178 Boolean isGpipe = FALSE;
2179 Boolean isRefSeq = FALSE;
2180 MolInfoPtr mip;
2181 Boolean nsgenome = FALSE;
2182 Int4 numBlocks;
2183 Int4 numsegs = 0;
2184 SeqDescrPtr sdp;
2185 SeqIdPtr sip;
2186 TextSeqIdPtr tsip;
2187 ValNodePtr vnp;
2188 Boolean wgsmaster = FALSE;
2189 Boolean wgstech = FALSE;
2190 Boolean willshowcage = FALSE;
2191 Boolean willshowwgs = FALSE;
2192 Boolean willshowgenome = FALSE;
2193 Boolean willshowcontig = FALSE;
2194 Boolean willshowsequence = FALSE;
2195
2196 if (target == NULL || parent == NULL || bsp == NULL || awp == NULL) return;
2197 ajp = awp->ajp;
2198 if (ajp == NULL) return;
2199 bkmask = ajp->bkmask;
2200
2201 if (awp->mode == RELEASE_MODE && awp->style == CONTIG_STYLE) {
2202 if (bsp->repr == Seq_repr_seg) {
2203 } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2204 } else return;
2205 }
2206
2207 if (ajp->flags.suppressLocalID) {
2208 sip = SeqIdSelect (bsp->id, fasta_order, NUM_SEQID);
2209 if (sip == NULL || sip->choice == SEQID_LOCAL) return;
2210 }
2211
2212 if (seg == 0) {
2213 awp->basename[0] = '\0';
2214 } else if (seg == 1) {
2215 s_LocusGetBaseName (parent, bsp, awp->basename);
2216 }
2217
2218 asp = Asn2gbAddSection (awp);
2219 if (asp == NULL) return;
2220
2221 afp = awp->afp;
2222 if (afp != NULL) {
2223 afp->asp = asp;
2224 }
2225
2226 numsegs = awp->partcount;
2227 if (numsegs == 0 && SeqMgrGetBioseqContext (parent, &bcontext)) {
2228 numsegs = bcontext.numsegs;
2229 }
2230
2231 /* set working data fields */
2232
2233 awp->asp = asp;
2234
2235 awp->target = target;
2236 awp->parent = parent;
2237 awp->bsp = bsp;
2238 awp->refs = refs;
2239 awp->slp = slp;
2240 (awp->sectionCount)++;
2241 awp->currGi = 0;
2242 awp->seg = seg;
2243 awp->numsegs = numsegs;
2244 awp->from = from;
2245 awp->to = to;
2246 awp->contig = contig;
2247
2248 awp->firstfeat = TRUE;
2249 awp->featseen = FALSE;
2250 awp->featjustseen = FALSE;
2251 awp->wgsaccnlist = NULL;
2252
2253 /* initialize empty blockList for this section */
2254
2255 awp->blockList = NULL;
2256 awp->lastblock = NULL;
2257
2258 /* and store section data into section fields */
2259
2260 asp->target = target;
2261 asp->bsp = bsp;
2262 asp->slp = slp;
2263 asp->seg = seg;
2264 asp->numsegs = numsegs;
2265 asp->from = from;
2266 asp->to = to;
2267
2268 iasp = (IntAsn2gbSectPtr) asp;
2269
2270 asp->blockArray = NULL;
2271 asp->numBlocks = 0;
2272
2273 /* WGS master and NS_ virtual records treated differently */
2274
2275 if (bsp->repr == Seq_repr_virtual) {
2276
2277 /* check for certain ID types */
2278
2279 for (sip = bsp->id; sip != NULL; sip = sip->next) {
2280 if (sip->choice == SEQID_GENBANK ||
2281 sip->choice == SEQID_EMBL ||
2282 sip->choice == SEQID_DDBJ ||
2283 sip->choice == SEQID_TPG ||
2284 sip->choice == SEQID_TPE ||
2285 sip->choice == SEQID_TPD) {
2286 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2287 if (tsip != NULL && tsip->accession != NULL) {
2288 acclen = StringLen (tsip->accession);
2289 if (acclen == 12) {
2290 if (StringCmp (tsip->accession + 6, "000000") == 0) {
2291 wgsmaster = TRUE;
2292 }
2293 } else if (acclen == 13) {
2294 if (StringCmp (tsip->accession + 6, "0000000") == 0) {
2295 wgsmaster = TRUE;
2296 }
2297 }
2298 }
2299 } else if (sip->choice == SEQID_OTHER) {
2300 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2301 if (tsip != NULL && tsip->accession != NULL) {
2302 if (StringNICmp (tsip->accession, "NC_", 3) == 0) {
2303 wgsmaster = TRUE;
2304 } else if (StringNICmp (tsip->accession, "NS_", 3) == 0) {
2305 nsgenome = TRUE;
2306 } else if (StringNICmp (tsip->accession, "NZ_", 3) == 0) {
2307 if (StringLen (tsip->accession) == 15) {
2308 if (StringCmp (tsip->accession + 9, "000000") == 0) {
2309 wgsmaster = TRUE;
2310 }
2311 } else if (StringLen (tsip->accession) == 16) {
2312 if (StringCmp (tsip->accession + 9, "0000000") == 0) {
2313 wgsmaster = TRUE;
2314 }
2315 }
2316 }
2317 }
2318 }
2319 }
2320
2321 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
2322 if (sdp != NULL) {
2323 mip = (MolInfoPtr) sdp->data.ptrvalue;
2324 if (mip != NULL) {
2325 if (mip->tech == MI_TECH_wgs) {
2326 wgstech = TRUE;
2327 } else if (mip->tech == MI_TECH_other && StringCmp (mip->techexp, "cage") == 0) {
2328 cagemaster = TRUE;
2329 }
2330 }
2331 }
2332 }
2333
2334 for (sip = bsp->id; sip != NULL; sip = sip->next) {
2335 if (sip->choice == SEQID_OTHER) {
2336 isRefSeq = TRUE;
2337 } else if (sip->choice == SEQID_GI) {
2338 awp->currGi = (Int4) sip->data.intvalue;
2339 } else if (sip->choice == SEQID_GPIPE) {
2340 isGpipe = TRUE;
2341 }
2342 }
2343
2344 /* start exploring and populating paragraphs */
2345
2346 if (awp->format == FTABLE_FMT) {
2347 AddFeatHeaderBlock (awp);
2348 if (awp->showFtableRefs) {
2349 AddReferenceBlock (awp, isRefSeq);
2350 }
2351 if (! awp->hideSources) {
2352 AddSourceFeatBlock (awp);
2353 }
2354 if (! awp->hideFeatures) {
2355 AddFeatureBlock (awp);
2356 }
2357
2358 } else {
2359
2360 if (wgsmaster && wgstech) {
2361 willshowwgs = TRUE;
2362 } else if (cagemaster) {
2363 willshowcage = TRUE;
2364 } else if (nsgenome) {
2365 willshowgenome = TRUE;
2366 } else if (contig) {
2367 willshowcontig = TRUE;
2368 if (awp->showContigAndSeq) {
2369 if (! awp->hideSequence) {
2370 willshowsequence = TRUE;
2371 }
2372 }
2373 } else {
2374 if (awp->showContigAndSeq) {
2375 if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
2376 willshowcontig = TRUE;
2377 } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2378 willshowcontig = TRUE;
2379 }
2380 }
2381 if (! awp->hideSequence) {
2382 willshowsequence = TRUE;
2383 }
2384 }
2385
2386 AddLocusBlock (awp, willshowwgs, willshowcage, willshowgenome, willshowcontig, willshowsequence);
2387
2388 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
2389
2390 AddDeflineBlock (awp);
2391 AddAccessionBlock (awp);
2392
2393 if (ISA_aa (bsp->mol)) {
2394 /*
2395 AddPidBlock (awp);
2396 */
2397 }
2398
2399 AddVersionBlock (awp);
2400
2401 /* if (ISA_na (bsp->mol)) { */
2402 AddDblinkBlock (awp);
2403 /* } */
2404
2405 if (ISA_aa (bsp->mol)) {
2406 AddDbsourceBlock (awp);
2407 }
2408
2409 } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2410
2411 AddAccessionBlock (awp);
2412
2413 if (ISA_na (bsp->mol)) {
2414 AddVersionBlock (awp);
2415 }
2416
2417 if (ISA_aa (bsp->mol)) {
2418 /* AddPidBlock (awp); */
2419 /* AddDbsourceBlock (awp); */
2420 }
2421
2422 AddDateBlock (awp);
2423
2424 AddDeflineBlock (awp);
2425 }
2426
2427 AddKeywordsBlock (awp);
2428
2429 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
2430 AddSegmentBlock (awp, onePartOfSeg, (Boolean) ISA_na (bsp->mol));
2431 }
2432
2433 AddSourceBlock (awp);
2434 AddOrganismBlock (awp);
2435
2436 /*
2437 if (awp->showRefStats) {
2438 AddRefStatsBlock (awp);
2439 }
2440 */
2441
2442 if (! awp->hidePubs) {
2443
2444 /* !!! RELEASE_MODE should check return value of AddReferenceBlock !!! */
2445
2446 hasRefs = AddReferenceBlock (awp, isRefSeq);
2447 if (! hasRefs) {
2448 if (ajp->flags.needAtLeastOneRef) {
2449 /* RefSeq and Gpipe do not require a publication */
2450 if ((! isRefSeq) && (! isGpipe)) {
2451 awp->failed = TRUE;
2452 }
2453 }
2454 }
2455 }
2456
2457 AddCommentBlock (awp);
2458 AddPrimaryBlock (awp);
2459
2460 /*
2461 if (awp->showFeatStats) {
2462 AddFeatStatsBlock (awp);
2463 }
2464 */
2465
2466 AddFeatHeaderBlock (awp);
2467 if (! awp->hideSources) {
2468 AddSourceFeatBlock (awp);
2469 }
2470
2471 if (wgsmaster && wgstech) {
2472
2473 AddWGSBlock (awp);
2474
2475 } else if (cagemaster) {
2476
2477 AddCAGEBlock (awp);
2478
2479 } else if (nsgenome) {
2480
2481 AddGenomeBlock (awp);
2482
2483 } else if (contig) {
2484
2485 if (awp->showconfeats) {
2486 if (! awp->hideFeatures) {
2487 AddFeatureBlock (awp);
2488 }
2489 } else if (awp->smartconfeats && bsp->length <= 1000000) {
2490 if (! awp->hideFeatures) {
2491 AddFeatureBlock (awp);
2492 }
2493 }
2494 AddContigBlock (awp);
2495
2496 if (awp->showContigAndSeq) {
2497 if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
2498 if (awp->showBaseCount) {
2499 AddBasecountBlock (awp);
2500 }
2501 }
2502 AddOriginBlock (awp);
2503
2504 if (! awp->hideSequence) {
2505 AddSequenceBlock (awp);
2506 }
2507 }
2508
2509 } else {
2510
2511 if (! awp->hideFeatures) {
2512 AddFeatureBlock (awp);
2513 }
2514
2515 if (awp->showContigAndSeq) {
2516 if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
2517 AddContigBlock (awp);
2518 } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2519 AddContigBlock (awp);
2520 }
2521 }
2522
2523 if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
2524 if (awp->showBaseCount) {
2525 AddBasecountBlock (awp );
2526 }
2527 }
2528 AddOriginBlock (awp);
2529
2530 if (! awp->hideSequence) {
2531 AddSequenceBlock (awp);
2532 }
2533 }
2534
2535 AddSlashBlock (awp);
2536 }
2537
2538 /* allocate block array for this section */
2539
2540 numBlocks = ValNodeLen (awp->blockList);
2541 asp->numBlocks = numBlocks;
2542
2543 if (numBlocks > 0) {
2544 blockArray = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numBlocks + 1));
2545 asp->blockArray = blockArray;
2546
2547 if (blockArray != NULL) {
2548 for (vnp = awp->blockList, i = 0; vnp != NULL; vnp = vnp->next, i++) {
2549 blockArray [i] = (BaseBlockPtr) vnp->data.ptrvalue;
2550 }
2551 }
2552 }
2553
2554 /* free blockList, but leave data, now pointed to by blockArray elements */
2555
2556 awp->blockList = ValNodeFree (awp->blockList);
2557 awp->lastblock = NULL;
2558
2559 (awp->currsection)++;
2560 }
2561
2562 /* ********************************************************************** */
2563
2564 /*
2565 the following functions handle various kinds of input, all calling
2566 DoOneSection once for each component that gets its own report
2567 */
2568
2569 static Boolean LIBCALLBACK Asn2Seg (
2570 SeqLocPtr slp,
2571 SeqMgrSegmentContextPtr context
2572 )
2573
2574 {
2575 Asn2gbWorkPtr awp;
2576 BioseqPtr bsp = NULL;
2577 Uint2 entityID;
2578 Int4 from;
2579 SeqLocPtr loc;
2580 BioseqPtr parent;
2581 SeqIdPtr sip;
2582 Int4 to;
2583
2584 if (slp == NULL || context == NULL) return FALSE;
2585 awp = (Asn2gbWorkPtr) context->userdata;
2586
2587 parent = context->parent;
2588
2589 from = context->cumOffset;
2590 to = from + context->to - context->from;
2591
2592 sip = SeqLocId (slp);
2593 if (sip == NULL) {
2594 loc = SeqLocFindNext (slp, NULL);
2595 if (loc != NULL) {
2596 sip = SeqLocId (loc);
2597 }
2598 }
2599 if (sip == NULL) return TRUE;
2600
2601 /* may remote fetch genome component if not already in memory */
2602
2603 bsp = BioseqLockById (sip);
2604
2605 if (bsp == NULL) return TRUE;
2606
2607 entityID = ObjMgrGetEntityIDForPointer (bsp);
2608
2609 if (entityID != awp->entityID) {
2610
2611 /* if segment not packaged in record, may need to feature index it */
2612
2613 if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
2614 SeqMgrIndexFeatures (entityID, NULL);
2615 }
2616
2617 /* collect features indexed on the remote bioseq */
2618
2619 parent = bsp;
2620 from = 0;
2621 to = bsp->length - 1;
2622 }
2623
2624 if (bsp->repr != Seq_repr_virtual) {
2625 (awp->seg)++;
2626 DoOneSection (bsp, parent, bsp, bsp, /* slp */ NULL, awp->seg, from, to, FALSE, FALSE, awp);
2627 }
2628
2629 BioseqUnlock (bsp);
2630
2631 return TRUE;
2632 }
2633
2634 static Int4 CountRealParts (
2635 SeqLocPtr slp_head
2636 )
2637
2638 {
2639 SeqIdPtr id;
2640 Int4 numparts;
2641 BioseqPtr part;
2642 SeqIdPtr sip;
2643 SeqLocPtr slp;
2644
2645 numparts = 0;
2646 for (slp = (SeqLocPtr) slp_head; slp != NULL; slp = slp->next) {
2647 sip = SeqLocId (slp);
2648 if (sip == NULL) continue;
2649 if (sip->choice == SEQID_GI) {
2650 part = BioseqFind (sip);
2651 if (part == NULL) continue;
2652 for (id = part->id; id != NULL; id = id->next) {
2653 if (id->choice == SEQID_GIBBSQ ||
2654 id->choice == SEQID_GIBBMT ||
2655 id->choice == SEQID_GIIM) break;
2656 }
2657 if (id != NULL && part->repr == Seq_repr_virtual) continue;
2658 }
2659 numparts++;
2660 }
2661 return numparts;
2662 }
2663
/* userdata for FindSegForPart: the part being looked for and a running segment count */
typedef struct findseg {
  BioseqPtr  bsp;  /* target part; exploration stops when a segment resolves to it */
  Uint2      seg;  /* incremented for every non-virtual segment visited */
} FindSeg, PNTR FindSegPtr;
2668
2669 static Boolean LIBCALLBACK FindSegForPart (
2670 SeqLocPtr slp,
2671 SeqMgrSegmentContextPtr context
2672 )
2673
2674 {
2675 FindSegPtr fsp;
2676 BioseqPtr bsp = NULL;
2677 SeqLocPtr loc;
2678 SeqIdPtr sip;
2679
2680 if (slp == NULL || context == NULL) return TRUE;
2681 fsp = (FindSegPtr) context->userdata;
2682
2683 sip = SeqLocId (slp);
2684 if (sip == NULL) {
2685 loc = SeqLocFindNext (slp, NULL);
2686 if (loc != NULL) {
2687 sip = SeqLocId (loc);
2688 }
2689 }
2690 if (sip == NULL) return TRUE;
2691
2692 bsp = BioseqFind (sip);
2693 if (bsp == NULL) return TRUE;
2694
2695 if (bsp->repr != Seq_repr_virtual) {
2696 (fsp->seg)++;
2697 }
2698
2699 if (bsp != fsp->bsp) return TRUE;
2700
2701 return FALSE;
2702 }
2703
2704 NLM_EXTERN void DoOneBioseq (
2705 BioseqPtr bsp,
2706 Pointer userdata
2707 )
2708
2709 {
2710 IntAsn2gbJobPtr ajp;
2711 Asn2gbWorkPtr awp;
2712 BioseqSetPtr bssp;
2713 SeqMgrSegmentContext context;
2714 Boolean contig = FALSE;
2715 Int4 from;
2716 FindSeg fs;
2717 SeqEntryPtr oldscope;
2718 BioseqPtr parent;
2719 Boolean segmented = FALSE;
2720 SeqEntryPtr sep;
2721 Int4 to;
2722
2723 if (bsp == NULL) return;
2724 awp = (Asn2gbWorkPtr) userdata;
2725 if (awp == NULL) return;
2726 ajp = awp->ajp;
2727 if (ajp == NULL) return;
2728
2729 /* return if molecule not right for format */
2730
2731 if (ISA_na (bsp->mol)) {
2732 if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT) return;
2733
2734 /* only do mRNA feature tables in GPS if targeted to a specific mRNA */
2735
2736 if (ajp->format == FTABLE_FMT && ajp->skipMrnas) {
2737 if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
2738 bssp = (BioseqSetPtr) bsp->idx.parentptr;
2739 if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {
2740 if (bssp->idx.parenttype == OBJ_BIOSEQSET) {
2741 bssp = (BioseqSetPtr) bssp->idx.parentptr;
2742 if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
2743 return;
2744 }
2745 }
2746 }
2747 }
2748 }
2749
2750 } else if (ISA_aa (bsp->mol)) {
2751 if (ajp->format == GENBANK_FMT || ajp->format == EMBL_FMT) return;
2752
2753 /* only do protein feature tables if targeted to a specific protein */
2754
2755 if (ajp->format == FTABLE_FMT && ajp->skipProts) return;
2756 }
2757
2758 if (awp->style == SEGMENT_STYLE) {
2759 segmented = TRUE;
2760 }
2761 if (awp->style == CONTIG_STYLE) {
2762 contig = TRUE;
2763 }
2764 /* Never do segmented style in FTABLE format */
2765 if (awp->format == FTABLE_FMT) {
2766 segmented = FALSE;
2767 contig = FALSE;
2768 }
2769
2770 awp->partcount = 0;
2771
2772 if (bsp->repr == Seq_repr_seg && awp->style == NORMAL_STYLE) {
2773
2774 /* if bsp followed by parts set, then do not default to contig style */
2775
2776 if (SegHasParts (bsp)) {
2777 segmented = TRUE;
2778 contig = FALSE;
2779
2780 if (bsp->seq_ext_type == 1) {
2781
2782 /* count only non-virtual parts */
2783
2784 sep = GetTopSeqEntryForEntityID (awp->entityID);
2785 oldscope = SeqEntrySetScope (sep);
2786 awp->partcount = CountRealParts ((SeqLocPtr) bsp->seq_ext);
2787 SeqEntrySetScope (oldscope);
2788 }
2789 } else {
2790 segmented = FALSE;
2791 contig = TRUE;
2792 }
2793 }
2794 if (bsp->repr == Seq_repr_delta && awp->style == NORMAL_STYLE) {
2795 if (! DeltaLitOnly (bsp)) {
2796 contig = TRUE;
2797 }
2798 }
2799
2800 if (bsp->repr == Seq_repr_seg) {
2801
2802 /* this is a segmented bioseq */
2803
2804 if (segmented) {
2805
2806 /* show all segments individually */
2807
2808 awp->seg = 0;
2809 SeqMgrExploreSegments (bsp, (Pointer) awp, Asn2Seg);
2810
2811 } else {
2812
2813 /* show as single bioseq */
2814
2815 parent = bsp;
2816 from = 0;
2817 to = bsp->length - 1;
2818
2819 DoOneSection (parent, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
2820 }
2821
2822 } else if (bsp->repr == Seq_repr_raw ||
2823 bsp->repr == Seq_repr_const ||
2824 bsp->repr == Seq_repr_delta ||
2825 bsp->repr == Seq_repr_virtual) {
2826
2827 parent = SeqMgrGetParentOfPart (bsp, &context);
2828 if (parent != NULL) {
2829
2830 /* this is a part of an indexed segmented bioseq */
2831
2832 from = context.cumOffset;
2833 to = from + context.to - context.from;
2834
2835 s_LocusGetBaseName (parent, bsp, awp->basename);
2836
2837 fs.bsp = bsp;
2838 fs.seg = 0;
2839 SeqMgrExploreSegments (parent, (Pointer) &fs, FindSegForPart);
2840 awp->showAllFeats = TRUE;
2841
2842 DoOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, fs.seg, from, to, contig, TRUE, awp);
2843
2844 } else {
2845
2846 /* this is a regular non-segmented bioseq */
2847
2848 parent = bsp;
2849 from = 0;
2850 to = bsp->length - 1;
2851
2852 DoOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
2853 }
2854 }
2855 }
2856
2857 static void DoBioseqSetList (
2858 SeqEntryPtr seq_set,
2859 Asn2gbWorkPtr awp
2860 )
2861
2862 {
2863 BioseqSetPtr bssp;
2864 SeqEntryPtr sep;
2865
2866 if (seq_set == NULL || awp == NULL) return;
2867
2868 /* iterate rather than recurse unless multiple nested sets > nuc-prot */
2869
2870 for (sep = seq_set; sep != NULL; sep = sep->next) {
2871
2872 if (IS_Bioseq_set (sep)) {
2873 bssp = (BioseqSetPtr) sep->data.ptrvalue;
2874 if (bssp == NULL) continue;
2875
2876 if (bssp->_class == BioseqseqSet_class_genbank ||
2877 bssp->_class == BioseqseqSet_class_mut_set ||
2878 bssp->_class == BioseqseqSet_class_pop_set ||
2879 bssp->_class == BioseqseqSet_class_phy_set ||
2880 bssp->_class == BioseqseqSet_class_eco_set ||
2881 bssp->_class == BioseqseqSet_class_wgs_set ||
2882 bssp->_class == BioseqseqSet_class_gen_prod_set) {
2883
2884 /* if popset within genbank set, for example, recurse */
2885
2886 DoBioseqSetList (bssp->seq_set, awp);
2887
2888 continue;
2889 }
2890 }
2891
2892 /* at most nuc-prot set, so do main bioseqs that fit the format */
2893
2894 VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, DoOneBioseq);
2895 }
2896 }
2897
2898 static void DoOneBioseqSet (
2899 SeqEntryPtr sep,
2900 Asn2gbWorkPtr awp
2901 )
2902
2903 {
2904 BioseqSetPtr bssp;
2905
2906 if (sep == NULL || awp == NULL) return;
2907
2908 if (IS_Bioseq_set (sep)) {
2909 bssp = (BioseqSetPtr) sep->data.ptrvalue;
2910 if (bssp == NULL) return;
2911
2912 if (bssp->_class == BioseqseqSet_class_genbank ||
2913 bssp->_class == BioseqseqSet_class_mut_set ||
2914 bssp->_class == BioseqseqSet_class_pop_set ||
2915 bssp->_class == BioseqseqSet_class_phy_set ||
2916 bssp->_class == BioseqseqSet_class_eco_set ||
2917 bssp->_class == BioseqseqSet_class_wgs_set ||
2918 bssp->_class == BioseqseqSet_class_gen_prod_set) {
2919
2920 /* this is a pop/phy/mut/eco set, catenate separate reports */
2921
2922 DoBioseqSetList (bssp->seq_set, awp);
2923
2924 return;
2925 }
2926 }
2927
2928 /* at most nuc-prot set, so do main bioseqs that fit the format */
2929
2930 VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, DoOneBioseq);
2931 }
2932
2933 /* ********************************************************************** */
2934
2935 static void RecordOneSection (
2936 Asn2gbWorkPtr awp,
2937 BioseqPtr bsp,
2938 SeqIdPtr sip
2939 )
2940
2941 {
2942 IntAsn2gbJobPtr ajp;
2943 ValNodePtr vnp;
2944
2945 if (awp == NULL) return;
2946 ajp = awp->ajp;
2947 if (ajp == NULL) return;
2948
2949 if (bsp != NULL) {
2950 for (sip = bsp->id; sip != NULL; sip = sip->next) {
2951 if (sip->choice == SEQID_GI) break;
2952 }
2953 }
2954
2955 if (sip == NULL) return;
2956
2957 if (sip->choice == SEQID_GI) {
2958 vnp = ValNodeAddInt (&(ajp->gitail), 0, (Int4) sip->data.intvalue);
2959 if (ajp->gihead == NULL) {
2960 ajp->gihead = vnp;
2961 }
2962 ajp->gitail = vnp;
2963 }
2964
2965 (awp->sectionMax)++;
2966 }
2967
2968 static void CountOneSection (
2969 BioseqPtr target,
2970 BioseqPtr parent,
2971 BioseqPtr bsp,
2972 BioseqPtr refs,
2973 SeqLocPtr slp,
2974 Uint2 seg,
2975 Int4 from,
2976 Int4 to,
2977 Boolean contig,
2978 Boolean onePartOfSeg,
2979 Asn2gbWorkPtr awp
2980 )
2981
2982 {
2983 IntAsn2gbJobPtr ajp;
2984 SeqIdPtr sip;
2985
2986 if (target == NULL || parent == NULL || bsp == NULL || awp == NULL) return;
2987 ajp = awp->ajp;
2988 if (ajp == NULL) return;
2989
2990 if (awp->mode == RELEASE_MODE && awp->style == CONTIG_STYLE) {
2991 if (bsp->repr == Seq_repr_seg) {
2992 } else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2993 } else return;
2994 }
2995
2996 if (ajp->flags.suppressLocalID) {
2997 sip = SeqIdSelect (bsp->id, fasta_order, NUM_SEQID);
2998 if (sip == NULL || sip->choice == SEQID_LOCAL) return;
2999 }
3000
3001 RecordOneSection (awp, bsp, NULL);
3002 }
3003
3004
3005 static Boolean LIBCALLBACK Count2Seg (
3006 SeqLocPtr slp,
3007 SeqMgrSegmentContextPtr context
3008 )
3009
3010 {
3011 Asn2gbWorkPtr awp;
3012 BioseqPtr bsp = NULL;
3013 Int4 from;
3014 SeqLocPtr loc;
3015 BioseqPtr parent;
3016 SeqIdPtr sip;
3017 Int4 to;
3018
3019 if (slp == NULL || context == NULL) return FALSE;
3020 awp = (Asn2gbWorkPtr) context->userdata;
3021
3022 parent = context->parent;
3023
3024 from = context->cumOffset;
3025 to = from + context->to - context->from;
3026
3027 sip = SeqLocId (slp);
3028 if (sip == NULL) {
3029 loc = SeqLocFindNext (slp, NULL);
3030 if (loc != NULL) {
3031 sip = SeqLocId (loc);
3032 }
3033 }
3034 if (sip == NULL) return TRUE;
3035
3036 bsp = BioseqFindCore (sip);
3037 if (bsp != NULL && bsp->repr == Seq_repr_virtual) return TRUE;
3038
3039 RecordOneSection (awp, NULL, sip);
3040
3041 return TRUE;
3042 }
3043
3044 static void CountOneBioseq (
3045 BioseqPtr bsp,
3046 Pointer userdata
3047 )
3048
3049 {
3050 IntAsn2gbJobPtr ajp;
3051 Asn2gbWorkPtr awp;
3052 BioseqSetPtr bssp;
3053 SeqMgrSegmentContext context;
3054 Boolean contig = FALSE;
3055 Int4 from;
3056 BioseqPtr parent;
3057 Boolean segmented = FALSE;
3058 Int4 to;
3059
3060 if (bsp == NULL) return;
3061 awp = (Asn2gbWorkPtr) userdata;
3062 if (awp == NULL) return;
3063 ajp = awp->ajp;
3064 if (ajp == NULL) return;
3065
3066 if (ISA_na (bsp->mol)) {
3067 if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT) return;
3068
3069 if (ajp->format == FTABLE_FMT && ajp->skipMrnas) {
3070 if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
3071 bssp = (BioseqSetPtr) bsp->idx.parentptr;
3072 if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {
3073 if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
3074 bssp = (BioseqSetPtr) bsp->idx.parentptr;
3075 if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
3076 return;
3077 }
3078 }
3079 }
3080 }
3081 }
3082
3083 } else if (ISA_aa (bsp->mol)) {
3084 if (ajp->format == GENBANK_FMT || ajp->format == EMBL_FMT) return;
3085
3086 if (ajp->format == FTABLE_FMT && ajp->skipProts) return;
3087 }
3088
3089 if (awp->style == SEGMENT_STYLE) {
3090 segmented = TRUE;
3091 }
3092 if (awp->style == CONTIG_STYLE) {
3093 contig = TRUE;
3094 }
3095 if (awp->format == FTABLE_FMT) {
3096 segmented = FALSE;
3097 contig = FALSE;
3098 }
3099
3100 if (bsp->repr == Seq_repr_seg && awp->style == NORMAL_STYLE) {
3101
3102 if (SegHasParts (bsp)) {
3103 segmented = TRUE;
3104 contig = FALSE;
3105 } else {
3106 segmented = FALSE;
3107 contig = TRUE;
3108 }
3109 }
3110 if (bsp->repr == Seq_repr_delta && awp->style == NORMAL_STYLE) {
3111 if (! DeltaLitOnly (bsp)) {
3112 contig = TRUE;
3113 }
3114 }
3115
3116 if (bsp->repr == Seq_repr_seg) {
3117
3118 if (segmented) {
3119
3120 SeqMgrExploreSegments (bsp, (Pointer) awp, Count2Seg);
3121
3122 } else {
3123
3124 parent = bsp;
3125 from = 0;
3126 to = bsp->length - 1;
3127
3128 CountOneSection (parent, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
3129 }
3130
3131 } else if (bsp->repr == Seq_repr_raw ||
3132 bsp->repr == Seq_repr_const ||
3133 bsp->repr == Seq_repr_delta ||
3134 bsp->repr == Seq_repr_virtual) {
3135
3136 parent = SeqMgrGetParentOfPart (bsp, &context);
3137 if (parent != NULL) {
3138
3139 from = context.cumOffset;
3140 to = from + context.to - context.from;
3141
3142 CountOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, TRUE, awp);
3143
3144 } else {
3145
3146 parent = bsp;
3147 from = 0;
3148 to = bsp->length - 1;
3149
3150 CountOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
3151 }
3152 }
3153 }
3154
3155
3156
3157 static void CountBioseqSetList (
3158 SeqEntryPtr seq_set,
3159 Asn2gbWorkPtr awp
3160 )
3161
3162 {
3163 BioseqSetPtr bssp;
3164 SeqEntryPtr sep;
3165
3166 if (seq_set == NULL || awp == NULL) return;
3167
3168 for (sep = seq_set; sep != NULL; sep = sep->next) {
3169
3170 if (IS_Bioseq_set (sep)) {
3171 bssp = (BioseqSetPtr) sep->data.ptrvalue;
3172 if (bssp == NULL) continue;
3173
3174 if (bssp->_class == BioseqseqSet_class_genbank ||
3175 bssp->_class == BioseqseqSet_class_mut_set ||
3176 bssp->_class == BioseqseqSet_class_pop_set ||
3177 bssp->_class == BioseqseqSet_class_phy_set ||
3178 bssp->_class == BioseqseqSet_class_eco_set ||
3179 bssp->_class == BioseqseqSet_class_wgs_set ||
3180 bssp->_class == BioseqseqSet_class_gen_prod_set) {
3181
3182 CountBioseqSetList (bssp->seq_set, awp);
3183
3184 continue;
3185 }
3186 }
3187
3188 VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, CountOneBioseq);
3189 }
3190 }
3191
3192 static void CountOneBioseqSet (
3193 SeqEntryPtr sep,
3194 Asn2gbWorkPtr awp
3195 )
3196
3197 {
3198 BioseqSetPtr bssp;
3199
3200 if (sep == NULL || awp == NULL) return;
3201
3202 if (IS_Bioseq_set (sep)) {
3203 bssp = (BioseqSetPtr) sep->data.ptrvalue;
3204 if (bssp == NULL) return;
3205
3206 if (bssp->_class == BioseqseqSet_class_genbank ||
3207 bssp->_class == BioseqseqSet_class_mut_set ||
3208 bssp->_class == BioseqseqSet_class_pop_set ||
3209 bssp->_class == BioseqseqSet_class_phy_set ||
3210 bssp->_class == BioseqseqSet_class_eco_set ||
3211 bssp->_class == BioseqseqSet_class_wgs_set ||
3212 bssp->_class == BioseqseqSet_class_gen_prod_set) {
3213
3214 CountBioseqSetList (bssp->seq_set, awp);
3215
3216 return;
3217 }
3218 }
3219
3220 VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, CountOneBioseq);
3221 }
3222
3223 /* ********************************************************************** */
3224
3225 /* public functions */
3226
3227 static int LIBCALLBACK SortParagraphByIDProc (
3228 VoidPtr vp1,
3229 VoidPtr vp2
3230 )
3231
3232 {
3233 BaseBlockPtr bbp1, bbp2;
3234
3235 if (vp1 == NULL || vp2 == NULL) return 0;
3236 bbp1 = *((BaseBlockPtr PNTR) vp1);
3237 bbp2 = *((BaseBlockPtr PNTR) vp2);
3238 if (bbp1 == NULL || bbp2 == NULL) return 0;
3239
3240 if (bbp1->entityID > bbp2->entityID) return 1;
3241 if (bbp1->entityID < bbp2->entityID) return -1;
3242
3243 if (bbp1->itemtype > bbp2->itemtype) return 1;
3244 if (bbp1->itemtype < bbp2->itemtype) return -1;
3245
3246 if (bbp1->itemID > bbp2->itemID) return 1;
3247 if (bbp1->itemID < bbp2->itemID) return -1;
3248
3249 if (bbp1->paragraph > bbp2->paragraph) return 1;
3250 if (bbp1->paragraph < bbp2->paragraph) return -1;
3251
3252 return 0;
3253 }
3254
3255 static void IsBspRefseq (
3256 BioseqPtr bsp,
3257 Pointer userdata
3258 )
3259
3260 {
3261 BoolPtr has_refseqP;
3262 SeqIdPtr sip;
3263
3264 if (bsp == NULL || userdata == NULL) return;
3265 has_refseqP = (BoolPtr) userdata;
3266 for (sip = bsp->id; sip != NULL; sip = sip->next) {
3267 if (sip->choice == SEQID_OTHER) {
3268 *has_refseqP = TRUE;
3269 }
3270 }
3271 }
3272
3273 static Boolean IsSepRefseq (
3274 SeqEntryPtr sep
3275 )
3276
3277 {
3278 Boolean is_refseq = FALSE;
3279
3280 if (sep == NULL) return FALSE;
3281 VisitBioseqsInSep (sep, (Pointer) &is_refseq, IsBspRefseq);
3282 return is_refseq;
3283 }
3284
/* one row of flagTable: the 30 policy flags for a mode, in the order SetFlagsFromMode reads them */
typedef struct modeflags {
  Boolean  flags [30];
} ModeFlags, PNTR ModeFlagsPtr;
3288
/*
  Policy flags per mode.  SetFlagsFromMode selects the row with index
  (mode - 1) and copies the values, in order, into ajp->flags starting
  with suppressLocalID and ending with forGbRelease.
*/
static ModeFlags flagTable [] = {

  /* RELEASE_MODE */
  {TRUE, TRUE, TRUE, TRUE, TRUE,
   TRUE, TRUE, TRUE, TRUE, TRUE,
   TRUE, TRUE, TRUE, TRUE, TRUE,
   TRUE, TRUE, TRUE, TRUE, TRUE,
   TRUE, TRUE, TRUE, TRUE, TRUE,
   TRUE, TRUE, TRUE, TRUE, TRUE},

  /* ENTREZ_MODE */
  {FALSE, TRUE, TRUE, TRUE, TRUE,
   FALSE, TRUE, TRUE, TRUE, TRUE,
   TRUE, TRUE, FALSE, TRUE, TRUE,
   TRUE, TRUE, FALSE, FALSE, TRUE,
   TRUE, TRUE, TRUE, TRUE, TRUE,
   TRUE, TRUE, TRUE, TRUE, FALSE},

  /* SEQUIN_MODE */
  {FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, TRUE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, TRUE, FALSE, FALSE,
   FALSE, TRUE, FALSE, FALSE, FALSE},

  /* DUMP_MODE */
  {FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE,
   FALSE, FALSE, FALSE, FALSE, FALSE}
};
3323
3324 static void SetFlagsFromMode (
3325 IntAsn2gbJobPtr ajp,
3326 ModType mode
3327 )
3328
3329 {
3330 BoolPtr bp;
3331 ModeFlagsPtr mfp;
3332 SeqEntryPtr sep;
3333
3334 if (ajp == NULL) return;
3335 if (! (mode >= RELEASE_MODE && mode <= DUMP_MODE)) {
3336 mode = DUMP_MODE;
3337 }
3338 mfp = &(flagTable [(int) (mode - 1)]);
3339 bp = &(mfp->flags [0]);
3340
3341 ajp->flags.suppressLocalID = *(bp++);
3342 ajp->flags.validateFeats = *(bp++);
3343 ajp->flags.ignorePatPubs = *(bp++);
3344 ajp->flags.dropShortAA = *(bp++);
3345 ajp->flags.avoidLocusColl = *(bp++);
3346
3347 ajp->flags.iupacaaOnly = *(bp++);
3348 ajp->flags.dropBadCitGens = *(bp++);
3349 ajp->flags.noAffilOnUnpub = *(bp++);
3350 ajp->flags.dropIllegalQuals = *(bp++);
3351 ajp->flags.checkQualSyntax = *(bp++);
3352
3353 ajp->flags.needRequiredQuals = *(bp++);
3354 ajp->flags.needOrganismQual = *(bp++);
3355 ajp->flags.needAtLeastOneRef = *(bp++);
3356 ajp->flags.citArtIsoJta = *(bp++);
3357 ajp->flags.dropBadDbxref = *(bp++);
3358
3359 ajp->flags.useEmblMolType = *(bp++);
3360 ajp->flags.hideBankItComment = *(bp++);
3361 ajp->flags.checkCDSproductID = *(bp++);
3362 ajp->flags.suppressSegLoc = *(bp++);
3363 ajp->flags.srcQualsToNote = *(bp)++;
3364
3365 ajp->flags.hideEmptySource = *(bp++);
3366 ajp->flags.goQualsToNote = *(bp++);
3367 ajp->flags.separateGeneSyns = *(bp++);
3368 ajp->flags.refSeqQualsToNote = *(bp++);
3369 ajp->flags.selenocysteineToNote = *(bp++);
3370
3371 ajp->flags.pyrrolysineToNote = *(bp++);
3372 ajp->flags.extraProductsToNote = *(bp++);
3373 ajp->flags.codonRecognizedToNote = *(bp++);
3374 ajp->flags.hideSpecificGeneMaps = *(bp++);
3375 ajp->flags.forGbRelease = *(bp++);
3376
3377 /* unapproved qualifiers suppressed for flatfile, okay for GBSeq XML */
3378
3379 if (ajp->gbseq == NULL) {
3380
3381 /* collaboration unapproved source quals on their own line only in indexer Sequin - relaxed */
3382
3383 /*
3384 if (GetAppProperty ("InternalNcbiSequin") == NULL) {
3385
3386 ajp->flags.srcQualsToNote = TRUE;
3387 }
3388 */
3389
3390 sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
3391 if (IsSepRefseq (sep)) {
3392
3393 ajp->flags.srcQualsToNote = FALSE;
3394 ajp->flags.separateGeneSyns = FALSE;
3395 ajp->flags.codonRecognizedToNote = FALSE;
3396 ajp->flags.goQualsToNote = FALSE;
3397 ajp->flags.refSeqQualsToNote = FALSE;
3398
3399 /* selenocysteine always a separate qualifier for RefSeq */
3400
3401 ajp->flags.selenocysteineToNote = FALSE;
3402 ajp->flags.pyrrolysineToNote = FALSE;
3403
3404 } else {
3405
3406 /* collaboration unapproved Gene Ontology quals on their own line only for RefSeq */
3407
3408 /* ajp->flags.goQualsToNote = TRUE; */
3409 /* ajp->flags.separateGeneSyns = TRUE; */
3410 }
3411
3412 } else {
3413
3414 sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
3415 if (IsSepRefseq (sep)) {
3416
3417 ajp->flags.srcQualsToNote = FALSE;
3418 ajp->flags.separateGeneSyns = FALSE;
3419 ajp->flags.codonRecognizedToNote = FALSE;
3420
3421 /* selenocysteine always a separate qualifier for RefSeq */
3422
3423 ajp->flags.selenocysteineToNote = FALSE;
3424 ajp->flags.pyrrolysineToNote = FALSE;
3425
3426 }
3427 }
3428
3429 if (ajp->refseqConventions) {
3430 ajp->flags.srcQualsToNote = FALSE;
3431 ajp->flags.separateGeneSyns = FALSE;
3432 ajp->flags.codonRecognizedToNote = FALSE;
3433 ajp->flags.goQualsToNote = FALSE;
3434 ajp->flags.refSeqQualsToNote = FALSE;
3435 ajp->flags.hideSpecificGeneMaps = FALSE;
3436 }
3437 }
3438
3439 static void CheckVersionWithGi (BioseqPtr bsp, Pointer userdata)
3440
3441 {
3442 Boolean hasGi = FALSE;
3443 BoolPtr missingVersion;
3444 SeqIdPtr sip;
3445 TextSeqIdPtr tsip;
3446 Boolean zeroVersion = FALSE;
3447
3448 for (sip = bsp->id; sip != NULL; sip = sip->next) {
3449 switch (sip->choice) {
3450 case SEQID_TPG:
3451 case SEQID_TPE:
3452 case SEQID_TPD:
3453 case SEQID_GENBANK:
3454 case SEQID_EMBL:
3455 case SEQID_DDBJ:
3456 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3457 if (tsip != NULL && tsip->version == 0) {
3458 zeroVersion = TRUE;
3459 }
3460 break;
3461 case SEQID_GI :
3462 hasGi = TRUE;
3463 break;
3464 default :
3465 break;
3466 }
3467 }
3468 if (hasGi && zeroVersion) {
3469 missingVersion = (BoolPtr) userdata;
3470 *missingVersion = TRUE;
3471 }
3472 }
3473
3474
/* accumulator filled by LookForSeqIDs over every bioseq visited; flags are only ever set, never cleared */
typedef struct lookforids {
  Boolean  isG;            /* a GenBank id was seen */
  Boolean  isGED;          /* a GenBank, EMBL or DDBJ id was seen */
  Boolean  isNTorNWorNG;   /* a SEQID_OTHER accession starts with NT_, NW_ or NG_ */
  Boolean  isNC;           /* a SEQID_OTHER accession starts with NC_ */
  Boolean  isRefSeq;       /* a SEQID_OTHER id was seen */
  Boolean  isGeneral;      /* a general id with a non-skippable Dbtag was seen */
  Boolean  isTPA;          /* a TPG, TPE or TPD id was seen */
  Boolean  isTPG;          /* a TPG id was seen */
  Boolean  isSP;           /* a SwissProt id was seen */
  Boolean  isNuc;          /* a nucleotide bioseq was seen */
  Boolean  isProt;         /* a protein bioseq was seen */
  Boolean  isLocal;        /* a local id was seen */
  Boolean  isNonLocal;     /* an id counted as non-local was seen (SwissProt does not set this) */
  Boolean  sourcePubFuse;  /* id type (or a 6-character GenBank/TPG accession) selects the sourcePubFuse policy */
} LookForIDs, PNTR LookForIDsPtr;
3491
3492 static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)
3493
3494 {
3495 DbtagPtr dbt;
3496 LookForIDsPtr lfip;
3497 SeqIdPtr sip;
3498 TextSeqIdPtr tsip;
3499
3500 lfip = (LookForIDsPtr) userdata;
3501 if (ISA_na (bsp->mol)) {
3502 lfip->isNuc = TRUE;
3503 }
3504 if (ISA_aa (bsp->mol)) {
3505 lfip->isProt = TRUE;
3506 }
3507
3508 for (sip = bsp->id; sip != NULL; sip = sip->next) {
3509 switch (sip->choice) {
3510 case SEQID_GENBANK :
3511 lfip->isG = TRUE;
3512 /* and fall through to EMBL and DDBJ */
3513 case SEQID_EMBL :
3514 case SEQID_DDBJ :
3515 lfip->isGED = TRUE;
3516 lfip->isNonLocal = TRUE;
3517 break;
3518 case SEQID_SWISSPROT :
3519 lfip->isSP = TRUE;
3520 break;
3521 case SEQID_TPG :
3522 lfip->isTPG = TRUE;
3523 /* and fall through to TPE and TPD */
3524 case SEQID_TPE :
3525 case SEQID_TPD :
3526 lfip->isTPA = TRUE;
3527 lfip->isNonLocal = TRUE;
3528 break;
3529 case SEQID_OTHER :
3530 lfip->isRefSeq = TRUE;
3531 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3532 if (tsip != NULL) {
3533 if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
3534 lfip->isNC = TRUE;
3535 } else if (StringNCmp (tsip->accession, "NT_", 3) == 0) {
3536 lfip->isNTorNWorNG = TRUE;
3537 } else if (StringNCmp (tsip->accession, "NW_", 3) == 0) {
3538 lfip->isNTorNWorNG = TRUE;
3539 } else if (StringNCmp (tsip->accession, "NG_", 3) == 0) {
3540 lfip->isNTorNWorNG = TRUE;
3541 }
3542 }
3543 lfip->isNonLocal = TRUE;
3544 break;
3545 case SEQID_GENERAL :
3546 dbt = (DbtagPtr) sip->data.ptrvalue;
3547 if (dbt != NULL && !IsSkippableDbtag(dbt)) {
3548 lfip->isGeneral = TRUE;
3549 lfip->isNonLocal = TRUE;
3550 }
3551 break;
3552 case SEQID_LOCAL :
3553 lfip->isLocal = TRUE;
3554 break;
3555 default :
3556 lfip->isNonLocal = TRUE;
3557 break;
3558 }
3559 /* also set policy on sourcePubFuse */
3560 switch (sip->choice) {
3561 case SEQID_GIBBSQ :
3562 case SEQID_GIBBMT :
3563 lfip->sourcePubFuse = TRUE;
3564 break;
3565 case SEQID_EMBL :
3566 case SEQID_PIR :
3567 case SEQID_SWISSPROT :
3568 case SEQID_PATENT :
3569 case SEQID_DDBJ :
3570 case SEQID_PRF :
3571 case SEQID_PDB :
3572 case SEQID_TPE:
3573 case SEQID_TPD:
3574 case SEQID_GPIPE:
3575 lfip->sourcePubFuse = TRUE;
3576 break;
3577 case SEQID_GENBANK :
3578 case SEQID_TPG:
3579 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
3580 if (tsip != NULL) {
3581 if (StringLen (tsip->accession) == 6) {
3582 lfip->sourcePubFuse = TRUE;
3583 }
3584 }
3585 break;
3586 case SEQID_NOT_SET :
3587 case SEQID_LOCAL :
3588 case SEQID_OTHER :
3589 case SEQID_GENERAL :
3590 break;
3591 default :
3592 break;
3593 }
3594 }
3595 }
3596
3597 static void LookForGEDetc (
3598 SeqEntryPtr topsep,
3599 BoolPtr isG,
3600 BoolPtr isGED,
3601 BoolPtr isNTorNWorNG,
3602 BoolPtr isNC,
3603 BoolPtr isRefSeq,
3604 BoolPtr isGeneral,
3605 BoolPtr isTPA,
3606 BoolPtr isTPG,
3607 BoolPtr isSP,
3608 BoolPtr isNuc,
3609 BoolPtr isProt,
3610 BoolPtr isOnlyLocal,
3611 BoolPtr sourcePubFuse
3612 )
3613
3614 {
3615 LookForIDs lfi;
3616
3617 MemSet ((Pointer) &lfi, 0, sizeof (LookForIDs));
3618 VisitBioseqsInSep (topsep, (Pointer) &lfi, LookForSeqIDs);
3619 *isG = lfi.isG;
3620 *isGED = lfi.isGED;
3621 *isNTorNWorNG = lfi.isNTorNWorNG;
3622 *isNC = lfi.isNC;
3623 *isRefSeq = lfi.isRefSeq;
3624 *isGeneral = lfi.isGeneral;
3625 *isTPA = lfi.isTPA;
3626 *isTPG = lfi.isTPG;
3627 *isSP = lfi.isSP;
3628 *isNuc = lfi.isNuc;
3629 *isProt = lfi.isProt;
3630 if (lfi.isLocal && (! lfi.isNonLocal)) {
3631 *isOnlyLocal = TRUE;
3632 } else {
3633 *isOnlyLocal = FALSE;
3634 }
3635 *sourcePubFuse = lfi.sourcePubFuse;
3636 }
3637
3638 static void MakeGapFeatsBase (
3639 BioseqPtr bsp,
3640 Pointer userdata,
3641 Boolean isSP
3642 )
3643
3644 {
3645 Char buf [32];
3646 Int4 currpos = 0;
3647 BioseqPtr fakebsp = NULL;
3648 IntFuzzPtr fuzz;
3649 ValNodePtr PNTR gapvnp;
3650 ImpFeatPtr ifp;
3651 SeqLitPtr litp;
3652 SeqAnnotPtr sap = NULL;
3653 SeqFeatPtr sfp;
3654 SeqIdPtr sip;
3655 SeqLocPtr slp;
3656 ValNodePtr vnp;
3657
3658 if (bsp == NULL || bsp->repr != Seq_repr_delta) return;
3659 gapvnp = (ValNodePtr PNTR) userdata;
3660 sip = SeqIdFindBest (bsp->id, 0);
3661 if (sip == NULL) return;
3662 /* no longer suppress on far delta contigs */
3663 /* if (! DeltaLitOnly (bsp)) return; */
3664
3665 for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
3666 if (vnp->choice == 1) {
3667 slp = (SeqLocPtr) vnp->data.ptrvalue;
3668 if (slp == NULL) continue;
3669 currpos += SeqLocLen (slp);
3670 }
3671 if (vnp->choice == 2) {
3672 litp = (SeqLitPtr) vnp->data.ptrvalue;
3673 if (litp == NULL) continue;
3674 if (litp->seq_data == NULL || litp->seq_data_type == Seq_code_gap) {
3675 if (litp->length > 0) {
3676 if (fakebsp == NULL) {
3677 /* to be freed with MemFree, not BioseqFree */
3678 fakebsp = MemNew (sizeof (Bioseq));
3679 if (fakebsp == NULL) return;
3680 sap = SeqAnnotNew ();
3681 if (sap == NULL) return;
3682 sap->type = 1;
3683 fakebsp->annot = sap;
3684 ValNodeAddPointer (gapvnp, 0, (Pointer) fakebsp);
3685 }
3686 ifp = ImpFeatNew ();
3687 if (ifp == NULL) continue;
3688 ifp->key = StringSave ("gap");
3689 sfp = SeqFeatNew ();
3690 if (sfp == NULL) continue;
3691 sfp->data.choice = SEQFEAT_IMP;
3692 sfp->data.value.ptrvalue = (Pointer) ifp;
3693 sfp->next = (SeqFeatPtr) sap->data;
3694 sap->data = (Pointer) sfp;
3695 fuzz = litp->fuzz;
3696 if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
3697 AddQualifierToFeature (sfp, "estimated_length", "unknown");
3698 } else {
3699 sprintf (buf, "%ld", (long) litp->length);
3700 AddQualifierToFeature (sfp, "estimated_length", buf);
3701 }
3702 sfp->location = AddIntervalToLocation (NULL, sip, currpos, currpos + litp->length - 1, FALSE, FALSE);
3703 } else if (isSP && litp->length == 0) {
3704 if (fakebsp == NULL) {
3705 /* to be freed with MemFree, not BioseqFree */
3706 fakebsp = MemNew (sizeof (Bioseq));
3707 if (fakebsp == NULL) return;
3708 sap = SeqAnnotNew ();
3709 if (sap == NULL) return;
3710 sap->type = 1;
3711 fakebsp->annot = sap;
3712 ValNodeAddPointer (gapvnp, 0, (Pointer) fakebsp);
3713 }
3714 ifp = ImpFeatNew ();
3715 if (ifp == NULL) continue;
3716 ifp->key = StringSave ("gap");
3717 sfp = SeqFeatNew ();
3718 if (sfp == NULL) continue;
3719 sfp->data.choice = SEQFEAT_IMP;
3720 sfp->data.value.ptrvalue = (Pointer) ifp;
3721 sfp->next = (SeqFeatPtr) sap->data;
3722 sap->data = (Pointer) sfp;
3723 fuzz = litp->fuzz;
3724 if (fuzz != NULL && fuzz->choice == 4 && fuzz->a == 0) {
3725 AddQualifierToFeature (sfp, "estimated_length", "unknown");
3726 } else {
3727 sprintf (buf, "%ld", (long) litp->length);
3728 AddQualifierToFeature (sfp, "estimated_length", buf);
3729 }
3730 sfp->location = AddIntervalToLocation (NULL, sip, currpos - 1, currpos, FALSE, FALSE);
3731 sfp->comment = StringSave ("Non-consecutive residues");
3732 }
3733 }
3734 currpos += litp->length;
3735 }
3736 }
3737 }
3738
/* bioseq visitor form of MakeGapFeatsBase with isSP set to TRUE */
static void MakeSPGapFeats (
  BioseqPtr bsp,
  Pointer userdata
)

{
  MakeGapFeatsBase (bsp, userdata, TRUE);
}
3747
/* bioseq visitor form of MakeGapFeatsBase with isSP set to FALSE */
static void MakeGapFeats (
  BioseqPtr bsp,
  Pointer userdata
)

{
  MakeGapFeatsBase (bsp, userdata, FALSE);
}
3756
3757 static void LookFarFeatFetchPolicy (
3758 SeqDescrPtr sdp,
3759 Pointer userdata
3760 )
3761
3762 {
3763 BoolPtr forceOnlyNearFeatsP;
3764 ObjectIdPtr oip;
3765 UserFieldPtr ufp;
3766 UserObjectPtr uop;
3767
3768 if (sdp == NULL || sdp->choice != Seq_descr_user) return;
3769 forceOnlyNearFeatsP = (BoolPtr) userdata;
3770 if (forceOnlyNearFeatsP == NULL) return;
3771
3772 uop = (UserObjectPtr) sdp->data.ptrvalue;
3773 if (uop == NULL) return;
3774 oip = uop->type;
3775 if (oip == NULL) return;
3776 if (StringCmp (oip->str, "FeatureFetchPolicy") != 0) return;
3777
3778 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
3779 oip = ufp->label;
3780 if (oip == NULL || ufp->data.ptrvalue == NULL) continue;
3781 if (StringCmp (oip->str, "Policy") == 0) {
3782 if (StringICmp ((CharPtr) ufp->data.ptrvalue, "OnlyNearFeatures") == 0) {
3783 *forceOnlyNearFeatsP = TRUE;
3784 }
3785 }
3786 }
3787 }
3788
3789 static void FindMultiIntervalGenes (
3790 SeqFeatPtr sfp,
3791 Pointer userdata
3792 )
3793
3794 {
3795 BoolPtr multiIntervalGenesP;
3796 SeqLocPtr slp;
3797
3798 if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE) return;
3799 multiIntervalGenesP = (BoolPtr) userdata;
3800 if (multiIntervalGenesP == NULL) return;
3801
3802 slp = sfp->location;
3803 if (slp == NULL) return;
3804 switch (slp->choice) {
3805 case SEQLOC_PACKED_INT :
3806 case SEQLOC_PACKED_PNT :
3807 case SEQLOC_MIX :
3808 case SEQLOC_EQUIV :
3809 *multiIntervalGenesP = TRUE;
3810 break;
3811 default :
3812 break;
3813 }
3814 }
3815
3816 static CharPtr bad_html_strings [] = {
3817 "<script", "<object", "<applet", "<embed", "<form", "javascript:", "vbscript:", NULL
3818 };
3819
3820 static CharPtr defHead = "\
3821 <!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\
3822 \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n\
3823 <html lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">\n\
3824 <head>\n\
3825 <meta http-equiv=\"Content-Type\" content=\"text/html; charset=us-ascii\" />\
3826 <title>GenBank entry</title>\n\
3827 </head>\n\
3828 <body>\n\
3829 <hr />";
3830
3831 static CharPtr defTail = "\
3832 <hr />\n\
3833 </body>\n\
3834 </html>\n";
3835
/* Each mask ORs together one group of related option values.  Callers AND
   the option word with the mask and compare the result against a single
   member of the group. */

#define FAR_TRANS_MASK \
  (SHOW_FAR_TRANSLATION | TRANSLATE_IF_NO_PRODUCT | ALWAYS_TRANSLATE_CDS)

#define FEAT_FETCH_MASK \
  (ONLY_NEAR_FEATURES | FAR_FEATURES_SUPPRESS | NEAR_FEATURES_SUPPRESS)

#define HTML_XML_ASN_MASK \
  (CREATE_HTML_FLATFILE | CREATE_XML_GBSEQ_FILE | CREATE_ASN_GBSEQ_FILE)

#define PUBLICATION_MASK \
  (HIDE_GENE_RIFS | ONLY_GENE_RIFS | ONLY_REVIEW_PUBS | NEWEST_PUBS | OLDEST_PUBS | HIDE_ALL_PUBS)
3840
3841 static Asn2gbJobPtr asn2gnbk_setup_ex (
3842 BioseqPtr bsp,
3843 BioseqSetPtr bssp,
3844 SeqLocPtr slp,
3845 FmtType format,
3846 ModType mode,
3847 StlType style,
3848 FlgType flags,
3849 LckType locks,
3850 CstType custom,
3851 XtraPtr extra,
3852 Boolean stream,
3853 FILE *fp,
3854 AsnIoPtr aip,
3855 AsnTypePtr atp
3856 )
3857
3858 {
3859 Asn2gbFormat af;
3860 IntAsn2gbJobPtr ajp = NULL;
3861 Asn2gbSectPtr asp;
3862 Asn2gbWork aw;
3863 BaseBlockPtr bbp;
3864 BlockMask bkmask = (BlockMask) 0;
3865 BaseBlockPtr PNTR blockArray;
3866 Uint2 eID = 0;
3867 Uint2 entityID = 0;
3868 Uint2 item_type = 0;
3869 Uint4 item_id = 0;
3870 CharPtr ffhead = NULL;
3871 CharPtr fftail = NULL;
3872 Asn2gbWriteFunc ffwrite = NULL;
3873 Boolean forceOnlyNearFeats = FALSE;
3874 ValNodePtr gapvnp = NULL;
3875 GBSeqPtr gbseq = NULL;
3876 Int4 i;
3877 IndxPtr index = NULL;
3878 Boolean isG;
3879 Boolean isGED;
3880 Boolean isGeneral;
3881 Boolean isNTorNWorNG;
3882 Boolean isNC;
3883 Boolean isNuc;
3884 Boolean isOnlyLocal;
3885 Boolean isProt;
3886 Boolean isRefSeq;
3887 Boolean isSP;
3888 Boolean isTPA;
3889 Boolean isTPG;
3890 Int4 j;
3891 Int4 k;
3892 SeqLocPtr loc = NULL;
3893 Boolean lockFarComp;
3894 Boolean lockFarLocs;
3895 Boolean lockFarProd;
3896 Boolean lookupFarComp;
3897 Boolean lookupFarHist;
3898 Boolean lookupFarInf;
3899 Boolean lookupFarLocs;
3900 Boolean lookupFarOthers;
3901 Boolean lookupFarProd;
3902 Boolean missingVersion;
3903 Boolean multiIntervalGenes = FALSE;
3904 Int4 nextGi = 0;
3905 Boolean noLeft;
3906 Boolean noRight;
3907 Int4 numBlocks;
3908 Int4 numSections;
3909 SeqEntryPtr oldscope;
3910 ObjMgrDataPtr omdp;
3911 Int4 numParagraphs;
3912 BaseBlockPtr PNTR paragraphArray;
3913 BaseBlockPtr PNTR paragraphByIDs;
3914 BioseqPtr parent = NULL;
3915 Int4 prevGi = 0;
3916 Int2 q;
3917 Boolean reindex = TRUE;
3918 Pointer remotedata = NULL;
3919 Asn2gbFreeFunc remotefree = NULL;
3920 Asn2gbLockFunc remotelock = NULL;
3921 ValNodePtr remotevnp = NULL;
3922 Asn2gbSectPtr PNTR sectionArray;
3923 SubmitBlockPtr sbp;
3924 SeqEntryPtr sep;
3925 SeqIntPtr sintp;
3926 SeqIdPtr sip;
3927 Boolean skipMrnas = FALSE;
3928 Boolean skipProts = FALSE;
3929 Boolean sourcePubFuse;
3930 SeqSubmitPtr ssp;
3931 BioseqSetPtr topbssp;
3932 Pointer userdata = NULL;
3933 ValNodePtr vnp;
3934 Boolean is_html = FALSE;
3935
3936 if (format == 0) {
3937 format = GENBANK_FMT;
3938 }
3939 if (mode == 0) {
3940 mode = SEQUIN_MODE;
3941 }
3942 if (style == 0) {
3943 style = NORMAL_STYLE;
3944 }
3945
3946 if (extra != NULL) {
3947 ffwrite = extra->ffwrite;
3948 ffhead = extra->ffhead;
3949 fftail = extra->fftail;
3950 index = extra->index;
3951 gbseq = extra->gbseq;
3952 userdata = extra->userdata;
3953 remotelock = extra->remotelock;
3954 remotefree = extra->remotefree;
3955 remotedata = extra->remotedata;
3956 prevGi = extra->prevGi;
3957 nextGi = extra->nextGi;
3958 bkmask = extra->bkmask;
3959 reindex = extra->reindex;
3960 }
3961
3962 if (slp != NULL) {
3963 sip = SeqLocId (slp);
3964 bsp = BioseqFind (sip);
3965 if (bsp == NULL) {
3966 bsp = BioseqFindFromSeqLoc (slp);
3967 }
3968 if (bsp == NULL) return NULL;
3969
3970 /* if location is on part of segmented set, need to map to segmented bioseq */
3971
3972 if (slp->choice == SEQLOC_WHOLE) {
3973 /* Entrez server may pass in whole location on part instead of part bioseq */
3974 slp = NULL;
3975 } else if (sip == NULL) {
3976 parent = bsp;
3977 } else {
3978
3979 /* SeqMgrGetParentOfPart depends upon feature indexing */
3980
3981 eID = ObjMgrGetEntityIDForPointer (bsp);
3982 if (SeqMgrFeaturesAreIndexed (eID) == 0) {
3983 SeqMgrIndexFeatures (eID, NULL);
3984 }
3985
3986 parent = SeqMgrGetParentOfPart (bsp, NULL);
3987 }
3988 if (parent != NULL) {
3989 CheckSeqLocForPartial (slp, &noLeft, &noRight);
3990 loc = SeqLocMergeEx (parent, slp, NULL, FALSE, TRUE, FALSE, FALSE);
3991 slp = loc;
3992 FreeAllFuzz (slp);
3993 SetSeqLocPartial (slp, noLeft, noRight);
3994 }
3995
3996 /* if location is whole, generate normal bioseq report */
3997
3998 if (slp == NULL) {
3999 /* reality check in case SeqLocMergeEx fails and sets slp to NULL, or if was cleared above */
4000 } else if (slp->choice == SEQLOC_WHOLE) {
4001 slp = NULL;
4002 SeqLocFree (loc);
4003 loc = NULL;
4004 } else if (slp->choice == SEQLOC_INT) {
4005 sintp = (SeqIntPtr) slp->data.ptrvalue;
4006 if (sintp != NULL &&
4007 sintp->from == 0 &&
4008 sintp->to == bsp->length - 1 &&
4009 sintp->strand == Seq_strand_plus) {
4010 slp = NULL;
4011 SeqLocFree (loc);
4012 loc = NULL;
4013 }
4014 }
4015 }
4016
4017 if (bsp != NULL) {
4018 bssp = NULL;
4019 entityID = ObjMgrGetEntityIDForPointer (bsp);
4020 item_type = OBJ_BIOSEQ;
4021 item_id = bsp->idx.itemID;
4022 } else if (bssp != NULL) {
4023 entityID = ObjMgrGetEntityIDForPointer (bssp);
4024 item_type = OBJ_BIOSEQSET;
4025 item_id = bssp->idx.itemID;
4026
4027 if (format == FTABLE_FMT) {
4028 skipProts = TRUE;
4029 skipMrnas = TRUE;
4030 }
4031 }
4032 if ((Boolean) ((custom & SHOW_PROT_FTABLE) != 0)) {
4033 skipProts = FALSE;
4034 skipMrnas = FALSE;
4035 }
4036
4037 if (entityID == 0) return NULL;
4038
4039 sep = GetTopSeqEntryForEntityID (entityID);
4040
4041 LookForGEDetc (sep, &isG, &isGED, &isNTorNWorNG, &isNC, &isRefSeq,
4042 &isGeneral, &isTPA, &isTPG, &isSP, &isNuc, &isProt,
4043 &isOnlyLocal, &sourcePubFuse);
4044
4045 if (mode == RELEASE_MODE) {
4046 missingVersion = FALSE;
4047 VisitBioseqsInSep (sep, (Pointer) &missingVersion, CheckVersionWithGi);
4048 if (missingVersion) return NULL;
4049 }
4050
4051 ajp = (IntAsn2gbJobPtr) MemNew (sizeof (IntAsn2gbJob));
4052 if (ajp == NULL) return NULL;
4053
4054 VisitDescriptorsInSep (sep, (Pointer) &forceOnlyNearFeats, LookFarFeatFetchPolicy);
4055
4056 gapvnp = NULL;
4057 if (format != FTABLE_FMT) {
4058 if (isGED /* was isG */ || isTPG || isOnlyLocal || isRefSeq || isSP || (isGeneral && (! isGED))) {
4059 if ((Boolean) ((custom & HIDE_GAP_FEATS) == 0)) {
4060 if (isSP) {
4061 VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeSPGapFeats);
4062 } else {
4063 VisitBioseqsInSep (sep, (Pointer) &gapvnp, MakeGapFeats);
4064 }
4065 }
4066 }
4067 }
4068 ajp->gapvnp = gapvnp;
4069
4070 remotevnp = NULL;
4071 ajp->remotelock = remotelock;
4072 ajp->remotefree = remotefree;
4073 ajp->remotedata = remotedata;
4074 if (remotelock != NULL && bsp != NULL) {
4075 sip = SeqIdFindBest (bsp->id, SEQID_GI);
4076 if (sip != NULL) {
4077 remotevnp = remotelock (sip, remotedata);
4078 }
4079 }
4080 ajp->remotevnp = remotevnp;
4081
4082 if (gapvnp != NULL || remotevnp != NULL) {
4083 /* if both gapvnp and remotevnp, link together so everything is indexed */
4084 if (gapvnp != NULL) {
4085 ValNodeLink(&gapvnp, remotevnp);
4086 } else {
4087 gapvnp = remotevnp;
4088 }
4089 SeqMgrClearFeatureIndexes (entityID, NULL);
4090 SeqMgrIndexFeaturesExEx (entityID, NULL, FALSE, FALSE, gapvnp);
4091 gapvnp->next = NULL;
4092 }
4093
4094 if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
4095 SeqMgrIndexFeatures (entityID, NULL);
4096 }
4097
4098 is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
4099 if (is_html) {
4100 InitWWW(ajp);
4101 }
4102
4103 ajp->ajp.entityID = entityID;
4104 ajp->ajp.bsp = bsp;
4105 ajp->ajp.bssp = bssp;
4106
4107 if (loc != NULL) {
4108 ajp->ajp.slp = loc;
4109 } else if (slp != NULL) {
4110 ajp->ajp.slp = AsnIoMemCopy ((Pointer) slp,
4111 (AsnReadFunc) SeqLocAsnRead,
4112 (AsnWriteFunc) SeqLocAsnWrite);
4113 } else {
4114 ajp->ajp.slp = NULL;
4115 }
4116
4117 /* reality check on interval sublocation */
4118
4119 slp = ajp->ajp.slp;
4120 if (slp != NULL && slp->choice == SEQLOC_INT) {
4121 sintp = (SeqIntPtr) slp->data.ptrvalue;
4122 if (sintp != NULL) {
4123 bsp = BioseqFind (sintp->id);
4124 if (bsp != NULL) {
4125 if (sintp->from < 0) {
4126 sintp->from = 0;
4127 } else if (sintp->from > bsp->length - 1) {
4128 sintp->from = bsp->length - 1;
4129 }
4130 if (sintp->to < 0) {
4131 sintp->to = 0;
4132 } else if (sintp->to > bsp->length - 1) {
4133 sintp->to = bsp->length - 1;
4134 }
4135 }
4136 }
4137 }
4138
4139 /* if location specified, normal defaults to master style */
4140
4141 if (ajp->ajp.slp != NULL && style == NORMAL_STYLE) {
4142 style = MASTER_STYLE;
4143 }
4144
4145 ajp->format = format;
4146 ajp->mode = mode; /* for showing new qualifiers before quarantine ends */
4147
4148 ajp->index = index;
4149 ajp->gbseq = gbseq; /* gbseq output can relax srcQualsToNote or goQualsToNote strictness */
4150 if (bkmask == 0) {
4151 bkmask = (BlockMask) (0xFFFFFFFF - FEAT_STATS_MASK - REF_STATS_MASK);
4152 }
4153 ajp->bkmask = bkmask;
4154 ajp->reindex = reindex;
4155 ajp->aip = aip;
4156 ajp->atp = atp;
4157
4158 ajp->refseqConventions = (Boolean) ((flags & REFSEQ_CONVENTIONS) != 0);
4159
4160 SetFlagsFromMode (ajp, mode);
4161
4162 lockFarComp = (Boolean) ((locks & LOCK_FAR_COMPONENTS) != 0);
4163 lockFarLocs = (Boolean) ((locks & LOCK_FAR_LOCATIONS) != 0);
4164 lockFarProd = (Boolean) ((locks & LOCK_FAR_PRODUCTS) != 0);
4165
4166 if (lockFarComp || lockFarLocs || lockFarProd) {
4167
4168 /* lock all bioseqs in advance, including remote genome components */
4169
4170 if (ajp->ajp.slp != NULL && lockFarComp) {
4171 ajp->lockedBspList = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, ajp->ajp.slp);
4172 } else {
4173 ajp->lockedBspList = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
4174 }
4175 }
4176
4177 lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
4178 lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
4179 lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
4180 lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
4181 lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0);
4182 lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0);
4183
4184 if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) {
4185
4186 /* lookukp all far SeqIDs in advance */
4187
4188 LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers);
4189 }
4190
4191 ajp->showFarTransl = (Boolean) ((flags & FAR_TRANS_MASK) == SHOW_FAR_TRANSLATION);
4192 ajp->transIfNoProd = (Boolean) ((flags & FAR_TRANS_MASK) == TRANSLATE_IF_NO_PRODUCT);
4193 ajp->alwaysTranslCds = (Boolean) ((flags & FAR_TRANS_MASK) == ALWAYS_TRANSLATE_CDS);
4194 if (ajp->transIfNoProd || ajp->alwaysTranslCds) {
4195 ajp->showFarTransl = TRUE;
4196 }
4197
4198 ajp->masterStyle = (Boolean) (style == MASTER_STYLE);
4199
4200 ajp->showTranscript = (Boolean) ((flags & SHOW_TRANCRIPTION) != 0);
4201 ajp->showPeptide = (Boolean) ((flags & SHOW_PEPTIDE) != 0);
4202
4203 if (stream && (format == GENBANK_FMT || format == GENPEPT_FMT)) {
4204 ajp->specialGapFormat = (Boolean) ((flags & SPECIAL_GAP_DISPLAY) != 0);
4205 if (is_html && mode == ENTREZ_MODE) {
4206 ajp->specialGapFormat = TRUE;
4207 }
4208 if ((custom & EXPANDED_GAP_DISPLAY) != 0) {
4209 ajp->specialGapFormat = FALSE;
4210 }
4211 } else {
4212 ajp->specialGapFormat = FALSE;
4213 }
4214 ajp->seqGapCurrLen = 0;
4215
4216 ajp->produceInsdSeq = (Boolean) ((flags & PRODUCE_OLD_GBSEQ) == 0);
4217
4218 ajp->gihead = NULL;
4219 ajp->gitail = NULL;
4220
4221 ajp->hideGoTerms = (Boolean) ((custom & HIDE_GO_TERMS) != 0);
4222 ajp->hideTranslation = (Boolean) ((custom & HIDE_TRANSLATION) != 0);
4223
4224 if (format == GENBANK_FMT || format == GENPEPT_FMT) {
4225 ajp->newSourceOrg = TRUE;
4226 }
4227
4228 VisitFeaturesInSep (sep, (Pointer) &multiIntervalGenes, FindMultiIntervalGenes);
4229 ajp->multiIntervalGenes = multiIntervalGenes;
4230
4231 ajp->relModeError = FALSE;
4232 ajp->skipProts = skipProts;
4233 ajp->skipMrnas = skipMrnas;
4234
4235 MemSet ((Pointer) (&aw), 0, sizeof (Asn2gbWork));
4236 aw.ajp = ajp;
4237 aw.entityID = entityID;
4238
4239 aw.sectionList = NULL;
4240 aw.lastsection = NULL;
4241
4242 aw.currsection = 0;
4243 aw.showAllFeats = FALSE;
4244
4245 aw.showconfeats = (Boolean) ((flags & SHOW_CONTIG_FEATURES) != 0);
4246 aw.showconsource = (Boolean) ((flags & SHOW_CONTIG_SOURCES) != 0);
4247
4248 aw.format = format;
4249 aw.mode = mode;
4250 aw.style = style;
4251
4252 /* sectionCount used for hyperlinks */
4253
4254 aw.sectionCount = 0;
4255 aw.sectionMax = 0;
4256 aw.gilistpos = NULL;
4257
4258 aw.currGi = 0;
4259 aw.prevGi = prevGi;
4260 aw.nextGi = nextGi;
4261
4262 /* internal format pointer if writing at time of creation */
4263
4264 if (stream) {
4265 MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
4266 af.ajp = ajp;
4267 af.asp = NULL;
4268 af.qvp = NULL;
4269 af.format = format;
4270 af.ffwrite = ffwrite;
4271 af.userdata = userdata;
4272 af.fp = fp;
4273 af.aip = aip;
4274 af.atp = atp;
4275
4276 aw.afp = ⁡
4277 }
4278
4279 /* special types of records override feature fetching and contig display parameters */
4280
4281 if (mode == ENTREZ_MODE) {
4282 if (! aw.showconfeats) {
4283 aw.smartconfeats = TRUE; /* features suppressed if CONTIG style and length > 1 MB */
4284 aw.showconfeats = FALSE;
4285 aw.showconsource = FALSE;
4286 }
4287 }
4288
4289 aw.onlyNearFeats = FALSE;
4290 aw.farFeatsSuppress = FALSE;
4291 aw.nearFeatsSuppress = FALSE;
4292
4293 if (isNC) {
4294
4295 if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
4296 aw.onlyNearFeats = TRUE;
4297 } else if (forceOnlyNearFeats) {
4298 aw.onlyNearFeats = TRUE;
4299 } else {
4300 aw.nearFeatsSuppress = TRUE;
4301 }
4302
4303 } else if (isNTorNWorNG || isTPA) {
4304
4305 aw.onlyNearFeats = TRUE;
4306
4307 } else if (isGED) {
4308
4309 if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
4310 aw.onlyNearFeats = TRUE;
4311 } else if (forceOnlyNearFeats) {
4312 aw.onlyNearFeats = TRUE;
4313 } else {
4314 aw.nearFeatsSuppress = TRUE;
4315 }
4316 ajp->showFarTransl = TRUE;
4317
4318 } else if (forceOnlyNearFeats) {
4319
4320 aw.onlyNearFeats = TRUE;
4321
4322 } else {
4323
4324 aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
4325 aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS);
4326 aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
4327 }
4328
4329 /* continue setting flags */
4330
4331 aw.showFeatStats = (Boolean) ((custom & SHOW_FEATURE_STATS) != 0);
4332 aw.showRefStats = (Boolean) ((custom & SHOW_REFERENCE_STATS) != 0);
4333 aw.hideFeatures = (Boolean) ((custom & HIDE_FEATURES) != 0);
4334
4335 aw.hideImpFeats = (Boolean) ((custom & HIDE_IMP_FEATS) != 0);
4336 aw.hideVariations = (Boolean) ((custom & HIDE_VARS_AND_REPT_REGNS) != 0);
4337 aw.hideRepeatRegions = (Boolean) ((custom & HIDE_VARS_AND_REPT_REGNS) != 0);
4338 aw.hideSitesBondsRegions = (Boolean) ((custom & HIDE_SITES_BONDS_REGIONS) != 0);
4339 aw.hideCddFeats = (Boolean) ((custom & HIDE_CDD_FEATS) != 0);
4340 aw.hideCdsProdFeats = (Boolean) ((custom & HIDE_CDS_PROD_FEATS) != 0);
4341
4342 ajp->hideEvidence = (Boolean) ((custom & HIDE_EVIDENCE_QUALS) != 0);
4343
4344 aw.hideGeneRIFs = (Boolean) ((custom & PUBLICATION_MASK) == HIDE_GENE_RIFS);
4345 aw.onlyGeneRIFs = (Boolean) ((custom & PUBLICATION_MASK) == ONLY_GENE_RIFS);
4346 aw.onlyReviewPubs = (Boolean) ((custom & PUBLICATION_MASK) == ONLY_REVIEW_PUBS);
4347 aw.newestPubs = (Boolean) ((custom & PUBLICATION_MASK) == NEWEST_PUBS);
4348 aw.oldestPubs = (Boolean) ((custom & PUBLICATION_MASK) == OLDEST_PUBS);
4349 aw.hidePubs = (Boolean) ((custom & PUBLICATION_MASK) == HIDE_ALL_PUBS);
4350
4351 aw.showFtableRefs = (Boolean) ((custom & SHOW_FTABLE_REFS) != 0);
4352 aw.hideSources = (Boolean) ((custom & HIDE_SOURCE_FEATS) != 0);
4353 aw.hideGaps = (Boolean) ((custom & HIDE_GAP_FEATS) != 0);
4354 aw.hideSequence = (Boolean) ((custom & HIDE_SEQUENCE) != 0);
4355
4356 aw.isGPS = FALSE;
4357 if (sep != NULL && IS_Bioseq_set (sep)) {
4358 topbssp = (BioseqSetPtr) sep->data.ptrvalue;
4359 if (topbssp != NULL && topbssp->_class == BioseqseqSet_class_gen_prod_set) {
4360 aw.isGPS = TRUE;
4361 aw.copyGpsCdsUp = (Boolean) ((flags & COPY_GPS_CDS_UP) != 0);
4362 aw.copyGpsGeneDown = (Boolean) ((flags & COPY_GPS_GENE_DOWN) != 0);
4363 }
4364 }
4365
4366 aw.showContigAndSeq = (Boolean) ((flags & SHOW_CONTIG_AND_SEQ) != 0);
4367 /*
4368 if (style != MASTER_STYLE && style != SEGMENT_STYLE) {
4369 aw.showContigAndSeq = FALSE;
4370 }
4371 */
4372
4373 aw.newLocusLine = TRUE;
4374 aw.showBaseCount = FALSE;
4375
4376 if ((Boolean) ((flags & DDBJ_VARIANT_FORMAT) != 0)) {
4377 aw.citSubsFirst = TRUE;
4378 aw.hideGeneFeats = TRUE;
4379 aw.newLocusLine = FALSE;
4380 aw.showBaseCount = TRUE;
4381 ajp->newSourceOrg = FALSE;
4382 }
4383 if (mode == SEQUIN_MODE || mode == DUMP_MODE) {
4384 aw.showBaseCount = TRUE;
4385 }
4386 aw.forcePrimaryBlock = (Boolean) ((flags & FORCE_PRIMARY_BLOCK) != 0);
4387
4388 aw.localFeatCount = VisitFeaturesInSep (sep, NULL, NULL);
4389
4390 aw.sourcePubFuse = sourcePubFuse;
4391
4392 aw.hup = FALSE;
4393 aw.ssp = NULL;
4394
4395 aw.failed = FALSE;
4396
4397 omdp = ObjMgrGetData (entityID);
4398 if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
4399 ssp = (SeqSubmitPtr) omdp->dataptr;
4400 if (ssp != NULL && ssp->datatype == 1) {
4401 aw.ssp = ssp;
4402 sbp = ssp->sub;
4403 if (sbp != NULL) {
4404 aw.hup = sbp->hup;
4405 }
4406 }
4407 }
4408
4409 ajp->bad_html_fsa = TextFsaNew ();
4410
4411 for (q = 0; bad_html_strings [q] != NULL; q++) {
4412 TextFsaAdd (ajp->bad_html_fsa, bad_html_strings [q]);
4413 }
4414
4415 InitUrlAnchorFSA ();
4416
4417 oldscope = SeqEntrySetScope (sep);
4418
4419 if (stream) {
4420 /* send optional head string */
4421
4422 is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
4423 if (ffhead == NULL && is_html) {
4424 ffhead = defHead;
4425 }
4426 if (ffhead != NULL) {
4427 if (fp != NULL) {
4428 fprintf (fp, "%s", ffhead);
4429 }
4430 }
4431 if (ffwrite != NULL) {
4432 ffwrite (ffhead, userdata, HEAD_BLOCK, entityID, item_type, item_id);
4433 }
4434 if (is_html) {
4435 DoQuickLinkFormat (aw.afp, "<div class=\"sequence\">");
4436 }
4437 }
4438
4439 /* if Web Entrez, set awp->sectionMax to decide when Next hyperlink is needed */
4440
4441 if (is_html && mode == ENTREZ_MODE && stream &&
4442 (format == GENBANK_FMT || format == GENPEPT_FMT)) {
4443 /* add dummy node as prev id for first section */
4444 ajp->gihead = ValNodeAddInt (&(ajp->gitail), 0, (Int4) 0);
4445 ajp->gitail = ajp->gihead;
4446 if (bssp != NULL) {
4447 CountOneBioseqSet (SeqMgrGetSeqEntryForData (bssp), &aw);
4448 } else {
4449 CountOneBioseq (bsp, &aw);
4450 }
4451 }
4452
4453 if (bssp != NULL) {
4454
4455 /* handle all components of a pop/phy/mut/eco set */
4456
4457 sep = SeqMgrGetSeqEntryForData (bssp);
4458 DoOneBioseqSet (sep, &aw);
4459
4460 } else {
4461
4462 /* handle single bioseq, which may be segmented or a local part */
4463
4464 DoOneBioseq (bsp, &aw);
4465 }
4466
4467 if (stream) {
4468 if (is_html) {
4469 DoQuickLinkFormat (aw.afp, "</div>");
4470 }
4471
4472 /* send optional tail string */
4473
4474 if (fftail == NULL && is_html) {
4475 fftail = defTail;
4476 }
4477 if (fftail != NULL) {
4478 if (fp != NULL) {
4479 fprintf (fp, "%s", fftail);
4480 }
4481 }
4482 if (ffwrite != NULL) {
4483 ffwrite (fftail, userdata, TAIL_BLOCK, entityID, item_type, item_id);
4484 }
4485 }
4486
4487 SeqEntrySetScope (oldscope);
4488
4489 /* check for failure to populate anything */
4490
4491 numSections = ValNodeLen (aw.sectionList);
4492 ajp->ajp.numSections = numSections;
4493
4494 if (numSections == 0) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
4495
4496 /* allocate section array for this job - needed for memory cleanup even if streamed */
4497
4498 sectionArray = (Asn2gbSectPtr PNTR) MemNew (sizeof (Asn2gbSectPtr) * (numSections + 1));
4499 ajp->ajp.sectionArray = sectionArray;
4500
4501 if (sectionArray == NULL) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
4502
4503 /* fill in section and paragraph arrays */
4504
4505 numParagraphs = 0;
4506 for (vnp = aw.sectionList, i = 0; vnp != NULL && i < numSections; vnp = vnp->next, i++) {
4507 asp = (Asn2gbSectPtr) vnp->data.ptrvalue;
4508 sectionArray [i] = asp;
4509 if (asp != NULL) {
4510 numParagraphs += asp->numBlocks;
4511 }
4512 }
4513
4514 /* allocate paragraph array pointing to all blocks in all sections */
4515
4516 ajp->ajp.numParagraphs = numParagraphs;
4517 if (numParagraphs == 0) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
4518
4519 paragraphArray = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numParagraphs + 1));
4520 ajp->ajp.paragraphArray = paragraphArray;
4521
4522 paragraphByIDs = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numParagraphs + 1));
4523 ajp->ajp.paragraphByIDs = paragraphByIDs;
4524
4525 if (paragraphArray == NULL || paragraphByIDs == NULL) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
4526
4527 k = 0;
4528 for (i = 0; i < numSections; i++) {
4529 asp = sectionArray [i];
4530 if (asp != NULL) {
4531
4532 numBlocks = asp->numBlocks;
4533 blockArray = asp->blockArray;
4534 if (blockArray != NULL) {
4535
4536 for (j = 0; j < numBlocks; j++) {
4537 bbp = blockArray [j];
4538
4539 paragraphArray [k] = bbp;
4540 paragraphByIDs [k] = bbp;
4541 bbp->paragraph = k;
4542 k++;
4543 }
4544 }
4545 }
4546 }
4547
4548 /* sort paragraphByIDs array by entityID/itemtype/itemID/paragraph */
4549
4550 HeapSort (paragraphByIDs, (size_t) numParagraphs, sizeof (BaseBlockPtr), SortParagraphByIDProc);
4551
4552 /* free sectionList, but leave data, now pointed to by sectionArray elements */
4553
4554 ValNodeFree (aw.sectionList);
4555
4556 /* check for failure to to make legal flatfile */
4557
4558 if (ajp->flags.needAtLeastOneRef && aw.failed) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
4559
4560 return (Asn2gbJobPtr) ajp;
4561 }
4562
4563 NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
4564 BioseqPtr bsp,
4565 BioseqSetPtr bssp,
4566 SeqLocPtr slp,
4567 FmtType format,
4568 ModType mode,
4569 StlType style,
4570 FlgType flags,
4571 LckType locks,
4572 CstType custom,
4573 XtraPtr extra
4574 )
4575
4576 {
4577 return asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
4578 flags, locks, custom, extra,
4579 FALSE, NULL, NULL, NULL);
4580 }
4581
4582 /* ********************************************************************** */
4583
4584 /* format functions allocate printable string for given paragraph */
4585
4586 NLM_EXTERN CharPtr DefaultFormatBlock (
4587 Asn2gbFormatPtr afp,
4588 BaseBlockPtr bbp
4589 )
4590
4591 {
4592 if (afp == NULL || bbp == NULL) return NULL;
4593
4594 /* default format function assumes string pre-allocated by add block function */
4595
4596 return StringSaveNoNull (bbp->string);
4597 }
4598
4599 typedef CharPtr (*FormatProc) (Asn2gbFormatPtr afp, BaseBlockPtr bbp);
4600
4601 static FormatProc asn2gnbk_fmt_functions [30] = {
4602 NULL,
4603 NULL,
4604 DefaultFormatBlock,
4605 DefaultFormatBlock,
4606 DefaultFormatBlock,
4607 DefaultFormatBlock,
4608 DefaultFormatBlock,
4609 DefaultFormatBlock,
4610 DefaultFormatBlock,
4611 DefaultFormatBlock,
4612 DefaultFormatBlock,
4613 DefaultFormatBlock,
4614 FormatSourceBlock,
4615 FormatOrganismBlock,
4616 DefaultFormatBlock,
4617 FormatReferenceBlock,
4618 DefaultFormatBlock,
4619 FormatCommentBlock,
4620 DefaultFormatBlock,
4621 FormatFeatHeaderBlock,
4622 FormatSourceFeatBlock,
4623 FormatFeatureBlock,
4624 FormatBasecountBlock,
4625 DefaultFormatBlock,
4626 FormatSequenceBlock,
4627 FormatContigBlock,
4628 DefaultFormatBlock,
4629 DefaultFormatBlock,
4630 FormatSlashBlock,
4631 NULL
4632 };
4633
4634 static CharPtr asn2gnbk_fmt_labels [30] = {
4635 NULL,
4636 NULL,
4637 "locus",
4638 "defline",
4639 "accession",
4640 "version",
4641 "project",
4642 "pid",
4643 "dbsource",
4644 "date",
4645 "keywords",
4646 "segment",
4647 "source",
4648 "organism",
4649 "refstats",
4650 "reference",
4651 "primary",
4652 "comment",
4653 "featstats",
4654 "featheader",
4655 "sourcefeat",
4656 "feature",
4657 "basecount",
4658 "origin",
4659 "sequence",
4660 "contig",
4661 "wgs",
4662 "genome",
4663 "slash",
4664 NULL
4665 };
4666
4667 NLM_EXTERN CharPtr asn2gnbk_block_label (
4668 BlockType blocktype
4669 )
4670
4671 {
4672 if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return NULL;
4673 return asn2gnbk_fmt_labels [(int) blocktype];
4674 }
4675
4676 NLM_EXTERN void PrintFtableIntervals (
4677 ValNodePtr PNTR head,
4678 BioseqPtr target,
4679 SeqLocPtr location,
4680 CharPtr label
4681 )
4682
4683 {
4684 IntFuzzPtr ifp;
4685 Boolean partial5;
4686 Boolean partial3;
4687 SeqLocPtr slp;
4688 SeqPntPtr spp;
4689 Int4 start;
4690 Int4 stop;
4691 Char str [64];
4692 Char str1 [32];
4693 Char str2 [32];
4694
4695 if (head == NULL || target == NULL || location == NULL || label == NULL) return;
4696
4697 if (location->choice == SEQLOC_PNT) {
4698 spp = (SeqPntPtr) location->data.ptrvalue;
4699 if (spp != NULL) {
4700 ifp = spp->fuzz;
4701 if (ifp != NULL && ifp->choice == 4 && ifp->a == 3) {
4702 sprintf (str, "%ld^\t%ld\t%s\n", (long) (spp->point + 1),
4703 (long) (spp->point + 2), label);
4704 ValNodeCopyStr (head, 0, str);
4705 return;
4706 }
4707 }
4708 }
4709
4710 slp = SeqLocFindNext (location, NULL);
4711 if (slp == NULL) return;
4712
4713 start = GetOffsetInBioseq (slp, target, SEQLOC_START) + 1;
4714 stop = GetOffsetInBioseq (slp, target, SEQLOC_STOP) + 1;
4715 CheckSeqLocForPartial (slp, &partial5, &partial3);
4716 if (partial5) {
4717 sprintf (str1, "<%ld", (long) start);
4718 } else {
4719 sprintf (str1, "%ld", (long) start);
4720 }
4721 if (partial3) {
4722 sprintf (str2, ">%ld", (long) stop);
4723 } else {
4724 sprintf (str2, "%ld", (long) stop);
4725 }
4726 sprintf (str, "%s\t%s\t%s\n", str1, str2, label);
4727 ValNodeCopyStr (head, 0, str);
4728
4729 while ((slp = SeqLocFindNext (location, slp)) != NULL) {
4730 start = GetOffsetInBioseq (slp, target, SEQLOC_START) + 1;
4731 stop = GetOffsetInBioseq (slp, target, SEQLOC_STOP) + 1;
4732 CheckSeqLocForPartial (slp, &partial5, &partial3);
4733 if (partial5) {
4734 sprintf (str1, "<%ld", (long) start);
4735 } else {
4736 sprintf (str1, "%ld", (long) start);
4737 }
4738 if (partial3) {
4739 sprintf (str2, ">%ld", (long) stop);
4740 } else {
4741 sprintf (str2, "%ld", (long) stop);
4742 }
4743 if (start != 0 && stop != 0) {
4744 sprintf (str, "%s\t%s\n", str1, str2);
4745 ValNodeCopyStr (head, 0, str);
4746 }
4747 }
4748 }
4749
4750 static CharPtr goQualList [] = {
4751 "", "go_process", "go_component", "go_function", NULL
4752 };
4753
4754 static void PrintGeneOntologyUserFld (
4755 UserFieldPtr ufp,
4756 Pointer userdata
4757 )
4758
4759 {
4760 UserFieldPtr entry;
4761 CharPtr evidence;
4762 Char gid [32];
4763 CharPtr goid;
4764 CharPtr goref;
4765 ValNodePtr PNTR head;
4766 Int2 i;
4767 Int2 j;
4768 size_t len;
4769 ObjectIdPtr oip;
4770 Int4 pmid;
4771 CharPtr str;
4772 CharPtr textstr;
4773 Char tmp [16];
4774
4775 if (ufp == NULL || ufp->choice != 11) return;
4776 oip = ufp->label;
4777 if (oip == NULL) return;
4778 for (i = 0; goQualType [i] != NULL; i++) {
4779 if (StringICmp (oip->str, goQualType [i]) == 0) break;
4780 }
4781 if (goQualType [i] == NULL) return;
4782
4783 /* loop to allow multiple entries for each type of GO term */
4784 for (entry = ufp->data.ptrvalue; entry != NULL; entry = entry->next) {
4785 if (entry == NULL || entry->choice != 11) break;
4786
4787 pmid = 0;
4788 goid = NULL;
4789 goref = NULL;
4790 evidence = NULL;
4791 textstr = NULL;
4792
4793 for (ufp = (UserFieldPtr) entry->data.ptrvalue; ufp != NULL; ufp = ufp->next) {
4794 oip = ufp->label;
4795 if (oip == NULL) continue;
4796 for (j = 0; goFieldType [j] != NULL; j++) {
4797 if (StringICmp (oip->str, goFieldType [j]) == 0) break;
4798 }
4799 if (goFieldType [j] == NULL) continue;
4800 switch (j) {
4801 case 1 :
4802 if (ufp->choice == 1) {
4803 textstr = (CharPtr) ufp->data.ptrvalue;
4804 }
4805 break;
4806 case 2 :
4807 if (ufp->choice == 1) {
4808 goid = (CharPtr) ufp->data.ptrvalue;
4809 } else if (ufp->choice == 2) {
4810 sprintf (gid, "%ld", (long) (Int4) ufp->data.intvalue);
4811 goid = (CharPtr) gid;
4812 }
4813 break;
4814 case 3 :
4815 if (ufp->choice == 2) {
4816 pmid = (Int4) ufp->data.intvalue;
4817 }
4818 break;
4819 case 4 :
4820 if (ufp->choice == 1) {
4821 goref = (CharPtr) ufp->data.ptrvalue;
4822 }
4823 break;
4824 case 5 :
4825 if (ufp->choice == 1) {
4826 evidence = (CharPtr) ufp->data.ptrvalue;
4827 }
4828 break;
4829 default :
4830 break;
4831 }
4832 }
4833 /* if (StringHasNoText (textstr)) break; */
4834
4835 len = StringLen (textstr) + StringLen (goid) + StringLen (goref) + StringLen (evidence) + 40;
4836 str = (CharPtr) MemNew (len);
4837 if (str == NULL) return;
4838 StringCpy (str, "\t\t\t");
4839 StringCat (str, goQualList [i]);
4840 StringCat (str, "\t");
4841 StringCat (str, textstr);
4842 if (StringDoesHaveText (goid)) {
4843 StringCat (str, "|");
4844 StringCat (str, goid);
4845 } else {
4846 StringCat (str, "|");
4847 }
4848 if (pmid != 0) {
4849 sprintf (tmp, "|%ld", (long) pmid);
4850 StringCat (str, tmp);
4851 } else if (StringDoesHaveText (goref)) {
4852 StringCat (str, "|");
4853 StringCat (str, goref);
4854 } else {
4855 StringCat (str, "|");
4856 }
4857 if (StringDoesHaveText (evidence)) {
4858 StringCat (str, "|");
4859 StringCat (str, evidence);
4860 }
4861 len = StringLen (str);
4862 while (len > 0 && str [len - 1] == '|') {
4863 str [len - 1] = '\0';
4864 len--;
4865 }
4866 StringCat (str, "\n");
4867
4868 head = (ValNodePtr PNTR) userdata;
4869 ValNodeCopyStr (head, 0, str);
4870 MemFree (str);
4871 }
4872 }
4873
4874 static void PrintNomenclatureUserObject (
4875 UserObjectPtr uop,
4876 Pointer userdata
4877 )
4878
4879 {
4880 CharPtr ds = NULL, me = NULL, nm = NULL, sy = NULL;
4881 ValNodePtr PNTR head;
4882 size_t len;
4883 ObjectIdPtr oip;
4884 CharPtr str = NULL;
4885 UserFieldPtr ufp;
4886
4887 if (uop == NULL) return;
4888 oip = uop->type;
4889 if (oip == NULL) return;
4890 if (StringCmp (oip->str, "OfficialNomenclature") != 0) return;
4891
4892 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
4893 oip = ufp->label;
4894 if (oip == NULL || oip->str == NULL) continue;
4895 if (StringICmp (oip->str, "Symbol") == 0) {
4896 if (ufp->choice == 1) {
4897 str = (CharPtr) ufp->data.ptrvalue;
4898 if (StringDoesHaveText (str)) {
4899 sy = str;
4900 }
4901 }
4902 } else if (StringICmp (oip->str, "Name") == 0) {
4903 if (ufp->choice == 1) {
4904 str = (CharPtr) ufp->data.ptrvalue;
4905 if (StringDoesHaveText (str)) {
4906 nm = str;
4907 }
4908 }
4909 } else if (StringICmp (oip->str, "DataSource") == 0) {
4910 if (ufp->choice == 1) {
4911 str = (CharPtr) ufp->data.ptrvalue;
4912 if (StringDoesHaveText (str)) {
4913 ds = str;
4914 }
4915 }
4916 } else if (StringICmp (oip->str, "Status") == 0) {
4917 if (ufp->choice == 1) {
4918 str = (CharPtr) ufp->data.ptrvalue;
4919 if (StringDoesHaveText (str)) {
4920 me = str;
4921 }
4922 }
4923 }
4924 }
4925 if (me == NULL) {
4926 me = "Unclassified";
4927 }
4928
4929 if (StringHasNoText (sy)) return;
4930
4931 len = StringLen (ds) + StringLen (me) + StringLen (nm) + StringLen (sy) + 80;
4932 str = (CharPtr) MemNew (len);
4933 if (str == NULL) return;
4934
4935 StringCpy (str, "\t\t\tnomenclature\t");
4936 StringCat (str, me);
4937 StringCat (str, "|");
4938 StringCat (str, sy);
4939 StringCat (str, "|");
4940 if (StringDoesHaveText (nm)) {
4941 StringCat (str, nm);
4942 }
4943 StringCat (str, "|");
4944 if (StringDoesHaveText (ds)) {
4945 StringCat (str, ds);
4946 }
4947 StringCat (str, "\n");
4948
4949 head = (ValNodePtr PNTR) userdata;
4950 ValNodeCopyStr (head, 0, str);
4951 MemFree (str);
4952 }
4953
4954 static void PrintFTUserObj (
4955 UserObjectPtr uop,
4956 Pointer userdata
4957 )
4958
4959 {
4960 ObjectIdPtr oip;
4961
4962 if (uop == NULL) return;
4963 oip = uop->type;
4964 if (oip == NULL) return;
4965 if (StringICmp (oip->str, "GeneOntology") == 0) {
4966 VisitUserFieldsInUop (uop, userdata, PrintGeneOntologyUserFld);
4967 } else if (StringICmp (oip->str, "OfficialNomenclature") == 0) {
4968 PrintNomenclatureUserObject (uop, userdata);
4969 }
4970 }
4971
4972 static void PrintFTCodeBreak (
4973 ValNodePtr PNTR head,
4974 CodeBreakPtr cbp,
4975 BioseqPtr target
4976 )
4977
4978 {
4979 Char buf [80];
4980 Choice cbaa;
4981 IntAsn2gbJob iaj;
4982 CharPtr ptr;
4983 Uint1 residue;
4984 SeqCodeTablePtr sctp;
4985 Uint1 seqcode;
4986 SeqLocPtr slp;
4987 CharPtr str;
4988
4989 seqcode = 0;
4990 sctp = NULL;
4991 cbaa = cbp->aa;
4992 switch (cbaa.choice) {
4993 case 1 :
4994 seqcode = Seq_code_ncbieaa;
4995 break;
4996 case 2 :
4997 seqcode = Seq_code_ncbi8aa;
4998 break;
4999 case 3 :
5000 seqcode = Seq_code_ncbistdaa;
5001 break;
5002 default :
5003 break;
5004 }
5005 if (seqcode == 0) return;
5006 sctp = SeqCodeTableFind (seqcode);
5007 if (sctp == NULL) return;
5008
5009 MemSet ((Pointer) &iaj, 0, sizeof (IntAsn2gbJob));
5010 iaj.flags.iupacaaOnly = FALSE;
5011 iaj.relModeError = FALSE;
5012
5013 slp = SeqLocFindNext (cbp->loc, NULL);
5014 while (slp != NULL) {
5015 str = FFFlatLoc (&iaj, target, slp, FALSE);
5016 if (str != NULL) {
5017 residue = cbaa.value.intvalue;
5018 ptr = Get3LetterSymbol (&iaj, seqcode, sctp, residue);
5019 if (ptr == NULL) {
5020 ptr = "OTHER";
5021 }
5022 sprintf (buf, "\t\t\ttransl_except\t(pos:%s,aa:%s)\n", str, ptr);
5023 ValNodeCopyStr (head, 0, buf);
5024 MemFree (str);
5025 }
5026 slp = SeqLocFindNext (cbp->loc, slp);
5027 }
5028 }
5029
5030 static Boolean SeqIdWriteForTable (SeqIdPtr sip, CharPtr buf, size_t buflen, IntAsn2gbJobPtr ajp, Boolean giOK)
5031
5032 {
5033 SeqIdPtr accn = NULL, local = NULL, patent = NULL,
5034 pdb = NULL, general = NULL, gi = NULL;
5035 DbtagPtr dbt;
5036 Char id [128], str [250];
5037 Int2 numids;
5038 CharPtr prefix = NULL;
5039
5040 if (sip == NULL || buf == NULL || ajp == NULL) return FALSE;
5041
5042 while (sip != NULL) {
5043 switch (sip->choice) {
5044 case SEQID_LOCAL :
5045 local = sip;
5046 break;
5047 case SEQID_GENBANK :
5048 case SEQID_EMBL :
5049 case SEQID_PIR :
5050 case SEQID_SWISSPROT :
5051 case SEQID_DDBJ :
5052 case SEQID_PRF :
5053 case SEQID_TPG :
5054 case SEQID_TPE :
5055 case SEQID_TPD :
5056 case SEQID_OTHER :
5057 case SEQID_GPIPE :
5058 accn = sip;
5059 break;
5060 case SEQID_PATENT :
5061 patent = sip;
5062 break;
5063 case SEQID_GENERAL :
5064 dbt = (DbtagPtr) sip->data.ptrvalue;
5065 if (dbt != NULL && ! IsSkippableDbtag(dbt)) {
5066 general = sip;
5067 }
5068 break;
5069 case SEQID_PDB :
5070 pdb = sip;
5071 break;
5072 case SEQID_GI :
5073 gi = sip;
5074 break;
5075 default :
5076 break;
5077 }
5078 sip = sip->next;
5079 }
5080
5081 str [0] = '\0';
5082 numids = 0;
5083
5084 if (accn != NULL) {
5085 if (SeqIdWrite (accn, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
5086 StringCat (str, prefix);
5087 StringCat (str, id);
5088 prefix = "|";
5089 numids++;
5090 }
5091 }
5092
5093 if (general != NULL) {
5094 if (SeqIdWrite (general, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
5095 StringCat (str, prefix);
5096 StringCat (str, id);
5097 prefix = "|";
5098 numids++;
5099 }
5100 }
5101
5102 if (local != NULL && (! ajp->flags.suppressLocalID) && numids == 0) {
5103 if (SeqIdWrite (local, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
5104 StringCat (str, prefix);
5105 StringCat (str, id);
5106 prefix = "|";
5107 numids++;
5108 }
5109 }
5110
5111 if (gi != NULL && giOK && numids == 0) {
5112 if (SeqIdWrite (accn, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
5113 StringCat (str, prefix);
5114 StringCat (str, id);
5115 prefix = "|";
5116 numids++;
5117 }
5118 }
5119
5120 StringNCpy_0 (buf, str, buflen);
5121 if (StringHasNoText (buf)) return FALSE;
5122
5123 return TRUE;
5124 }
5125
5126 static void PrintBioSourceFtableEntry (
5127 ValNodePtr PNTR head,
5128 BioSourcePtr biop
5129 )
5130 {
5131 OrgModPtr mod;
5132 SubSourcePtr ssp;
5133 Char str [256];
5134
5135 if (head == NULL || biop == NULL) return;
5136
5137 if (biop->org != NULL && ! StringHasNoText (biop->org->taxname))
5138 {
5139 sprintf (str, "\t\t\torganism\t%s\n", biop->org->taxname);
5140 ValNodeCopyStr (head, 0, str);
5141 }
5142
5143 /* add OrgMods */
5144 if (biop->org != NULL && biop->org->orgname != NULL)
5145 {
5146 for (mod = biop->org->orgname->mod;
5147 mod != NULL;
5148 mod = mod->next)
5149 {
5150 switch (mod->subtype)
5151 {
5152 case ORGMOD_strain :
5153 sprintf (str, "\t\t\tstrain\t");
5154 break;
5155 case ORGMOD_substrain :
5156 sprintf (str, "\t\t\tsubstrain\t");
5157 break;
5158 case ORGMOD_type :
5159 sprintf (str, "\t\t\ttype\t");
5160 break;
5161 case ORGMOD_subtype :
5162 sprintf (str, "\t\t\tsubtype\t");
5163 break;
5164 case ORGMOD_variety :
5165 sprintf (str, "\t\t\tvariety\t");
5166 break;
5167 case ORGMOD_serotype :
5168 sprintf (str, "\t\t\tserotype\t");
5169 break;
5170 case ORGMOD_serogroup :
5171 sprintf (str, "\t\t\tserogroup\t");
5172 break;
5173 case ORGMOD_serovar :
5174 sprintf (str, "\t\t\tserovar\t");
5175 break;
5176 case ORGMOD_cultivar :
5177 sprintf (str, "\t\t\tcultivar\t");
5178 break;
5179 case ORGMOD_pathovar :
5180 sprintf (str, "\t\t\tpathovar\t");
5181 break;
5182 case ORGMOD_chemovar :
5183 sprintf (str, "\t\t\tchemovar\t");
5184 break;
5185 case ORGMOD_biovar :
5186 sprintf (str, "\t\t\tbiovar\t");
5187 break;
5188 case ORGMOD_biotype :
5189 sprintf (str, "\t\t\tbiotype\t");
5190 break;
5191 case ORGMOD_group :
5192 sprintf (str, "\t\t\tgroup\t");
5193 break;
5194 case ORGMOD_subgroup :
5195 sprintf (str, "\t\t\tsubgroup\t");
5196 break;
5197 case ORGMOD_isolate :
5198 sprintf (str, "\t\t\tisolate\t");
5199 break;
5200 case ORGMOD_common :
5201 sprintf (str, "\t\t\tcommon\t");
5202 break;
5203 case ORGMOD_acronym :
5204 sprintf (str, "\t\t\tacronym\t");
5205 break;
5206 case ORGMOD_dosage :
5207 sprintf (str, "\t\t\tdosage\t");
5208 break;
5209 case ORGMOD_nat_host :
5210 sprintf (str, "\t\t\tnat_host\t");
5211 break;
5212 case ORGMOD_sub_species :
5213 sprintf (str, "\t\t\tsub_species\t");
5214 break;
5215 case ORGMOD_specimen_voucher :
5216 sprintf (str, "\t\t\tspecimen_voucher\t");
5217 break;
5218 case ORGMOD_authority :
5219 sprintf (str, "\t\t\tauthority\t");
5220 break;
5221 case ORGMOD_forma :
5222 sprintf (str, "\t\t\tforma\t");
5223 break;
5224 case ORGMOD_forma_specialis :
5225 sprintf (str, "\t\t\tforma_specialis\t");
5226 break;
5227 case ORGMOD_ecotype :
5228 sprintf (str, "\t\t\tecotype\t");
5229 break;
5230 case ORGMOD_synonym :
5231 sprintf (str, "\t\t\tsynonym\t");
5232 break;
5233 case ORGMOD_anamorph :
5234 sprintf (str, "\t\t\tanamorph\t");
5235 break;
5236 case ORGMOD_teleomorph :
5237 sprintf (str, "\t\t\tteleomorph\t");
5238 break;
5239 case ORGMOD_breed :
5240 sprintf (str, "\t\t\tbreed\t");
5241 break;
5242 case ORGMOD_gb_acronym :
5243 sprintf (str, "\t\t\tgb_acronym\t");
5244 break;
5245 case ORGMOD_gb_anamorph :
5246 sprintf (str, "\t\t\tgb_anamorph\t");
5247 break;
5248 case ORGMOD_culture_collection :
5249 sprintf (str, "\t\t\tculture_collection\t");
5250 break;
5251 case ORGMOD_bio_material :
5252 sprintf (str, "\t\t\tbio_material\t");
5253 break;
5254 case ORGMOD_metagenome_source :
5255 sprintf (str, "\t\t\tmetagenome_source\t");
5256 break;
5257 case ORGMOD_old_lineage :
5258 sprintf (str, "\t\t\told_lineage\t");
5259 break;
5260 case ORGMOD_old_name :
5261 sprintf (str, "\t\t\told_name\t");
5262 break;
5263 case ORGMOD_other :
5264 sprintf (str, "\t\t\tnote\t");
5265 break;
5266 default :
5267 str [0] = 0;
5268 }
5269 if ( str [0] == 0) continue;
5270 if (! StringHasNoText (mod->subname))
5271 {
5272 StringNCat (str, mod->subname, sizeof (str) - StringLen (str) - 2);
5273 str [sizeof (str) - 2] = 0;
5274 }
5275 StringCat (str, "\n");
5276 ValNodeCopyStr (head, 0, str);
5277 }
5278 }
5279
5280 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next)
5281 {
5282 switch (ssp->subtype)
5283 {
5284 case SUBSRC_chromosome :
5285 sprintf (str, "\t\t\tchromosome\t");
5286 break;
5287 case SUBSRC_map :
5288 sprintf (str, "\t\t\tmap\t");
5289 break;
5290 case SUBSRC_clone :
5291 sprintf (str, "\t\t\tclone\t");
5292 break;
5293 case SUBSRC_haplotype :
5294 sprintf (str, "\t\t\thaplotype\t");
5295 break;
5296 case SUBSRC_genotype :
5297 sprintf (str, "\t\t\tgenotype\t");
5298 break;
5299 case SUBSRC_sex :
5300 sprintf (str, "\t\t\tsex\t");
5301 break;
5302 case SUBSRC_cell_line :
5303 sprintf (str, "\t\t\tcell_line\t");
5304 break;
5305 case SUBSRC_cell_type :
5306 sprintf (str, "\t\t\tcell_type\t");
5307 break;
5308 case SUBSRC_tissue_type :
5309 sprintf (str, "\t\t\ttissue_type\t");
5310 break;
5311 case SUBSRC_clone_lib :
5312 sprintf (str, "\t\t\tclone_lib\t");
5313 break;
5314 case SUBSRC_dev_stage :
5315 sprintf (str, "\t\t\tdev_stage\t");
5316 break;
5317 case SUBSRC_frequency :
5318 sprintf (str, "\t\t\tfrequency\t");
5319 break;
5320 case SUBSRC_germline :
5321 sprintf (str, "\t\t\tgermline\t");
5322 break;
5323 case SUBSRC_rearranged :
5324 sprintf (str, "\t\t\trearranged\t");
5325 break;
5326 case SUBSRC_lab_host :
5327 sprintf (str, "\t\t\tlab_host\t");
5328 break;
5329 case SUBSRC_pop_variant :
5330 sprintf (str, "\t\t\tpop_variant\t");
5331 break;
5332 case SUBSRC_tissue_lib :
5333 sprintf (str, "\t\t\ttissue_lib\t");
5334 break;
5335 case SUBSRC_plasmid_name :
5336 sprintf (str, "\t\t\tplasmid_name\t");
5337 break;
5338 case SUBSRC_transposon_name :
5339 sprintf (str, "\t\t\ttransposon_name\t");
5340 break;
5341 case SUBSRC_insertion_seq_name :
5342 sprintf (str, "\t\t\tinsertion_seq_name\t");
5343 break;
5344 case SUBSRC_plastid_name :
5345 sprintf (str, "\t\t\tplastid_name\t");
5346 break;
5347 case SUBSRC_country :
5348 sprintf (str, "\t\t\tcountry\t");
5349 break;
5350 case SUBSRC_segment :
5351 sprintf (str, "\t\t\tsegment\t");
5352 break;
5353 case SUBSRC_endogenous_virus_name :
5354 sprintf (str, "\t\t\tendogenous_virus_name\t");
5355 break;
5356 case SUBSRC_transgenic :
5357 sprintf (str, "\t\t\ttransgenic\t");
5358 break;
5359 case SUBSRC_environmental_sample :
5360 sprintf (str, "\t\t\tenvironmental_sample\t");
5361 break;
5362 case SUBSRC_isolation_source :
5363 sprintf (str, "\t\t\tisolation_source\t");
5364 break;
5365 case SUBSRC_lat_lon :
5366 sprintf (str, "\t\t\tlat_lon\t");
5367 break;
5368 case SUBSRC_collection_date :
5369 sprintf (str, "\t\t\tcollection_date\t");
5370 break;
5371 case SUBSRC_collected_by :
5372 sprintf (str, "\t\t\tcollected_by\t");
5373 break;
5374 case SUBSRC_identified_by :
5375 sprintf (str, "\t\t\tidentified_by\t");
5376 break;
5377 case SUBSRC_fwd_primer_seq :
5378 sprintf (str, "\t\t\tfwd_pcr_primer_seq\t");
5379 break;
5380 case SUBSRC_rev_primer_seq :
5381 sprintf (str, "\t\t\trev_pcr_primer_seq\t");
5382 break;
5383 case SUBSRC_fwd_primer_name :
5384 sprintf (str, "\t\t\tfwd_pcr_primer_name\t");
5385 break;
5386 case SUBSRC_rev_primer_name :
5387 sprintf (str, "\t\t\trev_pcr_primer_name\t");
5388 break;
5389 case SUBSRC_metagenomic :
5390 sprintf (str, "\t\t\tmetagenomic\t");
5391 break;
5392 case SUBSRC_mating_type :
5393 sprintf (str, "\t\t\tmating_type\t");
5394 break;
5395 case SUBSRC_linkage_group :
5396 sprintf (str, "\t\t\tlinkage_group\t");
5397 break;
5398 case SUBSRC_haplogroup :
5399 sprintf (str, "\t\t\thaplogroup\t");
5400 break;
5401 case SUBSRC_other :
5402 sprintf (str, "\t\t\tnote\t");
5403 break;
5404 default :
5405 str [0] = 0;
5406 }
5407 if ( str [0] == 0) continue;
5408 if (! StringHasNoText (ssp->name))
5409 {
5410 StringNCat (str, ssp->name, sizeof (str) - StringLen (str) - 2);
5411 str [sizeof (str) - 2] = 0;
5412 }
5413 StringCat (str, "\n");
5414 ValNodeCopyStr (head, 0, str);
5415 }
5416 }
5417
5418 static void AddOneFtableQual (
5419 ValNodePtr PNTR head,
5420 CharPtr qual,
5421 CharPtr val
5422 )
5423
5424 {
5425 size_t len;
5426 CharPtr tmp;
5427
5428 if (head == NULL) return;
5429 if (StringHasNoText (qual)) return;
5430 if (StringHasNoText (val)) return;
5431
5432 len = StringLen (qual) + StringLen (val) + 10;
5433 tmp = (CharPtr) MemNew (sizeof (Char) * len);
5434 if (tmp == NULL) return;
5435
5436 StringCpy (tmp, "\t\t\t");
5437 StringCat (tmp, qual);
5438 StringCat (tmp, "\t");
5439 StringCat (tmp, val);
5440 StringCat (tmp, "\n");
5441
5442 ValNodeAddStr (head, 0, tmp);
5443 }
5444
5445 NLM_EXTERN void PrintFtableLocAndQuals (
5446 IntAsn2gbJobPtr ajp,
5447 ValNodePtr PNTR head,
5448 BioseqPtr target,
5449 SeqFeatPtr sfp,
5450 SeqMgrFeatContextPtr context
5451 )
5452
5453 {
5454 CharPtr aa;
5455 Int2 bondidx;
5456 BioseqSetPtr bssp;
5457 CodeBreakPtr cbp;
5458 BioseqPtr cdna;
5459 SeqFeatPtr cds;
5460 CdRegionPtr crp;
5461 SeqMgrDescContext dcontext;
5462 DbtagPtr dbt;
5463 SeqMgrFeatContext fcontext;
5464 GBQualPtr gbq;
5465 ValNodePtr geneorprotdb;
5466 GeneRefPtr grp;
5467 Boolean is_gps_genomic = FALSE;
5468 CharPtr label;
5469 MolInfoPtr mip;
5470 SeqLocPtr newloc;
5471 Char numbuf [32];
5472 Int2 numcodons;
5473 ObjectIdPtr oip;
5474 BioseqPtr prod;
5475 SeqFeatPtr prot;
5476 ProtRefPtr prp = NULL;
5477 Boolean pseudo;
5478 RNAGenPtr rgp;
5479 RNAQualPtr rqp;
5480 RnaRefPtr rrp;
5481 SeqDescrPtr sdp;
5482 Int4 sec_str;
5483 SeqIdPtr sip;
5484 SeqIdPtr sip2;
5485 Int2 siteidx;
5486 SeqLocPtr slp;
5487 Char str [256];
5488 Char tmp [512];
5489 CharPtr tmpx;
5490 tRNAPtr trp;
5491 ValNodePtr vnp;
5492
5493 if (head == NULL || target == NULL || sfp == NULL || context == NULL) return;
5494 /* label = (CharPtr) FeatDefTypeLabel (sfp); */
5495 label = FindKeyFromFeatDefType (sfp->idx.subtype, FALSE);
5496 if (StringCmp (label, "Gene") == 0) {
5497 label = "gene";
5498 }
5499 else if (StringCmp (label, "Src") == 0) {
5500 label = "source";
5501 }
5502 if (StringHasNoText (label)) {
5503 label = "???";
5504 }
5505
5506 /* check if genomic sequence in genomic product set */
5507
5508 if (target->idx.parenttype == OBJ_BIOSEQSET) {
5509 bssp = (BioseqSetPtr) target->idx.parentptr;
5510 if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
5511 sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_molinfo, &dcontext);
5512 if (sdp != NULL) {
5513 mip = (MolInfoPtr) sdp->data.ptrvalue;
5514 if (mip != NULL && mip->biomol == MOLECULE_TYPE_GENOMIC) {
5515 is_gps_genomic = TRUE;
5516 }
5517 }
5518 }
5519 }
5520
5521 PrintFtableIntervals (head, target, sfp->location, label);
5522
5523 geneorprotdb = NULL;
5524 pseudo = sfp->pseudo;
5525
5526 switch (context->seqfeattype) {
5527 case SEQFEAT_GENE :
5528 grp = (GeneRefPtr) sfp->data.value.ptrvalue;
5529 if (grp != NULL) {
5530 geneorprotdb = grp->db;
5531 pseudo |= grp->pseudo;
5532
5533 StringNCpy_0 (str, (CharPtr) grp->locus, sizeof (str));
5534 if (! StringHasNoText (str)) {
5535 sprintf (tmp, "\t\t\tgene\t%s\n", str);
5536 ValNodeCopyStr (head, 0, tmp);
5537 }
5538 for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
5539 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
5540 if (! StringHasNoText (str)) {
5541 sprintf (tmp, "\t\t\tgene_syn\t%s\n", str);
5542 ValNodeCopyStr (head, 0, tmp);
5543 }
5544 }
5545 if (! StringHasNoText (grp->desc)) {
5546 sprintf (tmp, "\t\t\tgene_desc\t%s\n", grp->desc);
5547 ValNodeCopyStr (head, 0, tmp);
5548 }
5549 if (! StringHasNoText (grp->maploc)) {
5550 sprintf (tmp, "\t\t\tmap\t%s\n", grp->maploc);
5551 ValNodeCopyStr (head, 0, tmp);
5552 }
5553 if (! StringHasNoText (grp->locus_tag)) {
5554 sprintf (tmp, "\t\t\tlocus_tag\t%s\n", grp->locus_tag);
5555 ValNodeCopyStr (head, 0, tmp);
5556 }
5557 }
5558 break;
5559 case SEQFEAT_CDREGION :
5560 prod = BioseqFind (SeqLocId (sfp->product));
5561 prot = SeqMgrGetBestProteinFeature (prod, NULL);
5562 if (prot != NULL) {
5563 prp = (ProtRefPtr) prot->data.value.ptrvalue;
5564 }
5565 if (prp == NULL) {
5566 prp = SeqMgrGetProtXref (sfp);
5567 }
5568 if (prp != NULL) {
5569 geneorprotdb = prp->db;
5570 if (prp->name != NULL) {
5571 for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
5572 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
5573 if (! StringHasNoText (str)) {
5574 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
5575 ValNodeCopyStr (head, 0, tmp);
5576 }
5577 }
5578 }
5579 if (prp->desc != NULL) {
5580 StringNCpy_0 (str, prp->desc, sizeof (str));
5581 if (! StringHasNoText (str)) {
5582 sprintf (tmp, "\t\t\tprot_desc\t%s\n", str);
5583 ValNodeCopyStr (head, 0, tmp);
5584 }
5585 }
5586 for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) {
5587 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
5588 if (! StringHasNoText (str)) {
5589 sprintf (tmp, "\t\t\tfunction\t%s\n", str);
5590 ValNodeCopyStr (head, 0, tmp);
5591 }
5592 }
5593 for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
5594 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
5595 if (! StringHasNoText (str)) {
5596 sprintf (tmp, "\t\t\tEC_number\t%s\n", str);
5597 ValNodeCopyStr (head, 0, tmp);
5598 }
5599 }
5600 }
5601 if (prot != NULL) {
5602 AddOneFtableQual (head, "prot_note", prot->comment);
5603 /*
5604 StringNCpy_0 (str, prot->comment, sizeof (str));
5605 if (! StringHasNoText (str)) {
5606 sprintf (tmp, "\t\t\tprot_note\t%s\n", str);
5607 ValNodeCopyStr (head, 0, tmp);
5608 }
5609 */
5610 }
5611 crp = (CdRegionPtr) sfp->data.value.ptrvalue;
5612 if (crp != NULL) {
5613 if (crp->frame > 1 && crp->frame <= 3) {
5614 sprintf (tmp, "\t\t\tcodon_start\t%d\n", (int) crp->frame);
5615 ValNodeCopyStr (head, 0, tmp);
5616 }
5617 for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
5618 PrintFTCodeBreak (head, cbp, target);
5619 }
5620 }
5621 if (prod != NULL) {
5622 if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
5623 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
5624 ValNodeCopyStr (head, 0, tmp);
5625 }
5626 if (is_gps_genomic) {
5627 cds = SeqMgrGetCDSgivenProduct (prod, NULL);
5628 if (cds != NULL) {
5629 cdna = BioseqFindFromSeqLoc (cds->location);
5630 if (cdna != NULL) {
5631 if (SeqIdWriteForTable (cdna->id, str, sizeof (str), ajp, FALSE)) {
5632 sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
5633 ValNodeCopyStr (head, 0, tmp);
5634 }
5635 }
5636 }
5637 }
5638 } else if (sfp->product != NULL) {
5639 sip = SeqLocId (sfp->product);
5640 if (sip != NULL) {
5641 if (sip->choice == SEQID_GI) {
5642 sip2 = GetSeqIdForGI (sip->data.intvalue);
5643 if (sip2 != NULL) {
5644 sip = sip2;
5645 }
5646 }
5647 if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
5648 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
5649 ValNodeCopyStr (head, 0, tmp);
5650 }
5651 }
5652 }
5653 break;
5654 case SEQFEAT_RNA :
5655 prod = BioseqFind (SeqLocId (sfp->product));
5656 rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
5657 if (rrp != NULL) {
5658 switch (rrp->ext.choice) {
5659 case 1 :
5660 StringNCpy_0 (str, (CharPtr) rrp->ext.value.ptrvalue, sizeof (str));
5661 if (! StringHasNoText (str)) {
5662 if (rrp->type == 255 &&
5663 (StringICmp (str, "misc_RNA") == 0 ||
5664 StringICmp (str, "ncRNA") == 0 ||
5665 StringICmp (str, "tmRNA") == 0)) {
5666 /* type other now uses name for type, product gbqual for product name */
5667 } else {
5668 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
5669 ValNodeCopyStr (head, 0, tmp);
5670 }
5671 }
5672 break;
5673 case 2 :
5674 trp = (tRNAPtr) rrp->ext.value.ptrvalue;
5675 if (trp != NULL) {
5676 FeatDefLabel (sfp, str, sizeof (str) - 1, OM_LABEL_CONTENT);
5677 if (! StringHasNoText (str)) {
5678 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
5679 ValNodeCopyStr (head, 0, tmp);
5680 }
5681 numcodons = ComposeCodonsRecognizedString (trp, numbuf, sizeof (numbuf));
5682 if (numcodons > 0 && StringDoesHaveText (numbuf)) {
5683 sprintf (tmp, "\t\t\tcodon_recognized\t%s\n", numbuf);
5684 ValNodeCopyStr (head, 0, tmp);
5685 }
5686 slp = trp->anticodon;
5687 newloc = NULL;
5688 if (slp != NULL && ajp->ajp.slp != NULL) {
5689 sip = SeqIdParse ("lcl|dummy");
5690 newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle);
5691 SeqIdFree (sip);
5692 slp = newloc;
5693 if (newloc != NULL) {
5694 A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
5695 }
5696 }
5697 aa = str;
5698 if (StringNICmp (aa, "tRNA-", 5) == 0) {
5699 aa += 5;
5700 }
5701 if (slp != NULL && StringDoesHaveText (aa)) {
5702 tmpx = FFFlatLoc (ajp, target, slp, ajp->masterStyle);
5703 if (tmpx != NULL) {
5704 sprintf (tmp, "\t\t\tanticodon\t(pos:%s,aa:%s)\n", tmpx, aa);
5705 ValNodeCopyStr (head, 0, tmp);
5706 }
5707 MemFree (tmpx);
5708 }
5709 if (newloc != NULL) {
5710 SeqLocFree (newloc);
5711 }
5712 }
5713 break;
5714 case 3 :
5715 rgp = (RNAGenPtr) rrp->ext.value.ptrvalue;
5716 if (rgp != NULL) {
5717 StringNCpy_0 (str, rgp->_class, sizeof (str));
5718 if (StringDoesHaveText (str)) {
5719 sprintf (tmp, "\t\t\tncRNA_class\t%s\n", str);
5720 ValNodeCopyStr (head, 0, tmp);
5721 }
5722 StringNCpy_0 (str, rgp->product, sizeof (str));
5723 if (StringDoesHaveText (str)) {
5724 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
5725 ValNodeCopyStr (head, 0, tmp);
5726 }
5727 for (rqp = rgp->quals; rqp != NULL; rqp = rqp->next) {
5728 if (StringDoesHaveText (rqp->qual) && StringDoesHaveText (rqp->val)) {
5729 AddOneFtableQual (head, rqp->qual, rqp->val);
5730 }
5731 }
5732 }
5733 default :
5734 break;
5735 }
5736 }
5737 if (prod != NULL) {
5738 if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
5739 sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
5740 ValNodeCopyStr (head, 0, tmp);
5741 }
5742 if (is_gps_genomic) {
5743 cds = SeqMgrGetNextFeature (prod, NULL, SEQFEAT_CDREGION, 0, &fcontext);
5744 if (cds != NULL && SeqMgrGetNextFeature (prod, cds, SEQFEAT_CDREGION, 0, &fcontext) == NULL) {
5745 prod = BioseqFindFromSeqLoc (cds->product);
5746 if (prod != NULL) {
5747 if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
5748 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
5749 ValNodeCopyStr (head, 0, tmp);
5750 }
5751 }
5752 }
5753 }
5754 } else if (sfp->product != NULL) {
5755 sip = SeqLocId (sfp->product);
5756 if (sip != NULL) {
5757 if (sip->choice == SEQID_GI) {
5758 sip2 = GetSeqIdForGI (sip->data.intvalue);
5759 if (sip2 != NULL) {
5760 sip = sip2;
5761 }
5762 }
5763 if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
5764 sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
5765 ValNodeCopyStr (head, 0, tmp);
5766 }
5767 }
5768 }
5769 break;
5770 case SEQFEAT_PROT :
5771 prod = BioseqFind (SeqLocId (sfp->product));
5772 prp = (ProtRefPtr) sfp->data.value.ptrvalue;
5773 if (prp != NULL) {
5774 if (prp->name != NULL) {
5775 for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
5776 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
5777 if (! StringHasNoText (str)) {
5778 sprintf (tmp, "\t\t\tproduct\t%s\n", str);
5779 ValNodeCopyStr (head, 0, tmp);
5780 }
5781 }
5782 }
5783 if (prp->desc != NULL) {
5784 StringNCpy_0 (str, prp->desc, sizeof (str));
5785 if (! StringHasNoText (str)) {
5786 sprintf (tmp, "\t\t\tprot_desc\t%s\n", str);
5787 ValNodeCopyStr (head, 0, tmp);
5788 }
5789 }
5790 for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) {
5791 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
5792 if (! StringHasNoText (str)) {
5793 sprintf (tmp, "\t\t\tfunction\t%s\n", str);
5794 ValNodeCopyStr (head, 0, tmp);
5795 }
5796 }
5797 for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
5798 StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
5799 if (! StringHasNoText (str)) {
5800 sprintf (tmp, "\t\t\tEC_number\t%s\n", str);
5801 ValNodeCopyStr (head, 0, tmp);
5802 }
5803 }
5804 }
5805 AddOneFtableQual (head, "prot_note", sfp->comment);
5806 /*
5807 StringNCpy_0 (str, sfp->comment, sizeof (str));
5808 if (! StringHasNoText (str)) {
5809 sprintf (tmp, "\t\t\tprot_note\t%s\n", str);
5810 ValNodeCopyStr (head, 0, tmp);
5811 }
5812 */
5813 if (prod != NULL) {
5814 if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
5815 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
5816 ValNodeCopyStr (head, 0, tmp);
5817 }
5818 } else if (sfp->product != NULL) {
5819 sip = SeqLocId (sfp->product);
5820 if (sip != NULL) {
5821 if (sip->choice == SEQID_GI) {
5822 sip2 = GetSeqIdForGI (sip->data.intvalue);
5823 if (sip2 != NULL) {
5824 sip = sip2;
5825 }
5826 }
5827 if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
5828 sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
5829 ValNodeCopyStr (head, 0, tmp);
5830 }
5831 }
5832 }
5833 break;
5834 case SEQFEAT_REGION :
5835 StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
5836 if (! StringHasNoText (str)) {
5837 sprintf (tmp, "\t\t\tregion_name\t%s\n", str);
5838 ValNodeCopyStr (head, 0, tmp);
5839 }
5840 break;
5841 case SEQFEAT_BOND :
5842 bondidx = (Int2) sfp->data.value.intvalue;
5843 if (bondidx == 255) {
5844 bondidx = 5;
5845 }
5846 if (bondidx > 0 && bondidx < 6) {
5847 sprintf (tmp, "\t\t\tbond_type\t%s\n", bondList [bondidx]);
5848 ValNodeCopyStr (head, 0, tmp);
5849 }
5850 break;
5851 case SEQFEAT_SITE :
5852 siteidx = (Int2) sfp->data.value.intvalue;
5853 if (siteidx == 255) {
5854 siteidx = 26;
5855 }
5856 if (siteidx > 0 && siteidx < 27) {
5857 sprintf (tmp, "\t\t\tsite_type\t%s\n", siteList [siteidx]);
5858 ValNodeCopyStr (head, 0, tmp);
5859 }
5860 break;
5861 case SEQFEAT_PSEC_STR :
5862 sec_str = (Int2) sfp->data.value.intvalue;
5863 if (sec_str > 0 && sec_str <= 3) {
5864 sprintf (tmp, "\t\t\tsec_str_type\t%s\n", secStrText [sec_str]);
5865 ValNodeCopyStr (head, 0, tmp);
5866 }
5867 break;
5868 case SEQFEAT_HET :
5869 StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
5870 if (! StringHasNoText (str)) {
5871 sprintf (tmp, "\t\t\theterogen\t%s\n", str);
5872 ValNodeCopyStr (head, 0, tmp);
5873 }
5874 break;
5875 case SEQFEAT_BIOSRC :
5876 PrintBioSourceFtableEntry (head, sfp->data.value.ptrvalue);
5877 break;
5878 default :
5879 break;
5880 }
5881 if (pseudo) {
5882 ValNodeCopyStr (head, 0, "\t\t\tpseudo\n");
5883 }
5884 grp = SeqMgrGetGeneXref (sfp);
5885 if (grp != NULL) {
5886 if (SeqMgrGeneIsSuppressed (grp)) {
5887 ValNodeCopyStr (head, 0, "\t\t\tgene\t-\n");
5888 } else {
5889 if (StringDoesHaveText (grp->locus)) {
5890 sprintf (tmp, "\t\t\tgene\t%s\n", grp->locus);
5891 ValNodeCopyStr (head, 0, tmp);
5892 }
5893 if (StringDoesHaveText (grp->locus_tag)) {
5894 sprintf (tmp, "\t\t\tlocus_tag\t%s\n", grp->locus_tag);
5895 ValNodeCopyStr (head, 0, tmp);
5896 }
5897 }
5898 }
5899 if (! StringHasNoText (sfp->comment)) {
5900 ValNodeCopyStr (head, 0, "\t\t\tnote\t");
5901 ValNodeCopyStr (head, 0, sfp->comment);
5902 ValNodeCopyStr (head, 0, "\n");
5903 }
5904 switch (sfp->exp_ev) {
5905 case 1 :
5906 ValNodeCopyStr (head, 0, "\t\t\tevidence\texperimental\n");
5907 break;
5908 case 2 :
5909 ValNodeCopyStr (head, 0, "\t\t\tevidence\tnot_experimental\n");
5910 break;
5911 default :
5912 break;
5913 }
5914 if (! StringHasNoText (sfp->except_text)) {
5915 ValNodeCopyStr (head, 0, "\t\t\texception\t");
5916 ValNodeCopyStr (head, 0, sfp->except_text);
5917 ValNodeCopyStr (head, 0, "\n");
5918 } else if (sfp->excpt) {
5919 ValNodeCopyStr (head, 0, "\t\t\texception\n");
5920 }
5921 for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
5922 AddOneFtableQual (head, gbq->qual, gbq->val);
5923 /*
5924 if (! StringHasNoText (gbq->qual)) {
5925 if (! StringHasNoText (gbq->val)) {
5926 sprintf (tmp, "\t\t\t%s\t%s\n", gbq->qual, gbq->val);
5927 ValNodeCopyStr (head, 0, tmp);
5928 }
5929 }
5930 */
5931 }
5932 VisitUserObjectsInUop (sfp->ext, (Pointer) head, PrintFTUserObj);
5933 for (vnp = geneorprotdb; vnp != NULL; vnp = vnp->next) {
5934 dbt = (DbtagPtr) vnp->data.ptrvalue;
5935 if (dbt != NULL) {
5936 if (! StringHasNoText (dbt->db)) {
5937 oip = dbt->tag;
5938 if (oip->str != NULL && (! StringHasNoText (oip->str))) {
5939 sprintf (tmp, "\t\t\tdb_xref\t%s:%s\n", dbt->db, oip->str);
5940 ValNodeCopyStr (head, 0, tmp);
5941 } else {
5942 sprintf (tmp, "\t\t\tdb_xref\t%s:%ld\n", dbt->db, (long) oip->id);
5943 ValNodeCopyStr (head, 0, tmp);
5944 }
5945 }
5946 }
5947 }
5948 for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
5949 dbt = (DbtagPtr) vnp->data.ptrvalue;
5950 if (dbt != NULL) {
5951 if (! StringHasNoText (dbt->db)) {
5952 oip = dbt->tag;
5953 if (oip->str != NULL && (! StringHasNoText (oip->str))) {
5954 sprintf (tmp, "\t\t\tdb_xref\t%s:%s\n", dbt->db, oip->str);
5955 ValNodeCopyStr (head, 0, tmp);
5956 } else {
5957 sprintf (tmp, "\t\t\tdb_xref\t%s:%ld\n", dbt->db, (long) oip->id);
5958 ValNodeCopyStr (head, 0, tmp);
5959 }
5960 }
5961 }
5962 }
5963 }
5964
5965 static BioseqPtr FindFirstBioseq (SeqEntryPtr sep)
5966
5967 {
5968 BioseqPtr bsp;
5969 BioseqSetPtr bssp;
5970
5971 if (sep == NULL || sep->data.ptrvalue == NULL ||
5972 /* sep->choice < 0 || */ sep->choice > 2) return NULL;
5973 if (IS_Bioseq (sep)) {
5974 bsp = (BioseqPtr) sep->data.ptrvalue;
5975 return bsp;
5976 }
5977 bssp = (BioseqSetPtr) sep->data.ptrvalue;
5978 for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
5979 bsp = FindFirstBioseq (sep);
5980 if (bsp != NULL) return bsp;
5981 }
5982 return NULL;
5983 }
5984
5985 static BioseqPtr BioseqLockAndIndexByEntity (Uint2 entityID)
5986
5987 {
5988 BioseqPtr bsp;
5989 SeqEntryPtr sep;
5990 SeqIdPtr sip;
5991
5992 if (entityID < 1) return NULL;
5993
5994 sep = SeqMgrGetSeqEntryForEntityID (entityID);
5995 if (sep == NULL) return NULL;
5996
5997 bsp = FindFirstBioseq (sep);
5998 if (bsp == NULL) return NULL;
5999
6000 sip = SeqIdFindBest (bsp->id, 0);
6001 if (sip == NULL) return NULL;
6002
6003 bsp = BioseqLockById (sip);
6004 if (bsp == NULL) return NULL;
6005
6006 if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
6007 SeqMgrIndexFeatures (entityID, NULL);
6008 }
6009
6010 return bsp;
6011 }
6012
6013 NLM_EXTERN CharPtr FormatFtableSourceFeatBlock (
6014 BaseBlockPtr bbp,
6015 BioseqPtr target
6016 )
6017
6018 {
6019 SeqFeatPtr sfp;
6020 SeqDescPtr sdp;
6021 SeqMgrDescContext dcontext;
6022 SeqMgrFeatContext fcontext;
6023 BioSourcePtr biop;
6024 ValNodePtr head;
6025 IntSrcBlockPtr isp;
6026 CharPtr str;
6027
6028 if (bbp == NULL) return NULL;
6029
6030 isp = (IntSrcBlockPtr) bbp;
6031 head = NULL;
6032 biop = NULL;
6033
6034 if (bbp->itemtype == OBJ_SEQDESC) {
6035 sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID,
6036 0, NULL, &dcontext);
6037 if (sdp == NULL) return NULL;
6038 biop = sdp->data.ptrvalue;
6039 } else if (bbp->itemtype == OBJ_SEQFEAT) {
6040 sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
6041 if (sfp == NULL) return NULL;
6042 biop = sfp->data.value.ptrvalue;
6043 }
6044 if (biop == NULL) return NULL;
6045 PrintFtableIntervals (&head, target, isp->loc, "source");
6046 PrintBioSourceFtableEntry (&head, biop);
6047
6048 str = MergeFFValNodeStrs (head);
6049 ValNodeFreeData (head);
6050
6051 return str;
6052 }
6053
6054 NLM_EXTERN void DoImmediateFormat (
6055 Asn2gbFormatPtr afp,
6056 BaseBlockPtr bbp
6057 )
6058
6059 {
6060 IntAsn2gbJobPtr ajp;
6061 BlockType blocktype;
6062 BioseqPtr bsp;
6063 FormatProc fmt;
6064 Boolean is_www;
6065 size_t max;
6066 SeqEntryPtr oldscope;
6067 QualValPtr qv = NULL;
6068 SeqEntryPtr sep;
6069 CharPtr str = NULL;
6070 Uint2 itemtype;
6071 Uint2 itemID;
6072
6073 if (afp == NULL || bbp == NULL) return;
6074 ajp = afp->ajp;
6075 if (ajp == NULL) return;
6076 is_www = GetWWW (ajp);
6077
6078 blocktype = bbp->blocktype;
6079 if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return;
6080 fmt = asn2gnbk_fmt_functions [(int) blocktype];
6081 if (fmt == NULL) return;
6082
6083 max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
6084 qv = MemNew (sizeof (QualVal) * (max + 5));
6085 if (qv == NULL) return;
6086
6087 sep = GetTopSeqEntryForEntityID (bbp->entityID);
6088
6089 bsp = BioseqLockAndIndexByEntity (bbp->entityID);
6090 oldscope = SeqEntrySetScope (sep);
6091
6092 afp->qvp = qv;
6093 str = fmt (afp, bbp);
6094 afp->qvp = NULL;
6095
6096 if (bbp->itemtype == 0 && ajp->ajp.bsp != NULL) {
6097 itemtype = ajp->ajp.bsp->idx.itemtype;
6098 itemID = ajp->ajp.bsp->idx.itemID;
6099 } else {
6100 itemtype = bbp->itemtype;
6101 itemID = bbp->itemID;
6102 }
6103
6104 SeqEntrySetScope (oldscope);
6105 BioseqUnlock (bsp);
6106
6107 if (str != NULL) {
6108 if (afp->fp != NULL) {
6109 fprintf (afp->fp, "%s", str);
6110 }
6111 if (afp->ffwrite != NULL) {
6112 afp->ffwrite (str, afp->userdata, blocktype, bbp->entityID, itemtype, itemID);
6113 }
6114 } else {
6115 if (afp->fp != NULL) {
6116 fprintf (afp->fp, "?\n");
6117 }
6118 if (afp->ffwrite != NULL) {
6119 afp->ffwrite ("?\n", afp->userdata, blocktype, bbp->entityID, itemtype, itemID);
6120 }
6121 }
6122
6123 MemFree (str);
6124 MemFree (qv);
6125 }
6126
6127 NLM_EXTERN void DoQuickLinkFormat (
6128 Asn2gbFormatPtr afp,
6129 CharPtr str
6130 )
6131
6132 {
6133 Uint2 entityID = 0, item_type = 0;
6134 Uint4 itemID = 0;
6135
6136 if (afp == NULL || StringHasNoText (str)) return;
6137
6138 if (afp->ajp != NULL) {
6139 if (afp->ajp->ajp.bsp != NULL) {
6140 entityID = afp->ajp->ajp.bsp->idx.entityID;
6141 item_type = OBJ_BIOSEQ;
6142 itemID = afp->ajp->ajp.bsp->idx.itemID;
6143 } else if (afp->ajp->ajp.bssp != NULL) {
6144 entityID = afp->ajp->ajp.bssp->idx.entityID;
6145 item_type = OBJ_BIOSEQSET;
6146 itemID = afp->ajp->ajp.bssp->idx.itemID;
6147 }
6148 }
6149
6150 if (str != NULL) {
6151 if (afp->fp != NULL) {
6152 fprintf (afp->fp, "%s", str);
6153 }
6154 if (afp->ffwrite != NULL) {
6155 afp->ffwrite (str, afp->userdata, (BlockType) 0, entityID, item_type, itemID);
6156 }
6157 }
6158 }
6159
6160 NLM_EXTERN CharPtr asn2gnbk_format (
6161 Asn2gbJobPtr ajp,
6162 Int4 paragraph
6163 )
6164
6165 {
6166 Asn2gbFormat af;
6167 Asn2gbSectPtr asp;
6168 BaseBlockPtr bbp;
6169 BlockType blocktype;
6170 BioseqPtr bsp;
6171 FormatProc fmt;
6172 IntAsn2gbJobPtr iajp;
6173 size_t max;
6174 SeqEntryPtr oldscope;
6175 QualValPtr qv;
6176 Int4 section;
6177 SeqEntryPtr sep;
6178 CharPtr str = NULL;
6179
6180 /* qv must hold MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR) */
6181
6182 iajp = (IntAsn2gbJobPtr) ajp;
6183 if (iajp == NULL || ajp->sectionArray == NULL || ajp->paragraphArray == NULL) return NULL;
6184 if (paragraph < 0 || paragraph >= ajp->numParagraphs) return NULL;
6185
6186 bbp = ajp->paragraphArray [paragraph];
6187 if (bbp == NULL) return NULL;
6188
6189 section = bbp->section;
6190 if (section < 0 || section >= ajp->numSections) return NULL;
6191
6192 asp = ajp->sectionArray [section];
6193 if (asp == NULL) return NULL;
6194
6195 blocktype = bbp->blocktype;
6196 if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return NULL;
6197
6198 max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
6199 qv = MemNew (sizeof (QualVal) * (max + 5));
6200 if (qv == NULL) return NULL;
6201
6202 MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
6203 af.ajp = (IntAsn2gbJobPtr) ajp;
6204 af.asp = asp;
6205 af.qvp = qv;
6206 af.format = iajp->format;
6207 af.aip = iajp->aip;
6208 af.atp = iajp->atp;
6209
6210 sep = GetTopSeqEntryForEntityID (bbp->entityID);
6211
6212 fmt = asn2gnbk_fmt_functions [(int) blocktype];
6213 if (fmt == NULL) return NULL;
6214
6215 bsp = BioseqLockAndIndexByEntity (bbp->entityID);
6216 oldscope = SeqEntrySetScope (sep);
6217
6218 str = fmt (&af, bbp);
6219
6220 SeqEntrySetScope (oldscope);
6221 BioseqUnlock (bsp);
6222
6223 if (str == NULL) {
6224 str = StringSave ("???\n");
6225 }
6226
6227 MemFree (qv);
6228
6229 return str;
6230 }
6231
6232 NLM_EXTERN Asn2gbJobPtr asn2gnbk_cleanup (
6233 Asn2gbJobPtr ajp
6234 )
6235
6236 {
6237 Asn2gbSectPtr asp;
6238 BaseBlockPtr bbp;
6239 BaseBlockPtr PNTR blockArray;
6240 BioseqPtr bsp;
6241 ValNodePtr gapvnp;
6242 Int4 i;
6243 IntAsn2gbJobPtr iajp;
6244 IntAsn2gbSectPtr iasp;
6245 IntCdsBlockPtr icp;
6246 IntFeatBlockPtr ifp;
6247 IntRefBlockPtr irp;
6248 IntSrcBlockPtr isp;
6249 Int4 j;
6250 Int4 numBlocks;
6251 Int4 numSections;
6252 RefBlockPtr rbp;
6253 Asn2gbFreeFunc remotefree;
6254 ValNodePtr remotevnp;
6255 SeqAnnotPtr sap;
6256 SeqAnnotPtr sapnext;
6257 Asn2gbSectPtr PNTR sectionArray;
6258 StringItemPtr sip, nxt;
6259 SeqBlockPtr sbp;
6260 ValNodePtr vnp;
6261
6262 iajp = (IntAsn2gbJobPtr) ajp;
6263 if (iajp == NULL) return NULL;
6264
6265 SeqLocFree (iajp->ajp.slp);
6266
6267 numSections = ajp->numSections;
6268 sectionArray = ajp->sectionArray;
6269
6270 if (sectionArray != NULL) {
6271
6272 for (i = 0; i < numSections; i++) {
6273 asp = sectionArray [i];
6274 if (asp != NULL) {
6275 iasp = (IntAsn2gbSectPtr) asp;
6276
6277 numBlocks = asp->numBlocks;
6278 blockArray = asp->blockArray;
6279 if (blockArray != NULL) {
6280
6281 for (j = 0; j < numBlocks; j++) {
6282 bbp = blockArray [j];
6283 if (bbp != NULL) {
6284
6285 MemFree (bbp->string);
6286
6287 if (bbp->blocktype == REFERENCE_BLOCK) {
6288 rbp = (RefBlockPtr) bbp;
6289 MemFree (rbp->uniquestr);
6290 irp = (IntRefBlockPtr) rbp;
6291 DateFree (irp->date);
6292 SeqLocFree (irp->loc);
6293 MemFree (irp->authstr);
6294 MemFree (irp->fig);
6295 MemFree (irp->maploc);
6296
6297 } else if (bbp->blocktype == SOURCEFEAT_BLOCK) {
6298
6299 isp = (IntSrcBlockPtr) bbp;
6300 SeqLocFree (isp->loc);
6301
6302 } else if (bbp->blocktype == FEATURE_BLOCK) {
6303
6304 ifp = (IntFeatBlockPtr) bbp;
6305 if (ifp->isCDS) {
6306 icp = (IntCdsBlockPtr) ifp;
6307 MemFree (icp->fig);
6308 MemFree (icp->maploc);
6309 }
6310
6311 } else if (bbp->blocktype == SEQUENCE_BLOCK) {
6312
6313 sbp = (SeqBlockPtr) bbp;
6314 MemFree (sbp->bases);
6315 }
6316
6317 MemFree (bbp);
6318 }
6319 }
6320 }
6321 MemFree (asp->blockArray);
6322 MemFree (asp->referenceArray);
6323 MemFree (asp);
6324 }
6325 }
6326 }
6327
6328 MemFree (ajp->sectionArray);
6329 MemFree (ajp->paragraphArray);
6330 MemFree (ajp->paragraphByIDs);
6331
6332 sip = iajp->pool;
6333 while (sip != NULL) {
6334 nxt = sip->next;
6335 MemFree (sip);
6336 sip = nxt;
6337 }
6338
6339 if (iajp->lockedBspList != NULL) {
6340 UnlockFarComponents (iajp->lockedBspList);
6341 }
6342
6343 if (iajp->gapvnp != NULL || iajp->remotevnp != NULL) {
6344 SeqMgrClearFeatureIndexes (ajp->entityID, NULL);
6345 if (iajp->reindex) {
6346 SeqMgrIndexFeaturesExEx (ajp->entityID, NULL, FALSE, FALSE, NULL);
6347 }
6348 }
6349
6350 if (iajp->gapvnp != NULL) {
6351 gapvnp = iajp->gapvnp;
6352 gapvnp->next = NULL; /* unlink in case remotevnp still linked after gapvnp */
6353 bsp = (BioseqPtr) gapvnp->data.ptrvalue;
6354 if (bsp != NULL) {
6355 sap = bsp->annot;
6356 while (sap != NULL) {
6357 sapnext = sap->next;
6358 SeqAnnotFree (sap);
6359 sap = sapnext;
6360 }
6361 }
6362 /* frees fake Bioseq that was created by MemNew, not BioseqNew */
6363 ValNodeFreeData (gapvnp);
6364 }
6365
6366 if (iajp->remotevnp != NULL) {
6367 remotevnp = iajp->remotevnp;
6368 remotefree = iajp->remotefree;
6369 if (remotefree != NULL) {
6370 /* if remotefree exists, it is responsible for all freeing */
6371 remotefree (remotevnp, iajp->remotedata);
6372 } else {
6373 /* otherwise free Bioseqs and ValNode chain ourselves */
6374 for (vnp = remotevnp; vnp != NULL; vnp = vnp->next) {
6375 bsp = (BioseqPtr) vnp->data.ptrvalue;
6376 if (bsp != NULL) {
6377 BioseqFree (bsp);
6378 }
6379 }
6380 ValNodeFree (remotevnp);
6381 }
6382 }
6383
6384 TextFsaFree (iajp->bad_html_fsa);
6385
6386 FreeUrlAnchorFSA ();
6387
6388 ValNodeFree (iajp->gihead);
6389
6390 free_buff ();
6391 FiniWWW (iajp);
6392
6393 MemFree (iajp);
6394
6395 return NULL;
6396 }
6397
6398 NLM_EXTERN Boolean SeqEntryToGnbk (
6399 SeqEntryPtr sep,
6400 SeqLocPtr slp,
6401 FmtType format,
6402 ModType mode,
6403 StlType style,
6404 FlgType flags,
6405 LckType locks,
6406 CstType custom,
6407 XtraPtr extra,
6408 FILE *fp
6409 )
6410
6411 {
6412 AsnIoPtr aip = NULL;
6413 AsnIoPtr aipfree = NULL;
6414 Asn2gbJobPtr ajp;
6415 AsnTypePtr atp = NULL;
6416 BioseqPtr bsp = NULL;
6417 BioseqSetPtr bssp = NULL;
6418 Boolean do_gbseq_asn = FALSE;
6419 Boolean do_gbseq_xml = FALSE;
6420 Asn2gbWriteFunc ffwrite = NULL;
6421 GBSeqPtr gbseq = NULL;
6422 GBSeq gbsq;
6423 IntAsn2gbJobPtr iajp;
6424 Boolean rsult = FALSE;
6425 Int1 type = ASNIO_TEXT_OUT;
6426 Pointer userdata = NULL;
6427 XtraBlock xtra;
6428 /*
6429 BaseBlockPtr bbp;
6430 BlockType block;
6431 CharPtr ffhead = NULL;
6432 CharPtr fftail = NULL;
6433 Int4 i;
6434 Boolean is_html;
6435 Int4 numParagraphs;
6436 BaseBlockPtr PNTR paragraphArray;
6437 CharPtr str;
6438 */
6439 #ifdef WIN_MAC
6440 #if __profile__
6441 ValNodePtr bsplist = NULL;
6442 Uint2 entityID;
6443 Boolean lockFarComp;
6444 Boolean lockFarLocs;
6445 Boolean lockFarProd;
6446 Boolean lookupFarComp;
6447 Boolean lookupFarHist;
6448 Boolean lookupFarInf;
6449 Boolean lookupFarLocs;
6450 Boolean lookupFarOthers;
6451 Boolean lookupFarProd;
6452 #endif
6453 #endif
6454
6455 if (extra != NULL) {
6456 ffwrite = extra->ffwrite;
6457 /*
6458 ffhead = extra->ffhead;
6459 fftail = extra->fftail;
6460 */
6461 gbseq = extra->gbseq;
6462 aip = extra->aip;
6463 atp = extra->atp;
6464 userdata = extra->userdata;
6465 }
6466 if (fp == NULL && ffwrite == NULL && aip == NULL) return FALSE;
6467 if (sep == NULL && slp == NULL) return FALSE;
6468 if (sep != NULL) {
6469 if (IS_Bioseq (sep)) {
6470 bsp = (BioseqPtr) sep->data.ptrvalue;
6471 } else if (IS_Bioseq_set (sep)) {
6472 bssp = (BioseqSetPtr) sep->data.ptrvalue;
6473 }
6474 }
6475
6476 #ifdef WIN_MAC
6477 #if __profile__
6478 /* this allows profiling of just the formatter, without feature indexing, on the Mac */
6479
6480 if (sep != NULL) {
6481 entityID = ObjMgrGetEntityIDForPointer (sep->data.ptrvalue);
6482 if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
6483 SeqMgrIndexFeatures (entityID, NULL);
6484 }
6485 }
6486
6487 lockFarComp = (Boolean) ((locks & LOCK_FAR_COMPONENTS) != 0);
6488 lockFarLocs = (Boolean) ((locks & LOCK_FAR_LOCATIONS) != 0);
6489 lockFarProd = (Boolean) ((locks & LOCK_FAR_PRODUCTS) != 0);
6490
6491 if (lockFarComp || lockFarLocs || lockFarProd) {
6492 locks = locks ^ (LOCK_FAR_COMPONENTS | LOCK_FAR_LOCATIONS | LOCK_FAR_PRODUCTS);
6493 if (slp != NULL && lockFarComp) {
6494 bsplist = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, slp);
6495 } else {
6496 bsplist = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
6497 }
6498 }
6499
6500 lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
6501 lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
6502 lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
6503 lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
6504 lookupFarInf = (Boolean) ((locks & LOOKUP_FAR_INFERENCE) != 0);
6505 lookupFarOthers = (Boolean) ((locks & LOOKUP_FAR_OTHERS) != 0);
6506
6507 if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist || lookupFarInf || lookupFarOthers) {
6508 locks = locks ^ (LOOKUP_FAR_COMPONENTS | LOOKUP_FAR_LOCATIONS | LOOKUP_FAR_PRODUCTS | LOOKUP_FAR_HISTORY | LOOKUP_FAR_INFERENCE | LOOKUP_FAR_OTHERS);
6509 LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist, lookupFarInf, lookupFarOthers);
6510 }
6511
6512 ProfilerSetStatus (TRUE);
6513 #endif
6514 #endif
6515
6516 do_gbseq_xml = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_XML_GBSEQ_FILE);
6517 do_gbseq_asn = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_ASN_GBSEQ_FILE);
6518
6519 if (do_gbseq_xml || do_gbseq_asn) {
6520 if (fp != NULL && aip == NULL) {
6521 if (do_gbseq_xml) {
6522 type |= ASNIO_XML;
6523 }
6524 aip = AsnIoNew (type, fp, NULL, NULL, NULL);
6525 aipfree = aip;
6526 fp = NULL;
6527 }
6528 if (extra == NULL) {
6529 MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock));
6530 extra = &xtra;
6531 }
6532 if (extra->gbseq == NULL) {
6533 MemSet ((Pointer) &gbsq, 0, sizeof (GBSeq));
6534 extra->gbseq = &gbsq;
6535 gbseq = extra->gbseq;
6536 }
6537 }
6538
6539 /* pass TRUE for stream to do immediate write at time of creation for speed */
6540
6541 ajp = asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
6542 flags, locks, custom, extra,
6543 TRUE, fp, aip, atp);
6544
6545 if (ajp != NULL) {
6546 rsult = TRUE;
6547 iajp = (IntAsn2gbJobPtr) ajp;
6548
6549 #if 0
6550 /* if streaming, all output was written in setup function, otherwise output here */
6551
6552 if (! stream) {
6553
6554 /* send optional head string */
6555
6556 is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
6557 if (ffhead == NULL && is_html) {
6558 ffhead = defHead;
6559 }
6560 if (ffhead != NULL) {
6561 if (fp != NULL) {
6562 fprintf (fp, ffhead);
6563 }
6564 }
6565 if (ffwrite != NULL) {
6566 ffwrite (ffhead, userdata, HEAD_BLOCK, 0, 0, 0);
6567 }
6568
6569 /* send each paragraph */
6570
6571 numParagraphs = ajp->numParagraphs;
6572 paragraphArray = ajp->paragraphArray;
6573
6574 for (i = 0; i < numParagraphs; i++) {
6575 str = asn2gnbk_format (ajp, i);
6576 block = (BlockType) 0;
6577 if (paragraphArray != NULL) {
6578 bbp = paragraphArray [i];
6579 if (bbp != NULL) {
6580 block = bbp->blocktype;
6581 }
6582 }
6583 if (str != NULL) {
6584 if (fp != NULL) {
6585 fprintf (fp, "%s", str);
6586 }
6587 if (ffwrite != NULL) {
6588 ffwrite (str, userdata, block, 0, 0, 0);
6589 }
6590 } else {
6591 if (fp != NULL) {
6592 fprintf (fp, "?\n");
6593 }
6594 if (ffwrite != NULL) {
6595 ffwrite ("?\n", userdata, block, 0, 0, 0);
6596 }
6597 }
6598
6599 MemFree (str);
6600 }
6601
6602 /* send optional tail string */
6603
6604 if (fftail == NULL && is_html) {
6605 fftail = defTail;
6606 }
6607 if (fftail != NULL) {
6608 if (fp != NULL) {
6609 fprintf (fp, fftail);
6610 }
6611 }
6612 if (ffwrite != NULL) {
6613 ffwrite (fftail, userdata, TAIL_BLOCK, 0, 0, 0);
6614 }
6615 }
6616 #endif
6617
6618 /* if RELEASE_MODE, warn if unresolved gi numbers, missing translation, etc. */
6619
6620 if (iajp->relModeError && mode == RELEASE_MODE) {
6621 rsult = FALSE;
6622 }
6623
6624 asn2gnbk_cleanup (ajp);
6625 }
6626
6627 if (aipfree != NULL) {
6628 AsnIoFree (aipfree, FALSE);
6629 }
6630
6631 #ifdef WIN_MAC
6632 #if __profile__
6633 ProfilerSetStatus (FALSE);
6634
6635 UnlockFarComponents (bsplist);
6636 #endif
6637 #endif
6638
6639 return rsult;
6640 }
6641
6642 NLM_EXTERN Boolean BioseqToGnbk (
6643 BioseqPtr bsp,
6644 SeqLocPtr slp,
6645 FmtType format,
6646 ModType mode,
6647 StlType style,
6648 FlgType flags,
6649 LckType locks,
6650 CstType custom,
6651 XtraPtr extra,
6652 FILE *fp
6653 )
6654
6655 {
6656 SeqEntryPtr sep = NULL;
6657
6658 if (bsp == NULL && slp == NULL) return FALSE;
6659 if (bsp != NULL) {
6660 sep = SeqMgrGetSeqEntryForData (bsp);
6661 }
6662 return SeqEntryToGnbk (sep, slp, format, mode, style, flags, locks, custom, extra, fp);
6663 }
6664
6665
6666 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |