|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/api/asn2gnb2.c |
source navigation diff markup identifier search freetext search file search |
1 /* asn2gnb2.c
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name: asn2gnb2.c
27 *
28 * Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
29 * Mati Shomrat
30 *
31 * Version Creation Date: 10/21/98
32 *
33 * $Revision: 1.131 $
34 *
35 * File Description: New GenBank flatfile generator - work in progress
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * ==========================================================================
40 */
41
42 #include <ncbi.h>
43 #include <objall.h>
44 #include <objsset.h>
45 #include <objsub.h>
46 #include <objfdef.h>
47 #include <objpubme.h>
48 #include <seqport.h>
49 #include <sequtil.h>
50 #include <sqnutils.h>
51 #include <subutil.h>
52 #include <tofasta.h>
53 #include <explore.h>
54 #include <gbfeat.h>
55 #include <gbftdef.h>
56 #include <edutil.h>
57 #include <alignmgr2.h>
58 #include <asn2gnbi.h>
59
60 #ifdef WIN_MAC
61 #if __profile__
62 #include <Profiler.h>
63 #endif
64 #endif
65
66 static CharPtr link_projid = "http://www.ncbi.nlm.nih.gov/sites/entrez?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=";
67
68 static CharPtr link_wgs = "http://www.ncbi.nlm.nih.gov/sites/entrez?";
69
70 static CharPtr link_sp = "http://www.uniprot.org/uniprot/";
71
72 /*
73 static CharPtr link_featn = "http://www.ncbi.nlm.nih.gov/nuccore/";
74 static CharPtr link_featp = "http://www.ncbi.nlm.nih.gov/protein/";
75 */
76
77 static CharPtr link_seqn = "http://www.ncbi.nlm.nih.gov/nuccore/";
78 static CharPtr link_seqp = "http://www.ncbi.nlm.nih.gov/protein/";
79
80 /*
81 static CharPtr link_omim = "http://www.ncbi.nlm.nih.gov/entrez/dispomim.cgi?id=";
82 */
83
84
85
86 /* ********************************************************************** */
87
88 /* add functions allocate specific blocks, populate with paragraph print info */
89
90 static CharPtr strd [4] = {
91 " ", "ss-", "ds-", "ms-"
92 };
93
94 static CharPtr gnbk_mol [16] = {
95 " ", "DNA ", "RNA ", "mRNA", "rRNA", "tRNA", "snRNA", "scRNA",
96 " AA ", "DNA ", "DNA ", "cRNA ", "snoRNA", "RNA ", "RNA ", "tmRNA "
97 };
98
99 /* EMBL_FMT in RELEASE_MODE or ENTREZ_MODE, otherwise use gnbk_mol */
100
101 static CharPtr embl_mol [16] = {
102 "xxx", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA", "RNA",
103 "AA ", "DNA", "DNA", "RNA", "RNA", "RNA", "RNA", "RNA"
104 };
105
106 static CharPtr embl_divs [18] = {
107 "FUN", "INV", "MAM", "ORG", "PHG", "PLN", "PRI", "PRO", "ROD"
108 "SYN", "UNA", "VRL", "VRT", "PAT", "EST", "STS", "HUM", "HTC"
109 };
110
111 static Uint1 imolToMoltype [16] = {
112 0, 1, 2, 5, 4, 3, 6, 7, 9, 1, 1, 2, 8, 2, 10, 11
113 };
114
115 static CharPtr gbseq_strd [4] = {
116 NULL, "single", "double", "mixed"
117 };
118
119 static CharPtr gbseq_mol [16] = {
120 "?", "DNA", "RNA", "mRNA", "rRNA", "tRNA", "snRNA", "scRNA",
121 "AA", "DNA", "DNA", "cRNA", "snoRNA", "RNA", "ncRNA", "tmRNA "
122 };
123
124 static CharPtr gbseq_top [3] = {
125 NULL, "linear", "circular"
126 };
127
128 static DatePtr GetBestDate (
129 DatePtr a,
130 DatePtr b
131 )
132
133 {
134 Int2 status;
135
136 if (a == NULL) return b;
137 if (b == NULL) return a;
138
139 status = DateMatch (a, b, FALSE);
140 if (status == 1) return a;
141
142 return b;
143 }
144
145 /*--------------------------------------------------------*/
146 /* */
147 /* s_IsSeperatorNeeded() */
148 /* */
149 /*--------------------------------------------------------*/
150
151 static Boolean s_IsSeperatorNeeded(CharPtr baseString, Int4 baseLength, Int2 suffixLength)
152 {
153 Char lastChar;
154 Char nextToLastChar;
155
156 lastChar = baseString[baseLength - 1];
157 nextToLastChar = baseString[baseLength - 2];
158
159 /* This first check put here to emulate what may be a */
160 /* bug in the original code (in CheckLocusLength() ) */
161 /* which adds an 'S' segment seperator only if it */
162 /* DOES make the string longer than the max. */
163
164 if (baseLength + suffixLength < 16)
165 return FALSE;
166
167 /* If the last character is not a digit */
168 /* then don't use a seperator. */
169
170 if (!IS_DIGIT(lastChar))
171 return FALSE;
172
173 /* If the last two characters are a non-digit */
174 /* followed by a '0', then don't use seperator. */
175
176 if ((lastChar == '0') && (!IS_DIGIT(nextToLastChar)))
177 return FALSE;
178
179 /* If we made it to here, use a seperator */
180
181 return TRUE;
182 }
183
184 /*--------------------------------------------------------*/
185 /* */
186 /* s_LocusAddSuffix() - */
187 /* */
188 /*--------------------------------------------------------*/
189
190 static Boolean s_LocusAddSuffix (CharPtr locus, Asn2gbWorkPtr awp)
191 {
192 size_t buflen;
193 Char ch;
194 Char segCountStr[6];
195 Int2 segCountStrLen;
196 Char segSuffix[5];
197
198 buflen = StringLen (locus);
199
200 /* If there's one or less segments, */
201 /* no suffix is needed. */
202
203 if (awp->numsegs <= 1)
204 return FALSE;
205
206 /* If the basestring has one or less */
207 /* characters, no suffix is needed. */
208
209 if (buflen <=1)
210 return FALSE;
211
212 /* Add the suffix */
213
214 ch = locus[buflen-1];
215 sprintf(segCountStr,"%d",awp->numsegs);
216 segCountStrLen = StringLen(segCountStr);
217 segSuffix[0] = '\0';
218
219 if (s_IsSeperatorNeeded(locus,buflen,segCountStrLen) == TRUE)
220 sprintf(segSuffix,"S%0*d",segCountStrLen,awp->seg);
221 else
222 sprintf(segSuffix,"%0*d",segCountStrLen,awp->seg);
223 StringCat(locus,segSuffix);
224
225 /* Return successfully */
226
227 return TRUE;
228 }
229
230 /*--------------------------------------------------------*/
231 /* */
232 /* s_LocusAdjustLength() - */
233 /* */
234 /*--------------------------------------------------------*/
235
236 static Boolean s_LocusAdjustLength(CharPtr locus, Int2 maxLength)
237 {
238 Int2 trimCount;
239 Int2 buflen;
240 CharPtr buftmp;
241
242 buflen = StringLen (locus);
243 if (buflen <= maxLength) return FALSE;
244
245 buftmp = MemNew(maxLength + 1);
246
247 /* If the sequence id is an NCBI locus of the */
248 /* form HSU00001, then make sure that if */
249 /* there is trimming the HS gets trimmed off */
250 /* as a unit, never just the 'H'. */
251
252 trimCount = buflen - maxLength;
253 if (trimCount == 1)
254 if (IS_ALPHA(locus[0]) != 0 &&
255 IS_ALPHA(locus[1]) != 0 &&
256 IS_ALPHA(locus[2]) != 0 &&
257 IS_DIGIT(locus[3]) != 0 &&
258 IS_DIGIT(locus[4]) != 0 &&
259 IS_DIGIT(locus[5]) != 0 &&
260 IS_DIGIT(locus[6]) != 0 &&
261 IS_DIGIT(locus[7]) != 0 &&
262 locus[8] == 'S' &&
263 locus[9] == '\0')
264 trimCount++;
265
266 /* Left truncate the sequence id */
267
268 StringCpy(buftmp, &locus[trimCount]);
269 StringCpy(locus, buftmp);
270
271 MemFree(buftmp);
272 return TRUE;
273 }
274
275 /*--------------------------------------------------------*/
276 /* */
277 /* AddLocusBlock() and the helpers defined ahead of it - */
278 /* */
279 /*--------------------------------------------------------*/
280
281 static DatePtr GetBestDateForBsp (
282 BioseqPtr bsp
283 )
284
285 {
286 DatePtr best_date = NULL;
287 SeqMgrDescContext dcontext;
288 DatePtr dp;
289 EMBLBlockPtr ebp;
290 GBBlockPtr gbp;
291 PdbBlockPtr pdp;
292 PdbRepPtr prp;
293 SeqDescrPtr sdp;
294 SPBlockPtr spp;
295
296 if (bsp == NULL) return NULL;
297
298 dp = NULL;
299 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
300 if (sdp != NULL) {
301 dp = (DatePtr) sdp->data.ptrvalue;
302 best_date = GetBestDate (dp, best_date);
303 }
304
305 /* !!! temporarily also look at genbank block entry date !!! */
306
307 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
308 if (sdp != NULL) {
309 gbp = (GBBlockPtr) sdp->data.ptrvalue;
310 if (gbp != NULL) {
311 dp = gbp->entry_date;
312 best_date = GetBestDate (dp, best_date);
313 }
314 }
315
316 /* more complicated code for dates from various objects goes here */
317
318 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_embl, &dcontext);
319 if (sdp != NULL) {
320 ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
321 if (ebp != NULL) {
322 dp = ebp->creation_date;
323 best_date = GetBestDate (dp, best_date);
324 dp = ebp->update_date;
325 best_date = GetBestDate (dp, best_date);
326 }
327 }
328
329 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_sp, &dcontext);
330 if (sdp != NULL) {
331 spp = (SPBlockPtr) sdp->data.ptrvalue;
332 if (spp != NULL) {
333 dp = spp->created;
334 if (dp != NULL && dp->data [0] == 1) {
335 best_date = GetBestDate (dp, best_date);
336 }
337 dp = spp->sequpd;
338 if (dp != NULL && dp->data [0] == 1) {
339 best_date = GetBestDate (dp, best_date);
340 }
341 dp = spp->annotupd;
342 if (dp != NULL && dp->data [0] == 1) {
343 best_date = GetBestDate (dp, best_date);
344 }
345 }
346 }
347
348 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pdb, &dcontext);
349 if (sdp != NULL) {
350 pdp = (PdbBlockPtr) sdp->data.ptrvalue;
351 if (pdp != NULL) {
352 dp = pdp->deposition;
353 if (dp != NULL && dp->data [0] == 1) {
354 best_date = GetBestDate (dp, best_date);
355 }
356 prp = pdp->replace;
357 if (prp != NULL) {
358 dp = prp->date;
359 if (dp != NULL && dp->data[0] == 1) {
360 best_date = GetBestDate (dp, best_date);
361 }
362 }
363 }
364 }
365
366 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
367 if (sdp != NULL) {
368 dp = (DatePtr) sdp->data.ptrvalue;
369 if (dp != NULL) {
370 best_date = GetBestDate (dp, best_date);
371 }
372 }
373
374 return best_date;
375 }
376
377 static Boolean LocusHasBadChars (
378 CharPtr locus
379 )
380
381 {
382 Char ch;
383 CharPtr ptr;
384
385 ptr = locus;
386 ch = *ptr;
387 while (ch != '\0') {
388 if (! IS_ALPHANUM (ch)) {
389 return TRUE;
390 }
391 ptr++;
392 ch = *ptr;
393 }
394 return FALSE;
395 }
396
397 static void LookupAccnForNavLink (
398 Int4 gi,
399 CharPtr seqid,
400 size_t len,
401 CharPtr dfault
402 )
403
404 {
405 SeqIdPtr sip;
406
407 if (seqid == NULL) return;
408 *seqid = '\0';
409 if (gi > 0) {
410 if (GetAccnVerFromServer (gi, seqid)) return;
411 sip = GetSeqIdForGI (gi);
412 if (sip != NULL) {
413 if (SeqIdWrite (sip, seqid, PRINTID_TEXTID_ACC_VER, len) != NULL) {
414 SeqIdFree (sip);
415 return;
416 }
417 SeqIdFree (sip);
418 }
419 }
420 if (dfault == NULL) return;
421 StringCpy (seqid, dfault);
422 }
423
424 NLM_EXTERN void AddLocusBlock (
425 Asn2gbWorkPtr awp,
426 Boolean willshowwgs,
427 Boolean willshowcage,
428 Boolean willshowgenome,
429 Boolean willshowcontig,
430 Boolean willshowsequence
431 )
432
433 {
434 size_t acclen;
435 IntAsn2gbJobPtr ajp;
436 Asn2gbSectPtr asp;
437 BaseBlockPtr bbp;
438 DatePtr best_date = NULL;
439 BioSourcePtr biop;
440 Int2 bmol = 0;
441 BioseqPtr bsp;
442 Char buf [1024];
443 Boolean cagemaster = FALSE;
444 SeqFeatPtr cds;
445 Int4 currGi;
446 Char dataclass [10];
447 Char date [40];
448 SeqMgrDescContext dcontext;
449 Char div [10];
450 BioseqPtr dna;
451 DatePtr dp;
452 CharPtr ebmol;
453 EMBLBlockPtr ebp;
454 Char embldiv [10];
455 SeqMgrFeatContext fcontext;
456 StringItemPtr ffstring;
457 GBBlockPtr gbp;
458 Char gene [32];
459 Boolean genome_view;
460 GBSeqPtr gbseq;
461 ValNodePtr gilistpos;
462 SeqIdPtr gpp = NULL;
463 Boolean has_next_pref_ul = FALSE;
464 Boolean hasComment;
465 Char id [41];
466 Int2 imol = 0;
467 IndxPtr index;
468 Int2 istrand;
469 Boolean is_nm = FALSE;
470 Boolean is_np = FALSE;
471 Boolean is_nz = FALSE;
472 Boolean is_env_sample = FALSE;
473 Boolean is_transgenic = FALSE;
474 Boolean is_tpa = FALSE;
475 Char len [32];
476 Int4 length;
477 size_t loclen;
478 Char locus [41];
479 MolInfoPtr mip;
480 Char mol [64];
481 Int4 nextGi;
482 BioseqPtr nm = NULL;
483 BioseqPtr nuc;
484 ObjectIdPtr oip;
485 OrgNamePtr onp;
486 Uint1 origin;
487 OrgRefPtr orp;
488 BioseqPtr parent;
489 Int4 prevGi;
490 SeqDescrPtr sdp;
491 Char sect [128];
492 Char seg [32];
493 Char seqid [128];
494 SeqFeatPtr sfp;
495 SeqHistPtr hist;
496 SeqIdPtr sip;
497 SubSourcePtr ssp;
498 CharPtr str;
499 CharPtr suffix = NULL;
500 Uint1 tech;
501 Uint1 topology;
502 TextSeqIdPtr tsip;
503 UserObjectPtr uop;
504 Char ver [16];
505 Int2 version;
506 ValNodePtr vnp;
507 Boolean wgsmaster = FALSE;
508 Int2 moltype, strandedness, topol;
509 /*
510 Int4 gi = 0;
511 Char gi_buf [16];
512 Boolean is_aa;
513 CharPtr prefix = NULL;
514 */
515
516 if (awp == NULL) return;
517 ajp = awp->ajp;
518 if (ajp == NULL) return;
519 bsp = awp->bsp;
520 if (bsp == NULL) return;
521 asp = awp->asp;
522 if (asp == NULL) return;
523
524 bbp = Asn2gbAddBlock (awp, LOCUS_BLOCK, sizeof (BaseBlock));
525 if (bbp == NULL) return;
526
527 ffstring = FFGetString(ajp);
528 if ( ffstring == NULL ) return;
529
530 mol [0] = '\0';
531 len [0] = '\0';
532 div [0] = '\0';
533 embldiv [0] = '\0';
534 dataclass [0] = '\0';
535 date [0] = '\0';
536 gene [0] = '\0';
537
538 genome_view = FALSE;
539 if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
540 genome_view = TRUE;
541
542 }
543 if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
544 genome_view = TRUE;
545 }
546
547 /* locus id */
548
549 sip = NULL;
550 version = 0;
551 for (sip = bsp->id; sip != NULL; sip = sip->next) {
552 if (sip->choice == SEQID_OTHER) {
553 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
554 if (tsip != NULL) {
555 version = tsip->version;
556 if (StringNCmp (tsip->accession, "NM_", 3) == 0 ||
557 StringNCmp (tsip->accession, "NR_", 3) == 0 ||
558 StringNCmp (tsip->accession, "XM_", 3) == 0 ||
559 StringNCmp (tsip->accession, "XR_", 3) == 0) {
560 is_nm = TRUE;
561 nm = bsp;
562 } else if (StringNCmp (tsip->accession, "NP_", 3) == 0 ||
563 StringNCmp (tsip->accession, "XP_", 3) == 0) {
564 is_np = TRUE;
565 } else if (StringNCmp (tsip->accession, "NZ_", 3) == 0) {
566 is_nz = TRUE;
567 }
568 }
569 break;
570 }
571 if (sip->choice == SEQID_GENBANK ||
572 sip->choice == SEQID_EMBL ||
573 sip->choice == SEQID_DDBJ) {
574 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
575 if (tsip != NULL) {
576 version = tsip->version;
577 }
578 break;
579 }
580 if (sip->choice == SEQID_TPG ||
581 sip->choice == SEQID_TPE ||
582 sip->choice == SEQID_TPD) {
583 is_tpa = TRUE;
584 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
585 if (tsip != NULL) {
586 version = tsip->version;
587 }
588 break;
589 }
590 if (sip->choice == SEQID_PIR ||
591 sip->choice == SEQID_SWISSPROT ||
592 sip->choice == SEQID_PRF ||
593 sip->choice == SEQID_PDB) break;
594 if (sip->choice == SEQID_GPIPE) {
595 gpp = sip;
596 }
597 }
598 if (sip == NULL) {
599 sip = gpp;
600 }
601 if (sip == NULL) {
602 sip = SeqIdFindBest (bsp->id, SEQID_GENBANK);
603 }
604 sprintf (ver, "%d", (int) version);
605
606 if (genome_view) {
607 SeqIdWrite (sip, locus, PRINTID_TEXTID_ACCESSION, sizeof (locus) - 1);
608 } else {
609 SeqIdWrite (sip, locus, PRINTID_TEXTID_LOCUS, sizeof (locus) - 1);
610 if (LocusHasBadChars (locus)) {
611 SeqIdWrite (sip, locus, PRINTID_TEXTID_ACCESSION, sizeof (locus) - 1);
612 }
613 }
614
615 if (is_np) {
616 sfp = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
617 if (sfp != NULL && fcontext.bsp != NULL) {
618 nm = fcontext.bsp;
619 for (sip = nm->id; sip != NULL; sip = sip->next) {
620 if (sip->choice == SEQID_OTHER) {
621 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
622 if (tsip != NULL) {
623 if (StringNCmp (tsip->accession, "NM_", 3) == 0 ||
624 StringNCmp (tsip->accession, "XM_", 3) == 0) {
625 is_nm = TRUE;
626 }
627 }
628 }
629 }
630 if (! is_nm) {
631 nm = NULL;
632 }
633 }
634 }
635 if (nm != NULL) {
636 /*
637 sfp = SeqMgrGetNextFeature (nm, NULL, SEQFEAT_GENE, 0, &fcontext);
638 if (sfp != NULL) {
639 StringNCpy_0 (gene, fcontext.label, sizeof (gene));
640 if (SeqMgrGetNextFeature (nm, sfp, SEQFEAT_GENE, 0, &fcontext) != NULL) {
641 gene [0] = '\0';
642 }
643 if (StringLen (gene) > 15) {
644 gene [0] = '\0';
645 }
646 }
647 */
648 }
649
650 /* more complicated code to get parent locus, if segmented, goes here */
651
652 if (awp->slp != NULL) {
653 length = SeqLocLen (awp->slp);
654 } else {
655 length = bsp->length;
656 }
657
658 mip = NULL;
659 tech = MI_TECH_standard;
660 origin = 0;
661 bmol = bsp->mol;
662 if (bmol > Seq_mol_aa) {
663 bmol = 0;
664 }
665 istrand = bsp->strand;
666 if (istrand > 3) {
667 istrand = 0;
668 }
669
670 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
671 if (sdp != NULL) {
672 bbp->entityID = dcontext.entityID;
673 bbp->itemID = dcontext.itemID;
674 bbp->itemtype = OBJ_SEQDESC;
675
676 mip = (MolInfoPtr) sdp->data.ptrvalue;
677 if (mip != NULL) {
678 if (mip->biomol <= MOLECULE_TYPE_TMRNA) {
679 imol = (Int2) mip->biomol;
680 }
681 tech = mip->tech;
682
683 if (tech == MI_TECH_wgs && bsp->repr == Seq_repr_virtual) {
684
685 /* check for WGS master record */
686
687 for (sip = bsp->id; sip != NULL; sip = sip->next) {
688 switch (sip->choice) {
689 case SEQID_GENBANK :
690 case SEQID_EMBL :
691 case SEQID_DDBJ :
692 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
693 if (tsip != NULL && tsip->accession != NULL) {
694 acclen = StringLen (tsip->accession);
695 if (acclen == 12) {
696 if (StringCmp (tsip->accession + 6, "000000") == 0) {
697 wgsmaster = TRUE;
698 }
699 } else if (acclen == 13) {
700 if (StringCmp (tsip->accession + 6, "0000000") == 0) {
701 wgsmaster = TRUE;
702 }
703 }
704 }
705 break;
706 case SEQID_OTHER :
707 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
708 if (tsip != NULL && tsip->accession != NULL) {
709 if (StringLen (tsip->accession) == 15) {
710 if (StringCmp (tsip->accession + 9, "000000") == 0) {
711 wgsmaster = TRUE;
712 }
713 }
714 }
715 break;
716 default :
717 break;
718 }
719 }
720 }
721
722 if (tech == MI_TECH_other && willshowcage && bsp->repr == Seq_repr_virtual) {
723
724 /* check for TAG master record */
725
726 for (sip = bsp->id; sip != NULL; sip = sip->next) {
727 switch (sip->choice) {
728 case SEQID_GENBANK :
729 case SEQID_EMBL :
730 case SEQID_DDBJ :
731 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
732 if (tsip != NULL && tsip->accession != NULL) {
733 acclen = StringLen (tsip->accession);
734 if (acclen == 12) {
735 if (StringCmp (tsip->accession + 5, "0000000") == 0) {
736 cagemaster = TRUE;
737 }
738 }
739 }
740 break;
741 default :
742 break;
743 }
744 }
745 }
746 }
747 }
748
749 /* check inst.mol if mol-type is not-set or genomic */
750
751 if (imol <= MOLECULE_TYPE_GENOMIC) {
752 if (bmol == Seq_mol_aa) {
753 imol = MOLECULE_TYPE_PEPTIDE;
754 } else if (bmol == Seq_mol_na) {
755 imol = 0;
756 } else if (bmol == Seq_mol_rna) {
757 imol = 2;
758 } else {
759 imol = 1;
760 }
761 } else if (imol == MOLECULE_TYPE_OTHER_GENETIC_MATERIAL) {
762 if (bmol == Seq_mol_rna) {
763 imol = 2;
764 }
765 }
766
767 /* if ds-DNA don't show ds */
768
769 if (bmol == Seq_mol_dna && istrand == 2) {
770 istrand = 0;
771 }
772
773 /* ss=any RNA don't show ss */
774
775 if ((bmol > Seq_mol_rna ||
776 (imol >= MOLECULE_TYPE_MRNA && imol <= MOLECULE_TYPE_PEPTIDE) ||
777 (imol >= MOLECULE_TYPE_CRNA && imol <= MOLECULE_TYPE_TMRNA)) &&
778 istrand == 1) {
779 istrand = 0;
780 }
781
782 topology = bsp->topology;
783 if (awp->slp != NULL) {
784 topology = TOPOLOGY_LINEAR;
785 }
786
787 /* length, topology, and molecule type */
788
789 if (awp->format == GENBANK_FMT) {
790
791 if (awp->newLocusLine) {
792
793 if (wgsmaster && (! is_nz)) {
794 sprintf (len, "%ld rc", (long) length);
795 } else if (cagemaster) {
796 sprintf (len, "%ld rc", (long) length);
797 } else {
798 sprintf (len, "%ld bp", (long) length);
799 }
800 sprintf (mol, "%s%-4s", strd [istrand], gnbk_mol [imol]);
801
802 } else {
803
804 if (topology == TOPOLOGY_CIRCULAR) {
805 sprintf (len, "%7ld bp", (long) length);
806 sprintf (mol, "%s%-4s circular", strd [istrand], gnbk_mol [imol]);
807 } else {
808 sprintf (len, "%7ld bp", (long) length);
809 sprintf (mol, "%s%-4s ", strd [istrand], gnbk_mol [imol]);
810 }
811 }
812
813 } else if (awp->format == GENPEPT_FMT) {
814
815 if (awp->newLocusLine) {
816 sprintf (len, "%ld aa", (long) length);
817 } else {
818 sprintf (len, "%7ld aa", (long) length);
819 }
820
821 } else if (awp->format == EMBL_FMT) {
822
823 if (imol < MOLECULE_TYPE_PEPTIDE) {
824 if (ajp->flags.useEmblMolType) {
825 ebmol = embl_mol [imol];
826 } else {
827 ebmol = gnbk_mol [imol];
828 }
829
830 if (topology == TOPOLOGY_CIRCULAR) {
831 sprintf (mol, "circular %s", ebmol);
832 sprintf (len, "%ld BP.", (long) length);
833 } else {
834 sprintf (mol, "%s", ebmol);
835 sprintf (len, "%ld BP.", (long) length);
836 }
837 }
838 }
839
840 /* division */
841
842 biop = NULL;
843 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
844 if (sdp != NULL) {
845 biop = (BioSourcePtr) sdp->data.ptrvalue;
846 } else {
847 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
848 if (sfp != NULL) {
849 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
850 } else if (ISA_aa (bsp->mol)) {
851
852 /* if protein with no sources, get sources applicable to DNA location of CDS */
853
854 cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
855 if (cds != NULL) {
856 sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
857 if (sfp != NULL) {
858 biop = (BioSourcePtr) sfp->data.value.ptrvalue;
859 } else {
860 dna = BioseqFindFromSeqLoc (cds->location);
861 if (dna != NULL) {
862 sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
863 if (sdp != NULL) {
864 biop = (BioSourcePtr) sdp->data.ptrvalue;
865 }
866 }
867 }
868 }
869 }
870 }
871 if (biop != NULL) {
872 origin = biop->origin;
873 orp = biop->org;
874 if (orp != NULL) {
875 onp = orp->orgname;
876 if (onp != NULL) {
877 StringNCpy_0 (div, onp->div, sizeof (div));
878 StringNCpy_0 (embldiv, onp->div, sizeof (embldiv));
879 }
880 }
881 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
882 if (ssp->subtype == SUBSRC_transgenic) {
883 is_transgenic = TRUE;
884 } else if (ssp->subtype == SUBSRC_environmental_sample) {
885 is_env_sample = TRUE;
886 }
887 }
888 }
889
890 StringCpy (dataclass, "STD");
891 if (is_tpa) {
892 StringCpy (dataclass, "TPA");
893 }
894
895 switch (tech) {
896 case MI_TECH_est :
897 StringCpy (div, "EST");
898 StringCpy (dataclass, "EST");
899 break;
900 case MI_TECH_sts :
901 StringCpy (div, "STS");
902 StringCpy (dataclass, "STS");
903 break;
904 case MI_TECH_survey :
905 StringCpy (div, "GSS");
906 StringCpy (dataclass, "GSS");
907 break;
908 case MI_TECH_htgs_0 :
909 case MI_TECH_htgs_1 :
910 case MI_TECH_htgs_2 :
911 StringCpy (div, "HTG");
912 StringCpy (dataclass, "HTG");
913 break;
914 case MI_TECH_htc :
915 StringCpy (div, "HTC");
916 StringCpy (dataclass, "HTC");
917 break;
918 case MI_TECH_tsa :
919 StringCpy (div, "TSA");
920 StringCpy (dataclass, "TSA");
921 break;
922 default :
923 break;
924 }
925
926 if (origin == ORG_MUT ||
927 origin == ORG_ARTIFICIAL ||
928 origin == ORG_SYNTHETIC ||
929 is_transgenic) {
930 StringCpy (div, "SYN");
931 StringCpy (embldiv, "SYN");
932 } else if (is_env_sample) {
933 if (tech == MI_TECH_unknown ||
934 tech == MI_TECH_standard ||
935 tech == MI_TECH_other ||
936 tech == MI_TECH_htgs_3) {
937 StringCpy (div, "ENV");
938 StringCpy (embldiv, "ENV");
939 }
940 }
941
942 if (is_transgenic && tech == MI_TECH_survey) {
943 StringCpy (div, "GSS");
944 StringCpy (dataclass, "GSS");
945 }
946
947 sip = SeqIdFindBest (bsp->id, SEQID_PATENT);
948 if (sip != NULL && sip->choice == SEQID_PATENT) {
949 StringCpy (div, "PAT");
950 StringCpy (dataclass, "PAT");
951 }
952
953 /* if protein is encoded by a patent nucleotide, use PAT division */
954
955 if (ISA_aa (bsp->mol)) {
956 cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
957 if (cds != NULL) {
958 nuc = BioseqFindFromSeqLoc (cds->location);
959 if (nuc != NULL) {
960 for (sip = nuc->id; sip != NULL; sip = sip->next) {
961 if (sip->choice == SEQID_PATENT) {
962 StringCpy (div, "PAT");
963 StringCpy (dataclass, "PAT");
964 }
965 }
966 }
967 }
968 }
969
970 /* more complicated code for division, if necessary, goes here */
971
972 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
973 while (sdp != NULL) {
974 gbp = (GBBlockPtr) sdp->data.ptrvalue;
975 if (gbp != NULL) {
976 if (StringHasNoText (div) && gbp->div != NULL) {
977 StringCpy (div, gbp->div);
978 StringCpy (embldiv, gbp->div);
979 } else if (StringCmp(gbp->div, "PAT") == 0) {
980 StringCpy (div, gbp->div);
981 StringCpy (dataclass, gbp->div);
982 } else if (StringCmp(gbp->div, "SYN") == 0 ) {
983 StringCpy (div, gbp->div);
984 StringCpy (embldiv, gbp->div);
985 }
986 }
987 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_genbank, &dcontext);
988 }
989
990 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
991
992 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_embl, &dcontext);
993 if (sdp != NULL) {
994 ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
995 if (ebp != NULL) {
996 if (ebp->div == 255) {
997 if (mip == NULL) {
998 StringCpy (div, "HUM");
999 StringCpy (embldiv, "HUM");
1000 }
1001 } else if (ebp->div < 18) {
1002 StringCpy (div, embl_divs [ebp->div]);
1003 StringCpy (embldiv, embl_divs [ebp->div]);
1004 }
1005 }
1006 }
1007
1008 if (StringHasNoText (div)) {
1009 StringCpy (div, "UNA");
1010 StringCpy (embldiv, "UNA");
1011 }
1012 }
1013
1014 /* empty division field if unable to find anything */
1015
1016 if (StringHasNoText (div)) {
1017 StringCpy (div, " ");
1018 }
1019 if (StringHasNoText (embldiv)) {
1020 StringCpy (embldiv, " ");
1021 }
1022
1023 /* contig style (old genome_view flag) forces CON division */
1024
1025 if (awp->contig) {
1026 StringCpy (div, "CON");
1027 StringCpy (dataclass, "CON");
1028 }
1029
1030 if (genome_view) {
1031 StringCpy (div, "CON");
1032 StringCpy (dataclass, "CON");
1033 }
1034
1035 if (StringCmp (dataclass, "CON") == 0) {
1036 if (DeltaLitOnly (bsp)) {
1037 if (SeqMgrGetNextFeature (bsp, NULL, 0, 0, &fcontext) != NULL) {
1038 StringCpy (dataclass, "ANN");
1039 }
1040 }
1041 }
1042
1043 /* date */
1044
1045 best_date = GetBestDateForBsp (bsp);
1046
1047 if (best_date == NULL) {
1048
1049 /* if bsp is product of CDS or mRNA feature, get date from sfp->location bsp */
1050
1051 sfp = NULL;
1052 if (ISA_na (bsp->mol)) {
1053 sfp = SeqMgrGetRNAgivenProduct (bsp, NULL);
1054 } else if (ISA_aa (bsp->mol)) {
1055 sfp = SeqMgrGetCDSgivenProduct (bsp, NULL);
1056 }
1057 if (sfp != NULL) {
1058 parent = BioseqFindFromSeqLoc (sfp->location);
1059 if (parent != NULL) {
1060 best_date = GetBestDateForBsp (parent);
1061 }
1062 }
1063 }
1064
1065 /* convert best date */
1066
1067 if (best_date != NULL) {
1068 DateToFF (date, best_date, FALSE);
1069 }
1070 if (StringHasNoText (date)) {
1071 StringCpy (date, "01-JAN-1900");
1072 }
1073
1074 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
1075
1076 /* Create the proper locus name */
1077
1078 parent = awp->parent;
1079 if (parent->repr == Seq_repr_seg) {
1080
1081 if (! StringHasNoText (awp->basename)) {
1082 StringCpy (locus, awp->basename);
1083 s_LocusAddSuffix (locus, awp);
1084 }
1085 }
1086
1087 /* Print the "LOCUS_NEW" line, if requested */
1088
1089 if (awp->newLocusLine) {
1090
1091 FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
1092 parent = awp->parent;
1093
1094 if (parent->repr == Seq_repr_seg)
1095 s_LocusAdjustLength (locus,16);
1096
1097 if (is_nm && (! StringHasNoText (gene))) {
1098 FFAddOneString (ffstring, gene, FALSE, FALSE, TILDE_IGNORE);
1099 } else {
1100 FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1101 }
1102 FFAddNChar(ffstring, ' ', 43 - StringLen(len)- ffstring->curr->pos, FALSE);
1103 FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1104 FFAddNChar(ffstring, ' ', 44 - ffstring->curr->pos, FALSE);
1105 FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1106 FFAddNChar(ffstring, ' ', 55 - ffstring->curr->pos, FALSE);
1107 if (topology == TOPOLOGY_CIRCULAR) {
1108 FFAddOneString (ffstring, "circular", FALSE, FALSE, TILDE_IGNORE);
1109 } else {
1110 FFAddOneString (ffstring, "linear ", FALSE, FALSE, TILDE_IGNORE);
1111 }
1112 FFAddNChar(ffstring, ' ', 64 - ffstring->curr->pos, FALSE);
1113 FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1114 FFAddNChar(ffstring, ' ', 68 - ffstring->curr->pos, FALSE);
1115 FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
1116 }
1117
1118 /* Else print the "LOCUS" line */
1119
1120 else {
1121
1122 FFStartPrint (ffstring, awp->format, 0, 0, "LOCUS", 12, 0, 0, NULL, FALSE);
1123
1124 if (parent->repr == Seq_repr_seg)
1125 s_LocusAdjustLength (locus,16);
1126
1127 FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1128 FFAddNChar(ffstring, ' ', 32 - StringLen(len) - ffstring->curr->pos, FALSE);
1129 FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1130 FFAddNChar(ffstring, ' ', 33 - ffstring->curr->pos, FALSE);
1131 FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1132 FFAddNChar(ffstring, ' ', 52 - ffstring->curr->pos, FALSE);
1133 FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1134 FFAddNChar(ffstring, ' ', 62 - ffstring->curr->pos, FALSE);
1135 FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
1136 }
1137
1138 } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1139
1140 if (awp->newLocusLine) {
1141
1142 str = GetMolTypeQual (bsp);
1143 if (str == NULL) {
1144 switch (bsp->mol) {
1145 case Seq_mol_dna :
1146 str = "unassigned DNA";
1147 break;
1148 case Seq_mol_rna :
1149 str = "unassigned RNA";
1150 break;
1151 case Seq_mol_aa :
1152 break;
1153 default :
1154 str = "unassigned DNA";
1155 break;
1156 }
1157 }
1158 if (StringCmp (str, "viral cRNA") == 0) {
1159 str = "other RNA";
1160 }
1161 if (StringICmp (str, "ncRNA") == 0) {
1162 str = "RNA";
1163 }
1164 StringNCpy_0 (mol, str, sizeof (mol));
1165
1166 FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
1167
1168 FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1169 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1170 FFAddOneString (ffstring, "SV ", FALSE, FALSE, TILDE_IGNORE);
1171 FFAddOneString (ffstring, ver, FALSE, FALSE, TILDE_IGNORE);
1172 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1173 if (topology == TOPOLOGY_CIRCULAR) {
1174 FFAddOneString (ffstring, "circular", FALSE, FALSE, TILDE_IGNORE);
1175 } else {
1176 FFAddOneString (ffstring, "linear", FALSE, FALSE, TILDE_IGNORE);
1177 }
1178 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1179 FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1180 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1181 FFAddOneString (ffstring, dataclass, FALSE, FALSE, TILDE_IGNORE);
1182 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1183 FFAddOneString (ffstring, embldiv, FALSE, FALSE, TILDE_IGNORE);
1184 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1185 FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1186
1187 } else {
1188
1189 FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 0, "ID", FALSE);
1190
1191 FFAddOneString (ffstring, locus, FALSE, FALSE, TILDE_IGNORE);
1192 loclen = StringLen(locus);
1193 if (14 - 5 - loclen > 0) {
1194 FFAddNChar(ffstring, ' ', 14 - 5 - loclen, FALSE);
1195 }
1196 if (awp->hup) {
1197 FFAddOneString (ffstring, " confidential; ", FALSE, FALSE, TILDE_IGNORE);
1198 } else {
1199 FFAddOneString (ffstring, " standard; ", FALSE, FALSE, TILDE_IGNORE);
1200 }
1201 FFAddOneString (ffstring, mol, FALSE, FALSE, TILDE_IGNORE);
1202 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1203
1204 /* conditional code to make div "UNA" goes here */
1205
1206 FFAddOneString (ffstring, div, FALSE, FALSE, TILDE_IGNORE);
1207 FFAddOneString (ffstring, "; ", FALSE, FALSE, TILDE_IGNORE);
1208 FFAddOneString (ffstring, len, FALSE, FALSE, TILDE_IGNORE);
1209 }
1210 }
1211
1212 /* optionally populate indexes for NCBI internal database */
1213
1214 if (ajp->index) {
1215 index = &asp->index;
1216 } else {
1217 index = NULL;
1218 }
1219
1220 if (index != NULL) {
1221 Char tmp [20];
1222 index->locus = StringSave (locus);
1223 index->div = StringSave (div);
1224 sprintf (tmp, "%ld", (long) length);
1225 index->base_cnt = StringSave (tmp);
1226 }
1227
1228 /* optionally populate gbseq for XML-ized GenBank format */
1229
1230 if (ajp->gbseq) {
1231 gbseq = &asp->gbseq;
1232 } else {
1233 gbseq = NULL;
1234 }
1235
1236 if (gbseq != NULL) {
1237 gbseq->locus = StringSave (locus);
1238 gbseq->length = length;
1239 gbseq->division = StringSave (div);
1240
1241 gbseq->moltype = StringSave (gbseq_mol [imol]);
1242
1243 strandedness = (Int2) bsp->strand;
1244 if (strandedness < 0 || strandedness > 3) {
1245 strandedness = 0;
1246 }
1247 if (strandedness == 0) {
1248 moltype = (Int2) imolToMoltype [imol];
1249 if (moltype < 0 || moltype > 11) {
1250 moltype = 0;
1251 }
1252 if (moltype == 1) {
1253 strandedness = 2; /* default to double strand for DNA */
1254 } else if ((moltype >= 2 && moltype <= 8) || moltype >= 10 && moltype <= 11) {
1255 strandedness = 1; /* default to single strand for RNA */
1256 }
1257 }
1258 gbseq->strandedness = StringSaveNoNull (gbseq_strd [strandedness]);
1259
1260 topol = (Int2) bsp->topology;
1261 if (topol < 0 || topol > 2) {
1262 topol = 0;
1263 }
1264 if (topol == 0) {
1265 topol = 1; /* default to linear if not set */
1266 }
1267 gbseq->topology = StringSaveNoNull (gbseq_top [topol]);
1268
1269 for (sip = bsp->id; sip != NULL; sip = sip->next) {
1270 SeqIdWrite (sip, id, PRINTID_FASTA_SHORT, sizeof (id));
1271 ValNodeCopyStr (&gbseq->other_seqids, 0, id);
1272 }
1273
1274 date [0] = '\0';
1275 dp = NULL;
1276 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
1277 if (sdp != NULL) {
1278 dp = (DatePtr) sdp->data.ptrvalue;
1279 }
1280 if (dp != NULL) {
1281 DateToFF (date, dp, FALSE);
1282 if (StringDoesHaveText (date)) {
1283 gbseq->create_date = StringSave (date);
1284 }
1285 }
1286 /*
1287 if (StringHasNoText (date)) {
1288 StringCpy (date, "01-JAN-1900");
1289 }
1290 gbseq->create_date = StringSave (date);
1291 */
1292
1293 date [0] = '\0';
1294 dp = NULL;
1295 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
1296 if (sdp != NULL) {
1297 dp = (DatePtr) sdp->data.ptrvalue;
1298 }
1299 if (dp != NULL) {
1300 DateToFF (date, dp, FALSE);
1301 }
1302 if (StringHasNoText (date)) {
1303 StringCpy (date, "01-JAN-1900");
1304 }
1305 gbseq->update_date = StringSave (date);
1306 }
1307
1308 suffix = FFEndPrint(ajp, ffstring, awp->format, 12, 0, 5, 0, "ID");
1309 FFRecycleString(ajp, ffstring);
1310
1311 bbp->string = suffix;
1312
1313 /*
1314 if (awp->contig && (! awp->showconfeats) && awp->smartconfeats && GetWWW (ajp) &&
1315 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
1316 is_aa = ISA_aa (bsp->mol);
1317 gi = 0;
1318 for (sip = bsp->id; sip != NULL; sip = sip->next) {
1319 if (sip->choice == SEQID_GI) {
1320 gi = (Int4) sip->data.intvalue;
1321 }
1322 }
1323 if (gi > 0) {
1324 ffstring = FFGetString(ajp);
1325
1326 sprintf(gi_buf, "%ld", (long) gi);
1327 FFAddOneString(ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1328 if (is_aa) {
1329 FF_Add_NCBI_Base_URL(ffstring, link_featp);
1330 } else {
1331 FF_Add_NCBI_Base_URL(ffstring, link_featn);
1332 }
1333 FFAddOneString(ffstring, gi_buf, FALSE, FALSE, TILDE_IGNORE);
1334 if ( is_aa ) {
1335 FFAddOneString(ffstring, "?report=gpwithparts", FALSE, FALSE, TILDE_IGNORE);
1336 } else {
1337 FFAddOneString(ffstring, "?report=gbwithparts", FALSE, FALSE, TILDE_IGNORE);
1338 }
1339 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1340 if (bsp->length > 1000000) {
1341 FFAddOneString(ffstring, "Click here to see all features and the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
1342 } else {
1343 FFAddOneString(ffstring, "Click here to see the sequence of this contig record.", FALSE, FALSE, TILDE_IGNORE);
1344 }
1345 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1346
1347 prefix = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "??");
1348
1349 FFRecycleString(ajp, ffstring);
1350
1351 if (awp->afp != NULL) {
1352 DoQuickLinkFormat (awp->afp, prefix);
1353 }
1354 MemFree (prefix);
1355 }
1356 }
1357 */
1358
1359 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
1360 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
1361
1362 sprintf (buf, "<a name=\"locus_%ld\"></a>", (long) awp->currGi);
1363 DoQuickLinkFormat (awp->afp, buf);
1364
1365 buf [0] = '\0';
1366 hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext) != NULL);
1367 if (! hasComment) {
1368 hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_region, &dcontext) != NULL);
1369 }
1370 if (! hasComment) {
1371 hasComment = (Boolean) (SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_maploc, &dcontext) != NULL);
1372 }
1373 if (! hasComment) {
1374 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
1375 while (sdp != NULL) {
1376 uop = (UserObjectPtr) sdp->data.ptrvalue;
1377 if (uop != NULL) {
1378 oip = uop->type;
1379 if (oip != NULL) {
1380 if (StringCmp (oip->str, "RefGeneTracking") == 0) {
1381 hasComment = TRUE;
1382 } else if (StringCmp (oip->str, "GenomeBuild") == 0) {
1383 hasComment = TRUE;
1384 } else if (StringCmp (oip->str, "ENCODE") == 0) {
1385 hasComment = TRUE;
1386 }
1387 }
1388 }
1389 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
1390 }
1391 }
1392 if (! hasComment) {
1393 hist = bsp->hist;
1394 if (hist != NULL) {
1395 if (hist->replaced_by_ids != NULL && hist->replaced_by_date != NULL) {
1396 hasComment = TRUE;
1397 } else if (hist->replace_ids != NULL && hist->replace_date != NULL) {
1398 hasComment = TRUE;
1399 }
1400 }
1401 }
1402
1403 buf [0] = '\0';
1404 StringCpy (buf, "<div class=\"localnav\"><ul class=\"locals\">");
1405
1406 if (hasComment) {
1407 sprintf (sect, "<li><a href=\"#comment_%ld\" title=\"Jump to the comment section of this record\">Comment</a></li>", (long) awp->currGi);
1408 StringCat (buf, sect);
1409 }
1410 sprintf (sect, "<li><a href=\"#feature_%ld\" title=\"Jump to the feature table of this record\">Features</a></li>", (long) awp->currGi);
1411 StringCat (buf, sect);
1412 if (willshowwgs) {
1413 sprintf (sect, "<li><a href=\"#wgs_%ld\" title=\"Jump to WGS section of this record\">WGS</a></li>", (long) awp->currGi);
1414 StringCat (buf, sect);
1415 }
1416 if (willshowgenome) {
1417 sprintf (sect, "<li><a href=\"#genome_%ld\" title=\"Jump to the genome section of this record\">Genome</a></li>", (long) awp->currGi);
1418 StringCat (buf, sect);
1419 }
1420 if (willshowcontig) {
1421 sprintf (sect, "<li><a href=\"#contig_%ld\" title=\"Jump to the contig section of this record\">Contig</a></li>", (long) awp->currGi);
1422 StringCat (buf, sect);
1423 }
1424 if (willshowsequence) {
1425 sprintf (sect, "<li><a href=\"#sequence_%ld\" title=\"Jump to the sequence of this record\">Sequence</a></li>", (long) awp->currGi);
1426 StringCat (buf, sect);
1427 }
1428
1429 StringCat (buf, "</ul>");
1430
1431 prevGi = 0;
1432 currGi = 0;
1433 nextGi = 0;
1434 gilistpos = awp->gilistpos;
1435 if (gilistpos == NULL) {
1436 gilistpos = ajp->gihead;
1437 }
1438 do {
1439 vnp = gilistpos;
1440 if (vnp != NULL) {
1441 prevGi = vnp->data.intvalue;
1442 vnp = vnp->next;
1443 gilistpos = vnp;
1444 if (vnp != NULL) {
1445 currGi = vnp->data.intvalue;
1446 vnp = vnp->next;
1447 if (vnp != NULL) {
1448 nextGi = vnp->data.intvalue;
1449 }
1450 }
1451 }
1452 } while (gilistpos != NULL && currGi != awp->currGi);
1453
1454 has_next_pref_ul = FALSE;
1455
1456 if (currGi == awp->currGi && nextGi > 0 && awp->sectionCount < awp->sectionMax) {
1457 if (! has_next_pref_ul) {
1458 StringCat (buf, "<ul class=\"nextprevlinks\">");
1459 has_next_pref_ul = TRUE;
1460 }
1461 LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
1462 if (awp->seg + 1 > 0 && awp->numsegs > 0 && awp->seg + 1 <= awp->numsegs) {
1463 sprintf (seg, " (segment %d of %ld)", (int) (awp->seg + 1), (long) awp->numsegs);
1464 StringCat (seqid, seg);
1465 }
1466 sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) nextGi, seqid);
1467 StringCat (buf, sect);
1468 } else if (awp->nextGi > 0) {
1469 if (! has_next_pref_ul) {
1470 StringCat (buf, "<ul class=\"nextprevlinks\">");
1471 has_next_pref_ul = TRUE;
1472 }
1473 LookupAccnForNavLink (nextGi, seqid, sizeof (seqid), "the next record");
1474 sprintf (sect, "<li class=\"next\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Next</a></li>", (long) awp->nextGi, seqid);
1475 StringCat (buf, sect);
1476 }
1477 if (currGi == awp->currGi && prevGi > 0 && awp->sectionCount > 1) {
1478 if (! has_next_pref_ul) {
1479 StringCat (buf, "<ul class=\"nextprevlinks\">");
1480 has_next_pref_ul = TRUE;
1481 }
1482 LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
1483 if (awp->seg - 1 > 0 && awp->numsegs > 0 && awp->seg - 1 <= awp->numsegs) {
1484 sprintf (seg, " (segment %d of %ld)", (int) (awp->seg - 1), (long) awp->numsegs);
1485 StringCat (seqid, seg);
1486 }
1487 sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) prevGi, seqid);
1488 StringCat (buf, sect);
1489 } else if (awp->prevGi > 0) {
1490 if (! has_next_pref_ul) {
1491 StringCat (buf, "<ul class=\"nextprevlinks\">");
1492 has_next_pref_ul = TRUE;
1493 }
1494 LookupAccnForNavLink (prevGi, seqid, sizeof (seqid), "the previous record");
1495 sprintf (sect, "<li class=\"prev\"><a href=\"#locus_%ld\" title=\"Jump to %s\">Previous</a></li>", (long) awp->prevGi, seqid);
1496 StringCat (buf, sect);
1497 }
1498 if (has_next_pref_ul) {
1499 StringCat (buf, "</ul>");
1500 }
1501 StringCat (buf, "</div>\n");
1502 StringCat (buf, "<pre class=\"genbank\">");
1503 DoQuickLinkFormat (awp->afp, buf);
1504 } else if (GetWWW (ajp)) {
1505 buf [0] = '\0';
1506 StringCat (buf, "<pre>");
1507 DoQuickLinkFormat (awp->afp, buf);
1508 }
1509
1510 if (awp->afp != NULL) {
1511 DoImmediateFormat (awp->afp, bbp);
1512 }
1513 }
1514
1515 NLM_EXTERN void AddDeflineBlock (
1516 Asn2gbWorkPtr awp
1517 )
1518
1519 {
1520 IntAsn2gbJobPtr ajp;
1521 Asn2gbSectPtr asp;
1522 BaseBlockPtr bbp;
1523 BioseqPtr bsp;
1524 Char buf [4096];
1525 GBSeqPtr gbseq;
1526 ItemInfo ii;
1527 StringItemPtr ffstring;
1528
1529 if (awp == NULL) return;
1530 ajp = awp->ajp;
1531 if (ajp == NULL) return;
1532 bsp = awp->bsp;
1533 if (bsp == NULL) return;
1534 asp = awp->asp;
1535 if (asp == NULL) return;
1536
1537 bbp = Asn2gbAddBlock (awp, DEFLINE_BLOCK, sizeof (BaseBlock));
1538 if (bbp == NULL) return;
1539
1540 ffstring = FFGetString(ajp);
1541 if ( ffstring == NULL ) return;
1542
1543 MemSet ((Pointer) (&ii), 0, sizeof (ItemInfo));
1544 MemSet ((Pointer) buf, 0, sizeof (buf));
1545
1546 /* create default defline */
1547
1548 if (NewCreateDefLineBuf (&ii, bsp, buf, sizeof (buf), FALSE, FALSE)) {
1549 bbp->entityID = ii.entityID;
1550 bbp->itemID = ii.itemID;
1551 bbp->itemtype = ii.itemtype;
1552
1553 FFStartPrint (ffstring, awp->format, 0, 12, "DEFINITION", 12, 5, 5, "DE", TRUE);
1554
1555 if (StringHasNoText (buf)) {
1556 FFAddOneChar (ffstring, '.', FALSE);
1557 } else {
1558 FFAddOneString (ffstring, buf, TRUE, TRUE, TILDE_IGNORE);
1559 }
1560
1561 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "DE");
1562 }
1563
1564 /* optionally populate gbseq for XML-ized GenBank format */
1565
1566 if (ajp->gbseq) {
1567 gbseq = &asp->gbseq;
1568 } else {
1569 gbseq = NULL;
1570 }
1571
1572 if (gbseq != NULL) {
1573 gbseq->definition = StringSave (buf);
1574 }
1575
1576 FFRecycleString(ajp, ffstring);
1577
1578 /*
1579 if (bbp->itemtype == 0) {
1580 bbp->entityID = bsp->idx.entityID;
1581 bbp->itemtype = bsp->idx.itemtype;
1582 bbp->itemID = bsp->idx.itemID;
1583 }
1584 */
1585
1586 if (awp->afp != NULL) {
1587 DoImmediateFormat (awp->afp, bbp);
1588 }
1589 }
1590
1591 static void FF_www_accession (
1592 IntAsn2gbJobPtr ajp,
1593 StringItemPtr ffstring,
1594 CharPtr cstring,
1595 Boolean is_na
1596 )
1597 {
1598 if (cstring == NULL || ffstring == NULL) return;
1599
1600 if ( GetWWW(ajp) ) {
1601 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
1602 if (is_na) {
1603 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
1604 } else {
1605 FF_Add_NCBI_Base_URL (ffstring, link_seqp);
1606 }
1607 FFAddTextToString(ffstring, /* "val=" */ NULL, cstring, "\">", FALSE, FALSE, TILDE_IGNORE);
1608 FFAddOneString(ffstring, cstring, FALSE, FALSE, TILDE_IGNORE);
1609 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
1610 } else {
1611 FFAddOneString(ffstring, cstring, FALSE, FALSE, TILDE_IGNORE);
1612 }
1613 return;
1614 }
1615
1616 /* Check if acc directly follows prev */
1617 static Boolean IsSuccessor(CharPtr acc, CharPtr prev)
1618 {
1619 CharPtr accp, prevp;
1620 Int4 acc_num, prev_num;
1621
1622 if (acc == NULL || prev == NULL) return FALSE;
1623
1624 if (StringLen(acc) != StringLen(prev)) return FALSE;
1625
1626 accp = acc;
1627 prevp = prev;
1628 while (accp != '\0' && prevp != '\0') {
1629 if (*accp != *prevp) return FALSE;
1630 if (IS_DIGIT(*accp)) {
1631 acc_num = (Int4)atol(accp);
1632 prev_num = (Int4)atol(prevp);
1633 return (acc_num == prev_num + 1);
1634 }
1635 ++accp;
1636 ++prevp;
1637 }
1638 return FALSE;
1639 }
1640
1641
1642 static Boolean IsProjectAccn(CharPtr acc)
1643 {
1644 Int2 letters;
1645 Char digits[3];
1646 CharPtr ptr;
1647
1648 if (acc == NULL) {
1649 return FALSE;
1650 }
1651 digits[0] = '\0';
1652
1653 for (ptr = acc, letters = 0; ptr != '\0' && IS_ALPHA(*ptr); ++ptr, ++letters) continue;
1654 if (letters != 4 || StringLen(ptr) < 2) {
1655 return FALSE;
1656 }
1657 digits[0] = *ptr++;
1658 digits[1] = *ptr++;
1659 digits[2] = '\0';
1660 if (atoi(digits) < 1) {
1661 return FALSE;
1662 }
1663 while (*ptr != '\0') {
1664 if (*ptr != '0') {
1665 return FALSE;
1666 }
1667 ++ptr;
1668 }
1669 return TRUE;
1670 }
1671
1672
1673 static ValNodePtr GetSecondaryAccessions(ValNodePtr extra_access)
1674 {
1675 #define EXTRA_ACCESSION_CUTOFF 20
1676 #define BIN_ACCESSION_CUTOFF 5
1677
1678 Int4 extra_acc_num = 0;
1679 ValNodePtr bins, bin, vnp, result = NULL, temp, prj;
1680 CharPtr first, last, curr, prev = NULL;
1681 Char range[40];
1682
1683 extra_acc_num = ValNodeLen(extra_access);
1684 if (extra_acc_num < EXTRA_ACCESSION_CUTOFF) {
1685 for (vnp = extra_access; vnp != NULL; vnp = vnp->next) {
1686 ValNodeCopyStr(&result, 0, (CharPtr)vnp->data.ptrvalue);
1687 }
1688 return result;
1689 }
1690
1691 /* sort the accessions into bins of successive accessions */
1692 bin = bins = NULL;
1693 for (vnp = extra_access; vnp != NULL; vnp = vnp->next) {
1694 curr = (CharPtr) vnp->data.ptrvalue;
1695 if (ValidateAccn (curr) != 0) {
1696 continue;
1697 }
1698 if (IsProjectAccn(curr)) {
1699 prj = ValNodeNew(NULL);
1700 ValNodeAddStr ((ValNodePtr PNTR) &(prj->data.ptrvalue), 0, curr);
1701 prj->next = bins;
1702 bins = prj;
1703 continue;
1704 }
1705 if (!IsSuccessor(curr, prev)) {
1706 bin = ValNodeAdd(&bins);
1707 }
1708 if (bin != NULL) {
1709 temp = (ValNodePtr)bin->data.ptrvalue;
1710 ValNodeAddStr(&temp, 0, curr);
1711 bin->data.ptrvalue = temp;
1712 }
1713
1714 prev = curr;
1715 }
1716
1717 for (bin = bins; bin != NULL; bin = bin->next) {
1718 vnp = (ValNodePtr)bin->data.ptrvalue;
1719 if (ValNodeLen(vnp) > BIN_ACCESSION_CUTOFF) {
1720 first = last = NULL;
1721 for ( ; vnp != NULL; vnp = vnp->next) {
1722 last = (CharPtr)vnp->data.ptrvalue;
1723 if (first == NULL) {
1724 first = last;
1725 }
1726 }
1727 range[0] = '\0';
1728 StringCat(range, first);
1729 StringCat(range, "-");
1730 StringCat(range, last);
1731 ValNodeCopyStr(&result, 0, range);
1732 } else {
1733 for ( ; vnp != NULL; vnp = vnp->next) {
1734 ValNodeCopyStr(&result, 0, (CharPtr)vnp->data.ptrvalue);
1735 }
1736 }
1737 bin->data.ptrvalue = ValNodeFree((ValNodePtr)bin->data.ptrvalue);
1738 }
1739
1740 bins = ValNodeFreeData(bins);
1741 return result;
1742 }
1743
1744
1745 /* !!! this definitely needs more work to support all classes, use proper SeqId !!! */
1746
1747 NLM_EXTERN void AddAccessionBlock (
1748 Asn2gbWorkPtr awp
1749 )
1750
1751 {
1752 size_t acclen;
1753 SeqIdPtr accn = NULL;
1754 IntAsn2gbJobPtr ajp;
1755 Asn2gbSectPtr asp;
1756 BaseBlockPtr bbp;
1757 BioseqPtr bsp;
1758 Char buf [41];
1759 SeqMgrDescContext dcontext;
1760 EMBLBlockPtr ebp;
1761 ValNodePtr extra_access;
1762 CharPtr flatloc;
1763 GBBlockPtr gbp;
1764 SeqIdPtr gi = NULL;
1765 GBSeqPtr gbseq;
1766 SeqIdPtr gnl = NULL;
1767 SeqIdPtr gpp = NULL;
1768 IndxPtr index;
1769 Boolean is_na;
1770 SeqIdPtr lcl = NULL;
1771 size_t len = 0;
1772 MolInfoPtr mip;
1773 SeqDescrPtr sdp;
1774 ValNodePtr secondary_acc;
1775 CharPtr separator = " ";
1776 SeqIdPtr sip;
1777 TextSeqIdPtr tsip;
1778 ValNodePtr vnp;
1779 CharPtr wgsaccn = NULL;
1780 CharPtr xtra;
1781 StringItemPtr ffstring;
1782
1783 if (awp == NULL) return;
1784 ajp = awp->ajp;
1785 if (ajp == NULL) return;
1786 bsp = awp->bsp;
1787 if (bsp == NULL) return;
1788 asp = awp->asp;
1789 if (asp == NULL) return;
1790
1791 ffstring = FFGetString(ajp);
1792 if ( ffstring == NULL ) return;
1793
1794 is_na = ISA_na (bsp->mol);
1795
1796 for (sip = bsp->id; sip != NULL; sip = sip->next) {
1797 switch (sip->choice) {
1798 case SEQID_GI :
1799 gi = sip;
1800 break;
1801 case SEQID_GENBANK :
1802 case SEQID_EMBL :
1803 case SEQID_DDBJ :
1804 accn = sip;
1805 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1806 if (tsip != NULL) {
1807 acclen = StringLen (tsip->accession);
1808 if (acclen == 12) {
1809 wgsaccn = tsip->accession;
1810 len = 12;
1811 } else if (acclen == 13) {
1812 wgsaccn = tsip->accession;
1813 len = 13;
1814 }
1815 }
1816 break;
1817 case SEQID_OTHER :
1818 accn = sip;
1819 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1820 if (tsip != NULL) {
1821 if (StringLen (tsip->accession) == 15) {
1822 wgsaccn = tsip->accession;
1823 len = 15;
1824 }
1825 }
1826 break;
1827 case SEQID_PIR :
1828 case SEQID_SWISSPROT :
1829 case SEQID_PRF :
1830 case SEQID_PDB :
1831 accn = sip;
1832 break;
1833 case SEQID_TPG :
1834 case SEQID_TPE :
1835 case SEQID_TPD :
1836 accn = sip;
1837 break;
1838 case SEQID_GPIPE :
1839 /* should not override better accession */
1840 gpp = sip;
1841 break;
1842 case SEQID_GENERAL :
1843 /* should not override better accession */
1844 gnl = sip;
1845 break;
1846 case SEQID_LOCAL :
1847 lcl = sip;
1848 break;
1849 default :
1850 break;
1851 }
1852 }
1853
1854 sip = NULL;
1855 if (accn == NULL) {
1856 accn = gpp;
1857 gpp = NULL;
1858 }
1859 if (accn != NULL) {
1860 sip = accn;
1861 } else if (gnl != NULL) {
1862 sip = gnl;
1863 } else if (lcl != NULL) {
1864 sip = lcl;
1865 } else if (gi != NULL) {
1866 sip = gi;
1867 }
1868
1869 if (sip == NULL) return;
1870
1871 SeqIdWrite (sip, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf));
1872
1873 bbp = Asn2gbAddBlock (awp, ACCESSION_BLOCK, sizeof (BaseBlock));
1874 if (bbp == NULL) return;
1875
1876 bbp->entityID = awp->entityID;
1877
1878 if (accn == NULL) {
1879
1880 /* if no accession, do not show local or general in ACCESSION */
1881
1882 if (ajp->mode == ENTREZ_MODE || ajp->mode == SEQUIN_MODE) {
1883 buf [0] = '\0';
1884 }
1885 }
1886
1887 FFStartPrint (ffstring, awp->format, 0, 12, "ACCESSION", 12, 5, 5, "AC", TRUE);
1888
1889 if (awp->hup && accn != NULL) {
1890 FFAddOneString (ffstring, ";", FALSE, FALSE, TILDE_TO_SPACES);
1891
1892 } else if (ajp->ajp.slp != NULL) {
1893 FF_www_accession (ajp, ffstring, buf, is_na);
1894 flatloc = FFFlatLoc (ajp, bsp, ajp->ajp.slp, ajp->masterStyle);
1895 FFAddTextToString (ffstring, " REGION: ", flatloc, NULL, FALSE, FALSE, TILDE_TO_SPACES);
1896 MemFree (flatloc);
1897 } else {
1898 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
1899 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1900 FFAddOneChar(ffstring, ';', FALSE);
1901 }
1902 }
1903
1904 /* optionally populate indexes for NCBI internal database */
1905
1906 if (ajp->index) {
1907 index = &asp->index;
1908 } else {
1909 index = NULL;
1910 }
1911
1912 if (index != NULL) {
1913 index->accession = StringSave (buf);
1914 }
1915
1916 /* optionally populate gbseq for XML-ized GenBank format */
1917
1918 if (ajp->gbseq) {
1919 gbseq = &asp->gbseq;
1920 } else {
1921 gbseq = NULL;
1922 }
1923
1924 if (gbseq != NULL) {
1925 gbseq->primary_accession = StringSave (buf);
1926 }
1927
1928 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
1929 separator = " ";
1930 } else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1931 separator = " ";
1932 }
1933
1934 if (gpp != NULL) {
1935 SeqIdWrite (gpp, buf, PRINTID_TEXTID_ACC_ONLY, sizeof (buf));
1936 FFAddTextToString(ffstring, separator, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
1937 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1938 FFAddOneChar(ffstring, ';', FALSE);
1939 }
1940 }
1941
1942 if (ajp->ajp.slp == NULL) {
1943 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
1944 if (sdp != NULL && wgsaccn != NULL) {
1945 mip = (MolInfoPtr) sdp->data.ptrvalue;
1946 if (mip != NULL && mip->tech == MI_TECH_wgs) {
1947 StringNCpy_0 (buf, wgsaccn, sizeof (buf));
1948 acclen = StringLen (buf);
1949 if (acclen == 12 && StringCmp (buf + len - 6, "000000") != 0) {
1950 StringCpy (buf + len - 6, "000000");
1951 } else if (acclen == 13 && StringCmp (buf + len - 7, "0000000") != 0) {
1952 StringCpy (buf + len - 7, "0000000");
1953 } else if (acclen == 15 && StringCmp (buf + len - 8, "00000000") != 0) {
1954 StringCpy (buf + len - 8, "00000000");
1955 } else {
1956 buf [0] = '\0';
1957 }
1958 if (! StringHasNoText (buf)) {
1959 if ( GetWWW(ajp) ) {
1960 FFAddTextToString(ffstring, separator, "<a href=\"", NULL, FALSE, FALSE, TILDE_IGNORE);
1961 FF_Add_NCBI_Base_URL (ffstring, link_wgs);
1962 FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", buf, "\">", FALSE, FALSE, TILDE_IGNORE);
1963 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
1964 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
1965 } else {
1966 FFAddTextToString(ffstring, separator, buf, NULL, FALSE, FALSE, TILDE_TO_SPACES);
1967 }
1968 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1969 FFAddOneChar(ffstring, ';', FALSE);
1970 }
1971 }
1972 }
1973 }
1974
1975 sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
1976 while (sdp != NULL) {
1977
1978 extra_access = NULL;
1979
1980 switch (dcontext.seqdesctype) {
1981 case Seq_descr_genbank :
1982 gbp = (GBBlockPtr) sdp->data.ptrvalue;
1983 if (gbp != NULL) {
1984 extra_access = gbp->extra_accessions;
1985 }
1986 break;
1987 case Seq_descr_embl :
1988 ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
1989 if (ebp != NULL) {
1990 extra_access = ebp->extra_acc;
1991 }
1992 break;
1993 default :
1994 break;
1995 }
1996
1997 if (extra_access != NULL) {
1998 bbp->entityID = dcontext.entityID;
1999 bbp->itemID = dcontext.itemID;
2000 bbp->itemtype = OBJ_SEQDESC;
2001
2002
2003 secondary_acc = GetSecondaryAccessions(extra_access);
2004 for (vnp = secondary_acc; vnp != NULL; vnp = vnp->next) {
2005 xtra = (CharPtr)vnp->data.ptrvalue;
2006 FFAddTextToString(ffstring, separator, xtra, NULL, FALSE, FALSE, TILDE_IGNORE);
2007 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2008 FFAddOneChar(ffstring, ';', FALSE);
2009 }
2010
2011 /* optionally populate indexes for NCBI internal database */
2012
2013 if (index != NULL) {
2014 ValNodeCopyStrToHead (&(index->secondaries), 0, xtra);
2015 }
2016
2017 /* optionally populate gbseq for XML-ized GenBank format */
2018
2019 if (gbseq != NULL) {
2020 ValNodeCopyStr (&(gbseq->secondary_accessions), 0, xtra);
2021 }
2022 }
2023 ValNodeFreeData(secondary_acc);
2024 }
2025
2026 sdp = SeqMgrGetNextDescriptor (bsp, sdp, 0, &dcontext);
2027 }
2028 }
2029
2030 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "AC");
2031 FFRecycleString(ajp, ffstring);
2032
2033 if (bbp->itemtype == 0) {
2034 bbp->entityID = bsp->idx.entityID;
2035 bbp->itemtype = bsp->idx.itemtype;
2036 bbp->itemID = bsp->idx.itemID;
2037 }
2038
2039 if (awp->afp != NULL) {
2040 DoImmediateFormat (awp->afp, bbp);
2041 }
2042 }
2043
2044 NLM_EXTERN void AddVersionBlock (
2045 Asn2gbWorkPtr awp
2046 )
2047
2048 {
2049 SeqIdPtr accn = NULL;
2050 IntAsn2gbJobPtr ajp;
2051 Asn2gbSectPtr asp;
2052 BaseBlockPtr bbp;
2053 BioseqPtr bsp;
2054 Char buf [41];
2055 Uint1 format = PRINTID_TEXTID_ACC_VER;
2056 GBSeqPtr gbseq;
2057 Int4 gi = -1;
2058 SeqIdPtr gpp = NULL;
2059 IndxPtr index;
2060 CharPtr ptr;
2061 SeqIdPtr sip;
2062 Char tmp [41];
2063 Char version [64];
2064 StringItemPtr ffstring;
2065
2066 if (awp == NULL) return;
2067 ajp = awp->ajp;
2068 if (ajp == NULL) return;
2069 bsp = awp->bsp;
2070 if (bsp == NULL) return;
2071 asp = awp->asp;
2072 if (asp == NULL) return;
2073
2074 ffstring = FFGetString(ajp);
2075 if ( ffstring == NULL ) return;
2076
2077 for (sip = bsp->id; sip != NULL; sip = sip->next) {
2078 switch (sip->choice) {
2079 case SEQID_GI :
2080 gi = sip->data.intvalue;
2081 break;
2082 case SEQID_GENBANK :
2083 case SEQID_EMBL :
2084 case SEQID_DDBJ :
2085 case SEQID_OTHER :
2086 accn = sip;
2087 break;
2088 case SEQID_PIR :
2089 case SEQID_SWISSPROT :
2090 case SEQID_PRF :
2091 case SEQID_PDB :
2092 accn = sip;
2093 break;
2094 case SEQID_TPG :
2095 case SEQID_TPE :
2096 case SEQID_TPD :
2097 accn = sip;
2098 break;
2099 case SEQID_GPIPE :
2100 /* should not override better accession */
2101 gpp = sip;
2102 break;
2103 default :
2104 break;
2105 }
2106 }
2107
2108 if (accn == NULL) {
2109 accn = gpp;
2110 /*
2111 format = PRINTID_TEXTID_ACC_ONLY;
2112 */
2113 }
2114
2115 /* if (gi < 1 && accn == NULL) return; */
2116
2117 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2118 if ( accn == NULL ) return;
2119 if (awp->newLocusLine) return;
2120 }
2121
2122 bbp = Asn2gbAddBlock (awp, VERSION_BLOCK, sizeof (BaseBlock));
2123 if (bbp == NULL) return;
2124
2125 bbp->entityID = awp->entityID;
2126
2127 /* no longer displaying NID */
2128
2129 /*
2130 if (gi > 0) {
2131 sprintf (version, "g%ld", (long) gi);
2132
2133 gb_StartPrint (awp->format, needInitBuff, 0, 12, "NID", 13, 5, 5, "NI", TRUE);
2134 needInitBuff = FALSE;
2135
2136 gb_AddString (NULL, version, NULL, FALSE, FALSE, TILDE_TO_SPACES);
2137
2138 ff_EndPrint();
2139 needEndPrint = FALSE;
2140 }
2141 */
2142
2143 version [0] = '\0';
2144
2145 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
2146 SeqIdWrite (accn, version, format, sizeof (version) - 1);
2147
2148 FFStartPrint (ffstring, awp->format, 0, 12, "VERSION", 12, 5, 5, "SV", TRUE);
2149
2150 FFAddOneString (ffstring, version, FALSE, FALSE, TILDE_TO_SPACES);
2151
2152 FFAddOneChar(ffstring, '\n', FALSE);
2153
2154 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "SV");
2155 FFRecycleString(ajp, ffstring);
2156
2157 if (awp->afp != NULL) {
2158 DoImmediateFormat (awp->afp, bbp);
2159 }
2160
2161 return;
2162 }
2163
2164 if (accn != NULL) {
2165
2166 buf [0] = '\0';
2167 SeqIdWrite (accn, buf, format, sizeof (buf) - 1);
2168
2169 if (gi > 0) {
2170 sprintf (version, "%s GI:%ld", buf, (long) gi);
2171 } else {
2172 sprintf (version, "%s", buf);
2173 }
2174
2175 FFStartPrint (ffstring, awp->format, 0, 12, "VERSION", 12, 5, 5, "SV", TRUE);
2176
2177 FFAddTextToString (ffstring, NULL, version, "\n", FALSE, FALSE, TILDE_TO_SPACES);
2178 /* optionally populate indexes for NCBI internal database */
2179
2180 if (ajp->index) {
2181 index = &asp->index;
2182 } else {
2183 index = NULL;
2184 }
2185
2186 if (index != NULL) {
2187 ptr = StringChr (buf, '.');
2188 if (ptr != NULL) {
2189 ptr++;
2190 index->version = StringSave (ptr);
2191 }
2192 if (gi > 0) {
2193 sprintf (tmp, "%ld", (long) gi);
2194 index->gi = StringSave (tmp);
2195 }
2196 }
2197
2198 /* optionally populate gbseq for XML-ized GenBank format */
2199
2200 if (ajp->gbseq) {
2201 gbseq = &asp->gbseq;
2202 } else {
2203 gbseq = NULL;
2204 }
2205
2206 if (gbseq != NULL) {
2207 ptr = StringChr (buf, '.');
2208 if (ptr != NULL) {
2209 gbseq->accession_version = StringSave (buf);
2210 }
2211 }
2212
2213 } else if (gi > 0) {
2214
2215 FFStartPrint (ffstring, awp->format, 0, 0, "VERSION", 12, 5, 5, "SV", TRUE);
2216
2217 sprintf (version, " GI:%ld", (long) gi);
2218
2219 FFAddTextToString (ffstring, NULL, version, "\n", FALSE, FALSE, TILDE_TO_SPACES);
2220
2221 } else {
2222
2223 FFStartPrint (ffstring, awp->format, 0, 0, "VERSION", 0, 5, 5, "SV", TRUE);
2224 FFAddOneChar(ffstring, '\n', FALSE);
2225 }
2226
2227 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "SV");
2228 FFRecycleString(ajp, ffstring);
2229
2230 if (bbp->itemtype == 0) {
2231 bbp->itemtype = bsp->idx.itemtype;
2232 bbp->itemID = bsp->idx.itemID;
2233 }
2234
2235 if (awp->afp != NULL) {
2236 DoImmediateFormat (awp->afp, bbp);
2237 }
2238 }
2239
2240 static void FF_asn2gb_www_projID (
2241 StringItemPtr ffstring,
2242 CharPtr projID
2243 )
2244
2245 {
2246 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2247 FF_Add_NCBI_Base_URL (ffstring, link_projid);
2248 FFAddOneString (ffstring, projID, FALSE, FALSE, TILDE_IGNORE);
2249 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
2250 FFAddOneString (ffstring, projID, FALSE, FALSE, TILDE_IGNORE);
2251 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2252 }
2253
2254 static CharPtr GetDBLinkString (
2255 UserObjectPtr uop
2256 )
2257
2258 {
2259 Char buf1 [256];
2260 Char buf2 [256];
2261 Char buf3 [256];
2262 CharPtr PNTR cpp;
2263 Int4 i;
2264 Int4Ptr ip;
2265 size_t len;
2266 ObjectIdPtr oip;
2267 CharPtr prefix;
2268 CharPtr str;
2269 Char tmp [32];
2270 UserFieldPtr ufp;
2271 Int4 val;
2272
2273 if (uop == NULL) return NULL;
2274
2275 buf1 [0] = '\0';
2276 buf2 [0] = '\0';
2277 buf3 [0] = '\0';
2278
2279 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
2280 oip = ufp->label;
2281 if (oip == NULL || oip->str == NULL) continue;
2282 if (StringICmp (oip->str, "Trace Assembly Archive") == 0 && ufp->choice == 8) {
2283 ip = (Int4Ptr) ufp->data.ptrvalue;
2284 if (ufp->num > 0 && ip != NULL) {
2285 val = ip [0];
2286 if (val > 0) {
2287 sprintf (buf1, "Trace Assembly Archive:%ld", (long) val);
2288 for (i = 1; i < ufp->num; i++) {
2289 val = ip [i];
2290 if (val > 0) {
2291 sprintf (tmp, ",%ld", (long) val);
2292 StringCat (buf1, tmp);
2293 }
2294 }
2295 }
2296 }
2297 }
2298 if (StringICmp (oip->str, "Bio Sample") == 0 && ufp->choice == 7) {
2299 cpp = (CharPtr PNTR) ufp->data.ptrvalue;
2300 if (ufp->num > 0 && cpp != NULL) {
2301 str = cpp [0];
2302 if (StringDoesHaveText (str)) {
2303 sprintf (buf2, "Bio Sample:%s", str);
2304 for (i = 1; i < ufp->num; i++) {
2305 str = cpp [i];
2306 if (StringDoesHaveText (str)) {
2307 sprintf (tmp, ",%s", str);
2308 StringCat (buf2, tmp);
2309 }
2310 }
2311 }
2312 }
2313 }
2314 if (StringICmp (oip->str, "ProbeDB") == 0 && ufp->choice == 7) {
2315 cpp = (CharPtr PNTR) ufp->data.ptrvalue;
2316 if (ufp->num > 0 && cpp != NULL) {
2317 str = cpp [0];
2318 if (StringDoesHaveText (str)) {
2319 sprintf (buf3, "ProbeDB:%s", str);
2320 for (i = 1; i < ufp->num; i++) {
2321 str = cpp [i];
2322 if (StringDoesHaveText (str)) {
2323 sprintf (tmp, ",%s", str);
2324 StringCat (buf3, tmp);
2325 }
2326 }
2327 }
2328 }
2329 }
2330 }
2331
2332 if (StringHasNoText (buf1) && StringHasNoText (buf2) && StringHasNoText (buf3)) return NULL;
2333
2334 len = StringLen (buf1) + StringLen (buf2) + StringLen (buf3);
2335 str = (CharPtr) MemNew (sizeof (Char) * (len + 2));
2336 if (str == NULL) return NULL;
2337
2338 prefix = "";
2339
2340 if (StringDoesHaveText (buf1)) {
2341 StringCat (str, buf1);
2342 prefix = "\n";
2343 }
2344
2345 if (StringDoesHaveText (buf2)) {
2346 StringCat (str, prefix);
2347 StringCat (str, buf2);
2348 prefix = "\n";
2349 }
2350
2351 if (StringDoesHaveText (buf3)) {
2352 StringCat (str, prefix);
2353 StringCat (str, buf3);
2354 prefix = "\n";
2355 }
2356
2357 return str;
2358 }
2359
2360 NLM_EXTERN void AddDblinkBlock (
2361 Asn2gbWorkPtr awp
2362 )
2363
2364 {
2365 IntAsn2gbJobPtr ajp;
2366 Asn2gbSectPtr asp;
2367 BaseBlockPtr bbp;
2368 BioseqPtr bsp;
2369 Char buf [32];
2370 UserFieldPtr curr;
2371 Uint4 dbitemID = 0;
2372 UserObjectPtr dbuop = NULL;
2373 SeqMgrDescContext dcontext;
2374 Boolean first = TRUE;
2375 StringItemPtr ffstring;
2376 GBSeqPtr gbseq;
2377 Uint4 gpitemID = 0;
2378 UserObjectPtr gpuop = NULL;
2379 ValNodePtr head = NULL;
2380 ObjectIdPtr oip;
2381 Int4 parentID;
2382 CharPtr prefix;
2383 Int4 projectID;
2384 SeqDescrPtr sdp;
2385 CharPtr str;
2386 UserObjectPtr uop;
2387 Int4 val;
2388
2389 if (awp == NULL) return;
2390 ajp = awp->ajp;
2391 if (ajp == NULL) return;
2392 bsp = awp->bsp;
2393 if (bsp == NULL) return;
2394 asp = awp->asp;
2395 if (asp == NULL) return;
2396
2397 if (ISA_na (bsp->mol) && awp->format != GENBANK_FMT) return;
2398 if (ISA_aa (bsp->mol) && awp->format != GENPEPT_FMT) return;
2399
2400 /*
2401 if (! ISA_na (bsp->mol)) return;
2402 if (awp->format != GENBANK_FMT) return;
2403 */
2404
2405 if (ajp->gbseq) {
2406 gbseq = &asp->gbseq;
2407 } else {
2408 gbseq = NULL;
2409 }
2410
2411 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
2412 while (sdp != NULL) {
2413 uop = (UserObjectPtr) sdp->data.ptrvalue;
2414 if (uop != NULL) {
2415 oip = uop->type;
2416 if (oip != NULL && StringICmp (oip->str, "GenomeProjectsDB") == 0) {
2417 gpuop = uop;
2418 gpitemID = dcontext.itemID;
2419 }
2420 if (oip != NULL && StringICmp (oip->str, "DBLink") == 0) {
2421 dbuop = uop;
2422 dbitemID = dcontext.itemID;
2423 }
2424 }
2425 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
2426 }
2427 if (gpuop == NULL && dbuop == NULL) return;
2428
2429 ffstring = FFGetString (ajp);
2430 if ( ffstring == NULL ) return;
2431
2432 if (gpuop != NULL) {
2433 bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
2434 if (bbp == NULL) return;
2435
2436 bbp->entityID = awp->entityID;
2437 bbp->itemID = gpitemID;
2438 bbp->itemtype = OBJ_SEQDESC;
2439
2440 if (first) {
2441 FFStartPrint (ffstring, awp->format, 0, 12, "DBLINK", 12, 5, 5, "XX", TRUE);
2442 } else {
2443 FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "XX", TRUE);
2444 }
2445 first = FALSE;
2446
2447 prefix = "Project:";
2448 projectID = 0;
2449 parentID = 0;
2450 for (curr = gpuop->data; curr != NULL; curr = curr->next) {
2451 oip = curr->label;
2452 if (oip == NULL) continue;
2453 if (StringICmp (oip->str, "ProjectID") == 0) {
2454 if (curr->choice == 2) {
2455 val = (Int4) curr->data.intvalue;
2456 if (projectID > 0) {
2457 sprintf (buf, "%ld", (long) projectID);
2458 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2459 if (GetWWW (ajp)) {
2460 FF_asn2gb_www_projID (ffstring, buf);
2461 } else {
2462 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
2463 }
2464 /*
2465 FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
2466 */
2467 if (gbseq != NULL) {
2468 if (head == NULL) {
2469 sprintf (buf, "%ld", (long) projectID);
2470 } else {
2471 sprintf (buf, ", %ld", (long) projectID);
2472 }
2473 ValNodeCopyStr (&head, 0, buf);
2474 }
2475 prefix = ",";
2476 parentID = 0;
2477 }
2478 projectID = val;
2479 }
2480 } else if (StringICmp (oip->str, "ParentID") == 0) {
2481 if (curr->choice == 2) {
2482 val = (Int4) curr->data.intvalue;
2483 parentID = val;
2484 }
2485 }
2486 }
2487 if (projectID > 0) {
2488 sprintf (buf, "%ld", (long) projectID);
2489 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
2490 if (GetWWW (ajp)) {
2491 FF_asn2gb_www_projID (ffstring, buf);
2492 } else {
2493 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
2494 }
2495 /*
2496 FFAddTextToString (ffstring, prefix, buf, NULL, FALSE, FALSE, TILDE_IGNORE);
2497 */
2498 if (gbseq != NULL) {
2499 if (head == NULL) {
2500 sprintf (buf, "%ld", (long) projectID);
2501 } else {
2502 sprintf (buf, ", %ld", (long) projectID);
2503 }
2504 ValNodeCopyStr (&head, 0, buf);
2505 }
2506 }
2507
2508 bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
2509 FFRecycleString (ajp, ffstring);
2510 ffstring = FFGetString (ajp);
2511
2512 if (awp->afp != NULL) {
2513 DoImmediateFormat (awp->afp, bbp);
2514 }
2515 }
2516
2517 if (dbuop != NULL) {
2518 str = GetDBLinkString (dbuop);
2519 if (StringDoesHaveText (str)) {
2520 bbp = Asn2gbAddBlock (awp, PROJECT_BLOCK, sizeof (BaseBlock));
2521 if (bbp == NULL) return;
2522
2523 bbp->entityID = awp->entityID;
2524 bbp->itemID = dbitemID;
2525 bbp->itemtype = OBJ_SEQDESC;
2526
2527 if (first) {
2528 FFStartPrint (ffstring, awp->format, 0, 12, "DBLINK", 12, 5, 5, "XX", TRUE);
2529 } else {
2530 FFStartPrint (ffstring, awp->format, 0, 12, NULL, 12, 5, 5, "XX", TRUE);
2531 }
2532 first = FALSE;
2533
2534 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_EXPAND);
2535
2536 bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
2537 MemFree (str);
2538
2539 if (awp->afp != NULL) {
2540 DoImmediateFormat (awp->afp, bbp);
2541 }
2542 }
2543 }
2544
2545 FFRecycleString (ajp, ffstring);
2546
2547 if (gbseq != NULL) {
2548 if (head != NULL) {
2549 gbseq->project = MergeFFValNodeStrs (head);
2550 ValNodeFreeData (head);
2551 }
2552 }
2553 }
2554
2555 /* only displaying PID in GenPept format */
2556
2557 /*
2558 static void AddPidBlock (Asn2gbWorkPtr awp)
2559
2560 {
2561 IntAsn2gbJobPtr ajp;
2562 BaseBlockPtr bbp;
2563 BioseqPtr bsp;
2564 Int4 gi = -1;
2565 SeqIdPtr sip;
2566 Char version [64];
2567 StringItemPtr ffstring;
2568
2569 if (awp == NULL) return;
2570 ajp = awp->ajp;
2571 if (ajp == NULL) return;
2572 bsp = awp->bsp;
2573 if (bsp == NULL) return;
2574
2575 for (sip = bsp->id; sip != NULL; sip = sip->next) {
2576 switch (sip->choice) {
2577 case SEQID_GI :
2578 gi = sip->data.intvalue;
2579 break;
2580 default :
2581 break;
2582 }
2583 }
2584
2585 if (gi < 1) return;
2586
2587 bbp = Asn2gbAddBlock (awp, PID_BLOCK, sizeof (BaseBlock));
2588 if (bbp == NULL) return;
2589
2590 ffstring = FFGetString(ajp);
2591 if ( ffstring == NULL ) return;
2592
2593 FFStartPrint (ffstring, awp->format, 0, 12, "PID", 12, 5, 5, NULL, TRUE);
2594
2595 sprintf (version, "g%ld", (long) gi);
2596 FFAddOneString (ffstring, version, FALSE, FALSE, TILDE_TO_SPACES);
2597
2598 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, NULL);
2599 FFRecycleString(ajp, ffstring);
2600 }
2601 */
2602
2603 static Uint1 dbsource_fasta_order [NUM_SEQID] = {
2604 33, /* 0 = not set */
2605 20, /* 1 = local Object-id */
2606 15, /* 2 = gibbsq */
2607 16, /* 3 = gibbmt */
2608 30, /* 4 = giim Giimport-id */
2609 10, /* 5 = genbank */
2610 10, /* 6 = embl */
2611 10, /* 7 = pir */
2612 10, /* 8 = swissprot */
2613 15, /* 9 = patent */
2614 18, /* 10 = other TextSeqId */
2615 20, /* 11 = general Dbtag */
2616 31, /* 12 = gi */
2617 10, /* 13 = ddbj */
2618 10, /* 14 = prf */
2619 12, /* 15 = pdb */
2620 10, /* 16 = tpg */
2621 10, /* 17 = tpe */
2622 10, /* 18 = tpd */
2623 10, /* 19 = gpp */
2624 10 /* 20 = nat */
2625 };
2626
2627 static void AddToUniqueSipList (
2628 ValNodePtr PNTR list,
2629 SeqIdPtr sip
2630 )
2631
2632 {
2633 ValNodePtr vnp;
2634
2635 if (list == NULL || sip == NULL) return;
2636 for (vnp = *list; vnp != NULL; vnp = vnp->next) {
2637 if (SeqIdMatch (sip, (SeqIdPtr) vnp->data.ptrvalue)) return;
2638 }
2639 ValNodeAddPointer (list, 0, (Pointer) sip);
2640 }
2641
2642 static Boolean WriteDbsourceID (
2643 SeqIdPtr sip,
2644 CharPtr str,
2645 BoolPtr is_na_p
2646 )
2647
2648 {
2649 Boolean check_na = FALSE;
2650 DbtagPtr db;
2651 CharPtr dt;
2652 Int4 gi;
2653 ObjectIdPtr oip;
2654 CharPtr pfx;
2655 PDBSeqIdPtr psip = NULL;
2656 CharPtr prefix;
2657 Boolean rsult = FALSE;
2658 CharPtr sfx;
2659 CharPtr suffix;
2660 Char tmp [32];
2661 TextSeqIdPtr tsip = NULL;
2662
2663 if (is_na_p != NULL) {
2664 *is_na_p = FALSE;
2665 }
2666 if (sip == NULL || str == NULL) return FALSE;
2667 *str = '\0';
2668 switch (sip->choice) {
2669 case SEQID_LOCAL :
2670 oip = (ObjectIdPtr) sip->data.ptrvalue;
2671 if (oip == NULL) return FALSE;
2672 if (! StringHasNoText (oip->str)) {
2673 StringCat (str, oip->str);
2674 return TRUE;
2675 } else if (oip->id > 0) {
2676 sprintf (tmp, "%ld", (long) oip->id);
2677 StringCat (str, tmp);
2678 return TRUE;
2679 }
2680 return FALSE;
2681 case SEQID_GI :
2682 gi = (Int4) sip->data.intvalue;
2683 if (gi == 0) return FALSE;
2684 sprintf (tmp, "gi: %ld", (long) gi);
2685 StringCat (str, tmp);
2686 return TRUE;
2687 case SEQID_GENERAL :
2688 db = (DbtagPtr) sip->data.ptrvalue;
2689 if (db == NULL) return FALSE;
2690 /* !!! still need to implement this !!! */
2691 return FALSE;
2692 case SEQID_GENBANK :
2693 case SEQID_EMBL :
2694 case SEQID_DDBJ :
2695 case SEQID_OTHER :
2696 case SEQID_TPG :
2697 case SEQID_TPE :
2698 case SEQID_TPD :
2699 case SEQID_GPIPE :
2700 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2701 if (tsip == NULL) return FALSE;
2702 check_na = TRUE;
2703 break;
2704 case SEQID_PIR :
2705 case SEQID_SWISSPROT :
2706 case SEQID_PRF :
2707 tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2708 if (tsip == NULL) return FALSE;
2709 break;
2710 case SEQID_PDB :
2711 psip = (PDBSeqIdPtr) sip->data.ptrvalue;
2712 if (psip == NULL) return FALSE;
2713 break;
2714 default :
2715 break;
2716 }
2717 prefix = " ";
2718 suffix = NULL;
2719 switch (sip->choice) {
2720 case SEQID_EMBL :
2721 StringCat (str, "embl ");
2722 suffix = ",";
2723 break;
2724 case SEQID_OTHER :
2725 StringCat (str, "REFSEQ: ");
2726 break;
2727 case SEQID_SWISSPROT :
2728 StringCat (str, "UniProtKB: ");
2729 suffix = ",";
2730 break;
2731 case SEQID_PIR :
2732 StringCat (str, "UniProtKB: ");
2733 break;
2734 case SEQID_PRF :
2735 StringCat (str, "prf: ");
2736 break;
2737 case SEQID_PDB :
2738 StringCat (str, "pdb: ");
2739 suffix = ",";
2740 break;
2741 default :
2742 break;
2743 }
2744 pfx = NULL;
2745 sfx = NULL;
2746 if (tsip != NULL) {
2747 if (! StringHasNoText (tsip->name)) {
2748 StringCat (str, sfx);
2749 StringCat (str, pfx);
2750 StringCat (str, "locus ");
2751 StringCat (str, tsip->name);
2752 sfx = suffix;
2753 pfx = prefix;
2754 rsult = TRUE;
2755 }
2756 if (! StringHasNoText (tsip->accession)) {
2757 StringCat (str, sfx);
2758 StringCat (str, pfx);
2759 StringCat (str, "accession ");
2760 StringCat (str, tsip->accession);
2761 sfx = suffix;
2762 pfx = prefix;
2763 rsult = TRUE;
2764 if (check_na && is_na_p != NULL) {
2765 *is_na_p = IS_ntdb_accession (tsip->accession);
2766 }
2767 }
2768 if (tsip->version > 0 && sip->choice != SEQID_SWISSPROT) {
2769 sprintf (tmp, ".%d", (int) tsip->version);
2770 StringCat (str, tmp);
2771 sfx = suffix;
2772 pfx = prefix;
2773 }
2774 if (! StringHasNoText (tsip->release) && sip->choice != SEQID_SWISSPROT) {
2775 StringCat (str, pfx);
2776 StringCat (str, "release ");
2777 StringCat (str, tsip->release);
2778 sfx = suffix;
2779 pfx = prefix;
2780 }
2781 if (sip->choice == SEQID_SWISSPROT || sip->choice == SEQID_PIR || sip->choice == SEQID_PRF) {
2782 StringCat (str, ";");
2783 }
2784 return rsult;
2785 }
2786 if (psip != NULL) {
2787 if (! StringHasNoText (psip->mol)) {
2788 StringCat (str, "molecule ");
2789 StringCat (str, psip->mol);
2790 sfx = suffix;
2791 pfx = prefix;
2792 rsult = TRUE;
2793 }
2794 if (psip->chain > 0) {
2795 StringCat (str, sfx);
2796 StringCat (str, pfx);
2797 sprintf (tmp, "chain %d", (int) psip->chain);
2798 StringCat (str, tmp);
2799 sfx = suffix;
2800 pfx = prefix;
2801 rsult = TRUE;
2802 }
2803 if (psip->rel != NULL) {
2804 StringCat (str, sfx);
2805 StringCat (str, pfx);
2806 StringCat (str, "release ");
2807 dt = asn2gb_PrintDate (psip->rel);
2808 StringCat (str, dt);
2809 MemFree (dt);
2810 sfx = suffix;
2811 pfx = prefix;
2812 rsult = TRUE;
2813 }
2814 StringCat (str, ";");
2815 return rsult;
2816 }
2817 return rsult;
2818 }
2819
2820
2821 static void AddSPBlock (
2822 IntAsn2gbJobPtr ajp,
2823 StringItemPtr ffstring,
2824 BioseqPtr bsp
2825 )
2826
2827 {
2828 CharPtr acc;
2829 Char buf [64];
2830 DbtagPtr db;
2831 SeqMgrDescContext dcontext;
2832 Boolean first;
2833 Int4 gi;
2834 Boolean has_link;
2835 Char id [42];
2836 ObjectIdPtr oip;
2837 CharPtr ptr;
2838 SeqDescrPtr sdp;
2839 SeqIdPtr sid;
2840 SeqIdPtr sif;
2841 SeqIdPtr sip;
2842 SPBlockPtr spb;
2843 CharPtr string;
2844 ValNodePtr vnp;
2845 CharPtr str;
2846 Char numbuf[40];
2847
2848 if (bsp == NULL) return;
2849 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_sp, &dcontext);
2850 if (sdp == NULL) return;
2851 spb = (SPBlockPtr) sdp->data.ptrvalue;
2852 if (spb == NULL) return;
2853
2854 if (spb->_class == 1) {
2855 FFAddOneString (ffstring, "class: standard.", FALSE, FALSE, TILDE_IGNORE);
2856 FFAddNewLine(ffstring);
2857 } else if (spb->_class == 2) {
2858 FFAddOneString (ffstring, "class: preliminary.", FALSE, FALSE, TILDE_IGNORE);
2859 FFAddNewLine(ffstring);
2860 }
2861
2862 if (spb->extra_acc) {
2863 FFAddOneString (ffstring, "extra accessions:", FALSE, FALSE, TILDE_IGNORE);
2864 for (vnp = spb->extra_acc; vnp != NULL; vnp = vnp->next) {
2865 FFAddOneString (ffstring, (CharPtr) vnp->data.ptrvalue, FALSE, FALSE, TILDE_IGNORE);
2866 if (vnp->next != NULL) {
2867 FFAddOneChar (ffstring, ',', FALSE );
2868 }
2869 }
2870 FFAddNewLine(ffstring);
2871 }
2872
2873 if (spb->imeth) {
2874 FFAddOneString (ffstring, "seq starts with Met", FALSE, FALSE, TILDE_IGNORE);
2875 }
2876
2877 if (spb->plasnm != NULL) {
2878 FFAddOneString (ffstring, "plasmid:", FALSE, FALSE, TILDE_IGNORE);
2879 for (vnp = spb->plasnm; vnp != NULL; vnp = vnp->next) {
2880 FFAddOneString (ffstring, (CharPtr) vnp->data.ptrvalue, FALSE, FALSE, TILDE_IGNORE);
2881 FFAddOneChar (ffstring, ',', FALSE );
2882 }
2883 }
2884
2885 if (spb->created) {
2886 string = PrintDate (spb->created);
2887 FFAddOneString (ffstring, "created: ", FALSE, FALSE, TILDE_IGNORE);
2888 FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
2889
2890 MemFree (string);
2891 }
2892
2893 if (spb->sequpd) {
2894 string = PrintDate (spb->sequpd);
2895 FFAddOneString (ffstring, "sequence updated: ", FALSE, FALSE, TILDE_IGNORE);
2896 FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
2897 MemFree (string);
2898 }
2899
2900 if (spb->annotupd) {
2901 string = PrintDate (spb->annotupd);
2902 FFAddOneString (ffstring, "annotation updated: ", FALSE, FALSE, TILDE_IGNORE);
2903 FFAddOneString (ffstring, string, FALSE, FALSE, TILDE_IGNORE);
2904 MemFree (string);
2905 }
2906
2907 if (spb->seqref) {
2908 FFAddOneString (ffstring, "xrefs: ", FALSE, FALSE, TILDE_IGNORE);
2909 first = TRUE;
2910 for (sid = spb->seqref; sid != NULL; sid = sid->next) {
2911 acc = NULL;
2912 has_link = FALSE;
2913 if (first == FALSE) {
2914 FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
2915 }
2916 first = FALSE;
2917 sip = sid;
2918 sif = NULL;
2919 id [0] = '\0';
2920 if (sip->choice == SEQID_GI) {
2921 gi = sid->data.intvalue;
2922 if (! GetAccnVerFromServer (gi, id)) {
2923 sif = GetSeqIdForGI (gi);
2924 if (sif != NULL) {
2925 sip = sif;
2926 }
2927 }
2928 }
2929 if (id [0] == '\0') {
2930 SeqIdWrite (sip, id, PRINTID_TEXTID_ACC_VER, sizeof (id) - 1);
2931 }
2932 if (sid->choice == SEQID_GI) {
2933 has_link = TRUE;
2934 }
2935 if (StringDoesHaveText (id)) {
2936 acc = id;
2937 }
2938 if (acc != NULL) {
2939 if ( GetWWW(ajp) && has_link ) {
2940 sprintf(numbuf, "%ld", (long) sid->data.intvalue);
2941 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
2942 StringNCpy_0 (buf, acc, sizeof (buf));
2943 ptr = StringChr (buf, '.');
2944 if (ptr != NULL) {
2945 *ptr = '\0';
2946 }
2947 if (IS_ntdb_accession (buf)) {
2948 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
2949 } else {
2950 FF_Add_NCBI_Base_URL (ffstring, link_seqp);
2951 }
2952 FFAddTextToString(ffstring, /* "val=" */ NULL, numbuf, "\">", FALSE, FALSE, TILDE_IGNORE);
2953 FFAddOneString(ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
2954 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
2955 } else {
2956 FFAddOneString(ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
2957 }
2958 }
2959 if (sif != NULL) {
2960 SeqIdFree (sif);
2961 }
2962 }
2963 }
2964
2965 first = TRUE;
2966 for (vnp = spb->dbref; vnp != NULL; vnp = vnp->next) {
2967 db = (DbtagPtr) vnp->data.ptrvalue;
2968 if (db == NULL) continue;
2969 oip = db->tag;
2970 if (oip == NULL) continue;
2971 has_link = FALSE;
2972 if (first) {
2973 FFAddNewLine(ffstring);
2974 FFAddOneString (ffstring, "xrefs (non-sequence databases): ", FALSE, FALSE, TILDE_IGNORE);
2975 first = FALSE;
2976 } else {
2977 FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
2978 }
2979
2980 str = NULL;
2981 if ( oip->str != NULL ) {
2982 str = oip->str;
2983 if (StringNCmp (str, "GO:", 3) == 0) {
2984 str += 3;
2985 } else if (StringNCmp (str, "MGI:", 4) == 0) {
2986 str += 4;
2987 } else if (StringNCmp (str, "HGNC:", 5) == 0) {
2988 str += 5;
2989 }
2990 } else if ( oip->id > 0 ) {
2991 sprintf (numbuf, "%d", oip->id);
2992 str = numbuf;
2993 }
2994
2995 FF_www_db_xref (ajp, ffstring, db->db, str, bsp);
2996
2997 /*
2998 if (StringCmp (db->db, "MGD") == 0 || StringCmp (db->db, "MGI") == 0) {
2999 FFAddOneString (ffstring, "MGI", FALSE, FALSE, TILDE_IGNORE);
3000 } else {
3001 FFAddOneString (ffstring, db->db, FALSE, FALSE, TILDE_IGNORE);
3002 }
3003 if (StringCmp (db->db, "MIM") == 0) {
3004 has_link = TRUE;
3005 }
3006
3007 str = NULL;
3008 if ( oip->str != NULL ) {
3009 str = oip->str;
3010 if (StringNCmp (str, "GO:", 3) == 0) {
3011 str += 3;
3012 } else if (StringNCmp (str, "MGI:", 4) == 0) {
3013 str += 4;
3014 } else if (StringNCmp (str, "HGNC:", 5) == 0) {
3015 str += 5;
3016 }
3017 } else if ( oip->id > 0 ) {
3018 sprintf(numbuf, "%d", oip->id);
3019 str = numbuf;
3020 }
3021
3022 if ( !StringHasNoText(str) ) {
3023 FFAddOneString (ffstring, ":", FALSE, FALSE, TILDE_IGNORE);
3024 if ( GetWWW(ajp) && has_link) {
3025 FFAddOneChar (ffstring, ' ', FALSE);
3026 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3027 FF_Add_NCBI_Base_URL (ffstring, link_omim);
3028 FFAddOneString (ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3029 FFAddTextToString(ffstring, "\">", str, "</a>", FALSE, FALSE, TILDE_IGNORE);
3030 } else {
3031 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3032 }
3033 }
3034 */
3035 }
3036 }
3037
3038 static void AddPIRBlock (
3039 IntAsn2gbJobPtr ajp,
3040 StringItemPtr ffstring,
3041 BioseqPtr bsp
3042 )
3043
3044 {
3045 CharPtr acc;
3046 SeqMgrDescContext dcontext;
3047 Boolean first;
3048 Char id [41];
3049 CharPtr prefix = NULL;
3050 SeqDescrPtr sdp;
3051 SeqIdPtr sid;
3052 PirBlockPtr pbp;
3053
3054 if (bsp == NULL) return;
3055 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pir, &dcontext);
3056 if (sdp == NULL) return;
3057 pbp = (PirBlockPtr) sdp->data.ptrvalue;
3058 if (pbp == NULL) return;
3059
3060 if (pbp->host != NULL) {
3061 FFAddTextToString (ffstring, "host:", pbp->host, "\n", FALSE, TRUE, TILDE_IGNORE);
3062 prefix = ";";
3063 }
3064
3065 if (pbp->source != NULL) {
3066 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3067 FFAddNewLine(ffstring);
3068 FFAddTextToString(ffstring, "source: ", pbp->source, "\n", FALSE, TRUE, TILDE_IGNORE);
3069 prefix = ";";
3070 }
3071
3072 if (pbp->summary != NULL) {
3073 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3074 FFAddNewLine(ffstring);
3075 FFAddTextToString(ffstring, "summary: ", pbp->summary, "\n", FALSE, TRUE, TILDE_IGNORE);
3076 prefix = ";";
3077 }
3078
3079 if (pbp->genetic != NULL) {
3080 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3081 FFAddNewLine(ffstring);
3082 FFAddTextToString(ffstring, "genetic: ", pbp->genetic, "\n", FALSE, TRUE, TILDE_IGNORE);
3083 prefix = ";";
3084 }
3085
3086 if (pbp->includes != NULL) {
3087 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3088 FFAddNewLine(ffstring);
3089 FFAddTextToString(ffstring, "includes: ", pbp->includes, "\n", FALSE, TRUE, TILDE_IGNORE);
3090 prefix = ";";
3091 }
3092
3093 if (pbp->placement != NULL) {
3094 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3095 FFAddNewLine(ffstring);
3096 FFAddTextToString(ffstring, "placement: ", pbp->placement, "\n", FALSE, TRUE, TILDE_IGNORE);
3097 prefix = ";";
3098 }
3099
3100 if (pbp->superfamily != NULL) {
3101 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3102 FFAddNewLine(ffstring);
3103 FFAddTextToString(ffstring, "superfamily: ", pbp->superfamily, "\n", FALSE, TRUE, TILDE_IGNORE);
3104 prefix = ";";
3105 }
3106
3107 if (pbp->cross_reference != NULL) {
3108 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3109 FFAddNewLine(ffstring);
3110 FFAddTextToString(ffstring, "xref: ", pbp->cross_reference, "\n", FALSE, TRUE, TILDE_IGNORE);
3111 prefix = ";";
3112 }
3113
3114 if (pbp->date != NULL) {
3115 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3116 FFAddNewLine(ffstring);
3117 FFAddTextToString (ffstring, "PIR dates: ", pbp->date, "\n", FALSE, TRUE, TILDE_IGNORE);
3118 prefix = ";";
3119 }
3120
3121 if (pbp->had_punct) {
3122 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3123 FFAddNewLine(ffstring);
3124 FFAddOneString (ffstring, "punctuation in sequence", FALSE, FALSE, TILDE_IGNORE);
3125 prefix = ";";
3126 }
3127
3128 if (pbp->seqref) {
3129 FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3130 FFAddNewLine(ffstring);
3131 FFAddOneString (ffstring, "xrefs: ", FALSE, FALSE, TILDE_IGNORE);
3132 first = TRUE;
3133 for (sid = pbp->seqref; sid != NULL; sid = sid->next) {
3134 acc = NULL;
3135 if (first == FALSE) {
3136 FFAddOneString (ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
3137 }
3138 first = FALSE;
3139 SeqIdWrite (sid, id, PRINTID_TEXTID_ACC_VER, sizeof (id) - 1);
3140 acc = id;
3141 if (acc != NULL) {
3142 switch (sid->choice) {
3143 case SEQID_GENBANK:
3144 FFAddOneString (ffstring, "genbank ", FALSE, FALSE, TILDE_IGNORE);
3145 break;
3146 case SEQID_EMBL:
3147 FFAddOneString (ffstring, "embl ", FALSE, FALSE, TILDE_IGNORE);
3148 break;
3149 case SEQID_PIR:
3150 FFAddOneString (ffstring, "UniProtKB ", FALSE, FALSE, TILDE_IGNORE);
3151 break;
3152 case SEQID_SWISSPROT:
3153 FFAddOneString (ffstring, "UniProtKB ", FALSE, FALSE, TILDE_IGNORE);
3154 break;
3155 case SEQID_DDBJ:
3156 FFAddOneString (ffstring, "ddbj ", FALSE, FALSE, TILDE_IGNORE);
3157 break;
3158 case SEQID_PRF:
3159 FFAddOneString (ffstring, "prf ", FALSE, FALSE, TILDE_IGNORE);
3160 break;
3161 case SEQID_GI:
3162 FFAddOneString (ffstring, "gi: ", FALSE, FALSE, TILDE_IGNORE);
3163 break;
3164 default:
3165 acc = NULL;
3166 break;
3167 }
3168 }
3169 if (acc != NULL) {
3170 FFAddOneString (ffstring, acc, FALSE, FALSE, TILDE_IGNORE);
3171 }
3172 }
3173 }
3174 FFAddOneString (ffstring, ".", FALSE, FALSE, TILDE_IGNORE);
3175 }
3176
3177 static void AddPRFBlock (
3178 IntAsn2gbJobPtr ajp,
3179 StringItemPtr ffstring,
3180 BioseqPtr bsp
3181 )
3182
3183 {
3184 SeqMgrDescContext dcontext;
3185 PrfExtSrcPtr extra;
3186 CharPtr prefix = NULL;
3187 SeqDescrPtr sdp;
3188 PrfBlockPtr prf;
3189
3190 if (bsp == NULL) return;
3191 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_prf, &dcontext);
3192 if (sdp == NULL) return;
3193 prf = (PrfBlockPtr) sdp->data.ptrvalue;
3194 if (prf == NULL) return;
3195 if ( ffstring == NULL ) return;
3196
3197 extra = prf->extra_src;
3198 if (extra != NULL) {
3199
3200 if (extra->host != NULL) {
3201 FFAddTextToString(ffstring, "host:", extra->host, NULL, FALSE, TRUE, TILDE_IGNORE);
3202 prefix = ";\n";
3203 }
3204
3205 if (extra->part != NULL) {
3206 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3207 FFAddTextToString(ffstring, "part: ", extra->part, NULL, FALSE, TRUE, TILDE_IGNORE);
3208 prefix = ";\n";
3209 }
3210 if (extra->state != NULL) {
3211 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3212 FFAddTextToString(ffstring, "state: ", extra->state, NULL, FALSE, TRUE, TILDE_IGNORE);
3213 prefix = ";\n";
3214 }
3215 if (extra->strain != NULL) {
3216 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3217 FFAddTextToString(ffstring, "strain: ", extra->strain, NULL, FALSE, TRUE, TILDE_IGNORE);
3218 prefix = ";\n";
3219 }
3220 if (extra->taxon != NULL) {
3221 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3222 FFAddTextToString(ffstring, "taxonomy: ", extra->taxon, NULL, FALSE, TRUE, TILDE_IGNORE);
3223 prefix = ";\n";
3224 }
3225
3226 FFAddOneChar(ffstring, '.', FALSE);
3227 }
3228 }
3229
3230 static void AddPDBBlock (
3231 IntAsn2gbJobPtr ajp,
3232 StringItemPtr ffstring,
3233 BioseqPtr bsp
3234 )
3235
3236 {
3237 SeqMgrDescContext dcontext;
3238 CharPtr dt;
3239 CharPtr prefix = NULL;
3240 SeqDescrPtr sdp;
3241 PdbBlockPtr pdb;
3242 PdbRepPtr replace;
3243 CharPtr str;
3244 ValNodePtr vnp;
3245
3246 if (bsp == NULL) return;
3247 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_pdb, &dcontext);
3248 if (sdp == NULL) return;
3249 pdb = (PdbBlockPtr) sdp->data.ptrvalue;
3250 if (pdb == NULL) return;
3251
3252 if (pdb->deposition != NULL) {
3253 dt = asn2gb_PrintDate (pdb->deposition);
3254 FFAddTextToString (ffstring, "deposition: ", dt, NULL, FALSE, TRUE, TILDE_IGNORE);
3255 MemFree (dt);
3256 prefix = ";";
3257 }
3258 if (pdb->pdbclass != NULL) {
3259 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3260 FFAddNewLine(ffstring);
3261 FFAddTextToString(ffstring, "class: ", pdb->pdbclass, NULL, FALSE, TRUE, TILDE_IGNORE);
3262 prefix = ";";
3263 }
3264 if (pdb->source != NULL) {
3265 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3266 FFAddNewLine(ffstring);
3267 FFAddOneString(ffstring, "source: ", FALSE, TRUE, TILDE_IGNORE);
3268 prefix = NULL;
3269 for (vnp = pdb->source; vnp != NULL; vnp = vnp->next) {
3270 str = (CharPtr) vnp->data.ptrvalue;
3271 if (StringHasNoText (str)) continue;
3272 FFAddTextToString (ffstring, prefix, str, NULL, FALSE, TRUE, TILDE_IGNORE);
3273 prefix = ", ";
3274 }
3275 prefix = ";";
3276 }
3277 if (pdb->exp_method != NULL) {
3278 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3279 FFAddNewLine(ffstring);
3280 FFAddTextToString(ffstring, "Exp. method: ", pdb->exp_method, NULL, FALSE, TRUE, TILDE_IGNORE);
3281 prefix = ";";
3282 }
3283 replace = pdb->replace;
3284 if (replace != NULL) {
3285 if (replace->ids != NULL) {
3286 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3287 FFAddNewLine(ffstring);
3288 FFAddOneString(ffstring, "ids replaced: ", FALSE, TRUE, TILDE_IGNORE);
3289
3290 prefix = NULL;
3291 for (vnp = replace->ids; vnp != NULL; vnp = vnp->next) {
3292 str = (CharPtr) vnp->data.ptrvalue;
3293 if (StringHasNoText (str)) continue;
3294 FFAddTextToString (ffstring, prefix, str, NULL, FALSE, TRUE, TILDE_IGNORE);
3295 prefix = ", ";
3296 }
3297 prefix = ";";
3298 }
3299 if (replace->date != NULL) {
3300 FFAddOneString(ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
3301 FFAddNewLine(ffstring);
3302
3303 dt = asn2gb_PrintDate (replace->date);
3304 FFAddTextToString(ffstring, "replacement date: ", dt, NULL, FALSE, TRUE, TILDE_IGNORE);
3305 MemFree (dt);
3306 prefix = ";";
3307 }
3308 }
3309
3310 FFAddOneChar(ffstring, '.', FALSE);
3311 }
3312
3313 static CharPtr TxtSave (CharPtr text, size_t len)
3314
3315 {
3316 CharPtr str = NULL;
3317
3318 if ((text == NULL) || (len == 0))
3319 return str;
3320
3321 str = MemNew((size_t)(len + 1));
3322 MemCopy(str, text, (size_t)len);
3323
3324 return (str);
3325 }
3326
3327 static Boolean FF_www_dbsource (
3328 IntAsn2gbJobPtr ajp,
3329 StringItemPtr ffstring,
3330 CharPtr str,
3331 Boolean first,
3332 SeqIdPtr sip,
3333 Boolean is_na
3334 )
3335
3336 {
3337 CharPtr temp, end, text, loc, link = NULL;
3338 Uint1 choice;
3339 Int2 j;
3340 Int4 gi = 0;
3341 Char gibuf [32];
3342
3343 if (sip == NULL) return FALSE;
3344 choice = sip->choice;
3345
3346 if( GetWWW(ajp) ) {
3347 if (choice == SEQID_PIR) {
3348 link = link_seqp;
3349 } else if (choice == SEQID_SWISSPROT) {
3350 link = link_sp;
3351 } else if (choice == SEQID_PDB || choice == SEQID_PRF) {
3352 link = link_seqp;
3353 } else if (choice == SEQID_EMBL || choice == SEQID_GENBANK ||
3354 choice == SEQID_DDBJ || choice == SEQID_GIBBSQ ||
3355 choice == SEQID_GIBBMT || choice == SEQID_GI ||
3356 choice == SEQID_GIIM || choice == SEQID_OTHER ||
3357 choice == SEQID_TPG || choice == SEQID_TPE || choice == SEQID_TPD ||
3358 choice == SEQID_GPIPE) {
3359 if (is_na) {
3360 link = link_seqn;
3361 } else {
3362 link = link_seqp;
3363 }
3364 } else {
3365 AddStringWithTildes(ffstring, str);
3366 return TRUE;
3367 }
3368
3369 if ((text = StringStr(str, "accession")) != NULL) {
3370 end = text + 9;
3371 j = 9;
3372 while (*end == ' ') {
3373 ++end;
3374 j++;
3375 }
3376 if (first == FALSE) {
3377 FFAddOneString(ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
3378 }
3379 loc = TxtSave (str, end-str - j);
3380 FFAddOneString(ffstring, loc, FALSE, FALSE, TILDE_IGNORE);
3381 MemFree (loc);
3382 for (; text != end; ++text ) {
3383 FFAddOneChar(ffstring, *text, FALSE);
3384 }
3385
3386 temp = text;
3387 end += StringLen(text) - 1;
3388 if ( *end != ';' ) {
3389 ++end;
3390 }
3391
3392 if (choice == SEQID_SWISSPROT) {
3393 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3394 FF_Add_NCBI_Base_URL (ffstring, link);
3395 for (text = temp; text != end; ++text ) {
3396 FFAddOneChar (ffstring, *text, FALSE);
3397 }
3398 } else {
3399 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
3400 FF_Add_NCBI_Base_URL (ffstring, link);
3401 gi = GetGIForSeqId (sip);
3402 if (gi > 0) {
3403 sprintf (gibuf, "%ld", (long) gi);
3404 FFAddOneString (ffstring, gibuf, FALSE, FALSE, TILDE_IGNORE);
3405 } else {
3406 for (text = temp; text != end; ++text ) {
3407 FFAddOneChar(ffstring, *text, FALSE);
3408 }
3409 }
3410 }
3411 FFAddOneString(ffstring, "\">", FALSE, FALSE, TILDE_IGNORE);
3412
3413 for (text = temp; text != end; ++text ) {
3414 FFAddOneChar(ffstring, *text, FALSE);
3415 }
3416 FFAddOneString(ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
3417 if ( *end == ';' ) {
3418 FFAddOneChar(ffstring, ';', FALSE);
3419 }
3420 } else {
3421 if (first == FALSE) {
3422 FFAddOneString(ffstring, ", ", FALSE, FALSE, TILDE_IGNORE);
3423 }
3424 FFAddOneString(ffstring, str, FALSE, FALSE, TILDE_IGNORE);
3425 }
3426 } else {
3427 AddStringWithTildes(ffstring, str);
3428 }
3429 return TRUE;
3430 }
3431
3432 NLM_EXTERN void AddDbsourceBlock (
3433 Asn2gbWorkPtr awp
3434 )
3435
3436 {
3437 IntAsn2gbJobPtr ajp;
3438 Asn2gbSectPtr asp;
3439 BaseBlockPtr bbp;
3440 BioseqPtr bsp;
3441 Char buf [256];
3442 SeqFeatPtr cds;
3443 DbtagPtr db;
3444 GBSeqPtr gbseq;
3445 SeqIdPtr id;
3446 Boolean is_na;
3447 ValNodePtr list = NULL;
3448 BioseqPtr nuc;
3449 SeqEntryPtr sep;
3450 SeqIdPtr sip;
3451 SeqLocPtr slp;
3452 CharPtr str;
3453 Boolean unknown = TRUE;
3454 ValNodePtr vnp;
3455 StringItemPtr ffstring;
3456
3457 if (awp == NULL) return;
3458 ajp = awp->ajp;
3459 if (ajp == NULL) return;
3460 asp = awp->asp;
3461 if (asp == NULL) return;
3462 bsp = awp->bsp;
3463 if (bsp == NULL) return;
3464
3465 bbp = Asn2gbAddBlock (awp, DBSOURCE_BLOCK, sizeof (BaseBlock));
3466 if (bbp == NULL) return;
3467
3468 bbp->entityID = awp->entityID;
3469
3470 ffstring = FFGetString(ajp);
3471 if ( ffstring == NULL ) return;
3472
3473 FFStartPrint (ffstring, awp->format, 0, 12, "DBSOURCE", 12, 5, 5, NULL, TRUE);
3474
3475 sip = SeqIdSelect (bsp->id, dbsource_fasta_order, NUM_SEQID);
3476
3477 if (sip != NULL) {
3478
3479 switch (sip->choice) {
3480 case SEQID_PIR :
3481 case SEQID_SWISSPROT :
3482 case SEQID_PRF :
3483 case SEQID_PDB :
3484 if (WriteDbsourceID (sip, buf, &is_na)) {
3485 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
3486 FFAddNewLine(ffstring);
3487 unknown = FALSE;
3488 }
3489 break;
3490 case SEQID_GENERAL :
3491 db = sip->data.ptrvalue;
3492 if (db == NULL) {
3493 break;
3494 }
3495 if (StringNCmp (db->db, "PIDe", 4) != 0 &&
3496 StringNCmp (db->db, "PIDd", 4) != 0 &&
3497 StringNCmp (db->db, "PID", 3) != 0) {
3498 break;
3499 }
3500 /* if (ChoicePID) found, continue on to next set of cases */
3501 case SEQID_EMBL :
3502 case SEQID_GENBANK :
3503 case SEQID_DDBJ :
3504 case SEQID_GIBBSQ :
3505 case SEQID_GIBBMT :
3506 case SEQID_OTHER :
3507 case SEQID_TPG :
3508 case SEQID_TPE :
3509 case SEQID_TPD :
3510 case SEQID_GPIPE :
3511 case SEQID_GI :
3512 case SEQID_GIIM :
3513 cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
3514 if (cds == NULL) {
3515 /* now may also be protein product of mature peptide feature */
3516 cds = SeqMgrGetPROTgivenProduct (bsp, NULL);
3517 }
3518 if (cds != NULL) {
3519 nuc = BioseqFindFromSeqLoc (cds->location);
3520 if (nuc != NULL) {
3521 slp = SeqLocFindNext (cds->location, NULL);
3522 while (slp != NULL) {
3523 sip = SeqLocId (slp);
3524 AddToUniqueSipList (&list, sip);
3525 slp = SeqLocFindNext (cds->location, slp);
3526 }
3527 for (vnp = list; vnp != NULL; vnp = vnp->next) {
3528 id = (SeqIdPtr) vnp->data.ptrvalue;
3529 nuc = BioseqFindCore (id);
3530 sip = NULL;
3531 if (nuc != NULL) {
3532 sip = SeqIdSelect (nuc->id, dbsource_fasta_order, NUM_SEQID);
3533 } else if (id != NULL && id->choice == SEQID_GI) {
3534 sip = GetSeqIdForGI (id->data.intvalue);
3535 }
3536 if (sip == NULL) {
3537 sip = id;
3538 }
3539 if (sip != NULL) {
3540 if (WriteDbsourceID (sip, buf, &is_na)) {
3541 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
3542 FFAddNewLine(ffstring);
3543 unknown = FALSE;
3544 }
3545 }
3546 }
3547 ValNodeFree (list);
3548 } else {
3549 sep = GetTopSeqEntryForEntityID (awp->entityID);
3550 if (sep != NULL && IS_Bioseq (sep)) {
3551 /* special case for coded_by CDS packed on retcode 1 protein */
3552 id = SeqLocId (cds->location);
3553 if (id != NULL && id->choice == SEQID_GI) {
3554 sip = GetSeqIdForGI (id->data.intvalue);
3555 if (sip == NULL) {
3556 sip = id;
3557 }
3558 }
3559 if (WriteDbsourceID (sip, buf, &is_na)) {
3560 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
3561 FFAddNewLine(ffstring);
3562 unknown = FALSE;
3563 }
3564 }
3565 }
3566 } else {
3567 if (WriteDbsourceID (sip, buf, &is_na)) {
3568 FF_www_dbsource (ajp, ffstring, buf, TRUE, sip, is_na);
3569 FFAddNewLine(ffstring);
3570 unknown = FALSE;
3571 }
3572 }
3573 break;
3574 default :
3575 break;
3576 }
3577
3578 if (sip != NULL) {
3579 switch (sip->choice) {
3580 case SEQID_PIR :
3581 AddPIRBlock (ajp, ffstring, bsp);
3582 break;
3583 case SEQID_SWISSPROT :
3584 AddSPBlock (ajp, ffstring, bsp);
3585 break;
3586 case SEQID_PRF :
3587 AddPRFBlock (ajp, ffstring, bsp);
3588 break;
3589 case SEQID_PDB :
3590 AddPDBBlock (ajp, ffstring, bsp);
3591 break;
3592 default :
3593 break;
3594 }
3595 }
3596 }
3597
3598 if (unknown) {
3599 FFAddOneString (ffstring, "UNKNOWN", FALSE, FALSE, TILDE_TO_SPACES);
3600 }
3601
3602 str = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, NULL);
3603
3604 /* optionally populate gbseq for XML-ized GenBank format */
3605
3606 if (ajp->gbseq) {
3607 gbseq = &asp->gbseq;
3608 } else {
3609 gbseq = NULL;
3610 }
3611
3612 if (gbseq != NULL) {
3613 if (StringNCmp (str, "DBSOURCE ", 12) == 0) {
3614 gbseq->source_db = StringSave (str + 12);
3615 } else {
3616 gbseq->source_db = StringSave (str);
3617 }
3618 CleanQualValue (gbseq->source_db);
3619 Asn2gnbkCompressSpaces (gbseq->source_db);
3620 }
3621
3622 bbp->string = str;
3623 FFRecycleString(ajp, ffstring);
3624
3625 if (awp->afp != NULL) {
3626 DoImmediateFormat (awp->afp, bbp);
3627 }
3628 }
3629
3630 NLM_EXTERN void AddDateBlock (
3631 Asn2gbWorkPtr awp
3632 )
3633
3634 {
3635 IntAsn2gbJobPtr ajp;
3636 BaseBlockPtr bbp;
3637 BioseqPtr bsp;
3638 Char date [40];
3639 SeqMgrDescContext dcontext;
3640 DatePtr dp;
3641 SeqDescrPtr sdp;
3642 StringItemPtr ffstring;
3643
3644 if (awp == NULL) return;
3645 ajp = awp->ajp;
3646 if (ajp == NULL) return;
3647 bsp = awp->bsp;
3648 if (bsp == NULL) return;
3649
3650 ffstring = FFGetString(ajp);
3651 if ( ffstring == NULL ) return;
3652
3653 bbp = Asn2gbAddBlock (awp, DATE_BLOCK, sizeof (BaseBlock));
3654 if (bbp == NULL) return;
3655
3656 date [0] = '\0';
3657
3658 dp = NULL;
3659 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_create_date, &dcontext);
3660 if (sdp != NULL) {
3661 dp = (DatePtr) sdp->data.ptrvalue;
3662 }
3663 if (dp != NULL) {
3664 DateToFF (date, dp, FALSE);
3665 }
3666 if (StringHasNoText (date)) {
3667 StringCpy (date, "01-JAN-1900");
3668 }
3669
3670 FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 5, "DT", TRUE);
3671 FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
3672
3673 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 0, 5, 5, "DT");
3674 FFRecycleString(ajp, ffstring);
3675
3676 bbp = Asn2gbAddBlock (awp, DATE_BLOCK, sizeof (BaseBlock));
3677 if (bbp == NULL) return;
3678
3679 ffstring = FFGetString(ajp);
3680
3681 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_update_date, &dcontext);
3682 if (sdp != NULL) {
3683 dp = (DatePtr) sdp->data.ptrvalue;
3684 }
3685 if (dp != NULL) {
3686 DateToFF (date, dp, FALSE);
3687 }
3688
3689 FFStartPrint (ffstring, awp->format, 0, 0, NULL, 0, 5, 5, "DT", FALSE);
3690 FFAddOneString (ffstring, date, FALSE, FALSE, TILDE_IGNORE);
3691
3692 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 0, 5, 5, "DT");
3693 FFRecycleString(ajp, ffstring);
3694
3695 if (awp->afp != NULL) {
3696 DoImmediateFormat (awp->afp, bbp);
3697 }
3698 }
3699
3700
3701 #define TOTAL_ESTKW 11
3702 #define TOTAL_STSKW 5
3703 #define TOTAL_GSSKW 2
3704
3705 static CharPtr EST_kw_array[ TOTAL_ESTKW] = {
3706 "EST", "EST PROTO((expressed sequence tag)", "expressed sequence tag",
3707 "EST (expressed sequence tag)", "EST(expressed sequence tag)",
3708 "partial cDNA sequence", "transcribed sequence fragment", "TSR",
3709 "putatively transcribed partial sequence", "UK putts"
3710 };
3711
3712 static CharPtr GSS_kw_array [TOTAL_GSSKW] = {
3713 "GSS", "trapped exon"
3714 };
3715 static CharPtr STS_kw_array[TOTAL_STSKW] = {
3716 "STS", "STS(sequence tagged site)", "STS (sequence tagged site)",
3717 "STS sequence", "sequence tagged site"
3718 };
3719
3720 static Int2 MatchArrayString (
3721 CharPtr array_string [],
3722 Int2 totalstr,
3723 CharPtr text
3724 )
3725
3726 {
3727 Int2 i;
3728
3729 for (i = 0; i < totalstr && text != NULL; i++) {
3730 if (StringCmp (array_string [i], text) == 0) {
3731 return (i);
3732 }
3733 }
3734
3735 return (-1);
3736 }
3737
3738 static Boolean CheckSpecialKeyword (
3739 Boolean is_est,
3740 Boolean is_sts,
3741 Boolean is_gss,
3742 CharPtr kwd
3743 )
3744
3745 {
3746 if (kwd == NULL) return FALSE;
3747
3748 if (is_est) {
3749 if (MatchArrayString (STS_kw_array, TOTAL_STSKW, kwd) != -1) return FALSE;
3750 if (MatchArrayString (GSS_kw_array, TOTAL_GSSKW, kwd) != -1) return FALSE;
3751 }
3752
3753 if (is_sts) {
3754 if (MatchArrayString (EST_kw_array, TOTAL_ESTKW, kwd) != -1) return FALSE;
3755 if (MatchArrayString (GSS_kw_array, TOTAL_GSSKW, kwd) != -1) return FALSE;
3756 }
3757
3758 if (is_gss) {
3759 if (MatchArrayString (STS_kw_array, TOTAL_STSKW, kwd) != -1) return FALSE;
3760 if (MatchArrayString (EST_kw_array, TOTAL_ESTKW, kwd) != -1) return FALSE;
3761 }
3762
3763 return TRUE;
3764 }
3765
3766 static Boolean KeywordAlreadyInList (
3767 ValNodePtr head,
3768 CharPtr kwd
3769 )
3770
3771 {
3772 ValNodePtr vnp;
3773
3774 for (vnp = head; vnp != NULL; vnp = vnp->next) {
3775 if (StringICmp ((CharPtr) vnp->data.ptrvalue, kwd) == 0) return TRUE;
3776 }
3777
3778 return FALSE;
3779 }
3780
3781 typedef struct finstatdata {
3782 CharPtr inuserobj;
3783 CharPtr inkeyword;
3784 } FinStatData, PNTR FinStatPtr;
3785
3786 static FinStatData finStatKywds [] = {
3787 {"Standard-Draft", "STANDARD_DRAFT"},
3788 {"High-quality-draft", "HIGH_QUALITY_DRAFT"},
3789 {"Improved-high-quality-draft", "IMPROVED_HIGH_QUALITY_DRAFT"},
3790 {"Annotation-grade", "ANNOTATION_GRADE"},
3791 {"Non-contiguous-finished", "NON_CONTIGUOUS_FINISHED"},
3792 {"Finished", "FINISHED"},
3793 {NULL, NULL}
3794 };
3795
3796 static CharPtr GetFinishingStatus (
3797 CharPtr str
3798 )
3799
3800 {
3801 Char buf [64];
3802 Char ch;
3803 Int2 i;
3804 CharPtr ptr;
3805
3806 if (StringHasNoText (str)) return NULL;
3807
3808 StringNCpy_0 (buf, str, sizeof (buf));
3809 ptr = buf;
3810 ch = *ptr;
3811 while (ch != '\0') {
3812 if (ch == ' ') {
3813 *ptr = '-';
3814 }
3815 ptr++;
3816 ch = *ptr;
3817 }
3818
3819 for (i = 0; finStatKywds [i].inuserobj != NULL; i++) {
3820 if (StringICmp (buf, finStatKywds [i].inuserobj) == 0) {
3821 return finStatKywds [i].inkeyword;
3822 }
3823 }
3824
3825 return NULL;
3826 }
3827
3828 NLM_EXTERN void AddKeywordsBlock (
3829 Asn2gbWorkPtr awp
3830 )
3831
3832 {
3833 Boolean add_encode = FALSE;
3834 IntAsn2gbJobPtr ajp;
3835 Asn2gbSectPtr asp;
3836 BaseBlockPtr bbp;
3837 BioseqPtr bsp;
3838 BioSourcePtr biop;
3839 UserFieldPtr curr;
3840 SeqMgrDescContext dcontext;
3841 EMBLBlockPtr ebp;
3842 CharPtr field;
3843 CharPtr finishing_status = NULL;
3844 GBBlockPtr gbp;
3845 GBSeqPtr gbseq;
3846 ValNodePtr head = NULL;
3847 IndxPtr index;
3848 Boolean is_est = FALSE;
3849 Boolean is_gss = FALSE;
3850 Boolean is_sts = FALSE;
3851 Boolean is_env_sample = FALSE;
3852 Boolean is_genome_assembly = FALSE;
3853 ValNodePtr keywords;
3854 CharPtr kwd;
3855 MolInfoPtr mip;
3856 ObjectIdPtr oip;
3857 PirBlockPtr pir;
3858 PrfBlockPtr prf;
3859 SeqDescrPtr sdp;
3860 SeqIdPtr sip;
3861 SPBlockPtr sp;
3862 SubSourcePtr ssp;
3863 CharPtr str;
3864 UserObjectPtr uop;
3865 ValNodePtr vnp;
3866 StringItemPtr ffstring;
3867
3868 if (awp == NULL) return;
3869 ajp = awp->ajp;
3870 if (ajp == NULL) return;
3871 bsp = awp->bsp;
3872 if (bsp == NULL) return;
3873 asp = awp->asp;
3874 if (asp == NULL) return;
3875
3876 bbp = (BaseBlockPtr) Asn2gbAddBlock (awp, KEYWORDS_BLOCK, sizeof (BaseBlock));
3877 if (bbp == NULL) return;
3878
3879 ffstring = FFGetString(ajp);
3880 if ( ffstring == NULL ) return;
3881
3882 biop = NULL;
3883 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
3884 if (sdp != NULL) {
3885 biop = (BioSourcePtr) sdp->data.ptrvalue;
3886 }
3887 if (biop != NULL) {
3888 for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
3889 if (ssp->subtype == SUBSRC_environmental_sample) {
3890 is_env_sample = TRUE;
3891 }
3892 }
3893 }
3894
3895 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
3896 if (sdp != NULL) {
3897 bbp->entityID = dcontext.entityID;
3898 bbp->itemID = dcontext.itemID;
3899 bbp->itemtype = OBJ_SEQDESC;
3900
3901 mip = (MolInfoPtr) sdp->data.ptrvalue;
3902 if (mip != NULL) {
3903 switch (mip->tech) {
3904 case MI_TECH_htgs_1 :
3905 if (head != NULL) {
3906 ValNodeCopyStr (&head, 0, "; ");
3907 }
3908 ValNodeCopyStr (&head, 0, "HTG");
3909 ValNodeCopyStr (&head, 0, "; ");
3910 ValNodeCopyStr (&head, 0, "HTGS_PHASE1");
3911 break;
3912 case MI_TECH_htgs_2 :
3913 if (head != NULL) {
3914 ValNodeCopyStr (&head, 0, "; ");
3915 }
3916 ValNodeCopyStr (&head, 0, "HTG");
3917 ValNodeCopyStr (&head, 0, "; ");
3918 ValNodeCopyStr (&head, 0, "HTGS_PHASE2");
3919 break;
3920 case MI_TECH_htgs_3 :
3921 if (head != NULL) {
3922 ValNodeCopyStr (&head, 0, "; ");
3923 }
3924 ValNodeCopyStr (&head, 0, "HTG");
3925 break;
3926 case MI_TECH_est :
3927 if (head != NULL) {
3928 ValNodeCopyStr (&head, 0, "; ");
3929 }
3930 is_est = TRUE;
3931 ValNodeCopyStr (&head, 0, "EST");
3932 if (is_env_sample) {
3933 if (head != NULL) {
3934 ValNodeCopyStr (&head, 0, "; ");
3935 }
3936 ValNodeCopyStr (&head, 0, "ENV");
3937 }
3938 break;
3939 case MI_TECH_sts :
3940 if (head != NULL) {
3941 ValNodeCopyStr (&head, 0, "; ");
3942 }
3943 is_sts = TRUE;
3944 ValNodeCopyStr (&head, 0, "STS");
3945 break;
3946 case MI_TECH_survey :
3947 if (head != NULL) {
3948 ValNodeCopyStr (&head, 0, "; ");
3949 }
3950 is_gss = TRUE;
3951 ValNodeCopyStr (&head, 0, "GSS");
3952 if (is_env_sample) {
3953 if (head != NULL) {
3954 ValNodeCopyStr (&head, 0, "; ");
3955 }
3956 ValNodeCopyStr (&head, 0, "ENV");
3957 }
3958 break;
3959 case MI_TECH_fli_cdna :
3960 if (head != NULL) {
3961 ValNodeCopyStr (&head, 0, "; ");
3962 }
3963 ValNodeCopyStr (&head, 0, "FLI_CDNA");
3964 break;
3965 case MI_TECH_htgs_0 :
3966 if (head != NULL) {
3967 ValNodeCopyStr (&head, 0, "; ");
3968 }
3969 ValNodeCopyStr (&head, 0, "HTG");
3970 ValNodeCopyStr (&head, 0, "; ");
3971 ValNodeCopyStr (&head, 0, "HTGS_PHASE0");
3972 break;
3973 case MI_TECH_htc :
3974 if (head != NULL) {
3975 ValNodeCopyStr (&head, 0, "; ");
3976 }
3977 ValNodeCopyStr (&head, 0, "HTC");
3978 break;
3979 case MI_TECH_wgs :
3980 if (head != NULL) {
3981 ValNodeCopyStr (&head, 0, "; ");
3982 }
3983 ValNodeCopyStr (&head, 0, "WGS");
3984 break;
3985 /*
3986 case MI_TECH_barcode :
3987 if (head != NULL) {
3988 ValNodeCopyStr (&head, 0, "; ");
3989 }
3990 ValNodeCopyStr (&head, 0, "BARCODE");
3991 break;
3992 */
3993 case MI_TECH_tsa :
3994 if (head != NULL) {
3995 ValNodeCopyStr (&head, 0, "; ");
3996 }
3997 ValNodeCopyStr (&head, 0, "TSA");
3998 ValNodeCopyStr (&head, 0, "; ");
3999 ValNodeCopyStr (&head, 0, "Transcriptome Shotgun Assembly");
4000 break;
4001 case MI_TECH_unknown :
4002 case MI_TECH_standard :
4003 case MI_TECH_other :
4004 if (is_env_sample) {
4005 if (head != NULL) {
4006 ValNodeCopyStr (&head, 0, "; ");
4007 }
4008 ValNodeCopyStr (&head, 0, "ENV");
4009 }
4010 break;
4011 default :
4012 break;
4013 }
4014 }
4015 }
4016
4017 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
4018 while (sdp != NULL) {
4019 uop = (UserObjectPtr) sdp->data.ptrvalue;
4020 if (uop != NULL) {
4021 oip = uop->type;
4022 if (oip != NULL && StringICmp (oip->str, "ENCODE") == 0) {
4023 add_encode = TRUE;
4024 } else if (oip != NULL && StringICmp (oip->str, "StructuredComment") == 0) {
4025 for (curr = uop->data; curr != NULL; curr = curr->next) {
4026 if (curr->choice != 1) continue;
4027 oip = curr->label;
4028 if (oip == NULL) continue;
4029 field = oip->str;
4030 if (StringHasNoText (field)) continue;
4031 if (StringCmp (field, "StructuredCommentPrefix") == 0) {
4032 if (StringCmp ((CharPtr) curr->data.ptrvalue, "##Genome-Assembly-Data-START##") == 0) {
4033 is_genome_assembly = TRUE;
4034 }
4035 }
4036 if (StringCmp (field, "Current Finishing Status") == 0) {
4037 finishing_status = GetFinishingStatus ((CharPtr) curr->data.ptrvalue);
4038 }
4039 }
4040 }
4041 }
4042 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
4043 }
4044 if (add_encode) {
4045 if (head != NULL) {
4046 ValNodeCopyStr (&head, 0, "; ");
4047 }
4048 ValNodeCopyStr (&head, 0, "ENCODE");
4049 }
4050 if (is_genome_assembly && StringDoesHaveText (finishing_status)) {
4051 if (head != NULL) {
4052 ValNodeCopyStr (&head, 0, "; ");
4053 }
4054 ValNodeCopyStr (&head, 0, finishing_status);
4055 }
4056
4057 for (sip = bsp->id; sip != NULL; sip = sip->next) {
4058 if (sip->choice == SEQID_TPG || sip->choice == SEQID_TPE || sip->choice == SEQID_TPD) {
4059 if (head != NULL) {
4060 ValNodeCopyStr (&head, 0, "; ");
4061 }
4062 ValNodeCopyStr (&head, 0, "Third Party Annotation");
4063 ValNodeCopyStr (&head, 0, "; ");
4064 ValNodeCopyStr (&head, 0, "TPA");
4065 }
4066 }
4067
4068 sdp = SeqMgrGetNextDescriptor (bsp, NULL, 0, &dcontext);
4069 while (sdp != NULL) {
4070
4071 keywords = NULL;
4072
4073 switch (dcontext.seqdesctype) {
4074 case Seq_descr_genbank :
4075 gbp = (GBBlockPtr) sdp->data.ptrvalue;
4076 if (gbp != NULL) {
4077 keywords = gbp->keywords;
4078 }
4079 break;
4080 case Seq_descr_embl :
4081 ebp = (EMBLBlockPtr) sdp->data.ptrvalue;
4082 if (ebp != NULL) {
4083 keywords = ebp->keywords;
4084 }
4085 break;
4086 case Seq_descr_pir :
4087 pir = (PirBlockPtr) sdp->data.ptrvalue;
4088 if (pir != NULL) {
4089 keywords = pir->keywords;
4090 }
4091 break;
4092 case Seq_descr_prf :
4093 prf = (PrfBlockPtr) sdp->data.ptrvalue;
4094 if (prf != NULL) {
4095 keywords = prf->keywords;
4096 }
4097 break;
4098 case Seq_descr_sp :
4099 sp = (SPBlockPtr) sdp->data.ptrvalue;
4100 if (sp != NULL) {
4101 keywords = sp->keywords;
4102 }
4103 break;
4104 default :
4105 break;
4106 }
4107
4108 if (keywords != NULL) {
4109 bbp->entityID = dcontext.entityID;
4110 bbp->itemID = dcontext.itemID;
4111 bbp->itemtype = OBJ_SEQDESC;
4112 }
4113
4114 for (vnp = keywords; vnp != NULL; vnp = vnp->next) {
4115 kwd = (CharPtr) vnp->data.ptrvalue;
4116 if (CheckSpecialKeyword (is_est, is_sts, is_gss, kwd)) {
4117 if (! KeywordAlreadyInList (head, kwd)) {
4118 if (head != NULL) {
4119 ValNodeCopyStr (&head, 0, "; ");
4120 }
4121 ValNodeCopyStr (&head, 0, kwd);
4122 }
4123 }
4124 }
4125
4126 sdp = SeqMgrGetNextDescriptor (bsp, sdp, 0, &dcontext);
4127 }
4128
4129 FFStartPrint( ffstring, awp->format, 0, 12, "KEYWORDS", 12, 5, 5, "KW", TRUE);
4130 str = MergeFFValNodeStrs (head);
4131
4132 /* if no keywords were found, period will still be added by this call */
4133 if ( str != NULL ) {
4134 FFAddOneString (ffstring, str, TRUE, FALSE, TILDE_TO_SPACES);
4135 } else {
4136 FFAddOneChar(ffstring, '.', FALSE);
4137 }
4138
4139 MemFree (str);
4140
4141 /* optionally populate indexes for NCBI internal database */
4142
4143 if (ajp->index) {
4144 index = &asp->index;
4145 } else {
4146 index = NULL;
4147 }
4148
4149 if (index != NULL) {
4150 for (vnp = head; vnp != NULL; vnp = vnp->next) {
4151 kwd = (CharPtr) vnp->data.ptrvalue;
4152 if (StringCmp (kwd, "; ") == 0) continue;
4153 ValNodeCopyStrToHead (&(index->keywords), 0, kwd);
4154 }
4155 }
4156
4157 /* optionally populate gbseq for XML-ized GenBank format */
4158
4159 if (ajp->gbseq) {
4160 gbseq = &asp->gbseq;
4161 } else {
4162 gbseq = NULL;
4163 }
4164
4165 if (gbseq != NULL) {
4166 for (vnp = head; vnp != NULL; vnp = vnp->next) {
4167 kwd = (CharPtr) vnp->data.ptrvalue;
4168 if (StringCmp (kwd, "; ") == 0) continue;
4169 ValNodeCopyStr (&(gbseq->keywords), 0, kwd);
4170 }
4171 }
4172
4173 ValNodeFreeData (head);
4174
4175 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "KW");
4176
4177 FFRecycleString(ajp, ffstring);
4178
4179 if (awp->afp != NULL) {
4180 DoImmediateFormat (awp->afp, bbp);
4181 }
4182 }
4183
4184 NLM_EXTERN void AddSegmentBlock (
4185 Asn2gbWorkPtr awp,
4186 Boolean onePartOfSeg,
4187 Boolean is_na
4188 )
4189
4190 {
4191 Char acc [41];
4192 IntAsn2gbJobPtr ajp;
4193 Asn2gbSectPtr asp;
4194 BaseBlockPtr bbp;
4195 Char buf [32];
4196 GBSeqPtr gbseq;
4197 StringItemPtr ffstring;
4198
4199 if (awp == NULL) return;
4200 ajp = awp->ajp;
4201 if (ajp == NULL) return;
4202 asp = awp->asp;
4203 if (asp == NULL) return;
4204
4205 if (awp->seg < 1 || awp->numsegs < 1) return;
4206
4207 bbp = Asn2gbAddBlock (awp, SEGMENT_BLOCK, sizeof (BaseBlock));
4208 if (bbp == NULL) return;
4209
4210 ffstring = FFGetString(ajp);
4211 if ( ffstring == NULL ) return;
4212
4213
4214 FFStartPrint (ffstring, awp->format, 0, 12, "SEGMENT", 12, 5, 5, "XX", FALSE);
4215
4216 if ( GetWWW(ajp) && awp->parent != NULL && onePartOfSeg) {
4217 sprintf (buf, "%d of ", (int) awp->seg);
4218 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4219 SeqIdWrite (awp->parent->id, acc, PRINTID_TEXTID_ACC_VER, sizeof (acc) - 1);
4220
4221 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
4222 if (is_na) {
4223 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
4224 } else {
4225 FF_Add_NCBI_Base_URL (ffstring, link_seqp);
4226 }
4227 FFAddTextToString(ffstring, /* "val=" */ NULL, acc, "\">", FALSE, FALSE, TILDE_IGNORE);
4228
4229 sprintf (buf, "%ld", (long) awp->numsegs);
4230 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_IGNORE);
4231 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_IGNORE);
4232 } else {
4233 sprintf (buf, "%d of %ld", (int) awp->seg, (long) awp->numsegs);
4234 FFAddOneString (ffstring, buf, FALSE, TRUE, TILDE_TO_SPACES);
4235 }
4236
4237 /* optionally populate gbseq for XML-ized GenBank format */
4238
4239 if (ajp->gbseq) {
4240 gbseq = &asp->gbseq;
4241 } else {
4242 gbseq = NULL;
4243 }
4244
4245 if (gbseq != NULL) {
4246 sprintf (buf, "%d of %ld", (int) awp->seg, (long) awp->numsegs);
4247 gbseq->segment = StringSave (buf);
4248 }
4249
4250 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 5, 5, "XX");
4251 FFRecycleString(ajp, ffstring);
4252
4253 if (awp->afp != NULL) {
4254 DoImmediateFormat (awp->afp, bbp);
4255 }
4256 }
4257
4258 NLM_EXTERN void AddSourceBlock (
4259 Asn2gbWorkPtr awp
4260 )
4261
4262 {
4263 IntAsn2gbJobPtr ajp;
4264 BaseBlockPtr bbp;
4265 BioseqPtr bsp;
4266 SeqFeatPtr cds;
4267 SeqMgrDescContext dcontext;
4268 BioseqPtr dna;
4269 SeqMgrFeatContext fcontext;
4270 GBBlockPtr gbp;
4271 SeqDescrPtr sdp;
4272 SeqFeatPtr sfp;
4273
4274 if (awp == NULL) return;
4275 ajp = awp->ajp;
4276 if (ajp == NULL) return;
4277 bsp = awp->bsp;
4278 if (bsp == NULL) return;
4279
4280 bbp = Asn2gbAddBlock (awp, SOURCE_BLOCK, sizeof (BaseBlock));
4281 if (bbp == NULL) return;
4282
4283 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
4284 if (sdp != NULL && (! ajp->newSourceOrg)) {
4285 gbp = (GBBlockPtr) sdp->data.ptrvalue;
4286 if (gbp != NULL && (! StringHasNoText (gbp->source))) {
4287 bbp->entityID = dcontext.entityID;
4288 bbp->itemID = dcontext.itemID;
4289 bbp->itemtype = OBJ_SEQDESC;
4290
4291 if (awp->afp != NULL) {
4292 DoImmediateFormat (awp->afp, bbp);
4293 }
4294
4295 return;
4296 }
4297 }
4298
4299 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
4300 if (sdp != NULL) {
4301 bbp->entityID = dcontext.entityID;
4302 bbp->itemID = dcontext.itemID;
4303 bbp->itemtype = OBJ_SEQDESC;
4304 } else {
4305 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
4306 if (sfp != NULL) {
4307 bbp->entityID = fcontext.entityID;
4308 bbp->itemID = fcontext.itemID;
4309 bbp->itemtype = OBJ_SEQFEAT;
4310 } else if (ISA_aa (bsp->mol)) {
4311
4312 /* if protein with no sources, get sources applicable to DNA location of CDS */
4313
4314 cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
4315 if (cds != NULL) {
4316 sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
4317 if (sfp != NULL) {
4318 bbp->entityID = fcontext.entityID;
4319 bbp->itemID = fcontext.itemID;
4320 bbp->itemtype = OBJ_SEQFEAT;
4321 } else {
4322 dna = BioseqFindFromSeqLoc (cds->location);
4323 if (dna != NULL) {
4324 sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
4325 if (sdp != NULL) {
4326 bbp->entityID = dcontext.entityID;
4327 bbp->itemID = dcontext.itemID;
4328 bbp->itemtype = OBJ_SEQDESC;
4329 }
4330 }
4331 }
4332 }
4333 }
4334 }
4335
4336 if (awp->afp != NULL) {
4337 DoImmediateFormat (awp->afp, bbp);
4338 }
4339 }
4340
4341 NLM_EXTERN void AddOrganismBlock (
4342 Asn2gbWorkPtr awp
4343 )
4344
4345 {
4346 BaseBlockPtr bbp;
4347 BioseqPtr bsp;
4348 SeqFeatPtr cds;
4349 SeqMgrDescContext dcontext;
4350 BioseqPtr dna;
4351 SeqMgrFeatContext fcontext;
4352 SeqDescrPtr sdp;
4353 SeqFeatPtr sfp;
4354
4355 if (awp == NULL) return;
4356 bsp = awp->bsp;
4357 if (bsp == NULL) return;
4358
4359 bbp = Asn2gbAddBlock (awp, ORGANISM_BLOCK, sizeof (BaseBlock));
4360 if (bbp == NULL) return;
4361
4362 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
4363 if (sdp != NULL) {
4364 bbp->entityID = dcontext.entityID;
4365 bbp->itemID = dcontext.itemID;
4366 bbp->itemtype = OBJ_SEQDESC;
4367 } else {
4368 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
4369 if (sfp != NULL) {
4370 bbp->entityID = fcontext.entityID;
4371 bbp->itemID = fcontext.itemID;
4372 bbp->itemtype = OBJ_SEQFEAT;
4373 } else if (ISA_aa (bsp->mol)) {
4374
4375 /* if protein with no sources, get sources applicable to DNA location of CDS */
4376
4377 cds = SeqMgrGetCDSgivenProduct (bsp, &fcontext);
4378 if (cds != NULL) {
4379 sfp = SeqMgrGetOverlappingSource (cds->location, &fcontext);
4380 if (sfp != NULL) {
4381 bbp->entityID = fcontext.entityID;
4382 bbp->itemID = fcontext.itemID;
4383 bbp->itemtype = OBJ_SEQFEAT;
4384 } else {
4385 dna = BioseqFindFromSeqLoc (cds->location);
4386 if (dna != NULL) {
4387 sdp = SeqMgrGetNextDescriptor (dna, NULL, Seq_descr_source, &dcontext);
4388 if (sdp != NULL) {
4389 bbp->entityID = dcontext.entityID;
4390 bbp->itemID = dcontext.itemID;
4391 bbp->itemtype = OBJ_SEQDESC;
4392 }
4393 }
4394 }
4395 }
4396 }
4397 }
4398
4399 if (awp->afp != NULL) {
4400 DoImmediateFormat (awp->afp, bbp);
4401 }
4402 }
4403
4404 static RefBlockPtr AddPub (
4405 Asn2gbWorkPtr awp,
4406 ValNodePtr PNTR head,
4407 PubdescPtr pdp
4408 )
4409
4410 {
4411 Char buf [521]; /* increased for consortium in citsub */
4412 CitArtPtr cap;
4413 CitBookPtr cbp;
4414 CitGenPtr cgp;
4415 CitJourPtr cjp;
4416 CitPatPtr cpp;
4417 CitSubPtr csp;
4418 DatePtr dp = NULL;
4419 Boolean justuids = TRUE;
4420 ImprintPtr imp = NULL;
4421 IntRefBlockPtr irp;
4422 RefBlockPtr rbp;
4423 ValNodePtr vnp;
4424 ArticleIdPtr aip;
4425
4426 if (awp == NULL || head == NULL || pdp == NULL) return NULL;
4427
4428 if (awp->hideGeneRIFs) {
4429 if (StringISearch (pdp->comment, "GeneRIF") != NULL) return NULL;
4430 } else if (awp->onlyGeneRIFs) {
4431 if (StringISearch (pdp->comment, "GeneRIF") == NULL) return NULL;
4432 } else if (awp->onlyReviewPubs) {
4433 if (StringISearch (pdp->comment, "Review Article") == NULL) return NULL;
4434 }
4435
4436 rbp = (RefBlockPtr) MemNew (sizeof (IntRefBlock));
4437 if (rbp == NULL) return NULL;
4438 rbp->blocktype = REFERENCE_BLOCK;
4439 rbp->section = awp->currsection;
4440
4441 rbp->serial = INT2_MAX;
4442
4443 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4444 switch (vnp->choice) {
4445 case PUB_Gen :
4446 /* may be unpublished, or may be serial number of swiss-prot reference */
4447 cgp = (CitGenPtr) vnp->data.ptrvalue;
4448 if (cgp != NULL) {
4449 if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
4450 rbp->category = REF_CAT_UNP;
4451 if (dp == NULL) {
4452 dp = cgp->date;
4453 }
4454 if (cgp->serial_number > 0) {
4455 rbp->serial = cgp->serial_number;
4456 }
4457 if (cgp->cit != NULL) {
4458 if (StringNICmp ("unpublished", cgp->cit, 11) != 0 &&
4459 StringNICmp ("submitted", cgp->cit, 8) != 0 &&
4460 StringNICmp ("to be published", cgp->cit, 15) != 0 &&
4461 StringNICmp ("in press", cgp->cit, 8) != 0 &&
4462 StringStr (cgp->cit, "Journal") == NULL) {
4463 if (cgp->serial_number == 0) {
4464 MemFree (rbp);
4465 return NULL;
4466 }
4467 }
4468 } else if (cgp->journal == NULL || cgp->date == NULL) {
4469 if (cgp->serial_number == 0) {
4470 MemFree (rbp);
4471 return NULL;
4472 }
4473 }
4474 }
4475 }
4476 break;
4477 case PUB_Sub :
4478 rbp->category = REF_CAT_SUB;
4479 csp = (CitSubPtr) vnp->data.ptrvalue;
4480 if (csp != NULL) {
4481 imp = csp->imp;
4482 if (imp != NULL) {
4483 if (dp == NULL) {
4484 dp = imp->date;
4485 }
4486 }
4487 if (csp->date != NULL) {
4488 if (dp == NULL) {
4489 dp = csp->date;
4490 }
4491 }
4492 }
4493 break;
4494 case PUB_Article:
4495 cap = (CitArtPtr) vnp->data.ptrvalue;
4496 if (cap != NULL) {
4497 switch (cap->from) {
4498 case 1:
4499 cjp = (CitJourPtr) cap->fromptr;
4500 if (cjp != NULL) {
4501 imp = (ImprintPtr) cjp->imp;
4502 if (imp != NULL) {
4503 if (dp == NULL) {
4504 dp = imp->date;
4505 }
4506 }
4507 }
4508 break;
4509 case 2:
4510 cbp = (CitBookPtr) cap->fromptr;
4511 if (cbp != NULL) {
4512 imp = (ImprintPtr) cbp->imp;
4513 if (imp != NULL) {
4514 if (dp == NULL) {
4515 dp = imp->date;
4516 }
4517 }
4518 }
4519 break;
4520 case 3:
4521 cbp = (CitBookPtr) cap->fromptr;
4522 if (cbp != NULL) {
4523 imp = (ImprintPtr) cbp->imp;
4524 if (imp != NULL) {
4525 if (dp == NULL) {
4526 dp = imp->date;
4527 }
4528 }
4529 }
4530 break;
4531 default:
4532 break;
4533 }
4534 /* look for PMID and MUID in the Cit-art article ids set */
4535 if (cap->ids != NULL) {
4536 for (aip = cap->ids; aip != NULL; aip = aip->next) {
4537 if (aip->choice == ARTICLEID_PUBMED && rbp->pmid == 0) {
4538 rbp->pmid = aip->data.intvalue;
4539 rbp->category = REF_CAT_PUB;
4540 } else if (aip->choice == ARTICLEID_MEDLINE && rbp->muid == 0) {
4541 rbp->muid = aip->data.intvalue;
4542 rbp->category = REF_CAT_PUB;
4543 }
4544 }
4545 }
4546 }
4547 break;
4548 case PUB_Book:
4549 cbp = (CitBookPtr) vnp->data.ptrvalue;
4550 if (cbp != NULL) {
4551 imp = (ImprintPtr) cbp->imp;
4552 if (imp != NULL) {
4553 if (dp == NULL) {
4554 dp = imp->date;
4555 }
4556 }
4557 }
4558 break;
4559 case PUB_Proc:
4560 cbp = (CitBookPtr) vnp->data.ptrvalue;
4561 if (cbp != NULL) {
4562 imp = (ImprintPtr) cbp->imp;
4563 if (imp != NULL) {
4564 if (dp == NULL) {
4565 dp = imp->date;
4566 }
4567 }
4568 }
4569 break;
4570 case PUB_Patent :
4571 rbp->category = REF_CAT_PUB;
4572 cpp = (CitPatPtr) vnp->data.ptrvalue;
4573 if (cpp != NULL) {
4574 if (cpp->date_issue != NULL) {
4575 if (dp == NULL) {
4576 dp = (DatePtr) cpp->date_issue;
4577 }
4578 } else if (cpp->app_date != NULL) {
4579 if (dp == NULL) {
4580 dp = (DatePtr) cpp->app_date;
4581 }
4582 }
4583 }
4584 break;
4585 case PUB_Man:
4586 cbp = (CitBookPtr) vnp->data.ptrvalue;
4587 if (cbp != NULL) {
4588 imp = (ImprintPtr) cbp->imp;
4589 if (imp != NULL) {
4590 if (dp == NULL) {
4591 dp = imp->date;
4592 }
4593 }
4594 }
4595 break;
4596 case PUB_Muid :
4597 if (rbp->muid == 0) {
4598 rbp->muid = vnp->data.intvalue;
4599 rbp->category = REF_CAT_PUB;
4600 }
4601 break;
4602 case PUB_PMid :
4603 if (rbp->pmid == 0) {
4604 rbp->pmid = vnp->data.intvalue;
4605 rbp->category = REF_CAT_PUB;
4606 }
4607 break;
4608 default :
4609 break;
4610 }
4611 if (vnp->choice != PUB_Muid && vnp->choice != PUB_PMid) {
4612 justuids = FALSE;
4613 }
4614 }
4615
4616 /* check for submitted vs. in-press */
4617
4618 if (imp != NULL) {
4619 rbp->category = REF_CAT_PUB;
4620 switch (imp->prepub) {
4621 case 1 :
4622 rbp->category = REF_CAT_UNP;
4623 break;
4624 case 2 :
4625 rbp->category = REF_CAT_PUB;
4626 break;
4627 default :
4628 break;
4629 }
4630 }
4631
4632 /* check for sites reftype */
4633
4634 if (pdp->reftype != 0) {
4635 rbp->sites = pdp->reftype;
4636 }
4637
4638 if (rbp->muid == 0 && rbp->pmid == 0) {
4639 vnp = pdp->pub;
4640
4641 /* skip over just serial number */
4642
4643 if (vnp != NULL && vnp->choice == PUB_Gen && vnp->next != NULL) {
4644 cgp = (CitGenPtr) vnp->data.ptrvalue;
4645 if (cgp != NULL) {
4646 if (StringNICmp ("BackBone id_pub", cgp->cit, 15) != 0) {
4647 if (cgp->cit == NULL && cgp->journal == NULL && cgp->date == NULL && cgp->serial_number) {
4648 vnp = vnp->next;
4649 }
4650 }
4651 }
4652 }
4653
4654 if (PubLabelUnique (vnp, buf, sizeof (buf) - 1, OM_LABEL_CONTENT, TRUE) > 0) {
4655 rbp->uniquestr = StringSaveNoNull (buf);
4656 }
4657 }
4658
4659 irp = (IntRefBlockPtr) rbp;
4660 irp->date = DateDup (dp);
4661 irp->justuids = justuids;
4662 /* if (justuids) { */
4663 irp->fig = StringSaveNoNull (pdp->fig);
4664 irp->maploc = StringSaveNoNull (pdp->maploc);
4665 irp->poly_a = pdp->poly_a;
4666 /* } */
4667
4668 /* if not rejected by now, link in */
4669
4670 ValNodeAddPointer (head, 0, rbp);
4671
4672 return rbp;
4673 }
4674
4675 static int LIBCALLBACK SortReferences (
4676 VoidPtr ptr1,
4677 VoidPtr ptr2,
4678 Boolean serialFirst,
4679 Boolean isRefSeq
4680 )
4681
4682 {
4683 int compare;
4684 IntRefBlockPtr irp1;
4685 IntRefBlockPtr irp2;
4686 RefBlockPtr rbp1;
4687 RefBlockPtr rbp2;
4688 Int2 status;
4689 RefBlockPtr temp;
4690 ValNodePtr vnp1;
4691 ValNodePtr vnp2;
4692
4693 if (ptr1 == NULL || ptr2 == NULL) return 0;
4694 vnp1 = *((ValNodePtr PNTR) ptr1);
4695 vnp2 = *((ValNodePtr PNTR) ptr2);
4696 if (vnp1 == NULL || vnp2 == NULL) return 0;
4697 rbp1 = (RefBlockPtr) vnp1->data.ptrvalue;
4698 rbp2 = (RefBlockPtr) vnp2->data.ptrvalue;
4699 if (rbp1 == NULL || rbp2 == NULL) return 0;
4700
4701 if (serialFirst) {
4702 if (rbp1->serial > rbp2->serial) {
4703 return 1;
4704 } else if (rbp1->serial < rbp2->serial) {
4705 return -1;
4706 }
4707 }
4708
4709 /* usual first sort by published, unpublished, and cit-subs */
4710
4711 if (rbp1->category > rbp2->category) {
4712 return 1;
4713 } else if (rbp1->category < rbp2->category) {
4714 return -1;
4715 }
4716
4717 /* for RefSeq, newer publications first, so temporarily swap pointers */
4718
4719 if (isRefSeq) {
4720 temp = rbp1;
4721 rbp1 = rbp2;
4722 rbp2 = temp;
4723 }
4724
4725 /* within class, sort by date, older publications first (except RefSeq) */
4726
4727 irp1 = (IntRefBlockPtr) rbp1;
4728 irp2 = (IntRefBlockPtr) rbp2;
4729
4730 if ( irp1->date != 0 && irp2->date == 0 ) {
4731 return 1;
4732 } else if ( irp1->date == 0 && irp2->date != 0 ) {
4733 return -1;
4734 }
4735
4736 status = DateMatch (irp1->date, irp2->date, TRUE);
4737 if (status == 1 || status == -1) return status;
4738
4739 /* if dates (e.g., years) match, try to distinguish by uids */
4740
4741 if (rbp1->pmid != 0 && rbp2->pmid != 0) {
4742 if (rbp1->pmid > rbp2->pmid) {
4743 return 1;
4744 } else if (rbp1->pmid < rbp2->pmid) {
4745 return -1;
4746 }
4747 }
4748
4749 if (rbp1->muid != 0 && rbp2->muid != 0) {
4750 if (rbp1->muid > rbp2->muid) {
4751 return 1;
4752 } else if (rbp1->muid < rbp2->muid) {
4753 return -1;
4754 }
4755 }
4756
4757 /* restore sort order after date and pmid/muid matching */
4758
4759 if (isRefSeq) {
4760 temp = rbp1;
4761 rbp1 = rbp2;
4762 rbp2 = temp;
4763
4764 irp1 = (IntRefBlockPtr) rbp1;
4765 irp2 = (IntRefBlockPtr) rbp2;
4766 }
4767
4768 /* if same uid, one with just uids goes last to be excised but remembered */
4769
4770 if ((rbp1->pmid != 0 && rbp2->pmid != 0) || (rbp1->muid != 0 && rbp2->muid != 0)) {
4771 if (irp1->justuids && (! irp2->justuids)) {
4772 return 1;
4773 } else if ((! irp1->justuids) && irp2->justuids) {
4774 return -1;
4775 }
4776 }
4777
4778 /* put sites after pubs that refer to all or a range of bases */
4779
4780 if (rbp1->sites > rbp2->sites) {
4781 return 1;
4782 } else if (rbp2->sites > rbp1->sites) {
4783 return -1;
4784 }
4785
4786 /* for publication features, sort in explore index order */
4787
4788 if (irp1->index > irp2->index) {
4789 return 1;
4790 } else if (irp1->index < irp2->index) {
4791 return -1;
4792 }
4793
4794 /* next use author string */
4795
4796 if (irp1->authstr != NULL && irp2->authstr != NULL) {
4797 compare = StringICmp (irp1->authstr, irp2->authstr);
4798 if (compare > 0) {
4799 return 1;
4800 } else if (compare < 0) {
4801 return -1;
4802 }
4803 }
4804
4805 /* use unique label string to determine sort order */
4806
4807 if (rbp1->uniquestr != NULL && rbp2->uniquestr != NULL) {
4808 compare = StringICmp (rbp1->uniquestr, rbp2->uniquestr);
4809 if (compare > 0) {
4810 return 1;
4811 } else if (compare < 0) {
4812 return -1;
4813 }
4814 }
4815
4816 /* last resort for equivalent publication descriptors, sort in itemID order */
4817
4818 if (rbp1->itemtype == OBJ_SEQDESC && rbp2->itemtype == OBJ_SEQDESC) {
4819 if (rbp1->itemID > rbp2->itemID) {
4820 return 1;
4821 } else if (rbp1->itemID < rbp2->itemID) {
4822 return -1;
4823 }
4824 }
4825
4826 if (rbp1->itemtype == OBJ_ANNOTDESC && rbp2->itemtype == OBJ_ANNOTDESC) {
4827 if (rbp1->itemID > rbp2->itemID) {
4828 return 1;
4829 } else if (rbp1->itemID < rbp2->itemID) {
4830 return -1;
4831 }
4832 }
4833
4834 if (! serialFirst) {
4835 if (rbp1->serial > rbp2->serial) {
4836 return 1;
4837 } else if (rbp1->serial < rbp2->serial) {
4838 return -1;
4839 }
4840 }
4841
4842 return 0;
4843 }
4844
4845 static int LIBCALLBACK SortReferencesA (
4846 VoidPtr ptr1,
4847 VoidPtr ptr2
4848 )
4849
4850 {
4851 return SortReferences (ptr1, ptr2, FALSE, FALSE);
4852 }
4853
4854 static int LIBCALLBACK SortReferencesB (
4855 VoidPtr ptr1,
4856 VoidPtr ptr2
4857 )
4858
4859 {
4860 return SortReferences (ptr1, ptr2, TRUE, FALSE);
4861 }
4862
4863 static int LIBCALLBACK SortReferencesAR (
4864 VoidPtr ptr1,
4865 VoidPtr ptr2
4866 )
4867
4868 {
4869 return SortReferences (ptr1, ptr2, FALSE, TRUE);
4870 }
4871
4872 static int LIBCALLBACK SortReferencesBR (
4873 VoidPtr ptr1,
4874 VoidPtr ptr2
4875 )
4876
4877 {
4878 return SortReferences (ptr1, ptr2, TRUE, TRUE);
4879 }
4880
4881 static CharPtr GetAuthorsPlusConsortium (
4882 FmtType format,
4883 AuthListPtr alp
4884 )
4885
4886 {
4887 CharPtr consortium;
4888 CharPtr str;
4889 CharPtr tmp;
4890
4891 consortium = NULL;
4892 str = GetAuthorsString (format, alp, &consortium, NULL, NULL);
4893 if (str == NULL) return consortium;
4894 if (consortium == NULL) return str;
4895 tmp = MemNew (StringLen (str) + StringLen (consortium) + 5);
4896 if (tmp == NULL) return NULL;
4897 StringCpy (tmp, str);
4898 StringCat (tmp, "; ");
4899 StringCat (tmp, consortium);
4900 MemFree (str);
4901 MemFree (consortium);
4902 return tmp;
4903 }
4904
4905 static Boolean HasNoPmidOrMuid (
4906 PubdescPtr pdp
4907 )
4908
4909 {
4910 ValNodePtr vnp;
4911
4912 if (pdp == NULL) return TRUE;
4913 for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
4914 if (vnp->choice == PUB_PMid || vnp->choice == PUB_Muid) return FALSE;
4915 }
4916 return TRUE;
4917 }
4918
/*
 * User data handed to the GetRefsOnCDS callback: filled in by GetRefsOnBioseq
 * before it calls SeqMgrGetAllOverlappingFeatures on the CDS location.
 */
4919 typedef struct cdspubs {
4920 Asn2gbWorkPtr awp; /* work structure; new references are added to awp->pubhead */
4921 BioseqPtr target; /* bioseq passed to SeqLocMerge; its length gives the reference's right edge */
4922 ValNodePtr vnp; /* location merged onto target to form each reference's loc */
4923 } CdsPubs, PNTR CdsPubsPtr;
4924
4925 static Boolean LIBCALLBACK GetRefsOnCDS (
4926 SeqFeatPtr sfp,
4927 SeqMgrFeatContextPtr context
4928 )
4929
4930 {
4931 AuthListPtr alp;
4932 Asn2gbWorkPtr awp;
4933 CdsPubsPtr cpp;
4934 IntRefBlockPtr irp;
4935 Boolean okay;
4936 PubdescPtr pdp;
4937 RefBlockPtr rbp;
4938 BioseqPtr target;
4939
4940 if (sfp == NULL || context == NULL) return TRUE;
4941 cpp = (CdsPubsPtr) context->userdata;
4942 awp = cpp->awp;
4943 if (awp == NULL) return TRUE;
4944 target = cpp->target;
4945
4946 okay = TRUE;
4947 pdp = (PubdescPtr) sfp->data.value.ptrvalue;
4948 if (awp->format == FTABLE_FMT) {
4949 if (HasNoPmidOrMuid (pdp)) {
4950 okay = FALSE;
4951 }
4952 }
4953
4954 if (okay) {
4955 rbp = AddPub (awp, &(awp->pubhead), pdp);
4956 if (rbp != NULL) {
4957
4958 rbp->entityID = context->entityID;
4959 rbp->itemID = context->itemID;
4960 rbp->itemtype = OBJ_SEQFEAT;
4961
4962 irp = (IntRefBlockPtr) rbp;
4963 irp->loc = SeqLocMerge (cpp->target, cpp->vnp, NULL, FALSE, TRUE, FALSE);
4964 if (target != NULL) {
4965 irp->left = 0;
4966 irp->right = target->length - 1;
4967 }
4968 alp = GetAuthListPtr (pdp, NULL);
4969 if (alp != NULL) {
4970 irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
4971 }
4972 irp->index = 0;
4973 }
4974 }
4975
4976 return TRUE;
4977 }
4978
4979 static void GetRefsOnBioseq (
4980 Asn2gbWorkPtr awp,
4981 BioseqPtr target,
4982 BioseqPtr bsp,
4983 Int4 from,
4984 Int4 to,
4985 SeqLocPtr cdsloc
4986 )
4987
4988 {
4989 SeqMgrAndContext acontext;
4990 AnnotDescPtr adp;
4991 IntAsn2gbJobPtr ajp;
4992 AuthListPtr alp;
4993 CdsPubs cp;
4994 SeqMgrDescContext dcontext;
4995 SeqMgrFeatContext fcontext;
4996 Int2 i;
4997 Int2 idx;
4998 IntRefBlockPtr irp;
4999 Int4Ptr ivals;
5000 Int4 left;
5001 SeqLocPtr newloc;
5002 Int2 numivals;
5003 Boolean okay;
5004 PubdescPtr pdp;
5005 RefBlockPtr rbp;
5006 Int4 right;
5007 SeqDescrPtr sdp;
5008 SeqFeatPtr sfp;
5009 SeqInt sint;
5010 SeqIdPtr sip;
5011 Boolean split;
5012 Int4 start;
5013 Int4 stop;
5014 Uint1 strand;
5015 Boolean takeIt;
5016 ValNode vn;
5017 ValNodePtr vnp;
5018
5019 if (awp == NULL || target == NULL || bsp == NULL) return;
5020 ajp = awp->ajp;
5021 if (ajp == NULL) return;
5022
5023 /* full length loc for descriptors */
5024
5025 sint.from = 0;
5026 if (ajp->ajp.slp != NULL) {
5027 from = SeqLocStart (ajp->ajp.slp); /* other features use awp->slp for from and to */
5028 }
5029 if (ajp->ajp.slp != NULL) {
5030 sint.to = SeqLocLen (ajp->ajp.slp) - 1;
5031 to = SeqLocStop (ajp->ajp.slp); /* other features use awp->slp for from and to */
5032 } else {
5033 sint.to = bsp->length - 1;
5034 }
5035 sint.strand = Seq_strand_plus;
5036 sint.id = SeqIdStripLocus (SeqIdDup (SeqIdFindBest (bsp->id, 0)));
5037 sint.if_from = NULL;
5038 sint.if_to = NULL;
5039
5040 vn.choice = SEQLOC_INT;
5041 vn.data.ptrvalue = (Pointer) &sint;
5042 vn.next = NULL;
5043
5044 sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_pub, &dcontext);
5045 while (sdp != NULL) {
5046
5047 /* check if descriptor on part already added on segmented bioseq */
5048
5049 okay = TRUE;
5050 for (vnp = awp->pubhead; vnp != NULL && okay; vnp = vnp->next) {
5051 rbp = (RefBlockPtr) vnp->data.ptrvalue;
5052 if (rbp != NULL) {
5053 if (rbp->entityID == dcontext.entityID &&
5054 rbp->itemID == dcontext.itemID &&
5055 rbp->itemtype == OBJ_SEQDESC) {
5056 okay = FALSE;
5057 }
5058 }
5059 }
5060 if (awp->format == FTABLE_FMT) {
5061 pdp = (PubdescPtr) sdp->data.ptrvalue;
5062 if (HasNoPmidOrMuid (pdp)) {
5063 okay = FALSE;
5064 }
5065 }
5066
5067 if (okay) {
5068 pdp = (PubdescPtr) sdp->data.ptrvalue;
5069 rbp = AddPub (awp, &(awp->pubhead), pdp);
5070 if (rbp != NULL) {
5071
5072 rbp->entityID = dcontext.entityID;
5073 rbp->itemID = dcontext.itemID;
5074 rbp->itemtype = OBJ_SEQDESC;
5075
5076 irp = (IntRefBlockPtr) rbp;
5077 irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
5078 irp->left = 0;
5079 irp->right = target->length - 1;
5080 alp = GetAuthListPtr (pdp, NULL);
5081 if (alp != NULL) {
5082 irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5083 }
5084 irp->index = 0;
5085 }
5086 }
5087 sdp = SeqMgrGetNextDescriptor (target, sdp, Seq_descr_pub, &dcontext);
5088 }
5089
5090 /* if protein with no pubs, get pubs applicable to DNA location of CDS */
5091
5092 if (cdsloc != NULL) {
5093 cp.awp = awp;
5094 cp.target = target;
5095 cp.vnp = &vn;
5096 SeqMgrGetAllOverlappingFeatures (cdsloc, FEATDEF_PUB, NULL, 0, LOCATION_SUBSET, (Pointer) &cp, GetRefsOnCDS);
5097 }
5098
5099 /* also get publications from AnnotDesc on SeqAnnot */
5100
5101 adp = SeqMgrGetNextAnnotDesc (target, NULL, Annot_descr_pub, &acontext);
5102 while (adp != NULL) {
5103
5104 okay = TRUE;
5105
5106 if (okay) {
5107 pdp = (PubdescPtr) adp->data.ptrvalue;
5108 rbp = AddPub (awp, &(awp->pubhead), pdp);
5109 if (rbp != NULL) {
5110
5111 rbp->entityID = acontext.entityID;
5112 rbp->itemID = acontext.itemID;
5113 rbp->itemtype = OBJ_ANNOTDESC;
5114
5115 irp = (IntRefBlockPtr) rbp;
5116 irp->loc = SeqLocMerge (target, &vn, NULL, FALSE, TRUE, FALSE);
5117 irp->left = 0;
5118 irp->right = target->length - 1;
5119 alp = GetAuthListPtr (pdp, NULL);
5120 if (alp != NULL) {
5121 irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5122 }
5123 irp->index = 0;
5124 }
5125 }
5126 adp = SeqMgrGetNextAnnotDesc (target, adp, Annot_descr_pub, &acontext);
5127 }
5128
5129 SeqIdFree (sint.id);
5130
5131 /* features are indexed on parent if segmented */
5132
5133 bsp = awp->parent;
5134
5135 sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_PUB, 0, &fcontext);
5136 while (sfp != NULL) {
5137 ivals = fcontext.ivals;
5138 numivals = fcontext.numivals;
5139 if (ivals != NULL && numivals > 0) {
5140
5141 /*
5142 idx = (numivals - 1) * 2;
5143 start = ivals [idx];
5144 stop = ivals [idx + 1];
5145 */
5146
5147 takeIt = FALSE;
5148 for (i = 0, idx = 0; i < numivals; i++, idx += 2) {
5149 start = ivals [idx];
5150 stop = ivals [idx + 1];
5151 if ((start <= from && stop > from) ||
5152 (start < to && stop >= to) ||
5153 (start >= from && stop <= to)) {
5154 takeIt = TRUE;
5155 }
5156 }
5157 if (awp->format == FTABLE_FMT) {
5158 pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5159 if (HasNoPmidOrMuid (pdp)) {
5160 takeIt = FALSE;
5161 }
5162 }
5163
5164 if (takeIt /* stop >= from && stop <= to */) {
5165
5166 /*
5167 start = ivals [0] + 1;
5168 stop = ivals [idx + 1] + 1;
5169 */
5170 pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5171 rbp = AddPub (awp, &(awp->pubhead), pdp);
5172 if (rbp != NULL) {
5173
5174 rbp->entityID = fcontext.entityID;
5175 rbp->itemID = fcontext.itemID;
5176 rbp->itemtype = OBJ_SEQFEAT;
5177
5178 irp = (IntRefBlockPtr) rbp;
5179 irp->loc = SeqLocMerge (target, sfp->location, NULL, FALSE, TRUE, FALSE);
5180 irp->left = fcontext.left;
5181 irp->right = fcontext.right;
5182 if (ajp->ajp.slp != NULL) {
5183 sip = SeqIdParse ("lcl|dummy");
5184 left = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_LEFT_END);
5185 right = GetOffsetInBioseq (ajp->ajp.slp, bsp, SEQLOC_RIGHT_END);
5186 strand = SeqLocStrand (ajp->ajp.slp);
5187 split = FALSE;
5188 newloc = SeqLocReMapEx (sip, ajp->ajp.slp, irp->loc, 0, FALSE, ajp->masterStyle);
5189 /*
5190 newloc = SeqLocCopyRegion (sip, irp->loc, bsp, left, right, strand, &split);
5191 */
5192 SeqIdFree (sip);
5193 if (newloc != NULL) {
5194 A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
5195 irp->loc = SeqLocFree (irp->loc);
5196 irp->loc = newloc;
5197 }
5198 }
5199 alp = GetAuthListPtr (pdp, NULL);
5200 if (alp != NULL) {
5201 irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5202 }
5203 irp->index = fcontext.index;
5204 }
5205 }
5206 }
5207
5208 sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_PUB, 0, &fcontext);
5209 }
5210 }
5211
5212 static Boolean LIBCALLBACK GetRefsOnSeg (
5213 SeqLocPtr slp,
5214 SeqMgrSegmentContextPtr context
5215 )
5216
5217 {
5218 Asn2gbWorkPtr awp;
5219 BioseqPtr bsp;
5220 Int4 from;
5221 SeqLocPtr loc;
5222 SeqEntryPtr oldscope;
5223 SeqEntryPtr sep;
5224 SeqIdPtr sip;
5225 Int4 to;
5226
5227 if (slp == NULL || context == NULL) return FALSE;
5228 awp = (Asn2gbWorkPtr) context->userdata;
5229
5230 from = context->cumOffset;
5231 to = from + context->to - context->from;
5232
5233 sip = SeqLocId (slp);
5234 if (sip == NULL) {
5235 loc = SeqLocFindNext (slp, NULL);
5236 if (loc != NULL) {
5237 sip = SeqLocId (loc);
5238 }
5239 }
5240 if (sip == NULL) return TRUE;
5241
5242 /* reference descriptors only on parts within entity */
5243
5244 sep = GetTopSeqEntryForEntityID (awp->entityID);
5245 oldscope = SeqEntrySetScope (sep);
5246 bsp = BioseqFind (sip);
5247 SeqEntrySetScope (oldscope);
5248
5249 if (bsp != NULL) {
5250 GetRefsOnBioseq (awp, awp->refs, bsp, from, to, NULL);
5251 return TRUE;
5252 }
5253
5254 /* if we ever want to fetch remote references, code goes here */
5255
5256 return TRUE;
5257 }
5258
5259 NLM_EXTERN Boolean AddReferenceBlock (
5260 Asn2gbWorkPtr awp,
5261 Boolean isRefSeq
5262 )
5263
5264 {
5265 IntAsn2gbJobPtr ajp;
5266 AuthListPtr alp;
5267 Asn2gbSectPtr asp;
5268 BioseqPtr bsp;
5269 SeqFeatPtr cds;
5270 Boolean combine;
5271 SeqMgrFeatContext context;
5272 CitSubPtr csp;
5273 BioseqPtr dna;
5274 Boolean excise;
5275 Int2 firstserial;
5276 ValNodePtr head = NULL;
5277 Int2 i = 0;
5278 IntRefBlockPtr irp;
5279 Boolean is_aa;
5280 Boolean is_ddbj = FALSE;
5281 Boolean is_embl = FALSE;
5282 Boolean is_patent = FALSE;
5283 Int2 j;
5284 IntRefBlockPtr lastirp;
5285 RefBlockPtr lastrbp;
5286 ValNodePtr next;
5287 Int2 numReferences;
5288 ValNodePtr PNTR prev;
5289 RefBlockPtr rbp;
5290 RefBlockPtr PNTR referenceArray;
5291 BioseqPtr refs;
5292 SubmitBlockPtr sbp;
5293 SeqIdPtr sip;
5294 SeqLocPtr slp;
5295 BioseqPtr target;
5296 ValNodePtr vnp;
5297
5298 if (awp == NULL) return FALSE;
5299 ajp = awp->ajp;
5300 if (ajp == NULL) return FALSE;
5301 asp = awp->asp;
5302 if (asp == NULL) return FALSE;
5303 bsp = awp->bsp;
5304 refs = awp->refs;
5305 if (bsp == NULL || refs == NULL) return FALSE;
5306
5307 /* collect publications on bioseq */
5308
5309 awp->pubhead = NULL;
5310 GetRefsOnBioseq (awp, bsp, refs, awp->from, awp->to, NULL);
5311 target = bsp;
5312
5313 for (sip = bsp->id; sip != NULL; sip = sip->next) {
5314 if (sip->choice == SEQID_EMBL) {
5315 is_embl = TRUE;
5316 } else if (sip->choice == SEQID_DDBJ) {
5317 is_ddbj = TRUE;
5318 } else if (sip->choice == SEQID_PATENT) {
5319 is_patent = TRUE;
5320 }
5321 }
5322
5323 is_aa = (Boolean) ISA_aa (bsp->mol);
5324
5325 if (bsp->repr == Seq_repr_seg) {
5326
5327 /* collect publication descriptors on local parts */
5328
5329 SeqMgrExploreSegments (bsp, (Pointer) awp, GetRefsOnSeg);
5330 target = awp->refs;
5331 }
5332
5333 if (awp->pubhead == NULL && ISA_aa (bsp->mol)) {
5334
5335 /* if protein with no pubs, get pubs applicable to DNA location of CDS */
5336
5337 cds = SeqMgrGetCDSgivenProduct (bsp, &context);
5338 if (cds != NULL) {
5339 dna = BioseqFindFromSeqLoc (cds->location);
5340 if (dna != NULL) {
5341 GetRefsOnBioseq (awp, dna, dna, context.left, context.right, cds->location);
5342 target = dna;
5343 }
5344 }
5345 }
5346
5347 head = awp->pubhead;
5348 awp->pubhead = NULL;
5349
5350 if (head == NULL && awp->ssp == NULL) return FALSE;
5351
5352 /* sort by pub/unpub/sites/sub, then date, finally existing serial */
5353
5354 if (isRefSeq) {
5355 head = SortValNode (head, SortReferencesAR);
5356 } else {
5357 head = SortValNode (head, SortReferencesA);
5358 }
5359
5360 if (awp->ssp != NULL && (! awp->onlyGeneRIFs) && (! awp->onlyReviewPubs)) {
5361
5362 /* add seq-submit citation */
5363
5364 rbp = (RefBlockPtr) MemNew (sizeof (IntRefBlock));
5365 if (rbp != NULL) {
5366 irp = (IntRefBlockPtr) rbp;
5367
5368 rbp->blocktype = REFERENCE_BLOCK;
5369 rbp->section = awp->currsection;
5370 rbp->serial = INT2_MAX;
5371 rbp->category = REF_CAT_SUB;
5372
5373 rbp->entityID = ajp->ajp.entityID;
5374 rbp->itemID = 1;
5375 rbp->itemtype = OBJ_SEQSUB_CIT;
5376
5377 sbp = awp->ssp->sub;
5378 if (sbp != NULL) {
5379 csp = sbp->cit;
5380 if (csp != NULL) {
5381 alp = GetAuthListPtr (NULL, csp);
5382 if (alp != NULL) {
5383 irp->authstr = GetAuthorsPlusConsortium (awp->format, alp);
5384 }
5385 if (csp->date != NULL) {
5386 irp->date = DateDup (csp->date);
5387 }
5388 }
5389 }
5390
5391 if (awp->citSubsFirst) {
5392
5393 /* for DDBJ, add seq-submit citation to beginning of list */
5394
5395 vnp = ValNodeNew (NULL);
5396 if (vnp != NULL) {
5397 vnp->choice = 0;
5398 vnp->data.ptrvalue = (VoidPtr) rbp;
5399 vnp->next = head;
5400 head = vnp;
5401 }
5402
5403 } else {
5404
5405 /* for GENBANK and EMBL add seq-submit citation to end of list */
5406
5407 ValNodeAddPointer (&head, 0, rbp);
5408 }
5409 }
5410 }
5411
5412 /* unique references, excise duplicates from list */
5413
5414 prev = &(head);
5415 vnp = head;
5416 lastrbp = NULL;
5417 while (vnp != NULL) {
5418 excise = FALSE;
5419 combine = TRUE;
5420 next = vnp->next;
5421 rbp = (RefBlockPtr) vnp->data.ptrvalue;
5422 if (lastrbp != NULL) {
5423 lastirp = (IntRefBlockPtr) lastrbp;
5424 if (rbp != NULL) {
5425 irp = (IntRefBlockPtr) rbp;
5426 if (lastrbp->pmid != 0 && rbp->pmid != 0) {
5427 if (lastrbp->pmid == rbp->pmid) {
5428 if (lastirp->right + 1 >= irp->left) {
5429 excise = TRUE;
5430 }
5431 }
5432 } else if (lastrbp->muid != 0 && rbp->muid != 0) {
5433 if (lastrbp->muid == rbp->muid) {
5434 if (lastirp->right + 1 >= irp->left) {
5435 excise = TRUE;
5436 }
5437 }
5438 } else if (lastrbp->uniquestr != NULL && rbp->uniquestr != NULL) {
5439 if (StringICmp (lastrbp->uniquestr, rbp->uniquestr) == 0) {
5440 if (SeqLocCompare (irp->loc, lastirp->loc) == SLC_A_EQ_B) {
5441 if (StringICmp (irp->authstr, lastirp->authstr) == 0) {
5442
5443 /* L76496.1 - removing duplicate submission pubs */
5444 if (lastirp->right + 1 >= irp->left) {
5445 excise = TRUE;
5446 }
5447 }
5448 }
5449 }
5450 }
5451 if (excise && lastrbp->sites == 0 && rbp->sites > 0) {
5452 /* real range trumps sites */
5453 combine = FALSE;
5454 }
5455 }
5456 }
5457 if (rbp != NULL) {
5458 irp = (IntRefBlockPtr) rbp;
5459 if (irp->justuids) {
5460 if (isRefSeq && is_aa) {
5461 /* if allowing justuid in protein RefSeq, try to look up dynamically */
5462 excise = TRUE; /* Back to old behavior, do not fetch */
5463 } else {
5464 /* do not allow justuids reference to appear by itself - S79174.1 */
5465 excise = TRUE;
5466 /* justuids should still combine, even if no authors - S67070.1 */
5467 }
5468 } else if (is_embl && is_patent) {
5469 /* EMBL patent records do not need author or title - A29528.1 */
5470 } else if (StringHasNoText (irp->authstr)) {
5471 /* do not allow no author reference to appear by itself - U07000.1 */
5472 excise = TRUE;
5473 combine = FALSE;
5474 } else if (isRefSeq && is_aa && rbp->category == REF_CAT_SUB) {
5475 /* GenPept RefSeq suppresses cit-subs */
5476 excise = TRUE;
5477 combine = FALSE;
5478 }
5479 }
5480 if (awp->mode == DUMP_MODE) {
5481 excise = FALSE;
5482 }
5483 /* do not hide duplicate EMBL and DDBJ publications */
5484 if (is_embl || is_ddbj) {
5485 excise = FALSE;
5486 combine = TRUE;
5487 }
5488 /* does not fuse equivalent publication features for local, general, refseq, and 2+6 genbank ids */
5489 if (excise && awp->sourcePubFuse) {
5490 *prev = vnp->next;
5491 vnp->next = NULL;
5492
5493 /* combine locations of duplicate references */
5494
5495 irp = (IntRefBlockPtr) rbp;
5496 lastirp = (IntRefBlockPtr) lastrbp;
5497 if (combine) {
5498 if (lastirp != NULL) {
5499 slp = SeqLocMerge (target, lastirp->loc, irp->loc, FALSE, TRUE, FALSE);
5500 lastirp->loc = SeqLocFree (lastirp->loc);
5501 lastirp->loc = slp;
5502 }
5503 if (irp != NULL && lastirp != NULL) {
5504 if ((rbp->muid == lastrbp->muid && rbp->muid != 0) ||
5505 (rbp->pmid == lastrbp->pmid && rbp->pmid != 0)) {
5506 if (lastirp->fig == NULL) {
5507 lastirp->fig = StringSaveNoNull (irp->fig);
5508 }
5509 if (lastirp->maploc == NULL) {
5510 lastirp->maploc = StringSaveNoNull (irp->maploc);
5511 }
5512 lastirp->poly_a = irp->poly_a;
5513 }
5514 }
5515 }
5516
5517 /* and remove duplicate reference */
5518
5519 MemFree (rbp->uniquestr);
5520 DateFree (irp->date);
5521 SeqLocFree (irp->loc);
5522 MemFree (irp->authstr);
5523 MemFree (irp->fig);
5524 MemFree (irp->maploc);
5525 MemFree (rbp);
5526 ValNodeFree (vnp);
5527
5528 } else {
5529
5530 prev = &(vnp->next);
5531 lastrbp = rbp;
5532 }
5533 vnp = next;
5534 }
5535
5536 /* resort by existing serial, then pub/unpub/sites/sub, then date */
5537
5538 if (isRefSeq) {
5539 head = SortValNode (head, SortReferencesBR);
5540 } else {
5541 head = SortValNode (head, SortReferencesB);
5542 }
5543
5544 if (head == NULL) return FALSE;
5545
5546 /* if taking newest publications, free remainder */
5547
5548 if (awp->newestPubs) {
5549 for (vnp = head, i = 1; vnp != NULL && i < 5; vnp = vnp->next, i++) continue;
5550 if (vnp != NULL) {
5551 next = vnp->next;
5552 vnp->next = NULL;
5553 for (vnp = next; vnp != NULL; vnp = vnp->next) {
5554 rbp = (RefBlockPtr) vnp->data.ptrvalue;
5555 MemFree (rbp->uniquestr);
5556 irp = (IntRefBlockPtr) rbp;
5557 DateFree (irp->date);
5558 SeqLocFree (irp->loc);
5559 MemFree (irp->authstr);
5560 MemFree (irp->fig);
5561 MemFree (irp->maploc);
5562 MemFree (rbp);
5563 }
5564 }
5565
5566 /* if taking oldest publications, free remainder */
5567
5568 } else if (awp->oldestPubs) {
5569 for (vnp = head, j = 0; vnp != NULL; vnp = vnp->next, j++) continue;
5570 if (j > 5) {
5571 for (vnp = head, i = 0; vnp != NULL && i < j - 6; vnp = vnp->next, i++) continue;
5572 if (vnp != NULL) {
5573 next = vnp->next;
5574 vnp->next = NULL;
5575 for (vnp = head; vnp != NULL; vnp = vnp->next) {
5576 rbp = (RefBlockPtr) vnp->data.ptrvalue;
5577 MemFree (rbp->uniquestr);
5578 irp = (IntRefBlockPtr) rbp;
5579 DateFree (irp->date);
5580 SeqLocFree (irp->loc);
5581 MemFree (irp->authstr);
5582 MemFree (irp->fig);
5583 MemFree (irp->maploc);
5584 MemFree (rbp);
5585 }
5586 head = next;
5587 }
5588 }
5589 }
5590
5591 /* assign serial numbers */
5592
5593 firstserial = 1;
5594
5595 /* first find highest one assigned by EMBL/SWISS-PROT */
5596
5597 for (vnp = head; vnp != NULL; vnp = vnp->next) {
5598 rbp = (RefBlockPtr) vnp->data.ptrvalue;
5599 if (rbp == NULL) continue;
5600 if (rbp->serial > 0 && rbp->serial < INT2_MAX) {
5601 firstserial = rbp->serial + 1;
5602 }
5603 }
5604
5605 /* then give increasing serial numbers to unassigned publications */
5606
5607 for (vnp = head; vnp != NULL; vnp = vnp->next) {
5608 rbp = (RefBlockPtr) vnp->data.ptrvalue;
5609 if (rbp == NULL) continue;
5610 if (rbp->serial > 0 && rbp->serial < INT2_MAX) continue;
5611 rbp->serial = firstserial;
5612 firstserial++;
5613 }
5614
5615 /* allocate reference array for this section */
5616
5617 numReferences = ValNodeLen (head);
5618 asp->numReferences = numReferences;
5619
5620 if (numReferences > 0) {
5621 referenceArray = (RefBlockPtr PNTR) MemNew (sizeof (RefBlockPtr) * (numReferences + 1));
5622 asp->referenceArray = referenceArray;
5623
5624 if (referenceArray != NULL) {
5625
5626 /* fill in reference array */
5627
5628 for (vnp = head, i = 0; vnp != NULL && i < numReferences; vnp = vnp->next, i++) {
5629 referenceArray [i] = (RefBlockPtr) vnp->data.ptrvalue;
5630 }
5631 }
5632 }
5633
5634 /* finally link into blocks for current section */
5635
5636 ValNodeLink (&(awp->lastblock), head);
5637 vnp = awp->lastblock;
5638 if (vnp == NULL) return FALSE;
5639 while (vnp->next != NULL) {
5640 vnp = vnp->next;
5641 }
5642
5643 awp->lastblock = vnp;
5644 if (awp->blockList == NULL) {
5645 awp->blockList = vnp;
5646 }
5647
5648 if (awp->afp != NULL) {
5649 for (vnp = head; vnp != NULL; vnp = vnp->next) {
5650 rbp = (RefBlockPtr) vnp->data.ptrvalue;
5651 if (rbp == NULL) continue;
5652 DoImmediateFormat (awp->afp, (BaseBlockPtr) rbp);
5653 }
5654 }
5655
5656 return TRUE;
5657 }
5658
5659 NLM_EXTERN void AddRefStatsBlock (
5660 Asn2gbWorkPtr awp
5661 )
5662
5663 {
5664 IntAsn2gbJobPtr ajp;
5665 BaseBlockPtr bbp;
5666 BioseqPtr bsp;
5667 StringItemPtr ffstring;
5668
5669 if (awp == NULL) return;
5670 ajp = awp->ajp;
5671 if ( ajp == NULL ) return;
5672 bsp = awp->bsp;
5673 if (bsp == NULL) return;
5674
5675 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
5676
5677 bbp = Asn2gbAddBlock (awp, REF_STATS_BLOCK, sizeof (BaseBlock));
5678 if (bbp != NULL) {
5679 ffstring = FFGetString (ajp);
5680 if (ffstring != NULL) {
5681 FFStartPrint (ffstring, awp->format, 0, 12, "REFSTATS", 12, 0, 0, NULL, FALSE);
5682
5683 FFAddOneString (ffstring, "placeholder", FALSE, FALSE, TILDE_TO_SPACES);
5684
5685 bbp->string = FFEndPrint (ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
5686 FFRecycleString(ajp, ffstring);
5687 }
5688
5689 if (awp->afp != NULL) {
5690 DoImmediateFormat (awp->afp, bbp);
5691 }
5692 }
5693 }
5694
5695 NLM_EXTERN void AddWGSBlock (
5696 Asn2gbWorkPtr awp
5697 )
5698
5699 {
5700 IntAsn2gbJobPtr ajp;
5701 BaseBlockPtr bbp;
5702 BioseqPtr bsp;
5703 Char buf [80];
5704 SeqMgrDescContext dcontext;
5705 CharPtr first;
5706 CharPtr last;
5707 ObjectIdPtr oip;
5708 SeqDescrPtr sdp;
5709 UserFieldPtr ufp;
5710 UserObjectPtr uop;
5711 Int2 wgstype;
5712 StringItemPtr ffstring;
5713
5714 if (awp == NULL) return;
5715 ajp = awp->ajp;
5716 if ( ajp == NULL ) return;
5717 bsp = awp->bsp;
5718 if (bsp == NULL) return;
5719
5720 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
5721
5722 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
5723 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
5724 sprintf (buf, "<a name=\"wgs_%ld\"></a>", (long) awp->currGi);
5725 DoQuickLinkFormat (awp->afp, buf);
5726 }
5727
5728 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
5729 while (sdp != NULL) {
5730 uop = (UserObjectPtr) sdp->data.ptrvalue;
5731 if (uop != NULL) {
5732 oip = uop->type;
5733 first = NULL;
5734 last = NULL;
5735 wgstype = 0;
5736 if (oip != NULL) {
5737 if (StringICmp (oip->str, "WGSProjects") == 0) {
5738 wgstype = 1;
5739 } else if (StringICmp (oip->str, "WGS-Scaffold-List") == 0) {
5740 wgstype = 2;
5741 } else if (StringICmp (oip->str, "WGS-Contig-List") == 0) {
5742 wgstype = 3;
5743 }
5744 if (wgstype != 0) {
5745 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
5746 oip = ufp->label;
5747 if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
5748 if (StringICmp (oip->str, "WGS_accession_first") == 0) {
5749 first = (CharPtr) ufp->data.ptrvalue;
5750 } else if (StringICmp (oip->str, "WGS_accession_last") == 0) {
5751 last = (CharPtr) ufp->data.ptrvalue;
5752 } else if (StringICmp (oip->str, "Accession_first") == 0) {
5753 first = (CharPtr) ufp->data.ptrvalue;
5754 } else if (StringICmp (oip->str, "Accession_last") == 0) {
5755 last = (CharPtr) ufp->data.ptrvalue;
5756 }
5757 }
5758 if (first != NULL && last != NULL) {
5759 bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
5760 if (bbp != NULL) {
5761 ffstring = FFGetString (ajp);
5762 if (ffstring != NULL) {
5763 if (wgstype == 1) {
5764 FFStartPrint (ffstring, awp->format, 0, 12, "WGS", 12, 0, 0, NULL, FALSE);
5765 } else if (wgstype == 2) {
5766 FFStartPrint (ffstring, awp->format, 0, 12, "WGS_SCAFLD", 12, 0, 0, NULL, FALSE);
5767 } else if (wgstype == 3) {
5768 FFStartPrint (ffstring, awp->format, 0, 12, "WGS_CONTIG", 12, 0, 0, NULL, FALSE);
5769 }
5770
5771 if ( GetWWW(ajp) ) {
5772 if (StringCmp (first, last) != 0) {
5773 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
5774 FF_Add_NCBI_Base_URL (ffstring, link_wgs);
5775 FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", first, NULL, FALSE, FALSE, TILDE_IGNORE);
5776 FFAddTextToString(ffstring, ":", last, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
5777 sprintf (buf, "%s-%s", first, last);
5778 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5779 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
5780 } else {
5781 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
5782 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
5783 FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
5784 sprintf (buf, "%s", first);
5785 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5786 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
5787 }
5788 } else {
5789 if (StringCmp (first, last) != 0) {
5790 sprintf (buf, "%s-%s", first, last);
5791 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5792 } else {
5793 sprintf (buf, "%s", first);
5794 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5795 }
5796 }
5797
5798 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
5799 FFRecycleString(ajp, ffstring);
5800 }
5801
5802 bbp->entityID = dcontext.entityID;
5803 bbp->itemtype = OBJ_SEQDESC;
5804 bbp->itemID = dcontext.itemID;
5805 if (awp->afp != NULL) {
5806 DoImmediateFormat (awp->afp, bbp);
5807 }
5808 }
5809 }
5810 }
5811 }
5812 }
5813 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
5814 }
5815 }
5816
5817 NLM_EXTERN void AddCAGEBlock (
5818 Asn2gbWorkPtr awp
5819 )
5820
5821 {
5822 IntAsn2gbJobPtr ajp;
5823 BaseBlockPtr bbp;
5824 BioseqPtr bsp;
5825 Char buf [80];
5826 Int2 cagetype;
5827 SeqMgrDescContext dcontext;
5828 CharPtr first;
5829 CharPtr last;
5830 ObjectIdPtr oip;
5831 SeqDescrPtr sdp;
5832 UserFieldPtr ufp;
5833 UserObjectPtr uop;
5834 StringItemPtr ffstring;
5835
5836 if (awp == NULL) return;
5837 ajp = awp->ajp;
5838 if ( ajp == NULL ) return;
5839 bsp = awp->bsp;
5840 if (bsp == NULL) return;
5841
5842 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
5843
5844 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
5845 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
5846 sprintf (buf, "<a name=\"wgs_%ld\"></a>", (long) awp->currGi);
5847 DoQuickLinkFormat (awp->afp, buf);
5848 }
5849
5850 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
5851 while (sdp != NULL) {
5852 uop = (UserObjectPtr) sdp->data.ptrvalue;
5853 if (uop != NULL) {
5854 oip = uop->type;
5855 first = NULL;
5856 last = NULL;
5857 cagetype = 0;
5858 if (oip != NULL) {
5859 if (StringICmp (oip->str, "CAGE-Tag-List") == 0) {
5860 cagetype = 1;
5861 }
5862 if (cagetype != 0) {
5863 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
5864 oip = ufp->label;
5865 if (oip == NULL || oip->str == NULL || ufp->choice != 1) continue;
5866 if (StringICmp (oip->str, "CAGE_accession_first") == 0) {
5867 first = (CharPtr) ufp->data.ptrvalue;
5868 } else if (StringICmp (oip->str, "CAGE_accession_last") == 0) {
5869 last = (CharPtr) ufp->data.ptrvalue;
5870 }
5871 }
5872 if (first != NULL && last != NULL) {
5873 bbp = Asn2gbAddBlock (awp, WGS_BLOCK, sizeof (BaseBlock));
5874 if (bbp != NULL) {
5875 ffstring = FFGetString (ajp);
5876 if (ffstring != NULL) {
5877 if (cagetype == 1) {
5878 FFStartPrint (ffstring, awp->format, 0, 12, "TAG", 12, 0, 0, NULL, FALSE);
5879 }
5880
5881 if ( GetWWW(ajp) ) {
5882 if (StringCmp (first, last) != 0) {
5883 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
5884 FF_Add_NCBI_Base_URL (ffstring, link_wgs);
5885 FFAddTextToString(ffstring, "db=Nucleotide&cmd=Search&term=", first, NULL, FALSE, FALSE, TILDE_IGNORE);
5886 FFAddTextToString(ffstring, ":", last, "[PACC]\">", FALSE, FALSE, TILDE_IGNORE);
5887 sprintf (buf, "%s-%s", first, last);
5888 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5889 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
5890 } else {
5891 FFAddOneString (ffstring, "<a href=\"", FALSE, FALSE, TILDE_IGNORE);
5892 FF_Add_NCBI_Base_URL (ffstring, link_seqn);
5893 FFAddTextToString(ffstring, /* "val=" */ NULL, first, "\">", FALSE, FALSE, TILDE_IGNORE);
5894 sprintf (buf, "%s", first);
5895 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5896 FFAddOneString (ffstring, "</a>", FALSE, FALSE, TILDE_TO_SPACES);
5897 }
5898 } else {
5899 if (StringCmp (first, last) != 0) {
5900 sprintf (buf, "%s-%s", first, last);
5901 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5902 } else {
5903 sprintf (buf, "%s", first);
5904 FFAddOneString (ffstring, buf, FALSE, FALSE, TILDE_TO_SPACES);
5905 }
5906 }
5907
5908 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
5909 FFRecycleString(ajp, ffstring);
5910 }
5911
5912 bbp->entityID = dcontext.entityID;
5913 bbp->itemtype = OBJ_SEQDESC;
5914 bbp->itemID = dcontext.itemID;
5915 if (awp->afp != NULL) {
5916 DoImmediateFormat (awp->afp, bbp);
5917 }
5918 }
5919 }
5920 }
5921 }
5922 }
5923 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
5924 }
5925 }
5926
5927 NLM_EXTERN void AddGenomeBlock (
5928 Asn2gbWorkPtr awp
5929 )
5930
5931 {
5932 CharPtr accn;
5933 IntAsn2gbJobPtr ajp;
5934 BaseBlockPtr bbp;
5935 BioseqPtr bsp;
5936 Char buf [128];
5937 SeqMgrDescContext dcontext;
5938 Boolean first = TRUE;
5939 CharPtr moltype;
5940 ObjectIdPtr oip;
5941 SeqDescrPtr sdp;
5942 UserFieldPtr ufp;
5943 UserObjectPtr uop;
5944 UserFieldPtr urf;
5945 StringItemPtr ffstring;
5946
5947 if (awp == NULL) return;
5948 ajp = awp->ajp;
5949 if ( ajp == NULL ) return;
5950 bsp = awp->bsp;
5951 if (bsp == NULL) return;
5952
5953 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
5954
5955 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
5956 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
5957 sprintf (buf, "<a name=\"genome_%ld\"></a>", (long) awp->currGi);
5958 DoQuickLinkFormat (awp->afp, buf);
5959 }
5960
5961 bbp = Asn2gbAddBlock (awp, GENOME_BLOCK, sizeof (BaseBlock));
5962 if (bbp == NULL) return;
5963
5964 ffstring = FFGetString(ajp);
5965 if ( ffstring == NULL ) return;
5966
5967 FFStartPrint (ffstring, awp->format, 0, 12, "GENOME", 12, 0, 0, NULL, FALSE);
5968
5969 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
5970 while (sdp != NULL) {
5971 uop = (UserObjectPtr) sdp->data.ptrvalue;
5972 if (uop != NULL) {
5973 oip = uop->type;
5974 if (oip != NULL && StringICmp (oip->str, "GenomeProject") == 0) {
5975 for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
5976 oip = ufp->label;
5977 if (oip == NULL || oip->str == NULL || ufp->choice != 11) continue;
5978 if (StringICmp (oip->str, "Chromosome") != 0) continue;
5979 accn = NULL;
5980 moltype = NULL;
5981 for (urf = (UserFieldPtr) ufp->data.ptrvalue; urf != NULL; urf = urf->next) {
5982 oip = urf->label;
5983 if (oip == NULL || oip->str == NULL || urf->choice != 1) continue;
5984 if (StringICmp (oip->str, "accession") == 0) {
5985 accn = (CharPtr) urf->data.ptrvalue;
5986 } else if (StringICmp (oip->str, "Moltype") == 0) {
5987 moltype = (CharPtr) urf->data.ptrvalue;
5988 }
5989 }
5990 if (! StringHasNoText (accn)) {
5991 if (! first) {
5992 FFAddNewLine(ffstring);
5993 }
5994 first = FALSE;
5995 FFAddOneString (ffstring, accn, FALSE, FALSE, TILDE_IGNORE);
5996 if (! StringHasNoText (moltype)) {
5997 FFAddTextToString (ffstring, " (", moltype, ")", FALSE, FALSE, TILDE_TO_SPACES);
5998 }
5999 }
6000 }
6001 }
6002 }
6003 sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
6004 }
6005
6006 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 12, 12, 0, 0, NULL);
6007 FFRecycleString(ajp, ffstring);
6008
6009 if (awp->afp != NULL) {
6010 DoImmediateFormat (awp->afp, bbp);
6011 }
6012 }
6013
6014 NLM_EXTERN void AddBasecountBlock (
6015 Asn2gbWorkPtr awp
6016 )
6017
6018 {
6019 IntAsn2gbJobPtr ajp;
6020 BaseBlockPtr bbp;
6021 BioseqPtr bsp;
6022
6023 if (awp == NULL) return;
6024 ajp = awp->ajp;
6025 if (ajp == NULL) return;
6026 bsp = awp->bsp;
6027 if (bsp == NULL) return;
6028
6029 bbp = Asn2gbAddBlock (awp, BASECOUNT_BLOCK, sizeof (BaseBlock));
6030 if (bbp == NULL) return;
6031
6032 bbp->entityID = awp->entityID;
6033 bbp->itemtype = bsp->idx.itemtype;
6034 bbp->itemID = bsp->idx.itemID;
6035
6036 if (awp->afp != NULL) {
6037 DoImmediateFormat (awp->afp, bbp);
6038 }
6039 }
6040
6041 NLM_EXTERN void AddOriginBlock (
6042 Asn2gbWorkPtr awp
6043 )
6044
6045 {
6046 IntAsn2gbJobPtr ajp;
6047 BaseBlockPtr bbp;
6048 BioseqPtr bsp;
6049 Char buf [67];
6050 SeqMgrDescContext dcontext;
6051 GBBlockPtr gbp;
6052 SeqDescrPtr sdp;
6053 StringItemPtr ffstring;
6054
6055 if (awp == NULL) return;
6056 ajp = awp->ajp;
6057 if (ajp == NULL) return;
6058 bsp = awp->bsp;
6059 if (bsp == NULL) return;
6060
6061 if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) return;
6062
6063 ffstring = FFGetString(ajp);
6064 if ( ffstring == NULL ) return;
6065
6066 bbp = Asn2gbAddBlock (awp, ORIGIN_BLOCK, sizeof (BaseBlock));
6067 if (bbp == NULL) return;
6068
6069 bbp->entityID = awp->entityID;
6070
6071 if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
6072
6073 buf [0] = '\0';
6074
6075 sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_genbank, &dcontext);
6076 if (sdp != NULL) {
6077 gbp = (GBBlockPtr) sdp->data.ptrvalue;
6078 if (gbp != NULL && (! StringHasNoText (gbp->origin))) {
6079 StringNCpy_0 (buf, gbp->origin, sizeof (buf));
6080 bbp->entityID = dcontext.entityID;
6081 bbp->itemID = dcontext.itemID;
6082 bbp->itemtype = OBJ_SEQDESC;
6083 }
6084 }
6085
6086 FFStartPrint (ffstring, awp->format, 0, 12, "ORIGIN", 12, 0, 0, NULL, FALSE);
6087
6088 if (! StringHasNoText (buf)) {
6089 FFAddOneString (ffstring, buf, TRUE, FALSE, TILDE_TO_SPACES);
6090 }
6091 }
6092
6093 bbp->string = FFEndPrint(ajp, ffstring, awp->format, 0, 12, 0, 0, NULL);
6094 FFRecycleString(ajp, ffstring);
6095
6096 if (awp->afp != NULL) {
6097 DoImmediateFormat (awp->afp, bbp);
6098 }
6099 }
6100
6101 #define BASES_PER_BLOCK 1200
6102
6103 NLM_EXTERN void AddSequenceBlock (
6104 Asn2gbWorkPtr awp
6105 )
6106
6107 {
6108 IntAsn2gbJobPtr ajp;
6109 BioseqPtr bsp;
6110 Char buf [128];
6111 Int4 extend;
6112 Int4 len;
6113 SeqBlockPtr sbp;
6114 Int4 start;
6115 Int4 stop;
6116
6117 if (awp == NULL) return;
6118 ajp = awp->ajp;
6119 if (ajp == NULL) return;
6120 bsp = awp->bsp;
6121 if (bsp == NULL) return;
6122
6123 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6124 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6125 sprintf (buf, "<a name=\"sequence_%ld\"></a>", (long) awp->currGi);
6126 DoQuickLinkFormat (awp->afp, buf);
6127 }
6128
6129 if (awp->slp != NULL) {
6130 len = SeqLocLen (awp->slp);
6131 } else {
6132 len = bsp->length;
6133 }
6134
6135 /* if generating GBSeq XML, populate single sequence block */
6136
6137 if (ajp->gbseq) {
6138 sbp = (SeqBlockPtr) Asn2gbAddBlock (awp, SEQUENCE_BLOCK, sizeof (SeqBlock));
6139 if (sbp == NULL) return;
6140
6141 sbp->entityID = bsp->idx.entityID;
6142 sbp->itemID = bsp->idx.itemID;
6143 sbp->itemtype = OBJ_BIOSEQ;
6144
6145 sbp->start = 0;
6146 sbp->stop = len;
6147
6148 if (awp->afp != NULL) {
6149 DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
6150 }
6151
6152 return;
6153 }
6154
6155 /* otherwise populate individual sequence blocks for given range */
6156
6157 for (start = 0; start < len; start += BASES_PER_BLOCK) {
6158 sbp = (SeqBlockPtr) Asn2gbAddBlock (awp, SEQUENCE_BLOCK, sizeof (SeqBlock));
6159 if (sbp == NULL) continue;
6160
6161 sbp->entityID = bsp->idx.entityID;
6162 sbp->itemID = bsp->idx.itemID;
6163 sbp->itemtype = OBJ_BIOSEQ;
6164
6165 stop = start + BASES_PER_BLOCK;
6166 if (stop >= len) {
6167 stop = len;
6168 }
6169 extend = start + BASES_PER_BLOCK + 60;
6170 if (extend >= len) {
6171 extend = len;
6172 }
6173
6174 sbp->start = start;
6175 sbp->stop = stop;
6176 sbp->extend = extend;
6177
6178 if (awp->afp != NULL) {
6179 DoImmediateFormat (awp->afp, (BaseBlockPtr) sbp);
6180 }
6181 }
6182 }
6183
6184 NLM_EXTERN void AddContigBlock (
6185 Asn2gbWorkPtr awp
6186 )
6187
6188 {
6189 IntAsn2gbJobPtr ajp;
6190 BaseBlockPtr bbp;
6191 Char buf [128];
6192
6193 if (awp == NULL) return;
6194 ajp = awp->ajp;
6195 if ( ajp == NULL ) return;
6196
6197 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6198 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6199 sprintf (buf, "<a name=\"contig_%ld\"></a>", (long) awp->currGi);
6200 DoQuickLinkFormat (awp->afp, buf);
6201 }
6202 bbp = Asn2gbAddBlock (awp, CONTIG_BLOCK, sizeof (BaseBlock));
6203
6204 if (awp->afp != NULL) {
6205 DoImmediateFormat (awp->afp, bbp);
6206 }
6207 }
6208
6209 NLM_EXTERN void AddSlashBlock (
6210 Asn2gbWorkPtr awp
6211 )
6212
6213 {
6214 IntAsn2gbJobPtr ajp;
6215 BaseBlockPtr bbp;
6216 Char buf [128];
6217 CharPtr str;
6218
6219 if (awp == NULL) return;
6220 ajp = awp->ajp;
6221 if (ajp == NULL) return;
6222
6223 /*
6224 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6225 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6226 sprintf (buf, "<a name=\"slash_%ld\"></a>", (long) awp->currGi);
6227 DoQuickLinkFormat (awp->afp, buf);
6228 }
6229 */
6230
6231 bbp = Asn2gbAddBlock (awp, SLASH_BLOCK, sizeof (BaseBlock));
6232 if (bbp == NULL) return;
6233
6234 bbp->entityID = awp->entityID;
6235
6236 if (GetWWW (ajp) && awp->mode == ENTREZ_MODE && awp->afp != NULL &&
6237 (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT)) {
6238 sprintf (buf, "//</pre>\n<a name=\"slash_%ld\"></a>", (long) awp->currGi);
6239 str = StringSave (buf);
6240 } else if (GetWWW (ajp)) {
6241 sprintf (buf, "//</pre>\n");
6242 str = StringSave (buf);
6243 } else {
6244 str = MemNew(sizeof(Char) * 4);
6245 StringNCpy(str, "//\n", 4);
6246 }
6247
6248 bbp->string = str;
6249
6250 if (awp->afp != NULL) {
6251 DoImmediateFormat (awp->afp, bbp);
6252 }
6253 }
6254
6255 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |