|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/asn/asn.all |
source navigation diff markup identifier search freetext search file search |
1 --$Revision: 6.0 $
2 --**********************************************************************
3 --
4 -- asn.all
5 -- this file contains all NCBI ASN.1 specifications together
6 --
7 -- by James Ostell, 1990
8 --
9 --**********************************************************************
10
11 --$Revision: 6.6 $
12 --**********************************************************************
13 --
14 -- NCBI General Data elements
15 -- by James Ostell, 1990
16 -- Version 3.0 - June 1994
17 --
18 --**********************************************************************
19
20 NCBI-General DEFINITIONS ::=
21 BEGIN
22
23 EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;
24
25 -- StringStore is really a VisibleString. It is used to define very
26 -- long strings which may need to be stored by the receiving program
27 -- in special structures, such as a ByteStore, but it's just a hint.
28 -- AsnTool stores StringStores in ByteStore structures.
29 -- OCTET STRINGs are also stored in ByteStores by AsnTool
30 --
31 -- typedef struct bsunit { /* for building multiline strings */
32 -- Nlm_Handle str; /* the string piece */
33 -- Nlm_Int2 len_avail,
34 -- len;
35 -- struct bsunit PNTR next; } /* the next one */
36 -- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
37 --
38 -- typedef struct bytestore {
39 -- Nlm_Int4 seekptr, /* current position */
40 -- totlen, /* total stored data length in bytes */
41 -- chain_offset; /* offset in ByteStore of first byte in curchain */
42 -- Nlm_BSUnitPtr chain, /* chain of elements */
43 -- curchain; /* the BSUnit containing seekptr */
44 -- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
45 --
46 -- AsnTool incorporates this as a primitive type, so the definition
47 -- is here just for completeness
48 --
49 -- StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
50 --
51
52 -- BigInt is really an INTEGER. It is used to warn the receiving code to expect
53 -- a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
54 --
55 -- Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
56 -- BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
57 --
58
59 -- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
60 -- of ASN.1
61 -- It stores only a date
62 --
63
64 Date ::= CHOICE { -- a date, either free text or structured
65 str VisibleString , -- for those unparsed dates
66 std Date-std } -- use this if you can
67
68 Date-std ::= SEQUENCE { -- NOTE: this is NOT a unix tm struct; only year is mandatory
69 year INTEGER , -- full year (including 1900), i.e. not an offset from 1900
70 month INTEGER OPTIONAL , -- month (1-12)
71 day INTEGER OPTIONAL , -- day of month (1-31)
72 season VisibleString OPTIONAL , -- for "spring", "may-june", etc
73 hour INTEGER OPTIONAL , -- hour of day (0-23)
74 minute INTEGER OPTIONAL , -- minute of hour (0-59)
75 second INTEGER OPTIONAL } -- second of minute (0-59)
76
77 -- Dbtag is generalized for tagging
78 -- eg. { "Social Security", str "023-79-8841" }
79 -- or { "member", id 8882224 }
80
81 Dbtag ::= SEQUENCE { -- pairs a database/system name with a tag inside it
82 db VisibleString , -- name of database or system
83 tag Object-id } -- appropriate tag (integer id or string, see Object-id)
84
85 -- Object-id can tag or name anything
86 --
87
88 Object-id ::= CHOICE { -- identify by number or by string
89 id INTEGER , -- numeric identifier
90 str VisibleString } -- textual identifier
91
92 -- Person-id is to define a std element for people
93 --
94
95 Person-id ::= CHOICE { -- exactly one way of identifying a person
96 dbtag Dbtag , -- any defined database tag
97 name Name-std , -- structured name
98 ml VisibleString , -- MEDLINE name (semi-structured)
99 -- eg. "Jones RM"
100 str VisibleString, -- unstructured name
101 consortium VisibleString } -- consortium name
102
103 Name-std ::= SEQUENCE { -- Structured names
104 last VisibleString , -- the only mandatory component
105 first VisibleString OPTIONAL ,
106 middle VisibleString OPTIONAL ,
107 full VisibleString OPTIONAL , -- full name eg. "J. John Smith, Esq"
108 initials VisibleString OPTIONAL, -- first + middle initials
109 suffix VisibleString OPTIONAL , -- Jr, Sr, III
110 title VisibleString OPTIONAL } -- Dr., Sister, etc
111
112 --**** Int-fuzz **********************************************
113 --*
114 --* uncertainties in integer values
115
116 Int-fuzz ::= CHOICE { -- exactly one way of expressing the uncertainty
117 p-m INTEGER , -- plus or minus fixed amount
118 range SEQUENCE { -- max to min (max is the first component)
119 max INTEGER ,
120 min INTEGER } ,
121 pct INTEGER , -- % plus or minus (x10) 0-1000, i.e. tenths of a percent
122 lim ENUMERATED { -- some limit value
123 unk (0) , -- unknown
124 gt (1) , -- greater than
125 lt (2) , -- less than
126 tr (3) , -- space to right of position
127 tl (4) , -- space to left of position
128 circle (5) , -- artificial break at origin of circle
129 other (255) } , -- something else
130 alt SET OF INTEGER } -- set of alternatives for the integer
131
132
133 --**** User-object **********************************************
134 --*
135 --* a general object for a user defined structured data item
136 --* used by Seq-feat and Seq-descr
137
138 User-object ::= SEQUENCE { -- user defined structured data item
139 class VisibleString OPTIONAL , -- endeavor which designed this object
140 type Object-id , -- type of object within class
141 data SEQUENCE OF User-field } -- the object itself, as an ordered list of fields
142
143 User-field ::= SEQUENCE { -- one labelled field of a User-object
144 label Object-id , -- field label
145 num INTEGER OPTIONAL , -- required for strs, ints, reals, oss (presumably the element count; confirm)
146 data CHOICE { -- field contents
147 str UTF8String ,
148 int INTEGER ,
149 real REAL ,
150 bool BOOLEAN ,
151 os OCTET STRING ,
152 object User-object , -- for using other definitions
153 strs SEQUENCE OF UTF8String ,
154 ints SEQUENCE OF INTEGER ,
155 reals SEQUENCE OF REAL ,
156 oss SEQUENCE OF OCTET STRING ,
157 fields SEQUENCE OF User-field , -- nested fields (recursive definition)
158 objects SEQUENCE OF User-object } } -- nested objects
159
160
161
162 END
163
164 --$Revision: 6.3 $
165 --****************************************************************
166 --
167 -- NCBI Bibliographic data elements
168 -- by James Ostell, 1990
169 --
170 -- Taken from the American National Standard for
171 -- Bibliographic References
172 -- ANSI Z39.29-1977
173 -- Version 3.0 - June 1994
174 -- PubMedId added in 1996
175 -- ArticleIds and eprint elements added in 1999
176 --
177 --****************************************************************
178
179 NCBI-Biblio DEFINITIONS ::=
180 BEGIN
181
182 EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
183 Cit-proc, Cit-sub, Title, Author, PubMedId, DOI;
184
185 IMPORTS Person-id, Date, Dbtag FROM NCBI-General;
186
187 -- Article Ids
188
189 ArticleId ::= CHOICE { -- can be many ids for an article
190 pubmed PubMedId , -- see types below
191 medline MedlineUID , -- Id from MEDLINE
192 doi DOI ,
193 pii PII , -- Controlled Publisher Identifier
194 pmcid PmcID , -- PubMed Central Id
195 pmcpid PmcPid , -- Publisher Id supplied to PubMed Central
196 pmpid PmPid , -- Publisher Id supplied to PubMed
197 other Dbtag } -- generic catch all
198
199 PubMedId ::= INTEGER -- Id from the PubMed database at NCBI
200 MedlineUID ::= INTEGER -- Id from MEDLINE
201 DOI ::= VisibleString -- Digital Object Identifier
202 PII ::= VisibleString -- Controlled Publisher Identifier
203 PmcID ::= INTEGER -- PubMed Central Id
204 PmcPid ::= VisibleString -- Publisher Id supplied to PubMed Central
205 PmPid ::= VisibleString -- Publisher Id supplied to PubMed
206
207 ArticleIdSet ::= SET OF ArticleId -- unordered collection of ids for one article
208
209 -- Status Dates
210
211 PubStatus ::= INTEGER { -- points of publication
212 received (1) , -- date manuscript received for review
213 accepted (2) , -- accepted for publication
214 epublish (3) , -- published electronically by publisher
215 ppublish (4) , -- published in print by publisher
216 revised (5) , -- article revised by publisher/author
217 pmc (6) , -- article first appeared in PubMed Central
218 pmcr (7) , -- article revision in PubMed Central
219 pubmed (8) , -- article citation first appeared in PubMed
220 pubmedr (9) , -- article citation revision in PubMed
221 aheadofprint (10), -- epublish, but will be followed by print
222 premedline (11), -- date into PreMedline status
223 medline (12), -- date made a MEDLINE record
224 other (255) } -- any status not listed above
225
226 PubStatusDate ::= SEQUENCE { -- done as a structure so fields can be added
227 pubstatus PubStatus , -- which point of publication
228 date Date } -- time may be added later (note: Date-std has optional hour/minute/second)
229
230 PubStatusDateSet ::= SET OF PubStatusDate -- unordered set of status/date pairs
231
232 -- Citation Types
233
234 Cit-art ::= SEQUENCE { -- article in journal or book
235 title Title OPTIONAL , -- title of paper (ANSI requires)
236 authors Auth-list OPTIONAL , -- authors (ANSI requires)
237 from CHOICE { -- journal, book or proceedings
238 journal Cit-jour ,
239 book Cit-book ,
240 proc Cit-proc } , -- meeting proceedings
241 ids ArticleIdSet OPTIONAL } -- lots of ids
242
243 Cit-jour ::= SEQUENCE { -- Journal citation
244 title Title , -- title of journal
245 imp Imprint } -- imprint group (date, volume, issue, pages, ...)
246
247 Cit-book ::= SEQUENCE { -- Book citation
248 title Title , -- Title of book
249 coll Title OPTIONAL , -- part of a collection
250 authors Auth-list, -- authors
251 imp Imprint } -- imprint group (date, publisher, ...)
252
253 Cit-proc ::= SEQUENCE { -- Meeting proceedings: a book citation plus the meeting details
254 book Cit-book , -- citation to meeting
255 meet Meeting } -- time and location of meeting
256
257 -- Patent number and date-issue were made optional in 1997 to
258 -- support patent applications being issued from the USPTO
259 -- Semantically a Cit-pat must have either a patent number or
260 -- an application number (or both) to be valid
261
262 Cit-pat ::= SEQUENCE { -- patent citation
263 title VisibleString ,
264 authors Auth-list, -- author/inventor
265 country VisibleString , -- Patent Document Country
266 doc-type VisibleString , -- Patent Document Type
267 number VisibleString OPTIONAL, -- Patent Document Number (this or app-number must be present)
268 date-issue Date OPTIONAL, -- Patent Issue/Pub Date
269 class SEQUENCE OF VisibleString OPTIONAL , -- Patent Doc Class Code
270 app-number VisibleString OPTIONAL , -- Patent Doc Appl Number (this or number must be present)
271 app-date Date OPTIONAL , -- Patent Appl File Date
272 applicants Auth-list OPTIONAL , -- Applicants
273 assignees Auth-list OPTIONAL , -- Assignees
274 priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
275 abstract VisibleString OPTIONAL } -- abstract of patent
276
277 Patent-priority ::= SEQUENCE { -- one priority entry of a patent; all components mandatory
278 country VisibleString , -- Patent country code
279 number VisibleString , -- number assigned in that country
280 date Date } -- date of application
281
282 Id-pat ::= SEQUENCE { -- just to identify a patent
283 country VisibleString , -- Patent Document Country
284 id CHOICE { -- either the document number or the application number
285 number VisibleString , -- Patent Document Number
286 app-number VisibleString } , -- Patent Doc Appl Number
287 doc-type VisibleString OPTIONAL } -- Patent Doc Type
288
289 Cit-let ::= SEQUENCE { -- letter, thesis, or manuscript
290 cit Cit-book , -- same fields as a book
291 man-id VisibleString OPTIONAL , -- Manuscript identifier
292 type ENUMERATED { -- which of the three kinds this is (optional)
293 manuscript (1) ,
294 letter (2) ,
295 thesis (3) } OPTIONAL }
296 -- NOTE: this is just to cite a
297 -- direct data submission, see NCBI-Submit
298 -- for the form of a sequence submission
299 Cit-sub ::= SEQUENCE { -- citation for a direct submission
300 authors Auth-list , -- not necessarily authors of the paper
301 imp Imprint OPTIONAL , -- only used to get the date; to be dropped in favor of date below
302 medium ENUMERATED { -- medium of submission
303 paper (1) ,
304 tape (2) ,
305 floppy (3) ,
306 email (4) ,
307 other (255) } OPTIONAL ,
308 date Date OPTIONAL , -- replaces imp, will become required
309 descr VisibleString OPTIONAL } -- description of changes for public view
310
311 Cit-gen ::= SEQUENCE { -- NOT from ANSI, this is a catchall; every component is optional
312 cit VisibleString OPTIONAL , -- anything, not parsable
313 authors Auth-list OPTIONAL ,
314 muid INTEGER OPTIONAL , -- medline uid
315 journal Title OPTIONAL ,
316 volume VisibleString OPTIONAL ,
317 issue VisibleString OPTIONAL ,
318 pages VisibleString OPTIONAL ,
319 date Date OPTIONAL ,
320 serial-number INTEGER OPTIONAL , -- for GenBank style references
321 title VisibleString OPTIONAL , -- eg. cit="unpublished",title="title"
322 pmid PubMedId OPTIONAL } -- PubMed Id
323
324
325 -- Authorship Group
326 Auth-list ::= SEQUENCE { -- author names plus an optional affiliation
327 names CHOICE { -- all names in a list use the same representation
328 std SEQUENCE OF Author , -- full citations
329 ml SEQUENCE OF VisibleString , -- MEDLINE, semi-structured
330 str SEQUENCE OF VisibleString } , -- free for all
331 affil Affil OPTIONAL } -- author affiliation
332
333 Author ::= SEQUENCE { -- one author; only name is mandatory
334 name Person-id , -- Author, Primary or Secondary
335 level ENUMERATED { -- primary or secondary author
336 primary (1),
337 secondary (2) } OPTIONAL ,
338 role ENUMERATED { -- Author Role Indicator
339 compiler (1),
340 editor (2),
341 patent-assignee (3),
342 translator (4) } OPTIONAL ,
343 affil Affil OPTIONAL , -- affiliation of this author
344 is-corr BOOLEAN OPTIONAL } -- TRUE if corresponding author
345
346 Affil ::= CHOICE { -- affiliation, either free text or structured
347 str VisibleString , -- unparsed string
348 std SEQUENCE { -- std representation; every component is optional
349 affil VisibleString OPTIONAL , -- Author Affiliation, Name
350 div VisibleString OPTIONAL , -- Author Affiliation, Division
351 city VisibleString OPTIONAL , -- Author Affiliation, City
352 sub VisibleString OPTIONAL , -- Author Affiliation, County Sub
353 country VisibleString OPTIONAL , -- Author Affiliation, Country
354 street VisibleString OPTIONAL , -- street address, not ANSI
355 email VisibleString OPTIONAL ,
356 fax VisibleString OPTIONAL ,
357 phone VisibleString OPTIONAL ,
358 postal-code VisibleString OPTIONAL }}
359
360 -- Title Group
361 -- Valid for = A = Analytic (Cit-art)
362 -- J = Journals (Cit-jour)
363 -- B = Book (Cit-book)
364 -- Valid for:
365 Title ::= SET OF CHOICE { -- set of title forms; trailing A/J/B letters = valid for Cit-art/Cit-jour/Cit-book
366 name VisibleString , -- Title, Anal,Coll,Mono AJB
367 tsub VisibleString , -- Title, Subordinate A B
368 trans VisibleString , -- Title, Translated AJB
369 jta VisibleString , -- Title, Abbreviated J
370 iso-jta VisibleString , -- specifically ISO jta J
371 ml-jta VisibleString , -- specifically MEDLINE jta J
372 coden VisibleString , -- a coden J
373 issn VisibleString , -- ISSN J
374 abr VisibleString , -- Title, Abbreviated B
375 isbn VisibleString } -- ISBN B
376
377 Imprint ::= SEQUENCE { -- Imprint group
378 date Date , -- date of publication (the only mandatory component)
379 volume VisibleString OPTIONAL ,
380 issue VisibleString OPTIONAL ,
381 pages VisibleString OPTIONAL ,
382 section VisibleString OPTIONAL ,
383 pub Affil OPTIONAL, -- publisher, required for book
384 cprt Date OPTIONAL, -- copyright date, also required for book
385 part-sup VisibleString OPTIONAL , -- part/sup of volume
386 language VisibleString DEFAULT "ENG" , -- put here for simplicity
387 prepub ENUMERATED { -- for prepublication citations
388 submitted (1) , -- submitted, not accepted
389 in-press (2) , -- accepted, not published
390 other (255) } OPTIONAL ,
391 part-supi VisibleString OPTIONAL , -- part/sup on issue
392 retract CitRetract OPTIONAL , -- retraction info
393 pubstatus PubStatus OPTIONAL , -- current status of this publication
394 history PubStatusDateSet OPTIONAL } -- dates for this record
395
396 CitRetract ::= SEQUENCE { -- retraction/erratum information attached to an Imprint
397 type ENUMERATED { -- retraction of an entry
398 retracted (1) , -- this citation retracted
399 notice (2) , -- this citation is a retraction notice
400 in-error (3) , -- an erratum was published about this
401 erratum (4) } , -- this is a published erratum
402 exp VisibleString OPTIONAL } -- citation and/or explanation
403
404 Meeting ::= SEQUENCE { -- time and location of a meeting (used by Cit-proc)
405 number VisibleString ,
406 date Date , -- date of the meeting
407 place Affil OPTIONAL } -- location of the meeting
408
409
410 END
411
412
413 --$Revision: 6.0 $
414 --**********************************************************************
415 --
416 -- MEDLINE data definitions
417 -- James Ostell, 1990
418 --
419 -- enhanced in 1996 to support PubMed records as well by simply adding
420 -- the PubMedId and making MedlineId optional
421 --
422 --**********************************************************************
423
424 NCBI-Medline DEFINITIONS ::=
425 BEGIN
426
427 EXPORTS Medline-entry, Medline-si;
428
429 IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
430 Date FROM NCBI-General;
431
432 -- a MEDLINE or PubMed entry
433 Medline-entry ::= SEQUENCE {
434 uid INTEGER OPTIONAL , -- MEDLINE UID, sometimes not yet available if from PubMed
435 em Date , -- Entry Month
436 cit Cit-art , -- article citation
437 abstract VisibleString OPTIONAL ,
438 mesh SET OF Medline-mesh OPTIONAL , -- MeSH terms
439 substance SET OF Medline-rn OPTIONAL , -- substance records
440 xref SET OF Medline-si OPTIONAL , -- cross reference records
441 idnum SET OF VisibleString OPTIONAL , -- ID Number (grants, contracts)
442 gene SET OF VisibleString OPTIONAL ,
443 pmid PubMedId OPTIONAL , -- MEDLINE records may include the PubMedId
444 pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
445 mlfield SET OF Medline-field OPTIONAL , -- additional Medline field types
446 status INTEGER { -- defaults to medline when absent
447 publisher (1) , -- record as supplied by publisher
448 premedline (2) , -- premedline record
449 medline (3) } DEFAULT medline } -- regular medline record
450
451 Medline-mesh ::= SEQUENCE { -- one MeSH term with optional qualifiers
452 mp BOOLEAN DEFAULT FALSE , -- TRUE if main point (*)
453 term VisibleString , -- the MeSH term
454 qual SET OF Medline-qual OPTIONAL } -- qualifiers
455
456 Medline-qual ::= SEQUENCE { -- a qualifier (subheading) of a MeSH term
457 mp BOOLEAN DEFAULT FALSE , -- TRUE if main point
458 subh VisibleString } -- the subheading
459
460 Medline-rn ::= SEQUENCE { -- medline substance records
461 type ENUMERATED { -- type of record
462 nameonly (0) , -- presumably: no CAS or EC number, name only
463 cas (1) , -- CAS number
464 ec (2) } , -- EC number
465 cit VisibleString OPTIONAL , -- CAS or EC number if present
466 name VisibleString } -- name (always present)
467
468 Medline-si ::= SEQUENCE { -- medline cross reference record: source database plus optional citation
469 type ENUMERATED { -- type of xref
470 ddbj (1) , -- DNA Data Bank of Japan
471 carbbank (2) , -- Carbohydrate Structure Database
472 embl (3) , -- EMBL Data Library
473 hdb (4) , -- Hybridoma Data Bank
474 genbank (5) , -- GenBank
475 hgml (6) , -- Human Gene Map Library
476 mim (7) , -- Mendelian Inheritance in Man
477 msd (8) , -- Microbial Strains Database
478 pdb (9) , -- Protein Data Bank (Brookhaven)
479 pir (10) , -- Protein Identification Resource
480 prfseqdb (11) , -- Protein Research Foundation (Japan)
481 psd (12) , -- Protein Sequence Database (Japan)
482 swissprot (13) , -- SwissProt
483 gdb (14) } , -- Genome Data Base
484 cit VisibleString OPTIONAL } -- the citation/accession number
485
486 Medline-field ::= SEQUENCE { -- an additional typed text field of a Medline-entry
487 type INTEGER { -- Keyed type
488 other (0) , -- look in line code
489 comment (1) , -- comment line
490 erratum (2) } , -- retracted, corrected, etc
491 str VisibleString , -- the text
492 ids SEQUENCE OF DocRef OPTIONAL } -- pointers relevant to this text
493
494 DocRef ::= SEQUENCE { -- reference to a document
495 type INTEGER { -- which id space uid belongs to
496 medline (1) ,
497 pubmed (2) ,
498 ncbigi (3) } ,
499 uid INTEGER } -- the identifier itself
500
501 END
502
503 --$Revision: 6.0 $
504 --**********************************************************************
505 --
506 -- PUBMED data definitions
507 --
508 --**********************************************************************
509
510 NCBI-PubMed DEFINITIONS ::=
511 BEGIN
512
513 EXPORTS Pubmed-entry, Pubmed-url;
514
515 IMPORTS PubMedId FROM NCBI-Biblio
516 Medline-entry FROM NCBI-Medline;
517
518 Pubmed-entry ::= SEQUENCE { -- a PubMed entry; only pmid is mandatory
519 -- PUBMED records must include the PubMedId
520 pmid PubMedId,
521
522 -- Medline entry information
523 medent Medline-entry OPTIONAL,
524
525 -- Publisher name
526 publisher VisibleString OPTIONAL,
527
528 -- List of URLs to publisher cite
529 urls SET OF Pubmed-url OPTIONAL,
530
531 -- Publisher's article identifier
532 pubid VisibleString OPTIONAL
533 }
534
535 Pubmed-url ::= SEQUENCE { -- a URL, optionally labelled with a location code
536 location VisibleString OPTIONAL, -- Location code
537 url VisibleString -- Selected URL for location
538 }
539
540 END
541 --$Revision: 6.0 $
542 --**********************************************************************
543 --
544 -- MEDLARS data definitions
545 -- Grigoriy Starchenko, 1997
546 --
547 --**********************************************************************
548
549 NCBI-Medlars DEFINITIONS ::=
550 BEGIN
551
552 EXPORTS Medlars-entry, Medlars-record;
553
554 IMPORTS PubMedId FROM NCBI-Biblio;
555
556 Medlars-entry ::= SEQUENCE { -- a MEDLARS entry
557 pmid PubMedId, -- All entries in PubMed must have it
558 muid INTEGER OPTIONAL, -- Medline(OCCS) id
559 recs SET OF Medlars-record -- Set of Medlars records (SET OF, so unordered)
560 }
561
562 Medlars-record ::= SEQUENCE { -- one unit record: field type plus its data
563 code INTEGER, -- Unit record field type integer form
564 abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
565 data VisibleString -- Unit record data
566 }
567
568 END
569 --$Revision: 6.0 $
570 --********************************************************************
571 --
572 -- Publication common set
573 -- James Ostell, 1990
574 --
575 -- These are the base class definitions for Publications of all sorts
576 --
577 -- support for PubMedId added in 1996
578 --********************************************************************
579
580 NCBI-Pub DEFINITIONS ::=
581 BEGIN
582
583 EXPORTS Pub, Pub-set, Pub-equiv;
584
585 IMPORTS Medline-entry FROM NCBI-Medline
586 Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
587 Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;
588
589 Pub ::= CHOICE { -- exactly one form of publication reference
590 gen Cit-gen , -- general or generic unparsed
591 sub Cit-sub , -- submission
592 medline Medline-entry , -- a MEDLINE or PubMed entry
593 muid INTEGER , -- medline uid
594 article Cit-art , -- article in journal or book
595 journal Cit-jour , -- journal citation
596 book Cit-book , -- book citation
597 proc Cit-proc , -- proceedings of a meeting
598 patent Cit-pat , -- patent citation
599 pat-id Id-pat , -- identify a patent
600 man Cit-let , -- manuscript, thesis, or letter
601 equiv Pub-equiv, -- to cite a variety of ways
602 pmid PubMedId } -- PubMedId
603
604 Pub-equiv ::= SET OF Pub -- equivalent identifiers for same citation (unordered)
605
606 Pub-set ::= CHOICE { -- a set of publications, either generic (pub) or all of one kind
607 pub SET OF Pub , -- any mixture of Pub forms
608 medline SET OF Medline-entry ,
609 article SET OF Cit-art ,
610 journal SET OF Cit-jour ,
611 book SET OF Cit-book ,
612 proc SET OF Cit-proc , -- proceedings of a meeting
613 patent SET OF Cit-pat }
614
615 END
616
617 --$Revision: 6.5 $
618 --**********************************************************************
619 --
620 -- NCBI Sequence location and identifier elements
621 -- by James Ostell, 1990
622 --
623 -- Version 3.0 - 1994
624 --
625 --**********************************************************************
626
627 NCBI-Seqloc DEFINITIONS ::=
628 BEGIN
629
630 EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
631 Na-strand, Giimport-id;
632
633 IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
634 Id-pat FROM NCBI-Biblio
635 Feat-id FROM NCBI-Seqfeat;
636
637 --*** Sequence identifiers ********************************
638 --*
639
640 Seq-id ::= CHOICE { -- exactly one kind of sequence identifier
641 local Object-id , -- local use
642 gibbsq INTEGER , -- Geninfo backbone seqid
643 gibbmt INTEGER , -- Geninfo backbone moltype
644 giim Giimport-id , -- Geninfo import id
645 genbank Textseq-id , -- GenBank
646 embl Textseq-id , -- EMBL
647 pir Textseq-id , -- PIR
648 swissprot Textseq-id , -- SwissProt
649 patent Patent-seq-id , -- sequence cited in a patent
650 other Textseq-id , -- for historical reasons, 'other' = 'refseq'
651 general Dbtag , -- for other databases
652 gi INTEGER , -- GenInfo Integrated Database
653 ddbj Textseq-id , -- DDBJ
654 prf Textseq-id , -- PRF SEQDB
655 pdb PDB-seq-id , -- PDB sequence
656 tpg Textseq-id , -- Third Party Annot/Seq Genbank
657 tpe Textseq-id , -- Third Party Annot/Seq EMBL
658 tpd Textseq-id , -- Third Party Annot/Seq DDBJ
659 gpipe Textseq-id , -- Internal NCBI genome pipeline processing ID
660 named-annot-track Textseq-id -- Internal named annotation tracking ID
661 }
662
663 Seq-id-set ::= SET OF Seq-id -- unordered collection of sequence ids (not in this module's EXPORTS)
664
665
666 Patent-seq-id ::= SEQUENCE { -- identifies one sequence within a patent
667 seqid INTEGER , -- number of sequence in patent
668 cit Id-pat } -- patent citation
669
670 Textseq-id ::= SEQUENCE { -- text-based sequence id; every component is optional
671 name VisibleString OPTIONAL ,
672 accession VisibleString OPTIONAL ,
673 release VisibleString OPTIONAL ,
674 version INTEGER OPTIONAL }
675
676 Giimport-id ::= SEQUENCE { -- Geninfo import id
677 id INTEGER , -- the id to use here
678 db VisibleString OPTIONAL , -- dbase used in
679 release VisibleString OPTIONAL } -- the release
680
681 PDB-seq-id ::= SEQUENCE { -- PDB sequence identifier
682 mol PDB-mol-id , -- the molecule name
683 chain INTEGER DEFAULT 32 , -- a single ASCII character, chain id (32 = space)
684 rel Date OPTIONAL } -- release date, month and year
685
686 PDB-mol-id ::= VisibleString -- name of mol, 4 chars (length is not constrained by the type)
687
688 --*** Sequence locations **********************************
689 --*
690
691 Seq-loc ::= CHOICE { -- exactly one form of location
692 null NULL , -- not placed
693 empty Seq-id , -- to NULL one Seq-id in a collection
694 whole Seq-id , -- whole sequence
695 int Seq-interval , -- from to
696 packed-int Packed-seqint , -- a sequence of intervals
697 pnt Seq-point , -- a single point
698 packed-pnt Packed-seqpnt , -- several points on one Seq-id
699 mix Seq-loc-mix , -- a sequence of any Seq-locs
700 equiv Seq-loc-equiv , -- equivalent sets of locations
701 bond Seq-bond , -- bond between residues
702 feat Feat-id } -- indirect, through a Seq-feat
703
704
705 Seq-interval ::= SEQUENCE { -- a from/to range on one Seq-id
706 from INTEGER ,
707 to INTEGER ,
708 strand Na-strand OPTIONAL ,
709 id Seq-id , -- WARNING: this used to be optional
710 fuzz-from Int-fuzz OPTIONAL , -- uncertainty of from
711 fuzz-to Int-fuzz OPTIONAL } -- uncertainty of to
712
713 Packed-seqint ::= SEQUENCE OF Seq-interval -- ordered list of intervals
714
715 Seq-point ::= SEQUENCE { -- a single position on one Seq-id
716 point INTEGER ,
717 strand Na-strand OPTIONAL ,
718 id Seq-id , -- WARNING: this used to be optional
719 fuzz Int-fuzz OPTIONAL } -- uncertainty of point
720
721 Packed-seqpnt ::= SEQUENCE { -- several points sharing one strand, id and fuzz
722 strand Na-strand OPTIONAL ,
723 id Seq-id ,
724 fuzz Int-fuzz OPTIONAL ,
725 points SEQUENCE OF INTEGER } -- the positions themselves
726
727 Na-strand ::= ENUMERATED { -- strand of nucleic acid
728 unknown (0) ,
729 plus (1) ,
730 minus (2) ,
731 both (3) , -- both strands, in forward orientation
732 both-rev (4) , -- both strands, in reverse orientation
733 other (255) }
734
735 Seq-bond ::= SEQUENCE { -- bond between residues
736 a Seq-point , -- connection to at least one residue
737 b Seq-point OPTIONAL } -- other end may not be available
738
739 Seq-loc-mix ::= SEQUENCE OF Seq-loc -- this will hold anything (ordered)
740
741 Seq-loc-equiv ::= SET OF Seq-loc -- for a set of equivalent locations (unordered)
742
743 END
744
745
746 --$Revision: 6.25 $
747 --**********************************************************************
748 --
749 -- NCBI Sequence elements
750 -- by James Ostell, 1990
751 -- Version 3.0 - June 1994
752 --
753 --**********************************************************************
754
755 NCBI-Sequence DEFINITIONS ::=
756 BEGIN
757
758 EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo, -- types visible to other modules
759 Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
760 Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap; -- each symbol is listed once
761
762 IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
763 Seq-align FROM NCBI-Seqalign
764 Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
765 Seq-graph FROM NCBI-Seqres
766 Pub-equiv FROM NCBI-Pub
767 Org-ref FROM NCBI-Organism
768 BioSource FROM NCBI-BioSource
769 Seq-id, Seq-loc FROM NCBI-Seqloc
770 GB-block FROM GenBank-General
771 PIR-block FROM PIR-General
772 EMBL-block FROM EMBL-General
773 SP-block FROM SP-General
774 PRF-block FROM PRF-General
775 PDB-block FROM PDB-General
776 Seq-table FROM NCBI-SeqTable;
777
778 --*** Sequence ********************************
779 --*
780
781 Bioseq ::= SEQUENCE { -- one sequence: ids, descriptors, instance data, annotations
782 id SET OF Seq-id , -- equivalent identifiers
783 descr Seq-descr OPTIONAL , -- descriptors
784 inst Seq-inst , -- the sequence data
785 annot SET OF Seq-annot OPTIONAL }
786
787 --*** Descriptors *****************************
788 --*
789
790 Seq-descr ::= SET OF Seqdesc -- unordered set of descriptors
791
792 Seqdesc ::= CHOICE { -- exactly one kind of descriptor
793 mol-type GIBB-mol , -- type of molecule (slated for removal; do not use in new data)
794 modif SET OF GIBB-mod , -- modifiers (slated for removal; do not use in new data)
795 method GIBB-method , -- sequencing method (slated for removal; do not use in new data)
796 name VisibleString , -- a name for this sequence
797 title VisibleString , -- a title for this sequence
798 org Org-ref , -- if all from one organism (slated for removal; do not use in new data)
799 comment VisibleString , -- a more extensive comment
800 num Numbering , -- a numbering system
801 maploc Dbtag , -- map location of this sequence
802 pir PIR-block , -- PIR specific info
803 genbank GB-block , -- GenBank specific info
804 pub Pubdesc , -- a reference to the publication
805 region VisibleString , -- overall region (globin locus)
806 user User-object , -- user defined object
807 sp SP-block , -- SWISSPROT specific info
808 dbxref Dbtag , -- xref to other databases
809 embl EMBL-block , -- EMBL specific information
810 create-date Date , -- date entry first created/released
811 update-date Date , -- date of last update
812 prf PRF-block , -- PRF specific information
813 pdb PDB-block , -- PDB specific information
814 het Heterogen , -- cofactor, etc associated but not bound
815 source BioSource , -- source of materials, includes Org-ref
816 molinfo MolInfo , -- info on the molecule and techniques
817 modelev ModelEvidenceSupport -- model evidence for XM records
818 }
819
820 --******* NOTE:
821 --* mol-type, modif, method, and org are consolidated and expanded
822 --* in Org-ref, BioSource, and MolInfo in this specification. They
823 --* will be removed in later specifications. Do not use them in the
824 --* future. Instead expect the new structures.
825 --*
826 --***************************
827
828 --********************************************************************
829 --
830 -- MolInfo gives information on the
831 -- classification of the type and quality of the sequence
832 --
833 -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
834 --
835 --********************************************************************
836
837 MolInfo ::= SEQUENCE { -- biomol, tech and completeness each default to unknown
838 biomol INTEGER { -- kind of molecule
839 unknown (0) ,
840 genomic (1) ,
841 pre-RNA (2) , -- precursor RNA of any sort really
842 mRNA (3) ,
843 rRNA (4) ,
844 tRNA (5) ,
845 snRNA (6) ,
846 scRNA (7) ,
847 peptide (8) ,
848 other-genetic (9) , -- other genetic material
849 genomic-mRNA (10) , -- reported a mix of genomic and cdna sequence
850 cRNA (11) , -- viral RNA genome copy intermediate
851 snoRNA (12) , -- small nucleolar RNA
852 transcribed-RNA (13) , -- transcribed RNA other than existing classes
853 ncRNA (14) ,
854 tmRNA (15) ,
855 other (255) } DEFAULT unknown ,
856 tech INTEGER { -- technique used to obtain the sequence
857 unknown (0) ,
858 standard (1) , -- standard sequencing
859 est (2) , -- Expressed Sequence Tag
860 sts (3) , -- Sequence Tagged Site
861 survey (4) , -- one-pass genomic sequence
862 genemap (5) , -- from genetic mapping techniques
863 physmap (6) , -- from physical mapping techniques
864 derived (7) , -- derived from other data, not a primary entity
865 concept-trans (8) , -- conceptual translation
866 seq-pept (9) , -- peptide was sequenced
867 both (10) , -- concept transl. w/ partial pept. seq.
868 seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
869 seq-pept-homol (12) , -- sequenced peptide, ordered by homology
870 concept-trans-a (13) , -- conceptual transl. supplied by author
871 htgs-1 (14) , -- unordered High Throughput sequence contig
872 htgs-2 (15) , -- ordered High Throughput sequence contig
873 htgs-3 (16) , -- finished High Throughput sequence
874 fli-cdna (17) , -- full length insert cDNA
875 htgs-0 (18) , -- single genomic reads for coordination
876 htc (19) , -- high throughput cDNA
877 wgs (20) , -- whole genome shotgun sequencing
878 barcode (21) , -- barcode of life project
879 composite-wgs-htgs (22) , -- composite of WGS and HTGS
880 tsa (23) , -- transcriptome shotgun assembly
881 other (255) } -- explain in techexp (the next component of this SEQUENCE)
882 DEFAULT unknown ,
883 techexp VisibleString OPTIONAL , -- explanation if tech not enough
884 --
885 -- Completeness is not indicated in most records. For genomes, assume
886 -- the sequences are incomplete unless specifically marked as complete.
887 -- For mRNAs, assume the ends are not known exactly unless marked as
888 -- having the left or right end.
889 --
890 completeness INTEGER {
891 unknown (0) ,
892 complete (1) , -- complete biological entity
893 partial (2) , -- partial but no details given
894 no-left (3) , -- missing 5' or NH3 end
895 no-right (4) , -- missing 3' or COOH end
896 no-ends (5) , -- missing both ends
897 has-left (6) , -- 5' or NH3 end present
898 has-right (7) , -- 3' or COOH end present
899 other (255) } DEFAULT unknown ,
900 gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA
901
902
903 GIBB-mol ::= ENUMERATED { -- type of molecule represented; each name is bound to an explicit integer code
904 unknown (0) ,
905 genomic (1) ,
906 pre-mRNA (2) , -- precursor RNA of any sort really
907 mRNA (3) ,
908 rRNA (4) ,
909 tRNA (5) ,
910 snRNA (6) ,
911 scRNA (7) ,
912 peptide (8) ,
913 other-genetic (9) , -- other genetic material
914 genomic-mRNA (10) , -- reported a mix of genomic and cdna sequence
915 other (255) } -- catch-all; codes 11 through 254 are not assigned in this list
916
917 GIBB-mod ::= ENUMERATED { -- GenInfo Backbone modifiers; one named code per modifier
918 dna (0) ,
919 rna (1) ,
920 extrachrom (2) ,
921 plasmid (3) ,
922 mitochondrial (4) ,
923 chloroplast (5) ,
924 kinetoplast (6) ,
925 cyanelle (7) ,
926 synthetic (8) ,
927 recombinant (9) ,
928 partial (10) ,
929 complete (11) ,
930 mutagen (12) , -- subject of mutagenesis ?
931 natmut (13) , -- natural mutant ?
932 transposon (14) ,
933 insertion-seq (15) ,
934 no-left (16) , -- missing left end (5' for na, NH2 for aa)
935 no-right (17) , -- missing right end (3' or COOH)
936 macronuclear (18) ,
937 proviral (19) ,
938 est (20) , -- expressed sequence tag
939 sts (21) , -- sequence tagged site
940 survey (22) , -- one pass survey sequence
941 chromoplast (23) ,
942 genemap (24) , -- is a genetic map
943 restmap (25) , -- is an ordered restriction map
944 physmap (26) , -- is a physical map (not ordered restriction map)
945 other (255) } -- catch-all; codes 27 through 254 are not assigned in this list
946
947 GIBB-method ::= ENUMERATED { -- sequencing methods; codes start at 1, no zero value is defined
948 concept-trans (1) , -- conceptual translation
949 seq-pept (2) , -- peptide was sequenced
950 both (3) , -- concept transl. w/ partial pept. seq.
951 seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
952 seq-pept-homol (5) , -- sequenced peptide, ordered by homology
953 concept-trans-a (6) , -- conceptual transl. supplied by author
954 other (255) } -- catch-all; codes 7 through 254 are not assigned in this list
955
956 Numbering ::= CHOICE { -- any display numbering system; a value holds exactly one of the four alternatives
957 cont Num-cont , -- continuous numbering
958 enum Num-enum , -- enumerated names for residues
959 ref Num-ref , -- by reference to another sequence
960 real Num-real } -- supports mapping to a float system
961
962 Num-cont ::= SEQUENCE { -- continuous display numbering system; every field carries a DEFAULT
963 refnum INTEGER DEFAULT 1, -- number assigned to first residue (1 when omitted)
964 has-zero BOOLEAN DEFAULT FALSE , -- 0 used? (FALSE when omitted)
965 ascending BOOLEAN DEFAULT TRUE } -- ascending numbers? (TRUE when omitted)
966
967 Num-enum ::= SEQUENCE { -- any tags to residues: a count followed by the list of tag strings
968 num INTEGER , -- number of tags to follow
969 names SEQUENCE OF VisibleString } -- the tags, as an ordered list; no SIZE constraint ties its length to num
970
971 Num-ref ::= SEQUENCE { -- numbering by reference to other sequences
972 type ENUMERATED { -- type of reference (mandatory)
973 not-set (0) ,
974 sources (1) , -- by segmented or const seq sources
975 aligns (2) } , -- by alignments given below
976 aligns Seq-align OPTIONAL } -- a single optional Seq-align; presumably supplied when type is aligns, TODO confirm
977
978 Num-real ::= SEQUENCE { -- mapping to floating point system via a linear transform with coefficients a and b
979 a REAL , -- from an integer system used by Bioseq
980 b REAL , -- position = (a * int_position) + b
981 units VisibleString OPTIONAL } -- optional free-text units for the resulting positions
982
983 Pubdesc ::= SEQUENCE { -- how sequence presented in pub; only pub is mandatory
984 pub Pub-equiv , -- the citation(s)
985 name VisibleString OPTIONAL , -- name used in paper
986 fig VisibleString OPTIONAL , -- figure in paper
987 num Numbering OPTIONAL , -- numbering from paper
988 numexc BOOLEAN OPTIONAL , -- numbering problem with paper
989 poly-a BOOLEAN OPTIONAL , -- poly A tail indicated in figure?
990 maploc VisibleString OPTIONAL , -- map location reported in paper
991 seq-raw StringStore OPTIONAL , -- original sequence from paper
992 align-group INTEGER OPTIONAL , -- this seq aligned with others in paper
993 comment VisibleString OPTIONAL, -- any comment on this pub in context
994 reftype INTEGER { -- type of reference in a GenBank record; named values for an INTEGER field
995 seq (0) , -- refers to sequence
996 sites (1) , -- refers to unspecified features
997 feats (2) , -- refers to specified features
998 no-target (3) } -- nothing specified (EMBL)
999 DEFAULT seq } -- reftype is seq (0) when omitted
1000
1001 Heterogen ::= VisibleString -- cofactor, prosthetic group, inhibitor, etc; a plain string alias
1002
1003 --*** Instances of sequences *******************************
1004 --*
1005
1006 Seq-inst ::= SEQUENCE { -- the sequence data itself; repr and mol are mandatory, the rest optional or defaulted
1007 repr ENUMERATED { -- representation class
1008 not-set (0) , -- empty
1009 virtual (1) , -- no seq data
1010 raw (2) , -- continuous sequence
1011 seg (3) , -- segmented sequence
1012 const (4) , -- constructed sequence
1013 ref (5) , -- reference to another sequence
1014 consen (6) , -- consensus sequence or pattern
1015 map (7) , -- ordered map of any kind
1016 delta (8) , -- sequence made by changes (delta) to others
1017 other (255) } ,
1018 mol ENUMERATED { -- molecule class in living organism
1019 not-set (0) , -- > cdna = rna
1020 dna (1) ,
1021 rna (2) ,
1022 aa (3) ,
1023 na (4) , -- just a nucleic acid
1024 other (255) } ,
1025 length INTEGER OPTIONAL , -- length of sequence in residues
1026 fuzz Int-fuzz OPTIONAL , -- length uncertainty
1027 topology ENUMERATED { -- topology of molecule
1028 not-set (0) ,
1029 linear (1) ,
1030 circular (2) ,
1031 tandem (3) , -- some part of tandem repeat
1032 other (255) } DEFAULT linear , -- linear is assumed when the field is omitted
1033 strand ENUMERATED { -- strandedness in living organism
1034 not-set (0) ,
1035 ss (1) , -- single strand
1036 ds (2) , -- double strand
1037 mixed (3) ,
1038 other (255) } OPTIONAL , -- default ds for DNA, ss for RNA, pept (a convention; the schema declares no DEFAULT)
1039 seq-data Seq-data OPTIONAL , -- the sequence
1040 ext Seq-ext OPTIONAL , -- extensions for special types
1041 hist Seq-hist OPTIONAL } -- sequence history
1042
1043 --*** Sequence Extensions **********************************
1044 --* for representing more complex types
1045 --* const type uses Seq-hist.assembly
1046
1047 Seq-ext ::= CHOICE { -- extension payload; exactly one alternative is present
1048 seg Seg-ext , -- segmented sequences
1049 ref Ref-ext , -- hot link to another sequence (a view)
1050 map Map-ext , -- ordered map of markers
1051 delta Delta-ext } -- list of Delta-seq pieces
1052
1053 Seg-ext ::= SEQUENCE OF Seq-loc -- ordered list of locations
1054
1055 Ref-ext ::= Seq-loc -- a single location; plain alias of Seq-loc
1056
1057 Map-ext ::= SEQUENCE OF Seq-feat -- ordered list of features
1058
1059 Delta-ext ::= SEQUENCE OF Delta-seq -- ordered list of Delta-seq elements
1060
1061 Delta-seq ::= CHOICE { -- one element of a Delta-ext: either a location or a literal
1062 loc Seq-loc , -- point to a sequence
1063 literal Seq-literal } -- a piece of sequence
1064
1065 Seq-literal ::= SEQUENCE { -- a piece of sequence given in place; length is the only mandatory field
1066 length INTEGER , -- must give a length in residues
1067 fuzz Int-fuzz OPTIONAL , -- could be unsure
1068 seq-data Seq-data OPTIONAL } -- may have the data
1069
1070 --*** Sequence History Record ***********************************
1071 --** assembly = records how seq was assembled from others
1072 --** replaces = records sequences made obsolete by this one
1073 --** replaced-by = this seq is made obsolete by another(s)
1074
1075 Seq-hist ::= SEQUENCE { -- sequence history record; all four fields are optional
1076 assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
1077 replaces Seq-hist-rec OPTIONAL , -- seq makes these seqs obsolete
1078 replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
1079 deleted CHOICE { -- deletion is recorded either as a flag or as a date
1080 bool BOOLEAN ,
1081 date Date } OPTIONAL }
1082
1083 Seq-hist-rec ::= SEQUENCE { -- one history entry, used by the replaces and replaced-by fields of Seq-hist
1084 date Date OPTIONAL , -- optional date for the entry
1085 ids SET OF Seq-id } -- the sequence ids involved (mandatory, unordered)
1086
1087 --*** Various internal sequence representations ************
1088 --* all are controlled, fixed length forms
1089
1090 Seq-data ::= CHOICE { -- sequence representations; exactly one encoding (or a gap) is present
1091 iupacna IUPACna , -- IUPAC 1 letter nuc acid code
1092 iupacaa IUPACaa , -- IUPAC 1 letter amino acid code
1093 ncbi2na NCBI2na , -- 2 bit nucleic acid code
1094 ncbi4na NCBI4na , -- 4 bit nucleic acid code
1095 ncbi8na NCBI8na , -- 8 bit extended nucleic acid code
1096 ncbipna NCBIpna , -- nucleic acid probabilities
1097 ncbi8aa NCBI8aa , -- 8 bit extended amino acid codes
1098 ncbieaa NCBIeaa , -- extended ASCII 1 letter aa codes
1099 ncbipaa NCBIpaa , -- amino acid probabilities
1100 ncbistdaa NCBIstdaa, -- consecutive codes for std aas
1101 gap Seq-gap -- gap types
1102 }
1103
1104 Seq-gap ::= SEQUENCE { -- describes a gap; type is mandatory, linkage fields are optional
1105 type INTEGER { -- kind of gap, as named INTEGER values
1106 unknown(0),
1107 fragment(1), -- Deprecated. Used only for AGP 1.1
1108 clone(2), -- Deprecated. Used only for AGP 1.1
1109 short-arm(3),
1110 heterochromatin(4),
1111 centromere(5),
1112 telomere(6),
1113 repeat(7),
1114 contig(8),
1115 scaffold(9),
1116 other(255)
1117 },
1118 linkage INTEGER { -- linkage status across the gap
1119 unlinked(0),
1120 linked(1),
1121 other(255)
1122 } OPTIONAL,
1123 linkage-evidence SET OF Linkage-evidence OPTIONAL -- unordered set of evidence records
1124 }
1125
1126 Linkage-evidence ::= SEQUENCE { -- one evidence record for Seq-gap.linkage-evidence; holds a single mandatory field
1127 type INTEGER { -- kind of evidence, as named INTEGER values
1128 paired-ends(0),
1129 align-genus(1),
1130 align-xgenus(2),
1131 align-trnscpt(3),
1132 within-clone(4),
1133 clone-contig(5),
1134 map(6),
1135 strobe(7),
1136 unspecified(8),
1137 pcr(9),
1138 other(255)
1139 }
1140 }
1141
1142 IUPACna ::= StringStore -- IUPAC 1 letter codes, no spaces (text form, nucleic acid)
1143 IUPACaa ::= StringStore -- IUPAC 1 letter codes, no spaces (text form, amino acid)
1144 NCBI2na ::= OCTET STRING -- 00=A, 01=C, 10=G, 11=T (2 bits per base)
1145 NCBI4na ::= OCTET STRING -- 1 bit each for agct (4 bits per base)
1146 -- 0001=A, 0010=C, 0100=G, 1000=T/U
1147 -- 0101=Purine, 1010=Pyrimidine, etc
1148 NCBI8na ::= OCTET STRING -- for modified nucleic acids
1149 NCBIpna ::= OCTET STRING -- 5 octets/base, prob for a,c,g,t,n
1150 -- probabilities are coded 0-255 = 0.0-1.0
1151 NCBI8aa ::= OCTET STRING -- for modified amino acids
1152 NCBIeaa ::= StringStore -- ASCII extended 1 letter aa codes
1153 -- IUPAC codes + U=selenocysteine
1154 NCBIpaa ::= OCTET STRING -- 25 octets/aa, prob for IUPAC aas in order:
1155 -- A-Y,B,Z,X,(ter),anything
1156 -- probabilities are coded 0-255 = 0.0-1.0
1157 NCBIstdaa ::= OCTET STRING -- codes 0-25, 1 per byte
1158
1159 --*** Sequence Annotation *************************************
1160 --*
1161
1162 -- This is a replica of Textseq-id
1163 -- This is specific for annotations, and exists to maintain a semantic
1164 -- difference between IDs assigned to annotations and IDs assigned to
1165 -- sequences
1166 Textannot-id ::= SEQUENCE { -- text identifier for an annotation; all four fields are optional
1167 name VisibleString OPTIONAL ,
1168 accession VisibleString OPTIONAL ,
1169 release VisibleString OPTIONAL ,
1170 version INTEGER OPTIONAL -- the only non-string field
1171 }
1172
1173 Annot-id ::= CHOICE { -- identifier of an annotation; exactly one form is present
1174 local Object-id , -- an Object-id
1175 ncbi INTEGER , -- an integer id
1176 general Dbtag, -- a Dbtag
1177 other Textannot-id -- a text id (name, accession, release, version)
1178 }
1179
1180 Annot-descr ::= SET OF Annotdesc -- unordered collection of Annotdesc entries
1181
1182 Annotdesc ::= CHOICE { -- one descriptor of an annotation collection; exactly one alternative is present
1183 name VisibleString , -- a short name for this collection
1184 title VisibleString , -- a title for this collection
1185 comment VisibleString , -- a more extensive comment
1186 pub Pubdesc , -- a reference to the publication
1187 user User-object , -- user defined object
1188 create-date Date , -- date entry first created/released
1189 update-date Date , -- date of last update
1190 src Seq-id , -- source sequence from which annot came
1191 align Align-def, -- definition of the SeqAligns
1192 region Seq-loc } -- all contents cover this region
1193
1194 Align-def ::= SEQUENCE { -- describes the alignments of a Seq-annot; align-type is mandatory
1195 align-type INTEGER { -- class of align Seq-annot; no zero value is named
1196 ref (1) , -- set of alignments to the same sequence
1197 alt (2) , -- set of alternate alignments of the same seqs
1198 blocks (3) , -- set of aligned blocks in the same seqs
1199 other (255) } ,
1200 ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
1201
1202 Seq-annot ::= SEQUENCE { -- a set of annotations of one kind; data is the only mandatory field
1203 id SET OF Annot-id OPTIONAL , -- zero or more identifiers for this annotation
1204 db INTEGER { -- source of annotation
1205 genbank (1) ,
1206 embl (2) ,
1207 ddbj (3) ,
1208 pir (4) ,
1209 sp (5) ,
1210 bbone (6) ,
1211 pdb (7) ,
1212 other (255) } OPTIONAL ,
1213 name VisibleString OPTIONAL ,-- source if "other" above
1214 desc Annot-descr OPTIONAL , -- used only for stand alone Seq-annots
1215 data CHOICE { -- the payload; exactly one alternative, each a homogeneous collection
1216 ftable SET OF Seq-feat ,
1217 align SET OF Seq-align ,
1218 graph SET OF Seq-graph ,
1219 ids SET OF Seq-id , -- used for communication between tools
1220 locs SET OF Seq-loc , -- used for communication between tools
1221 seq-table Seq-table } } -- features in table form
1222
1223 END
1224
1225
1226 --$Revision: 6.6 $
1227 --**********************************************************************
1228 --
1229 -- NCBI Sequence Collections
1230 -- by James Ostell, 1990
1231 --
1232 -- Version 3.0 - 1994
1233 --
1234 --**********************************************************************
1235
1236 NCBI-Seqset DEFINITIONS ::= -- module defining collections of sequences: Bioseq-set and Seq-entry
1237 BEGIN
1238
1239 EXPORTS Bioseq-set, Seq-entry; -- both types defined in this module are exported
1240
1241 IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
1242 Object-id, Dbtag, Date FROM NCBI-General;
1243
1244 --*** Sequence Collections ********************************
1245 --*
1246
1247 Bioseq-set ::= SEQUENCE { -- just a collection; seq-set is the only field without OPTIONAL or DEFAULT
1248 id Object-id OPTIONAL ,
1249 coll Dbtag OPTIONAL , -- to identify a collection
1250 level INTEGER OPTIONAL , -- nesting level
1251 class ENUMERATED { -- kind of collection
1252 not-set (0) ,
1253 nuc-prot (1) , -- nuc acid and coded proteins
1254 segset (2) , -- segmented sequence + parts
1255 conset (3) , -- constructed sequence + parts
1256 parts (4) , -- parts for 2 or 3 (that is, for segset or conset)
1257 gibb (5) , -- geninfo backbone
1258 gi (6) , -- geninfo
1259 genbank (7) , -- converted genbank
1260 pir (8) , -- converted pir
1261 pub-set (9) , -- all the seqs from a single publication
1262 equiv (10) , -- a set of equivalent maps or seqs
1263 swissprot (11) , -- converted SWISSPROT
1264 pdb-entry (12) , -- a complete PDB entry
1265 mut-set (13) , -- set of mutations
1266 pop-set (14) , -- population study
1267 phy-set (15) , -- phylogenetic study
1268 eco-set (16) , -- ecological sample study
1269 gen-prod-set (17) , -- genomic products, chrom+mRNA+protein
1270 wgs-set (18) , -- whole genome shotgun project
1271 named-annot (19) , -- named annotation set
1272 named-annot-prod (20) , -- with instantiated mRNA+protein
1273 read-set (21) , -- set from a single read
1274 paired-end-reads (22) , -- paired sequences within a read-set
1275 small-genome-set (23) , -- viral segments or mitochondrial minicircles
1276 other (255) } DEFAULT not-set , -- class is not-set (0) when omitted
1277 release VisibleString OPTIONAL ,
1278 date Date OPTIONAL ,
1279 descr Seq-descr OPTIONAL ,
1280 seq-set SEQUENCE OF Seq-entry , -- the members, in order; each is a Bioseq or a nested Bioseq-set
1281 annot SET OF Seq-annot OPTIONAL }
1282
1283 Seq-entry ::= CHOICE { -- either a single sequence or a set; with Bioseq-set this forms a recursive structure
1284 seq Bioseq ,
1285 set Bioseq-set }
1286
1287 END
1288
1289 --$Revision: 6.0 $
1290 -- *********************************************************************
1291 --
1292 -- These are code and conversion tables for NCBI sequence codes
1293 -- ASN.1 for the sequences themselves are defined in seq.asn
1294 --
1295 -- Seq-map-table and Seq-code-table REQUIRE that codes start with 0
1296 -- and increase continuously. So IUPAC codes, which are upper case
1297 -- letters will always have 65 0 cells before the codes begin. This
1298 -- allows all codes to do indexed lookups for things
1299 --
1300 -- Valid names for code tables are:
1301 -- IUPACna
1302 -- IUPACaa
1303 -- IUPACeaa
1304 -- IUPACaa3 3 letter amino acid codes : parallels IUPACeaa
1305 -- display only, not a data exchange type
1306 -- NCBI2na
1307 -- NCBI4na
1308 -- NCBI8na
1309 -- NCBI8aa
1310 -- NCBIstdaa
1311 -- probability types map to IUPAC types for display as characters
1312
1313 NCBI-SeqCode DEFINITIONS ::= -- module defining code tables and code-to-code mapping tables for sequence alphabets
1314 BEGIN
1315
1316 EXPORTS Seq-code-table, Seq-map-table, Seq-code-set; -- Seq-code-type is defined below but not exported
1317
1318 Seq-code-type ::= ENUMERATED { -- sequence representations; codes start at 1, no zero value is defined
1319 iupacna (1) , -- IUPAC 1 letter nuc acid code
1320 iupacaa (2) , -- IUPAC 1 letter amino acid code
1321 ncbi2na (3) , -- 2 bit nucleic acid code
1322 ncbi4na (4) , -- 4 bit nucleic acid code
1323 ncbi8na (5) , -- 8 bit extended nucleic acid code
1324 ncbipna (6) , -- nucleic acid probabilities
1325 ncbi8aa (7) , -- 8 bit extended amino acid codes
1326 ncbieaa (8) , -- extended ASCII 1 letter aa codes
1327 ncbipaa (9) , -- amino acid probabilities
1328 iupacaa3 (10) , -- 3 letter code only for display
1329 ncbistdaa (11) } -- consecutive codes for std aas, 0-25
1330
1331 Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings from one code to another
1332 from Seq-code-type , -- code to map from
1333 to Seq-code-type , -- code to map to
1334 num INTEGER , -- number of rows in table
1335 start-at INTEGER DEFAULT 0 , -- index offset of first element (0 when omitted)
1336 table SEQUENCE OF INTEGER } -- table of values, in from-to order
1337
1338 Seq-code-table ::= SEQUENCE { -- for names of coded values of a single code
1339 code Seq-code-type , -- name of code
1340 num INTEGER , -- number of rows in table
1341 one-letter BOOLEAN , -- symbol is ALWAYS 1 letter?
1342 start-at INTEGER DEFAULT 0 , -- index offset of first element (0 when omitted)
1343 table SEQUENCE OF -- ordered rows, each an anonymous symbol/name pair
1344 SEQUENCE {
1345 symbol VisibleString , -- the printed symbol or letter
1346 name VisibleString } , -- an explanatory name or string
1347 comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid
1348
1349 Seq-code-set ::= SEQUENCE { -- for distribution; bundles code tables and map tables, both optional
1350 codes SET OF Seq-code-table OPTIONAL ,
1351 maps SET OF Seq-map-table OPTIONAL }
1352
1353 END
1354
1355 --$Revision: 6.0 $
1356 --*********************************************************************
1357 --
1358 -- 1990 - J.Ostell
1359 -- Version 3.0 - June 1994
1360 --
1361 --*********************************************************************
1362 --*********************************************************************
1363 --
1364 -- EMBL specific data
1365 -- This block of specifications was developed by Reiner Fuchs of EMBL
1366 -- Updated by J.Ostell, 1994
1367 --
1368 --*********************************************************************
1369
1370 EMBL-General DEFINITIONS ::= -- module defining EMBL specific data: database names, cross references and the EMBL block
1371 BEGIN
1372
1373 EXPORTS EMBL-dbname, EMBL-xref, EMBL-block; -- all three types defined below are exported
1374
1375 IMPORTS Date, Object-id FROM NCBI-General;
1376
1377 EMBL-dbname ::= CHOICE { -- a database, given either as an enumerated code or as a free-text name
1378 code ENUMERATED {
1379 embl(0),
1380 genbank(1),
1381 ddbj(2),
1382 geninfo(3),
1383 medline(4),
1384 swissprot(5),
1385 pir(6),
1386 pdb(7),
1387 epd(8),
1388 ecd(9),
1389 tfd(10),
1390 flybase(11),
1391 prosite(12),
1392 enzyme(13),
1393 mim(14),
1394 ecoseq(15),
1395 hiv(16) ,
1396 other (255) } ,
1397 name VisibleString }
1398
1399 EMBL-xref ::= SEQUENCE { -- cross reference: a database plus an ordered list of ids in it
1400 dbname EMBL-dbname,
1401 id SEQUENCE OF Object-id }
1402
1403 EMBL-block ::= SEQUENCE { -- EMBL block; creation-date and update-date are the mandatory fields
1404 class ENUMERATED {
1405 not-set(0),
1406 standard(1),
1407 unannotated(2),
1408 other(255) } DEFAULT standard, -- class is standard (1) when omitted
1409 div ENUMERATED { -- optional division code
1410 fun(0),
1411 inv(1),
1412 mam(2),
1413 org(3),
1414 phg(4),
1415 pln(5),
1416 pri(6),
1417 pro(7),
1418 rod(8),
1419 syn(9),
1420 una(10),
1421 vrl(11),
1422 vrt(12),
1423 pat(13),
1424 est(14),
1425 sts(15),
1426 other (255) } OPTIONAL,
1427 creation-date Date,
1428 update-date Date,
1429 extra-acc SEQUENCE OF VisibleString OPTIONAL,
1430 keywords SEQUENCE OF VisibleString OPTIONAL,
1431 xref SEQUENCE OF EMBL-xref OPTIONAL }
1432
1433 END
1434
1435 --*********************************************************************
1436 --
1437 -- SWISSPROT specific data
1438 -- This block of specifications was developed by Mark Cavanaugh of
1439 -- NCBI working with Amos Bairoch of SWISSPROT
1440 --
1441 --*********************************************************************
1442
1443 SP-General DEFINITIONS ::= -- module defining the SWISSPROT specific block
1444 BEGIN
1445
1446 EXPORTS SP-block; -- the single type defined in this module
1447
1448 IMPORTS Date, Dbtag FROM NCBI-General
1449 Seq-id FROM NCBI-Seqloc;
1450
1451 SP-block ::= SEQUENCE { -- SWISSPROT specific descriptions; class is the only mandatory field
1452 class ENUMERATED {
1453 not-set (0) ,
1454 standard (1) , -- conforms to all SWISSPROT checks
1455 prelim (2) , -- only seq and biblio checked
1456 other (255) } ,
1457 extra-acc SET OF VisibleString OPTIONAL , -- old SWISSPROT ids
1458 imeth BOOLEAN DEFAULT FALSE , -- seq known to start with Met (FALSE when omitted)
1459 plasnm SET OF VisibleString OPTIONAL, -- plasmid names carrying gene
1460 seqref SET OF Seq-id OPTIONAL, -- xref to other sequences
1461 dbref SET OF Dbtag OPTIONAL , -- xref to non-sequence dbases
1462 keywords SET OF VisibleString OPTIONAL , -- keywords
1463 created Date OPTIONAL , -- creation date
1464 sequpd Date OPTIONAL , -- sequence update
1465 annotupd Date OPTIONAL } -- annotation update
1466
1467 END
1468
1469 --*********************************************************************
1470 --
1471 -- PIR specific data
1472 -- This block of specifications was developed by Jim Ostell of
1473 -- NCBI
1474 --
1475 --*********************************************************************
1476
1477 PIR-General DEFINITIONS ::= -- module defining the PIR specific block
1478 BEGIN
1479
1480 EXPORTS PIR-block; -- the single type defined in this module
1481
1482 IMPORTS Seq-id FROM NCBI-Seqloc;
1483
1484 PIR-block ::= SEQUENCE { -- PIR specific descriptions; every field is optional
1485 had-punct BOOLEAN OPTIONAL , -- had punctuation in sequence ?
1486 host VisibleString OPTIONAL ,
1487 source VisibleString OPTIONAL , -- source line
1488 summary VisibleString OPTIONAL ,
1489 genetic VisibleString OPTIONAL ,
1490 includes VisibleString OPTIONAL ,
1491 placement VisibleString OPTIONAL ,
1492 superfamily VisibleString OPTIONAL ,
1493 keywords SEQUENCE OF VisibleString OPTIONAL ,
1494 cross-reference VisibleString OPTIONAL ,
1495 date VisibleString OPTIONAL , -- kept as free text here, not as a Date
1496 seq-raw VisibleString OPTIONAL , -- seq with punctuation
1497 seqref SET OF Seq-id OPTIONAL } -- xref to other sequences
1498
1499 END
1500
1501 --*********************************************************************
1502 --
1503 -- GenBank specific data
1504 -- This block of specifications was developed by Jim Ostell of
1505 -- NCBI
1506 --
1507 --*********************************************************************
1508
1509 GenBank-General DEFINITIONS ::= -- module defining the GenBank specific block
1510 BEGIN
1511
1512 EXPORTS GB-block; -- the single type defined in this module
1513
1514 IMPORTS Date FROM NCBI-General;
1515
1516 GB-block ::= SEQUENCE { -- GenBank specific descriptions; every field is optional
1517 extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
1518 source VisibleString OPTIONAL , -- source line
1519 keywords SEQUENCE OF VisibleString OPTIONAL ,
1520 origin VisibleString OPTIONAL,
1521 date VisibleString OPTIONAL , -- OBSOLETE old form Entry Date (free text; see entry-date)
1522 entry-date Date OPTIONAL , -- replaces date
1523 div VisibleString OPTIONAL , -- GenBank division
1524 taxonomy VisibleString OPTIONAL } -- continuation line of organism
1525
1526 END
1527
1528 --**********************************************************************
1529 -- PRF specific definition
1530 -- PRF is a protein sequence database created and maintained by
1531 -- Protein Research Foundation, Minoo-city, Osaka, Japan.
1532 --
1533 -- Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
1534 -- Kyoto Univ., Japan
1535 --
1536 --**********************************************************************
1537
1538 PRF-General DEFINITIONS ::= -- module defining the PRF specific block; it has no IMPORTS
1539 BEGIN
1540
1541 EXPORTS PRF-block; -- PRF-ExtraSrc is defined below but not exported
1542
1543 PRF-block ::= SEQUENCE { -- PRF specific descriptions; both fields are optional
1544 extra-src PRF-ExtraSrc OPTIONAL,
1545 keywords SEQUENCE OF VisibleString OPTIONAL
1546 }
1547
1548 PRF-ExtraSrc ::= SEQUENCE { -- extra source details used by PRF-block.extra-src; five optional strings
1549 host VisibleString OPTIONAL,
1550 part VisibleString OPTIONAL,
1551 state VisibleString OPTIONAL,
1552 strain VisibleString OPTIONAL,
1553 taxon VisibleString OPTIONAL
1554 }
1555
1556 END
1557
1558 --*********************************************************************
1559 --
1560 -- PDB specific data
1561 -- This block of specifications was developed by Jim Ostell and
1562 -- Steve Bryant of NCBI
1563 --
1564 --*********************************************************************
1565
1566 PDB-General DEFINITIONS ::= -- module defining the PDB specific block and its replacement history
1567 BEGIN
1568
1569 EXPORTS PDB-block; -- PDB-replace is defined below but not exported
1570
1571 IMPORTS Date FROM NCBI-General;
1572
1573 PDB-block ::= SEQUENCE { -- PDB specific descriptions; only exp-method and replace are optional
1574 deposition Date , -- deposition date month,year
1575 class VisibleString ,
1576 compound SEQUENCE OF VisibleString ,
1577 source SEQUENCE OF VisibleString ,
1578 exp-method VisibleString OPTIONAL , -- present if NOT X-ray diffraction
1579 replace PDB-replace OPTIONAL } -- replacement history
1580
1581 PDB-replace ::= SEQUENCE { -- one replacement event: a date and the ids concerned
1582 date Date ,
1583 ids SEQUENCE OF VisibleString } -- entry ids replaced by this one
1584
1585 END
1586
1587 --$Revision: 6.50 $
1588 --**********************************************************************
1589 --
1590 -- NCBI Sequence Feature elements
1591 -- by James Ostell, 1990
1592 -- Version 3.0 - June 1994
1593 --
1594 --**********************************************************************
1595
1596 NCBI-Seqfeat DEFINITIONS ::=
1597 BEGIN
1598
1599 EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;
1600
1601 IMPORTS Gene-ref FROM NCBI-Gene
1602 Prot-ref FROM NCBI-Protein
1603 Org-ref FROM NCBI-Organism
1604 Variation-ref FROM NCBI-Variation
1605 BioSource FROM NCBI-BioSource
1606 RNA-ref FROM NCBI-RNA
1607 Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
1608 Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
1609 Rsite-ref FROM NCBI-Rsite
1610 Txinit FROM NCBI-TxInit
1611 DOI, PubMedId FROM NCBI-Biblio
1612 Pub-set FROM NCBI-Pub
1613 Object-id, Dbtag, User-object FROM NCBI-General;
1614
1615 --*** Feature identifiers ********************************
1616 --*
1617
1618 Feat-id ::= CHOICE { -- feature identifier; exactly one of the four forms is present
1619 gibb INTEGER , -- geninfo backbone
1620 giim Giimport-id , -- geninfo import
1621 local Object-id , -- for local software use
1622 general Dbtag } -- for use by various databases
1623
1624 --*** Seq-feat *******************************************
1625 --* sequence feature generalization
1626
1627 Seq-feat ::= SEQUENCE { -- a sequence feature; data and location are the only mandatory fields
1628 id Feat-id OPTIONAL , -- single id; see ids below, which is marked as its replacement
1629 data SeqFeatData , -- the specific data
1630 partial BOOLEAN OPTIONAL , -- incomplete in some way?
1631 except BOOLEAN OPTIONAL , -- something funny about this?
1632 comment VisibleString OPTIONAL ,
1633 product Seq-loc OPTIONAL , -- product of process
1634 location Seq-loc , -- feature made from
1635 qual SEQUENCE OF Gb-qual OPTIONAL , -- qualifiers
1636 title VisibleString OPTIONAL , -- for user defined label
1637 ext User-object OPTIONAL , -- user defined structure extension
1638 cit Pub-set OPTIONAL , -- citations for this feature
1639 exp-ev ENUMERATED { -- evidence for existence of feature
1640 experimental (1) , -- any reasonable experimental check
1641 not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
1642 xref SET OF SeqFeatXref OPTIONAL , -- cite other relevant features
1643 dbxref SET OF Dbtag OPTIONAL , -- support for xref to other databases
1644 pseudo BOOLEAN OPTIONAL , -- annotated on pseudogene?
1645 except-text VisibleString OPTIONAL , -- explain if except=TRUE
1646 ids SET OF Feat-id OPTIONAL , -- set of Ids; will replace 'id' field
1647 exts SET OF User-object OPTIONAL , -- set of extensions; will replace 'ext' field
1648 support SeqFeatSupport OPTIONAL -- will replace /experiment, /inference, model-evidence
1649 }
1650
1651 SeqFeatData ::= CHOICE { -- the type specific payload of a feature; exactly one alternative is present
1652 gene Gene-ref ,
1653 org Org-ref ,
1654 cdregion Cdregion ,
1655 prot Prot-ref ,
1656 rna RNA-ref ,
1657 pub Pubdesc , -- publication applies to this seq
1658 seq Seq-loc , -- to annotate origin from another seq
1659 imp Imp-feat ,
1660 region VisibleString, -- named region (globin locus)
1661 comment NULL , -- just a comment; carries no value of its own
1662 bond ENUMERATED { -- kind of bond; no zero value is defined
1663 disulfide (1) ,
1664 thiolester (2) ,
1665 xlink (3) ,
1666 thioether (4) ,
1667 other (255) } ,
1668 site ENUMERATED { -- kind of site; no zero value is defined
1669 active (1) ,
1670 binding (2) ,
1671 cleavage (3) ,
1672 inhibit (4) ,
1673 modified (5),
1674 glycosylation (6) ,
1675 myristoylation (7) ,
1676 mutagenized (8) ,
1677 metal-binding (9) ,
1678 phosphorylation (10) ,
1679 acetylation (11) ,
1680 amidation (12) ,
1681 methylation (13) ,
1682 hydroxylation (14) ,
1683 sulfatation (15) ,
1684 oxidative-deamination (16) ,
1685 pyrrolidone-carboxylic-acid (17) ,
1686 gamma-carboxyglutamic-acid (18) ,
1687 blocked (19) ,
1688 lipid-binding (20) ,
1689 np-binding (21) ,
1690 dna-binding (22) ,
1691 signal-peptide (23) ,
1692 transit-peptide (24) ,
1693 transmembrane-region (25) ,
1694 nitrosylation (26) ,
1695 other (255) } ,
1696 rsite Rsite-ref , -- restriction site (for maps really)
1697 user User-object , -- user defined structure
1698 txinit Txinit , -- transcription initiation
1699 num Numbering , -- a numbering system
1700 psec-str ENUMERATED { -- protein secondary structure; no other or zero value is defined
1701 helix (1) , -- any helix
1702 sheet (2) , -- beta sheet
1703 turn (3) } , -- beta or gamma turn
1704 non-std-residue VisibleString , -- non-standard residue here in seq
1705 het Heterogen , -- cofactor, prosthetic grp, etc, bound to seq
1706 biosrc BioSource,
1707 clone Clone-ref,
1708 variation Variation-ref
1709 }
1710
1711 SeqFeatXref ::= SEQUENCE { -- both optional because can have one or both; used by Seq-feat.xref
1712 id Feat-id OPTIONAL , -- the feature copied
1713 data SeqFeatData OPTIONAL } -- the specific data
1714
1715 SeqFeatSupport ::= SEQUENCE { -- supporting evidence for a feature, grouped by kind; all three sets are optional
1716 experiment SET OF ExperimentSupport OPTIONAL ,
1717 inference SET OF InferenceSupport OPTIONAL ,
1718 model-evidence SET OF ModelEvidenceSupport OPTIONAL
1719 }
1720
1721 EvidenceCategory ::= INTEGER { -- named INTEGER values; used by the category field of ExperimentSupport and InferenceSupport
1722 not-set (0) ,
1723 coordinates (1) ,
1724 description (2) ,
1725 existence (3)
1726 }
1727
1728 ExperimentSupport ::= SEQUENCE { -- one experimental evidence record; explanation is the only mandatory field
1729 category EvidenceCategory OPTIONAL ,
1730 explanation VisibleString , -- free-text explanation
1731 pmids SET OF PubMedId OPTIONAL , -- optional PubMed ids
1732 dois SET OF DOI OPTIONAL -- optional DOIs
1733 }
1734
1735 Program-id ::= SEQUENCE { -- identifies a program by name with an optional version string
1736 name VisibleString ,
1737 version VisibleString OPTIONAL
1738 }
1739
1740 EvidenceBasis ::= SEQUENCE { -- basis of an inference: programs and/or sequence ids, both optional
1741 programs SET OF Program-id OPTIONAL ,
1742 accessions SET OF Seq-id OPTIONAL
1743 }
1744
1745 InferenceSupport ::= SEQUENCE { -- one inference evidence record; basis is the only field without OPTIONAL or DEFAULT
1746 category EvidenceCategory OPTIONAL ,
1747 type INTEGER { -- kind of inference, as named INTEGER values
1748 not-set (0) ,
1749 similar-to-sequence (1) ,
1750 similar-to-aa (2) ,
1751 similar-to-dna (3) ,
1752 similar-to-rna (4) ,
1753 similar-to-mrna (5) ,
1754 similiar-to-est (6) , -- NOTE(review): spelled similiar (sic), unlike its siblings; identifier left as is because it is part of the schema
1755 similar-to-other-rna (7) ,
1756 profile (8) ,
1757 nucleotide-motif (9) ,
1758 protein-motif (10) ,
1759 ab-initio-prediction (11) ,
1760 alignment (12) ,
1761 other (255)
1762 } DEFAULT not-set , -- type is not-set (0) when omitted
1763 other-type VisibleString OPTIONAL , -- presumably describes the type when it is other; TODO confirm
1764 same-species BOOLEAN DEFAULT FALSE , -- FALSE when omitted
1765 basis EvidenceBasis ,
1766 pmids SET OF PubMedId OPTIONAL ,
1767 dois SET OF DOI OPTIONAL
1768 }
1769
1770 ModelEvidenceItem ::= SEQUENCE { -- one supporting sequence, used in the mrna, est and protein sets of ModelEvidenceSupport
1771 id Seq-id , -- the only mandatory field
1772 exon-count INTEGER OPTIONAL ,
1773 exon-length INTEGER OPTIONAL ,
1774 full-length BOOLEAN DEFAULT FALSE , -- FALSE when omitted
1775 supports-all-exon-combo BOOLEAN DEFAULT FALSE -- FALSE when omitted
1776 }
1777
1778 ModelEvidenceSupport ::= SEQUENCE { -- model evidence record; every field is optional or defaulted
1779 method VisibleString OPTIONAL ,
1780 mrna SET OF ModelEvidenceItem OPTIONAL ,
1781 est SET OF ModelEvidenceItem OPTIONAL ,
1782 protein SET OF ModelEvidenceItem OPTIONAL ,
1783 identification Seq-id OPTIONAL ,
1784 dbxref SET OF Dbtag OPTIONAL ,
1785 exon-count INTEGER OPTIONAL ,
1786 exon-length INTEGER OPTIONAL ,
1787 full-length BOOLEAN DEFAULT FALSE , -- FALSE when omitted
1788 supports-all-exon-combo BOOLEAN DEFAULT FALSE -- FALSE when omitted
1789 }
1790
1791 --*** CdRegion ***********************************************
1792 --*
1793 --* Instructions to translate from a nucleic acid to a peptide
1794 --* conflict means it's supposed to translate but doesn't
1795 --*
1796
1797
1798 Cdregion ::= SEQUENCE { -- instructions to translate a nucleic acid to a peptide; every field is optional or defaulted
1799 orf BOOLEAN OPTIONAL , -- just an ORF ?
1800 frame ENUMERATED {
1801 not-set (0) , -- not set, code uses one
1802 one (1) ,
1803 two (2) ,
1804 three (3) } DEFAULT not-set , -- reading frame
1805 conflict BOOLEAN OPTIONAL , -- conflict
1806 gaps INTEGER OPTIONAL , -- number of gaps on conflict/except
1807 mismatch INTEGER OPTIONAL , -- number of mismatches on above
1808 code Genetic-code OPTIONAL , -- genetic code used
1809 code-break SEQUENCE OF Code-break OPTIONAL , -- individual exceptions
1810 stops INTEGER OPTIONAL } -- number of stop codons on above
1811
1812 -- each code is 64 cells long, in the order where
1813 -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCA=4, etc
1814 -- NOTE: this order does NOT correspond to a Seq-data
1815 -- encoding. It is "natural" to codon usage instead.
1816 -- the value in each cell is the AA coded for
1817 -- start= AA coded only if first in peptide
1818 -- in start array, if codon is not a legitimate start
1819 -- codon, that cell will have the "gap" symbol for
1820 -- that alphabet. Otherwise it will have the AA
1821 -- encoded when that codon is used at the start.
1822
1823 Genetic-code ::= SET OF CHOICE { -- a genetic code, held as an unordered set of elements, each one of the forms below
1824 name VisibleString , -- name of a code
1825 id INTEGER , -- id in dbase
1826 ncbieaa VisibleString , -- indexed to IUPAC extended
1827 ncbi8aa OCTET STRING , -- indexed to NCBI8aa
1828 ncbistdaa OCTET STRING , -- indexed to NCBIstdaa
1829 sncbieaa VisibleString , -- start, indexed to IUPAC extended
1830 sncbi8aa OCTET STRING , -- start, indexed to NCBI8aa
1831 sncbistdaa OCTET STRING } -- start, indexed to NCBIstdaa
1832
1833 Code-break ::= SEQUENCE { -- specific codon exceptions
1834 loc Seq-loc , -- location of exception
1835 aa CHOICE { -- the amino acid, given in exactly one of these alphabets
1836 ncbieaa INTEGER , -- ASCII value of NCBIeaa code
1837 ncbi8aa INTEGER , -- NCBI8aa code
1838 ncbistdaa INTEGER } } -- NCBIstdaa code
1839
1840 Genetic-code-table ::= SET OF Genetic-code -- table of genetic codes (unordered set)
1841
1842 --*** Import ***********************************************
1843 --*
1844 --* Features imported from other databases
1845 --*
1846
1847 Imp-feat ::= SEQUENCE { -- feature imported from another database
1848 key VisibleString , -- required; the only non-OPTIONAL field
1849 loc VisibleString OPTIONAL , -- original location string
1850 descr VisibleString OPTIONAL } -- text description
1851
1852 Gb-qual ::= SEQUENCE { -- a (qual, val) pair of strings; both are required
1853 qual VisibleString ,
1854 val VisibleString }
1855
1856
1857 --*** Clone-ref ***********************************************
1858 --*
1859 --* Specification of clone features
1860 --*
1861
1862 Clone-ref ::= SEQUENCE { -- specification of a clone feature
1863 name VisibleString, -- Official clone symbol
1864 library VisibleString OPTIONAL, -- Library name
1865
1866 concordant BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1867 unique BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1868 placement-method INTEGER { -- named values only; not described as a bitfield
1869 end-seq (0), -- Clone placed by end sequence
1870 insert-alignment (1), -- Clone placed by insert alignment
1871 sts (2), -- Clone placed by STS
1872 fish (3),
1873 fingerprint (4),
1874 end-seq-insert-alignment (5), -- combined end-seq and insert align
1875 external (253), -- Placement provided externally
1876 curated (254), -- Human placed or approved
1877 other (255)
1878 } OPTIONAL,
1879 clone-seq Clone-seq-set OPTIONAL -- set of Clone-seq records
1880 }
1881
1882 Clone-seq-set ::= SET OF Clone-seq -- unordered set of Clone-seq records
1883
1884
1885 Clone-seq ::= SEQUENCE { -- one member of a Clone-seq-set
1886 type INTEGER { -- required
1887 insert (0),
1888 end (1),
1889 other (255)
1890 },
1891 confidence INTEGER {
1892 multiple (0), -- Multiple hits
1893 na (1), -- Unspecified
1894 nohit-rep (2), -- No hits, end flagged repetitive
1895 nohitnorep (3), -- No hits, end not flagged repetitive
1896 other-chrm (4), -- Hit on different chromosome
1897 unique (5),
1898 virtual (6), -- Virtual (hasn't been sequenced)
1899 multiple-rep (7), -- Multiple hits, end flagged repetitive
1900 multiplenorep (8), -- Multiple hits, end not flagged repetitive
1901 no-hit (9), -- No hits
1902 other (255)
1903 } OPTIONAL,
1904 location Seq-loc, -- location on sequence (required)
1905 seq Seq-loc OPTIONAL, -- clone sequence location
1906 align-id Dbtag OPTIONAL, -- internal alignment identifier
1907 support INTEGER {
1908 prototype (0), -- sequence used to place clone
1909 supporting (1), -- sequence supports placement
1910 supports-other(2), -- supports a different placement
1911 non-supporting (3) -- does not support any placement
1912 } OPTIONAL
1913 }
1914
1915 END
1916
1917
1918 --*** Variation-ref ***********************************************
1919 --*
1920 --* Specification of variation features
1921 --*
1922
1923 NCBI-Variation DEFINITIONS ::=
1924 BEGIN
1925
1926 EXPORTS Variation-ref, Variation-inst, VariantProperties,
1927 Population-data, Phenotype;
1928
1929 IMPORTS Int-fuzz, User-object, Object-id, Dbtag FROM NCBI-General
1930 Seq-literal FROM NCBI-Sequence
1931 SubSource FROM NCBI-BioSource
1932 Seq-loc FROM NCBI-Seqloc
1933 Pub FROM NCBI-Pub;
1934
1935
1936 -- --------------------------------------------------------------------------
1937 -- Historically, the dbSNP definitions document data structures used in the
1938 -- processing and annotation of variations by the dbSNP group. The intention
1939 -- is to provide information to clients that reflect internal information
1940 -- produced during the mapping of SNPs
1941 -- --------------------------------------------------------------------------
1942
1943 VariantProperties ::= SEQUENCE {
1944 version INTEGER, -- the only required field; every field below is OPTIONAL
1945
1946 -- NOTE:
1947 -- The format for most of these values is as an integer
1948 -- Unless otherwise noted, these integers represent a bitwise OR (= simple
1949 -- sum) of the possible values, and as such, these values represent the
1950 -- specific bit flags that may be set for each of the possible attributes
1951 -- here.
1952
1953 resource-link INTEGER {
1954 preserved (1), -- Clinical, Pubmed, Cited, (0x01)
1955 provisional (2), -- Provisional Third Party Annotations (0x02)
1956 has3D (4), -- Has 3D structure SNP3D table (0x04)
1957 submitterLinkout (8), -- SNP->SubSNP->Batch link_out (0x08)
1958 clinical (16), -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
1959 genotypeKit (32) -- Marker exists on high density genotyping kit
1960 -- (0x20)
1961 } OPTIONAL,
1962
1963 gene-location INTEGER {
1964 in-gene (1), -- Sequence intervals covered by a gene ID but not
1965 -- having an aligned transcript (0x01)
1966 near-gene-5 (2), -- Within 2kb of the 5' end of a gene feature (0x02)
1967 near-gene-3 (4), -- Within 0.5kb of the 3' end of a gene feature (0x04)
1968 intron (8), -- In Intron (0x08)
1969 donor (16), -- In donor splice-site (0x10)
1970 acceptor (32), -- In acceptor splice-site (0x20)
1971 utr-5 (64), -- In 5' UTR (0x40)
1972 utr-3 (128), -- In 3' UTR (0x80)
1973 in-start-codon(256), -- the variant is observed in a start codon
1974 -- (0x100)
1975 in-stop-codon (512), -- the variant is observed in a stop codon
1976 -- (0x200)
1977 intergenic (1024), -- variant located between genes (0x400)
1978 conserved-noncoding(2048) -- variant is located in a conserved
1979 -- non-coding region (0x800)
1980 } OPTIONAL,
1981
1982 effect INTEGER {
1983 no-change (0), -- known to cause no functional changes
1984 -- since 0 does not combine with any other bit
1985 -- value, 'no-change' specifically implies that
1986 -- there are no consequences
1987 synonymous (1), -- one allele in the set does not change the encoded
1988 -- amino acid (0x1)
1989 nonsense (2), -- one allele in the set changes to STOP codon
1990 -- (TER). (0x2)
1991 missense (4), -- one allele in the set changes protein peptide
1992 -- (0x4)
1993 frameshift (8), -- one allele in the set changes all downstream
1994 -- amino acids (0x8)
1995
1996 up-regulator (16), -- the variant causes increased transcription
1997 -- (0x10)
1998 down-regulator(32), -- the variant causes decreased transcription
1999 -- (0x20)
2000 methylation (64), -- (0x40)
2001 stop-gain (128), -- reference codon is not stop codon, but the snp
2002 -- variant allele changes the codon to a
2003 -- terminating codon. (0x80)
2004 stop-loss (256) -- reverse of STOP-GAIN: reference codon is a
2005 -- stop codon, but a snp variant allele changes
2006 -- the codon to a non-terminating codon. (0x100)
2007 } OPTIONAL,
2008
2009 mapping INTEGER {
2010 has-other-snp (1), -- Another SNP has the same mapped positions
2011 -- on reference assembly (0x01)
2012 has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
2013 -- chromosomes on different assemblies (0x02)
2014 is-assembly-specific (4) -- Only maps to 1 assembly (0x04)
2015 } OPTIONAL,
2016
2017 -- map-weight captures specificity of placement
2018 -- NOTE: This is *NOT* a bitfield
2019 map-weight INTEGER {
2020 is-uniquely-placed(1),
2021 placed-twice-on-same-chrom(2),
2022 placed-twice-on-diff-chrom(3),
2023 many-placements(10)
2024 } OPTIONAL,
2025
2026 frequency-based-validation INTEGER {
2027 is-mutation (1), -- low frequency variation that is cited in
2028 -- journal or other reputable sources (0x01)
2029 above-5pct-all (2), -- >5% minor allele freq in each and all
2030 -- populations (0x02)
2031 above-5pct-1plus (4), -- >5% minor allele freq in 1+ populations (0x04)
2032 validated (8), -- Bit is set if the variant has a minor allele
2033 -- observed in two or more separate chromosomes (0x08)
2034 above-1pct-all (16), -- >1% minor allele freq in each and all
2035 -- populations (0x10)
2036 above-1pct-1plus (32) -- >1% minor allele freq in 1+ populations (0x20)
2037 } OPTIONAL,
2038
2039 genotype INTEGER {
2040 in-haplotype-set (1), -- Exists in a haplotype tagging set (0x01)
2041 has-genotypes (2) -- SNP has individual genotype (0x02)
2042 } OPTIONAL,
2043
2044 -- project IDs are IDs from BioProjects
2045 -- in order to report information about project relationships, we
2046 -- require projects to be registered
2047 -- This field in many ways duplicates dbxrefs; however, the
2048 -- intention of this field is to more adequately reflect
2049 -- ownership and data source
2050 --
2051 -- 11/9/2010: DO NOT USE
2052 -- This field was changed in the spec in a breaking way; using it will
2053 -- break clients. We are officially suppressing / abandoning this field.
2054 -- Clients who need to use this should instead place the data in
2055 -- Seq-feat.dbxref, using the db name 'BioProject'
2056 project-data SET OF INTEGER OPTIONAL,
2057
2058 quality-check INTEGER {
2059 contig-allele-missing (1), -- Reference sequence allele at the mapped
2060 -- position is not present in the SNP
2061 -- allele list, adjusted for orientation
2062 -- (0x01)
2063 withdrawn-by-submitter (2), -- One member SS is withdrawn by submitter
2064 -- (0x02)
2065 non-overlapping-alleles (4), -- RS set has 2+ alleles from different
2066 -- submissions and these sets share no
2067 -- alleles in common (0x04)
2068 strain-specific (8), -- Strain specific fixed difference (0x08)
2069 genotype-conflict (16) -- Has Genotype Conflict (0x10)
2070 } OPTIONAL,
2071
2072 confidence INTEGER {
2073 unknown (0),
2074 likely-artifact (1),
2075 other (255)
2076 } OPTIONAL,
2077
2078 -- has this variant been validated?
2079 -- While a boolean flag offers no subtle distinctions of validation
2080 -- methods, occasionally it is only known as a single boolean value
2081 -- NOTE: this flag is redundant and should be omitted if more comprehensive
2082 -- validation information is present
2083 other-validation BOOLEAN OPTIONAL,
2084
2085 -- origin of this allele, if known
2086 -- note that these are powers-of-two, and represent bits; thus, we can
2087 -- represent more than one state simultaneously through a bitwise OR
2088 allele-origin INTEGER {
2089 unknown (0),
2090 germline (1),
2091 somatic (2),
2092 inherited (4),
2093 paternal (8),
2094 maternal (16),
2095 de-novo (32),
2096 biparental (64),
2097 uniparental (128),
2098 not-tested (256),
2099 tested-inconclusive (512),
2100 not-reported (1024),
2101
2102 -- stopper - 2^30
2103 other (1073741824)
2104 } OPTIONAL,
2105
2106 -- observed allele state, if known
2107 -- NOTE: THIS IS NOT A BITFIELD!
2108 allele-state INTEGER {
2109 unknown (0),
2110 homozygous (1),
2111 heterozygous (2),
2112 hemizygous (3),
2113 nullizygous (4),
2114 other (255)
2115 } OPTIONAL,
2116
2117 -- NOTE:
2118 -- 'allele-frequency' here refers to the minor allele frequency of the
2119 -- default population
2120 allele-frequency REAL OPTIONAL,
2121
2122 -- is this variant the ancestral allele?
2123 is-ancestral-allele BOOLEAN OPTIONAL
2124 }
2125
2126 Phenotype ::= SEQUENCE { -- every field of this type is OPTIONAL
2127 source VisibleString OPTIONAL,
2128 term VisibleString OPTIONAL,
2129 xref SET OF Dbtag OPTIONAL, -- cross-references, as Dbtag entries
2130
2131 -- does this variant have known clinical significance?
2132 clinical-significance INTEGER { -- sequential codes, not powers of two
2133 unknown (0),
2134 untested (1),
2135 non-pathogenic (2),
2136 probable-non-pathogenic (3),
2137 probable-pathogenic (4),
2138 pathogenic (5),
2139 drug-response (6),
2140 histocompatibility (7),
2141 other (255)
2142 } OPTIONAL
2143 }
2144
2145 Population-data ::= SEQUENCE {
2146 -- assayed population (e.g. HAPMAP-CEU)
2147 population VisibleString, -- required; all remaining fields are OPTIONAL
2148 genotype-frequency REAL OPTIONAL,
2149 chromosomes-tested INTEGER OPTIONAL,
2150 sample-ids SET OF Object-id OPTIONAL,
2151 allele-frequency REAL OPTIONAL,
2152
2153 -- This field is an explicit bit-field
2154 -- Valid values should be a bitwise combination (= simple sum)
2155 -- of any of the values below
2156 flags INTEGER {
2157 is-default-population (1), -- (0x01)
2158 is-minor-allele (2), -- (0x02)
2159 is-rare-allele (4) -- (0x04)
2160 } OPTIONAL
2161 }
2162
2163 Ext-loc ::= SEQUENCE { -- an Object-id paired with a Seq-loc; both are required
2164 id Object-id,
2165 location Seq-loc
2166 }
2167
2168
2169 Variation-ref ::= SEQUENCE {
2170 -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
2171 -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
2172 --
2173 -- we relate three kinds of IDs here:
2174 -- - our current object's id
2175 -- - the id of this object's parent, if it exists
2176 -- - the sample ID that this item originates from
2177 id Dbtag OPTIONAL,
2178 parent-id Dbtag OPTIONAL,
2179 sample-id Object-id OPTIONAL,
2180 other-ids SET OF Dbtag OPTIONAL,
2181
2182 -- names and synonyms
2183 -- some variants have well-known canonical names and possible accepted
2184 -- synonyms
2185 name VisibleString OPTIONAL,
2186 synonyms SET OF VisibleString OPTIONAL,
2187
2188 -- tag for comment and descriptions
2189 description VisibleString OPTIONAL,
2190
2191 -- phenotype
2192 phenotype SET OF Phenotype OPTIONAL,
2193
2194 -- sequencing / acquisition method
2195 method SET OF INTEGER {
2196 unknown (0),
2197 bac-acgh (1),
2198 computational (2),
2199 curated (3),
2200 digital-array (4),
2201 expression-array (5),
2202 fish (6),
2203 flanking-sequence (7),
2204 maph (8),
2205 mcd-analysis (9),
2206 mlpa (10),
2207 oea-assembly (11),
2208 oligo-acgh (12),
2209 paired-end (13),
2210 pcr (14),
2211 qpcr (15),
2212 read-depth (16),
2213 roma (17),
2214 rt-pcr (18),
2215 sage (19),
2216 sequence-alignment (20),
2217 sequencing (21),
2218 snp-array (22),
2219 snp-genoytyping (23), -- NOTE(review): identifier is spelled 'genoytyping' (sic); renaming it would change the name seen in text-format data
2220 southern (24),
2221 western (25),
2222 optical-mapping (26),
2223
2224 other (255)
2225 } OPTIONAL,
2226
2227 -- Note about SNP representation and pertinent fields: allele-frequency,
2228 -- population, quality-codes:
2229 -- The case of multiple alleles for a SNP would be described by
2230 -- parent-feature of type Variation-set.diff-alleles, where the child
2231 -- features of type Variation-inst, all at the same location, would
2232 -- describe individual alleles.
2233
2234 -- population data
2235 -- DEPRECATED - do not use
2236 population-data SET OF Population-data OPTIONAL,
2237
2238 -- variant properties bit fields
2239 variant-prop VariantProperties OPTIONAL,
2240
2241 -- has this variant been validated?
2242 -- DEPRECATED: new field = VariantProperties.other-validation
2243 validated BOOLEAN OPTIONAL,
2244
2245 -- link-outs to GeneTests database
2246 -- DEPRECATED - do not use
2247 clinical-test SET OF Dbtag OPTIONAL,
2248
2249 -- origin of this allele, if known
2250 -- note that these are powers-of-two, and represent bits; thus, we can
2251 -- represent more than one state simultaneously through a bitwise OR
2252 -- DEPRECATED: new field = VariantProperties.allele-origin
2253 allele-origin INTEGER {
2254 unknown (0),
2255 germline (1),
2256 somatic (2),
2257 inherited (4),
2258 paternal (8),
2259 maternal (16),
2260 de-novo (32),
2261 biparental (64),
2262 uniparental (128),
2263 not-tested (256),
2264 tested-inconclusive (512), -- NOTE: unlike VariantProperties.allele-origin, no not-reported (1024) is defined here
2265
2266 -- stopper - 2^30
2267 other (1073741824)
2268 } OPTIONAL,
2269
2270 -- observed allele state, if known
2271 -- DEPRECATED: new field = VariantProperties.allele-state
2272 allele-state INTEGER {
2273 unknown (0),
2274 homozygous (1),
2275 heterozygous (2),
2276 hemizygous (3),
2277 nullizygous (4),
2278 other (255)
2279 } OPTIONAL,
2280
2281 -- NOTE:
2282 -- 'allele-frequency' here refers to the minor allele frequency of the
2283 -- default population
2284 -- DEPRECATED: new field = VariantProperties.allele-frequency
2285 allele-frequency REAL OPTIONAL,
2286
2287 -- is this variant the ancestral allele?
2288 -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
2289 is-ancestral-allele BOOLEAN OPTIONAL,
2290
2291 -- publication support.
2292 -- Note: made this pub instead of pub-equiv, since
2293 -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like
2294 -- Pub is more often used as top-level container
2295 -- DEPRECATED - do not use; use Seq-feat.dbxref instead
2296 pub Pub OPTIONAL,
2297
2298 data CHOICE { -- required; the only non-OPTIONAL field of Variation-ref
2299 unknown NULL,
2300 note VisibleString, --free-form
2301 uniparental-disomy NULL,
2302
2303 -- actual sequence-edit at feat.location
2304 instance Variation-inst,
2305
2306 -- Set of related Variations.
2307 -- Location of the set equals the union of member locations
2308 set SEQUENCE {
2309 type INTEGER {
2310 unknown (0),
2311 compound (1), -- complex change at the same location on the
2312 -- same molecule
2313 products (2), -- different products arising from the same
2314 -- variation in a precursor, e.g. r.[13g>a,
2315 -- 13_88del]
2316 haplotype (3), -- changes on the same allele, e.g.
2317 -- r.[13g>a;15u>c]
2318 genotype (4), -- changes on different alleles in the same
2319 -- genotype, e.g. g.[476C>T]+[476C>T]
2320 mosaic (5), -- different genotypes in the same individual
2321 individual (6), -- same organism; allele relationship unknown,
2322 -- e.g. g.[476C>T(+)183G>C]
2323 population (7), -- population
2324 alleles (8), -- set represents a set of observed alleles
2325 package (9), -- set represents a package of observations at
2326 -- a given location, generally containing
2327 -- asserted + reference
2328 other (255)
2329 },
2330 variations SET OF Variation-ref, -- members of the set; the type is recursive here
2331 name VisibleString OPTIONAL
2332 },
2333
2334 -- variant is a complex and undescribed change at the location
2335 -- This type of variant is known to occur in dbVar submissions
2336 complex NULL
2337 },
2338
2339 consequence SET OF CHOICE {
2340 unknown NULL,
2341 splicing NULL, --some effect on splicing
2342 note VisibleString, --freeform
2343
2344 -- Describe resulting variation in the product, e.g. missense,
2345 -- nonsense, silent, neutral, etc in a protein, that arises from
2346 -- THIS variation.
2347 variation Variation-ref,
2348
2349 -- see http://www.hgvs.org/mutnomen/recs-prot.html
2350 frameshift SEQUENCE {
2351 phase INTEGER OPTIONAL,
2352 x-length INTEGER OPTIONAL
2353 },
2354
2355 loss-of-heterozygosity SEQUENCE {
2356 -- In germline comparison, it will be reference genome assembly
2357 -- (default) or reference/normal population. In somatic mutation,
2358 -- it will be a name of the normal tissue.
2359 reference VisibleString OPTIONAL,
2360
2361 -- Name of the testing subject type or the testing tissue.
2362 test VisibleString OPTIONAL
2363 }
2364 } OPTIONAL,
2365
2366 -- Observed location, if different from the parent set or feature.location.
2367 -- DEPRECATED - do not use
2368 location Seq-loc OPTIONAL,
2369
2370 -- reference other locs, e.g. mapped source
2371 -- DEPRECATED - do not use
2372 ext-locs SET OF Ext-loc OPTIONAL,
2373
2374 -- DEPRECATED - do not use; use Seq-feat.exts instead
2375 ext User-object OPTIONAL,
2376
2377 somatic-origin SET OF SEQUENCE {
2378 -- description of the somatic origin itself
2379 source SubSource OPTIONAL,
2380 -- condition related to this origin's type
2381 condition SEQUENCE {
2382 description VisibleString OPTIONAL,
2383 -- reference to BioTerm / other descriptive database
2384 object-id SET OF Dbtag OPTIONAL
2385 } OPTIONAL
2386 } OPTIONAL
2387
2388 }
2389
2390
2391 Delta-item ::= SEQUENCE { -- element type of Variation-inst.delta
2392 seq CHOICE {
2393 literal Seq-literal,
2394 loc Seq-loc,
2395 this NULL --same location as variation-ref itself
2396 } OPTIONAL,
2397
2398 -- Multiplier allows representing a tandem, e.g. ATATAT as AT*3
2399 -- This allows describing CNV/SSR where delta=self with a
2400 -- multiplier which specifies the count of the repeat unit.
2401
2402 multiplier INTEGER OPTIONAL, --assumed 1 if not specified.
2403 multiplier-fuzz Int-fuzz OPTIONAL,
2404
2405 action INTEGER {
2406
2407 -- replace len(seq) positions starting with location.start with seq
2408 morph (0),
2409
2410 -- go downstream by distance specified by multiplier (upstream if < 0),
2411 -- in genomic context.
2412 offset (1),
2413
2414 -- excise sequence at location
2415 -- if multiplier is specified, delete len(location)*multiplier
2416 -- positions downstream
2417 del-at (2),
2418
2419 -- insert seq before the location.start
2420 ins-before (3)
2421
2422 } DEFAULT morph -- an omitted action means morph (0)
2423 }
2424
2425
2426 -- Variation instance
2427 Variation-inst ::= SEQUENCE {
2428 type INTEGER { -- required
2429 unknown (0), -- delta=[]
2430 identity (1), -- delta=[]
2431 inv (2), -- delta=[del, ins.seq=
2432 -- RevComp(variation-location)]
2433 snv (3), -- delta=[morph of length 1]
2434 -- NOTE: this is snV not snP; the latter
2435 -- requires frequency-based validation to be
2436 -- established in VariantProperties
2437 -- the strict definition of SNP is an SNV with
2438 -- an established population frequency of at
2439 -- least 1% in at least 1 population
2440 mnp (4), -- delta=[morph of length >1]
2441 delins (5), -- delta=[del, ins]
2442 del (6), -- delta=[del]
2443 ins (7), -- delta=[ins]
2444 microsatellite (8), -- delta=[del, ins.seq= repeat-unit with fuzzy
2445 -- multiplier]
2446 -- variation-location is the microsat expansion
2447 -- on the sequence
2448 transposon (9), -- delta=[del, ins.seq= known donor or 'this']
2449 -- variation-location is equiv of transposon
2450 -- locs.
2451 cnv (10), -- delta=[del, ins= 'this' with fuzzy
2452 -- multiplier]
2453 direct-copy (11), -- delta=[ins.seq= upstream location on the
2454 -- same strand]
2455 rev-direct-copy (12), -- delta=[ins.seq= downstream location on the
2456 -- same strand]
2457 inverted-copy (13), -- delta=[ins.seq= upstream location on the
2458 -- opposite strand]
2459 everted-copy (14), -- delta=[ins.seq= downstream location on the
2460 -- opposite strand]
2461 translocation (15), -- delta=like delins
2462 prot-missense (16), -- delta=[morph of length 1]
2463 prot-nonsense (17), -- delta=[del]; variation-location is the tail
2464 -- of the protein being truncated
2465 prot-neutral (18), -- delta=[morph of length 1]
2466 prot-silent (19), -- delta=[morph of length 1, same AA as at
2467 -- variation-location]
2468 prot-other (20), -- delta=any
2469
2470 other (255) -- delta=any
2471 },
2472
2473 -- Sequence that replaces the location, in biological order.
2474 delta SEQUENCE OF Delta-item, -- required; ordered list of Delta-item
2475
2476 -- 'observation' is used to label items in a Variation-ref package
2477 -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
2478 -- of the values may be observed.
2479 observation INTEGER {
2480 asserted (1), -- inst represents the asserted base at a
2481 -- position
2482 reference (2), -- inst represents the reference base at the
2483 -- position
2484 variant (4) -- inst represents the observed variant at a
2485 -- given position
2486 } OPTIONAL
2487 }
2488
2489 END
2490
2491
2492 --**********************************************************************
2493 --
2494 -- NCBI Restriction Sites
2495 -- by James Ostell, 1990
2496 -- version 0.8
2497 --
2498 --**********************************************************************
2499
2500 NCBI-Rsite DEFINITIONS ::=
2501 BEGIN
2502
2503 EXPORTS Rsite-ref;
2504
2505 IMPORTS Dbtag FROM NCBI-General;
2506
2507 Rsite-ref ::= CHOICE { -- exactly one of the two forms below
2508 str VisibleString , -- may be unparsable
2509 db Dbtag } -- pointer to a restriction site database
2510
2511 END
2512
2513 --**********************************************************************
2514 --
2515 -- NCBI RNAs
2516 -- by James Ostell, 1990
2517 -- version 0.8
2518 --
2519 --**********************************************************************
2520
2521 NCBI-RNA DEFINITIONS ::=
2522 BEGIN
2523
2524 EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;
2525
2526 IMPORTS Seq-loc FROM NCBI-Seqloc;
2527
2528 --*** rnas ***********************************************
2529 --*
2530 --* various rnas
2531 --*
2532 -- minimal RNA sequence
2533 RNA-ref ::= SEQUENCE { -- type is required; pseudo and ext are OPTIONAL
2534 type ENUMERATED { -- type of RNA feature
2535 unknown (0) ,
2536 premsg (1) ,
2537 mRNA (2) ,
2538 tRNA (3) ,
2539 rRNA (4) ,
2540 snRNA (5) , -- will become ncRNA, with RNA-gen.class = snRNA
2541 scRNA (6) , -- will become ncRNA, with RNA-gen.class = scRNA
2542 snoRNA (7) , -- will become ncRNA, with RNA-gen.class = snoRNA
2543 ncRNA (8) , -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
2544 tmRNA (9) ,
2545 miscRNA (10) ,
2546 other (255) } ,
2547 pseudo BOOLEAN OPTIONAL ,
2548 ext CHOICE { -- at most one extension form per RNA-ref
2549 name VisibleString , -- for naming "other" type
2550 tRNA Trna-ext , -- for tRNAs
2551 gen RNA-gen } OPTIONAL -- generic fields for ncRNA, tmRNA, miscRNA
2552 }
2553
2554 Trna-ext ::= SEQUENCE { -- tRNA feature extensions
2555 aa CHOICE { -- aa this carries, as an integer code in one of these alphabets
2556 iupacaa INTEGER ,
2557 ncbieaa INTEGER ,
2558 ncbi8aa INTEGER ,
2559 ncbistdaa INTEGER } OPTIONAL ,
2560 codon SET OF INTEGER OPTIONAL , -- codon(s) as in Genetic-code
2561 anticodon Seq-loc OPTIONAL } -- location of anticodon
2562
2563 RNA-gen ::= SEQUENCE { -- every field of this type is OPTIONAL
2564 class VisibleString OPTIONAL , -- for ncRNAs, the class of non-coding RNA:
2565 -- examples: antisense_RNA, guide_RNA, snRNA
2566 product VisibleString OPTIONAL ,
2567 quals RNA-qual-set OPTIONAL -- e.g., tag_peptide qualifier for tmRNAs
2568 }
2569
2570 RNA-qual ::= SEQUENCE { -- Additional data values for RNA-gen,
2571 qual VisibleString , -- in a tag (qual), value (val) format
2572 val VisibleString } -- both qual and val are required
2573
2574 RNA-qual-set ::= SEQUENCE OF RNA-qual -- ordered list of RNA-qual entries
2575
2576 END
2577
2578 --**********************************************************************
2579 --
2580 -- NCBI Genes
2581 -- by James Ostell, 1990
2582 -- version 0.8
2583 --
2584 --**********************************************************************
2585
2586 NCBI-Gene DEFINITIONS ::=
2587 BEGIN
2588
2589 EXPORTS Gene-ref, Gene-nomenclature;
2590
2591 IMPORTS Dbtag FROM NCBI-General;
2592
2593 --*** Gene ***********************************************
2594 --*
2595 --* reference to a gene
2596 --*
2597
2598 Gene-ref ::= SEQUENCE { -- reference to a gene; no field is mandatory
2599 locus VisibleString OPTIONAL , -- Official gene symbol
2600 allele VisibleString OPTIONAL , -- Official allele designation
2601 desc VisibleString OPTIONAL , -- descriptive name
2602 maploc VisibleString OPTIONAL , -- descriptive map location
2603 pseudo BOOLEAN DEFAULT FALSE , -- pseudogene
2604 db SET OF Dbtag OPTIONAL , -- ids in other dbases
2605 syn SET OF VisibleString OPTIONAL , -- synonyms for locus
2606 locus-tag VisibleString OPTIONAL , -- systematic gene name (e.g., MI0001, ORF0069)
2607 formal-name Gene-nomenclature OPTIONAL -- structured form, see Gene-nomenclature
2608 }
2609
2610 Gene-nomenclature ::= SEQUENCE {
2611 status ENUMERATED { -- required; the other fields are OPTIONAL
2612 unknown (0) ,
2613 official (1) ,
2614 interim (2)
2615 } ,
2616 symbol VisibleString OPTIONAL ,
2617 name VisibleString OPTIONAL ,
2618 source Dbtag OPTIONAL
2619 }
2620
2621 END
2622
2623
2624 --**********************************************************************
2625 --
2626 -- NCBI Organism
2627 -- by James Ostell, 1994
2628 -- version 3.0
2629 --
2630 --**********************************************************************
2631
2632 NCBI-Organism DEFINITIONS ::=
2633 BEGIN
2634
2635 EXPORTS Org-ref;
2636
2637 IMPORTS Dbtag FROM NCBI-General;
2638
2639 --*** Org-ref ***********************************************
2640 --*
2641 --* Reference to an organism
2642 --* defines only the organism.. lower levels of detail for biological
2643 --* molecules are provided by the Source object
2644 --*
2645
2646 Org-ref ::= SEQUENCE { -- reference to an organism; every field is OPTIONAL
2647 taxname VisibleString OPTIONAL , -- preferred formal name
2648 common VisibleString OPTIONAL , -- common name
2649 mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
2650 db SET OF Dbtag OPTIONAL , -- ids in taxonomic or culture dbases
2651 syn SET OF VisibleString OPTIONAL , -- synonyms for taxname or common
2652 orgname OrgName OPTIONAL } -- structured name, see OrgName
2653
2654
2655 OrgName ::= SEQUENCE { -- every field of this type is OPTIONAL
2656 name CHOICE {
2657 binomial BinomialOrgName , -- genus/species type name
2658 virus VisibleString , -- virus names are different
2659 hybrid MultiOrgName , -- hybrid between organisms
2660 namedhybrid BinomialOrgName , -- some hybrids have genus x species name
2661 partial PartialOrgName } OPTIONAL , -- when genus not known
2662 attrib VisibleString OPTIONAL , -- attribution of name
2663 mod SEQUENCE OF OrgMod OPTIONAL , -- structured modifiers, see OrgMod
2664 lineage VisibleString OPTIONAL , -- lineage with semicolon separators
2665 gcode INTEGER OPTIONAL , -- genetic code (see CdRegion)
2666 mgcode INTEGER OPTIONAL , -- mitochondrial genetic code
2667 div VisibleString OPTIONAL , -- GenBank division code
2668 pgcode INTEGER OPTIONAL } -- plastid genetic code
2669
2670
2671 OrgMod ::= SEQUENCE {
2672 subtype INTEGER { -- required; named values start at 2 (0 and 1 are not named here)
2673 strain (2) ,
2674 substrain (3) ,
2675 type (4) ,
2676 subtype (5) ,
2677 variety (6) ,
2678 serotype (7) ,
2679 serogroup (8) ,
2680 serovar (9) ,
2681 cultivar (10) ,
2682 pathovar (11) ,
2683 chemovar (12) ,
2684 biovar (13) ,
2685 biotype (14) ,
2686 group (15) ,
2687 subgroup (16) ,
2688 isolate (17) ,
2689 common (18) ,
2690 acronym (19) ,
2691 dosage (20) , -- chromosome dosage of hybrid
2692 nat-host (21) , -- natural host of this specimen
2693 sub-species (22) ,
2694 specimen-voucher (23) ,
2695 authority (24) ,
2696 forma (25) ,
2697 forma-specialis (26) ,
2698 ecotype (27) ,
2699 synonym (28) ,
2700 anamorph (29) ,
2701 teleomorph (30) ,
2702 breed (31) ,
2703 gb-acronym (32) , -- used by taxonomy database
2704 gb-anamorph (33) , -- used by taxonomy database
2705 gb-synonym (34) , -- used by taxonomy database
2706 culture-collection (35) ,
2707 bio-material (36) ,
2708 metagenome-source (37) ,
2709 old-lineage (253) ,
2710 old-name (254) ,
2711 other (255) } , -- ASN5 note said old-name (254) "will be added to next spec"; it is now defined above
2712 subname VisibleString , -- required
2713 attrib VisibleString OPTIONAL } -- attribution/source of name
2714
2715 BinomialOrgName ::= SEQUENCE { -- genus, species and subspecies name parts
2716 genus VisibleString , -- required
2717 species VisibleString OPTIONAL , -- species required if subspecies used
2718 subspecies VisibleString OPTIONAL }
2719
2720 MultiOrgName ::= SEQUENCE OF OrgName -- ordered list; the first will be used to assign division
2721
2722 PartialOrgName ::= SEQUENCE OF TaxElement -- ordered list of TaxElement; used when we don't know the genus
2723
2724 TaxElement ::= SEQUENCE {
2725 fixed-level INTEGER { -- required
2726 other (0) , -- level must be set in string
2727 family (1) ,
2728 order (2) ,
2729 class (3) } ,
2730 level VisibleString OPTIONAL , -- the level string referred to by other (0) above
2731 name VisibleString } -- required
2732
2733 END
2734
2735
2736 --**********************************************************************
2737 --
2738 -- NCBI BioSource
2739 -- by James Ostell, 1994
2740 -- version 3.0
2741 --
2742 --**********************************************************************
2743
2744 NCBI-BioSource DEFINITIONS ::=
2745 BEGIN
2746
2747 EXPORTS BioSource, SubSource;
2748
2749 IMPORTS Org-ref FROM NCBI-Organism;
2750
2751 --********************************************************************
2752 --
2753 -- BioSource gives the source of the biological material
2754 -- for sequences
2755 --
2756 --********************************************************************
2757
2758 BioSource ::= SEQUENCE { -- source of the biological material for sequences
2759 genome INTEGER { -- biological context
2760 unknown (0) ,
2761 genomic (1) ,
2762 chloroplast (2) ,
2763 chromoplast (3) ,
2764 kinetoplast (4) ,
2765 mitochondrion (5) ,
2766 plastid (6) ,
2767 macronuclear (7) ,
2768 extrachrom (8) ,
2769 plasmid (9) ,
2770 transposon (10) ,
2771 insertion-seq (11) ,
2772 cyanelle (12) ,
2773 proviral (13) ,
2774 virion (14) ,
2775 nucleomorph (15) ,
2776 apicoplast (16) ,
2777 leucoplast (17) ,
2778 proplastid (18) ,
2779 endogenous-virus (19) ,
2780 hydrogenosome (20) ,
2781 chromosome (21) ,
2782 chromatophore (22)
2783 } DEFAULT unknown , -- an omitted genome means unknown (0)
2784 origin INTEGER {
2785 unknown (0) ,
2786 natural (1) , -- normal biological entity
2787 natmut (2) , -- naturally occurring mutant
2788 mut (3) , -- artificially mutagenized
2789 artificial (4) , -- artificially engineered
2790 synthetic (5) , -- purely synthetic
2791 other (255)
2792 } DEFAULT unknown , -- an omitted origin means unknown (0)
2793 org Org-ref , -- required; the only mandatory field
2794 subtype SEQUENCE OF SubSource OPTIONAL , -- ordered list of SubSource modifiers
2795 is-focus NULL OPTIONAL , -- to distinguish biological focus
2796 pcr-primers PCRReactionSet OPTIONAL } -- see PCRReactionSet
2797
2798 PCRReactionSet ::= SET OF PCRReaction -- unordered set of PCRReaction
2799
2800 PCRReaction ::= SEQUENCE { -- forward and reverse primer sets; both OPTIONAL
2801 forward PCRPrimerSet OPTIONAL ,
2802 reverse PCRPrimerSet OPTIONAL }
2803
2804 PCRPrimerSet ::= SET OF PCRPrimer -- unordered set of PCRPrimer
2805
2806 PCRPrimer ::= SEQUENCE { -- a primer as sequence and/or name; both OPTIONAL
2807 seq PCRPrimerSeq OPTIONAL ,
2808 name PCRPrimerName OPTIONAL }
2809
2810 PCRPrimerSeq ::= VisibleString -- plain string alias used for PCRPrimer.seq
2811
2812 PCRPrimerName ::= VisibleString -- named alias used by PCRPrimer.name; no constraint is placed on the text here
2813
2814 SubSource ::= SEQUENCE { -- one typed modifier of a BioSource; subtype and name are mandatory
2815 subtype INTEGER { -- kind of modifier; numbering starts at 1, there is no 0 value
2816 chromosome (1) ,
2817 map (2) ,
2818 clone (3) ,
2819 subclone (4) ,
2820 haplotype (5) ,
2821 genotype (6) ,
2822 sex (7) ,
2823 cell-line (8) ,
2824 cell-type (9) ,
2825 tissue-type (10) ,
2826 clone-lib (11) ,
2827 dev-stage (12) ,
2828 frequency (13) ,
2829 germline (14) ,
2830 rearranged (15) ,
2831 lab-host (16) ,
2832 pop-variant (17) ,
2833 tissue-lib (18) ,
2834 plasmid-name (19) ,
2835 transposon-name (20) ,
2836 insertion-seq-name (21) ,
2837 plastid-name (22) ,
2838 country (23) ,
2839 segment (24) ,
2840 endogenous-virus-name (25) ,
2841 transgenic (26) ,
2842 environmental-sample (27) ,
2843 isolation-source (28) ,
2844 lat-lon (29) , -- +/- decimal degrees
2845 collection-date (30) , -- DD-MMM-YYYY format
2846 collected-by (31) , -- name of person who collected the sample
2847 identified-by (32) , -- name of person who identified the sample
2848 fwd-primer-seq (33) , -- sequence (possibly more than one; semicolon-separated)
2849 rev-primer-seq (34) , -- sequence (possibly more than one; semicolon-separated)
2850 fwd-primer-name (35) , -- NOTE(review): values 33 to 36 overlap with BioSource.pcr-primers; which one is authoritative is not stated here
2851 rev-primer-name (36) ,
2852 metagenomic (37) ,
2853 mating-type (38) ,
2854 linkage-group (39) ,
2855 haplogroup (40) ,
2856 whole-replicon (41) ,
2857 phenotype (42) ,
2858 altitude (43) ,
2859 other (255) } ,
2860 name VisibleString , -- mandatory text; NOTE(review): presumably the value for the chosen subtype, in the format noted beside that subtype (confirm)
2861 attrib VisibleString OPTIONAL } -- attribution/source of this name
2862
2863 END
2864
2865 --**********************************************************************
2866 --
2867 -- NCBI Protein
2868 -- by James Ostell, 1990
2869 -- version 0.8
2870 --
2871 --**********************************************************************
2872
2873 NCBI-Protein DEFINITIONS ::=
2874 BEGIN
2875
2876 EXPORTS Prot-ref;
2877
2878 IMPORTS Dbtag FROM NCBI-General;
2879
2880 --*** Prot-ref ***********************************************
2881 --*
2882 --* Reference to a protein name
2883 --*
2884
2885 Prot-ref ::= SEQUENCE { -- every field is OPTIONAL or defaulted, so an empty Prot-ref is valid under this grammar
2886 name SET OF VisibleString OPTIONAL , -- protein name
2887 desc VisibleString OPTIONAL , -- description (instead of name)
2888 ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
2889 activity SET OF VisibleString OPTIONAL , -- activities
2890 db SET OF Dbtag OPTIONAL , -- ids in other dbases; Dbtag is imported from NCBI-General
2891 processed ENUMERATED { -- processing status
2892 not-set (0) ,
2893 preprotein (1) ,
2894 mature (2) ,
2895 signal-peptide (3) ,
2896 transit-peptide (4) } DEFAULT not-set } -- an absent field therefore reads as not-set
2897
2898 END
2899 --********************************************************************
2900 --
2901 -- Transcription Initiation Site Feature Data Block
2902 -- James Ostell, 1991
2903 -- Philip Bucher, David Ghosh
2904 -- version 1.1
2905 --
2906 --
2907 --
2908 --********************************************************************
2909
2910 NCBI-TxInit DEFINITIONS ::=
2911 BEGIN
2912
2913 EXPORTS Txinit;
2914
2915 IMPORTS Gene-ref FROM NCBI-Gene
2916 Prot-ref FROM NCBI-Protein
2917 Org-ref FROM NCBI-Organism;
2918
2919 Txinit ::= SEQUENCE { -- only name and txsystem are mandatory; all other fields are OPTIONAL or defaulted
2920 name VisibleString , -- descriptive name of initiation site
2921 syn SEQUENCE OF VisibleString OPTIONAL , -- synonyms
2922 gene SEQUENCE OF Gene-ref OPTIONAL , -- gene(s) transcribed
2923 protein SEQUENCE OF Prot-ref OPTIONAL , -- protein(s) produced
2924 rna SEQUENCE OF VisibleString OPTIONAL , -- rna(s) produced
2925 expression VisibleString OPTIONAL , -- tissue/time of expression
2926 txsystem ENUMERATED { -- transcription apparatus used at this site
2927 unknown (0) ,
2928 pol1 (1) , -- eukaryotic Pol I
2929 pol2 (2) , -- eukaryotic Pol II
2930 pol3 (3) , -- eukaryotic Pol III
2931 bacterial (4) ,
2932 viral (5) ,
2933 rna (6) , -- RNA replicase; same identifier as the rna field above, but in a separate enumeration scope
2934 organelle (7) ,
2935 other (255) } ,
2936 txdescr VisibleString OPTIONAL , -- modifiers on txsystem
2937 txorg Org-ref OPTIONAL , -- organism supplying transcription apparatus
2938 mapping-precise BOOLEAN DEFAULT FALSE , -- mapping precise or approx
2939 location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
2940 inittype ENUMERATED { -- OPTIONAL with no DEFAULT, so absent is distinct from unknown (0)
2941 unknown (0) ,
2942 single (1) ,
2943 multiple (2) ,
2944 region (3) } OPTIONAL ,
2945 evidence SET OF Tx-evidence OPTIONAL } -- unordered supporting experiments, see Tx-evidence
2946
2947 Tx-evidence ::= SEQUENCE { -- one experiment supporting a Txinit; only exp-code is mandatory
2948 exp-code ENUMERATED { -- experimental method
2949 unknown (0) ,
2950 rna-seq (1) , -- direct RNA sequencing
2951 rna-size (2) , -- RNA length measurement
2952 np-map (3) , -- nuclease protection mapping with homologous sequence ladder
2953 np-size (4) , -- nuclease protected fragment length measurement
2954 pe-seq (5) , -- dideoxy RNA sequencing
2955 cDNA-seq (6) , -- full-length cDNA sequencing
2956 pe-map (7) , -- primer extension mapping with homologous sequence ladder
2957 pe-size (8) , -- primer extension product length measurement
2958 pseudo-seq (9) , -- full-length processed pseudogene sequencing
2959 rev-pe-map (10) , -- see NOTE (1) below
2960 other (255) } ,
2961 expression-system ENUMERATED {
2962 unknown (0) ,
2963 physiological (1) ,
2964 in-vitro (2) ,
2965 oocyte (3) ,
2966 transfection (4) ,
2967 transgenic (5) ,
2968 other (255) } DEFAULT physiological , -- note: an absent field reads as physiological, not unknown
2969 low-prec-data BOOLEAN DEFAULT FALSE ,
2970 from-homolog BOOLEAN DEFAULT FALSE } -- experiment actually done on
2971 -- close homolog
2972
2973 -- NOTE (1) length measurement of a reverse direction primer-extension
2974 -- product (blocked by RNA 5'end) by comparison with
2975 -- homologous sequence ladder (J. Mol. Biol. 199, 587)
2976
2977 END
2978
2979 --$Revision: 1.8 $
2980 -- ----------------------------------------------------------------------------
2981 --
2982 -- PUBLIC DOMAIN NOTICE
2983 -- National Center for Biotechnology Information
2984 --
2985 -- This software/database is a "United States Government Work" under the terms
2986 -- of the United States Copyright Act. It was written as part of the author's
2987 -- official duties as a United States Government employee and thus cannot be
2988 -- copyrighted. This software/database is freely available to the public for
2989 -- use. The National Library of Medicine and the U.S. Government have not
2990 -- placed any restriction on its use or reproduction.
2991 --
2992 -- Although all reasonable efforts have been taken to ensure the accuracy and
2993 -- reliability of the software and data, the NLM and the U.S. Government do not
2994 -- and cannot warrant the performance or results that may be obtained by using
2995 -- this software or data. The NLM and the U.S. Government disclaim all
2996 -- warranties, express or implied, including warranties of performance,
2997 -- merchantability or fitness for any particular purpose.
2998 --
2999 -- Please cite the authors in any work or product based on this material.
3000 --
3001 -- ----------------------------------------------------------------------------
3002 --
3003 -- Authors: Mike DiCuccio, Eugene Vasilchenko
3004 --
3005 -- ASN.1 interface to table readers
3006 --
3007 -- ----------------------------------------------------------------------------
3008
3009 NCBI-SeqTable DEFINITIONS ::=
3010
3011 BEGIN
3012
3013 EXPORTS
3014 SeqTable-column-info, SeqTable-column, Seq-table;
3015
3016 IMPORTS
3017 Seq-id, Seq-loc, Seq-interval FROM NCBI-Seqloc;
3018
3019
3020 SeqTable-column-info ::= SEQUENCE { -- describes one column; all three fields are OPTIONAL, so the grammar does not force an identifier to be present
3021 -- user friendly column name, can be skipped
3022 title VisibleString OPTIONAL,
3023
3024 -- identification of the column data in the objects described by the table
3025 field-id INTEGER { -- known column data types; values are grouped by decade and the gaps are unassigned here
3026 -- position types
3027 location (0), -- location as Seq-loc
3028 location-id (1), -- location Seq-id
3029 location-gi (2), -- gi
3030 location-from (3), -- interval from
3031 location-to (4), -- interval to
3032 location-strand (5), -- location strand
3033 location-fuzz-from-lim (6),
3034 location-fuzz-to-lim (7),
3035
3036 product (10), -- product as Seq-loc
3037 product-id (11), -- product Seq-id
3038 product-gi (12), -- product gi
3039 product-from (13), -- product interval from
3040 product-to (14), -- product interval to
3041 product-strand (15), -- product strand
3042 product-fuzz-from-lim (16),
3043 product-fuzz-to-lim (17),
3044
3045 -- main feature fields
3046 id-local (20), -- id.local.id
3047 xref-id-local (21), -- xref.id.local.id
3048 partial (22),
3049 comment (23),
3050 title (24),
3051 ext (25), -- field-name must be "E.xxx", see below
3052 qual (26), -- field-name must be "Q.xxx", see below
3053 dbxref (27), -- field-name must be "D.xxx", see below
3054
3055 -- various data fields
3056 data-imp-key (30),
3057 data-region (31),
3058 data-cdregion-frame (32),
3059
3060 -- extra fields, see also special values for str below
3061 ext-type (40),
3062 qual-qual (41),
3063 qual-val (42),
3064 dbxref-db (43),
3065 dbxref-tag (44)
3066 } OPTIONAL,
3067
3068 -- any column can be identified by ASN.1 text locator string
3069 -- with omitted object type.
3070 -- examples:
3071 -- "data.gene.locus" for Seq-feat.data.gene.locus
3072 -- "data.imp.key" for Seq-feat.data.imp.key
3073 -- "qual.qual"
3074 -- - Seq-feat.qual is SEQUENCE so several columns are allowed
3075 -- see also "Q.xxx" special value for shorter qual representation
3076 -- "ext.type.str"
3077 -- "ext.data.label.str"
3078 -- "ext.data.data.int"
3079 -- see also "E.xxx" special value for shorter ext representation
3080 -- special values start with capital letter:
3081 -- "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
3082 -- - Seq-feat.ext.data is SEQUENCE so several columns are allowed
3083 -- "Q.xxx" - qual.qual = xxx, qual.val = data
3084 -- - Seq-feat.qual is SEQUENCE so several columns are allowed
3085 -- "D.xxx" - dbxref.id = xxx, dbxref.tag = data (NOTE(review): the enum names dbxref-db / dbxref-tag above suggest this should read dbxref.db; confirm against Dbtag)
3086 -- - Seq-feat.dbxref is SET so several columns are allowed
3087 field-name VisibleString OPTIONAL
3088 }
3089
3090
3091 CommonString-table ::= SEQUENCE { -- dictionary encoding for string columns; both fields are mandatory
3092 -- set of possible values
3093 strings SEQUENCE OF UTF8String,
3094
3095 -- indexes of values (NOTE(review): presumably one index into strings per row; the index base is not stated here)
3096 indexes SEQUENCE OF INTEGER
3097 }
3098
3099
3100 CommonBytes-table ::= SEQUENCE { -- dictionary encoding for byte-array columns; same layout as CommonString-table
3101 -- set of possible values
3102 bytes SEQUENCE OF OCTET STRING,
3103
3104 -- indexes of values (NOTE(review): presumably one index into bytes per row; the index base is not stated here)
3105 indexes SEQUENCE OF INTEGER
3106 }
3107
3108
3109 Scaled-int-multi-data ::= SEQUENCE { -- linear transform with integer coefficients over nested column data
3110 -- output data[i] = data[i]*mul+add
3111 mul INTEGER,
3112 add INTEGER,
3113 data SeqTable-multi-data -- recursive: any SeqTable-multi-data variant is accepted by the grammar
3114 }
3115
3116
3117 Scaled-real-multi-data ::= SEQUENCE { -- linear transform with real coefficients over nested column data
3118 -- output data[i] = data[i]*mul+add
3119 mul REAL,
3120 add REAL,
3121 data SeqTable-multi-data -- recursive: any SeqTable-multi-data variant is accepted by the grammar
3122 }
3123
3124
3125 SeqTable-multi-data ::= CHOICE { -- the values of one column, in exactly one of the encodings below
3126 -- a set of integers, one per row
3127 int SEQUENCE OF INTEGER,
3128
3129 -- a set of reals, one per row
3130 real SEQUENCE OF REAL,
3131
3132 -- a set of strings, one per row
3133 string SEQUENCE OF UTF8String,
3134
3135 -- a set of byte arrays, one per row
3136 bytes SEQUENCE OF OCTET STRING,
3137
3138 -- a set of strings with a small set of possible values
3139 common-string CommonString-table,
3140
3141 -- a set of byte arrays with a small set of possible values
3142 common-bytes CommonBytes-table,
3143
3144 -- a set of bits, one per row
3145 -- this uses bm::bvector<> as its storage mechanism (NOTE(review): bit-bvector below is also described as a serialized bvector; how the two encodings differ is not stated here, confirm)
3146 bit OCTET STRING,
3147
3148 -- a set of locations, one per row
3149 loc SEQUENCE OF Seq-loc,
3150 id SEQUENCE OF Seq-id,
3151 interval SEQUENCE OF Seq-interval,
3152
3153 -- delta-encoded data (int/bit -> int); the type refers to itself, so nesting is unbounded in the grammar
3154 int-delta SeqTable-multi-data,
3155
3156 -- scaled data (int/bit -> int)
3157 int-scaled Scaled-int-multi-data,
3158
3159 -- scaled data (int/bit -> real)
3160 real-scaled Scaled-real-multi-data,
3161
3162 -- a set of bits, represented as a serialized bvector,
3163 -- see include/util/bitset/bm.h
3164 bit-bvector OCTET STRING
3165 }
3166
3167
3168 SeqTable-single-data ::= CHOICE { -- one scalar value; used by SeqTable-column.default and SeqTable-column.sparse-other
3169 -- integer
3170 int INTEGER,
3171
3172 -- real
3173 real REAL,
3174
3175 -- string
3176 string UTF8String,
3177
3178 -- byte array
3179 bytes OCTET STRING,
3180
3181 -- bit
3182 bit BOOLEAN,
3183
3184 -- location
3185 loc Seq-loc,
3186 id Seq-id,
3187 interval Seq-interval
3188 }
3189
3190
3191 SeqTable-sparse-index ::= CHOICE { -- which rows of a column carry values, in exactly one of four encodings
3192 -- Indexes of rows with values
3193 indexes SEQUENCE OF INTEGER,
3194
3195 -- Bitset of rows with values, set bit means the row has value.
3196 -- Most-significant bit in an octet comes first.
3197 bit-set OCTET STRING,
3198
3199 -- Indexes of rows with values, delta-encoded
3200 indexes-delta SEQUENCE OF INTEGER,
3201
3202 -- Bitset of rows with values, as serialized bvector<>,
3203 -- see include/util/bitset/bm.h
3204 bit-set-bvector OCTET STRING
3205 }
3206
3207
3208 SeqTable-column ::= SEQUENCE { -- one table column; only header is mandatory
3209 -- column description or reference to previously defined info
3210 header SeqTable-column-info, -- information about data
3211
3212 -- row data
3213 data SeqTable-multi-data OPTIONAL,
3214
3215 -- in case not all rows contain data this field will contain sparse info
3216 sparse SeqTable-sparse-index OPTIONAL,
3217
3218 -- default value for sparse table, or if row data is too short
3219 default SeqTable-single-data OPTIONAL,
3220
3221 -- single value for indexes not listed in sparse table (NOTE(review): how this interacts with default above is not stated here; confirm)
3222 sparse-other SeqTable-single-data OPTIONAL
3223 }
3224
3225
3226 Seq-table ::= SEQUENCE { -- a column-oriented table of features; feat-subtype is the only OPTIONAL field
3227 -- type of features in this table, equal to Seq-feat.data variant index
3228 feat-type INTEGER,
3229
3230 -- subtype of features in this table, defined in header SeqFeatData.hpp
3231 feat-subtype INTEGER OPTIONAL,
3232
3233 -- number of rows
3234 num-rows INTEGER,
3235
3236 -- data in columns
3237 columns SEQUENCE OF SeqTable-column
3238 }
3239
3240
3241 END
3242 --$Revision: 6.4 $
3243 --**********************************************************************
3244 --
3245 -- NCBI Sequence Alignment elements
3246 -- by James Ostell, 1990
3247 --
3248 --**********************************************************************
3249
3250 NCBI-Seqalign DEFINITIONS ::=
3251 BEGIN
3252
3253 EXPORTS Seq-align, Score, Score-set, Seq-align-set;
3254
3255 IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
3256 User-object, Object-id FROM NCBI-General;
3257
3258 --*** Sequence Alignment ********************************
3259 --*
3260
3261 Seq-align-set ::= SET OF Seq-align -- unordered collection; also used recursively by Seq-align.segs.disc
3262
3263 Seq-align ::= SEQUENCE { -- type and segs are mandatory; every other field is OPTIONAL
3264 type ENUMERATED {
3265 not-set (0) ,
3266 global (1) ,
3267 diags (2) , -- unbroken, but not ordered, diagonals
3268 partial (3) , -- mapping pieces together
3269 disc (4) , -- discontinuous alignment
3270 other (255) } ,
3271 dim INTEGER OPTIONAL , -- dimensionality
3272 score SET OF Score OPTIONAL , -- for whole alignment
3273 segs CHOICE { -- alignment data
3274 dendiag SEQUENCE OF Dense-diag ,
3275 denseg Dense-seg ,
3276 std SEQUENCE OF Std-seg ,
3277 packed Packed-seg ,
3278 disc Seq-align-set, -- recursive: Seq-align-set is a SET OF Seq-align
3279 spliced Spliced-seg,
3280 sparse Sparse-seg
3281 } ,
3282
3283 -- regions of sequence over which align
3284 -- was computed
3285 bounds SET OF Seq-loc OPTIONAL,
3286
3287 -- alignment id
3288 id SEQUENCE OF Object-id OPTIONAL,
3289
3290 --extra info
3291 ext SEQUENCE OF User-object OPTIONAL
3292 }
3293
3294 Dense-diag ::= SEQUENCE { -- for (multiway) diagonals
3295 dim INTEGER DEFAULT 2 , -- dimensionality; an absent field reads as 2 (pairwise)
3296 ids SEQUENCE OF Seq-id , -- sequences in order
3297 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order
3298 len INTEGER , -- len of aligned segments; a single value shared by all rows
3299 strands SEQUENCE OF Na-strand OPTIONAL ,
3300 scores SET OF Score OPTIONAL }
3301
3302 -- Dense-seg: the densest packing for sequence alignments only.
3303 -- a start of -1 indicates a gap for that sequence of
3304 -- length lens.
3305 --
3306 -- id=100 AAGGCCTTTTAGAGATGATGATGATGATGA
3307 -- id=200 AAGGCCTTTTAG.......GATGATGATGA
3308 -- id=300 ....CCTTTTAGAGATGATGAT....ATGA
3309 --
3310 -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
3311 -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
3312 -- lens = { 4, 8, 7, 3, 4, 4 }
3313 --
3314
3315 Dense-seg ::= SEQUENCE { -- for (multiway) global or partial alignments
3316 dim INTEGER DEFAULT 2 , -- dimensionality
3317 numseg INTEGER , -- number of segments here
3318 ids SEQUENCE OF Seq-id , -- sequences in order
3319 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order within segs; the example above has dim x numseg = 18 entries, with -1 marking a gap
3320 lens SEQUENCE OF INTEGER , -- lengths in ids order within segs; the example above has one length per segment (6 for numseg = 6)
3321 strands SEQUENCE OF Na-strand OPTIONAL ,
3322 scores SEQUENCE OF Score OPTIONAL } -- score for each seg
3323
3324 Packed-seg ::= SEQUENCE { -- for (multiway) global or partial alignments
3325 dim INTEGER DEFAULT 2 , -- dimensionality
3326 numseg INTEGER , -- number of segments here
3327 ids SEQUENCE OF Seq-id , -- sequences in order
3328 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order for whole alignment
3329 present OCTET STRING , -- Boolean if each sequence present or absent in
3330 -- each segment (NOTE(review): the bit order within the octets is not stated here; confirm)
3331 lens SEQUENCE OF INTEGER , -- length of each segment
3332 strands SEQUENCE OF Na-strand OPTIONAL ,
3333 scores SEQUENCE OF Score OPTIONAL } -- score for each segment
3334
3335 Std-seg ::= SEQUENCE { -- one segment given as full Seq-locs; loc is the only mandatory field
3336 dim INTEGER DEFAULT 2 , -- dimensionality
3337 ids SEQUENCE OF Seq-id OPTIONAL ,
3338 loc SEQUENCE OF Seq-loc ,
3339 scores SET OF Score OPTIONAL }
3340
3341
3342 Spliced-seg ::= SEQUENCE { -- product-to-genomic alignment as a list of exons; product-type and exons are mandatory
3343 -- product is either protein or transcript (cDNA)
3344 product-id Seq-id OPTIONAL,
3345 genomic-id Seq-id OPTIONAL,
3346
3347 -- should be 'plus' or 'minus'
3348 product-strand Na-strand OPTIONAL ,
3349 genomic-strand Na-strand OPTIONAL ,
3350
3351 product-type ENUMERATED {
3352 transcript(0),
3353 protein(1)
3354 },
3355
3356 -- set of segments involved
3357 -- each segment corresponds to one exon
3358 -- exons are always in biological order
3359 exons SEQUENCE OF Spliced-exon ,
3360
3361 -- optional poly(A) tail
3362 poly-a INTEGER OPTIONAL,
3363
3364 -- length of the product, in bases/residues
3365 -- from this, a 3' unaligned length can be extracted; this also captures
3366 -- the case in which a protein aligns leaving a partial codon alignment
3367 -- at the 3' end
3368 product-length INTEGER OPTIONAL,
3369
3370 -- alignment descriptors / modifiers
3371 -- this provides us a set for extension
3372 modifiers SET OF Spliced-seg-modifier OPTIONAL
3373 }
3374
3375 Spliced-seg-modifier ::= CHOICE { -- one flag per element of Spliced-seg.modifiers
3376 -- protein aligns from the start and the first codon
3377 -- on both product and genomic is start codon
3378 start-codon-found BOOLEAN,
3379
3380 -- protein aligns to its end and there is a stop codon
3381 -- on the genomic right after the alignment
3382 stop-codon-found BOOLEAN
3383 }
3384
3385
3386 -- complete or partial exon
3387 -- two consecutive Spliced-exons may belong to one exon
3388 Spliced-exon ::= SEQUENCE { -- the four start/end coordinates are mandatory; everything else is OPTIONAL
3389 -- product-end >= product-start
3390 product-start Product-pos ,
3391 product-end Product-pos ,
3392
3393 -- genomic-end >= genomic-start
3394 genomic-start INTEGER ,
3395 genomic-end INTEGER ,
3396
3397 -- product is either protein or transcript (cDNA)
3398 product-id Seq-id OPTIONAL , -- NOTE(review): same names as the ids/strands on Spliced-seg; presumably a per-exon override, confirm
3399 genomic-id Seq-id OPTIONAL ,
3400
3401 -- should be 'plus' or 'minus'
3402 product-strand Na-strand OPTIONAL ,
3403
3404 -- genomic-strand represents the strand of translation
3405 genomic-strand Na-strand OPTIONAL ,
3406
3407 -- basic segments are always in biological order
3408 parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,
3409
3410 -- scores for this exon
3411 scores Score-set OPTIONAL ,
3412
3413 -- splice sites
3414 acceptor-before-exon Splice-site OPTIONAL,
3415 donor-after-exon Splice-site OPTIONAL,
3416
3417 -- flag: is this exon complete or partial?
3418 partial BOOLEAN OPTIONAL,
3419
3420 --extra info
3421 ext SEQUENCE OF User-object OPTIONAL
3422 }
3423
3424
3425 Product-pos ::= CHOICE { -- a position on the product: plain integer for nucleotides, Prot-pos for proteins
3426 nucpos INTEGER,
3427 protpos Prot-pos
3428 }
3429
3430
3431 -- codon-based position on protein (1/3 of an amino acid)
3432 Prot-pos ::= SEQUENCE { -- amin is mandatory; frame defaults to 0 (not set)
3433 -- standard protein position
3434 amin INTEGER ,
3435
3436 -- 0, 1, 2, or 3 as for Cdregion
3437 -- 0 = not set
3438 -- 1, 2, 3 = actual frame
3439 frame INTEGER DEFAULT 0
3440 }
3441
3442
3443 -- Spliced-exon-chunk: piece of an exon
3444 -- lengths are given in nucleotide bases (1/3 of an amino acid when the product is a
3445 -- protein)
3446 Spliced-exon-chunk ::= CHOICE { -- every alternative is an INTEGER; the chosen alternative names the kind of chunk
3447 -- both sequences represented, product and genomic sequences match
3448 match INTEGER ,
3449
3450 -- both sequences represented, product and genomic sequences do not match
3451 mismatch INTEGER ,
3452
3453 -- both sequences are represented, there is sufficient similarity
3454 -- between product and genomic sequences. Can be used to replace stretches
3455 -- of matches and mismatches, mostly for protein to genomic where
3456 -- definition of match or mismatch depends on translation table
3457 diag INTEGER ,
3458
3459 -- insertion in product sequence (i.e. gap in the genomic sequence)
3460 product-ins INTEGER ,
3461
3462 -- insertion in genomic sequence (i.e. gap in the product sequence)
3463 genomic-ins INTEGER
3464 }
3465
3466
3467 -- site involved in splice
3468 Splice-site ::= SEQUENCE { -- single-field wrapper; used by Spliced-exon.acceptor-before-exon and donor-after-exon
3469 -- typically two bases in the intronic region, always
3470 -- in IUPAC format
3471 bases VisibleString
3472 }
3473
3474
3475 -- ==========================================================================
3476 --
3477 -- Sparse-seg follows the semantics of dense-seg and is more optimal for
3478 -- representing sparse multiple alignments
3479 --
3480 -- ==========================================================================
3481
3482
3483 Sparse-seg ::= SEQUENCE { -- a multiple alignment stored as pairwise rows; rows is the only mandatory field
3484 master-id Seq-id OPTIONAL,
3485
3486 -- pairwise alignments constituting this multiple alignment
3487 rows SET OF Sparse-align,
3488
3489 -- per-row scores (NOTE(review): both rows and row-scores are SET OF, so the grammar gives no order to pair them by; confirm how they are matched)
3490 row-scores SET OF Score OPTIONAL,
3491
3492 -- index of extra items
3493 ext SET OF Sparse-seg-ext OPTIONAL
3494 }
3495
3496 Sparse-align ::= SEQUENCE { -- one pairwise row of a Sparse-seg; only second-strands and seg-scores are OPTIONAL
3497 first-id Seq-id,
3498 second-id Seq-id,
3499
3500 numseg INTEGER, --number of segments
3501 first-starts SEQUENCE OF INTEGER , --starts on the first sequence [numseg]
3502 second-starts SEQUENCE OF INTEGER , --starts on the second sequence [numseg]
3503 lens SEQUENCE OF INTEGER , --lengths of segments [numseg]
3504 second-strands SEQUENCE OF Na-strand OPTIONAL , -- strands are given for the second sequence only; there is no first-strands field
3505
3506 -- per-segment scores
3507 seg-scores SET OF Score OPTIONAL
3508 }
3509
3510 Sparse-seg-ext ::= SEQUENCE { -- only index is defined; the richer seg-ext layout below is commented out
3511 --seg-ext SET OF {
3512 -- index INTEGER,
3513 -- data User-field
3514 -- }
3515 index INTEGER
3516 }
3517
3518
3519
3520 -- use of Score is discouraged for external ASN.1 specifications
3521 Score ::= SEQUENCE { -- one score; value is mandatory, id is OPTIONAL
3522 id Object-id OPTIONAL , -- Object-id is imported from NCBI-General
3523 value CHOICE { -- exactly one numeric representation
3524 real REAL ,
3525 int INTEGER
3526 }
3527 }
3528
3529 -- use of Score-set is encouraged for external ASN.1 specifications
3530 Score-set ::= SET OF Score -- unordered collection; used by Spliced-exon.scores
3531
3532 END
3533
3534 --$Revision: 6.0 $
3535 --**********************************************************************
3536 --
3537 -- NCBI Sequence Analysis Results (other than alignments)
3538 -- by James Ostell, 1990
3539 --
3540 --**********************************************************************
3541
3542 NCBI-Seqres DEFINITIONS ::=
3543 BEGIN
3544
3545 EXPORTS Seq-graph;
3546
3547 IMPORTS Seq-loc FROM NCBI-Seqloc;
3548
3549 --*** Sequence Graph ********************************
3550 --*
3551 --* for values mapped by residue or range to sequence
3552 --*
3553
3554 Seq-graph ::= SEQUENCE { -- loc, numval and graph are mandatory; the rest are OPTIONAL
3555 title VisibleString OPTIONAL ,
3556 comment VisibleString OPTIONAL ,
3557 loc Seq-loc , -- region this applies to
3558 title-x VisibleString OPTIONAL , -- title for x-axis
3559 title-y VisibleString OPTIONAL , -- by symmetry with title-x, presumably the y-axis title
3560 comp INTEGER OPTIONAL , -- compression (residues/value)
3561 a REAL OPTIONAL , -- for scaling values
3562 b REAL OPTIONAL , -- display = (a x value) + b
3563 numval INTEGER , -- number of values in graph
3564 graph CHOICE { -- the values, in one of three storage widths
3565 real Real-graph ,
3566 int Int-graph ,
3567 byte Byte-graph } }
3568
3569 Real-graph ::= SEQUENCE { -- graph values stored as REAL; all fields are mandatory
3570 max REAL , -- top of graph
3571 min REAL , -- bottom of graph
3572 axis REAL , -- value to draw axis on
3573 values SEQUENCE OF REAL }
3574
3575 Int-graph ::= SEQUENCE { -- graph values stored as INTEGER; same field layout as Real-graph
3576 max INTEGER ,
3577 min INTEGER ,
3578 axis INTEGER ,
3579 values SEQUENCE OF INTEGER }
3580
3581 Byte-graph ::= SEQUENCE { -- integer from 0-255
3582 max INTEGER ,
3583 min INTEGER ,
3584 axis INTEGER ,
3585 values OCTET STRING } -- NOTE(review): presumably one octet per value, given the 0-255 range noted above; confirm
3586
3587 END
3588
3589 --$Revision: 6.1 $
3590 --********************************************************************
3591 --
3592 -- Direct Submission of Sequence Data
3593 -- James Ostell, 1991
3594 --
3595 -- This is a trial specification for direct submission of sequence
3596 -- data worked out between NCBI and EMBL
3597 -- Later revised to reflect work with GenBank and Integrated database
3598 --
3599 -- Version 3.0, 1994
3600 -- This is the official NCBI sequence submission format now.
3601 --
3602 --********************************************************************
3603
3604 NCBI-Submit DEFINITIONS ::=
3605 BEGIN
3606
3607 EXPORTS Seq-submit, Contact-info;
3608
3609 IMPORTS Cit-sub, Author FROM NCBI-Biblio
3610 Date, Object-id FROM NCBI-General
3611 Seq-annot FROM NCBI-Sequence
3612 Seq-id FROM NCBI-Seqloc
3613 Seq-entry FROM NCBI-Seqset;
3614
3615 Seq-submit ::= SEQUENCE { -- top-level submission: one Submit-block plus exactly one kind of payload
3616 sub Submit-block ,
3617 data CHOICE {
3618 entrys SET OF Seq-entry , -- sequence(s)
3619 annots SET OF Seq-annot , -- annotation(s)
3620 delete SET OF Seq-id } } -- deletions of entries
3621
3622 Submit-block ::= SEQUENCE { -- submission metadata; contact and cit are the only mandatory fields
3623 contact Contact-info , -- who to contact
3624 cit Cit-sub , -- citation for this submission
3625 hup BOOLEAN DEFAULT FALSE , -- hold until publish
3626 reldate Date OPTIONAL , -- release by date
3627 subtype INTEGER { -- type of submission
3628 new (1) , -- new data
3629 update (2) , -- update by author
3630 revision (3) , -- 3rd party (non-author) update
3631 other (255) } OPTIONAL ,
3632 tool VisibleString OPTIONAL, -- tool used to make submission
3633 user-tag VisibleString OPTIONAL, -- user supplied id for this submission
3634 comment VisibleString OPTIONAL } -- user comments/advice to database
3635
3636 Contact-info ::= SEQUENCE { -- who to contact to discuss the submission; every field is OPTIONAL
3637 name VisibleString OPTIONAL , -- OBSOLETE: will be removed
3638 address SEQUENCE OF VisibleString OPTIONAL ,
3639 phone VisibleString OPTIONAL ,
3640 fax VisibleString OPTIONAL ,
3641 email VisibleString OPTIONAL ,
3642 telex VisibleString OPTIONAL ,
3643 owner-id Object-id OPTIONAL , -- for owner accounts
3644 password OCTET STRING OPTIONAL , -- NOTE(review): the schema does not say whether this is hashed or plain; confirm before relying on it
3645 last-name VisibleString OPTIONAL , -- structured to replace name above
3646 first-name VisibleString OPTIONAL ,
3647 middle-initial VisibleString OPTIONAL ,
3648 contact Author OPTIONAL } -- WARNING: this will replace the above
3649
3650 END
3651
3652 --$Revision: 1.15 $
3653 --**********************************************************************
3654 --
3655 -- Definitions for Cn3D-specific data (rendering settings,
3656 -- user annotations, etc.)
3657 --
3658 -- by Paul Thiessen
3659 --
3660 -- National Center for Biotechnology Information
3661 -- National Institutes of Health
3662 -- Bethesda, MD 20894 USA
3663 --
3664 -- asntool -m cn3d.asn -w 100 -o cn3d.h
3665 -- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
3666 -- -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
3667 --**********************************************************************
3668
3669 NCBI-Cn3d DEFINITIONS ::=
3670 -- Cn3D-specific information
3671
3672 BEGIN
3673
3674 EXPORTS Cn3d-style-dictionary, Cn3d-user-annotations;
3675
3676 IMPORTS Biostruc-id FROM MMDB
3677 Molecule-id, Residue-id FROM MMDB-Chemical-graph;
3678
3679
3680 -- values of enumerations must match those in cn3d/style_manager.hpp!
3681
3682 Cn3d-backbone-type ::= ENUMERATED { -- for different types of backbones; numbering starts at 1, there is no 0 value
3683 off (1),
3684 trace (2),
3685 partial (3),
3686 complete (4)
3687 }
3688
3689 Cn3d-drawing-style ::= ENUMERATED { -- atom/bond/object rendering styles; one enumeration covers both groups below
3690 -- for atoms and bonds
3691 wire (1),
3692 tubes (2),
3693 ball-and-stick (3),
3694 space-fill (4),
3695 wire-worm (5),
3696 tube-worm (6),
3697 -- for 3d-objects
3698 with-arrows (7),
3699 without-arrows (8)
3700 }
3701
3702 Cn3d-color-scheme ::= ENUMERATED { -- available color schemes (not all
3703 -- necessarily applicable to all objects); items are grouped by topic, not listed in numeric order
3704 element (1),
3705 object (2),
3706 molecule (3),
3707 domain (4),
3708 residue (20), -- out of numeric sequence: 20 is the highest value in this enumeration
3709 secondary-structure (5),
3710 user-select (6),
3711 -- different alignment conservation coloring (currently only for proteins)
3712 aligned (7),
3713 identity (8),
3714 variety (9),
3715 weighted-variety (10),
3716 information-content (11),
3717 fit (12),
3718 block-fit (17), -- 17 to 19 are listed here, ahead of 13 to 16 below
3719 block-z-fit (18),
3720 block-row-fit (19),
3721 -- other schemes
3722 temperature (13),
3723 hydrophobicity (14),
3724 charge (15),
3725 rainbow (16)
3726 }
3727
3728 -- RGB triplet, interpreted (after division by the scale-factor) as floating
3729 -- point values which should range from [0..1]. The default scale-factor is
3730 -- 255, so that one can conveniently set integer byte values [0..255] for
3731 -- colors with the scale-factor already set appropriately to map to [0..1].
3732 -- An alpha value is allowed, but is currently ignored by Cn3D.
3733 Cn3d-color ::= SEQUENCE { -- red, green and blue are mandatory; scale-factor and alpha are defaulted
3734 scale-factor INTEGER DEFAULT 255, -- divisor applied to the channel values
3735 red INTEGER,
3736 green INTEGER,
3737 blue INTEGER,
3738 alpha INTEGER DEFAULT 255 -- allowed, but currently ignored by Cn3D
3739 }
3740
3741 Cn3d-backbone-style ::= SEQUENCE { -- style blob for backbones only; all four fields are mandatory
3742 type Cn3d-backbone-type,
3743 style Cn3d-drawing-style,
3744 color-scheme Cn3d-color-scheme,
3745 user-color Cn3d-color -- NOTE(review): presumably consulted when color-scheme is user-select; confirm
3746 }
3747
3748 Cn3d-general-style ::= SEQUENCE { -- style blob for other objects; differs from Cn3d-backbone-style only in is-on replacing type
3749 is-on BOOLEAN,
3750 style Cn3d-drawing-style,
3751 color-scheme Cn3d-color-scheme,
3752 user-color Cn3d-color -- NOTE(review): presumably consulted when color-scheme is user-select; confirm
3753 }
3754
3755 Cn3d-backbone-label-style ::= SEQUENCE { -- style blob for backbone labels; all five fields are mandatory
3756 spacing INTEGER, -- zero means none
3757 type ENUMERATED { -- numbering starts at 1
3758 one-letter (1),
3759 three-letter (2)
3760 },
3761 number ENUMERATED { -- numbering starts at 0, unlike type above
3762 none (0),
3763 sequential (1), -- from 1, by residues present, to match sequence
3764 pdb (2) -- use number assigned by PDB
3765 },
3766 termini BOOLEAN,
3767 white BOOLEAN -- all white, or (if false) color of alpha carbon
3768 }
3769
3770 -- rendering settings for Cn3D (mirrors StyleSettings class)
3771 Cn3d-style-settings ::= SEQUENCE { -- only name and the three label fields at the end are OPTIONAL
3772 name VisibleString OPTIONAL, -- a name (for favorites)
3773 protein-backbone Cn3d-backbone-style, -- backbone styles
3774 nucleotide-backbone Cn3d-backbone-style,
3775 protein-sidechains Cn3d-general-style, -- styles for other stuff
3776 nucleotide-sidechains Cn3d-general-style,
3777 heterogens Cn3d-general-style,
3778 solvents Cn3d-general-style,
3779 connections Cn3d-general-style,
3780 helix-objects Cn3d-general-style,
3781 strand-objects Cn3d-general-style,
3782 virtual-disulfides-on BOOLEAN, -- virtual disulfides
3783 virtual-disulfide-color Cn3d-color,
3784 hydrogens-on BOOLEAN, -- hydrogens
3785 background-color Cn3d-color, -- background
3786 -- floating point parameters - scale-factor applies to all the following:
3787 scale-factor INTEGER, -- mandatory here with no DEFAULT, unlike Cn3d-color.scale-factor
3788 space-fill-proportion INTEGER,
3789 ball-radius INTEGER,
3790 stick-radius INTEGER,
3791 tube-radius INTEGER,
3792 tube-worm-radius INTEGER,
3793 helix-radius INTEGER,
3794 strand-width INTEGER,
3795 strand-thickness INTEGER,
3796 -- backbone labels (no labels if not present)
3797 protein-labels Cn3d-backbone-label-style OPTIONAL,
3798 nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
3799 -- ion labels
3800 ion-labels BOOLEAN OPTIONAL
3801 }
3802
3803 Cn3d-style-settings-set ::= SET OF Cn3d-style-settings -- unordered collection of complete style settings
3804
3805 Cn3d-style-table-id ::= INTEGER -- key type shared by Cn3d-style-table-item.id and Cn3d-user-annotation.style-id
3806
3807 Cn3d-style-table-item ::= SEQUENCE { -- one entry of Cn3d-style-dictionary.style-table: an id paired with its style
3808 id Cn3d-style-table-id,
3809 style Cn3d-style-settings
3810 }
3811
3812 -- the global settings, and a lookup table of styles for user annotations.
3813 Cn3d-style-dictionary ::= SEQUENCE { -- global-style is mandatory; the lookup table is OPTIONAL
3814 global-style Cn3d-style-settings,
3815 style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL -- NOTE(review): uniqueness of item ids is not enforced by the grammar
3816 }
3817
3818 -- a range of residues in a chain, identified by MMDB residue-id
3819 -- (e.g., numbered from 1)
3820 Cn3d-residue-range ::= SEQUENCE { -- both ends are mandatory; Residue-id is imported from MMDB-Chemical-graph
3821 from Residue-id,
3822 to Residue-id -- NOTE(review): presumably inclusive; confirm
3823 }
3824
3825 -- set of locations on a particular chain
3826 Cn3d-molecule-location ::= SEQUENCE { -- one molecule plus an optional list of residue ranges on it
3827 molecule-id Molecule-id, -- MMDB molecule id
3828 -- which residues; whole molecule implied if absent
3829 residues SEQUENCE OF Cn3d-residue-range OPTIONAL
3830 }
3831
3832 -- set of locations on a particular structure object (e.g., a PDB/MMDB
3833 -- structure), which may include multiple ranges of residues each on
3834 -- multiple chains.
3835 Cn3d-object-location ::= SEQUENCE {
3836 structure-id Biostruc-id, -- identifies the structure object
3837 residues SEQUENCE OF Cn3d-molecule-location -- per-molecule locations within that structure (required)
3838 }
3839
3840 -- information for an individual user annotation
3841 Cn3d-user-annotation ::= SEQUENCE { -- every field is required except description
3842 name VisibleString, -- a (short) name for this annotation
3843 description VisibleString OPTIONAL, -- an optional longer description
3844 style-id Cn3d-style-table-id, -- how to draw this annotation (key of a Cn3d-style-table-item)
3845 residues SEQUENCE OF Cn3d-object-location, -- which residues to cover
3846 is-on BOOLEAN -- whether this annotation is to be turned on in Cn3D
3847 }
3848
3849 -- a GL-ordered transformation matrix
3850 Cn3d-GL-matrix ::= SEQUENCE { -- sixteen required REAL elements m0..m15; NOTE(review): GL-ordered presumably means OpenGL column-major order, confirm
3851 m0 REAL, m1 REAL, m2 REAL, m3 REAL,
3852 m4 REAL, m5 REAL, m6 REAL, m7 REAL,
3853 m8 REAL, m9 REAL, m10 REAL, m11 REAL,
3854 m12 REAL, m13 REAL, m14 REAL, m15 REAL
3855 }
3856
3857 -- a floating point 3d vector
3858 Cn3d-vector ::= SEQUENCE { -- three required REAL components
3859 x REAL,
3860 y REAL,
3861 z REAL
3862 }
3863
3864 -- parameters used to set up the camera in Cn3D
3865 Cn3d-view-settings ::= SEQUENCE { -- all fields are required
3866 camera-distance REAL, -- camera on +Z axis this distance from origin
3867 camera-angle-rad REAL, -- camera angle (radians, per the field name)
3868 camera-look-at-X REAL, -- X,Y of point in Z=0 plane camera points at
3869 camera-look-at-Y REAL,
3870 camera-clip-near REAL, -- distance of clipping planes from camera
3871 camera-clip-far REAL,
3872 matrix Cn3d-GL-matrix, -- transformation of objects in the scene
3873 rotation-center Cn3d-vector -- center of rotation of whole scene
3874 }
3875
3876 -- The list of annotations for a given CDD/mime. If residue regions overlap
3877 -- between annotations that are turned on, the last annotation in this list
3878 -- that contains these residues will be used as the display style for these
3879 -- residues.
3880 -- Also contains the current viewpoint, so that user's camera angle
3881 -- can be stored and reproduced, for illustrations, on-line figures, etc.
3882 Cn3d-user-annotations ::= SEQUENCE {
3883 annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL, -- ordered list; on overlap the last matching annotation wins
3884 view Cn3d-view-settings OPTIONAL -- the current viewpoint (camera settings)
3885 }
3886
3887 END
3888
3889 --$Revision: 6.3 $
3890 --****************************************************************
3891 --
3892 -- NCBI Project Definition Module
3893 -- by Jim Ostell and Jonathan Kans, 1998
3894 --
3895 --****************************************************************
3896
3897 NCBI-Project DEFINITIONS ::=
3898 BEGIN
3899
3900 EXPORTS Project, Project-item;
3901
3902 IMPORTS Date FROM NCBI-General
3903 PubMedId FROM NCBI-Biblio
3904 Seq-id, Seq-loc FROM NCBI-Seqloc
3905 Seq-annot, Pubdesc FROM NCBI-Sequence
3906 Seq-entry FROM NCBI-Seqset
3907 Pubmed-entry FROM NCBI-PubMed;
3908
3909 Project ::= SEQUENCE { -- an optional description plus one data item
3910 descr Project-descr OPTIONAL , -- ids, name and descriptors of the project
3911 data Project-item } -- the content; a CHOICE, so exactly one variant per Project
3912
3913 Project-item ::= CHOICE { -- exactly one of the following collections
3914 pmuid SET OF INTEGER , -- the *uid variants carry plain INTEGER ids
3915 protuid SET OF INTEGER ,
3916 nucuid SET OF INTEGER ,
3917 sequid SET OF INTEGER ,
3918 genomeuid SET OF INTEGER ,
3919 structuid SET OF INTEGER ,
3920 pmid SET OF PubMedId , -- the *id variants carry typed ids (PubMedId or Seq-id)
3921 protid SET OF Seq-id ,
3922 nucid SET OF Seq-id ,
3923 seqid SET OF Seq-id ,
3924 genomeid SET OF Seq-id ,
3925 structid NULL , -- carries no data (NULL placeholder)
3926 pment SET OF Pubmed-entry , -- the *ent variants carry whole entries
3927 protent SET OF Seq-entry ,
3928 nucent SET OF Seq-entry ,
3929 seqent SET OF Seq-entry ,
3930 genomeent SET OF Seq-entry ,
3931 structent NULL , -- carries no data (NULL placeholder)
3932 seqannot SET OF Seq-annot ,
3933 loc SET OF Seq-loc ,
3934 proj SET OF Project -- nested projects; makes Project recursive
3935 }
3936
3937 Project-descr ::= SEQUENCE {
3938 id SET OF Project-id , -- required; Project-id is a VisibleString
3939 name VisibleString OPTIONAL ,
3940 descr SET OF Projdesc OPTIONAL } -- descriptors, each a pub, date, comment or title
3941
3942 Projdesc ::= CHOICE { -- a single descriptor; exactly one variant is present
3943 pub Pubdesc , -- Pubdesc is imported from NCBI-Sequence
3944 date Date , -- Date is imported from NCBI-General
3945 comment VisibleString ,
3946 title VisibleString
3947 }
3948
3949 Project-id ::= VisibleString -- project identifier held as text; used by Project-descr.id
3950
3951 END
3952
3953
3954 --$Revision: 6.0 $
3955 --*********************************************************************
3956 --
3957 -- access.asn
3958 --
3959 -- messages for data access
3960 --
3961 --*********************************************************************
3962
3963 NCBI-Access DEFINITIONS ::=
3964 BEGIN
3965
3966 EXPORTS Link-set;
3967
3968 -- links between same class = neighbors
3969 -- links between other classes = links
3970
3971 Link-set ::= SEQUENCE { -- the only type exported by NCBI-Access
3972 num INTEGER , -- number of links to this doc type
3973 uids SEQUENCE OF INTEGER OPTIONAL , -- the links
3974 weights SEQUENCE OF INTEGER OPTIONAL } -- the weights; NOTE(review): presumably parallel to uids, confirm
3975
3976
3977 END
3978 --$Revision: 6.0 $
3979 --**********************************************************************
3980 --
3981 -- NCBI Sequence Feature Definition Module
3982 -- by James Ostell, 1994
3983 --
3984 --**********************************************************************
3985
3986 NCBI-FeatDef DEFINITIONS ::=
3987 BEGIN
3988
3989 EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;
3990
3991
3992 FeatDef ::= SEQUENCE { -- all fields are required
3993 typelabel VisibleString , -- short label for type eg "CDS"
3994 menulabel VisibleString , -- label for a menu eg "Coding Region"
3995 featdef-key INTEGER , -- unique for this feature definition
3996 seqfeat-key INTEGER , -- SeqFeat.data.choice from objfeat.h
3997 entrygroup INTEGER , -- Group for data entry
3998 displaygroup INTEGER , -- Group for data display
3999 molgroup FeatMolType -- Type of Molecule used for (aa, na or both; see FeatMolType)
4000 }
4001
4002 FeatMolType ::= ENUMERATED { -- molecule class selector used by FeatDef.molgroup
4003 aa (1), -- proteins
4004 na (2), -- nucleic acids
4005 both (3) } -- both
4006
4007 FeatDefSet ::= SEQUENCE OF FeatDef -- collections of definitions
4008
4009 FeatDispGroup ::= SEQUENCE { -- a named display group
4010 groupkey INTEGER , -- numeric key of the group; NOTE(review): presumably matched by FeatDef.displaygroup, confirm
4011 groupname VisibleString } -- name of the group
4012
4013 FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup -- ordered collection of display groups
4014
4015 FeatDefGroupSet ::= SEQUENCE { -- display groups bundled with feature definitions; NOTE(review): not listed in this module's EXPORTS
4016 groups FeatDispGroupSet , -- the display groups
4017 defs FeatDefSet } -- the feature definitions
4018
4019 END
4020
4021
4022 --$Revision: 6.12 $
4023 --****************************************************************
4024 --
4025 -- NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
4026 -- by Jonathan Epstein, February 1996
4027 --
4028 --****************************************************************
4029
4030 NCBI-Mime DEFINITIONS ::=
4031 BEGIN
4032
4033 EXPORTS Ncbi-mime-asn1;
4034 IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
4035 Cdd FROM NCBI-Cdd
4036 Seq-entry FROM NCBI-Seqset
4037 Seq-annot FROM NCBI-Sequence
4038 Medline-entry FROM NCBI-Medline
4039 Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;
4040
4041 Ncbi-mime-asn1 ::= CHOICE { -- the only type exported by NCBI-Mime; exactly one variant is present
4042 entrez Entrez-general, -- a single Entrez item (Medline entry, sequence entry, structure or structure annotation)
4043 alignstruc Biostruc-align, -- structures & sequences & alignments
4044 alignseq Biostruc-align-seq, -- sequences & sequence alignments (no structure)
4045 strucseq Biostruc-seq, -- structure & sequences
4046 strucseqs Biostruc-seqs, -- structure & sequences & alignments
4047 general Biostruc-seqs-aligns-cdd -- all-purpose "grab bag"
4048 -- others may be added here in the future
4049 }
4050
4051 -- generic bundle of sequence and alignment info
4052 Bundle-seqs-aligns ::= SEQUENCE { -- every field is OPTIONAL
4053 sequences SET OF Seq-entry OPTIONAL, -- sequences
4054 seqaligns SET OF Seq-annot OPTIONAL, -- sequence alignments
4055 strucaligns Biostruc-annot-set OPTIONAL, -- structure alignments
4056 imports SET OF Seq-annot OPTIONAL, -- imports (updates in Cn3D)
4057 style-dictionary Cn3d-style-dictionary OPTIONAL, -- Cn3D style settings (global style plus style table)
4058 user-annotations Cn3d-user-annotations OPTIONAL -- Cn3D user annotations and view settings
4059 }
4060
4061 Biostruc-seqs-aligns-cdd ::= SEQUENCE { -- payload of the general variant of Ncbi-mime-asn1
4062 seq-align-data CHOICE { -- required; exactly one of bundle or cdd
4063 bundle Bundle-seqs-aligns, -- either seqs + alignments
4064 cdd Cdd -- or CDD (which contains these)
4065 },
4066 structures SET OF Biostruc OPTIONAL, -- structures
4067 structure-type ENUMERATED { -- type of structures to load if
4068 ncbi-backbone(2), -- not present; meanings and
4069 ncbi-all-atom(3), -- values are same as MMDB's
4070 pdb-model(4) -- Model-type
4071 } OPTIONAL
4072 }
4073
4074 Biostruc-align ::= SEQUENCE { -- payload of the alignstruc variant; only the two Cn3D fields are OPTIONAL
4075 master Biostruc, -- the single master structure
4076 slaves SET OF Biostruc, -- the remaining structures; NOTE(review): presumably those aligned to master, confirm
4077 alignments Biostruc-annot-set, -- structure alignments
4078 sequences SET OF Seq-entry, -- sequences
4079 seqalign SET OF Seq-annot, -- sequence alignments, carried as Seq-annot
4080 style-dictionary Cn3d-style-dictionary OPTIONAL,
4081 user-annotations Cn3d-user-annotations OPTIONAL
4082 }
4083
4084 Biostruc-align-seq ::= SEQUENCE { -- display sequences and their alignment only; this type carries no Biostruc
4085 sequences SET OF Seq-entry, -- sequences
4086 seqalign SET OF Seq-annot, -- sequence alignments, carried as Seq-annot
4087 style-dictionary Cn3d-style-dictionary OPTIONAL,
4088 user-annotations Cn3d-user-annotations OPTIONAL
4089 }
4090
4091 Biostruc-seq ::= SEQUENCE { -- display structure seq added by yanli
4092 structure Biostruc, -- the single structure
4093 sequences SET OF Seq-entry, -- sequences
4094 style-dictionary Cn3d-style-dictionary OPTIONAL,
4095 user-annotations Cn3d-user-annotations OPTIONAL
4096 }
4097
4098 Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure added by yanli
4099 structure Biostruc, -- the single structure
4100 sequences SET OF Seq-entry, -- sequences
4101 seqalign SET OF Seq-annot, -- sequence alignments, carried as Seq-annot
4102 style-dictionary Cn3d-style-dictionary OPTIONAL,
4103 user-annotations Cn3d-user-annotations OPTIONAL
4104 }
4105
4106 Entrez-style ::= ENUMERATED { -- style selector used by Entrez-general.style; values run 1 to 13
4107 docsum (1),
4108 genbank (2) ,
4109 genpept (3) ,
4110 fasta (4) ,
4111 asn1 (5) ,
4112 graphic (6) ,
4113 alignment (7) ,
4114 globalview (8) ,
4115 report (9) ,
4116 medlars (10) ,
4117 embl (11) ,
4118 pdb (12) ,
4119 kinemage (13) }
4120
4121 Entrez-general ::= SEQUENCE { -- payload of the entrez variant of Ncbi-mime-asn1
4122 title VisibleString OPTIONAL,
4123 data CHOICE { -- required; exactly one of the following
4124 ml Medline-entry ,
4125 prot Seq-entry , -- prot, nuc and genome all carry a Seq-entry
4126 nuc Seq-entry ,
4127 genome Seq-entry ,
4128 structure Biostruc ,
4129 strucAnnot Biostruc-annot-set } ,
4130 style Entrez-style , -- required style selector (see Entrez-style)
4131 location VisibleString OPTIONAL }
4132 END
4133 --$Revision: 6.0 $
4134 --********************************************************************
4135 --
4136 -- Print Templates
4137 -- James Ostell, 1993
4138 --
4139 --
4140 --********************************************************************
4141
4142 NCBI-ObjPrt DEFINITIONS ::=
4143 BEGIN
4144
4145 EXPORTS PrintTemplate, PrintTemplateSet;
4146
4147 PrintTemplate ::= SEQUENCE { -- a named template wrapping one PrintFormat
4148 name TemplateName , -- name for this template
4149 labelfrom VisibleString OPTIONAL, -- ASN.1 path to get label from
4150 format PrintFormat } -- the format tree for this template (required)
4151
4152 TemplateName ::= VisibleString -- template name as text; used by PrintTemplate.name and PrintForm.use-template
4153
4154 PrintTemplateSet ::= SEQUENCE OF PrintTemplate -- ordered collection of templates
4155
4156 PrintFormat ::= SEQUENCE { -- how to print one ASN.1 component
4157 asn1 VisibleString , -- ASN.1 partial path for this
4158 label VisibleString OPTIONAL , -- printable label
4159 prefix VisibleString OPTIONAL, -- NOTE(review): presumably text emitted before the value, confirm
4160 suffix VisibleString OPTIONAL, -- NOTE(review): presumably text emitted after the value, confirm
4161 form PrintForm } -- which kind of form applies (required)
4162
4163 PrintForm ::= CHOICE { -- Forms for various ASN.1 components
4164 block PrintFormBlock, -- for SEQUENCE, SET
4165 boolean PrintFormBoolean,
4166 enum PrintFormEnum,
4167 text PrintFormText,
4168 use-template TemplateName, -- refers to a template by its name
4169 user UserFormat ,
4170 null NULL } -- rarely used
4171
4172 UserFormat ::= SEQUENCE { -- payload of the user variant of PrintForm
4173 printfunc VisibleString , -- required; NOTE(review): presumably the name of a print function, confirm
4174 defaultfunc VisibleString OPTIONAL }
4175
4176 PrintFormBlock ::= SEQUENCE { -- for SEQUENCE, SET
4177 separator VisibleString OPTIONAL ,
4178 components SEQUENCE OF PrintFormat } -- nested formats; makes PrintFormat recursive
4179
4180 PrintFormBoolean ::= SEQUENCE { -- both fields are OPTIONAL
4181 true VisibleString OPTIONAL , -- NOTE(review): presumably the text printed for a TRUE value, confirm
4182 false VisibleString OPTIONAL } -- NOTE(review): presumably the text printed for a FALSE value, confirm
4183
4184 PrintFormEnum ::= SEQUENCE {
4185 values SEQUENCE OF VisibleString OPTIONAL } -- NOTE(review): presumably display strings for the enumerated values, confirm
4186
4187 PrintFormText ::= SEQUENCE {
4188 textfunc VisibleString OPTIONAL } -- NOTE(review): presumably the name of a text formatting function, confirm
4189
4190 END
4191
4192 --$Revision: 6.10 $
4193 --*********************************************************
4194 --
4195 -- ASN.1 and XML for the components of a GenBank format sequence
4196 -- J.Ostell 2002
4197 -- Updated 25 May 2010
4198 --
4199 --*********************************************************
4200
4201 NCBI-GBSeq DEFINITIONS ::=
4202 BEGIN
4203
4204 --********
4205 -- GBSeq represents the elements in a GenBank style report
4206 -- of a sequence with some small additions to structure and support
4207 -- for protein (GenPept) versions of GenBank format as seen in
4208 -- Entrez. While this represents the simplification, reduction of
4209 -- detail, and flattening to a single sequence perspective of GenBank
4210 -- format (compared with the full ASN.1 or XML from which GenBank and
4211 -- this format is derived at NCBI), it is presented in ASN.1 or XML for
4212 -- automated parsing and processing. It is hoped that this compromise
4213 -- will be useful for those bulk processing at the GenBank format level
4214 -- of detail today. Since it is a compromise, a number of pragmatic
4215 -- decisions have been made.
4216 --
4217 -- In pursuit of simplicity and familiarity a number of
4218 -- fields do not have full substructure defined here where there is
4219 -- already a standard GenBank format string. For example:
4220 --
4221 -- Date DD-Mon-YYYY
4222 -- Authors LastName, Initials (with periods)
4223 -- Journal JournalName Volume (issue), page-range (year)
4224 -- FeatureLocations as per GenBank feature table, but FeatureIntervals
4225 -- may also be provided as a convenience
4226 -- FeatureQualifiers as per GenBank feature table
4227 -- Primary has a string that represents a table to construct
4228 -- a third party (TPA) sequence.
4229 -- other-seqids can have strings with the "vertical bar format" sequence
4230 -- identifiers used in BLAST for example, when they are non-genbank types.
4231 -- Currently in GenBank format you only see GI, but there are others, like
4232 -- patents, submitter clone names, etc which will appear here, as they
4233 -- always have in the ASN.1 format, and full XML format.
4234 -- source-db is a formatted text block for peptides in GenPept format that
4235 -- carries information from the source protein database.
4236 --
4237 -- There are also a number of elements that could have been
4238 -- more exactly specified, but in the interest of simplicity
4239 -- have been simply left as optional. For example:
4240 --
4241 -- accession and accession.version will always appear in a GenBank record
4242 -- they are optional because this format can also be used for non-GenBank
4243 -- sequences, and in that case will have only "other-seqids".
4244 --
4245 -- sequences will normally all have "sequence" filled in. But contig records
4246 -- will have a "join" statement in the "contig" slot, and no "sequence".
4247 -- We also may consider a retrieval option with no sequence of any kind
4248 -- and no feature table to quickly check minimal values.
4249 --
4250 -- a reference may have an author list, or be from a consortium, or both.
4251 --
4252 -- some fields, such as taxonomy, do appear as separate elements in GenBank
4253 -- format but without a specific linetype (in GenBank format this comes
4254 -- under ORGANISM). Another example is the separation of primary accession
4255 -- from the list of secondary accessions. In GenBank format primary
4256 -- accession is just the first one on the list that includes all secondaries
4257 -- after it.
4258 --
4259 -- create-date deserves special comment. The date you see on the right hand
4260 -- side of the LOCUS line in GenBank format is actually the last date the
4261 -- record was modified (or the update-date). The date the record was
4262 -- first submitted to GenBank appears in the first submission citation in
4263 -- the reference section. Internally in the databases and ASN.1 NCBI keeps
4264 -- the first date the record was released into the sequence database at
4265 -- NCBI as create-date. For records from EMBL, which supports create-date,
4266 -- it is the date provided by EMBL. For DDBJ records, which do not supply
4267 -- a create-date (same as GenBank format) the create-date is the first date
4268 -- NCBI saw the record from DDBJ. For older GenBank records, before NCBI
4269 -- took responsibility for GenBank, it is just the first date NCBI saw the
4270 -- record. Create-date can be very useful, so we expose it here, but users
4271 -- must understand it is only an approximation and comes from many sources,
4272 -- and with many exceptions and caveats. It does NOT tell you the first
4273 -- date the public might have seen this record and thus is NOT an accurate
4274 -- measure for legal issues of precedence.
4275 --
4276 --********
4277
4278 GBSet ::= SEQUENCE OF GBSeq -- ordered collection of GBSeq records
4279
4280 GBSeq ::= SEQUENCE { -- one sequence record at GenBank flatfile level of detail
4281 locus VisibleString ,
4282 length INTEGER ,
4283 strandedness VisibleString OPTIONAL ,
4284 moltype VisibleString ,
4285 topology VisibleString OPTIONAL ,
4286 division VisibleString ,
4287 update-date VisibleString , -- last modification date, as DD-Mon-YYYY
4288 create-date VisibleString OPTIONAL , -- first release date known to NCBI, as DD-Mon-YYYY; an approximation only
4289 update-release VisibleString OPTIONAL ,
4290 create-release VisibleString OPTIONAL ,
4291 definition VisibleString ,
4292 primary-accession VisibleString OPTIONAL , -- optional so that non-GenBank sequences can use this format
4293 entry-version VisibleString OPTIONAL ,
4294 accession-version VisibleString OPTIONAL , -- optional for the same reason as primary-accession
4295 other-seqids SEQUENCE OF GBSeqid OPTIONAL , -- identifiers in vertical bar format
4296 secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL, -- kept separate from the primary accession
4297 project VisibleString OPTIONAL ,
4298 keywords SEQUENCE OF GBKeyword OPTIONAL ,
4299 segment VisibleString OPTIONAL ,
4300 source VisibleString OPTIONAL ,
4301 organism VisibleString OPTIONAL ,
4302 taxonomy VisibleString OPTIONAL ,
4303 references SEQUENCE OF GBReference OPTIONAL ,
4304 comment VisibleString OPTIONAL ,
4305 comment-set SEQUENCE OF GBComment OPTIONAL ,
4306 struc-comments SEQUENCE OF GBStrucComment OPTIONAL ,
4307 primary VisibleString OPTIONAL , -- string representing a table to construct a third party (TPA) sequence
4308 source-db VisibleString OPTIONAL , -- formatted text block from the source protein database (GenPept)
4309 database-reference VisibleString OPTIONAL ,
4310 feature-table SEQUENCE OF GBFeature OPTIONAL ,
4311 feature-set SEQUENCE OF GBFeatureSet OPTIONAL ,
4312 sequence VisibleString OPTIONAL , -- Optional for contig, wgs, etc.
4313 contig VisibleString OPTIONAL , -- holds the join statement for contig records
4314 alt-seq SEQUENCE OF GBAltSeqData OPTIONAL
4315 }
4316
4317 GBSeqid ::= VisibleString -- element type of GBSeq.other-seqids
4318
4319 GBSecondary-accn ::= VisibleString -- element type of GBSeq.secondary-accessions
4320
4321 GBKeyword ::= VisibleString -- element type of GBSeq.keywords
4322
4323 GBReference ::= SEQUENCE { -- only reference and journal are required
4324 reference VisibleString ,
4325 position VisibleString OPTIONAL ,
4326 authors SEQUENCE OF GBAuthor OPTIONAL , -- a reference may have an author list, a consortium, or both
4327 consortium VisibleString OPTIONAL ,
4328 title VisibleString OPTIONAL ,
4329 journal VisibleString , -- standard GenBank string: JournalName Volume (issue), page-range (year)
4330 xref SEQUENCE OF GBXref OPTIONAL ,
4331 pubmed INTEGER OPTIONAL ,
4332 remark VisibleString OPTIONAL
4333 }
4334
4335 GBAuthor ::= VisibleString -- one author as a GenBank format string: LastName, Initials (with periods)
4336
4337 GBXref ::= SEQUENCE { -- both fields are required
4338 dbname VisibleString , -- NOTE(review): presumably the name of the other database, confirm
4339 id VisibleString -- NOTE(review): presumably the record identifier in that database, confirm
4340 }
4341
4342 GBComment ::= SEQUENCE { -- element type of GBSeq.comment-set
4343 type VisibleString OPTIONAL ,
4344 paragraphs SEQUENCE OF GBCommentParagraph -- required list of paragraphs
4345 }
4346
4347 GBCommentParagraph ::= SEQUENCE {
4348 items SEQUENCE OF GBCommentItem -- required list of items making up the paragraph
4349 }
4350
4351 GBCommentItem ::= SEQUENCE { -- both fields are OPTIONAL
4352 value VisibleString OPTIONAL ,
4353 url VisibleString OPTIONAL
4354 }
4355
4356 GBStrucComment ::= SEQUENCE { -- element type of GBSeq.struc-comments
4357 name VisibleString OPTIONAL ,
4358 items SEQUENCE OF GBStrucCommentItem -- required list of items
4359 }
4360
4361 GBStrucCommentItem ::= SEQUENCE { -- all three fields are OPTIONAL
4362 tag VisibleString OPTIONAL ,
4363 value VisibleString OPTIONAL ,
4364 url VisibleString OPTIONAL
4365 }
4366
4367 GBFeatureSet ::= SEQUENCE { -- element type of GBSeq.feature-set
4368 annot-source VisibleString OPTIONAL ,
4369 features SEQUENCE OF GBFeature -- required list of features
4370 }
4371
4372 GBFeature ::= SEQUENCE { -- only key and location are required
4373 key VisibleString ,
4374 location VisibleString , -- location string as per the GenBank feature table
4375 intervals SEQUENCE OF GBInterval OPTIONAL , -- the same location as structured intervals, provided as a convenience
4376 operator VisibleString OPTIONAL ,
4377 partial5 BOOLEAN OPTIONAL ,
4378 partial3 BOOLEAN OPTIONAL ,
4379 quals SEQUENCE OF GBQualifier OPTIONAL , -- qualifiers as per the GenBank feature table
4380 xrefs SEQUENCE OF GBXref OPTIONAL
4381 }
4382
4383 GBInterval ::= SEQUENCE { -- accession is the only required field
4384 from INTEGER OPTIONAL , -- NOTE(review): presumably either from and to, or point, is supplied; confirm
4385 to INTEGER OPTIONAL ,
4386 point INTEGER OPTIONAL ,
4387 iscomp BOOLEAN OPTIONAL ,
4388 interbp BOOLEAN OPTIONAL ,
4389 accession VisibleString
4390 }
4391
4392 GBQualifier ::= SEQUENCE { -- element type of GBFeature.quals
4393 name VisibleString , -- required
4394 value VisibleString OPTIONAL -- may be absent
4395 }
4396
4397 GBAltSeqData ::= SEQUENCE { -- element type of GBSeq.alt-seq
4398 name VisibleString , -- e.g., contig, wgs, scaffold, cage, genome
4399 items SEQUENCE OF GBAltSeqItem OPTIONAL
4400 }
4401
4402 GBAltSeqItem ::= SEQUENCE { -- every field is OPTIONAL
4403 interval GBInterval OPTIONAL ,
4404 isgap BOOLEAN OPTIONAL , -- NOTE(review): presumably the gap-* fields apply when this is TRUE, confirm
4405 gap-length INTEGER OPTIONAL ,
4406 gap-type VisibleString OPTIONAL ,
4407 gap-linkage VisibleString OPTIONAL ,
4408 gap-comment VisibleString OPTIONAL ,
4409 first-accn VisibleString OPTIONAL ,
4410 last-accn VisibleString OPTIONAL ,
4411 value VisibleString OPTIONAL
4412 }
4413
4414 END
4415
4416 --$Revision: 1.8 $
4417 --************************************************************************
4418 --
4419 -- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
4420 -- The International Nucleotide Sequence Database (INSD) collaboration
4421 -- Version 1.6, 25 May 2010
4422 --
4423 --************************************************************************
4424
4425 INSD-INSDSeq DEFINITIONS ::=
4426 BEGIN
4427
4428 -- INSDSeq provides the elements of a sequence as presented in the
4429 -- GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
4430 -- additional structure.
4431 -- Although this single perspective of the three flatfile formats
4432 -- provides a useful simplification, it hides to some extent the
4433 -- details of the actual data underlying those formats. Nevertheless,
4434 -- the XML version of INSD-Seq is being provided with
4435 -- the hopes that it will prove useful to those who bulk-process
4436 -- sequence data at the flatfile-format level of detail. Further
4437 -- documentation regarding the content and conventions of those formats
4438 -- can be found at:
4439 --
4440 -- URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
4441 -- http://www.ddbj.nig.ac.jp/FT/full_index.html
4442 -- http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
4443 -- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
4444 --
4445 -- URLs for DDBJ, EMBL, and GenBank Release Notes :
4446 -- ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
4447 -- http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
4448 -- ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
4449 --
4450 -- Because INSDSeq is a compromise, a number of pragmatic decisions have
4451 -- been made:
4452 --
4453 -- In pursuit of simplicity and familiarity a number of fields do not
4454 -- have full substructure defined here where there is already a
4455 -- standard flatfile format string. For example:
4456 --
4457 -- Dates: DD-MON-YYYY (eg 10-JUN-2003)
4458 --
4459 -- Author: LastName, Initials (eg Smith, J.N.)
4460 -- or Lastname Initials (eg Smith J.N.)
4461 --
4462 -- Journal: JournalName Volume (issue), page-range (year)
4463 -- or JournalName Volume(issue):page-range(year)
4464 -- eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
4465 -- Appl. Environ. Microbiol. 61(4):1646-1648(1995).
4466 --
4467 -- FeatureLocations are represented as in the flatfile feature table,
4468 -- but FeatureIntervals may also be provided as a convenience
4469 --
4470 -- FeatureQualifiers are represented as in the flatfile feature table.
4471 --
4472 -- Primary has a string that represents a table to construct
4473 -- a third party (TPA) sequence.
4474 --
4475 -- other-seqids can have strings with the "vertical bar format" sequence
4476 -- identifiers used in BLAST for example, when they are non-INSD types.
4477 --
4478 -- Currently in flatfile format you only see Accession numbers, but there
4479 -- are others, like patents, submitter clone names, etc which will
4480 -- appear here
4481 --
4482 -- There are also a number of elements that could have been more exactly
4483 -- specified, but in the interest of simplicity have been simply left as
4484 -- optional. For example:
4485 --
4486 -- All publicly accessible sequence records in INSDSeq format will
4487 -- include accession and accession.version. However, these elements are
4488 -- optional in INSDSeq so that this format can also be used
4489 -- for non-public sequence data, prior to the assignment of accessions and
4490 -- version numbers. In such cases, records will have only "other-seqids".
4491 --
4492 -- sequences will normally all have "sequence" filled in. But contig records
4493 -- will have a "join" statement in the "contig" slot, and no "sequence".
4494 -- We also may consider a retrieval option with no sequence of any kind
4495 -- and no feature table to quickly check minimal values.
4496 --
4497 -- Four (optional) elements are specific to records represented via the EMBL
4498 -- sequence database: INSDSeq_update-release, INSDSeq_create-release,
4499 -- INSDSeq_entry-version, and INSDSeq_database-reference.
4500 --
4501 -- One (optional) element is specific to records originating at the GenBank
4502 -- and DDBJ sequence databases: INSDSeq_segment.
4503 --
4504 --********
4505
4506 INSDSet ::= SEQUENCE OF INSDSeq -- ordered collection of INSDSeq records
4507
4508 INSDSeq ::= SEQUENCE { -- one sequence record at flatfile level of detail
4509 locus VisibleString ,
4510 length INTEGER ,
4511 strandedness VisibleString OPTIONAL ,
4512 moltype VisibleString ,
4513 topology VisibleString OPTIONAL ,
4514 division VisibleString ,
4515 update-date VisibleString , -- date string, DD-MON-YYYY (eg 10-JUN-2003)
4516 create-date VisibleString OPTIONAL , -- date string, DD-MON-YYYY
4517 update-release VisibleString OPTIONAL , -- specific to records represented via EMBL
4518 create-release VisibleString OPTIONAL , -- specific to records represented via EMBL
4519 definition VisibleString ,
4520 primary-accession VisibleString OPTIONAL , -- optional so non-public data without accessions can use this format
4521 entry-version VisibleString OPTIONAL , -- specific to records represented via EMBL
4522 accession-version VisibleString OPTIONAL , -- optional for the same reason as primary-accession
4523 other-seqids SEQUENCE OF INSDSeqid OPTIONAL , -- identifiers in vertical bar format
4524 secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,
4525 project VisibleString OPTIONAL ,
4526 keywords SEQUENCE OF INSDKeyword OPTIONAL ,
4527 segment VisibleString OPTIONAL , -- specific to records originating at GenBank and DDBJ
4528 source VisibleString OPTIONAL ,
4529 organism VisibleString OPTIONAL ,
4530 taxonomy VisibleString OPTIONAL ,
4531 references SEQUENCE OF INSDReference OPTIONAL ,
4532 comment VisibleString OPTIONAL ,
4533 comment-set SEQUENCE OF INSDComment OPTIONAL ,
4534 struc-comments SEQUENCE OF INSDStrucComment OPTIONAL ,
4535 primary VisibleString OPTIONAL , -- string representing a table to construct a third party (TPA) sequence
4536 source-db VisibleString OPTIONAL ,
4537 database-reference VisibleString OPTIONAL , -- specific to records represented via EMBL
4538 feature-table SEQUENCE OF INSDFeature OPTIONAL ,
4539 feature-set SEQUENCE OF INSDFeatureSet OPTIONAL ,
4540 sequence VisibleString OPTIONAL , -- Optional for contig, wgs, etc.
4541 contig VisibleString OPTIONAL , -- holds the join statement for contig records
4542 alt-seq SEQUENCE OF INSDAltSeqData OPTIONAL
4543 }
4544
4545 INSDSeqid ::= VisibleString -- element type of INSDSeq.other-seqids
4546
4547 INSDSecondary-accn ::= VisibleString -- element type of INSDSeq.secondary-accessions
4548
4549 INSDKeyword ::= VisibleString -- element type of INSDSeq.keywords
4550
4551 -- INSDReference_position contains a string value indicating the
4552 -- basepair span(s) to which a reference applies. The allowable
4553 -- formats are:
4554 --
4555 -- X..Y : Where X and Y are integers separated by two periods,
4556 -- X >= 1 , Y <= sequence length, and X <= Y
4557 --
4558 -- Multiple basepair spans can exist, separated by a
4559 -- semi-colon and a space. For example : 10..20; 100..500
4560 --
4561 -- sites : The string literal 'sites', indicating that a reference
4562 -- provides sequence annotation information, but the specific
4563 -- basepair spans are either not captured, or were too numerous
4564 -- to record.
4565 --
4566 -- The 'sites' literal string is singly occurring, and
4567 -- cannot be used in conjunction with any X..Y basepair spans.
4568 --
4569 -- References that lack an INSDReference_position element apply
4570 -- to the entire sequence.
4571
4572 INSDReference ::= SEQUENCE { -- only reference and journal are required
4573 reference VisibleString ,
4574 position VisibleString OPTIONAL , -- X..Y span list or the literal sites; absent means the entire sequence
4575 authors SEQUENCE OF INSDAuthor OPTIONAL ,
4576 consortium VisibleString OPTIONAL ,
4577 title VisibleString OPTIONAL ,
4578 journal VisibleString , -- flatfile format journal string, eg JournalName Volume (issue), page-range (year)
4579 xref SEQUENCE OF INSDXref OPTIONAL ,
4580 pubmed INTEGER OPTIONAL ,
4581 remark VisibleString OPTIONAL
4582 }
4583
4584 INSDAuthor ::= VisibleString -- one author string: LastName, Initials or Lastname Initials
4585
4586 -- INSDXref provides a method for referring to records in
4587 -- other databases. INSDXref_dbname is a string value that
4588 -- provides the name of the database, and INSDXref_id
4589 -- is a string value that provides the record's identifier
4590 -- in that database.
4591
4592 INSDXref ::= SEQUENCE { -- both fields are required
4593 dbname VisibleString , -- name of the other database
4594 id VisibleString -- identifier of the record in that database
4595 }
4596
4597 INSDComment ::= SEQUENCE { -- element type of INSDSeq.comment-set
4598 type VisibleString OPTIONAL ,
4599 paragraphs SEQUENCE OF INSDCommentParagraph -- required list of paragraphs
4600 }
4601
4602 INSDCommentParagraph ::= SEQUENCE {
4603 items SEQUENCE OF INSDCommentItem -- required list of items making up the paragraph
4604 }
4605
4606 INSDCommentItem ::= SEQUENCE { -- both fields are OPTIONAL
4607 value VisibleString OPTIONAL ,
4608 url VisibleString OPTIONAL
4609 }
4610
4611 INSDStrucComment ::= SEQUENCE { -- element type of INSDSeq.struc-comments
4612 name VisibleString OPTIONAL ,
4613 items SEQUENCE OF INSDStrucCommentItem -- required list of items
4614 }
4615
4616 INSDStrucCommentItem ::= SEQUENCE { -- all three fields are OPTIONAL
4617 tag VisibleString OPTIONAL ,
4618 value VisibleString OPTIONAL ,
4619 url VisibleString OPTIONAL
4620 }
4621
4622 -- INSDFeature_operator contains a string value describing
4623 -- the relationship among a set of INSDInterval within
4624 -- INSDFeature_intervals. The allowable formats are:
4625 --
4626 -- join : The string literal 'join' indicates that the
4627 -- INSDInterval intervals are biologically joined
4628 -- together into a contiguous molecule.
4629 --
4630 -- order : The string literal 'order' indicates that the
4631 -- INSDInterval intervals are in the presented
4632 -- order, but they are not necessarily contiguous.
4633 --
4634 -- Either 'join' or 'order' is required if INSDFeature_intervals
4635 -- is comprised of more than one INSDInterval .
4636
4637 INSDFeatureSet ::= SEQUENCE { -- element type of INSDSeq.feature-set
4638 annot-source VisibleString OPTIONAL ,
4639 features SEQUENCE OF INSDFeature -- required list of features
4640 }
4641
4642 INSDFeature ::= SEQUENCE { -- only key and location are required
4643 key VisibleString ,
4644 location VisibleString , -- location string as in the flatfile feature table
4645 intervals SEQUENCE OF INSDInterval OPTIONAL , -- the same location as structured intervals, provided as a convenience
4646 operator VisibleString OPTIONAL , -- join or order; required when intervals holds more than one INSDInterval
4647 partial5 BOOLEAN OPTIONAL ,
4648 partial3 BOOLEAN OPTIONAL ,
4649 quals SEQUENCE OF INSDQualifier OPTIONAL , -- qualifiers as in the flatfile feature table
4650 xrefs SEQUENCE OF INSDXref OPTIONAL
4651 }
4652
4653 -- INSDInterval_iscomp is a boolean indicating whether
4654 -- an INSDInterval_from / INSDInterval_to location
4655 -- represents a location on the complement strand.
4656 -- When INSDInterval_iscomp is TRUE, it essentially
4657 -- confirms that a 'from' value which is greater than
4658 -- a 'to' value is intentional, because the location
4659 -- is on the opposite strand of the presented sequence.
4660
4661 -- INSDInterval_interbp is a boolean indicating whether
4662 -- a feature (such as a restriction site) is located
4663 -- between two adjacent basepairs. When INSDInterval_interbp
4664 -- is TRUE, the 'from' and 'to' values must differ by
4665 -- exactly one base.
4666
4667 INSDInterval ::= SEQUENCE { -- accession is the only required field
4668 from INTEGER OPTIONAL , -- may exceed to when iscomp is TRUE
4669 to INTEGER OPTIONAL ,
4670 point INTEGER OPTIONAL , -- NOTE(review): presumably a single position used instead of from and to, confirm
4671 iscomp BOOLEAN OPTIONAL , -- TRUE when the location is on the complement strand
4672 interbp BOOLEAN OPTIONAL , -- TRUE when the feature lies between two adjacent basepairs
4673 accession VisibleString
4674 }
4675
4676 INSDQualifier ::= SEQUENCE { -- element type of INSDFeature.quals
4677 name VisibleString , -- required
4678 value VisibleString OPTIONAL -- may be absent
4679 }
4680
4681 INSDAltSeqData ::= SEQUENCE { -- element type of INSDSeq.alt-seq
4682 name VisibleString , -- e.g., CON-division-join, WGS-contig-range,
4683 -- WGS-scaffold-range, MGA/CAGE-range, genome
4684 items SEQUENCE OF INSDAltSeqItem OPTIONAL
4685 }
4686
4687 INSDAltSeqItem ::= SEQUENCE { -- every field is OPTIONAL
4688 interval INSDInterval OPTIONAL ,
4689 isgap BOOLEAN OPTIONAL , -- NOTE(review): presumably the gap-* fields apply when this is TRUE, confirm
4690 gap-length INTEGER OPTIONAL ,
4691 gap-type VisibleString OPTIONAL ,
4692 gap-linkage VisibleString OPTIONAL ,
4693 gap-comment VisibleString OPTIONAL ,
4694 first-accn VisibleString OPTIONAL ,
4695 last-accn VisibleString OPTIONAL ,
4696 value VisibleString OPTIONAL
4697 }
4698
4699 END
4700
4701 --$Revision: 6.1 $
4702 --**********************************************************************
4703 --
4704 -- ASN.1 for a tiny Bioseq in XML
4705 -- basically a structured FASTA file with a few extras
4706 -- in this case we drop all modularity of components
4707 -- All ids are Optional - simpler structure, less checking
4708 -- Components of organism are hard coded - can't easily add or change
4709 -- sequence is just string whether DNA or protein
4710 -- by James Ostell, 2000
4711 --
4712 --**********************************************************************
4713
4714 NCBI-TSeq DEFINITIONS ::=
4715 BEGIN
4716
4717 TSeq ::= SEQUENCE { -- one sequence record (a structured FASTA entry)
4718 seqtype ENUMERATED { -- mandatory: kind of sequence
4719 nucleotide (1),
4720 protein (2) },
4721 gi INTEGER OPTIONAL, -- identifiers (gi, accver, sid, local) are all optional
4722 accver VisibleString OPTIONAL,
4723 sid VisibleString OPTIONAL,
4724 local VisibleString OPTIONAL,
4725 taxid INTEGER OPTIONAL, -- organism components (taxid, orgname) are hard coded here
4726 orgname VisibleString OPTIONAL,
4727 defline VisibleString, -- mandatory
4728 length INTEGER, -- mandatory; NOTE(review): presumably the length of 'sequence'; confirm
4729 sequence VisibleString } -- mandatory; a plain string whether DNA or protein
4730
4731 TSeqSet ::= SEQUENCE OF TSeq -- an ordered list of TSeq records
4732
4733 END
4734
4735 --$Id: scoremat.asn,v 1.14 2011/12/21 15:29:33 kazimird Exp $
4736 -- ===========================================================================
4737 --
4738 -- PUBLIC DOMAIN NOTICE
4739 -- National Center for Biotechnology Information
4740 --
4741 -- This software/database is a "United States Government Work" under the
4742 -- terms of the United States Copyright Act. It was written as part of
4743 -- the author's official duties as a United States Government employee and
4744 -- thus cannot be copyrighted. This software/database is freely available
4745 -- to the public for use. The National Library of Medicine and the U.S.
4746 -- Government have not placed any restriction on its use or reproduction.
4747 --
4748 -- Although all reasonable efforts have been taken to ensure the accuracy
4749 -- and reliability of the software and data, the NLM and the U.S.
4750 -- Government do not and cannot warrant the performance or results that
4751 -- may be obtained by using this software or data. The NLM and the U.S.
4752 -- Government disclaim all warranties, express or implied, including
4753 -- warranties of performance, merchantability or fitness for any particular
4754 -- purpose.
4755 --
4756 -- Please cite the author in any work or product based on this material.
4757 --
4758 -- ===========================================================================
4759 --
4760 -- Author: Christiam Camacho
4761 --
4762 -- File Description:
4763 -- ASN.1 definitions for scoring matrix
4764 --
4765 -- ===========================================================================
4766
4767 NCBI-ScoreMat DEFINITIONS ::= BEGIN
4768
4769 EXPORTS Pssm, PssmIntermediateData, PssmFinalData,
4770 PssmParameters, PssmWithParameters;
4771
4772 IMPORTS Object-id FROM NCBI-General
4773 Seq-entry FROM NCBI-Seqset;
4774
4775 -- a rudimentary block/core-model, to be used with block-based alignment
4776 -- routines and threading
4777
4778 BlockProperty ::= SEQUENCE { -- one typed property of a CoreBlock (see CoreBlock.property)
4779 type INTEGER { unassigned (0), -- mandatory; named values for the kind of property
4780 threshold (1), -- score threshold for heuristics
4781 minscore (2), -- observed minimum score in CD
4782 maxscore (3), -- observed maximum score in CD
4783 meanscore (4), -- observed mean score in CD
4784 variance (5), -- observed score variance
4785 name (10), -- just name the block
4786 is-optional(20), -- block may not have to be used
4787 other (255) },
4788 intvalue INTEGER OPTIONAL, -- NOTE(review): presumably carries the value of numeric properties; confirm
4789 textvalue VisibleString OPTIONAL -- NOTE(review): presumably carries textual values such as the block name; confirm
4790 }
4791
4792 CoreBlock ::= SEQUENCE { -- one block of a core model, located on the query
4793 start INTEGER, -- begin of block on query
4794 stop INTEGER, -- end of block on query
4795 minstart INTEGER OPTIONAL, -- optional N-terminal extension
4796 maxstop INTEGER OPTIONAL, -- optional C-terminal extension
4797 property SEQUENCE OF BlockProperty OPTIONAL -- optional list of typed properties for this block
4798 }
4799
4800 LoopConstraint ::= SEQUENCE { -- length bounds for one unaligned region; both fields have defaults
4801 minlength INTEGER DEFAULT 0, -- minimum length of unaligned region
4802 maxlength INTEGER DEFAULT 100000 -- maximum length of unaligned region
4803 }
4804
4805 CoreDef ::= SEQUENCE { -- core model: a list of blocks plus the loop constraints that go with them
4806 nblocks INTEGER, -- number of core elements/blocks
4807 blocks SEQUENCE OF CoreBlock, -- nblocks locations (the count is not enforced by the syntax)
4808 loops SEQUENCE OF LoopConstraint, -- (nblocks+1) constraints (the count is not enforced by the syntax)
4809
4810 isDiscontinuous BOOLEAN OPTIONAL, -- is it a discontinuous domain
4811
4812 insertions SEQUENCE OF INTEGER OPTIONAL -- positions of long insertions
4813 }
4814
4815 Site-annot ::= SEQUENCE { -- annotation of one site (a position range) on a PSSM
4816 startPosition INTEGER, -- location of the annotation,
4817 stopPosition INTEGER, -- start and stop position in the
4818 -- PSSM
4819
4820 description VisibleString OPTIONAL, -- holds description or names, that
4821 -- can be used for labels in
4822 -- visualization
4823
4824 type INTEGER OPTIONAL, -- type of the annotated feature,
4825 -- similarly to Align-annot in
4826 -- NCBI-Cdd
4827
4828 aliases SEQUENCE OF VisibleString OPTIONAL, -- additional names for
4829 -- the annotation
4830
4831 motif VisibleString OPTIONAL, -- motif to validate mapping of sites
4832
4833 motifuse INTEGER OPTIONAL -- how the motif is used: 0 for validation
4834 -- 1 for motif in seqloc
4835 -- 2 for multiple motifs in seqloc
4836 }
4837
4838 Site-annot-set ::= SEQUENCE OF Site-annot -- list of site annotations; used by PssmParameters.annotatedSites
4839
4840 -- ===========================================================================
4841 -- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
4842 -- ===========================================
4843 --
4844 -- Two possible inputs to PSI-BLAST and formatrpsdb:
4845 -- 1) PssmWithParams where pssm field contains intermediate PSSM data (matrix
4846 -- of frequency ratios)
4847 -- 2) PssmWithParams where pssm field contains final PSSM data (matrix of
4848 -- scores and statistical parameters) - such as written by cddumper
4849 --
4850 -- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
4851 -- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
4852 -- In case 2, PSI-BLAST's PSSM engine is not invoked and the matrix of scores
4853 -- and statistical parameters are used to perform the search in PSI-BLAST and the
4854 -- same data and the data in PssmWithParams::params::rpsdbparams is used to
4855 -- build the PSSM and ultimately the RPS-BLAST database
4856 --
4857 --
4858 -- reads ++++++++++++++ writes
4859 -- PssmWithParams ====> + PSI-BLAST + =====> PssmWithParams
4860 -- ++++++++++++++ | ^
4861 -- ^ | |
4862 -- | | |
4863 -- +===========================================+ |
4864 -- | |
4865 -- +===========================================+ |
4866 -- | |
4867 -- reads | |
4868 -- v |
4869 -- +++++++++++++++ writes +++++++++++++++++++++++ |
4870 -- | formatrpsdb | =====> | RPS-BLAST databases | |
4871 -- +++++++++++++++ +++++++++++++++++++++++ |
4872 -- ^ |
4873 -- | |
4874 -- | reads |
4875 -- +++++++++++++ |
4876 -- | RPS-BLAST | |
4877 -- +++++++++++++ |
4878 -- |
4879 -- reads ++++++++++++ writes |
4880 -- Cdd ======> | cddumper | =============================+
4881 -- ++++++++++++
4882 --
4883 -- ===========================================================================
4884
4885 -- Contains the PSSM's scores and its associated statistical parameters.
4886 -- Dimensions and order in which scores are stored must be the same as that
4887 -- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
4888 PssmFinalData ::= SEQUENCE {
4889
4890 -- PSSM's scores (mandatory)
4891 scores SEQUENCE OF INTEGER,
4892
4893 -- Karlin & Altschul parameter produced during the PSSM's calculation (mandatory)
4894 lambda REAL,
4895
4896 -- Karlin & Altschul parameter produced during the PSSM's calculation (mandatory)
4897 kappa REAL,
4898
4899 -- Karlin & Altschul parameter produced during the PSSM's calculation (mandatory)
4900 h REAL,
4901
4902 -- scaling factor used to obtain more precision when building the PSSM.
4903 -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
4904 -- engine generates PSSMs which are not scaled-up, however, if PSI-BLAST is
4905 -- given a PSSM which contains a scaled-up PSSM (indicated by having a
4906 -- scalingFactor greater than 1), then it will scale down the PSSM to
4907 -- perform the initial stages of the search with it.
4908 -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided
4909 -- scaled-up PSSMs, it will ensure that all PSSMs used to build the
4910 -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST
4911 -- will silently produce incorrect results).
4912 scalingFactor INTEGER DEFAULT 1,
4913
4914 -- Karlin & Altschul parameter produced during the PSSM's calculation
4915 -- (optional; NOTE(review): presumably the ungapped counterpart of lambda, confirm)
4916 lambdaUngapped REAL OPTIONAL,
4917 -- Karlin & Altschul parameter produced during the PSSM's calculation
4918 -- (optional; NOTE(review): presumably the ungapped counterpart of kappa, confirm)
4919 kappaUngapped REAL OPTIONAL,
4920 -- Karlin & Altschul parameter produced during the PSSM's calculation
4921 -- (optional; NOTE(review): presumably the ungapped counterpart of h, confirm)
4922 hUngapped REAL OPTIONAL
4923 }
4923
4924 -- Contains the PSSM's intermediate data used to create the PSSM's scores
4925 -- and statistical parameters. Dimensions and order in which scores are
4926 -- stored must be the same as that specified in Pssm::numRows,
4927 -- Pssm::numColumns, and Pssm::byRow
4928 PssmIntermediateData ::= SEQUENCE {
4929
4930 -- observed residue frequencies (or counts) per position of the PSSM
4931 -- (prior to application of pseudocounts)
4932 resFreqsPerPos SEQUENCE OF INTEGER OPTIONAL,
4933
4934 -- Weighted observed residue frequencies per position of the PSSM.
4935 -- (N.B.: each position's weights should add up to 1.0).
4936 -- This field corresponds to f_i (f sub i) in equation 2 of
4937 -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
4938 -- NOTE: this is needed for diagnostics information only (i.e.:
4939 -- -out_ascii_pssm option in psiblast)
4940 weightedResFreqsPerPos SEQUENCE OF REAL OPTIONAL,
4941
4942 -- PSSM's frequency ratios (mandatory: the only field here not marked OPTIONAL)
4943 freqRatios SEQUENCE OF REAL,
4944
4945 -- Information content per position of the PSSM
4946 -- NOTE: this is needed for diagnostics information only (i.e.:
4947 -- -out_ascii_pssm option in psiblast)
4948 informationContent SEQUENCE OF REAL OPTIONAL,
4949
4950 -- Relative weight for columns of the PSSM without gaps to pseudocounts
4951 -- NOTE: this is needed for diagnostics information only (i.e.:
4952 -- -out_ascii_pssm option in psiblast)
4953 gaplessColumnWeights SEQUENCE OF REAL OPTIONAL,
4954
4955 -- Used in sequence weights computation
4956 -- NOTE: this is needed for diagnostics information only (i.e.:
4957 -- -out_ascii_pssm option in psiblast)
4958 sigma SEQUENCE OF REAL OPTIONAL,
4959
4960 -- Length of the aligned regions per position of the query sequence
4961 -- NOTE: this is needed for diagnostics information only (i.e.:
4962 -- -out_ascii_pssm option in psiblast)
4963 intervalSizes SEQUENCE OF INTEGER OPTIONAL,
4964
4965 -- Number of matching sequences per position of the PSSM (including the
4966 -- query)
4967 -- NOTE: this is needed for diagnostics information only (i.e.:
4968 -- -out_ascii_pssm option in psiblast)
4969 numMatchingSeqs SEQUENCE OF INTEGER OPTIONAL,
4970
4971 -- Number of independent observations per position of the PSSM
4972 -- NOTE: this is needed for building CDD database for DELTA-BLAST
4973 numIndeptObsr SEQUENCE OF REAL OPTIONAL
4974 }
4975
4976 -- Position-specific scoring matrix
4977 --
4978 -- Column indices on the PSSM refer to the positions corresponding to the
4979 -- query/master sequence, i.e. the number of columns (N) is the same
4980 -- as the length of the query/master sequence.
4981 -- Row indices refer to individual amino acid types, i.e. the number of
4982 -- rows (M) is the same as the number of different residues in the
4983 -- alphabet we use. Consequently, row labels are amino acid identifiers.
4984 --
4985 -- PSSMs are stored as linear arrays of integers. By default, we store
4986 -- them column-by-column, M values for the first column followed by M
4987 -- values for the second column, and so on. In order to provide
4988 -- flexibility for external applications, the boolean field "byRow" is
4989 -- provided to specify the storage order.
4990 Pssm ::= SEQUENCE {
4991
4992 -- Is this a protein or nucleotide scoring matrix?
4993 isProtein BOOLEAN DEFAULT TRUE,
4994
4995 -- PSSM identifier
4996 identifier Object-id OPTIONAL,
4997
4998 -- The dimensions of the matrix are returned so the client can
4999 -- verify that all data was received.
5000
5001 numRows INTEGER, -- number of rows
5002 numColumns INTEGER, -- number of columns
5003
5004 -- rowLabels is given to note the order of residue types so that it can
5005 -- be cross-checked between applications.
5006 -- If this field is not given, the matrix values are presented in
5007 -- alphabet order: ncbistdaa is used for protein, ncbi4na for nucl.
5008 -- for proteins the values returned correspond to
5009 -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
5010 rowLabels SEQUENCE OF VisibleString OPTIONAL,
5011
5012 -- are matrices stored row by row? (default FALSE, i.e. column by column)
5013 byRow BOOLEAN DEFAULT FALSE,
5014
5015 -- PSSM representative sequence (master)
5016 query Seq-entry OPTIONAL,
5017
5018 -- both intermediateData and finalData can be provided, but at least one of
5019 -- them must be provided (the syntax does not enforce this: both are OPTIONAL).
5020 -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData
5021 -- representation.
5022
5023 -- Intermediate data for the PSSM
5024 intermediateData PssmIntermediateData OPTIONAL,
5025
5026 -- Final representation for the PSSM
5027 finalData PssmFinalData OPTIONAL
5028 }
5029
5030 -- This structure is used to create the RPS-BLAST database auxiliary file
5031 -- (*.aux) and it contains parameters set at creation time of the PSSM.
5032 -- Also, the matrixName field is used by formatrpsdb to build a PSSM from
5033 -- a Pssm structure which only contains PssmIntermediateData.
5034 FormatRpsDbParameters ::= SEQUENCE {
5035
5036 -- name of the underlying score matrix whose frequency ratios were
5037 -- used in PSSM construction (e.g.: BLOSUM62); mandatory
5038 matrixName VisibleString,
5039
5040 -- gap opening penalty corresponding to the matrix above (optional)
5041 gapOpen INTEGER OPTIONAL,
5042
5043 -- gap extension penalty corresponding to the matrix above (optional)
5044 gapExtend INTEGER OPTIONAL
5045
5046 }
5047
5048 -- Populated by PSSM engine of PSI-BLAST, original source for these values
5049 -- are the PSI-BLAST options specified using the BLAST options API
5050 PssmParameters ::= SEQUENCE { -- every field of this type is OPTIONAL
5051
5052 -- pseudocount constant used for PSSM. This field corresponds to beta in
5053 -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
5054 pseudocount INTEGER OPTIONAL,
5055
5056 -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
5057 -- populated by PSI-BLAST
5058 rpsdbparams FormatRpsDbParameters OPTIONAL,
5059
5060 -- alignment constraints needed by sequence-structure threader
5061 -- and other global or local block-alignment algorithms
5062 constraints CoreDef OPTIONAL,
5063
5064 -- bit score threshold for specific conserved domain hits
5065 bitScoreThresh REAL OPTIONAL,
5066
5067 -- conserved functional sites with annotations
5068 annotatedSites Site-annot-set OPTIONAL
5069 }
5070
5071 -- Envelope containing PSSM and the parameters used to create it.
5072 -- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
5073 PssmWithParameters ::= SEQUENCE {
5074
5075 -- This field is applicable to PSI-BLAST and formatrpsdb.
5076 -- When both the intermediate and final PSSM data are provided in this
5077 -- field, the final data (matrix of scores and associated statistical
5078 -- parameters) takes precedence and that data is used for further
5079 -- processing. The rationale for this is that the PSSM's scores and
5080 -- statistical parameters might have been calculated by other applications
5081 -- and it might not be possible to recreate it by using PSI-BLAST's PSSM
5082 -- engine.
5083 pssm Pssm,
5084
5085 -- This field's rpsdbparams is used to specify the values of options
5086 -- for processing by formatrpsdb. If these are not set, the command
5087 -- line defaults of formatrpsdb are applied. This field is used
5088 -- by PSI-BLAST to verify that the underlying score matrix used to build
5089 -- the PSSM is the same as the one being specified through the BLAST
5090 -- Options API. If this field is omitted, no verification will be
5091 -- performed, so be careful to keep track of what matrix was used to build
5092 -- the PSSM or else the results produced by PSI-BLAST will be unreliable.
5093 params PssmParameters OPTIONAL
5094 }
5095
5096 END
5097 --$Revision: 1.160 $
5098 --**********************************************************************
5099 --
5100 -- NCBI ASN.1 macro editing language specifications
5101 --
5102 -- by Colleen Bollin, 2007
5103 --
5104 --**********************************************************************
5105
5106 NCBI-Macro DEFINITIONS ::=
5107 BEGIN
5108
5109 EXPORTS AECR-action, Parse-action, Macro-action-list, Suspect-rule-set;
5110
5111 -- simple constraints --
5112
5113 String-location ::= ENUMERATED { -- type of String-constraint.match-location
5114 contains (1) , -- the DEFAULT used by String-constraint.match-location
5115 equals (2) ,
5116 starts (3) ,
5117 ends (4) ,
5118 inlist (5) }
5119
5120 Word-substitution ::= SEQUENCE { -- a word and a set of synonyms; collected in Word-substitution-set
5121 word VisibleString OPTIONAL ,
5122 synonyms SET OF VisibleString OPTIONAL ,
5123 case-sensitive BOOLEAN DEFAULT FALSE , -- both flags default to FALSE
5124 whole-word BOOLEAN DEFAULT FALSE }
5125
5126 Word-substitution-set ::= SET OF Word-substitution
5127
5128 String-constraint ::= SEQUENCE { -- text-matching constraint; every field is OPTIONAL or has a DEFAULT
5129 match-text VisibleString OPTIONAL ,
5130 match-location String-location DEFAULT contains , -- see String-location for the other choices
5131 case-sensitive BOOLEAN DEFAULT FALSE , -- all BOOLEAN flags below default to FALSE
5132 ignore-space BOOLEAN DEFAULT FALSE ,
5133 ignore-punct BOOLEAN DEFAULT FALSE ,
5134 ignore-words Word-substitution-set OPTIONAL ,
5135 whole-word BOOLEAN DEFAULT FALSE ,
5136 not-present BOOLEAN DEFAULT FALSE ,
5137 is-all-caps BOOLEAN DEFAULT FALSE ,
5138 is-all-lower BOOLEAN DEFAULT FALSE ,
5139 is-all-punct BOOLEAN DEFAULT FALSE ,
5140 ignore-weasel BOOLEAN DEFAULT FALSE }
5141
5142 String-constraint-set ::= SET OF String-constraint
5143
5144 Strand-constraint ::= ENUMERATED {
5145 any (0) ,
5146 plus (1) ,
5147 minus (2) }
5148
5149 Seqtype-constraint ::= ENUMERATED {
5150 any (0) ,
5151 nuc (1) ,
5152 prot (2) }
5153
5154 Partial-constraint ::= ENUMERATED {
5155 either (0) ,
5156 partial (1) ,
5157 complete (2) }
5158
5159 Location-type-constraint ::= ENUMERATED {
5160 any (0) ,
5161 single-interval (1) ,
5162 joined (2) ,
5163 ordered (3) }
5164
5165 Location-pos-constraint ::= CHOICE {
5166 dist-from-end INTEGER ,
5167 max-dist-from-end INTEGER ,
5168 min-dist-from-end INTEGER }
5169
5170 Location-constraint ::= SEQUENCE { -- every field defaults to its unconstrained value (any / either) or is OPTIONAL
5171 strand Strand-constraint DEFAULT any ,
5172 seq-type Seqtype-constraint DEFAULT any ,
5173 partial5 Partial-constraint DEFAULT either ,
5174 partial3 Partial-constraint DEFAULT either ,
5175 location-type Location-type-constraint DEFAULT any ,
5176 end5 Location-pos-constraint OPTIONAL , -- end5/end3 take one of the Location-pos-constraint distance alternatives
5177 end3 Location-pos-constraint OPTIONAL }
5178
5179 Object-type-constraint ::= ENUMERATED {
5180 any (0) ,
5181 feature (1) ,
5182 descriptor (2) }
5183
5184
5185 -- feature values --
5186
5187 Macro-feature-type ::= ENUMERATED {
5188 any (0) ,
5189 gene (1) ,
5190 org (2) ,
5191 cds (3) ,
5192 prot (4) ,
5193 preRNA (5) ,
5194 mRNA (6) ,
5195 tRNA (7) ,
5196 rRNA (8) ,
5197 snRNA (9) ,
5198 scRNA (10) ,
5199 otherRNA (11) ,
5200 pub (12) ,
5201 seq (13) ,
5202 imp (14) ,
5203 allele (15) ,
5204 attenuator (16) ,
5205 c-region (17) ,
5206 caat-signal (18) ,
5207 imp-CDS (19) ,
5208 conflict (20) ,
5209 d-loop (21) ,
5210 d-segment (22) ,
5211 enhancer (23) ,
5212 exon (24) ,
5213 gC-signal (25) ,
5214 iDNA (26) ,
5215 intron (27) ,
5216 j-segment (28) ,
5217 ltr (29) ,
5218 mat-peptide (30) ,
5219 misc-binding (31) ,
5220 misc-difference (32) ,
5221 misc-feature (33) ,
5222 misc-recomb (34) ,
5223 misc-RNA (35) ,
5224 misc-signal (36) ,
5225 misc-structure (37) ,
5226 modified-base (38) ,
5227 mutation (39) ,
5228 n-region (40) ,
5229 old-sequence (41) ,
5230 polyA-signal (42) ,
5231 polyA-site (43) ,
5232 precursor-RNA (44) ,
5233 prim-transcript (45) ,
5234 primer-bind (46) ,
5235 promoter (47) ,
5236 protein-bind (48) ,
5237 rbs (49) ,
5238 repeat-region (50) ,
5239 rep-origin (51) ,
5240 s-region (52) ,
5241 sig-peptide (53) ,
5242 source (54) ,
5243 stem-loop (55) ,
5244 sts (56) ,
5245 tata-signal (57) ,
5246 terminator (58) ,
5247 transit-peptide (59) ,
5248 unsure (60) ,
5249 v-region (61) ,
5250 v-segment (62) ,
5251 variation (63) ,
5252 virion (64) ,
5253 n3clip (65) ,
5254 n3UTR (66) ,
5255 n5clip (67) ,
5256 n5UTR (68) ,
5257 n10-signal (69) ,
5258 n35-signal (70) ,
5259 site-ref (71) ,
5260 region (72) ,
5261 comment (73) ,
5262 bond (74) ,
5263 site (75) ,
5264 rsite (76) ,
5265 user (77) ,
5266 txinit (78) ,
5267 num (79) ,
5268 psec-str (80) ,
5269 non-std-residue (81) ,
5270 het (82) ,
5271 biosrc (83) ,
5272 preprotein (84) ,
5273 mat-peptide-aa (85) ,
5274 sig-peptide-aa (86) ,
5275 transit-peptide-aa (87) ,
5276 snoRNA (88) ,
5277 gap (89) ,
5278 operon (90) ,
5279 oriT (91) ,
5280 ncRNA (92) ,
5281 tmRNA (93) ,
5282 mobile-element (94) }
5283
5284 Feat-qual-legal ::= ENUMERATED {
5285 allele (1) ,
5286 activity (2) ,
5287 anticodon (3) ,
5288 bound-moiety (4) ,
5289 chromosome (5),
5290 citation (6),
5291 codon (7) ,
5292 codon-start (8) ,
5293 codons-recognized (9) ,
5294 compare (10) ,
5295 cons-splice (11) ,
5296 db-xref (12) ,
5297 description (13) ,
5298 direction (14) ,
5299 ec-number (15) ,
5300 environmental-sample (16) ,
5301 evidence (17) ,
5302 exception (18) ,
5303 experiment (19) ,
5304 focus (20) ,
5305 frequency (21) ,
5306 function (22) ,
5307 gene (23) ,
5308 gene-description (24) ,
5309 inference (25) ,
5310 label (26) ,
5311 locus-tag (27) ,
5312 map (28) ,
5313 mobile-element (29) ,
5314 mod-base (30) ,
5315 mol-type (31) ,
5316 ncRNA-class (32) ,
5317 note (33) ,
5318 number (34) ,
5319 old-locus-tag (35) ,
5320 operon (36) ,
5321 organism (37) ,
5322 organelle (38) ,
5323 partial (39) ,
5324 phenotype (40) ,
5325 plasmid (41) ,
5326 product (42) ,
5327 protein-id (43) ,
5328 pseudo (44) ,
5329 rearranged (45) ,
5330 replace (46) ,
5331 rpt-family (47) ,
5332 rpt-type (48) ,
5333 rpt-unit (49) ,
5334 rpt-unit-seq (50) ,
5335 rpt-unit-range (51) ,
5336 segment (52) ,
5337 sequenced-mol (53) ,
5338 standard-name (54) ,
5339 synonym (55) ,
5340 transcript-id (56) ,
5341 transgenic (57) ,
5342 translation (58) ,
5343 transl-except (59) ,
5344 transl-table (60) ,
5345 usedin (61),
5346 mobile-element-type (62),
5347 mobile-element-name (63),
5348 gene-comment (64) ,
5349 satellite (65) ,
5350 satellite-type (66) ,
5351 satellite-name (67) ,
5352 location (68) ,
5353 tag-peptide (69) ,
5354 mobile-element-type-type (70) ,
5355 name (71) ,
5356 pcr-conditions (72) }
5357
5358 Feat-qual-legal-val ::= SEQUENCE {
5359 qual Feat-qual-legal ,
5360 val VisibleString }
5361
5362 Feat-qual-legal-val-choice ::= CHOICE { -- a CHOICE with a single alternative; element type of Feat-qual-legal-set
5363 qual Feat-qual-legal-val }
5364
5365 Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice
5366
5367 Feat-qual-choice ::= CHOICE {
5368 legal-qual Feat-qual-legal ,
5369 illegal-qual String-constraint }
5370
5371 Feature-field ::= SEQUENCE {
5372 type Macro-feature-type ,
5373 field Feat-qual-choice }
5374
5375 Feature-field-legal ::= SEQUENCE {
5376 type Macro-feature-type ,
5377 field Feat-qual-legal }
5378
5379 Feature-field-pair ::= SEQUENCE {
5380 type Macro-feature-type ,
5381 field-from Feat-qual-choice ,
5382 field-to Feat-qual-choice }
5383
5384 Rna-feat-type ::= CHOICE { -- RNA feature kind; used as 'type' in Rna-qual and Rna-qual-pair
5385 any NULL ,
5386 preRNA NULL ,
5387 mRNA NULL ,
5388 tRNA NULL ,
5389 rRNA NULL ,
5390 ncRNA VisibleString , -- the only alternative carrying a value; NOTE(review): presumably the ncRNA class, confirm
5391 tmRNA NULL,
5392 miscRNA NULL }
5393
5394 Rna-field ::= ENUMERATED {
5395 product (1) ,
5396 comment (2) ,
5397 codons-recognized (3) ,
5398 ncrna-class (4) ,
5399 anticodon (5) ,
5400 transcript-id (6) ,
5401 gene-locus (7) ,
5402 gene-description (8) ,
5403 gene-maploc (9) ,
5404 gene-locus-tag (10) ,
5405 gene-synonym (11) ,
5406 gene-comment (12) ,
5407 tag-peptide (13) }
5408
5409
5410 Rna-qual ::= SEQUENCE {
5411 type Rna-feat-type ,
5412 field Rna-field }
5413
5414 Rna-qual-pair ::= SEQUENCE {
5415 type Rna-feat-type ,
5416 field-from Rna-field ,
5417 field-to Rna-field }
5418
5419 Source-qual ::= ENUMERATED {
5420 acronym (1) ,
5421 anamorph (2) ,
5422 authority (3) ,
5423 bio-material (4) ,
5424 biotype (5) ,
5425 biovar (6) ,
5426 breed (7) ,
5427 cell-line (8) ,
5428 cell-type (9) ,
5429 chemovar (10) ,
5430 chromosome (11) ,
5431 clone (12) ,
5432 clone-lib (13) ,
5433 collected-by (14) ,
5434 collection-date (15) ,
5435 common (16) ,
5436 common-name (17) ,
5437 country (18) ,
5438 cultivar (19) ,
5439 culture-collection (20) ,
5440 dev-stage (21) ,
5441 division (22) ,
5442 dosage (23) ,
5443 ecotype (24) ,
5444 endogenous-virus-name (25) ,
5445 environmental-sample (26) ,
5446 forma (27) ,
5447 forma-specialis (28) ,
5448 frequency (29) ,
5449 fwd-primer-name (30) ,
5450 fwd-primer-seq (31) ,
5451 gb-acronym (32) ,
5452 gb-anamorph (33) ,
5453 gb-synonym (34) ,
5454 genotype (35) ,
5455 germline (36) ,
5456 group (37) ,
5457 haplotype (38) ,
5458 identified-by (39) ,
5459 insertion-seq-name (40) ,
5460 isolate (41) ,
5461 isolation-source (42) ,
5462 lab-host (43) ,
5463 lat-lon (44) ,
5464 lineage (45) ,
5465 map (46) ,
5466 metagenome-source (47) ,
5467 metagenomic (48) ,
5468 old-lineage (49) ,
5469 old-name (50) ,
5470 orgmod-note (51) ,
5471 nat-host (52) ,
5472 pathovar (53) ,
5473 plasmid-name (54) ,
5474 plastid-name (55) ,
5475 pop-variant (56) ,
5476 rearranged (57) ,
5477 rev-primer-name (58) ,
5478 rev-primer-seq (59) ,
5479 segment (60) ,
5480 serogroup (61) ,
5481 serotype (62) ,
5482 serovar (63) ,
5483 sex (64) ,
5484 specimen-voucher (65) ,
5485 strain (66) ,
5486 subclone (67) ,
5487 subgroup (68) ,
5488 subsource-note (69),
5489 sub-species (70) ,
5490 substrain (71) ,
5491 subtype (72) ,
5492 synonym (73) ,
5493 taxname (74) ,
5494 teleomorph (75) ,
5495 tissue-lib (76) ,
5496 tissue-type (77) ,
5497 transgenic (78) ,
5498 transposon-name (79) ,
5499 type (80) ,
5500 variety (81) ,
5501 specimen-voucher-INST (82) ,
5502 specimen-voucher-COLL (83) ,
5503 specimen-voucher-SpecID (84) ,
5504 culture-collection-INST (85) ,
5505 culture-collection-COLL (86) ,
5506 culture-collection-SpecID (87) ,
5507 bio-material-INST (88) ,
5508 bio-material-COLL (89) ,
5509 bio-material-SpecID (90),
5510 all-notes (91),
5511 mating-type (92),
5512 linkage-group (93) ,
5513 haplogroup (94),
5514 all-quals (95),
5515 dbxref (96) ,
5516 taxid (97) ,
5517 all-primers (98) ,
5518 altitude (99)
5519 }
5520
5521 Source-qual-pair ::= SEQUENCE {
5522 field-from Source-qual ,
5523 field-to Source-qual }
5524
5525 Source-location ::= ENUMERATED {
5526 unknown (0) ,
5527 genomic (1) ,
5528 chloroplast (2) ,
5529 chromoplast (3) ,
5530 kinetoplast (4) ,
5531 mitochondrion (5) ,
5532 plastid (6) ,
5533 macronuclear (7) ,
5534 extrachrom (8) ,
5535 plasmid (9) ,
5536 transposon (10) ,
5537 insertion-seq (11) ,
5538 cyanelle (12) ,
5539 proviral (13) ,
5540 virion (14) ,
5541 nucleomorph (15) ,
5542 apicoplast (16) ,
5543 leucoplast (17) ,
5544 proplastid (18) ,
5545 endogenous-virus (19) ,
5546 hydrogenosome (20) ,
5547 chromosome (21) ,
5548 chromatophore (22) }
5549
5550 Source-origin ::= ENUMERATED {
5551 unknown (0) ,
5552 natural (1) ,
5553 natmut (2) ,
5554 mut (3) ,
5555 artificial (4) ,
5556 synthetic (5) ,
5557 other (255) }
5558
5559 Source-qual-choice ::= CHOICE {
5560 textqual Source-qual ,
5561 location Source-location,
5562 origin Source-origin ,
5563 gcode INTEGER ,
5564 mgcode INTEGER }
5565
5566 Source-qual-text-val ::= SEQUENCE {
5567 srcqual Source-qual ,
5568 val VisibleString }
5569
5570 Source-qual-val-choice ::= CHOICE {
5571 textqual Source-qual-text-val ,
5572 location Source-location,
5573 origin Source-origin ,
5574 gcode INTEGER ,
5575 mgcode INTEGER }
5576
5577 Source-qual-val-set ::= SET OF Source-qual-val-choice
5578
5579 CDSGeneProt-field ::= ENUMERATED {
5580 cds-comment (1) ,
5581 gene-locus (2) ,
5582 gene-description (3) ,
5583 gene-comment (4) ,
5584 gene-allele (5) ,
5585 gene-maploc (6) ,
5586 gene-locus-tag (7) ,
5587 gene-synonym (8) ,
5588 gene-old-locus-tag (9) ,
5589 mrna-product (10) ,
5590 mrna-comment (11) ,
5591 prot-name (12) ,
5592 prot-description (13) ,
5593 prot-ec-number (14) ,
5594 prot-activity (15) ,
5595 prot-comment (16) ,
5596 mat-peptide-name (17) ,
5597 mat-peptide-description (18) ,
5598 mat-peptide-ec-number (19) ,
5599 mat-peptide-activity (20) ,
5600 mat-peptide-comment (21) ,
5601 cds-inference (22) ,
5602 gene-inference (23) ,
5603 codon-start (24) }
5604
5605 CDSGeneProt-field-pair ::= SEQUENCE {
5606 field-from CDSGeneProt-field ,
5607 field-to CDSGeneProt-field }
5608
5609 Molecule-type ::= ENUMERATED {
5610 unknown (0) ,
5611 genomic (1) ,
5612 precursor-RNA (2) ,
5613 mRNA (3) ,
5614 rRNA (4) ,
5615 tRNA (5) ,
5616 genomic-mRNA (6) ,
5617 cRNA (7) ,
5618 transcribed-RNA (8) ,
5619 ncRNA (9) ,
5620 transfer-messenger-RNA (10) ,
5621 macro-other (11) }
5622
5623 Technique-type ::= ENUMERATED {
5624 unknown (0) ,
5625 standard (1) ,
5626 est (2) ,
5627 sts (3) ,
5628 survey (4) ,
5629 genetic-map (5) ,
5630 physical-map (6) ,
5631 derived (7) ,
5632 concept-trans (8) ,
5633 seq-pept (9) ,
5634 both (10) ,
5635 seq-pept-overlap (11) ,
5636 seq-pept-homol (12) ,
5637 concept-trans-a (13) ,
5638 htgs-1 (14) ,
5639 htgs-2 (15) ,
5640 htgs-3 (16) ,
5641 fli-cDNA (17) ,
5642 htgs-0 (18) ,
5643 htc (19) ,
5644 wgs (20) ,
5645 barcode (21) ,
5646 composite-wgs-htgs (22) ,
5647 tsa (23) ,
5648 other (24) }
5649
5650 Completedness-type ::= ENUMERATED { -- completeness values; used by Molinfo-field and Molinfo-completedness-pair
5651 unknown (0) ,
5652 complete (1) ,
5653 partial (2) ,
5654 no-left (3) ,
5655 no-right (4) ,
5656 no-ends (5) ,
5657 has-left (6) ,
5658 has-right (7) ,
5659 other (8) } -- was (6), which duplicated has-left; enumeration values must be distinct
5660
5661 Molecule-class-type ::= ENUMERATED {
5662 unknown (0) ,
5663 dna (1) ,
5664 rna (2) ,
5665 protein (3) ,
5666 nucleotide (4),
5667 other (5) }
5668
5669 Topology-type ::= ENUMERATED {
5670 unknown (0) ,
5671 linear (1) ,
5672 circular (2) ,
5673 tandem (3) ,
5674 other (4) }
5675
5676 Strand-type ::= ENUMERATED {
5677 unknown (0) ,
5678 single (1) ,
5679 double (2) ,
5680 mixed (3) ,
5681 mixed-rev (4) ,
5682 other (5) }
5683
5684 Molinfo-field ::= CHOICE {
5685 molecule Molecule-type ,
5686 technique Technique-type ,
5687 completedness Completedness-type ,
5688 mol-class Molecule-class-type ,
5689 topology Topology-type ,
5690 strand Strand-type }
5691
5692 Molinfo-molecule-pair ::= SEQUENCE {
5693 from Molecule-type ,
5694 to Molecule-type }
5695
5696 Molinfo-technique-pair ::= SEQUENCE {
5697 from Technique-type ,
5698 to Technique-type }
5699
5700 Molinfo-completedness-pair ::= SEQUENCE {
5701 from Completedness-type ,
5702 to Completedness-type }
5703
5704 Molinfo-mol-class-pair ::= SEQUENCE {
5705 from Molecule-class-type ,
5706 to Molecule-class-type }
5707
5708 Molinfo-topology-pair ::= SEQUENCE {
5709 from Topology-type ,
5710 to Topology-type }
5711
5712 Molinfo-strand-pair ::= SEQUENCE {
5713 from Strand-type ,
5714 to Strand-type }
5715
5716 Molinfo-field-pair ::= CHOICE {
5717 molecule Molinfo-molecule-pair ,
5718 technique Molinfo-technique-pair ,
5719 completedness Molinfo-completedness-pair ,
5720 mol-class Molinfo-mol-class-pair ,
5721 topology Molinfo-topology-pair ,
5722 strand Molinfo-strand-pair }
5723
5724 Molinfo-field-list ::= SET OF Molinfo-field
5725
5726 Molinfo-field-constraint ::= SEQUENCE {
5727 field Molinfo-field ,
5728 is-not BOOLEAN DEFAULT FALSE }
5729
5730 -- publication fields --
5731
5732 Publication-field ::= ENUMERATED {
5733 cit (1) ,
5734 authors (2) ,
5735 journal (3) ,
5736 volume (4) ,
5737 issue (5) ,
5738 pages (6) ,
5739 date (7) ,
5740 serial-number (8) ,
5741 title (9) ,
5742 affiliation (10) ,
5743 affil-div (11) ,
5744 affil-city (12) ,
5745 affil-sub (13) ,
5746 affil-country (14) ,
5747 affil-street (15) ,
5748 affil-email (16) ,
5749 affil-fax (17) ,
5750 affil-phone (18) ,
5751 affil-zipcode (19),
5752 authors-initials (20),
5753 pmid (21),
5754 pub-class (22)
5755 }
5756
5757 -- structured comment fields --
5758
5759 Structured-comment-field ::= CHOICE { -- selects one field of a structured comment
5760 database NULL ,
5761 named VisibleString , -- the only alternative carrying a value; NOTE(review): presumably the field's name, confirm
5762 field-name NULL
5763 }
5764
5765 Structured-comment-field-pair ::= SEQUENCE { -- from/to pair of Structured-comment-field values; both members are required
5766 from Structured-comment-field ,
5767 to Structured-comment-field
5768 }
5769
5770 -- misc fields --
5771 -- these would not appear in pairs --
5772 Misc-field ::= ENUMERATED { -- four miscellaneous field kinds, numbered 1 to 4; no 0 entry
5773 genome-project-id (1) ,
5774 comment-descriptor (2) ,
5775 defline (3) ,
5776 keyword (4)
5777 }
5778
5779 -- dblink fields --
5780 DBLink-field-type ::= ENUMERATED { -- kinds of DBLink field, numbered 1 to 6; no 0 entry
5781 trace-assembly (1) ,
5782 bio-sample (2) ,
5783 probe-db (3) ,
5784 sequence-read-archve (4) , -- NOTE(review): spelled archve (no i); the spelling is part of the identifier, so renaming it would change the spec's visible names
5785 bio-project (5) ,
5786 assembly (6) }
5787
5788 DBLink-field-pair ::= SEQUENCE { -- from/to pair of DBLink-field-type values; both members are required
5789 from DBLink-field-type ,
5790 to DBLink-field-type
5791 }
5792
5793 -- complex constraints --
5794
5795 Pub-type ::= ENUMERATED { -- publication categories, numbered 0 (any) to 4
5796 any (0) ,
5797 published (1) ,
5798 unpublished (2) ,
5799 in-press (3) ,
5800 submitter-block (4) }
5801
5802 Pub-field-constraint ::= SEQUENCE { -- pairs a Publication-field with a String-constraint (String-constraint is defined outside this excerpt)
5803 field Publication-field ,
5804 constraint String-constraint }
5805
5806 Pub-field-special-constraint-type ::= CHOICE { -- five NULL alternatives: the selected alternative is the whole value
5807 is-present NULL ,
5808 is-not-present NULL ,
5809 is-all-caps NULL ,
5810 is-all-lower NULL ,
5811 is-all-punct NULL }
5812
5813 Pub-field-special-constraint ::= SEQUENCE { -- pairs a Publication-field with one Pub-field-special-constraint-type; both members are required
5814 field Publication-field ,
5815 constraint Pub-field-special-constraint-type }
5816
5817 Publication-constraint ::= SEQUENCE { -- type is required; field and special-field may each be omitted
5818 type Pub-type ,
5819 field Pub-field-constraint OPTIONAL ,
5820 special-field Pub-field-special-constraint OPTIONAL }
5821
5822 Source-constraint ::= SEQUENCE { -- all four members are OPTIONAL; the referenced types are defined outside this excerpt
5823 field1 Source-qual-choice OPTIONAL ,
5824 field2 Source-qual-choice OPTIONAL ,
5825 constraint String-constraint OPTIONAL ,
5826 type-constraint Object-type-constraint OPTIONAL }
5827
5828 CDSGeneProt-feature-type-constraint ::= ENUMERATED { -- six feature kinds, numbered 1 to 6; no 0 entry
5829 gene (1) ,
5830 mRNA (2) ,
5831 cds (3) ,
5832 prot (4) ,
5833 exon (5) ,
5834 mat-peptide (6) }
5835
5836 CDSGeneProt-pseudo-constraint ::= SEQUENCE { -- a feature kind plus an is-pseudo flag
5837 feature CDSGeneProt-feature-type-constraint ,
5838 is-pseudo BOOLEAN DEFAULT TRUE } -- TRUE when omitted
5839
5840 CDSGeneProt-constraint-field ::= CHOICE { -- CHOICE with a single alternative, wrapping CDSGeneProt-field
5841 field CDSGeneProt-field }
5842
5843 CDSGeneProt-qual-constraint ::= SEQUENCE { -- all three members are OPTIONAL
5844 field1 CDSGeneProt-constraint-field OPTIONAL ,
5845 field2 CDSGeneProt-constraint-field OPTIONAL ,
5846 constraint String-constraint OPTIONAL }
5847
5848 Field-constraint ::= SEQUENCE { -- pairs a Field-type with a String-constraint; both members are required
5849 field Field-type ,
5850 string-constraint String-constraint }
5851
5852 Sequence-constraint-rnamol ::= ENUMERATED { -- values numbered 0 (any) to 10; used by the rna alternative of Sequence-constraint-mol-type-constraint
5853 any (0) ,
5854 genomic (1) ,
5855 precursor-RNA (2) ,
5856 mRNA (3) ,
5857 rRNA (4) ,
5858 tRNA (5) ,
5859 genomic-mRNA (6) ,
5860 cRNA (7) ,
5861 transcribed-RNA (8) ,
5862 ncRNA (9) ,
5863 transfer-messenger-RNA (10) }
5864
5865 Sequence-constraint-mol-type-constraint ::= CHOICE { -- only rna carries a value (Sequence-constraint-rnamol); the other alternatives are NULL markers
5866 any NULL ,
5867 nucleotide NULL ,
5868 dna NULL ,
5869 rna Sequence-constraint-rnamol ,
5870 protein NULL }
5871
5872 Quantity-constraint ::= CHOICE { -- a single INTEGER tagged as equals, greater-than or less-than
5873 equals INTEGER ,
5874 greater-than INTEGER ,
5875 less-than INTEGER }
5876
5877 Feature-strandedness-constraint ::= ENUMERATED { -- values numbered 0 (any) to 6; any is the DEFAULT used by Sequence-constraint
5878 any (0) ,
5879 minus-only (1) ,
5880 plus-only (2) ,
5881 at-least-one-minus (3) ,
5882 at-least-one-plus (4) ,
5883 no-minus (5) ,
5884 no-plus (6) }
5885
5886 Sequence-constraint ::= SEQUENCE { -- feature is the only member that is neither OPTIONAL nor DEFAULT
5887 seqtype Sequence-constraint-mol-type-constraint OPTIONAL ,
5888 id String-constraint OPTIONAL ,
5889 feature Macro-feature-type , -- required; Macro-feature-type is defined outside this excerpt
5890 num-type-features Quantity-constraint OPTIONAL ,
5891 num-features Quantity-constraint OPTIONAL ,
5892 length Quantity-constraint OPTIONAL ,
5893 strandedness Feature-strandedness-constraint DEFAULT any } -- any when omitted
5894
5895 Match-type-constraint ::= ENUMERATED { -- three-valued: dont-care (0), yes (1), no (2)
5896 dont-care (0) ,
5897 yes (1) ,
5898 no (2) }
5899
5900 Translation-constraint ::= SEQUENCE { -- two required String-constraint-set members plus two members that may be omitted
5901 actual-strings String-constraint-set ,
5902 transl-strings String-constraint-set ,
5903 internal-stops Match-type-constraint DEFAULT dont-care , -- dont-care when omitted
5904 num-mismatches Quantity-constraint OPTIONAL }
5905
5906 Constraint-choice ::= CHOICE { -- one constraint, selected from eleven alternatives
5907 string String-constraint ,
5908 location Location-constraint ,
5909 field Field-constraint , -- a Field-type together with a String-constraint
5910 source Source-constraint ,
5911 cdsgeneprot-qual CDSGeneProt-qual-constraint ,
5912 cdsgeneprot-pseudo CDSGeneProt-pseudo-constraint ,
5913 sequence Sequence-constraint ,
5914 pub Publication-constraint ,
5915 molinfo Molinfo-field-constraint ,
5916 field-missing Field-type , -- carries a bare Field-type, with no String-constraint attached
5917 translation Translation-constraint }
5918
5919 Constraint-choice-set ::= SET OF Constraint-choice -- SET OF: an unordered collection of Constraint-choice values
5920
5921 Text-marker ::= CHOICE { -- free-text carries a string; digits and letters are NULL markers
5922 free-text VisibleString ,
5923 digits NULL ,
5924 letters NULL }
5925
5926 Text-portion ::= SEQUENCE { -- two optional Text-marker members, three required BOOLEAN members and two defaulted BOOLEAN members
5927 left-marker Text-marker OPTIONAL ,
5928 include-left BOOLEAN , -- required
5929 right-marker Text-marker OPTIONAL ,
5930 include-right BOOLEAN , -- required
5931 inside BOOLEAN , -- required
5932 case-sensitive BOOLEAN DEFAULT FALSE , -- FALSE when omitted
5933 whole-word BOOLEAN DEFAULT FALSE } -- FALSE when omitted
5934
5935 Field-edit-location ::= ENUMERATED { -- anywhere (0), beginning (1), end (2); anywhere is the DEFAULT used by Field-edit
5936 anywhere (0) ,
5937 beginning (1) ,
5938 end (2) }
5939
5940 Field-edit ::= SEQUENCE { -- find-txt is the only required member
5941 find-txt VisibleString ,
5942 repl-txt VisibleString OPTIONAL ,
5943 location Field-edit-location DEFAULT anywhere , -- anywhere when omitted
5944 case-insensitive BOOLEAN DEFAULT FALSE } -- FALSE when omitted; note the flag here is case-insensitive, whereas Text-portion has a case-sensitive flag
5945
5946 Field-type ::= CHOICE { -- a single field reference, selected from nine alternatives
5947 source-qual Source-qual-choice ,
5948 feature-field Feature-field ,
5949 rna-field Rna-qual ,
5950 cds-gene-prot CDSGeneProt-field ,
5951 molinfo-field Molinfo-field ,
5952 pub Publication-field ,
5953 struc-comment-field Structured-comment-field ,
5954 misc Misc-field ,
5955 dblink DBLink-field-type }
5956
5957 Field-pair-type ::= CHOICE { -- a pair of fields, selected from seven alternatives; unlike Field-type there is no pub or misc alternative
5958 source-qual Source-qual-pair ,
5959 feature-field Feature-field-pair ,
5960 rna-field Rna-qual-pair ,
5961 cds-gene-prot CDSGeneProt-field-pair ,
5962 molinfo-field Molinfo-field-pair ,
5963 struc-comment-field Structured-comment-field-pair ,
5964 dblink DBLink-field-pair}
5965
5966 ExistingTextOption ::= ENUMERATED { -- values numbered 1 to 13; no 0 entry
5967 replace-old (1) ,
5968 append-semi (2) , -- values 2 to 6 begin with append
5969 append-space (3) ,
5970 append-colon (4) ,
5971 append-comma (5) ,
5972 append-none (6) ,
5973 prefix-semi (7) , -- values 7 to 11 begin with prefix and mirror the append suffixes
5974 prefix-space (8) ,
5975 prefix-colon (9) ,
5976 prefix-comma (10) ,
5977 prefix-none (11) ,
5978 leave-old (12) ,
5979 add-qual (13) }
5980
5981
5982 Apply-action ::= SEQUENCE { -- a field, a string value and an ExistingTextOption; all three members are required
5983 field Field-type ,
5984 value VisibleString ,
5985 existing-text ExistingTextOption }
5986
5987 Edit-action ::= SEQUENCE { -- pairs a Field-edit with a Field-type; both members are required
5988 edit Field-edit ,
5989 field Field-type }
5990
5991 Cap-change ::= ENUMERATED { -- values numbered 0 (none) to 7
5992 none (0) ,
5993 tolower (1) ,
5994 toupper (2) ,
5995 firstcap (3) ,
5996 firstcaprestnochange (4) , -- NOTE(review): written without hyphens, unlike firstlower-restnochange on the next line; the spelling is part of the identifier
5997 firstlower-restnochange (5) ,
5998 cap-word-space (6) ,
5999 cap-word-space-punc (7)
6000 }
6001
6002 Text-transform ::= CHOICE { -- one transformation: a Field-edit, a Cap-change or a Text-portion
6003 edit Field-edit ,
6004 caps Cap-change ,
6005 remove Text-portion }
6006
6007 Text-transform-set ::= SET OF Text-transform -- SET OF: an unordered collection of Text-transform values
6008
6009 Convert-action ::= SEQUENCE { -- fields and existing-text are required; the other three members have defaults
6010 fields Field-pair-type ,
6011 strip-name BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6012 keep-original BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6013 capitalization Cap-change DEFAULT none , -- none when omitted
6014 existing-text ExistingTextOption }
6015
6016 Copy-action ::= SEQUENCE { -- a Field-pair-type and an ExistingTextOption; both members are required
6017 fields Field-pair-type ,
6018 existing-text ExistingTextOption }
6019
6020 Swap-action ::= SEQUENCE { -- wraps a single required Field-pair-type
6021 fields Field-pair-type }
6022
6023 AECRParse-action ::= SEQUENCE { -- portion, fields and existing-text are required; the rest have defaults or are OPTIONAL
6024 portion Text-portion ,
6025 fields Field-pair-type ,
6026 remove-from-parsed BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6027 remove-left BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6028 remove-right BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6029 transform Text-transform-set OPTIONAL ,
6030 existing-text ExistingTextOption }
6031
6032 Remove-action ::= SEQUENCE { -- wraps a single required Field-type
6033 field Field-type }
6034
6035 Remove-outside-action ::= SEQUENCE { -- a Text-portion and a Field-type, plus one defaulted flag
6036 portion Text-portion ,
6037 field Field-type ,
6038 remove-if-not-found BOOLEAN DEFAULT FALSE } -- FALSE when omitted
6039
6040 Action-choice ::= CHOICE { -- one of eight action structures; used as the action member of AECR-action
6041 apply Apply-action ,
6042 edit Edit-action ,
6043 convert Convert-action ,
6044 copy Copy-action ,
6045 swap Swap-action ,
6046 remove Remove-action ,
6047 parse AECRParse-action ,
6048 remove-outside Remove-outside-action }
6049
6050 AECR-action ::= SEQUENCE { -- an Action-choice with an optional set of constraints
6051 action Action-choice ,
6052 also-change-mrna BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6053 constraint Constraint-choice-set OPTIONAL }
6054
6055 Parse-src-org-choice ::= CHOICE { -- either a Source-qual value or the NULL marker taxname-after-binomial
6056 source-qual Source-qual ,
6057 taxname-after-binomial NULL }
6058
6059 Parse-src-org ::= SEQUENCE { -- a Parse-src-org-choice plus a defaulted Object-type-constraint
6060 field Parse-src-org-choice ,
6061 type Object-type-constraint DEFAULT any } -- any when omitted
6062
6063 -- For Parse-src-general-id tag, specify the db of the id from which you
6064 -- want to retrieve the tag. If empty or null, any db will do.
6065 Parse-src-general-id ::= CHOICE { -- whole-text and db are NULL markers; only tag carries a string
6066 whole-text NULL ,
6067 db NULL ,
6068 tag VisibleString }
6069
6070 Parse-src ::= CHOICE { -- nine alternatives; org, structured-comment and general-id carry data, the rest are NULL markers
6071 defline NULL ,
6072 flatfile NULL ,
6073 local-id NULL ,
6074 org Parse-src-org ,
6075 comment NULL ,
6076 bankit-comment NULL ,
6077 structured-comment VisibleString ,
6078 file-id NULL ,
6079 general-id Parse-src-general-id }
6080
6081 Parse-dst-org ::= SEQUENCE { -- a Source-qual-choice plus a defaulted Object-type-constraint
6082 field Source-qual-choice ,
6083 type Object-type-constraint DEFAULT any } -- any when omitted
6084
6085 Parse-dest ::= CHOICE { -- five alternatives; defline and comment-descriptor are NULL markers, the rest carry data
6086 defline NULL ,
6087 org Parse-dst-org ,
6088 featqual Feature-field-legal ,
6089 comment-descriptor NULL ,
6090 dbxref VisibleString }
6091
6092 Parse-action ::= SEQUENCE { -- portion, src, dest and existing-text are required; the rest have defaults or are OPTIONAL
6093 portion Text-portion ,
6094 src Parse-src ,
6095 dest Parse-dest ,
6096 capitalization Cap-change DEFAULT none , -- none when omitted
6097 remove-from-parsed BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6098 transform Text-transform-set OPTIONAL ,
6099 existing-text ExistingTextOption }
6100
6101
6102 Location-interval ::= SEQUENCE { -- two required INTEGER members; the spec does not state whether positions are 0-based or 1-based
6103 from INTEGER ,
6104 to INTEGER }
6105
6106 Location-choice ::= CHOICE { -- a Location-interval, the NULL marker whole-sequence, or a single INTEGER point
6107 interval Location-interval ,
6108 whole-sequence NULL ,
6109 point INTEGER }
6110
6111 Sequence-list ::= SET OF VisibleString -- SET OF: an unordered collection of strings
6112 Sequence-list-choice ::= CHOICE { -- either an explicit Sequence-list or the NULL marker all
6113 list Sequence-list ,
6114 all NULL }
6115
6116 Apply-feature-action ::= SEQUENCE { -- type, location and seq-list are required; the remaining members have defaults or are OPTIONAL
6117 type Macro-feature-type ,
6118 partial5 BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6119 partial3 BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6120 plus-strand BOOLEAN DEFAULT TRUE , -- TRUE when omitted
6121 location Location-choice ,
6122 seq-list Sequence-list-choice ,
6123 add-redundant BOOLEAN DEFAULT TRUE , -- TRUE when omitted
6124 add-mrna BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6125 apply-to-parts BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6126 only-seg-num INTEGER DEFAULT -1 , -- defaults to -1; presumably a sentinel for no particular segment (TODO confirm)
6127 fields Feat-qual-legal-set OPTIONAL,
6128 src-fields Source-qual-val-set OPTIONAL }
6129
6130 Remove-feature-action ::= SEQUENCE { -- a Macro-feature-type with an optional set of constraints
6131 type Macro-feature-type ,
6132 constraint Constraint-choice-set OPTIONAL }
6133
6134 -- for convert features --
6135 Convert-from-CDS-options ::= SEQUENCE { -- three required BOOLEAN flags
6136 remove-mRNA BOOLEAN ,
6137 remove-gene BOOLEAN ,
6138 remove-transcript-id BOOLEAN }
6139
6140 Convert-feature-src-options ::= CHOICE { -- CHOICE with a single alternative, cds
6141 cds Convert-from-CDS-options }
6142
6143 Bond-type ::= ENUMERATED { -- values numbered 1 to 5, ending with other; no 0 entry
6144 disulfide (1) ,
6145 thioester (2) ,
6146 crosslink (3) ,
6147 thioether (4) ,
6148 other (5) }
6149
6150
6151 Site-type ::= ENUMERATED { -- values numbered 1 to 27, ending with other; no 0 entry
6152 active (1) ,
6153 binding (2) ,
6154 cleavage (3) ,
6155 inhibit (4) ,
6156 modified (5) ,
6157 glycosylation (6) ,
6158 myristoylation (7) ,
6159 mutagenized (8) ,
6160 metal-binding (9) ,
6161 phosphorylation (10) ,
6162 acetylation (11) ,
6163 amidation (12) ,
6164 methylation (13) ,
6165 hydroxylation (14) ,
6166 sulfatation (15) ,
6167 oxidative-deamination (16) ,
6168 pyrrolidone-carboxylic-acid (17) ,
6169 gamma-carboxyglutamic-acid (18) ,
6170 blocked (19) ,
6171 lipid-binding (20) ,
6172 np-binding (21) ,
6173 dna-binding (22) ,
6174 signal-peptide (23) ,
6175 transit-peptide (24) ,
6176 transmembrane-region (25) ,
6177 nitrosylation (26) ,
6178 other (27) }
6179
6180 -- other choice is to create protein sequences, skipping bad --
6181 Region-type ::= SEQUENCE { -- a single required BOOLEAN, create-nucleotide
6182 create-nucleotide BOOLEAN }
6183
6184 Convert-feature-dst-options ::= CHOICE { -- five alternatives, each carrying a value of a different type
6185 bond Bond-type ,
6186 site Site-type ,
6187 region Region-type ,
6188 ncrna-class VisibleString ,
6189 remove-original BOOLEAN }
6190
6191
6192 Convert-feature-action ::= SEQUENCE { -- type-from, type-to and leave-original are required; the other members are OPTIONAL
6193 type-from Macro-feature-type ,
6194 type-to Macro-feature-type ,
6195 src-options Convert-feature-src-options OPTIONAL ,
6196 dst-options Convert-feature-dst-options OPTIONAL ,
6197 leave-original BOOLEAN , -- NOTE(review): Convert-feature-dst-options also has a remove-original alternative; how the two interact is not stated here
6198 src-feat-constraint Constraint-choice-set OPTIONAL }
6199
6200
6201 Feature-location-strand-from ::= ENUMERATED { -- values numbered 0 (any) to 4
6202 any (0) ,
6203 plus (1) ,
6204 minus (2) ,
6205 unknown (3) ,
6206 both (4) }
6207
6208 Feature-location-strand-to ::= ENUMERATED { -- values numbered 1 to 5; has reverse but, unlike Feature-location-strand-from, no any entry
6209 plus (1) ,
6210 minus (2) ,
6211 unknown (3) ,
6212 both (4) ,
6213 reverse (5) }
6214
6215 Edit-location-strand ::= SEQUENCE { -- pairs a strand-from value with a strand-to value; both members are required
6216 strand-from Feature-location-strand-from ,
6217 strand-to Feature-location-strand-to }
6218
6219 Partial-5-set-constraint ::= ENUMERATED { -- values numbered 0 (all) to 3
6220 all (0) ,
6221 at-end (1) ,
6222 bad-start (2) ,
6223 frame-not-one (3) }
6224
6225 Partial-5-set-action ::= SEQUENCE { -- a Partial-5-set-constraint plus a required extend flag
6226 constraint Partial-5-set-constraint ,
6227 extend BOOLEAN }
6228
6229 Partial-5-clear-constraint ::= ENUMERATED { -- values numbered 0 (all) to 2
6230 all (0) ,
6231 not-at-end (1) ,
6232 good-start (2) }
6233
6234 Partial-3-set-constraint ::= ENUMERATED { -- values numbered 0 (all) to 2
6235 all (0) ,
6236 at-end (1) ,
6237 bad-end (2) }
6238
6239 Partial-3-set-action ::= SEQUENCE { -- a Partial-3-set-constraint plus a required extend flag
6240 constraint Partial-3-set-constraint ,
6241 extend BOOLEAN }
6242
6243 Partial-3-clear-constraint ::= ENUMERATED { -- values numbered 0 (all) to 2
6244 all (0) ,
6245 not-at-end (1) ,
6246 good-end (2) }
6247
6248 Partial-both-set-constraint ::= ENUMERATED { -- two values: all (0) and at-end (1)
6249 all (0) ,
6250 at-end (1) }
6251
6252 Partial-both-set-action ::= SEQUENCE { -- a Partial-both-set-constraint plus a required extend flag
6253 constraint Partial-both-set-constraint ,
6254 extend BOOLEAN }
6255
6256 Partial-both-clear-constraint ::= ENUMERATED { -- two values: all (0) and not-at-end (1)
6257 all (0) ,
6258 not-at-end (1) }
6259
6260 Convert-location-type ::= ENUMERATED { -- join (1), order (2), merge (3); no 0 entry
6261 join (1) ,
6262 order (2) ,
6263 merge (3) }
6264
6265 Extend-to-feature ::= SEQUENCE { -- type and include-feat are required; distance may be omitted
6266 type Macro-feature-type ,
6267 include-feat BOOLEAN ,
6268 distance Quantity-constraint OPTIONAL }
6269
6270 Location-edit-type ::= CHOICE { -- twelve alternatives; extend-5 and extend-3 are NULL markers, the rest carry data
6271 strand Edit-location-strand ,
6272 set-5-partial Partial-5-set-action , -- the set alternatives carry an action structure (constraint plus extend flag)
6273 clear-5-partial Partial-5-clear-constraint , -- the clear alternatives carry only an enumerated constraint
6274 set-3-partial Partial-3-set-action ,
6275 clear-3-partial Partial-3-clear-constraint ,
6276 set-both-partial Partial-both-set-action ,
6277 clear-both-partial Partial-both-clear-constraint ,
6278 convert Convert-location-type ,
6279 extend-5 NULL ,
6280 extend-3 NULL ,
6281 extend-5-to-feat Extend-to-feature ,
6282 extend-3-to-feat Extend-to-feature }
6283
6284 Edit-feature-location-action ::= SEQUENCE { -- type and action are required; the other three members are OPTIONAL
6285 type Macro-feature-type ,
6286 action Location-edit-type ,
6287 retranslate-cds BOOLEAN OPTIONAL , -- OPTIONAL with no DEFAULT, so an absent value is distinct from FALSE in the data
6288 also-edit-gene BOOLEAN OPTIONAL , -- OPTIONAL with no DEFAULT, as above
6289 constraint Constraint-choice-set OPTIONAL }
6290
6291 Molinfo-block ::= SEQUENCE { -- to-list is required; from-list and constraint may be omitted
6292 to-list Molinfo-field-list ,
6293 from-list Molinfo-field-list OPTIONAL ,
6294 constraint Constraint-choice-set OPTIONAL }
6295
6296 Descriptor-type ::= ENUMERATED { -- values numbered 0 (all) to 11
6297 all (0) ,
6298 title (1) ,
6299 source (2) ,
6300 publication (3) ,
6301 comment (4) ,
6302 genbank (5) ,
6303 user (6) ,
6304 create-date (7) ,
6305 update-date (8) ,
6306 mol-info (9) ,
6307 structured-comment (10) ,
6308 genome-project-id (11) }
6309
6310 Remove-descriptor-action ::= SEQUENCE { -- a Descriptor-type with an optional set of constraints
6311 type Descriptor-type ,
6312 constraint Constraint-choice-set OPTIONAL }
6313
6314 Autodef-list-type ::= ENUMERATED { -- values numbered 1 to 4; no 0 entry
6315 feature-list (1) ,
6316 complete-sequence (2) ,
6317 complete-genome (3) ,
6318 sequence (4) }
6319
6320 Autodef-misc-feat-parse-rule ::= ENUMERATED { -- two values, numbered 1 and 2; value 2 is the DEFAULT used by Autodef-action
6321 use-comment-before-first-semicolon (1) ,
6322 look-for-noncoding-products (2) }
6323
6324 Autodef-action ::= SEQUENCE { -- clause-list-type is the only required member
6325 modifiers SET OF Source-qual OPTIONAL , -- inline unordered collection of Source-qual values; may be omitted
6326 clause-list-type Autodef-list-type ,
6327 misc-feat-parse-rule Autodef-misc-feat-parse-rule DEFAULT look-for-noncoding-products } -- look-for-noncoding-products when omitted
6328
6329 Fix-pub-caps-action ::= SEQUENCE { -- no member is required: five are OPTIONAL and punct-only has a default
6330 title BOOLEAN OPTIONAL ,
6331 authors BOOLEAN OPTIONAL ,
6332 affiliation BOOLEAN OPTIONAL ,
6333 affil-country BOOLEAN OPTIONAL ,
6334 punct-only BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6335 constraint Constraint-choice-set OPTIONAL }
6336
6337 Sort-order ::= ENUMERATED { -- values numbered 1 to 3; no 0 entry
6338 short-to-long (1) ,
6339 long-to-short (2) ,
6340 alphabetical (3) }
6341
6342 Sort-fields-action ::= SEQUENCE { -- field and order are required; constraint may be omitted
6343 field Field-type ,
6344 order Sort-order ,
6345 constraint Constraint-choice-set OPTIONAL }
6346
6347 Fix-author-caps ::= SEQUENCE { -- a single required BOOLEAN, last-name-only
6348 last-name-only BOOLEAN }
6349
6350 Fix-caps-action ::= CHOICE { -- five alternatives; src-country and mouse-strain are NULL markers, the rest carry data
6351 pub Fix-pub-caps-action ,
6352 src-country NULL ,
6353 mouse-strain NULL ,
6354 src-qual Source-qual ,
6355 author Fix-author-caps }
6356
6357 Fix-format-action ::= CHOICE { -- four NULL alternatives: the selected alternative is the whole value
6358 collection-date NULL ,
6359 lat-lon NULL ,
6360 primers NULL ,
6361 protein-name NULL }
6362
6363 Remove-duplicate-feature-action ::= SEQUENCE { -- a feature type and three required BOOLEAN flags; rd-constraint may be omitted
6364 type Macro-feature-type ,
6365 ignore-partials BOOLEAN ,
6366 case-sensitive BOOLEAN ,
6367 remove-proteins BOOLEAN ,
6368 rd-constraint Constraint-choice-set OPTIONAL } -- named rd-constraint here; most other action types in this module call this member constraint
6369
6370 Gene-xref-suppression-type ::= ENUMERATED { -- any (0), suppressing (1), non-suppressing (2)
6371 any (0) ,
6372 suppressing (1) ,
6373 non-suppressing (2) }
6374
6375 Gene-xref-necessary-type ::= ENUMERATED { -- any (0), necessary (1), unnecessary (2)
6376 any (0) ,
6377 necessary (1) ,
6378 unnecessary (2) }
6379
6380 Gene-xref-type ::= SEQUENCE { -- a feature type plus suppression and necessary selectors; all three members are required
6381 feature Macro-feature-type ,
6382 suppression Gene-xref-suppression-type ,
6383 necessary Gene-xref-necessary-type }
6384
6385 Xref-type ::= CHOICE { -- CHOICE with a single alternative, gene
6386 gene Gene-xref-type }
6387
6388 Remove-xrefs-action ::= SEQUENCE { -- an Xref-type with an optional set of constraints
6389 xref-type Xref-type ,
6390 constraint Constraint-choice-set OPTIONAL }
6391
6392 Make-gene-xref-action ::= SEQUENCE { -- a Macro-feature-type with an optional set of constraints
6393 feature Macro-feature-type ,
6394 constraint Constraint-choice-set OPTIONAL }
6395
6396 Author-fix-type ::= ENUMERATED { -- values numbered 1 to 3; no 0 entry
6397 truncate-middle-initials (1) ,
6398 strip-suffix (2) ,
6399 move-middle-to-first (3) }
6400
6401 Author-fix-action ::= SEQUENCE { -- an Author-fix-type with an optional set of constraints
6402 fix-type Author-fix-type ,
6403 constraint Constraint-choice-set OPTIONAL }
6404
6405 Update-sequences-action ::= SEQUENCE { -- a required filename string plus one defaulted flag
6406 filename VisibleString ,
6407 add-cit-subs BOOLEAN DEFAULT FALSE } -- FALSE when omitted
6408
6409 Create-TSA-ids-src ::= CHOICE { -- either the NULL marker local-id, or defline, which carries a Text-portion
6410 local-id NULL ,
6411 defline Text-portion
6412 }
6413
6414 Create-TSA-ids-action ::= SEQUENCE { -- src is required; suffix and id-text-portion may be omitted
6415 src Create-TSA-ids-src ,
6416 suffix VisibleString OPTIONAL ,
6417 id-text-portion Text-portion OPTIONAL }
6418
6419 Autofix-action ::= SEQUENCE { -- a single required string, test-name
6420 test-name VisibleString }
6421
6422 Fix-sets-action ::= CHOICE { -- three NULL alternatives: the selected alternative is the whole value
6423 remove-single-item-set NULL ,
6424 renormalize-nuc-prot-sets NULL ,
6425 fix-pop-to-phy NULL
6426 }
6427
6428 Table-match-type ::= CHOICE { -- eight alternatives; only src-qual carries data, the rest are NULL markers
6429 feature-id NULL ,
6430 gene-locus-tag NULL ,
6431 protein-id NULL,
6432 dbxref NULL ,
6433 nuc-id NULL ,
6434 src-qual Source-qual-choice ,
6435 protein-name NULL ,
6436 any NULL
6437 }
6438
6439 Table-match ::= SEQUENCE { -- a Table-match-type plus a defaulted String-location
6440 match-type Table-match-type ,
6441 match-location String-location DEFAULT equals -- equals when omitted; String-location is defined outside this excerpt
6442 }
6443
6444
6445 Apply-table-extra-data ::= CHOICE { -- CHOICE with a single NULL alternative, table; the spec itself carries no table content
6446 table NULL }
6447
6448 Apply-table-action ::= SEQUENCE { -- filename and match-type are required; the rest have defaults or are OPTIONAL
6449 filename VisibleString ,
6450 match-type Table-match , -- note: the member is named match-type but its type is Table-match, which itself has a match-type member
6451 in-memory-table Apply-table-extra-data OPTIONAL ,
6452 also-change-mrna BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6453 skip-blanks BOOLEAN DEFAULT TRUE -- TRUE when omitted
6454 }
6455
6456 Add-file-action ::= SEQUENCE { -- filename is required; in-memory-table may be omitted
6457 filename VisibleString ,
6458 in-memory-table Apply-table-extra-data OPTIONAL
6459 }
6460
6461 Add-descriptor-list-action ::= SEQUENCE { -- an Add-file-action with an optional set of constraints
6462 descriptor-list Add-file-action ,
6463 constraint Constraint-choice-set OPTIONAL
6464 }
6465
6466 Remove-sequences-action ::= SEQUENCE { -- constraint is required here, unlike the OPTIONAL constraint member of, for example, Remove-feature-action
6467 constraint Constraint-choice-set
6468 }
6469
6470 Update-replaced-ec-numbers-action ::= SEQUENCE { -- three required BOOLEAN flags
6471 delete-improper-format BOOLEAN ,
6472 delete-unrecognized BOOLEAN ,
6473 delete-multiple-replacement BOOLEAN
6474 }
6475
6476
6477 Retranslate-cds-action ::= SEQUENCE { -- a single required BOOLEAN, obey-stop-codon
6478 obey-stop-codon BOOLEAN
6479 }
6480
6481
6482 Macro-action-choice ::= CHOICE { -- one macro action; alternatives typed NULL take no parameters, the others carry the named action structure
6483 aecr AECR-action ,
6484 parse Parse-action ,
6485 add-feature Apply-feature-action ,
6486 remove-feature Remove-feature-action ,
6487 convert-feature Convert-feature-action ,
6488 edit-location Edit-feature-location-action ,
6489 remove-descriptor Remove-descriptor-action ,
6490 autodef Autodef-action ,
6491 removesets NULL ,
6492 trim-junk-from-primer-seq NULL ,
6493 trim-stop-from-complete-cds NULL ,
6494 fix-usa-and-states NULL ,
6495 synchronize-cds-partials NULL ,
6496 adjust-for-consensus-splice NULL ,
6497 fix-pub-caps Fix-pub-caps-action , -- Fix-pub-caps-action is also reachable through fix-caps (Fix-caps-action, alternative pub)
6498 remove-seg-gaps NULL ,
6499 sort-fields Sort-fields-action ,
6500 apply-molinfo-block Molinfo-block ,
6501 fix-caps Fix-caps-action ,
6502 fix-format Fix-format-action ,
6503 fix-spell NULL ,
6504 remove-duplicate-features Remove-duplicate-feature-action ,
6505 remove-lineage-notes NULL ,
6506 remove-xrefs Remove-xrefs-action ,
6507 make-gene-xrefs Make-gene-xref-action ,
6508 make-bold-xrefs NULL ,
6509 fix-author Author-fix-action ,
6510 update-sequences Update-sequences-action ,
6511 add-trans-splicing NULL ,
6512 remove-invalid-ecnumbers NULL ,
6513 create-tsa-ids Create-TSA-ids-action ,
6514 perform-autofix Autofix-action ,
6515 fix-sets Fix-sets-action ,
6516 apply-table Apply-table-action ,
6517 remove-sequences Remove-sequences-action ,
6518 propagate-sequence-technology NULL ,
6519 add-file-descriptors Add-descriptor-list-action ,
6520 propagate-missing-old-name NULL ,
6521 autoapply-structured-comments NULL ,
6522 reorder-structured-comments NULL ,
6523 remove-duplicate-structured-comments NULL ,
6524 lookup-taxonomy NULL ,
6525 lookup-pubs NULL ,
6526 trim-terminal-ns NULL ,
6527 update-replaced-ecnumbers Update-replaced-ec-numbers-action ,
6528 instantiate-protein-titles NULL ,
6529 retranslate-cds Retranslate-cds-action ,
6530 add-selenocysteine-except NULL ,
6531 join-short-trnas NULL }
6532
6533
6534 Macro-action-list ::= SET OF Macro-action-choice -- NOTE(review): SET OF is formally unordered in ASN.1; whether consumers rely on element order cannot be told from this spec
6535
6536
6537 Search-func ::= CHOICE { -- ten alternatives; those typed NULL take no parameter
6538 string-constraint String-constraint ,
6539 contains-plural NULL ,
6540 n-or-more-brackets-or-parentheses INTEGER , -- carries an INTEGER; presumably the n in the name (TODO confirm)
6541 three-numbers NULL ,
6542 underscore NULL ,
6543 prefix-and-numbers VisibleString ,
6544 all-caps NULL ,
6545 unbalanced-paren NULL ,
6546 too-long INTEGER , -- carries an INTEGER; presumably a length threshold (TODO confirm)
6547 has-term VisibleString }
6548
6549 Simple-replace ::= SEQUENCE { -- no member is required: replace is OPTIONAL and both flags have defaults
6550 replace VisibleString OPTIONAL,
6551 whole-string BOOLEAN DEFAULT FALSE , -- FALSE when omitted
6552 weasel-to-putative BOOLEAN DEFAULT FALSE } -- FALSE when omitted
6553
6554 Replace-func ::= CHOICE { -- simple-replace carries a Simple-replace; haem-replace carries a string
6555 simple-replace Simple-replace ,
6556 haem-replace VisibleString }
6557
6558 Replace-rule ::= SEQUENCE { -- a Replace-func plus one defaulted flag
6559 replace-func Replace-func ,
6560 move-to-note BOOLEAN DEFAULT FALSE } -- FALSE when omitted
6561
6562 Fix-type ::= ENUMERATED { -- values numbered 0 (none) to 14; none is the DEFAULT used by the rule-type member of Suspect-rule
6563 none (0) ,
6564 typo (1) ,
6565 putative-typo (2) ,
6566 quickfix (3) ,
6567 no-organelle-for-prokaryote (4),
6568 might-be-nonfunctional (5),
6569 database (6),
6570 remove-organism-name (7),
6571 inappropriate-symbol (8),
6572 evolutionary-relationship (9),
6573 use-protein (10),
6574 hypothetical (11),
6575 british (12),
6576 description (13),
6577 gene (14) }
6578
6579 Suspect-rule ::= SEQUENCE { -- find is the only required member
6580 find Search-func ,
6581 except Search-func OPTIONAL , -- same type as find; may be omitted
6582 feat-constraint Constraint-choice-set OPTIONAL ,
6583 rule-type Fix-type DEFAULT none , -- none when omitted
6584 replace Replace-rule OPTIONAL ,
6585 description VisibleString OPTIONAL }
6586
6587 Suspect-rule-set ::= SET OF Suspect-rule -- SET OF: an unordered collection of Suspect-rule values
6588
6589
6590
6591 END
|
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |