|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/asn/asn.all |
source navigation diff markup identifier search freetext search file search |
1 --$Revision: 6.0 $
2 --**********************************************************************
3 --
4 -- asn.all
5 -- this file contains all NCBI ASN.1 specifications together
6 --
7 -- by James Ostell, 1990
8 --
9 --**********************************************************************
10
11 --$Revision: 6.5 $
12 --**********************************************************************
13 --
14 -- NCBI General Data elements
15 -- by James Ostell, 1990
16 -- Version 3.0 - June 1994
17 --
18 --**********************************************************************
19
20 NCBI-General DEFINITIONS ::=
21 BEGIN
22
23 EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;
24
25 -- StringStore is really a VisibleString. It is used to define very
26 -- long strings which may need to be stored by the receiving program
27 -- in special structures, such as a ByteStore, but it's just a hint.
28 -- AsnTool stores StringStores in ByteStore structures.
29 -- OCTET STRINGs are also stored in ByteStores by AsnTool
30 --
31 -- typedef struct bsunit { /* for building multiline strings */
32 -- Nlm_Handle str; /* the string piece */
33 -- Nlm_Int2 len_avail,
34 -- len;
35 -- struct bsunit PNTR next; } /* the next one */
36 -- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
37 --
38 -- typedef struct bytestore {
39 -- Nlm_Int4 seekptr, /* current position */
40 -- totlen, /* total stored data length in bytes */
41 -- chain_offset; /* offset in ByteStore of first byte in curchain */
42 -- Nlm_BSUnitPtr chain, /* chain of elements */
43 -- curchain; /* the BSUnit containing seekptr */
44 -- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
45 --
46 -- AsnTool incorporates this as a primitive type, so the definition
47 -- is here just for completeness
48 --
49 -- StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
50 --
51
52 -- BigInt is really an INTEGER. It is used to warn the receiving code to expect
53 -- a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
54 --
55 -- Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
56 -- BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
57 --
58
59 -- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
60 -- of ASN.1
61 -- It stores only a date
62 --
63
64 Date ::= CHOICE {
65 str VisibleString , -- for those unparsed dates
66 std Date-std } -- use this if you can
67
68 Date-std ::= SEQUENCE { -- NOTE: this is NOT a unix tm struct
69 year INTEGER , -- full year (including 1900)
70 month INTEGER OPTIONAL , -- month (1-12)
71 day INTEGER OPTIONAL , -- day of month (1-31)
72 season VisibleString OPTIONAL , -- for "spring", "may-june", etc
73 hour INTEGER OPTIONAL , -- hour of day (0-23)
74 minute INTEGER OPTIONAL , -- minute of hour (0-59)
75 second INTEGER OPTIONAL } -- second of minute (0-59)
76
77 -- Dbtag is generalized for tagging
78 -- eg. { "Social Security", str "023-79-8841" }
79 -- or { "member", id 8882224 }
80
81 Dbtag ::= SEQUENCE {
82 db VisibleString , -- name of database or system
83 tag Object-id } -- appropriate tag
84
85 -- Object-id can tag or name anything
86 --
87
88 Object-id ::= CHOICE {
89 id INTEGER , -- numeric tag
90 str VisibleString } -- string tag or name
91
92 -- Person-id is to define a std element for people
93 --
94
95 Person-id ::= CHOICE {
96 dbtag Dbtag , -- any defined database tag
97 name Name-std , -- structured name
98 ml VisibleString , -- MEDLINE name (semi-structured)
99 -- eg. "Jones RM"
100 str VisibleString, -- unstructured name
101 consortium VisibleString } -- consortium name
102
103 Name-std ::= SEQUENCE { -- Structured names
104 last VisibleString ,
105 first VisibleString OPTIONAL ,
106 middle VisibleString OPTIONAL ,
107 full VisibleString OPTIONAL , -- full name eg. "J. John Smith, Esq"
108 initials VisibleString OPTIONAL, -- first + middle initials
109 suffix VisibleString OPTIONAL , -- Jr, Sr, III
110 title VisibleString OPTIONAL } -- Dr., Sister, etc
111
112 --**** Int-fuzz **********************************************
113 --*
114 --* uncertainties in integer values
115
116 Int-fuzz ::= CHOICE {
117 p-m INTEGER , -- plus or minus fixed amount
118 range SEQUENCE { -- max to min
119 max INTEGER ,
120 min INTEGER } ,
121 pct INTEGER , -- % plus or minus (x10) 0-1000
122 lim ENUMERATED { -- some limit value
123 unk (0) , -- unknown
124 gt (1) , -- greater than
125 lt (2) , -- less than
126 tr (3) , -- space to right of position
127 tl (4) , -- space to left of position
128 circle (5) , -- artificial break at origin of circle
129 other (255) } , -- something else
130 alt SET OF INTEGER } -- set of alternatives for the integer
131
132
133 --**** User-object **********************************************
134 --*
135 --* a general object for a user defined structured data item
136 --* used by Seq-feat and Seq-descr
137
138 User-object ::= SEQUENCE {
139 class VisibleString OPTIONAL , -- endeavor which designed this object
140 type Object-id , -- type of object within class
141 data SEQUENCE OF User-field } -- the object itself
142
143 User-field ::= SEQUENCE {
144 label Object-id , -- field label
145 num INTEGER OPTIONAL , -- required for strs, ints, reals, oss
146 data CHOICE { -- field contents
147 str VisibleString ,
148 int INTEGER ,
149 real REAL ,
150 bool BOOLEAN ,
151 os OCTET STRING ,
152 object User-object , -- for using other definitions
153 strs SEQUENCE OF VisibleString ,
154 ints SEQUENCE OF INTEGER ,
155 reals SEQUENCE OF REAL ,
156 oss SEQUENCE OF OCTET STRING ,
157 fields SEQUENCE OF User-field ,
158 objects SEQUENCE OF User-object } }
159
160
161
162 END
163
164 --$Revision: 6.2 $
165 --****************************************************************
166 --
167 -- NCBI Bibliographic data elements
168 -- by James Ostell, 1990
169 --
170 -- Taken from the American National Standard for
171 -- Bibliographic References
172 -- ANSI Z39.29-1977
173 -- Version 3.0 - June 1994
174 -- PubMedId added in 1996
175 -- ArticleIds and eprint elements added in 1999
176 --
177 --****************************************************************
178
179 NCBI-Biblio DEFINITIONS ::=
180 BEGIN
181
182 EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
183 Cit-proc, Cit-sub, Title, Author, PubMedId;
184
185 IMPORTS Person-id, Date, Dbtag FROM NCBI-General;
186
187 -- Article Ids
188
189 ArticleId ::= CHOICE { -- can be many ids for an article
190 pubmed PubMedId , -- see types below
191 medline MedlineUID ,
192 doi DOI ,
193 pii PII ,
194 pmcid PmcID ,
195 pmcpid PmcPid ,
196 pmpid PmPid ,
197 other Dbtag } -- generic catch all
198
199 PubMedId ::= INTEGER -- Id from the PubMed database at NCBI
200 MedlineUID ::= INTEGER -- Id from MEDLINE
201 DOI ::= VisibleString -- Document Object Identifier
202 PII ::= VisibleString -- Controlled Publisher Identifier
203 PmcID ::= INTEGER -- PubMed Central Id
204 PmcPid ::= VisibleString -- Publisher Id supplied to PubMed Central
205 PmPid ::= VisibleString -- Publisher Id supplied to PubMed
206
207 ArticleIdSet ::= SET OF ArticleId
208
209 -- Status Dates
210
211 PubStatus ::= INTEGER { -- points of publication
212 received (1) , -- date manuscript received for review
213 accepted (2) , -- accepted for publication
214 epublish (3) , -- published electronically by publisher
215 ppublish (4) , -- published in print by publisher
216 revised (5) , -- article revised by publisher/author
217 pmc (6) , -- article first appeared in PubMed Central
218 pmcr (7) , -- article revision in PubMed Central
219 pubmed (8) , -- article citation first appeared in PubMed
220 pubmedr (9) , -- article citation revision in PubMed
221 aheadofprint (10), -- epublish, but will be followed by print
222 premedline (11), -- date into PreMedline status
223 medline (12), -- date made a MEDLINE record
224 other (255) }
225
226 PubStatusDate ::= SEQUENCE { -- done as a structure so fields can be added
227 pubstatus PubStatus ,
228 date Date } -- time may be added later
229
230 PubStatusDateSet ::= SET OF PubStatusDate
231
232 -- Citation Types
233
234 Cit-art ::= SEQUENCE { -- article in journal or book
235 title Title OPTIONAL , -- title of paper (ANSI requires)
236 authors Auth-list OPTIONAL , -- authors (ANSI requires)
237 from CHOICE { -- journal or book
238 journal Cit-jour ,
239 book Cit-book ,
240 proc Cit-proc } ,
241 ids ArticleIdSet OPTIONAL } -- lots of ids
242
243 Cit-jour ::= SEQUENCE { -- Journal citation
244 title Title , -- title of journal
245 imp Imprint }
246
247 Cit-book ::= SEQUENCE { -- Book citation
248 title Title , -- Title of book
249 coll Title OPTIONAL , -- part of a collection
250 authors Auth-list, -- authors
251 imp Imprint }
252
253 Cit-proc ::= SEQUENCE { -- Meeting proceedings
254 book Cit-book , -- citation to meeting
255 meet Meeting } -- time and location of meeting
256
257 -- Patent number and date-issue were made optional in 1997 to
258 -- support patent applications being issued from the USPTO
259 -- Semantically a Cit-pat must have either a patent number or
260 -- an application number (or both) to be valid
261
262 Cit-pat ::= SEQUENCE { -- patent citation
263 title VisibleString ,
264 authors Auth-list, -- author/inventor
265 country VisibleString , -- Patent Document Country
266 doc-type VisibleString , -- Patent Document Type
267 number VisibleString OPTIONAL, -- Patent Document Number
268 date-issue Date OPTIONAL, -- Patent Issue/Pub Date
269 class SEQUENCE OF VisibleString OPTIONAL , -- Patent Doc Class Code
270 app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
271 app-date Date OPTIONAL , -- Patent Appl File Date
272 applicants Auth-list OPTIONAL , -- Applicants
273 assignees Auth-list OPTIONAL , -- Assignees
274 priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
275 abstract VisibleString OPTIONAL } -- abstract of patent
276
277 Patent-priority ::= SEQUENCE {
278 country VisibleString , -- Patent country code
279 number VisibleString , -- number assigned in that country
280 date Date } -- date of application
281
282 Id-pat ::= SEQUENCE { -- just to identify a patent
283 country VisibleString , -- Patent Document Country
284 id CHOICE {
285 number VisibleString , -- Patent Document Number
286 app-number VisibleString } , -- Patent Doc Appl Number
287 doc-type VisibleString OPTIONAL } -- Patent Doc Type
288
289 Cit-let ::= SEQUENCE { -- letter, thesis, or manuscript
290 cit Cit-book , -- same fields as a book
291 man-id VisibleString OPTIONAL , -- Manuscript identifier
292 type ENUMERATED {
293 manuscript (1) ,
294 letter (2) ,
295 thesis (3) } OPTIONAL }
296 -- NOTE: this is just to cite a
297 -- direct data submission, see NCBI-Submit
298 -- for the form of a sequence submission
299 Cit-sub ::= SEQUENCE { -- citation for a direct submission
300 authors Auth-list , -- not necessarily authors of the paper
301 imp Imprint OPTIONAL , -- only used to get the date; superseded by "date" below
302 medium ENUMERATED { -- medium of submission
303 paper (1) ,
304 tape (2) ,
305 floppy (3) ,
306 email (4) ,
307 other (255) } OPTIONAL ,
308 date Date OPTIONAL , -- replaces imp, will become required
309 descr VisibleString OPTIONAL } -- description of changes for public view
310
311 Cit-gen ::= SEQUENCE { -- NOT from ANSI, this is a catchall
312 cit VisibleString OPTIONAL , -- anything, not parsable
313 authors Auth-list OPTIONAL ,
314 muid INTEGER OPTIONAL , -- medline uid
315 journal Title OPTIONAL ,
316 volume VisibleString OPTIONAL ,
317 issue VisibleString OPTIONAL ,
318 pages VisibleString OPTIONAL ,
319 date Date OPTIONAL ,
320 serial-number INTEGER OPTIONAL , -- for GenBank style references
321 title VisibleString OPTIONAL , -- eg. cit="unpublished",title="title"
322 pmid PubMedId OPTIONAL } -- PubMed Id
323
324
325 -- Authorship Group
326 Auth-list ::= SEQUENCE {
327 names CHOICE {
328 std SEQUENCE OF Author , -- full citations
329 ml SEQUENCE OF VisibleString , -- MEDLINE, semi-structured
330 str SEQUENCE OF VisibleString } , -- free for all
331 affil Affil OPTIONAL } -- author affiliation
332
333 Author ::= SEQUENCE {
334 name Person-id , -- Author, Primary or Secondary
335 level ENUMERATED {
336 primary (1),
337 secondary (2) } OPTIONAL ,
338 role ENUMERATED { -- Author Role Indicator
339 compiler (1),
340 editor (2),
341 patent-assignee (3),
342 translator (4) } OPTIONAL ,
343 affil Affil OPTIONAL ,
344 is-corr BOOLEAN OPTIONAL } -- TRUE if corresponding author
345
346 Affil ::= CHOICE {
347 str VisibleString , -- unparsed string
348 std SEQUENCE { -- std representation
349 affil VisibleString OPTIONAL , -- Author Affiliation, Name
350 div VisibleString OPTIONAL , -- Author Affiliation, Division
351 city VisibleString OPTIONAL , -- Author Affiliation, City
352 sub VisibleString OPTIONAL , -- Author Affiliation, Country Sub (was "County Sub"; presumed typo, confirm)
353 country VisibleString OPTIONAL , -- Author Affiliation, Country
354 street VisibleString OPTIONAL , -- street address, not ANSI
355 email VisibleString OPTIONAL ,
356 fax VisibleString OPTIONAL ,
357 phone VisibleString OPTIONAL ,
358 postal-code VisibleString OPTIONAL }}
359
360 -- Title Group
361 -- The letters ending each comment below show where an element is valid:
362 -- A = Analytic (Cit-art)
363 -- J = Journals (Cit-jour)
364 -- B = Book (Cit-book)
365 Title ::= SET OF CHOICE {
366 name VisibleString , -- Title, Anal,Coll,Mono AJB
367 tsub VisibleString , -- Title, Subordinate A B
368 trans VisibleString , -- Title, Translated AJB
369 jta VisibleString , -- Title, Abbreviated J
370 iso-jta VisibleString , -- specifically ISO jta J
371 ml-jta VisibleString , -- specifically MEDLINE jta J
372 coden VisibleString , -- a coden J
373 issn VisibleString , -- ISSN J
374 abr VisibleString , -- Title, Abbreviated B
375 isbn VisibleString } -- ISBN B
376
377 Imprint ::= SEQUENCE { -- Imprint group
378 date Date , -- date of publication
379 volume VisibleString OPTIONAL ,
380 issue VisibleString OPTIONAL ,
381 pages VisibleString OPTIONAL ,
382 section VisibleString OPTIONAL ,
383 pub Affil OPTIONAL, -- publisher, required for book
384 cprt Date OPTIONAL, -- copyright date, " " "
385 part-sup VisibleString OPTIONAL , -- part/sup of volume
386 language VisibleString DEFAULT "ENG" , -- put here for simplicity
387 prepub ENUMERATED { -- for prepublication citations
388 submitted (1) , -- submitted, not accepted
389 in-press (2) , -- accepted, not published
390 other (255) } OPTIONAL ,
391 part-supi VisibleString OPTIONAL , -- part/sup on issue
392 retract CitRetract OPTIONAL , -- retraction info
393 pubstatus PubStatus OPTIONAL , -- current status of this publication
394 history PubStatusDateSet OPTIONAL } -- dates for this record
395
396 CitRetract ::= SEQUENCE {
397 type ENUMERATED { -- retraction of an entry
398 retracted (1) , -- this citation retracted
399 notice (2) , -- this citation is a retraction notice
400 in-error (3) , -- an erratum was published about this
401 erratum (4) } , -- this is a published erratum
402 exp VisibleString OPTIONAL } -- citation and/or explanation
403
404 Meeting ::= SEQUENCE { -- meeting referenced by Cit-proc
405 number VisibleString , -- presumably the meeting's number or designation (confirm)
406 date Date , -- time of the meeting
407 place Affil OPTIONAL } -- location of the meeting
408
409
410 END
411
412
413 --$Revision: 6.0 $
414 --**********************************************************************
415 --
416 -- MEDLINE data definitions
417 -- James Ostell, 1990
418 --
419 -- enhanced in 1996 to support PubMed records as well by simply adding
420 -- the PubMedId and making MedlineId optional
421 --
422 --**********************************************************************
423
424 NCBI-Medline DEFINITIONS ::=
425 BEGIN
426
427 EXPORTS Medline-entry, Medline-si;
428
429 IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
430 Date FROM NCBI-General;
431
432 -- a MEDLINE or PubMed entry
433 Medline-entry ::= SEQUENCE {
434 uid INTEGER OPTIONAL , -- MEDLINE UID, sometimes not yet available if from PubMed
435 em Date , -- Entry Month
436 cit Cit-art , -- article citation
437 abstract VisibleString OPTIONAL ,
438 mesh SET OF Medline-mesh OPTIONAL ,
439 substance SET OF Medline-rn OPTIONAL ,
440 xref SET OF Medline-si OPTIONAL ,
441 idnum SET OF VisibleString OPTIONAL , -- ID Number (grants, contracts)
442 gene SET OF VisibleString OPTIONAL ,
443 pmid PubMedId OPTIONAL , -- MEDLINE records may include the PubMedId
444 pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
445 mlfield SET OF Medline-field OPTIONAL , -- additional Medline field types
446 status INTEGER {
447 publisher (1) , -- record as supplied by publisher
448 premedline (2) , -- premedline record
449 medline (3) } DEFAULT medline } -- regular medline record
450
451 Medline-mesh ::= SEQUENCE {
452 mp BOOLEAN DEFAULT FALSE , -- TRUE if main point (*)
453 term VisibleString , -- the MeSH term
454 qual SET OF Medline-qual OPTIONAL } -- qualifiers
455
456 Medline-qual ::= SEQUENCE {
457 mp BOOLEAN DEFAULT FALSE , -- TRUE if main point
458 subh VisibleString } -- the subheading
459
460 Medline-rn ::= SEQUENCE { -- medline substance records
461 type ENUMERATED { -- type of record
462 nameonly (0) ,
463 cas (1) , -- CAS number
464 ec (2) } , -- EC number
465 cit VisibleString OPTIONAL , -- CAS or EC number if present
466 name VisibleString } -- name (always present)
467
468 Medline-si ::= SEQUENCE { -- medline cross reference records
469 type ENUMERATED { -- type of xref
470 ddbj (1) , -- DNA Data Bank of Japan
471 carbbank (2) , -- Carbohydrate Structure Database
472 embl (3) , -- EMBL Data Library
473 hdb (4) , -- Hybridoma Data Bank
474 genbank (5) , -- GenBank
475 hgml (6) , -- Human Gene Map Library
476 mim (7) , -- Mendelian Inheritance in Man
477 msd (8) , -- Microbial Strains Database
478 pdb (9) , -- Protein Data Bank (Brookhaven)
479 pir (10) , -- Protein Identification Resource
480 prfseqdb (11) , -- Protein Research Foundation (Japan)
481 psd (12) , -- Protein Sequence Database (Japan)
482 swissprot (13) , -- SwissProt
483 gdb (14) } , -- Genome Data Base
484 cit VisibleString OPTIONAL } -- the citation/accession number
485
486 Medline-field ::= SEQUENCE {
487 type INTEGER { -- Keyed type
488 other (0) , -- look in line code
489 comment (1) , -- comment line
490 erratum (2) } , -- retracted, corrected, etc
491 str VisibleString , -- the text
492 ids SEQUENCE OF DocRef OPTIONAL } -- pointers relevant to this text
493
494 DocRef ::= SEQUENCE { -- reference to a document
495 type INTEGER { -- kind of identifier held in uid
496 medline (1) ,
497 pubmed (2) ,
498 ncbigi (3) } ,
499 uid INTEGER } -- the identifier itself
500
501 END
502
503 --$Revision: 6.0 $
504 --**********************************************************************
505 --
506 -- PUBMED data definitions
507 --
508 --**********************************************************************
509
510 NCBI-PubMed DEFINITIONS ::=
511 BEGIN
512
513 EXPORTS Pubmed-entry, Pubmed-url;
514
515 IMPORTS PubMedId FROM NCBI-Biblio
516 Medline-entry FROM NCBI-Medline;
517
518 Pubmed-entry ::= SEQUENCE { -- a PubMed entry
519 -- PUBMED records must include the PubMedId
520 pmid PubMedId,
521
522 -- Medline entry information
523 medent Medline-entry OPTIONAL,
524
525 -- Publisher name
526 publisher VisibleString OPTIONAL,
527
528 -- List of URL to publisher cite
529 urls SET OF Pubmed-url OPTIONAL,
530
531 -- Publisher's article identifier
532 pubid VisibleString OPTIONAL
533 }
534
535 Pubmed-url ::= SEQUENCE {
536 location VisibleString OPTIONAL, -- Location code
537 url VisibleString -- Selected URL for location
538 }
539
540 END
541 --$Revision: 6.0 $
542 --**********************************************************************
543 --
544 -- MEDLARS data definitions
545 -- Grigoriy Starchenko, 1997
546 --
547 --**********************************************************************
548
549 NCBI-Medlars DEFINITIONS ::=
550 BEGIN
551
552 EXPORTS Medlars-entry, Medlars-record;
553
554 IMPORTS PubMedId FROM NCBI-Biblio;
555
556 Medlars-entry ::= SEQUENCE { -- a MEDLARS entry
557 pmid PubMedId, -- All entries in PubMed must have it
558 muid INTEGER OPTIONAL, -- Medline(OCCS) id
559 recs SET OF Medlars-record -- List of Medlars records
560 }
561
562 Medlars-record ::= SEQUENCE {
563 code INTEGER, -- Unit record field type integer form
564 abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
565 data VisibleString -- Unit record data
566 }
567
568 END
569 --$Revision: 6.0 $
570 --********************************************************************
571 --
572 -- Publication common set
573 -- James Ostell, 1990
574 --
575 -- These are the base class definitions for Publications of all sorts
576 --
577 -- support for PubMedId added in 1996
578 --********************************************************************
579
580 NCBI-Pub DEFINITIONS ::=
581 BEGIN
582
583 EXPORTS Pub, Pub-set, Pub-equiv;
584
585 IMPORTS Medline-entry FROM NCBI-Medline
586 Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
587 Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;
588
589 Pub ::= CHOICE {
590 gen Cit-gen , -- general or generic unparsed
591 sub Cit-sub , -- submission
592 medline Medline-entry ,
593 muid INTEGER , -- medline uid
594 article Cit-art ,
595 journal Cit-jour ,
596 book Cit-book ,
597 proc Cit-proc , -- proceedings of a meeting
598 patent Cit-pat ,
599 pat-id Id-pat , -- identify a patent
600 man Cit-let , -- manuscript, thesis, or letter
601 equiv Pub-equiv, -- to cite a variety of ways
602 pmid PubMedId } -- PubMedId
603
604 Pub-equiv ::= SET OF Pub -- equivalent identifiers for same citation
605
606 Pub-set ::= CHOICE {
607 pub SET OF Pub ,
608 medline SET OF Medline-entry ,
609 article SET OF Cit-art ,
610 journal SET OF Cit-jour ,
611 book SET OF Cit-book ,
612 proc SET OF Cit-proc , -- proceedings of a meeting
613 patent SET OF Cit-pat }
614
615 END
616
617 --$Revision: 6.4 $
618 --**********************************************************************
619 --
620 -- NCBI Sequence location and identifier elements
621 -- by James Ostell, 1990
622 --
623 -- Version 3.0 - 1994
624 --
625 --**********************************************************************
626
627 NCBI-Seqloc DEFINITIONS ::=
628 BEGIN
629
630 EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
631 Na-strand, Giimport-id;
632
633 IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
634 Id-pat FROM NCBI-Biblio
635 Feat-id FROM NCBI-Seqfeat;
636
637 --*** Sequence identifiers ********************************
638 --*
639
640 Seq-id ::= CHOICE {
641 local Object-id , -- local use
642 gibbsq INTEGER , -- Geninfo backbone seqid
643 gibbmt INTEGER , -- Geninfo backbone moltype
644 giim Giimport-id , -- Geninfo import id
645 genbank Textseq-id ,
646 embl Textseq-id ,
647 pir Textseq-id ,
648 swissprot Textseq-id ,
649 patent Patent-seq-id ,
650 other Textseq-id , -- for historical reasons, 'other' = 'refseq'
651 general Dbtag , -- for other databases
652 gi INTEGER , -- GenInfo Integrated Database
653 ddbj Textseq-id , -- DDBJ
654 prf Textseq-id , -- PRF SEQDB
655 pdb PDB-seq-id , -- PDB sequence
656 tpg Textseq-id , -- Third Party Annot/Seq Genbank
657 tpe Textseq-id , -- Third Party Annot/Seq EMBL
658 tpd Textseq-id , -- Third Party Annot/Seq DDBJ
659 gpipe Textseq-id , -- Internal NCBI genome pipeline processing ID
660 named-annot-track Textseq-id -- Internal named annotation tracking ID
661 }
662
663
664 Patent-seq-id ::= SEQUENCE {
665 seqid INTEGER , -- number of sequence in patent
666 cit Id-pat } -- patent citation
667
668 Textseq-id ::= SEQUENCE {
669 name VisibleString OPTIONAL ,
670 accession VisibleString OPTIONAL ,
671 release VisibleString OPTIONAL ,
672 version INTEGER OPTIONAL }
673
674 Giimport-id ::= SEQUENCE {
675 id INTEGER , -- the id to use here
676 db VisibleString OPTIONAL , -- dbase used in
677 release VisibleString OPTIONAL } -- the release
678
679 PDB-seq-id ::= SEQUENCE {
680 mol PDB-mol-id , -- the molecule name
681 chain INTEGER DEFAULT 32 , -- chain id as a single ASCII character code (default 32 = space)
682 rel Date OPTIONAL } -- release date, month and year
683
684 PDB-mol-id ::= VisibleString -- name of mol, 4 chars
685
686 --*** Sequence locations **********************************
687 --*
688
689 Seq-loc ::= CHOICE {
690 null NULL , -- not placed
691 empty Seq-id , -- to NULL one Seq-id in a collection
692 whole Seq-id , -- whole sequence
693 int Seq-interval , -- from to
694 packed-int Packed-seqint ,
695 pnt Seq-point ,
696 packed-pnt Packed-seqpnt ,
697 mix Seq-loc-mix ,
698 equiv Seq-loc-equiv , -- equivalent sets of locations
699 bond Seq-bond ,
700 feat Feat-id } -- indirect, through a Seq-feat
701
702
703 Seq-interval ::= SEQUENCE {
704 from INTEGER ,
705 to INTEGER ,
706 strand Na-strand OPTIONAL ,
707 id Seq-id , -- WARNING: this used to be optional
708 fuzz-from Int-fuzz OPTIONAL ,
709 fuzz-to Int-fuzz OPTIONAL }
710
711 Packed-seqint ::= SEQUENCE OF Seq-interval
712
713 Seq-point ::= SEQUENCE {
714 point INTEGER ,
715 strand Na-strand OPTIONAL ,
716 id Seq-id , -- WARNING: this used to be optional
717 fuzz Int-fuzz OPTIONAL }
718
719 Packed-seqpnt ::= SEQUENCE {
720 strand Na-strand OPTIONAL ,
721 id Seq-id ,
722 fuzz Int-fuzz OPTIONAL ,
723 points SEQUENCE OF INTEGER }
724
725 Na-strand ::= ENUMERATED { -- strand of nucleic acid
726 unknown (0) ,
727 plus (1) ,
728 minus (2) ,
729 both (3) , -- in forward orientation
730 both-rev (4) , -- in reverse orientation
731 other (255) }
732
733 Seq-bond ::= SEQUENCE { -- bond between residues
734 a Seq-point , -- connection to at least one residue
735 b Seq-point OPTIONAL } -- other end may not be available
736
737 Seq-loc-mix ::= SEQUENCE OF Seq-loc -- this will hold anything
738
739 Seq-loc-equiv ::= SET OF Seq-loc -- for a set of equivalent locations
740
741 END
742
743
744 --$Revision: 6.20 $
745 --**********************************************************************
746 --
747 -- NCBI Sequence elements
748 -- by James Ostell, 1990
749 -- Version 3.0 - June 1994
750 --
751 --**********************************************************************
752
753 NCBI-Sequence DEFINITIONS ::=
754 BEGIN
755
756 EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
757 Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
758 Seq-hist, Seq-inst, Seq-literal, Delta-ext; -- each symbol listed once
759
760 IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
761 Seq-align FROM NCBI-Seqalign
762 Seq-feat FROM NCBI-Seqfeat
763 Seq-graph FROM NCBI-Seqres
764 Pub-equiv FROM NCBI-Pub
765 Org-ref FROM NCBI-Organism
766 BioSource FROM NCBI-BioSource
767 Seq-id, Seq-loc FROM NCBI-Seqloc
768 GB-block FROM GenBank-General
769 PIR-block FROM PIR-General
770 EMBL-block FROM EMBL-General
771 SP-block FROM SP-General
772 PRF-block FROM PRF-General
773 PDB-block FROM PDB-General
774 Seq-table FROM NCBI-SeqTable;
775
776 --*** Sequence ********************************
777 --*
778
779 Bioseq ::= SEQUENCE {
780 id SET OF Seq-id , -- equivalent identifiers
781 descr Seq-descr OPTIONAL , -- descriptors
782 inst Seq-inst , -- the sequence data
783 annot SET OF Seq-annot OPTIONAL }
784
785 --*** Descriptors *****************************
786 --*
787
788 Seq-descr ::= SET OF Seqdesc
789
790 Seqdesc ::= CHOICE {
791 mol-type GIBB-mol , -- type of molecule
792 modif SET OF GIBB-mod , -- modifiers
793 method GIBB-method , -- sequencing method
794 name VisibleString , -- a name for this sequence
795 title VisibleString , -- a title for this sequence
796 org Org-ref , -- if all from one organism
797 comment VisibleString , -- a more extensive comment
798 num Numbering , -- a numbering system
799 maploc Dbtag , -- map location of this sequence
800 pir PIR-block , -- PIR specific info
801 genbank GB-block , -- GenBank specific info
802 pub Pubdesc , -- a reference to the publication
803 region VisibleString , -- overall region (globin locus)
804 user User-object , -- user defined object
805 sp SP-block , -- SWISSPROT specific info
806 dbxref Dbtag , -- xref to other databases
807 embl EMBL-block , -- EMBL specific information
808 create-date Date , -- date entry first created/released
809 update-date Date , -- date of last update
810 prf PRF-block , -- PRF specific information
811 pdb PDB-block , -- PDB specific information
812 het Heterogen , -- cofactor, etc associated but not bound
813 source BioSource , -- source of materials, includes Org-ref
814 molinfo MolInfo } -- info on the molecule and techniques
815
816 --******* NOTE:
817 --* mol-type, modif, method, and org are consolidated and expanded
818 --* in Org-ref, BioSource, and MolInfo in this specification. They
819 --* will be removed in later specifications. Do not use them in
820 --* the future. Instead expect the new structures.
821 --*
822 --***************************
823
824 --********************************************************************
825 --
826 -- MolInfo gives information on the
827 -- classification of the type and quality of the sequence
828 --
829 -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
830 --
831 --********************************************************************
832
833 MolInfo ::= SEQUENCE {
834 biomol INTEGER {
835 unknown (0) ,
836 genomic (1) ,
837 pre-RNA (2) , -- precursor RNA of any sort really
838 mRNA (3) ,
839 rRNA (4) ,
840 tRNA (5) ,
841 snRNA (6) ,
842 scRNA (7) ,
843 peptide (8) ,
844 other-genetic (9) , -- other genetic material
845 genomic-mRNA (10) , -- reported a mix of genomic and cdna sequence
846 cRNA (11) , -- viral RNA genome copy intermediate
847 snoRNA (12) , -- small nucleolar RNA
848 transcribed-RNA (13) , -- transcribed RNA other than existing classes
849 ncRNA (14) ,
850 tmRNA (15) ,
851 other (255) } DEFAULT unknown ,
852 tech INTEGER {
853 unknown (0) ,
854 standard (1) , -- standard sequencing
855 est (2) , -- Expressed Sequence Tag
856 sts (3) , -- Sequence Tagged Site
857 survey (4) , -- one-pass genomic sequence
858 genemap (5) , -- from genetic mapping techniques
859 physmap (6) , -- from physical mapping techniques
860 derived (7) , -- derived from other data, not a primary entity
861 concept-trans (8) , -- conceptual translation
862 seq-pept (9) , -- peptide was sequenced
863 both (10) , -- concept transl. w/ partial pept. seq.
864 seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
865 seq-pept-homol (12) , -- sequenced peptide, ordered by homology
866 concept-trans-a (13) , -- conceptual transl. supplied by author
867 htgs-1 (14) , -- unordered High Throughput sequence contig
868 htgs-2 (15) , -- ordered High Throughput sequence contig
869 htgs-3 (16) , -- finished High Throughput sequence
870 fli-cdna (17) , -- full length insert cDNA
871 htgs-0 (18) , -- single genomic reads for coordination
872 htc (19) , -- high throughput cDNA
873 wgs (20) , -- whole genome shotgun sequencing
874 barcode (21) , -- barcode of life project
875 composite-wgs-htgs (22) , -- composite of WGS and HTGS
876 tsa (23) , -- transcriptome shotgun assembly
877 other (255) } -- explain in techexp (below)
878 DEFAULT unknown ,
879 techexp VisibleString OPTIONAL , -- explanation if tech not enough
880 --
881 -- Completeness is not indicated in most records. For genomes, assume
882 -- the sequences are incomplete unless specifically marked as complete.
883 -- For mRNAs, assume the ends are not known exactly unless marked as
884 -- having the left or right end.
885 --
886 completeness INTEGER {
887 unknown (0) ,
888 complete (1) , -- complete biological entity
889 partial (2) , -- partial but no details given
890 no-left (3) , -- missing 5' or NH2 end
891 no-right (4) , -- missing 3' or COOH end
892 no-ends (5) , -- missing both ends
893 has-left (6) , -- 5' or NH2 end present
894 has-right (7) , -- 3' or COOH end present
895 other (255) } DEFAULT unknown ,
896 gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA
897
898
899 GIBB-mol ::= ENUMERATED { -- type of molecule represented (codes 0-10, plus 255)
900 unknown (0) ,
901 genomic (1) ,
902 pre-mRNA (2) , -- precursor RNA of any sort, not only of mRNA
903 mRNA (3) ,
904 rRNA (4) ,
905 tRNA (5) ,
906 snRNA (6) ,
907 scRNA (7) ,
908 peptide (8) ,
909 other-genetic (9) , -- other genetic material
910 genomic-mRNA (10) , -- reported as a mix of genomic and cDNA sequence
911 other (255) } -- 255 is set apart from the contiguous codes above
912
913 GIBB-mod ::= ENUMERATED { -- GenInfo Backbone modifiers (codes 0-26, plus 255)
914 dna (0) , -- note: 0 is a real value here, not an unset marker
915 rna (1) ,
916 extrachrom (2) ,
917 plasmid (3) ,
918 mitochondrial (4) ,
919 chloroplast (5) ,
920 kinetoplast (6) ,
921 cyanelle (7) ,
922 synthetic (8) ,
923 recombinant (9) ,
924 partial (10) ,
925 complete (11) ,
926 mutagen (12) , -- subject of mutagenesis ?
927 natmut (13) , -- natural mutant ?
928 transposon (14) ,
929 insertion-seq (15) ,
930 no-left (16) , -- missing left end (5' for na, NH2 for aa)
931 no-right (17) , -- missing right end (3' for na, COOH for aa)
932 macronuclear (18) ,
933 proviral (19) ,
934 est (20) , -- expressed sequence tag
935 sts (21) , -- sequence tagged site
936 survey (22) , -- one pass survey sequence
937 chromoplast (23) ,
938 genemap (24) , -- is a genetic map
939 restmap (25) , -- is an ordered restriction map
940 physmap (26) , -- is a physical map (not ordered restriction map)
941 other (255) }
942
943 GIBB-method ::= ENUMERATED { -- sequencing methods; codes start at 1, no (0) value is defined
944 concept-trans (1) , -- conceptual translation
945 seq-pept (2) , -- peptide was sequenced
946 both (3) , -- concept transl. w/ partial pept. seq.
947 seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
948 seq-pept-homol (5) , -- sequenced peptide, ordered by homology
949 concept-trans-a (6) , -- conceptual transl. supplied by author
950 other (255) } -- the six names above also appear, with codes 8-13, in the value list preceding techexp
951
952 Numbering ::= CHOICE { -- any display numbering system; exactly one of four alternatives
953 cont Num-cont , -- continuous numbering
954 enum Num-enum , -- enumerated names for residues
955 ref Num-ref , -- by reference to another sequence
956 real Num-real } -- supports mapping to a floating point system
957
958 Num-cont ::= SEQUENCE { -- continuous display numbering system; every field has a DEFAULT
959 refnum INTEGER DEFAULT 1, -- number assigned to first residue
960 has-zero BOOLEAN DEFAULT FALSE , -- is 0 used as a number?
961 ascending BOOLEAN DEFAULT TRUE } -- are the numbers ascending?
962
963 Num-enum ::= SEQUENCE { -- any tags to residues; both fields are required
964 num INTEGER , -- number of tags to follow
965 names SEQUENCE OF VisibleString } -- the tags, in order; presumably num of them (not enforced by the syntax)
966
967 Num-ref ::= SEQUENCE { -- by reference to other sequences
968 type ENUMERATED { -- type of reference (required)
969 not-set (0) ,
970 sources (1) , -- by segmented or const seq sources
971 aligns (2) } , -- by alignments given below
972 aligns Seq-align OPTIONAL } -- a single Seq-align, may be absent; referred to by type = aligns
973
974 Num-real ::= SEQUENCE { -- mapping to floating point system: position = (a * int_position) + b
975 a REAL , -- multiplier applied to the integer position used by Bioseq
976 b REAL , -- offset added after multiplying by a
977 units VisibleString OPTIONAL } -- presumably the units of the mapped position, as free text
978
979 Pubdesc ::= SEQUENCE { -- how sequence presented in pub; only pub is required
980 pub Pub-equiv , -- the citation(s)
981 name VisibleString OPTIONAL , -- name used in paper
982 fig VisibleString OPTIONAL , -- figure in paper
983 num Numbering OPTIONAL , -- numbering from paper
984 numexc BOOLEAN OPTIONAL , -- numbering problem with paper
985 poly-a BOOLEAN OPTIONAL , -- poly A tail indicated in figure?
986 maploc VisibleString OPTIONAL , -- map location reported in paper
987 seq-raw StringStore OPTIONAL , -- original sequence from paper
988 align-group INTEGER OPTIONAL , -- this seq aligned with others in paper
989 comment VisibleString OPTIONAL, -- any comment on this pub in context
990 reftype INTEGER { -- type of reference in a GenBank record
991 seq (0) , -- refers to sequence
992 sites (1) , -- refers to unspecified features
993 feats (2) , -- refers to specified features
994 no-target (3) } -- nothing specified (EMBL)
995 DEFAULT seq } -- seq (0) applies when reftype is omitted
996
997 Heterogen ::= VisibleString -- cofactor, prosthetic group, inhibitor, etc; plain text, no further structure
998
999 --*** Instances of sequences *******************************
1000 --*
1001
1002 Seq-inst ::= SEQUENCE { -- the sequence data itself; only repr and mol are required
1003 repr ENUMERATED { -- representation class
1004 not-set (0) , -- empty
1005 virtual (1) , -- no seq data
1006 raw (2) , -- continuous sequence
1007 seg (3) , -- segmented sequence
1008 const (4) , -- constructed sequence
1009 ref (5) , -- reference to another sequence
1010 consen (6) , -- consensus sequence or pattern
1011 map (7) , -- ordered map of any kind
1012 delta (8) , -- sequence made by changes (delta) to others
1013 other (255) } ,
1014 mol ENUMERATED { -- molecule class in living organism
1015 not-set (0) , -- > cdna = rna
1016 dna (1) ,
1017 rna (2) ,
1018 aa (3) ,
1019 na (4) , -- just a nucleic acid
1020 other (255) } ,
1021 length INTEGER OPTIONAL , -- length of sequence in residues
1022 fuzz Int-fuzz OPTIONAL , -- length uncertainty
1023 topology ENUMERATED { -- topology of molecule
1024 not-set (0) ,
1025 linear (1) ,
1026 circular (2) ,
1027 tandem (3) , -- some part of tandem repeat
1028 other (255) } DEFAULT linear , -- linear applies when the field is omitted
1029 strand ENUMERATED { -- strandedness in living organism
1030 not-set (0) ,
1031 ss (1) , -- single strand
1032 ds (2) , -- double strand
1033 mixed (3) ,
1034 other (255) } OPTIONAL , -- no ASN.1 DEFAULT; when absent assume ds for DNA, ss for RNA and peptide
1035 seq-data Seq-data OPTIONAL , -- the sequence
1036 ext Seq-ext OPTIONAL , -- extensions for special types
1037 hist Seq-hist OPTIONAL } -- sequence history
1038
1039 --*** Sequence Extensions **********************************
1040 --* for representing more complex types
1041 --* const type uses Seq-hist.assembly
1042
1043 Seq-ext ::= CHOICE { -- extension data; exactly one of four alternatives
1044 seg Seg-ext , -- segmented sequences
1045 ref Ref-ext , -- hot link to another sequence (a view)
1046 map Map-ext , -- ordered map of markers
1047 delta Delta-ext } -- ordered list of Delta-seq pieces
1048
1049 Seg-ext ::= SEQUENCE OF Seq-loc -- ordered list of locations
1050
1051 Ref-ext ::= Seq-loc -- a single location
1052
1053 Map-ext ::= SEQUENCE OF Seq-feat -- ordered list of features
1054
1055 Delta-ext ::= SEQUENCE OF Delta-seq -- ordered list of Delta-seq elements
1056
1057 Delta-seq ::= CHOICE { -- one element of a Delta-ext: a pointer or inline data
1058 loc Seq-loc , -- point to a sequence
1059 literal Seq-literal } -- a piece of sequence
1060
1061 Seq-literal ::= SEQUENCE { -- a piece of sequence given in place; only length is required
1062 length INTEGER , -- must give a length in residues
1063 fuzz Int-fuzz OPTIONAL , -- could be unsure of the length
1064 seq-data Seq-data OPTIONAL } -- may have the data; a literal with no data is allowed
1065
1066 --*** Sequence History Record ***********************************
1067 --** assembly = records how seq was assembled from others
1068 --** replaces = records sequences made obsolete by this one
1069 --** replaced-by = this seq is made obsolete by another(s)
1070
1071 Seq-hist ::= SEQUENCE { -- sequence history; every field is optional
1072 assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
1073 replaces Seq-hist-rec OPTIONAL , -- seq makes these seqs obsolete
1074 replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
1075 deleted CHOICE { -- deletion marker, given either as a flag or as a date
1076 bool BOOLEAN ,
1077 date Date } OPTIONAL }
1078
1079 Seq-hist-rec ::= SEQUENCE { -- record type used by Seq-hist.replaces and Seq-hist.replaced-by
1080 date Date OPTIONAL , -- presumably the date of the replacement; TODO confirm
1081 ids SET OF Seq-id } -- ids of the other sequences; required, unordered
1082
1083 --*** Various internal sequence representations ************
1084 --* all are controlled, fixed length forms
1085
1086 Seq-data ::= CHOICE { -- sequence representations; exactly one encoding per value
1087 iupacna IUPACna , -- IUPAC 1 letter nuc acid code
1088 iupacaa IUPACaa , -- IUPAC 1 letter amino acid code
1089 ncbi2na NCBI2na , -- 2 bit nucleic acid code
1090 ncbi4na NCBI4na , -- 4 bit nucleic acid code
1091 ncbi8na NCBI8na , -- 8 bit extended nucleic acid code
1092 ncbipna NCBIpna , -- nucleic acid probabilities
1093 ncbi8aa NCBI8aa , -- 8 bit extended amino acid codes
1094 ncbieaa NCBIeaa , -- extended ASCII 1 letter aa codes
1095 ncbipaa NCBIpaa , -- amino acid probabilities
1096 ncbistdaa NCBIstdaa, -- consecutive codes for std aas
1097 gap Seq-gap -- gap types; the only alternative that is not a string type
1098 }
1099
1100 Seq-gap ::= SEQUENCE { -- description of a gap; carried by Seq-data.gap
1101 type INTEGER { -- kind of gap (required)
1102 unknown(0),
1103 fragment(1),
1104 clone(2),
1105 short-arm(3),
1106 heterochromatin(4),
1107 centromere(5),
1108 telomere(6),
1109 repeat(7),
1110 contig(8),
1111 other(255)
1112 },
1113 linkage INTEGER { -- linkage across the gap; may be absent
1114 unlinked(0),
1115 linked(1),
1116 other(255)
1117 } OPTIONAL
1118 }
1119
1120 IUPACna ::= StringStore -- IUPAC 1 letter nucleic acid codes, no spaces
1121 IUPACaa ::= StringStore -- IUPAC 1 letter amino acid codes, no spaces
1122 NCBI2na ::= OCTET STRING -- 2 bits per base: 00=A, 01=C, 10=G, 11=T
1123 NCBI4na ::= OCTET STRING -- 4 bits per base, 1 bit each for agct
1124 -- 0001=A, 0010=C, 0100=G, 1000=T/U
1125 -- 0101=Purine, 1010=Pyrimidine, etc
1126 NCBI8na ::= OCTET STRING -- for modified nucleic acids
1127 NCBIpna ::= OCTET STRING -- 5 octets/base, prob for a,c,g,t,n
1128 -- probabilities are coded 0-255 = 0.0-1.0
1129 NCBI8aa ::= OCTET STRING -- for modified amino acids
1130 NCBIeaa ::= StringStore -- ASCII extended 1 letter aa codes
1131 -- IUPAC codes + U=selenocysteine
1132 NCBIpaa ::= OCTET STRING -- 25 octets/aa, prob for IUPAC aas in order:
1133 -- A-Y,B,Z,X,(ter),anything
1134 -- probabilities are coded 0-255 = 0.0-1.0
1135 NCBIstdaa ::= OCTET STRING -- codes 0-25, 1 code per byte
1136
1137 --*** Sequence Annotation *************************************
1138 --*
1139
1140 -- This is a replica of Textseq-id
1141 -- This is specific for annotations, and exists to maintain a semantic
1142 -- difference between IDs assigned to annotations and IDs assigned to
1143 -- sequences
1144 Textannot-id ::= SEQUENCE { -- text id for an annotation; all four fields are optional
1145 name VisibleString OPTIONAL ,
1146 accession VisibleString OPTIONAL ,
1147 release VisibleString OPTIONAL ,
1148 version INTEGER OPTIONAL -- the only non-string field
1149 }
1150
1151 Annot-id ::= CHOICE { -- identifier of an annotation; exactly one of four forms
1152 local Object-id ,
1153 ncbi INTEGER ,
1154 general Dbtag,
1155 other Textannot-id -- text form, see Textannot-id
1156 }
1157
1158 Annot-descr ::= SET OF Annotdesc -- unordered collection of descriptors
1159
1160 Annotdesc ::= CHOICE { -- one descriptor; exactly one of the alternatives below
1161 name VisibleString , -- a short name for this collection
1162 title VisibleString , -- a title for this collection
1163 comment VisibleString , -- a more extensive comment
1164 pub Pubdesc , -- a reference to the publication
1165 user User-object , -- user defined object
1166 create-date Date , -- date entry first created/released
1167 update-date Date , -- date of last update
1168 src Seq-id , -- source sequence from which annot came
1169 align Align-def, -- definition of the SeqAligns
1170 region Seq-loc } -- all contents cover this region
1171
1172 Align-def ::= SEQUENCE { -- describes a set of alignments; used by Annotdesc.align
1173 align-type INTEGER { -- class of align Seq-annot (required; codes start at 1)
1174 ref (1) , -- set of alignments to the same sequence
1175 alt (2) , -- set of alternate alignments of the same seqs
1176 blocks (3) , -- set of aligned blocks in the same seqs
1177 other (255) } ,
1178 ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
1179
1180 Seq-annot ::= SEQUENCE { -- a set of annotations; only data is required
1181 id SET OF Annot-id OPTIONAL ,
1182 db INTEGER { -- source of annotation (codes start at 1)
1183 genbank (1) ,
1184 embl (2) ,
1185 ddbj (3) ,
1186 pir (4) ,
1187 sp (5) ,
1188 bbone (6) ,
1189 pdb (7) ,
1190 other (255) } OPTIONAL ,
1191 name VisibleString OPTIONAL ,-- source if "other" above
1192 desc Annot-descr OPTIONAL , -- used only for stand alone Seq-annots
1193 data CHOICE { -- the annotations; one alternative, so all members share one type
1194 ftable SET OF Seq-feat ,
1195 align SET OF Seq-align ,
1196 graph SET OF Seq-graph ,
1197 ids SET OF Seq-id , -- used for communication between tools
1198 locs SET OF Seq-loc , -- used for communication between tools
1199 seq-table Seq-table } } -- features in table form
1200
1201 END
1202
1203
1204 --$Revision: 6.5 $
1205 --**********************************************************************
1206 --
1207 -- NCBI Sequence Collections
1208 -- by James Ostell, 1990
1209 --
1210 -- Version 3.0 - 1994
1211 --
1212 --**********************************************************************
1213
1214 NCBI-Seqset DEFINITIONS ::=
1215 BEGIN
1216
1217 EXPORTS Bioseq-set, Seq-entry;
1218
1219 IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
1220 Object-id, Dbtag, Date FROM NCBI-General;
1221
1222 --*** Sequence Collections ********************************
1223 --*
1224
1225 Bioseq-set ::= SEQUENCE { -- just a collection; only seq-set is required
1226 id Object-id OPTIONAL ,
1227 coll Dbtag OPTIONAL , -- to identify a collection
1228 level INTEGER OPTIONAL , -- nesting level
1229 class ENUMERATED { -- kind of set (codes 0-22, plus 255)
1230 not-set (0) ,
1231 nuc-prot (1) , -- nuc acid and coded proteins
1232 segset (2) , -- segmented sequence + parts
1233 conset (3) , -- constructed sequence + parts
1234 parts (4) , -- parts for 2 or 3
1235 gibb (5) , -- geninfo backbone
1236 gi (6) , -- geninfo
1237 genbank (7) , -- converted genbank
1238 pir (8) , -- converted pir
1239 pub-set (9) , -- all the seqs from a single publication
1240 equiv (10) , -- a set of equivalent maps or seqs
1241 swissprot (11) , -- converted SWISSPROT
1242 pdb-entry (12) , -- a complete PDB entry
1243 mut-set (13) , -- set of mutations
1244 pop-set (14) , -- population study
1245 phy-set (15) , -- phylogenetic study
1246 eco-set (16) , -- ecological sample study
1247 gen-prod-set (17) , -- genomic products, chrom+mRNA+protein
1248 wgs-set (18) , -- whole genome shotgun project
1249 named-annot (19) , -- named annotation set
1250 named-annot-prod (20) , -- with instantiated mRNA+protein
1251 read-set (21) , -- set from a single read
1252 paired-end-reads (22) , -- paired sequences within a read-set
1253 other (255) } DEFAULT not-set , -- not-set (0) applies when class is omitted
1254 release VisibleString OPTIONAL ,
1255 date Date OPTIONAL ,
1256 descr Seq-descr OPTIONAL ,
1257 seq-set SEQUENCE OF Seq-entry , -- the members, in order; a member may itself be a Bioseq-set
1258 annot SET OF Seq-annot OPTIONAL }
1259
1260 Seq-entry ::= CHOICE { -- either a single sequence or a (nested) set
1261 seq Bioseq ,
1262 set Bioseq-set }
1263
1264 END
1265
1266 --$Revision: 6.0 $
1267 -- *********************************************************************
1268 --
1269 -- These are code and conversion tables for NCBI sequence codes
1270 -- ASN.1 for the sequences themselves is defined in seq.asn
1271 --
1272 -- Seq-map-table and Seq-code-table REQUIRE that codes start with 0
1273 -- and increase continuously. So IUPAC codes, which are upper case
1274 -- letters will always have 65 0 cells before the codes begin. This
1275 -- allows all codes to do indexed lookups for things
1276 --
1277 -- Valid names for code tables are:
1278 -- IUPACna
1279 -- IUPACaa
1280 -- IUPACeaa
1281 -- IUPACaa3 3 letter amino acid codes : parallels IUPACeaa
1282 -- display only, not a data exchange type
1283 -- NCBI2na
1284 -- NCBI4na
1285 -- NCBI8na
1286 -- NCBI8aa
1287 -- NCBIstdaa
1288 -- probability types map to IUPAC types for display as characters
1289
1290 NCBI-SeqCode DEFINITIONS ::=
1291 BEGIN
1292
1293 EXPORTS Seq-code-table, Seq-map-table, Seq-code-set; -- Seq-code-type is defined below but is not exported
1294
1295 Seq-code-type ::= ENUMERATED { -- sequence representations; codes start at 1, no (0) value
1296 iupacna (1) , -- IUPAC 1 letter nuc acid code
1297 iupacaa (2) , -- IUPAC 1 letter amino acid code
1298 ncbi2na (3) , -- 2 bit nucleic acid code
1299 ncbi4na (4) , -- 4 bit nucleic acid code
1300 ncbi8na (5) , -- 8 bit extended nucleic acid code
1301 ncbipna (6) , -- nucleic acid probabilities
1302 ncbi8aa (7) , -- 8 bit extended amino acid codes
1303 ncbieaa (8) , -- extended ASCII 1 letter aa codes
1304 ncbipaa (9) , -- amino acid probabilities
1305 iupacaa3 (10) , -- 3 letter code only for display
1306 ncbistdaa (11) } -- consecutive codes for std aas, 0-25
1307
1308 Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings
1309 from Seq-code-type , -- code to map from
1310 to Seq-code-type , -- code to map to
1311 num INTEGER , -- number of rows in table
1312 start-at INTEGER DEFAULT 0 , -- index offset of first element; 0 when omitted
1313 table SEQUENCE OF INTEGER } -- table of values, in from-to order
1314
1315 Seq-code-table ::= SEQUENCE { -- for names of coded values
1316 code Seq-code-type , -- name of code
1317 num INTEGER , -- number of rows in table
1318 one-letter BOOLEAN , -- symbol is ALWAYS 1 letter?
1319 start-at INTEGER DEFAULT 0 , -- index offset of first element; 0 when omitted
1320 table SEQUENCE OF -- one row per code value, in order
1321 SEQUENCE {
1322 symbol VisibleString , -- the printed symbol or letter
1323 name VisibleString } , -- an explanatory name or string
1324 comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid
1325
1326 Seq-code-set ::= SEQUENCE { -- for distribution; both members are optional
1327 codes SET OF Seq-code-table OPTIONAL ,
1328 maps SET OF Seq-map-table OPTIONAL }
1329
1330 END
1331
1332 --$Revision: 6.0 $
1333 --*********************************************************************
1334 --
1335 -- 1990 - J.Ostell
1336 -- Version 3.0 - June 1994
1337 --
1338 --*********************************************************************
1339 --*********************************************************************
1340 --
1341 -- EMBL specific data
1342 -- This block of specifications was developed by Reiner Fuchs of EMBL
1343 -- Updated by J.Ostell, 1994
1344 --
1345 --*********************************************************************
1346
1347 EMBL-General DEFINITIONS ::=
1348 BEGIN
1349
1350 EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;
1351
1352 IMPORTS Date, Object-id FROM NCBI-General;
1353
1354 EMBL-dbname ::= CHOICE { -- a database, given either as a code or as a name
1355 code ENUMERATED { -- codes 0-16, plus 255
1356 embl(0),
1357 genbank(1),
1358 ddbj(2),
1359 geninfo(3),
1360 medline(4),
1361 swissprot(5),
1362 pir(6),
1363 pdb(7),
1364 epd(8),
1365 ecd(9),
1366 tfd(10),
1367 flybase(11),
1368 prosite(12),
1369 enzyme(13),
1370 mim(14),
1371 ecoseq(15),
1372 hiv(16) ,
1373 other (255) } ,
1374 name VisibleString } -- free-text alternative to code
1375
1376 EMBL-xref ::= SEQUENCE { -- cross reference: a database plus ids in it; both required
1377 dbname EMBL-dbname,
1378 id SEQUENCE OF Object-id }
1379
1380 EMBL-block ::= SEQUENCE { -- only creation-date and update-date are required
1381 class ENUMERATED {
1382 not-set(0),
1383 standard(1),
1384 unannotated(2),
1385 other(255) } DEFAULT standard, -- note: the default is standard (1), not not-set (0)
1386 div ENUMERATED { -- codes 0-15, plus 255; may be absent
1387 fun(0),
1388 inv(1),
1389 mam(2),
1390 org(3),
1391 phg(4),
1392 pln(5),
1393 pri(6),
1394 pro(7),
1395 rod(8),
1396 syn(9),
1397 una(10),
1398 vrl(11),
1399 vrt(12),
1400 pat(13),
1401 est(14),
1402 sts(15),
1403 other (255) } OPTIONAL,
1404 creation-date Date,
1405 update-date Date,
1406 extra-acc SEQUENCE OF VisibleString OPTIONAL,
1407 keywords SEQUENCE OF VisibleString OPTIONAL,
1408 xref SEQUENCE OF EMBL-xref OPTIONAL }
1409
1410 END
1411
1412 --*********************************************************************
1413 --
1414 -- SWISSPROT specific data
1415 -- This block of specifications was developed by Mark Cavanaugh of
1416 -- NCBI working with Amos Bairoch of SWISSPROT
1417 --
1418 --*********************************************************************
1419
1420 SP-General DEFINITIONS ::=
1421 BEGIN
1422
1423 EXPORTS SP-block;
1424
1425 IMPORTS Date, Dbtag FROM NCBI-General
1426 Seq-id FROM NCBI-Seqloc;
1427
1428 SP-block ::= SEQUENCE { -- SWISSPROT specific descriptions
1429 class ENUMERATED { -- required: no DEFAULT or OPTIONAL is given
1430 not-set (0) ,
1431 standard (1) , -- conforms to all SWISSPROT checks
1432 prelim (2) , -- only seq and biblio checked
1433 other (255) } ,
1434 extra-acc SET OF VisibleString OPTIONAL , -- old SWISSPROT ids
1435 imeth BOOLEAN DEFAULT FALSE , -- seq known to start with Met; FALSE when omitted
1436 plasnm SET OF VisibleString OPTIONAL, -- plasmid names carrying gene
1437 seqref SET OF Seq-id OPTIONAL, -- xref to other sequences
1438 dbref SET OF Dbtag OPTIONAL , -- xref to non-sequence dbases
1439 keywords SET OF VisibleString OPTIONAL , -- keywords
1440 created Date OPTIONAL , -- creation date
1441 sequpd Date OPTIONAL , -- sequence update
1442 annotupd Date OPTIONAL } -- annotation update
1443
1444 END
1445
1446 --*********************************************************************
1447 --
1448 -- PIR specific data
1449 -- This block of specifications was developed by Jim Ostell of
1450 -- NCBI
1451 --
1452 --*********************************************************************
1453
1454 PIR-General DEFINITIONS ::=
1455 BEGIN
1456
1457 EXPORTS PIR-block;
1458
1459 IMPORTS Seq-id FROM NCBI-Seqloc;
1460
1461 PIR-block ::= SEQUENCE { -- PIR specific descriptions; every field is optional
1462 had-punct BOOLEAN OPTIONAL , -- had punctuation in sequence ?
1463 host VisibleString OPTIONAL ,
1464 source VisibleString OPTIONAL , -- source line
1465 summary VisibleString OPTIONAL ,
1466 genetic VisibleString OPTIONAL ,
1467 includes VisibleString OPTIONAL ,
1468 placement VisibleString OPTIONAL ,
1469 superfamily VisibleString OPTIONAL ,
1470 keywords SEQUENCE OF VisibleString OPTIONAL ,
1471 cross-reference VisibleString OPTIONAL ,
1472 date VisibleString OPTIONAL , -- kept as text, not as a Date
1473 seq-raw VisibleString OPTIONAL , -- seq with punctuation
1474 seqref SET OF Seq-id OPTIONAL } -- xref to other sequences
1475
1476 END
1477
1478 --*********************************************************************
1479 --
1480 -- GenBank specific data
1481 -- This block of specifications was developed by Jim Ostell of
1482 -- NCBI
1483 --
1484 --*********************************************************************
1485
1486 GenBank-General DEFINITIONS ::=
1487 BEGIN
1488
1489 EXPORTS GB-block;
1490
1491 IMPORTS Date FROM NCBI-General;
1492
1493 GB-block ::= SEQUENCE { -- GenBank specific descriptions; every field is optional
1494 extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
1495 source VisibleString OPTIONAL , -- source line
1496 keywords SEQUENCE OF VisibleString OPTIONAL ,
1497 origin VisibleString OPTIONAL,
1498 date VisibleString OPTIONAL , -- OBSOLETE old form Entry Date (text); see entry-date
1499 entry-date Date OPTIONAL , -- replaces date
1500 div VisibleString OPTIONAL , -- GenBank division
1501 taxonomy VisibleString OPTIONAL } -- continuation line of organism
1502
1503 END
1504
1505 --**********************************************************************
1506 -- PRF specific definition
1507 -- PRF is a protein sequence database created and maintained by
1508 -- Protein Research Foundation, Minoo-city, Osaka, Japan.
1509 --
1510 -- Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
1511 -- Kyoto Univ., Japan
1512 --
1513 --**********************************************************************
1514
1515 PRF-General DEFINITIONS ::=
1516 BEGIN
1517
1518 EXPORTS PRF-block; -- PRF-ExtraSrc is not exported; this module has no IMPORTS
1519
1520 PRF-block ::= SEQUENCE { -- PRF specific descriptions; both fields are optional
1521 extra-src PRF-ExtraSrc OPTIONAL,
1522 keywords SEQUENCE OF VisibleString OPTIONAL
1523 }
1524
1525 PRF-ExtraSrc ::= SEQUENCE { -- extra source information; five optional text fields
1526 host VisibleString OPTIONAL,
1527 part VisibleString OPTIONAL,
1528 state VisibleString OPTIONAL,
1529 strain VisibleString OPTIONAL,
1530 taxon VisibleString OPTIONAL
1531 }
1532
1533 END
1534
1535 --*********************************************************************
1536 --
1537 -- PDB specific data
1538 -- This block of specifications was developed by Jim Ostell and
1539 -- Steve Bryant of NCBI
1540 --
1541 --*********************************************************************
1542
1543 PDB-General DEFINITIONS ::=
1544 BEGIN
1545
1546 EXPORTS PDB-block; -- PDB-replace is not exported
1547
1548 IMPORTS Date FROM NCBI-General;
1549
1550 PDB-block ::= SEQUENCE { -- PDB specific descriptions; first four fields are required
1551 deposition Date , -- deposition date month,year
1552 class VisibleString ,
1553 compound SEQUENCE OF VisibleString ,
1554 source SEQUENCE OF VisibleString ,
1555 exp-method VisibleString OPTIONAL , -- present if NOT X-ray diffraction
1556 replace PDB-replace OPTIONAL } -- replacement history
1557
1558 PDB-replace ::= SEQUENCE { -- one replacement event; both fields are required
1559 date Date ,
1560 ids SEQUENCE OF VisibleString } -- entry ids replaced by this one
1561
1562 END
1563
1564 --$Revision: 6.27 $
1565 --**********************************************************************
1566 --
1567 -- NCBI Sequence Feature elements
1568 -- by James Ostell, 1990
1569 -- Version 3.0 - June 1994
1570 --
1571 --**********************************************************************
1572
1573 NCBI-Seqfeat DEFINITIONS ::=
1574 BEGIN
1575
1576 EXPORTS Seq-feat, Feat-id, Genetic-code;
1577
1578 IMPORTS Gene-ref FROM NCBI-Gene
1579 Prot-ref FROM NCBI-Protein
1580 Org-ref FROM NCBI-Organism
1581 BioSource FROM NCBI-BioSource
1582 RNA-ref FROM NCBI-RNA
1583 Seq-loc, Giimport-id FROM NCBI-Seqloc
1584 Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
1585 Rsite-ref FROM NCBI-Rsite
1586 Txinit FROM NCBI-TxInit
1587 Pub-set FROM NCBI-Pub
1588 Object-id, Dbtag, User-object FROM NCBI-General;
1589
1590 --*** Feature identifiers ********************************
1591 --*
1592
1593 Feat-id ::= CHOICE { -- identifier of a feature; exactly one of four forms
1594 gibb INTEGER , -- geninfo backbone
1595 giim Giimport-id , -- geninfo import
1596 local Object-id , -- for local software use
1597 general Dbtag } -- for use by various databases
1598
1599 --*** Seq-feat *******************************************
1600 --* sequence feature generalization
1601
1602 Seq-feat ::= SEQUENCE { -- one feature; only data and location are required
1603 id Feat-id OPTIONAL , -- single id; see ids below
1604 data SeqFeatData , -- the specific data
1605 partial BOOLEAN OPTIONAL , -- incomplete in some way?
1606 except BOOLEAN OPTIONAL , -- something funny about this? (see except-text)
1607 comment VisibleString OPTIONAL ,
1608 product Seq-loc OPTIONAL , -- product of process
1609 location Seq-loc , -- feature made from
1610 qual SEQUENCE OF Gb-qual OPTIONAL , -- qualifiers
1611 title VisibleString OPTIONAL , -- for user defined label
1612 ext User-object OPTIONAL , -- user defined structure extension; see exts below
1613 cit Pub-set OPTIONAL , -- citations for this feature
1614 exp-ev ENUMERATED { -- evidence for existence of feature
1615 experimental (1) , -- any reasonable experimental check
1616 not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
1617 xref SET OF SeqFeatXref OPTIONAL , -- cite other relevant features
1618 dbxref SET OF Dbtag OPTIONAL , -- support for xref to other databases
1619 pseudo BOOLEAN OPTIONAL , -- annotated on pseudogene?
1620 except-text VisibleString OPTIONAL , -- explain if except=TRUE
1621 ids SET OF Feat-id OPTIONAL , -- set of Ids; will replace 'id' field
1622 exts SET OF User-object OPTIONAL } -- set of extensions; will replace 'ext' field
1623
1624 SeqFeatData ::= CHOICE { -- the kind-specific payload of a feature; exactly one alternative
1625 gene Gene-ref ,
1626 org Org-ref ,
1627 cdregion Cdregion ,
1628 prot Prot-ref ,
1629 rna RNA-ref ,
1630 pub Pubdesc , -- publication applies to this seq
1631 seq Seq-loc , -- to annotate origin from another seq
1632 imp Imp-feat ,
1633 region VisibleString, -- named region (globin locus)
1634 comment NULL , -- just a comment; NULL carries no data of its own
1635 bond ENUMERATED { -- codes start at 1
1636 disulfide (1) ,
1637 thiolester (2) ,
1638 xlink (3) ,
1639 thioether (4) ,
1640 other (255) } ,
1641 site ENUMERATED { -- codes 1-26, plus 255
1642 active (1) ,
1643 binding (2) ,
1644 cleavage (3) ,
1645 inhibit (4) ,
1646 modified (5),
1647 glycosylation (6) ,
1648 myristoylation (7) ,
1649 mutagenized (8) ,
1650 metal-binding (9) ,
1651 phosphorylation (10) ,
1652 acetylation (11) ,
1653 amidation (12) ,
1654 methylation (13) ,
1655 hydroxylation (14) ,
1656 sulfatation (15) ,
1657 oxidative-deamination (16) ,
1658 pyrrolidone-carboxylic-acid (17) ,
1659 gamma-carboxyglutamic-acid (18) ,
1660 blocked (19) ,
1661 lipid-binding (20) ,
1662 np-binding (21) ,
1663 dna-binding (22) ,
1664 signal-peptide (23) ,
1665 transit-peptide (24) ,
1666 transmembrane-region (25) ,
1667 nitrosylation (26) ,
1668 other (255) } ,
1669 rsite Rsite-ref , -- restriction site (for maps really)
1670 user User-object , -- user defined structure
1671 txinit Txinit , -- transcription initiation
1672 num Numbering , -- a numbering system
1673 psec-str ENUMERATED { -- protein secondary structure; no "other" value is defined
1674 helix (1) , -- any helix
1675 sheet (2) , -- beta sheet
1676 turn (3) } , -- beta or gamma turn
1677 non-std-residue VisibleString , -- non-standard residue here in seq
1678 het Heterogen , -- cofactor, prosthetic grp, etc, bound to seq
1679 biosrc BioSource,
1680 clone Clone-ref -- clone feature, see Clone-ref
1681 }
1682
1683 SeqFeatXref ::= SEQUENCE { -- cross reference to a feature; both optional because can have one or both
1684 id Feat-id OPTIONAL , -- the feature copied
1685 data SeqFeatData OPTIONAL } -- the specific data
1686
1687 --*** CdRegion ***********************************************
1688 --*
1689 --* Instructions to translate from a nucleic acid to a peptide
1690 --* conflict means it's supposed to translate but doesn't
1691 --*
1692
1693
1694 Cdregion ::= SEQUENCE { -- coding region; no field is required (frame has a DEFAULT)
1695 orf BOOLEAN OPTIONAL , -- just an ORF ?
1696 frame ENUMERATED {
1697 not-set (0) , -- not set, code uses one
1698 one (1) ,
1699 two (2) ,
1700 three (3) } DEFAULT not-set , -- reading frame; not-set when omitted
1701 conflict BOOLEAN OPTIONAL , -- conflict
1702 gaps INTEGER OPTIONAL , -- number of gaps on conflict/except
1703 mismatch INTEGER OPTIONAL , -- number of mismatches on above
1704 code Genetic-code OPTIONAL , -- genetic code used
1705 code-break SEQUENCE OF Code-break OPTIONAL , -- individual exceptions
1706 stops INTEGER OPTIONAL } -- number of stop codons on above
1707
1708 -- each code is 64 cells long, in the order where
1709 -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCT=4, etc
1710 -- NOTE: this order does NOT correspond to a Seq-data
1711 -- encoding. It is "natural" to codon usage instead.
1712 -- the value in each cell is the AA coded for
1713 -- start= AA coded only if first in peptide
1714 -- in start array, if codon is not a legitimate start
1715 -- codon, that cell will have the "gap" symbol for
1716 -- that alphabet. Otherwise it will have the AA
1717 -- encoded when that codon is used at the start.
1718
1719 Genetic-code ::= SET OF CHOICE { -- unordered set; each element is one of the forms below
1720 name VisibleString , -- name of a code
1721 id INTEGER , -- id in dbase
1722 ncbieaa VisibleString , -- indexed to IUPAC extended
1723 ncbi8aa OCTET STRING , -- indexed to NCBI8aa
1724 ncbistdaa OCTET STRING , -- indexed to NCBIstdaa
1725 sncbieaa VisibleString , -- start, indexed to IUPAC extended
1726 sncbi8aa OCTET STRING , -- start, indexed to NCBI8aa
1727 sncbistdaa OCTET STRING } -- start, indexed to NCBIstdaa
1728
1729 Code-break ::= SEQUENCE { -- specific codon exceptions; both fields are required
1730 loc Seq-loc , -- location of exception
1731 aa CHOICE { -- the amino acid, as an INTEGER in one of three alphabets
1732 ncbieaa INTEGER , -- ASCII value of NCBIeaa code
1733 ncbi8aa INTEGER , -- NCBI8aa code
1734 ncbistdaa INTEGER } } -- NCBIstdaa code
1735
1736 Genetic-code-table ::= SET OF Genetic-code -- table of genetic codes (unordered)
1737
1738 --*** Import ***********************************************
1739 --*
1740 --* Features imported from other databases
1741 --*
1742
1743 Imp-feat ::= SEQUENCE { -- an imported feature; only key is required
1744 key VisibleString ,
1745 loc VisibleString OPTIONAL , -- original location string
1746 descr VisibleString OPTIONAL } -- text description
1747
1748 Gb-qual ::= SEQUENCE { -- one qualifier, as used in Seq-feat.qual; both fields are required
1749 qual VisibleString ,
1750 val VisibleString }
1751
1752
1753 --*** Clone-ref ***********************************************
1754 --*
1755 --* Specification of clone features
1756 --*
1757
1758 Clone-ref ::= SEQUENCE { -- a clone feature; only name is required
1759 name VisibleString, -- Official clone symbol
1760 library VisibleString OPTIONAL, -- Library name
1761
1762 concordant BOOLEAN DEFAULT FALSE, -- OPTIONAL? (currently FALSE when omitted)
1763 unique BOOLEAN DEFAULT FALSE, -- OPTIONAL? (currently FALSE when omitted)
1764 placement-method INTEGER { -- codes start at 0; may be absent
1765 end-seq (0), -- Clone placed by end sequence
1766 insert-alignment (1), -- Clone placed by insert alignment
1767 sts (2), -- Clone placed by STS
1768 fish (3),
1769 fingerprint (4),
1770 other (255)
1771 } OPTIONAL,
1772 clone-seq Clone-seq-set OPTIONAL
1773 }
1774
1775 Clone-seq-set ::= SET OF Clone-seq -- unordered collection, used by Clone-ref.clone-seq
1776
1777
1778 Clone-seq ::= SEQUENCE { -- one sequence of a clone; type and location are required
1779 type INTEGER {
1780 insert (0),
1781 end (1),
1782 other (255)
1783 },
1784 confidence INTEGER { -- codes 0-6, plus 255; may be absent
1785 multiple (0), -- Multiple hits
1786 na (1), -- Unspecified
1787 nohit-rep (2), -- No hits, repetitive
1788 nohitnorep (3), -- No hits, not repetitive
1789 other-chrm (4), -- Hit on different chromosome
1790 unique (5),
1791 virtual (6), -- Virtual (hasn't been sequenced)
1792 other (255)
1793 } OPTIONAL,
1794 location Seq-loc, -- location on sequence
1795 seq Seq-loc OPTIONAL, -- clone sequence location
1796 align-id Dbtag OPTIONAL
1797 }
1798
1799
1800 END
1801
1802 --**********************************************************************
1803 --
1804 -- NCBI Restriction Sites
1805 -- by James Ostell, 1990
1806 -- version 0.8
1807 --
1808 --**********************************************************************
1809
1810 NCBI-Rsite DEFINITIONS ::=
1811 BEGIN
1812
1813 EXPORTS Rsite-ref;
1814
1815 IMPORTS Dbtag FROM NCBI-General;
1816
1817 Rsite-ref ::= CHOICE { -- a restriction site, given as text or as a database pointer
1818 str VisibleString , -- may be unparsable
1819 db Dbtag } -- pointer to a restriction site database
1820
1821 END
1822
1823 --**********************************************************************
1824 --
1825 -- NCBI RNAs
1826 -- by James Ostell, 1990
1827 -- version 0.8
1828 --
1829 --**********************************************************************
1830
1831 NCBI-RNA DEFINITIONS ::=
1832 BEGIN
1833
1834 EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;
1835
1836 IMPORTS Seq-loc FROM NCBI-Seqloc;
1837
1838 --*** rnas ***********************************************
1839 --*
1840 --* various rnas
1841 --*
1842 -- minimal RNA sequence
1843 RNA-ref ::= SEQUENCE { -- only type is required; pseudo and ext may be omitted
1844 type ENUMERATED { -- type of RNA feature
1845 unknown (0) ,
1846 premsg (1) ,
1847 mRNA (2) ,
1848 tRNA (3) ,
1849 rRNA (4) ,
1850 snRNA (5) , -- will become ncRNA, with RNA-gen.class = snRNA
1851 scRNA (6) , -- will become ncRNA, with RNA-gen.class = scRNA
1852 snoRNA (7) , -- will become ncRNA, with RNA-gen.class = snoRNA
1853 ncRNA (8) , -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
1854 tmRNA (9) ,
1855 miscRNA (10) ,
1856 other (255) } ,
1857 pseudo BOOLEAN OPTIONAL , -- OPTIONAL without DEFAULT, so the flag may be absent
1858 ext CHOICE { -- at most one extension variant is carried
1859 name VisibleString , -- for naming "other" type
1860 tRNA Trna-ext , -- for tRNAs
1861 gen RNA-gen } OPTIONAL -- generic fields for ncRNA, tmRNA, miscRNA
1862 }
1863
1864 Trna-ext ::= SEQUENCE { -- tRNA feature extensions; every field is optional
1865 aa CHOICE { -- aa this carries; the variant name presumably selects the residue alphabet of the INTEGER code (TODO confirm)
1866 iupacaa INTEGER ,
1867 ncbieaa INTEGER ,
1868 ncbi8aa INTEGER ,
1869 ncbistdaa INTEGER } OPTIONAL ,
1870 codon SET OF INTEGER OPTIONAL , -- codon(s) as in Genetic-code
1871 anticodon Seq-loc OPTIONAL } -- location of anticodon
1872
1873 RNA-gen ::= SEQUENCE { -- generic RNA fields, carried by RNA-ref.ext.gen; every field is optional
1874 class VisibleString OPTIONAL , -- for ncRNAs, the class of non-coding RNA:
1875 -- examples: antisense_RNA, guide_RNA, snRNA
1876 product VisibleString OPTIONAL , -- NOTE(review): presumably the product name; confirm
1877 quals RNA-qual-set OPTIONAL -- e.g., tag_peptide qualifier for tmRNAs
1878 }
1879
1880 RNA-qual ::= SEQUENCE { -- Additional data values for RNA-gen,
1881 qual VisibleString , -- in a tag (qual), value (val) format
1882 val VisibleString } -- both qual and val are required
1883
1884 RNA-qual-set ::= SEQUENCE OF RNA-qual -- ordered list of qualifiers, used by RNA-gen.quals
1885
1886 END
1887
1888 --**********************************************************************
1889 --
1890 -- NCBI Genes
1891 -- by James Ostell, 1990
1892 -- version 0.8
1893 --
1894 --**********************************************************************
1895
1896 NCBI-Gene DEFINITIONS ::=
1897 BEGIN
1898
1899 EXPORTS Gene-ref, Gene-nomenclature;
1900
1901 IMPORTS Dbtag FROM NCBI-General;
1902
1903 --*** Gene ***********************************************
1904 --*
1905 --* reference to a gene
1906 --*
1907
1908 Gene-ref ::= SEQUENCE { -- reference to a gene; no field is required, pseudo defaults to FALSE
1909 locus VisibleString OPTIONAL , -- Official gene symbol
1910 allele VisibleString OPTIONAL , -- Official allele designation
1911 desc VisibleString OPTIONAL , -- descriptive name
1912 maploc VisibleString OPTIONAL , -- descriptive map location
1913 pseudo BOOLEAN DEFAULT FALSE , -- pseudogene
1914 db SET OF Dbtag OPTIONAL , -- ids in other dbases
1915 syn SET OF VisibleString OPTIONAL , -- synonyms for locus
1916 locus-tag VisibleString OPTIONAL , -- systematic gene name (e.g., MI0001, ORF0069)
1917 formal-name Gene-nomenclature OPTIONAL -- structured nomenclature record, see Gene-nomenclature
1918 }
1919
1920 Gene-nomenclature ::= SEQUENCE { -- used by Gene-ref.formal-name; status is the only required field
1921 status ENUMERATED { -- required
1922 unknown (0) ,
1923 official (1) ,
1924 interim (2)
1925 } ,
1926 symbol VisibleString OPTIONAL ,
1927 name VisibleString OPTIONAL ,
1928 source Dbtag OPTIONAL -- NOTE(review): presumably the database the nomenclature comes from; confirm
1929 }
1930
1931 END
1932
1933
1934 --**********************************************************************
1935 --
1936 -- NCBI Organism
1937 -- by James Ostell, 1994
1938 -- version 3.0
1939 --
1940 --**********************************************************************
1941
1942 NCBI-Organism DEFINITIONS ::=
1943 BEGIN
1944
1945 EXPORTS Org-ref;
1946
1947 IMPORTS Dbtag FROM NCBI-General;
1948
1949 --*** Org-ref ***********************************************
1950 --*
1951 --* Reference to an organism
1952 --* defines only the organism.. lower levels of detail for biological
1953 --* molecules are provided by the Source object
1954 --*
1955
1956 Org-ref ::= SEQUENCE { -- reference to an organism; every field is optional
1957 taxname VisibleString OPTIONAL , -- preferred formal name
1958 common VisibleString OPTIONAL , -- common name
1959 mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
1960 db SET OF Dbtag OPTIONAL , -- ids in taxonomic or culture dbases
1961 syn SET OF VisibleString OPTIONAL , -- synonyms for taxname or common
1962 orgname OrgName OPTIONAL } -- structured name, see OrgName
1963
1964
1965 OrgName ::= SEQUENCE { -- structured organism name; every field is optional
1966 name CHOICE { -- at most one naming form
1967 binomial BinomialOrgName , -- genus/species type name
1968 virus VisibleString , -- virus names are different
1969 hybrid MultiOrgName , -- hybrid between organisms (recursive: MultiOrgName is SEQUENCE OF OrgName)
1970 namedhybrid BinomialOrgName , -- some hybrids have genus x species name
1971 partial PartialOrgName } OPTIONAL , -- when genus not known
1972 attrib VisibleString OPTIONAL , -- attribution of name
1973 mod SEQUENCE OF OrgMod OPTIONAL , -- ordered list of structured modifiers, see OrgMod
1974 lineage VisibleString OPTIONAL , -- lineage with semicolon separators
1975 gcode INTEGER OPTIONAL , -- genetic code (see CdRegion)
1976 mgcode INTEGER OPTIONAL , -- mitochondrial genetic code
1977 div VisibleString OPTIONAL } -- GenBank division code
1978
1979
1980 OrgMod ::= SEQUENCE { -- one structured organism modifier: required subtype and subname, optional attrib
1981 subtype INTEGER { -- required; named values start at 2 (0 and 1 have no names here)
1982 strain (2) ,
1983 substrain (3) ,
1984 type (4) ,
1985 subtype (5) ,
1986 variety (6) ,
1987 serotype (7) ,
1988 serogroup (8) ,
1989 serovar (9) ,
1990 cultivar (10) ,
1991 pathovar (11) ,
1992 chemovar (12) ,
1993 biovar (13) ,
1994 biotype (14) ,
1995 group (15) ,
1996 subgroup (16) ,
1997 isolate (17) ,
1998 common (18) ,
1999 acronym (19) ,
2000 dosage (20) , -- chromosome dosage of hybrid
2001 nat-host (21) , -- natural host of this specimen
2002 sub-species (22) ,
2003 specimen-voucher (23) ,
2004 authority (24) ,
2005 forma (25) ,
2006 forma-specialis (26) ,
2007 ecotype (27) ,
2008 synonym (28) ,
2009 anamorph (29) ,
2010 teleomorph (30) ,
2011 breed (31) ,
2012 gb-acronym (32) , -- used by taxonomy database
2013 gb-anamorph (33) , -- used by taxonomy database
2014 gb-synonym (34) , -- used by taxonomy database
2015 culture-collection (35) ,
2016 bio-material (36) ,
2017 metagenome-source (37) ,
2018 old-lineage (253) ,
2019 old-name (254) ,
2020 other (255) } , -- NOTE: old-name (254), once planned for a later spec (ASN5), is now listed above
2021 subname VisibleString , -- required
2022 attrib VisibleString OPTIONAL } -- attribution/source of name
2023
2024 BinomialOrgName ::= SEQUENCE { -- used by OrgName.name.binomial and OrgName.name.namedhybrid
2025 genus VisibleString , -- required
2026 species VisibleString OPTIONAL , -- species required if subspecies used
2027 subspecies VisibleString OPTIONAL }
2028
2029 MultiOrgName ::= SEQUENCE OF OrgName -- the first will be used to assign division; used by OrgName.name.hybrid
2030
2031 PartialOrgName ::= SEQUENCE OF TaxElement -- when we don't know the genus; used by OrgName.name.partial
2032
2033 TaxElement ::= SEQUENCE { -- one element of a PartialOrgName; fixed-level and name are required
2034 fixed-level INTEGER {
2035 other (0) , -- level must be set in string
2036 family (1) ,
2037 order (2) ,
2038 class (3) } ,
2039 level VisibleString OPTIONAL , -- presumably the string meant by the comment on other (0); TODO confirm
2040 name VisibleString }
2041
2042 END
2043
2044
2045 --**********************************************************************
2046 --
2047 -- NCBI BioSource
2048 -- by James Ostell, 1994
2049 -- version 3.0
2050 --
2051 --**********************************************************************
2052
2053 NCBI-BioSource DEFINITIONS ::=
2054 BEGIN
2055
2056 EXPORTS BioSource;
2057
2058 IMPORTS Org-ref FROM NCBI-Organism;
2059
2060 --********************************************************************
2061 --
2062 -- BioSource gives the source of the biological material
2063 -- for sequences
2064 --
2065 --********************************************************************
2066
2067 BioSource ::= SEQUENCE { -- source of the biological material; org is the only required field
2068 genome INTEGER { -- biological context
2069 unknown (0) ,
2070 genomic (1) ,
2071 chloroplast (2) ,
2072 chromoplast (3) ,
2073 kinetoplast (4) ,
2074 mitochondrion (5) ,
2075 plastid (6) ,
2076 macronuclear (7) ,
2077 extrachrom (8) ,
2078 plasmid (9) ,
2079 transposon (10) ,
2080 insertion-seq (11) ,
2081 cyanelle (12) ,
2082 proviral (13) ,
2083 virion (14) ,
2084 nucleomorph (15) ,
2085 apicoplast (16) ,
2086 leucoplast (17) ,
2087 proplastid (18) ,
2088 endogenous-virus (19) ,
2089 hydrogenosome (20) ,
2090 chromosome (21) ,
2091 chromatophore (22)
2092 } DEFAULT unknown , -- note: unlike origin, this list has no other (255) value
2093 origin INTEGER {
2094 unknown (0) ,
2095 natural (1) , -- normal biological entity
2096 natmut (2) , -- naturally occurring mutant
2097 mut (3) , -- artificially mutagenized
2098 artificial (4) , -- artificially engineered
2099 synthetic (5) , -- purely synthetic
2100 other (255)
2101 } DEFAULT unknown ,
2102 org Org-ref , -- required
2103 subtype SEQUENCE OF SubSource OPTIONAL , -- ordered list of source modifiers, see SubSource
2104 is-focus NULL OPTIONAL , -- to distinguish biological focus
2105 pcr-primers PCRReactionSet OPTIONAL } -- structured PCR primer data, see PCRReactionSet
2106
2107 PCRReactionSet ::= SET OF PCRReaction -- unordered collection; used by BioSource.pcr-primers
2108
2109 PCRReaction ::= SEQUENCE { -- forward and reverse primer sets; either may be omitted
2110 forward PCRPrimerSet OPTIONAL ,
2111 reverse PCRPrimerSet OPTIONAL }
2112
2113 PCRPrimerSet ::= SET OF PCRPrimer -- unordered collection of primers
2114
2115 PCRPrimer ::= SEQUENCE { -- one primer; both seq and name may be omitted
2116 seq PCRPrimerSeq OPTIONAL ,
2117 name PCRPrimerName OPTIONAL }
2118
2119 PCRPrimerSeq ::= VisibleString -- plain string alias, used by PCRPrimer.seq
2120
2121 PCRPrimerName ::= VisibleString -- plain string alias, used by PCRPrimer.name
2122
2123 SubSource ::= SEQUENCE { -- one source modifier (element of BioSource.subtype): required subtype and name, optional attrib
2124 subtype INTEGER { -- required; named values start at 1
2125 chromosome (1) ,
2126 map (2) ,
2127 clone (3) ,
2128 subclone (4) ,
2129 haplotype (5) ,
2130 genotype (6) ,
2131 sex (7) ,
2132 cell-line (8) ,
2133 cell-type (9) ,
2134 tissue-type (10) ,
2135 clone-lib (11) ,
2136 dev-stage (12) ,
2137 frequency (13) ,
2138 germline (14) ,
2139 rearranged (15) ,
2140 lab-host (16) ,
2141 pop-variant (17) ,
2142 tissue-lib (18) ,
2143 plasmid-name (19) ,
2144 transposon-name (20) ,
2145 insertion-seq-name (21) ,
2146 plastid-name (22) ,
2147 country (23) ,
2148 segment (24) ,
2149 endogenous-virus-name (25) ,
2150 transgenic (26) ,
2151 environmental-sample (27) ,
2152 isolation-source (28) ,
2153 lat-lon (29) , -- +/- decimal degrees
2154 collection-date (30) , -- DD-MMM-YYYY format
2155 collected-by (31) , -- name of person who collected the sample
2156 identified-by (32) , -- name of person who identified the sample
2157 fwd-primer-seq (33) , -- sequence (possibly more than one; semicolon-separated)
2158 rev-primer-seq (34) , -- sequence (possibly more than one; semicolon-separated)
2159 fwd-primer-name (35) , -- NOTE(review): values 33 to 36 overlap with the structured BioSource.pcr-primers field; confirm which is preferred
2160 rev-primer-name (36) ,
2161 metagenomic (37) ,
2162 mating-type (38) ,
2163 linkage-group (39) ,
2164 haplogroup (40) ,
2165 other (255) } ,
2166 name VisibleString , -- required; presumably holds the value for the chosen subtype (TODO confirm)
2167 attrib VisibleString OPTIONAL } -- attribution/source of this name
2168
2169 END
2170
2171 --**********************************************************************
2172 --
2173 -- NCBI Protein
2174 -- by James Ostell, 1990
2175 -- version 0.8
2176 --
2177 --**********************************************************************
2178
2179 NCBI-Protein DEFINITIONS ::=
2180 BEGIN
2181
2182 EXPORTS Prot-ref;
2183
2184 IMPORTS Dbtag FROM NCBI-General;
2185
2186 --*** Prot-ref ***********************************************
2187 --*
2188 --* Reference to a protein name
2189 --*
2190
2191 Prot-ref ::= SEQUENCE { -- reference to a protein name; no field is required
2192 name SET OF VisibleString OPTIONAL , -- protein name
2193 desc VisibleString OPTIONAL , -- description (instead of name)
2194 ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
2195 activity SET OF VisibleString OPTIONAL , -- activities
2196 db SET OF Dbtag OPTIONAL , -- ids in other dbases
2197 processed ENUMERATED { -- processing status
2198 not-set (0) ,
2199 preprotein (1) ,
2200 mature (2) ,
2201 signal-peptide (3) ,
2202 transit-peptide (4) } DEFAULT not-set } -- an absent value means not-set
2203
2204
2205
2206 END
2207 --********************************************************************
2208 --
2209 -- Transcription Initiation Site Feature Data Block
2210 -- James Ostell, 1991
2211 -- Philip Bucher, David Ghosh
2212 -- version 1.1
2213 --
2214 --
2215 --
2216 --********************************************************************
2217
2218 NCBI-TxInit DEFINITIONS ::=
2219 BEGIN
2220
2221 EXPORTS Txinit;
2222
2223 IMPORTS Gene-ref FROM NCBI-Gene
2224 Prot-ref FROM NCBI-Protein
2225 Org-ref FROM NCBI-Organism;
2226
2227 Txinit ::= SEQUENCE { -- transcription initiation site data; name and txsystem are required
2228 name VisibleString , -- descriptive name of initiation site
2229 syn SEQUENCE OF VisibleString OPTIONAL , -- synonyms
2230 gene SEQUENCE OF Gene-ref OPTIONAL , -- gene(s) transcribed
2231 protein SEQUENCE OF Prot-ref OPTIONAL , -- protein(s) produced
2232 rna SEQUENCE OF VisibleString OPTIONAL , -- rna(s) produced
2233 expression VisibleString OPTIONAL , -- tissue/time of expression
2234 txsystem ENUMERATED { -- transcription apparatus used at this site
2235 unknown (0) ,
2236 pol1 (1) , -- eukaryotic Pol I
2237 pol2 (2) , -- eukaryotic Pol II
2238 pol3 (3) , -- eukaryotic Pol III
2239 bacterial (4) ,
2240 viral (5) ,
2241 rna (6) , -- RNA replicase
2242 organelle (7) ,
2243 other (255) } ,
2244 txdescr VisibleString OPTIONAL , -- modifiers on txsystem
2245 txorg Org-ref OPTIONAL , -- organism supplying transcription apparatus
2246 mapping-precise BOOLEAN DEFAULT FALSE , -- mapping precise or approx
2247 location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
2248 inittype ENUMERATED {
2249 unknown (0) ,
2250 single (1) ,
2251 multiple (2) ,
2252 region (3) } OPTIONAL ,
2253 evidence SET OF Tx-evidence OPTIONAL } -- unordered experimental evidence records, see Tx-evidence
2254
2255 Tx-evidence ::= SEQUENCE { -- one evidence record for Txinit.evidence; exp-code is the only required field
2256 exp-code ENUMERATED { -- required
2257 unknown (0) ,
2258 rna-seq (1) , -- direct RNA sequencing
2259 rna-size (2) , -- RNA length measurement
2260 np-map (3) , -- nuclease protection mapping with homologous sequence ladder
2261 np-size (4) , -- nuclease protected fragment length measurement
2262 pe-seq (5) , -- dideoxy RNA sequencing
2263 cDNA-seq (6) , -- full-length cDNA sequencing
2264 pe-map (7) , -- primer extension mapping with homologous sequence ladder
2265 pe-size (8) , -- primer extension product length measurement
2266 pseudo-seq (9) , -- full-length processed pseudogene sequencing
2267 rev-pe-map (10) , -- see NOTE (1) below
2268 other (255) } ,
2269 expression-system ENUMERATED {
2270 unknown (0) ,
2271 physiological (1) ,
2272 in-vitro (2) ,
2273 oocyte (3) ,
2274 transfection (4) ,
2275 transgenic (5) ,
2276 other (255) } DEFAULT physiological , -- an absent value means physiological, not unknown
2277 low-prec-data BOOLEAN DEFAULT FALSE ,
2278 from-homolog BOOLEAN DEFAULT FALSE } -- experiment actually done on
2279 -- close homolog
2280
2281 -- NOTE (1) length measurement of a reverse direction primer-extension
2282 -- product (blocked by RNA 5'end) by comparison with
2283 -- homologous sequence ladder (J. Mol. Biol. 199, 587)
2284
2285
2286 END
2287
2288 --$Revision: 1.5 $
2289 -- ----------------------------------------------------------------------------
2290 --
2291 -- PUBLIC DOMAIN NOTICE
2292 -- National Center for Biotechnology Information
2293 --
2294 -- This software/database is a "United States Government Work" under the terms
2295 -- of the United States Copyright Act. It was written as part of the author's
2296 -- official duties as a United States Government employee and thus cannot be
2297 -- copyrighted. This software/database is freely available to the public for
2298 -- use. The National Library of Medicine and the U.S. Government have not
2299 -- placed any restriction on its use or reproduction.
2300 --
2301 -- Although all reasonable efforts have been taken to ensure the accuracy and
2302 -- reliability of the software and data, the NLM and the U.S. Government do not
2303 -- and cannot warrant the performance or results that may be obtained by using
2304 -- this software or data. The NLM and the U.S. Government disclaim all
2305 -- warranties, express or implied, including warranties of performance,
2306 -- merchantability or fitness for any particular purpose.
2307 --
2308 -- Please cite the authors in any work or product based on this material.
2309 --
2310 -- ----------------------------------------------------------------------------
2311 --
2312 -- Authors: Mike DiCuccio, Eugene Vasilchenko
2313 --
2314 -- ASN.1 interface to table readers
2315 --
2316 -- ----------------------------------------------------------------------------
2317
2318 NCBI-SeqTable DEFINITIONS ::=
2319
2320 BEGIN
2321
2322 EXPORTS
2323 SeqTable-column-info, SeqTable-column, Seq-table;
2324
2325 IMPORTS
2326 Seq-id, Seq-loc, Seq-interval FROM NCBI-Seqloc;
2327
2328
2329 SeqTable-column-info ::= SEQUENCE { -- describes one table column; all three fields are optional
2330 -- user friendly column name, can be skipped
2331 title VisibleString OPTIONAL,
2332
2333 -- identification of the column data in the objects described by the table
2334 field-id INTEGER { -- known column data types
2335 -- position types
2336 location (0), -- location as Seq-loc
2337 location-id (1), -- location Seq-id
2338 location-gi (2), -- gi
2339 location-from (3), -- interval from
2340 location-to (4), -- interval to
2341 location-strand (5), -- location strand
2342 location-fuzz-from-lim (6),
2343 location-fuzz-to-lim (7),
2344
2345 product (10), -- product as Seq-loc
2346 product-id (11), -- product Seq-id
2347 product-gi (12), -- product gi
2348 product-from (13), -- product interval from
2349 product-to (14), -- product interval to
2350 product-strand (15), -- product strand
2351 product-fuzz-from-lim (16),
2352 product-fuzz-to-lim (17),
2353
2354 -- main feature fields
2355 id-local (20), -- id.local.id
2356 xref-id-local (21), -- xref.id.local.id
2357 partial (22),
2358 comment (23),
2359 title (24), -- named value; same identifier as the title field above but a different scope
2360 ext (25), -- field-name must be "E.xxx", see below
2361 qual (26), -- field-name must be "Q.xxx", see below
2362 dbxref (27), -- field-name must be "D.xxx", see below
2363
2364 -- various data fields
2365 data-imp-key (30),
2366 data-region (31),
2367 data-cdregion-frame (32),
2368
2369 -- extra fields, see also special values for str below
2370 ext-type (40),
2371 qual-qual (41),
2372 qual-val (42),
2373 dbxref-db (43),
2374 dbxref-tag (44)
2375 } OPTIONAL,
2376
2377 -- any column can be identified by ASN.1 text locator string
2378 -- with omitted object type.
2379 -- examples:
2380 -- "data.gene.locus" for Seq-feat.data.gene.locus
2381 -- "data.imp.key" for Seq-feat.data.imp.key
2382 -- "qual.qual"
2383 -- - Seq-feat.qual is SEQUENCE so several columns are allowed
2384 -- see also "Q.xxx" special value for shorter qual representation
2385 -- "ext.type.str"
2386 -- "ext.data.label.str"
2387 -- "ext.data.data.int"
2388 -- see also "E.xxx" special value for shorter ext representation
2389 -- special values start with capital letter:
2390 -- "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
2391 -- - Seq-feat.ext.data is SEQUENCE so several columns are allowed
2392 -- "Q.xxx" - qual.qual = xxx, qual.val = data
2393 -- - Seq-feat.qual is SEQUENCE so several columns are allowed
2394 -- "D.xxx" - dbxref.id = xxx, dbxref.tag = data (NOTE(review): "dbxref.id" presumably means dbxref.db, cf. dbxref-db (43) above; confirm)
2395 -- - Seq-feat.dbxref is SET so several columns are allowed
2396 field-name VisibleString OPTIONAL
2397 }
2398
2399
2400 CommonString-table ::= SEQUENCE { -- string column stored as a value list plus indexes; both fields are required
2401 -- set of possible values
2402 strings SEQUENCE OF VisibleString,
2403
2404 -- indexes of values
2405 indexes SEQUENCE OF INTEGER -- presumably positions into strings, one per row; index base not stated here (TODO confirm)
2406 }
2407
2408
2409 CommonBytes-table ::= SEQUENCE { -- byte-array column stored as a value list plus indexes; both fields are required
2410 -- set of possible values
2411 bytes SEQUENCE OF OCTET STRING,
2412
2413 -- indexes of values
2414 indexes SEQUENCE OF INTEGER -- presumably positions into bytes, one per row; index base not stated here (TODO confirm)
2415 }
2416
2417
2418 SeqTable-multi-data ::= CHOICE { -- per-row values of one column; exactly one representation is chosen
2419 -- a set of integers, one per row
2420 int SEQUENCE OF INTEGER,
2421
2422 -- a set of reals, one per row
2423 real SEQUENCE OF REAL,
2424
2425 -- a set of strings, one per row
2426 string SEQUENCE OF VisibleString,
2427
2428 -- a set of byte arrays, one per row
2429 bytes SEQUENCE OF OCTET STRING,
2430
2431 -- a set of strings with a small set of possible values
2432 common-string CommonString-table,
2433
2434 -- a set of byte arrays with a small set of possible values
2435 common-bytes CommonBytes-table,
2436
2437 -- a set of bits, one per row
2438 -- this uses bm::bvector<> as its storage mechanism
2439 bit OCTET STRING,
2440
2441 -- a set of locations, one per row
2442 loc SEQUENCE OF Seq-loc,
2443 id SEQUENCE OF Seq-id,
2444 interval SEQUENCE OF Seq-interval
2445 }
2446
2447
2448 SeqTable-single-data ::= CHOICE { -- a single value; used by SeqTable-column.default and SeqTable-column.sparse-other
2449 -- integer
2450 int INTEGER,
2451
2452 -- real
2453 real REAL,
2454
2455 -- string
2456 string VisibleString,
2457
2458 -- byte array
2459 bytes OCTET STRING,
2460
2461 -- bit
2462 bit BOOLEAN,
2463
2464 -- location
2465 loc Seq-loc,
2466 id Seq-id,
2467 interval Seq-interval
2468 }
2469
2470
2471 SeqTable-sparse-index ::= CHOICE { -- identifies which rows carry values; used by SeqTable-column.sparse
2472 -- indexes of rows with values
2473 indexes SEQUENCE OF INTEGER,
2474
2475 -- bitset of rows with values
2476 bit-set OCTET STRING
2477 }
2478
2479
2480 SeqTable-column ::= SEQUENCE { -- one column of a Seq-table; header is the only required field
2481 -- column description or reference to previously defined info
2482 header SeqTable-column-info, -- information about data
2483
2484 -- row data
2485 data SeqTable-multi-data OPTIONAL,
2486
2487 -- in case not all rows contain data this field will contain sparse info
2488 sparse SeqTable-sparse-index OPTIONAL,
2489
2490 -- default value for sparse table, or if row data is too short
2491 default SeqTable-single-data OPTIONAL,
2492
2493 -- single value for indexes not listed in sparse table
2494 sparse-other SeqTable-single-data OPTIONAL
2495 }
2496
2497
2498 Seq-table ::= SEQUENCE { -- table of features stored by column; only feat-subtype is optional
2499 -- type of features in this table, equal to Seq-feat.data variant index
2500 feat-type INTEGER,
2501
2502 -- subtype of features in this table, defined in header SeqFeatData.hpp
2503 feat-subtype INTEGER OPTIONAL,
2504
2505 -- number of rows
2506 num-rows INTEGER,
2507
2508 -- data in columns
2509 columns SEQUENCE OF SeqTable-column -- ordered list of columns
2510 }
2511
2512
2513 END
2514 --$Revision: 6.4 $
2515 --**********************************************************************
2516 --
2517 -- NCBI Sequence Alignment elements
2518 -- by James Ostell, 1990
2519 --
2520 --**********************************************************************
2521
2522 NCBI-Seqalign DEFINITIONS ::=
2523 BEGIN
2524
2525 EXPORTS Seq-align, Score, Score-set, Seq-align-set;
2526
2527 IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
2528 User-object, Object-id FROM NCBI-General;
2529
2530 --*** Sequence Alignment ********************************
2531 --*
2532
2533 Seq-align-set ::= SET OF Seq-align -- unordered collection; also used recursively by Seq-align.segs.disc
2534
2535 Seq-align ::= SEQUENCE { -- one alignment; type and segs are required
2536 type ENUMERATED {
2537 not-set (0) ,
2538 global (1) ,
2539 diags (2) , -- unbroken, but not ordered, diagonals
2540 partial (3) , -- mapping pieces together
2541 disc (4) , -- discontinuous alignment
2542 other (255) } ,
2543 dim INTEGER OPTIONAL , -- dimensionality
2544 score SET OF Score OPTIONAL , -- for whole alignment
2545 segs CHOICE { -- alignment data
2546 dendiag SEQUENCE OF Dense-diag ,
2547 denseg Dense-seg ,
2548 std SEQUENCE OF Std-seg ,
2549 packed Packed-seg ,
2550 disc Seq-align-set, -- nested alignments; same identifier as type value disc (4) but a different scope
2551 spliced Spliced-seg,
2552 sparse Sparse-seg
2553 } ,
2554
2555 -- regions of sequence over which align
2556 -- was computed
2557 bounds SET OF Seq-loc OPTIONAL,
2558
2559 -- alignment id
2560 id SEQUENCE OF Object-id OPTIONAL,
2561
2562 --extra info
2563 ext SEQUENCE OF User-object OPTIONAL
2564 }
2565
2566 Dense-diag ::= SEQUENCE { -- for (multiway) diagonals
2567 dim INTEGER DEFAULT 2 , -- dimensionality; presumably the number of entries in ids and starts (TODO confirm)
2568 ids SEQUENCE OF Seq-id , -- sequences in order
2569 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order
2570 len INTEGER , -- len of aligned segments
2571 strands SEQUENCE OF Na-strand OPTIONAL ,
2572 scores SET OF Score OPTIONAL }
2573
2574 -- Dense-seg: the densest packing for sequence alignments only.
2575 -- a start of -1 indicates a gap for that sequence of
2576 -- length lens.
2577 --
2578 -- id=100 AAGGCCTTTTAGAGATGATGATGATGATGA
2579 -- id=200 AAGGCCTTTTAG.......GATGATGATGA
2580 -- id=300 ....CCTTTTAGAGATGATGAT....ATGA
2581 --
2582 -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
2583 -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
2584 -- lens = { 4, 8, 7, 3, 4, 4 }
2585 --
2586
2587 Dense-seg ::= SEQUENCE { -- for (multiway) global or partial alignments
2588 dim INTEGER DEFAULT 2 , -- dimensionality
2589 numseg INTEGER , -- number of segments here
2590 ids SEQUENCE OF Seq-id , -- sequences in order
2591 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order within segs (dim entries per segment in the example above; -1 marks a gap)
2592 lens SEQUENCE OF INTEGER , -- length of each segment (one entry per segment in the example above)
2593 strands SEQUENCE OF Na-strand OPTIONAL ,
2594 scores SEQUENCE OF Score OPTIONAL } -- score for each seg
2595
2596 Packed-seg ::= SEQUENCE { -- for (multiway) global or partial alignments
2597 dim INTEGER DEFAULT 2 , -- dimensionality
2598 numseg INTEGER , -- number of segments here
2599 ids SEQUENCE OF Seq-id , -- sequences in order
2600 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order for whole alignment
2601 present OCTET STRING , -- Boolean if each sequence present or absent in
2602 -- each segment (NOTE(review): bit layout and ordering are not specified here; confirm against the implementation)
2603 lens SEQUENCE OF INTEGER , -- length of each segment
2604 strands SEQUENCE OF Na-strand OPTIONAL ,
2605 scores SEQUENCE OF Score OPTIONAL } -- score for each segment
2606
2607 Std-seg ::= SEQUENCE { -- alignment segment expressed as Seq-locs; loc is the only required field
2608 dim INTEGER DEFAULT 2 , -- dimensionality
2609 ids SEQUENCE OF Seq-id OPTIONAL ,
2610 loc SEQUENCE OF Seq-loc , -- required
2611 scores SET OF Score OPTIONAL }
2612
2613
2614 Spliced-seg ::= SEQUENCE { -- product-to-genomic alignment as a list of exons; product-type and exons are required
2615 -- product is either protein or transcript (cDNA)
2616 product-id Seq-id OPTIONAL,
2617 genomic-id Seq-id OPTIONAL,
2618
2619 -- should be 'plus' or 'minus'
2620 product-strand Na-strand OPTIONAL ,
2621 genomic-strand Na-strand OPTIONAL ,
2622
2623 product-type ENUMERATED { -- required
2624 transcript(0),
2625 protein(1)
2626 },
2627
2628 -- set of segments involved
2629 -- each segment corresponds to one exon
2630 -- exons are always in biological order
2631 exons SEQUENCE OF Spliced-exon ,
2632
2633 -- optional poly(A) tail
2634 poly-a INTEGER OPTIONAL,
2635
2636 -- length of the product, in bases/residues
2637 -- from this, a 3' unaligned length can be extracted; this also captures
2638 -- the case in which a protein aligns leaving a partial codon alignment
2639 -- at the 3' end
2640 product-length INTEGER OPTIONAL,
2641
2642 -- alignment descriptors / modifiers
2643 -- this provides us a set for extension
2644 modifiers SET OF Spliced-seg-modifier OPTIONAL
2645 }
2646
2647 Spliced-seg-modifier ::= CHOICE { -- one modifier flag; used by Spliced-seg.modifiers
2648 -- protein aligns from the start and the first codon
2649 -- on both product and genomic is start codon
2650 start-codon-found BOOLEAN,
2651
2652 -- protein aligns to its end and there is a stop codon
2653 -- on the genomic right after the alignment
2654 stop-codon-found BOOLEAN
2655 }
2656
2657
2658 -- complete or partial exon
2659 -- two consecutive Spliced-exons may belong to one exon
2660 Spliced-exon ::= SEQUENCE { -- element of Spliced-seg.exons; the four start/end fields are required
2661 -- product-end >= product-start
2662 product-start Product-pos ,
2663 product-end Product-pos ,
2664
2665 -- genomic-end >= genomic-start
2666 genomic-start INTEGER ,
2667 genomic-end INTEGER ,
2668
2669 -- product is either protein or transcript (cDNA)
2670 product-id Seq-id OPTIONAL ,
2671 genomic-id Seq-id OPTIONAL ,
2672
2673 -- should be 'plus' or 'minus'
2674 product-strand Na-strand OPTIONAL ,
2675
2676 -- genomic-strand represents the strand of translation
2677 genomic-strand Na-strand OPTIONAL ,
2678
2679 -- basic segments are always in biological order
2680 parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,
2681
2682 -- scores for this exon
2683 scores Score-set OPTIONAL ,
2684
2685 -- splice sites
2686 acceptor-before-exon Splice-site OPTIONAL,
2687 donor-after-exon Splice-site OPTIONAL,
2688
2689 -- flag: is this exon complete or partial?
2690 partial BOOLEAN OPTIONAL,
2691
2692 --extra info
2693 ext SEQUENCE OF User-object OPTIONAL
2694 }
2695
2696
2697 Product-pos ::= CHOICE { -- position on the product; used by Spliced-exon.product-start and product-end
2698 nucpos INTEGER, -- presumably used when the product is a transcript (TODO confirm)
2699 protpos Prot-pos -- codon based protein position, see Prot-pos
2700 }
2701
2702
2703 -- codon based position on protein (1/3 of aminoacid)
2704 Prot-pos ::= SEQUENCE { -- used by Product-pos.protpos; amin is required, frame defaults to 0
2705 -- standard protein position
2706 amin INTEGER ,
2707
2708 -- 0, 1, 2, or 3 as for Cdregion
2709 -- 0 = not set
2710 -- 1, 2, 3 = actual frame
2711 frame INTEGER DEFAULT 0
2712 }
2713
2714
2715 -- Spliced-exon-chunk: piece of an exon
2716 -- lengths are given in nucleotide bases (1/3 of aminoacid when product is a
2717 -- protein)
2718 Spliced-exon-chunk ::= CHOICE { -- element of Spliced-exon.parts; every variant carries an INTEGER length
2719 -- both sequences represented, product and genomic sequences match
2720 match INTEGER ,
2721
2722 -- both sequences represented, product and genomic sequences do not match
2723 mismatch INTEGER ,
2724
2725 -- both sequences are represented, there is sufficient similarity
2726 -- between product and genomic sequences. Can be used to replace stretches
2727 -- of matches and mismatches, mostly for protein to genomic where
2728 -- definition of match or mismatch depends on translation table
2729 diag INTEGER ,
2730
2731 -- insertion in product sequence (i.e. gap in the genomic sequence)
2732 product-ins INTEGER ,
2733
2734 -- insertion in genomic sequence (i.e. gap in the product sequence)
2735 genomic-ins INTEGER
2736 }
2737
2738
2739 -- site involved in splice
2740 Splice-site ::= SEQUENCE { -- used by Spliced-exon.acceptor-before-exon and donor-after-exon
2741 -- typically two bases in the intronic region, always
2742 -- in IUPAC format
2743 bases VisibleString -- required
2744 }
2745
2746
2747 -- ==========================================================================
2748 --
2749 -- Sparse-seg follows the semantics of dense-seg and is more optimal for
2750 -- representing sparse multiple alignments
2751 --
2752 -- ==========================================================================
2753
2754
2755 Sparse-seg ::= SEQUENCE { -- sparse multiple alignment built from pairwise rows; rows is the only required field
2756 master-id Seq-id OPTIONAL,
2757
2758 -- pairwise alignments constituting this multiple alignment
2759 rows SET OF Sparse-align,
2760
2761 -- per-row scores
2762 row-scores SET OF Score OPTIONAL,
2763
2764 -- index of extra items
2765 ext SET OF Sparse-seg-ext OPTIONAL
2766 }
2767
2768 Sparse-align ::= SEQUENCE { -- one pairwise row of a Sparse-seg; only second-strands and seg-scores are optional
2769 first-id Seq-id,
2770 second-id Seq-id,
2771
2772 numseg INTEGER, --number of segments
2773 first-starts SEQUENCE OF INTEGER , --starts on the first sequence [numseg]
2774 second-starts SEQUENCE OF INTEGER , --starts on the second sequence [numseg]
2775 lens SEQUENCE OF INTEGER , --lengths of segments [numseg]
2776 second-strands SEQUENCE OF Na-strand OPTIONAL , -- presumably [numseg] entries as well (TODO confirm)
2777
2778 -- per-segment scores
2779 seg-scores SET OF Score OPTIONAL
2780 }
2781
2782 Sparse-seg-ext ::= SEQUENCE { -- element of Sparse-seg.ext; currently carries only index
2783 --seg-ext SET OF {
2784 -- index INTEGER,
2785 -- data User-field
2786 -- }
2787 index INTEGER -- the richer seg-ext form above is commented out and is not part of the type
2788 }
2789
2790
2791
2792 -- use of Score is discouraged for external ASN.1 specifications
2793 Score ::= SEQUENCE { -- one score: optional id plus a required value
2794 id Object-id OPTIONAL , -- presumably names the kind of score (TODO confirm)
2795 value CHOICE { -- required; either a REAL or an INTEGER
2796 real REAL ,
2797 int INTEGER
2798 }
2799 }
2800
2801 -- use of Score-set is encouraged for external ASN.1 specifications
2802 Score-set ::= SET OF Score -- unordered collection of Score; used by Spliced-exon.scores
2803
2804 END
2805
2806 --$Revision: 6.0 $
2807 --**********************************************************************
2808 --
2809 -- NCBI Sequence Analysis Results (other than alignments)
2810 -- by James Ostell, 1990
2811 --
2812 --**********************************************************************
2813
2814 NCBI-Seqres DEFINITIONS ::=
2815 BEGIN
2816
2817 EXPORTS Seq-graph;
2818
2819 IMPORTS Seq-loc FROM NCBI-Seqloc;
2820
2821 --*** Sequence Graph ********************************
2822 --*
2823 --* for values mapped by residue or range to sequence
2824 --*
2825
2826 Seq-graph ::= SEQUENCE { -- values mapped onto a sequence region; loc, numval and graph are required
2827 title VisibleString OPTIONAL ,
2828 comment VisibleString OPTIONAL ,
2829 loc Seq-loc , -- region this applies to
2830 title-x VisibleString OPTIONAL , -- title for x-axis
2831 title-y VisibleString OPTIONAL , -- presumably the title for the y-axis (TODO confirm)
2832 comp INTEGER OPTIONAL , -- compression (residues/value)
2833 a REAL OPTIONAL , -- for scaling values
2834 b REAL OPTIONAL , -- display = (a x value) + b
2835 numval INTEGER , -- number of values in graph
2836 graph CHOICE { -- exactly one value representation
2837 real Real-graph ,
2838 int Int-graph ,
2839 byte Byte-graph } }
2840
2841 Real-graph ::= SEQUENCE { -- graph with REAL values; all four fields are required
2842 max REAL , -- top of graph
2843 min REAL , -- bottom of graph
2844 axis REAL , -- value to draw axis on
2845 values SEQUENCE OF REAL }
2846
2847 Int-graph ::= SEQUENCE { -- graph with INTEGER values; same field layout as Real-graph, all required
2848 max INTEGER ,
2849 min INTEGER ,
2850 axis INTEGER ,
2851 values SEQUENCE OF INTEGER }
2852
2853 Byte-graph ::= SEQUENCE { -- integer from 0-255
2854 max INTEGER ,
2855 min INTEGER ,
2856 axis INTEGER ,
2857 values OCTET STRING } -- presumably one byte per graph value (TODO confirm)
2858
2859 END
2860
2861 --$Revision: 6.1 $
2862 --********************************************************************
2863 --
2864 -- Direct Submission of Sequence Data
2865 -- James Ostell, 1991
2866 --
2867 -- This is a trial specification for direct submission of sequence
2868 -- data worked out between NCBI and EMBL
2869 -- Later revised to reflect work with GenBank and Integrated database
2870 --
2871 -- Version 3.0, 1994
2872 -- This is the official NCBI sequence submission format now.
2873 --
2874 --********************************************************************
2875
2876 NCBI-Submit DEFINITIONS ::= -- module: direct submission of sequence data (submission header plus payload)
2877 BEGIN
2878
2879 EXPORTS Seq-submit, Contact-info; -- Submit-block is defined below but not exported
2880
2881 IMPORTS Cit-sub, Author FROM NCBI-Biblio
2882 Date, Object-id FROM NCBI-General
2883 Seq-annot FROM NCBI-Sequence
2884 Seq-id FROM NCBI-Seqloc
2885 Seq-entry FROM NCBI-Seqset;
2886
2887 Seq-submit ::= SEQUENCE { -- one submission: a Submit-block header and exactly one kind of data
2888 sub Submit-block ,
2889 data CHOICE {
2890 entrys SET OF Seq-entry , -- sequence(s)
2891 annots SET OF Seq-annot , -- annotation(s)
2892 delete SET OF Seq-id } } -- deletions of entries
2893
2894 Submit-block ::= SEQUENCE { -- header describing who submitted and how the data is to be handled
2895 contact Contact-info , -- who to contact
2896 cit Cit-sub , -- citation for this submission
2897 hup BOOLEAN DEFAULT FALSE , -- hold until publish
2898 reldate Date OPTIONAL , -- release by date
2899 subtype INTEGER { -- type of submission
2900 new (1) , -- new data
2901 update (2) , -- update by author
2902 revision (3) , -- 3rd party (non-author) update
2903 other (255) } OPTIONAL ,
2904 tool VisibleString OPTIONAL, -- tool used to make submission
2905 user-tag VisibleString OPTIONAL, -- user supplied id for this submission
2906 comment VisibleString OPTIONAL } -- user comments/advice to database
2907
2908 Contact-info ::= SEQUENCE { -- who to contact to discuss the submission
2909 name VisibleString OPTIONAL , -- OBSOLETE: will be removed
2910 address SEQUENCE OF VisibleString OPTIONAL ,
2911 phone VisibleString OPTIONAL ,
2912 fax VisibleString OPTIONAL ,
2913 email VisibleString OPTIONAL ,
2914 telex VisibleString OPTIONAL ,
2915 owner-id Object-id OPTIONAL , -- for owner accounts
2916 password OCTET STRING OPTIONAL ,
2917 last-name VisibleString OPTIONAL , -- structured to replace name above
2918 first-name VisibleString OPTIONAL ,
2919 middle-initial VisibleString OPTIONAL ,
2920 contact Author OPTIONAL } -- WARNING: this will replace the above
2921
2922 END
2923
2924 --$Revision: 1.15 $
2925 --**********************************************************************
2926 --
2927 -- Definitions for Cn3D-specific data (rendering settings,
2928 -- user annotations, etc.)
2929 --
2930 -- by Paul Thiessen
2931 --
2932 -- National Center for Biotechnology Information
2933 -- National Institutes of Health
2934 -- Bethesda, MD 20894 USA
2935 --
2936 -- asntool -m cn3d.asn -w 100 -o cn3d.h
2937 -- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
2938 -- -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
2939 --**********************************************************************
2940
2941 NCBI-Cn3d DEFINITIONS ::=
2942 -- Cn3D-specific information
2943
2944 BEGIN
2945
2946 EXPORTS Cn3d-style-dictionary, Cn3d-user-annotations; -- all other types in this module are internal
2947
2948 IMPORTS Biostruc-id FROM MMDB
2949 Molecule-id, Residue-id FROM MMDB-Chemical-graph;
2950
2951
2952 -- values of enumerations must match those in cn3d/style_manager.hpp!
2953
2954 Cn3d-backbone-type ::= ENUMERATED { -- for different types of backbones
2955 off (1),
2956 trace (2),
2957 partial (3),
2958 complete (4)
2959 }
2960
2961 Cn3d-drawing-style ::= ENUMERATED { -- atom/bond/object rendering styles
2962 -- for atoms and bonds
2963 wire (1),
2964 tubes (2),
2965 ball-and-stick (3),
2966 space-fill (4),
2967 wire-worm (5),
2968 tube-worm (6),
2969 -- for 3d-objects
2970 with-arrows (7),
2971 without-arrows (8)
2972 }
2973
2974 Cn3d-color-scheme ::= ENUMERATED { -- available color schemes (not all
2975 -- necessarily applicable to all objects)
2976 element (1),
2977 object (2),
2978 molecule (3),
2979 domain (4),
2980 residue (20), -- numeric value is out of listing order; do not renumber (see note on matching style_manager.hpp)
2981 secondary-structure (5),
2982 user-select (6),
2983 -- different alignment conservation coloring (currently only for proteins)
2984 aligned (7),
2985 identity (8),
2986 variety (9),
2987 weighted-variety (10),
2988 information-content (11),
2989 fit (12),
2990 block-fit (17), -- values 17-19 are likewise out of listing order
2991 block-z-fit (18),
2992 block-row-fit (19),
2993 -- other schemes
2994 temperature (13),
2995 hydrophobicity (14),
2996 charge (15),
2997 rainbow (16)
2998 }
2999
3000 -- RGB triplet, interpreted (after division by the scale-factor) as floating
3001 -- point values which should range from [0..1]. The default scale-factor is
3002 -- 255, so that one can conveniently set integer byte values [0..255] for
3003 -- colors with the scale-factor already set appropriately to map to [0..1].
3004 -- An alpha value is allowed, but is currently ignored by Cn3D.
3005 Cn3d-color ::= SEQUENCE {
3006 scale-factor INTEGER DEFAULT 255, -- divisor applied to the color components
3007 red INTEGER,
3008 green INTEGER,
3009 blue INTEGER,
3010 alpha INTEGER DEFAULT 255
3011 }
3012
3013 Cn3d-backbone-style ::= SEQUENCE { -- style blob for backbones only
3014 type Cn3d-backbone-type,
3015 style Cn3d-drawing-style,
3016 color-scheme Cn3d-color-scheme,
3017 user-color Cn3d-color
3018 }
3019
3020 Cn3d-general-style ::= SEQUENCE { -- style blob for other objects
3021 is-on BOOLEAN,
3022 style Cn3d-drawing-style,
3023 color-scheme Cn3d-color-scheme,
3024 user-color Cn3d-color
3025 }
3026
3027 Cn3d-backbone-label-style ::= SEQUENCE { -- style blob for backbone labels
3028 spacing INTEGER, -- zero means none
3029 type ENUMERATED {
3030 one-letter (1),
3031 three-letter (2)
3032 },
3033 number ENUMERATED {
3034 none (0),
3035 sequential (1), -- from 1, by residues present, to match sequence
3036 pdb (2) -- use number assigned by PDB
3037 },
3038 termini BOOLEAN,
3039 white BOOLEAN -- all white, or (if false) color of alpha carbon
3040 }
3041
3042 -- rendering settings for Cn3D (mirrors StyleSettings class)
3043 Cn3d-style-settings ::= SEQUENCE {
3044 name VisibleString OPTIONAL, -- a name (for favorites)
3045 protein-backbone Cn3d-backbone-style, -- backbone styles
3046 nucleotide-backbone Cn3d-backbone-style,
3047 protein-sidechains Cn3d-general-style, -- styles for other stuff
3048 nucleotide-sidechains Cn3d-general-style,
3049 heterogens Cn3d-general-style,
3050 solvents Cn3d-general-style,
3051 connections Cn3d-general-style,
3052 helix-objects Cn3d-general-style,
3053 strand-objects Cn3d-general-style,
3054 virtual-disulfides-on BOOLEAN, -- virtual disulfides
3055 virtual-disulfide-color Cn3d-color,
3056 hydrogens-on BOOLEAN, -- hydrogens
3057 background-color Cn3d-color, -- background
3058 -- floating point parameters - scale-factor applies to all the following:
3059 scale-factor INTEGER,
3060 space-fill-proportion INTEGER,
3061 ball-radius INTEGER,
3062 stick-radius INTEGER,
3063 tube-radius INTEGER,
3064 tube-worm-radius INTEGER,
3065 helix-radius INTEGER,
3066 strand-width INTEGER,
3067 strand-thickness INTEGER,
3068 -- backbone labels (no labels if not present)
3069 protein-labels Cn3d-backbone-label-style OPTIONAL,
3070 nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
3071 -- ion labels
3072 ion-labels BOOLEAN OPTIONAL
3073 }
3074
3075 Cn3d-style-settings-set ::= SET OF Cn3d-style-settings -- not referenced elsewhere in this module
3076
3077 Cn3d-style-table-id ::= INTEGER -- key type shared by Cn3d-style-table-item.id and Cn3d-user-annotation.style-id
3078
3079 Cn3d-style-table-item ::= SEQUENCE { -- one (id, style) entry of the style lookup table
3080 id Cn3d-style-table-id,
3081 style Cn3d-style-settings
3082 }
3083
3084 -- the global settings, and a lookup table of styles for user annotations.
3085 Cn3d-style-dictionary ::= SEQUENCE {
3086 global-style Cn3d-style-settings,
3087 style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL
3088 }
3089
3090 -- a range of residues in a chain, identified by MMDB residue-id
3091 -- (e.g., numbered from 1)
3092 Cn3d-residue-range ::= SEQUENCE {
3093 from Residue-id,
3094 to Residue-id
3095 }
3096
3097 -- set of locations on a particular chain
3098 Cn3d-molecule-location ::= SEQUENCE {
3099 molecule-id Molecule-id, -- MMDB molecule id
3100 -- which residues; whole molecule implied if absent
3101 residues SEQUENCE OF Cn3d-residue-range OPTIONAL
3102 }
3103
3104 -- set of locations on a particular structure object (e.g., a PDB/MMDB
3105 -- structure), which may include multiple ranges of residues each on
3106 -- multiple chains.
3107 Cn3d-object-location ::= SEQUENCE {
3108 structure-id Biostruc-id,
3109 residues SEQUENCE OF Cn3d-molecule-location
3110 }
3111
3112 -- information for an individual user annotation
3113 Cn3d-user-annotation ::= SEQUENCE {
3114 name VisibleString, -- a (short) name for this annotation
3115 description VisibleString OPTIONAL, -- an optional longer description
3116 style-id Cn3d-style-table-id, -- how to draw this annotation
3117 residues SEQUENCE OF Cn3d-object-location, -- which residues to cover
3118 is-on BOOLEAN -- whether this annotation is to be turned on in Cn3D
3119 }
3120
3121 -- a GL-ordered transformation matrix
3122 Cn3d-GL-matrix ::= SEQUENCE { -- sixteen REAL elements, m0 through m15
3123 m0 REAL, m1 REAL, m2 REAL, m3 REAL,
3124 m4 REAL, m5 REAL, m6 REAL, m7 REAL,
3125 m8 REAL, m9 REAL, m10 REAL, m11 REAL,
3126 m12 REAL, m13 REAL, m14 REAL, m15 REAL
3127 }
3128
3129 -- a floating point 3d vector
3130 Cn3d-vector ::= SEQUENCE {
3131 x REAL,
3132 y REAL,
3133 z REAL
3134 }
3135
3136 -- parameters used to set up the camera in Cn3D
3137 Cn3d-view-settings ::= SEQUENCE {
3138 camera-distance REAL, -- camera on +Z axis this distance from origin
3139 camera-angle-rad REAL, -- camera angle
3140 camera-look-at-X REAL, -- X,Y of point in Z=0 plane camera points at
3141 camera-look-at-Y REAL,
3142 camera-clip-near REAL, -- distance of clipping planes from camera
3143 camera-clip-far REAL,
3144 matrix Cn3d-GL-matrix, -- transformation of objects in the scene
3145 rotation-center Cn3d-vector -- center of rotation of whole scene
3146 }
3147
3148 -- The list of annotations for a given CDD/mime. If residue regions overlap
3149 -- between annotations that are turned on, the last annotation in this list
3150 -- that contains these residues will be used as the display style for these
3151 -- residues.
3152 -- Also contains the current viewpoint, so that user's camera angle
3153 -- can be stored and reproduced, for illustrations, on-line figures, etc.
3154 Cn3d-user-annotations ::= SEQUENCE {
3155 annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL,
3156 view Cn3d-view-settings OPTIONAL
3157 }
3158
3159 END
3160
3161 --$Revision: 6.3 $
3162 --****************************************************************
3163 --
3164 -- NCBI Project Definition Module
3165 -- by Jim Ostell and Jonathan Kans, 1998
3166 --
3167 --****************************************************************
3168
3169 NCBI-Project DEFINITIONS ::= -- module: a project is an optional description plus one collection of items
3170 BEGIN
3171
3172 EXPORTS Project, Project-item; -- Project-descr, Projdesc and Project-id are not exported
3173
3174 IMPORTS Date FROM NCBI-General
3175 PubMedId FROM NCBI-Biblio
3176 Seq-id, Seq-loc FROM NCBI-Seqloc
3177 Seq-annot, Pubdesc FROM NCBI-Sequence
3178 Seq-entry FROM NCBI-Seqset
3179 Pubmed-entry FROM NCBI-PubMed;
3180
3181 Project ::= SEQUENCE {
3182 descr Project-descr OPTIONAL ,
3183 data Project-item }
3184
3185 Project-item ::= CHOICE { -- exactly one alternative; most are a SET OF a single element type
3186 pmuid SET OF INTEGER , -- the "uid" alternatives carry sets of integers
3187 protuid SET OF INTEGER ,
3188 nucuid SET OF INTEGER ,
3189 sequid SET OF INTEGER ,
3190 genomeuid SET OF INTEGER ,
3191 structuid SET OF INTEGER ,
3192 pmid SET OF PubMedId , -- the "id" alternatives carry sets of PubMedId or Seq-id
3193 protid SET OF Seq-id ,
3194 nucid SET OF Seq-id ,
3195 seqid SET OF Seq-id ,
3196 genomeid SET OF Seq-id ,
3197 structid NULL , -- NULL: this alternative carries no data
3198 pment SET OF Pubmed-entry , -- the "ent" alternatives carry sets of full entries
3199 protent SET OF Seq-entry ,
3200 nucent SET OF Seq-entry ,
3201 seqent SET OF Seq-entry ,
3202 genomeent SET OF Seq-entry ,
3203 structent NULL , -- NULL: this alternative carries no data
3204 seqannot SET OF Seq-annot ,
3205 loc SET OF Seq-loc ,
3206 proj SET OF Project -- recursive: a project may contain other projects
3207 }
3208
3209 Project-descr ::= SEQUENCE { -- identifiers, optional name, and optional descriptors for a project
3210 id SET OF Project-id ,
3211 name VisibleString OPTIONAL ,
3212 descr SET OF Projdesc OPTIONAL }
3213
3214 Projdesc ::= CHOICE { -- one descriptor attached to a project
3215 pub Pubdesc ,
3216 date Date ,
3217 comment VisibleString ,
3218 title VisibleString
3219 }
3220
3221 Project-id ::= VisibleString
3222
3223 END
3224
3225
3226 --$Revision: 6.0 $
3227 --*********************************************************************
3228 --
3229 -- access.asn
3230 --
3231 -- messages for data access
3232 --
3233 --*********************************************************************
3234
3235 NCBI-Access DEFINITIONS ::= -- module: messages for data access (link sets)
3236 BEGIN
3237
3238 EXPORTS Link-set;
3239
3240 -- links between same class = neighbors
3241 -- links between other classes = links
3242
3243 Link-set ::= SEQUENCE {
3244 num INTEGER , -- number of links to this doc type
3245 uids SEQUENCE OF INTEGER OPTIONAL , -- the links
3246 weights SEQUENCE OF INTEGER OPTIONAL } -- the weights; NOTE(review): presumably parallel to uids, confirm
3247
3248
3249 END
3250 --$Revision: 6.0 $
3251 --**********************************************************************
3252 --
3253 -- NCBI Sequence Feature Definition Module
3254 -- by James Ostell, 1994
3255 --
3256 --**********************************************************************
3257
3258 NCBI-FeatDef DEFINITIONS ::= -- module: definitions of sequence feature types and their display groups
3259 BEGIN
3260
3261 EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;
3262
3263
3264 FeatDef ::= SEQUENCE { -- one feature definition: labels, keys, and grouping
3265 typelabel VisibleString , -- short label for type eg "CDS"
3266 menulabel VisibleString , -- label for a menu eg "Coding Region"
3267 featdef-key INTEGER , -- unique for this feature definition
3268 seqfeat-key INTEGER , -- SeqFeat.data.choice from objfeat.h
3269 entrygroup INTEGER , -- Group for data entry
3270 displaygroup INTEGER , -- Group for data display
3271 molgroup FeatMolType -- Type of Molecule used for
3272 }
3273
3274 FeatMolType ::= ENUMERATED {
3275 aa (1), -- proteins
3276 na (2), -- nucleic acids
3277 both (3) } -- both
3278
3279 FeatDefSet ::= SEQUENCE OF FeatDef -- collections of definitions
3280
3281 FeatDispGroup ::= SEQUENCE { -- a display group: integer key plus name
3282 groupkey INTEGER ,
3283 groupname VisibleString }
3284
3285 FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup
3286
3287 FeatDefGroupSet ::= SEQUENCE { -- NOTE(review): not listed in EXPORTS above; confirm whether that is intended
3288 groups FeatDispGroupSet ,
3289 defs FeatDefSet }
3290
3291 END
3292
3293
3294 --$Revision: 6.12 $
3295 --****************************************************************
3296 --
3297 -- NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
3298 -- by Jonathan Epstein, February 1996
3299 --
3300 --****************************************************************
3301
3302 NCBI-Mime DEFINITIONS ::= -- module: top-level container for the NCBI ASN.1 MIME types
3303 BEGIN
3304
3305 EXPORTS Ncbi-mime-asn1; -- the single exported type; everything below is reached through it
3306 IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
3307 Cdd FROM NCBI-Cdd
3308 Seq-entry FROM NCBI-Seqset
3309 Seq-annot FROM NCBI-Sequence
3310 Medline-entry FROM NCBI-Medline
3311 Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;
3312
3313 Ncbi-mime-asn1 ::= CHOICE {
3314 entrez Entrez-general, -- just a structure
3315 alignstruc Biostruc-align, -- structures & sequences & alignments
3316 alignseq Biostruc-align-seq, -- sequence alignment
3317 strucseq Biostruc-seq, -- structure & sequences
3318 strucseqs Biostruc-seqs, -- structure & sequences & alignments
3319 general Biostruc-seqs-aligns-cdd -- all-purpose "grab bag"
3320 -- others may be added here in the future
3321 }
3322
3323 -- generic bundle of sequence and alignment info
3324 Bundle-seqs-aligns ::= SEQUENCE {
3325 sequences SET OF Seq-entry OPTIONAL, -- sequences
3326 seqaligns SET OF Seq-annot OPTIONAL, -- sequence alignments
3327 strucaligns Biostruc-annot-set OPTIONAL, -- structure alignments
3328 imports SET OF Seq-annot OPTIONAL, -- imports (updates in Cn3D)
3329 style-dictionary Cn3d-style-dictionary OPTIONAL, -- Cn3D stuff
3330 user-annotations Cn3d-user-annotations OPTIONAL
3331 }
3332
3333 Biostruc-seqs-aligns-cdd ::= SEQUENCE { -- payload of the "general" alternative of Ncbi-mime-asn1
3334 seq-align-data CHOICE {
3335 bundle Bundle-seqs-aligns, -- either seqs + alignments
3336 cdd Cdd -- or CDD (which contains these)
3337 },
3338 structures SET OF Biostruc OPTIONAL, -- structures
3339 structure-type ENUMERATED { -- type of structures to load if
3340 ncbi-backbone(2), -- not present; meanings and
3341 ncbi-all-atom(3), -- values are same as MMDB's
3342 pdb-model(4) -- Model-type
3343 } OPTIONAL
3344 }
3345
3346 Biostruc-align ::= SEQUENCE { -- payload of the "alignstruc" alternative
3347 master Biostruc,
3348 slaves SET OF Biostruc,
3349 alignments Biostruc-annot-set, -- structure alignments
3350 sequences SET OF Seq-entry, -- sequences
3351 seqalign SET OF Seq-annot,
3352 style-dictionary Cn3d-style-dictionary OPTIONAL,
3353 user-annotations Cn3d-user-annotations OPTIONAL
3354 }
3355
3356 Biostruc-align-seq ::= SEQUENCE { -- display seq structure align only
3357 sequences SET OF Seq-entry, -- sequences
3358 seqalign SET OF Seq-annot,
3359 style-dictionary Cn3d-style-dictionary OPTIONAL,
3360 user-annotations Cn3d-user-annotations OPTIONAL
3361 }
3362
3363 Biostruc-seq ::= SEQUENCE { -- display structure seq added by yanli
3364 structure Biostruc,
3365 sequences SET OF Seq-entry,
3366 style-dictionary Cn3d-style-dictionary OPTIONAL,
3367 user-annotations Cn3d-user-annotations OPTIONAL
3368 }
3369
3370 Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure added by yanli
3371 structure Biostruc,
3372 sequences SET OF Seq-entry, -- sequences
3373 seqalign SET OF Seq-annot,
3374 style-dictionary Cn3d-style-dictionary OPTIONAL,
3375 user-annotations Cn3d-user-annotations OPTIONAL
3376 }
3377
3378 Entrez-style ::= ENUMERATED { -- type of the mandatory "style" field of Entrez-general
3379 docsum (1),
3380 genbank (2) ,
3381 genpept (3) ,
3382 fasta (4) ,
3383 asn1 (5) ,
3384 graphic (6) ,
3385 alignment (7) ,
3386 globalview (8) ,
3387 report (9) ,
3388 medlars (10) ,
3389 embl (11) ,
3390 pdb (12) ,
3391 kinemage (13) }
3392
3393 Entrez-general ::= SEQUENCE { -- payload of the "entrez" alternative: one data item plus its style
3394 title VisibleString OPTIONAL,
3395 data CHOICE {
3396 ml Medline-entry ,
3397 prot Seq-entry ,
3398 nuc Seq-entry ,
3399 genome Seq-entry ,
3400 structure Biostruc ,
3401 strucAnnot Biostruc-annot-set } ,
3402 style Entrez-style ,
3403 location VisibleString OPTIONAL }
3404 END
3405 --$Revision: 6.0 $
3406 --********************************************************************
3407 --
3408 -- Print Templates
3409 -- James Ostell, 1993
3410 --
3411 --
3412 --********************************************************************
3413
3414 NCBI-ObjPrt DEFINITIONS ::= -- module: print templates describing how to render ASN.1 components
3415 BEGIN
3416
3417 EXPORTS PrintTemplate, PrintTemplateSet;
3418
3419 PrintTemplate ::= SEQUENCE {
3420 name TemplateName , -- name for this template
3421 labelfrom VisibleString OPTIONAL, -- ASN.1 path to get label from
3422 format PrintFormat }
3423
3424 TemplateName ::= VisibleString -- also used by the use-template alternative of PrintForm
3425
3426 PrintTemplateSet ::= SEQUENCE OF PrintTemplate
3427
3428 PrintFormat ::= SEQUENCE { -- recursive: a block form contains further PrintFormat components
3429 asn1 VisibleString , -- ASN.1 partial path for this
3430 label VisibleString OPTIONAL , -- printable label
3431 prefix VisibleString OPTIONAL,
3432 suffix VisibleString OPTIONAL,
3433 form PrintForm }
3434
3435 PrintForm ::= CHOICE { -- Forms for various ASN.1 components
3436 block PrintFormBlock,
3437 boolean PrintFormBoolean,
3438 enum PrintFormEnum,
3439 text PrintFormText,
3440 use-template TemplateName,
3441 user UserFormat ,
3442 null NULL } -- rarely used
3443
3444 UserFormat ::= SEQUENCE { -- NOTE(review): fields look like function names held as strings; confirm against the consumer
3445 printfunc VisibleString ,
3446 defaultfunc VisibleString OPTIONAL }
3447
3448 PrintFormBlock ::= SEQUENCE { -- for SEQUENCE, SET
3449 separator VisibleString OPTIONAL ,
3450 components SEQUENCE OF PrintFormat }
3451
3452 PrintFormBoolean ::= SEQUENCE { -- optional strings for the two boolean values
3453 true VisibleString OPTIONAL ,
3454 false VisibleString OPTIONAL }
3455
3456 PrintFormEnum ::= SEQUENCE {
3457 values SEQUENCE OF VisibleString OPTIONAL }
3458
3459 PrintFormText ::= SEQUENCE {
3460 textfunc VisibleString OPTIONAL }
3461
3462 END
3463
3464 --$Revision: 6.7 $
3465 --*********************************************************
3466 --
3467 -- ASN.1 and XML for the components of a GenBank format sequence
3468 -- J.Ostell 2002
3469 -- Updated 15 January 2009
3470 --
3471 --*********************************************************
3472
3473 NCBI-GBSeq DEFINITIONS ::= -- module: flattened, GenBank-report-style view of a single sequence record
3474 BEGIN
3475
3476 --********
3477 -- GBSeq represents the elements in a GenBank style report
3478 -- of a sequence with some small additions to structure and support
3479 -- for protein (GenPept) versions of GenBank format as seen in
3480 -- Entrez. While this represents the simplification, reduction of
3481 -- detail, and flattening to a single sequence perspective of GenBank
3482 -- format (compared with the full ASN.1 or XML from which GenBank and
3483 -- this format is derived at NCBI), it is presented in ASN.1 or XML for
3484 -- automated parsing and processing. It is hoped that this compromise
3485 -- will be useful for those bulk processing at the GenBank format level
3486 -- of detail today. Since it is a compromise, a number of pragmatic
3487 -- decisions have been made.
3488 --
3489 -- In pursuit of simplicity and familiarity a number of
3490 -- fields do not have full substructure defined here where there is
3491 -- already a standard GenBank format string. For example:
3492 --
3493 -- Date DD-Mon-YYYY
3494 -- Authors LastName, Initials (with periods)
3495 -- Journal JournalName Volume (issue), page-range (year)
3496 -- FeatureLocations as per GenBank feature table, but FeatureIntervals
3497 -- may also be provided as a convenience
3498 -- FeatureQualifiers as per GenBank feature table
3499 -- Primary has a string that represents a table to construct
3500 -- a third party (TPA) sequence.
3501 -- other-seqids can have strings with the "vertical bar format" sequence
3502 -- identifiers used in BLAST for example, when they are non-genbank types.
3503 -- Currently in GenBank format you only see GI, but there are others, like
3504 -- patents, submitter clone names, etc which will appear here, as they
3505 -- always have in the ASN.1 format, and full XML format.
3506 -- source-db is a formatted text block for peptides in GenPept format that
3507 -- carries information from the source protein database.
3508 --
3509 -- There are also a number of elements that could have been
3510 -- more exactly specified, but in the interest of simplicity
3511 -- have been simply left as options. For example..
3512 --
3513 -- accession and accession.version will always appear in a GenBank record
3514 -- they are optional because this format can also be used for non-GenBank
3515 -- sequences, and in that case will have only "other-seqids".
3516 --
3517 -- sequences will normally all have "sequence" filled in. But contig records
3518 -- will have a "join" statement in the "contig" slot, and no "sequence".
3519 -- We also may consider a retrieval option with no sequence of any kind
3520 -- and no feature table to quickly check minimal values.
3521 --
3522 -- a reference may have an author list, or be from a consortium, or both.
3523 --
3524 -- some fields, such as taxonomy, do appear as separate elements in GenBank
3525 -- format but without a specific linetype (in GenBank format this comes
3526 -- under ORGANISM). Another example is the separation of primary accession
3527 -- from the list of secondary accessions. In GenBank format primary
3528 -- accession is just the first one on the list that includes all secondaries
3529 -- after it.
3530 --
3531 -- create-date deserves special comment. The date you see on the right hand
3532 -- side of the LOCUS line in GenBank format is actually the last date the
3533 -- record was modified (or the update-date). The date the record was
3534 -- first submitted to GenBank appears in the first submission citation in
3535 -- the reference section. Internally in the databases and ASN.1 NCBI keeps
3536 -- the first date the record was released into the sequence database at
3537 -- NCBI as create-date. For records from EMBL, which supports create-date,
3538 -- it is the date provided by EMBL. For DDBJ records, which do not supply
3539 -- a create-date (same as GenBank format) the create-date is the first date
3540 -- NCBI saw the record from DDBJ. For older GenBank records, before NCBI
3541 -- took responsibility for GenBank, it is just the first date NCBI saw the
3542 -- record. Create-date can be very useful, so we expose it here, but users
3543 -- must understand it is only an approximation and comes from many sources,
3544 -- and with many exceptions and caveats. It does NOT tell you the first
3545 -- date the public might have seen this record and thus is NOT an accurate
3546 -- measure for legal issues of precedence.
3547 --
3548 --********
3549
3550 GBSet ::= SEQUENCE OF GBSeq
3551
3552 GBSeq ::= SEQUENCE { -- one sequence record; see the module comment above for field conventions
3553 locus VisibleString ,
3554 length INTEGER ,
3555 strandedness VisibleString OPTIONAL ,
3556 moltype VisibleString ,
3557 topology VisibleString OPTIONAL ,
3558 division VisibleString ,
3559 update-date VisibleString , -- last modification date (the date shown on the LOCUS line), DD-Mon-YYYY
3560 create-date VisibleString OPTIONAL , -- approximate first-release date; see caveats in the module comment
3561 update-release VisibleString OPTIONAL ,
3562 create-release VisibleString OPTIONAL ,
3563 definition VisibleString ,
3564 primary-accession VisibleString OPTIONAL , -- optional so that non-GenBank sequences can use this format
3565 entry-version VisibleString OPTIONAL ,
3566 accession-version VisibleString OPTIONAL , -- optional for the same reason as primary-accession
3567 other-seqids SEQUENCE OF GBSeqid OPTIONAL , -- "vertical bar format" identifiers
3568 secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
3569 project VisibleString OPTIONAL ,
3570 keywords SEQUENCE OF GBKeyword OPTIONAL ,
3571 segment VisibleString OPTIONAL ,
3572 source VisibleString OPTIONAL ,
3573 organism VisibleString OPTIONAL ,
3574 taxonomy VisibleString OPTIONAL ,
3575 references SEQUENCE OF GBReference OPTIONAL ,
3576 comment VisibleString OPTIONAL ,
3577 tagset GBTagset OPTIONAL ,
3578 primary VisibleString OPTIONAL , -- string representing a table to construct a third party (TPA) sequence
3579 source-db VisibleString OPTIONAL , -- formatted text block from the source protein database (GenPept)
3580 database-reference VisibleString OPTIONAL ,
3581 feature-table SEQUENCE OF GBFeature OPTIONAL ,
3582 sequence VisibleString OPTIONAL , -- Optional for other dump forms
3583 contig VisibleString OPTIONAL -- "join" statement; contig records carry this and no sequence
3584 }
3585
3586 GBSecondary-accn ::= VisibleString
3587
3588 GBSeqid ::= VisibleString
3589
3590 GBKeyword ::= VisibleString
3591
3592 GBAuthor ::= VisibleString -- one author, formatted "LastName, Initials" per the module comment
3593
3594 GBReference ::= SEQUENCE { -- a reference may have an author list, or be from a consortium, or both
3595 reference VisibleString ,
3596 position VisibleString OPTIONAL ,
3597 authors SEQUENCE OF GBAuthor OPTIONAL ,
3598 consortium VisibleString OPTIONAL ,
3599 title VisibleString OPTIONAL ,
3600 journal VisibleString , -- "JournalName Volume (issue), page-range (year)" per the module comment
3601 xref SET OF GBXref OPTIONAL ,
3602 pubmed INTEGER OPTIONAL ,
3603 remark VisibleString OPTIONAL
3604 }
3605
3606 GBXref ::= SEQUENCE { -- cross-reference: database name plus identifier within it
3607 dbname VisibleString ,
3608 id VisibleString
3609 }
3610
3611 GBTagset ::= SEQUENCE { -- every field is optional
3612 authority VisibleString OPTIONAL ,
3613 version VisibleString OPTIONAL ,
3614 url VisibleString OPTIONAL ,
3615 tags GBTags OPTIONAL
3616 }
3617
3618 GBTags ::= SEQUENCE OF GBTag
3619
3620 GBTag ::= SEQUENCE { -- every field is optional
3621 name VisibleString OPTIONAL ,
3622 value VisibleString OPTIONAL ,
3623 unit VisibleString OPTIONAL
3624 }
3625
3626 GBFeature ::= SEQUENCE {
3627 key VisibleString ,
3628 location VisibleString , -- as per GenBank feature table
3629 intervals SEQUENCE OF GBInterval OPTIONAL , -- may also be provided as a convenience
3630 operator VisibleString OPTIONAL ,
3631 partial5 BOOLEAN OPTIONAL ,
3632 partial3 BOOLEAN OPTIONAL ,
3633 quals SEQUENCE OF GBQualifier OPTIONAL -- as per GenBank feature table
3634 }
3635
3636 GBInterval ::= SEQUENCE { -- NOTE(review): presumably either from/to or point is supplied; confirm
3637 from INTEGER OPTIONAL ,
3638 to INTEGER OPTIONAL ,
3639 point INTEGER OPTIONAL ,
3640 iscomp BOOLEAN OPTIONAL ,
3641 interbp BOOLEAN OPTIONAL ,
3642 accession VisibleString -- the only mandatory field of an interval
3643 }
3644
3645 GBQualifier ::= SEQUENCE { -- qualifier name with an optional value
3646 name VisibleString ,
3647 value VisibleString OPTIONAL
3648 }
3649
3650 GBTagsetRules ::= SEQUENCE { -- every field is optional
3651 authority VisibleString OPTIONAL ,
3652 version VisibleString OPTIONAL ,
3653 mandatorytags GBTagNames OPTIONAL ,
3654 optionaltags GBTagNames OPTIONAL ,
3655 uniquetags GBTagNames OPTIONAL ,
3656 extensible BOOLEAN OPTIONAL
3657 }
3658
3659 GBTagNames ::= SEQUENCE OF VisibleString
3660
3661 GBTagsetRuleSet ::= SEQUENCE OF GBTagsetRules
3662
3663 END
3664
3665 --$Revision: 1.7 $
3666 --************************************************************************
3667 --
3668 -- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
3669 -- The International Nucleotide Sequence Database (INSD) collaboration
3670 -- Version 1.5, 15 January 2009
3671 --
3672 --************************************************************************
3673
3674 INSD-INSDSeq DEFINITIONS ::=
3675 BEGIN
3676
3677 -- INSDSeq provides the elements of a sequence as presented in the
3678 -- GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
3679 -- additional structure.
3680 -- Although this single perspective of the three flatfile formats
3681 -- provides a useful simplification, it hides to some extent the
3682 -- details of the actual data underlying those formats. Nevertheless,
3683 -- the XML version of INSD-Seq is being provided with
3684 -- the hopes that it will prove useful to those who bulk-process
3685 -- sequence data at the flatfile-format level of detail. Further
3686 -- documentation regarding the content and conventions of those formats
3687 -- can be found at:
3688 --
3689 -- URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
3690 -- http://www.ddbj.nig.ac.jp/FT/full_index.html
3691 -- http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
3692 -- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
3693 --
3694 -- URLs for DDBJ, EMBL, and GenBank Release Notes :
3695 -- ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
3696 -- http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
3697 -- ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
3698 --
3699 -- Because INSDSeq is a compromise, a number of pragmatic decisions have
3700 -- been made:
3701 --
3702 -- In pursuit of simplicity and familiarity a number of fields do not
3703 -- have full substructure defined here where there is already a
3704 -- standard flatfile format string. For example:
3705 --
3706 -- Dates: DD-MON-YYYY (eg 10-JUN-2003)
3707 --
3708 -- Author: LastName, Initials (eg Smith, J.N.)
3709 -- or Lastname Initials (eg Smith J.N.)
3710 --
3711 -- Journal: JournalName Volume (issue), page-range (year)
3712 -- or JournalName Volume(issue):page-range(year)
3713 -- eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
3714 -- Appl. Environ. Microbiol. 61(4):1646-1648(1995).
3715 --
3716 -- FeatureLocations are represented as in the flatfile feature table,
3717 -- but FeatureIntervals may also be provided as a convenience
3718 --
3719 -- FeatureQualifiers are represented as in the flatfile feature table.
3720 --
3721 -- Primary has a string that represents a table to construct
3722 -- a third party (TPA) sequence.
3723 --
3724 -- other-seqids can have strings with the "vertical bar format" sequence
3725 -- identifiers used in BLAST for example, when they are non-INSD types.
3726 --
3727 -- Currently in flatfile format you only see Accession numbers, but there
3728 -- are others, like patents, submitter clone names, etc which will
3729 -- appear here
3730 --
3731 -- There are also a number of elements that could have been more exactly
3732 -- specified, but in the interest of simplicity have been simply left as
3733 -- optional. For example:
3734 --
3735 -- All publicly accessible sequence records in INSDSeq format will
3736 -- include accession and accession.version. However, these elements are
3737 -- optional in INSDSeq so that this format can also be used
3738 -- for non-public sequence data, prior to the assignment of accessions and
3739 -- version numbers. In such cases, records will have only "other-seqids".
3740 --
3741 -- sequences will normally all have "sequence" filled in. But contig records
3742 -- will have a "join" statement in the "contig" slot, and no "sequence".
3743 -- We also may consider a retrieval option with no sequence of any kind
3744 -- and no feature table to quickly check minimal values.
3745 --
3746 -- Four (optional) elements are specific to records represented via the EMBL
3747 -- sequence database: INSDSeq_update-release, INSDSeq_create-release,
3748 -- INSDSeq_entry-version, and INSDSeq_database-reference.
3749 --
3750 -- One (optional) element is specific to records originating at the GenBank
3751 -- and DDBJ sequence databases: INSDSeq_segment.
3752 --
3753 --********
3754
3755 INSDSet ::= SEQUENCE OF INSDSeq -- ordered list of sequence records
3756
3757 INSDSeq ::= SEQUENCE { -- one sequence record in flatfile-style form
3758 locus VisibleString ,
3759 length INTEGER ,
3760 strandedness VisibleString OPTIONAL ,
3761 moltype VisibleString ,
3762 topology VisibleString OPTIONAL ,
3763 division VisibleString ,
3764 update-date VisibleString ,
3765 create-date VisibleString OPTIONAL ,
3766 update-release VisibleString OPTIONAL ,
3767 create-release VisibleString OPTIONAL ,
3768 definition VisibleString ,
3769 primary-accession VisibleString OPTIONAL ,
3770 entry-version VisibleString OPTIONAL ,
3771 accession-version VisibleString OPTIONAL ,
3772 other-seqids SEQUENCE OF INSDSeqid OPTIONAL , -- "vertical bar format" identifiers
3773 secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,
3774 project VisibleString OPTIONAL ,
3775 keywords SEQUENCE OF INSDKeyword OPTIONAL ,
3776 segment VisibleString OPTIONAL ,
3777 source VisibleString OPTIONAL ,
3778 organism VisibleString OPTIONAL ,
3779 taxonomy VisibleString OPTIONAL ,
3780 references SEQUENCE OF INSDReference OPTIONAL ,
3781 comment VisibleString OPTIONAL ,
3782 tagset INSDTagset OPTIONAL ,
3783 primary VisibleString OPTIONAL , -- string representing a table to construct a third party (TPA) sequence
3784 source-db VisibleString OPTIONAL ,
3785 database-reference VisibleString OPTIONAL ,
3786 feature-table SEQUENCE OF INSDFeature OPTIONAL ,
3787 sequence VisibleString OPTIONAL , -- Optional for other dump forms
3788 contig VisibleString OPTIONAL -- "join" statement; contig records carry this and no sequence
3789 }
3790
3791 INSDSeqid ::= VisibleString -- element type of INSDSeq.other-seqids
3792
3793 INSDSecondary-accn ::= VisibleString -- element type of INSDSeq.secondary-accessions
3794
3795 INSDKeyword ::= VisibleString -- element type of INSDSeq.keywords
3796
3797 -- INSDReference_position contains a string value indicating the
3798 -- basepair span(s) to which a reference applies. The allowable
3799 -- formats are:
3800 --
3801 -- X..Y : Where X and Y are integers separated by two periods,
3802 -- X >= 1 , Y <= sequence length, and X <= Y
3803 --
3804 -- Multiple basepair spans can exist, separated by a
3805 -- semi-colon and a space. For example : 10..20; 100..500
3806 --
3807 -- sites : The string literal 'sites', indicating that a reference
3808 -- provides sequence annotation information, but the specific
3809 -- basepair spans are either not captured, or were too numerous
3810 -- to record.
3811 --
3812 -- The 'sites' literal string is singly occurring, and
3813 -- cannot be used in conjunction with any X..Y basepair spans.
3814 --
3815 -- References that lack an INSDReference_position element apply
3816 -- to the entire sequence.
3817
3818 INSDAuthor ::= VisibleString -- element type of INSDReference.authors
3819
3820 INSDReference ::= SEQUENCE { -- one citation; only reference and journal are mandatory
3821 reference VisibleString ,
3822 position VisibleString OPTIONAL , -- "X..Y" span list or the literal 'sites'; when absent the reference applies to the entire sequence
3823 authors SEQUENCE OF INSDAuthor OPTIONAL ,
3824 consortium VisibleString OPTIONAL ,
3825 title VisibleString OPTIONAL ,
3826 journal VisibleString ,
3827 xref SET OF INSDXref OPTIONAL , -- declared as SET OF, unlike the SEQUENCE OF lists used elsewhere in this module
3828 pubmed INTEGER OPTIONAL ,
3829 remark VisibleString OPTIONAL
3830 }
3831
3832 -- INSDXref provides a method for referring to records in
3833 -- other databases. INSDXref_dbname is a string value that
3834 -- provides the name of the database, and INSDXref_id
3835 -- is a string value that provides the record's identifier
3836 -- in that database.
3837
3838 INSDXref ::= SEQUENCE { -- cross-reference to a record in another database; both fields are mandatory
3839 dbname VisibleString , -- name of the other database
3840 id VisibleString -- the record's identifier in that database
3841 }
3842
3843 -- INSDTagset is used for community-specific data elements
3844 -- in a tag/value format.
3845
3846 INSDTagset ::= SEQUENCE { -- every element is optional
3847 authority VisibleString OPTIONAL ,
3848 version VisibleString OPTIONAL ,
3849 url VisibleString OPTIONAL ,
3850 tags INSDTags OPTIONAL -- the tag/value entries themselves
3851 }
3852
3853 INSDTags ::= SEQUENCE OF INSDTag -- list type used by INSDTagset.tags
3854
3855 INSDTag ::= SEQUENCE { -- one entry of an INSDTagset; every element is optional
3856 name VisibleString OPTIONAL ,
3857 value VisibleString OPTIONAL ,
3858 unit VisibleString OPTIONAL
3859 }
3860
3861 -- INSDFeature_operator contains a string value describing
3862 -- the relationship among a set of INSDInterval within
3863 -- INSDFeature_intervals. The allowable formats are:
3864 --
3865 -- join : The string literal 'join' indicates that the
3866 -- INSDInterval intervals are biologically joined
3867 -- together into a contiguous molecule.
3868 --
3869 -- order : The string literal 'order' indicates that the
3870 -- INSDInterval intervals are in the presented
3871 -- order, but they are not necessarily contiguous.
3872 --
3873 -- Either 'join' or 'order' is required if INSDFeature_intervals
3874 -- is comprised of more than one INSDInterval .
3875
3876 INSDFeature ::= SEQUENCE { -- one feature table entry; only key and location are mandatory
3877 key VisibleString ,
3878 location VisibleString ,
3879 intervals SEQUENCE OF INSDInterval OPTIONAL ,
3880 operator VisibleString OPTIONAL , -- 'join' or 'order'; required when intervals holds more than one INSDInterval
3881 partial5 BOOLEAN OPTIONAL ,
3882 partial3 BOOLEAN OPTIONAL ,
3883 quals SEQUENCE OF INSDQualifier OPTIONAL
3884 }
3885
3886 -- INSDInterval_iscomp is a boolean indicating whether
3887 -- an INSDInterval_from / INSDInterval_to location
3888 -- represents a location on the complement strand.
3889 -- When INSDInterval_iscomp is TRUE, it essentially
3890 -- confirms that a 'from' value which is greater than
3891 -- a 'to' value is intentional, because the location
3892 -- is on the opposite strand of the presented sequence.
3893
3894 -- INSDInterval_interbp is a boolean indicating whether
3895 -- a feature (such as a restriction site) is located
3896 -- between two adjacent basepairs. When INSDInterval_interbp
3897 -- is TRUE, the 'from' and 'to' values must differ by
3898 -- exactly one base.
3899
3900 INSDInterval ::= SEQUENCE { -- one interval of a feature location; accession is the only mandatory element
3901 from INTEGER OPTIONAL ,
3902 to INTEGER OPTIONAL ,
3903 point INTEGER OPTIONAL ,
3904 iscomp BOOLEAN OPTIONAL , -- TRUE when the from / to location is on the complement strand
3905 interbp BOOLEAN OPTIONAL , -- TRUE when the feature lies between two adjacent basepairs
3906 accession VisibleString
3907 }
3908
3909 INSDQualifier ::= SEQUENCE { -- element type of INSDFeature.quals; a qualifier may have a name with no value
3910 name VisibleString ,
3911 value VisibleString OPTIONAL
3912 }
3913
3914 -- INSDTagsetRules defines mandatory, optional, and unique tags
3915 -- for a given community's INSDTagset. If the tagset is extensible,
3916 -- then additional tags which are not included in the list of
3917 -- mandatory or optional tags may be present. The uniquetags
3918 -- element provides a list of the tags that may occur only once
3919 -- in a given tagset.
3920
3921 INSDTagsetRules ::= SEQUENCE { -- every element is optional
3922 authority VisibleString OPTIONAL ,
3923 version VisibleString OPTIONAL ,
3924 mandatorytags INSDTagNames OPTIONAL ,
3925 optionaltags INSDTagNames OPTIONAL ,
3926 uniquetags INSDTagNames OPTIONAL , -- tags that may occur only once in a given tagset
3927 extensible BOOLEAN OPTIONAL -- when set, tags outside the mandatory and optional lists may be present
3928 }
3929
3930 INSDTagNames ::= SEQUENCE OF VisibleString -- list type of the three tag lists in INSDTagsetRules
3931
3932 INSDTagsetRuleSet ::= SEQUENCE OF INSDTagsetRules -- a list of zero or more INSDTagsetRules
3933
3934 END
3935
3936 --$Revision: 6.1 $
3937 --**********************************************************************
3938 --
3939 -- ASN.1 for a tiny Bioseq in XML
3940 -- basically a structured FASTA file with a few extras
3941 -- in this case we drop all modularity of components
3942 -- All ids are Optional - simpler structure, less checking
3943 -- Components of organism are hard coded - can't easily add or change
3944 -- sequence is just string whether DNA or protein
3945 -- by James Ostell, 2000
3946 --
3947 --**********************************************************************
3948
3949 NCBI-TSeq DEFINITIONS ::=
3950 BEGIN
3951
3952 TSeq ::= SEQUENCE { -- mandatory: seqtype, defline, length, sequence; all ids and organism fields are optional
3953 seqtype ENUMERATED {
3954 nucleotide (1),
3955 protein (2) },
3956 gi INTEGER OPTIONAL,
3957 accver VisibleString OPTIONAL,
3958 sid VisibleString OPTIONAL,
3959 local VisibleString OPTIONAL,
3960 taxid INTEGER OPTIONAL,
3961 orgname VisibleString OPTIONAL,
3962 defline VisibleString,
3963 length INTEGER,
3964 sequence VisibleString } -- a plain string whether DNA or protein (see module header)
3965
3966 TSeqSet ::= SEQUENCE OF TSeq -- a bunch of them (a list of zero or more TSeq records)
3967
3968 END
3969
3970 --$Id: scoremat.asn,v 1.12 2008/04/15 15:55:45 kazimird Exp $
3971 -- ===========================================================================
3972 --
3973 -- PUBLIC DOMAIN NOTICE
3974 -- National Center for Biotechnology Information
3975 --
3976 -- This software/database is a "United States Government Work" under the
3977 -- terms of the United States Copyright Act. It was written as part of
3978 -- the author's official duties as a United States Government employee and
3979 -- thus cannot be copyrighted. This software/database is freely available
3980 -- to the public for use. The National Library of Medicine and the U.S.
3981 -- Government have not placed any restriction on its use or reproduction.
3982 --
3983 -- Although all reasonable efforts have been taken to ensure the accuracy
3984 -- and reliability of the software and data, the NLM and the U.S.
3985 -- Government do not and cannot warrant the performance or results that
3986 -- may be obtained by using this software or data. The NLM and the U.S.
3987 -- Government disclaim all warranties, express or implied, including
3988 -- warranties of performance, merchantability or fitness for any particular
3989 -- purpose.
3990 --
3991 -- Please cite the author in any work or product based on this material.
3992 --
3993 -- ===========================================================================
3994 --
3995 -- Author: Christiam Camacho
3996 --
3997 -- File Description:
3998 -- ASN.1 definitions for scoring matrix
3999 --
4000 -- ===========================================================================
4001
4002 NCBI-ScoreMat DEFINITIONS ::= BEGIN
4003
4004 EXPORTS Pssm, PssmIntermediateData, PssmFinalData,
4005 PssmParameters, PssmWithParameters;
4006
4007 IMPORTS Object-id FROM NCBI-General
4008 Seq-entry FROM NCBI-Seqset;
4009
4010 -- a rudimentary block/core-model, to be used with block-based alignment
4011 -- routines and threading
4012
4013 BlockProperty ::= SEQUENCE { -- element type of CoreBlock.property; type is mandatory, both values are optional
4014 type INTEGER { unassigned (0),
4015 threshold (1), -- score threshold for heuristics
4016 minscore (2), -- observed minimum score in CD
4017 maxscore (3), -- observed maximum score in CD
4018 meanscore (4), -- observed mean score in CD
4019 variance (5), -- observed score variance
4020 name (10), -- just name the block
4021 is-optional(20), -- block may not have to be used
4022 other (255) },
4023 intvalue INTEGER OPTIONAL,
4024 textvalue VisibleString OPTIONAL
4025 }
4026
4027 CoreBlock ::= SEQUENCE { -- element type of CoreDef.blocks; start and stop are mandatory
4028 start INTEGER, -- begin of block on query
4029 stop INTEGER, -- end of block on query
4030 minstart INTEGER OPTIONAL, -- optional N-terminal extension
4031 maxstop INTEGER OPTIONAL, -- optional C-terminal extension
4032 property SEQUENCE OF BlockProperty OPTIONAL -- optional list of BlockProperty entries
4033 }
4034
4035 LoopConstraint ::= SEQUENCE { -- element type of CoreDef.loops; both fields have defaults
4036 minlength INTEGER DEFAULT 0, -- minimum length of unaligned region
4037 maxlength INTEGER DEFAULT 100000 -- maximum length of unaligned region
4038 }
4039
4040 CoreDef ::= SEQUENCE { -- used by PssmParameters.constraints; all three fields are mandatory
4041 nblocks INTEGER, -- number of core elements/blocks
4042 blocks SEQUENCE OF CoreBlock, -- nblocks locations
4043 loops SEQUENCE OF LoopConstraint -- (nblocks+1) constraints
4044 }
4045
4046 -- ===========================================================================
4047 -- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
4048 -- ===========================================
4049 --
4050 -- Two possible inputs to PSI-BLAST and formatrpsdb:
4051 -- 1) PssmWithParams where pssm field contains intermediate PSSM data (matrix
4052 -- of frequency ratios)
4053 -- 2) PssmWithParams where pssm field contains final PSSM data (matrix of
4054 -- scores and statistical parameters) - such as written by cddumper
4055 --
4056 -- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
4057 -- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
4058 -- In case 2, PSI-BLAST's PSSM engine is not invoked and the matrix of scores
4059 -- and statistical parameters are used to perform the search in PSI-BLAST and the
4060 -- same data and the data in PssmWithParams::params::rpsdbparams is used to
4061 -- build the PSSM and ultimately the RPS-BLAST database
4062 --
4063 --
4064 -- reads ++++++++++++++ writes
4065 -- PssmWithParams ====> + PSI-BLAST + =====> PssmWithParams
4066 -- ++++++++++++++ | ^
4067 -- ^ | |
4068 -- | | |
4069 -- +===========================================+ |
4070 -- | |
4071 -- +===========================================+ |
4072 -- | |
4073 -- reads | |
4074 -- v |
4075 -- +++++++++++++++ writes +++++++++++++++++++++++ |
4076 -- | formatrpsdb | =====> | RPS-BLAST databases | |
4077 -- +++++++++++++++ +++++++++++++++++++++++ |
4078 -- ^ |
4079 -- | |
4080 -- | reads |
4081 -- +++++++++++++ |
4082 -- | RPS-BLAST | |
4083 -- +++++++++++++ |
4084 -- |
4085 -- reads ++++++++++++ writes |
4086 -- Cdd ======> | cddumper | =============================+
4087 -- ++++++++++++
4088 --
4089 -- ===========================================================================
4090
4091 -- Contains the PSSM's scores and its associated statistical parameters.
4092 -- Dimensions and order in which scores are stored must be the same as that
4093 -- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
4094 PssmFinalData ::= SEQUENCE { -- mandatory: scores, lambda, kappa, h; scalingFactor defaults to 1; the ungapped parameters are optional
4095
4096 -- PSSM's scores
4097 scores SEQUENCE OF INTEGER,
4098
4099 -- Karlin & Altschul parameter produced during the PSSM's calculation
4100 lambda REAL,
4101
4102 -- Karlin & Altschul parameter produced during the PSSM's calculation
4103 kappa REAL,
4104
4105 -- Karlin & Altschul parameter produced during the PSSM's calculation
4106 h REAL,
4107
4108 -- scaling factor used to obtain more precision when building the PSSM.
4109 -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
4110 -- engine generates PSSMs which are not scaled-up, however, if PSI-BLAST is
4111 -- given a PSSM which contains a scaled-up PSSM (indicated by having a
4112 -- scalingFactor greater than 1), then it will scale down the PSSM to
4113 -- perform the initial stages of the search with it.
4114 -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided
4115 -- scaled-up PSSMs, it will ensure that all PSSMs used to build the
4116 -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST
4117 -- will silently produce incorrect results).
4118 scalingFactor INTEGER DEFAULT 1,
4119
4120 -- Karlin & Altschul parameter produced during the PSSM's calculation
4121 lambdaUngapped REAL OPTIONAL,
4122
4123 -- Karlin & Altschul parameter produced during the PSSM's calculation
4124 kappaUngapped REAL OPTIONAL,
4125
4126 -- Karlin & Altschul parameter produced during the PSSM's calculation
4127 hUngapped REAL OPTIONAL
4128 }
4129
4130 -- Contains the PSSM's intermediate data used to create the PSSM's scores
4131 -- and statistical parameters. Dimensions and order in which scores are
4132 -- stored must be the same as that specified in Pssm::numRows,
4133 -- Pssm::numColumns, and Pssm::byRow
4134 PssmIntermediateData ::= SEQUENCE { -- freqRatios is the only mandatory field; every other field is optional
4135
4136 -- observed residue frequencies (or counts) per position of the PSSM
4137 -- (prior to application of pseudocounts)
4138 resFreqsPerPos SEQUENCE OF INTEGER OPTIONAL,
4139
4140 -- Weighted observed residue frequencies per position of the PSSM.
4141 -- (N.B.: each position's weights should add up to 1.0).
4142 -- This field corresponds to f_i (f sub i) in equation 2 of
4143 -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
4144 -- NOTE: this is needed for diagnostics information only (i.e.:
4145 -- -out_ascii_pssm option in psiblast)
4146 weightedResFreqsPerPos SEQUENCE OF REAL OPTIONAL,
4147
4148 -- PSSM's frequency ratios
4149 freqRatios SEQUENCE OF REAL,
4150
4151 -- Information content per position of the PSSM
4152 -- NOTE: this is needed for diagnostics information only (i.e.:
4153 -- -out_ascii_pssm option in psiblast)
4154 informationContent SEQUENCE OF REAL OPTIONAL,
4155
4156 -- Weights for columns of the PSSM without gaps
4157 -- NOTE: this is needed for diagnostics information only (i.e.:
4158 -- -out_ascii_pssm option in psiblast)
4159 gaplessColumnWeights SEQUENCE OF REAL OPTIONAL,
4160
4161 -- Used in sequence weights computation
4162 -- NOTE: this is needed for diagnostics information only (i.e.:
4163 -- -out_ascii_pssm option in psiblast)
4164 sigma SEQUENCE OF REAL OPTIONAL,
4165
4166 -- Length of the aligned regions per position of the query sequence
4167 -- NOTE: this is needed for diagnostics information only (i.e.:
4168 -- -out_ascii_pssm option in psiblast)
4169 intervalSizes SEQUENCE OF INTEGER OPTIONAL,
4170
4171 -- Number of matching sequences per position of the PSSM (including the
4172 -- query)
4173 -- NOTE: this is needed for diagnostics information only (i.e.:
4174 -- -out_ascii_pssm option in psiblast)
4175 numMatchingSeqs SEQUENCE OF INTEGER OPTIONAL
4176 }
4177
4178 -- Position-specific scoring matrix
4179 --
4180 -- Column indices on the PSSM refer to the positions corresponding to the
4181 -- query/master sequence, i.e. the number of columns (N) is the same
4182 -- as the length of the query/master sequence.
4183 -- Row indices refer to individual amino acid types, i.e. the number of
4184 -- rows (M) is the same as the number of different residues in the
4185 -- alphabet we use. Consequently, row labels are amino acid identifiers.
4186 --
4187 -- PSSMs are stored as linear arrays of integers. By default, we store
4188 -- them column-by-column, M values for the first column followed by M
4189 -- values for the second column, and so on. In order to provide
4190 -- flexibility for external applications, the boolean field "byRow" is
4191 -- provided to specify the storage order.
4192 Pssm ::= SEQUENCE { -- numRows and numColumns are the only mandatory fields
4193
4194 -- Is this a protein or nucleotide scoring matrix?
4195 isProtein BOOLEAN DEFAULT TRUE,
4196
4197 -- PSSM identifier
4198 identifier Object-id OPTIONAL,
4199
4200 -- The dimensions of the matrix are returned so the client can
4201 -- verify that all data was received.
4202
4203 numRows INTEGER, -- number of rows
4204 numColumns INTEGER, -- number of columns
4205
4206 -- row-labels is given to note the order of residue types so that it can
4207 -- be cross-checked between applications.
4208 -- If this field is not given, the matrix values are presented in
4209 -- order of the alphabet: ncbistdaa is used for protein, ncbi4na for nucl.
4210 -- for proteins the values returned correspond to
4211 -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
4212 rowLabels SEQUENCE OF VisibleString OPTIONAL,
4213
4214 -- are matrices stored row by row?
4215 byRow BOOLEAN DEFAULT FALSE,
4216
4217 -- PSSM representative sequence (master)
4218 query Seq-entry OPTIONAL,
4219
4220 -- both intermediateData and finalData can be provided, but at least one of
4221 -- them must be provided.
4222 -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData
4223 -- representation.
4224
4225 -- Intermediate data for the PSSM
4226 intermediateData PssmIntermediateData OPTIONAL,
4227
4228 -- Final representation for the PSSM
4229 finalData PssmFinalData OPTIONAL
4230 }
4231
4232 -- This structure is used to create the RPS-BLAST database auxiliary file
4233 -- (*.aux) and it contains parameters set at creation time of the PSSM.
4234 -- Also, the matrixName field is used by formatrpsdb to build a PSSM from
4235 -- a Pssm structure which only contains PssmIntermediateData.
4236 FormatRpsDbParameters ::= SEQUENCE { -- matrixName is mandatory; both gap penalties are optional
4237
4238 -- name of the underlying score matrix whose frequency ratios were
4239 -- used in PSSM construction (e.g.: BLOSUM62)
4240 matrixName VisibleString,
4241
4242 -- gap opening penalty corresponding to the matrix above
4243 gapOpen INTEGER OPTIONAL,
4244
4245 -- gap extension penalty corresponding to the matrix above
4246 gapExtend INTEGER OPTIONAL
4247
4248 }
4249
4250 -- Populated by PSSM engine of PSI-BLAST, original source for these values
4251 -- are the PSI-BLAST options specified using the BLAST options API
4252 PssmParameters ::= SEQUENCE { -- every field is optional
4253
4254 -- pseudocount constant used for PSSM. This field corresponds to beta in
4255 -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
4256 pseudocount INTEGER OPTIONAL,
4257
4258 -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
4259 -- populated by PSI-BLAST
4260 rpsdbparams FormatRpsDbParameters OPTIONAL,
4261
4262 -- alignment constraints needed by sequence-structure threader
4263 -- and other global or local block-alignment algorithms
4264 constraints CoreDef OPTIONAL
4265 }
4266
4267 -- Envelope containing PSSM and the parameters used to create it.
4268 -- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
4269 PssmWithParameters ::= SEQUENCE { -- pssm is mandatory; params is optional
4270
4271 -- This field is applicable to PSI-BLAST and formatrpsdb.
4272 -- When both the intermediate and final PSSM data are provided in this
4273 -- field, the final data (matrix of scores and associated statistical
4274 -- parameters) takes precedence and that data is used for further
4275 -- processing. The rationale for this is that the PSSM's scores and
4276 -- statistical parameters might have been calculated by other applications
4277 -- and it might not be possible to recreate it by using PSI-BLAST's PSSM
4278 -- engine.
4279 pssm Pssm,
4280
4281 -- This field's rpsdbparams is used to specify the values of options
4282 -- for processing by formatrpsdb. If these are not set, the command
4283 -- line defaults of formatrpsdb are applied. This field is used
4284 -- by PSI-BLAST to verify that the underlying score matrix used to BUILD
4285 -- the PSSM is the same as the one being specified through the BLAST
4286 -- Options API. If this field is omitted, no verification will be
4287 -- performed, so be careful to keep track of what matrix was used to build
4288 -- the PSSM or else the results produced by PSI-BLAST will be unreliable.
4289 params PssmParameters OPTIONAL
4290 }
4291
4292 END
4293 --$Revision: 1.57 $
4294 --**********************************************************************
4295 --
4296 -- NCBI ASN.1 macro editing language specifications
4297 --
4298 -- by Colleen Bollin, 2007
4299 --
4300 --**********************************************************************
4301
4302 NCBI-Macro DEFINITIONS ::=
4303 BEGIN
4304
4305 EXPORTS AECR-action, Parse-action, Macro-action-list;
4306
4307 -- simple constraints --
4308
4309 String-location ::= ENUMERATED { -- type of String-constraint.match-location, where the default is contains
4310 contains (1) ,
4311 equals (2) ,
4312 starts (3) ,
4313 ends (4) ,
4314 inlist (5) }
4315
4316 String-constraint ::= SEQUENCE { -- only match-text is mandatory; every other field has a default
4317 match-text VisibleString ,
4318 match-location String-location DEFAULT contains ,
4319 case-sensitive BOOLEAN DEFAULT FALSE ,
4320 whole-word BOOLEAN DEFAULT FALSE ,
4321 not-present BOOLEAN DEFAULT FALSE }
4322
4323 Strand-constraint ::= ENUMERATED { -- type of Location-constraint.strand, where the default is any
4324 any (0) ,
4325 plus (1) ,
4326 minus (2) }
4327
4328 Seqtype-constraint ::= ENUMERATED { -- type of Location-constraint.seq-type, where the default is any
4329 any (0) ,
4330 nuc (1) ,
4331 prot (2) }
4332
4333 Partial-constraint ::= ENUMERATED { -- type of Location-constraint.partial5 and partial3, where the default is either
4334 either (0) ,
4335 partial (1) ,
4336 complete (2) }
4337
4338 Location-constraint ::= SEQUENCE { -- every field has a default, each being the 0 value of its enumeration
4339 strand Strand-constraint DEFAULT any ,
4340 seq-type Seqtype-constraint DEFAULT any ,
4341 partial5 Partial-constraint DEFAULT either ,
4342 partial3 Partial-constraint DEFAULT either }
4343
4344 Object-type-constraint ::= ENUMERATED { -- three values numbered from 0; any (0) comes first, as in the other constraint enumerations
4345 any (0) ,
4346 feature (1) ,
4347 descriptor (2) }
4348
4349
4350 -- feature values --
4351
4352 Feature-type ::= ENUMERATED { -- values 0 through 93 are contiguous; the next unused value is 94
4353 any (0) ,
4354 gene (1) ,
4355 org (2) ,
4356 cds (3) ,
4357 prot (4) ,
4358 preRNA (5) ,
4359 mRNA (6) ,
4360 tRNA (7) ,
4361 rRNA (8) ,
4362 snRNA (9) ,
4363 scRNA (10) ,
4364 otherRNA (11) ,
4365 pub (12) ,
4366 seq (13) ,
4367 imp (14) ,
4368 allele (15) ,
4369 attenuator (16) ,
4370 c-region (17) ,
4371 caat-signal (18) ,
4372 imp-CDS (19) ,
4373 conflict (20) ,
4374 d-loop (21) ,
4375 d-segment (22) ,
4376 enhancer (23) ,
4377 exon (24) ,
4378 gC-signal (25) ,
4379 iDNA (26) ,
4380 intron (27) ,
4381 j-segment (28) ,
4382 ltr (29) ,
4383 mat-peptide (30) ,
4384 misc-binding (31) ,
4385 misc-difference (32) ,
4386 misc-feature (33) ,
4387 misc-recomb (34) ,
4388 misc-RNA (35) ,
4389 misc-signal (36) ,
4390 misc-structure (37) ,
4391 modified-base (38) ,
4392 mutation (39) ,
4393 n-region (40) ,
4394 old-sequence (41) ,
4395 polyA-signal (42) ,
4396 polyA-site (43) ,
4397 precursor-RNA (44) ,
4398 prim-transcript (45) ,
4399 primer-bind (46) ,
4400 promoter (47) ,
4401 protein-bind (48) ,
4402 rbs (49) ,
4403 repeat-region (50) ,
4404 rep-origin (51) ,
4405 s-region (52) ,
4406 sig-peptide (53) ,
4407 source (54) ,
4408 stem-loop (55) ,
4409 sts (56) ,
4410 tata-signal (57) ,
4411 terminator (58) ,
4412 transit-peptide (59) ,
4413 unsure (60) ,
4414 v-region (61) ,
4415 v-segment (62) ,
4416 variation (63) ,
4417 virion (64) ,
4418 n3clip (65) ,
4419 n3UTR (66) ,
4420 n5clip (67) ,
4421 n5UTR (68) ,
4422 n10-signal (69) ,
4423 n35-signal (70) ,
4424 site-ref (71) ,
4425 region (72) ,
4426 comment (73) ,
4427 bond (74) ,
4428 site (75) ,
4429 rsite (76) ,
4430 user (77) ,
4431 txinit (78) ,
4432 num (79) ,
4433 psec-str (80) ,
4434 non-std-residue (81) ,
4435 het (82) ,
4436 biosrc (83) ,
4437 preprotein (84) ,
4438 mat-peptide-aa (85) ,
4439 sig-peptide-aa (86) ,
4440 transit-peptide-aa (87) ,
4441 snoRNA (88) ,
4442 gap (89) ,
4443 operon (90) ,
4444 oriT (91) ,
4445 ncRNA (92) ,
4446 tmRNA (93) }
4447
4448 Feat-qual-legal ::= ENUMERATED { -- values 1 through 69 are contiguous; there is no 0 entry; the next unused value is 70
4449 allele (1) ,
4450 activity (2) ,
4451 anticodon (3) ,
4452 bound-moiety (4) ,
4453 chromosome (5),
4454 citation (6),
4455 codon (7) ,
4456 codon-start (8) ,
4457 codons-recognized (9) ,
4458 compare (10) ,
4459 cons-splice (11) ,
4460 db-xref (12) ,
4461 description (13) ,
4462 direction (14) ,
4463 ec-number (15) ,
4464 environmental-sample (16) ,
4465 evidence (17) ,
4466 exception (18) ,
4467 experiment (19) ,
4468 focus (20) ,
4469 frequency (21) ,
4470 function (22) ,
4471 gene (23) ,
4472 gene-description (24) ,
4473 inference (25) ,
4474 label (26) ,
4475 locus-tag (27) ,
4476 map (28) ,
4477 mobile-element (29) ,
4478 mod-base (30) ,
4479 mol-type (31) ,
4480 ncRNA-class (32) ,
4481 note (33) ,
4482 number (34) ,
4483 old-locus-tag (35) ,
4484 operon (36) ,
4485 organism (37) ,
4486 organelle (38) ,
4487 partial (39) ,
4488 phenotype (40) ,
4489 plasmid (41) ,
4490 product (42) ,
4491 protein-id (43) ,
4492 pseudo (44) ,
4493 rearranged (45) ,
4494 replace (46) ,
4495 rpt-family (47) ,
4496 rpt-type (48) ,
4497 rpt-unit (49) ,
4498 rpt-unit-seq (50) ,
4499 rpt-unit-range (51) ,
4500 segment (52) ,
4501 sequenced-mol (53) ,
4502 standard-name (54) ,
4503 synonym (55) ,
4504 transcript-id (56) ,
4505 transgenic (57) ,
4506 translation (58) ,
4507 transl-except (59) ,
4508 transl-table (60) ,
4509 usedin (61),
4510 mobile-element-type (62),
4511 mobile-element-name (63),
4512 gene-comment (64) ,
4513 satellite (65) ,
4514 satellite-type (66) ,
4515 satellite-name (67) ,
4516 location (68) ,
4517 tag-peptide (69) }
4518
4519 Feat-qual-legal-val ::= SEQUENCE { -- a Feat-qual-legal qualifier paired with a string value; both fields are mandatory
4520 qual Feat-qual-legal ,
4521 val VisibleString }
4522
4523 Feat-qual-legal-val-choice ::= CHOICE { -- a CHOICE with a single alternative; element type of Feat-qual-legal-set
4524 qual Feat-qual-legal-val }
4525
4526 Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice -- declared as SET OF, not SEQUENCE OF
4527
4528 Feat-qual-choice ::= CHOICE { -- either an enumerated qualifier or a String-constraint
4529 legal-qual Feat-qual-legal ,
4530 illegal-qual String-constraint }
4531
4532 Feature-field ::= SEQUENCE { -- a feature type plus a qualifier given as Feat-qual-choice
4533 type Feature-type ,
4534 field Feat-qual-choice }
4535
4536 Feature-field-legal ::= SEQUENCE { -- like Feature-field, but field is restricted to Feat-qual-legal
4537 type Feature-type ,
4538 field Feat-qual-legal }
4539
4540 Feature-field-pair ::= SEQUENCE { -- one feature type shared by a from-field and a to-field
4541 type Feature-type ,
4542 field-from Feat-qual-choice ,
4543 field-to Feat-qual-choice }
4544
4545 Rna-feat-type ::= CHOICE { -- only ncRNA carries data (a VisibleString); every other alternative is NULL
4546 preRNA NULL ,
4547 mRNA NULL ,
4548 tRNA NULL ,
4549 rRNA NULL ,
4550 ncRNA VisibleString ,
4551 tmRNA NULL,
4552 miscRNA NULL }
4553
4554 Rna-field ::= ENUMERATED { -- values 1 through 13 are contiguous; there is no 0 entry
4555 product (1) ,
4556 comment (2) ,
4557 codons-recognized (3) ,
4558 ncrna-class (4) ,
4559 anticodon (5) ,
4560 transcript-id (6) ,
4561 gene-locus (7) ,
4562 gene-description (8) ,
4563 gene-maploc (9) ,
4564 gene-locus-tag (10) ,
4565 gene-synonym (11) ,
4566 gene-comment (12) ,
4567 tag-peptide (13) }
4568
4569
4570 Rna-qual ::= SEQUENCE { -- an RNA feature type plus one Rna-field; both are mandatory
4571 type Rna-feat-type ,
4572 field Rna-field }
4573
4574 Rna-qual-pair ::= SEQUENCE { -- one RNA feature type shared by a from-field and a to-field
4575 type Rna-feat-type ,
4576 field-from Rna-field ,
4577 field-to Rna-field }
4578
4579 Source-qual ::= ENUMERATED { -- values 1 through 96 are contiguous; there is no 0 entry; the next unused value is 97
4580 acronym (1) ,
4581 anamorph (2) ,
4582 authority (3) ,
4583 bio-material (4) ,
4584 biotype (5) ,
4585 biovar (6) ,
4586 breed (7) ,
4587 cell-line (8) ,
4588 cell-type (9) ,
4589 chemovar (10) ,
4590 chromosome (11) ,
4591 clone (12) ,
4592 clone-lib (13) ,
4593 collected-by (14) ,
4594 collection-date (15) ,
4595 common (16) ,
4596 common-name (17) ,
4597 country (18) ,
4598 cultivar (19) ,
4599 culture-collection (20) ,
4600 dev-stage (21) ,
4601 division (22) ,
4602 dosage (23) ,
4603 ecotype (24) ,
4604 endogenous-virus-name (25) ,
4605 environmental-sample (26) ,
4606 forma (27) ,
4607 forma-specialis (28) ,
4608 frequency (29) ,
4609 fwd-primer-name (30) ,
4610 fwd-primer-seq (31) ,
4611 gb-acronym (32) ,
4612 gb-anamorph (33) ,
4613 gb-synonym (34) ,
4614 genotype (35) ,
4615 germline (36) ,
4616 group (37) ,
4617 haplotype (38) ,
4618 identified-by (39) ,
4619 insertion-seq-name (40) ,
4620 isolate (41) ,
4621 isolation-source (42) ,
4622 lab-host (43) ,
4623 lat-lon (44) ,
4624 lineage (45) ,
4625 map (46) ,
4626 metagenome-source (47) ,
4627 metagenomic (48) ,
4628 old-lineage (49) ,
4629 old-name (50) ,
4630 orgmod-note (51) ,
4631 nat-host (52) ,
4632 pathovar (53) ,
4633 plasmid-name (54) ,
4634 plastid-name (55) ,
4635 pop-variant (56) ,
4636 rearranged (57) ,
4637 rev-primer-name (58) ,
4638 rev-primer-seq (59) ,
4639 segment (60) ,
4640 serogroup (61) ,
4641 serotype (62) ,
4642 serovar (63) ,
4643 sex (64) ,
4644 specimen-voucher (65) ,
4645 strain (66) ,
4646 subclone (67) ,
4647 subgroup (68) ,
4648 subsource-note (69),
4649 sub-species (70) ,
4650 substrain (71) ,
4651 subtype (72) ,
4652 synonym (73) ,
4653 taxname (74) ,
4654 teleomorph (75) ,
4655 tissue-lib (76) ,
4656 tissue-type (77) ,
4657 transgenic (78) ,
4658 transposon-name (79) ,
4659 type (80) ,
4660 variety (81) ,
4661 specimen-voucher-INST (82) ,
4662 specimen-voucher-COLL (83) ,
4663 specimen-voucher-SpecID (84) ,
4664 culture-collection-INST (85) ,
4665 culture-collection-COLL (86) ,
4666 culture-collection-SpecID (87) ,
4667 bio-material-INST (88) ,
4668 bio-material-COLL (89) ,
4669 bio-material-SpecID (90),
4670 all-notes (91),
4671 mating-type (92),
4672 linkage-group (93) ,
4673 haplogroup (94),
4674 all-quals (95),
4675 dbxref (96)
4676 }
4677
4678 Source-qual-pair ::= SEQUENCE { -- a from-qualifier and a to-qualifier; both are mandatory
4679 field-from Source-qual ,
4680 field-to Source-qual }
4681
4682 Source-location ::= ENUMERATED { -- values 0 through 22 are contiguous; used by the location alternative of both Source-qual choices
4683 unknown (0) ,
4684 genomic (1) ,
4685 chloroplast (2) ,
4686 chromoplast (3) ,
4687 kinetoplast (4) ,
4688 mitochondrion (5) ,
4689 plastid (6) ,
4690 macronuclear (7) ,
4691 extrachrom (8) ,
4692 plasmid (9) ,
4693 transposon (10) ,
4694 insertion-seq (11) ,
4695 cyanelle (12) ,
4696 proviral (13) ,
4697 virion (14) ,
4698 nucleomorph (15) ,
4699 apicoplast (16) ,
4700 leucoplast (17) ,
4701 proplastid (18) ,
4702 endogenous-virus (19) ,
4703 hydrogenosome (20) ,
4704 chromosome (21) ,
4705 chromatophore (22) }
4706
4707 Source-origin ::= ENUMERATED { -- values 0 through 5 are contiguous; other is 255 rather than the next sequential value
4708 unknown (0) ,
4709 natural (1) ,
4710 natmut (2) ,
4711 mut (3) ,
4712 artificial (4) ,
4713 synthetic (5) ,
4714 other (255) }
4715
4716 Source-qual-choice ::= CHOICE { -- same alternative names as Source-qual-val-choice, but textqual carries a bare Source-qual
4717 textqual Source-qual ,
4718 location Source-location,
4719 origin Source-origin ,
4720 gcode INTEGER ,
4721 mgcode INTEGER }
4722
4723 Source-qual-text-val ::= SEQUENCE { -- a Source-qual paired with a string value; type of Source-qual-val-choice.textqual
4724 srcqual Source-qual ,
4725 val VisibleString }
4726
4727 Source-qual-val-choice ::= CHOICE { -- same alternative names as Source-qual-choice, but textqual carries a Source-qual-text-val
4728 textqual Source-qual-text-val ,
4729 location Source-location,
4730 origin Source-origin ,
4731 gcode INTEGER ,
4732 mgcode INTEGER }
4733
4734 Source-qual-val-set ::= SET OF Source-qual-val-choice -- declared as SET OF, not SEQUENCE OF
4735
4736 CDSGeneProt-field ::= ENUMERATED { -- values 1 through 24 are contiguous; there is no 0 entry
4737 cds-comment (1) ,
4738 gene-locus (2) ,
4739 gene-description (3) ,
4740 gene-comment (4) ,
4741 gene-allele (5) ,
4742 gene-maploc (6) ,
4743 gene-locus-tag (7) ,
4744 gene-synonym (8) ,
4745 gene-old-locus-tag (9) ,
4746 mrna-product (10) ,
4747 mrna-comment (11) ,
4748 prot-name (12) ,
4749 prot-description (13) ,
4750 prot-ec-number (14) ,
4751 prot-activity (15) ,
4752 prot-comment (16) ,
4753 mat-peptide-name (17) ,
4754 mat-peptide-description (18) ,
4755 mat-peptide-ec-number (19) ,
4756 mat-peptide-activity (20) ,
4757 mat-peptide-comment (21) ,
4758 cds-inference (22) ,
4759 gene-inference (23) ,
4760 codon-start (24) }
4761
4762 CDSGeneProt-field-pair ::= SEQUENCE {
4763 field-from CDSGeneProt-field ,
4764 field-to CDSGeneProt-field }
4765
4766 Molecule-type ::= ENUMERATED { -- numbered contiguously 0..11; names and numbers 1..10 match Sequence-constraint-rnamol
4767 unknown (0) , -- Sequence-constraint-rnamol names its value 0 "any" instead
4768 genomic (1) ,
4769 precursor-RNA (2) ,
4770 mRNA (3) ,
4771 rRNA (4) ,
4772 tRNA (5) ,
4773 genomic-mRNA (6) ,
4774 cRNA (7) ,
4775 transcribed-RNA (8) ,
4776 ncRNA (9) ,
4777 transfer-messenger-RNA (10) ,
4778 other (11) } -- catch-all takes the next sequential number
4779
4780 Technique-type ::= ENUMERATED { -- 25 named values numbered contiguously 0..24
4781 unknown (0) ,
4782 standard (1) ,
4783 est (2) ,
4784 sts (3) ,
4785 survey (4) ,
4786 genetic-map (5) ,
4787 physical-map (6) ,
4788 derived (7) ,
4789 concept-trans (8) ,
4790 seq-pept (9) ,
4791 both (10) ,
4792 seq-pept-overlap (11) ,
4793 seq-pept-homol (12) ,
4794 concept-trans-a (13) ,
4795 htgs-1 (14) ,
4796 htgs-2 (15) ,
4797 htgs-3 (16) ,
4798 fli-cDNA (17) ,
4799 htgs-0 (18) ,
4800 htc (19) ,
4801 wgs (20) ,
4802 barcode (21) ,
4803 composite-wgs-htgs (22) ,
4804 tsa (23) ,
4805 other (24) } -- catch-all takes the next sequential number
4806
4807 Completedness-type ::= ENUMERATED { -- numbered contiguously 0..8; every number must be distinct (ASN.1 ENUMERATED rule)
4808 unknown (0) ,
4809 complete (1) ,
4810 partial (2) ,
4811 no-left (3) ,
4812 no-right (4) ,
4813 no-ends (5) ,
4814 has-left (6) ,
4815 has-right (7) ,
4816 other (8) } -- was (6), colliding with has-left; 8 is the next free number, as "other" is numbered in Molecule-type and Technique-type
4817
4818 Molecule-class-type ::= ENUMERATED { -- numbered contiguously 0..5
4819 unknown (0) ,
4820 dna (1) ,
4821 rna (2) ,
4822 protein (3) ,
4823 nucleotide (4),
4824 other (5) }
4825
4826 Topology-type ::= ENUMERATED { -- numbered contiguously 0..4
4827 unknown (0) ,
4828 linear (1) ,
4829 circular (2) ,
4830 tandem (3) ,
4831 other (4) }
4832
4833 Strand-type ::= ENUMERATED { -- numbered contiguously 0..5
4834 unknown (0) ,
4835 single (1) ,
4836 double (2) ,
4837 mixed (3) ,
4838 mixed-rev (4) ,
4839 other (5) }
4840
4841 Molinfo-field ::= CHOICE { -- one value from exactly one of the six enumerations named below
4842 molecule Molecule-type ,
4843 technique Technique-type ,
4844 completedness Completedness-type ,
4845 mol-class Molecule-class-type ,
4846 topology Topology-type ,
4847 strand Strand-type }
4848
4849 Molinfo-molecule-pair ::= SEQUENCE { -- from/to pair of Molecule-type; both required
4850 from Molecule-type ,
4851 to Molecule-type }
4852
4853 Molinfo-technique-pair ::= SEQUENCE { -- from/to pair of Technique-type; both required
4854 from Technique-type ,
4855 to Technique-type }
4856
4857 Molinfo-completedness-pair ::= SEQUENCE { -- from/to pair of Completedness-type; both required
4858 from Completedness-type ,
4859 to Completedness-type }
4860
4861 Molinfo-mol-class-pair ::= SEQUENCE { -- from/to pair of Molecule-class-type; both required
4862 from Molecule-class-type ,
4863 to Molecule-class-type }
4864
4865 Molinfo-topology-pair ::= SEQUENCE { -- from/to pair of Topology-type; both required
4866 from Topology-type ,
4867 to Topology-type }
4868
4869 Molinfo-strand-pair ::= SEQUENCE { -- from/to pair of Strand-type; both required
4870 from Strand-type ,
4871 to Strand-type }
4872
4873 Molinfo-field-pair ::= CHOICE { -- arm names mirror Molinfo-field; each arm carries the matching from/to pair type
4874 molecule Molinfo-molecule-pair ,
4875 technique Molinfo-technique-pair ,
4876 completedness Molinfo-completedness-pair ,
4877 mol-class Molinfo-mol-class-pair ,
4878 topology Molinfo-topology-pair ,
4879 strand Molinfo-strand-pair }
4880
4881 Molinfo-field-list ::= SET OF Molinfo-field -- used by Molinfo-block for its to-list and from-list members
4882
4883 -- publication fields --
4884
4885 Publication-field ::= ENUMERATED { -- 20 named values numbered contiguously 1..20; no member is numbered 0
4886 cit (1) ,
4887 authors (2) ,
4888 journal (3) ,
4889 volume (4) ,
4890 issue (5) ,
4891 pages (6) ,
4892 date (7) ,
4893 serial-number (8) ,
4894 title (9) ,
4895 affiliation (10) ,
4896 affil-div (11) ,
4897 affil-city (12) ,
4898 affil-sub (13) ,
4899 affil-country (14) ,
4900 affil-street (15) ,
4901 affil-email (16) ,
4902 affil-fax (17) ,
4903 affil-phone (18) ,
4904 affil-zipcode (19),
4905 authors-initials (20)
4906 } -- referenced by Pub-field-constraint and by the pub arm of Field-type
4907
4908 -- structured comment fields --
4909
4910 Structured-comment-field ::= CHOICE { -- three alternatives; only the named arm carries data
4911 database NULL ,
4912 named VisibleString ,
4913 field-name NULL
4914 }
4915
4916 Structured-comment-field-pair ::= SEQUENCE { -- from/to pair of Structured-comment-field; both required
4917 from Structured-comment-field ,
4918 to Structured-comment-field
4919 }
4920
4921 -- misc fields --
4922 -- these would not appear in pairs --
4923 Misc-field ::= ENUMERATED { -- numbered contiguously 1..4; Field-type has a misc arm, Field-pair-type has none
4924 genome-project-id (1) ,
4925 comment-descriptor (2) ,
4926 defline (3) ,
4927 keyword (4)
4928 }
4929
4930 -- complex constraints --
4931
4932 Pub-type ::= ENUMERATED { -- numbered contiguously 0..4; value 0 is named any
4933 any (0) ,
4934 published (1) ,
4935 unpublished (2) ,
4936 in-press (3) ,
4937 submitter-block (4) }
4938
4939 Pub-field-constraint ::= SEQUENCE { -- a Publication-field plus a String-constraint; both required
4940 field Publication-field ,
4941 constraint String-constraint } -- String-constraint is defined outside this section
4942
4943 Publication-constraint ::= SEQUENCE { -- type is required; the field constraint may be omitted
4944 type Pub-type ,
4945 field Pub-field-constraint OPTIONAL }
4946
4947 Source-constraint ::= SEQUENCE { -- all four members are OPTIONAL, so the syntax permits an empty value
4948 field1 Source-qual-choice OPTIONAL ,
4949 field2 Source-qual-choice OPTIONAL ,
4950 constraint String-constraint OPTIONAL , -- String-constraint is defined outside this section
4951 type-constraint Object-type-constraint OPTIONAL } -- Object-type-constraint is defined outside this section
4952
4953 CDSGeneProt-feature-type-constraint ::= ENUMERATED { -- numbered contiguously 1..6; no member is numbered 0
4954 gene (1) ,
4955 mRNA (2) ,
4956 cds (3) ,
4957 prot (4) ,
4958 exon (5) ,
4959 mat-peptide (6) }
4960
4961 CDSGeneProt-pseudo-constraint ::= SEQUENCE { -- feature is required
4962 feature CDSGeneProt-feature-type-constraint ,
4963 is-pseudo BOOLEAN DEFAULT TRUE } -- an omitted is-pseudo means TRUE
4964
4965 CDSGeneProt-constraint-field ::= CHOICE { -- CHOICE with a single alternative
4966 field CDSGeneProt-field }
4967
4968 CDSGeneProt-qual-constraint ::= SEQUENCE { -- all three members are OPTIONAL, so the syntax permits an empty value
4969 field1 CDSGeneProt-constraint-field OPTIONAL ,
4970 field2 CDSGeneProt-constraint-field OPTIONAL ,
4971 constraint String-constraint OPTIONAL } -- String-constraint is defined outside this section
4972
4973 Field-constraint ::= SEQUENCE { -- a Field-type plus a String-constraint; both required
4974 field Field-type , -- Field-type is defined later in this file
4975 string-constraint String-constraint } -- String-constraint is defined outside this section
4976
4977 Sequence-constraint-rnamol ::= ENUMERATED { -- numbered contiguously 0..10; names and numbers 1..10 match Molecule-type
4978 any (0) , -- Molecule-type names its value 0 "unknown" instead
4979 genomic (1) ,
4980 precursor-RNA (2) ,
4981 mRNA (3) ,
4982 rRNA (4) ,
4983 tRNA (5) ,
4984 genomic-mRNA (6) ,
4985 cRNA (7) ,
4986 transcribed-RNA (8) ,
4987 ncRNA (9) ,
4988 transfer-messenger-RNA (10) } -- unlike Molecule-type there is no "other" member
4989
4990 Sequence-constraint-mol-type-constraint ::= CHOICE { -- five alternatives; only the rna arm carries data
4991 any NULL ,
4992 nucleotide NULL ,
4993 dna NULL ,
4994 rna Sequence-constraint-rnamol ,
4995 protein NULL }
4996
4997 Sequence-constraint ::= SEQUENCE { -- seqtype and id may be omitted; feature is required
4998 seqtype Sequence-constraint-mol-type-constraint OPTIONAL ,
4999 id String-constraint OPTIONAL , -- String-constraint is defined outside this section
5000 feature Feature-type } -- Feature-type is defined outside this section
5001
5002 Constraint-choice ::= CHOICE { -- exactly one of eight constraint kinds
5003 string String-constraint , -- defined outside this section
5004 location Location-constraint , -- defined outside this section
5005 field Field-constraint ,
5006 source Source-constraint ,
5007 cdsgeneprot-qual CDSGeneProt-qual-constraint ,
5008 cdsgeneprot-pseudo CDSGeneProt-pseudo-constraint ,
5009 sequence Sequence-constraint ,
5010 pub Publication-constraint }
5011
5012 Constraint-choice-set ::= SET OF Constraint-choice -- appears as an OPTIONAL member of several action types later in this file
5013
5014 Text-portion ::= SEQUENCE { -- used as the portion member of AECRParse-action and Parse-action
5015 left-text VisibleString OPTIONAL ,
5016 include-left BOOLEAN , -- required; no DEFAULT is declared
5017 right-text VisibleString OPTIONAL ,
5018 include-right BOOLEAN , -- required; no DEFAULT is declared
5019 inside BOOLEAN , -- required; no DEFAULT is declared
5020 case-sensitive BOOLEAN DEFAULT FALSE ,
5021 whole-word BOOLEAN DEFAULT FALSE }
5022
5023 Field-edit-location ::= ENUMERATED { -- numbered contiguously 0..2
5024 anywhere (0) ,
5025 beginning (1) ,
5026 end (2) }
5027
5028 Field-edit ::= SEQUENCE { -- find-txt is required; repl-txt may be omitted
5029 find-txt VisibleString ,
5030 repl-txt VisibleString OPTIONAL ,
5031 location Field-edit-location DEFAULT anywhere } -- an omitted location means anywhere (0)
5032
5033 Field-type ::= CHOICE { -- exactly one of eight field kinds
5034 source-qual Source-qual-choice ,
5035 feature-field Feature-field , -- defined outside this section
5036 rna-field Rna-qual , -- defined outside this section
5037 cds-gene-prot CDSGeneProt-field ,
5038 molinfo-field Molinfo-field ,
5039 pub Publication-field ,
5040 struc-comment-field Structured-comment-field ,
5041 misc Misc-field }
5042
5043 Field-pair-type ::= CHOICE { -- six arms named like those of Field-type; there is no pub or misc arm here
5044 source-qual Source-qual-pair ,
5045 feature-field Feature-field-pair , -- defined outside this section
5046 rna-field Rna-qual-pair , -- defined outside this section
5047 cds-gene-prot CDSGeneProt-field-pair ,
5048 molinfo-field Molinfo-field-pair ,
5049 struc-comment-field Structured-comment-field-pair }
5050
5051 ExistingTextOption ::= ENUMERATED { -- 13 named values numbered contiguously 1..13; no member is numbered 0
5052 replace-old (1) ,
5053 append-semi (2) ,
5054 append-space (3) ,
5055 append-colon (4) ,
5056 append-comma (5) ,
5057 append-none (6) ,
5058 prefix-semi (7) ,
5059 prefix-space (8) ,
5060 prefix-colon (9) ,
5061 prefix-comma (10) ,
5062 prefix-none (11) ,
5063 leave-old (12) ,
5064 add-qual (13) } -- always a required member wherever this section uses the type (existing-text)
5065
5066
5067 Apply-action ::= SEQUENCE { -- all three members are required
5068 field Field-type ,
5069 value VisibleString ,
5070 existing-text ExistingTextOption }
5071
5072 Edit-action ::= SEQUENCE { -- a Field-edit together with the Field-type it refers to; both required
5073 edit Field-edit ,
5074 field Field-type }
5075
5076 Convert-action ::= SEQUENCE { -- fields and existing-text are required; the two flags default to FALSE
5077 fields Field-pair-type ,
5078 strip-name BOOLEAN DEFAULT FALSE ,
5079 keep-original BOOLEAN DEFAULT FALSE ,
5080 existing-text ExistingTextOption }
5081
5082 Copy-action ::= SEQUENCE { -- both members are required
5083 fields Field-pair-type ,
5084 existing-text ExistingTextOption }
5085
5086 Swap-action ::= SEQUENCE { -- both members are required
5087 fields Field-pair-type ,
5088 field-to Field-type } -- NOTE(review): a single Field-type alongside the pair; its role is not stated here, confirm against the consumer
5089
5090 AECRParse-action ::= SEQUENCE { -- portion, fields and existing-text are required; the three remove flags default to FALSE
5091 portion Text-portion ,
5092 fields Field-pair-type ,
5093 remove-from-parsed BOOLEAN DEFAULT FALSE ,
5094 remove-left BOOLEAN DEFAULT FALSE ,
5095 remove-right BOOLEAN DEFAULT FALSE ,
5096 existing-text ExistingTextOption }
5097
5098 Remove-action ::= SEQUENCE { -- single required member
5099 field Field-type }
5100
5101 Action-choice ::= CHOICE { -- exactly one of the seven action types defined just above
5102 apply Apply-action ,
5103 edit Edit-action ,
5104 convert Convert-action ,
5105 copy Copy-action ,
5106 swap Swap-action ,
5107 remove Remove-action ,
5108 parse AECRParse-action }
5109
5110 AECR-action ::= SEQUENCE { -- wraps an Action-choice; this is the aecr arm of Macro-action-choice
5111 action Action-choice ,
5112 also-change-mrna BOOLEAN DEFAULT FALSE ,
5113 constraint Constraint-choice-set OPTIONAL } -- may be omitted
5114
5115 Cap-change ::= ENUMERATED { -- numbered contiguously 0..4; none (0) is the DEFAULT used by Parse-action
5116 none (0) ,
5117 tolower (1) ,
5118 toupper (2) ,
5119 firstcap (3) ,
5120 firstcaprestnochange (4) }
5121
5122 Parse-src-org-choice ::= CHOICE { -- two alternatives; only source-qual carries data
5123 source-qual Source-qual ,
5124 taxname-after-binomial NULL }
5125
5126 Parse-src-org ::= SEQUENCE { -- field is required
5127 field Parse-src-org-choice ,
5128 type Object-type-constraint DEFAULT any } -- Object-type-constraint is defined outside this section; omitted means any
5129
5130 Parse-src ::= CHOICE { -- eight alternatives; only org and structured-comment carry data
5131 defline NULL ,
5132 flatfile NULL ,
5133 local-id NULL ,
5134 org Parse-src-org ,
5135 comment NULL ,
5136 bankit-comment NULL ,
5137 structured-comment VisibleString ,
5138 file-id NULL }
5139
5140 Parse-dst-org ::= SEQUENCE { -- field is required
5141 field Source-qual-choice ,
5142 type Object-type-constraint DEFAULT any } -- Object-type-constraint is defined outside this section; omitted means any
5143
5144 Parse-dest ::= CHOICE { -- five alternatives; defline and comment-descriptor carry no data
5145 defline NULL ,
5146 org Parse-dst-org ,
5147 featqual Feature-field-legal , -- defined outside this section
5148 comment-descriptor NULL ,
5149 dbxref VisibleString }
5150
5151 Parse-action ::= SEQUENCE { -- this is the parse arm of Macro-action-choice
5152 portion Text-portion ,
5153 src Parse-src ,
5154 dest Parse-dest ,
5155 capitalization Cap-change DEFAULT none ,
5156 remove-from-parsed BOOLEAN DEFAULT FALSE ,
5157 existing-text ExistingTextOption } -- required; no DEFAULT is declared
5158
5159
5160 Location-interval ::= SEQUENCE { -- two unconstrained INTEGERs; no range or ordering between them is declared
5161 from INTEGER ,
5162 to INTEGER }
5163
5164 Location-choice ::= CHOICE { -- either an explicit interval or a data-less whole-sequence marker
5165 interval Location-interval ,
5166 whole-sequence NULL }
5167
5168 Sequence-list ::= SET OF VisibleString -- collection of strings; what each string holds is not stated here
5169 Sequence-list-choice ::= CHOICE { -- either an explicit Sequence-list or a data-less all marker
5170 list Sequence-list ,
5171 all NULL }
5172
5173 Apply-feature-action ::= SEQUENCE { -- this is the add-feature arm of Macro-action-choice
5174 type Feature-type , -- required; Feature-type is defined outside this section
5175 partial5 BOOLEAN DEFAULT FALSE ,
5176 partial3 BOOLEAN DEFAULT FALSE ,
5177 plus-strand BOOLEAN DEFAULT TRUE , -- note: defaults to TRUE, unlike most flags in this type
5178 location Location-choice , -- required
5179 seq-list Sequence-list-choice , -- required
5180 add-redundant BOOLEAN DEFAULT TRUE , -- note: defaults to TRUE
5181 add-mrna BOOLEAN DEFAULT FALSE ,
5182 apply-to-parts BOOLEAN DEFAULT FALSE ,
5183 only-seg-num INTEGER DEFAULT -1 , -- NOTE(review): the -1 default looks like a sentinel; confirm its meaning with the consumer
5184 fields Feat-qual-legal-set OPTIONAL, -- Feat-qual-legal-set is defined outside this section
5185 src-fields Source-qual-val-set OPTIONAL }
5186
5187 Remove-feature-action ::= SEQUENCE { -- this is the remove-feature arm of Macro-action-choice
5188 type Feature-type ,
5189 constraint Constraint-choice-set OPTIONAL }
5190
5191 -- for convert features --
5192 Convert-from-CDS-options ::= SEQUENCE { -- three required BOOLEANs; none has a DEFAULT
5193 remove-mRNA BOOLEAN ,
5194 remove-gene BOOLEAN ,
5195 remove-transcript-id BOOLEAN }
5196
5197 Convert-feature-src-options ::= CHOICE { -- CHOICE with a single alternative
5198 cds Convert-from-CDS-options }
5199
5200 Bond-type ::= ENUMERATED { -- numbered contiguously 1..5; no member is numbered 0
5201 disulfide (1) ,
5202 thioester (2) ,
5203 crosslink (3) ,
5204 thioether (4) ,
5205 other (5) }
5206
5207
5208 Site-type ::= ENUMERATED { -- 27 named values numbered contiguously 1..27; no member is numbered 0
5209 active (1) ,
5210 binding (2) ,
5211 cleavage (3) ,
5212 inhibit (4) ,
5213 modified (5) ,
5214 glycosylation (6) ,
5215 myristoylation (7) ,
5216 mutagenized (8) ,
5217 metal-binding (9) ,
5218 phosphorylation (10) ,
5219 acetylation (11) ,
5220 amidation (12) ,
5221 methylation (13) ,
5222 hydroxylation (14) ,
5223 sulfatation (15) ,
5224 oxidative-deamination (16) ,
5225 pyrrolidone-carboxylic-acid (17) ,
5226 gamma-carboxyglutamic-acid (18) ,
5227 blocked (19) ,
5228 lipid-binding (20) ,
5229 np-binding (21) ,
5230 dna-binding (22) ,
5231 signal-peptide (23) ,
5232 transit-peptide (24) ,
5233 transmembrane-region (25) ,
5234 nitrosylation (26) ,
5235 other (27) } -- catch-all takes the next sequential number
5236
5237 -- other choice is to create protein sequences, skipping bad --
5238 Region-type ::= SEQUENCE { -- single required BOOLEAN
5239 create-nucleotide BOOLEAN }
5240
5241 Convert-feature-dst-options ::= CHOICE { -- exactly one of five alternatives
5242 bond Bond-type ,
5243 site Site-type ,
5244 region Region-type ,
5245 ncrna-class VisibleString ,
5246 remove-original BOOLEAN }
5247
5248
5249 Convert-feature-action ::= SEQUENCE { -- this is the convert-feature arm of Macro-action-choice
5250 type-from Feature-type , -- Feature-type is defined outside this section
5251 type-to Feature-type ,
5252 src-options Convert-feature-src-options OPTIONAL ,
5253 dst-options Convert-feature-dst-options OPTIONAL ,
5254 leave-original BOOLEAN , -- required; NOTE(review): dst-options also offers a remove-original arm, confirm how the two interact
5255 src-feat-constraint Constraint-choice-set OPTIONAL }
5256
5257
5258 Feature-location-strand-from ::= ENUMERATED { -- numbered contiguously 0..4; includes any (0)
5259 any (0) ,
5260 plus (1) ,
5261 minus (2) ,
5262 unknown (3) ,
5263 both (4) }
5264
5265 Feature-location-strand-to ::= ENUMERATED { -- numbered 1..5; plus, minus, unknown and both keep the numbers they have in the from enumeration
5266 plus (1) ,
5267 minus (2) ,
5268 unknown (3) ,
5269 both (4) ,
5270 reverse (5) } -- present only here; there is no any member on the to side
5271
5272 Edit-location-strand ::= SEQUENCE { -- both members are required
5273 strand-from Feature-location-strand-from ,
5274 strand-to Feature-location-strand-to }
5275
5276 Partial-5-set-constraint ::= ENUMERATED { -- numbered contiguously 0..3; has one more member than Partial-3-set-constraint
5277 all (0) ,
5278 at-end (1) ,
5279 bad-start (2) ,
5280 frame-not-one (3) }
5281
5282 Partial-5-set-action ::= SEQUENCE { -- both members are required
5283 constraint Partial-5-set-constraint ,
5284 extend BOOLEAN }
5285
5286 Partial-5-clear-constraint ::= ENUMERATED { -- numbered contiguously 0..2; used directly as the clear-5-partial arm of Location-edit-type
5287 all (0) ,
5288 not-at-end (1) ,
5289 good-start (2) }
5290
5291 Partial-3-set-constraint ::= ENUMERATED { -- numbered contiguously 0..2
5292 all (0) ,
5293 at-end (1) ,
5294 bad-end (2) }
5295
5296 Partial-3-set-action ::= SEQUENCE { -- both members are required
5297 constraint Partial-3-set-constraint ,
5298 extend BOOLEAN }
5299
5300 Partial-3-clear-constraint ::= ENUMERATED { -- numbered contiguously 0..2; used directly as the clear-3-partial arm of Location-edit-type
5301 all (0) ,
5302 not-at-end (1) ,
5303 good-end (2) }
5304
5305 Convert-location-type ::= ENUMERATED { -- numbered contiguously 1..3; no member is numbered 0
5306 join (1) ,
5307 order (2) ,
5308 merge (3) }
5309
5310 Location-edit-type ::= CHOICE { -- exactly one of six edits
5311 strand Edit-location-strand ,
5312 set-5-partial Partial-5-set-action , -- set arms carry a SEQUENCE (constraint plus extend flag)
5313 clear-5-partial Partial-5-clear-constraint , -- clear arms carry only the enumeration
5314 set-3-partial Partial-3-set-action ,
5315 clear-3-partial Partial-3-clear-constraint ,
5316 convert Convert-location-type }
5317
5318 Edit-feature-location-action ::= SEQUENCE { -- this is the edit-location arm of Macro-action-choice
5319 type Feature-type , -- Feature-type is defined outside this section
5320 action Location-edit-type ,
5321 constraint Constraint-choice-set OPTIONAL }
5322
5323 Molinfo-block ::= SEQUENCE { -- NOTE(review): not referenced by any other type in this section; confirm where it is consumed
5324 to-list Molinfo-field-list , -- required
5325 from-list Molinfo-field-list OPTIONAL ,
5326 constraint Constraint-choice-set OPTIONAL }
5327
5328 Descriptor-type ::= ENUMERATED { -- 12 named values numbered contiguously 0..11; value 0 is named all
5329 all (0) ,
5330 title (1) ,
5331 source (2) ,
5332 publication (3) ,
5333 comment (4) ,
5334 genbank (5) ,
5335 user (6) ,
5336 create-date (7) ,
5337 update-date (8) ,
5338 mol-info (9) ,
5339 structured-comment (10) ,
5340 genome-project-id (11) }
5341
5342 Remove-descriptor-action ::= SEQUENCE { -- this is the remove-descriptor arm of Macro-action-choice
5343 type Descriptor-type ,
5344 constraint Constraint-choice-set OPTIONAL }
5345
5346 Autodef-list-type ::= ENUMERATED { -- numbered contiguously 1..3; no member is numbered 0
5347 feature-list (1) ,
5348 complete-sequence (2) ,
5349 complete-genome (3) }
5350
5351 Autodef-action ::= SEQUENCE { -- this is the autodef arm of Macro-action-choice
5352 modifiers SET OF Source-qual OPTIONAL , -- inline anonymous SET OF; may be omitted
5353 clause-list-type Autodef-list-type } -- required
5354
5355 Macro-action-choice ::= CHOICE { -- exactly one of the eight action types defined earlier in this section
5356 aecr AECR-action ,
5357 parse Parse-action ,
5358 add-feature Apply-feature-action ,
5359 remove-feature Remove-feature-action ,
5360 convert-feature Convert-feature-action ,
5361 edit-location Edit-feature-location-action ,
5362 remove-descriptor Remove-descriptor-action ,
5363 autodef Autodef-action }
5364
5365
5366 Macro-action-list ::= SET OF Macro-action-choice -- NOTE(review): declared SET OF, for which ASN.1 defines no significant element order; confirm whether consumers rely on action order
5367
5368 END
|
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |