|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/asn/seq.asn |
source navigation diff markup identifier search freetext search file search |
1 --$Revision: 6.20 $
2 --**********************************************************************
3 --
4 -- NCBI Sequence elements
5 -- by James Ostell, 1990
6 -- Version 3.0 - June 1994
7 --
8 --**********************************************************************
9
10 NCBI-Sequence DEFINITIONS ::=
11 BEGIN
12
13 EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
14 Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
15 Seq-hist, Seq-inst, Seq-literal, Delta-ext; -- each symbol is exported once
16
17 IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
18 Seq-align FROM NCBI-Seqalign
19 Seq-feat FROM NCBI-Seqfeat
20 Seq-graph FROM NCBI-Seqres
21 Pub-equiv FROM NCBI-Pub
22 Org-ref FROM NCBI-Organism
23 BioSource FROM NCBI-BioSource
24 Seq-id, Seq-loc FROM NCBI-Seqloc
25 GB-block FROM GenBank-General
26 PIR-block FROM PIR-General
27 EMBL-block FROM EMBL-General
28 SP-block FROM SP-General
29 PRF-block FROM PRF-General
30 PDB-block FROM PDB-General
31 Seq-table FROM NCBI-SeqTable;
32
33 --*** Sequence ********************************
34 --*
35
36 Bioseq ::= SEQUENCE {
37 id SET OF Seq-id , -- equivalent identifiers
38 descr Seq-descr OPTIONAL , -- descriptors
39 inst Seq-inst , -- the sequence data
40 annot SET OF Seq-annot OPTIONAL } -- optional set of Seq-annot annotations
41
42 --*** Descriptors *****************************
43 --*
44
45 Seq-descr ::= SET OF Seqdesc -- unordered collection of Seqdesc descriptors
46
47 Seqdesc ::= CHOICE {
48 mol-type GIBB-mol , -- type of molecule (deprecated, see MolInfo)
49 modif SET OF GIBB-mod , -- modifiers (deprecated, see MolInfo)
50 method GIBB-method , -- sequencing method (deprecated, see MolInfo)
51 name VisibleString , -- a name for this sequence
52 title VisibleString , -- a title for this sequence
53 org Org-ref , -- if all from one organism (deprecated, see BioSource)
54 comment VisibleString , -- a more extensive comment
55 num Numbering , -- a numbering system
56 maploc Dbtag , -- map location of this sequence
57 pir PIR-block , -- PIR specific info
58 genbank GB-block , -- GenBank specific info
59 pub Pubdesc , -- a reference to the publication
60 region VisibleString , -- overall region (e.g. globin locus)
61 user User-object , -- user defined object
62 sp SP-block , -- SWISSPROT specific info
63 dbxref Dbtag , -- xref to other databases
64 embl EMBL-block , -- EMBL specific information
65 create-date Date , -- date entry first created/released
66 update-date Date , -- date of last update
67 prf PRF-block , -- PRF specific information
68 pdb PDB-block , -- PDB specific information
69 het Heterogen , -- cofactor, etc associated but not bound
70 source BioSource , -- source of materials, includes Org-ref
71 molinfo MolInfo } -- info on the molecule and techniques
72
73 --******* NOTE:
74 --* mol-type, modif, method, and org are consolidated and expanded
75 --* in Org-ref, BioSource, and MolInfo in this specification. They
76 --* will be removed in later specifications. Do not use them in
77 --* the future. Instead, expect the new structures.
78 --*
79 --***************************
80
81 --********************************************************************
82 --
83 -- MolInfo gives information on the
84 -- classification of the type and quality of the sequence
85 --
86 -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
87 --
88 --********************************************************************
89
90 MolInfo ::= SEQUENCE {
91 biomol INTEGER { -- type of molecule (intended to replace GIBB-mol)
92 unknown (0) ,
93 genomic (1) ,
94 pre-RNA (2) , -- precursor RNA of any sort really
95 mRNA (3) ,
96 rRNA (4) ,
97 tRNA (5) ,
98 snRNA (6) ,
99 scRNA (7) ,
100 peptide (8) ,
101 other-genetic (9) , -- other genetic material
102 genomic-mRNA (10) , -- reported a mix of genomic and cdna sequence
103 cRNA (11) , -- viral RNA genome copy intermediate
104 snoRNA (12) , -- small nucleolar RNA
105 transcribed-RNA (13) , -- transcribed RNA other than existing classes
106 ncRNA (14) ,
107 tmRNA (15) ,
108 other (255) } DEFAULT unknown ,
109 tech INTEGER { -- technique used to obtain the sequence
110 unknown (0) ,
111 standard (1) , -- standard sequencing
112 est (2) , -- Expressed Sequence Tag
113 sts (3) , -- Sequence Tagged Site
114 survey (4) , -- one-pass genomic sequence
115 genemap (5) , -- from genetic mapping techniques
116 physmap (6) , -- from physical mapping techniques
117 derived (7) , -- derived from other data, not a primary entity
118 concept-trans (8) , -- conceptual translation
119 seq-pept (9) , -- peptide was sequenced
120 both (10) , -- concept transl. w/ partial pept. seq.
121 seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
122 seq-pept-homol (12) , -- sequenced peptide, ordered by homology
123 concept-trans-a (13) , -- conceptual transl. supplied by author
124 htgs-1 (14) , -- unordered High Throughput sequence contig
125 htgs-2 (15) , -- ordered High Throughput sequence contig
126 htgs-3 (16) , -- finished High Throughput sequence
127 fli-cdna (17) , -- full length insert cDNA
128 htgs-0 (18) , -- single genomic reads for coordination
129 htc (19) , -- high throughput cDNA
130 wgs (20) , -- whole genome shotgun sequencing
131 barcode (21) , -- barcode of life project
132 composite-wgs-htgs (22) , -- composite of WGS and HTGS
133 tsa (23) , -- transcriptome shotgun assembly
134 other (255) } -- use the techexp field to explain
135 DEFAULT unknown ,
136 techexp VisibleString OPTIONAL , -- explanation if tech not enough
137 --
138 -- Completeness is not indicated in most records. For genomes, assume
139 -- the sequences are incomplete unless specifically marked as complete.
140 -- For mRNAs, assume the ends are not known exactly unless marked as
141 -- having the left or right end.
142 --
143 completeness INTEGER {
144 unknown (0) ,
145 complete (1) , -- complete biological entity
146 partial (2) , -- partial but no details given
147 no-left (3) , -- missing 5' or NH2 end
148 no-right (4) , -- missing 3' or COOH end
149 no-ends (5) , -- missing both ends
150 has-left (6) , -- 5' or NH2 end present
151 has-right (7) , -- 3' or COOH end present
152 other (255) } DEFAULT unknown ,
153 gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA
154
155
156 GIBB-mol ::= ENUMERATED { -- type of molecule represented (to be replaced by MolInfo)
157 unknown (0) ,
158 genomic (1) ,
159 pre-mRNA (2) , -- precursor RNA of any sort really
160 mRNA (3) ,
161 rRNA (4) ,
162 tRNA (5) ,
163 snRNA (6) ,
164 scRNA (7) ,
165 peptide (8) ,
166 other-genetic (9) , -- other genetic material
167 genomic-mRNA (10) , -- reported a mix of genomic and cdna sequence
168 other (255) }
169
170 GIBB-mod ::= ENUMERATED { -- GenInfo Backbone modifiers (to be replaced by MolInfo)
171 dna (0) ,
172 rna (1) ,
173 extrachrom (2) ,
174 plasmid (3) ,
175 mitochondrial (4) ,
176 chloroplast (5) ,
177 kinetoplast (6) ,
178 cyanelle (7) ,
179 synthetic (8) ,
180 recombinant (9) ,
181 partial (10) ,
182 complete (11) ,
183 mutagen (12) , -- subject of mutagenesis ?
184 natmut (13) , -- natural mutant ?
185 transposon (14) ,
186 insertion-seq (15) ,
187 no-left (16) , -- missing left end (5' for na, NH2 for aa)
188 no-right (17) , -- missing right end (3' for na, COOH for aa)
189 macronuclear (18) ,
190 proviral (19) ,
191 est (20) , -- expressed sequence tag
192 sts (21) , -- sequence tagged site
193 survey (22) , -- one pass survey sequence
194 chromoplast (23) ,
195 genemap (24) , -- is a genetic map
196 restmap (25) , -- is an ordered restriction map
197 physmap (26) , -- is a physical map (not ordered restriction map)
198 other (255) }
199
200 GIBB-method ::= ENUMERATED { -- sequencing methods (to be replaced by MolInfo)
201 concept-trans (1) , -- conceptual translation
202 seq-pept (2) , -- peptide was sequenced
203 both (3) , -- conceptual translation with partial peptide sequencing
204 seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
205 seq-pept-homol (5) , -- sequenced peptide, ordered by homology
206 concept-trans-a (6) , -- conceptual translation supplied by author
207 other (255) }
208
209 Numbering ::= CHOICE { -- any display numbering system
210 cont Num-cont , -- continuous numbering
211 enum Num-enum , -- enumerated names for residues
212 ref Num-ref , -- by reference to another sequence
213 real Num-real } -- mapping to a floating point system
214
215 Num-cont ::= SEQUENCE { -- continuous display numbering system
216 refnum INTEGER DEFAULT 1, -- number assigned to first residue
217 has-zero BOOLEAN DEFAULT FALSE , -- is 0 used as a number? (default: no)
218 ascending BOOLEAN DEFAULT TRUE } -- do the numbers ascend? (default: yes)
219
220 Num-enum ::= SEQUENCE { -- arbitrary tags assigned to residues
221 num INTEGER , -- number of tags to follow
222 names SEQUENCE OF VisibleString } -- the tags, in order
223
224 Num-ref ::= SEQUENCE { -- by reference to other sequences
225 type ENUMERATED { -- type of reference
226 not-set (0) ,
227 sources (1) , -- by segmented or const seq sources
228 aligns (2) } , -- by alignments given below
229 aligns Seq-align OPTIONAL } -- the alignment referred to by type aligns
230
231 Num-real ::= SEQUENCE { -- mapping to floating point system
232 a REAL , -- from an integer system used by Bioseq
233 b REAL , -- position = (a * int_position) + b
234 units VisibleString OPTIONAL } -- optional units string (presumably for the mapped position)
235
236 Pubdesc ::= SEQUENCE { -- how sequence presented in pub
237 pub Pub-equiv , -- the citation(s)
238 name VisibleString OPTIONAL , -- name used in paper
239 fig VisibleString OPTIONAL , -- figure in paper
240 num Numbering OPTIONAL , -- numbering from paper
241 numexc BOOLEAN OPTIONAL , -- numbering problem with paper
242 poly-a BOOLEAN OPTIONAL , -- poly A tail indicated in figure?
243 maploc VisibleString OPTIONAL , -- map location reported in paper
244 seq-raw StringStore OPTIONAL , -- original sequence from paper
245 align-group INTEGER OPTIONAL , -- this seq aligned with others in paper
246 comment VisibleString OPTIONAL, -- any comment on this pub in context
247 reftype INTEGER { -- type of reference in a GenBank record
248 seq (0) , -- refers to sequence
249 sites (1) , -- refers to unspecified features
250 feats (2) , -- refers to specified features
251 no-target (3) } -- nothing specified (EMBL)
252 DEFAULT seq } -- reftype defaults to seq (refers to sequence)
253
254 Heterogen ::= VisibleString -- cofactor, prosthetic group, inhibitor, etc. (free text)
255
256 --*** Instances of sequences *******************************
257 --*
258
259 Seq-inst ::= SEQUENCE { -- the sequence data itself
260 repr ENUMERATED { -- representation class
261 not-set (0) , -- empty
262 virtual (1) , -- no seq data
263 raw (2) , -- continuous sequence
264 seg (3) , -- segmented sequence
265 const (4) , -- constructed sequence
266 ref (5) , -- reference to another sequence
267 consen (6) , -- consensus sequence or pattern
268 map (7) , -- ordered map of any kind
269 delta (8) , -- sequence made by changes (delta) to others
270 other (255) } ,
271 mol ENUMERATED { -- molecule class in living organism
272 not-set (0) , -- (class is as in the organism, so a cDNA is rna)
273 dna (1) ,
274 rna (2) ,
275 aa (3) ,
276 na (4) , -- just a nucleic acid
277 other (255) } ,
278 length INTEGER OPTIONAL , -- length of sequence in residues
279 fuzz Int-fuzz OPTIONAL , -- length uncertainty
280 topology ENUMERATED { -- topology of molecule
281 not-set (0) ,
282 linear (1) ,
283 circular (2) ,
284 tandem (3) , -- some part of tandem repeat
285 other (255) } DEFAULT linear ,
286 strand ENUMERATED { -- strandedness in living organism
287 not-set (0) ,
288 ss (1) , -- single strand
289 ds (2) , -- double strand
290 mixed (3) ,
291 other (255) } OPTIONAL , -- if absent, assume ds for DNA, ss for RNA and peptides
292 seq-data Seq-data OPTIONAL , -- the sequence
293 ext Seq-ext OPTIONAL , -- extensions for special types
294 hist Seq-hist OPTIONAL } -- sequence history
295
296 --*** Sequence Extensions **********************************
297 --* for representing more complex types
298 --* const type uses Seq-hist.assembly
299
300 Seq-ext ::= CHOICE {
301 seg Seg-ext , -- segmented sequences
302 ref Ref-ext , -- hot link to another sequence (a view)
303 map Map-ext , -- ordered map of markers
304 delta Delta-ext } -- delta sequence (SEQUENCE OF Delta-seq)
305
306 Seg-ext ::= SEQUENCE OF Seq-loc -- ordered locations making up a segmented sequence
307
308 Ref-ext ::= Seq-loc -- the location this sequence refers to
309
310 Map-ext ::= SEQUENCE OF Seq-feat -- ordered markers, each given as a Seq-feat
311
312 Delta-ext ::= SEQUENCE OF Delta-seq -- ordered Delta-seq pieces of a delta sequence
313
314 Delta-seq ::= CHOICE { -- one piece of a Delta-ext
315 loc Seq-loc , -- point to a sequence
316 literal Seq-literal } -- a piece of sequence
317
318 Seq-literal ::= SEQUENCE {
319 length INTEGER , -- must give a length in residues
320 fuzz Int-fuzz OPTIONAL , -- the length may be uncertain
321 seq-data Seq-data OPTIONAL } -- may have the data
322
323 --*** Sequence History Record ***********************************
324 --** assembly = records how seq was assembled from others
325 --** replaces = records sequences made obsolete by this one
326 --** replaced-by = this seq is made obsolete by another(s)
327
328 Seq-hist ::= SEQUENCE {
329 assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
330 replaces Seq-hist-rec OPTIONAL , -- seq makes these seqs obsolete
331 replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
332 deleted CHOICE { -- deletion given either as a flag or as a date
333 bool BOOLEAN ,
334 date Date } OPTIONAL }
335
336 Seq-hist-rec ::= SEQUENCE { -- used by Seq-hist replaces / replaced-by
337 date Date OPTIONAL , -- optional date (presumably of the replacement; confirm)
338 ids SET OF Seq-id } -- the sequences concerned
339
340 --*** Various internal sequence representations ************
341 --* all are controlled, fixed length forms
342
343 Seq-data ::= CHOICE { -- sequence representations
344 iupacna IUPACna , -- IUPAC 1 letter nuc acid code
345 iupacaa IUPACaa , -- IUPAC 1 letter amino acid code
346 ncbi2na NCBI2na , -- 2 bit nucleic acid code
347 ncbi4na NCBI4na , -- 4 bit nucleic acid code
348 ncbi8na NCBI8na , -- 8 bit extended nucleic acid code
349 ncbipna NCBIpna , -- nucleic acid probabilities
350 ncbi8aa NCBI8aa , -- 8 bit extended amino acid codes
351 ncbieaa NCBIeaa , -- extended ASCII 1 letter aa codes
352 ncbipaa NCBIpaa , -- amino acid probabilities
353 ncbistdaa NCBIstdaa, -- consecutive codes for std aas
354 gap Seq-gap -- a gap, described by Seq-gap
355 }
356
357 Seq-gap ::= SEQUENCE { -- gap alternative of Seq-data
358 type INTEGER { -- kind of gap
359 unknown(0),
360 fragment(1),
361 clone(2),
362 short-arm(3),
363 heterochromatin(4),
364 centromere(5),
365 telomere(6),
366 repeat(7),
367 contig(8),
368 other(255)
369 },
370 linkage INTEGER { -- linkage status of the gap
371 unlinked(0),
372 linked(1),
373 other(255)
374 } OPTIONAL
375 }
376
377 IUPACna ::= StringStore -- IUPAC 1 letter codes, no spaces
378 IUPACaa ::= StringStore -- IUPAC 1 letter codes, no spaces
379 NCBI2na ::= OCTET STRING -- 2 bits per base: 00=A, 01=C, 10=G, 11=T
380 NCBI4na ::= OCTET STRING -- 1 bit each for t,g,c,a (high bit to low bit)
381 -- 0001=A, 0010=C, 0100=G, 1000=T/U
382 -- 0101=Purine, 1010=Pyrimidine, etc
383 NCBI8na ::= OCTET STRING -- for modified nucleic acids
384 NCBIpna ::= OCTET STRING -- 5 octets/base, prob for a,c,g,t,n
385 -- probabilities are coded 0-255 = 0.0-1.0
386 NCBI8aa ::= OCTET STRING -- for modified amino acids
387 NCBIeaa ::= StringStore -- ASCII extended 1 letter aa codes
388 -- IUPAC codes + U=selenocysteine
389 NCBIpaa ::= OCTET STRING -- 25 octets/aa, prob for IUPAC aas in order:
390 -- A-Y,B,Z,X,(ter),anything
391 -- probabilities are coded 0-255 = 0.0-1.0
392 NCBIstdaa ::= OCTET STRING -- codes 0-25, 1 per byte
393
394 --*** Sequence Annotation *************************************
395 --*
396
397 -- This is a replica of Textseq-id.
398 -- It is specific to annotations, and exists to maintain a semantic
399 -- difference between IDs assigned to annotations and IDs assigned to
400 -- sequences. All four fields are optional.
401 Textannot-id ::= SEQUENCE {
402 name VisibleString OPTIONAL ,
403 accession VisibleString OPTIONAL ,
404 release VisibleString OPTIONAL ,
405 version INTEGER OPTIONAL
406 }
407
408 Annot-id ::= CHOICE { -- identifier of a Seq-annot (see Seq-annot.id)
409 local Object-id , -- id given as an Object-id
410 ncbi INTEGER , -- id given as an integer
411 general Dbtag, -- id given as a Dbtag
412 other Textannot-id -- id given as name/accession/release/version
413 }
414
415 Annot-descr ::= SET OF Annotdesc -- unordered collection of Annotdesc descriptors
416
417 Annotdesc ::= CHOICE {
418 name VisibleString , -- a short name for this collection
419 title VisibleString , -- a title for this collection
420 comment VisibleString , -- a more extensive comment
421 pub Pubdesc , -- a reference to the publication
422 user User-object , -- user defined object
423 create-date Date , -- date entry first created/released
424 update-date Date , -- date of last update
425 src Seq-id , -- source sequence from which annot came
426 align Align-def, -- definition of the Seq-aligns (see Align-def)
427 region Seq-loc } -- all contents cover this region
428
429 Align-def ::= SEQUENCE {
430 align-type INTEGER { -- class of align Seq-annot
431 ref (1) , -- set of alignments to the same sequence
432 alt (2) , -- set of alternate alignments of the same seqs
433 blocks (3) , -- set of aligned blocks in the same seqs
434 other (255) } ,
435 ids SET OF Seq-id OPTIONAL } -- for now, used only for the one ref Seq-id
436
437 Seq-annot ::= SEQUENCE {
438 id SET OF Annot-id OPTIONAL , -- optional identifiers for this annotation
439 db INTEGER { -- source of annotation
440 genbank (1) ,
441 embl (2) ,
442 ddbj (3) ,
443 pir (4) ,
444 sp (5) ,
445 bbone (6) ,
446 pdb (7) ,
447 other (255) } OPTIONAL ,
448 name VisibleString OPTIONAL ,-- source if "other" above
449 desc Annot-descr OPTIONAL , -- used only for stand alone Seq-annots
450 data CHOICE { -- the annotation content, exactly one of:
451 ftable SET OF Seq-feat , -- a set of features
452 align SET OF Seq-align , -- a set of alignments
453 graph SET OF Seq-graph , -- a set of graphs
454 ids SET OF Seq-id , -- used for communication between tools
455 locs SET OF Seq-loc , -- used for communication between tools
456 seq-table Seq-table } } -- features in table form
457
458 END
459
460
|
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |