NCBI C Toolkit Cross Reference

C/asn/seq.asn


  1 --$Revision: 6.20 $
  2 --**********************************************************************
  3 --
  4 --  NCBI Sequence elements
  5 --  by James Ostell, 1990
  6 --  Version 3.0 - June 1994
  7 --
  8 --**********************************************************************
  9 
 10 NCBI-Sequence DEFINITIONS ::=
 11 BEGIN
 12 
 13 EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,   -- types other modules may import
 14         Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
 15         Seq-hist, Seq-inst, Seq-literal, Delta-ext;
 16 
 17 IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
 18         Seq-align FROM NCBI-Seqalign
 19         Seq-feat FROM NCBI-Seqfeat
 20         Seq-graph FROM NCBI-Seqres
 21         Pub-equiv FROM NCBI-Pub
 22         Org-ref FROM NCBI-Organism
 23         BioSource FROM NCBI-BioSource
 24         Seq-id, Seq-loc FROM NCBI-Seqloc
 25         GB-block FROM GenBank-General
 26         PIR-block FROM PIR-General
 27         EMBL-block FROM EMBL-General
 28         SP-block FROM SP-General
 29         PRF-block FROM PRF-General
 30         PDB-block FROM PDB-General
 31         Seq-table FROM NCBI-SeqTable;
 32 
 33 --*** Sequence ********************************
 34 --*
 35 
 36 Bioseq ::= SEQUENCE {         -- one sequence: its ids, descriptors, instance data and annotations
 37     id SET OF Seq-id ,            -- equivalent identifiers
 38     descr Seq-descr OPTIONAL , -- descriptors (a SET OF Seqdesc)
 39     inst Seq-inst ,            -- the sequence data
 40     annot SET OF Seq-annot OPTIONAL }   -- annotations on this sequence, if any
 41 
 42 --*** Descriptors *****************************
 43 --*
 44 
 45 Seq-descr ::= SET OF Seqdesc    -- unordered collection of descriptors; type of Bioseq.descr
 46 
 47 Seqdesc ::= CHOICE {             -- a single descriptor; exactly one alternative is present
 48     mol-type GIBB-mol ,          -- type of molecule (slated for removal; prefer molinfo)
 49     modif SET OF GIBB-mod ,             -- modifiers (slated for removal; prefer molinfo/source)
 50     method GIBB-method ,         -- sequencing method (slated for removal; prefer molinfo)
 51     name VisibleString ,         -- a name for this sequence
 52     title VisibleString ,        -- a title for this sequence
 53     org Org-ref ,                -- if all from one organism (slated for removal; prefer source)
 54     comment VisibleString ,      -- a more extensive comment
 55     num Numbering ,              -- a numbering system
 56     maploc Dbtag ,               -- map location of this sequence
 57     pir PIR-block ,              -- PIR specific info
 58     genbank GB-block ,           -- GenBank specific info
 59     pub Pubdesc ,                -- a reference to the publication
 60     region VisibleString ,       -- overall region (globin locus)
 61     user User-object ,           -- user defined object
 62     sp SP-block ,                -- SWISSPROT specific info
 63     dbxref Dbtag ,               -- xref to other databases
 64     embl EMBL-block ,            -- EMBL specific information
 65     create-date Date ,           -- date entry first created/released
 66     update-date Date ,           -- date of last update
 67     prf PRF-block ,              -- PRF specific information
 68     pdb PDB-block ,              -- PDB specific information
 69     het Heterogen ,              -- cofactor, etc associated but not bound
 70     source BioSource ,           -- source of materials, includes Org-ref
 71     molinfo MolInfo }            -- info on the molecule and techniques
 72 
 73 --******* NOTE:
 74 --*       mol-type, modif, method, and org are consolidated and expanded
 75 --*       in Org-ref, BioSource, and MolInfo in this specification. They
 76 --*       will be removed in later specifications. Do not use them in the
 77 --*       future. Instead expect the new structures.
 78 --*
 79 --***************************
 80 
 81 --********************************************************************
 82 --
 83 -- MolInfo gives information on the
 84 -- classification of the type and quality of the sequence
 85 --
 86 -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
 87 --
 88 --********************************************************************
 89 
 90 MolInfo ::= SEQUENCE {           -- molecule type, sequencing technique and completeness
 91     biomol INTEGER {             -- kind of molecule; values 0-10 parallel GIBB-mol
 92         unknown (0) ,
 93         genomic (1) ,
 94         pre-RNA (2) ,              -- precursor RNA of any sort (named pre-mRNA in GIBB-mol)
 95         mRNA (3) ,
 96         rRNA (4) ,
 97         tRNA (5) ,
 98         snRNA (6) ,
 99         scRNA (7) ,
100         peptide (8) ,
101         other-genetic (9) ,      -- other genetic material
102         genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
103         cRNA (11) ,              -- viral RNA genome copy intermediate
104         snoRNA (12) ,            -- small nucleolar RNA
105         transcribed-RNA (13) ,   -- transcribed RNA other than existing classes
106         ncRNA (14) ,             -- see gbmoltype for the particular ncRNA
107         tmRNA (15) ,
108         other (255) } DEFAULT unknown ,
109     tech INTEGER {               -- how the sequence was obtained
110         unknown (0) ,
111         standard (1) ,          -- standard sequencing
112         est (2) ,               -- Expressed Sequence Tag
113         sts (3) ,               -- Sequence Tagged Site
114         survey (4) ,            -- one-pass genomic sequence
115         genemap (5) ,           -- from genetic mapping techniques
116         physmap (6) ,           -- from physical mapping techniques
117         derived (7) ,           -- derived from other data, not a primary entity
118         concept-trans (8) ,     -- conceptual translation
119         seq-pept (9) ,          -- peptide was sequenced
120         both (10) ,             -- concept transl. w/ partial pept. seq.
121         seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
122         seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
123         concept-trans-a (13) ,  -- conceptual transl. supplied by author
124         htgs-1 (14) ,           -- unordered High Throughput sequence contig
125         htgs-2 (15) ,           -- ordered High Throughput sequence contig
126         htgs-3 (16) ,           -- finished High Throughput sequence
127         fli-cdna (17) ,         -- full length insert cDNA
128         htgs-0 (18) ,           -- single genomic reads for coordination
129         htc (19) ,              -- high throughput cDNA
130         wgs (20) ,              -- whole genome shotgun sequencing
131         barcode (21) ,          -- barcode of life project
132         composite-wgs-htgs (22) , -- composite of WGS and HTGS
133         tsa (23) ,              -- transcriptome shotgun assembly
134         other (255) }           -- explain in techexp (originally written as Source.techexp)
135                DEFAULT unknown ,
136     techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
137     --
138     -- Completeness is not indicated in most records.  For genomes, assume
139     -- the sequences are incomplete unless specifically marked as complete.
140     -- For mRNAs, assume the ends are not known exactly unless marked as
141     -- having the left or right end.
142     --
143     completeness INTEGER {
144       unknown (0) ,
145       complete (1) ,                   -- complete biological entity
146       partial (2) ,                    -- partial but no details given
147       no-left (3) ,                    -- missing 5' or NH3 end
148       no-right (4) ,                   -- missing 3' or COOH end
149       no-ends (5) ,                    -- missing both ends
150       has-left (6) ,                   -- 5' or NH3 end present
151       has-right (7) ,                  -- 3' or COOH end present
152       other (255) } DEFAULT unknown ,
153     gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA
154 
155 
156 GIBB-mol ::= ENUMERATED {       -- type of molecule represented (to be replaced by MolInfo)
157     unknown (0) ,
158     genomic (1) ,
159     pre-mRNA (2) ,              -- precursor RNA of any sort (named pre-RNA in MolInfo.biomol)
160     mRNA (3) ,
161     rRNA (4) ,
162     tRNA (5) ,
163     snRNA (6) ,
164     scRNA (7) ,
165     peptide (8) ,
166     other-genetic (9) ,      -- other genetic material
167     genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
168     other (255) }
169     
170 GIBB-mod ::= ENUMERATED {        -- GenInfo Backbone modifiers (to be replaced by MolInfo)
171     dna (0) ,
172     rna (1) ,
173     extrachrom (2) ,
174     plasmid (3) ,
175     mitochondrial (4) ,
176     chloroplast (5) ,
177     kinetoplast (6) ,
178     cyanelle (7) ,
179     synthetic (8) ,
180     recombinant (9) ,
181     partial (10) ,
182     complete (11) ,
183     mutagen (12) ,    -- subject of mutagenesis ?
184     natmut (13) ,     -- natural mutant ?
185     transposon (14) ,
186     insertion-seq (15) ,
187     no-left (16) ,    -- missing left end (5' for na, NH2 for aa)
188     no-right (17) ,   -- missing right end (3' or COOH)
189     macronuclear (18) ,
190     proviral (19) ,
191     est (20) ,        -- expressed sequence tag
192     sts (21) ,        -- sequence tagged site
193     survey (22) ,     -- one pass survey sequence
194     chromoplast (23) ,
195     genemap (24) ,    -- is a genetic map
196     restmap (25) ,    -- is an ordered restriction map
197     physmap (26) ,    -- is a physical map (not ordered restriction map)
198     other (255) }
199 
200 GIBB-method ::= ENUMERATED {        -- sequencing methods (to be replaced by MolInfo)
201     concept-trans (1) ,    -- conceptual translation (MolInfo.tech uses 8 for this name)
202     seq-pept (2) ,         -- peptide was sequenced
203     both (3) ,             -- concept transl. w/ partial pept. seq.
204     seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
205     seq-pept-homol (5) ,   -- sequenced peptide, ordered by homology
206     concept-trans-a (6) ,  -- conceptual transl. supplied by author
207     other (255) }          -- note: no value 0 is defined for this type
208     
209 Numbering ::= CHOICE {           -- any display numbering system; one of four schemes
210     cont Num-cont ,              -- continuous numbering
211     enum Num-enum ,              -- enumerated names for residues
212     ref Num-ref ,                -- by reference to another sequence
213     real Num-real }              -- supports mapping to a float system
214     
215 Num-cont ::= SEQUENCE {          -- continuous display numbering system
216     refnum INTEGER DEFAULT 1,         -- number assigned to first residue (1 if omitted)
217     has-zero BOOLEAN DEFAULT FALSE ,  -- 0 used? (FALSE if omitted)
218     ascending BOOLEAN DEFAULT TRUE }  -- ascending numbers? (TRUE if omitted)
219 
220 Num-enum ::= SEQUENCE {          -- any tags to residues
221     num INTEGER ,                        -- number of tags to follow (count of entries in names)
222     names SEQUENCE OF VisibleString }    -- the tags, in order
223 
224 Num-ref ::= SEQUENCE {           -- by reference to other sequences
225     type ENUMERATED {            -- type of reference
226         not-set (0) ,
227         sources (1) ,            -- by segmented or const seq sources
228         aligns (2) } ,           -- by alignments given below
229     aligns Seq-align OPTIONAL }  -- the alignment referred to by type = aligns
230 
231 Num-real ::= SEQUENCE {          -- mapping to floating point system
232     a REAL ,                     -- from an integer system used by Bioseq
233     b REAL ,                     -- position = (a * int_position) + b
234     units VisibleString OPTIONAL }   -- optional units text for the mapped position
235 
236 Pubdesc ::= SEQUENCE {              -- how sequence presented in pub
237     pub Pub-equiv ,                 -- the citation(s); the only required field
238     name VisibleString OPTIONAL ,   -- name used in paper
239     fig VisibleString OPTIONAL ,    -- figure in paper
240     num Numbering OPTIONAL ,        -- numbering from paper
241     numexc BOOLEAN OPTIONAL ,       -- numbering problem with paper
242     poly-a BOOLEAN OPTIONAL ,       -- poly A tail indicated in figure?
243     maploc VisibleString OPTIONAL , -- map location reported in paper
244     seq-raw StringStore OPTIONAL ,  -- original sequence from paper
245     align-group INTEGER OPTIONAL ,  -- this seq aligned with others in paper
246     comment VisibleString OPTIONAL, -- any comment on this pub in context
247     reftype INTEGER {           -- type of reference in a GenBank record
248         seq (0) ,               -- refers to sequence
249         sites (1) ,             -- refers to unspecified features
250         feats (2) ,             -- refers to specified features
251         no-target (3) }         -- nothing specified (EMBL)
252         DEFAULT seq }           -- treated as seq when omitted
253 
254 Heterogen ::= VisibleString       -- cofactor, prosthetic group, inhibitor, etc (as text)
255 
256 --*** Instances of sequences *******************************
257 --*
258 
259 Seq-inst ::= SEQUENCE {            -- the sequence data itself
260     repr ENUMERATED {              -- representation class (required)
261         not-set (0) ,              -- empty
262         virtual (1) ,              -- no seq data
263         raw (2) ,                  -- continuous sequence
264         seg (3) ,                  -- segmented sequence
265         const (4) ,                -- constructed sequence
266         ref (5) ,                  -- reference to another sequence
267         consen (6) ,               -- consensus sequence or pattern
268         map (7) ,                  -- ordered map of any kind
269         delta (8) ,              -- sequence made by changes (delta) to others
270         other (255) } ,
271     mol ENUMERATED {               -- molecule class in living organism
272         not-set (0) ,              --   > cdna = rna
273         dna (1) ,
274         rna (2) ,
275         aa (3) ,
276         na (4) ,                   -- just a nucleic acid
277         other (255) } ,
278     length INTEGER OPTIONAL ,      -- length of sequence in residues
279     fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
280     topology ENUMERATED {          -- topology of molecule
281         not-set (0) ,
282         linear (1) ,
283         circular (2) ,
284         tandem (3) ,               -- some part of tandem repeat
285         other (255) } DEFAULT linear ,   -- linear when omitted
286     strand ENUMERATED {            -- strandedness in living organism
287         not-set (0) ,
288         ss (1) ,                   -- single strand
289         ds (2) ,                   -- double strand
290         mixed (3) ,
291         other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
292     seq-data Seq-data OPTIONAL ,   -- the sequence
293     ext Seq-ext OPTIONAL ,         -- extensions for special types
294     hist Seq-hist OPTIONAL }       -- sequence history
295 
296 --*** Sequence Extensions **********************************
297 --*  for representing more complex types
298 --*  const type uses Seq-hist.assembly
299 
300 Seq-ext ::= CHOICE {     -- extension data for the more complex sequence types
301     seg Seg-ext ,        -- segmented sequences
302     ref Ref-ext ,        -- hot link to another sequence (a view)
303     map Map-ext ,        -- ordered map of markers
304     delta Delta-ext }    -- list of Delta-seq pieces (locations and/or literals)
305 
306 Seg-ext ::= SEQUENCE OF Seq-loc    -- ordered list of locations (Seq-ext.seg: segmented sequences)
307 
308 Ref-ext ::= Seq-loc    -- a single location (Seq-ext.ref: hot link to another sequence)
309 
310 Map-ext ::= SEQUENCE OF Seq-feat    -- ordered list of features (Seq-ext.map: ordered map of markers)
311 
312 Delta-ext ::= SEQUENCE OF Delta-seq    -- ordered list of pieces, each a location or a literal
313 
314 Delta-seq ::= CHOICE {      -- one element of a Delta-ext
315     loc Seq-loc ,       -- point to a sequence
316     literal Seq-literal }   -- a piece of sequence
317 
318 Seq-literal ::= SEQUENCE {   -- an inline piece of sequence; only length is required
319     length INTEGER ,         -- must give a length in residues
320     fuzz Int-fuzz OPTIONAL , -- could be unsure
321     seq-data Seq-data OPTIONAL } -- may have the data
322 
323 --*** Sequence History Record ***********************************
324 --** assembly = records how seq was assembled from others
325 --** replaces = records sequences made obsolete by this one
326 --** replaced-by = this seq is made obsolete by another(s)
327 
328 Seq-hist ::= SEQUENCE {             -- sequence history; every field is optional
329     assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
330     replaces Seq-hist-rec OPTIONAL ,    -- seq makes these seqs obsolete
331     replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
332     deleted CHOICE {                    -- deletion marker: either a flag or a date
333         bool BOOLEAN ,
334         date Date } OPTIONAL }
335 
336 Seq-hist-rec ::= SEQUENCE {   -- one replaces / replaced-by record of Seq-hist
337     date Date OPTIONAL ,      -- optional date for this record
338     ids SET OF Seq-id }       -- the sequences involved (required)
339     
340 --*** Various internal sequence representations ************
341 --*      all are controlled, fixed length forms
342 
343 Seq-data ::= CHOICE {              -- sequence representations; one encoding per value
344     iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
345     iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
346     ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
347     ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
348     ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
349     ncbipna NCBIpna ,              -- nucleic acid probabilities
350     ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
351     ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
352     ncbipaa NCBIpaa ,              -- amino acid probabilities
353     ncbistdaa NCBIstdaa,           -- consecutive codes for std aas
354     gap Seq-gap                    -- gap types (a Seq-gap record rather than residues)
355 }
356 
357 Seq-gap ::= SEQUENCE {    -- describes a gap: its type and, optionally, its linkage
358     type INTEGER {        -- kind of gap (required)
359         unknown(0),
360         fragment(1),
361         clone(2),
362         short-arm(3),
363         heterochromatin(4),
364         centromere(5),
365         telomere(6),
366         repeat(7),
367         contig(8),
368         other(255)
369     },
370     linkage INTEGER {     -- linkage across the gap (may be omitted)
371         unlinked(0),
372         linked(1),
373         other(255)
374     } OPTIONAL
375 }
376 
377 IUPACna ::= StringStore       -- IUPAC 1 letter nucleic acid codes, no spaces
378 IUPACaa ::= StringStore       -- IUPAC 1 letter amino acid codes, no spaces
379 NCBI2na ::= OCTET STRING      -- 2 bits per base: 00=A, 01=C, 10=G, 11=T
380 NCBI4na ::= OCTET STRING      -- 4 bits per base, 1 bit each for agct
381                               -- 0001=A, 0010=C, 0100=G, 1000=T/U
382                               -- 0101=Purine, 1010=Pyrimidine, etc
383 NCBI8na ::= OCTET STRING      -- 8 bit extended code, for modified nucleic acids
384 NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
385                               -- probabilities are coded 0-255 = 0.0-1.0
386 NCBI8aa ::= OCTET STRING      -- 8 bit extended code, for modified amino acids
387 NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
388                               -- IUPAC codes + U=selenocysteine
389 NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
390                               -- A-Y,B,Z,X,(ter),anything
391                               -- probabilities are coded 0-255 = 0.0-1.0
392 NCBIstdaa ::= OCTET STRING    -- consecutive codes 0-25 for std aas, 1 per byte
393 
394 --*** Sequence Annotation *************************************
395 --*
396 
397 -- This is a replica of Textseq-id
398 -- This is specific for annotations, and exists to maintain a semantic
399 -- difference between IDs assigned to annotations and IDs assigned to
400 -- sequences
401 Textannot-id ::= SEQUENCE {             -- text identifier for an annotation; every field is optional
402     name          VisibleString OPTIONAL ,
403     accession VisibleString OPTIONAL ,
404     release   VisibleString OPTIONAL ,
405     version   INTEGER       OPTIONAL    -- NOTE(review): fields presumably mirror Textseq-id; confirm in NCBI-Seqloc
406 }
407 
408 Annot-id ::= CHOICE {      -- identifier for an annotation; used by Seq-annot.id
409     local Object-id ,      -- an Object-id (type imported from NCBI-General)
410     ncbi INTEGER ,         -- an integer id
411     general Dbtag,         -- a Dbtag (type imported from NCBI-General)
412     other Textannot-id     -- a text identifier (name/accession/release/version)
413 }
414     
415 Annot-descr ::= SET OF Annotdesc    -- unordered collection of descriptors; type of Seq-annot.desc
416 
417 Annotdesc ::= CHOICE {           -- a single annotation descriptor; exactly one alternative is present
418     name VisibleString ,         -- a short name for this collection
419     title VisibleString ,        -- a title for this collection
420     comment VisibleString ,      -- a more extensive comment
421     pub Pubdesc ,                -- a reference to the publication
422     user User-object ,           -- user defined object
423     create-date Date ,           -- date entry first created/released
424     update-date Date ,           -- date of last update
425     src Seq-id ,                 -- source sequence from which annot came
426     align Align-def,             -- definition of the SeqAligns
427     region Seq-loc }             -- all contents cover this region
428 
429 Align-def ::= SEQUENCE {         -- describes the alignments in a Seq-annot (see Annotdesc.align)
430     align-type INTEGER {         -- class of align Seq-annot (required; no value 0 defined)
431       ref (1) ,                  -- set of alignments to the same sequence
432       alt (2) ,                  -- set of alternate alignments of the same seqs
433       blocks (3) ,               -- set of aligned blocks in the same seqs
434       other (255) } ,
435     ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
436 
437 Seq-annot ::= SEQUENCE {         -- a set of annotations of one kind; only data is required
438     id SET OF Annot-id OPTIONAL ,    -- identifiers for this annotation
439     db INTEGER {                 -- source of annotation
440         genbank (1) ,
441         embl (2) ,
442         ddbj (3) ,
443         pir  (4) ,
444         sp   (5) ,
445         bbone (6) ,
446         pdb   (7) ,
447         other (255) } OPTIONAL ,
448     name VisibleString OPTIONAL ,-- source if "other" above
449     desc Annot-descr OPTIONAL ,  -- used only for stand alone Seq-annots
450     data CHOICE {                -- the payload; exactly one of the sets below
451         ftable SET OF Seq-feat ,     -- features
452         align SET OF Seq-align ,     -- alignments
453         graph SET OF Seq-graph ,     -- graphs
454         ids SET OF Seq-id ,      -- used for communication between tools
455         locs SET OF Seq-loc ,    -- used for communication between tools
456         seq-table Seq-table } }  -- features in table form
457 
458 END
459 
460 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.