NCBI C Toolkit Cross Reference

C/asn/seqalign.asn


  1 --$Revision: 6.4 $
  2 --**********************************************************************
  3 --
  4 --  NCBI Sequence Alignment elements
  5 --  by James Ostell, 1990
  6 --
  7 --**********************************************************************
  8 
  9 NCBI-Seqalign DEFINITIONS ::=
 10 BEGIN
 11 
 12 EXPORTS Seq-align, Score, Score-set, Seq-align-set;
 13 
 14 IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
 15         User-object, Object-id FROM NCBI-General;
 16 
 17 --*** Sequence Alignment ********************************
 18 --*
 19 
 20 Seq-align-set ::= SET OF Seq-align   -- unordered collection of Seq-align
 21 
 22 Seq-align ::= SEQUENCE {        -- one alignment: its kind, optional scores, and segment data
 23     type ENUMERATED {           -- kind of alignment
 24         not-set (0) ,
 25         global (1) ,
 26         diags (2) ,     -- unbroken, but not ordered, diagonals
 27         partial (3) ,   -- mapping pieces together
 28         disc (4) ,      -- discontinuous alignment
 29         other (255) } ,
 30     dim INTEGER OPTIONAL ,     -- dimensionality
 31     score SET OF Score OPTIONAL ,   -- for whole alignment
 32     segs CHOICE {                   -- alignment data, in exactly one of these representations
 33         dendiag SEQUENCE OF Dense-diag ,
 34         denseg              Dense-seg ,
 35         std     SEQUENCE OF Std-seg ,
 36         packed              Packed-seg ,
 37         disc                Seq-align-set,   -- nested alignments (Seq-align-set contains Seq-align)
 38         spliced             Spliced-seg,
 39         sparse              Sparse-seg
 40     } ,
 41     
 42     -- regions of sequence over which the alignment
 43     --  was computed
 44     bounds SET OF Seq-loc OPTIONAL,
 45 
 46     -- alignment id
 47     id SEQUENCE OF Object-id OPTIONAL,
 48 
 49     -- extra info
 50     ext SEQUENCE OF User-object OPTIONAL
 51 }
 52 
 53 Dense-diag ::= SEQUENCE {         -- for (multiway) diagonals
 54     dim INTEGER DEFAULT 2 ,    -- dimensionality (2 when omitted)
 55     ids SEQUENCE OF Seq-id ,   -- sequences in order
 56     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order
 57     len INTEGER ,                 -- length of the aligned segments (one value for all ids)
 58     strands SEQUENCE OF Na-strand OPTIONAL ,   -- presumably in ids order (confirm)
 59     scores SET OF Score OPTIONAL }             -- scores attached to this diagonal
 60 
 61     -- Dense-seg: the densest packing for sequence alignments only.
 62     --            A start of -1 indicates a gap in that sequence; the gap
 63     --            spans the length given for that segment in lens.
 64     -- Example ('.' marks a gap):
 65     -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
 66     -- id=200  AAGGCCTTTTAG.......GATGATGATGA
 67     -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
 68     -- Encoding (starts holds dim entries per segment, in ids order):
 69     -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
 70     -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
 71     -- lens = { 4, 8, 7, 3, 4, 4 }
 72     --
 73 
 74 Dense-seg ::= SEQUENCE {          -- for (multiway) global or partial alignments
 75     dim INTEGER DEFAULT 2 ,       -- dimensionality
 76     numseg INTEGER ,              -- number of segments here
 77     ids SEQUENCE OF Seq-id ,      -- sequences in order
 78     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order within segs (-1 = gap)
 79     lens SEQUENCE OF INTEGER ,    -- length of each segment (one entry per segment in the example above)
 80     strands SEQUENCE OF Na-strand OPTIONAL ,
 81     scores SEQUENCE OF Score OPTIONAL }  -- score for each seg
 82 
 83 Packed-seg ::= SEQUENCE {         -- for (multiway) global or partial alignments
 84     dim INTEGER DEFAULT 2 ,       -- dimensionality
 85     numseg INTEGER ,              -- number of segments here
 86     ids SEQUENCE OF Seq-id ,      -- sequences in order
 87     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order for whole alignment
 88     present OCTET STRING ,        -- Boolean flags: is each sequence present or absent in
 89                                   --   each segment (bit/byte layout is not specified here)
 90     lens SEQUENCE OF INTEGER ,    -- length of each segment
 91     strands SEQUENCE OF Na-strand OPTIONAL ,
 92     scores SEQUENCE OF Score OPTIONAL }  -- score for each segment
 93 
 94 Std-seg ::= SEQUENCE {            -- segment described by explicit Seq-loc values
 95     dim INTEGER DEFAULT 2 ,       -- dimensionality
 96     ids SEQUENCE OF Seq-id OPTIONAL ,
 97     loc SEQUENCE OF Seq-loc ,     -- presumably one location per aligned sequence (confirm)
 98     scores SET OF Score OPTIONAL }   -- scores attached to this segment
 99 
100 
101 Spliced-seg ::= SEQUENCE {        -- alignment of a product to a genomic sequence, exon by exon
102     -- the product is either a protein or a transcript (cDNA)
103     product-id Seq-id OPTIONAL,
104     genomic-id Seq-id OPTIONAL,
105 
106     -- each should be 'plus' or 'minus'
107     product-strand Na-strand OPTIONAL ,
108     genomic-strand Na-strand OPTIONAL ,
109     
110     product-type ENUMERATED {     -- which kind of product is aligned
111         transcript(0),
112         protein(1)
113     },
114 
115     -- set of segments involved;
116     -- each segment corresponds to one exon;
117     -- exons are always in biological order
118     exons SEQUENCE OF Spliced-exon ,
119 
120     -- optional poly(A) tail
121     poly-a INTEGER OPTIONAL,
122 
123     -- length of the product, in bases/residues;
124     -- from this, a 3' unaligned length can be extracted; this also captures
125     -- the case in which a protein aligns leaving a partial codon alignment
126     -- at the 3' end
127     product-length INTEGER OPTIONAL,
128 
129     -- alignment descriptors / modifiers;
130     -- kept as a set so that new modifiers can be added later
131     modifiers SET OF Spliced-seg-modifier OPTIONAL
132 }
133 
134 Spliced-seg-modifier ::= CHOICE {
135     -- the protein aligns from its start, and the first codon
136     -- on both product and genomic is a start codon
137     start-codon-found BOOLEAN,
138     
139     -- the protein aligns to its end, and there is a stop codon
140     -- on the genomic sequence right after the alignment
141     stop-codon-found BOOLEAN
142 }
143 
144 
145 -- Spliced-exon: a complete or partial exon;
146 -- two consecutive Spliced-exons may belong to one exon
147 Spliced-exon ::= SEQUENCE {
148     -- product-end >= product-start
149     product-start Product-pos ,
150     product-end Product-pos ,
151 
152     -- genomic-end >= genomic-start
153     genomic-start INTEGER ,
154     genomic-end INTEGER ,
155 
156     -- the product is either a protein or a transcript (cDNA)
157     product-id Seq-id OPTIONAL ,
158     genomic-id Seq-id OPTIONAL ,
159 
160     -- should be 'plus' or 'minus'
161     product-strand Na-strand OPTIONAL ,
162     
163     -- genomic-strand represents the strand of translation
164     genomic-strand Na-strand OPTIONAL ,
165 
166     -- basic segments are always in biological order
167     parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,
168 
169     -- scores for this exon
170     scores Score-set OPTIONAL ,
171 
172     -- splice sites flanking this exon
173     acceptor-before-exon Splice-site OPTIONAL,
174     donor-after-exon Splice-site OPTIONAL,
175     
176     -- flag: is this exon complete or partial?
177     partial BOOLEAN OPTIONAL,
178 
179     -- extra info
180     ext SEQUENCE OF User-object OPTIONAL
181 }
182 
183 
184 Product-pos ::= CHOICE {          -- position on the product; used by Spliced-exon start/end
185     nucpos INTEGER,               -- plain integer position (presumably for transcript products; confirm)
186     protpos Prot-pos              -- codon-based protein position (see Prot-pos)
187 }
188 
189 
190 -- codon-based position on a protein (resolution of 1/3 of an amino acid)
191 Prot-pos ::= SEQUENCE {
192     -- standard protein position
193     amin INTEGER ,
194 
195     -- 0, 1, 2, or 3 as for Cdregion:
196     -- 0 = not set
197     -- 1, 2, 3 = actual frame
198     frame INTEGER DEFAULT 0
199 }
200 
201 
202 -- Spliced-exon-chunk: piece of an exon;
203 -- lengths are given in nucleotide bases (1/3 of an amino acid when the product
204 -- is a protein)
205 Spliced-exon-chunk ::= CHOICE {
206     -- both sequences represented, product and genomic sequences match
207     match INTEGER ,
208 
209     -- both sequences represented, product and genomic sequences do not match
210     mismatch INTEGER ,
211 
212     -- both sequences are represented and there is sufficient similarity
213     -- between product and genomic sequences. Can be used to replace stretches
214     -- of matches and mismatches, mostly for protein to genomic, where the
215     -- definition of match or mismatch depends on the translation table
216     diag INTEGER ,
217 
218     -- insertion in product sequence (i.e. gap in the genomic sequence)
219     product-ins INTEGER ,
220 
221     -- insertion in genomic sequence (i.e. gap in the product sequence)
222     genomic-ins INTEGER
223 }
224 
225 
226 -- Splice-site: a site involved in a splice
227 Splice-site ::= SEQUENCE {
228     -- typically two bases in the intronic region; always
229     -- in IUPAC format
230     bases VisibleString
231 }
232 
233 
234 -- ==========================================================================
235 --
236 -- Sparse-seg follows the semantics of Dense-seg and is better suited to
237 -- representing sparse multiple alignments
238 --
239 -- ==========================================================================
240 
241 
242 Sparse-seg ::= SEQUENCE {
243     master-id Seq-id OPTIONAL,
244 
245     -- pairwise alignments constituting this multiple alignment
246     rows SET OF Sparse-align,
247 
248     -- per-row scores (NOTE(review): how scores map to rows is not shown here; confirm)
249     row-scores SET OF Score OPTIONAL,
250 
251     -- index of extra items
252     ext  SET OF Sparse-seg-ext OPTIONAL
253 }
254 
255 Sparse-align ::= SEQUENCE {          -- one pairwise alignment (a row of Sparse-seg)
256     first-id Seq-id,
257     second-id Seq-id,
258 
259     numseg INTEGER,                      -- number of segments
260     first-starts SEQUENCE OF INTEGER ,   -- starts on the first sequence [numseg]
261     second-starts SEQUENCE OF INTEGER ,  -- starts on the second sequence [numseg]
262     lens SEQUENCE OF INTEGER ,           -- lengths of segments [numseg]
263     second-strands SEQUENCE OF Na-strand OPTIONAL ,   -- presumably one per segment (confirm)
264 
265     -- per-segment scores
266     seg-scores SET OF Score OPTIONAL
267 }
268 
269 Sparse-seg-ext ::= SEQUENCE {        -- extension item referenced from Sparse-seg.ext
270     --seg-ext SET OF {
271     --    index INTEGER,
272     --    data User-field
273     -- }
274     index INTEGER                    -- only field currently active; the sketch above is commented out
275 }
276 
277 
278 
279 -- Score: one numeric value with an optional id; its use is discouraged for external ASN.1 specifications
280 Score ::= SEQUENCE {
281     id Object-id OPTIONAL ,        -- optional identifier for this score
282     value CHOICE {                 -- the score value, as either a real or an integer
283         real REAL ,
284         int INTEGER
285     }
286 }
287 
288 -- Score-set: unordered collection of Score; its use is encouraged for external ASN.1 specifications
289 Score-set ::= SET OF Score
290 
291 END 
292 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.