|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross Reference: C/asn/seqalign.asn |
source navigation diff markup identifier search freetext search file search |
1 --$Revision: 6.4 $
2 --**********************************************************************
3 --
4 -- NCBI Sequence Alignment elements
5 -- by James Ostell, 1990
6 --
7 --**********************************************************************
8
9 NCBI-Seqalign DEFINITIONS ::=
10 BEGIN
11
12 EXPORTS Seq-align, Score, Score-set, Seq-align-set;
13
14 IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
15 User-object, Object-id FROM NCBI-General;
16
17 --*** Sequence Alignment ********************************
18 --*
19
20 Seq-align-set ::= SET OF Seq-align -- a set of alignments; also used as the 'disc' alternative of Seq-align.segs
21
22 Seq-align ::= SEQUENCE { -- one alignment: type, optional dim and scores, then the alignment data
23 type ENUMERATED {
24 not-set (0) ,
25 global (1) ,
26 diags (2) , -- unbroken, but not ordered, diagonals
27 partial (3) , -- mapping pieces together
28 disc (4) , -- discontinuous alignment
29 other (255) } ,
30 dim INTEGER OPTIONAL , -- dimensionality
31 score SET OF Score OPTIONAL , -- for whole alignment
32 segs CHOICE { -- alignment data; exactly one of the representations below
33 dendiag SEQUENCE OF Dense-diag ,
34 denseg Dense-seg ,
35 std SEQUENCE OF Std-seg ,
36 packed Packed-seg ,
37 disc Seq-align-set, -- a nested set of Seq-align
38 spliced Spliced-seg,
39 sparse Sparse-seg
40 } ,
41
42 -- regions of sequence over which the alignment
43 -- was computed
44 bounds SET OF Seq-loc OPTIONAL,
45
46 -- alignment id
47 id SEQUENCE OF Object-id OPTIONAL,
48
49 -- extra info
50 ext SEQUENCE OF User-object OPTIONAL
51 }
52
53 Dense-diag ::= SEQUENCE { -- for (multiway) diagonals
54 dim INTEGER DEFAULT 2 , -- dimensionality; 2 when omitted
55 ids SEQUENCE OF Seq-id , -- sequences in order
56 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order
57 len INTEGER , -- length of the aligned segments (a single value)
58 strands SEQUENCE OF Na-strand OPTIONAL ,
59 scores SET OF Score OPTIONAL }
60
61 -- Dense-seg: the densest packing for sequence alignments only.
62 -- a start of -1 indicates a gap for that sequence of
63 -- length lens.
64 --
65 -- id=100 AAGGCCTTTTAGAGATGATGATGATGATGA
66 -- id=200 AAGGCCTTTTAG.......GATGATGATGA
67 -- id=300 ....CCTTTTAGAGATGATGAT....ATGA
68 --
69 -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
70 -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
71 -- lens = { 4, 8, 7, 3, 4, 4 }
72 --
73
74 Dense-seg ::= SEQUENCE { -- for (multiway) global or partial alignments
75 dim INTEGER DEFAULT 2 , -- dimensionality; 2 when omitted
76 numseg INTEGER , -- number of segments here
77 ids SEQUENCE OF Seq-id , -- sequences in order
78 starts SEQUENCE OF INTEGER , -- start OFFSETS, one per sequence (in ids order) for each segment; -1 marks a gap
79 lens SEQUENCE OF INTEGER , -- length of each segment, one entry per segment (see example above)
80 strands SEQUENCE OF Na-strand OPTIONAL ,
81 scores SEQUENCE OF Score OPTIONAL } -- score for each seg
82
83 Packed-seg ::= SEQUENCE { -- for (multiway) global or partial alignments
84 dim INTEGER DEFAULT 2 , -- dimensionality; 2 when omitted
85 numseg INTEGER , -- number of segments here
86 ids SEQUENCE OF Seq-id , -- sequences in order
87 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order for whole alignment
88 present OCTET STRING , -- Boolean: whether each sequence is present or absent in
89 -- each segment
90 lens SEQUENCE OF INTEGER , -- length of each segment
91 strands SEQUENCE OF Na-strand OPTIONAL ,
92 scores SEQUENCE OF Score OPTIONAL } -- score for each segment
93
94 Std-seg ::= SEQUENCE { -- a list of Seq-loc (loc) with optional ids and scores
95 dim INTEGER DEFAULT 2 , -- dimensionality; 2 when omitted
96 ids SEQUENCE OF Seq-id OPTIONAL ,
97 loc SEQUENCE OF Seq-loc ,
98 scores SET OF Score OPTIONAL }
99
100
101 Spliced-seg ::= SEQUENCE { -- alignment of a product to a genomic sequence, given as a list of exons
102 -- product is either protein or transcript (cDNA)
103 product-id Seq-id OPTIONAL,
104 genomic-id Seq-id OPTIONAL,
105
106 -- should be 'plus' or 'minus'
107 product-strand Na-strand OPTIONAL ,
108 genomic-strand Na-strand OPTIONAL ,
109
110 product-type ENUMERATED { -- required: says which kind of product this is
111 transcript(0),
112 protein(1)
113 },
114
115 -- set of segments involved
116 -- each segment corresponds to one exon
117 -- exons are always in biological order
118 exons SEQUENCE OF Spliced-exon ,
119
120 -- optional poly(A) tail
121 poly-a INTEGER OPTIONAL,
122
123 -- length of the product, in bases/residues
124 -- from this, a 3' unaligned length can be extracted; this also captures
125 -- the case in which a protein aligns leaving a partial codon alignment
126 -- at the 3' end
127 product-length INTEGER OPTIONAL,
128
129 -- alignment descriptors / modifiers
130 -- this provides us a set for extension
131 modifiers SET OF Spliced-seg-modifier OPTIONAL
132 }
133
134 Spliced-seg-modifier ::= CHOICE { -- one descriptor per element; used in Spliced-seg.modifiers
135 -- protein aligns from the start and the first codon
136 -- on both product and genomic is a start codon
137 start-codon-found BOOLEAN,
138
139 -- protein aligns to its end and there is a stop codon
140 -- on the genomic right after the alignment
141 stop-codon-found BOOLEAN
142 }
143
144
145 -- complete or partial exon
146 -- two consecutive Spliced-exons may belong to one exon
147 Spliced-exon ::= SEQUENCE { -- start/end on product and genomic, plus optional per-exon details
148 -- product-end >= product-start
149 product-start Product-pos ,
150 product-end Product-pos ,
151
152 -- genomic-end >= genomic-start
153 genomic-start INTEGER ,
154 genomic-end INTEGER ,
155
156 -- product is either protein or transcript (cDNA)
157 product-id Seq-id OPTIONAL ,
158 genomic-id Seq-id OPTIONAL ,
159
160 -- should be 'plus' or 'minus'
161 product-strand Na-strand OPTIONAL ,
162
163 -- genomic-strand represents the strand of translation
164 genomic-strand Na-strand OPTIONAL ,
165
166 -- basic segments are always in biological order
167 parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,
168
169 -- scores for this exon
170 scores Score-set OPTIONAL ,
171
172 -- splice sites
173 acceptor-before-exon Splice-site OPTIONAL,
174 donor-after-exon Splice-site OPTIONAL,
175
176 -- flag: is this exon complete or partial?
177 partial BOOLEAN OPTIONAL,
178
179 -- extra info
180 ext SEQUENCE OF User-object OPTIONAL
181 }
182
183
184 Product-pos ::= CHOICE { -- position on the product; used for Spliced-exon product-start and product-end
185 nucpos INTEGER, -- plain integer position (presumably for a transcript product; TODO confirm)
186 protpos Prot-pos -- codon based position, see Prot-pos
187 }
188
189
190 -- codon-based position on a protein (resolution of 1/3 of an amino acid)
191 Prot-pos ::= SEQUENCE { -- amino acid position plus frame
192 -- standard protein position
193 amin INTEGER ,
194
195 -- 0, 1, 2, or 3 as for Cdregion
196 -- 0 = not set
197 -- 1, 2, 3 = actual frame
198 frame INTEGER DEFAULT 0 -- when omitted the value is 0, i.e. not set
199 }
200
201
202 -- Spliced-exon-chunk: piece of an exon
203 -- lengths are given in nucleotide bases (1/3 of an amino acid when the product is a
204 -- protein)
205 Spliced-exon-chunk ::= CHOICE { -- each alternative carries one INTEGER for a piece of the given kind
206 -- both sequences represented, product and genomic sequences match
207 match INTEGER ,
208
209 -- both sequences represented, product and genomic sequences do not match
210 mismatch INTEGER ,
211
212 -- both sequences are represented, there is sufficient similarity
213 -- between product and genomic sequences. Can be used to replace stretches
214 -- of matches and mismatches, mostly for protein to genomic where the
215 -- definition of match or mismatch depends on the translation table
216 diag INTEGER ,
217
218 -- insertion in product sequence (i.e. gap in the genomic sequence)
219 product-ins INTEGER ,
220
221 -- insertion in genomic sequence (i.e. gap in the product sequence)
222 genomic-ins INTEGER
223 }
224
225
226 -- site involved in splice
227 Splice-site ::= SEQUENCE { -- used for acceptor-before-exon and donor-after-exon in Spliced-exon
228 -- typically two bases in the intronic region, always
229 -- in IUPAC format
230 bases VisibleString
231 }
232
233
234 -- ==========================================================================
235 --
236 -- Sparse-seg follows the semantics of dense-seg and is better suited to
237 -- representing sparse multiple alignments
238 --
239 -- ==========================================================================
240
241
242 Sparse-seg ::= SEQUENCE { -- multiple alignment stored as a set of pairwise Sparse-align rows
243 master-id Seq-id OPTIONAL,
244
245 -- pairwise alignments constituting this multiple alignment
246 rows SET OF Sparse-align,
247
248 -- per-row scores
249 row-scores SET OF Score OPTIONAL,
250
251 -- index of extra items
252 ext SET OF Sparse-seg-ext OPTIONAL
253 }
254
255 Sparse-align ::= SEQUENCE { -- one pairwise alignment between first-id and second-id
256 first-id Seq-id,
257 second-id Seq-id,
258
259 numseg INTEGER, -- number of segments
260 first-starts SEQUENCE OF INTEGER , -- starts on the first sequence [numseg]
261 second-starts SEQUENCE OF INTEGER , -- starts on the second sequence [numseg]
262 lens SEQUENCE OF INTEGER , -- lengths of segments [numseg]
263 second-strands SEQUENCE OF Na-strand OPTIONAL , -- strands for the second sequence only; no first-strands field is defined
264
265 -- per-segment scores
266 seg-scores SET OF Score OPTIONAL
267 }
268
269 Sparse-seg-ext ::= SEQUENCE { -- element type of Sparse-seg.ext
270 --seg-ext SET OF {
271 -- index INTEGER,
272 -- data User-field
273 -- }
274 index INTEGER -- the only active field; the seg-ext lines above are commented out
275 }
276
277
278
279 -- use of Score is discouraged for external ASN.1 specifications
280 Score ::= SEQUENCE { -- a single score: optional id plus a value
281 id Object-id OPTIONAL , -- optional identifier for this score
282 value CHOICE { -- the score itself, either a real or an integer
283 real REAL ,
284 int INTEGER
285 }
286 }
287
288 -- use of Score-set is encouraged for external ASN.1 specifications
289 Score-set ::= SET OF Score -- exported; used by Spliced-exon.scores in this module
290
291 END
292
|
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |