|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/biostruc/mmdb1.asn |
source navigation diff markup identifier search freetext search file search |
1 --$Revision: 6.1 $
2 --**********************************************************************
3 --
4 -- Biological Macromolecule 3-D Structure Data Types for MMDB,
5 -- A Molecular Modeling Database
6 --
7 -- Definitions for a biomolecular assembly and the MMDB database
8 --
9 -- By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant
10 --
11 -- National Center for Biotechnology Information
12 -- National Institutes of Health
13 -- Bethesda, MD 20894 USA
14 --
15 -- July 1995
16 --
17 --**********************************************************************
18
19 -- Contents of the MMDB database are currently based on files distributed by
20 -- the Protein Data Bank, PDB. These data are changed in form, as described
21 -- in this specification. To some extent they are also changed in content, in
22 -- that many data items implicit in PDB are made explicit, and others are
23 -- corrected or omitted as a consequence of validation checks. The semantics
24 -- of MMDB data items are indicated by comments within the specification below.
25 -- These comments explain in detail the manner in which data items from PDB
26 -- have been mapped into MMDB.
27
28 MMDB DEFINITIONS ::=
29
30 BEGIN
31
32 EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set,
33 Biostruc-residue-graph-set;
34
35 IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph
36 Biostruc-model FROM MMDB-Structural-model
37 Biostruc-feature-set FROM MMDB-Features
38 Pub FROM NCBI-Pub
39 Date, Object-id, Dbtag FROM NCBI-General;
40
41 -- A structure report or "biostruc" describes the components of a biomolecular
42 -- assembly in terms of their names and descriptions, and a chemical graph
43 -- giving atomic formula, connectivity and chirality. It also gives one or more
44 -- three-dimensional model structures, literally a mapping of the atoms,
45 -- residues and/or molecules of each component into a measured three-
46 -- dimensional space. Structure may also be described by named features, which
47 -- associate nodes in the chemical graph, or regions in space, with text or
48 -- numeric descriptors.
49
50 -- Note that a biostruc may also contain cross references to other databases,
51 -- including citations to relevant scientific literature. These cross
52 -- references use object types from other NCBI data specifications, which are
53 -- "imported" into MMDB, and not repeated in this specification.
54
55 Biostruc ::= SEQUENCE { -- a structure report for one biomolecular assembly
56 id SEQUENCE OF Biostruc-id, -- required list of identifiers for the assembly
57 descr SEQUENCE OF Biostruc-descr OPTIONAL, -- names, comments, history and citations
58 chemical-graph Biostruc-graph, -- required; type imported from MMDB-Chemical-graph
59 features SEQUENCE OF Biostruc-feature-set OPTIONAL, -- named features; type imported from MMDB-Features
60 model SEQUENCE OF Biostruc-model OPTIONAL } -- model structures; type imported from MMDB-Structural-model
61
62 -- A Biostruc-id is a collection of identifiers for the molecular assembly.
63 -- Mmdb-id's are NCBI-assigned, and are intended to be unique and stable
64 -- identifiers. Other-id's are synonyms.
65
66 Biostruc-id ::= CHOICE { -- each identifier is exactly one of these alternatives
67 mmdb-id Mmdb-id, -- NCBI-assigned identifier, intended to be unique and stable
68 other-database Dbtag, -- synonym given as a Dbtag; Dbtag is imported from NCBI-General
69 local-id Object-id } -- synonym given as an Object-id; Object-id is imported from NCBI-General
70
71 Mmdb-id ::= INTEGER -- integer alias used by the mmdb-id alternative of Biostruc-id
72
73
74 -- The description of a biostruc refers to both the reported chemical and
75 -- spatial structure of a biomolecular assembly. PDB-derived descriptors
76 -- which refer specifically to the chemical components or spatial structure
77 -- are not provided here, but instead as descriptors of the biostruc-graph or
78 -- biostruc-model. For PDB-derived structures the biostruc name is the PDB
79 -- id-code. PDB-derived citations appear as publications within the biostruc
80 -- description, and include a data-submission citation derived from PDB AUTHOR
81 -- records. Citations are described using the NCBI Pub specification.
82
83 Biostruc-descr ::= CHOICE { -- each descriptor carries exactly one of the following
84 name VisibleString, -- for PDB-derived structures this is the PDB id-code
85 pdb-comment VisibleString, -- free-text comment
86 other-comment VisibleString, -- free-text comment
87 history Biostruc-history, -- origin and update history within MMDB
88 attribution Pub } -- citation; Pub is imported from NCBI-Pub
89
90
91 -- The history of a biostruc indicates its origin and its update history
92 -- within MMDB, the NCBI-maintained molecular structure database.
93
94 Biostruc-history ::= SEQUENCE { -- all three fields are optional
95 replaces Biostruc-replace OPTIONAL, -- identifier and date, see Biostruc-replace
96 replaced-by Biostruc-replace OPTIONAL, -- identifier and date, see Biostruc-replace
97 data-source Biostruc-source OPTIONAL } -- reference to the database the biostruc originates from
98
99 Biostruc-replace ::= SEQUENCE { -- pairs an identifier with a date; both fields are required
100 id Biostruc-id, -- any of the Biostruc-id alternatives
101 date Date } -- Date is imported from NCBI-General
102
103 -- The origin of a biostruc is a reference to another database. PDB release
104 -- date and PDB-assigned id codes are recorded here, as are the PDB-assigned
105 -- entry date and replacement history.
106
107 Biostruc-source ::= SEQUENCE { -- reference to the database a biostruc originates from
108 name-of-database VisibleString, -- required
109 version-of-database CHOICE { -- optional; the release is given either as a date or as a code
110 release-date Date,
111 release-code VisibleString } OPTIONAL,
112 database-entry-id Biostruc-id, -- required
113 database-entry-date Date, -- required
114 database-entry-history SEQUENCE OF VisibleString OPTIONAL} -- optional list of text strings
115
116
117 -- A biostruc set is a means to collect ASN.1 data for many biostrucs in
118 -- one file, as convenient for application programs. The object type is not
119 -- intended to imply similarity of the biostrucs grouped together.
120
121 Biostruc-set ::= SEQUENCE { -- container collecting many biostrucs in one file
122 id SEQUENCE OF Biostruc-id OPTIONAL, -- optional identifiers for the set itself
123 descr SEQUENCE OF Biostruc-descr OPTIONAL, -- optional descriptors for the set itself
124 biostrucs SEQUENCE OF Biostruc } -- the only required field
125
126
127 -- A biostruc annotation set is a means to collect ASN.1 data for biostruc
128 -- features into one file. The object type is intended as a means to store
129 -- feature annotation of similar type, such as "core" definitions for a
130 -- threading program, or structure-structure alignments for a structure-
131 -- similarity browser.
132
133 Biostruc-annot-set ::= SEQUENCE { -- container collecting biostruc features in one file
134 id SEQUENCE OF Biostruc-id OPTIONAL, -- optional identifiers for the set itself
135 descr SEQUENCE OF Biostruc-descr OPTIONAL, -- optional descriptors for the set itself
136 features SEQUENCE OF Biostruc-feature-set } -- the only required field; type imported from MMDB-Features
137
138
139 -- A biostruc residue graph set is a collection of residue graphs. The object
140 -- type is intended as a means to record dictionaries containing the chemical
141 -- subgraphs of "standard" residue types, which are used as a means to
142 -- simplify description of the covalent structure of a biomolecular assembly.
143 -- The standard residue graph dictionary supplied with the MMDB database
144 -- contains 20 standard L amino acids and 8 standard ribonucleotide groups.
145 -- These graphs are complete, including explicit hydrogen atoms and separate
146 -- instances for the terminal polypeptide and polynucleotide residues.
147
148 Biostruc-residue-graph-set ::= SEQUENCE { -- dictionary of residue graphs
149 id SEQUENCE OF Biostruc-id OPTIONAL, -- optional identifiers for the set itself
150 descr SEQUENCE OF Biomol-descr OPTIONAL, -- note: Biomol-descr here, not Biostruc-descr as in the other sets
151 residue-graphs SEQUENCE OF Residue-graph } -- the only required field; type imported from MMDB-Chemical-graph
152
153 END
154
155
156
157 --**********************************************************************
158 --
159 -- Biological Macromolecule 3-D Structure Data Types for MMDB,
160 -- A Molecular Modeling Database
161 --
162 -- Definitions for a chemical graph
163 --
164 -- By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant
165 --
166 -- National Center for Biotechnology Information
167 -- National Institutes of Health
168 -- Bethesda, MD 20894 USA
169 --
170 -- July, 1995
171 --
172 --**********************************************************************
173
174 MMDB-Chemical-graph DEFINITIONS ::=
175
176 BEGIN
177
178 EXPORTS Biostruc-graph, Biomol-descr, Residue-graph,
179 Molecule-id, PCSubstance-id, Residue-id, Atom-id;
180
181 IMPORTS Pub FROM NCBI-Pub
182 BioSource FROM NCBI-BioSource
183 Seq-id FROM NCBI-Seqloc
184 Biostruc-id FROM MMDB;
185
186 -- A biostruc graph contains the complete chemical graph of the biomolecular
187 -- assembly. The assembly graph is defined hierarchically, in terms of
188 -- subgraphs of component molecules. For PDB-derived biostrucs,
189 -- the molecules forming the assembly are the individual biopolymer chains and
190 -- any non-polymer or "heterogen" groups which are present.
191
192 -- The PDB-derived "compound name" field appears as the name within the
193 -- biostruc-graph description. PDB "class" and "source" fields appear as
194 -- explicit attributes. PDB-derived structures are assigned an assembly type
195 -- of "other" unless they have been further classified as the "physiological
196 -- form" or "crystallographic cell" contents. If they have, the source of the
197 -- type classification appears as a citation within the assembly description.
198
199 -- Note that the biostruc-graph also includes as literals the subgraphs of
200 -- any nonstandard residues present within it. For PDB-derived biostrucs these
201 -- subgraphs are constructed automatically, with validation as described below.
202
203 Biostruc-graph ::= SEQUENCE { -- complete chemical graph of the biomolecular assembly
204 descr SEQUENCE OF Biomol-descr OPTIONAL, -- the PDB-derived compound name appears here as the name
205 molecule-graphs SEQUENCE OF Molecule-graph, -- the only required field; component molecules of the assembly
206 inter-molecule-bonds SEQUENCE OF Inter-residue-bond OPTIONAL, -- reuses the Inter-residue-bond type
207 residue-graphs SEQUENCE OF Residue-graph OPTIONAL } -- literal subgraphs of any nonstandard residues present
208
209 -- A biomolecule description refers to the chemical structure of a molecule or
210 -- component substructures. This descriptor type is used at the level of
211 -- assemblies, molecules and residues, and also for residue-graph dictionaries.
212 -- The BioSource object type is drawn from NCBI taxonomy data specifications,
213 -- and is not repeated here.
214
215 Biomol-descr ::= CHOICE { -- used for assemblies, molecules, residues and residue-graph dictionaries
216 name VisibleString,
217 pdb-class VisibleString, -- PDB class field
218 pdb-source VisibleString, -- PDB source field
219 pdb-comment VisibleString,
220 other-comment VisibleString,
221 organism BioSource, -- BioSource is imported from NCBI-BioSource
222 attribution Pub, -- Pub is imported from NCBI-Pub
223 assembly-type INTEGER { physiological-form(1),
224 crystallographic-cell(2),
225 other(255) }, -- other is assigned to PDB-derived structures unless further classified
226 molecule-type INTEGER { dna(1), -- for PDB-derived molecule graphs the type is assigned by majority rule
227 rna(2),
228 protein(3),
229 other-biopolymer(4),
230 solvent(5),
231 other-nonpolymer(6),
232 other(255) } }
233
234 -- A molecule chemical graph is defined by a sequence of residues. Nonpolymers
235 -- are described in the same way, but may contain only a single residue.
236
237 -- Biopolymer molecules are identified within PDB entries according to their
238 -- appearance on SEQRES records, which formally define a biopolymer as such.
239 -- Biopolymers are defined by the distinction between ATOM and HETATM
240 -- coordinate records only in cases where the chemical sequence from SEQRES
241 -- is in conflict with coordinate data. The PDB-assigned chain code appears as
242 -- the name within the molecule descriptions of the biopolymers.
243
244 -- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups,
245 -- excluding any HETEROGEN groups which represent modified biopolymer residues.
246 -- These molecules are named according to the chain, residue type and residue
247 -- number fields as assigned by PDB. Any description appearing in the PDB HET
248 -- record appears as a pdb-comment within the molecule description.
249
250 -- Molecule types for PDB-derived molecule graphs are assigned by matching
251 -- residue and atom names against the PDB-documented standard types for protein,
252 -- DNA and RNA, and against residue codes commonly used to indicate solvent.
253 -- Classification is by "majority rule". If more than half of the residues in
254 -- a biopolymer are standard groups of one type, then the molecule is of that
255 -- type, and otherwise classified as "other". Note that this classification does
256 -- not preclude the presence of modified residues, but insists they constitute
257 -- less than half the biopolymer. Non-polymers are classified only as "solvent"
258 -- or "other".
259
260 -- Note that a molecule graph may also contain a set of cross references
261 -- to biopolymer sequence databases. All biopolymer molecules in MMDB contain
262 -- appropriate identifiers for the corresponding entry in the NCBI-Sequences
263 -- database, in particular the NCBI "gi" number, which may be used for sequence
264 -- retrieval. The Seq-id object type is defined in the NCBI molecular sequence
265 -- specification, and not repeated here.
266
267 Molecule-graph ::= SEQUENCE { -- chemical graph of one molecule, defined by a sequence of residues
268 id Molecule-id, -- required
269 descr SEQUENCE OF Biomol-descr OPTIONAL, -- for PDB biopolymers the chain code appears here as the name
270 seq-id Seq-id OPTIONAL, -- Seq-id is imported from NCBI-Seqloc; NOTE(review): the prose describes a set of cross references but this field holds a single Seq-id, confirm intent
271 residue-sequence SEQUENCE OF Residue, -- required; nonpolymers may contain only a single residue
272 inter-residue-bonds SEQUENCE OF Inter-residue-bond OPTIONAL, -- bonds between atoms addressed by Atom-pntr
273 sid PCSubstance-id OPTIONAL } -- Pubchem substance id
274
275 Molecule-id ::= INTEGER -- integer alias; listed in this module's EXPORTS
276
277 -- Pubchem substance id
278
279 PCSubstance-id ::= INTEGER -- integer alias; listed in this module's EXPORTS
280
281 -- Residues may be assigned a text-string name as well as an id number. PDB
282 -- assigned residue numbers appear as the residue name.
283
284 Residue ::= SEQUENCE { -- one residue within a molecule graph
285 id Residue-id, -- required id number
286 name VisibleString OPTIONAL, -- PDB-assigned residue numbers appear here
287 residue-graph Residue-graph-pntr } -- required reference to the residue graph for this residue
288
289 Residue-id ::= INTEGER -- integer alias; listed in this module's EXPORTS
290
291
292 -- Residue graphs from different sources may be referenced within a molecule
293 -- graph. The allowed choices are the nonstandard residue graphs included in
294 -- the present biostruc, residue graphs within other biostrucs, or residue
295 -- graphs within tables of standard residue definitions.
296
297 Residue-graph-pntr ::= CHOICE { -- selects where the referenced residue graph is found
298 local Residue-graph-id, -- nonstandard residue graph included in the present biostruc
299 biostruc Biostruc-graph-pntr, -- residue graph within another biostruc
300 standard Biostruc-residue-graph-set-pntr } -- residue graph within a table of standard residue definitions
301
302 Biostruc-graph-pntr ::= SEQUENCE { -- both fields are required
303 biostruc-id Biostruc-id, -- Biostruc-id is imported from MMDB
304 residue-graph-id Residue-graph-id } -- id of the residue graph being referenced
305
306 Biostruc-residue-graph-set-pntr ::= SEQUENCE { -- both fields are required
307 biostruc-residue-graph-set-id Biostruc-id, -- Biostruc-id is imported from MMDB
308 residue-graph-id Residue-graph-id } -- id of the residue graph being referenced
309
310
311 -- Residue graphs define atomic formulae, connectivity, chirality, and names.
312 -- For standard residue graphs from the MMDB dictionary the PDB-assigned
313 -- residue-type code appears as the name within the residue graph description,
314 -- and the full trivial name of the residue as a comment within that
315 -- description. For any nonstandard residue graphs provided with an MMDB
316 -- biostruc the PDB-assigned residue-type code similarly appears as the name
317 -- within the description, and any information provided on PDB HET records as
318 -- a pdb-comment within that description.
319
320 -- Note that nonstandard residue graphs for a PDB-derived biostruc may be
321 -- incomplete. Current PDB format cannot represent connectivity for groups
322 -- which are disordered, and for which no coordinates are given. In these
323 -- cases the residue graph defined in MMDB represents only the subgraph that
324 -- could be identified from available ATOM, HETATM and CONECT records.
325
326 Residue-graph ::= SEQUENCE { -- atomic formula, connectivity, chirality and names of one residue type
327 id Residue-graph-id, -- required
328 descr SEQUENCE OF Biomol-descr OPTIONAL, -- the PDB-assigned residue-type code appears here as the name
329 residue-type INTEGER { deoxyribonucleotide(1),
330 ribonucleotide(2),
331 amino-acid(3),
332 other(255) } OPTIONAL,
333 iupac-code SEQUENCE OF VisibleString OPTIONAL,
334 atoms SEQUENCE OF Atom, -- required
335 bonds SEQUENCE OF Intra-residue-bond, -- required
336 chiral-centers SEQUENCE OF Chiral-center OPTIONAL } -- not assigned automatically for PDB-derived nonstandard residues
337
338 Residue-graph-id ::= INTEGER -- integer alias; not listed in this module's EXPORTS
339
340 -- Atoms in residue graphs are defined by elemental symbols and names. PDB-
341 -- assigned atom names appear here in the name field, except in cases of known
342 -- PDB synonyms. In these cases atom names are mapped to the names used in the
343 -- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where
344 -- PDB practice allows synonyms for several atom types. For PDB atoms the
345 -- elemental symbol is obtained by parsing the PDB atom name field, allowing
346 -- for known special-semantics cases where the atom name does not follow the
347 -- documented encoding rule. Ionizable protons are identified within standard
348 -- residue graphs in the MMDB dictionary, but not within automatically-defined
349 -- nonstandard graphs.
350
351 Atom ::= SEQUENCE { -- one atom of a residue graph
352 id Atom-id, -- required
353 name VisibleString OPTIONAL, -- PDB-assigned atom name, or the MMDB dictionary name for known PDB synonyms
354 iupac-code SEQUENCE OF VisibleString OPTIONAL,
355 element ENUMERATED { -- required; values 1 through 103 run consecutively and equal the atomic number
356 h(1), he(2), li(3), be(4), b(5),
357 c(6), n(7), o(8), f(9), ne(10),
358 na(11), mg(12), al(13), si(14), p(15),
359 s(16), cl(17), ar(18), k(19), ca(20),
360 sc(21), ti(22), v(23), cr(24), mn(25),
361 fe(26), co(27), ni(28), cu(29), zn(30),
362 ga(31), ge(32), as(33), se(34), br(35),
363 kr(36), rb(37), sr(38), y(39), zr(40),
364 nb(41), mo(42), tc(43), ru(44), rh(45),
365 pd(46), ag(47), cd(48), in(49), sn(50),
366 sb(51), te(52), i(53), xe(54), cs(55),
367 ba(56), la(57), ce(58), pr(59), nd(60),
368 pm(61), sm(62), eu(63), gd(64), tb(65),
369 dy(66), ho(67), er(68), tm(69), yb(70),
370 lu(71), hf(72), ta(73), w(74), re(75),
371 os(76), ir(77), pt(78), au(79), hg(80),
372 tl(81), pb(82), bi(83), po(84), at(85),
373 rn(86), fr(87), ra(88), ac(89), th(90),
374 pa(91), u(92), np(93), pu(94), am(95),
375 cm(96), bk(97), cf(98), es(99),
376 fm(100), md(101), no(102), lr(103),
377 other(254), unknown(255) }, -- the two values outside the 1 to 103 run
378 ionizable-proton ENUMERATED { -- optional; identified in standard MMDB dictionary graphs, not in automatically-defined nonstandard graphs
379 true(1),
380 false(2),
381 unknown(255) } OPTIONAL }
382
383 Atom-id ::= INTEGER -- integer alias; listed in this module's EXPORTS
384
385 -- Intra-residue-bond specifies connectivity between atoms in Residue-graph.
386 -- Unlike Inter-residue-bond defined later, its participating atoms are part of
387 -- a residue subgraph dictionary, not part of a specific biostruc-graph.
388
389 -- For residue graphs in the standard MMDB dictionary bonds are defined from
390 -- the known chemical structures of amino acids and nucleotides. For
391 -- nonstandard residue graphs bonds are defined from PDB CONECT records, with
392 -- validation for consistency with coordinate data, and from stereochemical
393 -- calculation to identify unreported bonds. Validation and bond identification
394 -- are based on comparison of inter-atomic distances to the sum of covalent
395 -- radii for the corresponding elements.
396
397 Intra-residue-bond ::= SEQUENCE { -- connectivity between two atoms of a Residue-graph
398 atom-id-1 Atom-id, -- plain Atom-id, unlike the Atom-pntr used by Inter-residue-bond
399 atom-id-2 Atom-id,
400 bond-order INTEGER { -- optional; same named values as in Inter-residue-bond
401 single(1),
402 partial-double(2),
403 aromatic(3),
404 double(4),
405 triple(5),
406 other(6),
407 unknown(255)} OPTIONAL }
408
409 -- Chiral centers are atoms with tetrahedral geometry. Chirality is defined
410 -- by a chiral volume involving the chiral center and 3 other atoms bonded to
411 -- it. For any coordinates assigned to atoms c, n1, n2, and n3, the vector
412 -- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated
413 -- sign. The calculation assumes an orthogonal right-handed coordinate system
414 -- as is used for MMDB model structures.
415
416 -- Chirality is defined for standard residues in the MMDB dictionary, but is
417 -- not assigned automatically for PDB-derived nonstandard residues. If assigned
418 -- for nonstandard residues, the source of chirality information is described
419 -- by a citation within the residue description.
420
421 Chiral-center ::= SEQUENCE { -- chirality given as the sign of a chiral volume; all fields required
422 c Atom-id, -- the chiral center atom
423 n1 Atom-id, -- n1, n2 and n3 are three other atoms bonded to c
424 n2 Atom-id,
425 n3 Atom-id,
426 sign ENUMERATED { positive(1), -- required sign of (n1-c) dot ( (n2-c) cross (n3-c) )
427 negative(2) } }
428
429 -- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived
430 -- structures bonds are identified from biopolymer connectivity according to
431 -- SEQRES and from other connectivity information on SSBOND and CONECT
432 -- records. These data are validated and unreported bonds identified by
433 -- stereochemical calculation, using the same criteria as for intra-residue
434 -- bonds.
435
436 Inter-residue-bond ::= SEQUENCE { -- bond defined by a reference to two atoms
437 atom-id-1 Atom-pntr, -- hierarchical pointer: molecule, residue, atom
438 atom-id-2 Atom-pntr,
439 bond-order INTEGER { -- optional; same named values as in Intra-residue-bond
440 single(1),
441 partial-double(2),
442 aromatic(3),
443 double(4),
444 triple(5),
445 other(6),
446 unknown(255)} OPTIONAL }
447
448 -- Atoms, residues and molecules within the current biostruc are referenced
449 -- by hierarchical pointers.
450
451 Atom-pntr ::= SEQUENCE { -- hierarchical pointer to an atom within the current biostruc; all fields required
452 molecule-id Molecule-id, -- selects the molecule
453 residue-id Residue-id, -- selects the residue within that molecule
454 atom-id Atom-id } -- selects the atom within that residue
455
456 Atom-pntr-set ::= SEQUENCE OF Atom-pntr -- list of atom pointers; not referenced by other definitions visible in this module and not listed in its EXPORTS
457
458 END
|
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |