NCBI C Toolkit Cross Reference

C/biostruc/mmdb1.asn


  1 --$Revision: 6.1 $
  2 --**********************************************************************
  3 --
  4 --  Biological Macromolecule 3-D Structure Data Types for MMDB,
  5 --                A Molecular Modeling Database
  6 --
  7 --  Definitions for a biomolecular assembly and the MMDB database
  8 --
  9 --  By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant 
 10 --
 11 --  National Center for Biotechnology Information
 12 --  National Institutes of Health
 13 --  Bethesda, MD 20894 USA
 14 --
 15 --  July 1995
 16 --
 17 --**********************************************************************
 18 
 19 -- Contents of the MMDB database are currently based on files distributed by
 20 -- the Protein Data Bank, PDB.  These data are changed in form, as described
 21 -- in this specification. To some extent they are also changed in content, in 
 22 -- that many data items implicit in PDB are made explicit, and others are
 23 -- corrected or omitted as a consequence of validation checks.  The semantics
 24 -- of MMDB data items are indicated by comments within the specification below.
 25 -- These comments explain in detail the manner in which data items from  PDB 
 26 -- have been mapped into MMDB. 
 27 
 28 MMDB DEFINITIONS ::=
 29 
 30 BEGIN
 31 
 32 EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set,
 33         Biostruc-residue-graph-set;
 34 
 35 IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph 
 36         Biostruc-model FROM MMDB-Structural-model
 37         Biostruc-feature-set FROM MMDB-Features
 38         Pub FROM NCBI-Pub
 39         Date, Object-id, Dbtag FROM NCBI-General;
 40 
 41 -- A structure report or "biostruc" describes the components of a biomolecular 
 42 -- assembly in terms of their names and descriptions, and a chemical graph 
 43 -- giving atomic formula, connectivity and chirality. It also gives one or more
 44 -- three-dimensional model structures, literally a mapping of the atoms, 
 45 -- residues and/or molecules of each component into a measured three-
 46 -- dimensional space. Structure may also be described by named features, which 
 47 -- associate nodes in the chemical graph, or regions in space, with text or 
 48 -- numeric descriptors.
 49 
 50 -- Note that a biostruc may also contain cross references to other databases,
 51 -- including citations to relevant scientific literature. These cross 
 52 -- references use object types from other NCBI data specifications, which are 
 53 -- "imported" into MMDB, and not repeated in this specification. 
 54 
 55 Biostruc ::= SEQUENCE {
 56         id                      SEQUENCE OF Biostruc-id,
 57         descr                   SEQUENCE OF Biostruc-descr OPTIONAL,
 58         chemical-graph          Biostruc-graph,
 59         features                SEQUENCE OF Biostruc-feature-set OPTIONAL,
 60         model                   SEQUENCE OF Biostruc-model OPTIONAL }
 61 
 62 -- A Biostruc-id is a collection of identifiers for the molecular assembly.
 63 -- Mmdb-id's are NCBI-assigned, and are intended to be unique and stable 
 64 -- identifiers.  Other-id's are synonyms.
 65 
 66 Biostruc-id ::= CHOICE {
 67         mmdb-id                 Mmdb-id,
 68         other-database          Dbtag,
 69         local-id                Object-id }
 70 
 71 Mmdb-id ::= INTEGER
 72 
 73 
 74 -- The description of a biostruc refers to both the reported chemical and 
 75 -- spatial structure of a biomolecular assembly.  PDB-derived descriptors
 76 -- which refer specifically to the chemical components or spatial structure
 77 -- are not provided here, but instead as descriptors of the biostruc-graph or 
 78 -- biostruc-model. For PDB-derived structures the biostruc name is the PDB 
 79 -- id-code.  PDB-derived citations appear as publications within the biostruc 
 80 -- description, and include a data-submission citation derived from PDB AUTHOR 
 81 -- records.  Citations are described using the NCBI Pub specification.
 82 
 83 Biostruc-descr ::= CHOICE {
 84         name                    VisibleString,
 85         pdb-comment             VisibleString,
 86         other-comment           VisibleString,
 87         history                 Biostruc-history, 
 88         attribution             Pub }
 89 
 90 
 91 -- The history of a biostruc indicates its origin and its update history
 92 -- within MMDB, the NCBI-maintained molecular structure database.  
 93 
 94 Biostruc-history ::= SEQUENCE {
 95         replaces                Biostruc-replace OPTIONAL,
 96         replaced-by             Biostruc-replace OPTIONAL,
 97         data-source             Biostruc-source OPTIONAL }
 98 
 99 Biostruc-replace ::= SEQUENCE {
100         id                      Biostruc-id,
101         date                    Date }
102 
103 -- The origin of a biostruc is a reference to another database.  PDB release 
104 -- date and PDB-assigned id codes are recorded here, as are the PDB-assigned 
105 -- entry date and replacement history.
106 
107 Biostruc-source ::= SEQUENCE {
108         name-of-database        VisibleString,
109         version-of-database     CHOICE {
110                 release-date            Date,
111                 release-code            VisibleString } OPTIONAL,
112         database-entry-id       Biostruc-id,
113         database-entry-date     Date,
114         database-entry-history  SEQUENCE OF VisibleString OPTIONAL}
115 
116 
117 -- A biostruc set is a means to collect ASN.1 data for many biostrucs in 
118 -- one file, as convenient for application programs.  The object type is not
119 -- intended to imply similarity of the biostrucs grouped together.
120 
121 Biostruc-set ::= SEQUENCE {
122         id              SEQUENCE OF Biostruc-id OPTIONAL,
123         descr           SEQUENCE OF Biostruc-descr OPTIONAL,
124         biostrucs       SEQUENCE OF Biostruc }
125 
126 
127 -- A biostruc annotation set is a means to collect ASN.1 data for biostruc
128 -- features into one file. The object type is intended as a means to store 
129 -- feature annotation of similar type, such as "core" definitions for a 
130 -- threading program, or structure-structure alignments for a structure-
131 -- similarity browser.
132 
133 Biostruc-annot-set ::= SEQUENCE {
134         id              SEQUENCE OF Biostruc-id OPTIONAL,
135         descr           SEQUENCE OF Biostruc-descr OPTIONAL,
136         features        SEQUENCE OF Biostruc-feature-set }
137 
138 
139 -- A biostruc residue graph set is a collection of residue graphs.  The object
140 -- type is intended as a means to record dictionaries containing the chemical
141 -- subgraphs of "standard" residue types, which are used as a means to 
142 -- simplify description of the covalent structure of a biomolecular assembly.
143 -- The standard residue graph dictionary supplied with the MMDB database 
144 -- contains 20 standard L amino acids and 8 standard ribonucleotide groups. 
145 -- These graphs are complete, including explicit hydrogen atoms and separate 
146 -- instances for the terminal polypeptide and polynucleotide residues. 
147 
148 Biostruc-residue-graph-set ::= SEQUENCE {
149         id                      SEQUENCE OF Biostruc-id OPTIONAL,
150         descr                   SEQUENCE OF Biomol-descr OPTIONAL,
151         residue-graphs          SEQUENCE OF Residue-graph }
152 
153 END
154 
155 
156 
157 --**********************************************************************
158 --
159 --  Biological Macromolecule 3-D Structure Data Types for MMDB,
160 --                A Molecular Modeling Database
161 --
162 --  Definitions for a chemical graph
163 --
164 --  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant 
165 --
166 --  National Center for Biotechnology Information
167 --  National Institutes of Health
168 --  Bethesda, MD 20894 USA
169 --
170 --  July, 1995
171 --
172 --**********************************************************************
173 
174 MMDB-Chemical-graph DEFINITIONS ::=
175 
176 BEGIN
177 
178 EXPORTS Biostruc-graph, Biomol-descr, Residue-graph,
179         Molecule-id, PCSubstance-id, Residue-id, Atom-id;
180 
181 IMPORTS Pub FROM NCBI-Pub
182         BioSource FROM NCBI-BioSource
183         Seq-id FROM NCBI-Seqloc
184         Biostruc-id FROM MMDB;
185 
186 -- A biostruc graph contains the complete chemical graph of the biomolecular 
187 -- assembly.  The assembly graph is defined hierarchically, in terms of 
188 -- subgraphs of component molecules.  For PDB-derived biostrucs,
189 -- the molecules forming the assembly are the individual biopolymer chains and 
190 -- any non-polymer or "heterogen" groups which are present. 
191 
192 -- The PDB-derived  "compound name" field appears as the name within the
193 -- biostruc-graph description.  PDB "class" and "source" fields appear as 
194 -- explicit attributes.  PDB-derived structures are assigned an assembly type 
195 -- of "other" unless they have been further classified as the "physiological
196 -- form" or "crystallographic cell" contents.  If they have, the source of the 
197 -- type classification appears as a citation within the  assembly description. 
198 
199 -- Note that the biostruc-graph also includes as literals the subgraphs of 
200 -- any nonstandard residues present within it. For PDB-derived biostrucs these 
201 -- subgraphs are constructed automatically, with validation as described below.
202 
203 Biostruc-graph ::= SEQUENCE {
204         descr                   SEQUENCE OF Biomol-descr OPTIONAL,
205         molecule-graphs         SEQUENCE OF Molecule-graph,
206         inter-molecule-bonds    SEQUENCE OF Inter-residue-bond OPTIONAL,
207         residue-graphs          SEQUENCE OF Residue-graph OPTIONAL }
208 
209 -- A biomolecule description refers to the chemical structure of a molecule or 
210 -- component substructures.  This descriptor type is used at the level of
211 -- assemblies, molecules and residues, and also for residue-graph dictionaries.
212 -- The BioSource object type is drawn from NCBI taxonomy data specifications,
213 -- and is not repeated here.
214 
215 Biomol-descr ::= CHOICE {
216         name                    VisibleString,
217         pdb-class               VisibleString,
218         pdb-source              VisibleString,
219         pdb-comment             VisibleString,
220         other-comment           VisibleString,
221         organism                BioSource,
222         attribution             Pub,
223         assembly-type           INTEGER {       physiological-form(1),
224                                                 crystallographic-cell(2),
225                                                 other(255) },
226         molecule-type           INTEGER {       dna(1),
227                                                 rna(2),
228                                                 protein(3),
229                                                 other-biopolymer(4),
230                                                 solvent(5),
231                                                 other-nonpolymer(6),
232                                                 other(255) } }
233 
234 -- A molecule chemical graph is defined by a sequence of residues.  Nonpolymers
235 -- are described in the same way, but may contain only a single residue.  
236 
237 -- Biopolymer molecules are identified within PDB entries according to their
238 -- appearance on SEQRES records, which formally define a biopolymer as such. 
239 -- Biopolymers are defined by the distinction between ATOM and HETATM 
240 -- coordinate records only in cases where the chemical sequence from SEQRES
241 -- is in conflict with coordinate data. The PDB-assigned chain code appears as 
242 -- the name within the molecule descriptions of the biopolymers.
243 
244 -- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups, 
245 -- excluding any HETEROGEN groups which represent modified biopolymer residues.
246 -- These molecules are named according to the chain, residue type and residue 
247 -- number fields as assigned by PDB. Any description appearing in the PDB HET 
248 -- record appears as a pdb-comment within the molecule description. 
249 
250 -- Molecule types for PDB-derived molecule graphs are assigned by matching 
251 -- residue and atom names against the PDB-documented standard types for protein,
252 -- DNA and RNA, and against residue codes commonly used to indicate solvent.
253 -- Classification is by "majority rule". If more than half of the residues in
254 -- a biopolymer are standard groups of one type, then the molecule is of that 
255 -- type, and otherwise classified as "other". Note that this classification does
256 -- not preclude the presence of modified residues, but insists they constitute 
257 -- less than half the biopolymer. Non-polymers are classified only as "solvent"
258 -- or "other".  
259 
260 -- Note that a molecule graph may also contain a set of cross references 
261 -- to biopolymer sequence databases.  All biopolymer molecules in MMDB contain 
262 -- appropriate identifiers for the corresponding entry in the NCBI-Sequences 
263 -- database, in particular the NCBI "gi" number, which may be used for sequence
264 -- retrieval. The Seq-id object type is defined in the NCBI molecular sequence 
265 -- specification, and not repeated here.
266 
267 Molecule-graph ::= SEQUENCE {
268         id                      Molecule-id,
269         descr                   SEQUENCE OF Biomol-descr OPTIONAL,
270         seq-id                  Seq-id OPTIONAL,
271         residue-sequence        SEQUENCE OF Residue,
272         inter-residue-bonds     SEQUENCE OF Inter-residue-bond OPTIONAL, 
273         sid                     PCSubstance-id OPTIONAL }
274    
275 Molecule-id ::= INTEGER
276 
277 -- PubChem substance id
278 
279 PCSubstance-id ::= INTEGER
280 
281 -- Residues may be assigned a text-string name as well as an id number. PDB 
282 -- assigned residue numbers appear as the residue name.
283 
284 Residue ::= SEQUENCE {
285         id                      Residue-id,
286         name                    VisibleString OPTIONAL,
287         residue-graph           Residue-graph-pntr }
288 
289 Residue-id ::= INTEGER
290 
291 
292 -- Residue graphs from different sources may be referenced within a molecule
293 -- graph.  The allowed choices are the nonstandard residue graphs included in 
294 -- the present biostruc, residue graphs within other biostrucs, or residue 
295 -- graphs within tables of standard residue definitions.
296 
297 Residue-graph-pntr ::= CHOICE {
298         local                   Residue-graph-id,
299         biostruc                Biostruc-graph-pntr,
300         standard                Biostruc-residue-graph-set-pntr }
301         
302 Biostruc-graph-pntr ::= SEQUENCE {
303         biostruc-id             Biostruc-id,
304         residue-graph-id        Residue-graph-id }
305 
306 Biostruc-residue-graph-set-pntr ::= SEQUENCE {
307         biostruc-residue-graph-set-id   Biostruc-id,
308         residue-graph-id                Residue-graph-id } 
309 
310 
311 -- Residue graphs define atomic formulae, connectivity, chirality, and names.
312 -- For standard residue graphs from the MMDB dictionary the PDB-assigned 
313 -- residue-type code appears as the name within the residue graph description,
314 -- and the full trivial name of the residue as a comment within that 
315 -- description.  For any nonstandard residue graphs provided with an MMDB 
316 -- biostruc the PDB-assigned residue-type code similarly appears as the name 
317 -- within the description, and any information provided on PDB HET records as 
318 -- a pdb-comment within that description.  
319 
320 -- Note that nonstandard residue graphs for a PDB-derived biostruc may be 
321 -- incomplete. Current PDB format cannot represent connectivity for groups 
322 -- which are disordered, and for which no coordinates are given.  In these 
323 -- cases the residue graph defined in MMDB represents only the subgraph that 
324 -- could be identified from available ATOM, HETATM and CONECT records.
325 
326 Residue-graph ::= SEQUENCE {
327         id                      Residue-graph-id,
328         descr                   SEQUENCE OF Biomol-descr OPTIONAL,
329         residue-type            INTEGER {       deoxyribonucleotide(1),
330                                                 ribonucleotide(2),
331                                                 amino-acid(3),
332                                                 other(255) } OPTIONAL,
333         iupac-code              SEQUENCE OF VisibleString OPTIONAL,
334         atoms                   SEQUENCE OF Atom,
335         bonds                   SEQUENCE OF Intra-residue-bond,
336         chiral-centers          SEQUENCE OF Chiral-center OPTIONAL }
337         
338 Residue-graph-id ::= INTEGER
339 
340 -- Atoms in residue graphs are defined by elemental symbols and names.  PDB-
341 -- assigned atom names appear here in the name field, except in cases of known 
342 -- PDB synonyms.  In these cases atom names are mapped to the names used in the
343 -- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where 
344 -- PDB practice allows synonyms for several atom types.  For PDB atoms the 
345 -- elemental symbol is obtained by parsing the PDB atom name field, allowing 
346 -- for known special-semantics cases where the atom name does not follow the
347 -- documented encoding rule.  Ionizable protons are identified within standard 
348 -- residue graphs in the MMDB dictionary, but not within automatically-defined
349 -- nonstandard graphs.
350 
351 Atom ::= SEQUENCE {
352         id                      Atom-id,
353         name                    VisibleString OPTIONAL,
354         iupac-code              SEQUENCE OF VisibleString OPTIONAL,
355         element                 ENUMERATED {
356                                 h(1),   he(2),  li(3),  be(4),  b(5), 
357                                 c(6),   n(7),   o(8),   f(9),   ne(10), 
358                                 na(11), mg(12), al(13), si(14), p(15), 
359                                 s(16),  cl(17), ar(18), k(19),  ca(20), 
360                                 sc(21), ti(22), v(23),  cr(24), mn(25), 
361                                 fe(26), co(27), ni(28), cu(29), zn(30), 
362                                 ga(31), ge(32), as(33), se(34), br(35), 
363                                 kr(36), rb(37), sr(38), y(39),  zr(40),
364                                 nb(41), mo(42), tc(43), ru(44), rh(45),
365                                 pd(46), ag(47), cd(48), in(49), sn(50),
366                                 sb(51), te(52), i(53),  xe(54), cs(55),
367                                 ba(56), la(57), ce(58), pr(59), nd(60),
368                                 pm(61), sm(62), eu(63), gd(64), tb(65),
369                                 dy(66), ho(67), er(68), tm(69), yb(70),
370                                 lu(71), hf(72), ta(73), w(74),  re(75),
371                                 os(76), ir(77), pt(78), au(79), hg(80),
372                                 tl(81), pb(82), bi(83), po(84), at(85),
373                                 rn(86), fr(87), ra(88), ac(89), th(90),
374                                 pa(91), u(92),  np(93), pu(94), am(95),
375                                 cm(96), bk(97), cf(98), es(99), 
376                                 fm(100), md(101), no(102), lr(103),
377                                 other(254), unknown(255) },
378         ionizable-proton        ENUMERATED {
379                                         true(1),
380                                         false(2),
381                                         unknown(255) } OPTIONAL }
382         
383 Atom-id ::= INTEGER
384 
385 -- Intra-residue-bond specifies connectivity between atoms in Residue-graph.
386 -- Unlike Inter-residue-bond defined later, its participating atoms are part of
387 -- a residue subgraph dictionary, not part of a specific biostruc-graph.
388 
389 -- For residue graphs in the standard MMDB dictionary bonds are defined from
390 -- the known chemical structures of amino acids and nucleotides.  For 
391 -- nonstandard residue graphs bonds are defined from PDB CONECT records, with
392 -- validation for consistency with coordinate data, and from stereochemical
393 -- calculation to identify unreported bonds.  Validation and bond identification
394 -- are based on comparison of inter-atomic distances to the sum of covalent
395 -- radii for the corresponding elements. 
396 
397 Intra-residue-bond ::= SEQUENCE {
398         atom-id-1               Atom-id,
399         atom-id-2               Atom-id,
400         bond-order              INTEGER {
401                                         single(1), 
402                                         partial-double(2),
403                                         aromatic(3), 
404                                         double(4),
405                                         triple(5),
406                                         other(6),
407                                         unknown(255)} OPTIONAL }
408 
409 -- Chiral centers are atoms with tetrahedral geometry.  Chirality is defined
410 -- by a chiral volume involving the chiral center and 3 other atoms bonded to 
411 -- it.  For any coordinates assigned to atoms c, n1, n2, and n3, the vector 
412 -- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated
413 -- sign.  The calculation assumes an orthogonal right-handed coordinate system
414 -- as is used for MMDB model structures.  
415 
416 -- Chirality is defined for standard residues in the MMDB dictionary, but is 
417 -- not assigned automatically for PDB-derived nonstandard residues. If assigned
418 -- for nonstandard residues, the source of chirality information is described 
419 -- by a citation within the residue description.
420 
421 Chiral-center ::= SEQUENCE {
422         c                       Atom-id,
423         n1                      Atom-id,
424         n2                      Atom-id,
425         n3                      Atom-id,
426         sign                    ENUMERATED { positive(1),
427                                              negative(2) } }
428 
429 -- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived 
430 -- structures bonds are identified from biopolymer connectivity according to
431 -- SEQRES and from other connectivity information on SSBOND and CONECT 
432 -- records. These data are validated and unreported bonds identified by
433 -- stereochemical calculation, using the same criteria as for intra-residue 
434 -- bonds.
435 
436 Inter-residue-bond ::= SEQUENCE {
437         atom-id-1               Atom-pntr,
438         atom-id-2               Atom-pntr,
439         bond-order              INTEGER {
440                                         single(1), 
441                                         partial-double(2),
442                                         aromatic(3), 
443                                         double(4),
444                                         triple(5),
445                                         other(6),
446                                         unknown(255)} OPTIONAL }
447 
448 -- Atoms, residues and molecules within the current biostruc are referenced 
449 -- by hierarchical pointers.
450 
451 Atom-pntr ::= SEQUENCE {
452         molecule-id             Molecule-id,
453         residue-id              Residue-id,
454         atom-id                 Atom-id }
455 
456 Atom-pntr-set ::= SEQUENCE OF Atom-pntr
457 
458 END

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.