--$Revision: 150675 $
--**********************************************************************
--
--  Definitions for CDD's
--
--  NCBI Structure Group
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  October 1999
--
--  asntool -m cdd.asn -w 100 -o cdd.h
--  asntool -B objcdd -m cdd.asn -G -w 100 -I objseq.h objsset.h -K cdd.h \
--  -M asn.all
--**********************************************************************

NCBI-Cdd DEFINITIONS ::=
-- NCBI Conserved Domain Definition
BEGIN

-- Types made available to other modules.
EXPORTS
    Cdd-id, Cdd-id-set, Cdd, Cdd-set, Cdd-tree, Cdd-tree-set,
    Cdd-pref-nodes, Cdd-Project;

-- Types taken from other modules. Symbols are grouped so that every source
-- module is named exactly once in this clause.
IMPORTS
    Date                            FROM NCBI-General
    Pub                             FROM NCBI-Pub
    Biostruc-annot-set              FROM MMDB
    Bioseq, Seq-annot               FROM NCBI-Sequence
    Seq-entry                       FROM NCBI-Seqset
    Org-ref                         FROM NCBI-Organism
    Seq-id, Seq-interval, Seq-loc   FROM NCBI-Seqloc
    Seq-feat                        FROM NCBI-Seqfeat
    Score-set                       FROM NCBI-Seqalign
    Cn3d-style-dictionary,
    Cn3d-user-annotations           FROM NCBI-Cn3d
    PssmWithParameters              FROM NCBI-ScoreMat;


-- dealing with lists of preferred tax-nodes

-- A single entry of a tax-node list: an Org-ref plus bookkeeping fields.
Cdd-org-ref ::= SEQUENCE {
    reference       Org-ref,
    active          BOOLEAN DEFAULT TRUE,
    parent-tax-id   INTEGER OPTIONAL,
    rank            VisibleString OPTIONAL
}

Cdd-org-ref-set ::= SET OF Cdd-org-ref

-- A descriptor attached to a list of preferred nodes: either a creation
-- date or a free-text description.
Cdd-pref-node-descr ::= CHOICE {
    create-date     Date,
    description     VisibleString
}

Cdd-pref-node-descr-set ::= SET OF Cdd-pref-node-descr

-- The preferred tax-node lists. Only preferred-nodes is mandatory.
Cdd-pref-nodes ::= SEQUENCE {
    preferred-nodes Cdd-org-ref-set,
    model-organisms Cdd-org-ref-set OPTIONAL,
    optional-nodes  Cdd-org-ref-set OPTIONAL,
    description     Cdd-pref-node-descr-set OPTIONAL
}

-- Cdd's should not exist without a unique accession, but alternative id's may
-- be present as well.
-- It is conceivable that a CD which is created as a merged product of two
-- highly redundant CDs will retain the source ids in addition to its new
-- unique id

Global-id ::= SEQUENCE {
    -- SMART, Pfam, LOAD or CD accession
    accession       VisibleString,
    -- to hold CD-Database release number if desired, currently not used
    release         VisibleString OPTIONAL,
    -- version 0 is the seed, version numbers increase with update/curate
    -- cycles
    version         INTEGER OPTIONAL,
    -- this is NOT the source!, rather the database the object resides in;
    -- currently not in use
    database        VisibleString OPTIONAL
}

Cdd-id ::= CHOICE {
    -- for synchronization with Entrez, holds PSSM-Ids
    uid             INTEGER,
    -- holds accession/version pairs
    gid             Global-id
}

Cdd-id-set ::= SEQUENCE OF Cdd-id

-- record whether the CD contains repeated sequence/structure motifs
Cdd-repeat ::= SEQUENCE {
    -- number of tandem repeats in the CD
    count           INTEGER,
    -- location on the representative
    location        Seq-loc OPTIONAL,
    -- average repeat length
    avglen          INTEGER OPTIONAL
}

-- record a link to Entrez Books
Cdd-book-ref ::= SEQUENCE {
    -- abbreviated book title
    bookname        VisibleString,
    -- type of element
    textelement     ENUMERATED {
        unassigned(0),
        section(1),         -- a section or paragraph
        figgrp(2),          -- a figure or set of figures
        table(3),           -- a table
        chapter(4),         -- a whole chapter
        biblist(5),         -- a list of references
        box(6),             -- an inserted box
        glossary(7),        -- glossary
        appendix(8),        -- appendix
        other(255)
    },
    -- numerical address of the text-element
    elementid       INTEGER OPTIONAL,
    -- exact address, used with section
    subelementid    INTEGER OPTIONAL,
    -- address of the text element, if character string
    celementid      VisibleString OPTIONAL,
    -- exact address, if character string
    csubelementid   VisibleString OPTIONAL
}

-- The description of CDD's refers to the specific set of aligned sequences,
-- the region that is being aligned and the information contained in the
-- alignment.
-- It may contain a lengthy comment describing the function of the domain as
-- well as its origin and all other anecdotal information that can't be
-- pressed into a rigid scheme.
-- Crosslinks to reference papers available in PubMed are possible as well.
-- There can be as many of these as you want in the CDD.

Cdd-descr ::= CHOICE {
    -- alternative names for the CDD if domain has several common names
    othername       VisibleString,
    -- intracellular, extracellular, etc.; to record spatial and/or temporal
    -- expression in free-text format
    category        VisibleString,
    -- this is where descriptions go
    comment         VisibleString,
    -- a citation describing the domain
    reference       Pub,
    -- Date of first creation/dump
    create-date     Date,
    -- holds the highest common tax node
    tax-source      Org-ref,
    -- the database the seeds were created from, e.g. SMART, PFAM, etc..
    source          VisibleString,
    -- for CD production?
    status          INTEGER {
        unassigned(0),
        finished-ok(1),         -- a public curated CD
        pending-release(2),     -- needs work done, not yet released
        other-asis(3),          -- imported as-is, immediate release
        matrix-only(4),         -- CD holds a Psi-Blast PSSM only,
                                -- does not contain alignment data
        update-running(5),      -- has been flagged for update (in queue)
        auto-updated(6),        -- update finished, no work necessary
        claimed(7),             -- is earmarked for curation
        curated-complete(8),    -- public curated member of a completed
                                -- family
        other(255)
    },
    -- Date of last version change
    update-date     Date,
    -- for storing curation notes; those won't make it into public
    -- distributions
    scrapbook       SEQUENCE OF VisibleString,
    -- for linking back to source db
    source-id       Cdd-id-set,
    -- to record repeat counts
    repeats         Cdd-repeat,
    -- to record short-term history
    old-root        Cdd-id-set,
    -- to record curation status when CD is checked out from the tracking
    -- database, for use within curation software
    curation-status INTEGER {
        unassigned(0),
        prein     (1),
        ofc       (2),
        iac       (3),
        ofv1      (4),
        iav1      (5),
        ofv2      (6),
        iav2      (7),
        postin    (8),
        other   (255)
    },
    -- to record read-only status when CD is checked out from the tracking
    -- database, for use within curation software
    readonly-status INTEGER {
        unassigned(0),
        readonly  (1),
        readwrite (2),
        other   (255)
    },
    -- links to Entrez/books
    book-ref        Cdd-book-ref,
    -- add citations and/or author names
    attribution     Pub,
    -- hold short descriptive text
    title           VisibleString
}

Cdd-descr-set ::= SET OF Cdd-descr

-- the Cdd-tree stores the hierarchy of CDDs. These objects are stored
-- separate from the CDs to allow for fast retrieval and use as an 'index'
-- into CDs. All the components in a CD-tree match components in the
-- full-sized CD and should be synchronized

Cdd-tree ::= SEQUENCE {
    -- short name copied from CD
    name            VisibleString,
    -- IDs copied from CD
    id              Cdd-id-set,
    -- description copied from CD
    description     Cdd-descr-set OPTIONAL,
    -- CD is the result of a split/merge
    parent          Cdd-id OPTIONAL,
    -- this CD has been split
    children        Cdd-id-set OPTIONAL,
    -- related CDs (have common hits)
    siblings        Cdd-id-set OPTIONAL,
    -- co-occurring CDs (non-overlapping hits to same sequences)
    neighbors       Cdd-id-set OPTIONAL
}

Cdd-tree-set ::= SEQUENCE OF Cdd-tree

-- Matrix definitions, these are supposed to store PSSMs and corresponding
-- matrices of relative residue frequencies.
-- the number of columns and rows is listed explicitly, values in columns
-- are stored column by column, i.e.
-- in groups of nrows values for each column

Matrix ::= SEQUENCE {
    ncolumns        INTEGER,
    nrows           INTEGER,
    row-labels      SEQUENCE OF VisibleString OPTIONAL,
    scale-factor    INTEGER,
    columns         SEQUENCE OF INTEGER
}

-- definition for matrix of pairwise "distances", stored as the upper
-- triangle of a squared n x n matrix (excluding the diagonal), this is
-- supposed to store pairwise percentages of identical residues, pairwise
-- alignment scores or E-values from pairwise BLAST sequence comparisons

Triangle ::= SEQUENCE {
    nelements       INTEGER,
    scores          Score-set OPTIONAL,
    div-ranks       SEQUENCE OF INTEGER OPTIONAL
}

-- Update-align is supposed to contain alignments that still need some work
-- done to fit into the CD-proper alignment. These originate from the
-- CD update process (generated by Blast, for example) or may be created in
-- an editing session to save its state

Update-comment ::= CHOICE {
    -- free text to describe nature of Update-align
    comment         VisibleString,
    -- suggestion for inclusion in the CD without corresponding alignment
    addthis         Seq-loc,
    -- if one or several alignment rows are to be replaced by the
    -- Update-align
    replaces        Seq-loc,
    -- if used with Reject-id, specify a location on a sequence which
    -- should not be used
    reject-loc      Seq-loc,
    -- if update alignment imported from citation and for whenever it seems
    -- necessary to cite
    reference       Pub
}

-- Both fields are optional, as the Update-align may be a Seq-annot without
-- description, or a suggestion to add a sequence without the corresponding
-- alignment

Update-align ::= SEQUENCE {
    description     SEQUENCE OF Update-comment OPTIONAL,
    -- contains the SeqAlign
    seqannot        Seq-annot OPTIONAL,
    type            INTEGER {
        unassigned(0),
        update(1),
        update-3d(2),
        demoted(51),
        demoted-3d(52),
        other(255)
    }
}

Reject-id ::= SEQUENCE {
    description     SEQUENCE OF Update-comment OPTIONAL,
    ids             SET OF Seq-id
}

Feature-evidence ::= CHOICE {
    -- so we can spell out what doesn't fit in any other category
    comment         VisibleString,
    -- evidence via a literature reference
    reference       Pub,
    -- evidence via Biostruc-features, such as structure superpositions
    bsannot         Biostruc-annot-set,
    -- evidence is a Sequence feature found elsewhere
    seqfeat         Seq-feat,
    -- evidence is a book chapter or figure
    book-ref        Cdd-book-ref
}

Align-annot ::= SEQUENCE {
    -- points to a location in one of the aligned sequences, usually the
    -- master/representative
    location        Seq-loc,
    -- to hold descriptions/names like "Heme binding site" or "catalytic
    -- triad" etc., something that should be used for labels in
    -- visualization
    description     VisibleString OPTIONAL,
    -- evidence we can compute with
    evidence        SEQUENCE OF Feature-evidence OPTIONAL,
    -- for typing annotated features
    type            INTEGER OPTIONAL
}

Align-annot-set ::= SEQUENCE OF Align-annot

-- the Domain-parent records an evolutionary relationship which may not be
-- as simple as a classical parent-child relationship in a typical hierarchy,
-- i.e. where a CD is merely a specific subgroup ("child") of a more general
-- diverse alignment model ("parent"). A CD alignment model may be the result
-- of an ancient fusion event, combining two or more domains into a bigger
-- unit which has subsequently undergone a divergent evolutionary process
-- similar to what may have happened to a single "domain". A CD alignment
-- model may also reflect the result of a deletion event, where a specific
-- subgroup lacks part of a (set of) domain(s), but where the part present is
-- found to be highly similar to a putative "parent", with some added
-- evidence for an actual deletion, like from the distribution of truncated
-- copies in phylogenetic lineages. Deletion events which affect different
-- parts of a set of duplicated domain architectures may be indistinguishable
-- from actual fission events, which means that we may want to represent the
-- latter as deletions after duplication and do not need a special case for
-- fissions.
Domain-parent ::= SEQUENCE {
    -- the classification of parent child relations
    parent-type     INTEGER {
        classical   (0),
        fusion      (1),
        deletion    (2),
        permutation (3),
        other     (255)
    },
    -- identify the section parent by accession
    parentid        Cdd-id,
    -- contains the sequence alignment linking CD alignment models, should
    -- align the masters/representatives of each CD
    seqannot        Seq-annot OPTIONAL
}

-- record sequence trees generated by a suitable algorithm.

Sequence-tree ::= SEQUENCE {
    cdAccession     VisibleString OPTIONAL,
    algorithm       Algorithm-type,
    isAnnotated     BOOLEAN DEFAULT FALSE,
    root            SeqTree-node
}

SeqTree-node ::= SEQUENCE {
    isAnnotated     BOOLEAN DEFAULT FALSE,
    name            VisibleString OPTIONAL,
    distance        REAL OPTIONAL,
    children        CHOICE {
        children        SEQUENCE OF SeqTree-node,
        footprint       SEQUENCE {
            seqRange        Seq-interval,
            rowId           INTEGER OPTIONAL
        }
    },
    annotation      Node-annotation OPTIONAL
}

Algorithm-type ::= SEQUENCE {
    scoring-Scheme  INTEGER {
        unassigned              (0),
        percent-id              (1),
        kimura-corrected        (2),
        aligned-score           (3),
        aligned-score-ext       (4),
        aligned-score-filled    (5),
        blast-footprint         (6),
        blast-full              (7),
        hybrid-aligned-score    (8),
        other                 (255)
    },
    clustering-Method INTEGER {
        unassigned              (0),
        single-linkage          (1),
        neighbor-joining        (2),
        fast-minimum-evolution  (3),
        other                 (255)
    },
    score-Matrix    INTEGER {
        unassigned  (0),
        blosum45    (1),
        blosum62    (2),
        blosum80    (3),
        pam30       (4),
        pam70       (5),
        pam250      (6),
        other     (255)
    } OPTIONAL,
    gapOpen         INTEGER OPTIONAL,
    gapExtend       INTEGER OPTIONAL,
    gapScaleFactor  INTEGER OPTIONAL,
    nTerminalExt    INTEGER OPTIONAL,
    cTerminalExt    INTEGER OPTIONAL,
    tree-scope      INTEGER {
        allDescendants          (0),
        immediateChildrenOnly   (1),
        selfOnly                (2),
        other                 (255)
    } OPTIONAL,
    coloring-scope  INTEGER {
        allDescendants          (0),
        immediateChildrenOnly   (1),
        other                 (255)
    } OPTIONAL
}

Node-annotation ::= SEQUENCE {
    presentInChildCD VisibleString OPTIONAL,
    note             VisibleString OPTIONAL
}

-- the Cdd is the basic ASN.1 object storing an annotated and curated set of
-- alignments (formulated as a set of pairwise master-slave alignments).
-- The alignment data are contained in Seq-annots, and a special type of
-- object, the Update-align, contains additional alignment data from
-- unfinished editing sessions and update processes. The Biostruc-annot-set
-- holds structure superposition information for multiple structure-derived
-- rows in the alignment.
-- Version numbers in Global-ids are meant to be updated every time the Cdd
-- is changed in a way that does not require Global-ids to be changed
-- (sequences added in update cycle, annotation changed, alignment errors
-- fixed)

Cdd ::= SEQUENCE {
    -- a short name (can be the accession..)
    name            VisibleString,
    -- this CD's Ids
    id              Cdd-id-set,
    -- status, references, etc.
    description     Cdd-descr-set OPTIONAL,
    -- contains the CD alignment
    seqannot        SEQUENCE OF Seq-annot OPTIONAL,
    -- contains structure alignment data or "core" definitions
    features        Biostruc-annot-set OPTIONAL,
    -- store as bioseq-set inside seq-entry
    sequences       Seq-entry OPTIONAL,
    -- profile for this region only; also stores the Seq-id of the master
    profile-range   Seq-interval OPTIONAL,
    -- holds the truncated master, which may be something like a consensus,
    -- uses the same sequence coordinate frame as the profile-range
    trunc-master    Bioseq OPTIONAL,
    -- relative residue frequencies
    posfreq         Matrix OPTIONAL,
    -- Position dependent score matrix
    scoremat        Matrix OPTIONAL,
    -- pairwise distances for all seqs.
    distance        Triangle OPTIONAL,
    -- this CD is the result of a split
    parent          Cdd-id OPTIONAL,
    -- this CD has been split, not used
    children        Cdd-id-set OPTIONAL,
    -- related CDs (common hits), clusters
    siblings        Cdd-id-set OPTIONAL,
    -- co-occurring CDs, not used
    neighbors       Cdd-id-set OPTIONAL,
    -- contains alignments from update or "lower panel"
    pending         SEQUENCE OF Update-align OPTIONAL,
    -- SeqIds of rejected CD-members, ignore in update
    rejects         SEQUENCE OF Reject-id OPTIONAL,
    -- record if CD has a 3D representative
    master3d        SET OF Seq-id OPTIONAL,
    -- alignment annotation
    alignannot      Align-annot-set OPTIONAL,
    -- record rendering styles
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    -- user annotations in Cn3D
    user-annotations Cn3d-user-annotations OPTIONAL,
    -- list of parents
    ancestors       SEQUENCE OF Domain-parent OPTIONAL,
    scoreparams     PssmWithParameters OPTIONAL,
    seqtree         Sequence-tree OPTIONAL
}

Cdd-set ::= SET OF Cdd

-- Cdd projects store a set of CDs, typically related to each other;
-- relationships would be specified using the ancestors fields in the
-- individual CD objects. For use with CD-Tree, a program to visualize
-- curated CD hierarchies and evidence for hierarchical family structures.
Cdd-Viewer-Rect ::= SEQUENCE {
    top             INTEGER,    -- top coordinate
    left            INTEGER,    -- left coordinate
    width           INTEGER,    -- width
    height          INTEGER     -- height
}

Cdd-Viewer ::= SEQUENCE {
    -- viewer type
    ctrl            INTEGER {
        unassigned          (0),
        cd-info             (1),
        align-annot         (2),
        seq-list            (3),
        seq-tree            (4),
        merge-preview       (5),
        cross-hits          (6),
        notes               (7),
        tax-tree            (8),
        dart                (9),
        dart-selected-rows  (10),
        other               (255)
    },
    -- viewer rectangle
    rect            Cdd-Viewer-Rect OPTIONAL,
    -- list of accessions associated with a viewer
    accessions      SEQUENCE OF VisibleString
}

Cdd-Script ::= SEQUENCE {
    type            INTEGER {
        unassigned          (0),
        user-recorded       (1),
        server-generated    (2),
        other               (255)
    } OPTIONAL,
    -- user assigned name/description
    name            VisibleString OPTIONAL,
    -- actual script commands
    commands        VisibleString
}

-- cd colors are as: 0000FF for red, 00FF00 for green, FF0000 for blue

Cdd-Project ::= SEQUENCE {
    -- cds
    cds             SEQUENCE OF Cdd,
    -- colors
    cdcolor         SEQUENCE OF INTEGER,
    -- Sequence viewers
    viewers         SEQUENCE OF Cdd-Viewer,
    -- log
    log             VisibleString,
    -- command scripts
    scripts         SEQUENCE OF Cdd-Script OPTIONAL
}

END