|
NCBI Home IEB Home C Toolkit docs C++ Toolkit source browser C Toolkit source browser (2) |
NCBI C Toolkit Cross ReferenceC/asn/asn.all |
source navigation diff markup identifier search freetext search file search |
1 --$Revision: 6.0 $
2 --**********************************************************************
3 --
4 -- asn.all
5 -- this file contains all NCBI ASN.1 specifications together
6 --
7 -- by James Ostell, 1990
8 --
9 --**********************************************************************
10
11 --$Revision: 6.5 $
12 --**********************************************************************
13 --
14 -- NCBI General Data elements
15 -- by James Ostell, 1990
16 -- Version 3.0 - June 1994
17 --
18 --**********************************************************************
19
20 NCBI-General DEFINITIONS ::=
21 BEGIN
22
23 EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;
24
25 -- StringStore is really a VisibleString. It is used to define very
26 -- long strings which may need to be stored by the receiving program
27 -- in special structures, such as a ByteStore, but it's just a hint.
28 -- AsnTool stores StringStores in ByteStore structures.
29 -- OCTET STRINGs are also stored in ByteStores by AsnTool
30 --
31 -- typedef struct bsunit { /* for building multiline strings */
32 -- Nlm_Handle str; /* the string piece */
33 -- Nlm_Int2 len_avail,
34 -- len;
35 -- struct bsunit PNTR next; } /* the next one */
36 -- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
37 --
38 -- typedef struct bytestore {
39 -- Nlm_Int4 seekptr, /* current position */
40 -- totlen, /* total stored data length in bytes */
41 -- chain_offset; /* offset in ByteStore of first byte in curchain */
42 -- Nlm_BSUnitPtr chain, /* chain of elements */
43 -- curchain; /* the BSUnit containing seekptr */
44 -- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
45 --
46 -- AsnTool incorporates this as a primitive type, so the definition
47 -- is here just for completeness
48 --
49 -- StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
50 --
51
52 -- BigInt is really an INTEGER. It is used to warn the receiving code to expect
53 -- a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
54 --
55 -- Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
56 -- BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
57 --
58
59 -- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
60 -- of ASN.1
61 -- It stores only a date
62 --
63
64 Date ::= CHOICE {
65 str VisibleString , -- for those unparsed dates
66 std Date-std } -- use this if you can
67
68 Date-std ::= SEQUENCE { -- NOTE: this is NOT a unix tm struct
69 year INTEGER , -- full year (including 1900)
70 month INTEGER OPTIONAL , -- month (1-12)
71 day INTEGER OPTIONAL , -- day of month (1-31)
72 season VisibleString OPTIONAL , -- for "spring", "may-june", etc
73 hour INTEGER OPTIONAL , -- hour of day (0-23)
74 minute INTEGER OPTIONAL , -- minute of hour (0-59)
75 second INTEGER OPTIONAL } -- second of minute (0-59)
76
77 -- Dbtag is generalized for tagging
78 -- eg. { "Social Security", str "023-79-8841" }
79 -- or { "member", id 8882224 }
80
81 Dbtag ::= SEQUENCE {
82 db VisibleString , -- name of database or system
83 tag Object-id } -- appropriate tag
84
85 -- Object-id can tag or name anything
86 --
87
88 Object-id ::= CHOICE {
89 id INTEGER ,
90 str VisibleString }
91
92 -- Person-id is to define a std element for people
93 --
94
95 Person-id ::= CHOICE {
96 dbtag Dbtag , -- any defined database tag
97 name Name-std , -- structured name
98 ml VisibleString , -- MEDLINE name (semi-structured)
99 -- eg. "Jones RM"
100 str VisibleString, -- unstructured name
101 consortium VisibleString } -- consortium name
102
103 Name-std ::= SEQUENCE { -- Structured names
104 last VisibleString ,
105 first VisibleString OPTIONAL ,
106 middle VisibleString OPTIONAL ,
107 full VisibleString OPTIONAL , -- full name eg. "J. John Smith, Esq"
108 initials VisibleString OPTIONAL, -- first + middle initials
109 suffix VisibleString OPTIONAL , -- Jr, Sr, III
110 title VisibleString OPTIONAL } -- Dr., Sister, etc
111
112 --**** Int-fuzz **********************************************
113 --*
114 --* uncertainties in integer values
115
116 Int-fuzz ::= CHOICE {
117 p-m INTEGER , -- plus or minus fixed amount
118 range SEQUENCE { -- max to min
119 max INTEGER ,
120 min INTEGER } ,
121 pct INTEGER , -- % plus or minus (x10) 0-1000
122 lim ENUMERATED { -- some limit value
123 unk (0) , -- unknown
124 gt (1) , -- greater than
125 lt (2) , -- less than
126 tr (3) , -- space to right of position
127 tl (4) , -- space to left of position
128 circle (5) , -- artificial break at origin of circle
129 other (255) } , -- something else
130 alt SET OF INTEGER } -- set of alternatives for the integer
131
132
133 --**** User-object **********************************************
134 --*
135 --* a general object for a user defined structured data item
136 --* used by Seq-feat and Seq-descr
137
138 User-object ::= SEQUENCE {
139 class VisibleString OPTIONAL , -- endeavor which designed this object
140 type Object-id , -- type of object within class
141 data SEQUENCE OF User-field } -- the object itself
142
143 User-field ::= SEQUENCE {
144 label Object-id , -- field label
145 num INTEGER OPTIONAL , -- required for strs, ints, reals, oss
146 data CHOICE { -- field contents
147 str VisibleString ,
148 int INTEGER ,
149 real REAL ,
150 bool BOOLEAN ,
151 os OCTET STRING ,
152 object User-object , -- for using other definitions
153 strs SEQUENCE OF VisibleString ,
154 ints SEQUENCE OF INTEGER ,
155 reals SEQUENCE OF REAL ,
156 oss SEQUENCE OF OCTET STRING ,
157 fields SEQUENCE OF User-field ,
158 objects SEQUENCE OF User-object } }
159
160
161
162 END
163
164 --$Revision: 6.3 $
165 --****************************************************************
166 --
167 -- NCBI Bibliographic data elements
168 -- by James Ostell, 1990
169 --
170 -- Taken from the American National Standard for
171 -- Bibliographic References
172 -- ANSI Z39.29-1977
173 -- Version 3.0 - June 1994
174 -- PubMedId added in 1996
175 -- ArticleIds and eprint elements added in 1999
176 --
177 --****************************************************************
178
179 NCBI-Biblio DEFINITIONS ::=
180 BEGIN
181
182 EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
183 Cit-proc, Cit-sub, Title, Author, PubMedId, DOI;
184
185 IMPORTS Person-id, Date, Dbtag FROM NCBI-General;
186
187 -- Article Ids
188
189 ArticleId ::= CHOICE { -- can be many ids for an article
190 pubmed PubMedId , -- see types below
191 medline MedlineUID ,
192 doi DOI ,
193 pii PII ,
194 pmcid PmcID ,
195 pmcpid PmcPid ,
196 pmpid PmPid ,
197 other Dbtag } -- generic catch all
198
199 PubMedId ::= INTEGER -- Id from the PubMed database at NCBI
200 MedlineUID ::= INTEGER -- Id from MEDLINE
201 DOI ::= VisibleString -- Document Object Identifier
202 PII ::= VisibleString -- Controlled Publisher Identifier
203 PmcID ::= INTEGER -- PubMed Central Id
204 PmcPid ::= VisibleString -- Publisher Id supplied to PubMed Central
205 PmPid ::= VisibleString -- Publisher Id supplied to PubMed
206
207 ArticleIdSet ::= SET OF ArticleId
208
209 -- Status Dates
210
211 PubStatus ::= INTEGER { -- points of publication
212 received (1) , -- date manuscript received for review
213 accepted (2) , -- accepted for publication
214 epublish (3) , -- published electronically by publisher
215 ppublish (4) , -- published in print by publisher
216 revised (5) , -- article revised by publisher/author
217 pmc (6) , -- article first appeared in PubMed Central
218 pmcr (7) , -- article revision in PubMed Central
219 pubmed (8) , -- article citation first appeared in PubMed
220 pubmedr (9) , -- article citation revision in PubMed
221 aheadofprint (10), -- epublish, but will be followed by print
222 premedline (11), -- date into PreMedline status
223 medline (12), -- date made a MEDLINE record
224 other (255) }
225
226 PubStatusDate ::= SEQUENCE { -- done as a structure so fields can be added
227 pubstatus PubStatus ,
228 date Date } -- time may be added later
229
230 PubStatusDateSet ::= SET OF PubStatusDate
231
232 -- Citation Types
233
234 Cit-art ::= SEQUENCE { -- article in journal or book
235 title Title OPTIONAL , -- title of paper (ANSI requires)
236 authors Auth-list OPTIONAL , -- authors (ANSI requires)
237 from CHOICE { -- journal or book
238 journal Cit-jour ,
239 book Cit-book ,
240 proc Cit-proc } ,
241 ids ArticleIdSet OPTIONAL } -- lots of ids
242
243 Cit-jour ::= SEQUENCE { -- Journal citation
244 title Title , -- title of journal
245 imp Imprint }
246
247 Cit-book ::= SEQUENCE { -- Book citation
248 title Title , -- Title of book
249 coll Title OPTIONAL , -- part of a collection
250 authors Auth-list, -- authors
251 imp Imprint }
252
253 Cit-proc ::= SEQUENCE { -- Meeting proceedings
254 book Cit-book , -- citation to meeting
255 meet Meeting } -- time and location of meeting
256
257 -- Patent number and date-issue were made optional in 1997 to
258 -- support patent applications being issued from the USPTO
259 -- Semantically a Cit-pat must have either a patent number or
260 -- an application number (or both) to be valid
261
262 Cit-pat ::= SEQUENCE { -- patent citation
263 title VisibleString ,
264 authors Auth-list, -- author/inventor
265 country VisibleString , -- Patent Document Country
266 doc-type VisibleString , -- Patent Document Type
267 number VisibleString OPTIONAL, -- Patent Document Number
268 date-issue Date OPTIONAL, -- Patent Issue/Pub Date
269 class SEQUENCE OF VisibleString OPTIONAL , -- Patent Doc Class Code
270 app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
271 app-date Date OPTIONAL , -- Patent Appl File Date
272 applicants Auth-list OPTIONAL , -- Applicants
273 assignees Auth-list OPTIONAL , -- Assignees
274 priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
275 abstract VisibleString OPTIONAL } -- abstract of patent
276
277 Patent-priority ::= SEQUENCE {
278 country VisibleString , -- Patent country code
279 number VisibleString , -- number assigned in that country
280 date Date } -- date of application
281
282 Id-pat ::= SEQUENCE { -- just to identify a patent
283 country VisibleString , -- Patent Document Country
284 id CHOICE {
285 number VisibleString , -- Patent Document Number
286 app-number VisibleString } , -- Patent Doc Appl Number
287 doc-type VisibleString OPTIONAL } -- Patent Doc Type
288
289 Cit-let ::= SEQUENCE { -- letter, thesis, or manuscript
290 cit Cit-book , -- same fields as a book
291 man-id VisibleString OPTIONAL , -- Manuscript identifier
292 type ENUMERATED {
293 manuscript (1) ,
294 letter (2) ,
295 thesis (3) } OPTIONAL }
296 -- NOTE: this is just to cite a
297 -- direct data submission, see NCBI-Submit
298 -- for the form of a sequence submission
299 Cit-sub ::= SEQUENCE { -- citation for a direct submission
300 authors Auth-list , -- not necessarily authors of the paper
301 imp Imprint OPTIONAL , -- used only to get the date; will go away (see date below)
302 medium ENUMERATED { -- medium of submission
303 paper (1) ,
304 tape (2) ,
305 floppy (3) ,
306 email (4) ,
307 other (255) } OPTIONAL ,
308 date Date OPTIONAL , -- replaces imp, will become required
309 descr VisibleString OPTIONAL } -- description of changes for public view
310
311 Cit-gen ::= SEQUENCE { -- NOT from ANSI, this is a catchall
312 cit VisibleString OPTIONAL , -- anything, not parsable
313 authors Auth-list OPTIONAL ,
314 muid INTEGER OPTIONAL , -- medline uid
315 journal Title OPTIONAL ,
316 volume VisibleString OPTIONAL ,
317 issue VisibleString OPTIONAL ,
318 pages VisibleString OPTIONAL ,
319 date Date OPTIONAL ,
320 serial-number INTEGER OPTIONAL , -- for GenBank style references
321 title VisibleString OPTIONAL , -- eg. cit="unpublished",title="title"
322 pmid PubMedId OPTIONAL } -- PubMed Id
323
324
325 -- Authorship Group
326 Auth-list ::= SEQUENCE {
327 names CHOICE {
328 std SEQUENCE OF Author , -- full citations
329 ml SEQUENCE OF VisibleString , -- MEDLINE, semi-structured
330 str SEQUENCE OF VisibleString } , -- free for all
331 affil Affil OPTIONAL } -- author affiliation
332
333 Author ::= SEQUENCE {
334 name Person-id , -- Author, Primary or Secondary
335 level ENUMERATED {
336 primary (1),
337 secondary (2) } OPTIONAL ,
338 role ENUMERATED { -- Author Role Indicator
339 compiler (1),
340 editor (2),
341 patent-assignee (3),
342 translator (4) } OPTIONAL ,
343 affil Affil OPTIONAL ,
344 is-corr BOOLEAN OPTIONAL } -- TRUE if corresponding author
345
346 Affil ::= CHOICE {
347 str VisibleString , -- unparsed string
348 std SEQUENCE { -- std representation
349 affil VisibleString OPTIONAL , -- Author Affiliation, Name
350 div VisibleString OPTIONAL , -- Author Affiliation, Division
351 city VisibleString OPTIONAL , -- Author Affiliation, City
352 sub VisibleString OPTIONAL , -- Author Affiliation, County Sub
353 country VisibleString OPTIONAL , -- Author Affiliation, Country
354 street VisibleString OPTIONAL , -- street address, not ANSI
355 email VisibleString OPTIONAL ,
356 fax VisibleString OPTIONAL ,
357 phone VisibleString OPTIONAL ,
358 postal-code VisibleString OPTIONAL }}
359
360 -- Title Group
361 -- Valid for: A = Analytic (Cit-art)
362 -- J = Journals (Cit-jour)
363 -- B = Book (Cit-book)
364 -- The trailing letters in each comment below show what that title type is valid for
365 Title ::= SET OF CHOICE {
366 name VisibleString , -- Title, Anal,Coll,Mono AJB
367 tsub VisibleString , -- Title, Subordinate A B
368 trans VisibleString , -- Title, Translated AJB
369 jta VisibleString , -- Title, Abbreviated J
370 iso-jta VisibleString , -- specifically ISO jta J
371 ml-jta VisibleString , -- specifically MEDLINE jta J
372 coden VisibleString , -- a coden J
373 issn VisibleString , -- ISSN J
374 abr VisibleString , -- Title, Abbreviated B
375 isbn VisibleString } -- ISBN B
376
377 Imprint ::= SEQUENCE { -- Imprint group
378 date Date , -- date of publication
379 volume VisibleString OPTIONAL ,
380 issue VisibleString OPTIONAL ,
381 pages VisibleString OPTIONAL ,
382 section VisibleString OPTIONAL ,
383 pub Affil OPTIONAL, -- publisher, required for book
384 cprt Date OPTIONAL, -- copyright date, " " "
385 part-sup VisibleString OPTIONAL , -- part/sup of volume
386 language VisibleString DEFAULT "ENG" , -- put here for simplicity
387 prepub ENUMERATED { -- for prepublication citations
388 submitted (1) , -- submitted, not accepted
389 in-press (2) , -- accepted, not published
390 other (255) } OPTIONAL ,
391 part-supi VisibleString OPTIONAL , -- part/sup on issue
392 retract CitRetract OPTIONAL , -- retraction info
393 pubstatus PubStatus OPTIONAL , -- current status of this publication
394 history PubStatusDateSet OPTIONAL } -- dates for this record
395
396 CitRetract ::= SEQUENCE {
397 type ENUMERATED { -- retraction of an entry
398 retracted (1) , -- this citation retracted
399 notice (2) , -- this citation is a retraction notice
400 in-error (3) , -- an erratum was published about this
401 erratum (4) } , -- this is a published erratum
402 exp VisibleString OPTIONAL } -- citation and/or explanation
403
404 Meeting ::= SEQUENCE {
405 number VisibleString ,
406 date Date ,
407 place Affil OPTIONAL }
408
409
410 END
411
412
413 --$Revision: 6.0 $
414 --**********************************************************************
415 --
416 -- MEDLINE data definitions
417 -- James Ostell, 1990
418 --
419 -- enhanced in 1996 to support PubMed records as well by simply adding
420 -- the PubMedId and making MedlineId optional
421 --
422 --**********************************************************************
423
424 NCBI-Medline DEFINITIONS ::=
425 BEGIN
426
427 EXPORTS Medline-entry, Medline-si;
428
429 IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
430 Date FROM NCBI-General;
431
432 -- a MEDLINE or PubMed entry
433 Medline-entry ::= SEQUENCE {
434 uid INTEGER OPTIONAL , -- MEDLINE UID, sometimes not yet available if from PubMed
435 em Date , -- Entry Month
436 cit Cit-art , -- article citation
437 abstract VisibleString OPTIONAL ,
438 mesh SET OF Medline-mesh OPTIONAL ,
439 substance SET OF Medline-rn OPTIONAL ,
440 xref SET OF Medline-si OPTIONAL ,
441 idnum SET OF VisibleString OPTIONAL , -- ID Number (grants, contracts)
442 gene SET OF VisibleString OPTIONAL ,
443 pmid PubMedId OPTIONAL , -- MEDLINE records may include the PubMedId
444 pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
445 mlfield SET OF Medline-field OPTIONAL , -- additional Medline field types
446 status INTEGER {
447 publisher (1) , -- record as supplied by publisher
448 premedline (2) , -- premedline record
449 medline (3) } DEFAULT medline } -- regular medline record
450
451 Medline-mesh ::= SEQUENCE {
452 mp BOOLEAN DEFAULT FALSE , -- TRUE if main point (*)
453 term VisibleString , -- the MeSH term
454 qual SET OF Medline-qual OPTIONAL } -- qualifiers
455
456 Medline-qual ::= SEQUENCE {
457 mp BOOLEAN DEFAULT FALSE , -- TRUE if main point
458 subh VisibleString } -- the subheading
459
460 Medline-rn ::= SEQUENCE { -- medline substance records
461 type ENUMERATED { -- type of record
462 nameonly (0) ,
463 cas (1) , -- CAS number
464 ec (2) } , -- EC number
465 cit VisibleString OPTIONAL , -- CAS or EC number if present
466 name VisibleString } -- name (always present)
467
468 Medline-si ::= SEQUENCE { -- medline cross reference records
469 type ENUMERATED { -- type of xref
470 ddbj (1) , -- DNA Data Bank of Japan
471 carbbank (2) , -- Carbohydrate Structure Database
472 embl (3) , -- EMBL Data Library
473 hdb (4) , -- Hybridoma Data Bank
474 genbank (5) , -- GenBank
475 hgml (6) , -- Human Gene Map Library
476 mim (7) , -- Mendelian Inheritance in Man
477 msd (8) , -- Microbial Strains Database
478 pdb (9) , -- Protein Data Bank (Brookhaven)
479 pir (10) , -- Protein Identification Resource
480 prfseqdb (11) , -- Protein Research Foundation (Japan)
481 psd (12) , -- Protein Sequence Database (Japan)
482 swissprot (13) , -- SwissProt
483 gdb (14) } , -- Genome Data Base
484 cit VisibleString OPTIONAL } -- the citation/accession number
485
486 Medline-field ::= SEQUENCE {
487 type INTEGER { -- Keyed type
488 other (0) , -- look in line code
489 comment (1) , -- comment line
490 erratum (2) } , -- retracted, corrected, etc
491 str VisibleString , -- the text
492 ids SEQUENCE OF DocRef OPTIONAL } -- pointers relevant to this text
493
494 DocRef ::= SEQUENCE { -- reference to a document
495 type INTEGER {
496 medline (1) ,
497 pubmed (2) ,
498 ncbigi (3) } ,
499 uid INTEGER }
500
501 END
502
503 --$Revision: 6.0 $
504 --**********************************************************************
505 --
506 -- PUBMED data definitions
507 --
508 --**********************************************************************
509
510 NCBI-PubMed DEFINITIONS ::=
511 BEGIN
512
513 EXPORTS Pubmed-entry, Pubmed-url;
514
515 IMPORTS PubMedId FROM NCBI-Biblio
516 Medline-entry FROM NCBI-Medline;
517
518 Pubmed-entry ::= SEQUENCE { -- a PubMed entry
519 -- PUBMED records must include the PubMedId
520 pmid PubMedId,
521
522 -- Medline entry information
523 medent Medline-entry OPTIONAL,
524
525 -- Publisher name
526 publisher VisibleString OPTIONAL,
527
528 -- List of URLs to publisher site
529 urls SET OF Pubmed-url OPTIONAL,
530
531 -- Publisher's article identifier
532 pubid VisibleString OPTIONAL
533 }
534
535 Pubmed-url ::= SEQUENCE {
536 location VisibleString OPTIONAL, -- Location code
537 url VisibleString -- Selected URL for location
538 }
539
540 END
541 --$Revision: 6.0 $
542 --**********************************************************************
543 --
544 -- MEDLARS data definitions
545 -- Grigoriy Starchenko, 1997
546 --
547 --**********************************************************************
548
549 NCBI-Medlars DEFINITIONS ::=
550 BEGIN
551
552 EXPORTS Medlars-entry, Medlars-record;
553
554 IMPORTS PubMedId FROM NCBI-Biblio;
555
556 Medlars-entry ::= SEQUENCE { -- a MEDLARS entry
557 pmid PubMedId, -- All entries in PubMed must have it
558 muid INTEGER OPTIONAL, -- Medline(OCCS) id
559 recs SET OF Medlars-record -- List of Medlars records
560 }
561
562 Medlars-record ::= SEQUENCE {
563 code INTEGER, -- Unit record field type integer form
564 abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
565 data VisibleString -- Unit record data
566 }
567
568 END
569 --$Revision: 6.0 $
570 --********************************************************************
571 --
572 -- Publication common set
573 -- James Ostell, 1990
574 --
575 -- These are the base class definitions for Publications of all sorts
576 --
577 -- support for PubMedId added in 1996
578 --********************************************************************
579
580 NCBI-Pub DEFINITIONS ::=
581 BEGIN
582
583 EXPORTS Pub, Pub-set, Pub-equiv;
584
585 IMPORTS Medline-entry FROM NCBI-Medline
586 Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
587 Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;
588
589 Pub ::= CHOICE {
590 gen Cit-gen , -- general or generic unparsed
591 sub Cit-sub , -- submission
592 medline Medline-entry ,
593 muid INTEGER , -- medline uid
594 article Cit-art ,
595 journal Cit-jour ,
596 book Cit-book ,
597 proc Cit-proc , -- proceedings of a meeting
598 patent Cit-pat ,
599 pat-id Id-pat , -- identify a patent
600 man Cit-let , -- manuscript, thesis, or letter
601 equiv Pub-equiv, -- to cite a variety of ways
602 pmid PubMedId } -- PubMedId
603
604 Pub-equiv ::= SET OF Pub -- equivalent identifiers for same citation
605
606 Pub-set ::= CHOICE {
607 pub SET OF Pub ,
608 medline SET OF Medline-entry ,
609 article SET OF Cit-art ,
610 journal SET OF Cit-jour ,
611 book SET OF Cit-book ,
612 proc SET OF Cit-proc , -- proceedings of a meeting
613 patent SET OF Cit-pat }
614
615 END
616
617 --$Revision: 6.5 $
618 --**********************************************************************
619 --
620 -- NCBI Sequence location and identifier elements
621 -- by James Ostell, 1990
622 --
623 -- Version 3.0 - 1994
624 --
625 --**********************************************************************
626
627 NCBI-Seqloc DEFINITIONS ::=
628 BEGIN
629
630 EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
631 Na-strand, Giimport-id;
632
633 IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
634 Id-pat FROM NCBI-Biblio
635 Feat-id FROM NCBI-Seqfeat;
636
637 --*** Sequence identifiers ********************************
638 --*
639
640 Seq-id ::= CHOICE {
641 local Object-id , -- local use
642 gibbsq INTEGER , -- Geninfo backbone seqid
643 gibbmt INTEGER , -- Geninfo backbone moltype
644 giim Giimport-id , -- Geninfo import id
645 genbank Textseq-id ,
646 embl Textseq-id ,
647 pir Textseq-id ,
648 swissprot Textseq-id ,
649 patent Patent-seq-id ,
650 other Textseq-id , -- for historical reasons, 'other' = 'refseq'
651 general Dbtag , -- for other databases
652 gi INTEGER , -- GenInfo Integrated Database
653 ddbj Textseq-id , -- DDBJ
654 prf Textseq-id , -- PRF SEQDB
655 pdb PDB-seq-id , -- PDB sequence
656 tpg Textseq-id , -- Third Party Annot/Seq Genbank
657 tpe Textseq-id , -- Third Party Annot/Seq EMBL
658 tpd Textseq-id , -- Third Party Annot/Seq DDBJ
659 gpipe Textseq-id , -- Internal NCBI genome pipeline processing ID
660 named-annot-track Textseq-id -- Internal named annotation tracking ID
661 }
662
663 Seq-id-set ::= SET OF Seq-id
664
665
666 Patent-seq-id ::= SEQUENCE {
667 seqid INTEGER , -- number of sequence in patent
668 cit Id-pat } -- patent citation
669
670 Textseq-id ::= SEQUENCE {
671 name VisibleString OPTIONAL ,
672 accession VisibleString OPTIONAL ,
673 release VisibleString OPTIONAL ,
674 version INTEGER OPTIONAL }
675
676 Giimport-id ::= SEQUENCE {
677 id INTEGER , -- the id to use here
678 db VisibleString OPTIONAL , -- dbase used in
679 release VisibleString OPTIONAL } -- the release
680
681 PDB-seq-id ::= SEQUENCE {
682 mol PDB-mol-id , -- the molecule name
683 chain INTEGER DEFAULT 32 , -- a single ASCII character, chain id (default 32 = ASCII space)
684 rel Date OPTIONAL } -- release date, month and year
685
686 PDB-mol-id ::= VisibleString -- name of mol, 4 chars
687
688 --*** Sequence locations **********************************
689 --*
690
691 Seq-loc ::= CHOICE {
692 null NULL , -- not placed
693 empty Seq-id , -- to NULL one Seq-id in a collection
694 whole Seq-id , -- whole sequence
695 int Seq-interval , -- from to
696 packed-int Packed-seqint ,
697 pnt Seq-point ,
698 packed-pnt Packed-seqpnt ,
699 mix Seq-loc-mix ,
700 equiv Seq-loc-equiv , -- equivalent sets of locations
701 bond Seq-bond ,
702 feat Feat-id } -- indirect, through a Seq-feat
703
704
705 Seq-interval ::= SEQUENCE {
706 from INTEGER ,
707 to INTEGER ,
708 strand Na-strand OPTIONAL ,
709 id Seq-id , -- WARNING: this used to be optional
710 fuzz-from Int-fuzz OPTIONAL ,
711 fuzz-to Int-fuzz OPTIONAL }
712
713 Packed-seqint ::= SEQUENCE OF Seq-interval
714
715 Seq-point ::= SEQUENCE {
716 point INTEGER ,
717 strand Na-strand OPTIONAL ,
718 id Seq-id , -- WARNING: this used to be optional
719 fuzz Int-fuzz OPTIONAL }
720
721 Packed-seqpnt ::= SEQUENCE {
722 strand Na-strand OPTIONAL ,
723 id Seq-id ,
724 fuzz Int-fuzz OPTIONAL ,
725 points SEQUENCE OF INTEGER }
726
727 Na-strand ::= ENUMERATED { -- strand of nucleic acid
728 unknown (0) ,
729 plus (1) ,
730 minus (2) ,
731 both (3) , -- in forward orientation
732 both-rev (4) , -- in reverse orientation
733 other (255) }
734
735 Seq-bond ::= SEQUENCE { -- bond between residues
736 a Seq-point , -- connection to at least one residue
737 b Seq-point OPTIONAL } -- other end may not be available
738
739 Seq-loc-mix ::= SEQUENCE OF Seq-loc -- this will hold anything
740
741 Seq-loc-equiv ::= SET OF Seq-loc -- for a set of equivalent locations
742
743 END
744
745
746 --$Revision: 6.24 $
747 --**********************************************************************
748 --
749 -- NCBI Sequence elements
750 -- by James Ostell, 1990
751 -- Version 3.0 - June 1994
752 --
753 --**********************************************************************
754
755 NCBI-Sequence DEFINITIONS ::=
756 BEGIN
757
758 EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
759 Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
760 Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap;
761
762 IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
763 Seq-align FROM NCBI-Seqalign
764 Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
765 Seq-graph FROM NCBI-Seqres
766 Pub-equiv FROM NCBI-Pub
767 Org-ref FROM NCBI-Organism
768 BioSource FROM NCBI-BioSource
769 Seq-id, Seq-loc FROM NCBI-Seqloc
770 GB-block FROM GenBank-General
771 PIR-block FROM PIR-General
772 EMBL-block FROM EMBL-General
773 SP-block FROM SP-General
774 PRF-block FROM PRF-General
775 PDB-block FROM PDB-General
776 Seq-table FROM NCBI-SeqTable;
777
778 --*** Sequence ********************************
779 --*
780
781 Bioseq ::= SEQUENCE {
782 id SET OF Seq-id , -- equivalent identifiers
783 descr Seq-descr OPTIONAL , -- descriptors
784 inst Seq-inst , -- the sequence data
785 annot SET OF Seq-annot OPTIONAL }
786
787 --*** Descriptors *****************************
788 --*
789
790 Seq-descr ::= SET OF Seqdesc
791
792 Seqdesc ::= CHOICE {
793 mol-type GIBB-mol , -- type of molecule
794 modif SET OF GIBB-mod , -- modifiers
795 method GIBB-method , -- sequencing method
796 name VisibleString , -- a name for this sequence
797 title VisibleString , -- a title for this sequence
798 org Org-ref , -- if all from one organism
799 comment VisibleString , -- a more extensive comment
800 num Numbering , -- a numbering system
801 maploc Dbtag , -- map location of this sequence
802 pir PIR-block , -- PIR specific info
803 genbank GB-block , -- GenBank specific info
804 pub Pubdesc , -- a reference to the publication
805 region VisibleString , -- overall region (globin locus)
806 user User-object , -- user defined object
807 sp SP-block , -- SWISSPROT specific info
808 dbxref Dbtag , -- xref to other databases
809 embl EMBL-block , -- EMBL specific information
810 create-date Date , -- date entry first created/released
811 update-date Date , -- date of last update
812 prf PRF-block , -- PRF specific information
813 pdb PDB-block , -- PDB specific information
814 het Heterogen , -- cofactor, etc associated but not bound
815 source BioSource , -- source of materials, includes Org-ref
816 molinfo MolInfo , -- info on the molecule and techniques
817 modelev ModelEvidenceSupport -- model evidence for XM records
818 }
819
820 --******* NOTE:
821 --* mol-type, modif, method, and org are consolidated and expanded
822 --* in Org-ref, BioSource, and MolInfo in this specification. They
823 --* will be removed in later specifications. Do not use them in the
824 --* future. Instead expect the new structures.
825 --*
826 --***************************
827
828 --********************************************************************
829 --
830 -- MolInfo gives information on the
831 -- classification of the type and quality of the sequence
832 --
833 -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
834 --
835 --********************************************************************
836
837 MolInfo ::= SEQUENCE {
838 biomol INTEGER {
839 unknown (0) ,
840 genomic (1) ,
841 pre-RNA (2) , -- precursor RNA of any sort really
842 mRNA (3) ,
843 rRNA (4) ,
844 tRNA (5) ,
845 snRNA (6) ,
846 scRNA (7) ,
847 peptide (8) ,
848 other-genetic (9) , -- other genetic material
849 genomic-mRNA (10) , -- reported a mix of genomic and cdna sequence
850 cRNA (11) , -- viral RNA genome copy intermediate
851 snoRNA (12) , -- small nucleolar RNA
852 transcribed-RNA (13) , -- transcribed RNA other than existing classes
853 ncRNA (14) ,
854 tmRNA (15) ,
855 other (255) } DEFAULT unknown ,
856 tech INTEGER {
857 unknown (0) ,
858 standard (1) , -- standard sequencing
859 est (2) , -- Expressed Sequence Tag
860 sts (3) , -- Sequence Tagged Site
861 survey (4) , -- one-pass genomic sequence
862 genemap (5) , -- from genetic mapping techniques
863 physmap (6) , -- from physical mapping techniques
864 derived (7) , -- derived from other data, not a primary entity
865 concept-trans (8) , -- conceptual translation
866 seq-pept (9) , -- peptide was sequenced
867 both (10) , -- concept transl. w/ partial pept. seq.
868 seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
869 seq-pept-homol (12) , -- sequenced peptide, ordered by homology
870 concept-trans-a (13) , -- conceptual transl. supplied by author
871 htgs-1 (14) , -- unordered High Throughput sequence contig
872 htgs-2 (15) , -- ordered High Throughput sequence contig
873 htgs-3 (16) , -- finished High Throughput sequence
874 fli-cdna (17) , -- full length insert cDNA
875 htgs-0 (18) , -- single genomic reads for coordination
876 htc (19) , -- high throughput cDNA
877 wgs (20) , -- whole genome shotgun sequencing
878 barcode (21) , -- barcode of life project
879 composite-wgs-htgs (22) , -- composite of WGS and HTGS
880 tsa (23) , -- transcriptome shotgun assembly
881 other (255) } -- use the techexp field below to explain
882 DEFAULT unknown ,
883 techexp VisibleString OPTIONAL , -- explanation if tech not enough
884 --
885 -- Completeness is not indicated in most records. For genomes, assume
886 -- the sequences are incomplete unless specifically marked as complete.
887 -- For mRNAs, assume the ends are not known exactly unless marked as
888 -- having the left or right end.
889 --
890 completeness INTEGER {
891 unknown (0) ,
892 complete (1) , -- complete biological entity
893 partial (2) , -- partial but no details given
894 no-left (3) , -- missing 5' or NH2 end
895 no-right (4) , -- missing 3' or COOH end
896 no-ends (5) , -- missing both ends
897 has-left (6) , -- 5' or NH2 end present
898 has-right (7) , -- 3' or COOH end present
899 other (255) } DEFAULT unknown ,
900 gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA
901
902
903 GIBB-mol ::= ENUMERATED { -- GenInfo Backbone code for the type of molecule represented
904 unknown (0) ,
905 genomic (1) ,
906 pre-mRNA (2) , -- precursor RNA of any sort really
907 mRNA (3) ,
908 rRNA (4) ,
909 tRNA (5) ,
910 snRNA (6) ,
911 scRNA (7) ,
912 peptide (8) ,
913 other-genetic (9) , -- other genetic material
914 genomic-mRNA (10) , -- reported as a mix of genomic and cDNA sequence
915 other (255) } -- catch-all for molecule types not listed above
916
917 GIBB-mod ::= ENUMERATED { -- GenInfo Backbone modifiers; one value per modifier
918 dna (0) ,
919 rna (1) ,
920 extrachrom (2) ,
921 plasmid (3) ,
922 mitochondrial (4) ,
923 chloroplast (5) ,
924 kinetoplast (6) ,
925 cyanelle (7) ,
926 synthetic (8) ,
927 recombinant (9) ,
928 partial (10) ,
929 complete (11) ,
930 mutagen (12) , -- subject of mutagenesis ?
931 natmut (13) , -- natural mutant ?
932 transposon (14) ,
933 insertion-seq (15) ,
934 no-left (16) , -- missing left end (5' for na, NH2 for aa)
935 no-right (17) , -- missing right end (3' for na, COOH for aa)
936 macronuclear (18) ,
937 proviral (19) ,
938 est (20) , -- expressed sequence tag
939 sts (21) , -- sequence tagged site
940 survey (22) , -- one pass survey sequence
941 chromoplast (23) ,
942 genemap (24) , -- is a genetic map
943 restmap (25) , -- is an ordered restriction map
944 physmap (26) , -- is a physical map (not ordered restriction map)
945 other (255) } -- catch-all for modifiers not listed above
946
947 GIBB-method ::= ENUMERATED { -- sequencing methods; values start at 1, no zero value is defined
948 concept-trans (1) , -- conceptual translation
949 seq-pept (2) , -- peptide was sequenced
950 both (3) , -- conceptual translation with partial peptide sequencing
951 seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
952 seq-pept-homol (5) , -- sequenced peptide, ordered by homology
953 concept-trans-a (6) , -- conceptual translation supplied by author
954 other (255) } -- catch-all for methods not listed above
955
956 Numbering ::= CHOICE { -- any display numbering system; exactly one of the four schemes below
957 cont Num-cont , -- continuous numbering
958 enum Num-enum , -- enumerated names for residues
959 ref Num-ref , -- by reference to another sequence
960 real Num-real } -- supports mapping to a floating point system
961
962 Num-cont ::= SEQUENCE { -- continuous display numbering system
963 refnum INTEGER DEFAULT 1, -- number assigned to first residue (1 if absent)
964 has-zero BOOLEAN DEFAULT FALSE , -- is 0 used as a number? (no if absent)
965 ascending BOOLEAN DEFAULT TRUE } -- do the numbers ascend? (yes if absent)
966
967 Num-enum ::= SEQUENCE { -- numbering by explicit tags (names) attached to residues
968 num INTEGER , -- number of tags to follow; presumably equals the length of names (TODO confirm)
969 names SEQUENCE OF VisibleString } -- the tags, in order
970
971 Num-ref ::= SEQUENCE { -- numbering by reference to other sequences
972 type ENUMERATED { -- type of reference
973 not-set (0) ,
974 sources (1) , -- by segmented or const seq sources
975 aligns (2) } , -- by alignments given below
976 aligns Seq-align OPTIONAL } -- the alignment referred to by type aligns; may be absent
977
978 Num-real ::= SEQUENCE { -- linear mapping to a floating point numbering system
979 a REAL , -- scale factor applied to the integer position used by Bioseq
980 b REAL , -- offset: position = (a * int_position) + b
981 units VisibleString OPTIONAL } -- presumably a label for the units of the mapped position (TODO confirm)
982
983 Pubdesc ::= SEQUENCE { -- how the sequence was presented in a publication
984 pub Pub-equiv , -- the citation(s)
985 name VisibleString OPTIONAL , -- name used in paper
986 fig VisibleString OPTIONAL , -- figure in paper
987 num Numbering OPTIONAL , -- numbering from paper
988 numexc BOOLEAN OPTIONAL , -- numbering problem with paper
989 poly-a BOOLEAN OPTIONAL , -- poly A tail indicated in figure?
990 maploc VisibleString OPTIONAL , -- map location reported in paper
991 seq-raw StringStore OPTIONAL , -- original sequence from paper
992 align-group INTEGER OPTIONAL , -- this seq aligned with others in paper
993 comment VisibleString OPTIONAL, -- any comment on this pub in context
994 reftype INTEGER { -- type of reference in a GenBank record
995 seq (0) , -- refers to sequence
996 sites (1) , -- refers to unspecified features
997 feats (2) , -- refers to specified features
998 no-target (3) } -- nothing specified (EMBL)
999 DEFAULT seq } -- reftype is seq (0) when absent
1000
1001 Heterogen ::= VisibleString -- text naming a cofactor, prosthetic group, inhibitor, etc
1002
1003 --*** Instances of sequences *******************************
1004 --*
1005
1006 Seq-inst ::= SEQUENCE { -- the sequence data itself, plus how it is represented
1007 repr ENUMERATED { -- representation class
1008 not-set (0) , -- empty
1009 virtual (1) , -- no seq data
1010 raw (2) , -- continuous sequence
1011 seg (3) , -- segmented sequence
1012 const (4) , -- constructed sequence
1013 ref (5) , -- reference to another sequence
1014 consen (6) , -- consensus sequence or pattern
1015 map (7) , -- ordered map of any kind
1016 delta (8) , -- sequence made by changes (delta) to others
1017 other (255) } ,
1018 mol ENUMERATED { -- molecule class in living organism
1019 not-set (0) , -- original remark here reads: cdna = rna (presumably cDNA is recorded as rna, TODO confirm)
1020 dna (1) ,
1021 rna (2) ,
1022 aa (3) ,
1023 na (4) , -- just a nucleic acid
1024 other (255) } ,
1025 length INTEGER OPTIONAL , -- length of sequence in residues
1026 fuzz Int-fuzz OPTIONAL , -- length uncertainty
1027 topology ENUMERATED { -- topology of molecule
1028 not-set (0) ,
1029 linear (1) ,
1030 circular (2) ,
1031 tandem (3) , -- some part of tandem repeat
1032 other (255) } DEFAULT linear , -- linear when absent
1033 strand ENUMERATED { -- strandedness in living organism
1034 not-set (0) ,
1035 ss (1) , -- single strand
1036 ds (2) , -- double strand
1037 mixed (3) ,
1038 other (255) } OPTIONAL , -- when absent, assume ds for DNA and ss for RNA and peptides
1039 seq-data Seq-data OPTIONAL , -- the sequence
1040 ext Seq-ext OPTIONAL , -- extensions for special types
1041 hist Seq-hist OPTIONAL } -- sequence history
1042
1043 --*** Sequence Extensions **********************************
1044 --* for representing more complex types
1045 --* const type uses Seq-hist.assembly
1046
1047 Seq-ext ::= CHOICE { -- extension data; exactly one of the alternatives below
1048 seg Seg-ext , -- segmented sequences
1049 ref Ref-ext , -- hot link to another sequence (a view)
1050 map Map-ext , -- ordered map of markers
1051 delta Delta-ext } -- list of Delta-seq pieces (locations or literals)
1052
1053 Seg-ext ::= SEQUENCE OF Seq-loc -- ordered list of locations, used by Seq-ext.seg
1054
1055 Ref-ext ::= Seq-loc -- a single location, used by Seq-ext.ref
1056
1057 Map-ext ::= SEQUENCE OF Seq-feat -- ordered list of features, used by Seq-ext.map
1058
1059 Delta-ext ::= SEQUENCE OF Delta-seq -- ordered list of Delta-seq pieces, used by Seq-ext.delta
1060
1061 Delta-seq ::= CHOICE { -- one piece of a Delta-ext
1062 loc Seq-loc , -- point to a sequence
1063 literal Seq-literal } -- a piece of sequence given inline
1064
1065 Seq-literal ::= SEQUENCE { -- a piece of sequence with a mandatory length
1066 length INTEGER , -- must give a length in residues
1067 fuzz Int-fuzz OPTIONAL , -- uncertainty of the length, if unsure
1068 seq-data Seq-data OPTIONAL } -- the residues themselves, if available
1069
1070 --*** Sequence History Record ***********************************
1071 --** assembly = records how seq was assembled from others
1072 --** replaces = records sequences made obsolete by this one
1073 --** replaced-by = this seq is made obsolete by another(s)
1074
1075 Seq-hist ::= SEQUENCE { -- history record of a sequence; every element is optional
1076 assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
1077 replaces Seq-hist-rec OPTIONAL , -- seq makes these seqs obsolete
1078 replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
1079 deleted CHOICE { -- deletion marker, given either as a flag or as a date
1080 bool BOOLEAN ,
1081 date Date } OPTIONAL } -- presumably the date of deletion (TODO confirm)
1082
1083 Seq-hist-rec ::= SEQUENCE { -- one history entry, used by Seq-hist replaces and replaced-by
1084 date Date OPTIONAL , -- presumably when the replacement happened (TODO confirm)
1085 ids SET OF Seq-id } -- ids of the sequences concerned
1086
1087 --*** Various internal sequence representations ************
1088 --* all are controlled, fixed length forms
1089
1090 Seq-data ::= CHOICE { -- sequence representations; exactly one encoding per value
1091 iupacna IUPACna , -- IUPAC 1 letter nuc acid code
1092 iupacaa IUPACaa , -- IUPAC 1 letter amino acid code
1093 ncbi2na NCBI2na , -- 2 bit nucleic acid code
1094 ncbi4na NCBI4na , -- 4 bit nucleic acid code
1095 ncbi8na NCBI8na , -- 8 bit extended nucleic acid code
1096 ncbipna NCBIpna , -- nucleic acid probabilities
1097 ncbi8aa NCBI8aa , -- 8 bit extended amino acid codes
1098 ncbieaa NCBIeaa , -- extended ASCII 1 letter aa codes
1099 ncbipaa NCBIpaa , -- amino acid probabilities
1100 ncbistdaa NCBIstdaa, -- consecutive codes for std aas
1101 gap Seq-gap -- a gap description (see Seq-gap) instead of residues
1102 }
1103
1104 Seq-gap ::= SEQUENCE { -- description of a gap used as sequence data
1105 type INTEGER { -- kind of gap (required)
1106 unknown(0),
1107 fragment(1), -- Deprecated. Used only for AGP 1.1
1108 clone(2), -- Deprecated. Used only for AGP 1.1
1109 short-arm(3),
1110 heterochromatin(4),
1111 centromere(5),
1112 telomere(6),
1113 repeat(7),
1114 contig(8),
1115 scaffold(9),
1116 other(255)
1117 },
1118 linkage INTEGER { -- presumably whether the sequences on either side of the gap are linked (TODO confirm)
1119 unlinked(0),
1120 linked(1),
1121 other(255)
1122 } OPTIONAL,
1123 linkage-evidence SET OF Linkage-evidence OPTIONAL -- zero or more kinds of evidence, see Linkage-evidence
1124 }
1125
1126 Linkage-evidence ::= SEQUENCE { -- one kind of evidence, as used in Seq-gap.linkage-evidence
1127 type INTEGER { -- kind of evidence (required, the only element)
1128 paired-ends(0),
1129 align-genus(1),
1130 align-xgenus(2),
1131 align-trnscpt(3),
1132 within-clone(4),
1133 clone-contig(5),
1134 map(6),
1135 strobe(7),
1136 unspecified(8),
1137 other(255)
1138 }
1139 }
1140
1141 IUPACna ::= StringStore -- IUPAC 1 letter nucleic acid codes, no spaces
1142 IUPACaa ::= StringStore -- IUPAC 1 letter amino acid codes, no spaces
1143 NCBI2na ::= OCTET STRING -- 2 bit code: 00=A, 01=C, 10=G, 11=T
1144 NCBI4na ::= OCTET STRING -- 4 bit code, 1 bit each for a, g, c, t
1145 -- 0001=A, 0010=C, 0100=G, 1000=T/U
1146 -- bits combine for ambiguity: 0101=Purine, 1010=Pyrimidine, etc
1147 NCBI8na ::= OCTET STRING -- 8 bit code for modified nucleic acids
1148 NCBIpna ::= OCTET STRING -- 5 octets/base, prob for a,c,g,t,n
1149 -- probabilities are coded 0-255 = 0.0-1.0
1150 NCBI8aa ::= OCTET STRING -- 8 bit code for modified amino acids
1151 NCBIeaa ::= StringStore -- ASCII extended 1 letter aa codes
1152 -- IUPAC codes + U=selenocysteine
1153 NCBIpaa ::= OCTET STRING -- 25 octets/aa, prob for IUPAC aas in order:
1154 -- A-Y,B,Z,X,(ter),anything
1155 -- probabilities are coded 0-255 = 0.0-1.0
1156 NCBIstdaa ::= OCTET STRING -- consecutive codes 0-25, 1 per byte
1157
1158 --*** Sequence Annotation *************************************
1159 --*
1160
1161 -- This is a replica of Textseq-id
1162 -- This is specific for annotations, and exists to maintain a semantic
1163 -- difference between IDs assigned to annotations and IDs assigned to
1164 -- sequences
1165 Textannot-id ::= SEQUENCE { -- text id for an annotation; every element is optional
1166 name VisibleString OPTIONAL ,
1167 accession VisibleString OPTIONAL ,
1168 release VisibleString OPTIONAL ,
1169 version INTEGER OPTIONAL
1170 }
1171
1172 Annot-id ::= CHOICE { -- identifier of an annotation; exactly one of the forms below
1173 local Object-id , -- an Object-id
1174 ncbi INTEGER , -- an integer id; presumably assigned by NCBI (TODO confirm)
1175 general Dbtag, -- a Dbtag
1176 other Textannot-id -- a text id, see Textannot-id
1177 }
1178
1179 Annot-descr ::= SET OF Annotdesc -- unordered set of descriptors, used by Seq-annot.desc
1180
1181 Annotdesc ::= CHOICE { -- one descriptor of an annotation collection
1182 name VisibleString , -- a short name for this collection
1183 title VisibleString , -- a title for this collection
1184 comment VisibleString , -- a more extensive comment
1185 pub Pubdesc , -- a reference to the publication
1186 user User-object , -- user defined object
1187 create-date Date , -- date entry first created/released
1188 update-date Date , -- date of last update
1189 src Seq-id , -- source sequence from which annot came
1190 align Align-def, -- definition of the SeqAligns
1191 region Seq-loc } -- all contents cover this region
1192
1193 Align-def ::= SEQUENCE { -- describes the alignments in a Seq-annot, used by Annotdesc.align
1194 align-type INTEGER { -- class of align Seq-annot
1195 ref (1) , -- set of alignments to the same sequence
1196 alt (2) , -- set of alternate alignments of the same seqs
1197 blocks (3) , -- set of aligned blocks in the same seqs
1198 other (255) } ,
1199 ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
1200
1201 Seq-annot ::= SEQUENCE { -- a set of annotations of one kind, with optional identification
1202 id SET OF Annot-id OPTIONAL , -- identifiers of this annotation set
1203 db INTEGER { -- source of annotation
1204 genbank (1) ,
1205 embl (2) ,
1206 ddbj (3) ,
1207 pir (4) ,
1208 sp (5) ,
1209 bbone (6) ,
1210 pdb (7) ,
1211 other (255) } OPTIONAL ,
1212 name VisibleString OPTIONAL ,-- source if "other" above
1213 desc Annot-descr OPTIONAL , -- used only for stand alone Seq-annots
1214 data CHOICE { -- the annotations themselves; all of one kind
1215 ftable SET OF Seq-feat , -- features
1216 align SET OF Seq-align , -- alignments
1217 graph SET OF Seq-graph , -- graphs
1218 ids SET OF Seq-id , -- used for communication between tools
1219 locs SET OF Seq-loc , -- used for communication between tools
1220 seq-table Seq-table } } -- features in table form
1221
1222 END
1223
1224
1225 --$Revision: 6.6 $
1226 --**********************************************************************
1227 --
1228 -- NCBI Sequence Collections
1229 -- by James Ostell, 1990
1230 --
1231 -- Version 3.0 - 1994
1232 --
1233 --**********************************************************************
1234
1235 NCBI-Seqset DEFINITIONS ::= -- module defining collections of sequences
1236 BEGIN
1237
1238 EXPORTS Bioseq-set, Seq-entry;
1239
1240 IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
1241 Object-id, Dbtag, Date FROM NCBI-General;
1242
1243 --*** Sequence Collections ********************************
1244 --*
1245
1246 Bioseq-set ::= SEQUENCE { -- just a collection of Seq-entry, with optional descriptive data
1247 id Object-id OPTIONAL ,
1248 coll Dbtag OPTIONAL , -- to identify a collection
1249 level INTEGER OPTIONAL , -- nesting level
1250 class ENUMERATED { -- kind of collection
1251 not-set (0) ,
1252 nuc-prot (1) , -- nuc acid and coded proteins
1253 segset (2) , -- segmented sequence + parts
1254 conset (3) , -- constructed sequence + parts
1255 parts (4) , -- parts for segset (2) or conset (3)
1256 gibb (5) , -- geninfo backbone
1257 gi (6) , -- geninfo
1258 genbank (7) , -- converted genbank
1259 pir (8) , -- converted pir
1260 pub-set (9) , -- all the seqs from a single publication
1261 equiv (10) , -- a set of equivalent maps or seqs
1262 swissprot (11) , -- converted SWISSPROT
1263 pdb-entry (12) , -- a complete PDB entry
1264 mut-set (13) , -- set of mutations
1265 pop-set (14) , -- population study
1266 phy-set (15) , -- phylogenetic study
1267 eco-set (16) , -- ecological sample study
1268 gen-prod-set (17) , -- genomic products, chrom+mRNA+protein
1269 wgs-set (18) , -- whole genome shotgun project
1270 named-annot (19) , -- named annotation set
1271 named-annot-prod (20) , -- with instantiated mRNA+protein
1272 read-set (21) , -- set from a single read
1273 paired-end-reads (22) , -- paired sequences within a read-set
1274 small-genome-set (23) , -- viral segments or mitochondrial minicircles
1275 other (255) } DEFAULT not-set , -- not-set when absent
1276 release VisibleString OPTIONAL ,
1277 date Date OPTIONAL ,
1278 descr Seq-descr OPTIONAL ,
1279 seq-set SEQUENCE OF Seq-entry , -- the members of the collection (required, ordered)
1280 annot SET OF Seq-annot OPTIONAL }
1281
1282 Seq-entry ::= CHOICE { -- either a single sequence or a nested collection
1283 seq Bioseq ,
1284 set Bioseq-set }
1285
1286 END
1287
1288 --$Revision: 6.0 $
1289 -- *********************************************************************
1290 --
1291 -- These are code and conversion tables for NCBI sequence codes
1292 -- ASN.1 for the sequences themselves is defined in seq.asn
1293 --
1294 -- Seq-map-table and Seq-code-table REQUIRE that codes start with 0
1295 -- and increase continuously. So IUPAC codes, which are upper case
1296 -- letters will always have 65 zero cells before the codes begin. This
1297 -- allows all codes to do indexed lookups for things
1298 --
1299 -- Valid names for code tables are:
1300 -- IUPACna
1301 -- IUPACaa
1302 -- IUPACeaa
1303 -- IUPACaa3 3 letter amino acid codes : parallels IUPACeaa
1304 -- display only, not a data exchange type
1305 -- NCBI2na
1306 -- NCBI4na
1307 -- NCBI8na
1308 -- NCBI8aa
1309 -- NCBIstdaa
1310 -- probability types map to IUPAC types for display as characters
1311
1312 NCBI-SeqCode DEFINITIONS ::= -- module defining code tables and conversion tables for sequence codes
1313 BEGIN
1314
1315 EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;
1316
1317 Seq-code-type ::= ENUMERATED { -- sequence representations; values start at 1
1318 iupacna (1) , -- IUPAC 1 letter nuc acid code
1319 iupacaa (2) , -- IUPAC 1 letter amino acid code
1320 ncbi2na (3) , -- 2 bit nucleic acid code
1321 ncbi4na (4) , -- 4 bit nucleic acid code
1322 ncbi8na (5) , -- 8 bit extended nucleic acid code
1323 ncbipna (6) , -- nucleic acid probabilities
1324 ncbi8aa (7) , -- 8 bit extended amino acid codes
1325 ncbieaa (8) , -- extended ASCII 1 letter aa codes
1326 ncbipaa (9) , -- amino acid probabilities
1327 iupacaa3 (10) , -- 3 letter code only for display
1328 ncbistdaa (11) } -- consecutive codes for std aas, 0-25
1329
1330 Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings between two codes
1331 from Seq-code-type , -- code to map from
1332 to Seq-code-type , -- code to map to
1333 num INTEGER , -- number of rows in table
1334 start-at INTEGER DEFAULT 0 , -- index offset of first element (0 when absent)
1335 table SEQUENCE OF INTEGER } -- table of values, in from-to order
1336
1337 Seq-code-table ::= SEQUENCE { -- for names of coded values
1338 code Seq-code-type , -- name of code
1339 num INTEGER , -- number of rows in table
1340 one-letter BOOLEAN , -- symbol is ALWAYS 1 letter?
1341 start-at INTEGER DEFAULT 0 , -- index offset of first element (0 when absent)
1342 table SEQUENCE OF -- one row per coded value
1343 SEQUENCE {
1344 symbol VisibleString , -- the printed symbol or letter
1345 name VisibleString } , -- an explanatory name or string
1346 comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid
1347
1348 Seq-code-set ::= SEQUENCE { -- for distribution: bundles code tables and map tables
1349 codes SET OF Seq-code-table OPTIONAL ,
1350 maps SET OF Seq-map-table OPTIONAL }
1351
1352 END
1353
1354 --$Revision: 6.0 $
1355 --*********************************************************************
1356 --
1357 -- 1990 - J.Ostell
1358 -- Version 3.0 - June 1994
1359 --
1360 --*********************************************************************
1361 --*********************************************************************
1362 --
1363 -- EMBL specific data
1364 -- This block of specifications was developed by Reiner Fuchs of EMBL
1365 -- Updated by J.Ostell, 1994
1366 --
1367 --*********************************************************************
1368
1369 EMBL-General DEFINITIONS ::= -- module defining EMBL specific data
1370 BEGIN
1371
1372 EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;
1373
1374 IMPORTS Date, Object-id FROM NCBI-General;
1375
1376 EMBL-dbname ::= CHOICE { -- a database, given either by code or by name
1377 code ENUMERATED { -- one of the listed databases
1378 embl(0),
1379 genbank(1),
1380 ddbj(2),
1381 geninfo(3),
1382 medline(4),
1383 swissprot(5),
1384 pir(6),
1385 pdb(7),
1386 epd(8),
1387 ecd(9),
1388 tfd(10),
1389 flybase(11),
1390 prosite(12),
1391 enzyme(13),
1392 mim(14),
1393 ecoseq(15),
1394 hiv(16) ,
1395 other (255) } ,
1396 name VisibleString } -- the database given as text
1397
1398 EMBL-xref ::= SEQUENCE { -- cross reference: a database plus ids within it
1399 dbname EMBL-dbname,
1400 id SEQUENCE OF Object-id }
1401
1402 EMBL-block ::= SEQUENCE { -- EMBL specific descriptions; both dates are required
1403 class ENUMERATED {
1404 not-set(0),
1405 standard(1),
1406 unannotated(2),
1407 other(255) } DEFAULT standard, -- standard when absent
1408 div ENUMERATED { -- presumably the EMBL division of the entry (TODO confirm)
1409 fun(0),
1410 inv(1),
1411 mam(2),
1412 org(3),
1413 phg(4),
1414 pln(5),
1415 pri(6),
1416 pro(7),
1417 rod(8),
1418 syn(9),
1419 una(10),
1420 vrl(11),
1421 vrt(12),
1422 pat(13),
1423 est(14),
1424 sts(15),
1425 other (255) } OPTIONAL,
1426 creation-date Date,
1427 update-date Date,
1428 extra-acc SEQUENCE OF VisibleString OPTIONAL, -- presumably additional accession numbers (TODO confirm)
1429 keywords SEQUENCE OF VisibleString OPTIONAL,
1430 xref SEQUENCE OF EMBL-xref OPTIONAL } -- cross references, see EMBL-xref
1431
1432 END
1433
1434 --*********************************************************************
1435 --
1436 -- SWISSPROT specific data
1437 -- This block of specifications was developed by Mark Cavanaugh of
1438 -- NCBI working with Amos Bairoch of SWISSPROT
1439 --
1440 --*********************************************************************
1441
1442 SP-General DEFINITIONS ::= -- module defining SWISSPROT specific data
1443 BEGIN
1444
1445 EXPORTS SP-block;
1446
1447 IMPORTS Date, Dbtag FROM NCBI-General
1448 Seq-id FROM NCBI-Seqloc;
1449
1450 SP-block ::= SEQUENCE { -- SWISSPROT specific descriptions; only class is required
1451 class ENUMERATED {
1452 not-set (0) ,
1453 standard (1) , -- conforms to all SWISSPROT checks
1454 prelim (2) , -- only seq and biblio checked
1455 other (255) } ,
1456 extra-acc SET OF VisibleString OPTIONAL , -- old SWISSPROT ids
1457 imeth BOOLEAN DEFAULT FALSE , -- seq known to start with Met (FALSE when absent)
1458 plasnm SET OF VisibleString OPTIONAL, -- plasmid names carrying gene
1459 seqref SET OF Seq-id OPTIONAL, -- xref to other sequences
1460 dbref SET OF Dbtag OPTIONAL , -- xref to non-sequence databases
1461 keywords SET OF VisibleString OPTIONAL , -- keywords
1462 created Date OPTIONAL , -- creation date
1463 sequpd Date OPTIONAL , -- sequence update
1464 annotupd Date OPTIONAL } -- annotation update
1465
1466 END
1467
1468 --*********************************************************************
1469 --
1470 -- PIR specific data
1471 -- This block of specifications was developed by Jim Ostell of
1472 -- NCBI
1473 --
1474 --*********************************************************************
1475
1476 PIR-General DEFINITIONS ::= -- module defining PIR specific data
1477 BEGIN
1478
1479 EXPORTS PIR-block;
1480
1481 IMPORTS Seq-id FROM NCBI-Seqloc;
1482
1483 PIR-block ::= SEQUENCE { -- PIR specific descriptions; every element is optional
1484 had-punct BOOLEAN OPTIONAL , -- had punctuation in sequence ?
1485 host VisibleString OPTIONAL ,
1486 source VisibleString OPTIONAL , -- source line
1487 summary VisibleString OPTIONAL ,
1488 genetic VisibleString OPTIONAL ,
1489 includes VisibleString OPTIONAL ,
1490 placement VisibleString OPTIONAL ,
1491 superfamily VisibleString OPTIONAL ,
1492 keywords SEQUENCE OF VisibleString OPTIONAL ,
1493 cross-reference VisibleString OPTIONAL ,
1494 date VisibleString OPTIONAL , -- held as text here, not as a Date
1495 seq-raw VisibleString OPTIONAL , -- seq with punctuation
1496 seqref SET OF Seq-id OPTIONAL } -- xref to other sequences
1497
1498 END
1499
1500 --*********************************************************************
1501 --
1502 -- GenBank specific data
1503 -- This block of specifications was developed by Jim Ostell of
1504 -- NCBI
1505 --
1506 --*********************************************************************
1507
1508 GenBank-General DEFINITIONS ::= -- module defining GenBank specific data
1509 BEGIN
1510
1511 EXPORTS GB-block;
1512
1513 IMPORTS Date FROM NCBI-General;
1514
1515 GB-block ::= SEQUENCE { -- GenBank specific descriptions; every element is optional
1516 extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
1517 source VisibleString OPTIONAL , -- source line
1518 keywords SEQUENCE OF VisibleString OPTIONAL ,
1519 origin VisibleString OPTIONAL,
1520 date VisibleString OPTIONAL , -- OBSOLETE old form Entry Date, superseded by entry-date
1521 entry-date Date OPTIONAL , -- replaces date
1522 div VisibleString OPTIONAL , -- GenBank division
1523 taxonomy VisibleString OPTIONAL } -- continuation line of organism
1524
1525 END
1526
1527 --**********************************************************************
1528 -- PRF specific definition
1529 -- PRF is a protein sequence database created and maintained by
1530 -- Protein Research Foundation, Minoo-city, Osaka, Japan.
1531 --
1532 -- Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
1533 -- Kyoto Univ., Japan
1534 --
1535 --**********************************************************************
1536
1537 PRF-General DEFINITIONS ::= -- module defining PRF specific data
1538 BEGIN
1539
1540 EXPORTS PRF-block;
1541
1542 PRF-block ::= SEQUENCE { -- PRF specific descriptions; both elements are optional
1543 extra-src PRF-ExtraSrc OPTIONAL, -- see PRF-ExtraSrc
1544 keywords SEQUENCE OF VisibleString OPTIONAL
1545 }
1546
1547 PRF-ExtraSrc ::= SEQUENCE { -- free text source details; every element is optional
1548 host VisibleString OPTIONAL,
1549 part VisibleString OPTIONAL,
1550 state VisibleString OPTIONAL,
1551 strain VisibleString OPTIONAL,
1552 taxon VisibleString OPTIONAL
1553 }
1554
1555 END
1556
1557 --*********************************************************************
1558 --
1559 -- PDB specific data
1560 -- This block of specifications was developed by Jim Ostell and
1561 -- Steve Bryant of NCBI
1562 --
1563 --*********************************************************************
1564
1565 PDB-General DEFINITIONS ::= -- module defining PDB specific data
1566 BEGIN
1567
1568 EXPORTS PDB-block;
1569
1570 IMPORTS Date FROM NCBI-General;
1571
1572 PDB-block ::= SEQUENCE { -- PDB specific descriptions
1573 deposition Date , -- deposition date month,year
1574 class VisibleString ,
1575 compound SEQUENCE OF VisibleString ,
1576 source SEQUENCE OF VisibleString ,
1577 exp-method VisibleString OPTIONAL , -- present if NOT X-ray diffraction
1578 replace PDB-replace OPTIONAL } -- replacement history
1579
1580 PDB-replace ::= SEQUENCE { -- one replacement event, used by PDB-block.replace
1581 date Date ,
1582 ids SEQUENCE OF VisibleString } -- entry ids replaced by this one
1583
1584 END
1585
1586 --$Revision: 6.49 $
1587 --**********************************************************************
1588 --
1589 -- NCBI Sequence Feature elements
1590 -- by James Ostell, 1990
1591 -- Version 3.0 - June 1994
1592 --
1593 --**********************************************************************
1594
1595 NCBI-Seqfeat DEFINITIONS ::=
1596 BEGIN
1597
1598 EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;
1599
1600 IMPORTS Gene-ref FROM NCBI-Gene
1601 Prot-ref FROM NCBI-Protein
1602 Org-ref FROM NCBI-Organism
1603 Variation-ref FROM NCBI-Variation
1604 BioSource FROM NCBI-BioSource
1605 RNA-ref FROM NCBI-RNA
1606 Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
1607 Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
1608 Rsite-ref FROM NCBI-Rsite
1609 Txinit FROM NCBI-TxInit
1610 DOI, PubMedId FROM NCBI-Biblio
1611 Pub-set FROM NCBI-Pub
1612 Object-id, Dbtag, User-object FROM NCBI-General;
1613
1614 --*** Feature identifiers ********************************
1615 --*
1616
1617 Feat-id ::= CHOICE { -- identifier of a feature; exactly one of the forms below
1618 gibb INTEGER , -- geninfo backbone
1619 giim Giimport-id , -- geninfo import
1620 local Object-id , -- for local software use
1621 general Dbtag } -- for use by various databases
1622
1623 --*** Seq-feat *******************************************
1624 --* sequence feature generalization
1625
1626 Seq-feat ::= SEQUENCE { -- one sequence feature; only data and location are required
1627 id Feat-id OPTIONAL , -- to be replaced by ids below
1628 data SeqFeatData , -- the specific data
1629 partial BOOLEAN OPTIONAL , -- incomplete in some way?
1630 except BOOLEAN OPTIONAL , -- something funny about this? (see except-text)
1631 comment VisibleString OPTIONAL ,
1632 product Seq-loc OPTIONAL , -- product of process
1633 location Seq-loc , -- feature made from
1634 qual SEQUENCE OF Gb-qual OPTIONAL , -- qualifiers
1635 title VisibleString OPTIONAL , -- for user defined label
1636 ext User-object OPTIONAL , -- user defined structure extension; to be replaced by exts below
1637 cit Pub-set OPTIONAL , -- citations for this feature
1638 exp-ev ENUMERATED { -- evidence for existence of feature
1639 experimental (1) , -- any reasonable experimental check
1640 not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
1641 xref SET OF SeqFeatXref OPTIONAL , -- cite other relevant features
1642 dbxref SET OF Dbtag OPTIONAL , -- support for xref to other databases
1643 pseudo BOOLEAN OPTIONAL , -- annotated on pseudogene?
1644 except-text VisibleString OPTIONAL , -- explain if except=TRUE
1645 ids SET OF Feat-id OPTIONAL , -- set of Ids; will replace 'id' field
1646 exts SET OF User-object OPTIONAL , -- set of extensions; will replace 'ext' field
1647 support SeqFeatSupport OPTIONAL -- will replace /experiment, /inference, model-evidence
1648 }
1649
1650 SeqFeatData ::= CHOICE { -- the type specific payload of a feature; exactly one alternative
1651 gene Gene-ref ,
1652 org Org-ref ,
1653 cdregion Cdregion ,
1654 prot Prot-ref ,
1655 rna RNA-ref ,
1656 pub Pubdesc , -- publication applies to this seq
1657 seq Seq-loc , -- to annotate origin from another seq
1658 imp Imp-feat ,
1659 region VisibleString, -- named region (globin locus)
1660 comment NULL , -- just a comment; carries no data of its own
1661 bond ENUMERATED { -- kind of bond
1662 disulfide (1) ,
1663 thiolester (2) ,
1664 xlink (3) ,
1665 thioether (4) ,
1666 other (255) } ,
1667 site ENUMERATED { -- kind of site
1668 active (1) ,
1669 binding (2) ,
1670 cleavage (3) ,
1671 inhibit (4) ,
1672 modified (5),
1673 glycosylation (6) ,
1674 myristoylation (7) ,
1675 mutagenized (8) ,
1676 metal-binding (9) ,
1677 phosphorylation (10) ,
1678 acetylation (11) ,
1679 amidation (12) ,
1680 methylation (13) ,
1681 hydroxylation (14) ,
1682 sulfatation (15) ,
1683 oxidative-deamination (16) ,
1684 pyrrolidone-carboxylic-acid (17) ,
1685 gamma-carboxyglutamic-acid (18) ,
1686 blocked (19) ,
1687 lipid-binding (20) ,
1688 np-binding (21) ,
1689 dna-binding (22) ,
1690 signal-peptide (23) ,
1691 transit-peptide (24) ,
1692 transmembrane-region (25) ,
1693 nitrosylation (26) ,
1694 other (255) } ,
1695 rsite Rsite-ref , -- restriction site (for maps really)
1696 user User-object , -- user defined structure
1697 txinit Txinit , -- transcription initiation
1698 num Numbering , -- a numbering system
1699 psec-str ENUMERATED { -- protein secondary structure
1700 helix (1) , -- any helix
1701 sheet (2) , -- beta sheet
1702 turn (3) } , -- beta or gamma turn
1703 non-std-residue VisibleString , -- non-standard residue here in seq
1704 het Heterogen , -- cofactor, prosthetic group, etc, bound to seq
1705 biosrc BioSource,
1706 clone Clone-ref, -- clone feature, see Clone-ref
1707 variation Variation-ref
1708 }
1709
1710 SeqFeatXref ::= SEQUENCE { -- reference to another feature; both optional because can have one or both
1711 id Feat-id OPTIONAL , -- the feature copied
1712 data SeqFeatData OPTIONAL } -- the specific data
1713
1714 SeqFeatSupport ::= SEQUENCE { -- supporting evidence for a feature, used by Seq-feat.support
1715 experiment SET OF ExperimentSupport OPTIONAL , -- see ExperimentSupport
1716 inference SET OF InferenceSupport OPTIONAL , -- see InferenceSupport
1717 model-evidence SET OF ModelEvidenceSupport OPTIONAL -- see ModelEvidenceSupport
1718 }
1719
1720 EvidenceCategory ::= INTEGER { -- category of evidence, used by ExperimentSupport and InferenceSupport
1721 not-set (0) ,
1722 coordinates (1) , -- presumably evidence for the feature coordinates (TODO confirm)
1723 description (2) , -- presumably evidence for the feature description (TODO confirm)
1724 existence (3) -- presumably evidence that the feature exists (TODO confirm)
1725 }
1726
1727 ExperimentSupport ::= SEQUENCE { -- experimental support; only explanation is required
1728 category EvidenceCategory OPTIONAL ,
1729 explanation VisibleString , -- free text
1730 pmids SET OF PubMedId OPTIONAL , -- PubMed ids
1731 dois SET OF DOI OPTIONAL -- DOIs
1732 }
1733
1734 Program-id ::= SEQUENCE { -- identifies a program, used by EvidenceBasis.programs
1735 name VisibleString , -- program name (required)
1736 version VisibleString OPTIONAL -- program version, as text
1737 }
1738
1739 EvidenceBasis ::= SEQUENCE { -- basis of an inference, used by InferenceSupport.basis
1740 programs SET OF Program-id OPTIONAL , -- programs, see Program-id
1741 accessions SET OF Seq-id OPTIONAL -- sequence ids
1742 }
1743
1744 InferenceSupport ::= SEQUENCE { -- inference based support; only basis is required
1745 category EvidenceCategory OPTIONAL ,
1746 type INTEGER { -- kind of inference
1747 not-set (0) ,
1748 similar-to-sequence (1) ,
1749 similar-to-aa (2) ,
1750 similar-to-dna (3) ,
1751 similar-to-rna (4) ,
1752 similar-to-mrna (5) ,
1753 similiar-to-est (6) , -- NOTE(review): identifier is spelled similiar here; it is part of the schema, so it is not renamed
1754 similar-to-other-rna (7) ,
1755 profile (8) ,
1756 nucleotide-motif (9) ,
1757 protein-motif (10) ,
1758 ab-initio-prediction (11) ,
1759 alignment (12) ,
1760 other (255)
1761 } DEFAULT not-set , -- not-set when absent
1762 other-type VisibleString OPTIONAL , -- presumably describes the type when it is other (TODO confirm)
1763 same-species BOOLEAN DEFAULT FALSE , -- FALSE when absent
1764 basis EvidenceBasis , -- programs and accessions, see EvidenceBasis
1765 pmids SET OF PubMedId OPTIONAL ,
1766 dois SET OF DOI OPTIONAL
1767 }
1768
1769 ModelEvidenceItem ::= SEQUENCE { -- one supporting sequence, used by ModelEvidenceSupport; only id is required
1770 id Seq-id , -- the supporting sequence
1771 exon-count INTEGER OPTIONAL ,
1772 exon-length INTEGER OPTIONAL ,
1773 full-length BOOLEAN DEFAULT FALSE , -- FALSE when absent
1774 supports-all-exon-combo BOOLEAN DEFAULT FALSE -- FALSE when absent
1775 }
1776
1777 ModelEvidenceSupport ::= SEQUENCE { -- model evidence, grouped by kind of supporting sequence; nothing is required
1778 method VisibleString OPTIONAL ,
1779 mrna SET OF ModelEvidenceItem OPTIONAL , -- supporting items under the mrna label
1780 est SET OF ModelEvidenceItem OPTIONAL , -- supporting items under the est label
1781 protein SET OF ModelEvidenceItem OPTIONAL , -- supporting items under the protein label
1782 identification Seq-id OPTIONAL ,
1783 dbxref SET OF Dbtag OPTIONAL ,
1784 exon-count INTEGER OPTIONAL ,
1785 exon-length INTEGER OPTIONAL ,
1786 full-length BOOLEAN DEFAULT FALSE , -- FALSE when absent
1787 supports-all-exon-combo BOOLEAN DEFAULT FALSE -- FALSE when absent
1788 }
1789
1790 --*** CdRegion ***********************************************
1791 --*
1792 --* Instructions to translate from a nucleic acid to a peptide
1793 --* conflict means it's supposed to translate but doesn't
1794 --*
1795
1796
1797 Cdregion ::= SEQUENCE { -- instructions to translate a nucleic acid to a peptide
1798 orf BOOLEAN OPTIONAL , -- just an ORF ?
1799 frame ENUMERATED { -- reading frame
1800 not-set (0) , -- not set, code uses one
1801 one (1) ,
1802 two (2) ,
1803 three (3) } DEFAULT not-set , -- not-set when absent
1804 conflict BOOLEAN OPTIONAL , -- conflict: supposed to translate but does not
1805 gaps INTEGER OPTIONAL , -- number of gaps on conflict/except
1806 mismatch INTEGER OPTIONAL , -- number of mismatches on above
1807 code Genetic-code OPTIONAL , -- genetic code used
1808 code-break SEQUENCE OF Code-break OPTIONAL , -- individual exceptions
1809 stops INTEGER OPTIONAL } -- number of stop codons on above
1810
1811 -- each code is 64 cells long, in the order where
1812 -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCT=4, etc
1813 -- NOTE: this order does NOT correspond to a Seq-data
1814 -- encoding. It is "natural" to codon usage instead.
1815 -- the value in each cell is the AA coded for
1816 -- start= AA coded only if first in peptide
1817 -- in start array, if codon is not a legitimate start
1818 -- codon, that cell will have the "gap" symbol for
1819 -- that alphabet. Otherwise it will have the AA
1820 -- encoded when that codon is used at the start.
1821
1822 Genetic-code ::= SET OF CHOICE { -- a genetic code described by a set of elements, each of one kind below
1823 name VisibleString , -- name of a code
1824 id INTEGER , -- id in dbase
1825 ncbieaa VisibleString , -- indexed to IUPAC extended
1826 ncbi8aa OCTET STRING , -- indexed to NCBI8aa
1827 ncbistdaa OCTET STRING , -- indexed to NCBIstdaa
1828 sncbieaa VisibleString , -- start codons, indexed to IUPAC extended
1829 sncbi8aa OCTET STRING , -- start codons, indexed to NCBI8aa
1830 sncbistdaa OCTET STRING } -- start codons, indexed to NCBIstdaa
1831
1832 Code-break ::= SEQUENCE { -- specific codon exceptions, used by Cdregion.code-break
1833 loc Seq-loc , -- location of exception
1834 aa CHOICE { -- the amino acid, in one of three codes
1835 ncbieaa INTEGER , -- ASCII value of NCBIeaa code
1836 ncbi8aa INTEGER , -- NCBI8aa code
1837 ncbistdaa INTEGER } } -- NCBIstdaa code
1838
1839 Genetic-code-table ::= SET OF Genetic-code -- table of genetic codes (unordered collection of Genetic-code)
1840
1841 --*** Import ***********************************************
1842 --*
1843 --* Features imported from other databases
1844 --*
1845
1846 Imp-feat ::= SEQUENCE { -- a feature imported from another database
1847 key VisibleString , -- required; the only mandatory field
1848 loc VisibleString OPTIONAL , -- original location string
1849 descr VisibleString OPTIONAL } -- text description
1850
1851 Gb-qual ::= SEQUENCE { -- a qualifier as a pair of strings, both required
1852 qual VisibleString , -- NOTE(review): presumably the qualifier name; confirm
1853 val VisibleString } -- NOTE(review): presumably the qualifier value; confirm
1854
1855
1856 --*** Clone-ref ***********************************************
1857 --*
1858 --* Specification of clone features
1859 --*
1860
1861 Clone-ref ::= SEQUENCE { -- specification of a clone feature
1862 name VisibleString, -- Official clone symbol
1863 library VisibleString OPTIONAL, -- Library name
1864
1865 concordant BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1866 unique BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1867 placement-method INTEGER { -- how the clone was placed
1868 end-seq (0), -- Clone placed by end sequence
1869 insert-alignment (1), -- Clone placed by insert alignment
1870 sts (2), -- Clone placed by STS
1871 fish (3), -- NOTE(review): presumably clone placed by FISH; confirm
1872 fingerprint (4), -- NOTE(review): presumably clone placed by fingerprint; confirm
1873 end-seq-insert-alignment (5), -- combined end-seq and insert align
1874 external (253), -- Placement provided externally
1875 curated (254), -- Human placed or approved
1876 other (255)
1877 } OPTIONAL,
1878 clone-seq Clone-seq-set OPTIONAL -- sequences associated with the clone, see Clone-seq
1879 }
1880
1881 Clone-seq-set ::= SET OF Clone-seq -- unordered collection of Clone-seq, used by Clone-ref.clone-seq
1882
1883
1884 Clone-seq ::= SEQUENCE { -- one sequence of a clone; type and location are required
1885 type INTEGER { -- which part of the clone this sequence is
1886 insert (0),
1887 end (1),
1888 other (255)
1889 },
1890 confidence INTEGER {
1891 multiple (0), -- Multiple hits
1892 na (1), -- Unspecified
1893 nohit-rep (2), -- No hits, end flagged repetitive
1894 nohitnorep (3), -- No hits, end not flagged repetitive
1895 other-chrm (4), -- Hit on different chromosome
1896 unique (5),
1897 virtual (6), -- Virtual (hasn't been sequenced)
1898 multiple-rep (7), -- Multiple hits, end flagged repetitive
1899 multiplenorep (8), -- Multiple hits, end not flagged repetitive
1900 no-hit (9), -- No hits
1901 other (255)
1902 } OPTIONAL,
1903 location Seq-loc, -- location on sequence
1904 seq Seq-loc OPTIONAL, -- clone sequence location
1905 align-id Dbtag OPTIONAL, -- internal alignment identifier
1906 support INTEGER { -- how this sequence relates to the clone placement
1907 prototype (0), -- sequence used to place clone
1908 supporting (1), -- sequence supports placement
1909 supports-other(2), -- supports a different placement
1910 non-supporting (3) -- does not support any placement
1911 } OPTIONAL
1912 }
1913
1914 END
1915
1916
1917 --*** Variation-ref ***********************************************
1918 --*
1919 --* Specification of variation features
1920 --*
1921
1922 NCBI-Variation DEFINITIONS ::=
1923 BEGIN
1924
1925 EXPORTS Variation-ref, Variation-inst, VariantProperties,
1926 Population-data, Phenotype;
1927
1928 IMPORTS Int-fuzz, User-object, Object-id, Dbtag FROM NCBI-General
1929 Seq-literal FROM NCBI-Sequence
1930 SubSource FROM NCBI-BioSource
1931 Seq-loc FROM NCBI-Seqloc
1932 Pub FROM NCBI-Pub;
1933
1934
1935 -- --------------------------------------------------------------------------
1936 -- Historically, the dbSNP definitions document data structures used in the
1937 -- processing and annotation of variations by the dbSNP group. The intention
1938 -- is to provide information to clients that reflects internal information
1939 -- produced during the mapping of SNPs
1940 -- --------------------------------------------------------------------------
1941
1942 VariantProperties ::= SEQUENCE { -- properties of a variant; only version is required
1943 version INTEGER,
1944
1945 -- NOTE:
1946 -- The format for most of these values is as an integer
1947 -- Unless otherwise noted, these integers represent a bitwise OR (= simple
1948 -- sum) of the possible values, and as such, these values represent the
1949 -- specific bit flags that may be set for each of the possible attributes
1950 -- here.
1951
1952 resource-link INTEGER {
1953 preserved (1), -- Clinical, Pubmed, Cited, (0x01)
1954 provisional (2), -- Provisional Third Party Annotations (0x02)
1955 has3D (4), -- Has 3D structure SNP3D table (0x04)
1956 submitterLinkout (8), -- SNP->SubSNP->Batch link_out (0x08)
1957 clinical (16), -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
1958 genotypeKit (32) -- Marker exists on high density genotyping kit
1959 -- (0x20)
1960 } OPTIONAL,
1961
1962 gene-location INTEGER {
1963 in-gene (1), -- Sequence intervals covered by a gene ID but not
1964 -- having an aligned transcript (0x01)
1965 near-gene-5 (2), -- Within 2kb of the 5' end of a gene feature (0x02)
1966 near-gene-3 (4), -- Within 0.5kb of the 3' end of a gene feature (0x04)
1967 intron (8), -- In Intron (0x08)
1968 donor (16), -- In donor splice-site (0x10)
1969 acceptor (32), -- In acceptor splice-site (0x20)
1970 utr-5 (64), -- In 5' UTR (0x40)
1971 utr-3 (128), -- In 3' UTR (0x80)
1972 in-start-codon(256), -- the variant is observed in a start codon
1973 -- (0x100)
1974 in-stop-codon (512), -- the variant is observed in a stop codon
1975 -- (0x200)
1976 intergenic (1024), -- variant located between genes (0x400)
1977 conserved-noncoding(2048) -- variant is located in a conserved
1978 -- non-coding region (0x800)
1979 } OPTIONAL,
1980
1981 effect INTEGER {
1982 no-change (0), -- known to cause no functional changes
1983 -- since 0 does not combine with any other bit
1984 -- value, 'no-change' specifically implies that
1985 -- there are no consequences
1986 synonymous (1), -- one allele in the set does not change the encoded
1987 -- amino acid (0x1)
1988 nonsense (2), -- one allele in the set changes to STOP codon
1989 -- (TER). (0x2)
1990 missense (4), -- one allele in the set changes protein peptide
1991 -- (0x4)
1992 frameshift (8), -- one allele in the set changes all downstream
1993 -- amino acids (0x8)
1994
1995 up-regulator (16), -- the variant causes increased transcription
1996 -- (0x10)
1997 down-regulator(32), -- the variant causes decreased transcription
1998 -- (0x20)
1999 methylation (64), -- (0x40)
2000 stop-gain (128), -- reference codon is not stop codon, but the snp
2001 -- variant allele changes the codon to a
2002 -- terminating codon. (0x80)
2003 stop-loss (256) -- reverse of STOP-GAIN: reference codon is a
2004 -- stop codon, but a snp variant allele changes
2005 -- the codon to a non-terminating codon. (0x100)
2006 } OPTIONAL,
2007
2008 mapping INTEGER {
2009 has-other-snp (1), -- Another SNP has the same mapped positions
2010 -- on reference assembly (0x01)
2011 has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
2012 -- chromosomes on different assemblies (0x02)
2013 is-assembly-specific (4) -- Only maps to 1 assembly (0x04)
2014 } OPTIONAL,
2015
2016 -- map-weight captures specificity of placement
2017 -- NOTE: This is *NOT* a bitfield
2018 map-weight INTEGER {
2019 is-uniquely-placed(1),
2020 placed-twice-on-same-chrom(2),
2021 placed-twice-on-diff-chrom(3),
2022 many-placements(10)
2023 } OPTIONAL,
2024
2025 frequency-based-validation INTEGER {
2026 is-mutation (1), -- low frequency variation that is cited in
2027 -- journal or other reputable sources (0x01)
2028 above-5pct-all (2), -- >5% minor allele freq in each and all
2029 -- populations (0x02)
2030 above-5pct-1plus (4), -- >5% minor allele freq in 1+ populations (0x04)
2031 validated (8), -- Bit is set if the variant has a minor allele
2032 -- observed in two or more separate chromosomes (0x08)
2033 above-1pct-all (16), -- >1% minor allele freq in each and all
2034 -- populations (0x10)
2035 above-1pct-1plus (32) -- >1% minor allele freq in 1+ populations (0x20)
2036 } OPTIONAL,
2037
2038 genotype INTEGER {
2039 in-haplotype-set (1), -- Exists in a haplotype tagging set (0x01)
2040 has-genotypes (2) -- SNP has individual genotype (0x02)
2041 } OPTIONAL,
2042
2043 -- project IDs are IDs from BioProjects
2044 -- in order to report information about project relationships, we
2045 -- require projects to be registered
2046 -- This field in many ways duplicates dbxrefs; however, the
2047 -- intention of this field is to more adequately reflect
2048 -- ownership and data source
2049 --
2050 -- 11/9/2010: DO NOT USE
2051 -- This field was changed in the spec in a breaking way; using it will
2052 -- break clients. We are officially suppressing / abandoning this field.
2053 -- Clients who need to use this should instead place the data in
2054 -- Seq-feat.dbxref, using the db name 'BioProject'
2055 project-data SET OF INTEGER OPTIONAL,
2056
2057 quality-check INTEGER {
2058 contig-allele-missing (1), -- Reference sequence allele at the mapped
2059 -- position is not present in the SNP
2060 -- allele list, adjusted for orientation
2061 -- (0x01)
2062 withdrawn-by-submitter (2), -- One member SS is withdrawn by submitter
2063 -- (0x02)
2064 non-overlapping-alleles (4), -- RS set has 2+ alleles from different
2065 -- submissions and these sets share no
2066 -- alleles in common (0x04)
2067 strain-specific (8), -- Strain specific fixed difference (0x08)
2068 genotype-conflict (16) -- Has Genotype Conflict (0x10)
2069 } OPTIONAL,
2070
2071 confidence INTEGER {
2072 unknown (0),
2073 likely-artifact (1),
2074 other (255)
2075 } OPTIONAL,
2076
2077 -- has this variant been validated?
2078 -- While a boolean flag offers no subtle distinctions of validation
2079 -- methods, occasionally it is only known as a single boolean value
2080 -- NOTE: this flag is redundant and should be omitted if more comprehensive
2081 -- validation information is present
2082 other-validation BOOLEAN OPTIONAL,
2083
2084 -- origin of this allele, if known
2085 -- note that these are powers-of-two, and represent bits; thus, we can
2086 -- represent more than one state simultaneously through a bitwise OR
2087 allele-origin INTEGER {
2088 unknown (0),
2089 germline (1),
2090 somatic (2),
2091 inherited (4),
2092 paternal (8),
2093 maternal (16),
2094 de-novo (32),
2095 biparental (64),
2096 uniparental (128),
2097 not-tested (256),
2098 tested-inconclusive (512),
2099 not-reported (1024),
2100
2101 -- stopper; NOTE: the value 1073741824 is 2^30 (0x40000000), not 2^31
2102 other (1073741824)
2103 } OPTIONAL,
2104
2105 -- observed allele state, if known
2106 -- NOTE: THIS IS NOT A BITFIELD!
2107 allele-state INTEGER {
2108 unknown (0),
2109 homozygous (1),
2110 heterozygous (2),
2111 hemizygous (3),
2112 nullizygous (4),
2113 other (255)
2114 } OPTIONAL,
2115
2116 -- NOTE:
2117 -- 'allele-frequency' here refers to the minor allele frequency of the
2118 -- default population
2119 allele-frequency REAL OPTIONAL,
2120
2121 -- is this variant the ancestral allele?
2122 is-ancestral-allele BOOLEAN OPTIONAL
2123 }
2124
2125 Phenotype ::= SEQUENCE { -- phenotype attached to a variation; every field is optional
2126 source VisibleString OPTIONAL,
2127 term VisibleString OPTIONAL,
2128 xref SET OF Dbtag OPTIONAL, -- database cross-references (Dbtag) for this phenotype
2129
2130 -- does this variant have known clinical significance?
2131 -- NOTE: values are consecutive codes, not powers of two
2132 clinical-significance INTEGER {
2132 unknown (0),
2133 untested (1),
2134 non-pathogenic (2),
2135 probable-non-pathogenic (3),
2136 probable-pathogenic (4),
2137 pathogenic (5),
2138 drug-response (6),
2139 histocompatibility (7),
2140 other (255)
2141 } OPTIONAL
2142 }
2143
2144 Population-data ::= SEQUENCE { -- per-population observations; only population is required
2145 -- assayed population (e.g. HAPMAP-CEU)
2146 population VisibleString,
2147 genotype-frequency REAL OPTIONAL,
2148 chromosomes-tested INTEGER OPTIONAL,
2149 sample-ids SET OF Object-id OPTIONAL,
2150 allele-frequency REAL OPTIONAL,
2151
2152 -- This field is an explicit bit-field
2153 -- Valid values should be a bitwise combination (= simple sum)
2154 -- of any of the values below
2155 flags INTEGER {
2156 is-default-population (1), -- (0x01)
2157 is-minor-allele (2), -- (0x02)
2158 is-rare-allele (4) -- (0x04)
2159 } OPTIONAL
2160 }
2161
2162 Ext-loc ::= SEQUENCE { -- an identified location; element type of Variation-ref.ext-locs
2163 id Object-id,
2164 location Seq-loc
2165 }
2166
2167
2168 Variation-ref ::= SEQUENCE { -- a variation; data is the only non-optional field
2169 -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
2170 -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
2171 --
2172 -- we relate three kinds of IDs here:
2173 -- - our current object's id
2174 -- - the id of this object's parent, if it exists
2175 -- - the sample ID that this item originates from
2176 id Dbtag OPTIONAL,
2177 parent-id Dbtag OPTIONAL,
2178 sample-id Object-id OPTIONAL,
2179 other-ids SET OF Dbtag OPTIONAL,
2180
2181 -- names and synonyms
2182 -- some variants have well-known canonical names and possible accepted
2183 -- synonyms
2184 name VisibleString OPTIONAL,
2185 synonyms SET OF VisibleString OPTIONAL,
2186
2187 -- tag for comment and descriptions
2188 description VisibleString OPTIONAL,
2189
2190 -- phenotype
2191 phenotype SET OF Phenotype OPTIONAL,
2192
2193 -- sequencing / acquisition method
2194 method SET OF INTEGER {
2195 unknown (0),
2196 bac-acgh (1),
2197 computational (2),
2198 curated (3),
2199 digital-array (4),
2200 expression-array (5),
2201 fish (6),
2202 flanking-sequence (7),
2203 maph (8),
2204 mcd-analysis (9),
2205 mlpa (10),
2206 oea-assembly (11),
2207 oligo-acgh (12),
2208 paired-end (13),
2209 pcr (14),
2210 qpcr (15),
2211 read-depth (16),
2212 roma (17),
2213 rt-pcr (18),
2214 sage (19),
2215 sequence-alignment (20),
2216 sequencing (21),
2217 snp-array (22),
2218 snp-genoytyping (23), -- sic: the misspelled identifier is part of the spec; renaming it would change the text encoding
2219 southern (24),
2220 western (25),
2221 optical-mapping (26),
2222
2223 other (255)
2224 } OPTIONAL,
2225
2226 -- Note about SNP representation and pertinent fields: allele-frequency,
2227 -- population, quality-codes:
2228 -- The case of multiple alleles for a SNP would be described by
2229 -- parent-feature of type Variation-set.diff-alleles, where the child
2230 -- features of type Variation-inst, all at the same location, would
2231 -- describe individual alleles.
2232
2233 -- population data
2234 -- DEPRECATED - do not use
2235 population-data SET OF Population-data OPTIONAL,
2236
2237 -- variant properties bit fields
2238 variant-prop VariantProperties OPTIONAL,
2239
2240 -- has this variant been validated?
2241 -- DEPRECATED: new field = VariantProperties.other-validation
2242 validated BOOLEAN OPTIONAL,
2243
2244 -- link-outs to GeneTests database
2245 -- DEPRECATED - do not use
2246 clinical-test SET OF Dbtag OPTIONAL,
2247
2248 -- origin of this allele, if known
2249 -- note that these are powers-of-two, and represent bits; thus, we can
2250 -- represent more than one state simultaneously through a bitwise OR
2251 -- DEPRECATED: new field = VariantProperties.allele-origin
2252 allele-origin INTEGER {
2253 unknown (0),
2254 germline (1),
2255 somatic (2),
2256 inherited (4),
2257 paternal (8),
2258 maternal (16),
2259 de-novo (32),
2260 biparental (64),
2261 uniparental (128),
2262 not-tested (256),
2263 tested-inconclusive (512), -- unlike VariantProperties.allele-origin, no not-reported (1024) is defined here
2264
2265 -- stopper; NOTE: the value 1073741824 is 2^30 (0x40000000), not 2^31
2266 other (1073741824)
2267 } OPTIONAL,
2268
2269 -- observed allele state, if known
2270 -- DEPRECATED: new field = VariantProperties.allele-state
2271 allele-state INTEGER {
2272 unknown (0),
2273 homozygous (1),
2274 heterozygous (2),
2275 hemizygous (3),
2276 nullizygous (4),
2277 other (255)
2278 } OPTIONAL,
2279
2280 -- NOTE:
2281 -- 'allele-frequency' here refers to the minor allele frequency of the
2282 -- default population
2283 -- DEPRECATED: new field = VariantProperties.allele-frequency
2284 allele-frequency REAL OPTIONAL,
2285
2286 -- is this variant the ancestral allele?
2287 -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
2288 is-ancestral-allele BOOLEAN OPTIONAL,
2289
2290 -- publication support.
2291 -- Note: made this pub instead of pub-equiv, since
2292 -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like
2293 -- Pub is more often used as top-level container
2294 -- DEPRECATED - do not use; use Seq-feat.dbxref instead
2295 pub Pub OPTIONAL,
2296
2297 data CHOICE { -- required: exactly one description of the variation
2298 unknown NULL,
2299 note VisibleString, --free-form
2300 uniparental-disomy NULL,
2301
2302 -- actual sequence-edit at feat.location
2303 instance Variation-inst,
2304
2305 -- Set of related Variations.
2306 -- Location of the set equals the union of member locations
2307 set SEQUENCE {
2308 type INTEGER {
2309 unknown (0),
2310 compound (1), -- complex change at the same location on the
2311 -- same molecule
2312 products (2), -- different products arising from the same
2313 -- variation in a precursor, e.g. r.[13g>a,
2314 -- 13_88del]
2315 haplotype (3), -- changes on the same allele, e.g
2316 -- r.[13g>a;15u>c]
2317 genotype (4), -- changes on different alleles in the same
2318 -- genotype, e.g. g.[476C>T]+[476C>T]
2319 mosaic (5), -- different genotypes in the same individual
2320 individual (6), -- same organism; allele relationship unknown,
2321 -- e.g. g.[476C>T(+)183G>C]
2322 population (7), -- population
2323 alleles (8), -- set represents a set of observed alleles
2324 package (9), -- set represents a package of observations at
2325 -- a given location, generally containing
2326 -- asserted + reference
2327 other (255)
2328 },
2329 variations SET OF Variation-ref, -- members of the set (recursive use of Variation-ref)
2330 name VisibleString OPTIONAL
2331 },
2332
2333 -- variant is a complex and undescribed change at the location
2334 -- This type of variant is known to occur in dbVar submissions
2335 complex NULL
2336 },
2337
2338 consequence SET OF CHOICE {
2339 unknown NULL,
2340 splicing NULL, --some effect on splicing
2341 note VisibleString, --freeform
2342
2343 -- Describe resulting variation in the product, e.g. missense,
2344 -- nonsense, silent, neutral, etc in a protein, that arises from
2345 -- THIS variation.
2346 variation Variation-ref,
2347
2348 -- see http://www.hgvs.org/mutnomen/recs-prot.html
2349 frameshift SEQUENCE {
2350 phase INTEGER OPTIONAL,
2351 x-length INTEGER OPTIONAL
2352 },
2353
2354 loss-of-heterozygosity SEQUENCE {
2355 -- In germline comparison, it will be reference genome assembly
2356 -- (default) or reference/normal population. In somatic mutation,
2357 -- it will be a name of the normal tissue.
2358 reference VisibleString OPTIONAL,
2359
2360 -- Name of the testing subject type or the testing tissue.
2361 test VisibleString OPTIONAL
2362 }
2363 } OPTIONAL,
2364
2365 -- Observed location, if different from the parent set or feature.location.
2366 -- DEPRECATED - do not use
2367 location Seq-loc OPTIONAL,
2368
2369 -- reference other locs, e.g. mapped source
2370 -- DEPRECATED - do not use
2371 ext-locs SET OF Ext-loc OPTIONAL,
2372
2373 -- DEPRECATED - do not use; use Seq-feat.exts instead
2374 ext User-object OPTIONAL,
2375
2376 somatic-origin SET OF SEQUENCE {
2377 -- description of the somatic origin itself
2378 source SubSource OPTIONAL,
2379 -- condition related to this origin's type
2380 condition SEQUENCE {
2381 description VisibleString OPTIONAL,
2382 -- reference to BioTerm / other descriptive database
2383 object-id SET OF Dbtag OPTIONAL
2384 } OPTIONAL
2385 } OPTIONAL
2386
2387 }
2388
2389
2390 Delta-item ::= SEQUENCE { -- one element of Variation-inst.delta: a sequence plus the action applied with it
2391 seq CHOICE {
2392 literal Seq-literal,
2393 loc Seq-loc,
2394 this NULL --same location as variation-ref itself
2395 } OPTIONAL,
2396
2397 -- Multiplier allows representing a tandem, e.g. ATATAT as AT*3
2398 -- This allows describing CNV/SSR where delta=self with a
2399 -- multiplier which specifies the count of the repeat unit.
2400
2401 multiplier INTEGER OPTIONAL, --assumed 1 if not specified.
2402 multiplier-fuzz Int-fuzz OPTIONAL,
2403
2404 action INTEGER {
2405
2406 -- replace len(seq) positions starting with location.start with seq
2407 morph (0),
2408
2409 -- go downstream by distance specified by multiplier (upstream if < 0),
2410 -- in genomic context.
2411 offset (1),
2412
2413 -- excise sequence at location
2414 -- if multiplier is specified, delete len(location)*multiplier
2415 -- positions downstream
2416 del-at (2),
2417
2418 -- insert seq before the location.start
2419 ins-before (3)
2420
2421 } DEFAULT morph -- an absent action means morph (replace)
2422 }
2423
2424
2425 -- Variation instance
2426 Variation-inst ::= SEQUENCE { -- a concrete sequence edit: its type plus the delta items that realize it
2427 type INTEGER {
2428 unknown (0), -- delta=[]
2429 identity (1), -- delta=[]
2430 inv (2), -- delta=[del, ins.seq=
2431 -- RevComp(variation-location)]
2432 snv (3), -- delta=[morph of length 1]
2433 -- NOTE: this is snV not snP; the latter
2434 -- requires frequency-based validation to be
2435 -- established in VariantProperties
2436 -- the strict definition of SNP is an SNV with
2437 -- an established population frequency of at
2438 -- least 1% in at least 1 population
2439 mnp (4), -- delta=[morph of length >1]
2440 delins (5), -- delta=[del, ins]
2441 del (6), -- delta=[del]
2442 ins (7), -- delta=[ins]
2443 microsatellite (8), -- delta=[del, ins.seq= repeat-unit with fuzzy
2444 -- multiplier]
2445 -- variation-location is the microsat expansion
2446 -- on the sequence
2447 transposon (9), -- delta=[del, ins.seq= known donor or 'this']
2448 -- variation-location is equiv of transposon
2449 -- locs.
2450 cnv (10), -- delta=[del, ins= 'this' with fuzzy
2451 -- multiplier]
2452 direct-copy (11), -- delta=[ins.seq= upstream location on the
2453 -- same strand]
2454 rev-direct-copy (12), -- delta=[ins.seq= downstream location on the
2455 -- same strand]
2456 inverted-copy (13), -- delta=[ins.seq= upstream location on the
2457 -- opposite strand]
2458 everted-copy (14), -- delta=[ins.seq= downstream location on the
2459 -- opposite strand]
2460 translocation (15), -- delta=like delins
2461 prot-missense (16), -- delta=[morph of length 1]
2462 prot-nonsense (17), -- delta=[del]; variation-location is the tail
2463 -- of the protein being truncated
2464 prot-neutral (18), -- delta=[morph of length 1]
2465 prot-silent (19), -- delta=[morph of length 1, same AA as at
2466 -- variation-location]
2467 prot-other (20), -- delta=any
2468
2469 other (255) -- delta=any
2470 },
2471
2472 -- Sequence that replaces the location, in biological order.
2473 delta SEQUENCE OF Delta-item,
2474
2475 -- 'observation' is used to label items in a Variation-ref package
2476 -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
2477 -- of the values may be observed.
2478 observation INTEGER {
2479 asserted (1), -- inst represents the asserted base at a
2480 -- position
2481 reference (2), -- inst represents the reference base at the
2482 -- position
2483 variant (4) -- inst represents the observed variant at a
2484 -- given position
2485 } OPTIONAL
2486 }
2487
2488 END
2489
2490
2491 --**********************************************************************
2492 --
2493 -- NCBI Restriction Sites
2494 -- by James Ostell, 1990
2495 -- version 0.8
2496 --
2497 --**********************************************************************
2498
2499 NCBI-Rsite DEFINITIONS ::=
2500 BEGIN
2501
2502 EXPORTS Rsite-ref;
2503
2504 IMPORTS Dbtag FROM NCBI-General;
2505
2506 Rsite-ref ::= CHOICE { -- reference to a restriction site: free text or a database pointer
2507 str VisibleString , -- may be unparsable
2508 db Dbtag } -- pointer to a restriction site database
2509
2510 END
2511
2512 --**********************************************************************
2513 --
2514 -- NCBI RNAs
2515 -- by James Ostell, 1990
2516 -- version 0.8
2517 --
2518 --**********************************************************************
2519
2520 NCBI-RNA DEFINITIONS ::=
2521 BEGIN
2522
2523 EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;
2524
2525 IMPORTS Seq-loc FROM NCBI-Seqloc;
2526
2527 --*** rnas ***********************************************
2528 --*
2529 --* various rnas
2530 --*
2531 -- minimal RNA sequence
2532 RNA-ref ::= SEQUENCE { -- RNA feature: required type, optional pseudo flag and extension
2533 type ENUMERATED { -- type of RNA feature
2534 unknown (0) ,
2535 premsg (1) ,
2536 mRNA (2) ,
2537 tRNA (3) ,
2538 rRNA (4) ,
2539 snRNA (5) , -- will become ncRNA, with RNA-gen.class = snRNA
2540 scRNA (6) , -- will become ncRNA, with RNA-gen.class = scRNA
2541 snoRNA (7) , -- will become ncRNA, with RNA-gen.class = snoRNA
2542 ncRNA (8) , -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
2543 tmRNA (9) ,
2544 miscRNA (10) ,
2545 other (255) } ,
2546 pseudo BOOLEAN OPTIONAL ,
2547 ext CHOICE { -- at most one extension, chosen to match the type
2548 name VisibleString , -- for naming "other" type
2549 tRNA Trna-ext , -- for tRNAs
2550 gen RNA-gen } OPTIONAL -- generic fields for ncRNA, tmRNA, miscRNA
2551 }
2552
2553 Trna-ext ::= SEQUENCE { -- tRNA feature extensions; every field is optional
2554 aa CHOICE { -- aa this carries, as an integer code in one of four alphabets
2555 iupacaa INTEGER ,
2556 ncbieaa INTEGER ,
2557 ncbi8aa INTEGER ,
2558 ncbistdaa INTEGER } OPTIONAL ,
2559 codon SET OF INTEGER OPTIONAL , -- codon(s) as in Genetic-code
2560 anticodon Seq-loc OPTIONAL } -- location of anticodon
2561
2562 RNA-gen ::= SEQUENCE { -- generic RNA fields, used via RNA-ref.ext.gen; every field is optional
2563 class VisibleString OPTIONAL , -- for ncRNAs, the class of non-coding RNA:
2564 -- examples: antisense_RNA, guide_RNA, snRNA
2565 product VisibleString OPTIONAL ,
2566 quals RNA-qual-set OPTIONAL -- e.g., tag_peptide qualifier for tmRNAs
2567 }
2568
2569 RNA-qual ::= SEQUENCE { -- Additional data values for RNA-gen,
2570 qual VisibleString , -- in a tag (qual), value (val) format
2571 val VisibleString } -- both strings are required
2572
2573 RNA-qual-set ::= SEQUENCE OF RNA-qual -- ordered list of qualifiers, used by RNA-gen.quals
2574
2575 END
2576
2577 --**********************************************************************
2578 --
2579 -- NCBI Genes
2580 -- by James Ostell, 1990
2581 -- version 0.8
2582 --
2583 --**********************************************************************
2584
2585 NCBI-Gene DEFINITIONS ::=
2586 BEGIN
2587
2588 EXPORTS Gene-ref, Gene-nomenclature;
2589
2590 IMPORTS Dbtag FROM NCBI-General;
2591
2592 --*** Gene ***********************************************
2593 --*
2594 --* reference to a gene
2595 --*
2596
2597 Gene-ref ::= SEQUENCE { -- reference to a gene; no field is required
2598 locus VisibleString OPTIONAL , -- Official gene symbol
2599 allele VisibleString OPTIONAL , -- Official allele designation
2600 desc VisibleString OPTIONAL , -- descriptive name
2601 maploc VisibleString OPTIONAL , -- descriptive map location
2602 pseudo BOOLEAN DEFAULT FALSE , -- pseudogene
2603 db SET OF Dbtag OPTIONAL , -- ids in other dbases
2604 syn SET OF VisibleString OPTIONAL , -- synonyms for locus
2605 locus-tag VisibleString OPTIONAL , -- systematic gene name (e.g., MI0001, ORF0069)
2606 formal-name Gene-nomenclature OPTIONAL -- structured nomenclature record, see Gene-nomenclature
2607 }
2608
2609 Gene-nomenclature ::= SEQUENCE { -- nomenclature record used by Gene-ref.formal-name
2610 status ENUMERATED { -- required
2611 unknown (0) ,
2612 official (1) ,
2613 interim (2)
2614 } ,
2615 symbol VisibleString OPTIONAL ,
2616 name VisibleString OPTIONAL ,
2617 source Dbtag OPTIONAL -- NOTE(review): presumably the database the nomenclature comes from; confirm
2618 }
2619
2620 END
2621
2622
2623 --**********************************************************************
2624 --
2625 -- NCBI Organism
2626 -- by James Ostell, 1994
2627 -- version 3.0
2628 --
2629 --**********************************************************************
2630
2631 NCBI-Organism DEFINITIONS ::=
2632 BEGIN
2633
2634 EXPORTS Org-ref;
2635
2636 IMPORTS Dbtag FROM NCBI-General;
2637
2638 --*** Org-ref ***********************************************
2639 --*
2640 --* Reference to an organism
2641 --* defines only the organism; lower levels of detail for biological
2642 --* molecules are provided by the Source object
2643 --*
2644
2645 Org-ref ::= SEQUENCE { -- reference to an organism; every field is optional
2646 taxname VisibleString OPTIONAL , -- preferred formal name
2647 common VisibleString OPTIONAL , -- common name
2648 mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
2649 db SET OF Dbtag OPTIONAL , -- ids in taxonomic or culture dbases
2650 syn SET OF VisibleString OPTIONAL , -- synonyms for taxname or common
2651 orgname OrgName OPTIONAL } -- structured name, see OrgName
2652
2653
2654 OrgName ::= SEQUENCE { -- structured organism name; every field is optional
2655 name CHOICE {
2656 binomial BinomialOrgName , -- genus/species type name
2657 virus VisibleString , -- virus names are different
2658 hybrid MultiOrgName , -- hybrid between organisms
2659 namedhybrid BinomialOrgName , -- some hybrids have genus x species name
2660 partial PartialOrgName } OPTIONAL , -- when genus not known
2661 attrib VisibleString OPTIONAL , -- attribution of name
2662 mod SEQUENCE OF OrgMod OPTIONAL , -- structured modifiers, see OrgMod
2663 lineage VisibleString OPTIONAL , -- lineage with semicolon separators
2664 gcode INTEGER OPTIONAL , -- genetic code (see CdRegion)
2665 mgcode INTEGER OPTIONAL , -- mitochondrial genetic code
2666 div VisibleString OPTIONAL , -- GenBank division code
2667 pgcode INTEGER OPTIONAL } -- plastid genetic code
2668
2669
2670 OrgMod ::= SEQUENCE { -- one organism modifier: subtype code, required text, optional attribution
2671 subtype INTEGER { -- named values start at 2; no names are defined for 0 or 1
2672 strain (2) ,
2673 substrain (3) ,
2674 type (4) ,
2675 subtype (5) ,
2676 variety (6) ,
2677 serotype (7) ,
2678 serogroup (8) ,
2679 serovar (9) ,
2680 cultivar (10) ,
2681 pathovar (11) ,
2682 chemovar (12) ,
2683 biovar (13) ,
2684 biotype (14) ,
2685 group (15) ,
2686 subgroup (16) ,
2687 isolate (17) ,
2688 common (18) ,
2689 acronym (19) ,
2690 dosage (20) , -- chromosome dosage of hybrid
2691 nat-host (21) , -- natural host of this specimen
2692 sub-species (22) ,
2693 specimen-voucher (23) ,
2694 authority (24) ,
2695 forma (25) ,
2696 forma-specialis (26) ,
2697 ecotype (27) ,
2698 synonym (28) ,
2699 anamorph (29) ,
2700 teleomorph (30) ,
2701 breed (31) ,
2702 gb-acronym (32) , -- used by taxonomy database
2703 gb-anamorph (33) , -- used by taxonomy database
2704 gb-synonym (34) , -- used by taxonomy database
2705 culture-collection (35) ,
2706 bio-material (36) ,
2707 metagenome-source (37) ,
2708 old-lineage (253) ,
2709 old-name (254) ,
2710 other (255) } , -- ASN5: old-name (254) will be added to next spec (NOTE: stale, old-name (254) is already defined above)
2711 subname VisibleString , -- required text value of the modifier
2712 attrib VisibleString OPTIONAL } -- attribution/source of name
2713
2714 BinomialOrgName ::= SEQUENCE { -- genus/species style name, used by OrgName.name binomial and namedhybrid
2715 genus VisibleString , -- required
2716 species VisibleString OPTIONAL , -- species required if subspecies used
2717 subspecies VisibleString OPTIONAL }
2718
2719 MultiOrgName ::= SEQUENCE OF OrgName -- ordered list of names for a hybrid; the first will be used to assign division
2720
2721 PartialOrgName ::= SEQUENCE OF TaxElement -- ordered list of taxonomic elements, for when we don't know the genus
2722
2723 TaxElement ::= SEQUENCE { -- one taxonomic element of a PartialOrgName
2724 fixed-level INTEGER {
2725 other (0) , -- level must be set in string
2726 family (1) ,
2727 order (2) ,
2728 class (3) } ,
2729 level VisibleString OPTIONAL , -- the level as text, needed when fixed-level is other
2730 name VisibleString } -- required
2731
2732 END
2733
2734
2735 --**********************************************************************
2736 --
2737 -- NCBI BioSource
2738 -- by James Ostell, 1994
2739 -- version 3.0
2740 --
2741 --**********************************************************************
2742
2743 NCBI-BioSource DEFINITIONS ::=
2744 BEGIN
2745
2746 EXPORTS BioSource, SubSource;
2747
2748 IMPORTS Org-ref FROM NCBI-Organism;
2749
2750 --********************************************************************
2751 --
2752 -- BioSource gives the source of the biological material
2753 -- for sequences
2754 --
2755 --********************************************************************
2756
2757 BioSource ::= SEQUENCE { -- source of the biological material for sequences; org is the only required field
2758 genome INTEGER { -- biological context
2759 unknown (0) ,
2760 genomic (1) ,
2761 chloroplast (2) ,
2762 chromoplast (3) ,
2763 kinetoplast (4) ,
2764 mitochondrion (5) ,
2765 plastid (6) ,
2766 macronuclear (7) ,
2767 extrachrom (8) ,
2768 plasmid (9) ,
2769 transposon (10) ,
2770 insertion-seq (11) ,
2771 cyanelle (12) ,
2772 proviral (13) ,
2773 virion (14) ,
2774 nucleomorph (15) ,
2775 apicoplast (16) ,
2776 leucoplast (17) ,
2777 proplastid (18) ,
2778 endogenous-virus (19) ,
2779 hydrogenosome (20) ,
2780 chromosome (21) ,
2781 chromatophore (22)
2782 } DEFAULT unknown , -- an absent genome means unknown (0)
2783 origin INTEGER {
2784 unknown (0) ,
2785 natural (1) , -- normal biological entity
2786 natmut (2) , -- naturally occurring mutant
2787 mut (3) , -- artificially mutagenized
2788 artificial (4) , -- artificially engineered
2789 synthetic (5) , -- purely synthetic
2790 other (255)
2791 } DEFAULT unknown , -- an absent origin means unknown (0)
2792 org Org-ref , -- the organism (required)
2793 subtype SEQUENCE OF SubSource OPTIONAL , -- source qualifiers, see SubSource
2794 is-focus NULL OPTIONAL , -- to distinguish biological focus
2795 pcr-primers PCRReactionSet OPTIONAL } -- structured PCR primer data, see PCRReactionSet
2796
2797 PCRReactionSet ::= SET OF PCRReaction -- unordered collection of reactions, used by BioSource.pcr-primers
2798
2799 PCRReaction ::= SEQUENCE { -- one reaction: optional forward and reverse primer sets
2800 forward PCRPrimerSet OPTIONAL ,
2801 reverse PCRPrimerSet OPTIONAL }
2802
2803 PCRPrimerSet ::= SET OF PCRPrimer -- unordered collection of primers
2804
2805 PCRPrimer ::= SEQUENCE { -- one primer: optional sequence and optional name
2806 seq PCRPrimerSeq OPTIONAL ,
2807 name PCRPrimerName OPTIONAL }
2808
2809 PCRPrimerSeq ::= VisibleString -- primer sequence as text
2810
2811 PCRPrimerName ::= VisibleString -- primer name as text
2812
2813 SubSource ::= SEQUENCE { -- one subtype-tagged entry of BioSource.subtype
2814 subtype INTEGER { -- which kind of entry this is
2815 chromosome (1) ,
2816 map (2) ,
2817 clone (3) ,
2818 subclone (4) ,
2819 haplotype (5) ,
2820 genotype (6) ,
2821 sex (7) ,
2822 cell-line (8) ,
2823 cell-type (9) ,
2824 tissue-type (10) ,
2825 clone-lib (11) ,
2826 dev-stage (12) ,
2827 frequency (13) ,
2828 germline (14) ,
2829 rearranged (15) ,
2830 lab-host (16) ,
2831 pop-variant (17) ,
2832 tissue-lib (18) ,
2833 plasmid-name (19) ,
2834 transposon-name (20) ,
2835 insertion-seq-name (21) ,
2836 plastid-name (22) ,
2837 country (23) ,
2838 segment (24) ,
2839 endogenous-virus-name (25) ,
2840 transgenic (26) ,
2841 environmental-sample (27) ,
2842 isolation-source (28) ,
2843 lat-lon (29) , -- +/- decimal degrees
2844 collection-date (30) , -- DD-MMM-YYYY format
2845 collected-by (31) , -- name of person who collected the sample
2846 identified-by (32) , -- name of person who identified the sample
2847 fwd-primer-seq (33) , -- sequence (possibly more than one; semicolon-separated)
2848 rev-primer-seq (34) , -- sequence (possibly more than one; semicolon-separated)
2849 fwd-primer-name (35) ,
2850 rev-primer-name (36) ,
2851 metagenomic (37) ,
2852 mating-type (38) ,
2853 linkage-group (39) ,
2854 haplogroup (40) ,
2855 whole-replicon (41) ,
2856 phenotype (42) ,
2857 other (255) } ,
2858 name VisibleString , -- required; presumably the value text for the chosen subtype (e.g. the lat-lon string), TODO confirm
2859 attrib VisibleString OPTIONAL } -- attribution/source of this name
2860
2861 END
2862
2863 --**********************************************************************
2864 --
2865 -- NCBI Protein
2866 -- by James Ostell, 1990
2867 -- version 0.8
2868 --
2869 --**********************************************************************
2870
2871 NCBI-Protein DEFINITIONS ::=
2872 BEGIN
2873
2874 EXPORTS Prot-ref;
2875
2876 IMPORTS Dbtag FROM NCBI-General;
2877
2878 --*** Prot-ref ***********************************************
2879 --*
2880 --* Reference to a protein name
2881 --*
2882
2883 Prot-ref ::= SEQUENCE { -- every member is OPTIONAL or has a DEFAULT
2884 name SET OF VisibleString OPTIONAL , -- protein name
2885 desc VisibleString OPTIONAL , -- description (instead of name)
2886 ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
2887 activity SET OF VisibleString OPTIONAL , -- activities
2888 db SET OF Dbtag OPTIONAL , -- ids in other dbases (Dbtag is imported from NCBI-General)
2889 processed ENUMERATED { -- processing status
2890 not-set (0) ,
2891 preprotein (1) ,
2892 mature (2) ,
2893 signal-peptide (3) ,
2894 transit-peptide (4) } DEFAULT not-set }
2895
2896 END
2897 --********************************************************************
2898 --
2899 -- Transcription Initiation Site Feature Data Block
2900 -- James Ostell, 1991
2901 -- Philip Bucher, David Ghosh
2902 -- version 1.1
2903 --
2904 --
2905 --
2906 --********************************************************************
2907
2908 NCBI-TxInit DEFINITIONS ::=
2909 BEGIN
2910
2911 EXPORTS Txinit;
2912
2913 IMPORTS Gene-ref FROM NCBI-Gene
2914 Prot-ref FROM NCBI-Protein
2915 Org-ref FROM NCBI-Organism;
2916
2917 Txinit ::= SEQUENCE { -- transcription initiation site feature data
2918 name VisibleString , -- descriptive name of initiation site
2919 syn SEQUENCE OF VisibleString OPTIONAL , -- synonyms
2920 gene SEQUENCE OF Gene-ref OPTIONAL , -- gene(s) transcribed
2921 protein SEQUENCE OF Prot-ref OPTIONAL , -- protein(s) produced
2922 rna SEQUENCE OF VisibleString OPTIONAL , -- rna(s) produced
2923 expression VisibleString OPTIONAL , -- tissue/time of expression
2924 txsystem ENUMERATED { -- transcription apparatus used at this site
2925 unknown (0) ,
2926 pol1 (1) , -- eukaryotic Pol I
2927 pol2 (2) , -- eukaryotic Pol II
2928 pol3 (3) , -- eukaryotic Pol III
2929 bacterial (4) ,
2930 viral (5) ,
2931 rna (6) , -- RNA replicase
2932 organelle (7) ,
2933 other (255) } ,
2934 txdescr VisibleString OPTIONAL , -- modifiers on txsystem
2935 txorg Org-ref OPTIONAL , -- organism supplying transcription apparatus
2936 mapping-precise BOOLEAN DEFAULT FALSE , -- mapping precise or approx
2937 location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
2938 inittype ENUMERATED { -- initiation type: single, multiple or region
2939 unknown (0) ,
2940 single (1) ,
2941 multiple (2) ,
2942 region (3) } OPTIONAL ,
2943 evidence SET OF Tx-evidence OPTIONAL } -- supporting experiments, see Tx-evidence
2944
2945 Tx-evidence ::= SEQUENCE { -- one piece of experimental evidence for a Txinit
2946 exp-code ENUMERATED { -- experimental method used
2947 unknown (0) ,
2948 rna-seq (1) , -- direct RNA sequencing
2949 rna-size (2) , -- RNA length measurement
2950 np-map (3) , -- nuclease protection mapping with homologous sequence ladder
2951 np-size (4) , -- nuclease protected fragment length measurement
2952 pe-seq (5) , -- dideoxy RNA sequencing
2953 cDNA-seq (6) , -- full-length cDNA sequencing
2954 pe-map (7) , -- primer extension mapping with homologous sequence ladder
2955 pe-size (8) , -- primer extension product length measurement
2956 pseudo-seq (9) , -- full-length processed pseudogene sequencing
2957 rev-pe-map (10) , -- see NOTE (1) below
2958 other (255) } ,
2959 expression-system ENUMERATED { -- note the DEFAULT is physiological, not unknown
2960 unknown (0) ,
2961 physiological (1) ,
2962 in-vitro (2) ,
2963 oocyte (3) ,
2964 transfection (4) ,
2965 transgenic (5) ,
2966 other (255) } DEFAULT physiological ,
2967 low-prec-data BOOLEAN DEFAULT FALSE , -- presumably flags low-precision data, TODO confirm
2968 from-homolog BOOLEAN DEFAULT FALSE } -- experiment actually done on
2969 -- close homolog
2970
2971 -- NOTE (1) length measurement of a reverse direction primer-extension
2972 -- product (blocked by RNA 5'end) by comparison with
2973 -- homologous sequence ladder (J. Mol. Biol. 199, 587)
2974
2975 END
2976
2977 --$Revision: 1.7 $
2978 -- ----------------------------------------------------------------------------
2979 --
2980 -- PUBLIC DOMAIN NOTICE
2981 -- National Center for Biotechnology Information
2982 --
2983 -- This software/database is a "United States Government Work" under the terms
2984 -- of the United States Copyright Act. It was written as part of the author's
2985 -- official duties as a United States Government employee and thus cannot be
2986 -- copyrighted. This software/database is freely available to the public for
2987 -- use. The National Library of Medicine and the U.S. Government have not
2988 -- placed any restriction on its use or reproduction.
2989 --
2990 -- Although all reasonable efforts have been taken to ensure the accuracy and
2991 -- reliability of the software and data, the NLM and the U.S. Government do not
2992 -- and cannot warrant the performance or results that may be obtained by using
2993 -- this software or data. The NLM and the U.S. Government disclaim all
2994 -- warranties, express or implied, including warranties of performance,
2995 -- merchantability or fitness for any particular purpose.
2996 --
2997 -- Please cite the authors in any work or product based on this material.
2998 --
2999 -- ----------------------------------------------------------------------------
3000 --
3001 -- Authors: Mike DiCuccio, Eugene Vasilchenko
3002 --
3003 -- ASN.1 interface to table readers
3004 --
3005 -- ----------------------------------------------------------------------------
3006
3007 NCBI-SeqTable DEFINITIONS ::=
3008
3009 BEGIN
3010
3011 EXPORTS
3012 SeqTable-column-info, SeqTable-column, Seq-table;
3013
3014 IMPORTS
3015 Seq-id, Seq-loc, Seq-interval FROM NCBI-Seqloc;
3016
3017
3018 SeqTable-column-info ::= SEQUENCE { -- describes what a table column holds; all three members are OPTIONAL
3019 -- user-friendly column name, can be skipped
3020 title VisibleString OPTIONAL,
3021
3022 -- identification of the column data in the objects described by the table
3023 field-id INTEGER { -- known column data types
3024 -- position types
3025 location (0), -- location as Seq-loc
3026 location-id (1), -- location Seq-id
3027 location-gi (2), -- location gi
3028 location-from (3), -- interval from
3029 location-to (4), -- interval to
3030 location-strand (5), -- location strand
3031 location-fuzz-from-lim (6),
3032 location-fuzz-to-lim (7),
3033
3034 product (10), -- product as Seq-loc
3035 product-id (11), -- product Seq-id
3036 product-gi (12), -- product gi
3037 product-from (13), -- product interval from
3038 product-to (14), -- product interval to
3039 product-strand (15), -- product strand
3040 product-fuzz-from-lim (16),
3041 product-fuzz-to-lim (17),
3042
3043 -- main feature fields
3044 id-local (20), -- id.local.id
3045 xref-id-local (21), -- xref.id.local.id
3046 partial (22),
3047 comment (23),
3048 title (24),
3049 ext (25), -- field-name must be "E.xxx", see below
3050 qual (26), -- field-name must be "Q.xxx", see below
3051 dbxref (27), -- field-name must be "D.xxx", see below
3052
3053 -- various data fields
3054 data-imp-key (30),
3055 data-region (31),
3056 data-cdregion-frame (32),
3057
3058 -- extra fields, see also special values for str below
3059 ext-type (40),
3060 qual-qual (41),
3061 qual-val (42),
3062 dbxref-db (43),
3063 dbxref-tag (44)
3064 } OPTIONAL,
3065
3066 -- any column can be identified by ASN.1 text locator string
3067 -- with omitted object type.
3068 -- examples:
3069 -- "data.gene.locus" for Seq-feat.data.gene.locus
3070 -- "data.imp.key" for Seq-feat.data.imp.key
3071 -- "qual.qual"
3072 -- - Seq-feat.qual is SEQUENCE so several columns are allowed
3073 -- see also "Q.xxx" special value for shorter qual representation
3074 -- "ext.type.str"
3075 -- "ext.data.label.str"
3076 -- "ext.data.data.int"
3077 -- see also "E.xxx" special value for shorter ext representation
3078 -- special values start with capital letter:
3079 -- "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
3080 -- - Seq-feat.ext.data is SEQUENCE so several columns are allowed
3081 -- "Q.xxx" - qual.qual = xxx, qual.val = data
3082 -- - Seq-feat.qual is SEQUENCE so several columns are allowed
3083 -- "D.xxx" - dbxref.id = xxx, dbxref.tag = data (NOTE(review): "id" presumably means the db member, cf. field-id dbxref-db; confirm)
3084 -- - Seq-feat.dbxref is SET so several columns are allowed
3085 field-name VisibleString OPTIONAL
3086 }
3087
3088
3089 CommonString-table ::= SEQUENCE { -- strings drawn from a small set of possible values (see SeqTable-multi-data.common-string)
3090 -- set of possible values
3091 strings SEQUENCE OF UTF8String,
3092
3093 -- indexes of values (presumably positions in strings above, one per row; TODO confirm)
3094 indexes SEQUENCE OF INTEGER
3095 }
3096
3097
3098 CommonBytes-table ::= SEQUENCE { -- byte arrays drawn from a small set of possible values (see SeqTable-multi-data.common-bytes)
3099 -- set of possible values
3100 bytes SEQUENCE OF OCTET STRING,
3101
3102 -- indexes of values (presumably positions in bytes above, one per row; TODO confirm)
3103 indexes SEQUENCE OF INTEGER
3104 }
3105
3106
3107 SeqTable-multi-data ::= CHOICE { -- the row values of one column, in one of several representations
3108 -- a set of integers, one per row
3109 int SEQUENCE OF INTEGER,
3110
3111 -- a set of reals, one per row
3112 real SEQUENCE OF REAL,
3113
3114 -- a set of strings, one per row
3115 string SEQUENCE OF UTF8String,
3116
3117 -- a set of byte arrays, one per row
3118 bytes SEQUENCE OF OCTET STRING,
3119
3120 -- a set of strings with a small set of possible values
3121 common-string CommonString-table,
3122
3123 -- a set of byte arrays with a small set of possible values
3124 common-bytes CommonBytes-table,
3125
3126 -- a set of bits, one per row
3127 -- this uses bm::bvector<> as its storage mechanism
3128 bit OCTET STRING,
3129
3130 -- a set of locations, one per row
3131 loc SEQUENCE OF Seq-loc,
3132 id SEQUENCE OF Seq-id,
3133 interval SEQUENCE OF Seq-interval
3134 }
3135
3136
3137 SeqTable-single-data ::= CHOICE { -- one value; used for SeqTable-column.default and sparse-other
3138 -- integer
3139 int INTEGER,
3140
3141 -- real
3142 real REAL,
3143
3144 -- string
3145 string UTF8String,
3146
3147 -- byte array
3148 bytes OCTET STRING,
3149
3150 -- bit
3151 bit BOOLEAN,
3152
3153 -- location
3154 loc Seq-loc,
3155 id Seq-id,
3156 interval Seq-interval
3157 }
3158
3159
3160 SeqTable-sparse-index ::= CHOICE { -- says which rows have values, as a list or as a bitset
3161 -- indexes of rows with values
3162 indexes SEQUENCE OF INTEGER,
3163
3164 -- bitset of rows with values
3165 bit-set OCTET STRING
3166 }
3167
3168
3169 SeqTable-column ::= SEQUENCE { -- one column: a required header plus optional data, sparse index and fallback values
3170 -- column description or reference to previously defined info
3171 header SeqTable-column-info, -- information about data
3172
3173 -- row data
3174 data SeqTable-multi-data OPTIONAL,
3175
3176 -- in case not all rows contain data this field will contain sparse info
3177 sparse SeqTable-sparse-index OPTIONAL,
3178
3179 -- default value for sparse table, or if row data is too short
3180 default SeqTable-single-data OPTIONAL,
3181
3182 -- single value for indexes not listed in sparse table
3183 sparse-other SeqTable-single-data OPTIONAL
3184 }
3185
3186
3187 Seq-table ::= SEQUENCE { -- a table of features stored column-wise
3188 -- type of features in this table, equal to Seq-feat.data variant index
3189 feat-type INTEGER,
3190
3191 -- subtype of features in this table, defined in header SeqFeatData.hpp
3192 feat-subtype INTEGER OPTIONAL,
3193
3194 -- number of rows
3195 num-rows INTEGER,
3196
3197 -- data in columns
3198 columns SEQUENCE OF SeqTable-column
3199 }
3200
3201
3202 END
3203 --$Revision: 6.4 $
3204 --**********************************************************************
3205 --
3206 -- NCBI Sequence Alignment elements
3207 -- by James Ostell, 1990
3208 --
3209 --**********************************************************************
3210
3211 NCBI-Seqalign DEFINITIONS ::=
3212 BEGIN
3213
3214 EXPORTS Seq-align, Score, Score-set, Seq-align-set;
3215
3216 IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
3217 User-object, Object-id FROM NCBI-General;
3218
3219 --*** Sequence Alignment ********************************
3220 --*
3221
3222 Seq-align-set ::= SET OF Seq-align -- unordered collection of alignments; also used by Seq-align.segs.disc
3223
3224 Seq-align ::= SEQUENCE { -- one alignment: a type, the segment data, and optional scores, bounds, ids and extensions
3225 type ENUMERATED { -- kind of alignment
3226 not-set (0) ,
3227 global (1) ,
3228 diags (2) , -- unbroken, but not ordered, diagonals
3229 partial (3) , -- mapping pieces together
3230 disc (4) , -- discontinuous alignment
3231 other (255) } ,
3232 dim INTEGER OPTIONAL , -- dimensionality
3233 score SET OF Score OPTIONAL , -- for whole alignment
3234 segs CHOICE { -- alignment data
3235 dendiag SEQUENCE OF Dense-diag ,
3236 denseg Dense-seg ,
3237 std SEQUENCE OF Std-seg ,
3238 packed Packed-seg ,
3239 disc Seq-align-set, -- a nested set of Seq-align
3240 spliced Spliced-seg,
3241 sparse Sparse-seg
3242 } ,
3243
3244 -- regions of sequence over which align
3245 -- was computed
3246 bounds SET OF Seq-loc OPTIONAL,
3247
3248 -- alignment id
3249 id SEQUENCE OF Object-id OPTIONAL,
3250
3251 --extra info
3252 ext SEQUENCE OF User-object OPTIONAL
3253 }
3254
3255 Dense-diag ::= SEQUENCE { -- for (multiway) diagonals
3256 dim INTEGER DEFAULT 2 , -- dimensionality
3257 ids SEQUENCE OF Seq-id , -- sequences in order
3258 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order
3259 len INTEGER , -- len of aligned segments
3260 strands SEQUENCE OF Na-strand OPTIONAL , -- presumably one strand per id, in ids order; TODO confirm
3261 scores SET OF Score OPTIONAL } -- optional scores for this diagonal
3262
3263 -- Dense-seg: the densest packing for sequence alignments only.
3264 -- a start of -1 indicates a gap for that sequence of
3265 -- length lens.
3266 --
3267 -- id=100 AAGGCCTTTTAGAGATGATGATGATGATGA
3268 -- id=200 AAGGCCTTTTAG.......GATGATGATGA
3269 -- id=300 ....CCTTTTAGAGATGATGAT....ATGA
3270 --
3271 -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
3272 -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
3273 -- lens = { 4, 8, 7, 3, 4, 4 }
3274 --
3275
3276 Dense-seg ::= SEQUENCE { -- for (multiway) global or partial alignments
3277 dim INTEGER DEFAULT 2 , -- dimensionality
3278 numseg INTEGER , -- number of segments here
3279 ids SEQUENCE OF Seq-id , -- sequences in order
3280 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order within segs; -1 marks a gap (see example above)
3281 lens SEQUENCE OF INTEGER , -- lengths in ids order within segs
3282 strands SEQUENCE OF Na-strand OPTIONAL ,
3283 scores SEQUENCE OF Score OPTIONAL } -- score for each seg
3284
3285 Packed-seg ::= SEQUENCE { -- for (multiway) global or partial alignments
3286 dim INTEGER DEFAULT 2 , -- dimensionality
3287 numseg INTEGER , -- number of segments here
3288 ids SEQUENCE OF Seq-id , -- sequences in order
3289 starts SEQUENCE OF INTEGER , -- start OFFSETS in ids order for whole alignment
3290 present OCTET STRING , -- Boolean flags: whether each sequence is present or absent in
3291 -- each segment
3292 lens SEQUENCE OF INTEGER , -- length of each segment
3293 strands SEQUENCE OF Na-strand OPTIONAL ,
3294 scores SEQUENCE OF Score OPTIONAL } -- score for each segment
3295
3296 Std-seg ::= SEQUENCE { -- a segment whose aligned pieces are given as Seq-locs
3297 dim INTEGER DEFAULT 2 , -- dimensionality
3298 ids SEQUENCE OF Seq-id OPTIONAL ,
3299 loc SEQUENCE OF Seq-loc , -- required list of locations
3300 scores SET OF Score OPTIONAL }
3301
3302
3303 Spliced-seg ::= SEQUENCE { -- alignment of a product (transcript or protein) to genomic sequence, exon by exon
3304 -- product is either protein or transcript (cDNA)
3305 product-id Seq-id OPTIONAL,
3306 genomic-id Seq-id OPTIONAL,
3307
3308 -- should be 'plus' or 'minus'
3309 product-strand Na-strand OPTIONAL ,
3310 genomic-strand Na-strand OPTIONAL ,
3311
3312 product-type ENUMERATED { -- required: says which kind of product is aligned
3313 transcript(0),
3314 protein(1)
3315 },
3316
3317 -- set of segments involved
3318 -- each segment corresponds to one exon
3319 -- exons are always in biological order
3320 exons SEQUENCE OF Spliced-exon ,
3321
3322 -- optional poly(A) tail
3323 poly-a INTEGER OPTIONAL,
3324
3325 -- length of the product, in bases/residues
3326 -- from this, a 3' unaligned length can be extracted; this also captures
3327 -- the case in which a protein aligns leaving a partial codon alignment
3328 -- at the 3' end
3329 product-length INTEGER OPTIONAL,
3330
3331 -- alignment descriptors / modifiers
3332 -- this provides us a set for extension
3333 modifiers SET OF Spliced-seg-modifier OPTIONAL
3334 }
3335
3336 Spliced-seg-modifier ::= CHOICE { -- one descriptor in Spliced-seg.modifiers
3337 -- protein aligns from the start and the first codon
3338 -- on both product and genomic is start codon
3339 start-codon-found BOOLEAN,
3340
3341 -- protein aligns to its end and there is a stop codon
3342 -- on the genomic right after the alignment
3343 stop-codon-found BOOLEAN
3344 }
3345
3346
3347 -- complete or partial exon
3348 -- two consecutive Spliced-exons may belong to one exon
3349 Spliced-exon ::= SEQUENCE { -- one element of Spliced-seg.exons
3350 -- product-end >= product-start
3351 product-start Product-pos ,
3352 product-end Product-pos ,
3353
3354 -- genomic-end >= genomic-start
3355 genomic-start INTEGER ,
3356 genomic-end INTEGER ,
3357
3358 -- product is either protein or transcript (cDNA)
3359 product-id Seq-id OPTIONAL ,
3360 genomic-id Seq-id OPTIONAL ,
3361
3362 -- should be 'plus' or 'minus'
3363 product-strand Na-strand OPTIONAL ,
3364
3365 -- genomic-strand represents the strand of translation
3366 genomic-strand Na-strand OPTIONAL ,
3367
3368 -- basic segments always are in biological order
3369 parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,
3370
3371 -- scores for this exon
3372 scores Score-set OPTIONAL ,
3373
3374 -- splice sites
3375 acceptor-before-exon Splice-site OPTIONAL,
3376 donor-after-exon Splice-site OPTIONAL,
3377
3378 -- flag: is this exon complete or partial?
3379 partial BOOLEAN OPTIONAL,
3380
3381 --extra info
3382 ext SEQUENCE OF User-object OPTIONAL
3383 }
3384
3385
3386 Product-pos ::= CHOICE { -- a position on the product: plain integer or a Prot-pos
3387 nucpos INTEGER,
3388 protpos Prot-pos
3389 }
3390
3391
3392 -- codon based position on protein (1/3 of amino acid)
3393 Prot-pos ::= SEQUENCE {
3394 -- standard protein position
3395 amin INTEGER ,
3396
3397 -- 0, 1, 2, or 3 as for Cdregion
3398 -- 0 = not set
3399 -- 1, 2, 3 = actual frame
3400 frame INTEGER DEFAULT 0
3401 }
3402
3403
3404 -- Spliced-exon-chunk: piece of an exon
3405 -- lengths are given in nucleotide bases (1/3 of amino acid when product is a
3406 -- protein)
3407 Spliced-exon-chunk ::= CHOICE {
3408 -- both sequences represented, product and genomic sequences match
3409 match INTEGER ,
3410
3411 -- both sequences represented, product and genomic sequences do not match
3412 mismatch INTEGER ,
3413
3414 -- both sequences are represented, there is sufficient similarity
3415 -- between product and genomic sequences. Can be used to replace stretches
3416 -- of matches and mismatches, mostly for protein to genomic where
3417 -- definition of match or mismatch depends on translation table
3418 diag INTEGER ,
3419
3420 -- insertion in product sequence (i.e. gap in the genomic sequence)
3421 product-ins INTEGER ,
3422
3423 -- insertion in genomic sequence (i.e. gap in the product sequence)
3424 genomic-ins INTEGER
3425 }
3426
3427
3428 -- site involved in splice
3429 Splice-site ::= SEQUENCE { -- used by Spliced-exon.acceptor-before-exon and donor-after-exon
3430 -- typically two bases in the intronic region, always
3431 -- in IUPAC format
3432 bases VisibleString
3433 }
3434
3435
3436 -- ==========================================================================
3437 --
3438 -- Sparse-seg follows the semantics of dense-seg and is more optimal for
3439 -- representing sparse multiple alignments
3440 --
3441 -- ==========================================================================
3442
3443
3444 Sparse-seg ::= SEQUENCE { -- a multiple alignment expressed as a set of pairwise Sparse-align rows
3445 master-id Seq-id OPTIONAL,
3446
3447 -- pairwise alignments constituting this multiple alignment
3448 rows SET OF Sparse-align,
3449
3450 -- per-row scores
3451 row-scores SET OF Score OPTIONAL,
3452
3453 -- index of extra items
3454 ext SET OF Sparse-seg-ext OPTIONAL
3455 }
3456
3457 Sparse-align ::= SEQUENCE { -- one pairwise alignment (a row of Sparse-seg)
3458 first-id Seq-id,
3459 second-id Seq-id,
3460
3461 numseg INTEGER, --number of segments
3462 first-starts SEQUENCE OF INTEGER , --starts on the first sequence [numseg]
3463 second-starts SEQUENCE OF INTEGER , --starts on the second sequence [numseg]
3464 lens SEQUENCE OF INTEGER , --lengths of segments [numseg]
3465 second-strands SEQUENCE OF Na-strand OPTIONAL , --strands for the second sequence only; no first-strands member exists
3466
3467 -- per-segment scores
3468 seg-scores SET OF Score OPTIONAL
3469 }
3470
3471 Sparse-seg-ext ::= SEQUENCE { -- currently carries only an index; the richer form below is commented out
3472 --seg-ext SET OF {
3473 -- index INTEGER,
3474 -- data User-field
3475 -- }
3476 index INTEGER
3477 }
3478
3479
3480
3481 -- use of Score is discouraged for external ASN.1 specifications
3482 Score ::= SEQUENCE { -- an optionally identified score whose value is a real or an integer
3483 id Object-id OPTIONAL ,
3484 value CHOICE {
3485 real REAL ,
3486 int INTEGER
3487 }
3488 }
3489
3490 -- use of Score-set is encouraged for external ASN.1 specifications
3491 Score-set ::= SET OF Score -- unordered collection of Score; used by Spliced-exon.scores
3492
3493 END
3494
3495 --$Revision: 6.0 $
3496 --**********************************************************************
3497 --
3498 -- NCBI Sequence Analysis Results (other than alignments)
3499 -- by James Ostell, 1990
3500 --
3501 --**********************************************************************
3502
3503 NCBI-Seqres DEFINITIONS ::=
3504 BEGIN
3505
3506 EXPORTS Seq-graph;
3507
3508 IMPORTS Seq-loc FROM NCBI-Seqloc;
3509
3510 --*** Sequence Graph ********************************
3511 --*
3512 --* for values mapped by residue or range to sequence
3513 --*
3514
3515 Seq-graph ::= SEQUENCE { -- values mapped by residue or range to a sequence
3516 title VisibleString OPTIONAL ,
3517 comment VisibleString OPTIONAL ,
3518 loc Seq-loc , -- region this applies to
3519 title-x VisibleString OPTIONAL , -- title for x-axis
3520 title-y VisibleString OPTIONAL , -- presumably the title for the y-axis, by analogy with title-x
3521 comp INTEGER OPTIONAL , -- compression (residues/value)
3522 a REAL OPTIONAL , -- for scaling values
3523 b REAL OPTIONAL , -- display = (a x value) + b
3524 numval INTEGER , -- number of values in graph
3525 graph CHOICE { -- the values themselves, as reals, integers or bytes
3526 real Real-graph ,
3527 int Int-graph ,
3528 byte Byte-graph } }
3529
3530 Real-graph ::= SEQUENCE { -- graph whose values are REALs
3531 max REAL , -- top of graph
3532 min REAL , -- bottom of graph
3533 axis REAL , -- value to draw axis on
3534 values SEQUENCE OF REAL }
3535
3536 Int-graph ::= SEQUENCE { -- graph whose values are INTEGERs; members parallel Real-graph
3537 max INTEGER , -- presumably top of graph, as in Real-graph
3538 min INTEGER , -- presumably bottom of graph, as in Real-graph
3539 axis INTEGER , -- presumably value to draw axis on, as in Real-graph
3540 values SEQUENCE OF INTEGER }
3541
3542 Byte-graph ::= SEQUENCE { -- integer from 0-255
3543 max INTEGER ,
3544 min INTEGER ,
3545 axis INTEGER ,
3546 values OCTET STRING } -- presumably one byte per value, given the 0-255 range noted above; TODO confirm
3547
3548 END
3549
3550 --$Revision: 6.1 $
3551 --********************************************************************
3552 --
3553 -- Direct Submission of Sequence Data
3554 -- James Ostell, 1991
3555 --
3556 -- This is a trial specification for direct submission of sequence
3557 -- data worked out between NCBI and EMBL
3558 -- Later revised to reflect work with GenBank and Integrated database
3559 --
3560 -- Version 3.0, 1994
3561 -- This is the official NCBI sequence submission format now.
3562 --
3563 --********************************************************************
3564
3565 NCBI-Submit DEFINITIONS ::=
3566 BEGIN
3567
3568 EXPORTS Seq-submit, Contact-info;
3569
3570 IMPORTS Cit-sub, Author FROM NCBI-Biblio
3571 Date, Object-id FROM NCBI-General
3572 Seq-annot FROM NCBI-Sequence
3573 Seq-id FROM NCBI-Seqloc
3574 Seq-entry FROM NCBI-Seqset;
3575
3576 Seq-submit ::= SEQUENCE { -- a submission: one Submit-block plus the submitted data
3577 sub Submit-block ,
3578 data CHOICE { -- exactly one of the three kinds below
3579 entrys SET OF Seq-entry , -- sequence(s)
3580 annots SET OF Seq-annot , -- annotation(s)
3581 delete SET OF Seq-id } } -- deletions of entries
3582
3583 Submit-block ::= SEQUENCE { -- administrative information about a submission
3584 contact Contact-info , -- who to contact
3585 cit Cit-sub , -- citation for this submission
3586 hup BOOLEAN DEFAULT FALSE , -- hold until publish
3587 reldate Date OPTIONAL , -- release by date
3588 subtype INTEGER { -- type of submission
3589 new (1) , -- new data
3590 update (2) , -- update by author
3591 revision (3) , -- 3rd party (non-author) update
3592 other (255) } OPTIONAL ,
3593 tool VisibleString OPTIONAL, -- tool used to make submission
3594 user-tag VisibleString OPTIONAL, -- user supplied id for this submission
3595 comment VisibleString OPTIONAL } -- user comments/advice to database
3596
3597 Contact-info ::= SEQUENCE { -- who to contact to discuss the submission; every member is OPTIONAL
3598 name VisibleString OPTIONAL , -- OBSOLETE: will be removed
3599 address SEQUENCE OF VisibleString OPTIONAL ,
3600 phone VisibleString OPTIONAL ,
3601 fax VisibleString OPTIONAL ,
3602 email VisibleString OPTIONAL ,
3603 telex VisibleString OPTIONAL ,
3604 owner-id Object-id OPTIONAL , -- for owner accounts
3605 password OCTET STRING OPTIONAL ,
3606 last-name VisibleString OPTIONAL , -- structured to replace name above
3607 first-name VisibleString OPTIONAL ,
3608 middle-initial VisibleString OPTIONAL ,
3609 contact Author OPTIONAL } -- WARNING: this will replace the above
3610
3611 END
3612
3613 --$Revision: 1.15 $
3614 --**********************************************************************
3615 --
3616 -- Definitions for Cn3D-specific data (rendering settings,
3617 -- user annotations, etc.)
3618 --
3619 -- by Paul Thiessen
3620 --
3621 -- National Center for Biotechnology Information
3622 -- National Institutes of Health
3623 -- Bethesda, MD 20894 USA
3624 --
3625 -- asntool -m cn3d.asn -w 100 -o cn3d.h
3626 -- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
3627 -- -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
3628 --**********************************************************************
3629
3630 NCBI-Cn3d DEFINITIONS ::=
3631 -- Cn3D-specific information
3632
3633 BEGIN
3634
3635 EXPORTS Cn3d-style-dictionary, Cn3d-user-annotations;
3636
3637 IMPORTS Biostruc-id FROM MMDB
3638 Molecule-id, Residue-id FROM MMDB-Chemical-graph;
3639
3640
3641 -- values of enumerations must match those in cn3d/style_manager.hpp!
3642
3643 Cn3d-backbone-type ::= ENUMERATED { -- for different types of backbones; numbering starts at 1
3644 off (1),
3645 trace (2),
3646 partial (3),
3647 complete (4)
3648 }
3649
3650 Cn3d-drawing-style ::= ENUMERATED { -- atom/bond/object rendering styles
3651 -- for atoms and bonds (values 1 to 6)
3652 wire (1),
3653 tubes (2),
3654 ball-and-stick (3),
3655 space-fill (4),
3656 wire-worm (5),
3657 tube-worm (6),
3658 -- for 3d-objects (values 7 and 8)
3659 with-arrows (7),
3660 without-arrows (8)
3661 }
3662
3663 Cn3d-color-scheme ::= ENUMERATED { -- available color schemes (not all
3664 -- necessarily applicable to all objects)
3665 element (1),
3666 object (2),
3667 molecule (3),
3668 domain (4),
3669 residue (20), -- note: numeric values are not in listing order here and below
3670 secondary-structure (5),
3671 user-select (6),
3672 -- different alignment conservation coloring (currently only for proteins)
3673 aligned (7),
3674 identity (8),
3675 variety (9),
3676 weighted-variety (10),
3677 information-content (11),
3678 fit (12),
3679 block-fit (17),
3680 block-z-fit (18),
3681 block-row-fit (19),
3682 -- other schemes
3683 temperature (13),
3684 hydrophobicity (14),
3685 charge (15),
3686 rainbow (16)
3687 }
3688
3689 -- RGB triplet, interpreted (after division by the scale-factor) as floating
3690 -- point values which should range from [0..1]. The default scale-factor is
3691 -- 255, so that one can conveniently set integer byte values [0..255] for
3692 -- colors with the scale-factor already set appropriately to map to [0..1].
3693 -- An alpha value is allowed, but is currently ignored by Cn3D.
3694 Cn3d-color ::= SEQUENCE { -- RGB triplet with optional scale-factor and alpha
3695 scale-factor INTEGER DEFAULT 255, -- divisor applied to the components below
3696 red INTEGER,
3697 green INTEGER,
3698 blue INTEGER,
3699 alpha INTEGER DEFAULT 255 -- allowed but currently ignored by Cn3D (see comment above)
3700 }
3701
3702 Cn3d-backbone-style ::= SEQUENCE { -- style blob for backbones only
3703 type Cn3d-backbone-type, -- off, trace, partial or complete
3704 style Cn3d-drawing-style,
3705 color-scheme Cn3d-color-scheme,
3706 user-color Cn3d-color -- presumably used with the user-select color scheme; TODO confirm
3707 }
3708
3709 Cn3d-general-style ::= SEQUENCE { -- style blob for other objects
3710 is-on BOOLEAN, -- on/off flag; takes the place of the type member of Cn3d-backbone-style
3711 style Cn3d-drawing-style,
3712 color-scheme Cn3d-color-scheme,
3713 user-color Cn3d-color -- presumably used with the user-select color scheme; TODO confirm
3714 }
3715
3716 Cn3d-backbone-label-style ::= SEQUENCE { -- style blob for backbone labels
3717 spacing INTEGER, -- zero means none
3718 type ENUMERATED { -- one-letter or three-letter form of the label
3719 one-letter (1),
3720 three-letter (2)
3721 },
3722 number ENUMERATED { -- which numbering, if any, to use in the label
3723 none (0),
3724 sequential (1), -- from 1, by residues present, to match sequence
3725 pdb (2) -- use number assigned by PDB
3726 },
3727 termini BOOLEAN,
3728 white BOOLEAN -- all white, or (if false) color of alpha carbon
3729 }
3730
3731 -- rendering settings for Cn3D (mirrors StyleSettings class)
3732 Cn3d-style-settings ::= SEQUENCE { -- one complete set of rendering settings
3733 name VisibleString OPTIONAL, -- a name (for favorites)
3734 protein-backbone Cn3d-backbone-style, -- backbone styles
3735 nucleotide-backbone Cn3d-backbone-style,
3736 protein-sidechains Cn3d-general-style, -- styles for other stuff
3737 nucleotide-sidechains Cn3d-general-style,
3738 heterogens Cn3d-general-style,
3739 solvents Cn3d-general-style,
3740 connections Cn3d-general-style,
3741 helix-objects Cn3d-general-style,
3742 strand-objects Cn3d-general-style,
3743 virtual-disulfides-on BOOLEAN, -- virtual disulfides
3744 virtual-disulfide-color Cn3d-color,
3745 hydrogens-on BOOLEAN, -- hydrogens
3746 background-color Cn3d-color, -- background
3747 -- floating point parameters - scale-factor applies to all the following:
3748 scale-factor INTEGER, -- presumably each INTEGER below is divided by this to get the real value, as for Cn3d-color; TODO confirm
3749 space-fill-proportion INTEGER,
3750 ball-radius INTEGER,
3751 stick-radius INTEGER,
3752 tube-radius INTEGER,
3753 tube-worm-radius INTEGER,
3754 helix-radius INTEGER,
3755 strand-width INTEGER,
3756 strand-thickness INTEGER,
3757 -- backbone labels (no labels if not present)
3758 protein-labels Cn3d-backbone-label-style OPTIONAL,
3759 nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
3760 -- ion labels
3761 ion-labels BOOLEAN OPTIONAL
3762 }
3763
3764 Cn3d-style-settings-set ::= SET OF Cn3d-style-settings -- unordered collection of style settings
3765
3766 Cn3d-style-table-id ::= INTEGER -- key of a Cn3d-style-table-item; referenced by Cn3d-user-annotation.style-id
3767
3768 Cn3d-style-table-item ::= SEQUENCE { -- one entry of Cn3d-style-dictionary.style-table
3769 id Cn3d-style-table-id,
3770 style Cn3d-style-settings
3771 }
3772
3773 -- the global settings, and a lookup table of styles for user annotations.
3774 Cn3d-style-dictionary ::= SEQUENCE { -- exported from this module
3775 global-style Cn3d-style-settings, -- the global settings
3776 style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL -- lookup table of styles for user annotations
3777 }
3778
3779 -- a range of residues in a chain, identified by MMDB residue-id
3780 -- (e.g., numbered from 1)
3781 Cn3d-residue-range ::= SEQUENCE { -- Residue-id is imported from MMDB-Chemical-graph
3782 from Residue-id, -- NOTE(review): whether the range is inclusive at both ends is not stated here; confirm
3783 to Residue-id
3784 }
3785
3786 -- set of locations on a particular chain
3787 Cn3d-molecule-location ::= SEQUENCE { -- a molecule plus optional residue ranges on it
3788 molecule-id Molecule-id, -- MMDB molecule id
3789 -- which residues; whole molecule implied if absent
3790 residues SEQUENCE OF Cn3d-residue-range OPTIONAL
3791 }
3792
3793 -- set of locations on a particular structure object (e.g., a PDB/MMDB
3794 -- structure), which may include multiple ranges of residues each on
3795 -- multiple chains.
3796 Cn3d-object-location ::= SEQUENCE { -- a structure plus the molecule locations on it
3797 structure-id Biostruc-id, -- Biostruc-id is imported from MMDB
3798 residues SEQUENCE OF Cn3d-molecule-location -- required, one entry per molecule
3799 }
3800
3801 -- information for an individual user annotation
3802 Cn3d-user-annotation ::= SEQUENCE { -- one entry of Cn3d-user-annotations.annotations
3803 name VisibleString, -- a (short) name for this annotation
3804 description VisibleString OPTIONAL, -- an optional longer description
3805 style-id Cn3d-style-table-id, -- how to draw this annotation
3806 residues SEQUENCE OF Cn3d-object-location, -- which residues to cover
3807 is-on BOOLEAN -- whether this annotation is to be turned on in Cn3D
3808 }
3809
3810 -- a GL-ordered transformation matrix
3811 Cn3d-GL-matrix ::= SEQUENCE { -- sixteen required REAL elements, m0 through m15
3812 m0 REAL, m1 REAL, m2 REAL, m3 REAL,
3813 m4 REAL, m5 REAL, m6 REAL, m7 REAL,
3814 m8 REAL, m9 REAL, m10 REAL, m11 REAL,
3815 m12 REAL, m13 REAL, m14 REAL, m15 REAL -- NOTE(review): "GL-ordered" presumably means the OpenGL element order; confirm against Cn3D
3816 }
3817
3818 -- a floating point 3d vector
3819 Cn3d-vector ::= SEQUENCE { -- three required REAL components
3820 x REAL, -- x component
3821 y REAL, -- y component
3822 z REAL -- z component
3823 }
3824
3825 -- parameters used to set up the camera in Cn3D
3826 Cn3d-view-settings ::= SEQUENCE { -- all fields are required
3827 camera-distance REAL, -- camera on +Z axis this distance from origin
3828 camera-angle-rad REAL, -- camera angle; NOTE(review): presumably in radians, per the field name
3829 camera-look-at-X REAL, -- X of the point in the Z=0 plane the camera points at
3830 camera-look-at-Y REAL, -- Y of the point in the Z=0 plane the camera points at
3831 camera-clip-near REAL, -- distance of the near clipping plane from the camera
3832 camera-clip-far REAL, -- distance of the far clipping plane from the camera
3833 matrix Cn3d-GL-matrix, -- transformation of objects in the scene
3834 rotation-center Cn3d-vector -- center of rotation of whole scene
3835 }
3836
3837 -- The list of annotations for a given CDD/mime. If residue regions overlap
3838 -- between annotations that are turned on, the last annotation in this list
3839 -- that contains these residues will be used as the display style for these
3840 -- residues.
3841 -- Also contains the current viewpoint, so that user's camera angle
3842 -- can be stored and reproduced, for illustrations, on-line figures, etc.
3843 Cn3d-user-annotations ::= SEQUENCE { -- both fields are optional
3844 annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL, -- ordered list; where turned-on annotations overlap, the last one listed sets the style
3845 view Cn3d-view-settings OPTIONAL -- the stored viewpoint (camera settings)
3846 }
3847
3848 END
3849
3850 --$Revision: 6.3 $
3851 --****************************************************************
3852 --
3853 -- NCBI Project Definition Module
3854 -- by Jim Ostell and Jonathan Kans, 1998
3855 --
3856 --****************************************************************
3857
3858 NCBI-Project DEFINITIONS ::=
3859 BEGIN
3860
3861 EXPORTS Project, Project-item;
3862
3863 IMPORTS Date FROM NCBI-General
3864 PubMedId FROM NCBI-Biblio
3865 Seq-id, Seq-loc FROM NCBI-Seqloc
3866 Seq-annot, Pubdesc FROM NCBI-Sequence
3867 Seq-entry FROM NCBI-Seqset
3868 Pubmed-entry FROM NCBI-PubMed;
3869
3870 Project ::= SEQUENCE { -- an optional description plus exactly one data item
3871 descr Project-descr OPTIONAL , -- ids, name and descriptors of the project
3872 data Project-item } -- required; one alternative of the Project-item CHOICE
3873
3874 Project-item ::= CHOICE { -- exactly one alternative is present
3875 pmuid SET OF INTEGER , -- the six "uid" alternatives carry sets of integers
3876 protuid SET OF INTEGER ,
3877 nucuid SET OF INTEGER ,
3878 sequid SET OF INTEGER ,
3879 genomeuid SET OF INTEGER ,
3880 structuid SET OF INTEGER ,
3881 pmid SET OF PubMedId , -- PubMedId is imported from NCBI-Biblio
3882 protid SET OF Seq-id , -- the sequence "id" alternatives carry sets of Seq-id
3883 nucid SET OF Seq-id ,
3884 seqid SET OF Seq-id ,
3885 genomeid SET OF Seq-id ,
3886 structid NULL , -- carries no data (type is NULL)
3887 pment SET OF Pubmed-entry , -- Pubmed-entry is imported from NCBI-PubMed
3888 protent SET OF Seq-entry , -- the sequence "ent" alternatives carry sets of Seq-entry
3889 nucent SET OF Seq-entry ,
3890 seqent SET OF Seq-entry ,
3891 genomeent SET OF Seq-entry ,
3892 structent NULL , -- carries no data (type is NULL)
3893 seqannot SET OF Seq-annot , -- set of Seq-annot
3894 loc SET OF Seq-loc , -- set of Seq-loc
3895 proj SET OF Project -- nested projects (recursive reference to Project)
3896 }
3897
3898 Project-descr ::= SEQUENCE {
3899 id SET OF Project-id , -- required set of string identifiers
3900 name VisibleString OPTIONAL , -- optional name
3901 descr SET OF Projdesc OPTIONAL } -- optional descriptors (each a pub, date, comment or title)
3902
3903 Projdesc ::= CHOICE { -- one descriptor; exactly one alternative is present
3904 pub Pubdesc , -- Pubdesc is imported from NCBI-Sequence
3905 date Date , -- Date is imported from NCBI-General
3906 comment VisibleString , -- a comment string
3907 title VisibleString -- a title string
3908 }
3909
3910 Project-id ::= VisibleString -- a project identifier is a plain string
3911
3912 END
3913
3914
3915 --$Revision: 6.0 $
3916 --*********************************************************************
3917 --
3918 -- access.asn
3919 --
3920 -- messages for data access
3921 --
3922 --*********************************************************************
3923
3924 NCBI-Access DEFINITIONS ::=
3925 BEGIN
3926
3927 EXPORTS Link-set;
3928
3929 -- links between same class = neighbors
3930 -- links between other classes = links
3931
3932 Link-set ::= SEQUENCE { -- a set of links to one doc type
3933 num INTEGER , -- number of links to this doc type
3934 uids SEQUENCE OF INTEGER OPTIONAL , -- the links (integer uids)
3935 weights SEQUENCE OF INTEGER OPTIONAL } -- the weights; NOTE(review): presumably one per uid in the same order, confirm
3936
3937
3938 END
3939 --$Revision: 6.0 $
3940 --**********************************************************************
3941 --
3942 -- NCBI Sequence Feature Definition Module
3943 -- by James Ostell, 1994
3944 --
3945 --**********************************************************************
3946
3947 NCBI-FeatDef DEFINITIONS ::=
3948 BEGIN
3949
3950 EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;
3951
3952
3953 FeatDef ::= SEQUENCE { -- one feature definition; all fields are required
3954 typelabel VisibleString , -- short label for type eg "CDS"
3955 menulabel VisibleString , -- label for a menu eg "Coding Region"
3956 featdef-key INTEGER , -- unique for this feature definition
3957 seqfeat-key INTEGER , -- SeqFeat.data.choice from objfeat.h
3958 entrygroup INTEGER , -- Group for data entry
3959 displaygroup INTEGER , -- Group for data display
3960 molgroup FeatMolType -- Type of Molecule used for (aa, na or both; see FeatMolType)
3961 }
3962
3963 FeatMolType ::= ENUMERATED { -- molecule type of a feature definition (FeatDef.molgroup)
3964 aa (1), -- proteins
3965 na (2), -- nucleic acids
3966 both (3) } -- both proteins and nucleic acids
3967
3968 FeatDefSet ::= SEQUENCE OF FeatDef -- collections of definitions
3969
3970 FeatDispGroup ::= SEQUENCE { -- a named display group
3971 groupkey INTEGER , -- integer key of the group; NOTE(review): presumably what FeatDef.displaygroup refers to, confirm
3972 groupname VisibleString } -- name of the group
3973
3974 FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup -- ordered list of display groups
3975
3976 FeatDefGroupSet ::= SEQUENCE { -- display groups together with feature definitions; NOTE(review): not listed in this module's EXPORTS
3977 groups FeatDispGroupSet , -- the display groups
3978 defs FeatDefSet } -- the feature definitions
3979
3980 END
3981
3982
3983 --$Revision: 6.12 $
3984 --****************************************************************
3985 --
3986 -- NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
3987 -- by Jonathan Epstein, February 1996
3988 --
3989 --****************************************************************
3990
3991 NCBI-Mime DEFINITIONS ::=
3992 BEGIN
3993
3994 EXPORTS Ncbi-mime-asn1;
3995 IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
3996 Cdd FROM NCBI-Cdd
3997 Seq-entry FROM NCBI-Seqset
3998 Seq-annot FROM NCBI-Sequence
3999 Medline-entry FROM NCBI-Medline
4000 Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;
4001
4002 Ncbi-mime-asn1 ::= CHOICE { -- top-level MIME payload; exactly one alternative is present
4003 entrez Entrez-general, -- one Entrez item (Medline entry, sequence entry, structure or structure annotations)
4004 alignstruc Biostruc-align, -- structures & sequences & alignments
4005 alignseq Biostruc-align-seq, -- sequences & sequence alignment (no structure)
4006 strucseq Biostruc-seq, -- structure & sequences
4007 strucseqs Biostruc-seqs, -- structure & sequences & alignments
4008 general Biostruc-seqs-aligns-cdd -- all-purpose "grab bag"
4009 -- others may be added here in the future
4010 }
4011
4012 -- generic bundle of sequence and alignment info
4013 Bundle-seqs-aligns ::= SEQUENCE { -- every field is optional
4014 sequences SET OF Seq-entry OPTIONAL, -- sequences
4015 seqaligns SET OF Seq-annot OPTIONAL, -- sequence alignments
4016 strucaligns Biostruc-annot-set OPTIONAL, -- structure alignments
4017 imports SET OF Seq-annot OPTIONAL, -- imports (updates in Cn3D)
4018 style-dictionary Cn3d-style-dictionary OPTIONAL, -- Cn3D global style plus style table
4019 user-annotations Cn3d-user-annotations OPTIONAL -- Cn3D user annotations and stored viewpoint
4020 }
4021
4022 Biostruc-seqs-aligns-cdd ::= SEQUENCE { -- payload of the "general" alternative of Ncbi-mime-asn1
4023 seq-align-data CHOICE { -- required; exactly one of the two forms
4024 bundle Bundle-seqs-aligns, -- either seqs + alignments
4025 cdd Cdd -- or CDD (which contains these)
4026 },
4027 structures SET OF Biostruc OPTIONAL, -- structures
4028 structure-type ENUMERATED { -- type of structures to load if
4029 ncbi-backbone(2), -- not present; meanings and
4030 ncbi-all-atom(3), -- values are same as MMDB's
4031 pdb-model(4) -- Model-type
4032 } OPTIONAL
4033 }
4034
4035 Biostruc-align ::= SEQUENCE { -- structures & sequences & alignments; only the two Cn3D fields are optional
4036 master Biostruc, -- the master structure
4037 slaves SET OF Biostruc, -- the other (slave) structures
4038 alignments Biostruc-annot-set, -- structure alignments
4039 sequences SET OF Seq-entry, -- sequences
4040 seqalign SET OF Seq-annot, -- sequence alignments, carried as Seq-annot
4041 style-dictionary Cn3d-style-dictionary OPTIONAL, -- Cn3D global style plus style table
4042 user-annotations Cn3d-user-annotations OPTIONAL -- Cn3D user annotations and stored viewpoint
4043 }
4044
4045 Biostruc-align-seq ::= SEQUENCE { -- display seq structure align only
4046 sequences SET OF Seq-entry, -- sequences
4047 seqalign SET OF Seq-annot, -- sequence alignments, carried as Seq-annot
4048 style-dictionary Cn3d-style-dictionary OPTIONAL, -- Cn3D global style plus style table
4049 user-annotations Cn3d-user-annotations OPTIONAL -- Cn3D user annotations and stored viewpoint
4050 }
4051
4052 Biostruc-seq ::= SEQUENCE { -- display structure seq added by yanli
4053 structure Biostruc, -- the structure (exactly one)
4054 sequences SET OF Seq-entry, -- sequences
4055 style-dictionary Cn3d-style-dictionary OPTIONAL, -- Cn3D global style plus style table
4056 user-annotations Cn3d-user-annotations OPTIONAL -- Cn3D user annotations and stored viewpoint
4057 }
4058
4059 Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure added by yanli
4060 structure Biostruc, -- the structure (exactly one)
4061 sequences SET OF Seq-entry, -- sequences
4062 seqalign SET OF Seq-annot, -- sequence alignments, carried as Seq-annot
4063 style-dictionary Cn3d-style-dictionary OPTIONAL, -- Cn3D global style plus style table
4064 user-annotations Cn3d-user-annotations OPTIONAL -- Cn3D user annotations and stored viewpoint
4065 }
4066
4067 Entrez-style ::= ENUMERATED { -- style selector used by Entrez-general.style; values run 1 through 13
4068 docsum (1),
4069 genbank (2) ,
4070 genpept (3) ,
4071 fasta (4) ,
4072 asn1 (5) ,
4073 graphic (6) ,
4074 alignment (7) ,
4075 globalview (8) ,
4076 report (9) ,
4077 medlars (10) ,
4078 embl (11) ,
4079 pdb (12) ,
4080 kinemage (13) }
4081
4082 Entrez-general ::= SEQUENCE { -- one Entrez item plus the style to show it in
4083 title VisibleString OPTIONAL, -- optional title
4084 data CHOICE { -- required; exactly one kind of item
4085 ml Medline-entry , -- a Medline entry
4086 prot Seq-entry , -- prot, nuc and genome all carry a Seq-entry
4087 nuc Seq-entry ,
4088 genome Seq-entry ,
4089 structure Biostruc , -- a structure
4090 strucAnnot Biostruc-annot-set } , -- a set of structure annotations
4091 style Entrez-style , -- required style selector (see Entrez-style)
4092 location VisibleString OPTIONAL } -- optional location string; NOTE(review): meaning not stated in this file
4093 END
4094 --$Revision: 6.0 $
4095 --********************************************************************
4096 --
4097 -- Print Templates
4098 -- James Ostell, 1993
4099 --
4100 --
4101 --********************************************************************
4102
4103 NCBI-ObjPrt DEFINITIONS ::=
4104 BEGIN
4105
4106 EXPORTS PrintTemplate, PrintTemplateSet;
4107
4108 PrintTemplate ::= SEQUENCE { -- a named print template wrapping one root PrintFormat
4109 name TemplateName , -- name for this template
4110 labelfrom VisibleString OPTIONAL, -- ASN.1 path to get label from
4111 format PrintFormat } -- required; how to print, possibly nested via PrintFormBlock
4112
4113 TemplateName ::= VisibleString -- template names are plain strings; also used by PrintForm.use-template
4114
4115 PrintTemplateSet ::= SEQUENCE OF PrintTemplate -- ordered list of templates
4116
4117 PrintFormat ::= SEQUENCE { -- formatting rule for one ASN.1 element
4118 asn1 VisibleString , -- ASN.1 partial path for this
4119 label VisibleString OPTIONAL , -- printable label
4120 prefix VisibleString OPTIONAL, -- optional prefix string
4121 suffix VisibleString OPTIONAL, -- optional suffix string
4122 form PrintForm } -- required; which kind of form applies
4123
4124 PrintForm ::= CHOICE { -- Forms for various ASN.1 components
4125 block PrintFormBlock, -- for SEQUENCE, SET
4126 boolean PrintFormBoolean, -- strings to use for true and false
4127 enum PrintFormEnum, -- optional list of value strings
4128 text PrintFormText, -- optional text function name
4129 use-template TemplateName, -- refers to another template by name
4130 user UserFormat , -- print function names (printfunc, defaultfunc)
4131 null NULL } -- rarely used
4132
4133 UserFormat ::= SEQUENCE { -- NOTE(review): presumably names of print functions resolved by the application; confirm
4134 printfunc VisibleString , -- required function name
4135 defaultfunc VisibleString OPTIONAL } -- optional default function name
4136
4137 PrintFormBlock ::= SEQUENCE { -- for SEQUENCE, SET
4138 separator VisibleString OPTIONAL , -- optional separator string
4139 components SEQUENCE OF PrintFormat } -- required; nested formats (recursive reference to PrintFormat)
4140
4141 PrintFormBoolean ::= SEQUENCE { -- both strings are optional
4142 true VisibleString OPTIONAL , -- string for the true case
4143 false VisibleString OPTIONAL } -- string for the false case
4144
4145 PrintFormEnum ::= SEQUENCE {
4146 values SEQUENCE OF VisibleString OPTIONAL } -- optional ordered list of strings; NOTE(review): mapping to enum values not stated here
4147
4148 PrintFormText ::= SEQUENCE {
4149 textfunc VisibleString OPTIONAL } -- optional function name; NOTE(review): presumably resolved by the application, confirm
4150
4151 END
4152
4153 --$Revision: 6.10 $
4154 --*********************************************************
4155 --
4156 -- ASN.1 and XML for the components of a GenBank format sequence
4157 -- J.Ostell 2002
4158 -- Updated 25 May 2010
4159 --
4160 --*********************************************************
4161
4162 NCBI-GBSeq DEFINITIONS ::=
4163 BEGIN
4164
4165 --********
4166 -- GBSeq represents the elements in a GenBank style report
4167 -- of a sequence with some small additions to structure and support
4168 -- for protein (GenPept) versions of GenBank format as seen in
4169 -- Entrez. While this represents the simplification, reduction of
4170 -- detail, and flattening to a single sequence perspective of GenBank
4171 -- format (compared with the full ASN.1 or XML from which GenBank and
4172 -- this format is derived at NCBI), it is presented in ASN.1 or XML for
4173 -- automated parsing and processing. It is hoped that this compromise
4174 -- will be useful for those bulk processing at the GenBank format level
4175 -- of detail today. Since it is a compromise, a number of pragmatic
4176 -- decisions have been made.
4177 --
4178 -- In pursuit of simplicity and familiarity a number of
4179 -- fields do not have full substructure defined here where there is
4180 -- already a standard GenBank format string. For example:
4181 --
4182 -- Date DD-Mon-YYYY
4183 -- Authors LastName, Initials (with periods)
4184 -- Journal JournalName Volume (issue), page-range (year)
4185 -- FeatureLocations as per GenBank feature table, but FeatureIntervals
4186 -- may also be provided as a convenience
4187 -- FeatureQualifiers as per GenBank feature table
4188 -- Primary has a string that represents a table to construct
4189 -- a third party (TPA) sequence.
4190 -- other-seqids can have strings with the "vertical bar format" sequence
4191 -- identifiers used in BLAST for example, when they are non-genbank types.
4192 -- Currently in GenBank format you only see GI, but there are others, like
4193 -- patents, submitter clone names, etc which will appear here, as they
4194 -- always have in the ASN.1 format, and full XML format.
4195 -- source-db is a formatted text block for peptides in GenPept format that
4196 -- carries information from the source protein database.
4197 --
4198 -- There are also a number of elements that could have been
4199 -- more exactly specified, but in the interest of simplicity
4200 -- have been simply left as optional. For example:
4201 --
4202 -- accession and accession.version will always appear in a GenBank record;
4203 -- they are optional because this format can also be used for non-GenBank
4204 -- sequences, and in that case will have only "other-seqids".
4205 --
4206 -- sequences will normally all have "sequence" filled in. But contig records
4207 -- will have a "join" statement in the "contig" slot, and no "sequence".
4208 -- We also may consider a retrieval option with no sequence of any kind
4209 -- and no feature table to quickly check minimal values.
4210 --
4211 -- a reference may have an author list, or be from a consortium, or both.
4212 --
4213 -- some fields, such as taxonomy, do appear as separate elements in GenBank
4214 -- format but without a specific linetype (in GenBank format this comes
4215 -- under ORGANISM). Another example is the separation of primary accession
4216 -- from the list of secondary accessions. In GenBank format primary
4217 -- accession is just the first one on the list that includes all secondaries
4218 -- after it.
4219 --
4220 -- create-date deserves special comment. The date you see on the right hand
4221 -- side of the LOCUS line in GenBank format is actually the last date
4222 -- the record was modified (or the update-date). The date the record was
4223 -- first submitted to GenBank appears in the first submission citation in
4224 -- the reference section. Internally in the databases and ASN.1 NCBI keeps
4225 -- the first date the record was released into the sequence database at
4226 -- NCBI as create-date. For records from EMBL, which supports create-date,
4227 -- it is the date provided by EMBL. For DDBJ records, which do not supply
4228 -- a create-date (same as GenBank format) the create-date is the first date
4229 -- NCBI saw the record from DDBJ. For older GenBank records, before NCBI
4230 -- took responsibility for GenBank, it is just the first date NCBI saw the
4231 -- record. Create-date can be very useful, so we expose it here, but users
4232 -- must understand it is only an approximation and comes from many sources,
4233 -- and with many exceptions and caveats. It does NOT tell you the first
4234 -- date the public might have seen this record and thus is NOT an accurate
4235 -- measure for legal issues of precedence.
4236 --
4237 --********
4238
4239 GBSet ::= SEQUENCE OF GBSeq -- ordered list of GBSeq records
4240
4241 GBSeq ::= SEQUENCE { -- one GenBank style record; required: locus, length, moltype, division, update-date, definition
4242 locus VisibleString ,
4243 length INTEGER ,
4244 strandedness VisibleString OPTIONAL ,
4245 moltype VisibleString ,
4246 topology VisibleString OPTIONAL ,
4247 division VisibleString ,
4248 update-date VisibleString , -- last date the record was modified, as a DD-Mon-YYYY string
4249 create-date VisibleString OPTIONAL , -- first release date at NCBI; an approximation, see the notes in the module header
4250 update-release VisibleString OPTIONAL ,
4251 create-release VisibleString OPTIONAL ,
4252 definition VisibleString ,
4253 primary-accession VisibleString OPTIONAL , -- optional so the format can also carry non-GenBank sequences
4254 entry-version VisibleString OPTIONAL ,
4255 accession-version VisibleString OPTIONAL , -- optional so the format can also carry non-GenBank sequences
4256 other-seqids SEQUENCE OF GBSeqid OPTIONAL , -- "vertical bar format" sequence identifiers
4257 secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL, -- kept separate from the primary accession
4258 project VisibleString OPTIONAL ,
4259 keywords SEQUENCE OF GBKeyword OPTIONAL ,
4260 segment VisibleString OPTIONAL ,
4261 source VisibleString OPTIONAL ,
4262 organism VisibleString OPTIONAL ,
4263 taxonomy VisibleString OPTIONAL , -- in GenBank format this comes under ORGANISM
4264 references SEQUENCE OF GBReference OPTIONAL ,
4265 comment VisibleString OPTIONAL ,
4266 comment-set SEQUENCE OF GBComment OPTIONAL ,
4267 struc-comments SEQUENCE OF GBStrucComment OPTIONAL ,
4268 primary VisibleString OPTIONAL , -- string representing a table to construct a third party (TPA) sequence
4269 source-db VisibleString OPTIONAL , -- formatted text block from the source protein database (GenPept)
4270 database-reference VisibleString OPTIONAL ,
4271 feature-table SEQUENCE OF GBFeature OPTIONAL ,
4272 feature-set SEQUENCE OF GBFeatureSet OPTIONAL ,
4273 sequence VisibleString OPTIONAL , -- Optional for contig, wgs, etc.
4274 contig VisibleString OPTIONAL , -- contig records carry a "join" statement here and no sequence
4275 alt-seq SEQUENCE OF GBAltSeqData OPTIONAL
4276 }
4277
4278 GBSeqid ::= VisibleString -- element type of GBSeq.other-seqids
4279
4280 GBSecondary-accn ::= VisibleString -- element type of GBSeq.secondary-accessions
4281
4282 GBKeyword ::= VisibleString -- element type of GBSeq.keywords
4283
4284 GBReference ::= SEQUENCE { -- one citation; only reference and journal are required
4285 reference VisibleString ,
4286 position VisibleString OPTIONAL ,
4287 authors SEQUENCE OF GBAuthor OPTIONAL , -- a reference may have an author list, a consortium, or both
4288 consortium VisibleString OPTIONAL ,
4289 title VisibleString OPTIONAL ,
4290 journal VisibleString , -- standard GenBank format string: name, volume (issue), page-range (year)
4291 xref SEQUENCE OF GBXref OPTIONAL , -- dbname/id pairs
4292 pubmed INTEGER OPTIONAL ,
4293 remark VisibleString OPTIONAL
4294 }
4295
4296 GBAuthor ::= VisibleString -- one author as a GenBank format string: LastName, Initials (with periods)
4297
4298 GBXref ::= SEQUENCE { -- a dbname/id pair; both fields required
4299 dbname VisibleString , -- database name
4300 id VisibleString -- identifier string
4301 }
4302
4303 GBComment ::= SEQUENCE { -- element type of GBSeq.comment-set
4304 type VisibleString OPTIONAL , -- optional type string
4305 paragraphs SEQUENCE OF GBCommentParagraph -- required ordered list of paragraphs
4306 }
4307
4308 GBCommentParagraph ::= SEQUENCE { -- one paragraph of a GBComment
4309 items SEQUENCE OF GBCommentItem -- required ordered list of items
4310 }
4311
4312 GBCommentItem ::= SEQUENCE { -- both fields are optional
4313 value VisibleString OPTIONAL , -- text of the item
4314 url VisibleString OPTIONAL -- URL string for the item
4315 }
4316
4317 GBStrucComment ::= SEQUENCE { -- element type of GBSeq.struc-comments
4318 name VisibleString OPTIONAL , -- optional name
4319 items SEQUENCE OF GBStrucCommentItem -- required ordered list of tag/value items
4320 }
4321
4322 GBStrucCommentItem ::= SEQUENCE { -- all three fields are optional
4323 tag VisibleString OPTIONAL , -- tag string
4324 value VisibleString OPTIONAL , -- value string
4325 url VisibleString OPTIONAL -- URL string
4326 }
4327
4328 GBFeatureSet ::= SEQUENCE { -- element type of GBSeq.feature-set
4329 annot-source VisibleString OPTIONAL , -- optional annotation source string
4330 features SEQUENCE OF GBFeature -- required ordered list of features
4331 }
4332
4333 GBFeature ::= SEQUENCE { -- one feature; only key and location are required
4334 key VisibleString ,
4335 location VisibleString , -- location as per the GenBank feature table
4336 intervals SEQUENCE OF GBInterval OPTIONAL , -- provided as a convenience alongside location
4337 operator VisibleString OPTIONAL ,
4338 partial5 BOOLEAN OPTIONAL ,
4339 partial3 BOOLEAN OPTIONAL ,
4340 quals SEQUENCE OF GBQualifier OPTIONAL , -- qualifiers as per the GenBank feature table
4341 xrefs SEQUENCE OF GBXref OPTIONAL -- dbname/id pairs
4342 }
4343
4344 GBInterval ::= SEQUENCE { -- only accession is required
4345 from INTEGER OPTIONAL ,
4346 to INTEGER OPTIONAL ,
4347 point INTEGER OPTIONAL , -- NOTE(review): presumably used instead of from/to for a single position; not enforced by the type
4348 iscomp BOOLEAN OPTIONAL ,
4349 interbp BOOLEAN OPTIONAL ,
4350 accession VisibleString
4351 }
4352
4353 GBQualifier ::= SEQUENCE { -- a feature qualifier
4354 name VisibleString , -- required qualifier name
4355 value VisibleString OPTIONAL -- optional qualifier value
4356 }
4357
4358 GBAltSeqData ::= SEQUENCE { -- element type of GBSeq.alt-seq
4359 name VisibleString , -- e.g., contig, wgs, scaffold, cage, genome
4360 items SEQUENCE OF GBAltSeqItem OPTIONAL -- optional ordered list of items
4361 }
4362
4363 GBAltSeqItem ::= SEQUENCE { -- every field is optional
4364 interval GBInterval OPTIONAL ,
4365 isgap BOOLEAN OPTIONAL , -- NOTE(review): presumably marks the item as a gap, with the gap fields below describing it; confirm
4366 gap-length INTEGER OPTIONAL ,
4367 gap-type VisibleString OPTIONAL ,
4368 gap-linkage VisibleString OPTIONAL ,
4369 gap-comment VisibleString OPTIONAL ,
4370 first-accn VisibleString OPTIONAL ,
4371 last-accn VisibleString OPTIONAL ,
4372 value VisibleString OPTIONAL
4373 }
4374
4375 END
4376
4377 --$Revision: 1.8 $
4378 --************************************************************************
4379 --
4380 -- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
4381 -- The International Nucleotide Sequence Database (INSD) collaboration
4382 -- Version 1.6, 25 May 2010
4383 --
4384 --************************************************************************
4385
4386 INSD-INSDSeq DEFINITIONS ::=
4387 BEGIN
4388
4389 -- INSDSeq provides the elements of a sequence as presented in the
4390 -- GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
4391 -- additional structure.
4392 -- Although this single perspective of the three flatfile formats
4393 -- provides a useful simplification, it hides to some extent the
4394 -- details of the actual data underlying those formats. Nevertheless,
4395 -- the XML version of INSD-Seq is being provided with
4396 -- the hopes that it will prove useful to those who bulk-process
4397 -- sequence data at the flatfile-format level of detail. Further
4398 -- documentation regarding the content and conventions of those formats
4399 -- can be found at:
4400 --
4401 -- URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
4402 -- http://www.ddbj.nig.ac.jp/FT/full_index.html
4403 -- http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
4404 -- http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
4405 --
4406 -- URLs for DDBJ, EMBL, and GenBank Release Notes :
4407 -- ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
4408 -- http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
4409 -- ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
4410 --
4411 -- Because INSDSeq is a compromise, a number of pragmatic decisions have
4412 -- been made:
4413 --
4414 -- In pursuit of simplicity and familiarity a number of fields do not
4415 -- have full substructure defined here where there is already a
4416 -- standard flatfile format string. For example:
4417 --
4418 -- Dates: DD-MON-YYYY (eg 10-JUN-2003)
4419 --
4420 -- Author: LastName, Initials (eg Smith, J.N.)
4421 -- or Lastname Initials (eg Smith J.N.)
4422 --
4423 -- Journal: JournalName Volume (issue), page-range (year)
4424 -- or JournalName Volume(issue):page-range(year)
4425 -- eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
4426 -- Appl. Environ. Microbiol. 61(4):1646-1648(1995).
4427 --
4428 -- FeatureLocations are represented as in the flatfile feature table,
4429 -- but FeatureIntervals may also be provided as a convenience
4430 --
4431 -- FeatureQualifiers are represented as in the flatfile feature table.
4432 --
4433 -- Primary has a string that represents a table to construct
4434 -- a third party (TPA) sequence.
4435 --
4436 -- other-seqids can have strings with the "vertical bar format" sequence
4437 -- identifiers used in BLAST for example, when they are non-INSD types.
4438 --
4439 -- Currently in flatfile format you only see Accession numbers, but there
4440 -- are others, like patents, submitter clone names, etc which will
4441 -- appear here
4442 --
4443 -- There are also a number of elements that could have been more exactly
4444 -- specified, but in the interest of simplicity have been simply left as
4445 -- optional. For example:
4446 --
4447 -- All publicly accessible sequence records in INSDSeq format will
4448 -- include accession and accession.version. However, these elements are
4449 -- optional in INSDSeq so that this format can also be used
4450 -- for non-public sequence data, prior to the assignment of accessions and
4451 -- version numbers. In such cases, records will have only "other-seqids".
4452 --
4453 -- sequences will normally all have "sequence" filled in. But contig records
4454 -- will have a "join" statement in the "contig" slot, and no "sequence".
4455 -- We also may consider a retrieval option with no sequence of any kind
4456 -- and no feature table to quickly check minimal values.
4457 --
4458 -- Four (optional) elements are specific to records represented via the EMBL
4459 -- sequence database: INSDSeq_update-release, INSDSeq_create-release,
4460 -- INSDSeq_entry-version, and INSDSeq_database-reference.
4461 --
4462 -- One (optional) element is specific to records originating at the GenBank
4463 -- and DDBJ sequence databases: INSDSeq_segment.
4464 --
4465 --********
4466
4467 INSDSet ::= SEQUENCE OF INSDSeq -- ordered list of INSDSeq records
4468
4469 INSDSeq ::= SEQUENCE { -- one flatfile-style record; required: locus, length, moltype, division, update-date, definition
4470 locus VisibleString ,
4471 length INTEGER ,
4472 strandedness VisibleString OPTIONAL ,
4473 moltype VisibleString ,
4474 topology VisibleString OPTIONAL ,
4475 division VisibleString ,
4476 update-date VisibleString , -- date string, DD-MON-YYYY (eg 10-JUN-2003)
4477 create-date VisibleString OPTIONAL , -- date string, DD-MON-YYYY (eg 10-JUN-2003)
4478 update-release VisibleString OPTIONAL , -- specific to records represented via the EMBL database
4479 create-release VisibleString OPTIONAL , -- specific to records represented via the EMBL database
4480 definition VisibleString ,
4481 primary-accession VisibleString OPTIONAL , -- optional so non-public data without accessions can be represented
4482 entry-version VisibleString OPTIONAL , -- specific to records represented via the EMBL database
4483 accession-version VisibleString OPTIONAL , -- optional so non-public data without accessions can be represented
4484 other-seqids SEQUENCE OF INSDSeqid OPTIONAL , -- "vertical bar format" sequence identifiers
4485 secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,
4486 project VisibleString OPTIONAL ,
4487 keywords SEQUENCE OF INSDKeyword OPTIONAL ,
4488 segment VisibleString OPTIONAL , -- specific to records originating at GenBank and DDBJ
4489 source VisibleString OPTIONAL ,
4490 organism VisibleString OPTIONAL ,
4491 taxonomy VisibleString OPTIONAL ,
4492 references SEQUENCE OF INSDReference OPTIONAL ,
4493 comment VisibleString OPTIONAL ,
4494 comment-set SEQUENCE OF INSDComment OPTIONAL ,
4495 struc-comments SEQUENCE OF INSDStrucComment OPTIONAL ,
4496 primary VisibleString OPTIONAL , -- string representing a table to construct a third party (TPA) sequence
4497 source-db VisibleString OPTIONAL ,
4498 database-reference VisibleString OPTIONAL , -- specific to records represented via the EMBL database
4499 feature-table SEQUENCE OF INSDFeature OPTIONAL ,
4500 feature-set SEQUENCE OF INSDFeatureSet OPTIONAL ,
4501 sequence VisibleString OPTIONAL , -- Optional for contig, wgs, etc.
4502 contig VisibleString OPTIONAL , -- contig records carry a "join" statement here and no sequence
4503 alt-seq SEQUENCE OF INSDAltSeqData OPTIONAL
4504 }
4505
4506 INSDSeqid ::= VisibleString -- element type of INSDSeq.other-seqids
4507
4508 INSDSecondary-accn ::= VisibleString -- element type of INSDSeq.secondary-accessions
4509
4510 INSDKeyword ::= VisibleString -- element type of INSDSeq.keywords
4511
4512 -- INSDReference_position contains a string value indicating the
4513 -- basepair span(s) to which a reference applies. The allowable
4514 -- formats are:
4515 --
4516 -- X..Y : Where X and Y are integers separated by two periods,
4517 -- X >= 1 , Y <= sequence length, and X <= Y
4518 --
4519 -- Multiple basepair spans can exist, separated by a
4520 -- semi-colon and a space. For example : 10..20; 100..500
4521 --
4522 -- sites : The string literal 'sites', indicating that a reference
4523 -- provides sequence annotation information, but the specific
4524 -- basepair spans are either not captured, or were too numerous
4525 -- to record.
4526 --
4527 -- The 'sites' literal string is singly occurring, and
4528 -- cannot be used in conjunction with any X..Y basepair spans.
4529 --
4530 -- References that lack an INSDReference_position element apply
4531 -- to the entire sequence.
4532
4533 INSDReference ::= SEQUENCE { -- one citation; only reference and journal are required
4534 reference VisibleString ,
4535 position VisibleString OPTIONAL , -- X..Y span(s) or the literal 'sites'; absent means the reference applies to the entire sequence
4536 authors SEQUENCE OF INSDAuthor OPTIONAL ,
4537 consortium VisibleString OPTIONAL ,
4538 title VisibleString OPTIONAL ,
4539 journal VisibleString , -- flatfile format string, eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
4540 xref SEQUENCE OF INSDXref OPTIONAL , -- references to records in other databases
4541 pubmed INTEGER OPTIONAL ,
4542 remark VisibleString OPTIONAL
4543 }
4544
4545 INSDAuthor ::= VisibleString -- one author as a flatfile string, eg Smith, J.N. or Smith J.N.
4546
4547 -- INSDXref provides a method for referring to records in
4548 -- other databases. INSDXref_dbname is a string value that
4549 -- provides the name of the database, and INSDXref_id
4550 -- is a string value that provides the record's identifier
4551 -- in that database.
4552
4553 INSDXref ::= SEQUENCE { -- reference to a record in another database; both fields required
4554 dbname VisibleString , -- name of the database
4555 id VisibleString -- the record's identifier in that database
4556 }
4557
4558 INSDComment ::= SEQUENCE { -- element type of INSDSeq.comment-set
4559 type VisibleString OPTIONAL , -- optional type string
4560 paragraphs SEQUENCE OF INSDCommentParagraph -- required ordered list of paragraphs
4561 }
4562
4563 INSDCommentParagraph ::= SEQUENCE { -- one paragraph of an INSDComment
4564 items SEQUENCE OF INSDCommentItem -- required ordered list of items
4565 }
4566
4567 INSDCommentItem ::= SEQUENCE { -- both fields are optional
4568 value VisibleString OPTIONAL , -- text of the item
4569 url VisibleString OPTIONAL -- URL string for the item
4570 }
4571
4572 INSDStrucComment ::= SEQUENCE { -- element type of INSDSeq.struc-comments
4573 name VisibleString OPTIONAL , -- optional name
4574 items SEQUENCE OF INSDStrucCommentItem -- required ordered list of tag/value items
4575 }
4576
4577 INSDStrucCommentItem ::= SEQUENCE { -- all three fields are optional
4578 tag VisibleString OPTIONAL , -- tag string
4579 value VisibleString OPTIONAL , -- value string
4580 url VisibleString OPTIONAL -- URL string
4581 }
4582
4583 -- INSDFeature_operator contains a string value describing
4584 -- the relationship among a set of INSDInterval within
4585 -- INSDFeature_intervals. The allowable formats are:
4586 --
4587 -- join : The string literal 'join' indicates that the
4588 -- INSDInterval intervals are biologically joined
4589 -- together into a contiguous molecule.
4590 --
4591 -- order : The string literal 'order' indicates that the
4592 -- INSDInterval intervals are in the presented
4593 -- order, but they are not necessarily contiguous.
4594 --
4595 -- Either 'join' or 'order' is required if INSDFeature_intervals
4596 -- is comprised of more than one INSDInterval .
4597
4598 INSDFeatureSet ::= SEQUENCE { -- element type of INSDSeq.feature-set
4599 annot-source VisibleString OPTIONAL , -- optional annotation source string
4600 features SEQUENCE OF INSDFeature -- required ordered list of features
4601 }
4602
4603 INSDFeature ::= SEQUENCE { -- one feature; only key and location are required
4604 key VisibleString ,
4605 location VisibleString , -- location as in the flatfile feature table
4606 intervals SEQUENCE OF INSDInterval OPTIONAL , -- provided as a convenience alongside location
4607 operator VisibleString OPTIONAL , -- 'join' or 'order'; required when intervals holds more than one INSDInterval
4608 partial5 BOOLEAN OPTIONAL ,
4609 partial3 BOOLEAN OPTIONAL ,
4610 quals SEQUENCE OF INSDQualifier OPTIONAL , -- qualifiers as in the flatfile feature table
4611 xrefs SEQUENCE OF INSDXref OPTIONAL -- references to records in other databases
4612 }
4613
4614 -- INSDInterval_iscomp is a boolean indicating whether
4615 -- an INSDInterval_from / INSDInterval_to location
4616 -- represents a location on the complement strand.
4617 -- When INSDInterval_iscomp is TRUE, it essentially
4618 -- confirms that a 'from' value which is greater than
4619 -- a 'to' value is intentional, because the location
4620 -- is on the opposite strand of the presented sequence.
4621
4622 -- INSDInterval_interbp is a boolean indicating whether
4623 -- a feature (such as a restriction site) is located
4624 -- between two adjacent basepairs. When INSDInterval_interbp
4625 -- is TRUE, the 'from' and 'to' values must differ by
4626 -- exactly one base.
4627
4628 INSDInterval ::= SEQUENCE { -- only accession is required
4629 from INTEGER OPTIONAL , -- may be greater than 'to' when iscomp is TRUE
4630 to INTEGER OPTIONAL ,
4631 point INTEGER OPTIONAL , -- NOTE(review): presumably used instead of from/to for a single position; not enforced by the type
4632 iscomp BOOLEAN OPTIONAL , -- whether the from/to location is on the complement strand
4633 interbp BOOLEAN OPTIONAL , -- whether the feature is located between two adjacent basepairs
4634 accession VisibleString
4635 }
4636
4637 INSDQualifier ::= SEQUENCE { -- a feature qualifier
4638 name VisibleString , -- required qualifier name
4639 value VisibleString OPTIONAL -- optional qualifier value
4640 }
4641
4642 INSDAltSeqData ::= SEQUENCE { -- element type of INSDSeq.alt-seq
4643 name VisibleString , -- e.g., CON-division-join, WGS-contig-range,
4644 -- WGS-scaffold-range, MGA/CAGE-range, genome
4645 items SEQUENCE OF INSDAltSeqItem OPTIONAL -- optional ordered list of items
4646 }
4647
4648 INSDAltSeqItem ::= SEQUENCE { -- every field is optional
4649 interval INSDInterval OPTIONAL ,
4650 isgap BOOLEAN OPTIONAL , -- NOTE(review): presumably marks the item as a gap, with the gap fields below describing it; confirm
4651 gap-length INTEGER OPTIONAL ,
4652 gap-type VisibleString OPTIONAL ,
4653 gap-linkage VisibleString OPTIONAL ,
4654 gap-comment VisibleString OPTIONAL ,
4655 first-accn VisibleString OPTIONAL ,
4656 last-accn VisibleString OPTIONAL ,
4657 value VisibleString OPTIONAL
4658 }
4659
4660 END
4661
4662 --$Revision: 6.1 $
4663 --**********************************************************************
4664 --
4665 -- ASN.1 for a tiny Bioseq in XML
4666 -- basically a structured FASTA file with a few extras
4667 -- in this case we drop all modularity of components
4668 -- All ids are Optional - simpler structure, less checking
4669 -- Components of organism are hard coded - can't easily add or change
4670 -- sequence is just string whether DNA or protein
4671 -- by James Ostell, 2000
4672 --
4673 --**********************************************************************
4674
4675 NCBI-TSeq DEFINITIONS ::=
4676 BEGIN
4677
4678 TSeq ::= SEQUENCE { -- a tiny Bioseq: basically a structured FASTA record with a few extras
4679 seqtype ENUMERATED { -- required; which kind of sequence this is
4680 nucleotide (1),
4681 protein (2) },
4682 gi INTEGER OPTIONAL, -- all ids (gi, accver, sid, local) are optional
4683 accver VisibleString OPTIONAL,
4684 sid VisibleString OPTIONAL,
4685 local VisibleString OPTIONAL,
4686 taxid INTEGER OPTIONAL, -- organism components are hard coded as taxid and orgname
4687 orgname VisibleString OPTIONAL,
4688 defline VisibleString, -- required
4689 length INTEGER, -- required
4690 sequence VisibleString } -- required; just a string whether DNA or protein
4691
4692 TSeqSet ::= SEQUENCE OF TSeq -- a bunch of them (ordered list of TSeq records)
4693
4694 END
4695
4696 --$Id: scoremat.asn,v 1.14 2011/12/21 15:29:33 kazimird Exp $
4697 -- ===========================================================================
4698 --
4699 -- PUBLIC DOMAIN NOTICE
4700 -- National Center for Biotechnology Information
4701 --
4702 -- This software/database is a "United States Government Work" under the
4703 -- terms of the United States Copyright Act. It was written as part of
4704 -- the author's official duties as a United States Government employee and
4705 -- thus cannot be copyrighted. This software/database is freely available
4706 -- to the public for use. The National Library of Medicine and the U.S.
4707 -- Government have not placed any restriction on its use or reproduction.
4708 --
4709 -- Although all reasonable efforts have been taken to ensure the accuracy
4710 -- and reliability of the software and data, the NLM and the U.S.
4711 -- Government do not and cannot warrant the performance or results that
4712 -- may be obtained by using this software or data. The NLM and the U.S.
4713 -- Government disclaim all warranties, express or implied, including
4714 -- warranties of performance, merchantability or fitness for any particular
4715 -- purpose.
4716 --
4717 -- Please cite the author in any work or product based on this material.
4718 --
4719 -- ===========================================================================
4720 --
4721 -- Author: Christiam Camacho
4722 --
4723 -- File Description:
4724 -- ASN.1 definitions for scoring matrix
4725 --
4726 -- ===========================================================================
4727
4728 NCBI-ScoreMat DEFINITIONS ::= BEGIN
4729
4730 EXPORTS Pssm, PssmIntermediateData, PssmFinalData,
4731 PssmParameters, PssmWithParameters;
4732
4733 IMPORTS Object-id FROM NCBI-General
4734 Seq-entry FROM NCBI-Seqset;
4735
4736 -- a rudimentary block/core-model, to be used with block-based alignment
4737 -- routines and threading
4738
4739 BlockProperty ::= SEQUENCE { -- typed property of a CoreBlock; intvalue and textvalue are both OPTIONAL
4740 type INTEGER { unassigned (0), -- INTEGER with named numbers rather than ENUMERATED
4741 threshold (1), -- score threshold for heuristics
4742 minscore (2), -- observed minimum score in CD
4743 maxscore (3), -- observed maximum score in CD
4744 meanscore (4), -- observed mean score in CD
4745 variance (5), -- observed score variance
4746 name (10), -- just name the block
4747 is-optional(20), -- block may not have to be used
4748 other (255) },
4749 intvalue INTEGER OPTIONAL,
4750 textvalue VisibleString OPTIONAL
4751 }
4752
4753 CoreBlock ::= SEQUENCE { -- element type of CoreDef.blocks; start and stop are mandatory
4754 start INTEGER, -- begin of block on query
4755 stop INTEGER, -- end of block on query
4756 minstart INTEGER OPTIONAL, -- optional N-terminal extension
4757 maxstop INTEGER OPTIONAL, -- optional C-terminal extension
4758 property SEQUENCE OF BlockProperty OPTIONAL -- any number of typed properties for this block
4759 }
4760
4761 LoopConstraint ::= SEQUENCE { -- element type of CoreDef.loops; both members have a DEFAULT, so an empty value is valid
4762 minlength INTEGER DEFAULT 0, -- minimum length of unaligned region
4763 maxlength INTEGER DEFAULT 100000 -- maximum length of unaligned region
4764 }
4765
4766 CoreDef ::= SEQUENCE { -- block/loop model; the list sizes named in the comments below are not enforced by the schema
4767 nblocks INTEGER, -- number of core elements/blocks
4768 blocks SEQUENCE OF CoreBlock, -- nblocks locations
4769 loops SEQUENCE OF LoopConstraint, -- (nblocks+1) constraints
4770
4771 isDiscontinuous BOOLEAN OPTIONAL, -- is it a discontinuous domain
4772
4773 insertions SEQUENCE OF INTEGER OPTIONAL -- positions of long insertions
4774 }
4775
4776 Site-annot ::= SEQUENCE { -- one annotated site; only startPosition and stopPosition are mandatory
4777 startPosition INTEGER, -- location of the annotation,
4778 stopPosition INTEGER, -- start and stop position in the
4779 -- PSSM
4780
4781 description VisibleString OPTIONAL, -- holds description or names, that
4782 -- can be used for labels in
4783 -- visualization
4784
4785 type INTEGER OPTIONAL, -- type of the annotated feature,
4786 -- similarly to Align-annot in
4787 -- NCBI-Cdd
4788
4789 aliases SEQUENCE OF VisibleString OPTIONAL, -- additional names for
4790 -- the annotation
4791
4792 motif VisibleString OPTIONAL, -- motif to validate mapping of sites
4793
4794 motifuse INTEGER OPTIONAL -- 0 for validation
4795 -- 1 for motif in seqloc
4796 -- 2 for multiple motifs in seqloc
4797 }
4798
4799 Site-annot-set ::= SEQUENCE OF Site-annot -- ordered list of site annotations; the type of PssmParameters.annotatedSites
4800
4801 -- ===========================================================================
4802 -- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
4803 -- ===========================================
4804 --
4805 -- Two possible inputs to PSI-BLAST and formatrpsdb:
4806 -- 1) PssmWithParameters where pssm field contains intermediate PSSM data (matrix
4807 -- of frequency ratios)
4808 -- 2) PssmWithParameters where pssm field contains final PSSM data (matrix of
4809 -- scores and statistical parameters) - such as written by cddumper
4810 --
4811 -- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
4812 -- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
4813 -- In case 2, PSI-BLAST's PSSM engine is not invoked and the matrix of scores
4814 -- and statistical parameters are used to perform the search in PSI-BLAST and the
4815 -- same data and the data in PssmWithParameters::params::rpsdbparams is used to
4816 -- build the PSSM and ultimately the RPS-BLAST database
4817 --
4818 --
4819 -- reads ++++++++++++++ writes
4820 -- PssmWithParams ====> + PSI-BLAST + =====> PssmWithParams
4821 -- ++++++++++++++ | ^
4822 -- ^ | |
4823 -- | | |
4824 -- +===========================================+ |
4825 -- | |
4826 -- +===========================================+ |
4827 -- | |
4828 -- reads | |
4829 -- v |
4830 -- +++++++++++++++ writes +++++++++++++++++++++++ |
4831 -- | formatrpsdb | =====> | RPS-BLAST databases | |
4832 -- +++++++++++++++ +++++++++++++++++++++++ |
4833 -- ^ |
4834 -- | |
4835 -- | reads |
4836 -- +++++++++++++ |
4837 -- | RPS-BLAST | |
4838 -- +++++++++++++ |
4839 -- |
4840 -- reads ++++++++++++ writes |
4841 -- Cdd ======> | cddumper | =============================+
4842 -- ++++++++++++
4843 --
4844 -- ===========================================================================
4845
4846 -- Contains the PSSM's scores and its associated statistical parameters.
4847 -- Dimensions and order in which scores are stored must be the same as that
4848 -- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
4849 PssmFinalData ::= SEQUENCE {
4850
4851 -- PSSM's scores
4852 scores SEQUENCE OF INTEGER,
4853
4854 -- Karlin & Altschul parameter produced during the PSSM's calculation
4855 lambda REAL,
4856
4857 -- Karlin & Altschul parameter produced during the PSSM's calculation
4858 kappa REAL,
4859
4860 -- Karlin & Altschul parameter produced during the PSSM's calculation
4861 h REAL,
4862
4863 -- scaling factor used to obtain more precision when building the PSSM.
4864 -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
4865 -- engine generates PSSMs which are not scaled-up, however, if PSI-BLAST is
4866 -- given a PSSM which contains a scaled-up PSSM (indicated by having a
4867 -- scalingFactor greater than 1), then it will scale down the PSSM to
4868 -- perform the initial stages of the search with it.
4869 -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided
4870 -- scaled-up PSSMs, it will ensure that all PSSMs used to build the
4871 -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST
4872 -- will silently produce incorrect results).
4873 scalingFactor INTEGER DEFAULT 1,
4874
4875 -- Karlin & Altschul parameter produced during the PSSM's calculation
4876 -- (ungapped variant; optional, unlike lambda above)
4877
4878 -- Karlin & Altschul parameter produced during the PSSM's calculation
4879 kappaUngapped REAL OPTIONAL,
4880
4881 -- Karlin & Altschul parameter produced during the PSSM's calculation
4882 hUngapped REAL OPTIONAL
4883 }
4884
4885 -- Contains the PSSM's intermediate data used to create the PSSM's scores
4886 -- and statistical parameters. Dimensions and order in which scores are
4887 -- stored must be the same as that specified in Pssm::numRows,
4888 -- Pssm::numColumns, and Pssm::byRow
4889 PssmIntermediateData ::= SEQUENCE {
4890
4891 -- observed residue frequencies (or counts) per position of the PSSM
4892 -- (prior to application of pseudocounts)
4893 resFreqsPerPos SEQUENCE OF INTEGER OPTIONAL,
4894
4895 -- Weighted observed residue frequencies per position of the PSSM.
4896 -- (N.B.: each position's weights should add up to 1.0).
4897 -- This field corresponds to f_i (f sub i) in equation 2 of
4898 -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
4899 -- NOTE: this is needed for diagnostics information only (i.e.:
4900 -- -out_ascii_pssm option in psiblast)
4901 weightedResFreqsPerPos SEQUENCE OF REAL OPTIONAL,
4902
4903 -- PSSM's frequency ratios (the only mandatory member of this type)
4904 freqRatios SEQUENCE OF REAL,
4905
4906 -- Information content per position of the PSSM
4907 -- NOTE: this is needed for diagnostics information only (i.e.:
4908 -- -out_ascii_pssm option in psiblast)
4909 informationContent SEQUENCE OF REAL OPTIONAL,
4910
4911 -- Relative weight for columns of the PSSM without gaps to pseudocounts
4912 -- NOTE: this is needed for diagnostics information only (i.e.:
4913 -- -out_ascii_pssm option in psiblast)
4914 gaplessColumnWeights SEQUENCE OF REAL OPTIONAL,
4915
4916 -- Used in sequence weights computation
4917 -- NOTE: this is needed for diagnostics information only (i.e.:
4918 -- -out_ascii_pssm option in psiblast)
4919 sigma SEQUENCE OF REAL OPTIONAL,
4920
4921 -- Length of the aligned regions per position of the query sequence
4922 -- NOTE: this is needed for diagnostics information only (i.e.:
4923 -- -out_ascii_pssm option in psiblast)
4924 intervalSizes SEQUENCE OF INTEGER OPTIONAL,
4925
4926 -- Number of matching sequences per position of the PSSM (including the
4927 -- query)
4928 -- NOTE: this is needed for diagnostics information only (i.e.:
4929 -- -out_ascii_pssm option in psiblast)
4930 numMatchingSeqs SEQUENCE OF INTEGER OPTIONAL,
4931
4932 -- Number of independent observations per position of the PSSM
4933 -- NOTE: this is needed for building CDD database for DELTA-BLAST
4934 numIndeptObsr SEQUENCE OF REAL OPTIONAL
4935 }
4936
4937 -- Position-specific scoring matrix
4938 --
4939 -- Column indices on the PSSM refer to the positions corresponding to the
4940 -- query/master sequence, i.e. the number of columns (N) is the same
4941 -- as the length of the query/master sequence.
4942 -- Row indices refer to individual amino acid types, i.e. the number of
4943 -- rows (M) is the same as the number of different residues in the
4944 -- alphabet we use. Consequently, row labels are amino acid identifiers.
4945 --
4946 -- PSSMs are stored as linear arrays of integers. By default, we store
4947 -- them column-by-column, M values for the first column followed by M
4948 -- values for the second column, and so on. In order to provide
4949 -- flexibility for external applications, the boolean field "byRow" is
4950 -- provided to specify the storage order.
4951 Pssm ::= SEQUENCE {
4952
4953 -- Is this a protein or nucleotide scoring matrix?
4954 isProtein BOOLEAN DEFAULT TRUE,
4955
4956 -- PSSM identifier
4957 identifier Object-id OPTIONAL,
4958
4959 -- The dimensions of the matrix are returned so the client can
4960 -- verify that all data was received.
4961
4962 numRows INTEGER, -- number of rows
4963 numColumns INTEGER, -- number of columns
4964
4965 -- rowLabels is given to note the order of residue types so that it can
4966 -- be cross-checked between applications.
4967 -- If this field is not given, the matrix values are presented in
4968 -- order of the alphabet ncbistdaa is used for protein, ncbi4na for nucl.
4969 -- for proteins the values returned correspond to
4970 -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
4971 rowLabels SEQUENCE OF VisibleString OPTIONAL,
4972
4973 -- are matrices stored row by row?
4974 byRow BOOLEAN DEFAULT FALSE,
4975
4976 -- PSSM representative sequence (master)
4977 query Seq-entry OPTIONAL,
4978
4979 -- both intermediateData and finalData can be provided, but at least one of
4980 -- them must be provided (not enforced by the schema: both are OPTIONAL).
4981 -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData
4982 -- representation.
4983
4984 -- Intermediate data for the PSSM
4985 intermediateData PssmIntermediateData OPTIONAL,
4986
4987 -- Final representation for the PSSM
4988 finalData PssmFinalData OPTIONAL
4989 }
4990
4991 -- This structure is used to create the RPS-BLAST database auxiliary file
4992 -- (*.aux) and it contains parameters set at creation time of the PSSM.
4993 -- Also, the matrixName field is used by formatrpsdb to build a PSSM from
4994 -- a Pssm structure which only contains PssmIntermediateData.
4995 FormatRpsDbParameters ::= SEQUENCE {
4996
4997 -- name of the underlying score matrix whose frequency ratios were
4998 -- used in PSSM construction (e.g.: BLOSUM62); the only mandatory member
4999 matrixName VisibleString,
5000
5001 -- gap opening penalty corresponding to the matrix above
5002 gapOpen INTEGER OPTIONAL,
5003
5004 -- gap extension penalty corresponding to the matrix above
5005 gapExtend INTEGER OPTIONAL
5006
5007 }
5008
5009 -- Populated by PSSM engine of PSI-BLAST, original source for these values
5010 -- are the PSI-BLAST options specified using the BLAST options API
5011 PssmParameters ::= SEQUENCE { -- every member is OPTIONAL
5012
5013 -- pseudocount constant used for PSSM. This field corresponds to beta in
5014 -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
5015 pseudocount INTEGER OPTIONAL,
5016
5017 -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
5018 -- populated by PSI-BLAST
5019 rpsdbparams FormatRpsDbParameters OPTIONAL,
5020
5021 -- alignment constraints needed by sequence-structure threader
5022 -- and other global or local block-alignment algorithms
5023 constraints CoreDef OPTIONAL,
5024
5025 -- bit score threshold for specific conserved domain hits
5026 bitScoreThresh REAL OPTIONAL,
5027
5028 -- conserved functional sites with annotations
5029 annotatedSites Site-annot-set OPTIONAL
5030 }
5031
5032 -- Envelope containing PSSM and the parameters used to create it.
5033 -- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
5034 PssmWithParameters ::= SEQUENCE {
5035
5036 -- This field is applicable to PSI-BLAST and formatrpsdb.
5037 -- When both the intermediate and final PSSM data are provided in this
5038 -- field, the final data (matrix of scores and associated statistical
5039 -- parameters) takes precedence and that data is used for further
5040 -- processing. The rationale for this is that the PSSM's scores and
5041 -- statistical parameters might have been calculated by other applications
5042 -- and it might not be possible to recreate it by using PSI-BLAST's PSSM
5043 -- engine.
5044 pssm Pssm,
5045
5046 -- This field's rpsdbparams is used to specify the values of options
5047 -- for processing by formatrpsdb. If these are not set, the command
5048 -- line defaults of formatrpsdb are applied. This field is used
5049 -- by PSI-BLAST to verify that the underlying score matrix used to build
5050 -- the PSSM is the same as the one being specified through the BLAST
5051 -- Options API. If this field is omitted, no verification will be
5052 -- performed, so be careful to keep track of what matrix was used to build
5053 -- the PSSM or else the results produced by PSI-BLAST will be unreliable.
5054 params PssmParameters OPTIONAL
5055 }
5056
5057 END
5058 --$Revision: 1.142 $
5059 --**********************************************************************
5060 --
5061 -- NCBI ASN.1 macro editing language specifications
5062 --
5063 -- by Colleen Bollin, 2007
5064 --
5065 --**********************************************************************
5066
5067 NCBI-Macro DEFINITIONS ::=
5068 BEGIN
5069
5070 EXPORTS AECR-action, Parse-action, Macro-action-list, Suspect-rule-set;
5071
5072 -- simple constraints --
5073
5074 String-location ::= ENUMERATED { -- type of String-constraint.match-location, which defaults to contains
5075 contains (1) ,
5076 equals (2) ,
5077 starts (3) ,
5078 ends (4) ,
5079 inlist (5) }
5080
5081 Word-substitution ::= SEQUENCE { -- a word with an unordered set of synonyms; every member is OPTIONAL or has a DEFAULT
5082 word VisibleString OPTIONAL ,
5083 synonyms SET OF VisibleString OPTIONAL ,
5084 case-sensitive BOOLEAN DEFAULT FALSE ,
5085 whole-word BOOLEAN DEFAULT FALSE }
5086
5087 Word-substitution-set ::= SET OF Word-substitution -- type of String-constraint.ignore-words
5088
5089 String-constraint ::= SEQUENCE { -- text-matching options; every member is OPTIONAL or has a DEFAULT, so an empty value is valid
5090 match-text VisibleString OPTIONAL ,
5091 match-location String-location DEFAULT contains ,
5092 case-sensitive BOOLEAN DEFAULT FALSE ,
5093 ignore-space BOOLEAN DEFAULT FALSE ,
5094 ignore-punct BOOLEAN DEFAULT FALSE ,
5095 ignore-words Word-substitution-set OPTIONAL ,
5096 whole-word BOOLEAN DEFAULT FALSE ,
5097 not-present BOOLEAN DEFAULT FALSE , -- NOTE(review): presumably inverts the match; confirm against the evaluating code
5098 is-all-caps BOOLEAN DEFAULT FALSE ,
5099 is-all-lower BOOLEAN DEFAULT FALSE ,
5100 is-all-punct BOOLEAN DEFAULT FALSE ,
5101 ignore-weasel BOOLEAN DEFAULT FALSE }
5102
5103 String-constraint-set ::= SET OF String-constraint -- unordered collection of String-constraint values
5104
5105 Strand-constraint ::= ENUMERATED { -- type of Location-constraint.strand, which defaults to any
5106 any (0) ,
5107 plus (1) ,
5108 minus (2) }
5109
5110 Seqtype-constraint ::= ENUMERATED { -- type of Location-constraint.seq-type, which defaults to any
5111 any (0) ,
5112 nuc (1) ,
5113 prot (2) }
5114
5115 Partial-constraint ::= ENUMERATED { -- type of Location-constraint.partial5 and partial3, which both default to either
5116 either (0) ,
5117 partial (1) ,
5118 complete (2) }
5119
5120 Location-type-constraint ::= ENUMERATED { -- type of Location-constraint.location-type, which defaults to any
5121 any (0) ,
5122 single-interval (1) ,
5123 joined (2) ,
5124 ordered (3) }
5125
5126 Location-pos-constraint ::= CHOICE { -- exactly one of three INTEGER distance-from-end criteria; type of Location-constraint.end5 and end3
5127 dist-from-end INTEGER ,
5128 max-dist-from-end INTEGER ,
5129 min-dist-from-end INTEGER }
5130
5131 Location-constraint ::= SEQUENCE { -- every member has a DEFAULT or is OPTIONAL, so an empty value is valid
5132 strand Strand-constraint DEFAULT any ,
5133 seq-type Seqtype-constraint DEFAULT any ,
5134 partial5 Partial-constraint DEFAULT either ,
5135 partial3 Partial-constraint DEFAULT either ,
5136 location-type Location-type-constraint DEFAULT any ,
5137 end5 Location-pos-constraint OPTIONAL , -- NOTE(review): end5/end3 presumably refer to the 5' and 3' ends, like partial5/partial3
5138 end3 Location-pos-constraint OPTIONAL }
5139
5140 Object-type-constraint ::= ENUMERATED { -- type of Source-constraint.type-constraint
5141 any (0) ,
5142 feature (1) ,
5143 descriptor (2) }
5144
5145
5146 -- feature values --
5147
5148 Macro-feature-type ::= ENUMERATED { -- feature kinds used by Feature-field, Feature-field-legal and Feature-field-pair; numbered 0 to 94 without gaps
5149 any (0) ,
5150 gene (1) ,
5151 org (2) ,
5152 cds (3) ,
5153 prot (4) ,
5154 preRNA (5) ,
5155 mRNA (6) ,
5156 tRNA (7) ,
5157 rRNA (8) ,
5158 snRNA (9) ,
5159 scRNA (10) ,
5160 otherRNA (11) ,
5161 pub (12) ,
5162 seq (13) ,
5163 imp (14) ,
5164 allele (15) ,
5165 attenuator (16) ,
5166 c-region (17) ,
5167 caat-signal (18) ,
5168 imp-CDS (19) ,
5169 conflict (20) ,
5170 d-loop (21) ,
5171 d-segment (22) ,
5172 enhancer (23) ,
5173 exon (24) ,
5174 gC-signal (25) ,
5175 iDNA (26) ,
5176 intron (27) ,
5177 j-segment (28) ,
5178 ltr (29) ,
5179 mat-peptide (30) ,
5180 misc-binding (31) ,
5181 misc-difference (32) ,
5182 misc-feature (33) ,
5183 misc-recomb (34) ,
5184 misc-RNA (35) ,
5185 misc-signal (36) ,
5186 misc-structure (37) ,
5187 modified-base (38) ,
5188 mutation (39) ,
5189 n-region (40) ,
5190 old-sequence (41) ,
5191 polyA-signal (42) ,
5192 polyA-site (43) ,
5193 precursor-RNA (44) ,
5194 prim-transcript (45) ,
5195 primer-bind (46) ,
5196 promoter (47) ,
5197 protein-bind (48) ,
5198 rbs (49) ,
5199 repeat-region (50) ,
5200 rep-origin (51) ,
5201 s-region (52) ,
5202 sig-peptide (53) ,
5203 source (54) ,
5204 stem-loop (55) ,
5205 sts (56) ,
5206 tata-signal (57) ,
5207 terminator (58) ,
5208 transit-peptide (59) ,
5209 unsure (60) ,
5210 v-region (61) ,
5211 v-segment (62) ,
5212 variation (63) ,
5213 virion (64) ,
5214 n3clip (65) ,
5215 n3UTR (66) ,
5216 n5clip (67) ,
5217 n5UTR (68) ,
5218 n10-signal (69) ,
5219 n35-signal (70) ,
5220 site-ref (71) ,
5221 region (72) ,
5222 comment (73) ,
5223 bond (74) ,
5224 site (75) ,
5225 rsite (76) ,
5226 user (77) ,
5227 txinit (78) ,
5228 num (79) ,
5229 psec-str (80) ,
5230 non-std-residue (81) ,
5231 het (82) ,
5232 biosrc (83) ,
5233 preprotein (84) ,
5234 mat-peptide-aa (85) ,
5235 sig-peptide-aa (86) ,
5236 transit-peptide-aa (87) ,
5237 snoRNA (88) ,
5238 gap (89) ,
5239 operon (90) ,
5240 oriT (91) ,
5241 ncRNA (92) ,
5242 tmRNA (93) ,
5243 mobile-element (94) }
5244
5245 Feat-qual-legal ::= ENUMERATED { -- recognised feature qualifiers, numbered 1 to 71 without gaps; there is no zero value
5246 allele (1) ,
5247 activity (2) ,
5248 anticodon (3) ,
5249 bound-moiety (4) ,
5250 chromosome (5),
5251 citation (6),
5252 codon (7) ,
5253 codon-start (8) ,
5254 codons-recognized (9) ,
5255 compare (10) ,
5256 cons-splice (11) ,
5257 db-xref (12) ,
5258 description (13) ,
5259 direction (14) ,
5260 ec-number (15) ,
5261 environmental-sample (16) ,
5262 evidence (17) ,
5263 exception (18) ,
5264 experiment (19) ,
5265 focus (20) ,
5266 frequency (21) ,
5267 function (22) ,
5268 gene (23) ,
5269 gene-description (24) ,
5270 inference (25) ,
5271 label (26) ,
5272 locus-tag (27) ,
5273 map (28) ,
5274 mobile-element (29) ,
5275 mod-base (30) ,
5276 mol-type (31) ,
5277 ncRNA-class (32) ,
5278 note (33) ,
5279 number (34) ,
5280 old-locus-tag (35) ,
5281 operon (36) ,
5282 organism (37) ,
5283 organelle (38) ,
5284 partial (39) ,
5285 phenotype (40) ,
5286 plasmid (41) ,
5287 product (42) ,
5288 protein-id (43) ,
5289 pseudo (44) ,
5290 rearranged (45) ,
5291 replace (46) ,
5292 rpt-family (47) ,
5293 rpt-type (48) ,
5294 rpt-unit (49) ,
5295 rpt-unit-seq (50) ,
5296 rpt-unit-range (51) ,
5297 segment (52) ,
5298 sequenced-mol (53) ,
5299 standard-name (54) ,
5300 synonym (55) ,
5301 transcript-id (56) ,
5302 transgenic (57) ,
5303 translation (58) ,
5304 transl-except (59) ,
5305 transl-table (60) ,
5306 usedin (61),
5307 mobile-element-type (62),
5308 mobile-element-name (63),
5309 gene-comment (64) ,
5310 satellite (65) ,
5311 satellite-type (66) ,
5312 satellite-name (67) ,
5313 location (68) ,
5314 tag-peptide (69) ,
5315 mobile-element-type-type (70) ,
5316 name (71) }
5317
5318 Feat-qual-legal-val ::= SEQUENCE { -- a recognised qualifier together with a text value; both members are mandatory
5319 qual Feat-qual-legal ,
5320 val VisibleString }
5321
5322 Feat-qual-legal-val-choice ::= CHOICE { -- CHOICE with a single alternative wrapping Feat-qual-legal-val
5323 qual Feat-qual-legal-val }
5324
5325 Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice -- unordered collection of qualifier/value pairs
5326
5327 Feat-qual-choice ::= CHOICE { -- a qualifier given either as a Feat-qual-legal value or as a String-constraint
5328 legal-qual Feat-qual-legal ,
5329 illegal-qual String-constraint } -- NOTE(review): presumably matches qualifier names outside Feat-qual-legal; confirm
5330
5331 Feature-field ::= SEQUENCE { -- a feature type plus one qualifier selected through Feat-qual-choice
5332 type Macro-feature-type ,
5333 field Feat-qual-choice }
5334
5335 Feature-field-legal ::= SEQUENCE { -- like Feature-field, but the qualifier must be a Feat-qual-legal value
5336 type Macro-feature-type ,
5337 field Feat-qual-legal }
5338
5339 Feature-field-pair ::= SEQUENCE { -- two qualifiers (from and to) sharing a single feature type
5340 type Macro-feature-type ,
5341 field-from Feat-qual-choice ,
5342 field-to Feat-qual-choice }
5343
5344 Rna-feat-type ::= CHOICE { -- RNA kind selector; only ncRNA carries a VisibleString, every other alternative is NULL
5345 any NULL ,
5346 preRNA NULL ,
5347 mRNA NULL ,
5348 tRNA NULL ,
5349 rRNA NULL ,
5350 ncRNA VisibleString , -- NOTE(review): the string is presumably the ncRNA class; confirm
5351 tmRNA NULL,
5352 miscRNA NULL }
5353
5354 Rna-field ::= ENUMERATED { -- field selectors used by Rna-qual and Rna-qual-pair; numbered 1 to 13 without gaps
5355 product (1) ,
5356 comment (2) ,
5357 codons-recognized (3) ,
5358 ncrna-class (4) ,
5359 anticodon (5) ,
5360 transcript-id (6) ,
5361 gene-locus (7) ,
5362 gene-description (8) ,
5363 gene-maploc (9) ,
5364 gene-locus-tag (10) ,
5365 gene-synonym (11) ,
5366 gene-comment (12) ,
5367 tag-peptide (13) }
5368
5369
5370 Rna-qual ::= SEQUENCE { -- one Rna-field of one RNA kind
5371 type Rna-feat-type ,
5372 field Rna-field }
5373
5374 Rna-qual-pair ::= SEQUENCE { -- two Rna-field values (from and to) sharing a single RNA kind
5375 type Rna-feat-type ,
5376 field-from Rna-field ,
5377 field-to Rna-field }
5378
5379 Source-qual ::= ENUMERATED { -- source qualifier selectors, numbered 1 to 97 without gaps; there is no zero value
5380 acronym (1) ,
5381 anamorph (2) ,
5382 authority (3) ,
5383 bio-material (4) ,
5384 biotype (5) ,
5385 biovar (6) ,
5386 breed (7) ,
5387 cell-line (8) ,
5388 cell-type (9) ,
5389 chemovar (10) ,
5390 chromosome (11) ,
5391 clone (12) ,
5392 clone-lib (13) ,
5393 collected-by (14) ,
5394 collection-date (15) ,
5395 common (16) ,
5396 common-name (17) ,
5397 country (18) ,
5398 cultivar (19) ,
5399 culture-collection (20) ,
5400 dev-stage (21) ,
5401 division (22) ,
5402 dosage (23) ,
5403 ecotype (24) ,
5404 endogenous-virus-name (25) ,
5405 environmental-sample (26) ,
5406 forma (27) ,
5407 forma-specialis (28) ,
5408 frequency (29) ,
5409 fwd-primer-name (30) ,
5410 fwd-primer-seq (31) ,
5411 gb-acronym (32) ,
5412 gb-anamorph (33) ,
5413 gb-synonym (34) ,
5414 genotype (35) ,
5415 germline (36) ,
5416 group (37) ,
5417 haplotype (38) ,
5418 identified-by (39) ,
5419 insertion-seq-name (40) ,
5420 isolate (41) ,
5421 isolation-source (42) ,
5422 lab-host (43) ,
5423 lat-lon (44) ,
5424 lineage (45) ,
5425 map (46) ,
5426 metagenome-source (47) ,
5427 metagenomic (48) ,
5428 old-lineage (49) ,
5429 old-name (50) ,
5430 orgmod-note (51) ,
5431 nat-host (52) ,
5432 pathovar (53) ,
5433 plasmid-name (54) ,
5434 plastid-name (55) ,
5435 pop-variant (56) ,
5436 rearranged (57) ,
5437 rev-primer-name (58) ,
5438 rev-primer-seq (59) ,
5439 segment (60) ,
5440 serogroup (61) ,
5441 serotype (62) ,
5442 serovar (63) ,
5443 sex (64) ,
5444 specimen-voucher (65) ,
5445 strain (66) ,
5446 subclone (67) ,
5447 subgroup (68) ,
5448 subsource-note (69),
5449 sub-species (70) ,
5450 substrain (71) ,
5451 subtype (72) ,
5452 synonym (73) ,
5453 taxname (74) ,
5454 teleomorph (75) ,
5455 tissue-lib (76) ,
5456 tissue-type (77) ,
5457 transgenic (78) ,
5458 transposon-name (79) ,
5459 type (80) ,
5460 variety (81) ,
5461 specimen-voucher-INST (82) ,
5462 specimen-voucher-COLL (83) ,
5463 specimen-voucher-SpecID (84) ,
5464 culture-collection-INST (85) ,
5465 culture-collection-COLL (86) ,
5466 culture-collection-SpecID (87) ,
5467 bio-material-INST (88) ,
5468 bio-material-COLL (89) ,
5469 bio-material-SpecID (90),
5470 all-notes (91),
5471 mating-type (92),
5472 linkage-group (93) ,
5473 haplogroup (94),
5474 all-quals (95),
5475 dbxref (96) ,
5476 taxid (97)
5477 }
5478
5479 Source-qual-pair ::= SEQUENCE { -- two Source-qual selectors (from and to); both are mandatory
5480 field-from Source-qual ,
5481 field-to Source-qual }
5482
5483 Source-location ::= ENUMERATED { -- location values used by Source-qual-choice and Source-qual-val-choice; numbered 0 to 22 without gaps
5484 unknown (0) ,
5485 genomic (1) ,
5486 chloroplast (2) ,
5487 chromoplast (3) ,
5488 kinetoplast (4) ,
5489 mitochondrion (5) ,
5490 plastid (6) ,
5491 macronuclear (7) ,
5492 extrachrom (8) ,
5493 plasmid (9) ,
5494 transposon (10) ,
5495 insertion-seq (11) ,
5496 cyanelle (12) ,
5497 proviral (13) ,
5498 virion (14) ,
5499 nucleomorph (15) ,
5500 apicoplast (16) ,
5501 leucoplast (17) ,
5502 proplastid (18) ,
5503 endogenous-virus (19) ,
5504 hydrogenosome (20) ,
5505 chromosome (21) ,
5506 chromatophore (22) }
5507
5508 Source-origin ::= ENUMERATED { -- origin values used by Source-qual-choice and Source-qual-val-choice
5509 unknown (0) ,
5510 natural (1) ,
5511 natmut (2) ,
5512 mut (3) ,
5513 artificial (4) ,
5514 synthetic (5) ,
5515 other (255) } -- other is 255 here, not the next sequential number
5516
5517 Source-qual-choice ::= CHOICE { -- selects one source attribute; used for Source-constraint.field1 and field2
5518 textqual Source-qual ,
5519 location Source-location,
5520 origin Source-origin ,
5521 gcode INTEGER , -- NOTE(review): gcode and mgcode are presumably genetic-code identifiers; confirm
5522 mgcode INTEGER }
5523
5524 Source-qual-text-val ::= SEQUENCE { -- a Source-qual selector together with a text value
5525 srcqual Source-qual ,
5526 val VisibleString }
5527
5528 Source-qual-val-choice ::= CHOICE { -- same alternatives as Source-qual-choice, except textqual carries a Source-qual-text-val
5529 textqual Source-qual-text-val ,
5530 location Source-location,
5531 origin Source-origin ,
5532 gcode INTEGER ,
5533 mgcode INTEGER }
5534
5535 Source-qual-val-set ::= SET OF Source-qual-val-choice -- unordered collection of source attribute values
5536
5537 CDSGeneProt-field ::= ENUMERATED { -- field selectors prefixed cds-, gene-, mrna-, prot- or mat-peptide-, plus codon-start; numbered 1 to 24
5538 cds-comment (1) ,
5539 gene-locus (2) ,
5540 gene-description (3) ,
5541 gene-comment (4) ,
5542 gene-allele (5) ,
5543 gene-maploc (6) ,
5544 gene-locus-tag (7) ,
5545 gene-synonym (8) ,
5546 gene-old-locus-tag (9) ,
5547 mrna-product (10) ,
5548 mrna-comment (11) ,
5549 prot-name (12) ,
5550 prot-description (13) ,
5551 prot-ec-number (14) ,
5552 prot-activity (15) ,
5553 prot-comment (16) ,
5554 mat-peptide-name (17) ,
5555 mat-peptide-description (18) ,
5556 mat-peptide-ec-number (19) ,
5557 mat-peptide-activity (20) ,
5558 mat-peptide-comment (21) ,
5559 cds-inference (22) ,
5560 gene-inference (23) ,
5561 codon-start (24) }
5562
5563 CDSGeneProt-field-pair ::= SEQUENCE { -- two CDSGeneProt-field selectors (from and to); both are mandatory
5564 field-from CDSGeneProt-field ,
5565 field-to CDSGeneProt-field }
5566
5567 Molecule-type ::= ENUMERATED { -- type of Molinfo-field.molecule; numbered 0 to 11 without gaps
5568 unknown (0) ,
5569 genomic (1) ,
5570 precursor-RNA (2) ,
5571 mRNA (3) ,
5572 rRNA (4) ,
5573 tRNA (5) ,
5574 genomic-mRNA (6) ,
5575 cRNA (7) ,
5576 transcribed-RNA (8) ,
5577 ncRNA (9) ,
5578 transfer-messenger-RNA (10) ,
5579 macro-other (11) } -- the catch-all is named macro-other here, unlike other in the sibling enumerations
5580
5581 Technique-type ::= ENUMERATED { -- type of Molinfo-field.technique; numbered 0 to 24 without gaps
5582 unknown (0) ,
5583 standard (1) ,
5584 est (2) ,
5585 sts (3) ,
5586 survey (4) ,
5587 genetic-map (5) ,
5588 physical-map (6) ,
5589 derived (7) ,
5590 concept-trans (8) ,
5591 seq-pept (9) ,
5592 both (10) ,
5593 seq-pept-overlap (11) ,
5594 seq-pept-homol (12) ,
5595 concept-trans-a (13) ,
5596 htgs-1 (14) ,
5597 htgs-2 (15) ,
5598 htgs-3 (16) ,
5599 fli-cDNA (17) ,
5600 htgs-0 (18) ,
5601 htc (19) ,
5602 wgs (20) ,
5603 barcode (21) ,
5604 composite-wgs-htgs (22) ,
5605 tsa (23) ,
5606 other (24) }
5607
5608 Completedness-type ::= ENUMERATED { -- type of Molinfo-field.completedness; numbered 0 to 8 without gaps
5609 unknown (0) ,
5610 complete (1) ,
5611 partial (2) ,
5612 no-left (3) ,
5613 no-right (4) ,
5614 no-ends (5) ,
5615 has-left (6) ,
5616 has-right (7) ,
5617 other (8) } -- was (6), colliding with has-left; ENUMERATED numbers must be distinct. Name-based text encodings are unaffected; binary data and generated constants for other change.
5618
5619 Molecule-class-type ::= ENUMERATED { -- type of Molinfo-field.mol-class; numbered 0 to 5 without gaps
5620 unknown (0) ,
5621 dna (1) ,
5622 rna (2) ,
5623 protein (3) ,
5624 nucleotide (4),
5625 other (5) }
5626
5627 Topology-type ::= ENUMERATED { -- type of Molinfo-field.topology; numbered 0 to 4 without gaps
5628 unknown (0) ,
5629 linear (1) ,
5630 circular (2) ,
5631 tandem (3) ,
5632 other (4) }
5633
5634 Strand-type ::= ENUMERATED { -- type of Molinfo-field.strand; numbered 0 to 5 without gaps
5635 unknown (0) ,
5636 single (1) ,
5637 double (2) ,
5638 mixed (3) ,
5639 mixed-rev (4) ,
5640 other (5) }
5641
5642 Molinfo-field ::= CHOICE { -- exactly one value drawn from one of six enumerations
5643 molecule Molecule-type ,
5644 technique Technique-type ,
5645 completedness Completedness-type ,
5646 mol-class Molecule-class-type ,
5647 topology Topology-type ,
5648 strand Strand-type }
5649
5650 Molinfo-molecule-pair ::= SEQUENCE { -- from/to pair of Molecule-type; the five definitions below repeat this shape for the other enumerations
5651 from Molecule-type ,
5652 to Molecule-type }
5653
5654 Molinfo-technique-pair ::= SEQUENCE { -- from/to pair of Technique-type
5655 from Technique-type ,
5656 to Technique-type }
5657
5658 Molinfo-completedness-pair ::= SEQUENCE { -- from/to pair of Completedness-type
5659 from Completedness-type ,
5660 to Completedness-type }
5661
5662 Molinfo-mol-class-pair ::= SEQUENCE { -- from/to pair of Molecule-class-type
5663 from Molecule-class-type ,
5664 to Molecule-class-type }
5665
5666 Molinfo-topology-pair ::= SEQUENCE { -- from/to pair of Topology-type
5667 from Topology-type ,
5668 to Topology-type }
5669
5670 Molinfo-strand-pair ::= SEQUENCE { -- from/to pair of Strand-type
5671 from Strand-type ,
5672 to Strand-type }
5673
5674 Molinfo-field-pair ::= CHOICE { -- same alternative names as Molinfo-field, each carrying a from/to pair of one enumeration
5675 molecule Molinfo-molecule-pair ,
5676 technique Molinfo-technique-pair ,
5677 completedness Molinfo-completedness-pair ,
5678 mol-class Molinfo-mol-class-pair ,
5679 topology Molinfo-topology-pair ,
5680 strand Molinfo-strand-pair }
5681
5682 Molinfo-field-list ::= SET OF Molinfo-field -- unordered collection of Molinfo-field values
5683
5684 Molinfo-field-constraint ::= SEQUENCE { -- one Molinfo-field value plus a negation flag
5685 field Molinfo-field ,
5686 is-not BOOLEAN DEFAULT FALSE } -- NOTE(review): presumably TRUE means the field must NOT have this value; confirm
5687
5688 -- publication fields --
5689
5690 Publication-field ::= ENUMERATED { -- publication field selectors used by the Pub-field-* constraint types; numbered 1 to 20 without gaps
5691 cit (1) ,
5692 authors (2) ,
5693 journal (3) ,
5694 volume (4) ,
5695 issue (5) ,
5696 pages (6) ,
5697 date (7) ,
5698 serial-number (8) ,
5699 title (9) ,
5700 affiliation (10) ,
5701 affil-div (11) ,
5702 affil-city (12) ,
5703 affil-sub (13) ,
5704 affil-country (14) ,
5705 affil-street (15) ,
5706 affil-email (16) ,
5707 affil-fax (17) ,
5708 affil-phone (18) ,
5709 affil-zipcode (19),
5710 authors-initials (20)
5711 }
5712
5713 -- structured comment fields --
5714
5715 Structured-comment-field ::= CHOICE { -- only the named alternative carries a VisibleString; database and field-name are NULL
5716 database NULL ,
5717 named VisibleString ,
5718 field-name NULL
5719 }
5720
5721 Structured-comment-field-pair ::= SEQUENCE { -- two Structured-comment-field selectors (from and to); both are mandatory
5722 from Structured-comment-field ,
5723 to Structured-comment-field
5724 }
5725
5726 -- misc fields --
5727 -- these would not appear in pairs (no Misc-field pair type is defined alongside it) --
5728 Misc-field ::= ENUMERATED {
5729 genome-project-id (1) ,
5730 comment-descriptor (2) ,
5731 defline (3) ,
5732 keyword (4)
5733 }
5734
5735 -- dblink fields --
5736 DBLink-field-type ::= ENUMERATED { -- DBLink field selectors used by DBLink-field-pair; numbered 1 to 5
5737 trace-assembly (1) ,
5738 bio-sample (2) ,
5739 probe-db (3) ,
5740 sequence-read-archve (4) , -- NOTE(review): misspelling of archive; the identifier is part of the interface, so it is left unchanged
5741 bio-project (5) }
5742
5743 DBLink-field-pair ::= SEQUENCE { -- two DBLink-field-type selectors (from and to); both are mandatory
5744 from DBLink-field-type ,
5745 to DBLink-field-type
5746 }
5747
5748 -- complex constraints --
5749
5750 Pub-type ::= ENUMERATED { -- type of the mandatory Publication-constraint.type member
5751 any (0) ,
5752 published (1) ,
5753 unpublished (2) ,
5754 in-press (3) ,
5755 submitter-block (4) }
5756
5757 Pub-field-constraint ::= SEQUENCE { -- a publication field together with a String-constraint; both are mandatory
5758 field Publication-field ,
5759 constraint String-constraint }
5760
5761 Pub-field-special-constraint-type ::= CHOICE { -- five payload-free (NULL) alternatives; exactly one is selected
5762 is-present NULL ,
5763 is-not-present NULL ,
5764 is-all-caps NULL ,
5765 is-all-lower NULL ,
5766 is-all-punct NULL }
5767
5768 Pub-field-special-constraint ::= SEQUENCE { -- a publication field together with one of the special tests above
5769 field Publication-field ,
5770 constraint Pub-field-special-constraint-type }
5771
-- Publication constraint: a required Pub-type plus an optional string-based field constraint
-- and an optional special field constraint.
5772 Publication-constraint ::= SEQUENCE {
5773 type Pub-type ,
5774 field Pub-field-constraint OPTIONAL ,
5775 special-field Pub-field-special-constraint OPTIONAL }
5776
-- Source constraint in which every member is OPTIONAL: up to two source qualifier selectors,
-- a String-constraint and an Object-type-constraint.
-- NOTE(review): how field1 and field2 combine is not stated here; check the consumer.
5777 Source-constraint ::= SEQUENCE {
5778 field1 Source-qual-choice OPTIONAL ,
5779 field2 Source-qual-choice OPTIONAL ,
5780 constraint String-constraint OPTIONAL ,
5781 type-constraint Object-type-constraint OPTIONAL }
5782
-- Enumerates the feature kinds usable in a CDSGeneProt-pseudo-constraint (values 1 to 6).
5783 CDSGeneProt-feature-type-constraint ::= ENUMERATED {
5784 gene (1) ,
5785 mRNA (2) ,
5786 cds (3) ,
5787 prot (4) ,
5788 exon (5) ,
5789 mat-peptide (6) }
5790
-- Pairs a feature kind with an is-pseudo flag. Note that the flag defaults to TRUE when omitted.
5791 CDSGeneProt-pseudo-constraint ::= SEQUENCE {
5792 feature CDSGeneProt-feature-type-constraint ,
5793 is-pseudo BOOLEAN DEFAULT TRUE }
5794
-- CHOICE with a single alternative wrapping CDSGeneProt-field.
-- NOTE(review): presumably a CHOICE so alternatives can be added later; confirm.
5795 CDSGeneProt-constraint-field ::= CHOICE {
5796 field CDSGeneProt-field }
5797
-- Qualifier constraint in which every member is OPTIONAL: up to two field selectors and a String-constraint.
5798 CDSGeneProt-qual-constraint ::= SEQUENCE {
5799 field1 CDSGeneProt-constraint-field OPTIONAL ,
5800 field2 CDSGeneProt-constraint-field OPTIONAL ,
5801 constraint String-constraint OPTIONAL }
5802
-- Pairs any Field-type selector with a String-constraint; both members are required.
5803 Field-constraint ::= SEQUENCE {
5804 field Field-type ,
5805 string-constraint String-constraint }
5806
-- Enumerates RNA molecule categories for the rna alternative of
-- Sequence-constraint-mol-type-constraint; value 0 is "any".
5807 Sequence-constraint-rnamol ::= ENUMERATED {
5808 any (0) ,
5809 genomic (1) ,
5810 precursor-RNA (2) ,
5811 mRNA (3) ,
5812 rRNA (4) ,
5813 tRNA (5) ,
5814 genomic-mRNA (6) ,
5815 cRNA (7) ,
5816 transcribed-RNA (8) ,
5817 ncRNA (9) ,
5818 transfer-messenger-RNA (10) }
5819
-- Selects a molecule class. Only the rna alternative carries data (a Sequence-constraint-rnamol);
-- the other alternatives are NULL.
5820 Sequence-constraint-mol-type-constraint ::= CHOICE {
5821 any NULL ,
5822 nucleotide NULL ,
5823 dna NULL ,
5824 rna Sequence-constraint-rnamol ,
5825 protein NULL }
5826
-- Numeric comparison: the chosen alternative names the relation and its INTEGER is the operand.
5827 Quantity-constraint ::= CHOICE {
5828 equals INTEGER ,
5829 greater-than INTEGER ,
5830 less-than INTEGER }
5831
-- Enumerates strand-composition conditions; value 0 is "any".
5832 Feature-strandedness-constraint ::= ENUMERATED {
5833 any (0) ,
5834 minus-only (1) ,
5835 plus-only (2) ,
5836 at-least-one-minus (3) ,
5837 at-least-one-plus (4) ,
5838 no-minus (5) ,
5839 no-plus (6) }
5840
-- Sequence-level constraint. feature is the only required member; strandedness defaults to any;
-- every other member is OPTIONAL.
-- NOTE(review): num-type-features presumably counts features of the given type and num-features
-- counts all features; confirm against the consumer.
5841 Sequence-constraint ::= SEQUENCE {
5842 seqtype Sequence-constraint-mol-type-constraint OPTIONAL ,
5843 id String-constraint OPTIONAL ,
5844 feature Macro-feature-type ,
5845 num-type-features Quantity-constraint OPTIONAL ,
5846 num-features Quantity-constraint OPTIONAL ,
5847 length Quantity-constraint OPTIONAL ,
5848 strandedness Feature-strandedness-constraint DEFAULT any }
5849
-- Three-state flag: dont-care (0), yes (1), no (2).
5850 Match-type-constraint ::= ENUMERATED {
5851 dont-care (0) ,
5852 yes (1) ,
5853 no (2) }
5854
-- Translation constraint: two required String-constraint-set members, an internal-stops flag
-- that defaults to dont-care, and an optional Quantity-constraint on num-mismatches.
5855 Translation-constraint ::= SEQUENCE {
5856 actual-strings String-constraint-set ,
5857 transl-strings String-constraint-set ,
5858 internal-stops Match-type-constraint DEFAULT dont-care ,
5859 num-mismatches Quantity-constraint OPTIONAL }
5860
-- Union of all constraint kinds; exactly one alternative is present per value.
-- field-missing carries a bare Field-type rather than a constraint structure.
-- Alternative order matters for tagging in this module, so append rather than reorder.
5861 Constraint-choice ::= CHOICE {
5862 string String-constraint ,
5863 location Location-constraint ,
5864 field Field-constraint ,
5865 source Source-constraint ,
5866 cdsgeneprot-qual CDSGeneProt-qual-constraint ,
5867 cdsgeneprot-pseudo CDSGeneProt-pseudo-constraint ,
5868 sequence Sequence-constraint ,
5869 pub Publication-constraint ,
5870 molinfo Molinfo-field-constraint ,
5871 field-missing Field-type ,
5872 translation Translation-constraint }
5873
-- Unordered collection of Constraint-choice values.
-- NOTE(review): whether members are combined with AND or OR is not stated here; check the consumer.
5874 Constraint-choice-set ::= SET OF Constraint-choice
5875
-- Marker used to delimit text: a literal string (free-text) or one of two parameterless
-- classes (digits, letters).
5876 Text-marker ::= CHOICE {
5877 free-text VisibleString ,
5878 digits NULL ,
5879 letters NULL }
5880
-- Describes a span of text bounded by optional left and right markers.
-- include-left, include-right and inside are required BOOLEANs with no default;
-- case-sensitive and whole-word default to FALSE.
-- NOTE(review): inside presumably selects the text between the markers rather than outside; confirm.
5881 Text-portion ::= SEQUENCE {
5882 left-marker Text-marker OPTIONAL ,
5883 include-left BOOLEAN ,
5884 right-marker Text-marker OPTIONAL ,
5885 include-right BOOLEAN ,
5886 inside BOOLEAN ,
5887 case-sensitive BOOLEAN DEFAULT FALSE ,
5888 whole-word BOOLEAN DEFAULT FALSE }
5889
-- Enumerates where in a field an edit applies: anywhere (0), beginning (1), end (2).
5890 Field-edit-location ::= ENUMERATED {
5891 anywhere (0) ,
5892 beginning (1) ,
5893 end (2) }
5894
-- Find-and-replace description: find-txt is required, repl-txt is optional, location defaults
-- to anywhere, and case-insensitive defaults to FALSE.
-- NOTE(review): this type uses a case-insensitive flag while Text-portion uses case-sensitive;
-- the polarity differs, so take care when mapping between them.
5895 Field-edit ::= SEQUENCE {
5896 find-txt VisibleString ,
5897 repl-txt VisibleString OPTIONAL ,
5898 location Field-edit-location DEFAULT anywhere ,
5899 case-insensitive BOOLEAN DEFAULT FALSE }
5900
-- Union of all single-field selectors, one alternative per field family.
-- Field-pair-type is the from/to counterpart but has no pub or misc alternative.
5901 Field-type ::= CHOICE {
5902 source-qual Source-qual-choice ,
5903 feature-field Feature-field ,
5904 rna-field Rna-qual ,
5905 cds-gene-prot CDSGeneProt-field ,
5906 molinfo-field Molinfo-field ,
5907 pub Publication-field ,
5908 struc-comment-field Structured-comment-field ,
5909 misc Misc-field ,
5910 dblink DBLink-field-type }
5911
-- Union of all from/to field-pair selectors, one alternative per field family that has a pair type.
5912 Field-pair-type ::= CHOICE {
5913 source-qual Source-qual-pair ,
5914 feature-field Feature-field-pair ,
5915 rna-field Rna-qual-pair ,
5916 cds-gene-prot CDSGeneProt-field-pair ,
5917 molinfo-field Molinfo-field-pair ,
5918 struc-comment-field Structured-comment-field-pair ,
5919 dblink DBLink-field-pair}
5920
-- Enumerates policies for a destination that already holds text (values 1 to 13, no 0 entry).
-- NOTE(review): the append and prefix variants presumably name the separator placed between
-- old and new text; confirm against the consumer.
5921 ExistingTextOption ::= ENUMERATED {
5922 replace-old (1) ,
5923 append-semi (2) ,
5924 append-space (3) ,
5925 append-colon (4) ,
5926 append-comma (5) ,
5927 append-none (6) ,
5928 prefix-semi (7) ,
5929 prefix-space (8) ,
5930 prefix-colon (9) ,
5931 prefix-comma (10) ,
5932 prefix-none (11) ,
5933 leave-old (12) ,
5934 add-qual (13) }
5935
5936
-- Apply action: target field, the string value, and the existing-text policy; all are required.
5937 Apply-action ::= SEQUENCE {
5938 field Field-type ,
5939 value VisibleString ,
5940 existing-text ExistingTextOption }
5941
-- Edit action: a Field-edit description and the field it targets; both are required.
5942 Edit-action ::= SEQUENCE {
5943 edit Field-edit ,
5944 field Field-type }
5945
-- Enumerates capitalization changes; value 0 is none.
-- NOTE(review): firstcaprestnochange is written without hyphens while firstlower-restnochange
-- has one. The names are part of the interface, so the inconsistency is left as it is.
5946 Cap-change ::= ENUMERATED {
5947 none (0) ,
5948 tolower (1) ,
5949 toupper (2) ,
5950 firstcap (3) ,
5951 firstcaprestnochange (4) ,
5952 firstlower-restnochange (5) ,
5953 cap-word-space (6) ,
5954 cap-word-space-punc (7)
5955 }
5956
-- One text transformation: a find-and-replace edit, a capitalization change, or removal of a Text-portion.
5957 Text-transform ::= CHOICE {
5958 edit Field-edit ,
5959 caps Cap-change ,
5960 remove Text-portion }
5961
-- Collection of Text-transform values.
-- NOTE(review): SET OF does not define an order in ASN.1; if application order matters, verify
-- how the toolkit reads it.
5962 Text-transform-set ::= SET OF Text-transform
5963
-- Convert action over a from/to field pair. strip-name and keep-original default to FALSE,
-- capitalization defaults to none, and existing-text is required.
5964 Convert-action ::= SEQUENCE {
5965 fields Field-pair-type ,
5966 strip-name BOOLEAN DEFAULT FALSE ,
5967 keep-original BOOLEAN DEFAULT FALSE ,
5968 capitalization Cap-change DEFAULT none ,
5969 existing-text ExistingTextOption }
5970
-- Copy action: a from/to field pair plus the existing-text policy; both are required.
5971 Copy-action ::= SEQUENCE {
5972 fields Field-pair-type ,
5973 existing-text ExistingTextOption }
5974
-- Swap action: a from/to field pair plus a separate field-to selector; both are required.
-- NOTE(review): the purpose of field-to alongside the pair is not evident from this spec;
-- confirm against the consumer.
5975 Swap-action ::= SEQUENCE {
5976 fields Field-pair-type ,
5977 field-to Field-type }
5978
-- Parse action between a pair of fields: the Text-portion to extract, the from/to pair, three
-- removal flags that default to FALSE, optional transforms, and a required existing-text policy.
5979 AECRParse-action ::= SEQUENCE {
5980 portion Text-portion ,
5981 fields Field-pair-type ,
5982 remove-from-parsed BOOLEAN DEFAULT FALSE ,
5983 remove-left BOOLEAN DEFAULT FALSE ,
5984 remove-right BOOLEAN DEFAULT FALSE ,
5985 transform Text-transform-set OPTIONAL ,
5986 existing-text ExistingTextOption }
5987
-- Remove action: names the single field to remove.
5988 Remove-action ::= SEQUENCE {
5989 field Field-type }
5990
-- Remove-outside action: a Text-portion, the target field, and a remove-if-not-found flag
-- that defaults to FALSE.
-- NOTE(review): presumably removes the text outside the portion; confirm.
5991 Remove-outside-action ::= SEQUENCE {
5992 portion Text-portion ,
5993 field Field-type ,
5994 remove-if-not-found BOOLEAN DEFAULT FALSE }
5995
-- Union of the field-level actions that an AECR-action can carry.
5996 Action-choice ::= CHOICE {
5997 apply Apply-action ,
5998 edit Edit-action ,
5999 convert Convert-action ,
6000 copy Copy-action ,
6001 swap Swap-action ,
6002 remove Remove-action ,
6003 parse AECRParse-action ,
6004 remove-outside Remove-outside-action }
6005
-- Wraps one Action-choice with an also-change-mrna flag (FALSE when omitted) and an optional
-- constraint set.
6006 AECR-action ::= SEQUENCE {
6007 action Action-choice ,
6008 also-change-mrna BOOLEAN DEFAULT FALSE ,
6009 constraint Constraint-choice-set OPTIONAL }
6010
-- Organism-related parse source: a Source-qual, or the parameterless taxname-after-binomial.
6011 Parse-src-org-choice ::= CHOICE {
6012 source-qual Source-qual ,
6013 taxname-after-binomial NULL }
6014
-- Organism parse source: the field choice plus an Object-type-constraint that defaults to any.
6015 Parse-src-org ::= SEQUENCE {
6016 field Parse-src-org-choice ,
6017 type Object-type-constraint DEFAULT any }
6018
6019 -- For Parse-src-general-id tag, specify the db of the id from which you
6020 -- want to retrieve the tag. If empty or null, any db will do.
-- Selects which part of a general id to parse. whole-text and db carry no data;
-- tag carries a VisibleString.
6021 Parse-src-general-id ::= CHOICE {
6022 whole-text NULL ,
6023 db NULL ,
6024 tag VisibleString }
6025
-- Union of places text can be parsed from. Only org, structured-comment and general-id carry
-- data; the remaining alternatives are NULL.
6026 Parse-src ::= CHOICE {
6027 defline NULL ,
6028 flatfile NULL ,
6029 local-id NULL ,
6030 org Parse-src-org ,
6031 comment NULL ,
6032 bankit-comment NULL ,
6033 structured-comment VisibleString ,
6034 file-id NULL ,
6035 general-id Parse-src-general-id }
6036
-- Organism parse destination: a Source-qual-choice plus an Object-type-constraint that defaults to any.
6037 Parse-dst-org ::= SEQUENCE {
6038 field Source-qual-choice ,
6039 type Object-type-constraint DEFAULT any }
6040
-- Union of places parsed text can be written to. defline and comment-descriptor carry no data.
6041 Parse-dest ::= CHOICE {
6042 defline NULL ,
6043 org Parse-dst-org ,
6044 featqual Feature-field-legal ,
6045 comment-descriptor NULL ,
6046 dbxref VisibleString }
6047
-- General parse action: the Text-portion, a source and a destination are required.
-- capitalization defaults to none, remove-from-parsed defaults to FALSE, transform is optional,
-- and existing-text is required.
6048 Parse-action ::= SEQUENCE {
6049 portion Text-portion ,
6050 src Parse-src ,
6051 dest Parse-dest ,
6052 capitalization Cap-change DEFAULT none ,
6053 remove-from-parsed BOOLEAN DEFAULT FALSE ,
6054 transform Text-transform-set OPTIONAL ,
6055 existing-text ExistingTextOption }
6056
6057
-- Interval given by two required INTEGERs.
-- NOTE(review): whether coordinates are 0-based or 1-based is not stated here; confirm.
6058 Location-interval ::= SEQUENCE {
6059 from INTEGER ,
6060 to INTEGER }
6061
-- Location given as an interval, the whole sequence (NULL), or a single INTEGER point.
6062 Location-choice ::= CHOICE {
6063 interval Location-interval ,
6064 whole-sequence NULL ,
6065 point INTEGER }
6066
-- Sequence-list is a set of strings; Sequence-list-choice selects either such a list or all (NULL).
-- NOTE(review): the strings presumably identify sequences; confirm.
6067 Sequence-list ::= SET OF VisibleString
6068 Sequence-list-choice ::= CHOICE {
6069 list Sequence-list ,
6070 all NULL }
6071
-- Add-feature action. type, location and seq-list are required.
-- Defaults: partial5 and partial3 FALSE, plus-strand TRUE, add-redundant TRUE, add-mrna FALSE,
-- apply-to-parts FALSE, only-seg-num -1. fields and src-fields are optional.
-- NOTE(review): -1 for only-seg-num presumably means no segment restriction; confirm.
6072 Apply-feature-action ::= SEQUENCE {
6073 type Macro-feature-type ,
6074 partial5 BOOLEAN DEFAULT FALSE ,
6075 partial3 BOOLEAN DEFAULT FALSE ,
6076 plus-strand BOOLEAN DEFAULT TRUE ,
6077 location Location-choice ,
6078 seq-list Sequence-list-choice ,
6079 add-redundant BOOLEAN DEFAULT TRUE ,
6080 add-mrna BOOLEAN DEFAULT FALSE ,
6081 apply-to-parts BOOLEAN DEFAULT FALSE ,
6082 only-seg-num INTEGER DEFAULT -1 ,
6083 fields Feat-qual-legal-set OPTIONAL,
6084 src-fields Source-qual-val-set OPTIONAL }
6085
-- Remove-feature action: the feature type plus an optional constraint set.
6086 Remove-feature-action ::= SEQUENCE {
6087 type Macro-feature-type ,
6088 constraint Constraint-choice-set OPTIONAL }
6089
6090 -- for convert features --
-- Source-side options for converting from a CDS: three required BOOLEANs with no defaults.
6091 Convert-from-CDS-options ::= SEQUENCE {
6092 remove-mRNA BOOLEAN ,
6093 remove-gene BOOLEAN ,
6094 remove-transcript-id BOOLEAN }
6095
-- CHOICE with a single alternative carrying the CDS source options.
6096 Convert-feature-src-options ::= CHOICE {
6097 cds Convert-from-CDS-options }
6098
-- Enumerates bond kinds (values 1 to 5); other is the last value, 5.
6099 Bond-type ::= ENUMERATED {
6100 disulfide (1) ,
6101 thioester (2) ,
6102 crosslink (3) ,
6103 thioether (4) ,
6104 other (5) }
6105
6106
-- Enumerates site kinds (values 1 to 27); other is the last value, 27.
-- NOTE(review): because other holds the highest number, adding a site kind needs either a
-- number above 27 or a renumbering of other; check how other schemas that define site types
-- number theirs before changing.
6107 Site-type ::= ENUMERATED {
6108 active (1) ,
6109 binding (2) ,
6110 cleavage (3) ,
6111 inhibit (4) ,
6112 modified (5) ,
6113 glycosylation (6) ,
6114 myristoylation (7) ,
6115 mutagenized (8) ,
6116 metal-binding (9) ,
6117 phosphorylation (10) ,
6118 acetylation (11) ,
6119 amidation (12) ,
6120 methylation (13) ,
6121 hydroxylation (14) ,
6122 sulfatation (15) ,
6123 oxidative-deamination (16) ,
6124 pyrrolidone-carboxylic-acid (17) ,
6125 gamma-carboxyglutamic-acid (18) ,
6126 blocked (19) ,
6127 lipid-binding (20) ,
6128 np-binding (21) ,
6129 dna-binding (22) ,
6130 signal-peptide (23) ,
6131 transit-peptide (24) ,
6132 transmembrane-region (25) ,
6133 nitrosylation (26) ,
6134 other (27) }
6135
6136 -- other choice is to create protein sequences, skipping bad --
-- Region destination option holding one required BOOLEAN, create-nucleotide.
6137 Region-type ::= SEQUENCE {
6138 create-nucleotide BOOLEAN }
6139
-- Destination-side options for a feature conversion; exactly one alternative is present.
6140 Convert-feature-dst-options ::= CHOICE {
6141 bond Bond-type ,
6142 site Site-type ,
6143 region Region-type ,
6144 ncrna-class VisibleString ,
6145 remove-original BOOLEAN }
6146
6147
-- Convert-feature action: the source and destination feature types and leave-original are
-- required; source options, destination options and the source-feature constraint set are optional.
6148 Convert-feature-action ::= SEQUENCE {
6149 type-from Macro-feature-type ,
6150 type-to Macro-feature-type ,
6151 src-options Convert-feature-src-options OPTIONAL ,
6152 dst-options Convert-feature-dst-options OPTIONAL ,
6153 leave-original BOOLEAN ,
6154 src-feat-constraint Constraint-choice-set OPTIONAL }
6155
6156
-- Enumerates the strand to match before an edit; value 0 is any.
6157 Feature-location-strand-from ::= ENUMERATED {
6158 any (0) ,
6159 plus (1) ,
6160 minus (2) ,
6161 unknown (3) ,
6162 both (4) }
6163
-- Enumerates the strand to set. Values 1 to 4 share names and numbers with
-- Feature-location-strand-from; there is no 0 entry, and reverse (5) exists only here.
6164 Feature-location-strand-to ::= ENUMERATED {
6165 plus (1) ,
6166 minus (2) ,
6167 unknown (3) ,
6168 both (4) ,
6169 reverse (5) }
6170
-- Strand edit: the strand to match and the strand to set; both are required.
6171 Edit-location-strand ::= SEQUENCE {
6172 strand-from Feature-location-strand-from ,
6173 strand-to Feature-location-strand-to }
6174
-- Enumerates the conditions used by Partial-5-set-action; value 0 is all.
6175 Partial-5-set-constraint ::= ENUMERATED {
6176 all (0) ,
6177 at-end (1) ,
6178 bad-start (2) ,
6179 frame-not-one (3) }
6180
-- Set-5'-partial action: a condition plus a required extend flag.
6181 Partial-5-set-action ::= SEQUENCE {
6182 constraint Partial-5-set-constraint ,
6183 extend BOOLEAN }
6184
-- Enumerates the conditions for the clear-5-partial edit; value 0 is all.
6185 Partial-5-clear-constraint ::= ENUMERATED {
6186 all (0) ,
6187 not-at-end (1) ,
6188 good-start (2) }
6189
-- Enumerates the conditions used by Partial-3-set-action; value 0 is all.
6190 Partial-3-set-constraint ::= ENUMERATED {
6191 all (0) ,
6192 at-end (1) ,
6193 bad-end (2) }
6194
-- Set-3'-partial action: a condition plus a required extend flag.
6195 Partial-3-set-action ::= SEQUENCE {
6196 constraint Partial-3-set-constraint ,
6197 extend BOOLEAN }
6198
-- Enumerates the conditions for the clear-3-partial edit; value 0 is all.
6199 Partial-3-clear-constraint ::= ENUMERATED {
6200 all (0) ,
6201 not-at-end (1) ,
6202 good-end (2) }
6203
-- Enumerates the conditions used by Partial-both-set-action: all (0) or at-end (1).
6204 Partial-both-set-constraint ::= ENUMERATED {
6205 all (0) ,
6206 at-end (1) }
6207
-- Set-both-partials action: a condition plus a required extend flag.
6208 Partial-both-set-action ::= SEQUENCE {
6209 constraint Partial-both-set-constraint ,
6210 extend BOOLEAN }
6211
-- Enumerates the conditions for the clear-both-partial edit: all (0) or not-at-end (1).
6212 Partial-both-clear-constraint ::= ENUMERATED {
6213 all (0) ,
6214 not-at-end (1) }
6215
-- Enumerates location conversions: join (1), order (2), merge (3).
6216 Convert-location-type ::= ENUMERATED {
6217 join (1) ,
6218 order (2) ,
6219 merge (3) }
6220
-- Parameters for extending a location to another feature: the feature type, a required
-- include-feat flag, and an optional Quantity-constraint on distance.
6221 Extend-to-feature ::= SEQUENCE {
6222 type Macro-feature-type ,
6223 include-feat BOOLEAN ,
6224 distance Quantity-constraint OPTIONAL }
6225
-- Union of all location edits. The clear-partial alternatives carry a bare condition enum,
-- the set-partial alternatives carry an action with an extend flag, and extend-5 and extend-3
-- are parameterless.
6226 Location-edit-type ::= CHOICE {
6227 strand Edit-location-strand ,
6228 set-5-partial Partial-5-set-action ,
6229 clear-5-partial Partial-5-clear-constraint ,
6230 set-3-partial Partial-3-set-action ,
6231 clear-3-partial Partial-3-clear-constraint ,
6232 set-both-partial Partial-both-set-action ,
6233 clear-both-partial Partial-both-clear-constraint ,
6234 convert Convert-location-type ,
6235 extend-5 NULL ,
6236 extend-3 NULL ,
6237 extend-5-to-feat Extend-to-feature ,
6238 extend-3-to-feat Extend-to-feature }
6239
-- Edit-location action: the feature type and the edit are required. retranslate-cds and
-- also-edit-gene are OPTIONAL BOOLEANs with no default, so an absent value is distinct from
-- FALSE on the wire. The constraint set is optional.
6240 Edit-feature-location-action ::= SEQUENCE {
6241 type Macro-feature-type ,
6242 action Location-edit-type ,
6243 retranslate-cds BOOLEAN OPTIONAL ,
6244 also-edit-gene BOOLEAN OPTIONAL ,
6245 constraint Constraint-choice-set OPTIONAL }
6246
-- Molinfo block action: a required to-list, plus an optional from-list and an optional constraint set.
6247 Molinfo-block ::= SEQUENCE {
6248 to-list Molinfo-field-list ,
6249 from-list Molinfo-field-list OPTIONAL ,
6250 constraint Constraint-choice-set OPTIONAL }
6251
-- Enumerates descriptor kinds; value 0 is all.
6252 Descriptor-type ::= ENUMERATED {
6253 all (0) ,
6254 title (1) ,
6255 source (2) ,
6256 publication (3) ,
6257 comment (4) ,
6258 genbank (5) ,
6259 user (6) ,
6260 create-date (7) ,
6261 update-date (8) ,
6262 mol-info (9) ,
6263 structured-comment (10) ,
6264 genome-project-id (11) }
6265
-- Remove-descriptor action: the descriptor kind plus an optional constraint set.
6266 Remove-descriptor-action ::= SEQUENCE {
6267 type Descriptor-type ,
6268 constraint Constraint-choice-set OPTIONAL }
6269
-- Enumerates the clause-list styles available to Autodef-action (values 1 to 4).
6270 Autodef-list-type ::= ENUMERATED {
6271 feature-list (1) ,
6272 complete-sequence (2) ,
6273 complete-genome (3) ,
6274 sequence (4) }
6275
-- Enumerates the two rules Autodef-action can use for misc features.
6276 Autodef-misc-feat-parse-rule ::= ENUMERATED {
6277 use-comment-before-first-semicolon (1) ,
6278 look-for-noncoding-products (2) }
6279
-- Autodef action: an optional set of Source-qual modifiers, a required clause-list type, and
-- a misc-feature rule that defaults to look-for-noncoding-products.
6280 Autodef-action ::= SEQUENCE {
6281 modifiers SET OF Source-qual OPTIONAL ,
6282 clause-list-type Autodef-list-type ,
6283 misc-feat-parse-rule Autodef-misc-feat-parse-rule DEFAULT look-for-noncoding-products }
6284
-- Publication capitalization fix. The four field flags are OPTIONAL BOOLEANs with no default;
-- punct-only defaults to FALSE; the constraint set is optional.
6285 Fix-pub-caps-action ::= SEQUENCE {
6286 title BOOLEAN OPTIONAL ,
6287 authors BOOLEAN OPTIONAL ,
6288 affiliation BOOLEAN OPTIONAL ,
6289 affil-country BOOLEAN OPTIONAL ,
6290 punct-only BOOLEAN DEFAULT FALSE ,
6291 constraint Constraint-choice-set OPTIONAL }
6292
-- Enumerates sort orders: short-to-long (1), long-to-short (2), alphabetical (3).
6293 Sort-order ::= ENUMERATED {
6294 short-to-long (1) ,
6295 long-to-short (2) ,
6296 alphabetical (3) }
6297
-- Sort-fields action: the field and the order are required; the constraint set is optional.
6298 Sort-fields-action ::= SEQUENCE {
6299 field Field-type ,
6300 order Sort-order ,
6301 constraint Constraint-choice-set OPTIONAL }
6302
-- Author capitalization fix holding one required BOOLEAN, last-name-only.
6303 Fix-author-caps ::= SEQUENCE {
6304 last-name-only BOOLEAN }
6305
-- Union of capitalization fixes; src-country and mouse-strain carry no parameters.
6306 Fix-caps-action ::= CHOICE {
6307 pub Fix-pub-caps-action ,
6308 src-country NULL ,
6309 mouse-strain NULL ,
6310 src-qual Source-qual ,
6311 author Fix-author-caps }
6312
-- Union of parameterless (all NULL) format fixes.
6313 Fix-format-action ::= CHOICE {
6314 collection-date NULL ,
6315 lat-lon NULL ,
6316 primers NULL ,
6317 protein-name NULL }
6318
-- Remove-duplicate-features action: the feature type and three BOOLEANs are required (no
-- defaults); the constraint set, named rd-constraint here, is optional.
6319 Remove-duplicate-feature-action ::= SEQUENCE {
6320 type Macro-feature-type ,
6321 ignore-partials BOOLEAN ,
6322 case-sensitive BOOLEAN ,
6323 remove-proteins BOOLEAN ,
6324 rd-constraint Constraint-choice-set OPTIONAL }
6325
-- Enumerates the suppression filter for gene xrefs; value 0 is any.
6326 Gene-xref-suppression-type ::= ENUMERATED {
6327 any (0) ,
6328 suppressing (1) ,
6329 non-suppressing (2) }
6330
-- Enumerates the necessity filter for gene xrefs; value 0 is any.
6331 Gene-xref-necessary-type ::= ENUMERATED {
6332 any (0) ,
6333 necessary (1) ,
6334 unnecessary (2) }
6335
-- Gene xref selector: the feature type, the suppression filter and the necessity filter;
-- all three are required.
6336 Gene-xref-type ::= SEQUENCE {
6337 feature Macro-feature-type ,
6338 suppression Gene-xref-suppression-type ,
6339 necessary Gene-xref-necessary-type }
6340
-- CHOICE with a single alternative carrying a gene xref selector.
6341 Xref-type ::= CHOICE {
6342 gene Gene-xref-type }
6343
-- Remove-xrefs action: the xref selector plus an optional constraint set.
6344 Remove-xrefs-action ::= SEQUENCE {
6345 xref-type Xref-type ,
6346 constraint Constraint-choice-set OPTIONAL }
6347
-- Make-gene-xrefs action: the feature type plus an optional constraint set.
6348 Make-gene-xref-action ::= SEQUENCE {
6349 feature Macro-feature-type ,
6350 constraint Constraint-choice-set OPTIONAL }
6351
-- Enumerates the author-name fixes (values 1 to 3).
6352 Author-fix-type ::= ENUMERATED {
6353 truncate-middle-initials (1) ,
6354 strip-suffix (2) ,
6355 move-middle-to-first (3) }
6356
-- Author fix action: the fix kind plus an optional constraint set.
6357 Author-fix-action ::= SEQUENCE {
6358 fix-type Author-fix-type ,
6359 constraint Constraint-choice-set OPTIONAL }
6360
-- Update-sequences action: a required filename and an add-cit-subs flag that defaults to FALSE.
6361 Update-sequences-action ::= SEQUENCE {
6362 filename VisibleString ,
6363 add-cit-subs BOOLEAN DEFAULT FALSE }
6364
-- Source for TSA id creation: the local id (NULL), or a Text-portion of the defline.
6365 Create-TSA-ids-src ::= CHOICE {
6366 local-id NULL ,
6367 defline Text-portion
6368 }
6369
-- Create-TSA-ids action: a required source, an optional suffix string, and an optional
-- Text-portion named id-text-portion.
6370 Create-TSA-ids-action ::= SEQUENCE {
6371 src Create-TSA-ids-src ,
6372 suffix VisibleString OPTIONAL ,
6373 id-text-portion Text-portion OPTIONAL }
6374
-- Autofix action holding one required string, test-name.
-- NOTE(review): the set of valid test names is not defined in this spec; see the consumer.
6375 Autofix-action ::= SEQUENCE {
6376 test-name VisibleString }
6377
-- Union of parameterless (all NULL) set fixes.
6378 Fix-sets-action ::= CHOICE {
6379 remove-single-item-set NULL ,
6380 renormalize-nuc-prot-sets NULL ,
6381 fix-pop-to-phy NULL
6382 }
6383
-- Selects what a table column is matched against. Only src-qual carries data (a
-- Source-qual-choice); the other alternatives are NULL.
6384 Table-match-type ::= CHOICE {
6385 feature-id NULL ,
6386 gene-locus-tag NULL ,
6387 protein-id NULL,
6388 dbxref NULL ,
6389 nuc-id NULL ,
6390 src-qual Source-qual-choice ,
6391 protein-name NULL ,
6392 any NULL
6393 }
6394
-- Table match: the match type plus a String-location that defaults to equals.
6395 Table-match ::= SEQUENCE {
6396 match-type Table-match-type ,
6397 match-location String-location DEFAULT equals
6398 }
6399
6400
-- CHOICE with a single NULL alternative, table, so no table contents are carried on the wire.
-- NOTE(review): presumably a placeholder that the application fills in memory (it is used for
-- members named in-memory-table); confirm against the consumer.
6401 Apply-table-extra-data ::= CHOICE {
6402 table NULL }
6403
-- Apply-table action: a required filename and Table-match, plus an optional in-memory-table.
6404 Apply-table-action ::= SEQUENCE {
6405 filename VisibleString ,
6406 match-type Table-match ,
6407 in-memory-table Apply-table-extra-data OPTIONAL
6408 }
6409
-- Add-file action: a required filename plus an optional in-memory-table.
6410 Add-file-action ::= SEQUENCE {
6411 filename VisibleString ,
6412 in-memory-table Apply-table-extra-data OPTIONAL
6413 }
6414
-- Add-descriptor-list action: an Add-file-action naming the descriptor list, plus an optional
-- constraint set.
6415 Add-descriptor-list-action ::= SEQUENCE {
6416 descriptor-list Add-file-action ,
6417 constraint Constraint-choice-set OPTIONAL
6418 }
6419
-- Remove-sequences action. Unlike most actions in this section, the constraint set here is
-- required rather than OPTIONAL.
6420 Remove-sequences-action ::= SEQUENCE {
6421 constraint Constraint-choice-set
6422 }
6423
-- Union of every macro action. Alternatives typed NULL take no parameters; the others carry
-- the action structure of the named type.
-- Alternative order matters for tagging in this module, so append rather than reorder.
6424 Macro-action-choice ::= CHOICE {
6425 aecr AECR-action ,
6426 parse Parse-action ,
6427 add-feature Apply-feature-action ,
6428 remove-feature Remove-feature-action ,
6429 convert-feature Convert-feature-action ,
6430 edit-location Edit-feature-location-action ,
6431 remove-descriptor Remove-descriptor-action ,
6432 autodef Autodef-action ,
6433 removesets NULL ,
6434 trim-junk-from-primer-seq NULL ,
6435 trim-stop-from-complete-cds NULL ,
6436 fix-usa-and-states NULL ,
6437 synchronize-cds-partials NULL ,
6438 adjust-for-consensus-splice NULL ,
6439 fix-pub-caps Fix-pub-caps-action ,
6440 remove-seg-gaps NULL ,
6441 sort-fields Sort-fields-action ,
6442 apply-molinfo-block Molinfo-block ,
6443 fix-caps Fix-caps-action ,
6444 fix-format Fix-format-action ,
6445 fix-spell NULL ,
6446 remove-duplicate-features Remove-duplicate-feature-action ,
6447 remove-lineage-notes NULL ,
6448 remove-xrefs Remove-xrefs-action ,
6449 make-gene-xrefs Make-gene-xref-action ,
6450 make-bold-xrefs NULL ,
6451 fix-author Author-fix-action ,
6452 update-sequences Update-sequences-action ,
6453 add-trans-splicing NULL ,
6454 remove-invalid-ecnumbers NULL ,
6455 create-tsa-ids Create-TSA-ids-action ,
6456 perform-autofix Autofix-action ,
6457 fix-sets Fix-sets-action ,
6458 apply-table Apply-table-action ,
6459 remove-sequences Remove-sequences-action ,
6460 propagate-sequence-technology NULL ,
6461 add-file-descriptors Add-descriptor-list-action ,
6462 propagate-missing-old-name NULL }
6463
6464
-- Collection of Macro-action-choice values.
-- NOTE(review): SET OF does not define an order in ASN.1; if actions must run in file order,
-- verify how the toolkit reads it.
6465 Macro-action-list ::= SET OF Macro-action-choice
6466
6467
-- Union of search predicates used by Suspect-rule. NULL alternatives take no parameter;
-- INTEGER and VisibleString alternatives carry the predicate operand.
-- NOTE(review): the INTEGERs are presumably a count (n-or-more-brackets-or-parentheses) and
-- a length (too-long); confirm.
6468 Search-func ::= CHOICE {
6469 string-constraint String-constraint ,
6470 contains-plural NULL ,
6471 n-or-more-brackets-or-parentheses INTEGER ,
6472 three-numbers NULL ,
6473 underscore NULL ,
6474 prefix-and-numbers VisibleString ,
6475 all-caps NULL ,
6476 unbalanced-paren NULL ,
6477 too-long INTEGER ,
6478 has-term VisibleString }
6479
-- Simple replacement: an optional replacement string plus whole-string and weasel-to-putative
-- flags, both FALSE when omitted.
6480 Simple-replace ::= SEQUENCE {
6481 replace VisibleString OPTIONAL,
6482 whole-string BOOLEAN DEFAULT FALSE ,
6483 weasel-to-putative BOOLEAN DEFAULT FALSE }
6484
-- Replacement function: a Simple-replace structure, or haem-replace carrying a string.
6485 Replace-func ::= CHOICE {
6486 simple-replace Simple-replace ,
6487 haem-replace VisibleString }
6488
-- Replace rule: the replacement function plus a move-to-note flag that defaults to FALSE.
6489 Replace-rule ::= SEQUENCE {
6490 replace-func Replace-func ,
6491 move-to-note BOOLEAN DEFAULT FALSE }
6492
-- Enumerates the categories a Suspect-rule can be assigned via rule-type; value 0 is none.
6493 Fix-type ::= ENUMERATED {
6494 none (0) ,
6495 typo (1) ,
6496 putative-typo (2) ,
6497 quickfix (3) ,
6498 no-organelle-for-prokaryote (4),
6499 might-be-nonfunctional (5),
6500 database (6),
6501 remove-organism-name (7),
6502 inappropriate-symbol (8),
6503 evolutionary-relationship (9),
6504 use-protein (10),
6505 hypothetical (11),
6506 british (12),
6507 description (13),
6508 gene (14) }
6509
-- Suspect rule: a required find predicate; an optional except predicate, feature constraint
-- set, replacement rule and description; and a rule-type that defaults to none.
6510 Suspect-rule ::= SEQUENCE {
6511 find Search-func ,
6512 except Search-func OPTIONAL ,
6513 feat-constraint Constraint-choice-set OPTIONAL ,
6514 rule-type Fix-type DEFAULT none ,
6515 replace Replace-rule OPTIONAL ,
6516 description VisibleString OPTIONAL }
6517
-- Collection of Suspect-rule values.
6518 Suspect-rule-set ::= SET OF Suspect-rule
6519
6520
6521
6522 END
|
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |