NCBI C Toolkit Cross Reference

C/asn/asn.all


  1 --$Revision: 6.0 $
  2 --**********************************************************************
  3 --
  4 --  asn.all
  5 --  this file contains all NCBI ASN.1 specifications together
  6 --
  7 --  by James Ostell, 1990
  8 --
  9 --**********************************************************************
 10 
 11 --$Revision: 6.6 $
 12 --**********************************************************************
 13 --
 14 --  NCBI General Data elements
 15 --  by James Ostell, 1990
 16 --  Version 3.0 - June 1994
 17 --
 18 --**********************************************************************
 19 
 20 NCBI-General DEFINITIONS ::=
 21 BEGIN
 22 
 23 EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;
 24 
 25 -- StringStore is really a VisibleString.  It is used to define very
 26 --   long strings which may need to be stored by the receiving program
 27 --   in special structures, such as a ByteStore, but it's just a hint.
 28 --   AsnTool stores StringStores in ByteStore structures.
 29 -- OCTET STRINGs are also stored in ByteStores by AsnTool
 30 -- 
 31 -- typedef struct bsunit {             /* for building multiline strings */
 32    -- Nlm_Handle str;            /* the string piece */
 33    -- Nlm_Int2 len_avail,
 34        -- len;
 35    -- struct bsunit PNTR next; }       /* the next one */
 36 -- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
 37 -- 
 38 -- typedef struct bytestore {
 39    -- Nlm_Int4 seekptr,       /* current position */
 40       -- totlen,             /* total stored data length in bytes */
 41       -- chain_offset;       /* offset in ByteStore of first byte in curchain */
 42    -- Nlm_BSUnitPtr chain,       /* chain of elements */
 43       -- curchain;           /* the BSUnit containing seekptr */
 44 -- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
 45 --
 46 -- AsnTool incorporates this as a primitive type, so the definition
 47 --   is here just for completeness
 48 -- 
 49 --  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
 50 --
 51 
 52 -- BigInt is really an INTEGER. It is used to warn the receiving code to expect
 53 --   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
 54 --
 55 --   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
 56 --   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
 57 --
 58 
 59 -- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
 60 --  of ASN.1
 61 --  It stores only a date
 62 --
 63 
 64 Date ::= CHOICE {          -- a date, given either as free text or in structured form
 65     str VisibleString ,        -- free text, for dates that could not be parsed
 66     std Date-std }             -- structured form; use this if you can
 67 
 68 Date-std ::= SEQUENCE {        -- structured date. NOTE: this is NOT a unix tm struct
 69     year INTEGER ,             -- full year including the century (e.g. 1994); the only required field
 70     month INTEGER OPTIONAL ,   -- month (1-12)
 71     day INTEGER OPTIONAL ,     -- day of month (1-31)
 72     season VisibleString OPTIONAL ,  -- free text for "spring", "may-june", etc
 73     hour INTEGER OPTIONAL ,    -- hour of day (0-23)
 74     minute INTEGER OPTIONAL ,  -- minute of hour (0-59)
 75     second INTEGER OPTIONAL }  -- second of minute (0-59)
 76 
 77 -- Dbtag is generalized for tagging
 78 -- eg. { "Social Security", str "023-79-8841" }
 79 -- or  { "member", id 8882224 }
 80 
 81 Dbtag ::= SEQUENCE {            -- an identifier qualified by the system that issued it
 82     db VisibleString ,          -- name of database or system
 83     tag Object-id }         -- the identifier within db (integer or string form)
 84 
 85 -- Object-id can tag or name anything
 86 --
 87 
 88 Object-id ::= CHOICE {      -- an identifier in exactly one of two forms
 89     id INTEGER ,            -- numeric form
 90     str VisibleString }     -- string form
 91 
 92 -- Person-id is to define a std element for people
 93 --
 94 
 95 Person-id ::= CHOICE {          -- one of several ways to identify a person
 96     dbtag Dbtag,                -- identifier from any defined database
 97     name Name-std,              -- name split into structured parts
 98     ml VisibleString,           -- semi-structured MEDLINE name,
 99                                 --    for instance "Jones RM"
100     str VisibleString,          -- free-text name
101     consortium VisibleString }  -- name of a consortium
102 
103 Name-std ::= SEQUENCE {               -- a personal name broken into parts
104     last VisibleString,               -- the only required part
105     first VisibleString OPTIONAL,
106     middle VisibleString OPTIONAL,
107     full VisibleString OPTIONAL,      -- complete name, for instance "J. John Smith, Esq"
108     initials VisibleString OPTIONAL,  -- initials of first and middle names
109     suffix VisibleString OPTIONAL,    -- for instance Jr, Sr, III
110     title VisibleString OPTIONAL }    -- for instance Dr., Sister
111 
112 --**** Int-fuzz **********************************************
113 --*
114 --*   uncertainties in integer values
115 
116 Int-fuzz ::= CHOICE {                -- one way of expressing uncertainty in an integer value
117     p-m INTEGER ,                    -- plus or minus fixed amount
118     range SEQUENCE {                 -- max to min
119         max INTEGER ,                -- upper end of the range
120         min INTEGER } ,              -- lower end of the range
121     pct INTEGER ,                    -- % plus or minus, stored x10 (0-1000, so 1000 is 100%)
122     lim ENUMERATED {                 -- some limit value
123         unk (0) ,                    -- unknown
124         gt (1) ,                     -- greater than
125         lt (2) ,                     -- less than
126         tr (3) ,                     -- space to right of position
127         tl (4) ,                     -- space to left of position
128         circle (5) ,                 -- artificial break at origin of circle
129         other (255) } ,              -- something else
130     alt SET OF INTEGER }             -- set of alternatives for the integer
131 
132 
133 --**** User-object **********************************************
134 --*
135 --*   a general object for a user defined structured data item
136 --*    used by Seq-feat and Seq-descr
137 
138 User-object ::= SEQUENCE {           -- a user defined structured data item
139     class VisibleString OPTIONAL ,   -- endeavor which designed this object
140     type Object-id ,                 -- type of object within class
141     data SEQUENCE OF User-field }    -- the object itself, as an ordered list of fields
142 
143 User-field ::= SEQUENCE {            -- one labeled field of a User-object
144     label Object-id ,                -- field label
145     num INTEGER OPTIONAL ,           -- required for strs, ints, reals, oss (presumably the element count; TODO confirm)
146     data CHOICE {                    -- field contents, exactly one of the following
147         str UTF8String ,             -- a single string
148         int INTEGER ,                -- a single integer
149         real REAL ,                  -- a single real number
150         bool BOOLEAN ,               -- a single boolean
151         os OCTET STRING ,            -- a single octet string
152         object User-object ,         -- for using other definitions
153         strs SEQUENCE OF UTF8String ,      -- list of strings
154         ints SEQUENCE OF INTEGER ,         -- list of integers
155         reals SEQUENCE OF REAL ,           -- list of real numbers
156         oss SEQUENCE OF OCTET STRING ,     -- list of octet strings
157         fields SEQUENCE OF User-field ,    -- nested fields (the type is recursive)
158         objects SEQUENCE OF User-object } }    -- nested objects
159 
160 
161 
162 END
163 
164 --$Revision: 6.3 $
165 --****************************************************************
166 --
167 --  NCBI Bibliographic data elements
168 --  by James Ostell, 1990
169 --
170 --  Taken from the American National Standard for
171 --      Bibliographic References
172 --      ANSI Z39.29-1977
173 --  Version 3.0 - June 1994
174 --  PubMedId added in 1996
175 --  ArticleIds and eprint elements added in 1999
176 --
177 --****************************************************************
178 
179 NCBI-Biblio DEFINITIONS ::=
180 BEGIN
181 
182 EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
183         Cit-proc, Cit-sub, Title, Author, PubMedId, DOI;
184 
185 IMPORTS Person-id, Date, Dbtag FROM NCBI-General;
186 
187     -- Article Ids
188 
189 ArticleId ::= CHOICE {         -- one id; an article can have many (see ArticleIdSet)
190         pubmed PubMedId ,      -- Id from the PubMed database at NCBI
191         medline MedlineUID ,   -- Id from MEDLINE
192         doi DOI ,              -- DOI string
193         pii PII ,              -- Controlled Publisher Identifier
194         pmcid PmcID ,          -- PubMed Central Id
195         pmcpid PmcPid ,        -- Publisher Id supplied to PubMed Central
196         pmpid PmPid ,          -- Publisher Id supplied to PubMed
197         other Dbtag  }    -- generic catch all
198     
199 PubMedId ::= INTEGER           -- Id from the PubMed database at NCBI
200 MedlineUID ::= INTEGER         -- Id from MEDLINE
201 DOI ::= VisibleString          -- Digital Object Identifier
202 PII ::= VisibleString          -- Controlled Publisher Identifier
203 PmcID ::= INTEGER              -- PubMed Central Id
204 PmcPid ::= VisibleString       -- Publisher Id supplied to PubMed Central
205 PmPid ::= VisibleString        -- Publisher Id supplied to PubMed
206 
207 ArticleIdSet ::= SET OF ArticleId    -- unordered collection of article ids
208 
209     -- Status Dates
210 
211 PubStatus ::= INTEGER {            -- points of publication (an INTEGER with named values)
212     received  (1) ,            -- date manuscript received for review
213     accepted  (2) ,            -- accepted for publication
214     epublish  (3) ,            -- published electronically by publisher
215     ppublish  (4) ,            -- published in print by publisher
216     revised   (5) ,            -- article revised by publisher/author
217     pmc       (6) ,            -- article first appeared in PubMed Central
218     pmcr      (7) ,            -- article revision in PubMed Central
219     pubmed    (8) ,            -- article citation first appeared in PubMed
220     pubmedr   (9) ,            -- article citation revision in PubMed
221     aheadofprint (10),         -- epublish, but will be followed by print
222     premedline (11),           -- date into PreMedline status
223     medline    (12),           -- date made a MEDLINE record
224     other    (255) }           -- any status not listed above
225 
226 PubStatusDate ::= SEQUENCE {   -- a status paired with its date; a structure so fields can be added
227     pubstatus PubStatus ,      -- which point of publication
228     date Date }                -- when it happened; time may be added later
229 
230 PubStatusDateSet ::= SET OF PubStatusDate    -- unordered collection of status dates
231     
232     -- Citation Types
233 
234 Cit-art ::= SEQUENCE {                  -- article in a journal, book or proceedings
235     title Title OPTIONAL ,              -- title of paper (ANSI requires)
236     authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
237     from CHOICE {                       -- where the article appeared
238         journal Cit-jour ,              -- in a journal
239         book Cit-book ,                 -- in a book
240         proc Cit-proc } ,               -- in meeting proceedings
241     ids ArticleIdSet OPTIONAL }         -- any number of article ids
242 
243 Cit-jour ::= SEQUENCE {             -- Journal citation
244     title Title ,                   -- title of journal
245     imp Imprint }                   -- imprint: publication date, volume, issue, pages, etc
246 
247 Cit-book ::= SEQUENCE {              -- Book citation
248     title Title ,                    -- Title of book
249     coll Title OPTIONAL ,            -- title of the collection, if part of one
250     authors Auth-list,               -- authors (required here, unlike Cit-art)
251     imp Imprint }                    -- imprint: publication date, publisher, etc
252 
253 Cit-proc ::= SEQUENCE {             -- Meeting proceedings: a book plus meeting details
254     book Cit-book ,                 -- citation to meeting
255     meet Meeting }                  -- time and location of meeting
256 
257     -- Patent number and date-issue were made optional in 1997 to
258     --   support patent applications being issued from the USPTO
259     --   Semantically a Cit-pat must have either a patent number or
260     --   an application number (or both) to be valid
261 
262 Cit-pat ::= SEQUENCE {                  -- patent citation
263     title VisibleString ,               -- title of the patent
264     authors Auth-list,                  -- author/inventor
265     country VisibleString ,             -- Patent Document Country
266     doc-type VisibleString ,            -- Patent Document Type
267     number VisibleString OPTIONAL,      -- Patent Document Number (optional since 1997)
268     date-issue Date OPTIONAL,           -- Patent Issue/Pub Date (optional since 1997)
269     class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code 
270     app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
271     app-date Date OPTIONAL ,            -- Patent Appl File Date
272     applicants Auth-list OPTIONAL ,     -- Applicants
273     assignees Auth-list OPTIONAL ,      -- Assignees
274     priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
275     abstract VisibleString OPTIONAL }   -- abstract of patent
276 
277 Patent-priority ::= SEQUENCE {          -- one priority entry of a patent (see Cit-pat.priority)
278     country VisibleString ,             -- Patent country code
279     number VisibleString ,              -- number assigned in that country
280     date Date }                         -- date of application
281 
282 Id-pat ::= SEQUENCE {                   -- just to identify a patent
283     country VisibleString ,             -- Patent Document Country
284     id CHOICE {                         -- exactly one of the two numbers
285         number VisibleString ,          -- Patent Document Number
286         app-number VisibleString } ,    -- Patent Doc Appl Number
287     doc-type VisibleString OPTIONAL }   -- Patent Doc Type
288 
289 Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
290     cit Cit-book ,                      -- same fields as a book
291     man-id VisibleString OPTIONAL ,     -- Manuscript identifier
292     type ENUMERATED {                   -- which of the three kinds this is
293         manuscript (1) ,
294         letter (2) ,
295         thesis (3) } OPTIONAL }         -- the kind may be left unspecified
296                                 -- NOTE: this is just to cite a
297                                 -- direct data submission, see NCBI-Submit
298                                 -- for the form of a sequence submission
299 Cit-sub ::= SEQUENCE {              -- cites a direct data submission
300     authors Auth-list,              -- need not be the authors of the paper
301     imp Imprint OPTIONAL,           -- legacy: only used to get the date, will go
302     medium ENUMERATED {             -- how the submission was delivered
303         paper (1),
304         tape (2),
305         floppy (3),
306         email (4),
307         other (255) } OPTIONAL,
308     date Date OPTIONAL,             -- supersedes imp; will become required
309     descr VisibleString OPTIONAL }  -- publicly visible description of changes
310     
311 Cit-gen ::= SEQUENCE {               -- catchall citation, NOT taken from ANSI
312     cit VisibleString OPTIONAL,      -- free text of any kind, not parsable
313     authors Auth-list OPTIONAL,
314     muid INTEGER OPTIONAL,           -- MEDLINE uid
315     journal Title OPTIONAL,
316     volume VisibleString OPTIONAL,
317     issue VisibleString OPTIONAL,
318     pages VisibleString OPTIONAL,
319     date Date OPTIONAL,
320     serial-number INTEGER OPTIONAL,  -- used by GenBank style references
321     title VisibleString OPTIONAL,    -- for instance cit="unpublished", title="title"
322     pmid PubMedId OPTIONAL }         -- PubMed Id
323     
324     
325     -- Authorship Group
326 Auth-list ::= SEQUENCE {                -- the authorship group of a citation
327     names CHOICE {                      -- the names, in one of three forms
328         std SEQUENCE OF Author,         -- fully structured Author entries
329         ml SEQUENCE OF VisibleString,   -- semi-structured MEDLINE names
330         str SEQUENCE OF VisibleString },    -- unstructured strings
331     affil Affil OPTIONAL }              -- author affiliation
332 
333 Author ::= SEQUENCE {                   -- one author entry of an Auth-list
334     name Person-id ,                        -- Author, Primary or Secondary
335     level ENUMERATED {                  -- primary or secondary author
336         primary (1),
337         secondary (2) } OPTIONAL ,
338     role ENUMERATED {                   -- Author Role Indicator
339         compiler (1),
340         editor (2),
341         patent-assignee (3),
342         translator (4) } OPTIONAL ,
343     affil Affil OPTIONAL ,              -- affiliation of this individual author
344     is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author
345 
346 Affil ::= CHOICE {                       -- an affiliation, as text or structured
347     str VisibleString,                   -- free text, not parsed
348     std SEQUENCE {                       -- structured representation
349         affil VisibleString OPTIONAL,        -- Author Affiliation, Name
350         div VisibleString OPTIONAL,          -- Author Affiliation, Division
351         city VisibleString OPTIONAL,         -- Author Affiliation, City
352         sub VisibleString OPTIONAL,          -- Author Affiliation, County Sub
353         country VisibleString OPTIONAL,      -- Author Affiliation, Country
354         street VisibleString OPTIONAL,       -- street address, not ANSI
355         email VisibleString OPTIONAL,
356         fax VisibleString OPTIONAL,
357         phone VisibleString OPTIONAL,
358         postal-code VisibleString OPTIONAL } }
359 
360     -- Title Group
361     -- Valid for = A = Analytic (Cit-art)
362     --             J = Journals (Cit-jour)
363     --             B = Book (Cit-book)
364                                                  -- Valid for:
365 Title ::= SET OF CHOICE {   -- set of titles; trailing letters: A = Cit-art, J = Cit-jour, B = Cit-book
366     name VisibleString ,    -- Title, Anal,Coll,Mono    AJB
367     tsub VisibleString ,    -- Title, Subordinate       A B
368     trans VisibleString ,   -- Title, Translated        AJB
369     jta VisibleString ,     -- Title, Abbreviated        J
370     iso-jta VisibleString , -- specifically ISO jta      J
371     ml-jta VisibleString ,  -- specifically MEDLINE jta  J
372     coden VisibleString ,   -- a coden                   J
373     issn VisibleString ,    -- ISSN                      J
374     abr VisibleString ,     -- Title, Abbreviated         B
375     isbn VisibleString }    -- ISBN                       B
376 
377 Imprint ::= SEQUENCE {                  -- Imprint group
378     date Date ,                         -- date of publication
379     volume VisibleString OPTIONAL ,
380     issue VisibleString OPTIONAL ,
381     pages VisibleString OPTIONAL ,
382     section VisibleString OPTIONAL ,
383     pub Affil OPTIONAL,                     -- publisher, required for book
384     cprt Date OPTIONAL,                     -- copyright date, required for book
385     part-sup VisibleString OPTIONAL ,       -- part/sup of volume
386     language VisibleString DEFAULT "ENG" ,  -- put here for simplicity; "ENG" when absent
387     prepub ENUMERATED {                     -- for prepublication citations
388         submitted (1) ,                     -- submitted, not accepted
389         in-press (2) ,                      -- accepted, not published
390         other (255)  } OPTIONAL ,           -- some other prepublication state
391     part-supi VisibleString OPTIONAL ,      -- part/sup on issue
392     retract CitRetract OPTIONAL ,           -- retraction info
393     pubstatus PubStatus OPTIONAL ,          -- current status of this publication
394     history PubStatusDateSet OPTIONAL }     -- dates for this record
395 
396 CitRetract ::= SEQUENCE {            -- retraction or erratum information for a citation
397     type ENUMERATED {                    -- retraction of an entry
398         retracted (1) ,                  -- this citation retracted
399         notice (2) ,                     -- this citation is a retraction notice
400         in-error (3) ,                   -- an erratum was published about this
401         erratum (4) } ,                  -- this is a published erratum
402     exp VisibleString OPTIONAL }         -- citation and/or explanation
403 
404 Meeting ::= SEQUENCE {         -- the meeting behind a Cit-proc
405     number VisibleString ,     -- meeting number, kept as text
406     date Date ,                -- date of the meeting
407     place Affil OPTIONAL }     -- location of the meeting
408 
409             
410 END
411 
412 
413 --$Revision: 6.0 $
414 --**********************************************************************
415 --
416 --  MEDLINE data definitions
417 --  James Ostell, 1990
418 --
419 --  enhanced in 1996 to support PubMed records as well by simply adding
420 --    the PubMedId and making MedlineId optional
421 --
422 --**********************************************************************
423 
424 NCBI-Medline DEFINITIONS ::=
425 BEGIN
426 
427 EXPORTS Medline-entry, Medline-si;
428 
429 IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
430         Date FROM NCBI-General;
431 
432                                 -- a MEDLINE or PubMed entry
433 Medline-entry ::= SEQUENCE {
434     uid INTEGER OPTIONAL ,      -- MEDLINE UID, sometimes not yet available if from PubMed
435     em Date ,                   -- Entry Month
436     cit Cit-art ,               -- article citation
437     abstract VisibleString OPTIONAL ,      -- abstract text
438     mesh SET OF Medline-mesh OPTIONAL ,    -- MeSH terms with their qualifiers
439     substance SET OF Medline-rn OPTIONAL , -- substance records
440     xref SET OF Medline-si OPTIONAL ,      -- cross references to other databases
441     idnum SET OF VisibleString OPTIONAL ,  -- ID Number (grants, contracts)
442     gene SET OF VisibleString OPTIONAL ,   -- gene entries, as plain strings
443     pmid PubMedId OPTIONAL ,               -- MEDLINE records may include the PubMedId
444     pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
445     mlfield SET OF Medline-field OPTIONAL ,  -- additional Medline field types
446     status INTEGER {                     -- record status; medline when absent
447         publisher (1) ,      -- record as supplied by publisher
448         premedline (2) ,     -- premedline record
449         medline (3) } DEFAULT medline }  -- regular medline record
450 
451 Medline-mesh ::= SEQUENCE {            -- one MeSH term attached to an entry
452     mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point (*); FALSE when absent
453     term VisibleString ,                   -- the MeSH term
454     qual SET OF Medline-qual OPTIONAL }    -- qualifiers
455 
456 Medline-qual ::= SEQUENCE {      -- one qualifier of a MeSH term
457     mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point; FALSE when absent
458     subh VisibleString }             -- the subheading
459 
460 Medline-rn ::= SEQUENCE {       -- medline substance records
461     type ENUMERATED {           -- type of record
462         nameonly (0) ,          -- name only, no number
463         cas (1) ,               -- CAS number
464         ec (2) } ,              -- EC number
465     cit VisibleString OPTIONAL ,  -- CAS or EC number if present
466     name VisibleString }          -- name (always present)
467 
468 Medline-si ::= SEQUENCE {       -- medline cross reference records
469     type ENUMERATED {           -- type of xref, i.e. the database referred to
470         ddbj (1) ,              -- DNA Data Bank of Japan
471         carbbank (2) ,          -- Carbohydrate Structure Database
472         embl (3) ,              -- EMBL Data Library
473         hdb (4) ,               -- Hybridoma Data Bank
474         genbank (5) ,           -- GenBank
475         hgml (6) ,              -- Human Gene Map Library
476         mim (7) ,               -- Mendelian Inheritance in Man
477         msd (8) ,               -- Microbial Strains Database
478         pdb (9) ,               -- Protein Data Bank (Brookhaven)
479         pir (10) ,              -- Protein Identification Resource
480         prfseqdb (11) ,         -- Protein Research Foundation (Japan)
481         psd (12) ,              -- Protein Sequence Database (Japan)
482         swissprot (13) ,        -- SwissProt
483         gdb (14) } ,            -- Genome Data Base
484     cit VisibleString OPTIONAL }    -- the citation/accession number in that database
485 
486 Medline-field ::= SEQUENCE {    -- an additional Medline field (see Medline-entry.mlfield)
487     type INTEGER {              -- Keyed type
488         other (0) ,             -- look in line code
489         comment (1) ,           -- comment line
490         erratum (2) } ,         -- retracted, corrected, etc
491     str VisibleString ,         -- the text
492     ids SEQUENCE OF DocRef OPTIONAL }  -- pointers relevant to this text
493 
494 DocRef ::= SEQUENCE {           -- reference to a document
495     type INTEGER {              -- which kind of id uid holds
496         medline (1) ,
497         pubmed (2) ,
498         ncbigi (3) } ,
499     uid INTEGER }               -- the id itself, to be read according to type
500 
501 END
502 
503 --$Revision: 6.0 $
504 --**********************************************************************
505 --
506 --  PUBMED data definitions
507 --
508 --**********************************************************************
509 
510 NCBI-PubMed DEFINITIONS ::=
511 BEGIN
512 
513 EXPORTS Pubmed-entry, Pubmed-url;
514 
515 IMPORTS PubMedId FROM NCBI-Biblio
516         Medline-entry FROM NCBI-Medline;
517 
518 Pubmed-entry ::= SEQUENCE {        -- a PubMed entry
519     -- PUBMED records must include the PubMedId
520     pmid PubMedId,
521 
522     -- Medline entry information, when available
523     medent Medline-entry OPTIONAL,
524 
525     -- Publisher name
526     publisher VisibleString OPTIONAL,
527 
528     -- List of URLs at the publisher (the original wording "publisher cite" presumably meant "site")
529     urls SET OF Pubmed-url OPTIONAL,
530 
531     -- Publisher's article identifier
532     pubid VisibleString OPTIONAL
533 }
534 
535 Pubmed-url ::= SEQUENCE {        -- one URL of a Pubmed-entry
536     location VisibleString OPTIONAL, -- Location code
537     url VisibleString                -- Selected URL for location (the only required field)
538 }
539 
540 END
541 --$Revision: 6.0 $
542 --**********************************************************************
543 --
544 --  MEDLARS data definitions
545 --  Grigoriy Starchenko, 1997
546 --
547 --**********************************************************************
548 
549 NCBI-Medlars DEFINITIONS ::=
550 BEGIN
551 
552 EXPORTS Medlars-entry, Medlars-record;
553 
554 IMPORTS PubMedId FROM NCBI-Biblio;
555 
556 Medlars-entry ::= SEQUENCE {     -- a MEDLARS entry
557     pmid PubMedId,               -- PubMed id; all entries in PubMed must have it
558     muid INTEGER OPTIONAL,       -- Medline(OCCS) id
559     recs SET OF Medlars-record   -- unordered set of Medlars records
560 }
561 
562 Medlars-record ::= SEQUENCE {    -- one unit record of a Medlars-entry
563     code INTEGER,                -- Unit record field type integer form
564     abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
565     data VisibleString           -- Unit record data
566 }
567 
568 END
569 --$Revision: 6.0 $
570 --********************************************************************
571 --
572 --  Publication common set
573 --  James Ostell, 1990
574 --
575 --  These are the base class definitions for Publications of all sorts
576 --
577 --  support for PubMedId added in 1996
578 --********************************************************************
579 
580 NCBI-Pub DEFINITIONS ::=
581 BEGIN
582 
583 EXPORTS Pub, Pub-set, Pub-equiv;
584 
585 IMPORTS Medline-entry FROM NCBI-Medline
586         Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
587         Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;
588 
589 Pub ::= CHOICE {              -- a publication in any one of the supported forms
590     gen Cit-gen,              -- generic or unparsed citation
591     sub Cit-sub,              -- direct submission
592     medline Medline-entry,
593     muid INTEGER,             -- MEDLINE uid
594     article Cit-art,
595     journal Cit-jour,
596     book Cit-book,
597     proc Cit-proc,            -- meeting proceedings
598     patent Cit-pat,
599     pat-id Id-pat,            -- identifies a patent
600     man Cit-let,              -- manuscript, thesis, or letter
601     equiv Pub-equiv,          -- the same citation given in several ways
602     pmid PubMedId }           -- PubMedId
603 
604 Pub-equiv ::= SET OF Pub   -- equivalent identifiers for same citation (unordered)
605 
606 Pub-set ::= CHOICE {            -- a set of publications, all in one chosen form
607     pub SET OF Pub ,            -- mixed forms, each element a Pub
608     medline SET OF Medline-entry ,
609     article SET OF Cit-art ,
610     journal SET OF Cit-jour ,
611     book SET OF Cit-book ,
612     proc SET OF Cit-proc ,      -- proceedings of a meeting
613     patent SET OF Cit-pat }
614 
615 END
616 
617 --$Revision: 6.5 $
618 --**********************************************************************
619 --
620 --  NCBI Sequence location and identifier elements
621 --  by James Ostell, 1990
622 --
623 --  Version 3.0 - 1994
624 --
625 --**********************************************************************
626 
627 NCBI-Seqloc DEFINITIONS ::=
628 BEGIN
629 
630 EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
631         Na-strand, Giimport-id;
632 
633 IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
634         Id-pat FROM NCBI-Biblio
635         Feat-id FROM NCBI-Seqfeat;
636 
637 --*** Sequence identifiers ********************************
638 --*
639 
640 Seq-id ::= CHOICE {              -- a sequence identifier in one of the supported forms
641     local Object-id ,            -- local use
642     gibbsq INTEGER ,             -- Geninfo backbone seqid
643     gibbmt INTEGER ,             -- Geninfo backbone moltype
644     giim Giimport-id ,           -- Geninfo import id
645     genbank Textseq-id ,         -- GenBank
646     embl Textseq-id ,            -- EMBL
647     pir Textseq-id ,             -- PIR
648     swissprot Textseq-id ,       -- SwissProt
649     patent Patent-seq-id ,       -- a sequence within a patent
650     other Textseq-id ,           -- for historical reasons, 'other' = 'refseq'
651     general Dbtag ,              -- for other databases
652     gi INTEGER ,                 -- GenInfo Integrated Database
653     ddbj Textseq-id ,            -- DDBJ
654     prf Textseq-id ,             -- PRF SEQDB
655     pdb PDB-seq-id ,             -- PDB sequence
656     tpg Textseq-id ,             -- Third Party Annot/Seq Genbank
657     tpe Textseq-id ,             -- Third Party Annot/Seq EMBL
658     tpd Textseq-id ,             -- Third Party Annot/Seq DDBJ
659     gpipe Textseq-id ,           -- Internal NCBI genome pipeline processing ID
660     named-annot-track Textseq-id -- Internal named annotation tracking ID
661 }
662 
663 Seq-id-set ::= SET OF Seq-id    -- unordered collection of sequence identifiers
664 
665 
666 Patent-seq-id ::= SEQUENCE {    -- identifies one sequence within a patent
667     seqid INTEGER ,         -- number of sequence in patent
668     cit Id-pat }           -- patent citation
669 
670 Textseq-id ::= SEQUENCE {               -- text based identifier; every field is optional
671     name VisibleString OPTIONAL ,       -- name form of the identifier
672     accession VisibleString OPTIONAL ,  -- accession form of the identifier
673     release VisibleString OPTIONAL ,    -- release the identifier refers to
674     version INTEGER OPTIONAL }          -- version number
675 
676 Giimport-id ::= SEQUENCE {           -- Geninfo import id
677     id INTEGER ,                     -- the id to use here
678     db VisibleString OPTIONAL ,      -- dbase used in
679     release VisibleString OPTIONAL } -- the release
680 
681 PDB-seq-id ::= SEQUENCE {      -- identifies a PDB sequence by molecule and chain
682     mol PDB-mol-id ,           -- the molecule name
683     chain INTEGER DEFAULT 32 , -- chain id as the code of a single ASCII character; 32 is the space character
684     rel Date OPTIONAL }        -- release date, month and year
685 
686 PDB-mol-id ::= VisibleString  -- name of mol, 4 chars
687     
688 --*** Sequence locations **********************************
689 --*
690 
691 Seq-loc ::= CHOICE {      -- a location on one or more sequences
692     null NULL ,           -- not placed
693     empty Seq-id ,        -- to NULL one Seq-id in a collection
694     whole Seq-id ,        -- whole sequence
695     int Seq-interval ,    -- from to
696     packed-int Packed-seqint ,    -- a list of intervals
697     pnt Seq-point ,               -- a single point
698     packed-pnt Packed-seqpnt ,    -- several points sharing one Seq-id
699     mix Seq-loc-mix ,             -- an ordered list of any locations
700     equiv Seq-loc-equiv ,  -- equivalent sets of locations
701     bond Seq-bond ,               -- a bond between residues
702     feat Feat-id }         -- indirect, through a Seq-feat
703     
704 
705 Seq-interval ::= SEQUENCE {       -- an interval on one sequence
706     from INTEGER ,                -- one end of the interval (coordinate convention not stated here)
707     to INTEGER ,                  -- the other end of the interval
708     strand Na-strand OPTIONAL ,   -- strand, when it applies
709     id Seq-id ,    -- WARNING: this used to be optional
710     fuzz-from Int-fuzz OPTIONAL , -- uncertainty in from
711     fuzz-to Int-fuzz OPTIONAL }   -- uncertainty in to
712 
713 Packed-seqint ::= SEQUENCE OF Seq-interval    -- an ordered list of intervals
714 
715 Seq-point ::= SEQUENCE {          -- a single position on one sequence
716     point INTEGER ,               -- the position
717     strand Na-strand OPTIONAL ,   -- strand, when it applies
718     id Seq-id ,     -- WARNING: this used to be optional
719     fuzz Int-fuzz OPTIONAL }      -- uncertainty in point
720 
721 Packed-seqpnt ::= SEQUENCE {      -- several points on one sequence
722     strand Na-strand OPTIONAL ,   -- one strand for all the points
723     id Seq-id ,                   -- the sequence all the points lie on
724     fuzz Int-fuzz OPTIONAL ,      -- one uncertainty value for the whole structure
725     points SEQUENCE OF INTEGER }  -- the positions
726 
727 Na-strand ::= ENUMERATED {          -- strand of nucleic acid
728     unknown (0) ,
729     plus (1) ,
730     minus (2) ,               
731     both (3) ,                -- in forward orientation
732     both-rev (4) ,            -- in reverse orientation
733     other (255) }             -- anything not covered above
734 
735 Seq-bond ::= SEQUENCE {         -- bond between residues
736     a Seq-point ,           -- connection to at least one residue
737     b Seq-point OPTIONAL }  -- other end may not be available
738 
739 Seq-loc-mix ::= SEQUENCE OF Seq-loc   -- ordered list that will hold any kind of location
740 
741 Seq-loc-equiv ::= SET OF Seq-loc      -- unordered set of equivalent locations
742 
743 END
744     
745 
746 --$Revision: 6.25 $
747 --**********************************************************************
748 --
749 --  NCBI Sequence elements
750 --  by James Ostell, 1990
751 --  Version 3.0 - June 1994
752 --
753 --**********************************************************************
754 
755 NCBI-Sequence DEFINITIONS ::=
756 BEGIN
757 
758 EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
759         Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
760         Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap;  -- types other modules may import, each listed once
761 
762 IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
763         Seq-align FROM NCBI-Seqalign
764         Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
765         Seq-graph FROM NCBI-Seqres
766         Pub-equiv FROM NCBI-Pub
767         Org-ref FROM NCBI-Organism
768         BioSource FROM NCBI-BioSource
769         Seq-id, Seq-loc FROM NCBI-Seqloc
770         GB-block FROM GenBank-General
771         PIR-block FROM PIR-General
772         EMBL-block FROM EMBL-General
773         SP-block FROM SP-General
774         PRF-block FROM PRF-General
775         PDB-block FROM PDB-General
776         Seq-table FROM NCBI-SeqTable;
777 
778 --*** Sequence ********************************
779 --*
780 
781 Bioseq ::= SEQUENCE {         -- one sequence: ids, descriptors, instance, annotation
782     id SET OF Seq-id ,            -- equivalent identifiers
783     descr Seq-descr OPTIONAL , -- descriptors
784     inst Seq-inst ,            -- the sequence data
785     annot SET OF Seq-annot OPTIONAL } -- optional sets of annotation (see Seq-annot)
786 
787 --*** Descriptors *****************************
788 --*
789 
790 Seq-descr ::= SET OF Seqdesc     -- unordered collection of descriptors
791 
792 Seqdesc ::= CHOICE {         -- a single descriptor; one alternative is chosen
793     mol-type GIBB-mol ,          -- type of molecule (to be removed, see MolInfo)
794     modif SET OF GIBB-mod ,             -- modifiers (to be removed, see MolInfo)
795     method GIBB-method ,         -- sequencing method (to be removed, see MolInfo)
796     name VisibleString ,         -- a name for this sequence
797     title VisibleString ,        -- a title for this sequence
798     org Org-ref ,                -- if all from one organism (to be removed, see BioSource)
799     comment VisibleString ,      -- a more extensive comment
800     num Numbering ,              -- a numbering system
801     maploc Dbtag ,               -- map location of this sequence
802     pir PIR-block ,              -- PIR specific info
803     genbank GB-block ,           -- GenBank specific info
804     pub Pubdesc ,                -- a reference to the publication
805     region VisibleString ,       -- overall region (globin locus)
806     user User-object ,           -- user defined object
807     sp SP-block ,                -- SWISSPROT specific info
808     dbxref Dbtag ,               -- xref to other databases
809     embl EMBL-block ,            -- EMBL specific information
810     create-date Date ,           -- date entry first created/released
811     update-date Date ,           -- date of last update
812     prf PRF-block ,              -- PRF specific information
813     pdb PDB-block ,              -- PDB specific information
814     het Heterogen ,              -- cofactor, etc associated but not bound
815     source BioSource ,           -- source of materials, includes Org-ref
816     molinfo MolInfo ,            -- info on the molecule and techniques
817     modelev ModelEvidenceSupport -- model evidence for XM records
818 }
819 
820 --******* NOTE:
821 --*       mol-type, modif, method, and org are consolidated and expanded
822 --*       in Org-ref, BioSource, and MolInfo in this specification. They
823 --*       will be removed in later specifications. Do not use them in
824 --*       the future. Instead expect the new structures.
825 --*
826 --***************************
827 
828 --********************************************************************
829 --
830 -- MolInfo gives information on the
831 -- classification of the type and quality of the sequence
832 --
833 -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
834 --
835 --********************************************************************
836 
837 MolInfo ::= SEQUENCE {         -- molecule type, technique and completeness
838     biomol INTEGER {
839         unknown (0) ,
840         genomic (1) ,
841         pre-RNA (2) ,              -- precursor RNA of any sort really
842         mRNA (3) ,
843         rRNA (4) ,
844         tRNA (5) ,
845         snRNA (6) ,
846         scRNA (7) ,
847         peptide (8) ,
848         other-genetic (9) ,      -- other genetic material
849         genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
850         cRNA (11) ,              -- viral RNA genome copy intermediate
851         snoRNA (12) ,            -- small nucleolar RNA
852         transcribed-RNA (13) ,   -- transcribed RNA other than existing classes
853         ncRNA (14) ,
854         tmRNA (15) ,
855         other (255) } DEFAULT unknown ,
856     tech INTEGER {
857         unknown (0) ,
858         standard (1) ,          -- standard sequencing
859         est (2) ,               -- Expressed Sequence Tag
860         sts (3) ,               -- Sequence Tagged Site
861         survey (4) ,            -- one-pass genomic sequence
862         genemap (5) ,           -- from genetic mapping techniques
863         physmap (6) ,           -- from physical mapping techniques
864         derived (7) ,           -- derived from other data, not a primary entity
865         concept-trans (8) ,     -- conceptual translation
866         seq-pept (9) ,          -- peptide was sequenced
867         both (10) ,             -- concept transl. w/ partial pept. seq.
868         seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
869         seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
870         concept-trans-a (13) ,  -- conceptual transl. supplied by author
871         htgs-1 (14) ,           -- unordered High Throughput sequence contig
872         htgs-2 (15) ,           -- ordered High Throughput sequence contig
873         htgs-3 (16) ,           -- finished High Throughput sequence
874         fli-cdna (17) ,         -- full length insert cDNA
875         htgs-0 (18) ,           -- single genomic reads for coordination
876         htc (19) ,              -- high throughput cDNA
877         wgs (20) ,              -- whole genome shotgun sequencing
878         barcode (21) ,          -- barcode of life project
879         composite-wgs-htgs (22) , -- composite of WGS and HTGS
880         tsa (23) ,              -- transcriptome shotgun assembly
881         other (255) }           -- use MolInfo.techexp (the next field)
882                DEFAULT unknown ,
883     techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
884     --
885     -- Completeness is not indicated in most records.  For genomes, assume
886     -- the sequences are incomplete unless specifically marked as complete.
887     -- For mRNAs, assume the ends are not known exactly unless marked as
888     -- having the left or right end.
889     --
890     completeness INTEGER {
891       unknown (0) ,
892       complete (1) ,                   -- complete biological entity
893       partial (2) ,                    -- partial but no details given
894       no-left (3) ,                    -- missing 5' or NH3 end
895       no-right (4) ,                   -- missing 3' or COOH end
896       no-ends (5) ,                    -- missing both ends
897       has-left (6) ,                   -- 5' or NH3 end present
898       has-right (7) ,                  -- 3' or COOH end present
899       other (255) } DEFAULT unknown ,
900     gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA
901 
902 
903 GIBB-mol ::= ENUMERATED {       -- type of molecule represented (being replaced by MolInfo)
904     unknown (0) ,
905     genomic (1) ,
906     pre-mRNA (2) ,              -- precursor RNA of any sort really
907     mRNA (3) ,
908     rRNA (4) ,
909     tRNA (5) ,
910     snRNA (6) ,
911     scRNA (7) ,
912     peptide (8) ,
913     other-genetic (9) ,      -- other genetic material
914     genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
915     other (255) }
916 
917 GIBB-mod ::= ENUMERATED {        -- GenInfo Backbone modifiers (being replaced by MolInfo)
918     dna (0) ,
919     rna (1) ,
920     extrachrom (2) ,
921     plasmid (3) ,
922     mitochondrial (4) ,
923     chloroplast (5) ,
924     kinetoplast (6) ,
925     cyanelle (7) ,
926     synthetic (8) ,
927     recombinant (9) ,
928     partial (10) ,
929     complete (11) ,
930     mutagen (12) ,    -- subject of mutagenesis ?
931     natmut (13) ,     -- natural mutant ?
932     transposon (14) ,
933     insertion-seq (15) ,
934     no-left (16) ,    -- missing left end (5' for na, NH2 for aa)
935     no-right (17) ,   -- missing right end (3' or COOH)
936     macronuclear (18) ,
937     proviral (19) ,
938     est (20) ,        -- expressed sequence tag
939     sts (21) ,        -- sequence tagged site
940     survey (22) ,     -- one pass survey sequence
941     chromoplast (23) ,
942     genemap (24) ,    -- is a genetic map
943     restmap (25) ,    -- is an ordered restriction map
944     physmap (26) ,    -- is a physical map (not ordered restriction map)
945     other (255) }
946 
947 GIBB-method ::= ENUMERATED {        -- sequencing methods (being replaced by MolInfo)
948     concept-trans (1) ,    -- conceptual translation; note values start at 1, no 0 is defined
949     seq-pept (2) ,         -- peptide was sequenced
950     both (3) ,             -- concept transl. w/ partial pept. seq.
951     seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
952     seq-pept-homol (5) ,   -- sequenced peptide, ordered by homology
953     concept-trans-a (6) ,  -- conceptual transl. supplied by author
954     other (255) }
955 
956 Numbering ::= CHOICE {           -- any display numbering system; one alternative is chosen
957     cont Num-cont ,              -- continuous numbering
958     enum Num-enum ,              -- enumerated names for residues
959     ref Num-ref ,                -- by reference to another sequence
960     real Num-real }              -- supports mapping to a float system
961 
962 Num-cont ::= SEQUENCE {          -- continuous display numbering system; every field has a default
963     refnum INTEGER DEFAULT 1,         -- number assigned to first residue
964     has-zero BOOLEAN DEFAULT FALSE ,  -- 0 used?
965     ascending BOOLEAN DEFAULT TRUE }  -- ascending numbers?
966 
967 Num-enum ::= SEQUENCE {          -- any tags to residues
968     num INTEGER ,                        -- number of tags to follow (count of entries in names)
969     names SEQUENCE OF VisibleString }    -- the tags, as an ordered list
970 
971 Num-ref ::= SEQUENCE {           -- by reference to other sequences
972     type ENUMERATED {            -- type of reference
973         not-set (0) ,
974         sources (1) ,            -- by segmented or const seq sources
975         aligns (2) } ,           -- by alignments given below
976     aligns Seq-align OPTIONAL }  -- the alignment referred to by type aligns (2)
977 
978 Num-real ::= SEQUENCE {          -- mapping to floating point system
979     a REAL ,                     -- from an integer system used by Bioseq
980     b REAL ,                     -- position = (a * int_position) + b
981     units VisibleString OPTIONAL } -- optional text; presumably names the units of the float system
982 
983 Pubdesc ::= SEQUENCE {              -- how sequence presented in pub
984     pub Pub-equiv ,                 -- the citation(s); the only required field
985     name VisibleString OPTIONAL ,   -- name used in paper
986     fig VisibleString OPTIONAL ,    -- figure in paper
987     num Numbering OPTIONAL ,        -- numbering from paper
988     numexc BOOLEAN OPTIONAL ,       -- numbering problem with paper
989     poly-a BOOLEAN OPTIONAL ,       -- poly A tail indicated in figure?
990     maploc VisibleString OPTIONAL , -- map location reported in paper
991     seq-raw StringStore OPTIONAL ,  -- original sequence from paper
992     align-group INTEGER OPTIONAL ,  -- this seq aligned with others in paper
993     comment VisibleString OPTIONAL, -- any comment on this pub in context
994     reftype INTEGER {           -- type of reference in a GenBank record
995         seq (0) ,               -- refers to sequence
996         sites (1) ,             -- refers to unspecified features
997         feats (2) ,             -- refers to specified features
998         no-target (3) }         -- nothing specified (EMBL)
999         DEFAULT seq }           -- seq (0) applies when reftype is absent
1000 
1001 Heterogen ::= VisibleString       -- cofactor, prosthetic group, inhibitor, etc (used by Seqdesc.het)
1002 
1003 --*** Instances of sequences *******************************
1004 --*
1005 
1006 Seq-inst ::= SEQUENCE {            -- the sequence data itself
1007     repr ENUMERATED {              -- representation class
1008         not-set (0) ,              -- empty
1009         virtual (1) ,              -- no seq data
1010         raw (2) ,                  -- continuous sequence
1011         seg (3) ,                  -- segmented sequence (see Seq-ext.seg)
1012         const (4) ,                -- constructed sequence (uses Seq-hist.assembly)
1013         ref (5) ,                  -- reference to another sequence (see Seq-ext.ref)
1014         consen (6) ,               -- consensus sequence or pattern
1015         map (7) ,                  -- ordered map of any kind (see Seq-ext.map)
1016         delta (8) ,              -- sequence made by changes (delta) to others
1017         other (255) } ,
1018     mol ENUMERATED {               -- molecule class in living organism
1019         not-set (0) ,              --   > cdna = rna
1020         dna (1) ,
1021         rna (2) ,
1022         aa (3) ,
1023         na (4) ,                   -- just a nucleic acid
1024         other (255) } ,
1025     length INTEGER OPTIONAL ,      -- length of sequence in residues
1026     fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
1027     topology ENUMERATED {          -- topology of molecule
1028         not-set (0) ,
1029         linear (1) ,
1030         circular (2) ,
1031         tandem (3) ,               -- some part of tandem repeat
1032         other (255) } DEFAULT linear ,
1033     strand ENUMERATED {            -- strandedness in living organism
1034         not-set (0) ,
1035         ss (1) ,                   -- single strand
1036         ds (2) ,                   -- double strand
1037         mixed (3) ,
1038         other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
1039     seq-data Seq-data OPTIONAL ,   -- the sequence
1040     ext Seq-ext OPTIONAL ,         -- extensions for special types
1041     hist Seq-hist OPTIONAL }       -- sequence history
1042 
1043 --*** Sequence Extensions **********************************
1044 --*  for representing more complex types
1045 --*  const type uses Seq-hist.assembly
1046 
1047 Seq-ext ::= CHOICE {     -- extension data; one alternative is chosen
1048     seg Seg-ext ,        -- segmented sequences
1049     ref Ref-ext ,        -- hot link to another sequence (a view)
1050     map Map-ext ,        -- ordered map of markers
1051     delta Delta-ext }    -- ordered list of Delta-seq pieces
1052 
1053 Seg-ext ::= SEQUENCE OF Seq-loc      -- ordered list of locations (Seq-ext.seg)
1054 
1055 Ref-ext ::= Seq-loc                  -- a single location (Seq-ext.ref)
1056 
1057 Map-ext ::= SEQUENCE OF Seq-feat     -- ordered list of features (Seq-ext.map)
1058 
1059 Delta-ext ::= SEQUENCE OF Delta-seq  -- ordered list of pieces (Seq-ext.delta)
1060 
1061 Delta-seq ::= CHOICE {  -- one piece of a Delta-ext: a location or a literal
1062     loc Seq-loc ,       -- point to a sequence
1063     literal Seq-literal }   -- a piece of sequence
1064 
1065 Seq-literal ::= SEQUENCE {   -- a piece of sequence given in place (see Delta-seq.literal)
1066     length INTEGER ,         -- must give a length in residues
1067     fuzz Int-fuzz OPTIONAL , -- could be unsure
1068     seq-data Seq-data OPTIONAL } -- may have the data
1069 
1070 --*** Sequence History Record ***********************************
1071 --** assembly = records how seq was assembled from others
1072 --** replaces = records sequences made obsolete by this one
1073 --** replaced-by = this seq is made obsolete by another(s)
1074 
1075 Seq-hist ::= SEQUENCE {             -- sequence history; every field is optional
1076     assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
1077     replaces Seq-hist-rec OPTIONAL ,    -- seq makes these seqs obsolete
1078     replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
1079     deleted CHOICE {                    -- deletion, given as a flag or as a date
1080         bool BOOLEAN ,
1081         date Date } OPTIONAL }
1082 
1083 Seq-hist-rec ::= SEQUENCE {    -- entry type of Seq-hist.replaces and Seq-hist.replaced-by
1084     date Date OPTIONAL ,       -- optional date for this record
1085     ids SET OF Seq-id }        -- ids of the sequences concerned
1086 
1087 --*** Various internal sequence representations ************
1088 --*      all are controlled, fixed length forms
1089 
1090 Seq-data ::= CHOICE {              -- sequence representations
1091     iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
1092     iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
1093     ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
1094     ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
1095     ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
1096     ncbipna NCBIpna ,              -- nucleic acid probabilities
1097     ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
1098     ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
1099     ncbipaa NCBIpaa ,              -- amino acid probabilities
1100     ncbistdaa NCBIstdaa,           -- consecutive codes for std aas
1101     gap Seq-gap                    -- gap types (carries no residues, see Seq-gap)
1102 }
1103 
1104 Seq-gap ::= SEQUENCE {                 -- describes a gap (the gap alternative of Seq-data)
1105     type INTEGER {                     -- kind of gap; required
1106         unknown(0),
1107         fragment(1),               -- Deprecated. Used only for AGP 1.1
1108         clone(2),                  -- Deprecated. Used only for AGP 1.1
1109         short-arm(3),
1110         heterochromatin(4),
1111         centromere(5),
1112         telomere(6),
1113         repeat(7),
1114         contig(8),
1115         scaffold(9),
1116         other(255)
1117     },
1118     linkage INTEGER {                  -- optional linkage across the gap
1119         unlinked(0),
1120         linked(1),
1121         other(255)
1122     } OPTIONAL,
1123     linkage-evidence SET OF Linkage-evidence OPTIONAL -- optional unordered set of evidence records
1124 }
1125 
1126 Linkage-evidence ::= SEQUENCE {   -- one evidence record (see Seq-gap.linkage-evidence)
1127     type INTEGER {                -- kind of evidence; the only field
1128         paired-ends(0),
1129         align-genus(1),
1130         align-xgenus(2),
1131         align-trnscpt(3),
1132         within-clone(4),
1133         clone-contig(5),
1134         map(6),
1135         strobe(7),
1136         unspecified(8),
1137         pcr(9),
1138         other(255)
1139     }
1140 }
1141 
1142 IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces (text form)
1143 IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces (text form)
1144 NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T (2 bits per base)
1145 NCBI4na ::= OCTET STRING      -- 1 bit each for agct
1146                               -- 0001=A, 0010=C, 0100=G, 1000=T/U
1147                               -- 0101=Purine, 1010=Pyrimidine, etc
1148 NCBI8na ::= OCTET STRING      -- for modified nucleic acids
1149 NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
1150                               -- probabilities are coded 0-255 = 0.0-1.0
1151 NCBI8aa ::= OCTET STRING      -- for modified amino acids
1152 NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
1153                               -- IUPAC codes + U=selenocysteine
1154 NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
1155                               -- A-Y,B,Z,X,(ter),anything
1156                               -- probabilities are coded 0-255 = 0.0-1.0
1157 NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte
1158 
1159 --*** Sequence Annotation *************************************
1160 --*
1161 
1162 -- This is a replica of Textseq-id
1163 -- This is specific for annotations, and exists to maintain a semantic
1164 -- difference between IDs assigned to annotations and IDs assigned to
1165 -- sequences
1166 Textannot-id ::= SEQUENCE {                -- text identifier for an annotation; every field is optional
1167     name          VisibleString OPTIONAL ,
1168     accession VisibleString OPTIONAL ,
1169     release   VisibleString OPTIONAL ,
1170     version   INTEGER       OPTIONAL
1171 }
1172 
1173 Annot-id ::= CHOICE {       -- identifier for an annotation (see Seq-annot.id)
1174     local Object-id ,       -- an Object-id from NCBI-General
1175     ncbi INTEGER ,          -- an integer id
1176     general Dbtag,          -- a Dbtag from NCBI-General
1177     other Textannot-id      -- text form, see Textannot-id
1178 }
1179 
1180 Annot-descr ::= SET OF Annotdesc   -- unordered collection of annotation descriptors
1181 
1182 Annotdesc ::= CHOICE {       -- a single annotation descriptor; one alternative is chosen
1183     name VisibleString ,         -- a short name for this collection
1184     title VisibleString ,        -- a title for this collection
1185     comment VisibleString ,      -- a more extensive comment
1186     pub Pubdesc ,                -- a reference to the publication
1187     user User-object ,           -- user defined object
1188     create-date Date ,           -- date entry first created/released
1189     update-date Date ,           -- date of last update
1190     src Seq-id ,                 -- source sequence from which annot came
1191     align Align-def,             -- definition of the SeqAligns
1192     region Seq-loc }             -- all contents cover this region
1193 
1194 Align-def ::= SEQUENCE {     -- describes the alignments in a Seq-annot (see Annotdesc.align)
1195     align-type INTEGER {         -- class of align Seq-annot; required, no 0 value is defined
1196       ref (1) ,                  -- set of alignments to the same sequence
1197       alt (2) ,                  -- set of alternate alignments of the same seqs
1198       blocks (3) ,               -- set of aligned blocks in the same seqs
1199       other (255) } ,
1200     ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
1201 
1202 Seq-annot ::= SEQUENCE {         -- one set of annotation; only data is required
1203     id SET OF Annot-id OPTIONAL ,
1204     db INTEGER {                 -- source of annotation
1205         genbank (1) ,
1206         embl (2) ,
1207         ddbj (3) ,
1208         pir  (4) ,
1209         sp   (5) ,
1210         bbone (6) ,
1211         pdb   (7) ,
1212         other (255) } OPTIONAL ,
1213     name VisibleString OPTIONAL ,-- source if "other" above
1214     desc Annot-descr OPTIONAL ,  -- used only for stand alone Seq-annots
1215     data CHOICE {                -- the annotation itself; all entries are of one kind
1216         ftable SET OF Seq-feat ,
1217         align SET OF Seq-align ,
1218         graph SET OF Seq-graph ,
1219         ids SET OF Seq-id ,      -- used for communication between tools
1220         locs SET OF Seq-loc ,    -- used for communication between tools
1221         seq-table Seq-table } }  -- features in table form
1222 
1223 END
1224 
1225 
1226 --$Revision: 6.6 $
1227 --**********************************************************************
1228 --
1229 --  NCBI Sequence Collections
1230 --  by James Ostell, 1990
1231 --
1232 --  Version 3.0 - 1994
1233 --
1234 --**********************************************************************
1235 
1236 NCBI-Seqset DEFINITIONS ::=
1237 BEGIN
1238 
1239 EXPORTS Bioseq-set, Seq-entry;
1240 
1241 IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
1242         Object-id, Dbtag, Date FROM NCBI-General;
1243 
1244 --*** Sequence Collections ********************************
1245 --*
1246 
1247 Bioseq-set ::= SEQUENCE {      -- just a collection
1248     id Object-id OPTIONAL ,
1249     coll Dbtag OPTIONAL ,          -- to identify a collection
1250     level INTEGER OPTIONAL ,       -- nesting level
1251     class ENUMERATED {             -- kind of set; not-set (0) applies when absent
1252         not-set (0) ,
1253         nuc-prot (1) ,              -- nuc acid and coded proteins
1254         segset (2) ,                -- segmented sequence + parts
1255         conset (3) ,                -- constructed sequence + parts
1256         parts (4) ,                 -- parts for 2 or 3
1257         gibb (5) ,                  -- geninfo backbone
1258         gi (6) ,                    -- geninfo
1259         genbank (7) ,               -- converted genbank
1260         pir (8) ,                   -- converted pir
1261         pub-set (9) ,               -- all the seqs from a single publication
1262         equiv (10) ,                -- a set of equivalent maps or seqs
1263         swissprot (11) ,            -- converted SWISSPROT
1264         pdb-entry (12) ,            -- a complete PDB entry
1265         mut-set (13) ,              -- set of mutations
1266         pop-set (14) ,              -- population study
1267         phy-set (15) ,              -- phylogenetic study
1268         eco-set (16) ,              -- ecological sample study
1269         gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
1270         wgs-set (18) ,              -- whole genome shotgun project
1271         named-annot (19) ,          -- named annotation set
1272         named-annot-prod (20) ,     -- with instantiated mRNA+protein
1273         read-set (21) ,             -- set from a single read
1274         paired-end-reads (22) ,     -- paired sequences within a read-set
1275         small-genome-set (23) ,     -- viral segments or mitochondrial minicircles
1276         other (255) } DEFAULT not-set ,
1277     release VisibleString OPTIONAL ,
1278     date Date OPTIONAL ,
1279     descr Seq-descr OPTIONAL ,
1280     seq-set SEQUENCE OF Seq-entry , -- the members; each is a Bioseq or a nested Bioseq-set
1281     annot SET OF Seq-annot OPTIONAL }
1282 
1283 Seq-entry ::= CHOICE {     -- a single sequence or a set; recursive through Bioseq-set.seq-set
1284         seq Bioseq ,
1285         set Bioseq-set }
1286 
1287 END
1288 
1289 --$Revision: 6.0 $
1290 --  *********************************************************************
1291 --
1292 --  These are code and conversion tables for NCBI sequence codes
1293 --  ASN.1 for the sequences themselves is defined in seq.asn
1294 --
1295 --  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
1296 --    and increase continuously.  So IUPAC codes, which are upper case
1297 --    letters will always have 65 0 cells before the codes begin.  This
1298 --    allows all codes to do indexed lookups for things
1299 --
1300 --  Valid names for code tables are:
1301 --    IUPACna
1302 --    IUPACaa
1303 --    IUPACeaa
1304 --    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
1305 --                   display only, not a data exchange type
1306 --    NCBI2na
1307 --    NCBI4na
1308 --    NCBI8na
1309 --    NCBI8aa
1310 --    NCBIstdaa
1311 --     probability types map to IUPAC types for display as characters
1312 
1313 NCBI-SeqCode DEFINITIONS ::=
1314 BEGIN
1315 
1316 EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;
1317 
1318 Seq-code-type ::= ENUMERATED {              -- sequence representations; values start at 1
1319     iupacna (1) ,              -- IUPAC 1 letter nuc acid code
1320     iupacaa (2) ,              -- IUPAC 1 letter amino acid code
1321     ncbi2na (3) ,              -- 2 bit nucleic acid code
1322     ncbi4na (4) ,              -- 4 bit nucleic acid code
1323     ncbi8na (5) ,              -- 8 bit extended nucleic acid code
1324     ncbipna (6) ,              -- nucleic acid probabilities
1325     ncbi8aa (7) ,              -- 8 bit extended amino acid codes
1326     ncbieaa (8) ,              -- extended ASCII 1 letter aa codes
1327     ncbipaa (9) ,              -- amino acid probabilities
1328     iupacaa3 (10) ,            -- 3 letter code only for display
1329     ncbistdaa (11) }           -- consecutive codes for std aas, 0-25
1330 
1331 Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings 
1332     from Seq-code-type ,      -- code to map from
1333     to Seq-code-type ,        -- code to map to
1334     num INTEGER ,             -- number of rows in table
1335     start-at INTEGER DEFAULT 0 ,   -- index offset of first element (0 when absent)
1336     table SEQUENCE OF INTEGER }  -- table of values, in from-to order
1337 
1338 Seq-code-table ::= SEQUENCE { -- for names of coded values
1339     code Seq-code-type ,      -- name of code
1340     num INTEGER ,             -- number of rows in table
1341     one-letter BOOLEAN ,   -- symbol is ALWAYS 1 letter?
1342     start-at INTEGER DEFAULT 0 ,   -- index offset of first element (0 when absent)
1343     table SEQUENCE OF              -- one row per code value, in order
1344         SEQUENCE {
1345             symbol VisibleString ,      -- the printed symbol or letter
1346             name VisibleString } ,      -- an explanatory name or string
1347     comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid
1348 
1349 Seq-code-set ::= SEQUENCE {    -- for distribution
1350     codes SET OF Seq-code-table OPTIONAL , -- the code tables, if any
1351     maps SET OF Seq-map-table OPTIONAL }   -- the mapping tables, if any
1352 
1353 END
1354 
1355 --$Revision: 6.0 $
1356 --*********************************************************************
1357 --
1358 -- 1990 - J.Ostell
1359 -- Version 3.0 - June 1994
1360 --
1361 --*********************************************************************
1362 --*********************************************************************
1363 --
1364 --  EMBL specific data
1365 --  This block of specifications was developed by Reiner Fuchs of EMBL
1366 --  Updated by J.Ostell, 1994
1367 --
1368 --*********************************************************************
1369 
1370 EMBL-General DEFINITIONS ::=
1371 BEGIN
1372 
1373 EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;
1374 
1375 IMPORTS Date, Object-id FROM NCBI-General;
1376 
1377 EMBL-dbname ::= CHOICE {    -- a database, given by enumerated code or by name
1378     code ENUMERATED {
1379         embl(0),
1380         genbank(1),
1381         ddbj(2),
1382         geninfo(3),
1383         medline(4),
1384         swissprot(5),
1385         pir(6),
1386         pdb(7),
1387         epd(8),
1388         ecd(9),
1389         tfd(10),
1390         flybase(11),
1391         prosite(12),
1392         enzyme(13),
1393         mim(14),
1394         ecoseq(15),
1395         hiv(16) ,
1396         other (255) } ,
1397     name    VisibleString }     -- free text alternative to the enumerated code
1398 
1399 EMBL-xref ::= SEQUENCE {    -- cross reference: a database plus ids (see EMBL-block.xref)
1400     dbname EMBL-dbname,     -- the database referred to
1401     id SEQUENCE OF Object-id } -- ordered list of ids
1402 
1403 EMBL-block ::= SEQUENCE {   -- EMBL specific data (see Seqdesc.embl)
1404     class ENUMERATED {      -- standard (1) applies when absent
1405         not-set(0),
1406         standard(1),
1407         unannotated(2),
1408         other(255) } DEFAULT standard,
1409     div ENUMERATED {        -- optional; presumably the EMBL division, TODO confirm
1410         fun(0),
1411         inv(1),
1412         mam(2),
1413         org(3),
1414         phg(4),
1415         pln(5),
1416         pri(6),
1417         pro(7),
1418         rod(8),
1419         syn(9),
1420         una(10),
1421         vrl(11),
1422         vrt(12),
1423         pat(13),
1424         est(14),
1425         sts(15),
1426         other (255) } OPTIONAL,
1427     creation-date Date,     -- required
1428     update-date Date,       -- required
1429     extra-acc SEQUENCE OF VisibleString OPTIONAL,
1430     keywords SEQUENCE OF VisibleString OPTIONAL,
1431     xref SEQUENCE OF EMBL-xref OPTIONAL }  -- optional cross references, see EMBL-xref
1432 
1433 END
1434 
1435 --*********************************************************************
1436 --
1437 --  SWISSPROT specific data
1438 --  This block of specifications was developed by Mark Cavanaugh of
1439 --      NCBI working with Amos Bairoch of SWISSPROT
1440 --
1441 --*********************************************************************
1442 
1443 SP-General DEFINITIONS ::=
1444 BEGIN
1445 
1446 EXPORTS SP-block;
1447 
1448 IMPORTS Date, Dbtag FROM NCBI-General
1449         Seq-id FROM NCBI-Seqloc;
1450 
1451 SP-block ::= SEQUENCE {         -- SWISSPROT specific descriptions
1452     class ENUMERATED {          -- required; no default is declared
1453         not-set (0) ,
1454         standard (1) ,      -- conforms to all SWISSPROT checks
1455         prelim (2) ,        -- only seq and biblio checked
1456         other (255) } ,
1457     extra-acc SET OF VisibleString OPTIONAL ,  -- old SWISSPROT ids
1458     imeth BOOLEAN DEFAULT FALSE ,  -- seq known to start with Met (FALSE when absent)
1459     plasnm SET OF VisibleString OPTIONAL,  -- plasmid names carrying gene
1460     seqref SET OF Seq-id OPTIONAL,         -- xref to other sequences
1461     dbref SET OF Dbtag OPTIONAL ,          -- xref to non-sequence dbases
1462     keywords SET OF VisibleString OPTIONAL , -- keywords
1463     created Date OPTIONAL ,         -- creation date
1464     sequpd Date OPTIONAL ,          -- sequence update
1465     annotupd Date OPTIONAL }        -- annotation update
1466 
1467 END
1468 
1469 --*********************************************************************
1470 --
1471 --  PIR specific data
1472 --  This block of specifications was developed by Jim Ostell of
1473 --      NCBI
1474 --
1475 --*********************************************************************
1476 
1477 PIR-General DEFINITIONS ::=
1478 BEGIN
1479 
1480 EXPORTS PIR-block;
1481 
1482 IMPORTS Seq-id FROM NCBI-Seqloc;
1483 
1484 PIR-block ::= SEQUENCE {          -- PIR specific descriptions; every field is optional
1485     had-punct BOOLEAN OPTIONAL ,      -- had punctuation in sequence ?
1486     host VisibleString OPTIONAL ,
1487     source VisibleString OPTIONAL ,     -- source line
1488     summary VisibleString OPTIONAL ,
1489     genetic VisibleString OPTIONAL ,
1490     includes VisibleString OPTIONAL ,
1491     placement VisibleString OPTIONAL ,
1492     superfamily VisibleString OPTIONAL ,
1493     keywords SEQUENCE OF VisibleString OPTIONAL ,
1494     cross-reference VisibleString OPTIONAL ,
1495     date VisibleString OPTIONAL ,     -- held as text, not as a Date
1496     seq-raw VisibleString OPTIONAL ,  -- seq with punctuation
1497     seqref SET OF Seq-id OPTIONAL }         -- xref to other sequences
1498 
1499 END
1500 
1501 --*********************************************************************
1502 --
1503 --  GenBank specific data
1504 --  This block of specifications was developed by Jim Ostell of
1505 --      NCBI
1506 --
1507 --*********************************************************************
1508 
1509 GenBank-General DEFINITIONS ::=
1510 BEGIN
1511 
1512 EXPORTS GB-block;
1513 
1514 IMPORTS Date FROM NCBI-General;
1515 
1516 GB-block ::= SEQUENCE {          -- GenBank specific descriptions; every field is optional
1517     extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
1518     source VisibleString OPTIONAL ,     -- source line
1519     keywords SEQUENCE OF VisibleString OPTIONAL ,
1520     origin VisibleString OPTIONAL,
1521     date VisibleString OPTIONAL ,       -- OBSOLETE old form Entry Date
1522     entry-date Date OPTIONAL ,          -- replaces date
1523     div VisibleString OPTIONAL ,        -- GenBank division
1524     taxonomy VisibleString OPTIONAL }   -- continuation line of organism
1525 
1526 END
1527 
1528 --**********************************************************************
1529 -- PRF specific definition
1530 --    PRF is a protein sequence database created and maintained by
1531 --    Protein Research Foundation, Minoo-city, Osaka, Japan.
1532 --
1533 --    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
1534 --            Kyoto Univ., Japan
1535 --
1536 --**********************************************************************
1537 
1538 PRF-General DEFINITIONS ::=
1539 BEGIN
1540 
1541 EXPORTS PRF-block;
1542 
1543 PRF-block ::= SEQUENCE {          -- PRF specific descriptions; both fields are OPTIONAL
1544       extra-src       PRF-ExtraSrc OPTIONAL,    -- see PRF-ExtraSrc
1545       keywords        SEQUENCE OF VisibleString OPTIONAL    -- ordered list of keyword strings
1546 }
1547 
1548 PRF-ExtraSrc ::= SEQUENCE {       -- used by PRF-block.extra-src; every field is an OPTIONAL string
1549       host    VisibleString OPTIONAL,
1550       part    VisibleString OPTIONAL,
1551       state   VisibleString OPTIONAL,
1552       strain  VisibleString OPTIONAL,
1553       taxon   VisibleString OPTIONAL
1554 }
1555 
1556 END
1557 
1558 --*********************************************************************
1559 --
1560 --  PDB specific data
1561 --  This block of specifications was developed by Jim Ostell and
1562 --      Steve Bryant of NCBI
1563 --
1564 --*********************************************************************
1565 
1566 PDB-General DEFINITIONS ::=
1567 BEGIN
1568 
1569 EXPORTS PDB-block;
1570 
1571 IMPORTS Date FROM NCBI-General;
1572 
1573 PDB-block ::= SEQUENCE {          -- PDB specific descriptions
1574     deposition Date ,         -- deposition date  month,year
1575     class VisibleString ,     -- required
1576     compound SEQUENCE OF VisibleString ,   -- required, ordered list of strings
1577     source SEQUENCE OF VisibleString ,     -- required, ordered list of strings
1578     exp-method VisibleString OPTIONAL ,  -- present if NOT X-ray diffraction
1579     replace PDB-replace OPTIONAL } -- replacement history (see PDB-replace)
1580 
1581 PDB-replace ::= SEQUENCE {        -- used by PDB-block.replace; both fields required
1582     date Date ,                       -- Date from NCBI-General
1583     ids SEQUENCE OF VisibleString }   -- entry ids replaced by this one
1584 
1585 END
1586 
1587 --$Revision: 6.50 $
1588 --**********************************************************************
1589 --
1590 --  NCBI Sequence Feature elements
1591 --  by James Ostell, 1990
1592 --  Version 3.0 - June 1994
1593 --
1594 --**********************************************************************
1595 
1596 NCBI-Seqfeat DEFINITIONS ::=
1597 BEGIN
1598 
1599 EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;
1600 
1601 IMPORTS Gene-ref FROM NCBI-Gene
1602         Prot-ref FROM NCBI-Protein
1603         Org-ref FROM NCBI-Organism
1604         Variation-ref FROM NCBI-Variation
1605         BioSource FROM NCBI-BioSource
1606         RNA-ref FROM NCBI-RNA
1607         Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
1608         Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
1609         Rsite-ref FROM NCBI-Rsite
1610         Txinit FROM NCBI-TxInit
1611         DOI, PubMedId FROM NCBI-Biblio
1612         Pub-set FROM NCBI-Pub
1613         Object-id, Dbtag, User-object FROM NCBI-General;
1614 
1615 --*** Feature identifiers ********************************
1616 --*
1617 
1618 Feat-id ::= CHOICE {          -- feature identifier; one of four forms
1619     gibb INTEGER ,            -- geninfo backbone
1620     giim Giimport-id ,        -- geninfo import
1621     local Object-id ,         -- for local software use
1622     general Dbtag }           -- for use by various databases
1623 
1624 --*** Seq-feat *******************************************
1625 --*  sequence feature generalization
1626 
1627 Seq-feat ::= SEQUENCE {           -- only 'data' and 'location' are mandatory
1628     id Feat-id OPTIONAL ,         -- see 'ids' below, which will replace this field
1629     data SeqFeatData ,           -- the specific data
1630     partial BOOLEAN OPTIONAL ,    -- incomplete in some way?
1631     except BOOLEAN OPTIONAL ,     -- something funny about this?
1632     comment VisibleString OPTIONAL ,  -- free text
1633     product Seq-loc OPTIONAL ,    -- product of process
1634     location Seq-loc ,            -- feature made from
1635     qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
1636     title VisibleString OPTIONAL ,   -- for user defined label
1637     ext User-object OPTIONAL ,    -- user defined structure extension; see 'exts' below
1638     cit Pub-set OPTIONAL ,        -- citations for this feature
1639     exp-ev ENUMERATED {           -- evidence for existence of feature
1640         experimental (1) ,        -- any reasonable experimental check
1641         not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
1642     xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
1643     dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
1644     pseudo BOOLEAN OPTIONAL ,     -- annotated on pseudogene?
1645     except-text VisibleString OPTIONAL , -- explain if except=TRUE
1646     ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
1647     exts SET OF User-object OPTIONAL , -- set of extensions; will replace 'ext' field
1648     support SeqFeatSupport OPTIONAL  -- will replace /experiment, /inference, model-evidence
1649 }
1650 
1651 SeqFeatData ::= CHOICE {       -- the kind of feature; exactly one alternative is chosen
1652     gene Gene-ref ,            -- Gene-ref from NCBI-Gene
1653     org Org-ref ,              -- Org-ref from NCBI-Organism
1654     cdregion Cdregion ,        -- coding region (see Cdregion)
1655     prot Prot-ref ,            -- Prot-ref from NCBI-Protein
1656     rna RNA-ref ,              -- RNA-ref from NCBI-RNA
1657     pub Pubdesc ,              -- publication applies to this seq
1658     seq Seq-loc ,              -- to annotate origin from another seq
1659     imp Imp-feat ,             -- feature imported from another database (see Imp-feat)
1660     region VisibleString,      -- named region (globin locus)
1661     comment NULL ,             -- just a comment
1662     bond ENUMERATED {          -- kind of bond
1663         disulfide (1) ,
1664         thiolester (2) ,
1665         xlink (3) ,
1666         thioether (4) ,
1667         other (255) } ,
1668     site ENUMERATED {          -- kind of site
1669         active (1) ,
1670         binding (2) ,
1671         cleavage (3) ,
1672         inhibit (4) ,
1673         modified (5),
1674         glycosylation (6) ,
1675         myristoylation (7) ,
1676         mutagenized (8) ,
1677         metal-binding (9) ,
1678         phosphorylation (10) ,
1679         acetylation (11) ,
1680         amidation (12) ,
1681         methylation (13) ,
1682         hydroxylation (14) ,
1683         sulfatation (15) ,
1684         oxidative-deamination (16) ,
1685         pyrrolidone-carboxylic-acid (17) ,
1686         gamma-carboxyglutamic-acid (18) ,
1687         blocked (19) ,
1688         lipid-binding (20) ,
1689         np-binding (21) ,
1690         dna-binding (22) ,
1691         signal-peptide (23) ,
1692         transit-peptide (24) ,
1693         transmembrane-region (25) ,
1694         nitrosylation (26) ,
1695         other (255) } ,
1696     rsite Rsite-ref ,       -- restriction site  (for maps really)
1697     user User-object ,      -- user defined structure
1698     txinit Txinit ,         -- transcription initiation
1699     num Numbering ,         -- a numbering system
1700     psec-str ENUMERATED {   -- protein secondary structure
1701         helix (1) ,         -- any helix
1702         sheet (2) ,         -- beta sheet
1703         turn  (3) } ,       -- beta or gamma turn
1704     non-std-residue VisibleString ,  -- non-standard residue here in seq
1705     het Heterogen ,         -- cofactor, prosthetic grp, etc, bound to seq
1706     biosrc BioSource,       -- BioSource from NCBI-BioSource
1707     clone Clone-ref,        -- clone feature (see Clone-ref)
1708     variation Variation-ref -- variation feature (Variation-ref from NCBI-Variation)
1709 }
1710 
1711 SeqFeatXref ::= SEQUENCE {       -- element type of Seq-feat.xref; both optional because can have one or both
1712     id Feat-id OPTIONAL ,        -- the feature copied
1713     data SeqFeatData OPTIONAL }  -- the specific data
1714 
1715 SeqFeatSupport ::= SEQUENCE {     -- type of Seq-feat.support; all three sets are OPTIONAL
1716   experiment SET OF ExperimentSupport OPTIONAL ,     -- see ExperimentSupport
1717   inference SET OF InferenceSupport OPTIONAL ,       -- see InferenceSupport
1718   model-evidence SET OF ModelEvidenceSupport OPTIONAL  -- see ModelEvidenceSupport
1719 }
1720 
1721 EvidenceCategory ::= INTEGER {    -- 'category' field of ExperimentSupport and InferenceSupport
1722   not-set (0) ,
1723   coordinates (1) ,
1724   description (2) ,
1725   existence (3)
1726 }
1727 
1728 ExperimentSupport ::= SEQUENCE {  -- element type of SeqFeatSupport.experiment
1729   category EvidenceCategory OPTIONAL ,
1730   explanation VisibleString ,         -- required; the only mandatory field
1731   pmids SET OF PubMedId OPTIONAL ,    -- PubMedId from NCBI-Biblio
1732   dois SET OF DOI OPTIONAL            -- DOI from NCBI-Biblio
1733 }
1734 
1735 Program-id ::= SEQUENCE {         -- element type of EvidenceBasis.programs
1736   name VisibleString ,                -- required
1737   version VisibleString OPTIONAL      -- version kept as a string
1738 }
1739 
1740 EvidenceBasis ::= SEQUENCE {      -- type of InferenceSupport.basis; both sets are OPTIONAL
1741   programs SET OF Program-id OPTIONAL ,   -- see Program-id
1742   accessions SET OF Seq-id OPTIONAL       -- Seq-id from NCBI-Seqloc
1743 }
1744 
1745 InferenceSupport ::= SEQUENCE {   -- element type of SeqFeatSupport.inference
1746   category EvidenceCategory OPTIONAL ,
1747   type INTEGER {                      -- kind of inference; defaults to not-set
1748     not-set (0) ,
1749     similar-to-sequence (1) ,
1750     similar-to-aa (2) ,
1751     similar-to-dna (3) ,
1752     similar-to-rna (4) ,
1753     similar-to-mrna (5) ,
1754     similiar-to-est (6) ,             -- NOTE(review): spelled "similiar" in the spec; the name is part of the value notation, so it is left as-is
1755     similar-to-other-rna (7) ,
1756     profile (8) ,
1757     nucleotide-motif (9) ,
1758     protein-motif (10) ,
1759     ab-initio-prediction (11) ,
1760     alignment (12) ,
1761     other (255)
1762   } DEFAULT not-set ,
1763   other-type VisibleString OPTIONAL , -- presumably describes the type when type = other; TODO confirm
1764   same-species BOOLEAN DEFAULT FALSE ,
1765   basis EvidenceBasis ,               -- required (see EvidenceBasis)
1766   pmids SET OF PubMedId OPTIONAL ,    -- PubMedId from NCBI-Biblio
1767   dois SET OF DOI OPTIONAL            -- DOI from NCBI-Biblio
1768 }
1769 
1770 ModelEvidenceItem ::= SEQUENCE {  -- element type of the mrna / est / protein sets in ModelEvidenceSupport
1771   id Seq-id ,                         -- required; Seq-id from NCBI-Seqloc
1772   exon-count INTEGER OPTIONAL ,
1773   exon-length INTEGER OPTIONAL ,
1774   full-length BOOLEAN DEFAULT FALSE ,
1775   supports-all-exon-combo BOOLEAN DEFAULT FALSE
1776 }
1777 
1778 ModelEvidenceSupport ::= SEQUENCE {   -- exported; element type of SeqFeatSupport.model-evidence; no mandatory field
1779   method VisibleString OPTIONAL ,
1780   mrna SET OF ModelEvidenceItem OPTIONAL ,      -- see ModelEvidenceItem
1781   est SET OF ModelEvidenceItem OPTIONAL ,       -- see ModelEvidenceItem
1782   protein SET OF ModelEvidenceItem OPTIONAL ,   -- see ModelEvidenceItem
1783   identification Seq-id OPTIONAL ,
1784   dbxref SET OF Dbtag OPTIONAL ,                -- Dbtag from NCBI-General
1785   exon-count INTEGER OPTIONAL ,
1786   exon-length INTEGER OPTIONAL ,
1787   full-length BOOLEAN DEFAULT FALSE ,
1788   supports-all-exon-combo BOOLEAN DEFAULT FALSE
1789 }
1790 
1791 --*** CdRegion ***********************************************
1792 --*
1793 --*  Instructions to translate from a nucleic acid to a peptide
1794 --*    conflict means it's supposed to translate but doesn't
1795 --*
1796 
1797 
1798 Cdregion ::= SEQUENCE {            -- every field is OPTIONAL or has a DEFAULT
1799     orf BOOLEAN OPTIONAL ,             -- just an ORF ?
1800     frame ENUMERATED {
1801         not-set (0) ,                  -- not set, code uses one
1802         one (1) ,
1803         two (2) ,
1804         three (3) } DEFAULT not-set ,      -- reading frame
1805     conflict BOOLEAN OPTIONAL ,        -- conflict
1806     gaps INTEGER OPTIONAL ,            -- number of gaps on conflict/except
1807     mismatch INTEGER OPTIONAL ,        -- number of mismatches on above
1808     code Genetic-code OPTIONAL ,       -- genetic code used
1809     code-break SEQUENCE OF Code-break OPTIONAL ,   -- individual exceptions
1810     stops INTEGER OPTIONAL }           -- number of stop codons on above
1811 
1812                     -- each code is 64 cells long, in the order where
1813                     -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCT=4, etc
1814                     -- NOTE: this order does NOT correspond to a Seq-data
1815                     -- encoding.  It is "natural" to codon usage instead.
1816                     -- the value in each cell is the AA coded for
1817                     -- start= AA coded only if first in peptide
1818                     --   in start array, if codon is not a legitimate start
1819                     --   codon, that cell will have the "gap" symbol for
1820                     --   that alphabet.  Otherwise it will have the AA
1821                     --   encoded when that codon is used at the start.
1822 
1823 Genetic-code ::= SET OF CHOICE {   -- a set of elements, each giving one property of the code
1824     name VisibleString ,               -- name of a code
1825     id INTEGER ,                       -- id in dbase
1826     ncbieaa VisibleString ,            -- indexed to IUPAC extended
1827     ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
1828     ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
1829     sncbieaa VisibleString ,            -- start, indexed to IUPAC extended
1830     sncbi8aa OCTET STRING ,             -- start, indexed to NCBI8aa
1831     sncbistdaa OCTET STRING }           -- start, indexed to NCBIstdaa
1832 
1833 Code-break ::= SEQUENCE {              -- specific codon exceptions; both fields required
1834     loc Seq-loc ,                      -- location of exception
1835     aa CHOICE {                        -- the amino acid, in one of three encodings
1836         ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
1837         ncbi8aa INTEGER ,              -- NCBI8aa code
1838         ncbistdaa INTEGER } }           -- NCBIstdaa code
1839 
1840 Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes (unordered SET)
1841 
1842 --*** Import ***********************************************
1843 --*
1844 --*  Features imported from other databases
1845 --*
1846 
1847 Imp-feat ::= SEQUENCE {                  -- used by SeqFeatData.imp
1848     key VisibleString ,                  -- required; the only mandatory field
1849     loc VisibleString OPTIONAL ,         -- original location string
1850     descr VisibleString OPTIONAL }       -- text description
1851 
1852 Gb-qual ::= SEQUENCE {       -- element type of Seq-feat.qual; a pair of required strings
1853     qual VisibleString ,
1854     val VisibleString }
1855 
1856 
1857 --*** Clone-ref ***********************************************
1858 --*
1859 --*  Specification of clone features
1860 --*
1861 
1862 Clone-ref ::= SEQUENCE {       -- used by SeqFeatData.clone; only 'name' is mandatory
1863     name VisibleString,        -- Official clone symbol
1864     library VisibleString OPTIONAL,     -- Library name
1865 
1866     concordant BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1867     unique BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1868     placement-method INTEGER {
1869         end-seq (0),           -- Clone placed by end sequence
1870         insert-alignment (1),  -- Clone placed by insert alignment
1871         sts (2),               -- Clone placed by STS
1872         fish (3),
1873         fingerprint (4),
1874         end-seq-insert-alignment (5), -- combined end-seq and insert align
1875         external (253),           -- Placement provided externally
1876         curated (254),            -- Human placed or approved
1877         other (255)
1878     } OPTIONAL,
1879     clone-seq Clone-seq-set OPTIONAL  -- see Clone-seq-set / Clone-seq
1880 }
1881 
1882 Clone-seq-set ::= SET OF Clone-seq   -- type of Clone-ref.clone-seq
1883 
1884 
1885 Clone-seq ::= SEQUENCE {     -- element type of Clone-seq-set; 'type' and 'location' are mandatory
1886     type INTEGER {
1887         insert (0),
1888         end (1),
1889         other (255)
1890     },
1891     confidence INTEGER {
1892         multiple (0),        -- Multiple hits
1893         na (1),              -- Unspecified
1894         nohit-rep (2),       -- No hits, end flagged repetitive
1895         nohitnorep (3),      -- No hits, end not flagged repetitive
1896         other-chrm (4),      -- Hit on different chromosome
1897         unique (5),
1898         virtual (6),         -- Virtual (hasn't been sequenced)
1899         multiple-rep (7),    -- Multiple hits, end flagged repetitive
1900         multiplenorep (8),   -- Multiple hits, end not flagged repetitive
1901         no-hit (9),          -- No hits
1902         other (255)
1903     } OPTIONAL,
1904     location Seq-loc,        -- location on sequence
1905     seq Seq-loc OPTIONAL,    -- clone sequence location
1906     align-id Dbtag OPTIONAL, -- internal alignment identifier
1907     support INTEGER {
1908         prototype (0),       -- sequence used to place clone
1909         supporting (1),      -- sequence supports placement
1910         supports-other(2),   -- supports a different placement
1911         non-supporting (3)   -- does not support any placement
1912     } OPTIONAL
1913 }
1914 
1915 END
1916 
1917 
1918 --*** Variation-ref ***********************************************
1919 --*
1920 --*  Specification of variation features
1921 --*
1922 
1923 NCBI-Variation DEFINITIONS ::=
1924 BEGIN
1925 
1926 EXPORTS Variation-ref, Variation-inst, VariantProperties,
1927         Population-data, Phenotype;
1928 
1929 IMPORTS Int-fuzz, User-object, Object-id, Dbtag FROM NCBI-General
1930         Seq-literal FROM NCBI-Sequence
1931         SubSource FROM NCBI-BioSource
1932         Seq-loc FROM NCBI-Seqloc
1933         Pub FROM NCBI-Pub;
1934 
1935 
1936 -- --------------------------------------------------------------------------
1937 -- Historically, the dbSNP definitions document data structures used in the
1938 -- processing and annotation of variations by the dbSNP group.  The intention
1939 -- is to provide information to clients that reflect internal information
1940 -- produced during the mapping of SNPs
1941 -- --------------------------------------------------------------------------
1942 
1943 VariantProperties ::= SEQUENCE {   -- only 'version' is mandatory
1944     version INTEGER,
1945 
1946     -- NOTE:
1947     -- The format for most of these values is as an integer
1948     -- Unless otherwise noted, these integers represent a bitwise OR (= simple
1949     -- sum) of the possible values, and as such, these values represent the
1950     -- specific bit flags that may be set for each of the possible attributes
1951     -- here.
1952 
1953     resource-link INTEGER {
1954         preserved        (1), -- Clinical, Pubmed, Cited, (0x01)
1955         provisional      (2), -- Provisional Third Party Annotations (0x02)
1956         has3D            (4), -- Has 3D structure SNP3D table (0x04)
1957         submitterLinkout (8), -- SNP->SubSNP->Batch link_out (0x08)
1958         clinical        (16), -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
1959         genotypeKit     (32)  -- Marker exists on high density genotyping kit
1960                               -- (0x20)
1961     } OPTIONAL,
1962 
1963     gene-location INTEGER {
1964         in-gene         (1), -- Sequence intervals covered by a gene ID but not
1965                              -- having an aligned transcript (0x01)
1966         near-gene-5     (2), -- Within 2kb of the 5' end of a gene feature (0x02)
1967         near-gene-3     (4), -- Within 0.5kb of the 3' end of a gene feature (0x04)
1968         intron          (8), -- In Intron (0x08)
1969         donor          (16), -- In donor splice-site (0x10)
1970         acceptor       (32), -- In acceptor splice-site (0x20)
1971         utr-5          (64), -- In 5' UTR (0x40)
1972         utr-3         (128), -- In 3' UTR (0x80)
1973         in-start-codon(256), -- the variant is observed in a start codon
1974                              -- (0x100)
1975         in-stop-codon (512), -- the variant is observed in a stop codon
1976                              -- (0x200)
1977         intergenic   (1024), -- variant located between genes (0x400)
1978         conserved-noncoding(2048) -- variant is located in a conserved
1979                                   -- non-coding region (0x800)
1980     } OPTIONAL,
1981 
1982     effect INTEGER {
1983         no-change      (0), -- known to cause no functional changes
1984                             -- since 0 does not combine with any other bit
1985                             -- value, 'no-change' specifically implies that
1986                             -- there are no consequences
1987         synonymous     (1), -- one allele in the set does not change the encoded
1988                             -- amino acid (0x1)
1989         nonsense       (2), -- one allele in the set changes to STOP codon
1990                             -- (TER).  (0x2)
1991         missense       (4), -- one allele in the set changes protein peptide
1992                             -- (0x4)
1993         frameshift     (8), -- one allele in the set changes all downstream
1994                             -- amino acids (0x8)
1995 
1996         up-regulator  (16), -- the variant causes increased transcription
1997                             -- (0x10)
1998         down-regulator(32), -- the variant causes decreased transcription
1999                             -- (0x20)
2000         methylation   (64), -- (0x40)
2001         stop-gain     (128), -- reference codon is not stop codon, but the snp
2002                              -- variant allele changes the codon to a
2003                              -- terminating codon.
2004         stop-loss     (256)  -- reverse of STOP-GAIN: reference codon is a
2005                              -- stop codon, but a snp variant allele changes
2006                              -- the codon to a non-terminating codon.
2007     } OPTIONAL,
2008 
2009     mapping INTEGER {
2010         has-other-snp         (1), -- Another SNP has the same mapped positions
2011                                    -- on reference assembly (0x01)
2012         has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
2013                                    -- chromosomes on different assemblies (0x02)
2014         is-assembly-specific  (4)  -- Only maps to 1 assembly (0x04)
2015     } OPTIONAL,
2016 
2017     -- map-weight captures specificity of placement
2018     -- NOTE: This is *NOT* a bitfield
2019     map-weight INTEGER {
2020         is-uniquely-placed(1),
2021         placed-twice-on-same-chrom(2),
2022         placed-twice-on-diff-chrom(3),
2023         many-placements(10)
2024     } OPTIONAL,
2025 
2026     frequency-based-validation INTEGER {
2027         is-mutation       (1), -- low frequency variation that is cited in
2028                                -- journal or other reputable sources (0x01)
2029         above-5pct-all    (2), -- >5% minor allele freq in each and all
2030                                -- populations (0x02)
2031         above-5pct-1plus  (4), -- >5% minor allele freq in 1+ populations (0x04)
2032         validated         (8), -- Bit is set if the variant has a minor allele
2033                                -- observed in two or more separate chromosomes (0x08)
2034         above-1pct-all   (16), -- >1% minor allele freq in each and all
2035                                -- populations (0x10)
2036         above-1pct-1plus (32)  -- >1% minor allele freq in 1+ populations (0x20)
2037     } OPTIONAL,
2038 
2039     genotype INTEGER {
2040         in-haplotype-set (1), -- Exists in a haplotype tagging set (0x01)
2041         has-genotypes    (2)  -- SNP has individual genotype (0x02)
2042     } OPTIONAL,
2043 
2044     -- project IDs are IDs from BioProjects
2045     -- in order to report information about project relationships, we
2046     -- require projects to be registered
2047     -- This field in many ways duplicates dbxrefs; however, the
2048     -- intention of this field is to more adequately reflect
2049     -- ownership and data source
2050     --
2051     -- 11/9/2010: DO NOT USE
2052     -- This field was changed in the spec in a breaking way; using it will
2053     -- break clients.  We are officially suppressing / abandoning this field.
2054     -- Clients who need to use this should instead place the data in
2055     -- Seq-feat.dbxref, using the db name 'BioProject'
2056     project-data SET OF INTEGER OPTIONAL,
2057 
2058     quality-check INTEGER {
2059         contig-allele-missing   (1), -- Reference sequence allele at the mapped
2060                                      -- position is not present in the SNP
2061                                      -- allele list, adjusted for orientation
2062                                      -- (0x01)
2063         withdrawn-by-submitter  (2), -- One member SS is withdrawn by submitter
2064                                      -- (0x02)
2065         non-overlapping-alleles (4), -- RS set has 2+ alleles from different
2066                                      -- submissions and these sets share no
2067                                      -- alleles in common (0x04)
2068         strain-specific         (8), -- Strain specific fixed difference (0x08)
2069         genotype-conflict      (16)  -- Has Genotype Conflict (0x10)
2070     } OPTIONAL,
2071 
2072     confidence INTEGER {
2073         unknown         (0),
2074         likely-artifact (1),
2075         other           (255)
2076     } OPTIONAL,
2077 
2078     -- has this variant been validated?
2079     -- While a boolean flag offers no subtle distinctions of validation
2080     -- methods, occasionally it is only known as a single boolean value
2081     -- NOTE: this flag is redundant and should be omitted if more comprehensive
2082     -- validation information is present
2083     other-validation BOOLEAN OPTIONAL,
2084 
2085     -- origin of this allele, if known
2086     -- note that these are powers-of-two, and represent bits; thus, we can
2087     -- represent more than one state simultaneously through a bitwise OR
2088     allele-origin INTEGER {
2089         unknown         (0),
2090         germline        (1),
2091         somatic         (2),
2092         inherited       (4),
2093         paternal        (8),
2094         maternal        (16),
2095         de-novo         (32),
2096         biparental      (64),
2097         uniparental     (128),
2098         not-tested      (256),
2099         tested-inconclusive (512),
2100         not-reported   (1024),
2101 
2102         -- stopper - 2^30
2103         other           (1073741824)
2104     } OPTIONAL,
2105 
2106     -- observed allele state, if known
2107     -- NOTE: THIS IS NOT A BITFIELD!
2108     allele-state INTEGER {
2109         unknown         (0),
2110         homozygous      (1),
2111         heterozygous    (2),
2112         hemizygous      (3),
2113         nullizygous     (4),
2114         other           (255)
2115     } OPTIONAL,
2116 
2117     -- NOTE:
2118     -- 'allele-frequency' here refers to the minor allele frequency of the
2119     -- default population
2120     allele-frequency REAL OPTIONAL,
2121 
2122     -- is this variant the ancestral allele?
2123     is-ancestral-allele BOOLEAN OPTIONAL
2124 }
2125 
2126 Phenotype ::= SEQUENCE {           -- exported; every field is OPTIONAL
2127     source VisibleString OPTIONAL,
2128     term VisibleString OPTIONAL,
2129     xref SET OF Dbtag OPTIONAL,    -- Dbtag from NCBI-General
2130 
2131     -- does this variant have known clinical significance?
2132     clinical-significance INTEGER {
2133         unknown                 (0),
2134         untested                (1),
2135         non-pathogenic          (2),
2136         probable-non-pathogenic (3),
2137         probable-pathogenic     (4),
2138         pathogenic              (5),
2139         drug-response           (6),
2140         histocompatibility      (7),
2141         other                   (255)
2142     } OPTIONAL
2143 }
2144 
2145 Population-data ::= SEQUENCE {     -- exported; only 'population' is mandatory
2146     -- assayed population (e.g. HAPMAP-CEU)
2147     population VisibleString,
2148     genotype-frequency REAL OPTIONAL,
2149     chromosomes-tested INTEGER OPTIONAL,
2150     sample-ids SET OF Object-id OPTIONAL,   -- Object-id from NCBI-General
2151     allele-frequency REAL OPTIONAL,
2152 
2153     -- This field is an explicit bit-field
2154     -- Valid values should be a bitwise combination (= simple sum)
2155     -- of any of the values below
2156     flags INTEGER {
2157         is-default-population   (1),
2158         is-minor-allele         (2),
2159         is-rare-allele          (4)
2160     } OPTIONAL
2161 }
2162 
2163 Ext-loc ::= SEQUENCE {             -- pairs an Object-id with a Seq-loc; both fields required
2164     id Object-id,                  -- Object-id from NCBI-General
2165     location Seq-loc               -- Seq-loc from NCBI-Seqloc
2166 }
2167 
2168 
2169 Variation-ref ::= SEQUENCE {
2170     -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
2171     -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
2172     --
2173     -- we relate three kinds of IDs here:
2174     --  - our current object's id
2175     --  - the id of this object's parent, if it exists
2176     --  - the sample ID that this item originates from
2177     id        Dbtag OPTIONAL,
2178     parent-id Dbtag OPTIONAL,
2179     sample-id Object-id OPTIONAL,
2180     other-ids SET OF Dbtag OPTIONAL,
2181 
2182     -- names and synonyms
2183     -- some variants have well-known canonical names and possible accepted
2184     -- synonyms
2185     name VisibleString OPTIONAL,
2186     synonyms SET OF VisibleString OPTIONAL,
2187 
2188     -- tag for comment and descriptions
2189     description VisibleString OPTIONAL,
2190 
2191     -- phenotype
2192     phenotype SET OF Phenotype OPTIONAL,
2193 
2194     -- sequencing / acquisition method
2195     method SET OF INTEGER {
2196         unknown             (0),
2197         bac-acgh            (1),
2198         computational       (2),
2199         curated             (3),
2200         digital-array       (4),
2201         expression-array    (5),
2202         fish                (6),
2203         flanking-sequence   (7),
2204         maph                (8),
2205         mcd-analysis        (9),
2206         mlpa                (10),
2207         oea-assembly        (11),
2208         oligo-acgh          (12),
2209         paired-end          (13),
2210         pcr                 (14),
2211         qpcr                (15),
2212         read-depth          (16),
2213         roma                (17),
2214         rt-pcr              (18),
2215         sage                (19),
2216         sequence-alignment  (20),
2217         sequencing          (21),
2218         snp-array           (22),
2219         snp-genoytyping     (23),
2220         southern            (24),
2221         western             (25),
2222         optical-mapping     (26),
2223 
2224         other               (255)
2225     } OPTIONAL,
2226 
2227     -- Note about SNP representation and pertinent fields: allele-frequency,
2228     -- population, quality-codes:
2229     -- The case of multiple alleles for a SNP would be described by
2230     -- parent-feature of type Variation-set.diff-alleles, where the child
2231     -- features of type Variation-inst, all at the same location, would
2232     -- describe individual alleles.
2233 
2234     -- population data
2235     -- DEPRECATED - do not use
2236     population-data SET OF Population-data OPTIONAL,
2237 
2238     -- variant properties bit fields
2239     variant-prop VariantProperties OPTIONAL,
2240 
2241     -- has this variant been validated?
2242     -- DEPRECATED: new field = VariantProperties.other-validation
2243     validated BOOLEAN OPTIONAL,
2244 
2245     -- link-outs to GeneTests database
2246     -- DEPRECATED - do not use
2247     clinical-test SET OF Dbtag OPTIONAL,
2248 
2249     -- origin of this allele, if known
2250     -- note that these are powers-of-two, and represent bits; thus, we can
2251     -- represent more than one state simultaneously through a bitwise OR
2252     -- DEPRECATED: new field = VariantProperties.allele-origin
2253     allele-origin INTEGER {
2254         unknown         (0),
2255         germline        (1),
2256         somatic         (2),
2257         inherited       (4),
2258         paternal        (8),
2259         maternal        (16),
2260         de-novo         (32),
2261         biparental      (64),
2262         uniparental     (128),
2263         not-tested      (256),
2264         tested-inconclusive (512),
2265 
2266         -- stopper - 2^30
2267         other           (1073741824)
2268     } OPTIONAL,
2269 
2270     -- observed allele state, if known
2271     -- DEPRECATED: new field = VariantProperties.allele-state
2272     allele-state INTEGER {
2273         unknown         (0),
2274         homozygous      (1),
2275         heterozygous    (2),
2276         hemizygous      (3),
2277         nullizygous     (4),
2278         other           (255)
2279     } OPTIONAL,
2280 
2281     -- NOTE:
2282     -- 'allele-frequency' here refers to the minor allele frequency of the
2283     -- default population
2284     -- DEPRECATED: new field = VariantProperties.allele-frequency
2285     allele-frequency REAL OPTIONAL,
2286 
2287     -- is this variant the ancestral allele?
2288     -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
2289     is-ancestral-allele BOOLEAN OPTIONAL,
2290 
2291     -- publication support.
2292     -- Note: made this pub instead of pub-equiv, since
2293     -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like
2294     -- Pub is more often used as top-level container
2295     -- DEPRECATED - do not use; use Seq-feat.dbxref instead
2296     pub Pub OPTIONAL,
2297 
2298     data CHOICE {
2299         unknown NULL,
2300         note    VisibleString, --free-form
2301         uniparental-disomy NULL,
2302 
2303         -- actual sequence-edit at feat.location
2304         instance        Variation-inst,
2305 
2306         -- Set of related Variations.
2307         -- Location of the set equals the union of member locations
2308         set SEQUENCE {
2309             type INTEGER {
2310                 unknown     (0),
2311                 compound    (1), -- complex change at the same location on the
2312                                  -- same molecule
2313                 products    (2), -- different products arising from the same
2314                                  -- variation in a precursor, e.g. r.[13g>a,
2315                                  -- 13_88del]
2316                 haplotype   (3), -- changes on the same allele, e.g
2317                                  -- r.[13g>a;15u>c]
2318                 genotype    (4), -- changes on different alleles in the same
2319                                  -- genotype, e.g. g.[476C>T]+[476C>T]
2320                 mosaic      (5), -- different genotypes in the same individual
2321                 individual  (6), -- same organism; allele relationship unknown,
2322                                  -- e.g. g.[476C>T(+)183G>C]
2323                 population  (7), -- population
2324                 alleles     (8), -- set represents a set of observed alleles
2325                 package     (9), -- set represents a package of observations at
2326                                  -- a given location, generally containing
2327                                  -- asserted + reference
2328                 other       (255)
2329             },
2330             variations SET OF Variation-ref,
2331             name  VisibleString OPTIONAL
2332         },
2333 
2334         -- variant is a complex and undescribed change at the location
2335         -- This type of variant is known to occur in dbVar submissions
2336         complex NULL
2337     },
2338 
2339     consequence SET OF CHOICE {
2340         unknown     NULL,
2341         splicing    NULL, --some effect on splicing
2342         note        VisibleString,  --freeform
2343 
2344         -- Describe resulting variation in the product, e.g. missense,
2345         -- nonsense, silent, neutral, etc in a protein, that arises from
2346         -- THIS variation.
2347         variation   Variation-ref,
2348 
2349         -- see http://www.hgvs.org/mutnomen/recs-prot.html
2350         frameshift SEQUENCE {
2351             phase INTEGER OPTIONAL,
2352             x-length INTEGER OPTIONAL
2353         },
2354 
2355         loss-of-heterozygosity SEQUENCE {
2356             -- In germline comparison, it will be reference genome assembly
2357             -- (default) or reference/normal population. In somatic mutation,
2358             -- it will be a name of the normal tissue.
2359             reference VisibleString OPTIONAL,
2360 
2361             -- Name of the testing subject type or the testing tissue.
2362             test VisibleString OPTIONAL
2363         }
2364     } OPTIONAL,
2365 
2366     -- Observed location, if different from the parent set or feature.location.
2367     -- DEPRECATED - do not use
2368     location        Seq-loc OPTIONAL,
2369 
2370     -- reference other locs, e.g. mapped source
2371     -- DEPRECATED - do not use
2372     ext-locs SET OF Ext-loc OPTIONAL,
2373 
2374     -- DEPRECATED - do not use; use Seq-feat.exts instead
2375     ext             User-object OPTIONAL,
2376 
2377     somatic-origin SET OF SEQUENCE {
2378         -- description of the somatic origin itself
2379         source SubSource OPTIONAL,
2380         -- condition related to this origin's type
2381         condition SEQUENCE {
2382             description VisibleString OPTIONAL,
2383             -- reference to BioTerm / other descriptive database
2384             object-id SET OF Dbtag OPTIONAL
2385         } OPTIONAL
2386     } OPTIONAL
2387 
2388 }
2389 
2390 
2391 Delta-item ::= SEQUENCE {      -- one element of Variation-inst.delta
2392     seq CHOICE {               -- the 'seq' referred to by the action notes below
2393         literal Seq-literal,
2394         loc Seq-loc,
2395         this NULL --same location as variation-ref itself
2396     } OPTIONAL,
2397 
2398     -- Multiplier allows representing a tandem repeat, e.g. ATATAT as AT*3.
2399     -- This allows describing CNV/SSR where delta=self with a
2400     -- multiplier which specifies the count of the repeat unit.
2401 
2402     multiplier          INTEGER OPTIONAL, --assumed 1 if not specified.
2403     multiplier-fuzz     Int-fuzz OPTIONAL, -- presumably the uncertainty of 'multiplier' (TODO confirm)
2404 
2405     action INTEGER {           -- how this item is applied; morph (0) when absent
2406 
2407         -- replace len(seq) positions starting with location.start with seq
2408         morph      (0),
2409 
2410         -- go downstream by distance specified by multiplier (upstream if < 0),
2411         -- in genomic context.
2412         offset     (1),
2413 
2414         -- excise sequence at location
2415         -- if multiplier is specified, delete len(location)*multiplier
2416         -- positions downstream
2417         del-at     (2),
2418 
2419         -- insert seq before the location.start
2420         ins-before (3)
2421 
2422     } DEFAULT morph
2423 }
2424 
2425 
2426 -- Variation instance: a typed sequence edit ('delta') with an optional observation label
2427 Variation-inst ::= SEQUENCE {
2428     type INTEGER {              -- trailing notes give the expected shape of 'delta' per type
2429         unknown         (0),    -- delta=[]
2430         identity        (1),    -- delta=[]
2431         inv             (2),    -- delta=[del, ins.seq=
2432                                 -- RevComp(variation-location)]
2433         snv             (3),    -- delta=[morph of length 1]
2434                                 -- NOTE: this is snV not snP; the latter
2435                                 -- requires frequency-based validation to be
2436                                 -- established in VariantProperties
2437                                 -- the strict definition of SNP is an SNV with
2438                                 -- an established population frequency of at
2439                                 -- least 1% in at least 1 population
2440         mnp             (4),    -- delta=[morph of length >1]
2441         delins          (5),    -- delta=[del, ins]
2442         del             (6),    -- delta=[del]
2443         ins             (7),    -- delta=[ins]
2444         microsatellite  (8),    -- delta=[del, ins.seq= repeat-unit with fuzzy
2445                                 -- multiplier]
2446                                 -- variation-location is the microsat expansion
2447                                 -- on the sequence
2448         transposon      (9),    -- delta=[del, ins.seq= known donor or 'this']
2449                                 -- variation-location is equiv of transposon
2450                                 -- locs.
2451         cnv             (10),   -- delta=[del, ins= 'this' with fuzzy
2452                                 -- multiplier]
2453         direct-copy     (11),   -- delta=[ins.seq= upstream location on the
2454                                 -- same strand]
2455         rev-direct-copy (12),   -- delta=[ins.seq= downstream location on the
2456                                 -- same strand]
2457         inverted-copy   (13),   -- delta=[ins.seq= upstream location on the
2458                                 -- opposite strand]
2459         everted-copy    (14),   -- delta=[ins.seq= downstream location on the
2460                                 -- opposite strand]
2461         translocation   (15),   -- delta=like delins
2462         prot-missense   (16),   -- delta=[morph of length 1]
2463         prot-nonsense   (17),   -- delta=[del]; variation-location is the tail
2464                                 -- of the protein being truncated
2465         prot-neutral    (18),   -- delta=[morph of length 1]
2466         prot-silent     (19),   -- delta=[morph of length 1, same AA as at
2467                                 -- variation-location]
2468         prot-other      (20),   -- delta=any
2469 
2470         other           (255)   -- delta=any
2471     },
2472 
2473     -- Sequence that replaces the location, in biological order.
2474     delta SEQUENCE OF Delta-item,
2475 
2476     -- 'observation' is used to label items in a Variation-ref package
2477     -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
2478     -- of the values may be observed.
2479     observation INTEGER {
2480         asserted        (1),   -- inst represents the asserted base at a
2481                                -- position
2482         reference       (2),   -- inst represents the reference base at the
2483                                -- position
2484         variant         (4)    -- inst represents the observed variant at a
2485                                -- given position
2486     } OPTIONAL
2487 }
2488 
2489 END
2490 
2491 
2492 --**********************************************************************
2493 --
2494 --  NCBI Restriction Sites
2495 --  by James Ostell, 1990
2496 --  version 0.8
2497 --
2498 --**********************************************************************
2499 
2500 NCBI-Rsite DEFINITIONS ::=
2501 BEGIN
2502 
2503 EXPORTS Rsite-ref;
2504 
2505 IMPORTS Dbtag FROM NCBI-General;
2506 
2507 Rsite-ref ::= CHOICE {      -- reference to a restriction site: free text or a database tag
2508     str VisibleString ,     -- free text; may be unparsable
2509     db  Dbtag }             -- pointer to a restriction site database
2510 
2511 END
2512 
2513 --**********************************************************************
2514 --
2515 --  NCBI RNAs
2516 --  by James Ostell, 1990
2517 --  version 0.8
2518 --
2519 --**********************************************************************
2520 
2521 NCBI-RNA DEFINITIONS ::=
2522 BEGIN
2523 
2524 EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;
2525 
2526 IMPORTS Seq-loc FROM NCBI-Seqloc;
2527 
2528 --*** rnas ***********************************************
2529 --*
2530 --*  various rnas
2531 --*
2532                          -- RNA-ref: minimal RNA feature description (type, pseudo flag, extension)
2533 RNA-ref ::= SEQUENCE {
2534     type ENUMERATED {            -- type of RNA feature (required)
2535         unknown (0) ,
2536         premsg (1) ,
2537         mRNA (2) ,
2538         tRNA (3) ,
2539         rRNA (4) ,
2540         snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
2541         scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
2542         snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
2543         ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
2544         tmRNA (9) ,
2545         miscRNA (10) ,
2546         other (255) } ,          -- may be named through ext.name
2547     pseudo BOOLEAN OPTIONAL ,
2548     ext CHOICE {                    -- optional type-specific extension
2549         name VisibleString ,        -- for naming "other" type
2550         tRNA Trna-ext ,             -- for tRNAs
2551         gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
2552     }
2553 
2554 Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions (used by RNA-ref.ext.tRNA)
2555     aa CHOICE {                         -- aa this carries, as an INTEGER in one of four encodings
2556         iupacaa INTEGER ,               -- presumably a code from the alphabet named by the tag (TODO confirm)
2557         ncbieaa INTEGER ,
2558         ncbi8aa INTEGER ,
2559         ncbistdaa INTEGER } OPTIONAL ,
2560     codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
2561     anticodon Seq-loc OPTIONAL }        -- location of anticodon
2562 
2563 RNA-gen ::= SEQUENCE {                  -- generic RNA fields (used by RNA-ref.ext.gen); all optional
2564     class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
2565                                         -- examples: antisense_RNA, guide_RNA, snRNA
2566     product VisibleString OPTIONAL ,
2567     quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
2568 }
2569 
2570 RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
2571     qual VisibleString ,                -- in a tag (qual), value (val) format
2572     val VisibleString }                 -- the value paired with the tag in 'qual'
2573 
2574 RNA-qual-set ::= SEQUENCE OF RNA-qual   -- list of tag/value pairs; used by RNA-gen.quals
2575 
2576 END
2577 
2578 --**********************************************************************
2579 --
2580 --  NCBI Genes
2581 --  by James Ostell, 1990
2582 --  version 0.8
2583 --
2584 --**********************************************************************
2585 
2586 NCBI-Gene DEFINITIONS ::=
2587 BEGIN
2588 
2589 EXPORTS Gene-ref, Gene-nomenclature;
2590 
2591 IMPORTS Dbtag FROM NCBI-General;
2592 
2593 --*** Gene ***********************************************
2594 --*
2595 --*  reference to a gene
2596 --*
2597 
2598 Gene-ref ::= SEQUENCE {                   -- reference to a gene; every field is optional or defaulted
2599     locus VisibleString OPTIONAL ,        -- Official gene symbol
2600     allele VisibleString OPTIONAL ,       -- Official allele designation
2601     desc VisibleString OPTIONAL ,         -- descriptive name
2602     maploc VisibleString OPTIONAL ,       -- descriptive map location
2603     pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene; FALSE when absent
2604     db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
2605     syn SET OF VisibleString OPTIONAL ,   -- synonyms for locus
2606     locus-tag VisibleString OPTIONAL ,    -- systematic gene name (e.g., MI0001, ORF0069)
2607     formal-name Gene-nomenclature OPTIONAL  -- see Gene-nomenclature
2608 }
2609 
2610 Gene-nomenclature ::= SEQUENCE {        -- used by Gene-ref.formal-name
2611     status ENUMERATED {                 -- required; status of this nomenclature entry
2612         unknown (0) ,
2613         official (1) ,
2614         interim (2)
2615     } ,
2616     symbol VisibleString OPTIONAL ,
2617     name VisibleString OPTIONAL ,
2618     source Dbtag OPTIONAL               -- presumably the database the name comes from (TODO confirm)
2619 }
2620 
2621 END
2622 
2623 
2624 --**********************************************************************
2625 --
2626 --  NCBI Organism
2627 --  by James Ostell, 1994
2628 --  version 3.0
2629 --
2630 --**********************************************************************
2631 
2632 NCBI-Organism DEFINITIONS ::=
2633 BEGIN
2634 
2635 EXPORTS Org-ref;
2636 
2637 IMPORTS Dbtag FROM NCBI-General;
2638 
2639 --*** Org-ref ***********************************************
2640 --*
2641 --*  Reference to an organism
2642 --*     defines only the organism.. lower levels of detail for biological
2643 --*     molecules are provided by the Source object
2644 --*
2645 
2646 Org-ref ::= SEQUENCE {             -- reference to an organism; all fields optional
2647     taxname VisibleString OPTIONAL ,   -- preferred formal name
2648     common VisibleString OPTIONAL ,    -- common name
2649     mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
2650     db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
2651     syn SET OF VisibleString OPTIONAL ,  -- synonyms for taxname or common
2652     orgname OrgName OPTIONAL }         -- structured name, see OrgName
2653 
2654 
2655 OrgName ::= SEQUENCE {                     -- structured organism name; all fields optional
2656     name CHOICE {
2657         binomial BinomialOrgName ,         -- genus/species type name
2658         virus VisibleString ,              -- virus names are different
2659         hybrid MultiOrgName ,              -- hybrid between organisms
2660         namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
2661         partial PartialOrgName } OPTIONAL , -- when genus not known
2662     attrib VisibleString OPTIONAL ,        -- attribution of name
2663     mod SEQUENCE OF OrgMod OPTIONAL ,      -- structured modifiers, see OrgMod
2664     lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
2665     gcode INTEGER OPTIONAL ,               -- genetic code (see CdRegion)
2666     mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
2667     div VisibleString OPTIONAL ,           -- GenBank division code
2668     pgcode INTEGER OPTIONAL }              -- plastid genetic code
2669 
2670 
2671 OrgMod ::= SEQUENCE {           -- one typed organism modifier (used by OrgName.mod)
2672     subtype INTEGER {           -- named values start at 2; 0 and 1 are not named here
2673         strain (2) ,
2674         substrain (3) ,
2675         type (4) ,
2676         subtype (5) ,
2677         variety (6) ,
2678         serotype (7) ,
2679         serogroup (8) ,
2680         serovar (9) ,
2681         cultivar (10) ,
2682         pathovar (11) ,
2683         chemovar (12) ,
2684         biovar (13) ,
2685         biotype (14) ,
2686         group (15) ,
2687         subgroup (16) ,
2688         isolate (17) ,
2689         common (18) ,
2690         acronym (19) ,
2691         dosage (20) ,          -- chromosome dosage of hybrid
2692         nat-host (21) ,        -- natural host of this specimen
2693         sub-species (22) ,
2694         specimen-voucher (23) ,
2695         authority (24) ,
2696         forma (25) ,
2697         forma-specialis (26) ,
2698         ecotype (27) ,
2699         synonym (28) ,
2700         anamorph (29) ,
2701         teleomorph (30) ,
2702         breed (31) ,
2703         gb-acronym (32) ,       -- used by taxonomy database
2704         gb-anamorph (33) ,      -- used by taxonomy database
2705         gb-synonym (34) ,       -- used by taxonomy database
2706         culture-collection (35) ,
2707         bio-material (36) ,
2708         metagenome-source (37) ,
2709         old-lineage (253) ,
2710         old-name (254) ,
2711         other (255) } ,         -- ASN5 note (stale): old-name (254) was to be added to the next spec; it is defined above
2712     subname VisibleString ,     -- required text that accompanies 'subtype'
2713     attrib VisibleString OPTIONAL }  -- attribution/source of name
2714 
2715 BinomialOrgName ::= SEQUENCE {          -- used by OrgName.name.binomial and .namedhybrid
2716     genus VisibleString ,               -- required
2717     species VisibleString OPTIONAL ,    -- species required if subspecies used
2718     subspecies VisibleString OPTIONAL }
2719 
2720 MultiOrgName ::= SEQUENCE OF OrgName   -- used by OrgName.name.hybrid; the first will be used to assign division
2721 
2722 PartialOrgName ::= SEQUENCE OF TaxElement  -- used by OrgName.name.partial, when we don't know the genus
2723 
2724 TaxElement ::= SEQUENCE {              -- one element of a PartialOrgName
2725     fixed-level INTEGER {
2726        other (0) ,                     -- level must be set in string
2727        family (1) ,
2728        order (2) ,
2729        class (3) } ,
2730     level VisibleString OPTIONAL ,     -- presumably the string meant by the other (0) note (TODO confirm)
2731     name VisibleString }               -- required
2732 
2733 END
2734 
2735 
2736 --**********************************************************************
2737 --
2738 --  NCBI BioSource
2739 --  by James Ostell, 1994
2740 --  version 3.0
2741 --
2742 --**********************************************************************
2743 
2744 NCBI-BioSource DEFINITIONS ::=
2745 BEGIN
2746 
2747 EXPORTS BioSource, SubSource;
2748 
2749 IMPORTS Org-ref FROM NCBI-Organism;
2750 
2751 --********************************************************************
2752 --
2753 -- BioSource gives the source of the biological material
2754 --   for sequences
2755 --
2756 --********************************************************************
2757 
2758 BioSource ::= SEQUENCE {     -- source of the biological material for sequences
2759     genome INTEGER {         -- biological context
2760         unknown (0) ,
2761         genomic (1) ,
2762         chloroplast (2) ,
2763         chromoplast (3) ,
2764         kinetoplast (4) ,
2765         mitochondrion (5) ,
2766         plastid (6) ,
2767         macronuclear (7) ,
2768         extrachrom (8) ,
2769         plasmid (9) ,
2770         transposon (10) ,
2771         insertion-seq (11) ,
2772         cyanelle (12) ,
2773         proviral (13) ,
2774         virion (14) ,
2775         nucleomorph (15) ,
2776         apicoplast (16) ,
2777         leucoplast (17) ,
2778         proplastid (18) ,
2779         endogenous-virus (19) ,
2780         hydrogenosome (20) ,
2781         chromosome (21) ,
2782         chromatophore (22)
2783       } DEFAULT unknown ,    -- unknown (0) when absent
2784     origin INTEGER {         -- see the per-value notes below
2785       unknown (0) ,
2786       natural (1) ,                    -- normal biological entity
2787       natmut (2) ,                     -- naturally occurring mutant
2788       mut (3) ,                        -- artificially mutagenized
2789       artificial (4) ,                 -- artificially engineered
2790       synthetic (5) ,                  -- purely synthetic
2791       other (255)
2792     } DEFAULT unknown ,      -- unknown (0) when absent
2793     org Org-ref ,            -- the only required field
2794     subtype SEQUENCE OF SubSource OPTIONAL ,  -- typed qualifiers, see SubSource
2795     is-focus NULL OPTIONAL ,           -- to distinguish biological focus
2796     pcr-primers PCRReactionSet OPTIONAL }     -- see PCRReactionSet
2797 
2798 PCRReactionSet ::= SET OF PCRReaction   -- used by BioSource.pcr-primers
2799 
2800 PCRReaction ::= SEQUENCE {              -- one reaction; both primer sets are optional
2801     forward PCRPrimerSet OPTIONAL ,
2802     reverse PCRPrimerSet OPTIONAL }
2803 
2804 PCRPrimerSet ::= SET OF PCRPrimer       -- used by PCRReaction.forward and .reverse
2805 
2806 PCRPrimer ::= SEQUENCE {                -- one primer; sequence and name are both optional
2807     seq PCRPrimerSeq OPTIONAL ,
2808     name PCRPrimerName OPTIONAL }
2809 
2810 PCRPrimerSeq ::= VisibleString          -- used by PCRPrimer.seq
2811 
2812 PCRPrimerName ::= VisibleString         -- used by PCRPrimer.name
2813 
2814 SubSource ::= SEQUENCE {        -- one typed source qualifier (used by BioSource.subtype)
2815     subtype INTEGER {           -- required; selects the kind of qualifier
2816         chromosome (1) ,
2817         map (2) ,
2818         clone (3) ,
2819         subclone (4) ,
2820         haplotype (5) ,
2821         genotype (6) ,
2822         sex (7) ,
2823         cell-line (8) ,
2824         cell-type (9) ,
2825         tissue-type (10) ,
2826         clone-lib (11) ,
2827         dev-stage (12) ,
2828         frequency (13) ,
2829         germline (14) ,
2830         rearranged (15) ,
2831         lab-host (16) ,
2832         pop-variant (17) ,
2833         tissue-lib (18) ,
2834         plasmid-name (19) ,
2835         transposon-name (20) ,
2836         insertion-seq-name (21) ,
2837         plastid-name (22) ,
2838         country (23) ,
2839         segment (24) ,
2840         endogenous-virus-name (25) ,
2841         transgenic (26) ,
2842         environmental-sample (27) ,
2843         isolation-source (28) ,
2844         lat-lon (29) ,          -- +/- decimal degrees
2845         collection-date (30) ,  -- DD-MMM-YYYY format
2846         collected-by (31) ,     -- name of person who collected the sample
2847         identified-by (32) ,    -- name of person who identified the sample
2848         fwd-primer-seq (33) ,   -- sequence (possibly more than one; semicolon-separated)
2849         rev-primer-seq (34) ,   -- sequence (possibly more than one; semicolon-separated)
2850         fwd-primer-name (35) ,
2851         rev-primer-name (36) ,
2852         metagenomic (37) ,
2853         mating-type (38) ,
2854         linkage-group (39) ,
2855         haplogroup (40) ,
2856         whole-replicon (41) ,
2857         phenotype (42) ,
2858         altitude (43) ,
2859         other (255) } ,
2860     name VisibleString ,        -- required; presumably the text the format notes above apply to (TODO confirm)
2861     attrib VisibleString OPTIONAL }    -- attribution/source of this name
2862 
2863 END
2864 
2865 --**********************************************************************
2866 --
2867 --  NCBI Protein
2868 --  by James Ostell, 1990
2869 --  version 0.8
2870 --
2871 --**********************************************************************
2872 
2873 NCBI-Protein DEFINITIONS ::=
2874 BEGIN
2875 
2876 EXPORTS Prot-ref;
2877 
2878 IMPORTS Dbtag FROM NCBI-General;
2879 
2880 --*** Prot-ref ***********************************************
2881 --*
2882 --*  Reference to a protein name
2883 --*
2884 
2885 Prot-ref ::= SEQUENCE {                       -- reference to a protein name
2886     name SET OF VisibleString OPTIONAL ,      -- protein name
2887     desc VisibleString OPTIONAL ,      -- description (instead of name)
2888     ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
2889     activity SET OF VisibleString OPTIONAL ,  -- activities
2890     db SET OF Dbtag OPTIONAL ,         -- ids in other dbases
2891     processed ENUMERATED {             -- processing status
2892        not-set (0) ,
2893        preprotein (1) ,
2894        mature (2) ,
2895        signal-peptide (3) ,
2896        transit-peptide (4) } DEFAULT not-set }   -- not-set (0) when absent
2897 
2898 END
2899 --********************************************************************
2900 --
2901 --  Transcription Initiation Site Feature Data Block
2902 --  James Ostell, 1991
2903 --  Philip Bucher, David Ghosh
2904 --  version 1.1
2905 --
2906 --
2907 --
2908 --********************************************************************
2909 
2910 NCBI-TxInit DEFINITIONS ::=
2911 BEGIN
2912 
2913 EXPORTS Txinit;
2914 
2915 IMPORTS Gene-ref FROM NCBI-Gene
2916         Prot-ref FROM NCBI-Protein
2917         Org-ref FROM NCBI-Organism;
2918 
2919 Txinit ::= SEQUENCE {       -- transcription initiation site feature data
2920     name VisibleString ,    -- descriptive name of initiation site
2921     syn SEQUENCE OF VisibleString OPTIONAL ,   -- synonyms
2922     gene SEQUENCE OF Gene-ref OPTIONAL ,  -- gene(s) transcribed
2923     protein SEQUENCE OF Prot-ref OPTIONAL ,   -- protein(s) produced
2924     rna SEQUENCE OF VisibleString OPTIONAL ,  -- rna(s) produced
2925     expression VisibleString OPTIONAL ,  -- tissue/time of expression
2926     txsystem ENUMERATED {       -- transcription apparatus used at this site
2927         unknown (0) ,
2928         pol1 (1) ,      -- eukaryotic Pol I
2929         pol2 (2) ,      -- eukaryotic Pol II
2930         pol3 (3) ,      -- eukaryotic Pol III
2931         bacterial (4) ,
2932         viral (5) ,
2933         rna (6) ,       -- RNA replicase
2934         organelle (7) ,
2935         other (255) } ,
2936     txdescr VisibleString OPTIONAL ,   -- modifiers on txsystem
2937     txorg Org-ref OPTIONAL ,  -- organism supplying transcription apparatus
2938     mapping-precise BOOLEAN DEFAULT FALSE ,  -- mapping precise or approx; FALSE when absent
2939     location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping; FALSE when absent
2940     inittype ENUMERATED {
2941         unknown (0) ,
2942         single (1) ,
2943         multiple (2) ,
2944         region (3) } OPTIONAL ,
2945     evidence SET OF Tx-evidence OPTIONAL }   -- see Tx-evidence
2946 
2947 Tx-evidence ::= SEQUENCE {      -- one evidence record (used by Txinit.evidence)
2948     exp-code ENUMERATED {       -- required; experimental method
2949         unknown (0) ,
2950         rna-seq (1) ,   -- direct RNA sequencing
2951         rna-size (2) ,  -- RNA length measurement
2952         np-map (3) ,    -- nuclease protection mapping with homologous sequence ladder
2953         np-size (4) ,   -- nuclease protected fragment length measurement
2954         pe-seq (5) ,    -- dideoxy RNA sequencing
2955         cDNA-seq (6) ,  -- full-length cDNA sequencing
2956         pe-map (7) ,    -- primer extension mapping with homologous sequence ladder
2957         pe-size (8) ,   -- primer extension product length measurement
2958         pseudo-seq (9) , -- full-length processed pseudogene sequencing
2959         rev-pe-map (10) ,   -- see NOTE (1) below
2960         other (255) } ,
2961     expression-system ENUMERATED {
2962         unknown (0) ,
2963         physiological (1) ,
2964         in-vitro (2) ,
2965         oocyte (3) ,
2966         transfection (4) ,
2967         transgenic (5) ,
2968         other (255) } DEFAULT physiological ,   -- physiological (1) when absent
2969     low-prec-data BOOLEAN DEFAULT FALSE ,       -- FALSE when absent
2970     from-homolog BOOLEAN DEFAULT FALSE }     -- experiment actually done on
2971                                              --  close homolog
2972 
2973     -- NOTE (1) length measurement of a reverse direction primer-extension
2974     --          product (blocked by RNA 5'end) by comparison with
2975     --          homologous sequence ladder (J. Mol. Biol. 199, 587)
2976 
2977 END
2978 
2979 --$Revision: 1.8 $
2980 --  ----------------------------------------------------------------------------
2981 --
2982 --                            PUBLIC DOMAIN NOTICE
2983 --                National Center for Biotechnology Information
2984 --
2985 --  This software/database is a "United States Government Work" under the terms
2986 --  of the United States Copyright Act.  It was written as part of the author's
2987 --  official duties as a United States Government employee and thus cannot be
2988 --  copyrighted.  This software/database is freely available to the public for
2989 --  use.  The National Library of Medicine and the U.S. Government have not
2990 --  placed any restriction on its use or reproduction.
2991 --
2992 --  Although all reasonable efforts have been taken to ensure the accuracy and
2993 --  reliability of the software and data, the NLM and the U.S. Government do not
2994 --  and cannot warrant the performance or results that may be obtained by using
2995 --  this software or data.  The NLM and the U.S. Government disclaim all
2996 --  warranties, express or implied, including warranties of performance,
2997 --  merchantability or fitness for any particular purpose.
2998 --
2999 --  Please cite the authors in any work or product based on this material.
3000 --
3001 --  ----------------------------------------------------------------------------
3002 --
3003 --  Authors: Mike DiCuccio, Eugene Vasilchenko
3004 --
3005 --  ASN.1 interface to table readers
3006 --
3007 --  ----------------------------------------------------------------------------
3008 
3009 NCBI-SeqTable DEFINITIONS ::=
3010 
3011 BEGIN
3012 
3013 EXPORTS
3014     SeqTable-column-info, SeqTable-column, Seq-table;
3015     
3016 IMPORTS
3017     Seq-id, Seq-loc, Seq-interval   FROM NCBI-Seqloc;
3018 
3019 
3020 SeqTable-column-info ::= SEQUENCE {   -- says what a table column holds: by field-id and/or by field-name
3021     -- user friendly column name, can be skipped
3022     title VisibleString OPTIONAL,
3023 
3024     -- identification of the column data in the objects described by the table
3025     field-id INTEGER { -- known column data types; values are grouped by tens, with unused gaps
3026         -- position types
3027         location        (0), -- location as Seq-loc
3028         location-id     (1), -- location Seq-id
3029         location-gi     (2), -- gi
3030         location-from   (3), -- interval from
3031         location-to     (4), -- interval to
3032         location-strand (5), -- location strand
3033         location-fuzz-from-lim (6),
3034         location-fuzz-to-lim   (7),
3035 
3036         product         (10), -- product as Seq-loc
3037         product-id      (11), -- product Seq-id
3038         product-gi      (12), -- product gi
3039         product-from    (13), -- product interval from
3040         product-to      (14), -- product interval to
3041         product-strand  (15), -- product strand
3042         product-fuzz-from-lim (16),
3043         product-fuzz-to-lim   (17),
3044         
3045         -- main feature fields
3046         id-local        (20), -- id.local.id
3047         xref-id-local   (21), -- xref.id.local.id
3048         partial         (22),
3049         comment         (23),
3050         title           (24),
3051         ext             (25), -- field-name must be "E.xxx", see below
3052         qual            (26), -- field-name must be "Q.xxx", see below
3053         dbxref          (27), -- field-name must be "D.xxx", see below
3054 
3055         -- various data fields
3056         data-imp-key        (30),
3057         data-region         (31),
3058         data-cdregion-frame (32),
3059 
3060         -- extra fields, see also special values for str below
3061         ext-type        (40),
3062         qual-qual       (41),
3063         qual-val        (42),
3064         dbxref-db       (43),
3065         dbxref-tag      (44)
3066     } OPTIONAL,
3067 
3068     -- any column can be identified by an ASN.1 text locator string
3069     -- with the object type omitted.
3070     -- examples:
3071     --   "data.gene.locus" for Seq-feat.data.gene.locus
3072     --   "data.imp.key" for Seq-feat.data.imp.key
3073     --   "qual.qual"
3074     --    - Seq-feat.qual is SEQUENCE so several columns are allowed
3075     --      see also "Q.xxx" special value for shorter qual representation
3076     --   "ext.type.str"
3077     --   "ext.data.label.str"
3078     --   "ext.data.data.int"
3079     --      see also "E.xxx" special value for shorter ext representation
3080     -- special values start with a capital letter:
3081     --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
3082     --    - Seq-feat.ext.data is SEQUENCE so several columns are allowed
3083     --   "Q.xxx" - qual.qual = xxx, qual.val = data
3084     --    - Seq-feat.qual is SEQUENCE so several columns are allowed
3085     --   "D.xxx" - dbxref.id = xxx, dbxref.tag = data (NOTE(review): 'id' presumably means the db name, cf. dbxref-db (43); confirm)
3086     --    - Seq-feat.dbxref is SET so several columns are allowed
3087     field-name  VisibleString OPTIONAL
3088 }
3089 
3090 
3091 CommonString-table ::= SEQUENCE {   -- used by SeqTable-multi-data.common-string
3092     -- set of possible values
3093     strings     SEQUENCE OF UTF8String,
3094 
3095     -- indexes of values; presumably positions in 'strings' (index base not stated here, TODO confirm)
3096     indexes     SEQUENCE OF INTEGER
3097 }
3098 
3099 
3100 CommonBytes-table ::= SEQUENCE {   -- byte arrays drawn from a small set of possible values
3101     -- set of possible (distinct) values
3102     bytes       SEQUENCE OF OCTET STRING,
3103 
3104     -- indexes into bytes above (presumably one per row, 0-based; confirm)
3105     indexes     SEQUENCE OF INTEGER
3106 }
3107 
3108 
3109 Scaled-int-multi-data ::= SEQUENCE {   -- linearly scaled data with INTEGER coefficients
3110     -- output data[i] = data[i]*mul+add
3111     mul     INTEGER,   -- multiplier applied to each stored value
3112     add     INTEGER,   -- offset added after the multiplication
3113     data    SeqTable-multi-data   -- the stored (unscaled) values
3114 }
3115 
3116 
3117 Scaled-real-multi-data ::= SEQUENCE {   -- linearly scaled data with REAL coefficients
3118     -- output data[i] = data[i]*mul+add
3119     mul     REAL,   -- multiplier applied to each stored value
3120     add     REAL,   -- offset added after the multiplication
3121     data    SeqTable-multi-data   -- the stored (unscaled) values
3122 }
3123 
3124 
3125 SeqTable-multi-data ::= CHOICE {   -- values of one column for many rows, in one of several encodings
3126     -- a set of integers, one per row
3127     int         SEQUENCE OF INTEGER,
3128     
3129     -- a set of reals, one per row
3130     real        SEQUENCE OF REAL,
3131 
3132     -- a set of strings, one per row
3133     string      SEQUENCE OF UTF8String,
3134 
3135     -- a set of byte arrays, one per row
3136     bytes       SEQUENCE OF OCTET STRING,
3137 
3138     -- a set of strings with a small set of possible values
3139     common-string   CommonString-table,
3140 
3141     -- a set of byte arrays with a small set of possible values
3142     common-bytes    CommonBytes-table,
3143 
3144     -- a set of bits, one per row
3145     -- this uses bm::bvector<> as its storage mechanism
3146     bit         OCTET STRING,   -- NOTE(review): bit-bvector below is the serialized bvector form; confirm which encoding applies here
3147 
3148     -- a set of locations, one per row
3149     loc         SEQUENCE OF Seq-loc,
3150     id          SEQUENCE OF Seq-id,         -- Seq-ids, one per row
3151     interval    SEQUENCE OF Seq-interval,   -- intervals, one per row
3152 
3153     -- delta-encoded data (int/bit -> int)
3154     int-delta   SeqTable-multi-data,   -- recursive: wraps another SeqTable-multi-data holding the stored values
3155 
3156     -- scaled data (int/bit -> int)
3157     int-scaled  Scaled-int-multi-data,
3158 
3159     -- scaled data (int/bit -> real)
3160     real-scaled Scaled-real-multi-data,
3161 
3162     -- a set of bits, represented as a serialized bvector,
3163     -- see include/util/bitset/bm.h
3164     bit-bvector OCTET STRING
3165 }
3166 
3167 
3168 SeqTable-single-data ::= CHOICE {   -- a single value (used for default and sparse-other in SeqTable-column)
3169     -- integer
3170     int         INTEGER,
3171     
3172     -- real
3173     real        REAL,
3174 
3175     -- string
3176     string      UTF8String,
3177 
3178     -- byte array
3179     bytes       OCTET STRING,
3180 
3181     -- bit
3182     bit         BOOLEAN,
3183 
3184     -- location
3185     loc         Seq-loc,
3186     id          Seq-id,         -- sequence id
3187     interval    Seq-interval    -- interval
3188 }
3189 
3190 
3191 SeqTable-sparse-index ::= CHOICE {   -- identifies which rows of a column have values
3192     -- Indexes of rows with values
3193     indexes SEQUENCE OF INTEGER,
3194 
3195     -- Bitset of rows with values, set bit means the row has value.
3196     -- Most-significant bit in an octet comes first.
3197     bit-set OCTET STRING,
3198 
3199     -- Indexes of rows with values, delta-encoded
3200     indexes-delta SEQUENCE OF INTEGER,   -- presumably each entry is the difference from the previous index; confirm
3201     
3202     -- Bitset of rows with values, as serialized bvector<>,
3203     -- see include/util/bitset/bm.h
3204     bit-set-bvector OCTET STRING
3205 }
3206 
3207 
3208 SeqTable-column ::= SEQUENCE {   -- one column of a Seq-table
3209     -- column description or reference to previously defined info
3210     header      SeqTable-column-info,   -- information about data
3211 
3212     -- row data
3213     data        SeqTable-multi-data OPTIONAL,
3214 
3215     -- in case not all rows contain data this field will contain sparse info
3216     -- (which rows actually have a value)
3217     sparse      SeqTable-sparse-index OPTIONAL,
3218 
3219     -- default value for sparse table, or if row data is too short
3220     default     SeqTable-single-data OPTIONAL,
3221 
3222     -- single value for indexes not listed in sparse table
3223     sparse-other SeqTable-single-data OPTIONAL
3224 }
3224 
3225 
3226 Seq-table ::= SEQUENCE {   -- table of features stored column by column
3227     -- type of features in this table, equal to Seq-feat.data variant index
3228     feat-type   INTEGER,
3229 
3230     -- subtype of features in this table, defined in header SeqFeatData.hpp
3231     feat-subtype INTEGER OPTIONAL,
3232 
3233     -- number of rows
3234     num-rows    INTEGER,
3235 
3236     -- data in columns (one SeqTable-column per column)
3237     columns     SEQUENCE OF SeqTable-column
3238 }
3239 
3240 
3241 END
3242 --$Revision: 6.4 $
3243 --**********************************************************************
3244 --
3245 --  NCBI Sequence Alignment elements
3246 --  by James Ostell, 1990
3247 --
3248 --**********************************************************************
3249 
3250 NCBI-Seqalign DEFINITIONS ::=
3251 BEGIN
3252 
3253 EXPORTS Seq-align, Score, Score-set, Seq-align-set;
3254 
3255 IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
3256         User-object, Object-id FROM NCBI-General;
3257 
3258 --*** Sequence Alignment ********************************
3259 --*
3260 
3261 Seq-align-set ::= SET OF Seq-align   -- unordered collection of alignments; also used by Seq-align.segs.disc
3262 
3263 Seq-align ::= SEQUENCE {   -- one sequence alignment
3264     type ENUMERATED {
3265         not-set (0) ,
3266         global (1) ,
3267         diags (2) ,     -- unbroken, but not ordered, diagonals
3268         partial (3) ,   -- mapping pieces together
3269         disc (4) ,      -- discontinuous alignment
3270         other (255) } ,
3271     dim INTEGER OPTIONAL ,     -- dimensionality
3272     score SET OF Score OPTIONAL ,   -- for whole alignment
3273     segs CHOICE {                   -- alignment data
3274         dendiag SEQUENCE OF Dense-diag ,   -- (multiway) diagonals
3275         denseg              Dense-seg ,    -- densely packed segments
3276         std     SEQUENCE OF Std-seg ,      -- segments expressed as Seq-locs
3277         packed              Packed-seg ,   -- segments with a presence bitmap
3278         disc                Seq-align-set, -- nested set of alignments
3279         spliced             Spliced-seg,   -- exon-based product/genomic alignment
3280         sparse              Sparse-seg     -- sparse multiple alignment
3281     } ,
3282     
3283     -- regions of sequence over which align
3284     --  was computed
3285     bounds SET OF Seq-loc OPTIONAL,
3286 
3287     -- alignment id
3288     id SEQUENCE OF Object-id OPTIONAL,
3289 
3290     -- extra info
3291     ext SEQUENCE OF User-object OPTIONAL
3292 }
3293 
3294 Dense-diag ::= SEQUENCE {         -- for (multiway) diagonals
3295     dim INTEGER DEFAULT 2 ,    -- dimensionality
3296     ids SEQUENCE OF Seq-id ,   -- sequences in order
3297     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order
3298     len INTEGER ,                 -- len of aligned segments
3299     strands SEQUENCE OF Na-strand OPTIONAL ,   -- presumably one strand per id, in ids order; confirm
3300     scores SET OF Score OPTIONAL }             -- scores attached to this diagonal
3301 
3302     -- Dense-seg: the densest packing for sequence alignments only.
3303     --            a start of -1 indicates a gap for that sequence of
3304     --            length lens.
3305     --
3306     -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
3307     -- id=200  AAGGCCTTTTAG.......GATGATGATGA
3308     -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
3309     --
3310     -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
3311     -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
3312     -- lens = { 4, 8, 7, 3, 4, 4 }
3313     -- (in the example, starts holds dim entries per segment and lens one entry per segment)
3314 
3315 Dense-seg ::= SEQUENCE {          -- for (multiway) global or partial alignments
3316     dim INTEGER DEFAULT 2 ,       -- dimensionality
3317     numseg INTEGER ,              -- number of segments here
3318     ids SEQUENCE OF Seq-id ,      -- sequences in order
3319     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order within segs
3320     lens SEQUENCE OF INTEGER ,    -- lengths in ids order within segs
3321     strands SEQUENCE OF Na-strand OPTIONAL ,   -- presumably laid out like starts; confirm
3322     scores SEQUENCE OF Score OPTIONAL }  -- score for each seg
3323 
3324 Packed-seg ::= SEQUENCE {         -- for (multiway) global or partial alignments
3325     dim INTEGER DEFAULT 2 ,       -- dimensionality
3326     numseg INTEGER ,              -- number of segments here
3327     ids SEQUENCE OF Seq-id ,      -- sequences in order
3328     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order for whole alignment
3329     present OCTET STRING ,        -- Boolean if each sequence present or absent in
3330                                   --   each segment (bit layout not specified here)
3331     lens SEQUENCE OF INTEGER ,    -- length of each segment
3332     strands SEQUENCE OF Na-strand OPTIONAL ,   -- presumably one strand per id; confirm
3333     scores SEQUENCE OF Score OPTIONAL }  -- score for each segment
3334 
3335 Std-seg ::= SEQUENCE {            -- segment whose pieces are given as Seq-locs
3336     dim INTEGER DEFAULT 2 ,       -- dimensionality
3337     ids SEQUENCE OF Seq-id OPTIONAL ,   -- sequence ids (optional)
3338     loc SEQUENCE OF Seq-loc ,           -- locations (presumably one per dimension; confirm)
3339     scores SET OF Score OPTIONAL }      -- scores attached to this segment
3340 
3341 
3342 Spliced-seg ::= SEQUENCE {   -- alignment of a product to a genomic sequence, one entry per exon
3343     -- product is either protein or transcript (cDNA)
3344     product-id Seq-id OPTIONAL,
3345     genomic-id Seq-id OPTIONAL,
3346 
3347     -- should be 'plus' or 'minus'
3348     product-strand Na-strand OPTIONAL ,
3349     genomic-strand Na-strand OPTIONAL ,
3350     
3351     product-type ENUMERATED {   -- kind of product being aligned
3352         transcript(0),
3353         protein(1)
3354     },
3355 
3356     -- set of segments involved
3357     -- each segment corresponds to one exon
3358     -- exons are always in biological order
3359     exons SEQUENCE OF Spliced-exon ,
3360 
3361     -- optional poly(A) tail
3362     poly-a INTEGER OPTIONAL,
3363 
3364     -- length of the product, in bases/residues
3365     -- from this, a 3' unaligned length can be extracted; this also captures
3366     -- the case in which a protein aligns leaving a partial codon alignment
3367     -- at the 3' end
3368     product-length INTEGER OPTIONAL,
3369 
3370     -- alignment descriptors / modifiers
3371     -- this provides us a set for extension
3372     modifiers SET OF Spliced-seg-modifier OPTIONAL
3373 }
3374 
3375 Spliced-seg-modifier ::= CHOICE {   -- one descriptor/modifier of a Spliced-seg
3376     -- protein aligns from the start and the first codon
3377     -- on both product and genomic is a start codon
3378     start-codon-found BOOLEAN,
3379     
3380     -- protein aligns to its end and there is a stop codon
3381     -- on the genomic right after the alignment
3382     stop-codon-found BOOLEAN
3383 }
3384 
3385 
3386 -- complete or partial exon
3387 -- two consecutive Spliced-exons may belong to one exon
3388 Spliced-exon ::= SEQUENCE {   -- one (complete or partial) exon of a Spliced-seg
3389     -- product-end >= product-start
3390     product-start Product-pos ,
3391     product-end Product-pos ,
3392 
3393     -- genomic-end >= genomic-start
3394     genomic-start INTEGER ,
3395     genomic-end INTEGER ,
3396 
3397     -- product is either protein or transcript (cDNA)
3398     product-id Seq-id OPTIONAL ,
3399     genomic-id Seq-id OPTIONAL ,
3400 
3401     -- should be 'plus' or 'minus'
3402     product-strand Na-strand OPTIONAL ,
3403     
3404     -- genomic-strand represents the strand of translation
3405     genomic-strand Na-strand OPTIONAL ,
3406 
3407     -- basic segments are always in biological order
3408     parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,
3409 
3410     -- scores for this exon
3411     scores Score-set OPTIONAL ,
3412 
3413     -- splice sites
3414     acceptor-before-exon Splice-site OPTIONAL,
3415     donor-after-exon Splice-site OPTIONAL,
3416     
3417     -- flag: is this exon complete or partial?
3418     partial BOOLEAN OPTIONAL,
3419 
3420     -- extra info
3421     ext SEQUENCE OF User-object OPTIONAL
3422 }
3423 
3424 
3425 Product-pos ::= CHOICE {   -- position on the product of a spliced alignment
3426     nucpos INTEGER,        -- nucleotide position (presumably used for transcript products; confirm)
3427     protpos Prot-pos       -- codon based protein position, see Prot-pos
3428 }
3429 
3430 
3431 -- codon based position on protein (1/3 of amino acid)
3432 Prot-pos ::= SEQUENCE {
3433     -- standard protein position
3434     amin INTEGER ,
3435 
3436     -- 0, 1, 2, or 3 as for Cdregion
3437     -- 0 = not set (also the DEFAULT when the field is absent)
3438     -- 1, 2, 3 = actual frame
3439     frame INTEGER DEFAULT 0
3440 }
3441 
3442 
3443 -- Spliced-exon-chunk: piece of an exon
3444 -- lengths are given in nucleotide bases (1/3 of amino acid when product is a
3445 -- protein)
3446 Spliced-exon-chunk ::= CHOICE {
3447     -- both sequences represented, product and genomic sequences match
3448     match INTEGER ,
3449 
3450     -- both sequences represented, product and genomic sequences do not match
3451     mismatch INTEGER ,
3452 
3453     -- both sequences are represented, there is sufficient similarity
3454     -- between product and genomic sequences. Can be used to replace stretches
3455     -- of matches and mismatches, mostly for protein to genomic where
3456     -- definition of match or mismatch depends on translation table
3457     diag INTEGER ,
3458 
3459     -- insertion in product sequence (i.e. gap in the genomic sequence)
3460     product-ins INTEGER ,
3461 
3462     -- insertion in genomic sequence (i.e. gap in the product sequence)
3463     genomic-ins INTEGER
3464 }
3465 
3466 
3467 -- site involved in splice
3468 Splice-site ::= SEQUENCE {   -- bases at one splice site of an exon
3469     -- typically two bases in the intronic region, always
3470     -- in IUPAC format
3471     bases VisibleString
3472 }
3473 
3474 
3475 -- ==========================================================================
3476 --
3477 -- Sparse-seg follows the semantics of dense-seg and is more optimal for
3478 -- representing sparse multiple alignments
3479 --
3480 -- ==========================================================================
3481 
3482 
3483 Sparse-seg ::= SEQUENCE {   -- sparse multiple alignment built from pairwise alignments
3484     master-id Seq-id OPTIONAL,   -- presumably the sequence shared by all rows; confirm
3485 
3486     -- pairwise alignments constituting this multiple alignment
3487     rows SET OF Sparse-align,
3488 
3489     -- per-row scores
3490     row-scores SET OF Score OPTIONAL,
3491 
3492     -- index of extra items
3493     ext  SET OF Sparse-seg-ext OPTIONAL
3494 }
3495 
3496 Sparse-align ::= SEQUENCE {   -- one pairwise alignment (a row of a Sparse-seg)
3497     first-id Seq-id,
3498     second-id Seq-id,
3499 
3500     numseg INTEGER,                      -- number of segments
3501     first-starts SEQUENCE OF INTEGER ,   -- starts on the first sequence [numseg]
3502     second-starts SEQUENCE OF INTEGER ,  -- starts on the second sequence [numseg]
3503     lens SEQUENCE OF INTEGER ,           -- lengths of segments [numseg]
3504     second-strands SEQUENCE OF Na-strand OPTIONAL ,   -- strands on the second sequence (presumably [numseg]; confirm)
3505 
3506     -- per-segment scores
3507     seg-scores SET OF Score OPTIONAL
3508 }
3509 
3510 Sparse-seg-ext ::= SEQUENCE {   -- extension item of a Sparse-seg; only index is currently defined
3511     -- commented-out draft of a richer structure, kept for reference:
3512     -- seg-ext SET OF { index INTEGER,
3513     --                  data User-field
3514     -- }
3515     index INTEGER
3516 }
3517 
3518 
3519 
3520 -- use of Score is discouraged for external ASN.1 specifications
3521 Score ::= SEQUENCE {
3522     id Object-id OPTIONAL ,   -- optional identifier of this score
3523     value CHOICE {            -- the score itself, real or integer
3524         real REAL ,
3525         int INTEGER
3526     }
3527 }
3528 
3529 -- use of Score-set is encouraged for external ASN.1 specifications
3530 Score-set ::= SET OF Score   -- unordered collection of Score values
3531 
3532 END 
3533 
3534 --$Revision: 6.0 $
3535 --**********************************************************************
3536 --
3537 --  NCBI Sequence Analysis Results (other than alignments)
3538 --  by James Ostell, 1990
3539 --
3540 --**********************************************************************
3541 
3542 NCBI-Seqres DEFINITIONS ::=
3543 BEGIN
3544 
3545 EXPORTS Seq-graph;
3546 
3547 IMPORTS Seq-loc FROM NCBI-Seqloc;
3548 
3549 --*** Sequence Graph ********************************
3550 --*
3551 --*   for values mapped by residue or range to sequence
3552 --*
3553 
3554 Seq-graph ::= SEQUENCE {                -- values mapped by residue or range to sequence
3555     title VisibleString OPTIONAL ,
3556     comment VisibleString OPTIONAL ,
3557     loc Seq-loc ,                       -- region this applies to
3558     title-x VisibleString OPTIONAL ,    -- title for x-axis
3559     title-y VisibleString OPTIONAL ,    -- title for y-axis (by analogy with title-x)
3560     comp INTEGER OPTIONAL ,             -- compression (residues/value)
3561     a REAL OPTIONAL ,                   -- for scaling values
3562     b REAL OPTIONAL ,                   -- display = (a x value) + b
3563     numval INTEGER ,                    -- number of values in graph
3564     graph CHOICE {                      -- the values, in one of three storage types
3565         real Real-graph ,
3566         int Int-graph ,
3567         byte Byte-graph } }
3568 
3569 Real-graph ::= SEQUENCE {               -- graph with REAL values
3570     max REAL ,                          -- top of graph
3571     min REAL ,                          -- bottom of graph
3572     axis REAL ,                         -- value to draw axis on
3573     values SEQUENCE OF REAL }           -- the graph values
3574 
3575 Int-graph ::= SEQUENCE {        -- graph with INTEGER values; fields parallel Real-graph
3576     max INTEGER ,               -- top of graph
3577     min INTEGER ,               -- bottom of graph
3578     axis INTEGER ,              -- value to draw axis on
3579     values SEQUENCE OF INTEGER }   -- the graph values
3580 
3581 Byte-graph ::= SEQUENCE {              -- integer from 0-255
3582     max INTEGER ,               -- top of graph
3583     min INTEGER ,               -- bottom of graph
3584     axis INTEGER ,              -- value to draw axis on
3585     values OCTET STRING }       -- the graph values (presumably one octet per value; confirm)
3586 
3587 END
3588 
3589 --$Revision: 6.1 $
3590 --********************************************************************
3591 --
3592 --  Direct Submission of Sequence Data
3593 --  James Ostell, 1991
3594 --
3595 --  This is a trial specification for direct submission of sequence
3596 --    data worked out between NCBI and EMBL
3597 --  Later revised to reflect work with GenBank and Integrated database
3598 --
3599 --  Version 3.0, 1994
3600 --    This is the official NCBI sequence submission format now.
3601 --
3602 --********************************************************************
3603 
3604 NCBI-Submit DEFINITIONS ::=
3605 BEGIN
3606 
3607 EXPORTS Seq-submit, Contact-info;
3608 
3609 IMPORTS Cit-sub, Author FROM NCBI-Biblio
3610         Date, Object-id FROM NCBI-General
3611         Seq-annot FROM NCBI-Sequence
3612         Seq-id FROM NCBI-Seqloc
3613         Seq-entry FROM NCBI-Seqset;
3614 
3615 Seq-submit ::= SEQUENCE {           -- one direct submission
3616     sub Submit-block ,              -- information about the submission itself
3617     data CHOICE {                   -- what is being submitted
3618         entrys  SET OF Seq-entry ,  -- sequence(s)
3619         annots  SET OF Seq-annot ,  -- annotation(s)
3620         delete  SET OF Seq-id } } -- deletions of entries
3621 
3622 Submit-block ::= SEQUENCE {       -- information describing a submission
3623     contact Contact-info ,        -- who to contact
3624     cit Cit-sub ,                 -- citation for this submission
3625     hup BOOLEAN DEFAULT FALSE ,   -- hold until publish
3626     reldate Date OPTIONAL ,       -- release by date
3627     subtype INTEGER {             -- type of submission
3628         new (1) ,                 -- new data
3629         update (2) ,              -- update by author
3630         revision (3) ,            -- 3rd party (non-author) update
3631         other (255) } OPTIONAL ,
3632     tool VisibleString OPTIONAL,  -- tool used to make submission
3633     user-tag VisibleString OPTIONAL, -- user supplied id for this submission
3634     comment VisibleString OPTIONAL } -- user comments/advice to database
3635 
3636 Contact-info ::= SEQUENCE {      -- who to contact to discuss the submission
3637     name VisibleString OPTIONAL ,        -- OBSOLETE: will be removed
3638     address SEQUENCE OF VisibleString OPTIONAL ,   -- address as a list of strings
3639     phone VisibleString OPTIONAL ,
3640     fax VisibleString OPTIONAL ,
3641     email VisibleString OPTIONAL ,
3642     telex VisibleString OPTIONAL ,
3643     owner-id Object-id OPTIONAL ,         -- for owner accounts
3644     password OCTET STRING OPTIONAL ,
3645     last-name VisibleString OPTIONAL ,  -- structured to replace name above
3646     first-name VisibleString OPTIONAL ,
3647     middle-initial VisibleString OPTIONAL ,
3648     contact Author OPTIONAL }           -- WARNING: this will replace the above
3649 
3650 END
3651 
3652 --$Revision: 1.15 $
3653 --**********************************************************************
3654 --
3655 --  Definitions for Cn3D-specific data (rendering settings,
3656 --    user annotations, etc.)
3657 --
3658 --  by Paul Thiessen
3659 --
3660 --  National Center for Biotechnology Information
3661 --  National Institutes of Health
3662 --  Bethesda, MD 20894 USA
3663 --
3664 -- asntool -m cn3d.asn -w 100 -o cn3d.h
3665 -- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
3666 --   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
3667 --**********************************************************************
3668 
3669 NCBI-Cn3d DEFINITIONS ::=
3670 -- Cn3D-specific information
3671 
3672 BEGIN
3673 
3674 EXPORTS  Cn3d-style-dictionary, Cn3d-user-annotations;
3675 
3676 IMPORTS  Biostruc-id FROM MMDB
3677          Molecule-id, Residue-id FROM MMDB-Chemical-graph;
3678 
3679 
3680 -- values of enumerations must match those in cn3d/style_manager.hpp!
3681 
3682 Cn3d-backbone-type ::= ENUMERATED {     -- for different types of backbones
3683     off (1),        -- numeric values must match cn3d/style_manager.hpp
3684     trace (2),
3685     partial (3),
3686     complete (4)
3687 }
3688 
3689 Cn3d-drawing-style ::= ENUMERATED {     -- atom/bond/object rendering styles
3690     -- for atoms and bonds (numeric values must match cn3d/style_manager.hpp)
3691     wire (1),
3692     tubes (2),
3693     ball-and-stick (3),
3694     space-fill (4),
3695     wire-worm (5),
3696     tube-worm (6),
3697     -- for 3d-objects
3698     with-arrows (7),
3699     without-arrows (8)
3700 }
3701 
3702 Cn3d-color-scheme ::= ENUMERATED {  -- available color schemes (not all
3703                                     -- necessarily applicable to all objects)
3704     element (1),
3705     object (2),
3706     molecule (3),
3707     domain (4),
3708     residue (20),               -- note: values are not in ascending order in this list
3709     secondary-structure (5),
3710     user-select (6),
3711     -- different alignment conservation coloring (currently only for proteins)
3712     aligned (7),
3713     identity (8),
3714     variety (9),
3715     weighted-variety (10),
3716     information-content (11),
3717     fit (12),
3718     block-fit (17),
3719     block-z-fit (18),
3720     block-row-fit (19),
3721     -- other schemes
3722     temperature (13),
3723     hydrophobicity (14),
3724     charge (15),
3725     rainbow (16)
3726 }
3727 
3728 -- RGB triplet, interpreted (after division by the scale-factor) as floating
3729 -- point values which should range from [0..1]. The default scale-factor is
3730 -- 255, so that one can conveniently set integer byte values [0..255] for
3731 -- colors with the scale-factor already set appropriately to map to [0..1].
3732 --    An alpha value is allowed, but is currently ignored by Cn3D.
3733 Cn3d-color ::= SEQUENCE {
3734     scale-factor INTEGER DEFAULT 255,   -- divisor that maps the channel values to [0..1]
3735     red INTEGER,
3736     green INTEGER,
3737     blue INTEGER,
3738     alpha INTEGER DEFAULT 255           -- allowed, but currently ignored by Cn3D
3739 }
3740 
3741 Cn3d-backbone-style ::= SEQUENCE {  -- style blob for backbones only
3742     type Cn3d-backbone-type,        -- which kind of backbone display
3743     style Cn3d-drawing-style,       -- rendering style
3744     color-scheme Cn3d-color-scheme, -- coloring scheme
3745     user-color Cn3d-color           -- presumably used with the user-select scheme; confirm
3746 }
3747 
3748 Cn3d-general-style ::= SEQUENCE {   -- style blob for other objects
3749     is-on BOOLEAN,                  -- presumably whether these objects are displayed; confirm
3750     style Cn3d-drawing-style,       -- rendering style
3751     color-scheme Cn3d-color-scheme, -- coloring scheme
3752     user-color Cn3d-color           -- presumably used with the user-select scheme; confirm
3753 }
3754 
3755 Cn3d-backbone-label-style ::= SEQUENCE { -- style blob for backbone labels
3756     spacing INTEGER,        -- zero means none
3757     type ENUMERATED {       -- residue code style used in labels
3758         one-letter (1),
3759         three-letter (2)
3760     },
3761     number ENUMERATED {     -- numbering shown in labels
3762         none (0),
3763         sequential (1),     -- from 1, by residues present, to match sequence
3764         pdb (2)             -- use number assigned by PDB
3765     },
3766     termini BOOLEAN,        -- presumably whether chain termini are labeled; confirm
3767     white BOOLEAN           -- all white, or (if false) color of alpha carbon
3768 }
3769 
3770 -- rendering settings for Cn3D (mirrors StyleSettings class)
3771 Cn3d-style-settings ::= SEQUENCE {   -- one complete set of rendering settings
3772     name VisibleString OPTIONAL,                -- a name (for favorites)
3773     protein-backbone Cn3d-backbone-style,       -- backbone styles
3774     nucleotide-backbone Cn3d-backbone-style,
3775     protein-sidechains Cn3d-general-style,      -- styles for other stuff
3776     nucleotide-sidechains Cn3d-general-style,
3777     heterogens Cn3d-general-style,
3778     solvents Cn3d-general-style,
3779     connections Cn3d-general-style,
3780     helix-objects Cn3d-general-style,
3781     strand-objects Cn3d-general-style,
3782     virtual-disulfides-on BOOLEAN,              -- virtual disulfides
3783     virtual-disulfide-color Cn3d-color,
3784     hydrogens-on BOOLEAN,                       -- hydrogens
3785     background-color Cn3d-color,                -- background
3786     -- floating point parameters - scale-factor applies to all the following:
3787     scale-factor INTEGER,                       -- scaling for the INTEGER parameters below
3788     space-fill-proportion INTEGER,
3789     ball-radius INTEGER,
3790     stick-radius INTEGER,
3791     tube-radius INTEGER,
3792     tube-worm-radius INTEGER,
3793     helix-radius INTEGER,
3794     strand-width INTEGER,
3795     strand-thickness INTEGER,
3796     -- backbone labels (no labels if not present)
3797     protein-labels Cn3d-backbone-label-style OPTIONAL,
3798     nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
3799     -- ion labels
3800     ion-labels BOOLEAN OPTIONAL
3801 }
3802 
3803 Cn3d-style-settings-set ::= SET OF Cn3d-style-settings   -- unordered collection of style settings
3804 
3805 Cn3d-style-table-id ::= INTEGER   -- key type used by Cn3d-style-table-item.id and Cn3d-user-annotation.style-id
3806 
3807 Cn3d-style-table-item ::= SEQUENCE {   -- one entry of the style lookup table
3808     id Cn3d-style-table-id,        -- key of this entry
3809     style Cn3d-style-settings      -- the settings stored under that key
3810 }
3811 
3812 -- the global settings, and a lookup table of styles for user annotations.
3813 Cn3d-style-dictionary ::= SEQUENCE {
3814     global-style Cn3d-style-settings,                      -- the global settings
3815     style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL -- lookup table of styles for user annotations
3816 }
3817 
3818 -- a range of residues in a chain, identified by MMDB residue-id
3819 -- (e.g., numbered from 1)
3820 Cn3d-residue-range ::= SEQUENCE {   -- both ends are MMDB residue-ids
3821     from Residue-id,    -- start of the range
3822     to Residue-id       -- end of the range (presumably inclusive; confirm)
3823 }
3824 
3825 -- set of locations on a particular chain
3826 Cn3d-molecule-location ::= SEQUENCE {   -- residues selected on one molecule (chain)
3827     molecule-id Molecule-id,    -- MMDB molecule id
3828     -- which residues; whole molecule implied if absent
3829     residues SEQUENCE OF Cn3d-residue-range OPTIONAL
3830 }
3831 
3832 -- set of locations on a particular structure object (e.g., a PDB/MMDB
3833 -- structure), which may include multiple ranges of residues each on
3834 -- multiple chains.
3835 Cn3d-object-location ::= SEQUENCE {   -- residues selected on one structure object
3836     structure-id Biostruc-id,                   -- which structure
3837     residues SEQUENCE OF Cn3d-molecule-location -- per-molecule residue locations within it
3838 }
3839 
3840 -- information for an individual user annotation
3841 Cn3d-user-annotation ::= SEQUENCE {     -- one user annotation
3842     name VisibleString,                 -- a (short) name for this annotation
3843     description VisibleString OPTIONAL, -- an optional longer description
3844     style-id Cn3d-style-table-id,       -- how to draw this annotation (id of a style table entry)
3845     residues SEQUENCE OF Cn3d-object-location,  -- which residues to cover
3846     is-on BOOLEAN   -- whether this annotation is to be turned on in Cn3D
3847 }
3848 
3849 -- a GL-ordered transformation matrix
3850 Cn3d-GL-matrix ::= SEQUENCE {   -- 16 REAL elements m0..m15 (presumably a 4x4 matrix in OpenGL element order; confirm)
3851     m0  REAL, m1  REAL, m2  REAL, m3  REAL,
3852     m4  REAL, m5  REAL, m6  REAL, m7  REAL,
3853     m8  REAL, m9  REAL, m10 REAL, m11 REAL,
3854     m12 REAL, m13 REAL, m14 REAL, m15 REAL
3855 }
3856 
3857 -- a floating point 3d vector
3858 Cn3d-vector ::= SEQUENCE {   -- three REAL components
3859     x REAL,
3860     y REAL,
3861     z REAL
3862 }
3863 
3864 -- parameters used to set up the camera in Cn3D
3865 Cn3d-view-settings ::= SEQUENCE {   -- camera setup plus scene transformation
3866     camera-distance REAL,       -- camera on +Z axis this distance from origin
3867     camera-angle-rad REAL,      -- camera angle (radians, per the field name)
3868     camera-look-at-X REAL,      -- X,Y of point in Z=0 plane camera points at
3869     camera-look-at-Y REAL,
3870     camera-clip-near REAL,      -- distance of clipping planes from camera
3871     camera-clip-far REAL,
3872     matrix Cn3d-GL-matrix,      -- transformation of objects in the scene
3873     rotation-center Cn3d-vector -- center of rotation of whole scene
3874 }
3875 
3876 -- The list of annotations for a given CDD/mime. If residue regions overlap
3877 -- between annotations that are turned on, the last annotation in this list
3878 -- that contains these residues will be used as the display style for these
3879 -- residues.
3880 --   Also contains the current viewpoint, so that user's camera angle
3881 -- can be stored and reproduced, for illustrations, on-line figures, etc.
3882 Cn3d-user-annotations ::= SEQUENCE {
3883     annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL,   -- ordered: later entries take precedence on overlap
3884     view Cn3d-view-settings OPTIONAL                         -- the current viewpoint
3885 }
3886 
3887 END
3888 
3889 --$Revision: 6.3 $
3890 --****************************************************************
3891 --
3892 --  NCBI Project Definition Module
3893 --  by Jim Ostell and Jonathan Kans, 1998
3894 --
3895 --****************************************************************
3896 
3897 NCBI-Project DEFINITIONS ::=
3898 BEGIN
3899 
3900 EXPORTS Project, Project-item;
3901 
3902 IMPORTS Date FROM NCBI-General
3903         PubMedId FROM NCBI-Biblio
3904         Seq-id, Seq-loc FROM NCBI-Seqloc
3905         Seq-annot, Pubdesc FROM NCBI-Sequence
3906         Seq-entry FROM NCBI-Seqset
3907         Pubmed-entry FROM NCBI-PubMed;
3908 
3909 Project ::= SEQUENCE {              -- a project: optional description plus its data
3910     descr Project-descr OPTIONAL ,  -- ids, name and descriptors of the project
3911     data Project-item }             -- the content, in one of the Project-item forms
3912 
3913 Project-item ::= CHOICE {           -- content of a project: uid sets, id sets, entry sets, or nested projects
3914     pmuid SET OF INTEGER ,          -- the *uid alternatives hold INTEGER uids
3915     protuid SET OF INTEGER ,
3916     nucuid SET OF INTEGER ,
3917     sequid SET OF INTEGER ,
3918     genomeuid SET OF INTEGER ,
3919     structuid SET OF INTEGER ,
3920     pmid SET OF PubMedId ,          -- the *id alternatives hold typed identifiers
3921     protid SET OF Seq-id ,
3922     nucid SET OF Seq-id ,
3923     seqid SET OF Seq-id ,
3924     genomeid SET OF Seq-id ,
3925     structid NULL ,                 -- NULL: carries no data (presumably a placeholder; confirm)
3926     pment SET OF Pubmed-entry ,     -- the *ent alternatives hold full entries
3927     protent SET OF Seq-entry ,
3928     nucent SET OF Seq-entry ,
3929     seqent SET OF Seq-entry ,
3930     genomeent SET OF Seq-entry ,
3931     structent NULL ,                -- NULL: carries no data (presumably a placeholder; confirm)
3932     seqannot SET OF Seq-annot ,
3933     loc SET OF Seq-loc ,
3934     proj SET OF Project             -- nested projects (recursive)
3935 }
3936 
3937 Project-descr ::= SEQUENCE {            -- description of a project
3938     id SET OF Project-id ,              -- one or more project identifiers
3939     name VisibleString OPTIONAL ,       -- optional project name
3940     descr SET OF Projdesc OPTIONAL }    -- optional descriptors, see Projdesc
3941 
3942 Projdesc ::= CHOICE {           -- one descriptor attached to a project
3943     pub Pubdesc ,               -- a publication descriptor
3944     date Date ,                 -- a date
3945     comment VisibleString ,     -- free-text comment
3946     title VisibleString         -- title text
3947 }
3948 
3949 Project-id ::= VisibleString   -- project identifier, used by Project-descr.id
3950 
3951 END
3952 
3953 
3954 --$Revision: 6.0 $
3955 --*********************************************************************
3956 --
3957 --  access.asn
3958 --
3959 --     messages for data access
3960 --
3961 --*********************************************************************
3962 
3963 NCBI-Access DEFINITIONS ::=
3964 BEGIN
3965 
3966 EXPORTS Link-set;
3967 
3968     -- links between same class = neighbors
3969     -- links between other classes = links
3970 
3971 Link-set ::= SEQUENCE {                   -- a set of links (or neighbors) with optional weights
3972     num INTEGER ,                         -- number of links to this doc type
3973     uids SEQUENCE OF INTEGER OPTIONAL ,     -- the links
3974     weights SEQUENCE OF INTEGER OPTIONAL }  -- the weights (presumably parallel to uids; confirm)
3975 
3976 
3977 END
3978 --$Revision: 6.0 $
3979 --**********************************************************************
3980 --
3981 --  NCBI Sequence Feature Definition Module
3982 --  by James Ostell, 1994
3983 --
3984 --**********************************************************************
3985 
3986 NCBI-FeatDef DEFINITIONS ::=
3987 BEGIN
3988 
3989 EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;
3990 
3991 
3992 FeatDef ::= SEQUENCE {             -- definition of one feature type
3993     typelabel VisibleString ,      -- short label for type eg "CDS"
3994     menulabel VisibleString ,      -- label for a menu eg "Coding Region"
3995     featdef-key INTEGER ,                  -- unique for this feature definition
3996     seqfeat-key INTEGER ,                  -- SeqFeat.data.choice from objfeat.h
3997     entrygroup INTEGER ,                   -- Group for data entry
3998     displaygroup INTEGER ,                 -- Group for data display
3999     molgroup FeatMolType           -- Type of Molecule used for (original comment is truncated; see FeatMolType)
4000 }
4001 
4002 FeatMolType ::= ENUMERATED {   -- molecule type, used by FeatDef.molgroup
4003         aa (1),  -- proteins
4004     na (2),  -- nucleic acids
4005     both (3) }  -- both proteins and nucleic acids
4006 
4007 FeatDefSet ::= SEQUENCE OF FeatDef   -- collections of definitions
4008 
4009 FeatDispGroup ::= SEQUENCE {   -- a named group of feature definitions
4010         groupkey INTEGER ,     -- numeric key (presumably matched by FeatDef.displaygroup; confirm)
4011     groupname VisibleString }  -- name of the group
4012 
4013 FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup   -- ordered collection of display groups
4014 
4015 FeatDefGroupSet ::= SEQUENCE {   -- groups plus definitions; not listed in this module's EXPORTS
4016         groups FeatDispGroupSet ,   -- the display groups
4017         defs FeatDefSet }           -- the feature definitions
4018 
4019 END
4020 
4021     
4022 --$Revision: 6.12 $
4023 --****************************************************************
4024 --
4025 --  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
4026 --  by Jonathan Epstein, February 1996
4027 --
4028 --****************************************************************
4029 
4030 NCBI-Mime DEFINITIONS ::=
4031 BEGIN
4032 
4033 EXPORTS Ncbi-mime-asn1;
4034 IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
4035     Cdd FROM NCBI-Cdd
4036         Seq-entry FROM NCBI-Seqset
4037         Seq-annot FROM NCBI-Sequence
4038     Medline-entry FROM NCBI-Medline
4039     Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;
4040 
4041 Ncbi-mime-asn1 ::= CHOICE {     -- the one type exported by NCBI-Mime; exactly one alternative is present
4042         entrez  Entrez-general,                 -- just a structure (original note; Entrez-general's data CHOICE also allows Medline and sequence entries)
4043     alignstruc  Biostruc-align,     -- structures & sequences & alignments
4044         alignseq        Biostruc-align-seq,     -- sequence alignment
4045     strucseq    Biostruc-seq,       -- structure & sequences
4046     strucseqs   Biostruc-seqs,      -- structure & sequences & alignments
4047     general     Biostruc-seqs-aligns-cdd    -- all-purpose "grab bag"
4048         -- others may be added here in the future
4049 }
4050 
4051 -- generic bundle of sequence and alignment info; every component is OPTIONAL
4052 Bundle-seqs-aligns ::= SEQUENCE {
4053     sequences SET OF Seq-entry OPTIONAL,        -- sequences
4054     seqaligns SET OF Seq-annot OPTIONAL,        -- sequence alignments
4055     strucaligns Biostruc-annot-set OPTIONAL,    -- structure alignments
4056     imports SET OF Seq-annot OPTIONAL,          -- imports (updates in Cn3D)
4057     style-dictionary Cn3d-style-dictionary OPTIONAL,    -- Cn3D stuff
4058     user-annotations Cn3d-user-annotations OPTIONAL     -- Cn3D user annotations (type imported from NCBI-Cn3d)
4059 }
4060 
4061 Biostruc-seqs-aligns-cdd ::= SEQUENCE {     -- payload of the "general" alternative of Ncbi-mime-asn1
4062     seq-align-data CHOICE {                 -- required; exactly one of the two forms below
4063         bundle Bundle-seqs-aligns,          -- either seqs + alignments
4064         cdd Cdd                             -- or CDD (which contains these)
4065     },
4066     structures SET OF Biostruc OPTIONAL,    -- structures
4067     structure-type ENUMERATED {             -- type of structures to load if
4068         ncbi-backbone(2),                   -- not present; meanings and
4069         ncbi-all-atom(3),                   -- values are same as MMDB's
4070         pdb-model(4)                        -- Model-type
4071     } OPTIONAL                              -- the structure-type component may itself be omitted
4072 }
4073 
4074 Biostruc-align ::= SEQUENCE {   -- payload of the "alignstruc" alternative: structures, sequences and alignments
4075         master  Biostruc,                       -- the master structure
4076         slaves  SET OF Biostruc,                -- the slave structures (unordered set)
4077         alignments      Biostruc-annot-set,     -- structure alignments
4078         sequences SET OF Seq-entry,     -- sequences
4079         seqalign SET OF Seq-annot,      -- presumably sequence alignments, as for Bundle-seqs-aligns.seqaligns; TODO confirm
4080         style-dictionary Cn3d-style-dictionary OPTIONAL,   -- optional Cn3D style dictionary
4081         user-annotations Cn3d-user-annotations OPTIONAL    -- optional Cn3D user annotations
4082 }
4083 
4084 Biostruc-align-seq ::= SEQUENCE {       -- display seq structure align only; payload of "alignseq", has no Biostruc component
4085         sequences SET OF Seq-entry,     -- sequences
4086         seqalign SET OF Seq-annot,      -- presumably the sequence alignments; TODO confirm
4087         style-dictionary Cn3d-style-dictionary OPTIONAL,   -- optional Cn3D style dictionary
4088         user-annotations Cn3d-user-annotations OPTIONAL    -- optional Cn3D user annotations
4089 }
4090 
4091 Biostruc-seq ::= SEQUENCE {     -- display  structure seq added by yanli; payload of "strucseq" (structure & sequences)
4092         structure Biostruc,             -- the single structure
4093         sequences SET OF Seq-entry,     -- the sequences (unordered set)
4094         style-dictionary Cn3d-style-dictionary OPTIONAL,   -- optional Cn3D style dictionary
4095         user-annotations Cn3d-user-annotations OPTIONAL    -- optional Cn3D user annotations
4096 }
4097 
4098 Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure added by yanli; payload of "strucseqs"
4099         structure Biostruc,             -- the single structure
4100         sequences SET OF Seq-entry,     -- sequences
4101         seqalign SET OF Seq-annot,      -- presumably the sequence (blast) alignments; TODO confirm
4102         style-dictionary Cn3d-style-dictionary OPTIONAL,   -- optional Cn3D style dictionary
4103         user-annotations Cn3d-user-annotations OPTIONAL    -- optional Cn3D user annotations
4104 }
4105 
4106 Entrez-style ::= ENUMERATED {   -- named values for Entrez-general.style, numbered 1 through 13
4107         docsum (1),
4108         genbank (2) ,
4109         genpept (3) ,
4110         fasta (4) ,
4111         asn1 (5) ,
4112         graphic (6) ,
4113         alignment (7) ,
4114         globalview (8) ,
4115         report (9) ,
4116         medlars (10) ,
4117         embl (11) ,
4118         pdb (12) ,
4119         kinemage (13) }
4120 
4121 Entrez-general ::= SEQUENCE {   -- payload of the "entrez" alternative of Ncbi-mime-asn1
4122         title VisibleString OPTIONAL,   -- optional title string
4123         data CHOICE {                   -- required; exactly one kind of entry
4124                 ml      Medline-entry ,         -- a Medline entry
4125                 prot    Seq-entry ,             -- a Seq-entry (presumably protein; TODO confirm)
4126                 nuc     Seq-entry ,             -- a Seq-entry (presumably nucleotide; TODO confirm)
4127                 genome  Seq-entry ,             -- a Seq-entry (presumably genome; TODO confirm)
4128                 structure Biostruc ,            -- a structure
4129                 strucAnnot Biostruc-annot-set } ,       -- a set of structure annotations
4130         style Entrez-style ,            -- required style value, see Entrez-style
4131         location VisibleString OPTIONAL }       -- optional string; meaning not stated here
4132 END
4133 --$Revision: 6.0 $
4134 --********************************************************************
4135 --
4136 --  Print Templates
4137 --  James Ostell, 1993
4138 --
4139 --
4140 --********************************************************************
4141 
4142 NCBI-ObjPrt DEFINITIONS ::=
4143 BEGIN
4144 
4145 EXPORTS PrintTemplate, PrintTemplateSet;
4146 
4147 PrintTemplate ::= SEQUENCE {     -- one named print template
4148     name TemplateName ,  -- name for this template
4149     labelfrom VisibleString OPTIONAL,    -- ASN.1 path to get label from
4150     format PrintFormat }     -- root format of this template
4151 
4152 TemplateName ::= VisibleString   -- template name; used by PrintTemplate.name and PrintForm.use-template
4153 
4154 PrintTemplateSet ::= SEQUENCE OF PrintTemplate   -- ordered collection of print templates
4155 
4156 PrintFormat ::= SEQUENCE {      -- format for one ASN.1 path; nests via PrintFormBlock.components
4157     asn1 VisibleString ,    -- ASN.1 partial path for this
4158     label VisibleString OPTIONAL ,   -- printable label
4159     prefix VisibleString OPTIONAL,   -- optional prefix string
4160     suffix VisibleString OPTIONAL,   -- optional suffix string
4161     form PrintForm }        -- which form applies, see PrintForm
4162 
4163 PrintForm ::=   CHOICE {      -- Forms for various ASN.1 components
4164     block PrintFormBlock,         -- for SEQUENCE, SET
4165     boolean PrintFormBoolean,     -- strings for true and false
4166     enum PrintFormEnum,           -- list of strings for values
4167     text PrintFormText,           -- optional text function name
4168     use-template TemplateName,    -- names a template (same type as PrintTemplate.name)
4169     user UserFormat ,             -- user supplied print function names
4170     null NULL }               -- rarely used
4171 
4172 UserFormat ::= SEQUENCE {       -- used by the "user" alternative of PrintForm
4173     printfunc VisibleString ,           -- required string; presumably the name of a print function, TODO confirm
4174     defaultfunc VisibleString OPTIONAL }    -- optional string; presumably the name of a fallback function, TODO confirm
4175 
4176 PrintFormBlock ::= SEQUENCE {  -- for SEQUENCE, SET
4177     separator VisibleString OPTIONAL ,      -- optional separator string
4178     components SEQUENCE OF PrintFormat }    -- ordered nested formats (makes PrintFormat recursive)
4179 
4180 PrintFormBoolean ::= SEQUENCE {     -- used by the "boolean" alternative of PrintForm
4181     true VisibleString OPTIONAL ,       -- optional string; presumably printed for TRUE, TODO confirm
4182     false VisibleString OPTIONAL }      -- optional string; presumably printed for FALSE, TODO confirm
4183 
4184 PrintFormEnum ::= SEQUENCE {        -- used by the "enum" alternative of PrintForm
4185     values SEQUENCE OF VisibleString OPTIONAL }     -- optional ordered list of strings; presumably one per enumerated value, TODO confirm
4186 
4187 PrintFormText ::= SEQUENCE {        -- used by the "text" alternative of PrintForm
4188     textfunc VisibleString OPTIONAL }   -- optional string; presumably the name of a text function, TODO confirm
4189     
4190 END
4191 
4192 --$Revision: 6.10 $
4193 --*********************************************************
4194 --
4195 -- ASN.1 and XML for the components of a GenBank format sequence
4196 -- J.Ostell 2002
4197 -- Updated 25 May 2010
4198 --
4199 --*********************************************************
4200 
4201 NCBI-GBSeq DEFINITIONS ::=
4202 BEGIN
4203 
4204 --********
4205 --  GBSeq represents the elements in a GenBank style report
4206 --    of a sequence with some small additions to structure and support
4207 --    for protein (GenPept) versions of GenBank format as seen in
4208 --    Entrez. While this represents the simplification, reduction of
4209 --    detail, and flattening to a single sequence perspective of GenBank
4210 --    format (compared with the full ASN.1 or XML from which GenBank and
4211 --    this format is derived at NCBI), it is presented in ASN.1 or XML for
4212 --    automated parsing and processing. It is hoped that this compromise
4213 --    will be useful for those bulk processing at the GenBank format level
4214 --    of detail today. Since it is a compromise, a number of pragmatic
4215 --    decisions have been made.
4216 --
4217 --  In pursuit of simplicity and familiarity a number of
4218 --    fields do not have full substructure defined here where there is
4219 --    already a standard GenBank format string. For example:
4220 --
4221 --   Date  DD-Mon-YYYY
4222 --   Authors   LastName, Initials (with periods)
4223 --   Journal   JournalName Volume (issue), page-range (year)
4224 --   FeatureLocations as per GenBank feature table, but FeatureIntervals
4225 --    may also be provided as a convenience
4226 --   FeatureQualifiers  as per GenBank feature table
4227 --   Primary has a string that represents a table to construct
4228 --    a third party (TPA) sequence.
4229 --   other-seqids can have strings with the "vertical bar format" sequence
4230 --    identifiers used in BLAST for example, when they are non-genbank types.
4231 --    Currently in GenBank format you only see GI, but there are others, like
4232 --    patents, submitter clone names, etc which will appear here, as they
4233 --    always have in the ASN.1 format, and full XML format.
4234 --   source-db is a formatted text block for peptides in GenPept format that
4235 --    carries information from the source protein database.
4236 --
4237 --  There are also a number of elements that could have been
4238 --   more exactly specified, but in the interest of simplicity
4239 --   have been simply left as options. For example..
4240 --
4241 --  accession and accession.version will always appear in a GenBank record
4242 --   they are optional because this format can also be used for non-GenBank
4243 --   sequences, and in that case will have only "other-seqids".
4244 --
4245 --  sequences will normally all have "sequence" filled in. But contig records
4246 --    will have a "join" statement in the "contig" slot, and no "sequence".
4247 --    We also may consider a retrieval option with no sequence of any kind
4248 --     and no feature table to quickly check minimal values.
4249 --
4250 --  a reference may have an author list, or be from a consortium, or both.
4251 --
4252 --  some fields, such as taxonomy, do appear as separate elements in GenBank
4253 --    format but without a specific linetype (in GenBank format this comes
4254 --    under ORGANISM). Another example is the separation of primary accession
4255 --    from the list of secondary accessions. In GenBank format primary
4256 --    accession is just the first one on the list that includes all secondaries
4257 --    after it.
4258 --
4259 --  create-date deserves special comment. The date you see on the right hand
4260 --    side of the LOCUS line in GenBank format is actually the last date the
4261 --    record was modified (or the update-date). The date the record was
4262 --    first submitted to GenBank appears in the first submission citation in
4263 --    the reference section. Internally in the databases and ASN.1 NCBI keeps
4264 --    the first date the record was released into the sequence database at
4265 --    NCBI as create-date. For records from EMBL, which supports create-date,
4266 --    it is the date provided by EMBL. For DDBJ records, which do not supply
4267 --    a create-date (same as GenBank format) the create-date is the first date
4268 --    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
4269 --    took responsibility for GenBank, it is just the first date NCBI saw the
4270 --    record. Create-date can be very useful, so we expose it here, but users
4271 --    must understand it is only an approximation and comes from many sources,
4272 --    and with many exceptions and caveats. It does NOT tell you the first
4273 --    date the public might have seen this record and thus is NOT an accurate
4274 --    measure for legal issues of precedence.
4275 --
4276 --********
4277 
4278 GBSet ::= SEQUENCE OF GBSeq   -- ordered collection of GBSeq records
4279         
4280 GBSeq ::= SEQUENCE {    -- one sequence record at GenBank (or GenPept) report level; most components are plain strings
4281     locus VisibleString ,
4282     length INTEGER ,
4283     strandedness VisibleString OPTIONAL ,
4284     moltype VisibleString ,
4285     topology VisibleString OPTIONAL ,
4286     division VisibleString ,
4287     update-date VisibleString ,    -- DD-Mon-YYYY; last date the record was modified (the date shown on the LOCUS line)
4288     create-date VisibleString OPTIONAL ,   -- DD-Mon-YYYY; first date released into the sequence database at NCBI, an approximation only (see module notes)
4289     update-release VisibleString OPTIONAL ,
4290     create-release VisibleString OPTIONAL ,
4291     definition VisibleString ,
4292     primary-accession VisibleString OPTIONAL ,   -- optional because non-GenBank sequences may have only other-seqids
4293     entry-version VisibleString OPTIONAL ,
4294     accession-version VisibleString OPTIONAL ,   -- accession.version; optional for the same reason as primary-accession
4295     other-seqids SEQUENCE OF GBSeqid OPTIONAL ,  -- "vertical bar format" identifiers for non-genbank id types
4296     secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,  -- kept separate from the primary accession
4297     project VisibleString OPTIONAL ,
4298     keywords SEQUENCE OF GBKeyword OPTIONAL ,
4299     segment VisibleString OPTIONAL ,
4300     source VisibleString OPTIONAL ,
4301     organism VisibleString OPTIONAL ,
4302     taxonomy VisibleString OPTIONAL ,    -- in GenBank format this comes under ORGANISM, without its own linetype
4303     references SEQUENCE OF GBReference OPTIONAL ,
4304     comment VisibleString OPTIONAL ,
4305     comment-set SEQUENCE OF GBComment OPTIONAL ,
4306     struc-comments SEQUENCE OF GBStrucComment OPTIONAL ,
4307     primary VisibleString OPTIONAL ,     -- string representing a table to construct a third party (TPA) sequence
4308     source-db VisibleString OPTIONAL ,   -- formatted text block for peptides in GenPept format, from the source protein database
4309     database-reference VisibleString OPTIONAL ,
4310     feature-table SEQUENCE OF GBFeature OPTIONAL ,
4311     feature-set SEQUENCE OF GBFeatureSet OPTIONAL ,
4312     sequence VisibleString OPTIONAL ,  -- Optional for contig, wgs, etc.
4313     contig VisibleString OPTIONAL ,    -- "join" statement; contig records have this and no "sequence"
4314     alt-seq SEQUENCE OF GBAltSeqData OPTIONAL
4315 }
4316 
4317 GBSeqid ::= VisibleString   -- element type of GBSeq.other-seqids
4318 
4319 GBSecondary-accn ::= VisibleString   -- element type of GBSeq.secondary-accessions
4320 
4321 GBKeyword ::= VisibleString   -- element type of GBSeq.keywords
4322 
4323 GBReference ::= SEQUENCE {   -- one citation; may have an author list, a consortium, or both
4324     reference VisibleString ,
4325     position VisibleString OPTIONAL ,
4326     authors SEQUENCE OF GBAuthor OPTIONAL ,   -- each author as LastName, Initials (with periods)
4327     consortium VisibleString OPTIONAL ,
4328     title VisibleString OPTIONAL ,
4329     journal VisibleString ,    -- JournalName Volume (issue), page-range (year)
4330     xref SEQUENCE OF GBXref OPTIONAL ,
4331     pubmed INTEGER OPTIONAL ,
4332     remark VisibleString OPTIONAL
4333 }
4334 
4335 GBAuthor ::= VisibleString   -- element type of GBReference.authors
4336 
4337 GBXref ::= SEQUENCE {    -- a database name paired with an id; used by GBReference.xref and GBFeature.xrefs
4338     dbname VisibleString ,   -- required database name string
4339     id VisibleString         -- required id string (presumably the record identifier in that database; TODO confirm)
4340 }
4341 
4342 GBComment ::= SEQUENCE {     -- element type of GBSeq.comment-set
4343     type VisibleString OPTIONAL ,                -- optional type string
4344     paragraphs SEQUENCE OF GBCommentParagraph    -- required ordered list of paragraphs
4345 }
4346 
4347 GBCommentParagraph ::= SEQUENCE {    -- one paragraph of a GBComment
4348     items SEQUENCE OF GBCommentItem      -- required ordered list of items
4349 }
4350 
4351 GBCommentItem ::= SEQUENCE {     -- one item of a GBCommentParagraph; both components are OPTIONAL
4352     value VisibleString OPTIONAL ,   -- optional value string
4353     url VisibleString OPTIONAL       -- optional url string
4354 }
4355 
4356 GBStrucComment ::= SEQUENCE {    -- element type of GBSeq.struc-comments
4357     name VisibleString OPTIONAL ,            -- optional name string
4358     items SEQUENCE OF GBStrucCommentItem     -- required ordered list of items
4359 }
4360 
4361 GBStrucCommentItem ::= SEQUENCE {    -- one item of a GBStrucComment; all components are OPTIONAL
4362     tag VisibleString OPTIONAL ,     -- optional tag string
4363     value VisibleString OPTIONAL ,   -- optional value string
4364     url VisibleString OPTIONAL       -- optional url string
4365 }
4366 
4367 GBFeatureSet ::= SEQUENCE {      -- element type of GBSeq.feature-set
4368     annot-source VisibleString OPTIONAL ,    -- optional annotation source string
4369     features SEQUENCE OF GBFeature           -- required ordered list of features
4370 }
4371 
4372 GBFeature ::= SEQUENCE {     -- one feature; used by GBSeq.feature-table and GBFeatureSet.features
4373     key VisibleString ,
4374     location VisibleString ,     -- location string as per GenBank feature table
4375     intervals SEQUENCE OF GBInterval OPTIONAL ,  -- intervals, provided as a convenience alongside location
4376     operator VisibleString OPTIONAL ,
4377     partial5 BOOLEAN OPTIONAL ,
4378     partial3 BOOLEAN OPTIONAL ,
4379     quals SEQUENCE OF GBQualifier OPTIONAL ,     -- qualifiers as per GenBank feature table
4380     xrefs SEQUENCE OF GBXref OPTIONAL
4381 }
4382 
4383 GBInterval ::= SEQUENCE {    -- one interval; used by GBFeature.intervals and GBAltSeqItem.interval
4384     from INTEGER OPTIONAL ,
4385     to INTEGER OPTIONAL ,
4386     point INTEGER OPTIONAL ,
4387     iscomp BOOLEAN OPTIONAL ,
4388     interbp BOOLEAN OPTIONAL ,
4389     accession VisibleString      -- the only required component of an interval
4390 }
4391 
4392 GBQualifier ::= SEQUENCE {   -- element type of GBFeature.quals
4393     name VisibleString ,             -- required qualifier name
4394     value VisibleString OPTIONAL     -- optional qualifier value
4395 }
4396 
4397 GBAltSeqData ::= SEQUENCE {      -- element type of GBSeq.alt-seq
4398     name VisibleString ,  -- e.g., contig, wgs, scaffold, cage, genome
4399     items SEQUENCE OF GBAltSeqItem OPTIONAL      -- optional ordered list of items
4400 }
4401 
4402 GBAltSeqItem ::= SEQUENCE {      -- one item of GBAltSeqData.items; all components are OPTIONAL
4403     interval GBInterval OPTIONAL ,
4404     isgap BOOLEAN OPTIONAL ,
4405     gap-length INTEGER OPTIONAL ,
4406     gap-type VisibleString OPTIONAL ,
4407     gap-linkage VisibleString OPTIONAL ,
4408     gap-comment VisibleString OPTIONAL ,
4409     first-accn VisibleString OPTIONAL ,
4410     last-accn VisibleString OPTIONAL ,
4411     value VisibleString OPTIONAL
4412 }
4413 
4414 END
4415 
4416 --$Revision: 1.8 $
4417 --************************************************************************
4418 --
4419 -- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
4420 -- The International Nucleotide Sequence Database (INSD) collaboration
4421 -- Version 1.6, 25 May 2010
4422 --
4423 --************************************************************************
4424 
4425 INSD-INSDSeq DEFINITIONS ::=
4426 BEGIN
4427 
4428 --  INSDSeq provides the elements of a sequence as presented in the
4429 --    GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
4430 --    additional structure.
4431 --    Although this single perspective of the three flatfile formats
4432 --    provides a useful simplification, it hides to some extent the
4433 --    details of the actual data underlying those formats. Nevertheless,
4434 --    the XML version of INSD-Seq is being provided with
4435 --    the hopes that it will prove useful to those who bulk-process
4436 --    sequence data at the flatfile-format level of detail. Further 
4437 --    documentation regarding the content and conventions of those formats 
4438 --    can be found at:
4439 --
4440 --    URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
4441 --    http://www.ddbj.nig.ac.jp/FT/full_index.html
4442 --    http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
4443 --    http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
4444 --
4445 --    URLs for DDBJ, EMBL, and GenBank Release Notes :
4446 --    ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
4447 --    http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
4448 --    ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
4449 --
4450 --    Because INSDSeq is a compromise, a number of pragmatic decisions have
4451 --    been made:
4452 --
4453 --  In pursuit of simplicity and familiarity a number of fields do not
4454 --    have full substructure defined here where there is already a
4455 --    standard flatfile format string. For example:
4456 --
4457 --   Dates:      DD-MON-YYYY (eg 10-JUN-2003)
4458 --
4459 --   Author:     LastName, Initials  (eg Smith, J.N.)
4460 --            or Lastname Initials   (eg Smith J.N.)
4461 --
4462 --   Journal:    JournalName Volume (issue), page-range (year)
4463 --            or JournalName Volume(issue):page-range(year)
4464 --            eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
4465 --               Appl. Environ. Microbiol. 61(4):1646-1648(1995).
4466 --
4467 --  FeatureLocations are represented as in the flatfile feature table,
4468 --    but FeatureIntervals may also be provided as a convenience
4469 --
4470 --  FeatureQualifiers are represented as in the flatfile feature table.
4471 --
4472 --  Primary has a string that represents a table to construct
4473 --    a third party (TPA) sequence.
4474 --
4475 --  other-seqids can have strings with the "vertical bar format" sequence
4476 --    identifiers used in BLAST for example, when they are non-INSD types.
4477 --
4478 --  Currently in flatfile format you only see Accession numbers, but there 
4479 --    are others, like patents, submitter clone names, etc which will 
4480 --    appear here
4481 --
4482 --  There are also a number of elements that could have been more exactly
4483 --    specified, but in the interest of simplicity have been simply left as
4484 --    optional. For example:
4485 --
4486 --  All publicly accessible sequence records in INSDSeq format will
4487 --    include accession and accession.version. However, these elements are 
4488 --    optional in INSDSeq so that this format can also be used
4489 --    for non-public sequence data, prior to the assignment of accessions and 
4490 --    version numbers. In such cases, records will have only "other-seqids".
4491 --
4492 --  sequences will normally all have "sequence" filled in. But contig records
4493 --    will have a "join" statement in the "contig" slot, and no "sequence".
4494 --    We also may consider a retrieval option with no sequence of any kind
4495 --    and no feature table to quickly check minimal values.
4496 --
4497 --  Four (optional) elements are specific to records represented via the EMBL
4498 --    sequence database: INSDSeq_update-release, INSDSeq_create-release,
4499 --    INSDSeq_entry-version, and INSDSeq_database-reference.
4500 --
4501 --  One (optional) element is specific to records originating at the GenBank
4502 --    and DDBJ sequence databases: INSDSeq_segment.
4503 --
4504 --********
4505 
4506 INSDSet ::= SEQUENCE OF INSDSeq   -- ordered collection of INSDSeq records
4507 
4508 INSDSeq ::= SEQUENCE {   -- one sequence record at GenBank/EMBL/DDBJ flatfile level; most components are plain strings
4509     locus VisibleString ,
4510     length INTEGER ,
4511     strandedness VisibleString OPTIONAL ,
4512     moltype VisibleString ,
4513     topology VisibleString OPTIONAL ,
4514     division VisibleString ,
4515     update-date VisibleString ,            -- date string, DD-MON-YYYY (eg 10-JUN-2003)
4516     create-date VisibleString OPTIONAL ,   -- date string, DD-MON-YYYY (eg 10-JUN-2003)
4517     update-release VisibleString OPTIONAL ,    -- specific to records represented via the EMBL sequence database
4518     create-release VisibleString OPTIONAL ,    -- specific to records represented via the EMBL sequence database
4519     definition VisibleString ,
4520     primary-accession VisibleString OPTIONAL ,   -- optional so non-public data without an accession can be represented
4521     entry-version VisibleString OPTIONAL ,     -- specific to records represented via the EMBL sequence database
4522     accession-version VisibleString OPTIONAL ,   -- accession.version; optional for the same reason as primary-accession
4523     other-seqids SEQUENCE OF INSDSeqid OPTIONAL ,    -- "vertical bar format" identifiers for non-INSD id types
4524     secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,
4525     project VisibleString OPTIONAL ,
4526     keywords SEQUENCE OF INSDKeyword OPTIONAL ,
4527     segment VisibleString OPTIONAL ,       -- specific to records originating at GenBank and DDBJ
4528     source VisibleString OPTIONAL ,
4529     organism VisibleString OPTIONAL ,
4530     taxonomy VisibleString OPTIONAL ,
4531     references SEQUENCE OF INSDReference OPTIONAL ,
4532     comment VisibleString OPTIONAL ,
4533     comment-set SEQUENCE OF INSDComment OPTIONAL ,
4534     struc-comments SEQUENCE OF INSDStrucComment OPTIONAL ,
4535     primary VisibleString OPTIONAL ,       -- string representing a table to construct a third party (TPA) sequence
4536     source-db VisibleString OPTIONAL ,
4537     database-reference VisibleString OPTIONAL ,    -- specific to records represented via the EMBL sequence database
4538     feature-table SEQUENCE OF INSDFeature OPTIONAL ,
4539     feature-set SEQUENCE OF INSDFeatureSet OPTIONAL ,
4540     sequence VisibleString OPTIONAL ,  -- Optional for contig, wgs, etc.
4541     contig VisibleString OPTIONAL ,    -- "join" statement; contig records have this and no "sequence"
4542     alt-seq SEQUENCE OF INSDAltSeqData OPTIONAL
4543 }
4544 
4545 INSDSeqid ::= VisibleString   -- element type of INSDSeq.other-seqids
4546 
4547 INSDSecondary-accn ::= VisibleString   -- element type of INSDSeq.secondary-accessions
4548 
4549 INSDKeyword ::= VisibleString   -- element type of INSDSeq.keywords
4550 
4551 -- INSDReference_position contains a string value indicating the
4552 -- basepair span(s) to which a reference applies. The allowable
4553 -- formats are:
4554 -- 
4555 --   X..Y  : Where X and Y are integers separated by two periods,
4556 --           X >= 1 , Y <= sequence length, and X <= Y 
4557 --
4558 --           Multiple basepair spans can exist, separated by a
4559 --           semi-colon and a space. For example : 10..20; 100..500
4560 --             
4561 --   sites : The string literal 'sites', indicating that a reference
4562 --           provides sequence annotation information, but the specific
4563 --           basepair spans are either not captured, or were too numerous
4564 --           to record.
4565 -- 
4566 --           The 'sites' literal string is singly occurring, and
4567 --            cannot be used in conjunction with any X..Y basepair spans.
4568 -- 
4569 --   References that lack an INSDReference_position element apply
4570 --   to the entire sequence.
4571 
4572 INSDReference ::= SEQUENCE {     -- one citation attached to an INSDSeq
4573     reference VisibleString ,
4574     position VisibleString OPTIONAL ,    -- X..Y basepair span(s) or the literal 'sites'; when absent the reference applies to the entire sequence
4575     authors SEQUENCE OF INSDAuthor OPTIONAL ,    -- each author as LastName, Initials or Lastname Initials
4576     consortium VisibleString OPTIONAL ,
4577     title VisibleString OPTIONAL ,
4578     journal VisibleString ,      -- JournalName Volume (issue), page-range (year) or JournalName Volume(issue):page-range(year)
4579     xref SEQUENCE OF INSDXref OPTIONAL ,
4580     pubmed INTEGER OPTIONAL ,
4581     remark VisibleString OPTIONAL
4582 }
4583 
4584 INSDAuthor ::= VisibleString   -- element type of INSDReference.authors
4585 
4586 -- INSDXref provides a method for referring to records in
4587 -- other databases. INSDXref_dbname is a string value that
4588 -- provides the name of the database, and INSDXref_id
4589 -- is a string value that provides the record's identifier
4590 -- in that database.
4591 
4592 INSDXref ::= SEQUENCE {      -- reference to a record in another database
4593     dbname VisibleString ,   -- name of the database
4594     id VisibleString         -- the record's identifier in that database
4595 }
4596 
4597 INSDComment ::= SEQUENCE {       -- element type of INSDSeq.comment-set
4598     type VisibleString OPTIONAL ,                    -- optional type string
4599     paragraphs SEQUENCE OF INSDCommentParagraph      -- required ordered list of paragraphs
4600 }
4601 
4602 INSDCommentParagraph ::= SEQUENCE {      -- one paragraph of an INSDComment
4603     items SEQUENCE OF INSDCommentItem        -- required ordered list of items
4604 }
4605 
4606 INSDCommentItem ::= SEQUENCE {       -- one item of an INSDCommentParagraph; both components are OPTIONAL
4607     value VisibleString OPTIONAL ,   -- optional value string
4608     url VisibleString OPTIONAL       -- optional url string
4609 }
4610 
4611 INSDStrucComment ::= SEQUENCE {      -- element type of INSDSeq.struc-comments
4612     name VisibleString OPTIONAL ,                -- optional name string
4613     items SEQUENCE OF INSDStrucCommentItem       -- required ordered list of items
4614 }
4615 
4616 INSDStrucCommentItem ::= SEQUENCE {      -- one item of an INSDStrucComment; all components are OPTIONAL
4617     tag VisibleString OPTIONAL ,     -- optional tag string
4618     value VisibleString OPTIONAL ,   -- optional value string
4619     url VisibleString OPTIONAL       -- optional url string
4620 }
4621 
4622 -- INSDFeature_operator contains a string value describing
4623 -- the relationship among a set of INSDInterval within
4624 -- INSDFeature_intervals. The allowable formats are:
4625 -- 
4626 --   join :  The string literal 'join' indicates that the
4627 --           INSDInterval intervals are biologically joined
4628 --           together into a contiguous molecule.
4629 -- 
4630 --   order : The string literal 'order' indicates that the
4631 --           INSDInterval intervals are in the presented
4632 --           order, but they are not necessarily contiguous.
4633 -- 
4634 --   Either 'join' or 'order' is required if INSDFeature_intervals
4635 --   is comprised of more than one INSDInterval .
4636 
4637 INSDFeatureSet ::= SEQUENCE {    -- element type of INSDSeq.feature-set
4638     annot-source VisibleString OPTIONAL ,    -- optional annotation source string
4639     features SEQUENCE OF INSDFeature         -- required ordered list of features
4640 }
4641 
4642 INSDFeature ::= SEQUENCE {   -- one feature; used by INSDSeq.feature-table and INSDFeatureSet.features
4643     key VisibleString ,
4644     location VisibleString ,     -- location string as in the flatfile feature table
4645     intervals SEQUENCE OF INSDInterval OPTIONAL ,    -- intervals, provided as a convenience alongside location
4646     operator VisibleString OPTIONAL ,    -- 'join' or 'order'; required when intervals holds more than one INSDInterval
4647     partial5 BOOLEAN OPTIONAL ,
4648     partial3 BOOLEAN OPTIONAL ,
4649     quals SEQUENCE OF INSDQualifier OPTIONAL ,   -- qualifiers as in the flatfile feature table
4650     xrefs SEQUENCE OF INSDXref OPTIONAL
4651 }
4652 
4653 -- INSDInterval_iscomp is a boolean indicating whether
4654 -- an INSDInterval_from / INSDInterval_to location
4655 -- represents a location on the complement strand.
4656 -- When INSDInterval_iscomp is TRUE, it essentially
4657 -- confirms that a 'from' value which is greater than
4658 -- a 'to' value is intentional, because the location
4659 -- is on the opposite strand of the presented sequence.
4660 
4661 -- INSDInterval_interbp is a boolean indicating whether
4662 -- a feature (such as a restriction site) is located
4663 -- between two adjacent basepairs. When INSDInterval_interbp
4664 -- is TRUE, the 'from' and 'to' values must differ by
4665 -- exactly one base.
4666 
4667 INSDInterval ::= SEQUENCE {      -- one interval; used by INSDFeature.intervals and INSDAltSeqItem.interval
4668     from INTEGER OPTIONAL ,
4669     to INTEGER OPTIONAL ,
4670     point INTEGER OPTIONAL ,
4671     iscomp BOOLEAN OPTIONAL ,    -- TRUE when the from/to location is on the complement strand
4672     interbp BOOLEAN OPTIONAL ,   -- TRUE when the feature lies between two adjacent basepairs
4673     accession VisibleString      -- the only required component of an interval
4674 }
4675 
4676 INSDQualifier ::= SEQUENCE {     -- element type of INSDFeature.quals
4677     name VisibleString ,             -- required qualifier name
4678     value VisibleString OPTIONAL     -- optional qualifier value
4679 }
4680 
4681 INSDAltSeqData ::= SEQUENCE {    -- element type of INSDSeq.alt-seq
4682     name VisibleString ,  -- e.g., CON-division-join, WGS-contig-range,
4683                           -- WGS-scaffold-range, MGA/CAGE-range, genome
4684     items SEQUENCE OF INSDAltSeqItem OPTIONAL    -- optional ordered list of items
4685 }
4686 
4687 INSDAltSeqItem ::= SEQUENCE {    -- one item of INSDAltSeqData.items; all components are OPTIONAL
4688     interval INSDInterval OPTIONAL ,
4689     isgap BOOLEAN OPTIONAL ,
4690     gap-length INTEGER OPTIONAL ,
4691     gap-type VisibleString OPTIONAL ,
4692     gap-linkage VisibleString OPTIONAL ,
4693     gap-comment VisibleString OPTIONAL ,
4694     first-accn VisibleString OPTIONAL ,
4695     last-accn VisibleString OPTIONAL ,
4696     value VisibleString OPTIONAL
4697 }
4698 
4699 END
4700 
4701 --$Revision: 6.1 $
4702 --**********************************************************************
4703 --
4704 --  ASN.1 for a tiny Bioseq in XML
4705 --    basically a structured FASTA file with a few extras
4706 --    in this case we drop all modularity of components
4707 --      All ids are Optional - simpler structure, less checking
4708 --      Components of organism are hard coded - can't easily add or change
4709 --      sequence is just string whether DNA or protein
4710 --  by James Ostell, 2000
4711 --
4712 --**********************************************************************
4713 
4714 NCBI-TSeq DEFINITIONS ::=
4715 BEGIN
4716 
4717 TSeq ::= SEQUENCE {             -- a tiny Bioseq: basically a structured FASTA record with a few extras
4718         seqtype ENUMERATED {            -- required; which kind of sequence this is
4719                 nucleotide (1),
4720                 protein (2) },
4721         gi INTEGER OPTIONAL,            -- optional integer id (all ids are optional)
4722         accver VisibleString OPTIONAL,  -- optional string id; presumably accession.version, TODO confirm
4723         sid VisibleString OPTIONAL,     -- optional string id
4724         local VisibleString OPTIONAL,   -- optional string id
4725         taxid INTEGER OPTIONAL,         -- optional integer; presumably the organism's taxonomy id, TODO confirm
4726         orgname VisibleString OPTIONAL, -- optional string; presumably the organism name, TODO confirm
4727         defline VisibleString,          -- required definition line string
4728         length INTEGER,                 -- required integer length
4729         sequence VisibleString }        -- the sequence as a plain string, whether DNA or protein
4730 
4731 TSeqSet ::= SEQUENCE OF TSeq    -- a bunch of them (ordered list of TSeq)
4732 
4733 END
4734 
4735 --$Id: scoremat.asn,v 1.14 2011/12/21 15:29:33 kazimird Exp $
4736 -- ===========================================================================
4737 --
4738 --                            PUBLIC DOMAIN NOTICE
4739 --               National Center for Biotechnology Information
4740 --
4741 --  This software/database is a "United States Government Work" under the
4742 --  terms of the United States Copyright Act.  It was written as part of
4743 --  the author's official duties as a United States Government employee and
4744 --  thus cannot be copyrighted.  This software/database is freely available
4745 --  to the public for use. The National Library of Medicine and the U.S.
4746 --  Government have not placed any restriction on its use or reproduction.
4747 --
4748 --  Although all reasonable efforts have been taken to ensure the accuracy
4749 --  and reliability of the software and data, the NLM and the U.S.
4750 --  Government do not and cannot warrant the performance or results that
4751 --  may be obtained by using this software or data. The NLM and the U.S.
4752 --  Government disclaim all warranties, express or implied, including
4753 --  warranties of performance, merchantability or fitness for any particular
4754 --  purpose.
4755 --
4756 --  Please cite the author in any work or product based on this material.
4757 --
4758 -- ===========================================================================
4759 --
4760 -- Author:  Christiam Camacho
4761 --
4762 -- File Description:
4763 --      ASN.1 definitions for scoring matrix
4764 --
4765 -- ===========================================================================
4766 
4767 NCBI-ScoreMat DEFINITIONS ::= BEGIN
4768 
4769 EXPORTS    Pssm, PssmIntermediateData, PssmFinalData, 
4770            PssmParameters, PssmWithParameters;
4771     
4772 IMPORTS    Object-id   FROM NCBI-General
4773            Seq-entry   FROM NCBI-Seqset;
4774 
4775 -- a rudimentary block/core-model, to be used with block-based alignment 
4776 -- routines and threading
4777 
4778 BlockProperty ::= SEQUENCE {      -- one typed property of a block; element type of CoreBlock.property
4779   type     INTEGER { unassigned  (0),
4780                      threshold   (1),       -- score threshold for heuristics
4781                      minscore    (2),       -- observed minimum score in CD
4782                      maxscore    (3),       -- observed maximum score in CD
4783                      meanscore   (4),       -- observed mean score in CD
4784                      variance    (5),       -- observed score variance
4785                      name       (10),       -- just name the block
4786                      is-optional(20),       -- block may not have to be used    
4787                      other     (255) },
4788   intvalue  INTEGER OPTIONAL,              -- optional integer value of the property
4789   textvalue VisibleString OPTIONAL         -- optional text value of the property
4790 }
4791 
4792 CoreBlock ::= SEQUENCE {          -- one block of a core model; element type of CoreDef.blocks
4793   start          INTEGER,                   -- begin of block on query
4794   stop           INTEGER,                   -- end of block on query
4795   minstart       INTEGER OPTIONAL,          -- optional N-terminal extension
4796   maxstop        INTEGER OPTIONAL,          -- optional C-terminal extension
4797   property       SEQUENCE OF BlockProperty OPTIONAL   -- optional ordered list of block properties
4798 }
4799 
4800 LoopConstraint ::= SEQUENCE {            -- length bounds; listed in CoreDef.loops
4801   minlength  INTEGER DEFAULT 0,          -- minimum length of unaligned region
4802   maxlength  INTEGER DEFAULT 100000      -- maximum length of unaligned region
4803 }
4804 
4805 CoreDef ::= SEQUENCE {                            -- used by PssmParameters.constraints
4806   nblocks          INTEGER,                       -- number of core elements/blocks
4807   blocks           SEQUENCE OF CoreBlock,         -- nblocks locations
4808   loops            SEQUENCE OF LoopConstraint,    -- (nblocks+1) constraints
4809 
4810   isDiscontinuous  BOOLEAN OPTIONAL,              -- is it a discontinuous domain
4811 
4812   insertions       SEQUENCE OF INTEGER OPTIONAL   -- positions of long insertions
4813 }
4814 
4815 Site-annot ::= SEQUENCE {
4816   startPosition  INTEGER,                 -- location of the annotation:
4817   stopPosition   INTEGER,                 -- start and stop position in the
4818                                           -- PSSM
4819 
4820   description    VisibleString OPTIONAL,  -- description or name that can
4821                                           -- be used as a label in
4822                                           -- visualization
4823 
4824   type           INTEGER OPTIONAL,        -- type of the annotated feature,
4825                                           -- similar to Align-annot in
4826                                           -- NCBI-Cdd
4827 
4828   aliases        SEQUENCE OF VisibleString OPTIONAL,  -- additional names for
4829                                                       -- the annotation
4830 
4831   motif          VisibleString OPTIONAL,  -- motif to validate mapping of sites
4832 
4833   motifuse       INTEGER OPTIONAL         -- 0 for validation
4834                                           -- 1 for motif in seqloc
4835                                           -- 2 for multiple motifs in seqloc
4836 }
4837 
4838 Site-annot-set ::= SEQUENCE OF Site-annot    -- list of site annotations; type of PssmParameters.annotatedSites
4839 
4840 -- ===========================================================================
4841 -- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
4842 -- ===========================================
4843 --
4844 -- Two possible inputs to PSI-BLAST and formatrpsdb:
4845 -- 1) PssmWithParameters whose pssm field contains intermediate PSSM data
4846 --    (matrix of frequency ratios)
4847 -- 2) PssmWithParameters whose pssm field contains final PSSM data (matrix of
4848 --    scores and statistical parameters) - such as written by cddumper
4849 -- (The diagram below abbreviates PssmWithParameters as PssmWithParams.)
4850 -- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
4851 -- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
4852 -- In case 2, PSI-BLAST's PSSM engine is not invoked; the matrix of scores and
4853 -- statistical parameters are used to perform the search in PSI-BLAST, and the
4854 -- same data plus the data in PssmWithParameters::params::rpsdbparams are used
4855 -- to build the PSSM and ultimately the RPS-BLAST database.
4856 -- 
4857 -- 
4858 --                 reads    ++++++++++++++ writes
4859 -- PssmWithParams  ====>    + PSI-BLAST  + =====> PssmWithParams
4860 --                          ++++++++++++++             |  ^
4861 --         ^                                           |  |
4862 --         |                                           |  |
4863 --         +===========================================+  |
4864 --                                                     |  |
4865 --         +===========================================+  |
4866 --         |                                              |
4867 -- reads   |                                              | 
4868 --         v                                              |
4869 --  +++++++++++++++ writes +++++++++++++++++++++++        |
4870 --  | formatrpsdb | =====> | RPS-BLAST databases |        |
4871 --  +++++++++++++++        +++++++++++++++++++++++        |
4872 --                                   ^                    |
4873 --                                   |                    |
4874 --                                   | reads              |
4875 --                             +++++++++++++              |
4876 --                             | RPS-BLAST |              |
4877 --                             +++++++++++++              |
4878 --                                                        |
4879 --       reads  ++++++++++++               writes         |
4880 --  Cdd ======> | cddumper | =============================+
4881 --              ++++++++++++
4882 --
4883 -- ===========================================================================
4884 
4885 -- Final form of a PSSM: its scores and the associated statistical parameters.
4886 -- Dimensions and order in which scores are stored must be the same as that
4887 -- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
4888 PssmFinalData ::= SEQUENCE {
4889 
4890     -- PSSM's scores
4891     scores          SEQUENCE OF INTEGER,
4892 
4893     -- Karlin & Altschul parameter produced during the PSSM's calculation
4894     lambda          REAL,
4895 
4896     -- Karlin & Altschul parameter produced during the PSSM's calculation
4897     kappa           REAL,
4898 
4899     -- Karlin & Altschul parameter produced during the PSSM's calculation
4900     h               REAL,
4901 
4902     -- Scaling factor used to obtain more precision when building the PSSM
4903     -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
4904     -- engine generates PSSMs which are not scaled up; however, if PSI-BLAST is
4905     -- given a PSSM which contains a scaled-up PSSM (indicated by having a
4906     -- scalingFactor greater than 1), then it will scale down the PSSM to
4907     -- perform the initial stages of the search with it.
4908     -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided
4909     -- scaled-up PSSMs, it will ensure that all PSSMs used to build the
4910     -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST
4911     -- will silently produce incorrect results).
4912     scalingFactor   INTEGER DEFAULT 1,
4913 
4914     -- Karlin & Altschul parameter produced during the PSSM's calculation
4915     lambdaUngapped  REAL OPTIONAL,
4916 
4917     -- Karlin & Altschul parameter produced during the PSSM's calculation
4918     kappaUngapped   REAL OPTIONAL,
4919 
4920     -- Karlin & Altschul parameter produced during the PSSM's calculation
4921     hUngapped       REAL OPTIONAL
4922 }
4923 
4924 -- Intermediate data of a PSSM, used to create the PSSM's scores
4925 -- and statistical parameters. Dimensions and order in which scores are
4926 -- stored must be the same as that specified in Pssm::numRows,
4927 -- Pssm::numColumns, and Pssm::byRow
4928 PssmIntermediateData ::= SEQUENCE {
4929 
4930     -- Observed residue frequencies (or counts) per position of the PSSM
4931     -- (prior to application of pseudocounts)
4932     resFreqsPerPos          SEQUENCE OF INTEGER OPTIONAL,
4933 
4934     -- Weighted observed residue frequencies per position of the PSSM
4935     -- (N.B.: each position's weights should add up to 1.0).
4936     -- This field corresponds to f_i (f sub i) in equation 2 of
4937     -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
4938     -- NOTE: this is needed for diagnostics information only (i.e.:
4939     -- -out_ascii_pssm option in psiblast)
4940     weightedResFreqsPerPos  SEQUENCE OF REAL OPTIONAL,
4941 
4942     -- PSSM's frequency ratios (the only mandatory field)
4943     freqRatios              SEQUENCE OF REAL,
4944 
4945     -- Information content per position of the PSSM
4946     -- NOTE: this is needed for diagnostics information only (i.e.:
4947     -- -out_ascii_pssm option in psiblast)
4948     informationContent      SEQUENCE OF REAL OPTIONAL,
4949 
4950     -- Relative weight for columns of the PSSM without gaps to pseudocounts
4951     -- NOTE: this is needed for diagnostics information only (i.e.:
4952     -- -out_ascii_pssm option in psiblast)
4953     gaplessColumnWeights    SEQUENCE OF REAL OPTIONAL,
4954 
4955     -- Used in sequence weights computation
4956     -- NOTE: this is needed for diagnostics information only (i.e.:
4957     -- -out_ascii_pssm option in psiblast)
4958     sigma                   SEQUENCE OF REAL OPTIONAL,
4959 
4960     -- Length of the aligned regions per position of the query sequence
4961     -- NOTE: this is needed for diagnostics information only (i.e.:
4962     -- -out_ascii_pssm option in psiblast)
4963     intervalSizes           SEQUENCE OF INTEGER OPTIONAL,
4964 
4965     -- Number of matching sequences per position of the PSSM (including the
4966     -- query)
4967     -- NOTE: this is needed for diagnostics information only (i.e.:
4968     -- -out_ascii_pssm option in psiblast)
4969     numMatchingSeqs         SEQUENCE OF INTEGER OPTIONAL,
4970 
4971     -- Number of independent observations per position of the PSSM
4972     -- NOTE: this is needed for building CDD database for DELTA-BLAST
4973     numIndeptObsr           SEQUENCE OF REAL OPTIONAL
4974 }
4975 
4976 -- Position-specific scoring matrix
4977 --
4978 -- Column indices on the PSSM refer to the positions corresponding to the
4979 -- query/master sequence, i.e. the number of columns (N) is the same
4980 -- as the length of the query/master sequence.
4981 -- Row indices refer to individual amino acid types, i.e. the number of
4982 -- rows (M) is the same as the number of different residues in the
4983 -- alphabet we use. Consequently, row labels are amino acid identifiers.
4984 --
4985 -- PSSMs are stored as linear arrays of integers. By default, we store
4986 -- them column-by-column: M values for the first column followed by M
4987 -- values for the second column, and so on. In order to provide
4988 -- flexibility for external applications, the boolean field "byRow" is
4989 -- provided to specify the storage order.
4990 Pssm ::= SEQUENCE {
4991 
4992     -- Is this a protein or nucleotide scoring matrix?
4993     isProtein         BOOLEAN DEFAULT TRUE,
4994 
4995     -- PSSM identifier
4996     identifier        Object-id OPTIONAL,
4997 
4998     -- The dimensions of the matrix are returned so the client can
4999     -- verify that all data was received.
5000 
5001     numRows           INTEGER,    -- number of rows
5002     numColumns        INTEGER,    -- number of columns
5003 
5004     -- rowLabels is given to note the order of residue types so that it can
5005     -- be cross-checked between applications.
5006     -- If this field is not given, the matrix values are presented in the
5007     -- order of the alphabet: ncbistdaa is used for protein, ncbi4na for nucl.
5008     -- For proteins the values returned correspond to
5009     -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
5010     rowLabels         SEQUENCE OF VisibleString OPTIONAL,
5011 
5012     -- Are matrices stored row by row?
5013     byRow             BOOLEAN DEFAULT FALSE,
5014 
5015     -- PSSM representative sequence (master)
5016     query             Seq-entry OPTIONAL,
5017 
5018     -- Both intermediateData and finalData can be provided, but at least one of
5019     -- them must be provided.
5020     -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData
5021     -- representation.
5022 
5023     -- Intermediate data for the PSSM
5024     intermediateData  PssmIntermediateData OPTIONAL,
5025 
5026     -- Final representation for the PSSM
5027     finalData         PssmFinalData OPTIONAL
5028 }
5029 
5030 -- This structure is used to create the RPS-BLAST database auxiliary file
5031 -- (*.aux); it contains parameters set at creation time of the PSSM.
5032 -- Also, the matrixName field is used by formatrpsdb to build a PSSM from
5033 -- a Pssm structure which only contains PssmIntermediateData.
5034 FormatRpsDbParameters ::= SEQUENCE {
5035 
5036     -- Name of the underlying score matrix whose frequency ratios were
5037     -- used in PSSM construction (e.g.: BLOSUM62)
5038     matrixName  VisibleString,
5039 
5040     -- Gap opening penalty corresponding to the matrix above
5041     gapOpen     INTEGER OPTIONAL,
5042 
5043     -- Gap extension penalty corresponding to the matrix above
5044     gapExtend   INTEGER OPTIONAL
5045 
5046 }
5047 
5048 -- Populated by the PSSM engine of PSI-BLAST; the original source for these
5049 -- values are the PSI-BLAST options specified using the BLAST options API
5050 PssmParameters ::= SEQUENCE {
5051 
5052     -- Pseudocount constant used for PSSM. This field corresponds to beta in
5053     -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
5054     pseudocount     INTEGER OPTIONAL,
5055 
5056     -- Data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
5057     -- populated by PSI-BLAST
5058     rpsdbparams     FormatRpsDbParameters OPTIONAL,
5059 
5060     -- Alignment constraints needed by sequence-structure threader
5061     -- and other global or local block-alignment algorithms
5062     constraints     CoreDef OPTIONAL,
5063 
5064     -- Bit score threshold for specific conserved domain hits
5065     bitScoreThresh  REAL OPTIONAL,
5066 
5067     -- Conserved functional sites with annotations
5068     annotatedSites  Site-annot-set OPTIONAL
5069 }
5070 
5071 -- Envelope containing a PSSM and the parameters used to create it.
5072 -- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
5073 PssmWithParameters ::= SEQUENCE {
5074 
5075     -- This field is applicable to PSI-BLAST and formatrpsdb.
5076     -- When both the intermediate and final PSSM data are provided in this
5077     -- field, the final data (matrix of scores and associated statistical
5078     -- parameters) takes precedence and that data is used for further
5079     -- processing. The rationale for this is that the PSSM's scores and
5080     -- statistical parameters might have been calculated by other applications
5081     -- and it might not be possible to recreate them by using PSI-BLAST's PSSM
5082     -- engine.
5083     pssm    Pssm,
5084 
5085     -- This field's rpsdbparams is used to specify the values of options
5086     -- for processing by formatrpsdb. If these are not set, the command
5087     -- line defaults of formatrpsdb are applied. This field is used
5088     -- by PSI-BLAST to verify that the underlying score matrix used to build
5089     -- the PSSM is the same as the one being specified through the BLAST
5090     -- Options API. If this field is omitted, no verification will be
5091     -- performed, so be careful to keep track of what matrix was used to build
5092     -- the PSSM or else the results produced by PSI-BLAST will be unreliable.
5093     params  PssmParameters OPTIONAL
5094 }
5095 
5096 END
5097 --$Revision: 1.160 $
5098 --**********************************************************************
5099 --
5100 --  NCBI ASN.1 macro editing language specifications
5101 --
5102 --  by Colleen Bollin, 2007
5103 --
5104 --**********************************************************************
5105 
5106 NCBI-Macro DEFINITIONS ::=
5107 BEGIN
5108 
5109 EXPORTS AECR-action, Parse-action, Macro-action-list, Suspect-rule-set;
5110 
5111 -- simple constraints --
5112 
5113 String-location ::= ENUMERATED {    -- type of String-constraint.match-location
5114     contains (1),
5115     equals (2),
5116     starts (3),
5117     ends (4),
5118     inlist (5) }
5119 
5120 Word-substitution ::= SEQUENCE {    -- a word plus its synonyms; element of Word-substitution-set
5121     word            VisibleString OPTIONAL,
5122     synonyms        SET OF VisibleString OPTIONAL,
5123     case-sensitive  BOOLEAN DEFAULT FALSE,
5124     whole-word      BOOLEAN DEFAULT FALSE }
5125 
5126 Word-substitution-set ::= SET OF Word-substitution    -- type of String-constraint.ignore-words
5127 
5128 String-constraint ::= SEQUENCE {    -- text-matching rule; every flag defaults to FALSE
5129     match-text      VisibleString OPTIONAL,
5130     match-location  String-location DEFAULT contains,
5131     case-sensitive  BOOLEAN DEFAULT FALSE,
5132     ignore-space    BOOLEAN DEFAULT FALSE,
5133     ignore-punct    BOOLEAN DEFAULT FALSE,
5134     ignore-words    Word-substitution-set OPTIONAL,
5135     whole-word      BOOLEAN DEFAULT FALSE,
5136     not-present     BOOLEAN DEFAULT FALSE,
5137     is-all-caps     BOOLEAN DEFAULT FALSE,
5138     is-all-lower    BOOLEAN DEFAULT FALSE,
5139     is-all-punct    BOOLEAN DEFAULT FALSE,
5140     ignore-weasel   BOOLEAN DEFAULT FALSE }
5141 
5142 String-constraint-set ::= SET OF String-constraint    -- unordered collection of String-constraint values
5143 
5144 Strand-constraint ::= ENUMERATED {    -- type of Location-constraint.strand
5145     any (0),
5146     plus (1),
5147     minus (2) }
5148 
5149 Seqtype-constraint ::= ENUMERATED {    -- type of Location-constraint.seq-type
5150     any (0),
5151     nuc (1),
5152     prot (2) }
5153 
5154 Partial-constraint ::= ENUMERATED {    -- type of Location-constraint.partial5 and partial3
5155     either (0),
5156     partial (1),
5157     complete (2) }
5158 
5159 Location-type-constraint ::= ENUMERATED {    -- type of Location-constraint.location-type
5160     any (0),
5161     single-interval (1),
5162     joined (2),
5163     ordered (3) }
5164 
5165 Location-pos-constraint ::= CHOICE {    -- type of Location-constraint.end5 and end3
5166     dist-from-end      INTEGER,
5167     max-dist-from-end  INTEGER,
5168     min-dist-from-end  INTEGER }
5169 
5170 Location-constraint ::= SEQUENCE {    -- each enumerated field defaults to its zero value
5171     strand         Strand-constraint DEFAULT any,
5172     seq-type       Seqtype-constraint DEFAULT any,
5173     partial5       Partial-constraint DEFAULT either,
5174     partial3       Partial-constraint DEFAULT either,
5175     location-type  Location-type-constraint DEFAULT any,
5176     end5           Location-pos-constraint OPTIONAL,
5177     end3           Location-pos-constraint OPTIONAL }
5178 
5179 Object-type-constraint ::= ENUMERATED {    -- selects any object, features only, or descriptors only
5180     any (0),
5181     feature (1),
5182     descriptor (2) }
5183 
5184 
5185 -- feature values --
5186 
5187 Macro-feature-type ::= ENUMERATED {    -- feature kinds; type of the "type" field in the Feature-field types
5188     any (0),
5189     gene (1),
5190     org (2),
5191     cds (3),
5192     prot (4),
5193     preRNA (5),
5194     mRNA (6),
5195     tRNA (7),
5196     rRNA (8),
5197     snRNA (9),
5198     scRNA (10),
5199     otherRNA (11),
5200     pub (12),
5201     seq (13),
5202     imp (14),
5203     allele (15),
5204     attenuator (16),
5205     c-region (17),
5206     caat-signal (18),
5207     imp-CDS (19),
5208     conflict (20),
5209     d-loop (21),
5210     d-segment (22),
5211     enhancer (23),
5212     exon (24),
5213     gC-signal (25),
5214     iDNA (26),
5215     intron (27),
5216     j-segment (28),
5217     ltr (29),
5218     mat-peptide (30),
5219     misc-binding (31),
5220     misc-difference (32),
5221     misc-feature (33),
5222     misc-recomb (34),
5223     misc-RNA (35),
5224     misc-signal (36),
5225     misc-structure (37),
5226     modified-base (38),
5227     mutation (39),
5228     n-region (40),
5229     old-sequence (41),
5230     polyA-signal (42),
5231     polyA-site (43),
5232     precursor-RNA (44),
5233     prim-transcript (45),
5234     primer-bind (46),
5235     promoter (47),
5236     protein-bind (48),
5237     rbs (49),
5238     repeat-region (50),
5239     rep-origin (51),
5240     s-region (52),
5241     sig-peptide (53),
5242     source (54),
5243     stem-loop (55),
5244     sts (56),
5245     tata-signal (57),
5246     terminator (58),
5247     transit-peptide (59),
5248     unsure (60),
5249     v-region (61),
5250     v-segment (62),
5251     variation (63),
5252     virion (64),
5253     n3clip (65),
5254     n3UTR (66),
5255     n5clip (67),
5256     n5UTR (68),
5257     n10-signal (69),
5258     n35-signal (70),
5259     site-ref (71),
5260     region (72),
5261     comment (73),
5262     bond (74),
5263     site (75),
5264     rsite (76),
5265     user (77),
5266     txinit (78),
5267     num (79),
5268     psec-str (80),
5269     non-std-residue (81),
5270     het (82),
5271     biosrc (83),
5272     preprotein (84),
5273     mat-peptide-aa (85),
5274     sig-peptide-aa (86),
5275     transit-peptide-aa (87),
5276     snoRNA (88),
5277     gap (89),
5278     operon (90),
5279     oriT (91),
5280     ncRNA (92),
5281     tmRNA (93),
5282     mobile-element (94) }
5283 
5284 Feat-qual-legal ::= ENUMERATED {    -- enumerated feature qualifier names, numbered from 1
5285     allele (1),
5286     activity (2),
5287     anticodon (3),
5288     bound-moiety (4),
5289     chromosome (5),
5290     citation (6),
5291     codon (7),
5292     codon-start (8),
5293     codons-recognized (9),
5294     compare (10),
5295     cons-splice (11),
5296     db-xref (12),
5297     description (13),
5298     direction (14),
5299     ec-number (15),
5300     environmental-sample (16),
5301     evidence (17),
5302     exception (18),
5303     experiment (19),
5304     focus (20),
5305     frequency (21),
5306     function (22),
5307     gene (23),
5308     gene-description (24),
5309     inference (25),
5310     label (26),
5311     locus-tag (27),
5312     map (28),
5313     mobile-element (29),
5314     mod-base (30),
5315     mol-type (31),
5316     ncRNA-class (32),
5317     note (33),
5318     number (34),
5319     old-locus-tag (35),
5320     operon (36),
5321     organism (37),
5322     organelle (38),
5323     partial (39),
5324     phenotype (40),
5325     plasmid (41),
5326     product (42),
5327     protein-id (43),
5328     pseudo (44),
5329     rearranged (45),
5330     replace (46),
5331     rpt-family (47),
5332     rpt-type (48),
5333     rpt-unit (49),
5334     rpt-unit-seq (50),
5335     rpt-unit-range (51),
5336     segment (52),
5337     sequenced-mol (53),
5338     standard-name (54),
5339     synonym (55),
5340     transcript-id (56),
5341     transgenic (57),
5342     translation (58),
5343     transl-except (59),
5344     transl-table (60),
5345     usedin (61),
5346     mobile-element-type (62),
5347     mobile-element-name (63),
5348     gene-comment (64),
5349     satellite (65),
5350     satellite-type (66),
5351     satellite-name (67),
5352     location (68),
5353     tag-peptide (69),
5354     mobile-element-type-type (70),
5355     name (71),
5356     pcr-conditions (72) }
5357 
5358 Feat-qual-legal-val ::= SEQUENCE {    -- a legal qualifier paired with a text value
5359     qual  Feat-qual-legal,
5360     val   VisibleString }
5361 
5362 Feat-qual-legal-val-choice ::= CHOICE {    -- single-alternative choice; element of Feat-qual-legal-set
5363     qual  Feat-qual-legal-val }
5364 
5365 Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice    -- unordered collection of qualifier/value choices
5366 
5367 Feat-qual-choice ::= CHOICE {    -- either an enumerated qualifier or a String-constraint
5368     legal-qual    Feat-qual-legal,
5369     illegal-qual  String-constraint }
5370 
5371 Feature-field ::= SEQUENCE {    -- a feature type together with one qualifier choice
5372     type   Macro-feature-type,
5373     field  Feat-qual-choice }
5374 
5375 Feature-field-legal ::= SEQUENCE {    -- like Feature-field, but field is restricted to Feat-qual-legal
5376     type   Macro-feature-type,
5377     field  Feat-qual-legal }
5378 
5379 Feature-field-pair ::= SEQUENCE {    -- one feature type with a from/to pair of qualifier choices
5380     type        Macro-feature-type,
5381     field-from  Feat-qual-choice,
5382     field-to    Feat-qual-choice }
5383 
5384 Rna-feat-type ::= CHOICE {    -- only the ncRNA alternative carries a string; the others are NULL
5385     any      NULL,
5386     preRNA   NULL,
5387     mRNA     NULL,
5388     tRNA     NULL,
5389     rRNA     NULL,
5390     ncRNA    VisibleString,
5391     tmRNA    NULL,
5392     miscRNA  NULL }
5393 
5394 Rna-field ::= ENUMERATED {    -- type of Rna-qual.field and of Rna-qual-pair.field-from / field-to
5395     product (1),
5396     comment (2),
5397     codons-recognized (3),
5398     ncrna-class (4),
5399     anticodon (5),
5400     transcript-id (6),
5401     gene-locus (7),
5402     gene-description (8),
5403     gene-maploc (9),
5404     gene-locus-tag (10),
5405     gene-synonym (11),
5406     gene-comment (12),
5407     tag-peptide (13) }
5408     
5409 
5410 Rna-qual ::= SEQUENCE {    -- an RNA feature type together with one RNA field
5411     type   Rna-feat-type,
5412     field  Rna-field }
5413 
5414 Rna-qual-pair ::= SEQUENCE {    -- one RNA feature type with a from/to pair of RNA fields
5415     type        Rna-feat-type,
5416     field-from  Rna-field,
5417     field-to    Rna-field }
5418 
5419 Source-qual ::= ENUMERATED {    -- enumerated source qualifier names, numbered from 1
5420     acronym (1),
5421     anamorph (2),
5422     authority (3),
5423     bio-material (4),
5424     biotype (5),
5425     biovar (6),
5426     breed (7),
5427     cell-line (8),
5428     cell-type (9),
5429     chemovar (10),
5430     chromosome (11),
5431     clone (12),
5432     clone-lib (13),
5433     collected-by (14),
5434     collection-date (15),
5435     common (16),
5436     common-name (17),
5437     country (18),
5438     cultivar (19),
5439     culture-collection (20),
5440     dev-stage (21),
5441     division (22),
5442     dosage (23),
5443     ecotype (24),
5444     endogenous-virus-name (25),
5445     environmental-sample (26),
5446     forma (27),
5447     forma-specialis (28),
5448     frequency (29),
5449     fwd-primer-name (30),
5450     fwd-primer-seq (31),
5451     gb-acronym (32),
5452     gb-anamorph (33),
5453     gb-synonym (34),
5454     genotype (35),
5455     germline (36),
5456     group (37),
5457     haplotype (38),
5458     identified-by (39),
5459     insertion-seq-name (40),
5460     isolate (41),
5461     isolation-source (42),
5462     lab-host (43),
5463     lat-lon (44),
5464     lineage (45),
5465     map (46),
5466     metagenome-source (47),
5467     metagenomic (48),
5468     old-lineage (49),
5469     old-name (50),
5470     orgmod-note (51),
5471     nat-host (52),
5472     pathovar (53),
5473     plasmid-name (54),
5474     plastid-name (55),
5475     pop-variant (56),
5476     rearranged (57),
5477     rev-primer-name (58),
5478     rev-primer-seq (59),
5479     segment (60),
5480     serogroup (61),
5481     serotype (62),
5482     serovar (63),
5483     sex (64),
5484     specimen-voucher (65),
5485     strain (66),
5486     subclone (67),
5487     subgroup (68),
5488     subsource-note (69),
5489     sub-species (70),
5490     substrain (71),
5491     subtype (72),
5492     synonym (73),
5493     taxname (74),
5494     teleomorph (75),
5495     tissue-lib (76),
5496     tissue-type (77),
5497     transgenic (78),
5498     transposon-name (79),
5499     type (80),
5500     variety (81),
5501     specimen-voucher-INST (82),
5502     specimen-voucher-COLL (83),
5503     specimen-voucher-SpecID (84),
5504     culture-collection-INST (85),
5505     culture-collection-COLL (86),
5506     culture-collection-SpecID (87),
5507     bio-material-INST (88),
5508     bio-material-COLL (89),
5509     bio-material-SpecID (90),
5510     all-notes (91),
5511     mating-type (92),
5512     linkage-group (93),
5513     haplogroup (94),
5514     all-quals (95),
5515     dbxref (96),
5516     taxid (97),
5517     all-primers (98),
5518     altitude (99)
5519 }
5520 
5521 Source-qual-pair ::= SEQUENCE {    -- a from/to pair of source qualifiers
5522     field-from  Source-qual,
5523     field-to    Source-qual }
5524 
5525 Source-location ::= ENUMERATED {    -- type of the "location" alternative in the Source-qual choices
5526     unknown (0),
5527     genomic (1),
5528     chloroplast (2),
5529     chromoplast (3),
5530     kinetoplast (4),
5531     mitochondrion (5),
5532     plastid (6),
5533     macronuclear (7),
5534     extrachrom (8),
5535     plasmid (9),
5536     transposon (10),
5537     insertion-seq (11),
5538     cyanelle (12),
5539     proviral (13),
5540     virion (14),
5541     nucleomorph (15),
5542     apicoplast (16),
5543     leucoplast (17),
5544     proplastid (18),
5545     endogenous-virus (19),
5546     hydrogenosome (20),
5547     chromosome (21),
5548     chromatophore (22) }
5549 
5550 Source-origin ::= ENUMERATED {    -- type of the "origin" alternative in the Source-qual choices
5551     unknown (0),
5552     natural (1),
5553     natmut (2),
5554     mut (3),
5555     artificial (4),
5556     synthetic (5),
5557     other (255) }
5558 
5559 Source-qual-choice ::= CHOICE {    -- names one source field, without a value
5560     textqual  Source-qual,
5561     location  Source-location,
5562     origin    Source-origin,
5563     gcode     INTEGER,
5564     mgcode    INTEGER }
5565 
5566 Source-qual-text-val ::= SEQUENCE {    -- a source qualifier paired with a text value
5567     srcqual  Source-qual,
5568     val      VisibleString }
5569     
5570 Source-qual-val-choice ::= CHOICE {    -- same alternatives as Source-qual-choice, but textqual carries a value
5571     textqual  Source-qual-text-val,
5572     location  Source-location,
5573     origin    Source-origin,
5574     gcode     INTEGER,
5575     mgcode    INTEGER }
5576 
5577 Source-qual-val-set ::= SET OF Source-qual-val-choice    -- unordered collection of source qualifier values
5578 
5579 CDSGeneProt-field ::= ENUMERATED {    -- field names prefixed cds-, gene-, mrna-, prot-, mat-peptide-, plus codon-start
5580     cds-comment (1),
5581     gene-locus (2),
5582     gene-description (3),
5583     gene-comment (4),
5584     gene-allele (5),
5585     gene-maploc (6),
5586     gene-locus-tag (7),
5587     gene-synonym (8),
5588     gene-old-locus-tag (9),
5589     mrna-product (10),
5590     mrna-comment (11),
5591     prot-name (12),
5592     prot-description (13),
5593     prot-ec-number (14),
5594     prot-activity (15),
5595     prot-comment (16),
5596     mat-peptide-name (17),
5597     mat-peptide-description (18),
5598     mat-peptide-ec-number (19),
5599     mat-peptide-activity (20),
5600     mat-peptide-comment (21),
5601     cds-inference (22),
5602     gene-inference (23),
5603     codon-start (24) }
5604 
5605 CDSGeneProt-field-pair ::= SEQUENCE {    -- a from/to pair of CDSGeneProt fields
5606     field-from  CDSGeneProt-field,
5607     field-to    CDSGeneProt-field }
5608 
5609 Molecule-type ::= ENUMERATED {    -- type of Molinfo-field.molecule
5610     unknown (0),
5611     genomic (1),
5612     precursor-RNA (2),
5613     mRNA (3),
5614     rRNA (4),
5615     tRNA (5),
5616     genomic-mRNA (6),
5617     cRNA (7),
5618     transcribed-RNA (8),
5619     ncRNA (9),
5620     transfer-messenger-RNA (10),
5621     macro-other (11) }
5622 
5623 Technique-type ::= ENUMERATED {    -- type of Molinfo-field.technique
5624     unknown (0),
5625     standard (1),
5626     est (2),
5627     sts (3),
5628     survey (4),
5629     genetic-map (5),
5630     physical-map (6),
5631     derived (7),
5632     concept-trans (8),
5633     seq-pept (9),
5634     both (10),
5635     seq-pept-overlap (11),
5636     seq-pept-homol (12),
5637     concept-trans-a (13),
5638     htgs-1 (14),
5639     htgs-2 (15),
5640     htgs-3 (16),
5641     fli-cDNA (17),
5642     htgs-0 (18),
5643     htc (19),
5644     wgs (20),
5645     barcode (21),
5646     composite-wgs-htgs (22),
5647     tsa (23),
5648     other (24) }
5649 
5650 Completedness-type ::= ENUMERATED {    -- type of Molinfo-field.completedness
5651   unknown (0) ,
5652   complete (1) ,
5653   partial (2) ,
5654   no-left (3) ,
5655   no-right (4) ,
5656   no-ends (5) ,
5657   has-left (6) ,
5658   has-right (7) ,
5659   other (8) }    -- was (6), colliding with has-left; ENUMERATED values must be distinct, 8 is the next free value
5660 
5661 Molecule-class-type ::= ENUMERATED {    -- type of Molinfo-field.mol-class
5662     unknown (0),
5663     dna (1),
5664     rna (2),
5665     protein (3),
5666     nucleotide (4),
5667     other (5) }
5668 
5669 Topology-type ::= ENUMERATED {    -- type of Molinfo-field.topology
5670     unknown (0),
5671     linear (1),
5672     circular (2),
5673     tandem (3),
5674     other (4) }
5675 
5676 Strand-type ::= ENUMERATED {    -- type of Molinfo-field.strand
5677     unknown (0),
5678     single (1),
5679     double (2),
5680     mixed (3),
5681     mixed-rev (4),
5682     other (5) }
5683 
5684 Molinfo-field ::= CHOICE {    -- exactly one molinfo attribute with its enumerated value
5685     molecule       Molecule-type,
5686     technique      Technique-type,
5687     completedness  Completedness-type,
5688     mol-class      Molecule-class-type,
5689     topology       Topology-type,
5690     strand         Strand-type }
5691 
5692 Molinfo-molecule-pair ::= SEQUENCE {    -- from/to pair of Molecule-type values
5693     from  Molecule-type,
5694     to    Molecule-type }
5695 
5696 Molinfo-technique-pair ::= SEQUENCE {    -- from/to pair of Technique-type values
5697     from  Technique-type,
5698     to    Technique-type }
5699 
5700 Molinfo-completedness-pair ::= SEQUENCE {    -- from/to pair of Completedness-type values
5701     from  Completedness-type,
5702     to    Completedness-type }
5703 
5704 Molinfo-mol-class-pair ::= SEQUENCE {    -- from/to pair of Molecule-class-type values
5705     from  Molecule-class-type,
5706     to    Molecule-class-type }
5707 
5708 Molinfo-topology-pair ::= SEQUENCE {    -- from/to pair of Topology-type values
5709     from  Topology-type,
5710     to    Topology-type }
5711 
5712 Molinfo-strand-pair ::= SEQUENCE {    -- from/to pair of Strand-type values
5713     from  Strand-type,
5714     to    Strand-type }
5715 
5716 Molinfo-field-pair ::= CHOICE {    -- one from/to pair; alternatives mirror those of Molinfo-field
5717     molecule       Molinfo-molecule-pair,
5718     technique      Molinfo-technique-pair,
5719     completedness  Molinfo-completedness-pair,
5720     mol-class      Molinfo-mol-class-pair,
5721     topology       Molinfo-topology-pair,
5722     strand         Molinfo-strand-pair }
5723 
5724 Molinfo-field-list ::= SET OF Molinfo-field  -- referenced by Molinfo-block.to-list and Molinfo-block.from-list
5725 
5726 Molinfo-field-constraint ::= SEQUENCE {  -- referenced by Constraint-choice.molinfo
5727     field Molinfo-field ,
5728     is-not BOOLEAN DEFAULT FALSE }  -- presumably negates the match when TRUE; confirm against the consuming code
5729 
5730 -- publication fields --
5731 
5732 Publication-field ::=  ENUMERATED {  -- referenced by Field-type.pub, Pub-field-constraint.field and Pub-field-special-constraint.field; numbering starts at 1
5733     cit (1) ,
5734     authors (2) ,
5735     journal (3) ,
5736     volume (4) ,
5737     issue (5) ,
5738     pages (6) ,
5739     date (7) ,
5740     serial-number (8) ,
5741     title (9) ,
5742     affiliation (10) ,
5743     affil-div (11) ,
5744     affil-city (12) ,
5745     affil-sub (13) ,
5746     affil-country (14) ,
5747     affil-street (15) ,
5748     affil-email (16) ,
5749     affil-fax (17) ,
5750     affil-phone (18) ,
5751     affil-zipcode (19),
5752     authors-initials (20),
5753     pmid (21),
5754     pub-class (22)
5755     }
5756   
5757 -- structured comment fields --
5758 
5759 Structured-comment-field ::= CHOICE {  -- referenced by Field-type.struc-comment-field and Structured-comment-field-pair
5760   database NULL ,
5761   named VisibleString ,  -- the only alternative that carries a value
5762   field-name NULL
5763   }
5764 
5765 Structured-comment-field-pair ::= SEQUENCE {  -- referenced by Field-pair-type.struc-comment-field
5766   from Structured-comment-field ,
5767   to Structured-comment-field
5768   }
5769   
5770 -- misc fields --
5771 -- these would not appear in pairs --
5772 Misc-field ::= ENUMERATED {  -- referenced by Field-type.misc; Field-pair-type has no matching alternative
5773     genome-project-id (1) ,
5774     comment-descriptor (2) ,
5775     defline (3) ,
5776     keyword (4)
5777     }
5778 
5779 -- dblink fields --
5780 DBLink-field-type ::= ENUMERATED {  -- referenced by Field-type.dblink and DBLink-field-pair
5781   trace-assembly (1) ,
5782   bio-sample (2) ,
5783   probe-db (3) ,
5784   sequence-read-archve (4) ,  -- NOTE(review): spelled "archve" (sic); the name is a schema identifier, so it is left unchanged
5785   bio-project (5) ,
5786   assembly (6) }
5787 
5788 DBLink-field-pair ::= SEQUENCE {  -- referenced by Field-pair-type.dblink
5789   from DBLink-field-type ,
5790   to DBLink-field-type
5791   }
5792      
5793 -- complex constraints --
5794 
5795 Pub-type ::= ENUMERATED {  -- referenced by Publication-constraint.type
5796   any (0) ,
5797   published (1) ,
5798   unpublished (2) ,
5799   in-press (3) ,
5800   submitter-block (4) }
5801 
5802 Pub-field-constraint ::= SEQUENCE {  -- a String-constraint tied to one publication field; referenced by Publication-constraint.field
5803   field Publication-field ,
5804   constraint String-constraint }
5805 
5806 Pub-field-special-constraint-type ::= CHOICE {  -- every alternative is NULL, so only the selected name carries information; referenced by Pub-field-special-constraint.constraint
5807   is-present NULL ,
5808   is-not-present NULL ,
5809   is-all-caps NULL ,
5810   is-all-lower NULL ,
5811   is-all-punct NULL }
5812 
5813 Pub-field-special-constraint ::= SEQUENCE {  -- referenced by Publication-constraint.special-field
5814   field Publication-field ,
5815   constraint Pub-field-special-constraint-type }
5816   
5817 Publication-constraint ::= SEQUENCE {  -- referenced by Constraint-choice.pub; type is required, both field constraints are optional
5818   type Pub-type ,
5819   field Pub-field-constraint OPTIONAL ,
5820   special-field Pub-field-special-constraint OPTIONAL }
5821 
5822 Source-constraint ::= SEQUENCE {  -- referenced by Constraint-choice.source; every member is optional
5823   field1 Source-qual-choice OPTIONAL ,
5824   field2 Source-qual-choice OPTIONAL ,
5825   constraint String-constraint OPTIONAL ,
5826   type-constraint Object-type-constraint OPTIONAL }
5827 
5828 CDSGeneProt-feature-type-constraint ::= ENUMERATED {  -- referenced by CDSGeneProt-pseudo-constraint.feature
5829     gene (1) ,
5830     mRNA (2) ,
5831     cds (3) ,
5832     prot (4) ,
5833     exon (5) ,
5834     mat-peptide (6) }
5835 
5836 CDSGeneProt-pseudo-constraint ::= SEQUENCE {  -- referenced by Constraint-choice.cdsgeneprot-pseudo
5837     feature CDSGeneProt-feature-type-constraint ,
5838     is-pseudo BOOLEAN DEFAULT TRUE }  -- note the default is TRUE, unlike most BOOLEAN defaults in this module
5839 
5840 CDSGeneProt-constraint-field ::= CHOICE {  -- CHOICE with a single alternative; referenced by CDSGeneProt-qual-constraint field1 and field2
5841   field CDSGeneProt-field }
5842 
5843 CDSGeneProt-qual-constraint ::= SEQUENCE {  -- referenced by Constraint-choice.cdsgeneprot-qual; every member is optional
5844   field1 CDSGeneProt-constraint-field OPTIONAL ,
5845   field2 CDSGeneProt-constraint-field OPTIONAL ,
5846   constraint String-constraint OPTIONAL }
5847 
5848 Field-constraint ::= SEQUENCE {  -- a String-constraint tied to one Field-type; referenced by Constraint-choice.field
5849   field Field-type ,
5850   string-constraint String-constraint }
5851 
5852 Sequence-constraint-rnamol ::= ENUMERATED {  -- referenced by Sequence-constraint-mol-type-constraint.rna
5853   any (0) ,
5854   genomic (1) ,
5855   precursor-RNA (2) ,
5856   mRNA (3) ,
5857   rRNA (4) ,
5858   tRNA (5) ,
5859   genomic-mRNA (6) ,
5860   cRNA (7) ,
5861   transcribed-RNA (8) ,
5862   ncRNA (9) ,
5863   transfer-messenger-RNA (10) }
5864 
5865 Sequence-constraint-mol-type-constraint ::= CHOICE {  -- referenced by Sequence-constraint.seqtype
5866   any NULL ,
5867   nucleotide NULL ,
5868   dna NULL ,
5869   rna Sequence-constraint-rnamol ,  -- the only alternative with a payload: the RNA subtype
5870   protein NULL }
5871 
5872 Quantity-constraint ::= CHOICE {  -- one integer comparison; referenced by Sequence-constraint, Translation-constraint.num-mismatches and Extend-to-feature.distance
5873   equals INTEGER ,
5874   greater-than INTEGER ,
5875   less-than INTEGER }
5876 
5877 Feature-strandedness-constraint ::= ENUMERATED {  -- referenced by Sequence-constraint.strandedness, where the default is any
5878   any (0) ,
5879   minus-only (1) ,
5880   plus-only (2) ,
5881   at-least-one-minus (3) ,
5882   at-least-one-plus (4) ,
5883   no-minus (5) ,
5884   no-plus (6) }
5885 
5886 Sequence-constraint ::= SEQUENCE {  -- referenced by Constraint-choice.sequence
5887     seqtype Sequence-constraint-mol-type-constraint OPTIONAL ,
5888     id String-constraint OPTIONAL ,
5889     feature Macro-feature-type ,  -- the only member that is neither OPTIONAL nor defaulted
5890     num-type-features Quantity-constraint OPTIONAL ,
5891     num-features Quantity-constraint OPTIONAL ,
5892     length Quantity-constraint OPTIONAL ,
5893     strandedness Feature-strandedness-constraint DEFAULT any }
5894 
5895 Match-type-constraint ::= ENUMERATED {  -- three-state flag; referenced by Translation-constraint.internal-stops
5896   dont-care (0) ,
5897   yes (1) ,
5898   no (2) }
5899 
5900 Translation-constraint ::= SEQUENCE {  -- referenced by Constraint-choice.translation
5901   actual-strings String-constraint-set ,
5902   transl-strings String-constraint-set ,
5903   internal-stops Match-type-constraint DEFAULT dont-care ,
5904   num-mismatches Quantity-constraint OPTIONAL }
5905 
5906 Constraint-choice ::= CHOICE {  -- a single constraint of any supported kind; collected by Constraint-choice-set
5907     string String-constraint ,
5908     location Location-constraint ,
5909     field  Field-constraint ,
5910     source Source-constraint ,
5911     cdsgeneprot-qual CDSGeneProt-qual-constraint ,
5912     cdsgeneprot-pseudo CDSGeneProt-pseudo-constraint ,
5913     sequence Sequence-constraint ,
5914     pub Publication-constraint ,
5915     molinfo Molinfo-field-constraint ,
5916     field-missing Field-type ,  -- carries only the field, with no String-constraint attached
5917     translation Translation-constraint }
5918 
5919 Constraint-choice-set ::= SET OF Constraint-choice  -- used as the constraint member of most action types in this module, e.g. AECR-action.constraint
5920 
5921 Text-marker ::= CHOICE {  -- referenced by Text-portion.left-marker and Text-portion.right-marker
5922     free-text VisibleString ,
5923     digits NULL ,
5924     letters NULL }
5925 
5926 Text-portion ::= SEQUENCE {  -- referenced by AECRParse-action, Parse-action, Remove-outside-action, Text-transform.remove, Create-TSA-ids-src.defline and Create-TSA-ids-action
5927     left-marker Text-marker  OPTIONAL ,
5928     include-left BOOLEAN ,  -- required even though left-marker is optional
5929     right-marker Text-marker  OPTIONAL ,
5930     include-right BOOLEAN ,  -- required even though right-marker is optional
5931     inside BOOLEAN ,
5932     case-sensitive BOOLEAN DEFAULT FALSE ,  -- note Field-edit uses the opposite polarity (case-insensitive)
5933     whole-word BOOLEAN DEFAULT FALSE }
5934 
5935 Field-edit-location ::= ENUMERATED {  -- referenced by Field-edit.location, where the default is anywhere
5936     anywhere (0) ,
5937     beginning (1) ,
5938     end (2) }
5939 
5940 Field-edit ::= SEQUENCE {  -- find/replace description; referenced by Edit-action.edit and Text-transform.edit
5941     find-txt VisibleString ,
5942     repl-txt VisibleString OPTIONAL ,
5943     location Field-edit-location DEFAULT anywhere ,
5944     case-insensitive BOOLEAN DEFAULT FALSE }  -- note Text-portion uses the opposite polarity (case-sensitive)
5945 
5946 Field-type ::= CHOICE {  -- identifies one field of any supported kind; referenced by Field-constraint, Constraint-choice.field-missing, the single-field actions and Sort-fields-action
5947     source-qual Source-qual-choice ,
5948     feature-field Feature-field ,
5949     rna-field Rna-qual ,
5950     cds-gene-prot CDSGeneProt-field ,
5951     molinfo-field Molinfo-field ,
5952     pub Publication-field ,
5953     struc-comment-field Structured-comment-field ,
5954     misc Misc-field ,
5955     dblink DBLink-field-type }
5956 
5957 Field-pair-type ::= CHOICE {  -- from/to counterpart of Field-type, without pub or misc alternatives; referenced by Convert-action, Copy-action, Swap-action and AECRParse-action
5958     source-qual Source-qual-pair ,
5959     feature-field Feature-field-pair ,
5960     rna-field Rna-qual-pair ,
5961     cds-gene-prot CDSGeneProt-field-pair ,
5962     molinfo-field Molinfo-field-pair ,
5963     struc-comment-field Structured-comment-field-pair ,
5964     dblink DBLink-field-pair} 
5965 
5966 ExistingTextOption ::= ENUMERATED {  -- referenced as existing-text by Apply-action, Convert-action, Copy-action, AECRParse-action and Parse-action; numbering starts at 1
5967   replace-old (1) ,
5968   append-semi (2) ,
5969   append-space (3) ,
5970   append-colon (4) ,
5971   append-comma (5) ,
5972   append-none (6) ,
5973   prefix-semi (7) ,
5974   prefix-space (8) ,
5975   prefix-colon (9) ,
5976   prefix-comma (10) ,
5977   prefix-none (11) ,
5978   leave-old (12) ,
5979   add-qual (13) }
5980 
5981 
5982 Apply-action ::= SEQUENCE {  -- referenced by Action-choice.apply
5983     field Field-type ,
5984     value VisibleString ,
5985     existing-text ExistingTextOption }
5986 
5987 Edit-action ::= SEQUENCE {  -- a Field-edit applied to one Field-type; referenced by Action-choice.edit
5988     edit Field-edit ,
5989     field Field-type }
5990 
5991 Cap-change ::= ENUMERATED {  -- referenced by Text-transform.caps, Convert-action.capitalization and Parse-action.capitalization
5992     none (0) ,
5993     tolower (1) ,
5994     toupper (2) ,
5995     firstcap (3) ,
5996     firstcaprestnochange (4) ,  -- NOTE(review): unhyphenated, unlike firstlower-restnochange; names are schema identifiers and left unchanged
5997     firstlower-restnochange (5) ,
5998     cap-word-space (6) ,
5999     cap-word-space-punc (7)
6000     }
6001 
6002 Text-transform ::= CHOICE {  -- one text transformation; collected by Text-transform-set
6003   edit Field-edit ,
6004   caps Cap-change ,
6005   remove Text-portion }
6006 
6007 Text-transform-set ::= SET OF Text-transform  -- referenced by AECRParse-action.transform and Parse-action.transform
6008 
6009 Convert-action ::= SEQUENCE {  -- referenced by Action-choice.convert
6010     fields Field-pair-type ,
6011     strip-name BOOLEAN DEFAULT FALSE ,
6012     keep-original BOOLEAN DEFAULT FALSE ,
6013     capitalization Cap-change DEFAULT none ,
6014     existing-text ExistingTextOption }
6015 
6016 Copy-action ::= SEQUENCE {  -- referenced by Action-choice.copy
6017     fields Field-pair-type ,
6018     existing-text ExistingTextOption }
6019 
6020 Swap-action ::= SEQUENCE {  -- referenced by Action-choice.swap; has no existing-text member, unlike Convert-action and Copy-action
6021     fields Field-pair-type }
6022 
6023 AECRParse-action ::= SEQUENCE {  -- referenced by Action-choice.parse; distinct from the top-level Parse-action
6024     portion Text-portion ,
6025     fields Field-pair-type ,
6026     remove-from-parsed BOOLEAN DEFAULT FALSE ,
6027     remove-left BOOLEAN DEFAULT FALSE ,
6028     remove-right BOOLEAN DEFAULT FALSE ,
6029     transform Text-transform-set OPTIONAL ,
6030     existing-text ExistingTextOption }
6031 
6032 Remove-action ::= SEQUENCE {  -- referenced by Action-choice.remove
6033     field Field-type }
6034 
6035 Remove-outside-action ::= SEQUENCE {  -- referenced by Action-choice.remove-outside
6036     portion Text-portion ,
6037     field Field-type ,
6038     remove-if-not-found BOOLEAN DEFAULT FALSE }
6039 
6040 Action-choice ::= CHOICE {  -- the operation carried by an AECR-action (AECR-action.action)
6041     apply Apply-action ,
6042     edit Edit-action ,
6043     convert Convert-action ,
6044     copy Copy-action ,
6045     swap Swap-action ,
6046     remove Remove-action ,
6047     parse AECRParse-action ,
6048     remove-outside Remove-outside-action }
6049 
6050 AECR-action ::= SEQUENCE {  -- referenced by Macro-action-choice.aecr
6051     action Action-choice ,
6052     also-change-mrna BOOLEAN DEFAULT FALSE ,
6053     constraint Constraint-choice-set OPTIONAL }
6054 
6055 Parse-src-org-choice ::= CHOICE {  -- referenced by Parse-src-org.field
6056     source-qual Source-qual ,
6057     taxname-after-binomial NULL }
6058 
6059 Parse-src-org ::= SEQUENCE {  -- referenced by Parse-src.org
6060     field Parse-src-org-choice ,
6061     type Object-type-constraint DEFAULT any }
6062 
6063 -- For Parse-src-general-id tag, specify the db of the id from which you
6064 -- want to retrieve the tag.  If empty or null, any db will do.
6065 Parse-src-general-id ::= CHOICE {  -- referenced by Parse-src.general-id
6066     whole-text NULL ,
6067     db NULL ,
6068     tag VisibleString }  -- the only alternative that carries a string
6069 
6070 Parse-src ::= CHOICE {  -- referenced by Parse-action.src
6071     defline NULL ,
6072     flatfile NULL ,
6073     local-id NULL ,
6074     org Parse-src-org ,
6075     comment NULL ,
6076     bankit-comment NULL ,
6077     structured-comment VisibleString ,
6078     file-id NULL ,
6079     general-id Parse-src-general-id }
6080 
6081 Parse-dst-org ::= SEQUENCE {  -- referenced by Parse-dest.org; uses Source-qual-choice where Parse-src-org uses Parse-src-org-choice
6082     field Source-qual-choice ,
6083     type Object-type-constraint DEFAULT any }
6084 
6085 Parse-dest ::= CHOICE {  -- referenced by Parse-action.dest
6086     defline NULL ,
6087     org Parse-dst-org ,
6088     featqual Feature-field-legal ,
6089     comment-descriptor NULL ,
6090     dbxref VisibleString }
6091 
6092 Parse-action ::= SEQUENCE {  -- referenced by Macro-action-choice.parse; distinct from AECRParse-action
6093     portion Text-portion ,
6094     src Parse-src ,
6095     dest Parse-dest ,
6096     capitalization Cap-change DEFAULT none ,
6097     remove-from-parsed BOOLEAN DEFAULT FALSE ,
6098     transform Text-transform-set OPTIONAL ,
6099     existing-text ExistingTextOption }
6100 
6101 
6102 Location-interval ::= SEQUENCE {  -- referenced by Location-choice.interval
6103     from INTEGER ,
6104     to INTEGER  }
6105 
6106 Location-choice ::= CHOICE {  -- referenced by Apply-feature-action.location
6107     interval Location-interval ,
6108     whole-sequence NULL ,
6109     point INTEGER }
6110 
6111 Sequence-list ::= SET OF VisibleString  -- referenced by Sequence-list-choice.list
6112 Sequence-list-choice ::= CHOICE {  -- either an explicit list or all; referenced by Apply-feature-action.seq-list
6113     list Sequence-list ,
6114     all NULL }
6115     
6116 Apply-feature-action ::= SEQUENCE {  -- referenced by Macro-action-choice.add-feature
6117     type Macro-feature-type ,
6118     partial5 BOOLEAN DEFAULT FALSE ,
6119     partial3 BOOLEAN DEFAULT FALSE ,
6120     plus-strand BOOLEAN DEFAULT TRUE ,
6121     location Location-choice ,
6122     seq-list Sequence-list-choice ,
6123     add-redundant BOOLEAN DEFAULT TRUE ,
6124     add-mrna BOOLEAN DEFAULT FALSE ,
6125     apply-to-parts BOOLEAN DEFAULT FALSE ,
6126     only-seg-num INTEGER DEFAULT -1 ,  -- NOTE(review): -1 looks like a "no specific segment" sentinel; confirm against the consuming code
6127     fields Feat-qual-legal-set OPTIONAL,
6128     src-fields Source-qual-val-set OPTIONAL }
6129 
6130 Remove-feature-action ::= SEQUENCE {  -- referenced by Macro-action-choice.remove-feature
6131     type Macro-feature-type ,
6132     constraint Constraint-choice-set OPTIONAL }
6133 
6134 -- for convert features --
6135 Convert-from-CDS-options ::= SEQUENCE {  -- three required flags; referenced by Convert-feature-src-options.cds
6136   remove-mRNA BOOLEAN ,
6137   remove-gene BOOLEAN ,
6138   remove-transcript-id BOOLEAN }
6139 
6140 Convert-feature-src-options ::= CHOICE {  -- CHOICE with a single alternative; referenced by Convert-feature-action.src-options
6141   cds Convert-from-CDS-options }
6142 
6143 Bond-type ::= ENUMERATED {  -- referenced by Convert-feature-dst-options.bond
6144   disulfide (1) ,
6145   thioester (2) ,
6146   crosslink (3) ,
6147   thioether (4) ,
6148   other (5) }
6149 
6150 
6151 Site-type ::= ENUMERATED {  -- referenced by Convert-feature-dst-options.site; numbering starts at 1
6152   active (1) ,
6153   binding (2) ,
6154   cleavage (3) ,
6155   inhibit (4) ,
6156   modified (5) ,
6157   glycosylation (6) ,
6158   myristoylation (7) ,
6159   mutagenized (8) ,
6160   metal-binding (9) ,
6161   phosphorylation (10) ,
6162   acetylation (11) ,
6163   amidation (12) ,
6164   methylation (13) ,
6165   hydroxylation (14) ,
6166   sulfatation (15) ,
6167   oxidative-deamination (16) ,
6168   pyrrolidone-carboxylic-acid (17) ,
6169   gamma-carboxyglutamic-acid (18) ,
6170   blocked (19) ,
6171   lipid-binding (20) ,
6172   np-binding (21) ,
6173   dna-binding (22) ,
6174   signal-peptide (23) ,
6175   transit-peptide (24) ,
6176   transmembrane-region (25) ,
6177   nitrosylation (26) ,
6178   other (27) }
6179 
6180 -- other choice is to create protein sequences, skipping bad --
6181 Region-type ::= SEQUENCE {  -- referenced by Convert-feature-dst-options.region
6182   create-nucleotide BOOLEAN }  -- presumably FALSE selects the protein alternative described above; confirm
6183 
6184 Convert-feature-dst-options ::= CHOICE {  -- referenced by Convert-feature-action.dst-options
6185   bond Bond-type ,
6186   site Site-type ,
6187   region Region-type ,
6188   ncrna-class VisibleString ,
6189   remove-original BOOLEAN }
6190 
6191 
6192 Convert-feature-action ::= SEQUENCE {  -- referenced by Macro-action-choice.convert-feature
6193   type-from Macro-feature-type ,
6194   type-to Macro-feature-type ,
6195   src-options Convert-feature-src-options OPTIONAL ,
6196   dst-options Convert-feature-dst-options OPTIONAL ,
6197   leave-original BOOLEAN ,  -- NOTE(review): relationship to Convert-feature-dst-options.remove-original is not stated in this spec
6198   src-feat-constraint Constraint-choice-set OPTIONAL } 
6199 
6200 
6201 Feature-location-strand-from ::= ENUMERATED {  -- referenced by Edit-location-strand.strand-from; includes any (0), which the -to type lacks
6202   any (0) ,
6203   plus (1) ,
6204   minus (2) ,
6205   unknown (3) ,
6206   both (4) }
6207 
6208 Feature-location-strand-to ::= ENUMERATED {  -- referenced by Edit-location-strand.strand-to; values 1 to 4 match the -from type, reverse (5) exists only here
6209   plus (1) ,
6210   minus (2) ,
6211   unknown (3) ,
6212   both (4) ,
6213   reverse (5) }
6214 
6215 Edit-location-strand ::= SEQUENCE {  -- referenced by Location-edit-type.strand
6216   strand-from Feature-location-strand-from ,
6217   strand-to   Feature-location-strand-to } 
6218 
6219 Partial-5-set-constraint ::= ENUMERATED {  -- referenced by Partial-5-set-action.constraint
6220   all (0) ,
6221   at-end (1) ,
6222   bad-start (2) ,
6223   frame-not-one (3) }
6224 
6225 Partial-5-set-action ::= SEQUENCE {  -- referenced by Location-edit-type.set-5-partial
6226   constraint Partial-5-set-constraint ,
6227   extend BOOLEAN }
6228 
6229 Partial-5-clear-constraint ::= ENUMERATED {  -- referenced directly by Location-edit-type.clear-5-partial (no wrapping action type)
6230   all (0) ,
6231   not-at-end (1) ,
6232   good-start (2) }
6233 
6234 Partial-3-set-constraint ::= ENUMERATED {  -- referenced by Partial-3-set-action.constraint
6235   all (0) ,
6236   at-end (1) ,
6237   bad-end (2) }
6238 
6239 Partial-3-set-action ::= SEQUENCE {  -- referenced by Location-edit-type.set-3-partial
6240   constraint Partial-3-set-constraint ,
6241   extend BOOLEAN }
6242 
6243 Partial-3-clear-constraint ::= ENUMERATED {  -- referenced directly by Location-edit-type.clear-3-partial (no wrapping action type)
6244   all (0) ,
6245   not-at-end (1) ,
6246   good-end (2) }
6247 
6248 Partial-both-set-constraint ::= ENUMERATED {  -- referenced by Partial-both-set-action.constraint
6249   all (0) ,
6250   at-end (1) }
6251 
6252 Partial-both-set-action ::= SEQUENCE {  -- referenced by Location-edit-type.set-both-partial
6253   constraint Partial-both-set-constraint ,
6254   extend BOOLEAN }
6255 
6256 Partial-both-clear-constraint ::= ENUMERATED {  -- referenced directly by Location-edit-type.clear-both-partial (no wrapping action type)
6257   all (0) ,
6258   not-at-end (1) }
6259   
6260 Convert-location-type ::= ENUMERATED {  -- referenced by Location-edit-type.convert
6261   join (1) ,
6262   order (2) ,
6263   merge (3) }
6264 
6265 Extend-to-feature ::= SEQUENCE {  -- referenced by Location-edit-type.extend-5-to-feat and extend-3-to-feat
6266   type Macro-feature-type ,
6267   include-feat BOOLEAN ,
6268   distance Quantity-constraint OPTIONAL }
6269   
6270 Location-edit-type ::= CHOICE {  -- the location edit to perform; referenced by Edit-feature-location-action.action
6271   strand Edit-location-strand ,
6272   set-5-partial Partial-5-set-action ,
6273   clear-5-partial Partial-5-clear-constraint ,
6274   set-3-partial Partial-3-set-action ,
6275   clear-3-partial Partial-3-clear-constraint ,
6276   set-both-partial Partial-both-set-action ,
6277   clear-both-partial Partial-both-clear-constraint ,
6278   convert Convert-location-type ,
6279   extend-5 NULL ,
6280   extend-3 NULL ,
6281   extend-5-to-feat Extend-to-feature ,
6282   extend-3-to-feat Extend-to-feature }
6283 
6284 Edit-feature-location-action ::= SEQUENCE {  -- referenced by Macro-action-choice.edit-location
6285   type Macro-feature-type ,
6286   action Location-edit-type ,
6287   retranslate-cds BOOLEAN OPTIONAL ,  -- OPTIONAL rather than DEFAULT, so absence is distinguishable from FALSE
6288   also-edit-gene BOOLEAN OPTIONAL ,
6289   constraint Constraint-choice-set OPTIONAL }
6290 
6291 Molinfo-block ::= SEQUENCE {  -- referenced by Macro-action-choice.apply-molinfo-block
6292     to-list Molinfo-field-list  ,  -- required, whereas from-list is optional
6293     from-list Molinfo-field-list OPTIONAL ,
6294     constraint Constraint-choice-set OPTIONAL }
6295 
6296 Descriptor-type ::= ENUMERATED {  -- referenced by Remove-descriptor-action.type
6297   all (0) ,
6298   title (1) ,
6299   source (2) ,
6300   publication (3) ,
6301   comment (4) ,
6302   genbank (5) ,
6303   user (6) ,
6304   create-date (7) ,
6305   update-date (8) ,
6306   mol-info (9) ,
6307   structured-comment (10) ,
6308   genome-project-id (11) }
6309 
6310 Remove-descriptor-action ::= SEQUENCE {  -- referenced by Macro-action-choice.remove-descriptor
6311   type Descriptor-type ,
6312   constraint Constraint-choice-set OPTIONAL }  
6313 
6314 Autodef-list-type ::= ENUMERATED {  -- referenced by Autodef-action.clause-list-type
6315   feature-list (1) ,
6316   complete-sequence (2) ,
6317   complete-genome (3) ,
6318   sequence (4) }
6319   
6320 Autodef-misc-feat-parse-rule ::= ENUMERATED {  -- referenced by Autodef-action.misc-feat-parse-rule
6321   use-comment-before-first-semicolon (1) ,
6322   look-for-noncoding-products (2) }
6323 
6324 Autodef-action ::= SEQUENCE {  -- referenced by Macro-action-choice.autodef
6325   modifiers SET OF Source-qual OPTIONAL ,
6326   clause-list-type Autodef-list-type ,
6327   misc-feat-parse-rule Autodef-misc-feat-parse-rule DEFAULT look-for-noncoding-products }
6328 
6329 Fix-pub-caps-action ::= SEQUENCE {  -- referenced by Fix-caps-action.pub and Macro-action-choice.fix-pub-caps
6330   title BOOLEAN OPTIONAL ,
6331   authors BOOLEAN OPTIONAL ,
6332   affiliation BOOLEAN OPTIONAL ,
6333   affil-country BOOLEAN OPTIONAL ,
6334   punct-only BOOLEAN DEFAULT FALSE ,
6335   constraint Constraint-choice-set OPTIONAL }
6336 
6337 Sort-order ::= ENUMERATED {  -- referenced by Sort-fields-action.order
6338   short-to-long (1) ,
6339   long-to-short (2) ,
6340   alphabetical (3) }
6341 
6342 Sort-fields-action ::= SEQUENCE {  -- referenced by Macro-action-choice.sort-fields
6343   field Field-type ,
6344   order Sort-order ,
6345   constraint Constraint-choice-set OPTIONAL }
6346   
6347 Fix-author-caps ::= SEQUENCE {  -- referenced by Fix-caps-action.author
6348   last-name-only BOOLEAN }
6349 
6350 Fix-caps-action ::= CHOICE {  -- referenced by Macro-action-choice.fix-caps
6351   pub Fix-pub-caps-action ,
6352   src-country NULL ,
6353   mouse-strain NULL ,
6354   src-qual Source-qual ,
6355   author Fix-author-caps }
6356 
6357 Fix-format-action ::= CHOICE {  -- every alternative is NULL; referenced by Macro-action-choice.fix-format
6358   collection-date NULL ,
6359   lat-lon NULL ,
6360   primers NULL ,
6361   protein-name NULL }
6362 
6363 Remove-duplicate-feature-action ::= SEQUENCE {  -- referenced by Macro-action-choice.remove-duplicate-features
6364   type Macro-feature-type ,
6365   ignore-partials BOOLEAN ,
6366   case-sensitive BOOLEAN ,
6367   remove-proteins BOOLEAN ,
6368   rd-constraint Constraint-choice-set OPTIONAL }  -- named rd-constraint here; other actions call the same member constraint
6369 
6370 Gene-xref-suppression-type ::= ENUMERATED {  -- referenced by Gene-xref-type.suppression
6371   any (0) ,
6372   suppressing (1) ,
6373   non-suppressing (2) }
6374 
6375 Gene-xref-necessary-type ::= ENUMERATED {  -- referenced by Gene-xref-type.necessary
6376   any (0) ,
6377   necessary (1) ,
6378   unnecessary (2) }
6379 
6380 Gene-xref-type ::= SEQUENCE {  -- referenced by Xref-type.gene
6381   feature Macro-feature-type ,
6382   suppression Gene-xref-suppression-type ,
6383   necessary Gene-xref-necessary-type }
6384   
6385 Xref-type ::= CHOICE {  -- CHOICE with a single alternative; referenced by Remove-xrefs-action.xref-type
6386   gene Gene-xref-type }
6387 
6388 Remove-xrefs-action ::= SEQUENCE {  -- referenced by Macro-action-choice.remove-xrefs
6389   xref-type Xref-type ,
6390   constraint Constraint-choice-set OPTIONAL } 
6391 
6392 Make-gene-xref-action ::= SEQUENCE {  -- referenced by Macro-action-choice.make-gene-xrefs
6393   feature Macro-feature-type ,
6394   constraint Constraint-choice-set OPTIONAL } 
6395 
6396 Author-fix-type ::= ENUMERATED {  -- referenced by Author-fix-action.fix-type
6397   truncate-middle-initials (1) ,
6398   strip-suffix (2) ,
6399   move-middle-to-first (3) }
6400 
6401 Author-fix-action ::= SEQUENCE {  -- referenced by Macro-action-choice.fix-author
6402   fix-type Author-fix-type ,
6403   constraint Constraint-choice-set OPTIONAL }  
6404 
6405 Update-sequences-action ::= SEQUENCE {  -- referenced by Macro-action-choice.update-sequences
6406   filename VisibleString ,
6407   add-cit-subs BOOLEAN DEFAULT FALSE }
6408 
6409 Create-TSA-ids-src ::= CHOICE {  -- referenced by Create-TSA-ids-action.src
6410   local-id NULL ,
6411   defline Text-portion
6412 }
6413 
6414 Create-TSA-ids-action ::= SEQUENCE {  -- referenced by Macro-action-choice.create-tsa-ids
6415   src Create-TSA-ids-src ,
6416   suffix VisibleString OPTIONAL ,
6417   id-text-portion Text-portion OPTIONAL }  
6418 
6419 Autofix-action ::= SEQUENCE {  -- referenced by Macro-action-choice.perform-autofix
6420   test-name VisibleString }
6421 
6422 Fix-sets-action ::= CHOICE {  -- every alternative is NULL; referenced by Macro-action-choice.fix-sets
6423   remove-single-item-set NULL ,
6424   renormalize-nuc-prot-sets NULL ,
6425   fix-pop-to-phy NULL
6426 }
6427 
6428 Table-match-type ::= CHOICE {  -- referenced by Table-match.match-type
6429   feature-id NULL ,
6430   gene-locus-tag NULL ,
6431   protein-id NULL,
6432   dbxref NULL ,
6433   nuc-id NULL ,
6434   src-qual Source-qual-choice ,  -- the only alternative with a payload
6435   protein-name NULL ,
6436   any NULL
6437 }
6438 
6439 Table-match ::= SEQUENCE {  -- referenced by Apply-table-action.match-type
6440   match-type Table-match-type ,
6441   match-location String-location DEFAULT equals
6442 }
6443 
6444 
6445 Apply-table-extra-data ::= CHOICE {  -- CHOICE with a single NULL alternative; referenced as in-memory-table by Apply-table-action and Add-file-action
6446   table NULL }
6447 
6448 Apply-table-action ::= SEQUENCE {  -- referenced by Macro-action-choice.apply-table
6449   filename VisibleString ,
6450   match-type Table-match ,
6451   in-memory-table Apply-table-extra-data OPTIONAL ,
6452   also-change-mrna BOOLEAN DEFAULT FALSE ,
6453   skip-blanks BOOLEAN DEFAULT TRUE
6454 }
6455 
6456 Add-file-action ::= SEQUENCE {  -- referenced by Add-descriptor-list-action.descriptor-list
6457   filename VisibleString ,
6458   in-memory-table Apply-table-extra-data OPTIONAL
6459 } 
6460 
6461 Add-descriptor-list-action ::= SEQUENCE {  -- referenced by Macro-action-choice.add-file-descriptors
6462   descriptor-list Add-file-action ,
6463   constraint Constraint-choice-set OPTIONAL
6464 }
6465 
6466 Remove-sequences-action ::= SEQUENCE {  -- referenced by Macro-action-choice.remove-sequences
6467   constraint Constraint-choice-set  -- required here, whereas other actions make constraint OPTIONAL
6468 }
6469 
6470 Update-replaced-ec-numbers-action ::= SEQUENCE {  -- three required flags; referenced by Macro-action-choice.update-replaced-ecnumbers
6471   delete-improper-format BOOLEAN ,
6472   delete-unrecognized BOOLEAN ,
6473   delete-multiple-replacement BOOLEAN
6474 }
6475 
6476 
6477 Retranslate-cds-action ::= SEQUENCE {  -- referenced by Macro-action-choice.retranslate-cds
6478   obey-stop-codon BOOLEAN
6479 }
6480 
6481 
6482 Macro-action-choice ::= CHOICE {  -- one macro step; NULL alternatives take no parameters; collected by Macro-action-list
6483   aecr AECR-action ,
6484   parse Parse-action ,
6485   add-feature Apply-feature-action ,
6486   remove-feature Remove-feature-action ,
6487   convert-feature Convert-feature-action ,
6488   edit-location Edit-feature-location-action ,
6489   remove-descriptor Remove-descriptor-action ,
6490   autodef Autodef-action ,
6491   removesets NULL ,
6492   trim-junk-from-primer-seq NULL ,
6493   trim-stop-from-complete-cds NULL ,
6494   fix-usa-and-states NULL ,
6495   synchronize-cds-partials NULL ,
6496   adjust-for-consensus-splice NULL ,
6497   fix-pub-caps Fix-pub-caps-action ,  -- the same payload is also reachable through fix-caps (Fix-caps-action.pub)
6498   remove-seg-gaps NULL ,
6499   sort-fields Sort-fields-action ,
6500   apply-molinfo-block Molinfo-block ,
6501   fix-caps Fix-caps-action ,
6502   fix-format Fix-format-action ,
6503   fix-spell NULL ,
6504   remove-duplicate-features Remove-duplicate-feature-action ,
6505   remove-lineage-notes NULL ,
6506   remove-xrefs Remove-xrefs-action ,
6507   make-gene-xrefs Make-gene-xref-action ,
6508   make-bold-xrefs NULL ,
6509   fix-author Author-fix-action ,
6510   update-sequences Update-sequences-action ,
6511   add-trans-splicing NULL ,
6512   remove-invalid-ecnumbers NULL ,
6513   create-tsa-ids Create-TSA-ids-action ,
6514   perform-autofix Autofix-action ,
6515   fix-sets Fix-sets-action ,
6516   apply-table Apply-table-action ,
6517   remove-sequences Remove-sequences-action ,
6518   propagate-sequence-technology NULL ,
6519   add-file-descriptors Add-descriptor-list-action ,
6520   propagate-missing-old-name NULL ,
6521   autoapply-structured-comments NULL ,
6522   reorder-structured-comments NULL ,
6523   remove-duplicate-structured-comments NULL ,
6524   lookup-taxonomy NULL ,
6525   lookup-pubs NULL ,
6526   trim-terminal-ns NULL ,
6527   update-replaced-ecnumbers Update-replaced-ec-numbers-action ,
6528   instantiate-protein-titles NULL ,
6529   retranslate-cds Retranslate-cds-action ,
6530   add-selenocysteine-except NULL ,
6531   join-short-trnas NULL }
6532 
6533 
6534 Macro-action-list ::= SET OF Macro-action-choice  -- a collection of macro steps
6535 
6536 
6537 Search-func ::= CHOICE {  -- one search predicate; referenced by Suspect-rule.find and Suspect-rule.except
6538   string-constraint String-constraint ,
6539   contains-plural NULL ,
6540   n-or-more-brackets-or-parentheses INTEGER ,
6541   three-numbers NULL ,
6542   underscore NULL ,
6543   prefix-and-numbers VisibleString ,
6544   all-caps NULL ,
6545   unbalanced-paren NULL ,
6546   too-long INTEGER ,
6547   has-term VisibleString }
6548 
6549 Simple-replace ::= SEQUENCE {  -- referenced by Replace-func.simple-replace
6550   replace VisibleString OPTIONAL,
6551   whole-string BOOLEAN DEFAULT FALSE ,
6552   weasel-to-putative BOOLEAN DEFAULT FALSE }
6553 
6554 Replace-func ::= CHOICE {  -- referenced by Replace-rule.replace-func
6555   simple-replace Simple-replace ,
6556   haem-replace VisibleString }
6557 
6558 Replace-rule ::= SEQUENCE {  -- referenced by Suspect-rule.replace
6559   replace-func Replace-func ,
6560   move-to-note BOOLEAN DEFAULT FALSE }
6561 
6562 Fix-type ::= ENUMERATED {  -- referenced by Suspect-rule.rule-type, where the default is none
6563   none (0) ,
6564   typo (1) ,
6565   putative-typo (2) ,
6566   quickfix (3) ,
6567   no-organelle-for-prokaryote (4),
6568   might-be-nonfunctional (5),
6569   database (6),
6570   remove-organism-name (7),
6571   inappropriate-symbol (8),
6572   evolutionary-relationship (9),
6573   use-protein (10),
6574   hypothetical (11),
6575   british (12),
6576   description (13),
6577   gene (14) }  
6578 
6579 Suspect-rule ::= SEQUENCE {  -- one rule; collected by Suspect-rule-set; only find is required
6580   find Search-func ,
6581   except Search-func OPTIONAL ,
6582   feat-constraint Constraint-choice-set OPTIONAL ,
6583   rule-type Fix-type DEFAULT none ,
6584   replace Replace-rule OPTIONAL ,
6585   description VisibleString OPTIONAL }  
6586 
6587 Suspect-rule-set ::= SET OF Suspect-rule  -- a collection of Suspect-rule entries
6588 
6589 
6590 
6591 END

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.