NCBI C Toolkit Cross Reference

C/asn/asn.all


  1 --$Revision: 6.0 $
  2 --**********************************************************************
  3 --
  4 --  asn.all
  5 --  this file contains all NCBI ASN.1 specifications together
  6 --
  7 --  by James Ostell, 1990
  8 --
  9 --**********************************************************************
 10 
 11 --$Revision: 6.5 $
 12 --**********************************************************************
 13 --
 14 --  NCBI General Data elements
 15 --  by James Ostell, 1990
 16 --  Version 3.0 - June 1994
 17 --
 18 --**********************************************************************
 19 
 20 NCBI-General DEFINITIONS ::=
 21 BEGIN
 22 
 23 EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;
 24 
 25 -- StringStore is really a VisibleString.  It is used to define very
 26 --   long strings which may need to be stored by the receiving program
 27 --   in special structures, such as a ByteStore, but it's just a hint.
 28 --   AsnTool stores StringStores in ByteStore structures.
 29 -- OCTET STRINGs are also stored in ByteStores by AsnTool
 30 -- 
 31 -- typedef struct bsunit {             /* for building multiline strings */
 32    -- Nlm_Handle str;            /* the string piece */
 33    -- Nlm_Int2 len_avail,
 34        -- len;
 35    -- struct bsunit PNTR next; }       /* the next one */
 36 -- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
 37 -- 
 38 -- typedef struct bytestore {
 39    -- Nlm_Int4 seekptr,       /* current position */
 40       -- totlen,             /* total stored data length in bytes */
 41       -- chain_offset;       /* offset in ByteStore of first byte in curchain */
 42    -- Nlm_BSUnitPtr chain,       /* chain of elements */
 43       -- curchain;           /* the BSUnit containing seekptr */
 44 -- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
 45 --
 46 -- AsnTool incorporates this as a primitive type, so the definition
 47 --   is here just for completeness
 48 -- 
 49 --  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
 50 --
 51 
 52 -- BigInt is really an INTEGER. It is used to warn the receiving code to expect
 53 --   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
 54 --
 55 --   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
 56 --   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
 57 --
 58 
 59 -- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
 60 --  of ASN.1
 61 --  It stores only a date
 62 --
 63 
 64 Date ::= CHOICE {              -- a date, carried in exactly one of two forms
 65     str VisibleString ,        -- free text, for dates that could not be parsed
 66     std Date-std }             -- structured form (see Date-std); use this if you can
 67 
 68 Date-std ::= SEQUENCE {        -- NOTE: this is NOT a unix tm struct; only year is mandatory
 69     year INTEGER ,             -- full year including the century, e.g. 1994 (not years since 1900)
 70     month INTEGER OPTIONAL ,   -- month (1-12); range is by convention, the type is unconstrained
 71     day INTEGER OPTIONAL ,     -- day of month (1-31)
 72     season VisibleString OPTIONAL ,  -- free text, for "spring", "may-june", etc
 73     hour INTEGER OPTIONAL ,    -- hour of day (0-23)
 74     minute INTEGER OPTIONAL ,  -- minute of hour (0-59)
 75     second INTEGER OPTIONAL }  -- second of minute (0-59)
 76 
 77 -- Dbtag is generalized for tagging
 78 -- eg. { "Social Security", str "023-79-8841" }
 79 -- or  { "member", id 8882224 }
 80 
 81 Dbtag ::= SEQUENCE {            -- pairs a database/system name with an identifier from it
 82     db VisibleString ,          -- name of database or system
 83     tag Object-id }         -- the identifier within db: integer or string (see Object-id)
 84 
 85 -- Object-id can tag or name anything
 86 --
 87 
 88 Object-id ::= CHOICE {       -- an identifier that is either numeric or textual
 89     id INTEGER ,             -- numeric form
 90     str VisibleString }      -- string form
 91 
 92 -- Person-id is to define a std element for people
 93 --
 94 
 95 Person-id ::= CHOICE {          -- identifies a person in exactly one of these forms
 96     dbtag Dbtag ,               -- any defined database tag
 97     name Name-std ,             -- structured name (see Name-std)
 98     ml VisibleString ,          -- MEDLINE name (semi-structured)
 99                                 --    eg. "Jones RM"
100     str VisibleString,          -- unstructured name
101     consortium VisibleString }  -- consortium name
102 
103 Name-std ::= SEQUENCE { -- Structured names; every part except last is OPTIONAL
104     last VisibleString ,              -- last name; the only required component
105     first VisibleString OPTIONAL ,    -- first name
106     middle VisibleString OPTIONAL ,   -- middle name
107     full VisibleString OPTIONAL ,    -- full name eg. "J. John Smith, Esq"
108     initials VisibleString OPTIONAL,  -- first + middle initials
109     suffix VisibleString OPTIONAL ,   -- Jr, Sr, III
110     title VisibleString OPTIONAL }    -- Dr., Sister, etc
111 
112 --**** Int-fuzz **********************************************
113 --*
114 --*   uncertainties in integer values
115 
116 Int-fuzz ::= CHOICE {                -- how an integer value is uncertain; one form per value
117     p-m INTEGER ,                    -- plus or minus fixed amount
118     range SEQUENCE {                 -- explicit bounds, listed max first then min
119         max INTEGER ,                -- upper bound
120         min INTEGER } ,              -- lower bound
121     pct INTEGER ,                    -- % plus or minus, stored x10: 0-1000 stands for 0.0-100.0 percent
122     lim ENUMERATED {                 -- some limit value
123         unk (0) ,                    -- unknown
124         gt (1) ,                     -- greater than
125         lt (2) ,                     -- less than
126         tr (3) ,                     -- space to right of position
127         tl (4) ,                     -- space to left of position
128         circle (5) ,                 -- artificial break at origin of circle
129         other (255) } ,              -- something else
130     alt SET OF INTEGER }             -- set of alternatives for the integer
131 
132 
133 --**** User-object **********************************************
134 --*
135 --*   a general object for a user defined structured data item
136 --*    used by Seq-feat and Seq-descr
137 
138 User-object ::= SEQUENCE {           -- user defined structured data item
139     class VisibleString OPTIONAL ,   -- endeavor which designed this object
140     type Object-id ,                 -- type of object within class
141     data SEQUENCE OF User-field }    -- the object itself, as an ordered list of fields
142 
143 User-field ::= SEQUENCE {            -- one labelled field of a User-object
144     label Object-id ,                -- field label
145     num INTEGER OPTIONAL ,           -- required for strs, ints, reals, oss (presumably the element count; confirm)
146     data CHOICE {                    -- field contents; exactly one alternative is present
147         str VisibleString ,          -- single string
148         int INTEGER ,                -- single integer
149         real REAL ,                  -- single real
150         bool BOOLEAN ,               -- single boolean
151         os OCTET STRING ,            -- single octet string
152         object User-object ,         -- for using other definitions
153         strs SEQUENCE OF VisibleString ,  -- list of strings
154         ints SEQUENCE OF INTEGER ,        -- list of integers
155         reals SEQUENCE OF REAL ,          -- list of reals
156         oss SEQUENCE OF OCTET STRING ,    -- list of octet strings
157         fields SEQUENCE OF User-field ,   -- nested fields (recursive)
158         objects SEQUENCE OF User-object } }  -- list of nested objects
159 
160 
161 
162 END
163 
164 --$Revision: 6.3 $
165 --****************************************************************
166 --
167 --  NCBI Bibliographic data elements
168 --  by James Ostell, 1990
169 --
170 --  Taken from the American National Standard for
171 --      Bibliographic References
172 --      ANSI Z39.29-1977
173 --  Version 3.0 - June 1994
174 --  PubMedId added in 1996
175 --  ArticleIds and eprint elements added in 1999
176 --
177 --****************************************************************
178 
179 NCBI-Biblio DEFINITIONS ::=
180 BEGIN
181 
182 EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
183         Cit-proc, Cit-sub, Title, Author, PubMedId, DOI;
184 
185 IMPORTS Person-id, Date, Dbtag FROM NCBI-General;
186 
187     -- Article Ids
188 
189 ArticleId ::= CHOICE {         -- one id of an article; an article can have many (see ArticleIdSet)
190         pubmed PubMedId ,      -- see types below
191         medline MedlineUID ,   -- Id from MEDLINE
192         doi DOI ,              -- DOI string
193         pii PII ,              -- publisher identifier string
194         pmcid PmcID ,          -- PubMed Central Id
195         pmcpid PmcPid ,        -- Publisher Id supplied to PubMed Central
196         pmpid PmPid ,          -- Publisher Id supplied to PubMed
197         other Dbtag  }    -- generic catch all: any database name plus tag
198     
199 PubMedId ::= INTEGER           -- Id from the PubMed database at NCBI
200 MedlineUID ::= INTEGER         -- Id from MEDLINE
201 DOI ::= VisibleString          -- Digital Object Identifier
202 PII ::= VisibleString          -- Controlled Publisher Identifier
203 PmcID ::= INTEGER              -- PubMed Central Id
204 PmcPid ::= VisibleString       -- Publisher Id supplied to PubMed Central
205 PmPid ::= VisibleString        -- Publisher Id supplied to PubMed
206 
207 ArticleIdSet ::= SET OF ArticleId   -- unordered collection of ids for one article
208 
209     -- Status Dates
210 
211 PubStatus ::= INTEGER {            -- points of publication (named values of a plain INTEGER)
212     received  (1) ,            -- date manuscript received for review
213     accepted  (2) ,            -- accepted for publication
214     epublish  (3) ,            -- published electronically by publisher
215     ppublish  (4) ,            -- published in print by publisher
216     revised   (5) ,            -- article revised by publisher/author
217     pmc       (6) ,            -- article first appeared in PubMed Central
218     pmcr      (7) ,            -- article revision in PubMed Central
219     pubmed    (8) ,            -- article citation first appeared in PubMed
220     pubmedr   (9) ,            -- article citation revision in PubMed
221     aheadofprint (10),         -- epublish, but will be followed by print
222     premedline (11),           -- date into PreMedline status
223     medline    (12),           -- date made a MEDLINE record
224     other    (255) }           -- any status not listed above
225 
226 PubStatusDate ::= SEQUENCE {   -- done as a structure so fields can be added
227     pubstatus PubStatus ,      -- which point of publication this date belongs to
228     date Date }                -- when that status was reached; time may be added later
229 
230 PubStatusDateSet ::= SET OF PubStatusDate   -- unordered collection of status/date pairs
231     
232     -- Citation Types
233 
234 Cit-art ::= SEQUENCE {                  -- article in journal, book, or proceedings
235     title Title OPTIONAL ,              -- title of paper (ANSI requires)
236     authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
237     from CHOICE {                       -- where the article appeared: exactly one of
238         journal Cit-jour ,              --   a journal
239         book Cit-book ,                 --   a book
240         proc Cit-proc } ,               --   meeting proceedings
241     ids ArticleIdSet OPTIONAL }         -- lots of ids
242 
243 Cit-jour ::= SEQUENCE {             -- Journal citation
244     title Title ,                   -- title of journal
245     imp Imprint }                   -- imprint: date, volume, issue, pages, etc (see Imprint)
246 
247 Cit-book ::= SEQUENCE {              -- Book citation
248     title Title ,                    -- Title of book
249     coll Title OPTIONAL ,            -- title of the collection, if part of a collection
250     authors Auth-list,               -- authors
251     imp Imprint }                    -- imprint: date, publisher, etc (see Imprint)
252 
253 Cit-proc ::= SEQUENCE {             -- Meeting proceedings
254     book Cit-book ,                 -- citation to meeting, in book form
255     meet Meeting }                  -- time and location of meeting
256 
257     -- Patent number and date-issue were made optional in 1997 to
258     --   support patent applications being issued from the USPTO
259     --   Semantically a Cit-pat must have either a patent number or
260     --   an application number (or both) to be valid
261 
262 Cit-pat ::= SEQUENCE {                  -- patent citation
263     title VisibleString ,               -- title of the patent
264     authors Auth-list,                  -- author/inventor
265     country VisibleString ,             -- Patent Document Country
266     doc-type VisibleString ,            -- Patent Document Type
267     number VisibleString OPTIONAL,      -- Patent Document Number
268     date-issue Date OPTIONAL,           -- Patent Issue/Pub Date
269     class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Codes
270     app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
271     app-date Date OPTIONAL ,            -- Patent Appl File Date
272     applicants Auth-list OPTIONAL ,     -- Applicants
273     assignees Auth-list OPTIONAL ,      -- Assignees
274     priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
275     abstract VisibleString OPTIONAL }   -- abstract of patent
276 
277 Patent-priority ::= SEQUENCE {          -- one priority entry of a patent (used by Cit-pat)
278     country VisibleString ,             -- Patent country code
279     number VisibleString ,              -- number assigned in that country
280     date Date }                         -- date of application
281 
282 Id-pat ::= SEQUENCE {                   -- just to identify a patent
283     country VisibleString ,             -- Patent Document Country
284     id CHOICE {                         -- exactly one of the two numbers
285         number VisibleString ,          -- Patent Document Number
286         app-number VisibleString } ,    -- Patent Doc Appl Number
287     doc-type VisibleString OPTIONAL }   -- Patent Doc Type
288 
289 Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
290     cit Cit-book ,                      -- same fields as a book
291     man-id VisibleString OPTIONAL ,     -- Manuscript identifier
292     type ENUMERATED {                   -- which kind of document this is
293         manuscript (1) ,
294         letter (2) ,
295         thesis (3) } OPTIONAL }
296                                 -- NOTE: this is just to cite a
297                                 -- direct data submission, see NCBI-Submit
298                                 -- for the form of a sequence submission
299 Cit-sub ::= SEQUENCE {               -- citation for a direct submission
300     authors Auth-list ,              -- not necessarily authors of the paper
301     imp Imprint OPTIONAL ,                       -- only used to get the date; will go away
302     medium ENUMERATED {              -- medium of submission
303         paper   (1) ,
304         tape    (2) ,
305         floppy  (3) ,
306         email   (4) ,
307         other   (255) } OPTIONAL ,
308     date Date OPTIONAL ,              -- replaces imp, will become required
309     descr VisibleString OPTIONAL }    -- description of changes for public view
310     
311 Cit-gen ::= SEQUENCE {      -- NOT from ANSI, this is a catchall; every component is OPTIONAL
312     cit VisibleString OPTIONAL ,     -- anything, not parsable
313     authors Auth-list OPTIONAL ,     -- authors
314     muid INTEGER OPTIONAL ,      -- medline uid
315     journal Title OPTIONAL ,         -- journal title
316     volume VisibleString OPTIONAL ,
317     issue VisibleString OPTIONAL ,
318     pages VisibleString OPTIONAL ,
319     date Date OPTIONAL ,
320     serial-number INTEGER OPTIONAL ,   -- for GenBank style references
321     title VisibleString OPTIONAL ,     -- eg. cit="unpublished",title="title"
322         pmid PubMedId OPTIONAL }           -- PubMed Id
323     
324     
325     -- Authorship Group
326 Auth-list ::= SEQUENCE {              -- ordered author names plus an optional shared affiliation
327         names CHOICE {                -- all names use the same one of three forms
328             std SEQUENCE OF Author ,        -- full citations
329             ml SEQUENCE OF VisibleString ,  -- MEDLINE, semi-structured
330             str SEQUENCE OF VisibleString } , -- free for all
331         affil Affil OPTIONAL }        -- author affiliation
332 
333 Author ::= SEQUENCE {                   -- one author; only name is required
334     name Person-id ,                        -- Author, Primary or Secondary
335     level ENUMERATED {                  -- primary or secondary author
336         primary (1),
337         secondary (2) } OPTIONAL ,
338     role ENUMERATED {                   -- Author Role Indicator
339         compiler (1),
340         editor (2),
341         patent-assignee (3),
342         translator (4) } OPTIONAL ,
343     affil Affil OPTIONAL ,              -- affiliation of this author
344     is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author
345 
346 Affil ::= CHOICE {                      -- affiliation, either free text or structured
347     str VisibleString ,                 -- unparsed string
348     std SEQUENCE {                      -- std representation; every component is OPTIONAL
349     affil VisibleString OPTIONAL ,      -- Author Affiliation, Name
350     div VisibleString OPTIONAL ,        -- Author Affiliation, Division
351     city VisibleString OPTIONAL ,       -- Author Affiliation, City
352     sub VisibleString OPTIONAL ,        -- Author Affiliation, County Sub
353     country VisibleString OPTIONAL ,    -- Author Affiliation, Country
354     street VisibleString OPTIONAL ,    -- street address, not ANSI
355     email VisibleString OPTIONAL ,      -- email address
356     fax VisibleString OPTIONAL ,        -- fax number
357     phone VisibleString OPTIONAL ,      -- phone number
358     postal-code VisibleString OPTIONAL }}  -- postal code
359 
360     -- Title Group
361     -- Valid for = A = Analytic (Cit-art)
362     --             J = Journals (Cit-jour)
363     --             B = Book (Cit-book)
364                                                  -- Valid for:
365 Title ::= SET OF CHOICE {   -- set of title variants; letters at right: A = Cit-art, J = Cit-jour, B = Cit-book
366     name VisibleString ,    -- Title, Anal,Coll,Mono    AJB
367     tsub VisibleString ,    -- Title, Subordinate       A B
368     trans VisibleString ,   -- Title, Translated        AJB
369     jta VisibleString ,     -- Title, Abbreviated        J
370     iso-jta VisibleString , -- specifically ISO jta      J
371     ml-jta VisibleString ,  -- specifically MEDLINE jta  J
372     coden VisibleString ,   -- a coden                   J
373     issn VisibleString ,    -- ISSN                      J
374     abr VisibleString ,     -- Title, Abbreviated         B
375     isbn VisibleString }    -- ISBN                       B
376 
377 Imprint ::= SEQUENCE {                  -- Imprint group; only date is required
378     date Date ,                         -- date of publication
379     volume VisibleString OPTIONAL ,
380     issue VisibleString OPTIONAL ,
381     pages VisibleString OPTIONAL ,
382     section VisibleString OPTIONAL ,
383     pub Affil OPTIONAL,                     -- publisher, required for book
384     cprt Date OPTIONAL,                     -- copyright date, required for book
385     part-sup VisibleString OPTIONAL ,       -- part/sup of volume
386     language VisibleString DEFAULT "ENG" ,  -- put here for simplicity; "ENG" is assumed when absent
387     prepub ENUMERATED {                     -- for prepublication citations
388         submitted (1) ,                     -- submitted, not accepted
389         in-press (2) ,                      -- accepted, not published
390         other (255)  } OPTIONAL ,
391     part-supi VisibleString OPTIONAL ,      -- part/sup on issue
392     retract CitRetract OPTIONAL ,           -- retraction info
393     pubstatus PubStatus OPTIONAL ,          -- current status of this publication
394     history PubStatusDateSet OPTIONAL }     -- dates for this record
395 
396 CitRetract ::= SEQUENCE {            -- retraction or erratum information for a citation
397     type ENUMERATED {                    -- retraction of an entry
398         retracted (1) ,               -- this citation retracted
399         notice (2) ,                  -- this citation is a retraction notice
400         in-error (3) ,                -- an erratum was published about this
401         erratum (4) } ,               -- this is a published erratum
402     exp VisibleString OPTIONAL }      -- citation and/or explanation
403 
404 Meeting ::= SEQUENCE {        -- a meeting, as referenced from Cit-proc
405     number VisibleString ,    -- meeting number, held as text
406     date Date ,               -- date of the meeting
407     place Affil OPTIONAL }    -- location of the meeting, expressed as an Affil
408 
409             
410 END
411 
412 
413 --$Revision: 6.0 $
414 --**********************************************************************
415 --
416 --  MEDLINE data definitions
417 --  James Ostell, 1990
418 --
419 --  enhanced in 1996 to support PubMed records as well by simply adding
420 --    the PubMedId and making MedlineId optional
421 --
422 --**********************************************************************
423 
424 NCBI-Medline DEFINITIONS ::=
425 BEGIN
426 
427 EXPORTS Medline-entry, Medline-si;
428 
429 IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
430         Date FROM NCBI-General;
431 
432                                 -- a MEDLINE or PubMed entry
433 Medline-entry ::= SEQUENCE {    -- only em and cit are required
434     uid INTEGER OPTIONAL ,      -- MEDLINE UID, sometimes not yet available if from PubMed
435     em Date ,                   -- Entry Month
436     cit Cit-art ,               -- article citation
437     abstract VisibleString OPTIONAL ,      -- abstract text
438     mesh SET OF Medline-mesh OPTIONAL ,    -- MeSH terms (see Medline-mesh)
439     substance SET OF Medline-rn OPTIONAL , -- substance records (see Medline-rn)
440     xref SET OF Medline-si OPTIONAL ,      -- cross reference records (see Medline-si)
441     idnum SET OF VisibleString OPTIONAL ,  -- ID Number (grants, contracts)
442     gene SET OF VisibleString OPTIONAL ,
443     pmid PubMedId OPTIONAL ,               -- MEDLINE records may include the PubMedId
444     pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
445     mlfield SET OF Medline-field OPTIONAL ,  -- additional Medline field types
446     status INTEGER {         -- record status; medline is assumed when absent
447         publisher (1) ,      -- record as supplied by publisher
448         premedline (2) ,     -- premedline record
449         medline (3) } DEFAULT medline }  -- regular medline record
450 
451 Medline-mesh ::= SEQUENCE {      -- one MeSH term with its qualifiers
452     mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point (*); FALSE when absent
453     term VisibleString ,                   -- the MeSH term
454     qual SET OF Medline-qual OPTIONAL }    -- qualifiers
455 
456 Medline-qual ::= SEQUENCE {      -- one qualifier of a MeSH term
457     mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point; FALSE when absent
458     subh VisibleString }             -- the subheading
459 
460 Medline-rn ::= SEQUENCE {       -- medline substance records
461     type ENUMERATED {           -- type of record
462         nameonly (0) ,          -- name only, no number
463         cas (1) ,               -- CAS number
464         ec (2) } ,              -- EC number
465     cit VisibleString OPTIONAL ,  -- CAS or EC number if present
466     name VisibleString }          -- name (always present)
467 
468 Medline-si ::= SEQUENCE {       -- medline cross reference records
469     type ENUMERATED {           -- type of xref: which database is referenced
470         ddbj (1) ,              -- DNA Data Bank of Japan
471         carbbank (2) ,          -- Carbohydrate Structure Database
472         embl (3) ,              -- EMBL Data Library
473         hdb (4) ,               -- Hybridoma Data Bank
474         genbank (5) ,           -- GenBank
475         hgml (6) ,              -- Human Gene Map Library
476         mim (7) ,               -- Mendelian Inheritance in Man
477         msd (8) ,               -- Microbial Strains Database
478         pdb (9) ,               -- Protein Data Bank (Brookhaven)
479         pir (10) ,              -- Protein Identification Resource
480         prfseqdb (11) ,         -- Protein Research Foundation (Japan)
481         psd (12) ,              -- Protein Sequence Database (Japan)
482         swissprot (13) ,        -- SwissProt
483         gdb (14) } ,            -- Genome Data Base
484     cit VisibleString OPTIONAL }    -- the citation/accession number in that database
485 
486 Medline-field ::= SEQUENCE {    -- an additional Medline field (used by Medline-entry.mlfield)
487     type INTEGER {              -- Keyed type
488         other (0) ,             -- look in line code
489         comment (1) ,           -- comment line
490         erratum (2) } ,         -- retracted, corrected, etc
491     str VisibleString ,         -- the text
492     ids SEQUENCE OF DocRef OPTIONAL }  -- pointers relevant to this text
493 
494 DocRef ::= SEQUENCE {           -- reference to a document
495     type INTEGER {              -- which id space uid belongs to
496         medline (1) ,
497         pubmed (2) ,
498         ncbigi (3) } ,
499     uid INTEGER }               -- the id itself, interpreted according to type
500 
501 END
502 
503 --$Revision: 6.0 $
504 --**********************************************************************
505 --
506 --  PUBMED data definitions
507 --
508 --**********************************************************************
509 
510 NCBI-PubMed DEFINITIONS ::=
511 BEGIN
512 
513 EXPORTS Pubmed-entry, Pubmed-url;
514 
515 IMPORTS PubMedId FROM NCBI-Biblio
516         Medline-entry FROM NCBI-Medline;
517 
518 Pubmed-entry ::= SEQUENCE {        -- a PubMed entry; only pmid is required
519     -- PUBMED records must include the PubMedId
520     pmid PubMedId,
521 
522     -- Medline entry information
523     medent Medline-entry OPTIONAL,
524 
525     -- Publisher name
526     publisher VisibleString OPTIONAL,
527 
528     -- List of URLs to publisher site
529     urls SET OF Pubmed-url OPTIONAL,
530 
531     -- Publisher's article identifier
532     pubid VisibleString OPTIONAL
533 }
534 
535 Pubmed-url ::= SEQUENCE {        -- one URL, optionally keyed by a location code
536     location VisibleString OPTIONAL, -- Location code
537     url VisibleString                -- Selected URL for location
538 }
539 
540 END
541 --$Revision: 6.0 $
542 --**********************************************************************
543 --
544 --  MEDLARS data definitions
545 --  Grigoriy Starchenko, 1997
546 --
547 --**********************************************************************
548 
549 NCBI-Medlars DEFINITIONS ::=
550 BEGIN
551 
552 EXPORTS Medlars-entry, Medlars-record;
553 
554 IMPORTS PubMedId FROM NCBI-Biblio;
555 
556 Medlars-entry ::= SEQUENCE {     -- a MEDLARS entry
557     pmid PubMedId,               -- All entries in PubMed must have it
558     muid INTEGER OPTIONAL,       -- Medline(OCCS) id
559     recs SET OF Medlars-record   -- List of Medlars records (unordered SET)
560 }
561 
562 Medlars-record ::= SEQUENCE {    -- one unit record of a MEDLARS entry
563     code INTEGER,                -- Unit record field type integer form
564     abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
565     data VisibleString           -- Unit record data
566 }
567 
568 END
569 --$Revision: 6.0 $
570 --********************************************************************
571 --
572 --  Publication common set
573 --  James Ostell, 1990
574 --
575 --  These are the base class definitions for Publications of all sorts
576 --
577 --  support for PubMedId added in 1996
578 --********************************************************************
579 
580 NCBI-Pub DEFINITIONS ::=
581 BEGIN
582 
583 EXPORTS Pub, Pub-set, Pub-equiv;
584 
585 IMPORTS Medline-entry FROM NCBI-Medline
586         Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
587         Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;
588 
589 Pub ::= CHOICE {         -- a publication, in exactly one of these forms
590     gen Cit-gen ,        -- general or generic unparsed
591     sub Cit-sub ,        -- submission
592     medline Medline-entry ,  -- full MEDLINE or PubMed entry
593     muid INTEGER ,       -- medline uid
594     article Cit-art ,    -- article in journal, book, or proceedings
595     journal Cit-jour ,   -- journal citation
596     book Cit-book ,      -- book citation
597     proc Cit-proc ,      -- proceedings of a meeting
598     patent Cit-pat ,     -- patent citation
599     pat-id Id-pat ,      -- identify a patent
600     man Cit-let ,        -- manuscript, thesis, or letter
601     equiv Pub-equiv,     -- to cite a variety of ways
602         pmid PubMedId }      -- PubMedId
603 
604 Pub-equiv ::= SET OF Pub   -- unordered set of Pubs that are equivalent identifiers for same citation
605 
606 Pub-set ::= CHOICE {            -- a set of publications; all members share the chosen form
607     pub SET OF Pub ,            -- mixed forms, each wrapped in Pub
608     medline SET OF Medline-entry ,
609     article SET OF Cit-art ,
610     journal SET OF Cit-jour ,
611     book SET OF Cit-book ,
612     proc SET OF Cit-proc ,      -- proceedings of a meeting
613     patent SET OF Cit-pat }
614 
615 END
616 
617 --$Revision: 6.5 $
618 --**********************************************************************
619 --
620 --  NCBI Sequence location and identifier elements
621 --  by James Ostell, 1990
622 --
623 --  Version 3.0 - 1994
624 --
625 --**********************************************************************
626 
627 NCBI-Seqloc DEFINITIONS ::=
628 BEGIN
629 
630 EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
631         Na-strand, Giimport-id;
632 
633 IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
634         Id-pat FROM NCBI-Biblio
635         Feat-id FROM NCBI-Seqfeat;
636 
637 --*** Sequence identifiers ********************************
638 --*
639 
640 Seq-id ::= CHOICE {              -- a sequence identifier from exactly one source
641     local Object-id ,            -- local use
642     gibbsq INTEGER ,             -- Geninfo backbone seqid
643     gibbmt INTEGER ,             -- Geninfo backbone moltype
644     giim Giimport-id ,           -- Geninfo import id
645     genbank Textseq-id ,         -- GenBank
646     embl Textseq-id ,            -- EMBL
647     pir Textseq-id ,             -- PIR
648     swissprot Textseq-id ,       -- SwissProt
649     patent Patent-seq-id ,       -- sequence within a patent (see Patent-seq-id)
650     other Textseq-id ,           -- for historical reasons, 'other' = 'refseq'
651     general Dbtag ,              -- for other databases
652     gi INTEGER ,                 -- GenInfo Integrated Database
653     ddbj Textseq-id ,            -- DDBJ
654     prf Textseq-id ,             -- PRF SEQDB
655     pdb PDB-seq-id ,             -- PDB sequence
656     tpg Textseq-id ,             -- Third Party Annot/Seq Genbank
657     tpe Textseq-id ,             -- Third Party Annot/Seq EMBL
658     tpd Textseq-id ,             -- Third Party Annot/Seq DDBJ
659     gpipe Textseq-id ,           -- Internal NCBI genome pipeline processing ID
660     named-annot-track Textseq-id -- Internal named annotation tracking ID
661 }
662 
663 Seq-id-set ::= SET OF Seq-id   -- unordered collection of sequence identifiers
664 
665 
666 Patent-seq-id ::= SEQUENCE {   -- identifies one sequence within a patent
667     seqid INTEGER ,         -- number of sequence in patent
668     cit Id-pat }           -- patent citation
669 
670 Textseq-id ::= SEQUENCE {               -- text-based sequence id; every component is OPTIONAL
671     name VisibleString OPTIONAL ,       -- entry name
672     accession VisibleString OPTIONAL ,  -- accession string
673     release VisibleString OPTIONAL ,    -- release label
674     version INTEGER OPTIONAL }          -- version number
675 
676 Giimport-id ::= SEQUENCE {           -- Geninfo import id
677     id INTEGER ,                     -- the id to use here
678     db VisibleString OPTIONAL ,      -- dbase used in
679     release VisibleString OPTIONAL } -- the release
680 
681 PDB-seq-id ::= SEQUENCE {      -- identifies a PDB sequence by molecule name and chain
682     mol PDB-mol-id ,           -- the molecule name
683     chain INTEGER DEFAULT 32 , -- chain id as the code of a single ASCII character; default 32 is a space
684     rel Date OPTIONAL }        -- release date, month and year
685 
686 PDB-mol-id ::= VisibleString  -- name of mol, 4 chars (length is not enforced by the type)
687     
688 --*** Sequence locations **********************************
689 --*
690 
691 Seq-loc ::= CHOICE {     -- a location on a sequence, in exactly one of these forms
692     null NULL ,           -- not placed
693     empty Seq-id ,        -- to NULL one Seq-id in a collection
694     whole Seq-id ,        -- whole sequence
695     int Seq-interval ,    -- from to
696     packed-int Packed-seqint ,   -- list of intervals (see Packed-seqint)
697     pnt Seq-point ,              -- single point (see Seq-point)
698     packed-pnt Packed-seqpnt ,   -- several points on one sequence (see Packed-seqpnt)
699     mix Seq-loc-mix ,            -- ordered list of locations of any form
700     equiv Seq-loc-equiv ,  -- equivalent sets of locations
701     bond Seq-bond ,        -- bond between residues (see Seq-bond)
702     feat Feat-id }         -- indirect, through a Seq-feat
703     
704 
705 Seq-interval ::= SEQUENCE {     -- a from/to interval on one sequence
706     from INTEGER ,              -- first position of the interval
707     to INTEGER ,                -- last position of the interval
708     strand Na-strand OPTIONAL , -- strand, for nucleic acids
709     id Seq-id ,    -- WARNING: this used to be optional
710     fuzz-from Int-fuzz OPTIONAL ,  -- uncertainty of from
711     fuzz-to Int-fuzz OPTIONAL }    -- uncertainty of to
712 
713 Packed-seqint ::= SEQUENCE OF Seq-interval   -- ordered list of intervals
714 
715 Seq-point ::= SEQUENCE {        -- a single position on one sequence
716     point INTEGER ,             -- the position
717     strand Na-strand OPTIONAL , -- strand, for nucleic acids
718     id Seq-id ,     -- WARNING: this used to be optional
719     fuzz Int-fuzz OPTIONAL }    -- uncertainty of point
720 
721 Packed-seqpnt ::= SEQUENCE {    -- several points sharing one strand, id and fuzz
722     strand Na-strand OPTIONAL , -- strand, for nucleic acids
723     id Seq-id ,                 -- the sequence all points lie on
724     fuzz Int-fuzz OPTIONAL ,    -- a single uncertainty value for the whole structure
725     points SEQUENCE OF INTEGER }  -- the positions, in order
726 
727 Na-strand ::= ENUMERATED {          -- strand of nucleic acid
728     unknown (0) ,             -- strand not known
729     plus (1) ,                -- plus strand
730     minus (2) ,               -- minus strand
731     both (3) ,                -- in forward orientation
732     both-rev (4) ,            -- in reverse orientation
733     other (255) }             -- anything not covered above
734 
735 Seq-bond ::= SEQUENCE {         -- bond between residues
736     a Seq-point ,           -- connection to at least one residue
737     b Seq-point OPTIONAL }  -- other end may not be available
738 
739 Seq-loc-mix ::= SEQUENCE OF Seq-loc   -- ordered list of locations; this will hold anything
740 
741 Seq-loc-equiv ::= SET OF Seq-loc      -- unordered set of equivalent locations
742 
743 END
744     
745 
746 --$Revision: 6.24 $
747 --**********************************************************************
748 --
749 --  NCBI Sequence elements
750 --  by James Ostell, 1990
751 --  Version 3.0 - June 1994
752 --
753 --**********************************************************************
754 
755 NCBI-Sequence DEFINITIONS ::=
756 BEGIN
757 
758 EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,   -- types visible to importing modules
759         Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
760         Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap;   -- each symbol listed once
761 
762 IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
763         Seq-align FROM NCBI-Seqalign
764         Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
765         Seq-graph FROM NCBI-Seqres
766         Pub-equiv FROM NCBI-Pub
767         Org-ref FROM NCBI-Organism
768         BioSource FROM NCBI-BioSource
769         Seq-id, Seq-loc FROM NCBI-Seqloc
770         GB-block FROM GenBank-General
771         PIR-block FROM PIR-General
772         EMBL-block FROM EMBL-General
773         SP-block FROM SP-General
774         PRF-block FROM PRF-General
775         PDB-block FROM PDB-General
776         Seq-table FROM NCBI-SeqTable;
777 
778 --*** Sequence ********************************
779 --*
780 
781 Bioseq ::= SEQUENCE {            -- one sequence: ids, descriptors, data, annotations
782     id SET OF Seq-id ,            -- equivalent identifiers
783     descr Seq-descr OPTIONAL , -- descriptors
784     inst Seq-inst ,            -- the sequence data
785     annot SET OF Seq-annot OPTIONAL }  -- annotations carried with this sequence
786 
787 --*** Descriptors *****************************
788 --*
789 
790 Seq-descr ::= SET OF Seqdesc      -- unordered collection of descriptors
791 
792 Seqdesc ::= CHOICE {             -- one descriptor; exactly one alternative is present
793     mol-type GIBB-mol ,          -- type of molecule (deprecated, see NOTE below)
794     modif SET OF GIBB-mod ,             -- modifiers (deprecated, see NOTE below)
795     method GIBB-method ,         -- sequencing method (deprecated, see NOTE below)
796     name VisibleString ,         -- a name for this sequence
797     title VisibleString ,        -- a title for this sequence
798     org Org-ref ,                -- if all from one organism (deprecated, see NOTE below)
799     comment VisibleString ,      -- a more extensive comment
800     num Numbering ,              -- a numbering system
801     maploc Dbtag ,               -- map location of this sequence
802     pir PIR-block ,              -- PIR specific info
803     genbank GB-block ,           -- GenBank specific info
804     pub Pubdesc ,                -- a reference to the publication
805     region VisibleString ,       -- overall region (globin locus)
806     user User-object ,           -- user defined object
807     sp SP-block ,                -- SWISSPROT specific info
808     dbxref Dbtag ,               -- xref to other databases
809     embl EMBL-block ,            -- EMBL specific information
810     create-date Date ,           -- date entry first created/released
811     update-date Date ,           -- date of last update
812     prf PRF-block ,              -- PRF specific information
813     pdb PDB-block ,              -- PDB specific information
814     het Heterogen ,              -- cofactor, etc associated but not bound
815     source BioSource ,           -- source of materials, includes Org-ref
816     molinfo MolInfo ,            -- info on the molecule and techniques
817     modelev ModelEvidenceSupport -- model evidence for XM records
818 }
819 
820 --******* NOTE:
821 --*       mol-type, modif, method, and org are consolidated and expanded
822 --*       in Org-ref, BioSource, and MolInfo in this specification. They
823 --*       will be removed in later specifications. Do not use them in
824 --*       the future. Instead expect the new structures.
825 --*
826 --***************************
827 
828 --********************************************************************
829 --
830 -- MolInfo gives information on the
831 -- classification of the type and quality of the sequence
832 --
833 -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
834 --
835 --********************************************************************
836 
837 MolInfo ::= SEQUENCE {
838     biomol INTEGER {               -- type of molecule; unknown (0) when absent
839         unknown (0) ,
840         genomic (1) ,
841         pre-RNA (2) ,              -- precursor RNA of any sort really
842         mRNA (3) ,
843         rRNA (4) ,
844         tRNA (5) ,
845         snRNA (6) ,
846         scRNA (7) ,
847         peptide (8) ,
848         other-genetic (9) ,      -- other genetic material
849         genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
850         cRNA (11) ,              -- viral RNA genome copy intermediate
851         snoRNA (12) ,            -- small nucleolar RNA
852         transcribed-RNA (13) ,   -- transcribed RNA other than existing classes
853         ncRNA (14) ,
854         tmRNA (15) ,
855         other (255) } DEFAULT unknown ,
856     tech INTEGER {                 -- technique behind the sequence; unknown (0) when absent
857         unknown (0) ,
858         standard (1) ,          -- standard sequencing
859         est (2) ,               -- Expressed Sequence Tag
860         sts (3) ,               -- Sequence Tagged Site
861         survey (4) ,            -- one-pass genomic sequence
862         genemap (5) ,           -- from genetic mapping techniques
863         physmap (6) ,           -- from physical mapping techniques
864         derived (7) ,           -- derived from other data, not a primary entity
865         concept-trans (8) ,     -- conceptual translation
866         seq-pept (9) ,          -- peptide was sequenced
867         both (10) ,             -- concept transl. w/ partial pept. seq.
868         seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
869         seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
870         concept-trans-a (13) ,  -- conceptual transl. supplied by author
871         htgs-1 (14) ,           -- unordered High Throughput sequence contig
872         htgs-2 (15) ,           -- ordered High Throughput sequence contig
873         htgs-3 (16) ,           -- finished High Throughput sequence
874         fli-cdna (17) ,         -- full length insert cDNA
875         htgs-0 (18) ,           -- single genomic reads for coordination
876         htc (19) ,              -- high throughput cDNA
877         wgs (20) ,              -- whole genome shotgun sequencing
878         barcode (21) ,          -- barcode of life project
879         composite-wgs-htgs (22) , -- composite of WGS and HTGS
880         tsa (23) ,              -- transcriptome shotgun assembly
881         other (255) }           -- explain in techexp (the next field)
882                DEFAULT unknown ,
883     techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
884     --
885     -- Completeness is not indicated in most records.  For genomes, assume
886     -- the sequences are incomplete unless specifically marked as complete.
887     -- For mRNAs, assume the ends are not known exactly unless marked as
888     -- having the left or right end.
889     --
890     completeness INTEGER {
891       unknown (0) ,
892       complete (1) ,                   -- complete biological entity
893       partial (2) ,                    -- partial but no details given
894       no-left (3) ,                    -- missing 5' or NH3 end
895       no-right (4) ,                   -- missing 3' or COOH end
896       no-ends (5) ,                    -- missing both ends
897       has-left (6) ,                   -- 5' or NH3 end present
898       has-right (7) ,                  -- 3' or COOH end present
899       other (255) } DEFAULT unknown ,
900     gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA
901 
902 
903 GIBB-mol ::= ENUMERATED {       -- type of molecule represented (being replaced by MolInfo)
904     unknown (0) ,
905     genomic (1) ,
906     pre-mRNA (2) ,              -- precursor RNA of any sort really
907     mRNA (3) ,
908     rRNA (4) ,
909     tRNA (5) ,
910     snRNA (6) ,
911     scRNA (7) ,
912     peptide (8) ,
913     other-genetic (9) ,      -- other genetic material
914     genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
915     other (255) }
916 
917 GIBB-mod ::= ENUMERATED {        -- GenInfo Backbone modifiers (being replaced by MolInfo)
918     dna (0) ,
919     rna (1) ,
920     extrachrom (2) ,
921     plasmid (3) ,
922     mitochondrial (4) ,
923     chloroplast (5) ,
924     kinetoplast (6) ,
925     cyanelle (7) ,
926     synthetic (8) ,
927     recombinant (9) ,
928     partial (10) ,
929     complete (11) ,
930     mutagen (12) ,    -- subject of mutagenesis ?
931     natmut (13) ,     -- natural mutant ?
932     transposon (14) ,
933     insertion-seq (15) ,
934     no-left (16) ,    -- missing left end (5' for na, NH2 for aa)
935     no-right (17) ,   -- missing right end (3' for na, COOH for aa)
936     macronuclear (18) ,
937     proviral (19) ,
938     est (20) ,        -- expressed sequence tag
939     sts (21) ,        -- sequence tagged site
940     survey (22) ,     -- one pass survey sequence
941     chromoplast (23) ,
942     genemap (24) ,    -- is a genetic map
943     restmap (25) ,    -- is an ordered restriction map
944     physmap (26) ,    -- is a physical map (not ordered restriction map)
945     other (255) }
946 
947 GIBB-method ::= ENUMERATED {        -- sequencing methods (being replaced by MolInfo)
948     concept-trans (1) ,    -- conceptual translation
949     seq-pept (2) ,         -- peptide was sequenced
950     both (3) ,             -- concept transl. w/ partial pept. seq.
951     seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
952     seq-pept-homol (5) ,   -- sequenced peptide, ordered by homology
953     concept-trans-a (6) ,  -- conceptual transl. supplied by author
954     other (255) }          -- note: no value 0 is defined for this type
955 
956 Numbering ::= CHOICE {           -- any display numbering system (one of the four forms)
957     cont Num-cont ,              -- continuous numbering
958     enum Num-enum ,              -- enumerated names for residues
959     ref Num-ref ,                -- by reference to another sequence
960     real Num-real }              -- supports mapping to a float system
961 
962 Num-cont ::= SEQUENCE {          -- continuous display numbering system
963     refnum INTEGER DEFAULT 1,         -- number assigned to first residue (1 when absent)
964     has-zero BOOLEAN DEFAULT FALSE ,  -- is 0 used as a number? (no when absent)
965     ascending BOOLEAN DEFAULT TRUE }  -- ascending numbers? (yes when absent)
966 
967 Num-enum ::= SEQUENCE {          -- any tags to residues
968     num INTEGER ,                        -- number of tags to follow
969     names SEQUENCE OF VisibleString }    -- the tags, in order
970 
971 Num-ref ::= SEQUENCE {           -- by reference to other sequences
972     type ENUMERATED {            -- type of reference
973         not-set (0) ,
974         sources (1) ,            -- by segmented or const seq sources
975         aligns (2) } ,           -- by alignments given below
976     aligns Seq-align OPTIONAL }  -- the alignment referred to by type aligns
977 
978 Num-real ::= SEQUENCE {          -- mapping to floating point system
979     a REAL ,                     -- from an integer system used by Bioseq
980     b REAL ,                     -- position = (a * int_position) + b
981     units VisibleString OPTIONAL }  -- free-text label for the units of the result
982 
983 Pubdesc ::= SEQUENCE {              -- how the sequence was presented in a publication
984     pub Pub-equiv ,                 -- the citation(s)
985     name VisibleString OPTIONAL ,   -- name used in paper
986     fig VisibleString OPTIONAL ,    -- figure in paper
987     num Numbering OPTIONAL ,        -- numbering from paper
988     numexc BOOLEAN OPTIONAL ,       -- numbering problem with paper
989     poly-a BOOLEAN OPTIONAL ,       -- poly A tail indicated in figure?
990     maploc VisibleString OPTIONAL , -- map location reported in paper
991     seq-raw StringStore OPTIONAL ,  -- original sequence from paper
992     align-group INTEGER OPTIONAL ,  -- this seq aligned with others in paper
993     comment VisibleString OPTIONAL, -- any comment on this pub in context
994     reftype INTEGER {           -- type of reference in a GenBank record
995         seq (0) ,               -- refers to sequence
996         sites (1) ,             -- refers to unspecified features
997         feats (2) ,             -- refers to specified features
998         no-target (3) }         -- nothing specified (EMBL)
999         DEFAULT seq }           -- seq (0) is assumed when reftype is absent
1000 
1001 Heterogen ::= VisibleString       -- free text: cofactor, prosthetic group, inhibitor, etc
1002 
1003 --*** Instances of sequences *******************************
1004 --*
1005 
1006 Seq-inst ::= SEQUENCE {            -- the sequence data itself
1007     repr ENUMERATED {              -- representation class
1008         not-set (0) ,              -- empty
1009         virtual (1) ,              -- no seq data
1010         raw (2) ,                  -- continuous sequence
1011         seg (3) ,                  -- segmented sequence
1012         const (4) ,                -- constructed sequence
1013         ref (5) ,                  -- reference to another sequence
1014         consen (6) ,               -- consensus sequence or pattern
1015         map (7) ,                  -- ordered map of any kind
1016         delta (8) ,              -- sequence made by changes (delta) to others
1017         other (255) } ,
1018     mol ENUMERATED {               -- molecule class in living organism
1019         not-set (0) ,              --   (so cdna is classed as rna)
1020         dna (1) ,
1021         rna (2) ,
1022         aa (3) ,
1023         na (4) ,                   -- just a nucleic acid
1024         other (255) } ,
1025     length INTEGER OPTIONAL ,      -- length of sequence in residues
1026     fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
1027     topology ENUMERATED {          -- topology of molecule
1028         not-set (0) ,
1029         linear (1) ,
1030         circular (2) ,
1031         tandem (3) ,               -- some part of tandem repeat
1032         other (255) } DEFAULT linear ,
1033     strand ENUMERATED {            -- strandedness in living organism
1034         not-set (0) ,
1035         ss (1) ,                   -- single strand
1036         ds (2) ,                   -- double strand
1037         mixed (3) ,
1038         other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
1039     seq-data Seq-data OPTIONAL ,   -- the sequence
1040     ext Seq-ext OPTIONAL ,         -- extensions for special types
1041     hist Seq-hist OPTIONAL }       -- sequence history
1042 
1043 --*** Sequence Extensions **********************************
1044 --*  for representing more complex types
1045 --*  const type uses Seq-hist.assembly
1046 
1047 Seq-ext ::= CHOICE {
1048     seg Seg-ext ,        -- segmented sequences
1049     ref Ref-ext ,        -- hot link to another sequence (a view)
1050     map Map-ext ,        -- ordered map of markers
1051     delta Delta-ext }    -- ordered list of Delta-seq pieces
1052 
1053 Seg-ext ::= SEQUENCE OF Seq-loc    -- ordered list of locations, one per segment
1054 
1055 Ref-ext ::= Seq-loc                -- the location being linked to
1056 
1057 Map-ext ::= SEQUENCE OF Seq-feat   -- ordered list of features
1058 
1059 Delta-ext ::= SEQUENCE OF Delta-seq   -- ordered list of pieces (locations or literals)
1060 
1061 Delta-seq ::= CHOICE {      -- one piece of a Delta-ext
1062     loc Seq-loc ,       -- point to a sequence
1063     literal Seq-literal }   -- a piece of sequence
1064 
1065 Seq-literal ::= SEQUENCE {   -- a piece of sequence given by length, data optional
1066     length INTEGER ,         -- must give a length in residues
1067     fuzz Int-fuzz OPTIONAL , -- could be unsure
1068     seq-data Seq-data OPTIONAL } -- may have the data
1069 
1070 --*** Sequence History Record ***********************************
1071 --** assembly = records how seq was assembled from others
1072 --** replaces = records sequences made obsolete by this one
1073 --** replaced-by = this seq is made obsolete by another(s)
1074 
1075 Seq-hist ::= SEQUENCE {
1076     assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
1077     replaces Seq-hist-rec OPTIONAL ,    -- seq makes these seqs obsolete
1078     replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
1079     deleted CHOICE {                    -- given either as a flag or as a date
1080         bool BOOLEAN ,
1081         date Date } OPTIONAL }
1082 
1083 Seq-hist-rec ::= SEQUENCE {  -- one replaces / replaced-by record of a Seq-hist
1084     date Date OPTIONAL ,
1085     ids SET OF Seq-id }      -- ids of the other sequences involved
1086 
1087 --*** Various internal sequence representations ************
1088 --*      all are controlled, fixed length forms
1089 
1090 Seq-data ::= CHOICE {              -- sequence representations (exactly one is used)
1091     iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
1092     iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
1093     ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
1094     ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
1095     ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
1096     ncbipna NCBIpna ,              -- nucleic acid probabilities
1097     ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
1098     ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
1099     ncbipaa NCBIpaa ,              -- amino acid probabilities
1100     ncbistdaa NCBIstdaa,           -- consecutive codes for std aas
1101     gap Seq-gap                    -- a gap instead of residues; see Seq-gap
1102 }
1103 
1104 Seq-gap ::= SEQUENCE {
1105     type INTEGER {                 -- kind of gap
1106         unknown(0),
1107         fragment(1),               -- Deprecated. Used only for AGP 1.1
1108         clone(2),                  -- Deprecated. Used only for AGP 1.1
1109         short-arm(3),
1110         heterochromatin(4),
1111         centromere(5),
1112         telomere(6),
1113         repeat(7),
1114         contig(8),
1115         scaffold(9),
1116         other(255)
1117     },
1118     linkage INTEGER {              -- linkage status for this gap
1119         unlinked(0),
1120         linked(1),
1121         other(255)
1122     } OPTIONAL,
1123     linkage-evidence SET OF Linkage-evidence OPTIONAL  -- evidence for the linkage
1124 }
1125 
1126 Linkage-evidence ::= SEQUENCE {   -- one item of evidence, used by Seq-gap
1127     type INTEGER {                 -- kind of evidence
1128         paired-ends(0),
1129         align-genus(1),
1130         align-xgenus(2),
1131         align-trnscpt(3),
1132         within-clone(4),
1133         clone-contig(5),
1134         map(6),
1135         strobe(7),
1136         unspecified(8),
1137         other(255)
1138     }
1139 }
1140 
1141 IUPACna ::= StringStore       -- IUPAC 1 letter nuc acid codes, no spaces
1142 IUPACaa ::= StringStore       -- IUPAC 1 letter amino acid codes, no spaces
1143 NCBI2na ::= OCTET STRING      -- 2 bits per base: 00=A, 01=C, 10=G, 11=T
1144 NCBI4na ::= OCTET STRING      -- 4 bits per base, 1 bit each for agct
1145                               -- 0001=A, 0010=C, 0100=G, 1000=T/U
1146                               -- 0101=Purine, 1010=Pyrimidine, etc
1147 NCBI8na ::= OCTET STRING      -- for modified nucleic acids
1148 NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
1149                               -- probabilities are coded 0-255 = 0.0-1.0
1150 NCBI8aa ::= OCTET STRING      -- for modified amino acids
1151 NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
1152                               -- IUPAC codes + U=selenocysteine
1153 NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
1154                               -- A-Y,B,Z,X,(ter),anything
1155                               -- probabilities are coded 0-255 = 0.0-1.0
1156 NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte
1157 
1158 --*** Sequence Annotation *************************************
1159 --*
1160 
1161 -- This is a replica of Textseq-id
1162 -- This is specific for annotations, and exists to maintain a semantic
1163 -- difference between IDs assigned to annotations and IDs assigned to
1164 -- sequences
1165 Textannot-id ::= SEQUENCE {   -- text identifier for an annotation; all fields optional
1166     name          VisibleString OPTIONAL ,
1167     accession VisibleString OPTIONAL ,
1168     release   VisibleString OPTIONAL ,
1169     version   INTEGER       OPTIONAL
1170 }
1171 
1172 Annot-id ::= CHOICE {         -- identifier of a Seq-annot, in one of four forms
1173     local Object-id ,
1174     ncbi INTEGER ,
1175     general Dbtag,
1176     other Textannot-id        -- text form, see Textannot-id
1177 }
1178 
1179 Annot-descr ::= SET OF Annotdesc   -- unordered collection of annotation descriptors
1180 
1181 Annotdesc ::= CHOICE {           -- one descriptor of a Seq-annot
1182     name VisibleString ,         -- a short name for this collection
1183     title VisibleString ,        -- a title for this collection
1184     comment VisibleString ,      -- a more extensive comment
1185     pub Pubdesc ,                -- a reference to the publication
1186     user User-object ,           -- user defined object
1187     create-date Date ,           -- date entry first created/released
1188     update-date Date ,           -- date of last update
1189     src Seq-id ,                 -- source sequence from which annot came
1190     align Align-def,             -- definition of the SeqAligns
1191     region Seq-loc }             -- all contents cover this region
1192 
1193 Align-def ::= SEQUENCE {         -- describes the alignments held in a Seq-annot
1194     align-type INTEGER {         -- class of align Seq-annot
1195       ref (1) ,                  -- set of alignments to the same sequence
1196       alt (2) ,                  -- set of alternate alignments of the same seqs
1197       blocks (3) ,               -- set of aligned blocks in the same seqs
1198       other (255) } ,
1199     ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
1200 
1201 Seq-annot ::= SEQUENCE {
1202     id SET OF Annot-id OPTIONAL , -- identifiers of this annotation
1203     db INTEGER {                 -- source of annotation
1204         genbank (1) ,
1205         embl (2) ,
1206         ddbj (3) ,
1207         pir  (4) ,
1208         sp   (5) ,
1209         bbone (6) ,
1210         pdb   (7) ,
1211         other (255) } OPTIONAL ,
1212     name VisibleString OPTIONAL ,-- source if "other" above
1213     desc Annot-descr OPTIONAL ,  -- used only for stand alone Seq-annots
1214     data CHOICE {                -- the annotation content; one kind per Seq-annot
1215         ftable SET OF Seq-feat , -- features
1216         align SET OF Seq-align , -- alignments
1217         graph SET OF Seq-graph , -- graphs
1218         ids SET OF Seq-id ,      -- used for communication between tools
1219         locs SET OF Seq-loc ,    -- used for communication between tools
1220         seq-table Seq-table } }  -- features in table form
1221 
1222 END
1223 
1224 
1225 --$Revision: 6.6 $
1226 --**********************************************************************
1227 --
1228 --  NCBI Sequence Collections
1229 --  by James Ostell, 1990
1230 --
1231 --  Version 3.0 - 1994
1232 --
1233 --**********************************************************************
1234 
1235 NCBI-Seqset DEFINITIONS ::=
1236 BEGIN
1237 
1238 EXPORTS Bioseq-set, Seq-entry;
1239 
1240 IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
1241         Object-id, Dbtag, Date FROM NCBI-General;
1242 
1243 --*** Sequence Collections ********************************
1244 --*
1245 
1246 Bioseq-set ::= SEQUENCE {      -- just a collection
1247     id Object-id OPTIONAL ,
1248     coll Dbtag OPTIONAL ,          -- to identify a collection
1249     level INTEGER OPTIONAL ,       -- nesting level
1250     class ENUMERATED {             -- kind of collection; not-set (0) when absent
1251         not-set (0) ,
1252         nuc-prot (1) ,              -- nuc acid and coded proteins
1253         segset (2) ,                -- segmented sequence + parts
1254         conset (3) ,                -- constructed sequence + parts
1255         parts (4) ,                 -- parts for segset (2) or conset (3)
1256         gibb (5) ,                  -- geninfo backbone
1257         gi (6) ,                    -- geninfo
1258         genbank (7) ,               -- converted genbank
1259         pir (8) ,                   -- converted pir
1260         pub-set (9) ,               -- all the seqs from a single publication
1261         equiv (10) ,                -- a set of equivalent maps or seqs
1262         swissprot (11) ,            -- converted SWISSPROT
1263         pdb-entry (12) ,            -- a complete PDB entry
1264         mut-set (13) ,              -- set of mutations
1265         pop-set (14) ,              -- population study
1266         phy-set (15) ,              -- phylogenetic study
1267         eco-set (16) ,              -- ecological sample study
1268         gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
1269         wgs-set (18) ,              -- whole genome shotgun project
1270         named-annot (19) ,          -- named annotation set
1271         named-annot-prod (20) ,     -- with instantiated mRNA+protein
1272         read-set (21) ,             -- set from a single read
1273         paired-end-reads (22) ,     -- paired sequences within a read-set
1274         small-genome-set (23) ,     -- viral segments or mitochondrial minicircles
1275         other (255) } DEFAULT not-set ,
1276     release VisibleString OPTIONAL ,
1277     date Date OPTIONAL ,
1278     descr Seq-descr OPTIONAL ,
1279     seq-set SEQUENCE OF Seq-entry , -- the members; each is a Bioseq or a nested Bioseq-set
1280     annot SET OF Seq-annot OPTIONAL }
1281 
1282 Seq-entry ::= CHOICE {         -- either a single sequence or a collection
1283         seq Bioseq ,
1284         set Bioseq-set }       -- recursive: a Bioseq-set holds further Seq-entry values
1285 
1286 END
1287 
1288 --$Revision: 6.0 $
1289 --  *********************************************************************
1290 --
1291 --  These are code and conversion tables for NCBI sequence codes
1292 --  ASN.1 for the sequences themselves is defined in seq.asn
1293 --
1294 --  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
1295 --    and increase continuously.  So IUPAC codes, which are upper case
1296 --    letters will always have 65 0 cells before the codes begin.  This
1297 --    allows all codes to do indexed lookups for things
1298 --
1299 --  Valid names for code tables are:
1300 --    IUPACna
1301 --    IUPACaa
1302 --    IUPACeaa
1303 --    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
1304 --                   display only, not a data exchange type
1305 --    NCBI2na
1306 --    NCBI4na
1307 --    NCBI8na
1308 --    NCBI8aa
1309 --    NCBIstdaa
1310 --     probability types map to IUPAC types for display as characters
1311 
1312 NCBI-SeqCode DEFINITIONS ::=
1313 BEGIN
1314 
1315 EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;
1316 
1317 Seq-code-type ::= ENUMERATED {              -- names the sequence codes (values start at 1)
1318     iupacna (1) ,              -- IUPAC 1 letter nuc acid code
1319     iupacaa (2) ,              -- IUPAC 1 letter amino acid code
1320     ncbi2na (3) ,              -- 2 bit nucleic acid code
1321     ncbi4na (4) ,              -- 4 bit nucleic acid code
1322     ncbi8na (5) ,              -- 8 bit extended nucleic acid code
1323     ncbipna (6) ,              -- nucleic acid probabilities
1324     ncbi8aa (7) ,              -- 8 bit extended amino acid codes
1325     ncbieaa (8) ,              -- extended ASCII 1 letter aa codes
1326     ncbipaa (9) ,              -- amino acid probabilities
1327     iupacaa3 (10) ,            -- 3 letter code only for display
1328     ncbistdaa (11) }           -- consecutive codes for std aas, 0-25
1329 
1330 Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings
1331     from Seq-code-type ,      -- code to map from
1332     to Seq-code-type ,        -- code to map to
1333     num INTEGER ,             -- number of rows in table
1334     start-at INTEGER DEFAULT 0 ,   -- index offset of first element (0 when absent)
1335     table SEQUENCE OF INTEGER }  -- table of values, in from-to order
1336 
1337 Seq-code-table ::= SEQUENCE { -- for names of coded values
1338     code Seq-code-type ,      -- name of code
1339     num INTEGER ,             -- number of rows in table
1340     one-letter BOOLEAN ,   -- symbol is ALWAYS 1 letter?
1341     start-at INTEGER DEFAULT 0 ,   -- index offset of first element (0 when absent)
1342     table SEQUENCE OF              -- the rows, each a symbol with its name
1343         SEQUENCE {
1344             symbol VisibleString ,      -- the printed symbol or letter
1345             name VisibleString } ,      -- an explanatory name or string
1346     comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid
1347 
1348 Seq-code-set ::= SEQUENCE {    -- for distribution
1349     codes SET OF Seq-code-table OPTIONAL ,   -- the code tables
1350     maps SET OF Seq-map-table OPTIONAL }     -- the mapping tables between codes
1351 
1352 END
1353 
1354 --$Revision: 6.0 $
1355 --*********************************************************************
1356 --
1357 -- 1990 - J.Ostell
1358 -- Version 3.0 - June 1994
1359 --
1360 --*********************************************************************
1361 --*********************************************************************
1362 --
1363 --  EMBL specific data
1364 --  This block of specifications was developed by Reiner Fuchs of EMBL
1365 --  Updated by J.Ostell, 1994
1366 --
1367 --*********************************************************************
1368 
1369 EMBL-General DEFINITIONS ::=
1370 BEGIN
1371 
1372 EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;
1373 
1374 IMPORTS Date, Object-id FROM NCBI-General;
1375 
1376 EMBL-dbname ::= CHOICE {      -- a database, given by code or by name
1377     code ENUMERATED {         -- one of the enumerated databases
1378         embl(0),
1379         genbank(1),
1380         ddbj(2),
1381         geninfo(3),
1382         medline(4),
1383         swissprot(5),
1384         pir(6),
1385         pdb(7),
1386         epd(8),
1387         ecd(9),
1388         tfd(10),
1389         flybase(11),
1390         prosite(12),
1391         enzyme(13),
1392         mim(14),
1393         ecoseq(15),
1394         hiv(16) ,
1395         other (255) } ,
1396     name    VisibleString }   -- the database name as text
1397 
1398 EMBL-xref ::= SEQUENCE {      -- cross reference: a database plus ids
1399     dbname EMBL-dbname,       -- which database
1400     id SEQUENCE OF Object-id }   -- the ids, in order
1401 
1402 EMBL-block ::= SEQUENCE {     -- EMBL specific descriptions
1403     class ENUMERATED {
1404         not-set(0),
1405         standard(1),
1406         unannotated(2),
1407         other(255) } DEFAULT standard,   -- standard (1) is assumed when absent
1408     div ENUMERATED {          -- presumably the EMBL division of the entry
1409         fun(0),
1410         inv(1),
1411         mam(2),
1412         org(3),
1413         phg(4),
1414         pln(5),
1415         pri(6),
1416         pro(7),
1417         rod(8),
1418         syn(9),
1419         una(10),
1420         vrl(11),
1421         vrt(12),
1422         pat(13),
1423         est(14),
1424         sts(15),
1425         other (255) } OPTIONAL,
1426     creation-date Date,       -- required
1427     update-date Date,         -- required
1428     extra-acc SEQUENCE OF VisibleString OPTIONAL,
1429     keywords SEQUENCE OF VisibleString OPTIONAL,
1430     xref SEQUENCE OF EMBL-xref OPTIONAL }   -- cross references to databases
1431 
1432 END
1433 
1434 --*********************************************************************
1435 --
1436 --  SWISSPROT specific data
1437 --  This block of specifications was developed by Mark Cavanaugh of
1438 --      NCBI working with Amos Bairoch of SWISSPROT
1439 --
1440 --*********************************************************************
1441 
1442 SP-General DEFINITIONS ::=
1443 BEGIN
1444 
1445 EXPORTS SP-block;
1446 
1447 IMPORTS Date, Dbtag FROM NCBI-General
1448         Seq-id FROM NCBI-Seqloc;
1449 
1450 SP-block ::= SEQUENCE {         -- SWISSPROT specific descriptions
1451     class ENUMERATED {          -- level of checking the entry has had
1452         not-set (0) ,
1453         standard (1) ,      -- conforms to all SWISSPROT checks
1454         prelim (2) ,        -- only seq and biblio checked
1455         other (255) } ,
1456     extra-acc SET OF VisibleString OPTIONAL ,  -- old SWISSPROT ids
1457     imeth BOOLEAN DEFAULT FALSE ,  -- seq known to start with Met
1458     plasnm SET OF VisibleString OPTIONAL,  -- plasmid names carrying gene
1459     seqref SET OF Seq-id OPTIONAL,         -- xref to other sequences
1460     dbref SET OF Dbtag OPTIONAL ,          -- xref to non-sequence dbases
1461     keywords SET OF VisibleString OPTIONAL , -- keywords
1462     created Date OPTIONAL ,         -- creation date
1463     sequpd Date OPTIONAL ,          -- sequence update
1464     annotupd Date OPTIONAL }        -- annotation update
1465 
1466 END
1467 
1468 --*********************************************************************
1469 --
1470 --  PIR specific data
1471 --  This block of specifications was developed by Jim Ostell of
1472 --      NCBI
1473 --
1474 --*********************************************************************
1475 
1476 PIR-General DEFINITIONS ::=
1477 BEGIN
1478 
1479 EXPORTS PIR-block;
1480 
1481 IMPORTS Seq-id FROM NCBI-Seqloc;
1482 
1483 PIR-block ::= SEQUENCE {          -- PIR specific descriptions
1484     had-punct BOOLEAN OPTIONAL ,      -- had punctuation in sequence ?
1485     host VisibleString OPTIONAL ,
1486     source VisibleString OPTIONAL ,     -- source line
1487     summary VisibleString OPTIONAL ,
1488     genetic VisibleString OPTIONAL ,
1489     includes VisibleString OPTIONAL ,
1490     placement VisibleString OPTIONAL ,
1491     superfamily VisibleString OPTIONAL ,
1492     keywords SEQUENCE OF VisibleString OPTIONAL ,
1493     cross-reference VisibleString OPTIONAL ,
1494     date VisibleString OPTIONAL ,     -- kept as text, not as a Date
1495     seq-raw VisibleString OPTIONAL ,  -- seq with punctuation
1496     seqref SET OF Seq-id OPTIONAL }         -- xref to other sequences
1497 
1498 END
1499 
1500 --*********************************************************************
1501 --
1502 --  GenBank specific data
1503 --  This block of specifications was developed by Jim Ostell of
1504 --      NCBI
1505 --
1506 --*********************************************************************
1507 
1508 GenBank-General DEFINITIONS ::=
1509 BEGIN
1510 
1511 EXPORTS GB-block;
1512 
1513 IMPORTS Date FROM NCBI-General;
1514 
1515 GB-block ::= SEQUENCE {          -- GenBank specific descriptions
1516     extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
1517     source VisibleString OPTIONAL ,     -- source line
1518     keywords SEQUENCE OF VisibleString OPTIONAL ,
1519     origin VisibleString OPTIONAL,
1520     date VisibleString OPTIONAL ,       -- OBSOLETE old form Entry Date, use entry-date
1521     entry-date Date OPTIONAL ,          -- replaces date
1522     div VisibleString OPTIONAL ,        -- GenBank division
1523     taxonomy VisibleString OPTIONAL }   -- continuation line of organism
1524 
1525 END
1526 
1527 --**********************************************************************
1528 -- PRF specific definition
1529 --    PRF is a protein sequence database created and maintained by
1530 --    Protein Research Foundation, Minoo-city, Osaka, Japan.
1531 --
1532 --    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
1533 --            Kyoto Univ., Japan
1534 --
1535 --**********************************************************************
1536 
1537 PRF-General DEFINITIONS ::=
1538 BEGIN
1539 
1540 EXPORTS PRF-block;
1541 
1542 PRF-block ::= SEQUENCE {          -- PRF specific descriptions; both fields are OPTIONAL
1543       extra-src       PRF-ExtraSrc OPTIONAL,   -- see PRF-ExtraSrc, defined in this module
1544       keywords        SEQUENCE OF VisibleString OPTIONAL   -- ordered list of keyword strings
1545 }
1546 
1547 PRF-ExtraSrc ::= SEQUENCE {       -- type of PRF-block.extra-src; not listed in this module's EXPORTS
1548       host    VisibleString OPTIONAL,   -- all five fields are OPTIONAL plain-text strings
1549       part    VisibleString OPTIONAL,
1550       state   VisibleString OPTIONAL,
1551       strain  VisibleString OPTIONAL,
1552       taxon   VisibleString OPTIONAL
1553 }
1554 
1555 END
1556 
1557 --*********************************************************************
1558 --
1559 --  PDB specific data
1560 --  This block of specifications was developed by Jim Ostell and
1561 --      Steve Bryant of NCBI
1562 --
1563 --*********************************************************************
1564 
1565 PDB-General DEFINITIONS ::=
1566 BEGIN
1567 
1568 EXPORTS PDB-block;
1569 
1570 IMPORTS Date FROM NCBI-General;
1571 
1572 PDB-block ::= SEQUENCE {          -- PDB specific descriptions; first four fields are required
1573     deposition Date ,         -- deposition date  month,year (Date is imported from NCBI-General)
1574     class VisibleString ,     -- required
1575     compound SEQUENCE OF VisibleString ,   -- required, ordered list of strings
1576     source SEQUENCE OF VisibleString ,     -- required, ordered list of strings
1577     exp-method VisibleString OPTIONAL ,  -- present if NOT X-ray diffraction
1578     replace PDB-replace OPTIONAL } -- replacement history; see PDB-replace
1579 
1580 PDB-replace ::= SEQUENCE {        -- type of PDB-block.replace; both fields are required
1581     date Date ,                   -- Date is imported from NCBI-General
1582     ids SEQUENCE OF VisibleString }   -- entry ids replaced by this one
1583 
1584 END
1585 
1586 --$Revision: 6.49 $
1587 --**********************************************************************
1588 --
1589 --  NCBI Sequence Feature elements
1590 --  by James Ostell, 1990
1591 --  Version 3.0 - June 1994
1592 --
1593 --**********************************************************************
1594 
1595 NCBI-Seqfeat DEFINITIONS ::=
1596 BEGIN
1597 
1598 EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;
1599 
1600 IMPORTS Gene-ref FROM NCBI-Gene
1601         Prot-ref FROM NCBI-Protein
1602         Org-ref FROM NCBI-Organism
1603         Variation-ref FROM NCBI-Variation
1604         BioSource FROM NCBI-BioSource
1605         RNA-ref FROM NCBI-RNA
1606         Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
1607         Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
1608         Rsite-ref FROM NCBI-Rsite
1609         Txinit FROM NCBI-TxInit
1610         DOI, PubMedId FROM NCBI-Biblio
1611         Pub-set FROM NCBI-Pub
1612         Object-id, Dbtag, User-object FROM NCBI-General;
1613 
1614 --*** Feature identifiers ********************************
1615 --*
1616 
1617 Feat-id ::= CHOICE {          -- feature identifier; a value holds exactly one of these alternatives
1618     gibb INTEGER ,            -- geninfo backbone
1619     giim Giimport-id ,        -- geninfo import (Giimport-id is imported from NCBI-Seqloc)
1620     local Object-id ,         -- for local software use (Object-id is imported from NCBI-General)
1621     general Dbtag }           -- for use by various databases (Dbtag is imported from NCBI-General)
1622 
1623 --*** Seq-feat *******************************************
1624 --*  sequence feature generalization
1625 
1626 Seq-feat ::= SEQUENCE {          -- sequence feature; only data and location are required
1627     id Feat-id OPTIONAL ,         -- single identifier; the ids field below is documented as its replacement
1628     data SeqFeatData ,           -- the specific data
1629     partial BOOLEAN OPTIONAL ,    -- incomplete in some way?
1630     except BOOLEAN OPTIONAL ,     -- something funny about this?
1631     comment VisibleString OPTIONAL ,   -- free text
1632     product Seq-loc OPTIONAL ,    -- product of process
1633     location Seq-loc ,            -- feature made from
1634     qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
1635     title VisibleString OPTIONAL ,   -- for user defined label
1636     ext User-object OPTIONAL ,    -- user defined structure extension
1637     cit Pub-set OPTIONAL ,        -- citations for this feature
1638     exp-ev ENUMERATED {           -- evidence for existence of feature
1639         experimental (1) ,        -- any reasonable experimental check
1640         not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
1641     xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
1642     dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
1643     pseudo BOOLEAN OPTIONAL ,     -- annotated on pseudogene?
1644     except-text VisibleString OPTIONAL , -- explain if except=TRUE
1645     ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
1646     exts SET OF User-object OPTIONAL , -- set of extensions; will replace 'ext' field
1647     support SeqFeatSupport OPTIONAL  -- will replace /experiment, /inference, model-evidence
1648 }
1649 
1650 SeqFeatData ::= CHOICE {       -- payload of Seq-feat.data; a value holds exactly one alternative
1651     gene Gene-ref ,            -- Gene-ref is imported from NCBI-Gene
1652     org Org-ref ,              -- Org-ref is imported from NCBI-Organism
1653     cdregion Cdregion ,        -- see Cdregion, defined in this module
1654     prot Prot-ref ,            -- Prot-ref is imported from NCBI-Protein
1655     rna RNA-ref ,              -- RNA-ref is imported from NCBI-RNA
1656     pub Pubdesc ,              -- publication applies to this seq
1657     seq Seq-loc ,              -- to annotate origin from another seq
1658     imp Imp-feat ,             -- see Imp-feat, defined in this module
1659     region VisibleString,      -- named region (globin locus)
1660     comment NULL ,             -- just a comment
1661     bond ENUMERATED {          -- inline enumeration of bond kinds
1662         disulfide (1) ,
1663         thiolester (2) ,
1664         xlink (3) ,
1665         thioether (4) ,
1666         other (255) } ,
1667     site ENUMERATED {          -- inline enumeration of site kinds
1668         active (1) ,
1669         binding (2) ,
1670         cleavage (3) ,
1671         inhibit (4) ,
1672         modified (5),
1673         glycosylation (6) ,
1674         myristoylation (7) ,
1675         mutagenized (8) ,
1676         metal-binding (9) ,
1677         phosphorylation (10) ,
1678         acetylation (11) ,
1679         amidation (12) ,
1680         methylation (13) ,
1681         hydroxylation (14) ,
1682         sulfatation (15) ,
1683         oxidative-deamination (16) ,
1684         pyrrolidone-carboxylic-acid (17) ,
1685         gamma-carboxyglutamic-acid (18) ,
1686         blocked (19) ,
1687         lipid-binding (20) ,
1688         np-binding (21) ,
1689         dna-binding (22) ,
1690         signal-peptide (23) ,
1691         transit-peptide (24) ,
1692         transmembrane-region (25) ,
1693         nitrosylation (26) ,
1694         other (255) } ,
1695     rsite Rsite-ref ,       -- restriction site  (for maps really)
1696     user User-object ,      -- user defined structure
1697     txinit Txinit ,         -- transcription initiation
1698     num Numbering ,         -- a numbering system
1699     psec-str ENUMERATED {   -- protein secondary structure
1700         helix (1) ,         -- any helix
1701         sheet (2) ,         -- beta sheet
1702         turn  (3) } ,       -- beta or gamma turn
1703     non-std-residue VisibleString ,  -- non-standard residue here in seq
1704     het Heterogen ,         -- cofactor, prosthetic grp, etc, bound to seq
1705     biosrc BioSource,       -- BioSource is imported from NCBI-BioSource
1706     clone Clone-ref,        -- see Clone-ref, defined in this module
1707     variation Variation-ref -- Variation-ref is imported from NCBI-Variation
1708 }
1709 
1710 SeqFeatXref ::= SEQUENCE {       -- element type of Seq-feat.xref; both optional because can have one or both
1711     id Feat-id OPTIONAL ,        -- the feature copied
1712     data SeqFeatData OPTIONAL }  -- the specific data
1713 
1714 SeqFeatSupport ::= SEQUENCE {    -- type of Seq-feat.support; every field is OPTIONAL
1715   experiment SET OF ExperimentSupport OPTIONAL ,     -- see ExperimentSupport
1716   inference SET OF InferenceSupport OPTIONAL ,       -- see InferenceSupport
1717   model-evidence SET OF ModelEvidenceSupport OPTIONAL   -- see ModelEvidenceSupport
1718 }
1719 
1720 EvidenceCategory ::= INTEGER {   -- type of the category field in ExperimentSupport and InferenceSupport
1721   not-set (0) ,
1722   coordinates (1) ,
1723   description (2) ,
1724   existence (3)
1725 }
1726 
1727 ExperimentSupport ::= SEQUENCE {   -- element type of SeqFeatSupport.experiment
1728   category EvidenceCategory OPTIONAL ,
1729   explanation VisibleString ,        -- the only required field
1730   pmids SET OF PubMedId OPTIONAL ,   -- PubMedId is imported from NCBI-Biblio
1731   dois SET OF DOI OPTIONAL           -- DOI is imported from NCBI-Biblio
1732 }
1733 
1734 Program-id ::= SEQUENCE {        -- element type of EvidenceBasis.programs
1735   name VisibleString ,           -- required
1736   version VisibleString OPTIONAL
1737 }
1738 
1739 EvidenceBasis ::= SEQUENCE {     -- type of InferenceSupport.basis; both fields are OPTIONAL
1740   programs SET OF Program-id OPTIONAL ,   -- see Program-id
1741   accessions SET OF Seq-id OPTIONAL       -- Seq-id is imported from NCBI-Seqloc
1742 }
1743 
1744 InferenceSupport ::= SEQUENCE {  -- element type of SeqFeatSupport.inference; basis is the only required field
1745   category EvidenceCategory OPTIONAL ,
1746   type INTEGER {                 -- defaults to not-set when absent
1747     not-set (0) ,
1748     similar-to-sequence (1) ,
1749     similar-to-aa (2) ,
1750     similar-to-dna (3) ,
1751     similar-to-rna (4) ,
1752     similar-to-mrna (5) ,
1753     similiar-to-est (6) ,        -- NOTE(review): identifier is spelled 'similiar'; kept as is because renaming changes the named value
1754     similar-to-other-rna (7) ,
1755     profile (8) ,
1756     nucleotide-motif (9) ,
1757     protein-motif (10) ,
1758     ab-initio-prediction (11) ,
1759     alignment (12) ,
1760     other (255)
1761   } DEFAULT not-set ,
1762   other-type VisibleString OPTIONAL ,   -- presumably describes the type when type is other; TODO confirm
1763   same-species BOOLEAN DEFAULT FALSE ,
1764   basis EvidenceBasis ,                 -- required; see EvidenceBasis
1765   pmids SET OF PubMedId OPTIONAL ,      -- PubMedId is imported from NCBI-Biblio
1766   dois SET OF DOI OPTIONAL              -- DOI is imported from NCBI-Biblio
1767 }
1768 
1769 ModelEvidenceItem ::= SEQUENCE {   -- element type of ModelEvidenceSupport.mrna, est and protein
1770   id Seq-id ,                      -- the only required field; Seq-id is imported from NCBI-Seqloc
1771   exon-count INTEGER OPTIONAL ,
1772   exon-length INTEGER OPTIONAL ,
1773   full-length BOOLEAN DEFAULT FALSE ,
1774   supports-all-exon-combo BOOLEAN DEFAULT FALSE
1775 }
1776 
1777 ModelEvidenceSupport ::= SEQUENCE {   -- element type of SeqFeatSupport.model-evidence; exported; no required fields
1778   method VisibleString OPTIONAL ,
1779   mrna SET OF ModelEvidenceItem OPTIONAL ,      -- see ModelEvidenceItem
1780   est SET OF ModelEvidenceItem OPTIONAL ,       -- see ModelEvidenceItem
1781   protein SET OF ModelEvidenceItem OPTIONAL ,   -- see ModelEvidenceItem
1782   identification Seq-id OPTIONAL ,
1783   dbxref SET OF Dbtag OPTIONAL ,
1784   exon-count INTEGER OPTIONAL ,
1785   exon-length INTEGER OPTIONAL ,
1786   full-length BOOLEAN DEFAULT FALSE ,
1787   supports-all-exon-combo BOOLEAN DEFAULT FALSE
1788 }
1789 
1790 --*** CdRegion ***********************************************
1791 --*
1792 --*  Instructions to translate from a nucleic acid to a peptide
1793 --*    conflict means it's supposed to translate but doesn't
1794 --*
1795 
1796 
1797 Cdregion ::= SEQUENCE {            -- type of SeqFeatData.cdregion; no field is required
1798     orf BOOLEAN OPTIONAL ,             -- just an ORF ?
1799     frame ENUMERATED {
1800         not-set (0) ,                  -- not set, code uses one
1801         one (1) ,
1802         two (2) ,
1803         three (3) } DEFAULT not-set ,      -- reading frame
1804     conflict BOOLEAN OPTIONAL ,        -- conflict
1805     gaps INTEGER OPTIONAL ,            -- number of gaps on conflict/except
1806     mismatch INTEGER OPTIONAL ,        -- number of mismatches on above
1807     code Genetic-code OPTIONAL ,       -- genetic code used
1808     code-break SEQUENCE OF Code-break OPTIONAL ,   -- individual exceptions
1809     stops INTEGER OPTIONAL }           -- number of stop codons on above
1810 
1811                     -- each code is 64 cells long, in the order where
1812                     -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCA=6, etc
1813                     -- NOTE: this order does NOT correspond to a Seq-data
1814                     -- encoding.  It is "natural" to codon usage instead.
1815                     -- the value in each cell is the AA coded for
1816                     -- start= AA coded only if first in peptide
1817                     --   in start array, if codon is not a legitimate start
1818                     --   codon, that cell will have the "gap" symbol for
1819                     --   that alphabet.  Otherwise it will have the AA
1820                     --   encoded when that codon is used at the start.
1821 
1822 Genetic-code ::= SET OF CHOICE {       -- unordered collection; each element is exactly one alternative below
1823     name VisibleString ,               -- name of a code
1824     id INTEGER ,                       -- id in dbase
1825     ncbieaa VisibleString ,            -- indexed to IUPAC extended
1826     ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
1827     ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
1828     sncbieaa VisibleString ,            -- start, indexed to IUPAC extended
1829     sncbi8aa OCTET STRING ,             -- start, indexed to NCBI8aa
1830     sncbistdaa OCTET STRING }           -- start, indexed to NCBIstdaa
1831 
1832 Code-break ::= SEQUENCE {              -- specific codon exceptions; element type of Cdregion.code-break
1833     loc Seq-loc ,                      -- location of exception
1834     aa CHOICE {                        -- the amino acid, given in exactly one of three alphabets
1835         ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
1836         ncbi8aa INTEGER ,              -- NCBI8aa code
1837         ncbistdaa INTEGER } }           -- NCBIstdaa code
1838 
1839 Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes; not listed in this module's EXPORTS
1840 
1841 --*** Import ***********************************************
1842 --*
1843 --*  Features imported from other databases
1844 --*
1845 
1846 Imp-feat ::= SEQUENCE {              -- type of SeqFeatData.imp
1847     key VisibleString ,              -- the only required field
1848     loc VisibleString OPTIONAL ,         -- original location string
1849     descr VisibleString OPTIONAL }       -- text description
1850 
1851 Gb-qual ::= SEQUENCE {       -- element type of Seq-feat.qual; both fields are required strings
1852     qual VisibleString ,
1853     val VisibleString }
1854 
1855 
1856 --*** Clone-ref ***********************************************
1857 --*
1858 --*  Specification of clone features
1859 --*
1860 
1861 Clone-ref ::= SEQUENCE {       -- type of SeqFeatData.clone; name is the only required field
1862     name VisibleString,        -- Official clone symbol
1863     library VisibleString OPTIONAL,     -- Library name
1864 
1865     concordant BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1866     unique BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1867     placement-method INTEGER {
1868         end-seq (0),           -- Clone placed by end sequence
1869         insert-alignment (1),  -- Clone placed by insert alignment
1870         sts (2),               -- Clone placed by STS
1871         fish (3),
1872         fingerprint (4),
1873         end-seq-insert-alignment (5), -- combined end-seq and insert align
1874         external (253),           -- Placement provided externally
1875         curated (254),            -- Human placed or approved
1876         other (255)
1877     } OPTIONAL,
1878     clone-seq Clone-seq-set OPTIONAL   -- see Clone-seq-set and Clone-seq
1879 }
1880 
1881 Clone-seq-set ::= SET OF Clone-seq   -- type of Clone-ref.clone-seq; unordered collection of Clone-seq
1882 
1883 
1884 Clone-seq ::= SEQUENCE {       -- element type of Clone-seq-set; type and location are required
1885     type INTEGER {
1886         insert (0),
1887         end (1),
1888         other (255)
1889     },
1890     confidence INTEGER {
1891         multiple (0),        -- Multiple hits
1892         na (1),              -- Unspecified
1893         nohit-rep (2),       -- No hits, end flagged repetitive
1894         nohitnorep (3),      -- No hits, end not flagged repetitive
1895         other-chrm (4),      -- Hit on different chromosome
1896         unique (5),
1897         virtual (6),         -- Virtual (hasn't been sequenced)
1898         multiple-rep (7),    -- Multiple hits, end flagged repetitive
1899         multiplenorep (8),   -- Multiple hits, end not flagged repetitive
1900         no-hit (9),          -- No hits
1901         other (255)
1902     } OPTIONAL,
1903     location Seq-loc,        -- location on sequence
1904     seq Seq-loc OPTIONAL,    -- clone sequence location
1905     align-id Dbtag OPTIONAL, -- internal alignment identifier
1906     support INTEGER {
1907         prototype (0),       -- sequence used to place clone
1908         supporting (1),      -- sequence supports placement
1909         supports-other(2),   -- supports a different placement
1910         non-supporting (3)   -- does not support any placement
1911     } OPTIONAL
1912 }
1913 
1914 END
1915 
1916 
1917 --*** Variation-ref ***********************************************
1918 --*
1919 --*  Specification of variation features
1920 --*
1921 
1922 NCBI-Variation DEFINITIONS ::=
1923 BEGIN
1924 
1925 EXPORTS Variation-ref, Variation-inst, VariantProperties,
1926         Population-data, Phenotype;
1927 
1928 IMPORTS Int-fuzz, User-object, Object-id, Dbtag FROM NCBI-General
1929         Seq-literal FROM NCBI-Sequence
1930         SubSource FROM NCBI-BioSource
1931         Seq-loc FROM NCBI-Seqloc
1932         Pub FROM NCBI-Pub;
1933 
1934 
1935 -- --------------------------------------------------------------------------
1936 -- Historically, the dbSNP definitions document data structures used in the
1937 -- processing and annotation of variations by the dbSNP group.  The intention
1938 -- is to provide information to clients that reflect internal information
1939 -- produced during the mapping of SNPs
1940 -- --------------------------------------------------------------------------
1941 
1942 VariantProperties ::= SEQUENCE {   -- exported; version is the only required field
1943     version INTEGER,
1944 
1945     -- NOTE:
1946     -- The format for most of these values is as an integer
1947     -- Unless otherwise noted, these integers represent a bitwise OR (= simple
1948     -- sum) of the possible values, and as such, these values represent the
1949     -- specific bit flags that may be set for each of the possible attributes
1950     -- here.
1951 
1952     resource-link INTEGER {
1953         preserved        (1), -- Clinical, Pubmed, Cited, (0x01)
1954         provisional      (2), -- Provisional Third Party Annotations (0x02)
1955         has3D            (4), -- Has 3D structure SNP3D table (0x04)
1956         submitterLinkout (8), -- SNP->SubSNP->Batch link_out (0x08)
1957         clinical        (16), -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
1958         genotypeKit     (32)  -- Marker exists on high density genotyping kit
1959                               -- (0x20)
1960     } OPTIONAL,
1961 
1962     gene-location INTEGER {
1963         in-gene         (1), -- Sequence intervals covered by a gene ID but not
1964                              -- having an aligned transcript (0x01)
1965         near-gene-5     (2), -- Within 2kb of the 5' end of a gene feature (0x02)
1966         near-gene-3     (4), -- Within 0.5kb of the 3' end of a gene feature (0x04)
1967         intron          (8), -- In Intron (0x08)
1968         donor          (16), -- In donor splice-site (0x10)
1969         acceptor       (32), -- In acceptor splice-site (0x20)
1970         utr-5          (64), -- In 5' UTR (0x40)
1971         utr-3         (128), -- In 3' UTR (0x80)
1972         in-start-codon(256), -- the variant is observed in a start codon
1973                              -- (0x100)
1974         in-stop-codon (512), -- the variant is observed in a stop codon
1975                              -- (0x200)
1976         intergenic   (1024), -- variant located between genes (0x400)
1977         conserved-noncoding(2048) -- variant is located in a conserved
1978                                   -- non-coding region (0x800)
1979     } OPTIONAL,
1980 
1981     effect INTEGER {
1982         no-change      (0), -- known to cause no functional changes
1983                             -- since 0 does not combine with any other bit
1984                             -- value, 'no-change' specifically implies that
1985                             -- there are no consequences
1986         synonymous     (1), -- one allele in the set does not change the encoded
1987                             -- amino acid (0x1)
1988         nonsense       (2), -- one allele in the set changes to STOP codon
1989                             -- (TER).  (0x2)
1990         missense       (4), -- one allele in the set changes protein peptide
1991                             -- (0x4)
1992         frameshift     (8), -- one allele in the set changes all downstream
1993                             -- amino acids (0x8)
1994 
1995         up-regulator  (16), -- the variant causes increased transcription
1996                             -- (0x10)
1997         down-regulator(32), -- the variant causes decreased transcription
1998                             -- (0x20)
1999         methylation   (64), -- (0x40)
2000         stop-gain     (128), -- reference codon is not stop codon, but the snp
2001                              -- variant allele changes the codon to a
2002                              -- terminating codon. (0x80)
2003         stop-loss     (256)  -- reverse of STOP-GAIN: reference codon is a
2004                              -- stop codon, but a snp variant allele changes
2005                              -- the codon to a non-terminating codon. (0x100)
2006     } OPTIONAL,
2007 
2008     mapping INTEGER {
2009         has-other-snp         (1), -- Another SNP has the same mapped positions
2010                                    -- on reference assembly (0x01)
2011         has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
2012                                    -- chromosomes on different assemblies (0x02)
2013         is-assembly-specific  (4)  -- Only maps to 1 assembly (0x04)
2014     } OPTIONAL,
2015 
2016     -- map-weight captures specificity of placement
2017     -- NOTE: This is *NOT* a bitfield
2018     map-weight INTEGER {
2019         is-uniquely-placed(1),
2020         placed-twice-on-same-chrom(2),
2021         placed-twice-on-diff-chrom(3),
2022         many-placements(10)
2023     } OPTIONAL,
2024 
2025     frequency-based-validation INTEGER {
2026         is-mutation       (1), -- low frequency variation that is cited in
2027                                -- journal or other reputable sources (0x01)
2028         above-5pct-all    (2), -- >5% minor allele freq in each and all
2029                                -- populations (0x02)
2030         above-5pct-1plus  (4), -- >5% minor allele freq in 1+ populations (0x04)
2031         validated         (8), -- Bit is set if the variant has a minor allele
2032                                -- observed in two or more separate chromosomes (0x08)
2033         above-1pct-all   (16), -- >1% minor allele freq in each and all
2034                                -- populations (0x10)
2035         above-1pct-1plus (32)  -- >1% minor allele freq in 1+ populations (0x20)
2036     } OPTIONAL,
2037 
2038     genotype INTEGER {
2039         in-haplotype-set (1), -- Exists in a haplotype tagging set (0x01)
2040         has-genotypes    (2)  -- SNP has individual genotype (0x02)
2041     } OPTIONAL,
2042 
2043     -- project IDs are IDs from BioProjects
2044     -- in order to report information about project relationships, we
2045     -- require projects to be registered
2046     -- This field in many ways duplicates dbxrefs; however, the
2047     -- intention of this field is to more adequately reflect
2048     -- ownership and data source
2049     --
2050     -- 11/9/2010: DO NOT USE
2051     -- This field was changed in the spec in a breaking way; using it will
2052     -- break clients.  We are officially suppressing / abandoning this field.
2053     -- Clients who need to use this should instead place the data in
2054     -- Seq-feat.dbxref, using the db name 'BioProject'
2055     project-data SET OF INTEGER OPTIONAL,
2056 
2057     quality-check INTEGER {
2058         contig-allele-missing   (1), -- Reference sequence allele at the mapped
2059                                      -- position is not present in the SNP
2060                                      -- allele list, adjusted for orientation
2061                                      -- (0x01)
2062         withdrawn-by-submitter  (2), -- One member SS is withdrawn by submitter
2063                                      -- (0x02)
2064         non-overlapping-alleles (4), -- RS set has 2+ alleles from different
2065                                      -- submissions and these sets share no
2066                                      -- alleles in common (0x04)
2067         strain-specific         (8), -- Strain specific fixed difference (0x08)
2068         genotype-conflict      (16)  -- Has Genotype Conflict (0x10)
2069     } OPTIONAL,
2070 
2071     confidence INTEGER {
2072         unknown         (0),
2073         likely-artifact (1),
2074         other           (255)
2075     } OPTIONAL,
2076 
2077     -- has this variant been validated?
2078     -- While a boolean flag offers no subtle distinctions of validation
2079     -- methods, occasionally it is only known as a single boolean value
2080     -- NOTE: this flag is redundant and should be omitted if more comprehensive
2081     -- validation information is present
2082     other-validation BOOLEAN OPTIONAL,
2083 
2084     -- origin of this allele, if known
2085     -- note that these are powers-of-two, and represent bits; thus, we can
2086     -- represent more than one state simultaneously through a bitwise OR
2087     allele-origin INTEGER {
2088         unknown         (0),
2089         germline        (1),
2090         somatic         (2),
2091         inherited       (4),
2092         paternal        (8),
2093         maternal        (16),
2094         de-novo         (32),
2095         biparental      (64),
2096         uniparental     (128),
2097         not-tested      (256),
2098         tested-inconclusive (512),
2099         not-reported   (1024),
2100 
2101         -- stopper - 2^30 (1073741824 = 0x40000000)
2102         other           (1073741824)
2103     } OPTIONAL,
2104 
2105     -- observed allele state, if known
2106     -- NOTE: THIS IS NOT A BITFIELD!
2107     allele-state INTEGER {
2108         unknown         (0),
2109         homozygous      (1),
2110         heterozygous    (2),
2111         hemizygous      (3),
2112         nullizygous     (4),
2113         other           (255)
2114     } OPTIONAL,
2115 
2116     -- NOTE:
2117     -- 'allele-frequency' here refers to the minor allele frequency of the
2118     -- default population
2119     allele-frequency REAL OPTIONAL,
2120 
2121     -- is this variant the ancestral allele?
2122     is-ancestral-allele BOOLEAN OPTIONAL
2123 }
2124 
2125 Phenotype ::= SEQUENCE {           -- exported; every field is OPTIONAL
2126     source VisibleString OPTIONAL,
2127     term VisibleString OPTIONAL,
2128     xref SET OF Dbtag OPTIONAL,    -- Dbtag is imported from NCBI-General
2129 
2130     -- does this variant have known clinical significance?
2131     clinical-significance INTEGER {   -- sequential codes, not bit flags
2132         unknown                 (0),
2133         untested                (1),
2134         non-pathogenic          (2),
2135         probable-non-pathogenic (3),
2136         probable-pathogenic     (4),
2137         pathogenic              (5),
2138         drug-response           (6),
2139         histocompatibility      (7),
2140         other                   (255)
2141     } OPTIONAL
2142 }
2143 
2144 Population-data ::= SEQUENCE {     -- exported; population is the only required field
2145     -- assayed population (e.g. HAPMAP-CEU)
2146     population VisibleString,
2147     genotype-frequency REAL OPTIONAL,
2148     chromosomes-tested INTEGER OPTIONAL,
2149     sample-ids SET OF Object-id OPTIONAL,   -- Object-id is imported from NCBI-General
2150     allele-frequency REAL OPTIONAL,
2151 
2152     -- This field is an explicit bit-field
2153     -- Valid values should be a bitwise combination (= simple sum)
2154     -- of any of the values below
2155     flags INTEGER {
2156         is-default-population   (1),   -- (0x01)
2157         is-minor-allele         (2),   -- (0x02)
2158         is-rare-allele          (4)    -- (0x04)
2159     } OPTIONAL
2160 }
2161 
2162 Ext-loc ::= SEQUENCE {     -- pairs an identifier with a location; both fields required; not listed in this module's EXPORTS
2163     id Object-id,          -- Object-id is imported from NCBI-General
2164     location Seq-loc       -- Seq-loc is imported from NCBI-Seqloc
2165 }
2166 
2167 
2168 Variation-ref ::= SEQUENCE {
2169     -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
2170     -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
2171     --
2172     -- we relate three kinds of IDs here:
2173     --  - our current object's id
2174     --  - the id of this object's parent, if it exists
2175     --  - the sample ID that this item originates from
2176     id        Dbtag OPTIONAL,
2177     parent-id Dbtag OPTIONAL,
2178     sample-id Object-id OPTIONAL,
2179     other-ids SET OF Dbtag OPTIONAL,
2180 
2181     -- names and synonyms
2182     -- some variants have well-known canonical names and possible accepted
2183     -- synonyms
2184     name VisibleString OPTIONAL,
2185     synonyms SET OF VisibleString OPTIONAL,
2186 
2187     -- tag for comment and descriptions
2188     description VisibleString OPTIONAL,
2189 
2190     -- phenotype
2191     phenotype SET OF Phenotype OPTIONAL,
2192 
2193     -- sequencing / acquisition method
2194     method SET OF INTEGER {
2195         unknown             (0),
2196         bac-acgh            (1),
2197         computational       (2),
2198         curated             (3),
2199         digital-array       (4),
2200         expression-array    (5),
2201         fish                (6),
2202         flanking-sequence   (7),
2203         maph                (8),
2204         mcd-analysis        (9),
2205         mlpa                (10),
2206         oea-assembly        (11),
2207         oligo-acgh          (12),
2208         paired-end          (13),
2209         pcr                 (14),
2210         qpcr                (15),
2211         read-depth          (16),
2212         roma                (17),
2213         rt-pcr              (18),
2214         sage                (19),
2215         sequence-alignment  (20),
2216         sequencing          (21),
2217         snp-array           (22),
2218         snp-genoytyping     (23),
2219         southern            (24),
2220         western             (25),
2221         optical-mapping     (26),
2222 
2223         other               (255)
2224     } OPTIONAL,
2225 
2226     -- Note about SNP representation and pertinent fields: allele-frequency,
2227     -- population, quality-codes:
2228     -- The case of multiple alleles for a SNP would be described by
2229     -- parent-feature of type Variation-set.diff-alleles, where the child
2230     -- features of type Variation-inst, all at the same location, would
2231     -- describe individual alleles.
2232 
2233     -- population data
2234     -- DEPRECATED - do not use
2235     population-data SET OF Population-data OPTIONAL,
2236 
2237     -- variant properties bit fields
2238     variant-prop VariantProperties OPTIONAL,
2239 
2240     -- has this variant been validated?
2241     -- DEPRECATED: new field = VariantProperties.other-validation
2242     validated BOOLEAN OPTIONAL,
2243 
2244     -- link-outs to GeneTests database
2245     -- DEPRECATED - do not use
2246     clinical-test SET OF Dbtag OPTIONAL,
2247 
2248     -- origin of this allele, if known
2249     -- note that these are powers-of-two, and represent bits; thus, we can
2250     -- represent more than one state simultaneously through a bitwise OR
2251     -- DEPRECATED: new field = VariantProperties.allele-origin
2252     allele-origin INTEGER {
2253         unknown         (0),
2254         germline        (1),
2255         somatic         (2),
2256         inherited       (4),
2257         paternal        (8),
2258         maternal        (16),
2259         de-novo         (32),
2260         biparental      (64),
2261         uniparental     (128),
2262         not-tested      (256),
2263         tested-inconclusive (512),
2264 
2265         -- stopper - 2^30
2266         other           (1073741824)
2267     } OPTIONAL,
2268 
2269     -- observed allele state, if known
2270     -- DEPRECATED: new field = VariantProperties.allele-state
2271     allele-state INTEGER {
2272         unknown         (0),
2273         homozygous      (1),
2274         heterozygous    (2),
2275         hemizygous      (3),
2276         nullizygous     (4),
2277         other           (255)
2278     } OPTIONAL,
2279 
2280     -- NOTE:
2281     -- 'allele-frequency' here refers to the minor allele frequency of the
2282     -- default population
2283     -- DEPRECATED: new field = VariantProperties.allele-frequency
2284     allele-frequency REAL OPTIONAL,
2285 
2286     -- is this variant the ancestral allele?
2287     -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
2288     is-ancestral-allele BOOLEAN OPTIONAL,
2289 
2290     -- publication support.
2291     -- Note: made this pub instead of pub-equiv, since
2292     -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like
2293     -- Pub is more often used as top-level container
2294     -- DEPRECATED - do not use; use Seq-feat.dbxref instead
2295     pub Pub OPTIONAL,
2296 
2297     data CHOICE {
2298         unknown NULL,
2299         note    VisibleString, --free-form
2300         uniparental-disomy NULL,
2301 
2302         -- actual sequence-edit at feat.location
2303         instance        Variation-inst,
2304 
2305         -- Set of related Variations.
2306         -- Location of the set equals the union of member locations
2307         set SEQUENCE {
2308             type INTEGER {
2309                 unknown     (0),
2310                 compound    (1), -- complex change at the same location on the
2311                                  -- same molecule
2312                 products    (2), -- different products arising from the same
2313                                  -- variation in a precursor, e.g. r.[13g>a,
2314                                  -- 13_88del]
2315                 haplotype   (3), -- changes on the same allele, e.g.
2316                                  -- r.[13g>a;15u>c]
2317                 genotype    (4), -- changes on different alleles in the same
2318                                  -- genotype, e.g. g.[476C>T]+[476C>T]
2319                 mosaic      (5), -- different genotypes in the same individual
2320                 individual  (6), -- same organism; allele relationship unknown,
2321                                  -- e.g. g.[476C>T(+)183G>C]
2322                 population  (7), -- population
2323                 alleles     (8), -- set represents a set of observed alleles
2324                 package     (9), -- set represents a package of observations at
2325                                  -- a given location, generally containing
2326                                  -- asserted + reference
2327                 other       (255)
2328             },
2329             variations SET OF Variation-ref,
2330             name  VisibleString OPTIONAL
2331         },
2332 
2333         -- variant is a complex and undescribed change at the location
2334         -- This type of variant is known to occur in dbVar submissions
2335         complex NULL
2336     },
2337 
2338     consequence SET OF CHOICE {
2339         unknown     NULL,
2340         splicing    NULL, --some effect on splicing
2341         note        VisibleString,  --freeform
2342 
2343         -- Describe resulting variation in the product, e.g. missense,
2344         -- nonsense, silent, neutral, etc in a protein, that arises from
2345         -- THIS variation.
2346         variation   Variation-ref,
2347 
2348         -- see http://www.hgvs.org/mutnomen/recs-prot.html
2349         frameshift SEQUENCE {
2350             phase INTEGER OPTIONAL,
2351             x-length INTEGER OPTIONAL
2352         },
2353 
2354         loss-of-heterozygosity SEQUENCE {
2355             -- In germline comparison, it will be reference genome assembly
2356             -- (default) or reference/normal population. In somatic mutation,
2357             -- it will be a name of the normal tissue.
2358             reference VisibleString OPTIONAL,
2359 
2360             -- Name of the testing subject type or the testing tissue.
2361             test VisibleString OPTIONAL
2362         }
2363     } OPTIONAL,
2364 
2365     -- Observed location, if different from the parent set or feature.location.
2366     -- DEPRECATED - do not use
2367     location        Seq-loc OPTIONAL,
2368 
2369     -- reference other locs, e.g. mapped source
2370     -- DEPRECATED - do not use
2371     ext-locs SET OF Ext-loc OPTIONAL,
2372 
2373     -- DEPRECATED - do not use; use Seq-feat.exts instead
2374     ext             User-object OPTIONAL,
2375 
2376     somatic-origin SET OF SEQUENCE {
2377         -- description of the somatic origin itself
2378         source SubSource OPTIONAL,
2379         -- condition related to this origin's type
2380         condition SEQUENCE {
2381             description VisibleString OPTIONAL,
2382             -- reference to BioTerm / other descriptive database
2383             object-id SET OF Dbtag OPTIONAL
2384         } OPTIONAL
2385     } OPTIONAL
2386 
2387 }
2388 
2389 
2390 Delta-item ::= SEQUENCE {   -- one element of a Variation-inst 'delta' list
2391     seq CHOICE {             -- sequence operand of this item (optional)
2392         literal Seq-literal, -- operand supplied as a Seq-literal
2393         loc Seq-loc,         -- operand supplied as a Seq-loc
2394         this NULL --same location as variation-ref itself
2395     } OPTIONAL,
2396 
2397     -- Multiplier allows representing a tandem repeat, e.g. ATATAT as AT*3.
2398     -- This allows describing CNV/SSR where delta=self with a
2399     -- multiplier which specifies the count of the repeat unit.
2400 
2401     multiplier          INTEGER OPTIONAL, --assumed 1 if not specified.
2402     multiplier-fuzz     Int-fuzz OPTIONAL, -- NOTE(review): presumably the uncertainty of 'multiplier'; confirm
2403 
2404     action INTEGER {   -- how this item is applied; morph when omitted (see DEFAULT)
2405 
2406         -- replace len(seq) positions starting with location.start with seq
2407         morph      (0),
2408 
2409         -- go downstream by distance specified by multiplier (upstream if < 0),
2410         -- in genomic context.
2411         offset     (1),
2412 
2413         -- excise sequence at location
2414         -- if multiplier is specified, delete len(location)*multiplier
2415         -- positions downstream
2416         del-at     (2),
2417 
2418         -- insert seq before the location.start
2419         ins-before (3)
2420 
2421     } DEFAULT morph
2422 }
2423 
2424 
2425 -- Variation instance: a typed change expressed as an ordered list of Delta-items
2426 Variation-inst ::= SEQUENCE {
2427     type INTEGER {              -- kind of change; each value notes the expected 'delta'
2428         unknown         (0),    -- delta=[]
2429         identity        (1),    -- delta=[]
2430         inv             (2),    -- delta=[del, ins.seq=
2431                                 -- RevComp(variation-location)]
2432         snv             (3),    -- delta=[morph of length 1]
2433                                 -- NOTE: this is snV not snP; the latter
2434                                 -- requires frequency-based validation to be
2435                                 -- established in VariantProperties
2436                                 -- the strict definition of SNP is an SNV with
2437                                 -- an established population frequency of at
2438                                 -- least 1% in at least 1 population
2439         mnp             (4),    -- delta=[morph of length >1]
2440         delins          (5),    -- delta=[del, ins]
2441         del             (6),    -- delta=[del]
2442         ins             (7),    -- delta=[ins]
2443         microsatellite  (8),    -- delta=[del, ins.seq= repeat-unit with fuzzy
2444                                 -- multiplier]
2445                                 -- variation-location is the microsat expansion
2446                                 -- on the sequence
2447         transposon      (9),    -- delta=[del, ins.seq= known donor or 'this']
2448                                 -- variation-location is equiv of transposon
2449                                 -- locs.
2450         cnv             (10),   -- delta=[del, ins= 'this' with fuzzy
2451                                 -- multiplier]
2452         direct-copy     (11),   -- delta=[ins.seq= upstream location on the
2453                                 -- same strand]
2454         rev-direct-copy (12),   -- delta=[ins.seq= downstream location on the
2455                                 -- same strand]
2456         inverted-copy   (13),   -- delta=[ins.seq= upstream location on the
2457                                 -- opposite strand]
2458         everted-copy    (14),   -- delta=[ins.seq= downstream location on the
2459                                 -- opposite strand]
2460         translocation   (15),   -- delta=like delins
2461         prot-missense   (16),   -- delta=[morph of length 1]
2462         prot-nonsense   (17),   -- delta=[del]; variation-location is the tail
2463                                 -- of the protein being truncated
2464         prot-neutral    (18),   -- delta=[morph of length 1]
2465         prot-silent     (19),   -- delta=[morph of length 1, same AA as at
2466                                 -- variation-location]
2467         prot-other      (20),   -- delta=any
2468 
2469         other           (255)   -- delta=any
2470     },
2471 
2472     -- Sequence that replaces the location, in biological order.
2473     delta SEQUENCE OF Delta-item,
2474 
2475     -- 'observation' is used to label items in a Variation-ref package
2476     -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
2477     -- of the values may be observed.
2478     observation INTEGER {
2479         asserted        (1),   -- inst represents the asserted base at a
2480                                -- position
2481         reference       (2),   -- inst represents the reference base at the
2482                                -- position
2483         variant         (4)    -- inst represents the observed variant at a
2484                                -- given position
2485     } OPTIONAL
2486 }
2487 
2488 END
2489 
2490 
2491 --**********************************************************************
2492 --
2493 --  NCBI Restriction Sites
2494 --  by James Ostell, 1990
2495 --  version 0.8
2496 --
2497 --**********************************************************************
2498 
2499 NCBI-Rsite DEFINITIONS ::=
2500 BEGIN
2501 
2502 EXPORTS Rsite-ref;
2503 
2504 IMPORTS Dbtag FROM NCBI-General;
2505 
2506 Rsite-ref ::= CHOICE {     -- restriction site reference: exactly one of str or db
2507     str VisibleString ,     -- may be unparsable
2508     db  Dbtag }             -- pointer to a restriction site database
2509 
2510 END
2511 
2512 --**********************************************************************
2513 --
2514 --  NCBI RNAs
2515 --  by James Ostell, 1990
2516 --  version 0.8
2517 --
2518 --**********************************************************************
2519 
2520 NCBI-RNA DEFINITIONS ::=
2521 BEGIN
2522 
2523 EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;
2524 
2525 IMPORTS Seq-loc FROM NCBI-Seqloc;
2526 
2527 --*** rnas ***********************************************
2528 --*
2529 --*  various rnas
2530 --*
2531                          -- minimal RNA sequence
2532 RNA-ref ::= SEQUENCE {          -- required type plus optional pseudo flag and extension
2533     type ENUMERATED {            -- type of RNA feature
2534         unknown (0) ,
2535         premsg (1) ,
2536         mRNA (2) ,
2537         tRNA (3) ,
2538         rRNA (4) ,
2539         snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
2540         scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
2541         snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
2542         ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
2543         tmRNA (9) ,
2544         miscRNA (10) ,
2545         other (255) } ,
2546     pseudo BOOLEAN OPTIONAL ,       -- optional flag; no DEFAULT is declared
2547     ext CHOICE {                    -- optional extension, exactly one of:
2548         name VisibleString ,        -- for naming "other" type
2549         tRNA Trna-ext ,             -- for tRNAs
2550         gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
2551     }
2552 
2553 Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions; every field is optional
2554     aa CHOICE {                         -- aa this carries
2555         iupacaa INTEGER ,               -- NOTE(review): each alternative presumably
2556         ncbieaa INTEGER ,               --   names the amino-acid alphabet in which
2557         ncbi8aa INTEGER ,               --   the INTEGER code is expressed; confirm
2558         ncbistdaa INTEGER } OPTIONAL ,
2559     codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
2560     anticodon Seq-loc OPTIONAL }        -- location of anticodon
2561 
2562 RNA-gen ::= SEQUENCE {                  -- generic RNA fields; every field is optional
2563     class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
2564                                         -- examples: antisense_RNA, guide_RNA, snRNA
2565     product VisibleString OPTIONAL ,    -- free-text product field
2566     quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
2567 }
2568 
2569 RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
2570     qual VisibleString ,                -- in a tag (qual), value (val) format
2571     val VisibleString }                 -- value paired with qual; both fields are required
2572 
2573 RNA-qual-set ::= SEQUENCE OF RNA-qual   -- ordered list of RNA-qual tag/value entries
2574 
2575 END
2576 
2577 --**********************************************************************
2578 --
2579 --  NCBI Genes
2580 --  by James Ostell, 1990
2581 --  version 0.8
2582 --
2583 --**********************************************************************
2584 
2585 NCBI-Gene DEFINITIONS ::=
2586 BEGIN
2587 
2588 EXPORTS Gene-ref, Gene-nomenclature;
2589 
2590 IMPORTS Dbtag FROM NCBI-General;
2591 
2592 --*** Gene ***********************************************
2593 --*
2594 --*  reference to a gene
2595 --*
2596 
2597 Gene-ref ::= SEQUENCE {                   -- reference to a gene; every field may be omitted
2598     locus VisibleString OPTIONAL ,        -- Official gene symbol
2599     allele VisibleString OPTIONAL ,       -- Official allele designation
2600     desc VisibleString OPTIONAL ,         -- descriptive name
2601     maploc VisibleString OPTIONAL ,       -- descriptive map location
2602     pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene; FALSE when omitted
2603     db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
2604     syn SET OF VisibleString OPTIONAL ,   -- synonyms for locus
2605     locus-tag VisibleString OPTIONAL ,    -- systematic gene name (e.g., MI0001, ORF0069)
2606     formal-name Gene-nomenclature OPTIONAL -- structured record: status, symbol, name, source
2607 }
2608 
2609 Gene-nomenclature ::= SEQUENCE {    -- nomenclature record used by Gene-ref.formal-name
2610     status ENUMERATED {             -- required; the only mandatory field
2611         unknown (0) ,
2612         official (1) ,
2613         interim (2)
2614     } ,
2615     symbol VisibleString OPTIONAL , -- optional symbol text
2616     name VisibleString OPTIONAL ,   -- optional name text
2617     source Dbtag OPTIONAL           -- NOTE(review): presumably the database/authority behind the name; confirm
2618 }
2619 
2620 END
2621 
2622 
2623 --**********************************************************************
2624 --
2625 --  NCBI Organism
2626 --  by James Ostell, 1994
2627 --  version 3.0
2628 --
2629 --**********************************************************************
2630 
2631 NCBI-Organism DEFINITIONS ::=
2632 BEGIN
2633 
2634 EXPORTS Org-ref;
2635 
2636 IMPORTS Dbtag FROM NCBI-General;
2637 
2638 --*** Org-ref ***********************************************
2639 --*
2640 --*  Reference to an organism
2641 --*     defines only the organism.. lower levels of detail for biological
2642 --*     molecules are provided by the Source object
2643 --*
2644 
2645 Org-ref ::= SEQUENCE {                 -- reference to an organism; every field is optional
2646     taxname VisibleString OPTIONAL ,   -- preferred formal name
2647     common VisibleString OPTIONAL ,    -- common name
2648     mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
2649     db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
2650     syn SET OF VisibleString OPTIONAL ,  -- synonyms for taxname or common
2651     orgname OrgName OPTIONAL }         -- structured name; see OrgName
2652 
2653 
2654 OrgName ::= SEQUENCE {                     -- structured organism name; every field is optional
2655     name CHOICE {                          -- exactly one naming form when present
2656         binomial BinomialOrgName ,         -- genus/species type name
2657         virus VisibleString ,              -- virus names are different
2658         hybrid MultiOrgName ,              -- hybrid between organisms
2659         namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
2660         partial PartialOrgName } OPTIONAL , -- when genus not known
2661     attrib VisibleString OPTIONAL ,        -- attribution of name
2662     mod SEQUENCE OF OrgMod OPTIONAL ,      -- structured modifiers; see OrgMod
2663     lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
2664     gcode INTEGER OPTIONAL ,               -- genetic code (see CdRegion)
2665     mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
2666     div VisibleString OPTIONAL ,           -- GenBank division code
2667     pgcode INTEGER OPTIONAL }              -- plastid genetic code
2668 
2669 
2670 OrgMod ::= SEQUENCE {          -- one structured organism modifier: subtype plus text
2671     subtype INTEGER {          -- modifier kind; values 0 and 1 are not assigned here
2672         strain (2) ,
2673         substrain (3) ,
2674         type (4) ,
2675         subtype (5) ,
2676         variety (6) ,
2677         serotype (7) ,
2678         serogroup (8) ,
2679         serovar (9) ,
2680         cultivar (10) ,
2681         pathovar (11) ,
2682         chemovar (12) ,
2683         biovar (13) ,
2684         biotype (14) ,
2685         group (15) ,
2686         subgroup (16) ,
2687         isolate (17) ,
2688         common (18) ,
2689         acronym (19) ,
2690         dosage (20) ,          -- chromosome dosage of hybrid
2691         nat-host (21) ,        -- natural host of this specimen
2692         sub-species (22) ,
2693         specimen-voucher (23) ,
2694         authority (24) ,
2695         forma (25) ,
2696         forma-specialis (26) ,
2697         ecotype (27) ,
2698         synonym (28) ,
2699         anamorph (29) ,
2700         teleomorph (30) ,
2701         breed (31) ,
2702         gb-acronym (32) ,       -- used by taxonomy database
2703         gb-anamorph (33) ,      -- used by taxonomy database
2704         gb-synonym (34) ,       -- used by taxonomy database
2705         culture-collection (35) ,
2706         bio-material (36) ,
2707         metagenome-source (37) ,
2708         old-lineage (253) ,
2709         old-name (254) ,
2710         other (255) } ,         -- ASN5 note: old-name (254), once planned for the next spec, is now defined above
2711     subname VisibleString ,     -- required string accompanying subtype
2712     attrib VisibleString OPTIONAL }  -- attribution/source of name
2713 
2714 BinomialOrgName ::= SEQUENCE {          -- genus / species / subspecies name parts
2715     genus VisibleString ,               -- required
2716     species VisibleString OPTIONAL ,    -- species required if subspecies used
2717     subspecies VisibleString OPTIONAL } -- optional; see the note on species
2718 
2719 MultiOrgName ::= SEQUENCE OF OrgName   -- ordered list; the first will be used to assign division
2720 
2721 PartialOrgName ::= SEQUENCE OF TaxElement  -- ordered list of TaxElement, used when we don't know the genus
2722 
2723 TaxElement ::= SEQUENCE {              -- one entry of a PartialOrgName
2724     fixed-level INTEGER {              -- required level selector
2725        other (0) ,                     -- level must be set in string
2726        family (1) ,
2727        order (2) ,
2728        class (3) } ,
2729     level VisibleString OPTIONAL ,     -- level as text; see the note on fixed-level other (0)
2730     name VisibleString }               -- required name string
2731 
2732 END
2733 
2734 
2735 --**********************************************************************
2736 --
2737 --  NCBI BioSource
2738 --  by James Ostell, 1994
2739 --  version 3.0
2740 --
2741 --**********************************************************************
2742 
2743 NCBI-BioSource DEFINITIONS ::=
2744 BEGIN
2745 
2746 EXPORTS BioSource, SubSource;
2747 
2748 IMPORTS Org-ref FROM NCBI-Organism;
2749 
2750 --********************************************************************
2751 --
2752 -- BioSource gives the source of the biological material
2753 --   for sequences
2754 --
2755 --********************************************************************
2756 
2757 BioSource ::= SEQUENCE {     -- source of the biological material for a sequence
2758     genome INTEGER {         -- biological context
2759         unknown (0) ,
2760         genomic (1) ,
2761         chloroplast (2) ,
2762         chromoplast (3) ,
2763         kinetoplast (4) ,
2764         mitochondrion (5) ,
2765         plastid (6) ,
2766         macronuclear (7) ,
2767         extrachrom (8) ,
2768         plasmid (9) ,
2769         transposon (10) ,
2770         insertion-seq (11) ,
2771         cyanelle (12) ,
2772         proviral (13) ,
2773         virion (14) ,
2774         nucleomorph (15) ,
2775         apicoplast (16) ,
2776         leucoplast (17) ,
2777         proplastid (18) ,
2778         endogenous-virus (19) ,
2779         hydrogenosome (20) ,
2780         chromosome (21) ,
2781         chromatophore (22)
2782       } DEFAULT unknown ,
2783     origin INTEGER {         -- origin category; unknown when omitted
2784       unknown (0) ,
2785       natural (1) ,                    -- normal biological entity
2786       natmut (2) ,                     -- naturally occurring mutant
2787       mut (3) ,                        -- artificially mutagenized
2788       artificial (4) ,                 -- artificially engineered
2789       synthetic (5) ,                  -- purely synthetic
2790       other (255)
2791     } DEFAULT unknown ,
2792     org Org-ref ,                      -- required; the only field without OPTIONAL or DEFAULT
2793     subtype SEQUENCE OF SubSource OPTIONAL , -- ordered list of SubSource qualifiers
2794     is-focus NULL OPTIONAL ,           -- to distinguish biological focus
2795     pcr-primers PCRReactionSet OPTIONAL }  -- set of PCR reactions; see PCRReaction
2796 
2797 PCRReactionSet ::= SET OF PCRReaction   -- unordered collection of PCRReaction entries
2798 
2799 PCRReaction ::= SEQUENCE {              -- one reaction; either primer set may be omitted
2800     forward PCRPrimerSet OPTIONAL ,     -- forward primer set
2801     reverse PCRPrimerSet OPTIONAL }     -- reverse primer set
2802 
2803 PCRPrimerSet ::= SET OF PCRPrimer   -- unordered collection of PCRPrimer entries
2804 
2805 PCRPrimer ::= SEQUENCE {            -- one primer; both fields are optional
2806     seq PCRPrimerSeq OPTIONAL ,     -- primer sequence, as a string type
2807     name PCRPrimerName OPTIONAL }   -- primer name, as a string type
2808 
2809 PCRPrimerSeq ::= VisibleString   -- named string type used by PCRPrimer.seq
2810 
2811 PCRPrimerName ::= VisibleString   -- named string type used by PCRPrimer.name
2812 
2813 SubSource ::= SEQUENCE {    -- one source qualifier: subtype plus text
2814     subtype INTEGER {       -- qualifier kind; value 0 is not assigned here
2815         chromosome (1) ,
2816         map (2) ,
2817         clone (3) ,
2818         subclone (4) ,
2819         haplotype (5) ,
2820         genotype (6) ,
2821         sex (7) ,
2822         cell-line (8) ,
2823         cell-type (9) ,
2824         tissue-type (10) ,
2825         clone-lib (11) ,
2826         dev-stage (12) ,
2827         frequency (13) ,
2828         germline (14) ,
2829         rearranged (15) ,
2830         lab-host (16) ,
2831         pop-variant (17) ,
2832         tissue-lib (18) ,
2833         plasmid-name (19) ,
2834         transposon-name (20) ,
2835         insertion-seq-name (21) ,
2836         plastid-name (22) ,
2837         country (23) ,
2838         segment (24) ,
2839         endogenous-virus-name (25) ,
2840         transgenic (26) ,
2841         environmental-sample (27) ,
2842         isolation-source (28) ,
2843         lat-lon (29) ,          -- +/- decimal degrees
2844         collection-date (30) ,  -- DD-MMM-YYYY format
2845         collected-by (31) ,     -- name of person who collected the sample
2846         identified-by (32) ,    -- name of person who identified the sample
2847         fwd-primer-seq (33) ,   -- sequence (possibly more than one; semicolon-separated)
2848         rev-primer-seq (34) ,   -- sequence (possibly more than one; semicolon-separated)
2849         fwd-primer-name (35) ,
2850         rev-primer-name (36) ,
2851         metagenomic (37) ,
2852         mating-type (38) ,
2853         linkage-group (39) ,
2854         haplogroup (40) ,
2855         whole-replicon (41) ,
2856         phenotype (42) ,
2857         other (255) } ,
2858     name VisibleString ,    -- required string accompanying subtype
2859     attrib VisibleString OPTIONAL }    -- attribution/source of this name
2860 
2861 END
2862 
2863 --**********************************************************************
2864 --
2865 --  NCBI Protein
2866 --  by James Ostell, 1990
2867 --  version 0.8
2868 --
2869 --**********************************************************************
2870 
2871 NCBI-Protein DEFINITIONS ::=
2872 BEGIN
2873 
2874 EXPORTS Prot-ref;
2875 
2876 IMPORTS Dbtag FROM NCBI-General;
2877 
2878 --*** Prot-ref ***********************************************
2879 --*
2880 --*  Reference to a protein name
2881 --*
2882 
2883 Prot-ref ::= SEQUENCE {                -- reference to a protein name; every field may be omitted
2884     name SET OF VisibleString OPTIONAL ,      -- protein name
2885     desc VisibleString OPTIONAL ,      -- description (instead of name)
2886     ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
2887     activity SET OF VisibleString OPTIONAL ,  -- activities
2888     db SET OF Dbtag OPTIONAL ,         -- ids in other dbases
2889     processed ENUMERATED {             -- processing status; not-set when omitted
2890        not-set (0) ,
2891        preprotein (1) ,
2892        mature (2) ,
2893        signal-peptide (3) ,
2894        transit-peptide (4) } DEFAULT not-set }
2895 
2896 END
2897 --********************************************************************
2898 --
2899 --  Transcription Initiation Site Feature Data Block
2900 --  James Ostell, 1991
2901 --  Philip Bucher, David Ghosh
2902 --  version 1.1
2903 --
2904 --
2905 --
2906 --********************************************************************
2907 
2908 NCBI-TxInit DEFINITIONS ::=
2909 BEGIN
2910 
2911 EXPORTS Txinit;
2912 
2913 IMPORTS Gene-ref FROM NCBI-Gene
2914         Prot-ref FROM NCBI-Protein
2915         Org-ref FROM NCBI-Organism;
2916 
2917 Txinit ::= SEQUENCE {       -- transcription initiation site; name and txsystem are required
2918     name VisibleString ,    -- descriptive name of initiation site
2919     syn SEQUENCE OF VisibleString OPTIONAL ,   -- synonyms
2920     gene SEQUENCE OF Gene-ref OPTIONAL ,  -- gene(s) transcribed
2921     protein SEQUENCE OF Prot-ref OPTIONAL ,   -- protein(s) produced
2922     rna SEQUENCE OF VisibleString OPTIONAL ,  -- rna(s) produced
2923     expression VisibleString OPTIONAL ,  -- tissue/time of expression
2924     txsystem ENUMERATED {       -- transcription apparatus used at this site
2925         unknown (0) ,
2926         pol1 (1) ,      -- eukaryotic Pol I
2927         pol2 (2) ,      -- eukaryotic Pol II
2928         pol3 (3) ,      -- eukaryotic Pol III
2929         bacterial (4) ,
2930         viral (5) ,
2931         rna (6) ,       -- RNA replicase
2932         organelle (7) ,
2933         other (255) } ,
2934     txdescr VisibleString OPTIONAL ,   -- modifiers on txsystem
2935     txorg Org-ref OPTIONAL ,  -- organism supplying transcription apparatus
2936     mapping-precise BOOLEAN DEFAULT FALSE ,  -- mapping precise or approx
2937     location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
2938     inittype ENUMERATED {       -- optional; no DEFAULT is declared
2939         unknown (0) ,
2940         single (1) ,
2941         multiple (2) ,
2942         region (3) } OPTIONAL ,
2943     evidence SET OF Tx-evidence OPTIONAL }  -- supporting experiments; see Tx-evidence
2944 
2945 Tx-evidence ::= SEQUENCE {   -- one experiment entry in Txinit.evidence
2946     exp-code ENUMERATED {    -- required: experimental method
2947         unknown (0) ,
2948         rna-seq (1) ,   -- direct RNA sequencing
2949         rna-size (2) ,  -- RNA length measurement
2950         np-map (3) ,    -- nuclease protection mapping with homologous sequence ladder
2951         np-size (4) ,   -- nuclease protected fragment length measurement
2952         pe-seq (5) ,    -- dideoxy RNA sequencing
2953         cDNA-seq (6) ,  -- full-length cDNA sequencing
2954         pe-map (7) ,    -- primer extension mapping with homologous sequence ladder
2955         pe-size (8) ,   -- primer extension product length measurement
2956         pseudo-seq (9) , -- full-length processed pseudogene sequencing
2957         rev-pe-map (10) ,   -- see NOTE (1) below
2958         other (255) } ,
2959     expression-system ENUMERATED {   -- physiological when omitted (see DEFAULT)
2960         unknown (0) ,
2961         physiological (1) ,
2962         in-vitro (2) ,
2963         oocyte (3) ,
2964         transfection (4) ,
2965         transgenic (5) ,
2966         other (255) } DEFAULT physiological ,
2967     low-prec-data BOOLEAN DEFAULT FALSE ,    -- NOTE(review): presumably marks low-precision data; confirm
2968     from-homolog BOOLEAN DEFAULT FALSE }     -- experiment actually done on
2969                                              --  close homolog
2970 
2971     -- NOTE (1) length measurement of a reverse-direction primer-extension
2972     --          product (blocked by RNA 5' end) by comparison with
2973     --          homologous sequence ladder (J. Mol. Biol. 199, 587)
2974 
2975 END
2976 
2977 --$Revision: 1.7 $
2978 --  ----------------------------------------------------------------------------
2979 --
2980 --                            PUBLIC DOMAIN NOTICE
2981 --                National Center for Biotechnology Information
2982 --
2983 --  This software/database is a "United States Government Work" under the terms
2984 --  of the United States Copyright Act.  It was written as part of the author's
2985 --  official duties as a United States Government employee and thus cannot be
2986 --  copyrighted.  This software/database is freely available to the public for
2987 --  use.  The National Library of Medicine and the U.S. Government have not
2988 --  placed any restriction on its use or reproduction.
2989 --
2990 --  Although all reasonable efforts have been taken to ensure the accuracy and
2991 --  reliability of the software and data, the NLM and the U.S. Government do not
2992 --  and cannot warrant the performance or results that may be obtained by using
2993 --  this software or data.  The NLM and the U.S. Government disclaim all
2994 --  warranties, express or implied, including warranties of performance,
2995 --  merchantability or fitness for any particular purpose.
2996 --
2997 --  Please cite the authors in any work or product based on this material.
2998 --
2999 --  ----------------------------------------------------------------------------
3000 --
3001 --  Authors: Mike DiCuccio, Eugene Vasilchenko
3002 --
3003 --  ASN.1 interface to table readers
3004 --
3005 --  ----------------------------------------------------------------------------
3006 
3007 NCBI-SeqTable DEFINITIONS ::=
3008 
3009 BEGIN
3010 
3011 EXPORTS
3012     SeqTable-column-info, SeqTable-column, Seq-table;
3013     
3014 IMPORTS
3015     Seq-id, Seq-loc, Seq-interval   FROM NCBI-Seqloc;
3016 
3017 
3018 SeqTable-column-info ::= SEQUENCE {   -- describes one table column; every field is optional
3019     -- user-friendly column name, can be skipped
3020     title VisibleString OPTIONAL,
3021 
3022     -- identification of the column data in the objects described by the table
3023     field-id INTEGER { -- known column data types
3024         -- position types
3025         location        (0), -- location as Seq-loc
3026         location-id     (1), -- location Seq-id
3027         location-gi     (2), -- gi
3028         location-from   (3), -- interval from
3029         location-to     (4), -- interval to
3030         location-strand (5), -- location strand
3031         location-fuzz-from-lim (6),
3032         location-fuzz-to-lim   (7),
3033 
3034         product         (10), -- product as Seq-loc
3035         product-id      (11), -- product Seq-id
3036         product-gi      (12), -- product gi
3037         product-from    (13), -- product interval from
3038         product-to      (14), -- product interval to
3039         product-strand  (15), -- product strand
3040         product-fuzz-from-lim (16),
3041         product-fuzz-to-lim   (17),
3042         
3043         -- main feature fields
3044         id-local        (20), -- id.local.id
3045         xref-id-local   (21), -- xref.id.local.id
3046         partial         (22),
3047         comment         (23),
3048         title           (24),
3049         ext             (25), -- field-name must be "E.xxx", see below
3050         qual            (26), -- field-name must be "Q.xxx", see below
3051         dbxref          (27), -- field-name must be "D.xxx", see below
3052 
3053         -- various data fields
3054         data-imp-key        (30),
3055         data-region         (31),
3056         data-cdregion-frame (32),
3057 
3058         -- extra fields, see also special values for str below
3059         ext-type        (40),
3060         qual-qual       (41),
3061         qual-val        (42),
3062         dbxref-db       (43),
3063         dbxref-tag      (44)
3064     } OPTIONAL,
3065 
3066     -- any column can be identified by an ASN.1 text locator string
3067     -- with the object type omitted.
3068     -- examples:
3069     --   "data.gene.locus" for Seq-feat.data.gene.locus
3070     --   "data.imp.key" for Seq-feat.data.imp.key
3071     --   "qual.qual"
3072     --    - Seq-feat.qual is SEQUENCE so several columns are allowed
3073     --      see also "Q.xxx" special value for shorter qual representation
3074     --   "ext.type.str"
3075     --   "ext.data.label.str"
3076     --   "ext.data.data.int"
3077     --      see also "E.xxx" special value for shorter ext representation
3078     -- special values start with a capital letter:
3079     --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
3080     --    - Seq-feat.ext.data is SEQUENCE so several columns are allowed
3081     --   "Q.xxx" - qual.qual = xxx, qual.val = data
3082     --    - Seq-feat.qual is SEQUENCE so several columns are allowed
3083     --   "D.xxx" - dbxref.id = xxx, dbxref.tag = data
3084     --    - Seq-feat.dbxref is SET so several columns are allowed
3085     field-name  VisibleString OPTIONAL
3086 }
3087 
3088 
3089 CommonString-table ::= SEQUENCE {   -- string column stored as a value list plus indexes
3090     -- set of possible values
3091     strings     SEQUENCE OF UTF8String,
3092 
3093     -- indexes of values (NOTE(review): presumably positions in 'strings'; confirm base and range)
3094     indexes     SEQUENCE OF INTEGER
3095 }
3096 
3097 
3098 CommonBytes-table ::= SEQUENCE {   -- byte-array column stored as a value list plus indexes (see SeqTable-multi-data.common-bytes)
3099     -- set of possible values
3100     bytes       SEQUENCE OF OCTET STRING,
3101 
3102     -- indexes of values: references into 'bytes' (presumably one per row; index base is not stated here)
3103     indexes     SEQUENCE OF INTEGER
3104 }
3105 
3106 
3107 SeqTable-multi-data ::= CHOICE {   -- row data of one column (see SeqTable-column.data); exactly one variant is used
3108     -- a set of integers, one per row
3109     int         SEQUENCE OF INTEGER,
3110     
3111     -- a set of reals, one per row
3112     real        SEQUENCE OF REAL,
3113 
3114     -- a set of strings, one per row
3115     string      SEQUENCE OF UTF8String,
3116 
3117     -- a set of byte arrays, one per row
3118     bytes       SEQUENCE OF OCTET STRING,
3119 
3120     -- a set of strings with a small set of possible values
3121     common-string   CommonString-table,
3122 
3123     -- a set of byte arrays with a small set of possible values
3124     common-bytes    CommonBytes-table,
3125 
3126     -- a set of bits, one per row
3127     -- this uses bm::bvector<> as its storage mechanism
3128     bit         OCTET STRING,
3129 
3130     -- a set of locations, one per row
3131     loc         SEQUENCE OF Seq-loc,
3132     id          SEQUENCE OF Seq-id,         -- Seq-ids, presumably also one per row
3133     interval    SEQUENCE OF Seq-interval    -- Seq-intervals, presumably also one per row
3134 }
3135 
3136 
3137 SeqTable-single-data ::= CHOICE {   -- a single value; used by SeqTable-column.default and SeqTable-column.sparse-other
3138     -- integer
3139     int         INTEGER,
3140     
3141     -- real
3142     real        REAL,
3143 
3144     -- string
3145     string      UTF8String,
3146 
3147     -- byte array
3148     bytes       OCTET STRING,
3149 
3150     -- bit
3151     bit         BOOLEAN,
3152 
3153     -- location (scalar counterparts of loc / id / interval in SeqTable-multi-data)
3154     loc         Seq-loc,
3155     id          Seq-id,
3156     interval    Seq-interval
3157 }
3158 
3159 
3160 SeqTable-sparse-index ::= CHOICE {   -- identifies which rows of a column carry values (see SeqTable-column.sparse)
3161     -- indexes of rows with values
3162     indexes SEQUENCE OF INTEGER,
3163 
3164     -- bitset of rows with values
3165     bit-set OCTET STRING
3166 }
3167 
3168 
3169 SeqTable-column ::= SEQUENCE {   -- one column of a Seq-table (see Seq-table.columns)
3170     -- column description or reference to previously defined info
3171     header      SeqTable-column-info,   -- information about data
3172 
3173     -- row data
3174     data        SeqTable-multi-data OPTIONAL,
3175 
3176     -- if not all rows contain data, this field identifies the rows that do
3177     sparse      SeqTable-sparse-index OPTIONAL,
3178 
3179     -- default value for sparse table, or if row data is too short
3180     default     SeqTable-single-data OPTIONAL,
3181 
3182     -- single value for indexes not listed in sparse table
3183     sparse-other SeqTable-single-data OPTIONAL
3184 }
3185 
3186 
3187 Seq-table ::= SEQUENCE {   -- features of one type stored as a table, column by column
3188     -- type of features in this table, equal to Seq-feat.data variant index
3189     feat-type   INTEGER,
3190 
3191     -- subtype of features in this table, defined in header SeqFeatData.hpp
3192     feat-subtype INTEGER OPTIONAL,
3193 
3194     -- number of rows
3195     num-rows    INTEGER,
3196 
3197     -- data in columns (ordered list of SeqTable-column)
3198     columns     SEQUENCE OF SeqTable-column
3199 }
3200 
3201 
3202 END
3203 --$Revision: 6.4 $
3204 --**********************************************************************
3205 --
3206 --  NCBI Sequence Alignment elements
3207 --  by James Ostell, 1990
3208 --
3209 --**********************************************************************
3210 
3211 NCBI-Seqalign DEFINITIONS ::=
3212 BEGIN
3213 
3214 EXPORTS Seq-align, Score, Score-set, Seq-align-set;
3215 
3216 IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
3217         User-object, Object-id FROM NCBI-General;
3218 
3219 --*** Sequence Alignment ********************************
3220 --*
3221 
3222 Seq-align-set ::= SET OF Seq-align   -- unordered collection of alignments; also the type of Seq-align.segs.disc
3223 
3224 Seq-align ::= SEQUENCE {   -- one alignment: its type, optional scores, and the segment data in one of several encodings
3225     type ENUMERATED {
3226         not-set (0) ,
3227         global (1) ,
3228         diags (2) ,     -- unbroken, but not ordered, diagonals
3229         partial (3) ,   -- mapping pieces together
3230         disc (4) ,      -- discontinuous alignment
3231         other (255) } ,
3232     dim INTEGER OPTIONAL ,     -- dimensionality
3233     score SET OF Score OPTIONAL ,   -- for whole alignment
3234     segs CHOICE {                   -- alignment data
3235         dendiag SEQUENCE OF Dense-diag ,    -- ordered list of Dense-diag
3236         denseg              Dense-seg ,     -- a single Dense-seg
3237         std     SEQUENCE OF Std-seg ,       -- ordered list of Std-seg
3238         packed              Packed-seg ,    -- a single Packed-seg
3239         disc                Seq-align-set,  -- nested set of Seq-aligns
3240         spliced             Spliced-seg,    -- a single Spliced-seg
3241         sparse              Sparse-seg      -- a single Sparse-seg
3242     } ,
3243     
3244     -- regions of sequence over which the alignment
3245     --  was computed
3246     bounds SET OF Seq-loc OPTIONAL,
3247 
3248     -- alignment id
3249     id SEQUENCE OF Object-id OPTIONAL,
3250 
3251     -- extra info
3252     ext SEQUENCE OF User-object OPTIONAL
3253 }
3254 
3255 Dense-diag ::= SEQUENCE {         -- for (multiway) diagonals
3256     dim INTEGER DEFAULT 2 ,    -- dimensionality
3257     ids SEQUENCE OF Seq-id ,   -- sequences in order
3258     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order
3259     len INTEGER ,                 -- len of aligned segments
3260     strands SEQUENCE OF Na-strand OPTIONAL ,   -- presumably in ids order, like starts (not stated here)
3261     scores SET OF Score OPTIONAL }             -- optional, unordered set of scores
3262 
3263     -- Dense-seg: the densest packing for sequence alignments only.
3264     --            a start of -1 indicates a gap for that sequence of
3265     --            length lens.
3266     --
3267     -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
3268     -- id=200  AAGGCCTTTTAG.......GATGATGATGA
3269     -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
3270     --
3271     -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
3272     -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
3273     -- lens = { 4, 8, 7, 3, 4, 4 }
3274     --
3275 
3276 Dense-seg ::= SEQUENCE {          -- for (multiway) global or partial alignments
3277     dim INTEGER DEFAULT 2 ,       -- dimensionality
3278     numseg INTEGER ,              -- number of segments here
3279     ids SEQUENCE OF Seq-id ,      -- sequences in order
3280     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order within segs (dim values per segment in the example above)
3281     lens SEQUENCE OF INTEGER ,    -- length of each segment (numseg values in the example above)
3282     strands SEQUENCE OF Na-strand OPTIONAL ,
3283     scores SEQUENCE OF Score OPTIONAL }  -- score for each seg
3284 
3285 Packed-seg ::= SEQUENCE {         -- for (multiway) global or partial alignments
3286     dim INTEGER DEFAULT 2 ,       -- dimensionality
3287     numseg INTEGER ,              -- number of segments here
3288     ids SEQUENCE OF Seq-id ,      -- sequences in order
3289     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order for whole alignment
3290     present OCTET STRING ,        -- Boolean flags: whether each sequence is present or absent in
3291                                   --   each segment (bit layout is not specified here)
3292     lens SEQUENCE OF INTEGER ,    -- length of each segment
3293     strands SEQUENCE OF Na-strand OPTIONAL ,
3294     scores SEQUENCE OF Score OPTIONAL }  -- score for each segment
3295 
3296 Std-seg ::= SEQUENCE {            -- one segment given as Seq-locs (see Seq-align.segs.std)
3297     dim INTEGER DEFAULT 2 ,       -- dimensionality
3298     ids SEQUENCE OF Seq-id OPTIONAL ,
3299     loc SEQUENCE OF Seq-loc ,     -- presumably one location per aligned sequence (not stated here)
3300     scores SET OF Score OPTIONAL }
3301 
3302 
3303 Spliced-seg ::= SEQUENCE {   -- alignment of a product (transcript or protein) to genomic sequence, as a list of exons
3304     -- product is either protein or transcript (cDNA)
3305     product-id Seq-id OPTIONAL,
3306     genomic-id Seq-id OPTIONAL,
3307 
3308     -- should be 'plus' or 'minus'
3309     product-strand Na-strand OPTIONAL ,
3310     genomic-strand Na-strand OPTIONAL ,
3311     
3312     product-type ENUMERATED {   -- required field: kind of product
3313         transcript(0),
3314         protein(1)
3315     },
3316 
3317     -- set of segments involved
3318     -- each segment corresponds to one exon
3319     -- exons are always in biological order
3320     exons SEQUENCE OF Spliced-exon ,
3321 
3322     -- optional poly(A) tail
3323     poly-a INTEGER OPTIONAL,
3324 
3325     -- length of the product, in bases/residues
3326     -- from this, a 3' unaligned length can be extracted; this also captures
3327     -- the case in which a protein aligns leaving a partial codon alignment
3328     -- at the 3' end
3329     product-length INTEGER OPTIONAL,
3330 
3331     -- alignment descriptors / modifiers
3332     -- this provides us a set for extension
3333     modifiers SET OF Spliced-seg-modifier OPTIONAL
3334 }
3335 
3336 Spliced-seg-modifier ::= CHOICE {   -- one descriptor of a Spliced-seg (see Spliced-seg.modifiers)
3337     -- protein aligns from the start and the first codon
3338     -- on both product and genomic is a start codon
3339     start-codon-found BOOLEAN,
3340     
3341     -- protein aligns to its end and there is a stop codon
3342     -- on the genomic right after the alignment
3343     stop-codon-found BOOLEAN
3344 }
3345 
3346 
3347 -- complete or partial exon
3348 -- two consecutive Spliced-exons may belong to one exon
3349 Spliced-exon ::= SEQUENCE {
3350     -- product-end >= product-start
3351     product-start Product-pos ,
3352     product-end Product-pos ,
3353 
3354     -- genomic-end >= genomic-start
3355     genomic-start INTEGER ,
3356     genomic-end INTEGER ,
3357 
3358     -- product is either protein or transcript (cDNA)
3359     -- (the same optional ids also exist on the enclosing Spliced-seg)
3360     product-id Seq-id OPTIONAL ,
3360     genomic-id Seq-id OPTIONAL ,
3361 
3362     -- should be 'plus' or 'minus'
3363     product-strand Na-strand OPTIONAL ,
3364     
3365     -- genomic-strand represents the strand of translation
3366     genomic-strand Na-strand OPTIONAL ,
3367 
3368     -- basic segments are always in biological order
3369     parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,
3370 
3371     -- scores for this exon
3372     scores Score-set OPTIONAL ,
3373 
3374     -- splice sites
3375     acceptor-before-exon Splice-site OPTIONAL,
3376     donor-after-exon Splice-site OPTIONAL,
3377     
3378     -- flag: is this exon complete or partial?
3379     partial BOOLEAN OPTIONAL,
3380 
3381     -- extra info
3382     ext SEQUENCE OF User-object OPTIONAL
3383 }
3384 
3385 
3386 Product-pos ::= CHOICE {   -- position on the product; type of Spliced-exon.product-start / product-end
3387     nucpos INTEGER,        -- plain integer position (presumably for transcript products; confirm)
3388     protpos Prot-pos       -- codon based position on protein, see Prot-pos
3389 }
3390 
3391 
3392 -- codon based position on protein (1/3 of amino acid)
3393 Prot-pos ::= SEQUENCE {
3394     -- standard protein position
3395     amin INTEGER ,
3396 
3397     -- 0, 1, 2, or 3 as for Cdregion
3398     -- 0 = not set (this is also the DEFAULT when the field is omitted)
3399     -- 1, 2, 3 = actual frame
3400     frame INTEGER DEFAULT 0
3401 }
3402 
3403 
3404 -- Spliced-exon-chunk: piece of an exon
3405 -- lengths are given in nucleotide bases (1/3 of amino acid when product is a
3406 -- protein)
3407 Spliced-exon-chunk ::= CHOICE {
3408     -- both sequences represented, product and genomic sequences match
3409     match INTEGER ,
3410 
3411     -- both sequences represented, product and genomic sequences do not match
3412     mismatch INTEGER ,
3413 
3414     -- both sequences are represented, there is sufficient similarity
3415     -- between product and genomic sequences. Can be used to replace stretches
3416     -- of matches and mismatches, mostly for protein to genomic where
3417     -- definition of match or mismatch depends on translation table
3418     diag INTEGER ,
3419 
3420     -- insertion in product sequence (i.e. gap in the genomic sequence)
3421     product-ins INTEGER ,
3422 
3423     -- insertion in genomic sequence (i.e. gap in the product sequence)
3424     genomic-ins INTEGER
3425 }
3426 
3427 
3428 -- site involved in splice
3429 -- (type of Spliced-exon.acceptor-before-exon and Spliced-exon.donor-after-exon)
3429 Splice-site ::= SEQUENCE {
3430     -- typically two bases in the intronic region, always
3431     -- in IUPAC format
3432     bases VisibleString
3433 }
3434 
3435 
3436 -- ==========================================================================
3437 --
3438 -- Sparse-seg follows the semantics of dense-seg and is better suited to
3439 -- representing sparse multiple alignments
3440 --
3441 -- ==========================================================================
3442 
3443 
3444 Sparse-seg ::= SEQUENCE {
3445     master-id Seq-id OPTIONAL,
3446 
3447     -- pairwise alignments constituting this multiple alignment
3448     rows SET OF Sparse-align,
3449 
3450     -- per-row scores
3451     row-scores SET OF Score OPTIONAL,
3452 
3453     -- index of extra items
3454     ext  SET OF Sparse-seg-ext OPTIONAL
3455 }
3456 
3457 Sparse-align ::= SEQUENCE {   -- one pairwise alignment; element type of Sparse-seg.rows
3458     first-id Seq-id,
3459     second-id Seq-id,
3460 
3461     numseg INTEGER,                      -- number of segments
3462     first-starts SEQUENCE OF INTEGER ,   -- starts on the first sequence [numseg]
3463     second-starts SEQUENCE OF INTEGER ,  -- starts on the second sequence [numseg]
3464     lens SEQUENCE OF INTEGER ,           -- lengths of segments [numseg]
3465     second-strands SEQUENCE OF Na-strand OPTIONAL ,   -- strands for the second sequence only; no first-strands field exists
3466 
3467     -- per-segment scores
3468     seg-scores SET OF Score OPTIONAL
3469 }
3470 
3471 Sparse-seg-ext ::= SEQUENCE {   -- element type of Sparse-seg.ext; only 'index' is active
3472     --seg-ext SET OF {
3473     --    index INTEGER,
3474     --    data User-field
3475     -- }   (the seg-ext structure above is commented out and not part of the type)
3476     index INTEGER
3477 }
3478 
3479 
3480 
3481 -- use of Score is discouraged for external ASN.1 specifications
3482 -- (Score-set is the encouraged alternative)
3482 Score ::= SEQUENCE {
3483     id Object-id OPTIONAL ,   -- optional identifier of this score
3484     value CHOICE {            -- the score itself, either real or integer
3485         real REAL ,
3486         int INTEGER
3487     }
3488 }
3489 
3490 -- use of Score-set is encouraged for external ASN.1 specifications
3491 Score-set ::= SET OF Score   -- unordered collection of Score; used by Spliced-exon.scores
3492 
3493 END 
3494 
3495 --$Revision: 6.0 $
3496 --**********************************************************************
3497 --
3498 --  NCBI Sequence Analysis Results (other than alignments)
3499 --  by James Ostell, 1990
3500 --
3501 --**********************************************************************
3502 
3503 NCBI-Seqres DEFINITIONS ::=
3504 BEGIN
3505 
3506 EXPORTS Seq-graph;
3507 
3508 IMPORTS Seq-loc FROM NCBI-Seqloc;
3509 
3510 --*** Sequence Graph ********************************
3511 --*
3512 --*   for values mapped by residue or range to sequence
3513 --*
3514 
3515 Seq-graph ::= SEQUENCE {
3516     title VisibleString OPTIONAL ,
3517     comment VisibleString OPTIONAL ,
3518     loc Seq-loc ,                       -- region this applies to
3519     title-x VisibleString OPTIONAL ,    -- title for x-axis
3520     title-y VisibleString OPTIONAL ,    -- title for y-axis
3521     comp INTEGER OPTIONAL ,             -- compression (residues/value)
3522     a REAL OPTIONAL ,                   -- for scaling values
3523     b REAL OPTIONAL ,                   -- display = (a x value) + b
3524     numval INTEGER ,                    -- number of values in graph
3525     graph CHOICE {                      -- the values, in one of three storage types
3526         real Real-graph ,
3527         int Int-graph ,
3528         byte Byte-graph } }
3529 
3530 Real-graph ::= SEQUENCE {               -- graph values stored as REAL
3531     max REAL ,                          -- top of graph
3532     min REAL ,                          -- bottom of graph
3533     axis REAL ,                         -- value to draw axis on
3534     values SEQUENCE OF REAL }           -- the values, in order
3535 
3536 Int-graph ::= SEQUENCE {                -- graph values stored as INTEGER; same fields as Real-graph
3537     max INTEGER ,                       -- presumably top of graph, as in Real-graph
3538     min INTEGER ,                       -- presumably bottom of graph, as in Real-graph
3539     axis INTEGER ,                      -- presumably value to draw axis on, as in Real-graph
3540     values SEQUENCE OF INTEGER } 
3541 
3542 Byte-graph ::= SEQUENCE {              -- integer from 0-255
3543     max INTEGER ,                      -- presumably top of graph, as in Real-graph
3544     min INTEGER ,                      -- presumably bottom of graph, as in Real-graph
3545     axis INTEGER ,                     -- presumably value to draw axis on, as in Real-graph
3546     values OCTET STRING }              -- presumably one byte per value (confirm against reader code)
3547 
3548 END
3549 
3550 --$Revision: 6.1 $
3551 --********************************************************************
3552 --
3553 --  Direct Submission of Sequence Data
3554 --  James Ostell, 1991
3555 --
3556 --  This is a trial specification for direct submission of sequence
3557 --    data worked out between NCBI and EMBL
3558 --  Later revised to reflect work with GenBank and Integrated database
3559 --
3560 --  Version 3.0, 1994
3561 --    This is the official NCBI sequence submission format now.
3562 --
3563 --********************************************************************
3564 
3565 NCBI-Submit DEFINITIONS ::=
3566 BEGIN
3567 
3568 EXPORTS Seq-submit, Contact-info;
3569 
3570 IMPORTS Cit-sub, Author FROM NCBI-Biblio
3571         Date, Object-id FROM NCBI-General
3572         Seq-annot FROM NCBI-Sequence
3573         Seq-id FROM NCBI-Seqloc
3574         Seq-entry FROM NCBI-Seqset;
3575 
3576 Seq-submit ::= SEQUENCE {
3577     sub Submit-block ,              -- submission details (contact, citation, ...), see Submit-block
3578     data CHOICE {                   -- what is being submitted; exactly one variant
3579         entrys  SET OF Seq-entry ,  -- sequence(s)
3580         annots  SET OF Seq-annot ,  -- annotation(s)
3581         delete  SET OF Seq-id } } -- deletions of entries
3582 
3583 Submit-block ::= SEQUENCE {
3584     contact Contact-info ,        -- who to contact
3585     cit Cit-sub ,                 -- citation for this submission
3586     hup BOOLEAN DEFAULT FALSE ,   -- hold until published (FALSE when omitted)
3587     reldate Date OPTIONAL ,       -- release by date
3588     subtype INTEGER {             -- type of submission
3589         new (1) ,                 -- new data
3590         update (2) ,              -- update by author
3591         revision (3) ,            -- 3rd party (non-author) update
3592         other (255) } OPTIONAL ,
3593     tool VisibleString OPTIONAL,  -- tool used to make submission
3594     user-tag VisibleString OPTIONAL, -- user supplied id for this submission
3595     comment VisibleString OPTIONAL } -- user comments/advice to database
3596 
3597 Contact-info ::= SEQUENCE {      -- who to contact to discuss the submission
3598     name VisibleString OPTIONAL ,        -- OBSOLETE: will be removed (see last-name etc. below)
3599     address SEQUENCE OF VisibleString OPTIONAL ,
3600     phone VisibleString OPTIONAL ,
3601     fax VisibleString OPTIONAL ,
3602     email VisibleString OPTIONAL ,
3603     telex VisibleString OPTIONAL ,
3604     owner-id Object-id OPTIONAL ,         -- for owner accounts
3605     password OCTET STRING OPTIONAL ,
3606     last-name VisibleString OPTIONAL ,  -- structured to replace name above
3607     first-name VisibleString OPTIONAL ,
3608     middle-initial VisibleString OPTIONAL ,
3609     contact Author OPTIONAL }           -- WARNING: this will replace the above
3610 
3611 END
3612 
3613 --$Revision: 1.15 $
3614 --**********************************************************************
3615 --
3616 --  Definitions for Cn3D-specific data (rendering settings,
3617 --    user annotations, etc.)
3618 --
3619 --  by Paul Thiessen
3620 --
3621 --  National Center for Biotechnology Information
3622 --  National Institutes of Health
3623 --  Bethesda, MD 20894 USA
3624 --
3625 -- asntool -m cn3d.asn -w 100 -o cn3d.h
3626 -- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
3627 --   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
3628 --**********************************************************************
3629 
3630 NCBI-Cn3d DEFINITIONS ::=
3631 -- Cn3D-specific information
3632 
3633 BEGIN
3634 
3635 EXPORTS  Cn3d-style-dictionary, Cn3d-user-annotations;
3636 
3637 IMPORTS  Biostruc-id FROM MMDB
3638          Molecule-id, Residue-id FROM MMDB-Chemical-graph;
3639 
3640 
3641 -- values of enumerations must match those in cn3d/style_manager.hpp!
3642 
3643 Cn3d-backbone-type ::= ENUMERATED {     -- for different types of backbones (see Cn3d-backbone-style.type)
3644     off (1),                            -- note: numbering starts at 1, not 0
3645     trace (2),
3646     partial (3),
3647     complete (4)
3648 }
3649 
3650 Cn3d-drawing-style ::= ENUMERATED {     -- atom/bond/object rendering styles
3651     -- for atoms and bonds (values 1 to 6)
3652     wire (1),
3653     tubes (2),
3654     ball-and-stick (3),
3655     space-fill (4),
3656     wire-worm (5),
3657     tube-worm (6),
3658     -- for 3d-objects (values 7 and 8)
3659     with-arrows (7),
3660     without-arrows (8)
3661 }
3662 
3663 Cn3d-color-scheme ::= ENUMERATED {  -- available color schemes (not all
3664                                     -- necessarily applicable to all objects)
3665     element (1),
3666     object (2),
3667     molecule (3),
3668     domain (4),
3669     residue (20),                   -- note: numeric value is out of listing order
3670     secondary-structure (5),
3671     user-select (6),
3672     -- different alignment conservation coloring (currently only for proteins)
3673     aligned (7),
3674     identity (8),
3675     variety (9),
3676     weighted-variety (10),
3677     information-content (11),
3678     fit (12),
3679     block-fit (17),                 -- note: 17 to 19 are listed before 13 to 16
3680     block-z-fit (18),
3681     block-row-fit (19),
3682     -- other schemes
3683     temperature (13),
3684     hydrophobicity (14),
3685     charge (15),
3686     rainbow (16)
3687 }
3688 
3689 -- RGB triplet, interpreted (after division by the scale-factor) as floating
3690 -- point values which should range from [0..1]. The default scale-factor is
3691 -- 255, so that one can conveniently set integer byte values [0..255] for
3692 -- colors with the scale-factor already set appropriately to map to [0..1].
3693 --    An alpha value is allowed, but is currently ignored by Cn3D.
3694 Cn3d-color ::= SEQUENCE {
3695     scale-factor INTEGER DEFAULT 255,   -- divisor applied to the components below
3696     red INTEGER,
3697     green INTEGER,
3698     blue INTEGER,
3699     alpha INTEGER DEFAULT 255           -- 255 when omitted; currently ignored by Cn3D
3700 }
3701 
3702 Cn3d-backbone-style ::= SEQUENCE {  -- style blob for backbones only
3703     type Cn3d-backbone-type,        -- off / trace / partial / complete
3704     style Cn3d-drawing-style,
3705     color-scheme Cn3d-color-scheme,
3706     user-color Cn3d-color           -- required field (not OPTIONAL)
3707 }
3708 
3709 Cn3d-general-style ::= SEQUENCE {   -- style blob for other objects
3710     is-on BOOLEAN,                  -- on/off flag (takes the place of 'type' in Cn3d-backbone-style)
3711     style Cn3d-drawing-style,
3712     color-scheme Cn3d-color-scheme,
3713     user-color Cn3d-color           -- required field (not OPTIONAL)
3714 }
3715 
3716 Cn3d-backbone-label-style ::= SEQUENCE { -- style blob for backbone labels
3717     spacing INTEGER,        -- zero means none
3718     type ENUMERATED {       -- label text form
3719         one-letter (1),
3720         three-letter (2)
3721     },
3722     number ENUMERATED {     -- numbering shown with the label
3723         none (0),
3724         sequential (1),     -- from 1, by residues present, to match sequence
3725         pdb (2)             -- use number assigned by PDB
3726     },
3727     termini BOOLEAN,
3728     white BOOLEAN           -- all white, or (if false) color of alpha carbon
3729 }
3730 
3731 -- rendering settings for Cn3D (mirrors StyleSettings class)
3732 Cn3d-style-settings ::= SEQUENCE {
3733     name VisibleString OPTIONAL,                -- a name (for favorites)
3734     protein-backbone Cn3d-backbone-style,       -- backbone styles
3735     nucleotide-backbone Cn3d-backbone-style,
3736     protein-sidechains Cn3d-general-style,      -- styles for other stuff
3737     nucleotide-sidechains Cn3d-general-style,
3738     heterogens Cn3d-general-style,
3739     solvents Cn3d-general-style,
3740     connections Cn3d-general-style,
3741     helix-objects Cn3d-general-style,
3742     strand-objects Cn3d-general-style,
3743     virtual-disulfides-on BOOLEAN,              -- virtual disulfides
3744     virtual-disulfide-color Cn3d-color,
3745     hydrogens-on BOOLEAN,                       -- hydrogens
3746     background-color Cn3d-color,                -- background
3747     -- floating point parameters - scale-factor applies to all the following:
3748     -- (presumably each value is divided by scale-factor, as in Cn3d-color; confirm)
3748     scale-factor INTEGER,
3749     space-fill-proportion INTEGER,
3750     ball-radius INTEGER,
3751     stick-radius INTEGER,
3752     tube-radius INTEGER,
3753     tube-worm-radius INTEGER,
3754     helix-radius INTEGER,
3755     strand-width INTEGER,
3756     strand-thickness INTEGER,
3757     -- backbone labels (no labels if not present)
3758     protein-labels Cn3d-backbone-label-style OPTIONAL,
3759     nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
3760     -- ion labels
3761     ion-labels BOOLEAN OPTIONAL
3762 }
3763 
3764 Cn3d-style-settings-set ::= SET OF Cn3d-style-settings   -- unordered collection of style settings; not in this module's EXPORTS
3765 
3766 Cn3d-style-table-id ::= INTEGER   -- integer key; type of Cn3d-style-table-item.id and Cn3d-user-annotation.style-id
3767 
3768 Cn3d-style-table-item ::= SEQUENCE {   -- one entry of Cn3d-style-dictionary.style-table
3769     id Cn3d-style-table-id,            -- key of this entry
3770     style Cn3d-style-settings          -- the settings stored under that key
3771 }
3772 
3773 -- the global settings, and a lookup table of styles for user annotations.
3774 Cn3d-style-dictionary ::= SEQUENCE {
3775     global-style Cn3d-style-settings,                        -- required global settings
3776     style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL   -- lookup table; may be absent
3777 }
3778 
3779 -- a range of residues in a chain, identified by MMDB residue-id
3780 -- (e.g., numbered from 1)
3781 Cn3d-residue-range ::= SEQUENCE {
3782     from Residue-id,    -- first residue of the range
3783     to Residue-id       -- last residue of the range (whether inclusive is not stated here)
3784 }
3785 
3786 -- set of locations on a particular chain
3787 Cn3d-molecule-location ::= SEQUENCE {
3788     molecule-id Molecule-id,    -- MMDB molecule id
3789     -- which residues; whole molecule implied if absent
3790     residues SEQUENCE OF Cn3d-residue-range OPTIONAL
3791 }
3792 
3793 -- set of locations on a particular structure object (e.g., a PDB/MMDB
3794 -- structure), which may include multiple ranges of residues each on
3795 -- multiple chains.
3796 Cn3d-object-location ::= SEQUENCE {
3797     structure-id Biostruc-id,                      -- which structure object
3798     residues SEQUENCE OF Cn3d-molecule-location    -- per-chain locations (required, unlike Cn3d-molecule-location.residues)
3799 }
3800 
3801 -- information for an individual user annotation
3802 Cn3d-user-annotation ::= SEQUENCE {
3803     name VisibleString,                 -- a (short) name for this annotation
3804     description VisibleString OPTIONAL, -- an optional longer description
3805     style-id Cn3d-style-table-id,       -- how to draw this annotation (a style table id)
3806     residues SEQUENCE OF Cn3d-object-location,  -- which residues to cover
3807     is-on BOOLEAN   -- whether this annotation is to be turned on in Cn3D
3808 }
3809 
3810 -- a GL-ordered transformation matrix
3811 -- (16 REAL elements m0..m15; presumably a 4x4 matrix, confirm element order against Cn3D code)
3811 Cn3d-GL-matrix ::= SEQUENCE {
3812     m0  REAL, m1  REAL, m2  REAL, m3  REAL,
3813     m4  REAL, m5  REAL, m6  REAL, m7  REAL,
3814     m8  REAL, m9  REAL, m10 REAL, m11 REAL,
3815     m12 REAL, m13 REAL, m14 REAL, m15 REAL
3816 }
3817 
3818 -- a floating point 3d vector (type of Cn3d-view-settings.rotation-center)
3819 Cn3d-vector ::= SEQUENCE {
3820     x REAL,
3821     y REAL,
3822     z REAL
3823 }
3824 
3825 -- parameters used to set up the camera in Cn3D
3826 Cn3d-view-settings ::= SEQUENCE {
3827     camera-distance REAL,       -- camera on +Z axis this distance from origin
3828     camera-angle-rad REAL,      -- camera angle (radians, going by the field name)
3829     camera-look-at-X REAL,      -- X,Y of point in Z=0 plane camera points at
3830     camera-look-at-Y REAL,
3831     camera-clip-near REAL,      -- distance of clipping planes from camera
3832     camera-clip-far REAL,
3833     matrix Cn3d-GL-matrix,      -- transformation of objects in the scene
3834     rotation-center Cn3d-vector -- center of rotation of whole scene
3835 }
3836 
3837 -- The list of annotations for a given CDD/mime. If residue regions overlap
3838 -- between annotations that are turned on, the last annotation in this list
3839 -- that contains these residues will be used as the display style for these
3840 -- residues.
3841 --   Also contains the current viewpoint, so that user's camera angle
3842 -- can be stored and reproduced, for illustrations, on-line figures, etc.
3843 Cn3d-user-annotations ::= SEQUENCE {
3844     annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL,   -- ordered: list order decides overlaps (see above)
3845     view Cn3d-view-settings OPTIONAL                         -- the stored viewpoint
3846 }
3847 
3848 END
3849 
3850 --$Revision: 6.3 $
3851 --****************************************************************
3852 --
3853 --  NCBI Project Definition Module
3854 --  by Jim Ostell and Jonathan Kans, 1998
3855 --
3856 --****************************************************************
3857 
3858 NCBI-Project DEFINITIONS ::=
3859 BEGIN
3860 
3861 EXPORTS Project, Project-item;
3862 
3863 IMPORTS Date FROM NCBI-General
3864         PubMedId FROM NCBI-Biblio
3865         Seq-id, Seq-loc FROM NCBI-Seqloc
3866         Seq-annot, Pubdesc FROM NCBI-Sequence
3867         Seq-entry FROM NCBI-Seqset
3868         Pubmed-entry FROM NCBI-PubMed;
3869 
3870 Project ::= SEQUENCE {               -- optional description plus one Project-item
3871     descr Project-descr OPTIONAL ,   -- ids, name and descriptors, see Project-descr
3872     data Project-item }              -- the contents; may nest Projects via Project-item.proj
3873 
3874 Project-item ::= CHOICE {             -- contents of a Project; exactly one variant
3875     pmuid SET OF INTEGER ,            -- *uid variants: sets of integers
3876     protuid SET OF INTEGER ,
3877     nucuid SET OF INTEGER ,
3878     sequid SET OF INTEGER ,
3879     genomeuid SET OF INTEGER ,
3880     structuid SET OF INTEGER ,
3881     pmid SET OF PubMedId ,            -- *id variants: sets of PubMedId or Seq-id
3882     protid SET OF Seq-id ,
3883     nucid SET OF Seq-id ,
3884     seqid SET OF Seq-id ,
3885     genomeid SET OF Seq-id ,
3886     structid NULL ,                   -- NULL carries no data (presumably a placeholder)
3887     pment SET OF Pubmed-entry ,       -- *ent variants: sets of Pubmed-entry or Seq-entry
3888     protent SET OF Seq-entry ,
3889     nucent SET OF Seq-entry ,
3890     seqent SET OF Seq-entry ,
3891     genomeent SET OF Seq-entry ,
3892     structent NULL ,                  -- NULL carries no data (presumably a placeholder)
3893     seqannot SET OF Seq-annot ,
3894     loc SET OF Seq-loc ,
3895     proj SET OF Project               -- nested projects
3896 }
3897 
3898 Project-descr ::= SEQUENCE {            -- description of a Project (see Project.descr)
3899     id SET OF Project-id ,              -- required set of string ids
3900     name VisibleString OPTIONAL ,
3901     descr SET OF Projdesc OPTIONAL }    -- optional descriptors, see Projdesc
3902 
3903 Projdesc ::= CHOICE {           -- one descriptor of a project (see Project-descr.descr)
3904     pub Pubdesc ,
3905     date Date ,
3906     comment VisibleString ,
3907     title VisibleString
3908 }
3909 
3910 Project-id ::= VisibleString   -- string identifier; element type of Project-descr.id
3911 
3912 END
3913 
3914 
3915 --$Revision: 6.0 $
3916 --*********************************************************************
3917 --
3918 --  access.asn
3919 --
3920 --     messages for data access
3921 --
3922 --*********************************************************************
3923 
3924 NCBI-Access DEFINITIONS ::=
3925 BEGIN
3926 
3927 EXPORTS Link-set;
3928 
3929     -- links between same class = neighbors
3930     -- links between other classes = links
3931 
3932 Link-set ::= SEQUENCE {
3933     num INTEGER ,                         -- number of links to this doc type
3934     uids SEQUENCE OF INTEGER OPTIONAL ,     -- the links
3935     weights SEQUENCE OF INTEGER OPTIONAL }  -- the weights (presumably parallel to uids; confirm)
3936 
3937 
3938 END
3939 --$Revision: 6.0 $
3940 --**********************************************************************
3941 --
3942 --  NCBI Sequence Feature Definition Module
3943 --  by James Ostell, 1994
3944 --
3945 --**********************************************************************
3946 
3947 NCBI-FeatDef DEFINITIONS ::=
3948 BEGIN
3949 
3950 EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;
3951 
3952 
3953 FeatDef ::= SEQUENCE {
3954     typelabel VisibleString ,      -- short label for type eg "CDS"
3955     menulabel VisibleString ,      -- label for a menu eg "Coding Region"
3956     featdef-key INTEGER ,                  -- unique for this feature definition
3957     seqfeat-key INTEGER ,                  -- SeqFeat.data.choice from objfeat.h
3958     entrygroup INTEGER ,                   -- Group for data entry
3959     displaygroup INTEGER ,                 -- Group for data display
3960     molgroup FeatMolType           -- molecule type: aa, na or both (see FeatMolType)
3961 }
3962 
3963 FeatMolType ::= ENUMERATED {   -- molecule type of a FeatDef (see FeatDef.molgroup)
3964         aa (1),  -- proteins
3965     na (2),  -- nucleic acids
3966     both (3) }  -- both proteins and nucleic acids
3967 
3968 FeatDefSet ::= SEQUENCE OF FeatDef   -- collections of definitions
3969 
3970 FeatDispGroup ::= SEQUENCE {   -- a named display group
3971         groupkey INTEGER ,     -- integer key (presumably what FeatDef.displaygroup refers to; confirm)
3972     groupname VisibleString }  -- name of the group
3973 
3974 FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup   -- ordered collection of display groups
3975 
3976 FeatDefGroupSet ::= SEQUENCE {   -- display groups plus feature definitions; not listed in this module's EXPORTS
3977         groups FeatDispGroupSet ,
3978         defs FeatDefSet }
3979 
3980 END
3981 
3982     
3983 --$Revision: 6.12 $
3984 --****************************************************************
3985 --
3986 --  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
3987 --  by Jonathan Epstein, February 1996
3988 --
3989 --****************************************************************
3990 
3991 NCBI-Mime DEFINITIONS ::=
3992 BEGIN
3993 
3994 EXPORTS Ncbi-mime-asn1;
3995 IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
3996     Cdd FROM NCBI-Cdd
3997         Seq-entry FROM NCBI-Seqset
3998         Seq-annot FROM NCBI-Sequence
3999     Medline-entry FROM NCBI-Medline
4000     Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;
4001 
4002 Ncbi-mime-asn1 ::= CHOICE {   -- the only type exported by this module; exactly one variant
4003         entrez  Entrez-general,                 -- just a structure
4004     alignstruc  Biostruc-align,     -- structures & sequences & alignments
4005         alignseq        Biostruc-align-seq,     -- sequence alignment
4006     strucseq    Biostruc-seq,       -- structure & sequences
4007     strucseqs   Biostruc-seqs,      -- structure & sequences & alignments
4008     general     Biostruc-seqs-aligns-cdd    -- all-purpose "grab bag"
4009         -- others may be added here in the future
4010 }
4011 
4012 -- generic bundle of sequence and alignment info
4013 Bundle-seqs-aligns ::= SEQUENCE {   -- used as the bundle alternative of Biostruc-seqs-aligns-cdd; every field is optional
4014     sequences SET OF Seq-entry OPTIONAL,        -- sequences
4015     seqaligns SET OF Seq-annot OPTIONAL,        -- sequence alignments
4016     strucaligns Biostruc-annot-set OPTIONAL,    -- structure alignments
4017     imports SET OF Seq-annot OPTIONAL,          -- imports (updates in Cn3D)
4018     style-dictionary Cn3d-style-dictionary OPTIONAL,    -- Cn3D stuff
4019     user-annotations Cn3d-user-annotations OPTIONAL     -- type imported from NCBI-Cn3d, like style-dictionary
4020 }
4021 
4022 Biostruc-seqs-aligns-cdd ::= SEQUENCE {   -- payload of the general alternative of Ncbi-mime-asn1
4023     seq-align-data CHOICE {                 -- required; exactly one of the two forms
4024         bundle Bundle-seqs-aligns,          -- either seqs + alignments
4025         cdd Cdd                             -- or CDD (which contains these)
4026     },
4027     structures SET OF Biostruc OPTIONAL,    -- structures
4028     structure-type ENUMERATED {             -- type of structures to load if
4029         ncbi-backbone(2),                   -- not present; meanings and
4030         ncbi-all-atom(3),                   -- values are same as MMDB's
4031         pdb-model(4)                        -- Model-type
4032     } OPTIONAL
4033 }
4034 
4035 Biostruc-align ::= SEQUENCE {   -- payload of the alignstruc alternative of Ncbi-mime-asn1
4036         master  Biostruc,               -- exactly one master structure
4037         slaves  SET OF Biostruc,        -- any number of further structures
4038         alignments      Biostruc-annot-set,     -- structure alignments
4039         sequences SET OF Seq-entry,     -- sequences
4040         seqalign SET OF Seq-annot,      -- required, unlike the two Cn3D fields below
4041         style-dictionary Cn3d-style-dictionary OPTIONAL,
4042         user-annotations Cn3d-user-annotations OPTIONAL
4043 }
4044 
4045 Biostruc-align-seq ::= SEQUENCE {       -- display seq structure align only; payload of the alignseq alternative of Ncbi-mime-asn1
4046         sequences SET OF Seq-entry,     -- sequences
4047         seqalign SET OF Seq-annot,      -- required
4048         style-dictionary Cn3d-style-dictionary OPTIONAL,
4049         user-annotations Cn3d-user-annotations OPTIONAL
4050 }
4051 
4052 Biostruc-seq ::= SEQUENCE {     -- display structure seq, added by yanli; payload of the strucseq alternative of Ncbi-mime-asn1
4053         structure Biostruc,             -- exactly one structure
4054         sequences SET OF Seq-entry,     -- sequences
4055         style-dictionary Cn3d-style-dictionary OPTIONAL,
4056         user-annotations Cn3d-user-annotations OPTIONAL
4057 }
4058 
4059 Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure, added by yanli; payload of the strucseqs alternative of Ncbi-mime-asn1
4060         structure Biostruc,             -- exactly one structure
4061         sequences SET OF Seq-entry,     -- sequences
4062         seqalign SET OF Seq-annot,      -- required; this field is what Biostruc-seq lacks
4063         style-dictionary Cn3d-style-dictionary OPTIONAL,
4064         user-annotations Cn3d-user-annotations OPTIONAL
4065 }
4066 
4067 Entrez-style ::= ENUMERATED {   -- type of the required style field of Entrez-general
4068         docsum (1),
4069         genbank (2) ,
4070         genpept (3) ,
4071         fasta (4) ,
4072         asn1 (5) ,
4073         graphic (6) ,
4074         alignment (7) ,
4075         globalview (8) ,
4076         report (9) ,
4077         medlars (10) ,
4078         embl (11) ,
4079         pdb (12) ,
4080         kinemage (13) }   -- values run 1 through 13 with no gaps
4081 
4082 Entrez-general ::= SEQUENCE {   -- payload of the entrez alternative of Ncbi-mime-asn1
4083         title VisibleString OPTIONAL,
4084         data CHOICE {                   -- required; exactly one of the following
4085                 ml      Medline-entry ,
4086                 prot    Seq-entry ,     -- prot, nuc and genome share one type and differ only by tag
4087                 nuc     Seq-entry ,
4088                 genome  Seq-entry ,
4089                 structure Biostruc ,
4090                 strucAnnot Biostruc-annot-set } ,
4091         style Entrez-style ,            -- required
4092         location VisibleString OPTIONAL }
4093 END
4094 --$Revision: 6.0 $
4095 --********************************************************************
4096 --
4097 --  Print Templates
4098 --  James Ostell, 1993
4099 --
4100 --
4101 --********************************************************************
4102 
4103 NCBI-ObjPrt DEFINITIONS ::=
4104 BEGIN
4105 
4106 EXPORTS PrintTemplate, PrintTemplateSet;
4107 
4108 PrintTemplate ::= SEQUENCE {   -- exported; one named template
4109     name TemplateName ,  -- name for this template
4110     labelfrom VisibleString OPTIONAL,    -- ASN.1 path to get label from
4111     format PrintFormat }   -- required top-level format of the template
4112 
4113 TemplateName ::= VisibleString   -- used by PrintTemplate.name and by the use-template alternative of PrintForm
4114 
4115 PrintTemplateSet ::= SEQUENCE OF PrintTemplate   -- exported; an ordered list of templates
4116 
4117 PrintFormat ::= SEQUENCE {   -- recursive: PrintForm.block holds further PrintFormat entries
4118     asn1 VisibleString ,    -- ASN.1 partial path for this
4119     label VisibleString OPTIONAL ,   -- printable label
4120     prefix VisibleString OPTIONAL,
4121     suffix VisibleString OPTIONAL,
4122     form PrintForm }   -- required; selects how the component is printed
4123 
4124 PrintForm ::=   CHOICE {      -- Forms for various ASN.1 components
4125     block PrintFormBlock,     -- for SEQUENCE, SET
4126     boolean PrintFormBoolean,
4127     enum PrintFormEnum,
4128     text PrintFormText,
4129     use-template TemplateName,   -- refers to a template by its name
4130     user UserFormat ,
4131     null NULL }               -- rarely used
4132 
4133 UserFormat ::= SEQUENCE {   -- type of the user alternative of PrintForm
4134     printfunc VisibleString ,   -- required; presumably the name of a print function (TODO confirm)
4135     defaultfunc VisibleString OPTIONAL }
4136 
4137 PrintFormBlock ::= SEQUENCE {  -- for SEQUENCE, SET
4138     separator VisibleString OPTIONAL ,
4139     components SEQUENCE OF PrintFormat }   -- nested formats; this is what makes PrintFormat recursive
4140 
4141 PrintFormBoolean ::= SEQUENCE {   -- type of the boolean alternative of PrintForm; both fields optional
4142     true VisibleString OPTIONAL ,   -- presumably the text shown for TRUE (TODO confirm)
4143     false VisibleString OPTIONAL }   -- presumably the text shown for FALSE (TODO confirm)
4144 
4145 PrintFormEnum ::= SEQUENCE {   -- type of the enum alternative of PrintForm
4146     values SEQUENCE OF VisibleString OPTIONAL }   -- optional ordered list of strings
4147 
4148 PrintFormText ::= SEQUENCE {   -- type of the text alternative of PrintForm
4149     textfunc VisibleString OPTIONAL }   -- the only field, and it is optional
4150     
4151 END
4152 
4153 --$Revision: 6.10 $
4154 --*********************************************************
4155 --
4156 -- ASN.1 and XML for the components of a GenBank format sequence
4157 -- J.Ostell 2002
4158 -- Updated 25 May 2010
4159 --
4160 --*********************************************************
4161 
4162 NCBI-GBSeq DEFINITIONS ::=
4163 BEGIN
4164 
4165 --********
4166 --  GBSeq represents the elements in a GenBank style report
4167 --    of a sequence with some small additions to structure and support
4168 --    for protein (GenPept) versions of GenBank format as seen in
4169 --    Entrez. While this represents the simplification, reduction of
4170 --    detail, and flattening to a single sequence perspective of GenBank
4171 --    format (compared with the full ASN.1 or XML from which GenBank and
4172 --    this format is derived at NCBI), it is presented in ASN.1 or XML for
4173 --    automated parsing and processing. It is hoped that this compromise
4174 --    will be useful for those bulk processing at the GenBank format level
4175 --    of detail today. Since it is a compromise, a number of pragmatic
4176 --    decisions have been made.
4177 --
4178 --  In pursuit of simplicity and familiarity a number of
4179 --    fields do not have full substructure defined here where there is
4180 --    already a standard GenBank format string. For example:
4181 --
4182 --   Date  DD-Mon-YYYY
4183 --   Authors   LastName, Initials (with periods)
4184 --   Journal   JournalName Volume (issue), page-range (year)
4185 --   FeatureLocations as per GenBank feature table, but FeatureIntervals
4186 --    may also be provided as a convenience
4187 --   FeatureQualifiers  as per GenBank feature table
4188 --   Primary has a string that represents a table to construct
4189 --    a third party (TPA) sequence.
4190 --   other-seqids can have strings with the "vertical bar format" sequence
4191 --    identifiers used in BLAST for example, when they are non-genbank types.
4192 --    Currently in GenBank format you only see GI, but there are others, like
4193 --    patents, submitter clone names, etc which will appear here, as they
4194 --    always have in the ASN.1 format, and full XML format.
4195 --   source-db is a formatted text block for peptides in GenPept format that
4196 --    carries information from the source protein database.
4197 --
4198 --  There are also a number of elements that could have been
4199 --   more exactly specified, but in the interest of simplicity
4200 --   have been simply left as options. For example..
4201 --
4202 --  accession and accession.version will always appear in a GenBank record
4203 --   they are optional because this format can also be used for non-GenBank
4204 --   sequences, and in that case will have only "other-seqids".
4205 --
4206 --  sequences will normally all have "sequence" filled in. But contig records
4207 --    will have a "join" statement in the "contig" slot, and no "sequence".
4208 --    We also may consider a retrieval option with no sequence of any kind
4209 --     and no feature table to quickly check minimal values.
4210 --
4211 --  a reference may have an author list, or be from a consortium, or both.
4212 --
4213 --  some fields, such as taxonomy, do appear as separate elements in GenBank
4214 --    format but without a specific linetype (in GenBank format this comes
4215 --    under ORGANISM). Another example is the separation of primary accession
4216 --    from the list of secondary accessions. In GenBank format primary
4217 --    accession is just the first one on the list that includes all secondaries
4218 --    after it.
4219 --
4220 --  create-date deserves special comment. The date you see on the right hand
4221 --    side of the LOCUS line in GenBank format is actually the last date the
4222 --    record was modified (or the update-date). The date the record was
4223 --    first submitted to GenBank appears in the first submission citation in
4224 --    the reference section. Internally in the databases and ASN.1 NCBI keeps
4225 --    the first date the record was released into the sequence database at
4226 --    NCBI as create-date. For records from EMBL, which supports create-date,
4227 --    it is the date provided by EMBL. For DDBJ records, which do not supply
4228 --    a create-date (same as GenBank format) the create-date is the first date
4229 --    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
4230 --    took responsibility for GenBank, it is just the first date NCBI saw the
4231 --    record. Create-date can be very useful, so we expose it here, but users
4232 --    must understand it is only an approximation and comes from many sources,
4233 --    and with many exceptions and caveats. It does NOT tell you the first
4234 --    date the public might have seen this record and thus is NOT an accurate
4235 --    measure for legal issues of precedence.
4236 --
4237 --********
4238 
4239 GBSet ::= SEQUENCE OF GBSeq   -- an ordered list of GBSeq records
4240         
4241 GBSeq ::= SEQUENCE {   -- one sequence record; see the module notes above for conventions
4242     locus VisibleString ,
4243     length INTEGER ,
4244     strandedness VisibleString OPTIONAL ,
4245     moltype VisibleString ,
4246     topology VisibleString OPTIONAL ,
4247     division VisibleString ,
4248     update-date VisibleString ,   -- DD-Mon-YYYY; the last date the record was modified
4249     create-date VisibleString OPTIONAL ,   -- DD-Mon-YYYY; an approximation only, see the create-date note above
4250     update-release VisibleString OPTIONAL ,
4251     create-release VisibleString OPTIONAL ,
4252     definition VisibleString ,
4253     primary-accession VisibleString OPTIONAL ,   -- optional so that non-GenBank sequences can use this format
4254     entry-version VisibleString OPTIONAL ,
4255     accession-version VisibleString OPTIONAL ,   -- optional for the same reason as primary-accession
4256     other-seqids SEQUENCE OF GBSeqid OPTIONAL ,   -- "vertical bar format" ids for non-genbank types
4257     secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,   -- kept apart from the primary accession
4258     project VisibleString OPTIONAL ,
4259     keywords SEQUENCE OF GBKeyword OPTIONAL ,
4260     segment VisibleString OPTIONAL ,
4261     source VisibleString OPTIONAL ,
4262     organism VisibleString OPTIONAL ,
4263     taxonomy VisibleString OPTIONAL ,   -- comes under ORGANISM in GenBank format
4264     references SEQUENCE OF GBReference OPTIONAL ,
4265     comment VisibleString OPTIONAL ,
4266     comment-set SEQUENCE OF GBComment OPTIONAL ,
4267     struc-comments SEQUENCE OF GBStrucComment OPTIONAL ,
4268     primary VisibleString OPTIONAL ,   -- table to construct a third party (TPA) sequence
4269     source-db VisibleString OPTIONAL ,   -- formatted text block for peptides in GenPept format
4270     database-reference VisibleString OPTIONAL ,
4271     feature-table SEQUENCE OF GBFeature OPTIONAL ,
4272     feature-set SEQUENCE OF GBFeatureSet OPTIONAL ,
4273     sequence VisibleString OPTIONAL ,  -- Optional for contig, wgs, etc.
4274     contig VisibleString OPTIONAL ,   -- holds the "join" statement of a contig record
4275     alt-seq SEQUENCE OF GBAltSeqData OPTIONAL
4276 }
4277 
4278 GBSeqid ::= VisibleString   -- element type of GBSeq.other-seqids
4279 
4280 GBSecondary-accn ::= VisibleString   -- element type of GBSeq.secondary-accessions
4281 
4282 GBKeyword ::= VisibleString   -- element type of GBSeq.keywords
4283 
4284 GBReference ::= SEQUENCE {   -- element type of GBSeq.references
4285     reference VisibleString ,
4286     position VisibleString OPTIONAL ,
4287     authors SEQUENCE OF GBAuthor OPTIONAL ,   -- a reference may have authors, a consortium, or both
4288     consortium VisibleString OPTIONAL ,
4289     title VisibleString OPTIONAL ,
4290     journal VisibleString ,   -- required; JournalName Volume (issue), page-range (year)
4291     xref SEQUENCE OF GBXref OPTIONAL ,
4292     pubmed INTEGER OPTIONAL ,
4293     remark VisibleString OPTIONAL
4294 }
4295 
4296 GBAuthor ::= VisibleString   -- element type of GBReference.authors; LastName, Initials (with periods)
4297 
4298 GBXref ::= SEQUENCE {   -- used by GBReference.xref and GBFeature.xrefs; both fields required
4299     dbname VisibleString ,
4300     id VisibleString
4301 }
4302 
4303 GBComment ::= SEQUENCE {   -- element type of GBSeq.comment-set
4304     type VisibleString OPTIONAL ,
4305     paragraphs SEQUENCE OF GBCommentParagraph   -- required
4306 }
4307 
4308 GBCommentParagraph ::= SEQUENCE {   -- element type of GBComment.paragraphs
4309     items SEQUENCE OF GBCommentItem   -- required
4310 }
4311 
4312 GBCommentItem ::= SEQUENCE {   -- element type of GBCommentParagraph.items; both fields optional
4313     value VisibleString OPTIONAL ,
4314     url VisibleString OPTIONAL
4315 }
4316 
4317 GBStrucComment ::= SEQUENCE {   -- element type of GBSeq.struc-comments
4318     name VisibleString OPTIONAL ,
4319     items SEQUENCE OF GBStrucCommentItem   -- required
4320 }
4321 
4322 GBStrucCommentItem ::= SEQUENCE {   -- element type of GBStrucComment.items; all fields optional
4323     tag VisibleString OPTIONAL ,
4324     value VisibleString OPTIONAL ,
4325     url VisibleString OPTIONAL
4326 }
4327 
4328 GBFeatureSet ::= SEQUENCE {   -- element type of GBSeq.feature-set
4329     annot-source VisibleString OPTIONAL ,
4330     features SEQUENCE OF GBFeature   -- required
4331 }
4332 
4333 GBFeature ::= SEQUENCE {   -- used by GBSeq.feature-table and GBFeatureSet.features
4334     key VisibleString ,
4335     location VisibleString ,   -- as per GenBank feature table
4336     intervals SEQUENCE OF GBInterval OPTIONAL ,   -- may also be provided as a convenience
4337     operator VisibleString OPTIONAL ,
4338     partial5 BOOLEAN OPTIONAL ,
4339     partial3 BOOLEAN OPTIONAL ,
4340     quals SEQUENCE OF GBQualifier OPTIONAL ,   -- as per GenBank feature table
4341     xrefs SEQUENCE OF GBXref OPTIONAL
4342 }
4343 
4344 GBInterval ::= SEQUENCE {   -- used by GBFeature.intervals and GBAltSeqItem.interval
4345     from INTEGER OPTIONAL ,
4346     to INTEGER OPTIONAL ,
4347     point INTEGER OPTIONAL ,
4348     iscomp BOOLEAN OPTIONAL ,
4349     interbp BOOLEAN OPTIONAL ,
4350     accession VisibleString   -- the only required field
4351 }
4352 
4353 GBQualifier ::= SEQUENCE {   -- element type of GBFeature.quals
4354     name VisibleString ,   -- required
4355     value VisibleString OPTIONAL   -- a qualifier may have a name only
4356 }
4357 
4358 GBAltSeqData ::= SEQUENCE {   -- element type of GBSeq.alt-seq
4359     name VisibleString ,  -- e.g., contig, wgs, scaffold, cage, genome
4360     items SEQUENCE OF GBAltSeqItem OPTIONAL
4361 }
4362 
4363 GBAltSeqItem ::= SEQUENCE {   -- element type of GBAltSeqData.items; every field is optional
4364     interval GBInterval OPTIONAL ,
4365     isgap BOOLEAN OPTIONAL ,
4366     gap-length INTEGER OPTIONAL ,
4367     gap-type VisibleString OPTIONAL ,
4368     gap-linkage VisibleString OPTIONAL ,
4369     gap-comment VisibleString OPTIONAL ,
4370     first-accn VisibleString OPTIONAL ,
4371     last-accn VisibleString OPTIONAL ,
4372     value VisibleString OPTIONAL
4373 }
4374 
4375 END
4376 
4377 --$Revision: 1.8 $
4378 --************************************************************************
4379 --
4380 -- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
4381 -- The International Nucleotide Sequence Database (INSD) collaboration
4382 -- Version 1.6, 25 May 2010
4383 --
4384 --************************************************************************
4385 
4386 INSD-INSDSeq DEFINITIONS ::=
4387 BEGIN
4388 
4389 --  INSDSeq provides the elements of a sequence as presented in the
4390 --    GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
4391 --    additional structure.
4392 --    Although this single perspective of the three flatfile formats
4393 --    provides a useful simplification, it hides to some extent the
4394 --    details of the actual data underlying those formats. Nevertheless,
4395 --    the XML version of INSD-Seq is being provided with
4396 --    the hopes that it will prove useful to those who bulk-process
4397 --    sequence data at the flatfile-format level of detail. Further 
4398 --    documentation regarding the content and conventions of those formats 
4399 --    can be found at:
4400 --
4401 --    URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
4402 --    http://www.ddbj.nig.ac.jp/FT/full_index.html
4403 --    http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
4404 --    http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
4405 --
4406 --    URLs for DDBJ, EMBL, and GenBank Release Notes :
4407 --    ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
4408 --    http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
4409 --    ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
4410 --
4411 --    Because INSDSeq is a compromise, a number of pragmatic decisions have
4412 --    been made:
4413 --
4414 --  In pursuit of simplicity and familiarity a number of fields do not
4415 --    have full substructure defined here where there is already a
4416 --    standard flatfile format string. For example:
4417 --
4418 --   Dates:      DD-MON-YYYY (eg 10-JUN-2003)
4419 --
4420 --   Author:     LastName, Initials  (eg Smith, J.N.)
4421 --            or Lastname Initials   (eg Smith J.N.)
4422 --
4423 --   Journal:    JournalName Volume (issue), page-range (year)
4424 --            or JournalName Volume(issue):page-range(year)
4425 --            eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
4426 --               Appl. Environ. Microbiol. 61(4):1646-1648(1995).
4427 --
4428 --  FeatureLocations are represented as in the flatfile feature table,
4429 --    but FeatureIntervals may also be provided as a convenience
4430 --
4431 --  FeatureQualifiers are represented as in the flatfile feature table.
4432 --
4433 --  Primary has a string that represents a table to construct
4434 --    a third party (TPA) sequence.
4435 --
4436 --  other-seqids can have strings with the "vertical bar format" sequence
4437 --    identifiers used in BLAST for example, when they are non-INSD types.
4438 --
4439 --  Currently in flatfile format you only see Accession numbers, but there 
4440 --    are others, like patents, submitter clone names, etc which will 
4441 --    appear here
4442 --
4443 --  There are also a number of elements that could have been more exactly
4444 --    specified, but in the interest of simplicity have been simply left as
4445 --    optional. For example:
4446 --
4447 --  All publicly accessible sequence records in INSDSeq format will
4448 --    include accession and accession.version. However, these elements are 
4449 --    optional in INSDSeq so that this format can also be used
4450 --    for non-public sequence data, prior to the assignment of accessions and 
4451 --    version numbers. In such cases, records will have only "other-seqids".
4452 --
4453 --  sequences will normally all have "sequence" filled in. But contig records
4454 --    will have a "join" statement in the "contig" slot, and no "sequence".
4455 --    We also may consider a retrieval option with no sequence of any kind
4456 --    and no feature table to quickly check minimal values.
4457 --
4458 --  Four (optional) elements are specific to records represented via the EMBL
4459 --    sequence database: INSDSeq_update-release, INSDSeq_create-release,
4460 --    INSDSeq_entry-version, and INSDSeq_database-reference.
4461 --
4462 --  One (optional) element is specific to records originating at the GenBank
4463 --    and DDBJ sequence databases: INSDSeq_segment.
4464 --
4465 --********
4466 
4467 INSDSet ::= SEQUENCE OF INSDSeq   -- an ordered list of INSDSeq records
4468 
4469 INSDSeq ::= SEQUENCE {   -- one sequence record; see the module notes above for conventions
4470     locus VisibleString ,
4471     length INTEGER ,
4472     strandedness VisibleString OPTIONAL ,
4473     moltype VisibleString ,
4474     topology VisibleString OPTIONAL ,
4475     division VisibleString ,
4476     update-date VisibleString ,   -- DD-MON-YYYY (eg 10-JUN-2003)
4477     create-date VisibleString OPTIONAL ,   -- DD-MON-YYYY (eg 10-JUN-2003)
4478     update-release VisibleString OPTIONAL ,   -- specific to records from the EMBL sequence database
4479     create-release VisibleString OPTIONAL ,   -- specific to records from the EMBL sequence database
4480     definition VisibleString ,
4481     primary-accession VisibleString OPTIONAL ,   -- optional so non-public data can use this format
4482     entry-version VisibleString OPTIONAL ,   -- specific to records from the EMBL sequence database
4483     accession-version VisibleString OPTIONAL ,   -- optional for the same reason as primary-accession
4484     other-seqids SEQUENCE OF INSDSeqid OPTIONAL ,   -- "vertical bar format" ids for non-INSD types
4485     secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,
4486     project VisibleString OPTIONAL ,
4487     keywords SEQUENCE OF INSDKeyword OPTIONAL ,
4488     segment VisibleString OPTIONAL ,   -- specific to records originating at GenBank and DDBJ
4489     source VisibleString OPTIONAL ,
4490     organism VisibleString OPTIONAL ,
4491     taxonomy VisibleString OPTIONAL ,
4492     references SEQUENCE OF INSDReference OPTIONAL ,
4493     comment VisibleString OPTIONAL ,
4494     comment-set SEQUENCE OF INSDComment OPTIONAL ,
4495     struc-comments SEQUENCE OF INSDStrucComment OPTIONAL ,
4496     primary VisibleString OPTIONAL ,   -- table to construct a third party (TPA) sequence
4497     source-db VisibleString OPTIONAL ,
4498     database-reference VisibleString OPTIONAL ,   -- specific to records from the EMBL sequence database
4499     feature-table SEQUENCE OF INSDFeature OPTIONAL ,
4500     feature-set SEQUENCE OF INSDFeatureSet OPTIONAL ,
4501     sequence VisibleString OPTIONAL ,  -- Optional for contig, wgs, etc.
4502     contig VisibleString OPTIONAL ,   -- holds the "join" statement of a contig record
4503     alt-seq SEQUENCE OF INSDAltSeqData OPTIONAL
4504 }
4505 
4506 INSDSeqid ::= VisibleString   -- element type of INSDSeq.other-seqids
4507 
4508 INSDSecondary-accn ::= VisibleString   -- element type of INSDSeq.secondary-accessions
4509 
4510 INSDKeyword ::= VisibleString   -- element type of INSDSeq.keywords
4511 
4512 -- INSDReference_position contains a string value indicating the
4513 -- basepair span(s) to which a reference applies. The allowable
4514 -- formats are:
4515 -- 
4516 --   X..Y  : Where X and Y are integers separated by two periods,
4517 --           X >= 1 , Y <= sequence length, and X <= Y 
4518 --
4519 --           Multiple basepair spans can exist, separated by a
4520 --           semi-colon and a space. For example : 10..20; 100..500
4521 --             
4522 --   sites : The string literal 'sites', indicating that a reference
4523 --           provides sequence annotation information, but the specific
4524 --           basepair spans are either not captured, or were too numerous
4525 --           to record.
4526 -- 
4527 --           The 'sites' literal string is singly occurring, and
4528 --            cannot be used in conjunction with any X..Y basepair spans.
4529 -- 
4530 --   References that lack an INSDReference_position element apply
4531 --   to the entire sequence.
4532 
4533 INSDReference ::= SEQUENCE {   -- element type of INSDSeq.references
4534     reference VisibleString ,
4535     position VisibleString OPTIONAL ,   -- X..Y span(s) or the literal 'sites'; absent means the entire sequence
4536     authors SEQUENCE OF INSDAuthor OPTIONAL ,
4537     consortium VisibleString OPTIONAL ,
4538     title VisibleString OPTIONAL ,
4539     journal VisibleString ,   -- required; eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
4540     xref SEQUENCE OF INSDXref OPTIONAL ,
4541     pubmed INTEGER OPTIONAL ,
4542     remark VisibleString OPTIONAL
4543 }
4544 
4545 INSDAuthor ::= VisibleString   -- element type of INSDReference.authors; eg Smith, J.N. or Smith J.N.
4546 
4547 -- INSDXref provides a method for referring to records in
4548 -- other databases. INSDXref_dbname is a string value that
4549 -- provides the name of the database, and INSDXref_id
4550 -- is a string value that provides the record's identifier
4551 -- in that database.
4552 
4553 INSDXref ::= SEQUENCE {   -- used by INSDReference.xref and INSDFeature.xrefs; both fields required
4554     dbname VisibleString ,   -- name of the other database
4555     id VisibleString   -- the record's identifier in that database
4556 }
4557 
4558 INSDComment ::= SEQUENCE {   -- element type of INSDSeq.comment-set
4559     type VisibleString OPTIONAL ,
4560     paragraphs SEQUENCE OF INSDCommentParagraph   -- required
4561 }
4562 
4563 INSDCommentParagraph ::= SEQUENCE {   -- element type of INSDComment.paragraphs
4564     items SEQUENCE OF INSDCommentItem   -- required
4565 }
4566 
4567 INSDCommentItem ::= SEQUENCE {   -- element type of INSDCommentParagraph.items; both fields optional
4568     value VisibleString OPTIONAL ,
4569     url VisibleString OPTIONAL
4570 }
4571 
4572 INSDStrucComment ::= SEQUENCE {   -- element type of INSDSeq.struc-comments
4573     name VisibleString OPTIONAL ,
4574     items SEQUENCE OF INSDStrucCommentItem   -- required
4575 }
4576 
4577 INSDStrucCommentItem ::= SEQUENCE {   -- element type of INSDStrucComment.items; all fields optional
4578     tag VisibleString OPTIONAL ,
4579     value VisibleString OPTIONAL ,
4580     url VisibleString OPTIONAL
4581 }
4582 
4583 -- INSDFeature_operator contains a string value describing
4584 -- the relationship among a set of INSDInterval within
4585 -- INSDFeature_intervals. The allowable formats are:
4586 -- 
4587 --   join :  The string literal 'join' indicates that the
4588 --           INSDInterval intervals are biologically joined
4589 --           together into a contiguous molecule.
4590 -- 
4591 --   order : The string literal 'order' indicates that the
4592 --           INSDInterval intervals are in the presented
4593 --           order, but they are not necessarily contiguous.
4594 -- 
4595 --   Either 'join' or 'order' is required if INSDFeature_intervals
4596 --   is comprised of more than one INSDInterval .
4597 
4598 INSDFeatureSet ::= SEQUENCE {   -- element type of INSDSeq.feature-set
4599     annot-source VisibleString OPTIONAL ,
4600     features SEQUENCE OF INSDFeature   -- required
4601 }
4602 
4603 INSDFeature ::= SEQUENCE {   -- used by INSDSeq.feature-table and INSDFeatureSet.features
4604     key VisibleString ,
4605     location VisibleString ,   -- represented as in the flatfile feature table
4606     intervals SEQUENCE OF INSDInterval OPTIONAL ,   -- may also be provided as a convenience
4607     operator VisibleString OPTIONAL ,   -- 'join' or 'order'; required when there is more than one interval
4608     partial5 BOOLEAN OPTIONAL ,
4609     partial3 BOOLEAN OPTIONAL ,
4610     quals SEQUENCE OF INSDQualifier OPTIONAL ,   -- represented as in the flatfile feature table
4611     xrefs SEQUENCE OF INSDXref OPTIONAL
4612 }
4613 
4614 -- INSDInterval_iscomp is a boolean indicating whether
4615 -- an INSDInterval_from / INSDInterval_to location
4616 -- represents a location on the complement strand.
4617 -- When INSDInterval_iscomp is TRUE, it essentially
4618 -- confirms that a 'from' value which is greater than
4619 -- a 'to' value is intentional, because the location
4620 -- is on the opposite strand of the presented sequence.
4621 
4622 -- INSDInterval_interbp is a boolean indicating whether
4623 -- a feature (such as a restriction site) is located
4624 -- between two adjacent basepairs. When INSDInterval_interbp
4625 -- is TRUE, the 'from' and 'to' values must differ by
4626 -- exactly one base.
4627 
4628 INSDInterval ::= SEQUENCE {   -- used by INSDFeature.intervals and INSDAltSeqItem.interval
4629     from INTEGER OPTIONAL ,
4630     to INTEGER OPTIONAL ,
4631     point INTEGER OPTIONAL ,
4632     iscomp BOOLEAN OPTIONAL ,   -- whether the from/to location is on the complement strand
4633     interbp BOOLEAN OPTIONAL ,   -- whether the feature lies between two adjacent basepairs
4634     accession VisibleString   -- the only required field
4635 }
4636 
4637 INSDQualifier ::= SEQUENCE {   -- element type of INSDFeature.quals
4638     name VisibleString ,   -- required
4639     value VisibleString OPTIONAL   -- a qualifier may have a name only
4640 }
4641 
4642 INSDAltSeqData ::= SEQUENCE {   -- element type of INSDSeq.alt-seq
4643     name VisibleString ,  -- e.g., CON-division-join, WGS-contig-range,
4644                           -- WGS-scaffold-range, MGA/CAGE-range, genome
4645     items SEQUENCE OF INSDAltSeqItem OPTIONAL
4646 }
4647 
4648 INSDAltSeqItem ::= SEQUENCE {   -- element type of INSDAltSeqData.items; every field is optional
4649     interval INSDInterval OPTIONAL ,
4650     isgap BOOLEAN OPTIONAL ,
4651     gap-length INTEGER OPTIONAL ,
4652     gap-type VisibleString OPTIONAL ,
4653     gap-linkage VisibleString OPTIONAL ,
4654     gap-comment VisibleString OPTIONAL ,
4655     first-accn VisibleString OPTIONAL ,
4656     last-accn VisibleString OPTIONAL ,
4657     value VisibleString OPTIONAL
4658 }
4659 
4660 END
4661 
4662 --$Revision: 6.1 $
4663 --**********************************************************************
4664 --
4665 --  ASN.1 for a tiny Bioseq in XML
4666 --    basically a structured FASTA file with a few extras
4667 --    in this case we drop all modularity of components
4668 --      All ids are Optional - simpler structure, less checking
4669 --      Components of organism are hard coded - can't easily add or change
4670 --      sequence is just string whether DNA or protein
4671 --  by James Ostell, 2000
4672 --
4673 --**********************************************************************
4674 
4675 NCBI-TSeq DEFINITIONS ::=
4676 BEGIN
4677 
4678 TSeq ::= SEQUENCE {   -- a tiny Bioseq: basically a structured FASTA record with a few extras
4679         seqtype ENUMERATED {   -- required
4680                 nucleotide (1),
4681                 protein (2) },
4682         gi INTEGER OPTIONAL,   -- all ids are optional
4683         accver VisibleString OPTIONAL,
4684         sid VisibleString OPTIONAL,
4685         local VisibleString OPTIONAL,
4686         taxid INTEGER OPTIONAL,   -- components of organism are hard coded as taxid and orgname
4687         orgname VisibleString OPTIONAL,
4688         defline VisibleString,
4689         length INTEGER,
4690         sequence VisibleString }   -- just a string whether DNA or protein
4691 
4692 TSeqSet ::= SEQUENCE OF TSeq    -- an ordered list of TSeq records
4693 
4694 END
4695 
4696 --$Id: scoremat.asn,v 1.14 2011/12/21 15:29:33 kazimird Exp $
4697 -- ===========================================================================
4698 --
4699 --                            PUBLIC DOMAIN NOTICE
4700 --               National Center for Biotechnology Information
4701 --
4702 --  This software/database is a "United States Government Work" under the
4703 --  terms of the United States Copyright Act.  It was written as part of
4704 --  the author's official duties as a United States Government employee and
4705 --  thus cannot be copyrighted.  This software/database is freely available
4706 --  to the public for use. The National Library of Medicine and the U.S.
4707 --  Government have not placed any restriction on its use or reproduction.
4708 --
4709 --  Although all reasonable efforts have been taken to ensure the accuracy
4710 --  and reliability of the software and data, the NLM and the U.S.
4711 --  Government do not and cannot warrant the performance or results that
4712 --  may be obtained by using this software or data. The NLM and the U.S.
4713 --  Government disclaim all warranties, express or implied, including
4714 --  warranties of performance, merchantability or fitness for any particular
4715 --  purpose.
4716 --
4717 --  Please cite the author in any work or product based on this material.
4718 --
4719 -- ===========================================================================
4720 --
4721 -- Author:  Christiam Camacho
4722 --
4723 -- File Description:
4724 --      ASN.1 definitions for scoring matrix
4725 --
4726 -- ===========================================================================
4727 
4728 NCBI-ScoreMat DEFINITIONS ::= BEGIN
4729 
4730 EXPORTS    Pssm, PssmIntermediateData, PssmFinalData, 
4731            PssmParameters, PssmWithParameters;
4732     
4733 IMPORTS    Object-id   FROM NCBI-General
4734            Seq-entry   FROM NCBI-Seqset;
4735 
4736 -- a rudimentary block/core-model, to be used with block-based alignment 
4737 -- routines and threading
4738 
4739 BlockProperty ::= SEQUENCE {   -- element type of CoreBlock.property
4740   type     INTEGER { unassigned  (0),
4741                      threshold   (1),       -- score threshold for heuristics
4742                      minscore    (2),       -- observed minimum score in CD
4743                      maxscore    (3),       -- observed maximum score in CD
4744                      meanscore   (4),       -- observed mean score in CD
4745                      variance    (5),       -- observed score variance
4746                      name       (10),       -- just name the block
4747                      is-optional(20),       -- block may not have to be used
4748                      other     (255) },
4749   intvalue  INTEGER OPTIONAL,               -- presumably the value for numeric property types (TODO confirm)
4750   textvalue VisibleString OPTIONAL          -- presumably the value for textual types such as name (TODO confirm)
4751 }
4752 
4753 CoreBlock ::= SEQUENCE {   -- element type of CoreDef.blocks
4754   start          INTEGER,                   -- begin of block on query
4755   stop           INTEGER,                   -- end of block on query
4756   minstart       INTEGER OPTIONAL,          -- optional N-terminal extension
4757   maxstop        INTEGER OPTIONAL,          -- optional C-terminal extension
4758   property       SEQUENCE OF BlockProperty OPTIONAL   -- optional list of properties of this block
4759 }
4760 
4761 LoopConstraint ::= SEQUENCE {   -- element type of CoreDef.loops; both fields have defaults
4762   minlength      INTEGER DEFAULT 0,         -- minimum length of unaligned region
4763   maxlength      INTEGER DEFAULT 100000     -- maximum length of unaligned region
4764 }
4765 
4766 CoreDef ::= SEQUENCE {   -- a rudimentary block/core-model: blocks plus the loop constraints around them
4767   nblocks        INTEGER,                   -- number of core elements/blocks
4768   blocks         SEQUENCE OF CoreBlock,     -- nblocks locations
4769   loops          SEQUENCE OF LoopConstraint, -- (nblocks+1) constraints
4770 
4771   isDiscontinuous BOOLEAN OPTIONAL,         -- is it a discontinuous domain
4772 
4773   insertions SEQUENCE OF INTEGER OPTIONAL   -- positions of long insertions
4774 }
4775 
4776 Site-annot ::= SEQUENCE {   -- element type of Site-annot-set; only the two positions are required
4777   startPosition  INTEGER,                -- location of the annotation,
4778   stopPosition   INTEGER,                -- start and stop position in the
4779                                          -- PSSM
4780 
4781   description    VisibleString OPTIONAL, -- holds description or names, that
4782                                          -- can be used for labels in
4783                                          -- visualization
4784 
4785   type           INTEGER OPTIONAL,       -- type of the annotated feature,
4786                                          -- similarly to Align-annot in
4787                                          -- NCBI-Cdd
4788 
4789   aliases        SEQUENCE OF VisibleString OPTIONAL, -- additional names for
4790                                                      -- the annotation
4791 
4792   motif          VisibleString OPTIONAL, -- motif to validate mapping of sites
4793 
4794   motifuse       INTEGER OPTIONAL        -- 0 for validation
4795                                          -- 1 for motif in seqloc
4796                                          -- 2 for multiple motifs in seqloc
4797 }
4798 
4799 Site-annot-set ::= SEQUENCE OF Site-annot   -- ordered list of Site-annot entries
4800 
4801 -- ===========================================================================
4802 -- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
4803 -- ===========================================
4804 --
4805 -- Two possible inputs to PSI-BLAST and formatrpsdb (PssmWithParams = PssmWithParameters):
4806 -- 1) PssmWithParameters where pssm field contains intermediate PSSM data
4807 --    (matrix of frequency ratios)
4808 -- 2) PssmWithParameters where pssm field contains final PSSM data (matrix of
4809 --    scores and statistical parameters) - such as written by cddumper
4810 --
4811 -- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
4812 -- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
4813 -- In case 2, PSI-BLAST's PSSM engine is not invoked; the matrix of scores and
4814 -- statistical parameters are used to perform the search in PSI-BLAST, and the
4815 -- same data and the data in PssmWithParameters::params::rpsdbparams are used
4816 -- to build the PSSM and ultimately the RPS-BLAST database.
4817 -- 
4818 -- 
4819 --                 reads    ++++++++++++++ writes
4820 -- PssmWithParams  ====>    + PSI-BLAST  + =====> PssmWithParams
4821 --                          ++++++++++++++             |  ^
4822 --         ^                                           |  |
4823 --         |                                           |  |
4824 --         +===========================================+  |
4825 --                                                     |  |
4826 --         +===========================================+  |
4827 --         |                                              |
4828 -- reads   |                                              | 
4829 --         v                                              |
4830 --  +++++++++++++++ writes +++++++++++++++++++++++        |
4831 --  | formatrpsdb | =====> | RPS-BLAST databases |        |
4832 --  +++++++++++++++        +++++++++++++++++++++++        |
4833 --                                   ^                    |
4834 --                                   |                    |
4835 --                                   | reads              |
4836 --                             +++++++++++++              |
4837 --                             | RPS-BLAST |              |
4838 --                             +++++++++++++              |
4839 --                                                        |
4840 --       reads  ++++++++++++               writes         |
4841 --  Cdd ======> | cddumper | =============================+
4842 --              ++++++++++++
4843 --
4844 -- ===========================================================================
4845 
4846 -- Contains the PSSM's scores and its associated statistical parameters. 
4847 -- Dimensions and order in which scores are stored must be the same as that 
4848 -- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
4849 PssmFinalData ::= SEQUENCE {
4850 
4851     -- PSSM's scores
4852     scores              SEQUENCE OF INTEGER, 
4853 
4854     -- Karlin & Altschul parameter lambda produced during the PSSM's calculation
4855     lambda              REAL,
4856 
4857     -- Karlin & Altschul parameter kappa produced during the PSSM's calculation
4858         kappa               REAL,
4859 
4860     -- Karlin & Altschul parameter H produced during the PSSM's calculation
4861     h                   REAL,
4862 
4863     -- scaling factor used to obtain more precision when building the PSSM.
4864     -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
4865     -- engine generates PSSMs which are not scaled-up, however, if PSI-BLAST is
4866     -- given a PSSM which contains a scaled-up PSSM (indicated by having a
4867     -- scalingFactor greater than 1), then it will scale down the PSSM to
4868     -- perform the initial stages of the search with it.
4869     -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided 
4870     -- scaled-up PSSMs, it will ensure that all PSSMs used to build the 
4871     -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST 
4872     -- will silently produce incorrect results).
4873     scalingFactor       INTEGER DEFAULT 1,
4874 
4875     -- Karlin & Altschul parameter (ungapped lambda) produced during the PSSM's calculation
4876     lambdaUngapped      REAL OPTIONAL,
4877 
4878     -- Karlin & Altschul parameter (ungapped kappa) produced during the PSSM's calculation
4879         kappaUngapped       REAL OPTIONAL,
4880 
4881     -- Karlin & Altschul parameter (ungapped H) produced during the PSSM's calculation
4882     hUngapped           REAL OPTIONAL
4883 }
4884 
4885 -- Contains the PSSM's intermediate data used to create the PSSM's scores 
4886 -- and statistical parameters. Dimensions and order in which scores are 
4887 -- stored must be the same as that specified in Pssm::numRows, 
4888 -- Pssm::numColumns, and Pssm::byRow
4889 PssmIntermediateData ::= SEQUENCE {
4890 
4891     -- observed residue frequencies (or counts) per position of the PSSM 
4892     -- (prior to application of pseudocounts)
4893     resFreqsPerPos              SEQUENCE OF INTEGER OPTIONAL, 
4894 
4895     -- Weighted observed residue frequencies per position of the PSSM.
4896     -- (N.B.: each position's weights should add up to 1.0).
4897     -- This field corresponds to f_i (f sub i) in equation 2 of 
4898     -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
4899     -- NOTE: this is needed for diagnostics information only (i.e.:
4900     -- -out_ascii_pssm option in psiblast)
4901     weightedResFreqsPerPos      SEQUENCE OF REAL OPTIONAL,
4902 
4903     -- PSSM's frequency ratios (the only mandatory field of this type)
4904     freqRatios                  SEQUENCE OF REAL,
4905 
4906     -- Information content per position of the PSSM
4907     -- NOTE: this is needed for diagnostics information only (i.e.:
4908     -- -out_ascii_pssm option in psiblast)
4909     informationContent          SEQUENCE OF REAL OPTIONAL,
4910 
4911     -- Relative weight for columns of the PSSM without gaps to pseudocounts
4912     -- NOTE: this is needed for diagnostics information only (i.e.:
4913     -- -out_ascii_pssm option in psiblast)
4914     gaplessColumnWeights        SEQUENCE OF REAL OPTIONAL,
4915 
4916     -- Used in sequence weights computation
4917     -- NOTE: this is needed for diagnostics information only (i.e.:
4918     -- -out_ascii_pssm option in psiblast)
4919     sigma                       SEQUENCE OF REAL OPTIONAL,
4920 
4921     -- Length of the aligned regions per position of the query sequence
4922     -- NOTE: this is needed for diagnostics information only (i.e.:
4923     -- -out_ascii_pssm option in psiblast)
4924     intervalSizes               SEQUENCE OF INTEGER OPTIONAL,
4925 
4926     -- Number of matching sequences per position of the PSSM (including the
4927     -- query)
4928     -- NOTE: this is needed for diagnostics information only (i.e.:
4929     -- -out_ascii_pssm option in psiblast)
4930     numMatchingSeqs             SEQUENCE OF INTEGER OPTIONAL,
4931 
4932     -- Number of independent observations per position of the PSSM
4933     -- NOTE: this is needed for building CDD database for DELTA-BLAST
4934     numIndeptObsr               SEQUENCE OF REAL OPTIONAL
4935 }
4936 
4937 -- Position-specific scoring matrix
4938 --
4939 -- Column indices on the PSSM refer to the positions corresponding to the
4940 -- query/master sequence, i.e. the number of columns (N) is the same
4941 -- as the length of the query/master sequence. 
4942 -- Row indices refer to individual amino acid types, i.e. the number of 
4943 -- rows (M) is the same as the number of different residues in the 
4944 -- alphabet we use. Consequently, row labels are amino acid identifiers.
4945 --
4946 -- PSSMs are stored as linear arrays of integers. By default, we store
4947 -- them column-by-column, M values for the first column followed by M
4948 -- values for the second column, and so on. In order to provide
4949 -- flexibility for external applications, the boolean field "byRow" is 
4950 -- provided to specify the storage order.
4951 Pssm ::= SEQUENCE {
4952 
4953     -- Is this a protein or nucleotide scoring matrix?
4954     isProtein       BOOLEAN DEFAULT TRUE,       
4955 
4956     -- PSSM identifier
4957     identifier      Object-id OPTIONAL, 
4958 
4959     -- The dimensions of the matrix are returned so the client can
4960     -- verify that all data was received.
4961 
4962     numRows         INTEGER,    -- number of rows
4963     numColumns      INTEGER,    -- number of columns
4964 
4965     -- rowLabels is given to note the order of residue types so that it can
4966     -- be cross-checked between applications.
4967     -- If this field is not given, the matrix values are presented in 
4968     -- order of the alphabet: ncbistdaa is used for protein, ncbi4na for nucl.
4969     -- for proteins the values returned correspond to 
4970     -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
4971     rowLabels       SEQUENCE OF VisibleString OPTIONAL,
4972 
4973     -- are matrices stored row by row? (default: FALSE, i.e. column by column)
4974     byRow           BOOLEAN DEFAULT FALSE, 
4975 
4976     -- PSSM representative sequence (master) 
4977     query           Seq-entry OPTIONAL,           
4978 
4979     -- both intermediateData and finalData can be provided, but at least one of
4980     -- them must be provided.
4981     -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData 
4982     -- representation. 
4983 
4984     -- Intermediate data for the PSSM
4985     intermediateData    PssmIntermediateData OPTIONAL,
4986 
4987     -- Final representation for the PSSM
4988     finalData           PssmFinalData OPTIONAL
4989 }
4990 
4991 -- This structure is used to create the RPS-BLAST database auxiliary file 
4992 -- (*.aux) and it contains parameters set at creation time of the PSSM.
4993 -- Also, the matrixName field is used by formatrpsdb to build a PSSM from 
4994 -- a Pssm structure which only contains PssmIntermediateData.
4995 FormatRpsDbParameters ::= SEQUENCE {
4996 
4997     -- name of the underlying score matrix whose frequency ratios were
4998     -- used in PSSM construction (e.g.: BLOSUM62); the only mandatory field
4999     matrixName   VisibleString,
5000 
5001     -- gap opening penalty corresponding to the matrix above
5002     gapOpen      INTEGER OPTIONAL,             
5003 
5004     -- gap extension penalty corresponding to the matrix above
5005     gapExtend    INTEGER OPTIONAL
5006 
5007 }
5008 
5009 -- Populated by the PSSM engine of PSI-BLAST; the original source for these
5010 -- values is the PSI-BLAST options specified using the BLAST options API
5011 PssmParameters ::= SEQUENCE {
5012 
5013     -- pseudocount constant used for PSSM. This field corresponds to beta in 
5014     -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
5015     pseudocount INTEGER OPTIONAL,             
5016 
5017     -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
5018     -- populated by PSI-BLAST
5019     rpsdbparams     FormatRpsDbParameters OPTIONAL,
5020 
5021     -- alignment constraints needed by sequence-structure threader
5022     -- and other global or local block-alignment algorithms
5023     constraints     CoreDef OPTIONAL,
5024 
5025     -- bit score threshold for specific conserved domain hits
5026     bitScoreThresh  REAL OPTIONAL,
5027 
5028     -- conserved functional sites with annotations
5029     annotatedSites  Site-annot-set OPTIONAL
5030 }
5031 
5032 -- Envelope containing PSSM and the parameters used to create it. 
5033 -- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
5034 PssmWithParameters ::= SEQUENCE {
5035 
5036     -- This field is applicable to PSI-BLAST and formatrpsdb.
5037     -- When both the intermediate and final PSSM data are provided in this
5038     -- field, the final data (matrix of scores and associated statistical
5039     -- parameters) takes precedence and that data is used for further
5040     -- processing. The rationale for this is that the PSSM's scores and
5041     -- statistical parameters might have been calculated by other applications
5042     -- and it might not be possible to recreate it by using PSI-BLAST's PSSM 
5043     -- engine.
5044         pssm        Pssm,
5045 
5046     -- This field's rpsdbparams is used to specify the values of options 
5047     -- for processing by formatrpsdb. If these are not set, the command 
5048     -- line defaults of formatrpsdb are applied. This field is used
5049     -- by PSI-BLAST to verify that the underlying score matrix used to build
5050     -- the PSSM is the same as the one being specified through the BLAST
5051     -- Options API. If this field is omitted, no verification will be
5052     -- performed, so be careful to keep track of what matrix was used to build
5053     -- the PSSM or else the results produced by PSI-BLAST will be unreliable.
5054     params      PssmParameters OPTIONAL
5055 }
5056 
5057 END
5058 --$Revision: 1.142 $
5059 --**********************************************************************
5060 --
5061 --  NCBI ASN.1 macro editing language specifications
5062 --
5063 --  by Colleen Bollin, 2007
5064 --
5065 --**********************************************************************
5066 
5067 NCBI-Macro DEFINITIONS ::=
5068 BEGIN
5069 
5070 EXPORTS AECR-action, Parse-action, Macro-action-list, Suspect-rule-set;
5071 
5072 -- simple constraints --
5073 
5074 String-location ::= ENUMERATED {   -- type of String-constraint.match-location
5075     contains (1) ,
5076     equals (2) ,
5077     starts (3) ,
5078     ends (4) ,
5079     inlist (5) }
5080 
5081 Word-substitution ::= SEQUENCE {   -- a word, its set of synonyms, and two matching flags
5082     word VisibleString OPTIONAL ,
5083     synonyms SET OF VisibleString OPTIONAL ,
5084     case-sensitive BOOLEAN DEFAULT FALSE ,
5085     whole-word BOOLEAN DEFAULT FALSE }
5086 
5087 Word-substitution-set ::= SET OF Word-substitution   -- type of String-constraint.ignore-words
5088 
5089 String-constraint ::= SEQUENCE {   -- text-matching rule; every BOOLEAN flag defaults to FALSE
5090     match-text VisibleString OPTIONAL ,
5091     match-location String-location DEFAULT contains ,
5092     case-sensitive BOOLEAN DEFAULT FALSE ,
5093     ignore-space BOOLEAN DEFAULT FALSE ,
5094     ignore-punct BOOLEAN DEFAULT FALSE ,
5095     ignore-words Word-substitution-set OPTIONAL ,
5096     whole-word BOOLEAN DEFAULT FALSE ,
5097     not-present BOOLEAN DEFAULT FALSE ,
5098     is-all-caps BOOLEAN DEFAULT FALSE ,
5099     is-all-lower BOOLEAN DEFAULT FALSE ,
5100     is-all-punct BOOLEAN DEFAULT FALSE ,
5101     ignore-weasel BOOLEAN DEFAULT FALSE }
5102 
5103 String-constraint-set ::= SET OF String-constraint   -- unordered collection of String-constraint
5104 
5105 Strand-constraint ::= ENUMERATED {   -- type of Location-constraint.strand
5106     any (0) ,
5107     plus (1) ,
5108     minus (2) }
5109 
5110 Seqtype-constraint ::= ENUMERATED {   -- type of Location-constraint.seq-type
5111     any (0) ,
5112     nuc (1) ,
5113     prot (2) }
5114 
5115 Partial-constraint ::= ENUMERATED {   -- type of Location-constraint.partial5 and partial3
5116     either (0) ,
5117     partial (1) ,
5118     complete (2) }
5119 
5120 Location-type-constraint ::= ENUMERATED {   -- type of Location-constraint.location-type
5121     any (0) ,
5122     single-interval (1) ,
5123     joined (2) ,
5124     ordered (3) }
5125 
5126 Location-pos-constraint ::= CHOICE {   -- exactly one of three distance-from-end values; type of end5/end3
5127     dist-from-end INTEGER ,
5128     max-dist-from-end INTEGER ,
5129     min-dist-from-end INTEGER }
5130 
5131 Location-constraint ::= SEQUENCE {   -- location filter; every field is defaulted or optional
5132     strand Strand-constraint DEFAULT any ,
5133     seq-type Seqtype-constraint DEFAULT any ,
5134     partial5 Partial-constraint DEFAULT either ,
5135     partial3 Partial-constraint DEFAULT either ,
5136     location-type Location-type-constraint DEFAULT any ,
5137     end5 Location-pos-constraint OPTIONAL ,
5138     end3 Location-pos-constraint OPTIONAL }
5139 
5140 Object-type-constraint ::= ENUMERATED {   -- type of Source-constraint.type-constraint
5141     any (0) ,
5142     feature (1) ,
5143     descriptor (2) }
5144 
5145 
5146 -- feature values --
5147 
5148 Macro-feature-type ::= ENUMERATED {   -- feature type selector; type of the "type" field of Feature-field, Feature-field-legal and Feature-field-pair
5149     any (0) ,
5150     gene (1) ,
5151     org (2) ,
5152     cds (3) ,
5153     prot (4) ,
5154     preRNA (5) ,
5155     mRNA (6) ,
5156     tRNA (7) ,
5157     rRNA (8) ,
5158     snRNA (9) ,
5159     scRNA (10) ,
5160     otherRNA (11) ,
5161     pub (12) ,
5162     seq (13) ,
5163     imp (14) ,
5164     allele (15) ,
5165     attenuator (16) ,
5166     c-region (17) ,
5167     caat-signal (18) ,
5168     imp-CDS (19) ,
5169     conflict (20) ,
5170     d-loop (21) ,
5171     d-segment (22) ,
5172     enhancer (23) ,
5173     exon (24) ,
5174     gC-signal (25) ,
5175     iDNA (26) ,
5176     intron (27) ,
5177     j-segment (28) ,
5178     ltr (29) ,
5179     mat-peptide (30) ,
5180     misc-binding (31) ,
5181     misc-difference (32) ,
5182     misc-feature (33) ,
5183     misc-recomb (34) ,
5184     misc-RNA (35) ,
5185     misc-signal (36) ,
5186     misc-structure (37) ,
5187     modified-base (38) ,
5188     mutation (39) ,
5189     n-region (40) ,
5190     old-sequence (41) ,
5191     polyA-signal (42) ,
5192     polyA-site (43) ,
5193     precursor-RNA (44) ,
5194     prim-transcript (45) ,
5195     primer-bind (46) ,
5196     promoter (47) ,
5197     protein-bind (48) ,
5198     rbs (49) ,
5199     repeat-region (50) ,
5200     rep-origin (51) ,
5201     s-region (52) ,
5202     sig-peptide (53) ,
5203     source (54) ,
5204     stem-loop (55) ,
5205     sts (56) ,
5206     tata-signal (57) ,
5207     terminator (58) ,
5208     transit-peptide (59) ,
5209     unsure (60) ,
5210     v-region (61) ,
5211     v-segment (62) ,
5212     variation (63) ,
5213     virion (64) ,
5214     n3clip (65) ,
5215     n3UTR (66) ,
5216     n5clip (67) ,
5217     n5UTR (68) ,
5218     n10-signal (69) ,
5219     n35-signal (70) ,
5220     site-ref (71) ,
5221     region (72) ,
5222     comment (73) ,
5223     bond (74) ,
5224     site (75) ,
5225     rsite (76) ,
5226     user (77) ,
5227     txinit (78) ,
5228     num (79) ,
5229     psec-str (80) ,
5230     non-std-residue (81) ,
5231     het (82) ,
5232     biosrc (83) ,
5233     preprotein (84) ,
5234     mat-peptide-aa (85) ,
5235     sig-peptide-aa (86) ,
5236     transit-peptide-aa (87) ,
5237     snoRNA (88) ,
5238     gap (89) ,
5239     operon (90) ,
5240     oriT (91) ,
5241     ncRNA (92) ,
5242     tmRNA (93) ,
5243     mobile-element (94) }
5244 
5245 Feat-qual-legal ::= ENUMERATED {   -- enumerated qualifier names; used by Feat-qual-legal-val, Feat-qual-choice and Feature-field-legal
5246     allele (1) ,
5247     activity (2) ,
5248     anticodon (3) ,
5249     bound-moiety (4) ,
5250     chromosome (5),
5251     citation (6),
5252     codon (7) ,
5253     codon-start (8) ,
5254     codons-recognized (9) ,
5255     compare (10) ,
5256     cons-splice (11) ,
5257     db-xref (12) ,
5258     description (13) ,
5259     direction (14) ,
5260     ec-number (15) ,
5261     environmental-sample (16) ,
5262     evidence (17) ,
5263     exception (18) ,
5264     experiment (19) ,
5265     focus (20) ,
5266     frequency (21) ,
5267     function (22) ,
5268     gene (23) ,
5269     gene-description (24) ,
5270     inference (25) ,
5271     label (26) ,
5272     locus-tag (27) ,
5273     map (28) ,
5274     mobile-element (29) ,
5275     mod-base (30) ,
5276     mol-type (31) ,
5277     ncRNA-class (32) ,
5278     note (33) ,
5279     number (34) ,
5280     old-locus-tag (35) ,
5281     operon (36) ,
5282     organism (37) ,
5283     organelle (38) ,
5284     partial (39) ,
5285     phenotype (40) ,
5286     plasmid (41) ,
5287     product (42) ,
5288     protein-id (43) ,
5289     pseudo (44) ,
5290     rearranged (45) ,
5291     replace (46) ,
5292     rpt-family (47) ,
5293     rpt-type (48) ,
5294     rpt-unit (49) ,
5295     rpt-unit-seq (50) ,
5296     rpt-unit-range (51) ,
5297     segment (52) ,
5298     sequenced-mol (53) ,
5299     standard-name (54) ,
5300     synonym (55) ,
5301     transcript-id (56) ,
5302     transgenic (57) ,
5303     translation (58) ,
5304     transl-except (59) ,
5305     transl-table (60) ,
5306     usedin (61),
5307     mobile-element-type (62),
5308     mobile-element-name (63),
5309     gene-comment (64) ,
5310     satellite (65) ,
5311     satellite-type (66) ,
5312     satellite-name (67) ,
5313     location (68) ,
5314     tag-peptide (69) ,
5315     mobile-element-type-type (70) ,
5316     name (71) }
5317 
5318 Feat-qual-legal-val ::= SEQUENCE {   -- an enumerated qualifier paired with a text value
5319     qual Feat-qual-legal ,
5320     val  VisibleString }
5321 
5322 Feat-qual-legal-val-choice ::= CHOICE {   -- CHOICE with a single alternative wrapping Feat-qual-legal-val
5323     qual Feat-qual-legal-val }
5324 
5325 Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice   -- unordered collection of qualifier/value entries
5326 
5327 Feat-qual-choice ::= CHOICE {   -- either an enumerated qualifier or a String-constraint
5328     legal-qual Feat-qual-legal ,
5329     illegal-qual String-constraint }   -- NOTE(review): presumably matches a qualifier name not in Feat-qual-legal; confirm
5330 
5331 Feature-field ::= SEQUENCE {   -- a feature type plus one qualifier selector
5332     type Macro-feature-type ,
5333     field Feat-qual-choice }
5334 
5335 Feature-field-legal ::= SEQUENCE {   -- like Feature-field, but field is restricted to Feat-qual-legal
5336     type Macro-feature-type ,
5337     field Feat-qual-legal }
5338 
5339 Feature-field-pair ::= SEQUENCE {   -- from/to pair of qualifier selectors for one feature type
5340     type Macro-feature-type ,
5341     field-from Feat-qual-choice ,
5342     field-to Feat-qual-choice }
5343 
5344 Rna-feat-type ::= CHOICE {   -- RNA type selector; every alternative is NULL except ncRNA
5345     any NULL ,
5346     preRNA NULL ,
5347     mRNA NULL ,
5348     tRNA NULL ,
5349     rRNA NULL ,
5350     ncRNA VisibleString ,   -- NOTE(review): string is presumably the ncRNA class; confirm
5351     tmRNA NULL,
5352     miscRNA NULL }
5353 
5354 Rna-field ::= ENUMERATED {   -- field selector used by Rna-qual and Rna-qual-pair
5355     product (1) ,
5356     comment (2) ,
5357     codons-recognized (3) ,
5358     ncrna-class (4) ,
5359     anticodon (5) ,
5360     transcript-id (6) ,
5361     gene-locus (7) ,
5362     gene-description (8) ,
5363     gene-maploc (9) ,
5364     gene-locus-tag (10) ,
5365     gene-synonym (11) ,
5366     gene-comment (12) ,
5367     tag-peptide (13) }
5368     
5369 
5370 Rna-qual ::= SEQUENCE {   -- an RNA type plus one Rna-field
5371     type Rna-feat-type ,
5372     field Rna-field }    
5373 
5374 Rna-qual-pair ::= SEQUENCE {   -- from/to pair of Rna-field values for one RNA type
5375     type Rna-feat-type ,
5376     field-from Rna-field ,
5377     field-to Rna-field }
5378 
5379 Source-qual ::= ENUMERATED {   -- enumerated source qualifier names; used by Source-qual-pair, Source-qual-choice and Source-qual-text-val
5380     acronym (1) ,
5381     anamorph (2) ,
5382     authority (3) ,
5383     bio-material (4) ,
5384     biotype (5) ,
5385     biovar (6) ,
5386     breed (7) ,
5387     cell-line (8) ,
5388     cell-type (9) ,
5389     chemovar (10) ,
5390     chromosome (11) ,
5391     clone (12) ,
5392     clone-lib (13) ,
5393     collected-by (14) ,
5394     collection-date (15) ,
5395     common (16) ,
5396     common-name (17) ,
5397     country (18) ,
5398     cultivar (19) ,
5399     culture-collection (20) ,
5400     dev-stage (21) ,
5401     division (22) ,
5402     dosage (23) ,
5403     ecotype (24) ,
5404     endogenous-virus-name (25) ,
5405     environmental-sample (26) ,
5406     forma (27) ,
5407     forma-specialis (28) ,
5408     frequency (29) ,
5409     fwd-primer-name (30) ,
5410     fwd-primer-seq (31) ,
5411     gb-acronym (32) ,
5412     gb-anamorph (33) ,
5413     gb-synonym (34) ,
5414     genotype (35) ,
5415     germline (36) ,
5416     group (37) ,
5417     haplotype (38) ,
5418     identified-by (39) ,
5419     insertion-seq-name (40) ,
5420     isolate (41) ,
5421     isolation-source (42) ,
5422     lab-host (43) ,
5423     lat-lon (44) ,
5424     lineage (45) ,
5425     map (46) ,
5426     metagenome-source (47) ,
5427     metagenomic (48) ,
5428     old-lineage (49) ,
5429     old-name (50) ,
5430     orgmod-note (51) ,
5431     nat-host (52) ,
5432     pathovar (53) ,
5433     plasmid-name (54) ,
5434     plastid-name (55) ,
5435     pop-variant (56) ,
5436     rearranged (57) ,
5437     rev-primer-name (58) ,
5438     rev-primer-seq (59) ,
5439     segment (60) ,
5440     serogroup (61) ,
5441     serotype (62) ,
5442     serovar (63) ,
5443     sex (64) ,
5444     specimen-voucher (65) ,
5445     strain (66) ,
5446     subclone (67) ,
5447     subgroup (68) ,
5448     subsource-note (69),
5449     sub-species (70) ,
5450     substrain (71) ,
5451     subtype (72) ,
5452     synonym (73) ,
5453     taxname (74) ,
5454     teleomorph (75) ,
5455     tissue-lib (76) ,
5456     tissue-type (77) ,
5457     transgenic (78) ,
5458     transposon-name (79) ,
5459     type (80) ,
5460     variety (81) ,
5461     specimen-voucher-INST (82) ,
5462     specimen-voucher-COLL (83) ,
5463     specimen-voucher-SpecID (84) ,
5464     culture-collection-INST (85) ,
5465     culture-collection-COLL (86) ,
5466     culture-collection-SpecID (87) ,
5467     bio-material-INST (88) ,
5468     bio-material-COLL (89) ,
5469     bio-material-SpecID (90),
5470     all-notes (91),
5471     mating-type (92),
5472     linkage-group (93) ,
5473     haplogroup (94),
5474     all-quals (95),
5475     dbxref (96) ,
5476     taxid (97)
5477 }
5478 
5479 Source-qual-pair ::= SEQUENCE {   -- from/to pair of Source-qual values
5480     field-from Source-qual ,
5481     field-to Source-qual }
5482 
5483 Source-location ::= ENUMERATED {   -- type of the "location" alternative in Source-qual-choice and Source-qual-val-choice
5484     unknown (0) ,
5485     genomic (1) ,
5486     chloroplast (2) ,
5487     chromoplast (3) ,
5488     kinetoplast (4) ,
5489     mitochondrion (5) ,
5490     plastid (6) ,
5491     macronuclear (7) ,
5492     extrachrom (8) ,
5493     plasmid (9) ,
5494     transposon (10) ,
5495     insertion-seq (11) ,
5496     cyanelle (12) ,
5497     proviral (13) ,
5498     virion (14) ,
5499     nucleomorph (15) ,
5500     apicoplast (16) ,
5501     leucoplast (17) ,
5502     proplastid (18) ,
5503     endogenous-virus (19) ,
5504     hydrogenosome (20) ,
5505     chromosome (21) ,
5506     chromatophore (22) }
5507 
5508 Source-origin ::= ENUMERATED {   -- type of the "origin" alternative in Source-qual-choice and Source-qual-val-choice
5509     unknown (0) ,
5510     natural (1) ,
5511     natmut (2) ,
5512     mut (3) ,
5513     artificial (4) ,
5514     synthetic (5) ,
5515     other (255) }
5516 
5517 Source-qual-choice ::= CHOICE {   -- selects one kind of source field; type of Source-constraint.field1/field2
5518     textqual Source-qual ,
5519     location Source-location,
5520     origin Source-origin ,
5521     gcode INTEGER  ,   -- NOTE(review): presumably a genetic code id; confirm
5522     mgcode INTEGER  }  -- NOTE(review): presumably a mitochondrial genetic code id; confirm
5523 
5524 Source-qual-text-val ::= SEQUENCE {   -- a Source-qual paired with a text value
5525     srcqual Source-qual ,
5526     val VisibleString } 
5527     
5528 Source-qual-val-choice ::= CHOICE {   -- same alternatives as Source-qual-choice, but textqual carries a value
5529     textqual Source-qual-text-val ,
5530     location Source-location,
5531     origin Source-origin ,
5532     gcode INTEGER ,
5533     mgcode INTEGER }
5534 
5535 Source-qual-val-set ::= SET OF Source-qual-val-choice   -- unordered collection of source field values
5536 
5537 CDSGeneProt-field ::= ENUMERATED {   -- field selector; used by CDSGeneProt-field-pair and CDSGeneProt-constraint-field
5538     cds-comment (1) ,
5539     gene-locus (2) ,
5540     gene-description (3) ,
5541     gene-comment (4) ,
5542     gene-allele (5) ,
5543     gene-maploc (6) ,
5544     gene-locus-tag (7) ,
5545     gene-synonym (8) ,
5546     gene-old-locus-tag (9) ,
5547     mrna-product (10) ,
5548     mrna-comment (11) ,
5549     prot-name (12) ,
5550     prot-description (13) ,
5551     prot-ec-number (14) ,
5552     prot-activity (15) ,
5553     prot-comment (16) ,
5554     mat-peptide-name (17) ,
5555     mat-peptide-description (18) ,
5556     mat-peptide-ec-number (19) ,
5557     mat-peptide-activity (20) ,
5558     mat-peptide-comment (21) ,
5559     cds-inference (22) ,
5560     gene-inference (23) ,
5561     codon-start (24) }
5562 
5563 CDSGeneProt-field-pair ::= SEQUENCE {   -- from/to pair of CDSGeneProt-field values
5564     field-from CDSGeneProt-field ,
5565     field-to CDSGeneProt-field }
5566 
5567 Molecule-type ::= ENUMERATED {   -- type of Molinfo-field.molecule and of Molinfo-molecule-pair from/to
5568   unknown (0) ,
5569   genomic (1) ,
5570   precursor-RNA (2) ,
5571   mRNA (3) ,
5572   rRNA (4) ,
5573   tRNA (5) ,
5574   genomic-mRNA (6) ,
5575   cRNA (7) ,
5576   transcribed-RNA (8) ,
5577   ncRNA (9) ,
5578   transfer-messenger-RNA (10) ,
5579   macro-other (11) }
5580 
5581 Technique-type ::= ENUMERATED {   -- type of Molinfo-field.technique and of Molinfo-technique-pair from/to
5582   unknown (0) , 
5583   standard (1) ,
5584   est (2) ,
5585   sts (3) ,
5586   survey (4) ,
5587   genetic-map (5) ,
5588   physical-map (6) ,
5589   derived (7) ,
5590   concept-trans (8) ,
5591   seq-pept (9) ,
5592   both (10) ,
5593   seq-pept-overlap (11) ,
5594   seq-pept-homol (12) , 
5595   concept-trans-a (13) ,
5596   htgs-1 (14) ,
5597   htgs-2 (15) ,
5598   htgs-3 (16) ,
5599   fli-cDNA (17) ,
5600   htgs-0 (18) ,
5601   htc (19) ,
5602   wgs (20) ,
5603   barcode (21) ,
5604   composite-wgs-htgs (22) ,
5605   tsa (23) ,
5606   other (24) }
5607 
5608 Completedness-type ::= ENUMERATED {   -- type of Molinfo-field.completedness and of Molinfo-completedness-pair from/to
5609   unknown (0) ,
5610   complete (1) ,
5611   partial (2) ,
5612   no-left (3) ,
5613   no-right (4) ,
5614   no-ends (5) ,
5615   has-left (6) ,
5616   has-right (7) ,
5617   other (8) }   -- was (6), colliding with has-left; ENUMERATED values must be distinct. Regenerate bindings after this change.
5618 
5619 Molecule-class-type ::= ENUMERATED {   -- type of Molinfo-field.mol-class and of Molinfo-mol-class-pair from/to
5620   unknown (0) ,
5621   dna (1) ,
5622   rna (2) ,
5623   protein (3) ,
5624   nucleotide (4),
5625   other (5) }
5626 
5627 Topology-type ::= ENUMERATED {   -- type of Molinfo-field.topology and of Molinfo-topology-pair from/to
5628   unknown (0) ,
5629   linear (1) ,
5630   circular (2) ,
5631   tandem (3) ,
5632   other (4) }
5633 
5634 Strand-type ::= ENUMERATED {   -- type of Molinfo-field.strand and of Molinfo-strand-pair from/to
5635   unknown (0) ,
5636   single (1) ,
5637   double (2) ,
5638   mixed (3) ,
5639   mixed-rev (4) ,
5640   other (5) }
5641 
5642 Molinfo-field ::= CHOICE {   -- selects one molinfo field and carries its enumerated value
5643     molecule Molecule-type ,
5644     technique Technique-type ,
5645     completedness Completedness-type ,
5646     mol-class Molecule-class-type ,
5647     topology Topology-type ,
5648     strand Strand-type }
5649 
5650 Molinfo-molecule-pair ::= SEQUENCE {   -- from/to pair of Molecule-type values
5651     from Molecule-type ,
5652     to Molecule-type }
5653 
5654 Molinfo-technique-pair ::= SEQUENCE {   -- from/to pair of Technique-type values
5655     from Technique-type ,
5656     to Technique-type }
5657 
5658 Molinfo-completedness-pair ::= SEQUENCE {   -- from/to pair of Completedness-type values
5659     from Completedness-type ,
5660     to Completedness-type }
5661 
5662 Molinfo-mol-class-pair ::= SEQUENCE {   -- from/to pair of Molecule-class-type values
5663     from Molecule-class-type ,
5664     to Molecule-class-type }
5665 
5666 Molinfo-topology-pair ::= SEQUENCE {   -- from/to pair of Topology-type values
5667     from Topology-type ,
5668     to Topology-type }
5669 
5670 Molinfo-strand-pair ::= SEQUENCE {   -- from/to pair of Strand-type values
5671     from Strand-type ,
5672     to Strand-type }
5673 
5674 Molinfo-field-pair ::= CHOICE {   -- one from/to pair; alternatives mirror those of Molinfo-field
5675     molecule Molinfo-molecule-pair ,
5676     technique Molinfo-technique-pair ,
5677     completedness Molinfo-completedness-pair ,
5678     mol-class Molinfo-mol-class-pair ,
5679     topology Molinfo-topology-pair ,
5680     strand Molinfo-strand-pair }
5681 
5682 Molinfo-field-list ::= SET OF Molinfo-field   -- unordered collection of Molinfo-field values
5683 
5684 Molinfo-field-constraint ::= SEQUENCE {   -- a Molinfo-field value plus a negation flag
5685     field Molinfo-field ,
5686     is-not BOOLEAN DEFAULT FALSE }   -- NOTE(review): presumably inverts the match when TRUE; confirm
5687 
5688 -- publication fields --
5689 
5690 Publication-field ::=  ENUMERATED {   -- field selector; used by Pub-field-constraint and Pub-field-special-constraint
5691     cit (1) ,
5692     authors (2) ,
5693     journal (3) ,
5694     volume (4) ,
5695     issue (5) ,
5696     pages (6) ,
5697     date (7) ,
5698     serial-number (8) ,
5699     title (9) ,
5700     affiliation (10) ,
5701     affil-div (11) ,
5702     affil-city (12) ,
5703     affil-sub (13) ,
5704     affil-country (14) ,
5705     affil-street (15) ,
5706     affil-email (16) ,
5707     affil-fax (17) ,
5708     affil-phone (18) ,
5709     affil-zipcode (19),
5710     authors-initials (20)
5711     }
5712   
5713 -- structured comment fields --
5714 
5715 Structured-comment-field ::= CHOICE {   -- only the "named" alternative carries a string; the others are NULL markers
5716   database NULL ,
5717   named VisibleString ,
5718   field-name NULL
5719   }
5720 
5721 Structured-comment-field-pair ::= SEQUENCE {   -- from/to pair of Structured-comment-field values
5722   from Structured-comment-field ,
5723   to Structured-comment-field
5724   }
5725   
5726 -- misc fields --
5727 -- these would not appear in pairs --
5728 Misc-field ::= ENUMERATED {   -- miscellaneous field selector; no matching pair type is defined for it
5729     genome-project-id (1) ,
5730     comment-descriptor (2) ,
5731     defline (3) ,
5732     keyword (4)
5733     }
5734 
5735 -- dblink fields --
5736 DBLink-field-type ::= ENUMERATED {  -- names one DBLink field; values are 1..5; referenced by Field-type (dblink) and DBLink-field-pair
5737   trace-assembly (1) ,
5738   bio-sample (2) ,
5739   probe-db (3) ,
5740   sequence-read-archve (4) ,  -- sic: spelled "archve"; the identifier appears in text-encoded values, so do not rename without checking every consumer
5741   bio-project (5) }
5742 
5743 DBLink-field-pair ::= SEQUENCE {  -- from/to pair of DBLink-field-type; referenced by Field-pair-type (dblink)
5744   from DBLink-field-type ,
5745   to DBLink-field-type
5746   }
5747      
5748 -- complex constraints --
5749 
5750 Pub-type ::= ENUMERATED {  -- value set for the mandatory "type" element of Publication-constraint
5751   any (0) ,
5752   published (1) ,
5753   unpublished (2) ,
5754   in-press (3) ,
5755   submitter-block (4) }
5756 
5757 Pub-field-constraint ::= SEQUENCE {  -- pairs one Publication-field with a String-constraint; the optional "field" element of Publication-constraint
5758   field Publication-field ,
5759   constraint String-constraint }
5760 
5761 Pub-field-special-constraint-type ::= CHOICE {  -- every alternative is NULL, so the value is fully described by which alternative is chosen
5762   is-present NULL ,
5763   is-not-present NULL ,
5764   is-all-caps NULL ,
5765   is-all-lower NULL ,
5766   is-all-punct NULL }
5767 
5768 Pub-field-special-constraint ::= SEQUENCE {  -- pairs one Publication-field with one special test; the optional "special-field" element of Publication-constraint
5769   field Publication-field ,
5770   constraint Pub-field-special-constraint-type }
5771   
5772 Publication-constraint ::= SEQUENCE {  -- the "pub" alternative of Constraint-choice; only "type" is mandatory
5773   type Pub-type ,
5774   field Pub-field-constraint OPTIONAL ,
5775   special-field Pub-field-special-constraint OPTIONAL }
5776 
5777 Source-constraint ::= SEQUENCE {  -- the "source" alternative of Constraint-choice; every element is OPTIONAL, so an empty value is syntactically legal
5778   field1 Source-qual-choice OPTIONAL ,
5779   field2 Source-qual-choice OPTIONAL ,  -- NOTE(review): how field1 and field2 combine is not stated here; confirm against the consumer
5780   constraint String-constraint OPTIONAL ,
5781   type-constraint Object-type-constraint OPTIONAL }
5782 
5783 CDSGeneProt-feature-type-constraint ::= ENUMERATED {  -- feature kinds, values 1..6; type of the "feature" element of CDSGeneProt-pseudo-constraint
5784     gene (1) ,
5785     mRNA (2) ,
5786     cds (3) ,
5787     prot (4) ,
5788     exon (5) ,
5789     mat-peptide (6) }
5790 
5791 CDSGeneProt-pseudo-constraint ::= SEQUENCE {  -- the "cdsgeneprot-pseudo" alternative of Constraint-choice
5792     feature CDSGeneProt-feature-type-constraint ,
5793     is-pseudo BOOLEAN DEFAULT TRUE }  -- note the default is TRUE, unlike most BOOLEAN defaults in this section
5794 
5795 CDSGeneProt-constraint-field ::= CHOICE {  -- CHOICE with a single alternative wrapping CDSGeneProt-field; presumably left open for later additions (confirm)
5796   field CDSGeneProt-field }
5797 
5798 CDSGeneProt-qual-constraint ::= SEQUENCE {  -- the "cdsgeneprot-qual" alternative of Constraint-choice; every element is OPTIONAL
5799   field1 CDSGeneProt-constraint-field OPTIONAL ,
5800   field2 CDSGeneProt-constraint-field OPTIONAL ,
5801   constraint String-constraint OPTIONAL }
5802 
5803 Field-constraint ::= SEQUENCE {  -- pairs a Field-type with a String-constraint, both mandatory; the "field" alternative of Constraint-choice
5804   field Field-type ,
5805   string-constraint String-constraint }
5806 
5807 Sequence-constraint-rnamol ::= ENUMERATED {  -- payload of the "rna" alternative of Sequence-constraint-mol-type-constraint; values 0..10
5808   any (0) ,
5809   genomic (1) ,
5810   precursor-RNA (2) ,
5811   mRNA (3) ,
5812   rRNA (4) ,
5813   tRNA (5) ,
5814   genomic-mRNA (6) ,
5815   cRNA (7) ,
5816   transcribed-RNA (8) ,
5817   ncRNA (9) ,
5818   transfer-messenger-RNA (10) }
5819 
5820 Sequence-constraint-mol-type-constraint ::= CHOICE {  -- type of the optional "seqtype" element of Sequence-constraint
5821   any NULL ,
5822   nucleotide NULL ,
5823   dna NULL ,
5824   rna Sequence-constraint-rnamol ,  -- the only alternative that carries a payload
5825   protein NULL }
5826 
5827 Quantity-constraint ::= CHOICE {  -- one comparison kind plus its INTEGER operand; used by Sequence-constraint, Translation-constraint and Extend-to-feature
5828   equals INTEGER ,
5829   greater-than INTEGER ,
5830   less-than INTEGER }
5831 
5832 Feature-strandedness-constraint ::= ENUMERATED {  -- type of the "strandedness" element of Sequence-constraint, where it defaults to any
5833   any (0) ,
5834   minus-only (1) ,
5835   plus-only (2) ,
5836   at-least-one-minus (3) ,
5837   at-least-one-plus (4) ,
5838   no-minus (5) ,
5839   no-plus (6) }
5840 
5841 Sequence-constraint ::= SEQUENCE {  -- the "sequence" alternative of Constraint-choice
5842     seqtype Sequence-constraint-mol-type-constraint OPTIONAL ,
5843     id String-constraint OPTIONAL ,
5844     feature Macro-feature-type ,  -- the only mandatory element of this SEQUENCE
5845     num-type-features Quantity-constraint OPTIONAL ,  -- NOTE(review): presumably counts features of the type given in "feature"; confirm
5846     num-features Quantity-constraint OPTIONAL ,
5847     length Quantity-constraint OPTIONAL ,
5848     strandedness Feature-strandedness-constraint DEFAULT any }  -- "any" when omitted
5849 
5850 Match-type-constraint ::= ENUMERATED {  -- three-state selector; type of "internal-stops" in Translation-constraint
5851   dont-care (0) ,
5852   yes (1) ,
5853   no (2) }
5854 
5855 Translation-constraint ::= SEQUENCE {  -- the "translation" alternative of Constraint-choice; both string sets are mandatory
5856   actual-strings String-constraint-set ,
5857   transl-strings String-constraint-set ,
5858   internal-stops Match-type-constraint DEFAULT dont-care ,  -- dont-care when omitted
5859   num-mismatches Quantity-constraint OPTIONAL }
5860 
5861 Constraint-choice ::= CHOICE {  -- one constraint of any supported kind; element type of Constraint-choice-set
5862     string String-constraint ,
5863     location Location-constraint ,
5864     field  Field-constraint ,
5865     source Source-constraint ,
5866     cdsgeneprot-qual CDSGeneProt-qual-constraint ,
5867     cdsgeneprot-pseudo CDSGeneProt-pseudo-constraint ,
5868     sequence Sequence-constraint ,
5869     pub Publication-constraint ,
5870     molinfo Molinfo-field-constraint ,
5871     field-missing Field-type ,  -- carries a bare Field-type with no String-constraint, unlike the "field" alternative
5872     translation Translation-constraint }
5873 
5874 Constraint-choice-set ::= SET OF Constraint-choice  -- unordered collection; the optional constraint element of most action types in this module. NOTE(review): whether members are ANDed or ORed is not stated here
5875 
5876 Text-marker ::= CHOICE {  -- type of left-marker and right-marker in Text-portion
5877     free-text VisibleString ,  -- the only alternative with a payload
5878     digits NULL ,
5879     letters NULL }
5880 
5881 Text-portion ::= SEQUENCE {  -- describes a span of text by optional left and right markers; used by the parse, remove-outside, text-transform and TSA-id types
5882     left-marker Text-marker  OPTIONAL ,
5883     include-left BOOLEAN ,  -- mandatory, no default
5884     right-marker Text-marker  OPTIONAL ,
5885     include-right BOOLEAN ,  -- mandatory, no default
5886     inside BOOLEAN ,  -- mandatory, no default; NOTE(review): presumably selects the text between the markers rather than outside them; confirm
5887     case-sensitive BOOLEAN DEFAULT FALSE ,  -- note Field-edit expresses the same idea with the opposite polarity (case-insensitive)
5888     whole-word BOOLEAN DEFAULT FALSE }
5889 
5890 Field-edit-location ::= ENUMERATED {  -- type of the "location" element of Field-edit
5891     anywhere (0) ,
5892     beginning (1) ,
5893     end (2) }
5894 
5895 Field-edit ::= SEQUENCE {  -- find/replace description; used by Edit-action (edit) and Text-transform (edit)
5896     find-txt VisibleString ,
5897     repl-txt VisibleString OPTIONAL ,  -- NOTE(review): an omitted repl-txt presumably means replace with nothing; confirm
5898     location Field-edit-location DEFAULT anywhere ,
5899     case-insensitive BOOLEAN DEFAULT FALSE }  -- opposite polarity to Text-portion.case-sensitive; both default to FALSE
5900 
5901 Field-type ::= CHOICE {  -- identifies a single field of any supported family; Field-pair-type is the from/to counterpart
5902     source-qual Source-qual-choice ,
5903     feature-field Feature-field ,
5904     rna-field Rna-qual ,
5905     cds-gene-prot CDSGeneProt-field ,
5906     molinfo-field Molinfo-field ,
5907     pub Publication-field ,  -- has no counterpart in Field-pair-type
5908     struc-comment-field Structured-comment-field ,
5909     misc Misc-field ,  -- has no counterpart in Field-pair-type
5910     dblink DBLink-field-type }
5911 
5912 Field-pair-type ::= CHOICE {  -- from/to pair within a single field family; used by Convert-action, Copy-action, Swap-action and AECRParse-action
5913     source-qual Source-qual-pair ,
5914     feature-field Feature-field-pair ,
5915     rna-field Rna-qual-pair ,
5916     cds-gene-prot CDSGeneProt-field-pair ,
5917     molinfo-field Molinfo-field-pair ,
5918     struc-comment-field Structured-comment-field-pair ,
5919     dblink DBLink-field-pair} -- unlike Field-type there are no "pub" or "misc" alternatives
5920 
5921 ExistingTextOption ::= ENUMERATED {  -- values 1..13 with no 0 entry; mandatory "existing-text" element of the apply, convert, copy and parse actions. Presumably says how new text combines with text already in the destination (confirm)
5922   replace-old (1) ,
5923   append-semi (2) ,
5924   append-space (3) ,
5925   append-colon (4) ,
5926   append-comma (5) ,
5927   append-none (6) ,
5928   prefix-semi (7) ,
5929   prefix-space (8) ,
5930   prefix-colon (9) ,
5931   prefix-comma (10) ,
5932   prefix-none (11) ,
5933   leave-old (12) ,
5934   add-qual (13) }
5935 
5936 
5937 Apply-action ::= SEQUENCE {  -- the "apply" alternative of Action-choice; all three elements are mandatory
5938     field Field-type ,
5939     value VisibleString ,
5940     existing-text ExistingTextOption }
5941 
5942 Edit-action ::= SEQUENCE {  -- the "edit" alternative of Action-choice: a Field-edit together with the Field-type it targets
5943     edit Field-edit ,
5944     field Field-type }
5945 
5946 Cap-change ::= ENUMERATED {  -- capitalization selector; used by Text-transform (caps), Convert-action and Parse-action, where it defaults to none
5947     none (0) ,
5948     tolower (1) ,
5949     toupper (2) ,
5950     firstcap (3) ,
5951     firstcaprestnochange (4) ,  -- sic: no hyphens, unlike firstlower-restnochange below; identifiers appear in text-encoded values, so do not normalize
5952     firstlower-restnochange (5) ,
5953     cap-word-space (6) ,
5954     cap-word-space-punc (7)
5955     }
5956 
5957 Text-transform ::= CHOICE {  -- one text transformation; element type of Text-transform-set
5958   edit Field-edit ,
5959   caps Cap-change ,
5960   remove Text-portion }
5961 
5962 Text-transform-set ::= SET OF Text-transform  -- optional "transform" element of AECRParse-action and Parse-action. NOTE(review): a SET OF carries no ordering guarantee; confirm how the consumer orders transforms
5963 
5964 Convert-action ::= SEQUENCE {  -- the "convert" alternative of Action-choice; "fields" and "existing-text" are mandatory
5965     fields Field-pair-type ,
5966     strip-name BOOLEAN DEFAULT FALSE ,
5967     keep-original BOOLEAN DEFAULT FALSE ,
5968     capitalization Cap-change DEFAULT none ,
5969     existing-text ExistingTextOption }
5970 
5971 Copy-action ::= SEQUENCE {  -- the "copy" alternative of Action-choice
5972     fields Field-pair-type ,
5973     existing-text ExistingTextOption }
5974 
5975 Swap-action ::= SEQUENCE {  -- the "swap" alternative of Action-choice
5976     fields Field-pair-type ,
5977     field-to Field-type }  -- NOTE(review): "fields" already holds a from/to pair; the role of this extra Field-type is not stated here
5978 
5979 AECRParse-action ::= SEQUENCE {  -- the "parse" alternative of Action-choice; distinct from the top-level Parse-action, which uses Parse-src and Parse-dest instead of a Field-pair-type
5980     portion Text-portion ,
5981     fields Field-pair-type ,
5982     remove-from-parsed BOOLEAN DEFAULT FALSE ,
5983     remove-left BOOLEAN DEFAULT FALSE ,
5984     remove-right BOOLEAN DEFAULT FALSE ,
5985     transform Text-transform-set OPTIONAL ,
5986     existing-text ExistingTextOption }
5987 
5988 Remove-action ::= SEQUENCE {  -- the "remove" alternative of Action-choice; wraps a single Field-type
5989     field Field-type }
5990 
5991 Remove-outside-action ::= SEQUENCE {  -- the "remove-outside" alternative of Action-choice
5992     portion Text-portion ,
5993     field Field-type ,
5994     remove-if-not-found BOOLEAN DEFAULT FALSE }  -- FALSE when omitted
5995 
5996 Action-choice ::= CHOICE {  -- one field-level action; the mandatory "action" element of AECR-action
5997     apply Apply-action ,
5998     edit Edit-action ,
5999     convert Convert-action ,
6000     copy Copy-action ,
6001     swap Swap-action ,
6002     remove Remove-action ,
6003     parse AECRParse-action ,
6004     remove-outside Remove-outside-action }
6005 
6006 AECR-action ::= SEQUENCE {  -- the "aecr" alternative of Macro-action-choice: one Action-choice plus an optional constraint set
6007     action Action-choice ,
6008     also-change-mrna BOOLEAN DEFAULT FALSE ,
6009     constraint Constraint-choice-set OPTIONAL }
6010 
6011 Parse-src-org-choice ::= CHOICE {  -- type of the "field" element of Parse-src-org
6012     source-qual Source-qual ,
6013     taxname-after-binomial NULL }
6014 
6015 Parse-src-org ::= SEQUENCE {  -- payload of the "org" alternative of Parse-src
6016     field Parse-src-org-choice ,
6017     type Object-type-constraint DEFAULT any }  -- "any" when omitted
6018 
6019 -- For Parse-src-general-id tag, specify the db of the id from which you
6020 -- want to retrieve the tag.  If empty or null, any db will do.
6021 Parse-src-general-id ::= CHOICE {  -- payload of the "general-id" alternative of Parse-src
6022     whole-text NULL ,
6023     db NULL ,  -- no payload
6024     tag VisibleString }  -- per the comment above, the string names the db whose tag is wanted
6025 
6026 Parse-src ::= CHOICE {  -- where Parse-action reads its text from (its mandatory "src" element)
6027     defline NULL ,
6028     flatfile NULL ,
6029     local-id NULL ,
6030     org Parse-src-org ,
6031     comment NULL ,
6032     bankit-comment NULL ,
6033     structured-comment VisibleString ,  -- NOTE(review): presumably the structured-comment field name; confirm
6034     file-id NULL ,
6035     general-id Parse-src-general-id }
6036 
6037 Parse-dst-org ::= SEQUENCE {  -- payload of the "org" alternative of Parse-dest; note the dst spelling here versus dest in Parse-dest
6038     field Source-qual-choice ,
6039     type Object-type-constraint DEFAULT any }  -- "any" when omitted
6040 
6041 Parse-dest ::= CHOICE {  -- where Parse-action writes its text to (its mandatory "dest" element)
6042     defline NULL ,
6043     org Parse-dst-org ,
6044     featqual Feature-field-legal ,
6045     comment-descriptor NULL ,
6046     dbxref VisibleString }  -- NOTE(review): presumably the db name for the new dbxref; confirm
6047 
6048 Parse-action ::= SEQUENCE {  -- the "parse" alternative of Macro-action-choice; portion, src, dest and existing-text are mandatory
6049     portion Text-portion ,
6050     src Parse-src ,
6051     dest Parse-dest ,
6052     capitalization Cap-change DEFAULT none ,
6053     remove-from-parsed BOOLEAN DEFAULT FALSE ,
6054     transform Text-transform-set OPTIONAL ,
6055     existing-text ExistingTextOption }
6056 
6057 
6058 Location-interval ::= SEQUENCE {  -- pair of INTEGER positions; NOTE(review): 0-based versus 1-based and endpoint inclusivity are not stated here
6059     from INTEGER ,
6060     to INTEGER  }
6061 
6062 Location-choice ::= CHOICE {  -- type of the mandatory "location" element of Apply-feature-action
6063     interval Location-interval ,
6064     whole-sequence NULL ,
6065     point INTEGER }
6066 
6067 Sequence-list ::= SET OF VisibleString  -- unordered strings; NOTE(review): presumably sequence identifiers, confirm
6068 Sequence-list-choice ::= CHOICE {  -- either an explicit Sequence-list or "all"; type of the "seq-list" element of Apply-feature-action
6069     list Sequence-list ,
6070     all NULL }
6071     
6072 Apply-feature-action ::= SEQUENCE {  -- the "add-feature" alternative of Macro-action-choice; type, location and seq-list are mandatory
6073     type Macro-feature-type ,
6074     partial5 BOOLEAN DEFAULT FALSE ,
6075     partial3 BOOLEAN DEFAULT FALSE ,
6076     plus-strand BOOLEAN DEFAULT TRUE ,  -- defaults to TRUE
6077     location Location-choice ,
6078     seq-list Sequence-list-choice ,
6079     add-redundant BOOLEAN DEFAULT TRUE ,  -- defaults to TRUE
6080     add-mrna BOOLEAN DEFAULT FALSE ,
6081     apply-to-parts BOOLEAN DEFAULT FALSE ,
6082     only-seg-num INTEGER DEFAULT -1 ,  -- NOTE(review): -1 presumably means no segment restriction; confirm against the consumer
6083     fields Feat-qual-legal-set OPTIONAL,
6084     src-fields Source-qual-val-set OPTIONAL }
6085 
6086 Remove-feature-action ::= SEQUENCE {  -- the "remove-feature" alternative of Macro-action-choice: a feature type plus an optional constraint set
6087     type Macro-feature-type ,
6088     constraint Constraint-choice-set OPTIONAL }
6089 
6090 -- for convert features --
6091 Convert-from-CDS-options ::= SEQUENCE {  -- three mandatory flags with no defaults; payload of the "cds" alternative of Convert-feature-src-options
6092   remove-mRNA BOOLEAN ,
6093   remove-gene BOOLEAN ,
6094   remove-transcript-id BOOLEAN }
6095 
6096 Convert-feature-src-options ::= CHOICE { -- CHOICE with a single alternative; optional "src-options" element of Convert-feature-action
6097   cds Convert-from-CDS-options }
6098 
6099 Bond-type ::= ENUMERATED {  -- values 1..5; payload of the "bond" alternative of Convert-feature-dst-options
6100   disulfide (1) ,
6101   thioester (2) ,
6102   crosslink (3) ,
6103   thioether (4) ,
6104   other (5) }
6105 
6106 
6107 Site-type ::= ENUMERATED {  -- values 1..27; payload of the "site" alternative of Convert-feature-dst-options
6108   active (1) ,
6109   binding (2) ,
6110   cleavage (3) ,
6111   inhibit (4) ,
6112   modified (5) ,
6113   glycosylation (6) ,
6114   myristoylation (7) ,
6115   mutagenized (8) ,
6116   metal-binding (9) ,
6117   phosphorylation (10) ,
6118   acetylation (11) ,
6119   amidation (12) ,
6120   methylation (13) ,
6121   hydroxylation (14) ,
6122   sulfatation (15) ,
6123   oxidative-deamination (16) ,
6124   pyrrolidone-carboxylic-acid (17) ,
6125   gamma-carboxyglutamic-acid (18) ,
6126   blocked (19) ,
6127   lipid-binding (20) ,
6128   np-binding (21) ,
6129   dna-binding (22) ,
6130   signal-peptide (23) ,
6131   transit-peptide (24) ,
6132   transmembrane-region (25) ,
6133   nitrosylation (26) ,
6134   other (27) }  -- "other" is the highest value here (27), whereas Bond-type uses 5 for its "other"
6135 
6136 -- other choice is to create protein sequences, skipping bad --
6137 Region-type ::= SEQUENCE {  -- payload of the "region" alternative of Convert-feature-dst-options
6138   create-nucleotide BOOLEAN } -- mandatory, no default; NOTE(review): FALSE presumably selects the protein behaviour described in the comment above; confirm
6139 
6140 Convert-feature-dst-options ::= CHOICE {  -- optional "dst-options" element of Convert-feature-action; only one alternative can be present at a time
6141   bond Bond-type ,
6142   site Site-type ,
6143   region Region-type ,
6144   ncrna-class VisibleString ,
6145   remove-original BOOLEAN }
6146 
6147 
6148 Convert-feature-action ::= SEQUENCE {  -- the "convert-feature" alternative of Macro-action-choice
6149   type-from Macro-feature-type ,
6150   type-to Macro-feature-type ,
6151   src-options Convert-feature-src-options OPTIONAL ,
6152   dst-options Convert-feature-dst-options OPTIONAL ,
6153   leave-original BOOLEAN ,  -- mandatory, no default; NOTE(review): Convert-feature-dst-options also has a remove-original flag, confirm which one the consumer honours
6154   src-feat-constraint Constraint-choice-set OPTIONAL } -- named src-feat-constraint here; most sibling actions call this element "constraint"
6155 
6156 
6157 Feature-location-strand-from ::= ENUMERATED {  -- type of strand-from in Edit-location-strand; includes any (0), which the "to" enumeration lacks
6158   any (0) ,
6159   plus (1) ,
6160   minus (2) ,
6161   unknown (3) ,
6162   both (4) }
6163 
6164 Feature-location-strand-to ::= ENUMERATED {  -- type of strand-to in Edit-location-strand; values 1..4 match the "from" enumeration, and reverse (5) exists only here
6165   plus (1) ,
6166   minus (2) ,
6167   unknown (3) ,
6168   both (4) ,
6169   reverse (5) }
6170 
6171 Edit-location-strand ::= SEQUENCE {  -- payload of the "strand" alternative of Location-edit-type
6172   strand-from Feature-location-strand-from ,
6173   strand-to   Feature-location-strand-to } 
6174 
6175 Partial-5-set-constraint ::= ENUMERATED {  -- type of the "constraint" element of Partial-5-set-action
6176   all (0) ,
6177   at-end (1) ,
6178   bad-start (2) ,
6179   frame-not-one (3) }  -- has no counterpart in Partial-3-set-constraint
6180 
6181 Partial-5-set-action ::= SEQUENCE {  -- payload of the "set-5-partial" alternative of Location-edit-type
6182   constraint Partial-5-set-constraint ,
6183   extend BOOLEAN }  -- mandatory, no default
6184 
6185 Partial-5-clear-constraint ::= ENUMERATED {  -- used directly as the "clear-5-partial" alternative of Location-edit-type; there is no wrapping action SEQUENCE
6186   all (0) ,
6187   not-at-end (1) ,
6188   good-start (2) }
6189 
6190 Partial-3-set-constraint ::= ENUMERATED {  -- type of the "constraint" element of Partial-3-set-action
6191   all (0) ,
6192   at-end (1) ,
6193   bad-end (2) }
6194 
6195 Partial-3-set-action ::= SEQUENCE {  -- payload of the "set-3-partial" alternative of Location-edit-type
6196   constraint Partial-3-set-constraint ,
6197   extend BOOLEAN }  -- mandatory, no default
6198 
6199 Partial-3-clear-constraint ::= ENUMERATED {  -- used directly as the "clear-3-partial" alternative of Location-edit-type
6200   all (0) ,
6201   not-at-end (1) ,
6202   good-end (2) }
6203 
6204 Partial-both-set-constraint ::= ENUMERATED {  -- type of the "constraint" element of Partial-both-set-action
6205   all (0) ,
6206   at-end (1) }
6207 
6208 Partial-both-set-action ::= SEQUENCE {  -- payload of the "set-both-partial" alternative of Location-edit-type
6209   constraint Partial-both-set-constraint ,
6210   extend BOOLEAN }  -- mandatory, no default
6211 
6212 Partial-both-clear-constraint ::= ENUMERATED {  -- used directly as the "clear-both-partial" alternative of Location-edit-type
6213   all (0) ,
6214   not-at-end (1) }
6215   
6216 Convert-location-type ::= ENUMERATED {  -- values 1..3; payload of the "convert" alternative of Location-edit-type
6217   join (1) ,
6218   order (2) ,
6219   merge (3) }
6220 
6221 Extend-to-feature ::= SEQUENCE {  -- payload of both the "extend-5-to-feat" and "extend-3-to-feat" alternatives of Location-edit-type
6222   type Macro-feature-type ,
6223   include-feat BOOLEAN ,  -- mandatory, no default
6224   distance Quantity-constraint OPTIONAL }  -- NOTE(review): units are not stated here; presumably sequence positions, confirm
6225   
6226 Location-edit-type ::= CHOICE {  -- one location edit; the mandatory "action" element of Edit-feature-location-action
6227   strand Edit-location-strand ,
6228   set-5-partial Partial-5-set-action ,
6229   clear-5-partial Partial-5-clear-constraint ,
6230   set-3-partial Partial-3-set-action ,
6231   clear-3-partial Partial-3-clear-constraint ,
6232   set-both-partial Partial-both-set-action ,
6233   clear-both-partial Partial-both-clear-constraint ,
6234   convert Convert-location-type ,
6235   extend-5 NULL ,  -- no payload, unlike extend-5-to-feat below
6236   extend-3 NULL ,  -- no payload, unlike extend-3-to-feat below
6237   extend-5-to-feat Extend-to-feature ,
6238   extend-3-to-feat Extend-to-feature }
6239 
6240 Edit-feature-location-action ::= SEQUENCE {  -- the "edit-location" alternative of Macro-action-choice
6241   type Macro-feature-type ,
6242   action Location-edit-type ,
6243   retranslate-cds BOOLEAN OPTIONAL ,  -- OPTIONAL with no DEFAULT, so absence is distinguishable from FALSE; NOTE(review): confirm how the consumer treats absence
6244   also-edit-gene BOOLEAN OPTIONAL ,  -- OPTIONAL with no DEFAULT, as above
6245   constraint Constraint-choice-set OPTIONAL }
6246 
6247 Molinfo-block ::= SEQUENCE {  -- the "apply-molinfo-block" alternative of Macro-action-choice
6248     to-list Molinfo-field-list  ,  -- the only mandatory element
6249     from-list Molinfo-field-list OPTIONAL ,
6250     constraint Constraint-choice-set OPTIONAL }
6251 
6252 Descriptor-type ::= ENUMERATED {  -- values 0..11; type of the "type" element of Remove-descriptor-action
6253   all (0) ,
6254   title (1) ,
6255   source (2) ,
6256   publication (3) ,
6257   comment (4) ,
6258   genbank (5) ,
6259   user (6) ,
6260   create-date (7) ,
6261   update-date (8) ,
6262   mol-info (9) ,
6263   structured-comment (10) ,
6264   genome-project-id (11) }
6265 
6266 Remove-descriptor-action ::= SEQUENCE {  -- the "remove-descriptor" alternative of Macro-action-choice
6267   type Descriptor-type ,
6268   constraint Constraint-choice-set OPTIONAL }  
6269 
6270 Autodef-list-type ::= ENUMERATED {  -- values 1..4; type of the mandatory "clause-list-type" element of Autodef-action
6271   feature-list (1) ,
6272   complete-sequence (2) ,
6273   complete-genome (3) ,
6274   sequence (4) }
6275   
6276 Autodef-misc-feat-parse-rule ::= ENUMERATED {  -- values 1..2; type of "misc-feat-parse-rule" in Autodef-action
6277   use-comment-before-first-semicolon (1) ,
6278   look-for-noncoding-products (2) }
6279 
6280 Autodef-action ::= SEQUENCE {  -- the "autodef" alternative of Macro-action-choice
6281   modifiers SET OF Source-qual OPTIONAL ,  -- inline SET OF; no named list type is declared for it
6282   clause-list-type Autodef-list-type ,  -- the only mandatory element
6283   misc-feat-parse-rule Autodef-misc-feat-parse-rule DEFAULT look-for-noncoding-products }
6284 
6285 Fix-pub-caps-action ::= SEQUENCE {  -- used as the "fix-pub-caps" alternative of Macro-action-choice and as the "pub" alternative of Fix-caps-action; no element is mandatory
6286   title BOOLEAN OPTIONAL ,  -- the four per-field flags are OPTIONAL with no DEFAULT
6287   authors BOOLEAN OPTIONAL ,
6288   affiliation BOOLEAN OPTIONAL ,
6289   affil-country BOOLEAN OPTIONAL ,
6290   punct-only BOOLEAN DEFAULT FALSE ,
6291   constraint Constraint-choice-set OPTIONAL }
6292 
6293 Sort-order ::= ENUMERATED {  -- values 1..3; type of the "order" element of Sort-fields-action
6294   short-to-long (1) ,
6295   long-to-short (2) ,
6296   alphabetical (3) }
6297 
6298 Sort-fields-action ::= SEQUENCE {  -- the "sort-fields" alternative of Macro-action-choice; field and order are mandatory
6299   field Field-type ,
6300   order Sort-order ,
6301   constraint Constraint-choice-set OPTIONAL }
6302   
6303 Fix-author-caps ::= SEQUENCE {  -- payload of the "author" alternative of Fix-caps-action; a single mandatory flag
6304   last-name-only BOOLEAN }
6305 
6306 Fix-caps-action ::= CHOICE {  -- the "fix-caps" alternative of Macro-action-choice
6307   pub Fix-pub-caps-action ,  -- same type that Macro-action-choice exposes directly as fix-pub-caps
6308   src-country NULL ,
6309   mouse-strain NULL ,
6310   src-qual Source-qual ,
6311   author Fix-author-caps }
6312 
6313 Fix-format-action ::= CHOICE {  -- the "fix-format" alternative of Macro-action-choice; every alternative is NULL, so only the selection carries information
6314   collection-date NULL ,
6315   lat-lon NULL ,
6316   primers NULL ,
6317   protein-name NULL }
6318 
6319 Remove-duplicate-feature-action ::= SEQUENCE {  -- the "remove-duplicate-features" alternative of Macro-action-choice
6320   type Macro-feature-type ,
6321   ignore-partials BOOLEAN ,  -- the three flags are mandatory, with no defaults
6322   case-sensitive BOOLEAN ,
6323   remove-proteins BOOLEAN ,
6324   rd-constraint Constraint-choice-set OPTIONAL }  -- named rd-constraint here; most sibling actions call this element "constraint"
6325 
6326 Gene-xref-suppression-type ::= ENUMERATED {  -- three-state selector; type of "suppression" in Gene-xref-type
6327   any (0) ,
6328   suppressing (1) ,
6329   non-suppressing (2) }
6330 
6331 Gene-xref-necessary-type ::= ENUMERATED {  -- three-state selector; type of "necessary" in Gene-xref-type
6332   any (0) ,
6333   necessary (1) ,
6334   unnecessary (2) }
6335 
6336 Gene-xref-type ::= SEQUENCE {  -- payload of the "gene" alternative of Xref-type; all three elements are mandatory
6337   feature Macro-feature-type ,
6338   suppression Gene-xref-suppression-type ,
6339   necessary Gene-xref-necessary-type }
6340   
6341 Xref-type ::= CHOICE {  -- CHOICE with a single alternative; type of the "xref-type" element of Remove-xrefs-action
6342   gene Gene-xref-type }
6343 
6344 Remove-xrefs-action ::= SEQUENCE {  -- the "remove-xrefs" alternative of Macro-action-choice
6345   xref-type Xref-type ,
6346   constraint Constraint-choice-set OPTIONAL } 
6347 
6348 Make-gene-xref-action ::= SEQUENCE {  -- the "make-gene-xrefs" alternative of Macro-action-choice
6349   feature Macro-feature-type ,
6350   constraint Constraint-choice-set OPTIONAL } 
6351 
6352 Author-fix-type ::= ENUMERATED {  -- values 1..3; type of the "fix-type" element of Author-fix-action
6353   truncate-middle-initials (1) ,
6354   strip-suffix (2) ,
6355   move-middle-to-first (3) }
6356 
6357 Author-fix-action ::= SEQUENCE {  -- the "fix-author" alternative of Macro-action-choice
6358   fix-type Author-fix-type ,
6359   constraint Constraint-choice-set OPTIONAL }  
6360 
6361 Update-sequences-action ::= SEQUENCE {  -- the "update-sequences" alternative of Macro-action-choice
6362   filename VisibleString ,  -- NOTE(review): path conventions (relative or absolute) are not stated here
6363   add-cit-subs BOOLEAN DEFAULT FALSE }
6364 
6365 Create-TSA-ids-src ::= CHOICE {  -- type of the mandatory "src" element of Create-TSA-ids-action
6366   local-id NULL ,
6367   defline Text-portion  -- the only alternative with a payload
6368 }
6369 
6370 Create-TSA-ids-action ::= SEQUENCE {  -- the "create-tsa-ids" alternative of Macro-action-choice
6371   src Create-TSA-ids-src ,
6372   suffix VisibleString OPTIONAL ,
6373   id-text-portion Text-portion OPTIONAL }  -- NOTE(review): separate from the Text-portion inside src (defline); how the two interact is not stated here
6374 
6375 Autofix-action ::= SEQUENCE {  -- the "perform-autofix" alternative of Macro-action-choice; wraps a single string
6376   test-name VisibleString }  -- NOTE(review): the set of valid test names is not defined in this module
6377 
6378 Fix-sets-action ::= CHOICE {  -- the "fix-sets" alternative of Macro-action-choice; every alternative is NULL
6379   remove-single-item-set NULL ,
6380   renormalize-nuc-prot-sets NULL ,
6381   fix-pop-to-phy NULL
6382 }
6383 
6384 Table-match-type ::= CHOICE {  -- type of the "match-type" element of Table-match
6385   feature-id NULL ,
6386   gene-locus-tag NULL ,
6387   protein-id NULL,
6388   dbxref NULL ,
6389   nuc-id NULL ,
6390   src-qual Source-qual-choice ,  -- the only alternative with a payload
6391   protein-name NULL ,
6392   any NULL
6393 }
6394 
6395 Table-match ::= SEQUENCE {  -- type of the "match-type" element of Apply-table-action
6396   match-type Table-match-type ,
6397   match-location String-location DEFAULT equals  -- "equals" when omitted
6398 }
6399 
6400 
6401 Apply-table-extra-data ::= CHOICE {  -- CHOICE with a single NULL alternative, so it carries no data of its own in the encoding; NOTE(review): presumably a hook for data the program attaches at run time, confirm
6402   table NULL }
6403 
6404 Apply-table-action ::= SEQUENCE {  -- the "apply-table" alternative of Macro-action-choice
6405   filename VisibleString ,
6406   match-type Table-match ,  -- the element is named match-type but its type is Table-match, which in turn contains its own match-type element
6407   in-memory-table Apply-table-extra-data OPTIONAL
6408 }
6409 
6410 Add-file-action ::= SEQUENCE {  -- type of the "descriptor-list" element of Add-descriptor-list-action
6411   filename VisibleString ,
6412   in-memory-table Apply-table-extra-data OPTIONAL  -- same extra-data type that Apply-table-action uses
6413 } 
6414 
6415 Add-descriptor-list-action ::= SEQUENCE {  -- the "add-file-descriptors" alternative of Macro-action-choice
6416   descriptor-list Add-file-action ,
6417   constraint Constraint-choice-set OPTIONAL
6418 }
6419 
6420 Remove-sequences-action ::= SEQUENCE {  -- the "remove-sequences" alternative of Macro-action-choice
6421   constraint Constraint-choice-set  -- mandatory here, whereas sibling actions declare their constraint set OPTIONAL
6422 }
6423 
6424 Macro-action-choice ::= CHOICE {  -- one macro step; alternatives typed NULL take no parameters. Element type of Macro-action-list
6425   aecr AECR-action ,
6426   parse Parse-action ,
6427   add-feature Apply-feature-action ,
6428   remove-feature Remove-feature-action ,
6429   convert-feature Convert-feature-action ,
6430   edit-location Edit-feature-location-action ,
6431   remove-descriptor Remove-descriptor-action ,
6432   autodef Autodef-action ,
6433   removesets NULL ,
6434   trim-junk-from-primer-seq NULL ,
6435   trim-stop-from-complete-cds NULL ,
6436   fix-usa-and-states NULL ,
6437   synchronize-cds-partials NULL ,
6438   adjust-for-consensus-splice NULL ,
6439   fix-pub-caps Fix-pub-caps-action ,  -- the same type is also reachable as fix-caps with its "pub" alternative
6440   remove-seg-gaps NULL ,
6441   sort-fields Sort-fields-action ,
6442   apply-molinfo-block Molinfo-block ,
6443   fix-caps Fix-caps-action ,
6444   fix-format Fix-format-action ,
6445   fix-spell NULL ,
6446   remove-duplicate-features Remove-duplicate-feature-action ,
6447   remove-lineage-notes NULL ,
6448   remove-xrefs Remove-xrefs-action ,
6449   make-gene-xrefs Make-gene-xref-action ,
6450   make-bold-xrefs NULL ,
6451   fix-author Author-fix-action ,
6452   update-sequences Update-sequences-action ,
6453   add-trans-splicing NULL ,
6454   remove-invalid-ecnumbers NULL ,
6455   create-tsa-ids Create-TSA-ids-action ,
6456   perform-autofix Autofix-action ,
6457   fix-sets Fix-sets-action ,
6458   apply-table Apply-table-action ,
6459   remove-sequences Remove-sequences-action ,
6460   propagate-sequence-technology NULL ,
6461   add-file-descriptors Add-descriptor-list-action ,
6462   propagate-missing-old-name NULL }
6463 
6464 
6465 Macro-action-list ::= SET OF Macro-action-choice  -- NOTE(review): declared as SET OF, which carries no ordering guarantee; confirm whether the consumer relies on the encoded order of steps
6466 
6467 
6468 Search-func ::= CHOICE {  -- one search test; type of the "find" and "except" elements of Suspect-rule
6469   string-constraint String-constraint ,
6470   contains-plural NULL ,
6471   n-or-more-brackets-or-parentheses INTEGER ,  -- INTEGER payload; presumably the threshold n (confirm)
6472   three-numbers NULL ,
6473   underscore NULL ,
6474   prefix-and-numbers VisibleString ,  -- VisibleString payload; presumably the prefix (confirm)
6475   all-caps NULL ,
6476   unbalanced-paren NULL ,
6477   too-long INTEGER ,  -- INTEGER payload; presumably a length limit (confirm)
6478   has-term VisibleString }
6479 
6480 Simple-replace ::= SEQUENCE {  -- payload of the "simple-replace" alternative of Replace-func; no element is mandatory
6481   replace VisibleString OPTIONAL,
6482   whole-string BOOLEAN DEFAULT FALSE ,
6483   weasel-to-putative BOOLEAN DEFAULT FALSE }
6484 
6485 Replace-func ::= CHOICE {  -- type of the "replace-func" element of Replace-rule
6486   simple-replace Simple-replace ,
6487   haem-replace VisibleString }
6488 
6489 Replace-rule ::= SEQUENCE {  -- optional "replace" element of Suspect-rule
6490   replace-func Replace-func ,
6491   move-to-note BOOLEAN DEFAULT FALSE }
6492 
6493 Fix-type ::= ENUMERATED {  -- values 0..14; type of the "rule-type" element of Suspect-rule, where it defaults to none
6494   none (0) ,
6495   typo (1) ,
6496   putative-typo (2) ,
6497   quickfix (3) ,
6498   no-organelle-for-prokaryote (4),
6499   might-be-nonfunctional (5),
6500   database (6),
6501   remove-organism-name (7),
6502   inappropriate-symbol (8),
6503   evolutionary-relationship (9),
6504   use-protein (10),
6505   hypothetical (11),
6506   british (12),
6507   description (13),
6508   gene (14) }  
6509 
6510 Suspect-rule ::= SEQUENCE {  -- one rule; element type of Suspect-rule-set. Only "find" is mandatory
6511   find Search-func ,
6512   except Search-func OPTIONAL ,  -- NOTE(review): presumably suppresses the rule when it also matches; confirm
6513   feat-constraint Constraint-choice-set OPTIONAL ,
6514   rule-type Fix-type DEFAULT none ,
6515   replace Replace-rule OPTIONAL ,
6516   description VisibleString OPTIONAL }  
6517 
6518 Suspect-rule-set ::= SET OF Suspect-rule  -- unordered collection of rules; not referenced by any other type in this part of the module
6519 
6520 
6521 
6522 END

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]  

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.