NCBI C Toolkit Cross Reference

C/asn/asn.all


  1 --$Revision: 6.0 $
  2 --**********************************************************************
  3 --
  4 --  asn.all
  5 --  this file contains all NCBI ASN.1 specifications together
  6 --
  7 --  by James Ostell, 1990
  8 --
  9 --**********************************************************************
 10 
 11 --$Revision: 6.5 $
 12 --**********************************************************************
 13 --
 14 --  NCBI General Data elements
 15 --  by James Ostell, 1990
 16 --  Version 3.0 - June 1994
 17 --
 18 --**********************************************************************
 19 
 20 NCBI-General DEFINITIONS ::=
 21 BEGIN
 22 
 23 EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;
 24 
 25 -- StringStore is really a VisibleString.  It is used to define very
 26 --   long strings which may need to be stored by the receiving program
 27 --   in special structures, such as a ByteStore, but it's just a hint.
 28 --   AsnTool stores StringStores in ByteStore structures.
 29 -- OCTET STRINGs are also stored in ByteStores by AsnTool
 30 -- 
 31 -- typedef struct bsunit {             /* for building multiline strings */
 32    -- Nlm_Handle str;            /* the string piece */
 33    -- Nlm_Int2 len_avail,
 34        -- len;
 35    -- struct bsunit PNTR next; }       /* the next one */
 36 -- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
 37 -- 
 38 -- typedef struct bytestore {
 39    -- Nlm_Int4 seekptr,       /* current position */
 40       -- totlen,             /* total stored data length in bytes */
 41       -- chain_offset;       /* offset in ByteStore of first byte in curchain */
 42    -- Nlm_BSUnitPtr chain,       /* chain of elements */
 43       -- curchain;           /* the BSUnit containing seekptr */
 44 -- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
 45 --
 46 -- AsnTool incorporates this as a primitive type, so the definition
 47 --   is here just for completeness
 48 -- 
 49 --  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
 50 --
 51 
 52 -- BigInt is really an INTEGER. It is used to warn the receiving code to expect
 53 --   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
 54 --
 55 --   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
 56 --   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
 57 --
 58 
 59 -- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
 60 --  of ASN.1
 61 --  It stores only a date
 62 --
 63 
 64 Date ::= CHOICE {              -- a date, given either as free text or in structured form
 65     str VisibleString ,        -- free-text date, for those unparsed dates
 66     std Date-std }             -- structured date; use this if you can
 67 
 68 Date-std ::= SEQUENCE {        -- NOTE: this is NOT a unix tm struct; only year is required
 69     year INTEGER ,             -- full year including the century (e.g. 1994, not 94)
 70     month INTEGER OPTIONAL ,   -- month (1-12)
 71     day INTEGER OPTIONAL ,     -- day of month (1-31)
 72     season VisibleString OPTIONAL ,  -- free text, for "spring", "may-june", etc
 73     hour INTEGER OPTIONAL ,    -- hour of day (0-23)
 74     minute INTEGER OPTIONAL ,  -- minute of hour (0-59)
 75     second INTEGER OPTIONAL }  -- second of minute (0-59)
 76 
 77 -- Dbtag is generalized for tagging
 78 -- eg. { "Social Security", str "023-79-8841" }
 79 -- or  { "member", id 8882224 }
 80 
 81 Dbtag ::= SEQUENCE {            -- a database/system name paired with a tag in it
 82     db VisibleString ,          -- name of database or system
 83     tag Object-id }             -- appropriate tag, an integer or a string (see Object-id)
 84 
 85 -- Object-id can tag or name anything
 86 --
 87 
 88 Object-id ::= CHOICE {          -- an identifier that is either numeric or textual
 89     id INTEGER ,                -- integer form of the identifier
 90     str VisibleString }         -- string form of the identifier
 91 
 92 -- Person-id is to define a std element for people
 93 --
 94 
 95 Person-id ::= CHOICE {          -- exactly one of the following ways to identify a person
 96     dbtag Dbtag ,               -- any defined database tag
 97     name Name-std ,             -- structured name (see Name-std)
 98     ml VisibleString ,          -- MEDLINE name (semi-structured)
 99                                 --    eg. "Jones RM"
100     str VisibleString,          -- unstructured name
101     consortium VisibleString }  -- consortium name
102 
103 Name-std ::= SEQUENCE { -- Structured names; only "last" is required
104     last VisibleString ,             -- last name
105     first VisibleString OPTIONAL ,   -- first name
106     middle VisibleString OPTIONAL ,  -- middle name
107     full VisibleString OPTIONAL ,    -- full name eg. "J. John Smith, Esq"
108     initials VisibleString OPTIONAL,  -- first + middle initials
109     suffix VisibleString OPTIONAL ,   -- Jr, Sr, III
110     title VisibleString OPTIONAL }    -- Dr., Sister, etc
111 
112 --**** Int-fuzz **********************************************
113 --*
114 --*   uncertainties in integer values
115 
116 Int-fuzz ::= CHOICE {                -- exactly one way of expressing the uncertainty
117     p-m INTEGER ,                    -- plus or minus fixed amount
118     range SEQUENCE {                 -- max to min
119         max INTEGER ,                -- upper end of the range
120         min INTEGER } ,              -- lower end of the range
121     pct INTEGER ,                    -- % plus or minus, stored x10, so values run 0-1000
122     lim ENUMERATED {                 -- some limit value
123         unk (0) ,                    -- unknown
124         gt (1) ,                     -- greater than
125         lt (2) ,                     -- less than
126         tr (3) ,                     -- space to right of position
127         tl (4) ,                     -- space to left of position
128         circle (5) ,                 -- artificial break at origin of circle
129         other (255) } ,              -- something else
130     alt SET OF INTEGER }             -- set of alternatives for the integer
131 
132 
133 --**** User-object **********************************************
134 --*
135 --*   a general object for a user defined structured data item
136 --*    used by Seq-feat and Seq-descr
137 
138 User-object ::= SEQUENCE {           -- a typed container of user-defined fields
139     class VisibleString OPTIONAL ,   -- endeavor which designed this object
140     type Object-id ,                 -- type of object within class
141     data SEQUENCE OF User-field }    -- the object itself, as an ordered list of fields
142 
143 User-field ::= SEQUENCE {            -- one labelled field of a User-object
144     label Object-id ,                -- field label
145     num INTEGER OPTIONAL ,           -- required for strs, ints, reals, oss (presumably their element count; TODO confirm)
146     data CHOICE {                    -- field contents, exactly one of:
147         str VisibleString ,          -- a single string
148         int INTEGER ,                -- a single integer
149         real REAL ,                  -- a single real
150         bool BOOLEAN ,               -- a single boolean
151         os OCTET STRING ,            -- a single octet string
152         object User-object ,         -- for using other definitions
153         strs SEQUENCE OF VisibleString ,   -- list of strings
154         ints SEQUENCE OF INTEGER ,         -- list of integers
155         reals SEQUENCE OF REAL ,           -- list of reals
156         oss SEQUENCE OF OCTET STRING ,     -- list of octet strings
157         fields SEQUENCE OF User-field ,    -- nested fields (recursive definition)
158         objects SEQUENCE OF User-object } }  -- nested objects
159 
160 
161 
162 END
163 
164 --$Revision: 6.2 $
165 --****************************************************************
166 --
167 --  NCBI Bibliographic data elements
168 --  by James Ostell, 1990
169 --
170 --  Taken from the American National Standard for
171 --      Bibliographic References
172 --      ANSI Z39.29-1977
173 --  Version 3.0 - June 1994
174 --  PubMedId added in 1996
175 --  ArticleIds and eprint elements added in 1999
176 --
177 --****************************************************************
178 
179 NCBI-Biblio DEFINITIONS ::=
180 BEGIN
181 
182 EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
183         Cit-proc, Cit-sub, Title, Author, PubMedId;
184 
185 IMPORTS Person-id, Date, Dbtag FROM NCBI-General;
186 
187     -- Article Ids
188 
189 ArticleId ::= CHOICE {         -- one id; can be many ids for an article (see ArticleIdSet)
190         pubmed PubMedId ,      -- Id from the PubMed database at NCBI
191         medline MedlineUID ,   -- Id from MEDLINE
192         doi DOI ,              -- Document Object Identifier
193         pii PII ,              -- Controlled Publisher Identifier
194         pmcid PmcID ,          -- PubMed Central Id
195         pmcpid PmcPid ,        -- Publisher Id supplied to PubMed Central
196         pmpid PmPid ,          -- Publisher Id supplied to PubMed
197         other Dbtag  }    -- generic catch all
198     
199 PubMedId ::= INTEGER           -- integer Id from the PubMed database at NCBI
200 MedlineUID ::= INTEGER         -- integer Id from MEDLINE
201 DOI ::= VisibleString          -- Document Object Identifier, as a string
202 PII ::= VisibleString          -- Controlled Publisher Identifier, as a string
203 PmcID ::= INTEGER              -- integer PubMed Central Id
204 PmcPid ::= VisibleString       -- Publisher Id supplied to PubMed Central, as a string
205 PmPid ::= VisibleString        -- Publisher Id supplied to PubMed, as a string
206 
207 ArticleIdSet ::= SET OF ArticleId   -- unordered collection of article ids
208 
209     -- Status Dates
210 
211 PubStatus ::= INTEGER {            -- points of publication (named integer values)
212     received  (1) ,            -- date manuscript received for review
213     accepted  (2) ,            -- accepted for publication
214     epublish  (3) ,            -- published electronically by publisher
215     ppublish  (4) ,            -- published in print by publisher
216     revised   (5) ,            -- article revised by publisher/author
217     pmc       (6) ,            -- article first appeared in PubMed Central
218     pmcr      (7) ,            -- article revision in PubMed Central
219     pubmed    (8) ,            -- article citation first appeared in PubMed
220     pubmedr   (9) ,            -- article citation revision in PubMed
221     aheadofprint (10),         -- epublish, but will be followed by print
222     premedline (11),           -- date into PreMedline status
223     medline    (12),           -- date made a MEDLINE record
224     other    (255) }           -- any status not listed above
225 
226 PubStatusDate ::= SEQUENCE {   -- done as a structure so fields can be added
227     pubstatus PubStatus ,      -- which point of publication this date refers to
228     date Date }                -- date that status was reached; time may be added later
229 
230 PubStatusDateSet ::= SET OF PubStatusDate   -- unordered collection of status/date pairs
231     
232     -- Citation Types
233 
234 Cit-art ::= SEQUENCE {                  -- article in journal or book
235     title Title OPTIONAL ,              -- title of paper (ANSI requires)
236     authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
237     from CHOICE {                       -- where the article appeared, exactly one of:
238         journal Cit-jour ,              -- a journal
239         book Cit-book ,                 -- a book
240         proc Cit-proc } ,               -- meeting proceedings
241     ids ArticleIdSet OPTIONAL }         -- lots of ids
242 
243 Cit-jour ::= SEQUENCE {             -- Journal citation
244     title Title ,                   -- title of journal
245     imp Imprint }                   -- imprint group: date, volume, issue, pages, etc (see Imprint)
246 
247 Cit-book ::= SEQUENCE {              -- Book citation
248     title Title ,                    -- Title of book
249     coll Title OPTIONAL ,            -- title of the collection, if part of a collection
250     authors Auth-list,               -- authors
251     imp Imprint }                    -- imprint group: date, publisher, etc (see Imprint)
252 
253 Cit-proc ::= SEQUENCE {             -- Meeting proceedings
254     book Cit-book ,                 -- citation to meeting, in book form
255     meet Meeting }                  -- time and location of meeting
256 
257     -- Patent number and date-issue were made optional in 1997 to
258     --   support patent applications being issued from the USPTO
259     --   Semantically a Cit-pat must have either a patent number or
260     --   an application number (or both) to be valid
261 
262 Cit-pat ::= SEQUENCE {                  -- patent citation
263     title VisibleString ,               -- title of the patent
264     authors Auth-list,                  -- author/inventor
265     country VisibleString ,             -- Patent Document Country
266     doc-type VisibleString ,            -- Patent Document Type
267     number VisibleString OPTIONAL,      -- Patent Document Number (optional since 1997)
268     date-issue Date OPTIONAL,           -- Patent Issue/Pub Date (optional since 1997)
269     class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code
270     app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
271     app-date Date OPTIONAL ,            -- Patent Appl File Date
272     applicants Auth-list OPTIONAL ,     -- Applicants
273     assignees Auth-list OPTIONAL ,      -- Assignees
274     priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
275     abstract VisibleString OPTIONAL }   -- abstract of patent
276 
277 Patent-priority ::= SEQUENCE {          -- one priority entry of a Cit-pat
278     country VisibleString ,             -- Patent country code
279     number VisibleString ,              -- number assigned in that country
280     date Date }                         -- date of application
281 
282 Id-pat ::= SEQUENCE {                   -- just to identify a patent
283     country VisibleString ,             -- Patent Document Country
284     id CHOICE {                         -- exactly one of:
285         number VisibleString ,          -- Patent Document Number
286         app-number VisibleString } ,    -- Patent Doc Appl Number
287     doc-type VisibleString OPTIONAL }   -- Patent Doc Type
288 
289 Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
290     cit Cit-book ,                      -- same fields as a book
291     man-id VisibleString OPTIONAL ,     -- Manuscript identifier
292     type ENUMERATED {                   -- which kind of document this is
293         manuscript (1) ,                -- a manuscript
294         letter (2) ,                    -- a letter
295         thesis (3) } OPTIONAL }         -- a thesis
296                                 -- NOTE: this is just to cite a
297                                 -- direct data submission, see NCBI-Submit
298                                 -- for the form of a sequence submission
299 Cit-sub ::= SEQUENCE {               -- citation for a direct submission
300     authors Auth-list ,              -- not necessarily authors of the paper
301     imp Imprint OPTIONAL ,                       -- used only to get the date; will go away (replaced by date below)
302     medium ENUMERATED {              -- medium of submission
303         paper   (1) ,                -- submitted on paper
304         tape    (2) ,                -- submitted on tape
305         floppy  (3) ,                -- submitted on floppy
306         email   (4) ,                -- submitted by email
307         other   (255) } OPTIONAL ,   -- any other medium
308     date Date OPTIONAL ,              -- replaces imp, will become required
309     descr VisibleString OPTIONAL }    -- description of changes for public view
310     
311 Cit-gen ::= SEQUENCE {      -- NOT from ANSI, this is a catchall; every field is optional
312     cit VisibleString OPTIONAL ,     -- anything, not parsable
313     authors Auth-list OPTIONAL ,     -- authors
314     muid INTEGER OPTIONAL ,      -- medline uid
315     journal Title OPTIONAL ,         -- journal title
316     volume VisibleString OPTIONAL ,  -- volume, as text
317     issue VisibleString OPTIONAL ,   -- issue, as text
318     pages VisibleString OPTIONAL ,   -- pages, as text
319     date Date OPTIONAL ,             -- date of the citation
320     serial-number INTEGER OPTIONAL ,   -- for GenBank style references
321     title VisibleString OPTIONAL ,     -- eg. cit="unpublished",title="title"
322         pmid PubMedId OPTIONAL }           -- PubMed Id
323     
324     
325     -- Authorship Group
326 Auth-list ::= SEQUENCE {                -- list of authors plus an optional shared affiliation
327         names CHOICE {                  -- the names, all in one of these forms:
328             std SEQUENCE OF Author ,        -- full citations
329             ml SEQUENCE OF VisibleString ,  -- MEDLINE, semi-structured
330             str SEQUENCE OF VisibleString } , -- free for all
331         affil Affil OPTIONAL }        -- author affiliation
332 
333 Author ::= SEQUENCE {                   -- one author; only name is required
334     name Person-id ,                        -- Author, Primary or Secondary
335     level ENUMERATED {                  -- whether the author is primary or secondary
336         primary (1),
337         secondary (2) } OPTIONAL ,
338     role ENUMERATED {                   -- Author Role Indicator
339         compiler (1),
340         editor (2),
341         patent-assignee (3),
342         translator (4) } OPTIONAL ,
343     affil Affil OPTIONAL ,              -- affiliation of this author
344     is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author
345 
346 Affil ::= CHOICE {                  -- an affiliation, as free text or structured
347     str VisibleString ,                 -- unparsed string
348     std SEQUENCE {                      -- std representation; every field is optional
349     affil VisibleString OPTIONAL ,      -- Author Affiliation, Name
350     div VisibleString OPTIONAL ,        -- Author Affiliation, Division
351     city VisibleString OPTIONAL ,       -- Author Affiliation, City
352     sub VisibleString OPTIONAL ,        -- Author Affiliation, County Sub
353     country VisibleString OPTIONAL ,    -- Author Affiliation, Country
354     street VisibleString OPTIONAL ,    -- street address, not ANSI
355     email VisibleString OPTIONAL ,      -- email address
356     fax VisibleString OPTIONAL ,        -- fax number
357     phone VisibleString OPTIONAL ,      -- phone number
358     postal-code VisibleString OPTIONAL }}  -- postal code
359 
360     -- Title Group
361     -- Valid for = A = Analytic (Cit-art)
362     --             J = Journals (Cit-jour)
363     --             B = Book (Cit-book)
364                                                  -- Valid for:
365 Title ::= SET OF CHOICE {   -- set of title variants; A/J/B column = valid for Cit-art/Cit-jour/Cit-book
366     name VisibleString ,    -- Title, Anal,Coll,Mono    AJB
367     tsub VisibleString ,    -- Title, Subordinate       A B
368     trans VisibleString ,   -- Title, Translated        AJB
369     jta VisibleString ,     -- Title, Abbreviated        J
370     iso-jta VisibleString , -- specifically ISO jta      J
371     ml-jta VisibleString ,  -- specifically MEDLINE jta  J
372     coden VisibleString ,   -- a coden                   J
373     issn VisibleString ,    -- ISSN                      J
374     abr VisibleString ,     -- Title, Abbreviated         B
375     isbn VisibleString }    -- ISBN                       B
376 
377 Imprint ::= SEQUENCE {                  -- Imprint group; only date is required
378     date Date ,                         -- date of publication
379     volume VisibleString OPTIONAL ,     -- volume, as text
380     issue VisibleString OPTIONAL ,      -- issue, as text
381     pages VisibleString OPTIONAL ,      -- pages, as text
382     section VisibleString OPTIONAL ,    -- section, as text
383     pub Affil OPTIONAL,                     -- publisher, required for book
384     cprt Date OPTIONAL,                     -- copyright date, likewise required for book
385     part-sup VisibleString OPTIONAL ,       -- part/sup of volume
386     language VisibleString DEFAULT "ENG" ,  -- put here for simplicity; defaults to "ENG" when absent
387     prepub ENUMERATED {                     -- for prepublication citations
388         submitted (1) ,                     -- submitted, not accepted
389         in-press (2) ,                      -- accepted, not published
390         other (255)  } OPTIONAL ,           -- any other prepublication state
391     part-supi VisibleString OPTIONAL ,      -- part/sup on issue
392     retract CitRetract OPTIONAL ,           -- retraction info
393     pubstatus PubStatus OPTIONAL ,          -- current status of this publication
394     history PubStatusDateSet OPTIONAL }     -- dates for this record
395 
396 CitRetract ::= SEQUENCE {            -- retraction/erratum information for a citation
397     type ENUMERATED {                    -- retraction of an entry
398         retracted (1) ,               -- this citation retracted
399         notice (2) ,                  -- this citation is a retraction notice
400         in-error (3) ,                -- an erratum was published about this
401         erratum (4) } ,               -- this is a published erratum
402     exp VisibleString OPTIONAL }      -- citation and/or explanation
403 
404 Meeting ::= SEQUENCE {          -- a meeting, as referenced by Cit-proc
405     number VisibleString ,      -- meeting number, as text
406     date Date ,                 -- date of the meeting
407     place Affil OPTIONAL }      -- location of the meeting
408 
409             
410 END
411 
412 
413 --$Revision: 6.0 $
414 --**********************************************************************
415 --
416 --  MEDLINE data definitions
417 --  James Ostell, 1990
418 --
419 --  enhanced in 1996 to support PubMed records as well by simply adding
420 --    the PubMedId and making MedlineId optional
421 --
422 --**********************************************************************
423 
424 NCBI-Medline DEFINITIONS ::=
425 BEGIN
426 
427 EXPORTS Medline-entry, Medline-si;
428 
429 IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
430         Date FROM NCBI-General;
431 
432                                 -- a MEDLINE or PubMed entry
433 Medline-entry ::= SEQUENCE {
434     uid INTEGER OPTIONAL ,      -- MEDLINE UID, sometimes not yet available if from PubMed
435     em Date ,                   -- Entry Month
436     cit Cit-art ,               -- article citation
437     abstract VisibleString OPTIONAL ,      -- abstract text
438     mesh SET OF Medline-mesh OPTIONAL ,    -- MeSH terms (see Medline-mesh)
439     substance SET OF Medline-rn OPTIONAL , -- substance records (see Medline-rn)
440     xref SET OF Medline-si OPTIONAL ,      -- cross reference records (see Medline-si)
441     idnum SET OF VisibleString OPTIONAL ,  -- ID Number (grants, contracts)
442     gene SET OF VisibleString OPTIONAL ,   -- gene names/symbols, as strings
443     pmid PubMedId OPTIONAL ,               -- MEDLINE records may include the PubMedId
444     pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
445     mlfield SET OF Medline-field OPTIONAL ,  -- additional Medline field types
446     status INTEGER {                       -- record status; defaults to medline when absent
447         publisher (1) ,      -- record as supplied by publisher
448         premedline (2) ,     -- premedline record
449         medline (3) } DEFAULT medline }  -- regular medline record
450 
451 Medline-mesh ::= SEQUENCE {            -- one MeSH term with its qualifiers
452     mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point (*); FALSE when absent
453     term VisibleString ,                   -- the MeSH term
454     qual SET OF Medline-qual OPTIONAL }    -- qualifiers
455 
456 Medline-qual ::= SEQUENCE {      -- one qualifier of a MeSH term
457     mp BOOLEAN DEFAULT FALSE ,       -- TRUE if main point; FALSE when absent
458     subh VisibleString }             -- the subheading
459 
460 Medline-rn ::= SEQUENCE {       -- medline substance records
461     type ENUMERATED {           -- type of record
462         nameonly (0) ,          -- name only, no number
463         cas (1) ,               -- CAS number
464         ec (2) } ,              -- EC number
465     cit VisibleString OPTIONAL ,  -- CAS or EC number if present
466     name VisibleString }          -- name (always present)
467 
468 Medline-si ::= SEQUENCE {       -- medline cross reference records
469     type ENUMERATED {           -- type of xref, i.e. which database is referenced
470         ddbj (1) ,              -- DNA Data Bank of Japan
471         carbbank (2) ,          -- Carbohydrate Structure Database
472         embl (3) ,              -- EMBL Data Library
473         hdb (4) ,               -- Hybridoma Data Bank
474         genbank (5) ,           -- GenBank
475         hgml (6) ,              -- Human Gene Map Library
476         mim (7) ,               -- Mendelian Inheritance in Man
477         msd (8) ,               -- Microbial Strains Database
478         pdb (9) ,               -- Protein Data Bank (Brookhaven)
479         pir (10) ,              -- Protein Identification Resource
480         prfseqdb (11) ,         -- Protein Research Foundation (Japan)
481         psd (12) ,              -- Protein Sequence Database (Japan)
482         swissprot (13) ,        -- SwissProt
483         gdb (14) } ,            -- Genome Data Base
484     cit VisibleString OPTIONAL }    -- the citation/accession number in that database
485 
486 Medline-field ::= SEQUENCE {    -- an additional Medline field (used by Medline-entry.mlfield)
487     type INTEGER {              -- Keyed type
488         other (0) ,             -- look in line code
489         comment (1) ,           -- comment line
490         erratum (2) } ,         -- retracted, corrected, etc
491     str VisibleString ,         -- the text
492     ids SEQUENCE OF DocRef OPTIONAL }  -- pointers relevant to this text
493 
494 DocRef ::= SEQUENCE {           -- reference to a document
495     type INTEGER {              -- which id space uid belongs to
496         medline (1) ,
497         pubmed (2) ,
498         ncbigi (3) } ,
499     uid INTEGER }               -- the document id within that id space
500 
501 END
502 
503 --$Revision: 6.0 $
504 --**********************************************************************
505 --
506 --  PUBMED data definitions
507 --
508 --**********************************************************************
509 
510 NCBI-PubMed DEFINITIONS ::=
511 BEGIN
512 
513 EXPORTS Pubmed-entry, Pubmed-url;
514 
515 IMPORTS PubMedId FROM NCBI-Biblio
516         Medline-entry FROM NCBI-Medline;
517 
518 Pubmed-entry ::= SEQUENCE {        -- a PubMed entry; only pmid is required
519     -- PUBMED records must include the PubMedId
520     pmid PubMedId,
521 
522     -- Medline entry information
523     medent Medline-entry OPTIONAL,
524 
525     -- Publisher name
526     publisher VisibleString OPTIONAL,
527 
528     -- List of URLs to publisher site
529     urls SET OF Pubmed-url OPTIONAL,
530 
531     -- Publisher's article identifier
532     pubid VisibleString OPTIONAL
533 }
534 
535 Pubmed-url ::= SEQUENCE {        -- one URL entry of a Pubmed-entry
536     location VisibleString OPTIONAL, -- Location code
537     url VisibleString                -- Selected URL for location (always present)
538 }
539 
540 END
541 --$Revision: 6.0 $
542 --**********************************************************************
543 --
544 --  MEDLARS data definitions
545 --  Grigoriy Starchenko, 1997
546 --
547 --**********************************************************************
548 
549 NCBI-Medlars DEFINITIONS ::=
550 BEGIN
551 
552 EXPORTS Medlars-entry, Medlars-record;
553 
554 IMPORTS PubMedId FROM NCBI-Biblio;
555 
556 Medlars-entry ::= SEQUENCE {     -- a MEDLARS entry
557     pmid PubMedId,               -- All entries in PubMed must have it
558     muid INTEGER OPTIONAL,       -- Medline(OCCS) id
559     recs SET OF Medlars-record   -- List of Medlars records (unordered)
560 }
561 
562 Medlars-record ::= SEQUENCE {    -- one unit record of a Medlars-entry
563     code INTEGER,                -- Unit record field type integer form
564     abbr VisibleString OPTIONAL, -- Unit record field type abbreviation form
565     data VisibleString           -- Unit record data
566 }
567 
568 END
569 --$Revision: 6.0 $
570 --********************************************************************
571 --
572 --  Publication common set
573 --  James Ostell, 1990
574 --
575 --  These are the base class definitions for Publications of all sorts
576 --
577 --  support for PubMedId added in 1996
578 --********************************************************************
579 
580 NCBI-Pub DEFINITIONS ::=
581 BEGIN
582 
583 EXPORTS Pub, Pub-set, Pub-equiv;
584 
585 IMPORTS Medline-entry FROM NCBI-Medline
586         Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
587         Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;
588 
589 Pub ::= CHOICE {         -- one publication, in exactly one of these forms
590     gen Cit-gen ,        -- general or generic unparsed
591     sub Cit-sub ,        -- submission
592     medline Medline-entry ,   -- a MEDLINE or PubMed entry
593     muid INTEGER ,       -- medline uid
594     article Cit-art ,    -- article in journal or book
595     journal Cit-jour ,   -- journal citation
596     book Cit-book ,      -- book citation
597     proc Cit-proc ,      -- proceedings of a meeting
598     patent Cit-pat ,     -- patent citation
599     pat-id Id-pat ,      -- identify a patent
600     man Cit-let ,        -- manuscript, thesis, or letter
601     equiv Pub-equiv,     -- to cite a variety of ways
602         pmid PubMedId }      -- PubMedId
603 
604 Pub-equiv ::= SET OF Pub   -- equivalent identifiers for same citation (unordered)
605 
606 Pub-set ::= CHOICE {            -- a set of publications, all held in one form
607     pub SET OF Pub ,            -- any mixture of Pub forms
608     medline SET OF Medline-entry ,  -- MEDLINE or PubMed entries only
609     article SET OF Cit-art ,    -- articles only
610     journal SET OF Cit-jour ,   -- journal citations only
611     book SET OF Cit-book ,      -- book citations only
612     proc SET OF Cit-proc ,      -- proceedings of a meeting
613     patent SET OF Cit-pat }     -- patent citations only
614 
615 END
616 
617 --$Revision: 6.4 $
618 --**********************************************************************
619 --
620 --  NCBI Sequence location and identifier elements
621 --  by James Ostell, 1990
622 --
623 --  Version 3.0 - 1994
624 --
625 --**********************************************************************
626 
627 NCBI-Seqloc DEFINITIONS ::=
628 BEGIN
629 
630 EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
631         Na-strand, Giimport-id;
632 
633 IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
634         Id-pat FROM NCBI-Biblio
635         Feat-id FROM NCBI-Seqfeat;
636 
637 --*** Sequence identifiers ********************************
638 --*
639 
640 Seq-id ::= CHOICE {              -- one sequence identifier, in exactly one of these forms
641     local Object-id ,            -- local use
642     gibbsq INTEGER ,             -- Geninfo backbone seqid
643     gibbmt INTEGER ,             -- Geninfo backbone moltype
644     giim Giimport-id ,           -- Geninfo import id
645     genbank Textseq-id ,         -- GenBank
646     embl Textseq-id ,            -- EMBL
647     pir Textseq-id ,             -- PIR
648     swissprot Textseq-id ,       -- SwissProt
649     patent Patent-seq-id ,       -- sequence from a patent (see Patent-seq-id)
650     other Textseq-id ,           -- for historical reasons, 'other' = 'refseq'
651     general Dbtag ,              -- for other databases
652     gi INTEGER ,                 -- GenInfo Integrated Database
653     ddbj Textseq-id ,            -- DDBJ
654     prf Textseq-id ,             -- PRF SEQDB
655     pdb PDB-seq-id ,             -- PDB sequence
656     tpg Textseq-id ,             -- Third Party Annot/Seq Genbank
657     tpe Textseq-id ,             -- Third Party Annot/Seq EMBL
658     tpd Textseq-id ,             -- Third Party Annot/Seq DDBJ
659     gpipe Textseq-id ,           -- Internal NCBI genome pipeline processing ID
660     named-annot-track Textseq-id -- Internal named annotation tracking ID
661 }
662 
663 
664 Patent-seq-id ::= SEQUENCE {   -- identifies one sequence within a patent
665     seqid INTEGER ,         -- number of sequence in patent
666     cit Id-pat }           -- patent citation
667 
668 Textseq-id ::= SEQUENCE {              -- text-based sequence id; every field is optional
669     name VisibleString OPTIONAL ,      -- name, as a string
670     accession VisibleString OPTIONAL , -- accession, as a string
671     release VisibleString OPTIONAL ,   -- release, as a string
672     version INTEGER OPTIONAL }         -- version, as an integer
673 
674 Giimport-id ::= SEQUENCE {           -- Geninfo import id
675     id INTEGER ,                     -- the id to use here
676     db VisibleString OPTIONAL ,      -- dbase used in
677     release VisibleString OPTIONAL } -- the release
678 
679 PDB-seq-id ::= SEQUENCE {      -- PDB sequence identifier
680     mol PDB-mol-id ,           -- the molecule name
681     chain INTEGER DEFAULT 32 , -- a single ASCII character, chain id (default 32 is the ASCII space)
682     rel Date OPTIONAL }        -- release date, month and year
683 
684 PDB-mol-id ::= VisibleString  -- name of mol, 4 chars (length is not enforced by the type)
685     
686 --*** Sequence locations **********************************
687 --*
688 
689 Seq-loc ::= CHOICE {      -- a location, in exactly one of these forms
690     null NULL ,           -- not placed
691     empty Seq-id ,        -- to NULL one Seq-id in a collection
692     whole Seq-id ,        -- whole sequence
693     int Seq-interval ,    -- from to
694     packed-int Packed-seqint ,   -- a list of intervals (see Packed-seqint)
695     pnt Seq-point ,              -- a single point
696     packed-pnt Packed-seqpnt ,   -- several points sharing one Seq-id (see Packed-seqpnt)
697     mix Seq-loc-mix ,            -- ordered list of arbitrary Seq-locs
698     equiv Seq-loc-equiv ,  -- equivalent sets of locations
699     bond Seq-bond ,        -- bond between residues
700     feat Feat-id }         -- indirect, through a Seq-feat
701     
702 
703 Seq-interval ::= SEQUENCE {   -- an interval on one sequence
704     from INTEGER ,            -- start of the interval
705     to INTEGER ,              -- end of the interval
706     strand Na-strand OPTIONAL ,   -- strand of nucleic acid, if relevant
707     id Seq-id ,    -- WARNING: this used to be optional
708     fuzz-from Int-fuzz OPTIONAL ,   -- uncertainty in from
709     fuzz-to Int-fuzz OPTIONAL }     -- uncertainty in to
710 
711 Packed-seqint ::= SEQUENCE OF Seq-interval   -- ordered list of intervals
712 
713 Seq-point ::= SEQUENCE {      -- a single point on one sequence
714     point INTEGER ,           -- position of the point
715     strand Na-strand OPTIONAL ,   -- strand of nucleic acid, if relevant
716     id Seq-id ,     -- WARNING: this used to be optional
717     fuzz Int-fuzz OPTIONAL }  -- uncertainty in point
718 
719 Packed-seqpnt ::= SEQUENCE {      -- several points that share one strand, id and fuzz
720     strand Na-strand OPTIONAL ,   -- strand of nucleic acid, if relevant
721     id Seq-id ,                   -- the sequence all points lie on
722     fuzz Int-fuzz OPTIONAL ,      -- uncertainty (a single value for the whole structure)
723     points SEQUENCE OF INTEGER }  -- the point positions, in order
724 
725 Na-strand ::= ENUMERATED {          -- strand of nucleic acid
726     unknown (0) ,             -- strand not known
727     plus (1) ,                -- plus strand
728     minus (2) ,               -- minus strand
729     both (3) ,                -- in forward orientation
730     both-rev (4) ,            -- in reverse orientation
731     other (255) }             -- something else
732 
733 Seq-bond ::= SEQUENCE {         -- bond between residues
734     a Seq-point ,           -- connection to at least one residue
735     b Seq-point OPTIONAL }  -- other end may not be available
736 
737 Seq-loc-mix ::= SEQUENCE OF Seq-loc   -- this will hold anything; an ordered list of locations
738 
739 Seq-loc-equiv ::= SET OF Seq-loc      -- for a set of equivalent locations (unordered)
740 
741 END
742     
743 
744 --$Revision: 6.20 $
745 --**********************************************************************
746 --
747 --  NCBI Sequence elements
748 --  by James Ostell, 1990
749 --  Version 3.0 - June 1994
750 --
751 --**********************************************************************
752 
753 NCBI-Sequence DEFINITIONS ::=
754 BEGIN
755 
756 EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,   -- types visible to importing modules
757         Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
758         Seq-hist, Seq-inst, Seq-literal, Delta-ext;
759 
760 IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
761         Seq-align FROM NCBI-Seqalign
762         Seq-feat FROM NCBI-Seqfeat
763         Seq-graph FROM NCBI-Seqres
764         Pub-equiv FROM NCBI-Pub
765         Org-ref FROM NCBI-Organism
766         BioSource FROM NCBI-BioSource
767         Seq-id, Seq-loc FROM NCBI-Seqloc
768         GB-block FROM GenBank-General
769         PIR-block FROM PIR-General
770         EMBL-block FROM EMBL-General
771         SP-block FROM SP-General
772         PRF-block FROM PRF-General
773         PDB-block FROM PDB-General
774         Seq-table FROM NCBI-SeqTable;
775 
776 --*** Sequence ********************************
777 --*
778 
779 Bioseq ::= SEQUENCE {
780     id SET OF Seq-id ,            -- equivalent identifiers
781     descr Seq-descr OPTIONAL , -- descriptors
782     inst Seq-inst ,            -- the sequence data
783     annot SET OF Seq-annot OPTIONAL }
784 
785 --*** Descriptors *****************************
786 --*
787 
788 Seq-descr ::= SET OF Seqdesc
789 
790 Seqdesc ::= CHOICE {
791     mol-type GIBB-mol ,          -- type of molecule
792     modif SET OF GIBB-mod ,             -- modifiers
793     method GIBB-method ,         -- sequencing method
794     name VisibleString ,         -- a name for this sequence
795     title VisibleString ,        -- a title for this sequence
796     org Org-ref ,                -- if all from one organism
797     comment VisibleString ,      -- a more extensive comment
798     num Numbering ,              -- a numbering system
799     maploc Dbtag ,               -- map location of this sequence
800     pir PIR-block ,              -- PIR specific info
801     genbank GB-block ,           -- GenBank specific info
802     pub Pubdesc ,                -- a reference to the publication
803     region VisibleString ,       -- overall region (globin locus)
804     user User-object ,           -- user defined object
805     sp SP-block ,                -- SWISSPROT specific info
806     dbxref Dbtag ,               -- xref to other databases
807     embl EMBL-block ,            -- EMBL specific information
808     create-date Date ,           -- date entry first created/released
809     update-date Date ,           -- date of last update
810     prf PRF-block ,              -- PRF specific information
811     pdb PDB-block ,              -- PDB specific information
812     het Heterogen ,              -- cofactor, etc associated but not bound
813     source BioSource ,           -- source of materials, includes Org-ref
814     molinfo MolInfo }            -- info on the molecule and techniques
815 
816 --******* NOTE:
817 --*       mol-type, modif, method, and org are consolidated and expanded
818 --*       in Org-ref, BioSource, and MolInfo in this specification. They
819 --*       will be removed in later specifications. Do not use them in the
820 --*       future. Instead expect the new structures.
821 --*
822 --***************************
823 
824 --********************************************************************
825 --
826 -- MolInfo gives information on the
827 -- classification of the type and quality of the sequence
828 --
829 -- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
830 --
831 --********************************************************************
832 
833 MolInfo ::= SEQUENCE {
834     biomol INTEGER {
835         unknown (0) ,
836         genomic (1) ,
837         pre-RNA (2) ,              -- precursor RNA of any sort really 
838         mRNA (3) ,
839         rRNA (4) ,
840         tRNA (5) ,
841         snRNA (6) ,
842         scRNA (7) ,
843         peptide (8) ,
844         other-genetic (9) ,      -- other genetic material
845         genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
846         cRNA (11) ,              -- viral RNA genome copy intermediate
847         snoRNA (12) ,            -- small nucleolar RNA
848         transcribed-RNA (13) ,   -- transcribed RNA other than existing classes
849         ncRNA (14) ,
850         tmRNA (15) ,
851         other (255) } DEFAULT unknown ,
852     tech INTEGER {
853         unknown (0) ,
854         standard (1) ,          -- standard sequencing
855         est (2) ,               -- Expressed Sequence Tag
856         sts (3) ,               -- Sequence Tagged Site
857         survey (4) ,            -- one-pass genomic sequence
858         genemap (5) ,           -- from genetic mapping techniques
859         physmap (6) ,           -- from physical mapping techniques
860         derived (7) ,           -- derived from other data, not a primary entity
861         concept-trans (8) ,     -- conceptual translation
862         seq-pept (9) ,          -- peptide was sequenced
863         both (10) ,             -- concept transl. w/ partial pept. seq.
864         seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
865         seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
866         concept-trans-a (13) ,  -- conceptual transl. supplied by author
867         htgs-1 (14) ,           -- unordered High Throughput sequence contig
868         htgs-2 (15) ,           -- ordered High Throughput sequence contig
869         htgs-3 (16) ,           -- finished High Throughput sequence
870         fli-cdna (17) ,         -- full length insert cDNA
871         htgs-0 (18) ,           -- single genomic reads for coordination
872         htc (19) ,              -- high throughput cDNA
873         wgs (20) ,              -- whole genome shotgun sequencing
874         barcode (21) ,          -- barcode of life project
875         composite-wgs-htgs (22) , -- composite of WGS and HTGS
876         tsa (23) ,              -- transcriptome shotgun assembly
877         other (255) }           -- explain in techexp (below)
878                DEFAULT unknown ,
879     techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
880     --
881     -- Completeness is not indicated in most records.  For genomes, assume
882     -- the sequences are incomplete unless specifically marked as complete.
883     -- For mRNAs, assume the ends are not known exactly unless marked as
884     -- having the left or right end.
885     --
886     completeness INTEGER {
887       unknown (0) ,
888       complete (1) ,                   -- complete biological entity
889       partial (2) ,                    -- partial but no details given
890       no-left (3) ,                    -- missing 5' or NH3 end
891       no-right (4) ,                   -- missing 3' or COOH end
892       no-ends (5) ,                    -- missing both ends
893       has-left (6) ,                   -- 5' or NH3 end present
894       has-right (7) ,                  -- 3' or COOH end present
895       other (255) } DEFAULT unknown ,
896     gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA
897 
898 
899 GIBB-mol ::= ENUMERATED {       -- type of molecule represented
900     unknown (0) ,
901     genomic (1) ,
902     pre-mRNA (2) ,              -- precursor RNA of any sort really 
903     mRNA (3) ,
904     rRNA (4) ,
905     tRNA (5) ,
906     snRNA (6) ,
907     scRNA (7) ,
908     peptide (8) ,
909     other-genetic (9) ,      -- other genetic material
910     genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
911     other (255) }
912     
913 GIBB-mod ::= ENUMERATED {        -- GenInfo Backbone modifiers
914     dna (0) ,
915     rna (1) ,
916     extrachrom (2) ,
917     plasmid (3) ,
918     mitochondrial (4) ,
919     chloroplast (5) ,
920     kinetoplast (6) ,
921     cyanelle (7) ,
922     synthetic (8) ,
923     recombinant (9) ,
924     partial (10) ,
925     complete (11) ,
926     mutagen (12) ,    -- subject of mutagenesis ?
927     natmut (13) ,     -- natural mutant ?
928     transposon (14) ,
929     insertion-seq (15) ,
930     no-left (16) ,    -- missing left end (5' for na, NH2 for aa)
931     no-right (17) ,   -- missing right end (3' or COOH)
932     macronuclear (18) ,
933     proviral (19) ,
934     est (20) ,        -- expressed sequence tag
935     sts (21) ,        -- sequence tagged site
936     survey (22) ,     -- one pass survey sequence
937     chromoplast (23) ,
938     genemap (24) ,    -- is a genetic map
939     restmap (25) ,    -- is an ordered restriction map
940     physmap (26) ,    -- is a physical map (not ordered restriction map)
941     other (255) }
942 
943 GIBB-method ::= ENUMERATED {        -- sequencing methods
944     concept-trans (1) ,    -- conceptual translation
945     seq-pept (2) ,         -- peptide was sequenced
946     both (3) ,             -- concept transl. w/ partial pept. seq.
947     seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
948     seq-pept-homol (5) ,   -- sequenced peptide, ordered by homology
949     concept-trans-a (6) ,  -- conceptual transl. supplied by author
950     other (255) }
951     
952 Numbering ::= CHOICE {           -- any display numbering system
953     cont Num-cont ,              -- continuous numbering
954     enum Num-enum ,              -- enumerated names for residues
955     ref Num-ref ,                -- by reference to another sequence
956     real Num-real }              -- supports mapping to a float system
957     
958 Num-cont ::= SEQUENCE {          -- continuous display numbering system
959     refnum INTEGER DEFAULT 1,         -- number assigned to first residue
960     has-zero BOOLEAN DEFAULT FALSE ,  -- 0 used?
961     ascending BOOLEAN DEFAULT TRUE }  -- ascending numbers?
962 
963 Num-enum ::= SEQUENCE {          -- any tags to residues
964     num INTEGER ,                        -- number of tags to follow
965     names SEQUENCE OF VisibleString }    -- the tags
966 
967 Num-ref ::= SEQUENCE {           -- by reference to other sequences
968     type ENUMERATED {            -- type of reference
969         not-set (0) ,
970         sources (1) ,            -- by segmented or const seq sources
971         aligns (2) } ,           -- by alignments given below
972     aligns Seq-align OPTIONAL }
973 
974 Num-real ::= SEQUENCE {          -- mapping to floating point system
975     a REAL ,                     -- from an integer system used by Bioseq
976     b REAL ,                     -- position = (a * int_position) + b
977     units VisibleString OPTIONAL }
978 
979 Pubdesc ::= SEQUENCE {              -- how sequence presented in pub
980     pub Pub-equiv ,                 -- the citation(s)
981     name VisibleString OPTIONAL ,   -- name used in paper
982     fig VisibleString OPTIONAL ,    -- figure in paper
983     num Numbering OPTIONAL ,        -- numbering from paper
984     numexc BOOLEAN OPTIONAL ,       -- numbering problem with paper
985     poly-a BOOLEAN OPTIONAL ,       -- poly A tail indicated in figure?
986     maploc VisibleString OPTIONAL , -- map location reported in paper
987     seq-raw StringStore OPTIONAL ,  -- original sequence from paper
988     align-group INTEGER OPTIONAL ,  -- this seq aligned with others in paper
989     comment VisibleString OPTIONAL, -- any comment on this pub in context
990     reftype INTEGER {           -- type of reference in a GenBank record
991         seq (0) ,               -- refers to sequence
992         sites (1) ,             -- refers to unspecified features
993         feats (2) ,             -- refers to specified features
994         no-target (3) }         -- nothing specified (EMBL)
995         DEFAULT seq }
996 
997 Heterogen ::= VisibleString       -- cofactor, prosthetic group, inhibitor, etc
998 
999 --*** Instances of sequences *******************************
1000 --*
1001 
1002 Seq-inst ::= SEQUENCE {            -- the sequence data itself
1003     repr ENUMERATED {              -- representation class
1004         not-set (0) ,              -- empty
1005         virtual (1) ,              -- no seq data
1006         raw (2) ,                  -- continuous sequence
1007         seg (3) ,                  -- segmented sequence
1008         const (4) ,                -- constructed sequence
1009         ref (5) ,                  -- reference to another sequence
1010         consen (6) ,               -- consensus sequence or pattern
1011         map (7) ,                  -- ordered map of any kind
1012         delta (8) ,              -- sequence made by changes (delta) to others
1013         other (255) } ,
1014     mol ENUMERATED {               -- molecule class in living organism
1015         not-set (0) ,              --   > cdna = rna
1016         dna (1) ,
1017         rna (2) ,
1018         aa (3) ,
1019         na (4) ,                   -- just a nucleic acid
1020         other (255) } ,
1021     length INTEGER OPTIONAL ,      -- length of sequence in residues
1022     fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
1023     topology ENUMERATED {          -- topology of molecule
1024         not-set (0) ,
1025         linear (1) ,
1026         circular (2) ,
1027         tandem (3) ,               -- some part of tandem repeat
1028         other (255) } DEFAULT linear ,
1029     strand ENUMERATED {            -- strandedness in living organism
1030         not-set (0) ,
1031         ss (1) ,                   -- single strand
1032         ds (2) ,                   -- double strand
1033         mixed (3) ,
1034         other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
1035     seq-data Seq-data OPTIONAL ,   -- the sequence
1036     ext Seq-ext OPTIONAL ,         -- extensions for special types
1037     hist Seq-hist OPTIONAL }       -- sequence history
1038 
1039 --*** Sequence Extensions **********************************
1040 --*  for representing more complex types
1041 --*  const type uses Seq-hist.assembly
1042 
1043 Seq-ext ::= CHOICE {
1044     seg Seg-ext ,        -- segmented sequences
1045     ref Ref-ext ,        -- hot link to another sequence (a view)
1046     map Map-ext ,        -- ordered map of markers
1047     delta Delta-ext }
1048 
1049 Seg-ext ::= SEQUENCE OF Seq-loc
1050 
1051 Ref-ext ::= Seq-loc
1052 
1053 Map-ext ::= SEQUENCE OF Seq-feat
1054 
1055 Delta-ext ::= SEQUENCE OF Delta-seq
1056 
1057 Delta-seq ::= CHOICE {
1058     loc Seq-loc ,       -- point to a sequence
1059     literal Seq-literal }   -- a piece of sequence
1060 
1061 Seq-literal ::= SEQUENCE {
1062     length INTEGER ,         -- must give a length in residues
1063     fuzz Int-fuzz OPTIONAL , -- could be unsure
1064     seq-data Seq-data OPTIONAL } -- may have the data
1065 
1066 --*** Sequence History Record ***********************************
1067 --** assembly = records how seq was assembled from others
1068 --** replaces = records sequences made obsolete by this one
1069 --** replaced-by = this seq is made obsolete by another(s)
1070 
1071 Seq-hist ::= SEQUENCE {
1072     assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
1073     replaces Seq-hist-rec OPTIONAL ,    -- seq makes these seqs obsolete
1074     replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
1075     deleted CHOICE {
1076         bool BOOLEAN ,
1077         date Date } OPTIONAL }
1078 
1079 Seq-hist-rec ::= SEQUENCE {
1080     date Date OPTIONAL ,
1081     ids SET OF Seq-id }
1082     
1083 --*** Various internal sequence representations ************
1084 --*      all are controlled, fixed length forms
1085 
1086 Seq-data ::= CHOICE {              -- sequence representations
1087     iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
1088     iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
1089     ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
1090     ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
1091     ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
1092     ncbipna NCBIpna ,              -- nucleic acid probabilities
1093     ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
1094     ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
1095     ncbipaa NCBIpaa ,              -- amino acid probabilities
1096     ncbistdaa NCBIstdaa,           -- consecutive codes for std aas
1097     gap Seq-gap                    -- gap types
1098 }
1099 
1100 Seq-gap ::= SEQUENCE {
1101     type INTEGER {
1102         unknown(0),
1103         fragment(1),
1104         clone(2),
1105         short-arm(3),
1106         heterochromatin(4),
1107         centromere(5),
1108         telomere(6),
1109         repeat(7),
1110         contig(8),
1111         other(255)
1112     },
1113     linkage INTEGER {
1114         unlinked(0),
1115         linked(1),
1116         other(255)
1117     } OPTIONAL
1118 }
1119 
1120 IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces
1121 IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces
1122 NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T
1123 NCBI4na ::= OCTET STRING      -- 1 bit each for agct
1124                               -- 0001=A, 0010=C, 0100=G, 1000=T/U
1125                               -- 0101=Purine, 1010=Pyrimidine, etc
1126 NCBI8na ::= OCTET STRING      -- for modified nucleic acids
1127 NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
1128                               -- probabilities are coded 0-255 = 0.0-1.0
1129 NCBI8aa ::= OCTET STRING      -- for modified amino acids
1130 NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
1131                               -- IUPAC codes + U=selenocysteine
1132 NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
1133                               -- A-Y,B,Z,X,(ter),anything
1134                               -- probabilities are coded 0-255 = 0.0-1.0
1135 NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte
1136 
1137 --*** Sequence Annotation *************************************
1138 --*
1139 
1140 -- This is a replica of Textseq-id
1141 -- This is specific for annotations, and exists to maintain a semantic
1142 -- difference between IDs assigned to annotations and IDs assigned to
1143 -- sequences
1144 Textannot-id ::= SEQUENCE {
1145     name          VisibleString OPTIONAL ,
1146     accession VisibleString OPTIONAL ,
1147     release   VisibleString OPTIONAL ,
1148     version   INTEGER       OPTIONAL
1149 }
1150 
1151 Annot-id ::= CHOICE {
1152     local Object-id ,
1153     ncbi INTEGER ,
1154     general Dbtag,
1155     other Textannot-id
1156 }
1157     
1158 Annot-descr ::= SET OF Annotdesc
1159 
1160 Annotdesc ::= CHOICE {
1161     name VisibleString ,         -- a short name for this collection
1162     title VisibleString ,        -- a title for this collection
1163     comment VisibleString ,      -- a more extensive comment
1164     pub Pubdesc ,                -- a reference to the publication
1165     user User-object ,           -- user defined object
1166     create-date Date ,           -- date entry first created/released
1167     update-date Date ,           -- date of last update
1168     src Seq-id ,                 -- source sequence from which annot came
1169     align Align-def,             -- definition of the SeqAligns
1170     region Seq-loc }             -- all contents cover this region
1171 
1172 Align-def ::= SEQUENCE {
1173     align-type INTEGER {         -- class of align Seq-annot
1174       ref (1) ,                  -- set of alignments to the same sequence
1175       alt (2) ,                  -- set of alternate alignments of the same seqs
1176       blocks (3) ,               -- set of aligned blocks in the same seqs
1177       other (255) } ,
1178     ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now
1179 
1180 Seq-annot ::= SEQUENCE {
1181     id SET OF Annot-id OPTIONAL ,
1182     db INTEGER {                 -- source of annotation
1183         genbank (1) ,
1184         embl (2) ,
1185         ddbj (3) ,
1186         pir  (4) ,
1187         sp   (5) ,
1188         bbone (6) ,
1189         pdb   (7) ,
1190         other (255) } OPTIONAL ,
1191     name VisibleString OPTIONAL ,-- source if "other" above
1192     desc Annot-descr OPTIONAL ,  -- used only for stand alone Seq-annots
1193     data CHOICE {
1194         ftable SET OF Seq-feat ,
1195         align SET OF Seq-align ,
1196         graph SET OF Seq-graph ,
1197         ids SET OF Seq-id ,      -- used for communication between tools
1198         locs SET OF Seq-loc ,    -- used for communication between tools
1199         seq-table Seq-table } }  -- features in table form
1200 
1201 END
1202 
1203 
1204 --$Revision: 6.5 $
1205 --**********************************************************************
1206 --
1207 --  NCBI Sequence Collections
1208 --  by James Ostell, 1990
1209 --
1210 --  Version 3.0 - 1994
1211 --
1212 --**********************************************************************
1213 
1214 NCBI-Seqset DEFINITIONS ::=
1215 BEGIN
1216 
1217 EXPORTS Bioseq-set, Seq-entry;
1218 
1219 IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
1220         Object-id, Dbtag, Date FROM NCBI-General;
1221 
1222 --*** Sequence Collections ********************************
1223 --*
1224 
1225 Bioseq-set ::= SEQUENCE {      -- just a collection
1226     id Object-id OPTIONAL ,
1227     coll Dbtag OPTIONAL ,          -- to identify a collection
1228     level INTEGER OPTIONAL ,       -- nesting level
1229     class ENUMERATED {
1230         not-set (0) ,
1231         nuc-prot (1) ,              -- nuc acid and coded proteins
1232         segset (2) ,                -- segmented sequence + parts
1233         conset (3) ,                -- constructed sequence + parts
1234         parts (4) ,                 -- parts for 2 or 3
1235         gibb (5) ,                  -- geninfo backbone
1236         gi (6) ,                    -- geninfo
1237         genbank (7) ,               -- converted genbank
1238         pir (8) ,                   -- converted pir
1239         pub-set (9) ,               -- all the seqs from a single publication
1240         equiv (10) ,                -- a set of equivalent maps or seqs
1241         swissprot (11) ,            -- converted SWISSPROT
1242         pdb-entry (12) ,            -- a complete PDB entry
1243         mut-set (13) ,              -- set of mutations
1244         pop-set (14) ,              -- population study
1245         phy-set (15) ,              -- phylogenetic study
1246         eco-set (16) ,              -- ecological sample study
1247         gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
1248         wgs-set (18) ,              -- whole genome shotgun project
1249         named-annot (19) ,          -- named annotation set
1250         named-annot-prod (20) ,     -- with instantiated mRNA+protein
1251         read-set (21) ,             -- set from a single read
1252         paired-end-reads (22) ,     -- paired sequences within a read-set
1253         other (255) } DEFAULT not-set ,
1254     release VisibleString OPTIONAL ,
1255     date Date OPTIONAL ,
1256     descr Seq-descr OPTIONAL ,
1257     seq-set SEQUENCE OF Seq-entry ,
1258     annot SET OF Seq-annot OPTIONAL }
1259 
1260 Seq-entry ::= CHOICE {             -- either a single Bioseq or a Bioseq-set
1261         seq Bioseq ,               -- one sequence
1262         set Bioseq-set }           -- a collection; its seq-set holds further Seq-entry elements
1263 
1264 END
1265 
1266 --$Revision: 6.0 $
1267 --  *********************************************************************
1268 --
1269 --  These are code and conversion tables for NCBI sequence codes
1270 --  ASN.1 for the sequences themselves is defined in seq.asn
1271 --
1272 --  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
1273 --    and increase continuously.  So IUPAC codes, which are upper case
1274 --    letters will always have 65 0 cells before the codes begin.  This
1275 --    allows all codes to do indexed lookups for things
1276 --
1277 --  Valid names for code tables are:
1278 --    IUPACna
1279 --    IUPACaa
1280 --    IUPACeaa
1281 --    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
1282 --                   display only, not a data exchange type
1283 --    NCBI2na
1284 --    NCBI4na
1285 --    NCBI8na
1286 --    NCBI8aa
1287 --    NCBIstdaa
1288 --     probability types map to IUPAC types for display as characters
1289 
1290 NCBI-SeqCode DEFINITIONS ::=
1291 BEGIN
1292 
1293 EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;
1294 
1295 Seq-code-type ::= ENUMERATED {              -- sequence representations
1296     iupacna (1) ,              -- IUPAC 1 letter nuc acid code
1297     iupacaa (2) ,              -- IUPAC 1 letter amino acid code
1298     ncbi2na (3) ,              -- 2 bit nucleic acid code
1299     ncbi4na (4) ,              -- 4 bit nucleic acid code
1300     ncbi8na (5) ,              -- 8 bit extended nucleic acid code
1301     ncbipna (6) ,              -- nucleic acid probabilities
1302     ncbi8aa (7) ,              -- 8 bit extended amino acid codes
1303     ncbieaa (8) ,              -- extended ASCII 1 letter aa codes
1304     ncbipaa (9) ,              -- amino acid probabilities
1305     iupacaa3 (10) ,            -- 3 letter code only for display
1306     ncbistdaa (11) }           -- consecutive codes for std aas, 0-25
1307 
1308 Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings 
1309     from Seq-code-type ,      -- code to map from
1310     to Seq-code-type ,        -- code to map to
1311     num INTEGER ,             -- number of rows in table
1312     start-at INTEGER DEFAULT 0 ,   -- index offset of first element
1313     table SEQUENCE OF INTEGER }  -- table of values, in from-to order
1314 
1315 Seq-code-table ::= SEQUENCE { -- for names of coded values
1316     code Seq-code-type ,      -- name of code
1317     num INTEGER ,             -- number of rows in table
1318     one-letter BOOLEAN ,   -- symbol is ALWAYS 1 letter?
1319     start-at INTEGER DEFAULT 0 ,   -- index offset of first element
1320     table SEQUENCE OF
1321         SEQUENCE {
1322             symbol VisibleString ,      -- the printed symbol or letter
1323             name VisibleString } ,      -- an explanatory name or string
1324     comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid
1325 
1326 Seq-code-set ::= SEQUENCE {    -- for distribution
1327     codes SET OF Seq-code-table OPTIONAL ,
1328     maps SET OF Seq-map-table OPTIONAL }
1329 
1330 END
1331 
1332 --$Revision: 6.0 $
1333 --*********************************************************************
1334 --
1335 -- 1990 - J.Ostell
1336 -- Version 3.0 - June 1994
1337 --
1338 --*********************************************************************
1339 --*********************************************************************
1340 --
1341 --  EMBL specific data
1342 --  This block of specifications was developed by Reiner Fuchs of EMBL
1343 --  Updated by J.Ostell, 1994
1344 --
1345 --*********************************************************************
1346 
1347 EMBL-General DEFINITIONS ::=
1348 BEGIN
1349 
1350 EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;
1351 
1352 IMPORTS Date, Object-id FROM NCBI-General;
1353 
1354 EMBL-dbname ::= CHOICE {
1355     code ENUMERATED {
1356         embl(0),
1357         genbank(1),
1358         ddbj(2),
1359         geninfo(3),
1360         medline(4),
1361         swissprot(5),
1362         pir(6),
1363         pdb(7),
1364         epd(8),
1365         ecd(9),
1366         tfd(10),
1367         flybase(11),
1368         prosite(12),
1369         enzyme(13),
1370         mim(14),
1371         ecoseq(15),
1372         hiv(16) ,
1373         other (255) } ,
1374     name    VisibleString }
1375 
1376 EMBL-xref ::= SEQUENCE {
1377     dbname EMBL-dbname,
1378     id SEQUENCE OF Object-id }
1379 
1380 EMBL-block ::= SEQUENCE {
1381     class ENUMERATED {
1382         not-set(0),
1383         standard(1),
1384         unannotated(2),
1385         other(255) } DEFAULT standard,
1386     div ENUMERATED {
1387         fun(0),
1388         inv(1),
1389         mam(2),
1390         org(3),
1391         phg(4),
1392         pln(5),
1393         pri(6),
1394         pro(7),
1395         rod(8),
1396         syn(9),
1397         una(10),
1398         vrl(11),
1399         vrt(12),
1400         pat(13),
1401         est(14),
1402         sts(15),
1403         other (255) } OPTIONAL,
1404     creation-date Date,
1405     update-date Date,
1406     extra-acc SEQUENCE OF VisibleString OPTIONAL,
1407     keywords SEQUENCE OF VisibleString OPTIONAL,
1408     xref SEQUENCE OF EMBL-xref OPTIONAL }
1409 
1410 END
1411 
1412 --*********************************************************************
1413 --
1414 --  SWISSPROT specific data
1415 --  This block of specifications was developed by Mark Cavanaugh of
1416 --      NCBI working with Amos Bairoch of SWISSPROT
1417 --
1418 --*********************************************************************
1419 
1420 SP-General DEFINITIONS ::=
1421 BEGIN
1422 
1423 EXPORTS SP-block;
1424 
1425 IMPORTS Date, Dbtag FROM NCBI-General
1426         Seq-id FROM NCBI-Seqloc;
1427 
1428 SP-block ::= SEQUENCE {         -- SWISSPROT specific descriptions
1429     class ENUMERATED {
1430         not-set (0) ,
1431         standard (1) ,      -- conforms to all SWISSPROT checks
1432         prelim (2) ,        -- only seq and biblio checked
1433         other (255) } ,
1434     extra-acc SET OF VisibleString OPTIONAL ,  -- old SWISSPROT ids
1435     imeth BOOLEAN DEFAULT FALSE ,  -- seq known to start with Met
1436     plasnm SET OF VisibleString OPTIONAL,  -- plasmid names carrying gene
1437     seqref SET OF Seq-id OPTIONAL,         -- xref to other sequences
1438     dbref SET OF Dbtag OPTIONAL ,          -- xref to non-sequence dbases
1439     keywords SET OF VisibleString OPTIONAL , -- keywords
1440     created Date OPTIONAL ,         -- creation date
1441     sequpd Date OPTIONAL ,          -- sequence update
1442     annotupd Date OPTIONAL }        -- annotation update
1443 
1444 END
1445 
1446 --*********************************************************************
1447 --
1448 --  PIR specific data
1449 --  This block of specifications was developed by Jim Ostell of
1450 --      NCBI
1451 --
1452 --*********************************************************************
1453 
1454 PIR-General DEFINITIONS ::=
1455 BEGIN
1456 
1457 EXPORTS PIR-block;
1458 
1459 IMPORTS Seq-id FROM NCBI-Seqloc;
1460 
1461 PIR-block ::= SEQUENCE {          -- PIR specific descriptions
1462     had-punct BOOLEAN OPTIONAL ,      -- had punctuation in sequence ?
1463     host VisibleString OPTIONAL ,
1464     source VisibleString OPTIONAL ,     -- source line
1465     summary VisibleString OPTIONAL ,
1466     genetic VisibleString OPTIONAL ,
1467     includes VisibleString OPTIONAL ,
1468     placement VisibleString OPTIONAL ,
1469     superfamily VisibleString OPTIONAL ,
1470     keywords SEQUENCE OF VisibleString OPTIONAL ,
1471     cross-reference VisibleString OPTIONAL ,
1472     date VisibleString OPTIONAL ,
1473     seq-raw VisibleString OPTIONAL ,  -- seq with punctuation
1474     seqref SET OF Seq-id OPTIONAL }         -- xref to other sequences
1475 
1476 END
1477 
1478 --*********************************************************************
1479 --
1480 --  GenBank specific data
1481 --  This block of specifications was developed by Jim Ostell of
1482 --      NCBI
1483 --
1484 --*********************************************************************
1485 
1486 GenBank-General DEFINITIONS ::=
1487 BEGIN
1488 
1489 EXPORTS GB-block;
1490 
1491 IMPORTS Date FROM NCBI-General;
1492 
1493 GB-block ::= SEQUENCE {          -- GenBank specific descriptions
1494     extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
1495     source VisibleString OPTIONAL ,     -- source line
1496     keywords SEQUENCE OF VisibleString OPTIONAL ,
1497     origin VisibleString OPTIONAL,
1498     date VisibleString OPTIONAL ,       -- OBSOLETE old form Entry Date
1499     entry-date Date OPTIONAL ,          -- replaces date
1500     div VisibleString OPTIONAL ,        -- GenBank division
1501     taxonomy VisibleString OPTIONAL }   -- continuation line of organism
1502 
1503 END
1504 
1505 --**********************************************************************
1506 -- PRF specific definition
1507 --    PRF is a protein sequence database created and maintained by
1508 --    Protein Research Foundation, Minoo-city, Osaka, Japan.
1509 --
1510 --    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
1511 --            Kyoto Univ., Japan
1512 --
1513 --**********************************************************************
1514 
1515 PRF-General DEFINITIONS ::=
1516 BEGIN
1517 
1518 EXPORTS PRF-block;
1519 
1520 PRF-block ::= SEQUENCE {                 -- PRF specific descriptions; both fields are optional
1521       extra-src       PRF-ExtraSrc OPTIONAL,                -- see PRF-ExtraSrc
1522       keywords        SEQUENCE OF VisibleString OPTIONAL    -- ordered list of keyword strings
1523 }
1524 
1525 PRF-ExtraSrc ::= SEQUENCE {          -- used by PRF-block.extra-src; all fields are optional strings
1526       host    VisibleString OPTIONAL,
1527       part    VisibleString OPTIONAL,
1528       state   VisibleString OPTIONAL,
1529       strain  VisibleString OPTIONAL,
1530       taxon   VisibleString OPTIONAL
1531 }
1532 
1533 END
1534 
1535 --*********************************************************************
1536 --
1537 --  PDB specific data
1538 --  This block of specifications was developed by Jim Ostell and
1539 --      Steve Bryant of NCBI
1540 --
1541 --*********************************************************************
1542 
1543 PDB-General DEFINITIONS ::=
1544 BEGIN
1545 
1546 EXPORTS PDB-block;
1547 
1548 IMPORTS Date FROM NCBI-General;
1549 
1550 PDB-block ::= SEQUENCE {          -- PDB specific descriptions
1551     deposition Date ,         -- deposition date  month,year (required)
1552     class VisibleString ,     -- required
1553     compound SEQUENCE OF VisibleString ,  -- required; ordered list of strings
1554     source SEQUENCE OF VisibleString ,    -- required; ordered list of strings
1555     exp-method VisibleString OPTIONAL ,  -- present if NOT X-ray diffraction
1556     replace PDB-replace OPTIONAL } -- replacement history (see PDB-replace)
1557 
1558 PDB-replace ::= SEQUENCE {        -- used by PDB-block.replace; both fields are required
1559     date Date ,                   -- Date is imported from NCBI-General
1560     ids SEQUENCE OF VisibleString }   -- entry ids replaced by this one
1561 
1562 END
1563 
1564 --$Revision: 6.27 $
1565 --**********************************************************************
1566 --
1567 --  NCBI Sequence Feature elements
1568 --  by James Ostell, 1990
1569 --  Version 3.0 - June 1994
1570 --
1571 --**********************************************************************
1572 
1573 NCBI-Seqfeat DEFINITIONS ::=
1574 BEGIN
1575 
1576 EXPORTS Seq-feat, Feat-id, Genetic-code;
1577 
1578 IMPORTS Gene-ref FROM NCBI-Gene
1579         Prot-ref FROM NCBI-Protein
1580         Org-ref FROM NCBI-Organism
1581         BioSource FROM NCBI-BioSource
1582         RNA-ref FROM NCBI-RNA
1583         Seq-loc, Giimport-id FROM NCBI-Seqloc
1584         Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
1585         Rsite-ref FROM NCBI-Rsite
1586         Txinit FROM NCBI-TxInit
1587         Pub-set FROM NCBI-Pub
1588         Object-id, Dbtag, User-object FROM NCBI-General;
1589 
1590 --*** Feature identifiers ********************************
1591 --*
1592 
1593 Feat-id ::= CHOICE {          -- exactly one of the following forms
1594     gibb INTEGER ,            -- geninfo backbone
1595     giim Giimport-id ,        -- geninfo import (Giimport-id is imported from NCBI-Seqloc)
1596     local Object-id ,         -- for local software use
1597     general Dbtag }           -- for use by various databases
1598 
1599 --*** Seq-feat *******************************************
1600 --*  sequence feature generalization
1601 
1602 Seq-feat ::= SEQUENCE {      -- only 'data' and 'location' are required
1603     id Feat-id OPTIONAL ,        -- single id; to be replaced by 'ids' (see below)
1604     data SeqFeatData ,           -- the specific data
1605     partial BOOLEAN OPTIONAL ,    -- incomplete in some way?
1606     except BOOLEAN OPTIONAL ,     -- something funny about this?
1607     comment VisibleString OPTIONAL ,  -- free-text comment
1608     product Seq-loc OPTIONAL ,    -- product of process
1609     location Seq-loc ,            -- feature made from
1610     qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
1611     title VisibleString OPTIONAL ,   -- for user defined label
1612     ext User-object OPTIONAL ,    -- user defined structure extension
1613     cit Pub-set OPTIONAL ,        -- citations for this feature
1614     exp-ev ENUMERATED {           -- evidence for existence of feature
1615         experimental (1) ,        -- any reasonable experimental check
1616         not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
1617     xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
1618     dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
1619     pseudo BOOLEAN OPTIONAL ,     -- annotated on pseudogene?
1620     except-text VisibleString OPTIONAL , -- explain if except=TRUE
1621     ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
1622     exts SET OF User-object OPTIONAL }  -- set of extensions; will replace 'ext' field
1623 
1624 SeqFeatData ::= CHOICE {       -- exactly one kind of feature data
1625     gene Gene-ref ,            -- Gene-ref is imported from NCBI-Gene
1626     org Org-ref ,              -- Org-ref is imported from NCBI-Organism
1627     cdregion Cdregion ,        -- see Cdregion in this module
1628     prot Prot-ref ,            -- Prot-ref is imported from NCBI-Protein
1629     rna RNA-ref ,              -- RNA-ref is imported from NCBI-RNA
1630     pub Pubdesc ,              -- publication applies to this seq 
1631     seq Seq-loc ,              -- to annotate origin from another seq
1632     imp Imp-feat ,             -- see Imp-feat in this module
1633     region VisibleString,      -- named region (globin locus)
1634     comment NULL ,             -- just a comment
1635     bond ENUMERATED {          -- kind of bond
1636         disulfide (1) ,
1637         thiolester (2) ,
1638         xlink (3) ,
1639         thioether (4) ,
1640         other (255) } ,
1641     site ENUMERATED {          -- kind of site
1642         active (1) ,
1643         binding (2) ,
1644         cleavage (3) ,
1645         inhibit (4) ,
1646         modified (5),
1647         glycosylation (6) ,
1648         myristoylation (7) ,
1649         mutagenized (8) ,
1650         metal-binding (9) ,
1651         phosphorylation (10) ,
1652         acetylation (11) ,
1653         amidation (12) ,
1654         methylation (13) ,
1655         hydroxylation (14) ,
1656         sulfatation (15) ,
1657         oxidative-deamination (16) ,
1658         pyrrolidone-carboxylic-acid (17) ,
1659         gamma-carboxyglutamic-acid (18) ,
1660         blocked (19) ,
1661         lipid-binding (20) ,
1662         np-binding (21) ,
1663         dna-binding (22) ,
1664         signal-peptide (23) ,
1665         transit-peptide (24) ,
1666         transmembrane-region (25) ,
1667         nitrosylation (26) ,
1668         other (255) } ,
1669     rsite Rsite-ref ,       -- restriction site  (for maps really)
1670     user User-object ,      -- user defined structure
1671     txinit Txinit ,         -- transcription initiation
1672     num Numbering ,         -- a numbering system
1673     psec-str ENUMERATED {   -- protein secondary structure
1674         helix (1) ,         -- any helix
1675         sheet (2) ,         -- beta sheet
1676         turn  (3) } ,       -- beta or gamma turn
1677     non-std-residue VisibleString ,  -- non-standard residue here in seq
1678     het Heterogen ,         -- cofactor, prosthetic grp, etc, bound to seq
1679     biosrc BioSource,       -- BioSource is imported from NCBI-BioSource
1680     clone Clone-ref         -- see Clone-ref in this module
1681 }
1682 
1683 SeqFeatXref ::= SEQUENCE {       -- both fields optional: either one or both may be present
1684     id Feat-id OPTIONAL ,        -- the feature copied
1685     data SeqFeatData OPTIONAL }  -- the specific data
1686     
1687 --*** CdRegion ***********************************************
1688 --*
1689 --*  Instructions to translate from a nucleic acid to a peptide
1690 --*    conflict means it's supposed to translate but doesn't
1691 --*
1692 
1693 
1694 Cdregion ::= SEQUENCE {               -- no field is required: each is OPTIONAL or has a DEFAULT
1695     orf BOOLEAN OPTIONAL ,             -- just an ORF ?
1696     frame ENUMERATED {                 -- reading frame; defaults to not-set
1697         not-set (0) ,                  -- not set, code uses one
1698         one (1) ,
1699         two (2) ,
1700         three (3) } DEFAULT not-set ,      -- reading frame
1701     conflict BOOLEAN OPTIONAL ,        -- conflict
1702     gaps INTEGER OPTIONAL ,            -- number of gaps on conflict/except
1703     mismatch INTEGER OPTIONAL ,        -- number of mismatches on above
1704     code Genetic-code OPTIONAL ,       -- genetic code used
1705     code-break SEQUENCE OF Code-break OPTIONAL ,   -- individual exceptions
1706     stops INTEGER OPTIONAL }           -- number of stop codons on above
1707 
1708                     -- each code is 64 cells long, in the order where
1709                     -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCT=4, TCA=6, etc
1710                     -- NOTE: this order does NOT correspond to a Seq-data
1711                     -- encoding.  It is "natural" to codon usage instead.
1712                     -- the value in each cell is the AA coded for
1713                     -- start= AA coded only if first in peptide
1714                     --   in start array, if codon is not a legitimate start
1715                     --   codon, that cell will have the "gap" symbol for
1716                     --   that alphabet.  Otherwise it will have the AA
1717                     --   encoded when that codon is used at the start.
1718 
1719 Genetic-code ::= SET OF CHOICE {       -- unordered set; each element is one of:
1720     name VisibleString ,               -- name of a code
1721     id INTEGER ,                       -- id in dbase
1722     ncbieaa VisibleString ,            -- indexed to IUPAC extended
1723     ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
1724     ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
1725     sncbieaa VisibleString ,            -- start, indexed to IUPAC extended
1726     sncbi8aa OCTET STRING ,             -- start, indexed to NCBI8aa
1727     sncbistdaa OCTET STRING }           -- start, indexed to NCBIstdaa
1728 
1729 Code-break ::= SEQUENCE {              -- specific codon exceptions; both fields required
1730     loc Seq-loc ,                      -- location of exception
1731     aa CHOICE {                        -- the amino acid, as an integer code in one alphabet
1732         ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
1733         ncbi8aa INTEGER ,              -- NCBI8aa code
1734         ncbistdaa INTEGER } }           -- NCBIstdaa code
1735 
1736 Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes (unordered); not listed in this module's EXPORTS
1737 
1738 --*** Import ***********************************************
1739 --*
1740 --*  Features imported from other databases
1741 --*
1742 
1743 Imp-feat ::= SEQUENCE {              -- used by SeqFeatData.imp
1744     key VisibleString ,                  -- required; the only mandatory field
1745     loc VisibleString OPTIONAL ,         -- original location string
1746     descr VisibleString OPTIONAL }       -- text description
1747 
1748 Gb-qual ::= SEQUENCE {     -- pair of strings used by Seq-feat.qual; both required
1749     qual VisibleString ,
1750     val VisibleString }
1751 
1752 
1753 --*** Clone-ref ***********************************************
1754 --*
1755 --*  Specification of clone features
1756 --*
1757 
1758 Clone-ref ::= SEQUENCE {       -- used by SeqFeatData.clone; only 'name' is required
1759     name VisibleString,        -- Official clone symbol
1760     library VisibleString OPTIONAL,     -- Library name
1761 
1762     concordant BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1763     unique BOOLEAN DEFAULT FALSE, -- OPTIONAL?
1764     placement-method INTEGER { -- optional; named values listed below
1765         end-seq (0),           -- Clone placed by end sequence
1766         insert-alignment (1),  -- Clone placed by insert alignment
1767         sts (2),               -- Clone placed by STS
1768         fish (3),
1769         fingerprint (4),
1770         other (255)
1771     } OPTIONAL,
1772     clone-seq Clone-seq-set OPTIONAL   -- unordered set of Clone-seq (see Clone-seq-set)
1773 }
1774 
1775 Clone-seq-set ::= SET OF Clone-seq   -- unordered collection; used by Clone-ref.clone-seq
1776 
1777 
1778 Clone-seq ::= SEQUENCE {  -- element of Clone-seq-set; 'type' and 'location' are required
1779     type INTEGER {        -- required
1780         insert (0),
1781         end (1),
1782         other (255)
1783     },
1784     confidence INTEGER {  -- optional
1785         multiple (0),     -- Multiple hits
1786         na (1),           -- Unspecified
1787         nohit-rep (2),    -- No hits, repetitive
1788         nohitnorep (3),   -- No hits, not repetitive
1789         other-chrm (4),   -- Hit on different chromosome
1790         unique (5),
1791         virtual (6),      -- Virtual (hasn't been sequenced)
1792         other (255)
1793     } OPTIONAL,
1794     location Seq-loc,     -- location on sequence
1795     seq Seq-loc OPTIONAL, -- clone sequence location
1796     align-id Dbtag OPTIONAL   -- Dbtag is imported from NCBI-General
1797 }
1798 
1799 
1800 END 
1801 
1802 --**********************************************************************
1803 --
1804 --  NCBI Restriction Sites
1805 --  by James Ostell, 1990
1806 --  version 0.8
1807 --
1808 --**********************************************************************
1809 
1810 NCBI-Rsite DEFINITIONS ::=
1811 BEGIN
1812 
1813 EXPORTS Rsite-ref;
1814 
1815 IMPORTS Dbtag FROM NCBI-General;
1816 
1817 Rsite-ref ::= CHOICE {      -- exactly one of: free text or a database reference
1818     str VisibleString ,     -- may be unparsable
1819     db  Dbtag }             -- pointer to a restriction site database
1820 
1821 END
1822 
1823 --**********************************************************************
1824 --
1825 --  NCBI RNAs
1826 --  by James Ostell, 1990
1827 --  version 0.8
1828 --
1829 --**********************************************************************
1830 
1831 NCBI-RNA DEFINITIONS ::=
1832 BEGIN
1833 
1834 EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;
1835 
1836 IMPORTS Seq-loc FROM NCBI-Seqloc;
1837 
1838 --*** rnas ***********************************************
1839 --*
1840 --*  various rnas
1841 --*
1842                          -- minimal RNA sequence
1843 RNA-ref ::= SEQUENCE {           -- only 'type' is required
1844     type ENUMERATED {            -- type of RNA feature
1845         unknown (0) ,
1846         premsg (1) ,
1847         mRNA (2) ,
1848         tRNA (3) ,
1849         rRNA (4) ,
1850         snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
1851         scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
1852         snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
1853         ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
1854         tmRNA (9) ,
1855         miscRNA (10) ,
1856         other (255) } ,
1857     pseudo BOOLEAN OPTIONAL ,
1858     ext CHOICE {                    -- optional extension; when present, one of:
1859         name VisibleString ,        -- for naming "other" type
1860         tRNA Trna-ext ,             -- for tRNAs
1861         gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
1862     }
1863 
1864 Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions; every field is OPTIONAL
1865     aa CHOICE {                         -- aa this carries
1866         iupacaa INTEGER ,
1867         ncbieaa INTEGER ,
1868         ncbi8aa INTEGER ,
1869         ncbistdaa INTEGER } OPTIONAL ,  -- same alternative names as Code-break.aa (NCBI-Seqfeat), plus iupacaa
1870     codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
1871     anticodon Seq-loc OPTIONAL }        -- location of anticodon
1872 
1873 RNA-gen ::= SEQUENCE {                  -- used by RNA-ref.ext.gen; every field is OPTIONAL
1874     class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
1875                                         -- examples: antisense_RNA, guide_RNA, snRNA
1876     product VisibleString OPTIONAL ,
1877     quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
1878 }
1879 
1880 RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
1881     qual VisibleString ,                -- in a tag (qual), value (val) format
1882     val VisibleString }                 -- both strings are required
1883 
1884 RNA-qual-set ::= SEQUENCE OF RNA-qual   -- ordered list; used by RNA-gen.quals
1885 
1886 END
1887 
1888 --**********************************************************************
1889 --
1890 --  NCBI Genes
1891 --  by James Ostell, 1990
1892 --  version 0.8
1893 --
1894 --**********************************************************************
1895 
1896 NCBI-Gene DEFINITIONS ::=
1897 BEGIN
1898 
1899 EXPORTS Gene-ref, Gene-nomenclature;
1900 
1901 IMPORTS Dbtag FROM NCBI-General;
1902 
1903 --*** Gene ***********************************************
1904 --*
1905 --*  reference to a gene
1906 --*
1907 
1908 Gene-ref ::= SEQUENCE {                   -- no field is required: each is OPTIONAL or has a DEFAULT
1909     locus VisibleString OPTIONAL ,        -- Official gene symbol
1910     allele VisibleString OPTIONAL ,       -- Official allele designation
1911     desc VisibleString OPTIONAL ,         -- descriptive name
1912     maploc VisibleString OPTIONAL ,       -- descriptive map location
1913     pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene
1914     db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
1915     syn SET OF VisibleString OPTIONAL ,   -- synonyms for locus
1916     locus-tag VisibleString OPTIONAL ,    -- systematic gene name (e.g., MI0001, ORF0069)
1917     formal-name Gene-nomenclature OPTIONAL   -- see Gene-nomenclature
1918 }
1919 
1920 Gene-nomenclature ::= SEQUENCE {      -- used by Gene-ref.formal-name
1921     status ENUMERATED {               -- required; the only mandatory field
1922         unknown (0) ,
1923         official (1) ,
1924         interim (2) 
1925     } ,
1926     symbol VisibleString OPTIONAL ,
1927     name VisibleString OPTIONAL ,
1928     source Dbtag OPTIONAL             -- Dbtag is imported from NCBI-General
1929 }
1930 
1931 END
1932 
1933 
1934 --**********************************************************************
1935 --
1936 --  NCBI Organism
1937 --  by James Ostell, 1994
1938 --  version 3.0
1939 --
1940 --**********************************************************************
1941 
1942 NCBI-Organism DEFINITIONS ::=
1943 BEGIN
1944 
1945 EXPORTS Org-ref;
1946 
1947 IMPORTS Dbtag FROM NCBI-General;
1948 
1949 --*** Org-ref ***********************************************
1950 --*
1951 --*  Reference to an organism
1952 --*     defines only the organism.. lower levels of detail for biological
1953 --*     molecules are provided by the Source object
1954 --*
1955 
1956 Org-ref ::= SEQUENCE {             -- every field is OPTIONAL
1957     taxname VisibleString OPTIONAL ,   -- preferred formal name
1958     common VisibleString OPTIONAL ,    -- common name
1959     mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
1960     db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
1961     syn SET OF VisibleString OPTIONAL ,  -- synonyms for taxname or common
1962     orgname OrgName OPTIONAL }         -- structured name; see OrgName
1963     
1964 
1965 OrgName ::= SEQUENCE {                     -- every field is OPTIONAL
1966     name CHOICE {                          -- when present, one of:
1967         binomial BinomialOrgName ,         -- genus/species type name
1968         virus VisibleString ,              -- virus names are different
1969         hybrid MultiOrgName ,              -- hybrid between organisms
1970         namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
1971         partial PartialOrgName } OPTIONAL , -- when genus not known
1972     attrib VisibleString OPTIONAL ,        -- attribution of name
1973     mod SEQUENCE OF OrgMod OPTIONAL ,      -- ordered list of modifiers; see OrgMod
1974     lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
1975     gcode INTEGER OPTIONAL ,               -- genetic code (see Cdregion.code / Genetic-code in NCBI-Seqfeat)
1976     mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
1977     div VisibleString OPTIONAL }           -- GenBank division code
1978     
1979 
1980 OrgMod ::= SEQUENCE {          -- element of OrgName.mod; 'subtype' and 'subname' are required
1981     subtype INTEGER {          -- named values start at 2; no value 1 is defined here
1982         strain (2) ,
1983         substrain (3) ,
1984         type (4) ,
1985         subtype (5) ,
1986         variety (6) ,
1987         serotype (7) ,
1988         serogroup (8) ,
1989         serovar (9) ,
1990         cultivar (10) ,
1991         pathovar (11) ,
1992         chemovar (12) ,
1993         biovar (13) ,
1994         biotype (14) ,
1995         group (15) ,
1996         subgroup (16) ,
1997         isolate (17) ,
1998         common (18) ,
1999         acronym (19) ,
2000         dosage (20) ,          -- chromosome dosage of hybrid
2001         nat-host (21) ,        -- natural host of this specimen
2002         sub-species (22) ,
2003         specimen-voucher (23) ,
2004         authority (24) ,
2005         forma (25) ,
2006         forma-specialis (26) ,
2007         ecotype (27) ,
2008         synonym (28) ,
2009         anamorph (29) ,
2010         teleomorph (30) ,
2011         breed (31) ,
2012         gb-acronym (32) ,       -- used by taxonomy database
2013         gb-anamorph (33) ,      -- used by taxonomy database
2014         gb-synonym (34) ,       -- used by taxonomy database
2015         culture-collection (35) ,
2016         bio-material (36) ,
2017         metagenome-source (37) ,
2018         old-lineage (253) ,
2019         old-name (254) ,
2020         other (255) } ,         -- historical ASN5 note: old-name (254) was planned for a later spec; it is now defined above
2021     subname VisibleString ,
2022     attrib VisibleString OPTIONAL }  -- attribution/source of name
2023 
2024 BinomialOrgName ::= SEQUENCE {          -- used by OrgName.name (binomial, namedhybrid)
2025     genus VisibleString ,               -- required
2026     species VisibleString OPTIONAL ,    -- species required if subspecies used
2027     subspecies VisibleString OPTIONAL }
2028 
2029 MultiOrgName ::= SEQUENCE OF OrgName   -- ordered list; the first will be used to assign division
2030 
2031 PartialOrgName ::= SEQUENCE OF TaxElement  -- ordered list; used when we don't know the genus
2032 
2033 TaxElement ::= SEQUENCE {              -- element of PartialOrgName
2034     fixed-level INTEGER {              -- required
2035        other (0) ,                     -- level must be set in string
2036        family (1) ,
2037        order (2) ,
2038        class (3) } ,
2039     level VisibleString OPTIONAL ,     -- presumably the string meant by "other" above (TODO confirm)
2040     name VisibleString }               -- required
2041 
2042 END
2043 
2044 
2045 --**********************************************************************
2046 --
2047 --  NCBI BioSource
2048 --  by James Ostell, 1994
2049 --  version 3.0
2050 --
2051 --**********************************************************************
2052 
2053 NCBI-BioSource DEFINITIONS ::=
2054 BEGIN
2055 
2056 EXPORTS BioSource;
2057 
2058 IMPORTS Org-ref FROM NCBI-Organism;
2059 
2060 --********************************************************************
2061 --
2062 -- BioSource gives the source of the biological material
2063 --   for sequences
2064 --
2065 --********************************************************************
2066 
2067 BioSource ::= SEQUENCE {     -- only 'org' is required
2068     genome INTEGER {         -- biological context
2069         unknown (0) ,
2070         genomic (1) ,
2071         chloroplast (2) ,
2072         chromoplast (3) ,
2073         kinetoplast (4) ,
2074         mitochondrion (5) ,
2075         plastid (6) ,
2076         macronuclear (7) ,
2077         extrachrom (8) ,
2078         plasmid (9) ,
2079         transposon (10) ,
2080         insertion-seq (11) ,
2081         cyanelle (12) ,
2082         proviral (13) ,
2083         virion (14) ,
2084         nucleomorph (15) ,
2085         apicoplast (16) ,
2086         leucoplast (17) ,
2087         proplastid (18) ,
2088         endogenous-virus (19) ,
2089         hydrogenosome (20) ,
2090         chromosome (21) ,
2091         chromatophore (22)
2092       } DEFAULT unknown ,    -- an absent value means unknown (0)
2093     origin INTEGER {
2094       unknown (0) ,
2095       natural (1) ,                    -- normal biological entity
2096       natmut (2) ,                     -- naturally occurring mutant
2097       mut (3) ,                        -- artificially mutagenized
2098       artificial (4) ,                 -- artificially engineered
2099       synthetic (5) ,                  -- purely synthetic
2100       other (255)
2101     } DEFAULT unknown ,      -- an absent value means unknown (0)
2102     org Org-ref ,            -- required; Org-ref is imported from NCBI-Organism
2103     subtype SEQUENCE OF SubSource OPTIONAL ,   -- ordered list; see SubSource
2104     is-focus NULL OPTIONAL ,           -- to distinguish biological focus
2105     pcr-primers PCRReactionSet OPTIONAL }      -- see PCRReactionSet
2106 
2107 PCRReactionSet ::= SET OF PCRReaction   -- unordered; used by BioSource.pcr-primers
2108 
2109 PCRReaction ::= SEQUENCE {              -- both primer sets are optional
2110     forward PCRPrimerSet OPTIONAL ,
2111     reverse PCRPrimerSet OPTIONAL }
2112 
2113 PCRPrimerSet ::= SET OF PCRPrimer       -- unordered
2114 
2115 PCRPrimer ::= SEQUENCE {                -- both fields are optional
2116     seq PCRPrimerSeq OPTIONAL ,
2117     name PCRPrimerName OPTIONAL }
2118 
2119 PCRPrimerSeq ::= VisibleString          -- plain string type; used by PCRPrimer.seq
2120 
2121 PCRPrimerName ::= VisibleString         -- plain string type; used by PCRPrimer.name
2122 
2123 SubSource ::= SEQUENCE {       -- element of BioSource.subtype; 'subtype' and 'name' are required
2124     subtype INTEGER {          -- required
2125         chromosome (1) ,
2126         map (2) ,
2127         clone (3) ,
2128         subclone (4) ,
2129         haplotype (5) ,
2130         genotype (6) ,
2131         sex (7) ,
2132         cell-line (8) ,
2133         cell-type (9) ,
2134         tissue-type (10) ,
2135         clone-lib (11) ,
2136         dev-stage (12) ,
2137         frequency (13) ,
2138         germline (14) ,
2139         rearranged (15) ,
2140         lab-host (16) ,
2141         pop-variant (17) ,
2142         tissue-lib (18) ,
2143         plasmid-name (19) ,
2144         transposon-name (20) ,
2145         insertion-seq-name (21) ,
2146         plastid-name (22) ,
2147         country (23) ,
2148         segment (24) ,
2149         endogenous-virus-name (25) ,
2150         transgenic (26) ,
2151         environmental-sample (27) ,
2152         isolation-source (28) ,
2153         lat-lon (29) ,          -- +/- decimal degrees
2154         collection-date (30) ,  -- DD-MMM-YYYY format
2155         collected-by (31) ,     -- name of person who collected the sample
2156         identified-by (32) ,    -- name of person who identified the sample
2157         fwd-primer-seq (33) ,   -- sequence (possibly more than one; semicolon-separated)
2158         rev-primer-seq (34) ,   -- sequence (possibly more than one; semicolon-separated)
2159         fwd-primer-name (35) ,
2160         rev-primer-name (36) ,
2161         metagenomic (37) ,
2162         mating-type (38) ,
2163         linkage-group (39) ,
2164         haplogroup (40) ,
2165         other (255) } ,
2166     name VisibleString ,               -- required
2167     attrib VisibleString OPTIONAL }    -- attribution/source of this name
2168         
2169 END
2170 
2171 --**********************************************************************
2172 --
2173 --  NCBI Protein
2174 --  by James Ostell, 1990
2175 --  version 0.8
2176 --
2177 --**********************************************************************
2178 
2179 NCBI-Protein DEFINITIONS ::=
2180 BEGIN
2181 
2182 EXPORTS Prot-ref;
2183 
2184 IMPORTS Dbtag FROM NCBI-General;
2185 
2186 --*** Prot-ref ***********************************************
2187 --*
2188 --*  Reference to a protein name
2189 --*
2190 
2191 Prot-ref ::= SEQUENCE {                -- no field is required: each is OPTIONAL or has a DEFAULT
2192     name SET OF VisibleString OPTIONAL ,      -- protein name
2193     desc VisibleString OPTIONAL ,      -- description (instead of name)
2194     ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
2195     activity SET OF VisibleString OPTIONAL ,  -- activities
2196     db SET OF Dbtag OPTIONAL ,         -- ids in other dbases
2197     processed ENUMERATED {             -- processing status
2198        not-set (0) ,
2199        preprotein (1) ,
2200        mature (2) ,
2201        signal-peptide (3) ,
2202        transit-peptide (4) } DEFAULT not-set }   -- an absent value means not-set (0)
2203 
2204 
2205 
2206 END 
2207 --********************************************************************
2208 --
2209 --  Transcription Initiation Site Feature Data Block
2210 --  James Ostell, 1991
2211 --  Philip Bucher, David Ghosh
2212 --  version 1.1
2213 --
2214 --  
2215 --
2216 --********************************************************************
2217 
2218 NCBI-TxInit DEFINITIONS ::=
2219 BEGIN
2220 
2221 EXPORTS Txinit;
2222 
2223 IMPORTS Gene-ref FROM NCBI-Gene
2224         Prot-ref FROM NCBI-Protein
2225         Org-ref FROM NCBI-Organism;
2226 
2227 Txinit ::= SEQUENCE {       -- only 'name' and 'txsystem' are required
2228     name VisibleString ,    -- descriptive name of initiation site
2229     syn SEQUENCE OF VisibleString OPTIONAL ,   -- synonyms
2230     gene SEQUENCE OF Gene-ref OPTIONAL ,  -- gene(s) transcribed
2231     protein SEQUENCE OF Prot-ref OPTIONAL ,   -- protein(s) produced
2232     rna SEQUENCE OF VisibleString OPTIONAL ,  -- rna(s) produced
2233     expression VisibleString OPTIONAL ,  -- tissue/time of expression
2234     txsystem ENUMERATED {       -- transcription apparatus used at this site
2235         unknown (0) ,
2236         pol1 (1) ,      -- eukaryotic Pol I
2237         pol2 (2) ,      -- eukaryotic Pol II
2238         pol3 (3) ,      -- eukaryotic Pol III
2239         bacterial (4) ,
2240         viral (5) ,
2241         rna (6) ,       -- RNA replicase
2242         organelle (7) ,
2243         other (255) } , -- required: no OPTIONAL or DEFAULT on txsystem
2244     txdescr VisibleString OPTIONAL ,   -- modifiers on txsystem
2245     txorg Org-ref OPTIONAL ,  -- organism supplying transcription apparatus
2246     mapping-precise BOOLEAN DEFAULT FALSE ,  -- mapping precise or approx
2247     location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
2248     inittype ENUMERATED {
2249         unknown (0) ,
2250         single (1) ,
2251         multiple (2) ,
2252         region (3) } OPTIONAL ,
2253     evidence SET OF Tx-evidence OPTIONAL }   -- unordered set; see Tx-evidence
2254 
2255 Tx-evidence ::= SEQUENCE {     -- element of Txinit.evidence; only 'exp-code' is required
2256     exp-code ENUMERATED {      -- required
2257         unknown (0) ,    
2258         rna-seq (1) ,   -- direct RNA sequencing
2259         rna-size (2) ,  -- RNA length measurement
2260         np-map (3) ,    -- nuclease protection mapping with homologous sequence ladder
2261         np-size (4) ,   -- nuclease protected fragment length measurement
2262         pe-seq (5) ,    -- dideoxy RNA sequencing 
2263         cDNA-seq (6) ,  -- full-length cDNA sequencing
2264         pe-map (7) ,    -- primer extension mapping with homologous sequence ladder    
2265         pe-size (8) ,   -- primer extension product length measurement
2266         pseudo-seq (9) , -- full-length processed pseudogene sequencing
2267         rev-pe-map (10) ,   -- see NOTE (1) below
2268         other (255) } ,
2269     expression-system ENUMERATED {
2270         unknown (0) ,
2271         physiological (1) ,
2272         in-vitro (2) ,
2273         oocyte (3) ,
2274         transfection (4) ,
2275         transgenic (5) ,
2276         other (255) } DEFAULT physiological ,   -- an absent value means physiological (1)
2277     low-prec-data BOOLEAN DEFAULT FALSE ,
2278     from-homolog BOOLEAN DEFAULT FALSE }     -- experiment actually done on
2279                                              --  close homolog
2280 
2281     -- NOTE (1) length measurement of a reverse direction primer-extension
2282     --          product (blocked  by  RNA  5'end)  by  comparison with
2283     --          homologous sequence ladder (J. Mol. Biol. 199, 587)
2284 
2285     
2286 END
2287 
2288 --$Revision: 1.5 $
2289 --  ----------------------------------------------------------------------------
2290 --
2291 --                            PUBLIC DOMAIN NOTICE
2292 --                National Center for Biotechnology Information
2293 --
2294 --  This software/database is a "United States Government Work" under the terms
2295 --  of the United States Copyright Act.  It was written as part of the author's
2296 --  official duties as a United States Government employee and thus cannot be
2297 --  copyrighted.  This software/database is freely available to the public for
2298 --  use.  The National Library of Medicine and the U.S. Government have not
2299 --  placed any restriction on its use or reproduction.
2300 --
2301 --  Although all reasonable efforts have been taken to ensure the accuracy and
2302 --  reliability of the software and data, the NLM and the U.S. Government do not
2303 --  and cannot warrant the performance or results that may be obtained by using
2304 --  this software or data.  The NLM and the U.S. Government disclaim all
2305 --  warranties, express or implied, including warranties of performance,
2306 --  merchantability or fitness for any particular purpose.
2307 --
2308 --  Please cite the authors in any work or product based on this material.
2309 --
2310 --  ----------------------------------------------------------------------------
2311 --
2312 --  Authors: Mike DiCuccio, Eugene Vasilchenko
2313 --
2314 --  ASN.1 interface to table readers
2315 --
2316 --  ----------------------------------------------------------------------------
2317 
2318 NCBI-SeqTable DEFINITIONS ::=
2319 
2320 BEGIN
2321 
2322 EXPORTS
2323     SeqTable-column-info, SeqTable-column, Seq-table;
2324     
2325 IMPORTS
2326     Seq-id, Seq-loc, Seq-interval   FROM NCBI-Seqloc;
2327 
2328 
2329 SeqTable-column-info ::= SEQUENCE {   -- describes what one table column holds
2330     -- user friendly column name, can be skipped
2331     title VisibleString OPTIONAL,
2332 
2333     -- identification of the column data in the objects described by the table
2334     field-id INTEGER { -- known column data types
2335         -- position types
2336         location        (0), -- location as Seq-loc
2337         location-id     (1), -- location Seq-id
2338         location-gi     (2), -- location gi
2339         location-from   (3), -- interval from
2340         location-to     (4), -- interval to
2341         location-strand (5), -- location strand
2342         location-fuzz-from-lim (6),
2343         location-fuzz-to-lim   (7),
2344 
2345         product         (10), -- product as Seq-loc
2346         product-id      (11), -- product Seq-id
2347         product-gi      (12), -- product gi
2348         product-from    (13), -- product interval from
2349         product-to      (14), -- product interval to
2350         product-strand  (15), -- product strand
2351         product-fuzz-from-lim (16),
2352         product-fuzz-to-lim   (17),
2353         
2354         -- main feature fields
2355         id-local        (20), -- id.local.id
2356         xref-id-local   (21), -- xref.id.local.id
2357         partial         (22),
2358         comment         (23),
2359         title           (24), -- named value only; not the 'title' member above
2360         ext             (25), -- field-name must be "E.xxx", see below
2361         qual            (26), -- field-name must be "Q.xxx", see below
2362         dbxref          (27), -- field-name must be "D.xxx", see below
2363 
2364         -- various data fields
2365         data-imp-key        (30),
2366         data-region         (31),
2367         data-cdregion-frame (32),
2368 
2369         -- extra fields, see also special values for field-name below
2370         ext-type        (40),
2371         qual-qual       (41),
2372         qual-val        (42),
2373         dbxref-db       (43),
2374         dbxref-tag      (44)
2375     } OPTIONAL,
2376 
2377     -- any column can be identified by ASN.1 text locator string
2378     -- with omitted object type.
2379     -- examples:
2380     --   "data.gene.locus" for Seq-feat.data.gene.locus
2381     --   "data.imp.key" for Seq-feat.data.imp.key
2382     --   "qual.qual"
2383     --    - Seq-feat.qual is SEQUENCE so several columns are allowed
2384     --      see also "Q.xxx" special value for shorter qual representation
2385     --   "ext.type.str"
2386     --   "ext.data.label.str"
2387     --   "ext.data.data.int"
2388     --      see also "E.xxx" special value for shorter ext representation
2389     -- special values start with a capital letter:
2390     --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
2391     --    - Seq-feat.ext.data is SEQUENCE so several columns are allowed
2392     --   "Q.xxx" - qual.qual = xxx, qual.val = data
2393     --    - Seq-feat.qual is SEQUENCE so several columns are allowed
2394     --   "D.xxx" - dbxref.db = xxx, dbxref.tag = data
2395     --    - Seq-feat.dbxref is SET so several columns are allowed
2396     field-name  VisibleString OPTIONAL
2397 }
2398 
2399 
2400 CommonString-table ::= SEQUENCE {   -- string column stored as value list + indexes
2401     -- set of possible values
2402     strings     SEQUENCE OF VisibleString,
2403 
2404     -- indexes of values (presumably positions in 'strings' above -- confirm)
2405     indexes     SEQUENCE OF INTEGER
2406 }
2407 
2408 
2409 CommonBytes-table ::= SEQUENCE {   -- byte-array column stored as value list + indexes
2410     -- set of possible values
2411     bytes       SEQUENCE OF OCTET STRING,
2412 
2413     -- indexes of values (presumably positions in 'bytes' above -- confirm)
2414     indexes     SEQUENCE OF INTEGER
2415 }
2416 
2417 
2418 SeqTable-multi-data ::= CHOICE {   -- values of a whole column, in one of these forms
2419     -- a set of integers, one per row
2420     int         SEQUENCE OF INTEGER,
2421     
2422     -- a set of reals, one per row
2423     real        SEQUENCE OF REAL,
2424 
2425     -- a set of strings, one per row
2426     string      SEQUENCE OF VisibleString,
2427 
2428     -- a set of byte arrays, one per row
2429     bytes       SEQUENCE OF OCTET STRING,
2430 
2431     -- a set of strings with a small set of possible values
2432     common-string   CommonString-table,
2433 
2434     -- a set of byte arrays with a small set of possible values
2435     common-bytes    CommonBytes-table,
2436 
2437     -- a set of bits, one per row
2438     -- this uses bm::bvector<> as its storage mechanism
2439     bit         OCTET STRING,
2440 
2441     -- a set of locations, one per row
2442     loc         SEQUENCE OF Seq-loc,
2443     id          SEQUENCE OF Seq-id,
2444     interval    SEQUENCE OF Seq-interval
2445 }
2446 
2447 
2448 SeqTable-single-data ::= CHOICE {   -- one scalar value (used by SeqTable-column default / sparse-other)
2449     -- integer
2450     int         INTEGER,
2451     
2452     -- real
2453     real        REAL,
2454 
2455     -- string
2456     string      VisibleString,
2457 
2458     -- byte array
2459     bytes       OCTET STRING,
2460 
2461     -- bit
2462     bit         BOOLEAN,
2463 
2464     -- location
2465     loc         Seq-loc,
2466     id          Seq-id,
2467     interval    Seq-interval
2468 }
2469 
2470 
2471 SeqTable-sparse-index ::= CHOICE {   -- which rows carry values, in one of two forms
2472     -- indexes of rows with values
2473     indexes SEQUENCE OF INTEGER,
2474 
2475     -- bitset of rows with values
2476     bit-set OCTET STRING
2477 }
2478 
2479 
2480 SeqTable-column ::= SEQUENCE {   -- one column: header plus optional data/sparse/default parts
2481     -- column description or reference to previously defined info
2482     header      SeqTable-column-info,   -- information about data
2483 
2484     -- row data
2485     data        SeqTable-multi-data OPTIONAL,
2486 
2487     -- in case not all rows contain data this field will contain sparse info
2488     sparse      SeqTable-sparse-index OPTIONAL,
2489 
2490     -- default value for sparse table, or if row data is too short
2491     default     SeqTable-single-data OPTIONAL,
2492 
2493     -- single value for indexes not listed in sparse table
2494     sparse-other SeqTable-single-data OPTIONAL
2495 }
2496 
2497 
2498 Seq-table ::= SEQUENCE {   -- table of features stored column by column
2499     -- type of features in this table, equal to Seq-feat.data variant index
2500     feat-type   INTEGER,
2501 
2502     -- subtype of features in this table, defined in header SeqFeatData.hpp
2503     feat-subtype INTEGER OPTIONAL,
2504 
2505     -- number of rows
2506     num-rows    INTEGER,
2507 
2508     -- data in columns (one SeqTable-column per column)
2509     columns     SEQUENCE OF SeqTable-column
2510 }
2511 
2512 
2513 END
2514 --$Revision: 6.4 $
2515 --**********************************************************************
2516 --
2517 --  NCBI Sequence Alignment elements
2518 --  by James Ostell, 1990
2519 --
2520 --**********************************************************************
2521 
2522 NCBI-Seqalign DEFINITIONS ::=
2523 BEGIN
2524 
2525 EXPORTS Seq-align, Score, Score-set, Seq-align-set;
2526 
2527 IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
2528         User-object, Object-id FROM NCBI-General;
2529 
2530 --*** Sequence Alignment ********************************
2531 --*
2532 
2533 Seq-align-set ::= SET OF Seq-align   -- unordered collection of alignments
2534 
2535 Seq-align ::= SEQUENCE {   -- one alignment: type, optional scores, and segment data
2536     type ENUMERATED {
2537         not-set (0) ,
2538         global (1) ,
2539         diags (2) ,     -- unbroken, but not ordered, diagonals
2540         partial (3) ,   -- mapping pieces together
2541         disc (4) ,      -- discontinuous alignment
2542         other (255) } ,
2543     dim INTEGER OPTIONAL ,     -- dimensionality
2544     score SET OF Score OPTIONAL ,   -- for whole alignment
2545     segs CHOICE {                   -- alignment data, in exactly one representation
2546         dendiag SEQUENCE OF Dense-diag ,
2547         denseg              Dense-seg ,
2548         std     SEQUENCE OF Std-seg ,
2549         packed              Packed-seg ,
2550         disc                Seq-align-set,
2551         spliced             Spliced-seg,
2552         sparse              Sparse-seg
2553     } ,
2554     
2555     -- regions of sequence over which the alignment
2556     --  was computed
2557     bounds SET OF Seq-loc OPTIONAL,
2558 
2559     -- alignment id
2560     id SEQUENCE OF Object-id OPTIONAL,
2561 
2562     -- extra info
2563     ext SEQUENCE OF User-object OPTIONAL
2564 }
2565 
2566 Dense-diag ::= SEQUENCE {         -- for (multiway) diagonals
2567     dim INTEGER DEFAULT 2 ,    -- dimensionality (2 when omitted)
2568     ids SEQUENCE OF Seq-id ,   -- sequences in order
2569     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order
2570     len INTEGER ,                 -- length of the aligned segments (a single value)
2571     strands SEQUENCE OF Na-strand OPTIONAL ,
2572     scores SET OF Score OPTIONAL }
2573 
2574     -- Dense-seg: the densest packing for sequence alignments only.
2575     --            a start of -1 indicates a gap for that sequence of
2576     --            length lens.
2577     --
2578     -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
2579     -- id=200  AAGGCCTTTTAG.......GATGATGATGA
2580     -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
2581     --
2582     -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
2583     -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
2584     -- lens = { 4, 8, 7, 3, 4, 4 }
2585     --
2586 
2587 Dense-seg ::= SEQUENCE {          -- for (multiway) global or partial alignments
2588     dim INTEGER DEFAULT 2 ,       -- dimensionality
2589     numseg INTEGER ,              -- number of segments here
2590     ids SEQUENCE OF Seq-id ,      -- sequences in order
2591     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order within segs
2592     lens SEQUENCE OF INTEGER ,    -- length of each segment (one per seg, see example)
2593     strands SEQUENCE OF Na-strand OPTIONAL ,
2594     scores SEQUENCE OF Score OPTIONAL }  -- score for each seg
2595 
2596 Packed-seg ::= SEQUENCE {         -- for (multiway) global or partial alignments
2597     dim INTEGER DEFAULT 2 ,       -- dimensionality (2 when omitted)
2598     numseg INTEGER ,              -- number of segments here
2599     ids SEQUENCE OF Seq-id ,      -- sequences in order
2600     starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order for whole alignment
2601     present OCTET STRING ,        -- Boolean flags: is each sequence present or absent in
2602                                   --   each segment
2603     lens SEQUENCE OF INTEGER ,    -- length of each segment
2604     strands SEQUENCE OF Na-strand OPTIONAL ,
2605     scores SEQUENCE OF Score OPTIONAL }  -- score for each segment
2606 
2607 Std-seg ::= SEQUENCE {            -- segment described by Seq-locs
2608     dim INTEGER DEFAULT 2 ,       -- dimensionality (2 when omitted)
2609     ids SEQUENCE OF Seq-id OPTIONAL ,
2610     loc SEQUENCE OF Seq-loc ,
2611     scores SET OF Score OPTIONAL }
2612 
2613 
2614 Spliced-seg ::= SEQUENCE {   -- alignment of a product to genomic sequence, exon by exon
2615     -- product is either protein or transcript (cDNA)
2616     product-id Seq-id OPTIONAL,
2617     genomic-id Seq-id OPTIONAL,
2618 
2619     -- should be 'plus' or 'minus'
2620     product-strand Na-strand OPTIONAL ,
2621     genomic-strand Na-strand OPTIONAL ,
2622     
2623     product-type ENUMERATED {   -- required; says which kind of product is aligned
2624         transcript(0),
2625         protein(1)
2626     },
2627 
2628     -- set of segments involved
2629     -- each segment corresponds to one exon
2630     -- exons are always in biological order
2631     exons SEQUENCE OF Spliced-exon ,
2632 
2633     -- optional poly(A) tail
2634     poly-a INTEGER OPTIONAL,
2635 
2636     -- length of the product, in bases/residues
2637     -- from this, a 3' unaligned length can be extracted; this also captures
2638     -- the case in which a protein aligns leaving a partial codon alignment
2639     -- at the 3' end
2640     product-length INTEGER OPTIONAL,
2641 
2642     -- alignment descriptors / modifiers
2643     -- this provides us a set for extension
2644     modifiers SET OF Spliced-seg-modifier OPTIONAL
2645 }
2646 
2647 Spliced-seg-modifier ::= CHOICE {
2648     -- protein aligns from the start and the first codon 
2649     -- on both product and genomic is a start codon
2650     start-codon-found BOOLEAN,
2651     
2652     -- protein aligns to its end and there is a stop codon 
2653     -- on the genomic right after the alignment
2654     stop-codon-found BOOLEAN
2655 }
2656 
2657 
2658 -- complete or partial exon
2659 -- two consecutive Spliced-exons may belong to one exon
2660 Spliced-exon ::= SEQUENCE {
2661     -- product-end >= product-start
2662     product-start Product-pos ,
2663     product-end Product-pos ,
2664 
2665     -- genomic-end >= genomic-start
2666     genomic-start INTEGER ,
2667     genomic-end INTEGER ,
2668 
2669     -- product is either protein or transcript (cDNA)
2670     product-id Seq-id OPTIONAL ,
2671     genomic-id Seq-id OPTIONAL ,
2672 
2673     -- should be 'plus' or 'minus'
2674     product-strand Na-strand OPTIONAL ,
2675     
2676     -- genomic-strand represents the strand of translation
2677     genomic-strand Na-strand OPTIONAL ,
2678 
2679     -- basic segments are always in biological order
2680     parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,
2681 
2682     -- scores for this exon
2683     scores Score-set OPTIONAL ,
2684 
2685     -- splice sites
2686     acceptor-before-exon Splice-site OPTIONAL,
2687     donor-after-exon Splice-site OPTIONAL,
2688     
2689     -- flag: is this exon complete or partial?
2690     partial BOOLEAN OPTIONAL,
2691 
2692     -- extra info
2693     ext SEQUENCE OF User-object OPTIONAL
2694 }
2695 
2696 
2697 Product-pos ::= CHOICE {   -- position on the product, in one of two forms
2698     nucpos INTEGER,        -- plain INTEGER position
2699     protpos Prot-pos       -- structured position, see Prot-pos
2700 }
2701 
2702 
2703 -- codon based position on protein (1/3 of aminoacid)
2704 Prot-pos ::= SEQUENCE {   -- amino acid position plus frame
2705     -- standard protein position
2706     amin INTEGER ,
2707 
2708     -- 0, 1, 2, or 3 as for Cdregion
2709     -- 0 = not set (this is also the value when the field is omitted)
2710     -- 1, 2, 3 = actual frame
2711     frame INTEGER DEFAULT 0
2712 }
2713 
2714 
2715 -- Spliced-exon-chunk: piece of an exon
2716 -- lengths are given in nucleotide bases (1/3 of aminoacid when product is a
2717 -- protein)
2718 Spliced-exon-chunk ::= CHOICE {   -- each alternative carries a single INTEGER
2719     -- both sequences represented, product and genomic sequences match
2720     match INTEGER ,
2721 
2722     -- both sequences represented, product and genomic sequences do not match
2723     mismatch INTEGER ,
2724 
2725     -- both sequences are represented, there is sufficient similarity 
2726     -- between product and genomic sequences. Can be used to replace stretches
2727     -- of matches and mismatches, mostly for protein to genomic where the
2728     -- definition of match or mismatch depends on the translation table
2729     diag INTEGER ,
2730 
2731      -- insertion in product sequence (i.e. gap in the genomic sequence)
2732     product-ins INTEGER ,
2733 
2734      -- insertion in genomic sequence (i.e. gap in the product sequence)
2735     genomic-ins INTEGER
2736 }
2737 
2738 
2739 -- site involved in splice
2740 Splice-site ::= SEQUENCE {   -- wrapper around a single string of bases
2741     -- typically two bases in the intronic region, always
2742     -- in IUPAC format
2743     bases VisibleString
2744 }
2745 
2746 
2747 -- ==========================================================================
2748 --
2749 -- Sparse-seg follows the semantics of dense-seg and is better suited to
2750 -- representing sparse multiple alignments
2751 --
2752 -- ==========================================================================
2753 
2754 
2755 Sparse-seg ::= SEQUENCE {   -- multiple alignment held as a set of pairwise rows
2756     master-id Seq-id OPTIONAL,
2757 
2758     -- pairwise alignments constituting this multiple alignment
2759     rows SET OF Sparse-align,
2760 
2761     -- per-row scores
2762     row-scores SET OF Score OPTIONAL,
2763 
2764     -- index of extra items
2765     ext  SET OF Sparse-seg-ext OPTIONAL
2766 }
2767 
2768 Sparse-align ::= SEQUENCE {   -- one pairwise alignment between first-id and second-id
2769     first-id Seq-id,
2770     second-id Seq-id,
2771 
2772     numseg INTEGER,                      -- number of segments
2773     first-starts SEQUENCE OF INTEGER ,   -- starts on the first sequence [numseg]
2774     second-starts SEQUENCE OF INTEGER ,  -- starts on the second sequence [numseg]
2775     lens SEQUENCE OF INTEGER ,           -- lengths of segments [numseg]
2776     second-strands SEQUENCE OF Na-strand OPTIONAL ,
2777 
2778     -- per-segment scores
2779     seg-scores SET OF Score OPTIONAL
2780 }
2781 
2782 Sparse-seg-ext ::= SEQUENCE {   -- only 'index' is active; the rest is commented out
2783     -- seg-ext SET OF {
2784     --     index INTEGER,
2785     --     data User-field
2786     -- }
2787     index INTEGER
2788 }
2789 
2790 
2791 
2792 -- use of Score is discouraged for external ASN.1 specifications (see Score-set)
2793 Score ::= SEQUENCE {
2794     id Object-id OPTIONAL ,
2795     value CHOICE {          -- exactly one of the two numeric forms
2796         real REAL ,
2797         int INTEGER
2798     }
2799 }
2800 
2801 -- use of Score-set (rather than a bare Score) is encouraged for external ASN.1 specifications
2802 Score-set ::= SET OF Score
2803 
2804 END 
2805 
2806 --$Revision: 6.0 $
2807 --**********************************************************************
2808 --
2809 --  NCBI Sequence Analysis Results (other than alignments)
2810 --  by James Ostell, 1990
2811 --
2812 --**********************************************************************
2813 
2814 NCBI-Seqres DEFINITIONS ::=
2815 BEGIN
2816 
2817 EXPORTS Seq-graph;
2818 
2819 IMPORTS Seq-loc FROM NCBI-Seqloc;
2820 
2821 --*** Sequence Graph ********************************
2822 --*
2823 --*   for values mapped by residue or range to sequence
2824 --*
2825 
2826 Seq-graph ::= SEQUENCE {
2827     title VisibleString OPTIONAL ,
2828     comment VisibleString OPTIONAL ,
2829     loc Seq-loc ,                       -- region this applies to
2830     title-x VisibleString OPTIONAL ,    -- title for x-axis
2831     title-y VisibleString OPTIONAL ,    -- title for y-axis
2832     comp INTEGER OPTIONAL ,             -- compression (residues/value)
2833     a REAL OPTIONAL ,                   -- for scaling values
2834     b REAL OPTIONAL ,                   -- display = (a * value) + b
2835     numval INTEGER ,                    -- number of values in graph
2836     graph CHOICE {                      -- the values, in one of three storage forms
2837         real Real-graph ,
2838         int Int-graph ,
2839         byte Byte-graph } }
2840 
2841 Real-graph ::= SEQUENCE {               -- graph with REAL values
2842     max REAL ,                          -- top of graph
2843     min REAL ,                          -- bottom of graph
2844     axis REAL ,                         -- value to draw axis on
2845     values SEQUENCE OF REAL }
2846 
2847 Int-graph ::= SEQUENCE {                -- graph with INTEGER values
2848     max INTEGER ,                       -- top of graph (cf. Real-graph)
2849     min INTEGER ,                       -- bottom of graph (cf. Real-graph)
2850     axis INTEGER ,                      -- value to draw axis on (cf. Real-graph)
2851     values SEQUENCE OF INTEGER } 
2852 
2853 Byte-graph ::= SEQUENCE {              -- integer from 0-255
2854     max INTEGER ,                       -- top of graph (cf. Real-graph)
2855     min INTEGER ,                       -- bottom of graph (cf. Real-graph)
2856     axis INTEGER ,                      -- value to draw axis on (cf. Real-graph)
2857     values OCTET STRING }
2858 
2859 END
2860 
2861 --$Revision: 6.1 $
2862 --********************************************************************
2863 --
2864 --  Direct Submission of Sequence Data
2865 --  James Ostell, 1991
2866 --
2867 --  This is a trial specification for direct submission of sequence
2868 --    data worked out between NCBI and EMBL
2869 --  Later revised to reflect work with GenBank and Integrated database
2870 --
2871 --  Version 3.0, 1994
2872 --    This is the official NCBI sequence submission format now.
2873 --
2874 --********************************************************************
2875 
2876 NCBI-Submit DEFINITIONS ::=
2877 BEGIN
2878 
2879 EXPORTS Seq-submit, Contact-info;
2880 
2881 IMPORTS Cit-sub, Author FROM NCBI-Biblio
2882         Date, Object-id FROM NCBI-General
2883         Seq-annot FROM NCBI-Sequence
2884         Seq-id FROM NCBI-Seqloc
2885         Seq-entry FROM NCBI-Seqset;
2886 
2887 Seq-submit ::= SEQUENCE {
2888     sub Submit-block ,              -- contact, citation and submission options
2889     data CHOICE {                   -- payload: exactly one of the following
2890         entrys  SET OF Seq-entry ,  -- sequence(s)
2891         annots  SET OF Seq-annot ,  -- annotation(s)
2892         delete  SET OF Seq-id } } -- deletions of entries
2893 
2894 Submit-block ::= SEQUENCE {
2895     contact Contact-info ,        -- who to contact
2896     cit Cit-sub ,                 -- citation for this submission
2897     hup BOOLEAN DEFAULT FALSE ,   -- hold until publish (FALSE when omitted)
2898     reldate Date OPTIONAL ,       -- release by date
2899     subtype INTEGER {             -- type of submission
2900         new (1) ,                 -- new data
2901         update (2) ,              -- update by author
2902         revision (3) ,            -- 3rd party (non-author) update
2903         other (255) } OPTIONAL ,
2904     tool VisibleString OPTIONAL,  -- tool used to make submission
2905     user-tag VisibleString OPTIONAL, -- user supplied id for this submission
2906     comment VisibleString OPTIONAL } -- user comments/advice to database
2907 
2908 Contact-info ::= SEQUENCE {      -- who to contact to discuss the submission
2909     name VisibleString OPTIONAL ,        -- OBSOLETE: will be removed (see last-name etc. below)
2910     address SEQUENCE OF VisibleString OPTIONAL ,
2911     phone VisibleString OPTIONAL ,
2912     fax VisibleString OPTIONAL ,
2913     email VisibleString OPTIONAL ,
2914     telex VisibleString OPTIONAL ,
2915     owner-id Object-id OPTIONAL ,         -- for owner accounts
2916     password OCTET STRING OPTIONAL ,
2917     last-name VisibleString OPTIONAL ,  -- structured to replace name above
2918     first-name VisibleString OPTIONAL ,
2919     middle-initial VisibleString OPTIONAL ,
2920     contact Author OPTIONAL }           -- WARNING: this will replace the fields above
2921 
2922 END
2923 
2924 --$Revision: 1.15 $
2925 --**********************************************************************
2926 --
2927 --  Definitions for Cn3D-specific data (rendering settings,
2928 --    user annotations, etc.)
2929 --
2930 --  by Paul Thiessen
2931 --
2932 --  National Center for Biotechnology Information
2933 --  National Institutes of Health
2934 --  Bethesda, MD 20894 USA
2935 --
2936 -- asntool -m cn3d.asn -w 100 -o cn3d.h
2937 -- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
2938 --   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
2939 --**********************************************************************
2940 
2941 NCBI-Cn3d DEFINITIONS ::=
2942 -- Cn3D-specific information
2943 
2944 BEGIN
2945 
2946 EXPORTS  Cn3d-style-dictionary, Cn3d-user-annotations;
2947 
2948 IMPORTS  Biostruc-id FROM MMDB
2949          Molecule-id, Residue-id FROM MMDB-Chemical-graph;
2950 
2951 
2952 -- values of enumerations must match those in cn3d/style_manager.hpp!
2953 
2954 Cn3d-backbone-type ::= ENUMERATED {     -- for different types of backbones
2955     off (1),                            -- note: numbering starts at 1, not 0
2956     trace (2),
2957     partial (3),
2958     complete (4)
2959 }
2960 
2961 Cn3d-drawing-style ::= ENUMERATED {     -- atom/bond/object rendering styles
2962     -- for atoms and bonds (values 1-6)
2963     wire (1),
2964     tubes (2),
2965     ball-and-stick (3),
2966     space-fill (4),
2967     wire-worm (5),
2968     tube-worm (6),
2969     -- for 3d-objects (values 7-8)
2970     with-arrows (7),
2971     without-arrows (8)
2972 }
2973 
2974 Cn3d-color-scheme ::= ENUMERATED {  -- available color schemes (not all
2975                                     -- necessarily applicable to all objects)
2976     element (1),
2977     object (2),
2978     molecule (3),
2979     domain (4),
2980     residue (20),                   -- note: value 20, listed out of numeric order
2981     secondary-structure (5),
2982     user-select (6),
2983     -- different alignment conservation coloring (currently only for proteins)
2984     aligned (7),
2985     identity (8),
2986     variety (9),
2987     weighted-variety (10),
2988     information-content (11),
2989     fit (12),
2990     block-fit (17),                 -- note: 17-19 are listed before 13-16
2991     block-z-fit (18),
2992     block-row-fit (19),
2993     -- other schemes
2994     temperature (13),
2995     hydrophobicity (14),
2996     charge (15),
2997     rainbow (16)
2998 }
2999 
3000 -- RGB triplet, interpreted (after division by the scale-factor) as floating
3001 -- point values which should lie in the range [0..1]. The default scale-factor is
3002 -- 255, so that one can conveniently set integer byte values [0..255] for
3003 -- colors with the scale-factor already set appropriately to map to [0..1].
3004 --    An alpha value is allowed, but is currently ignored by Cn3D.
3005 Cn3d-color ::= SEQUENCE {
3006     scale-factor INTEGER DEFAULT 255,   -- divisor applied to the components
3007     red INTEGER,
3008     green INTEGER,
3009     blue INTEGER,
3010     alpha INTEGER DEFAULT 255           -- 255 when omitted
3011 }
3012 
3013 Cn3d-backbone-style ::= SEQUENCE {  -- style blob for backbones only
3014     type Cn3d-backbone-type,        -- which backbone type (off/trace/partial/complete)
3015     style Cn3d-drawing-style,
3016     color-scheme Cn3d-color-scheme,
3017     user-color Cn3d-color           -- required member (not OPTIONAL)
3018 }
3019 
3020 Cn3d-general-style ::= SEQUENCE {   -- style blob for other objects
3021     is-on BOOLEAN,                  -- on/off flag (takes the place of 'type' in Cn3d-backbone-style)
3022     style Cn3d-drawing-style,
3023     color-scheme Cn3d-color-scheme,
3024     user-color Cn3d-color           -- required member (not OPTIONAL)
3025 }
3026 
3027 Cn3d-backbone-label-style ::= SEQUENCE { -- style blob for backbone labels
3028     spacing INTEGER,        -- zero means none
3029     type ENUMERATED {       -- one-letter or three-letter label form
3030         one-letter (1),
3031         three-letter (2)
3032     },
3033     number ENUMERATED {     -- numbering shown with the label
3034         none (0),
3035         sequential (1),     -- from 1, by residues present, to match sequence
3036         pdb (2)             -- use number assigned by PDB
3037     },
3038     termini BOOLEAN,
3039     white BOOLEAN           -- all white, or (if false) color of alpha carbon
3040 }
3041 
3042 -- rendering settings for Cn3D (mirrors StyleSettings class)
3043 Cn3d-style-settings ::= SEQUENCE {
3044     name VisibleString OPTIONAL,                -- a name (for favorites)
3045     protein-backbone Cn3d-backbone-style,       -- backbone styles
3046     nucleotide-backbone Cn3d-backbone-style,
3047     protein-sidechains Cn3d-general-style,      -- styles for other stuff
3048     nucleotide-sidechains Cn3d-general-style,
3049     heterogens Cn3d-general-style,
3050     solvents Cn3d-general-style,
3051     connections Cn3d-general-style,
3052     helix-objects Cn3d-general-style,
3053     strand-objects Cn3d-general-style,
3054     virtual-disulfides-on BOOLEAN,              -- virtual disulfides
3055     virtual-disulfide-color Cn3d-color,
3056     hydrogens-on BOOLEAN,                       -- hydrogens
3057     background-color Cn3d-color,                -- background
3058     -- floating point parameters - scale-factor applies to all the following:
3059     scale-factor INTEGER,                       -- presumably a divisor, as in Cn3d-color -- confirm
3060     space-fill-proportion INTEGER,
3061     ball-radius INTEGER,
3062     stick-radius INTEGER,
3063     tube-radius INTEGER,
3064     tube-worm-radius INTEGER,
3065     helix-radius INTEGER,
3066     strand-width INTEGER,
3067     strand-thickness INTEGER,
3068     -- backbone labels (no labels if not present)
3069     protein-labels Cn3d-backbone-label-style OPTIONAL,
3070     nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
3071     -- ion labels
3072     ion-labels BOOLEAN OPTIONAL
3073 }
3074 
3075 Cn3d-style-settings-set ::= SET OF Cn3d-style-settings   -- unordered collection of style settings
3076 
3077 Cn3d-style-table-id ::= INTEGER   -- id type used by Cn3d-style-table-item and Cn3d-user-annotation
3078 
3079 Cn3d-style-table-item ::= SEQUENCE {   -- one entry of the style lookup table
3080     id Cn3d-style-table-id,             -- id of this entry
3081     style Cn3d-style-settings           -- the settings stored under that id
3082 }
3083 
3084 -- the global settings, and a lookup table of styles for user annotations.
3085 Cn3d-style-dictionary ::= SEQUENCE {
3086     global-style Cn3d-style-settings,                       -- the global settings
3087     style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL  -- lookup table of styles
3088 }
3089 
3090 -- a range of residues in a chain, identified by MMDB residue-id
3091 -- (e.g., numbered from 1)
3092 Cn3d-residue-range ::= SEQUENCE {
3093     from Residue-id,    -- first residue of the range
3094     to Residue-id       -- last residue of the range (presumably inclusive -- confirm)
3095 }
3096 
3097 -- set of locations on a particular chain
3098 Cn3d-molecule-location ::= SEQUENCE {
3099     molecule-id Molecule-id,    -- MMDB molecule id
3100     -- which residues; the whole molecule is implied if absent
3101     residues SEQUENCE OF Cn3d-residue-range OPTIONAL
3102 }
3103 
3104 -- set of locations on a particular structure object (e.g., a PDB/MMDB
3105 -- structure), which may include multiple ranges of residues each on
3106 -- multiple chains.
3107 Cn3d-object-location ::= SEQUENCE {
3108     structure-id Biostruc-id,                       -- which structure object
3109     residues SEQUENCE OF Cn3d-molecule-location     -- per-molecule locations within it
3110 }
3111 
3112 -- information for an individual user annotation
3113 Cn3d-user-annotation ::= SEQUENCE {
3114     name VisibleString,                 -- a (short) name for this annotation
3115     description VisibleString OPTIONAL, -- an optional longer description
3116     style-id Cn3d-style-table-id,       -- how to draw this annotation (a style table id)
3117     residues SEQUENCE OF Cn3d-object-location,  -- which residues to cover
3118     is-on BOOLEAN   -- whether this annotation is to be turned on in Cn3D
3119 }
3120 
3121 -- a GL-ordered transformation matrix
3122 Cn3d-GL-matrix ::= SEQUENCE {       -- 16 REAL elements, m0 through m15
3123     m0  REAL, m1  REAL, m2  REAL, m3  REAL,
3124     m4  REAL, m5  REAL, m6  REAL, m7  REAL,
3125     m8  REAL, m9  REAL, m10 REAL, m11 REAL,
3126     m12 REAL, m13 REAL, m14 REAL, m15 REAL
3127 }
3128 
3129 -- a floating point 3d vector
3130 Cn3d-vector ::= SEQUENCE {          -- three REAL components
3131     x REAL,
3132     y REAL,
3133     z REAL
3134 }
3135 
3136 -- parameters used to set up the camera in Cn3D
3137 Cn3d-view-settings ::= SEQUENCE {
3138     camera-distance REAL,       -- camera on +Z axis this distance from origin
3139     camera-angle-rad REAL,      -- camera angle (the name indicates radians)
3140     camera-look-at-X REAL,      -- X,Y of point in Z=0 plane camera points at
3141     camera-look-at-Y REAL,
3142     camera-clip-near REAL,      -- distance of clipping planes from camera
3143     camera-clip-far REAL,
3144     matrix Cn3d-GL-matrix,      -- transformation of objects in the scene
3145     rotation-center Cn3d-vector -- center of rotation of whole scene
3146 }
3147 
3148 -- The list of annotations for a given CDD/mime. If residue regions overlap
3149 -- between annotations that are turned on, the last annotation in this list
3150 -- that contains these residues will be used as the display style for these
3151 -- residues.
3152 --   Also contains the current viewpoint, so that user's camera angle
3153 -- can be stored and reproduced, for illustrations, on-line figures, etc.
3154 Cn3d-user-annotations ::= SEQUENCE {
3155     annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL,  -- ordered; order decides overlaps
3156     view Cn3d-view-settings OPTIONAL                        -- the current viewpoint
3157 }
3158 
3159 END
3160 
3161 --$Revision: 6.3 $
3162 --****************************************************************
3163 --
3164 --  NCBI Project Definition Module
3165 --  by Jim Ostell and Jonathan Kans, 1998
3166 --
3167 --****************************************************************
3168 
3169 NCBI-Project DEFINITIONS ::=
3170 BEGIN
3171 
3172 EXPORTS Project, Project-item;
3173 
3174 IMPORTS Date FROM NCBI-General
3175         PubMedId FROM NCBI-Biblio
3176         Seq-id, Seq-loc FROM NCBI-Seqloc
3177         Seq-annot, Pubdesc FROM NCBI-Sequence
3178         Seq-entry FROM NCBI-Seqset
3179         Pubmed-entry FROM NCBI-PubMed;
3180 
3181 Project ::= SEQUENCE {              -- optional description plus one data item
3182     descr Project-descr OPTIONAL ,
3183     data Project-item }
3184 
3185 Project-item ::= CHOICE {           -- exactly one of the following collections
3186     pmuid SET OF INTEGER ,
3187     protuid SET OF INTEGER ,
3188     nucuid SET OF INTEGER ,
3189     sequid SET OF INTEGER ,
3190     genomeuid SET OF INTEGER ,
3191     structuid SET OF INTEGER ,
3192     pmid SET OF PubMedId ,
3193     protid SET OF Seq-id ,
3194     nucid SET OF Seq-id ,
3195     seqid SET OF Seq-id ,
3196     genomeid SET OF Seq-id ,
3197     structid NULL ,                 -- NULL: this alternative carries no data
3198     pment SET OF Pubmed-entry ,
3199     protent SET OF Seq-entry ,
3200     nucent SET OF Seq-entry ,
3201     seqent SET OF Seq-entry ,
3202     genomeent SET OF Seq-entry ,
3203     structent NULL ,                -- NULL: this alternative carries no data
3204     seqannot SET OF Seq-annot ,
3205     loc SET OF Seq-loc ,
3206     proj SET OF Project             -- nested projects (Project is recursive via this member)
3207 }
3208 
3209 Project-descr ::= SEQUENCE {
3210     id SET OF Project-id ,          -- required; one or more string ids
3211     name VisibleString OPTIONAL ,
3212     descr SET OF Projdesc OPTIONAL }
3213 
3214 Projdesc ::= CHOICE {               -- one descriptor: exactly one of the following
3215     pub Pubdesc ,
3216     date Date ,
3217     comment VisibleString ,
3218     title VisibleString
3219 }
3220 
3221 Project-id ::= VisibleString   -- plain string id, used in Project-descr.id
3222 
3223 END
3224 
3225 
3226 --$Revision: 6.0 $
3227 --*********************************************************************
3228 --
3229 --  access.asn
3230 --
3231 --     messages for data access
3232 --
3233 --*********************************************************************
3234 
3235 NCBI-Access DEFINITIONS ::=
3236 BEGIN
3237 
3238 EXPORTS Link-set;
3239 
3240     -- links between same class = neighbors
3241     -- links between other classes = links
3242 
3243 Link-set ::= SEQUENCE {
3244     num INTEGER ,                         -- number of links to this doc type
3245     uids SEQUENCE OF INTEGER OPTIONAL ,     -- the links
3246     weights SEQUENCE OF INTEGER OPTIONAL }  -- the weights (presumably parallel to uids -- confirm)
3247 
3248 
3249 END
3250 --$Revision: 6.0 $
3251 --**********************************************************************
3252 --
3253 --  NCBI Sequence Feature Definition Module
3254 --  by James Ostell, 1994
3255 --
3256 --**********************************************************************
3257 
3258 NCBI-FeatDef DEFINITIONS ::=
3259 BEGIN
3260 
3261 EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;
3262 
3263 
3264 FeatDef ::= SEQUENCE {             -- one feature definition; all seven fields are required
3265     typelabel VisibleString ,      -- short label for type eg "CDS"
3266     menulabel VisibleString ,      -- label for a menu eg "Coding Region"
3267     featdef-key INTEGER ,                  -- unique for this feature definition
3268     seqfeat-key INTEGER ,                  -- SeqFeat.data.choice from objfeat.h
3269     entrygroup INTEGER ,                   -- Group for data entry
3270     displaygroup INTEGER ,                 -- Group for data display (presumably a FeatDispGroup.groupkey; confirm)
3271     molgroup FeatMolType           -- Type of molecule used for: aa, na or both (see FeatMolType)
3272 }
3273 
3274 FeatMolType ::= ENUMERATED {    -- type of FeatDef.molgroup
3275         aa (1),  -- proteins
3276     na (2),  -- nucleic acids
3277     both (3) }  -- both proteins and nucleic acids
3278 
3279 FeatDefSet ::= SEQUENCE OF FeatDef   -- collection of definitions
3280 
3281 FeatDispGroup ::= SEQUENCE {    -- a display group: integer key plus name, both required
3282         groupkey INTEGER ,      -- integer key of the group
3283     groupname VisibleString }   -- name of the group
3284 
3285 FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup    -- list of display groups; type of FeatDefGroupSet.groups
3286 
3287 FeatDefGroupSet ::= SEQUENCE {    -- display groups together with feature definitions; not listed in this module's EXPORTS
3288         groups FeatDispGroupSet ,  -- required
3289         defs FeatDefSet }          -- required
3290 
3291 END
3292 
3293     
3294 --$Revision: 6.12 $
3295 --****************************************************************
3296 --
3297 --  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
3298 --  by Jonathan Epstein, February 1996
3299 --
3300 --****************************************************************
3301 
3302 NCBI-Mime DEFINITIONS ::=
3303 BEGIN
3304 
3305 EXPORTS Ncbi-mime-asn1;
3306 IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
3307     Cdd FROM NCBI-Cdd
3308         Seq-entry FROM NCBI-Seqset
3309         Seq-annot FROM NCBI-Sequence
3310     Medline-entry FROM NCBI-Medline
3311     Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;
3312 
3313 Ncbi-mime-asn1 ::= CHOICE {    -- the only type this module EXPORTS; exactly one alternative is present
3314         entrez  Entrez-general,                 -- just a structure
3315     alignstruc  Biostruc-align,     -- structures & sequences & alignments
3316         alignseq        Biostruc-align-seq,     -- sequence alignment
3317     strucseq    Biostruc-seq,       -- structure & sequences
3318     strucseqs   Biostruc-seqs,      -- structure & sequences & alignments
3319     general     Biostruc-seqs-aligns-cdd    -- all-purpose "grab bag"
3320         -- others may be added here in the future
3321 }
3322 
3323 -- generic bundle of sequence and alignment info
3324 Bundle-seqs-aligns ::= SEQUENCE {    -- every field is OPTIONAL; used by Biostruc-seqs-aligns-cdd.seq-align-data.bundle
3325     sequences SET OF Seq-entry OPTIONAL,        -- sequences
3326     seqaligns SET OF Seq-annot OPTIONAL,        -- sequence alignments
3327     strucaligns Biostruc-annot-set OPTIONAL,    -- structure alignments
3328     imports SET OF Seq-annot OPTIONAL,          -- imports (updates in Cn3D)
3329     style-dictionary Cn3d-style-dictionary OPTIONAL,    -- Cn3D stuff
3330     user-annotations Cn3d-user-annotations OPTIONAL     -- type imported from NCBI-Cn3d
3331 }
3332 
3333 Biostruc-seqs-aligns-cdd ::= SEQUENCE {    -- payload of the "general" alternative of Ncbi-mime-asn1
3334     seq-align-data CHOICE {                 -- required; exactly one of bundle or cdd
3335         bundle Bundle-seqs-aligns,          -- either seqs + alignments
3336         cdd Cdd                             -- or CDD (which contains these)
3337     },
3338     structures SET OF Biostruc OPTIONAL,    -- structures
3339     structure-type ENUMERATED {             -- type of structures to load if
3340         ncbi-backbone(2),                   -- not present; meanings and
3341         ncbi-all-atom(3),                   -- values are same as MMDB's
3342         pdb-model(4)                        -- Model-type
3343     } OPTIONAL
3344 }
3345 
3346 Biostruc-align ::= SEQUENCE {    -- payload of the "alignstruc" alternative of Ncbi-mime-asn1
3347         master  Biostruc,                       -- required, a single Biostruc
3348         slaves  SET OF Biostruc,                -- required set of Biostruc
3349         alignments      Biostruc-annot-set,     -- structure alignments
3350         sequences SET OF Seq-entry,     -- sequences
3351         seqalign SET OF Seq-annot,              -- required; cf. Bundle-seqs-aligns.seqaligns (sequence alignments)
3352         style-dictionary Cn3d-style-dictionary OPTIONAL,    -- type imported from NCBI-Cn3d
3353         user-annotations Cn3d-user-annotations OPTIONAL     -- type imported from NCBI-Cn3d
3354 }
3355 
3356 Biostruc-align-seq ::= SEQUENCE {       -- display seq structure align only; payload of Ncbi-mime-asn1 "alignseq"
3357         sequences SET OF Seq-entry,     -- sequences
3358         seqalign SET OF Seq-annot,              -- required; cf. Bundle-seqs-aligns.seqaligns (sequence alignments)
3359         style-dictionary Cn3d-style-dictionary OPTIONAL,    -- type imported from NCBI-Cn3d
3360         user-annotations Cn3d-user-annotations OPTIONAL     -- type imported from NCBI-Cn3d
3361 }
3362 
3363 Biostruc-seq ::= SEQUENCE {     -- display structure seq, added by yanli; payload of Ncbi-mime-asn1 "strucseq"
3364         structure Biostruc,                     -- required, a single Biostruc
3365         sequences SET OF Seq-entry,             -- required set of Seq-entry
3366         style-dictionary Cn3d-style-dictionary OPTIONAL,    -- type imported from NCBI-Cn3d
3367         user-annotations Cn3d-user-annotations OPTIONAL     -- type imported from NCBI-Cn3d
3368 }
3369 
3370 Biostruc-seqs ::= SEQUENCE { -- display blast alignment along with neighbor's structure, added by yanli; payload of Ncbi-mime-asn1 "strucseqs"
3371         structure Biostruc,                     -- required, a single Biostruc
3372         sequences SET OF Seq-entry,     -- sequences
3373         seqalign SET OF Seq-annot,              -- required; cf. Bundle-seqs-aligns.seqaligns (sequence alignments)
3374         style-dictionary Cn3d-style-dictionary OPTIONAL,    -- type imported from NCBI-Cn3d
3375         user-annotations Cn3d-user-annotations OPTIONAL     -- type imported from NCBI-Cn3d
3376 }
3377 
3378 Entrez-style ::= ENUMERATED {    -- type of Entrez-general.style; values run 1 to 13 with no gaps
3379         docsum (1),
3380         genbank (2) ,
3381         genpept (3) ,
3382         fasta (4) ,
3383         asn1 (5) ,
3384         graphic (6) ,
3385         alignment (7) ,
3386         globalview (8) ,
3387         report (9) ,
3388         medlars (10) ,
3389         embl (11) ,
3390         pdb (12) ,
3391         kinemage (13) }
3392 
3393 Entrez-general ::= SEQUENCE {    -- payload of the "entrez" alternative of Ncbi-mime-asn1
3394         title VisibleString OPTIONAL,
3395         data CHOICE {                  -- required; exactly one alternative is present
3396                 ml      Medline-entry ,
3397                 prot    Seq-entry ,    -- prot, nuc and genome all carry a Seq-entry
3398                 nuc     Seq-entry ,
3399                 genome  Seq-entry ,
3400                 structure Biostruc ,
3401                 strucAnnot Biostruc-annot-set } ,
3402         style Entrez-style ,           -- required; one of the Entrez-style values
3403         location VisibleString OPTIONAL }
3404 END
3405 --$Revision: 6.0 $
3406 --********************************************************************
3407 --
3408 --  Print Templates
3409 --  James Ostell, 1993
3410 --
3411 --
3412 --********************************************************************
3413 
3414 NCBI-ObjPrt DEFINITIONS ::=
3415 BEGIN
3416 
3417 EXPORTS PrintTemplate, PrintTemplateSet;
3418 
3419 PrintTemplate ::= SEQUENCE {    -- a named template; exported together with PrintTemplateSet
3420     name TemplateName ,  -- name for this template
3421     labelfrom VisibleString OPTIONAL,    -- ASN.1 path to get label from
3422     format PrintFormat }    -- required; the top-level PrintFormat of this template
3423 
3424 TemplateName ::= VisibleString    -- type of PrintTemplate.name and of PrintForm.use-template
3425 
3426 PrintTemplateSet ::= SEQUENCE OF PrintTemplate    -- ordered list of templates
3427 
3428 PrintFormat ::= SEQUENCE {    -- formatting of one item; nests through PrintFormBlock.components
3429     asn1 VisibleString ,    -- ASN.1 partial path for this
3430     label VisibleString OPTIONAL ,   -- printable label
3431     prefix VisibleString OPTIONAL,   -- may be omitted
3432     suffix VisibleString OPTIONAL,   -- may be omitted
3433     form PrintForm }                 -- required; selects one of the PrintForm alternatives
3434 
3435 PrintForm ::=   CHOICE {      -- Forms for various ASN.1 components
3436     block PrintFormBlock,         -- for SEQUENCE, SET
3437     boolean PrintFormBoolean,
3438     enum PrintFormEnum,
3439     text PrintFormText,
3440     use-template TemplateName,    -- a TemplateName, the same type as PrintTemplate.name
3441     user UserFormat ,
3442     null NULL }               -- rarely used
3443 
3444 UserFormat ::= SEQUENCE {    -- payload of the "user" alternative of PrintForm
3445     printfunc VisibleString ,              -- required
3446     defaultfunc VisibleString OPTIONAL }   -- may be omitted
3447 
3448 PrintFormBlock ::= SEQUENCE {  -- for SEQUENCE, SET
3449     separator VisibleString OPTIONAL ,      -- may be omitted
3450     components SEQUENCE OF PrintFormat }    -- recursive: each component is itself a full PrintFormat
3451 
3452 PrintFormBoolean ::= SEQUENCE {    -- payload of the "boolean" alternative of PrintForm
3453     true VisibleString OPTIONAL ,      -- presumably the text printed for TRUE; confirm against the print code
3454     false VisibleString OPTIONAL }     -- presumably the text printed for FALSE; confirm against the print code
3455 
3456 PrintFormEnum ::= SEQUENCE {    -- payload of the "enum" alternative of PrintForm
3457     values SEQUENCE OF VisibleString OPTIONAL }    -- optional ordered list of strings
3458 
3459 PrintFormText ::= SEQUENCE {    -- payload of the "text" alternative of PrintForm
3460     textfunc VisibleString OPTIONAL }    -- may be omitted
3461     
3462 END
3463 
3464 --$Revision: 6.7 $
3465 --*********************************************************
3466 --
3467 -- ASN.1 and XML for the components of a GenBank format sequence
3468 -- J.Ostell 2002
3469 -- Updated 15 January 2009
3470 --
3471 --*********************************************************
3472 
3473 NCBI-GBSeq DEFINITIONS ::=
3474 BEGIN
3475 
3476 --********
3477 --  GBSeq represents the elements in a GenBank style report
3478 --    of a sequence with some small additions to structure and support
3479 --    for protein (GenPept) versions of GenBank format as seen in
3480 --    Entrez. While this represents the simplification, reduction of
3481 --    detail, and flattening to a single sequence perspective of GenBank
3482 --    format (compared with the full ASN.1 or XML from which GenBank and
3483 --    this format is derived at NCBI), it is presented in ASN.1 or XML for
3484 --    automated parsing and processing. It is hoped that this compromise
3485 --    will be useful for those bulk processing at the GenBank format level
3486 --    of detail today. Since it is a compromise, a number of pragmatic
3487 --    decisions have been made.
3488 --
3489 --  In pursuit of simplicity and familiarity a number of
3490 --    fields do not have full substructure defined here where there is
3491 --    already a standard GenBank format string. For example:
3492 --
3493 --    Date  DD-Mon-YYYY
3494 --    Authors   LastName, Initials (with periods)
3495 --   Journal   JournalName Volume (issue), page-range (year)
3496 --   FeatureLocations as per GenBank feature table, but FeatureIntervals
3497 --    may also be provided as a convenience
3498 --   FeatureQualifiers  as per GenBank feature table
3499 --   Primary has a string that represents a table to construct
3500 --    a third party (TPA) sequence.
3501 --   other-seqids can have strings with the "vertical bar format" sequence
3502 --    identifiers used in BLAST for example, when they are non-genbank types.
3503 --    Currently in GenBank format you only see GI, but there are others, like
3504 --    patents, submitter clone names, etc which will appear here, as they
3505 --    always have in the ASN.1 format, and full XML format.
3506 --   source-db is a formatted text block for peptides in GenPept format that
3507 --    carries information from the source protein database.
3508 --
3509 --  There are also a number of elements that could have been
3510 --   more exactly specified, but in the interest of simplicity
3511 --   have been simply left as options. For example..
3512 --
3513 --  accession and accession.version will always appear in a GenBank record
3514 --   they are optional because this format can also be used for non-GenBank
3515 --   sequences, and in that case will have only "other-seqids".
3516 --
3517 --  sequences will normally all have "sequence" filled in. But contig records
3518 --    will have a "join" statement in the "contig" slot, and no "sequence".
3519 --    We also may consider a retrieval option with no sequence of any kind
3520 --     and no feature table to quickly check minimal values.
3521 --
3522 --  a reference may have an author list, or be from a consortium, or both.
3523 --
3524 --  some fields, such as taxonomy, do appear as separate elements in GenBank
3525 --    format but without a specific linetype (in GenBank format this comes
3526 --    under ORGANISM). Another example is the separation of primary accession
3527 --    from the list of secondary accessions. In GenBank format primary
3528 --    accession is just the first one on the list that includes all secondaries
3529 --    after it.
3530 --
3531 --  create-date deserves special comment. The date you see on the right hand
3532 --    side of the LOCUS line in GenBank format is actually the last date the
3533 --    record was modified (or the update-date). The date the record was
3534 --    first submitted to GenBank appears in the first submission citation in
3535 --    the reference section. Internally in the databases and ASN.1 NCBI keeps
3536 --    the first date the record was released into the sequence database at
3537 --    NCBI as create-date. For records from EMBL, which supports create-date,
3538 --    it is the date provided by EMBL. For DDBJ records, which do not supply
3539 --    a create-date (same as GenBank format) the create-date is the first date
3540 --    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
3541 --    took responsibility for GenBank, it is just the first date NCBI saw the
3542 --    record. Create-date can be very useful, so we expose it here, but users
3543 --    must understand it is only an approximation and comes from many sources,
3544 --    and with many exceptions and caveats. It does NOT tell you the first
3545 --    date the public might have seen this record and thus is NOT an accurate
3546 --    measure for legal issues of precedence.
3547 --
3548 --********
3549 
3550 GBSet ::= SEQUENCE OF GBSeq    -- ordered list of GBSeq records
3551         
3552 GBSeq ::= SEQUENCE {    -- one sequence record; conventions are described in the module header comment
3553     locus VisibleString ,
3554     length INTEGER ,
3555     strandedness VisibleString OPTIONAL ,
3556     moltype VisibleString ,
3557     topology VisibleString OPTIONAL ,
3558     division VisibleString ,
3559     update-date VisibleString ,    -- DD-Mon-YYYY; last date the record was modified
3560     create-date VisibleString OPTIONAL ,    -- DD-Mon-YYYY; only an approximation, see header comment
3561     update-release VisibleString OPTIONAL ,
3562     create-release VisibleString OPTIONAL ,
3563     definition VisibleString ,
3564     primary-accession VisibleString OPTIONAL ,    -- optional because non-GenBank sequences have only other-seqids
3565     entry-version VisibleString OPTIONAL ,
3566     accession-version VisibleString OPTIONAL ,    -- optional for the same reason as primary-accession
3567     other-seqids SEQUENCE OF GBSeqid OPTIONAL ,    -- may hold "vertical bar format" ids as used in BLAST
3568     secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,    -- kept separate from the primary accession
3569     project VisibleString OPTIONAL ,
3570     keywords SEQUENCE OF GBKeyword OPTIONAL ,
3571     segment VisibleString OPTIONAL ,
3572     source VisibleString OPTIONAL ,
3573     organism VisibleString OPTIONAL ,
3574     taxonomy VisibleString OPTIONAL ,    -- in GenBank format this comes under ORGANISM
3575     references SEQUENCE OF GBReference OPTIONAL ,
3576     comment VisibleString OPTIONAL ,
3577     tagset GBTagset OPTIONAL ,
3578     primary VisibleString OPTIONAL ,    -- string representing a table to construct a third party (TPA) sequence
3579     source-db VisibleString OPTIONAL ,    -- formatted text block for peptides in GenPept format
3580     database-reference VisibleString OPTIONAL ,
3581     feature-table SEQUENCE OF GBFeature OPTIONAL ,
3582     sequence VisibleString OPTIONAL ,  -- Optional for other dump forms
3583     contig VisibleString OPTIONAL    -- contig records carry a "join" statement here and no "sequence"
3584 }
3585 
3586 GBSecondary-accn ::= VisibleString    -- element type of GBSeq.secondary-accessions

3588 GBSeqid ::= VisibleString    -- element type of GBSeq.other-seqids

3590 GBKeyword ::= VisibleString    -- element type of GBSeq.keywords

3592 GBAuthor ::= VisibleString    -- element type of GBReference.authors
3593 
3594 GBReference ::= SEQUENCE {    -- one citation; only reference and journal are required
3595     reference VisibleString ,
3596     position VisibleString OPTIONAL ,
3597     authors SEQUENCE OF GBAuthor OPTIONAL ,    -- each "LastName, Initials (with periods)"
3598     consortium VisibleString OPTIONAL ,    -- a reference may have authors, a consortium, or both
3599     title VisibleString OPTIONAL ,
3600     journal VisibleString ,    -- "JournalName Volume (issue), page-range (year)"
3601     xref SET OF GBXref OPTIONAL ,
3602     pubmed INTEGER OPTIONAL ,
3603     remark VisibleString OPTIONAL
3604 }
3605 
3606 GBXref ::= SEQUENCE {    -- element type of GBReference.xref; both fields required
3607     dbname VisibleString ,
3608     id VisibleString
3609 }
3610 
3611 GBTagset ::= SEQUENCE {    -- type of GBSeq.tagset; every field is OPTIONAL
3612     authority VisibleString OPTIONAL ,
3613     version VisibleString OPTIONAL ,
3614     url VisibleString OPTIONAL ,
3615     tags GBTags OPTIONAL    -- list of GBTag
3616 }
3617 
3618 GBTags ::= SEQUENCE OF GBTag    -- type of GBTagset.tags
3619 
3620 GBTag ::= SEQUENCE {    -- one tag; every field is OPTIONAL
3621     name VisibleString OPTIONAL ,
3622     value VisibleString OPTIONAL ,
3623     unit VisibleString OPTIONAL
3624 }
3625 
3626 GBFeature ::= SEQUENCE {    -- element type of GBSeq.feature-table; key and location are required
3627     key VisibleString ,
3628     location VisibleString ,    -- as per GenBank feature table
3629     intervals SEQUENCE OF GBInterval OPTIONAL ,    -- may also be provided as a convenience
3630     operator VisibleString OPTIONAL ,
3631     partial5 BOOLEAN OPTIONAL ,
3632     partial3 BOOLEAN OPTIONAL ,
3633     quals SEQUENCE OF GBQualifier OPTIONAL    -- qualifiers as per GenBank feature table
3634 }
3635 
3636 GBInterval ::= SEQUENCE {    -- element type of GBFeature.intervals; only accession is required
3637     from INTEGER OPTIONAL ,
3638     to INTEGER OPTIONAL ,
3639     point INTEGER OPTIONAL ,
3640     iscomp BOOLEAN OPTIONAL ,
3641     interbp BOOLEAN OPTIONAL ,
3642     accession VisibleString    -- the one required field
3643 }
3644 
3645 GBQualifier ::= SEQUENCE {    -- element type of GBFeature.quals
3646     name VisibleString ,    -- required
3647     value VisibleString OPTIONAL    -- a qualifier may have no value
3648 }
3649 
3650 GBTagsetRules ::= SEQUENCE {    -- element type of GBTagsetRuleSet; every field is OPTIONAL
3651     authority VisibleString OPTIONAL ,
3652     version VisibleString OPTIONAL ,
3653     mandatorytags GBTagNames OPTIONAL ,    -- list of tag names
3654     optionaltags GBTagNames OPTIONAL ,    -- list of tag names
3655     uniquetags GBTagNames OPTIONAL ,    -- list of tag names
3656     extensible BOOLEAN OPTIONAL
3657 }
3658 
3659 GBTagNames ::= SEQUENCE OF VisibleString    -- type of the three tag lists in GBTagsetRules
3660 
3661 GBTagsetRuleSet ::= SEQUENCE OF GBTagsetRules    -- ordered list of rule sets
3662 
3663 END
3664 
3665 --$Revision: 1.7 $
3666 --************************************************************************
3667 --
3668 -- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
3669 -- The International Nucleotide Sequence Database (INSD) collaboration
3670 -- Version 1.5, 15 January 2009
3671 --
3672 --************************************************************************
3673 
3674 INSD-INSDSeq DEFINITIONS ::=
3675 BEGIN
3676 
3677 --  INSDSeq provides the elements of a sequence as presented in the
3678 --    GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
3679 --    additional structure.
3680 --    Although this single perspective of the three flatfile formats
3681 --    provides a useful simplification, it hides to some extent the
3682 --    details of the actual data underlying those formats. Nevertheless,
3683 --    the XML version of INSD-Seq is being provided with
3684 --    the hopes that it will prove useful to those who bulk-process
3685 --    sequence data at the flatfile-format level of detail. Further 
3686 --    documentation regarding the content and conventions of those formats 
3687 --    can be found at:
3688 --
3689 --    URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
3690 --    http://www.ddbj.nig.ac.jp/FT/full_index.html
3691 --    http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
3692 --    http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
3693 --
3694 --    URLs for DDBJ, EMBL, and GenBank Release Notes :
3695 --    ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
3696 --    http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
3697 --    ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
3698 --
3699 --    Because INSDSeq is a compromise, a number of pragmatic decisions have
3700 --    been made:
3701 --
3702 --  In pursuit of simplicity and familiarity a number of fields do not
3703 --    have full substructure defined here where there is already a
3704 --    standard flatfile format string. For example:
3705 --
3706 --   Dates:      DD-MON-YYYY (eg 10-JUN-2003)
3707 --
3708 --   Author:     LastName, Initials  (eg Smith, J.N.)
3709 --            or Lastname Initials   (eg Smith J.N.)
3710 --
3711 --   Journal:    JournalName Volume (issue), page-range (year)
3712 --            or JournalName Volume(issue):page-range(year)
3713 --            eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
3714 --               Appl. Environ. Microbiol. 61(4):1646-1648(1995).
3715 --
3716 --  FeatureLocations are represented as in the flatfile feature table,
3717 --    but FeatureIntervals may also be provided as a convenience
3718 --
3719 --  FeatureQualifiers are represented as in the flatfile feature table.
3720 --
3721 --  Primary has a string that represents a table to construct
3722 --    a third party (TPA) sequence.
3723 --
3724 --  other-seqids can have strings with the "vertical bar format" sequence
3725 --    identifiers used in BLAST for example, when they are non-INSD types.
3726 --
3727 --  Currently in flatfile format you only see Accession numbers, but there 
3728 --    are others, like patents, submitter clone names, etc which will 
3729 --    appear here
3730 --
3731 --  There are also a number of elements that could have been more exactly
3732 --    specified, but in the interest of simplicity have been simply left as
3733 --    optional. For example:
3734 --
3735 --  All publicly accessible sequence records in INSDSeq format will
3736 --    include accession and accession.version. However, these elements are 
3737 --    optional in INSDSeq so that this format can also be used
3738 --    for non-public sequence data, prior to the assignment of accessions and 
3739 --    version numbers. In such cases, records will have only "other-seqids".
3740 --
3741 --  sequences will normally all have "sequence" filled in. But contig records
3742 --    will have a "join" statement in the "contig" slot, and no "sequence".
3743 --    We also may consider a retrieval option with no sequence of any kind
3744 --    and no feature table to quickly check minimal values.
3745 --
3746 --  Four (optional) elements are specific to records represented via the EMBL
3747 --    sequence database: INSDSeq_update-release, INSDSeq_create-release,
3748 --    INSDSeq_entry-version, and INSDSeq_database-reference.
3749 --
3750 --  One (optional) element is specific to records originating at the GenBank
3751 --    and DDBJ sequence databases: INSDSeq_segment.
3752 --
3753 --********
3754 
3755 INSDSet ::= SEQUENCE OF INSDSeq    -- ordered list of INSDSeq records
3756 
3757 INSDSeq ::= SEQUENCE {    -- one sequence record; conventions are described in the module header comment
3758     locus VisibleString ,
3759     length INTEGER ,
3760     strandedness VisibleString OPTIONAL ,
3761     moltype VisibleString ,
3762     topology VisibleString OPTIONAL ,
3763     division VisibleString ,
3764     update-date VisibleString ,    -- DD-MON-YYYY (eg 10-JUN-2003)
3765     create-date VisibleString OPTIONAL ,    -- DD-MON-YYYY (eg 10-JUN-2003)
3766     update-release VisibleString OPTIONAL ,    -- specific to records represented via EMBL
3767     create-release VisibleString OPTIONAL ,    -- specific to records represented via EMBL
3768     definition VisibleString ,
3769     primary-accession VisibleString OPTIONAL ,    -- optional so non-public data without accessions can be represented
3770     entry-version VisibleString OPTIONAL ,    -- specific to records represented via EMBL
3771     accession-version VisibleString OPTIONAL ,    -- optional for the same reason as primary-accession
3772     other-seqids SEQUENCE OF INSDSeqid OPTIONAL ,    -- may hold "vertical bar format" ids as used in BLAST
3773     secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,
3774     project VisibleString OPTIONAL ,
3775     keywords SEQUENCE OF INSDKeyword OPTIONAL ,
3776     segment VisibleString OPTIONAL ,    -- specific to records originating at GenBank and DDBJ
3777     source VisibleString OPTIONAL ,
3778     organism VisibleString OPTIONAL ,
3779     taxonomy VisibleString OPTIONAL ,
3780     references SEQUENCE OF INSDReference OPTIONAL ,
3781     comment VisibleString OPTIONAL ,
3782     tagset INSDTagset OPTIONAL ,    -- community-specific tag/value data
3783     primary VisibleString OPTIONAL ,    -- string representing a table to construct a third party (TPA) sequence
3784     source-db VisibleString OPTIONAL ,
3785     database-reference VisibleString OPTIONAL ,    -- specific to records represented via EMBL
3786     feature-table SEQUENCE OF INSDFeature OPTIONAL ,
3787     sequence VisibleString OPTIONAL ,  -- Optional for other dump forms
3788     contig VisibleString OPTIONAL    -- contig records carry a "join" statement here and no "sequence"
3789 }
3790 
3791 INSDSeqid ::= VisibleString    -- element type of INSDSeq.other-seqids

3793 INSDSecondary-accn ::= VisibleString    -- element type of INSDSeq.secondary-accessions

3795 INSDKeyword ::= VisibleString    -- element type of INSDSeq.keywords
3796 
3797 -- INSDReference_position contains a string value indicating the
3798 -- basepair span(s) to which a reference applies. The allowable
3799 -- formats are:
3800 -- 
3801 --   X..Y  : Where X and Y are integers separated by two periods,
3802 --           X >= 1 , Y <= sequence length, and X <= Y 
3803 --
3804 --           Multiple basepair spans can exist, separated by a
3805 --           semi-colon and a space. For example : 10..20; 100..500
3806 --             
3807 --   sites : The string literal 'sites', indicating that a reference
3808 --           provides sequence annotation information, but the specific
3809 --           basepair spans are either not captured, or were too numerous
3810 --           to record.
3811 -- 
3812 --           The 'sites' literal string is singly occurring, and
3813 --            cannot be used in conjunction with any X..Y basepair spans.
3814 -- 
3815 --   References that lack an INSDReference_position element apply
3816 --   to the entire sequence.
3817 
3818 INSDAuthor ::= VisibleString    -- "LastName, Initials" or "Lastname Initials"; element type of INSDReference.authors
3819 
3820 INSDReference ::= SEQUENCE {    -- one citation; only reference and journal are required
3821     reference VisibleString ,
3822     position VisibleString OPTIONAL ,    -- "X..Y" span list or the literal 'sites'; absent means the entire sequence
3823     authors SEQUENCE OF INSDAuthor OPTIONAL ,
3824     consortium VisibleString OPTIONAL ,
3825     title VisibleString OPTIONAL ,
3826     journal VisibleString ,    -- eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
3827     xref SET OF INSDXref OPTIONAL ,    -- references to records in other databases
3828     pubmed INTEGER OPTIONAL ,
3829     remark VisibleString OPTIONAL
3830 }
3831 
3832 -- INSDXref provides a method for referring to records in
3833 -- other databases. INSDXref_dbname is a string value that
3834 -- provides the name of the database, and INSDXref_id
3835 -- is a string value that provides the record's identifier
3836 -- in that database.
3837 
3838 INSDXref ::= SEQUENCE {    -- reference to a record in another database; both fields required
3839     dbname VisibleString ,    -- name of the database
3840     id VisibleString    -- the record's identifier in that database
3841 }
3842 
3843 -- INSDTagset is used for community-specific data elements
3844 -- in a tag/value format.
3845 
3846 INSDTagset ::= SEQUENCE {    -- type of INSDSeq.tagset; every field is OPTIONAL
3847     authority VisibleString OPTIONAL ,
3848     version VisibleString OPTIONAL ,
3849     url VisibleString OPTIONAL ,
3850     tags INSDTags OPTIONAL    -- list of INSDTag
3851 }
3852 
3853 INSDTags ::= SEQUENCE OF INSDTag    -- type of INSDTagset.tags
3854 
3855 INSDTag ::= SEQUENCE {    -- one tag; every field is OPTIONAL
3856     name VisibleString OPTIONAL ,
3857     value VisibleString OPTIONAL ,
3858     unit VisibleString OPTIONAL
3859 }
3860 
3861 -- INSDFeature_operator contains a string value describing
3862 -- the relationship among a set of INSDInterval within
3863 -- INSDFeature_intervals. The allowable formats are:
3864 -- 
3865 --   join :  The string literal 'join' indicates that the
3866 --           INSDInterval intervals are biologically joined
3867 --           together into a contiguous molecule.
3868 -- 
3869 --   order : The string literal 'order' indicates that the
3870 --           INSDInterval intervals are in the presented
3871 --           order, but they are not necessarily contiguous.
3872 -- 
3873 --   Either 'join' or 'order' is required if INSDFeature_intervals
3874 --   is comprised of more than one INSDInterval .
3875 
3876 INSDFeature ::= SEQUENCE {    -- element type of INSDSeq.feature-table; key and location are required
3877     key VisibleString ,
3878     location VisibleString ,    -- as in the flatfile feature table
3879     intervals SEQUENCE OF INSDInterval OPTIONAL ,    -- may also be provided as a convenience
3880     operator VisibleString OPTIONAL ,    -- 'join' or 'order'; required when intervals has more than one element
3881     partial5 BOOLEAN OPTIONAL ,
3882     partial3 BOOLEAN OPTIONAL ,
3883     quals SEQUENCE OF INSDQualifier OPTIONAL    -- qualifiers as in the flatfile feature table
3884 }
3885 
3886 -- INSDInterval_iscomp is a boolean indicating whether
3887 -- an INSDInterval_from / INSDInterval_to location
3888 -- represents a location on the complement strand.
3889 -- When INSDInterval_iscomp is TRUE, it essentially
3890 -- confirms that a 'from' value which is greater than
3891 -- a 'to' value is intentional, because the location
3892 -- is on the opposite strand of the presented sequence.
3893 
3894 -- INSDInterval_interbp is a boolean indicating whether
3895 -- a feature (such as a restriction site) is located
3896 -- between two adjacent basepairs. When INSDInterval_interbp
3897 -- is TRUE, the 'from' and 'to' values must differ by
3898 -- exactly one base.
3899 
3900 INSDInterval ::= SEQUENCE {    -- element type of INSDFeature.intervals; only accession is required
3901     from INTEGER OPTIONAL ,
3902     to INTEGER OPTIONAL ,
3903     point INTEGER OPTIONAL ,
3904     iscomp BOOLEAN OPTIONAL ,    -- TRUE when the from/to location is on the complement strand
3905     interbp BOOLEAN OPTIONAL ,    -- TRUE when the feature lies between two adjacent basepairs
3906     accession VisibleString    -- the one required field
3907 }
3908 
3909 INSDQualifier ::= SEQUENCE {    -- element type of INSDFeature.quals
3910     name VisibleString ,    -- required
3911     value VisibleString OPTIONAL    -- a qualifier may have no value
3912 }
3913 
3914 -- INSDTagsetRules defines mandatory, optional, and unique tags
3915 -- for a given community's INSDTagset. If the tagset is extensible,
3916 -- then additional tags which are not included in the list of
3917 -- mandatory or optional tags may be present. The uniquetags
3918 -- element provides a list of the tags that may occur only once
3919 -- in a given tagset.
3920 
3921 INSDTagsetRules ::= SEQUENCE {    -- rules for one community's INSDTagset; every field is OPTIONAL
3922     authority VisibleString OPTIONAL ,
3923     version VisibleString OPTIONAL ,
3924     mandatorytags INSDTagNames OPTIONAL ,    -- mandatory tags
3925     optionaltags INSDTagNames OPTIONAL ,    -- optional tags
3926     uniquetags INSDTagNames OPTIONAL ,    -- tags that may occur only once in a given tagset
3927     extensible BOOLEAN OPTIONAL    -- if extensible, tags outside the mandatory and optional lists may be present
3928 }
3929 
3930 INSDTagNames ::= SEQUENCE OF VisibleString    -- type of the three tag lists in INSDTagsetRules
3931 
3932 INSDTagsetRuleSet ::= SEQUENCE OF INSDTagsetRules    -- ordered list of rule sets
3933 
3934 END
3935 
3936 --$Revision: 6.1 $
3937 --**********************************************************************
3938 --
3939 --  ASN.1 for a tiny Bioseq in XML
3940 --    basically a structured FASTA file with a few extras
3941 --    in this case we drop all modularity of components
3942 --      All ids are Optional - simpler structure, less checking
3943 --      Components of organism are hard coded - can't easily add or change
3944 --      sequence is just string whether DNA or protein
3945 --  by James Ostell, 2000
3946 --
3947 --**********************************************************************
3948 
3949 NCBI-TSeq DEFINITIONS ::=
3950 BEGIN
3951 
3952 TSeq ::= SEQUENCE {    -- tiny Bioseq; required fields are seqtype, defline, length and sequence
3953         seqtype ENUMERATED {    -- required
3954                 nucleotide (1),
3955                 protein (2) },
3956         gi INTEGER OPTIONAL,
3957         accver VisibleString OPTIONAL,
3958         sid VisibleString OPTIONAL,
3959         local VisibleString OPTIONAL,
3960         taxid INTEGER OPTIONAL,
3961         orgname VisibleString OPTIONAL,
3962         defline VisibleString,
3963         length INTEGER,
3964         sequence VisibleString }    -- just a string whether DNA or protein
3965 
3966 TSeqSet ::= SEQUENCE OF TSeq    -- a bunch of them (ordered list of TSeq)
3967 
3968 END
3969 
3970 --$Id: scoremat.asn,v 1.12 2008/04/15 15:55:45 kazimird Exp $
3971 -- ===========================================================================
3972 --
3973 --                            PUBLIC DOMAIN NOTICE
3974 --               National Center for Biotechnology Information
3975 --
3976 --  This software/database is a "United States Government Work" under the
3977 --  terms of the United States Copyright Act.  It was written as part of
3978 --  the author's official duties as a United States Government employee and
3979 --  thus cannot be copyrighted.  This software/database is freely available
3980 --  to the public for use. The National Library of Medicine and the U.S.
3981 --  Government have not placed any restriction on its use or reproduction.
3982 --
3983 --  Although all reasonable efforts have been taken to ensure the accuracy
3984 --  and reliability of the software and data, the NLM and the U.S.
3985 --  Government do not and cannot warrant the performance or results that
3986 --  may be obtained by using this software or data. The NLM and the U.S.
3987 --  Government disclaim all warranties, express or implied, including
3988 --  warranties of performance, merchantability or fitness for any particular
3989 --  purpose.
3990 --
3991 --  Please cite the author in any work or product based on this material.
3992 --
3993 -- ===========================================================================
3994 --
3995 -- Author:  Christiam Camacho
3996 --
3997 -- File Description:
3998 --      ASN.1 definitions for scoring matrix
3999 --
4000 -- ===========================================================================
4001 
4002 NCBI-ScoreMat DEFINITIONS ::= BEGIN
4003 
4004 EXPORTS    Pssm, PssmIntermediateData, PssmFinalData, 
4005            PssmParameters, PssmWithParameters;
4006     
4007 IMPORTS    Object-id   FROM NCBI-General
4008            Seq-entry   FROM NCBI-Seqset;
4009 
4010 -- a rudimentary block/core-model, to be used with block-based alignment 
4011 -- routines and threading
4012 
4013 BlockProperty ::= SEQUENCE {               -- element type of CoreBlock.property
4014   type     INTEGER { unassigned  (0),
4015                      threshold   (1),       -- score threshold for heuristics
4016                      minscore    (2),       -- observed minimum score in CD
4017                      maxscore    (3),       -- observed maximum score in CD
4018                      meanscore   (4),       -- observed mean score in CD
4019                      variance    (5),       -- observed score variance
4020                      name       (10),       -- just name the block
4021                      is-optional(20),       -- block may not have to be used
4022                      other     (255) },
4023   intvalue  INTEGER OPTIONAL,               -- optional integer payload
4024   textvalue VisibleString OPTIONAL          -- optional text payload
4025 }
4026 
4027 CoreBlock ::= SEQUENCE {                   -- element type of CoreDef.blocks
4028   start          INTEGER,                   -- begin of block on query
4029   stop           INTEGER,                   -- end of block on query
4030   minstart       INTEGER OPTIONAL,          -- optional N-terminal extension
4031   maxstop        INTEGER OPTIONAL,          -- optional C-terminal extension
4032   property       SEQUENCE OF BlockProperty OPTIONAL  -- optional list of typed properties
4033 }
4034 
4035 LoopConstraint ::= SEQUENCE {                 -- length bounds for one unaligned region; element type of CoreDef.loops
4036   minlength      INTEGER DEFAULT 0,         -- minimum length of unaligned region
4037   maxlength      INTEGER DEFAULT 100000     -- maximum length of unaligned region
4038 }
4039 
4040 CoreDef ::= SEQUENCE {                        -- block/core-model definition; referenced by PssmParameters.constraints
4041   nblocks        INTEGER,                   -- number of core elements/blocks
4042   blocks         SEQUENCE OF CoreBlock,     -- nblocks locations
4043   loops          SEQUENCE OF LoopConstraint -- (nblocks+1) constraints 
4044 }
4045 
4046 -- ===========================================================================
4047 -- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
4048 -- ===========================================
4049 --
4050 -- Two possible inputs to PSI-BLAST and formatrpsdb:
4051 -- 1) PssmWithParams where pssm field contains intermediate PSSM data (matrix 
4052 --    of frequency ratios)
4053 -- 2) PssmWithParams where pssm field contains final PSSM data (matrix of 
4054 --    scores and statistical parameters) - such as written by cddumper
4055 --
4056 -- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
4057 -- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
4058 -- In case 2, PSI-BLAST's PSSM engine is not invoked and the matrix of scores
4059 -- and statistical parameters are used to perform the search in PSI-BLAST and the
4060 -- same data and the data in PssmWithParams::params::rpsdbparams is used to
4061 -- build the PSSM and ultimately the RPS-BLAST database
4062 -- 
4063 -- 
4064 --                 reads    ++++++++++++++ writes
4065 -- PssmWithParams  ====>    + PSI-BLAST  + =====> PssmWithParams
4066 --                          ++++++++++++++             |  ^
4067 --         ^                                           |  |
4068 --         |                                           |  |
4069 --         +===========================================+  |
4070 --                                                     |  |
4071 --         +===========================================+  |
4072 --         |                                              |
4073 -- reads   |                                              | 
4074 --         v                                              |
4075 --  +++++++++++++++ writes +++++++++++++++++++++++        |
4076 --  | formatrpsdb | =====> | RPS-BLAST databases |        |
4077 --  +++++++++++++++        +++++++++++++++++++++++        |
4078 --                                   ^                    |
4079 --                                   |                    |
4080 --                                   | reads              |
4081 --                             +++++++++++++              |
4082 --                             | RPS-BLAST |              |
4083 --                             +++++++++++++              |
4084 --                                                        |
4085 --       reads  ++++++++++++               writes         |
4086 --  Cdd ======> | cddumper | =============================+
4087 --              ++++++++++++
4088 --
4089 -- ===========================================================================
4090 
4091 -- Contains the PSSM's scores and its associated statistical parameters. 
4092 -- Dimensions and order in which scores are stored must be the same as that 
4093 -- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
4094 PssmFinalData ::= SEQUENCE {  -- final scores plus statistics; referenced by Pssm.finalData
4095 
4096     -- PSSM's scores
4097     scores              SEQUENCE OF INTEGER, 
4098 
4099     -- Karlin & Altschul parameter produced during the PSSM's calculation
4100     lambda              REAL,
4101 
4102     -- Karlin & Altschul parameter produced during the PSSM's calculation
4103         kappa               REAL,
4104 
4105     -- Karlin & Altschul parameter produced during the PSSM's calculation
4106     h                   REAL,
4107 
4108     -- scaling factor used to obtain more precision when building the PSSM.
4109     -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
4110     -- engine generates PSSMs which are not scaled-up, however, if PSI-BLAST is
4111     -- given a PSSM which contains a scaled-up PSSM (indicated by having a
4112     -- scalingFactor greater than 1), then it will scale down the PSSM to
4113     -- perform the initial stages of the search with it.
4114     -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided 
4115     -- scaled-up PSSMs, it will ensure that all PSSMs used to build the 
4116     -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST 
4117     -- will silently produce incorrect results).
4118     scalingFactor       INTEGER DEFAULT 1,
4119 
4120     -- Karlin & Altschul parameter produced during the PSSM's calculation
4121     lambdaUngapped      REAL OPTIONAL,
4122 
4123     -- Karlin & Altschul parameter produced during the PSSM's calculation
4124         kappaUngapped       REAL OPTIONAL,
4125 
4126     -- Karlin & Altschul parameter produced during the PSSM's calculation
4127     hUngapped           REAL OPTIONAL
4128 }
4129 
4130 -- Contains the PSSM's intermediate data used to create the PSSM's scores 
4131 -- and statistical parameters. Dimensions and order in which scores are 
4132 -- stored must be the same as that specified in Pssm::numRows, 
4133 -- Pssm::numColumns, and Pssm::byRow
4134 PssmIntermediateData ::= SEQUENCE {  -- referenced by Pssm.intermediateData; only freqRatios is mandatory
4135 
4136     -- observed residue frequencies (or counts) per position of the PSSM 
4137     -- (prior to application of pseudocounts)
4138     resFreqsPerPos              SEQUENCE OF INTEGER OPTIONAL, 
4139 
4140     -- Weighted observed residue frequencies per position of the PSSM.
4141     -- (N.B.: each position's weights should add up to 1.0).
4142     -- This field corresponds to f_i (f sub i) in equation 2 of 
4143     -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
4144     -- NOTE: this is needed for diagnostics information only (i.e.:
4145     -- -out_ascii_pssm option in psiblast)
4146     weightedResFreqsPerPos      SEQUENCE OF REAL OPTIONAL,
4147 
4148     -- PSSM's frequency ratios
4149     freqRatios                  SEQUENCE OF REAL,
4150 
4151     -- Information content per position of the PSSM
4152     -- NOTE: this is needed for diagnostics information only (i.e.:
4153     -- -out_ascii_pssm option in psiblast)
4154     informationContent          SEQUENCE OF REAL OPTIONAL,
4155 
4156     -- Weights for columns of the PSSM without gaps
4157     -- NOTE: this is needed for diagnostics information only (i.e.:
4158     -- -out_ascii_pssm option in psiblast)
4159     gaplessColumnWeights        SEQUENCE OF REAL OPTIONAL,
4160 
4161     -- Used in sequence weights computation
4162     -- NOTE: this is needed for diagnostics information only (i.e.:
4163     -- -out_ascii_pssm option in psiblast)
4164     sigma                       SEQUENCE OF REAL OPTIONAL,
4165 
4166     -- Length of the aligned regions per position of the query sequence
4167     -- NOTE: this is needed for diagnostics information only (i.e.:
4168     -- -out_ascii_pssm option in psiblast)
4169     intervalSizes               SEQUENCE OF INTEGER OPTIONAL,
4170 
4171     -- Number of matching sequences per position of the PSSM (including the
4172     -- query)
4173     -- NOTE: this is needed for diagnostics information only (i.e.:
4174     -- -out_ascii_pssm option in psiblast)
4175     numMatchingSeqs             SEQUENCE OF INTEGER OPTIONAL
4176 }
4177 
4178 -- Position-specific scoring matrix
4179 --
4180 -- Column indices on the PSSM refer to the positions corresponding to the
4181 -- query/master sequence, i.e. the number of columns (N) is the same
4182 -- as the length of the query/master sequence. 
4183 -- Row indices refer to individual amino acid types, i.e. the number of 
4184 -- rows (M) is the same as the number of different residues in the 
4185 -- alphabet we use. Consequently, row labels are amino acid identifiers.
4186 --
4187 -- PSSMs are stored as linear arrays of integers. By default, we store
4188 -- them column-by-column, M values for the first column followed by M
4189 -- values for the second column, and so on. In order to provide
4190 -- flexibility for external applications, the boolean field "byRow" is 
4191 -- provided to specify the storage order.
4192 Pssm ::= SEQUENCE {  -- the matrix itself; embedded as PssmWithParameters.pssm
4193 
4194     -- Is this a protein or nucleotide scoring matrix?
4195     isProtein       BOOLEAN DEFAULT TRUE,       
4196 
4197     -- PSSM identifier
4198     identifier      Object-id OPTIONAL, 
4199 
4200     -- The dimensions of the matrix are returned so the client can
4201     -- verify that all data was received.
4202 
4203     numRows         INTEGER,    -- number of rows
4204     numColumns      INTEGER,    -- number of columns
4205 
4206     -- rowLabels is given to note the order of residue types so that it can
4207     -- be cross-checked between applications.
4208     -- If this field is not given, the matrix values are presented in 
4209     -- order of the alphabet: ncbistdaa is used for protein, ncbi4na for nucl.
4210     -- for proteins the values returned correspond to 
4211     -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
4212     rowLabels       SEQUENCE OF VisibleString OPTIONAL,
4213 
4214     -- are matrices stored row by row?
4215     byRow           BOOLEAN DEFAULT FALSE, 
4216 
4217     -- PSSM representative sequence (master) 
4218     query           Seq-entry OPTIONAL,           
4219 
4220     -- both intermediateData and finalData can be provided, but at least one of
4221     -- them must be provided (both are OPTIONAL, so the schema itself does not enforce this).
4222     -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData 
4223     -- representation. 
4224 
4225     -- Intermediate data for the PSSM
4226     intermediateData    PssmIntermediateData OPTIONAL,
4227 
4228     -- Final representation for the PSSM
4229     finalData           PssmFinalData OPTIONAL
4230 }
4231 
4232 -- This structure is used to create the RPS-BLAST database auxiliary file 
4233 -- (*.aux) and it contains parameters set at creation time of the PSSM.
4234 -- Also, the matrixName field is used by formatrpsdb to build a PSSM from 
4235 -- a Pssm structure which only contains PssmIntermediateData.
4236 FormatRpsDbParameters ::= SEQUENCE {  -- referenced by PssmParameters.rpsdbparams; only matrixName is mandatory
4237 
4238     -- name of the underlying score matrix whose frequency ratios were
4239     -- used in PSSM construction (e.g.: BLOSUM62)
4240     matrixName   VisibleString,
4241 
4242     -- gap opening penalty corresponding to the matrix above
4243     gapOpen      INTEGER OPTIONAL,             
4244 
4245     -- gap extension penalty corresponding to the matrix above
4246     gapExtend    INTEGER OPTIONAL
4247 
4248 }
4249 
4250 -- Populated by PSSM engine of PSI-BLAST, original source for these values 
4251 -- are the PSI-BLAST options specified using the BLAST options API
4252 PssmParameters ::= SEQUENCE {  -- referenced by PssmWithParameters.params; every field is OPTIONAL
4253 
4254     -- pseudocount constant used for PSSM. This field corresponds to beta in 
4255     -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
4256     pseudocount INTEGER OPTIONAL,             
4257 
4258     -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
4259     -- populated by PSI-BLAST
4260     rpsdbparams     FormatRpsDbParameters OPTIONAL,
4261 
4262     -- alignment constraints needed by sequence-structure threader
4263     -- and other global or local block-alignment algorithms
4264         constraints     CoreDef OPTIONAL
4265 }
4266 
4267 -- Envelope containing PSSM and the parameters used to create it. 
4268 -- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
4269 PssmWithParameters ::= SEQUENCE {  -- top-level envelope: mandatory pssm plus optional params
4270 
4271     -- This field is applicable to PSI-BLAST and formatrpsdb.
4272     -- When both the intermediate and final PSSM data are provided in this
4273     -- field, the final data (matrix of scores and associated statistical
4274     -- parameters) takes precedence and that data is used for further
4275     -- processing. The rationale for this is that the PSSM's scores and
4276     -- statistical parameters might have been calculated by other applications
4277     -- and it might not be possible to recreate it by using PSI-BLAST's PSSM 
4278     -- engine.
4279         pssm        Pssm,
4280 
4281     -- This field's rpsdbparams is used to specify the values of options 
4282     -- for processing by formatrpsdb. If these are not set, the command 
4283     -- line defaults of formatrpsdb are applied. This field is used
4284     -- by PSI-BLAST to verify that the underlying score matrix used to build
4285     -- the PSSM is the same as the one being specified through the BLAST
4286     -- Options API. If this field is omitted, no verification will be
4287     -- performed, so be careful to keep track of what matrix was used to build
4288     -- the PSSM or else the results produced by PSI-BLAST will be unreliable.
4289     params      PssmParameters OPTIONAL
4290 }
4291 
4292 END
4293 --$Revision: 1.57 $
4294 --**********************************************************************
4295 --
4296 --  NCBI ASN.1 macro editing language specifications
4297 --
4298 --  by Colleen Bollin, 2007
4299 --
4300 --**********************************************************************
4301 
4302 NCBI-Macro DEFINITIONS ::=
4303 BEGIN
4304 
4305 EXPORTS AECR-action, Parse-action, Macro-action-list;
4306 
4307 -- simple constraints --
4308 
4309 String-location ::= ENUMERATED {  -- where match-text must occur; used by String-constraint.match-location
4310     contains (1) ,
4311     equals (2) ,
4312     starts (3) ,
4313     ends (4) ,
4314     inlist (5) }
4315 
4316 String-constraint ::= SEQUENCE {  -- text match rule reused by most constraint types in this module
4317     match-text VisibleString ,  -- the only mandatory field
4318     match-location String-location DEFAULT contains ,
4319     case-sensitive BOOLEAN DEFAULT FALSE ,
4320     whole-word BOOLEAN DEFAULT FALSE ,
4321     not-present BOOLEAN DEFAULT FALSE }  -- presumably inverts the match (TODO confirm against the macro engine)
4322 
4323 Strand-constraint ::= ENUMERATED {  -- used by Location-constraint.strand
4324     any (0) ,
4325     plus (1) ,
4326     minus (2) }
4327 
4328 Seqtype-constraint ::= ENUMERATED {  -- used by Location-constraint.seq-type
4329     any (0) ,
4330     nuc (1) ,
4331     prot (2) }
4332 
4333 Partial-constraint ::= ENUMERATED {  -- used by Location-constraint.partial5 and .partial3
4334     either (0) ,
4335     partial (1) ,
4336     complete (2) }
4337 
4338 Location-constraint ::= SEQUENCE {  -- alternative "location" of Constraint-choice; every field has a permissive default
4339     strand Strand-constraint DEFAULT any ,
4340     seq-type Seqtype-constraint DEFAULT any ,
4341     partial5 Partial-constraint DEFAULT either ,
4342     partial3 Partial-constraint DEFAULT either }
4343 
4344 Object-type-constraint ::= ENUMERATED {  -- used by Source-constraint.type-constraint
4345     any (0) ,
4346     feature (1) ,
4347     descriptor (2) }
4348 
4349 
4350 -- feature values --
4351 
4352 Feature-type ::= ENUMERATED {  -- used by Feature-field, Feature-field-legal, Feature-field-pair and Sequence-constraint
4353     any (0) ,
4354     gene (1) ,
4355     org (2) ,
4356     cds (3) ,
4357     prot (4) ,
4358     preRNA (5) ,
4359     mRNA (6) ,
4360     tRNA (7) ,
4361     rRNA (8) ,
4362     snRNA (9) ,
4363     scRNA (10) ,
4364     otherRNA (11) ,
4365     pub (12) ,
4366     seq (13) ,
4367     imp (14) ,
4368     allele (15) ,
4369     attenuator (16) ,
4370     c-region (17) ,
4371     caat-signal (18) ,
4372     imp-CDS (19) ,
4373     conflict (20) ,
4374     d-loop (21) ,
4375     d-segment (22) ,
4376     enhancer (23) ,
4377     exon (24) ,
4378     gC-signal (25) ,
4379     iDNA (26) ,
4380     intron (27) ,
4381     j-segment (28) ,
4382     ltr (29) ,
4383     mat-peptide (30) ,
4384     misc-binding (31) ,
4385     misc-difference (32) ,
4386     misc-feature (33) ,
4387     misc-recomb (34) ,
4388     misc-RNA (35) ,
4389     misc-signal (36) ,
4390     misc-structure (37) ,
4391     modified-base (38) ,
4392     mutation (39) ,
4393     n-region (40) ,
4394     old-sequence (41) ,
4395     polyA-signal (42) ,
4396     polyA-site (43) ,
4397     precursor-RNA (44) ,
4398     prim-transcript (45) ,
4399     primer-bind (46) ,
4400     promoter (47) ,
4401     protein-bind (48) ,
4402     rbs (49) ,
4403     repeat-region (50) ,
4404     rep-origin (51) ,
4405     s-region (52) ,
4406     sig-peptide (53) ,
4407     source (54) ,
4408     stem-loop (55) ,
4409     sts (56) ,
4410     tata-signal (57) ,
4411     terminator (58) ,
4412     transit-peptide (59) ,
4413     unsure (60) ,
4414     v-region (61) ,
4415     v-segment (62) ,
4416     variation (63) ,
4417     virion (64) ,
4418     n3clip (65) ,
4419     n3UTR (66) ,
4420     n5clip (67) ,
4421     n5UTR (68) ,
4422     n10-signal (69) ,
4423     n35-signal (70) ,
4424     site-ref (71) ,
4425     region (72) ,
4426     comment (73) ,
4427     bond (74) ,
4428     site (75) ,
4429     rsite (76) ,
4430     user (77) ,
4431     txinit (78) ,
4432     num (79) ,
4433     psec-str (80) ,
4434     non-std-residue (81) ,
4435     het (82) ,
4436     biosrc (83) ,
4437     preprotein (84) ,
4438     mat-peptide-aa (85) ,
4439     sig-peptide-aa (86) ,
4440     transit-peptide-aa (87) ,
4441     snoRNA (88) ,
4442     gap (89) ,
4443     operon (90) ,
4444     oriT (91) ,
4445     ncRNA (92) ,
4446     tmRNA (93) }
4447 
4448 Feat-qual-legal ::= ENUMERATED {  -- recognized feature qualifiers; numbering starts at 1 (no "any" value)
4449     allele (1) ,
4450     activity (2) ,
4451     anticodon (3) ,
4452     bound-moiety (4) ,
4453     chromosome (5),
4454     citation (6),
4455     codon (7) ,
4456     codon-start (8) ,
4457     codons-recognized (9) ,
4458     compare (10) ,
4459     cons-splice (11) ,
4460     db-xref (12) ,
4461     description (13) ,
4462     direction (14) ,
4463     ec-number (15) ,
4464     environmental-sample (16) ,
4465     evidence (17) ,
4466     exception (18) ,
4467     experiment (19) ,
4468     focus (20) ,
4469     frequency (21) ,
4470     function (22) ,
4471     gene (23) ,
4472     gene-description (24) ,
4473     inference (25) ,
4474     label (26) ,
4475     locus-tag (27) ,
4476     map (28) ,
4477     mobile-element (29) ,
4478     mod-base (30) ,
4479     mol-type (31) ,
4480     ncRNA-class (32) ,
4481     note (33) ,
4482     number (34) ,
4483     old-locus-tag (35) ,
4484     operon (36) ,
4485     organism (37) ,
4486     organelle (38) ,
4487     partial (39) ,
4488     phenotype (40) ,
4489     plasmid (41) ,
4490     product (42) ,
4491     protein-id (43) ,
4492     pseudo (44) ,
4493     rearranged (45) ,
4494     replace (46) ,
4495     rpt-family (47) ,
4496     rpt-type (48) ,
4497     rpt-unit (49) ,
4498     rpt-unit-seq (50) ,
4499     rpt-unit-range (51) ,
4500     segment (52) ,
4501     sequenced-mol (53) ,
4502     standard-name (54) ,
4503     synonym (55) ,
4504     transcript-id (56) ,
4505     transgenic (57) ,
4506     translation (58) ,
4507     transl-except (59) ,
4508     transl-table (60) ,
4509     usedin (61),
4510     mobile-element-type (62),
4511     mobile-element-name (63),
4512     gene-comment (64) ,
4513     satellite (65) ,
4514     satellite-type (66) ,
4515     satellite-name (67) ,
4516     location (68) ,
4517     tag-peptide (69) }
4518 
4519 Feat-qual-legal-val ::= SEQUENCE {  -- a legal qualifier paired with a text value
4520     qual Feat-qual-legal ,
4521     val  VisibleString }
4522 
4523 Feat-qual-legal-val-choice ::= CHOICE {  -- single-alternative CHOICE; element type of Feat-qual-legal-set
4524     qual Feat-qual-legal-val }
4525 
4526 Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice  -- unordered collection of qualifier/value pairs
4527 
4528 Feat-qual-choice ::= CHOICE {  -- a qualifier named either by enum value or by a string match
4529     legal-qual Feat-qual-legal ,
4530     illegal-qual String-constraint }  -- presumably matches qualifier names outside Feat-qual-legal (TODO confirm)
4531 
4532 Feature-field ::= SEQUENCE {  -- feature type plus qualifier; alternative "feature-field" of Field-type
4533     type Feature-type ,
4534     field Feat-qual-choice }
4535 
4536 Feature-field-legal ::= SEQUENCE {  -- like Feature-field, but the qualifier must be a Feat-qual-legal value
4537     type Feature-type ,
4538     field Feat-qual-legal }
4539 
4540 Feature-field-pair ::= SEQUENCE {  -- from/to qualifiers sharing one feature type; alternative of Field-pair-type
4541     type Feature-type ,
4542     field-from Feat-qual-choice ,
4543     field-to Feat-qual-choice }
4544 
4545 Rna-feat-type ::= CHOICE {  -- RNA feature kind; used by Rna-qual.type and Rna-qual-pair.type
4546     preRNA NULL ,
4547     mRNA NULL ,
4548     tRNA NULL ,
4549     rRNA NULL ,
4550     ncRNA VisibleString ,  -- the only alternative carrying a value; presumably the ncRNA class (TODO confirm)
4551     tmRNA NULL,
4552     miscRNA NULL }
4553 
4554 Rna-field ::= ENUMERATED {  -- used by Rna-qual.field and Rna-qual-pair.field-from / field-to
4555     product (1) ,
4556     comment (2) ,
4557     codons-recognized (3) ,
4558     ncrna-class (4) ,
4559     anticodon (5) ,
4560     transcript-id (6) ,
4561     gene-locus (7) ,
4562     gene-description (8) ,
4563     gene-maploc (9) ,
4564     gene-locus-tag (10) ,
4565     gene-synonym (11) ,
4566     gene-comment (12) ,
4567     tag-peptide (13) }
4568     
4569 
4570 Rna-qual ::= SEQUENCE {  -- RNA type plus field; alternative "rna-field" of Field-type
4571     type Rna-feat-type ,
4572     field Rna-field }    
4573 
4574 Rna-qual-pair ::= SEQUENCE {  -- from/to RNA fields sharing one RNA type; alternative of Field-pair-type
4575     type Rna-feat-type ,
4576     field-from Rna-field ,
4577     field-to Rna-field }
4578 
4579 Source-qual ::= ENUMERATED {  -- used by Source-qual-pair, Source-qual-choice.textqual and Source-qual-text-val.srcqual
4580     acronym (1) ,
4581     anamorph (2) ,
4582     authority (3) ,
4583     bio-material (4) ,
4584     biotype (5) ,
4585     biovar (6) ,
4586     breed (7) ,
4587     cell-line (8) ,
4588     cell-type (9) ,
4589     chemovar (10) ,
4590     chromosome (11) ,
4591     clone (12) ,
4592     clone-lib (13) ,
4593     collected-by (14) ,
4594     collection-date (15) ,
4595     common (16) ,
4596     common-name (17) ,
4597     country (18) ,
4598     cultivar (19) ,
4599     culture-collection (20) ,
4600     dev-stage (21) ,
4601     division (22) ,
4602     dosage (23) ,
4603     ecotype (24) ,
4604     endogenous-virus-name (25) ,
4605     environmental-sample (26) ,
4606     forma (27) ,
4607     forma-specialis (28) ,
4608     frequency (29) ,
4609     fwd-primer-name (30) ,
4610     fwd-primer-seq (31) ,
4611     gb-acronym (32) ,
4612     gb-anamorph (33) ,
4613     gb-synonym (34) ,
4614     genotype (35) ,
4615     germline (36) ,
4616     group (37) ,
4617     haplotype (38) ,
4618     identified-by (39) ,
4619     insertion-seq-name (40) ,
4620     isolate (41) ,
4621     isolation-source (42) ,
4622     lab-host (43) ,
4623     lat-lon (44) ,
4624     lineage (45) ,
4625     map (46) ,
4626     metagenome-source (47) ,
4627     metagenomic (48) ,
4628     old-lineage (49) ,
4629     old-name (50) ,
4630     orgmod-note (51) ,
4631     nat-host (52) ,
4632     pathovar (53) ,
4633     plasmid-name (54) ,
4634     plastid-name (55) ,
4635     pop-variant (56) ,
4636     rearranged (57) ,
4637     rev-primer-name (58) ,
4638     rev-primer-seq (59) ,
4639     segment (60) ,
4640     serogroup (61) ,
4641     serotype (62) ,
4642     serovar (63) ,
4643     sex (64) ,
4644     specimen-voucher (65) ,
4645     strain (66) ,
4646     subclone (67) ,
4647     subgroup (68) ,
4648     subsource-note (69),
4649     sub-species (70) ,
4650     substrain (71) ,
4651     subtype (72) ,
4652     synonym (73) ,
4653     taxname (74) ,
4654     teleomorph (75) ,
4655     tissue-lib (76) ,
4656     tissue-type (77) ,
4657     transgenic (78) ,
4658     transposon-name (79) ,
4659     type (80) ,
4660     variety (81) ,
4661     specimen-voucher-INST (82) ,
4662     specimen-voucher-COLL (83) ,
4663     specimen-voucher-SpecID (84) ,
4664     culture-collection-INST (85) ,
4665     culture-collection-COLL (86) ,
4666     culture-collection-SpecID (87) ,
4667     bio-material-INST (88) ,
4668     bio-material-COLL (89) ,
4669     bio-material-SpecID (90),
4670     all-notes (91),
4671     mating-type (92),
4672     linkage-group (93) ,
4673     haplogroup (94),
4674     all-quals (95),
4675     dbxref (96) 
4676 }
4677 
4678 Source-qual-pair ::= SEQUENCE {  -- from/to source qualifiers; alternative "source-qual" of Field-pair-type
4679     field-from Source-qual ,
4680     field-to Source-qual }
4681 
4682 Source-location ::= ENUMERATED {  -- used by the "location" alternative of Source-qual-choice and Source-qual-val-choice
4683     unknown (0) ,
4684     genomic (1) ,
4685     chloroplast (2) ,
4686     chromoplast (3) ,
4687     kinetoplast (4) ,
4688     mitochondrion (5) ,
4689     plastid (6) ,
4690     macronuclear (7) ,
4691     extrachrom (8) ,
4692     plasmid (9) ,
4693     transposon (10) ,
4694     insertion-seq (11) ,
4695     cyanelle (12) ,
4696     proviral (13) ,
4697     virion (14) ,
4698     nucleomorph (15) ,
4699     apicoplast (16) ,
4700     leucoplast (17) ,
4701     proplastid (18) ,
4702     endogenous-virus (19) ,
4703     hydrogenosome (20) ,
4704     chromosome (21) ,
4705     chromatophore (22) }
4706 
4707 Source-origin ::= ENUMERATED {  -- used by the "origin" alternative of Source-qual-choice and Source-qual-val-choice
4708     unknown (0) ,
4709     natural (1) ,
4710     natmut (2) ,
4711     mut (3) ,
4712     artificial (4) ,
4713     synthetic (5) ,
4714     other (255) }  -- note: 255 here, whereas most other enums in this module number "other" sequentially
4715 
4716 Source-qual-choice ::= CHOICE {  -- identifies a source field; used by Source-constraint and Field-type.source-qual
4717     textqual Source-qual ,
4718     location Source-location,
4719     origin Source-origin ,
4720     gcode INTEGER  ,  -- presumably a genetic code id (TODO confirm)
4721     mgcode INTEGER  }  -- presumably a mitochondrial genetic code id (TODO confirm)
4722 
4723 Source-qual-text-val ::= SEQUENCE {  -- a source qualifier paired with a text value; used by Source-qual-val-choice.textqual
4724     srcqual Source-qual ,
4725     val VisibleString } 
4726     
4727 Source-qual-val-choice ::= CHOICE {  -- same alternatives as Source-qual-choice, but textqual also carries a value
4728     textqual Source-qual-text-val ,
4729     location Source-location,
4730     origin Source-origin ,
4731     gcode INTEGER ,
4732     mgcode INTEGER }
4733 
4734 Source-qual-val-set ::= SET OF Source-qual-val-choice  -- unordered collection of source qualifier values
4735 
4736 CDSGeneProt-field ::= ENUMERATED {  -- used by CDSGeneProt-field-pair, CDSGeneProt-constraint-field and Field-type.cds-gene-prot
4737     cds-comment (1) ,
4738     gene-locus (2) ,
4739     gene-description (3) ,
4740     gene-comment (4) ,
4741     gene-allele (5) ,
4742     gene-maploc (6) ,
4743     gene-locus-tag (7) ,
4744     gene-synonym (8) ,
4745     gene-old-locus-tag (9) ,
4746     mrna-product (10) ,
4747     mrna-comment (11) ,
4748     prot-name (12) ,
4749     prot-description (13) ,
4750     prot-ec-number (14) ,
4751     prot-activity (15) ,
4752     prot-comment (16) ,
4753     mat-peptide-name (17) ,
4754     mat-peptide-description (18) ,
4755     mat-peptide-ec-number (19) ,
4756     mat-peptide-activity (20) ,
4757     mat-peptide-comment (21) ,
4758     cds-inference (22) ,
4759     gene-inference (23) ,
4760     codon-start (24) }
4761 
4762 CDSGeneProt-field-pair ::= SEQUENCE {  -- from/to fields; alternative "cds-gene-prot" of Field-pair-type
4763     field-from CDSGeneProt-field ,
4764     field-to CDSGeneProt-field }
4765 
4766 Molecule-type ::= ENUMERATED {  -- used by Molinfo-field.molecule and Molinfo-molecule-pair
4767   unknown (0) ,
4768   genomic (1) ,
4769   precursor-RNA (2) ,
4770   mRNA (3) ,
4771   rRNA (4) ,
4772   tRNA (5) ,
4773   genomic-mRNA (6) ,
4774   cRNA (7) ,
4775   transcribed-RNA (8) ,
4776   ncRNA (9) ,
4777   transfer-messenger-RNA (10) ,
4778   other (11) }
4779 
4780 Technique-type ::= ENUMERATED {  -- used by Molinfo-field.technique and Molinfo-technique-pair
4781   unknown (0) , 
4782   standard (1) ,
4783   est (2) ,
4784   sts (3) ,
4785   survey (4) ,
4786   genetic-map (5) ,
4787   physical-map (6) ,
4788   derived (7) ,
4789   concept-trans (8) ,
4790   seq-pept (9) ,
4791   both (10) ,
4792   seq-pept-overlap (11) ,
4793   seq-pept-homol (12) , 
4794   concept-trans-a (13) ,
4795   htgs-1 (14) ,
4796   htgs-2 (15) ,
4797   htgs-3 (16) ,
4798   fli-cDNA (17) ,
4799   htgs-0 (18) ,
4800   htc (19) ,
4801   wgs (20) ,
4802   barcode (21) ,
4803   composite-wgs-htgs (22) ,
4804   tsa (23) ,
4805   other (24) }
4806 
4807 Completedness-type ::= ENUMERATED {  -- used by Molinfo-field.completedness and Molinfo-completedness-pair
4808   unknown (0) ,
4809   complete (1) ,
4810   partial (2) ,
4811   no-left (3) ,
4812   no-right (4) ,
4813   no-ends (5) ,
4814   has-left (6) ,
4815   has-right (7) ,
4816   other (8) }  -- was (6), which collided with has-left; ENUMERATED values must be distinct. Regenerate any code built from this spec.
4817 
4818 Molecule-class-type ::= ENUMERATED {  -- used by Molinfo-field.mol-class and Molinfo-mol-class-pair
4819   unknown (0) ,
4820   dna (1) ,
4821   rna (2) ,
4822   protein (3) ,
4823   nucleotide (4),
4824   other (5) }
4825 
4826 Topology-type ::= ENUMERATED {  -- used by Molinfo-field.topology and Molinfo-topology-pair
4827   unknown (0) ,
4828   linear (1) ,
4829   circular (2) ,
4830   tandem (3) ,
4831   other (4) }
4832 
4833 Strand-type ::= ENUMERATED {  -- used by Molinfo-field.strand and Molinfo-strand-pair
4834   unknown (0) ,
4835   single (1) ,
4836   double (2) ,
4837   mixed (3) ,
4838   mixed-rev (4) ,
4839   other (5) }
4840 
4841 Molinfo-field ::= CHOICE {  -- one molinfo attribute with its value; used by Field-type.molinfo-field and Molinfo-field-list
4842     molecule Molecule-type ,
4843     technique Technique-type ,
4844     completedness Completedness-type ,
4845     mol-class Molecule-class-type ,
4846     topology Topology-type ,
4847     strand Strand-type }
4848 
4849 Molinfo-molecule-pair ::= SEQUENCE {  -- from/to values; alternative "molecule" of Molinfo-field-pair
4850     from Molecule-type ,
4851     to Molecule-type }
4852 
4853 Molinfo-technique-pair ::= SEQUENCE {  -- from/to values; alternative "technique" of Molinfo-field-pair
4854     from Technique-type ,
4855     to Technique-type }
4856 
4857 Molinfo-completedness-pair ::= SEQUENCE {  -- from/to values; alternative "completedness" of Molinfo-field-pair
4858     from Completedness-type ,
4859     to Completedness-type }
4860 
4861 Molinfo-mol-class-pair ::= SEQUENCE {  -- from/to values; alternative "mol-class" of Molinfo-field-pair
4862     from Molecule-class-type ,
4863     to Molecule-class-type }
4864 
4865 Molinfo-topology-pair ::= SEQUENCE {  -- from/to values; alternative "topology" of Molinfo-field-pair
4866     from Topology-type ,
4867     to Topology-type }
4868 
4869 Molinfo-strand-pair ::= SEQUENCE {  -- from/to values; alternative "strand" of Molinfo-field-pair
4870     from Strand-type ,
4871     to Strand-type }
4872 
4873 Molinfo-field-pair ::= CHOICE {  -- pair form of Molinfo-field, same alternative names; used by Field-pair-type.molinfo-field
4874     molecule Molinfo-molecule-pair ,
4875     technique Molinfo-technique-pair ,
4876     completedness Molinfo-completedness-pair ,
4877     mol-class Molinfo-mol-class-pair ,
4878     topology Molinfo-topology-pair ,
4879     strand Molinfo-strand-pair }
4880 
4881 Molinfo-field-list ::= SET OF Molinfo-field  -- unordered collection of molinfo attribute values
4882 
4883 -- publication fields --
4884 
4885 Publication-field ::=  ENUMERATED {  -- used by Pub-field-constraint.field and Field-type.pub
4886     cit (1) ,
4887     authors (2) ,
4888     journal (3) ,
4889     volume (4) ,
4890     issue (5) ,
4891     pages (6) ,
4892     date (7) ,
4893     serial-number (8) ,
4894     title (9) ,
4895     affiliation (10) ,
4896     affil-div (11) ,
4897     affil-city (12) ,
4898     affil-sub (13) ,
4899     affil-country (14) ,
4900     affil-street (15) ,
4901     affil-email (16) ,
4902     affil-fax (17) ,
4903     affil-phone (18) ,
4904     affil-zipcode (19),
4905     authors-initials (20)
4906     }
4907   
4908 -- structured comment fields --
4909 
4910 Structured-comment-field ::= CHOICE {  -- used by Structured-comment-field-pair and Field-type.struc-comment-field
4911   database NULL ,
4912   named VisibleString ,  -- the only alternative carrying a value; presumably the field name to address (TODO confirm)
4913   field-name NULL
4914   }
4915 
4916 Structured-comment-field-pair ::= SEQUENCE {  -- from/to fields; alternative "struc-comment-field" of Field-pair-type
4917   from Structured-comment-field ,
4918   to Structured-comment-field
4919   }
4920   
4921 -- misc fields --
4922 -- these would not appear in pairs --
4923 Misc-field ::= ENUMERATED {  -- used by Field-type.misc; Field-pair-type has no matching alternative
4924     genome-project-id (1) ,
4925     comment-descriptor (2) ,
4926     defline (3) ,
4927     keyword (4)
4928     }
4929      
4930 -- complex constraints --
4931 
4932 Pub-type ::= ENUMERATED {  -- used by Publication-constraint.type
4933   any (0) ,
4934   published (1) ,
4935   unpublished (2) ,
4936   in-press (3) ,
4937   submitter-block (4) }
4938 
4939 Pub-field-constraint ::= SEQUENCE {  -- string match applied to one publication field; used by Publication-constraint.field
4940   field Publication-field ,
4941   constraint String-constraint }
4942   
4943 Publication-constraint ::= SEQUENCE {  -- alternative "pub" of Constraint-choice
4944   type Pub-type ,  -- mandatory; Pub-type provides "any" for no restriction
4945   field Pub-field-constraint OPTIONAL }
4946 
4947 Source-constraint ::= SEQUENCE {  -- alternative "source" of Constraint-choice; every field is OPTIONAL
4948   field1 Source-qual-choice OPTIONAL ,
4949   field2 Source-qual-choice OPTIONAL ,  -- presumably compared against field1 when both are given (TODO confirm)
4950   constraint String-constraint OPTIONAL ,
4951   type-constraint Object-type-constraint OPTIONAL }
4952 
4953 CDSGeneProt-feature-type-constraint ::= ENUMERATED {  -- used by CDSGeneProt-pseudo-constraint.feature
4954     gene (1) ,
4955     mRNA (2) ,
4956     cds (3) ,
4957     prot (4) ,
4958     exon (5) ,
4959     mat-peptide (6) }
4960 
4961 CDSGeneProt-pseudo-constraint ::= SEQUENCE {  -- alternative "cdsgeneprot-pseudo" of Constraint-choice
4962     feature CDSGeneProt-feature-type-constraint ,
4963     is-pseudo BOOLEAN DEFAULT TRUE }  -- defaults to TRUE when omitted
4964 
4965 CDSGeneProt-constraint-field ::= CHOICE {  -- single-alternative CHOICE; used by CDSGeneProt-qual-constraint.field1 / field2
4966   field CDSGeneProt-field }
4967 
4968 CDSGeneProt-qual-constraint ::= SEQUENCE {  -- alternative "cdsgeneprot-qual" of Constraint-choice; every field is OPTIONAL
4969   field1 CDSGeneProt-constraint-field OPTIONAL ,
4970   field2 CDSGeneProt-constraint-field OPTIONAL ,
4971   constraint String-constraint OPTIONAL }
4972 
4973 Field-constraint ::= SEQUENCE {  -- alternative "field" of Constraint-choice; both members are mandatory
4974   field Field-type ,  -- Field-type is defined further down in this module (forward reference)
4975   string-constraint String-constraint }
4976 
4977 Sequence-constraint-rnamol ::= ENUMERATED {  -- values 1..10 have the same names as Molecule-type; 0 is "any" and there is no "other"
4978   any (0) ,
4979   genomic (1) ,
4980   precursor-RNA (2) ,
4981   mRNA (3) ,
4982   rRNA (4) ,
4983   tRNA (5) ,
4984   genomic-mRNA (6) ,
4985   cRNA (7) ,
4986   transcribed-RNA (8) ,
4987   ncRNA (9) ,
4988   transfer-messenger-RNA (10) }
4989 
4990 Sequence-constraint-mol-type-constraint ::= CHOICE {  -- used by Sequence-constraint.seqtype
4991   any NULL ,
4992   nucleotide NULL ,
4993   dna NULL ,
4994   rna Sequence-constraint-rnamol ,  -- the only alternative carrying a value
4995   protein NULL }
4996 
4997 Sequence-constraint ::= SEQUENCE {  -- alternative "sequence" of Constraint-choice
4998     seqtype Sequence-constraint-mol-type-constraint OPTIONAL ,
4999     id String-constraint OPTIONAL ,
5000     feature Feature-type }  -- mandatory; Feature-type provides "any" for no restriction
5001 
5002 Constraint-choice ::= CHOICE {  -- holds exactly one of eight constraint kinds
5003     string String-constraint ,
5004     location Location-constraint ,
5005     field  Field-constraint ,
5006     source Source-constraint ,
5007     cdsgeneprot-qual CDSGeneProt-qual-constraint ,
5008     cdsgeneprot-pseudo CDSGeneProt-pseudo-constraint ,
5009     sequence Sequence-constraint ,
5010     pub Publication-constraint }
5011 
5012 Constraint-choice-set ::= SET OF Constraint-choice  -- collection of constraints; NOTE(review): whether members are combined with AND or OR is not stated here
5013 
5014 Text-portion ::= SEQUENCE {  -- optional left and right text strings plus flags; NOTE(review): presumably delimits a span of text, confirm in consuming code
5015     left-text VisibleString OPTIONAL ,
5016     include-left BOOLEAN ,  -- required; no default is declared
5017     right-text VisibleString OPTIONAL ,
5018     include-right BOOLEAN ,  -- required; no default is declared
5019     inside BOOLEAN ,  -- required; meaning is not stated here
5020     case-sensitive BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5021     whole-word BOOLEAN DEFAULT FALSE }  -- FALSE when omitted
5022 
5023 Field-edit-location ::= ENUMERATED {  -- three values numbered 0 to 2; used as the location member of Field-edit
5024     anywhere (0) ,  -- the value Field-edit falls back to when location is omitted
5025     beginning (1) ,
5026     end (2) }
5027 
5028 Field-edit ::= SEQUENCE {  -- a required find string, an optional replacement string and a location
5029     find-txt VisibleString ,
5030     repl-txt VisibleString OPTIONAL ,  -- may be omitted; NOTE(review): presumably omission means delete the match, confirm
5031     location Field-edit-location DEFAULT anywhere }  -- anywhere when omitted
5032 
5033 Field-type ::= CHOICE {  -- identifies a single field; exactly one of eight alternatives
5034     source-qual Source-qual-choice ,
5035     feature-field Feature-field ,
5036     rna-field Rna-qual ,
5037     cds-gene-prot CDSGeneProt-field ,
5038     molinfo-field Molinfo-field ,
5039     pub Publication-field ,
5040     struc-comment-field Structured-comment-field ,
5041     misc Misc-field }
5042 
5043 Field-pair-type ::= CHOICE {  -- exactly one of six pair types; alternative names mirror the first five and the seventh of Field-type
5044     source-qual Source-qual-pair ,
5045     feature-field Feature-field-pair ,
5046     rna-field Rna-qual-pair ,
5047     cds-gene-prot CDSGeneProt-field-pair ,
5048     molinfo-field Molinfo-field-pair ,
5049     struc-comment-field Structured-comment-field-pair }  -- there is no pub or misc alternative here, unlike Field-type
5050 
5051 ExistingTextOption ::= ENUMERATED {  -- thirteen values numbered 1 to 13, no 0 value; NOTE(review): presumably selects how a new value is combined with text already present, confirm
5052   replace-old (1) ,
5053   append-semi (2) ,
5054   append-space (3) ,
5055   append-colon (4) ,
5056   append-comma (5) ,
5057   append-none (6) ,
5058   prefix-semi (7) ,
5059   prefix-space (8) ,
5060   prefix-colon (9) ,
5061   prefix-comma (10) ,
5062   prefix-none (11) ,
5063   leave-old (12) ,
5064   add-qual (13) }
5065 
5066 
5067 Apply-action ::= SEQUENCE {  -- a target field, a string value and an existing-text option; all three are required
5068     field Field-type ,
5069     value VisibleString ,
5070     existing-text ExistingTextOption }
5071 
5072 Edit-action ::= SEQUENCE {  -- a Field-edit description plus the Field-type it targets; both are required
5073     edit Field-edit ,
5074     field Field-type }
5075 
5076 Convert-action ::= SEQUENCE {  -- a field pair, two flags and an existing-text option
5077     fields Field-pair-type ,
5078     strip-name BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5079     keep-original BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5080     existing-text ExistingTextOption }  -- required
5081 
5082 Copy-action ::= SEQUENCE {  -- a field pair plus an existing-text option; both are required
5083     fields Field-pair-type ,
5084     existing-text ExistingTextOption }
5085 
5086 Swap-action ::= SEQUENCE {  -- a field pair plus one further field; both are required
5087     fields Field-pair-type ,
5088     field-to Field-type }  -- NOTE(review): role of field-to alongside the pair is not stated here; confirm in consuming code
5089 
5090 AECRParse-action ::= SEQUENCE {  -- parse variant offered through Action-choice: a text portion, a field pair, three flags and an existing-text option
5091     portion Text-portion ,
5092     fields Field-pair-type ,
5093     remove-from-parsed BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5094     remove-left BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5095     remove-right BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5096     existing-text ExistingTextOption }  -- required
5097 
5098 Remove-action ::= SEQUENCE {  -- single required member naming the field
5099     field Field-type }
5100 
5101 Action-choice ::= CHOICE {  -- exactly one of seven field-level actions; used as the action member of AECR-action
5102     apply Apply-action ,
5103     edit Edit-action ,
5104     convert Convert-action ,
5105     copy Copy-action ,
5106     swap Swap-action ,
5107     remove Remove-action ,
5108     parse AECRParse-action }  -- distinct from the separate Parse-action type
5109 
5110 AECR-action ::= SEQUENCE {  -- one Action-choice with an mRNA flag and an optional constraint set
5111     action Action-choice ,
5112     also-change-mrna BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5113     constraint Constraint-choice-set OPTIONAL }  -- may be omitted
5114 
5115 Cap-change ::= ENUMERATED {  -- five values numbered 0 to 4; used as the capitalization member of Parse-action
5116     none (0) ,  -- the value Parse-action falls back to when capitalization is omitted
5117     tolower (1) ,
5118     toupper (2) ,
5119     firstcap (3) ,
5120     firstcaprestnochange (4) }
5121 
5122 Parse-src-org-choice ::= CHOICE {  -- either a Source-qual value or the valueless taxname-after-binomial marker
5123     source-qual Source-qual ,
5124     taxname-after-binomial NULL }
5125 
5126 Parse-src-org ::= SEQUENCE {  -- a required Parse-src-org-choice plus an object type constraint
5127     field Parse-src-org-choice ,
5128     type Object-type-constraint DEFAULT any }  -- any when omitted; Object-type-constraint is defined outside this section
5129 
5130 Parse-src ::= CHOICE {  -- exactly one of eight alternatives; used as the src member of Parse-action
5131     defline NULL ,
5132     flatfile NULL ,
5133     local-id NULL ,
5134     org Parse-src-org ,  -- carries a Parse-src-org value
5135     comment NULL ,
5136     bankit-comment NULL ,
5137     structured-comment VisibleString ,  -- carries a string; NOTE(review): presumably a structured comment field name, confirm
5138     file-id NULL }
5139 
5140 Parse-dst-org ::= SEQUENCE {  -- a required Source-qual-choice plus an object type constraint
5141     field Source-qual-choice ,
5142     type Object-type-constraint DEFAULT any }  -- any when omitted
5143 
5144 Parse-dest ::= CHOICE {  -- exactly one of five alternatives; used as the dest member of Parse-action
5145     defline NULL ,
5146     org Parse-dst-org ,
5147     featqual Feature-field-legal ,
5148     comment-descriptor NULL ,
5149     dbxref VisibleString }  -- carries a string; NOTE(review): presumably a database name, confirm
5150 
5151 Parse-action ::= SEQUENCE {  -- a text portion, a source, a destination, and options
5152     portion Text-portion ,
5153     src Parse-src ,
5154     dest Parse-dest ,
5155     capitalization Cap-change DEFAULT none ,  -- none when omitted
5156     remove-from-parsed BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5157     existing-text ExistingTextOption }  -- required
5158 
5159 
5160 Location-interval ::= SEQUENCE {  -- two required integers; NOTE(review): coordinate base and inclusiveness are not stated here, confirm
5161     from INTEGER ,
5162     to INTEGER  }
5163 
5164 Location-choice ::= CHOICE {  -- either an explicit Location-interval or the valueless whole-sequence marker
5165     interval Location-interval ,
5166     whole-sequence NULL }
5167 
5168 Sequence-list ::= SET OF VisibleString  -- collection of strings; NOTE(review): presumably sequence identifiers, confirm
5169 Sequence-list-choice ::= CHOICE {  -- either an explicit Sequence-list or the valueless all marker
5170     list Sequence-list ,
5171     all NULL }
5172     
5173 Apply-feature-action ::= SEQUENCE {  -- add-feature alternative of Macro-action-choice: feature type, location, sequence selection and flags
5174     type Feature-type ,
5175     partial5 BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5176     partial3 BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5177     plus-strand BOOLEAN DEFAULT TRUE ,  -- TRUE when omitted
5178     location Location-choice ,  -- required
5179     seq-list Sequence-list-choice ,  -- required
5180     add-redundant BOOLEAN DEFAULT TRUE ,  -- TRUE when omitted
5181     add-mrna BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5182     apply-to-parts BOOLEAN DEFAULT FALSE ,  -- FALSE when omitted
5183     only-seg-num INTEGER DEFAULT -1 ,  -- minus one when omitted; NOTE(review): presumably a sentinel for no particular segment, confirm
5184     fields Feat-qual-legal-set OPTIONAL,
5185     src-fields Source-qual-val-set OPTIONAL }
5186 
5187 Remove-feature-action ::= SEQUENCE {  -- a required feature type plus an optional constraint set
5188     type Feature-type ,
5189     constraint Constraint-choice-set OPTIONAL }  -- may be omitted
5190 
5191 -- for convert features --
5192 Convert-from-CDS-options ::= SEQUENCE {  -- three required flags; the only alternative of Convert-feature-src-options
5193   remove-mRNA BOOLEAN ,
5194   remove-gene BOOLEAN ,
5195   remove-transcript-id BOOLEAN }  -- none of the three flags declares a default
5196 
5197 Convert-feature-src-options ::= CHOICE {  -- CHOICE with a single alternative, cds
5198   cds Convert-from-CDS-options }
5199 
5200 Bond-type ::= ENUMERATED {  -- five values numbered 1 to 5, no 0 value; value type of the bond alternative in Convert-feature-dst-options
5201   disulfide (1) ,
5202   thioester (2) ,
5203   crosslink (3) ,
5204   thioether (4) ,
5205   other (5) }
5206 
5207 
5208 Site-type ::= ENUMERATED {  -- twenty-seven values numbered 1 to 27, no 0 value; value type of the site alternative in Convert-feature-dst-options
5209   active (1) ,
5210   binding (2) ,
5211   cleavage (3) ,
5212   inhibit (4) ,
5213   modified (5) ,
5214   glycosylation (6) ,
5215   myristoylation (7) ,
5216   mutagenized (8) ,
5217   metal-binding (9) ,
5218   phosphorylation (10) ,
5219   acetylation (11) ,
5220   amidation (12) ,
5221   methylation (13) ,
5222   hydroxylation (14) ,
5223   sulfatation (15) ,
5224   oxidative-deamination (16) ,
5225   pyrrolidone-carboxylic-acid (17) ,
5226   gamma-carboxyglutamic-acid (18) ,
5227   blocked (19) ,
5228   lipid-binding (20) ,
5229   np-binding (21) ,
5230   dna-binding (22) ,
5231   signal-peptide (23) ,
5232   transit-peptide (24) ,
5233   transmembrane-region (25) ,
5234   nitrosylation (26) ,
5235   other (27) }  -- other is the highest value, 27
5236 
5237 -- other choice is to create protein sequences, skipping bad --
5238 Region-type ::= SEQUENCE {  -- single required flag; value type of the region alternative in Convert-feature-dst-options
5239   create-nucleotide BOOLEAN }  -- no default is declared, so the flag must always be present
5240 
5241 Convert-feature-dst-options ::= CHOICE {  -- exactly one of five destination options; used as dst-options in Convert-feature-action
5242   bond Bond-type ,
5243   site Site-type ,
5244   region Region-type ,
5245   ncrna-class VisibleString ,
5246   remove-original BOOLEAN }  -- NOTE(review): Convert-feature-action also has a leave-original flag; how the two interact is not stated here
5247 
5248 
5249 Convert-feature-action ::= SEQUENCE {  -- convert-feature alternative of Macro-action-choice: source and destination feature types plus options
5250   type-from Feature-type ,
5251   type-to Feature-type ,
5252   src-options Convert-feature-src-options OPTIONAL ,  -- may be omitted
5253   dst-options Convert-feature-dst-options OPTIONAL ,  -- may be omitted
5254   leave-original BOOLEAN ,  -- required; no default is declared
5255   src-feat-constraint Constraint-choice-set OPTIONAL }  -- may be omitted
5256 
5257 
5258 Feature-location-strand-from ::= ENUMERATED {  -- five values numbered 0 to 4; type of strand-from in Edit-location-strand
5259   any (0) ,  -- present here but absent from Feature-location-strand-to
5260   plus (1) ,
5261   minus (2) ,
5262   unknown (3) ,
5263   both (4) }
5264 
5265 Feature-location-strand-to ::= ENUMERATED {  -- five values numbered 1 to 5, no 0 value; type of strand-to in Edit-location-strand
5266   plus (1) ,
5267   minus (2) ,
5268   unknown (3) ,
5269   both (4) ,
5270   reverse (5) }  -- present here but absent from Feature-location-strand-from
5271 
5272 Edit-location-strand ::= SEQUENCE {  -- a from strand and a to strand; both are required
5273   strand-from Feature-location-strand-from ,
5274   strand-to   Feature-location-strand-to }  -- uses a different enumeration than strand-from
5275 
5276 Partial-5-set-constraint ::= ENUMERATED {  -- four values numbered 0 to 3; type of constraint in Partial-5-set-action
5277   all (0) ,
5278   at-end (1) ,
5279   bad-start (2) ,
5280   frame-not-one (3) }  -- has no counterpart in Partial-3-set-constraint
5281 
5282 Partial-5-set-action ::= SEQUENCE {  -- a constraint value plus an extend flag; both are required
5283   constraint Partial-5-set-constraint ,
5284   extend BOOLEAN }  -- no default is declared
5285 
5286 Partial-5-clear-constraint ::= ENUMERATED {  -- three values numbered 0 to 2; used directly as the clear-5-partial alternative of Location-edit-type
5287   all (0) ,
5288   not-at-end (1) ,
5289   good-start (2) }
5290 
5291 Partial-3-set-constraint ::= ENUMERATED {  -- three values numbered 0 to 2; type of constraint in Partial-3-set-action
5292   all (0) ,
5293   at-end (1) ,
5294   bad-end (2) }
5295 
5296 Partial-3-set-action ::= SEQUENCE {  -- a constraint value plus an extend flag; both are required
5297   constraint Partial-3-set-constraint ,
5298   extend BOOLEAN }  -- no default is declared
5299 
5300 Partial-3-clear-constraint ::= ENUMERATED {  -- three values numbered 0 to 2; used directly as the clear-3-partial alternative of Location-edit-type
5301   all (0) ,
5302   not-at-end (1) ,
5303   good-end (2) }
5304 
5305 Convert-location-type ::= ENUMERATED {  -- three values numbered 1 to 3, no 0 value; type of the convert alternative in Location-edit-type
5306   join (1) ,
5307   order (2) ,
5308   merge (3) }
5309 
5310 Location-edit-type ::= CHOICE {  -- exactly one of six location edits; used as the action member of Edit-feature-location-action
5311   strand Edit-location-strand ,
5312   set-5-partial Partial-5-set-action ,  -- set alternatives carry an action SEQUENCE with an extend flag
5313   clear-5-partial Partial-5-clear-constraint ,  -- clear alternatives carry only an enumeration value
5314   set-3-partial Partial-3-set-action ,
5315   clear-3-partial Partial-3-clear-constraint ,
5316   convert Convert-location-type }
5317 
5318 Edit-feature-location-action ::= SEQUENCE {  -- edit-location alternative of Macro-action-choice: feature type, one location edit, optional constraints
5319   type Feature-type ,
5320   action Location-edit-type ,
5321   constraint Constraint-choice-set OPTIONAL }  -- may be omitted
5322 
5323 Molinfo-block ::= SEQUENCE {  -- a required to-list, an optional from-list and an optional constraint set; not referenced by Macro-action-choice in this section
5324     to-list Molinfo-field-list  ,
5325     from-list Molinfo-field-list OPTIONAL ,  -- may be omitted; NOTE(review): presumably the values to match before changing to to-list, confirm
5326     constraint Constraint-choice-set OPTIONAL }  -- may be omitted
5327 
5328 Descriptor-type ::= ENUMERATED {  -- twelve values numbered 0 to 11; type member of Remove-descriptor-action
5329   all (0) ,
5330   title (1) ,
5331   source (2) ,
5332   publication (3) ,
5333   comment (4) ,
5334   genbank (5) ,
5335   user (6) ,
5336   create-date (7) ,
5337   update-date (8) ,
5338   mol-info (9) ,
5339   structured-comment (10) ,
5340   genome-project-id (11) }
5341 
5342 Remove-descriptor-action ::= SEQUENCE {  -- remove-descriptor alternative of Macro-action-choice: a required descriptor type plus optional constraints
5343   type Descriptor-type ,
5344   constraint Constraint-choice-set OPTIONAL }  -- may be omitted
5345 
5346 Autodef-list-type ::= ENUMERATED {  -- three values numbered 1 to 3, no 0 value; type of clause-list-type in Autodef-action
5347   feature-list (1) ,
5348   complete-sequence (2) ,
5349   complete-genome (3) }
5350   
5351 Autodef-action ::= SEQUENCE {  -- autodef alternative of Macro-action-choice: optional modifier set plus a required clause list type
5352   modifiers SET OF Source-qual OPTIONAL ,  -- may be omitted
5353   clause-list-type Autodef-list-type }
5354 
5355 Macro-action-choice ::= CHOICE {  -- exactly one of eight macro actions; element type of Macro-action-list
5356   aecr AECR-action ,
5357   parse Parse-action ,  -- the standalone Parse-action, not the AECRParse-action reachable through aecr
5358   add-feature Apply-feature-action ,
5359   remove-feature Remove-feature-action ,
5360   convert-feature Convert-feature-action ,
5361   edit-location Edit-feature-location-action ,
5362   remove-descriptor Remove-descriptor-action ,
5363   autodef Autodef-action }
5364 
5365 
5366 Macro-action-list ::= SET OF Macro-action-choice  -- NOTE(review): declared as SET OF, whose element order is not significant in standard ASN.1; if actions must run in file order, confirm the toolkit preserves it
5367 
5368 END

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.