NCBI C++ ToolKit
gc_assembly_parser.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 #ifndef OBJMGR_GC_ASSEMBLY_PARSER__HPP
00002 #define OBJMGR_GC_ASSEMBLY_PARSER__HPP
00003 
00004 /*  $Id: gc_assembly_parser.hpp 61899 2014-02-25 18:00:57Z grichenk $
00005 * ===========================================================================
00006 *
00007 *                            PUBLIC DOMAIN NOTICE
00008 *               National Center for Biotechnology Information
00009 *
00010 *  This software/database is a "United States Government Work" under the
00011 *  terms of the United States Copyright Act.  It was written as part of
00012 *  the author's official duties as a United States Government employee and
00013 *  thus cannot be copyrighted.  This software/database is freely available
00014 *  to the public for use. The National Library of Medicine and the U.S.
00015 *  Government have not placed any restriction on its use or reproduction.
00016 *
00017 *  Although all reasonable efforts have been taken to ensure the accuracy
00018 *  and reliability of the software and data, the NLM and the U.S.
00019 *  Government do not and cannot warrant the performance or results that
00020 *  may be obtained by using this software or data. The NLM and the U.S.
00021 *  Government disclaim all warranties, express or implied, including
00022 *  warranties of performance, merchantability or fitness for any particular
00023 *  purpose.
00024 *
00025 *  Please cite the author in any work or product based on this material.
00026 *
00027 * ===========================================================================
00028 *
00029 * Authors:
00030 *           Aleksey Grichenko
00031 *
00032 * File Description:
00033 *           GC-Assembly parser used by CScope and CSeq_loc_Mapper to
00034 *           convert assemblies to seq-entries.
00035 *
00036 */
00037 
00038 #include <objects/seqset/Seq_entry.hpp>
00039 #include <objects/seq/seq_id_handle.hpp>
00040 #include <objects/genomecoll/GC_Assembly.hpp>
00041 #include <corelib/ncbiobj.hpp>
00042 #include <set>
00043 
00044 BEGIN_NCBI_SCOPE
00045 BEGIN_SCOPE(objects)
00046 
00047 /** @addtogroup ObjectManagerCore
00048  *
00049  * @{
00050  */
00051 
00052 
00053 // fwd decl
00054 class CGC_AssemblyDesc;
00055 
00056 
00057 /////////////////////////////////////////////////////////////////////////////
00058 ///
00059 ///  CGC_Assembly_Parser --
00060 ///
00061 ///    GC-Assembly parser used by CScope and CSeq_loc_Mapper to
00062 ///    convert assemblies to seq-entries.
00063 ///
00064 
00065 class NCBI_XOBJMGR_EXPORT CGC_Assembly_Parser : public CObject
00066 {
00067 public:
00068     /// Parser options (bit flags; combine with bitwise OR into TParserFlags).
00069     enum FParserFlags {
00070         /// Do not add local private and external ids to bioseqs.
00071         fIgnoreLocalIds             = 1 << 0,
00072         /// Do not add external ids to bioseqs.
00073         fIgnoreExternalIds          = 1 << 1,
00074         /// Do not add annotations to seq-entries and bioseqs.
00075         fIgnoreAnnots               = 1 << 2,
00076         /// Do not add descriptions to seq-entries and bioseqs.
00077         fIgnoreDescr                = 1 << 3,
00078         /// Skip duplicate sequences (all synonyms are checked).
00079         fSkipDuplicates             = 1 << 4,
00080         /// Default flag set. Note that fIgnoreExternalIds is NOT part of it.
00081         fDefault = fIgnoreLocalIds | fIgnoreAnnots | fIgnoreDescr | fSkipDuplicates
00082     };
00083     typedef int TParserFlags;  ///< Integer holding a combination of FParserFlags.
00084 
00085     /// Parse the assembly, convert it to seq-entry, collect additional
00086     /// information (top-level sequences etc).
00087     CGC_Assembly_Parser(const CGC_Assembly& assembly,
00088                         TParserFlags        flags = fDefault);
00089 
00090     virtual ~CGC_Assembly_Parser(void);
00091 
00092     /// Get the stored seq-entry (m_TSE); nothing is created by this call.
00093     CRef<CSeq_entry> GetTSE(void) const { return m_TSE; }
00094 
00095     typedef set<CSeq_id_Handle> TSeqIds;  ///< Set of seq-id handles.
00096 
00097     /// Get seq-ids for all top-level sequences in the assembly.
00098     const TSeqIds& GetTopLevelSequences(void) const { return m_TopSeqs; }
00099 
00100 private:  // Internal helpers; all are defined out of line (bodies not in this header).
00101     void x_ParseGCAssembly(const CGC_Assembly& gc_assembly,
00102                            CRef<CSeq_entry>    parent_entry);
00103     void x_ParseGCSequence(const CGC_Sequence& gc_seq,
00104                            const CGC_Sequence* parent_seq,
00105                            CRef<CSeq_entry>    parent_entry,
00106                            CRef<CSeq_id>       override_id);
00107     void x_AddBioseq(CRef<CSeq_entry>  parent_entry,
00108                      const TSeqIds&    synonyms,
00109                      const CDelta_ext* delta);
00110     void x_InitSeq_entry(CRef<CSeq_entry> entry,
00111                          CRef<CSeq_entry> parent);
00112     void x_CopyData(const CGC_AssemblyDesc& assm_desc,
00113                       CSeq_entry&              entry);
00114 
00115     TParserFlags     m_Flags;    ///< Parser flags; presumably the constructor's 'flags' argument (TODO confirm).
00116     CRef<CSeq_entry> m_TSE;      ///< Seq-entry handed out by GetTSE().
00117     TSeqIds          m_TopSeqs;  ///< Ids handed out by GetTopLevelSequences().
00118     TSeqIds          m_AllSeqs;  ///< NOTE(review): presumably all ids seen so far, for fSkipDuplicates -- confirm in the .cpp.
00119 };
00120 
00121 
00122 /// Exception type for GC-Assembly parser errors.
00123 class NCBI_XOBJMGR_EXPORT CAssemblyParserException : public CException
00124 {
00125 public:
00126     enum EErrCode {
00127         eUnsupported,    ///< Unsupported type/flag.
00128         eOtherError      ///< NOTE(review): presumably any error not covered above -- confirm usage.
00129     };
00130     virtual const char* GetErrCodeString(void) const;  ///< NOTE(review): presumably the text name of the current EErrCode; defined out of line.
00131     NCBI_EXCEPTION_DEFAULT(CAssemblyParserException, CException);
00132 };
00133 
00134 
00135 /////////////////////////////////////////////////////////////////////////////
00136 // CGC_Assembly_Parser inline methods
00137 /////////////////////////////////////////////////////////////////////////////
00138 
00139 
00140 
00141 
00142 /** @} */
00143 
00144 
00145 END_SCOPE(objects)
00146 END_NCBI_SCOPE
00147 
00148 #endif//OBJMGR_GC_ASSEMBLY_PARSER__HPP
Modified on Wed Jan 28 16:37:21 2015 by modify_doxy.py rev. 426318