include/algo/align/nw/nw_formatter.hpp

Go to the documentation of this file.
00001 #ifndef ALGO_ALIGN__NW_FORMAT__HPP
00002 #define ALGO_ALIGN__NW_FORMAT__HPP
00003 
00004 /* $Id: nw_formatter.hpp 138540 2008-08-26 19:14:06Z kapustin $
00005 * ===========================================================================
00006 *
00007 *                            PUBLIC DOMAIN NOTICE                          
00008 *               National Center for Biotechnology Information
00009 *                                                                          
00010 *  This software/database is a "United States Government Work" under the   
00011 *  terms of the United States Copyright Act.  It was written as part of    
00012 *  the author's official duties as a United States Government employee and 
00013 *  thus cannot be copyrighted.  This software/database is freely available 
00014 *  to the public for use. The National Library of Medicine and the U.S.    
00015 *  Government have not placed any restriction on its use or reproduction.  
00016 *                                                                          
00017 *  Although all reasonable efforts have been taken to ensure the accuracy  
00018 *  and reliability of the software and data, the NLM and the U.S.          
00019 *  Government do not and cannot warrant the performance or results that    
00020 *  may be obtained by using this software or data. The NLM and the U.S.    
00021 *  Government disclaim all warranties, express or implied, including       
00022 *  warranties of performance, merchantability or fitness for any particular
00023 *  purpose.                                                                
00024 *                                                                          
00025 *  Please cite the author in any work or product based on this material.
00026 *
00027 * ===========================================================================
00028 *
00029 * Author:  Yuri Kapustin
00030 *
00031 * File Description:
00032 *   Library's formatting functionality.
00033 */
00034 
00035 /** @addtogroup AlgoAlignFormat
00036  *
00037  * @{
00038  */
00039 
00040 #include <corelib/ncbistd.hpp>
00041 #include <corelib/ncbiobj.hpp>
00042 #include <objects/seqloc/Na_strand.hpp>
00043 #include <objects/seqloc/Seq_id.hpp>
00044 #include <algo/align/nw/nw_spliced_aligner.hpp>
00045 
00046 #include <deque>
00047 
00048 BEGIN_NCBI_SCOPE
00049 
00050 
00051 BEGIN_SCOPE(objects)
00052     class CSeq_align;
00053 END_SCOPE(objects)
00054 
00055 
00056 class  CNWFormatter: public CObject  // formatter for CNWAligner alignments (text or Seq-align output)
00057 {
00058 public:
00059     // NOTE(review): the aligner is taken by reference; presumably kept in m_aligner (raw pointer) - confirm in the .cpp
00060     CNWFormatter(const CNWAligner& aligner);
00061     
00062     // Supported text formats; one of these is passed as the 'type' argument of AsText()
00063     enum ETextFormatType {
00064         eFormatType1,
00065         eFormatType2,
00066         eFormatAsn,
00067         eFormatFastA,
00068         eFormatExonTable,  // spliced alignments
00069         eFormatExonTableEx // spliced alignments; TODO: document how the "Ex" variant differs
00070     };
00071 
00072     // Seq-align format flags, passed as the 'flags' argument of AsSeqAlign().
00073     enum ESeqAlignFormatFlags { // NOTE(review): values are distinct bits (0, 1, 2), but AsSeqAlign() takes the enum type, so combining them needs a cast - confirm intended usage
00074         eSAFF_None = 0,
00075         eSAFF_DynProgScore = 1,
00076         eSAFF_Identity = 2
00077     };
00078 
00079     // setters
00080     // NOTE(review): presumably id1/id2 are stored in m_Seq1Id/m_Seq2Id - confirm in the .cpp
00081     void SetSeqIds(CConstRef<objects::CSeq_id> id1, 
00082                    CConstRef<objects::CSeq_id> id2);
00083 
00084     // formatters
00085     // Text output: the result is delivered through *output; line_width defaults to 100
00086     void AsText(string* output, ETextFormatType type,
00087                 size_t line_width = 100) const;
00088     // Seq-align output: start and strand are supplied separately for query and subject; flags default to eSAFF_None
00089     CRef<objects::CSeq_align> AsSeqAlign (
00090         TSeqPos query_start, objects::ENa_strand query_strand,
00091         TSeqPos subj_start,  objects::ENa_strand subj_strand,
00092         ESeqAlignFormatFlags flags = eSAFF_None) const;
00093 
00094 
00095     // SSegment is a structural unit of a spliced alignment. It represents
00096     // either an exon or an unaligned segment (see m_exon).
00097     struct  SSegment {
00098         
00099     public: // redundant: members of a struct are public by default
00100         
00101         bool   m_exon;    // true == exon; false == unaligned
00102         double m_idty;    // ranges from 0.0 to 1.0
00103         size_t m_len;     // length of the alignment, not of an interval
00104         size_t m_box [4]; // query([0],[1]) and subj([2],[3]) coordinates
00105         string m_annot;   // text description like AG<exon>GT
00106         string m_details; // transcript for exons, '-' for gaps
00107 
00108         float  m_score;   // dynprog score (normalized)
00109         // NOTE(review): semantics of 'extent' and of seq1/seq2 are not visible in this header - confirm in the .cpp
00110         void ExtendLeft(int extent, const char* seq1, const char* seq2,
00111                              CConstRef<CSplicedAligner> aligner);
00112         void ExtendRight(int extent, const char* seq1, const char* seq2,
00113                              CConstRef<CSplicedAligner> aligner);
00114         // NOTE(review): presumably adjust the segment boundary on the named side - confirm in the .cpp
00115         void ImproveFromLeft(const char* seq1, const char* seq2,
00116                              CConstRef<CSplicedAligner> aligner);
00117         void ImproveFromRight(const char* seq1, const char* seq2,
00118                               CConstRef<CSplicedAligner> aligner);
00119         
00120         void Update(const CNWAligner* aligner); // recompute members
00121         const char* GetDonor(void) const;       // raw pointers to parts of annot
00122         const char* GetAcceptor(void) const;    // or zero if less than 2 chars
00123         // Static helper (needs no SSegment instance); semi_as_cons defaults to false
00124         static bool s_IsConsensusSplice(const char* donor, const char* acceptor,
00125                                         bool semi_as_cons = false);
00126         
00127         // NetCache-related serialization; the buffer type is a plain vector<char>
00128         typedef vector<char> TNetCacheBuffer;
00129         void ToBuffer   (TNetCacheBuffer* buf) const;
00130         void FromBuffer (const TNetCacheBuffer& buf);
00131     };
00132 
00133     // partition a spliced alignment into SSegment's; the result is delivered through *psegments
00134     void MakeSegments(deque<SSegment>* psegments) const;
00135 
00136 private:
00137     // NOTE(review): raw pointer - presumably the constructor's aligner, which would then have to outlive this object; confirm
00138     const CNWAligner*                 m_aligner;
00139     CConstRef<objects::CSeq_id>       m_Seq1Id, m_Seq2Id;
00140     // NOTE(review): both vectors are passed by non-const pointer (presumably outputs); meaning of the size_t result is not visible here
00141     size_t x_ApplyTranscript(vector<char>* seq1_transformed,
00142                              vector<char>* seq2_transformed) const;    
00143 };
00144 
00145 
00146 END_NCBI_SCOPE
00147 
00148 /* @} */
00149 
00150 #endif  /* ALGO_ALIGN__NW_FORMAT__HPP */
00151 
00152 

Generated on Wed Dec 9 02:54:20 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:17:25 2009 by modify_doxy.py rev. 173732