include/algo/align/util/align_shadow.hpp

Go to the documentation of this file.
00001 #ifndef ALGO_ALIGN_UTIL_ALIGN_SHADOW__HPP
00002 #define ALGO_ALIGN_UTIL_ALIGN_SHADOW__HPP
00003 
00004 /* $Id: align_shadow.hpp 159908 2009-05-11 13:18:03Z dicuccio $
00005 * ===========================================================================
00006 *
00007 *                            PUBLIC DOMAIN NOTICE                          
00008 *               National Center for Biotechnology Information
00009 *                                                                          
00010 *  This software/database is a "United States Government Work" under the   
00011 *  terms of the United States Copyright Act.  It was written as part of    
00012 *  the author's official duties as a United States Government employee and 
00013 *  thus cannot be copyrighted.  This software/database is freely available 
00014 *  to the public for use. The National Library of Medicine and the U.S.    
00015 *  Government have not placed any restriction on its use or reproduction.  
00016 *                                                                          
00017 *  Although all reasonable efforts have been taken to ensure the accuracy  
00018 *  and reliability of the software and data, the NLM and the U.S.          
00019 *  Government do not and cannot warrant the performance or results that    
00020 *  may be obtained by using this software or data. The NLM and the U.S.    
00021 *  Government disclaim all warranties, express or implied, including       
00022 *  warranties of performance, merchantability or fitness for any particular
00023 *  purpose.                                                                
00024 *                                                                          
00025 *  Please cite the author in any work or product based on this material.   
00026 *
00027 * ===========================================================================
00028 *
00029 * Author:  Yuri Kapustin
00030 *
00031 * File Description:
00032 *   CAlignShadow class
00033 *
00034 * CAlignShadow is a transcript-based representation of a pairwise sequence alignment.
00035 *
00036 */
00037 
00038 
00039 #include <corelib/ncbiobj.hpp>
00040 
00041 #include <objects/seqloc/Seq_id.hpp>
00042 #include <objects/seqalign/Seq_align.hpp>
00043 #include <objects/seqalign/Dense_seg.hpp>
00044 
00045 #include <algo/align/util/algo_align_util_exceptions.hpp>
00046 
00047 #include <math.h>
00048 
00049 
00050 BEGIN_NCBI_SCOPE
00051 
00052 BEGIN_SCOPE(objects)
00053     class CSeq_id;
00054 END_SCOPE(objects)
00055 
00056 
00057 class  CAlignShadow: public CObject
00058 {
00059 public:
00060 
00061     typedef CConstRef<objects::CSeq_id> TId;
00062     typedef TSeqPos TCoord;
00063 
00064     // c'tors
00065     CAlignShadow(void);
00066 
00067     /// Create the object from a seq-align structure
00068     ///
00069     /// @param seq_align
00070     ///    Input seq-align structure to create from
00071     /// @param save_xcript
00072     ///    If true, the alignment transcript string will be run-length encoded
00073     ///    and saved in m_Transcript. All diagonals are recorded
00074     ///    as matches.
00075     CAlignShadow(const objects::CSeq_align& seq_align, bool save_xcript = false);
00076 
00077     /// Create the object from a transcript
00078     ///
00079     /// @param idquery
00080     ///    Query sequence ID
00081     /// @param qstart
00082     ///    Starting coordinate on the query
00083     /// @param qstrand
00084     ///    Query strand (direction)
00085     /// @param idsubj
00086     ///    Subject sequence ID
00087     /// @param sstart
00088     ///    Starting coordinate on the subject
00089     /// @param sstrand
00090     ///    Subject strand (direction)
00091     /// @param xcript
00092     ///    Plain alignment (edit) transcript.
00093     ///    Allowed characters are 'M', 'R', 'I', 'D'.
00094     ///
00095     CAlignShadow(const TId& idquery, TCoord qstart, bool qstrand,
00096                  const TId& idsubj, TCoord sstart, bool sstrand,
00097                  const string& xcript);
00098 
00099     virtual ~CAlignShadow() {}
00100 
00101     // getters / setters
00102     const TId& GetId(Uint1 where) const;
00103     const TId& GetQueryId(void) const;
00104     const TId& GetSubjId(void) const;
00105 
00106     void  SetId(Uint1 where, const TId& id);
00107     void  SetQueryId(const TId& id);
00108     void  SetSubjId(const TId& id);
00109 
00110     bool  GetStrand(Uint1 where) const;
00111     bool  GetQueryStrand(void) const;
00112     bool  GetSubjStrand(void) const;
00113 
00114     void  SetStrand(Uint1 where, bool strand);
00115     void  SetQueryStrand(bool strand);
00116     void  SetSubjStrand(bool strand);
00117     void  FlipStrands(void);
00118 
00119     void  SwapQS(void);
00120     
00121     const TCoord* GetBox(void) const;
00122     void  SetBox(const TCoord box [4]);
00123 
00124     TCoord GetMin(Uint1 where) const;
00125     TCoord GetMax(Uint1 where) const;
00126     TCoord GetQueryMin(void) const;
00127     TCoord GetQueryMax(void) const;
00128     TCoord GetSubjMin(void) const;
00129     TCoord GetSubjMax(void) const;
00130     void   SetMax(Uint1 where, TCoord pos);
00131     void   SetMin(Uint1 where, TCoord pos);
00132     void   SetQueryMin(TCoord pos);
00133     void   SetQueryMax(TCoord pos);
00134     void   SetSubjMin(TCoord pos);
00135     void   SetSubjMax(TCoord pos);
00136 
00137     TCoord GetQuerySpan(void) const;
00138     TCoord GetSubjSpan(void) const;
00139 
00140     TCoord GetStart(Uint1 where) const;
00141     TCoord GetStop(Uint1 where) const;
00142     TCoord GetQueryStart(void) const;
00143     TCoord GetQueryStop(void) const;
00144     TCoord GetSubjStart(void) const;
00145     TCoord GetSubjStop(void) const;
00146     void   SetStop(Uint1 where, TCoord pos);
00147     void   SetStart(Uint1 where, TCoord pos);
00148     void   SetQueryStart(TCoord pos);
00149     void   SetQueryStop(TCoord pos);
00150     void   SetSubjStart(TCoord pos);
00151     void   SetSubjStop(TCoord pos);
00152 
00153     void         Shift(Int4 shift_query, Int4 shift_subj);
00154 
00155     // 0 = query min, 1 = query max, 2 = subj min, 3 = subj max
00156     virtual void Modify(Uint1 point, TCoord new_pos);
00157 
00158     // tabular serialization
00159     friend  CNcbiOstream& operator << (CNcbiOstream& os, 
00160                                       const CAlignShadow& align_shadow);
00161 
00162     // optional alignment transcript
00163     typedef string TTranscript;
00164     const TTranscript& GetTranscript(void) const;
00165 
00166     static string s_RunLengthEncode(const string& in);
00167     static string s_RunLengthDecode(const string& in);
00168 
00169 
00170 protected:
00171     
00172     std::pair<TId,TId>   m_Id;     // Query and subj IDs
00173 
00174     TCoord  m_Box [4];    // [0] = query_start, [1] = query_stop
00175                           // [2] = subj_start,[3] = subj_stop, all zero-based;
00176                           // Order in which query and subj coordinates go
00177                           // reflects strand.
00178 
00179     // tabular serialization without IDs
00180     virtual void   x_PartialSerialize(CNcbiOstream& os) const;
00181 
00182     // alignment (edit) transcript is a sequence of elementary 
00183     // string editing commands followed by their counts (if > 1), e.g.:M23RI5M40D3M9
00184 
00185     TTranscript m_Transcript;
00186 };
00187 
00188 
/// Round a value to the nearest integral value; halves are rounded up
/// (toward +infinity), e.g. round(2.5) == 3 and round(-2.5) == -2.
///
/// The decision is made on the fractional part, v - floor(v), rather than
/// by comparing v against floor(v) + 0.5. For doubles of magnitude 2^52 and
/// above the sum floor(v) + 0.5 is not representable and may round back
/// down to floor(v), which would make an already-integral value come out
/// one too large.
///
/// @param v
///    Value to round
/// @return
///    floor(v) if the fractional part of v is below 0.5,
///    floor(v) + 1 otherwise
template <typename T>
T round (const T& v)
{
    const T fl (floor(v));
    return v - fl < 0.5? fl: fl + 1;
}
00195 
00196 
00197 END_NCBI_SCOPE
00198 
00199 #endif /* ALGO_ALIGN_UTIL_ALIGN_SHADOW__HPP  */
00200 
00201 

Generated on Wed Dec 9 02:54:25 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:17:25 2009 by modify_doxy.py rev. 173732