include/algo/align/nw/nw_pssm_aligner.hpp

Go to the documentation of this file.
00001 #ifndef ALGO___NW_PSSM_ALIGNER__HPP
00002 #define ALGO___NW_PSSM_ALIGNER__HPP
00003 
00004 /* $Id: nw_pssm_aligner.hpp 111145 2007-09-24 15:37:09Z papadopo $
00005 * ===========================================================================
00006 *
00007 *                            PUBLIC DOMAIN NOTICE                          
00008 *               National Center for Biotechnology Information
00009 *                                                                          
00010 *  This software/database is a "United States Government Work" under the   
00011 *  terms of the United States Copyright Act.  It was written as part of    
00012 *  the author's official duties as a United States Government employee and 
00013 *  thus cannot be copyrighted.  This software/database is freely available 
00014 *  to the public for use. The National Library of Medicine and the U.S.    
00015 *  Government have not placed any restriction on its use or reproduction.  
00016 *                                                                          
00017 *  Although all reasonable efforts have been taken to ensure the accuracy  
00018 *  and reliability of the software and data, the NLM and the U.S.          
00019 *  Government do not and cannot warrant the performance or results that    
00020 *  may be obtained by using this software or data. The NLM and the U.S.    
00021 *  Government disclaim all warranties, express or implied, including       
00022 *  warranties of performance, merchantability or fitness for any particular
00023 *  purpose.                                                                
00024 *                                                                          
00025 *  Please cite the author in any work or product based on this material.   
00026 *
00027 * ===========================================================================
00028 *
00029 * Author:  Jason Papadopoulos
00030 *
00031 * File Description:
00032 *   CPSSMAligner class definition
00033 *
00034 *   CPSSMAligner encapsulates a generic global (Needleman-Wunsch)
00035 *   alignment algorithm with affine gap penalty model and position-
00036 *   specific scoring for one or both input sequences.
00037 *
00038 */
00039 
00040 #include <corelib/ncbistd.hpp>
00041 #include <corelib/ncbiobj.hpp>
00042 #include <corelib/ncbi_limits.hpp>
00043 #include <util/tables/raw_scoremat.h>
00044 #include <algo/align/nw/nw_aligner.hpp>
00045 
00046 #include <vector>
00047 #include <string>
00048 
00049 
00050 /** @addtogroup AlgoAlignRoot
00051  *
00052  * @{
00053  */
00054 
00055 
00056 BEGIN_NCBI_SCOPE
00057 
00058 
00059 // Needleman-Wunsch (global) aligner with position-specific scoring.
00060 // Extends CNWAligner with separate gap penalties for initial and terminal gaps.
00061 
00062 class  CPSSMAligner: public CNWAligner
00063 {
00064 public:
00065     // Constructors (all defined out of line)
00066     CPSSMAligner();
00067     // Input 1 as a table of TScore values (pssm1), input 2 as a character sequence.
00068     CPSSMAligner(const CNWAligner::TScore** pssm1, size_t len1,
00069                  const char* seq2, size_t len2);
00070     // Both inputs as tables of doubles (freq1/freq2); 'scale' defaults to 1.
00071     CPSSMAligner(const double** freq1, size_t len1,
00072                  const double** freq2, size_t len2,
00073                  const SNCBIPackedScoreMatrix* scoremat,
00074                  const int scale = 1);
00075 
00076     virtual ~CPSSMAligner(void) {}
00077 
00078     // Compute the alignment and return its score (TScore)
00079     virtual CNWAligner::TScore Run(void);
00080 
00081     // Setters -- three SetSequences overloads: seq/seq, pssm/seq and freq/freq
00082     void SetSequences(const char* seq1, size_t len1,
00083                       const char* seq2, size_t len2,
00084                       bool verify = true);
00085 
00086     void SetSequences(const CNWAligner::TScore** pssm1, size_t len1,
00087                       const char* seq2, size_t len2,
00088                       bool verify = true);
00089 
00090     void SetSequences(const double** freq1, size_t len1,
00091                       const double** freq2, size_t len2,
00092                       const int scale = 1);
00093 
00094     void SetScoreMatrix(const SNCBIPackedScoreMatrix* scoremat);
00095     // Stores 'scale' in m_FreqScale (the scale factor for position frequencies).
00096     void SetFreqScale(const int scale) {m_FreqScale = scale;}
00097     // SetWg/SetWs assign one value to the start, interior and end penalties at once.
00098     void SetWg  (TScore value)   // gap opening (start, interior and end)
00099     { 
00100         m_StartWg = m_Wg  = m_EndWg = value; 
00101     }
00102     void SetWs  (TScore value)   // gap extension (start, interior and end)
00103     { 
00104         m_StartWs = m_Ws  = m_EndWs = value; 
00105     }
00106     void SetStartWg(TScore value)  { m_StartWg = value; }   // gap opening, initial gaps only
00107     void SetStartWs(TScore value)  { m_StartWs = value; }   // gap extension, initial gaps only
00108     void SetEndWg(TScore value)    { m_EndWg = value; }   // gap opening, terminal gaps only
00109     void SetEndWs(TScore value)    { m_EndWs = value; }   // gap extension, terminal gaps only
00110 
00111     // Getters (m_Seq1 and m_ScoreMatrix are not declared here; presumably CNWAligner members)
00112     const CNWAligner::TScore** GetPssm1() const {return m_Pssm1;}
00113     const char* GetSeq1() const                 {return m_Seq1;}
00114     const double** GetFreq1() const             {return m_Freq1;}
00115     const double** GetFreq2() const             {return m_Freq2;}
00116     int GetFreqScale() const                    {return m_FreqScale;}
00117     // Penalties for initial (Start*) and terminal (End*) gaps
00118     TScore GetStartWg() const { return m_StartWg; }
00119     TScore GetStartWs() const { return m_StartWs; }
00120     TScore GetEndWg() const   { return m_EndWg; }
00121     TScore GetEndWs() const   { return m_EndWs; }
00122     SNCBIFullScoreMatrix& GetMatrix() { return m_ScoreMatrix; }
00123     // Returns a TScore for the given transcript; start1/start2 default to 0.
00124     virtual TScore ScoreFromTranscript(const TTranscript& transcript,
00125                                        size_t start1 = 0,
00126                                        size_t start2 = 0) const;
00127 
00128 protected:
00129 
00130     // only NCBIstdaa alphabet supported
00131     static const int kPSSM_ColumnSize = 28;
00132 
00133     // Source sequences / profiles for input 1 (raw pointers; ownership not visible here)
00134     const TScore** m_Pssm1;
00135     const double** m_Freq1;
00136     // Source sequence / profile for input 2
00137     const char*    m_Seq2;
00138     const double** m_Freq2;
00139 
00140     // scale factor for position frequencies
00141     int                        m_FreqScale;
00142 
00143     TScore   m_StartWg;// gap opening penalty for initial gaps
00144     TScore   m_StartWs;// gap extension penalty for initial gaps
00145     TScore   m_EndWg;  // gap opening penalty for terminal gaps
00146     TScore   m_EndWs;  // gap extension penalty for terminal gaps
00147 
00148     // core dynamic programming (x_AlignProfile / x_AlignPSSM are non-virtual helpers)
00149     virtual TScore x_Align (SAlignInOut* data);
00150     TScore x_AlignProfile (SAlignInOut* data);
00151     TScore x_AlignPSSM (SAlignInOut* data);
00152 
00153     // retrieve transcript symbol for a one-character diag
00154     virtual ETranscriptSymbol x_GetDiagTS(size_t i1, size_t i2) const;
00155     // kPSSM_ColumnSize x kPSSM_ColumnSize table of doubles; presumably filled by SetScoreMatrix -- TODO confirm
00156     double m_DScoreMatrix[kPSSM_ColumnSize][kPSSM_ColumnSize];
00157 };
00158 
00159 
00160 END_NCBI_SCOPE
00161 
00162 
00163 /* @} */
00164 
00165 #endif  /* ALGO___NW_PSSM_ALIGNER__HPP */
00166 
00167 

Generated on Sun Dec 6 21:55:29 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:32 2009 by modify_doxy.py rev. 173732