src/app/oligofar/cseqcoding.hpp

Go to the documentation of this file.
00001 #ifndef OLIGOFAR_CSEQCODING__HPP
00002 #define OLIGOFAR_CSEQCODING__HPP
00003 
00004 #include "defs.hpp"
00005 #include <cmath>
00006 
00007 BEGIN_OLIGOFAR_SCOPES
00008 
00009 class CIupacnaBase;
00010 class CNcbipnaBase;
00011 class CNcbiqnaBase;
00012 class CNcbi8naBase;
00013 class CNcbi4naBase;
00014 class CNcbi2naBase;
00015 class CColorTwoBase;
00016 
// Base class of every single-base encoding declared in this header.
// It only carries the enumerations shared by the derived base classes.
class CSeqCoding
{
public:
    // Identifiers of the supported sequence encodings.
    enum ECoding {
        eCoding_iupacna,
        eCoding_ncbipna,
        eCoding_ncbi8na,
        eCoding_ncbi4na,
        eCoding_ncbi2na,
        eCoding_ncbiqna,
        eCoding_colorsp
    };

    // Strand selector; operator ^ below relies on the values 0 and 1.
    enum EStrand {
        eStrand_pos = 0,
        eStrand_neg = 1
    };

    // Combines two strands by XOR-ing their numeric values.
    // NB: must be rewritten if the EStrand constants ever differ from 0 (+) and 1 (-).
    friend EStrand operator ^ ( EStrand a, EStrand b )
    {
        const int combined = static_cast<int>( a ) ^ static_cast<int>( b );
        return static_cast<EStrand>( combined );
    }
};
00036 
00037 class CIupacnaBase : public CSeqCoding
00038 { 
00039 public: 
00040     CIupacnaBase( const char * c, EStrand strand ) : m_base( *c ) { if( strand == eStrand_neg ) m_base = s_complement[(int)m_base]; }
00041     CIupacnaBase( const char * c ) : m_base( *c ) {}
00042     CIupacnaBase( char c ) : m_base( c ) {}
00043     CIupacnaBase( const CNcbi8naBase& );
00044     CIupacnaBase( const CNcbi4naBase& );
00045     CIupacnaBase( const CNcbi2naBase& );
00046     CIupacnaBase( const CNcbiqnaBase& , int cutoff = 5 );
00047     CIupacnaBase( const CNcbipnaBase& , int score = 127 );
00048     CIupacnaBase( const CColorTwoBase& b );
00049     CIupacnaBase( const CIupacnaBase& b, const CColorTwoBase& c );
00050     CIupacnaBase Complement() const { return s_complement[(int)m_base]; }
00051     CIupacnaBase Get( EStrand strand ) const { return strand == eStrand_neg ? Complement() : *this; }
00052     operator char () const { return m_base; }
00053     static CIupacnaBase Any() { return CIupacnaBase('N'); }
00054     static int BytesPerBase() { return 1; }
00055 protected: 
00056     char m_base;
00057     static char s_complement[];
00058 };
00059 
00060 class CNcbipnaBase : public CSeqCoding
00061 {
00062 public:
00063     CNcbipnaBase( const char * base, EStrand strand ) { if( strand == eStrand_neg ) { copy( base + 4, base, m_base ); m_base[4] = base[4]; } else copy( base, base+5, m_base ); }
00064     CNcbipnaBase( const char * base ) { copy( base, base+5, m_base ); }
00065     CNcbipnaBase( const unsigned char * base ) { copy( base, base+5, m_base ); }
00066     CNcbipnaBase( const CIupacnaBase& b );
00067     CNcbipnaBase( const CNcbi8naBase& b ) { x_Init( b ); }
00068     CNcbipnaBase( const CNcbi4naBase& b );
00069     CNcbipnaBase( const CNcbi2naBase& );
00070     CNcbipnaBase( const CNcbiqnaBase& );
00071     CNcbipnaBase( const CColorTwoBase& b ) { THROW( logic_error, "CNcbipnaBase( CColorTwoBase ) should not be called" ); }
00072     operator char () const { THROW( logic_error, "CNcbiqnaBase::operator char () should not be called!" ); }
00073     operator const char * () const { return m_base; }
00074     unsigned char operator [] (int i) const { return m_base[i]; }
00075     CNcbipnaBase Complement() const { return CNcbipnaBase( m_base, eStrand_neg ); }
00076     CNcbipnaBase Get( EStrand strand ) const { return strand == eStrand_neg ? Complement() : *this; }
00077     static int BytesPerBase() { return 5; }
00078 protected:
00079     void x_Init( const CNcbi8naBase&  );
00080     char m_base[5]; // pointer will take 4 bytes in 32-bit architecture - so would save almost nothing
00081 };
00082 
00083 class CNcbiqnaBase : public CSeqCoding
00084 {
00085 public:
00086     CNcbiqnaBase( const char * base, EStrand strand ): m_base( *base ) { if( strand == eStrand_neg ) m_base = m_base ^ '\x03'; }
00087     CNcbiqnaBase( const char * c ) : m_base( *c ) {}
00088     CNcbiqnaBase( char c ) : m_base( c ) {}
00089     CNcbiqnaBase( int c ) : m_base( c ) {}
00090     CNcbiqnaBase( const CNcbipnaBase& b );
00091     CNcbiqnaBase( const CNcbi8naBase& b, unsigned score = 63 );
00092     CNcbiqnaBase( const CNcbi4naBase& b, unsigned score = 63 );
00093     CNcbiqnaBase( const CNcbi2naBase& b, unsigned score = 63 );
00094     CNcbiqnaBase( const CIupacnaBase& b, unsigned score = 63 );
00095     CNcbiqnaBase( const CColorTwoBase& b ) { THROW( logic_error, "CNcbiqnaBase( CColorTwoBase ) should not be called" ); }
00096     CNcbiqnaBase Complement() const { return m_base ^ '\x03'; }
00097     CNcbiqnaBase Get( EStrand strand ) const { return ( strand == eStrand_neg ) ? Complement() : *this; }
00098     operator char () const { return m_base; }
00099     int GetPhrapScore() const { return ((unsigned char)m_base) >> 2; }
00100     static char AdjustScore( int score ); 
00101     static CNcbiqnaBase Any() { return CNcbiqnaBase( 0 ); }
00102     static int BytesPerBase() { return 1; }
00103 protected:
00104     char m_base;
00105 };
00106 
00107 class CNcbi8naBase : public CSeqCoding
00108 {
00109 public:
00110     enum EBase { fBase_A = 0x01, fBase_C = 0x02, fBase_G = 0x04, fBase_T = 0x08 };
00111     CNcbi8naBase( const char * c, EStrand strand ) : m_base( *c ) { if( strand == eStrand_neg ) m_base = s_complement[(int)m_base]; }
00112     CNcbi8naBase( const char * c ) : m_base( *c ) {}
00113     CNcbi8naBase( char c, EStrand strand = eStrand_pos );
00114     CNcbi8naBase( int c, EStrand strand = eStrand_pos );
00115     CNcbi8naBase( unsigned int c, EStrand strand  = eStrand_pos );
00116     CNcbi8naBase( const CIupacnaBase& b );
00117     CNcbi8naBase( const CNcbipnaBase& b, int score = 127 );
00118     CNcbi8naBase( const CNcbiqnaBase& b, int cutoff = 5 );
00119     CNcbi8naBase( const CNcbi8naBase& b, int ) : m_base( b ) {}
00120     CNcbi8naBase( const CNcbi4naBase& b );
00121     CNcbi8naBase( const CNcbi2naBase& b );
00122     CNcbi8naBase( const CColorTwoBase& b ) { THROW( logic_error, "CNcbi8naBase( CColorTwoBase ) should not be called" ); }
00123     CNcbi8naBase( const CNcbi8naBase& b, const CColorTwoBase& c );
00124     CNcbi8naBase Complement() const;
00125     CNcbi8naBase Get( EStrand strand ) const { return ( strand == eStrand_neg ) ? Complement() : *this; }
00126     bool IsAmbiguous() const { return GetAltCount() > 1; }
00127     int GetAltCount() const { return s_altCount[(int)m_base]; }
00128     operator char () const { return m_base; }
00129     static CNcbi8naBase Any() { return CNcbi8naBase( '\x0f' ); }
00130     static int BytesPerBase() { return 1; }
00131     CNcbi2naBase GetSmallestNcbi2na() const;
00132 protected:
00133     char m_base;
00134     static char s_fromIupacna[];
00135     static char s_complement[];
00136     static char s_altCount[];
00137     static char s_smallestNcbi2na[];
00138 };
00139 
00140 class CNcbi4naBase : public CNcbi8naBase 
00141 {
00142 public:
00143     CNcbi4naBase( const CNcbi8naBase& b ) : CNcbi8naBase( b ) {}
00144 };
00145 
00146 class CNcbi2naBase : public CSeqCoding
00147 {
00148 public:
00149     CNcbi2naBase( const char * c, EStrand strand ) : m_base( *c ) { if( strand == eStrand_neg ) m_base = '\x03' ^ m_base; }
00150     CNcbi2naBase( const char * c ) : m_base( *c ) {}
00151     CNcbi2naBase( char c ) : m_base( c ) {}
00152     CNcbi2naBase( int c ) : m_base( c ) {}
00153     CNcbi2naBase( unsigned int c ) : m_base( c ) {}
00154     CNcbi2naBase( const CNcbipnaBase& b, int score = 127 );
00155     CNcbi2naBase( const CIupacnaBase& b );
00156     CNcbi2naBase( const CNcbi8naBase& b );
00157     CNcbi2naBase( const CNcbi4naBase& b );
00158     CNcbi2naBase( const CNcbiqnaBase& b );
00159     operator char () const { return m_base; }
00160     CNcbi2naBase Complement() const { return '\x03' ^ m_base; }
00161     CNcbi2naBase Get( EStrand strand ) const { return ( strand == eStrand_neg ) ? Complement() : *this; }
00162 protected:
00163     char m_base;
00164     static char s_iupacnaTable[];
00165     static char s_ncbi8naTable[];
00166 };
00167 
00168 class CColorTwoBase : public CSeqCoding
00169 {
00170 public:
00171     enum EColorHi { eColor_BLUE = 0x00, eColor_GREEN = 0x10, eColor_YELLOW = 0x20, eColor_RED = 30 };
00172     enum EFromASCII { eFromASCII };
00173     CColorTwoBase( char c ) : m_base( c ) {}
00174     CColorTwoBase( char c, EStrand ) : m_base( c ) {}
00175     CColorTwoBase( const char * c ) : m_base( *c ) {}
00176     CColorTwoBase( const char * c, EStrand ) : m_base( *c ) {}
00177     CColorTwoBase( EFromASCII, char c ) { x_Init( c ); }
00178     CColorTwoBase( CNcbi2naBase b ) : m_base( CNcbi8naBase( b ) ) {}
00179     CColorTwoBase( CNcbiqnaBase b ) : m_base( CNcbi8naBase( b ) ) {}
00180     CColorTwoBase( CNcbi8naBase b ) : m_base( b ) {}
00181     CColorTwoBase( CNcbipnaBase b ) : m_base( CNcbi8naBase(b) ) {}
00182     CColorTwoBase( CNcbi2naBase prev, CNcbi2naBase b ) :
00183         m_base( s_dibase2code[(prev << 2) | b] | CColorTwoBase(b) ) {}
00184     CColorTwoBase( CNcbiqnaBase prev, CNcbiqnaBase b ) :
00185         m_base( s_dibase2code[(prev&0x30 << 2) | b&0x30] | CColorTwoBase(b) ) {}
00186     CColorTwoBase( CNcbi2naBase prev, CNcbi8naBase b ) :
00187         m_base( s_dibase2code[(prev << 2) | b.GetSmallestNcbi2na()] | CColorTwoBase(b) ) {}
00188     CColorTwoBase( CColorTwoBase prev, CColorTwoBase b ) { THROW( logic_error, "CColorTwoBase( CColorTwoBase, CColorTwoBase ) should never be called" ); }
00189     CColorTwoBase( CNcbi8naBase prev, CNcbi8naBase b ) :
00190         m_base( s_dibase2code[(prev.GetSmallestNcbi2na() << 2) | b.GetSmallestNcbi2na()] | CColorTwoBase(b) ) {}
00191     CColorTwoBase( CNcbipnaBase prev, CNcbipnaBase b ) :
00192         m_base( s_dibase2code[(CNcbi8naBase(prev).GetSmallestNcbi2na() << 2) | CNcbi8naBase(b).GetSmallestNcbi2na()] | CColorTwoBase(b) ) {}
00193 //     template<class base>
00194 //     CColorTwoBase( base a, base b ) : m_base( s_dibase2code[(CNcbi8naBase( a ).GetSmallestNcbi2na()<<2) | CNcbi8naBase( b ).GetSmallestNcbi2na()] ) {}
00195     operator char () const { return m_base; }
00196     CNcbi8naBase GetBaseCalls() const { return m_base & 0xf; }
00197     CColorTwoBase Complement() const { return GetBaseCalls().Complement() | GetColor(); }
00198     int GetColor() const { return m_base & 0x30; }
00199     int GetColorOrd() const { return GetColor() >> 4; }
00200     char AsAscii( bool tryBaseCalls = true ) const { return tryBaseCalls && GetBaseCalls() ? (char)CIupacnaBase( GetBaseCalls() ) : ('0' + GetColorOrd()); }
00201     static int BytesPerBase() { return 1; }
00202 protected:
00203     void x_Init( char c );
00204 protected:
00205     static char s_dibase2code[];
00206     char m_base; // ..cc4444
00207 };
00208 
00209 ////////////////////////////////////////////////////////////////////////
// Generic match score: 1 when the two bases, both converted to
// CNcbi8naBase, have at least one bit in common, otherwise 0.
template<class BaseA, class BaseB>
inline int ComputeScore( const BaseA& a, const BaseB& b )
{
    const int common = CNcbi8naBase( a ) & CNcbi8naBase( b );
    return common != 0;
}
00215 
00216 inline int ComputeScore( const CNcbi8naBase& a, const CNcbipnaBase& b )
00217 {
00218     return 
00219         (( a & 0x01 ? b[0] : 0 ) + 
00220          ( a & 0x02 ? b[1] : 0 ) +
00221          ( a & 0x04 ? b[2] : 0 ) +
00222          ( a & 0x08 ? b[3] : 0 )) / (b[0] + b[1] + b[2] + b[3]);
00223 }
00224 
00225 inline int ComputeScore( const CNcbipnaBase& b, const CNcbi8naBase& a )
00226 {
00227     return 
00228         (( a & 0x01 ? b[0] : 0 ) + 
00229          ( a & 0x02 ? b[1] : 0 ) +
00230          ( a & 0x04 ? b[2] : 0 ) +
00231          ( a & 0x08 ? b[3] : 0 )) / (b[0] + b[1] + b[2] + b[3]);
00232 }
00233 
00234 inline int operator * ( const CNcbi2naBase& a, const CNcbi2naBase& b )
00235 {
00236     return a == b;
00237 }
00238 
00239 ////////////////////////////////////////////////////////////////////////
00240 
00241 inline CIupacnaBase::CIupacnaBase( const CNcbi8naBase& b ) : m_base("-ACMGRSVTWYHKDBN"[b&0x0f]) {}
00242 inline CIupacnaBase::CIupacnaBase( const CNcbi4naBase& b ) : m_base("-ACMGRSVTWYHKDBN"[b&0x0f]) {}
00243 inline CIupacnaBase::CIupacnaBase( const CNcbi2naBase& b ) : m_base("ACGT"[b&0x03]) {}
00244 inline CIupacnaBase::CIupacnaBase( const CNcbipnaBase& b, int score ) : m_base(0) { m_base = CIupacnaBase( CNcbi8naBase( b, score ) ); }
00245 inline CIupacnaBase::CIupacnaBase( const CNcbiqnaBase& b, int cutoff ) : m_base( b.GetPhrapScore() > cutoff ? "ACGT"[b&3] : 'N' ) {}
00246 inline CIupacnaBase::CIupacnaBase( const CColorTwoBase& b ) { m_base = b.GetBaseCalls() ? char(CIupacnaBase( b.GetBaseCalls() )) : char(( b.GetColorOrd() + '0' )); }
00247 inline CIupacnaBase::CIupacnaBase( const CIupacnaBase& b, const CColorTwoBase& c ) 
00248 { m_base = "ACGTCATGGTACTGCA"[(CNcbi2naBase(b) << 2)|c.GetColorOrd()]; }
00249 
00250 ////////////////////////////////////////////////////////////////////////
00251 
00252 inline CNcbipnaBase::CNcbipnaBase( const CIupacnaBase& b ) 
00253 { 
00254     x_Init( CNcbi8naBase( b ) ); 
00255 }
00256 
00257 inline CNcbipnaBase::CNcbipnaBase( const CNcbi2naBase& b ) 
00258 {
00259     fill( m_base, m_base + 5, 0 );
00260     m_base[b] = m_base[4] = '\xff';
00261 }
00262 
00263 inline CNcbipnaBase::CNcbipnaBase( const CNcbi4naBase& b ) 
00264 { 
00265     x_Init( b ); 
00266 }
00267 
00268 inline void CNcbipnaBase::x_Init( const CNcbi8naBase& b ) 
00269 { 
00270     fill( m_base, m_base + 5, 0 );
00271     if( b & 0x01 ) m_base[0] = '\xff';
00272     if( b & 0x02 ) m_base[1] = '\xff';
00273     if( b & 0x03 ) m_base[2] = '\xff';
00274     if( b & 0x04 ) m_base[3] = '\xff';
00275     m_base[4] = b ? '\xff' : 0;
00276 }
00277 
00278 inline CNcbipnaBase::CNcbipnaBase( const CNcbiqnaBase& b ) 
00279 {
00280     Uint1 val = Uint1( 255 * ( std::pow(10.0, b.GetPhrapScore() / 10 ) ) );
00281     int x = CNcbi2naBase( b );
00282     m_base[0] = x-- ? val : '\xff';
00283     m_base[1] = x-- ? val : '\xff';
00284     m_base[2] = x-- ? val : '\xff';
00285     m_base[3] = x-- ? val : '\xff';
00286     m_base[4] = '\xff';
00287 }
00288 
00289 ////////////////////////////////////////////////////////////////////////
00290 
00291 inline char CNcbiqnaBase::AdjustScore( int score ) 
00292 {
00293     if( score < 0 ) score = 0;
00294     if( score > 63 ) score = 63;
00295     return score;
00296 }
00297 
00298 inline CNcbiqnaBase::CNcbiqnaBase( const CNcbipnaBase& b ) : m_base(0)
00299 {
00300     int max = 0, next = 0;
00301     for( int i = 0; i < 4; ++i ) {
00302         if( b[i] > max ) {
00303             m_base = i; 
00304             next = max;
00305             max = b[i];
00306         } else if( b[i] > next ) next = b[i];
00307     }
00308     if( next == 0 ) m_base |= 0xfc;
00309     else {
00310         double pscore = 10*std::log10( double( max ) / next );
00311         m_base |= AdjustScore( int(pscore) ) << 2;
00312     }
00313 }
00314 inline CNcbiqnaBase::CNcbiqnaBase( const CNcbi8naBase& b, unsigned score ) 
00315 {
00316     if( b.GetAltCount() > 1 ) m_base = 0;
00317     else m_base = CNcbi2naBase( b ) | (AdjustScore(score) << 2);
00318 }
00319 
00320 inline CNcbiqnaBase::CNcbiqnaBase( const CNcbi4naBase& b, unsigned score )
00321 {
00322     if( b.GetAltCount() > 1 ) m_base = 0;
00323     else m_base = CNcbi2naBase( b ) | (AdjustScore(score) << 2);
00324 }
00325 
00326 inline CNcbiqnaBase::CNcbiqnaBase( const CNcbi2naBase& b, unsigned score )
00327 {
00328     m_base = b | (AdjustScore(score) << 2);
00329 }
00330 
00331 inline CNcbiqnaBase::CNcbiqnaBase( const CIupacnaBase& b, unsigned score )
00332 {
00333     CNcbi8naBase _b( b );
00334     if( _b.GetAltCount() > 1 ) m_base = 0;
00335     else m_base = CNcbi2naBase( b ) | (AdjustScore(score) << 2);
00336 }
00337 
00338 ////////////////////////////////////////////////////////////////////////
00339 
00340 inline CNcbi8naBase::CNcbi8naBase( char c, EStrand strand ) : m_base( c ) 
00341 { 
00342     if( strand == eStrand_neg ) m_base = s_complement[int(m_base)]; 
00343 }
00344 
00345 inline CNcbi8naBase::CNcbi8naBase( int c, EStrand strand ) : m_base( c ) 
00346 { 
00347     if( strand == eStrand_neg ) m_base = s_complement[int(m_base)]; 
00348 }
00349 
00350 inline CNcbi8naBase::CNcbi8naBase( unsigned int c, EStrand strand ) : m_base( c ) 
00351 { 
00352     if( strand == eStrand_neg ) m_base = s_complement[int(m_base)]; 
00353 }
00354 
00355 inline CNcbi8naBase::CNcbi8naBase( const CNcbi4naBase& b ) : m_base( b ) {}
00356 inline CNcbi8naBase::CNcbi8naBase( const CNcbi2naBase& b ) : m_base( "\x01\x02\x04\x08"[(int)b] ) {}
00357 
00358 inline CNcbi8naBase::CNcbi8naBase( const CNcbipnaBase& b, int score ) : m_base(0) 
00359 {
00360     if( b[0] > score ) m_base |= 0x01;
00361     if( b[1] > score ) m_base |= 0x02;
00362     if( b[2] > score ) m_base |= 0x04;
00363     if( b[3] > score ) m_base |= 0x08;
00364     if( (!m_base) && ( b[4] > score ) ) m_base = 0x0f;
00365 }
00366 
00367 inline CNcbi8naBase::CNcbi8naBase( const CIupacnaBase& b ) : m_base( s_fromIupacna[b] ) 
00368 {
00369     if( m_base == '\xf0' ) THROW( runtime_error, "Invalid IUPACNA base ASCII[" << int(b) << "] " << b );
00370 }
00371 
00372 inline CNcbi8naBase::CNcbi8naBase( const CNcbi8naBase& b, const CColorTwoBase& c ) 
00373 { m_base = "\x1\x2\x4\x8\x2\x1\x8\x4\x4\x8\x1\x2\x8\x4\x2\x1"[(CNcbi2naBase(b) << 2)|c.GetColorOrd()]; }
00374 
00375 inline CNcbi8naBase::CNcbi8naBase( const CNcbiqnaBase& b, int cutoff ) : m_base( b.GetPhrapScore() > cutoff ? CNcbi8naBase( CNcbi2naBase( b&3 ) ) : Any() ) {}
00376 inline CNcbi8naBase CNcbi8naBase::Complement() const { return s_complement[(int)m_base]; }
00377 inline CNcbi2naBase CNcbi8naBase::GetSmallestNcbi2na() const { return s_smallestNcbi2na[int(m_base)]; }
00378 
00379 ////////////////////////////////////////////////////////////////////////
00380 
00381 inline CNcbi2naBase::CNcbi2naBase( const CIupacnaBase& b ) : m_base( s_iupacnaTable[b] ) 
00382 {
00383     if( m_base == '\xf0' ) THROW( runtime_error, "Invalid IUPACNA base ASCII[" << int(b) << "] " << b );
00384 }
00385 
00386 inline CNcbi2naBase::CNcbi2naBase( const CNcbi8naBase& b ) : m_base( s_ncbi8naTable[b] ) 
00387 {
00388     if( m_base == '\xf0' ) THROW( runtime_error, "Invalid NCBI8NA base 0x" << setw(2) << hex << setfill('0') << int(b) << " IUPACNA " << CIupacnaBase(b) );
00389 }
00390 
00391 inline CNcbi2naBase::CNcbi2naBase( const CNcbi4naBase& b ) : m_base( s_ncbi8naTable[b] ) 
00392 {
00393     if( m_base == '\xf0' ) THROW( runtime_error, "Invalid NCBI8NA base 0x" << setw(2) << hex << setfill('0') << int(b) << " IUPACNA " << CIupacnaBase(b) );
00394 }
00395 
00396 inline CNcbi2naBase::CNcbi2naBase( const CNcbiqnaBase& b ) : m_base( b & 3 ) {}
00397 
00398 ////////////////////////////////////////////////////////////////////////
00399 
00400 inline void CColorTwoBase::x_Init( char c )
00401 { 
00402     static const char * t = "0123ACGT";
00403     const char * x = strchr( t, toupper( c ) );
00404     if( x == 0 || *x == 0 ) THROW( runtime_error, "Invalid colorspace base " << c );
00405     m_base = x - t;
00406     if( m_base < 4 ) (m_base <<= 4);
00407     else m_base = CNcbi8naBase( CNcbi2naBase( m_base & 0x03 ) ); 
00408 }
00409 
00410 END_OLIGOFAR_SCOPES
00411 
00412 #endif
00413 
00414 

Generated on Sun Dec 6 22:21:13 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:55 2009 by modify_doxy.py rev. 173732