src/app/oligofar/csamalignments.hpp

Go to the documentation of this file.
00001 #ifndef OLIGOFAR_CSAMALIGNMENTS__HPP
00002 #define OLIGOFAR_CSAMALIGNMENTS__HPP
00003 
00004 #include "ialignmentmap.hpp"
00005 #include "csam.hpp"
00006 
00007 BEGIN_OLIGOFAR_SCOPES
00008 
00009 class CSRead : public IShortRead
00010 {
00011 public:
00012     ~CSRead() { delete[] m_data; --s_count; }
00013 
00014     virtual EPairType GetPairType() const { return x_GetPairType(); }
00015     virtual CSeqCoding::ECoding GetSequenceCoding() const { return x_GetSequenceCoding(); }
00016     virtual int GetSequenceLength() const { return x_GetSequenceLength(); }
00017     const IShortRead * GetMate() const { return x_GetPairType() ? x_GetMate() : 0; }
00018     const char * GetSequenceData() const { return m_data + x_GetSequenceOffset(); }
00019     const char * GetId() const { return x_GetPairType() == eRead_second ? x_GetMate()->GetId() : m_data + x_GetIdOffset(); }
00020 
00021     CSRead( const string& id, CSeqCoding::ECoding coding, const char * sequence, int length, EPairType intended = eRead_single );
00022     CSRead( const string& id, const string& sequence, const string& quality = "", EPairType intended = eRead_single );
00023     CSRead( const CSRead& r );
00024     CSRead& operator = ( const CSRead& r );
00025 
00026     void SetMate( CSRead * other ); // other becomes second sequence
00027 
00028     EPairType GetIntendedPairType() const { return EPairType((m_data[2] >> 2)&3); }
00029 
00030     string GetIupacna( CSeqCoding::EStrand = CSeqCoding::eStrand_pos ) const;
00031 
00032     static int GetCount() { return s_count; }
00033 
00034 protected:
00035     Uint2 x_GetIdLength() const { return Uint1( m_data[0] ); }
00036     Uint2 x_GetSequenceLength() const { return Uint1( m_data[1] ); }
00037     EPairType x_GetPairType() const { return EPairType( m_data[2] & 3 ); }
00038     CSeqCoding::ECoding x_GetSequenceCoding() const { return CSeqCoding::ECoding( Uint1( m_data[3] ) ); }
00039 
00040     Uint1 x_GetHeaderSize() const { return 4 + ( x_GetPairType() ? sizeof( void* ) : 0 ); }
00041     Uint2 x_GetIdOffset() const { return x_GetHeaderSize(); }
00042     Uint2 x_GetSequenceOffset() const { return x_GetIdOffset() + x_GetIdLength() + 1; }
00043     Uint2 x_GetDataSize() const { return x_GetSequenceOffset() + x_GetSequenceLength() + 1; }
00044     Uint2 x_GetDataSize( int idlen, int seqlen, EPairType type ) const {
00045         switch( type ) {
00046             case eRead_single: return 6 + idlen + seqlen;
00047             case eRead_first: return 6 + sizeof( void * ) + idlen + seqlen;
00048             case eRead_second: return 5 + sizeof( void * ) + seqlen;
00049         }
00050         THROW( logic_error, "Unknown read pair type" );
00051     }
00052     const CSRead * x_GetMate() const { return *(CSRead**)(m_data + 4); }
00053     CSRead * & x_SetMate() { return *(CSRead**)(m_data + 4); }
00054 protected:
00055     char * m_data;
00056     static int s_count;
00057 };
00058 
00059 class CContig : public INucSeq
00060 {
00061 public:
00062     ~CContig() { if( m_owns ) delete[] m_data; --s_count; }
00063     CContig( const string& name );
00064     CContig( const CContig& ctg );
00065     void SetSequenceData( char * data, size_t length, CSeqCoding::ECoding coding = CSeqCoding::eCoding_ncbi8na, bool owns = false ) { 
00066         if( m_owns ) delete[] m_data;
00067         m_data = data; m_size = length; m_coding = coding; m_owns = owns; 
00068     }
00069     virtual const char * GetId() const { return m_id.c_str(); }
00070     virtual const char * GetSequenceData() const { return m_data; } //&m_data[0]; }
00071     virtual int GetSequenceLength() const { return m_size; } // m_data.size(); }
00072     virtual CSeqCoding::ECoding GetSequenceCoding() const { return m_coding; } //CSeqCoding::eCoding_ncbi8na; }
00073 
00074     static int GetCount() { return s_count; }
00075 private:
00076     CContig& operator = ( const CContig& );
00077 protected:
00078     string m_id;
00079     char * m_data;
00080     size_t m_size;
00081     CSeqCoding::ECoding m_coding;
00082     bool m_owns;
00083 
00084     static int s_count;
00085 };
00086 
00087 class IAligner;
00088 class CMappedAlignment : public IMappedAlignment
00089 {
00090 public:
00091     typedef map<string,string> TTags;
00092     enum EFlags { fOwnQuery = 0x01, fOwnSubject = 0x02, fOwnSeqs = fOwnQuery|fOwnSubject };
00093     enum ETagType { eType_int = 'i', eType_float = 'f', eType_string = 'Z', eType_NONE = 0 };
00094     virtual const char * GetId() const { return 0; }
00095     virtual const IShortRead * GetQuery() const { return m_query; }
00096     virtual const INucSeq * GetSubject() const { return m_subject; }
00097     virtual TRange GetSubjectBounding() const { return TRange( m_sstart, m_sstart + m_slength - 1 ); }
00098     virtual TRange GetQueryBounding() const { return m_qlength > 0 ? TRange( m_qstart, m_qstart + m_qlength - 1 ) : TRange( m_qstart + m_qlength + 1, m_qstart ); }
00099     virtual const TTrSequence GetCigar() const { return m_cigar; }
00100     virtual bool IsReverseComplement() const { return m_qlength < 0; }
00101     virtual const IMappedAlignment * GetMate() const { return m_mate; }
00102     void SetMate( CMappedAlignment * other );
00103     void WriteAsSam( ostream& out ) const;
00104     void SetFlags( int flags, bool on = true ) { if( on ) m_flags |= flags; else m_flags &= ~flags; }
00105     void AdjustQueryFromBy( int );
00106     void AdjustQueryToBy( int  );
00107     void AdjustSubjectFromBy( int  );
00108     void AdjustSubjectToBy( int  );
00109     void AdjustQueryFromTo( int );
00110     void AdjustQueryToTo( int  );
00111     void AdjustSubjectFromTo( int  );
00112     void AdjustSubjectToTo( int  );
00113     void SetTagS( const string& name, const string& val );
00114     void SetTagI( const string& name, int val );
00115     void SetTagF( const string& name, float val );
00116     char GetTagType( const string& name ) const;
00117     string GetTagS( const string& name, char type = 'Z' ) const;
00118     int GetTagI( const string& name ) const;
00119     float GetTagF( const string& name ) const;
00120     void RemoveTag( const string& name );
00121     void Assign( const CMappedAlignment * other );
00122     void SetTags( TTags * tags ) { delete m_tags; m_tags = tags; }
00123     const TTags& GetTags() const { static TTags notags; if( m_tags ) return *m_tags; else return notags; }
00124     CMappedAlignment * MakeExtended( IAligner * aligner ) const;
00125     CMappedAlignment * Clone() const { return new CMappedAlignment( *this ); }
00126     CMappedAlignment( const CSRead * query, const CContig * subject, int from, const TTrVector& cigar, CSeqCoding::EStrand strand, int flags = 0, TTags * tags = 0 );
00127     ~CMappedAlignment() { if( m_flags & fOwnQuery ) delete m_query; if( m_flags & fOwnSubject ) delete m_subject; delete m_tags; --s_count; }
00128     void PrintDebug( ostream& out ) const {
00129         out << m_cigar.ToString() << DISPLAY( m_sstart ) << DISPLAY( m_slength ) << DISPLAY( m_qstart ) << DISPLAY( m_qlength ) << hex << DISPLAY( m_flags ) << dec;
00130     }
00131     bool ValidateConsistency( const string& context = "" ) const;
00132     bool operator == ( const CMappedAlignment& other ) const { 
00133         return 
00134             strcmp( m_query->GetId(), other.m_query->GetId() ) == 0 &&
00135             strcmp( m_subject->GetId(), other.m_subject->GetId() ) == 0 &&
00136             m_sstart == other.m_sstart && m_slength == other.m_slength &&
00137             m_qstart == other.m_qstart && m_qlength == other.m_qlength &&
00138             m_cigar == other.m_cigar;
00139     }
00140     bool operator != ( const CMappedAlignment& other ) const { return !operator == ( other ); }
00141 
00142 
00143     static int GetCount() { return s_count; }
00144 
00145     double ScoreAlignment( double id = 1, double mm = -1, double go = -3, double ge = -1.5 ) const;
00146     static double ScoreAlignment( const TTrSequence& cigar, double id = 1, double mm = -1, double go = -3, double ge = -1.5 );
00147 
00148 private:
00149     CMappedAlignment& operator = ( const CMappedAlignment& );
00150     enum EAdjustMode { eAdjust_query, eAdjust_subj };
00151     void AdjustAlignment( EAdjustMode, int fromBy, int toBy  );
00152 protected:
00153     //int x_AdjustCigar( int advanceFrontBy, EAdjustMode );
00154     CMappedAlignment( const CMappedAlignment& a );
00155 protected:
00156     const CSRead * m_query;
00157     const CContig * m_subject;
00158     const CMappedAlignment * m_mate;
00159     Int4 m_sstart;
00160     Int2 m_slength;
00161     Int2 m_qstart;
00162     Int2 m_qlength;
00163     Int2 m_flags;
00164     TTrSequence m_cigar;
00165     TTags * m_tags;
00166     static int s_count;
00167 public:
00168     static bool s_validate_consistency;
00169 };
00170 
00171 class CSamSource : public IAlignmentMap, public CSamBase
00172 {
00173 public:
00174     typedef vector<CMappedAlignment*> TAlignmentList;
00175     typedef vector<Uint8> TFileOffsets;
00176     typedef map<string,CSRead*> TReads;
00177     typedef map<string,CContig*> TContigs;
00178     typedef map<string,TAlignmentList> TAlignmentMap;
00179     typedef map<string,TFileOffsets> TAlignmentOffsetMap;
00180     typedef map<pair<string, pair<string,int> >, CMappedAlignment* > TPendingHits;
00181 
00182     enum ERegisterFlags { fRegisterQuery = 0x01, fRegisterSubject = 0x02, fRegisterAlignmentByQuery = 0x04, fRegisterAlignmentBySubject = 0x08 };
00183 
00184     ~CSamSource();
00185     CMappedAlignment * ParseSamLine( const vector<string>& samLine, int flags = 0 );
00186     CMappedAlignment * ParseSamLine( const string& samLine, int flags = 0 );
00187 
00188     void IndexFile( const string& samFile, unsigned start, unsigned limit );
00189     
00190     static TTrVector GetFullCigar( const TTrVector& cigar, const string& mismatches );
00191 
00192     virtual void GetAlignmentsForQueryId( ICallback *, const char * id, int mates = 3 ) {}
00193     virtual void GetAlignmentsForSubjectId( ICallback *, const char * id, int strands = 3, int from = kSequenceBegin, int to = kSequenceEnd ) {}
00194     virtual void GetAllAlignments( ICallback * ) {}
00195     virtual void GetAllQueries( ICallback * ) {}
00196     virtual void GetAllSubjects( ICallback * ) {}
00197 
00198     virtual void AddAlignment( IMappedAlignment * ma ) {}
00199     virtual void AddSubject( INucSeq * ma ) {}
00200     virtual void AddQuery( IShortRead * ma ) {}
00201 
00202     const TAlignmentOffsetMap& GetAlignmentOffsetMap() const { return m_alignmentOffsets; }
00203     const TFileOffsets& GetAlignmentOffsets( const string& ctg ) const { 
00204         TAlignmentOffsetMap::const_iterator x = m_alignmentOffsets.find( ctg ); 
00205         if( x == m_alignmentOffsets.end() ) {
00206             static TFileOffsets null;
00207             return null;
00208         }
00209         else return x->second;
00210     }
00211 protected:
00212     TContigs m_contigs;
00213     TReads m_reads;
00214     TAlignmentMap m_alignmentsByContig;
00215     TAlignmentMap m_alignmentsByRead;
00216     TAlignmentOffsetMap m_alignmentOffsets;
00217     TPendingHits m_pendingHits;
00218     auto_ptr<ifstream> m_samFile;
00219 };
00220 
00221 inline void CMappedAlignment::AdjustQueryFromBy( int by ) 
00222 {
00223     AdjustAlignment( eAdjust_query, by, 0 );
00224 }
00225 
00226 inline void CMappedAlignment::AdjustQueryToBy( int by ) 
00227 {
00228     AdjustAlignment( eAdjust_query, 0, by );
00229 }
00230 
00231 inline void CMappedAlignment::AdjustSubjectFromBy( int by ) 
00232 {
00233     AdjustAlignment( eAdjust_subj, by, 0 );
00234 }
00235 
00236 inline void CMappedAlignment::AdjustSubjectToBy( int by ) 
00237 {
00238     AdjustAlignment( eAdjust_subj, 0, by );
00239 }
00240 
00241 inline void CMappedAlignment::AdjustQueryFromTo( int newFrom ) 
00242 {
00243     AdjustQueryFromBy( newFrom - GetQueryBounding().GetFrom() );
00244 }
00245 
00246 inline void CMappedAlignment::AdjustQueryToTo( int newTo ) 
00247 {
00248     AdjustQueryToBy( newTo - GetQueryBounding().GetTo() );
00249 }
00250 
00251 inline void CMappedAlignment::AdjustSubjectFromTo( int newFrom ) 
00252 {
00253     AdjustSubjectFromBy( newFrom - GetSubjectBounding().GetFrom() );
00254 }
00255 
00256 inline void CMappedAlignment::AdjustSubjectToTo( int newTo ) 
00257 {
00258     AdjustSubjectFromBy( newTo - GetSubjectBounding().GetTo() );
00259 }
00260 
00261 END_OLIGOFAR_SCOPES
00262 
00263 #endif
00264 
00265 

Generated on Sun Dec 6 22:21:11 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:55 2009 by modify_doxy.py rev. 173732