00001 #ifndef ALGO_ALIGN_NW_NW_ALIGNER__HPP
00002 #define ALGO_ALIGN_NW_NW_ALIGNER__HPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #include <corelib/ncbistd.hpp>
00040 #include <corelib/ncbiobj.hpp>
00041 #include <corelib/ncbi_limits.hpp>
00042 #include <util/tables/raw_scoremat.h>
00043 #include <objects/seqloc/Na_strand.hpp>
00044
00045 #include <vector>
00046 #include <string>
00047
00048
00049
00050
00051
00052
00053
00054
00055 BEGIN_NCBI_SCOPE
00056
00057 BEGIN_SCOPE(objects)
00058 class CDense_seg;
00059 class CSeq_id;
00060 END_SCOPE(objects)
00061
00062
00063
00064
00065 class CNWAligner: public CObject
00066 {
00067 public:
00068 typedef int TScore;
00069
00070
00071 CNWAligner(void);
00072
00073
00074 CNWAligner(const char* seq1, size_t len1,
00075 const char* seq2, size_t len2,
00076 const SNCBIPackedScoreMatrix* scoremat = 0);
00077
00078 CNWAligner(const string& seq1,
00079 const string& seq2,
00080 const SNCBIPackedScoreMatrix* scoremat = 0);
00081
00082 virtual ~CNWAligner(void) {}
00083
00084
00085 virtual TScore Run(void);
00086
00087
00088 virtual void SetSequences(const char* seq1, size_t len1,
00089 const char* seq2, size_t len2,
00090 bool verify = true);
00091
00092 void SetSequences(const string& seq1,
00093 const string& seq2,
00094 bool verify = true);
00095
00096 void SetScoreMatrix(const SNCBIPackedScoreMatrix* scoremat);
00097
00098 void SetWm (TScore value);
00099 void SetWms (TScore value);
00100 void SetWg (TScore value) { m_Wg = value; }
00101 void SetWs (TScore value) { m_Ws = value; }
00102
00103
00104 void SetEndSpaceFree(bool Left1, bool Right1, bool Left2, bool Right2);
00105
00106
00107 void SetPattern(const vector<size_t>& pattern);
00108
00109
00110 void SetSpaceLimit(const size_t& maxmem) { m_MaxMem = maxmem; }
00111
00112
00113 struct SProgressInfo
00114 {
00115 SProgressInfo(void): m_iter_done(0), m_iter_total(0), m_data(0) {}
00116 size_t m_iter_done;
00117 size_t m_iter_total;
00118 void* m_data;
00119 char m_text_buffer [1024];
00120 };
00121
00122
00123 typedef bool (*FProgressCallback) (SProgressInfo*);
00124 void SetProgressCallback ( FProgressCallback prg_callback, void* data );
00125
00126
00127 static TScore GetDefaultWm (void) { return 1; }
00128 static TScore GetDefaultWms (void) { return -2; }
00129 static TScore GetDefaultWg (void) { return -5; }
00130 static TScore GetDefaultWs (void) { return -2; }
00131
00132 TScore GetWm (void) const { return m_Wm; }
00133 TScore GetWms (void) const { return m_Wms; }
00134 TScore GetWg (void) const { return m_Wg; }
00135 TScore GetWs (void) const { return m_Ws; }
00136
00137 const char* GetSeq1(void) const { return m_Seq1; }
00138 size_t GetSeqLen1(void) const { return m_SeqLen1; }
00139 const char* GetSeq2(void) const { return m_Seq2; }
00140 size_t GetSeqLen2(void) const { return m_SeqLen2; }
00141
00142 void GetEndSpaceFree(bool* L1, bool* R1, bool* L2, bool* R2)
00143 const;
00144
00145 TScore GetScore(void) const;
00146
00147 size_t GetSpaceLimit(void) const { return m_MaxMem; }
00148 static size_t GetDefaultSpaceLimit(void) {
00149 return 0xFFFFFFFF;
00150 }
00151
00152
00153 enum ETranscriptSymbol {
00154 eTS_None = 0
00155 ,eTS_Delete = 'D'
00156 ,eTS_Insert = 'I'
00157 ,eTS_Match = 'M'
00158 ,eTS_Replace = 'R'
00159 ,eTS_Intron = 'Z'
00160 #ifdef ALGOALIGN_NW_SPLIGN_MAKE_PUBLIC_BINARY
00161 ,eTS_SlackDelete
00162 ,eTS_SlackInsert
00163 #endif
00164 };
00165 typedef vector<ETranscriptSymbol> TTranscript;
00166
00167
00168 TTranscript GetTranscript(bool reversed = true) const;
00169 void SetTranscript(const TTranscript& transcript);
00170
00171
00172 string GetTranscriptString(void) const;
00173
00174
00175
00176
00177 void SetPositivesAsMatches(bool positives_as_matches = true) {
00178 m_PositivesAsMatches = positives_as_matches;
00179 }
00180 bool GetPositivesAsMatches(void) const {
00181 return m_PositivesAsMatches;
00182 }
00183
00184
00185 size_t GetLeftSeg(size_t* q0, size_t* q1,
00186 size_t* s0, size_t* s1,
00187 size_t min_size) const;
00188 size_t GetRightSeg(size_t* q0, size_t* q1,
00189 size_t* s0, size_t* s1,
00190 size_t min_size) const;
00191 size_t GetLongestSeg(size_t* q0, size_t* q1,
00192 size_t* s0, size_t* s1) const;
00193
00194
00195 virtual size_t GetElemSize(void) const {
00196 return 1;
00197 }
00198
00199
00200
00201
00202 virtual TScore ScoreFromTranscript(const TTranscript& transcript,
00203 size_t start1 = kMax_UInt,
00204 size_t start2 = kMax_UInt ) const;
00205
00206 void EnableMultipleThreads(bool enable = true);
00207
00208
00209
00210 size_t MakePattern(const size_t hit_size = 100,
00211 const size_t core_size = 28);
00212
00213
00214 CRef<objects::CDense_seg> GetDense_seg(TSeqPos query_start,
00215 objects::ENa_strand query_strand,
00216 TSeqPos subj_start,
00217 objects::ENa_strand subj_strand)
00218 const;
00219
00220
00221 CRef<objects::CDense_seg> GetDense_seg(TSeqPos query_start,
00222 objects::ENa_strand query_strand,
00223 const objects::CSeq_id& query_id,
00224 TSeqPos subj_start,
00225 objects::ENa_strand subj_strand,
00226 const objects::CSeq_id& subj_id)
00227 const;
00228
00229 protected:
00230
00231
00232 TScore m_Wm;
00233 TScore m_Wms;
00234 TScore m_Wg;
00235 TScore m_Ws;
00236
00237
00238 bool m_esf_L1, m_esf_R1, m_esf_L2, m_esf_R2;
00239
00240
00241 const char* m_abc;
00242 SNCBIFullScoreMatrix m_ScoreMatrix;
00243 bool m_ScoreMatrixInvalid;
00244
00245
00246 FProgressCallback m_prg_callback;
00247
00248
00249 mutable SProgressInfo m_prg_info;
00250
00251
00252 mutable bool m_terminate;
00253
00254
00255 const char* m_Seq1;
00256 size_t m_SeqLen1;
00257 const char* m_Seq2;
00258 size_t m_SeqLen2;
00259 size_t x_CheckSequence(const char* seq, size_t len) const;
00260 virtual bool x_CheckMemoryLimit(void);
00261
00262
00263 unsigned char x_CalcFingerPrint64( const char* beg,
00264 const char* end,
00265 size_t& err_index );
00266 const char* x_FindFingerPrint64( const char* beg,
00267 const char* end,
00268 unsigned char fingerprint,
00269 size_t size,
00270 size_t& err_index );
00271
00272
00273 TTranscript m_Transcript;
00274 bool m_PositivesAsMatches;
00275 TScore m_score;
00276 vector<size_t> m_guides;
00277
00278
00279 bool m_mt;
00280 size_t m_maxthreads;
00281
00282
00283 size_t m_MaxMem;
00284
00285
00286 virtual TScore x_Run (void);
00287
00288
00289 struct SAlignInOut;
00290 virtual TScore x_Align (SAlignInOut* data);
00291
00292
00293 class CBacktraceMatrix4 {
00294 public:
00295
00296 CBacktraceMatrix4(size_t dim) {
00297 m_Buf = new Uint1 [dim / 2 + 1];
00298 m_Elem = 0;
00299 }
00300
00301 ~CBacktraceMatrix4() { delete [] m_Buf; }
00302
00303 void SetAt(size_t i, Uint1 v) {
00304 if(i & 1) {
00305 m_Buf[i >> 1] = m_Elem | (v << 4);
00306 }
00307 else {
00308 m_Elem = v;
00309 }
00310 }
00311
00312 void Purge(size_t i) {
00313 if(i & 1) {
00314 m_Buf[i >> 1] = m_Elem;
00315 }
00316 }
00317
00318 Uint1 operator[] (size_t i) const {
00319 return 0x0F & ((m_Buf[i >> 1]) >> ((i & 1) << 2));
00320 }
00321
00322 private:
00323
00324 Uint1 * m_Buf;
00325 Uint1 m_Elem;
00326 };
00327
00328 void x_DoBackTrace(const CBacktraceMatrix4 & backtrace,
00329 SAlignInOut* data);
00330
00331
00332 virtual ETranscriptSymbol x_GetDiagTS(size_t i1, size_t i2) const;
00333
00334 friend class CNWAlignerThread_Align;
00335 };
00336
00337
00338 struct CNWAligner::SAlignInOut {
00339
00340 SAlignInOut(): m_offset1(0), m_len1(0),
00341 m_offset2(0), m_len2(0),
00342 m_space(0) {}
00343
00344 SAlignInOut(size_t offset1, size_t len1, bool esfL1, bool esfR1,
00345 size_t offset2, size_t len2, bool esfL2, bool esfR2):
00346 m_offset1(offset1), m_len1(len1), m_esf_L1(esfL1), m_esf_R1(esfR1),
00347 m_offset2(offset2), m_len2(len2), m_esf_L2(esfL2), m_esf_R2(esfR2)
00348 {
00349 m_space = m_len1*m_len2;
00350 }
00351
00352
00353 size_t m_offset1;
00354 size_t m_len1;
00355 bool m_esf_L1, m_esf_R1;
00356
00357
00358 size_t m_offset2;
00359 size_t m_len2;
00360 bool m_esf_L2, m_esf_R2;
00361
00362
00363 TTranscript m_transcript;
00364
00365 size_t GetSpace(void) const {
00366 return m_space;
00367 }
00368
00369 static bool PSpace(const SAlignInOut* p1, const SAlignInOut* p2) {
00370 return p1->m_space >= p2->m_space;
00371 }
00372
00373 private:
00374
00375 size_t m_space;
00376 };
00377
00378
00379 namespace {
00380
00381 const char g_nwaligner_nucleotides [] = "AGTCBDHKMNRSVWY";
00382
00383 const CNWAligner::TScore kInfMinus ( -(numeric_limits<CNWAligner::TScore>::
00384 max() / 2) );
00385 }
00386
00387 END_NCBI_SCOPE
00388
00389
00390
00391
00392 #endif
00393
00394