|
NCBI C++ ToolKit
|
00001 /* $Id: aln_scoring.cpp 23124 2011-02-10 17:09:51Z kuznets $ 00002 * =========================================================================== 00003 * 00004 * PUBLIC DOMAIN NOTICE 00005 * National Center for Biotechnology Information 00006 * 00007 * This software/database is a "United States Government Work" under the 00008 * terms of the United States Copyright Act. It was written as part of 00009 * the author's official duties as a United States Government employee and 00010 * thus cannot be copyrighted. This software/database is freely available 00011 * to the public for use. The National Library of Medicine and the U.S. 00012 * Government have not placed any restriction on its use or reproduction. 00013 * 00014 * Although all reasonable efforts have been taken to ensure the accuracy 00015 * and reliability of the software and data, the NLM and the U.S. 00016 * Government do not and cannot warrant the performance or results that 00017 * may be obtained by using this software or data. The NLM and the U.S. 00018 * Government disclaim all warranties, express or implied, including 00019 * warranties of performance, merchantability or fitness for any particular 00020 * purpose. 00021 * 00022 * Please cite the author in any work or product based on this material. 00023 * 00024 * =========================================================================== 00025 * 00026 * Authors: Andrey Yazhuk 00027 */ 00028 00029 #include <ncbi_pch.hpp> 00030 00031 #include <corelib/ncbistd.hpp> 00032 #include <corelib/ncbistl.hpp> 00033 #include <corelib/ncbireg.hpp> 00034 #include <corelib/ncbifile.hpp> 00035 00036 #include <gui/widgets/data/aln_scoring.hpp> 00037 00038 #include <gui/utils/event_translator.hpp> 00039 #include <gui/utils/system_path.hpp> 00040 00041 #include <stdio.h> 00042 #include <math.h> 00043 #include <numeric> 00044 00045 BEGIN_NCBI_SCOPE 00046 00047 00048 /////////////////////////////////////////////////////////////////////////////// 00049 /// CScoringParams 00050 00051 CScoringParams::CScoringParams() 00052 : m_Alignment(NULL), 00053 m_Method(NULL), 00054 m_GradNumber(16) 00055 { 00056 } 00057 00058 00059 /////////////////////////////////////////////////////////////////////////////// 00060 /// CScoreCache 00061 00062 CScoreCache::CScoreCache() 00063 : m_ScoreColls(NULL), 00064 m_EnBgProcessing(true), 00065 m_JobID(-1), 00066 m_HasScores(false) 00067 { 00068 m_ScoreColls = new TScoreCollVector(); 00069 } 00070 00071 00072 CScoreCache::~CScoreCache() 00073 { 00074 if(x_IsJobRunning()) { 00075 x_DeleteJob(); 00076 } 00077 00078 delete m_ScoreColls; 00079 } 00080 00081 00082 BEGIN_EVENT_MAP(CScoreCache, CEventHandler) 00083 ON_MESSAGE(CAppJobNotification, CAppJobNotification::eStateChanged, 00084 &CScoreCache::OnAJNotification) 00085 ON_MESSAGE(CAppJobNotification, CAppJobNotification::eProgress, 00086 &CScoreCache::OnAJNotification) 00087 END_EVENT_MAP() 00088 00089 00090 void CScoreCache::SetListener(IListener* listener) 00091 { 00092 m_Listener = listener; 00093 } 00094 00095 00096 void CScoreCache::EnableBackgoundProcessing(bool en) 00097 { 00098 m_EnBgProcessing = en; 00099 } 00100 00101 00102 void CScoreCache::SetGradNumber(int grad_n) 00103 { 00104 _ASSERT(grad_n > 1 && grad_n <= 0xFFFF); 00105 m_Params.m_GradNumber = grad_n; 00106 } 00107 00108 00109 void CScoreCache::SetScoringMethod(IScoringMethod *method) 00110 { 00111 // changing method requires sopping the running job 00112 if(x_IsJobRunning()) { 00113 x_DeleteJob(); 00114 } 00115 00116 m_Params.m_Method = method; 00117 } 00118 00119 00120 IScoringMethod* CScoreCache::GetScoringMethod() 00121 { 00122 return m_Params.m_Method; 00123 } 00124 00125 00126 const IScoringMethod* CScoreCache::GetScoringMethod() const 00127 { 00128 return m_Params.m_Method; 00129 } 00130 00131 00132 void CScoreCache::SetAlignment(IScoringAlignment* aln) 00133 { 00134 m_Params.m_Alignment = aln; 00135 } 00136 00137 00138 const IScoringAlignment* CScoreCache::GetAlignment() const 00139 { 00140 return m_Params.m_Alignment; 00141 } 00142 00143 00144 void CScoreCache::ResetScores() 00145 { 00146 //LOG_POST("CScoreCache::CalculateScores() - Started "); 00147 CAppJobDispatcher& disp = CAppJobDispatcher::GetInstance(); 00148 00149 if(m_Job) { 00150 // job is already running - delete it 00151 if (m_EnBgProcessing) { 00152 disp.DeleteJob(m_JobID); 00153 m_JobID = -1; 00154 } 00155 m_Job.Reset(); 00156 } 00157 00158 m_HasScores = false; 00159 m_ScoreColls->clear(); 00160 } 00161 00162 /// Calculates scores for the given CAlnVec object and saves results in form of 00163 /// TScoreColl objects. 00164 void CScoreCache::CalculateScores() 00165 { 00166 ResetScores(); 00167 00168 if(m_Params.m_Method) { 00169 m_Job.Reset(new CScoringJob(m_Params)); 00170 00171 if(m_EnBgProcessing) { 00172 CAppJobDispatcher& disp = CAppJobDispatcher::GetInstance(); 00173 // use CAppJobDispatcher to execute background jobs 00174 m_JobID = disp.StartJob(*m_Job, "ThreadPool", *this, 1, true); 00175 } else { 00176 // do everything synchronously 00177 m_JobID = -1; 00178 m_Job->Run(); 00179 m_HasScores = x_TransferResults(m_Job->GetResult().GetPointer()); 00180 m_Job.Reset(); 00181 } 00182 } 00183 //LOG_POST("CScoreCache::CalculateScores() - Finished "); 00184 } 00185 00186 00187 void CScoreCache::OnAJNotification(CEvent* evt) 00188 { 00189 CAppJobNotification* notn = 00190 dynamic_cast<CAppJobNotification*>(evt); 00191 _ASSERT(notn); 00192 00193 if(notn) { 00194 int job_id = notn->GetJobID(); 00195 if(m_JobID != job_id) { 00196 ERR_POST("CScoringCache::OnAJNotification() - unknown Job ID " << job_id); 00197 } else { 00198 switch(notn->GetState()) { 00199 case IAppJob::eCompleted: 00200 x_OnJobCompleted(*notn); 00201 break; 00202 case IAppJob::eFailed: 00203 x_OnJobFailed(*notn); 00204 break; 00205 case IAppJob::eCanceled: 00206 x_OnJobCanceled(*notn); 00207 break; 00208 case IAppJob::eRunning: 00209 x_OnJobProgress(*notn); 00210 break; 00211 default: 00212 _ASSERT(false); 00213 } 00214 } 00215 } 00216 } 00217 00218 00219 bool CScoreCache::x_IsJobRunning() 00220 { 00221 return m_Job.GetPointer() != NULL; 00222 } 00223 00224 00225 void CScoreCache::x_DeleteJob() 00226 { 00227 if (m_EnBgProcessing) { 00228 _ASSERT(m_Job && m_JobID != -1); 00229 00230 CAppJobDispatcher& disp = CAppJobDispatcher::GetInstance(); 00231 disp.DeleteJob(m_JobID); 00232 00233 m_JobID = -1; 00234 } 00235 00236 m_Job.Reset(); 00237 00238 if(m_Listener) { 00239 m_Listener->OnScoringFinished(); 00240 } 00241 } 00242 00243 00244 void CScoreCache::x_OnJobCompleted(CAppJobNotification& notn) 00245 { 00246 CRef<CObject> obj = notn.GetResult(); 00247 x_TransferResults(obj.GetPointer()); 00248 00249 m_JobID = -1; 00250 m_Job.Reset(); 00251 00252 if(m_Listener) { 00253 //LOG_POST("Completed - m_Listener->OnScoringFinished()"); 00254 m_Listener->OnScoringFinished(); 00255 } 00256 } 00257 00258 00259 bool CScoreCache::x_TransferResults(CObject* result) 00260 { 00261 CScoringJobResult* sc_res = dynamic_cast<CScoringJobResult*>(result); 00262 if(sc_res) { 00263 delete m_ScoreColls; // delete old data 00264 00265 // take ownership of the results 00266 m_ScoreColls = sc_res->m_ScoreColls; 00267 sc_res->m_ScoreColls = NULL; 00268 00269 m_HasScores = true; 00270 _ASSERT(m_ScoreColls ); 00271 return true; 00272 } else { 00273 ERR_POST("CScoreCache::x_TransferResults() - invalid results!"); 00274 _ASSERT(sc_res); 00275 return false; 00276 } 00277 } 00278 00279 00280 void CScoreCache::x_OnJobFailed(CAppJobNotification& notn) 00281 { 00282 CConstIRef<IAppJobError> err = notn.GetError(); 00283 if(err) { 00284 //TODO 00285 } else { 00286 //TODO 00287 } 00288 00289 m_JobID = -1; 00290 m_Job.Reset(); 00291 00292 if(m_Listener) { 00293 //LOG_POST("Failed - m_Listener->OnScoringFinished()"); 00294 m_Listener->OnScoringFinished(); 00295 } 00296 } 00297 00298 00299 void CScoreCache::x_OnJobCanceled(CAppJobNotification& /*notn*/) 00300 { 00301 m_JobID = -1; 00302 m_Job.Reset(); 00303 00304 if(m_Listener) { 00305 //LOG_POST("Canceled - m_Listener->OnScoringFinished()"); 00306 m_Listener->OnScoringFinished(); 00307 } 00308 } 00309 00310 00311 void CScoreCache::x_OnJobProgress(CAppJobNotification& notn) 00312 { 00313 CConstIRef<IAppJobProgress> prg = notn.GetProgress(); 00314 if(prg) { 00315 00316 if(m_Listener) { 00317 float norm_done = prg->GetNormDone(); 00318 m_Listener->OnScoringProgress(norm_done, "Calculating alignment coloration..."); 00319 } 00320 } 00321 } 00322 00323 00324 bool CScoreCache::HasScores() const 00325 { 00326 return m_HasScores; 00327 } 00328 00329 00330 const CRgbaColor& CScoreCache::GetColorForNoScore(IScoringMethod::EColorType type) const 00331 { 00332 return m_Params.m_Method->GetColorForNoScore(type); 00333 } 00334 00335 00336 const CScoreCache::TScoreColl& CScoreCache::GetScores(TNumrow row) const 00337 { 00338 _ASSERT(m_ScoreColls && row >= 0 && row < (TNumrow) m_ScoreColls->size()); 00339 00340 return (*m_ScoreColls)[row]; 00341 } 00342 00343 00344 00345 /////////////////////////////////////////////////////////////////////////////// 00346 /// CScoringJob 00347 00348 CScoringJob::CScoringJob(CScoringParams& params) 00349 : m_ScoreColls(NULL), 00350 m_BufferStart(0), 00351 m_RowLength(0) 00352 { 00353 m_Params.m_Alignment = params.m_Alignment; 00354 00355 CIRef<IUITool> tool(params.m_Method->Clone()); // make a copy 00356 IScoringMethod* method = dynamic_cast<IScoringMethod*>(tool.GetPointer()); 00357 _ASSERT(method); 00358 m_Params.m_Method.Reset(method); 00359 00360 m_Params.m_GradNumber = params.m_GradNumber; 00361 00362 m_ScoreColls = new TScoreCollVector(); 00363 _ASSERT(m_Params.m_Method && m_Params.m_Alignment); 00364 00365 m_Descr = "Alignment Scoring Job, method " + m_Params.m_Method->GetName(); 00366 //LOG_POST("CScoringJob::CScoringJob() " << m_Descr); 00367 } 00368 00369 00370 CScoringJob::~CScoringJob() 00371 { 00372 //LOG_POST("CScoringJob::~CScoringJob() Destructor " << m_Descr); 00373 delete m_ScoreColls; 00374 } 00375 00376 00377 IAppJob::EJobState CScoringJob::Run() 00378 { 00379 /* 00380 LOG_POST(Info << "CScoringJob::Run() Started " << m_Descr 00381 << " BufferStart=" << m_BufferStart 00382 << " RowLength=" << m_RowLength 00383 << " m_vRows.size()= " << m_vRows.size() 00384 ); 00385 */ 00386 m_Result.Reset(); 00387 m_Error.Reset(); 00388 m_StopRequested.Set(0); 00389 00390 if(Calculate()) { 00391 // trnasfer collection onwership to the result 00392 m_Result.Reset(new CScoringJobResult(m_ScoreColls)); 00393 m_ScoreColls = NULL; 00394 return eCompleted; 00395 } 00396 //LOG_POST("CScoringJob::Run() Finished " << m_Descr); 00397 return eFailed; 00398 } 00399 00400 00401 00402 CConstIRef<IAppJobProgress> CScoringJob::GetProgress() 00403 { 00404 CAppJobProgress* prg = new CAppJobProgress(m_NormDone, ""); 00405 return CConstIRef<IAppJobProgress>(prg); 00406 } 00407 00408 00409 CRef<CObject> CScoringJob::GetResult() 00410 { 00411 return CRef<CObject>(m_Result.GetPointer()); 00412 } 00413 00414 00415 CConstIRef<IAppJobError> CScoringJob::GetError() 00416 { 00417 return CConstIRef<IAppJobError>(m_Error.GetPointer()); 00418 } 00419 00420 00421 string CScoringJob::GetDescr() const 00422 { 00423 return m_Descr; 00424 } 00425 00426 00427 bool CScoringJob::Calculate() 00428 { 00429 _ASSERT(m_ScoreColls); 00430 00431 if(m_Params.m_Method && m_Params.m_Alignment) { 00432 CStopWatch sw; 00433 sw.Start(); 00434 00435 TNumrow row_n = m_Params.m_Alignment->GetNumRows(); 00436 m_ScoreColls->resize(row_n); 00437 00438 IRowScoringMethod* row_method = dynamic_cast<IRowScoringMethod*>(m_Params.m_Method.GetPointer()); 00439 if(row_method) { 00440 x_CalculateSequenceScores(*row_method); 00441 } else { 00442 IColumnScoringMethod* col_method = 00443 dynamic_cast<IColumnScoringMethod*>(m_Params.m_Method.GetPointer()); 00444 00445 if(col_method) { 00446 if(col_method->CanCalculateScores(*m_Params.m_Alignment)) { 00447 x_CalculateAlignmentScores(*col_method); 00448 } else { 00449 return false; 00450 } 00451 00452 } else { 00453 _ASSERT(false); // invalid method 00454 } 00455 } 00456 IWindowScoringMethod * win_method 00457 = dynamic_cast<IWindowScoringMethod*>(m_Params.m_Method.GetPointer()); 00458 if (win_method && win_method->GetWindowSize() > 0) { 00459 x_WindowAvgScores(win_method->GetWindowSize()); 00460 } 00461 } 00462 return true; 00463 } 00464 00465 00466 CScoringJob::TScoreCollVector* CScoringJob::GetResults() 00467 { 00468 TScoreCollVector* res = m_ScoreColls; 00469 m_ScoreColls = NULL; 00470 return res; 00471 } 00472 00473 00474 /////////////////////////////////////////////////////////////////////////////// 00475 /// Sequence buffer management routins 00476 00477 inline char CScoringJob::x_BufferGetSeq(TSeqPos pos, TNumrow row) const 00478 { 00479 _ASSERT(pos >= m_BufferStart && pos < m_BufferStart + m_RowLength); 00480 _ASSERT(row >= 0 && row < (TNumrow) m_vRows.size()); 00481 00482 return m_vRows[row][pos - m_BufferStart]; 00483 } 00484 00485 00486 void CScoringJob::x_AllocBuffer(TSeqPos row_len) 00487 { 00488 _ASSERT(m_Params.m_Alignment); 00489 00490 int rows_n = m_Params.m_Alignment->GetNumRows(); 00491 if(rows_n != (TNumrow) m_vRows.size() || m_RowLength != row_len) { 00492 m_RowLength = row_len; 00493 00494 m_vRows.resize(rows_n); 00495 NON_CONST_ITERATE(vector<string>, itR, m_vRows) { 00496 itR->resize(m_RowLength); 00497 } 00498 } 00499 } 00500 00501 00502 void CScoringJob::x_FreeBuffer() 00503 { 00504 m_vRows.clear(); 00505 } 00506 00507 00508 void CScoringJob::x_UpdateBuffer(TSeqPos start, TSeqPos stop) 00509 { 00510 _ASSERT(m_Params.m_Alignment); 00511 _ASSERT( (stop - start + 1) <= m_RowLength); 00512 00513 m_BufferStart = start; 00514 00515 TNumrow row_n = (TNumrow) m_vRows.size(); 00516 for( TNumrow r = 0; r < row_n; r++ ) { 00517 m_Params.m_Alignment->GetAlnSeqString(r, m_vRows[r], IAlnExplorer::TSignedRange(start, stop)); 00518 } 00519 } 00520 00521 00522 void CScoringJob::x_BufferGetColumn(TSeqPos pos, string& column) const 00523 { 00524 _ASSERT(pos >= m_BufferStart && pos < m_BufferStart + m_RowLength); 00525 00526 size_t col = pos - m_BufferStart; 00527 for( size_t row = 0; row < m_vRows.size(); row++ ) { 00528 column[row] = m_vRows[row][col]; 00529 } 00530 } 00531 00532 00533 void CScoringJob::x_CalculateSequenceScores(IRowScoringMethod& method) 00534 { 00535 TNumrow row_n = m_Params.m_Alignment->GetNumRows(); 00536 for(TNumrow r = 0; r < row_n && ! x_IsCanceled(); r++ ) { 00537 m_NormDone = float(r) / row_n; 00538 method.CalculateScores(r, *m_Params.m_Alignment, (*m_ScoreColls)[r]); 00539 //Sleep(1000); 00540 } 00541 } 00542 00543 00544 void CScoringJob::x_CalculateAlignmentScores(IColumnScoringMethod& method) 00545 { 00546 _ASSERT(m_ScoreColls); 00547 00548 TSeqPos start = m_Params.m_Alignment->GetAlnStart(); 00549 TSeqPos stop = m_Params.m_Alignment->GetAlnStop(); 00550 TNumrow row_n = m_Params.m_Alignment->GetNumRows(); 00551 00552 // preparing score collections 00553 NON_CONST_ITERATE(TScoreCollVector, itC, *m_ScoreColls) { 00554 itC->SetFrom(start); // clear and initialize 00555 } 00556 string column(row_n, '\0'); 00557 TScore col_score = 0; 00558 TScoreVector v_col_scores(row_n, 0.0); 00559 00560 const TSeqPos kPageSize = 256; 00561 x_AllocBuffer(kPageSize); 00562 00563 TScore grad_n = (TScore) m_Params.m_GradNumber; 00564 00565 TNumrow cons_row = m_Params.m_Alignment->GetAnchor(); 00566 00567 // iterate from "start" to "stop" using "sliding buffer" 00568 for( TSeqPos pos = start; pos < stop && ! x_IsCanceled(); ) { 00569 m_NormDone = float(pos - start) / (stop - start); // update progress 00570 00571 TSeqPos pos_stop = min(pos + kPageSize -1, stop); 00572 x_UpdateBuffer(pos, pos_stop); // fetch next page in Seq Buffer 00573 00574 for( TSeqPos p = pos; p <= pos_stop ; p++ ) { // for each column 00575 x_BufferGetColumn(p, column); 00576 char cons = (cons_row > -1) ? column[cons_row] : 0; 00577 method.CalculateScores(cons, column, col_score, v_col_scores); 00578 00579 // append scores to collections 00580 for(TNumrow r = 0; r < row_n; r++ ) { 00581 TScore sc = v_col_scores[r]; 00582 sc = ((int) (sc * grad_n)) / grad_n; 00583 (*m_ScoreColls)[r].push_back(sc); 00584 } 00585 } 00586 pos = pos_stop + 1; 00587 //Sleep(1000); 00588 } 00589 } 00590 00591 00592 void CScoringJob::x_WindowAvgScores(size_t window_size) 00593 { 00594 typedef TScoreColl::const_pos_iterator TPosIt; 00595 00596 for( size_t i = 0; i < m_ScoreColls->size() && ! x_IsCanceled(); i++) { 00597 m_NormDone = float(i) / m_ScoreColls->size(); 00598 00599 TScoreColl& score_row = (*m_ScoreColls)[i]; 00600 TScoreColl avgd_score_row; 00601 TPosIt itC = score_row.begin_pos(); 00602 TPosIt itC_end = score_row.end_pos(); 00603 00604 // Min/Max before and after averaging to 00605 // renormalise values afterwards. 00606 TScore old_min = numeric_limits<TScore>::max(); 00607 TScore old_max = numeric_limits<TScore>::min(); 00608 ITERATE(TScoreColl, it_sc, score_row) { 00609 TScore this_score = it_sc->GetAttr(); 00610 if (this_score < old_min) old_min = this_score; 00611 if (this_score > old_max) old_max = this_score; 00612 } 00613 TScore new_min = numeric_limits<TScore>::max(); 00614 TScore new_max = numeric_limits<TScore>::min(); 00615 00616 // calculate our first windowed average. 00617 TPosIt win_begin = itC; 00618 TPosIt win_end = itC_end; 00619 if (window_size < score_row.GetLength()) { 00620 win_end = itC + 1 + int(window_size); 00621 } 00622 00623 TScore win_sum = (TScore) accumulate(win_begin, win_end, (TScore)0); 00624 TScore win_score = win_sum / (win_end - win_begin); 00625 avgd_score_row.push_back(win_score); 00626 if (win_score < new_min) { 00627 new_min = win_score; 00628 } 00629 if (win_score > new_max) { 00630 new_max = win_score; 00631 } 00632 00633 if(x_IsCanceled()) { 00634 return; 00635 } 00636 00637 for( ++itC; itC != itC_end; ++itC ) { 00638 // Move the window & update the average. 00639 if ((itC - win_begin) > (int) window_size) { 00640 // move the beginning of the window, the trailing edge. 00641 win_sum -= *win_begin; 00642 ++win_begin; 00643 } 00644 if (win_end < itC_end) { 00645 // move the end of the window, the leading edge. 00646 win_sum += *win_end; 00647 ++win_end; 00648 } 00649 win_score = win_sum/(win_end - win_begin); 00650 avgd_score_row.push_back(win_score); 00651 if (win_score < new_min) new_min = win_score; 00652 if (win_score > new_max) new_max = win_score; 00653 } 00654 00655 if(x_IsCanceled()) { 00656 return; 00657 } 00658 00659 // renormalize scores back to the dynamic range they had 00660 // before we averaged them. 00661 TScoreColl norm_score_row; 00662 TScore norm_slope = (old_max - old_min) / (new_max - new_min); 00663 ITERATE(TScoreColl, it_sc, avgd_score_row) { 00664 norm_score_row.push_back( 00665 (it_sc->GetAttr() - new_min) * norm_slope + old_min, 00666 it_sc->GetLength() ); 00667 } 00668 score_row.swap(norm_score_row); 00669 } 00670 } 00671 00672 00673 END_NCBI_SCOPE
1.7.5.1
Modified on Wed May 23 12:57:03 2012 by modify_doxy.py rev. 337098