NCBI C++ ToolKit
aln_scoring.cpp
Go to the documentation of this file.
00001 /*  $Id: aln_scoring.cpp 23124 2011-02-10 17:09:51Z kuznets $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data,  the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties,  express or implied,  including
00019  *  warranties of performance,  merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Authors:  Andrey Yazhuk
00027  */
00028 
00029 #include <ncbi_pch.hpp>
00030 
00031 #include <corelib/ncbistd.hpp>
00032 #include <corelib/ncbistl.hpp>
00033 #include <corelib/ncbireg.hpp>
00034 #include <corelib/ncbifile.hpp>
00035 
00036 #include <gui/widgets/data/aln_scoring.hpp>
00037 
00038 #include <gui/utils/event_translator.hpp>
00039 #include <gui/utils/system_path.hpp>
00040 
00041 #include <stdio.h>
00042 #include <math.h>
00043 #include <numeric>
00044 
00045 BEGIN_NCBI_SCOPE
00046 
00047 
00048 ///////////////////////////////////////////////////////////////////////////////
00049 /// CScoringParams
00050 
00051 CScoringParams::CScoringParams()
00052 : m_Alignment(NULL),
00053   m_Method(NULL),
00054   m_GradNumber(16)
00055 {
00056 }
00057 
00058 
00059 ///////////////////////////////////////////////////////////////////////////////
00060 /// CScoreCache
00061 
00062 CScoreCache::CScoreCache()
00063 : m_ScoreColls(NULL),
00064   m_EnBgProcessing(true),
00065   m_JobID(-1),
00066   m_HasScores(false)
00067 {
00068     m_ScoreColls = new TScoreCollVector();
00069 }
00070 
00071 
00072 CScoreCache::~CScoreCache()
00073 {
00074     if(x_IsJobRunning())   {
00075         x_DeleteJob();
00076     }
00077 
00078     delete m_ScoreColls;
00079 }
00080 
00081 
/// Event map of CScoreCache: both the "state changed" and the "progress"
/// job notifications are routed to OnAJNotification().
BEGIN_EVENT_MAP(CScoreCache, CEventHandler)
    ON_MESSAGE(CAppJobNotification, CAppJobNotification::eStateChanged,
               &CScoreCache::OnAJNotification)
    ON_MESSAGE(CAppJobNotification, CAppJobNotification::eProgress,
               &CScoreCache::OnAJNotification)
END_EVENT_MAP()
00088 
00089 
/// Sets the listener that is told about scoring progress and completion.
/// @param listener  listener to notify; the handlers in this file skip the
///                  notification when it is NULL
void CScoreCache::SetListener(IListener* listener)
{
    m_Listener = listener;
}
00094 
00095 
/// Chooses how CalculateScores() runs the scoring job.
/// @param en  true  - the job is started through CAppJobDispatcher;
///            false - the job is run synchronously inside CalculateScores()
void CScoreCache::EnableBackgoundProcessing(bool en)
{
    m_EnBgProcessing = en;
}
00100 
00101 
/// Sets the number of gradations stored in the scoring parameters.
/// @param grad_n  gradation count; asserted to lie in the range [2, 0xFFFF]
void CScoreCache::SetGradNumber(int grad_n)
{
    _ASSERT(grad_n > 1  && grad_n <= 0xFFFF);
    m_Params.m_GradNumber = grad_n;
}
00107 
00108 
00109 void CScoreCache::SetScoringMethod(IScoringMethod *method)
00110 {
00111     // changing method requires sopping the running job
00112     if(x_IsJobRunning())    {
00113         x_DeleteJob();
00114     }
00115 
00116     m_Params.m_Method = method;
00117 }
00118 
00119 
/// Returns the scoring method currently stored in the parameters.
IScoringMethod*    CScoreCache::GetScoringMethod()
{
    return m_Params.m_Method;
}
00124 
00125 
/// Returns the scoring method currently stored in the parameters
/// (read-only access).
const IScoringMethod*    CScoreCache::GetScoringMethod() const
{
    return m_Params.m_Method;
}
00130 
00131 
/// Stores the alignment that subsequent scoring jobs will work on.
/// @param aln  alignment to score; only the pointer is stored here
void CScoreCache::SetAlignment(IScoringAlignment* aln)
{
    m_Params.m_Alignment = aln;
}
00136 
00137 
/// Returns the alignment currently stored in the parameters.
const IScoringAlignment* CScoreCache::GetAlignment() const
{
    return m_Params.m_Alignment;
}
00142 
00143 
00144 void CScoreCache::ResetScores()
00145 {
00146     //LOG_POST("CScoreCache::CalculateScores() - Started ");
00147     CAppJobDispatcher& disp = CAppJobDispatcher::GetInstance();
00148 
00149     if(m_Job)   {
00150         // job is already running - delete it
00151         if (m_EnBgProcessing) {
00152             disp.DeleteJob(m_JobID);
00153             m_JobID = -1;
00154         }
00155         m_Job.Reset();
00156     }
00157 
00158     m_HasScores = false;
00159     m_ScoreColls->clear();
00160 }
00161 
00162 /// Calculates scores for the given CAlnVec object and saves results in form of
00163 /// TScoreColl objects.
00164 void CScoreCache::CalculateScores()
00165 {
00166     ResetScores();
00167 
00168     if(m_Params.m_Method)   {
00169         m_Job.Reset(new CScoringJob(m_Params));
00170 
00171         if(m_EnBgProcessing)    {
00172             CAppJobDispatcher& disp = CAppJobDispatcher::GetInstance();
00173             // use CAppJobDispatcher to execute background jobs
00174             m_JobID = disp.StartJob(*m_Job, "ThreadPool", *this, 1, true);
00175         } else {
00176             // do everything synchronously
00177             m_JobID = -1;
00178             m_Job->Run();
00179             m_HasScores = x_TransferResults(m_Job->GetResult().GetPointer());
00180             m_Job.Reset();
00181         }
00182     }
00183     //LOG_POST("CScoreCache::CalculateScores() - Finished ");
00184 }
00185 
00186 
00187 void CScoreCache::OnAJNotification(CEvent* evt)
00188 {
00189     CAppJobNotification* notn =
00190         dynamic_cast<CAppJobNotification*>(evt);
00191     _ASSERT(notn);
00192 
00193     if(notn)    {
00194         int job_id = notn->GetJobID();
00195         if(m_JobID != job_id) {
00196             ERR_POST("CScoringCache::OnAJNotification() - unknown Job ID " << job_id);
00197         } else {
00198             switch(notn->GetState())    {
00199             case IAppJob::eCompleted:
00200                 x_OnJobCompleted(*notn);
00201                 break;
00202             case IAppJob::eFailed:
00203                 x_OnJobFailed(*notn);
00204                 break;
00205             case IAppJob::eCanceled:
00206                 x_OnJobCanceled(*notn);
00207                 break;
00208             case IAppJob::eRunning:
00209                 x_OnJobProgress(*notn);
00210                 break;
00211             default:
00212                 _ASSERT(false);
00213             }
00214         }
00215     }
00216 }
00217 
00218 
/// Tells whether the cache currently holds a job object.
/// @return true if m_Job is not empty
bool CScoreCache::x_IsJobRunning()
{
    return m_Job.GetPointer() != NULL;
}
00223 
00224 
00225 void CScoreCache::x_DeleteJob()
00226 {
00227     if (m_EnBgProcessing) {
00228         _ASSERT(m_Job  &&  m_JobID != -1);
00229 
00230         CAppJobDispatcher& disp = CAppJobDispatcher::GetInstance();
00231         disp.DeleteJob(m_JobID);
00232 
00233         m_JobID = -1;
00234     }
00235 
00236     m_Job.Reset();
00237 
00238     if(m_Listener)  {
00239         m_Listener->OnScoringFinished();
00240     }
00241 }
00242 
00243 
00244 void CScoreCache::x_OnJobCompleted(CAppJobNotification& notn)
00245 {
00246     CRef<CObject> obj = notn.GetResult();
00247     x_TransferResults(obj.GetPointer());
00248 
00249     m_JobID = -1;
00250     m_Job.Reset();
00251 
00252     if(m_Listener)  {
00253         //LOG_POST("Completed - m_Listener->OnScoringFinished()");
00254         m_Listener->OnScoringFinished();
00255     }
00256 }
00257 
00258 
00259 bool CScoreCache::x_TransferResults(CObject* result)
00260 {
00261     CScoringJobResult* sc_res = dynamic_cast<CScoringJobResult*>(result);
00262     if(sc_res) {
00263         delete m_ScoreColls; // delete old data
00264 
00265         // take ownership of the results
00266         m_ScoreColls = sc_res->m_ScoreColls;
00267         sc_res->m_ScoreColls = NULL;
00268 
00269         m_HasScores = true;
00270         _ASSERT(m_ScoreColls );
00271         return true;
00272     } else {
00273         ERR_POST("CScoreCache::x_TransferResults()  - invalid results!");
00274         _ASSERT(sc_res);
00275         return false;
00276     }
00277 }
00278 
00279 
00280 void CScoreCache::x_OnJobFailed(CAppJobNotification& notn)
00281 {
00282     CConstIRef<IAppJobError> err = notn.GetError();
00283     if(err) {
00284         //TODO
00285     } else {
00286         //TODO
00287     }
00288 
00289     m_JobID = -1;
00290     m_Job.Reset();
00291 
00292     if(m_Listener)  {
00293         //LOG_POST("Failed - m_Listener->OnScoringFinished()");
00294         m_Listener->OnScoringFinished();
00295     }
00296 }
00297 
00298 
00299 void CScoreCache::x_OnJobCanceled(CAppJobNotification& /*notn*/)
00300 {
00301     m_JobID = -1;
00302     m_Job.Reset();
00303 
00304     if(m_Listener)  {
00305         //LOG_POST("Canceled - m_Listener->OnScoringFinished()");
00306         m_Listener->OnScoringFinished();
00307     }
00308 }
00309 
00310 
00311 void CScoreCache::x_OnJobProgress(CAppJobNotification& notn)
00312 {
00313     CConstIRef<IAppJobProgress> prg = notn.GetProgress();
00314     if(prg) {
00315 
00316         if(m_Listener)  {
00317             float norm_done = prg->GetNormDone();
00318             m_Listener->OnScoringProgress(norm_done, "Calculating alignment coloration...");
00319         }
00320     }
00321 }
00322 
00323 
/// Tells whether scores have been stored in the cache; the flag is set by
/// x_TransferResults() and cleared by ResetScores().
bool    CScoreCache::HasScores() const
{
    return m_HasScores;
}
00328 
00329 
/// Returns the color the current scoring method reports for the "no score"
/// case of the given color type.
/// NOTE(review): m_Params.m_Method is dereferenced without a check, so a
/// scoring method must have been set before this is called.
const CRgbaColor& CScoreCache::GetColorForNoScore(IScoringMethod::EColorType type) const
{
    return m_Params.m_Method->GetColorForNoScore(type);
}
00334 
00335 
00336 const CScoreCache::TScoreColl&   CScoreCache::GetScores(TNumrow row) const
00337 {
00338     _ASSERT(m_ScoreColls  &&  row >= 0  && row < (TNumrow) m_ScoreColls->size());
00339 
00340     return (*m_ScoreColls)[row];
00341 }
00342 
00343 
00344 
00345 ///////////////////////////////////////////////////////////////////////////////
00346 /// CScoringJob
00347 
00348 CScoringJob::CScoringJob(CScoringParams& params)
00349 :   m_ScoreColls(NULL),
00350     m_BufferStart(0),
00351     m_RowLength(0)
00352 {
00353     m_Params.m_Alignment = params.m_Alignment;
00354 
00355     CIRef<IUITool> tool(params.m_Method->Clone()); // make a copy
00356     IScoringMethod* method = dynamic_cast<IScoringMethod*>(tool.GetPointer());
00357     _ASSERT(method);
00358     m_Params.m_Method.Reset(method);
00359 
00360     m_Params.m_GradNumber = params.m_GradNumber;
00361 
00362     m_ScoreColls = new TScoreCollVector();
00363     _ASSERT(m_Params.m_Method  &&  m_Params.m_Alignment);
00364 
00365     m_Descr = "Alignment Scoring Job, method " + m_Params.m_Method->GetName();
00366     //LOG_POST("CScoringJob::CScoringJob()  " << m_Descr);
00367 }
00368 
00369 
/// Frees the score collections if the job still owns them; the pointer is
/// NULL (and the delete a no-op) once ownership has been handed over by
/// Run() or GetResults().
CScoringJob::~CScoringJob()
{
    //LOG_POST("CScoringJob::~CScoringJob() Destructor  " << m_Descr);
    delete m_ScoreColls;
}
00375 
00376 
00377 IAppJob::EJobState CScoringJob::Run()
00378 {
00379 /*
00380     LOG_POST(Info << "CScoringJob::Run()  Started  " << m_Descr
00381                   << " BufferStart=" << m_BufferStart
00382                   << " RowLength="   << m_RowLength
00383                   << " m_vRows.size()= " << m_vRows.size()
00384         );
00385 */
00386     m_Result.Reset();
00387     m_Error.Reset();
00388     m_StopRequested.Set(0);
00389 
00390     if(Calculate()) {
00391         // trnasfer collection onwership to the result
00392         m_Result.Reset(new CScoringJobResult(m_ScoreColls));
00393         m_ScoreColls = NULL;
00394         return eCompleted;
00395     }
00396     //LOG_POST("CScoringJob::Run()  Finished  " << m_Descr);
00397     return eFailed;
00398 }
00399 
00400 
00401 
00402 CConstIRef<IAppJobProgress> CScoringJob::GetProgress()
00403 {
00404     CAppJobProgress* prg = new CAppJobProgress(m_NormDone, "");
00405     return CConstIRef<IAppJobProgress>(prg);
00406 }
00407 
00408 
/// Returns the result object; it is empty unless Run() has completed
/// successfully (Run() resets it on entry and sets it only on success).
CRef<CObject> CScoringJob::GetResult()
{
    return CRef<CObject>(m_Result.GetPointer());
}
00413 
00414 
/// Returns the error object held by the job (Run() resets it on entry).
CConstIRef<IAppJobError> CScoringJob::GetError()
{
    return CConstIRef<IAppJobError>(m_Error.GetPointer());
}
00419 
00420 
/// Returns the job description composed in the constructor.
string CScoringJob::GetDescr() const
{
    return m_Descr;
}
00425 
00426 
00427 bool CScoringJob::Calculate()
00428 {
00429     _ASSERT(m_ScoreColls);
00430 
00431     if(m_Params.m_Method  &&  m_Params.m_Alignment)   {
00432         CStopWatch sw;
00433         sw.Start();
00434 
00435         TNumrow row_n = m_Params.m_Alignment->GetNumRows();
00436         m_ScoreColls->resize(row_n);
00437 
00438         IRowScoringMethod* row_method = dynamic_cast<IRowScoringMethod*>(m_Params.m_Method.GetPointer());
00439         if(row_method)  {
00440             x_CalculateSequenceScores(*row_method);
00441         } else {
00442             IColumnScoringMethod* col_method =
00443                 dynamic_cast<IColumnScoringMethod*>(m_Params.m_Method.GetPointer());
00444 
00445             if(col_method)  {
00446                 if(col_method->CanCalculateScores(*m_Params.m_Alignment)) {
00447                     x_CalculateAlignmentScores(*col_method);
00448                 } else {
00449                     return false;
00450                 }
00451 
00452             } else  {
00453                 _ASSERT(false); // invalid method
00454             }
00455         }
00456         IWindowScoringMethod * win_method
00457             = dynamic_cast<IWindowScoringMethod*>(m_Params.m_Method.GetPointer());
00458         if (win_method  &&  win_method->GetWindowSize() > 0) {
00459             x_WindowAvgScores(win_method->GetWindowSize());
00460         }
00461     }
00462     return true;
00463 }
00464 
00465 
00466 CScoringJob::TScoreCollVector* CScoringJob::GetResults()
00467 {
00468     TScoreCollVector* res = m_ScoreColls;
00469     m_ScoreColls = NULL;
00470     return res;
00471 }
00472 
00473 
00474 ///////////////////////////////////////////////////////////////////////////////
00475 /// Sequence buffer management routines
00476 
00477 inline char CScoringJob::x_BufferGetSeq(TSeqPos pos,  TNumrow row) const
00478 {
00479     _ASSERT(pos >= m_BufferStart  &&  pos < m_BufferStart + m_RowLength);
00480     _ASSERT(row >= 0  &&  row < (TNumrow) m_vRows.size());
00481 
00482     return m_vRows[row][pos - m_BufferStart];
00483 }
00484 
00485 
00486 void CScoringJob::x_AllocBuffer(TSeqPos row_len)
00487 {
00488     _ASSERT(m_Params.m_Alignment);
00489 
00490     int rows_n = m_Params.m_Alignment->GetNumRows();
00491     if(rows_n != (TNumrow) m_vRows.size()  ||  m_RowLength != row_len)    {
00492         m_RowLength = row_len;
00493 
00494         m_vRows.resize(rows_n);
00495         NON_CONST_ITERATE(vector<string>,  itR,  m_vRows)   {
00496             itR->resize(m_RowLength);
00497         }
00498     }
00499 }
00500 
00501 
/// Empties the sequence buffer by removing all buffered rows.
void CScoringJob::x_FreeBuffer()
{
    m_vRows.clear();
}
00506 
00507 
00508 void CScoringJob::x_UpdateBuffer(TSeqPos start, TSeqPos stop)
00509 {
00510     _ASSERT(m_Params.m_Alignment);
00511     _ASSERT( (stop - start + 1) <= m_RowLength);
00512 
00513     m_BufferStart = start;
00514 
00515     TNumrow row_n = (TNumrow) m_vRows.size();
00516     for( TNumrow r = 0;  r < row_n; r++ )  {
00517         m_Params.m_Alignment->GetAlnSeqString(r, m_vRows[r], IAlnExplorer::TSignedRange(start, stop));
00518     }
00519 }
00520 
00521 
00522 void CScoringJob::x_BufferGetColumn(TSeqPos pos, string& column) const
00523 {
00524     _ASSERT(pos >= m_BufferStart  && pos < m_BufferStart + m_RowLength);
00525 
00526     size_t col = pos - m_BufferStart;
00527     for(  size_t row = 0;  row < m_vRows.size();  row++ )   {
00528        column[row] = m_vRows[row][col];
00529     }
00530 }
00531 
00532 
00533 void CScoringJob::x_CalculateSequenceScores(IRowScoringMethod& method)
00534 {
00535     TNumrow row_n = m_Params.m_Alignment->GetNumRows();
00536     for(TNumrow r = 0;  r < row_n  &&  ! x_IsCanceled();  r++ )  {
00537         m_NormDone = float(r) / row_n;
00538         method.CalculateScores(r,  *m_Params.m_Alignment,  (*m_ScoreColls)[r]);
00539         //Sleep(1000);
00540     }
00541 }
00542 
00543 
00544 void CScoringJob::x_CalculateAlignmentScores(IColumnScoringMethod& method)
00545 {
00546     _ASSERT(m_ScoreColls);
00547 
00548     TSeqPos start = m_Params.m_Alignment->GetAlnStart();
00549     TSeqPos stop = m_Params.m_Alignment->GetAlnStop();
00550     TNumrow row_n = m_Params.m_Alignment->GetNumRows();
00551 
00552     // preparing score collections
00553     NON_CONST_ITERATE(TScoreCollVector,  itC,  *m_ScoreColls)  {
00554         itC->SetFrom(start); // clear and initialize
00555     }
00556     string column(row_n,  '\0');
00557     TScore col_score = 0;
00558     TScoreVector v_col_scores(row_n,  0.0);
00559 
00560     const TSeqPos kPageSize = 256;
00561     x_AllocBuffer(kPageSize);
00562 
00563     TScore grad_n = (TScore) m_Params.m_GradNumber;
00564 
00565     TNumrow cons_row = m_Params.m_Alignment->GetAnchor();
00566 
00567     // iterate from "start" to "stop" using "sliding buffer"
00568     for( TSeqPos pos = start;  pos < stop  &&  ! x_IsCanceled(); )    {
00569         m_NormDone = float(pos - start) / (stop - start); // update progress
00570 
00571         TSeqPos pos_stop = min(pos + kPageSize -1,  stop);
00572         x_UpdateBuffer(pos,  pos_stop); // fetch next page in Seq Buffer
00573 
00574         for( TSeqPos p = pos;  p <= pos_stop ;  p++ )    { // for each column
00575             x_BufferGetColumn(p,  column);
00576             char cons = (cons_row > -1) ? column[cons_row] : 0;
00577             method.CalculateScores(cons,  column,  col_score,  v_col_scores);
00578 
00579             // append scores to collections
00580             for(TNumrow r = 0;  r < row_n;  r++ )  {
00581                 TScore sc = v_col_scores[r];
00582                 sc = ((int) (sc * grad_n)) / grad_n;
00583                 (*m_ScoreColls)[r].push_back(sc);
00584             }
00585         }
00586         pos = pos_stop + 1;
00587         //Sleep(1000);
00588     }
00589 }
00590 
00591 
00592 void CScoringJob::x_WindowAvgScores(size_t window_size)
00593 {
00594     typedef TScoreColl::const_pos_iterator TPosIt;
00595 
00596     for( size_t i = 0;  i < m_ScoreColls->size()  &&  ! x_IsCanceled();  i++) {
00597         m_NormDone = float(i) / m_ScoreColls->size();
00598 
00599         TScoreColl&  score_row = (*m_ScoreColls)[i];
00600         TScoreColl avgd_score_row;
00601         TPosIt itC = score_row.begin_pos();
00602         TPosIt itC_end = score_row.end_pos();
00603 
00604         // Min/Max before and after averaging to
00605         // renormalise values afterwards.
00606         TScore old_min = numeric_limits<TScore>::max();
00607         TScore old_max = numeric_limits<TScore>::min();
00608         ITERATE(TScoreColl, it_sc, score_row) {
00609             TScore this_score = it_sc->GetAttr();
00610             if (this_score < old_min) old_min = this_score;
00611             if (this_score > old_max) old_max = this_score;
00612         }
00613         TScore new_min = numeric_limits<TScore>::max();
00614         TScore new_max = numeric_limits<TScore>::min();
00615 
00616         // calculate our first windowed average.
00617         TPosIt win_begin = itC;
00618         TPosIt win_end = itC_end;
00619         if (window_size < score_row.GetLength()) {
00620             win_end = itC + 1 + int(window_size);
00621         }
00622 
00623         TScore win_sum = (TScore) accumulate(win_begin, win_end, (TScore)0);
00624         TScore win_score = win_sum / (win_end - win_begin);
00625         avgd_score_row.push_back(win_score);
00626         if (win_score < new_min) {
00627             new_min = win_score;
00628         }
00629         if (win_score > new_max) {
00630             new_max = win_score;
00631         }
00632 
00633         if(x_IsCanceled()) {
00634             return;
00635         }
00636 
00637         for(  ++itC;  itC != itC_end;  ++itC  ) {
00638             // Move the window & update the average.
00639             if ((itC - win_begin) > (int) window_size) {
00640                 // move the beginning of the window, the trailing edge.
00641                 win_sum -= *win_begin;
00642                 ++win_begin;
00643             }
00644             if (win_end < itC_end) {
00645                 // move the end of the window, the leading edge.
00646                 win_sum += *win_end;
00647                 ++win_end;
00648             }
00649             win_score = win_sum/(win_end - win_begin);
00650             avgd_score_row.push_back(win_score);
00651             if (win_score < new_min) new_min = win_score;
00652             if (win_score > new_max) new_max = win_score;
00653         }
00654 
00655         if(x_IsCanceled()) {
00656             return;
00657         }
00658 
00659         // renormalize scores back to the dynamic range they had
00660         // before we averaged them.
00661         TScoreColl norm_score_row;
00662         TScore norm_slope = (old_max - old_min) / (new_max - new_min);
00663         ITERATE(TScoreColl, it_sc, avgd_score_row) {
00664             norm_score_row.push_back(
00665                     (it_sc->GetAttr() - new_min) * norm_slope + old_min,
00666                     it_sc->GetLength() );
00667         }
00668         score_row.swap(norm_score_row);
00669     }
00670 }
00671 
00672 
00673 END_NCBI_SCOPE
Modified on Wed May 23 12:57:03 2012 by modify_doxy.py rev. 337098