NCBI C++ ToolKit
hit_clustering.cpp
Go to the documentation of this file.
00001 /*  $Id: hit_clustering.cpp 14565 2007-05-18 12:32:01Z dicuccio $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Authors:  Andrey Yazhuk
00027  *
00028  * File Description:
00029  *
00030  */
00031 
00032 #include <ncbi_pch.hpp>
00033 
00034 #include <gui/widgets/hit_matrix/hit_clustering.hpp>
00035 #include <gui/widgets/hit_matrix/hit_matrix_graph.hpp>
00036 
00037 #include <objects/seqloc/Na_strand.hpp>
00038 
00039 BEGIN_NCBI_SCOPE
00040 USING_SCOPE(objects);
00041 
00042 
00043 /*CHitCluster::~CHitCluster()
00044 {
00045     for( size_t i = 0;  i < m_Elems.size();  i++ )  {
00046         delete m_Clusters[i];
00047     }
00048 } */
00049 
00050 // the resolution is specified by two scales and threshould (in pixels)
00051 
00052 // takes two vectors
00053 // calculate distance vector in model units
00054 // project it to pixels using given scales
00055 // if distance is below threshold
00056  // calculate combined vector
00057  // calculate normals in model units
00058  // calculate normals in pixels
00059  // if normals good - produce a cluster, remember results
00060 
00061 double CHitClustering::m_MinSize = 2.0;
00062 double CHitClustering::m_Width = 4.0;
00063 
00064 // get the distance^2 in pixels between two points
00065 inline double PixDistanceSquare(const TVect& a, const TVect& b,
00066                             double scale_x, double scale_y)
00067 {
00068     TVect res(a);
00069     res -= b;
00070     double d_x = res.X() / scale_x;
00071     double d_y = res.Y() / scale_y;
00072     return d_x * d_x + d_y * d_y;
00073 }
00074 
00075 
00076 inline  double CalcDistanceSquare(const TVector& a, const TVector& b, double scale_x, double scale_y)
00077 {
00078     double min_d = PixDistanceSquare(a.m_From, b.m_From, scale_x, scale_y);
00079     min_d = min(min_d, PixDistanceSquare(a.m_To, b.m_From, scale_x, scale_y));
00080     min_d = min(min_d, PixDistanceSquare(a.m_From, b.m_To, scale_x, scale_y));
00081     min_d = min(min_d, PixDistanceSquare(a.m_To, b.m_To, scale_x, scale_y));
00082 }
00083 
00084 
00085 inline void GetVector(const CHitElemGlyph& glyph, TVector& v)
00086 {
00087     const IHitElement& elem = glyph.GetHitElem();
00088     double x1 = elem.GetSubjectStart();
00089     double y1 = elem.GetQueryStart();
00090     double l = elem.GetLength();
00091     double x2 = x1 + l;
00092     double y2 = y1 + l;
00093 
00094     if(elem.GetSubjectStrand() == eNa_strand_minus)    {
00095         swap(x1, x2);
00096     }
00097     if(elem.GetQueryStrand() == eNa_strand_minus)    {
00098         swap(y1, y2);
00099     }
00100     v.m_From.X() = x1;
00101     v.m_From.Y() = y1;
00102     v.m_To.X() = x2;
00103     v.m_To.Y() = y2;
00104 }
00105 
00106 
00107 // calculates a vector normal to "res"
00108 inline TVect   GetNorm(const TVector& L, const TVector& v)
00109 {
00110     double L_x = L.m_To.X() - L.m_From.X();
00111     double L_y = L.m_To.Y() - L.m_From.Y();
00112     double v_x = v.m_To.X() - v.m_From.X();
00113     double v_y = v.m_To.Y() - v.m_From.Y();
00114 
00115     TVect norm;
00116 
00117     double s = L_x * L_x + L_y * L_y;
00118     double k = -(L_x * v_x + L_y * v_y) / s;
00119 
00120     norm.X() = k * L_x + v_x;
00121     norm.Y() = k * L_y + v_y;
00122     return norm;
00123 }
00124 
00125 
00126 // check whether two vectors can be combined into one cluster
00127 bool CHitClustering::x_CanCombine(const TVector& a, const TVector& b, TVector& res,
00128                                   double scale_x, double scale_y)
00129 {
00130     // locate closest points
00131     double min_0 = PixDistanceSquare(a.m_From, b.m_From, scale_x, scale_y);
00132     double min_1 = PixDistanceSquare(a.m_To, b.m_From, scale_x, scale_y);
00133     double min_2 = PixDistanceSquare(a.m_From, b.m_To, scale_x, scale_y);
00134     double min_3 = PixDistanceSquare(a.m_To, b.m_To, scale_x, scale_y);
00135 
00136     unsigned min_i = 0;
00137     double min = min_0;
00138     if(min_1 < min) {
00139         min = min_1;
00140         min_i = 1;
00141     }
00142     if(min_2 < min) {
00143         min = min_2;
00144         min_i = 2;
00145     }
00146     if(min_3 < min) {
00147         min = min_3;
00148         min_i = 3;
00149     }
00150     if(min < m_MinSize * m_MinSize) {
00151         // calculate combined Lvector
00152         TVector v_L;
00153         v_L.m_From = (min_i & 0x1) ? a.m_From : a.m_To;
00154         v_L.m_To = (min_i & 0x2) ? b.m_From : b.m_To;
00155 
00156         double width = m_Width / 2; // width of the line in pixels
00157 
00158         TVect norm = GetNorm(v_L, a);
00159         double h_1 = norm.X() / scale_x + norm.Y() / scale_y;
00160         if(h_1 < width) {
00161             norm = GetNorm(v_L, b);
00162             double h_2 = norm.X() / scale_x + norm.Y() / scale_y;
00163 
00164             if(h_2 < width) {
00165                 // ok, we can merge them
00166                 res = v_L;
00167                 return true;
00168             }
00169         }
00170     }
00171     return false;
00172 }
00173 
00174 
00175 void CHitClustering::Build(vector<CHitGlyph*>& glyphs, double scale_x, double scale_y)
00176 {
00177     CStopWatch w;
00178     w.Start();
00179 
00180     Clear();
00181 
00182     vector<CHitElemGlyph*> gl_acc; //accumulator
00183 
00184     //iterate by hits
00185     for( size_t i = 0;  i < glyphs.size();  i++ )   {
00186         CHitGlyph& gl = *glyphs[i];
00187 
00188         vector<CHitElemGlyph>& elems = gl.GetElems();
00189         if(elems.size())   {
00190             TVector a, b, res;
00191             CHitElemGlyph& gl_a = elems[0];
00192             GetVector(gl_a, a);
00193             gl_acc.push_back(&gl_a); // see the accumulator
00194 
00195             // iterate by hit elements
00196             for( size_t e_i = 1;  e_i < elems.size();  e_i++ ) {
00197                 CHitElemGlyph& gl_b = elems[e_i];
00198                 GetVector(gl_b, b);
00199 
00200                 if(x_CanCombine(a, b, res, scale_x, scale_y))    { // accumulate
00201                     a = res;
00202                 } else {
00203                     if(gl_acc.size() > 1)   { // create a cluster
00204                         CHitCluster* cl = new CHitCluster(gl_acc, res);
00205                         m_Clusters.push_back(cl);
00206                     }
00207 
00208                     // reset
00209                     a = b;
00210                     gl_acc.clear();
00211                 }
00212                 gl_acc.push_back(&gl_b);
00213             }
00214 
00215             if(gl_acc.size() > 1)   { // create a cluster
00216                 CHitCluster* cl = new CHitCluster(gl_acc, res);
00217                 m_Clusters.push_back(cl);
00218                 gl_acc.clear();
00219             }
00220         }
00221     }
00222 
00223     //LOG_POST(Info << "CHitClustering::Build() time " << w.Elapsed());
00224 }
00225 
00226 
00227 void CHitClustering::Clear()
00228 {
00229     for( size_t i = 0;  i < m_Clusters.size(); i++ ) {
00230         delete m_Clusters[i];
00231     }
00232     m_Clusters.clear();
00233 }
00234 
00235 
00236 END_NCBI_SCOPE
Modified on Wed May 23 13:38:45 2012 by modify_doxy.py rev. 337098