include/algo/align/util/compartment_finder.hpp

Go to the documentation of this file.
00001 #ifndef ALGO_ALIGN_UTIL_COMPARTMENT_FINDER__HPP
00002 #define ALGO_ALIGN_UTIL_COMPARTMENT_FINDER__HPP
00003 
00004 /* $Id: compartment_finder.hpp 158539 2009-04-27 12:59:07Z kapustin $
00005 * ===========================================================================
00006 *
00007 *                            PUBLIC DOMAIN NOTICE                          
00008 *               National Center for Biotechnology Information
00009 *                                                                          
00010 *  This software/database is a "United States Government Work" under the   
00011 *  terms of the United States Copyright Act.  It was written as part of    
00012 *  the author's official duties as a United States Government employee and 
00013 *  thus cannot be copyrighted.  This software/database is freely available 
00014 *  to the public for use. The National Library of Medicine and the U.S.    
00015 *  Government have not placed any restriction on its use or reproduction.  
00016 *                                                                          
00017 *  Although all reasonable efforts have been taken to ensure the accuracy  
00018 *  and reliability of the software and data, the NLM and the U.S.          
00019 *  Government do not and cannot warrant the performance or results that    
00020 *  may be obtained by using this software or data. The NLM and the U.S.    
00021 *  Government disclaim all warranties, express or implied, including       
00022 *  warranties of performance, merchantability or fitness for any particular
00023 *  purpose.                                                                
00024 *                                                                          
00025 *  Please cite the author in any work or product based on this material.   
00026 *
00027 * ===========================================================================
00028 *
00029 * Author:  Yuri Kapustin, Alexander Souvorov
00030 *
00031 * File Description:  
00032 *
00033 *   cDNA-to-genomic compartmentization algortihms.
00034 *   The task of compartmentization is to identify alternative placements
00035 *   of a transcript on a genomic sequence.
00036 * 
00037 *   CCompartmentFinder      Identification of genomic compartments.
00038 *   CCompartmentAccessor    An aggregator class to CCompartmentFinder.
00039 *                   
00040 * ===========================================================================
00041 */
00042 
00043 
00044 #include <corelib/ncbi_limits.hpp>
00045 
00046 #include <algo/align/nw/align_exception.hpp>
00047 #include <algo/align/util/hit_filter.hpp>
00048 
00049 #include <objects/seqalign/Seq_align_set.hpp>
00050 #include <objects/seqalign/Seq_align.hpp>
00051 #include <objects/seqalign/Dense_seg.hpp>
00052 #include <objects/seqloc/Seq_loc.hpp>
00053 #include <objects/seqloc/Na_strand.hpp>
00054 
00055 #include <algorithm>
00056 #include <numeric>
00057 #include <vector>
00058 
00059 
00060 BEGIN_NCBI_SCOPE
00061 
00062 
00063 template<class THit>
00064 class CCompartmentFinder {
00065 
00066 public:
00067 
00068     typedef CRef<THit>            THitRef;
00069     typedef vector<THitRef>       THitRefs;
00070     typedef typename THit::TCoord TCoord;
00071 
00072     /// Create the object from the complete set of local alignments (hits)
00073     /// between the transcript and the genomic sequences.
00074     /// The hits are assumed to share the same query and subject and 
00075     /// to have plus strand on both sequences.
00076     ///
00077     /// @param start
00078     ///    Start of the input set of alignments.
00079     /// @param finish
00080     ///    End of the input set of alignments.
00081 
00082     CCompartmentFinder(typename THitRefs::const_iterator start,
00083                        typename THitRefs::const_iterator finish);
00084 
00085     /// Set the maximum length of an intron
00086     ///
00087     /// @param intr_max
00088     ///    Maximum length of an intron, in base pairs.
00089 
00090     void SetMaxIntron (TCoord intr_max) {
00091         m_intron_max = intr_max;
00092     }
00093 
00094     /// Retrieve the default maximum length of an intron
00095     
00096     static TCoord s_GetDefaultMaxIntron(void) {
00097                         // Some examples:
00098                         //    NM_147181.3 vs NC_000004.10 (~1.1M)
00099         return 1200000; //    NM_001128929.1 vs NC_000003.10 (~1.2M)
00100     }
00101 
00102 
00103     /// Set the penalty to open a compartment
00104     ///
00105     /// @param penalty
00106     ///    Compartment opening penalty, in base pairs.
00107 
00108     void SetPenalty(TCoord penalty) {
00109         m_penalty = penalty;
00110     }
00111         
00112     /// Set the minimum matching residues per compartment.
00113     ///
00114     /// @param min_matches
00115     ///    The min number of matching residues in base pairs
00116 
00117     void SetMinMatches(TCoord min_matches) {
00118         m_MinSingletonMatches = m_MinMatches = min_matches;
00119     }
00120         
00121     /// Set the minimum matching residues per singleton compartment.
00122     ///
00123     /// @param min_matches
00124     ///    The min number of matching residues in base pairs
00125 
00126     void SetMinSingletonMatches(TCoord min_matches) {
00127         m_MinSingletonMatches = min_matches;
00128     }
00129 
00130     /// Retrieve the default compartment penalty, in base pairs
00131 
00132     static TCoord s_GetDefaultPenalty(void) {
00133         return 500;
00134     }
00135 
00136     /// Retrieve the default minimum coverage, in base pairs
00137     
00138     static TCoord s_GetDefaultMinCoverage(void) {
00139         return 500;
00140     }
00141 
00142     /// Identify compartments.
00143     ///
00144     /// @param  cross_filter
00145     ///    When activated, cross filtering will ensure that only 
00146     ///    the alignments that provided non-ambguous mapping between 
00147     ///    the sequences will be reported in the output.
00148 
00149     size_t Run(bool cross_filter = false); // do the compartment search
00150 
00151     ///  Order compartments by lower subj coordinate
00152 
00153     void OrderCompartments(void);
00154 
00155     /// Individual compartment representation.
00156     /// Compartment members are the hits comprising the compartment.
00157 
00158     class CCompartment {
00159 
00160     public:
00161         
00162         /// Create an empty compartment
00163 
00164         CCompartment(void) {
00165             m_box[0] = m_box[2] = numeric_limits<TCoord>::max();
00166             m_box[1] = m_box[3] = 0;
00167         }
00168 
00169         /// Retrieve compartment's members
00170 
00171         const THitRefs& GetMembers(void) {
00172             return m_members;
00173         }
00174     
00175         /// Add a new member into the compartment
00176 
00177         void AddMember(const THitRef& hitref) {
00178             m_members.push_back(hitref);
00179         }
00180 
00181         /// Assign all members of the compartment
00182 
00183         THitRefs& SetMembers(void) {
00184             return m_members;
00185         }
00186     
00187         /// Make sure the compartment's box is up-to-date
00188 
00189         void UpdateMinMax(void);
00190 
00191         /// Retrieve the compartment's strand (true == plus)
00192         
00193         bool GetStrand(void) const;
00194 
00195         /// Retrieve the first member.
00196         /// 
00197         /// @return
00198         ///    A reference on the first member of the compartment,
00199         ///    or a null reference if the compartment is empty.
00200 
00201         const THitRef GetFirst(void) const {
00202             m_iter = 0;
00203             return GetNext();
00204         }
00205         
00206 
00207         /// Retrieve the next member.
00208         /// 
00209         /// @return
00210         ///    A reference on the next member of the compartment,
00211         ///    or a null reference if there are no more members.
00212 
00213         const THitRef GetNext(void) const {
00214             if(m_iter < m_members.size()) {
00215                 return m_members[m_iter++];
00216             }
00217             return THitRef(NULL);
00218         }
00219         
00220         /// Retrieve the compartment's box.
00221         ///
00222         /// @return
00223         ///    A const pointer at the compartment's box.
00224 
00225         const TCoord* GetBox(void) const {
00226             return m_box;
00227         }
00228               
00229         // helper predicate
00230         static bool s_PLowerSubj(const CCompartment& c1, const CCompartment& c2) {
00231 
00232             return c1.m_box[2] < c2.m_box[2];
00233         }
00234         
00235     protected:
00236         
00237         THitRefs            m_members;
00238         TCoord              m_box[4];
00239         mutable size_t      m_iter;
00240     };
00241 
00242     // iterate through compartments
00243 
00244     CCompartment* GetFirst(void);
00245     CCompartment* GetNext(void);
00246 
00247 private:
00248 
00249     TCoord                m_intron_max;    // max intron size
00250     TCoord                m_penalty;       // penalty per compartment
00251     TCoord                m_MinMatches;    // min approx matches to report
00252     TCoord                m_MinSingletonMatches; // min matches for singleton comps
00253 
00254     THitRefs              m_hitrefs;         // input hits
00255     vector<CCompartment>  m_compartments;    // final compartments
00256     int                   m_iter;            // GetFirst/Next index
00257     
00258     struct SHitStatus {
00259         
00260         enum EType {
00261             eNone, 
00262             eExtension,
00263             eOpening
00264         };
00265         
00266         EType  m_type;
00267         int    m_prev;
00268         double m_score;        
00269 
00270         SHitStatus(void): m_type(eNone), m_prev(-1), m_score(0) 
00271         {}
00272         SHitStatus(EType type, int prev, double score): 
00273             m_type(type), m_prev(prev), m_score(score) 
00274         {}
00275     };    
00276 
00277 
00278     static size_t sx_XFilter(THitRefs& hitrefs,
00279                              typename THitRefs::iterator ihr,
00280                              typename THitRefs::iterator ihr_e,
00281                              Uint1 w,
00282                              size_t min_compartment_hit_len);
00283 
00284     // helper predicate
00285 
00286     static bool s_PNullRef(const THitRef& hr) {
00287         return hr.IsNull();
00288     }
00289 };
00290 
00291 
00292 // Facilitate access to compartments over a hit set
00293 template<class THit>
00294 class CCompartmentAccessor
00295 {
00296 public:
00297 
00298     typedef CCompartmentFinder<THit> TCompartmentFinder;
00299     typedef typename TCompartmentFinder::THitRefs THitRefs;
00300     typedef typename TCompartmentFinder::TCoord   TCoord;
00301 
00302     /// Construct the object and assign the parameters of the algorithm.
00303     ///
00304     /// @param  comp_penalty_bps
00305     ///    Penalty to open a compartment, in base pairs.
00306     /// @param  min_matches
00307     ///    The minimum number of matching residues in a compartment, in base pairs.
00308     /// @param  min_singleton_matches
00309     ///    The minimum number of matching residues in a singleton, in base pairs.
00310     /// @param  cross_filter
00311     ///    Perform cross-filtering when true.
00312 
00313     CCompartmentAccessor(TCoord  comp_penalty_bps,
00314                          TCoord  min_matches,
00315                          TCoord  min_singleton_matches = numeric_limits<TCoord>::max(),
00316                          bool    cross_filter = false);
00317 
00318     /// Construct the object; assign the parameters of the algorithm; execute.
00319     /// The input range of alignments is assumed to contain the complete set of 
00320     /// alignments between the same pair of sequences.
00321     /// The alignments can be on one or both genomic strands.
00322     ///
00323     /// @param  start
00324     ///    Start of the input range of input alignments.
00325     /// @param  finish
00326     ///    Stop of the input range of input alignments.
00327     /// @param  comp_penalty_bps
00328     ///    Penalty to open a compartment, in base pairs.
00329     /// @param  min_matches
00330     ///    The minimum number of matching residues in a compartment, in base pairs.
00331     /// @param  min_singleton_matches
00332     ///    The minimum number of matching residues in a singleton, in base pairs.
00333     /// @param  cross_filter
00334     ///    Perform cross-filtering when true.
00335 
00336     CCompartmentAccessor(typename THitRefs::iterator start, 
00337                          typename THitRefs::iterator finish,
00338                          TCoord  comp_penalty_bps,
00339                          TCoord  min_matches,
00340                          TCoord  min_singleton_matches = numeric_limits<TCoord>::max(),
00341                          bool    cross_filter = false);
00342 
00343     /// Execute: identify compartments. The alignments can be on one 
00344     /// or both genomic strands.
00345     ///
00346     /// @param  start
00347     ///    Start of the input range of input alignments.
00348     /// @param  finish
00349     ///    Stop of the input range of input alignments.
00350 
00351     void Run(typename THitRefs::iterator start, 
00352              typename THitRefs::iterator finish);
00353     
00354     /// Assign the maximum intron length, in base pairs
00355 
00356     void   SetMaxIntron(TCoord mi) { m_MaxIntron = mi; }
00357 
00358     /// Retrieve the maximum intron length, in base pairs
00359 
00360     TCoord GetMaxIntron(void) const { return m_MaxIntron; }
00361 
00362     /// Initialize iteration over the results.
00363     ///
00364     /// @param  compartment
00365     ///    The first identified compartment
00366     /// @return
00367     ///    true if more compartments are available
00368 
00369     bool GetFirst(THitRefs& compartment);
00370 
00371     /// Proceed with iteration over the results.
00372     ///
00373     /// @param  compartment
00374     ///    The next identified compartment
00375     /// @return
00376     ///    true if more compartments are available
00377 
00378     bool GetNext(THitRefs& compartment);
00379     
00380     /// Retrieve the compartment counts.
00381     ///
00382     /// @return
00383     ///    The first number in the pair is the total count;
00384     ///    the second number is the number of compartments with the number of matches
00385     ///    abive the minimum.
00386     ///
00387 
00388     pair<size_t,size_t> GetCounts(void) const {
00389 
00390         // std::count() not supported on some platforms
00391         size_t valid_count (0);
00392         for(size_t i(0), n(m_status.size()); i != n; ++i) {
00393             if(m_status[i]) ++valid_count;
00394         }
00395 
00396         pair<size_t, size_t> rv (m_pending.size(), valid_count);
00397 
00398         return rv;
00399     }
00400     
00401     /// Retrieve a compartment by index.
00402     ///
00403     /// @param idx
00404     ///    The compartment's index
00405     /// @param compartment
00406     ///    The reference to receive the compartment requested.
00407 
00408     void Get(size_t idx, THitRefs& compartment) const {
00409         compartment = m_pending[idx];
00410     }
00411     
00412     const TCoord* GetBox(size_t i) const {
00413         return &m_ranges.front() + i*4;
00414     }
00415     
00416     bool GetStrand(size_t i) const {
00417         return m_strands[i];
00418     }
00419 
00420     bool GetStatus(size_t i) const {
00421         return m_status[i];
00422     }
00423 
00424     /// Retrieve all valid compartments in a seq-align-set.
00425     ///
00426     /// @return
00427     ///    A seq-align-set object with this hierarchy.
00428     ///      1. Compartment-level seq-align with bounds.
00429     ///      2. Seq-align-set keeping individual hits of the compartment.
00430     ///      3. Hit-level seq-align of the dense-seg type.
00431     CRef<objects::CSeq_align_set> AsSeqAlignSet(void) const;
00432         
00433 private:
00434 
00435     void x_Init(TCoord comp_penalty, TCoord  min_matches,
00436                 TCoord min_singleton_matches, bool cross_filter);
00437     
00438     // compartmentization parameters
00439     TCoord m_Penalty;
00440     TCoord m_MinMatches;
00441     TCoord m_MinSingletonMatches;
00442     TCoord m_MaxIntron;
00443     bool   m_CrossFiltering;
00444 
00445     // ancillary members
00446     vector<THitRefs>         m_pending;
00447     vector<TCoord>           m_ranges;
00448     vector<bool>             m_strands;
00449     vector<bool>             m_status;
00450     size_t                   m_iter;
00451     
00452     void x_Copy2Pending(TCompartmentFinder& finder);
00453 };
00454 
00455 
00456 
00457 ///  IMPLEMENTATION
00458 /////////////////////////////////////////////////////////////////////////////
00459 
00460 
00461 const double kPenaltyPerIntronBase (-2e-5); // a small penalty to prefer
00462                                             // more compact models among equal
00463 
00464 const double kPenaltyPerIntronPos  (-1e-9); // a small penalty to favor uniform
00465                                             // selection among equivalent chains
00466 
00467 template<class THit>
00468 void CCompartmentFinder<THit>::CCompartment::UpdateMinMax() 
00469 {
00470     typedef CHitFilter<THit> THitFilter;
00471     THitFilter::s_GetSpan(m_members, m_box);
00472 }
00473 
00474 
00475 template<class THit>
00476 bool CCompartmentFinder<THit>::CCompartment::GetStrand() const {
00477 
00478     if(m_members.size()) {
00479         return m_members.front()->GetSubjStrand();
00480     }
00481     else {
00482         NCBI_THROW( CAlgoAlignException,
00483                     eInternal,
00484                     "Strand requested on an empty compartment" );
00485     }
00486 }
00487 
00488 
00489 template<class THit>
00490 CCompartmentFinder<THit>::CCompartmentFinder(
00491     typename THitRefs::const_iterator start,
00492     typename THitRefs::const_iterator finish ):
00493 
00494     m_intron_max(s_GetDefaultMaxIntron()),
00495     m_penalty(s_GetDefaultPenalty()),
00496     m_MinMatches(1),
00497     m_MinSingletonMatches(1),
00498     m_iter(-1)
00499 {
00500     m_hitrefs.resize(finish - start);
00501     copy(start, finish, m_hitrefs.begin());
00502 }
00503 
00504 // Query match accumulator
00505 template<class THit>
00506 class CQueryMatchAccumulator:
00507     public binary_function<double, CRef<THit>, double>
00508 {
00509 public:
00510 
00511     CQueryMatchAccumulator(void): m_Finish(-1.0)
00512     {}
00513 
00514     double operator() (double acm, CRef<THit> ph)
00515     {
00516         const typename THit::TCoord qmin (ph->GetQueryMin()), 
00517             qmax (ph->GetQueryMax());
00518         if(qmin > m_Finish)
00519             return acm + ph->GetIdentity() * ((m_Finish = qmax) - qmin + 1);
00520         else {
00521             if(qmax > m_Finish) {
00522                 const double finish0 (m_Finish);
00523                 return acm + ph->GetIdentity() * ((m_Finish = qmax) - finish0);
00524             }
00525             else {
00526                 return acm;
00527             }
00528         }
00529     }
00530 
00531 private:
00532 
00533     double m_Finish;
00534 };
00535 
00536 
00537 template<class THit>
00538 double GetTotalMatches(
00539     const typename CCompartmentFinder<THit>::THitRefs& hitrefs0)
00540 {
00541     typename CCompartmentFinder<THit>::THitRefs hitrefs (hitrefs0);   
00542 
00543     typedef CHitComparator<THit> THitComparator;
00544     THitComparator sorter (THitComparator::eQueryMin);
00545     stable_sort(hitrefs.begin(), hitrefs.end(), sorter);
00546 
00547     const double rv (accumulate(hitrefs.begin(), hitrefs.end(), 0.0, 
00548                                 CQueryMatchAccumulator<THit>()));
00549     return rv;
00550 }
00551 
00552 
00553 template<class THit>
00554 size_t CCompartmentFinder<THit>::Run(bool cross_filter)
00555 {
00556     const double kMinusInf (-1e12);
00557 
00558     m_compartments.clear();
00559     const int dimhits = m_hitrefs.size();
00560     if(dimhits == 0) {
00561         return 0;
00562     }
00563 
00564     // sort the hits to make sure that each hit is placed after:
00565     // - hits from which to continue a compartment
00566     // - hits from which to open a new compartment
00567 
00568     typedef CHitComparator<THit> THitComparator;
00569     stable_sort(m_hitrefs.begin(), m_hitrefs.end(),
00570                 THitComparator(THitComparator::eSubjMaxQueryMax));
00571 
00572     // For every hit:
00573     // - evaluate its best extension potential
00574     // - evaluate its best potential to start a new compartment
00575     // - select the best variant
00576     // - update the hit status array
00577     
00578     typedef vector<SHitStatus> THitData;
00579     THitData hitstatus (dimhits);
00580 
00581     const TCoord subj_min_global (m_hitrefs.front()->GetSubjMin());
00582 
00583     int i_bestsofar (0);
00584     for(int i (0); i < dimhits; ++i) {
00585         
00586         const THitRef& h (m_hitrefs[i]);
00587         const double identity (h->GetIdentity());
00588         const typename THit::TCoord hbox [4] = {
00589             h->GetQueryMin(),  h->GetQueryMax(),
00590             h->GetSubjMin(),   h->GetSubjMax()
00591         };
00592 
00593 //#define CF_DBG_TRACE
00594 #ifdef CF_DBG_TRACE
00595         cerr << endl << *h << endl;
00596 #endif
00597 
00598         double best_ext_score (kMinusInf);
00599         int    i_best_ext (-1);
00600 
00601         double best_open_score (identity*(hbox[1] - hbox[0] + 1) - m_penalty);
00602         int    i_best_open (-1); // each can be a start of the very first compartment
00603 
00604         if(hbox[2] > m_hitrefs[i_bestsofar]->GetSubjMax()) {
00605 
00606             const double score_open (identity*(hbox[1] - hbox[0] + 1) 
00607                                      + hitstatus[i_bestsofar].m_score
00608                                      - m_penalty);
00609 
00610             if(score_open > best_open_score) {
00611                 best_open_score = score_open;
00612                 i_best_open = i_bestsofar;
00613             }
00614         }
00615         else {
00616             // try every prior hit
00617             for(int j (i - 1); j >= 0; --j) {
00618 
00619                 const double score_open (identity * (hbox[1] - hbox[0] + 1)
00620                                          + hitstatus[j].m_score
00621                                          - m_penalty);
00622 
00623                 if(score_open > best_open_score 
00624                    && hbox[2] > m_hitrefs[j]->GetSubjMax())
00625                 {
00626                     best_open_score = score_open;
00627                     i_best_open = j;
00628                 }
00629             }
00630         }
00631 
00632         for(int j = i; j >= 0; --j) {
00633             
00634             THitRef phc;
00635             typename THit::TCoord phcbox[4];
00636             if(j != i) {
00637 
00638                 phc = m_hitrefs[j];
00639                 phcbox[0] = phc->GetQueryMin();
00640                 phcbox[1] = phc->GetQueryMax();
00641                 phcbox[2] = phc->GetSubjMin();
00642                 phcbox[3] = phc->GetSubjMax();
00643 
00644 #ifdef CF_DBG_TRACE
00645                 cerr << '\t' << *phc << endl;
00646 #endif
00647             }
00648 #ifdef CF_DBG_TRACE
00649             else {
00650                 cerr << "\t[Dummy]" << endl;
00651             }
00652 #endif
00653             
00654             // check if good for extension
00655             {{
00656                 typename THit::TCoord q0, s0; // possible continuation
00657                 bool good (false);
00658                 int subj_space;
00659                 TCoord intron_start (0);
00660 
00661                 if(i != j) {
00662 
00663                     if(phcbox[1] < hbox[1] && phcbox[0] < hbox[0]) {
00664 
00665                         if(hbox[0] <= phcbox[1] && 
00666                            hbox[2] + phcbox[1] - hbox[0] >= phcbox[3]) {
00667 
00668                             q0 = phcbox[1] + 1;
00669                             s0 = hbox[2] + phcbox[1] - hbox[0];
00670                         }
00671                         else if(phcbox[3] >= hbox[2]) {
00672 
00673                             s0 = phcbox[3] + 1;
00674                             q0 = hbox[1] - (hbox[3] - s0);
00675                         }
00676                         else {
00677                             
00678                             q0 = hbox[0];
00679                             s0 = hbox[2];
00680                         }
00681                         subj_space = s0 - phcbox[3] - 1;
00682 
00683 
00684                         // max run of spaces inside an exon
00685                         // example: NM_021645.4 vs NC_000013.9: 51.4 - 51.51M
00686                         const TCoord max_gap (100);
00687 
00688                         good = (subj_space <= int(m_intron_max))
00689                             && (subj_space + max_gap >= q0 - phcbox[1] - 1)
00690                             && (s0 < hbox[3] && q0 < hbox[1]);
00691 
00692                         if(good) {
00693                             intron_start = phcbox[3];
00694                         }
00695                     }
00696                 }
00697                 
00698                 if(good) {
00699                     
00700                     const double jscore (hitstatus[j].m_score);
00701                     const double intron_penalty ((subj_space > 0)?
00702                          (kPenaltyPerIntronPos * (intron_start - subj_min_global) 
00703                          + subj_space * kPenaltyPerIntronBase): 0.0);
00704 
00705                     const double ext_score (jscore +
00706                         identity * (hbox[1] - q0 + 1) + intron_penalty);
00707 
00708                     if(ext_score > best_ext_score) {
00709                         best_ext_score = ext_score;
00710                         i_best_ext = j;
00711                     }
00712 #ifdef CF_DBG_TRACE
00713                     cerr << "\tGood for extension with score = " << ext_score << endl;
00714 #endif
00715                 }
00716             }}          
00717         }
00718         
00719         typename SHitStatus::EType hit_type;
00720         int prev_hit;
00721         double best_score;
00722         if(best_ext_score > best_open_score) {
00723 
00724             hit_type = SHitStatus::eExtension;
00725             prev_hit = i_best_ext;
00726             best_score = best_ext_score;
00727         }
00728         else {
00729 
00730             hit_type = SHitStatus::eOpening;
00731             prev_hit = i_best_open;
00732             best_score = best_open_score;
00733         }
00734                 
00735         hitstatus[i].m_type  = hit_type;
00736         hitstatus[i].m_prev  = prev_hit;
00737         hitstatus[i].m_score = best_score;
00738 
00739         if(best_score > hitstatus[i_bestsofar].m_score) {
00740             i_bestsofar = i;
00741         }
00742 
00743 #ifdef CF_DBG_TRACE
00744         cerr << "Status = " << ((hit_type == SHitStatus::eOpening)? "Open": "Extend")
00745              << '\t' << best_score << endl;
00746         if(prev_hit == -1) {
00747             cerr << "[Dummy]" << endl;
00748         }
00749         else {
00750             cerr << *(m_hitrefs[prev_hit]) << endl;
00751         }
00752 #endif
00753     }
00754     
00755 #ifdef CF_DBG_TRACE
00756     cerr << "\n\n--- BACKTRACE ---\n";
00757 #endif
00758 
00759     // *** backtrace ***
00760     // -  trace back the chain with the highest score
00761     const double score_best    (hitstatus[i_bestsofar].m_score);    
00762     const double min_matches   (m_MinSingletonMatches < m_MinMatches? 
00763         m_MinSingletonMatches: m_MinMatches);
00764 
00765     vector<bool> comp_status;
00766     if(score_best + m_penalty >= min_matches) {
00767 
00768         int i (i_bestsofar);
00769         bool new_compartment (true);
00770         THitRefs hitrefs;
00771         while(i != -1) {
00772 
00773             if(new_compartment) {
00774 
00775                 const double mp (GetTotalMatches<THit>(hitrefs));
00776                 if(mp > 0) {
00777                     // save the current compartment
00778                     m_compartments.push_back(CCompartment());
00779                     m_compartments.back().SetMembers() = hitrefs;
00780                     comp_status.push_back(mp >= m_MinMatches);
00781                 }
00782 
00783                 hitrefs.resize(0);
00784                 new_compartment = false;
00785             }
00786             hitrefs.push_back(m_hitrefs[i]);
00787 
00788 #ifdef CF_DBG_TRACE
00789             cerr << *(m_hitrefs[i]) << endl;
00790 #endif            
00791             
00792             if(hitstatus[i].m_type == SHitStatus::eOpening) {
00793                 new_compartment = true;
00794             }
00795             i = hitstatus[i].m_prev;
00796         }
00797 
00798         const double mp (GetTotalMatches<THit>(hitrefs));
00799         if(mp > 0) {
00800             bool status (m_compartments.size() == 0 && mp >= m_MinSingletonMatches 
00801                          || mp >= m_MinMatches);
00802             m_compartments.push_back(CCompartment());
00803             m_compartments.back().SetMembers() = hitrefs;
00804             comp_status.push_back(status);
00805         }
00806     }
00807 
00808     if(cross_filter && m_compartments.size()) {
00809 
00810         const TCoord kMinCompartmentHitLength (8);
00811 
00812         // partial x-filtering using compartment hits only
00813         for(size_t icn (m_compartments.size()), ic (0); ic < icn; ++ic) {
00814 
00815             CCompartment & comp (m_compartments[ic]);
00816             THitRefs& hitrefs (comp.SetMembers());
00817             size_t nullified (0);
00818             for(int in (hitrefs.size()), i (in - 1); i > 0; --i) {
00819 
00820                 int j1 (i);
00821                 while(j1 < in && hitrefs[j1].IsNull()) ++j1;
00822                 if(j1 == in) continue;
00823 
00824                 THitRef& h1 (hitrefs[j1]);
00825                 THitRef& h2 (hitrefs[i-1]);
00826 
00827                 if(h1.IsNull()) continue;
00828 
00829                 const TCoord * box1o (h1->GetBox());
00830                 TCoord box1 [4] = {box1o[0], box1o[1], box1o[2], box1o[3]};
00831                 const TCoord * box2o (h2->GetBox());
00832                 TCoord box2 [4] = {box2o[0], box2o[1], box2o[2], box2o[3]};
00833 
00834                 const int qd (box1[1] - box2[0] + 1);
00835                 const int sd (box1[3] - box2[2] + 1);
00836                 if(qd > sd && qd > 0) {
00837 
00838                     if(box2[0] <= box1[0] + kMinCompartmentHitLength) {
00839 
00840 #ifdef ALGOALIGNUTIL_COMPARTMENT_FINDER_KEEP_TERMINII
00841                         if(i + 1 == in) {
00842                             TCoord new_coord (box1[0] + (box1[1] - box1[0])/2);
00843                             if(box1[0] + 1 <= new_coord) {
00844                                 --new_coord;
00845                             }
00846                             else {
00847                                 new_coord = box1[0];
00848                             }
00849                             h1->Modify(1, new_coord);
00850                         }
00851                         else 
00852 #endif
00853                         {
00854                             h1.Reset(0);
00855                             ++nullified;
00856                         }
00857                     }
00858                     else {
00859                         h1->Modify(1, box2[0] - 1);
00860                     }
00861                     
00862                     if(box2[1] <= box1[1] + kMinCompartmentHitLength) {
00863 
00864 #ifdef ALGOALIGNUTIL_COMPARTMENT_FINDER_KEEP_TERMINII
00865                         if(i == 1) {
00866                             TCoord new_coord (box2[0] + (box2[1] - box2[0])/2);
00867                             if(box2[1] >= new_coord + 1) {
00868                                 ++new_coord;
00869                             }
00870                             else {
00871                                 new_coord = box2[1];
00872                             }
00873                             h2->Modify(0, new_coord);
00874                         }
00875                         else
00876 #endif
00877                         {
00878                             h2.Reset(0);
00879                             ++nullified;
00880                         }
00881                     }
00882                     else {
00883                         h2->Modify(0, box1[1] + 1);
00884                     }
00885                 }
00886                 else if (sd > 0) {
00887 
00888                     if(box2[2] <= box1[2] + kMinCompartmentHitLength) {
00889 
00890                         if(i + 1 == in) {
00891                             TCoord new_coord (box1[2] + (box1[3] - box1[2])/2);
00892                             if(box1[2] + 1 <= new_coord) {
00893                                 --new_coord;
00894                             }
00895                             else {
00896                                 new_coord = box1[2];
00897                             }
00898                             h1->Modify(3, new_coord);
00899                         }
00900                         else {
00901                             h1.Reset(0);
00902                             ++nullified;
00903                         }
00904                     }
00905                     else {
00906                         h1->Modify(3, box2[2] - 1);
00907                     }
00908                     
00909                     if(box2[3] <= box1[3] + kMinCompartmentHitLength) {
00910 
00911                         if(i == 1) {
00912                             TCoord new_coord (box2[2] + (box2[3] - box2[2])/2);
00913                             if(box2[3] >= new_coord + 1) {
00914                                 ++new_coord;
00915                             }
00916                             else {
00917                                 new_coord = box2[3];
00918                             }
00919 
00920                             h2->Modify(2, new_coord);
00921                         }
00922                         else {
00923                             h2.Reset(0);
00924                             ++nullified;
00925                         }
00926                     }
00927                     else {
00928                         h2->Modify(2, box1[3] + 1);
00929                     }
00930                 }
00931             }
00932 
00933             if(nullified > 0) {
00934                 hitrefs.erase(remove_if(hitrefs.begin(), hitrefs.end(),
00935                                         CCompartmentFinder<THit>::s_PNullRef),
00936                               hitrefs.end());
00937             }
00938         }
00939 
00940 #define ALGO_ALIGN_COMPARTMENT_FINDER_USE_FULL_XFILTERING
00941 #ifdef  ALGO_ALIGN_COMPARTMENT_FINDER_USE_FULL_XFILTERING
00942 
00943         typename THitRefs::iterator ihr_b (m_hitrefs.begin()),
00944             ihr_e(m_hitrefs.end()), ihr (ihr_b);
00945 
00946         stable_sort(ihr_b, ihr_e, THitComparator(THitComparator::eSubjMinSubjMax));
00947 
00948         // complete x-filtering using the full set of hits
00949         for(int icn (m_compartments.size()), ic (icn - 1); ic >= 0; --ic) {
00950 
00951             CCompartment & comp (m_compartments[ic]);
00952             THitRefs& hitrefs (comp.SetMembers());
00953 
00954             if(hitrefs.size() < 3) {
00955                 continue;
00956             }
00957             else {
00958                 NON_CONST_ITERATE(typename THitRefs, ii, hitrefs) {
00959                     (*ii)->SetScore(- (*ii)->GetScore());
00960                 }
00961                 hitrefs.front()->SetEValue(-1);
00962                 hitrefs.back()->SetEValue(-1);
00963             }
00964             
00965             const TCoord comp_subj_min (hitrefs.back()->GetSubjStart());
00966             const TCoord comp_subj_max (hitrefs.front()->GetSubjStop());
00967             while(ihr != ihr_e && ((*ihr)->GetSubjStart() < comp_subj_min
00968                                    || (*ihr)->GetScore() < 0))
00969             {
00970                 ++ihr;
00971             }
00972 
00973             if(ihr == ihr_e) break;
00974             if((*ihr)->GetSubjStart() > comp_subj_max) continue;
00975             typename THitRefs::iterator ihrc_b (ihr);
00976 
00977             typename THitRefs::iterator ihrc_e (ihr);
00978             while(ihrc_e != ihr_e && ((*ihrc_e)->GetSubjStart() < comp_subj_max
00979                                       || (*ihrc_e)->GetScore() < 0))
00980             {
00981                 ++ihrc_e;
00982             }
00983 
00984             size_t nullified (sx_XFilter(hitrefs, ihrc_b, ihrc_e, 1, 
00985                                          kMinCompartmentHitLength));
00986             sort(ihrc_b, ihrc_e, THitComparator (THitComparator::eQueryMinQueryMax));
00987 
00988             nullified += sx_XFilter(hitrefs, ihrc_b, ihrc_e, 0,
00989                                     kMinCompartmentHitLength);
00990 
00991             ihr = ihrc_e;
00992 
00993             if(nullified > 0) {
00994                 hitrefs.erase(remove_if(hitrefs.begin(), hitrefs.end(),
00995                                         CCompartmentFinder<THit>::s_PNullRef),
00996                               hitrefs.end());
00997             }
00998         }
00999 
01000         for(int icn (m_compartments.size()), ic (icn - 1); ic >= 0; --ic) {
01001 
01002             CCompartment & comp (m_compartments[ic]);
01003             THitRefs& hitrefs (comp.SetMembers());
01004             NON_CONST_ITERATE(typename THitRefs, ii, hitrefs) {
01005                 const double score ((*ii)->GetScore());
01006                 if(score < 0) {
01007                     (*ii)->SetScore(-score);
01008                 }
01009                 const double eval ((*ii)->GetEValue());
01010                 if(eval < 0) {
01011                     (*ii)->SetEValue(0);
01012                 }
01013             }
01014         }
01015 #endif
01016     }
01017 
01018     // mask out compartments with low identity
01019     for(size_t i (0), in (m_compartments.size()); i < in; ++i) {
01020         if(comp_status[i] == false) {
01021 
01022             THitRefs & hitrefs (m_compartments[i].SetMembers());
01023             NON_CONST_ITERATE(typename THitRefs, ii, hitrefs) {
01024                 THitRef & hr (*ii);
01025                 hr->SetScore(-hr->GetScore());
01026             }
01027         }
01028     }
01029 
01030     return m_compartments.size();
01031 }
01032 
01033 
01034 template<class THit>
01035 size_t CCompartmentFinder<THit>::sx_XFilter(
01036     THitRefs& hitrefs,
01037     typename THitRefs::iterator ihr,
01038     typename THitRefs::iterator ihr_e,
01039     Uint1 w,
01040     size_t min_compartment_hit_len)
01041 {
01042     size_t nullified (0);
01043     for(int in (hitrefs.size()), i (in - 2); i > 0 && ihr != ihr_e; --i) {
01044 
01045         THitRef& h1 (hitrefs[i]);
01046         if(h1.IsNull()) continue;
01047                       
01048         const TCoord * box1o (h1->GetBox());
01049         TCoord box1 [4] = {box1o[0], box1o[1], box1o[2], box1o[3]};
01050 
01051         // locate the first overlap
01052         for(; ihr != ihr_e; ++ihr) {
01053 
01054             THitRef hr (*ihr);
01055             if(hr.IsNull()) continue;
01056             if(hr->GetScore() > 0 && hr->GetStop(w) >= box1[2*w]) {
01057                 break;
01058             }
01059         }
01060 
01061         if(ihr == ihr_e) {
01062             break;
01063         }
01064 
01065         // find the smallest not overlapped hit coord and its interval
01066         TCoord ls0 (box1[2*w+1] + 1), cursegmax (0);
01067         TCoord segmax_start(box1[2*w]), segmax_stop(box1[2*w+1]);
01068 
01069         if(box1[2*w] < (*ihr)->GetStart(w)) {
01070 
01071             segmax_start = ls0 = box1[2*w];
01072             segmax_stop = (*ihr)->GetStart(w) - 1;
01073             cursegmax = segmax_stop - segmax_start + 1;
01074         }
01075         else {
01076 
01077             TCoord shrsmax ((*ihr)->GetStop(w));
01078             for(++ihr; ihr != ihr_e; ++ihr) {
01079 
01080                 THitRef hr (*ihr);
01081                 if(hr.IsNull() || hr->GetScore() < 0) {
01082                     continue;
01083                 }
01084 
01085                 const TCoord hrs0 (hr->GetStart(w));
01086                 if(hrs0 > box1[2*w+1]) {
01087                     segmax_stop = box1[2*w+1];
01088                     break;
01089                 }
01090 
01091                 if(hrs0 > shrsmax) {
01092                     segmax_stop = hrs0 - 1;
01093                     break;
01094                 }
01095 
01096                 const TCoord hrs1 (hr->GetStop(w));
01097                 if(hrs1 > shrsmax) {
01098                     shrsmax = hrs1;
01099                 }
01100             }
01101 
01102             if(shrsmax < box1[2*w+1]) {
01103                 segmax_start = ls0 = shrsmax + 1;
01104                 cursegmax = segmax_stop - segmax_start + 1;
01105             }
01106         }
01107                 
01108         if(ls0 > box1[2*w+1]) {
01109             h1.Reset(0);
01110             ++nullified;
01111             continue;
01112         }
01113 
01114         // find the longest surviving segment       
01115         for(; ihr != ihr_e; ++ihr) {
01116 
01117             THitRef hr (*ihr);
01118             if(hr.IsNull() || hr->GetScore() < 0) {
01119                 continue;
01120             }
01121 
01122             const TCoord hrs0 (hr->GetStart(w));
01123             if(hrs0 > box1[2*w+1]) {
01124                 if(ls0 <= box1[2*w+1] && box1[2*w+1] + 1 - ls0 > cursegmax) {
01125                     segmax_start = ls0;
01126                     segmax_stop = box1[2*w+1];
01127                     cursegmax = segmax_stop - segmax_start + 1;
01128                 }
01129                 break;
01130             }
01131 
01132             if(hrs0 > ls0) {
01133                 if(hrs0 - ls0 > cursegmax) {
01134                     segmax_start = ls0;
01135                     segmax_stop = hrs0 - 1;
01136                     cursegmax = segmax_stop - segmax_start + 1;
01137                 }
01138                 
01139                 ls0 = hr->GetStop(w) + 1;
01140             }
01141             else if(hr->GetStop(w) + 1 > ls0) {
01142                 ls0 = hr->GetStop(w) + 1;
01143             }
01144         }
01145 
01146         if(box1[2*w+1] > ls0 + cursegmax) {
01147             segmax_start = ls0;
01148             segmax_stop  = box1[2*w+1];
01149             cursegmax    = segmax_stop - segmax_start + 1;
01150         }
01151                 
01152         if(cursegmax < min_compartment_hit_len) {
01153             h1.Reset(0);
01154             ++nullified;
01155             continue;
01156         }
01157         else {
01158 
01159             if(box1[2*w] < segmax_start) {
01160                 h1->Modify(2 * w, segmax_start);
01161             }
01162             
01163             if(segmax_stop < box1[2*w+1]) {
01164                 h1->Modify(2 * w + 1, segmax_stop);
01165             }
01166         }
01167         
01168         if(ihr == ihr_e) break;
01169     }
01170 
01171     return nullified;
01172 }
01173 
01174 
01175 template<class THit>
01176 typename CCompartmentFinder<THit>::CCompartment* 
01177 CCompartmentFinder<THit>::GetFirst()
01178 {
01179     if(m_compartments.size()) {
01180         m_iter = 0;
01181         return &m_compartments[m_iter++];
01182     }
01183     else {
01184         m_iter = -1;
01185         return 0;
01186     }
01187 }
01188 
01189 template<class THit>
01190 void CCompartmentFinder<THit>::OrderCompartments(void) {
01191   
01192     for(size_t i = 0, dim = m_compartments.size(); i < dim; ++i) {
01193         m_compartments[i].UpdateMinMax();
01194     }
01195     
01196     stable_sort(m_compartments.begin(), m_compartments.end(), 
01197                 CCompartmentFinder<THit>::CCompartment::s_PLowerSubj);
01198 }
01199 
01200 template<class THit>
01201 typename CCompartmentFinder<THit>::CCompartment* 
01202 CCompartmentFinder<THit>::GetNext()
01203 {
01204     const size_t dim = m_compartments.size();
01205     if(m_iter == -1 || m_iter >= int(dim)) {
01206         m_iter = -1;
01207         return 0;
01208     }
01209     else {
01210         return &m_compartments[m_iter++];
01211     }
01212 }
01213 
01214 
01215 template<class THit>
01216 CCompartmentAccessor<THit>::CCompartmentAccessor(TCoord  comp_penalty,
01217                                                  TCoord  min_matches,
01218                                                  TCoord  min_singleton_matches,
01219                                                  bool    cross_filter)
01220 {
01221     x_Init(comp_penalty, min_matches, min_singleton_matches, cross_filter);
01222 }
01223 
01224 
01225 template<class THit>
01226 CCompartmentAccessor<THit>::CCompartmentAccessor(
01227      typename THitRefs::iterator istart,
01228      typename THitRefs::iterator ifinish,
01229      TCoord comp_penalty,
01230      TCoord min_matches,
01231      TCoord min_singleton_matches,
01232      bool   cross_filter)
01233 {
01234     x_Init(comp_penalty, min_matches, min_singleton_matches, cross_filter);
01235     Run(istart, ifinish);
01236 }
01237 
01238 
01239 template<class THit>
01240 void CCompartmentAccessor<THit>::x_Init(TCoord  comp_penalty,
01241                                         TCoord  min_matches,
01242                                         TCoord  min_singleton_matches,
01243                                         bool    cross_filter)
01244 {
01245     m_Penalty             = comp_penalty;
01246     m_MinMatches          = min_matches;
01247     m_MinSingletonMatches = min_singleton_matches;
01248     m_CrossFiltering      = cross_filter;
01249     m_MaxIntron           = CCompartmentFinder<THit>::s_GetDefaultMaxIntron();
01250 }
01251 
01252 
01253 template<class THit>
01254 void CCompartmentAccessor<THit>::Run(typename THitRefs::iterator istart,
01255                                      typename THitRefs::iterator ifinish)
01256 {
01257     const TCoord kMax_TCoord (numeric_limits<TCoord>::max());
01258 
01259     // separate strands for CompartmentFinder
01260     typename THitRefs::iterator ib = istart, ie = ifinish, ii = ib, 
01261         iplus_beg = ie;
01262     typedef CHitComparator<THit> THitComparator;
01263     THitComparator sorter (THitComparator::eSubjStrand);
01264     stable_sort(ib, ie, sorter);
01265 
01266     TCoord minus_subj_min = kMax_TCoord, minus_subj_max = 0;
01267     for(ii = ib; ii != ie; ++ii) {
01268         if((*ii)->GetSubjStrand()) {
01269             iplus_beg = ii;
01270             break;
01271         }
01272         else {
01273             if((*ii)->GetSubjMin() < minus_subj_min) {
01274                 minus_subj_min = (*ii)->GetSubjMin();
01275             }
01276             if((*ii)->GetSubjMax() > minus_subj_max) {
01277                 minus_subj_max = (*ii)->GetSubjMax();
01278             }
01279         }
01280     }
01281 
01282     // minus
01283     {{
01284         // flip
01285         for(ii = ib; ii != iplus_beg; ++ii) {
01286 
01287             const typename THit::TCoord s0 = minus_subj_min + minus_subj_max 
01288                 - (*ii)->GetSubjMax();
01289             const typename THit::TCoord s1 = minus_subj_min + minus_subj_max 
01290                 - (*ii)->GetSubjMin();
01291             (*ii)->SetSubjStart(s0);
01292             (*ii)->SetSubjStop(s1);
01293         }
01294         
01295         CCompartmentFinder<THit> finder (ib, iplus_beg);
01296         finder.SetPenalty(m_Penalty);
01297         finder.SetMinMatches(m_MinMatches);
01298         finder.SetMinSingletonMatches(m_MinSingletonMatches);
01299         finder.SetMaxIntron(m_MaxIntron);
01300         finder.Run(m_CrossFiltering);
01301         
01302         // un-flip
01303         for(ii = ib; ii != iplus_beg; ++ii) {
01304 
01305             const typename THit::TCoord s0 = minus_subj_min + minus_subj_max 
01306                 - (*ii)->GetSubjMax();
01307             const typename THit::TCoord s1 = minus_subj_min + minus_subj_max 
01308                 - (*ii)->GetSubjMin();
01309             (*ii)->SetSubjStart(s1);
01310             (*ii)->SetSubjStop(s0);
01311         }        
01312 
01313         x_Copy2Pending(finder);
01314     }}
01315 
01316     // plus
01317     {{
01318         CCompartmentFinder<THit> finder (iplus_beg, ie);
01319         finder.SetPenalty(m_Penalty);
01320         finder.SetMinMatches(m_MinMatches);
01321         finder.SetMinSingletonMatches(m_MinSingletonMatches);
01322         finder.SetMaxIntron(m_MaxIntron);
01323         finder.Run(m_CrossFiltering);
01324         x_Copy2Pending(finder);
01325     }}
01326 }
01327 
01328 
01329 template<class THit>
01330 void CCompartmentAccessor<THit>::x_Copy2Pending(
01331     CCompartmentFinder<THit>& finder)
01332 {
01333     finder.OrderCompartments();
01334 
01335     typedef typename CCompartmentFinder<THit>::THitRef THitRef;
01336 
01337     // copy to pending
01338     for(typename CCompartmentFinder<THit>::CCompartment* compartment =
01339             finder.GetFirst(); compartment; compartment = finder.GetNext()) {
01340         
01341         if(compartment->GetMembers().size() > 0) {
01342 
01343             m_pending.push_back(THitRefs(0));
01344             THitRefs& vh = m_pending.back();
01345         
01346             for(THitRef ph (compartment->GetFirst()); ph; ph = compartment->GetNext())
01347             {
01348                 vh.push_back(ph);
01349             }
01350         
01351             const TCoord* box = compartment->GetBox();
01352             m_ranges.push_back(box[0]);
01353             m_ranges.push_back(box[1]);
01354             m_ranges.push_back(box[2]);
01355             m_ranges.push_back(box[3]);
01356         
01357             m_strands.push_back(compartment->GetStrand());
01358             m_status.push_back(compartment->GetFirst()->GetScore() > 0);
01359         }
01360     }
01361 }
01362 
01363 
01364 template<class THit>
01365 bool CCompartmentAccessor<THit>::GetFirst(THitRefs& compartment) {
01366 
01367     m_iter = 0;
01368     return GetNext(compartment);
01369 }
01370 
01371 
01372 template<class THit>
01373 bool CCompartmentAccessor<THit>::GetNext(THitRefs& compartment) {
01374 
01375     compartment.clear();
01376     if(m_iter < m_pending.size()) {
01377         compartment = m_pending[m_iter++];
01378         return true;
01379     }
01380     else {
01381         return false;
01382     }
01383 }
01384 
01385 
01386 template<class THit>
01387 CRef<objects::CSeq_align_set> CCompartmentAccessor<THit>::AsSeqAlignSet(void) const
01388 {
01389     USING_SCOPE(objects);
01390 
01391     CRef<CSeq_align_set> rv (new CSeq_align_set);
01392 
01393     // retrieve the ids
01394     CRef<CSeq_id> seqid_query (new CSeq_id), seqid_subj (new CSeq_id);
01395     if(m_pending.size()) {
01396         if(m_pending.front().size()) {
01397             const THit & h (*m_pending.front().front());
01398             seqid_query->Assign(*(h.GetQueryId()));
01399             seqid_subj->Assign(*(h.GetSubjId()));
01400         }
01401     }
01402 
01403     CSeq_align_set::Tdata& sas1_data (rv->Set());
01404 
01405     for(size_t i(0), idim(m_pending.size()); i < idim; ++i) {
01406 
01407         if(m_status[i]) {
01408 
01409             // note the range
01410             TCoord range_min (i > 0 && m_strands[i-1] == m_strands[i]
01411                               ? m_ranges[4*i - 1]
01412                               : 0);
01413 
01414             TCoord range_max (i + 1 < idim && m_strands[i+1] == m_strands[i]
01415                               ? m_ranges[4*i + 6]
01416                               : numeric_limits<TCoord>::max());
01417 
01418             CRef<CSeq_align> sa2 (new CSeq_align);
01419             sa2->SetType(CSeq_align::eType_disc);
01420 
01421             CSeq_align::TBounds & bounds (sa2->SetBounds());
01422             const ENa_strand query_strand (eNa_strand_plus);
01423 
01424             const ENa_strand subj_strand (m_strands[i]? eNa_strand_plus: 
01425                                           eNa_strand_minus);
01426             CRef<CSeq_loc> seq_loc (new CSeq_loc (*seqid_subj, range_min, 
01427                                                   range_max, subj_strand));
01428             bounds.push_back(seq_loc);
01429 
01430             // add the hits
01431             CSeq_align_set::Tdata& sas2_data (sa2->SetSegs().SetDisc().Set());
01432             ITERATE(typename THitRefs, ii, m_pending[i]) {
01433 
01434                 const THit& h (**ii);
01435                 CRef<CSeq_align> sa3 (new CSeq_align);
01436                 sa3->SetType(CSeq_align::eType_global);
01437                 CDense_seg & ds (sa3->SetSegs().SetDenseg());
01438                 ds.FromTranscript(h.GetQueryStart(), query_strand, 
01439                                    h.GetSubjStart(), subj_strand, 
01440                                    h.GetTranscript());
01441                 ds.SetIds().push_back(seqid_query);
01442                 ds.SetIds().push_back(seqid_subj);
01443                 sas2_data.push_back(sa3);
01444             }
01445 
01446             // save into the seq-align-set
01447             sas1_data.push_back(sa2);
01448         }
01449     }
01450 
01451     return rv;
01452 }
01453 
01454 
01455 
01456 END_NCBI_SCOPE
01457 
01458 
01459 #endif
01460 
01461 

Generated on Sun Dec 6 21:55:32 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:32 2009 by modify_doxy.py rev. 173732