NCBI C++ ToolKit
best_feat_finder.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /*
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Author:  Michael Kornbluh
00027  *
00028  * File Description:
00029  *   stores feats for efficient retrieval in finding the best one
00030  *
00031  */
00032 
00033 #include <ncbi_pch.hpp>
00034 
00035 #include "best_feat_finder.hpp"
00036 
00037 #include <objects/seqfeat/Seq_feat.hpp>
00038 #include <objects/seqloc/Seq_interval.hpp>
00039 
00040 BEGIN_NCBI_SCOPE
00041 BEGIN_objects_SCOPE // namespace ncbi::objects::
00042 
00043 CBestFeatFinder::CBestFeatFinder(void)
00044 {
00045     // nothing to do here
00046 }
00047 
00048 bool CBestFeatFinder::AddFeat( const CSeq_feat& new_feat )
00049 {
00050     CConstRef<CSeq_feat> new_feat_ref( &new_feat );
00051     CConstRef<CSeq_loc> new_feat_loc_ref( &new_feat.GetLocation() );
00052 
00053     if( new_feat_ref && new_feat_loc_ref ) {
00054         loc_to_feat_map.insert( TLocToFeatMap::value_type( new_feat_loc_ref, new_feat_ref ) );
00055         return true;
00056     } else {
00057         return false;
00058     }
00059 }
00060 
00061 CConstRef<CSeq_feat> 
00062 CBestFeatFinder::FindBestFeatForLoc( const CSeq_loc &sought_loc ) const
00063 {
00064     // Try to find the smallest CDS that contains the given location
00065     // (we use extremes as an approximation)
00066 
00067     CConstRef<CSeq_loc> sought_loc_ref( &sought_loc );
00068 
00069     const int loc_start = sought_loc.GetStart(eExtreme_Positional);
00070     const int loc_stop  = sought_loc.GetStop(eExtreme_Positional);
00071 
00072     return FindBestFeatForLoc( loc_start, loc_stop );
00073 }
00074 
00075 CConstRef<CSeq_feat> 
00076 CBestFeatFinder::FindBestFeatForLoc( const int loc_start, const int loc_stop ) const
00077 {
00078     // something wrong with sought_loc
00079     if( loc_start < 0 || loc_stop < 0 ) {
00080         return CConstRef<CSeq_feat>();
00081     }
00082 
00083     const int loc_len = (loc_stop - loc_start + 1);
00084 
00085     CRef<CSeq_loc> sought_loc_ref( new CSeq_loc );
00086     sought_loc_ref->SetInt().SetFrom(loc_start);
00087     sought_loc_ref->SetInt().SetTo(loc_stop);
00088 
00089     // find first feat which is to the right of sought_loc and therefore
00090     // can't possibly contain the whole thing.
00091     TLocToFeatMap::const_iterator feat_iter =
00092         loc_to_feat_map.upper_bound( sought_loc_ref );
00093 
00094     // go "leftwards" looking for best CDS
00095     int best_overlap_extra_bases = INT_MAX; // 0 would imply a perfect match
00096     CConstRef<CSeq_feat> best_feat;
00097     while( feat_iter != loc_to_feat_map.begin() ) {
00098         --feat_iter;
00099 
00100         const int feat_start = feat_iter->first->GetStart(eExtreme_Positional);
00101         const int feat_stop  = feat_iter->first->GetStop(eExtreme_Positional);
00102         const int feat_len = ( feat_stop - feat_start + 1 );
00103 
00104         // something wrong with feat loc
00105         if( feat_start < 0 || feat_stop < 0 ) {
00106             continue;
00107         }
00108 
00109         // see if we can't possibly find something better at this point
00110         // because we've gone too far left
00111         const int best_possible_overlap_extra_bases = ( loc_start - feat_start );
00112         if( best_possible_overlap_extra_bases > best_overlap_extra_bases ) {
00113             break;
00114         }
00115 
00116         if( loc_start >= feat_start && loc_stop <= feat_stop ) {
00117             const int overlap_extra_bases = ( feat_len - loc_len );
00118             if( overlap_extra_bases < best_overlap_extra_bases ) {
00119                 best_overlap_extra_bases = overlap_extra_bases;
00120                 best_feat = feat_iter->second;
00121                 if( best_overlap_extra_bases == 0 ) {
00122                     // found a perfect match
00123                     break;
00124                 }
00125             }
00126         }
00127     }
00128 
00129     return best_feat;
00130 }
00131 
00132 bool
00133 CBestFeatFinder::CSeqLocSort::operator()( 
00134     const CConstRef<CSeq_loc> &loc1, 
00135     const CConstRef<CSeq_loc> &loc2 ) const
00136 {
00137     // sort by location start
00138     const TSeqPos start1 = loc1->GetStart(eExtreme_Positional);
00139     const TSeqPos start2 = loc2->GetStart(eExtreme_Positional);
00140     if( start1 != start2 ) {
00141         return (start1 < start2);
00142     }
00143 
00144     // then by length (we use "stop" as a reasonable proxy for length comparisons)
00145     const TSeqPos stop1 = loc1->GetStop(eExtreme_Positional);
00146     const TSeqPos stop2 = loc2->GetStop(eExtreme_Positional);
00147     if( stop1 != stop2 ) {
00148         return (stop2 < stop1); // reversed because we want longest first
00149     }
00150 
00151     // extremes are equal
00152     return false;
00153 }
00154 
00155 END_objects_SCOPE
00156 END_NCBI_SCOPE
Modified on Fri Jul 11 17:19:19 2014 by modify_doxy.py rev. 426318