NCBI C++ ToolKit
best_feat_finder.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Michael Kornbluh
27  *
28  * File Description:
29  * Stores features keyed by location so that the smallest one containing a given location can be found efficiently
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 
35 #include "best_feat_finder.hpp"
36 
39 
41 BEGIN_objects_SCOPE // namespace ncbi::objects::
42 
44 {
45  // nothing to do here
46 }
47 
48 bool CBestFeatFinder::AddFeat( const CSeq_feat& new_feat )
49 {
50  CConstRef<CSeq_feat> new_feat_ref( &new_feat );
51  CConstRef<CSeq_loc> new_feat_loc_ref( &new_feat.GetLocation() );
52 
53  if( new_feat_ref && new_feat_loc_ref ) {
54  loc_to_feat_map.insert( TLocToFeatMap::value_type( new_feat_loc_ref, new_feat_ref ) );
55  return true;
56  } else {
57  return false;
58  }
59 }
60 
63 {
64  // Try to find the smallest CDS that contains the given location
65  // (we use extremes as an approximation)
66 
67  CConstRef<CSeq_loc> sought_loc_ref( &sought_loc );
68 
69  const int loc_start = sought_loc.GetStart(eExtreme_Positional);
70  const int loc_stop = sought_loc.GetStop(eExtreme_Positional);
71 
72  return FindBestFeatForLoc( loc_start, loc_stop );
73 }
74 
76 CBestFeatFinder::FindBestFeatForLoc( const int loc_start, const int loc_stop ) const
77 {
78  // something wrong with sought_loc
79  if( loc_start < 0 || loc_stop < 0 ) {
80  return CConstRef<CSeq_feat>();
81  }
82 
83  const int loc_len = (loc_stop - loc_start + 1);
84 
85  CRef<CSeq_loc> sought_loc_ref( new CSeq_loc );
86  sought_loc_ref->SetInt().SetFrom(loc_start);
87  sought_loc_ref->SetInt().SetTo(loc_stop);
88 
89  // find first feat which is to the right of sought_loc and therefore
90  // can't possibly contain the whole thing.
91  TLocToFeatMap::const_iterator feat_iter =
92  loc_to_feat_map.upper_bound( sought_loc_ref );
93 
94  // go "leftwards" looking for best CDS
95  int best_overlap_extra_bases = INT_MAX; // 0 would imply a perfect match
96  CConstRef<CSeq_feat> best_feat;
97  while( feat_iter != loc_to_feat_map.begin() ) {
98  --feat_iter;
99 
100  const int feat_start = feat_iter->first->GetStart(eExtreme_Positional);
101  const int feat_stop = feat_iter->first->GetStop(eExtreme_Positional);
102  const int feat_len = ( feat_stop - feat_start + 1 );
103 
104  // something wrong with feat loc
105  if( feat_start < 0 || feat_stop < 0 ) {
106  continue;
107  }
108 
109  // see if we can't possibly find something better at this point
110  // because we've gone too far left
111  const int best_possible_overlap_extra_bases = ( loc_start - feat_start );
112  if( best_possible_overlap_extra_bases > best_overlap_extra_bases ) {
113  break;
114  }
115 
116  if( loc_start >= feat_start && loc_stop <= feat_stop ) {
117  const int overlap_extra_bases = ( feat_len - loc_len );
118  if( overlap_extra_bases < best_overlap_extra_bases ) {
119  best_overlap_extra_bases = overlap_extra_bases;
120  best_feat = feat_iter->second;
121  if( best_overlap_extra_bases == 0 ) {
122  // found a perfect match
123  break;
124  }
125  }
126  }
127  }
128 
129  return best_feat;
130 }
131 
132 bool
134  const CConstRef<CSeq_loc> &loc1,
135  const CConstRef<CSeq_loc> &loc2 ) const
136 {
137  // sort by location start
138  const TSeqPos start1 = loc1->GetStart(eExtreme_Positional);
139  const TSeqPos start2 = loc2->GetStart(eExtreme_Positional);
140  if( start1 != start2 ) {
141  return (start1 < start2);
142  }
143 
144  // then by length (we use "stop" as a reasonable proxy for length comparisons)
145  const TSeqPos stop1 = loc1->GetStop(eExtreme_Positional);
146  const TSeqPos stop2 = loc2->GetStop(eExtreme_Positional);
147  if( stop1 != stop2 ) {
148  return (stop2 < stop1); // reversed because we want longest first
149  }
150 
151  // extremes are equal
152  return false;
153 }
154 
155 END_objects_SCOPE
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:932
TLocToFeatMap loc_to_feat_map
CConstRef< CSeq_feat > FindBestFeatForLoc(const CSeq_loc &sought_loc) const
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1082
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:897
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:945
numerical value
Definition: Na_strand.hpp:63
void SetInt(TInt &v)
Definition: Seq_loc.hpp:958
bool AddFeat(const CSeq_feat &new_cds)
namespace ncbi::objects::
Definition: Seq_feat.hpp:55
bool operator()(const CConstRef< CSeq_loc > &loc1, const CConstRef< CSeq_loc > &loc2) const
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
Modified on Sat Feb 06 14:26:02 2016 by modify_doxy.py rev. 426318