NCBI C++ ToolKit
bins_ds.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bins_ds.cpp 38426 2017-05-09 04:06:43Z rudnev $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Melvin Quintos, Dmitry Rudnev
27  *
28  * Description: This file defines CBinsJob, CBinsDS, and CBinsDSType
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
38 
39 #include <gui/objutils/utils.hpp>
40 #include <gui/objutils/snp_gui.hpp>
43 
44 #include <objmgr/graph_ci.hpp>
47 #include <objmgr/seqdesc_ci.hpp>
48 #include <objmgr/table_field.hpp>
49 #include <objmgr/util/feature.hpp>
50 #include <objmgr/util/sequence.hpp>
51 #include <objmgr/util/sequence.hpp>
52 
55 
57 
59 #include <cmath>
60 
63 
64 ///////////////////////////////////////////////////////////////////////////////
65 /// CBinsJob
66 ///////////////////////////////////////////////////////////////////////////////
// Job that loads SNP bin data for a range of a bioseq and converts it into glyphs.
// NOTE(review): several numbered lines of this class are absent from this listing
// (72-73, 87-88, 98-99). The cross-reference index at the end of the page places
// SParams::bAdaptiveSelector and SParams::scale at 72-73 and the members
// m_Params / m_eDataType at 98-99.
67 class CBinsJob : public CSGAnnotJob
68 {
69 public:
// Load parameters; filled in by CBinsDS::LoadData() and copied into m_Params.
70  struct SParams {
// when set, x_PreFetchFtable() forces the bin window to 1
71  bool isSeqView;
// multiplied by |scale| to obtain the bin window in x_PreFetchFtable()
74  int binWidth;
// set from CBinsDS::m_Depth in LoadData()
75  int depth;
// annotation name passed to SetAnnotName() on every created glyph
76  string name;
77  };
78 
// Forwards the fixed job name "SNP", the handle, a default-constructed selector
// and the range to CSGAnnotJob, and keeps a copy of params.
79  CBinsJob(const CBioseq_Handle& handle, const TSeqRange& range,
80  const SParams& params)
81  : CSGAnnotJob("SNP", handle, SAnnotSelector(), range)
82  , m_Params(params) {}
83 
// Runs the load; errors are reported through m_Error and the returned state.
84  virtual EJobState x_Execute();
85 
86 private:
89 
90  // will try to load and create bins and return true;
91  // if however the amount of positions would be unreasonably (>500) big, will return
92  // false and glyphs will be undefined
93  bool x_PreFetchFtable(const SAnnotSelector& sel,
94  const objects::CSeq_loc &loc,
95  CSeqGlyph::TObjects* glyphs);
96 
97 private:
100 };
101 
103 {
// Body of CBinsJob::x_Execute() (signature on listing line 102, not shown here).
// Builds a Seq-loc for m_Range on m_Handle, fills a glyph list through
// x_FetchFtable() and moves it into the job result.
// NOTE(review): listing lines 104 and 113 are absent; `state` and `result`
// are presumably declared there -- confirm against the real file.
105 

106  try {
107  CSeqGlyph::TObjects glyphs;
108 
109  CRef<CSeq_loc> loc = m_Handle.GetRangeSeq_loc(m_Range.GetFrom(), m_Range.GetTo());
110 
111  x_FetchFtable(*loc, &glyphs);
112 
114  m_Result.Reset(result);
115 
// hand the glyphs over without copying; `glyphs` ends up holding result's old list
116  result->listObjs.swap(glyphs);
117  result->eDataType = m_eDataType;
118  }
// Every failure is converted into a CAppJobError and the eFailed state;
// nothing is rethrown from here.
119  catch (CException& ex) {
120  m_Error.Reset(new CAppJobError(ex.GetMsg()));
121  state = eFailed;
122  }
123  catch (std::exception& ex) {
124  m_Error.Reset(new CAppJobError(ex.what()));
125  state = eFailed;
126  }
127  catch(...) {
128  m_Error.Reset(new CAppJobError("Error when loading Named Annotation"));
129  state = eFailed;
130  }
131  return state;
132 }
133 
134 
136 {
// Body of CBinsJob::x_FetchFtable(const CSeq_loc&, CSeqGlyph::TObjects*)
// (signature on listing line 135, not shown here).
// Tries the table-based bin load first and falls back to x_FetchGraphs()
// when x_PreFetchFtable() returns false.
// NOTE(review): listing line 138 (start of the call that configures `sel`)
// and line 146 are absent from this listing.
137  SAnnotSelector sel;
139  sel);
140 
// load into a temporary so `objs` is only touched on success
141  CSeqGlyph::TObjects tmpList;
142  bool isBinsLoaded(x_PreFetchFtable(sel, loc, &tmpList));
143 
144  if(isBinsLoaded) {
145  objs->swap(tmpList);
147  }
148  else {
149  x_FetchGraphs(loc, objs);
150  }
151 }
152 
154  const CSeq_loc &loc,
155  CSeqGlyph::TObjects *objs)
156 {
// Body of CBinsJob::x_PreFetchFtable() (first signature line, 153, not shown here).
// Reads the bin table rows that fall into m_Range and appends the resulting
// glyph(s) to *objs.
// Returns false when no bin annotation handle can be obtained or when more than
// 500 rows would have to be read; returns true otherwise.
// Note: true is also returned when no row yielded an entry; *objs is then left
// unchanged.
// NOTE(review): listing lines 176, 180, 222, 233 and 247 are absent; `dmap`,
// `bin`, `obj` and the loop header using `iBin` are presumably declared there.
157  const CTableFieldHandle<int> col_type("trackType");
158  const CTableFieldHandle<string> col_sub_type("trackSubType");
159  int type;
160  string subType;
161  int pos_start, pos_end;
162  string title, comment;
163 
164  bool hasResult = false; // data has at least 1 non-zero bin
165  double scale = fabs(m_Params.scale);
166  const int binWidth = m_Params.binWidth;
167  // data has at least one non-0 pvalue
168  bool hasPvalue(false);
169 
170  double window = scale*binWidth;
171 
// the window never drops below 1 and is fixed to 1 in sequence view
172  if (window < 1 || m_Params.isSeqView) {
173  window = 1;
174  }
175 
177  m_Range.GetFrom(),
178  m_Range.GetTo(),
179  window,
181 
182  CGeneMarkerGlyph::TGeneMarkers gene_markers;
183 
184  // Grab annotations and start processing data
185  CSeq_annot_Handle annot;
186  if(!NSnpBins::GetBinHandle(m_Handle.GetScope(), sel, loc, annot)) {
187  return false;
188  }
189  NSnpBins::ReadAnnotDesc(annot, title, comment);
// pos_start/pos_end receive the table row indexes for m_Range; the loop below
// treats pos_end as exclusive
190  NSnpBins::FindPosIndexRange(annot, (int)m_Range.GetFrom(), (int)m_Range.GetTo(), pos_start, pos_end);
191 
192  // Read the type information
193  // if none, assume eGAP
194  if(!col_type.TryGet(annot, 0, type)) {
195  type = NSnpBins::eGAP;
196  }
197  // get the potentially existing subtype for GAP tracks
198  if(type == NSnpBins::eGAP) {
199  col_sub_type.TryGet(annot, 0, subType);
200  }
// subType stays empty for non-eGAP tracks
201  bool isGeneMarker(NSnpBins::isGeneMarker(subType));
202 
203  // if there are too many rows, abort the load and indicate that a graph should be loaded instead
204  if(pos_end - pos_start > 500) {
205  return false;
206  }
207  // Read all the rows and columns
208  // the least common denominator is having the following column:
209  // pos (if absent, the line will be skipped entirely)
210  // if trackType is missing, it is assumed to be eGAP
211  // all others are allowed to be missing
212  // (although the data provider should strive to have all correct columns expected
213  // for a given trackType)
214  for(int row = pos_start; row < pos_end; ++row) {
215  CRef<NSnpBins::SBinEntry> BinEntry(NSnpBins::GetEntry(annot, row));
216  if(!BinEntry.IsNull()) {
217  if(!isGeneMarker) {
// regular bin: a single-position range holding exactly this entry
218  TSeqRange range = TSeqRange(BinEntry->pos, BinEntry->pos);
219  if(BinEntry->pvalue != 0) {
220  hasPvalue = true;
221  }
223  bin.obj.Reset(new NSnpBins::SBin);
224  bin.obj->count = 1;
225  bin.obj->type = type;
226  bin.obj->range = range;
227  bin.obj->m_EntryList.push_back(BinEntry);
228  bin.obj->m_SigEntry = BinEntry;
229 
230  dmap.AddRange(range, bin);
231  } else {
// gene marker: spans pos..pos_end of the entry and is collected separately
232  TSeqRange range = TSeqRange(BinEntry->pos, BinEntry->pos_end);
234  obj->count = 1;
235  obj->type = type;
236  obj->range = range;
237  obj->m_SigEntry = BinEntry;
238 
239  gene_markers.push_back(obj);
240  }
241  hasResult = true;
242  }
243  }
244  if (hasResult) {
245  if(!isGeneMarker) {
246  size_t binCount(0);
248  iBin->SequentialNumber = binCount++;
249  }
// a scatter plot glyph is added (before the bins glyph) only for eGAP/eGCAT
// tracks that carry at least one non-zero pvalue
250  if(hasPvalue && (type == NSnpBins::eGAP || type == NSnpBins::eGCAT)) {
251  CRef<CScatterPlotGlyph> scatter_plot(new CScatterPlotGlyph(dmap, loc));
252  scatter_plot->SetTitle(title);
253  scatter_plot->SetAnnotName(m_Params.name);
254  CRef<CSeqGlyph> sref(scatter_plot.GetPointer());
255  objs->push_back(sref);
256  }
257  CRef<CBinsGlyph> bins;
258  bins.Reset(new CBinsGlyph(dmap, loc, &m_Handle.GetScope()));
259  bins->SetTitle(title);
260  bins->SetAnnotName(m_Params.name);
261  bins->SetType(type);
262  CRef<CSeqGlyph> fref(bins.GetPointer());
263  objs->push_back(fref);
264  } else {
265  CRef<CGeneMarkerGlyph> gene_marker_glyph(new CGeneMarkerGlyph(gene_markers, loc, &m_Handle.GetScope()));
266  gene_marker_glyph->SetTitle(title);
267  gene_marker_glyph->SetAnnotName(m_Params.name);
268  CRef<CSeqGlyph> fref(gene_marker_glyph.GetPointer());
269  objs->push_back(fref);
270  }
271  }
272  return true;
273 }
274 
276 {
// Body of CBinsJob::x_FetchGraphs(const CSeq_loc&, CSeqGlyph::TObjects*)
// (signature on listing line 275, not shown here).
// Reads the "pos" column of the rows covering m_Range, adds each position as a
// single-point range to a density map and appends one glyph (`hist`) to *glyphs.
// Throws CException (eUnknown) unless exactly one annotation is found.
// NOTE(review): listing lines 278-281, 289, 312 and 316 are absent; the selector
// setup and the creation of `dmap` and `hist` are presumably there.
277  SAnnotSelector sel;
// NOTE(review): tmpList is not used in any line visible here
282  CSeqGlyph::TObjects tmpList;
283 
284  CTableFieldHandle<int> col_pos("pos");
285 
286  int pos;
287  int pos_start, pos_end;
288 
290 
291  // Grab annotations and start processing data
292  CAnnot_CI iter(m_Handle.GetScope(), loc, sel);
293 
294  // internal error!
295  if (iter.size()!=1) {
296  NCBI_THROW(CException, eUnknown, "Incorrect bins data!");
297  }
298 
299  CSeq_annot_Handle annot = *iter;
// title and comment are read but not used in any line visible here
300  string title, comment;
301  NSnpBins::ReadAnnotDesc(annot, title, comment);
302  NSnpBins::FindPosIndexRange(annot, (int)m_Range.GetFrom(), (int)m_Range.GetTo(), pos_start, pos_end);
303 
304  // Read all the rows and columns
// rows without a readable "pos" value are skipped
305  for ( int row = pos_start; row < pos_end; ++row ) {
306  if ( col_pos.TryGet(annot, row, pos)) {
307 
308  TSeqRange range = TSeqRange(pos, pos);
309  dmap.AddRange(range);
310  }
311  }
313  hist->SetAnnotName(m_Params.name);
314  glyphs->push_back(CRef<CSeqGlyph>(hist));
315 
317 }
318 
319 
320 
321 ///////////////////////////////////////////////////////////////////////////////
322 /// CBinsDS
323 ///////////////////////////////////////////////////////////////////////////////
// Constructs the data source; scope and id are forwarded unchanged to CSGGenBankDS.
324 CBinsDS::CBinsDS(CScope& scope, const CSeq_id& id)
325  : CSGGenBankDS(scope, id)
326 {}
327 
330  const TSeqRange& range,
332 {
// Body of the static CBinsDS::GetTrackNames(object, names, range, params)
// (signature starts on listing line 328, not shown here).
// Inserts one entry per annotation found into `names`: named annotations under
// their own name, all others under "Unnamed"; the title is always left empty.
// NOTE(review): listing lines 337, 339, 342 and 344 are absent; the declaration
// of `sel` and the loop header over `iter` are presumably there.
// dynamic_cast to a reference throws std::bad_cast if the object is not a CSeq_id
333  const CSeq_id& id = dynamic_cast<const CSeq_id&>(object.object.GetObject());
334  CBioseq_Handle handle = object.scope->GetBioseqHandle(id);
335 
336  // Find data
338  CSeqUtils::SetResolveDepth(sel, params.m_Adaptive, params.m_Level);
// collect available annot names rather than the annots themselves
340  sel.SetCollectNames();
341 
343  range, eNa_strand_unknown, &sel);
345  if (iter->IsNamed()) {
346  names.insert(TAnnotNameTitleMap::value_type(iter->GetName(), ""));
347  } else {
348  names.insert(TAnnotNameTitleMap::value_type("Unnamed", ""));
349  }
350  }
351 }
352 
353 
// Stores the name that LoadData() later passes to the job as SParams::name.
354 void CBinsDS::SetName(const string& name)
355 {
356  m_Name = name;
357 }
358 
// Packs the request into CBinsJob::SParams, creates a CBinsJob for m_Handle and
// `range`, and starts it through x_LaunchJob().
// NOTE(review): listing lines 365, 367 and 369 are absent; the declaration of `p`
// and the remaining SParams assignments are presumably there.
359 void CBinsDS::LoadData(const TSeqRange& range,
360  TModelUnit scale,
361  bool isSeqView)
362 {
363  CRef<CBinsJob> job;
364 
366  p.isSeqView = isSeqView;
368  p.scale = scale;
370  p.depth = m_Depth;
371  p.name = m_Name;
372 
373  job.Reset(new CBinsJob(m_Handle, range, p));
374 
375  x_LaunchJob(*job);
376 }
377 
378 ///////////////////////////////////////////////////////////////////////////////
379 /// CBinsDSType
380 ///////////////////////////////////////////////////////////////////////////////
381 
384 {
// Body of CBinsDSType::CreateDS(SConstScopedObject& object) const
// (signature on listing line 383, not shown here).
// Returns a newly allocated CBinsDS for the CSeq_id wrapped in `object`.
// The dynamic_cast throws std::bad_cast if the wrapped object is not a CSeq_id.
// NOTE(review): a raw pointer is returned; presumably the caller takes
// ownership -- confirm.
385  const CSeq_id& id = dynamic_cast<const CSeq_id&>(object.object.GetObject());
386  return new CBinsDS(object.scope.GetObject(), id);
387 }
388 
389 
391 {
// Body of CBinsDSType::GetExtensionIdentifier() const
// (signature on listing line 390, not shown here).
// Returns a copy of the fixed identifier kept in a function-local static.
392  static string sid("snp_bins_ds_type");
393  return sid;
394 }
395 
397 {
// Body of CBinsDSType::GetExtensionLabel() const
// (signature on listing line 396, not shown here).
// Returns a copy of the fixed display label kept in a function-local static.
398  static string slabel("SNP Bin Track Data Source Type");
399  return slabel;
400 }
401 
403 {
// Body of CBinsDSType::IsSharable() const
// (signature on listing line 402, not shown here).
// This data source type always reports that it cannot be shared.
404  return false;
405 }
406 
407 
CRef< SBinEntry > m_SigEntry
most significant entry in this bin
Definition: snp_bins.hpp:107
CBioseq_Handle –.
static void ReadAnnotDesc(const objects::CSeq_annot_Handle &handle, string &title, string &comment)
get title and comment out of annot.desc
Definition: snp_bins.cpp:75
CRef< CObject > m_Result
const TAnnotNames & GetAnnotNames(void) const
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:783
CRef< CAppJobError > m_Error
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:718
SAnnotSelector & AddNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to look for.
int count
number of entries in this bin
Definition: snp_bins.hpp:103
static bool GetBinHandle(objects::CScope &scope, const objects::SAnnotSelector &sel, const objects::CSeq_loc &loc, objects::CSeq_annot_Handle &annot)
get an annotation handle that is needed to load a singular bin on range
Definition: snp_bins.cpp:121
void SetTitle(const string &title)
bool m_Adaptive
adaptive/exact annot selector
TSeqRange range
Definition: snp_bins.hpp:105
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:970
GLdouble TModelUnit
Definition: gltypes.hpp:48
CBinsJob(const CBioseq_Handle &handle, const TSeqRange &range, const SParams &params)
Definition: bins_ds.cpp:79
SParams m_Params
Definition: bins_ds.cpp:98
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
Definition: bins_ds.cpp:390
bool TryGet(const CFeat_CI &feat_ci, Value &v) const
Definition: table_field.hpp:78
static void GetTrackNames(SConstScopedObject &object, TAnnotNameTitleMap &names, const TSeqRange &range, const ILayoutTrackFactory::SExtraParams &)
Definition: bins_ds.cpp:328
bool m_Adaptive
Adaptive/Exact selector.
TSeqPos pos
! arrange member names as in the dumped file
Definition: snp_bins.hpp:77
SBinsJobResult.
Definition: bins_ds.hpp:107
void SetTitle(const string &title)
Definition: bins_glyph.hpp:187
extra parameter for initializing a track.
static const int c_BinWidth
standard width and height of a bin
Definition: snp_gui.hpp:120
the only requirement to TDataType is that it has a method GetValue() that returns some kind of float-...
File Description:
SAnnotSelector –.
virtual ISGDataSource * CreateDS(SConstScopedObject &object) const
CBinsDSType.
Definition: bins_ds.cpp:383
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
Definition: utils.cpp:155
SAnnotSelector & SetAnnotType(TAnnotType type)
Set annotation type (feat, align, graph)
static bool isGeneMarker(const string &trackSubType)
determine whether a string in TrackSubType describes a Gene Marker ("102_1" or "102_3") ...
Definition: snp_bins.hpp:225
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:776
User-defined methods of the data storage class.
representation of a bin
Definition: snp_bins.hpp:100
SAnnotSelector & SetCollectNames(bool value=true)
Collect available annot names rather than annots.
static int type
Definition: getdata.c:34
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
EJobState
Job states (describe FSM)
Definition: app_job.hpp:86
User-defined methods of the data storage class.
NHGRI GWAS Catalog Track (AKA Association Results)
Definition: snp_bins.hpp:55
bool bAdaptiveSelector
Definition: bins_ds.cpp:72
CAnnot_CI –.
Definition: annot_ci.hpp:58
static void FindPosIndexRange(const objects::CSeq_annot_Handle &annot, int pos_value_from, int pos_value_to, int &pos_index_begin, int &pos_index_end)
Perform iterative binary search to find table indexes (rows) 'pos_index_begin' and 'pos_index_end' in...
Definition: snp_bins.cpp:272
void SetAnnotName(const string &name)
Definition: bins_glyph.hpp:195
SAnnotSelector & IncludeNamedAnnotAccession(const string &acc, int zoom_level=0)
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" ) ...
Definition: bins_ds.cpp:396
User-defined methods of the data storage class.
objects::CBioseq_Handle m_Handle
target sequence
void LoadData(const TSeqRange &range, TModelUnit scale, bool isSeqView)
Definition: bins_ds.cpp:359
CAppJobError Default implementation for IAppJobError - encapsulates a text error message.
TModelUnit scale
Definition: bins_ds.cpp:73
virtual bool IsSharable() const
check if the data source can be shared.
Definition: bins_ds.cpp:402
TBinType type
Definition: snp_bins.hpp:102
TJobID x_LaunchJob(IAppJob &job, int report_period=1, const string &pool="ObjManagerEngine")
Launch either a background or foreground job.
static void GetBinSelector(const string &sTrackAccession, bool isAdaptive, int depth, objects::SAnnotSelector &sel)
get a selector for a bin from a NA track accession
Definition: snp_bins.cpp:94
static CRef< SBinEntry > GetEntry(const objects::CSeq_annot_Handle &annot, int row)
get a bin entry corresponding to a row position in the table presumed contained within the handle ...
Definition: snp_bins.cpp:190
list< CRef< objects::NSnpBins::SBin > > TGeneMarkers
Definition: gmark_glyph.hpp:55
CSeq_annot_Handle –.
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
virtual EJobState x_Execute()
method truly doing the job.
Definition: bins_ds.cpp:102
position_type GetTo(void) const
Definition: range.hpp:142
TSeqPos pos_end
gene end when trackSubType is (Gene association)
Definition: snp_bins.hpp:91
void x_FetchGraphs(const CSeq_loc &, CSeqGlyph::TObjects *)
Definition: bins_ds.cpp:275
static void SetResolveDepth(objects::SAnnotSelector &sel, bool adaptive, int depth=-1)
help function for setting selector resolve depth.
Definition: utils.cpp:393
CBinsDS(objects::CScope &scope, const objects::CSeq_id &id)
CBinsDS.
Definition: bins_ds.cpp:324
SBinsJobResult::EDataType m_eDataType
Definition: bins_ds.cpp:99
position_type GetFrom(void) const
Definition: range.hpp:134
CException –.
Definition: ncbiexpt.hpp:709
void SetAnnotName(const string &name)
CScope –.
Definition: scope.hpp:90
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1153
TSeqRange m_Range
target range
int m_Level
layout level that limits feature retrieving used by annotation selector.
else result
Definition: token2.c:20
bool x_PreFetchFtable(const SAnnotSelector &sel, const objects::CSeq_loc &loc, CSeqGlyph::TObjects *glyphs)
Definition: bins_ds.cpp:153
objects::CBioseq_Handle m_Handle
int m_Depth
annotation resolving depth
void SetAnnotName(const string &name)
EDataType eDataType
Definition: bins_ds.hpp:115
USING_SCOPE(objects)
list< CRef< CSeqGlyph > > TObjects
Definition: seq_glyph.hpp:83
CRef< objects::NSnpBins::SBin > obj
Definition: bins_glyph.hpp:56
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string...
Definition: ncbiexpt.hpp:546
void SetType(objects::NSnpBins::TBinType t)
Definition: bins_glyph.hpp:130
TAnnots m_Annots
particular annotations the track will be looking at.
string m_Name
Definition: bins_ds.hpp:77
static const struct name_t names[]
void SetTitle(const string &title)
CGeneMarkerGlyph inline method implementation.
#define fabs(v)
Definition: ncbi_dispd.c:46
CBinsJob.
Definition: bins_ds.cpp:67
dbGaP analysis files
Definition: snp_bins.hpp:54
void SetAnnotName(const string &name)
void x_FetchFtable(const CSeq_loc &, CSeqGlyph::TObjects *)
Definition: bins_ds.cpp:135
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:443
CSeqGlyph::TObjects listObjs
Definition: bins_ds.hpp:116
static std::unique_ptr< CParams > params
Definition: wgs_params.cpp:417
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
void SetName(const string &)
Definition: bins_ds.cpp:354
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:756
Modified on Wed Aug 16 05:49:53 2017 by modify_doxy.py rev. 533848