NCBI C++ ToolKit
merge_alignments_tool_manager.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: merge_alignments_tool_manager.cpp 34155 2015-11-13 17:39:19Z katargir $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 
33 
35 
37 
39 
40 #include <gui/objutils/label.hpp>
41 
43 
46 
55 
57 
60 
61 ///////////////////////////////////////////////////////////////////////////////
62 /// CMergeAlignmentsToolManager
/// Constructor: passes six string literals to the CAlgoToolManagerBase
/// constructor and initialises m_ParamsPanel to NULL.
/// NOTE(review): the signature (listing line 63) is absent from this extract.
64 : CAlgoToolManagerBase("Merge Alignments",
65  "",
66  "Combine alignments using many possible strategies",
67  "Create a pseudo-multiple alignment using many possible strategies",
68  "MERGE_ALIGNMENTS",
69  "Alignment Creation"),
70  m_ParamsPanel(NULL)
71 {
72 }
73 
74 
/// Returns the fixed string "merge_alignments_tool_manager".
/// NOTE(review): the signature (listing line 75) is absent from this extract;
/// presumably CMergeAlignmentsToolManager::GetExtensionIdentifier() const - confirm.
76 {
77  return "merge_alignments_tool_manager";
78 }
79 
80 
/// Returns the fixed string "Merge Alignments Tool".
/// NOTE(review): the signature (listing line 81) is absent from this extract;
/// presumably CMergeAlignmentsToolManager::GetExtensionLabel() const - confirm.
82 {
83  return "Merge Alignments Tool";
84 }
85 
86 
88 {
90 
92 }
93 
94 
96 {
98 
100 }
101 
102 
/// Does work only while m_ParamsPanel is still NULL; the visible statements
/// hide the panel and set its registry path to m_RegPath + ".ParamsPanel".
/// NOTE(review): the signature (listing line 103) and listing lines 106, 108,
/// 110-111 and 114 are absent from this extract; presumably they construct the
/// panel and load its settings - confirm against the upstream file.
104 {
105  if(m_ParamsPanel == NULL) {
107 
109  m_ParamsPanel->Hide(); // to reduce flicker
112 
113  m_ParamsPanel->SetRegistryPath(m_RegPath + ".ParamsPanel");
115  }
116 }
117 
118 
/// Returns false when m_Params.m_Alignments is empty, true otherwise.
/// NOTE(review): the signature (listing line 119) and listing line 124 are
/// absent from this extract; line 124 presumably reports `err` to the user
/// before returning - confirm.
120 {
121  if( m_Params.m_Alignments.size() < 1 ){
122  string err = "Please select at least one alignment!";
123 
125  return false;
126  }
127 
128  return true;
129 }
130 
131 
132 /// select only Seq-aligns
/// Clears m_Params.m_Alignments and m_Alignments, then copies into m_Alignments
/// every entry of m_InputObjects whose object dynamic_casts to CSeq_align.
/// If nothing was collected, calls x_ConvertInputObjects() with the CSeq_align
/// type info and m_Alignments.
/// NOTE(review): m_Params.m_Alignments is cleared here but not refilled in this
/// function. The signature (listing line 133) is absent from this extract.
134 {
135  m_Params.m_Alignments.clear();
136  m_Alignments.clear();
137  ITERATE(vector<TConstScopedObjects>, it, m_InputObjects) {
138  ITERATE(TConstScopedObjects, it2, *it) {
139  const CSeq_align* align = dynamic_cast<const CSeq_align*>(it2->object.GetPointerOrNull());
140  if (align) m_Alignments.push_back(*it2);
141  }
142  }
143 
    // fallback: no input object was itself a CSeq_align
144  if (m_Alignments.empty()) {
145  x_ConvertInputObjects(CSeq_align::GetTypeInfo(), m_Alignments);
146  }
147 }
148 
149 
/// Returns the m_ParamsPanel pointer as-is (it may still be NULL).
/// NOTE(review): the signature (listing line 150) is absent from this extract;
/// presumably x_GetParamsPanel() - confirm.
151 {
152  return m_ParamsPanel;
153 }
154 
155 
/// Returns the address of the m_Params member.
/// NOTE(review): the signature (listing line 156) is absent from this extract;
/// presumably x_GetParamsAsRegSetting() - confirm.
157 {
158  return &m_Params;
159 }
160 
161 
/// Returns `job`.
/// NOTE(review): the signature (listing line 162) and the statement that
/// declares `job` (listing line 164) are absent from this extract; presumably
/// x_CreateLoadingJob() creating a CMergeAlignmentsJob - confirm.
163 {
165  return job;
166 }
167 
168 
170 {
173 }
174 
175 
176 ///////////////////////////////////////////////////////////////////////////////
177 /// CMergeAlignmentsJob
/// Constructor: initialises m_Params from `params` and sets m_Descr to
/// "Merging alignments".
/// NOTE(review): the signature (listing line 178) is absent from this extract.
179 : m_Params(params)
180 {
181  m_Descr = "Merging alignments"; //TODO
182 }
183 
184 
/// Merges the input alignments and adds the result as project items.
/// Visible steps: check that all inputs share one CScope; validate each
/// Seq-align and insert it into a CAlnContainer; extract ids into a TAlnIdMap;
/// build TAlnStats; create anchored alignments and merge them with BuildAln();
/// mark the resulting Seq-aligns eType_partial; group them by strand into
/// annotations; create one CProjectItem per annotation.
/// NOTE(review): this is an extract of a rendered listing. The signature
/// (listing line 185) and listing lines 187, 199, 235, 249, 268, 276, 281, 298,
/// 312-313, 321-324, 335 and 348 are absent; `aligns`, `ids` and `flags` are
/// declared on missing lines.
186 {
188 
189  ///
190  /// ensure we're all in one scope
191  ///
192  CRef<CScope> scope;
193  {{
194  ITERATE (TConstScopedObjects, iter, aligns) {
195  if ( !scope ) {
196  scope.Reset(const_cast<CScope*>(&*iter->scope));
197  } else if (scope != &*iter->scope) {
198  scope.Reset();
    // NOTE(review): listing line 199 is missing; the string below is presumably
    // the message argument of a throw/error macro - confirm.
200  "All alignments must be within the same project");
201  }
202  }
203  }}
204 
205  ///
206  /// do our work
207  ///
208 
209  CAlnContainer aln_container;
210 
211  ///
212  /// step 1: add to alignment container
213  ///
    // count = alignments visited; count_invalid = those that raised CException
214  int count = 0;
215  int count_invalid = 0;
216  //bool all_pairwise = true;
217  ITERATE (TConstScopedObjects, iter, aligns) {
218 
219  try {
220  ++count;
    // NOTE(review): the dynamic_cast result is not null-checked before use;
    // presumably a null CConstRef dereference throws a CException that the
    // catch below handles - confirm.
221  CConstRef<CSeq_align> aln(dynamic_cast<const CSeq_align*>(iter->object.GetPointer()));
222 
223  ///
224  /// validation is optional!
225  aln->Validate(true);
226 
227  // if (aln->GetSegs().IsDenseg() &&
228  // aln->GetSegs().GetDenseg().GetDim() != 2) {
229  // all_pairwise = false;
230  // }
231 
232  aln_container.insert(*aln);
233  }
234  catch (CException& e) {
    // NOTE(review): listing line 235 (the opening of this log statement) is missing.
236  << "CMergeAlignmentsJob::x_CreateProjectItems(): "
237  << "failed to validate: " << e.GetMsg());
238  ++count_invalid;
239  }
240  }
241 
    // report "<invalid>/<total> alignments failed validation."
242  if (count_invalid) {
243  string msg;
244  msg += NStr::IntToString(count_invalid);
245  msg += "/";
246  msg += NStr::IntToString(count);
247  msg += " alignments failed validation.";
248  if (count_invalid == count) {
    // NOTE(review): listing line 249 is missing; the all-failed case is handled
    // there (presumably by throwing or reporting an error - confirm).
250  } else {
251  LOG_POST(Warning << msg);
252  }
253  }
254 
255  /// Types we use here:
256  typedef CSeq_align::TDim TDim;
257 
258  /// Create a vector of seq-ids per seq-align
259  TIdExtract id_extract;
260  TAlnIdMap aln_id_map(id_extract, aln_container.size());
    // count_accepted = alignments for which push_back() did not throw CAlnException
261  size_t count_accepted = 0;
262  ITERATE(CAlnContainer, aln_it, aln_container) {
263  try {
264  aln_id_map.push_back(**aln_it);
265  ++count_accepted;
266  }
267  catch (CAlnException& e) {
    // NOTE(review): listing line 268 (the opening of this log statement) is missing.
269  << "CMergeAlignmentsJob::x_CreateProjectItems(): "
270  << "failed to extract IDs: " << e.GetMsg());
271  }
272  }
273 
274  if (count_accepted != aln_container.size()) {
275  if (count_accepted == 0) {
    // NOTE(review): listing line 276 is missing; the string below is presumably
    // the message argument of an error-reporting call - confirm.
277  "No valid alignments found");
278  return;
279  }
280 
    // NOTE(review): listing line 281 (the opening of this log statement) is
    // missing. The message prints count_accepted, which counts the alignments
    // whose ids WERE extracted, yet the text says they "had no IDs to extract";
    // the numerator looks like it should be
    // aln_container.size() - count_accepted - confirm and fix upstream.
282  << count_accepted << "/" << aln_container.size()
283  << " alignments had no IDs to extract.");
284  }
285 
286 
287  ///
288  /// gather statistics about our alignment
289  ///
290  TAlnStats aln_stats(aln_id_map);
291 
292 
293  // auto-detect self-alignments
294  // if the input set of sequences correspond to one and only one sequence,
295  // force row preservation
296  // bool preserve_rows = false;
297  {{
    // NOTE(review): `ids` is declared on missing listing line 298. In the
    // visible code it is filled but only read by the commented-out block
    // below, so this scope currently has no visible effect.
299  ITERATE (TAlnStats::TIdVec, i, aln_stats.GetIdVec()) {
300  CSeq_id_Handle idh = CSeq_id_Handle::GetHandle((*i)->GetSeqId());
301  ids.insert(idh);
302  }
303  // if (ids.size() == 1) {
304  // preserve_rows = true;
305  // }
306  }}
307 
308  CAlnUserOptions opts;
309 
310 
311  /// always merge both directions
    // NOTE(review): the statements this comment describes (listing lines
    // 312-313) are missing from this extract.
314 
315  ///
316  /// create a set of anchored alignments
317  ///
318  TAnchoredAlnVec anchored_aln_vec;
319  CreateAnchoredAlnVec(aln_stats, anchored_aln_vec, opts);
320 
323 
325 
    // NOTE(review): `flags` is computed on missing listing lines 321-324.
326  opts.SetMergeFlags(flags, true);
327 
328  ///
329  /// now, build
330  ///
331  CAnchoredAln out_anchored_aln;
332  BuildAln(anchored_aln_vec, out_anchored_aln, opts);
333 
334  vector< CRef<CSeq_align> > ds_aligns;
    // NOTE(review): listing line 335 (the name of the function called with the
    // arguments below) is missing; presumably
    // CreateSeqAlignFromEachPairwiseAln - confirm.
336  (out_anchored_aln.GetPairwiseAlns(), out_anchored_aln.GetAnchorRow(),
337  ds_aligns, CSeq_align::TSegs::e_Denseg);
338 
339  typedef list< CRef<CSeq_align> > TAligns;
340  TAligns aligns_out;
341 
    // copy the results into a list, marking each one as a partial alignment
342  NON_CONST_ITERATE (vector< CRef<CSeq_align> >, it, ds_aligns) {
343  (*it)->SetType(CSeq_align::eType_partial);
344  aligns_out.push_back(*it);
345  }
346 
347  /// fill unaligned regions
    // NOTE(review): listing line 348 is missing; it presumably opens the
    // conditional closed by the brace on listing line 353 - confirm.
349  NON_CONST_ITERATE (TAligns, align_iter, aligns_out) {
350  CRef<CDense_seg> ds = (*align_iter)->SetSegs().SetDenseg().FillUnaligned();
351  (*align_iter)->SetSegs().SetDenseg(*ds);
352  }
353  }
354 
    // nothing to add to the project
355  if (aligns_out.size() == 0)
356  return;
357 
358  string annot_base_name("Merged Alignment: ");
359  CAlignGroup::TAnnotList annot_list;
360 
361  CAlignGroup align_group_sorter;
362  align_group_sorter.GroupByStrand(aligns_out,
363  annot_list,
364  annot_base_name,
365  *scope);
366 
367  // now create a Project Item for the data
368 
369  ITERATE(CAlignGroup::TAnnotList, iter, annot_list) {
370  CRef<objects::CSeq_annot> annot = *iter;
371 
372  annot->SetCreateDate(CTime(CTime::eCurrent));
373 
374  // encode the name correctly
375  // the base name passed to GroupByStrand() above was presumably only a working name (TODO confirm);
376  // here we build the real 'name' that the object manager will understand
377 
378  string name("Merged Alignment: ");
379  CLabel::GetLabel(*annot, &name, CLabel::eDefault, &*scope);
380  if ( !name.empty() ) {
381  annot->SetNameDesc(name);
382  annot->SetTitleDesc(name);
383  }
384 
385  CRef<CProjectItem> pitem(new CProjectItem());
386 
387  pitem->SetItem().SetAnnot(*annot);
388  pitem->SetLabel(name);
389 
390  AddProjectItem(*pitem);
391  }
392 }
393 
394 
void CreateAnchoredAlnVec(_TAlnStats &aln_stats, TAnchoredAlnVec &out_vec, const CAlnUserOptions &options)
Create anchored alignment from each seq-align in the stats.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:900
void GroupByStrand(const TAlignList &aligns, TAnnotList &align_groups, const string &annot_base_name, objects::CScope &scope)
Group alignments into bins for each set of strands.
static const unsigned char msg[]
Definition: ccm.c:378
CIRef< T > GetServiceByType()
retrieves a typed reference to a service, the name of C++ type is used as the name of the service...
Definition: service.hpp:91
bool Create(wxWindow *parent, wxWindowID id=ID_CBLASTSEARCHOPTIONSPANEL, const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxSize(400, 300), long style=wxTAB_TRAVERSAL)
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
virtual CDataLoadingAppJob * x_CreateLoadingJob()
factory method for creating the job that executes the tool algorithm override in derived classes ...
CMergeAlignmentsToolManager()
CMergeAlignmentsToolManager.
const TPairwiseAlnVector & GetPairwiseAlns(void) const
The vector of pairwise alns.
string m_Descr
mutex to sync our internals
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with all unaligned pieces (implicit inserts), if any, added between segments.
Definition: Dense_seg.cpp:1095
virtual void CleanUI()
override this function in a derived class and clean extra members
string m_RegPath
registry path to the settings
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1008
USING_SCOPE(objects)
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
mapping pieces together
Definition: Seq_align_.hpp:103
SProjectSelectorParams m_ProjectParams
CProjectService - a service providing API for operations with Workspaces and Projects.
size_type size(void) const
virtual void InitUI()
override this function in a derived class and initialize extra members
virtual bool x_ValidateParams()
validates user input in Parameters panel, report errors if any
#define NULL
Definition: ncbistd.hpp:225
list< CRef< objects::CSeq_annot > > TAnnotList
Definition: align_group.hpp:56
wxWindow * m_ParentWindow
a window that will serve as a parent for our panels
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:893
void SetMergeFlags(TMergeFlags flags, bool set)
Set/clear merge flags.
int i
virtual const string & GetLabel() const
Definition: ui_object.cpp:113
void BuildAln(TAnchoredAlnVec &in_alns, CAnchoredAln &out_aln, const CAlnUserOptions &options, TAlnSeqIdIRef pseudo_seqid=TAlnSeqIdIRef())
Build anchored alignment from a set of alignments.
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
void SetLabel(const TLabel &value)
Assign a value to Label data member.
virtual void CleanUI()
override this function in a derived class and clean extra members
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5186
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
Container mapping seq-aligns to vectors of participating seq-ids.
Definition: aln_tests.hpp:55
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" ) ...
IAlnSeqId extracting functor.
virtual void x_CreateParamsPanelIfNeeded()
returns / creates Parameters panel, override in derived classes see cpp file for example ...
TDim GetAnchorRow(void) const
Which is the anchor row?
Options for different alignment manager operations.
void SelectProjectByObjects(TConstScopedObjects &objects, CProjectService *srv)
If all objects belong to the same project, selects that project.
Query-anchored alignment can be 2- or multi-dimensional.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:198
CMergeAlignmentsParamsPanel * m_ParamsPanel
CTime –.
Definition: ncbitime.hpp:290
virtual void SetRegistryPath(const string &path)
CAlgoToolManagerParamsPanel.
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows...
Definition: aln_stats.hpp:56
virtual CAlgoToolManagerParamsPanel * x_GetParamsPanel()
returns a pointer to the parameters panel, override in derived classes
const TIdVec & GetIdVec(void) const
Get vector of all ids from all alignments.
Definition: aln_stats.hpp:241
virtual void InitUI()
override this function in a derived class and initialize extra members
CMergeAlignmentsJob(const SMergeAlignmentsParams &params)
CMergeAlignmentsJob.
vector< CRef< CAnchoredAln > > TAnchoredAlnVec
Collection of anchored alignments.
CException –.
Definition: ncbiexpt.hpp:709
CDataLoadingAppJob - a base class for Jobs loading data into projects.
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1014
vector< TConstScopedObjects > m_InputObjects
original input objects, the tool needs to select a subset of objects that can serve as valid input ...
CAnchoredAln::TDim TDim
Only put the query seq on same row (input order is not significant).
vector< SConstScopedObject > TConstScopedObjects
Definition: objects.hpp:107
void Validate(bool full_test=false) const
Definition: Seq_align.cpp:649
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
void SetParams(SMergeAlignmentsParams *params, TConstScopedObjects *objects)
virtual void x_CreateProjectItems()
override this function in derived classes and populate m_Items.
_TAlnIdVec::TIdVec TIdVec
Vector of ids used in all alignments.
Definition: aln_stats.hpp:70
virtual IRegSettings * x_GetParamsAsRegSetting()
return a pointer to Parameters object as IRegSettings interface
No filtering: use both direct and reverse sequences.
Merge all sequences (greedy algo).
CUIObject m_Descriptor
describes the Manager's UI properties
IRegSettings An interface for objects that save / restore settings using CGuiRegistry.
Use only sequences whose strand is opposite to that of the anchor.
void SetItem(TItem &value)
Assign a value to Item data member.
CAlgoToolManagerParamsPanel.
User-defined methods of the data storage class.
void x_ConvertInputObjects(const CTypeInfo *typeInfo, map< string, TConstScopedObjects > &results)
Use current time.
Definition: ncbitime.hpp:295
EDirection m_Direction
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string...
Definition: ncbiexpt.hpp:546
const_iterator insert(const CSeq_align &seq_align)
Insert new CSeq_align into the list.
void CreateSeqAlignFromEachPairwiseAln(const CAnchoredAln::TPairwiseAlnVector pairwises, CAnchoredAln::TDim anchor, vector< CRef< CSeq_align > > &out_seqaligns, CSeq_align::TSegs::E_Choice choice, CScope *scope=NULL)
Create seq-align from each of the pairwise alignments vs the selected anchor row. ...
CSeq_align container.
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:443
void NcbiErrorBox(const string &message, const string &title="Error")
specialized Message Box function for reporting critical errors
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
EMergeAlgo m_MergeAlgo
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:756
virtual void x_InitProjectParams()
init m_ProjectParams, in particular can select target project based on the tool input ...
CAlgoToolManagerBase This is base class for simple algorithmic tool managers.
void x_SelectCompatibleInputObjects()
select only Seq-aligns
void AddProjectItem(objects::CProjectItem &item)
Modified on Mon Mar 27 16:07:57 2017 by modify_doxy.py rev. 506947