NCBI C++ ToolKit
compart.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: compart.cpp 59163 2013-08-01 14:00:50Z chetvern $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Yuri Kapustin
27  *
28  * File Description: cDNA-to-Genomic local alignment (same species)
29  * and compartmentization utility
30 */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <math.h>
40 #include "compart.hpp"
41 
43 
44 
46 {
48 
51  "Compart v.1.35. Unless -qdb and -sdb are specified, "
52  "the tool expects tabular blast hits at stdin collated "
53  "by query and subject, e.g. with 'sort -k 1,1 -k 2,2'");
54 
55  argdescr->AddOptionalKey ("qdb", "qdb", "cDNA BLAST database",
57 
58  argdescr->AddOptionalKey ("sdb", "sdb", "Genomic BLAST database",
60 
61  argdescr->AddFlag ("ho", "Print raw hits only - no compartments");
62 
63  argdescr->AddDefaultKey("penalty", "penalty", "Per-compartment penalty",
65 
66  argdescr->AddDefaultKey("min_idty", "min_idty", "Minimal overall identity. Note: in current implementation there is no sense to set different 'min_idty' and 'min_singleton_idty' (minimum is used anyway).",
68 
69  argdescr->AddDefaultKey("min_singleton_idty", "min_singleton_idty",
70  "Minimal identity for singleton compartments. "
71  "The actual parameter passed to the compartmentization "
72  "procedure is least of this parameter multipled "
73  "by the seq length, and min_singleton_idty_bps. Note: in current implementation there is no sense to set different 'min_idty' and 'min_singleton_idty' (minimum is used anyway).",
75 
76  argdescr->AddDefaultKey("min_singleton_idty_bps", "min_singleton_idty_bps",
77  "Minimal identity for singleton compartments "
78  "in base pairs. Default = parameter disabled.",
79  CArgDescriptions::eInteger, "9999999");
80 
81  argdescr->AddDefaultKey ("max_intron", "max_intron",
82  "Maximum intron length (in base pairs)",
86 
87  argdescr->AddDefaultKey("dropoff", "dropoff",
88  "Max score drop-off during hit extension.",
91  s_GetDefaultDropOff()));
92 
93  argdescr->AddDefaultKey("min_query_len", "min_query_len",
94  "Minimum length for individual cDNA sequences.",
96 
97  argdescr->AddDefaultKey("min_hit_len", "min_hit_len",
98  "Minimum length for reported hits in hits-only mode. "
99  "No effect in compartments mode.",
101 
102  argdescr->AddDefaultKey ("maxvol", "maxvol",
103  "Maximum index volume size in MB (approximate)",
105  "512");
106 
107  argdescr->AddFlag("noxf", "[With external hits] Suppress overlap x-filtering: "
108  "print all compartment hits intact.");
109 
110  argdescr->AddOptionalKey("seqlens", "seqlens",
111  "[With external hits] Two-column file with sequence IDs "
112  "and their lengths. If none supplied, the program will "
113  "attempt fetching the lengths from GenBank. "
114  "Cannot be used with -qdb.",
116 
117  argdescr->AddDefaultKey("N", "N",
118  "[With external hits] Max number of compartments "
119  "per query (0 = All).",
121 
122  CArgAllow* constrain01 (new CArgAllow_Doubles(0.0, 1.0));
123  argdescr->SetConstraint("penalty", constrain01);
124  argdescr->SetConstraint("min_idty", constrain01);
125  argdescr->SetConstraint("min_singleton_idty", constrain01);
126 
127  CArgAllow_Integers* constrain_maxvol (new CArgAllow_Integers(128,1024));
128  argdescr->SetConstraint("maxvol", constrain_maxvol);
129 
130  CArgAllow_Integers* constrain_minqlen (new CArgAllow_Integers(21,99999));
131  argdescr->SetConstraint("min_query_len", constrain_minqlen);
132 
133  CArgAllow_Integers* constrain_minhitlen (new CArgAllow_Integers(1,99999));
134  argdescr->SetConstraint("min_hit_len", constrain_minhitlen);
135 
136  SetupArgDescriptions(argdescr.release());
137 }
138 
139 
141 {
142  m_id2len.clear();
143  while(istr) {
144  string id;
145  istr >> id;
146  if(id.size() && id[0] != '#') {
147  size_t len (0);
148  istr >> len;
149  if(len != 0) {
150  m_id2len[id] = len;
151  }
152  }
153  }
154 }
155 
156 
157 size_t CCompartApp::x_GetSeqLength(const string& id)
158 {
160  if(im != ie) {
161  return im->second;
162  }
163  else {
165 
166  CRef<CSeq_id> seqid;
167  try { seqid.Reset(new CSeq_id(id)); }
168  catch(CSeqIdException&) {
169  return 0;
170  }
171 
172  const size_t len (sequence::GetLength(*seqid, m_Scope.GetNonNullPointer()));
173 
174  m_id2len[id] = len;
175 
176  if(m_id2len.size() >= 1000) {
177  m_Scope->ResetHistory();
178  }
179 
180  return len;
181  }
182 }
183 
184 
186 {
187  const CArgs& args (GetArgs());
188 
189  const bool is_qdb (args["qdb"]);
190  const bool is_seqlens (args["seqlens"]);
191 
192 
193  /*
194  const bool is_sdb (args["sdb"]);
195  const bool is_ho (args["ho"]);
196  const bool is_maxvol (args["maxvol"]);
197  const bool is_n (args["N"]);
198 
199  bool invalid_args (false);
200  if(is_qdb ^ is_sdb) { invalid_args = true; }
201  if(is_qdb && is_seqlens) { invalid_args = true; }
202  if(is_qdb && is_n) { invalid_args = true; }
203  if(!is_qdb && is_ho) { invalid_args = true; }
204  if(!is_qdb && is_maxvol) { invalid_args = true; }
205  */
206 
207  m_NoXF = args["noxf"];
208  m_penalty = args["penalty"].AsDouble();
209  m_min_idty = args["min_idty"].AsDouble();
210  m_min_singleton_idty = args["min_singleton_idty"].AsDouble();
211  m_min_singleton_idty_bps = args["min_singleton_idty_bps"].AsInteger();
212  m_min_query_len = args["min_query_len"].AsInteger();
213  m_max_intron = args["max_intron"].AsInteger();
214 
215  int rv (0);
216  if(!is_qdb) {
217  if(is_seqlens) {
218  x_ReadSeqLens(args["seqlens"].AsInputFile());
219  }
220  else {
224  m_Scope = new CScope(*objmgr);
225  m_Scope->AddDefaults();
226  }
227  m_MaxCompsPerQuery = args["N"].AsInteger();
228  rv = x_DoWithExternalHits();
229  }
230  else {
231 
232  CBlastSequenceSource bss(args["qdb"].AsString());
233 
234  /*
235  class CTestSequenceSource : public ISequenceSource {
236  private:
237  virtual const vector<CSeq_id_Handle>& GetIds(void) const { return m_sih; }
238  virtual CBioseq_Handle GetSequence(const CSeq_id_Handle& sih) {
239  return m_scope->GetBioseqHandle(*sih.GetSeqId());
240  }
241  public:
242  CTestSequenceSource() {
243  m_sih.push_back( CSeq_id_Handle::GetGiHandle(21637378) );
244  m_sih.push_back( CSeq_id_Handle::GetGiHandle(47551258) );
245  m_object_manager = CObjectManager::GetInstance();
246  CGBDataLoader::RegisterInObjectManager(*m_object_manager);
247  m_scope = new CScope(*m_object_manager);
248  m_scope->AddDefaults();
249  }
250  protected:
251  vector<CSeq_id_Handle> m_sih;
252  CRef<CObjectManager> m_object_manager;
253  CRef<CScope> m_scope;
254 
255  };
256  CTestSequenceSource bss;
257 
258  cerr<<"number of seqs: "<< bss.GetNumSeqs()<<endl;
259  const char *seq;
260  int len = ((ISequenceSource *)&bss)->GetSequence(1, &seq);
261  cerr<<"sequence length: "<<len<<endl;
262  //string sseq(seq, seq+len);
263  ((ISequenceSource *)&bss)->RetSequence(&seq);
264  */
265 
266  CRef<CElementaryMatching> matcher (
267  new CElementaryMatching(&bss,
268  args["sdb"].AsString()));
269 
270  matcher->SetOutputMethod(true);
271 
273 
274  matcher->SetPenalty(m_penalty);
275  matcher->SetMinIdty(m_min_idty);
277  matcher->SetMaxIntron(m_max_intron);
278 
279  matcher->SetHitsOnly(args["ho"]);
280  matcher->SetMinHitLength(args["min_hit_len"].AsInteger());
281  matcher->SetMaxVolSize(1024 * 1024 * (args["maxvol"].AsInteger()));
282 
283  matcher->SetDropOff(args["dropoff"].AsInteger());
284 
285  try { matcher->Run(); }
286  catch(std::bad_alloc&) {
288  "Not enough memory available to run this program");
289  }
290 
291  /*
292  // set SetOutputMethod to false before Run to get the results as
293  // a collection of alignments.
294  CElementaryMatching::TResults results = matcher->GetResults();
295  const CSeq_align_set::Tdata& comp_data(compartments->Get());
296  ITERATE (CSeq_align_set::Tdata, i, comp_data) {
297  const CSeq_align& comp(**i);
298  cout << MSerial_AsnText << comp << endl;
299  }
300  */
301  }
302 
303  return rv;
304 }
305 
306 
308 {
309  m_CompartmentsPermanent.resize(0);
310  m_Allocated = 0;
311 
312  THitRefs hitrefs;
313 
314  typedef map<string,string> TIdToId;
315  TIdToId id2id;
316 
317  string line;
318  string query0, subj0;
319  while(cin) {
320 
321  getline(cin, line);
322  string s = NStr::TruncateSpaces(line);
323  if(s.size()) {
324 
325  THitRef hit (new THit(s.c_str()));
326 
327  const string query (hit->GetQueryId()->GetSeqIdString(true));
328  const string subj (hit->GetSubjId()->GetSeqIdString(true));
329 
330  if(query0.size() == 0 || subj0.size() == 0) {
331  query0 = query;
332  subj0 = subj;
333  id2id[query0] = subj0;
334  }
335  else {
336 
337  if(query != query0 || subj != subj0) {
338 
339  const int rv (x_ProcessPair(query0, hitrefs));
340  if(rv != 0) return rv;
341 
342  if(query != query0) {
343 
344  x_RankAndStore();
345 
346  if(m_Allocated > 128 * 1024 * 1024) {
347 
348  stable_sort(m_CompartmentsPermanent.begin(),
350 
352  cout << **ii << endl;
353  m_Allocated -= (*ii)->GetHitCount()*sizeof(THit);
354  }
355  m_CompartmentsPermanent.clear();
356  }
357  }
358 
359  query0 = query;
360  subj0 = subj;
361  hitrefs.clear();
362 
363  TIdToId::const_iterator im = id2id.find(query0);
364  if(im == id2id.end() || im->second != subj0) {
365  id2id[query0] = subj0;
366  }
367  else {
368  cerr << "Input hit stream not properly ordered" << endl;
369  return 2;
370  }
371  }
372  }
373 
374  hitrefs.push_back(hit);
375  }
376  }
377 
378  if(hitrefs.size()) {
379  int rv = x_ProcessPair(query0, hitrefs);
380  if(rv != 0) return rv;
381  x_RankAndStore();
382  hitrefs.clear();
383  }
384 
385  stable_sort(m_CompartmentsPermanent.begin(), m_CompartmentsPermanent.end());
386 
388  cout << **ii << endl;
389  }
390 
391  m_CompartmentsPermanent.clear();
392 
393  return 0;
394 }
395 
396 
// Runs compartmentization over the hits accumulated for one query and
// appends every resulting compartment to m_Compartments.
//
// query0  - ID of the query sequence; used to look up the query length.
// hitrefs - the hits to process; passed to the accessor as an iterator range.
//
// Returns 1 if the query length comes back as zero (a message is written
// to cerr), otherwise 0. A query shorter than m_min_query_len is skipped
// silently and also yields 0.
397 int CCompartApp::x_ProcessPair(const string& query0, THitRefs& hitrefs)
398 {
399  const size_t qlen (x_GetSeqLength(query0));
400 
     // x_GetSeqLength() reporting zero is treated as "length unavailable".
401  if(qlen == 0) {
402  cerr << "Cannot retrieve sequence lengths for: "
403  << query0 << endl;
404  return 1;
405  }
406 
     // Too-short queries are not an error: nothing is produced for them.
407  if(qlen < m_min_query_len) {
408  return 0;
409  }
410 
411  typedef CCompartmentAccessor<THit> TAccessor;
412  typedef TAccessor::TCoord TCoord;
413 
     // The fractional parameters are scaled by the query length; adding 0.5
     // before the TCoord conversion rounds to nearest
     // (assumes TCoord is an integral type - TODO confirm).
414  const TCoord penalty_bps (TCoord(m_penalty * qlen + 0.5));
415  const TCoord min_matches (TCoord(m_min_idty * qlen + 0.5));
     // The singleton threshold is the smaller of the length-scaled fraction
     // and the absolute m_min_singleton_idty_bps value.
416  const TCoord msm1 (TCoord(m_min_singleton_idty * qlen + 0.5));
417  const TCoord msm2 (m_min_singleton_idty_bps);
418  const TCoord min_singleton_matches (min(msm1, msm2));
419 
     // NOTE(review): the last argument is the negated m_NoXF flag; presumably
     // it switches overlap x-filtering on - verify against CCompartmentAccessor.
420  TAccessor ca (penalty_bps, min_matches, min_singleton_matches, !m_NoXF);
421  ca.SetMaxIntron(m_max_intron);
422  ca.Run(hitrefs.begin(), hitrefs.end());
423 
     // Walk the compartments via GetFirst()/GetNext(); each one is wrapped in
     // a CCompartment together with the query length and queued in
     // m_Compartments.
424  THitRefs comp;
425  for(bool b0 (ca.GetFirst(comp)); b0 ; b0 = ca.GetNext(comp)) {
426 
427  TCompartRef cr (new CCompartment (comp, qlen));
428  m_Compartments.push_back(cr);
429  }
430 
431  return 0;
432 }
433 
434 
436  const CCompartApp::TCompartRef& rhs)
437 {
438  //#define PCOMPARTMENT_RANKER_M1
439 
440 #ifdef PCOMPARTMENT_RANKER_M1
441 
442  const size_t exons_lhs (lhs->GetExonCount());
443  const size_t exons_rhs (rhs->GetExonCount());
444  if(exons_lhs == exons_rhs) {
445  return lhs->GetMatchCount() > rhs->GetMatchCount();
446  }
447  else {
448  return exons_lhs > exons_rhs;
449  }
450 
451 #else
452 
453  const size_t idtybin_lhs (lhs->GetIdentityBin());
454  const size_t idtybin_rhs (rhs->GetIdentityBin());
455  if(idtybin_lhs == idtybin_rhs) {
456  const size_t exons_lhs (lhs->GetExonCount());
457  const size_t exons_rhs (rhs->GetExonCount());
458  if(exons_lhs == exons_rhs) {
459  return lhs->GetMatchCount() > rhs->GetMatchCount();
460  }
461  else {
462  return exons_lhs > exons_rhs;
463  }
464  }
465  else {
466  return idtybin_lhs > idtybin_rhs;
467  }
468 #endif
469 
470 #undef PCOMPARTMENT_RANKER_M1
471 }
472 
473 
475 {
476  const size_t cdim (m_Compartments.size());
477  if(cdim == 0) {
478  return;
479  }
480 
481  if(m_MaxCompsPerQuery > 0 && cdim > m_MaxCompsPerQuery) {
482  stable_sort(m_Compartments.begin(), m_Compartments.end(), PCompartmentRanker);
484  }
485 
486  for(size_t i (0), in (m_Compartments.size()); i < in; ++i) {
488  m_CompartmentsPermanent.push_back(cr);
489  m_Allocated += cr->GetHitCount() * sizeof(THit);
490  }
491 
492  m_Compartments.resize(0);
493 }
494 
495 
497 {
498  return;
499 }
500 
501 
503 {
504  if(m_HitRefs.size() == 0) {
505  NCBI_THROW(CException, eUnknown, "Span requested for empty compartment");
506  }
507  THit::TCoord a (m_HitRefs.front()->GetSubjStart()),
508  b (m_HitRefs.back()->GetSubjStop());
509  if(a > b) {
510  THit::TCoord c (a);
511  a = b;
512  b = c;
513  }
514 
516 }
517 
518 CCompartApp::CCompartment::CCompartment(const THitRefs& hitrefs, size_t length):
519  m_SeqLength(length), m_IdentityBin(0), m_ExonCount(0), m_MatchCount(0)
520 {
521  if(hitrefs.size() == 0) {
523  "Cannot init compartment with empty hit list");
524  }
525 
526  for(THitRefs::const_reverse_iterator ii(hitrefs.rbegin()), ie(hitrefs.rend());
527  ii != ie; x_AddHit(*ii++));
528 
529  x_EvalExons();
530 }
531 
532 
534 {
535  if(m_HitRefs.size() == 0) {
536  m_HitRefs.push_back(hitref);
537  }
538  else {
539 
540  const THitRef& hb (m_HitRefs.back());
541  const bool cs (hb->GetSubjStrand());
542  if(cs != hitref->GetSubjStrand()) {
543  NCBI_THROW(CException, eUnknown, "Hit being added has strand "
544  "different from that of the compartment.");
545  }
546 
547  m_HitRefs.push_back(hitref);
548  }
549 }
550 
551 
553 {
554  if(m_HitRefs.size()) {
555  return m_HitRefs.front()->GetSubjStrand();
556  }
557  NCBI_THROW(CException, eUnknown, "Cannot determine compartment strand");
558 }
559 
560 
561 // compares by subject, query, strand, then order on the subject
563 const
564 {
565  const THit::TId& subjid_lhs (m_HitRefs.front()->GetSubjId());
566  const THit::TId& subjid_rhs (rhs.m_HitRefs.front()->GetSubjId());
567  const TIntId co (subjid_lhs->CompareOrdered(*subjid_rhs));
568  if(co == 0) {
569 
570  const THit::TId& queryid_lhs (m_HitRefs.front()->GetQueryId());
571  const THit::TId& queryid_rhs (rhs.m_HitRefs.front()->GetQueryId());
572  const TIntId co (queryid_lhs->CompareOrdered(*queryid_rhs));
573 
574  if(co == 0) {
575 
576  const bool strand_lhs (GetStrand());
577  const bool strand_rhs (rhs.GetStrand());
578  if(strand_lhs == strand_rhs) {
579  if(strand_lhs) {
580  return GetSpan().first < rhs.GetSpan().first;
581  }
582  else {
583  return GetSpan().first > rhs.GetSpan().first;
584  }
585  }
586  else {
587  return strand_lhs < strand_rhs;
588  }
589  }
590  else {
591  return co < 0;
592  }
593  }
594  else {
595  return co < 0;
596  }
597 }
598 
599 
601  const CCompartApp::TCompartRef& rhs)
602 {
603  return *lhs < *rhs;
604 }
605 
606 
607 // Evaluate all variables used in compartment ranking. These are:
608 // - m_IdentityBin
609 // - m_ExonCount
610 // - m_MatchCount
612 {
613  const size_t kMinIntronLength (25);
614  const size_t kMinExonLength (10);
615 
616  size_t exons (1);
617  THitRef& h (m_HitRefs.front());
618  double matches ( h->GetLength() * h->GetIdentity() );
619 
620  if(m_HitRefs.size() > 1) {
621 
622  if(GetStrand()) {
623 
624  THitRef prev;
625  ITERATE(THitRefs, ii, m_HitRefs) {
626 
627  const THitRef& h (*ii);
628  if(prev.NotEmpty()) {
629 
630  const THit::TCoord q0 (prev->GetQueryStop());
631  if(q0 + kMinExonLength <= h->GetQueryStop()) {
632 
633  const THit::TCoord s0 (h->GetSubjStart()
634  - (h->GetQueryStart() - q0));
635  if(prev->GetSubjStop() + kMinIntronLength <= s0) {
636  ++exons;
637  }
638  const THit::TCoord q0max (max(q0,h->GetQueryStart()));
639  matches += (h->GetQueryStop() - q0max) * h->GetIdentity();
640  }
641  }
642  prev = h;
643  }
644  }
645  else {
646 
647  THitRef prev;
648  ITERATE(THitRefs, ii, m_HitRefs) {
649 
650  const THitRef& h (*ii);
651  if(prev.NotEmpty()) {
652 
653  const THit::TCoord q0 (prev->GetQueryStop());
654  if(q0 + kMinExonLength <= h->GetQueryStop()) {
655 
656  const THit::TCoord s0 (h->GetSubjStart()
657  + h->GetQueryStart() - q0);
658  if(s0 + kMinIntronLength <= prev->GetSubjStop()) {
659  ++exons;
660  }
661  const THit::TCoord q0max (max(q0,h->GetQueryStart()));
662  matches += (h->GetQueryStop() - q0max) * h->GetIdentity();
663  }
664  }
665  prev = h;
666  }
667  }
668  }
669 
670  m_ExonCount = exons;
671  m_MatchCount = size_t(round(matches));
672  m_IdentityBin = size_t(floor(double(m_MatchCount) / m_SeqLength / 0.1));
673 }
674 
675 
676 ostream& operator << (ostream& ostr, const CCompartApp::CCompartment& rhs)
677 {
679  ostr << **ii << endl;
680  }
681  return ostr;
682 }
683 
684 
686 
687 
689 
// Program entry point: constructs a temporary CCompartApp and returns the
// value of its AppMain() call as the process exit status. The command-line
// arguments are forwarded unchanged; 0 is passed for both the envp and conf
// arguments and eDS_Default for the diagnostics stream.
690 int main(int argc, const char* argv[])
691 {
692  return CCompartApp().AppMain(argc, argv, 0, eDS_Default, 0);
693 }
TRange GetSpan(void) const
Definition: compart.cpp:502
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:907
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:2879
size_type size() const
Definition: map.hpp:148
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
Definition: ncbiargs.cpp:2238
TCompartRefs m_CompartmentsPermanent
Definition: compart.hpp:136
bool PCompartmentRanker(const CCompartApp::TCompartRef &lhs, const CCompartApp::TCompartRef &rhs)
Definition: compart.cpp:435
void clear()
Definition: map.hpp:169
TCompartRefs m_Compartments
Definition: compart.hpp:135
size_t m_Allocated
Definition: compart.hpp:123
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:779
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:557
CArgAllow_Integers –.
Definition: ncbiargs.hpp:1638
pair< THit::TCoord, THit::TCoord > TRange
Definition: compart.hpp:63
Hide log file description.
Definition: ncbiapp.hpp:387
container_type::const_iterator const_iterator
Definition: map.hpp:53
void SetHitsOnly(bool hits_only)
Set or clear the "hits only" mode.
bool operator<(const CCompartment &rhs) const
Definition: compart.cpp:562
void SetMaxIntron(const size_t max_intron)
Int4 TIntId
Definition: ncbimisc.hpp:1019
Hide configuration file description.
Definition: ncbiapp.hpp:388
CRef< objects::CScope > m_Scope
Definition: compart.hpp:110
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:492
X * release(void)
Release pointer.
Definition: ncbimisc.hpp:365
void SetMinIdty(const double &min_idty)
void SetOutputMethod(bool om)
Compartmentization parameters - see CCompartmentFinder for details.
void x_ReadSeqLens(CNcbiIstream &istr)
Definition: compart.cpp:140
T round(const T &v)
void SetPenalty(const double &penalty)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:837
size_t m_min_query_len
Definition: compart.hpp:116
int i
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:225
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:4719
CBlastTabular THit
Definition: compart.hpp:54
virtual int Run()
Run the application.
Definition: compart.cpp:185
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:555
virtual void Exit()
Cleanup on application exit.
Definition: compart.cpp:496
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
USING_NCBI_SCOPE
Definition: compart.cpp:688
static string query
virtual void Init()
Initialize the application.
Definition: compart.cpp:45
void SetMinQueryLength(size_t min_qlen)
Sets the length cut-off for cDNA (query) sequences.
bool m_NoXF
Definition: compart.hpp:121
void SetMinSingletonIdty(const double &idty)
vector< TCompartRef > TCompartRefs
Definition: compart.hpp:102
void AddFlag(const string &name, const string &comment, bool set_value=true)
Add description for flag argument.
Definition: ncbiargs.cpp:2256
void x_RankAndStore(void)
Definition: compart.cpp:474
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:76
size_t x_GetSeqLength(const string &id)
Definition: compart.cpp:157
vector< THitRef > THitRefs
Definition: compart.hpp:56
CCompartment(const THitRefs &hitrefs, size_t length)
Definition: compart.cpp:518
size_t m_min_singleton_idty_bps
Definition: compart.hpp:115
size_t m_MaxCompsPerQuery
Definition: compart.hpp:119
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:914
double m_min_singleton_idty
Definition: compart.hpp:114
int size
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1740
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:185
Hide version description.
Definition: ncbiapp.hpp:389
T max(T x_, T y_)
CArgDescriptions –.
Definition: ncbiargs.hpp:503
ostream & operator<<(ostream &ostr, const CCompartApp::CCompartment &rhs)
Definition: compart.cpp:676
T min(T x_, T y_)
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1084
An arbitrary string.
Definition: ncbiargs.hpp:552
void SetDropOff(int dropoff)
Controls the drop-off parameter used to extend hits over possible defects.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
bool operator<(const CCompartApp::TCompartRef &lhs, const CCompartApp::TCompartRef &rhs)
Definition: compart.cpp:600
CException –.
Definition: ncbiexpt.hpp:709
double m_penalty
Definition: compart.hpp:112
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
Definition: ncbiapp.hpp:649
CScope –.
Definition: scope.hpp:90
CArgs –.
Definition: ncbiargs.hpp:354
CArgAllow –.
Definition: ncbiargs.hpp:1382
void SetMaxVolSize(size_t max_vol_size)
Controls the max size of an index volume.
TStrIdToLen m_id2len
Definition: compart.hpp:109
int len
double m_min_idty
Definition: compart.hpp:113
int x_DoWithExternalHits(void)
Definition: compart.cpp:307
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void SetUsageContext(const string &usage_name, const string &usage_description, bool usage_sort_args=false, SIZE_TYPE usage_width=78)
Set extra info to be used by PrintUsage().
Definition: ncbiargs.cpp:3081
CSeqIdException –.
Definition: Seq_id.hpp:694
void SetMinHitLength(size_t min_hit_len)
In "hits only" mode, sets the min length of hits to report.
#define GetProgramName
Avoid name clash with the NCBI C Toolkit.
Definition: ncbienv.hpp:49
TParent::TCoord TCoord
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string...
Definition: ncbiexpt.hpp:546
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:699
void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for optional key without default value.
Definition: ncbiargs.cpp:2223
std::istream & in(std::istream &in_, double &x_)
int main(int argc, const char *argv[])
Definition: compart.cpp:690
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:556
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:147
int x_ProcessPair(const string &query0, THitRefs &hitrefs)
Definition: compart.cpp:397
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:746
bool GetStrand(void) const
Definition: compart.cpp:552
CArgAllow_Doubles –.
Definition: ncbiargs.hpp:1668
size_t m_max_intron
Definition: compart.hpp:117
void SetConstraint(const string &name, CArgAllow *constraint, EConstraintNegate negate=eConstraint)
Set additional user defined constraint on argument value.
Definition: ncbiargs.cpp:2383
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
void x_AddHit(const THitRef &hitref)
Definition: compart.cpp:533
Modified on Tue Jul 07 15:18:02 2015 by modify_doxy.py rev. 426318