NCBI C++ ToolKit
align_compare.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: align_compare.cpp 75893 2016-12-26 16:08:29Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
48 
51 
55 
56 #include <cmath>
57 #include <ctype.h>
58 
61 
62 
63 // Retrieve a list of interval-by-interval accounting within an alignment
64 //
66 
67 static void s_UpdateSpans(const TSeqRange &query_range,
68  const TSeqRange &subject_range,
69  CAlignCompare::SAlignment& align_info,
71 {
72  align_info.spans[row == CAlignCompare::e_Query ? query_range : subject_range] =
73  row == CAlignCompare::e_Subject ? subject_range : query_range;
74 }
75 
76 static void s_GetAlignmentSpans_Interval(const CSeq_align& align,
77  CAlignCompare::SAlignment& align_info,
79 {
80  switch (align.GetSegs().Which()) {
83  align.GetSegs().GetDisc().Get()) {
84  s_GetAlignmentSpans_Interval(**it, align_info, row);
85  }
86  break;
87 
89  {{
91  align.GetSegs().GetStd()) {
92  // our expectation is to find a set of two locs
93  // each loc is expected to be a Seq-interval
94 
95  // we expect std-seg alignments to be single intervals
96  // check this here
97  if ((*seg_it)->GetLoc().size() != 2) {
99  "Pairwise Std-seg alignments in comparison "
100  "should always have two locs");
101  }
102  CConstRef<CSeq_loc> loc1 = (*seg_it)->GetLoc()[0];
103  CConstRef<CSeq_loc> loc2 = (*seg_it)->GetLoc()[1];
104 
105  if (loc1->IsEmpty() || loc2->IsEmpty() ||
106  loc1->IsNull() || loc2->IsNull()) {
107  // gaps - omit
108  continue;
109  }
110 
111  if (!loc1->IsInt() || !loc2->IsInt()) {
113  "Pairwise Std-set alignments in comparison "
114  "should always be intervals");
115  }
117  loc2->GetTotalRange(),
118  align_info, row);
119  }
120  }}
121  break;
122 
123  default:
124  {{
125  CAlnSeqId id0(align.GetSeq_id(0));
126  CAlnSeqId id1(align.GetSeq_id(1));
127 
128  TAlnSeqIdIRef r0(&id0);
129  TAlnSeqIdIRef r1(&id1);
130  CPairwiseAln pw(r0, r1);
131  ConvertSeqAlignToPairwiseAln(pw, align, 0, 1);
132  ITERATE (CPairwiseAln, it, pw) {
133  const CPairwiseAln::TAlignRange& r = *it;
136  align_info, row);
137  }
138  }}
139  break;
140  }
141 }
142 
143 static void s_GetAlignmentMismatches(const CSeq_align& align,
144  CAlignCompare::SAlignment& align_info,
146 {
147  if (!align.GetSegs().IsSpliced()) {
149  string traceback = lookup.GetTraceback(align, 0);
150  if (traceback.empty()) {
152  "Comparing mismatches for dense-seg alignments requires "
153  "traceback information");
154  }
155  int product_dir = align.GetSeqStrand(0) == eNa_strand_minus
156  ? -1 : 1;
157  TSeqPos product_pos = product_dir == 1 ? align.GetSeqStart(0)
158  : align.GetSeqStop(0);
159  int genomic_dir = align.GetSeqStrand(1) == eNa_strand_minus
160  ? -1 : 1;
161  TSeqPos genomic_pos = product_dir == 1 ? align.GetSeqStart(1)
162  : align.GetSeqStop(1);
163  unsigned match = 0;
164  ITERATE (string, it, traceback) {
165  if (isdigit(*it)) {
166  match = match * 10 + (*it - '0');
167  continue;
168  }
169  product_pos += match * product_dir;
170  genomic_pos += match * genomic_dir;
171  match = 0;
172  bool genomic_ins = *it == '-';
173  bool product_ins = *++it == '-';
174  if (!genomic_ins && !product_ins) {
175  /// mismatch
176  if (row != CAlignCompare::e_Subject) {
177  align_info.query_mismatches += TSeqRange(
178  product_pos, product_pos);
179 
180  }
181  if (row != CAlignCompare::e_Query) {
182  align_info.subject_mismatches += TSeqRange(
183  genomic_pos, genomic_pos);
184  }
185  }
186  if (!genomic_ins) {
187  product_pos += product_dir;
188  }
189  if (!product_ins) {
190  genomic_pos += genomic_dir;
191  }
192  }
193  if (product_pos != (product_dir == 1 ? align.GetSeqStop(0)+1
194  : align.GetSeqStart(0)-1)
195  || genomic_pos != (genomic_dir == 1 ? align.GetSeqStop(1)+1
196  : align.GetSeqStart(1)-1))
197  {
199  "Inconsistent length of traceback string " + traceback);
200  }
201  return;
202  }
203 
204  bool is_product_minus = align.GetSegs().GetSpliced().IsSetProduct_strand() &&
205  align.GetSegs().GetSpliced().GetProduct_strand() == eNa_strand_minus;
206  bool is_genomic_minus = align.GetSegs().GetSpliced().IsSetGenomic_strand() &&
207  align.GetSegs().GetSpliced().GetGenomic_strand() == eNa_strand_minus;
209  {
210  const CSpliced_exon& exon = **it;
211  if (!exon.IsSetParts()) {
212  continue;
213  }
214  int product_dir = is_product_minus ||
215  (exon.IsSetProduct_strand() &&
216  exon.GetProduct_strand() == eNa_strand_minus)
217  ? -1 : 1;
218  TSeqPos product_pos = (product_dir == 1 ? exon.GetProduct_start()
219  : exon.GetProduct_end())
220  . AsSeqPos();
221  int genomic_dir = is_genomic_minus ||
222  (exon.IsSetGenomic_strand() &&
223  exon.GetGenomic_strand() == eNa_strand_minus)
224  ? -1 : 1;
225  TSeqPos genomic_pos = genomic_dir == 1 ? exon.GetGenomic_start()
226  : exon.GetGenomic_end();
227  ITERATE (CSpliced_exon::TParts, part_it, exon.GetParts()) {
228  switch ((*part_it)->Which()) {
230  if (row != CAlignCompare::e_Subject) {
231  TSeqPos product_mismatch_end = product_pos +
232  product_dir * ((*part_it)->GetMismatch()-1);
233  align_info.query_mismatches += TSeqRange(
234  min(product_pos,product_mismatch_end),
235  max(product_pos,product_mismatch_end));
236 
237  }
238  if (row != CAlignCompare::e_Query) {
239  TSeqPos genomic_mismatch_end = genomic_pos +
240  genomic_dir * ((*part_it)->GetMismatch()-1);
241  align_info.subject_mismatches += TSeqRange(
242  min(genomic_pos,genomic_mismatch_end),
243  max(genomic_pos,genomic_mismatch_end));
244  }
245  product_pos += product_dir * (*part_it)->GetMismatch();
246  genomic_pos += genomic_dir * (*part_it)->GetMismatch();
247  break;
248 
250  product_pos += product_dir * (*part_it)->GetMatch();
251  genomic_pos += genomic_dir * (*part_it)->GetMatch();
252  break;
253 
255  product_pos += product_dir * (*part_it)->GetProduct_ins();
256  break;
257 
259  genomic_pos += genomic_dir * (*part_it)->GetGenomic_ins();
260  break;
261 
262  default:
264  "Unsupported exon part");
265  }
266  }
267  }
268 }
269 
270 // Retrieve a list of exon-by-exon accounting within an alignment
271 //
272 static void s_GetAlignmentSpans_Exon(const CSeq_align& align,
273  CAlignCompare::SAlignment &align_info,
275 {
276  switch (align.GetSegs().Which()) {
278  s_UpdateSpans(align.GetSeqRange(0), align.GetSeqRange(1), align_info, row);
279  break;
280 
282  /* UNTESTED */
284  align.GetSegs().GetDisc().Get()) {
285  s_UpdateSpans((*it)->GetSeqRange(0), (*it)->GetSeqRange(1), align_info, row);
286  }
287  break;
288 
290  /* UNTESTED */
291  ITERATE (CSeq_align::TSegs::TStd, it, align.GetSegs().GetStd()) {
292  const CStd_seg& seg = **it;
293  s_UpdateSpans(seg.GetLoc()[0]->GetTotalRange(),
294  seg.GetLoc()[1]->GetTotalRange(), align_info, row);
295  }
296  break;
297 
300  align.GetSegs().GetSpliced().GetExons()) {
301  const CSpliced_exon& exon = **it;
302  TSeqRange genomic(exon.GetGenomic_start(), exon.GetGenomic_end());
303  TSeqRange product;
304  product.SetFrom(exon.GetProduct_start().AsSeqPos());
305  product.SetTo(exon.GetProduct_end().AsSeqPos());
306  s_UpdateSpans(product, genomic, align_info, row);
307  }
308  break;
309 
310  default:
312  "unhandled alignment type");
313  }
314 }
315 
316 
317 // Retrieve a list of intron-by-intron accounting within an alignment;
318 // meaningful only for Spliced-seg alignments
319 //
320 static void s_GetAlignmentSpans_Intron(const CSeq_align& align,
321  CAlignCompare::SAlignment &align_info,
323 {
324  if (!align.GetSegs().IsSpliced() ||
325  !align.GetSegs().GetSpliced().CanGetProduct_strand() ||
327  {
329  "intron mode only meaningful for Spliced-seg alignments");
330  }
331 
332  bool is_reverse = align.GetSegs().GetSpliced().GetProduct_strand() !=
334 
335  CRef<CSpliced_exon> last_exon;
337  align.GetSegs().GetSpliced().GetExons()) {
338  CRef<CSpliced_exon> exon = *it;
339  if (last_exon) {
340  CRef<CSpliced_exon> first_exon = is_reverse ? exon : last_exon;
341  CRef<CSpliced_exon> second_exon = is_reverse ? last_exon : exon;
342  TSeqRange genomic(first_exon->GetGenomic_end(),
343  second_exon->GetGenomic_start());
344  TSeqRange product;
345  product.SetFrom(last_exon->GetProduct_end().AsSeqPos());
346  product.SetTo(exon->GetProduct_start().AsSeqPos());
347  s_UpdateSpans(product, genomic, align_info, row);
348  }
349  last_exon = exon;
350  }
351 }
352 
353 
354 // Retrieve a list of total range spans for an alignment
355 //
356 static void s_GetAlignmentSpans_Span(const CSeq_align& align,
357  CAlignCompare::SAlignment &align_info,
359 {
360  s_UpdateSpans(align.GetSeqRange(0), align.GetSeqRange(1), align_info, row);
361 }
362 
363 template<typename T>
364 void s_PopulateScores(const CSeq_align &align,
365  const vector<string> &score_list,
366  vector<T> &scores,
367  bool required = true)
368 {
370  ITERATE (vector<string>, it, score_list) {
371  T value = 0;
372  try {
373  value = lookup.GetScore(align, *it);
374  } catch(CAlgoAlignUtilException &e) {
375  /// If scores are not required, use value of 0 for scores that were not found
376  if (required || e.GetErrCode() != CAlgoAlignUtilException::eScoreNotFound) {
377  throw;
378  }
379  }
380  scores.push_back(value);
381  }
382 }
383 
384 static void s_PopulateScoreSet(const CSeq_align &align,
385  const set<string> &score_set,
386  bool score_set_as_blacklist,
387  CAlignCompare::TIntegerScoreSet &integer_scores,
388  CAlignCompare::TRealScoreSet &real_scores)
389 {
390  if (score_set_as_blacklist) {
391  ITERATE (CSeq_align::TScore, score_it, align.GetScore()) {
392  if ((*score_it)->GetId().IsStr() &&
393  !score_set.count((*score_it)->GetId().GetStr()))
394  {
395  if ((*score_it)->GetValue().IsInt()) {
396  integer_scores[(*score_it)->GetId().GetStr()] =
397  (*score_it)->GetValue().GetInt();
398  } else {
399  real_scores[(*score_it)->GetId().GetStr()] =
400  (*score_it)->GetValue().GetReal();
401  }
402  }
403  }
404  } else {
406  ITERATE (set<string>, score_it, score_set) {
407  double value = lookup.GetScore(align, *score_it);
408  if (lookup.IsIntegerScore(align, *score_it)) {
409  integer_scores[*score_it] = static_cast<int>(value);
410  } else {
411  real_scores[*score_it] = value;
412  }
413  }
414  }
415 }
416 
418  const CAlignCompare::TRealScoreSet &scores2,
419  double real_score_tolerance)
420 {
422  it2 = scores2.begin();
423  it1 != scores1.end() || it2 != scores2.end(); ++it1, ++it2)
424  {
425  if (it1 == scores1.end() || it2 == scores2.end()
426  || it1->first != it2->first)
427  {
428  /// The two don't have the same set of real-value scores
429  return false;
430  }
431  double allowed_diff = max(abs(it1->second), abs(it2->second))
432  * real_score_tolerance;
433  if (abs(it1->second - it2->second) > allowed_diff) {
434  return false;
435  }
436  }
437  return true;
438 }
439 
440 //////////////////////////////////////////////////////////////////////////////
441 //
442 // SComparison constructor does the hard work of verifying that two sets of alignment
443 // span ranges actually overlap, and determines by how much
444 //
446 {
447  SComparison();
448 
450  const CAlignCompare::SAlignment& second,
451  double real_score_tolerance);
452 
458  float overlap;
459 };
460 
462 : spans_in_common(0)
463 , spans_overlap(0)
464 , spans_unique_first(0)
465 , spans_unique_second(0)
466 , is_equivalent(false)
467 , overlap(0)
468 {
469 }
470 
472  const CAlignCompare::SAlignment& second,
473  double real_score_tolerance)
474 : spans_in_common(0)
475 , spans_overlap(0)
476 , spans_unique_first(0)
477 , spans_unique_second(0)
478 , is_equivalent(false)
479 , overlap(0)
480 {
481  if (first.CompareGroup(second, false) != 0) {
482  /// Alignments have different disambiguiting score values, can't be compared
483  return;
484  }
485 
486  float dot = 0;
487  float sum_a = 0;
488  float sum_b = 0;
489 
492  for ( ; first_it != first.spans.end() && second_it != second.spans.end(); ) {
493  if (*first_it == *second_it) {
494  TSeqPos intersecting_len = first_it->first.GetLength();
495  dot += float(intersecting_len) * float(intersecting_len);
496  sum_a += first_it->first.GetLength() * first_it->first.GetLength();
497  sum_b += second_it->first.GetLength() * second_it->first.GetLength();
498 
499  spans_in_common += intersecting_len;
500  ++first_it;
501  ++second_it;
502  } else {
503  bool overlap =
504  first_it->first.IntersectingWith(second_it->first) &&
505  first_it->second.IntersectingWith(second_it->second);
506  TSeqPos intersecting_len = 0;
507  if (overlap) {
508  TSeqRange r = first_it->first;
509  r.IntersectWith(second_it->first);
510 
511  intersecting_len = r.GetLength();
512  dot += float(intersecting_len) * float(intersecting_len);
513 
514  spans_overlap += intersecting_len;
515  spans_unique_first -= intersecting_len;
516  spans_unique_second -= intersecting_len;
517  }
518  if (*first_it < *second_it) {
519  sum_a += first_it->first.GetLength() * first_it->first.GetLength();
520  spans_unique_first += first_it->first.GetLength();
521  ++first_it;
522  } else {
523  sum_b += second_it->first.GetLength() * second_it->first.GetLength();
524  spans_unique_second += second_it->first.GetLength();
525  ++second_it;
526  }
527  }
528  }
529  is_equivalent = spans_in_common == first.length &&
530  spans_in_common == second.length &&
531  first.query_mismatches == second.query_mismatches &&
532  first.subject_mismatches == second.subject_mismatches &&
533  first.integer_scores == second.integer_scores &&
535  real_score_tolerance);
536  for ( ; first_it != first.spans.end(); ++first_it) {
537  sum_a += first_it->first.GetLength() * first_it->first.GetLength();
538  spans_unique_first += first_it->first.GetLength();
539  }
540  for ( ; second_it != second.spans.end(); ++second_it) {
541  sum_b += second_it->first.GetLength() * second_it->first.GetLength();
542  spans_unique_second += second_it->first.GetLength();
543  }
544 
545  overlap = dot == 0 ? 0 : dot / ::sqrt(sum_a * sum_b);
546 }
547 
548 
550 {
552  const CAlignCompare::SAlignment *ptr2) const
553  {
554  const CAlignCompare::SAlignment& k1 = *ptr1;
555  const CAlignCompare::SAlignment& k2 = *ptr2;
556 
557  if (k1.query < k2.query) { return true; }
558  if (k2.query < k1.query) { return false; }
559  if (k1.subject < k2.subject) { return true; }
560  if (k2.subject < k1.subject) { return false; }
561 
562  if (k1.scores < k2.scores) { return true; }
563  if (k2.scores < k1.scores) { return false; }
564 
565  if (k1.query_strand < k2.query_strand) { return true; }
566  if (k2.query_strand < k1.query_strand) { return false; }
567  if (k1.subject_strand < k2.subject_strand) { return true; }
568  if (k2.subject_strand < k1.subject_strand) { return false; }
569 
570  if (k1.subject_range < k2.subject_range) { return true; }
571  if (k2.subject_range < k1.subject_range) { return false; }
572  if (k1.query_range < k2.query_range) { return true; }
573  if (k2.query_range < k1.query_range) { return false; }
574 
575  return ptr1 < ptr2;
576  }
577 };
578 
580 typedef pair<CAlignCompare::SAlignment *, CAlignCompare::SAlignment *> TPtrPair;
581 typedef pair<TPtrPair, SComparison> TComp;
582 
584 {
586 
587  SComp_Less(bool strict = false)
588  : strict_compare(strict)
589  {
590  }
591 
592  bool operator()(const TComp& c1, const TComp& c2) const
593  {
594  // strict comparison amounts to placing all identical pairs either before
595  // or after non-identical ones
596  // putting identical pairs first means that we evaluate the best examples first, and
597  // can establish equality without polluting the comparison with weaker
598  // alignments; non-strict means we combine weaker overlapping
599  // alignments together into equivalence groups with alignments that are
600  // identical
601  if (strict_compare) {
602  if (c1.second.is_equivalent && !c2.second.is_equivalent) {
603  return true;
604  }
605  if (c2.second.is_equivalent && !c1.second.is_equivalent) {
606  return false;
607  }
608  }
609  else {
610  if (c1.second.is_equivalent && !c2.second.is_equivalent) {
611  return false;
612  }
613  if (c2.second.is_equivalent && !c1.second.is_equivalent) {
614  return true;
615  }
616  }
617 
618  if (c1.first.first->subject_range < c2.first.first->subject_range)
619  {
620  return false;
621  }
622  if (c2.first.first->subject_range < c1.first.first->subject_range)
623  {
624  return true;
625  }
626  return c1.first.second->query_range < c2.first.second->query_range;
627  }
628 };
629 
631 SAlignment(int s, const CRef<CSeq_align> &al, CAlignCompare &compare,
632  bool is_slice)
633 : source_set(s)
634 , query_strand(eNa_strand_unknown)
635 , subject_strand(eNa_strand_unknown)
636 , length(0)
637 , align(al)
638 , match_level(CAlignCompare::e_NoMatch)
639 , compare_object(compare)
640 {
641  try {
642  if (compare.m_Row != e_Subject) {
646  }
647  if (compare.m_Row != e_Query) {
651  }
652  s_PopulateScores(*align, compare.m_DisambiguitingScores.first, scores.first);
653  s_PopulateScores(*align, compare.m_DisambiguitingScores.second, scores.second, false);
657  switch (compare.m_Mode) {
658  case e_Full:
659  /// If this alignment was created by slicing an input alignment,
660  /// it doesn't have traceback data so mismatches can't be calculated
661  if (!is_slice) {
662  s_GetAlignmentMismatches(*align, *this, compare.m_Row);
663  }
664  // fall through
665 
666  case e_Interval:
667  s_GetAlignmentSpans_Interval(*align, *this, compare.m_Row);
668  break;
669 
670  case e_Exon:
671  s_GetAlignmentSpans_Exon(*align, *this, compare.m_Row);
672  break;
673 
674  case e_Span:
675  s_GetAlignmentSpans_Span(*align, *this, compare.m_Row);
676  break;
677 
678  case e_Intron:
679  s_GetAlignmentSpans_Intron(*align, *this, compare.m_Row);
680  break;
681  }
682  }
683  catch (CException& e) {
684  ERR_POST(Error << "alignment not processed: " << MSerial_AsnText << *align << e);
685  spans.clear();
686  }
687  ITERATE (TAlignmentSpans, it, spans) {
688  length += it->first.GetLength();
689  }
690 }
691 
693 CompareGroup(const SAlignment &o, bool strict_only) const
694 {
695  if (query.AsString() < o.query.AsString()) { return -1; }
696  if (o.query.AsString() < query.AsString()) { return 1; }
697 
698  if (subject.AsString() < o.subject.AsString()) { return -1; }
699  if (o.subject.AsString() < subject.AsString()) { return 1; }
700 
701  if (scores.first < o.scores.first) { return -1; }
702  if (o.scores.first < scores.first) { return 1; }
703 
704  if (strict_only) {
705  return 0;
706  }
707 
708  for (unsigned score_index = 0; score_index < scores.second.size(); ++score_index) {
709  if (scores.second[score_index] && o.scores.second[score_index]) {
710  if (scores.second[score_index] < o.scores.second[score_index]) { return -1; }
711  if (o.scores.second[score_index] < scores.second[score_index]) { return 1; }
712  }
713  }
714 
715  return 0;
716 }
717 
719 {
720  if (m_NextSet1Group.empty()) {
721  if (m_Set1.EndOfData()) {
722  return 2;
723  } else {
724  m_NextSet1Group.push_back(x_NextAlignment(1));
725  }
726  }
727  if (m_NextSet2Group.empty()) {
728  if (m_Set2.EndOfData()) {
729  return 1;
730  } else {
731  m_NextSet2Group.push_back(x_NextAlignment(2));
732  }
733  }
734  int compare_group = m_NextSet1Group.front()
735  ->CompareGroup(*m_NextSet2Group.front(), true);
736  if (compare_group < 0) {
737  return 1;
738  } else if (compare_group > 0) {
739  return 2;
740  } else {
741  return 3;
742  }
743 }
744 
746 {
747  IAlignSource &source = set == 1 ? m_Set1 : m_Set2;
748  list< AutoPtr<SAlignment> > &current_group =
750  list< AutoPtr<SAlignment> > &next_group =
751  set == 1 ? m_NextSet1Group : m_NextSet2Group;
752  current_group.clear();
753  current_group.splice(current_group.end(), next_group);
754  while (!source.EndOfData() && next_group.empty()) {
756  if (current_group.empty() || align->CompareGroup(*current_group.front(), true) == 0)
757  {
758  current_group.push_back(align);
759  } else {
760  next_group.push_back(align);
761  }
762  }
763 }
764 
766 {
767  ITERATE (TAlignmentSpans, it, spans) {
768  if (query) {
769  compare_object.m_BoundariesMap[query].insert(it->second.GetFrom());
770  compare_object.m_BoundariesMap[query].insert(it->second.GetToOpen());
771  }
772  if (subject) {
773  compare_object.m_BoundariesMap[subject].insert(it->first.GetFrom());
774  compare_object.m_BoundariesMap[subject].insert(it->first.GetToOpen());
775  }
776  }
777 }
778 
779 list< AutoPtr<CAlignCompare::SAlignment> > CAlignCompare::SAlignment::
780 BreakOnBoundaries(int row) const
781 {
782  list< AutoPtr<SAlignment> > align_parts;
783  const set<TSeqPos> &boundaries =
784  compare_object.m_BoundariesMap[row == 0 ? query : subject];
785  TSeqRange range = row == 0 ? query_range : subject_range;
786  TSeqPos last_boundary = range.GetFrom();
787  for (set<TSeqPos>::const_iterator it = boundaries.upper_bound(range.GetFrom());
788  it != boundaries.end() && *it <= range.GetToOpen(); ++it)
789  {
790  /// Extract slice, as long as it's not the the entire alignment
791  if (last_boundary > range.GetFrom() || *it < range.GetToOpen()) {
792  AutoPtr<SAlignment> part = Slice(row, last_boundary, *it-1);
793  if (part.get()) {
794  align_parts.push_back(part);
795  }
796  }
797  last_boundary = *it;
798  }
799  if (!align_parts.empty() && last_boundary < range.GetToOpen()) {
800  AutoPtr<SAlignment> part = Slice(row, last_boundary, range.GetTo());
801  if (part.get()) {
802  align_parts.push_back(part);
803  }
804  }
805  return align_parts;
806 }
807 
809 Slice(int row, TSeqPos from, TSeqPos to) const
810 {
811  if (align->GetSegs().IsDisc()) {
812  vector< AutoPtr<SAlignment> > seg_slices;
813  TSeqRange slice_range(min(from, to), max(from, to));
814  ITERATE (CSeq_align_set::Tdata, seg_it, align->GetSegs().GetDisc().Get())
815  {
816  TSeqRange seg_slice_range =
817  slice_range & (*seg_it)->GetSeqRange(row);
818  if (seg_slice_range.Empty()) {
819  continue;
820  }
821  AutoPtr<SAlignment> slice =
822  SAlignment(source_set, *seg_it, compare_object) . Slice(
823  row, seg_slice_range.GetFrom(), seg_slice_range.GetTo());
824  if (slice.get()) {
825  seg_slices.push_back(slice);
826  }
827  }
828  AutoPtr<SAlignment> complete_slice;
829  if (seg_slices.size() == 1) {
830  complete_slice = seg_slices.front();
831  } else if (seg_slices.size() > 1) {
832  CRef<CSeq_align> complete_align(new CSeq_align);
833  complete_slice.reset(new SAlignment(source_set, complete_align,
834  compare_object, true));
835  ITERATE (vector< AutoPtr<SAlignment> >, seg_it, seg_slices) {
836  complete_align->SetSegs().SetDisc().Set().push_back(
837  (*seg_it)->align);
838  complete_slice->query_mismatches += (*seg_it)->query_mismatches;
839  complete_slice->subject_mismatches += (*seg_it)->query_mismatches;
840  }
841  }
842  return complete_slice;
843  }
844 
845  if (!align->GetSegs().IsDenseg()) {
847  "Alignment splitting supported only for Dense-seq and "
848  "Disc-seg alignments");
849  }
850  AutoPtr<SAlignment> slice;
851  CRef<CSeq_align> slice_align(new CSeq_align);
852  slice_align->SetType(align->GetType());
853  if (align->IsSetDim()) {
854  slice_align->SetDim(align->GetDim());
855  }
856  CRef<CDense_seg> slice_seg = align->GetSegs().GetDenseg()
857  .ExtractSlice(row, from, to);
858  bool all_gaps = true;
859  for (int seg = 0; seg < slice_seg->GetNumseg() && all_gaps; ++seg) {
860  if (slice_seg->GetStarts()[seg*2] >= 0 &&
861  slice_seg->GetStarts()[seg*2+1] >= 0)
862  {
863  all_gaps = false;
864  }
865  }
866  if (all_gaps) {
867  /// Slice is completely within gaps, so no alignment
868  return slice;
869  }
870  slice_align->SetSegs().SetDenseg(*slice_seg);
871  ITERATE (CSeq_align::TScore, score_it, align->GetScore()) {
872  if ((*score_it)->GetId().IsStr() &&
873  compare_object.m_DistributiveScores.count(
874  (*score_it)->GetId().GetStr()))
875  {
876  slice_align->SetScore().push_back(*score_it);
877  }
878  }
879  slice.reset(new SAlignment(source_set, slice_align, compare_object, true));
880  /// Special case for full mode; extract the mismatches positions from the
881  /// ones calculated for the full alignment
882  if (compare_object.m_Mode == e_Full) {
883  slice->query_mismatches = query_mismatches;
884  slice->query_mismatches &= slice_align->GetSeqRange(0);
885  slice->subject_mismatches = subject_mismatches;
886  slice->subject_mismatches &= slice_align->GetSeqRange(1);
887  }
888  return slice;
889 }
890 
891 void CAlignCompare::x_SplitOnOverlaps(int group, int row)
892 {
893  list< AutoPtr<SAlignment> > orig_set;
894  list< AutoPtr<SAlignment> > &transformed_set =
895  group == 1 ? m_CurrentSet1Group : m_CurrentSet2Group;
896  CSeq_id_Handle id = row == 0 ? transformed_set.front()->query
897  : transformed_set.front()->subject;
898  if (!id) {
899  return;
900  }
901  orig_set.swap(transformed_set);
902  ITERATE (list< AutoPtr<SAlignment> >, it, orig_set) {
903  list< AutoPtr<CAlignCompare::SAlignment> > parts =
904  (*it)->BreakOnBoundaries(row);
905  if (parts.empty()) {
906  transformed_set.push_back(*it);
907  } else {
908  transformed_set.splice(transformed_set.end(), parts);
909  }
910  }
911 }
912 
914 {
915  while (!m_Set1.EndOfData()) {
916  x_NextAlignment(1, false)->PopulateBoundariesMap();
917  }
918  m_Set1.Reset();
919  while (!m_Set2.EndOfData()) {
920  x_NextAlignment(2, false)->PopulateBoundariesMap();
921  }
922  m_Set2.Reset();
923 }
924 
925 vector<const CAlignCompare::SAlignment *> CAlignCompare::NextGroup()
926 {
927  int next_group_set = x_DetermineNextGroupSet();
928  if (next_group_set & 1) {
930  }
931  if (next_group_set & 2) {
933  }
934 
935  vector<const SAlignment *> group;
936  switch (next_group_set) {
937  case 1:
938  if (!m_IgnoreNotPresent) {
942  m_CountBasesOnlySet1 += (*it)->length;
943  group.push_back(&**it);
944  }
945  }
946  break;
947 
948  case 2:
949  if (!m_IgnoreNotPresent) {
953  m_CountBasesOnlySet2 += (*it)->length;
954  group.push_back(&**it);
955  }
956  }
957  break;
958 
959  default:
960  {{
961  if (!m_BoundariesMap.empty()) {
962  x_SplitOnOverlaps(1, 0);
963  x_SplitOnOverlaps(1, 1);
964  x_SplitOnOverlaps(2, 0);
965  x_SplitOnOverlaps(2, 1);
968  }
969  TAlignPtrSet set1_aligns;
971  {
972  set1_aligns.insert(&**it);
973  }
974 
975  TAlignPtrSet set2_aligns;
977  {
978  set2_aligns.insert(&**it);
979  }
980 
981  set<SAlignment const*> red_color; // alignments from set2 that have equivalent mate
982  // from opposite set.
983  vector<TComp> comparisons;
984 
985  ITERATE (TAlignPtrSet, set1_it, set1_aligns) {
986  SAlignment const* lhs = *set1_it;
987 
988  ITERATE (TAlignPtrSet, set2_it, set2_aligns) {
989  SAlignment const* rhs = *set2_it;
990  // Check for equivalent alignment.
991  // In strict mode we do not combine overlapping and equiv. alignments.
992  if ( m_Strict && red_color.count(rhs) > 0 ) {
993  continue;
994  }
995 
996  if ( false == s_IsOverlapping(lhs, rhs, m_Row) ) {
997  continue;
998  }
999  // Check for overlap.
1000  comparisons.push_back(TComp(TPtrPair(const_cast<SAlignment*>(lhs), const_cast<SAlignment*>(rhs)),
1001  SComparison(*lhs, *rhs,
1003 
1004  // Post-processing:
1005  // - if two alignments are equivalent
1006  // -- color both alignments in red,
1007  // -- break out of the loop.
1008  TComp const& record = comparisons.back();
1009  SComparison const& comp = record.second;
1010  if ( comp.is_equivalent ) {
1011  red_color.insert(rhs); // set2's alignment.
1012  // In strict mode we do not combine overlapping and equiv. alignments.
1013  if ( m_Strict ) {
1014  break;
1015  }
1016  }
1017  }
1018  }
1019 
1020  std::sort(comparisons.begin(), comparisons.end(), SComp_Less(m_Strict));
1021 
1022  typedef pair<TAlignPtrSet, EMatchLevel> TAlignGroup;
1023 
1024  list<TAlignGroup> groups;
1026 
1027  ITERATE (vector<TComp>, it, comparisons) {
1028  bool is_equivalent = it->second.is_equivalent;
1029  /// This comparison counts if the two alignments are equivalent, or
1030  /// they overlap and we haven't yet seen an equivalence for either
1031  if (is_equivalent ||
1032  (it->second.overlap > 0 &&
1033  it->first.first->match_level != e_Equiv &&
1034  it->first.second->match_level != e_Equiv))
1035  {
1036  list<TAlignGroup>::iterator align1_group = groups.end(),
1037  align2_group = groups.end();
1038  if (set1_aligns.erase(it->first.first)) {
1039  it->first.first->match_level =
1040  is_equivalent ? e_Equiv : e_Overlap;
1041  group.push_back(it->first.first);
1042  if (is_equivalent) {
1043  ++m_CountEquivSet1;
1044  m_CountBasesEquivSet1 += it->second.spans_in_common;
1045  } else {
1047  m_CountBasesOverlapSet1 += it->second.spans_in_common
1048  + it->second.spans_overlap;
1049  m_CountBasesOnlySet1 += it->second.spans_unique_first;
1050  }
1051  } else {
1052  align1_group = group_map[it->first.first];
1053  }
1054  if (set2_aligns.erase(it->first.second)) {
1055  it->first.second->match_level =
1056  is_equivalent ? e_Equiv : e_Overlap;
1057  group.push_back(it->first.second);
1058  if (is_equivalent) {
1059  ++m_CountEquivSet2;
1060  m_CountBasesEquivSet2 += it->second.spans_in_common;
1061  } else {
1063  m_CountBasesOverlapSet2 += it->second.spans_in_common
1064  + it->second.spans_overlap;
1065  m_CountBasesOnlySet2 += it->second.spans_unique_second;
1066  }
1067  } else {
1068  align2_group = group_map[it->first.second];
1069  }
1070  if (align1_group == groups.end() &&
1071  align2_group == groups.end())
1072  {
1073  /// Neither alignemnts was encountered before, so create
1074  /// new group
1075  list<TAlignGroup>::iterator new_group =
1076  groups.insert(groups.end(), TAlignGroup());
1077  new_group->first.insert(it->first.first);
1078  new_group->first.insert(it->first.second);
1079  new_group->second = it->first.first->match_level;
1080  group_map[it->first.first] = new_group;
1081  group_map[it->first.second] = new_group;
1082  ++(is_equivalent ? m_CountEquivGroups
1084  } else if(align1_group == groups.end()) {
1085  /// alignment 1 is new, add it to existing group
1086  align2_group->first.insert(it->first.first);
1087  group_map[it->first.first] = align2_group;
1088  } else if(align2_group == groups.end()) {
1089  /// alignment 2 is new, add it to existing group
1090  align1_group->first.insert(it->first.second);
1091  group_map[it->first.second] = align1_group;
1092  } else if (align1_group != align2_group) {
1093  /// The alignments are in two separate groups; merge them
1094  ITERATE (TAlignPtrSet, group2_it, align2_group->first) {
1095  align1_group->first.insert(*group2_it);
1096  group_map[*group2_it] = align1_group;
1097  }
1098  if (align2_group->second == e_Overlap) {
1100  if (align1_group->second == e_Equiv) {
1101  /// Change the group from equivalence to overlap
1102  align1_group->second = e_Overlap;
1105  }
1106  } else {
1108  }
1109  groups.erase(align2_group);
1110  }
1111  }
1112  }
1113 
1114  ITERATE (list<TAlignGroup>, group_it, groups) {
1115  if (group_it->second == e_NoMatch) {
1116  continue;
1117  }
1118  if (group_it->second == e_Overlap && !m_QualityScores.empty())
1119  {
1120  /// Find which side is better
1121  vector<SAlignment *> best(3, static_cast<SAlignment *>(NULL));
1122  ITERATE (TAlignPtrSet, align_it, group_it->first) {
1123  SAlignment *&side_best = best[(*align_it)->source_set];
1124  if (!side_best || (*align_it)->quality_scores >
1125  side_best->quality_scores)
1126  {
1127  side_best = *align_it;
1128  }
1129  }
1130  if (best[1]->quality_scores != best[2]->quality_scores) {
1131  int better_side =
1132  best[1]->quality_scores > best[2]->quality_scores
1133  ? 1 : 2;
1134  ITERATE (TAlignPtrSet, align_it, group_it->first) {
1135  (*align_it)->match_level =
1136  (*align_it)->source_set == better_side
1138  }
1139  }
1140  }
1141  ITERATE (TAlignPtrSet, align1_it, group_it->first) {
1142  if ((*align1_it)->source_set != 1) {
1143  continue;
1144  }
1145  ITERATE (TAlignPtrSet, align2_it, group_it->first) {
1146  if ((*align2_it)->source_set != 2) {
1147  continue;
1148  }
1149  (*align1_it)->matched_alignments.push_back(*align2_it);
1150  (*align2_it)->matched_alignments.push_back(*align1_it);
1151  }
1152  }
1153  }
1154 
1155  /// Add remaining alignments, for which no match was found, in order
1156  /// of their appearance in alignment comparisons
1157  m_CountOnlySet1 += set1_aligns.size();
1158  m_CountOnlySet2 += set2_aligns.size();
1159  ITERATE (vector<TComp>, comp_it, comparisons) {
1160  if (set1_aligns.empty() && set2_aligns.empty()) {
1161  /// Found best comparison for all alignments
1162  break;
1163  }
1164  if (comp_it->second.overlap == 0) {
1165  if (set1_aligns.erase(comp_it->first.first)) {
1166  group.push_back(comp_it->first.first);
1167  m_CountBasesOnlySet1 += comp_it->first.first->length;
1168  }
1169  if (set2_aligns.erase(comp_it->first.second)) {
1170  group.push_back(comp_it->first.second);
1171  m_CountBasesOnlySet2 += comp_it->first.second->length;
1172  }
1173  }
1174  }
1175  ITERATE (TAlignPtrSet, set1_it, set1_aligns) {
1176  group.push_back(*set1_it);
1177  m_CountBasesOnlySet1 += (*set1_it)->length;
1178  }
1179  ITERATE (TAlignPtrSet, set2_it, set2_aligns) {
1180  group.push_back(*set2_it);
1181  m_CountBasesOnlySet2 += (*set2_it)->length;
1182  }
1183  }}
1184  }
1185 
1186  return group;
1187 }
1188 
1190 {
1191  bool overlap;
1192 
1193  switch ( row ) {
1195  overlap = lhs->query_range.IntersectingWith(rhs->query_range);
1196  break;
1197 
1199  overlap = lhs->subject_range.IntersectingWith(rhs->subject_range);
1200  break;
1201 
1202  default: //CAlignCompare::e_Both:
1203  if ( false == lhs->subject_range.IntersectingWith(rhs->subject_range) &&
1204  false == lhs->query_range.IntersectingWith(rhs->query_range) )
1205  {
1206  overlap = false;
1207  }
1208  else {
1209  overlap = true;
1210  }
1211  break;
1212  }
1213  return overlap;
1214 }
1215 
set< CAlignCompare::SAlignment *, SAlignment_PtrLess > TAlignPtrSet
User-defined methods of the data storage class.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:908
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
bool IsIntegerScore(const objects::CSeq_align &align, const string &score_name)
double GetScore(const objects::CSeq_align &align, const string &score_name)
Get requested score for alignment.
virtual void Reset()
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:889
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:783
size_t m_CountEquivGroups
void ConvertSeqAlignToPairwiseAln(CPairwiseAln &pairwise_aln, const objects::CSeq_align &sa, objects::CSeq_align::TDim row_1, objects::CSeq_align::TDim row_2, CAlnUserOptions::EDirection direction=CAlnUserOptions::eBothDirections, const TAlnSeqIdVec *ids=0)
Build pairwise alignment from the selected rows of a seq-align.
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
IAlignSource & m_Set2
position_type GetFirstTo(void) const
Definition: dbpivot.c:60
bool operator()(const TComp &c1, const TComp &c2) const
size_t m_CountOverlapGroups
string AsString(void) const
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
list< CRef< CSpliced_exon > > TExons
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:682
size_t m_CountEquivSet1
insertion in genomic sequence (i.e. gap in the product sequence)
#define T(s)
Definition: common.h:225
const TScore & GetScore(void) const
Get the Score member data.
Definition: Seq_align_.hpp:883
static bool s_EquivalentScores(const CAlignCompare::TRealScoreSet &scores1, const CAlignCompare::TRealScoreSet &scores2, double real_score_tolerance)
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:836
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:852
size_t spans_overlap
void clear()
Definition: map.hpp:169
bool IsSetProduct_strand(void) const
should be 'plus' or 'minus' Check if a value has been assigned to Product_strand data member...
const TParts & GetParts(void) const
Get the Parts member data.
position_type GetLength(void) const
Definition: range.hpp:158
A pairwise aln is a collection of ranges for a pair of rows.
ERowComparison m_Row
size_t m_CountBasesOverlapSet1
container_type::const_iterator const_iterator
Definition: map.hpp:53
TSeqPos AsSeqPos() const
Definition: Product_pos.cpp:56
static void s_UpdateSpans(const TSeqRange &query_range, const TSeqRange &subject_range, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:517
virtual bool EndOfData() const =0
bool IsSetGenomic_strand(void) const
genomic-strand represents the strand of translation Check if a value has been assigned to Genomic_str...
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Std_seg_.hpp:347
CRef< CSeq_align > align
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
SAlignment(int s, const CRef< CSeq_align > &al, CAlignCompare &compare, bool is_slice=false)
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
both sequences represented, product and genomic sequences match
bool s_IsOverlapping(CAlignCompare::SAlignment const *lhs, CAlignCompare::SAlignment const *rhs, CAlignCompare::ERowComparison row)
TIntegerScoreSet integer_scores
const TExons & GetExons(void) const
Get the Exons member data.
size_type size() const
Definition: set.hpp:132
list< CRef< CSpliced_exon_chunk > > TParts
User-defined methods of the data storage class.
signed NCBI_INT8_TYPE Int8
Signed 8 byte sized integer.
Definition: ncbitype.h:143
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:897
TThisType & SetTo(position_type to)
Definition: range.hpp:180
Default IAlnSeqId implementation based on CSeq_id_Handle.
Definition: aln_seqid.hpp:114
#define NULL
Definition: ncbistd.hpp:225
static void s_GetAlignmentSpans_Span(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
void PopulateBoundariesMap() const
const_iterator end() const
Definition: set.hpp:136
TThisType & SetFrom(position_type from)
Definition: range.hpp:170
position_type GetToOpen(void) const
Definition: range.hpp:138
static void s_GetAlignmentSpans_Interval(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
map< CSeq_id_Handle, set< TSeqPos > > m_BoundariesMap
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
Int8 spans_unique_second
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:776
set< string > m_ScoreSet
size_t m_CountOverlapSet1
bool empty() const
Definition: set.hpp:133
both sequences represented, product and genomic sequences do not match
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
void x_GetCurrentGroup(int set)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
pair< CAlignCompare::SAlignment *, CAlignCompare::SAlignment * > TPtrPair
size_t m_CountOverlapSet2
size_t m_CountEquivSet2
const_iterator end() const
Definition: map.hpp:152
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
USING_SCOPE(objects)
bool m_ScoreSetAsBlacklist
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:452
list< CRef< CSeq_align > > Tdata
vector< const SAlignment * > NextGroup()
TDisambiguatingScoreValues scores
position_type GetFirstFrom(void) const
static void s_GetAlignmentSpans_Intron(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
static string query
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:185
static int lookup(const char *name, const struct lookup_int *table)
Definition: attributes.c:53
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
bool operator()(const CAlignCompare::SAlignment *ptr1, const CAlignCompare::SAlignment *ptr2) const
list< AutoPtr< SAlignment > > m_CurrentSet2Group
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:805
Int8 spans_unique_first
bool Empty(void) const
Definition: range.hpp:148
bool CanGetGenomic_strand(void) const
Check if it is safe to call GetGenomic_strand method.
AutoPtr< SAlignment > x_NextAlignment(int set, bool update_counts=true)
Get next alignment from the correct set.
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
position_type GetSecondTo(void) const
size_t m_CountOnlySet1
static int match(register const unsigned char *eptr, register const uschar *ecode, const unsigned char *mstart, int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, int flags, unsigned int rdepth)
Definition: pcre_exec.c:431
string GetTraceback(const CSeq_align &align, CSeq_align::TDim row)
bool empty() const
Definition: map.hpp:149
AutoPtr< SAlignment > Slice(int row, TSeqPos from, TSeqPos to) const
const CharType(& source)[N]
Definition: pointer.h:1107
insertion in product sequence (i.e. gap in the genomic sequence)
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
size_t m_CountSplitSet2
pair< TPtrPair, SComparison > TComp
#define abs(a)
Definition: ncbi_heapmgr.c:130
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
void x_SplitOnOverlaps(int group, int row)
size_t m_CountOnlySet2
static void s_PopulateScoreSet(const CSeq_align &align, const set< string > &score_set, bool score_set_as_blacklist, CAlignCompare::TIntegerScoreSet &integer_scores, CAlignCompare::TRealScoreSet &real_scores)
size_t m_CountBasesEquivSet1
bool IsSetParts(void) const
basic seqments always are in biologic order Check if a value has been assigned to Parts data member...
T max(T x_, T y_)
position_type GetTo(void) const
Definition: range.hpp:142
CRangeCollection< TSeqPos > query_mismatches
static string subject
char value[7]
Definition: config.c:428
void erase(iterator pos)
Definition: set.hpp:151
T min(T x_, T y_)
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
static void s_GetAlignmentMismatches(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
User-defined methods of the data storage class.
size_t m_CountBasesOnlySet1
list< AutoPtr< SAlignment > > BreakOnBoundaries(int row) const
friend struct SAlignment
list< AutoPtr< SAlignment > > m_NextSet1Group
position_type GetFrom(void) const
Definition: range.hpp:134
size_t m_CountBasesOnlySet2
position_type GetSecondFrom(void) const
const Tdata & Get(void) const
Get the member data.
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1022
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
Definition: Dense_seg.cpp:734
size_t m_CountSplitSet1
list< CRef< CStd_seg > > TStd
Definition: Seq_align_.hpp:196
CRef –.
Definition: ncbiobj.hpp:616
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
AutoPtr –.
Definition: ncbimisc.hpp:384
static void s_GetAlignmentSpans_Exon(const CSeq_align &align, CAlignCompare::SAlignment &align_info, CAlignCompare::ERowComparison row)
size_t m_CountBasesOverlapSet2
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:436
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
list< AutoPtr< SAlignment > > m_NextSet2Group
User-defined methods of the data storage class.
vector< CRef< CScore > > TScore
Definition: Seq_align_.hpp:398
void PopulateBoundariesMap()
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:453
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:309
User-defined methods of the data storage class.
int CompareGroup(const SAlignment &o, bool strict_only) const
size_t m_CountBasesEquivSet2
list< AutoPtr< SAlignment > > m_CurrentSet1Group
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string...
Definition: ncbiexpt.hpp:546
User-defined methods of the data storage class.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
Definition: ncbimisc.hpp:464
TThisType & IntersectWith(const TThisType &r)
Definition: range.hpp:317
CAlignRange Represents an element of pairwise alignment of two sequences.
Definition: align_range.hpp:62
bool IsSetProduct_strand(void) const
should be 'plus' or 'minus' Check if a value has been assigned to Product_strand data member...
const_iterator begin() const
Definition: map.hpp:151
CRangeCollection< TSeqPos > subject_mismatches
IAlignSource & m_Set1
void s_PopulateScores(const CSeq_align &align, const vector< string > &score_list, vector< T > &scores, bool required=true)
int x_DetermineNextGroupSet()
Determine whether the next group of alignments should be taken from set 1 or 2.
vector< double > quality_scores
double m_RealScoreTolerance
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:765
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
TDisambiguatingScoreList m_DisambiguitingScores
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TStd & GetStd(void) const
Get the variant data.
Definition: Seq_align_.hpp:739
vector< string > m_QualityScores
SComp_Less(bool strict=false)
const_iterator upper_bound(const key_type &key) const
Definition: set.hpp:139
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:678
size_t spans_in_common
Modified on Fri Sep 22 15:37:41 2017 by modify_doxy.py rev. 546573