NCBI C++ ToolKit
Dense_seg.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: Dense_seg.cpp 67084 2015-04-15 15:46:17Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using specifications from the data definition file
34  * 'seqalign.asn'.
35  */
36 
37 // standard includes
38 #include <ncbi_pch.hpp>
39 #include <algorithm>
41 
42 // generated includes
44 
47 #include <serial/objistr.hpp>
48 #include <corelib/ncbi_param.hpp>
49 
50 // generated classes
51 
53 
54 BEGIN_objects_SCOPE // namespace ncbi::objects::
55 
56 // destructor
58 {
59 }
60 
61 
63 {
64  /// do the base copy
65  CSerialObject::Assign(obj, how);
66 
67  /// copy our specific items
68  if (GetTypeInfo() == obj.GetThisTypeInfo()) {
69  const CDense_seg& other = static_cast<const CDense_seg&>(obj);
70  m_set_State1[0] = other.m_set_State1[0];
71  m_Widths = other.m_Widths;
72  }
73 }
74 
75 
77 {
78  const size_t& dim = GetDim();
79  if (dim != GetIds().size()) {
80  NCBI_THROW(CSeqalignException, eInvalidAlignment,
81  "CDense_seg::CheckNumRows()"
82  " ids.size is inconsistent with dim");
83  }
84  return dim;
85 }
86 
87 
89 {
90  const CDense_seg::TStarts& starts = GetStarts();
91  const CDense_seg::TStrands& strands = GetStrands();
92  const CDense_seg::TLens& lens = GetLens();
93  const CDense_seg::TWidths& widths = GetWidths();
94 
95  const size_t& numrows = GetDim();
96  const size_t& numsegs = GetNumseg();
97  const size_t num = numrows * numsegs;
98 
99  if (starts.size() != num) {
100  string errstr = string("CDense_seg::CheckNumSegs():")
101  + " starts.size is inconsistent with dim * numseg";
102  NCBI_THROW(CSeqalignException, eInvalidAlignment, errstr);
103  }
104  if (lens.size() != numsegs) {
105  string errstr = string("CDense_seg::CheckNumSegs():")
106  + " lens.size is inconsistent with numseg";
107  NCBI_THROW(CSeqalignException, eInvalidAlignment, errstr);
108  }
109  if (strands.size() && strands.size() != num) {
110  string errstr = string("CDense_seg::CheckNumSegs():")
111  + " strands.size is inconsistent with dim * numseg";
112  NCBI_THROW(CSeqalignException, eInvalidAlignment, errstr);
113  }
114  if (widths.size() && widths.size() != numrows) {
115  string errstr = string("CDense_seg::CheckNumSegs():")
116  + " widths.size is inconsistent with dim";
117  NCBI_THROW(CSeqalignException, eInvalidAlignment, errstr);
118  }
119  return numsegs;
120 }
121 
122 
124 {
125  if ( IsSetIds() && (size_t)row < GetIds().size()) {
126  return *GetIds()[row];
127  }
128  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
129  "CDense_seg::GetSeq_id(): "
130  "can not get seq-id for the row requested.");
131 }
132 
133 
135 {
136  const TDim& dim = GetDim();
137  const TNumseg& numseg = CheckNumSegs();
138  const TStarts& starts = GetStarts();
139 
140  if (row < 0 || row >= dim) {
141  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
142  "CDense_seg::GetSeqStart():"
143  " Invalid row number");
144  }
145 
146  TSignedSeqPos start;
147  if (CanGetStrands() && GetStrands().size() && GetStrands()[row] == eNa_strand_minus) {
148  TNumseg seg = numseg;
149  int pos = (seg - 1) * dim + row;
150  while (seg--) {
151  if ((start = starts[pos]) >= 0) {
152  return start;
153  }
154  pos -= dim;
155  }
156  } else {
157  TNumseg seg = -1;
158  int pos = row;
159  while (++seg < numseg) {
160  if ((start = starts[pos]) >= 0) {
161  return start;
162  }
163  pos += dim;
164  }
165  }
166  NCBI_THROW(CSeqalignException, eInvalidAlignment,
167  "CDense_seg::GetSeqStart(): Row is empty");
168 }
169 
170 
172 {
173  const TDim& dim = GetDim();
174  const TNumseg& numseg = CheckNumSegs();
175  const TStarts& starts = GetStarts();
176 
177  if (row < 0 || row >= dim) {
178  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
179  "CDense_seg::GetSeqStop():"
180  " Invalid row number");
181  }
182 
183  TSignedSeqPos start;
184  if (CanGetStrands() && GetStrands().size() && GetStrands()[row] == eNa_strand_minus) {
185  TNumseg seg = -1;
186  int pos = row;
187  while (++seg < numseg) {
188  if ((start = starts[pos]) >= 0) {
189  return start + GetLens()[seg] - 1;
190  }
191  pos += dim;
192  }
193  } else {
194  TNumseg seg = numseg;
195  int pos = (seg - 1) * dim + row;
196  while (seg--) {
197  if ((start = starts[pos]) >= 0) {
198  return start + GetLens()[seg] - 1;
199  }
200  pos -= dim;
201  }
202  }
203  NCBI_THROW(CSeqalignException, eInvalidAlignment,
204  "CDense_seg::GetSeqStop(): Row is empty");
205 }
206 
207 
209 {
210  const size_t& strands_size = GetStrands().size();
211 
212  if ( !strands_size ) {
213  return eNa_strand_plus;
214  } else {
215  TDim dim = CheckNumRows();
216 
217  if (strands_size < (size_t) dim) {
218  // The ASN.1 spec technically requires numrows x numsegs
219  // strands, however in practice NCBI assumes that
220  // Dense-seg's strands are fixed per row. Since we will
221  // obtain the strand from the first segment (assuming they
222  // are fixed) and for efficiency (to eliminate unnecessary
223  // multiplication) we don't check for the full numrows x
224  // numsegs size here
225  NCBI_THROW(CSeqalignException, eInvalidAlignment,
226  "Invalid strands size");
227  }
228 
229  if (row < 0 || row >= dim) {
230  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
231  "CDense_seg::GetSeqStrand():"
232  " Invalid row number");
233  }
234 
235  return GetStrands()[row];
236  }
237 }
238 
239 
240 void CDense_seg::Validate(bool full_test) const
241 {
242  const CDense_seg::TStarts& starts = GetStarts();
243  const CDense_seg::TStrands& strands = GetStrands();
244  const CDense_seg::TLens& lens = GetLens();
245  const CDense_seg::TWidths& widths = GetWidths();
246 
247  const size_t& numrows = CheckNumRows();
248  const size_t& numsegs = CheckNumSegs();
249 
250  if (numsegs == 0) {
251  return;
252  }
253 
254  if (full_test) {
255  const size_t max = numrows * (numsegs -1);
256 
257  bool strands_exist = !strands.empty();
258 
259  size_t numseg = 0, numrow = 0, offset = 0;
260  for (numrow = 0; numrow < numrows; numrow++) {
261  TSignedSeqPos min_start = -1, start;
262  bool plus = strands_exist ?
263  strands[numrow] != eNa_strand_minus:
264  true;
265 
266  if (plus) {
267  offset = 0;
268  } else {
269  offset = max;
270  }
271 
272  for (numseg = 0; numseg < numsegs; numseg++) {
273  start = starts[offset + numrow];
274  if (start >= 0) {
275  if (start < min_start) {
276  string errstr = string("CDense_seg::Validate():")
277  + " Starts are not consistent!"
278  + " Row=" + NStr::SizetToString(numrow) +
279  " Seg=" + NStr::SizetToString(plus ? numseg :
280  numsegs - 1 - numseg) +
281  " MinStart=" + NStr::NumericToString(min_start) +
282  " Start=" + NStr::NumericToString(start);
283 
284  NCBI_THROW(CSeqalignException, eInvalidAlignment,
285  errstr);
286  }
287  min_start = start +
288  lens[plus ? numseg : numsegs - 1 - numseg] *
289  (widths.size() == numrows ?
290  widths[numrow] : 1);
291  }
292  if (plus) {
293  offset += numrows;
294  } else {
295  offset -= numrows;
296  }
297  }
298  if (min_start == -1) {
299  string errstr = string("CDense_seg::Validate():")
300  + " Row " + NStr::SizetToString(numrow) +
301  " is empty.";
302  NCBI_THROW(CSeqalignException, eInvalidAlignment,
303  errstr);
304  }
305  }
306  }
307 }
308 
309 
311 {
312  _ASSERT(GetNumseg() == static_cast<TNumseg>(GetLens().size()));
313  _ASSERT(GetNumseg() * GetDim() == static_cast<int>(GetStarts().size()));
315  || GetNumseg() * GetDim() == static_cast<int>(GetStrands().size()));
316  _ASSERT(GetDim() == static_cast<TDim>(GetIds().size()));
317 
318  list<TSignedSeqRange> delete_ranges;
319  int i;
320  int j;
321 
322  /// leading gap segments first
323  for (i = 0; i < GetNumseg(); ++i) {
324  int count_gaps = 0;
325  for (j = 0; j < GetDim(); ++j) {
326  TSignedSeqPos this_start = GetStarts()[i * GetDim() + j];
327  if (this_start == -1) {
328  ++count_gaps;
329  }
330  }
331 
332  if (GetDim() - count_gaps > 1) {
333  /// no can do
334  break;
335  }
336  }
337 
338  if (i == GetNumseg() + 1) {
339  /// error case - all gapped, so don't bother
340  return;
341  }
342  if (i != 0) {
343  delete_ranges.push_back(TSignedSeqRange(0, i));
344  }
345 
346  /// trailing gap segments next
347  for (i = GetNumseg() - 1; i >= 0; --i) {
348  int count_gaps = 0;
349  for (j = 0; j < GetDim(); ++j) {
350  TSignedSeqPos this_start = GetStarts()[i * GetDim() + j];
351  if (this_start == -1) {
352  ++count_gaps;
353  }
354  }
355 
356  if (GetDim() - count_gaps > 1) {
357  /// no can do
358  break;
359  }
360  }
361 
362  if (i != GetNumseg() - 1) {
363  delete_ranges.push_back(TSignedSeqRange(i + 1, GetNumseg()));
364  }
365 
366  list<TSignedSeqRange>::reverse_iterator iter = delete_ranges.rbegin();
367  list<TSignedSeqRange>::reverse_iterator end = delete_ranges.rend();
368  for ( ; iter != end; ++iter) {
369  TSignedSeqRange r = *iter;
370  if (r.GetLength() == 0) {
371  continue;
372  }
373 
374  /// we can trim the first i segments
375  if (IsSetStrands()) {
376  _ASSERT(static_cast<int>(GetStrands().size())
377  >= r.GetTo() * GetDim());
378  SetStrands().erase(SetStrands().begin() + r.GetFrom() * GetDim(),
379  SetStrands().begin() + r.GetTo() * GetDim());
380  }
381  if (IsSetStarts()) {
382  _ASSERT(static_cast<int>(GetStarts().size())
383  >= r.GetTo() * GetDim());
384  SetStarts().erase(SetStarts().begin() + r.GetFrom() * GetDim(),
385  SetStarts().begin() + r.GetTo() * GetDim());
386  }
387  if (IsSetLens()) {
388  _ASSERT(static_cast<int>(GetLens().size())
389  >= r.GetTo());
390  SetLens().erase(SetLens().begin() + r.GetFrom(),
391  SetLens().begin() + r.GetTo());
392  }
393  }
394 
395  /// fix our number of segments
396  SetNumseg(SetLens().size());
397 
398  _ASSERT(GetNumseg() == static_cast<TNumseg>(GetLens().size()));
399  _ASSERT(GetNumseg() * GetDim() == static_cast<int>(GetStarts().size()));
401  || GetNumseg() * GetDim() == static_cast<int>(GetStrands().size()));
402  _ASSERT(GetDim() == static_cast<TDim>(GetIds().size()));
403 }
404 
405 
407 {
408  _ASSERT(GetNumseg() == static_cast<TNumseg>(GetLens().size()));
409  _ASSERT(GetNumseg() * GetDim() == static_cast<int>(GetStarts().size()));
411  || GetNumseg() * GetDim() == static_cast<int>(GetStrands().size()));
412  _ASSERT(GetDim() == static_cast<TDim>(GetIds().size()));
413 
414  int i;
415  int j;
416  vector<bool> can_merge(GetNumseg() - 1, true); // start off with all true
417  unsigned int merge_count = 0;
418  for (i = 0; i < GetNumseg() - 1; ++i) {
419 
420  for (j = 0; j < GetDim(); ++j) {
421  TSignedSeqPos this_start = GetStarts()[i * GetDim() + j];
422  TSignedSeqPos next_start = GetStarts()[(i + 1) * GetDim() + j];
423 
424  /// check to make sure there is not a gap mismatch
425  if ( (this_start == -1 && next_start != -1)
426  || (this_start != -1 && next_start == -1) ) {
427  can_merge[i] = false;
428  break;
429  }
430 
431  /// check to make sure there is no unaligned space
432  /// between this segment and the next
433  if (this_start != -1 && next_start != -1) {
434  TSignedSeqPos seg_len = GetLens()[i];
435  if (IsSetStrands() &&
436  GetStrands()[i * GetDim() + j] == eNa_strand_minus) {
437  seg_len = GetLens()[i + 1];
438  seg_len = -seg_len;
439  }
440 
441  if (this_start + seg_len != next_start) {
442  can_merge[i] = false;
443  break;
444  }
445  }
446 
447  /// check that the strands all agree between this segment
448  /// and the next (although it is rare, it is legal for them
449  /// to be different)
450  if (IsSetStrands()) {
451  if (GetStrands()[i * GetDim() + j]
452  != GetStrands()[(i + 1) * GetDim() + j]) {
453  can_merge[i] = false;
454  break;
455  }
456  }
457  }
458  if (can_merge[i]) {
459  ++merge_count;
460  }
461  }
462 
463  if (merge_count == 0) {
464  // nothing needs to be done
465  return;
466  }
467 
468  CDense_seg::TStarts new_starts;
469  CDense_seg::TLens new_lens;
470  CDense_seg::TStrands new_strands;
471  new_starts.reserve((GetNumseg() - merge_count) * GetDim());
472  new_lens.reserve(GetNumseg() - merge_count);
473  if (IsSetStrands()) {
474  new_strands.reserve((GetNumseg() - merge_count) * GetDim());
475  }
476  for (i = 0; i < GetNumseg(); ++i) {
477  if (i > 0 && can_merge[i - 1]) {
478  // merge this segment into the last one
479  new_lens.back() += GetLens()[i];
480  if (IsSetStrands()) {
481  for (j = 0; j < GetDim(); ++j) {
482  if (GetStrands()[i * GetDim() + j] == eNa_strand_minus) {
483  new_starts[new_starts.size() - GetDim() + j] =
484  GetStarts()[i * GetDim() + j];
485  }
486  }
487  }
488  } else {
489  // just copy the original segment i onto the end of the new stuff
490  new_lens.push_back(GetLens()[i]);
491  for (j = 0; j < GetDim(); ++j) {
492  new_starts.push_back(GetStarts()[i * GetDim() + j]);
493  if (IsSetStrands()) {
494  new_strands.push_back(GetStrands()[i * GetDim() + j]);
495  }
496  }
497  }
498  }
499 
500  SetStarts().swap(new_starts);
501  SetLens().swap(new_lens);
502  if (IsSetStrands()) {
503  SetStrands().swap(new_strands);
504  }
505 
506  SetNumseg(SetLens().size());
507 
508  _ASSERT(GetNumseg() == static_cast<TNumseg>(GetLens().size()));
509  _ASSERT(GetNumseg() * GetDim() == static_cast<int>(GetStarts().size()));
511  || GetNumseg() * GetDim() == static_cast<int>(GetStrands().size()));
512  _ASSERT(GetDim() == static_cast<TDim>(GetIds().size()));
513 }
514 
515 
517 {
518  #define IDX(_x,_y) (((_x)*GetDim())+(_y))
519  bool swaps_made = false;
520  do {
521  swaps_made = false;
522  for(int i=0; i < GetNumseg()-1; ++i) {
523 
524  bool curr_gap = false, next_gap = false;
525  int curr_seq_row = GetDim()+1, next_seq_row = GetDim()+1;
526  for (int j=0; j < GetDim(); ++j) {
527  if (GetStarts()[IDX(i,j)] == -1)
528  curr_gap = true;
529  else
530  curr_seq_row = min(curr_seq_row, j);
531  if (GetStarts()[IDX(i+1,j)] == -1)
532  next_gap = true;
533  else
534  next_seq_row = min(next_seq_row, j);
535  }
536  if(!(curr_gap & next_gap))
537  continue;
538 
539  // if this Seg and next Seg are both gaps,
540  // and the first row with sequence is not curr
541  // swap the two Segs
542  if(next_seq_row < curr_seq_row) {
543  for(int j=0; j < GetDim(); ++j) {
544  swap(SetStarts()[IDX(i,j)], SetStarts()[IDX(i+1,j)]);
545  if (GetStrands().size() > (size_t)IDX(i+1,j))
546  swap(SetStrands()[IDX(i,j)], SetStrands()[IDX(i+1,j)]);
547  }
548  swap(SetLens()[i], SetLens()[i+1]);
549  swaps_made = true;
550  }
551  }
552  } while(swaps_made);
553 }
554 
555 
557 {
558  _ASSERT(GetNumseg() == static_cast<TNumseg>(GetLens().size()));
559  _ASSERT(GetNumseg() * GetDim() == static_cast<int>(GetStarts().size()));
561  || GetNumseg() * GetDim() == static_cast<int>(GetStrands().size()));
562  _ASSERT(GetDim() == static_cast<TDim>(GetIds().size()));
563 
564 
565  // consistency checks
566  TDim dim = CheckNumRows();
567  TNumseg numseg = CheckNumSegs();
568 
569  int i;
570  int j;
571  vector<bool> remove(numseg, true); // start out with all true
572  unsigned int remove_count = 0;
573  for (i = 0; i < numseg; ++i) {
574 
575  for (j = 0; j < dim; ++j) {
576  if (GetStarts()[i * dim + j] != -1) {
577  // not a gap
578  remove[i] = false;
579  break;
580  }
581  }
582  if (remove[i]) {
583  ++remove_count;
584  }
585  }
586 
587  if (remove_count == 0) {
588  // nothing to remove; leave unchanged
589  return;
590  }
591 
592  CDense_seg::TStarts new_starts;
593  CDense_seg::TLens new_lens;
594  CDense_seg::TStrands new_strands;
595  new_starts.reserve((numseg - remove_count) * dim);
596  new_lens.reserve(numseg - remove_count);
597  if (IsSetStrands()) {
598  new_strands.reserve((numseg - remove_count) * dim);
599  }
600  for (i = 0; i < numseg; ++i) {
601  if (!remove[i]) {
602  // copy the original segment i onto the end of the new stuff
603  new_lens.push_back(GetLens()[i]);
604  for (j = 0; j < dim; ++j) {
605  new_starts.push_back(GetStarts()[i * dim + j]);
606  if (IsSetStrands()) {
607  new_strands.push_back(GetStrands()[i * dim + j]);
608  }
609  }
610  }
611  }
612 
613  SetStarts().swap(new_starts);
614  SetLens().swap(new_lens);
615  if (IsSetStrands()) {
616  SetStrands().swap(new_strands);
617  }
618 
619  SetNumseg(SetLens().size());
620 
621 #ifdef _DEBUG
622  Validate(true);
623 #endif
624 
625 }
626 
627 
628 //-----------------------------------------------------------------------------
629 // PRE : none
630 // POST: same alignment, opposite orientation
632 {
633  //flip strands
634  if (IsSetStrands()) {
636  switch (*i) {
637  case eNa_strand_plus: *i = eNa_strand_minus; break;
638  case eNa_strand_minus: *i = eNa_strand_plus; break;
639  default: break;//do nothing if not + or -
640  }
641  }
642  } else {
643  // Interpret unset strands as plus strands.
644  // Since we're reversing, set them all to minus.
645  SetStrands().resize(GetStarts().size(), eNa_strand_minus);
646  }
647 
648  //reverse list o' lengths
649  {
650  CDense_seg::TLens::iterator f = SetLens().begin();
651  CDense_seg::TLens::iterator r = SetLens().end();
652  while (f < r) {
653  swap(*(f++), *(--r));
654  }
655  }
656 
657  //reverse list o' starts
658  CDense_seg::TStarts &starts = SetStarts();
659  int f = 0;
660  int r = (GetNumseg() - 1) * GetDim();
661  while (f < r) {
662  for (int i = 0; i < GetDim(); ++i) {
663  swap(starts[f+i], starts[r+i]);
664  }
665  f += GetDim();
666  r -= GetDim();
667  }
668 }
669 
670 //-----------------------------------------------------------------------------
671 // PRE : numbers of the rows to swap
672 // POST: alignment rearranged with row1 where row2 used to be & vice versa
674 {
675  if (row1 >= GetDim() || row1 < 0 ||
676  row2 >= GetDim() || row2 < 0) {
677  NCBI_THROW(CSeqalignException, eOutOfRange,
678  "Row numbers supplied to CDense_seg::SwapRows must be "
679  "in the range [0, dim)");
680  }
681 
682  //swap ids
683  swap(SetIds()[row1], SetIds()[row2]);
684 
685  int idxStop = GetNumseg()*GetDim();
686 
687  //swap starts
688  for(int i = 0; i < idxStop; i += GetDim()) {
689  swap(SetStarts()[i+row1], SetStarts()[i+row2]);
690  }
691 
692  //swap strands
693  if (IsSetStrands()) {
694  for(int i = 0; i < idxStop; i += GetDim()) {
695  swap(SetStrands()[i+row1], SetStrands()[i+row2]);
696  }
697  }
698 }
699 
700 
701 /*---------------------------------------------------------------------------*/
702 // PRE : this is a validated dense seg; row & sequence position on row in
703 // alignment
704 // POST: the number of the segment in which this sequence position falls
707 {
708  bool found = false;
710  for (seg = 0; seg < GetNumseg() && !found; ++seg) {
711  TSignedSeqPos start = GetStarts()[seg * GetDim() + row];
712  TSignedSeqPos len = GetLens()[seg];
713  if (start != -1) {
714  if (pos >= start && pos < start + len) {
715  found = true;
716  }
717  }
718  }
719  if (!found) {
720  NCBI_THROW(CSeqalignException, eInvalidAlignment,
721  "CDense_seg::x_FindSegment(): "
722  "Can't find a segment containing position " +
723  NStr::NumericToString(pos));
724  }
725 
726  return seg - 1;
727 }
728 
729 
730 //-----------------------------------------------------------------------------
731 // PRE : range on a row in the alignment
732 // POST: dst Dense_seg reset &
734 ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
735 {
736  if (row < 0 || row >= GetDim()) {
737  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
738  "CDense_seg::ExtractSlice():"
739  " Invalid row number ("
740  + NStr::NumericToString(row) + ")");
741  }
742 
743  if (from > to) {
744  swap(from, to);
745  }
746  if (from < GetSeqStart(row)) {
747  NCBI_THROW(CSeqalignException, eOutOfRange,
748  "CDense_seg::ExtractSlice(): "
749  "start position (" + NStr::NumericToString(from) +
750  ") off end of alignment");
751  }
752  if (to > GetSeqStop(row)) {
753  NCBI_THROW(CSeqalignException, eOutOfRange,
754  "CDense_seg::ExtractSlice(): "
755  "stop position (" + NStr::NumericToString(to) +
756  ") off end of alignment");
757  }
758 
759 
760  CRef<CDense_seg> ds(new CDense_seg);
761  ds->SetDim(GetDim());
762  ds->SetNumseg(0);
763  ITERATE(CDense_seg::TIds, idI, GetIds()) {
764  CSeq_id *si = new CSeq_id;
765  si->Assign(**idI);
766  ds->SetIds().push_back(CRef<CSeq_id>(si));
767  }
768 
769  //find start/stop segments
770  CDense_seg::TNumseg startSeg = x_FindSegment(row, from);
771  CDense_seg::TNumseg stopSeg = x_FindSegment(row, to);
772 
773  TSeqPos startOffset = from - GetStarts()[startSeg * GetDim() + row];
774  TSeqPos stopOffset = GetStarts()[stopSeg * GetDim() + row] +
775  GetLens()[stopSeg] - 1 - to;
776  if (IsSetStrands() && GetStrands()[row] == eNa_strand_minus) {
777  swap(startOffset, stopOffset);
778  swap(startSeg, stopSeg); // make sure startSeg is first
779  }
780 
781  for (CDense_seg::TNumseg seg = startSeg; seg <= stopSeg; ++seg) {
782  //starts
783  for (CDense_seg::TDim dim = 0; dim < GetDim(); ++dim) {
784  TSignedSeqPos start = GetStarts()[seg * GetDim() + dim];
785  if (start != -1) {
786  if (seg == startSeg && (!IsSetStrands() ||
787  GetStrands()[seg * GetDim() + dim] == eNa_strand_plus)) {
788  start += startOffset;
789  }
790  if (seg == stopSeg && IsSetStrands() &&
791  GetStrands()[seg * GetDim() + dim] == eNa_strand_minus) {
792  start += stopOffset;
793  }
794  }
795  ds->SetStarts().push_back(start);
796  }
797 
798  //len
799  TSeqPos len = GetLens()[seg];
800  if (seg == startSeg) {
801  len -= startOffset;
802  }
803  if (seg == stopSeg) {
804  len -= stopOffset;
805  }
806  ds->SetLens().push_back(len);
807 
808  //strands
809  if (IsSetStrands()) {
810  for (CDense_seg::TDim dim = 0; dim < GetDim(); ++dim) {
811  ds->SetStrands().push_back(GetStrands()[seg * GetDim() + dim]);
812  }
813  }
814  ++ds->SetNumseg();
815  }
816 
817 #ifdef _DEBUG
818  ds->Validate(true);
819 #endif
820  return ds;
821 }
822 
823 
824 CRef<CDense_seg> CDense_seg::ExtractRows(const vector<TDim>& rows) const
825 {
826  // consistency checks
827  TDim dim = CheckNumRows();
828  TNumseg numseg = CheckNumSegs();
829 
830  CRef<CDense_seg> new_ds(new CDense_seg);
831  new_ds->SetDim(rows.size());
832  new_ds->SetNumseg(GetNumseg());
833 
834  // reserve for efficiency
835  new_ds->SetIds().reserve(rows.size());
836  new_ds->SetStarts().reserve(rows.size() * GetNumseg());
837  new_ds->SetLens().reserve(GetNumseg());
838  if (IsSetStrands()) {
839  new_ds->SetStrands().reserve(rows.size() * GetNumseg());
840  }
841 
842  ITERATE (vector<TDim>, row, rows) {
843  // sole check that rows are not out of range
844  if (*row < 0 || *row >= dim) {
845  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
846  "CDense_seg::ExtractRows():"
847  " Invalid row number ("
848  + NStr::NumericToString(*row) + ")");
849  }
850  // *copy* the ID (don't just make a reference to it)
851  CRef<CSeq_id> id_copy(new CSeq_id);
852  id_copy->Assign(*GetIds()[*row]);
853  new_ds->SetIds().push_back(id_copy);
854  }
855  for (TNumseg segnum = 0; segnum < numseg; ++segnum) {
856  new_ds->SetLens().push_back(GetLens()[segnum]);
857  ITERATE (vector<TDim>, row, rows) {
858  int idx = segnum * dim + *row;
859  new_ds->SetStarts().push_back(GetStarts()[idx]);
860  if (IsSetStrands()) {
861  new_ds->SetStrands().push_back(GetStrands()[idx]);
862  }
863  }
864  }
865 
866  new_ds->Compact(); // even if original was compact, new one may not be
867 
868 #ifdef _DEBUG
869  new_ds->Validate(true);
870 #endif
871 
872  // Scores are not propagated
873  return new_ds;
874 }
875 
876 
879 {
880  if (offset == 0) return;
881 
882  // Check for out-of-range negative offset
883  if (offset < 0) {
884  for (TNumseg seg = 0, pos = row;
885  seg < GetNumseg();
886  ++seg, pos += GetDim()) {
887 
888  if (GetStarts()[pos] >= 0) {
889  if (GetStarts()[pos] < -offset) {
890  NCBI_THROW(CSeqalignException, eOutOfRange,
891  "Negative offset greater than seq position");
892  }
893  }
894  }
895  }
896 
897  // Modify positions
898  for (TNumseg seg = 0, pos = row;
899  seg < GetNumseg();
900  ++seg, pos += GetDim()) {
901  if (GetStarts()[pos] >= 0) {
902  SetStarts()[pos] += offset;
903  }
904  }
905 }
906 
907 
908 /// @deprecated
909 void CDense_seg::RemapToLoc(TDim row, const CSeq_loc& loc,
910  bool ignore_strand)
911 {
912  if (loc.IsWhole()) {
913  return;
914  }
915 
916  TSeqPos row_stop = GetSeqStop(row);
917 
918  size_t ttl_loc_len = 0;
919  {{
920  CSeq_loc_CI seq_loc_i(loc);
921  do {
922  ttl_loc_len += seq_loc_i.GetRange().GetLength();
923  } while (++seq_loc_i);
924  }}
925 
926  // check the validity of the seq-loc
927  if (ttl_loc_len < row_stop + 1) {
928  string errstr("CDense_seg::RemapToLoc():"
929  " Seq-loc is not long enough to"
930  " cover the alignment!"
931  " Maximum row seq pos is ");
932  errstr += NStr::NumericToString(row_stop);
933  errstr += ". The total seq-loc len is only ";
934  errstr += NStr::SizetToString(ttl_loc_len);
935  errstr += ", it should be at least ";
936  errstr += NStr::NumericToString(row_stop+1);
937  errstr += " (= max seq pos + 1).";
938  NCBI_THROW(CSeqalignException, eOutOfRange, errstr);
939  }
940 
941  const CDense_seg::TStarts& starts = GetStarts();
942  const CDense_seg::TStrands& strands = GetStrands();
943  const CDense_seg::TLens& lens = GetLens();
944 
945  const size_t& numrows = CheckNumRows();
946  const size_t& numsegs = CheckNumSegs();
947 
948  CSeq_loc_CI seq_loc_i(loc);
949 
950  TSeqPos start, loc_len, len, len_so_far;
951  start = seq_loc_i.GetRange().GetFrom();
952  len = loc_len = seq_loc_i.GetRange().GetLength();
953  len_so_far = 0;
954 
955  bool row_plus = !strands.size() || strands[row] != eNa_strand_minus;
956  bool loc_plus = seq_loc_i.GetStrand() != eNa_strand_minus;
957 
958  // iterate through segments
959  size_t idx = loc_plus ? row : (numsegs - 1) * numrows + row;
960  TNumseg seg = loc_plus ? 0 : numsegs - 1;
961  while (loc_plus ? seg < GetNumseg() : seg >= 0) {
962  if (starts[idx] == -1) {
963  // ignore gaps in our sequence
964  if (loc_plus) {
965  idx += numrows; seg++;
966  } else {
967  idx -= numrows; seg--;
968  }
969  continue;
970  }
971 
972  // iterate the seq-loc if needed
973  if ((loc_plus == row_plus ?
974  starts[idx] : ttl_loc_len - starts[idx] - lens[seg])
975  > len_so_far + loc_len) {
976 
977  if (++seq_loc_i) {
978  len_so_far += len;
979  len = seq_loc_i.GetRange().GetLength();
980  start = seq_loc_i.GetRange().GetFrom();
981  } else {
982  NCBI_THROW(CSeqalignException, eInvalidInputData,
983  "CDense_seg::RemapToLoc():"
984  " Internal error");
985  }
986 
987  // assert the strand is the same
988  if (loc_plus != (seq_loc_i.GetStrand() != eNa_strand_minus)) {
989  NCBI_THROW(CSeqalignException, eInvalidInputData,
990  "CDense_seg::RemapToLoc():"
991  " The strand should be the same accross"
992  " the input seq-loc");
993  }
994  }
995 
996  // offset for the starting position
997  if (loc_plus == row_plus) {
998  SetStarts()[idx] += start - len_so_far;
999  } else {
1000  SetStarts()[idx] =
1001  start - len_so_far + ttl_loc_len - starts[idx] - lens[seg];
1002  }
1003 
1004  if (lens[seg] > len) {
1005  TSignedSeqPos len_diff = lens[seg] - len;
1006  while (1) {
1007  // move to the next loc part that extends beyond our length
1008  ++seq_loc_i;
1009  if (seq_loc_i) {
1010  start = seq_loc_i.GetRange().GetFrom();
1011  } else {
1012  NCBI_THROW(CSeqalignException, eOutOfRange,
1013  "CDense_seg::RemapToLoc():"
1014  " Internal error");
1015  }
1016 
1017  // split our segment
1018  SetLens().insert(SetLens().begin() +
1019  (loc_plus ? seg : seg + 1),
1020  len);
1021  SetLens()[loc_plus ? seg + 1 : seg] = len_diff;
1022 
1023  // insert new data to account for our split segment
1024  TStarts temp_starts(numrows, -1);
1025  for (size_t row_i = 0, tmp_idx = seg * numrows;
1026  row_i < numrows; ++row_i, ++tmp_idx) {
1027  TSignedSeqPos& this_start = SetStarts()[tmp_idx];
1028  if (this_start != -1) {
1029  temp_starts[row_i] = this_start;
1030  if (loc_plus == (strands[row_i] != eNa_strand_minus)) {
1031  if ((size_t) row == row_i) {
1032  temp_starts[row_i] = start;
1033  } else {
1034  temp_starts[row_i] += len;
1035  }
1036  } else {
1037  this_start += len_diff;
1038  }
1039  }
1040  }
1041 
1042  len_so_far += loc_len;
1043  len = loc_len = seq_loc_i.GetRange().GetLength();
1044 
1045  SetStarts().insert(SetStarts().begin() +
1046  (loc_plus ? seg + 1 : seg) * numrows,
1047  temp_starts.begin(), temp_starts.end());
1048 
1049  if (strands.size()) {
1050  SetStrands().insert
1051  (SetStrands().begin(),
1052  strands.begin(), strands.begin() + numrows);
1053  }
1054 
1055  SetNumseg()++;
1056 
1057  if ((len_diff = lens[seg] - len) > 0) {
1058  if (loc_plus) {
1059  idx += numrows; seg++;
1060  } else {
1061  idx -= numrows; seg--;
1062  }
1063  } else {
1064  break;
1065  }
1066  }
1067  } else {
1068  len -= lens[seg];
1069  }
1070 
1071  if (loc_plus) {
1072  idx += numrows; seg++;
1073  } else {
1074  idx -= numrows; seg--;
1075  }
1076  } // while iterating through segments
1077 
1078  // finally, modify the strands if different
1079  if ( !ignore_strand ) {
1080  if (loc_plus != row_plus) {
1081  if (!strands.size()) {
1082  // strands do not exist, create them
1083  SetStrands().resize(GetNumseg() * GetDim(), eNa_strand_plus);
1084  }
1085  for (seg = 0, idx = row;
1086  seg < GetNumseg(); seg++, idx += numrows) {
1087  SetStrands()[idx] = loc_plus ? eNa_strand_plus : eNa_strand_minus;
1088  }
1089  }
1090  }
1091 
1092 }
1093 
1094 
1096 {
1097  // this dense-seg
1098  const CDense_seg::TStarts& starts = GetStarts();
1099  const CDense_seg::TStrands& strands = GetStrands();
1100  const CDense_seg::TLens& lens = GetLens();
1101  const CDense_seg::TIds& ids = GetIds();
1102 
1103  const size_t& numrows = CheckNumRows();
1104  const size_t& numsegs = CheckNumSegs();
1105 
1106  bool strands_exist = !strands.empty();
1107 
1108  size_t seg = 0, row = 0;
1109 
1110 
1111  // extra segments
1112  CDense_seg::TStarts extra_starts;
1113  CDense_seg::TLens extra_lens;
1114  size_t extra_numsegs = 0;
1115  size_t extra_seg = 0;
1116  vector<size_t> extra_segs;
1117 
1118 
1119  // new dense-seg
1120  CRef<CDense_seg> new_ds(new CDense_seg);
1121  CDense_seg::TStarts& new_starts = new_ds->SetStarts();
1122  CDense_seg::TStrands& new_strands = new_ds->SetStrands();
1123  CDense_seg::TLens& new_lens = new_ds->SetLens();
1124  CDense_seg::TIds& new_ids = new_ds->SetIds();
1125 
1126  // dimentions
1127  new_ds->SetDim(numrows);
1128  TNumseg& new_numsegs = new_ds->SetNumseg();
1129  new_numsegs = numsegs; // initialize
1130 
1131  // ids
1132  new_ids.resize(numrows);
1133  for (row = 0; row < numrows; row++) {
1134  CRef<CSeq_id> id(new CSeq_id);
1135  SerialAssign(*id, *ids[row]);
1136  new_ids[row] = id;
1137  }
1138 
1139  TNumseg new_seg = 0;
1140 
1141  // temporary data
1142  vector<TSignedSeqPos> expected_positions;
1143  expected_positions.resize(numrows, -1);
1144 
1145  vector<bool> plus;
1146  plus.resize(numrows, true);
1147  if (strands_exist) {
1148  for (row = 0; row < numrows; row++) {
1149  if (strands[row] == eNa_strand_minus) {
1150  plus[row] = false;
1151  }
1152  }
1153  }
1154 
1155  TSignedSeqPos extra_len = 0;
1156  size_t idx = 0, new_idx = 0, extra_idx = 0;
1157 
1158  // main loop through segments
1159  for (seg = 0; seg < numsegs; seg++) {
1160  const TSeqPos& len = lens[seg];
1161  for (row = 0; row < numrows; row++) {
1162 
1163  const TSignedSeqPos& start = starts[idx++];
1164 
1165  TSignedSeqPos& expected_pos = expected_positions[row];
1166 
1167  if (start >= 0) {
1168 
1169  if (expected_pos >= 0) {
1170  // check if there's an unaligned insert
1171 
1172  if (plus[row]) {
1173  extra_len = start - expected_pos;
1174  } else {
1175  extra_len = expected_pos - start - len;
1176  }
1177 
1178  if (extra_len < 0) {
1179  string errstr("CDense_seg::AddUnalignedSegments():"
1180  " Illegal overlap at Row ");
1181  errstr += NStr::SizetToString(row);
1182  errstr += " Segment ";
1183  errstr += NStr::SizetToString(seg);
1184  errstr += ".";
1185  NCBI_THROW(CSeqalignException, eInvalidAlignment,
1186  errstr);
1187  } else if (extra_len > 0) {
1188  // insert new segment
1189  extra_segs.push_back(seg);
1190  extra_lens.push_back(extra_len);
1191  extra_starts.resize(extra_idx + numrows, -1);
1192  extra_starts[extra_idx + row] =
1193  plus[row] ? expected_pos : start + len;
1194 
1195  extra_idx += numrows;
1196  ++extra_numsegs;
1197  }
1198  }
1199 
1200  // set the new expected_pos
1201  if (plus[row]) {
1202  expected_pos = start + len;
1203  } else {
1204  expected_pos = start;
1205  }
1206  }
1207  }
1208  }
1209 
1210  // lens & starts
1211  new_numsegs = numsegs + extra_numsegs;
1212  new_lens.resize(numsegs + extra_numsegs);
1213  new_starts.resize(numrows * new_numsegs);
1214  for (seg = 0, new_seg = 0, extra_seg = 0,
1215  idx = 0, new_idx = 0, extra_idx = 0;
1216  seg < numsegs;
1217  seg++, new_seg++) {
1218 
1219  // insert extra segments
1220  if (extra_numsegs) {
1221  for ( ; extra_seg < extra_segs.size() && extra_segs[extra_seg] == seg; ++new_seg, ++extra_seg) {
1222  new_lens[new_seg] = extra_lens[extra_seg];
1223  for (row = 0; row < numrows; ++row, ++new_idx, ++extra_idx) {
1224  new_starts[new_idx] = extra_starts[extra_idx];
1225  }
1226  }
1227  }
1228 
1229  // add the existing segment
1230  new_lens[new_seg] = lens[seg];
1231  for (row = 0; row < numrows; row++) {
1232  new_starts[new_idx++] = starts[idx++];
1233  }
1234  }
1235 
1236 
1237  // strands
1238  new_strands.resize(numrows * new_numsegs, eNa_strand_plus);
1239  if (strands_exist) {
1240  new_idx = 0;
1241  for (new_seg = 0; new_seg < new_numsegs; new_seg++) {
1242  for (row = 0; row < numrows; row++, new_idx++) {
1243  if ( !plus[row] ) {
1244  new_strands[new_idx] = eNa_strand_minus;
1245  }
1246  }
1247  }
1248  }
1249 
1250  return new_ds;
1251 }
1252 
1253 
1254 //-----------------------------------------------------------------------------
1255 // PRE : alignment transcript (RLE or not) and start coordinates
1256 // POST: Starts, lens and strands. Ids and scores not affected.
1257 
1258 // initialize from pairwise alignment transcript
1259 void CDense_seg::FromTranscript(TSeqPos query_start, ENa_strand query_strand,
1260  TSeqPos subj_start, ENa_strand subj_strand,
1261  const string& transcript )
1262 {
1263  // check that strands are specific
1264  bool query_strand_specific =
1265  query_strand == eNa_strand_plus || query_strand == eNa_strand_minus;
1266  bool subj_strand_specific =
1267  subj_strand == eNa_strand_plus || subj_strand == eNa_strand_minus;
1268 
1269  if(!query_strand_specific || !subj_strand_specific) {
1270  NCBI_THROW(CSeqalignException, eInvalidInputData, "Unknown strand");
1271  }
1272 
1273  TStarts &starts = SetStarts();
1274  starts.clear();
1275  TLens &lens = SetLens();
1276  lens.clear();
1277  TStrands &strands = SetStrands();
1278  strands.clear();
1279 
1280  SetDim(2);
1281 
1282  // iterate through the transcript
1283  size_t seg_count = 0;
1284 
1285  size_t start1 = 0, pos1 = 0; // relative to exon start in mrna
1286  size_t start2 = 0, pos2 = 0; // and genomic
1287  size_t seg_len = 0;
1288 
1289  string::const_iterator ib = transcript.begin();
1290  string::const_iterator ie = transcript.end();
1291  string::const_iterator ii = ib;
1292  unsigned char seg_type;
1293  static const char* badsymerr = "Unknown or unsupported transcript symbol";
1294  char c = ii != ie ? *ii++ : 0;
1295  if(c == 'M' || c == 'R') {
1296  seg_type = 0;
1297  ++pos1;
1298  ++pos2;
1299  }
1300  else if (c == 'I') {
1301  seg_type = 1;
1302  ++pos2;
1303  }
1304  else if (c == 'D') {
1305  seg_type = 2;
1306  ++pos1;
1307  }
1308  else {
1309  if (c == 0) {
1310  NCBI_THROW(CSeqalignException, eInvalidInputData,
1311  "Empty transcript");
1312  }
1313  else {
1314  NCBI_THROW(CSeqalignException, eInvalidInputData, badsymerr);
1315  }
1316  }
1317 
1318  while(ii < ie) {
1319 
1320  c = *ii;
1321  if(isalpha((unsigned char) c)) {
1322 
1323  if(seg_type == 0 && (c == 'M' || c == 'R')) {
1324  ++pos1;
1325  ++pos2;
1326  }
1327  else if(seg_type == 1 && c == 'I') {
1328  ++pos2;
1329  }
1330  else if(seg_type == 2 && c == 'D') {
1331  ++pos1;
1332  }
1333  else {
1334 
1335  // close current seg
1336  TSeqPos query_close = query_strand == eNa_strand_plus?
1337  start1: 1 - pos1;
1338  starts.push_back(seg_type == 1? (TSeqPos)-1: query_start + query_close);
1339  strands.push_back(query_strand);
1340 
1341  TSeqPos subj_close = subj_strand == eNa_strand_plus?
1342  start2: 1- pos2;
1343  starts.push_back(seg_type == 2? (TSeqPos)-1: subj_start + subj_close);
1344  strands.push_back(subj_strand);
1345 
1346  switch(seg_type) {
1347  case 0: seg_len = pos1 - start1; break;
1348  case 1: seg_len = pos2 - start2; break;
1349  case 2: seg_len = pos1 - start1; break;
1350  }
1351  lens.push_back(seg_len);
1352  ++seg_count;
1353 
1354  // start a new seg
1355  start1 = pos1;
1356  start2 = pos2;
1357 
1358  if(c == 'M' || c == 'R'){
1359  seg_type = 0; // matches and mismatches
1360  ++pos1;
1361  ++pos2;
1362  }
1363  else if (c == 'I') {
1364  seg_type = 1; // inserts
1365  ++pos2;
1366  }
1367  else if (c == 'D') {
1368  seg_type = 2; // dels
1369  ++pos1;
1370  }
1371  else {
1372 
1373  NCBI_THROW(CSeqalignException, eInvalidInputData,
1374  badsymerr);
1375  }
1376  }
1377  ++ii;
1378  }
1379  else {
1380 
1381  if(!isdigit((unsigned char) c)) {
1382 
1383  NCBI_THROW(CSeqalignException, eInvalidInputData,
1384  "Alignment transcript corrupt");
1385  }
1386 
1387  size_t len = 0;
1388  while(ii < ie && isdigit((unsigned char)(*ii))) {
1389  len = 10*len + *ii - '0';
1390  ++ii;
1391  }
1392  --len;
1393  switch(seg_type) {
1394  case 0: pos1 += len; pos2 += len; break;
1395  case 1: pos2 += len; break;
1396  case 2: pos1 += len; break;
1397  }
1398  }
1399  }
1400 
1401  TSeqPos query_close = query_strand == eNa_strand_plus? start1: 1 - pos1;
1402  starts.push_back(seg_type == 1? (TSeqPos)-1: query_start + query_close);
1403  strands.push_back(query_strand);
1404 
1405  TSeqPos subj_close = subj_strand == eNa_strand_plus? start2: 1 - pos2;
1406  starts.push_back(seg_type == 2? (TSeqPos)-1: subj_start + subj_close);
1407  strands.push_back(subj_strand);
1408 
1409  switch(seg_type) {
1410 
1411  case 0: seg_len = pos1 - start1; break;
1412  case 1: seg_len = pos2 - start2; break;
1413  case 2: seg_len = pos1 - start1; break;
1414  }
1415  lens.push_back(seg_len);
1416  ++seg_count;
1417 
1418  SetNumseg(seg_count);
1419 }
1420 
1421 
1423 {
1424  if (GetDim() <= row) {
1425  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
1426  "Invalid row number in CreateRowSeq_interval(): " +
1427  NStr::NumericToString(row));
1428  }
1430  TSeqPos from = kInvalidSeqPos;
1431  TSeqPos to = 0;
1432  TSeqPos plus_len = 0;
1433  TSeqPos minus_len = 0;
1434  ret->SetId().Assign(*GetIds()[row]);
1435  for (int seg = 0; seg < GetNumseg(); seg++) {
1436  int idx = seg*GetDim() + row;
1437  int start = GetStarts()[idx];
1438  if (start < 0) {
1439  continue;
1440  }
1441  if (TSeqPos(start) < from) {
1442  from = TSeqPos(start);
1443  }
1444  TSeqPos len = GetLens()[seg];
1445  if (start + len > to) {
1446  to = start + len;
1447  }
1448  if ( !IsSetStrands() || !IsReverse(GetStrands()[idx]) ) {
1449  plus_len += len;
1450  }
1451  else {
1452  minus_len += len;
1453  }
1454  }
1455  if (from == kInvalidSeqPos || to == 0) {
1456  NCBI_THROW(CSeqalignException, eOutOfRange,
1457  "Can not convert row to seq-interval - invalid from/to value");
1458  }
1459  ret->SetFrom(from);
1460  ret->SetTo(to - 1);
1461  if ( IsSetStrands() ) {
1462  // If more than 2/3 of the segment is on plus, use plus strand
1463  if (plus_len >= minus_len*2) {
1464  ret->SetStrand(eNa_strand_plus);
1465  }
1466  // If more than 2/3 is on minus, use minus strand
1467  else if (plus_len*2 < minus_len) {
1468  ret->SetStrand(eNa_strand_minus);
1469  }
1470  // Otherwise set strand to 'both'
1471  else {
1472  ret->SetStrand(eNa_strand_both);
1473  }
1474  }
1475  return ret;
1476 }
1477 
1478 
1479 NCBI_PARAM_DECL(bool, OBJECTS, DENSE_SEG_RESERVE);
1480 NCBI_PARAM_DEF_EX(bool, OBJECTS, DENSE_SEG_RESERVE, true,
1481  eParam_NoThread, OBJECTS_DENSE_SEG_RESERVE);
1482 
1484  const CObjectInfoMI& member)
1485 {
1487 
1488  if ( !s_Reserve->Get() ) {
1489  return;
1490  }
1492  size_t numseg = ds.GetNumseg();
1493  try {
1494  switch ( member.GetMemberIndex() ) {
1495  case 4: // "starts"
1496  ds.SetStarts().reserve(ds.GetDim()*numseg);
1497  break;
1498  case 5: // "lens"
1499  ds.SetLens().reserve(numseg);
1500  break;
1501  case 6: // "strands"
1502  ds.SetStrands().reserve(ds.GetDim()*numseg);
1503  break;
1504  default:
1505  break;
1506  }
1507  }
1508  catch ( bad_alloc& /*ignored*/ ) {
1509  // ignore insufficient memory exception from advisory reserve()
1510  }
1511 }
1512 
1513 
1514 /////////////////////////////////////////////////////////////////////////////
1515 // Read hooks to reserve memory of Dense-seg vector<> to estimated size.
1516 /////////////////////////////////////////////////////////////////////////////
1517 
1518 
1520 {
1524 }
1525 
1526 
1528 {
1532 }
1533 
1534 
1536  const CObjectInfoMI& member)
1537 {
1539  size_t size = ds.GetDim()*ds.GetNumseg();
1541  try {
1542  array.reserve(size);
1543  }
1544  catch ( bad_alloc& /*ignored*/ ) {
1545  // ignore insufficient memory exception from advisory reserve()
1546  }
1547  DefaultRead(in, member);
1548 }
1549 
1550 
1552 {
1554  return type.FindMember("starts");
1555 }
1556 
1557 
1559 {
1561  x_GetMember().SetLocalReadHook(in, hook);
1562 }
1563 
1564 
1566 {
1568  x_GetMember().SetGlobalReadHook(hook);
1569 }
1570 
1571 
1573  const CObjectInfoMI& member)
1574 {
1576  size_t size = ds.GetNumseg();
1578  try {
1579  array.reserve(size);
1580  }
1581  catch ( bad_alloc& /*ignored*/ ) {
1582  // ignore insufficient memory exception from advisory reserve()
1583  }
1584  DefaultRead(in, member);
1585 }
1586 
1587 
1589 {
1591  return type.FindMember("lens");
1592 }
1593 
1594 
1596 {
1598  x_GetMember().SetLocalReadHook(in, hook);
1599 }
1600 
1601 
1603 {
1605  x_GetMember().SetGlobalReadHook(hook);
1606 }
1607 
1608 
1610  const CObjectInfoMI& member)
1611 {
1613  size_t size = ds.GetDim()*ds.GetNumseg();
1615  try {
1616  array.reserve(size);
1617  }
1618  catch ( bad_alloc& /*ignored*/ ) {
1619  // ignore insufficient memory exception from advisory reserve()
1620  }
1621  DefaultRead(in, member);
1622 }
1623 
1624 
1626 {
1628  return type.FindMember("strands");
1629 }
1630 
1631 
1633 {
1635  x_GetMember().SetLocalReadHook(in, hook);
1636 }
1637 
1638 
1640 {
1642  x_GetMember().SetGlobalReadHook(hook);
1643 }
1644 
1645 
1646 END_objects_SCOPE // namespace ncbi::objects::
1647 
void SwapRows(TDim row1, TDim row2)
Swap two rows (changing *order*, not content)
Definition: Dense_seg.cpp:673
CObjectIStream –.
Definition: objistr.hpp:92
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:802
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
static CObjectTypeInfoMI x_GetMember(void)
Definition: Dense_seg.cpp:1625
vector< int > TWidths
Definition: Dense_seg.hpp:73
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:867
static void SetReserveHooks(CObjectIStream &in)
Definition: Dense_seg.cpp:1519
static string NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:664
CObjectTypeInfoMI –.
Definition: objectiter.hpp:245
const CSeq_id & GetSeq_id(TDim row) const
Definition: Dense_seg.cpp:123
CSafeStatic<>::
vector< CRef< CSeq_id > > TIds
Definition: Dense_seg_.hpp:106
void SetTo(TTo value)
Assign a value to To data member.
void SetFrom(TFrom value)
Assign a value to From data member.
NCBI_PARAM_DECL(bool, OBJECTS, DENSE_SEG_RESERVE)
int offset
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2649
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
CObjectInfoMI –.
Definition: objectiter.hpp:431
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:855
TMemberIndex GetMemberIndex(void) const
Get index of the member in the class.
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with added all unaligned pieces (implicit inserts), if any, between segments.
Definition: Dense_seg.cpp:1095
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1377
position_type GetLength(void) const
Definition: range.hpp:158
static CObjectTypeInfoMI x_GetMember(void)
Definition: Dense_seg.cpp:1588
void FromTranscript(TSeqPos query_start, ENa_strand query_strand, TSeqPos subj_start, ENa_strand subj_strand, const string &transcript)
Initialize from pairwise alignment transcript (a string representation produced by CNWAligner) ...
Definition: Dense_seg.cpp:1259
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
CObjectTypeInfo –.
Definition: objectinfo.hpp:93
ENa_strand GetStrand(void) const
Definition: Seq_loc.hpp:1041
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at approriate time when the object of requested type is to be read...
Definition: Dense_seg.cpp:1609
virtual void PreReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
Return true to invoke default reading method afterwards.
Definition: Dense_seg.cpp:1483
static char * s_Reserve(size_t size, CSimpleBufferT< char > &buffer)
Definition: bgzf.cpp:236
string
Definition: cgiapp.hpp:514
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
CRef< CDense_seg > ExtractRows(const vector< TDim > &rows) const
Extract specified rows of the alignment, in specified order.
Definition: Dense_seg.cpp:824
TNumseg & SetNumseg(void)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:481
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
Definition: Dense_seg.cpp:877
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
Definition: Dense_seg_.hpp:568
ENa_strand GetSeqStrand(TDim row) const
Definition: Dense_seg.cpp:208
void OrderAdjacentGaps()
Order adjacent gaps, so that the side with sequence is in row-decending order.
Definition: Dense_seg.cpp:516
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const NCBI_NS_NCBI::CEnumeratedTypeValues *ENUM_METHOD_NAME() ENa_strand(void)
Access to ENa_strand's attributes (values, names) as defined in spec.
const CObjectInfo & GetClassObject(void) const
Get containing class data.
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at approriate time when the object of requested type is to be read...
Definition: Dense_seg.cpp:1572
void SetStrand(TStrand value)
Assign a value to Strand data member.
void Compact()
Join adjacent mergeable segments to create a more compact alignment.
Definition: Dense_seg.cpp:406
const TStrands & GetStrands(void) const
Get the Strands member data.
Definition: Dense_seg_.hpp:580
TWidths m_Widths
Definition: Dense_seg.hpp:177
bool IsSetIds(void) const
sequences in order Check if a value has been assigned to Ids data member.
Definition: Dense_seg_.hpp:493
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:858
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:795
static void SetHook(CObjectIStream &in)
Definition: Dense_seg.cpp:1558
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
Definition: serialbase.hpp:484
int i
static void SetHook(CObjectIStream &in)
Definition: Dense_seg.cpp:1632
Uint4 m_set_State1[1]
Definition: Dense_seg.hpp:176
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1027
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
TSeqPos GetSeqStop(TDim row) const
Definition: Dense_seg.cpp:171
void RemovePureGapSegs()
Remove any segments in which every row has a gap (these can arise when ExtractRows is used) ...
Definition: Dense_seg.cpp:556
static const char si[8][64]
Definition: des.c:146
static void SetGlobalReserveHooks(void)
Definition: Dense_seg.cpp:1527
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
static CObjectTypeInfoMI x_GetMember(void)
Definition: Dense_seg.cpp:1551
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:440
ESerialRecursionMode
How to assign and compare child sub-objects of serial objects.
Definition: serialdef.hpp:190
CMemberIterator FindMember(const string &memberName) const
Find class member by its name.
TNumseg CheckNumSegs(void) const
Definition: Dense_seg.cpp:88
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
#define IDX(_x, _y)
TSeqPos GetSeqStart(TDim row) const
Definition: Dense_seg.cpp:134
Definition: type.c:5
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void RemapToLoc(TDim row, const CSeq_loc &loc, bool ignore_strand=false)
Definition: Dense_seg.cpp:909
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
static void SetGlobalHook(void)
Definition: Dense_seg.cpp:1639
void Reverse(void)
Reverse the segments' orientation.
Definition: Dense_seg.cpp:631
TNumseg x_FindSegment(TDim row, TSignedSeqPos pos) const
Definition: Dense_seg.cpp:706
bool IsSetStarts(void) const
start OFFSETS in ids order within segs Check if a value has been assigned to Starts data member...
Definition: Dense_seg_.hpp:518
void SetId(TId &value)
Assign a value to Id data member.
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
static C * Get(const CTypesIterator &it)
Definition: objecttype.hpp:116
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
int size
CRef< CSeq_interval > CreateRowSeq_interval(TDim row) const
Definition: Dense_seg.cpp:1422
NCBI_PARAM_DEF_EX(bool, OBJECTS, DENSE_SEG_RESERVE, true, eParam_NoThread, OBJECTS_DENSE_SEG_RESERVE)
T max(T x_, T y_)
position_type GetTo(void) const
Definition: range.hpp:142
void Validate(bool full_test=false) const
Definition: Dense_seg.cpp:240
const TWidths & GetWidths(void) const
Definition: Dense_seg.hpp:210
vector< ENa_strand > TStrands
Definition: Dense_seg_.hpp:109
T min(T x_, T y_)
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
void TrimEndGaps()
Trim leading/training gaps if possible.
Definition: Dense_seg.cpp:310
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
CRange< TSignedSeqPos > TSignedSeqRange
Definition: range.hpp:420
position_type GetFrom(void) const
Definition: range.hpp:134
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
Definition: Dense_seg.cpp:734
void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)
overloaded Assign()
Definition: Dense_seg.cpp:62
int len
Base class for all serializable objects.
Definition: serialbase.hpp:149
Do not use per-thread values.
Definition: ncbi_param.hpp:502
TDim CheckNumRows(void) const
Definition: Dense_seg.cpp:76
static void SetGlobalHook(void)
Definition: Dense_seg.cpp:1602
#define _ASSERT
bool CanGetStrands(void) const
Check if it is safe to call GetStrands method.
Definition: Dense_seg_.hpp:574
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:278
TDim & SetDim(void)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:434
bool IsSetLens(void) const
lengths in ids order within segs Check if a value has been assigned to Lens data member.
Definition: Dense_seg_.hpp:543
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string...
Definition: ncbiexpt.hpp:547
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
~CDense_seg(void)
Definition: Dense_seg.cpp:57
T plus(T x_)
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
std::istream & in(std::istream &in_, double &x_)
static void SetHook(CObjectIStream &in)
Definition: Dense_seg.cpp:1595
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at approriate time when the object of requested type is to be read...
Definition: Dense_seg.cpp:1535
static void SetGlobalHook(void)
Definition: Dense_seg.cpp:1565
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
T & Get(void)
Create the variable if not created yet, return the reference.
Modified on Sun May 20 14:26:26 2018 by modify_doxy.py rev. 546573