NCBI C++ ToolKit
seq_vector.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seq_vector.cpp 74559 2016-09-13 11:58:16Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Sequence data container for object manager
30 *
31 */
32 
33 
34 #include <ncbi_pch.hpp>
35 #include <objmgr/seq_vector.hpp>
36 #include <objmgr/seq_vector_ci.hpp>
37 #include <corelib/ncbimtx.hpp>
41 #include <objmgr/seq_map.hpp>
44 #include <algorithm>
45 #include <map>
46 #include <vector>
47 #include <util/random_gen.hpp>
48 
51 
52 
53 ////////////////////////////////////////////////////////////////////
54 //
55 // CNcbi2naRandomizer::
56 //
57 
59 {
60 }
61 
62 
64 {
65  unsigned int bases[4]; // Count of each base in the random distribution
66  for (int na4 = 0; na4 < 16; na4++) {
67  int bit_count = 0;
68  char set_bit = 0;
69  for (int bit = 0; bit < 4; bit++) {
70  // na4 == 0 is special case (gap) should be treated as 0xf
71  if ( !na4 || (na4 & (1 << bit)) ) {
72  bit_count++;
73  bases[bit] = 1;
74  set_bit = (char)bit;
75  }
76  else {
77  bases[bit] = 0;
78  }
79  }
80  if (bit_count == 1) {
81  // Single base
82  m_FixedTable[na4] = set_bit;
83  continue;
84  }
86  // Ambiguity: create random distribution with possible bases
87  for (int bit = 0; bit < 4; bit++) {
88  bases[bit] *= kRandomDataSize/bit_count +
89  kRandomDataSize % bit_count;
90  }
91  for (int i = kRandomDataSize - 1; i >= 0; i--) {
92  CRandom::TValue rnd = gen.GetRand(0, i);
93  for (int base = 0; base < 4; base++) {
94  if (!bases[base] || rnd > bases[base]) {
95  rnd -= bases[base];
96  continue;
97  }
98  m_RandomTable[na4][i] = (char)base;
99  bases[base]--;
100  break;
101  }
102  }
103  }
104 }
105 
106 
108 {
109 }
110 
111 
113  size_t count,
114  TSeqPos pos)
115 {
116  for (char* stop = data + count; data < stop; ++data, ++pos) {
117  int base4na = *data;
118  char base2na = m_FixedTable[base4na];
119  if ( base2na == kRandomValue ) {
120  // Ambiguity, use random value
121  base2na = m_RandomTable[base4na][(pos & kRandomizerPosMask)];
122  }
123  *data = base2na;
124  }
125 }
126 
127 
128 ////////////////////////////////////////////////////////////////////
129 //
130 // CSeqVector::
131 //
132 
133 
135  : m_Size(0)
136 {
137 }
138 
139 
141  : m_Scope(vec.m_Scope),
142  m_SeqMap(vec.m_SeqMap),
143  m_TSE(vec.m_TSE),
144  m_Size(vec.m_Size),
145  m_Mol(vec.m_Mol),
146  m_Strand(vec.m_Strand),
147  m_Coding(vec.m_Coding)
148 {
149 }
150 
151 
153  EVectorCoding coding, ENa_strand strand)
154  : m_Scope(bioseq.GetScope()),
155  m_SeqMap(&bioseq.GetSeqMap()),
156  m_TSE(bioseq.GetTSE_Handle()),
157  m_Strand(strand),
158  m_Coding(CSeq_data::e_not_set)
159 {
160  m_Size = bioseq.GetBioseqLength();
161  m_Mol = bioseq.GetSequenceType();
162  SetCoding(coding);
163 }
164 
165 
// Construct a sequence vector over an existing CSeqMap.
//   seqMap  map to read from; only its address is stored in m_SeqMap
//   scope   scope stored in m_Scope
//   coding  requested coding, applied through SetCoding() once the members
//           below are set (m_Coding starts out as CSeq_data::e_not_set)
//   strand  stored in m_Strand
// m_TSE is not mentioned in the initializer list of this overload.
166 CSeqVector::CSeqVector(const CSeqMap& seqMap, CScope& scope,
167  EVectorCoding coding, ENa_strand strand)
168  : m_Scope(&scope),
169  m_SeqMap(&seqMap),
170  m_Strand(strand),
171  m_Coding(CSeq_data::e_not_set)
172 {
// NOTE(review): no assignment to m_Size is visible in this body, although the
// bioseq-based constructors in this file set it; the listing's own numbering
// skips a line right here, so a statement may have been lost -- confirm
// against the repository copy.
// The molecule type is taken from the map itself.
174  m_Mol = m_SeqMap->GetMol();
175  SetCoding(coding);
176 }
177 
178 
// Construct a sequence vector over an existing CSeqMap, anchored to a TSE.
//   seqMap   map to read from; only its address is stored in m_SeqMap
//   top_tse  TSE handle stored in m_TSE; its scope becomes m_Scope
//   coding   requested coding, applied through SetCoding() once the members
//            below are set (m_Coding starts out as CSeq_data::e_not_set)
//   strand   stored in m_Strand
179 CSeqVector::CSeqVector(const CSeqMap& seqMap, const CTSE_Handle& top_tse,
180  EVectorCoding coding, ENa_strand strand)
181  : m_Scope(top_tse.GetScope()),
182  m_SeqMap(&seqMap),
183  m_TSE(top_tse),
184  m_Strand(strand),
185  m_Coding(CSeq_data::e_not_set)
186 {
// NOTE(review): no assignment to m_Size is visible in this body, although the
// bioseq-based constructors in this file set it; the listing's own numbering
// skips a line right here, so a statement may have been lost -- confirm
// against the repository copy.
// The molecule type is taken from the map itself.
188  m_Mol = m_SeqMap->GetMol();
189  SetCoding(coding);
190 }
191 
192 
194  EVectorCoding coding, ENa_strand strand)
195  : m_Scope(&scope),
196  m_SeqMap(CSeqMap::GetSeqMapForSeq_loc(loc, &scope)),
197  m_Strand(strand),
198  m_Coding(CSeq_data::e_not_set)
199 {
200  if ( const CSeq_id* id = loc.GetId() ) {
201  if ( CBioseq_Handle bh = scope.GetBioseqHandle(*id) ) {
202  m_TSE = bh.GetTSE_Handle();
203  }
204  }
206  m_Mol = m_SeqMap->GetMol();
207  SetCoding(coding);
208 }
209 
210 
// Construct a sequence vector for a Seq-loc, anchored to a TSE.
//   loc      location; the map is obtained from
//            CSeqMap::GetSeqMapForSeq_loc(loc, <scope of top_tse>)
//   top_tse  TSE handle stored in m_TSE; its scope becomes m_Scope
//   coding   requested coding, applied through SetCoding() once the members
//            below are set (m_Coding starts out as CSeq_data::e_not_set)
//   strand   stored in m_Strand
211 CSeqVector::CSeqVector(const CSeq_loc& loc, const CTSE_Handle& top_tse,
212  EVectorCoding coding, ENa_strand strand)
213  : m_Scope(top_tse.GetScope()),
214  m_SeqMap(CSeqMap::GetSeqMapForSeq_loc(loc, &top_tse.GetScope())),
215  m_TSE(top_tse),
216  m_Strand(strand),
217  m_Coding(CSeq_data::e_not_set)
218 {
// NOTE(review): no assignment to m_Size is visible in this body, although the
// bioseq-based constructors in this file set it; the listing's own numbering
// skips a line right here, so a statement may have been lost -- confirm
// against the repository copy.
// The molecule type is taken from the freshly obtained map.
220  m_Mol = m_SeqMap->GetMol();
221  SetCoding(coding);
222 }
223 
224 
226  CScope* scope,
227  EVectorCoding coding, ENa_strand strand)
228  : m_Scope(scope),
229  m_SeqMap(CSeqMap::CreateSeqMapForBioseq(bioseq)),
230  m_Strand(strand),
231  m_Coding(CSeq_data::e_not_set)
232 {
233  m_Size = m_SeqMap->GetLength(scope);
234  m_Mol = bioseq.GetInst().GetMol();
235  SetCoding(coding);
236 }
237 
238 
240 {
241 }
242 
243 
245 {
246  if ( &vec != this ) {
247  TMutexGuard guard(GetMutex());
248  m_Scope = vec.m_Scope;
249  m_SeqMap = vec.m_SeqMap;
250  m_TSE = vec.m_TSE;
251  m_Size = vec.m_Size;
252  m_Mol = vec.m_Mol;
253  m_Strand = vec.m_Strand;
254  m_Coding = vec.m_Coding;
255  m_Iterator.reset();
256  }
257  return *this;
258 }
259 
260 
262 {
263  CSeqVector_CI* iter;
264  m_Iterator.reset(iter = new CSeqVector_CI(*this, pos));
265  return iter;
266 }
267 
268 
270 {
271  if ( m_Iterator.get() ) {
272  TMutexGuard guard(GetMutex());
273  m_Iterator.reset();
274  }
275 }
276 
277 
279 {
280  TMutexGuard guard(GetMutex());
281  return x_GetIterator(pos).GetGapSizeForward();
282 }
283 
284 
286 {
287  TMutexGuard guard(GetMutex());
288  return x_GetIterator(pos).GetGapSeq_literal();
289 }
290 
291 
292 bool CSeqVector::CanGetRange(TSeqPos start, TSeqPos stop) const
293 {
294  try {
295  TMutexGuard guard(GetMutex());
296  return x_GetIterator(start).CanGetRange(start, stop);
297  }
298  catch ( CException& /*ignored*/ ) {
299  return false;
300  }
301 }
302 
303 
// Fetch sequence data for the range described by `start` and `stop` into
// `buffer`.  The call is forwarded to the iterator returned by
// x_GetIterator(start); whether `buffer` is overwritten or appended to is
// decided by CSeqVector_CI::GetSeqData, which is not visible here.
304 void CSeqVector::GetSeqData(TSeqPos start, TSeqPos stop, string& buffer) const
305 {
// Hold the vector's mutex for as long as the iterator is in use.
306  TMutexGuard guard(GetMutex());
307  x_GetIterator(start).GetSeqData(start, stop, buffer);
308 }
309 
310 
311 void CSeqVector::GetPackedSeqData(string& dst_str,
312  TSeqPos src_pos,
313  TSeqPos src_end)
314 {
315  dst_str.erase();
316  src_end = min(src_end, size());
317  if ( src_pos >= src_end ) {
318  return;
319  }
320 
321  if ( m_TSE && !CanGetRange(src_pos, src_end) ) {
323  "CSeqVector::GetPackedSeqData: "
324  "cannot get seq-data in range: "
325  <<src_pos<<"-"<<src_end);
326  }
327 
328  TCoding dst_coding = GetCoding();
329  switch ( dst_coding ) {
336  x_GetPacked8SeqData(dst_str, src_pos, src_end);
337  break;
339  x_GetPacked4naSeqData(dst_str, src_pos, src_end);
340  break;
342  x_GetPacked2naSeqData(dst_str, src_pos, src_end);
343  break;
344  default:
345  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
346  "Can not pack data using the selected coding: "<<
347  GetCoding());
348  }
349 }
350 
// Size of the on-stack staging buffers (`char buffer[kBufferSize]`) used by
// the packing helpers in this file.  The 2-bit packer sizes its output
// scratch as kBufferSize/4, which is why the value has to divide by 4.
351 static const size_t kBufferSize = 1024; // must be multiple of 4
352 
// Append `count` bytes of `src_str`, starting at offset `src_pos`, to
// `dst_str`.  The requested window must lie inside `src_str` (checked by
// assertions only); a zero `count` leaves `dst_str` untouched.
static inline
void x_Append8To8(string& dst_str, const string& src_str,
                  size_t src_pos, size_t count)
{
    _ASSERT(src_pos+count >= src_pos); // the window end must not wrap around
    _ASSERT(src_pos+count <= src_str.size());
    if ( count == 0 ) {
        return;
    }
    const char* const first = src_str.data() + src_pos;
    dst_str.append(first, first + count);
}
363 
364 
// Append `count` bytes of the byte vector `src_str`, starting at index
// `src_pos`, to `dst_str`.  The requested window must lie inside `src_str`
// (checked by assertions only); a zero `count` leaves `dst_str` untouched
// and never indexes the vector.
static inline
void x_Append8To8(string& dst_str, const vector<char>& src_str,
                  size_t src_pos, size_t count)
{
    _ASSERT(src_pos+count >= src_pos); // the window end must not wrap around
    _ASSERT(src_pos+count <= src_str.size());
    if ( count == 0 ) {
        return;
    }
    const char* const first = &src_str[src_pos];
    dst_str.append(first, first + count);
}
375 
376 
// Append `count` copies of the gap character `gap` to `dst_str`.
static inline
void x_AppendGapTo8(string& dst_str, size_t count, char gap)
{
    if ( count == 0 ) {
        return;
    }
    dst_str.resize(dst_str.size() + count, gap);
}
384 
385 
// Pack `count` unpacked values (one per byte at `src`) two-per-byte and
// append them to `dst`.
//   dst      packed output
//   dst_c    value waiting for its partner when `dst_pos` is odd; receives
//            the trailing unpaired value, if any
//   dst_pos  number of values already emitted; only its parity is used
// The first value of each pair goes into the high half of the byte.  Values
// are combined without masking.
static
void x_Append8To4(string& dst, char& dst_c, TSeqPos dst_pos,
                  const char* src, size_t count)
{
    _ASSERT(src+count >= src); // check for overflow
    if ( count == 0 ) {
        return;
    }
    const char* const src_end = src + count;
    if ( dst_pos & 1 ) {
        // A half-filled byte is pending: complete it with the first value.
        dst += char((dst_c<<4)|*src);
        dst_c = 0;
        ++src;
    }
    while ( src_end - src >= 2 ) {
        const char high = src[0];
        const char low = src[1];
        dst += char((high<<4)|low);
        src += 2;
    }
    if ( src != src_end ) {
        // One value left over: park it until the next call.
        dst_c = *src;
    }
}
408 
409 
// Copy `count` 4-bit values from the packed vector `src` (two values per
// byte, even positions in the high half) to the packed output `dst`.
//   dst      packed output
//   dst_c    value waiting for its partner when `dst_pos` is odd; receives
//            the trailing unpaired value, if any
//   dst_pos  number of values already emitted; only its parity is used
//   src_pos  index (in values, not bytes) of the first value to copy
// When source and destination parity agree, whole bytes are copied; when
// they differ, every output byte is stitched from two neighbouring source
// bytes.
static
void x_Append4To4(string& dst, char& dst_c, TSeqPos dst_pos,
                  const vector<char>& src, TSeqPos src_pos,
                  TSeqPos count)
{
    _ASSERT(src_pos+count >= src_pos); // check for overflow
    _ASSERT(src_pos+count <= src.size()*2);
    if ( count == 0 ) {
        return;
    }
    // Decided once, before any position is advanced.
    const bool shifted = ((src_pos^dst_pos) & 1) != 0;

    if ( dst_pos & 1 ) {
        // Complete the half-filled output byte with the first source value:
        // the high half of its byte when shifted (src_pos is even), the low
        // half otherwise (src_pos is odd).
        const char octet = src[src_pos>>1];
        const int value = shifted ? ((octet>>4)&15) : (octet&15);
        dst += char((dst_c<<4)|value);
        dst_c = 0;
        ++dst_pos;
        ++src_pos;
        --count;
    }

    size_t idx = src_pos>>1;
    if ( shifted ) {
        // Source now starts on a low half, destination on a byte boundary.
        _ASSERT((src_pos&1));
        for ( TSeqPos pairs = count>>1; pairs != 0; --pairs, ++idx ) {
            dst += char(((src[idx]<<4)&0xf0)|((src[idx+1]>>4)&0x0f));
        }
        if ( count&1 ) {
            dst_c = (src[idx])&15;
        }
    }
    else {
        // Both sides are on a byte boundary: bulk copy.
        _ASSERT(!(src_pos&1));
        _ASSERT(!(dst_pos&1));
        const size_t whole = count>>1;
        if ( whole != 0 ) {
            dst.append(&src[idx], whole);
        }
        if ( count&1 ) {
            dst_c = (src[idx+whole]>>4)&15;
        }
    }
}
463 
464 
// Append `count` copies of the 4-bit gap value `gap` to the packed output.
//   dst_str  packed output (two values per byte)
//   dst_c    value waiting for its partner when `dst_pos` is odd; receives
//            `gap` when an unpaired gap value is left over
//   dst_pos  number of values already emitted; only its parity is used
static
void x_AppendGapTo4(string& dst_str, char& dst_c, TSeqPos dst_pos,
                    TSeqPos count, char gap)
{
    if ( count == 0 ) {
        return;
    }
    if ( dst_pos & 1 ) {
        // Complete the half-filled byte with one gap value.
        dst_str += char((dst_c << 4)|gap);
        dst_c = 0;
        ++dst_pos;
        --count;
    }
    _ASSERT(!(dst_pos&1));
    const size_t gap_bytes = count>>1;
    if ( gap_bytes != 0 ) {
        const char gap_pair = char((gap<<4)|gap);
        dst_str.resize(dst_str.size() + gap_bytes, gap_pair);
    }
    if ( count&1 ) {
        dst_c = gap;
    }
}
488 
489 
// Pack `count` unpacked bases (one per byte at `buffer`) four-per-byte and
// append them to `dst_str`.
//   dst_str  packed output; must already hold exactly dst_pos/4 bytes
//            (asserted)
//   dst_c    bases of the not-yet-complete output byte, kept in its low
//            bits; on return holds the 0..3 trailing bases that did not
//            fill a byte (0 when there are none)
//   dst_pos  number of bases already emitted
//   buffer   unpacked input; values are combined without masking, so each
//            is presumably expected to be in 0..3 -- TODO confirm with
//            callers
//   count    number of bases to append; may be any size
// The first base of every group of four lands in the two highest bits.
//
// The packed bytes are staged in a fixed scratch buffer that is flushed
// whenever it fills up, so `count` is not limited by the scratch size.
static
void x_Append8To2(string& dst_str, char& dst_c, TSeqPos dst_pos,
                  const char* buffer, TSeqPos count)
{
    if ( !count ) {
        return;
    }
    _ASSERT(dst_str.size() == dst_pos>>2);
    const char* unpacked = buffer;
    if ( dst_pos&3 ) {
        // Top up the partially filled output byte first.
        char c = dst_c;
        for ( ; count && (dst_pos&3); --count, ++dst_pos ) {
            c = char((c<<2)|*unpacked++);
        }
        if ( (dst_pos&3) == 0 ) {
            dst_str += c;
            dst_c = 0;
        }
        else {
            // Input ran out before the byte was complete.
            dst_c = c;
        }
        if ( !count ) {
            return;
        }
    }
    _ASSERT((dst_pos&3) == 0);
    _ASSERT(dst_str.size() == dst_pos>>2);
    char packed_buffer[kBufferSize/4];
    char* const packed_limit = packed_buffer + sizeof(packed_buffer);
    char* packed_end = packed_buffer;
    for ( ; count >= 4; count -= 4, unpacked += 4 ) {
        if ( packed_end == packed_limit ) {
            // Scratch is full: flush it instead of writing past its end.
            dst_str.append(packed_buffer, packed_end);
            packed_end = packed_buffer;
        }
        *packed_end++ = char(
            (unpacked[0]<<6)|(unpacked[1]<<4)|(unpacked[2]<<2)|unpacked[3] );
    }
    dst_str.append(packed_buffer, packed_end);
    // Park the 0..3 leftover bases for the next call.
    switch ( count ) {
    case 1:
        dst_c = unpacked[0];
        break;
    case 2:
        dst_c = char((unpacked[0]<<2)|unpacked[1]);
        break;
    case 3:
        dst_c = char((unpacked[0]<<4)|(unpacked[1]<<2)|unpacked[2]);
        break;
    default:
        dst_c = 0;
        break;
    }
}
539 
540 
541 static
542 void x_Append2To2(string& dst, char& dst_c, TSeqPos dst_pos,
543  const vector<char>& src, TSeqPos src_pos,
544  TSeqPos count)
545 {
546  _ASSERT(src_pos+count >= src_pos); // check for overflow
547  _ASSERT(src_pos+count <= src.size()*4);
548  if ( !count ) {
549  return;
550  }
551  if ( (src_pos^dst_pos) & 3 ) {
552  // misaligned src -> dst
553  char buffer[kBufferSize];
554  while ( count ) {
555  // if count is larger than buffer size make sure
556  // that the next dst_pos is aligned to 4.
557  TSeqPos chunk = min(count, TSeqPos(kBufferSize-(dst_pos&3)));
558  copy_2bit(buffer, chunk, src, src_pos);
559  // Array buffer[] is properly initialized in copy_2bit()
560  // but Clang static analyzer fails to notice it
561  // and issues false warning inside x_Append8To2() call.
562  x_Append8To2(dst, dst_c, dst_pos, buffer, chunk);
563  dst_pos += chunk;
564  src_pos += chunk;
565  count -= chunk;
566  }
567  return;
568  }
569 
570  // aligned src -> dst
571  if ( dst_pos&3 ) {
572  // align dst_pos
573  TSeqPos add = 4-(dst_pos&3);
574  char c = char((dst_c<<(add*2))|(src[src_pos>>2]&((1<<(add*2))-1)));
575  if ( count < add ) {
576  dst_c = char(c >> (2*(add-count)));
577  return;
578  }
579  dst += c;
580  dst_c = 0;
581  src_pos += add;
582  // Dead increment: dst_pos is not used anymore
583  //dst_pos += add;
584  count -= add;
585  }
586  _ASSERT(!(src_pos&3));
587  size_t octets = count>>2;
588  size_t pos = src_pos>>2;
589  if ( octets ) {
590  dst.append(&src[pos], octets);
591  }
592  size_t rem = count&3;
593  if ( rem ) {
594  _ASSERT(!(src_pos&3));
595  dst_c = char((src[pos+octets]&255)>>(2*(4-rem)));
596  }
597 }
598 
599 
600 static
601 void x_AppendRandomTo2(string& dst_str, char& dst_c, TSeqPos dst_pos,
602  TSeqPos src_pos, TSeqPos count,
603  INcbi2naRandomizer& randomizer, char gap)
604 {
605  _ASSERT(src_pos+count >= src_pos); // check for overflow
606  char buffer[kBufferSize];
607  while ( count ) {
608  _ASSERT(dst_str.size() == dst_pos>>2);
609  // if count is larger than buffer size make sure
610  // that the next dst_pos is aligned to 4.
611  TSeqPos chunk = min(count, TSeqPos(kBufferSize-(dst_pos&3)));
612  fill_n(buffer, chunk, gap);
613  randomizer.RandomizeData(buffer, chunk, src_pos);
614  x_Append8To2(dst_str, dst_c, dst_pos, buffer, chunk);
615  count -= chunk;
616  src_pos += chunk;
617  dst_pos += chunk;
618  _ASSERT(dst_str.size() == dst_pos>>2);
619  }
620 }
621 
622 
623 static
624 void x_AppendAnyTo8(string& dst_str,
625  const CSeq_data& data, TSeqPos dataPos,
626  TSeqPos total_count,
627  const char* table = 0, bool reverse = false)
628 {
629  _ASSERT(dataPos+total_count >= dataPos); // check for overflow
630  char buffer[kBufferSize];
631  CSeq_data::E_Choice src_coding = data.Which();
632  if ( reverse ) {
633  dataPos += total_count;
634  }
635  while ( total_count ) {
636  TSeqPos count = min(total_count, TSeqPos(sizeof(buffer)));
637  if ( reverse ) {
638  dataPos -= count;
639  }
640  switch ( src_coding ) {
642  copy_8bit_any(buffer, count, data.GetIupacna().Get(), dataPos,
643  table, reverse);
644  break;
646  copy_8bit_any(buffer, count, data.GetIupacaa().Get(), dataPos,
647  table, reverse);
648  break;
650  copy_2bit_any(buffer, count, data.GetNcbi2na().Get(), dataPos,
651  table, reverse);
652  break;
654  copy_4bit_any(buffer, count, data.GetNcbi4na().Get(), dataPos,
655  table, reverse);
656  break;
658  copy_8bit_any(buffer, count, data.GetNcbi8na().Get(), dataPos,
659  table, reverse);
660  break;
662  copy_8bit_any(buffer, count, data.GetNcbi8aa().Get(), dataPos,
663  table, reverse);
664  break;
666  copy_8bit_any(buffer, count, data.GetNcbieaa().Get(), dataPos,
667  table, reverse);
668  break;
670  copy_8bit_any(buffer, count, data.GetNcbistdaa().Get(), dataPos,
671  table, reverse);
672  break;
673  default:
674  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
675  "Invalid data coding: "<<src_coding);
676  }
677  dst_str.append(buffer, count);
678  if ( !reverse ) {
679  dataPos += count;
680  }
681  total_count -= count;
682  }
683 }
684 
685 
686 static
687 void x_AppendAnyTo4(string& dst_str, char& dst_c, TSeqPos dst_pos,
688  const CSeq_data& data, TSeqPos dataPos,
689  TSeqPos total_count,
690  const char* table, bool reverse)
691 {
692  _ASSERT(dataPos+total_count >= dataPos); // check for overflow
693  _ASSERT(table || reverse);
694  char buffer[kBufferSize];
695  CSeq_data::E_Choice src_coding = data.Which();
696  if ( reverse ) {
697  dataPos += total_count;
698  }
699  while ( total_count ) {
700  TSeqPos count = min(total_count, TSeqPos(sizeof(buffer)));
701  if ( reverse ) {
702  dataPos -= count;
703  }
704  switch ( src_coding ) {
706  copy_8bit_any(buffer, count, data.GetIupacna().Get(), dataPos,
707  table, reverse);
708  break;
710  copy_8bit_any(buffer, count, data.GetIupacaa().Get(), dataPos,
711  table, reverse);
712  break;
714  copy_2bit_any(buffer, count, data.GetNcbi2na().Get(), dataPos,
715  table, reverse);
716  break;
718  copy_4bit_any(buffer, count, data.GetNcbi4na().Get(), dataPos,
719  table, reverse);
720  break;
722  copy_8bit_any(buffer, count, data.GetNcbi8na().Get(), dataPos,
723  table, reverse);
724  break;
726  copy_8bit_any(buffer, count, data.GetNcbi8aa().Get(), dataPos,
727  table, reverse);
728  break;
730  copy_8bit_any(buffer, count, data.GetNcbieaa().Get(), dataPos,
731  table, reverse);
732  break;
734  copy_8bit_any(buffer, count, data.GetNcbistdaa().Get(), dataPos,
735  table, reverse);
736  break;
737  default:
738  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
739  "Invalid data coding: "<<src_coding);
740  }
741  x_Append8To4(dst_str, dst_c, dst_pos, buffer, count);
742  if ( !reverse ) {
743  dataPos += count;
744  }
745  dst_pos += count;
746  total_count -= count;
747  }
748 }
749 
750 
751 static
752 void x_AppendAnyTo2(string& dst_str, char& dst_c, TSeqPos dst_pos,
753  const CSeq_data& data, TSeqPos dataPos,
754  TSeqPos total_count,
755  const char* table, bool reverse,
756  INcbi2naRandomizer* randomizer, TSeqPos randomizer_pos)
757 {
758  _ASSERT(dataPos+total_count >= dataPos); // check for overflow
759  _ASSERT(table || reverse || randomizer);
760  char buffer[kBufferSize];
761  CSeq_data::E_Choice src_coding = data.Which();
762  if ( reverse ) {
763  dataPos += total_count;
764  }
765  while ( total_count ) {
766  TSeqPos count = min(total_count, TSeqPos(sizeof(buffer)));
767  if ( reverse ) {
768  dataPos -= count;
769  }
770  switch ( src_coding ) {
772  copy_8bit_any(buffer, count, data.GetIupacna().Get(), dataPos,
773  table, reverse);
774  break;
776  copy_8bit_any(buffer, count, data.GetIupacaa().Get(), dataPos,
777  table, reverse);
778  break;
780  copy_2bit_any(buffer, count, data.GetNcbi2na().Get(), dataPos,
781  table, reverse);
782  break;
784  copy_4bit_any(buffer, count, data.GetNcbi4na().Get(), dataPos,
785  table, reverse);
786  break;
788  copy_8bit_any(buffer, count, data.GetNcbi8na().Get(), dataPos,
789  table, reverse);
790  break;
792  copy_8bit_any(buffer, count, data.GetNcbi8aa().Get(), dataPos,
793  table, reverse);
794  break;
796  copy_8bit_any(buffer, count, data.GetNcbieaa().Get(), dataPos,
797  table, reverse);
798  break;
800  copy_8bit_any(buffer, count, data.GetNcbistdaa().Get(), dataPos,
801  table, reverse);
802  break;
803  default:
804  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
805  "Invalid data coding: "<<src_coding);
806  }
807  if ( randomizer ) {
808  randomizer->RandomizeData(buffer, count, randomizer_pos);
809  }
810  x_Append8To2(dst_str, dst_c, dst_pos, buffer, count);
811  if ( !reverse ) {
812  dataPos += count;
813  }
814  dst_pos += count;
815  randomizer_pos += count;
816  total_count -= count;
817  }
818 }
819 
820 
821 void CSeqVector::x_GetPacked8SeqData(string& dst_str,
822  TSeqPos src_pos,
823  TSeqPos src_end)
824 {
825  ECaseConversion case_conversion = eCaseConversion_none;
827  sel.SetStrand(m_Strand);
828  if ( m_TSE ) {
829  sel.SetLinkUsedTSE(m_TSE);
830  }
831  CSeqMap_CI seg(m_SeqMap, m_Scope.GetScopeOrNull(), sel, src_pos);
832 
833  dst_str.reserve(src_end-src_pos);
834  TCoding dst_coding = GetCoding();
835  TSeqPos dst_pos = 0;
836  while ( src_pos < src_end ) {
837  _ASSERT(dst_str.size() == dst_pos);
838  TSeqPos count = min(src_end-src_pos, seg.GetEndPosition()-src_pos);
839  if ( seg.GetType() == CSeqMap::eSeqGap ) {
840  x_AppendGapTo8(dst_str, count, GetGapChar());
841  }
842  else {
843  const CSeq_data& data = seg.GetRefData();
844  bool reverse = seg.GetRefMinusStrand();
845  TCoding src_coding = data.Which();
846 
847  const char* table = 0;
848  if ( dst_coding != src_coding || reverse ||
849  case_conversion != eCaseConversion_none ) {
850  table = sx_GetConvertTable(src_coding, dst_coding,
851  reverse, case_conversion);
852  if ( !table && src_coding != dst_coding ) {
853  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
854  "Incompatible sequence codings: "<<
855  src_coding<<" -> "<<dst_coding);
856  }
857  }
858 
859  TSeqPos dataPos;
860  if ( reverse ) {
861  // Revert segment offset
862  dataPos = seg.GetRefEndPosition() -
863  (src_pos - seg.GetPosition()) - count;
864  }
865  else {
866  dataPos = seg.GetRefPosition() +
867  (src_pos - seg.GetPosition());
868  }
869 
870  if ( ( !table || table == sm_TrivialTable) && !reverse ) {
871  switch ( src_coding ) {
873  x_Append8To8(dst_str, data.GetIupacna().Get(),
874  dataPos, count);
875  break;
877  x_Append8To8(dst_str, data.GetIupacaa().Get(),
878  dataPos, count);
879  break;
881  x_Append8To8(dst_str, data.GetNcbi8na().Get(),
882  dataPos, count);
883  break;
885  x_Append8To8(dst_str, data.GetNcbi8aa().Get(),
886  dataPos, count);
887  break;
889  x_Append8To8(dst_str, data.GetNcbieaa().Get(),
890  dataPos, count);
891  break;
893  x_Append8To8(dst_str, data.GetNcbistdaa().Get(),
894  dataPos, count);
895  break;
896  default:
897  x_AppendAnyTo8(dst_str, data, dataPos, count);
898  break;
899  }
900  }
901  else {
902  x_AppendAnyTo8(dst_str, data, dataPos, count, table, reverse);
903  }
904  }
905  ++seg;
906  dst_pos += count;
907  src_pos += count;
908  _ASSERT(dst_str.size() == dst_pos);
909  }
910 }
911 
912 
914  TSeqPos src_pos,
915  TSeqPos src_end)
916 {
917  ECaseConversion case_conversion = eCaseConversion_none;
919  sel.SetStrand(m_Strand);
920  if ( m_TSE ) {
921  sel.SetLinkUsedTSE(m_TSE);
922  }
923  CSeqMap_CI seg(m_SeqMap, m_Scope.GetScopeOrNull(), sel, src_pos);
924 
925  dst_str.reserve((src_end-src_pos+1)>>1);
926  TCoding dst_coding = GetCoding();
927  TSeqPos dst_pos = 0;
928  char dst_c = 0;
929  while ( src_pos < src_end ) {
930  _ASSERT(dst_str.size() == dst_pos>>1);
931  TSeqPos count = min(src_end-src_pos, seg.GetEndPosition()-src_pos);
932  if ( seg.GetType() == CSeqMap::eSeqGap ) {
933  x_AppendGapTo4(dst_str, dst_c, dst_pos, count, GetGapChar());
934  }
935  else {
936  const CSeq_data& data = seg.GetRefData();
937  bool reverse = seg.GetRefMinusStrand();
938  TCoding src_coding = data.Which();
939 
940  const char* table = 0;
941  if ( dst_coding != src_coding || reverse ||
942  case_conversion != eCaseConversion_none ) {
943  table = sx_GetConvertTable(src_coding, dst_coding,
944  reverse, case_conversion);
945  if ( !table && src_coding != dst_coding ) {
946  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
947  "Incompatible sequence codings: "<<
948  src_coding<<" -> "<<dst_coding);
949  }
950  }
951 
952  if ( (table && table != sm_TrivialTable) || reverse ) {
953  TSeqPos dataPos;
954  if ( reverse ) {
955  // Revert segment offset
956  dataPos = seg.GetRefEndPosition() -
957  (src_pos - seg.GetPosition()) - count;
958  }
959  else {
960  dataPos = seg.GetRefPosition() +
961  (src_pos - seg.GetPosition());
962  }
963  x_AppendAnyTo4(dst_str, dst_c, dst_pos,
964  data, dataPos, count, table, reverse);
965  }
966  else {
967  TSeqPos dataPos = seg.GetRefPosition() +
968  (src_pos - seg.GetPosition());
969  x_Append4To4(dst_str, dst_c, dst_pos,
970  data.GetNcbi4na().Get(), dataPos, count);
971  }
972  }
973  ++seg;
974  dst_pos += count;
975  src_pos += count;
976  _ASSERT(dst_str.size() == dst_pos>>1);
977  }
978  if ( dst_pos&1 ) {
979  dst_str += char(dst_c<<4);
980  }
981 }
982 
983 
985  TSeqPos src_pos,
986  TSeqPos src_end)
987 {
988  ECaseConversion case_conversion = eCaseConversion_none;
990  sel.SetStrand(m_Strand);
991  if ( m_TSE ) {
992  sel.SetLinkUsedTSE(m_TSE);
993  }
994  CSeqMap_CI seg(m_SeqMap, m_Scope.GetScopeOrNull(), sel, src_pos);
995 
996  dst_str.reserve((src_end-src_pos+3)>>2);
998  TSeqPos dst_pos = 0;
999  char dst_c = 0;
1000  while ( src_pos < src_end ) {
1001  _ASSERT(dst_str.size() == dst_pos>>2);
1002  TSeqPos count = min(src_end-src_pos, seg.GetEndPosition()-src_pos);
1003  if ( seg.GetType() == CSeqMap::eSeqGap ) {
1004  if ( !m_Randomizer ) {
1005  NCBI_THROW(CSeqVectorException, eCodingError,
1006  "Cannot fill NCBI2na gap without randomizer");
1007  }
1008  x_AppendRandomTo2(dst_str, dst_c, dst_pos, src_pos, count,
1009  *m_Randomizer,
1012  }
1013  else {
1014  const CSeq_data& data = seg.GetRefData();
1015  bool reverse = seg.GetRefMinusStrand();
1016  TCoding src_coding = data.Which();
1017  TCoding dst_coding = CSeq_data::e_Ncbi2na;
1018  INcbi2naRandomizer* randomizer = 0;
1019  if ( src_coding != dst_coding && m_Randomizer) {
1020  randomizer = m_Randomizer.GetPointer();
1021  _ASSERT(randomizer);
1022  dst_coding = CSeq_data::e_Ncbi4na;
1023  }
1024 
1025  const char* table = 0;
1026  if ( dst_coding != src_coding || reverse ||
1027  case_conversion != eCaseConversion_none ) {
1028  table = sx_GetConvertTable(src_coding, dst_coding,
1029  reverse, case_conversion);
1030  if ( !table && src_coding != dst_coding ) {
1031  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
1032  "Incompatible sequence codings: "<<
1033  src_coding<<" -> "<<dst_coding);
1034  }
1035  }
1036 
1037  if ( (table && table != sm_TrivialTable) || reverse
1038  || randomizer ) {
1039  TSeqPos dataPos;
1040  if ( reverse ) {
1041  // Revert segment offset
1042  dataPos = seg.GetRefEndPosition() -
1043  (src_pos - seg.GetPosition()) - count;
1044  }
1045  else {
1046  dataPos = seg.GetRefPosition() +
1047  (src_pos - seg.GetPosition());
1048  }
1049  _ASSERT((!randomizer && dst_coding == CSeq_data::e_Ncbi2na) ||
1050  (randomizer && dst_coding == CSeq_data::e_Ncbi4na));
1051  x_AppendAnyTo2(dst_str, dst_c, dst_pos,
1052  data, dataPos, count, table, reverse,
1053  randomizer, src_pos);
1054  }
1055  else {
1056  _ASSERT(dst_coding == CSeq_data::e_Ncbi2na);
1057  TSeqPos dataPos = seg.GetRefPosition() +
1058  (src_pos - seg.GetPosition());
1059  x_Append2To2(dst_str, dst_c, dst_pos,
1060  data.GetNcbi2na().Get(), dataPos, count);
1061  }
1062  }
1063  ++seg;
1064  dst_pos += count;
1065  src_pos += count;
1066  _ASSERT(dst_str.size() == dst_pos>>2);
1067  }
1068  if ( dst_pos&3 ) {
1069  dst_str += char(dst_c << 2*TSeqPos(-TSignedSeqPos(dst_pos)&3));
1070  }
1071 }
1072 
1073 
1076 {
1077  switch (coding) {
1078  case CSeq_data::e_Iupacna: // DNA - N
1079  return case_cvt == eCaseConversion_lower? 'n': 'N';
1080 
1081  case CSeq_data::e_Ncbi8na: // DNA - bit representation
1082  case CSeq_data::e_Ncbi4na:
1083  return 0; // all bits set == any base
1084 
1085  case CSeq_data::e_Ncbieaa: // Proteins - X
1086  case CSeq_data::e_Ncbi8aa: // Protein - numeric representation
1087  return '-';
1088  case CSeq_data::e_Iupacaa:
1089  return case_cvt == eCaseConversion_lower? 'x': 'X';
1090 
1092  return 0;
1093 
1094  case CSeq_data::e_not_set:
1095  return 0; // It's not good to throw an exception here
1096 
1097  case CSeq_data::e_Ncbi2na: // Codings without gap symbols
1098  // Exception is not good here because it conflicts with CSeqVector_CI.
1099  return 0xff;
1100 
1101  case CSeq_data::e_Ncbipaa: //### Not sure about this
1102  case CSeq_data::e_Ncbipna: //### Not sure about this
1103  default:
1104  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
1105  "Can not indicate gap using the selected coding: "<<
1106  coding);
1107  }
1108 }
1109 
1110 
// Mutex taken (through CFastMutexGuard) by the conversion-table lookup that
// follows, which keeps its tables in a function-local static map.
1111 DEFINE_STATIC_FAST_MUTEX(s_ConvertTableMutex2);
1112 
1113 const char*
1115  bool reverse, ECaseConversion case_cvt)
1116 {
1117  CFastMutexGuard guard(s_ConvertTableMutex2);
1118  typedef pair<TCoding, TCoding> TMainConversion;
1119  typedef pair<bool, ECaseConversion> TConversionFlags;
1120  typedef pair<TMainConversion, TConversionFlags> TConversionKey;
1121  typedef vector<char> TConversionTable;
1122  typedef map<TConversionKey, TConversionTable> TTables;
1123  static CSafeStatic<TTables> tables;
1124 
1125  TConversionKey key;
1126  key.first = TMainConversion(src, dst);
1127  key.second = TConversionFlags(reverse, case_cvt);
1128  TTables::iterator it = tables->find(key);
1129  if ( it != tables->end() ) {
1130  // already created, but may be a stand-in
1131  switch (it->second.size()) {
1132  case 0: return 0; // error -- incompatible codings or the like
1133  case 1: return sm_TrivialTable;
1134  default: return &it->second[0];
1135  }
1136  }
1137  TConversionTable& table = (*tables)[key];
1138  if ( !CSeqportUtil::IsCodeAvailable(src) ||
1140  // invalid types
1141  return 0;
1142  }
1143 
1144  const size_t COUNT = kMax_UChar+1;
1145  const unsigned kInvalidCode = kMax_UChar;
1146 
1147  pair<unsigned, unsigned> srcIndex = CSeqportUtil::GetCodeIndexFromTo(src);
1148  if ( srcIndex.second >= COUNT ) {
1149  // too large range
1150  return 0;
1151  }
1152 
1153  if ( reverse ) {
1154  // check if src needs complement conversion
1155  try {
1156  CSeqportUtil::GetIndexComplement(src, srcIndex.first);
1157  }
1158  catch ( exception& /*noComplement*/ ) {
1159  reverse = false;
1160  }
1161  }
1162  if ( case_cvt != eCaseConversion_none ) {
1163  // check if dst is text format
1164  if ( dst != CSeq_data::e_Iupacaa &&
1165  dst != CSeq_data::e_Iupacna &&
1166  dst != CSeq_data::e_Ncbieaa ) {
1167  case_cvt = eCaseConversion_none;
1168  }
1169  }
1170 
1171  if ( dst != src ) {
1172  pair<unsigned, unsigned> dstIndex =
1174  if ( dstIndex.second >= COUNT ) {
1175  // too large range
1176  return 0;
1177  }
1178 
1179  try {
1180  // check for types compatibility
1181  CSeqportUtil::GetMapToIndex(src, dst, srcIndex.first);
1182  }
1183  catch ( exception& /*badType*/ ) {
1184  // incompatible types
1185  return 0;
1186  }
1187  }
1188  else if ( !reverse && case_cvt == eCaseConversion_none ) {
1189  // no need to convert at all
1190  return 0;
1191  }
1192 
1193  table.resize(COUNT, char(kInvalidCode));
1194  bool different = false;
1195  for ( unsigned i = srcIndex.first; i <= srcIndex.second; ++i ) {
1196  try {
1197  unsigned code = i;
1198  if ( reverse ) {
1199  code = CSeqportUtil::GetIndexComplement(src, code);
1200  }
1201  if ( dst != src ) {
1202  code = CSeqportUtil::GetMapToIndex(src, dst, code);
1203  }
1204  code = min(kInvalidCode, code);
1205  if ( case_cvt == eCaseConversion_upper ) {
1206  code = toupper((unsigned char) code);
1207  }
1208  else if( case_cvt == eCaseConversion_lower ) {
1209  code = tolower((unsigned char) code);
1210  }
1211  if ( code != i ) {
1212  different = true;
1213  }
1214  table[i] = char(code);
1215  }
1216  catch ( exception& /*noConversion or noComplement*/ ) {
1217  different = true;
1218  }
1219  }
1220  if ( !different ) {
1221  table.resize(1);
1222  return sm_TrivialTable;
1223  }
1224  return &table[0];
1225 }
1226 
1227 
// Identity table: entry i holds the byte value i.
// The conversion-table lookup in this file returns this array when a
// conversion turns out to change nothing, and the packing routines compare
// table pointers against it (`table == sm_TrivialTable`) to choose the
// direct-copy path.
1228 const char CSeqVectorTypes::sm_TrivialTable[256] = {
1229  '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
1230  '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
1231  '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
1232  '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
1233  '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
1234  '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
1235  '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
1236  '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
1237  '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
1238  '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
1239  '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
1240  '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
1241  '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
1242  '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
1243  '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
1244  '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
1245  '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
1246  '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
1247  '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
1248  '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
1249  '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
1250  '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
1251  '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
1252  '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
1253  '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
1254  '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
1255  '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
1256  '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
1257  '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
1258  '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
1259  '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
1260  '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
1261 };
1262 
1263 
1265 {
1266  if ( strand != m_Strand ) {
1267  m_Strand = strand;
1268  x_ResetIterator();
1269  }
1270 }
1271 
1272 
1274 {
1275  if (m_Coding != coding) {
1276  m_Coding = coding;
1277  x_ResetIterator();
1278  }
1279 }
1280 
1281 
1283 {
1285 }
1286 
1287 
1289 {
1291 }
1292 
1293 
1295 {
1296  switch ( coding ) {
1298  SetIupacCoding();
1299  break;
1301  SetNcbiCoding();
1302  break;
1303  default:
1305  break;
1306  }
1307 }
1308 
1309 
1311 {
1312  CRandom random_gen;
1313  x_InitRandomizer(random_gen);
1314 }
1315 
1316 
1318 {
1319  CRandom random_gen(seed);
1320  x_InitRandomizer(random_gen);
1321 }
1322 
1323 
1325 {
1326  x_InitRandomizer(random_gen);
1327 }
1328 
1329 
1331 {
1332  CRef<INcbi2naRandomizer> randomizer(new CNcbi2naRandomizer(random_gen));
1333  SetRandomizeAmbiguities(randomizer);
1334 }
1335 
1336 
1338 {
1339  if ( m_Randomizer != randomizer ) {
1340  m_Randomizer = randomizer;
1341  x_ResetIterator();
1342  }
1343 }
1344 
1345 
1347 {
1349 }
1350 
1351 
CBioseq_Handle –.
CNcbi2naRandomizer(CRandom &gen)
Definition: seq_vector.cpp:63
TCoding m_Coding
Definition: seq_vector.hpp:205
static void x_AppendGapTo8(string &dst_str, size_t count, char gap)
Definition: seq_vector.cpp:378
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
Definition: ncbiexpt.hpp:562
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:251
Uint4 TValue
Type of the generated integer value and/or the seed value.
Definition: random_gen.hpp:69
CConstRef –.
Definition: ncbiobj.hpp:1192
SeqVector related exceptions.
Set coding to printable coding (Iupacna or Iupacaa)
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:856
CSafeStatic<>::
void SetCoding(TCoding coding)
AutoPtr< CSeqVector_CI > m_Iterator
Definition: seq_vector.hpp:209
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
Definition: Seq_data_.hpp:550
static int seed
Definition: test_table.cpp:132
static void x_Append2To2(string &dst, char &dst_c, TSeqPos dst_pos, const vector< char > &src, TSeqPos src_pos, TSeqPos count)
Definition: seq_vector.cpp:542
void GetPackedSeqData(string &buffer, TSeqPos start=0, TSeqPos stop=kInvalidSeqPos)
Definition: seq_vector.cpp:311
int tolower(Uchar c)
Definition: ncbictype.hpp:72
Selector used in CSeqMap methods returning iterators.
Definition: seq_map_ci.hpp:112
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:73
unsigned int Uint4
Alias for unsigned int.
Definition: ncbitype.h:121
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:970
CConstRef< CSeq_literal > GetGapSeq_literal(TSeqPos pos) const
Returns the gap Seq-literal object ref; returns null if it's not a gap or an unspecified gap ...
Definition: seq_vector.cpp:285
const struct ncbi::grid::netcache::search::fields::KEY key
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:844
const TNcbi8aa & GetNcbi8aa(void) const
Get the variant data.
Definition: Seq_data_.hpp:630
void x_ResetIterator(void) const
Definition: seq_vector.cpp:269
void SetNoAmbiguities(void)
void copy_4bit_any(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos, const char *table, bool reverse)
TCoding GetCoding(void) const
Target sequence coding.
Definition: seq_vector.hpp:312
CSeqVector_CI & x_GetIterator(TSeqPos pos) const
Definition: seq_vector.hpp:249
static void x_AppendAnyTo2(string &dst_str, char &dst_c, TSeqPos dst_pos, const CSeq_data &data, TSeqPos dataPos, TSeqPos total_count, const char *table, bool reverse, INcbi2naRandomizer *randomizer, TSeqPos randomizer_pos)
Definition: seq_vector.cpp:752
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612
int toupper(Uchar c)
Definition: ncbictype.hpp:73
static void set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
Definition: pcre_study.c:74
void x_GetPacked4naSeqData(string &dst_str, TSeqPos src_pos, TSeqPos src_end)
Definition: seq_vector.cpp:913
bool CanGetRange(TSeqPos start, TSeqPos stop)
Check if the sequence can be obtained for the interval [start, stop)
CRef< INcbi2naRandomizer > m_Randomizer
Definition: seq_vector.hpp:206
static const size_t kBufferSize
Definition: seq_vector.cpp:351
const NCBI_NS_NCBI::CEnumeratedTypeValues *ENUM_METHOD_NAME() ENa_strand(void)
Access to ENa_strand's attributes (values, names) as defined in spec.
void copy_2bit(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos)
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
Definition: random_gen.hpp:238
TSeqPos size(void) const
Definition: seq_vector.hpp:291
Set coding to binary coding (Ncbi4na or Ncbistdaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer)
Fill the buffer string with the sequence data for the interval [start, stop).
virtual void RandomizeData(char *buffer, size_t count, TSeqPos pos)=0
Convert count unpacked bases in buffer 4na -> 2na with randomization.
consecutive codes for std aas
Definition: Seq_data_.hpp:113
int i
void copy_2bit_any(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos, const char *table, bool reverse)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
virtual ~CSeqVector(void)
Definition: seq_vector.cpp:239
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:690
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
void x_GetPacked8SeqData(string &dst_str, TSeqPos src_pos, TSeqPos src_end)
Definition: seq_vector.cpp:821
static size_t rnd(size_t minimal, size_t maximal)
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
void RandomizeData(char *buffer, size_t count, TSeqPos pos)
Convert count unpacked bases in buffer 4na -> 2na with randomization.
Definition: seq_vector.cpp:112
#define kMax_UInt
Definition: ncbi_limits.h:185
E_Choice
Choice variants.
Definition: Seq_data_.hpp:102
TSeqPos m_Size
Definition: seq_vector.hpp:202
CTSE_Handle GetTSE_Handle(const CSeq_entry &tse, EMissing action=eMissing_Default)
Find object in scope. If the object is not found, GetXxxHandle() methods will either throw an exception or ...
Definition: scope.cpp:121
static void x_Append8To4(string &dst, char &dst_c, TSeqPos dst_pos, const char *src, size_t count)
Definition: seq_vector.cpp:387
CConstRef< CSeq_literal > GetGapSeq_literal(void) const
Returns the gap Seq-data object ref; returns null if it's not a gap or an unspecified gap ...
CSeqMap –.
Definition: seq_map.hpp:92
bool CanGetRange(TSeqPos start, TSeqPos stop) const
Check if the sequence data is available for the interval [start, stop).
Definition: seq_vector.cpp:292
amino acid probabilities
Definition: Seq_data_.hpp:112
EVectorCoding
CSeqVector constructor flags.
TSeqPos GetGapSizeForward(void) const
Returns the number of gap symbols ahead, including the current symbol; returns 0 if the current position is not in ...
static TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)
CSeqVector –.
Definition: seq_vector.hpp:64
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
static bool IsCodeAvailable(CSeq_data::E_Choice code_type)
static void x_Append8To8(string &dst_str, const string &src_str, size_t src_pos, size_t count)
Definition: seq_vector.cpp:354
const TNcbi8na & GetNcbi8na(void) const
Get the variant data.
Definition: Seq_data_.hpp:590
static TResidue sx_GetGapChar(TCoding coding, ECaseConversion case_cvt)
void SetNcbiCoding(void)
Set coding to either Ncbi8aa or Ncbi8na depending on molecule type.
void SetRandomizeAmbiguities(void)
Randomization of ambiguities and gaps in ncbi2na coding.
CRandom::
Definition: random_gen.hpp:65
CScope & GetScope()
Definition: wgs_utils.cpp:48
static const char * sx_GetConvertTable(TCoding src, TCoding dst, bool reverse, ECaseConversion case_cvt)
SSeqMapSelector & SetLinkUsedTSE(bool link=true)
Definition: seq_map_ci.hpp:157
static void x_Append8To2(string &dst_str, char &dst_c, TSeqPos dst_pos, const char *buffer, TSeqPos count)
Definition: seq_vector.cpp:491
Definition: map.hpp:337
static void x_AppendGapTo4(string &dst_str, char &dst_c, TSeqPos dst_pos, TSeqPos count, char gap)
Definition: seq_vector.cpp:466
TMutex & GetMutex(void) const
Get mutex for a few non-MT-safe methods to make them MT-safe at a cost of performance.
Definition: seq_vector.hpp:263
CNcbi2naRandomizer –.
Definition: seq_vector.hpp:218
TMol GetMol(void) const
Definition: seq_map.hpp:490
CTSE_Handle m_TSE
Definition: seq_vector.hpp:201
static void x_AppendAnyTo8(string &dst_str, const CSeq_data &data, TSeqPos dataPos, TSeqPos total_count, const char *table=0, bool reverse=false)
Definition: seq_vector.cpp:624
void SetStrand(ENa_strand strand)
T min(T x_, T y_)
void x_GetPacked2naSeqData(string &dst_str, TSeqPos src_pos, TSeqPos src_end)
Definition: seq_vector.cpp:984
bool IsProtein(void) const
Definition: seq_vector.hpp:350
SSeqMapSelector & SetStrand(ENa_strand strand)
Set strand to iterate over.
Definition: seq_map_ci.hpp:144
CHeapScope m_Scope
Definition: seq_vector.hpp:199
Definition: inftrees.h:24
CSeqVector(void)
Definition: seq_vector.cpp:134
nucleic acid probabilities
Definition: Seq_data_.hpp:109
friend class CSeqVector_CI
Definition: seq_vector.hpp:179
4 bit nucleic acid code
Definition: Seq_data_.hpp:107
TSeqPos GetBioseqLength(void) const
CScope –.
Definition: scope.hpp:90
void copy_8bit_any(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos, const char *table, bool reverse)
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
2 bit nucleic acid code
Definition: Seq_data_.hpp:106
static CRef< CScope > m_Scope
Multi-threading – mutexes; rw-locks; semaphore.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:530
static void x_Append4To4(string &dst, char &dst_c, TSeqPos dst_pos, const vector< char > &src, TSeqPos src_pos, TSeqPos count)
Definition: seq_vector.cpp:411
CSeqVector & operator=(const CSeqVector &vec)
Definition: seq_vector.cpp:244
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:925
unsigned char TResidue
No variant selected.
Definition: Seq_data_.hpp:103
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
#define _ASSERT
CConstRef< CSeqMap > m_SeqMap
Definition: seq_vector.hpp:200
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:461
#define kMax_UChar
Definition: ncbi_limits.h:177
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:70
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string...
Definition: ncbiexpt.hpp:547
8 bit extended nucleic acid code
Definition: Seq_data_.hpp:108
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
Definition: ncbimisc.hpp:472
TMol GetSequenceType(void) const
CSeqVector_CI * x_CreateIterator(TSeqPos pos) const
Definition: seq_vector.cpp:261
TResidue GetGapChar(ECaseConversion case_cvt=eCaseConversion_none) const
Return gap symbol corresponding to the selected coding.
Definition: seq_vector.hpp:318
CScope * GetScopeOrNull(void) const
Definition: heap_scope.cpp:74
static void x_AppendRandomTo2(string &dst_str, char &dst_c, TSeqPos dst_pos, TSeqPos src_pos, TSeqPos count, INcbi2naRandomizer &randomizer, char gap)
Definition: seq_vector.cpp:601
static const char * table
Definition: stats.c:22
void x_InitRandomizer(CRandom &random_gen)
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
Definition: Seq_data_.hpp:570
ENa_strand m_Strand
Definition: seq_vector.hpp:204
TSeqPos GetGapSizeForward(TSeqPos pos) const
Returns the number of gap symbols ahead, including the base at position 'pos'; returns 0 if the position is not...
Definition: seq_vector.cpp:278
TSeqPos GetLength(CScope *scope) const
Definition: seq_map.hpp:480
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_data_.hpp:475
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
IUPAC 1 letter amino acid code.
Definition: Seq_data_.hpp:105
8 bit extended amino acid codes
Definition: Seq_data_.hpp:110
static void x_AppendAnyTo4(string &dst_str, char &dst_c, TSeqPos dst_pos, const CSeq_data &data, TSeqPos dataPos, TSeqPos total_count, const char *table, bool reverse)
Definition: seq_vector.cpp:687
static const char sm_TrivialTable[256]
static uschar * buffer
Definition: pcretest.c:187
char m_RandomTable[16][kRandomDataSize]
Definition: seq_vector.hpp:237
const TPrim & Get(void) const
Definition: serialbase.hpp:347
DEFINE_STATIC_FAST_MUTEX(s_ConvertTableMutex2)
static TPair GetCodeIndexFromTo(CSeq_data::E_Choice code_type)
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:650
static TIndex GetIndexComplement(CSeq_data::E_Choice code_type, TIndex idx)
const TIupacna & GetIupacna(void) const
Get the variant data.
Definition: Seq_data_.hpp:510
Modified on Mon Oct 16 16:19:36 2017 by modify_doxy.py rev. 546573