NCBI C++ ToolKit
seqdbbitset.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seqdbbitset.cpp 74933 2016-10-06 12:29:07Z camacho $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Kevin Bealer
27  *
28  */
29 
30 /// @file seqdbbitset.cpp
31 /// Implementation for the CSeqDB_BitSet class, a bit vector.
32 #include <ncbi_pch.hpp>
33 #include "seqdbbitset.hpp"
34 
36 
38  size_t end,
39  const TByte * p1,
40  const TByte * p2)
41  : m_Start (start),
42  m_End (end),
43  m_Special(eNone)
44 {
45  _ASSERT(eWordShift); // must be 32 or 64
46  _ASSERT(TByte(0) < (TByte(-1))); // must be unsigned
47 
48  // Allocation is guaranteed to zero out the bit memory.
49  x_Alloc(end-start);
50 
51  size_t bytes = m_Bits.size();
52 
53  while(size_t(p2-p1) < bytes) {
54  bytes--;
55  }
56 
57  _ASSERT((eWordBits*m_Bits.size()) >= (bytes*8));
58  memcpy(& m_Bits[0], p1, bytes);
59 }
60 
61 void CSeqDB_BitSet::SetBit(size_t index)
62 {
64  _ASSERT(index >= m_Start);
65  _ASSERT(index < m_End);
66 
67  index -= m_Start;
68 
69  size_t vx = index >> eWordShift;
70  int wx = index & eWordMask;
71 
72  _ASSERT(m_Bits.size() > vx);
73  m_Bits[vx] |= (TByte(0x80 >> wx));
74 }
75 
76 void CSeqDB_BitSet::ClearBit(size_t index)
77 {
79  _ASSERT(index >= m_Start);
80  _ASSERT(index < m_End);
81 
82  index -= m_Start;
83 
84  size_t vx = index >> eWordShift;
85  int wx = index & eWordMask;
86 
87  _ASSERT(m_Bits.size() > vx);
88  m_Bits[vx] &= ~(TByte(0x80 >> wx));
89 }
90 
91 bool CSeqDB_BitSet::CheckOrFindBit(size_t & index) const
92 {
93  if (index < m_Start)
94  index = m_Start;
95 
96  if (index >= m_End)
97  return false;
98 
99  if (m_Special == eAllSet) {
100  return true;
101  }
102 
103  if (m_Special == eAllClear) {
104  return false;
105  }
106 
107  size_t nwords = m_Bits.size();
108  size_t ix = index - m_Start;
109  size_t vx = ix >> eWordShift;
110  size_t vx0 = vx;
111 
112  while(vx < nwords && ! m_Bits[vx]) {
113  vx ++;
114  }
115 
116  if (vx != vx0) {
117  ix = (vx << eWordShift);
118  }
119 
120  _ASSERT((ix + m_Start) >= index);
121 
122  size_t bitcount = m_End - m_Start;
123 
124  while(ix < bitcount) {
125  vx = ix >> eWordShift;
126  int wx = ix & eWordMask;
127 
128  _ASSERT(nwords > vx);
129  if (m_Bits[vx] & (TByte(0x80) >> wx))
130  break;
131 
132  ix ++;
133  }
134 
135  if (ix < bitcount) {
136  index = (ix + m_Start);
137  return true;
138  }
139 
140  return false;
141 }
142 
143 void CSeqDB_BitSet::UnionWith(CSeqDB_BitSet & other, bool consume)
144 {
145  if (other.m_Special == eAllClear) {
146  // Nothing to do.
147  return;
148  }
149 
150  if (m_Special == eAllClear) {
151  // Result is just 'other'.
152  x_Copy(other, consume);
153  return;
154  }
155 
156  // Our all-1s mask covers the other.
157 
158  if (other.m_Start >= m_Start &&
159  other.m_End <= m_End &&
160  m_Special == eAllSet) {
161 
162  return;
163  }
164 
165  // The other all-1s mask covers ours.
166 
167  if (other.m_Start <= m_Start &&
168  other.m_End >= m_End &&
169  other.m_Special == eAllSet) {
170 
171  // Copy is probably better than swap here.
172  x_Copy(other, consume);
173  return;
174  }
175 
176  // Adjust the range if needed; convert special cases to eNone.
177 
178  x_Normalize(other.m_Start, other.m_End);
179 
180  switch(other.m_Special) {
181  case eAllSet:
182  AssignBitRange(other.m_Start, other.m_End, true);
183  break;
184 
185  case eNone:
186  x_CopyBits(other);
187  break;
188 
189  case eAllClear:
190  _ASSERT(false);
191  }
192 }
193 
194 void CSeqDB_BitSet::IntersectWith(CSeqDB_BitSet & other, bool consume)
195 {
196  // All clear cases
197 
198  if (m_Special == eAllClear) {
199  return;
200  }
201 
202  if (other.m_Special == eAllClear) {
203  x_Copy(other, consume);
204  return;
205  }
206 
207  // All set cases.
208 
209  if (m_Special == eAllSet && other.m_Special == eAllSet) {
210  size_t start = std::max(m_Start, other.m_Start);
211  size_t end = std::min(m_End, other.m_End);
212 
213  if (start >= end) {
214  // The intersected ranges don't overlap.
216  } else {
217  m_Start = start;
218  m_End = end;
219  }
220  return;
221  }
222 
223  if (other.m_Special == eAllSet || m_Special == eAllSet) {
225  CSeqDB_BitSet range;
226 
227  if (m_Special == eAllSet) {
228  result.x_Copy(other, consume);
229  range.x_Copy(*this, true);
230  } else {
231  Swap(result);
232  range.x_Copy(other, consume);
233  }
234 
235  if (result.m_Start < range.m_Start)
236  result.AssignBitRange(result.m_Start, range.m_Start, false);
237 
238  if (result.m_End > range.m_End)
239  result.AssignBitRange(range.m_End, result.m_End, false);
240 
241  Swap(result);
242 
243  return;
244  }
245 
246  if ((m_Start == other.m_Start) &&
247  (m_Bits.size() == other.m_Bits.size()) &&
248  (m_Special == eNone) &&
249  (other.m_Special == eNone)) {
250 
251  size_t i = 0;
252  size_t end1 = (m_Bits.size() / sizeof(int)) * sizeof(int);
253  size_t end2 = m_Bits.size();
254 
255  // [ The first while() is only needed in the case of unaligned
256  // large-character-array allocation, which probably never
257  // happens in practice. ]
258 
259  while(i != end2 && (i & (sizeof(int)-1))) {
260  unsigned char * dst = & m_Bits[i];
261  unsigned char * src = & other.m_Bits[i];
262 
263  *dst &= *src;
264  i ++;
265  }
266 
267  while(i != end1) {
268  int * dst = (int*)(& m_Bits[i]);
269  int * src = (int*)(& other.m_Bits[i]);
270 
271  *dst &= *src;
272  i += sizeof(int);
273  }
274 
275  while(i != end2) {
276  unsigned char * dst = & m_Bits[i];
277  unsigned char * src = & other.m_Bits[i];
278 
279  *dst &= *src;
280  i ++;
281  }
282  return;
283  }
284 
285  // Intersection between unaligned or differently size bit sets.
286  // Some of these cases could be split off but this is currently
287  // not likely to happen in production code.
288 
289  for(size_t i=0; CheckOrFindBit(i); i++) {
290  if (! other.CheckOrFindBit(i)) {
291  ClearBit(i);
292  }
293  }
294 }
295 
296 void CSeqDB_BitSet::x_CopyBits(const CSeqDB_BitSet & src, size_t start, size_t end)
297 {
298  for(size_t i = start; src.CheckOrFindBit(i) && i < end; i++) {
299  SetBit(i);
300  }
301 }
302 
304 {
305  for(size_t i=0; src.CheckOrFindBit(i); i++) {
306  SetBit(i);
307  }
308 }
309 
310 void CSeqDB_BitSet::x_Normalize(size_t start, size_t end)
311 {
312  // Note: the "range change" paths are unlikely to be active for
313  // SeqDB, and could be improved (i.e. this is not the efficient
314  // way to move a range of bits).
315 
316  if (m_Start > start || m_End < end || m_Special != eNone) {
317  CSeqDB_BitSet dup(std::min(m_Start, start),
318  std::max(m_End, end));
319 
320  Swap(dup);
321 
322  switch(m_Special) {
323  case eAllClear:
324  break;
325 
326  case eAllSet:
327  AssignBitRange(m_Start, m_End, true);
328  break;
329 
330  case eNone:
331  x_CopyBits(dup);
332  break;
333  }
334  }
335 }
336 
337 void CSeqDB_BitSet::x_Copy(CSeqDB_BitSet & other, bool consume)
338 {
339  if (consume && other.m_Special == eNone) {
340  Swap(other);
341  } else {
342  m_Start = other.m_Start;
343  m_End = other.m_End;
344  m_Special = other.m_Special;
345  m_Bits = other.m_Bits;
346  }
347 }
348 
349 bool CSeqDB_BitSet::GetBit(size_t index) const
350 {
351  if (m_Special != eNone) {
352  return (m_Special == eAllSet) ? true : false;
353  }
354 
355  _ASSERT(index >= m_Start);
356  _ASSERT(index < m_End);
357 
358  index -= m_Start;
359 
360  size_t vx = index >> eWordShift;
361  int wx = index & eWordMask;
362 
363  _ASSERT(m_Bits.size() > vx);
364  return !! (m_Bits[vx] & (TByte(0x80) >> wx));
365 }
366 
368 {
369  std::swap(m_Start, other.m_Start);
370  std::swap(m_End, other.m_End);
371  std::swap(m_Special, other.m_Special);
372  other.m_Bits.swap(m_Bits);
373 }
374 
375 void CSeqDB_BitSet::AssignBitRange(size_t start, size_t end, bool value)
376 {
377  _ASSERT(start >= m_Start && end <= m_End);
378 
379  if ((start + eWordBits*3) > end) {
380  for(size_t i = start; i < end; i++) {
381  AssignBit(i, value);
382  }
383  return;
384  } else {
385  size_t i = start - m_Start;
386  size_t e = end - m_Start;
387 
388  while(i & eWordMask) {
389  AssignBit(i + m_Start, value);
390  i++;
391  }
392 
393  size_t vx = i >> eWordShift,
394  evx = e >> eWordShift;
395 
396  char mask = value ? 0xFF : 0;
397 
398  memset(& m_Bits[vx], mask, evx-vx);
399 
400  i = vx << eWordShift;
401 
402  while(i < e) {
403  AssignBit(i + m_Start, value);
404  i++;
405  }
406  }
407 }
408 
409 void CSeqDB_BitSet::AssignBit(size_t i, bool value)
410 {
411  if (value) {
412  SetBit(i);
413  } else {
414  ClearBit(i);
415  }
416 }
417 
419 {
420  if (m_Special != eNone) {
422  }
423 }
424 
425 void CSeqDB_BitSet::DebugDump(CDebugDumpContext ddc, unsigned int depth) const
426 {
427  ddc.SetFrame("CSeqDB_BitSet");
428  CObject::DebugDump(ddc, depth);
429  ddc.Log("m_Special", m_Special);
430  ddc.Log("m_Start", m_Start);
431  ddc.Log("m_End", m_End);
432  ddc.Log("m_Bits.size", m_Bits.size());
433 }
434 
436 
void x_CopyBits(const CSeqDB_BitSet &src)
Set all bits that are true in `src'.
ESpecialCase m_Special
Special edge cases.
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: ncbiobj.cpp:954
Number of bits per stored word.
Definition: seqdbbitset.hpp:63
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1337
void Normalize()
If this is a special case bitset, convert it to a normal one.
void x_Copy(CSeqDB_BitSet &other, bool consume)
Set this bitset to the value of the provided one.
All OIDs are clear.
Definition: seqdbbitset.hpp:77
Normal OID list.
Definition: seqdbbitset.hpp:75
vector< TByte > m_Bits
Representation of bit data.
void x_Alloc(size_t bits)
Allocate memory for the bit data.
void AssignBit(size_t i, bool value)
Store a value at the given index.
void ClearBit(size_t index)
Clear the specified bit (to false).
Definition: seqdbbitset.cpp:76
Shift to convert from bit index to vector index.
Definition: seqdbbitset.hpp:66
void Swap(CSeqDB_BitSet &other)
Swap two bitsets.
int i
void Log(const string &name, const char *value, CDebugDumpFormatter::EValueType type=CDebugDumpFormatter::eValue, const string &comment=kEmptyStr)
Definition: ddumpable.cpp:151
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Allows to dump a snapshot of the object.
unsigned char TByte
Word size for vector elements.
Definition: seqdbbitset.hpp:58
void SetFrame(const string &frame)
Definition: ddumpable.cpp:137
bool CheckOrFindBit(size_t &index) const
Check if a bit is true or find the next bit that is.
Definition: seqdbbitset.cpp:91
Mask to compute bit index within word.
Definition: seqdbbitset.hpp:69
void SetBit(size_t index)
Set the specified bit (to true).
Definition: seqdbbitset.cpp:61
void x_Normalize(size_t start, size_t end)
Replace `special' with normal bitsets, adjust the index range.
T max(T x_, T y_)
CSeqDB_BitSet()
Default constructor for zero-length empty bit array.
Definition: seqdbbitset.hpp:81
char value[7]
Definition: config.c:428
T min(T x_, T y_)
Bit set class.
Definition: seqdbbitset.hpp:49
ncbi::TMaskedQueryRegions mask
All OIDs are set.
Definition: seqdbbitset.hpp:76
None specified.
Definition: blast_def.h:326
size_t m_Start
Number of bits stored here.
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1153
bool GetBit(size_t index) const
Get the value of the specified bit.
else result
Definition: token2.c:20
void UnionWith(CSeqDB_BitSet &other, bool consume)
This bitset is assigned to the union of it and another.
#define _ASSERT
size_t m_End
Number of bits stored here.
void IntersectWith(CSeqDB_BitSet &other, bool consume)
This bitset is assigned to the intersection of it and another.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
void AssignBitRange(size_t start, size_t end, bool value)
Store the provided value in a range of bits.
Implementation for the CSeqDB_BitSet class, a bit vector.
Modified on Fri Sep 22 15:41:21 2017 by modify_doxy.py rev. 546573