include/util/bitset/bmserial.h

Go to the documentation of this file.
00001 #ifndef BMSERIAL__H__INCLUDED__
00002 #define BMSERIAL__H__INCLUDED__
00003 /*
00004 Copyright(c) 2002-2009 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
00005 
00006 Permission is hereby granted, free of charge, to any person 
00007 obtaining a copy of this software and associated documentation 
00008 files (the "Software"), to deal in the Software without restriction, 
00009 including without limitation the rights to use, copy, modify, merge, 
00010 publish, distribute, sublicense, and/or sell copies of the Software, 
00011 and to permit persons to whom the Software is furnished to do so, 
00012 subject to the following conditions:
00013 
00014 The above copyright notice and this permission notice shall be included 
00015 in all copies or substantial portions of the Software.
00016 
00017 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
00018 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
00019 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
00020 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
00021 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
00022 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
00023 OTHER DEALINGS IN THE SOFTWARE.
00024 
00025 For more information please visit:  http://bmagic.sourceforge.net
00026 
00027 */
00028 
00029 
00030 /*! \defgroup bvserial bvector serialization  
00031  *  bvector serialization
00032  *  \ingroup bmagic 
00033  *
00034  */
00035 
00036 #ifndef BM__H__INCLUDED__
00037 #define BM__H__INCLUDED__
00038 
00039 #include "bm.h"
00040 
00041 #endif
00042 
00043 #ifdef _MSC_VER
00044 #pragma warning( disable : 4311 4312)
00045 #endif
00046 
00047 
00048 
00049 #include "encoding.h"
00050 #include "bmdef.h"
00051 #include "bmfunc.h"
00052 #include "bmtrans.h"
00053 #include "bmalgo_impl.h"
00054 #include "bmutil.h"
00055 
00056 //#include "bmgamma.h"
00057 
00058 
00059 namespace bm
00060 {
00061 
00062 
00063 // Serialization stream markup constants
00064 
00065 
00066 const unsigned char set_block_end               = 0;   //!< End of serialization
00067 const unsigned char set_block_1zero             = 1;   //!< One all-zero block
00068 const unsigned char set_block_1one              = 2;   //!< One block all-set (1111...)
00069 const unsigned char set_block_8zero             = 3;   //!< Up to 256 zero blocks
00070 const unsigned char set_block_8one              = 4;   //!< Up to 256 all-set blocks
00071 const unsigned char set_block_16zero            = 5;   //!< Up to 65536 zero blocks
00072 const unsigned char set_block_16one             = 6;   //!< UP to 65536 all-set blocks
00073 const unsigned char set_block_32zero            = 7;   //!< Up to 4G zero blocks
00074 const unsigned char set_block_32one             = 8;   //!< UP to 4G all-set blocks
00075 const unsigned char set_block_azero             = 9;   //!< All other blocks zero
00076 const unsigned char set_block_aone              = 10;  //!< All other blocks one
00077 const unsigned char set_block_bit               = 11;  //!< Plain bit block
00078 const unsigned char set_block_sgapbit           = 12;  //!< SGAP compressed bitblock
00079 const unsigned char set_block_sgapgap           = 13;  //!< SGAP compressed GAP block
00080 const unsigned char set_block_gap               = 14;  //!< Plain GAP block
00081 const unsigned char set_block_gapbit            = 15;  //!< GAP compressed bitblock 
00082 const unsigned char set_block_arrbit            = 16;  //!< List of bits ON
00083 const unsigned char set_block_bit_interval      = 17; //!< Interval block
00084 const unsigned char set_block_arrgap            = 18;  //!< List of bits ON (GAP block)
00085 const unsigned char set_block_bit_1bit          = 19; //!< Bit block with 1 bit ON
00086 const unsigned char set_block_gap_egamma        = 20; //!< Gamma compressed GAP block
00087 const unsigned char set_block_arrgap_egamma     = 21; //!< Gamma compressed delta GAP array
00088 const unsigned char set_block_bit_0runs         = 22; //!< Bit block with encoded zero intervals
00089 const unsigned char set_block_arrgap_egamma_inv = 23; //!< Gamma compressed inverted delta GAP array
00090 const unsigned char set_block_arrgap_inv        = 24;  //!< List of bits OFF (GAP block)
00091 
00092 
00093 /// \internal
00094 /// \ingroup bvserial 
00095 enum serialization_header_mask {
00096     BM_HM_DEFAULT = 1,
00097     BM_HM_RESIZE  = (1 << 1), ///< resized vector
00098     BM_HM_ID_LIST = (1 << 2), ///< id list stored
00099     BM_HM_NO_BO   = (1 << 3), ///< no byte-order
00100     BM_HM_NO_GAPL = (1 << 4)  ///< no GAP levels
00101 };
00102 
00103 
00104 
00105 #define SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO) \
00106    if (nb == 1) \
00107       enc.put_8(B_1ZERO); \
00108    else if (nb < 256) \
00109    { \
00110       enc.put_8(B_8ZERO); \
00111       enc.put_8((unsigned char)nb); \
00112    } \
00113    else if (nb < 65536) \
00114    { \
00115       enc.put_8(B_16ZERO); \
00116       enc.put_16((unsigned short)nb); \
00117    } \
00118    else \
00119    {\
00120       enc.put_8(B_32ZERO); \
00121       enc.put_32(nb); \
00122    }
00123 
00124 
00125 #define BM_SET_ONE_BLOCKS(x) \
00126     {\
00127          unsigned end_block = i + x; \
00128          for (;i < end_block; ++i) \
00129             bman.set_block_all_set(i); \
00130     } \
00131     --i
00132 
00133 
00134 
00135 
00136 /**
00137     Bit-vector serialization class.
00138     
00139     Class designed to convert sparse bit-vector into a block of memory
00140     ready for file or database storage or network transfer.
00141     
00142     @ingroup bvserial 
00143 */
00144 template<class BV>
00145 class serializer
00146 {
00147 public:
00148     typedef typename BV::allocator_type      allocator_type;
00149     typedef typename BV::blocks_manager_type blocks_manager_type;
00150 public:
00151     serializer(const allocator_type&   alloc  = allocator_type());
00152     ~serializer();
00153 
00154     /**
00155         Set compression level. Higher compression takes more time to process.
00156 
00157         @param clevel - compression level (0-4)
00158     */
00159     void set_compression_level(unsigned clevel);
00160 
00161     /**
00162         Get compression level
00163     */
00164     unsigned get_compression_level() const;
00165     
00166     /**
00167         Bitvector serilization into memory block
00168         
00169         @param bv - input bitvector
00170         @param buf - out buffer (pre-allocated)
00171            No range checking is done in this method. 
00172            It is responsibility of caller to allocate sufficient 
00173            amount of memory using information from calc_stat() function.        
00174         
00175         @param buf_size - size of the output buffer
00176         
00177        @return Size of serialization block.
00178        @sa calc_stat     
00179     */
00180     unsigned serialize(const BV& bv, 
00181                        unsigned char* buf, unsigned buf_size);
00182 
00183     
00184     /**
00185         Set GAP length serialization (serializes GAP levels of the original vector)
00186                 
00187         @param value - when TRUE serialized vector includes GAP levels parameters
00188     */
00189     void gap_length_serialization(bool value);
00190     /**
00191         Set byte-order serialization (for cross platform compatibility)
00192         
00193         @param value - TRUE serialization format includes byte-order marker
00194     */
00195     void byte_order_serialization(bool value);
00196 
00197 protected:
00198     /**
00199         Encode serialization header information
00200     */
00201     void encode_header(const BV& bv, bm::encoder& enc);
00202     
00203     /**
00204         Encode GAP block
00205     */
00206     void encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
00207 
00208     /**
00209         Encode GAP block with Elias Gamma coder
00210     */
00211     void gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
00212 
00213     /**
00214         Encode GAP block as delta-array with Elias Gamma coder
00215     */
00216     void gamma_gap_array(const bm::gap_word_t* gap_block, 
00217                          unsigned              arr_len, 
00218                          bm::encoder&          enc,
00219                          bool                  inverted = false);
00220 
00221     /**
00222         Encode BIT block with repeatable runs of zeroes
00223     */
00224     void encode_bit_interval(const bm::word_t* blk, 
00225                              bm::encoder&      enc,
00226                              unsigned          size_control);
00227 
00228 private:
00229     serializer(const serializer&);
00230     serializer& operator=(const serializer&);
00231 
00232 private:
00233 
00234     typedef bm::bit_out<bm::encoder>  bit_out_type;
00235     typedef bm::gamma_encoder<bm::gap_word_t, bit_out_type> gamma_encoder_func;
00236 
00237 private:
00238     allocator_type alloc_;
00239     bool           gap_serial_;
00240     bool           byte_order_serial_;
00241     bm::word_t*    temp_block_;
00242     unsigned       compression_level_;
00243 };
00244 
00245 /**
00246     Base deserialization class
00247     \ingroup bvserial
00248 */
00249 template<class DEC>
00250 class deseriaizer_base
00251 {
00252 public:
00253     typedef DEC decoder_type;
00254 protected:
00255     deseriaizer_base(){}
00256 
00257     /// Read GAP block from the stream
00258     void read_gap_block(decoder_type&   decoder, 
00259                         unsigned        block_type, 
00260                         bm::gap_word_t* dst_block,
00261                         bm::gap_word_t& gap_head);
00262 };
00263 
00264 /**
00265     Class deserializer
00266     \ingroup bvserial 
00267 */
00268 template<class BV, class DEC>
00269 class deserializer : protected deseriaizer_base<DEC>
00270 {
00271 public:
00272     typedef BV bvector_type;
00273     typedef typename deseriaizer_base<DEC>::decoder_type decoder_type;
00274 //    typedef DEC decoder_type;
00275 public:
00276     deserializer() : temp_block_(0) {}
00277     
00278     unsigned deserialize(bvector_type&        bv, 
00279                          const unsigned char* buf, 
00280                          bm::word_t*          temp_block);
00281 protected:
00282    typedef typename BV::blocks_manager_type blocks_manager_type;
00283    typedef typename BV::allocator_type allocator_type;
00284 
00285 protected:
00286    void deserialize_gap(unsigned char btype, decoder_type& dec, 
00287                         bvector_type&  bv, blocks_manager_type& bman,
00288                         unsigned i,
00289                         bm::word_t* blk);
00290 protected:
00291     bm::gap_word_t   gap_temp_block_[bm::gap_equiv_len * 3];
00292     bm::word_t*      temp_block_;
00293 };
00294 
00295 
00296 /**
00297     Iterator to walk forward the serialized stream.
00298 
00299     \internal
00300     \ingroup bvserial 
00301 */
00302 template<class BV, class SerialIterator>
00303 class iterator_deserializer
00304 {
00305 public:
00306     typedef BV              bvector_type;
00307     typedef SerialIterator  serial_iterator_type;
00308 public:
00309     static
00310     unsigned deserialize(bvector_type&         bv, 
00311                          serial_iterator_type& sit, 
00312                          bm::word_t*           temp_block,
00313                          set_operation         op = bm::set_OR);
00314 private:
00315     typedef typename BV::blocks_manager_type blocks_manager_type;
00316 
00317     /// load data from the iterator of type "id list"
00318     static
00319     void load_id_list(bvector_type&         bv, 
00320                       serial_iterator_type& sit,
00321                       unsigned              id_count,
00322                       bool                  set_clear);
00323 };
00324 
00325 /*!
00326     @brief Serialization stream iterator
00327 
00328     Iterates blocks and control tokens of serialized bit-stream
00329     \ingroup bvserial 
00330 */
00331 template<class DEC>
00332 class serial_stream_iterator : protected deseriaizer_base<DEC>
00333 {
00334 public:
00335     typedef typename deseriaizer_base<DEC>::decoder_type decoder_type;
00336 public:
00337     serial_stream_iterator(const unsigned char* buf);
00338 
00339     /// serialized bitvector size
00340     unsigned bv_size() const { return bv_size_; }
00341 
00342     /// Returns true if end of bit-stream reached 
00343     bool is_eof() const { return end_of_stream_; }
00344 
00345     /// get next block
00346     void next();
00347 
00348     /// read bit block, using logical operation
00349     unsigned get_bit_block(bm::word_t*       dst_block, 
00350                            bm::word_t*       tmp_block,
00351                            set_operation     op);
00352 
00353 
00354     /// Read gap block data (with head)
00355     void get_gap_block(bm::gap_word_t* dst_block);
00356 
00357     /// Return current decoder size
00358     unsigned dec_size() const { return decoder_.size(); }
00359 
00360     /// Get low level access to the decoder (use carefully)
00361     decoder_type& decoder() { return decoder_; }
00362 
00363     /// iterator is a state machine, this enum encodes 
00364     /// its key value
00365     ///
00366     enum iterator_state 
00367     {
00368         e_unknown = 0,
00369         e_list_ids,     ///< plain int array
00370         e_blocks,       ///< stream of blocks
00371         e_zero_blocks,  ///< one or more zero bit blocks
00372         e_one_blocks,   ///< one or more all-1 bit blocks
00373         e_bit_block,    ///< one bit block
00374         e_gap_block     ///< one gap block
00375 
00376     };
00377 
00378     /// Returns iterator internal state
00379     iterator_state state() const { return this->state_; }
00380 
00381     iterator_state get_state() const { return this->state_; }
00382     /// Number of ids in the inverted list (valid for e_list_ids)
00383     unsigned get_id_count() const { return this->id_cnt_; }
00384 
00385     /// Get last id from the id list
00386     bm::id_t get_id() const { return this->last_id_; }
00387 
00388     /// Get current block index 
00389     unsigned block_idx() const { return this->block_idx_; }
00390 
00391 public:
00392     /// member function pointer for bitset-bitset get operations
00393     /// 
00394     typedef 
00395         unsigned (serial_stream_iterator<DEC>::*get_bit_func_type)
00396                                                 (bm::word_t*,bm::word_t*);
00397 
00398     unsigned 
00399     get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
00400     unsigned 
00401     get_bit_block_OR    (bm::word_t* dst_block, bm::word_t* tmp_block);
00402     unsigned 
00403     get_bit_block_AND   (bm::word_t* dst_block, bm::word_t* tmp_block);
00404     unsigned 
00405     get_bit_block_SUB   (bm::word_t* dst_block, bm::word_t* tmp_block);
00406     unsigned 
00407     get_bit_block_XOR   (bm::word_t* dst_block, bm::word_t* tmp_block);
00408     unsigned 
00409     get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
00410     unsigned 
00411     get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
00412     unsigned 
00413     get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
00414     unsigned 
00415     get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
00416     unsigned 
00417     get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
00418     unsigned 
00419     get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
00420     unsigned 
00421     get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
00422     unsigned 
00423     get_bit_block_COUNT_B(bm::word_t* dst_block, bm::word_t* tmp_block)
00424     {
00425         return get_bit_block_COUNT(dst_block, tmp_block);
00426     }
00427 
00428     /// Get array of bits out of the decoder into bit block
00429     /// (Converts inverted list into bits)
00430     /// Returns number of words (bits) being read
00431     unsigned get_arr_bit(bm::word_t* dst_block, 
00432                          bool clear_target=true);
00433 
00434 protected:
00435     get_bit_func_type  bit_func_table_[bm::set_END];
00436 
00437     decoder_type       decoder_;
00438     bool               end_of_stream_;
00439     unsigned           bv_size_;
00440     iterator_state     state_;
00441     unsigned           id_cnt_;  ///< Id counter for id list
00442     bm::id_t           last_id_; ///< Last id from the id list
00443     gap_word_t         glevels_[bm::gap_levels]; ///< GAP levels
00444 
00445     unsigned           block_type_; ///< current block type
00446     unsigned           block_idx_;  ///< current block index
00447     unsigned           mono_block_cnt_; ///< number of 0 or 1 blocks
00448 
00449     gap_word_t         gap_head_;
00450 };
00451 
00452 /**
00453     Class deserializer, can perform logical operation on bit-vector and
00454     serialized bit-vector.
00455 
00456     \ingroup bvserial 
00457 */
00458 template<class BV>
00459 class operation_deserializer
00460 {
00461 public:
00462     typedef BV bvector_type;
00463 public:
00464     static
00465     unsigned deserialize(bvector_type&        bv, 
00466                          const unsigned char* buf, 
00467                          bm::word_t*          temp_block,
00468                          set_operation        op = bm::set_OR);
00469 private:
00470     typedef 
00471         typename BV::blocks_manager_type               blocks_manager_type;
00472     typedef 
00473         serial_stream_iterator<bm::decoder>            serial_stream_current;
00474     typedef 
00475         serial_stream_iterator<bm::decoder_big_endian> serial_stream_be;
00476     typedef 
00477         serial_stream_iterator<bm::decoder_little_endian> serial_stream_le;
00478 
00479 };
00480 
00481 
00482 
00483 
00484 
00485 //---------------------------------------------------------------------
00486 
00487 template<class BV>
00488 serializer<BV>::serializer(const allocator_type&   alloc)
00489 : alloc_(alloc),
00490   gap_serial_(false),
00491   byte_order_serial_(true),
00492   temp_block_(0),
00493   compression_level_(3)
00494 {
00495     temp_block_ = alloc_.alloc_bit_block();
00496 }
00497 
00498 template<class BV>
00499 void serializer<BV>::set_compression_level(unsigned clevel)
00500 {
00501     compression_level_ = clevel;
00502 }
00503 
00504 template<class BV>
00505 unsigned serializer<BV>::get_compression_level() const
00506 {
00507     return compression_level_;
00508 }
00509 
00510 template<class BV>
00511 serializer<BV>::~serializer()
00512 {
00513     alloc_.free_bit_block(temp_block_);
00514 }
00515 
00516 
00517 template<class BV>
00518 void serializer<BV>::gap_length_serialization(bool value)
00519 {
00520     gap_serial_ = value;
00521 }
00522 
00523 template<class BV>
00524 void serializer<BV>::byte_order_serialization(bool value)
00525 {
00526     byte_order_serial_ = value;
00527 }
00528 
00529 template<class BV>
00530 void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc)
00531 {
00532     const blocks_manager_type& bman = bv.get_blocks_manager();
00533 
00534     unsigned char header_flag = 0;
00535     if (bv.size() == bm::id_max) // no dynamic resize
00536         header_flag |= BM_HM_DEFAULT;
00537     else 
00538         header_flag |= BM_HM_RESIZE;
00539 
00540     if (!byte_order_serial_) 
00541         header_flag |= BM_HM_NO_BO;
00542 
00543     if (!gap_serial_) 
00544         header_flag |= BM_HM_NO_GAPL;
00545 
00546     enc.put_8(header_flag);
00547 
00548     if (byte_order_serial_)
00549     {
00550         ByteOrder bo = globals<true>::byte_order();
00551         enc.put_8((unsigned char)bo);
00552     }
00553 
00554     // keep GAP levels information
00555     if (gap_serial_)
00556     {
00557         enc.put_16(bman.glen(), bm::gap_levels);
00558     }
00559 
00560     // save size (only if bvector has been down-sized)
00561     if (header_flag & BM_HM_RESIZE) 
00562     {
00563         enc.put_32(bv.size());
00564     }
00565     
00566 }
00567 
00568 template<class BV>
00569 void serializer<BV>::gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc)
00570 {
00571     unsigned len = gap_length(gap_block);
00572 
00573     // Use Elias Gamma encoding 
00574     if (len > 6 && (compression_level_ > 3)) 
00575     {
00576         encoder::position_type enc_pos0 = enc.get_pos();
00577         {
00578             bit_out_type bout(enc);
00579             gamma_encoder_func gamma(bout);
00580 
00581             enc.put_8(set_block_gap_egamma);        
00582             enc.put_16(gap_block[0]);
00583 
00584             for_each_dgap(gap_block, gamma);        
00585         }
00586 
00587         // evaluate gamma coding efficiency
00588         encoder::position_type enc_pos1 = enc.get_pos();
00589         unsigned gamma_size = enc_pos1 - enc_pos0;        
00590         if (gamma_size > (len-1)*sizeof(gap_word_t))
00591         {
00592             enc.set_pos(enc_pos0);
00593         }
00594         else
00595         {
00596             return;
00597         }
00598     }
00599 
00600     // save as plain GAP block 
00601     enc.put_8(set_block_gap);
00602     enc.put_16(gap_block, len-1);
00603 }
00604 
00605 template<class BV>
00606 void serializer<BV>::gamma_gap_array(const bm::gap_word_t* gap_array, 
00607                                      unsigned              arr_len, 
00608                                      bm::encoder&          enc,
00609                                      bool                  inverted)
00610 {
00611     if (compression_level_ > 3 && arr_len > 25)
00612     {        
00613         encoder::position_type enc_pos0 = enc.get_pos();
00614         {
00615             bit_out_type bout(enc);
00616 
00617             enc.put_8(
00618                 inverted ? set_block_arrgap_egamma_inv 
00619                          : set_block_arrgap_egamma);
00620 
00621             bout.gamma(arr_len);
00622 
00623             gap_word_t prev = gap_array[0];
00624             bout.gamma(prev + 1);
00625 
00626             for (unsigned i = 1; i < arr_len; ++i)
00627             {
00628                 gap_word_t curr = gap_array[i];
00629                 bout.gamma(curr - prev);
00630                 prev = curr;
00631             }
00632         }
00633 
00634         encoder::position_type enc_pos1 = enc.get_pos();
00635         unsigned gamma_size = enc_pos1 - enc_pos0;            
00636         if (gamma_size > (arr_len)*sizeof(gap_word_t))
00637         {
00638             enc.set_pos(enc_pos0);
00639         }
00640         else
00641         {
00642             return;
00643         }
00644     }
00645 
00646     // save as an plain array
00647     enc.put_prefixed_array_16(inverted ? set_block_arrgap_inv : set_block_arrgap, 
00648                               gap_array, arr_len, true);
00649 }
00650 
00651 
00652 template<class BV>
00653 void serializer<BV>::encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc)
00654 {
00655     if (compression_level_ > 2)
00656     {
00657         gap_word_t*  gap_temp_block = (gap_word_t*) temp_block_;    
00658         gap_word_t arr_len;
00659 
00660         unsigned bc = gap_bit_count(gap_block);
00661         if (bc == 1)
00662         {
00663             arr_len = gap_convert_to_arr(gap_temp_block,
00664                                          gap_block,
00665                                          bm::gap_equiv_len-10);
00666             BM_ASSERT(arr_len == 1);
00667             enc.put_8(set_block_bit_1bit);              
00668             enc.put_16(gap_temp_block[0]);
00669             return;
00670         }
00671 
00672         unsigned len = gap_length(gap_block);
00673         bool invert, use_array;
00674         invert = use_array = false;
00675         
00676         if (bc < len-1) 
00677         {
00678             use_array = true;
00679         }
00680         else  // inverted array is a better alternative?
00681         {
00682             unsigned inverted_bc = bm::gap_max_bits - bc;
00683             if (inverted_bc < len-1)
00684             {
00685                 use_array = invert = true;
00686             }
00687         }
00688         if (use_array)
00689         {
00690             arr_len = gap_convert_to_arr(gap_temp_block,
00691                                          gap_block,
00692                                          bm::gap_equiv_len-10,
00693                                          invert);
00694             if (arr_len)
00695             {
00696                 gamma_gap_array(gap_temp_block, arr_len, enc, invert);
00697                 return;
00698             }
00699         }
00700     }
00701 
00702     gamma_gap_block(gap_block, enc);
00703 }
00704 
00705 template<class BV>
00706 void serializer<BV>::encode_bit_interval(const bm::word_t* blk, 
00707                                          bm::encoder&      enc,
00708                                          unsigned          //size_control
00709                                          )
00710 {
00711     enc.put_8(set_block_bit_0runs);
00712     enc.put_8((blk[0]==0) ? 0 : 1); // encode start 
00713 
00714     unsigned i,j;//,k;
00715 
00716     for (i = 0; i < bm::set_block_size; ++i)
00717     {
00718         if (blk[i] == 0)
00719         {
00720             // scan fwd to find 0 island length
00721             for (j = i+1; j < bm::set_block_size; ++j)
00722             {
00723                 if (blk[j] != 0)
00724                     break;
00725             }
00726             enc.put_16(j-i); 
00727             i = j - 1;
00728         }
00729         else
00730         {
00731             // scan fwd to find non-0 island length
00732             for (j = i+1; j < bm::set_block_size; ++j)
00733             {
00734                 if (blk[j] == 0)
00735                 {
00736                     // look ahead to identify and ignore short 0-run
00737                     if (((j+1 < bm::set_block_size) && blk[j+1]) ||
00738                         ((j+2 < bm::set_block_size) && blk[j+2])
00739                        )
00740                     {
00741                         ++j; // skip zero word
00742                         continue;
00743                     }
00744                     break;
00745                 }
00746             }
00747             enc.put_16(j-i); 
00748             // stream all bit-words now
00749             BM_ASSERT(i < j);
00750             enc.put_32(blk + i, j - i);
00751             i = j - 1;
00752         }
00753     }
00754 }
00755 
00756 
00757 template<class BV>
00758 unsigned serializer<BV>::serialize(const BV& bv, 
00759                                    unsigned char* buf, unsigned buf_size)
00760 {
00761     BM_ASSERT(temp_block_);
00762     
00763     const blocks_manager_type& bman = bv.get_blocks_manager();
00764 
00765     gap_word_t*  gap_temp_block = (gap_word_t*) temp_block_;
00766     
00767     bm::encoder enc(buf, buf_size);  // create the encoder
00768     encode_header(bv, enc);
00769 
00770     unsigned i,j;
00771 
00772 
00773     // save blocks.
00774     for (i = 0; i < bm::set_total_blocks; ++i)
00775     {
00776         bm::word_t* blk = bman.get_block(i);
00777         // -----------------------------------------
00778         // Empty or ONE block serialization
00779 
00780         bool flag;
00781         flag = bman.is_block_zero(i, blk, false);
00782         if (flag)
00783         {
00784         zero_block:
00785             unsigned next_nb = bman.find_next_nz_block(i+1, false);
00786             if (next_nb == bm::set_total_blocks) // no more blocks
00787             {
00788                 enc.put_8(set_block_azero);
00789                 return enc.size();
00790             }
00791             unsigned nb = next_nb - i;
00792             
00793             if (nb > 1 && nb < 128)
00794             {
00795                 // special (but frequent) case -- GAP delta fits in 7bits
00796                 unsigned char c = (1 << 7) | nb;
00797                 enc.put_8(c);
00798             }
00799             else 
00800             {
00801                 SER_NEXT_GRP(enc, nb, set_block_1zero, 
00802                                       set_block_8zero, 
00803                                       set_block_16zero, 
00804                                       set_block_32zero) 
00805             }
00806             i = next_nb - 1;
00807             continue;
00808         }
00809         else
00810         {
00811             flag = bman.is_block_one(i, blk, false);
00812             if (flag)
00813             {
00814                 // Look ahead for similar blocks
00815                 for(j = i+1; j < bm::set_total_blocks; ++j)
00816                 {
00817                    bm::word_t* blk_next = bman.get_block(j);
00818                    if (flag != bman.is_block_one(j, blk_next, false))
00819                        break;
00820                 }
00821                 if (j == bm::set_total_blocks)
00822                 {
00823                     enc.put_8(set_block_aone);
00824                     break;
00825                 }
00826                 else
00827                 {
00828                    unsigned nb = j - i;
00829                    SER_NEXT_GRP(enc, nb, set_block_1one, 
00830                                          set_block_8one, 
00831                                          set_block_16one, 
00832                                          set_block_32one) 
00833                 }
00834                 i = j - 1;
00835                 continue;
00836             }
00837         }
00838 
00839         // ------------------------------
00840         // GAP serialization
00841 
00842         if (BM_IS_GAP(bman, blk, i))
00843         {
00844             gap_word_t* gblk = BMGAP_PTR(blk);
00845             encode_gap_block(gblk, enc);
00846             continue;
00847         }
00848                 
00849         // ----------------------------------------------
00850         // BIT BLOCK serialization
00851 
00852         {
00853         if (compression_level_ <= 1)
00854         {
00855             enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00856             continue;            
00857         }
00858 
00859         // compute bit-block statistics: bit-count and number of GAPS
00860         unsigned block_bc = 0;
00861         bm::id_t bit_gaps = 
00862             bit_block_calc_count_change(blk, blk + bm::set_block_size,
00863                                         &block_bc);
00864         unsigned block_bc_inv = bm::gap_max_bits - block_bc;
00865         switch (block_bc)
00866         {
00867         case 1: // corner case: only 1 bit on
00868         {
00869             bm::id_t bit_idx = 0;
00870             bit_find_in_block(blk, bit_idx, &bit_idx);
00871             enc.put_8(set_block_bit_1bit); enc.put_16((short)bit_idx);
00872             continue;
00873         }
00874         case 0: goto zero_block; // empty block
00875         }
00876        
00877        
00878         // compute alternative representation sizes
00879         //
00880         unsigned arr_block_size = sizeof(gap_word_t) + (block_bc * sizeof(gap_word_t));
00881         unsigned arr_block_size_inv = sizeof(gap_word_t) + (block_bc_inv * sizeof(gap_word_t));
00882         unsigned gap_block_size = sizeof(gap_word_t) + ((bit_gaps+1) * sizeof(gap_word_t)); 
00883         unsigned interval_block_size;
00884         interval_block_size = bit_count_nonzero_size(blk, bm::set_block_size);
00885         bool inverted = false;
00886 
00887         if (arr_block_size_inv < arr_block_size &&
00888             arr_block_size_inv < gap_block_size &&
00889             arr_block_size_inv < interval_block_size)
00890         {
00891             inverted = true;
00892             goto bit_as_array;
00893         }
00894 
00895         // if interval representation is not a good alternative
00896         if ((interval_block_size > arr_block_size) || 
00897             (interval_block_size > gap_block_size))
00898         {
00899             if (gap_block_size < (bm::gap_equiv_len-64) &&
00900                 gap_block_size < arr_block_size)
00901             {
00902                 unsigned len = bit_convert_to_gap(gap_temp_block, 
00903                                                   blk, 
00904                                                   bm::gap_max_bits, 
00905                                                   bm::gap_equiv_len-64);
00906                 if (len) // save as GAP
00907                 {
00908                     gamma_gap_block(gap_temp_block, enc);
00909                     continue;
00910                 }
00911             }
00912             
00913             if (arr_block_size < ((bm::gap_equiv_len-64) * sizeof(gap_word_t)))
00914             {
00915             bit_as_array:
00916                 gap_word_t arr_len;
00917                 unsigned mask = inverted ? ~0 : 0;
00918                 arr_len = bit_convert_to_arr(gap_temp_block, 
00919                                              blk, 
00920                                              bm::gap_max_bits, 
00921                                              bm::gap_equiv_len-64,
00922                                              mask);
00923                 if (arr_len)
00924                 {
00925                     gamma_gap_array(gap_temp_block, arr_len, enc, inverted);
00926                     continue;
00927                 }
00928                 
00929             }
00930             // full bit-block
00931             enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00932             continue;            
00933         }
00934         
00935         // if interval block is a winner
00936         if (interval_block_size < arr_block_size &&
00937             interval_block_size < gap_block_size)
00938         {
00939             encode_bit_interval(blk, enc, interval_block_size);
00940             continue;
00941         }
00942         
00943         if (gap_block_size < bm::gap_equiv_len &&
00944             gap_block_size < arr_block_size)
00945         {
00946             unsigned len = bit_convert_to_gap(gap_temp_block, 
00947                                               blk, 
00948                                               bm::gap_max_bits, 
00949                                               bm::gap_equiv_len-64);
00950             if (len) // save as GAP
00951             {
00952                 gamma_gap_block(gap_temp_block, enc);
00953                 continue;
00954             }
00955         }
00956         
00957          
00958         // if array is best
00959         if (arr_block_size < bm::gap_equiv_len-64)
00960         {
00961             goto bit_as_array;
00962         }
00963         
00964         // full bit-block
00965         enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00966         continue;            
00967         }
00968     }
00969 
00970     enc.put_8(set_block_end);
00971 
00972     unsigned encoded_size = enc.size();
00973     return encoded_size;
00974 
00975 }
00976 
00977 
00978 
00979 /// Bit mask flags for serialization algorithm
00980 /// \ingroup bvserial 
00981 enum serialization_flags {
00982     BM_NO_BYTE_ORDER = 1,       ///< save no byte-order info (save some space)
00983     BM_NO_GAP_LENGTH = (1 << 1) ///< save no GAP info (save some space)
00984 };
00985 
00986 /*!
00987    \brief Saves bitvector into memory.
00988 
00989    Function serializes content of the bitvector into memory.
00990    Serialization adaptively uses compression(variation of GAP encoding) 
00991    when it is benefitial. 
00992 
00993    \param buf - pointer on target memory area. No range checking in the
00994    function. It is responsibility of programmer to allocate sufficient 
00995    amount of memory using information from calc_stat function.
00996 
00997    \param temp_block - pointer on temporary memory block. Cannot be 0; 
00998    If you want to save memory across multiple bvectors
00999    allocate temporary block using allocate_tempblock and pass it to 
01000    serialize.
01001    (Of course serialize does not deallocate temp_block.)
01002 
01003    \param serialization_flags
01004    Flags controlling serilization (bit-mask) 
01005    (use OR-ed serialization flags)
01006 
01007    \ingroup bvserial 
01008 
01009    \return Size of serialization block.
01010    \sa calc_stat, serialization_flags
01011 
01012 */
01013 /*
01014  Serialization format:
01015  <pre>
01016 
01017  | HEADER | BLOCKS |
01018 
01019  Header structure:
01020    BYTE : Serialization header (bit mask of BM_HM_*)
01021    BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
01022    INT16: Reserved (0)
01023    INT16: Reserved Flags (0)
01024 
01025  </pre>
01026 */
01027 template<class BV>
01028 unsigned serialize(const BV& bv, 
01029                    unsigned char* buf, 
01030                    bm::word_t*    temp_block,
01031                    unsigned       serialization_flags = 0)
01032 {
01033     bm::serializer<BV> bv_serial;
01034     if (serialization_flags & BM_NO_BYTE_ORDER) 
01035         bv_serial.byte_order_serialization(false);
01036         
01037     if (serialization_flags & BM_NO_GAP_LENGTH) 
01038         bv_serial.gap_length_serialization(false);
01039     else
01040         bv_serial.gap_length_serialization(true);
01041 
01042     bv_serial.set_compression_level(4);
01043     
01044     return bv_serial.serialize(bv, buf, 0);
01045 }
01046 
01047 /*!
01048    @brief Saves bitvector into memory.
01049    Allocates temporary memory block for bvector.
01050    \ingroup bvserial 
01051 */
01052 
01053 template<class BV>
01054 unsigned serialize(BV& bv, 
01055                    unsigned char* buf, 
01056                    unsigned  serialization_flags=0)
01057 {
01058     bm::serializer<BV> bv_serial;
01059     if (serialization_flags & BM_NO_BYTE_ORDER) 
01060         bv_serial.byte_order_serialization(false);
01061         
01062     if (serialization_flags & BM_NO_GAP_LENGTH) 
01063         bv_serial.gap_length_serialization(false);
01064     else
01065         bv_serial.gap_length_serialization(true);
01066 
01067     bv_serial.set_compression_level(4);
01068     
01069     return bv_serial.serialize(bv, buf, 0);
01070 }
01071 
01072 
01073 
01074 /*!
01075     @brief Bitvector deserialization from memory.
01076 
01077     @param buf - pointer on memory which keeps serialized bvector
01078     @param temp_block - pointer on temporary block, 
01079             if NULL bvector allocates own.
01080     @return Number of bytes consumed by deserializer.
01081 
01082     Function desrializes bitvector from memory block containig results
01083     of previous serialization. Function does not remove bits 
01084     which are currently set. Effectively it means OR logical operation 
01085     between current bitset and previously serialized one.
01086 
01087     @ingroup bvserial
01088 */
01089 template<class BV>
01090 unsigned deserialize(BV& bv, 
01091                      const unsigned char* buf, 
01092                      bm::word_t* temp_block=0)
01093 {
01094     ByteOrder bo_current = globals<true>::byte_order();
01095 
01096     bm::decoder dec(buf);
01097     unsigned char header_flag = dec.get_8();
01098     ByteOrder bo = bo_current;
01099     if (!(header_flag & BM_HM_NO_BO))
01100     {
01101         bo = (bm::ByteOrder) dec.get_8();
01102     }
01103 
01104     if (bo_current == bo)
01105     {
01106         deserializer<BV, bm::decoder> deserial;
01107         return deserial.deserialize(bv, buf, temp_block);
01108     }
01109     switch (bo_current) 
01110     {
01111     case BigEndian:
01112         {
01113         deserializer<BV, bm::decoder_big_endian> deserial;
01114         return deserial.deserialize(bv, buf, temp_block);
01115         }
01116     case LittleEndian:
01117         {
01118         deserializer<BV, bm::decoder_little_endian> deserial;
01119         return deserial.deserialize(bv, buf, temp_block);
01120         }
01121     default:
01122         BM_ASSERT(0);
01123     };
01124     return 0;
01125 }
01126 
01127 
01128 template<class DEC>
01129 void deseriaizer_base<DEC>::read_gap_block(decoder_type&   decoder, 
01130                                            unsigned        block_type, 
01131                                            bm::gap_word_t* dst_block,
01132                                            bm::gap_word_t& gap_head)
01133 {
01134     typedef bit_in<DEC> bit_in_type;
01135 
01136     switch (block_type)
01137     {
01138     case set_block_gap:
01139         {
01140             unsigned len = gap_length(&gap_head);
01141             --len;
01142             *dst_block = gap_head;
01143             decoder.get_16(dst_block+1, len - 1);
01144             dst_block[len] = gap_max_bits - 1;
01145         }
01146         break;
01147     case set_block_bit_1bit:
01148         {
01149             gap_word_t bit_idx = decoder.get_16();
01150             unsigned is_set;
01151             /* unsigned new_block_len = */
01152                 gap_set_value(true, dst_block, bit_idx, &is_set);
01153         }
01154         break;
01155     case set_block_arrgap:
01156     case set_block_arrgap_inv:
01157         {
01158             gap_set_all(dst_block, bm::gap_max_bits, 0);
01159             gap_word_t len = decoder.get_16();
01160 
01161             for (gap_word_t k = 0; k < len; ++k)
01162             {
01163                 gap_word_t bit_idx = decoder.get_16();
01164                 unsigned is_set;
01165                 /* unsigned new_block_len = */
01166                     gap_set_value(true, dst_block, bit_idx, &is_set);
01167             } // for
01168         }
01169         break;
01170     case set_block_arrgap_egamma:
01171     case set_block_arrgap_egamma_inv:
01172         {
01173             bit_in_type bin(decoder);
01174 
01175             gap_set_all(dst_block, bm::gap_max_bits, 0);
01176             gap_word_t len = bin.gamma();
01177 
01178             gap_word_t prev = 0;
01179             for (gap_word_t k = 0; k < len; ++k)
01180             {
01181                 gap_word_t bit_idx = bin.gamma();
01182                 if (k == 0) --bit_idx; // TODO: optimization
01183                 bit_idx += prev;
01184                 prev = bit_idx;
01185 
01186                 unsigned is_set;
01187                 /* unsigned new_block_len = */
01188                     gap_set_value(true, dst_block, bit_idx, &is_set);
01189             } // for
01190         }
01191         break;
01192     case set_block_gap_egamma:
01193         {
01194         unsigned len = (gap_head >> 3);
01195         --len;
01196         // read gamma GAP block into a dest block
01197         {
01198             *dst_block = gap_head;
01199             gap_word_t* gap_data_ptr = dst_block + 1;
01200 
01201             bit_in_type bin(decoder);
01202             {
01203                 gap_word_t gap_sum = 0;
01204                 for (unsigned i = 0; i < len; ++i, ++gap_data_ptr)
01205                 {
01206                     gap_word_t v = bin.gamma();
01207                     if (i == 0) --v; // TODO: optimization out of the loop
01208                     gap_sum += v;
01209                     *gap_data_ptr = gap_sum;
01210                 } 
01211                 dst_block[len+1] = bm::gap_max_bits - 1;
01212             }
01213         }
01214 
01215         }
01216         break;        
01217     default:
01218         BM_ASSERT(0);
01219     }
01220 
01221     if (block_type == set_block_arrgap_egamma_inv || 
01222         block_type == set_block_arrgap_inv)
01223     {
01224         gap_invert(dst_block);
01225     }
01226 
01227 }
01228 
01229 
01230 template<class BV, class DEC>
01231 void 
01232 deserializer<BV, DEC>::deserialize_gap(unsigned char btype, decoder_type& dec, 
01233                                        bvector_type&  bv, blocks_manager_type& bman,
01234                                        unsigned i,
01235                                        bm::word_t* blk)
01236 {
01237     typedef bit_in<DEC> bit_in_type;
01238     gap_word_t gap_head; 
01239 
01240     switch (btype)
01241     {
01242     case set_block_gap: 
01243     case set_block_gapbit:
01244     {
01245         gap_word_t gap_head = 
01246             sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32();
01247 
01248         unsigned len = gap_length(&gap_head);
01249         int level = gap_calc_level(len, bman.glen());
01250         --len;
01251         if (level == -1)  // Too big to be GAP: convert to BIT block
01252         {
01253             *gap_temp_block_ = gap_head;
01254             dec.get_16(gap_temp_block_+1, len - 1);
01255             gap_temp_block_[len] = gap_max_bits - 1;
01256 
01257             if (blk == 0)  // block does not exist yet
01258             {
01259                 blk = bman.get_allocator().alloc_bit_block();
01260                 bman.set_block(i, blk);
01261                 gap_convert_to_bitset(blk, gap_temp_block_);                
01262             }
01263             else  // We have some data already here. Apply OR operation.
01264             {
01265                 gap_convert_to_bitset(temp_block_, 
01266                                       gap_temp_block_);
01267 
01268                 bv.combine_operation_with_block(i, 
01269                                                 temp_block_, 
01270                                                 0, 
01271                                                 BM_OR);
01272             }
01273 
01274             return;
01275         } // level == -1
01276 
01277         set_gap_level(&gap_head, level);
01278 
01279         if (blk == 0)
01280         {
01281             gap_word_t* gap_blk = 
01282               bman.get_allocator().alloc_gap_block(level, bman.glen());
01283             gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
01284             *gap_blk_ptr = gap_head;
01285             set_gap_level(gap_blk_ptr, level);
01286             bman.set_block(i, (bm::word_t*)gap_blk);
01287             bman.set_block_gap(i);
01288             for (unsigned k = 1; k < len; ++k) 
01289             {
01290                  gap_blk[k] = dec.get_16();
01291             }
01292             gap_blk[len] = bm::gap_max_bits - 1;
01293         }
01294         else // target block exists
01295         {
01296             // read GAP block into a temp memory and perform OR
01297             *gap_temp_block_ = gap_head;
01298             for (unsigned k = 1; k < len; ++k) 
01299             {
01300                  gap_temp_block_[k] = dec.get_16();
01301             }
01302             gap_temp_block_[len] = bm::gap_max_bits - 1;
01303 
01304             bv.combine_operation_with_block(i, 
01305                                            (bm::word_t*)gap_temp_block_, 
01306                                             1, 
01307                                             BM_OR);
01308         }
01309         return;
01310     }
01311     case set_block_arrgap: 
01312     {
01313         gap_word_t len = dec.get_16();
01314         int block_type;
01315         unsigned k = 0;
01316         for (; k < len; )
01317         {
01318             bm::word_t* blk = bman.check_allocate_block(i,
01319                                                         true,
01320                                                         BM_GAP,
01321                                                         &block_type,
01322                                                         true // allow null return
01323                                                         );
01324             // block is all 1, no need to do anything
01325             if (!blk)
01326             {
01327                 // read the encoding stream to skip to the next block
01328                 for (; k < len; ++k)
01329                     dec.get_16();
01330                 return;
01331             }
01332 
01333             if (block_type == 1) // gap block
01334             {            
01335                 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01336                 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01337                 
01338                 for (; k < len; ++k)
01339                 {
01340                     gap_word_t bit_idx = dec.get_16();
01341                     unsigned is_set;
01342                     unsigned new_block_len =
01343                         gap_set_value(true, gap_blk, bit_idx, &is_set);
01344                     if (new_block_len > threshold) 
01345                     {
01346                         bman.extend_gap_block(i, gap_blk);
01347                         ++k;
01348                         break;
01349                     }
01350                 } // for
01351             }
01352             else // bit block
01353             {
01354                 // Get the array one by one and set the bits.
01355                 for(;k < len; ++k)
01356                 {
01357                     gap_word_t bit_idx = dec.get_16();
01358                     or_bit_block(blk, bit_idx, 1);
01359                 }
01360             }
01361         } // for
01362         return;
01363     }
01364     case set_block_arrgap_egamma:
01365     { 
01366         bit_in_type bin(dec);
01367         int block_type;
01368 
01369         gap_word_t len = bin.gamma();
01370         gap_word_t prev = 0;
01371 
01372         unsigned k = 0;
01373         for (; k < len; )
01374         {
01375             bm::word_t* blk = bman.check_allocate_block(i,
01376                                                         true,
01377                                                         BM_GAP,
01378                                                         &block_type,
01379                                                         true // allow null return
01380                                                         );
01381             // block is all 1, no need to do anything
01382             if (!blk)
01383             {
01384                 // read the encoding stream to skip to the next block
01385                 for (; k < len; ++k)
01386                     bin.gamma();
01387                 return;
01388             }
01389 
01390             if (block_type == 1) // gap block
01391             {            
01392                 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01393                 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01394                 
01395                 for (; k < len; ++k)
01396                 {
01397                     gap_word_t bit_idx = bin.gamma();
01398                     if (k==0) --bit_idx; //TODO: optimization
01399                     bit_idx += prev;
01400                     prev = bit_idx;
01401 
01402                     unsigned is_set;
01403                     unsigned new_block_len =
01404                         gap_set_value(true, gap_blk, bit_idx, &is_set);
01405                     if (new_block_len > threshold) 
01406                     {
01407                         bman.extend_gap_block(i, gap_blk);
01408                         ++k;
01409                         break;
01410                     }
01411                 } // for
01412             }
01413             else // bit block
01414             {
01415                 // Get the array one by one and set the bits.
01416                 for(;k < len; ++k)
01417                 {
01418                     gap_word_t bit_idx = bin.gamma();
01419                     if (k==0) --bit_idx; //TODO: optimization
01420                     bit_idx += prev;
01421                     prev = bit_idx;
01422 
01423                     or_bit_block(blk, bit_idx, 1);
01424                 }
01425             }
01426         } // for
01427         return;
01428 
01429     }
01430     case set_block_gap_egamma:            
01431         gap_head = 
01432             sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32();
01433     case set_block_arrgap_egamma_inv:
01434     case set_block_arrgap_inv:    
01435         read_gap_block(dec, btype, gap_temp_block_, gap_head);
01436 
01437         // OR temp block into the target vector
01438         //
01439         bv.combine_operation_with_block(i, 
01440                                        (bm::word_t*)gap_temp_block_, 
01441                                         1, 
01442                                         BM_OR);
01443 
01444         return;
01445     default:
01446         BM_ASSERT(0);
01447     }
01448 
01449 }
01450 
01451 
01452 template<class BV, class DEC>
01453 unsigned deserializer<BV, DEC>::deserialize(bvector_type&        bv, 
01454                                             const unsigned char* buf,
01455                                             bm::word_t*          temp_block)
01456 {
01457     blocks_manager_type& bman = bv.get_blocks_manager();
01458 
01459     bm::wordop_t* tmp_buf = 
01460         temp_block ? (bm::wordop_t*) temp_block 
01461                    : (bm::wordop_t*)bman.check_allocate_tempblock();
01462 
01463     temp_block_ = temp_block = (word_t*)tmp_buf;
01464 
01465     decoder_type dec(buf);
01466 
01467     bv.forget_count();
01468 
01469     BM_SET_MMX_GUARD
01470 
01471     // Reading header
01472 
01473     unsigned char header_flag =  dec.get_8();
01474     if (!(header_flag & BM_HM_NO_BO))
01475     {
01476         /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8();
01477     }
01478 
01479     if (header_flag & BM_HM_ID_LIST)
01480     {
01481         // special case: the next comes plain list of integers
01482         if (header_flag & BM_HM_RESIZE)
01483         {
01484             unsigned bv_size = dec.get_32();
01485             if (bv_size > bv.size())
01486             {
01487                 bv.resize(bv_size);
01488             }
01489         }
01490         
01491 
01492         for (unsigned cnt = dec.get_32(); cnt; --cnt) {
01493             bm::id_t id = dec.get_32();
01494             bv.set(id);
01495         } // for
01496         // -1 for compatibility with other deserialization branches
01497         return dec.size()-1;
01498     }
01499 
01500     unsigned i;
01501 
01502     if (!(header_flag & BM_HM_NO_GAPL)) 
01503     {
01504         gap_word_t glevels[bm::gap_levels];
01505         // read GAP levels information
01506         for (i = 0; i < bm::gap_levels; ++i)
01507         {
01508             glevels[i] = dec.get_16();
01509         }
01510     }
01511 
01512     if (header_flag & (1 << 1))
01513     {
01514         unsigned bv_size = dec.get_32();
01515         if (bv_size > bv.size())
01516         {
01517             bv.resize(bv_size);
01518         }
01519     }
01520 
01521     unsigned char btype;
01522     unsigned nb;
01523 
01524     for (i = 0; i < bm::set_total_blocks; ++i)
01525     {
01526         btype = dec.get_8();
01527         bm::word_t* blk = bman.get_block(i);
01528         // pre-check if we have short zero-run packaging here
01529         //
01530         if (btype & (1 << 7))
01531         {
01532             nb = btype & ~(1 << 7);
01533             i += nb-1;
01534             continue;
01535         }        
01536 
01537         switch (btype)
01538         {
01539         case set_block_azero: 
01540         case set_block_end:
01541             i = bm::set_total_blocks;
01542             break;
01543         case set_block_1zero:
01544             continue;
01545         case set_block_8zero:
01546             nb = dec.get_8();
01547             i += nb-1;
01548             continue;
01549         case set_block_16zero:
01550             nb = dec.get_16();
01551             i += nb-1;
01552             continue;
01553         case set_block_32zero:
01554             nb = dec.get_32();
01555             i += nb-1;
01556             continue;
01557         case set_block_aone:
01558             for (;i < bm::set_total_blocks; ++i)
01559             {
01560                 bman.set_block_all_set(i);
01561             }
01562             break;
01563         case set_block_1one:
01564             bman.set_block_all_set(i);
01565             continue;
01566         case set_block_8one:
01567             BM_SET_ONE_BLOCKS(dec.get_8());
01568             continue;
01569         case set_block_16one:
01570             BM_SET_ONE_BLOCKS(dec.get_16());
01571             continue;
01572         case set_block_32one:
01573             BM_SET_ONE_BLOCKS(dec.get_32());
01574             continue;
01575         case set_block_bit: 
01576         {
01577             if (blk == 0)
01578             {
01579                 blk = bman.get_allocator().alloc_bit_block();
01580                 bman.set_block(i, blk);
01581                 dec.get_32(blk, bm::set_block_size);
01582                 continue;                
01583             }
01584             dec.get_32(temp_block, bm::set_block_size);
01585             bv.combine_operation_with_block(i, 
01586                                             temp_block, 
01587                                             0, BM_OR);
01588             continue;
01589         }
01590         case set_block_bit_1bit:
01591         {
01592             unsigned bit_idx = dec.get_16();
01593             bit_idx += i * bm::bits_in_block; 
01594             bv.set_bit(bit_idx);
01595             continue;
01596         }
01597         case set_block_bit_0runs:
01598         {
01599             //TODO: optimization if block exists
01600             bit_block_set(temp_block, 0);
01601 
01602             unsigned char run_type = dec.get_8();
01603             for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
01604             {
01605                 unsigned run_length = dec.get_16();
01606                 if (run_type)
01607                 {
01608                     unsigned run_end = j + run_length;
01609                     for (;j < run_end; ++j)
01610                     {
01611                         BM_ASSERT(j < bm::set_block_size);
01612                         temp_block[j] = dec.get_32();
01613                     }
01614                 }
01615                 else
01616                 {
01617                     j += run_length;
01618                 }
01619             } // for
01620 
01621             bv.combine_operation_with_block(i, 
01622                                             temp_block,
01623                                             0, BM_OR);            
01624             continue;
01625         }
01626         case set_block_bit_interval: 
01627         {
01628             unsigned head_idx, tail_idx;
01629             head_idx = dec.get_16();
01630             tail_idx = dec.get_16();
01631 
01632             if (blk == 0)
01633             {
01634                 blk = bman.get_allocator().alloc_bit_block();
01635                 bman.set_block(i, blk);
01636                 for (unsigned i = 0; i < head_idx; ++i)
01637                 {
01638                     blk[i] = 0;
01639                 }
01640                 dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
01641                 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
01642                 {
01643                     blk[j] = 0;
01644                 }
01645                 continue;
01646             }
01647             bit_block_set(temp_block, 0);
01648             dec.get_32(temp_block + head_idx, tail_idx - head_idx + 1);
01649 
01650             bv.combine_operation_with_block(i, 
01651                                             temp_block,
01652                                             0, BM_OR);
01653             continue;
01654         }
01655         case set_block_gap: 
01656         case set_block_gapbit:
01657         case set_block_arrgap:
01658         case set_block_gap_egamma:
01659         case set_block_arrgap_egamma:
01660         case set_block_arrgap_egamma_inv:
01661         case set_block_arrgap_inv:    
01662             deserialize_gap(btype, dec, bv, bman, i, blk);
01663             continue;
01664         case set_block_arrbit:
01665         {
01666             gap_word_t len = 
01667                 sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32();
01668 
01669             if (bman.is_block_gap(i))
01670             {
01671                 // Here we most probably does not want to keep
01672                 // the block GAP since generic bitblock offers better
01673                 // performance.
01674                 blk = bman.convert_gap2bitset(i);
01675             }
01676             else
01677             {
01678                 if (blk == 0)  // block does not exists yet
01679                 {
01680                     blk = bman.get_allocator().alloc_bit_block();
01681                     bit_block_set(blk, 0);
01682                     bman.set_block(i, blk);
01683                 }
01684             }
01685 
01686             // Get the array one by one and set the bits.
01687             for (unsigned k = 0; k < len; ++k)
01688             {
01689                 gap_word_t bit_idx = dec.get_16();
01690                 or_bit_block(blk, bit_idx, 1);
01691             }
01692             continue;
01693         }
01694         default:
01695             BM_ASSERT(0); // unknown block type
01696         } // switch
01697     } // for i
01698 
01699     return dec.size();
01700 }
01701 
01702 
01703 
01704 template<class DEC>
01705 serial_stream_iterator<DEC>::serial_stream_iterator(const unsigned char* buf)
01706   : decoder_(buf),
01707     end_of_stream_(false),
01708     bv_size_(0),
01709     state_(e_unknown),
01710     id_cnt_(0),
01711     block_idx_(0),
01712     mono_block_cnt_(0)
01713 {
01714     ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
01715 
01716     bit_func_table_[bm::set_AND] = 
01717         &serial_stream_iterator<DEC>::get_bit_block_AND;
01718     bit_func_table_[bm::set_ASSIGN] = 
01719         &serial_stream_iterator<DEC>::get_bit_block_ASSIGN;
01720     bit_func_table_[bm::set_OR]     = 
01721         &serial_stream_iterator<DEC>::get_bit_block_OR;
01722     bit_func_table_[bm::set_SUB] = 
01723         &serial_stream_iterator<DEC>::get_bit_block_SUB;
01724     bit_func_table_[bm::set_XOR] = 
01725         &serial_stream_iterator<DEC>::get_bit_block_XOR;
01726     bit_func_table_[bm::set_COUNT] = 
01727         &serial_stream_iterator<DEC>::get_bit_block_COUNT;
01728     bit_func_table_[bm::set_COUNT_AND] = 
01729         &serial_stream_iterator<DEC>::get_bit_block_COUNT_AND;
01730     bit_func_table_[bm::set_COUNT_XOR] = 
01731         &serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR;
01732     bit_func_table_[bm::set_COUNT_OR] = 
01733         &serial_stream_iterator<DEC>::get_bit_block_COUNT_OR;
01734     bit_func_table_[bm::set_COUNT_SUB_AB] = 
01735         &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB;
01736     bit_func_table_[bm::set_COUNT_SUB_BA] = 
01737         &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA;
01738     bit_func_table_[bm::set_COUNT_A] = 
01739         &serial_stream_iterator<DEC>::get_bit_block_COUNT_A;
01740     bit_func_table_[bm::set_COUNT_B] = 
01741         &serial_stream_iterator<DEC>::get_bit_block_COUNT;
01742 
01743 
01744     // reading stream header
01745     unsigned char header_flag =  decoder_.get_8();
01746     if (!(header_flag & BM_HM_NO_BO))
01747     {
01748         /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8();
01749     }
01750 
01751     // check if bitvector comes as an inverted, sorted list of ints
01752     //
01753     if (header_flag & BM_HM_ID_LIST)
01754     {
01755         // special case: the next comes plain list of unsigned integers
01756         if (header_flag & BM_HM_RESIZE)
01757         {
01758             bv_size_ = decoder_.get_32();
01759         }
01760 
01761         state_ = e_list_ids;
01762         id_cnt_ = decoder_.get_32();
01763         next(); // read first id
01764     }
01765     else
01766     {
01767         if (!(header_flag & BM_HM_NO_GAPL)) 
01768         {
01769             unsigned i;
01770             // keep GAP levels info
01771             for (i = 0; i < bm::gap_levels; ++i)
01772             {
01773                 glevels_[i] = decoder_.get_16();
01774             }
01775         }
01776 
01777         if (header_flag & (1 << 1))
01778         {
01779             bv_size_ = decoder_.get_32();
01780         }
01781         state_ = e_blocks;
01782     }
01783 }
01784 
01785 template<class DEC>
01786 void serial_stream_iterator<DEC>::next()
01787 {
01788     if (is_eof()) return;
01789 
01790     switch (state_) 
01791     {
01792     case e_list_ids:
01793         // read inverted ids one by one
01794         if (id_cnt_ == 0)
01795         {
01796             end_of_stream_ = true;
01797             state_ = e_unknown;
01798             break;
01799         }
01800         last_id_ = decoder_.get_32();
01801         --id_cnt_;
01802         break;
01803 
01804     case e_blocks:
01805         if (block_idx_ == bm::set_total_blocks)
01806         {
01807             end_of_stream_ = true;
01808             state_ = e_unknown;
01809             break;
01810         }
01811 
01812         block_type_ = decoder_.get_8();
01813 
01814         // pre-check for 7-bit zero block
01815         //
01816         if (block_type_ & (1 << 7))
01817         {
01818             mono_block_cnt_ = (block_type_ & ~(1 << 7)) - 1;
01819             state_ = e_zero_blocks;
01820             break;
01821         }
01822 
01823         switch (block_type_)
01824         {
01825         case set_block_azero:
01826         case set_block_end:
01827             end_of_stream_ = true; state_ = e_unknown;
01828             break;
01829         case set_block_1zero:
01830             state_ = e_zero_blocks;
01831             mono_block_cnt_ = 0;
01832             break;
01833         case set_block_8zero:
01834             state_ = e_zero_blocks;
01835             mono_block_cnt_ = decoder_.get_8()-1;
01836             break;
01837         case set_block_16zero:
01838             state_ = e_zero_blocks;
01839             mono_block_cnt_ = decoder_.get_16()-1;
01840             break;
01841         case set_block_32zero:
01842             state_ = e_zero_blocks;
01843             mono_block_cnt_ = decoder_.get_32()-1;
01844             break;
01845         case set_block_aone:
01846             state_ = e_one_blocks;
01847             mono_block_cnt_ = bm::set_total_blocks - block_idx_;
01848             break;
01849         case set_block_1one:
01850             state_ = e_one_blocks;
01851             mono_block_cnt_ = 0;
01852             break;
01853         case set_block_8one:
01854             state_ = e_one_blocks;
01855             mono_block_cnt_ = decoder_.get_8()-1;
01856             break;
01857         case set_block_16one:
01858             state_ = e_one_blocks;
01859             mono_block_cnt_ = decoder_.get_16()-1;
01860             break;
01861         case set_block_32one:
01862             state_ = e_one_blocks;
01863             mono_block_cnt_ = decoder_.get_32()-1;
01864             break;
01865 
01866         case set_block_bit:
01867         case set_block_bit_interval:
01868         case set_block_bit_0runs:
01869         case set_block_arrbit:
01870         case set_block_bit_1bit: // TODO: better make it GAP
01871             state_ = e_bit_block;
01872             break;
01873 
01874         case set_block_gap:
01875         case set_block_gap_egamma:
01876             gap_head_ = 
01877                 sizeof(gap_word_t) == 2 ? 
01878                     decoder_.get_16() : decoder_.get_32();
01879         case set_block_arrgap:
01880         case set_block_arrgap_egamma:
01881         case set_block_arrgap_egamma_inv:
01882         case set_block_arrgap_inv:
01883             state_ = e_gap_block;
01884             break;        
01885         case set_block_gapbit:
01886             state_ = e_bit_block; // TODO: make a better decision here
01887             break;
01888         
01889         default:
01890             BM_ASSERT(0);
01891         }// switch
01892 
01893         break;
01894 
01895     case e_zero_blocks:
01896     case e_one_blocks:
01897         ++block_idx_;
01898         if (!mono_block_cnt_)
01899         {
01900             state_ = e_blocks; // get new block token
01901             break;
01902         }
01903         --mono_block_cnt_;
01904         break;
01905 
01906     case e_unknown:
01907     default:
01908         BM_ASSERT(0);
01909     } // switch
01910 }
01911 
01912 
01913 
01914 template<class DEC>
01915 unsigned 
01916 serial_stream_iterator<DEC>::get_bit_block_ASSIGN(
01917                                             bm::word_t*  dst_block,
01918                                             bm::word_t*  /*tmp_block*/)
01919 {
01920     BM_ASSERT(this->state_ == e_bit_block);
01921     unsigned count = 0;
01922 
01923     switch (this->block_type_)
01924     {
01925     case set_block_bit:
01926         decoder_.get_32(dst_block, bm::set_block_size);
01927         break;
01928     case set_block_bit_0runs: 
01929         {
01930         if (dst_block)
01931             bit_block_set(dst_block, 0);
01932         unsigned char run_type = decoder_.get_8();
01933         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
01934         {
01935             unsigned run_length = decoder_.get_16();
01936             if (run_type)
01937             {
01938                 unsigned run_end = j + run_length;
01939                 if (dst_block)
01940                 {
01941                     for (;j < run_end; ++j)
01942                     {
01943                         BM_ASSERT(j < bm::set_block_size);
01944                         dst_block[j] = decoder_.get_32();
01945                     }
01946                 }
01947                 else
01948                 {
01949                     // TODO: use decoder_.seek()
01950                     for (;j < run_end; ++j)
01951                     {
01952                         decoder_.get_32();
01953                     }
01954                 }
01955             }
01956             else
01957             {
01958                 j += run_length;
01959             }
01960         } // for
01961         }
01962         break;
01963     case set_block_bit_interval:
01964         {
01965             unsigned head_idx = decoder_.get_16();
01966             unsigned tail_idx = decoder_.get_16();
01967             if (dst_block) 
01968             {
01969                 for (unsigned i = 0; i < head_idx; ++i)
01970                     dst_block[i] = 0;
01971                 decoder_.get_32(dst_block + head_idx, 
01972                                 tail_idx - head_idx + 1);
01973                 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
01974                     dst_block[j] = 0;
01975             }
01976             else
01977             {
01978                 decoder_.seek((tail_idx - head_idx + 1) * 4);
01979             }
01980         }
01981         break;
01982     case set_block_arrbit:
01983     case set_block_bit_1bit:
01984         get_arr_bit(dst_block, true /*clear target*/);
01985         break;
01986     case set_block_gapbit:
01987         BM_ASSERT(0);
01988         break;
01989     default:
01990         BM_ASSERT(0);
01991     } // switch
01992     return count;
01993 }
01994 
01995 template<class DEC>
01996 unsigned 
01997 serial_stream_iterator<DEC>::get_bit_block_OR(bm::word_t*  dst_block,
01998                                               bm::word_t*  /*tmp_block*/)
01999 {
02000     BM_ASSERT(this->state_ == e_bit_block);
02001     unsigned count = 0;
02002     switch (block_type_)
02003     {
02004     case set_block_bit:
02005         {
02006         bitblock_get_adapter ga(dst_block);
02007         bit_OR<bm::word_t> func;
02008         bitblock_store_adapter sa(dst_block);
02009 
02010         bit_recomb(ga,
02011                    decoder_,
02012                    func,
02013                    sa
02014                   );
02015         }
02016         break;
02017     case set_block_bit_interval:
02018         {
02019         unsigned head_idx = decoder_.get_16();
02020         unsigned tail_idx = decoder_.get_16();
02021         for (unsigned i = head_idx; i <= tail_idx; ++i)
02022             dst_block[i] |= decoder_.get_32();
02023         }
02024         break;
02025     case set_block_bit_0runs:
02026         {
02027         unsigned char run_type = decoder_.get_8();
02028         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02029         {
02030             unsigned run_length = decoder_.get_16();
02031             if (run_type)
02032             {
02033                 unsigned run_end = j + run_length;
02034                 for (;j < run_end; ++j)
02035                 {
02036                     BM_ASSERT(j < bm::set_block_size);
02037                     dst_block[j] |= decoder_.get_32();
02038                 }
02039             }
02040             else
02041             {
02042                 j += run_length;
02043             }
02044         } // for
02045         }
02046         break;
02047     case set_block_bit_1bit:
02048     case set_block_arrbit:
02049         get_arr_bit(dst_block, false /*don't clear target*/);
02050         break;      
02051     default:
02052         BM_ASSERT(0);
02053     } // switch
02054     return count;
02055 }
02056 
02057 template<class DEC>
02058 unsigned 
02059 serial_stream_iterator<DEC>::get_bit_block_AND(bm::word_t*  dst_block,
02060                                                bm::word_t*  tmp_block)
02061 {
02062     BM_ASSERT(this->state_ == e_bit_block);
02063     BM_ASSERT(dst_block != tmp_block);
02064 
02065     unsigned count = 0;
02066     switch (block_type_)
02067     {
02068     case set_block_bit:
02069         for (unsigned i = 0; i < bm::set_block_size; ++i)
02070             dst_block[i] &= decoder_.get_32();
02071         break;
02072     case set_block_bit_0runs: 
02073         {
02074         unsigned char run_type = decoder_.get_8();
02075         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02076         {
02077             unsigned run_length = decoder_.get_16();
02078             unsigned run_end = j + run_length;
02079             if (run_type)
02080             {
02081                 for (;j < run_end; ++j)
02082                 {
02083                     BM_ASSERT(j < bm::set_block_size);
02084                     dst_block[j] &= decoder_.get_32();
02085                 }
02086             }
02087             else
02088             {
02089                 for (;j < run_end; ++j)
02090                 {
02091                     BM_ASSERT(j < bm::set_block_size);
02092                     dst_block[j] = 0;
02093                 }
02094             }
02095         } // for
02096         }
02097         break;
02098     case set_block_bit_interval:
02099         {
02100             unsigned head_idx = decoder_.get_16();
02101             unsigned tail_idx = decoder_.get_16();
02102             unsigned i;
02103             for ( i = 0; i < head_idx; ++i)
02104                 dst_block[i] = 0;
02105             for ( i = head_idx; i <= tail_idx; ++i)
02106                 dst_block[i] &= decoder_.get_32();
02107             for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
02108                 dst_block[i] = 0;
02109         }
02110         break;
02111     case set_block_bit_1bit:
02112         // TODO: optimization
02113     case set_block_arrbit:
02114         get_arr_bit(tmp_block, true /*clear target*/);
02115         if (dst_block)
02116             bit_block_and(dst_block, tmp_block);
02117         break;      
02118     default:
02119         BM_ASSERT(0);
02120     } // switch
02121     return count;
02122 }
02123 
02124 template<class DEC>
02125 unsigned 
02126 serial_stream_iterator<DEC>::get_bit_block_XOR(bm::word_t*  dst_block,
02127                                                bm::word_t*  tmp_block)
02128 {
02129     BM_ASSERT(this->state_ == e_bit_block);
02130     BM_ASSERT(dst_block != tmp_block);
02131 
02132     unsigned count = 0;
02133     switch (block_type_)
02134     {
02135     case set_block_bit:
02136         for (unsigned i = 0; i < bm::set_block_size; ++i)
02137             dst_block[i] ^= decoder_.get_32();
02138         break;
02139     case set_block_bit_0runs:
02140         {
02141         unsigned char run_type = decoder_.get_8();
02142         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02143         {
02144             unsigned run_length = decoder_.get_16();
02145             if (run_type)
02146             {
02147                 unsigned run_end = j + run_length;
02148                 for (;j < run_end; ++j)
02149                 {
02150                     BM_ASSERT(j < bm::set_block_size);
02151                     dst_block[j] ^= decoder_.get_32();
02152                 }
02153             }
02154             else
02155             {
02156                 j += run_length;
02157             }
02158         } // for
02159         }
02160         break;
02161     case set_block_bit_interval:
02162         {
02163             unsigned head_idx = decoder_.get_16();
02164             unsigned tail_idx = decoder_.get_16();
02165             for (unsigned i = head_idx; i <= tail_idx; ++i)
02166                 dst_block[i] ^= decoder_.get_32();
02167         }
02168         break;
02169     case set_block_bit_1bit:
02170         // TODO: optimization
02171     case set_block_arrbit:
02172         get_arr_bit(tmp_block, true /*clear target*/);
02173         if (dst_block)
02174         {
02175             bit_block_xor(dst_block, tmp_block);
02176         }
02177         break;
02178     default:
02179         BM_ASSERT(0);
02180     } // switch
02181     return count;
02182 }
02183 
02184 template<class DEC>
02185 unsigned 
02186 serial_stream_iterator<DEC>::get_bit_block_SUB(bm::word_t*  dst_block,
02187                                                bm::word_t*  tmp_block)
02188 {
02189     BM_ASSERT(this->state_ == e_bit_block);
02190     BM_ASSERT(dst_block != tmp_block);
02191 
02192     unsigned count = 0;
02193     switch (block_type_)
02194     {
02195     case set_block_bit:
02196         for (unsigned i = 0; i < bm::set_block_size; ++i)
02197             dst_block[i] &= ~decoder_.get_32();
02198         break;
02199     case set_block_bit_0runs:
02200         {
02201         unsigned char run_type = decoder_.get_8();
02202         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02203         {
02204             unsigned run_length = decoder_.get_16();
02205             if (run_type)
02206             {
02207                 unsigned run_end = j + run_length;
02208                 for (;j < run_end; ++j)
02209                 {
02210                     BM_ASSERT(j < bm::set_block_size);
02211                     dst_block[j] &= ~decoder_.get_32();
02212                 }
02213             }
02214             else
02215             {
02216                 j += run_length;
02217             }
02218         } // for
02219         }
02220         break;
02221     case set_block_bit_interval:
02222         {
02223             unsigned head_idx = decoder_.get_16();
02224             unsigned tail_idx = decoder_.get_16();
02225             for (unsigned i = head_idx; i <= tail_idx; ++i)
02226                 dst_block[i] &= ~decoder_.get_32();
02227         }
02228         break;
02229     case set_block_bit_1bit:
02230         // TODO: optimization
02231     case set_block_arrbit:
02232         get_arr_bit(tmp_block, true /*clear target*/);
02233         if (dst_block)
02234             bit_block_sub(dst_block, tmp_block);
02235         break;
02236     default:
02237         BM_ASSERT(0);
02238     } // switch
02239     return count;
02240 }
02241 
02242 
02243 template<class DEC>
02244 unsigned 
02245 serial_stream_iterator<DEC>::get_bit_block_COUNT(bm::word_t*  /*dst_block*/,
02246                                                  bm::word_t*  /*tmp_block*/)
02247 {
02248     BM_ASSERT(this->state_ == e_bit_block);
02249 
02250     unsigned count = 0;
02251     switch (block_type_)
02252     {
02253     case set_block_bit:
02254         for (unsigned i = 0; i < bm::set_block_size; ++i)
02255             count += word_bitcount(decoder_.get_32());
02256         break;
02257     case set_block_bit_0runs:
02258         {
02259         unsigned count = 0;
02260         unsigned char run_type = decoder_.get_8();
02261         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02262         {
02263             unsigned run_length = decoder_.get_16();
02264             if (run_type)
02265             {
02266                 unsigned run_end = j + run_length;
02267                 for (;j < run_end; ++j)
02268                 {
02269                     count += word_bitcount(decoder_.get_32());
02270                 }
02271             }
02272             else
02273             {
02274                 j += run_length;
02275             }
02276         } // for
02277         return count;
02278         }
02279     case set_block_bit_interval:
02280         {
02281             unsigned head_idx = decoder_.get_16();
02282             unsigned tail_idx = decoder_.get_16();
02283             for (unsigned i = head_idx; i <= tail_idx; ++i)
02284                 count += word_bitcount(decoder_.get_32());
02285         }
02286         break;
02287     case set_block_arrbit:
02288         count += get_arr_bit(0);
02289         break;
02290     case set_block_bit_1bit:
02291         ++count;
02292         decoder_.get_16();
02293         break;
02294     default:
02295         BM_ASSERT(0);
02296     } // switch
02297     return count;
02298 }
02299 
02300 template<class DEC>
02301 unsigned 
02302 serial_stream_iterator<DEC>::get_bit_block_COUNT_A(bm::word_t*  dst_block,
02303                                                    bm::word_t*  /*tmp_block*/)
02304 {
02305     BM_ASSERT(this->state_ == e_bit_block);
02306     unsigned count = 0;
02307     if (dst_block)
02308     {
02309         // count the block bitcount
02310         count = 
02311             bit_block_calc_count(dst_block, 
02312                                  dst_block + bm::set_block_size);
02313     }
02314 
02315     switch (block_type_)
02316     {
02317     case set_block_bit:
02318         decoder_.get_32(0, bm::set_block_size);
02319         break;
02320     case set_block_bit_0runs:
02321         {
02322         unsigned char run_type = decoder_.get_8();
02323         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02324         {
02325             unsigned run_length = decoder_.get_16();
02326             if (run_type)
02327             {
02328                 unsigned run_end = j + run_length;
02329                 for (;j < run_end; ++j)
02330                 {
02331                     decoder_.get_32();
02332                 }
02333             }
02334             else
02335             {
02336                 j += run_length;
02337             }
02338         } // for
02339         }
02340         break;
02341 
02342     case set_block_bit_interval:
02343         {
02344             unsigned head_idx = decoder_.get_16();
02345             unsigned tail_idx = decoder_.get_16();
02346             for (unsigned i = head_idx; i <= tail_idx; ++i)
02347                 decoder_.get_32();
02348         }
02349         break;
02350     case set_block_arrbit:
02351         get_arr_bit(0);
02352         break;
02353     case set_block_bit_1bit:
02354         decoder_.get_16();
02355         break;
02356     default:
02357         BM_ASSERT(0);
02358     } // switch
02359     return count;
02360 }
02361 
02362 
02363 template<class DEC>
02364 unsigned 
02365 serial_stream_iterator<DEC>::get_bit_block_COUNT_AND(bm::word_t*  dst_block,
02366                                                      bm::word_t*  tmp_block)
02367 {
02368     BM_ASSERT(this->state_ == e_bit_block);
02369     BM_ASSERT(dst_block);
02370 
02371     unsigned count = 0;
02372     switch (block_type_)
02373     {
02374     case set_block_bit:
02375         for (unsigned i = 0; i < bm::set_block_size; ++i)
02376             count += word_bitcount(dst_block[i] & decoder_.get_32());
02377         break;
02378     case set_block_bit_0runs:
02379         {
02380         unsigned count = 0;
02381         unsigned char run_type = decoder_.get_8();
02382         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02383         {
02384             unsigned run_length = decoder_.get_16();
02385             if (run_type)
02386             {
02387                 unsigned run_end = j + run_length;
02388                 for (;j < run_end; ++j)
02389                 {
02390                     count += word_bitcount(dst_block[j] & decoder_.get_32());
02391                 }
02392             }
02393             else
02394             {
02395                 j += run_length;
02396             }
02397         } // for
02398         return count;
02399         }
02400     case set_block_bit_interval:
02401         {
02402         unsigned head_idx = decoder_.get_16();
02403         unsigned tail_idx = decoder_.get_16();
02404         for (unsigned i = head_idx; i <= tail_idx; ++i)
02405             count += word_bitcount(dst_block[i] & decoder_.get_32());
02406         }
02407         break;
02408     case set_block_bit_1bit:
02409         // TODO: optimization
02410     case set_block_arrbit:
02411         get_arr_bit(tmp_block, true /*clear target*/);
02412         count += 
02413             bit_operation_and_count(dst_block, dst_block + bm::set_block_size, 
02414                                     tmp_block);
02415         break;
02416     default:
02417         BM_ASSERT(0);
02418     } // switch
02419     return count;
02420 }
02421 
02422 template<class DEC>
02423 unsigned 
02424 serial_stream_iterator<DEC>::get_bit_block_COUNT_OR(bm::word_t*  dst_block,
02425                                                     bm::word_t*  tmp_block)
02426 {
02427     BM_ASSERT(this->state_ == e_bit_block);
02428     BM_ASSERT(dst_block);
02429 
02430     bitblock_sum_adapter count_adapter;
02431     switch (block_type_)
02432     {
02433     case set_block_bit:
02434         {
02435         bitblock_get_adapter ga(dst_block);
02436         bit_COUNT_OR<bm::word_t> func;
02437         
02438         bit_recomb(ga,
02439                    decoder_,
02440                    func,
02441                    count_adapter
02442                   );
02443         }
02444         break;
02445     case set_block_bit_0runs: 
02446         {
02447         unsigned count = 0;
02448         unsigned char run_type = decoder_.get_8();
02449         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02450         {
02451             unsigned run_length = decoder_.get_16();
02452             unsigned run_end = j + run_length;
02453             if (run_type)
02454             {
02455                 for (;j < run_end; ++j)
02456                 {
02457                     BM_ASSERT(j < bm::set_block_size);
02458                     count += word_bitcount(dst_block[j] | decoder_.get_32());
02459                 }
02460             }
02461             else
02462             {
02463                 for (;j < run_end; ++j)
02464                 {
02465                     BM_ASSERT(j < bm::set_block_size);
02466                     count += word_bitcount(dst_block[j]);
02467                 }
02468             }
02469         } // for
02470         return count;
02471         }
02472     case set_block_bit_interval:
02473         {
02474         unsigned head_idx = decoder_.get_16();
02475         unsigned tail_idx = decoder_.get_16();
02476         unsigned count = 0;
02477         unsigned i;
02478         for (i = 0; i < head_idx; ++i)
02479             count += word_bitcount(dst_block[i]);
02480         for (i = head_idx; i <= tail_idx; ++i)
02481             count += word_bitcount(dst_block[i] | decoder_.get_32());
02482         for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02483             count += word_bitcount(dst_block[i]);
02484         return count;
02485         }
02486     case set_block_bit_1bit:
02487         // TODO: optimization
02488     case set_block_arrbit:
02489         get_arr_bit(tmp_block, true /* clear target*/);
02490         return 
02491             bit_operation_or_count(dst_block, 
02492                                    dst_block + bm::set_block_size,
02493                                    tmp_block);
02494     default:
02495         BM_ASSERT(0);
02496     } // switch
02497     return count_adapter.sum();
02498 }
02499 
02500 template<class DEC>
02501 unsigned 
02502 serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR(bm::word_t*  dst_block,
02503                                                      bm::word_t*  tmp_block)
02504 {
02505     BM_ASSERT(this->state_ == e_bit_block);
02506     BM_ASSERT(dst_block);
02507 
02508     bitblock_sum_adapter count_adapter;
02509     switch (block_type_)
02510     {
02511     case set_block_bit:
02512         {
02513         bitblock_get_adapter ga(dst_block);
02514         bit_COUNT_XOR<bm::word_t> func;
02515         
02516         bit_recomb(ga,
02517                    decoder_,
02518                    func,
02519                    count_adapter
02520                   );
02521         }
02522         break;
02523     case set_block_bit_0runs: 
02524         {
02525         unsigned count = 0;
02526         unsigned char run_type = decoder_.get_8();
02527         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02528         {
02529             unsigned run_length = decoder_.get_16();
02530             unsigned run_end = j + run_length;
02531             if (run_type)
02532             {
02533                 for (;j < run_end; ++j)
02534                 {
02535                     BM_ASSERT(j < bm::set_block_size);
02536                     count += word_bitcount(dst_block[j] ^ decoder_.get_32());
02537                 }
02538             }
02539             else
02540             {
02541                 for (;j < run_end; ++j)
02542                 {
02543                     BM_ASSERT(j < bm::set_block_size);
02544                     count += word_bitcount(dst_block[j]);
02545                 }
02546             }
02547         } // for
02548         return count;
02549         }
02550     case set_block_bit_interval:
02551         {
02552         unsigned head_idx = decoder_.get_16();
02553         unsigned tail_idx = decoder_.get_16();
02554         unsigned count = 0;
02555         unsigned i;
02556         for (i = 0; i < head_idx; ++i)
02557             count += word_bitcount(dst_block[i]);
02558         for (i = head_idx; i <= tail_idx; ++i)
02559             count += word_bitcount(dst_block[i] ^ decoder_.get_32());
02560         for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02561             count += word_bitcount(dst_block[i]);
02562         return count;
02563         }
02564     case set_block_bit_1bit:
02565         // TODO: optimization
02566     case set_block_arrbit:
02567         get_arr_bit(tmp_block, true /* clear target*/);
02568         return 
02569             bit_operation_xor_count(dst_block, 
02570                                     dst_block + bm::set_block_size,
02571                                     tmp_block);
02572     default:
02573         BM_ASSERT(0);
02574     } // switch
02575     return count_adapter.sum();
02576 }
02577 
02578 template<class DEC>
02579 unsigned 
02580 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB(bm::word_t*  dst_block,
02581                                                         bm::word_t*  tmp_block)
02582 {
02583     BM_ASSERT(this->state_ == e_bit_block);
02584     BM_ASSERT(dst_block);
02585 
02586     bitblock_sum_adapter count_adapter;
02587     switch (block_type_)
02588     {
02589     case set_block_bit:
02590         {
02591         bitblock_get_adapter ga(dst_block);
02592         bit_COUNT_SUB_AB<bm::word_t> func;
02593         
02594         bit_recomb(ga, 
02595                    decoder_,
02596                    func,
02597                    count_adapter
02598                   );
02599         }
02600         break;
02601     case set_block_bit_0runs: 
02602         {
02603         unsigned count = 0;
02604         unsigned char run_type = decoder_.get_8();
02605         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02606         {
02607             unsigned run_length = decoder_.get_16();
02608             unsigned run_end = j + run_length;
02609             if (run_type)
02610             {
02611                 for (;j < run_end; ++j)
02612                 {
02613                     BM_ASSERT(j < bm::set_block_size);
02614                     count += word_bitcount(dst_block[j] & ~decoder_.get_32());
02615                 }
02616             }
02617             else
02618             {
02619                 for (;j < run_end; ++j)
02620                 {
02621                     BM_ASSERT(j < bm::set_block_size);
02622                     count += word_bitcount(dst_block[j]);
02623                 }
02624             }
02625         } // for
02626         return count;
02627         }
02628     case set_block_bit_interval:
02629         {
02630         unsigned head_idx = decoder_.get_16();
02631         unsigned tail_idx = decoder_.get_16();
02632         unsigned count = 0;
02633         unsigned i;
02634         for (i = 0; i < head_idx; ++i)
02635             count += word_bitcount(dst_block[i]);
02636         for (i = head_idx; i <= tail_idx; ++i)
02637             count += word_bitcount(dst_block[i] & (~decoder_.get_32()));
02638         for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02639             count += word_bitcount(dst_block[i]);
02640         return count;
02641         }
02642         break;
02643     case set_block_bit_1bit:
02644         //TODO: optimization
02645     case set_block_arrbit:
02646         get_arr_bit(tmp_block, true /* clear target*/);
02647         return 
02648             bit_operation_sub_count(dst_block, 
02649                                     dst_block + bm::set_block_size,
02650                                     tmp_block);
02651     default:
02652         BM_ASSERT(0);
02653     } // switch
02654     return count_adapter.sum();
02655 }
02656 
02657 template<class DEC>
02658 unsigned 
02659 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA(bm::word_t*  dst_block,
02660                                                         bm::word_t*  tmp_block)
02661 {
02662     BM_ASSERT(this->state_ == e_bit_block);
02663     BM_ASSERT(dst_block);
02664 
02665     bitblock_sum_adapter count_adapter;
02666     switch (block_type_)
02667     {
02668     case set_block_bit:
02669         {
02670         bitblock_get_adapter ga(dst_block);
02671         bit_COUNT_SUB_BA<bm::word_t> func;
02672 
02673         bit_recomb(ga,
02674                    decoder_,
02675                    func,
02676                    count_adapter
02677                   );
02678         }
02679         break;
02680     case set_block_bit_0runs: 
02681         {
02682         unsigned count = 0;
02683         unsigned char run_type = decoder_.get_8();
02684         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02685         {
02686             unsigned run_length = decoder_.get_16();
02687             unsigned run_end = j + run_length;
02688             if (run_type)
02689             {
02690                 for (;j < run_end; ++j)
02691                 {
02692                     BM_ASSERT(j < bm::set_block_size);
02693                     count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
02694                 }
02695             }
02696             else
02697             {
02698                 j += run_length;
02699             }
02700         } // for
02701         return count;
02702         }
02703 
02704     case set_block_bit_interval:
02705         {
02706         unsigned head_idx = decoder_.get_16();
02707         unsigned tail_idx = decoder_.get_16();
02708         unsigned count = 0;
02709         unsigned i;
02710         for (i = head_idx; i <= tail_idx; ++i)
02711             count += word_bitcount(decoder_.get_32() & (~dst_block[i]));
02712         return count;
02713         }
02714         break;
02715     case set_block_bit_1bit:
02716         // TODO: optimization
02717     case set_block_arrbit:
02718         get_arr_bit(tmp_block, true /* clear target*/);
02719         return 
02720             bit_operation_sub_count(tmp_block,
02721                                     tmp_block + bm::set_block_size,
02722                                     dst_block);
02723     default:
02724         BM_ASSERT(0);
02725     } // switch
02726     return count_adapter.sum();
02727 }
02728 
02729 
02730 
02731 template<class DEC>
02732 unsigned serial_stream_iterator<DEC>::get_arr_bit(bm::word_t* dst_block, 
02733                                                   bool        clear_target)
02734 {
02735     BM_ASSERT(this->block_type_ == set_block_arrbit || 
02736               this->block_type_ == set_block_bit_1bit);
02737     
02738     gap_word_t len = decoder_.get_16(); // array length / 1bit_idx
02739     if (dst_block)
02740     {
02741         if (clear_target)
02742             bit_block_set(dst_block, 0);
02743 
02744         if (this->block_type_ == set_block_bit_1bit)
02745         {
02746             // len contains idx of 1 bit set
02747             set_bit(dst_block, len);
02748             return 1;
02749         }
02750 
02751         for (unsigned k = 0; k < len; ++k)
02752         {
02753             gap_word_t bit_idx = decoder_.get_16();
02754             set_bit(dst_block, bit_idx);
02755         }
02756     }
02757     else
02758     {
02759         if (this->block_type_ == set_block_bit_1bit)
02760         {
02761             return 1; // nothing to do: len var already consumed 16bits
02762         }
02763         // fwd the decocing stream
02764         decoder_.seek(len * 2);
02765     }
02766     return len;
02767 }
02768 
02769 template<class DEC>
02770 void 
02771 serial_stream_iterator<DEC>::get_gap_block(bm::gap_word_t* dst_block)
02772 {
02773     BM_ASSERT(this->state_ == e_gap_block || 
02774               this->block_type_ == set_block_bit_1bit);
02775     BM_ASSERT(dst_block);
02776 
02777     read_gap_block(this->decoder_,
02778                    this->block_type_,
02779                    dst_block,
02780                    this->gap_head_);
02781 
02782     ++(this->block_idx_);
02783     this->state_ = e_blocks;
02784 }
02785 
02786 
02787 template<class DEC>
02788 unsigned 
02789 serial_stream_iterator<DEC>::get_bit_block(bm::word_t*    dst_block,
02790                                            bm::word_t*    tmp_block,
02791                                            set_operation  op)
02792 {
02793     BM_ASSERT(this->state_ == e_bit_block);
02794     get_bit_func_type bit_func = bit_func_table_[op];
02795     BM_ASSERT(bit_func);
02796     unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
02797     this->state_ = e_blocks;
02798     ++(this->block_idx_);
02799     return cnt;
02800 }
02801 
02802 
02803 
02804 template<class BV>
02805 unsigned operation_deserializer<BV>::deserialize(
02806                                         bvector_type&        bv, 
02807                                         const unsigned char* buf, 
02808                                         bm::word_t*          temp_block,
02809                                         set_operation        op)
02810 {
02811     ByteOrder bo_current = globals<true>::byte_order();
02812 
02813     bm::decoder dec(buf);
02814     unsigned char header_flag = dec.get_8();
02815     ByteOrder bo = bo_current;
02816     if (!(header_flag & BM_HM_NO_BO))
02817     {
02818         bo = (bm::ByteOrder) dec.get_8();
02819     }
02820 
02821     blocks_manager_type& bman = bv.get_blocks_manager();
02822     bit_block_guard<blocks_manager_type> bg(bman);
02823     if (temp_block == 0)
02824     {
02825         temp_block = bg.allocate();
02826     }
02827 
02828     if (bo_current == bo)
02829     {
02830         serial_stream_current ss(buf);
02831         return 
02832             iterator_deserializer<BV, serial_stream_current>::
02833                 deserialize(bv, ss, temp_block, op);
02834     }
02835     switch (bo_current) 
02836     {
02837     case BigEndian:
02838         {
02839         serial_stream_be ss(buf);
02840         return 
02841             iterator_deserializer<BV, serial_stream_be>::
02842                 deserialize(bv, ss, temp_block, op);
02843         }
02844     case LittleEndian:
02845         {
02846         serial_stream_le ss(buf);
02847         return 
02848             iterator_deserializer<BV, serial_stream_le>::
02849                 deserialize(bv, ss, temp_block, op);
02850         }
02851     default:
02852         BM_ASSERT(0);
02853     };
02854     return 0;
02855 }
02856 
02857 
02858 template<class BV, class SerialIterator>
02859 void iterator_deserializer<BV, SerialIterator>::load_id_list(
02860                                             bvector_type&         bv, 
02861                                             serial_iterator_type& sit,
02862                                             unsigned              id_count,
02863                                             bool                  set_clear)
02864 {
02865     const unsigned win_size = 64;
02866     bm::id_t id_buffer[win_size+1];
02867 
02868     if (set_clear)  // set bits
02869     {
02870         for (unsigned i = 0; i <= id_count;)
02871         {
02872             unsigned j;
02873             for (j = 0; j < win_size && i <= id_count; ++j, ++i) 
02874             {
02875                 id_buffer[j] = sit.get_id();
02876                 sit.next();
02877             } // for j
02878             bm::combine_or(bv, id_buffer, id_buffer + j);
02879         } // for i
02880     } 
02881     else // clear bits
02882     {
02883         for (unsigned i = 0; i <= id_count;)
02884         {
02885             unsigned j;
02886             for (j = 0; j < win_size && i <= id_count; ++j, ++i) 
02887             {
02888                 id_buffer[j] = sit.get_id();
02889                 sit.next();
02890             } // for j
02891             bm::combine_sub(bv, id_buffer, id_buffer + j);
02892         } // for i
02893     }
02894 }
02895 
02896 
02897 template<class BV, class SerialIterator>
02898 unsigned 
02899 iterator_deserializer<BV, SerialIterator>::deserialize(
02900                                        bvector_type&         bv, 
02901                                        serial_iterator_type& sit, 
02902                                        bm::word_t*           temp_block,
02903                                        set_operation         op)
02904 {
02905     BM_ASSERT(temp_block);
02906 
02907     unsigned count = 0;
02908     gap_word_t   gap_temp_block[bm::gap_equiv_len*3];
02909     gap_temp_block[0] = 0;
02910 
02911     blocks_manager_type& bman = bv.get_blocks_manager();
02912 
02913     bv.forget_count();
02914 
02915     if (sit.bv_size() && (sit.bv_size() > bv.size())) 
02916     {
02917         bv.resize(sit.bv_size());
02918     }
02919 
02920     BM_SET_MMX_GUARD
02921 
02922     typename serial_iterator_type::iterator_state state;
02923     state = sit.get_state();
02924     if (state == serial_iterator_type::e_list_ids)
02925     {
02926         unsigned id_count = sit.get_id_count();
02927         bool set_clear = true;
02928         switch (op)
02929         {
02930         case set_AND:
02931             {
02932             // TODO: use some more optimal algorithm without temp vector
02933             BV bv_tmp(BM_GAP);
02934             load_id_list(bv_tmp, sit, id_count, true);
02935             bv &= bv_tmp;
02936             }
02937             break;
02938         case set_ASSIGN:
02939             bv.clear(true);
02940             // intentional case fall through here (not a bug)
02941         case set_OR:
02942             set_clear = true;
02943             // fall to SUB
02944         case set_SUB:
02945             load_id_list(bv, sit, id_count, set_clear);
02946             break;
02947         case set_XOR:
02948             for (unsigned i = 0; i < id_count; ++i)
02949             {
02950                 bm::id_t id = sit.get_id();
02951                 bv[id] ^= true;
02952                 sit.next();
02953             } // for
02954             break;
02955         case set_COUNT: case set_COUNT_B:
02956             for (unsigned i = 0; i < id_count; ++i)
02957             {
02958                 /* bm::id_t id = */ sit.get_id();
02959                 ++count;
02960                 sit.next();
02961             } // for
02962             break;
02963         case set_COUNT_A:
02964             return bv.count();
02965         case set_COUNT_AND:
02966             for (unsigned i = 0; i < id_count; ++i)
02967             {
02968                 bm::id_t id = sit.get_id();
02969                 count += bv.get_bit(id);
02970                 sit.next();
02971             } // for
02972             break;
02973         case set_COUNT_XOR:
02974             {
02975             // TODO: get rid of the temp vector
02976             BV bv_tmp(BM_GAP);
02977             load_id_list(bv_tmp, sit, id_count, true);
02978             count += count_xor(bv, bv_tmp);
02979             }
02980             break;
02981         case set_COUNT_OR:
02982             {
02983             // TODO: get rid of the temp. vector
02984             BV bv_tmp(BM_GAP);
02985             load_id_list(bv_tmp, sit, id_count, true);
02986             count += count_or(bv, bv_tmp);
02987             }
02988             break;
02989         case set_COUNT_SUB_AB:
02990             {
02991             // TODO: get rid of the temp. vector
02992             BV bv_tmp(bv);
02993             load_id_list(bv_tmp, sit, id_count, false);
02994             count += bv_tmp.count();
02995             }
02996             break;
02997         default:
02998             BM_ASSERT(0);
02999         } // switch
03000         return count;
03001     } // state
03002 
03003     unsigned bv_block_idx = 0;
03004 
03005     for (;1;)
03006     {
03007         bm::set_operation sop = op;
03008         if (sit.is_eof()) // argument stream ended
03009         {
03010             // deserialization finalization
03011             switch (op)
03012             {
03013             case set_OR:    case set_SUB:     case set_XOR:
03014             case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
03015             case set_COUNT_SUB_BA:
03016                 // nothing to do
03017                 break;
03018             case set_AND: case set_ASSIGN:
03019                 // clear the rest of the target vector
03020                 {
03021                 unsigned i, j;
03022                 bman.get_block_coord(bv_block_idx, &i, &j);
03023                 bm::word_t*** blk_root = bman.get_rootblock();
03024                 unsigned effective_top_size = 
03025                     bman.effective_top_block_size();
03026                 for (;i < effective_top_size; ++i) 
03027                 {
03028                     bm::word_t** blk_blk = blk_root[i];
03029                     if (blk_blk == 0) 
03030                     {
03031                         bv_block_idx+=bm::set_array_size-j;
03032                         j = 0;
03033                         continue;
03034                     }
03035                     for (;j < bm::set_array_size; ++j, ++bv_block_idx)
03036                     {
03037                         if (blk_blk[j])
03038                             bman.zero_block(bv_block_idx);
03039                     } // for j
03040                     j = 0;
03041                 } // for i
03042 
03043                 }
03044                 break;
03045             case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
03046             case set_COUNT_SUB_AB:
03047                 // count bits in the target vector
03048                 {
03049                 unsigned i, j;
03050                 bman.get_block_coord(bv_block_idx, &i, &j);
03051                 bm::word_t*** blk_root = bman.get_rootblock();
03052                 unsigned effective_top_size = 
03053                     bman.effective_top_block_size();
03054                 for (;i < effective_top_size; ++i) 
03055                 {
03056                     bm::word_t** blk_blk = blk_root[i];
03057                     if (blk_blk == 0) 
03058                     {
03059                         bv_block_idx+=bm::set_array_size-j;
03060                         j = 0;
03061                         continue;
03062                     }
03063                     for (;j < bm::set_array_size; ++j, ++bv_block_idx)
03064                     {
03065                         if (blk_blk[j])
03066                             count += bman.block_bitcount(blk_blk[j], bv_block_idx);
03067                     } // for j
03068                     j = 0;
03069                 } // for i
03070 
03071                 }
03072                 break;
03073             default:
03074                 BM_ASSERT(0);
03075             }
03076             return count;
03077         }
03078 
03079         state = sit.state();
03080         switch (state)
03081         {
03082         case serial_iterator_type::e_blocks:
03083             sit.next();
03084             continue;
03085         case serial_iterator_type::e_bit_block:
03086             {
03087             BM_ASSERT(sit.block_idx() == bv_block_idx);
03088             bm::word_t* blk = bman.get_block(bv_block_idx);
03089 
03090             if (!blk) 
03091             {
03092                 switch (op)
03093                 {
03094                 case set_AND:          case set_SUB: case set_COUNT_AND:
03095                 case set_COUNT_SUB_AB: case set_COUNT_A:
03096                     // one arg is 0, so the operation gives us zero
03097                     // all we need to do is to seek the input stream
03098                     sop = set_ASSIGN;
03099                     break;
03100 
03101                 case set_OR: case set_XOR: case set_ASSIGN:
03102                     blk = bman.make_bit_block(bv_block_idx);
03103                     break;
03104 
03105                 case set_COUNT:        case set_COUNT_XOR: case set_COUNT_OR:
03106                 case set_COUNT_SUB_BA: case set_COUNT_B:
03107                     // first arg is not required (should work as is)
03108                     sop = set_COUNT;
03109                     break;
03110 
03111                 case set_END:
03112                 default:
03113                     BM_ASSERT(0);
03114                 }
03115             }
03116             else // block exists
03117             {
03118                 int is_gap = BM_IS_GAP(bman, blk, bv_block_idx);
03119                 if (is_gap || IS_FULL_BLOCK(blk))
03120                 {
03121                     if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
03122                     {
03123                     }
03124                     else
03125                     {
03126                         // TODO: make sure const operations do not 
03127                         // deoptimize GAP blocks
03128                         blk = bman.deoptimize_block(bv_block_idx);
03129                     }
03130                 }
03131             }
03132 
03133             // 2 bit blocks recombination
03134             unsigned c = sit.get_bit_block(blk, temp_block, sop);
03135             count += c;
03136             }
03137             break;
03138 
03139         case serial_iterator_type::e_zero_blocks:
03140             {
03141             BM_ASSERT(bv_block_idx == sit.block_idx());
03142             bm::word_t* blk = bman.get_block(bv_block_idx);
03143             sit.next();
03144 
03145             if (blk)
03146             {
03147                 switch (op)
03148                 {
03149                 case set_AND: case set_ASSIGN:
03150                     // the result is 0
03151                     blk = bman.zero_block(bv_block_idx);
03152                     break;
03153 
03154                 case set_SUB: case set_COUNT_AND:    case set_OR:
03155                 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
03156                     // nothing to do
03157                     break;
03158                 
03159                 case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
03160                 case set_COUNT:        case set_COUNT_XOR:
03161                     // valid bit block recombined with 0 block
03162                     // results with same block data
03163                     // all we need is to bitcount bv block
03164                     {
03165                     unsigned c = bman.block_bitcount(blk, bv_block_idx);
03166                     count += c;
03167                     }
03168                     break;
03169                 case set_END:
03170                 default:
03171                     BM_ASSERT(0);
03172                 } // switch op
03173             } // if blk
03174             }
03175             break;
03176 
03177         case serial_iterator_type::e_one_blocks:
03178             {
03179             BM_ASSERT(bv_block_idx == sit.block_idx());
03180             bm::word_t* blk = bman.get_block(bv_block_idx);
03181             sit.next();
03182 
03183             switch (op)
03184             {
03185             case set_OR: case set_ASSIGN:
03186                 bman.set_block_all_set(bv_block_idx);
03187                 break;
03188             case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
03189                 // block becomes all set
03190                 count += bm::bits_in_block;
03191                 break;
03192             case set_SUB:
03193                 blk = bman.zero_block(bv_block_idx);
03194                 break;
03195             case set_COUNT_SUB_AB: case set_AND:
03196                 // nothing to do
03197                 break;
03198             case set_COUNT_AND: case set_COUNT_A:
03199                 count += bman.block_bitcount(blk, bv_block_idx);
03200                 break;
03201             default:
03202                 if (blk)
03203                 {
03204                     switch (op)
03205                     {
03206                     case set_XOR:
03207                         blk = bman.deoptimize_block(bv_block_idx);
03208                         bit_block_xor(blk, FULL_BLOCK_ADDR);
03209                         break;
03210                     case set_COUNT_XOR:
03211                         {
03212                         int is_gap = BM_IS_GAP(bman, blk, bv_block_idx);
03213                         count += 
03214                             combine_count_operation_with_block(
03215                                                 blk,
03216                                                 is_gap,
03217                                                 FULL_BLOCK_ADDR,
03218                                                 0,
03219                                                 temp_block,
03220                                                 bm::COUNT_XOR);
03221                         }
03222                         break;
03223                     case set_COUNT_SUB_BA:
03224                         {
03225                         int is_gap = BM_IS_GAP(bman, blk, bv_block_idx);
03226                         count += 
03227                             combine_count_operation_with_block(
03228                                                 blk,
03229                                                 is_gap,
03230                                                 FULL_BLOCK_ADDR,
03231                                                 0,
03232                                                 temp_block,
03233                                                 bm::COUNT_SUB_BA);
03234                         }
03235                         break;
03236                     default:
03237                         BM_ASSERT(0);
03238                     } // switch
03239                 }
03240                 else // blk == 0 
03241                 {
03242                     switch (op)
03243                     {
03244                     case set_XOR:
03245                         // 0 XOR 1 = 1
03246                         bman.set_block_all_set(bv_block_idx);
03247                         break;
03248                     case set_COUNT_XOR:
03249                         count += bm::bits_in_block;
03250                         break;
03251                     case set_COUNT_SUB_BA:
03252                         // 1 - 0 = 1
03253                         count += bm::bits_in_block;
03254                         break;
03255                     default:
03256                         break;
03257                     } // switch
03258                 } // else
03259             } // switch
03260 
03261             }
03262             break;
03263 
03264         case serial_iterator_type::e_gap_block:
03265             {
03266             BM_ASSERT(bv_block_idx == sit.block_idx());
03267             bm::word_t* blk = bman.get_block(bv_block_idx);
03268 
03269             sit.get_gap_block(gap_temp_block);
03270             // gap_word_t   gap_head = gap_temp_block[0];
03271 
03272             unsigned len = gap_length(gap_temp_block);
03273             int level = gap_calc_level(len, bman.glen());
03274             --len;
03275 
03276             bool const_op = is_const_set_operation(op);
03277             if (const_op)
03278             {
03279                 distance_metric metric = operation2metric(op);
03280                 int is_gap = blk ? BM_IS_GAP(bman, blk, bv_block_idx) : 0;
03281                 unsigned c = 
03282                     combine_count_operation_with_block(
03283                                         blk,
03284                                         is_gap,
03285                                         (bm::word_t*)gap_temp_block,
03286                                         1, // gap
03287                                         temp_block,
03288                                         metric);
03289                 count += c;
03290             }
03291             else // non-const set operation
03292             {
03293                 if ((sop == set_ASSIGN) && blk) // target block override
03294                 {
03295                     blk = bman.zero_block(bv_block_idx);
03296                     sop = set_OR;
03297                 }
03298                 if (blk == 0) // target block not found
03299                 {
03300                     switch (sop)
03301                     {
03302                     case set_AND: case set_SUB:
03303                         break;
03304                     case set_OR: case set_XOR: case set_ASSIGN:
03305                         if (level == -1) // too big to be GAP: convert to BIT
03306                         {
03307                             blk = bman.get_allocator().alloc_bit_block();
03308                             bman.set_block(bv_block_idx, blk);
03309                             gap_convert_to_bitset(blk, gap_temp_block);
03310                         }
03311                         else // GAP block fits
03312                         {
03313                             gap_word_t* gap_blk = 
03314                                 bman.get_allocator().alloc_gap_block(
03315                                                         level, bman.glen());
03316                             gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
03317                             ::memcpy(gap_blk_ptr, 
03318                                      gap_temp_block, 
03319                                      gap_length(gap_temp_block) 
03320                                                  * sizeof(gap_word_t));
03321                             set_gap_level(gap_blk_ptr, level);
03322                             bman.set_block(bv_block_idx, 
03323                                           (bm::word_t*)gap_blk, true /*GAP*/);
03324                         }
03325                         break;
03326                     default:
03327                         BM_ASSERT(0);
03328                     } // switch
03329                 }
03330                 else  // target block exists
03331                 {
03332                     bm::operation bop = bm::setop2op(op);
03333                     if (level == -1) // too big to GAP
03334                     {
03335                         gap_convert_to_bitset(temp_block, gap_temp_block);
03336                         bv.combine_operation_with_block(bv_block_idx, 
03337                                                         temp_block, 
03338                                                         0, // BIT
03339                                                         bop);
03340                     }
03341                     else // GAP fits
03342                     {
03343                         set_gap_level(gap_temp_block, level);
03344                         bv.combine_operation_with_block(
03345                                                 bv_block_idx, 
03346                                                 (bm::word_t*)gap_temp_block, 
03347                                                 1,  // GAP
03348                                                 bop);
03349                     }
03350                 }
03351             }
03352             }
03353             break;
03354 
03355         default:
03356             BM_ASSERT(0);
03357         } // switch
03358 
03359         ++bv_block_idx;
03360 
03361     } // for (deserialization)
03362 
03363     return count;
03364 }
03365 
03366 
03367 
03368 
03369 } // namespace bm
03370 
03371 #include "bmundef.h"
03372 
03373 #ifdef _MSC_VER
03374 #pragma warning( default : 4311 4312)
03375 #endif
03376 
03377 
03378 #endif
03379 
03380 

Generated on Sun Dec 6 22:15:48 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:49 2009 by modify_doxy.py rev. 173732