1#ifndef BMSERIAL__H__INCLUDED__
2#define BMSERIAL__H__INCLUDED__
33#ifndef BM__H__INCLUDED__
36# error missing include (bm.h or bm64.h)
41#pragma warning( push )
42#pragma warning( disable : 4311 4312 4127)
85 typedef byte_buffer<allocator_type>
buffer;
131 {
return compression_level_; }
156 unsigned char* buf,
size_t buf_size);
194 {
return compression_stat_; }
381 typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> block_arridx_type;
382 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
386 unsigned bit_model_d0_size_;
387 unsigned bit_model_0run_size_;
388 block_arridx_type bit_idx_arr_;
396 bool byte_order_serial_;
402 unsigned compression_level_;
403 bool own_temp_block_;
407 allocator_pool_type pool_;
423template<
typename DEC,
typename BLOCK_IDX>
488template<
class BV,
class DEC>
513 const unsigned char* buf,
607template<
class BV,
class SerialIterator>
626 bool exit_on_one =
false);
629 typedef typename BV::blocks_manager_type blocks_manager_type;
641 size_type finalize_target_vector(blocks_manager_type& bman,
652 {
return "BM::de-serialization format error"; }
654 bool is_range_set_ =
false;
666template<
class DEC,
typename BLOCK_IDX>
847 const unsigned char* buf,
849 bool exit_on_one =
false);
859 const unsigned char* buf,
881 const unsigned char* buf,
884 bool exit_on_one =
false)
895 const unsigned char* buf,
908 { ref_vect_ = ref_vect; }
912 const unsigned char* buf,
921 const unsigned char* buf,
928 typename BV::blocks_manager_type blocks_manager_type;
1045 compression_stat_(0),
1047 byte_order_serial_(true),
1048 sb_bookmarks_(false),
1056 if (temp_block == 0)
1058 temp_block_ = alloc_.alloc_bit_block();
1059 own_temp_block_ =
true;
1063 temp_block_ = temp_block;
1064 own_temp_block_ =
false;
1066 compression_stat_ = (
size_type*) alloc_.alloc_bit_block();
1067 optimize_ = free_ =
false;
1073 compression_stat_(0),
1075 byte_order_serial_(true),
1076 sb_bookmarks_(false),
1084 if (temp_block == 0)
1086 temp_block_ = alloc_.alloc_bit_block();
1087 own_temp_block_ =
true;
1091 temp_block_ = temp_block;
1092 own_temp_block_ =
false;
1094 compression_stat_ = (
size_type*) alloc_.alloc_bit_block();
1095 optimize_ = free_ =
false;
1101 if (own_temp_block_)
1102 alloc_.free_bit_block(temp_block_);
1103 if (compression_stat_)
1104 alloc_.free_bit_block((
bm::word_t*)compression_stat_);
1106 alloc_.free_bit_block(xor_block_);
1113 for (
unsigned i = 0; i < 256; ++i)
1114 compression_stat_[i] = 0;
1122 compression_level_ = clevel;
1128 gap_serial_ = value;
1134 byte_order_serial_ = value;
1140 sb_bookmarks_ = enable;
1143 if (bm_interval > 512)
1146 if (bm_interval < 4)
1149 sb_range_ = bm_interval;
1155 ref_vect_ = ref_vect;
1156 xor_scan_.set_ref_vector(ref_vect);
1158 xor_block_ = alloc_.alloc_bit_block();
1172 unsigned char header_flag = 0;
1178 if (!byte_order_serial_)
1194 enc.put_8(header_flag);
1196 if (byte_order_serial_)
1199 enc.put_8((
unsigned char)bo);
1211 enc.put_64(bv.size());
1213 enc.put_32(bv.size());
1237 if (tail_delta < 256)
1243 enc.put_8((
unsigned char)min_v);
1247 if (tail_delta < 256)
1248 enc.put_8((
unsigned char)tail_delta);
1250 enc.put_16(tail_delta);
1259 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1262 enc.set_pos(enc_pos0);
1273 enc.put_16(gap_block, len-1);
1284 if (len > 3 && (compression_level_ > 3))
1292 enc.put_16(gap_block[0]);
1299 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1302 enc.set_pos(enc_pos0);
1313 enc.put_16(gap_block, len-1);
1326 if (compression_level_ > 3 && arr_len > 1)
1332 bout.
gamma(arr_len);
1334 bout.
gamma(prev + 1);
1336 for (
unsigned i = 1; i < arr_len; ++i)
1339 bout.
gamma(curr - prev);
1344 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1346 if (gamma_size >= plain_size)
1348 enc.set_pos(enc_pos0);
1352 compression_stat_[scode]++;
1358 enc.put_prefixed_array_16(scode, gap_array, arr_len,
true);
1359 compression_stat_[scode]++;
1387 bout.
gamma(arr_len-4);
1392 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1394 if (enc_size >= raw_size)
1396 enc.set_pos(enc_pos0);
1400 compression_stat_[scode]++;
1406 enc.put_prefixed_array_16(scode, gap_block, arr_len,
true);
1407 compression_stat_[scode]++;
1427 if (min_v >= 256 && tail >= 256)
1429 interpolated_gap_array_v0(gap_block, arr_len, enc, inverted);
1447 arr_len |= (1 << 1);
1451 enc.put_8((
unsigned char)min_v);
1456 enc.put_8((
unsigned char)tail);
1466 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1468 if (enc_size >= raw_size)
1470 enc.set_pos(enc_pos0);
1474 compression_stat_[scode]++;
1480 enc.put_prefixed_array_16(scode, gap_block, arr_len,
true);
1481 compression_stat_[scode]++;
1490 scores_[mod_size_] = score; models_[mod_size_] = mod;
1498 unsigned bc, bit_gaps;
1512 bit_model_d0_size_ = unsigned(8 + (32 * d0_bc *
sizeof(
bm::word_t)));
1534 unsigned arr_size_inv =
1539 const unsigned bie_bits_per_int = 4;
1543 32 + (bit_gaps-1) * bie_bits_per_int);
1562 bit_gaps -= bit_gaps > 2 ? 2 : 0;
1571 for (
unsigned i = 0; i < mod_size_; ++i)
1573 if (scores_[i] < min_score)
1575 min_score = scores_[i];
1588 if (compression_level_ >= 5)
1589 return find_bit_best_encoding_l5(block);
1591 unsigned bc, bit_gaps;
1598 if (compression_level_ <= 1)
1603 if (compression_level_ <= 5)
1606 if (compression_level_ >= 2)
1615 bit_model_d0_size_ = unsigned(8 + (32 * d0_bc *
sizeof(
bm::word_t)));
1619 if (compression_level_ >= 4)
1642 if (compression_level_ >= 3)
1646 unsigned arr_size_inv =
1652 if (compression_level_ >= 4)
1654 const unsigned gamma_bits_per_int = 6;
1657 if (compression_level_ == 4)
1661 16 + (bit_gaps-1) * gamma_bits_per_int);
1664 16 + bc * gamma_bits_per_int);
1665 if (inverted_bc > 3 && inverted_bc < bit_gaps && inverted_bc <
bm::gap_equiv_len)
1667 16 + inverted_bc * gamma_bits_per_int);
1677 for (
unsigned i = 0; i < mod_size_; ++i)
1679 if (scores_[i] < min_score)
1681 min_score = scores_[i];
1695 if (compression_level_ <= 2)
1705 if (compression_level_ < 4)
1707 if (compression_level_ == 4)
1712 if (inverted_bc < len)
1714 if (compression_level_ < 4)
1716 if (compression_level_ == 4)
1725 if (compression_level_ == 4)
1739 bool invert =
false;
1741 unsigned char enc_choice = find_gap_best_encoding(gap_block);
1745 gamma_gap_block(gap_block, enc);
1754 enc.
put_16(gap_temp_block[0]);
1771 gamma_gap_array(gap_temp_block, arr_len, enc, invert);
1774 interpolated_encode_gap_block(gap_block, enc);
1786 interpolated_gap_array(gap_temp_block, arr_len, enc, invert);
1789 gamma_gap_block(gap_block, enc);
1800 enc.put_8((blk[0]==0) ? 0 : 1);
1836 enc.put_32(blk + i, j - i);
1854 if (bit_model_0run_size_ < bit_model_d0_size_)
1856 encode_bit_interval(block, enc, 0);
1875 enc.put_32(block[off+j+0]);
1876 enc.put_32(block[off+j+1]);
1877 enc.put_32(block[off+j+2]);
1878 enc.put_32(block[off+j+3]);
1891 encode_bit_interval(block, enc, 0);
1910 bv.calc_stat(&stat);
1915 optimize_ = free_ =
false;
1936 bv.optimize(temp_block_, BV::opt_compress, &st);
1939 optimize_ = free_ =
false;
1948 unsigned mask = inverted ? ~0u : 0u;
1957 unsigned char scode =
1959 enc.put_prefixed_array_16(scode, bit_idx_arr_.data(), arr_len,
true);
1960 compression_stat_[scode]++;
1963 encode_bit_digest(block, enc, digest0_);
1972 gamma_gap_block(bit_idx_arr_.data(), enc);
1980 unsigned mask = inverted ? ~0u : 0u;
1988 gamma_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2000 unsigned mask = inverted ? ~0u : 0u;
2008 interpolated_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2011 encode_bit_digest(block, enc, digest0_);
2020 interpolated_encode_gap_block(bit_idx_arr_.data(), enc);
2046 enc.put_8((
unsigned char)head);
2053 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2055 if (enc_size >= raw_size)
2057 enc.set_pos(enc_pos0);
2061 compression_stat_[scode]++;
2064 encode_bit_digest(block, enc, digest0_);
2077 unsigned mask = inverted ? ~0u : 0u;
2085 unsigned char scode =
2104 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2106 if (enc_size >= raw_size)
2108 enc.set_pos(enc_pos0);
2112 if (digest0_ != ~0ull && enc_size > bit_model_d0_size_)
2114 enc.set_pos(enc_pos0);
2118 compression_stat_[scode]++;
2123 encode_bit_digest(block, enc, digest0_);
2128#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO) \
2130 enc.put_8(B_1ZERO); \
2131 else if (nb < 256u) \
2133 enc.put_8(B_8ZERO); \
2134 enc.put_8((unsigned char)nb); \
2136 else if (nb < 65536u) \
2138 enc.put_8(B_16ZERO); \
2139 enc.put_16((unsigned short)nb); \
2141 else if (nb < bm::id_max32) \
2143 enc.put_8(B_32ZERO); \
2144 enc.put_32(unsigned(nb)); \
2148 enc.put_8(B_64ZERO); \
2161 if (bookm.ptr_ && nb_delta >= bookm.nb_range_)
2163 unsigned char* curr = enc.get_pos();
2164 size_t bytes_delta = size_t(curr - bookm.ptr_);
2165 if (bytes_delta > bookm.min_bytes_range_)
2167 enc.set_pos(bookm.ptr_);
2168 switch (bookm.bm_type_)
2171 bytes_delta -=
sizeof(unsigned);
2172 if (bytes_delta < 0xFFFFFFFF)
2173 enc.put_32(
unsigned(bytes_delta));
2177 bytes_delta -= (
sizeof(unsigned)-1);
2178 if (bytes_delta < 0xFFFFFF)
2179 enc.put_24(
unsigned(bytes_delta));
2182 bytes_delta -=
sizeof(
unsigned short);
2183 if (bytes_delta < 0xFFFF)
2184 enc.put_16((
unsigned short)bytes_delta);
2193 if (nb_delta < 0xFF)
2196 enc.put_8((
unsigned char) nb_delta);
2199 if (nb_delta < 0xFFFF)
2202 enc.put_16((
unsigned short) nb_delta);
2205 if (nb_delta < 0xFFFFFF)
2208 enc.put_24(
unsigned(nb_delta));
2214 enc.put_32(
unsigned(nb_delta));
2219 if (nb_delta < 0xFFFFFFFFFFFFUL)
2222 enc.put_48(nb_delta);
2227 enc.put_64(nb_delta);
2239 bookm.ptr_ = enc.get_pos() + 1;
2240 switch (bookm.bm_type_)
2265 unsigned char* buf,
size_t buf_size)
2269 reset_compression_stats();
2273 encode_header(bv, enc);
2288 process_bookmark(i, sb_bookmark, enc);
2291 const bm::word_t* blk = bman.get_block(i0, j0);
2312 if (nb > 1 && nb < 128)
2315 unsigned char c = (
unsigned char)((1u << 7) | nb);
2342 bm::word_t*** blk_root = bman.top_blocks_root();
2349 const bm::word_t* blk_next = bman.get_block(i0, j0);
2382 bool found = xor_scan_.search_best_xor_gap(blk,
2383 ref_idx_+1, ref_vect_->size(),
2392 size_type ridx = xor_scan_.found_ridx();
2393 size_type plain_idx = xor_scan_.get_ref_vector().get_row_idx(ridx);
2402 unsigned delta = glen - res_len;
2405 if (plain_idx < 256)
2408 enc.
put_8((
unsigned char) plain_idx);
2412 if (plain_idx < 65536)
2415 enc.
put_16((
unsigned short) plain_idx);
2420 enc.
put_32(
unsigned(plain_idx));
2423 encode_gap_block(tmp_buf, enc);
2437 xor_scan_.compute_x_block_stats(blk);
2439 xor_scan_.search_best_xor_mask(blk,
2440 ref_idx_+1, ref_vect_->size(),
2445 size_type ridx = xor_scan_.found_ridx();
2446 if (xor_scan_.is_eq_found())
2448 size_type row_idx = xor_scan_.get_ref_vector().get_row_idx(ridx);
2450 enc.
put_32(
unsigned(row_idx));
2454 found = xor_scan_.validate_found(xor_block_, blk);
2459 size_type plain_idx = xor_scan_.get_ref_vector().get_row_idx(ridx);
2461 if (plain_idx < 256)
2464 enc.
put_8((
unsigned char) plain_idx);
2468 if (plain_idx < 65536)
2471 enc.
put_16((
unsigned short) plain_idx);
2476 enc.
put_32(
unsigned(plain_idx));
2491 unsigned char model = find_bit_best_encoding(blk);
2499 unsigned bit_idx = 0;
2512 encode_bit_array(blk, enc,
false);
2515 encode_bit_array(blk, enc,
true);
2518 gamma_gap_bit_block(blk, enc);
2521 encode_bit_interval(blk, enc, 0);
2524 gamma_arr_bit_block(blk, enc,
false);
2527 gamma_arr_bit_block(blk, enc,
true);
2530 bienc_arr_bit_block(blk, enc,
false);
2533 bienc_arr_bit_block(blk, enc,
true);
2536 interpolated_arr_bit_block(blk, enc,
false);
2539 interpolated_arr_bit_block(blk, enc,
true);
2542 interpolated_gap_bit_block(blk, enc);
2545 bienc_gap_bit_block(blk, enc);
2548 encode_bit_digest(blk, enc, digest0_);
2689 const unsigned char* buf,
2696 unsigned char header_flag = dec.
get_8();
2703 if (bo_current == bo)
2752 const unsigned char* buf,
2753 typename BV::size_type from,
2754 typename BV::size_type to,
2760 unsigned char header_flag = dec.
get_8();
2767 if (bo_current == bo)
2798 bv.keep_range(from, to);
2802template<
typename DEC,
typename BLOCK_IDX>
2805 unsigned block_type,
2830 if (k == 0) --bit_idx;
2833 dst_arr[k] = bit_idx;
2846 dst_arr[len-1] = max_v;
2870 dst_arr[len-1] = max_v;
2878 throw std::logic_error(err_msg());
2880 BM_THROW(BM_ERR_SERIALFORMAT);
2886template<
typename DEC,
typename BLOCK_IDX>
2895 unsigned arr_len = dec.get_16();
2909template<
typename DEC,
typename BLOCK_IDX>
2916 this->read_bic_arr(
decoder, blk);
2920template<
typename DEC,
typename BLOCK_IDX>
2927 unsigned arr_len = dec.get_16();
2932 id_array_[0] = head;
2933 id_array_[1] = min_v;
2934 id_array_[arr_len] = 65535;
2944template<
typename DEC,
typename BLOCK_IDX>
2972 block[off+j+0] |= dec.get_32();
2973 block[off+j+1] |= dec.get_32();
2974 block[off+j+2] |= dec.get_32();
2975 block[off+j+3] |= dec.get_32();
2984template<
typename DEC,
typename BLOCK_IDX>
2992 unsigned char run_type = dec.get_8();
2995 unsigned run_length = dec.get_16();
2998 unsigned run_end = j + run_length;
3000 for (;j < run_end; ++j)
3002 unsigned w = dec.get_32();
3014template<
typename DEC,
typename BLOCK_IDX>
3017 unsigned block_type,
3028 *dst_block = gap_head;
3059 unsigned arr_len = read_id_list(
decoder, block_type, id_array_);
3069 unsigned len = (gap_head >> 3);
3072 *dst_block = gap_head;
3078 for (
unsigned i = 1; i < len; ++i)
3082 *(++gap_data_ptr) = gap_sum;
3089 unsigned len = (gap_head >> 3);
3090 *dst_block = gap_head;
3092 dst_block[1] = min_v;
3101 unsigned len = (gap_head >> 3);
3115 dst_block[0] = gap_head;
3116 dst_block[1] = min_v;
3119 dst_block[len-1] = max_v;
3126 throw std::logic_error(err_msg());
3128 BM_THROW(BM_ERR_SERIALFORMAT);
3141template<
typename DEC,
typename BLOCK_IDX>
3152 if (save_pos > skip_pos_)
3201 if (nb_sync <= expect_nb)
3215template<
class BV,
class DEC>
3230template<
class BV,
class DEC>
3233 alloc_.free_bit_block(temp_block_);
3235 alloc_.free_bit_block(xor_block_);
3239template<
class BV,
class DEC>
3242 ref_vect_ = ref_vect;
3243 if (ref_vect_ && !xor_block_)
3244 xor_block_ = alloc_.alloc_bit_block();
3247template<
class BV,
class DEC>
3257 bool inv_flag =
false;
3266 (
sizeof(
gap_word_t) == 2 ? dec.get_16() : dec.get_32());
3273 *gap_temp_block = gap_head;
3274 dec.get_16(gap_temp_block+1, len - 1);
3279 blk = bman.get_allocator().alloc_bit_block();
3280 bman.set_block(nb, blk);
3287 bv.combine_operation_with_block(nb,
3301 bman.get_allocator().alloc_gap_block(
unsigned(level), bman.glen());
3303 *gap_blk_ptr = gap_head;
3308 dec.get_16(gap_blk + 1, len - 1);
3314 *gap_temp_block = gap_head;
3315 dec.get_16(gap_temp_block + 1, len - 1);
3332 unsigned arr_len = this->read_id_list(dec, btype, this->id_array_);
3333 gap_temp_block[0] = 0;
3344 bv.combine_operation_with_block(nb,
3353 gap_head = dec.get_16();
3360 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3365 gap_head = dec.get_16();
3366 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3371 throw std::logic_error(this->err_msg());
3373 BM_THROW(BM_ERR_SERIALFORMAT);
3377 bv.combine_operation_with_block(nb,
3383template<
class BV,
class DEC>
3392 blk = bman.get_allocator().alloc_bit_block();
3393 bman.set_block(nb, blk);
3398 blk = bman.deoptimize_block(nb);
3406 blk = bman.deoptimize_block(nb);
3410 for (
unsigned k = 0; k < len; ++k)
3419 this->read_bic_arr(dec, blk);
3424 blk = bman.deoptimize_block(nb);
3427 this->read_bic_arr(dec, temp_block_);
3432 this->read_bic_gap(dec, blk);
3435 this->read_digest0_block(dec, blk);
3440 throw std::logic_error(this->err_msg());
3442 BM_THROW(BM_ERR_SERIALFORMAT);
3447template<
class BV,
class DEC>
3456 blk = bman.get_allocator().alloc_bit_block();
3457 bman.set_block(nb, blk);
3463 bv.combine_operation_with_block(nb, temp_block_, 0,
BM_OR);
3467template<
class BV,
class DEC>
3473 unsigned head_idx = dec.get_16();
3474 unsigned tail_idx = dec.get_16();
3478 blk = bman.get_allocator().alloc_bit_block();
3479 bman.set_block(nb, blk);
3480 for (
unsigned k = 0; k < head_idx; ++k)
3482 dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
3489 dec.get_32(temp_block_ + head_idx, tail_idx - head_idx + 1);
3490 bv.combine_operation_with_block(nb, temp_block_, 0,
BM_OR);
3494template<
class BV,
class DEC>
3504 blk = bman.deoptimize_block(nb);
3510 blk = bman.get_allocator().alloc_bit_block();
3512 bman.set_block(nb, blk);
3517 for (
unsigned k = 0; k < len; ++k)
3523 for (
unsigned k = 0; k < len; ++k)
3530template<
class BV,
class DEC>
3532 const unsigned char* buf,
3536 if (!bman.is_init())
3542 bv.set_new_blocks_strat(
BM_GAP);
3551 unsigned char header_flag = dec.get_8();
3561 throw std::logic_error(this->err_msg());
3563 BM_THROW(BM_ERR_SERIALFORMAT);
3580 bv_size = dec.get_32();
3581 if (bv_size > bv.size())
3584 for (
unsigned cnt = dec.get_32(); cnt; --cnt)
3590 return dec.size()-1;
3610 throw std::logic_error(this->err_msg());
3612 BM_THROW(BM_ERR_SERIALFORMAT);
3617 bv_size = dec.get_32();
3618 if (bv_size > bv.size())
3625 xor_block_ = alloc_.alloc_bit_block();
3635 bool x_ref_gap =
false;
3640 unsigned char btype;
3654 btype = dec.get_8();
3655 if (btype & (1 << 7))
3657 nb = btype & ~(1 << 7);
3663 bm::word_t* blk = bman.get_block_ptr(i0, j0);
3697 throw std::logic_error(this->err_msg());
3699 BM_THROW(BM_ERR_SERIALFORMAT);
3709 bman.set_block_all_set(i);
3712 full_blocks = dec.get_8();
3713 goto process_full_blocks;
3716 full_blocks = dec.get_16();
3717 goto process_full_blocks;
3720 full_blocks = dec.get_32();
3721 goto process_full_blocks;
3725 full_blocks = dec.get_64();
3726 goto process_full_blocks;
3731 throw std::logic_error(this->err_msg());
3733 BM_THROW(BM_ERR_SERIALFORMAT);
3736 process_full_blocks:
3741 bv.set_range(from, to-1);
3746 decode_block_bit(dec, bv, i, blk);
3752 bv.set_bit_no_check(bit_idx);
3758 this->read_0runs_block(dec, temp_block);
3759 bv.combine_operation_with_block(i, temp_block, 0,
BM_OR);
3763 decode_block_bit_interval(dec, bv, i, blk);
3778 deserialize_gap(btype, dec, bv, bman, i, blk);
3781 decode_arrbit(dec, bv, i, blk);
3787 decode_bit_block(btype, dec, bman, i, blk);
3790 decode_bit_block(btype, dec, bman, i, blk);
3796 this->bookmark_idx_ = i;
3797 this->skip_offset_ = dec.get_32();
3798 goto process_bookmark;
3800 this->bookmark_idx_ = i;
3801 this->skip_offset_ = dec.get_24();
3802 goto process_bookmark;
3804 this->bookmark_idx_ = i;
3805 this->skip_offset_ = dec.get_16();
3809 this->skip_pos_ = dec.get_pos() + this->skip_offset_;
3811 nb_from = this->try_skip(dec, i, nb_from);
3818 nb_sync = dec.get_8();
3819 goto process_nb_sync;
3821 nb_sync = dec.get_16();
3822 goto process_nb_sync;
3824 nb_sync = dec.get_24();
3825 goto process_nb_sync;
3827 nb_sync = dec.get_32();
3828 goto process_nb_sync;
3831 goto process_nb_sync;
3835 BM_ASSERT(i == this->bookmark_idx_ + nb_sync);
3836 if (i != this->bookmark_idx_ + nb_sync)
3839 throw std::logic_error(this->err_msg());
3841 BM_THROW(BM_ERR_SERIALFORMAT);
3851 if (x_ref_d64 || x_ref_gap)
3853 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3854 x_ref_d64 = 0; x_ref_gap =
false;
3857 row_idx = dec.get_32();
3858 size_type idx = ref_vect_->find(row_idx);
3859 if (idx == ref_vect_->not_found())
3867 const bm::word_t* ref_blk = ref_bman.get_block_ptr(i0, j0);
3869 bv.combine_operation_with_block(i, ref_blk,
3874 if (x_ref_d64 || x_ref_gap)
3876 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3877 x_ref_d64 = 0; x_ref_gap =
false;
3879 row_idx = dec.get_8();
3882 if (x_ref_d64 || x_ref_gap)
3884 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3885 x_ref_d64 = 0; x_ref_gap =
false;
3887 row_idx = dec.get_16();
3891 if (x_ref_d64 || x_ref_gap)
3893 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3894 x_ref_d64 = 0; x_ref_gap =
false;
3896 row_idx = dec.get_32();
3898 x_ref_d64 = dec.get_64();
3901 x_ref_idx = ref_vect_->find(row_idx);
3903 if (x_ref_idx == ref_vect_->not_found())
3910 or_block_ = bman.deoptimize_block(i);
3911 bman.set_block_ptr(i, 0);
3916 if (x_ref_d64 || x_ref_gap)
3918 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3919 x_ref_d64 = 0; x_ref_gap =
false;
3921 row_idx = dec.get_8();
3923 goto process_xor_ref;
3925 if (x_ref_d64 || x_ref_gap)
3927 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3928 x_ref_d64 = 0; x_ref_gap =
false;
3930 row_idx = dec.get_16();
3932 goto process_xor_ref;
3934 if (x_ref_d64 || x_ref_gap)
3936 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3937 x_ref_d64 = 0; x_ref_gap =
false;
3939 row_idx = dec.get_32();
3941 goto process_xor_ref;
3947 throw std::logic_error(this->err_msg());
3949 BM_THROW(BM_ERR_SERIALFORMAT);
3958 if (x_ref_d64 || x_ref_gap)
3959 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3961 bv.set_new_blocks_strat(strat);
3968template<
class BV,
class DEC>
3977 const bvector_type* ref_bv = ref_vect_->get_bv(x_ref_idx);
3983 const bm::word_t* ref_blk = ref_bman.get_block_ptr(i0, j0);
3994 ref_blk = xor_block_;
4005 blk = bman.check_allocate_block(nb, 0);
4015 alloc_.free_bit_block(or_block_);
4017 bman.optimize_bit_block(i0, j0);
4024template<
typename DEC,
typename BLOCK_IDX>
4027 end_of_stream_(false),
4067 unsigned char header_flag =
decoder_.get_8();
4116 throw std::bad_alloc();
4118 BM_THROW(BM_ERR_BADALLOC);
4124template<
typename DEC,
typename BLOCK_IDX>
4128 ::free(block_idx_arr_);
4132template<
typename DEC,
typename BLOCK_IDX>
4148 end_of_stream_ =
true;
4152 last_id_ = decoder_.get_32();
4159 end_of_stream_ =
true;
4164 block_type_ = decoder_.get_8();
4168 if (block_type_ & (1u << 7u))
4170 mono_block_cnt_ = (block_type_ & ~(1u << 7u)) - 1;
4171 state_ = e_zero_blocks;
4175 switch (block_type_)
4179 end_of_stream_ =
true; state_ = e_unknown;
4182 state_ = e_zero_blocks;
4183 mono_block_cnt_ = 0;
4186 state_ = e_zero_blocks;
4187 mono_block_cnt_ = decoder_.get_8()-1;
4190 state_ = e_zero_blocks;
4191 mono_block_cnt_ = decoder_.get_16()-1;
4194 state_ = e_zero_blocks;
4195 mono_block_cnt_ = decoder_.get_32()-1;
4198 state_ = e_one_blocks;
4202 state_ = e_one_blocks;
4203 mono_block_cnt_ = 0;
4206 state_ = e_one_blocks;
4207 mono_block_cnt_ = decoder_.get_8()-1;
4210 state_ = e_one_blocks;
4211 mono_block_cnt_ = decoder_.get_16()-1;
4214 state_ = e_one_blocks;
4215 mono_block_cnt_ = decoder_.get_32()-1;
4218 state_ = e_one_blocks;
4231 state_ = e_bit_block;
4240 gap_head_ = decoder_.get_16();
4259 state_ = e_gap_block;
4262 state_ = e_gap_block;
4268 this->bookmark_idx_ = block_idx_;
4269 this->skip_offset_ = decoder_.get_32();
4270 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4273 this->bookmark_idx_ = block_idx_;
4274 this->skip_offset_ = decoder_.get_24();
4275 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4278 this->bookmark_idx_ = block_idx_;
4279 this->skip_offset_ = decoder_.get_16();
4280 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4284 nb_sync = decoder_.get_8();
4285 goto process_nb_sync;
4287 nb_sync = decoder_.get_16();
4288 goto process_nb_sync;
4290 nb_sync = decoder_.get_24();
4291 goto process_nb_sync;
4293 nb_sync = decoder_.get_32();
4294 goto process_nb_sync;
4297 goto process_nb_sync;
4301 BM_ASSERT(block_idx_ == this->bookmark_idx_ + nb_sync);
4302 if (block_idx_ != this->bookmark_idx_ + nb_sync)
4305 throw std::logic_error(this->err_msg());
4307 BM_THROW(BM_ERR_SERIALFORMAT);
4325 throw std::logic_error(this->err_msg());
4327 BM_THROW(BM_ERR_SERIALFORMAT);
4336 if (!mono_block_cnt_)
4348 throw std::logic_error(this->err_msg());
4350 BM_THROW(BM_ERR_SERIALFORMAT);
4355template<
typename DEC,
typename BLOCK_IDX>
4359 BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks);
4360 if (!mono_block_cnt_)
4364 block_idx_ += mono_block_cnt_+1;
4365 mono_block_cnt_ = 0;
4371template<
typename DEC,
typename BLOCK_IDX>
4379 for (
unsigned k = 0; k < len; ++k)
4387 for (
unsigned k = 0; k < len; ++k)
4393template<
typename DEC,
typename BLOCK_IDX>
4404 switch (this->block_type_)
4413 unsigned char run_type = decoder_.get_8();
4416 unsigned run_length = decoder_.get_16();
4419 decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
4427 unsigned head_idx = decoder_.get_16();
4428 unsigned tail_idx = decoder_.get_16();
4431 for (
unsigned i = 0; i < head_idx; ++i)
4433 decoder_.get_32(dst_block + head_idx,
4434 tail_idx - head_idx + 1);
4440 int pos = int(tail_idx - head_idx) + 1;
4448 get_arr_bit(dst_block,
true );
4453 throw std::logic_error(this->err_msg());
4455 BM_THROW(BM_ERR_SERIALFORMAT);
4459 get_inv_arr(dst_block);
4464 this->read_bic_arr(decoder_, dst_block);
4467 this->read_bic_arr_inv(decoder_, tmp_block);
4474 this->read_bic_gap(decoder_, dst_block);
4479 this->read_digest0_block(decoder_, dst_block);
4484 throw std::logic_error(this->err_msg());
4486 BM_THROW(BM_ERR_SERIALFORMAT);
4492template<
typename DEC,
typename BLOCK_IDX>
4500 switch (block_type_)
4507 unsigned head_idx = decoder_.get_16();
4508 unsigned tail_idx = decoder_.get_16();
4509 for (
unsigned i = head_idx; i <= tail_idx; ++i)
4510 dst_block[i] |= decoder_.get_32();
4515 unsigned char run_type = decoder_.get_8();
4518 unsigned run_length = decoder_.get_16();
4521 unsigned run_end = j + run_length;
4522 for (;j < run_end; ++j)
4525 dst_block[j] |= decoder_.get_32();
4537 get_arr_bit(dst_block,
false );
4540 get_inv_arr(tmp_block);
4544 this->read_bic_arr(decoder_, dst_block);
4547 this->read_bic_arr_inv(decoder_, tmp_block);
4551 this->read_bic_gap(decoder_, dst_block);
4554 this->read_digest0_block(decoder_, dst_block);
4559 throw std::logic_error(this->err_msg());
4561 BM_THROW(BM_ERR_SERIALFORMAT);
4567template<
typename DEC,
typename BLOCK_IDX>
4576 switch (block_type_)
4583 unsigned char run_type = decoder_.get_8();
4586 unsigned run_length = decoder_.get_16();
4588 unsigned run_end = j + run_length;
4591 for (;j < run_end; ++j)
4594 dst_block[j] &= decoder_.get_32();
4599 for (;j < run_end; ++j)
4610 unsigned head_idx = decoder_.get_16();
4611 unsigned tail_idx = decoder_.get_16();
4613 for ( i = 0; i < head_idx; ++i)
4615 for ( i = head_idx; i <= tail_idx; ++i)
4616 dst_block[i] &= decoder_.get_32();
4623 get_arr_bit(tmp_block,
true );
4628 get_inv_arr(tmp_block);
4636 this->read_bic_arr(decoder_, tmp_block);
4640 this->read_bic_arr(decoder_, 0);
4643 this->read_bic_arr_inv(decoder_, tmp_block);
4652 this->read_bic_gap(decoder_, tmp_block);
4656 this->read_bic_gap(decoder_, 0);
4663 this->read_digest0_block(decoder_, tmp_block);
4667 this->read_digest0_block(decoder_, 0);
4672 throw std::logic_error(this->err_msg());
4674 BM_THROW(BM_ERR_SERIALFORMAT);
4680template<
typename DEC,
typename BLOCK_IDX>
4690 switch (block_type_)
4694 dst_block[i] ^= decoder_.get_32();
4698 unsigned char run_type = decoder_.get_8();
4701 unsigned run_length = decoder_.get_16();
4704 unsigned run_end = j + run_length;
4705 for (;j < run_end; ++j)
4708 dst_block[j] ^= decoder_.get_32();
4720 unsigned head_idx = decoder_.get_16();
4721 unsigned tail_idx = decoder_.get_16();
4722 for (
unsigned i = head_idx; i <= tail_idx; ++i)
4723 dst_block[i] ^= decoder_.get_32();
4729 get_arr_bit(tmp_block,
true );
4734 get_inv_arr(tmp_block);
4740 this->read_bic_arr(decoder_, tmp_block);
4745 this->read_bic_arr_inv(decoder_, tmp_block);
4757 this->read_bic_gap(decoder_, tmp_block);
4761 this->read_bic_gap(decoder_, 0);
4768 this->read_digest0_block(decoder_, tmp_block);
4772 this->read_digest0_block(decoder_, 0);
4777 throw std::logic_error(this->err_msg());
4779 BM_THROW(BM_ERR_SERIALFORMAT);
4785template<
typename DEC,
typename BLOCK_IDX>
4795 switch (block_type_)
4799 dst_block[i] &= ~decoder_.get_32();
4803 unsigned char run_type = decoder_.get_8();
4806 unsigned run_length = decoder_.get_16();
4809 unsigned run_end = j + run_length;
4810 for (;j < run_end; ++j)
4813 dst_block[j] &= ~decoder_.get_32();
4825 unsigned head_idx = decoder_.get_16();
4826 unsigned tail_idx = decoder_.get_16();
4827 for (
unsigned i = head_idx; i <= tail_idx; ++i)
4828 dst_block[i] &= ~decoder_.get_32();
4834 get_arr_bit(tmp_block,
true );
4839 get_inv_arr(tmp_block);
4845 this->read_bic_arr(decoder_, tmp_block);
4850 this->read_bic_arr_inv(decoder_, tmp_block);
4859 this->read_bic_gap(decoder_, tmp_block);
4863 this->read_bic_gap(decoder_, 0);
4870 this->read_digest0_block(decoder_, tmp_block);
4874 this->read_digest0_block(decoder_, 0);
4879 throw std::logic_error(this->err_msg());
4881 BM_THROW(BM_ERR_SERIALFORMAT);
4888template<
typename DEC,
typename BLOCK_IDX>
4897 switch (block_type_)
4906 unsigned char run_type = decoder_.get_8();
4909 unsigned run_length = decoder_.get_16();
4912 unsigned run_end = j + run_length;
4913 for (;j < run_end; ++j)
4927 unsigned head_idx = decoder_.get_16();
4928 unsigned tail_idx = decoder_.get_16();
4929 for (
unsigned i = head_idx; i <= tail_idx; ++i)
4934 count += get_arr_bit(0);
4941 get_inv_arr(tmp_block);
4945 this->read_bic_arr(decoder_, tmp_block);
4948 this->read_bic_arr_inv(decoder_, tmp_block);
4952 this->read_digest0_block(decoder_, tmp_block);
4956 this->read_bic_gap(decoder_, tmp_block);
4963 throw std::logic_error(this->err_msg());
4965 BM_THROW(BM_ERR_SERIALFORMAT);
4972template<
typename DEC,
typename BLOCK_IDX>
4986 switch (block_type_)
4993 unsigned char run_type = decoder_.get_8();
4996 unsigned run_length = decoder_.get_16();
4999 unsigned run_end = j + run_length;
5000 for (;j < run_end; ++j)
5015 unsigned head_idx = decoder_.get_16();
5016 unsigned tail_idx = decoder_.get_16();
5017 for (
unsigned i = head_idx; i <= tail_idx; ++i)
5028 get_inv_arr(tmp_block);
5031 this->read_bic_arr(decoder_, tmp_block);
5034 this->read_bic_arr_inv(decoder_, tmp_block);
5037 this->read_bic_gap(decoder_, tmp_block);
5040 this->read_digest0_block(decoder_, 0);
5045 throw std::logic_error(this->err_msg());
5047 BM_THROW(BM_ERR_SERIALFORMAT);
5055template<
typename DEC,
typename BLOCK_IDX>
5065 switch (block_type_)
5074 unsigned char run_type = decoder_.get_8();
5077 unsigned run_length = decoder_.get_16();
5080 unsigned run_end = j + run_length;
5081 for (;j < run_end; ++j)
5095 unsigned head_idx = decoder_.get_16();
5096 unsigned tail_idx = decoder_.get_16();
5097 for (
unsigned i = head_idx; i <= tail_idx; ++i)
5104 get_arr_bit(tmp_block,
true );
5108 get_inv_arr(tmp_block);
5113 this->read_bic_arr(decoder_, tmp_block);
5116 this->read_bic_arr_inv(decoder_, tmp_block);
5120 this->read_digest0_block(decoder_, tmp_block);
5124 this->read_bic_gap(decoder_, tmp_block);
5131 throw std::logic_error(this->err_msg());
5133 BM_THROW(BM_ERR_SERIALFORMAT);
5140template<
typename DEC,
typename BLOCK_IDX>
5150 switch (block_type_)
5167 unsigned char run_type = decoder_.get_8();
5170 unsigned run_length = decoder_.get_16();
5171 unsigned run_end = j + run_length;
5174 for (;j < run_end; ++j)
5182 for (;j < run_end; ++j)
5193 unsigned head_idx = decoder_.get_16();
5194 unsigned tail_idx = decoder_.get_16();
5197 for (i = 0; i < head_idx; ++i)
5199 for (i = head_idx; i <= tail_idx; ++i)
5208 get_arr_bit(tmp_block,
true );
5211 get_inv_arr(tmp_block);
5215 this->read_bic_arr(decoder_, tmp_block);
5218 this->read_bic_arr_inv(decoder_, tmp_block);
5222 this->read_digest0_block(decoder_, tmp_block);
5226 this->read_bic_gap(decoder_, tmp_block);
5232 throw std::logic_error(this->err_msg());
5234 BM_THROW(BM_ERR_SERIALFORMAT);
5238 return count_adapter.
sum();
5241template<
typename DEC,
typename BLOCK_IDX>
5251 switch (block_type_)
5268 unsigned char run_type = decoder_.get_8();
5271 unsigned run_length = decoder_.get_16();
5272 unsigned run_end = j + run_length;
5275 for (;j < run_end; ++j)
5283 for (;j < run_end; ++j)
5294 unsigned head_idx = decoder_.get_16();
5295 unsigned tail_idx = decoder_.get_16();
5298 for (i = 0; i < head_idx; ++i)
5300 for (i = head_idx; i <= tail_idx; ++i)
5309 get_arr_bit(tmp_block,
true );
5312 get_inv_arr(tmp_block);
5316 this->read_bic_arr(decoder_, tmp_block);
5319 this->read_bic_arr_inv(decoder_, tmp_block);
5324 this->read_digest0_block(decoder_, tmp_block);
5328 this->read_bic_gap(decoder_, tmp_block);
5334 throw std::logic_error(this->err_msg());
5336 BM_THROW(BM_ERR_SERIALFORMAT);
5340 return count_adapter.
sum();
5343template<
typename DEC,
typename BLOCK_IDX>
5353 switch (block_type_)
5370 unsigned char run_type = decoder_.get_8();
5373 unsigned run_length = decoder_.get_16();
5374 unsigned run_end = j + run_length;
5377 for (;j < run_end; ++j)
5385 for (;j < run_end; ++j)
5396 unsigned head_idx = decoder_.get_16();
5397 unsigned tail_idx = decoder_.get_16();
5400 for (i = 0; i < head_idx; ++i)
5402 for (i = head_idx; i <= tail_idx; ++i)
5412 get_arr_bit(tmp_block,
true );
5415 get_inv_arr(tmp_block);
5419 this->read_bic_arr(decoder_, tmp_block);
5422 this->read_bic_arr_inv(decoder_, tmp_block);
5426 this->read_digest0_block(decoder_, tmp_block);
5430 this->read_bic_gap(decoder_, tmp_block);
5436 throw std::logic_error(this->err_msg());
5438 BM_THROW(BM_ERR_SERIALFORMAT);
5442 return count_adapter.
sum();
5445template<
typename DEC,
typename BLOCK_IDX>
5455 switch (block_type_)
5472 unsigned char run_type = decoder_.get_8();
5475 unsigned run_length = decoder_.get_16();
5476 unsigned run_end = j + run_length;
5479 for (;j < run_end; ++j)
5482 count +=
word_bitcount(decoder_.get_32() & (~dst_block[j]));
5494 unsigned head_idx = decoder_.get_16();
5495 unsigned tail_idx = decoder_.get_16();
5498 for (i = head_idx; i <= tail_idx; ++i)
5506 get_arr_bit(tmp_block,
true );
5509 get_inv_arr(tmp_block);
5513 this->read_bic_arr(decoder_, tmp_block);
5516 this->read_bic_arr_inv(decoder_, tmp_block);
5520 this->read_digest0_block(decoder_, tmp_block);
5524 this->read_bic_gap(decoder_, tmp_block);
5530 throw std::logic_error(this->err_msg());
5532 BM_THROW(BM_ERR_SERIALFORMAT);
5535 return count_adapter.
sum();
5540template<
typename DEC,
typename BLOCK_IDX>
5561 for (
unsigned k = 0; k < len; ++k)
5573 decoder_.seek(len * 2);
5578template<
typename DEC,
typename BLOCK_IDX>
5582 ++(this->block_idx_);
5583 this->state_ = e_blocks;
5585 return decoder_.get_16();
5588template<
typename DEC,
typename BLOCK_IDX>
5592 BM_ASSERT(this->state_ == e_gap_block ||
5596 this->read_gap_block(this->decoder_,
5601 ++(this->block_idx_);
5602 this->state_ = e_blocks;
5606template<
typename DEC,
typename BLOCK_IDX>
5615 get_bit_func_type bit_func = bit_func_table_[op];
5617 unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
5618 this->state_ = e_blocks;
5619 ++(this->block_idx_);
5629: temp_block_(0), ref_vect_(0)
5631 temp_block_ = alloc_.alloc_bit_block();
5639 alloc_.free_bit_block(temp_block_);
5645 const unsigned char* buf,
5656 return deserialize_xor(bv, bv_tmp, op);
5660void operation_deserializer<BV>::deserialize_xor_range(
5662 const unsigned char* buf,
5671 unsigned char header_flag = dec.get_8();
5678 if (bo_current == bo)
5680 deserializer<BV, bm::decoder> deserial;
5681 deserial.set_ref_vectors(ref_vect_);
5682 deserial.set_range(idx_from, idx_to);
5683 deserial.deserialize(bv, buf);
5691 deserializer<BV, bm::decoder_big_endian> deserial;
5692 deserial.set_ref_vectors(ref_vect_);
5693 deserial.set_range(idx_from, idx_to);
5694 deserial.deserialize(bv, buf);
5699 deserializer<BV, bm::decoder_little_endian> deserial;
5700 deserial.set_ref_vectors(ref_vect_);
5701 deserial.set_range(idx_from, idx_to);
5702 deserial.deserialize(bv, buf);
5709 bv.keep_range_no_check(idx_from, idx_to);
5716operation_deserializer<BV>::deserialize_xor(
bvector_type& bv,
5720 size_type count = 0;
5736 count = bv_tmp.
count();
5759 throw std::logic_error(
"BM: serialization error");
5761 BM_THROW(BM_ERR_SERIALFORMAT);
5773 const unsigned char* buf,
5779 unsigned char header_flag = dec.
get_8();
5784 return deserialize_xor(bv, buf, op, exit_on_one);
5797 if (bo_current == bo)
5799 serial_stream_current ss(buf);
5800 return it_d_.deserialize(bv, ss, temp_block_, op, exit_on_one);
5806 serial_stream_be ss(buf);
5807 return it_d_be_.deserialize(bv, ss, temp_block_, op, exit_on_one);
5811 serial_stream_le ss(buf);
5812 return it_d_le_.deserialize(bv, ss, temp_block_, op, exit_on_one);
5817 throw std::logic_error(
"BM::platform error: unknown endianness");
5819 BM_THROW(BM_ERR_SERIALFORMAT);
5827 const unsigned char* buf,
5833 unsigned char header_flag = dec.
get_8();
5837 blocks_manager_type& bman = bv.get_blocks_manager();
5838 if (!bman.is_init())
5839 bv.set_range(idx_from, idx_to);
5848 deserialize_xor_range(bv_tmp, buf, idx_from, idx_to);
5866 if (bo_current == bo)
5868 serial_stream_current ss(buf);
5869 it_d_.set_range(idx_from, idx_to);
5870 it_d_.deserialize(bv, ss, temp_block_, op,
false);
5871 it_d_.unset_range();
5878 serial_stream_be ss(buf);
5879 it_d_be_.set_range(idx_from, idx_to);
5880 it_d_be_.deserialize(bv, ss, temp_block_, op,
false);
5881 it_d_be_.unset_range();
5886 serial_stream_le ss(buf);
5887 it_d_le_.set_range(idx_from, idx_to);
5888 it_d_le_.deserialize(bv, ss, temp_block_, op,
false);
5889 it_d_le_.unset_range();
5895 throw std::logic_error(
"BM::platform error: unknown endianness");
5897 BM_THROW(BM_ERR_SERIALFORMAT);
5907template<
class BV,
class SerialIterator>
5911 is_range_set_ =
true;
5917template<
class BV,
class SerialIterator>
5920 serial_iterator_type& sit,
5924 const unsigned win_size = 64;
5929 for (
unsigned i = 0; i <= id_count;)
5932 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
5934 id_buffer[j] = sit.get_id();
5942 for (
unsigned i = 0; i <= id_count;)
5945 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
5947 id_buffer[j] = sit.get_id();
5955template<
class BV,
class SerialIterator>
5957iterator_deserializer<BV, SerialIterator>::finalize_target_vector(
5958 blocks_manager_type& bman,
5960 size_type bv_block_idx)
5962 size_type count = 0;
5973 if (bv_block_idx <= nblock_last)
5974 bman.set_all_zero(bv_block_idx, nblock_last);
5983 bm::word_t*** blk_root = bman.top_blocks_root();
5984 unsigned top_size = bman.top_block_size();
5985 for (;i < top_size; ++i)
5998 count += bman.block_bitcount(blk_blk[j]);
6008 throw std::logic_error(err_msg());
6010 BM_THROW(BM_ERR_SERIALFORMAT);
6016template<
class BV,
class SerialIterator>
6018iterator_deserializer<BV, SerialIterator>::process_id_list(
6020 serial_iterator_type& sit,
6023 size_type count = 0;
6024 unsigned id_count = sit.get_id_count();
6025 bool set_clear =
true;
6032 load_id_list(bv_tmp, sit, id_count,
true);
6045 load_id_list(bv, sit, id_count, set_clear);
6048 for (
unsigned i = 0; i < id_count; ++i)
6056 for (
unsigned i = 0; i < id_count; ++i)
6066 for (size_type i = 0; i < id_count; ++i)
6077 load_id_list(bv_tmp, sit, id_count,
true);
6085 load_id_list(bv_tmp, sit, id_count,
true);
6093 load_id_list(bv_tmp, sit, id_count,
false);
6094 count += bv_tmp.
count();
6100 load_id_list(bv_tmp, sit, id_count,
true);
6108 throw std::logic_error(err_msg());
6110 BM_THROW(BM_ERR_SERIALFORMAT);
6118template<
class BV,
class SerialIterator>
6131 gap_temp_block[0] = 0;
6133 blocks_manager_type& bman = bv.get_blocks_manager();
6134 if (!bman.is_init())
6137 if (sit.bv_size() && (sit.bv_size() > bv.size()))
6138 bv.resize(sit.bv_size());
6140 typename serial_iterator_type::iterator_state state;
6141 state = sit.get_state();
6142 if (state == serial_iterator_type::e_list_ids)
6144 count = process_id_list(bv, sit, op);
6156 count += finalize_target_vector(bman, op, bv_block_idx);
6160 state = sit.state();
6163 case serial_iterator_type::e_blocks:
6168 if (is_range_set_ && (bv_block_idx < nb_range_from_))
6171 bool skip_flag = sit.try_skip(bv_block_idx, nb_range_from_);
6174 bv_block_idx = sit.block_idx();
6175 BM_ASSERT(bv_block_idx <= nb_range_from_);
6176 BM_ASSERT(sit.state() == serial_iterator_type::e_blocks);
6180 case serial_iterator_type::e_bit_block:
6182 BM_ASSERT(sit.block_idx() == bv_block_idx);
6185 bm::word_t* blk = bman.get_block_ptr(i0, j0);
6198 blk = bman.make_bit_block(bv_block_idx);
6211 throw std::logic_error(err_msg());
6213 BM_THROW(BM_ERR_SERIALFORMAT);
6230 blk = bman.deoptimize_block(bv_block_idx);
6236 unsigned c = sit.get_bit_block(blk, temp_block, sop);
6238 if (exit_on_one && count)
6243 bman.optimize_bit_block(i0, j0);
6251 case serial_iterator_type::e_zero_blocks:
6253 BM_ASSERT(bv_block_idx == sit.block_idx());
6260 bv_block_idx = sit.skip_mono_blocks();
6266 bv_block_idx = sit.skip_mono_blocks();
6267 bman.set_all_zero(nb_start, bv_block_idx-1);
6278 bm::word_t* blk = bman.get_block_ptr(i0, j0);
6289 bman.zero_block(bv_block_idx);
6303 count += blk ? bman.block_bitcount(blk) : 0;
6304 if (exit_on_one && count)
6316 case serial_iterator_type::e_one_blocks:
6318 BM_ASSERT(bv_block_idx == sit.block_idx());
6321 bm::word_t* blk = bman.get_block_ptr(i0, j0);
6328 bman.set_block_all_set(bv_block_idx);
6336 bman.zero_block(bv_block_idx);
6340 if (++empty_op_cnt > 64)
6343 bool b = bv.find_reverse(last_id);
6347 if (last_nb < bv_block_idx)
6353 count += blk ? bman.block_bitcount(blk) : 0;
6361 blk = bman.deoptimize_block(bv_block_idx);
6392 bman.set_block_all_set(bv_block_idx);
6406 if (exit_on_one && count)
6411 case serial_iterator_type::e_gap_block:
6413 BM_ASSERT(bv_block_idx == sit.block_idx());
6417 const bm::word_t* blk = bman.get_block(i0, j0);
6419 sit.get_gap_block(gap_temp_block);
6437 if (exit_on_one && count)
6445 bman.zero_block(bv_block_idx);
6456 bv_block_idx, gap_temp_block, level);
6469 bv.combine_operation_with_block(bv_block_idx,
6477 bv.combine_operation_with_block(
6487 blk = bman.get_block_ptr(i0, j0);
6503 throw std::logic_error(err_msg());
6505 BM_THROW(BM_ERR_SERIALFORMAT);
6512 if (is_range_set_ && (bv_block_idx > nb_range_to_))
6528#pragma warning( pop )
Algorithms for bvector<> (main include)
#define IS_FULL_BLOCK(addr)
#define IS_VALID_ADDR(addr)
#define BMPTR_SETBIT0(ptr)
#define BMSET_PTRGAP(ptr)
#define FULL_BLOCK_FAKE_ADDR
#define FULL_BLOCK_REAL_ADDR
Bit manipulation primitives (internal)
#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO)
Utilities for bit transposition (internal) (experimental!)
pre-processor un-defines to avoid global space pollution (internal)
Bit manipulation primitives (internal)
Functions and utilities for XOR filters (internal)
Byte based reader for un-aligned bit streaming.
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
void bic_decode_u16(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Byte based writer for un-aligned bit streaming.
void flush() BMNOEXCEPT
Flush the incomplete 32-bit accumulator word.
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array encode.
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
Bit-block sum adapter, takes values and sums it /internal.
bm::word_t sum() const BMNOEXCEPT
Get accumulated sum.
List of reference bit-vectors with their true index associations.
void assign_if_not_set(allocator_pool_type &pool, bvector< Alloc > &bv) BMNOEXCEPT
check if vector has no assigned allocator and set one
Bitvector Bit-vector container with runtime compression of bits.
@ opt_compress
compress blocks when possible (GAP/prefix sum)
size_type count() const BMNOEXCEPT
population count (count of ON bits)
bm::bvector< Alloc > & bit_and(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand AND : this := bv1 AND bv2
blocks_manager< Alloc > blocks_manager_type
void swap(bvector< Alloc > &bvect) BMNOEXCEPT
Exchanges content of bv and this bvector.
blocks_manager_type::block_idx_type block_idx_type
void clear(const size_type *ids, size_type ids_size, bm::sort_order so=bm::BM_UNKNOWN)
clear list of bits in this bitset
bool get_bit(size_type n) const BMNOEXCEPT
Returns true if bit n is set and false if bit n is 0.
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
Class for decoding data from memory buffer.
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
bm::word_t get_24() BMNOEXCEPT
Reads 24-bit word from the decoding buffer.
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
bm::id64_t get_48() BMNOEXCEPT
Reads 48-bit word from the decoding buffer.
Base deserialization class.
const unsigned char * skip_pos_
decoder skip position
void read_bic_arr(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read binary interpolated list into a bit-set.
static const char * err_msg() BMNOEXCEPT
block_idx_type bookmark_idx_
last bookmark block index
unsigned skip_offset_
bookmark to skip 256 encoded blocks
static void read_0runs_block(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
read bit-block encoded as runs
void read_bic_arr_inv(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read inverted binary interpolated list into a bit-set.
block_idx_type try_skip(decoder_type &decoder, block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT
Try to skip if skip bookmark is available within reach.
void read_digest0_block(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read digest0-type bit-block.
void read_bic_gap(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read binary interpolated gap blocks into a bitset.
bm::bit_in< DEC > bit_in_type
void read_gap_block(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_block, bm::gap_word_t &gap_head)
Read GAP block from the stream.
unsigned read_id_list(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_arr)
Read list of bit ids.
bm::gap_word_t * id_array_
ptr to idx array for temp decode use
Deserializer for bit-vector.
void xor_decode(size_type x_ref_idx, bm::id64_t x_ref_d64, blocks_manager_type &bman, block_idx_type nb)
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR de-serialization (no transfer of ownership for the poi...
block_arridx_type bit_idx_arr_
allocator_type::allocator_pool_type allocator_pool_type
deseriaizer_base< DEC, block_idx_type > parent_type
bm::bv_ref_vector< BV > bv_ref_vector_type
void deserialize_gap(unsigned char btype, decoder_type &dec, bvector_type &bv, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
bvector_type::allocator_type allocator_type
parent_type::decoder_type decoder_type
void set_range(size_type from, size_type to) BMNOEXCEPT
set deserialization range [from, to] This is NOT exact, approximate range, content outside range is n...
allocator_pool_type pool_
const bv_ref_vector_type * ref_vect_
ref.vector for XOR compression
void unset_range() BMNOEXCEPT
Disable range deserialization.
BV::blocks_manager_type blocks_manager_type
block_arridx_type gap_temp_block_
bm::word_t * xor_block_
xor product
size_t deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block=0)
bvector_type::block_idx_type block_idx_type
void decode_arrbit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
void decode_bit_block(unsigned char btype, decoder_type &dec, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
void decode_block_bit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
void decode_block_bit_interval(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
bm::heap_vector< bm::gap_word_t, allocator_type, true > block_arridx_type
unsigned char * position_type
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Functor for Elias Gamma encoding.
Iterator to walk forward the serialized stream.
void set_range(size_type from, size_type to)
set deserialization range [from, to]
SerialIterator serial_iterator_type
void unset_range() BMNOEXCEPT
disable range filtration
bvector_type::size_type size_type
size_type deserialize(bvector_type &bv, serial_iterator_type &sit, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
bm::bv_ref_vector< BV > bv_ref_vector_type
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR de-serialization (no transfer of ownership for the pointe...
void deserialize_range(bvector_type &bv, const unsigned char *buf, size_type idx_from, size_type idx_to)
BV::allocator_type allocator_type
~operation_deserializer()
size_type deserialize(bvector_type &bv, const unsigned char *buf, set_operation op, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
void deserialize_range(bvector_type &bv, const unsigned char *buf, bm::word_t *, size_type idx_from, size_type idx_to)
bvector_type::size_type size_type
size_type deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *, set_operation op=bm::set_OR, bool exit_on_one=false)
Obsolete! Deserialize bvector using buffer as set operation argument.
Serialization stream iterator.
unsigned get_arr_bit(bm::word_t *dst_block, bool clear_target=true) BMNOEXCEPT
Get array of bits out of the decoder into bit block (Converts inverted list into bits) Returns number...
bm::id_t last_id_
Last id from the id list.
unsigned get_bit_block_COUNT_B(bm::word_t *dst_block, bm::word_t *tmp_block)
deseriaizer_base< DEC, block_idx_type > parent_type
void next()
get next block
serial_stream_iterator(const unsigned char *buf)
block_idx_type mono_block_cnt_
number of 0 or 1 blocks
unsigned block_type_
current block type
unsigned get_id_count() const BMNOEXCEPT
Number of ids in the inverted list (valid for e_list_ids)
unsigned get_bit() BMNOEXCEPT
unsigned get_bit_block_SUB(bm::word_t *dst_block, bm::word_t *tmp_block)
void get_inv_arr(bm::word_t *block) BMNOEXCEPT
unsigned get_bit_block_COUNT_SUB_AB(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_COUNT_SUB_BA(bm::word_t *dst_block, bm::word_t *tmp_block)
bm::id_t get_id() const BMNOEXCEPT
Get last id from the id list.
unsigned get_bit_block_COUNT_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_COUNT(bm::word_t *dst_block, bm::word_t *tmp_block)
bool is_eof() const
Returns true if end of bit-stream reached.
decoder_type & decoder()
Get low level access to the decoder (use carefully)
block_idx_type block_idx_
current block index
unsigned get_bit_block_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_ASSIGN(bm::word_t *dst_block, bm::word_t *tmp_block)
iterator_state state() const BMNOEXCEPT
Returns iterator internal state.
block_idx_type block_idx() const BMNOEXCEPT
Get current block index.
gap_word_t * block_idx_arr_
unsigned dec_size() const
Return current decoder size.
unsigned get_bit_block_COUNT_A(bm::word_t *dst_block, bm::word_t *tmp_block)
~serial_stream_iterator()
gap_word_t glevels_[bm::gap_levels]
GAP levels.
unsigned(serial_stream_iterator< DEC, BLOCK_IDX >::* get_bit_func_type)(bm::word_t *, bm::word_t *)
member function pointer for bitset-bitset get operations
iterator_state
iterator is a state machine, this enum encodes its key value
@ e_bit_block
one bit block
@ e_list_ids
plain int array
@ e_zero_blocks
one or more zero bit blocks
@ e_gap_block
one gap block
@ e_one_blocks
one or more all-1 bit blocks
@ e_blocks
stream of blocks
bool try_skip(block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT
Try to skip if skip bookmark is available within reach.
void get_gap_block(bm::gap_word_t *dst_block)
Read gap block data (with head)
deseriaizer_base< DEC, BLOCK_IDX >::decoder_type decoder_type
get_bit_func_type bit_func_table_[bm::set_END]
unsigned id_cnt_
Id counter for id list.
unsigned get_bit_block_COUNT_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_block_type() const BMNOEXCEPT
Get current block type.
unsigned get_bit_block_COUNT_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
block_idx_type skip_mono_blocks() BMNOEXCEPT
skip all zero or all-one blocks
block_idx_type bv_size() const
serialized bitvector size
iterator_state get_state() const BMNOEXCEPT
unsigned get_bit_block(bm::word_t *dst_block, bm::word_t *tmp_block, set_operation op)
read bit block, using logical operation
Bit-vector serialization class.
void gamma_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc) BMNOEXCEPT
void serialize(const BV &bv, typename serializer< BV >::buffer &buf, const statistics_type *bv_stat=0)
Bitvector serialization into buffer object (resized automatically)
unsigned char find_bit_best_encoding(const bm::word_t *block) BMNOEXCEPT
Determine best representation for a bit-block.
serializer(bm::word_t *temp_block)
void reset_compression_stats() BMNOEXCEPT
Reset all accumulated compression statistics.
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector)
bvector_type::block_idx_type block_idx_type
bvector_type::size_type size_type
unsigned char find_gap_best_encoding(const bm::gap_word_t *gap_block) BMNOEXCEPT
Determine best representation for GAP block based on current set compression level.
void gamma_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
void set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT
Set current index in ref.vector collection (not a row idx or plain idx)
void interpolated_gap_array_v0(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) BMNOEXCEPT
void set_compression_level(unsigned clevel) BMNOEXCEPT
Set compression level.
void encode_bit_digest(const bm::word_t *blk, bm::encoder &enc, bm::id64_t d0) BMNOEXCEPT
Encode bit-block using digest (hierarchical compression)
bm::bv_ref_vector< BV > bv_ref_vector_type
void byte_order_serialization(bool value) BMNOEXCEPT
Set byte-order serialization (for cross platform compatibility)
unsigned char find_bit_best_encoding_l5(const bm::word_t *block) BMNOEXCEPT
Determine best representation for a bit-block (level 5)
void optimize_serialize_destroy(BV &bv, typename serializer< BV >::buffer &buf)
Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized an...
bvector_type::allocator_type allocator_type
byte_buffer< allocator_type > buffer
void encode_bit_array(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
Encode bit-block as an array of bits.
void interpolated_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
encode bit-block as interpolated gap block
bvector_type::blocks_manager_type blocks_manager_type
void encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size incre...
unsigned get_compression_level() const BMNOEXCEPT
Get compression level (0-5), Default 5 (recommended) 0 - take as is 1, 2 - apply light weight RLE/GAP...
void reset_models() BMNOEXCEPT
const size_type * get_compression_stat() const BMNOEXCEPT
Return serialization counter vector.
static void process_bookmark(block_idx_type nb, bookmark_state &bookm, bm::encoder &enc) BMNOEXCEPT
Check if bookmark needs to be placed and if so, encode it into serialization BLOB.
serializer(const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
Constructor.
void interpolated_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
void interpolated_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) BMNOEXCEPT
Encode GAP block as an array with binary interpolated coder.
void gamma_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false) BMNOEXCEPT
Encode GAP block as delta-array with Elias Gamma coder.
void add_model(unsigned char mod, unsigned score) BMNOEXCEPT
void bienc_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
encode bit-block as interpolated bit block of gaps
void gamma_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
bvector_type::statistics statistics_type
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
void encode_bit_interval(const bm::word_t *blk, bm::encoder &enc, unsigned size_control) BMNOEXCEPT
Encode BIT block with repeatable runs of zeroes.
void interpolated_encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc) BMNOEXCEPT
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
void encode_header(const BV &bv, bm::encoder &enc) BMNOEXCEPT
Encode serialization header information.
void bienc_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
XOR scanner to search for complement-similarities in collections of bit-vectors.
Encoding utilities for serialization (internal)
unsigned bit_block_find(const bm::word_t *BMRESTRICT block, unsigned nbit, unsigned *BMRESTRICT pos) BMNOEXCEPT
Searches for the next 1 bit in the BIT block.
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
unsigned bit_block_calc_change(const bm::word_t *block) BMNOEXCEPT
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Bitblock copy operation.
T bit_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len, unsigned mask=0) BMNOEXCEPT
Convert bit block into an array of ints corresponding to 1 bits.
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size) BMNOEXCEPT
Inspects block for full zero words.
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
bm::id_t bit_block_count(const bm::word_t *block) BMNOEXCEPT
Bitcount for bit block.
BMFORCEINLINE void clear_bit(unsigned *dest, unsigned bitpos) BMNOEXCEPT
Clear 1 bit in a block.
bm::id_t bit_operation_or_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock OR operation and calculates bitcount of the result.
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos) BMNOEXCEPT
Set 1 bit in a block.
bm::id64_t calc_block_digest0(const bm::word_t *const block) BMNOEXCEPT
Compute digest for 64 non-zero areas.
void bit_invert(T *start) BMNOEXCEPT
bm::id64_t bit_block_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
Bitblock memset operation.
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
bm::id_t bit_operation_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock XOR operation and calculates bitcount of the result.
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock AND operation and calculates bitcount of the result.
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock SUB operation and calculates bitcount of the result.
set_operation
Codes of set operations.
strategy
Block allocation strategies.
@ BM_GAP
GAP compression is ON.
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
serialization_flags
Bit mask flags for serialization algorithm <>
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
void deserialize_range(BV &bv, const unsigned char *buf, typename BV::size_type from, typename BV::size_type to, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector range deserialization from a memory BLOB.
@ BM_NO_GAP_LENGTH
save no GAP info (save some space)
@ BM_NO_BYTE_ORDER
save no byte-order info (save some space)
distance_metric
Distance metrics codes defined for vectors A and B.
distance_metric operation2metric(set_operation op) BMNOEXCEPT
Convert set operation into compatible distance metric.
@ COUNT_XOR
(A ^ B).count()
@ COUNT_SUB_BA
(B - A).count()
unsigned gap_bit_count_unr(const T *buf) BMNOEXCEPT
Calculates number of bits ON in GAP buffer. Loop unrolled version.
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false) BMNOEXCEPT
Convert gap block into array of ints corresponding to 1 bits.
gap_word_t * gap_operation_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize) BMNOEXCEPT
GAP XOR operation.
void gap_invert(T *buf) BMNOEXCEPT
Inverts all bits in the GAP buffer.
unsigned gap_set_array(T *buf, const T *arr, unsigned len) BMNOEXCEPT
Convert array to GAP buffer.
void set_gap_level(T *buf, int level) BMNOEXCEPT
Sets GAP block capacity level.
void gap_add_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
Adds(OR) GAP block to bitblock.
void gap_convert_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT buf) BMNOEXCEPT
GAP block to bitblock conversion.
void gap_set_all(T *buf, unsigned set_max, unsigned value) BMNOEXCEPT
Sets all bits to 0 or 1 (GAP)
unsigned gap_add_value(T *buf, unsigned pos) BMNOEXCEPT
Add new value to the end of GAP buffer.
int gap_calc_level(unsigned len, const T *glevel_len) BMNOEXCEPT
Calculates GAP block capacity level.
BMFORCEINLINE bm::gap_word_t gap_length(const bm::gap_word_t *BMRESTRICT buf) BMNOEXCEPT
Returns GAP block length.
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
BV::size_type count_and(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of AND operation of two bitsets.
BV::size_type count_or(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of OR operation of two bitsets.
BV::size_type count_sub(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of SUB operation of two bitsets.
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
bm::distance_metric_descriptor::size_type count_xor(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of XOR operation of two bitsets.
const unsigned char set_block_ref_eq
block is a copy of a reference block
const unsigned set_block_digest_wave_size
const unsigned char set_block_gap
Plain GAP block.
void bit_block_change_bc(const bm::word_t *BMRESTRICT block, unsigned *BMRESTRICT gc, unsigned *BMRESTRICT bc) BMNOEXCEPT
const unsigned char set_block_arrbit_inv
List of bits OFF.
const unsigned char set_nb_sync_mark8
bookmark sync point (8-bits)
const unsigned char set_block_bit_interval
Interval block.
bool check_block_zero(const bm::word_t *blk, bool deep_scan) BMNOEXCEPT
Checks all conditions and returns true if block consists of only 0 bits.
const unsigned char set_block_bit_digest0
H-compression with digest mask.
const unsigned char set_block_xor_ref32
..... 32-bit (should never happen)
const unsigned char set_block_arrgap_bienc_inv_v2
Interpolated GAP array (inverted)
const unsigned char set_block_bitgap_bienc
Interpolated bit-block as GAPs.
const unsigned char set_block_arrgap_egamma_inv
Gamma compressed inverted delta GAP array.
bool check_block_one(const bm::word_t *blk, bool deep_scan) BMNOEXCEPT
Checks if block has only 1 bits.
const unsigned char set_block_arr_bienc_inv
Interpolated inverted block int array.
unsigned bit_to_gap(gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len) BMNOEXCEPT
Convert bit block to GAP representation.
const unsigned set_sub_array_size
const unsigned gap_max_bits_cmrz
const unsigned char set_block_16one
Up to 65536 all-set blocks.
void for_each_dgap(const T *gap_buf, Func &func)
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Internal function computes different distance metrics.
const unsigned set_total_blocks
const unsigned char set_nb_sync_mark48
const unsigned char set_block_arrgap_bienc
Interpolated GAP array.
ByteOrder
Byte orders recognized by the library.
const unsigned char set_block_8one
Up to 256 all-set blocks.
const unsigned char set_nb_sync_mark16
const unsigned char set_block_32one
Up to 4G all-set blocks.
const unsigned char set_block_bit_0runs
Bit block with encoded zero intervals.
const unsigned bie_cut_off
const unsigned char set_block_64one
lots of all-set blocks
const unsigned char set_block_gap_bienc
Interpolated GAP block (legacy)
const unsigned char set_block_xor_gap_ref16
..... 16-bit
const unsigned set_compression_default
Default compression level.
const unsigned char set_block_arrbit
List of bits ON.
const unsigned char set_block_1one
One block all-set (1111...)
const unsigned char set_block_arrgap_inv
List of bits OFF (GAP block)
const unsigned gap_levels
const unsigned char set_block_gapbit
GAP compressed bitblock.
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size) BMNOEXCEPT
bm::operation setop2op(bm::set_operation op) BMNOEXCEPT
Convert set operation to operation.
const unsigned char set_block_xor_chain
XOR chain (reserved)
const unsigned char set_nb_bookmark32
const unsigned set_block_size
unsigned long long int id64_t
const unsigned block_waves
const unsigned char set_block_xor_ref16
block is masked XOR of a reference block (16-bit)
const unsigned char set_block_arrgap_bienc_inv
Interpolated GAP array (inverted)
const unsigned char set_block_arrgap_egamma
Gamma compressed delta GAP array.
const unsigned gap_equiv_len
const unsigned char set_block_1zero
One all-zero block.
const unsigned char set_block_end
End of serialization.
const unsigned gap_max_buff_len
const unsigned char set_block_arrgap_bienc_v2
Interpolated GAP array (v2)
const unsigned char set_block_xor_gap_ref32
..... 32-bit (should never happen)
const unsigned char set_block_arrgap
List of bits ON (GAP block)
const unsigned char set_nb_sync_mark64
..... 64-bit (should never happen)
const unsigned char set_nb_sync_mark32
const unsigned char set_block_sgapgap
SGAP compressed GAP block.
serialization_header_mask
@ BM_HM_NO_GAPL
no GAP levels
@ BM_HM_ID_LIST
id list stored
@ BM_HM_NO_BO
no byte-order
@ BM_HM_64_BIT
64-bit vector
@ BM_HM_RESIZE
resized vector
@ BM_HM_HXOR
horizontal XOR compression turned ON
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
const unsigned char set_block_arr_bienc
Interpolated block as int array.
const unsigned char set_block_64zero
lots of zero blocks
const unsigned char set_block_gap_bienc_v2
Interpolated GAP block (v2)
const unsigned char set_block_xor_ref8
block is masked XOR of a reference block (8-bit)
const unsigned char set_block_gap_egamma
Gamma compressed GAP block.
unsigned short gap_word_t
const unsigned char set_block_32zero
Up to 4G zero blocks.
const unsigned char set_block_8zero
Up to 256 zero blocks.
const unsigned gap_max_bits
const unsigned char set_block_bit_1bit
Bit block with 1 bit ON.
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) BMNOEXCEPT
Recalc linear bvector block index into 2D matrix coordinates.
const unsigned char set_block_aone
All other blocks one.
const unsigned char set_nb_bookmark16
const unsigned set_block_shift
const unsigned set_compression_max
Maximum supported compression level.
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
const unsigned char set_nb_sync_mark24
const unsigned char set_block_xor_gap_ref8
..... 8-bit
const unsigned char set_block_azero
All other blocks zero.
const unsigned bits_in_block
const unsigned char set_block_16zero
Up to 65536 zero blocks.
const unsigned char set_block_bit
Plain bit block.
const unsigned char set_block_bitgap_bienc_v2
Interpolated bit-block as GAPs (v2 - reserved)
const unsigned char set_nb_bookmark24
bool is_const_set_operation(set_operation op) BMNOEXCEPT
Returns true if set operation is constant (bitcount)
const unsigned char set_block_sgapbit
SGAP compressed bitblock.
Bit COUNT SUB AB functor.
size_t max_serialize_mem
estimated maximum memory for serialization
Statistical information about bitset's memory allocation details.
static ByteOrder byte_order()
Bookmark state structure.
unsigned bm_type_
0: 32-bit, 1: 24-bit, 2: 16-bit
bookmark_state(block_idx_type nb_range) BMNOEXCEPT
size_t min_bytes_range_
minimal distance (bytes) between marks
block_idx_type nb_
bookmark block idx
unsigned char * ptr_
bookmark pointer
block_idx_type nb_range_
target bookmark range in blocks