BitMagic-C++
bmserial.h
Go to the documentation of this file.
1#ifndef BMSERIAL__H__INCLUDED__
2#define BMSERIAL__H__INCLUDED__
3/*
4Copyright(c) 2002-2019 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmserial.h
22 \brief Serialization / compression of bvector<>.
23 Set theoretical operations on compressed BLOBs.
24*/
25
26/*!
27 \defgroup bvserial bvector<> serialization
28 Serialization for bvector<> container
29
30 \ingroup bvector
31*/
32
33#ifndef BM__H__INCLUDED__
34// BitMagic utility headers do not include main "bm.h" declaration
35// #include "bm.h" or "bm64.h" explicitly
36# error missing include (bm.h or bm64.h)
37#endif
38
39
40#ifdef _MSC_VER
41#pragma warning( push )
42#pragma warning( disable : 4311 4312 4127)
43#endif
44
45
46
47#include "encoding.h"
48#include "bmfunc.h"
49#include "bmtrans.h"
50#include "bmalgo.h"
51#include "bmutil.h"
52#include "bmbuffer.h"
53#include "bmdef.h"
54#include "bmxor.h"
55
56namespace bm
57{
58
59const unsigned set_compression_max = 5; ///< Maximum supported compression level
60const unsigned set_compression_default = 5; ///< Default compression level
61
62/**
63 Bit-vector serialization class.
64
65 Class designed to convert sparse bit-vectors into a single block of memory
66 ready for file or database storage or network transfer.
67
68 Reuse of this class for multiple serializations (but not across threads).
69 Class resue offers some performance advantage (helps with temp memory
70 reallocations).
71
72 @ingroup bvserial
73*/
74template<class BV>
76{
77public:
78 typedef BV bvector_type;
84
85 typedef byte_buffer<allocator_type> buffer;
87public:
88 /**
89 Constructor
90
91 \param alloc - memory allocator
92 \param temp_block - temporary block for various operations
93 (if NULL it will be allocated and managed by serializer class)
94 Temp block is used as a scratch memory during serialization,
95 use of external temp block allows to avoid unnecessary re-allocations.
96
97 Temp block attached is not owned by the class and NOT deallocated on
98 destruction.
99 */
101 bm::word_t* temp_block = 0);
102
103 serializer(bm::word_t* temp_block);
104
106
107 /*! @name Compression level settings */
108 //@{
109
110 // --------------------------------------------------------------------
111 /**
112 Set compression level. Higher compression takes more time to process.
113 @param clevel - compression level (0-5)
114 @sa get_compression_level
115 */
116 void set_compression_level(unsigned clevel) BMNOEXCEPT;
117
118 /**
119 Get compression level (0-5), Default 5 (recommended)
120 0 - take as is
121 1, 2 - apply light weight RLE/GAP encodings, limited depth hierarchical
122 compression, intervals encoding
123 3 - variant of 2 with different cut-offs
124 4 - delta transforms plus Elias Gamma encoding where possible legacy)
125 5 - binary interpolated encoding (Moffat, et al)
126
127 Recommended: use 3 or 5
128
129 */
131 { return compression_level_; }
132
133
134 //@}
135
136
137 // --------------------------------------------------------------------
138 /*! @name Serialization Methods */
139 //@{
140
141 /**
142 Bitvector serialization into memory block
143
144 @param bv - input bitvector
145 @param buf - out buffer (pre-allocated)
146 No range checking is done in this method.
147 It is responsibility of caller to allocate sufficient
148 amount of memory using information from calc_stat() function.
149
150 @param buf_size - size of the output buffer
151
152 @return Size of serialization block.
153 @sa calc_stat
154 */
155 size_type serialize(const BV& bv,
156 unsigned char* buf, size_t buf_size);
157
158 /**
159 Bitvector serialization into buffer object (resized automatically)
160
161 @param bv - input bitvector
162 @param buf - output buffer object
163 @param bv_stat - input (optional) bit-vector statistics object
164 if NULL, serialize will compute the statistics
165 */
166 void serialize(const BV& bv,
167 typename serializer<BV>::buffer& buf,
168 const statistics_type* bv_stat = 0);
169
170 /**
171 Bitvector serialization into buffer object (resized automatically)
172 Input bit-vector gets optimized and then destroyed, content is
173 NOT guaranteed after this operation.
174 Effectively it moves data into the buffer.
175
176 The reason this operation exsists is because it is faster to do
177 all three operations in one single pass.
178 This is a destructive serialization!
179
180 @param bv - input/output bitvector
181 @param buf - output buffer object
182 */
184 typename serializer<BV>::buffer& buf);
185
186 //@}
187 // --------------------------------------------------------------------
188
189 /**
190 Return serialization counter vector
191 @internal
192 */
194 { return compression_stat_; }
195
196 /**
197 Set GAP length serialization (serializes GAP levels of the original vector)
198
199 @param value - when TRUE serialized vector includes GAP levels parameters
200 */
202
203 /**
204 Set byte-order serialization (for cross platform compatibility)
205 @param value - TRUE serialization format includes byte-order marker
206 */
208
209 /**
210 Add skip-markers to serialization BLOB for faster range decode
211 at the expense of some BLOB size increase
212
213 @param enable - TRUE searilization will add bookmark codes
214 @param bm_interval - bookmark interval in (number of blocks)
215 (suggested between 4 and 512)
216 smaller interval means more bookmarks added to the skip list thus
217 more increasing the BLOB size
218 */
219 void set_bookmarks(bool enable, unsigned bm_interval = 256) BMNOEXCEPT;
220
221 /**
222 Attach collection of reference vectors for XOR serialization
223 (no transfer of ownership for the pointer)
224 @internal
225 */
226 void set_ref_vectors(const bv_ref_vector_type* ref_vect);
227
228 /**
229 Set current index in rer.vector collection
230 (not a row idx or plain idx)
231 */
233
234
235protected:
236 /**
237 Encode serialization header information
238 */
239 void encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT;
240
241 /*! Encode GAP block */
242 void encode_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
243
244 /*! Encode GAP block with Elias Gamma coder */
245 void gamma_gap_block(const bm::gap_word_t* gap_block,
246 bm::encoder& enc) BMNOEXCEPT;
247
248 /**
249 Encode GAP block as delta-array with Elias Gamma coder
250 */
251 void gamma_gap_array(const bm::gap_word_t* gap_block,
252 unsigned arr_len,
253 bm::encoder& enc,
254 bool inverted = false) BMNOEXCEPT;
255
256 /// Encode bit-block as an array of bits
257 void encode_bit_array(const bm::word_t* block,
258 bm::encoder& enc, bool inverted) BMNOEXCEPT;
259
260 void gamma_gap_bit_block(const bm::word_t* block,
261 bm::encoder& enc) BMNOEXCEPT;
262
263 void gamma_arr_bit_block(const bm::word_t* block,
264 bm::encoder& enc, bool inverted) BMNOEXCEPT;
265
266 void bienc_arr_bit_block(const bm::word_t* block,
267 bm::encoder& enc, bool inverted) BMNOEXCEPT;
268
269 /// encode bit-block as interpolated bit block of gaps
270 void bienc_gap_bit_block(const bm::word_t* block,
271 bm::encoder& enc) BMNOEXCEPT;
272
274 bm::encoder& enc, bool inverted) BMNOEXCEPT;
275 /// encode bit-block as interpolated gap block
277 bm::encoder& enc) BMNOEXCEPT;
278
279 /**
280 Encode GAP block as an array with binary interpolated coder
281 */
282 void interpolated_gap_array(const bm::gap_word_t* gap_block,
283 unsigned arr_len,
284 bm::encoder& enc,
285 bool inverted) BMNOEXCEPT;
287 unsigned arr_len,
288 bm::encoder& enc,
289 bool inverted) BMNOEXCEPT;
290
291
292 /*! Encode GAP block with using binary interpolated encoder */
294 const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT;
295
296 /**
297 Encode BIT block with repeatable runs of zeroes
298 */
299 void encode_bit_interval(const bm::word_t* blk,
300 bm::encoder& enc,
301 unsigned size_control) BMNOEXCEPT;
302 /**
303 Encode bit-block using digest (hierarchical compression)
304 */
305 void encode_bit_digest(const bm::word_t* blk,
306 bm::encoder& enc,
307 bm::id64_t d0) BMNOEXCEPT;
308
309 /**
310 Determine best representation for GAP block based
311 on current set compression level
312
313 @return set_block_gap, set_block_bit_1bit, set_block_arrgap
314 set_block_arrgap_egamma, set_block_arrgap_bienc
315 set_block_arrgap_inv, set_block_arrgap_egamma_inv
316 set_block_arrgap_bienc_inv, set_block_gap_egamma
317 set_block_gap_bienc
318
319 @internal
320 */
321 unsigned char
323
324 /// Determine best representation for a bit-block
325 unsigned char find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT;
326
327 /// Determine best representation for a bit-block (level 5)
328 unsigned char find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT;
329
330 /// Reset all accumulated compression statistics
332
333 void reset_models() BMNOEXCEPT { mod_size_ = 0; }
334 void add_model(unsigned char mod, unsigned score) BMNOEXCEPT;
335protected:
336
337 /// Bookmark state structure
339 {
341 : ptr_(0), nb_(0),
342 nb_range_(nb_range), bm_type_(0)
343 {
344 min_bytes_range_ = nb_range * 8;
345 if (min_bytes_range_ < 512)
346 min_bytes_range_ = 512;
347
348 if (nb_range < 15)
349 bm_type_ = 2; // 16-bit offset
350 else
351 if (nb_range < 255)
352 bm_type_ = 1; // 24-bit offset
353 }
354
355 unsigned char* ptr_; ///< bookmark pointer
356 block_idx_type nb_; ///< bookmark block idx
357 block_idx_type nb_range_; ///< target bookmark range in blocks
358 unsigned bm_type_; ///< 0:32-bit, 1: 24-bit, 2: 16-bit
359 size_t min_bytes_range_; ///< minumal distance (bytes) between marks
360 };
361
362 /**
363 Check if bookmark needs to be placed and if so, encode it
364 into serialization BLOB
365
366 @param nb - block idx
367 @param bookm - bookmark state structure
368 @param enc - BLOB encoder
369 */
370 static
371 void process_bookmark(block_idx_type nb, bookmark_state& bookm,
373
374private:
375 serializer(const serializer&);
376 serializer& operator=(const serializer&);
377
378private:
381 typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> block_arridx_type;
382 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
383
384private:
385 bm::id64_t digest0_;
386 unsigned bit_model_d0_size_; ///< memory (bytes) by d0 method (bytes)
387 unsigned bit_model_0run_size_; ///< memory (bytes) by run-0 method (bytes)
388 block_arridx_type bit_idx_arr_;
389 unsigned scores_[bm::block_waves];
390 unsigned char models_[bm::block_waves];
391 unsigned mod_size_;
392
393 allocator_type alloc_;
394 size_type* compression_stat_;
395 bool gap_serial_;
396 bool byte_order_serial_;
397
398 bool sb_bookmarks_;
399 unsigned sb_range_;
400
401 bm::word_t* temp_block_;
402 unsigned compression_level_;
403 bool own_temp_block_;
404
405 bool optimize_; ///< flag to optimize the input vector
406 bool free_; ///< flag to free the input vector
407 allocator_pool_type pool_;
408
409 // XOR compression
410 //
411 const bv_ref_vector_type* ref_vect_; ///< ref.vector for XOR compression
412 bm::xor_scanner<BV> xor_scan_; ///< scanner for XOR similarity
413 size_type ref_idx_; ///< current reference index
414 bm::word_t* xor_block_; ///< xor product
415
416};
417
418/**
419 Base deserialization class
420 \ingroup bvserial
421 @internal
422*/
423template<typename DEC, typename BLOCK_IDX>
425{
426protected:
427 typedef DEC decoder_type;
428 typedef BLOCK_IDX block_idx_type;
430
431protected:
435
436 /// Read GAP block from the stream
438 unsigned block_type,
439 bm::gap_word_t* dst_block,
440 bm::gap_word_t& gap_head);
441
442 /// Read list of bit ids
443 ///
444 /// @return number of ids
446 unsigned block_type,
447 bm::gap_word_t* dst_arr);
448
449 /// Read binary interpolated list into a bit-set
451
452 /// Read binary interpolated gap blocks into a bitset
454
455 /// Read inverted binary interpolated list into a bit-set
457
458 /// Read digest0-type bit-block
460
461
462 /// read bit-block encoded as runs
463 static
465
466 static
467 const char* err_msg() BMNOEXCEPT { return "BM::Invalid serialization format"; }
468
469 /// Try to skip if skip bookmark is available within reach
470 /// @return new block idx if skip went well
471 ///
474 block_idx_type expect_nb) BMNOEXCEPT;
475
476protected:
477 bm::gap_word_t* id_array_; ///< ptr to idx array for temp decode use
478
479 block_idx_type bookmark_idx_;///< last bookmark block index
480 unsigned skip_offset_; ///< bookmark to skip 256 encoded blocks
481 const unsigned char* skip_pos_; ///< decoder skip position
482};
483
484/**
485 Deserializer for bit-vector
486 \ingroup bvserial
487*/
488template<class BV, class DEC>
490 protected deseriaizer_base<DEC, typename BV::block_idx_type>
491{
492public:
493 typedef BV bvector_type;
495 typedef typename BV::size_type size_type;
500
501public:
504
505 /*! Deserialize bit-vector (equivalent to logical OR)
506 @param bv - target bit-vector
507 @param buf - BLOB memory pointer
508 @param temp_block - temporary buffer [block size] (not used)
509
510 @return number of consumed bytes
511 */
513 const unsigned char* buf,
514 bm::word_t* temp_block = 0);
515
516 // ----------------------------------------------------------------
517 /**
518 Attach collection of reference vectors for XOR de-serialization
519 (no transfer of ownership for the pointer)
520 @internal
521 */
522 void set_ref_vectors(const bv_ref_vector_type* ref_vect);
523
524
525 /**
526 set deserialization range [from, to]
527 This is NOT exact, approximate range, content outside range
528 is not guaranteed to be absent
529 @sa unset_range()
530 */
532 {
533 is_range_set_ = 1; idx_from_ = from; idx_to_ = to;
534 }
535
536 /**
537 Disable range deserialization
538 @sa set_range()
539 */
541
542protected:
543 typedef typename BV::blocks_manager_type blocks_manager_type;
544
545protected:
546 void xor_decode(size_type x_ref_idx, bm::id64_t x_ref_d64,
548 block_idx_type nb);//,
549 //bm::word_t* blk);
550
551 void deserialize_gap(unsigned char btype, decoder_type& dec,
554 bm::word_t* blk);
555 void decode_bit_block(unsigned char btype, decoder_type& dec,
558 bm::word_t* blk);
559
561 bvector_type& bv,
563 bm::word_t* blk);
564
566 bvector_type& bv,
568 bm::word_t* blk);
569
571 bvector_type& bv,
573 bm::word_t* blk);
574
575protected:
576 typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> block_arridx_type;
577 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
578
579protected:
583
586
587 // XOR compression
588 //
589 const bv_ref_vector_type* ref_vect_; ///< ref.vector for XOR compression
590 bm::word_t* xor_block_; ///< xor product
592
593 // Range deserialization settings
594 //
598};
599
600
601/**
602 Iterator to walk forward the serialized stream.
603
604 \internal
605 \ingroup bvserial
606*/
607template<class BV, class SerialIterator>
609{
610public:
611 typedef BV bvector_type;
613 typedef SerialIterator serial_iterator_type;
614public:
615
616 /// set deserialization range [from, to]
618
619 /// disable range filtration
620 void unset_range() BMNOEXCEPT { is_range_set_ = false; }
621
624 bm::word_t* temp_block,
626 bool exit_on_one = false);
627
628private:
629 typedef typename BV::blocks_manager_type blocks_manager_type;
630 typedef typename bvector_type::block_idx_type block_idx_type;
631
632 /// load data from the iterator of type "id list"
633 static
634 void load_id_list(bvector_type& bv,
636 unsigned id_count,
637 bool set_clear);
638
639 /// Finalize the deserialization (zero target vector tail or bit-count tail)
640 static
641 size_type finalize_target_vector(blocks_manager_type& bman,
642 set_operation op,
643 size_type bv_block_idx);
644
645 /// Process (obsolete) id-list serialization format
646 static
647 size_type process_id_list(bvector_type& bv,
649 set_operation op);
650 static
651 const char* err_msg() BMNOEXCEPT
652 { return "BM::de-serialization format error"; }
653private:
654 bool is_range_set_ = false;
655 size_type nb_range_from_ = 0;
656 size_type nb_range_to_ = 0;
657};
658
659/*!
660 @brief Serialization stream iterator
661
662 Iterates blocks and control tokens of serialized bit-stream
663 \ingroup bvserial
664 @internal
665*/
666template<class DEC, typename BLOCK_IDX>
667class serial_stream_iterator : protected deseriaizer_base<DEC, BLOCK_IDX>
668{
669public:
671 typedef BLOCK_IDX block_idx_type;
673
674public:
675 serial_stream_iterator(const unsigned char* buf);
677
678 /// serialized bitvector size
679 block_idx_type bv_size() const { return bv_size_; }
680
681 /// Returns true if end of bit-stream reached
682 bool is_eof() const { return end_of_stream_; }
683
684 /// get next block
685 void next();
686
687 /// skip all zero or all-one blocks
689
690 /// read bit block, using logical operation
691 unsigned get_bit_block(bm::word_t* dst_block,
692 bm::word_t* tmp_block,
693 set_operation op);
694
695
696 /// Read gap block data (with head)
697 void get_gap_block(bm::gap_word_t* dst_block);
698
699 /// Return current decoder size
700 unsigned dec_size() const { return decoder_.size(); }
701
702 /// Get low level access to the decoder (use carefully)
704
705 /// iterator is a state machine, this enum encodes
706 /// its key value
707 ///
709 {
711 e_list_ids, ///< plain int array
712 e_blocks, ///< stream of blocks
713 e_zero_blocks, ///< one or more zero bit blocks
714 e_one_blocks, ///< one or more all-1 bit blocks
715 e_bit_block, ///< one bit block
716 e_gap_block ///< one gap block
717
718 };
719
720 /// Returns iterator internal state
721 iterator_state state() const BMNOEXCEPT { return this->state_; }
722
723 iterator_state get_state() const BMNOEXCEPT { return this->state_; }
724 /// Number of ids in the inverted list (valid for e_list_ids)
725 unsigned get_id_count() const BMNOEXCEPT { return this->id_cnt_; }
726
727 /// Get last id from the id list
728 bm::id_t get_id() const BMNOEXCEPT { return this->last_id_; }
729
730 /// Get current block index
731 block_idx_type block_idx() const BMNOEXCEPT { return this->block_idx_; }
732
733public:
734 /// member function pointer for bitset-bitset get operations
735 ///
736 typedef
738 (bm::word_t*,bm::word_t*);
739
740 unsigned
741 get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
742 unsigned
743 get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block);
744 unsigned
745 get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block);
746 unsigned
747 get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block);
748 unsigned
749 get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block);
750 unsigned
751 get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
752 unsigned
753 get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
754 unsigned
755 get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
756 unsigned
757 get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
758 unsigned
759 get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
760 unsigned
761 get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
762 unsigned
763 get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
764 unsigned
766 {
767 return get_bit_block_COUNT(dst_block, tmp_block);
768 }
769
770 /// Get array of bits out of the decoder into bit block
771 /// (Converts inverted list into bits)
772 /// Returns number of words (bits) being read
773 unsigned get_arr_bit(bm::word_t* dst_block,
774 bool clear_target=true) BMNOEXCEPT;
775
776 /// Get current block type
777 unsigned get_block_type() const BMNOEXCEPT { return block_type_; }
778
779 unsigned get_bit() BMNOEXCEPT;
780
781 void get_inv_arr(bm::word_t* block) BMNOEXCEPT;
782
783 /// Try to skip if skip bookmark is available within reach
784 /// @return true if skip went well
785 ///
787 {
788 block_idx_type new_nb = parent_type::try_skip(decoder_, nb, expect_nb);
789 if (new_nb)
790 {
791 block_idx_ = new_nb; state_ = e_blocks;
792 }
793 return new_nb;
794 }
795
796protected:
798
803 unsigned id_cnt_; ///< Id counter for id list
804 bm::id_t last_id_; ///< Last id from the id list
806
807 unsigned block_type_; ///< current block type
808 block_idx_type block_idx_; ///< current block index
809 block_idx_type mono_block_cnt_; ///< number of 0 or 1 blocks
810
813};
814
815/**
816 Deserializer, performs logical operations between bit-vector and
817 serialized bit-vector. This utility class potentially provides faster
818 and/or more memory efficient operation than more conventional deserialization
819 into memory bvector and then logical operation
820
821 \ingroup bvserial
822*/
823template<typename BV>
825{
826public:
827 typedef BV bvector_type;
828 typedef typename BV::allocator_type allocator_type;
831
832public:
835
836 /*!
837 \brief Deserialize bvector using buffer as set operation argument
838
839 \param bv - target bvector
840 \param buf - serialized buffer used as as a logical operation argument
841 \param op - set algebra operation (default: OR)
842 \param exit_on_one - quick exit if set operation found some result
843
844 \return bitcount (for COUNT_* operations)
845 */
847 const unsigned char* buf,
848 set_operation op,
849 bool exit_on_one = false);
850
851 /*!
852 Deserialize range using mask vector (AND)
853 \param bv - target bvector (should be set ranged)
854 \param buf - serialized buffer pointer
855 \param idx_from - range of bits set for deserialization [from..to]
856 \param idx_to - range of bits [from..to]
857 */
859 const unsigned char* buf,
860 size_type idx_from,
861 size_type idx_to);
862
863
864
865
866 // -----------------------------------------------------------------
867 // obsolete methods (switch to new ones, without team_block)
868 //
869
870 /*!
871 \brief Obsolete! Deserialize bvector using buffer as set operation argument
872
873 \param bv - target bvector
874 \param buf - serialized buffer as a logical argument
875 \param op - set algebra operation (default: OR)
876 \param exit_on_one - quick exit if set operation found some result
877
878 \return bitcount (for COUNT_* operations)
879 */
881 const unsigned char* buf,
882 bm::word_t*, /*temp_block,*/
884 bool exit_on_one = false)
885 { return deserialize(bv, buf, op, exit_on_one); }
886
887 /*!
888 Deserialize range using mask vector (AND)
889 \param bv - target bvector (should be set ranged)
890 \param buf - serialized buffer pointer
891 \param idx_from - range of bits set for deserialization [from..to]
892 \param idx_to - range of bits [from..to]
893 */
895 const unsigned char* buf,
896 bm::word_t*, /* temp_block, */
897 size_type idx_from,
898 size_type idx_to)
899 { deserialize_range(bv, buf, idx_from, idx_to); }
900 // -----------------------------------------------------------------
901
902 /**
903 Attach collection of reference vectors for XOR serialization
904 (no transfer of ownership for the pointer)
905 @internal
906 */
908 { ref_vect_ = ref_vect; }
909
910private:
911 size_type deserialize_xor(bvector_type& bv,
912 const unsigned char* buf,
913 set_operation op,
914 bool exit_on_one);
915 static
916 size_type deserialize_xor(bvector_type& bv,
917 bvector_type& bv_tmp,
918 set_operation op);
919
920 void deserialize_xor_range(bvector_type& bv,
921 const unsigned char* buf,
922 size_type idx_from,
923 size_type idx_to);
924
925private:
926 typedef typename bvector_type::block_idx_type block_idx_type;
927 typedef
928 typename BV::blocks_manager_type blocks_manager_type;
929 typedef
931 typedef
933 typedef
935
936private:
937 allocator_type alloc_;
938 bm::word_t* temp_block_;
939
940 /// default stream iterator (same endian)
942 /// big-endian stream iterator
944 /// little-endian stream iterator
946
947
948 // XOR compression related fields
949 //
950 const bv_ref_vector_type* ref_vect_; ///< xor ref.vector
951
952
953};
954
955
956
957
958//----------------------------------------------------------------------------
959//
960//----------------------------------------------------------------------------
961
962
963/// \internal
964/// \ingroup bvserial
967 BM_HM_RESIZE = (1 << 1), ///< resized vector
968 BM_HM_ID_LIST = (1 << 2), ///< id list stored
969 BM_HM_NO_BO = (1 << 3), ///< no byte-order
970 BM_HM_NO_GAPL = (1 << 4), ///< no GAP levels
971 BM_HM_64_BIT = (1 << 5), ///< 64-bit vector
972 BM_HM_HXOR = (1 << 6) ///< horizontal XOR compression turned ON
974
975
976// --------------------------------------------------------------------
977// Serialization stream encoding constants
978//
979
980const unsigned char set_block_end = 0; //!< End of serialization
981const unsigned char set_block_1zero = 1; //!< One all-zero block
982const unsigned char set_block_1one = 2; //!< One block all-set (1111...)
983const unsigned char set_block_8zero = 3; //!< Up to 256 zero blocks
984const unsigned char set_block_8one = 4; //!< Up to 256 all-set blocks
985const unsigned char set_block_16zero = 5; //!< Up to 65536 zero blocks
986const unsigned char set_block_16one = 6; //!< UP to 65536 all-set blocks
987const unsigned char set_block_32zero = 7; //!< Up to 4G zero blocks
988const unsigned char set_block_32one = 8; //!< UP to 4G all-set blocks
989const unsigned char set_block_azero = 9; //!< All other blocks zero
990const unsigned char set_block_aone = 10; //!< All other blocks one
991const unsigned char set_block_bit = 11; //!< Plain bit block
992const unsigned char set_block_sgapbit = 12; //!< SGAP compressed bitblock
993const unsigned char set_block_sgapgap = 13; //!< SGAP compressed GAP block
994const unsigned char set_block_gap = 14; //!< Plain GAP block
995const unsigned char set_block_gapbit = 15; //!< GAP compressed bitblock
996const unsigned char set_block_arrbit = 16; //!< List of bits ON
997const unsigned char set_block_bit_interval = 17; //!< Interval block
998const unsigned char set_block_arrgap = 18; //!< List of bits ON (GAP block)
999const unsigned char set_block_bit_1bit = 19; //!< Bit block with 1 bit ON
1000const unsigned char set_block_gap_egamma = 20; //!< Gamma compressed GAP block
1001const unsigned char set_block_arrgap_egamma = 21; //!< Gamma compressed delta GAP array
1002const unsigned char set_block_bit_0runs = 22; //!< Bit block with encoded zero intervals
1003const unsigned char set_block_arrgap_egamma_inv = 23; //!< Gamma compressed inverted delta GAP array
1004const unsigned char set_block_arrgap_inv = 24; //!< List of bits OFF (GAP block)
1005const unsigned char set_block_64zero = 25; //!< lots of zero blocks
1006const unsigned char set_block_64one = 26; //!< lots of all-set blocks
1007
1008const unsigned char set_block_gap_bienc = 27; //!< Interpolated GAP block (legacy)
1009const unsigned char set_block_arrgap_bienc = 28; //!< Interpolated GAP array
1010const unsigned char set_block_arrgap_bienc_inv = 29; //!< Interpolated GAP array (inverted)
1011const unsigned char set_block_arrbit_inv = 30; //!< List of bits OFF
1012const unsigned char set_block_arr_bienc = 31; //!< Interpolated block as int array
1013const unsigned char set_block_arr_bienc_inv = 32; //!< Interpolated inverted block int array
1014const unsigned char set_block_bitgap_bienc = 33; //!< Interpolated bit-block as GAPs
1015const unsigned char set_block_bit_digest0 = 34; //!< H-compression with digest mask
1016
1017const unsigned char set_block_ref_eq = 35; //!< block is a copy of a reference block
1018const unsigned char set_block_xor_ref8 = 36; //!< block is masked XOR of a reference block (8-bit)
1019const unsigned char set_block_xor_ref16 = 37; //!< block is masked XOR of a reference block (16-bit)
1020const unsigned char set_block_xor_ref32 = 38; //!< ..... 32-bit (should never happen)
1021const unsigned char set_block_xor_gap_ref8 = 39; //!< ..... 8-bit
1022const unsigned char set_block_xor_gap_ref16 = 40; //!< ..... 16-bit
1023const unsigned char set_block_xor_gap_ref32 = 41; //!< ..... 32-bit (should never happen)
1024const unsigned char set_block_xor_chain = 42; //!< XOR chain (reserved)
1025
1026const unsigned char set_block_gap_bienc_v2 = 43; //!< Interpolated GAP block (v2)
1027const unsigned char set_block_arrgap_bienc_v2 = 44; //!< //!< Interpolated GAP array (v2)
1028const unsigned char set_block_arrgap_bienc_inv_v2 = 45; //!< Interpolated GAP array (inverted)
1029const unsigned char set_block_bitgap_bienc_v2 = 46; //!< Interpolated bit-block as GAPs (v2 - reseved)
1030
1031const unsigned char set_nb_bookmark16 = 47; //<! jump ahead mark (16-bit)
1032const unsigned char set_nb_bookmark24 = 48; //<! jump ahead mark (24-bit)
1033const unsigned char set_nb_bookmark32 = 49; //<! jump ahead mark (32-bit)
1034const unsigned char set_nb_sync_mark8 = 50; //!< bookmark sync point (8-bits)
1035const unsigned char set_nb_sync_mark16 = 51;
1036const unsigned char set_nb_sync_mark24 = 52;
1037const unsigned char set_nb_sync_mark32 = 53;
1038const unsigned char set_nb_sync_mark48 = 54;
1039const unsigned char set_nb_sync_mark64 = 55; //!< ..... 64-bit (should never happen)
1040
1041template<class BV>
1043 bm::word_t* temp_block)
1044: alloc_(alloc),
1045 compression_stat_(0),
1046 gap_serial_(false),
1047 byte_order_serial_(true),
1048 sb_bookmarks_(false),
1049 sb_range_(0),
1050 compression_level_(bm::set_compression_default),
1051 ref_vect_(0),
1052 ref_idx_(0),
1053 xor_block_(0)
1054{
1055 bit_idx_arr_.resize(bm::gap_max_bits);
1056 if (temp_block == 0)
1057 {
1058 temp_block_ = alloc_.alloc_bit_block();
1059 own_temp_block_ = true;
1060 }
1061 else
1062 {
1063 temp_block_ = temp_block;
1064 own_temp_block_ = false;
1065 }
1066 compression_stat_ = (size_type*) alloc_.alloc_bit_block();
1067 optimize_ = free_ = false;
1068}
1069
1070template<class BV>
1072: alloc_(allocator_type()),
1073 compression_stat_(0),
1074 gap_serial_(false),
1075 byte_order_serial_(true),
1076 sb_bookmarks_(false),
1077 sb_range_(0),
1078 compression_level_(bm::set_compression_default),
1079 ref_vect_(0),
1080 ref_idx_(0),
1081 xor_block_(0)
1082{
1083 bit_idx_arr_.resize(bm::gap_max_bits);
1084 if (temp_block == 0)
1085 {
1086 temp_block_ = alloc_.alloc_bit_block();
1087 own_temp_block_ = true;
1088 }
1089 else
1090 {
1091 temp_block_ = temp_block;
1092 own_temp_block_ = false;
1093 }
1094 compression_stat_ = (size_type*) alloc_.alloc_bit_block();
1095 optimize_ = free_ = false;
1096}
1097
1098template<class BV>
1100{
1101 if (own_temp_block_)
1102 alloc_.free_bit_block(temp_block_);
1103 if (compression_stat_)
1104 alloc_.free_bit_block((bm::word_t*)compression_stat_);
1105 if (xor_block_)
1106 alloc_.free_bit_block(xor_block_);
1107}
1108
1109
1110template<class BV>
1112{
1113 for (unsigned i = 0; i < 256; ++i)
1114 compression_stat_[i] = 0;
1115}
1116
1117
1118template<class BV>
1120{
1121 if (clevel <= bm::set_compression_max)
1122 compression_level_ = clevel;
1123}
1124
1125template<class BV>
1127{
1128 gap_serial_ = value;
1129}
1130
1131template<class BV>
1133{
1134 byte_order_serial_ = value;
1135}
1136
1137template<class BV>
1138void serializer<BV>::set_bookmarks(bool enable, unsigned bm_interval) BMNOEXCEPT
1139{
1140 sb_bookmarks_ = enable;
1141 if (enable)
1142 {
1143 if (bm_interval > 512)
1144 bm_interval = 512;
1145 else
1146 if (bm_interval < 4)
1147 bm_interval = 4;
1148 }
1149 sb_range_ = bm_interval;
1150}
1151
1152template<class BV>
1154{
1155 ref_vect_ = ref_vect;
1156 xor_scan_.set_ref_vector(ref_vect);
1157 if (!xor_block_)
1158 xor_block_ = alloc_.alloc_bit_block();
1159}
1160
1161template<class BV>
1163{
1164 ref_idx_ = ref_idx;
1165}
1166
1167template<class BV>
1169{
1170 const blocks_manager_type& bman = bv.get_blocks_manager();
1171
1172 unsigned char header_flag = 0;
1173 if (bv.size() == bm::id_max) // no dynamic resize
1174 header_flag |= BM_HM_DEFAULT;
1175 else
1176 header_flag |= BM_HM_RESIZE;
1177
1178 if (!byte_order_serial_)
1179 header_flag |= BM_HM_NO_BO;
1180
1181 if (!gap_serial_)
1182 header_flag |= BM_HM_NO_GAPL;
1183
1184 #ifdef BM64ADDR
1185 header_flag |= BM_HM_64_BIT;
1186 #endif
1187
1188 if (ref_vect_)
1189 {
1190 // TODO: check if XOR search found anything at all
1191 header_flag |= BM_HM_HXOR; // XOR compression turned ON
1192 }
1193
1194 enc.put_8(header_flag);
1195
1196 if (byte_order_serial_)
1197 {
1199 enc.put_8((unsigned char)bo);
1200 }
1201 // keep GAP levels information
1202 if (gap_serial_)
1203 {
1204 enc.put_16(bman.glen(), bm::gap_levels);
1205 }
1206
1207 // save size (only if bvector has been down-sized)
1208 if (header_flag & BM_HM_RESIZE)
1209 {
1210 #ifdef BM64ADDR
1211 enc.put_64(bv.size());
1212 #else
1213 enc.put_32(bv.size());
1214 #endif
1215 }
1216
1217}
1218
1219template<class BV>
1221 const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT
1222{
1223 unsigned len = bm::gap_length(gap_block);
1224 if (len > 4) // BIC encoding
1225 {
1226 encoder::position_type enc_pos0 = enc.get_pos();
1227 BM_ASSERT(gap_block[len-1] == 65535);
1228
1229 bm::gap_word_t head = gap_block[0];
1230 head &= bm::gap_word_t(~(3 << 1)); // clear the level flags
1231 bm::gap_word_t min_v = gap_block[1];
1232 bm::gap_word_t max_v = gap_block[len-2];
1233 bm::gap_word_t tail_delta = bm::gap_word_t(65535 - max_v);
1234
1235 if (min_v < 256)
1236 head |= (1 << 1);
1237 if (tail_delta < 256)
1238 head |= (1 << 2);
1239
1240 enc.put_8(bm::set_block_gap_bienc_v2);
1241 enc.put_16(head);
1242 if (min_v < 256)
1243 enc.put_8((unsigned char)min_v);
1244 else
1245 enc.put_16(min_v);
1246
1247 if (tail_delta < 256)
1248 enc.put_8((unsigned char)tail_delta);
1249 else
1250 enc.put_16(tail_delta);
1251
1252 bit_out_type bout(enc);
1253 bout.bic_encode_u16(&gap_block[2], len-4, min_v, max_v);
1254 bout.flush();
1255
1256 // re-evaluate coding efficiency
1257 //
1258 encoder::position_type enc_pos1 = enc.get_pos();
1259 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1260 if (gamma_size > (len-1)*sizeof(gap_word_t))
1261 {
1262 enc.set_pos(enc_pos0);
1263 }
1264 else
1265 {
1266 compression_stat_[bm::set_block_gap_bienc]++;
1267 return;
1268 }
1269 }
1270
1271 // save as plain GAP block
1272 enc.put_8(bm::set_block_gap);
1273 enc.put_16(gap_block, len-1);
1274
1275 compression_stat_[bm::set_block_gap]++;
1276}
1277
1278
1279template<class BV>
1282{
1283 unsigned len = gap_length(gap_block);
1284 if (len > 3 && (compression_level_ > 3)) // Use Elias Gamma encoding
1285 {
1286 encoder::position_type enc_pos0 = enc.get_pos();
1287 {
1288 bit_out_type bout(enc);
1289 gamma_encoder_func gamma(bout);
1290
1291 enc.put_8(bm::set_block_gap_egamma);
1292 enc.put_16(gap_block[0]);
1293
1294 for_each_dgap(gap_block, gamma);
1295 }
1296 // re-evaluate coding efficiency
1297 //
1298 encoder::position_type enc_pos1 = enc.get_pos();
1299 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1300 if (gamma_size > (len-1)*sizeof(gap_word_t))
1301 {
1302 enc.set_pos(enc_pos0);
1303 }
1304 else
1305 {
1306 compression_stat_[bm::set_block_gap_egamma]++;
1307 return;
1308 }
1309 }
1310
1311 // save as plain GAP block
1312 enc.put_8(bm::set_block_gap);
1313 enc.put_16(gap_block, len-1);
1314
1315 compression_stat_[bm::set_block_gap]++;
1316}
1317
1318template<class BV>
1320 unsigned arr_len,
1321 bm::encoder& enc,
1322 bool inverted) BMNOEXCEPT
1323{
1324 unsigned char scode = inverted ? bm::set_block_arrgap_egamma_inv
1326 if (compression_level_ > 3 && arr_len > 1)
1327 {
1328 encoder::position_type enc_pos0 = enc.get_pos();
1329 {
1330 bit_out_type bout(enc);
1331 enc.put_8(scode);
1332 bout.gamma(arr_len);
1333 gap_word_t prev = gap_array[0];
1334 bout.gamma(prev + 1);
1335
1336 for (unsigned i = 1; i < arr_len; ++i)
1337 {
1338 gap_word_t curr = gap_array[i];
1339 bout.gamma(curr - prev);
1340 prev = curr;
1341 }
1342 }
1343 encoder::position_type enc_pos1 = enc.get_pos();
1344 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1345 unsigned plain_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1346 if (gamma_size >= plain_size)
1347 {
1348 enc.set_pos(enc_pos0); // rollback the bit stream
1349 }
1350 else
1351 {
1352 compression_stat_[scode]++;
1353 return;
1354 }
1355 }
1356 // save as a plain array
1358 enc.put_prefixed_array_16(scode, gap_array, arr_len, true);
1359 compression_stat_[scode]++;
1360}
1361
1362
1363template<class BV>
1365 const bm::gap_word_t* gap_block,
1366 unsigned arr_len,
1367 bm::encoder& enc,
1368 bool inverted) BMNOEXCEPT
1369{
1370 BM_ASSERT(arr_len <= 65535);
1371 unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv
1373 if (arr_len > 4)
1374 {
1375 encoder::position_type enc_pos0 = enc.get_pos();
1376 {
1377 bit_out_type bout(enc);
1378
1379 bm::gap_word_t min_v = gap_block[0];
1380 bm::gap_word_t max_v = gap_block[arr_len-1];
1381 BM_ASSERT(max_v > min_v);
1382
1383 enc.put_8(scode);
1384 enc.put_16(min_v);
1385 enc.put_16(max_v);
1386
1387 bout.gamma(arr_len-4);
1388 bout.bic_encode_u16(&gap_block[1], arr_len-2, min_v, max_v);
1389 bout.flush();
1390 }
1391 encoder::position_type enc_pos1 = enc.get_pos();
1392 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1393 unsigned raw_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1394 if (enc_size >= raw_size)
1395 {
1396 enc.set_pos(enc_pos0); // rollback the bit stream
1397 }
1398 else
1399 {
1400 compression_stat_[scode]++;
1401 return;
1402 }
1403 }
1404 // save as a plain array
1406 enc.put_prefixed_array_16(scode, gap_block, arr_len, true);
1407 compression_stat_[scode]++;
1408}
1409
1410
1411template<class BV>
1413 unsigned arr_len,
1414 bm::encoder& enc,
1415 bool inverted) BMNOEXCEPT
1416{
1417 BM_ASSERT(arr_len <= 65535);
1418
1419 unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv_v2
1421 if (arr_len > 4)
1422 {
1423 bm::gap_word_t min_v = gap_block[0];
1424 bm::gap_word_t max_v = gap_block[arr_len-1];
1425 bm::gap_word_t tail = bm::gap_word_t(max_v - min_v);
1426
1427 if (min_v >= 256 && tail >= 256)// || arr_len > 128)
1428 {
1429 interpolated_gap_array_v0(gap_block, arr_len, enc, inverted);
1430 return;
1431 }
1432
1433 BM_ASSERT(arr_len < 16383);
1434 encoder::position_type enc_pos0 = enc.get_pos();
1435 {
1436 bit_out_type bout(enc);
1437
1438 BM_ASSERT(max_v > min_v);
1439
1440 enc.put_8(scode);
1441
1442 BM_ASSERT((arr_len & (3 << 14)) == 0);
1443 arr_len <<= 2;
1444 if (min_v < 256)
1445 arr_len |= 1;
1446 if (tail < 256)
1447 arr_len |= (1 << 1);
1448
1449 enc.put_16(bm::gap_word_t(arr_len));
1450 if (min_v < 256)
1451 enc.put_8((unsigned char)min_v);
1452 else
1453 enc.put_16(min_v);
1454
1455 if (tail < 256)
1456 enc.put_8((unsigned char)tail);
1457 else
1458 enc.put_16(tail);
1459
1460 arr_len >>= 2;
1461
1462 bout.bic_encode_u16(&gap_block[1], arr_len-2, min_v, max_v);
1463 bout.flush();
1464 }
1465 encoder::position_type enc_pos1 = enc.get_pos();
1466 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1467 unsigned raw_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1468 if (enc_size >= raw_size)
1469 {
1470 enc.set_pos(enc_pos0); // rollback the bit stream
1471 }
1472 else
1473 {
1474 compression_stat_[scode]++;
1475 return;
1476 }
1477 }
1478 // save as a plain array
1480 enc.put_prefixed_array_16(scode, gap_block, arr_len, true);
1481 compression_stat_[scode]++;
1482}
1483
1484
1485
1486template<class BV>
1487void serializer<BV>::add_model(unsigned char mod, unsigned score) BMNOEXCEPT
1488{
1489 BM_ASSERT(mod_size_ < 64); // too many models (memory corruption?)
1490 scores_[mod_size_] = score; models_[mod_size_] = mod;
1491 ++mod_size_;
1492}
1493
1494template<class BV>
1495unsigned char
1497{
1498 unsigned bc, bit_gaps;
1499
1500 add_model(bm::set_block_bit, bm::gap_max_bits); // default model (bit-block)
1501
1502 bit_model_0run_size_ = bm::bit_count_nonzero_size(block, bm::set_block_size);
1503 add_model(bm::set_block_bit_0runs, bit_model_0run_size_ * 8);
1504
1505 bm::id64_t d0 = digest0_ = bm::calc_block_digest0(block);
1506 if (!d0)
1507 {
1508 add_model(bm::set_block_azero, 0);
1509 return bm::set_block_azero;
1510 }
1511 unsigned d0_bc = word_bitcount64(d0);
1512 bit_model_d0_size_ = unsigned(8 + (32 * d0_bc * sizeof(bm::word_t)));
1513 if (d0 != ~0ull)
1514 add_model(bm::set_block_bit_digest0, bit_model_d0_size_ * 8);
1515
1516
1517 bm::bit_block_change_bc(block, &bit_gaps, &bc);
1518 BM_ASSERT(bm::bit_block_count(block) == bc);
1519 BM_ASSERT(bm::bit_block_calc_change(block) == bit_gaps);
1520
1521 if (bc == 1)
1522 {
1523 add_model(bm::set_block_bit_1bit, 16);
1525 }
1526 unsigned inverted_bc = bm::gap_max_bits - bc;
1527 if (!inverted_bc)
1528 {
1529 add_model(bm::set_block_aone, 0);
1530 return bm::set_block_aone;
1531 }
1532 unsigned arr_size =
1533 unsigned(sizeof(gap_word_t) + (bc * sizeof(gap_word_t)));
1534 unsigned arr_size_inv =
1535 unsigned(sizeof(gap_word_t) + (inverted_bc * sizeof(gap_word_t)));
1536
1537 add_model(bm::set_block_arrbit, arr_size*8);
1538 add_model(bm::set_block_arrbit_inv, arr_size_inv*8);
1539 const unsigned bie_bits_per_int = 4;
1540
1541 if (bit_gaps > 3 && bit_gaps < bm::gap_max_buff_len)
1542 add_model(bm::set_block_gap_bienc,
1543 32 + (bit_gaps-1) * bie_bits_per_int);
1544 if (bc < bit_gaps && bc < bm::gap_equiv_len)
1545 add_model(bm::set_block_arrgap_bienc, 16*3 + bc*bie_bits_per_int);
1546 else
1547 if (inverted_bc < bit_gaps && inverted_bc < bm::gap_equiv_len)
1548 add_model(bm::set_block_arrgap_bienc_inv, 16*3 + inverted_bc*bie_bits_per_int);
1549 else
1550 if (bc >= bm::gap_equiv_len && bc < bie_cut_off)
1551 add_model(bm::set_block_arr_bienc, 16*3 + bc * bie_bits_per_int);
1552 else
1553 if (inverted_bc > 3 && inverted_bc >= bm::gap_equiv_len && inverted_bc < bie_cut_off)
1554 add_model(bm::set_block_arr_bienc_inv, 16*3 + inverted_bc * bie_bits_per_int);
1555
1556 if (bit_gaps >= bm::gap_max_buff_len && bit_gaps < bie_cut_off)
1557 add_model(bm::set_block_bitgap_bienc, 16*4 + (bit_gaps-2) * bie_bits_per_int);
1558 else
1559 {
1560 if (bit_gaps < bm::gap_max_buff_len) // GAP block
1561 {
1562 bit_gaps -= bit_gaps > 2 ? 2 : 0;
1563 add_model(bm::set_block_bitgap_bienc, 16*4 + bit_gaps * bie_bits_per_int);
1564 }
1565 }
1566
1567 // find the best representation based on computed approx.models
1568 //
1569 unsigned min_score = bm::gap_max_bits;
1570 unsigned char model = bm::set_block_bit;
1571 for (unsigned i = 0; i < mod_size_; ++i)
1572 {
1573 if (scores_[i] < min_score)
1574 {
1575 min_score = scores_[i];
1576 model = models_[i];
1577 }
1578 }
1579 return model;
1580}
1581
1582template<class BV>
1583unsigned char
1585{
1586 reset_models();
1587
1588 if (compression_level_ >= 5)
1589 return find_bit_best_encoding_l5(block);
1590
1591 unsigned bc, bit_gaps;
1592
1593 // heuristics and hard-coded rules to determine
1594 // the best representation for bit-block
1595 //
1596 add_model(bm::set_block_bit, bm::gap_max_bits); // default model (bit-block)
1597
1598 if (compression_level_ <= 1)
1599 return bm::set_block_bit;
1600
1601 // check if it is a very sparse block with some areas of dense areas
1602 bit_model_0run_size_ = bm::bit_count_nonzero_size(block, bm::set_block_size);
1603 if (compression_level_ <= 5)
1604 add_model(bm::set_block_bit_0runs, bit_model_0run_size_ * 8);
1605
1606 if (compression_level_ >= 2)
1607 {
1608 bm::id64_t d0 = digest0_ = bm::calc_block_digest0(block);
1609 if (!d0)
1610 {
1611 add_model(bm::set_block_azero, 0);
1612 return bm::set_block_azero;
1613 }
1614 unsigned d0_bc = word_bitcount64(d0);
1615 bit_model_d0_size_ = unsigned(8 + (32 * d0_bc * sizeof(bm::word_t)));
1616 if (d0 != ~0ull)
1617 add_model(bm::set_block_bit_digest0, bit_model_d0_size_ * 8);
1618
1619 if (compression_level_ >= 4)
1620 {
1621 bm::bit_block_change_bc(block, &bit_gaps, &bc);
1622 }
1623 else
1624 {
1625 bc = bm::bit_block_count(block);
1626 bit_gaps = 65535;
1627 }
1628 BM_ASSERT(bc);
1629
1630 if (bc == 1)
1631 {
1632 add_model(bm::set_block_bit_1bit, 16);
1634 }
1635 unsigned inverted_bc = bm::gap_max_bits - bc;
1636 if (!inverted_bc)
1637 {
1638 add_model(bm::set_block_aone, 0);
1639 return bm::set_block_aone;
1640 }
1641
1642 if (compression_level_ >= 3)
1643 {
1644 unsigned arr_size =
1645 unsigned(sizeof(gap_word_t) + (bc * sizeof(gap_word_t)));
1646 unsigned arr_size_inv =
1647 unsigned(sizeof(gap_word_t) + (inverted_bc * sizeof(gap_word_t)));
1648
1649 add_model(bm::set_block_arrbit, arr_size*8);
1650 add_model(bm::set_block_arrbit_inv, arr_size_inv*8);
1651
1652 if (compression_level_ >= 4)
1653 {
1654 const unsigned gamma_bits_per_int = 6;
1655 //unsigned bit_gaps = bm::bit_block_calc_change(block);
1656
1657 if (compression_level_ == 4)
1658 {
1659 if (bit_gaps > 3 && bit_gaps < bm::gap_max_buff_len)
1660 add_model(bm::set_block_gap_egamma,
1661 16 + (bit_gaps-1) * gamma_bits_per_int);
1662 if (bc < bit_gaps && bc < bm::gap_equiv_len)
1664 16 + bc * gamma_bits_per_int);
1665 if (inverted_bc > 3 && inverted_bc < bit_gaps && inverted_bc < bm::gap_equiv_len)
1667 16 + inverted_bc * gamma_bits_per_int);
1668 }
1669 } // level >= 3
1670 } // level >= 3
1671 } // level >= 2
1672
1673 // find the best representation based on computed approx.models
1674 //
1675 unsigned min_score = bm::gap_max_bits;
1676 unsigned char model = bm::set_block_bit;
1677 for (unsigned i = 0; i < mod_size_; ++i)
1678 {
1679 if (scores_[i] < min_score)
1680 {
1681 min_score = scores_[i];
1682 model = models_[i];
1683 }
1684 }
1685 return model;
1686}
1687
1688template<class BV>
1689unsigned char
1691{
1692 // heuristics and hard-coded rules to determine
1693 // the best representation for d-GAP block
1694 //
1695 if (compression_level_ <= 2)
1696 return bm::set_block_gap;
1697 unsigned len = bm::gap_length(gap_block);
1698 if (len == 2)
1699 return bm::set_block_gap;
1700 unsigned bc = bm::gap_bit_count_unr(gap_block);
1701 if (bc == 1)
1703 if (bc < len)
1704 {
1705 if (compression_level_ < 4)
1706 return bm::set_block_arrgap;
1707 if (compression_level_ == 4)
1710 }
1711 unsigned inverted_bc = bm::gap_max_bits - bc;
1712 if (inverted_bc < len)
1713 {
1714 if (compression_level_ < 4)
1716 if (compression_level_ == 4)
1719 }
1720 if (len < 6)
1721 {
1722 return bm::set_block_gap;
1723 }
1724
1725 if (compression_level_ == 4)
1728}
1729
1730
1731
1732
1733template<class BV>
1735{
1736 gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
1737
1738 gap_word_t arr_len;
1739 bool invert = false;
1740
1741 unsigned char enc_choice = find_gap_best_encoding(gap_block);
1742 switch (enc_choice)
1743 {
1744 case bm::set_block_gap:
1745 gamma_gap_block(gap_block, enc); // TODO: use plain encode (non-gamma)
1746 break;
1747
1749 arr_len = bm::gap_convert_to_arr(gap_temp_block,
1750 gap_block,
1752 BM_ASSERT(arr_len == 1);
1754 enc.put_16(gap_temp_block[0]);
1755 compression_stat_[bm::set_block_bit_1bit]++;
1756 break;
1759 invert = true;
1761 // fall through
1764 // fall through
1766 arr_len = gap_convert_to_arr(gap_temp_block,
1767 gap_block,
1769 invert);
1770 BM_ASSERT(arr_len);
1771 gamma_gap_array(gap_temp_block, arr_len, enc, invert);
1772 break;
1774 interpolated_encode_gap_block(gap_block, enc);
1775 break;
1777 invert = true;
1779 // fall through
1781 arr_len = gap_convert_to_arr(gap_temp_block,
1782 gap_block,
1784 invert);
1785 BM_ASSERT(arr_len);
1786 interpolated_gap_array(gap_temp_block, arr_len, enc, invert);
1787 break;
1788 default:
1789 gamma_gap_block(gap_block, enc);
1790 } // switch
1791}
1792
1793template<class BV>
1795 bm::encoder& enc,
1796 unsigned //size_control
1797 ) BMNOEXCEPT
1798{
1799 enc.put_8(bm::set_block_bit_0runs);
1800 enc.put_8((blk[0]==0) ? 0 : 1); // encode start
1801
1802 unsigned i, j;
1803 for (i = 0; i < bm::set_block_size; ++i)
1804 {
1805 if (blk[i] == 0)
1806 {
1807 // scan fwd to find 0 island length
1808 for (j = i+1; j < bm::set_block_size; ++j)
1809 {
1810 if (blk[j] != 0)
1811 break;
1812 }
1813 BM_ASSERT(j-i);
1814 enc.put_16((gap_word_t)(j-i));
1815 i = j - 1;
1816 }
1817 else
1818 {
1819 // scan fwd to find non-0 island length
1820 for (j = i+1; j < bm::set_block_size; ++j)
1821 {
1822 if (blk[j] == 0)
1823 {
1824 // look ahead to identify and ignore short 0-run
1825 if (((j+1 < bm::set_block_size) && blk[j+1]) ||
1826 ((j+2 < bm::set_block_size) && blk[j+2]))
1827 {
1828 ++j; // skip zero word
1829 continue;
1830 }
1831 break;
1832 }
1833 }
1834 BM_ASSERT(j-i);
1835 enc.put_16((gap_word_t)(j-i));
1836 enc.put_32(blk + i, j - i); // stream all bit-words now
1837
1838 i = j - 1;
1839 }
1840 }
1841 compression_stat_[bm::set_block_bit_0runs]++;
1842}
1843
1844
1845template<class BV>
1847 bm::encoder& enc,
1849{
1850 // evaluate a few "sure" models here and pick the best
1851 //
1852 if (d0 != ~0ull)
1853 {
1854 if (bit_model_0run_size_ < bit_model_d0_size_)
1855 {
1856 encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
1857 return;
1858 }
1859
1860 // encode using digest0 method
1861 //
1862 enc.put_8(bm::set_block_bit_digest0);
1863 enc.put_64(d0);
1864
1865 while (d0)
1866 {
1867 bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
1868
1869 unsigned wave = bm::word_bitcount64(t - 1);
1870 unsigned off = wave * bm::set_block_digest_wave_size;
1871
1872 unsigned j = 0;
1873 do
1874 {
1875 enc.put_32(block[off+j+0]);
1876 enc.put_32(block[off+j+1]);
1877 enc.put_32(block[off+j+2]);
1878 enc.put_32(block[off+j+3]);
1879 j += 4;
1880 } while (j < bm::set_block_digest_wave_size);
1881
1882 d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
1883 } // while
1884
1885 compression_stat_[bm::set_block_bit_digest0]++;
1886 }
1887 else
1888 {
1889 if (bit_model_0run_size_ < unsigned(bm::set_block_size*sizeof(bm::word_t)))
1890 {
1891 encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
1892 return;
1893 }
1894
1895 enc.put_prefixed_array_32(bm::set_block_bit, block, bm::set_block_size);
1896 compression_stat_[bm::set_block_bit]++;
1897 }
1898}
1899
1900
1901
1902template<class BV>
1904 typename serializer<BV>::buffer& buf,
1905 const statistics_type* bv_stat)
1906{
1907 statistics_type stat;
1908 if (!bv_stat)
1909 {
1910 bv.calc_stat(&stat);
1911 bv_stat = &stat;
1912 }
1913
1914 buf.resize(bv_stat->max_serialize_mem, false); // no-copy resize
1915 optimize_ = free_ = false;
1916
1917 size_type slen = this->serialize(bv, buf.data(), buf.size());
1918 BM_ASSERT(slen <= buf.size()); // or we have a BIG problem with prediction
1919 BM_ASSERT(slen);
1920
1921 buf.resize(slen);
1922}
1923
1924template<class BV>
1926 typename serializer<BV>::buffer& buf)
1927{
1928 statistics_type st;
1929 optimize_ = true;
1930 if (!ref_vect_) // do not use block-free if XOR compression is setup
1931 free_ = true; // set the destructive mode
1932
1933 typename bvector_type::mem_pool_guard mp_g_z;
1934 mp_g_z.assign_if_not_set(pool_, bv);
1935
1936 bv.optimize(temp_block_, BV::opt_compress, &st);
1937 serialize(bv, buf, &st);
1938
1939 optimize_ = free_ = false; // restore the default mode
1940}
1941
1942template<class BV>
1944 bm::encoder& enc,
1945 bool inverted) BMNOEXCEPT
1946{
1947 unsigned arr_len;
1948 unsigned mask = inverted ? ~0u : 0u;
1949 // TODO: get rid of max bits
1950 arr_len = bm::bit_convert_to_arr(bit_idx_arr_.data(),
1951 block,
1954 mask);
1955 if (arr_len)
1956 {
1957 unsigned char scode =
1959 enc.put_prefixed_array_16(scode, bit_idx_arr_.data(), arr_len, true);
1960 compression_stat_[scode]++;
1961 return;
1962 }
1963 encode_bit_digest(block, enc, digest0_);
1964}
1965
1966template<class BV>
1969{
1970 unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_equiv_len);
1971 BM_ASSERT(len); (void)len;
1972 gamma_gap_block(bit_idx_arr_.data(), enc);
1973}
1974
1975template<class BV>
1977 bm::encoder& enc,
1978 bool inverted) BMNOEXCEPT
1979{
1980 unsigned mask = inverted ? ~0u : 0u;
1981 unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
1982 block,
1985 mask);
1986 if (arr_len)
1987 {
1988 gamma_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
1989 return;
1990 }
1991 enc.put_prefixed_array_32(bm::set_block_bit, block, bm::set_block_size);
1992 compression_stat_[bm::set_block_bit]++;
1993}
1994
1995template<class BV>
1997 bm::encoder& enc,
1998 bool inverted) BMNOEXCEPT
1999{
2000 unsigned mask = inverted ? ~0u : 0u;
2001 unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
2002 block,
2005 mask);
2006 if (arr_len)
2007 {
2008 interpolated_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2009 return;
2010 }
2011 encode_bit_digest(block, enc, digest0_);
2012}
2013
2014template<class BV>
2017{
2018 unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
2019 BM_ASSERT(len); (void)len;
2020 interpolated_encode_gap_block(bit_idx_arr_.data(), enc);
2021}
2022
2023
2024template<class BV>
2027{
2028 unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
2029 BM_ASSERT(len); (void)len;
2030 BM_ASSERT(len <= bie_cut_off);
2031
2032 const unsigned char scode = bm::set_block_bitgap_bienc;
2033
2034 encoder::position_type enc_pos0 = enc.get_pos();
2035 {
2036 bit_out_type bout(enc);
2037
2038 bm::gap_word_t head = (bit_idx_arr_[0] & 1); // isolate start flag
2039 bm::gap_word_t min_v = bit_idx_arr_[1];
2040
2041 BM_ASSERT(bit_idx_arr_[len] == 65535);
2042 BM_ASSERT(bit_idx_arr_[len] > min_v);
2043
2044 enc.put_8(scode);
2045
2046 enc.put_8((unsigned char)head);
2047 enc.put_16(bm::gap_word_t(len));
2048 enc.put_16(min_v);
2049 bout.bic_encode_u16(&bit_idx_arr_[2], len-2, min_v, 65535);
2050 bout.flush();
2051 }
2052 encoder::position_type enc_pos1 = enc.get_pos();
2053 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2054 unsigned raw_size = sizeof(word_t) * bm::set_block_size;
2055 if (enc_size >= raw_size)
2056 {
2057 enc.set_pos(enc_pos0); // rollback the bit stream
2058 }
2059 else
2060 {
2061 compression_stat_[scode]++;
2062 return;
2063 }
2064 encode_bit_digest(block, enc, digest0_);
2065}
2066
2067
2068
2069
2070
2071template<class BV>
2072void
2074 bm::encoder& enc,
2075 bool inverted) BMNOEXCEPT
2076{
2077 unsigned mask = inverted ? ~0u : 0u;
2078 unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
2079 block,
2082 mask);
2083 if (arr_len)
2084 {
2085 unsigned char scode =
2087
2088 encoder::position_type enc_pos0 = enc.get_pos();
2089 {
2090 bit_out_type bout(enc);
2091
2092 bm::gap_word_t min_v = bit_idx_arr_[0];
2093 bm::gap_word_t max_v = bit_idx_arr_[arr_len-1];
2094 BM_ASSERT(max_v > min_v);
2095
2096 enc.put_8(scode);
2097 enc.put_16(min_v);
2098 enc.put_16(max_v);
2099 enc.put_16(bm::gap_word_t(arr_len));
2100 bout.bic_encode_u16(&bit_idx_arr_[1], arr_len-2, min_v, max_v);
2101 bout.flush();
2102 }
2103 encoder::position_type enc_pos1 = enc.get_pos();
2104 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2105 unsigned raw_size = sizeof(word_t) * bm::set_block_size;
2106 if (enc_size >= raw_size)
2107 {
2108 enc.set_pos(enc_pos0); // rollback the bit stream
2109 }
2110 else
2111 {
2112 if (digest0_ != ~0ull && enc_size > bit_model_d0_size_)
2113 {
2114 enc.set_pos(enc_pos0); // rollback the bit stream
2115 }
2116 else
2117 {
2118 compression_stat_[scode]++;
2119 return;
2120 }
2121 }
2122 }
2123 encode_bit_digest(block, enc, digest0_);
2124}
2125
2126
2127
2128#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO) \
2129 if (nb == 1u) \
2130 enc.put_8(B_1ZERO); \
2131 else if (nb < 256u) \
2132 { \
2133 enc.put_8(B_8ZERO); \
2134 enc.put_8((unsigned char)nb); \
2135 } \
2136 else if (nb < 65536u) \
2137 { \
2138 enc.put_8(B_16ZERO); \
2139 enc.put_16((unsigned short)nb); \
2140 } \
2141 else if (nb < bm::id_max32) \
2142 { \
2143 enc.put_8(B_32ZERO); \
2144 enc.put_32(unsigned(nb)); \
2145 } \
2146 else \
2147 {\
2148 enc.put_8(B_64ZERO); \
2149 enc.put_64(nb); \
2150 }
2151
2152
2153template<class BV>
2155 bookmark_state& bookm,
2157{
2158 BM_ASSERT(bookm.nb_range_);
2159
2160 block_idx_type nb_delta = nb - bookm.nb_;
2161 if (bookm.ptr_ && nb_delta >= bookm.nb_range_)
2162 {
2163 unsigned char* curr = enc.get_pos();
2164 size_t bytes_delta = size_t(curr - bookm.ptr_);
2165 if (bytes_delta > bookm.min_bytes_range_)
2166 {
2167 enc.set_pos(bookm.ptr_); // rewind back and save the skip
2168 switch (bookm.bm_type_)
2169 {
2170 case 0: // 32-bit mark
2171 bytes_delta -= sizeof(unsigned);
2172 if (bytes_delta < 0xFFFFFFFF) // range overflow check
2173 enc.put_32(unsigned(bytes_delta));
2174 // if range is somehow off, bookmark remains 0 (NULL)
2175 break;
2176 case 1: // 24-bit mark
2177 bytes_delta -= (sizeof(unsigned)-1);
2178 if (bytes_delta < 0xFFFFFF)
2179 enc.put_24(unsigned(bytes_delta));
2180 break;
2181 case 2: // 16-bit mark
2182 bytes_delta -= sizeof(unsigned short);
2183 if (bytes_delta < 0xFFFF)
2184 enc.put_16((unsigned short)bytes_delta);
2185 break;
2186 default:
2187 BM_ASSERT(0);
2188 break;
2189 } // switch
2190
2191 enc.set_pos(curr); // restore and save the sync mark
2192
2193 if (nb_delta < 0xFF)
2194 {
2195 enc.put_8(set_nb_sync_mark8);
2196 enc.put_8((unsigned char) nb_delta);
2197 }
2198 else
2199 if (nb_delta < 0xFFFF)
2200 {
2201 enc.put_8(set_nb_sync_mark16);
2202 enc.put_16((unsigned short) nb_delta);
2203 }
2204 else
2205 if (nb_delta < 0xFFFFFF)
2206 {
2207 enc.put_8(set_nb_sync_mark24);
2208 enc.put_24(unsigned(nb_delta));
2209 }
2210 else
2211 if (nb_delta < ~0U)
2212 {
2213 enc.put_8(set_nb_sync_mark32);
2214 enc.put_32(unsigned(nb_delta));
2215 }
2216 else
2217 {
2218 #ifdef BM64ADDR
2219 if (nb_delta < 0xFFFFFFFFFFFFUL)
2220 {
2221 enc.put_8(set_nb_sync_mark48);
2222 enc.put_48(nb_delta);
2223 }
2224 else
2225 {
2226 enc.put_8(set_nb_sync_mark64);
2227 enc.put_64(nb_delta);
2228 }
2229 #endif
2230 }
2231 bookm.ptr_ = 0;
2232 }
2233 }
2234
2235 if (!bookm.ptr_) // start new bookmark
2236 {
2237 // bookmarks use VBR to save offset
2238 bookm.nb_ = nb;
2239 bookm.ptr_ = enc.get_pos() + 1;
2240 switch (bookm.bm_type_)
2241 {
2242 case 0: // 32-bit mark
2243 enc.put_8(bm::set_nb_bookmark32);
2244 enc.put_32(0); // put NULL mark at first
2245 break;
2246 case 1: // 24-bit mark
2247 enc.put_8(bm::set_nb_bookmark24);
2248 enc.put_24(0);
2249 break;
2250 case 2: // 16-bit mark
2251 enc.put_8(bm::set_nb_bookmark16);
2252 enc.put_16(0);
2253 break;
2254 default:
2255 BM_ASSERT(0);
2256 break;
2257 } // switch
2258 }
2259}
2260
2261
2262template<class BV>
2265 unsigned char* buf, size_t buf_size)
2266{
2267 BM_ASSERT(temp_block_);
2268
2269 reset_compression_stats();
2270 const blocks_manager_type& bman = bv.get_blocks_manager();
2271
2272 bm::encoder enc(buf, buf_size); // create the encoder
2273 encode_header(bv, enc);
2274
2275 bookmark_state sb_bookmark(sb_range_);
2276
2277 block_idx_type i, j;
2278 for (i = 0; i < bm::set_total_blocks; ++i)
2279 {
2280 unsigned i0, j0;
2281 bm::get_block_coord(i, i0, j0);
2282
2283 // ----------------------------------------------------
2284 // bookmark the stream to allow fast skip on decode
2285 //
2286 if (sb_bookmarks_)
2287 {
2288 process_bookmark(i, sb_bookmark, enc);
2289 }
2290
2291 const bm::word_t* blk = bman.get_block(i0, j0);
2292
2293 // ----------------------------------------------------
2294 // Empty or ONE block serialization
2295 //
2296 // TODO: make a function to check this in ONE pass
2297 //
2298 bool flag;
2299 flag = bm::check_block_zero(blk, false/*shallow check*/);
2300 if (flag)
2301 {
2302 zero_block:
2303 flag = 1;
2304 block_idx_type next_nb = bman.find_next_nz_block(i+1, false);
2305 if (next_nb == bm::set_total_blocks) // no more blocks
2306 {
2308 return (size_type)enc.size();
2309 }
2310 block_idx_type nb = next_nb - i;
2311
2312 if (nb > 1 && nb < 128)
2313 {
2314 // special (but frequent) case -- GAP delta fits in 7bits
2315 unsigned char c = (unsigned char)((1u << 7) | nb);
2316 enc.put_8(c);
2317 }
2318 else
2319 {
2325 }
2326 i = next_nb - 1;
2327 continue;
2328 }
2329 else
2330 {
2331 flag = bm::check_block_one(blk, false); // shallow scan
2332 if (flag)
2333 {
2334 full_block:
2335 flag = 1;
2336 // Look ahead for similar blocks
2337 for(j = i+1; j < bm::set_total_blocks; ++j)
2338 {
2339 bm::get_block_coord(j, i0, j0);
2340 if (!j0) // look ahead if the whole superblock is 0xFF
2341 {
2342 bm::word_t*** blk_root = bman.top_blocks_root();
2343 if ((bm::word_t*)blk_root[i0] == FULL_BLOCK_FAKE_ADDR)
2344 {
2345 j += 255;
2346 continue;
2347 }
2348 }
2349 const bm::word_t* blk_next = bman.get_block(i0, j0);
2350 if (flag != bm::check_block_one(blk_next, true)) // deep scan
2351 break;
2352 } // for j
2353
2354 // TODO: improve this condition, because last block is always
2355 // has 0 at the end, thus this never happen in practice
2356 if (j == bm::set_total_blocks)
2357 {
2358 enc.put_8(set_block_aone);
2359 break;
2360 }
2361 else
2362 {
2363 block_idx_type nb = j - i;
2369 }
2370 i = j - 1;
2371 continue;
2372 }
2373 }
2374
2375 // --------------------------------------------------
2376 // GAP serialization
2377 //
2378 if (BM_IS_GAP(blk))
2379 {
2380 if (ref_vect_) // XOR filter
2381 {
2382 bool found = xor_scan_.search_best_xor_gap(blk,
2383 ref_idx_+1, ref_vect_->size(),
2384 i0, j0);
2385 if (found)
2386 {
2387 const bm::gap_word_t* gap_block = BMGAP_PTR(blk);
2388 unsigned glen = bm::gap_length(gap_block)-1;
2389
2390 const bm::gap_word_t* gap_xor_block =
2391 (const bm::gap_word_t*) xor_scan_.get_found_block();
2392 size_type ridx = xor_scan_.found_ridx();
2393 size_type plain_idx = xor_scan_.get_ref_vector().get_row_idx(ridx);
2394
2395 bm::gap_word_t* tmp_buf = (bm::gap_word_t*)xor_block_;
2396
2397 unsigned res_len;
2398 bm::gap_operation_xor(gap_block, gap_xor_block, tmp_buf, res_len);
2399
2400 BM_ASSERT(res_len <= glen);
2401 BM_ASSERT(1+res_len == bm::gap_length(tmp_buf));
2402 unsigned delta = glen - res_len;
2403 if (delta > 2)
2404 {
2405 if (plain_idx < 256)
2406 {
2408 enc.put_8((unsigned char) plain_idx);
2409 }
2410 else
2411 {
2412 if (plain_idx < 65536)
2413 {
2415 enc.put_16((unsigned short) plain_idx);
2416 }
2417 else
2418 {
2420 enc.put_32(unsigned(plain_idx));
2421 }
2422 }
2423 encode_gap_block(tmp_buf, enc);
2424
2425 compression_stat_[bm::set_block_xor_gap_ref32]++;
2426 continue;
2427 }
2428 }
2429 }
2430
2431 encode_gap_block(BMGAP_PTR(blk), enc);
2432 }
2433 else
2434 {
2435 if (ref_vect_) // XOR filter
2436 {
2437 xor_scan_.compute_x_block_stats(blk);
2438 bool found =
2439 xor_scan_.search_best_xor_mask(blk,
2440 ref_idx_+1, ref_vect_->size(),
2441 i0, j0,
2442 xor_block_);
2443 if (found)
2444 {
2445 size_type ridx = xor_scan_.found_ridx();
2446 if (xor_scan_.is_eq_found()) // golden! (found a copy)
2447 {
2448 size_type row_idx = xor_scan_.get_ref_vector().get_row_idx(ridx);
2450 enc.put_32(unsigned(row_idx));
2451 compression_stat_[bm::set_block_ref_eq]++;
2452 continue;
2453 }
2454 found = xor_scan_.validate_found(xor_block_, blk);
2455 if (found)
2456 {
2457 bm::id64_t d64 = xor_scan_.get_xor_digest();
2458 BM_ASSERT(d64);
2459 size_type plain_idx = xor_scan_.get_ref_vector().get_row_idx(ridx);
2460
2461 if (plain_idx < 256)
2462 {
2464 enc.put_8((unsigned char) plain_idx);
2465 }
2466 else
2467 {
2468 if (plain_idx < 65536)
2469 {
2471 enc.put_16((unsigned short) plain_idx);
2472 }
2473 else
2474 {
2476 enc.put_32(unsigned(plain_idx));
2477 }
2478 }
2479 enc.put_64(d64); // xor digest
2480 compression_stat_[bm::set_block_xor_ref32]++;
2481 blk = xor_block_; // substitute block with XOR product
2482 }
2483 } // if xor found
2484
2485 }
2486
2487 // ----------------------------------------------
2488 // BIT BLOCK serialization
2489 //
2490
2491 unsigned char model = find_bit_best_encoding(blk);
2492 switch (model)
2493 {
2494 case bm::set_block_bit:
2496 break;
2498 {
2499 unsigned bit_idx = 0;
2500 bm::bit_block_find(blk, bit_idx, &bit_idx);
2501 BM_ASSERT(bit_idx < 65536);
2502 enc.put_8(bm::set_block_bit_1bit); enc.put_16(bm::short_t(bit_idx));
2503 compression_stat_[bm::set_block_bit_1bit]++;
2504 continue;
2505 }
2506 break;
2507 case bm::set_block_azero: // empty block all of the sudden ?
2508 goto zero_block;
2509 case bm::set_block_aone:
2510 goto full_block;
2512 encode_bit_array(blk, enc, false);
2513 break;
2515 encode_bit_array(blk, enc, true);
2516 break;
2518 gamma_gap_bit_block(blk, enc);
2519 break;
2521 encode_bit_interval(blk, enc, 0); // TODO: get rid of param 3 (0)
2522 break;
2524 gamma_arr_bit_block(blk, enc, false);
2525 break;
2527 gamma_arr_bit_block(blk, enc, true);
2528 break;
2530 bienc_arr_bit_block(blk, enc, false);
2531 break;
2533 bienc_arr_bit_block(blk, enc, true);
2534 break;
2536 interpolated_arr_bit_block(blk, enc, false);
2537 break;
2539 interpolated_arr_bit_block(blk, enc, true); // inverted
2540 break;
2542 interpolated_gap_bit_block(blk, enc);
2543 break;
2545 bienc_gap_bit_block(blk, enc);
2546 break;
2548 encode_bit_digest(blk, enc, digest0_);
2549 break;
2550 default:
2551 BM_ASSERT(0); // predictor returned an unknown model
2553 }
2554 } // bit-block processing
2555
2556 // destructive serialization mode
2557 //
2558 if (free_)
2559 {
2560 // const cast is ok, because it is a requested mode
2561 const_cast<blocks_manager_type&>(bman).zero_block(i);
2562 }
2563
2564 } // for i
2565
2566 enc.put_8(set_block_end);
2567 return (size_type)enc.size();
2568}
2569
2570
2571
2572/// Bit mask flags for serialization algorithm
2573/// \ingroup bvserial
2575 BM_NO_BYTE_ORDER = 1, ///< save no byte-order info (save some space)
2576 BM_NO_GAP_LENGTH = (1 << 1) ///< save no GAP info (save some space)
2578
2579/*!
2580 \brief Saves bitvector into memory.
2581
2582 Function serializes content of the bitvector into memory.
2583 Serialization adaptively uses compression(variation of GAP encoding)
2584 when it is benefitial.
2585
2586 \param bv - source bvecor
2587 \param buf - pointer on target memory area. No range checking in the
2588 function. It is responsibility of programmer to allocate sufficient
2589 amount of memory using information from calc_stat function.
2590
2591 \param temp_block - pointer on temporary memory block. Cannot be 0;
2592 If you want to save memory across multiple bvectors
2593 allocate temporary block using allocate_tempblock and pass it to
2594 serialize.
2595 (Serialize does not deallocate temp_block.)
2596
2597 \param serialization_flags
2598 Flags controlling serilization (bit-mask)
2599 (use OR-ed serialization flags)
2600
2601 \ingroup bvserial
2602
2603 \return Size of serialization block.
2604 \sa calc_stat, serialization_flags
2605
2606*/
2607/*!
2608 Serialization format:
2609 <pre>
2610
2611 | HEADER | BLOCKS |
2612
2613 Header structure:
2614 BYTE : Serialization header (bit mask of BM_HM_*)
2615 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
2616 INT16: Reserved (0)
2617 INT16: Reserved Flags (0)
2618
2619 </pre>
2620*/
2621template<class BV>
2622size_t serialize(const BV& bv,
2623 unsigned char* buf,
2624 bm::word_t* temp_block = 0,
2625 unsigned serialization_flags = 0)
2626{
2627 bm::serializer<BV> bv_serial(bv.get_allocator(), temp_block);
2628
2630 bv_serial.byte_order_serialization(false);
2631
2633 bv_serial.gap_length_serialization(false);
2634 else
2635 bv_serial.gap_length_serialization(true);
2636
2637 return bv_serial.serialize(bv, buf, 0);
2638}
2639
2640/*!
2641 @brief Saves bitvector into memory.
2642 Allocates temporary memory block for bvector.
2643
2644 \param bv - source bvecor
2645 \param buf - pointer on target memory area. No range checking in the
2646 function. It is responsibility of programmer to allocate sufficient
2647 amount of memory using information from calc_stat function.
2648
2649 \param serialization_flags
2650 Flags controlling serilization (bit-mask)
2651 (use OR-ed serialization flags)
2652
2653 \ingroup bvserial
2654*/
2655template<class BV>
2656size_t serialize(BV& bv,
2657 unsigned char* buf,
2658 unsigned serialization_flags=0)
2659{
2660 return bm::serialize(bv, buf, 0, serialization_flags);
2661}
2662
2663
2664
2665/*!
2666 @brief Bitvector deserialization from a memory BLOB
2667
2668 @param bv - target bvector
2669 @param buf - pointer on memory which keeps serialized bvector
2670 @param temp_block - pointer on temporary block,
2671 if NULL bvector allocates own.
2672 @param ref_vect - [in] optional pointer to a list of
2673 reference vectors used for
2674 XOR compression.
2675
2676 @return Number of bytes consumed by deserializer.
2677
2678 Function deserializes bitvector from memory block containig results
2679 of previous serialization. Function does not remove bits
2680 which are currently set. Effectively it is OR logical operation
2681 between the target bit-vector and serialized one.
2682
2683 @sa deserialize_range
2684
2685 @ingroup bvserial
2686*/
2687template<class BV>
2688size_t deserialize(BV& bv,
2689 const unsigned char* buf,
2690 bm::word_t* temp_block = 0,
2691 const bm::bv_ref_vector<BV>* ref_vect = 0)
2692{
2693 ByteOrder bo_current = globals<true>::byte_order();
2694
2695 bm::decoder dec(buf);
2696 unsigned char header_flag = dec.get_8();
2697 ByteOrder bo = bo_current;
2698 if (!(header_flag & BM_HM_NO_BO))
2699 {
2700 bo = (bm::ByteOrder) dec.get_8();
2701 }
2702
2703 if (bo_current == bo)
2704 {
2706 deserial.set_ref_vectors(ref_vect);
2707 return deserial.deserialize(bv, buf, temp_block);
2708 }
2709 switch (bo_current)
2710 {
2711 case BigEndian:
2712 {
2714 deserial.set_ref_vectors(ref_vect);
2715 return deserial.deserialize(bv, buf, temp_block);
2716 }
2717 case LittleEndian:
2718 {
2720 deserial.set_ref_vectors(ref_vect);
2721 return deserial.deserialize(bv, buf, temp_block);
2722 }
2723 default:
2724 BM_ASSERT(0);
2725 };
2726 return 0;
2727}
2728
2729
2730/*!
2731 @brief Bitvector range deserialization from a memory BLOB
2732
2733 @param bv - target bvector
2734 @param buf - pointer on memory which keeps serialized bvector
2735 @param from - bit-vector index to deserialize from
2736 @param to - bit-vector index to deserialize to
2737 @param ref_vect - [in] optional pointer to a list of
2738 reference vectors used for
2739 XOR compression.
2740
2741 Function deserializes bitvector from memory block containig results
2742 of previous serialization. Function does not remove bits
2743 which are currently set. Effectively it is OR logical operation
2744 between the target bit-vector and serialized one.
2745
2746 @sa deserialize
2747
2748 @ingroup bvserial
2749*/
2750template<class BV>
2752 const unsigned char* buf,
2753 typename BV::size_type from,
2754 typename BV::size_type to,
2755 const bm::bv_ref_vector<BV>* ref_vect = 0)
2756{
2757 ByteOrder bo_current = globals<true>::byte_order();
2758
2759 bm::decoder dec(buf);
2760 unsigned char header_flag = dec.get_8();
2761 ByteOrder bo = bo_current;
2762 if (!(header_flag & BM_HM_NO_BO))
2763 {
2764 bo = (bm::ByteOrder) dec.get_8();
2765 }
2766
2767 if (bo_current == bo)
2768 {
2770 deserial.set_ref_vectors(ref_vect);
2771 deserial.set_range(from, to);
2772 deserial.deserialize(bv, buf);
2773 }
2774 else
2775 {
2776 switch (bo_current)
2777 {
2778 case BigEndian:
2779 {
2781 deserial.set_ref_vectors(ref_vect);
2782 deserial.set_range(from, to);
2783 deserial.deserialize(bv, buf);
2784 }
2785 break;
2786 case LittleEndian:
2787 {
2789 deserial.set_ref_vectors(ref_vect);
2790 deserial.set_range(from, to);
2791 deserial.deserialize(bv, buf);
2792 }
2793 break;;
2794 default:
2795 BM_ASSERT(0);
2796 };
2797 }
2798 bv.keep_range(from, to);
2799}
2800
2801
2802template<typename DEC, typename BLOCK_IDX>
2805 unsigned block_type,
2806 bm::gap_word_t* dst_arr)
2807{
2808 bm::gap_word_t len = 0;
2809
2810 switch (block_type)
2811 {
2812 case set_block_bit_1bit:
2813 dst_arr[0] = decoder.get_16();
2814 len = 1;
2815 break;
2816 case set_block_arrgap:
2818 len = decoder.get_16();
2819 decoder.get_16(dst_arr, len);
2820 break;
2823 {
2824 bit_in_type bin(decoder);
2825 len = (gap_word_t)bin.gamma();
2826 gap_word_t prev = 0;
2827 for (gap_word_t k = 0; k < len; ++k)
2828 {
2829 gap_word_t bit_idx = (gap_word_t)bin.gamma();
2830 if (k == 0) --bit_idx; // TODO: optimization
2831 bit_idx = (gap_word_t)(bit_idx + prev);
2832 prev = bit_idx;
2833 dst_arr[k] = bit_idx;
2834 } // for
2835 }
2836 break;
2839 {
2840 bm::gap_word_t min_v = decoder.get_16();
2841 bm::gap_word_t max_v = decoder.get_16();
2842
2843 bit_in_type bin(decoder);
2844 len = bm::gap_word_t(bin.gamma() + 4);
2845 dst_arr[0] = min_v;
2846 dst_arr[len-1] = max_v;
2847 bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
2848 }
2849 break;
2852 {
2853 bm::gap_word_t min_v, max_v;
2854 len = decoder.get_16();
2855 if (len & 1)
2856 min_v = decoder.get_8();
2857 else
2858 min_v = decoder.get_16();
2859
2860 if (len & (1<<1))
2861 max_v = decoder.get_8();
2862 else
2863 max_v = decoder.get_16();
2864 max_v = bm::gap_word_t(min_v + max_v);
2865
2866 len = bm::gap_word_t(len >> 2);
2867
2868 bit_in_type bin(decoder);
2869 dst_arr[0] = min_v;
2870 dst_arr[len-1] = max_v;
2871 bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
2872 }
2873 break;
2874
2875 default:
2876 BM_ASSERT(0);
2877 #ifndef BM_NO_STL
2878 throw std::logic_error(err_msg());
2879 #else
2880 BM_THROW(BM_ERR_SERIALFORMAT);
2881 #endif
2882 }
2883 return len;
2884}
2885
2886template<typename DEC, typename BLOCK_IDX>
2887void
2890{
2891 BM_ASSERT(!BM_IS_GAP(blk));
2892
2893 bm::gap_word_t min_v = dec.get_16();
2894 bm::gap_word_t max_v = dec.get_16();
2895 unsigned arr_len = dec.get_16();
2896
2897 bit_in_type bin(dec);
2898
2899 if (!IS_VALID_ADDR(blk))
2900 {
2901 bin.bic_decode_u16_dry(arr_len-2, min_v, max_v);
2902 return;
2903 }
2904 bm::set_bit(blk, min_v);
2905 bm::set_bit(blk, max_v);
2906 bin.bic_decode_u16_bitset(blk, arr_len-2, min_v, max_v);
2907}
2908
2909template<typename DEC, typename BLOCK_IDX>
2910void
2913{
2914 // TODO: optimization
2915 bm::bit_block_set(blk, 0);
2916 this->read_bic_arr(decoder, blk);
2917 bm::bit_invert(blk);
2918}
2919
2920template<typename DEC, typename BLOCK_IDX>
2923{
2924 BM_ASSERT(!BM_IS_GAP(blk));
2925
2926 bm::gap_word_t head = dec.get_8();
2927 unsigned arr_len = dec.get_16();
2928 bm::gap_word_t min_v = dec.get_16();
2929
2930 BM_ASSERT(arr_len <= bie_cut_off);
2931
2932 id_array_[0] = head;
2933 id_array_[1] = min_v;
2934 id_array_[arr_len] = 65535;
2935
2936 bit_in_type bin(dec);
2937 bin.bic_decode_u16(&id_array_[2], arr_len-2, min_v, 65535);
2938
2939 if (!IS_VALID_ADDR(blk))
2940 return;
2941 bm::gap_add_to_bitset(blk, id_array_, arr_len);
2942}
2943
2944template<typename DEC, typename BLOCK_IDX>
2946 decoder_type& dec,
2947 bm::word_t* block) BMNOEXCEPT
2948{
2949 bm::id64_t d0 = dec.get_64();
2950 while (d0)
2951 {
2952 bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
2953
2954 unsigned wave = bm::word_bitcount64(t - 1);
2955 unsigned off = wave * bm::set_block_digest_wave_size;
2956 unsigned j = 0;
2957 if (!IS_VALID_ADDR(block))
2958 {
2959 do
2960 {
2961 dec.get_32();
2962 dec.get_32();
2963 dec.get_32();
2964 dec.get_32();
2965 j += 4;
2966 } while (j < bm::set_block_digest_wave_size);
2967 }
2968 else
2969 {
2970 do
2971 {
2972 block[off+j+0] |= dec.get_32();
2973 block[off+j+1] |= dec.get_32();
2974 block[off+j+2] |= dec.get_32();
2975 block[off+j+3] |= dec.get_32();
2976 j += 4;
2977 } while (j < bm::set_block_digest_wave_size);
2978 }
2979
2980 d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
2981 } // while
2982}
2983
2984template<typename DEC, typename BLOCK_IDX>
2986 decoder_type& dec,
2988{
2989 //TODO: optimization if block exists and it is OR-ed read
2990 bm::bit_block_set(blk, 0);
2991
2992 unsigned char run_type = dec.get_8();
2993 for (unsigned j = 0; j < bm::set_block_size; run_type = !run_type)
2994 {
2995 unsigned run_length = dec.get_16();
2996 if (run_type)
2997 {
2998 unsigned run_end = j + run_length;
2999 BM_ASSERT(run_end <= bm::set_block_size);
3000 for (;j < run_end; ++j)
3001 {
3002 unsigned w = dec.get_32();
3003 blk[j] = w;
3004 }
3005 }
3006 else
3007 {
3008 j += run_length;
3009 }
3010 } // for j
3011}
3012
3013
3014template<typename DEC, typename BLOCK_IDX>
3015void
3017 unsigned block_type,
3018 bm::gap_word_t* dst_block,
3019 bm::gap_word_t& gap_head)
3020{
3021// typedef bit_in<DEC> bit_in_type;
3022 switch (block_type)
3023 {
3024 case set_block_gap:
3025 {
3026 unsigned len = gap_length(&gap_head);
3027 --len;
3028 *dst_block = gap_head;
3029 decoder.get_16(dst_block+1, len - 1);
3030 dst_block[len] = gap_max_bits - 1;
3031 }
3032 break;
3033 case set_block_bit_1bit:
3034 {
3035 gap_set_all(dst_block, bm::gap_max_bits, 0);
3036 gap_word_t bit_idx = decoder.get_16();
3037 gap_add_value(dst_block, bit_idx);
3038 }
3039 break;
3040 case set_block_arrgap:
3042 {
3043 gap_set_all(dst_block, bm::gap_max_bits, 0);
3044 gap_word_t len = decoder.get_16();
3045 for (gap_word_t k = 0; k < len; ++k)
3046 {
3047 gap_word_t bit_idx = decoder.get_16();
3048 bm::gap_add_value(dst_block, bit_idx);
3049 } // for
3050 }
3051 break;
3058 {
3059 unsigned arr_len = read_id_list(decoder, block_type, id_array_);
3060 dst_block[0] = 0;
3061 unsigned gap_len =
3062 gap_set_array(dst_block, id_array_, arr_len);
3063 BM_ASSERT(gap_len == bm::gap_length(dst_block));
3064 (void)(gap_len);
3065 }
3066 break;
3068 {
3069 unsigned len = (gap_head >> 3);
3070 --len;
3071 // read gamma GAP block into a dest block
3072 *dst_block = gap_head;
3073 gap_word_t* gap_data_ptr = dst_block + 1;
3074
3075 bit_in_type bin(decoder);
3076 gap_word_t v = (gap_word_t)bin.gamma();
3077 gap_word_t gap_sum = *gap_data_ptr = (gap_word_t)(v - 1);
3078 for (unsigned i = 1; i < len; ++i)
3079 {
3080 v = (gap_word_t)bin.gamma();
3081 gap_sum = (gap_word_t)(gap_sum + v);
3082 *(++gap_data_ptr) = gap_sum;
3083 }
3084 dst_block[len+1] = bm::gap_max_bits - 1;
3085 }
3086 break;
3088 {
3089 unsigned len = (gap_head >> 3);
3090 *dst_block = gap_head;
3091 bm::gap_word_t min_v = decoder.get_16();
3092 dst_block[1] = min_v;
3093 bit_in_type bin(decoder);
3094 bin.bic_decode_u16(&dst_block[2], len-2, min_v, 65535);
3095 dst_block[len] = bm::gap_max_bits - 1;
3096 }
3097 break;
3099 {
3100 bm::gap_word_t min_v, max_v;
3101 unsigned len = (gap_head >> 3);
3102 bm::gap_word_t min8 = gap_head & (1 << 1);
3103 bm::gap_word_t tail8 = gap_head & (1 << 2);
3104 gap_head &= bm::gap_word_t(~(3 << 1)); // clear the flags
3105 if (min8)
3106 min_v = decoder.get_8();
3107 else
3108 min_v = decoder.get_16();
3109 if (tail8)
3110 max_v = decoder.get_8();
3111 else
3112 max_v = decoder.get_16();
3113 max_v = bm::gap_word_t(65535 - max_v); // tail correction
3114
3115 dst_block[0] = gap_head;
3116 dst_block[1] = min_v;
3117 bit_in_type bin(decoder);
3118 bin.bic_decode_u16(&dst_block[2], len-3, min_v, max_v);
3119 dst_block[len-1] = max_v;
3120 dst_block[len] = bm::gap_max_bits - 1;
3121 }
3122 break;
3123 default:
3124 BM_ASSERT(0);
3125 #ifndef BM_NO_STL
3126 throw std::logic_error(err_msg());
3127 #else
3128 BM_THROW(BM_ERR_SERIALFORMAT);
3129 #endif
3130 }
3131
3132 if (block_type == set_block_arrgap_egamma_inv ||
3133 block_type == set_block_arrgap_inv ||
3134 block_type == set_block_arrgap_bienc_inv ||
3135 block_type == set_block_arrgap_bienc_inv_v2)
3136 {
3137 gap_invert(dst_block);
3138 }
3139}
3140
3141template<typename DEC, typename BLOCK_IDX>
3145 block_idx_type nb,
3146 block_idx_type expect_nb) BMNOEXCEPT
3147{
3148 if (skip_offset_) // skip bookmark is available
3149 {
3150 const unsigned char* save_pos = decoder.get_pos(); // save position
3151
3152 if (save_pos > skip_pos_) // checkpoint passed
3153 {
3154 skip_offset_ = 0;
3155 return false;
3156 }
3157 decoder.set_pos(skip_pos_);
3158
3159 unsigned char sync_mark = decoder.get_8();
3160 block_idx_type nb_sync;
3161 switch (sync_mark)
3162 {
3163 case set_nb_sync_mark8:
3164 nb_sync = decoder.get_8();
3165 break;
3166 case set_nb_sync_mark16:
3167 nb_sync = decoder.get_16();
3168 break;
3169 case set_nb_sync_mark24:
3170 nb_sync = decoder.get_24();
3171 break;
3172 case set_nb_sync_mark32:
3173 nb_sync = decoder.get_32();
3174 break;
3175 case set_nb_sync_mark48:
3176 nb_sync = block_idx_type(decoder.get_48());
3177 #ifndef BM64ADDR
3178 BM_ASSERT(0);
3179 decoder.set_pos(save_pos);
3180 skip_offset_ = 0;
3181 return 0; // invalid bookmark from 64-bit serialization
3182 #endif
3183 break;
3184 case set_nb_sync_mark64:
3185 nb_sync = block_idx_type(decoder.get_64());
3186 #ifndef BM64ADDR
3187 BM_ASSERT(0);
3188 decoder.set_pos(save_pos);
3189 skip_offset_ = 0;
3190 return 0; // invalid bookmark from 64-bit serialization
3191 #endif
3192 break;
3193 default:
3194 BM_ASSERT(0);
3195 decoder.set_pos(save_pos);
3196 skip_offset_ = 0;
3197 return 0;
3198 } // switch
3199
3200 nb_sync += nb;
3201 if (nb_sync <= expect_nb) // within reach
3202 {
3203 skip_offset_ = 0;
3204 return nb_sync;
3205 }
3206 skip_offset_ = 0;
3207 decoder.set_pos(save_pos);
3208 }
3209 return 0;
3210}
3211
3212
3213// -------------------------------------------------------------------------
3214
3215template<class BV, class DEC>
3217: ref_vect_(0),
3218 xor_block_(0),
3219 or_block_(0),
3220 is_range_set_(0)
3221{
3222 temp_block_ = alloc_.alloc_bit_block();
3224 this->id_array_ = bit_idx_arr_.data();
3226
3227 alloc_.set_pool(&pool_);
3228}
3229
3230template<class BV, class DEC>
3232{
3233 alloc_.free_bit_block(temp_block_);
3234 if (xor_block_)
3235 alloc_.free_bit_block(xor_block_);
3236 BM_ASSERT(!or_block_);
3237}
3238
3239template<class BV, class DEC>
3241{
3242 ref_vect_ = ref_vect;
3243 if (ref_vect_ && !xor_block_)
3244 xor_block_ = alloc_.alloc_bit_block();
3245}
3246
3247template<class BV, class DEC>
3248void
3251 block_idx_type nb,
3252 bm::word_t* blk)
3253{
3254 gap_word_t gap_head;
3255 bm::gap_word_t* gap_temp_block = gap_temp_block_.data();
3256
3257 bool inv_flag = false;
3258
3259 switch (btype)
3260 {
3261 case set_block_gap:
3263 case set_block_gapbit:
3264 {
3265 gap_head = (gap_word_t)
3266 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
3267
3268 unsigned len = gap_length(&gap_head);
3269 int level = gap_calc_level(len, bman.glen());
3270 --len;
3271 if (level == -1) // Too big to be GAP: convert to BIT block
3272 {
3273 *gap_temp_block = gap_head;
3274 dec.get_16(gap_temp_block+1, len - 1);
3275 gap_temp_block[len] = gap_max_bits - 1;
3276
3277 if (blk == 0) // block does not exist yet
3278 {
3279 blk = bman.get_allocator().alloc_bit_block();
3280 bman.set_block(nb, blk);
3281 gap_convert_to_bitset(blk, gap_temp_block);
3282 }
3283 else // We have some data already here. Apply OR operation.
3284 {
3285 gap_convert_to_bitset(temp_block_,
3286 gap_temp_block);
3287 bv.combine_operation_with_block(nb,
3288 temp_block_,
3289 0,
3290 BM_OR);
3291 }
3292 return;
3293 } // level == -1
3294
3295 set_gap_level(&gap_head, level);
3296
3297 if (blk == 0)
3298 {
3299 BM_ASSERT(level >= 0);
3300 gap_word_t* gap_blk =
3301 bman.get_allocator().alloc_gap_block(unsigned(level), bman.glen());
3302 gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
3303 *gap_blk_ptr = gap_head;
3304 bm::set_gap_level(gap_blk_ptr, level);
3305 blk = bman.set_block(nb, (bm::word_t*)BMPTR_SETBIT0(gap_blk));
3306 BM_ASSERT(blk == 0);
3307
3308 dec.get_16(gap_blk + 1, len - 1);
3309 gap_blk[len] = bm::gap_max_bits - 1;
3310 }
3311 else // target block exists
3312 {
3313 // read GAP block into a temp memory and perform OR
3314 *gap_temp_block = gap_head;
3315 dec.get_16(gap_temp_block + 1, len - 1);
3316 gap_temp_block[len] = bm::gap_max_bits - 1;
3317 break;
3318 }
3319 return;
3320 }
3322 inv_flag = true;
3324 case set_block_arrgap:
3331 {
3332 unsigned arr_len = this->read_id_list(dec, btype, this->id_array_);
3333 gap_temp_block[0] = 0; // reset unused bits in gap header
3334 unsigned gap_len =
3335 bm::gap_set_array(gap_temp_block, this->id_array_, arr_len);
3336 if (inv_flag)
3337 bm::gap_invert(gap_temp_block);
3338
3339 BM_ASSERT(gap_len == bm::gap_length(gap_temp_block));
3340 int level = bm::gap_calc_level(gap_len, bman.glen());
3341 if (level == -1) // Too big to be GAP: convert to BIT block
3342 {
3343 bm::gap_convert_to_bitset(temp_block_, gap_temp_block);
3344 bv.combine_operation_with_block(nb,
3345 temp_block_,
3346 0,
3347 BM_OR);
3348 return;
3349 }
3350 break;
3351 }
3353 gap_head = dec.get_16();
3360 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3361 break;
3365 gap_head = dec.get_16();
3366 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3367 break;
3368 default:
3369 BM_ASSERT(0);
3370 #ifndef BM_NO_STL
3371 throw std::logic_error(this->err_msg());
3372 #else
3373 BM_THROW(BM_ERR_SERIALFORMAT);
3374 #endif
3375 }
3376
3377 bv.combine_operation_with_block(nb,
3378 (bm::word_t*)gap_temp_block,
3379 1,
3380 BM_OR);
3381}
3382
3383template<class BV, class DEC>
3385 decoder_type& dec,
3386 blocks_manager_type& bman,
3387 block_idx_type nb,
3388 bm::word_t* blk)
3389{
3390 if (!blk)
3391 {
3392 blk = bman.get_allocator().alloc_bit_block();
3393 bman.set_block(nb, blk);
3394 bm::bit_block_set(blk, 0);
3395 }
3396 else
3397 if (BM_IS_GAP(blk))
3398 blk = bman.deoptimize_block(nb);
3399
3400 BM_ASSERT(blk != temp_block_);
3401
3402 switch (btype)
3403 {
3405 if (IS_FULL_BLOCK(blk))
3406 blk = bman.deoptimize_block(nb);
3407 bm::bit_block_set(temp_block_, ~0u);
3408 {
3409 gap_word_t len = dec.get_16();
3410 for (unsigned k = 0; k < len; ++k)
3411 {
3412 gap_word_t bit_idx = dec.get_16();
3413 bm::clear_bit(temp_block_, bit_idx);
3414 } // for
3415 }
3416 bm::bit_block_or(blk, temp_block_);
3417 break;
3419 this->read_bic_arr(dec, blk);
3420 break;
3422 BM_ASSERT(blk != temp_block_);
3423 if (IS_FULL_BLOCK(blk))
3424 blk = bman.deoptimize_block(nb);
3425 // TODO: optimization
3426 bm::bit_block_set(temp_block_, 0);
3427 this->read_bic_arr(dec, temp_block_);
3428 bm::bit_invert(temp_block_);
3429 bm::bit_block_or(blk, temp_block_);
3430 break;
3432 this->read_bic_gap(dec, blk);
3433 break;
3435 this->read_digest0_block(dec, blk);
3436 break;
3437 default:
3438 BM_ASSERT(0);
3439 #ifndef BM_NO_STL
3440 throw std::logic_error(this->err_msg());
3441 #else
3442 BM_THROW(BM_ERR_SERIALFORMAT);
3443 #endif
3444 } // switch
3445}
3446
3447template<class BV, class DEC>
3449 bvector_type& bv,
3450 block_idx_type nb,
3451 bm::word_t* blk)
3452{
3453 if (!blk)
3454 {
3455 blocks_manager_type& bman = bv.get_blocks_manager();
3456 blk = bman.get_allocator().alloc_bit_block();
3457 bman.set_block(nb, blk);
3458 dec.get_32(blk, bm::set_block_size);
3459 }
3460 else
3461 {
3462 dec.get_32(temp_block_, bm::set_block_size);
3463 bv.combine_operation_with_block(nb, temp_block_, 0, BM_OR);
3464 }
3465}
3466
3467template<class BV, class DEC>
3469 bvector_type& bv,
3470 block_idx_type nb,
3471 bm::word_t* blk)
3472{
3473 unsigned head_idx = dec.get_16();
3474 unsigned tail_idx = dec.get_16();
3475 if (!blk)
3476 {
3477 blocks_manager_type& bman = bv.get_blocks_manager();
3478 blk = bman.get_allocator().alloc_bit_block();
3479 bman.set_block(nb, blk);
3480 for (unsigned k = 0; k < head_idx; ++k)
3481 blk[k] = 0;
3482 dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
3483 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
3484 blk[j] = 0;
3485 }
3486 else
3487 {
3488 bm::bit_block_set(temp_block_, 0);
3489 dec.get_32(temp_block_ + head_idx, tail_idx - head_idx + 1);
3490 bv.combine_operation_with_block(nb, temp_block_, 0, BM_OR);
3491 }
3492}
3493
3494template<class BV, class DEC>
3496 bvector_type& bv,
3497 block_idx_type nb,
3498 bm::word_t* blk)
3499{
3500 blocks_manager_type& bman = bv.get_blocks_manager();
3501 bm::gap_word_t len = dec.get_16();
3502 if (BM_IS_GAP(blk))
3503 {
3504 blk = bman.deoptimize_block(nb);
3505 }
3506 else
3507 {
3508 if (!blk) // block does not exists yet
3509 {
3510 blk = bman.get_allocator().alloc_bit_block();
3511 bm::bit_block_set(blk, 0);
3512 bman.set_block(nb, blk);
3513 }
3514 else
3515 if (IS_FULL_BLOCK(blk)) // nothing to do
3516 {
3517 for (unsigned k = 0; k < len; ++k) // dry read
3518 dec.get_16();
3519 return;
3520 }
3521 }
3522 // Get the array one by one and set the bits.
3523 for (unsigned k = 0; k < len; ++k)
3524 {
3525 gap_word_t bit_idx = dec.get_16();
3526 bm::set_bit(blk, bit_idx);
3527 }
3528}
3529
3530template<class BV, class DEC>
3532 const unsigned char* buf,
3533 bm::word_t* /*temp_block*/)
3534{
3535 blocks_manager_type& bman = bv.get_blocks_manager();
3536 if (!bman.is_init())
3537 bman.init_tree();
3538
3539 bm::word_t* temp_block = temp_block_;
3540
3541 bm::strategy strat = bv.get_new_blocks_strat();
3542 bv.set_new_blocks_strat(BM_GAP);
3543 typename bvector_type::mem_pool_guard mp_guard_bv;
3544 mp_guard_bv.assign_if_not_set(pool_, bv);
3545
3546
3547 decoder_type dec(buf);
3548
3549 // Reading th serialization header
3550 //
3551 unsigned char header_flag = dec.get_8();
3552 if (!(header_flag & BM_HM_NO_BO))
3553 {
3554 /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8();
3555 }
3556 if (header_flag & BM_HM_64_BIT)
3557 {
3558 #ifndef BM64ADDR
3559 BM_ASSERT(0); // 64-bit address vector cannot read on 32
3560 #ifndef BM_NO_STL
3561 throw std::logic_error(this->err_msg());
3562 #else
3563 BM_THROW(BM_ERR_SERIALFORMAT);
3564 #endif
3565 #endif
3566 }
3567
3568 if (header_flag & BM_HM_ID_LIST)
3569 {
3570 // special case: the next comes plain list of integers
3571 if (header_flag & BM_HM_RESIZE)
3572 {
3573 block_idx_type bv_size;
3574 if (header_flag & BM_HM_64_BIT)
3575 {
3576 BM_ASSERT(sizeof(block_idx_type)==8);
3577 bv_size = (block_idx_type)dec.get_64();
3578 }
3579 else
3580 bv_size = dec.get_32();
3581 if (bv_size > bv.size())
3582 bv.resize(bv_size);
3583 }
3584 for (unsigned cnt = dec.get_32(); cnt; --cnt)
3585 {
3586 bm::id_t idx = dec.get_32();
3587 bv.set(idx);
3588 } // for
3589 // -1 for compatibility with other deserialization branches
3590 return dec.size()-1;
3591 }
3592
3593 if (!(header_flag & BM_HM_NO_GAPL))
3594 {
3595 for (unsigned k = 0; k < bm::gap_levels; ++k) // read GAP levels
3596 {
3597 /*glevels[i] =*/ dec.get_16();
3598 }
3599 }
3600 if (header_flag & BM_HM_RESIZE)
3601 {
3602 block_idx_type bv_size;
3603 if (header_flag & BM_HM_64_BIT)
3604 {
3605 // 64-bit vector cannot be deserialized into 32-bit
3606 BM_ASSERT(sizeof(block_idx_type)==8);
3607 bv_size = (block_idx_type)dec.get_64();
3608 #ifndef BM64ADDR
3609 #ifndef BM_NO_STL
3610 throw std::logic_error(this->err_msg());
3611 #else
3612 BM_THROW(BM_ERR_SERIALFORMAT);
3613 #endif
3614 #endif
3615 }
3616 else
3617 bv_size = dec.get_32();
3618 if (bv_size > bv.size())
3619 bv.resize(bv_size);
3620 }
3621
3622 if (header_flag & BM_HM_HXOR) // XOR compression mode
3623 {
3624 if (!xor_block_)
3625 xor_block_ = alloc_.alloc_bit_block();
3626 }
3627
3628 size_type full_blocks = 0;
3629
3630 // reference XOR compression FSM fields
3631 //
3632 size_type x_ref_idx = 0;
3633 bm::id64_t x_ref_d64 = 0;
3634 block_idx_type x_nb = 0;
3635 bool x_ref_gap = false;
3636 unsigned row_idx;
3637
3638 block_idx_type nb_sync;
3639
3640 unsigned char btype;
3641 block_idx_type nb;
3642 unsigned i0, j0;
3643
3644 block_idx_type i = 0;
3645 do
3646 {
3647 if (is_range_set_)
3648 {
3649 block_idx_type nb_to = (idx_to_ >> bm::set_block_shift);
3650 if (i > nb_to)
3651 break; // early exit
3652 }
3653
3654 btype = dec.get_8();
3655 if (btype & (1 << 7)) // check if short zero-run packed here
3656 {
3657 nb = btype & ~(1 << 7);
3658 BM_ASSERT(nb);
3659 i += nb;
3660 continue;
3661 }
3662 bm::get_block_coord(i, i0, j0);
3663 bm::word_t* blk = bman.get_block_ptr(i0, j0);
3664
3665 switch (btype)
3666 {
3667 case set_block_azero:
3668 case set_block_end:
3670 break;
3671 case set_block_1zero:
3672 break;
3673 case set_block_8zero:
3674 nb = dec.get_8();
3675 BM_ASSERT(nb);
3676 i += nb;
3677 continue; // bypass ++i;
3678 case set_block_16zero:
3679 nb = dec.get_16();
3680 BM_ASSERT(nb);
3681 i += nb;
3682 continue; // bypass ++i;
3683 case set_block_32zero:
3684 nb = dec.get_32();
3685 BM_ASSERT(nb);
3686 i += nb;
3687 continue; // bypass ++i;
3688 case set_block_64zero:
3689 #ifdef BM64ADDR
3690 nb = dec.get_64();
3691 BM_ASSERT(nb);
3692 i += nb;
3693 continue; // bypass ++i;
3694 #else
3695 BM_ASSERT(0); // attempt to read 64-bit vector in 32-bit mode
3696 #ifndef BM_NO_STL
3697 throw std::logic_error(this->err_msg());
3698 #else
3699 BM_THROW(BM_ERR_SERIALFORMAT);
3700 #endif
3701 //i = bm::set_total_blocks;
3702 #endif
3703 break;
3704 case set_block_aone:
3705 bman.set_all_set(i, bm::set_total_blocks-1);
3707 break;
3708 case set_block_1one:
3709 bman.set_block_all_set(i);
3710 break;
3711 case set_block_8one:
3712 full_blocks = dec.get_8();
3713 goto process_full_blocks;
3714 break;
3715 case set_block_16one:
3716 full_blocks = dec.get_16();
3717 goto process_full_blocks;
3718 break;
3719 case set_block_32one:
3720 full_blocks = dec.get_32();
3721 goto process_full_blocks;
3722 break;
3723 case set_block_64one:
3724 #ifdef BM64ADDR
3725 full_blocks = dec.get_64();
3726 goto process_full_blocks;
3727 #else
3728 BM_ASSERT(0); // 32-bit vector cannot read 64-bit
3729 dec.get_64();
3730 #ifndef BM_NO_STL
3731 throw std::logic_error(this->err_msg());
3732 #else
3733 BM_THROW(BM_ERR_SERIALFORMAT);
3734 #endif
3735 #endif
3736 process_full_blocks:
3737 {
3738 BM_ASSERT(full_blocks);
3739 size_type from = i * bm::gap_max_bits;
3740 size_type to = from + full_blocks * bm::gap_max_bits;
3741 bv.set_range(from, to-1);
3742 i += full_blocks-1;
3743 }
3744 break;
3745 case set_block_bit:
3746 decode_block_bit(dec, bv, i, blk);
3747 break;
3748 case set_block_bit_1bit:
3749 {
3750 size_type bit_idx = dec.get_16();
3751 bit_idx += i * bm::bits_in_block;
3752 bv.set_bit_no_check(bit_idx);
3753 break;
3754 }
3756 {
3757 //TODO: optimization if block exists
3758 this->read_0runs_block(dec, temp_block);
3759 bv.combine_operation_with_block(i, temp_block, 0, BM_OR);
3760 break;
3761 }
3763 decode_block_bit_interval(dec, bv, i, blk);
3764 break;
3765 case set_block_gap:
3766 case set_block_gapbit:
3767 case set_block_arrgap:
3778 deserialize_gap(btype, dec, bv, bman, i, blk);
3779 break;
3780 case set_block_arrbit:
3781 decode_arrbit(dec, bv, i, blk);
3782 break;
3787 decode_bit_block(btype, dec, bman, i, blk);
3788 break;
3790 decode_bit_block(btype, dec, bman, i, blk);
3791 break;
3792
3793 // --------------------------------------- bookmarks and skip jumps
3794 //
3795 case set_nb_bookmark32:
3796 this->bookmark_idx_ = i;
3797 this->skip_offset_ = dec.get_32();
3798 goto process_bookmark;
3799 case set_nb_bookmark24:
3800 this->bookmark_idx_ = i;
3801 this->skip_offset_ = dec.get_24();
3802 goto process_bookmark;
3803 case set_nb_bookmark16:
3804 this->bookmark_idx_ = i;
3805 this->skip_offset_ = dec.get_16();
3806 process_bookmark:
3807 if (is_range_set_)
3808 {
3809 this->skip_pos_ = dec.get_pos() + this->skip_offset_;
3810 block_idx_type nb_from = (idx_from_ >> bm::set_block_shift);
3811 nb_from = this->try_skip(dec, i, nb_from);
3812 if (nb_from)
3813 i = nb_from;
3814 }
3815 continue; // bypass ++i;
3816
3817 case set_nb_sync_mark8:
3818 nb_sync = dec.get_8();
3819 goto process_nb_sync;
3820 case set_nb_sync_mark16:
3821 nb_sync = dec.get_16();
3822 goto process_nb_sync;
3823 case set_nb_sync_mark24:
3824 nb_sync = dec.get_24();
3825 goto process_nb_sync;
3826 case set_nb_sync_mark32:
3827 nb_sync = dec.get_32();
3828 goto process_nb_sync;
3829 case set_nb_sync_mark48:
3830 nb_sync = block_idx_type(dec.get_48());
3831 goto process_nb_sync;
3832 case set_nb_sync_mark64:
3833 nb_sync = block_idx_type(dec.get_64());
3834 process_nb_sync:
3835 BM_ASSERT(i == this->bookmark_idx_ + nb_sync);
3836 if (i != this->bookmark_idx_ + nb_sync)
3837 {
3838 #ifndef BM_NO_STL
3839 throw std::logic_error(this->err_msg());
3840 #else
3841 BM_THROW(BM_ERR_SERIALFORMAT);
3842 #endif
3843 }
3844
3845 continue; // bypass ++i;
3846
3847 // ------------------------------------ XOR compression markers
3849 {
3850 BM_ASSERT(ref_vect_); // reference vector has to be attached
3851 if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3852 {
3853 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3854 x_ref_d64 = 0; x_ref_gap = false;
3855 }
3856
3857 row_idx = dec.get_32();
3858 size_type idx = ref_vect_->find(row_idx);
3859 if (idx == ref_vect_->not_found())
3860 {
3861 BM_ASSERT(0);
3862 goto throw_err;
3863 }
3864 const bvector_type* ref_bv = ref_vect_->get_bv(idx);
3865 BM_ASSERT(ref_bv);
3866 const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
3867 const bm::word_t* ref_blk = ref_bman.get_block_ptr(i0, j0);
3868 if (ref_blk)
3869 bv.combine_operation_with_block(i, ref_blk,
3870 BM_IS_GAP(ref_blk), BM_OR);
3871 break;
3872 }
3874 if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3875 {
3876 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3877 x_ref_d64 = 0; x_ref_gap = false;
3878 }
3879 row_idx = dec.get_8();
3880 goto process_xor;
3882 if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3883 {
3884 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3885 x_ref_d64 = 0; x_ref_gap = false;
3886 }
3887 row_idx = dec.get_16();
3888 goto process_xor;
3890 {
3891 if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3892 {
3893 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3894 x_ref_d64 = 0; x_ref_gap = false;
3895 }
3896 row_idx = dec.get_32();
3897 process_xor:
3898 x_ref_d64 = dec.get_64();
3899 process_xor_ref:
3900 BM_ASSERT(ref_vect_); // reference vector has to be attached
3901 x_ref_idx = ref_vect_->find(row_idx);
3902 x_nb = i;
3903 if (x_ref_idx == ref_vect_->not_found())
3904 {
3905 BM_ASSERT(0);
3906 goto throw_err;
3907 }
3908 if (blk)
3909 {
3910 or_block_ = bman.deoptimize_block(i);
3911 bman.set_block_ptr(i, 0); // borrow OR target before XOR
3912 }
3913 }
3914 continue; // important! cont to avoid inc(i)
3916 if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3917 {
3918 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3919 x_ref_d64 = 0; x_ref_gap = false;
3920 }
3921 row_idx = dec.get_8();
3922 x_ref_gap = true;
3923 goto process_xor_ref;
3925 if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3926 {
3927 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3928 x_ref_d64 = 0; x_ref_gap = false;
3929 }
3930 row_idx = dec.get_16();
3931 x_ref_gap = true;
3932 goto process_xor_ref;
3934 if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3935 {
3936 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3937 x_ref_d64 = 0; x_ref_gap = false;
3938 }
3939 row_idx = dec.get_32();
3940 x_ref_gap = true;
3941 goto process_xor_ref;
3942
3943 default:
3944 BM_ASSERT(0); // unknown block type
3945 throw_err:
3946 #ifndef BM_NO_STL
3947 throw std::logic_error(this->err_msg());
3948 #else
3949 BM_THROW(BM_ERR_SERIALFORMAT);
3950 #endif
3951 } // switch
3952
3953 ++i;
3954 } while (i < bm::set_total_blocks);
3955
3956 // process the last delayed XOR ref here
3957 //
3958 if (x_ref_d64 || x_ref_gap) // XOR post proc.
3959 xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3960
3961 bv.set_new_blocks_strat(strat);
3962
3963 return dec.size();
3964}
3965
3966// ---------------------------------------------------------------------------
3967
3968template<class BV, class DEC>
3970 blocks_manager_type& bman,
3971 block_idx_type nb)
3972{
3973 BM_ASSERT(ref_vect_);
3974
3975 unsigned i0, j0;
3976
3977 const bvector_type* ref_bv = ref_vect_->get_bv(x_ref_idx);
3978 BM_ASSERT(ref_bv);
3979 const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
3980 BM_ASSERT(&ref_bman != &bman);
3981 bm::get_block_coord(nb, i0, j0);
3982
3983 const bm::word_t* ref_blk = ref_bman.get_block_ptr(i0, j0);
3984 if (!ref_blk)
3985 {
3986 BM_ASSERT(!or_block_);
3987 return;
3988 }
3989
3990 if (BM_IS_GAP(ref_blk))
3991 {
3992 bm::gap_word_t* gap_block = BMGAP_PTR(ref_blk);
3993 bm::gap_convert_to_bitset(xor_block_, gap_block);
3994 ref_blk = xor_block_;
3995 }
3996 else
3997 {
3998 if (IS_FULL_BLOCK(ref_blk))
3999 ref_blk = FULL_BLOCK_REAL_ADDR;
4000 }
4001
4002 bm::word_t* blk = bman.deoptimize_block(nb);
4003 if (!blk)
4004 {
4005 blk = bman.check_allocate_block(nb, 0);
4006 }
4007 if (x_ref_d64)
4008 bm::bit_block_xor(blk, blk, ref_blk, x_ref_d64);
4009 else
4010 bm::bit_block_xor(blk, ref_blk);
4011
4012 if (or_block_)
4013 {
4014 bm::bit_block_or(blk, or_block_);
4015 alloc_.free_bit_block(or_block_);
4016 or_block_ = 0;
4017 bman.optimize_bit_block(i0, j0);
4018 }
4019}
4020
4021
4022// ---------------------------------------------------------------------------
4023
4024template<typename DEC, typename BLOCK_IDX>
4026 : decoder_(buf),
4027 end_of_stream_(false),
4028 bv_size_(0),
4029 state_(e_unknown),
4030 id_cnt_(0),
4031 block_idx_(0),
4032 mono_block_cnt_(0),
4033 block_idx_arr_(0)
4034{
4035 ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
4036
4039
4064
4065
4066 // reading stream header
4067 unsigned char header_flag = decoder_.get_8();
4068 if (!(header_flag & BM_HM_NO_BO))
4069 {
4070 /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8();
4071 }
4072
4073 // check if bitvector comes as an inverted, sorted list of ints
4074 //
4075 if (header_flag & BM_HM_ID_LIST)
4076 {
4077 // special case: the next comes plain list of unsigned integers
4078 if (header_flag & BM_HM_RESIZE)
4079 {
4080 if (header_flag & BM_HM_64_BIT)
4081 {
4082 BM_ASSERT(sizeof(block_idx_type)==8);
4083 bv_size_ = (block_idx_type)decoder_.get_64();
4084 }
4085 else
4086 bv_size_ = decoder_.get_32();
4087 }
4088
4090 id_cnt_ = decoder_.get_32();
4091 next(); // read first id
4092 }
4093 else
4094 {
4095 if (!(header_flag & BM_HM_NO_GAPL))
4096 {
4097 for (unsigned i = 0; i < bm::gap_levels; ++i) // load the GAP levels
4098 glevels_[i] = decoder_.get_16();
4099 }
4100 if (header_flag & BM_HM_RESIZE)
4101 {
4102 if (header_flag & BM_HM_64_BIT)
4103 {
4104 BM_ASSERT(sizeof(block_idx_type)==8);
4105 bv_size_ = (block_idx_type)decoder_.get_64();
4106 }
4107 else
4108 bv_size_ = decoder_.get_32();
4109 }
4110 state_ = e_blocks;
4111 }
4113 if (!block_idx_arr_)
4114 {
4115 #ifndef BM_NO_STL
4116 throw std::bad_alloc();
4117 #else
4118 BM_THROW(BM_ERR_BADALLOC);
4119 #endif
4120 }
4121 this->id_array_ = block_idx_arr_;
4122}
4123
4124template<typename DEC, typename BLOCK_IDX>
4126{
4127 if (block_idx_arr_)
4128 ::free(block_idx_arr_);
4129}
4130
4131
4132template<typename DEC, typename BLOCK_IDX>
4134{
4135 if (is_eof())
4136 {
4137 ++block_idx_;
4138 return;
4139 }
4140 block_idx_type nb_sync;
4141
4142 switch (state_)
4143 {
4144 case e_list_ids:
4145 // read inverted ids one by one
4146 if (id_cnt_ == 0)
4147 {
4148 end_of_stream_ = true;
4149 state_ = e_unknown;
4150 break;
4151 }
4152 last_id_ = decoder_.get_32();
4153 --id_cnt_;
4154 break;
4155
4156 case e_blocks:
4157 if (block_idx_ == bm::set_total_blocks)
4158 {
4159 end_of_stream_ = true;
4160 state_ = e_unknown;
4161 break;
4162 }
4163
4164 block_type_ = decoder_.get_8();
4165
4166 // pre-check for 7-bit zero block
4167 //
4168 if (block_type_ & (1u << 7u))
4169 {
4170 mono_block_cnt_ = (block_type_ & ~(1u << 7u)) - 1;
4171 state_ = e_zero_blocks;
4172 break;
4173 }
4174
4175 switch (block_type_)
4176 {
4177 case set_block_azero:
4178 case set_block_end:
4179 end_of_stream_ = true; state_ = e_unknown;
4180 break;
4181 case set_block_1zero:
4182 state_ = e_zero_blocks;
4183 mono_block_cnt_ = 0;
4184 break;
4185 case set_block_8zero:
4186 state_ = e_zero_blocks;
4187 mono_block_cnt_ = decoder_.get_8()-1;
4188 break;
4189 case set_block_16zero:
4190 state_ = e_zero_blocks;
4191 mono_block_cnt_ = decoder_.get_16()-1;
4192 break;
4193 case set_block_32zero:
4194 state_ = e_zero_blocks;
4195 mono_block_cnt_ = decoder_.get_32()-1;
4196 break;
4197 case set_block_aone:
4198 state_ = e_one_blocks;
4199 mono_block_cnt_ = bm::set_total_blocks - block_idx_;
4200 break;
4201 case set_block_1one:
4202 state_ = e_one_blocks;
4203 mono_block_cnt_ = 0;
4204 break;
4205 case set_block_8one:
4206 state_ = e_one_blocks;
4207 mono_block_cnt_ = decoder_.get_8()-1;
4208 break;
4209 case set_block_16one:
4210 state_ = e_one_blocks;
4211 mono_block_cnt_ = decoder_.get_16()-1;
4212 break;
4213 case set_block_32one:
4214 state_ = e_one_blocks;
4215 mono_block_cnt_ = decoder_.get_32()-1;
4216 break;
4217 case set_block_64one:
4218 state_ = e_one_blocks;
4219 mono_block_cnt_ = block_idx_type(decoder_.get_64()-1);
4220 break;
4221
4222 case bm::set_block_bit:
4231 state_ = e_bit_block;
4232 break;
4233 case set_block_gap:
4240 gap_head_ = decoder_.get_16();
4242 case set_block_arrgap:
4250 case set_block_bit_1bit:
4259 state_ = e_gap_block;
4260 break;
4261 case set_block_gapbit:
4262 state_ = e_gap_block; //e_bit_block; // TODO: make a better decision here
4263 break;
4264
4265 // --------------------------------------------- bookmarks and syncs
4266 //
4267 case set_nb_bookmark32:
4268 this->bookmark_idx_ = block_idx_;
4269 this->skip_offset_ = decoder_.get_32();
4270 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4271 break;
4272 case set_nb_bookmark24:
4273 this->bookmark_idx_ = block_idx_;
4274 this->skip_offset_ = decoder_.get_24();
4275 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4276 break;
4277 case set_nb_bookmark16:
4278 this->bookmark_idx_ = block_idx_;
4279 this->skip_offset_ = decoder_.get_16();
4280 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4281 break;
4282
4283 case set_nb_sync_mark8:
4284 nb_sync = decoder_.get_8();
4285 goto process_nb_sync;
4286 case set_nb_sync_mark16:
4287 nb_sync = decoder_.get_16();
4288 goto process_nb_sync;
4289 case set_nb_sync_mark24:
4290 nb_sync = decoder_.get_24();
4291 goto process_nb_sync;
4292 case set_nb_sync_mark32:
4293 nb_sync = decoder_.get_32();
4294 goto process_nb_sync;
4295 case set_nb_sync_mark48:
4296 nb_sync = block_idx_type(decoder_.get_48());
4297 goto process_nb_sync;
4298 case set_nb_sync_mark64:
4299 nb_sync = block_idx_type(decoder_.get_64());
4300 process_nb_sync:
4301 BM_ASSERT(block_idx_ == this->bookmark_idx_ + nb_sync);
4302 if (block_idx_ != this->bookmark_idx_ + nb_sync)
4303 {
4304 #ifndef BM_NO_STL
4305 throw std::logic_error(this->err_msg());
4306 #else
4307 BM_THROW(BM_ERR_SERIALFORMAT);
4308 #endif
4309 }
4310 break;
4311
4312 // --------------------------------------------------------------
4313 // XOR deserials are incompatible with the operation deserialial
4314 // throw or assert here
4315 //
4316 case set_block_ref_eq:
4317 case set_block_xor_ref8:
4321 // fall through
4322 default:
4323 BM_ASSERT(0);
4324 #ifndef BM_NO_STL
4325 throw std::logic_error(this->err_msg());
4326 #else
4327 BM_THROW(BM_ERR_SERIALFORMAT);
4328 #endif
4329 } // switch
4330
4331 break;
4332
4333 case e_zero_blocks:
4334 case e_one_blocks:
4335 ++block_idx_;
4336 if (!mono_block_cnt_)
4337 {
4338 state_ = e_blocks; // get new block token
4339 break;
4340 }
4341 --mono_block_cnt_;
4342 break;
4343
4344 case e_unknown:
4345 default:
4346 BM_ASSERT(0);
4347 #ifndef BM_NO_STL
4348 throw std::logic_error(this->err_msg());
4349 #else
4350 BM_THROW(BM_ERR_SERIALFORMAT);
4351 #endif
4352 } // switch
4353}
4354
4355template<typename DEC, typename BLOCK_IDX>
4358{
4359 BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks);
4360 if (!mono_block_cnt_)
4361 ++block_idx_;
4362 else
4363 {
4364 block_idx_ += mono_block_cnt_+1;
4365 mono_block_cnt_ = 0;
4366 }
4367 state_ = e_blocks;
4368 return block_idx_;
4369}
4370
4371template<typename DEC, typename BLOCK_IDX>
4372void
4374{
4375 gap_word_t len = decoder_.get_16();
4376 if (block)
4377 {
4378 bm::bit_block_set(block, ~0u);
4379 for (unsigned k = 0; k < len; ++k)
4380 {
4381 bm::gap_word_t bit_idx = decoder_.get_16();
4382 bm::clear_bit(block, bit_idx);
4383 }
4384 }
4385 else // dry read
4386 {
4387 for (unsigned k = 0; k < len; ++k)
4388 decoder_.get_16();
4389 }
4390}
4391
4392
4393template<typename DEC, typename BLOCK_IDX>
4394unsigned
4396 bm::word_t* dst_block,
4397 bm::word_t* tmp_block)
4398{
4399 // ASSIGN should be ready for dry read (dst_block can be NULL)
4400 //
4401 BM_ASSERT(this->state_ == e_bit_block);
4402 unsigned count = 0;
4403
4404 switch (this->block_type_)
4405 {
4406 case set_block_bit:
4407 decoder_.get_32(dst_block, bm::set_block_size);
4408 break;
4409 case set_block_bit_0runs:
4410 {
4411 if (IS_VALID_ADDR(dst_block))
4412 bm::bit_block_set(dst_block, 0);
4413 unsigned char run_type = decoder_.get_8();
4414 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4415 {
4416 unsigned run_length = decoder_.get_16();
4417 if (run_type)
4418 {
4419 decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
4420 }
4421 j += run_length;
4422 } // for
4423 }
4424 break;
4426 {
4427 unsigned head_idx = decoder_.get_16();
4428 unsigned tail_idx = decoder_.get_16();
4429 if (dst_block)
4430 {
4431 for (unsigned i = 0; i < head_idx; ++i)
4432 dst_block[i] = 0;
4433 decoder_.get_32(dst_block + head_idx,
4434 tail_idx - head_idx + 1);
4435 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
4436 dst_block[j] = 0;
4437 }
4438 else
4439 {
4440 int pos = int(tail_idx - head_idx) + 1;
4441 pos *= 4u;
4442 decoder_.seek(pos);
4443 }
4444 }
4445 break;
4446 case set_block_arrbit:
4447 case set_block_bit_1bit:
4448 get_arr_bit(dst_block, true /*clear target*/);
4449 break;
4450 case set_block_gapbit:
4451 BM_ASSERT(0);
4452 #ifndef BM_NO_STL
4453 throw std::logic_error(this->err_msg());
4454 #else
4455 BM_THROW(BM_ERR_SERIALFORMAT);
4456 #endif
4457 break;
4459 get_inv_arr(dst_block);
4460 break;
4462 if (IS_VALID_ADDR(dst_block))
4463 bm::bit_block_set(dst_block, 0);
4464 this->read_bic_arr(decoder_, dst_block);
4465 break;
4467 this->read_bic_arr_inv(decoder_, tmp_block);
4468 if (IS_VALID_ADDR(dst_block))
4469 bm::bit_block_copy(dst_block, tmp_block);
4470 break;
4472 if (IS_VALID_ADDR(dst_block))
4473 bm::bit_block_set(dst_block, 0);
4474 this->read_bic_gap(decoder_, dst_block);
4475 break;
4477 if (IS_VALID_ADDR(dst_block))
4478 bm::bit_block_set(dst_block, 0);
4479 this->read_digest0_block(decoder_, dst_block);
4480 break;
4481 default:
4482 BM_ASSERT(0);
4483 #ifndef BM_NO_STL
4484 throw std::logic_error(this->err_msg());
4485 #else
4486 BM_THROW(BM_ERR_SERIALFORMAT);
4487 #endif
4488 } // switch
4489 return count;
4490}
4491
4492template<typename DEC, typename BLOCK_IDX>
4493unsigned
4495 bm::word_t* dst_block,
4496 bm::word_t* tmp_block)
4497{
4498 BM_ASSERT(this->state_ == e_bit_block);
4499 unsigned count = 0;
4500 switch (block_type_)
4501 {
4502 case set_block_bit:
4503 decoder_.get_32_OR(dst_block, bm::set_block_size);
4504 break;
4506 {
4507 unsigned head_idx = decoder_.get_16();
4508 unsigned tail_idx = decoder_.get_16();
4509 for (unsigned i = head_idx; i <= tail_idx; ++i)
4510 dst_block[i] |= decoder_.get_32();
4511 }
4512 break;
4514 {
4515 unsigned char run_type = decoder_.get_8();
4516 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4517 {
4518 unsigned run_length = decoder_.get_16();
4519 if (run_type)
4520 {
4521 unsigned run_end = j + run_length;
4522 for (;j < run_end; ++j)
4523 {
4525 dst_block[j] |= decoder_.get_32();
4526 }
4527 }
4528 else
4529 {
4530 j += run_length;
4531 }
4532 } // for
4533 }
4534 break;
4535 case set_block_bit_1bit:
4536 case set_block_arrbit:
4537 get_arr_bit(dst_block, false /*don't clear target*/);
4538 break;
4540 get_inv_arr(tmp_block);
4541 bm::bit_block_or(dst_block, tmp_block);
4542 break;
4544 this->read_bic_arr(decoder_, dst_block);
4545 break;
4547 this->read_bic_arr_inv(decoder_, tmp_block);
4548 bm::bit_block_or(dst_block, tmp_block);
4549 break;
4551 this->read_bic_gap(decoder_, dst_block);
4552 break;
4554 this->read_digest0_block(decoder_, dst_block);
4555 break;
4556 default:
4557 BM_ASSERT(0);
4558 #ifndef BM_NO_STL
4559 throw std::logic_error(this->err_msg());
4560 #else
4561 BM_THROW(BM_ERR_SERIALFORMAT);
4562 #endif
4563 } // switch
4564 return count;
4565}
4566
4567template<typename DEC, typename BLOCK_IDX>
4568unsigned
4570 bm::word_t* BMRESTRICT dst_block,
4571 bm::word_t* BMRESTRICT tmp_block)
4572{
4573 BM_ASSERT(this->state_ == e_bit_block);
4574 BM_ASSERT(dst_block != tmp_block);
4575 unsigned count = 0;
4576 switch (block_type_)
4577 {
4578 case set_block_bit:
4579 decoder_.get_32_AND(dst_block, bm::set_block_size);
4580 break;
4582 {
4583 unsigned char run_type = decoder_.get_8();
4584 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4585 {
4586 unsigned run_length = decoder_.get_16();
4587
4588 unsigned run_end = j + run_length;
4589 if (run_type)
4590 {
4591 for (;j < run_end; ++j)
4592 {
4594 dst_block[j] &= decoder_.get_32();
4595 }
4596 }
4597 else
4598 {
4599 for (;j < run_end; ++j)
4600 {
4602 dst_block[j] = 0;
4603 }
4604 }
4605 } // for
4606 }
4607 break;
4609 {
4610 unsigned head_idx = decoder_.get_16();
4611 unsigned tail_idx = decoder_.get_16();
4612 unsigned i;
4613 for ( i = 0; i < head_idx; ++i)
4614 dst_block[i] = 0;
4615 for ( i = head_idx; i <= tail_idx; ++i)
4616 dst_block[i] &= decoder_.get_32();
4617 for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
4618 dst_block[i] = 0;
4619 }
4620 break;
4621 case set_block_bit_1bit:
4622 case set_block_arrbit:
4623 get_arr_bit(tmp_block, true /*clear target*/);
4624 if (dst_block)
4625 bm::bit_block_and(dst_block, tmp_block);
4626 break;
4628 get_inv_arr(tmp_block);
4629 if (dst_block)
4630 bm::bit_block_and(dst_block, tmp_block);
4631 break;
4633 if (dst_block)
4634 {
4635 bm::bit_block_set(tmp_block, 0);
4636 this->read_bic_arr(decoder_, tmp_block);
4637 bm::bit_block_and(dst_block, tmp_block);
4638 }
4639 else
4640 this->read_bic_arr(decoder_, 0); // dry read
4641 break;
4643 this->read_bic_arr_inv(decoder_, tmp_block);
4644 if (dst_block)
4645 bm::bit_block_and(dst_block, tmp_block);
4646 break;
4648 if (dst_block)
4649 {
4650 BM_ASSERT(IS_VALID_ADDR(dst_block));
4651 bm::bit_block_set(tmp_block, 0);
4652 this->read_bic_gap(decoder_, tmp_block);
4653 bm::bit_block_and(dst_block, tmp_block);
4654 }
4655 else
4656 this->read_bic_gap(decoder_, 0); // dry read
4657 break;
4659 if (dst_block)
4660 {
4661 BM_ASSERT(IS_VALID_ADDR(dst_block));
4662 bm::bit_block_set(tmp_block, 0);
4663 this->read_digest0_block(decoder_, tmp_block);
4664 bm::bit_block_and(dst_block, tmp_block);
4665 }
4666 else
4667 this->read_digest0_block(decoder_, 0); // dry read
4668 break;
4669 default:
4670 BM_ASSERT(0);
4671 #ifndef BM_NO_STL
4672 throw std::logic_error(this->err_msg());
4673 #else
4674 BM_THROW(BM_ERR_SERIALFORMAT);
4675 #endif
4676 } // switch
4677 return count;
4678}
4679
4680template<typename DEC, typename BLOCK_IDX>
4681unsigned
4683 bm::word_t* dst_block,
4684 bm::word_t* tmp_block)
4685{
4686 BM_ASSERT(this->state_ == e_bit_block);
4687 BM_ASSERT(dst_block != tmp_block);
4688
4689 unsigned count = 0;
4690 switch (block_type_)
4691 {
4692 case set_block_bit:
4693 for (unsigned i = 0; i < bm::set_block_size; ++i)
4694 dst_block[i] ^= decoder_.get_32();
4695 break;
4697 {
4698 unsigned char run_type = decoder_.get_8();
4699 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4700 {
4701 unsigned run_length = decoder_.get_16();
4702 if (run_type)
4703 {
4704 unsigned run_end = j + run_length;
4705 for (;j < run_end; ++j)
4706 {
4708 dst_block[j] ^= decoder_.get_32();
4709 }
4710 }
4711 else
4712 {
4713 j += run_length;
4714 }
4715 } // for
4716 }
4717 break;
4719 {
4720 unsigned head_idx = decoder_.get_16();
4721 unsigned tail_idx = decoder_.get_16();
4722 for (unsigned i = head_idx; i <= tail_idx; ++i)
4723 dst_block[i] ^= decoder_.get_32();
4724 }
4725 break;
4726 case set_block_bit_1bit:
4727 // TODO: optimization
4728 case set_block_arrbit:
4729 get_arr_bit(tmp_block, true /*clear target*/);
4730 if (dst_block)
4731 bm::bit_block_xor(dst_block, tmp_block);
4732 break;
4734 get_inv_arr(tmp_block);
4735 if (dst_block)
4736 bm::bit_block_xor(dst_block, tmp_block);
4737 break;
4739 bm::bit_block_set(tmp_block, 0);
4740 this->read_bic_arr(decoder_, tmp_block);
4741 if (dst_block)
4742 bm::bit_block_xor(dst_block, tmp_block);
4743 break;
4745 this->read_bic_arr_inv(decoder_, tmp_block);
4746 if (dst_block)
4747 {
4748 BM_ASSERT(IS_VALID_ADDR(dst_block));
4749 bm::bit_block_xor(dst_block, tmp_block);
4750 }
4751 break;
4753 if (dst_block)
4754 {
4755 BM_ASSERT(IS_VALID_ADDR(dst_block));
4756 bm::bit_block_set(tmp_block, 0);
4757 this->read_bic_gap(decoder_, tmp_block);
4758 bm::bit_block_xor(dst_block, tmp_block);
4759 }
4760 else
4761 this->read_bic_gap(decoder_, 0); // dry read
4762 break;
4764 if (dst_block)
4765 {
4766 BM_ASSERT(IS_VALID_ADDR(dst_block));
4767 bm::bit_block_set(tmp_block, 0);
4768 this->read_digest0_block(decoder_, tmp_block);
4769 bm::bit_block_xor(dst_block, tmp_block);
4770 }
4771 else
4772 this->read_digest0_block(decoder_, 0); // dry read
4773 break;
4774 default:
4775 BM_ASSERT(0);
4776 #ifndef BM_NO_STL
4777 throw std::logic_error(this->err_msg());
4778 #else
4779 BM_THROW(BM_ERR_SERIALFORMAT);
4780 #endif
4781 } // switch
4782 return count;
4783}
4784
4785template<typename DEC, typename BLOCK_IDX>
4786unsigned
4788 bm::word_t* dst_block,
4789 bm::word_t* tmp_block)
4790{
4791 BM_ASSERT(this->state_ == e_bit_block);
4792 BM_ASSERT(dst_block != tmp_block);
4793
4794 unsigned count = 0;
4795 switch (block_type_)
4796 {
4797 case set_block_bit:
4798 for (unsigned i = 0; i < bm::set_block_size; ++i)
4799 dst_block[i] &= ~decoder_.get_32();
4800 break;
4802 {
4803 unsigned char run_type = decoder_.get_8();
4804 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4805 {
4806 unsigned run_length = decoder_.get_16();
4807 if (run_type)
4808 {
4809 unsigned run_end = j + run_length;
4810 for (;j < run_end; ++j)
4811 {
4813 dst_block[j] &= ~decoder_.get_32();
4814 }
4815 }
4816 else
4817 {
4818 j += run_length;
4819 }
4820 } // for
4821 }
4822 break;
4824 {
4825 unsigned head_idx = decoder_.get_16();
4826 unsigned tail_idx = decoder_.get_16();
4827 for (unsigned i = head_idx; i <= tail_idx; ++i)
4828 dst_block[i] &= ~decoder_.get_32();
4829 }
4830 break;
4831 case set_block_bit_1bit:
4832 // TODO: optimization
4833 case set_block_arrbit:
4834 get_arr_bit(tmp_block, true /*clear target*/);
4835 if (dst_block)
4836 bm::bit_block_sub(dst_block, tmp_block);
4837 break;
4839 get_inv_arr(tmp_block);
4840 if (dst_block)
4841 bm::bit_block_sub(dst_block, tmp_block);
4842 break;
4844 bm::bit_block_set(tmp_block, 0);
4845 this->read_bic_arr(decoder_, tmp_block);
4846 if (dst_block)
4847 bm::bit_block_sub(dst_block, tmp_block);
4848 break;
4850 this->read_bic_arr_inv(decoder_, tmp_block);
4851 if (dst_block)
4852 bm::bit_block_sub(dst_block, tmp_block);
4853 break;
4855 if (dst_block)
4856 {
4857 BM_ASSERT(IS_VALID_ADDR(dst_block));
4858 bm::bit_block_set(tmp_block, 0);
4859 this->read_bic_gap(decoder_, tmp_block);
4860 bm::bit_block_sub(dst_block, tmp_block);
4861 }
4862 else
4863 this->read_bic_gap(decoder_, 0); // dry read
4864 break;
4866 if (dst_block)
4867 {
4868 BM_ASSERT(IS_VALID_ADDR(dst_block));
4869 bm::bit_block_set(tmp_block, 0);
4870 this->read_digest0_block(decoder_, tmp_block);
4871 bm::bit_block_sub(dst_block, tmp_block);
4872 }
4873 else
4874 this->read_digest0_block(decoder_, 0); // dry read
4875 break;
4876 default:
4877 BM_ASSERT(0);
4878 #ifndef BM_NO_STL
4879 throw std::logic_error(this->err_msg());
4880 #else
4881 BM_THROW(BM_ERR_SERIALFORMAT);
4882 #endif
4883 } // switch
4884 return count;
4885}
4886
4887
4888template<typename DEC, typename BLOCK_IDX>
4889unsigned
4891 bm::word_t* /*dst_block*/,
4892 bm::word_t* tmp_block)
4893{
4894 BM_ASSERT(this->state_ == e_bit_block);
4895
4896 unsigned count = 0;
4897 switch (block_type_)
4898 {
4899 case set_block_bit:
4900 for (unsigned i = 0; i < bm::set_block_size; ++i)
4901 count += bm::word_bitcount(decoder_.get_32());
4902 break;
4904 {
4905 //count = 0;
4906 unsigned char run_type = decoder_.get_8();
4907 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4908 {
4909 unsigned run_length = decoder_.get_16();
4910 if (run_type)
4911 {
4912 unsigned run_end = j + run_length;
4913 for (;j < run_end; ++j)
4914 {
4915 count += word_bitcount(decoder_.get_32());
4916 }
4917 }
4918 else
4919 {
4920 j += run_length;
4921 }
4922 } // for
4923 return count;
4924 }
4926 {
4927 unsigned head_idx = decoder_.get_16();
4928 unsigned tail_idx = decoder_.get_16();
4929 for (unsigned i = head_idx; i <= tail_idx; ++i)
4930 count += bm::word_bitcount(decoder_.get_32());
4931 }
4932 break;
4933 case set_block_arrbit:
4934 count += get_arr_bit(0);
4935 break;
4936 case set_block_bit_1bit:
4937 ++count;
4938 decoder_.get_16();
4939 break;
4941 get_inv_arr(tmp_block);
4942 goto count_tmp;
4944 bm::bit_block_set(tmp_block, 0); // TODO: just add a counted read
4945 this->read_bic_arr(decoder_, tmp_block);
4946 goto count_tmp;
4948 this->read_bic_arr_inv(decoder_, tmp_block);
4949 goto count_tmp;
4951 bm::bit_block_set(tmp_block, 0);
4952 this->read_digest0_block(decoder_, tmp_block);
4953 goto count_tmp;
4955 bm::bit_block_set(tmp_block, 0);
4956 this->read_bic_gap(decoder_, tmp_block);
4957 count_tmp:
4958 count += bm::bit_block_count(tmp_block);
4959 break;
4960 default:
4961 BM_ASSERT(0);
4962 #ifndef BM_NO_STL
4963 throw std::logic_error(this->err_msg());
4964 #else
4965 BM_THROW(BM_ERR_SERIALFORMAT);
4966 #endif
4967
4968 } // switch
4969 return count;
4970}
4971
4972template<typename DEC, typename BLOCK_IDX>
4973unsigned
4975 bm::word_t* dst_block,
4976 bm::word_t* tmp_block)
4977{
4978 BM_ASSERT(this->state_ == e_bit_block);
4979 unsigned count = 0;
4980 if (dst_block)
4981 {
4982 // count the block bitcount
4983 count = bm::bit_block_count(dst_block);
4984 }
4985
4986 switch (block_type_)
4987 {
4988 case set_block_bit:
4989 decoder_.get_32(0, bm::set_block_size);
4990 break;
4992 {
4993 unsigned char run_type = decoder_.get_8();
4994 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4995 {
4996 unsigned run_length = decoder_.get_16();
4997 if (run_type)
4998 {
4999 unsigned run_end = j + run_length;
5000 for (;j < run_end; ++j)
5001 {
5002 decoder_.get_32();
5003 }
5004 }
5005 else
5006 {
5007 j += run_length;
5008 }
5009 } // for
5010 }
5011 break;
5012
5014 {
5015 unsigned head_idx = decoder_.get_16();
5016 unsigned tail_idx = decoder_.get_16();
5017 for (unsigned i = head_idx; i <= tail_idx; ++i)
5018 decoder_.get_32();
5019 }
5020 break;
5021 case set_block_arrbit:
5022 get_arr_bit(0);
5023 break;
5024 case set_block_bit_1bit:
5025 decoder_.get_16();
5026 break;
5028 get_inv_arr(tmp_block);
5029 break;
5031 this->read_bic_arr(decoder_, tmp_block); // TODO: add dry read
5032 break;
5034 this->read_bic_arr_inv(decoder_, tmp_block); // TODO: add dry read
5035 break;
5037 this->read_bic_gap(decoder_, tmp_block);
5038 break;
5040 this->read_digest0_block(decoder_, 0); // dry read
5041 break;
5042 default:
5043 BM_ASSERT(0);
5044 #ifndef BM_NO_STL
5045 throw std::logic_error(this->err_msg());
5046 #else
5047 BM_THROW(BM_ERR_SERIALFORMAT);
5048 #endif
5049
5050 } // switch
5051 return count;
5052}
5053
5054
5055template<typename DEC, typename BLOCK_IDX>
5056unsigned
5058 bm::word_t* dst_block,
5059 bm::word_t* tmp_block)
5060{
5061 BM_ASSERT(this->state_ == e_bit_block);
5062 BM_ASSERT(dst_block);
5063
5064 unsigned count = 0;
5065 switch (block_type_)
5066 {
5067 case set_block_bit:
5068 for (unsigned i = 0; i < bm::set_block_size; ++i)
5069 count += word_bitcount(dst_block[i] & decoder_.get_32());
5070 break;
5072 {
5073 //count = 0;
5074 unsigned char run_type = decoder_.get_8();
5075 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5076 {
5077 unsigned run_length = decoder_.get_16();
5078 if (run_type)
5079 {
5080 unsigned run_end = j + run_length;
5081 for (;j < run_end; ++j)
5082 {
5083 count += word_bitcount(dst_block[j] & decoder_.get_32());
5084 }
5085 }
5086 else
5087 {
5088 j += run_length;
5089 }
5090 } // for
5091 return count;
5092 }
5094 {
5095 unsigned head_idx = decoder_.get_16();
5096 unsigned tail_idx = decoder_.get_16();
5097 for (unsigned i = head_idx; i <= tail_idx; ++i)
5098 count += word_bitcount(dst_block[i] & decoder_.get_32());
5099 }
5100 break;
5101 case set_block_bit_1bit:
5102 // TODO: optimization
5103 case set_block_arrbit:
5104 get_arr_bit(tmp_block, true /*clear target*/);
5105 goto count_tmp;
5106 break;
5108 get_inv_arr(tmp_block);
5109 goto count_tmp;
5110 break;
5112 bm::bit_block_set(tmp_block, 0);
5113 this->read_bic_arr(decoder_, tmp_block);
5114 goto count_tmp;
5116 this->read_bic_arr_inv(decoder_, tmp_block);
5117 goto count_tmp;
5119 bm::bit_block_set(tmp_block, 0);
5120 this->read_digest0_block(decoder_, tmp_block);
5121 goto count_tmp;
5123 bm::bit_block_set(tmp_block, 0);
5124 this->read_bic_gap(decoder_, tmp_block);
5125 count_tmp:
5126 count += bm::bit_operation_and_count(dst_block, tmp_block);
5127 break;
5128 default:
5129 BM_ASSERT(0);
5130 #ifndef BM_NO_STL
5131 throw std::logic_error(this->err_msg());
5132 #else
5133 BM_THROW(BM_ERR_SERIALFORMAT);
5134 #endif
5135
5136 } // switch
5137 return count;
5138}
5139
5140template<typename DEC,typename BLOCK_IDX>
5141unsigned
5143 bm::word_t* dst_block,
5144 bm::word_t* tmp_block)
5145{
5146 BM_ASSERT(this->state_ == e_bit_block);
5147 BM_ASSERT(dst_block);
5148
5149 bitblock_sum_adapter count_adapter;
5150 switch (block_type_)
5151 {
5152 case set_block_bit:
5153 {
5154 bitblock_get_adapter ga(dst_block);
5156
5157 bit_recomb(ga,
5158 decoder_,
5159 func,
5160 count_adapter
5161 );
5162 }
5163 break;
5164 case set_block_bit_0runs:
5165 {
5166 unsigned count = 0;
5167 unsigned char run_type = decoder_.get_8();
5168 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5169 {
5170 unsigned run_length = decoder_.get_16();
5171 unsigned run_end = j + run_length;
5172 if (run_type)
5173 {
5174 for (;j < run_end; ++j)
5175 {
5177 count += word_bitcount(dst_block[j] | decoder_.get_32());
5178 }
5179 }
5180 else
5181 {
5182 for (;j < run_end; ++j)
5183 {
5185 count += word_bitcount(dst_block[j]);
5186 }
5187 }
5188 } // for
5189 return count;
5190 }
5192 {
5193 unsigned head_idx = decoder_.get_16();
5194 unsigned tail_idx = decoder_.get_16();
5195 unsigned count = 0;
5196 unsigned i;
5197 for (i = 0; i < head_idx; ++i)
5198 count += bm::word_bitcount(dst_block[i]);
5199 for (i = head_idx; i <= tail_idx; ++i)
5200 count += bm::word_bitcount(dst_block[i] | decoder_.get_32());
5201 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
5202 count += bm::word_bitcount(dst_block[i]);
5203 return count;
5204 }
5205 case set_block_bit_1bit:
5206 // TODO: optimization
5207 case set_block_arrbit:
5208 get_arr_bit(tmp_block, true /* clear target*/);
5209 return bit_operation_or_count(dst_block, tmp_block);
5211 get_inv_arr(tmp_block);
5212 goto count_tmp;
5214 bm::bit_block_set(tmp_block, 0);
5215 this->read_bic_arr(decoder_, tmp_block);
5216 goto count_tmp;
5218 this->read_bic_arr_inv(decoder_, tmp_block);
5219 goto count_tmp;
5221 bm::bit_block_set(tmp_block, 0);
5222 this->read_digest0_block(decoder_, tmp_block);
5223 goto count_tmp;
5225 bm::bit_block_set(tmp_block, 0);
5226 this->read_bic_gap(decoder_, tmp_block);
5227 count_tmp:
5228 return bm::bit_operation_or_count(dst_block, tmp_block);
5229 default:
5230 BM_ASSERT(0);
5231 #ifndef BM_NO_STL
5232 throw std::logic_error(this->err_msg());
5233 #else
5234 BM_THROW(BM_ERR_SERIALFORMAT);
5235 #endif
5236
5237 } // switch
5238 return count_adapter.sum();
5239}
5240
5241template<typename DEC, typename BLOCK_IDX>
5242unsigned
5244 bm::word_t* dst_block,
5245 bm::word_t* tmp_block)
5246{
5247 BM_ASSERT(this->state_ == e_bit_block);
5248 BM_ASSERT(dst_block);
5249
5250 bitblock_sum_adapter count_adapter;
5251 switch (block_type_)
5252 {
5253 case set_block_bit:
5254 {
5255 bitblock_get_adapter ga(dst_block);
5257
5258 bit_recomb(ga,
5259 decoder_,
5260 func,
5261 count_adapter
5262 );
5263 }
5264 break;
5265 case set_block_bit_0runs:
5266 {
5267 unsigned count = 0;
5268 unsigned char run_type = decoder_.get_8();
5269 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5270 {
5271 unsigned run_length = decoder_.get_16();
5272 unsigned run_end = j + run_length;
5273 if (run_type)
5274 {
5275 for (;j < run_end; ++j)
5276 {
5278 count += bm::word_bitcount(dst_block[j] ^ decoder_.get_32());
5279 }
5280 }
5281 else
5282 {
5283 for (;j < run_end; ++j)
5284 {
5286 count += bm::word_bitcount(dst_block[j]);
5287 }
5288 }
5289 } // for
5290 return count;
5291 }
5293 {
5294 unsigned head_idx = decoder_.get_16();
5295 unsigned tail_idx = decoder_.get_16();
5296 unsigned count = 0;
5297 unsigned i;
5298 for (i = 0; i < head_idx; ++i)
5299 count += bm::word_bitcount(dst_block[i]);
5300 for (i = head_idx; i <= tail_idx; ++i)
5301 count += bm::word_bitcount(dst_block[i] ^ decoder_.get_32());
5302 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
5303 count += bm::word_bitcount(dst_block[i]);
5304 return count;
5305 }
5306 case set_block_bit_1bit:
5307 // TODO: optimization
5308 case set_block_arrbit:
5309 get_arr_bit(tmp_block, true /* clear target*/);
5310 goto count_tmp;
5312 get_inv_arr(tmp_block);
5313 goto count_tmp;
5315 bm::bit_block_set(tmp_block, 0);
5316 this->read_bic_arr(decoder_, tmp_block);
5317 goto count_tmp;
5319 this->read_bic_arr_inv(decoder_, tmp_block);
5320 goto count_tmp;
5321 break;
5323 bm::bit_block_set(tmp_block, 0);
5324 this->read_digest0_block(decoder_, tmp_block);
5325 goto count_tmp;
5327 bm::bit_block_set(tmp_block, 0);
5328 this->read_bic_gap(decoder_, tmp_block);
5329 count_tmp:
5330 return bm::bit_operation_xor_count(dst_block, tmp_block);
5331 default:
5332 BM_ASSERT(0);
5333 #ifndef BM_NO_STL
5334 throw std::logic_error(this->err_msg());
5335 #else
5336 BM_THROW(BM_ERR_SERIALFORMAT);
5337 #endif
5338
5339 } // switch
5340 return count_adapter.sum();
5341}
5342
5343template<typename DEC, typename BLOCK_IDX>
5344unsigned
5346 bm::word_t* dst_block,
5347 bm::word_t* tmp_block)
5348{
5349 BM_ASSERT(this->state_ == e_bit_block);
5350 BM_ASSERT(dst_block);
5351
5352 bitblock_sum_adapter count_adapter;
5353 switch (block_type_)
5354 {
5355 case set_block_bit:
5356 {
5357 bitblock_get_adapter ga(dst_block);
5359
5360 bit_recomb(ga,
5361 decoder_,
5362 func,
5363 count_adapter
5364 );
5365 }
5366 break;
5367 case set_block_bit_0runs:
5368 {
5369 unsigned count = 0;
5370 unsigned char run_type = decoder_.get_8();
5371 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5372 {
5373 unsigned run_length = decoder_.get_16();
5374 unsigned run_end = j + run_length;
5375 if (run_type)
5376 {
5377 for (;j < run_end; ++j)
5378 {
5380 count += word_bitcount(dst_block[j] & ~decoder_.get_32());
5381 }
5382 }
5383 else
5384 {
5385 for (;j < run_end; ++j)
5386 {
5388 count += bm::word_bitcount(dst_block[j]);
5389 }
5390 }
5391 } // for
5392 return count;
5393 }
5395 {
5396 unsigned head_idx = decoder_.get_16();
5397 unsigned tail_idx = decoder_.get_16();
5398 unsigned count = 0;
5399 unsigned i;
5400 for (i = 0; i < head_idx; ++i)
5401 count += bm::word_bitcount(dst_block[i]);
5402 for (i = head_idx; i <= tail_idx; ++i)
5403 count += bm::word_bitcount(dst_block[i] & (~decoder_.get_32()));
5404 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
5405 count += bm::word_bitcount(dst_block[i]);
5406 return count;
5407 }
5408 break;
5409 case set_block_bit_1bit:
5410 //TODO: optimization
5411 case set_block_arrbit:
5412 get_arr_bit(tmp_block, true /* clear target*/);
5413 goto count_tmp;
5415 get_inv_arr(tmp_block);
5416 goto count_tmp;
5418 bm::bit_block_set(tmp_block, 0);
5419 this->read_bic_arr(decoder_, tmp_block);
5420 goto count_tmp;
5422 this->read_bic_arr_inv(decoder_, tmp_block);
5423 goto count_tmp;
5425 bm::bit_block_set(tmp_block, 0);
5426 this->read_digest0_block(decoder_, tmp_block);
5427 goto count_tmp;
5429 bm::bit_block_set(tmp_block, 0);
5430 this->read_bic_gap(decoder_, tmp_block);
5431 count_tmp:
5432 return bm::bit_operation_sub_count(dst_block, tmp_block);
5433 default:
5434 BM_ASSERT(0);
5435 #ifndef BM_NO_STL
5436 throw std::logic_error(this->err_msg());
5437 #else
5438 BM_THROW(BM_ERR_SERIALFORMAT);
5439 #endif
5440
5441 } // switch
5442 return count_adapter.sum();
5443}
5444
5445template<typename DEC, typename BLOCK_IDX>
5446unsigned
5448 bm::word_t* dst_block,
5449 bm::word_t* tmp_block)
5450{
5451 BM_ASSERT(this->state_ == e_bit_block);
5452 BM_ASSERT(dst_block);
5453
5454 bitblock_sum_adapter count_adapter;
5455 switch (block_type_)
5456 {
5457 case set_block_bit:
5458 {
5459 bitblock_get_adapter ga(dst_block);
5461
5462 bit_recomb(ga,
5463 decoder_,
5464 func,
5465 count_adapter
5466 );
5467 }
5468 break;
5469 case set_block_bit_0runs:
5470 {
5471 unsigned count = 0;
5472 unsigned char run_type = decoder_.get_8();
5473 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5474 {
5475 unsigned run_length = decoder_.get_16();
5476 unsigned run_end = j + run_length;
5477 if (run_type)
5478 {
5479 for (;j < run_end; ++j)
5480 {
5482 count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
5483 }
5484 }
5485 else
5486 {
5487 j += run_length;
5488 }
5489 } // for
5490 return count;
5491 }
5493 {
5494 unsigned head_idx = decoder_.get_16();
5495 unsigned tail_idx = decoder_.get_16();
5496 unsigned count = 0;
5497 unsigned i;
5498 for (i = head_idx; i <= tail_idx; ++i)
5499 count += bm::word_bitcount(decoder_.get_32() & (~dst_block[i]));
5500 return count;
5501 }
5502 break;
5503 case set_block_bit_1bit:
5504 // TODO: optimization
5505 case set_block_arrbit:
5506 get_arr_bit(tmp_block, true /* clear target*/);
5507 goto count_tmp;
5509 get_inv_arr(tmp_block);
5510 goto count_tmp;
5512 bm::bit_block_set(tmp_block, 0);
5513 this->read_bic_arr(decoder_, tmp_block);
5514 goto count_tmp;
5516 this->read_bic_arr_inv(decoder_, tmp_block);
5517 goto count_tmp;
5519 bm::bit_block_set(tmp_block, 0);
5520 this->read_digest0_block(decoder_, tmp_block);
5521 goto count_tmp;
5523 bm::bit_block_set(tmp_block, 0);
5524 this->read_bic_gap(decoder_, tmp_block);
5525 count_tmp:
5526 return bm::bit_operation_sub_count(tmp_block, dst_block);
5527 default:
5528 BM_ASSERT(0);
5529 #ifndef BM_NO_STL
5530 throw std::logic_error(this->err_msg());
5531 #else
5532 BM_THROW(BM_ERR_SERIALFORMAT);
5533 #endif
5534 } // switch
5535 return count_adapter.sum();
5536}
5537
5538
5539
5540template<typename DEC, typename BLOCK_IDX>
5542 bm::word_t* dst_block,
5543 bool clear_target) BMNOEXCEPT
5544{
5545 BM_ASSERT(this->block_type_ == set_block_arrbit ||
5546 this->block_type_ == set_block_bit_1bit);
5547
5548 gap_word_t len = decoder_.get_16(); // array length / 1bit_idx
5549 if (dst_block)
5550 {
5551 if (clear_target)
5552 bm::bit_block_set(dst_block, 0);
5553
5554 if (this->block_type_ == set_block_bit_1bit)
5555 {
5556 // len contains idx of 1 bit set
5557 set_bit(dst_block, len);
5558 return 1;
5559 }
5560
5561 for (unsigned k = 0; k < len; ++k)
5562 {
5563 gap_word_t bit_idx = decoder_.get_16();
5564 bm::set_bit(dst_block, bit_idx);
5565 }
5566 }
5567 else
5568 {
5569 if (this->block_type_ == set_block_bit_1bit)
5570 return 1; // nothing to do: len var already consumed 16 bits
5571
5572 // fwd the decode stream
5573 decoder_.seek(len * 2);
5574 }
5575 return len;
5576}
5577
5578template<typename DEC, typename BLOCK_IDX>
5580{
5581 BM_ASSERT(this->block_type_ == set_block_bit_1bit);
5582 ++(this->block_idx_);
5583 this->state_ = e_blocks;
5584
5585 return decoder_.get_16(); // 1bit_idx
5586}
5587
5588template<typename DEC, typename BLOCK_IDX>
5589void
5591{
5592 BM_ASSERT(this->state_ == e_gap_block ||
5593 this->block_type_ == set_block_bit_1bit);
5594 BM_ASSERT(dst_block);
5595
5596 this->read_gap_block(this->decoder_,
5597 this->block_type_,
5598 dst_block,
5599 this->gap_head_);
5600
5601 ++(this->block_idx_);
5602 this->state_ = e_blocks;
5603}
5604
5605
5606template<typename DEC, typename BLOCK_IDX>
5607unsigned
5609 bm::word_t* dst_block,
5610 bm::word_t* tmp_block,
5611 set_operation op)
5612{
5613 BM_ASSERT(this->state_ == e_bit_block);
5614
5615 get_bit_func_type bit_func = bit_func_table_[op];
5616 BM_ASSERT(bit_func);
5617 unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
5618 this->state_ = e_blocks;
5619 ++(this->block_idx_);
5620 return cnt;
5621}
5622
5623// ----------------------------------------------------------------------
5624//
5625// ----------------------------------------------------------------------
5626
5627template<class BV>
5629: temp_block_(0), ref_vect_(0)
5630{
5631 temp_block_ = alloc_.alloc_bit_block();
5632}
5633
5634
5635template<class BV>
5637{
5638 if (temp_block_)
5639 alloc_.free_bit_block(temp_block_);
5640}
5641
5642template<class BV>
5645 const unsigned char* buf,
5646 set_operation op,
5647 bool /*exit_on_one*/)
5648{
5649 if (op == set_OR)
5650 {
5651 bm::deserialize(bv, buf, temp_block_, ref_vect_);
5652 return 0;
5653 }
5654 bvector_type bv_tmp;
5655 bm::deserialize(bv_tmp, buf, temp_block_, ref_vect_);
5656 return deserialize_xor(bv, bv_tmp, op);
5657}
5658
5659template<class BV>
5660void operation_deserializer<BV>::deserialize_xor_range(
5661 bvector_type& bv,
5662 const unsigned char* buf,
5663 size_type idx_from,
5664 size_type idx_to)
5665{
5666 bv.clear();
5667
5668 ByteOrder bo_current = globals<true>::byte_order();
5669
5670 bm::decoder dec(buf);
5671 unsigned char header_flag = dec.get_8();
5672 ByteOrder bo = bo_current;
5673 if (!(header_flag & BM_HM_NO_BO))
5674 {
5675 bo = (bm::ByteOrder) dec.get_8();
5676 }
5677
5678 if (bo_current == bo)
5679 {
5680 deserializer<BV, bm::decoder> deserial;
5681 deserial.set_ref_vectors(ref_vect_);
5682 deserial.set_range(idx_from, idx_to);
5683 deserial.deserialize(bv, buf);
5684 }
5685 else
5686 {
5687 switch (bo_current)
5688 {
5689 case BigEndian:
5690 {
5691 deserializer<BV, bm::decoder_big_endian> deserial;
5692 deserial.set_ref_vectors(ref_vect_);
5693 deserial.set_range(idx_from, idx_to);
5694 deserial.deserialize(bv, buf);
5695 }
5696 break;
5697 case LittleEndian:
5698 {
5699 deserializer<BV, bm::decoder_little_endian> deserial;
5700 deserial.set_ref_vectors(ref_vect_);
5701 deserial.set_range(idx_from, idx_to);
5702 deserial.deserialize(bv, buf);
5703 }
5704 break;
5705 default:
5706 BM_ASSERT(0);
5707 };
5708 }
5709 bv.keep_range_no_check(idx_from, idx_to);
5710}
5711
5712
5713
5714template<class BV>
5716operation_deserializer<BV>::deserialize_xor(bvector_type& bv,
5717 bvector_type& bv_tmp,
5718 set_operation op)
5719{
5720 size_type count = 0;
5721 switch (op)
5722 {
5723 case set_AND:
5725 break;
5726 case set_ASSIGN:
5727 bv.swap(bv_tmp);
5728 break;
5729 case set_SUB:
5730 bv -= bv_tmp;
5731 break;
5732 case set_XOR:
5733 bv ^= bv_tmp;
5734 break;
5735 case set_COUNT: case set_COUNT_B:
5736 count = bv_tmp.count();
5737 break;
5738 case set_COUNT_A:
5739 return bv.count();
5740 case set_COUNT_AND:
5741 count += bm::count_and(bv, bv_tmp);
5742 break;
5743 case set_COUNT_XOR:
5744 count += bm::count_xor(bv, bv_tmp);
5745 break;
5746 case set_COUNT_OR:
5747 count += bm::count_or(bv, bv_tmp);
5748 break;
5749 case set_COUNT_SUB_AB:
5750 count += bm::count_sub(bv, bv_tmp);
5751 break;
5752 case set_COUNT_SUB_BA:
5753 count += bm::count_sub(bv_tmp, bv);
5754 break;
5755 case set_END:
5756 default:
5757 BM_ASSERT(0);
5758 #ifndef BM_NO_STL
5759 throw std::logic_error("BM: serialization error");
5760 #else
5761 BM_THROW(BM_ERR_SERIALFORMAT);
5762 #endif
5763 } // switch
5764
5765 return count;
5766}
5767
5768
5769
5770template<class BV>
5773 const unsigned char* buf,
5774 set_operation op,
5775 bool exit_on_one)
5776{
5777 ByteOrder bo_current = globals<true>::byte_order();
5778 bm::decoder dec(buf);
5779 unsigned char header_flag = dec.get_8();
5780
5781 if (header_flag & BM_HM_HXOR) // XOR compression
5782 {
5783 BM_ASSERT(ref_vect_); // reference vector must be set
5784 return deserialize_xor(bv, buf, op, exit_on_one);
5785 }
5786
5787 ByteOrder bo = bo_current;
5788 if (!(header_flag & BM_HM_NO_BO))
5789 bo = (bm::ByteOrder) dec.get_8();
5790
5791 if (op == bm::set_ASSIGN)
5792 {
5793 bv.clear(true);
5794 op = bm::set_OR;
5795 }
5796
5797 if (bo_current == bo)
5798 {
5799 serial_stream_current ss(buf);
5800 return it_d_.deserialize(bv, ss, temp_block_, op, exit_on_one);
5801 }
5802 switch (bo_current)
5803 {
5804 case BigEndian:
5805 {
5806 serial_stream_be ss(buf);
5807 return it_d_be_.deserialize(bv, ss, temp_block_, op, exit_on_one);
5808 }
5809 case LittleEndian:
5810 {
5811 serial_stream_le ss(buf);
5812 return it_d_le_.deserialize(bv, ss, temp_block_, op, exit_on_one);
5813 }
5814 default:
5815 BM_ASSERT(0);
5816 #ifndef BM_NO_STL
5817 throw std::logic_error("BM::platform error: unknown endianness");
5818 #else
5819 BM_THROW(BM_ERR_SERIALFORMAT);
5820 #endif
5821 };
5822}
5823
5824template<class BV>
5826 bvector_type& bv,
5827 const unsigned char* buf,
5828 size_type idx_from,
5829 size_type idx_to)
5830{
5831 ByteOrder bo_current = globals<true>::byte_order();
5832 bm::decoder dec(buf);
5833 unsigned char header_flag = dec.get_8();
5834
5835 // check if it is empty fresh vector, set the range then
5836 //
5837 blocks_manager_type& bman = bv.get_blocks_manager();
5838 if (!bman.is_init())
5839 bv.set_range(idx_from, idx_to);
5840 else
5841 {} // assume that the target range set outside the call
5842
5843
5844 if (header_flag & BM_HM_HXOR) // XOR compression
5845 {
5846 BM_ASSERT(ref_vect_);
5847 bvector_type bv_tmp;
5848 deserialize_xor_range(bv_tmp, buf, idx_from, idx_to);
5849 if (bv.any())
5850 {
5851 bv.bit_and(bv_tmp, bvector_type::opt_compress);
5852 }
5853 else
5854 {
5855 bv.swap(bv_tmp);
5856 }
5857 return;
5858 }
5859
5860 ByteOrder bo = bo_current;
5861 if (!(header_flag & BM_HM_NO_BO))
5862 bo = (bm::ByteOrder) dec.get_8();
5863
5864 const bm::set_operation op = bm::set_AND;
5865
5866 if (bo_current == bo)
5867 {
5868 serial_stream_current ss(buf);
5869 it_d_.set_range(idx_from, idx_to);
5870 it_d_.deserialize(bv, ss, temp_block_, op, false);
5871 it_d_.unset_range();
5872 return;
5873 }
5874 switch (bo_current)
5875 {
5876 case BigEndian:
5877 {
5878 serial_stream_be ss(buf);
5879 it_d_be_.set_range(idx_from, idx_to);
5880 it_d_be_.deserialize(bv, ss, temp_block_, op, false);
5881 it_d_be_.unset_range();
5882 return;
5883 }
5884 case LittleEndian:
5885 {
5886 serial_stream_le ss(buf);
5887 it_d_le_.set_range(idx_from, idx_to);
5888 it_d_le_.deserialize(bv, ss, temp_block_, op, false);
5889 it_d_le_.unset_range();
5890 return;
5891 }
5892 default:
5893 BM_ASSERT(0);
5894 #ifndef BM_NO_STL
5895 throw std::logic_error("BM::platform error: unknown endianness");
5896 #else
5897 BM_THROW(BM_ERR_SERIALFORMAT);
5898 #endif
5899 };
5900 return;
5901}
5902
5903
5904
5905// ------------------------------------------------------------------
5906
5907template<class BV, class SerialIterator>
5909 size_type from, size_type to)
5910{
5911 is_range_set_ = true;
5912 nb_range_from_ = (from >> bm::set_block_shift);
5913 nb_range_to_ = (to >> bm::set_block_shift);
5914}
5915
5916
5917template<class BV, class SerialIterator>
5919 bvector_type& bv,
5920 serial_iterator_type& sit,
5921 unsigned id_count,
5922 bool set_clear)
5923{
5924 const unsigned win_size = 64;
5925 bm::id_t id_buffer[win_size+1];
5926
5927 if (set_clear) // set bits
5928 {
5929 for (unsigned i = 0; i <= id_count;)
5930 {
5931 unsigned j;
5932 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
5933 {
5934 id_buffer[j] = sit.get_id();
5935 sit.next();
5936 } // for j
5937 bm::combine_or(bv, id_buffer, id_buffer + j);
5938 } // for i
5939 }
5940 else // clear bits
5941 {
5942 for (unsigned i = 0; i <= id_count;)
5943 {
5944 unsigned j;
5945 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
5946 {
5947 id_buffer[j] = sit.get_id();
5948 sit.next();
5949 } // for j
5950 bm::combine_sub(bv, id_buffer, id_buffer + j);
5951 } // for i
5952 }
5953}
5954
5955template<class BV, class SerialIterator>
5957iterator_deserializer<BV, SerialIterator>::finalize_target_vector(
5958 blocks_manager_type& bman,
5959 set_operation op,
5960 size_type bv_block_idx)
5961{
5962 size_type count = 0;
5963 switch (op)
5964 {
5965 case set_OR: case set_SUB: case set_XOR:
5966 case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
5967 case set_COUNT_SUB_BA:
5968 // nothing to do
5969 break;
5970 case set_ASSIGN: case set_AND:
5971 {
5972 block_idx_type nblock_last = (bm::id_max >> bm::set_block_shift);
5973 if (bv_block_idx <= nblock_last)
5974 bman.set_all_zero(bv_block_idx, nblock_last); // clear the tail
5975 }
5976 break;
5977 case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
5978 case set_COUNT_SUB_AB:
5979 // count bits in the target vector
5980 {
5981 unsigned i, j;
5982 bm::get_block_coord(bv_block_idx, i, j);
5983 bm::word_t*** blk_root = bman.top_blocks_root();
5984 unsigned top_size = bman.top_block_size();
5985 for (;i < top_size; ++i)
5986 {
5987 bm::word_t** blk_blk = blk_root[i];
5988 if (blk_blk == 0)
5989 {
5990 bv_block_idx+=bm::set_sub_array_size-j;
5991 j = 0;
5992 continue;
5993 }
5994 // TODO: optimize for FULL top level
5995 for (; j < bm::set_sub_array_size; ++j, ++bv_block_idx)
5996 {
5997 if (blk_blk[j])
5998 count += bman.block_bitcount(blk_blk[j]);
5999 } // for j
6000 j = 0;
6001 } // for i
6002 }
6003 break;
6004 case set_END:
6005 default:
6006 BM_ASSERT(0);
6007 #ifndef BM_NO_STL
6008 throw std::logic_error(err_msg());
6009 #else
6010 BM_THROW(BM_ERR_SERIALFORMAT);
6011 #endif
6012 }
6013 return count;
6014}
6015
6016template<class BV, class SerialIterator>
6018iterator_deserializer<BV, SerialIterator>::process_id_list(
6019 bvector_type& bv,
6020 serial_iterator_type& sit,
6021 set_operation op)
6022{
6023 size_type count = 0;
6024 unsigned id_count = sit.get_id_count();
6025 bool set_clear = true;
6026 switch (op)
6027 {
6028 case set_AND:
6029 {
6030 // TODO: use some more optimal algorithm without temp vector
6031 BV bv_tmp(BM_GAP);
6032 load_id_list(bv_tmp, sit, id_count, true);
6033 bv &= bv_tmp;
6034 }
6035 break;
6036 case set_ASSIGN:
6037 BM_ASSERT(0);
6039 // fall through
6040 case set_OR:
6041 set_clear = true;
6043 // fall through
6044 case set_SUB:
6045 load_id_list(bv, sit, id_count, set_clear);
6046 break;
6047 case set_XOR:
6048 for (unsigned i = 0; i < id_count; ++i)
6049 {
6050 bm::id_t id = sit.get_id();
6051 bv[id] ^= true;
6052 sit.next();
6053 } // for
6054 break;
6055 case set_COUNT: case set_COUNT_B:
6056 for (unsigned i = 0; i < id_count; ++i)
6057 {
6058 /* bm::id_t id = */ sit.get_id();
6059 ++count;
6060 sit.next();
6061 } // for
6062 break;
6063 case set_COUNT_A:
6064 return bv.count();
6065 case set_COUNT_AND:
6066 for (size_type i = 0; i < id_count; ++i)
6067 {
6068 bm::id_t id = sit.get_id();
6069 count += bv.get_bit(id);
6070 sit.next();
6071 } // for
6072 break;
6073 case set_COUNT_XOR:
6074 {
6075 // TODO: get rid of the temp vector
6076 BV bv_tmp(BM_GAP);
6077 load_id_list(bv_tmp, sit, id_count, true);
6078 count += bm::count_xor(bv, bv_tmp);
6079 }
6080 break;
6081 case set_COUNT_OR:
6082 {
6083 // TODO: get rid of the temp. vector
6084 BV bv_tmp(BM_GAP);
6085 load_id_list(bv_tmp, sit, id_count, true);
6086 count += bm::count_or(bv, bv_tmp);
6087 }
6088 break;
6089 case set_COUNT_SUB_AB:
6090 {
6091 // TODO: get rid of the temp. vector
6092 BV bv_tmp(bv);
6093 load_id_list(bv_tmp, sit, id_count, false);
6094 count += bv_tmp.count();
6095 }
6096 break;
6097 case set_COUNT_SUB_BA:
6098 {
6099 BV bv_tmp(BM_GAP);
6100 load_id_list(bv_tmp, sit, id_count, true);
6101 count += bm::count_sub(bv_tmp, bv);
6102 }
6103 break;
6104 case set_END:
6105 default:
6106 BM_ASSERT(0);
6107 #ifndef BM_NO_STL
6108 throw std::logic_error(err_msg());
6109 #else
6110 BM_THROW(BM_ERR_SERIALFORMAT);
6111 #endif
6112 } // switch
6113
6114 return count;
6115}
6116
6117
6118template<class BV, class SerialIterator>
6121 bvector_type& bv,
6123 bm::word_t* temp_block,
6124 set_operation op,
6125 bool exit_on_one)
6126{
6127 BM_ASSERT(temp_block);
6128
6129 size_type count = 0;
6130 gap_word_t gap_temp_block[bm::gap_equiv_len * 4];
6131 gap_temp_block[0] = 0;
6132
6133 blocks_manager_type& bman = bv.get_blocks_manager();
6134 if (!bman.is_init())
6135 bman.init_tree();
6136
6137 if (sit.bv_size() && (sit.bv_size() > bv.size()))
6138 bv.resize(sit.bv_size());
6139
6140 typename serial_iterator_type::iterator_state state;
6141 state = sit.get_state();
6142 if (state == serial_iterator_type::e_list_ids)
6143 {
6144 count = process_id_list(bv, sit, op);
6145 return count;
6146 }
6147
6148 size_type bv_block_idx = 0;
6149 size_type empty_op_cnt = 0; // counter for empty target operations
6150
6151 for (;1;)
6152 {
6153 bm::set_operation sop = op;
6154 if (sit.is_eof()) // argument stream ended
6155 {
6156 count += finalize_target_vector(bman, op, bv_block_idx);
6157 return count;
6158 }
6159
6160 state = sit.state();
6161 switch (state)
6162 {
6163 case serial_iterator_type::e_blocks:
6164 sit.next();
6165
6166 // try to skip forward
6167 //
6168 if (is_range_set_ && (bv_block_idx < nb_range_from_))
6169 {
6170 // TODO: use saved bookmark block-idx
6171 bool skip_flag = sit.try_skip(bv_block_idx, nb_range_from_);
6172 if (skip_flag)
6173 {
6174 bv_block_idx = sit.block_idx();
6175 BM_ASSERT(bv_block_idx <= nb_range_from_);
6176 BM_ASSERT(sit.state() == serial_iterator_type::e_blocks);
6177 }
6178 }
6179 continue;
6180 case serial_iterator_type::e_bit_block:
6181 {
6182 BM_ASSERT(sit.block_idx() == bv_block_idx);
6183 unsigned i0, j0;
6184 bm::get_block_coord(bv_block_idx, i0, j0);
6185 bm::word_t* blk = bman.get_block_ptr(i0, j0);
6186 if (!blk)
6187 {
6188 switch (op)
6189 {
6190 case set_AND: case set_SUB: case set_COUNT_AND:
6191 case set_COUNT_SUB_AB: case set_COUNT_A:
6192 // one arg is 0, so the operation gives us zero
6193 // all we need to do is to seek the input stream
6194 sop = set_ASSIGN;
6195 break;
6196
6197 case set_OR: case set_XOR: case set_ASSIGN:
6198 blk = bman.make_bit_block(bv_block_idx);
6199 break;
6200
6201 case set_COUNT: case set_COUNT_XOR: case set_COUNT_OR:
6202 case set_COUNT_SUB_BA: case set_COUNT_B:
6203 // first arg is not required (should work as is)
6204 sop = set_COUNT;
6205 break;
6206
6207 case set_END:
6208 default:
6209 BM_ASSERT(0);
6210 #ifndef BM_NO_STL
6211 throw std::logic_error(err_msg());
6212 #else
6213 BM_THROW(BM_ERR_SERIALFORMAT);
6214 #endif
6215 }
6216 }
6217 else // block exists
6218 {
6219 int is_gap = BM_IS_GAP(blk);
6220 if (is_gap || IS_FULL_BLOCK(blk))
6221 {
6222 if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
6223 {
6225 }
6226 else
6227 {
6228 // TODO: make sure const operations do not
6229 // deoptimize GAP blocks
6230 blk = bman.deoptimize_block(bv_block_idx);
6231 }
6232 }
6233 }
6234
6235 // 2 bit-blocks recombination
6236 unsigned c = sit.get_bit_block(blk, temp_block, sop);
6237 count += c;
6238 if (exit_on_one && count) // early exit
6239 return count;
6240 switch (op) // target block optimization for non-const operations
6241 {
6242 case set_AND: case set_SUB: case set_XOR: case set_OR:
6243 bman.optimize_bit_block(i0, j0);
6244 break;
6245 default: break;
6246 } // switch
6247
6248 }
6249 break;
6250
6251 case serial_iterator_type::e_zero_blocks:
6252 {
6253 BM_ASSERT(bv_block_idx == sit.block_idx());
6254
6255 switch (op)
6256 {
6257 case set_ASSIGN: // nothing to do to rewind fwd
6258 case set_SUB: case set_COUNT_AND: case set_OR:
6259 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
6260 bv_block_idx = sit.skip_mono_blocks();
6261 continue;
6262
6263 case set_AND: // clear the range
6264 {
6265 size_type nb_start = bv_block_idx;
6266 bv_block_idx = sit.skip_mono_blocks();
6267 bman.set_all_zero(nb_start, bv_block_idx-1);
6268 }
6269 continue;
6270 case set_END:
6271 default:
6272 break;
6273 } // switch op
6274
6275
6276 unsigned i0, j0;
6277 bm::get_block_coord(bv_block_idx, i0, j0);
6278 bm::word_t* blk = bman.get_block_ptr(i0, j0);
6279
6280 sit.next();
6281
6282 if (blk)
6283 {
6284 switch (op)
6285 {
6286 case set_AND: case set_ASSIGN:
6287 // the result is 0
6288 //blk =
6289 bman.zero_block(bv_block_idx);
6290 break;
6291
6292 case set_SUB: case set_COUNT_AND: case set_OR:
6293 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
6294 // nothing to do
6295 break;
6296
6297 case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
6298 case set_COUNT: case set_COUNT_XOR:
6299 // valid bit block recombined with 0 block
6300 // results with same block data
6301 // all we need is to bitcount bv block
6302 {
6303 count += blk ? bman.block_bitcount(blk) : 0;
6304 if (exit_on_one && count) // early exit
6305 return count;
6306 }
6307 break;
6308 case set_END:
6309 default:
6310 BM_ASSERT(0);
6311 } // switch op
6312 } // if blk
6313 }
6314 break;
6315
6316 case serial_iterator_type::e_one_blocks:
6317 {
6318 BM_ASSERT(bv_block_idx == sit.block_idx());
6319 unsigned i0, j0;
6320 bm::get_block_coord(bv_block_idx, i0, j0);
6321 bm::word_t* blk = bman.get_block_ptr(i0, j0);
6322
6323 sit.next();
6324
6325 switch (op)
6326 {
6327 case set_OR: case set_ASSIGN:
6328 bman.set_block_all_set(bv_block_idx);
6329 break;
6330 case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
6331 // block becomes all set
6332 count += bm::bits_in_block;
6333 break;
6334 case set_SUB:
6335 //blk =
6336 bman.zero_block(bv_block_idx);
6337 break;
6338 case set_COUNT_SUB_AB: case set_AND:
6339 // nothing to do, check maybe nothing else to do at all
6340 if (++empty_op_cnt > 64)
6341 {
6342 size_type last_id;
6343 bool b = bv.find_reverse(last_id);
6344 if (!b)
6345 return count;
6346 size_type last_nb = (last_id >> bm::set_block_shift);
6347 if (last_nb < bv_block_idx)
6348 return count; // early exit, nothing to do here
6349 empty_op_cnt = 0;
6350 }
6351 break;
6352 case set_COUNT_AND: case set_COUNT_A:
6353 count += blk ? bman.block_bitcount(blk) : 0;
6354 break;
6355 default:
6356 if (blk)
6357 {
6358 switch (op)
6359 {
6360 case set_XOR:
6361 blk = bman.deoptimize_block(bv_block_idx);
6363 break;
6364 case set_COUNT_XOR:
6365 {
6366 count +=
6368 blk,
6371 }
6372 break;
6373 case set_COUNT_SUB_BA:
6374 {
6375 count +=
6377 blk,
6380 }
6381 break;
6382 default:
6383 BM_ASSERT(0);
6384 } // switch
6385 }
6386 else // blk == 0
6387 {
6388 switch (op)
6389 {
6390 case set_XOR:
6391 // 0 XOR 1 = 1
6392 bman.set_block_all_set(bv_block_idx);
6393 break;
6394 case set_COUNT_XOR:
6395 count += bm::bits_in_block;
6396 break;
6397 case set_COUNT_SUB_BA:
6398 // 1 - 0 = 1
6399 count += bm::bits_in_block;
6400 break;
6401 default:
6402 break;
6403 } // switch
6404 } // else
6405 } // switch
6406 if (exit_on_one && count) // early exit
6407 return count;
6408 }
6409 break;
6410
6411 case serial_iterator_type::e_gap_block:
6412 {
6413 BM_ASSERT(bv_block_idx == sit.block_idx());
6414
6415 unsigned i0, j0;
6416 bm::get_block_coord(bv_block_idx, i0, j0);
6417 const bm::word_t* blk = bman.get_block(i0, j0);
6418
6419 sit.get_gap_block(gap_temp_block);
6420
6421 unsigned len = gap_length(gap_temp_block);
6422 int level = gap_calc_level(len, bman.glen());
6423 --len;
6424
6425 bool const_op = is_const_set_operation(op);
6426 if (const_op)
6427 {
6428 distance_metric metric = operation2metric(op);
6429 bm::word_t* gptr = (bm::word_t*)gap_temp_block;
6430 BMSET_PTRGAP(gptr);
6431 unsigned c =
6433 blk,
6434 gptr,
6435 metric);
6436 count += c;
6437 if (exit_on_one && count) // early exit
6438 return count;
6439
6440 }
6441 else // non-const set operation
6442 {
6443 if ((sop == set_ASSIGN) && blk) // target block override
6444 {
6445 bman.zero_block(bv_block_idx);
6446 sop = set_OR;
6447 }
6448 if (blk == 0) // target block not found
6449 {
6450 switch (sop)
6451 {
6452 case set_AND: case set_SUB:
6453 break;
6454 case set_OR: case set_XOR: case set_ASSIGN:
6455 bman.set_gap_block(
6456 bv_block_idx, gap_temp_block, level);
6457 break;
6458
6459 default:
6460 BM_ASSERT(0);
6461 } // switch
6462 }
6463 else // target block exists
6464 {
6465 bm::operation bop = bm::setop2op(op);
6466 if (level == -1) // too big to GAP
6467 {
6468 gap_convert_to_bitset(temp_block, gap_temp_block);
6469 bv.combine_operation_with_block(bv_block_idx,
6470 temp_block,
6471 0, // BIT
6472 bop);
6473 }
6474 else // GAP fits
6475 {
6476 set_gap_level(gap_temp_block, level);
6477 bv.combine_operation_with_block(
6478 bv_block_idx,
6479 (bm::word_t*)gap_temp_block,
6480 1, // GAP
6481 bop);
6482 }
6483 }
6484 if (exit_on_one)
6485 {
6486 bm::get_block_coord(bv_block_idx, i0, j0);
6487 blk = bman.get_block_ptr(i0, j0);
6488 if (blk)
6489 {
6490 bool z = bm::check_block_zero(blk, true/*deep check*/);
6491 if (!z)
6492 return 1;
6493 }
6494 } // if exit_on_one
6495
6496 } // if else non-const op
6497 }
6498 break;
6499
6500 default:
6501 BM_ASSERT(0);
6502 #ifndef BM_NO_STL
6503 throw std::logic_error(err_msg());
6504 #else
6505 BM_THROW(BM_ERR_SERIALFORMAT);
6506 #endif
6507 } // switch
6508
6509 ++bv_block_idx;
6510 BM_ASSERT(bv_block_idx);
6511
6512 if (is_range_set_ && (bv_block_idx > nb_range_to_))
6513 break;
6514
6515 } // for (deserialization)
6516
6517 return count;
6518}
6519
6520
6521
6522
6523} // namespace bm
6524
6525#include "bmundef.h"
6526
6527#ifdef _MSC_VER
6528#pragma warning( pop )
6529#endif
6530
6531
6532#endif
Algorithms for bvector<> (main include)
Definitions(internal)
#define IS_FULL_BLOCK(addr)
Definition bmdef.h:153
#define IS_VALID_ADDR(addr)
Definition bmdef.h:152
#define BM_FALLTHROUGH
Definition bmdef.h:420
#define BMRESTRICT
Definition bmdef.h:193
#define BMNOEXCEPT
Definition bmdef.h:79
#define BMPTR_SETBIT0(ptr)
Definition bmdef.h:167
#define BMGAP_PTR(ptr)
Definition bmdef.h:179
#define BM_IS_GAP(ptr)
Definition bmdef.h:181
#define BMSET_PTRGAP(ptr)
Definition bmdef.h:180
#define BM_ASSERT
Definition bmdef.h:130
#define FULL_BLOCK_FAKE_ADDR
Definition bmdef.h:149
#define FULL_BLOCK_REAL_ADDR
Definition bmdef.h:148
Bit manipulation primitives (internal)
#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO)
Definition bmserial.h:2128
Utilities for bit transposition (internal) (experimental!)
pre-processor un-defines to avoid global space pollution (internal)
Bit manipulation primitives (internal)
Functions and utilities for XOR filters (internal)
Byte based reader for un-aligned bit streaming.
Definition encoding.h:249
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition encoding.h:1732
void bic_decode_u16(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition encoding.h:264
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:270
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition encoding.h:275
Byte based writer for un-aligned bit streaming.
Definition encoding.h:175
void flush() BMNOEXCEPT
Flush the incomplete 32-bit accumulator word.
Definition encoding.h:222
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
Definition encoding.h:1103
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition encoding.h:199
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
Definition bmfunc.h:8122
Bit-block sum adapter, takes values and sums it /internal.
Definition bmfunc.h:8152
bm::word_t sum() const BMNOEXCEPT
Get accumulated sum.
Definition bmfunc.h:8158
List of reference bit-vectors with their true index associations.
Definition bmxor.h:241
void assign_if_not_set(allocator_pool_type &pool, bvector< Alloc > &bv) BMNOEXCEPT
check if vector has no assigned allocator and set one
Definition bm.h:789
Bitvector Bit-vector container with runtime compression of bits.
Definition bm.h:108
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition bm.h:134
size_type count() const BMNOEXCEPT
population cout (count of ON bits)
Definition bm.h:2194
bm::id_t size_type
Definition bm.h:117
bm::bvector< Alloc > & bit_and(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand AND : this := bv1 AND bv2
Definition bm.h:5017
blocks_manager< Alloc > blocks_manager_type
Definition bm.h:112
void swap(bvector< Alloc > &bvect) BMNOEXCEPT
Exchanges content of bv and this bvector.
Definition bm.h:3381
blocks_manager_type::block_idx_type block_idx_type
Definition bm.h:113
void clear(const size_type *ids, size_type ids_size, bm::sort_order so=bm::BM_UNKNOWN)
clear list of bits in this bitset
Definition bm.h:3546
Alloc allocator_type
Definition bm.h:110
bool get_bit(size_type n) const BMNOEXCEPT
returns true if bit n is set and false is bit n is 0.
Definition bm.h:3037
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition encoding.h:101
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition encoding.h:89
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
Definition encoding.h:104
Class for decoding data from memory buffer.
Definition encoding.h:118
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition encoding.h:668
bm::word_t get_24() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition encoding.h:655
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:703
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition encoding.h:639
bm::id64_t get_48() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition encoding.h:686
Base deserialization class.
Definition bmserial.h:425
const unsigned char * skip_pos_
decoder skip position
Definition bmserial.h:481
void read_bic_arr(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read binary interpolated list into a bit-set.
Definition bmserial.h:2888
static const char * err_msg() BMNOEXCEPT
Definition bmserial.h:467
block_idx_type bookmark_idx_
last bookmark block index
Definition bmserial.h:479
unsigned skip_offset_
bookmark to skip 256 encoded blocks
Definition bmserial.h:480
static void read_0runs_block(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
read bit-block encoded as runs
Definition bmserial.h:2985
void read_bic_arr_inv(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read inverted binary interpolated list into a bit-set.
Definition bmserial.h:2911
block_idx_type try_skip(decoder_type &decoder, block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT
Try to skip if skip bookmark is available within reach.
Definition bmserial.h:3143
void read_digest0_block(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read digest0-type bit-block.
Definition bmserial.h:2945
void read_bic_gap(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read binary interpolated gap blocks into a bitset.
Definition bmserial.h:2921
bm::bit_in< DEC > bit_in_type
Definition bmserial.h:429
void read_gap_block(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_block, bm::gap_word_t &gap_head)
Read GAP block from the stream.
Definition bmserial.h:3016
unsigned read_id_list(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_arr)
Read list of bit ids.
Definition bmserial.h:2803
BLOCK_IDX block_idx_type
Definition bmserial.h:428
bm::gap_word_t * id_array_
ptr to idx array for temp decode use
Definition bmserial.h:477
Deserializer for bit-vector.
Definition bmserial.h:491
void xor_decode(size_type x_ref_idx, bm::id64_t x_ref_d64, blocks_manager_type &bman, block_idx_type nb)
Definition bmserial.h:3969
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR de-serialization (no transfer of ownership for the poi...
Definition bmserial.h:3240
block_arridx_type bit_idx_arr_
Definition bmserial.h:580
allocator_type::allocator_pool_type allocator_pool_type
Definition bmserial.h:577
allocator_type alloc_
Definition bmserial.h:585
unsigned is_range_set_
Definition bmserial.h:595
deseriaizer_base< DEC, block_idx_type > parent_type
Definition bmserial.h:497
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition bmserial.h:499
void deserialize_gap(unsigned char btype, decoder_type &dec, bvector_type &bv, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
Definition bmserial.h:3249
bvector_type::allocator_type allocator_type
Definition bmserial.h:494
parent_type::decoder_type decoder_type
Definition bmserial.h:498
BV::size_type size_type
Definition bmserial.h:495
void set_range(size_type from, size_type to) BMNOEXCEPT
set deserialization range [from, to] This is NOT exact, approximate range, content outside range is n...
Definition bmserial.h:531
allocator_pool_type pool_
Definition bmserial.h:584
const bv_ref_vector_type * ref_vect_
ref.vector for XOR compression
Definition bmserial.h:589
void unset_range() BMNOEXCEPT
Disable range deserialization.
Definition bmserial.h:540
BV::blocks_manager_type blocks_manager_type
Definition bmserial.h:543
size_type idx_to_
Definition bmserial.h:597
block_arridx_type gap_temp_block_
Definition bmserial.h:581
bm::word_t * xor_block_
xor product
Definition bmserial.h:590
size_t deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Definition bmserial.h:3531
bvector_type::block_idx_type block_idx_type
Definition bmserial.h:496
void decode_arrbit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
Definition bmserial.h:3495
void decode_bit_block(unsigned char btype, decoder_type &dec, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
Definition bmserial.h:3384
bm::word_t * or_block_
Definition bmserial.h:591
bm::word_t * temp_block_
Definition bmserial.h:582
void decode_block_bit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
Definition bmserial.h:3448
void decode_block_bit_interval(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
Definition bmserial.h:3468
bm::heap_vector< bm::gap_word_t, allocator_type, true > block_arridx_type
Definition bmserial.h:576
size_type idx_from_
Definition bmserial.h:596
Memory encoding.
Definition encoding.h:50
unsigned char * position_type
Definition encoding.h:52
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition encoding.h:485
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition encoding.h:562
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition encoding.h:420
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition encoding.h:527
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition encoding.h:430
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition encoding.h:392
Functor for Elias Gamma encoding.
Definition encoding.h:327
Iterator to walk forward the serialized stream.
Definition bmserial.h:609
void set_range(size_type from, size_type to)
set deserialization range [from, to]
Definition bmserial.h:5908
SerialIterator serial_iterator_type
Definition bmserial.h:613
void unset_range() BMNOEXCEPT
disable range filtration
Definition bmserial.h:620
bvector_type::size_type size_type
Definition bmserial.h:612
size_type deserialize(bvector_type &bv, serial_iterator_type &sit, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Definition bmserial.h:6120
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition bmserial.h:825
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition bmserial.h:830
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
Definition bmserial.h:907
void deserialize_range(bvector_type &bv, const unsigned char *buf, size_type idx_from, size_type idx_to)
Definition bmserial.h:5825
BV::allocator_type allocator_type
Definition bmserial.h:828
size_type deserialize(bvector_type &bv, const unsigned char *buf, set_operation op, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
Definition bmserial.h:5772
void deserialize_range(bvector_type &bv, const unsigned char *buf, bm::word_t *, size_type idx_from, size_type idx_to)
Definition bmserial.h:894
bvector_type::size_type size_type
Definition bmserial.h:829
size_type deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *, set_operation op=bm::set_OR, bool exit_on_one=false)
Obsolete! Deserialize bvector using buffer as set operation argument.
Definition bmserial.h:880
Serialization stream iterator.
Definition bmserial.h:668
unsigned get_arr_bit(bm::word_t *dst_block, bool clear_target=true) BMNOEXCEPT
Get array of bits out of the decoder into bit block (Converts inverted list into bits) Returns number...
Definition bmserial.h:5541
bm::id_t last_id_
Last id from the id list.
Definition bmserial.h:804
unsigned get_bit_block_COUNT_B(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:765
deseriaizer_base< DEC, block_idx_type > parent_type
Definition bmserial.h:672
void next()
get next block
Definition bmserial.h:4133
block_idx_type bv_size_
Definition bmserial.h:801
serial_stream_iterator(const unsigned char *buf)
Definition bmserial.h:4025
block_idx_type mono_block_cnt_
number of 0 or 1 blocks
Definition bmserial.h:809
unsigned block_type_
current block type
Definition bmserial.h:807
unsigned get_id_count() const BMNOEXCEPT
Number of ids in the inverted list (valid for e_list_ids)
Definition bmserial.h:725
unsigned get_bit() BMNOEXCEPT
Definition bmserial.h:5579
unsigned get_bit_block_SUB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:4787
void get_inv_arr(bm::word_t *block) BMNOEXCEPT
Definition bmserial.h:4373
unsigned get_bit_block_COUNT_SUB_AB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5345
unsigned get_bit_block_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:4569
unsigned get_bit_block_COUNT_SUB_BA(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5447
bm::id_t get_id() const BMNOEXCEPT
Get last id from the id list.
Definition bmserial.h:728
unsigned get_bit_block_COUNT_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5142
unsigned get_bit_block_COUNT(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:4890
bool is_eof() const
Returns true if end of bit-stream reached.
Definition bmserial.h:682
decoder_type & decoder()
Get low level access to the decoder (use carefully)
Definition bmserial.h:703
block_idx_type block_idx_
current block index
Definition bmserial.h:808
unsigned get_bit_block_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:4682
unsigned get_bit_block_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:4494
unsigned get_bit_block_ASSIGN(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:4395
iterator_state state() const BMNOEXCEPT
Returns iterator internal state.
Definition bmserial.h:721
block_idx_type block_idx() const BMNOEXCEPT
Get current block index.
Definition bmserial.h:731
iterator_state state_
Definition bmserial.h:802
gap_word_t * block_idx_arr_
Definition bmserial.h:812
unsigned dec_size() const
Return current decoder size.
Definition bmserial.h:700
unsigned get_bit_block_COUNT_A(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:4974
gap_word_t glevels_[bm::gap_levels]
GAP levels.
Definition bmserial.h:805
unsigned(serial_stream_iterator< DEC, BLOCK_IDX >::* get_bit_func_type)(bm::word_t *, bm::word_t *)
member function pointer for bitset-bitset get operations
Definition bmserial.h:738
iterator_state
iterator is a state machine, this enum encodes its key value
Definition bmserial.h:709
@ e_bit_block
one bit block
Definition bmserial.h:715
@ e_list_ids
plain int array
Definition bmserial.h:711
@ e_zero_blocks
one or more zero bit blocks
Definition bmserial.h:713
@ e_gap_block
one gap block
Definition bmserial.h:716
@ e_one_blocks
one or more all-1 bit blocks
Definition bmserial.h:714
@ e_blocks
stream of blocks
Definition bmserial.h:712
bool try_skip(block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT
Try to skip if skip bookmark is available within reach.
Definition bmserial.h:786
void get_gap_block(bm::gap_word_t *dst_block)
Read gap block data (with head)
Definition bmserial.h:5590
deseriaizer_base< DEC, BLOCK_IDX >::decoder_type decoder_type
Definition bmserial.h:670
get_bit_func_type bit_func_table_[bm::set_END]
Definition bmserial.h:797
unsigned id_cnt_
Id counter for id list.
Definition bmserial.h:803
unsigned get_bit_block_COUNT_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5243
unsigned get_block_type() const BMNOEXCEPT
Get current block type.
Definition bmserial.h:777
unsigned get_bit_block_COUNT_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition bmserial.h:5057
block_idx_type skip_mono_blocks() BMNOEXCEPT
skip all zero or all-one blocks
Definition bmserial.h:4357
block_idx_type bv_size() const
serialized bitvector size
Definition bmserial.h:679
iterator_state get_state() const BMNOEXCEPT
Definition bmserial.h:723
unsigned get_bit_block(bm::word_t *dst_block, bm::word_t *tmp_block, set_operation op)
read bit block, using logical operation
Definition bmserial.h:5608
Bit-vector serialization class.
Definition bmserial.h:76
void gamma_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc) BMNOEXCEPT
Definition bmserial.h:1280
void serialize(const BV &bv, typename serializer< BV >::buffer &buf, const statistics_type *bv_stat=0)
Bitvector serialization into buffer object (resized automatically)
Definition bmserial.h:1903
unsigned char find_bit_best_encoding(const bm::word_t *block) BMNOEXCEPT
Determine best representation for a bit-block.
Definition bmserial.h:1584
serializer(bm::word_t *temp_block)
Definition bmserial.h:1071
void reset_compression_stats() BMNOEXCEPT
Reset all accumulated compression statistics.
Definition bmserial.h:1111
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector)
Definition bmserial.h:1126
bvector_type::block_idx_type block_idx_type
Definition bmserial.h:82
bvector_type::size_type size_type
Definition bmserial.h:83
unsigned char find_gap_best_encoding(const bm::gap_word_t *gap_block) BMNOEXCEPT
Determine best representation for GAP block based on current set compression level.
Definition bmserial.h:1690
void gamma_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
Definition bmserial.h:1976
void set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT
Set current index in rer.vector collection (not a row idx or plain idx)
Definition bmserial.h:1162
void interpolated_gap_array_v0(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) BMNOEXCEPT
Definition bmserial.h:1364
void set_compression_level(unsigned clevel) BMNOEXCEPT
Set compression level.
Definition bmserial.h:1119
void encode_bit_digest(const bm::word_t *blk, bm::encoder &enc, bm::id64_t d0) BMNOEXCEPT
Encode bit-block using digest (hierarchical compression)
Definition bmserial.h:1846
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition bmserial.h:86
void byte_order_serialization(bool value) BMNOEXCEPT
Set byte-order serialization (for cross platform compatibility)
Definition bmserial.h:1132
unsigned char find_bit_best_encoding_l5(const bm::word_t *block) BMNOEXCEPT
Determine best representation for a bit-block (level 5)
Definition bmserial.h:1496
void optimize_serialize_destroy(BV &bv, typename serializer< BV >::buffer &buf)
Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized an...
Definition bmserial.h:1925
bvector_type::allocator_type allocator_type
Definition bmserial.h:79
byte_buffer< allocator_type > buffer
Definition bmserial.h:85
void encode_bit_array(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
Encode bit-block as an array of bits.
Definition bmserial.h:1943
void interpolated_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
encode bit-block as interpolated gap block
Definition bmserial.h:2015
bvector_type::blocks_manager_type blocks_manager_type
Definition bmserial.h:80
void encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
Definition bmserial.h:1734
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size incre...
Definition bmserial.h:1138
unsigned get_compression_level() const BMNOEXCEPT
Get compression level (0-5), Default 5 (recommended) 0 - take as is 1, 2 - apply light weight RLE/GAP...
Definition bmserial.h:130
void reset_models() BMNOEXCEPT
Definition bmserial.h:333
const size_type * get_compression_stat() const BMNOEXCEPT
Return serialization counter vector.
Definition bmserial.h:193
static void process_bookmark(block_idx_type nb, bookmark_state &bookm, bm::encoder &enc) BMNOEXCEPT
Check if bookmark needs to be placed and if so, encode it into serialization BLOB.
Definition bmserial.h:2154
serializer(const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
Constructor.
Definition bmserial.h:1042
void interpolated_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
Definition bmserial.h:2073
void interpolated_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) BMNOEXCEPT
Encode GAP block as an array with binary interpolated coder.
Definition bmserial.h:1412
void gamma_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false) BMNOEXCEPT
Encode GAP block as delta-array with Elias Gamma coder.
Definition bmserial.h:1319
void add_model(unsigned char mod, unsigned score) BMNOEXCEPT
Definition bmserial.h:1487
void bienc_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
encode bit-block as interpolated bit block of gaps
Definition bmserial.h:2025
void gamma_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
Definition bmserial.h:1967
bvector_type::statistics statistics_type
Definition bmserial.h:81
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
Definition bmserial.h:1153
void encode_bit_interval(const bm::word_t *blk, bm::encoder &enc, unsigned size_control) BMNOEXCEPT
Encode BIT block with repeatable runs of zeroes.
Definition bmserial.h:1794
void interpolated_encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc) BMNOEXCEPT
Definition bmserial.h:1220
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition bmserial.h:2264
void encode_header(const BV &bv, bm::encoder &enc) BMNOEXCEPT
Encode serialization header information.
Definition bmserial.h:1168
void bienc_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
Definition bmserial.h:1996
XOR scanner to search for complement-similarities in collections of bit-vectors.
Definition bmxor.h:331
Encoding utilities for serialization (internal)
unsigned bit_block_find(const bm::word_t *BMRESTRICT block, unsigned nbit, unsigned *BMRESTRICT pos) BMNOEXCEPT
Searches for the next 1 bit in the BIT block.
Definition bmfunc.h:7467
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
Definition bmfunc.h:197
unsigned bit_block_calc_change(const bm::word_t *block) BMNOEXCEPT
Definition bmfunc.h:4607
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Bitblock copy operation.
Definition bmfunc.h:5898
T bit_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len, unsigned mask=0) BMNOEXCEPT
Convert bit block into an array of ints corresponding to 1 bits.
Definition bmfunc.h:7822
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size) BMNOEXCEPT
Inspects block for full zero words.
Definition bmfunc.h:7407
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
Definition bmfunc.h:5940
bm::id_t bit_block_count(const bm::word_t *block) BMNOEXCEPT
Bitcount for bit block.
Definition bmfunc.h:4379
BMFORCEINLINE void clear_bit(unsigned *dest, unsigned bitpos) BMNOEXCEPT
Set 1 bit in a block.
Definition bmfunc.h:3138
bm::id_t bit_operation_or_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock OR operation and calculates bitcount of the result.
Definition bmfunc.h:6679
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
Definition bmfunc.h:230
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos) BMNOEXCEPT
Set 1 bit in a block.
Definition bmfunc.h:3125
bm::id64_t calc_block_digest0(const bm::word_t *const block) BMNOEXCEPT
Compute digest for 64 non-zero areas.
Definition bmfunc.h:734
void bit_invert(T *start) BMNOEXCEPT
Definition bmfunc.h:5253
bm::id64_t bit_block_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
Definition bmfunc.h:7240
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
Bitblock memset operation.
Definition bmfunc.h:3841
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
Definition bmfunc.h:6749
bm::id_t bit_operation_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock XOR operation and calculates bitcount of the result.
Definition bmfunc.h:7349
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
Definition bmfunc.h:7019
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock AND operation and calculates bitcount of the result.
Definition bmfunc.h:6540
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock SUB operation and calculates bitcount of the result.
Definition bmfunc.h:6589
operation
Bit operations.
Definition bmconst.h:176
set_operation
Codes of set operations.
Definition bmconst.h:153
strategy
Block allocation strategies.
Definition bmconst.h:142
@ BM_OR
Definition bmconst.h:178
@ set_COUNT_SUB_AB
Definition bmconst.h:163
@ set_OR
Definition bmconst.h:155
@ set_COUNT_XOR
Definition bmconst.h:161
@ set_COUNT_OR
Definition bmconst.h:162
@ set_COUNT_B
Definition bmconst.h:166
@ set_COUNT_SUB_BA
Definition bmconst.h:164
@ set_ASSIGN
Definition bmconst.h:158
@ set_SUB
Definition bmconst.h:156
@ set_COUNT_AND
Definition bmconst.h:160
@ set_COUNT
Definition bmconst.h:159
@ set_AND
Definition bmconst.h:154
@ set_XOR
Definition bmconst.h:157
@ set_COUNT_A
Definition bmconst.h:165
@ set_END
Definition bmconst.h:168
@ BM_GAP
GAP compression is ON.
Definition bmconst.h:144
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
Definition bmserial.h:2622
serialization_flags
Bit mask flags for serialization algorithm <>
Definition bmserial.h:2574
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
Definition bmserial.h:2688
void deserialize_range(BV &bv, const unsigned char *buf, typename BV::size_type from, typename BV::size_type to, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector range deserialization from a memory BLOB.
Definition bmserial.h:2751
@ BM_NO_GAP_LENGTH
save no GAP info (save some space)
Definition bmserial.h:2576
@ BM_NO_BYTE_ORDER
save no byte-order info (save some space)
Definition bmserial.h:2575
distance_metric
Distance metrics codes defined for vectors A and B.
Definition bmalgo_impl.h:58
distance_metric operation2metric(set_operation op) BMNOEXCEPT
Convert set operation into compatible distance metric.
Definition bmalgo_impl.h:73
@ COUNT_XOR
(A ^ B).count()
Definition bmalgo_impl.h:60
@ COUNT_SUB_BA
(B - A).count()
Definition bmalgo_impl.h:63
unsigned gap_bit_count_unr(const T *buf) BMNOEXCEPT
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition bmfunc.h:1801
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false) BMNOEXCEPT
Convert gap block into array of ints corresponding to 1 bits.
Definition bmfunc.h:4320
gap_word_t * gap_operation_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize) BMNOEXCEPT
GAP XOR operation.
Definition bmfunc.h:5712
void gap_invert(T *buf) BMNOEXCEPT
Inverts all bits in the GAP buffer.
Definition bmfunc.h:4001
unsigned gap_set_array(T *buf, const T *arr, unsigned len) BMNOEXCEPT
Convert array to GAP buffer.
Definition bmfunc.h:3005
void set_gap_level(T *buf, int level) BMNOEXCEPT
Sets GAP block capacity level.
Definition bmfunc.h:4020
void gap_add_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
Adds(OR) GAP block to bitblock.
Definition bmfunc.h:3436
void gap_convert_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT buf) BMNOEXCEPT
GAP block to bitblock conversion.
Definition bmfunc.h:3859
void gap_set_all(T *buf, unsigned set_max, unsigned value) BMNOEXCEPT
Sets all bits to 0 or 1 (GAP)
Definition bmfunc.h:3933
unsigned gap_add_value(T *buf, unsigned pos) BMNOEXCEPT
Add new value to the end of GAP buffer.
Definition bmfunc.h:2827
int gap_calc_level(unsigned len, const T *glevel_len) BMNOEXCEPT
Calculates GAP block capacity level.
Definition bmfunc.h:4042
BMFORCEINLINE bm::gap_word_t gap_length(const bm::gap_word_t *BMRESTRICT buf) BMNOEXCEPT
Returs GAP block length.
Definition bmfunc.h:1098
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
BV::size_type count_and(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of AND operation of two bitsets.
Definition bmalgo.h:49
BV::size_type count_or(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of OR operation of two bitsets.
Definition bmalgo.h:149
BV::size_type count_sub(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of SUB operation of two bitsets.
Definition bmalgo.h:115
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
bm::distance_metric_descriptor::size_type count_xor(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of XOR operation of two bitsets.
Definition bmalgo.h:81
Definition bm.h:77
const unsigned char set_block_ref_eq
block is a copy of a reference block
Definition bmserial.h:1017
const unsigned set_block_digest_wave_size
Definition bmconst.h:66
const unsigned char set_block_gap
Plain GAP block.
Definition bmserial.h:994
void bit_block_change_bc(const bm::word_t *BMRESTRICT block, unsigned *BMRESTRICT gc, unsigned *BMRESTRICT bc) BMNOEXCEPT
Definition bmfunc.h:4581
const unsigned char set_block_arrbit_inv
List of bits OFF.
Definition bmserial.h:1011
const unsigned char set_nb_sync_mark8
bookmark sync point (8-bits)
Definition bmserial.h:1034
const unsigned char set_block_bit_interval
Interval block.
Definition bmserial.h:997
const unsigned id_max
Definition bmconst.h:108
bool check_block_zero(const bm::word_t *blk, bool deep_scan) BMNOEXCEPT
Checks all conditions and returns true if block consists of only 0 bits.
Definition bmfunc.h:7858
unsigned int word_t
Definition bmconst.h:38
const unsigned char set_block_bit_digest0
H-compression with digest mask.
Definition bmserial.h:1015
const unsigned char set_block_xor_ref32
..... 32-bit (should never happen)
Definition bmserial.h:1020
const unsigned char set_block_arrgap_bienc_inv_v2
Interpolated GAP array (inverted)
Definition bmserial.h:1028
const unsigned char set_block_bitgap_bienc
Interpolated bit-block as GAPs.
Definition bmserial.h:1014
const unsigned char set_block_arrgap_egamma_inv
Gamma compressed inverted delta GAP array.
Definition bmserial.h:1003
bool check_block_one(const bm::word_t *blk, bool deep_scan) BMNOEXCEPT
Checks if block has only 1 bits.
Definition bmfunc.h:7882
const unsigned char set_block_arr_bienc_inv
Interpolated inverted block int array.
Definition bmserial.h:1013
unsigned bit_to_gap(gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len) BMNOEXCEPT
Convert bit block to GAP representation.
Definition bmfunc.h:4248
const unsigned set_sub_array_size
Definition bmconst.h:94
const unsigned gap_max_bits_cmrz
Definition bmconst.h:83
const unsigned char set_block_16one
UP to 65536 all-set blocks.
Definition bmserial.h:986
void for_each_dgap(const T *gap_buf, Func &func)
Definition bmfunc.h:2147
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Internal function computes different distance metrics.
const unsigned set_total_blocks
Definition bmconst.h:110
const unsigned char set_nb_sync_mark48
Definition bmserial.h:1038
const unsigned char set_block_arrgap_bienc
Interpolated GAP array.
Definition bmserial.h:1009
ByteOrder
Byte orders recognized by the library.
Definition bmconst.h:429
@ LittleEndian
Definition bmconst.h:431
@ BigEndian
Definition bmconst.h:430
const unsigned char set_block_8one
Up to 256 all-set blocks.
Definition bmserial.h:984
const unsigned char set_nb_sync_mark16
Definition bmserial.h:1035
const unsigned char set_block_32one
UP to 4G all-set blocks.
Definition bmserial.h:988
const unsigned char set_block_bit_0runs
Bit block with encoded zero intervals.
Definition bmserial.h:1002
const unsigned bie_cut_off
Definition bmconst.h:87
const unsigned char set_block_64one
lots of all-set blocks
Definition bmserial.h:1006
const unsigned char set_block_gap_bienc
Interpolated GAP block (legacy)
Definition bmserial.h:1008
const unsigned char set_block_xor_gap_ref16
..... 16-bit
Definition bmserial.h:1022
const unsigned set_compression_default
Default compression level.
Definition bmserial.h:60
const unsigned char set_block_arrbit
List of bits ON.
Definition bmserial.h:996
const unsigned char set_block_1one
One block all-set (1111...)
Definition bmserial.h:982
const unsigned char set_block_arrgap_inv
List of bits OFF (GAP block)
Definition bmserial.h:1004
const unsigned gap_levels
Definition bmconst.h:84
const unsigned char set_block_gapbit
GAP compressed bitblock.
Definition bmserial.h:995
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size) BMNOEXCEPT
Definition bmfunc.h:8201
bm::operation setop2op(bm::set_operation op) BMNOEXCEPT
Convert set operation to operation.
Definition bmfunc.h:826
const unsigned char set_block_xor_chain
XOR chain (reserved)
Definition bmserial.h:1024
const unsigned char set_nb_bookmark32
Definition bmserial.h:1033
const unsigned set_block_size
Definition bmconst.h:54
unsigned long long int id64_t
Definition bmconst.h:34
const unsigned block_waves
Definition bmconst.h:65
const unsigned char set_block_xor_ref16
block is masked XOR of a reference block (16-bit)
Definition bmserial.h:1019
const unsigned char set_block_arrgap_bienc_inv
Interpolated GAP array (inverted)
Definition bmserial.h:1010
const unsigned char set_block_arrgap_egamma
Gamma compressed delta GAP array.
Definition bmserial.h:1001
const unsigned gap_equiv_len
Definition bmconst.h:81
const unsigned char set_block_1zero
One all-zero block.
Definition bmserial.h:981
const unsigned char set_block_end
End of serialization.
Definition bmserial.h:980
unsigned int id_t
Definition bmconst.h:37
const unsigned gap_max_buff_len
Definition bmconst.h:79
const unsigned char set_block_arrgap_bienc_v2
//!< Interpolated GAP array (v2)
Definition bmserial.h:1027
const unsigned char set_block_xor_gap_ref32
..... 32-bit (should never happen)
Definition bmserial.h:1023
const unsigned char set_block_arrgap
List of bits ON (GAP block)
Definition bmserial.h:998
const unsigned char set_nb_sync_mark64
..... 64-bit (should never happen)
Definition bmserial.h:1039
const unsigned char set_nb_sync_mark32
Definition bmserial.h:1037
const unsigned char set_block_sgapgap
SGAP compressed GAP block.
Definition bmserial.h:993
serialization_header_mask
Definition bmserial.h:965
@ BM_HM_NO_GAPL
no GAP levels
Definition bmserial.h:970
@ BM_HM_ID_LIST
id list stored
Definition bmserial.h:968
@ BM_HM_NO_BO
no byte-order
Definition bmserial.h:969
@ BM_HM_DEFAULT
Definition bmserial.h:966
@ BM_HM_64_BIT
64-bit vector
Definition bmserial.h:971
@ BM_HM_RESIZE
resized vector
Definition bmserial.h:967
@ BM_HM_HXOR
horizontal XOR compression turned ON
Definition bmserial.h:972
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
Definition bmutil.h:332
const unsigned char set_block_arr_bienc
Interpolated block as int array.
Definition bmserial.h:1012
const unsigned char set_block_64zero
lots of zero blocks
Definition bmserial.h:1005
const unsigned char set_block_gap_bienc_v2
Interpolated GAP block (v2)
Definition bmserial.h:1026
const unsigned char set_block_xor_ref8
block is masked XOR of a reference block (8-bit)
Definition bmserial.h:1018
const unsigned char set_block_gap_egamma
Gamma compressed GAP block.
Definition bmserial.h:1000
unsigned short gap_word_t
Definition bmconst.h:77
const unsigned char set_block_32zero
Up to 4G zero blocks.
Definition bmserial.h:987
const unsigned char set_block_8zero
Up to 256 zero blocks.
Definition bmserial.h:983
const unsigned gap_max_bits
Definition bmconst.h:80
const unsigned char set_block_bit_1bit
Bit block with 1 bit ON.
Definition bmserial.h:999
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) BMNOEXCEPT
Recalc linear bvector block index into 2D matrix coordinates.
Definition bmfunc.h:147
const unsigned char set_block_aone
All other blocks one.
Definition bmserial.h:990
const unsigned char set_nb_bookmark16
Definition bmserial.h:1031
const unsigned set_block_shift
Definition bmconst.h:55
const unsigned set_compression_max
Maximum supported compression level.
Definition bmserial.h:59
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
Definition bmutil.h:342
const unsigned char set_nb_sync_mark24
Definition bmserial.h:1036
const unsigned char set_block_xor_gap_ref8
..... 8-bit
Definition bmserial.h:1021
unsigned short short_t
Definition bmconst.h:39
const unsigned char set_block_azero
All other blocks zero.
Definition bmserial.h:989
const unsigned bits_in_block
Definition bmconst.h:113
const unsigned char set_block_16zero
Up to 65536 zero blocks.
Definition bmserial.h:985
const unsigned char set_block_bit
Plain bit block.
Definition bmserial.h:991
const unsigned char set_block_bitgap_bienc_v2
Interpolated bit-block as GAPs (v2 - reseved)
Definition bmserial.h:1029
const unsigned char set_nb_bookmark24
Definition bmserial.h:1032
bool is_const_set_operation(set_operation op) BMNOEXCEPT
Returns true if set operation is constant (bitcount)
Definition bmfunc.h:817
const unsigned char set_block_sgapbit
SGAP compressed bitblock.
Definition bmserial.h:992
Bit COUNT OR functor.
Definition bmfunc.h:8280
Bit COUNT SUB AB functor.
Definition bmfunc.h:8292
Bit SUB BA functor.
Definition bmfunc.h:8303
Bit COUNT XOR functor.
Definition bmfunc.h:8269
size_t max_serialize_mem
estimated maximum memory for serialization
Definition bmfunc.h:60
Statistical information about bitset's memory allocation details.
Definition bm.h:122
static ByteOrder byte_order()
Definition bmconst.h:464
Bookmark state structure.
Definition bmserial.h:339
unsigned bm_type_
0:32-bit, 1: 24-bit, 2: 16-bit
Definition bmserial.h:358
bookmark_state(block_idx_type nb_range) BMNOEXCEPT
Definition bmserial.h:340
size_t min_bytes_range_
minumal distance (bytes) between marks
Definition bmserial.h:359
block_idx_type nb_
bookmark block idx
Definition bmserial.h:356
unsigned char * ptr_
bookmark pointer
Definition bmserial.h:355
block_idx_type nb_range_
target bookmark range in blocks
Definition bmserial.h:357