#ifndef BMSPARSEVEC_COMPR_H__INCLUDED__
#define BMSPARSEVEC_COMPR_H__INCLUDED__

#ifndef BM__H__INCLUDED__
# error missing include (bm.h or bm64.h)
#endif
template<class Val, class SV>

    : csv_(csv), idx_(idx)
    { return bool(*this) == bool(ref); }

    { return (pos_ == it.pos_) && (csv_ == it.csv_); }
    { return pos_ < it.pos_; }
    { return pos_ <= it.pos_; }
    { return pos_ > it.pos_; }
    { return pos_ >= it.pos_; }
    n_buf_size = 1024 * 8

    this->flush(); sv_bi_ = bi.sv_bi_;
    { this->add(v); return *this; }
    size_ = csv.size_; max_id_ = csv.max_id_;
    in_sync_ = csv.in_sync_;
    bv_blocks_ptr_->copy_from(*(csv.bv_blocks_ptr_));

    size_ = csv.size_; max_id_ = csv.max_id_; in_sync_ = csv.in_sync_;
    bv_blocks_ptr_->copy_from(*(csv.bv_blocks_ptr_));

    bool empty() const { return sv_.empty(); }
                    bool zero_mem = true) const;
                  statistics* stat = 0);
    void sync(bool force);

    { return sv_.get_plain(i); }
    { return sv_.get_plain(i); }
    { return sv_.effective_plains(); }
    { return sparse_vector_type::plains(); }
    { return sparse_vector_type::stored_plains(); }
    { return sv_.get_bmatrix(); }

    void construct_bv_blocks();
    void free_bv_blocks();
template<class Val, class SV>
: sv_(null_able, ap, bv_max_size, alloc), in_sync_(false)
    construct_bv_blocks();

template<class Val, class SV>

template<class Val, class SV>
: sv_(csv.sv_), size_(csv.size_), max_id_(csv.max_id_), in_sync_(csv.in_sync_)
    construct_bv_blocks();
    bv_blocks_ptr_->copy_from(*(csv.bv_blocks_ptr_));

template<class Val, class SV>
  max_id_(0), in_sync_(false)
    size_ = csv.size_; max_id_ = csv.max_id_; in_sync_ = csv.in_sync_;
    bv_blocks_ptr_ = csv.bv_blocks_ptr_; csv.bv_blocks_ptr_ = 0;

template<class Val, class SV>
template<class Val, class SV>
    if (idx <= max_id_ && size_)
        sv_.throw_range_error("compressed sparse vector push_back() range error");
    push_back_no_check(idx, v);
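The fragment above is the body of push_back(): the new index must be greater than any index already stored, otherwise a range error is thrown. A minimal usage sketch, assuming the usual BitMagic headers (bm.h, bmsparsevec.h, bmsparsevec_compr.h) and typedefs:

typedef bm::bvector<>                                       bvector_type;
typedef bm::sparse_vector<unsigned, bvector_type>           sparse_vector_u32;
typedef bm::rsc_sparse_vector<unsigned, sparse_vector_u32>  rsc_sparse_vector_u32;

rsc_sparse_vector_u32 csv;     // NULL-able by default (bm::use_null)
csv.push_back(10, 100);        // element 10 = 100, elements 0..9 remain NULL
csv.push_back(20, 200);        // ok: 20 > 10
// csv.push_back(15, 5);       // error: index is not greater than the current maximum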
template<class Val, class SV>
    bv_null->set_bit_no_check(idx);
    sv_.push_back_no_null(v);

template<class Val, class SV>
    bool found = bv_null->test(idx);
        size_type sv_idx = bv_null->count_range(0, idx);
        bv_null->clear_bit_no_check(idx);

template<class Val, class SV>
    bool found = bv_null->test(idx);
    size_type sv_idx = bv_null->count_range(0, idx);
        sv_.set_value_no_null(--sv_idx, v);
        sv_.insert_value_no_null(sv_idx, v);
        bv_null->set_bit_no_check(idx);
template<class Val, class SV>
    if (max_id_ != csv.max_id_ || size_ != csv.size_)
    bool same_sv = sv_.equal(csv.sv_);
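The equal() fragment above first compares the maximum index and size, then the underlying dense vectors. A short sketch, reusing the typedefs from the previous example:

rsc_sparse_vector_u32 csv1, csv2;
csv1.push_back(5, 50);
csv2.push_back(5, 50);
bool same = csv1.equal(csv2);  // true: same size, same max index, same content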
template<class Val, class SV>
    const bvector_type* bv_null_src = sv_src.get_null_bvector();
    *bv_null = *bv_null_src;
    unsigned src_plains = sv_src.plains();
    for (unsigned i = 0; i < src_plains; ++i)
            rank_compr.compress(*bv_plain, *bv_null, *bv_src_plain);

template<class Val, class SV>
    const bvector_type* bv_null_src = this->get_null_bvector();
    *bv_null = *bv_null_src;
    unsigned src_plains = sv_.plains();
    for (unsigned i = 0; i < src_plains; ++i)
            rank_compr.decompress(*bv_plain, *bv_null, *bv_src_plain);
    sv.resize(this->size());
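These two fragments are from load_from() and load_to(): each bit-plain of a NULL-able sparse_vector<> is rank-compressed against (or decompressed from) the NULL bit-vector. A round-trip sketch under the same typedefs; the source vector must be constructed with bm::use_null:

sparse_vector_u32 sv(bm::use_null);
sv.set(2, 25);
sv.set(10000, 42);             // everything in between stays NULL

rsc_sparse_vector_u32 csv;
csv.load_from(sv);             // compress: only assigned values are stored
csv.sync();                    // build the rank-select index for fast reads

sparse_vector_u32 sv2(bm::use_null);
csv.load_to(sv2);              // decompress back; sv2 now matches sv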
template<class Val, class SV>
    if (in_sync_ && force == false)
    bv_null->build_rs_index(bv_blocks_ptr_);
    bool found = bv_null->find_reverse(max_id_);
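The sync() fragment above builds the rank-select (prefix sum) index over the NULL bit-vector and re-derives the maximum stored index. A sketch of the usual build-sync-read sequence:

rsc_sparse_vector_u32 csv;
csv.push_back(3, 30);
csv.push_back(7, 70);
csv.sync();                    // builds the rank-select index (reads work without it, just slower)

unsigned v1 = csv.get(7);      // 70, unchecked access
unsigned v2 = csv.at(3);       // 30, throws if the element is NULL or out of range
bool     n  = csv.is_null(5);  // true: index 5 was never assigned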
template<class Val, class SV>
        *idx_to = bv_null->count_to_test(idx, *bv_blocks_ptr_);
        bool found = bv_null->test(idx);
        *idx_to = found ? bv_null->count_range(0, idx) : 0;
    return bool(*idx_to);
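The resolve() fragment maps a logical index to a position in the internal dense vector: with the index in sync it uses count_to_test(), otherwise it falls back to test() plus count_range(). The returned position is 1-based (0 means NULL), which is why at()/get() decrement it. Sketch, reusing csv from the earlier examples:

rsc_sparse_vector_u32::size_type dense_pos;
if (csv.resolve(7, &dense_pos))
{
    // element 7 is assigned; its slot in the internal sparse_vector is dense_pos - 1
}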
template<class Val, class SV>
        copy_sz = bv_null->count_range(from, to, *bv_blocks_ptr_);
        copy_sz = bv_null->count_range(from, to);
        sv_left = bv_null->rank_corrected(from, *bv_blocks_ptr_);
        sv_left = bv_null->count_range(0, from);
        bool tl = bv_null->test(from);
    *idx_from = sv_left; *idx_to = sv_left + copy_sz - 1;

template<class Val, class SV>
        sv_.throw_range_error("compressed collection access error");
    bool found = resolve(idx, &sv_idx);
        sv_.throw_range_error("compressed collection item not found");
    return sv_.at(--sv_idx);

template<class Val, class SV>
    bool found = resolve(idx, &sv_idx);
    return sv_.get(--sv_idx);

template<class Val, class SV>
    return !(bv_null->test(idx));
template<class Val, class SV>
    sv_.optimize(temp_block, opt_mode, (typename sparse_vector_type::statistics*)st);
            (sizeof(typename rs_index_type::size_type)
             + sizeof(typename rs_index_type::sb_pair_type));

template<class Val, class SV>
    in_sync_ = false; max_id_ = size_ = 0;

template<class Val, class SV>
    sv_.calc_stat((typename sparse_vector_type::statistics*)st);
        st->memory_used += bv_blocks_ptr_->get_total() *
            (sizeof(typename rs_index_type::size_type)
             + sizeof(typename rs_index_type::sb_pair_type));
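The optimize() and calc_stat() fragments above delegate to the dense vector and then account for the rank-select index tables. A short sketch, reusing csv and the typedefs from the earlier examples:

csv.optimize();                           // defaults to bvector_type::opt_compress
rsc_sparse_vector_u32::statistics st;
csv.calc_stat(&st);
// st.memory_used covers the bit-plains plus the rank-select index tables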
template<class Val, class SV>
    return sv_.get_null_bvector();

template<class Val, class SV>
        b = bv_null->select(rank, idx, *bv_blocks_ptr_);
        b = bv_null->find_rank(rank, 0, idx);
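The find_rank() fragment locates the logical position of the element with a given rank over the NULL bit-vector, using select() when the index is in sync. Sketch; the rank is taken as 1-based here, following bvector<>::find_rank (an assumption):

rsc_sparse_vector_u32::size_type pos;
bool found = csv.find_rank(2, pos);  // position of the second assigned (not-NULL) element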
template<class Val, class SV>
                 bool zero_mem) const
    if (idx_from >= this->size())
    if ((idx_from + size) > this->size())
        size = this->size() - idx_from;
    size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_);
    BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
        arr[i++] = it.value();

template<class Val, class SV>
    if (!size || (idx_from >= this->size()))
    if ((idx_from + size) > this->size())
        size = this->size() - idx_from;
    size_type rank = bv_null->rank_corrected(idx_from, *bv_blocks_ptr_);
    BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
        if (idx_from + size <= i)
        bv_null->count_range(idx_from, idx_from + size - 1, *bv_blocks_ptr_);
    auto ex_sz = sv_.decode(arr_buf_tmp, rank, extract_cnt, true);
    BM_ASSERT(ex_sz == extract_cnt); (void) ex_sz;
    for (i = 0; i < extract_cnt; ++i)
        arr[en_idx - idx_from] = arr_buf_tmp[i];
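The two fragments above are from decode() and decode_buf(): decode() extracts a window of logical positions into a C-style array (NULL positions come out as zero when zero_mem is true), while decode_buf() is the variant that takes a caller-provided temporary array. Sketch, reusing csv from the earlier examples:

unsigned buf[16];
rsc_sparse_vector_u32::size_type n = csv.decode(&buf[0], 0, 16);
// n is the number of decoded elements (the window is clipped at csv.size())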
template<class Val, class SV>
    bv_blocks_ptr_ = new(bv_blocks_ptr_) rs_index_type();

template<class Val, class SV>
void rsc_sparse_vector<Val, SV>::free_bv_blocks()
    bv_blocks_ptr_->~rs_index_type();
template<class Val, class SV>
    if (left >= csv.size())
    size_ = csv.size_; max_id_ = csv.max_id_;
    const bvector_type* arg_bv_null = csv.sv_.get_null_bvector();
    bool range_valid = csv.resolve_range(left, right, &sv_left, &sv_right);
        sv_.clear(); sv_.resize(size_);
        bv_null->copy_range(*arg_bv_null, 0, right);
    bv_null->copy_range(*arg_bv_null, 0, right);
    sv_.copy_range(csv.sv_, sv_left, sv_right, bm::no_null);
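The copy_range() fragment resolves the logical [left, right] range into dense coordinates, copies the NULL bit-vector up to right, and copies the matching slice of the dense vector. Sketch, reusing csv from the earlier examples:

rsc_sparse_vector_u32 part;
part.copy_range(csv, 100, 200);  // keep logical positions 100..200; everything else stays NULL
part.sync();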
template<class Val, class SV>

template<class Val, class SV>
    sv_bi_ = csv->sv_.get_back_inserter();
    sv_bi_.disable_set_null();

template<class Val, class SV>

template<class Val, class SV>
    sv_bi_.add_value_no_null(v);
    bv_null->set_bit_no_check(csv_->size_);

template<class Val, class SV>

template<class Val, class SV>
    csv_->max_id_ += count;

template<class Val, class SV>
    csv_->in_sync_ = false;
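These back_insert_iterator fragments show how the inserter wraps the dense vector's back inserter (with NULL handling disabled) and maintains the NULL bit-vector itself; flush() pushes the buffered tail and marks the rank-select index out of sync. Usage sketch:

rsc_sparse_vector_u32 csv;
{
    rsc_sparse_vector_u32::back_insert_iterator bit = csv.get_back_inserter();
    bit.add(10);       // logical position 0
    bit.add_null();    // logical position 1 stays NULL
    bit.add(30);       // logical position 2
    bit.flush();       // push the buffered values into the vector
}
csv.sync();            // rebuild the rank-select index before rank-based reads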
template<class Val, class BV>
: csv_(0), pos_(bm::id_max), buf_ptr_(0)

template<class Val, class SV>
: csv_(it.csv_), pos_(it.pos_), buf_ptr_(0)

template<class Val, class SV>
: csv_(csv), buf_ptr_(0)

template<class Val, class SV>
: csv_(csv), buf_ptr_(0)

template<class Val, class SV>
    pos_ = (!csv_ || pos >= csv_->size()) ? bm::id_max : pos;

template<class Val, class SV>
    if (pos_ >= csv_->size())
    if (buf_ptr_ - ((value_type*)vbuffer_.data()) >= n_buf_size)

template<class Val, class SV>
        vbuffer_.reserve(n_buf_size * sizeof(value_type));
        tbuffer_.reserve(n_buf_size * sizeof(value_type));
        csv_->decode_buf(buf_ptr_, tmp_buf_ptr, pos_, n_buf_size, true);

template<class Val, class SV>
    if (++buf_ptr_ < buf_end)
    if (pos_ >= csv_->size())
    if (buf_ptr_ >= buf_end)

template<class Val, class SV>
    return csv_->is_null(pos_);
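The const_iterator fragments above decode values in blocks of n_buf_size via decode_buf() and walk the logical positions; is_null() reports whether the current position carries a value. Traversal sketch (std::cout assumes <iostream>):

rsc_sparse_vector_u32::const_iterator it = csv.get_const_iterator(0);
while (it.valid())
{
    if (!it.is_null())
        std::cout << it.pos() << " = " << it.value() << "\n";
    if (!it.advance())
        break;
}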
Sparse container sparse_vector<> for integer types using bit-transposition transform.
pre-processor un-defines to avoid global space pollution (internal)
Constant iterator designed to enumerate "ON" bits.
size_type value() const BMNOEXCEPT
Get current position (value)
bool advance() BMNOEXCEPT
bool valid() const BMNOEXCEPT
Checks if iterator is still valid.
Bit-vector container with runtime compression of bits.
optmode
Optimization mode. Every next level means additional checks (better compression vs. time).
opt_compress
compress blocks when possible (GAP/prefix sum)
allocator_type::allocator_pool_type allocator_pool_type
rs_index< allocator_type > rs_index_type
Algorithms for rank compression of bit-vector.
void decompress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank decompression.
void compress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank compression algorithm based on two parallel iterators/enumerators; set of source vector gets re-m...
Back insert iterator implements buffered insert, faster than generic access assignment.
void flush()
flush the accumulated buffer
rsc_sparse_vector_type * rsc_sparse_vector_type_ptr
rsc_sparse_vector_type::sparse_vector_type sparse_vector_type
add value to the buffer without changing the NULL vector
void add(value_type v)
add value to the container
std::output_iterator_tag iterator_category
rsc_sparse_vector_type::bvector_type bvector_type
back_insert_iterator & operator=(value_type v)
push value to the vector
rsc_sparse_vector< Val, SV > rsc_sparse_vector_type
rsc_sparse_vector_type::size_type size_type
rsc_sparse_vector_type::value_type value_type
back_insert_iterator() BMNOEXCEPT
back_insert_iterator & operator*()
noop
back_insert_iterator & operator++(int)
noop
void add_null() BMNOEXCEPT
add NULL (no-value) to the container
back_insert_iterator & operator++()
noop
sparse_vector_type::back_insert_iterator sparse_vector_bi
Const iterator to traverse the rsc sparse vector.
const_iterator & operator++() BMNOEXCEPT
Advance to the next available value.
bool advance() BMNOEXCEPT
advance iterator forward by one
rsc_sparse_vector< Val, SV > rsc_sparse_vector_type
void skip_zero_values() BMNOEXCEPT
size_type pos() const BMNOEXCEPT
Current position (index) in the vector.
rsc_sparse_vector_type::value_type value_type
bool operator!=(const const_iterator &it) const BMNOEXCEPT
bool operator<=(const const_iterator &it) const BMNOEXCEPT
bool operator>(const const_iterator &it) const BMNOEXCEPT
bm::byte_buffer< allocator_type > buffer_type
void go_to(size_type pos) BMNOEXCEPT
re-position to a specified position
bool operator<(const const_iterator &it) const BMNOEXCEPT
bool operator==(const const_iterator &it) const BMNOEXCEPT
bool is_null() const BMNOEXCEPT
Get NULL status.
rsc_sparse_vector_type::bvector_type bvector_type
std::input_iterator_tag iterator_category
rsc_sparse_vector_type * rsc_sparse_vector_type_ptr
const_iterator & operator++(int)
Advance to the next available value.
bool valid() const BMNOEXCEPT
Returns true if iterator is at a valid position.
void invalidate() BMNOEXCEPT
Invalidate current iterator.
bool operator>=(const const_iterator &it) const BMNOEXCEPT
rsc_sparse_vector_type::size_type size_type
const_iterator() BMNOEXCEPT
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
value_type operator*() const
Get current value.
value_type value() const
Get current value.
bvector_type::allocator_type allocator_type
Reference class to access elements via common [] operator.
bool operator==(const reference &ref) const BMNOEXCEPT
bool is_null() const BMNOEXCEPT
reference(rsc_sparse_vector< Val, SV > &csv, size_type idx) BMNOEXCEPT
Rank-Select compressed sparse vector.
bvector_type * bvector_type_ptr
bool resolve(size_type idx, size_type *idx_to) const BMNOEXCEPT
Resolve logical address to access via rank compressed address.
void load_from(const sparse_vector_type &sv_src)
Load compressed vector from a sparse vector (with NULLs)
rsc_sparse_vector< Val, SV > & operator=(rsc_sparse_vector< Val, SV > &&csv) BMNOEXCEPT
value_type operator[](size_type idx) const
get specified element without bounds checking
static unsigned plains() BMNOEXCEPT
get total number of bit-plains in the vector
const sparse_vector_type & get_sv() const BMNOEXCEPT
access dense vector
bool is_nullable() const
if container supports NULL(unassigned) values (true)
const bmatrix_type & get_bmatrix() const BMNOEXCEPT
size_type decode(value_type *arr, size_type idx_from, size_type size, bool zero_mem=true) const
C-style decode.
void push_back(size_type idx, value_type v)
add element at the specified index (must be greater than the current maximum index) with automatic resize
static unsigned stored_plains()
Number of stored bit-plains (value plains + extra).
back_insert_iterator get_back_inserter()
void load_to(sparse_vector_type &sv) const
Export compressed vector to a sparse vector (with NULLs)
SV::bmatrix_type bmatrix_type
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
value_type at(size_type idx) const
access specified element with bounds checking
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, statistics *stat=0)
run memory optimization for all vector plains
size_type effective_vector_max() const BMNOEXCEPT
Always 1 (non-matrix type)
bvector_type_const_ptr get_plain(unsigned i) const BMNOEXCEPT
get access to bit-plain, function checks and creates a plain
rsc_sparse_vector(bm::null_support null_able=bm::use_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
void unsync() BMNOEXCEPT
Unsync the prefix sum table.
bool resolve_range(size_type from, size_type to, size_type *idx_from, size_type *idx_to) const BMNOEXCEPT
size_type effective_size() const BMNOEXCEPT
size of internal dense vector
void set_remap() BMNOEXCEPT
void set_null(size_type idx)
set specified element to NULL; the RSC vector actually erases the element when it is set to NULL (expensive)
bool empty() const
return true if vector is empty
void resize_internal(size_type sz)
size_type decode_buf(value_type *arr, value_type *arr_buf_tmp, size_type idx_from, size_type size, bool zero_mem=true) const BMNOEXCEPT
C-style decode (variant with external memory). Analog of decode, but requires two arrays.
size_type size_internal() const BMNOEXCEPT
SV::const_iterator sparse_vector_const_iterator
bool equal(const rsc_sparse_vector< Val, SV > &csv) const BMNOEXCEPT
check if another vector has the same content
bvector_type_ptr get_plain(unsigned i) BMNOEXCEPT
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
static bool is_compressed()
trait if sparse vector is "compressed" (true)
const value_type & const_reference
const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
Get const_iterator re-positioned to a specific element.
const bvector_type * bvector_type_const_ptr
size_t remap_size() const BMNOEXCEPT
void sync()
Re-calculate prefix sum table used for rank search (if necessary)
const unsigned char * get_remap_buffer() const BMNOEXCEPT
unsigned char * init_remap_buffer() BMNOEXCEPT
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
unsigned effective_plains() const BMNOEXCEPT
void calc_stat(struct rsc_sparse_vector< Val, SV >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
bvector_type::allocator_type allocator_type
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values (or NULL)
void clear() BMNOEXCEPT
resize to zero, free memory
bvector_type::rs_index_type rs_index_type
bool find_rank(size_type rank, size_type &idx) const BMNOEXCEPT
find position of compressed element by its rank
size_type size() const BMNOEXCEPT
return size of the vector
bvector_type::enumerator bvector_enumerator_type
bool in_sync() const BMNOEXCEPT
returns true if prefix sum table is in sync with the vector
void copy_range(const rsc_sparse_vector< Val, SV > &csv, size_type left, size_type right)
copy range of values from another sparse vector
void push_back_no_check(size_type idx, value_type v)
bool is_remap() const BMNOEXCEPT
bvector_type::allocation_policy allocation_policy_type
SV::bvector_type bvector_type
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
sparse vector de-serializer
algorithms for sparse_vector scan/search
null_support
NULL-able value support.
use_null
support "non-assigned" or "NULL" logic
no_null
do not support NULL values
void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two scalar variables.
void aligned_free(void *ptr) BMNOEXCEPT
Aligned free.
void * aligned_new_malloc(size_t size)
Aligned malloc (unlike classic malloc it throws bad_alloc exception)
Structure with statistical information about memory allocation footprint, serialization projection,...
size_t memory_used
memory usage for all blocks and service tables