1 #ifndef BMRANDOM__H__INCLUDED__
2 #define BMRANDOM__H__INCLUDED__
25 #ifndef BM__H__INCLUDED__
28 # error missing include (bm.h or bm64.h)
76 typename BV::blocks_manager_type blocks_manager_type;
82 void simple_pick(BV& bv_out,
88 void get_subset(BV& bv_out,
100 unsigned take_count);
105 unsigned bit_list_size,
108 unsigned compute_take_count(
unsigned bc,
136 delete [] sub_block_;
144 if (sample_count == 0)
151 bv_in.build_rs_index(&rsi_, &bv_nb_);
154 if (in_count <= sample_count)
160 float pick_ratio = float(sample_count) / float(in_count);
161 if (pick_ratio < 0.054f)
164 bool b = bv_in.find_range(first, last);
168 simple_pick(bv_out, bv_in, sample_count, first, last);
172 if (sample_count > in_count/2)
176 size_type delta_count = in_count - sample_count;
178 get_subset(tmp_bv, bv_in, in_count, delta_count);
183 get_subset(bv_out, bv_in, in_count, sample_count);
189 size_type sample_count,
195 std::random_device rd;
197 std::mt19937_64 mt_rand(rd());
199 std::mt19937 mt_rand(rd());
201 std::uniform_int_distribution<size_type> dist(first, last);
206 size_type ridx = dist(mt_rand);
208 BM_ASSERT(ridx >= first && ridx <= last);
210 bool b = bv_in.find(ridx, fidx);
215 bool is_set = bv_out.set_bit_conditional(fidx,
true,
false);
216 sample_count -= is_set;
221 b = bv_in.find(fidx, fidx);
226 is_set = bv_out.set_bit_conditional(fidx,
true,
false);
227 sample_count -= is_set;
235 void random_subset<BV>::get_subset(BV& bv_out,
238 size_type sample_count)
241 bv_out.resize(bv_in.size());
243 const blocks_manager_type& bman_in = bv_in.get_blocks_manager();
244 blocks_manager_type& bman_out = bv_out.get_blocks_manager();
246 bm::word_t* tmp_block = bman_out.check_allocate_tempblock();
248 size_type first_nb, last_nb;
249 bool b = bv_nb_.find_range(first_nb, last_nb);
254 std::random_device rd;
256 std::mt19937_64 mt_rand(rd());
258 std::mt19937 mt_rand(rd());
260 std::uniform_int_distribution<size_type> dist_nb(first_nb, last_nb);
262 size_type curr_sample_count = sample_count;
263 for (
unsigned take_count = 0; curr_sample_count; curr_sample_count -= take_count)
268 size_type ridx = dist_nb(mt_rand);
269 BM_ASSERT(ridx >= first_nb && ridx <= last_nb);
271 b = bv_nb_.find(ridx, nb);
274 b = bv_nb_.find(first_nb, nb);
277 b = bv_nb_.find(first_nb, nb);
283 bv_nb_.clear_bit_no_check(nb);
287 unsigned bc = rsi_.count(nb);
289 take_count = compute_take_count(bc, in_count, sample_count);
290 if (take_count > curr_sample_count)
291 take_count = unsigned(curr_sample_count);
297 bman_in.get_block_coord(nb, i0, j0);
298 const bm::word_t* blk_src = bman_in.get_block(i0, j0);
302 bm::word_t* blk_out = bman_out.get_block_ptr(i0, j0);
306 blk_out = bman_out.deoptimize_block(nb);
310 blk_out = bman_out.get_allocator().alloc_bit_block();
311 bman_out.set_block(nb, blk_out);
313 if (take_count == bc)
342 get_random_array(blk_out, bit_list_, arr_len, take_count);
353 get_block_subset(blk_out, blk_src, take_count);
360 unsigned random_subset<BV>::compute_take_count(
unsigned bc,
362 size_type sample_count)
364 float block_percent = float(bc) / float(in_count);
365 float bits_to_take = float(sample_count) * block_percent;
366 bits_to_take += 0.99f;
367 unsigned to_take = unsigned(bits_to_take);
375 void random_subset<BV>::get_block_subset(
bm::word_t* blk_out,
379 for (
unsigned rounds = 0; take_count && rounds < 10; ++rounds)
388 if (blk_src[i] && (blk_out[i] != blk_src[i]))
390 new_count = process_word(blk_out, blk_src, i, take_count);
391 take_count -= new_count;
403 sub_block_[i] = blk_src[i] & ~blk_out[i];
413 get_random_array(blk_out, bit_list_, arr_len, take_count);
418 unsigned random_subset<BV>::process_word(
bm::word_t* blk_out,
423 unsigned new_bits, mask;
426 mask = unsigned(rand());
430 unsigned src_rand = blk_src[nword] & mask;
431 new_bits = src_rand & ~blk_out[nword];
437 if (new_count > take_count)
441 unsigned char blist[64];
444 std::random_shuffle(blist, blist + arr_size);
446 for (
unsigned j = 0; j < take_count; ++j)
448 value |= (1u << blist[j]);
451 new_count = take_count;
454 BM_ASSERT((new_bits & ~blk_src[nword]) == 0);
457 blk_out[nword] |= new_bits;
465 void random_subset<BV>::get_random_array(
bm::word_t* blk_out,
467 unsigned bit_list_size,
471 for (
unsigned i = 0; i < count; ++i)