BitMagic-C++
bmstrsparsevec.h
Go to the documentation of this file.
1 #ifndef BMSTRSPARSEVEC__H__INCLUDED__
2 #define BMSTRSPARSEVEC__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmstrsparsevec.h
22  \brief string sparse vector based on bit-transposed matrix
23 */
24 
25 #include <stddef.h>
26 #include "bmconst.h"
27 
28 #ifndef BM_NO_STL
29 #include <stdexcept>
30 #endif
31 
32 #ifndef BM__H__INCLUDED__
33 // BitMagic utility headers do not include main "bm.h" declaration
34 // #include "bm.h" or "bm64.h" explicitly
35 # error missing include (bm.h or bm64.h)
36 #endif
37 
38 #include "bmtrans.h"
39 #include "bmalgo.h"
40 #include "bmbuffer.h"
41 #include "bmbmatrix.h"
42 #include "bmdef.h"
43 
44 namespace bm
45 {
46 
47 /*!
48  \brief sparse vector for strings with compression using bit transposition method
49 
50  Initial string is bit-transposed into bit-planes so collection may use less
51  memory due to prefix sum (GAP) compression in bit-plains.
52 
53  @ingroup sv
54 */
55 template<typename CharType, typename BV, unsigned MAX_STR_SIZE>
56 class str_sparse_vector : public base_sparse_vector<CharType, BV, MAX_STR_SIZE>
57 {
58 public:
59  typedef BV bvector_type;
62  typedef CharType value_type;
64  typedef typename BV::allocator_type allocator_type;
67  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
70 
71  /*! Statistical information about memory allocation details. */
72  struct statistics : public bv_statistics
73  {};
74 
76  {
77  sv_octet_plains = MAX_STR_SIZE
78  };
79 
80  typedef
81  bm::heap_matrix<unsigned char,
82  MAX_STR_SIZE, // ROWS
83  256, // COLS
86 
87  struct is_remap_support { enum trait { value = true }; };
88  struct is_rsc_support { enum trait { value = false }; };
89 
90  /**
91  Reference class to access elements via common [] operator
92  @ingroup sv
93  */
95  {
96  public:
98  size_type idx) BMNOEXEPT
99  : str_sv_(str_sv), idx_(idx)
100  {}
101 
102  operator const value_type*() const
103  {
104  str_sv_.get(idx_, buf_, MAX_STR_SIZE);
105  return &(buf_[0]);
106  }
107 
108  bool operator==(const const_reference& ref) const
109  { return bool(*this) == bool(ref); }
110  bool is_null() const { return str_sv_.is_null(idx_); }
111  private:
113  size_type idx_;
114  mutable CharType buf_[MAX_STR_SIZE];
115  };
116 
117  /**
118  Reference class to access elements via common [] operator
119  @ingroup sv
120  */
121  class reference
122  {
123  public:
125  size_type idx) BMNOEXEPT
126  : str_sv_(str_sv), idx_(idx)
127  {}
128 
129  operator const value_type*() const
130  {
131  str_sv_.get(idx_, buf_, MAX_STR_SIZE);
132  return &(buf_[0]);
133  }
134 
136  {
137  // TO DO: implement element copy bit by bit
138  str_sv_.set(idx_, (const value_type*)ref);
139  return *this;
140  }
141 
143  {
144  str_sv_.set(idx_, str);
145  return *this;
146  }
147  bool operator==(const reference& ref) const
148  { return bool(*this) == bool(ref); }
149  bool is_null() const { return str_sv_.is_null(idx_); }
150  private:
152  size_type idx_;
153  mutable CharType buf_[MAX_STR_SIZE];
154  };
155 
156  /**
157  Const iterator to do quick traverse of the sparse vector.
158 
159  Implementation uses buffer for decoding so, competing changes
160  to the original vector may not match the iterator returned values.
161 
162  This iterator keeps an operational buffer of transposed elements,
163  so memory footprint is not negligable.
164 
165  @ingroup sv
166  */
168  {
169  public:
170  friend class str_sparse_vector;
171 #ifndef BM_NO_STL
172  typedef std::input_iterator_tag iterator_category;
173 #endif
180  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
181 
182  typedef long long difference_type;
183  typedef CharType* pointer;
184  typedef CharType*& reference;
185  public:
186  const_iterator();
189  const_iterator(const const_iterator& it);
190 
191  bool operator==(const const_iterator& it) const
192  { return (pos_ == it.pos_) && (sv_ == it.sv_); }
193  bool operator!=(const const_iterator& it) const
194  { return ! operator==(it); }
195  bool operator < (const const_iterator& it) const
196  { return pos_ < it.pos_; }
197  bool operator <= (const const_iterator& it) const
198  { return pos_ <= it.pos_; }
199  bool operator > (const const_iterator& it) const
200  { return pos_ > it.pos_; }
201  bool operator >= (const const_iterator& it) const
202  { return pos_ >= it.pos_; }
203 
204  /// \brief Get current position (value)
205  const value_type* operator*() const { return this->value(); }
206 
207  /// \brief Advance to the next available value
208  const_iterator& operator++() { this->advance(); return *this; }
209 
210  /// \brief Advance to the next available value
212  { const_iterator tmp(*this);this->advance(); return tmp; }
213 
214 
215  /// \brief Get current position (value)
216  const value_type* value() const;
217 
218  /// \brief Get NULL status
219  bool is_null() const { return sv_->is_null(this->pos_); }
220 
221  /// Returns true if iterator is at a valid position
222  bool valid() const { return pos_ != bm::id_max; }
223 
224  /// Invalidate current iterator
225  void invalidate() { pos_ = bm::id_max; }
226 
227  /// Current position (index) in the vector
228  size_type pos() const { return pos_; }
229 
230  /// re-position to a specified position
231  void go_to(size_type pos);
232 
233  /// advance iterator forward by one
234  void advance();
235 
236  protected:
237  typedef bm::heap_matrix<CharType,
238  1024, // ROWS: number of strings in one batch
239  MAX_STR_SIZE, // COLS
241 
242  private:
243  const str_sparse_vector_type* sv_; ///!< ptr to parent
244  mutable size_type pos_; ///!< Position
245  mutable buffer_matrix_type buf_matrix_; ///!< decode value buffer
246  mutable size_type pos_in_buf_; ///!< buffer position
247  };
248 
249 
250  /**
251  Back insert iterator implements buffered insert, faster than generic
252  access assignment.
253 
254  Limitations for buffered inserter:
255  1. Do not use more than one inserter (into one vector) at the same time
256  2. Use method flush() at the end to send the rest of accumulated buffer
257  flush is happening automatically on destruction, but if flush produces an
258  exception (for whatever reason) it will be an exception in destructor.
259  As such, explicit flush() is safer way to finilize the sparse vector load.
260 
261  @ingroup sv
262  */
264  {
265  public:
266 #ifndef BM_NO_STL
267  typedef std::output_iterator_tag iterator_category;
268 #endif
275  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
276 
277  typedef void difference_type;
278  typedef void pointer;
279  typedef void reference;
280 
281  public:
285 
287  {
288  BM_ASSERT(bi.empty());
289  this->flush(); sv_ = bi.sv_;
290  return *this;
291  }
292 
294 
295  /** push value to the vector */
297  { this->add(v); return *this; }
298 
299 
300  /** push value to the vector */
301  template<typename StrType>
302  back_insert_iterator& operator=(const StrType& v)
303  {
304  this->add(v.c_str()); return *this; // TODO: avoid c_str()
305  }
306 
307  /** noop */
308  back_insert_iterator& operator*() { return *this; }
309  /** noop */
310  back_insert_iterator& operator++() { return *this; }
311  /** noop */
312  back_insert_iterator& operator++( int ) { return *this; }
313 
314  /** add value to the container*/
315  void add(const value_type* v);
316 
317  /** add NULL (no-value) to the container */
318  void add_null();
319 
320  /** add a series of consequitve NULLs (no-value) to the container */
321  void add_null(size_type count);
322 
323  /** return true if insertion buffer is empty */
324  bool empty() const;
325 
326  /** flush the accumulated buffer */
327  void flush();
328  protected:
329 
330  /** add value to the buffer without changing the NULL vector
331  @param v - value to push back
332  @return index of added value in the internal buffer
333  @internal
334  */
335  size_type add_value(const value_type* v);
336 
337  private:
338  enum buf_size_e
339  {
340  n_buf_size = 1024 * 8
341  };
342  typedef bm::heap_matrix<CharType,
343  n_buf_size, // ROWS: number of strings in one batch
344  MAX_STR_SIZE, // COLS
345  allocator_type> buffer_matrix_type;
346 
347  private:
348  str_sparse_vector_type* sv_; ///!< pointer on the parent vector
349  bvector_type* bv_null_; ///!< not NULL vector pointer
350  buffer_matrix_type buf_matrix_; ///!< value buffer
351  size_type pos_in_buf_; ///!< buffer position
352  };
353 
354 
355 public:
356 
357  /*!
358  \brief Sparse vector constructor
359 
360  \param null_able - defines if vector supports NULL values flag
361  by default it is OFF, use bm::use_null to enable it
362  \param ap - allocation strategy for underlying bit-vectors
363  Default allocation policy uses BM_BIT setting (fastest access)
364  \param bv_max_size - maximum possible size of underlying bit-vectors
365  Please note, this is NOT size of svector itself, it is dynamic upper limit
366  which should be used very carefully if we surely know the ultimate size
367  \param alloc - allocator for bit-vectors
368 
369  \sa bvector<>
370  \sa bm::bvector<>::allocation_policy
371  \sa bm::startegy
372  */
375  size_type bv_max_size = bm::id_max,
376  const allocator_type& alloc = allocator_type());
377 
378  /*! copy-ctor */
379  str_sparse_vector(const str_sparse_vector& str_sv);
380 
381  /*! copy assignmment operator */
384  {
385  if (this != &str_sv)
386  parent_type::copy_from(str_sv);
387  remap_flags_ = str_sv.remap_flags_;
390  return *this;
391  }
392 #ifndef BM_NO_CXX11
393  /*! move-ctor */
395  {
396  parent_type::swap(str_sv);
397  remap_flags_ = str_sv.remap_flags_;
398  remap_matrix1_.swap(str_sv.remap_matrix1_);
399  remap_matrix2_.swap(str_sv.remap_matrix2_);
400  }
401 
402  /*! move assignmment operator */
405  {
406  if (this != &str_sv)
407  {
408  this->swap(str_sv);
409  }
410  return *this;
411  }
412 #endif
413 
414 public:
415 
416  // ------------------------------------------------------------
417  /*! @name String element access */
418  ///@{
419 
420  /** \brief Operator to get write access to an element */
421  reference operator[](size_type idx) { return reference(*this, idx); }
422 
423  /** \brief Operator to get read access to an element */
425  { return const_reference(*this, idx); }
426 
427  /*!
428  \brief set specified element with bounds checking and automatic resize
429  \param idx - element index (vector auto-resized if needs to)
430  \param str - string to set (zero terminated)
431  */
432  void set(size_type idx, const value_type* str);
433 
434  /*!
435  \brief set NULL status for the specified element
436  Vector is resized automatically
437  \param idx - element index (vector auto-resized if needs to)
438  */
439  void set_null(size_type idx);
440 
441 
442  /*!
443  \brief insert the specified element
444  \param idx - element index (vector auto-resized if needs to)
445  \param str - string to set (zero terminated)
446  */
447  void insert(size_type idx, const value_type* str);
448 
449 
450  /*!
451  \brief insert STL string
452  \param idx - element index (vector auto-resized if needs to)
453  \param str - STL string to set
454  */
455  template<typename StrType>
456  void insert(size_type idx, const StrType& str)
457  {
458  this->insert(idx, str.c_str()); // TODO: avoid c_str()
459  }
460 
461  /*!
462  \brief erase the specified element
463  \param idx - element index
464  */
465  void erase(size_type idx);
466 
467  /*!
468  \brief get specified element
469 
470  \param idx - element index
471  \param str - string buffer
472  \param buf_size - string buffer size
473 
474  @return string length
475  */
476  size_type get(size_type idx, value_type* str, size_type buf_size) const;
477 
478  /*!
479  \brief set specified element with bounds checking and automatic resize
480 
481  This is an equivalent of set() method, but templetized to be
482  more compatible with the STL std::string and the likes
483 
484  \param idx - element index (vector auto-resized if needs to)
485  \param str - input string
486  expected an STL class with size() support,
487  like basic_string<> or vector<char>
488  */
489  template<typename StrType>
490  void assign(size_type idx, const StrType& str)
491  {
492  if (idx >= this->size())
493  this->size_ = idx+1;
494 
495  size_type str_size = size_type(str.size());
496  size_type sz = size_type((str_size < MAX_STR_SIZE) ? str_size : MAX_STR_SIZE-1);
497  if (!sz)
498  {
499  this->clear_value_plains_from(0, idx);
500  return;
501  }
502  unsigned i = 0;
503  for (; i < sz; ++i)
504  {
505  CharType ch = str[i];
506  if (remap_flags_) // compressional re-mapping is in effect
507  {
508  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
509  BM_ASSERT(remap_value);
510  ch = CharType(remap_value);
511  }
512  this->bmatr_.set_octet(idx, i, (unsigned char)ch);
513  if (!ch)
514  break;
515  } // for i
516  if (idx > sz)
517  return;
518  this->bmatr_.set_octet(idx, sz, 0);
519  this->clear_value_plains_from(unsigned(sz*8+1), idx);
520  }
521 
522  /*!
523  \brief push back a string
524  \param str - string to set
525  (STL class with size() support, like basic_string)
526  */
527  template<typename StrType>
528  void push_back(const StrType& str) { assign(this->size_, str); }
529 
530  /*!
531  \brief push back a string (zero terminated)
532  \param str - string to set
533  */
534  void push_back(const value_type* str) { set(this->size_, str); }
535 
536 
537  /*!
538  \brief get specified string element
539 
540  Template method expects an STL-compatible type basic_string<>
541 
542  \param idx - element index (vector auto-resized if needs to)
543  \param str - string to get [out]
544  */
545  template<typename StrType>
546  void get(size_type idx, StrType& str) const
547  {
548  str.clear();
549  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
550  {
551  CharType ch = CharType(this->bmatr_.get_octet(idx, i));
552  if (ch == 0)
553  break;
554  if (remap_flags_)
555  {
556  const unsigned char* remap_row = remap_matrix1_.row(i);
557  unsigned char remap_value = remap_row[unsigned(ch)];
558  BM_ASSERT(remap_value);
559  if (!remap_value) // unknown dictionary element
560  {
561  throw_bad_value(0);
562  break;
563  }
564  ch = CharType(remap_value);
565  }
566  str.push_back(ch);
567  } // for i
568  }
569 
570  /*! Swap content */
571  void swap(str_sparse_vector& str_sv) BMNOEXEPT;
572 
573  ///@}
574 
575  // ------------------------------------------------------------
576  /*! @name Element comparison functions */
577  ///@{
578 
579  /**
580  \brief Compare vector element with argument lexicographically
581 
582  NOTE: for a re-mapped container, input string may have no correct
583  remapping, in this case we have an ambiguity
584  (we know it is not equal (0) but LT or GT?).
585  Behavior is undefined.
586 
587  \param idx - vactor element index
588  \param str - argument to compare with
589 
590  \return 0 - equal, < 0 - vect[i] < str, >0 otherwise
591  */
592  int compare(size_type idx, const value_type* str) const;
593 
594 
595  /**
596  \brief Find size of common prefix between two vector elements in octets
597  \return size of common prefix
598  */
599  unsigned common_prefix_length(size_type idx1, size_type idx2) const;
600 
601  ///@}
602 
603 
604  // ------------------------------------------------------------
605  /*! @name Clear */
606  ///@{
607 
608  /*! \brief resize to zero, free memory */
609  void clear() BMNOEXEPT;
610 
611  /*!
612  \brief clear range (assign bit 0 for all plains)
613  \param left - interval start
614  \param right - interval end (closed interval)
615  \param set_null - set cleared values to unassigned (NULL)
616  */
617  str_sparse_vector<CharType, BV, MAX_STR_SIZE>&
618  clear_range(size_type left, size_type right, bool set_null = false)
619  {
620  parent_type::clear_range(left, right, set_null);
621  return *this;
622  }
623 
624 
625  ///@}
626 
627 
628  // ------------------------------------------------------------
629  /*! @name Size, etc */
630  ///@{
631 
632  /*! \brief return size of the vector
633  \return size of sparse vector
634  */
635  size_type size() const { return this->size_; }
636 
637  /*! \brief return true if vector is empty
638  \return true if empty
639  */
640  bool empty() const { return (size() == 0); }
641 
642  /*! \brief resize vector
643  \param sz - new size
644  */
645  void resize(size_type sz) { parent_type::resize(sz); }
646 
647  /*! \brief get maximum string length capacity
648  \return maximum string length sparse vector can take
649  */
650  static size_type max_str() { return sv_octet_plains; }
651 
652  /*! \brief get effective string length used in vector
653 
654  Method returns efficiency, how close are we
655  to reserved maximum.
656 
657  \return current string length maximum
658  */
660 
661  /*! \brief get effective string length used in vector
662  \return current string length maximum
663  */
665  ///@}
666 
667 
668  // ------------------------------------------------------------
669  /*! @name Memory optimization/compression */
670  ///@{
671 
672  /*!
673  \brief run memory optimization for all vector plains
674  \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
675  \param opt_mode - requested compression depth
676  \param stat - memory allocation statistics after optimization
677  */
678  void optimize(
679  bm::word_t* temp_block = 0,
682 
683  /*!
684  @brief Calculates memory statistics.
685 
686  Function fills statistics structure containing information about how
687  this vector uses memory and estimation of max. amount of memory
688  bvector needs to serialize itself.
689 
690  @param st - pointer on statistics structure to be filled in.
691 
692  @sa statistics
693  */
695 
696 
697  ///@}
698 
699  // ------------------------------------------------------------
700  /*! @name Iterator access */
701  //@{
702 
703  /** Provide const iterator access to container content */
704  const_iterator begin() const;
705 
706  /** Provide const iterator access to the end */
707  const_iterator end() const { return const_iterator(this, bm::id_max); }
708 
709  /** Get const_itertor re-positioned to specific element
710  @param idx - position in the sparse vector
711  */
713  { return const_iterator(this, idx); }
714 
715  /** Provide back insert iterator
716  Back insert iterator implements buffered insertion, which is faster, than random access
717  or push_back
718  */
720  { return back_insert_iterator(this); }
721 
722  ///@}
723 
724 
725 
726  // ------------------------------------------------------------
727  /*! @name Various traits */
728  ///@{
729 
730  /** \brief trait if sparse vector is "compressed" (false)
731  */
732  static
733  bool is_compressed() { return false; }
734 
735  ///@}
736 
737  // ------------------------------------------------------------
738  /*! @name remapping, succinct utilities
739  Remapping implements reduction of dit-depth thus improves
740  search performance. Remapping limits farther modifications
741  of sparse vector.
742  */
743  ///@{
744 
745  /**
746  Get remapping status (true|false)
747  */
748  bool is_remap() const { return remap_flags_ != 0; }
749 
750  /**
751  Build remapping profile and load content from another sparse vector
752  \param str_sv - source sparse vector (assumed it is not remapped)
753  */
754  void remap_from(const str_sparse_vector& str_sv);
755 
756  /*!
757  Calculate flags which octets are present on each byte-plain.
758  @internal
759  */
760  void calc_octet_stat(plain_octet_matrix_type& octet_matrix) const;
761 
762  static
763  void build_octet_remap(
764  plain_octet_matrix_type& octet_remap_matrix1,
765  plain_octet_matrix_type& octet_remap_matrix2,
766  const plain_octet_matrix_type& octet_occupancy_matrix);
767  /*!
768  remap string from external (ASCII) system to matrix internal code
769  @return true if remapping was ok, false if found incorrect value
770  for the plain
771  @internal
772  */
773  static
774  bool remap_tosv(value_type* sv_str,
775  size_type buf_size,
776  const value_type* str,
777  const plain_octet_matrix_type& octet_remap_matrix2);
778 
779  /*!
780  remap string from external (ASCII) system to matrix internal code
781  @internal
782  */
783  bool remap_tosv(value_type* sv_str,
784  size_type buf_size,
785  const value_type* str) const
786  {
787  return remap_tosv(sv_str, buf_size, str, remap_matrix2_);
788  }
789  /*!
790  remap string from internal code to external (ASCII) system
791  @return true if remapping was ok, false if found incorrect value
792  for the plain
793  @internal
794  */
795  static
796  bool remap_fromsv(value_type* str,
797  size_type buf_size,
798  const value_type* sv_str,
799  const plain_octet_matrix_type& octet_remap_matrix1);
800  /*!
801  re-calculate remap matrix2 based on matrix1
802  @internal
803  */
804  void recalc_remap_matrix2();
805 
806  ///@}
807 
808  // ------------------------------------------------------------
809  /*! @name Export content to C-style */
810  ///@{
811 
812  /**
813  \brief Bulk export strings to a C-style matrix of chars
814 
815  \param cmatr - dest matrix (bm::heap_matrix)
816  \param idx_from - index in the sparse vector to export from
817  \param dec_size - decoding size (matrix column allocation should match)
818  \param zero_mem - set to false if target array is pre-initialized
819  with 0s to avoid performance penalty
820 
821  \return number of actually exported elements (can be less than requested)
822  */
823  template<typename CharMatrix>
824  size_type decode(CharMatrix& cmatr,
825  size_type idx_from, size_type dec_size,
826  bool zero_mem = true) const
827  {
828  BM_ASSERT(cmatr.is_init());
829  if (zero_mem)
830  cmatr.set_zero();
831 
832  size_type rows = size_type(cmatr.rows());
833  BM_ASSERT(cmatr.cols() >= MAX_STR_SIZE);
834  size_type max_sz = this->size() - idx_from;
835  if (max_sz < dec_size)
836  dec_size = max_sz;
837  if (rows < dec_size)
838  dec_size = rows;
839  if (!dec_size)
840  return dec_size;
841 
842  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
843  {
844  unsigned bi = 0;
845  for (unsigned k = i * 8; k < (i * 8) + 8; ++k, ++bi)
846  {
847  const bvector_type* bv = this->bmatr_.get_row(k);
848  if (!bv)
849  continue;
850  value_type mask = value_type(1u << bi);
851  typename bvector_type::enumerator en(bv, idx_from);
852  for ( ;en.valid(); ++en )
853  {
854  size_type idx = *en - idx_from;
855  if (idx >= dec_size)
856  break;
857  typename CharMatrix::value_type* str = cmatr.row(idx);
858  str[i] |= mask;
859  } // for en
860  } // for k
861  } // for i
862 
863  if (remap_flags_)
864  {
865  for (unsigned i = 0; i < dec_size; ++i)
866  {
867  typename CharMatrix::value_type* str = cmatr.row(i);
868  remap_matrix1_.remapz(str);
869  } // for i
870  }
871  return dec_size;
872  }
873 
874  /**
875  \brief Bulk import of strings from a C-style matrix of chars
876 
877  \param cmatr - source matrix (bm::heap_matrix)
878  [in/out] parameter gets modified(corrupted)
879  in the process
880  \param idx_from - destination index in the sparse vector
881  \param imp_size - import size (number or rows to import)
882  */
883  template<typename CharMatrix>
884  void import(CharMatrix& cmatr, size_type idx_from, size_type imp_size)
885  {
886  if (!imp_size)
887  return;
888  if (idx_from < this->size_) // in case it touches existing elements
889  {
890  // clear all plains in the range to provide corrrect import of 0 values
891  this->clear_range(idx_from, idx_from + imp_size - 1);
892  }
893  import_no_check(cmatr, idx_from, imp_size);
894  }
895 
896  /**
897  \brief Bulk push-back import of strings from a C-style matrix of chars
898 
899  \param cmatr - source matrix (bm::heap_matrix)
900  [in/out] parameter gets modified(corrupted)
901  in the process
902  \param imp_size - import size (number or rows to import)
903  */
904  template<typename CharMatrix>
905  void import_back(CharMatrix& cmatr, size_type imp_size)
906  {
907  if (!imp_size)
908  return;
909  import_no_check(cmatr, this->size(), imp_size);
910  }
911 
912 
913  ///@}
914 
915  // ------------------------------------------------------------
916 
917  /*! \brief syncronize internal structures */
918  void sync(bool force);
919 
920  /*!
921  \brief check if another sparse vector has the same content and size
922 
923  \param sv - sparse vector for comparison
924  \param null_able - flag to consider NULL vector in comparison (default)
925  or compare only value content plains
926 
927  \return true, if it is the same
928  */
930  bm::null_support null_able = bm::use_null) const;
931 
932  /**
933  \brief find position of compressed element by its rank
934  */
935  static
936  bool find_rank(size_type rank, size_type& pos);
937 
938  /**
939  \brief size of sparse vector (may be different for RSC)
940  */
941  size_type effective_size() const { return size(); }
942 
943 protected:
944 
945  /// @internal
946  template<typename CharMatrix>
947  void import_no_check(CharMatrix& cmatr,
948  size_type idx_from, size_type imp_size,
949  bool set_not_null = true)
950  {
951  BM_ASSERT (cmatr.is_init());
952 
953  unsigned max_str_size = 0;
954  {
955  for (unsigned j = 0; j < imp_size; ++j)
956  {
957  typename CharMatrix::value_type* str = cmatr.row(j);
958  unsigned i;
959  for (i = 0; i < MAX_STR_SIZE; ++i)
960  {
961  value_type ch = str[i];
962  if (!ch)
963  {
964  max_str_size = (i > max_str_size) ? i : max_str_size;
965  break;
966  }
967  if (remap_flags_) // re-mapping is in effect
968  {
969  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
970  BM_ASSERT(remap_value);
971  if (!remap_value) // unknown dictionary element
972  throw_bad_value(0);
973  str[i] = CharType(remap_value);
974  }
975  } // for i
976  if (i == MAX_STR_SIZE)
977  max_str_size = i;
978  } // for j
979  }
980 
981  size_type bit_list[CharMatrix::n_rows];
982  for (unsigned i = 0; i < max_str_size; ++i)
983  {
984  for (unsigned bi = 0; bi < 8; ++bi)
985  {
986  unsigned n_bits = 0;
987  value_type mask = value_type(1u << bi);
988  for (size_type j = 0; j < imp_size; ++j)
989  {
990  typename CharMatrix::value_type* str = cmatr.row(j);
991  value_type ch = str[i];
992  if (!ch)
993  continue;
994  if (ch & mask)
995  {
996  bit_list[n_bits++] = idx_from + j;
997  str[i] ^= mask;
998  }
999  } // for j
1000  if (n_bits) // set transposed bits to the target plain
1001  {
1002  unsigned plain = i*8 + bi;
1003  bvector_type* bv = this->bmatr_.get_row(plain);
1004  if (!bv)
1005  {
1006  bv = this->bmatr_.construct_row(plain);
1007  bv->init();
1008  }
1009  bv->set(&bit_list[0], n_bits, BM_SORTED);
1010  }
1011  } // for k
1012  } // for i
1013 
1014  size_type idx_to = idx_from + imp_size - 1;
1015  if (set_not_null)
1016  {
1017  bvector_type* bv_null = this->get_null_bvect();
1018  if (bv_null)
1019  bv_null->set_range(idx_from, idx_to);
1020  }
1021  if (idx_to >= this->size())
1022  this->size_ = idx_to+1;
1023 
1024  }
1025 
1026  // ------------------------------------------------------------
1027  /*! @name Errors and exceptions */
1028  ///@{
1029 
1030  /**
1031  \brief throw range error
1032  \internal
1033  */
1034  static
1035  void throw_range_error(const char* err_msg);
1036 
1037  /**
1038  \brief throw domain error
1039  \internal
1040  */
1041  static
1042  void throw_bad_value(const char* err_msg);
1043 
1044  ///@}
1045 
1046  /*! \brief set value without checking boundaries */
1047  void set_value(size_type idx, const value_type* str);
1048 
1049  /*! \brief set value without checking boundaries or support of NULL */
1050  void set_value_no_null(size_type idx, const value_type* str);
1051 
1052  /*! \brief insert value without checking boundaries */
1053  void insert_value(size_type idx, const value_type* str);
1054 
1055  /*! \brief insert value without checking boundaries or support of NULL */
1056  void insert_value_no_null(size_type idx, const value_type* str);
1057 
1058 
1059  size_type size_internal() const { return size(); }
1061 
1062  size_t remap_size() const { return remap_matrix1_.get_buffer().size(); }
1063  const unsigned char* get_remap_buffer() const
1064  { return remap_matrix1_.get_buffer().buf(); }
1065  unsigned char* init_remap_buffer()
1066  {
1067  remap_matrix1_.init();
1068  return remap_matrix1_.get_buffer().data();
1069  }
1070  void set_remap() { remap_flags_ = 1; }
1071 
1072 protected:
1073  template<class SVect> friend class sparse_vector_serializer;
1074  template<class SVect> friend class sparse_vector_deserializer;
1075 
1076 protected:
1077  unsigned remap_flags_; ///< remapping status
1078  plain_octet_matrix_type remap_matrix1_; ///< octet remap table 1
1079  plain_octet_matrix_type remap_matrix2_; ///< octet remap table 2
1080 };
1081 
1082 //---------------------------------------------------------------------
1083 //---------------------------------------------------------------------
1084 
1085 
1086 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1088  bm::null_support null_able,
1090  size_type bv_max_size,
1091  const allocator_type& alloc)
1092 : parent_type(null_able, ap, bv_max_size, alloc),
1093  remap_flags_(0)
1094 {}
1095 
1096 
1097 //---------------------------------------------------------------------
1098 
1099 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1101  const str_sparse_vector& str_sv)
1102 : parent_type(str_sv),
1103  remap_flags_(str_sv.remap_flags_),
1104  remap_matrix1_(str_sv.remap_matrix1_),
1105  remap_matrix2_(str_sv.remap_matrix2_)
1106 {}
1107 
1108 //---------------------------------------------------------------------
1109 
1110 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1112 {
1113  parent_type::swap(str_sv);
1114  bm::xor_swap(remap_flags_, str_sv.remap_flags_);
1115  remap_matrix1_.swap(str_sv.remap_matrix1_);
1116  remap_matrix2_.swap(str_sv.remap_matrix2_);
1117 }
1118 
1119 //---------------------------------------------------------------------
1120 
1121 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1123  size_type idx, const value_type* str)
1124 {
1125  if (idx >= this->size())
1126  this->size_ = idx+1;
1127  set_value(idx, str);
1128 }
1129 
1130 //---------------------------------------------------------------------
1131 
1132 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1134  size_type idx, const value_type* str)
1135 {
1136  if (idx >= this->size())
1137  {
1138  this->size_ = idx+1;
1139  set_value(idx, str);
1140  return;
1141  }
1142  insert_value(idx, str);
1143  this->size_++;
1144 }
1145 
1146 //---------------------------------------------------------------------
1147 
1148 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1150 {
1151  BM_ASSERT(idx < this->size_);
1152  if (idx >= this->size_)
1153  return;
1154  this->erase_column(idx);
1155  this->size_--;
1156 }
1157 
1158 //---------------------------------------------------------------------
1159 
1160 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1162 {
1163  bvector_type* bv_null = this->get_null_bvect();
1164  if (bv_null)
1165  bv_null->clear_bit_no_check(idx);
1166  if (idx >= this->size_)
1167  {
1168  this->size_ = idx + 1;
1169  }
1170 }
1171 
1172 //---------------------------------------------------------------------
1173 
1174 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1176  size_type idx, const value_type* str)
1177 {
1178  set_value_no_null(idx, str);
1179  bvector_type* bv_null = this->get_null_bvect();
1180  if (bv_null)
1181  bv_null->set_bit_no_check(idx);
1182 }
1183 
1184 //---------------------------------------------------------------------
1185 
1186 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1188  size_type idx, const value_type* str)
1189 {
1190  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1191  {
1192  CharType ch = str[i];
1193  if (!ch)
1194  {
1195  this->clear_value_plains_from(i*8, idx);
1196  return;
1197  }
1198 
1199  if (remap_flags_) // compressional re-mapping is in effect
1200  {
1201  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1202  BM_ASSERT(remap_value);
1203  if (!remap_value) // unknown dictionary element
1204  {
1205  this->clear_value_plains_from(i*8, idx);
1206  return;
1207  }
1208  ch = CharType(remap_value);
1209  }
1210  this->bmatr_.set_octet(idx, i, (unsigned char)ch);
1211  } // for i
1212 }
1213 
1214 //---------------------------------------------------------------------
1215 
1216 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1218  size_type idx, const value_type* str)
1219 {
1220  insert_value_no_null(idx, str);
1221  this->insert_null(idx, true);
1222 }
1223 
1224 //---------------------------------------------------------------------
1225 
1226 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1228  size_type idx, const value_type* str)
1229 {
1230  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1231  {
1232  CharType ch = str[i];
1233  if (!ch)
1234  {
1235  this->insert_clear_value_plains_from(i*8, idx);
1236  return;
1237  }
1238 
1239  if (remap_flags_) // compressional re-mapping is in effect
1240  {
1241  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1242  BM_ASSERT(remap_value);
1243  if (!remap_value) // unknown dictionary element
1244  {
1245  this->insert_clear_value_plains_from(i*8, idx);
1246  return;
1247  }
1248  ch = CharType(remap_value);
1249  }
1250  this->bmatr_.insert_octet(idx, i, (unsigned char)ch);
1251  } // for i
1252 }
1253 
1254 
1255 //---------------------------------------------------------------------
1256 
1257 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1260  size_type idx, value_type* str, size_type buf_size) const
1261 {
1262  size_type i = 0;
1263  for (; i < MAX_STR_SIZE; ++i)
1264  {
1265  if (i < buf_size)
1266  str[i] = 0;
1267  else
1268  break;
1269  CharType ch = CharType(this->bmatr_.get_octet(idx, i));
1270  if (ch == 0)
1271  {
1272  str[i] = 0;
1273  break;
1274  }
1275  str[i] = ch;
1276  } // for i
1277  if (remap_flags_)
1278  {
1279  remap_matrix1_.remap(str, i);
1280  }
1281  return i;
1282 }
1283 
1284 //---------------------------------------------------------------------
1285 
1286 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1288  bm::word_t* temp_block,
1289  typename bvector_type::optmode opt_mode,
1291 {
1292  typename bvector_type::statistics stbv;
1293  parent_type::optimize(temp_block, opt_mode, &stbv);
1294 
1295  if (st)
1296  st->add(stbv);
1297 }
1298 
1299 //---------------------------------------------------------------------
1300 
1301 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1304 {
1305  BM_ASSERT(st);
1306  typename bvector_type::statistics stbv;
1307  parent_type::calc_stat(&stbv);
1308 
1309  st->reset();
1310 
1311  st->bit_blocks += stbv.bit_blocks;
1312  st->gap_blocks += stbv.gap_blocks;
1313  st->ptr_sub_blocks += stbv.ptr_sub_blocks;
1314  st->bv_count += stbv.bv_count;
1315  st->max_serialize_mem += stbv.max_serialize_mem + 8;
1316  st->memory_used += stbv.memory_used;
1317 
1318  size_t remap_mem_usage = sizeof(remap_flags_);
1319  remap_mem_usage += remap_matrix1_.get_buffer().mem_usage();
1320  remap_mem_usage += remap_matrix2_.get_buffer().mem_usage();
1321 
1322  st->memory_used += remap_mem_usage;
1323  if (remap_flags_) // use of remapping requires some extra storage
1324  {
1325  st->max_serialize_mem += remap_mem_usage;
1326  }
1327 }
1328 
1329 //---------------------------------------------------------------------
1330 
1331 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1333  size_type idx,
1334  const value_type* str) const
1335 {
1336  BM_ASSERT(str);
1337  int res = 0;
1338  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1339  {
1340  CharType ch = str[i];
1341  if (remap_flags_ && ch)
1342  {
1343  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1344  if (!remap_value) // unknown dictionary element
1345  {
1346  throw_bad_value(0);
1347  }
1348  ch = CharType(remap_value);
1349  }
1350 
1351  res = this->bmatr_.compare_octet(idx, i, ch);
1352  if (res || !ch)
1353  break;
1354  } // for
1355  return res;
1356 }
1357 
1358 //---------------------------------------------------------------------
1359 
1360 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1362  size_type idx1, size_type idx2) const
1363 {
1364  unsigned i = 0;
1365  for (; i < MAX_STR_SIZE; ++i)
1366  {
1367  CharType ch1 = CharType(this->bmatr_.get_octet(idx1, i));
1368  CharType ch2 = CharType(this->bmatr_.get_octet(idx2, i));
1369  if (!ch1 || !ch2)
1370  {
1371  if (i)
1372  --i;
1373  break;
1374  }
1375  if (ch1 != ch2)
1376  {
1377  break;
1378  }
1379  } // for
1380 
1381  return i;
1382 }
1383 
1384 //---------------------------------------------------------------------
1385 
1386 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1387 bool
1389  size_type& pos)
1390 {
1391  BM_ASSERT(rank);
1392  pos = rank - 1;
1393  return true;
1394 }
1395 
1396 //---------------------------------------------------------------------
1397 
1398 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1401 {
1402  for (int i = MAX_STR_SIZE-1; i >= 0; --i)
1403  {
1404  unsigned octet_plain = unsigned(i) * unsigned(sizeof(CharType)) * 8;
1405  for (unsigned j = 0; j < sizeof(CharType) * 8; ++j)
1406  {
1407  if (this->bmatr_.row(octet_plain+j))
1408  return unsigned(i)+1;
1409  } // for j
1410  } // for i
1411  return 0;
1412 }
1413 
1414 //---------------------------------------------------------------------
1415 
1416 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1418  plain_octet_matrix_type& octet_matrix) const
1419 {
1420  octet_matrix.init();
1421  octet_matrix.set_zero();
1422 
1423  size_type size = this->size();
1424 
1425  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1426  {
1427  unsigned char* row = octet_matrix.row(i);
1428 
1429  // TODO: optimize partial transposition
1430  for (size_type j = 0; j < size; ++j)
1431  {
1432  unsigned char ch = this->bmatr_.get_octet(j, i);
1433  unsigned k = ch;
1434  if (k)
1435  row[k] = 1;
1436  } // for j
1437  } // for i
1438 }
1439 
1440 //---------------------------------------------------------------------
1441 
1442 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1444  plain_octet_matrix_type& octet_remap_matrix1,
1445  plain_octet_matrix_type& octet_remap_matrix2,
1446  const plain_octet_matrix_type& octet_occupancy_matrix)
1447 {
1448  octet_remap_matrix1.init();
1449  octet_remap_matrix1.set_zero();
1450  octet_remap_matrix2.init();
1451  octet_remap_matrix2.set_zero();
1452 
1453  for (unsigned i = 0; i < octet_occupancy_matrix.rows(); ++i)
1454  {
1455  const unsigned char* row = octet_occupancy_matrix.row(i);
1456  unsigned char* remap_row1 = octet_remap_matrix1.row(i);
1457  unsigned char* remap_row2 = octet_remap_matrix2.row(i);
1458  unsigned count = 1;
1459  for (unsigned j = 1; j < octet_occupancy_matrix.cols(); ++j)
1460  {
1461  if (row[j]) // octet is present
1462  {
1463  // set two remapping table values
1464  remap_row1[count] = (unsigned char)j;
1465  remap_row2[j] = (unsigned char)count;
1466  ++count;
1467  BM_ASSERT(count < 256);
1468  }
1469  } // for j
1470  } // for i
1471 }
1472 
1473 //---------------------------------------------------------------------
1474 
1475 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1477 {
1478  BM_ASSERT(remap_flags_);
1479 
1480  remap_matrix2_.init();
1481  remap_matrix2_.set_zero();
1482 
1483  for (unsigned i = 0; i < remap_matrix1_.rows(); ++i)
1484  {
1485  const unsigned char* remap_row1 = remap_matrix1_.row(i);
1486  unsigned char* remap_row2 = remap_matrix2_.row(i);
1487  for (unsigned j = 1; j < remap_matrix1_.cols(); ++j)
1488  {
1489  if (remap_row1[j])
1490  {
1491  unsigned count = remap_row1[j];
1492  remap_row2[count] = (unsigned char)j;
1493  BM_ASSERT(count < 256);
1494  }
1495  } // for j
1496  } // for i
1497 }
1498 
1499 //---------------------------------------------------------------------
1500 
1501 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1503  value_type* sv_str,
1504  size_type buf_size,
1505  const value_type* str,
1506  const plain_octet_matrix_type& octet_remap_matrix2)
1507 {
1508  for (unsigned i = 0; i < buf_size; ++i)
1509  {
1510  CharType ch = str[i];
1511  if (ch == 0)
1512  {
1513  sv_str[i] = ch;
1514  break;
1515  }
1516  const unsigned char* remap_row = octet_remap_matrix2.row(i);
1517  unsigned char remap_value = remap_row[unsigned(ch)];
1518  if (!remap_value) // unknown dictionary element
1519  {
1520  return false;
1521  }
1522  sv_str[i] = CharType(remap_value);
1523  } // for i
1524  return true;
1525 }
1526 
1527 //---------------------------------------------------------------------
1528 
1529 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1531  value_type* str,
1532  size_type buf_size,
1533  const value_type* sv_str,
1534  const plain_octet_matrix_type& octet_remap_matrix1)
1535 {
1536  for (unsigned i = 0; i < buf_size; ++i)
1537  {
1538  CharType ch = sv_str[i];
1539  if (ch == 0)
1540  {
1541  str[i] = ch;
1542  break;
1543  }
1544  const unsigned char* remap_row = octet_remap_matrix1.row(i);
1545  unsigned char remap_value = remap_row[unsigned(ch)];
1546  if (!remap_value) // unknown dictionary element
1547  {
1548  return false;
1549  }
1550  str[i] = CharType(remap_value);
1551  } // for i
1552  return true;
1553 }
1554 
1555 //---------------------------------------------------------------------
1556 
1557 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1559 {
1560  if (str_sv.is_remap())
1561  {
1562  *this = str_sv;
1563  return;
1564  }
1565  this->clear();
1566  if (str_sv.empty()) // no content to remap
1567  {
1568  return;
1569  }
1570 
1571  plain_octet_matrix_type omatrix; // occupancy map
1572  str_sv.calc_octet_stat(omatrix);
1573 
1574  str_sv.build_octet_remap(remap_matrix1_, remap_matrix2_, omatrix);
1575  remap_flags_ = 1; // turn ON remapped mode
1576 
1577  const unsigned buffer_size = 1024 * 8;
1578 
1579  typedef bm::heap_matrix<CharType,
1580  buffer_size, // ROWS: number of strings in one batch
1581  MAX_STR_SIZE, // COLS
1582  allocator_type> remap_buffer_type;
1583 
1584  remap_buffer_type cmatr(true);
1585  for (size_type i = 0; true; )
1586  {
1587  size_type dsize = str_sv.decode(cmatr, i, buffer_size, true);
1588  if (!dsize)
1589  break;
1590  this->import(cmatr, i, dsize);
1591  i += dsize;
1592  } // for i
1593 }
1594 
1595 //---------------------------------------------------------------------
1596 
1597 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1599 {
1600  if (remap_flags_)
1601  {
1602  recalc_remap_matrix2();
1603  }
1604 }
1605 
1606 //---------------------------------------------------------------------
1607 
1608 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1611  bm::null_support null_able) const
1612 {
1613  // at this point both vectors should have the same remap settings
1614  // to be considered "equal".
1615  // Strictly speaking this is incorrect, because re-map and non-remap
1616  // vectors may have the same content
1617 
1618  if (remap_flags_ != sv.remap_flags_)
1619  return false;
1620  if (remap_flags_)
1621  {
1622  bool b;
1623  b = remap_matrix1_.get_buffer().equal(sv.remap_matrix1_.get_buffer());
1624  if (!b)
1625  return b;
1626  b = remap_matrix2_.get_buffer().equal(sv.remap_matrix2_.get_buffer());
1627  if (!b)
1628  return b;
1629  }
1630  return parent_type::equal(sv, null_able);
1631 }
1632 
1633 //---------------------------------------------------------------------
1634 
1635 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1638 {
1639  typedef typename
1641  return it_type(this);
1642 }
1643 
1644 //---------------------------------------------------------------------
1645 
1646 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1648 {
1649  parent_type::clear();
1650 }
1651 
1652 //---------------------------------------------------------------------
1653 
1654 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1656  const char* err_msg)
1657 {
1658 #ifndef BM_NO_STL
1659  throw std::range_error(err_msg);
1660 #else
1661  BM_ASSERT_THROW(false, BM_ERR_RANGE);
1662 #endif
1663 }
1664 
1665 //---------------------------------------------------------------------
1666 
1667 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1669  const char* err_msg)
1670 {
1671 #ifndef BM_NO_STL
1672  if (!err_msg)
1673  err_msg = "Unknown/incomparable dictionary character";
1674  throw std::domain_error(err_msg);
1675 #else
1676  BM_ASSERT_THROW(false, BM_BAD_VALUE);
1677 #endif
1678 }
1679 
1680 
1681 //---------------------------------------------------------------------
1682 //
1683 //---------------------------------------------------------------------
1684 
1685 
1686 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1688 : sv_(0), pos_(bm::id_max), pos_in_buf_(~size_type(0))
1689 {}
1690 
1691 //---------------------------------------------------------------------
1692 
1693 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1696 : sv_(it.sv_), pos_(it.pos_), pos_in_buf_(~size_type(0))
1697 {}
1698 
1699 //---------------------------------------------------------------------
1700 
1701 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1704 : sv_(sv), pos_(sv->empty() ? bm::id_max : 0), pos_in_buf_(~size_type(0))
1705 {}
1706 
1707 //---------------------------------------------------------------------
1708 
1709 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1713 : sv_(sv), pos_(pos >= sv->size() ? bm::id_max : pos), pos_in_buf_(~size_type(0))
1714 {}
1715 
1716 //---------------------------------------------------------------------
1717 
1718 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1721 {
1722  BM_ASSERT(sv_);
1723  BM_ASSERT(this->valid());
1724  if (pos_in_buf_ == ~size_type(0))
1725  {
1726  if (!buf_matrix_.is_init())
1727  buf_matrix_.init();
1728  pos_in_buf_ = 0;
1729  size_type d = sv_->decode(buf_matrix_, pos_, buffer_matrix_type::n_rows);
1730  if (!d)
1731  {
1732  pos_ = bm::id_max;
1733  return 0;
1734  }
1735  }
1736  return buf_matrix_.row(pos_in_buf_);
1737 }
1738 
1739 //---------------------------------------------------------------------
1740 
1741 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1744 {
1745  pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
1746  pos_in_buf_ = ~size_type(0);
1747 }
1748 
1749 //---------------------------------------------------------------------
1750 
1751 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1753 {
1754  if (pos_ == bm::id_max) // nothing to do, we are at the end
1755  return;
1756  ++pos_;
1757 
1758  if (pos_ >= sv_->size())
1759  this->invalidate();
1760  else
1761  {
1762  if (pos_in_buf_ != ~size_type(0))
1763  {
1764  ++pos_in_buf_;
1765  if (pos_in_buf_ >= buffer_matrix_type::n_rows)
1766  pos_in_buf_ = ~size_type(0);
1767  }
1768  }
1769 }
1770 
1771 //---------------------------------------------------------------------
1772 //
1773 //---------------------------------------------------------------------
1774 
1775 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1777 : sv_(0), bv_null_(0), pos_in_buf_(~size_type(0))
1778 {}
1779 
1780 //---------------------------------------------------------------------
1781 
1782 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1785 : sv_(sv), pos_in_buf_(~size_type(0))
1786 {
1787  bv_null_ = sv_? sv_->get_null_bvect() : 0;
1788 }
1789 
1790 //---------------------------------------------------------------------
1791 
1792 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1795 : sv_(bi.sv_), bv_null_(bi.bv_null_), pos_in_buf_(~size_type(0))
1796 {
1797  BM_ASSERT(bi.empty());
1798 }
1799 
1800 //---------------------------------------------------------------------
1801 
1802 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1804 {
1805  this->flush();
1806 }
1807 
1808 //---------------------------------------------------------------------
1809 
1810 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1812 {
1813  return (pos_in_buf_ == ~size_type(0) || !sv_);
1814 }
1815 
1816 //---------------------------------------------------------------------
1817 
1818 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1820 {
1821  if (this->empty())
1822  return;
1823 
1824  sv_->import_no_check(buf_matrix_, sv_->size(), pos_in_buf_+1, false);
1825  pos_in_buf_ = ~size_type(0);
1826 }
1827 
1828 //---------------------------------------------------------------------
1829 
1830 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1833 {
1834  if (!v)
1835  {
1836  this->add_null();
1837  return;
1838  }
1839  size_type buf_idx = this->add_value(v);
1840  if (bv_null_)
1841  {
1842  size_type sz = sv_->size();
1843  bv_null_->set_bit_no_check(sz + buf_idx);
1844  }
1845 }
1846 
1847 //---------------------------------------------------------------------
1848 
1849 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1851 {
1852  /*size_type buf_idx = */this->add_value("");
1853 }
1854 
1855 //---------------------------------------------------------------------
1856 
1857 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1860 {
1861  for (size_type i = 0; i < count; ++i) // TODO: optimization
1862  this->add_value("");
1863 }
1864 
1865 
1866 //---------------------------------------------------------------------
1867 
1868 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1872 {
1873  BM_ASSERT(sv_);
1874  BM_ASSERT(v);
1875  if (pos_in_buf_ == ~size_type(0))
1876  {
1877  if (!buf_matrix_.is_init())
1878  buf_matrix_.init();
1879  pos_in_buf_ = 0;
1880  buf_matrix_.set_zero();
1881  }
1882  else
1883  if (pos_in_buf_ >= buffer_matrix_type::n_rows-1)
1884  {
1885  this->flush();
1886  pos_in_buf_ = 0;
1887  buf_matrix_.set_zero();
1888  }
1889  else
1890  {
1891  ++pos_in_buf_;
1892  }
1893  value_type* r = buf_matrix_.row(pos_in_buf_);
1894  for (unsigned i = 0; i < buffer_matrix_type::n_columns; ++i)
1895  {
1896  r[i] = v[i];
1897  if (!r[i])
1898  break;
1899  } // for i
1900  return pos_in_buf_;
1901 }
1902 
1903 //---------------------------------------------------------------------
1904 
1905 
1906 } // namespace
1907 
1908 #endif
bm::str_sparse_vector::effective_max_str
size_type effective_max_str() const
get effective string length used in vector
Definition: bmstrsparsevec.h:1400
bm::str_sparse_vector::back_insert_iterator::flush
void flush()
flush the accumulated buffer
Definition: bmstrsparsevec.h:1819
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::plain
bvector_type_ptr plain(unsigned i)
get access to bit-plain as is (can return NULL)
Definition: bmbmatrix.h:365
bm::str_sparse_vector::init_remap_buffer
unsigned char * init_remap_buffer()
Definition: bmstrsparsevec.h:1065
bm::str_sparse_vector::insert
void insert(size_type idx, const StrType &str)
insert STL string
Definition: bmstrsparsevec.h:456
bmconst.h
Constants, tables and typedefs.
bm::str_sparse_vector::value_type
CharType value_type
Definition: bmstrsparsevec.h:62
bm::str_sparse_vector::const_iterator::str_sparse_vector_type
str_sparse_vector< CharType, BV, MAX_STR_SIZE > str_sparse_vector_type
Definition: bmstrsparsevec.h:174
bm::str_sparse_vector::const_iterator::allocator_type
bvector_type::allocator_type allocator_type
Definition: bmstrsparsevec.h:179
bm::bv_statistics::max_serialize_mem
size_t max_serialize_mem
estimated maximum memory for serialization
Definition: bmfunc.h:60
bm::str_sparse_vector::allocator_pool_type
allocator_type::allocator_pool_type allocator_pool_type
Definition: bmstrsparsevec.h:67
bm::str_sparse_vector::back_insert_iterator::back_insert_iterator
back_insert_iterator()
Definition: bmstrsparsevec.h:1776
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::bmatr_
bmatrix_type bmatr_
bit-transposed matrix
Definition: bmbmatrix.h:462
bm::sparse_vector_deserializer
sparse vector de-serializer
Definition: bmsparsevec_serial.h:192
bm::str_sparse_vector::const_iterator::size_type
str_sparse_vector_type::size_type size_type
Definition: bmstrsparsevec.h:177
bm::str_sparse_vector::const_iterator::operator*
const value_type * operator*() const
Get current position (value)
Definition: bmstrsparsevec.h:205
bm::str_sparse_vector::remap_matrix2_
plain_octet_matrix_type remap_matrix2_
octet remap table 2
Definition: bmstrsparsevec.h:1079
bm::str_sparse_vector::const_iterator::allocator_pool_type
allocator_type::allocator_pool_type allocator_pool_type
Definition: bmstrsparsevec.h:180
bm::str_sparse_vector::octet_plains
octet_plains
Definition: bmstrsparsevec.h:75
bm::bvector::iterator_base::valid
bool valid() const
Checks if iterator is still valid. Analog of != 0 comparison for pointers.
Definition: bm.h:277
bm::str_sparse_vector::back_insert_iterator::bvector_type
str_sparse_vector_type::bvector_type bvector_type
Definition: bmstrsparsevec.h:273
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::clear_value_plains_from
void clear_value_plains_from(unsigned plain_idx, size_type idx)
Definition: bmbmatrix.h:1351
bm::str_sparse_vector::const_iterator::go_to
void go_to(size_type pos)
re-position to a specified position
Definition: bmstrsparsevec.h:1742
bm::str_sparse_vector::size_internal
size_type size_internal() const
Definition: bmstrsparsevec.h:1059
bm::bvector::optmode
optmode
Optimization mode Every next level means additional checks (better compression vs time)
Definition: bm.h:129
bm::str_sparse_vector::remap_tosv
bool remap_tosv(value_type *sv_str, size_type buf_size, const value_type *str) const
Definition: bmstrsparsevec.h:783
bm::str_sparse_vector::back_insert_iterator::operator++
back_insert_iterator & operator++(int)
noop
Definition: bmstrsparsevec.h:312
bm::str_sparse_vector::const_iterator::pos
size_type pos() const
Current position (index) in the vector.
Definition: bmstrsparsevec.h:228
bm::str_sparse_vector::const_iterator::buffer_matrix_type
bm::heap_matrix< CharType, 1024, MAX_STR_SIZE, allocator_type > buffer_matrix_type
Definition: bmstrsparsevec.h:240
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::swap
void swap(base_sparse_vector< CharType, BV, MAX_SIZE > &bsv) BMNOEXEPT
Definition: bmbmatrix.h:1154
bm::str_sparse_vector::reference::operator=
reference & operator=(const reference &ref)
Definition: bmstrsparsevec.h:135
bm::str_sparse_vector::push_back
void push_back(const StrType &str)
push back a string
Definition: bmstrsparsevec.h:528
bm::basic_bmatrix::get_octet
unsigned char get_octet(size_type pos, size_type octet_idx) const
Definition: bmbmatrix.h:877
bm::bv_statistics::bit_blocks
size_t bit_blocks
Number of bit blocks.
Definition: bmfunc.h:56
bm::str_sparse_vector::resize_internal
void resize_internal(size_type sz)
Definition: bmstrsparsevec.h:1060
bm::base_sparse_vector
Base class for bit-transposed sparse vector construction.
Definition: bmbmatrix.h:244
bm::str_sparse_vector::const_iterator::value
const value_type * value() const
Get current position (value)
Definition: bmstrsparsevec.h:1720
bm::no_null
@ no_null
do not support NULL values
Definition: bmconst.h:217
bm::str_sparse_vector::get_remap_buffer
const unsigned char * get_remap_buffer() const
Definition: bmstrsparsevec.h:1063
bm::str_sparse_vector::const_reference::is_null
bool is_null() const
Definition: bmstrsparsevec.h:110
bm::str_sparse_vector::back_insert_iterator::allocator_pool_type
allocator_type::allocator_pool_type allocator_pool_type
Definition: bmstrsparsevec.h:275
bm::str_sparse_vector::const_iterator::value_type
str_sparse_vector_type::value_type value_type
Definition: bmstrsparsevec.h:176
bm::str_sparse_vector::insert_value_no_null
void insert_value_no_null(size_type idx, const value_type *str)
insert value without checking boundaries or support of NULL
Definition: bmstrsparsevec.h:1227
bm::str_sparse_vector::throw_bad_value
static void throw_bad_value(const char *err_msg)
throw domain error
Definition: bmstrsparsevec.h:1668
bm::str_sparse_vector::set_value
void set_value(size_type idx, const value_type *str)
set value without checking boundaries
Definition: bmstrsparsevec.h:1175
bm::str_sparse_vector::const_iterator::operator<
bool operator<(const const_iterator &it) const
Definition: bmstrsparsevec.h:195
bm::str_sparse_vector::get
void get(size_type idx, StrType &str) const
get specified string element
Definition: bmstrsparsevec.h:546
bm::str_sparse_vector::insert_value
void insert_value(size_type idx, const value_type *str)
insert value without checking boundaries
Definition: bmstrsparsevec.h:1217
bm::BM_SORTED
@ BM_SORTED
input set is sorted (ascending order)
Definition: bmconst.h:192
bm::str_sparse_vector::const_iterator::pointer
CharType * pointer
Definition: bmstrsparsevec.h:183
bm::bv_statistics::reset
void reset()
Reset statisctics.
Definition: bmfunc.h:93
bm::str_sparse_vector::plain_octet_matrix_type
bm::heap_matrix< unsigned char, MAX_STR_SIZE, 256, typename bvector_type::allocator_type > plain_octet_matrix_type
Definition: bmstrsparsevec.h:85
bm::bvector::enumerator
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:590
bm::str_sparse_vector::set_remap
void set_remap()
Definition: bmstrsparsevec.h:1070
BM_ASSERT_THROW
#define BM_ASSERT_THROW(x, xerrcode)
Definition: bmdef.h:388
bm::str_sparse_vector::const_iterator::const_iterator
const_iterator()
Definition: bmstrsparsevec.h:1687
bm::str_sparse_vector::is_rsc_support
Definition: bmstrsparsevec.h:88
bm::str_sparse_vector::effective_vector_max
size_type effective_vector_max() const
get effective string length used in vector
Definition: bmstrsparsevec.h:664
bm::bvector::allocator_type
Alloc allocator_type
Definition: bm.h:110
bm::str_sparse_vector::calc_stat
void calc_stat(struct str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *st) const
Calculates memory statistics.
Definition: bmstrsparsevec.h:1302
bm::str_sparse_vector::back_insert_iterator::operator=
back_insert_iterator & operator=(const value_type *v)
push value to the vector
Definition: bmstrsparsevec.h:296
bm::str_sparse_vector::bvector_enumerator_type
bvector_type::enumerator bvector_enumerator_type
Definition: bmstrsparsevec.h:66
bmalgo.h
Algorithms for bvector<> (main include)
bm::bv_statistics::gap_blocks
size_t gap_blocks
Number of GAP blocks.
Definition: bmfunc.h:57
bm::str_sparse_vector::const_reference::const_reference
const_reference(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &str_sv, size_type idx) BMNOEXEPT
Definition: bmstrsparsevec.h:97
bm::str_sparse_vector::operator[]
reference operator[](size_type idx)
Operator to get write access to an element
Definition: bmstrsparsevec.h:421
bm::str_sparse_vector::parent_type
base_sparse_vector< CharType, BV, MAX_STR_SIZE > parent_type
Definition: bmstrsparsevec.h:69
bm::str_sparse_vector::back_insert_iterator::operator*
back_insert_iterator & operator*()
noop
Definition: bmstrsparsevec.h:308
bm::str_sparse_vector::equal
bool equal(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &sv, bm::null_support null_able=bm::use_null) const
check if another sparse vector has the same content and size
Definition: bmstrsparsevec.h:1609
bm::str_sparse_vector::back_insert_iterator::~back_insert_iterator
~back_insert_iterator()
Definition: bmstrsparsevec.h:1803
bm::str_sparse_vector::push_back
void push_back(const value_type *str)
push back a string (zero terminated)
Definition: bmstrsparsevec.h:534
bm::str_sparse_vector::get
size_type get(size_type idx, value_type *str, size_type buf_size) const
get specified element
Definition: bmstrsparsevec.h:1259
bm::str_sparse_vector::empty
bool empty() const
return true if vector is empty
Definition: bmstrsparsevec.h:640
bm::str_sparse_vector::bvector_type_const_ptr
const typedef bvector_type * bvector_type_const_ptr
Definition: bmstrsparsevec.h:61
bm::str_sparse_vector::set_null
void set_null(size_type idx)
set NULL status for the specified element Vector is resized automatically
Definition: bmstrsparsevec.h:1161
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::const_reference
const typedef value_type & const_reference
Definition: bmbmatrix.h:263
bm::str_sparse_vector::allocator_type
BV::allocator_type allocator_type
Definition: bmstrsparsevec.h:64
bm::xor_swap
void xor_swap(W &x, W &y)
XOR swap two scalar variables.
Definition: bmfunc.h:876
bm::bvector
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:107
bm::use_null
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:216
bm::str_sparse_vector::size
size_type size() const
return size of the vector
Definition: bmstrsparsevec.h:635
bm::str_sparse_vector::is_compressed
static bool is_compressed()
trait if sparse vector is "compressed" (false)
Definition: bmstrsparsevec.h:733
bm::str_sparse_vector::reference
Reference class to access elements via common [] operator.
Definition: bmstrsparsevec.h:121
bm::id_max
const unsigned id_max
Definition: bmconst.h:105
bm::str_sparse_vector
sparse vector for strings with compression using bit transposition method
Definition: bmstrsparsevec.h:56
bm::str_sparse_vector::swap
void swap(str_sparse_vector &str_sv) BMNOEXEPT
Definition: bmstrsparsevec.h:1111
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::clear_range
void clear_range(size_type left, size_type right, bool set_null)
Definition: bmbmatrix.h:1185
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::get_null_bvect
bvector_type * get_null_bvect()
Definition: bmbmatrix.h:368
bm::str_sparse_vector::is_rsc_support::trait
trait
Definition: bmstrsparsevec.h:88
bm::str_sparse_vector::remap_flags_
unsigned remap_flags_
remapping status
Definition: bmstrsparsevec.h:1077
bm::str_sparse_vector::back_insert_iterator::operator=
back_insert_iterator & operator=(const StrType &v)
push value to the vector
Definition: bmstrsparsevec.h:302
bm::basic_bmatrix::set_octet
void set_octet(size_type pos, size_type octet_idx, unsigned char octet)
Definition: bmbmatrix.h:785
bm::str_sparse_vector::operator=
str_sparse_vector< CharType, BV, MAX_STR_SIZE > & operator=(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &str_sv)
Definition: bmstrsparsevec.h:382
bm::str_sparse_vector::get_back_inserter
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
Definition: bmstrsparsevec.h:719
bm::str_sparse_vector::end
const_iterator end() const
Provide const iterator access to the end
Definition: bmstrsparsevec.h:707
bm::str_sparse_vector::is_remap_support
Definition: bmstrsparsevec.h:87
bm::str_sparse_vector::const_iterator::reference
CharType *& reference
Definition: bmstrsparsevec.h:184
bm::bvector::opt_compress
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:134
bm::bvector::allocation_policy
memory allocation policy
Definition: bm.h:1243
bm::str_sparse_vector::remap_size
size_t remap_size() const
Definition: bmstrsparsevec.h:1062
bm::str_sparse_vector::is_remap_support::value
@ value
Definition: bmstrsparsevec.h:87
bm::str_sparse_vector::const_reference
Reference class to access elements via common [] operator.
Definition: bmstrsparsevec.h:94
bm::str_sparse_vector::reference::is_null
bool is_null() const
Definition: bmstrsparsevec.h:149
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::size_
size_type size_
array size
Definition: bmbmatrix.h:463
bm::str_sparse_vector::back_insert_iterator::allocator_type
bvector_type::allocator_type allocator_type
Definition: bmstrsparsevec.h:274
bm::str_sparse_vector::back_insert_iterator::size_type
str_sparse_vector_type::size_type size_type
Definition: bmstrsparsevec.h:272
bm::str_sparse_vector::const_iterator::invalidate
void invalidate()
Invalidate current iterator.
Definition: bmstrsparsevec.h:225
bm::str_sparse_vector::bvector_type_ptr
bvector_type * bvector_type_ptr
Definition: bmstrsparsevec.h:60
bm::str_sparse_vector::decode
size_type decode(CharMatrix &cmatr, size_type idx_from, size_type dec_size, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
Definition: bmstrsparsevec.h:824
bm::str_sparse_vector::reference::reference
reference(str_sparse_vector< CharType, BV, MAX_STR_SIZE > &str_sv, size_type idx) BMNOEXEPT
Definition: bmstrsparsevec.h:124
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::resize
void resize(size_type new_size)
Definition: bmbmatrix.h:1213
bm::str_sparse_vector::sync
void sync(bool force)
syncronize internal structures
Definition: bmstrsparsevec.h:1598
bm::str_sparse_vector::find_rank
static bool find_rank(size_type rank, size_type &pos)
find position of compressed element by its rank
Definition: bmstrsparsevec.h:1388
bm::str_sparse_vector::const_iterator::operator==
bool operator==(const const_iterator &it) const
Definition: bmstrsparsevec.h:191
bm::str_sparse_vector::back_insert_iterator::operator=
back_insert_iterator & operator=(const back_insert_iterator &bi)
Definition: bmstrsparsevec.h:286
bm::str_sparse_vector::str_sparse_vector
str_sparse_vector(str_sparse_vector< CharType, BV, MAX_STR_SIZE > &&str_sv) BMNOEXEPT
Definition: bmstrsparsevec.h:394
BM_ASSERT
#define BM_ASSERT
Definition: bmdef.h:117
bm::str_sparse_vector::allocation_policy_type
bvector_type::allocation_policy allocation_policy_type
Definition: bmstrsparsevec.h:65
bm::str_sparse_vector::size_type
bvector_type::size_type size_type
Definition: bmstrsparsevec.h:63
bm::base_sparse_vector::is_null
bool is_null(size_type idx) const
test if specified element is NULL
Definition: bmbmatrix.h:1231
bm::str_sparse_vector::const_iterator
Const iterator to do quick traverse of the sparse vector.
Definition: bmstrsparsevec.h:167
bm::str_sparse_vector::build_octet_remap
static void build_octet_remap(plain_octet_matrix_type &octet_remap_matrix1, plain_octet_matrix_type &octet_remap_matrix2, const plain_octet_matrix_type &octet_occupancy_matrix)
Definition: bmstrsparsevec.h:1443
bm::bit_list
unsigned bit_list(T w, B *bits)
Unpacks word into list of ON bit indexes.
Definition: bmfunc.h:426
bm::str_sparse_vector::back_insert_iterator::add
void add(const value_type *v)
add value to the container
Definition: bmstrsparsevec.h:1831
bm::str_sparse_vector::remap_matrix1_
plain_octet_matrix_type remap_matrix1_
octet remap table 1
Definition: bmstrsparsevec.h:1078
bm::str_sparse_vector::const_iterator::operator!=
bool operator!=(const const_iterator &it) const
Definition: bmstrsparsevec.h:193
bm::str_sparse_vector::const_iterator::difference_type
long long difference_type
Definition: bmstrsparsevec.h:182
bm::str_sparse_vector::reference::operator==
bool operator==(const reference &ref) const
Definition: bmstrsparsevec.h:147
bm::str_sparse_vector::clear_range
str_sparse_vector< CharType, BV, MAX_STR_SIZE > & clear_range(size_type left, size_type right, bool set_null=false)
clear range (assign bit 0 for all plains)
Definition: bmstrsparsevec.h:618
bm::str_sparse_vector::is_rsc_support::value
@ value
Definition: bmstrsparsevec.h:88
bm::str_sparse_vector::const_iterator::is_null
bool is_null() const
Get NULL status.
Definition: bmstrsparsevec.h:219
bm::basic_bmatrix::construct_row
bvector_type_ptr construct_row(size_type row)
Definition: bmbmatrix.h:681
bmdef.h
Definitions(internal)
bm::bv_statistics::ptr_sub_blocks
size_t ptr_sub_blocks
Number of sub-blocks.
Definition: bmfunc.h:58
bm::str_sparse_vector::sv_octet_plains
@ sv_octet_plains
Definition: bmstrsparsevec.h:77
bm::str_sparse_vector::remap_fromsv
static bool remap_fromsv(value_type *str, size_type buf_size, const value_type *sv_str, const plain_octet_matrix_type &octet_remap_matrix1)
Definition: bmstrsparsevec.h:1530
bm::basic_bmatrix
Basic dense bit-matrix class.
Definition: bmbmatrix.h:53
bm::str_sparse_vector::operator[]
const_reference operator[](size_type idx) const
Operator to get read access to an element
Definition: bmstrsparsevec.h:424
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::copy_from
void copy_from(const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv)
Definition: bmbmatrix.h:1121
bm::str_sparse_vector::calc_octet_stat
void calc_octet_stat(plain_octet_matrix_type &octet_matrix) const
Definition: bmstrsparsevec.h:1417
bm::str_sparse_vector::const_reference::operator==
bool operator==(const const_reference &ref) const
Definition: bmstrsparsevec.h:108
bm::str_sparse_vector::back_insert_iterator::reference
void reference
Definition: bmstrsparsevec.h:279
bm::str_sparse_vector::back_insert_iterator::difference_type
void difference_type
Definition: bmstrsparsevec.h:277
bm::str_sparse_vector::begin
const_iterator begin() const
Provide const iterator access to container content
Definition: bmstrsparsevec.h:1637
bm::bv_statistics::add
void add(const bv_statistics &st)
Sum data from another sttructure.
Definition: bmfunc.h:102
bm::str_sparse_vector::const_iterator::operator>=
bool operator>=(const const_iterator &it) const
Definition: bmstrsparsevec.h:201
bm::str_sparse_vector::reference::operator=
reference & operator=(const value_type *str)
Definition: bmstrsparsevec.h:142
bm::str_sparse_vector::str_sparse_vector
str_sparse_vector(bm::null_support null_able=bm::no_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
Sparse vector constructor.
Definition: bmstrsparsevec.h:1087
bm::bv_statistics::bv_count
size_t bv_count
Number of bit-vectors.
Definition: bmfunc.h:59
bm::str_sparse_vector::import_no_check
void import_no_check(CharMatrix &cmatr, size_type idx_from, size_type imp_size, bool set_not_null=true)
Definition: bmstrsparsevec.h:947
bm::str_sparse_vector::const_iterator::operator>
bool operator>(const const_iterator &it) const
Definition: bmstrsparsevec.h:199
bm::str_sparse_vector::const_iterator::operator++
const_iterator & operator++()
Advance to the next available value.
Definition: bmstrsparsevec.h:208
bmbmatrix.h
basic bit-matrix class and utilities
bm::str_sparse_vector::const_iterator::bvector_type
str_sparse_vector_type::bvector_type bvector_type
Definition: bmstrsparsevec.h:178
bm::str_sparse_vector::remap_tosv
static bool remap_tosv(value_type *sv_str, size_type buf_size, const value_type *str, const plain_octet_matrix_type &octet_remap_matrix2)
Definition: bmstrsparsevec.h:1502
bm::str_sparse_vector::back_insert_iterator
Back insert iterator implements buffered insert, faster than generic access assignment.
Definition: bmstrsparsevec.h:263
bm
Definition: bm.h:76
bm::str_sparse_vector::insert
void insert(size_type idx, const value_type *str)
insert the specified element
Definition: bmstrsparsevec.h:1133
bm::str_sparse_vector::throw_range_error
static void throw_range_error(const char *err_msg)
throw range error
Definition: bmstrsparsevec.h:1655
bm::str_sparse_vector::back_insert_iterator::iterator_category
std::output_iterator_tag iterator_category
Definition: bmstrsparsevec.h:267
bm::bv_statistics::memory_used
size_t memory_used
memory usage for all blocks and service tables
Definition: bmfunc.h:61
bm::str_sparse_vector::const_iterator::str_sparse_vector_type_ptr
str_sparse_vector_type * str_sparse_vector_type_ptr
Definition: bmstrsparsevec.h:175
bm::str_sparse_vector::const_iterator::operator<=
bool operator<=(const const_iterator &it) const
Definition: bmstrsparsevec.h:197
bm::null_support
null_support
NULL-able value support.
Definition: bmconst.h:214
bm::str_sparse_vector::resize
void resize(size_type sz)
resize vector
Definition: bmstrsparsevec.h:645
bm::str_sparse_vector::compare
int compare(size_type idx, const value_type *str) const
Compare vector element with argument lexicographically.
Definition: bmstrsparsevec.h:1332
bm::str_sparse_vector::const_iterator::valid
bool valid() const
Returns true if iterator is at a valid position.
Definition: bmstrsparsevec.h:222
bmtrans.h
Utilities for bit transposition (internal) (experimental!)
bm::word_t
unsigned int word_t
Definition: bmconst.h:38
bm::str_sparse_vector::recalc_remap_matrix2
void recalc_remap_matrix2()
Definition: bmstrsparsevec.h:1476
bm::str_sparse_vector::set_value_no_null
void set_value_no_null(size_type idx, const value_type *str)
set value without checking boundaries or support of NULL
Definition: bmstrsparsevec.h:1187
bm::str_sparse_vector::optimize
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *stat=0)
run memory optimization for all vector plains
Definition: bmstrsparsevec.h:1287
bm::str_sparse_vector::back_insert_iterator::value_type
str_sparse_vector_type::value_type value_type
Definition: bmstrsparsevec.h:271
BMNOEXEPT
#define BMNOEXEPT
Definition: bmdef.h:78
bm::str_sparse_vector::bvector_type
BV bvector_type
Definition: bmstrsparsevec.h:59
bm::str_sparse_vector::back_insert_iterator::add_null
void add_null()
add NULL (no-value) to the container
Definition: bmstrsparsevec.h:1850
bm::bvector::size_type
bm::id_t size_type
Definition: bm.h:117
bm::str_sparse_vector::back_insert_iterator::str_sparse_vector_type
str_sparse_vector< CharType, BV, MAX_STR_SIZE > str_sparse_vector_type
Definition: bmstrsparsevec.h:269
bm::str_sparse_vector::const_iterator::iterator_category
std::input_iterator_tag iterator_category
Definition: bmstrsparsevec.h:172
bm::str_sparse_vector::back_insert_iterator::pointer
void pointer
Definition: bmstrsparsevec.h:278
bm::str_sparse_vector::back_insert_iterator::empty
bool empty() const
return true if insertion buffer is empty
Definition: bmstrsparsevec.h:1811
bm::str_sparse_vector::import_back
void import_back(CharMatrix &cmatr, size_type imp_size)
Bulk push-back import of strings from a C-style matrix of chars.
Definition: bmstrsparsevec.h:905
bm::str_sparse_vector::set
void set(size_type idx, const value_type *str)
set specified element with bounds checking and automatic resize
Definition: bmstrsparsevec.h:1122
bm::str_sparse_vector::is_remap_support::trait
trait
Definition: bmstrsparsevec.h:87
bm::str_sparse_vector::back_insert_iterator::add_value
size_type add_value(const value_type *v)
add value to the buffer without changing the NULL vector
Definition: bmstrsparsevec.h:1870
bm::sparse_vector_serializer
Definition: bmsparsevec_serial.h:157
bm::bv_statistics
Structure with statistical information about memory allocation footprint, serialization projection,...
Definition: bmfunc.h:54
bm::str_sparse_vector::const_iterator::advance
void advance()
advance iterator forward by one
Definition: bmstrsparsevec.h:1752
bm::str_sparse_vector::assign
void assign(size_type idx, const StrType &str)
set specified element with bounds checking and automatic resize
Definition: bmstrsparsevec.h:490
bm::str_sparse_vector::get_const_iterator
const_iterator get_const_iterator(size_type idx) const
Get const_itertor re-positioned to specific element.
Definition: bmstrsparsevec.h:712
bm::str_sparse_vector::is_remap
bool is_remap() const
Get remapping status (true|false)
Definition: bmstrsparsevec.h:748
bm::str_sparse_vector::remap_from
void remap_from(const str_sparse_vector &str_sv)
Build remapping profile and load content from another sparse vector.
Definition: bmstrsparsevec.h:1558
bm::str_sparse_vector::statistics
Definition: bmstrsparsevec.h:72
bm::str_sparse_vector::clear
void clear() BMNOEXEPT
resize to zero, free memory
Definition: bmstrsparsevec.h:1647
bm::str_sparse_vector::max_str
static size_type max_str()
get maximum string length capacity
Definition: bmstrsparsevec.h:650
bm::str_sparse_vector::effective_size
size_type effective_size() const
size of sparse vector (may be different for RSC)
Definition: bmstrsparsevec.h:941
bm::str_sparse_vector::back_insert_iterator::operator++
back_insert_iterator & operator++()
noop
Definition: bmstrsparsevec.h:310
bm::basic_bmatrix::get_row
bvector_type_const_ptr get_row(size_type i) const
Definition: bmbmatrix.h:537
bm::bvector::statistics
Statistical information about bitset's memory allocation details.
Definition: bm.h:121
bm::str_sparse_vector::common_prefix_length
unsigned common_prefix_length(size_type idx1, size_type idx2) const
Find size of common prefix between two vector elements in octets.
Definition: bmstrsparsevec.h:1361
bm::str_sparse_vector::erase
void erase(size_type idx)
erase the specified element
Definition: bmstrsparsevec.h:1149
bm::str_sparse_vector::bmatrix_type
bm::basic_bmatrix< BV > bmatrix_type
Definition: bmstrsparsevec.h:68
bm::str_sparse_vector::const_iterator::operator++
const_iterator & operator++(int)
Advance to the next available value.
Definition: bmstrsparsevec.h:211
bm::str_sparse_vector::back_insert_iterator::str_sparse_vector_type_ptr
str_sparse_vector_type * str_sparse_vector_type_ptr
Definition: bmstrsparsevec.h:270