16 std::vector<std::vector<BIT_SEQUENCE>> bits_seq;
17 std::vector<int64_t> char_rank_vec;
18 std::vector<uint8_t> alphabet;
19 uint64_t rank_bit_size = 0;
31 this->alphabet.clear();
70 uint8_t operator[](uint64_t i)
const
88 return this->bits_seq[0][0].size();
101 return this->alphabet.size();
109 return this->rank_bit_size == 0;
117 assert(this->alphabet.size() > 0);
118 return this->alphabet[0];
125 return this->char_rank_vec[c];
133 return this->bits_seq.size();
141 uint64_t total_size_in_bytes = 0;
142 for (uint64_t h = 0; h < this->bits_seq.size(); h++)
146 total_size_in_bytes += seq.size_in_bytes(only_dynamic_memory);
149 return total_size_in_bytes;
163 uint64_t
at(uint64_t i)
const
172 throw std::range_error(
"Error: DynamicSequence::at(i)");
183 assert(c < char_rank_vec.size());
184 int64_t c_rank = this->char_rank_vec[c];
185 assert(c_rank <= (int64_t)this->alphabet.size());
187 if (c_rank == -1 || i == 0)
193 return this->rank_sub(i - 1, c_rank);
205 int64_t
select(uint64_t i, uint64_t c)
const
209 assert(c < char_rank_vec.size());
210 int64_t c_rank = this->char_rank_vec[c];
211 assert(c_rank <= (int64_t)this->alphabet.size());
219 return this->select_sub(i + 1, c_rank);
237 if (i < this->
size())
239 uint64_t next_nth = i;
241 for (int64_t h = 0; h < (int64_t)this->
height(); h++)
243 bool b = this->bits_seq[h][j].at(next_nth);
244 next_nth = this->bits_seq[h][j].one_based_rank(next_nth, b);
245 j = (j * 2) + (b ? 1 : 0);
251 throw std::range_error(
"Error: DynamicSequence::get_nth_char_rank(nth)");
256 throw std::range_error(
"Error: DynamicSequence::get_nth_char_rank(nth)");
273 uint64_t idx = (c_rank / 2);
274 uint64_t bit_idx = 0;
275 bool b = stool::LSBByte::get_bit(c_rank, bit_idx);
276 return this->bits_seq[this->bits_seq.size() - 1][idx].count_c(b);
293 s.resize(this->
size());
294 for (uint64_t i = 0; i < this->
size(); i++)
306 std::vector<uint8_t> s;
307 s.resize(this->
size());
308 for (uint64_t i = 0; i < this->
size(); i++)
320 std::vector<uint8_t> r = this->alphabet;
336 this->bits_seq.swap(item.bits_seq);
338 this->char_rank_vec.swap(item.char_rank_vec);
339 this->alphabet.swap(item.alphabet);
340 std::swap(this->rank_bit_size, item.rank_bit_size);
350 for (
auto c : _alphabet)
352 this->alphabet.push_back(c);
354 std::sort(this->alphabet.begin(), this->alphabet.end());
356 this->char_rank_vec.resize(256, -1);
357 for (uint64_t i = 0; i < alphabet.size(); i++)
359 this->char_rank_vec[alphabet[i]] = i;
361 this->rank_bit_size = stool::LSBByte::get_code_length(alphabet.size() - 1);
363 for (uint64_t i = 0; i < rank_bit_size; i++)
365 this->bits_seq.push_back(std::vector<BIT_SEQUENCE>());
372 uint64_t x = 1ULL << i;
373 for (uint64_t j = 0; j < x; j++)
386 for (uint64_t i = 0; i < this->bits_seq.size(); i++)
388 for (uint64_t j = 0; j < this->bits_seq[i].size(); j++)
390 this->bits_seq[i][j].clear();
403 int64_t c_rank = this->char_rank_vec[c];
407 throw std::runtime_error(
"Error: DynamicSequence::push_back()");
411 this->push_back_sub(c_rank);
416 throw std::runtime_error(
"Error: DynamicSequence::push_back()");
440 int64_t c_rank = this->char_rank_vec[c];
444 throw std::runtime_error(
"Error: DynamicSequence::insert(i, c)");
448 this->insert_sub(i, c_rank);
453 throw std::runtime_error(
"Error: DynamicSequence::insert(i, c)");
462 if (i < this->
size())
464 uint64_t next_nth = i;
466 for (int64_t h = 0; h < (int64_t)this->
height(); h++)
468 uint64_t current_nth = next_nth;
469 bool b = this->bits_seq[h][j].at(current_nth);
470 next_nth = this->bits_seq[h][j].one_based_rank(current_nth, b);
471 this->bits_seq[h][j].remove(current_nth);
472 j = (j * 2) + (b ? 1 : 0);
477 throw std::range_error(
"Error: DynamicSequence::remove(nth)");
494 std::vector<std::string> r;
495 r.push_back(stool::Message::get_paragraph_string(message_paragraph) +
"=DynamicWaveletTree: " + std::to_string(this->
size_in_bytes()) +
" bytes =");
496 uint64_t counter = 0;
497 for (uint64_t h = 0; h < this->bits_seq.size(); h++)
503 std::vector<std::string> log1 = seq.get_memory_usage_info(message_paragraph + 1);
514 r.push_back(stool::Message::get_paragraph_string(message_paragraph) +
"Omitted details of " + std::to_string(counter - 5) +
" dynamic bit sequences.");
517 r.push_back(stool::Message::get_paragraph_string(message_paragraph) +
"==");
526 std::cout << stool::Message::get_paragraph_string(message_paragraph) <<
"Statistics(DynamicWaveletTree):" << std::endl;
527 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) <<
"Alphabet size: " << this->alphabet.size() << std::endl;
528 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) <<
"Text length: " << this->
size() << std::endl;
529 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) <<
"Estimated memory usage: " << this->
size_in_bytes() <<
" bytes" << std::endl;
530 std::cout << stool::Message::get_paragraph_string(message_paragraph) <<
"[END]" << std::endl;
539 for (std::string &s : log)
541 std::cout << s << std::endl;
548 void print_content(
int message_paragraph = stool::Message::SHOW_MESSAGE)
const
551 std::cout << stool::Message::get_paragraph_string(message_paragraph) <<
"Content(DynamicWaveletTree):" << std::endl;
552 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) <<
"String: " << s <<
"(" << s.size() <<
")" << std::endl;
553 std::cout << stool::Message::get_paragraph_string(message_paragraph) <<
"[END]" << std::endl;
561 std::cout <<
"================DynamicSequence==================" << std::endl;
562 stool::DebugPrinter::print_characters(this->alphabet,
"Alphabet");
563 stool::DebugPrinter::print_integers(this->char_rank_vec,
"Char_rank_vec");
564 std::cout <<
"Rank_bit_size: " << this->rank_bit_size << std::endl;
566 for (uint64_t i = 0; i < this->rank_bit_size; i++)
568 std::cout <<
"level: " << i <<
" / count: " << this->bits_seq[i].size() << std::endl;
569 for (uint64_t j = 0; j < this->bits_seq[i].size(); j++)
571 std::cout << this->bits_seq[i][j].to_string() << std::endl;
574 std::cout <<
"==================================" << std::endl;
588 dwt.build_bits(_text, 0, 0);
598 uint64_t _alphabet_size = 0;
599 ifs.read(
reinterpret_cast<char *
>(&_alphabet_size),
sizeof(uint64_t));
601 std::vector<uint8_t> _alphabet;
602 _alphabet.resize(_alphabet_size);
603 ifs.read(
reinterpret_cast<char *
>(_alphabet.data()), _alphabet_size *
sizeof(uint8_t));
606 for (
auto &it : r.bits_seq)
621 uint64_t alphabet_size = item.alphabet.size();
622 os.write(
reinterpret_cast<const char *
>(&alphabet_size),
sizeof(uint64_t));
623 os.write(
reinterpret_cast<const char *
>(item.alphabet.data()), alphabet_size *
sizeof(uint8_t));
624 for (
auto &it : item.bits_seq)
635 void build_bits(
const std::vector<uint8_t> &_text, uint64_t h, uint64_t i)
637 uint64_t bit_idx = this->rank_bit_size - h - 1;
638 std::vector<bool> bits;
639 bits.resize(_text.size(),
false);
640 uint64_t counter = 0;
641 uint64_t bits1_count = 0;
642 uint64_t bits0_count = 0;
644 for (uint8_t c : _text)
646 int64_t c_rank = this->char_rank_vec[c];
647 bool b = stool::LSBByte::get_bit(c_rank, bit_idx);
659 this->bits_seq[h][i].swap(dbs);
660 if (h + 1 < rank_bit_size)
662 uint64_t next_i = i * 2;
664 std::vector<uint8_t> left;
665 left.resize(bits0_count);
668 for (uint8_t c : _text)
670 int64_t c_rank = this->char_rank_vec[c];
671 bool b = stool::LSBByte::get_bit(c_rank, bit_idx);
677 this->build_bits(left, h + 1, next_i);
680 std::vector<uint8_t> right;
681 right.resize(bits1_count);
684 for (uint8_t c : _text)
686 int64_t c_rank = this->char_rank_vec[c];
687 bool b = stool::LSBByte::get_bit(c_rank, bit_idx);
690 right[counter++] = c;
693 this->build_bits(right, h + 1, next_i + 1);
701 for (uint64_t i = 0; i < this->bits_seq.size(); i++)
703 for (uint64_t j = 0; j < this->bits_seq[i].size(); j++)
705 this->bits_seq[i][j].clear();
709 this->bits_seq.clear();
710 this->char_rank_vec.clear();
711 this->alphabet.clear();
715 void push_back_sub(uint64_t c_rank)
718 uint64_t _size = this->
size();
721 for (uint64_t i = 0; i < this->rank_bit_size; i++)
723 bool b = stool::LSBByte::get_bit(c_rank, this->rank_bit_size - 1 - i);
724 this->bits_seq[i][pos].push_back(b ? 1 : 0);
725 pos = (pos * 2) + (b ? 1 : 0);
729 assert(this->
size() == _size + 1);
732 int64_t rank_sub(uint64_t i, uint64_t c_rank)
const
734 bool b1 = stool::LSBByte::get_bit(c_rank, this->rank_bit_size - 1);
735 uint64_t _rank = this->bits_seq[0][0].one_based_rank(i + 1, b1);
742 uint64_t nth = _rank - 1;
743 uint64_t next = b1 ? 1 : 0;
745 for (uint64_t i = 1; i < this->rank_bit_size; i++)
747 bool bx = stool::LSBByte::get_bit(c_rank, this->rank_bit_size - 1 - i);
749 if (nth >= this->bits_seq[i][next].
size())
752 throw std::range_error(
"Error: DynamicSequence::rank_sub()");
755 uint64_t _rankx = this->bits_seq[i][next].one_based_rank(nth + 1, bx);
763 next = (next * 2) + (bx ? 1 : 0);
769 int64_t select_sub(uint64_t nth, uint64_t c_rank)
const
771 int64_t depth = this->bits_seq.size();
774 int64_t j = c_rank / 2;
775 bool hbit = stool::LSBByte::get_bit(c_rank, h);
776 assert(j < (int64_t)this->bits_seq[depth - h - 1].
size());
777 uint64_t hbit_rank = this->bits_seq[depth - h - 1][j].count_c(hbit);
779 if (nth <= hbit_rank)
781 int64_t hbit_select = this->bits_seq[depth - h - 1][j].select(nth - 1, hbit);
783 assert(hbit_select >= 0);
785 int64_t next_nth = hbit_select + 1;
788 while (h < (int64_t)this->rank_bit_size)
790 hbit = stool::LSBByte::get_bit(c_rank, h);
791 assert(next_nth <= this->bits_seq[depth - h - 1][j].
count_c(hbit));
792 hbit_select = this->bits_seq[depth - h - 1][j].select(next_nth - 1, hbit);
794 assert(hbit_select >= 0);
795 next_nth = hbit_select + 1;
799 assert(next_nth > 0);
807 void insert_sub(uint64_t nth, uint8_t c_rank)
809 if (nth <= this->
size())
812 uint64_t next_nth = nth;
814 for (int64_t h = 0; h < (int64_t)this->
height(); h++)
816 bool hbit = stool::LSBByte::get_bit(c_rank, this->rank_bit_size - h - 1);
817 assert(next_nth <= this->bits_seq[h][j].
size());
818 this->bits_seq[h][j].insert(next_nth, hbit);
819 next_nth = this->bits_seq[h][j].one_based_rank(next_nth, hbit);
820 j = (j * 2) + (hbit ? 1 : 0);
825 throw std::range_error(
"Error: DynamicSequence::insert(nth, c_rank)");
828 void build_from_text(
const std::vector<uint8_t> &_text,
const std::vector<uint8_t> &_alphabet)
A dynamic data structure supporting rank and select queries on a bit sequence.