16 std::vector<std::vector<BIT_SEQUENCE>> bits_seq;
17 std::vector<int64_t> char_rank_vec;
18 std::vector<uint8_t> alphabet;
24 this->alphabet.clear();
43 std::vector<bool> bits;
64 this->bits_seq[
h][i].swap(
dbs);
65 if (
h + 1 < rank_bit_size)
69 std::vector<uint8_t>
left;
85 std::vector<uint8_t>
right;
104 void build_from_text(
const std::vector<uint8_t> &
_text,
const std::vector<uint8_t> &
_alphabet){
119 this->char_rank_vec.
swap(
item.char_rank_vec);
121 std::swap(this->rank_bit_size,
item.rank_bit_size);
131 this->bits_seq[i][j].clear();
135 this->bits_seq.clear();
136 this->char_rank_vec.clear();
137 this->alphabet.clear();
141 void set_alphabet(
const std::vector<uint8_t> &
_alphabet)
147 this->alphabet.push_back(
c);
149 std::sort(this->alphabet.begin(),
this->alphabet.end());
151 this->char_rank_vec.resize(256, -1);
154 this->char_rank_vec[alphabet[i]] = i;
156 this->rank_bit_size = stool::LSBByte::get_code_length(alphabet.size() - 1);
158 for (
uint64_t i = 0; i < rank_bit_size; i++)
160 this->bits_seq.push_back(std::vector<BIT_SEQUENCE>());
177 return this->alphabet.
size();
186 this->bits_seq[i][j].
clear();
193 if (!this->has_empty_alphabet())
195 return this->bits_seq[0][0].
size();
204 return this->bits_seq.
size();
214 for (
uint64_t i = 0; i < this->rank_bit_size; i++)
216 bool b = stool::LSBByte::get_bit(
c_rank, this->rank_bit_size - 1 - i);
217 this->bits_seq[i][
pos].push_back(
b ? 1 : 0);
227 bool b1 = stool::LSBByte::get_bit(
c_rank, this->rank_bit_size - 1);
238 for (
uint64_t i = 1; i < this->rank_bit_size; i++)
240 bool bx = stool::LSBByte::get_bit(
c_rank, this->rank_bit_size - 1 - i);
242 if (
nth >= this->bits_seq[i][
next].size())
245 throw std::range_error(
"Error: DynamicSequence::rank_sub()");
281 while (
h < (
int64_t)this->rank_bit_size)
309 bool hbit = stool::LSBByte::get_bit(
c_rank, this->rank_bit_size -
h - 1);
313 j = (j * 2) + (
hbit ? 1 : 0);
318 throw std::range_error(
"Error: DynamicSequence::insert(nth, c_rank)");
323 bool has_empty_alphabet()
const
325 return this->rank_bit_size == 0;
329 if (!this->has_empty_alphabet())
335 if (
c_rank == -1 || i == 0)
341 return this->rank_sub(i - 1,
c_rank);
352 if (!this->has_empty_alphabet())
364 return this->select_sub(i + 1,
c_rank);
372 void push_many(
const std::vector<uint8_t> &
str)
381 assert(!this->has_empty_alphabet());
382 if (!this->has_empty_alphabet())
388 throw std::runtime_error(
"Error: DynamicSequence::push_back()");
392 this->push_back_sub(
c_rank);
397 throw std::runtime_error(
"Error: DynamicSequence::push_back()");
402 assert(!this->has_empty_alphabet());
403 if (!this->has_empty_alphabet())
413 j = (j * 2) + (
b ? 1 : 0);
419 throw std::range_error(
"Error: DynamicSequence::get_nth_char_rank(nth)");
424 throw std::range_error(
"Error: DynamicSequence::get_nth_char_rank(nth)");
429 assert(!this->has_empty_alphabet());
430 if (!this->has_empty_alphabet())
432 return this->alphabet[this->get_nth_char_rank(
nth)];
436 throw std::range_error(
"Error: DynamicSequence::at(nth)");
452 j = (j * 2) + (
b ? 1 : 0);
457 throw std::range_error(
"Error: DynamicSequence::remove(nth)");
463 assert(!this->has_empty_alphabet());
464 if (!this->has_empty_alphabet())
470 throw std::runtime_error(
"Error: DynamicSequence::insert(nth, c)");
479 throw std::runtime_error(
"Error: DynamicSequence::insert(nth, c)");
482 std::string to_string()
const
486 for (
uint64_t i = 0; i < this->size(); i++)
492 std::vector<uint8_t> to_uint8_str()
const
494 std::vector<uint8_t>
s;
496 for (
uint64_t i = 0; i < this->size(); i++)
504 return this->char_rank_vec[
c];
509 std::cout <<
"================DynamicSequence==================" << std::endl;
510 stool::DebugPrinter::print_characters(this->alphabet,
"Alphabet");
511 stool::DebugPrinter::print_integers(this->char_rank_vec,
"Char_rank_vec");
512 std::cout <<
"Rank_bit_size: " << this->rank_bit_size << std::endl;
514 for (
uint64_t i = 0; i < this->rank_bit_size; i++)
516 std::cout <<
"level: " << i <<
" / count: " << this->bits_seq[i].
size() << std::endl;
519 std::cout << this->bits_seq[i][j].to_string() << std::endl;
522 std::cout <<
"==================================" << std::endl;
530 uint64_t get_smallest_character_in_alphabet()
const
532 assert(this->alphabet.size() > 0);
533 return this->alphabet[0];
547 return this->bits_seq[this->bits_seq.size() - 1][idx].count_c(
b);
556 for (
auto &
it :
item.bits_seq)
576 for (
auto &
it :
r.bits_seq)
599 void print_statistics(
int message_paragraph = stool::Message::SHOW_MESSAGE)
const
601 std::cout << stool::Message::get_paragraph_string(
message_paragraph) <<
"Statistics(DynamicWaveletTree):" << std::endl;
602 std::cout << stool::Message::get_paragraph_string(
message_paragraph + 1) <<
"Alphabet size: " << this->alphabet.
size() << std::endl;
603 std::cout << stool::Message::get_paragraph_string(
message_paragraph + 1) <<
"Text length: " << this->size() << std::endl;
604 std::cout << stool::Message::get_paragraph_string(
message_paragraph + 1) <<
"Estimated memory usage: " << this->size_in_bytes() <<
" bytes" << std::endl;
605 std::cout << stool::Message::get_paragraph_string(
message_paragraph) <<
"[END]" << std::endl;
607 std::vector<std::string> get_memory_usage_info(
int message_paragraph = stool::Message::SHOW_MESSAGE)
const
609 std::vector<std::string>
r;
610 r.
push_back(stool::Message::get_paragraph_string(
message_paragraph) +
"=DynamicWaveletTree: " + std::to_string(this->size_in_bytes()) +
" bytes =");
636 std::string
s = this->to_string();
637 std::cout << stool::Message::get_paragraph_string(
message_paragraph) <<
"Content(DynamicWaveletTree):" << std::endl;
638 std::cout << stool::Message::get_paragraph_string(
message_paragraph+1) <<
"String: " <<
s <<
"(" <<
s.
size() <<
")" << std::endl;
639 std::cout << stool::Message::get_paragraph_string(
message_paragraph) <<
"[END]" << std::endl;
642 void print_memory_usage(
int message_paragraph = stool::Message::SHOW_MESSAGE)
const
645 for (std::string &
s :
log)
647 std::cout <<
s << std::endl;