b-tree-plus-alpha
Loading...
Searching...
No Matches
bp_tree.hpp
1#pragma once
2#include "./bp_internal_node_functions.hpp"
3#include "./bp_postorder_iterator.hpp"
4#include "./bp_value_forward_iterator.hpp"
5#include "./bp_leaf_forward_iterator.hpp"
6
7namespace stool
8{
9 namespace bptree
10 {
11 inline constexpr int DEFAULT_MAX_DEGREE_OF_INTERNAL_NODE = 126;
12 inline constexpr int DEFAULT_MAX_COUNT_OF_VALUES_IN_LEAF = 126;
13
14
15 inline static uint64_t time_count2 = 0;
16
22 template <typename LEAF_CONTAINER, typename VALUE, bool USE_PARENT_FIELD, bool USE_PSUM, uint64_t MAX_DEGREE, uint64_t LEAF_CONTAINER_MAX_SIZE>
23 class BPTree
24 {
25 public:
31
33
34
35 private:
36 std::vector<LEAF_CONTAINER> leaf_container_vec;
37 std::vector<Node *> parent_vec;
38 std::stack<uint64_t> unused_leaf_container_indexes;
39 std::vector<Node *> unused_node_pointers;
40 std::vector<NodePointer> tmp_path;
41
42 /*
43 uint64_t _max_degree_of_internal_node = 8;
44 uint64_t _max_count_of_values_in_leaf = 8;
45 */
46 Node *root = nullptr;
47 BPTree *linked_tree_ = nullptr;
48 double density_threshold = 0.5;
49 uint64_t density1 = 0;
50 uint16_t height_ = 0;
51 bool root_is_leaf_ = false;
52
53 uint64_t split_process_counter = 0;
54 uint64_t insert_operation_counter = 0;
55 uint64_t merge_process_counter = 0;
56 uint64_t remove_operation_counter = 0;
57
58
59 public:
60 ~BPTree()
61 {
62 this->clear();
63 }
64 BPTree()
65 {
66 }
67 BPTree(const BPTree &) = delete;
68 BPTree(BPTree &&other) noexcept
69 {
70 this->leaf_container_vec = std::move(other.leaf_container_vec);
71 this->parent_vec = std::move(other.parent_vec);
72 this->unused_leaf_container_indexes = std::move(other.unused_leaf_container_indexes);
73 this->unused_node_pointers = std::move(other.unused_node_pointers);
74 this->tmp_path = std::move(other.tmp_path);
75
76 //this->_max_degree_of_internal_node = other._max_degree_of_internal_node;
77 //this->_max_count_of_values_in_leaf = other._max_count_of_values_in_leaf;
78 this->root = other.root;
79 this->linked_tree_ = other.linked_tree_;
80 this->density_threshold = other.density_threshold;
81 this->density1 = other.density1;
82 this->height_ = other.height_;
83 this->root_is_leaf_ = other.root_is_leaf_;
84
85 other.leaf_container_vec.clear();
86 other.parent_vec.clear();
87 other.unused_node_pointers.clear();
88 other.tmp_path.clear();
89 while (other.unused_leaf_container_indexes.size() > 0)
90 {
91 other.unused_leaf_container_indexes.pop();
92 }
93
94 other.root = nullptr;
95 other.linked_tree_ = nullptr;
96 other.height_ = 0;
97 other.root_is_leaf_ = false;
98 }
99
100 BPTree &operator=(BPTree &&other) noexcept
101 {
102 if (this != &other)
103 {
104 this->clear();
105 this->leaf_container_vec = std::move(other.leaf_container_vec);
106 this->parent_vec = std::move(other.parent_vec);
107 this->unused_leaf_container_indexes = std::move(other.unused_leaf_container_indexes);
108 this->unused_node_pointers = std::move(other.unused_node_pointers);
109 this->tmp_path = std::move(other.tmp_path);
110
111 //this->_max_degree_of_internal_node = other._max_degree_of_internal_node;
112 //this->_max_count_of_values_in_leaf = other._max_count_of_values_in_leaf;
113 this->root = other.root;
114 this->linked_tree_ = other.linked_tree_;
115 this->density_threshold = other.density_threshold;
116 this->density1 = other.density1;
117 this->height_ = other.height_;
118 this->root_is_leaf_ = other.root_is_leaf_;
119
120 other.leaf_container_vec.clear();
121 other.parent_vec.clear();
122 other.unused_node_pointers.clear();
123 other.tmp_path.clear();
124 while (other.unused_leaf_container_indexes.size() > 0)
125 {
126 other.unused_leaf_container_indexes.pop();
127 }
128
129 other.root = nullptr;
130 other.linked_tree_ = nullptr;
131 other.height_ = 0;
132 other.root_is_leaf_ = false;
133
134 // other->clear();
135 }
136 return *this;
137 }
141
142
143
151 {
152 uint64_t __max_degree_of_internal_node = MAX_DEGREE;
153 uint64_t __max_count_of_values_in_leaf = LEAF_CONTAINER_MAX_SIZE;
154 if (__max_degree_of_internal_node < 4)
155 {
156 throw std::runtime_error("Error: BPTree::initialize(__max_degree_of_internal_node, __max_count_of_values_in_leaf). The __max_degree_of_internal_node must be larger than 3.");
157 }
158 if (__max_count_of_values_in_leaf < 4)
159 {
160 throw std::runtime_error("Error: BPTree::initialize(__max_degree_of_internal_node, __max_count_of_values_in_leaf). The __max_count_of_values_in_leaf must be larger than 3.");
161 }
162
163 this->clear();
164 //this->_max_degree_of_internal_node = __max_degree_of_internal_node;
165 //this->_max_count_of_values_in_leaf = __max_count_of_values_in_leaf;
166 this->root = nullptr;
167 this->root_is_leaf_ = false;
168 }
169
178 void swap(BPTree &_tree, bool swap_linked_tree)
179 {
180 this->leaf_container_vec.swap(_tree.leaf_container_vec);
181 this->parent_vec.swap(_tree.parent_vec);
182 this->unused_node_pointers.swap(_tree.unused_node_pointers);
183 this->unused_leaf_container_indexes.swap(_tree.unused_leaf_container_indexes);
184 this->tmp_path.swap(_tree.tmp_path);
185 //std::swap(this->_max_degree_of_internal_node, _tree._max_degree_of_internal_node);
186 //std::swap(this->_max_count_of_values_in_leaf, _tree._max_count_of_values_in_leaf);
187 std::swap(this->root, _tree.root);
188 if (swap_linked_tree)
189 {
190 std::swap(this->linked_tree_, _tree.linked_tree_);
191 }
192 std::swap(this->density_threshold, _tree.density_threshold);
193 std::swap(this->density1, _tree.density1);
194 std::swap(this->height_, _tree.height_);
195 std::swap(this->root_is_leaf_, _tree.root_is_leaf_);
196 }
197
203 void swap(BPTree &_tree)
204 {
205 this->swap(_tree, true);
206 }
207
219 void clear()
220 {
221 std::stack<Node *> nodes;
222 if (!this->empty() && !this->root_is_leaf_)
223 {
224 nodes.push(this->root);
225 }
226 while (nodes.size() > 0)
227 {
228 Node *top = nodes.top();
229 nodes.pop();
230 if (!top->is_parent_of_leaves())
231 {
232 const stool::SimpleDeque16<Node *> &children = top->get_children();
233 for (Node *child : children)
234 {
235 nodes.push(child);
236 }
237 }
238 delete top;
239 }
240 this->root = nullptr;
241 this->root_is_leaf_ = false;
242 this->height_ = 0;
243
244 while (unused_node_pointers.size() > 0)
245 {
246 Node *top = unused_node_pointers[unused_node_pointers.size() - 1];
247 delete top;
248 unused_node_pointers.pop_back();
249 }
250
251 this->leaf_container_vec.resize(0);
252 this->parent_vec.resize(0);
253
254 while (this->unused_leaf_container_indexes.size() > 0)
255 {
256 this->unused_leaf_container_indexes.pop();
257 }
258 }
259 //}@
260
265
266
270 const std::vector<NodePointer> &get_temporary_path() const
271 {
272 return this->tmp_path;
273 }
274
279 {
280 return MAX_DEGREE;
281 }
282
283 uint64_t get_split_process_counter() const
284 {
285 return this->split_process_counter;
286 }
287
288 uint64_t capacity() const{
289 return (this->leaf_container_vec.size() - this->unused_leaf_container_indexes.size()) * this->get_max_count_of_values_in_leaf();
290 }
291
292 double get_value_density() const{
293 return (double) this->size() / (double) this->capacity();
294 }
295
301 {
302 return LEAF_CONTAINER_MAX_SIZE;
303 }
304
308 uint64_t height() const
309 {
310 return this->height_;
311 }
312
316 uint64_t size() const
317 {
318 if (this->empty())
319 {
320 return 0;
321 }
322 else
323 {
324 if (this->root_is_leaf_)
325 {
326 return this->leaf_container_vec[(uint64_t)this->root].size();
327 }
328 else
329 {
330 return this->root->get_value_count();
331 }
332 }
333 }
337 uint64_t get_leaf_count() const
338 {
339 return this->leaf_container_vec.size() - this->unused_leaf_container_indexes.size();
340 }
341
346 {
347 return this->leaf_container_vec.size();
348 }
349 //}@
350
355
356
367 {
368 if (this->empty())
369 {
370 return PostorderIterator(nullptr);
371 }
372 else
373 {
374 if (this->root_is_leaf_)
375 {
376 return PostorderIterator((uint64_t)this->root);
377 }
378 else
379 {
380 return PostorderIterator(this->root);
381 }
382 }
383 }
384
392 {
393 return PostorderIterator(nullptr);
394 }
395
405 {
406 if (this->empty())
407 {
408 return ValueForwardIterator(nullptr, nullptr);
409 }
410 else
411 {
413 return ValueForwardIterator(&node_it, &this->leaf_container_vec);
414 }
415 }
416
424 {
425 return ValueForwardIterator(nullptr, nullptr);
426 }
427
437 {
438 if (this->empty())
439 {
440 return LeafForwardIterator(nullptr);
441 }
442 else
443 {
444 return LeafForwardIterator(this->root);
445 }
446 }
447
458
463 bool empty() const
464 {
465 return this->height() == 0;
466 }
467
472 uint64_t psum() const
473 {
474 if (!this->empty())
475 {
476 if (this->root_is_leaf_)
477 {
478 return this->leaf_container_vec[(uint64_t)this->root].psum();
479 }
480 else
481 {
482 return BPFunctions::psum(*this->root);
483 }
484 }
485 else
486 {
487 return 0;
488 }
489 }
490
496 uint64_t psum(uint64_t i) const
497 {
498 if (i >= this->size())
499 {
500 throw std::invalid_argument("Error: BPTree::psum(i). The i must be less than the size of the tree.");
501 }
502 assert(!this->empty());
503 if (!this->empty())
504 {
505 if (this->root_is_leaf_)
506 {
507 return this->leaf_container_vec[(uint64_t)this->root].psum(i);
508 }
509 else
510 {
511 return BPFunctions::psum(*this->root, i, this->leaf_container_vec);
512 }
513 }
514 else
515 {
516 throw std::runtime_error("Error:psum(i)");
517 }
518 }
519
525 int64_t select0(uint64_t i) const
526 {
527 if (!this->empty())
528 {
529 if (this->root_is_leaf_)
530 {
531 return this->leaf_container_vec[(uint64_t)this->root].select0(i);
532 }
533 else
534 {
535 return BPFunctions::select0(*this->root, i, this->leaf_container_vec);
536 }
537 }
538 else
539 {
540 return -1;
541 }
542 }
543
549 int64_t search(uint64_t sum) const
550 {
551 if (!this->empty())
552 {
553 if (this->root_is_leaf_)
554 {
555 return this->leaf_container_vec[(uint64_t)this->root].search(sum);
556 }
557 else
558 {
559 return BPFunctions::search(*this->root, sum, this->leaf_container_vec);
560 }
561 }
562 else
563 {
564 return -1;
565 }
566 }
567 inline static uint64_t time_count = 0;
568
574 VALUE at(uint64_t pos) const
575 {
576 if (!this->empty())
577 {
578 assert(pos < this->size());
579 std::vector<NodePointer> path;
580
581 uint64_t idx = this->compute_path_from_root_to_leaf(pos);
582
583 assert(this->tmp_path.size() > 0);
584
585 uint64_t leaf = this->tmp_path[this->tmp_path.size() - 1].get_leaf_container_index();
586
587 #ifdef TIME_DEBUG
588 auto st3 = std::chrono::system_clock::now();
589 #endif
590 uint64_t result = this->leaf_container_vec[leaf].at(idx);
591
592 #ifdef TIME_DEBUG
593 auto st4 = std::chrono::system_clock::now();
594 time_count2 += std::chrono::duration_cast<std::chrono::nanoseconds>(st4 - st3).count();
595 #endif
596
597 return result;
598 }
599 else
600 {
601 throw std::invalid_argument("Error: BPTree::at()");
602 }
603 }
604
610 uint64_t get_value_index(uint64_t leaf_index, uint64_t position_in_leaf_container) const
611 {
612 if (USE_PARENT_FIELD)
613 {
614 assert(leaf_index < this->parent_vec.size());
615 Node *parent = this->parent_vec[leaf_index];
616 uint64_t dist = position_in_leaf_container;
617
618 if (parent == nullptr)
619 {
620 return dist;
621 }
622 else
623 {
624
625 Node *node = (Node *)leaf_index;
626 while (parent != nullptr)
627 {
628
629 int64_t idx = parent->get_index(node);
630 assert(idx != -1);
631
632 const auto &deq = parent->get_value_count_deque();
633 if(idx > 0){
634 uint64_t sum = deq.psum(idx-1);
635 dist += sum;
636 }
637 //uint64_t sum = std::reduce(std::begin(deq), std::next(deq.begin(), idx));
638
639 node = parent;
640 parent = node->get_parent();
641 }
642 return dist;
643 }
644 }
645 else
646 {
647 throw std::runtime_error("BPTree::get_value_index(): this function is not supported.");
648 }
649 }
650
656 const LEAF_CONTAINER &get_leaf_container(uint64_t leaf_index) const
657 {
658 return this->leaf_container_vec[leaf_index];
659 }
660
666 LEAF_CONTAINER &get_leaf_container(uint64_t leaf_index)
667 {
668 assert(leaf_index < this->get_leaf_container_vector_size());
669
670 return this->leaf_container_vec[leaf_index];
671 }
672
674
679
680
685 std::vector<VALUE> to_value_vector() const
686 {
687
688 if (!this->empty())
689 {
690 if (this->root_is_leaf_)
691 {
692 std::vector<VALUE> r;
693 this->leaf_container_vec[(uint64_t)this->root].to_values(r);
694 return r;
695 }
696 else
697 {
698
699 std::vector<VALUE> r;
700 BPFunctions::to_value_vector(*this->root, r, this->leaf_container_vec);
701 return r;
702 }
703 }
704 else
705 {
706 std::vector<VALUE> r;
707 return r;
708 }
709 }
710
712
717
718 public:
719
720 bool verify_sub() const {
721 bool b = true;
722 uint64_t p = 0;
723 for (PostorderIterator it = this->get_postorder_iterator_begin(); it != this->get_postorder_iterator_end(); ++it)
724 {
725 NodePointer pt = *it;
726 if (pt.is_leaf())
727 {
728 assert(pt.get_leaf_container_index() < this->leaf_container_vec.size());
729 uint64_t size = this->leaf_container_vec[pt.get_leaf_container_index()].size();
730 if (this->root_is_leaf_ && pt.get_leaf_container_index() == (uint64_t)this->root)
731 {
732 b = b && (size > 0);
733 assert(b);
734 }
735 else
736 {
737
738 b = b && (size > 0) && ((this->get_max_count_of_values_in_leaf() / 2) <= size);
739 assert(b);
740 }
741 p += size;
742 }
743 else
744 {
745 Node *_node = pt.get_node();
746 b = b && BPFunctions::verify(*_node, this->leaf_container_vec, MAX_DEGREE, this->root == pt.get_node());
747 assert(b);
748 }
749 // std::cout << it.idx << "//" << it._st.size() << "/" << std::flush;
750 }
751 if (p != this->size())
752 {
753 throw std::logic_error("Error(BPTree::verify)");
754 }
755 return b;
756 }
757 void verify_sum_deque(Node *node) const {
758 if(node->is_parent_of_leaves()){
759 uint64_t true_sum = 0;
760 auto &children = node->get_children();
761 for(uint64_t i = 0; i < children.size(); i++){
762 uint64_t id = (uint64_t)children[i];
763 true_sum += this->leaf_container_vec[id].psum();
764 }
765 if(true_sum != node->get_value_sum()){
766 throw std::runtime_error("Error: verify_sum_deque");
767 }
768 }else{
769 uint64_t true_sum = 0;
770 auto &children = node->get_children();
771 for(uint64_t i = 0; i < children.size(); i++){
772 Node *child = children[i];
773 this->verify_sum_deque(child);
774 true_sum += child->get_value_sum();
775 }
776
777 if(true_sum != node->get_value_sum()){
778 throw std::runtime_error("Error: verify_sum_deque");
779 }
780
781 }
782
783 }
788 bool verify() const
789 {
790 bool b1 = this->verify_sub();
791 if(USE_PSUM){
792 if(!this->root_is_leaf_ && this->size() > 0){
793 this->verify_sum_deque(this->root);
794 }
795
796 }
797 return b1;
798 }
799
806 void print_tree() const
807 {
808 std::vector<std::string> tree;
809 if (!this->empty())
810 {
811 if (this->root_is_leaf_)
812 {
813 tree.push_back(this->leaf_container_vec[(uint64_t)this->root].to_string());
814 }
815 else
816 {
817 BPFunctions::get_tree_string(*this->root, tree, this->leaf_container_vec);
818 }
819 }
820 else
821 {
822 tree.push_back("[]");
823 }
824
825 for (int64_t i = tree.size() - 1; i >= 0; i--)
826 {
827 std::cout << tree[i] << std::endl;
828 }
829
830 /*
831 if constexpr (USE_PSUM) {
832 if(!this->root_is_leaf_){
833 this->root->print_info();
834 }
835 }
836 */
837 }
838
845 void print_info(int message_paragraph = stool::Message::SHOW_MESSAGE) const
846 {
847 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "========= BPTREE =========" << std::endl;
848 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "Height: " << this->height_ << std::endl;
849
850 // uint64_t id = 0;
852 {
853 NodePointer pt = *it;
854
855 if (pt.is_leaf())
856 {
857 std::cout << stool::Message::get_paragraph_string(message_paragraph+1) << "Leaf: id = " << pt.get_leaf_container_index() << ", content = " << this->leaf_container_vec[pt.get_leaf_container_index()].to_string() << std::endl;
858
859 }
860 else
861 {
862 pt.get_node()->print_info(message_paragraph+1);
863 }
864 // id++;
865 }
866
867 if (USE_PARENT_FIELD)
868 {
869 std::vector<uint64_t> r;
870 for (Node *x : this->parent_vec)
871 {
872 r.push_back((uint64_t)x);
873 }
874 stool::Printer::print(r);
875 }
876 std::cout << "==========================" << std::endl;
877 }
878
884 void print_leaves() const
885 {
886 // uint64_t id = 0;
888 {
889 NodePointer pt = *it;
890 if (pt.is_leaf())
891 {
892 std::cout << "leaf_index: " << pt.get_leaf_container_index() << ", " << this->leaf_container_vec[pt.get_leaf_container_index()].to_string() << std::endl;
893 }
894 // id++;
895 }
896 }
897 void print_internal_nodes() const{
898 std::cout << "INTERNAL NODES: " << std::endl;
899 for (PostorderIterator it = this->get_postorder_iterator_begin(); it != this->get_postorder_iterator_end(); ++it)
900 {
901 NodePointer pt = *it;
902 if (!pt.is_leaf())
903 {
904 Node *node = (Node *)pt.get_node();
905 std::cout << " " << node->to_string() << std::endl;
906 }
907 // id++;
908 }
909
910 }
911
918 {
919 std::cout << "============ LEAVES ============" << std::endl;
920 for (uint64_t i = 0; i < this->leaf_container_vec.size(); i++)
921 {
922 std::cout << "leaf_container_vec[" << i << "] = " << this->leaf_container_vec[i].to_string() << std::endl;
923 }
924 std::cout << "============ LEAVES[END] ============" << std::endl;
925 }
926
936
942 {
943 uint64_t counter = 0;
944
946 {
947 NodePointer np = *it;
948 if (!np.is_leaf())
949 {
950 counter++;
951 }
952 }
953 return counter;
954 }
955
961 {
962 uint64_t counter = 0;
963
965 {
966 NodePointer np = *it;
967 if (np.is_leaf())
968 {
969 counter++;
970 }
971 }
972 return counter;
973 }
974
980 {
981 uint64_t sum1 = 0;
982
984 {
985 NodePointer pt = *it;
986 if (!pt.is_leaf())
987 {
988 sum1 += pt.get_node()->size_in_bytes();
989 sum1 += sizeof(Node *);
990 }
991 }
992 return sum1;
993 }
994
1000 {
1001 return sizeof(std::stack<uint64_t>) + (this->unused_leaf_container_indexes.size() * sizeof(uint64_t));
1002 }
1003
1009 {
1010 return sizeof(std::vector<uint64_t>) + (this->leaf_container_vec.capacity() * sizeof(LEAF_CONTAINER));
1011 }
1012
1018 {
1019 uint64_t sum2 = 0;
1020 for (const LEAF_CONTAINER &leaf : this->leaf_container_vec)
1021 {
1022 sum2 += leaf.size_in_bytes(true);
1023 }
1024 return sum2;
1025 }
1026
1027 uint64_t get_leaf_container_unused_memory() const
1028 {
1029 uint64_t sum2 = 0;
1030 for (const LEAF_CONTAINER &leaf : this->leaf_container_vec)
1031 {
1032 sum2 += leaf.unused_size_in_bytes();
1033 }
1034 return sum2;
1035 }
1036
1037
1043 {
1044 uint64_t sum6 = 0;
1045 for (Node *node : this->unused_node_pointers)
1046 {
1047 sum6 += node->size_in_bytes();
1048 }
1049
1050 sum6 += (sizeof(Node *) * this->unused_node_pointers.capacity()) + sizeof(std::vector<Node *>);
1051 return sum6;
1052 }
1053
1059 {
1060 return (sizeof(Node *) * this->parent_vec.capacity()) + sizeof(std::vector<Node *>);
1061 }
1062
1068 std::vector<std::string> get_memory_usage_info(int message_paragraph = stool::Message::SHOW_MESSAGE) const
1069 {
1070 std::vector<std::string> log;
1071 uint64_t size_in_bytes = this->size_in_bytes();
1072 uint64_t size = this->size();
1073 double bytes_per_value = size > 0 ? ((double)size_in_bytes / (double)size) : 0;
1074
1075 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + "=BP Tree: " + std::to_string(size_in_bytes) + " bytes, " + std::to_string(size) + " values, " + std::to_string(bytes_per_value) + " bytes per value =");
1076 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " Max Degree of Internal Node: " + std::to_string(MAX_DEGREE));
1077 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " Max Count of Values in Leaf: " + std::to_string(LEAF_CONTAINER_MAX_SIZE));
1078 //log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " The number of values: " + std::to_string(this->size()));
1079
1080 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " Internal Nodes: " + std::to_string(this->get_internal_node_memory()) + " bytes");
1081 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " Ununsed Leaf Pointers: " + std::to_string(this->get_unused_leaf_container_vector_memory()) + " bytes");
1082 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " Ununsed Node Pointers: " + std::to_string(this->get_unused_node_pointers_memory()) + " bytes");
1083 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " Leaf Vector: " + std::to_string(this->get_leaf_container_vector_memory()) + " bytes (vector capacity: " + std::to_string(this->leaf_container_vec.capacity()) + ")");
1084
1085 uint64_t value_count = this->size();
1086 uint64_t leaf_total_memory = this->get_leaf_container_memory();
1087 uint64_t leaf_total_unused_memory = this->get_leaf_container_unused_memory();
1088
1089 double byte_per_value = value_count > 0 ? (double)leaf_total_memory / (double)value_count : 0;
1090 double unused_byte_per_value = value_count > 0 ? (double)leaf_total_unused_memory / (double)value_count : 0;
1091
1092 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " Leaves : " + std::to_string(leaf_total_memory) + " bytes (" + std::to_string(byte_per_value) + " bytes per value, leaf count: " + std::to_string(this->leaf_container_vec.size()) + ")");
1093 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " Unused memory for leaves : " + std::to_string(leaf_total_unused_memory) + " bytes (" + std::to_string(unused_byte_per_value) + " bytes per value)");
1094
1095 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + " Parent Vector : " + std::to_string(this->get_parent_vector_memory()) + " bytes");
1096 log.push_back(stool::Message::get_paragraph_string(message_paragraph) + "==");
1097 return log;
1098 }
1099
1106 void print_memory_usage(int message_paragraph = stool::Message::SHOW_MESSAGE) const
1107 {
1108 std::vector<std::string> log = this->get_memory_usage_info(message_paragraph);
1109 for (std::string &s : log)
1110 {
1111 std::cout << s << std::endl;
1112 }
1113 }
1115
1120
1121
1122 public:
1129 void get_path_from_root_to_first_leaf(std::vector<NodePointer> &output_path) const
1130 {
1131 if (!this->empty())
1132 {
1133 Node *current_node = this->root;
1134 bool is_leaf = this->root_is_leaf_;
1135
1136 if (!is_leaf)
1137 {
1138 output_path.push_back(NodePointer::build_internal_node_pointer(this->root, -1));
1139 }
1140 else
1141 {
1142 output_path.push_back(NodePointer::build_leaf_pointer((uint64_t)this->root, -1));
1143 }
1144
1145 while (!is_leaf)
1146 {
1147 is_leaf = current_node->is_parent_of_leaves();
1148 current_node = current_node->get_child(0);
1149 if (!is_leaf)
1150 {
1151 output_path.push_back(NodePointer::build_internal_node_pointer(current_node, 0));
1152 }
1153 else
1154 {
1155 output_path.push_back(NodePointer::build_leaf_pointer((uint64_t)current_node, 0));
1156 }
1157 }
1158 }
1159 }
1160
1167 void get_path_from_root_to_last_leaf(std::vector<NodePointer> &output_path) const
1168 {
1169 if (!this->empty())
1170 {
1171 Node *current_node = this->root;
1172 bool is_leaf = this->root_is_leaf_;
1173
1174 if (!is_leaf)
1175 {
1176 output_path.push_back(NodePointer::build_internal_node_pointer(this->root, -1));
1177 }
1178 else
1179 {
1180 output_path.push_back(NodePointer::build_leaf_pointer((uint64_t)this->root, -1));
1181 }
1182
1183 while (!is_leaf)
1184 {
1185 is_leaf = current_node->is_parent_of_leaves();
1186 uint64_t idx = current_node->get_degree() - 1;
1187 current_node = current_node->get_child(idx);
1188 if (!is_leaf)
1189 {
1190 output_path.push_back(NodePointer::build_internal_node_pointer(current_node, idx));
1191 }
1192 else
1193 {
1194 output_path.push_back(NodePointer::build_leaf_pointer((uint64_t)current_node, idx));
1195 }
1196 }
1197 }
1198 }
1199
1207 int64_t compute_path_from_root_to_leaf(uint64_t i) const
1208 {
1209 std::vector<NodePointer> &path = const_cast<std::vector<NodePointer> &>(this->tmp_path);
1210 return this->compute_path_from_root_to_leaf(i, path);
1211 }
1212
1213
1225 int64_t compute_path_from_root_to_leaf(uint64_t i, std::vector<NodePointer> &output_path) const
1226 {
1227 if (output_path.size() != this->height_)
1228 {
1229 output_path.resize(this->height_);
1230 }
1231 uint64_t y = 0;
1232
1233 if (!this->empty())
1234 {
1235
1236 Node *current_node = this->root;
1237 uint64_t current_i = i;
1238 bool is_leaf = this->root_is_leaf_;
1239
1240 //auto st1 = std::chrono::system_clock::now();
1241
1242 output_path[y++] = is_leaf ? NodePointer::build_leaf_pointer((uint64_t)this->root, -1) : NodePointer::build_internal_node_pointer(this->root, -1);
1243
1244
1245 while (!is_leaf)
1246 {
1247
1248 assert(current_i <= current_node->get_value_count());
1249
1250 //auto st1 = std::chrono::system_clock::now();
1251 std::pair<int64_t, uint64_t> result = BPFunctions::access_child_index_by_value_index(*current_node, current_i);
1252 //auto st2 = std::chrono::system_clock::now();
1253 //time_count += std::chrono::duration_cast<std::chrono::nanoseconds>(st2 - st1).count();
1254
1255
1256 if (result.first != -1)
1257 {
1258 is_leaf = current_node->is_parent_of_leaves();
1259 current_node = current_node->get_child(result.first);
1260 current_i = result.second;
1261 output_path[y++] = is_leaf ? NodePointer::build_leaf_pointer((uint64_t)current_node, result.first) : NodePointer::build_internal_node_pointer(current_node, result.first);
1262 }
1263 else
1264 {
1265 throw std::runtime_error("Error: get_path_from_root_to_leaf2(1)");
1266 }
1267
1268 }
1269 //auto st2 = std::chrono::system_clock::now();
1270 //time_count2 += std::chrono::duration_cast<std::chrono::nanoseconds>(st2 - st1).count();
1271
1272 assert(output_path.size() == y);
1273 return current_i;
1274 }
1275 else
1276 {
1277 return -1;
1278 }
1279 }
1280
1282
1287
1288 private:
1295 void defragmentation()
1296 {
1297 std::vector<uint64_t> tmp;
1298 while (this->unused_leaf_container_indexes.size() > 0)
1299 {
1300 tmp.push_back(this->unused_leaf_container_indexes.top());
1301 this->unused_leaf_container_indexes.pop();
1302 }
1303
1304 std::sort(tmp.begin(), tmp.end(), [](const uint64_t &lhs, const uint64_t &rhs)
1305 { return lhs > rhs; });
1306
1307 std::vector<std::pair<Node *, uint64_t>> tmp_nodes;
1308 {
1309 std::unordered_set<uint64_t> checker2;
1310 {
1311 std::unordered_set<uint64_t> checker1;
1312 for (uint64_t p : tmp)
1313 {
1314 checker1.insert(p);
1315 }
1316
1317 for (int64_t i = this->leaf_container_vec.size() - 1; i >= 0; --i)
1318 {
1319 auto xs = checker1.find(i);
1320 if (xs == checker1.end())
1321 {
1322 checker2.insert(i);
1323 }
1324 if (checker2.size() > tmp.size())
1325 {
1326 break;
1327 }
1328 }
1329 }
1330
1331 for (BPPostorderIterator it = this->get_postorder_iterator_begin(); it != this->get_postorder_iterator_end(); ++it)
1332 {
1333 NodePointer pt = *it;
1334 if (!pt.is_leaf() && pt.get_node()->is_parent_of_leaves())
1335 {
1336 Node *node = pt.get_node();
1337 for (int64_t i = 0; i < node->children_count(); i++)
1338 {
1339 uint64_t x = (uint64_t)node->get_child(i);
1340 auto xs = checker2.find(x);
1341 if (xs != checker2.end())
1342 {
1343 tmp_nodes.push_back(std::pair<Node *, uint64_t>(node, i));
1344 }
1345 }
1346 }
1347 }
1348 assert(tmp_nodes.size() == checker2.size());
1349 }
1350
1351 std::sort(tmp_nodes.begin(), tmp_nodes.end(), [](const std::pair<Node *, uint64_t> &lhs, const std::pair<Node *, uint64_t> &rhs)
1352 {
1353 uint64_t x = (uint64_t)lhs.first->get_child(lhs.second);
1354 uint64_t y = (uint64_t)rhs.first->get_child(rhs.second);
1355 return x < y; });
1356
1357 uint64_t k = 0;
1358 while (tmp.size() > 0 && tmp_nodes.size() > 0)
1359 {
1360 std::pair<Node *, uint64_t> top = tmp_nodes[tmp_nodes.size() - 1];
1361 uint64_t idx = (uint64_t)top.first->get_child(top.second);
1362 uint64_t x = tmp[tmp.size() - 1];
1363 if (x < idx)
1364 {
1365 top.first->move_container_index(top.second, x, this->leaf_container_vec);
1366 tmp_nodes.pop_back();
1367 tmp.pop_back();
1368 k++;
1369 }
1370 else
1371 {
1372 tmp.pop_back();
1373 k++;
1374 }
1375 }
1376 this->leaf_container_vec.resize(this->leaf_container_vec.size() - k);
1377 /*
1378 //this->leaf_container_vec.shrink_to_fit();
1379 uint64_t gap = this->leaf_container_vec.size() * this->density_threshold;
1380 //this->leaf_container_vec.reserve(this->leaf_container_vec.size() + gap);
1381 this->density1 = gap;
1382 */
1383 }
1389 void clear_unused_node_pointers()
1390 {
1391 for (uint64_t i = 0; i < this->unused_node_pointers.size(); i++)
1392 {
1393 delete this->unused_node_pointers[i];
1394 }
1395 this->unused_node_pointers.clear();
1396 this->unused_node_pointers.shrink_to_fit();
1397 }
1404 void create_root_leaf(VALUE value)
1405 {
1406 uint64_t p = this->get_new_container_index();
1407 this->leaf_container_vec[p].push_back(value);
1408 this->root = (Node *)p;
1409 this->root_is_leaf_ = true;
1410 this->height_++;
1411 if (USE_PARENT_FIELD)
1412 {
1413 this->parent_vec[p] = nullptr;
1414 }
1415 }
1416
1428 void remove_empty_leaf(uint64_t idx, Node *parent, int64_t child_index)
1429 {
1430 if (parent != nullptr)
1431 {
1432 parent->remove_child(child_index);
1433 }
1434 if (USE_PARENT_FIELD)
1435 {
1436 this->parent_vec[idx] = nullptr;
1437 }
1438 // this->height_ = 0;
1439 this->unused_leaf_container_indexes.push(idx);
1440 this->leaf_container_vec[idx].clear();
1441 }
1442
1455 void remove_empty_node(Node *node, Node *parent, int64_t child_index)
1456 {
1457 // Node *parent = node->get_parent();
1458 if (parent != nullptr)
1459 {
1460 parent->remove_child(child_index);
1461
1462 // uint64_t edge = parent->find_child_index_by_child_pointer(node);
1463 // parent->remove_child(edge);
1464 }
1465 if (this->root == node)
1466 {
1467 this->root = nullptr;
1468 }
1469
1470 node->clear();
1471 if (this->unused_node_pointers.size() <= 4096)
1472 {
1473 this->unused_node_pointers.push_back(node);
1474 }
1475 else
1476 {
1477 delete node;
1478 }
1479 }
1480
1487 uint64_t get_new_container_index()
1488 {
1489 if (this->unused_leaf_container_indexes.size() == 0)
1490 {
1491 this->leaf_container_vec.push_back(LEAF_CONTAINER());
1492 if (USE_PARENT_FIELD)
1493 {
1494 this->parent_vec.push_back(nullptr);
1495 }
1496 this->unused_leaf_container_indexes.push(this->leaf_container_vec.size() - 1);
1497 }
1498 assert(this->unused_leaf_container_indexes.size() > 0);
1499 uint64_t idx = this->unused_leaf_container_indexes.top();
1500 this->unused_leaf_container_indexes.pop();
1501
1502 return idx;
1503 }
1504
1511 Node *get_new_node_pointer()
1512 {
1513 Node *new_node = nullptr;
1514 if (this->unused_node_pointers.size() > 0)
1515 {
1516 new_node = this->unused_node_pointers[this->unused_node_pointers.size() - 1];
1517 this->unused_node_pointers.pop_back();
1518 }
1519 else
1520 {
1521 new_node = new Node();
1522 }
1523
1524 return new_node;
1525 }
1526
1538 void split_process(const std::vector<NodePointer> &path, uint64_t t)
1539 {
1540 const NodePointer &top = path[t];
1541
1542 Node *node = top.get_node();
1543 Node *_new_right_node = nullptr;
1544 uint64_t parent_edge_index = 0;
1545 if (!top.is_leaf())
1546 {
1547 _new_right_node = this->get_new_node_pointer();
1548 _new_right_node->initialize(top.get_node()->is_parent_of_leaves(), this->leaf_container_vec);
1549 }
1550 else
1551 {
1552 _new_right_node = (Node *)this->get_new_container_index();
1553 }
1554
1555 Node *_parent = nullptr;
1556 if (t > 0)
1557 {
1558 _parent = path[t - 1].get_node();
1559 _parent->insert_child(top.get_parent_edge_index() + 1, _new_right_node, 0, 0);
1560 parent_edge_index = top.get_parent_edge_index();
1561 }
1562 else
1563 {
1564 _parent = this->get_new_node_pointer();
1565 if (top.is_leaf())
1566 {
1567 _parent->initialize((uint64_t)node, (uint64_t)_new_right_node, this->leaf_container_vec);
1568 }
1569 else
1570 {
1571 _parent->initialize(node, _new_right_node, this->leaf_container_vec);
1572 }
1573 parent_edge_index = 0;
1574
1575
1576 this->root = _parent;
1577 this->root_is_leaf_ = false;
1578 this->height_++;
1579
1580
1581
1582 if (USE_PARENT_FIELD)
1583 {
1584 if (top.is_leaf())
1585 {
1586 this->parent_vec[(uint64_t)node] = _parent;
1587 }
1588 else
1589 {
1590 node->set_parent(_parent);
1591 }
1592 }
1593 }
1594 if (USE_PARENT_FIELD)
1595 {
1596 if (top.is_leaf())
1597 {
1598 this->parent_vec[(uint64_t)_new_right_node] = _parent;
1599 }
1600 else
1601 {
1602 _new_right_node->set_parent(_parent);
1603 }
1604 }
1605
1606 this->split_node(node, _new_right_node, top.is_leaf(), _parent, parent_edge_index, this->parent_vec);
1607 }
1608
1619 uint64_t balance_for_insertion(const std::vector<NodePointer> &path, bool superLeftPushMode = false)
1620 {
1621 uint64_t split_counter = 0;
1622 for (int64_t t = path.size() - 1; t >= 0; --t)
1623 {
1624 const NodePointer &top = path[t];
1625 uint64_t degree = top.get_degree(this->leaf_container_vec);
1626 uint64_t threshold = top.is_leaf() ? LEAF_CONTAINER_MAX_SIZE : MAX_DEGREE;
1627 uint64_t LR_threshold = threshold;
1628 if(superLeftPushMode){
1629 LR_threshold = (threshold * 3) / 4;
1630 }
1631
1632 if (degree > threshold)
1633 {
1634 // this->split_process(path, t);
1635
1636 if (t > 0)
1637 {
1638 Node *parent = path[t - 1].get_node();
1639 uint64_t parent_edge_index = top.get_parent_edge_index();
1640 Node *leftSibling = parent_edge_index > 0 ? parent->get_child(parent_edge_index - 1) : nullptr;
1641 Node *rightSibling = parent_edge_index + 1 < parent->children_count() ? parent->get_child(parent_edge_index + 1) : nullptr;
1642 uint64_t leftSiblingDegree = UINT64_MAX;
1643 if (parent_edge_index > 0)
1644 {
1645 leftSiblingDegree = top.is_leaf() ? this->leaf_container_vec[(uint64_t)leftSibling].size() : leftSibling->get_degree();
1646
1647 //assert(leftSiblingDegree <= threshold);
1648 }
1649
1650 uint64_t rightSiblingDegree = UINT64_MAX;
1651 if (parent_edge_index + 1 < parent->children_count())
1652 {
1653 rightSiblingDegree = top.is_leaf() ? this->leaf_container_vec[(uint64_t)rightSibling].size() : rightSibling->get_degree();
1654 //assert(rightSiblingDegree <= threshold);
1655 }
1656
1657 if (leftSiblingDegree < LR_threshold || rightSiblingDegree < LR_threshold)
1658 {
1659 if (leftSiblingDegree < LR_threshold)
1660 {
1661 if (superLeftPushMode)
1662 {
1663 uint64_t diff = LR_threshold - leftSiblingDegree;
1664 this->move_values_left(leftSibling, top.get_node(), diff, top.is_leaf(), parent, parent_edge_index);
1665 }
1666 else
1667 {
1668 this->move_values_left(leftSibling, top.get_node(), 1, top.is_leaf(), parent, parent_edge_index);
1669 }
1670 }
1671 else
1672 {
1673 this->move_values_right(top.get_node(), rightSibling, 1, top.is_leaf(), parent, parent_edge_index);
1674 }
1675 break;
1676 }
1677 else
1678 {
1679 this->split_process(path, t);
1680 split_counter++;
1681 }
1682 }
1683 else
1684 {
1685 this->split_process(path, t);
1686 split_counter++;
1687 }
1688 }
1689 else
1690 {
1691 break;
1692 }
1693 }
1694 return split_counter;
1695 }
1696
1707 uint64_t balance_for_removal(const std::vector<NodePointer> &path)
1708 {
1709 // Node *current_node = &node;
1710
1711 uint64_t merge_counter = 0;
1712
1713 for (int64_t t = path.size() - 1; t >= 0; --t)
1714 {
1715 const NodePointer &top = path[t];
1716 uint64_t max_size = top.is_leaf() ? LEAF_CONTAINER_MAX_SIZE : MAX_DEGREE;
1717 uint64_t threshold = max_size / 2;
1718
1719 uint64_t degree = top.get_degree(this->leaf_container_vec);
1720 if (degree >= threshold)
1721 {
1722 break;
1723 }
1724 else
1725 {
1726 if (t > 0)
1727 {
1728 Node *parent = path[t - 1].get_node();
1729 uint64_t parent_edge_index = top.get_parent_edge_index();
1730 Node *leftSibling = parent_edge_index > 0 ? parent->get_child(parent_edge_index - 1) : nullptr;
1731 Node *rightSibling = parent_edge_index + 1 < parent->children_count() ? parent->get_child(parent_edge_index + 1) : nullptr;
1732 uint64_t leftSiblingDegree = 0;
1733 if (parent_edge_index > 0)
1734 {
1735 leftSiblingDegree = top.is_leaf() ? this->leaf_container_vec[(uint64_t)leftSibling].size() : leftSibling->get_degree();
1736 }
1737
1738 uint64_t rightSiblingDegree = 0;
1739 if (parent_edge_index + 1 < parent->children_count())
1740 {
1741 rightSiblingDegree = top.is_leaf() ? this->leaf_container_vec[(uint64_t)rightSibling].size() : rightSibling->get_degree();
1742 }
1743
1744 if (leftSiblingDegree != 0 || rightSiblingDegree != 0)
1745 {
1746 if (leftSiblingDegree > threshold)
1747 {
1748 this->move_values_right(leftSibling, top.get_node(), 1, top.is_leaf(), parent, parent_edge_index - 1);
1749 break;
1750 }
1751 else if (rightSiblingDegree > threshold)
1752 {
1753 this->move_values_left(top.get_node(), rightSibling, 1, top.is_leaf(), parent, parent_edge_index + 1);
1754 break;
1755 }
1756 else
1757 {
1758 assert(leftSiblingDegree == threshold || rightSiblingDegree == threshold);
1759 if (leftSiblingDegree == threshold)
1760 {
1761 this->move_values_left(leftSibling, top.get_node(), degree, top.is_leaf(), parent, parent_edge_index);
1762 if (top.is_leaf())
1763 {
1764 this->remove_empty_leaf(top.get_leaf_container_index(), parent, parent_edge_index);
1765 }
1766 else
1767 {
1768 this->remove_empty_node(top.get_node(), parent, parent_edge_index);
1769 }
1770 }
1771 else
1772 {
1773 this->move_values_right(top.get_node(), rightSibling, degree, top.is_leaf(), parent, parent_edge_index);
1774 if (top.is_leaf())
1775 {
1776 this->remove_empty_leaf(top.get_leaf_container_index(), parent, parent_edge_index);
1777 }
1778 else
1779 {
1780 this->remove_empty_node(top.get_node(), parent, parent_edge_index);
1781 }
1782 }
1783 merge_counter++;
1784 }
1785 }
1786 else
1787 {
1788 throw std::logic_error("Error(BPTree::balance_for_removal(1))");
1789 }
1790 }
1791 else
1792 {
1793 assert(t == 0);
1794 if (degree == 0)
1795 {
1796 throw std::logic_error("Error(BPTree::balance_for_removal(2))");
1797 }
1798 else if (degree == 1)
1799 {
1800 if (!this->root_is_leaf_)
1801 {
1802
1803 bool b = this->root->is_parent_of_leaves();
1804 Node *new_root = this->root->get_child(0);
1805 this->root->remove_child(0);
1806 this->remove_empty_node(this->root, nullptr, -1);
1807 this->root = new_root;
1808 this->root_is_leaf_ = b;
1809
1810
1811 if (USE_PARENT_FIELD)
1812 {
1813 if (b)
1814 {
1815 this->parent_vec[(uint64_t)new_root] = nullptr;
1816 }
1817 else
1818 {
1819 new_root->set_parent(nullptr);
1820 }
1821 }
1822 this->height_--;
1823 }
1824 break;
1825 }
1826 else
1827 {
1828 break;
1829 }
1830 }
1831 }
1832 }
1833 return merge_counter;
1834 }
1836
1841
1842 public:
1849 void push_back(VALUE value)
1850 {
1851 std::vector<VALUE> r;
1852 r.push_back(value);
1853 this->push_many(r);
1854 }
1855
1866 void push_front(VALUE value)
1867 {
1868 std::vector<NodePointer> path;
1870 if (path.size() > 0)
1871 {
1872 uint64_t leaf = path[path.size() - 1].get_leaf_container_index();
1873 this->leaf_container_vec[leaf].push_front(value);
1874
1875 uint64_t sum = this->leaf_container_vec[leaf].psum(0);
1876
1877 for (int64_t i = path.size() - 2; i >= 0; --i)
1878 {
1879 Node *parent = path[i].get_node();
1880 uint64_t parent_edge_index = path[i + 1].get_parent_edge_index();
1881 parent->increment(parent_edge_index, 1, sum);
1882 }
1883 this->balance_for_insertion(path, true);
1884 }
1885 else
1886 {
1887 this->create_root_leaf(value);
1888 }
1889 }
1890
1891 private:
1904 uint64_t push_many_to_leaf(const std::vector<VALUE> &values, uint64_t value_pos, const std::vector<NodePointer> &path)
1905 {
1906 uint64_t x = 0;
1907 uint64_t leaf = path[path.size() - 1].get_leaf_container_index();
1908 uint64_t len = 0;
1909 while ((value_pos + x) < values.size() && leaf_container_vec[leaf].size() <= this->get_max_count_of_values_in_leaf())
1910 {
1911
1912 this->leaf_container_vec[leaf].push_back(values[value_pos + x]);
1913 x++;
1914 len++;
1915 }
1916
1917 uint64_t sum = len > 0 ? this->leaf_container_vec[leaf].reverse_psum(len - 1) : 0;
1918
1919 for (int64_t j = path.size() - 2; j >= 0; --j)
1920 {
1921 Node *parent = path[j].get_node();
1922 uint64_t parent_edge_index = path[j + 1].get_parent_edge_index();
1923 assert(parent_edge_index < parent->children_count());
1924
1925 parent->increment(parent_edge_index, len, sum);
1926 }
1927
1928 this->balance_for_insertion(path, true);
1929
1930 assert(x > 0);
1931 return x;
1932 }
1933
1944 void push_back_to_internal_node(uint64_t leaf_index, const std::vector<NodePointer> &path)
1945 {
1946 Node *node = path[path.size() - 1].get_node();
1947 uint64_t child_count = this->leaf_container_vec[leaf_index].size();
1948 uint64_t child_sum = 0;
1949 if (USE_PSUM)
1950 {
1951 child_sum = this->leaf_container_vec[leaf_index].psum();
1952 }
1953 node->append_child(leaf_index, child_count, child_sum);
1954
1955 for (int64_t j = path.size() - 2; j >= 0; --j)
1956 {
1957 Node *parent = path[j].get_node();
1958 uint64_t parent_edge_index = path[j + 1].get_parent_edge_index();
1959 assert(parent_edge_index < parent->children_count());
1960 parent->increment(parent_edge_index, child_count, child_sum);
1961 }
1962
1963 this->balance_for_insertion(path, true);
1964 }
1965
1966 public:
1976 void push_many(std::vector<LEAF_CONTAINER> &containers)
1977 {
1978 uint64_t i = 0;
1979 std::vector<NodePointer> path;
1980 while (i < containers.size())
1981 {
1982 path.clear();
1984 if (path.size() > 1)
1985 {
1986 uint64_t p = this->get_new_container_index();
1987 this->leaf_container_vec[p].swap(containers[i]);
1988 path.pop_back();
1989 this->push_back_to_internal_node(p, path);
1990
1991 }
1992 else
1993 {
1994
1995 std::vector<VALUE> tmp_values;
1996 containers[i].to_values(tmp_values);
1997 this->push_many(tmp_values);
1998
1999 }
2000 i++;
2001 }
2002 }
2003
2013 void push_many(const std::vector<VALUE> &values)
2014 {
2015 uint64_t i = 0;
2016 std::vector<NodePointer> path;
2017
2018
2019 while (i < values.size())
2020 {
2021 path.clear();
2023 if (path.size() > 0)
2024 {
2025 i += this->push_many_to_leaf(values, i, path);
2026 }
2027 else
2028 {
2029 this->create_root_leaf(values[i]);
2030 i++;
2031 }
2032 }
2033 }
2034
2045 uint64_t remove_using_path(const std::vector<NodePointer> &path, uint64_t position_in_leaf_container)
2046 {
2047 uint64_t merge_counter = 0;
2048 uint64_t x = path[path.size() - 1].get_leaf_container_index();
2049 int64_t delta = leaf_container_vec[x].psum(position_in_leaf_container, position_in_leaf_container);
2050 leaf_container_vec[x].remove(position_in_leaf_container);
2051 for (int64_t i = path.size() - 2; i >= 0; i--)
2052 {
2053 Node *node = path[i].get_node();
2054 uint64_t child_index = path[i + 1].get_parent_edge_index();
2055
2056 node->increment(child_index, -1, -delta);
2057 }
2058
2059 if (path.size() > 1)
2060 {
2061 merge_counter += this->balance_for_removal(this->tmp_path);
2062 }
2063 else
2064 {
2065 assert(path.size() == 1);
2066 if (leaf_container_vec[x].size() == 0)
2067 {
2068 this->remove_empty_leaf((uint64_t)this->root, nullptr, -1);
2069 this->root = nullptr;
2070 this->root_is_leaf_ = false;
2071 this->height_ = 0;
2072 }
2073 }
2074 return merge_counter;
2075 }
2076
2086 void remove(uint64_t pos)
2087 {
2088 if (!this->empty())
2089 {
2090 assert(pos < this->size());
2091 this->remove_operation_counter++;
2092
2093 int64_t position_to_remove = this->compute_path_from_root_to_leaf(pos);
2094 this->merge_process_counter += this->remove_using_path(this->tmp_path, position_to_remove);
2095 }
2096 else
2097 {
2098 throw std::invalid_argument("Error in BPTree::remove(pos). This tree is empty. pos = " + std::to_string(pos));
2099 }
2100 /*
2101 if (this->need_deflag())
2102 {
2103 this->defragmentation();
2104 }
2105 */
2106 }
2107
2120 void insert(uint64_t pos, VALUE value, uint64_t sum_delta)
2121 {
2122 if (pos < this->size())
2123 {
2124 if (!this->empty())
2125 {
2126 assert(pos <= this->size());
2127
2128 int64_t position_to_insert = this->compute_path_from_root_to_leaf(pos);
2129
2130 if (position_to_insert != -1)
2131 {
2132 uint64_t x = this->tmp_path[this->tmp_path.size() - 1].get_leaf_container_index();
2133
2134
2135
2136 this->leaf_container_vec[x].insert(position_to_insert, value);
2137
2138 for (int64_t i = this->tmp_path.size() - 2; i >= 0; i--)
2139 {
2140 Node *node = this->tmp_path[i].get_node();
2141 uint64_t child_index = this->tmp_path[i + 1].get_parent_edge_index();
2142
2143
2144 node->increment(child_index, 1, sum_delta);
2145 }
2146 this->split_process_counter += this->balance_for_insertion(this->tmp_path);
2147 }
2148 else
2149 {
2150 throw std::runtime_error("Error: insert");
2151 }
2152 }
2153 else
2154 {
2155
2156 if (pos == 0)
2157 {
2158 this->create_root_leaf(value);
2159 }
2160 else
2161 {
2162 throw std::invalid_argument("Error(BPTree::insert)");
2163 }
2164 }
2165 }
2166 else
2167 {
2168 assert(pos == this->size());
2169 this->push_back(value);
2170 }
2171 this->insert_operation_counter++;
2172 }
2173 /*
2174 void insert(uint64_t pos, VALUE value, uint64_t sum_delta)
2175 {
2176 std::vector<NodePointer> path;
2177 this->insert(pos, value, sum_delta, path);
2178
2179 }
2180 */
2190 void resize(uint64_t _size, VALUE default_value)
2191 {
2192 if (this->size() < _size)
2193 {
2194 uint64_t i = 0;
2195 std::vector<NodePointer> path;
2196 uint64_t diff = _size - this->size();
2197
2198 while (i < diff)
2199 {
2200 path.clear();
2202 if (path.size() > 0)
2203 {
2204 uint64_t leaf = path[path.size() - 1].get_leaf_container_index();
2205 uint64_t len = 0;
2206 while (i < diff && leaf_container_vec[leaf].size() <= this->get_max_count_of_values_in_leaf())
2207 {
2208 this->leaf_container_vec[leaf].push_back(default_value);
2209 i++;
2210 len++;
2211 }
2212 uint64_t sum = len > 0 ? this->leaf_container_vec[leaf].reverse_psum(len - 1) : 0;
2213
2214 for (int64_t j = path.size() - 2; j >= 0; --j)
2215 {
2216 Node *parent = path[j].get_node();
2217 uint64_t parent_edge_index = path[j + 1].get_parent_edge_index();
2218 parent->increment(parent_edge_index, len, sum);
2219 }
2220 this->balance_for_insertion(path, true);
2221 }
2222 else
2223 {
2224 this->create_root_leaf(default_value);
2225 i++;
2226 }
2227 }
2228 }
2229 else
2230 {
2231 while (this->size() > _size)
2232 {
2233 this->remove(this->size() - 1);
2234 }
2235 }
2236 }
2237
2244 {
2245 this->linked_tree_ = _tree;
2246 }
2247
2254 {
2255 return this->linked_tree_;
2256 }
2257
2259
2264
2265
2278 void move_values_right(Node *left_node, Node *right_node, uint64_t len, bool is_leaf, Node *parent, int64_t parent_edge_index_of_left_node)
2279 {
2280 if (is_leaf)
2281 {
2282 uint64_t left_leaf = (uint64_t)left_node;
2283 uint64_t right_leaf = (uint64_t)right_node;
2284 assert(left_leaf < this->leaf_container_vec.size());
2285 assert(right_leaf < this->leaf_container_vec.size());
2286
2287 if constexpr (USE_PSUM){
2288 if (parent != nullptr)
2289 {
2290 assert(this->leaf_container_vec[left_leaf].psum() == parent->get_value_sum_deque().at(parent_edge_index_of_left_node));
2291 int64_t sum = len != 0 ? this->leaf_container_vec[left_leaf].reverse_psum(len - 1) : 0;
2292
2293 assert(sum <= (int64_t)this->leaf_container_vec[left_leaf].psum());
2294
2295 //assert(parent->get_value_sum_deque().at(parent_edge_index_of_left_node) >= sum);
2296
2297
2298
2299
2300 parent->__increment_a_value_of_sum_deque(parent_edge_index_of_left_node, -sum);
2301 parent->__increment_a_value_of_sum_deque(parent_edge_index_of_left_node + 1, sum);
2302 }
2303
2304 }
2305
2306
2307 if (parent != nullptr)
2308 {
2309 auto &parent_count_deq = parent->get_value_count_deque();
2310 parent_count_deq.decrement(parent_edge_index_of_left_node, len);
2311 parent_count_deq.increment(parent_edge_index_of_left_node + 1, len);
2312 }
2313 auto items = this->leaf_container_vec[left_leaf].pop_back(len);
2314 this->leaf_container_vec[right_leaf].push_front(items);
2315 }
2316 else
2317 {
2318 BPFunctions::move_right(*left_node, *right_node, len, parent, parent_edge_index_of_left_node, this->parent_vec);
2319 }
2320 }
2321
2334 void move_values_left(Node *left_node, Node *right_node, uint64_t len, bool is_leaf, Node *parent, int64_t parent_edge_index_of_right_node)
2335 {
2336 if (is_leaf)
2337 {
2338 uint64_t left_leaf = (uint64_t)left_node;
2339 uint64_t right_leaf = (uint64_t)right_node;
2340 assert(left_leaf < this->leaf_container_vec.size());
2341
2342 if (USE_PSUM && parent != nullptr)
2343 {
2344 uint64_t sum = len != 0 ? this->leaf_container_vec[right_leaf].psum(len - 1) : 0;
2345 parent->__increment_a_value_of_sum_deque(parent_edge_index_of_right_node, -sum);
2346 parent->__increment_a_value_of_sum_deque(parent_edge_index_of_right_node - 1, sum);
2347 }
2348
2349 if (parent != nullptr)
2350 {
2351 auto &parent_count_deq = parent->get_value_count_deque();
2352 parent_count_deq.decrement(parent_edge_index_of_right_node, len);
2353 parent_count_deq.increment(parent_edge_index_of_right_node - 1, len);
2354 }
2355 auto items = this->leaf_container_vec[right_leaf].pop_front(len);
2356 this->leaf_container_vec[left_leaf].push_back(items);
2357 }
2358 else
2359 {
2360 BPFunctions::move_left(*left_node, *right_node, len, parent, parent_edge_index_of_right_node, this->parent_vec);
2361 }
2362 }
2363
2378 void split_node(Node *left_node, Node *right_node, bool is_leaf, Node *parent, int64_t parent_edge_index_of_left_node, [[maybe_unused]] std::vector<Node *> &parent_vec)
2379 {
2380
2381 uint64_t degree = 0;
2382 if (is_leaf)
2383 {
2384 degree = leaf_container_vec[(uint64_t)left_node].size();
2385 assert((uint64_t)left_node < this->leaf_container_vec.size());
2386 assert((uint64_t)right_node < this->leaf_container_vec.size());
2387 assert(this->leaf_container_vec[(uint64_t)left_node].size() == this->get_max_count_of_values_in_leaf() + 1);
2388 }
2389 else
2390 {
2391 degree = left_node->get_degree();
2392 }
2393 uint64_t left_len = degree / 2;
2394 uint64_t right_len = degree - left_len;
2395 this->move_values_right(left_node, right_node, right_len, is_leaf, parent, parent_edge_index_of_left_node);
2396 }
2397
2399
2410 void increment(uint64_t i, int64_t delta)
2411 {
2412 uint64_t pos = this->compute_path_from_root_to_leaf(i);
2413 NodePointer &leaf_pointer = this->tmp_path[this->tmp_path.size() - 1];
2414 LEAF_CONTAINER &leaf = this->leaf_container_vec[leaf_pointer.get_leaf_container_index()];
2415 leaf.increment(pos, delta);
2416 for (int64_t i = this->tmp_path.size() - 2; i >= 0; --i)
2417 {
2418 uint64_t idx = this->tmp_path[i + 1].get_parent_edge_index();
2419 Node *parent = this->tmp_path[i].get_node();
2420 parent->increment(idx, 0, delta);
2421 }
2422 }
2423
2434 {
2435 auto _end = this->get_leaf_forward_iterator_end();
2436 int64_t prev = -1;
2437 bool b = true;
2438 for (auto it = this->get_leaf_forward_iterator_begin(); it != _end; ++it)
2439 {
2440 if (prev + 1 != (int64_t)(*it))
2441 {
2442 b = false;
2443 break;
2444 }
2445 else
2446 {
2447 prev = *it;
2448 }
2449 }
2450 return b;
2451 }
2452
2453 void print_debug_info() const{
2454 std::cout << "BPTree::print_debug_info()" << std::endl;
2455 /*
2456 std::cout << "BPInternalNodeFunctions::time_count: " << (double)stool::__time_count / 1000000.0 << " ms" << std::endl;
2457 std::cout << "BPInternalNodeFunctions::time_count_counter: " << stool::__time_count_counter << std::endl;
2458 std::cout << "BPInternalNodeFunctions::size_count: " << stool::__size_count << std::endl;
2459 std::cout << "average: " << (double)stool::__time_count / stool::__time_count_counter << " ns" << std::endl;
2460 std::cout << "average size: " << (double)stool::__size_count / stool::__time_count_counter << " elements" << std::endl;
2461 */
2462 std::cout << "BPTree::time_count: " << (double)BPTree::time_count / 1000000.0 << " ms" << std::endl;
2463 std::cout << "BPTree::time_count2: " << (double)bptree::time_count2 / 1000000.0 << " ms" << std::endl;
2464 }
2465
2466 private:
2474 void preprocess_for_the_exchange_of_two_leaves([[maybe_unused]] uint64_t leaf_index1, [[maybe_unused]] uint64_t leaf_index2)
2475 {
2476 }
2477
2490 void exchange(stool::SimpleDeque16<Node *> &children1, uint64_t j, uint64_t leaf_index2)
2491 {
2492 uint64_t leaf_index1 = (uint64_t)children1[j];
2493 assert(this->parent_vec[leaf_index1] != nullptr);
2494
2495 if (leaf_index1 != leaf_index2)
2496 {
2497 this->preprocess_for_the_exchange_of_two_leaves(leaf_index1, leaf_index2);
2498
2499 if (this->parent_vec[leaf_index2] != nullptr)
2500 {
2501 auto &children2 = this->parent_vec[leaf_index2]->get_children();
2502 uint64_t idx = this->parent_vec[leaf_index2]->get_index((Node *)leaf_index2);
2503 this->leaf_container_vec[leaf_index2].swap(this->leaf_container_vec[leaf_index1]);
2504 children1[j] = (Node *)leaf_index2;
2505 children2[idx] = (Node *)leaf_index1;
2506 }
2507 else
2508 {
2509 this->leaf_container_vec[leaf_index2].swap(this->leaf_container_vec[leaf_index1]);
2510 children1[j] = (Node *)leaf_index2;
2511 }
2512
2513 Node *tmp_n = this->parent_vec[leaf_index1];
2514 this->parent_vec[leaf_index1] = this->parent_vec[leaf_index2];
2515 this->parent_vec[leaf_index2] = tmp_n;
2516 }
2517 }
2518
2519 public:
2537 {
2538 if (this->size() == 0 || this->root_is_leaf_)
2539 {
2540 return;
2541 }
2542
2543 auto _end = this->get_postorder_iterator_end();
2544
2545 if (!USE_PARENT_FIELD)
2546 {
2547 this->parent_vec.resize(this->leaf_container_vec.size(), nullptr);
2548
2549 for (PostorderIterator it = this->get_postorder_iterator_begin(); it != _end; ++it)
2550 {
2551 NodePointer np = *it;
2552 if (!np.is_leaf())
2553 {
2554 Node *node = np.get_node();
2555 if (node->is_parent_of_leaves())
2556 {
2557 auto &children = node->get_children();
2558 for (Node *v : children)
2559 {
2560 uint64_t leaf = (uint64_t)v;
2561 this->parent_vec[leaf] = node;
2562 }
2563 }
2564 }
2565 }
2566 }
2567 uint64_t counter = 0;
2568
2569 for (PostorderIterator it = this->get_postorder_iterator_begin(); it != _end; ++it)
2570 {
2571 NodePointer np = *it;
2572 if (!np.is_leaf())
2573 {
2574 Node *node = np.get_node();
2575 if (node->is_parent_of_leaves())
2576 {
2577 uint64_t _size = node->children_count();
2578 auto &children1 = node->get_children();
2579
2580 for (uint64_t j = 0; j < _size; j++)
2581 {
2582 this->exchange(children1, j, counter);
2583
2584 counter++;
2585 }
2586 }
2587 }
2588 }
2589 while (this->leaf_container_vec.size() > counter)
2590 {
2591 this->leaf_container_vec.pop_back();
2592 this->parent_vec.pop_back();
2593 }
2594 while (this->unused_leaf_container_indexes.size() > 0)
2595 {
2596 this->unused_leaf_container_indexes.pop();
2597 }
2598
2599 if (!USE_PARENT_FIELD)
2600 {
2601 std::vector<Node *> tmp;
2602 this->parent_vec.swap(tmp);
2603 }
2604
2605 if (this->root_is_leaf_)
2606 {
2607 this->root = (Node *)0;
2608 }
2609
2611 }
2612
2624 std::vector<Node *> build_from_leaf_containers_sub2(std::vector<Node *> &nodes, uint64_t current_height)
2625 {
2626 assert(nodes.size() > 0);
2627 std::vector<Node *> r;
2628 if (nodes.size() == 1)
2629 {
2630 this->root = nodes[0];
2631 this->root_is_leaf_ = false;
2632 this->height_ = current_height;
2633 }
2634 else if (nodes.size() <= MAX_DEGREE)
2635 {
2636
2637 Node *root = this->get_new_node_pointer();
2638 root->initialize(false, this->leaf_container_vec);
2639 for (uint64_t i = 0; i < nodes.size(); i++)
2640 {
2641 if (USE_PARENT_FIELD)
2642 {
2643 nodes[i]->set_parent(root);
2644 }
2645 uint64_t count = nodes[i]->get_value_count();
2646 uint64_t sum = 0;
2647 if (USE_PSUM)
2648 {
2649 sum = nodes[i]->get_value_sum();
2650 }
2651 root->append_child(nodes[i], count, sum);
2652 }
2653 r.push_back(root);
2654 }
2655 else
2656 {
2657 uint64_t i = 0;
2658 uint64_t counter = nodes.size();
2659 uint64_t threshold = MAX_DEGREE * 2;
2660 while (counter > 0)
2661 {
2662 uint64_t d = 0;
2663 if (counter >= threshold)
2664 {
2665 d = MAX_DEGREE;
2666 }
2667 else if (counter > MAX_DEGREE)
2668 {
2669 d = MAX_DEGREE / 2;
2670 }
2671 else
2672 {
2673 d = counter;
2674 }
2675 Node *node = this->get_new_node_pointer();
2676 node->initialize(false, this->leaf_container_vec);
2677
2678 for (uint64_t j = 0; j < d; j++)
2679 {
2680 if (USE_PARENT_FIELD)
2681 {
2682 nodes[i]->set_parent(node);
2683 }
2684 uint64_t count = nodes[i]->get_value_count();
2685 uint64_t sum = 0;
2686 if (USE_PSUM)
2687 {
2688 sum = nodes[i]->get_value_sum();
2689 }
2690 node->append_child(nodes[i], count, sum);
2691 i++;
2692 counter--;
2693 }
2694 r.push_back(node);
2695 }
2696 }
2697 return r;
2698 }
2699
2710 {
2711 std::vector<Node *> r;
2712
2713 if (this->leaf_container_vec.size() == 0)
2714 {
2715 this->root = nullptr;
2716 this->root_is_leaf_ = false;
2717 this->height_ = 0;
2718 }
2719 else if (this->leaf_container_vec.size() == 1)
2720 {
2721 this->root = (Node *)0;
2722 this->root_is_leaf_ = true;
2723 this->height_ = 1;
2724 }
2725 else if (this->leaf_container_vec.size() <= MAX_DEGREE)
2726 {
2727 Node *root = this->get_new_node_pointer();
2728 root->initialize(true, this->leaf_container_vec);
2729 for (uint64_t i = 0; i < this->leaf_container_vec.size(); i++)
2730 {
2731 uint64_t psum = 0;
2732 if (USE_PSUM)
2733 {
2734 psum += this->leaf_container_vec[i].psum();
2735 }
2736 if (USE_PARENT_FIELD)
2737 {
2738 this->parent_vec[i] = root;
2739 }
2740
2741 root->append_child((Node *)i, this->leaf_container_vec[i].size(), psum);
2742 }
2743 r.push_back(root);
2744 }
2745 else
2746 {
2747
2748 uint64_t i = 0;
2749 uint64_t counter = this->leaf_container_vec.size();
2750 uint64_t threshold = MAX_DEGREE * 2;
2751 while (counter > 0)
2752 {
2753 uint64_t d = 0;
2754 if (counter >= threshold)
2755 {
2756 d = MAX_DEGREE;
2757 }
2758 else if (counter > MAX_DEGREE)
2759 {
2760 d = MAX_DEGREE / 2;
2761 }
2762 else
2763 {
2764 d = counter;
2765 }
2766 Node *node = this->get_new_node_pointer();
2767 node->initialize(true, this->leaf_container_vec);
2768
2769 for (uint64_t j = 0; j < d; j++)
2770 {
2771 uint64_t psum = 0;
2772 if (USE_PSUM)
2773 {
2774 psum += this->leaf_container_vec[i].psum();
2775 }
2776 if (USE_PARENT_FIELD)
2777 {
2778 this->parent_vec[i] = node;
2779 }
2780 node->append_child((Node *)i, this->leaf_container_vec[i].size(), psum);
2781 i++;
2782 counter--;
2783 }
2784 r.push_back(node);
2785 }
2786 }
2787 return r;
2788 }
2789
2799 void build_sub(const std::vector<VALUE> &_values)
2800 {
2801 uint64_t i = 0;
2802 if (_values.size() == 0)
2803 {
2804 }
2805 else if (_values.size() <= LEAF_CONTAINER_MAX_SIZE)
2806 {
2807 uint64_t leaf_index = this->get_new_container_index();
2808
2809 while (i < _values.size())
2810 {
2811 this->leaf_container_vec[leaf_index].push_back(_values[i]);
2812 i++;
2813 }
2814 }
2815 else
2816 {
2817 uint64_t threshold = LEAF_CONTAINER_MAX_SIZE * 2;
2818 uint64_t counter = _values.size();
2819
2820 while (i < _values.size())
2821 {
2822 uint64_t d = 0;
2823 if (counter >= threshold)
2824 {
2825 d = LEAF_CONTAINER_MAX_SIZE;
2826 }
2827 else if (counter > LEAF_CONTAINER_MAX_SIZE)
2828 {
2829 d = LEAF_CONTAINER_MAX_SIZE / 2;
2830 }
2831 else
2832 {
2833 d = counter;
2834 }
2835 assert(counter >= (LEAF_CONTAINER_MAX_SIZE / 2));
2836
2837 uint64_t leaf_index = this->get_new_container_index();
2838 for (uint64_t j = 0; j < d; j++)
2839 {
2840 this->leaf_container_vec[leaf_index].push_back(_values[i]);
2841 i++;
2842 counter--;
2843 }
2844 }
2845 }
2846 }
2847
2857 void build(const std::vector<VALUE> &_values)
2858 {
2859 this->clear();
2860 this->build_sub(_values);
2861
2862 std::vector<Node *> layer = this->build_from_leaf_containers_sub1();
2863 uint64_t current_height = 2;
2864 while (layer.size() > 0)
2865 {
2866
2867 std::vector<Node *> next_layer = this->build_from_leaf_containers_sub2(layer, current_height);
2868 layer.swap(next_layer);
2869 current_height++;
2870 }
2871 }
2872
2882 void build_from_leaf_containers(std::vector<LEAF_CONTAINER> &_leaf_containers)
2883 {
2884 this->clear();
2885
2886 this->leaf_container_vec.swap(_leaf_containers);
2887 if (USE_PARENT_FIELD)
2888 {
2889 this->parent_vec.resize(this->leaf_container_vec.size(), nullptr);
2890 }
2891
2892 std::vector<Node *> layer = this->build_from_leaf_containers_sub1();
2893 uint64_t current_height = 2;
2894 while (layer.size() > 0)
2895 {
2896 std::vector<Node *> next_layer = this->build_from_leaf_containers_sub2(layer, current_height);
2897 layer.swap(next_layer);
2898 current_height++;
2899 }
2900 }
2901
2912 void save(std::vector<uint8_t> &output, uint64_t &pos)
2913 {
2915 {
2916 this->sort_leaf_containers();
2917 }
2918
2919 uint64_t _max_degree = MAX_DEGREE;
2920 uint64_t _max_count_of_values_in_leaf = LEAF_CONTAINER_MAX_SIZE;
2921
2922 uint64_t _size = sizeof(_max_degree) + sizeof(_max_count_of_values_in_leaf) + LEAF_CONTAINER::get_byte_size(this->leaf_container_vec);
2923 if (pos + _size > output.size())
2924 {
2925 output.resize(pos + _size);
2926 }
2927
2928 std::memcpy(output.data() + pos, &_max_degree, sizeof(uint64_t));
2929 pos += sizeof(uint64_t);
2930 std::memcpy(output.data() + pos, &_max_count_of_values_in_leaf, sizeof(uint64_t));
2931 pos += sizeof(uint64_t);
2932
2933 LEAF_CONTAINER::save(this->leaf_container_vec, output, pos);
2934 }
2935
2944 void save(std::ofstream &os)
2945 {
2947 {
2948 this->sort_leaf_containers();
2949 }
2950 uint64_t _max_degree = MAX_DEGREE;
2951 uint64_t _max_count_of_values_in_leaf = LEAF_CONTAINER_MAX_SIZE;
2952
2953 os.write((const char *)(&_max_degree), sizeof(uint64_t));
2954 os.write((const char *)(&_max_count_of_values_in_leaf), sizeof(uint64_t));
2955 LEAF_CONTAINER::save(this->leaf_container_vec, os);
2956 }
2957
2968 void build_from_data(const std::vector<uint8_t> &data, uint64_t &pos)
2969 {
2970 uint64_t _max_degree;
2971 uint64_t _max_count_of_values_in_leaf;
2972 this->clear();
2973 std::memcpy(&_max_degree, data.data() + pos, sizeof(uint64_t));
2974 pos += sizeof(uint64_t);
2975 std::memcpy(&_max_count_of_values_in_leaf, data.data() + pos, sizeof(uint64_t));
2976 pos += sizeof(uint64_t);
2977
2978 auto tmp = LEAF_CONTAINER::load_vector(data, pos);
2979
2980 this->build_from_leaf_containers(tmp);
2981 }
2982
2992 void build_from_data(std::ifstream &ifs)
2993 {
2994 this->clear();
2995 uint64_t _max_degree;
2996 uint64_t _max_count_of_values_in_leaf;
2997 ifs.read((char *)(&_max_degree), sizeof(uint64_t));
2998 ifs.read((char *)(&_max_count_of_values_in_leaf), sizeof(uint64_t));
2999
3000 auto tmp = LEAF_CONTAINER::load_vector(ifs);
3001
3002 this->build_from_leaf_containers(tmp);
3003 }
3004
3005 void print_information_about_performance(int message_paragraph = stool::Message::SHOW_MESSAGE) const{
3006 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "Performance Information (BPTree)[" << std::endl;
3007 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "The number of insertion operations: " << this->insert_operation_counter << std::endl;
3008 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "The number of split process in insertion operations: " << this->split_process_counter << std::endl;
3009 if(this->insert_operation_counter > 0){
3010 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "The number of split process per insertion: " << (double)this->split_process_counter / (double)this->insert_operation_counter << std::endl;
3011 }
3012
3013 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "The number of removal operations: " << this->remove_operation_counter << std::endl;
3014 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "The number of merge process in removal operations: " << this->merge_process_counter << std::endl;
3015 if(this->remove_operation_counter > 0){
3016 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "The number of merge process per removal: " << (double)this->merge_process_counter / (double)this->remove_operation_counter << std::endl;
3017 }
3018
3019 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "]" << std::endl;
3020
3021 }
3022
3023
3037 void print_statistics(int message_paragraph = stool::Message::SHOW_MESSAGE) const
3038 {
3039 uint64_t internal_node_count = 0;
3040 uint64_t leaf_count = 0;
3041 uint64_t total_degree_of_internal_node = 0;
3042
3044 {
3045 NodePointer np = *it;
3046 if (!np.is_leaf())
3047 {
3048 internal_node_count++;
3049 total_degree_of_internal_node += np.get_node()->children_count();
3050 }
3051 else
3052 {
3053 leaf_count++;
3054 }
3055 }
3056
3057 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "Statistics(BPTree):" << std::endl;
3058 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The number of internal nodes: " << internal_node_count << std::endl;
3059 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The number of used leaf containers: " << leaf_count << std::endl;
3060 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The height of this tree: " << this->height_ << std::endl;
3061 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The number of stored values: " << this->size() << std::endl;
3062
3063 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The max degree of internal node (parameter): " << MAX_DEGREE << std::endl;
3064 if (internal_node_count > 0)
3065 {
3066 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The average degree of internal node: " << (total_degree_of_internal_node / internal_node_count) << std::endl;
3067 }
3068 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The max count of values in leaf (parameter): " << LEAF_CONTAINER_MAX_SIZE << std::endl;
3069 if (this->size() > 0)
3070 {
3071 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The average count of values in leaf (parameter): " << (this->size() / leaf_count) << std::endl;
3072 }
3073
3074 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The number of unused nodes: " << this->unused_node_pointers.size() << std::endl;
3075 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The number of unused leaf containers: " << this->unused_leaf_container_indexes.size() << std::endl;
3076 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "The number of stored parent pointers: " << this->parent_vec.size() << std::endl;
3077
3078 uint64_t total_memory_usage = this->size_in_bytes();
3079 uint64_t num = this->size();
3080 if(USE_PARENT_FIELD){
3081 total_memory_usage += this->parent_vec.size() * sizeof(Node *);
3082 }
3083 std::cout << stool::Message::get_paragraph_string(message_paragraph + 1) << "Estimated memory usage: " << total_memory_usage << " bytes (Avg: " << (total_memory_usage / num) << " bytes)" << std::endl;
3084 std::cout << stool::Message::get_paragraph_string(message_paragraph) << "[END]" << std::endl;
3085 }
3086 };
3087 }
3088}
Helper functions of BPInternalNode.
Definition bp_internal_node_functions.hpp:16
The internal node of BPTree.
Definition bp_internal_node.hpp:16
A forward iterator for traversing the leaves of a BP-tree.
Definition bp_leaf_forward_iterator.hpp:12
A pointer to a node of BPTree.
Definition bp_node_pointer.hpp:15
The iterator of a post-order traversal on BPTree.
Definition bp_postorder_iterator.hpp:14
An implementation of B+-tree.
Definition bp_tree.hpp:24
LeafForwardIterator get_leaf_forward_iterator_begin() const
Returns an iterator pointing to the first leaf in forward traversal.
Definition bp_tree.hpp:436
uint64_t get_value_index(uint64_t leaf_index, uint64_t position_in_leaf_container) const
Returns the value index at position pos in the B+ tree.
Definition bp_tree.hpp:610
void swap(BPTree &_tree)
Swaps the contents of this B+ tree with another B+ tree.
Definition bp_tree.hpp:203
void move_values_left(Node *left_node, Node *right_node, uint64_t len, bool is_leaf, Node *parent, int64_t parent_edge_index_of_right_node)
Moves values from a right node to a left node in the B+ tree.
Definition bp_tree.hpp:2334
const LEAF_CONTAINER & get_leaf_container(uint64_t leaf_index) const
Returns the leaf container at position leaf_index.
Definition bp_tree.hpp:656
void swap(BPTree &_tree, bool swap_linked_tree)
Swaps the contents of this B+ tree with another B+ tree.
Definition bp_tree.hpp:178
LeafForwardIterator get_leaf_forward_iterator_end() const
Returns an iterator pointing to the end of forward leaf traversal.
Definition bp_tree.hpp:454
VALUE at(uint64_t pos) const
Returns the value at position pos in the B+ tree.
Definition bp_tree.hpp:574
void initialize()
Initializes the B+ tree with specified maximum degrees for internal nodes and leaf nodes.
Definition bp_tree.hpp:150
void push_many(std::vector< LEAF_CONTAINER > &containers)
Pushes multiple leaf containers into the B+ tree.
Definition bp_tree.hpp:1976
void print_statistics(int message_paragraph=stool::Message::SHOW_MESSAGE) const
Prints statistical information about the B+ tree.
Definition bp_tree.hpp:3037
void increment(uint64_t i, int64_t delta)
Increments a value at a given position in the B+ tree.
Definition bp_tree.hpp:2410
int64_t compute_path_from_root_to_leaf(uint64_t i, std::vector< NodePointer > &output_path) const
Computes the path from root to leaf containing position i and returns leaf position.
Definition bp_tree.hpp:1225
uint64_t get_parent_vector_memory() const
Returns the memory size of the parent vector in the B+ tree.
Definition bp_tree.hpp:1058
std::vector< Node * > build_from_leaf_containers_sub1()
Creates the first layer of internal nodes from leaf containers.
Definition bp_tree.hpp:2709
std::vector< VALUE > to_value_vector() const
Converts the B+ tree to a vector of values.
Definition bp_tree.hpp:685
const std::vector< NodePointer > & get_temporary_path() const
Returns the temporary path used during tree operations.
Definition bp_tree.hpp:270
int64_t compute_path_from_root_to_leaf(uint64_t i) const
Computes the path from the root to the leaf at position i.
Definition bp_tree.hpp:1207
void resize(uint64_t _size, VALUE default_value)
Resizes the B+ tree to contain a specified number of elements.
Definition bp_tree.hpp:2190
uint64_t size() const
Return the number of values stored in this tree.
Definition bp_tree.hpp:316
uint64_t get_leaf_container_vector_size() const
Return the size of the vector that stores leaf containers.
Definition bp_tree.hpp:345
int64_t search(uint64_t sum) const
Returns the position of the sum in the B+ tree.
Definition bp_tree.hpp:549
PostorderIterator get_postorder_iterator_begin() const
Returns an iterator pointing to the first node in postorder traversal.
Definition bp_tree.hpp:366
int64_t select0(uint64_t i) const
Returns the position of the i-th 0 in the B+ tree.
Definition bp_tree.hpp:525
void split_node(Node *left_node, Node *right_node, bool is_leaf, Node *parent, int64_t parent_edge_index_of_left_node, std::vector< Node * > &parent_vec)
Splits a node into two nodes in the B+ tree.
Definition bp_tree.hpp:2378
void build(const std::vector< VALUE > &_values)
Builds a complete B+ tree from a vector of values.
Definition bp_tree.hpp:2857
uint64_t get_leaf_container_vector_memory() const
Returns the memory size of the leaf container vector in the B+ tree.
Definition bp_tree.hpp:1008
ValueForwardIterator get_value_forward_iterator_begin() const
Returns an iterator pointing to the first value in forward traversal.
Definition bp_tree.hpp:404
uint64_t psum() const
Returns the prefix sum of the B+ tree.
Definition bp_tree.hpp:472
void sort_leaf_containers()
Sorts the leaf containers in the B+ tree.
Definition bp_tree.hpp:2536
ValueForwardIterator get_value_forward_iterator_end() const
Returns an iterator pointing to the end of forward value traversal.
Definition bp_tree.hpp:423
void build_from_leaf_containers(std::vector< LEAF_CONTAINER > &_leaf_containers)
Builds a B+ tree from a vector of leaf containers.
Definition bp_tree.hpp:2882
uint64_t get_unused_leaf_container_vector_memory() const
Returns the memory size of the unused leaf container vector in the B+ tree.
Definition bp_tree.hpp:999
uint64_t get_max_count_of_values_in_leaf() const
Returns the maximum number of values that can be stored in a leaf node.
Definition bp_tree.hpp:300
void build_from_data(std::ifstream &ifs)
Builds a B+ tree from serialized data in a file stream.
Definition bp_tree.hpp:2992
uint64_t remove_using_path(const std::vector< NodePointer > &path, uint64_t position_in_leaf_container)
Removes a value at a specific position using a path to the leaf.
Definition bp_tree.hpp:2045
void push_back(VALUE value)
Pushes a single value to the B+ tree.
Definition bp_tree.hpp:1849
void remove(uint64_t pos)
Removes a value at a specific position in the B+ tree.
Definition bp_tree.hpp:2086
void build_sub(const std::vector< VALUE > &_values)
Creates leaf containers from a vector of values.
Definition bp_tree.hpp:2799
PostorderIterator get_postorder_iterator_end() const
Returns an iterator pointing to the end of postorder traversal.
Definition bp_tree.hpp:391
uint64_t psum(uint64_t i) const
Returns the prefix sum of the B+ tree at position i.
Definition bp_tree.hpp:496
std::vector< Node * > build_from_leaf_containers_sub2(std::vector< Node * > &nodes, uint64_t current_height)
Builds a layer of internal nodes from a vector of child nodes.
Definition bp_tree.hpp:2624
void print_leaves() const
Prints the leaves of the B+ tree.
Definition bp_tree.hpp:884
bool check_if_leaf_container_vec_is_sorted() const
Checks if the leaf containers in the B+ tree are sorted.
Definition bp_tree.hpp:2433
void save(std::vector< uint8_t > &output, uint64_t &pos)
Saves the B+ tree structure to a byte vector.
Definition bp_tree.hpp:2912
void push_front(VALUE value)
Inserts a value at the front of the B+ tree.
Definition bp_tree.hpp:1866
bool empty() const
Checks if the B+ tree is empty.
Definition bp_tree.hpp:463
void print_tree() const
Prints the B+ tree in a tree-like format.
Definition bp_tree.hpp:806
void print_leaf_containers() const
Prints the leaf containers of the B+ tree.
Definition bp_tree.hpp:917
uint64_t get_max_degree_of_internal_node() const
Return the max degree of this tree. The number of children of every internal node does not exceed the...
Definition bp_tree.hpp:278
void get_path_from_root_to_first_leaf(std::vector< NodePointer > &output_path) const
Returns a vector of NodePointer objects representing the path from the root to the first leaf.
Definition bp_tree.hpp:1129
void print_info(int message_paragraph=stool::Message::SHOW_MESSAGE) const
Prints detailed information about the B+ tree.
Definition bp_tree.hpp:845
uint64_t get_leaf_count() const
Return the number of leaves in this tree.
Definition bp_tree.hpp:337
void clear()
Clears all contents of the B+ tree and resets it to an empty state.
Definition bp_tree.hpp:219
void build_from_data(const std::vector< uint8_t > &data, uint64_t &pos)
Builds a B+ tree from serialized data in a byte vector.
Definition bp_tree.hpp:2968
uint64_t height() const
Return the height of this tree.
Definition bp_tree.hpp:308
uint64_t size_in_bytes() const
Returns the total memory size of the B+ tree.
Definition bp_tree.hpp:931
void get_path_from_root_to_last_leaf(std::vector< NodePointer > &output_path) const
Returns a vector of NodePointer objects representing the path from the root to the last leaf.
Definition bp_tree.hpp:1167
uint64_t get_unused_node_pointers_memory() const
Returns the memory size of the unused node pointers in the B+ tree.
Definition bp_tree.hpp:1042
BPTree * get_linked_tree() const
Gets the linked B+ tree.
Definition bp_tree.hpp:2253
bool verify() const
Verifies the integrity of the B+ tree.
Definition bp_tree.hpp:788
void set_linked_tree(BPTree *_tree)
Sets the linked B+ tree.
Definition bp_tree.hpp:2243
void insert(uint64_t pos, VALUE value, uint64_t sum_delta)
Inserts a value at a specific position in the B+ tree.
Definition bp_tree.hpp:2120
uint64_t compute_leaf_container_count() const
Returns the number of leaf containers in the B+ tree.
Definition bp_tree.hpp:960
LEAF_CONTAINER & get_leaf_container(uint64_t leaf_index)
Returns a reference to the leaf container at position leaf_index.
Definition bp_tree.hpp:666
void move_values_right(Node *left_node, Node *right_node, uint64_t len, bool is_leaf, Node *parent, int64_t parent_edge_index_of_left_node)
Moves values from a left node to a right node in the B+ tree.
Definition bp_tree.hpp:2278
void save(std::ofstream &os)
Saves the B+ tree structure to a file stream.
Definition bp_tree.hpp:2944
uint64_t get_leaf_container_memory() const
Returns the memory size of the leaf containers in the B+ tree.
Definition bp_tree.hpp:1017
void print_memory_usage(int message_paragraph=stool::Message::SHOW_MESSAGE) const
Prints the memory usage information of the B+ tree.
Definition bp_tree.hpp:1106
uint64_t get_internal_node_memory() const
Returns the memory size of the internal nodes in the B+ tree.
Definition bp_tree.hpp:979
uint64_t compute_internal_node_count() const
Returns the number of internal nodes in the B+ tree.
Definition bp_tree.hpp:941
void push_many(const std::vector< VALUE > &values)
Pushes multiple values into the B+ tree.
Definition bp_tree.hpp:2013
std::vector< std::string > get_memory_usage_info(int message_paragraph=stool::Message::SHOW_MESSAGE) const
Returns a vector of strings containing memory usage information about the B+ tree.
Definition bp_tree.hpp:1068
The forward iterator of the values stored in BPTree.
Definition bp_value_forward_iterator.hpp:17