-
Notifications
You must be signed in to change notification settings - Fork 0
Bp implementation #38
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
590ed15
6ec3bb8
d9e37c1
bd5375c
e397ba9
12b763b
2f49504
a975bad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| #pragma once | ||
|
|
||
| #include <pixie/rmm_tree.h> | ||
|
|
||
| #include <cstdint> | ||
|
|
||
| namespace pixie { | ||
|
|
||
| /** | ||
| * @brief A tree class based on the balanced parentheses (BP) | ||
| * representation | ||
| */ | ||
| class BpTree { | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also think it is better to write abbreviations in upper case for class names, i.e. |
||
| private: | ||
| const size_t num_bits_; | ||
| RmMTree rmm; | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| public: | ||
| /** | ||
| * @brief Handle to a single node of the BP tree | ||
| * | ||
| * Pairs the node number with the node's position in the BP bit | ||
| * sequence. The root is represented as number 0 at position 0. | ||
| */ | ||
| struct Node { | ||
| size_t number;  ///< Node number (0 for the root) | ||
| size_t pos;     ///< Position of the node in the BP bit sequence | ||
|
|
||
| /** Creates a node handle from its number and its BP position. */ | ||
| Node(size_t node_number, size_t bp_pos) { | ||
| number = node_number; | ||
| pos = bp_pos; | ||
| } | ||
| }; | ||
|
|
||
| /** | ||
| * @brief Builds the tree on top of an external array of uint64_t | ||
| * @param words Packed BP bit sequence, forwarded to the RmMTree member | ||
| * @param tree_size Size of the tree; the sequence length is taken to be | ||
| * twice this value | ||
| */ | ||
| explicit BpTree(const std::vector<std::uint64_t>& words, size_t tree_size) | ||
| : num_bits_(tree_size * 2), rmm(words, tree_size * 2) {} | ||
|
|
||
| /** | ||
| * @brief Returns the root node, i.e. node number 0 at BP position 0 | ||
| */ | ||
| Node root() const { | ||
| const size_t root_number = 0; | ||
| const size_t root_pos = 0; | ||
| return Node(root_number, root_pos); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns the size of the tree, which is half the number of | ||
| * bits in the BP sequence | ||
| */ | ||
| size_t size() const { | ||
| return num_bits_ >> 1; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Indicates if @p node is a leaf | ||
| * | ||
| * A node is reported as a leaf when the bit right after its position | ||
| * is 0, or when no such bit exists. Positions at or beyond the end of | ||
| * the sequence are treated as leaves instead of being read out of range. | ||
| * @param node Node to test | ||
| * @return true if @p node has no children | ||
| */ | ||
| bool is_leaf(const Node& node) const { | ||
| const size_t next_pos = node.pos + 1; | ||
| // Same end-of-sequence guard as is_last_child(): never index past the end. | ||
| if (next_pos >= num_bits_) { | ||
| return true; | ||
| } | ||
| return rmm.bit(next_pos) == 0; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Indicates if @p node is a root | ||
| * | ||
| * Const-qualified like the other queries so that it can be called on a | ||
| * const BpTree; the check only looks at the node number. | ||
| * @param node Node to test | ||
| * @return true if the node number is 0 | ||
| */ | ||
| bool is_root(const Node& node) const { return node.number == 0; } | ||
|
|
||
| /** | ||
| * @brief Counts the children of @p node | ||
| * Walks the sibling chain from the first child to the last one, so | ||
| * this method has O(d) time complexity! | ||
| * | ||
| * TODO try make this faster | ||
| */ | ||
| size_t degree(const Node& node) const { | ||
| if (is_leaf(node)) { | ||
| return 0; | ||
| } | ||
| size_t child_count = 1; | ||
| for (Node current = first_child(node); !is_last_child(current); | ||
| current = next_sibling(current)) { | ||
| ++child_count; | ||
| } | ||
| return child_count; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns first child of a @p node | ||
| * | ||
| * The child gets the parent's number plus one and the parent's BP | ||
| * position plus one. | ||
| * NOTE(review): no leaf check is performed here - presumably callers | ||
| * are expected to test is_leaf() first; confirm. | ||
| */ | ||
| Node first_child(const Node& node) const { | ||
| return Node(node.number + 1, node.pos + 1); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns the i-th child of @p node | ||
| * Indexing starts at 0 | ||
| * Starts at the first child and advances i times through | ||
| * next_sibling(), so this method has O(i) time complexity! | ||
| * | ||
| * TODO try make this faster | ||
| */ | ||
| Node child(const Node& node, size_t i) const { | ||
| Node current = first_child(node); | ||
| for (size_t step = 0; step < i; ++step) { | ||
| current = next_sibling(current); | ||
| } | ||
| return current; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns the parent of a @p node if @p node is not root, | ||
| * else returns root | ||
| * | ||
| * The parent position comes from RmMTree::enclose and the parent | ||
| * number from RmMTree::rank1 at that position, minus one. | ||
| * NOTE(review): next_sibling() derives the number from rank1(pos + 1) | ||
| * while this method uses rank1(pos) - confirm both offsets against | ||
| * the RmMTree semantics. | ||
| */ | ||
| Node parent(const Node& node) const { | ||
| if (node.number != 0) { | ||
| const size_t parent_pos = rmm.enclose(node.pos + 1); | ||
| return Node(rmm.rank1(parent_pos) - 1, parent_pos); | ||
| } | ||
| return root(); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Indicates if @p node is last child | ||
| * | ||
| * Looks at the bit following the position returned by RmMTree::close | ||
| * for this node: the node is the last child when that bit is 0 or | ||
| * lies past the end of the sequence. | ||
| */ | ||
| bool is_last_child(const Node& node) const { | ||
| const size_t close_pos = rmm.close(node.pos + 1); | ||
| const size_t after_close = close_pos + 1; | ||
| return after_close >= num_bits_ || rmm.bit(after_close) == 0; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns next sibling of a @p node | ||
| * | ||
| * The sibling sits one position after the value returned by | ||
| * RmMTree::close for this node; its number is rank1(pos + 1) - 1. | ||
| * NOTE(review): no is_last_child() check is made here - presumably | ||
| * the caller guarantees that a next sibling exists; confirm. | ||
| */ | ||
| Node next_sibling(const Node& node) const { | ||
| const size_t sibling_pos = rmm.close(node.pos + 1) + 1; | ||
| return Node(rmm.rank1(sibling_pos + 1) - 1, sibling_pos); | ||
| } | ||
| }; | ||
| } // namespace pixie | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -966,6 +966,13 @@ class RmMTree { | |
| return (result == npos ? npos : result + 1); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Read bit at position @p position (LSB-first across words). | ||
| * | ||
| * The upper part of @p position (position >> 6) selects the word and | ||
| * the low six bits select the bit inside it. No range check is done. | ||
| */ | ||
| inline int bit(const size_t& position) const noexcept { | ||
| const size_t word_index = position >> 6; | ||
| const size_t bit_offset = position & 63; | ||
| return (bits[word_index] >> bit_offset) & 1u; | ||
| } | ||
|
|
||
| private: | ||
| /** | ||
| * @brief Count "10" occurrences inside a 64-bit slice of given logical | ||
|
|
@@ -2196,6 +2203,24 @@ class RmMTree { | |
| build(leaf_block_bits, max_overhead); | ||
| } | ||
|
|
||
| public: | ||
| /** | ||
| * @brief Export internal bitvector into a 0/1 string. | ||
| * | ||
| * Character i of the result is '1' when bit i is set and '0' otherwise. | ||
| * Bit i is read from word i >> 6 at offset i & 63, i.e. LSB-first | ||
| * within each word. | ||
| * @return String of num_bits characters, one per bit. | ||
| */ | ||
| std::string to_string() const { | ||
| std::string result; | ||
| result.resize(num_bits); | ||
|
|
||
| for (size_t i = 0; i < num_bits; ++i) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SUGGESTION: Optimize bit-to-string conversion for large vectors The per-bit loop does a shift and index for every bit. A byte-level LUT (256-entry) or word-level conversion can reduce overhead when exporting large bitvectors.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will wait for better times |
||
| uint64_t word = bits[i >> 6]; | ||
| bool bit = (word >> (i & 63)) & 1ULL; | ||
| result[i] = bit ? '1' : '0'; | ||
| } | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| private: | ||
| /** | ||
| * @brief Build internal structures from 64-bit words. | ||
| * @param words Words with LSB-first bits. | ||
|
|
@@ -2215,13 +2240,6 @@ class RmMTree { | |
| build(leaf_block_bits, max_overhead); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Read bit at position @p position (LSB-first across words). | ||
| * | ||
| * The upper part of @p position (position >> 6) selects the word and | ||
| * the low six bits select the bit inside it. No range check is done. | ||
| */ | ||
| inline int bit(const size_t& position) const noexcept { | ||
| const size_t word_index = position >> 6; | ||
| const size_t bit_offset = position & 63; | ||
| return (bits[word_index] >> bit_offset) & 1u; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Set bit at position @p position to 1. | ||
| */ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tests should be added to coverage/build-test workflows (but not for AVX-512).