Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ jobs:
working-directory: ./build
run: ./louds_tree_tests

- name: Run BP Tree Tests
working-directory: ./build
run: ./bp_tree_tests

- name: Run Benchmark Tests
working-directory: ./build
run: ./benchmark_tests
Expand Down Expand Up @@ -92,3 +96,15 @@ jobs:
fi
exit $rc

- name: Run BP Tree Tests
  working-directory: ./build
  run: |
    # Capture the exit status with `|| rc=$?` instead of a separate `rc=$?`
    # line: under an errexit shell (GitHub's default is `bash -e`) a failing
    # command aborts the script before the next line runs, which would make
    # the timeout fallback below unreachable.
    rc=0
    timeout 1800 sde-external-9.58.0-2025-06-16-lin/sde64 -icl -emu-xinuse 0 -- \
      ./bp_tree_tests --gtest_output=xml:bp_results.xml || rc=$?
    # 124 is the status `timeout` reports when the time limit was hit.
    if [ $rc -eq 124 ] && grep -q 'failures="0"' bp_results.xml 2>/dev/null; then
      echo "SDE timed out during process teardown (known SDE/ASan issue) - all tests passed, treating as success"
      exit 0
    fi
    exit $rc

9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,15 @@ if(PIXIE_TESTS)
gtest
gtest_main
${PIXIE_DIAGNOSTICS_LIBS})

add_executable(bp_tree_tests
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests should be added to coverage/build-test workflows (but not for AVX-512).

src/tests/bp_tree_tests.cpp)
target_include_directories(bp_tree_tests
PUBLIC include)
target_link_libraries(bp_tree_tests
gtest
gtest_main
${PIXIE_DIAGNOSTICS_LIBS})
endif()

# ---------------------------------------------------------------------------
Expand Down
136 changes: 136 additions & 0 deletions include/pixie/bp_tree.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#pragma once

#include <pixie/rmm_tree.h>

#include <cstdint>

namespace pixie {

/**
* @brief A tree class based on the balanced parentheses (BP)
* representation
*/
class BpTree {
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also think it is better to write abbreviations in upper case for class names, i.e. BPTree

private:
const size_t num_bits_;
RmMTree rmm;
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rmm_


public:
/**
 * @brief Handle to a single node of the BP tree.
 *
 * Stores the node number together with the node's position in the BP
 * bit sequence.
 */
struct Node {
  size_t number;  ///< Node number.
  size_t pos;     ///< Position in the BP sequence.

  Node(size_t node_number, size_t bp_pos)
      : number{node_number}, pos{bp_pos} {}
};

/**
 * @brief Builds the tree from an externally supplied array of 64-bit words.
 * @param words Words holding the BP bit sequence; forwarded to RmMTree.
 * @param tree_size Number of nodes; the sequence spans 2 * tree_size bits.
 */
explicit BpTree(const std::vector<std::uint64_t>& words, size_t tree_size)
    : num_bits_{tree_size * 2}, rmm(words, tree_size * 2) {}

/**
 * @brief Returns the root node (number 0, position 0).
 */
Node root() const {
  return Node{0, 0};
}

/**
 * @brief Returns the number of nodes, i.e. half the length of the BP
 * bit sequence.
 */
size_t size() const {
  return num_bits_ >> 1;
}

/**
 * @brief Indicates if @p node is a leaf.
 *
 * A node is reported as a leaf when the bit following its position is 0,
 * or when its position is the last bit of the sequence.
 */
bool is_leaf(const Node& node) const {
  const size_t next = node.pos + 1;
  if (next == num_bits_) {
    return true;
  }
  return rmm.bit(next) == 0;
}

/**
* @brief Indicates if @p node is a root
*/
bool is_root(const Node& node) { return node.number == 0; }

/**
* @brief Returns the number of children of a @p node
* this method has O(d) time complexity!
*
* TODO try make this faster
*/
size_t degree(const Node& node) const {
if (is_leaf(node)) {
return 0;
}
Node child = first_child(node);
size_t child_count = 1;
while (true) {
if (is_last_child(child)) {
return child_count;
}
child = next_sibling(child);
child_count++;
}
}

/**
 * @brief Returns the first child of @p node.
 *
 * The child is placed one position and one number after @p node.
 * No check is made that @p node actually has children.
 */
Node first_child(const Node& node) const {
  return Node(node.number + 1, node.pos + 1);
}

/**
 * @brief Returns the i-th child of @p node (0-based).
 *
 * Starts at the first child and advances @p i times with next_sibling(),
 * so the cost is O(i). No bounds check is performed on @p i.
 *
 * TODO try make this faster
 */
Node child(const Node& node, size_t i) const {
  Node cur = first_child(node);
  for (size_t step = 0; step < i; ++step) {
    cur = next_sibling(cur);
  }
  return cur;
}

/**
 * @brief Returns the parent of @p node; for the root, returns the root.
 *
 * NOTE(review): the number is derived as rank1(pos) - 1 here, while
 * next_sibling() uses rank1(pos + 1) - 1 - confirm this matches the return
 * convention of RmMTree::enclose / RmMTree::rank1.
 */
Node parent(const Node& node) const {
  if (node.number != 0) {
    const size_t enclosing = rmm.enclose(node.pos + 1);
    return Node(rmm.rank1(enclosing) - 1, enclosing);
  }
  return root();
}

/**
 * @brief Indicates if @p node is the last child of its parent.
 *
 * True when the position right after the node's closing position is past
 * the end of the sequence or holds a 0 bit.
 */
bool is_last_child(const Node& node) const {
  const size_t after_close = rmm.close(node.pos + 1) + 1;
  return after_close >= num_bits_ || rmm.bit(after_close) == 0;
}

/**
 * @brief Returns the next sibling of @p node.
 *
 * The sibling starts at the position right after the one returned by
 * rmm.close(); its number is obtained from rank1. No check is made that
 * a next sibling exists (see is_last_child()).
 */
Node next_sibling(const Node& node) const {
  const size_t sibling_pos = rmm.close(node.pos + 1) + 1;
  const size_t sibling_number = rmm.rank1(sibling_pos + 1) - 1;
  return Node(sibling_number, sibling_pos);
}
};
} // namespace pixie
32 changes: 25 additions & 7 deletions include/pixie/rmm_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,13 @@ class RmMTree {
return (result == npos ? npos : result + 1);
}

/**
 * @brief Reads the bit at @p position (LSB-first across 64-bit words).
 * @return 1 if the bit is set, 0 otherwise. No bounds check is performed.
 */
inline int bit(const size_t& position) const noexcept {
  const size_t word_index = position / 64;
  const size_t bit_offset = position % 64;
  return static_cast<int>((bits[word_index] >> bit_offset) & 1u);
}

private:
/**
* @brief Count "10" occurrences inside a 64-bit slice of given logical
Expand Down Expand Up @@ -2196,6 +2203,24 @@ class RmMTree {
build(leaf_block_bits, max_overhead);
}

public:
/**
 * @brief Export internal bitvector into a 0/1 string.
 *
 * @return A string of num_bits characters where character i is '1' when
 * bit i is set and '0' otherwise. Bits are read LSB-first from 64-bit
 * words (word i >> 6, offset i & 63), one loop iteration per bit.
 */
std::string to_string() const {
std::string result;
result.resize(num_bits);

for (size_t i = 0; i < num_bits; ++i) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SUGGESTION: Optimize bit-to-string conversion for large vectors

The per-bit loop does a shift and index for every bit. A byte-level LUT (256-entry) or word-level conversion can reduce overhead when exporting large bitvectors.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will wait for better times

// Select the 64-bit word holding bit i, then test the bit at offset i % 64.
uint64_t word = bits[i >> 6];
bool bit = (word >> (i & 63)) & 1ULL;
result[i] = bit ? '1' : '0';
}

return result;
}

private:
/**
* @brief Build internal structures from 64-bit words.
* @param words Words with LSB-first bits.
Expand All @@ -2215,13 +2240,6 @@ class RmMTree {
build(leaf_block_bits, max_overhead);
}

/**
 * @brief Reads the bit at @p position (LSB-first across 64-bit words).
 * @return 1 if the bit is set, 0 otherwise. No bounds check is performed.
 */
inline int bit(const size_t& position) const noexcept {
  const size_t word_index = position / 64;
  const size_t bit_offset = position % 64;
  return static_cast<int>((bits[word_index] >> bit_offset) & 1u);
}

/**
* @brief Set bit at position @p position to 1.
*/
Expand Down
62 changes: 61 additions & 1 deletion include/pixie/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
#include <random>
#include <vector>

#include "bp_tree.h"

using Node = pixie::BpTree::Node;
using pixie::LoudsNode;

std::vector<std::vector<size_t>> generate_random_tree(size_t tree_size,
Expand All @@ -30,7 +33,7 @@ std::vector<std::vector<size_t>> bfs_order(
std::queue<std::pair<size_t, size_t>> q;
bfs_adj[0].push_back(0);
q.push({0, 0});
int cnt = 1;
size_t cnt = 1;
while (!q.empty()) {
size_t old_v = q.front().first;
size_t cur_v = q.front().second;
Expand All @@ -46,6 +49,32 @@ std::vector<std::vector<size_t>> bfs_order(
return bfs_adj;
}

/**
 * @brief Renumbers a rooted tree in depth-first (preorder) visiting order.
 *
 * Slot 0 of every adjacency list is skipped during traversal (it is treated
 * as the parent entry); the remaining slots are visited as children. Vertex
 * 0 is the root and keeps number 0.
 *
 * @param tree_size Number of vertices.
 * @param adj Adjacency lists indexed by original vertex number.
 * @return Adjacency lists in the new numbering, where each list starts with
 *         the parent (the root lists itself) followed by its children in
 *         visiting order. Empty when @p tree_size is 0.
 */
std::vector<std::vector<size_t>> dfs_order(
    size_t tree_size,
    const std::vector<std::vector<size_t>>& adj) {
  std::vector<std::vector<size_t>> dfs_adj(tree_size);
  if (tree_size == 0) {
    // Nothing to number; avoids indexing dfs_adj[0] on an empty vector.
    return dfs_adj;
  }
  dfs_adj[0].push_back(0);

  std::vector<size_t> renumbering(tree_size, 0);
  size_t next_number = 1;

  // Each frame is (original vertex, index of the last adjacency slot handled).
  std::vector<std::pair<size_t, size_t>> stack;
  stack.emplace_back(0, 0);
  while (!stack.empty()) {
    auto& [v, i] = stack.back();
    ++i;
    // `>=` rather than `==`: an empty adjacency list must not be indexed.
    if (i >= adj[v].size()) {
      stack.pop_back();
      continue;
    }
    const size_t u = adj[v][i];
    const size_t parent_number = renumbering[v];
    const size_t child_number = next_number++;
    renumbering[u] = child_number;
    dfs_adj[parent_number].push_back(child_number);
    dfs_adj[child_number].push_back(parent_number);

    // v and i are not used after this point: push may reallocate the stack.
    stack.emplace_back(u, 0);
  }
  return dfs_adj;
}

std::vector<uint64_t> adj_to_louds(
size_t tree_size,
const std::vector<std::vector<size_t>>& adj) {
Expand All @@ -59,6 +88,29 @@ std::vector<uint64_t> adj_to_louds(
return louds;
}

/**
 * @brief Encodes a rooted tree as a balanced-parentheses bit sequence.
 *
 * Performs an iterative depth-first walk from vertex 0. A 1 bit is written
 * when a vertex is entered and the position is advanced (leaving a 0 bit)
 * when it is left. Slot 0 of every adjacency list is skipped (treated as
 * the parent entry). Bits are stored LSB-first in 64-bit words.
 *
 * @param tree_size Number of vertices; the sequence spans 2 * tree_size bits.
 * @param adj Adjacency lists indexed by vertex number.
 * @return Packed bit sequence; empty when @p tree_size is 0.
 */
std::vector<uint64_t> adj_to_bp(size_t tree_size,
                                const std::vector<std::vector<size_t>>& adj) {
  const size_t bp_size = tree_size * 2;
  std::vector<uint64_t> bp((bp_size + 63) / 64, 0);
  if (tree_size == 0) {
    // No words were allocated, so there is no bit 0 to set for a root.
    return bp;
  }

  const auto set_bit = [&bp](size_t at) { bp[at >> 6] |= 1ULL << (at & 63); };

  size_t pos = 0;
  set_bit(pos);  // opening parenthesis of the root

  // Each frame is (vertex, index of the last adjacency slot handled).
  std::vector<std::pair<size_t, size_t>> stack;
  stack.emplace_back(0, 0);
  while (!stack.empty()) {
    auto& [v, p] = stack.back();
    ++p;
    ++pos;
    if (p >= adj[v].size()) {
      // All children done: the bit at pos stays 0 (closing parenthesis).
      stack.pop_back();
      continue;
    }
    set_bit(pos);
    const size_t next_vertex = adj[v][p];
    // v and p are not used after this point: push may reallocate the stack.
    stack.emplace_back(next_vertex, 0);
  }
  return bp;
}

struct AdjListNode {
size_t number;
};
Expand All @@ -71,6 +123,14 @@ bool operator==(const LoudsNode& b, const AdjListNode& a) {
return a.number == b.number;
}

/// Compares an adjacency-list node with a BP-tree node by node number.
bool operator==(const AdjListNode& a, const Node& b) {
  const bool same_number = (b.number == a.number);
  return same_number;
}

/// Compares a BP-tree node with an adjacency-list node by node number.
bool operator==(const Node& b, const AdjListNode& a) {
  const bool same_number = (b.number == a.number);
  return same_number;
}

class AdjListTree {
private:
std::vector<std::vector<size_t>> adj;
Expand Down
1 change: 1 addition & 0 deletions scripts/coverage_report.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ cmake --build --preset coverage
"${BUILD_DIR}/unittests"
"${BUILD_DIR}/excess_positions_tests"
"${BUILD_DIR}/louds_tree_tests"
"${BUILD_DIR}/bp_tree_tests"
"${BUILD_DIR}/test_rmm"

cd "${BUILD_DIR}"
Expand Down
Loading
Loading