diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index b383db9..29903f7 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -32,6 +32,10 @@ jobs: working-directory: ./build run: ./louds_tree_tests + - name: Run DFUDS Tree Tests + working-directory: ./build + run: ./dfuds_tree_tests + - name: Run Benchmark Tests working-directory: ./build run: ./benchmark_tests @@ -92,3 +96,15 @@ jobs: fi exit $rc + - name: Run DFUDS Tree Tests + working-directory: ./build + run: | + timeout 1800 sde-external-9.58.0-2025-06-16-lin/sde64 -icl -emu-xinuse 0 -- \ + ./dfuds_tree_tests --gtest_output=xml:dfuds_results.xml + rc=$? + if [ $rc -eq 124 ] && grep -q 'failures="0"' dfuds_results.xml 2>/dev/null; then + echo "SDE timed out during process teardown (known SDE/ASan issue) - all tests passed, treating as success" + exit 0 + fi + exit $rc + diff --git a/CMakeLists.txt b/CMakeLists.txt index 01444ac..6ce656b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,6 +157,15 @@ if(PIXIE_TESTS) gtest gtest_main ${PIXIE_DIAGNOSTICS_LIBS}) + + add_executable(dfuds_tree_tests + src/tests/dfuds_tree_tests.cpp) + target_include_directories(dfuds_tree_tests + PUBLIC include) + target_link_libraries(dfuds_tree_tests + gtest + gtest_main + ${PIXIE_DIAGNOSTICS_LIBS}) endif() # --------------------------------------------------------------------------- @@ -201,6 +210,15 @@ if(PIXIE_BENCHMARKS) benchmark_main ${PIXIE_DIAGNOSTICS_LIBS}) + add_executable(dfuds_tree_benchmarks + src/benchmarks/dfuds_tree_benchmarks.cpp) + target_include_directories(dfuds_tree_benchmarks + PUBLIC include) + target_link_libraries(dfuds_tree_benchmarks + benchmark + benchmark_main + ${PIXIE_DIAGNOSTICS_LIBS}) + add_executable(alignment_comparison src/benchmarks/alignment_comparison.cpp) target_include_directories(alignment_comparison diff --git a/include/pixie/dfuds_tree.h b/include/pixie/dfuds_tree.h new file mode 100644 index 0000000..578fcb9 --- 
/dev/null +++ b/include/pixie/dfuds_tree.h @@ -0,0 +1,162 @@ +#pragma once + +#include + +#include + +#include "utils.h" + +namespace pixie { + +/** + * @brief A tree class based on the depth-first unary degree sequence (DFUDS) + * representation + * + * Holds the sequence length (num_bits_ = 2 * tree_size - 1) and an RmMTree + * built over the bit sequence; navigation is implemented on top of RmMTree + * queries (bit, select0, rank0, open, close, fwdsearch). + */ +class DFUDSTree { + private: + const size_t num_bits_; + RmMTree rmm_; + + public: + struct Node { + size_t number; + + size_t pos; + + /** + * @brief A node class of DFUDS tree + * + * number: root() uses 0 and first_child() assigns the parent number + 1; + * child() and next_sibling() take it from rmm_.rank0(pos). + * pos: index into the DFUDS bit sequence; root() uses 0. + */ + Node(size_t node_number, size_t dfuds_pos) + : number(node_number), pos(dfuds_pos) {} + }; + + /** + * @brief Constructor from an external array of uint64_t + * + * @param dfuds_sequence parenthesis sequence in dfuds representation + * @param tree_size number of nodes; the sequence length is taken to be + * 2 * tree_size - 1 bits + * + * NOTE(review): tree_size is an unsigned size_t, so tree_size == 0 makes + * 2 * tree_size - 1 wrap around - presumably callers must pass + * tree_size >= 1; confirm. + */ + explicit DFUDSTree(const std::vector& dfuds_sequence, + size_t tree_size) + : num_bits_(2 * tree_size - 1), rmm_(dfuds_sequence, 2 * tree_size - 1) {} + + /** + * @brief Returns the root node + */ + static Node root() { return Node(0, 0); } + + /** + * @brief Returns the size of the tree + * + * Computed as (num_bits_ + 1) / 2. + */ + size_t size() const { return (num_bits_ + 1) / 2; } + + /** + * @brief Indicates if @p node is a leaf + * + * True when the bit at node.pos is 0, or when node.pos is the last position + * of the sequence. + */ + bool is_leaf(const Node& node) const { + return (node.pos + 1 == num_bits_) or rmm_.bit(node.pos) == 0; + } + + /** + * @brief Indicates if @p node is a root + */ + bool is_root(const Node& node) const { return node.number == 0; } + + /** + * @brief Returns the number of children of a @p node + * + * Computed as rmm_.select0(node.number + 1) - node.pos. + */ + size_t degree(const Node& node) const { + return rmm_.select0(node.number + 1) - node.pos; + } + + /** + * @brief Returns first child of a @p node + * + * The child gets number node.number + 1 and the position right after + * rmm_.select0(node.number + 1). + * NOTE(review): no leaf check is made here - presumably the caller must + * ensure @p node has a child; confirm. + * NOTE(review): this method is not const-qualified although it only calls + * rmm_.select0, which the const method degree() also calls; as written it + * cannot be invoked on a const DFUDSTree. + */ + Node first_child(const Node& node) { + size_t pos = rmm_.select0(node.number + 1); + size_t num = node.number + 1; + return Node(num, pos + 1); + } + + /** + * @brief Returns the i-th child of @p node + * Indexing starts at 0 + * + * NOTE(review): @p i is not checked against degree(node) - confirm callers + * guarantee i < degree(node). + */ + Node child(const Node& node, size_t i) const { + size_t pos = rmm_.close(rmm_.select0(node.number + 1) - i) + 1; + size_t num = rmm_.rank0(pos); + return Node(num, pos); + } + + /** + * @brief Returns next sibling of a @p node + * + * The result position is one past rmm_.fwdsearch(node.pos, -1) and the + * number is rmm_.rank0 of that position. + * NOTE(review): no check that a next sibling exists - presumably the caller + * must ensure it; confirm. + 
*/ + Node next_sibling(const Node& node) const { + size_t end = rmm_.fwdsearch(node.pos, -1); + size_t pos = end + 1; + size_t num = rmm_.rank0(pos); + return Node(num, pos); + } + + /** + * @brief Returns the parent of a @p node if @p node is not root, + * else returns root + */ + Node parent(const Node& node) const { + if (node.number == 0) { + return root(); + } + size_t open = rmm_.open( + node.pos); // node.pos is 0-based and rmm_.open takes a 1-based argument. + // Thus, we use node.pos meaning the parenthesis before + // first parenthesis of the current node + size_t rank = rmm_.rank0(open); + size_t pos = + rmm_.select0(rank) + + 1; // Here we rely on rmm_.select0(0) being the size_t max value, so + // rmm_.select0(rank) can still be interpreted as pos-1 + return Node(rank, pos); + } + + /** + * @brief Indicates if @p node is last child + * + * Compares rmm_.open() of node.pos with rmm_.open() of the position one + * past rmm_.fwdsearch(node.pos, -1); also true when that position equals + * num_bits_. + * NOTE(review): rmm_.open(pos) is evaluated before the pos == num_bits_ + * check, so it is also called with pos == num_bits_ - confirm that + * RmMTree::open tolerates that argument. + */ + bool is_last_child(const Node& node) const { + size_t end = rmm_.fwdsearch(node.pos, -1); + size_t pos = end + 1; + size_t op = rmm_.open(node.pos); + size_t op2 = rmm_.open(pos); + return pos == num_bits_ || op != op2 + 1; + } +}; +/** + * @brief Builds the DFUDS bit sequence of a tree given as adjacency lists + * + * Vertices are visited from vertex 0 using an explicit stack. For each + * visited vertex one 1-bit is written per list entry except one, then one + * position is skipped and stays 0. Bits are packed 64 per vector element, + * lowest bit first (pos >> 6 selects the element, pos & 63 the bit). + * Entries are pushed from the back of the list, so index 1 is popped first. + * + * NOTE(review): edge_count - 1 is unsigned; an empty adj[v] makes it wrap + * and the loop would index out of range. tree_size == 0 likewise wraps + * dfuds_size. Presumably every list holds at least the parent entry; confirm. + * NOTE(review): this is a non-inline function defined in a header; including + * the header from two translation units of one program would produce + * duplicate definitions - consider marking it inline. + */ +std::vector adj_to_dfuds( + size_t tree_size, + const std::vector>& adj) { + size_t dfuds_size = tree_size * 2 - 1; + std::vector dfuds((dfuds_size + 63) / 64, 0); + std::vector stack; + stack.push_back(0); + size_t pos = 0; + while (!stack.empty()) { + auto v = stack.back(); + stack.pop_back(); + size_t edge_count = adj[v].size(); + for (size_t i = 0; i < edge_count - 1; ++i) { // edge 0 goes to parent + dfuds[pos >> 6] = dfuds[pos >> 6] | (1ULL << (pos & 63)); + pos++; + stack.push_back(adj[v][edge_count - 1 - i]); + } + pos++; + } + return dfuds; +} +/** + * @brief Equality of an AdjListNode and a DFUDSTree::Node; only the number + * fields are compared. + * NOTE(review): non-inline definition in a header - duplicate definitions if + * the header is included from more than one translation unit. + */ +bool operator==(const AdjListNode& a, const DFUDSTree::Node& b) { + return a.number == b.number; +} +/** + * @brief Same number-only comparison with the operand order swapped. + * NOTE(review): non-inline definition in a header, as above. + */ +bool operator==(const DFUDSTree::Node& b, const AdjListNode& a) { + return a.number == b.number; +} + +} // namespace pixie diff --git a/include/pixie/rmm_tree.h b/include/pixie/rmm_tree.h index fe8bbe7..03c2c53 100644 --- 
a/include/pixie/rmm_tree.h +++ b/include/pixie/rmm_tree.h @@ -966,6 +966,13 @@ class RmMTree { return (result == npos ? npos : result + 1); } + /** + * @brief Read bit at position @p position (LSB-first across words). + * + * Word index is position >> 6 and bit index is position & 63. + * This patch places the definition before the private: label and removes + * the identical copy further down; DFUDSTree::is_leaf calls it. + */ + inline int bit(const size_t& position) const noexcept { + return (bits[position >> 6] >> (position & 63)) & 1u; + } + private: /** * @brief Count "10" occurrences inside a 64-bit slice of given logical @@ -2215,13 +2222,6 @@ class RmMTree { build(leaf_block_bits, max_overhead); } - /** - * @brief Read bit at position @p position (LSB-first across words). - */ - inline int bit(const size_t& position) const noexcept { - return (bits[position >> 6] >> (position & 63)) & 1u; - } - /** * @brief Set bit at position @p position to 1. */ diff --git a/include/pixie/utils.h b/include/pixie/utils.h index 24116d5..2ce0234 100644 --- a/include/pixie/utils.h +++ b/include/pixie/utils.h @@ -46,6 +46,32 @@ std::vector> bfs_order( return bfs_adj; } +/** + * @brief Renumbers a rooted tree in DFS visiting order and returns the + * adjacency lists in the new numbering + * + * Vertex 0 keeps number 0 and its list starts with 0 itself. Entry 0 of every + * input list is never followed (the index is incremented before its first + * use). Each newly reached vertex gets the next free number, is appended to + * the list of its parent, and gets the parent as the first entry of its own + * list. + * + * NOTE(review): v and i are references into stack.back(); they are not read + * after the final stack.push_back, which may reallocate the stack storage - + * keep it that way when editing. + * NOTE(review): an empty adj[v] is not handled: i is already 1 when compared + * with adj[v].size(), so adj[v][i] would be read out of range - presumably + * every list holds at least the parent entry; confirm. + */ std::vector> dfs_order( + size_t tree_size, + const std::vector>& adj) { + std::vector> dfs_adj(tree_size); + std::vector> stack; + dfs_adj[0].push_back(0); + stack.push_back({0, 0}); + std::vector renumbering(tree_size, 0); + size_t next_number = 1; + while (!stack.empty()) { + auto& [v, i] = stack.back(); + i++; + if (i == adj[v].size()) { + stack.pop_back(); + continue; + } + size_t u = adj[v][i]; + renumbering[u] = next_number++; + dfs_adj[renumbering[v]].push_back(renumbering[u]); + dfs_adj[renumbering[u]].push_back(renumbering[v]); + + stack.push_back(std::pair{u, 0}); + } + return dfs_adj; +} + std::vector adj_to_louds( size_t tree_size, const std::vector>& adj) { diff --git a/scripts/coverage_report.sh b/scripts/coverage_report.sh index d1fdd1b..84bec46 100755 --- a/scripts/coverage_report.sh +++ b/scripts/coverage_report.sh @@ -10,6 +10,7 @@ cmake --build --preset coverage "${BUILD_DIR}/unittests" "${BUILD_DIR}/excess_positions_tests" "${BUILD_DIR}/louds_tree_tests" +"${BUILD_DIR}/dfuds_tree_tests" 
"${BUILD_DIR}/test_rmm" cd "${BUILD_DIR}" diff --git a/src/benchmarks/dfuds_tree_benchmarks.cpp b/src/benchmarks/dfuds_tree_benchmarks.cpp new file mode 100644 index 0000000..b4d4202 --- /dev/null +++ b/src/benchmarks/dfuds_tree_benchmarks.cpp @@ -0,0 +1,68 @@ +#include +#include +#include + +#include + +using Node = pixie::DFUDSTree::Node; +using pixie::adj_to_dfuds; +using pixie::DFUDSTree; + +/** + * DFS with O(1) extra memory + * + * Each iteration builds a random tree of state.range(0) nodes and a DFUDSTree + * between PauseTiming() and ResumeTiming(), then walks it: while above is + * true the walk descends with first_child() until is_leaf(); otherwise it + * moves to next_sibling(), or climbs with parent() when is_last_child(), + * stopping once parent() yields the root. + * rng is constructed once before the loop and passed to + * generate_random_tree on every iteration. + */ +static void BM_DfudsTreeDFS(benchmark::State& state) { + size_t tree_size = state.range(0); + std::mt19937_64 rng(42); + + for (auto _ : state) { + state.PauseTiming(); + + std::vector> adj = generate_random_tree(tree_size, rng); + adj = dfs_order(tree_size, adj); + std::vector dfuds = adj_to_dfuds(tree_size, adj); + DFUDSTree tree(dfuds, tree_size); + + Node cur = tree.root(); + bool above = 1; + + state.ResumeTiming(); + + benchmark::DoNotOptimize(cur); + + while (true) { + if (above) { + if (tree.is_leaf(cur)) { + above = 0; + } else { + cur = tree.first_child(cur); + } + benchmark::DoNotOptimize(cur); + } else { + if (tree.is_last_child(cur)) { + cur = tree.parent(cur); + if (tree.is_root(cur)) { + break; + } + benchmark::DoNotOptimize(cur); + } else { + cur = tree.next_sibling(cur); + above = 1; + benchmark::DoNotOptimize(cur); + } + } + } + } +} +/** + * Two registrations of the same benchmark: tree_size from 1 << 8 to 1 << 18 + * with 100 iterations, then from 1 << 18 to 1 << 26 with 10 iterations. + * NOTE(review): both ranges name 1 << 18, so that size is presumably + * registered twice - confirm this is intended. + */ +BENCHMARK(BM_DfudsTreeDFS) + ->ArgNames({"tree_size"}) + ->RangeMultiplier(2) + ->Range(1ull << 8, 1ull << 18) + ->Iterations(100); + +BENCHMARK(BM_DfudsTreeDFS) + ->ArgNames({"tree_size"}) + ->RangeMultiplier(2) + ->Range(1ull << 18, 1ull << 26) + ->Iterations(10); diff --git a/src/tests/dfuds_tree_tests.cpp b/src/tests/dfuds_tree_tests.cpp new file mode 100644 index 0000000..fcaaa39 --- /dev/null +++ b/src/tests/dfuds_tree_tests.cpp @@ -0,0 +1,111 @@ +#include "pixie/dfuds_tree.h" + +#include +#include + +#include +#include + +using Node = pixie::DFUDSTree::Node; +using pixie::adj_to_dfuds; +using pixie::DFUDSTree; +/** + * Walks a 5-node path (every list holds the parent entry plus at most one + * child) with child(cur, 0) and compares the node against AdjListTree at + * every step; the comparison only looks at the number fields. + */ +TEST(DfudsTreeTest, Basic) { + std::vector> adj = {{0, 1}, {0, 
2}, {1, 3}, {2, 4}, {3}}; + size_t tree_size = 5; + + std::vector dfuds = adj_to_dfuds(tree_size, adj); + + DFUDSTree dfuds_tree(dfuds, 5); + AdjListTree debug_tree(adj); + + Node cur = dfuds_tree.root(); + AdjListNode debug = debug_tree.root(); + for (size_t i = 0; i < tree_size - 1; i++) { + EXPECT_EQ(cur, debug); + cur = dfuds_tree.child(cur, 0); + debug = debug_tree.child(debug, 0); + } + EXPECT_EQ(cur, debug); +} +/** + * For tree sizes 8, 16, ... below 1 << 22: builds a random tree (rng is + * reseeded with 42 for every size), renumbers it with dfs_order, and walks + * DFUDSTree and AdjListTree in lockstep using a stack. Checks node equality, + * parent, is_last_child (non-root nodes only) and degree; children are + * enumerated with first_child followed by next_sibling. + */ +TEST(DfudsTreeTest, RandomTreeDFS) { + for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) { + std::mt19937_64 rng(42); + std::vector> adj = generate_random_tree(tree_size, rng); + adj = dfs_order(tree_size, adj); + std::vector dfuds = adj_to_dfuds(tree_size, adj); + DFUDSTree dfuds_tree(dfuds, tree_size); + AdjListTree debug_tree(adj); + + std::stack> st; + + st.push({dfuds_tree.root(), debug_tree.root()}); + + while (!st.empty()) { + auto cur = st.top().first; + auto debug = st.top().second; + st.pop(); + EXPECT_EQ(cur, debug); + EXPECT_EQ(dfuds_tree.parent(cur), debug_tree.parent(debug)); + + if (cur.number > 0) { + EXPECT_EQ(dfuds_tree.is_last_child(cur), + debug_tree.is_last_child(debug)); + } + size_t deg = dfuds_tree.degree(cur); + EXPECT_EQ(deg, debug_tree.degree(debug)); + + if (deg == 0) { + continue; + } + auto child = dfuds_tree.first_child(cur); + auto debug_child = debug_tree.first_child(debug); + st.push({child, debug_child}); + for (size_t i = 1; i < deg; i++) { + child = dfuds_tree.next_sibling(child); + st.push({child, debug_tree.child(debug, i)}); + } + } + } +} +/** + * Lockstep walk of DFUDSTree and AdjListTree over random trees of sizes 8, + * 16, ... below 1 << 22, using a queue so nodes are processed in + * first-in-first-out order. Checks parent, is_last_child (non-root nodes + * only) and degree. + * NOTE(review): the popped pair itself is not compared here, while + * RandomTreeDFS has EXPECT_EQ(cur, debug) - confirm the omission is + * intentional. + */ +TEST(DfudsTreeTest, RandomTreeBFS) { + for (size_t tree_size = 8; tree_size < (1 << 22); tree_size <<= 1) { + std::mt19937_64 rng(42); + std::vector> adj = generate_random_tree(tree_size, rng); + adj = dfs_order(tree_size, adj); + std::vector dfuds = adj_to_dfuds(tree_size, adj); + DFUDSTree dfuds_tree(dfuds, tree_size); + AdjListTree debug_tree(adj); + + std::queue> st; + + st.push({dfuds_tree.root(), debug_tree.root()}); + + while (!st.empty()) { + 
auto cur = st.front().first; + auto debug = st.front().second; + st.pop(); + EXPECT_EQ(dfuds_tree.parent(cur), debug_tree.parent(debug)); + + if (cur.number > 0) { + EXPECT_EQ(dfuds_tree.is_last_child(cur), + debug_tree.is_last_child(debug)); + } + size_t deg = dfuds_tree.degree(cur); + EXPECT_EQ(deg, debug_tree.degree(debug)); + + if (deg == 0) { + continue; + } + auto child = dfuds_tree.first_child(cur); + auto debug_child = debug_tree.first_child(debug); + st.push({child, debug_child}); + for (size_t i = 1; i < deg; i++) { + child = dfuds_tree.next_sibling(child); + st.push({child, debug_tree.child(debug, i)}); + } + } + } +}