Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions include/cuco/detail/open_addressing/constraints.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,16 @@ struct open_addressing_compatible {
[] {
if constexpr (has_payload) {
constexpr auto payload_size = sizeof(typename Value::second_type);
return payload_size == 4 or payload_size == 8
#if defined(CUCO_HAS_128BIT_ATOMICS)
or payload_size == 16
return payload_size <= 16;
#else
return payload_size <= 8;
#endif
;
} else {
return true;
}
}(),
"Payload size must be 4 or 8 bytes (or 16 with sm_90+).");
"Payload size exceeds the maximum supported size (8 bytes, or 16 with sm_90+).");

static_assert(
cuco::is_bitwise_comparable_v<Key>,
Expand Down
18 changes: 17 additions & 1 deletion include/cuco/detail/pair/helpers.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
* Copyright (c) 2021-2026, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -47,6 +47,22 @@ struct packed {
using type = void; ///< `void` type by default
};

/**
* @brief Denotes the packed type when the size of the object is 1.
*/
template <>
struct packed<sizeof(uint8_t)> {
using type = uint8_t; ///< Packed type as `uint8_t` if the size of the object is 1
};

/**
* @brief Denotes the packed type when the size of the object is 2.
*/
template <>
struct packed<sizeof(uint16_t)> {
using type = uint16_t; ///< Packed type as `uint16_t` if the size of the object is 2
};
Comment thread
sleeepyjack marked this conversation as resolved.

/**
* @brief Denotes the packed type when the size of the object is 8.
*/
Expand Down
36 changes: 24 additions & 12 deletions tests/static_map/find_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ TEMPLATE_TEST_CASE_SIG(
Value,
Probe,
CGSize),
(int8_t, int8_t, cuco::test::probe_sequence::double_hashing, 1),
(int8_t, int8_t, cuco::test::probe_sequence::double_hashing, 2),
Comment thread
sleeepyjack marked this conversation as resolved.
(int8_t, int16_t, cuco::test::probe_sequence::double_hashing, 2),
(int16_t, int16_t, cuco::test::probe_sequence::double_hashing, 1),
(int16_t, int16_t, cuco::test::probe_sequence::double_hashing, 2),
(int32_t, int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, int64_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, int32_t, cuco::test::probe_sequence::double_hashing, 2),
Expand All @@ -146,6 +151,11 @@ TEMPLATE_TEST_CASE_SIG(
(int64_t, int64_t, cuco::test::probe_sequence::double_hashing, 1),
(int64_t, int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, int64_t, cuco::test::probe_sequence::double_hashing, 2),
(int8_t, int8_t, cuco::test::probe_sequence::linear_probing, 1),
(int8_t, int8_t, cuco::test::probe_sequence::linear_probing, 2),
Comment thread
sleeepyjack marked this conversation as resolved.
(int8_t, int16_t, cuco::test::probe_sequence::linear_probing, 2),
(int16_t, int16_t, cuco::test::probe_sequence::linear_probing, 1),
(int16_t, int16_t, cuco::test::probe_sequence::linear_probing, 2),
(int32_t, int32_t, cuco::test::probe_sequence::linear_probing, 1),
(int32_t, int64_t, cuco::test::probe_sequence::linear_probing, 1),
(int32_t, int32_t, cuco::test::probe_sequence::linear_probing, 2),
Expand All @@ -162,7 +172,7 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr size_type num_keys{301};
constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 301;

// XXX: testing static extent is intended, DO NOT CHANGE
using extent_type = cuco::extent<size_type, num_keys>;
Expand All @@ -180,17 +190,19 @@ TEMPLATE_TEST_CASE_SIG(
}
}();

auto map = cuco::static_map<Key,
Value,
extent_type,
cuda::thread_scope_device,
cuda::std::equal_to<Key>,
probe,
cuco::cuda_allocator<cuda::std::byte>,
cuco::storage<2>>{
extent_type{}, cuco::empty_key<Key>{SENTINEL}, cuco::empty_value<Value>{SENTINEL}};

REQUIRE(map.capacity() == gold_capacity);
auto map =
cuco::static_map<Key,
Value,
extent_type,
cuda::thread_scope_device,
cuda::std::equal_to<Key>,
probe,
cuco::cuda_allocator<cuda::std::byte>,
cuco::storage<2>>{extent_type{},
cuco::empty_key<Key>{static_cast<Key>(SENTINEL)},
cuco::empty_value<Value>{static_cast<Value>(SENTINEL)}};

if constexpr (sizeof(Key) > 1) { REQUIRE(map.capacity() == gold_capacity); }

test_unique_sequence(map, num_keys);
}
29 changes: 23 additions & 6 deletions tests/static_set/for_each_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

using size_type = std::size_t;

template <typename Set>
void test_for_each(Set& set, size_type num_keys)
void test_for_each(Set& set, size_type num_keys, size_type expected_evens, size_type expected_odds)
{
using Key = typename Set::key_type;

Expand All @@ -40,7 +42,7 @@ void test_for_each(Set& set, size_type num_keys)
// Insert keys
auto keys_begin = cuda::make_transform_iterator(
cuda::counting_iterator<size_type>{0}, cuda::proclaim_return_type<Key>([] __device__(auto i) {
// generates a sequence of 1, 2, 1, 2, ...
// generates a sequence of 0, 1, 2, ...
return static_cast<Key>(i);
}));
set.insert(keys_begin, keys_begin + num_keys, stream);
Expand All @@ -56,7 +58,7 @@ void test_for_each(Set& set, size_type num_keys)
if (slot % 2 == 0) { counter->fetch_add(slot, cuda::memory_order_relaxed); }
},
stream);
REQUIRE(counter_storage.load_to_host(stream) == 249'500);
REQUIRE(counter_storage.load_to_host(stream) == expected_evens);

counter_storage.reset(stream);

Expand All @@ -68,13 +70,21 @@ void test_for_each(Set& set, size_type num_keys)
if (!(slot % 2 == 0)) { counter->fetch_add(slot, cuda::memory_order_relaxed); }
},
stream);
REQUIRE(counter_storage.load_to_host(stream) == 250'000);
REQUIRE(counter_storage.load_to_host(stream) == expected_odds);
}

TEMPLATE_TEST_CASE_SIG(
"static_set for_each tests",
"",
((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
(int8_t, cuco::test::probe_sequence::double_hashing, 1),
(int8_t, cuco::test::probe_sequence::double_hashing, 2),
(int8_t, cuco::test::probe_sequence::linear_probing, 1),
(int8_t, cuco::test::probe_sequence::linear_probing, 2),
(int16_t, cuco::test::probe_sequence::double_hashing, 1),
(int16_t, cuco::test::probe_sequence::double_hashing, 2),
(int16_t, cuco::test::probe_sequence::linear_probing, 1),
(int16_t, cuco::test::probe_sequence::linear_probing, 2),
(int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, cuco::test::probe_sequence::double_hashing, 1),
Expand All @@ -92,7 +102,14 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr size_type num_keys{1'000};
// Limit key count for small types: leave room for the -1 sentinel.
// Expected sums are pre-computed per type class:
// int16_t (num_keys=100): sum of evens 0..98 = 2450, sum of odds 1..99 = 2500
// int16_t+ (num_keys=1000): sum of evens 0..998 = 249'500, sum of odds 1..999 = 250'000
constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 1'000;
constexpr size_type expected_evens = (sizeof(Key) == 1) ? 2'450 : 249'500;
constexpr size_type expected_odds = (sizeof(Key) == 1) ? 2'500 : 250'000;

using probe = std::conditional_t<
Probe == cuco::test::probe_sequence::linear_probing,
cuco::linear_probing<CGSize, cuco::murmurhash3_32<Key>>,
Expand All @@ -107,5 +124,5 @@ TEMPLATE_TEST_CASE_SIG(
cuco::storage<2>>;

auto set = set_t{num_keys, cuco::empty_key<Key>{static_cast<Key>(-1)}};
test_for_each(set, num_keys);
test_for_each(set, num_keys, expected_evens, expected_odds);
}
12 changes: 11 additions & 1 deletion tests/static_set/insert_and_find_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

template <typename Set>
void test_insert_and_find(Set& set, std::size_t num_keys)
{
Expand Down Expand Up @@ -56,6 +58,13 @@ TEMPLATE_TEST_CASE_SIG(
"static_set Insert and find",
"",
((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
(int8_t, cuco::test::probe_sequence::double_hashing, 1),
(int8_t, cuco::test::probe_sequence::linear_probing, 1),
(int8_t, cuco::test::probe_sequence::linear_probing, 2),
(int16_t, cuco::test::probe_sequence::double_hashing, 1),
(int16_t, cuco::test::probe_sequence::double_hashing, 2),
(int16_t, cuco::test::probe_sequence::linear_probing, 1),
(int16_t, cuco::test::probe_sequence::linear_probing, 2),
(int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, cuco::test::probe_sequence::double_hashing, 1),
Expand All @@ -73,7 +82,8 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr std::size_t num_keys{400};
// Limit key count for small types: leave room for the -1 sentinel
constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400;

using probe = std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
cuco::linear_probing<CGSize, cuco::default_hash_function<Key>>,
Expand Down
23 changes: 19 additions & 4 deletions tests/static_set/retrieve_all_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

template <typename Set>
void test_unique_sequence(Set& set, std::size_t num_keys)
{
Expand Down Expand Up @@ -61,6 +63,13 @@ TEMPLATE_TEST_CASE_SIG(
"static_set::retrieve_all tests",
"",
((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
(int8_t, cuco::test::probe_sequence::double_hashing, 1),
(int8_t, cuco::test::probe_sequence::linear_probing, 1),
(int8_t, cuco::test::probe_sequence::linear_probing, 2),
(int16_t, cuco::test::probe_sequence::double_hashing, 1),
(int16_t, cuco::test::probe_sequence::double_hashing, 2),
(int16_t, cuco::test::probe_sequence::linear_probing, 1),
(int16_t, cuco::test::probe_sequence::linear_probing, 2),
(int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, cuco::test::probe_sequence::double_hashing, 1),
Expand All @@ -78,7 +87,8 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr std::size_t num_keys{400};
// Limit key count for small types: leave room for the -1 sentinel
constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400;
constexpr double desired_load_factor = 1.;

using probe = std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
Expand All @@ -87,10 +97,15 @@ TEMPLATE_TEST_CASE_SIG(

constexpr std::size_t gold_capacity = [&]() {
if constexpr (cuco::is_double_hashing<probe>::value) {
return (CGSize == 1) ? 401 // 401 x 1 x 1
: 422; // 211 x 2 x 1
if constexpr (num_keys == 100) {
return (CGSize == 1) ? 101 // 101 x 1 x 1
: 106; // 53 x 2 x 1
} else {
return (CGSize == 1) ? 401 // 401 x 1 x 1
: 422; // 211 x 2 x 1
}
} else {
return 400;
return num_keys;
}
}();

Expand Down
7 changes: 6 additions & 1 deletion tests/static_set/retrieve_if_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

using size_type = std::size_t;

template <class Container>
Expand Down Expand Up @@ -110,6 +112,8 @@ __global__ void test_retrieve_if_all_true_kernel(
TEMPLATE_TEST_CASE_SIG("static_set retrieve_if",
"",
((typename Key), Key),
(int8_t),
(int16_t),
Comment thread
sleeepyjack marked this conversation as resolved.
(int32_t),
(int64_t)
#if defined(CUCO_HAS_128BIT_ATOMICS)
Expand All @@ -118,7 +122,8 @@ TEMPLATE_TEST_CASE_SIG("static_set retrieve_if",
#endif
)
{
constexpr size_type num_keys{400};
// Limit key count for small types: keys start at 1, sentinel is -1
constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 400;

using container_type = cuco::static_set<Key>;

Expand Down
12 changes: 11 additions & 1 deletion tests/static_set/retrieve_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

static constexpr int key_sentinel = -1;

template <typename Set>
Expand Down Expand Up @@ -75,6 +77,13 @@ TEMPLATE_TEST_CASE_SIG(
"static_set retrieve tests",
"",
((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
(int8_t, cuco::test::probe_sequence::double_hashing, 1),
(int8_t, cuco::test::probe_sequence::linear_probing, 1),
(int8_t, cuco::test::probe_sequence::linear_probing, 2),
(int16_t, cuco::test::probe_sequence::double_hashing, 1),
(int16_t, cuco::test::probe_sequence::double_hashing, 2),
(int16_t, cuco::test::probe_sequence::linear_probing, 1),
(int16_t, cuco::test::probe_sequence::linear_probing, 2),
(int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, cuco::test::probe_sequence::double_hashing, 1),
Expand All @@ -92,7 +101,8 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr std::size_t num_keys{400};
// Limit key count for small types: leave room for the -1 sentinel
constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400;
constexpr double desired_load_factor = 1.;

using probe = std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
Expand Down
18 changes: 14 additions & 4 deletions tests/static_set/shared_memory_test.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
* Copyright (c) 2023-2026, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,6 +28,7 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>
#include <limits>

template <std::size_t ValidSize, typename Ref>
Expand All @@ -47,6 +48,8 @@ __global__ void shared_memory_test_kernel(Ref* sets,
auto insert_ref = sets[set_id].make_copy(g, sm_buffer, cuco::thread_scope_block);
auto find_ref = insert_ref.rebind_operators(cuco::op::find);

g.sync();

for (int i = g.thread_rank(); i < number_of_elements; i += g.size()) {
auto found_it = find_ref.find(insterted_keys[offset + i]);

Expand All @@ -67,6 +70,8 @@ __global__ void shared_memory_test_kernel(Ref* sets,
TEMPLATE_TEST_CASE_SIG("static_set shared memory tests",
"",
((typename Key), Key),
(int8_t),
(int16_t),
Comment thread
sleeepyjack marked this conversation as resolved.
(int32_t),
(int64_t)
#if defined(CUCO_HAS_128BIT_ATOMICS)
Expand All @@ -75,8 +80,12 @@ TEMPLATE_TEST_CASE_SIG("static_set shared memory tests",
#endif
)
{
constexpr std::size_t number_of_sets = 1000;
constexpr std::size_t elements_in_set = 500;
// For int8_t: sentinel = -1, so usable key range is -128...127 (excluding -1).
// For int16_t: sentinel = -1, so usable key range is -32768..32767 (excluding -1).
// thrust::sequence over number_of_sets*elements_in_set keys must not wrap.
// Use smaller set count and element count for smaller types.
constexpr std::size_t number_of_sets = (sizeof(Key) <= 2) ? (sizeof(Key) == 1 ? 2 : 100) : 1000;
constexpr std::size_t elements_in_set = (sizeof(Key) <= 2) ? (sizeof(Key) == 1 ? 100 : 600) : 500;
constexpr std::size_t set_capacity = 2 * elements_in_set;

using extent_type = cuco::extent<std::size_t, set_capacity>;
Expand All @@ -96,7 +105,8 @@ TEMPLATE_TEST_CASE_SIG("static_set shared memory tests",
// operator yet
std::vector<std::unique_ptr<set_type>> sets;
for (std::size_t set_id = 0; set_id < number_of_sets; ++set_id) {
sets.push_back(std::make_unique<set_type>(extent_type{}, cuco::empty_key<Key>{-1}));
sets.push_back(
std::make_unique<set_type>(extent_type{}, cuco::empty_key<Key>{static_cast<Key>(-1)}));
}

thrust::device_vector<bool> d_keys_exist(number_of_sets * elements_in_set);
Expand Down
Loading
Loading